51 files changed, 19227 insertions, 0 deletions
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
new file mode 100644
index 000000000..741a8fa8a
--- /dev/null
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -0,0 +1,110 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_CELL
+	bool
+	default n
+
+config PPC_CELL_COMMON
+	bool
+	select PPC_CELL
+	select PPC_DCR_MMIO
+	select PPC_INDIRECT_PIO
+	select PPC_INDIRECT_MMIO
+	select PPC_NATIVE
+	select PPC_RTAS
+	select IRQ_EDGE_EOI_HANDLER
+
+config PPC_CELL_NATIVE
+	bool
+	select PPC_CELL_COMMON
+	select MPIC
+	select PPC_IO_WORKAROUNDS
+	select IBM_EMAC_EMAC4 if IBM_EMAC
+	select IBM_EMAC_RGMII if IBM_EMAC
+	select IBM_EMAC_ZMII if IBM_EMAC #test only
+	select IBM_EMAC_TAH if IBM_EMAC  #test only
+	default n
+
+config PPC_IBM_CELL_BLADE
+	bool "IBM Cell Blade"
+	depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN
+	select PPC_CELL_NATIVE
+	select PPC_OF_PLATFORM_PCI
+	select PCI
+	select MMIO_NVRAM
+	select PPC_UDBG_16550
+	select UDBG_RTAS_CONSOLE
+
+config AXON_MSI
+	bool
+	depends on PPC_IBM_CELL_BLADE && PCI_MSI
+	default y
+
+menu "Cell Broadband Engine options"
+	depends on PPC_CELL
+
+config SPU_FS
+	tristate "SPU file system"
+	default m
+	depends on PPC_CELL
+	depends on COREDUMP
+	select SPU_BASE
+	help
+	  The SPU file system is used to access Synergistic Processing
+	  Units on machines implementing the Broadband Processor
+	  Architecture.
+
+config SPU_BASE
+	bool
+	default n
+	select PPC_COPRO_BASE
+
+config CBE_RAS
+	bool "RAS features for bare metal Cell BE"
+	depends on PPC_CELL_NATIVE
+	default y
+
+config PPC_IBM_CELL_RESETBUTTON
+	bool "IBM Cell Blade Pinhole reset button"
+	depends on CBE_RAS && PPC_IBM_CELL_BLADE
+	default y
+	help
+	  Support Pinhole Resetbutton on IBM Cell blades.
+	  This adds a method to trigger system reset via front panel pinhole button.
+
+config PPC_IBM_CELL_POWERBUTTON
+	tristate "IBM Cell Blade power button"
+	depends on PPC_IBM_CELL_BLADE && INPUT_EVDEV
+	default y
+	help
+	  Support Powerbutton on IBM Cell blades.
+	  This will enable the powerbutton as an input device.
+
+config CBE_THERM
+	tristate "CBE thermal support"
+	default m
+	depends on CBE_RAS && SPU_BASE
+
+config PPC_PMI
+	tristate
+	default y
+	depends on CPU_FREQ_CBE_PMI || PPC_IBM_CELL_POWERBUTTON
+	help
+	  PMI (Platform Management Interrupt) is a way to
+	  communicate with the BMC (Baseboard Management Controller).
+	  It is used in some IBM Cell blades.
+
+config CBE_CPUFREQ_SPU_GOVERNOR
+	tristate "CBE frequency scaling based on SPU usage"
+	depends on SPU_FS && CPU_FREQ
+	default m
+	help
+	  This governor checks for spu usage to adjust the cpu frequency.
+	  If no spu is running on a given cpu, that cpu will be throttled to
+	  the minimal possible frequency.
+
+endmenu
+
+config OPROFILE_CELL
+	def_bool y
+	depends on PPC_CELL_NATIVE && (OPROFILE = m || OPROFILE = y) && SPU_BASE
+
diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile
new file mode 100644
index 000000000..10064a33c
--- /dev/null
+++ b/arch/powerpc/platforms/cell/Makefile
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_PPC_CELL_COMMON)		+= cbe_regs.o interrupt.o pervasive.o
+
+obj-$(CONFIG_PPC_CELL_NATIVE)		+= iommu.o setup.o spider-pic.o \
+					   pmu.o spider-pci.o
+obj-$(CONFIG_CBE_RAS)			+= ras.o
+
+obj-$(CONFIG_CBE_THERM)			+= cbe_thermal.o
+obj-$(CONFIG_CBE_CPUFREQ_SPU_GOVERNOR)	+= cpufreq_spudemand.o
+
+obj-$(CONFIG_PPC_IBM_CELL_POWERBUTTON)	+= cbe_powerbutton.o
+
+ifdef CONFIG_SMP
+obj-$(CONFIG_PPC_CELL_NATIVE)		+= smp.o
+endif
+
+# needed only when building loadable spufs.ko
+spu-priv1-$(CONFIG_PPC_CELL_COMMON)	+= spu_priv1_mmio.o
+spu-manage-$(CONFIG_PPC_CELL_COMMON)	+= spu_manage.o
+
+obj-$(CONFIG_SPU_BASE)			+= spu_callbacks.o spu_base.o \
+					   spu_notify.o \
+					   spu_syscalls.o \
+					   $(spu-priv1-y) \
+					   $(spu-manage-y) \
+					   spufs/
+
+obj-$(CONFIG_AXON_MSI)			+= axon_msi.o
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
new file mode 100644
index 000000000..326d34e2a
--- /dev/null
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -0,0 +1,493 @@
+/*
+ * Copyright 2007, Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <linux/export.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+
+#include <asm/debugfs.h>
+#include <asm/dcr.h>
+#include <asm/machdep.h>
+#include <asm/prom.h>
+
+#include "cell.h"
+
+/*
+ * MSIC registers, specified as offsets from dcr_base
+ */
+#define MSIC_CTRL_REG	0x0
+
+/* Base Address registers specify FIFO location in BE memory */
+#define MSIC_BASE_ADDR_HI_REG	0x3
+#define MSIC_BASE_ADDR_LO_REG	0x4
+
+/* Hold the read/write offsets into the FIFO */
+#define MSIC_READ_OFFSET_REG	0x5
+#define MSIC_WRITE_OFFSET_REG	0x6
+
+
+/* MSIC control register flags */
+#define MSIC_CTRL_ENABLE		0x0001
+#define MSIC_CTRL_FIFO_FULL_ENABLE	0x0002
+#define MSIC_CTRL_IRQ_ENABLE		0x0008
+#define MSIC_CTRL_FULL_STOP_ENABLE	0x0010
+
+/*
+ * The MSIC can be configured to use a FIFO of 32KB, 64KB, 128KB or 256KB.
+ * Currently we're using a 64KB FIFO size.
+ */
+#define MSIC_FIFO_SIZE_SHIFT	16
+#define MSIC_FIFO_SIZE_BYTES	(1 << MSIC_FIFO_SIZE_SHIFT)
+
+/*
+ * To configure the FIFO size as (1 << n) bytes, we write (n - 15) into bits
+ * 8-9 of the MSIC control reg.
+ */
+#define MSIC_CTRL_FIFO_SIZE	(((MSIC_FIFO_SIZE_SHIFT - 15) << 8) & 0x300)
+
+/*
+ * We need to mask the read/write offsets to make sure they stay within
+ * the bounds of the FIFO. Also they should always be 16-byte aligned.
+ */
+#define MSIC_FIFO_SIZE_MASK	((MSIC_FIFO_SIZE_BYTES - 1) & ~0xFu)
+
+/* Each entry in the FIFO is 16 bytes, the first 4 bytes hold the irq # */
+#define MSIC_FIFO_ENTRY_SIZE	0x10
+
+
+struct axon_msic {
+	struct irq_domain *irq_domain;
+	__le32 *fifo_virt;
+	dma_addr_t fifo_phys;
+	dcr_host_t dcr_host;
+	u32 read_offset;
+#ifdef DEBUG
+	u32 __iomem *trigger;
+#endif
+};
+
+#ifdef DEBUG
+void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic);
+#else
+static inline void axon_msi_debug_setup(struct device_node *dn,
+					struct axon_msic *msic) { }
+#endif
+
+
+static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val)
+{
+	pr_devel("axon_msi: dcr_write(0x%x, 0x%x)\n", val, dcr_n);
+
+	dcr_write(msic->dcr_host, dcr_n, val);
+}
+
+static void axon_msi_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct axon_msic *msic = irq_desc_get_handler_data(desc);
+	u32 write_offset, msi;
+	int idx;
+	int retry = 0;
+
+	write_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG);
+	pr_devel("axon_msi: original write_offset 0x%x\n", write_offset);
+
+	/* write_offset doesn't wrap properly, so we have to mask it */
+	write_offset &= MSIC_FIFO_SIZE_MASK;
+
+	while (msic->read_offset != write_offset && retry < 100) {
+		idx  = msic->read_offset / sizeof(__le32);
+		msi  = le32_to_cpu(msic->fifo_virt[idx]);
+		msi &= 0xFFFF;
+
+		pr_devel("axon_msi: woff %x roff %x msi %x\n",
+			  write_offset, msic->read_offset, msi);
+
+		if (msi < nr_irqs && irq_get_chip_data(msi) == msic) {
+			generic_handle_irq(msi);
+			msic->fifo_virt[idx] = cpu_to_le32(0xffffffff);
+		} else {
+			/*
+			 * Reading the MSIC_WRITE_OFFSET_REG does not
+			 * reliably flush the outstanding DMA to the
+			 * FIFO buffer. Here we were reading stale
+			 * data, so we need to retry.
+			 */
+			udelay(1);
+			retry++;
+			pr_devel("axon_msi: invalid irq 0x%x!\n", msi);
+			continue;
+		}
+
+		if (retry) {
+			pr_devel("axon_msi: late irq 0x%x, retry %d\n",
+				 msi, retry);
+			retry = 0;
+		}
+
+		msic->read_offset += MSIC_FIFO_ENTRY_SIZE;
+		msic->read_offset &= MSIC_FIFO_SIZE_MASK;
+	}
+
+	if (retry) {
+		printk(KERN_WARNING "axon_msi: irq timed out\n");
+
+		msic->read_offset += MSIC_FIFO_ENTRY_SIZE;
+		msic->read_offset &= MSIC_FIFO_SIZE_MASK;
+	}
+
+	chip->irq_eoi(&desc->irq_data);
+}
+
+static struct axon_msic *find_msi_translator(struct pci_dev *dev)
+{
+	struct irq_domain *irq_domain;
+	struct device_node *dn, *tmp;
+	const phandle *ph;
+	struct axon_msic *msic = NULL;
+
+	dn = of_node_get(pci_device_to_OF_node(dev));
+	if (!dn) {
+		dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n");
+		return NULL;
+	}
+
+	for (; dn; dn = of_get_next_parent(dn)) {
+		ph = of_get_property(dn, "msi-translator", NULL);
+		if (ph)
+			break;
+	}
+
+	if (!ph) {
+		dev_dbg(&dev->dev,
+			"axon_msi: no msi-translator property found\n");
+		goto out_error;
+	}
+
+	tmp = dn;
+	dn = of_find_node_by_phandle(*ph);
+	of_node_put(tmp);
+	if (!dn) {
+		dev_dbg(&dev->dev,
+			"axon_msi: msi-translator doesn't point to a node\n");
+		goto out_error;
+	}
+
+	irq_domain = irq_find_host(dn);
+	if (!irq_domain) {
+		dev_dbg(&dev->dev, "axon_msi: no irq_domain found for node %pOF\n",
+			dn);
+		goto out_error;
+	}
+
+	msic = irq_domain->host_data;
+
+out_error:
+	of_node_put(dn);
+
+	return msic;
+}
+
+static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg)
+{
+	struct device_node *dn;
+	struct msi_desc *entry;
+	int len;
+	const u32 *prop;
+
+	dn = of_node_get(pci_device_to_OF_node(dev));
+	if (!dn) {
+		dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n");
+		return -ENODEV;
+	}
+
+	entry = first_pci_msi_entry(dev);
+
+	for (; dn; dn = of_get_next_parent(dn)) {
+		if (entry->msi_attrib.is_64) {
+			prop = of_get_property(dn, "msi-address-64", &len);
+			if (prop)
+				break;
+		}
+
+		prop = of_get_property(dn, "msi-address-32", &len);
+		if (prop)
+			break;
+	}
+
+	if (!prop) {
+		dev_dbg(&dev->dev,
+			"axon_msi: no msi-address-(32|64) properties found\n");
+		return -ENOENT;
+	}
+
+	switch (len) {
+	case 8:
+		msg->address_hi = prop[0];
+		msg->address_lo = prop[1];
+		break;
+	case 4:
+		msg->address_hi = 0;
+		msg->address_lo = prop[0];
+		break;
+	default:
+		dev_dbg(&dev->dev,
+			"axon_msi: malformed msi-address-(32|64) property\n");
+		of_node_put(dn);
+		return -EINVAL;
+	}
+
+	of_node_put(dn);
+
+	return 0;
+}
+
+static int axon_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	unsigned int virq, rc;
+	struct msi_desc *entry;
+	struct msi_msg msg;
+	struct axon_msic *msic;
+
+	msic = find_msi_translator(dev);
+	if (!msic)
+		return -ENODEV;
+
+	rc = setup_msi_msg_address(dev, &msg);
+	if (rc)
+		return rc;
+
+	for_each_pci_msi_entry(entry, dev) {
+		virq = irq_create_direct_mapping(msic->irq_domain);
+		if (!virq) {
+			dev_warn(&dev->dev,
+				 "axon_msi: virq allocation failed!\n");
+			return -1;
+		}
+		dev_dbg(&dev->dev, "axon_msi: allocated virq 0x%x\n", virq);
+
+		irq_set_msi_desc(virq, entry);
+		msg.data = virq;
+		pci_write_msi_msg(virq, &msg);
+	}
+
+	return 0;
+}
+
+static void axon_msi_teardown_msi_irqs(struct pci_dev *dev)
+{
+	struct msi_desc *entry;
+
+	dev_dbg(&dev->dev, "axon_msi: tearing down msi irqs\n");
+
+	for_each_pci_msi_entry(entry, dev) {
+		if (!entry->irq)
+			continue;
+
+		irq_set_msi_desc(entry->irq, NULL);
+		irq_dispose_mapping(entry->irq);
+	}
+}
+
+static struct irq_chip msic_irq_chip = {
+	.irq_mask	= pci_msi_mask_irq,
+	.irq_unmask	= pci_msi_unmask_irq,
+	.irq_shutdown	= pci_msi_mask_irq,
+	.name		= "AXON-MSI",
+};
+
+static int msic_host_map(struct irq_domain *h, unsigned int virq,
+			 irq_hw_number_t hw)
+{
+	irq_set_chip_data(virq, h->host_data);
+	irq_set_chip_and_handler(virq, &msic_irq_chip, handle_simple_irq);
+
+	return 0;
+}
+
+static const struct irq_domain_ops msic_host_ops = {
+	.map	= msic_host_map,
+};
+
+static void axon_msi_shutdown(struct platform_device *device)
+{
+	struct axon_msic *msic = dev_get_drvdata(&device->dev);
+	u32 tmp;
+
+	pr_devel("axon_msi: disabling %pOF\n",
+		 irq_domain_get_of_node(msic->irq_domain));
+	tmp  = dcr_read(msic->dcr_host, MSIC_CTRL_REG);
+	tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE;
+	msic_dcr_write(msic, MSIC_CTRL_REG, tmp);
+}
+
+static int axon_msi_probe(struct platform_device *device)
+{
+	struct device_node *dn = device->dev.of_node;
+	struct axon_msic *msic;
+	unsigned int virq;
+	int dcr_base, dcr_len;
+
+	pr_devel("axon_msi: setting up dn %pOF\n", dn);
+
+	msic = kzalloc(sizeof(*msic), GFP_KERNEL);
+	if (!msic) {
+		printk(KERN_ERR "axon_msi: couldn't allocate msic for %pOF\n",
+		       dn);
+		goto out;
+	}
+
+	dcr_base = dcr_resource_start(dn, 0);
+	dcr_len = dcr_resource_len(dn, 0);
+
+	if (dcr_base == 0 || dcr_len == 0) {
+		printk(KERN_ERR
+		       "axon_msi: couldn't parse dcr properties on %pOF\n",
+			dn);
+		goto out_free_msic;
+	}
+
+	msic->dcr_host = dcr_map(dn, dcr_base, dcr_len);
+	if (!DCR_MAP_OK(msic->dcr_host)) {
+		printk(KERN_ERR "axon_msi: dcr_map failed for %pOF\n",
+		       dn);
+		goto out_free_msic;
+	}
+
+	msic->fifo_virt = dma_alloc_coherent(&device->dev, MSIC_FIFO_SIZE_BYTES,
+					     &msic->fifo_phys, GFP_KERNEL);
+	if (!msic->fifo_virt) {
+		printk(KERN_ERR "axon_msi: couldn't allocate fifo for %pOF\n",
+		       dn);
+		goto out_free_msic;
+	}
+
+	virq = irq_of_parse_and_map(dn, 0);
+	if (!virq) {
+		printk(KERN_ERR "axon_msi: irq parse and map failed for %pOF\n",
+		       dn);
+		goto out_free_fifo;
+	}
+	memset(msic->fifo_virt, 0xff, MSIC_FIFO_SIZE_BYTES);
+
+	/* We rely on being able to stash a virq in a u16, so limit irqs to < 65536 */
+	msic->irq_domain = irq_domain_add_nomap(dn, 65536, &msic_host_ops, msic);
+	if (!msic->irq_domain) {
+		printk(KERN_ERR "axon_msi: couldn't allocate irq_domain for %pOF\n",
+		       dn);
+		goto out_free_fifo;
+	}
+
+	irq_set_handler_data(virq, msic);
+	irq_set_chained_handler(virq, axon_msi_cascade);
+	pr_devel("axon_msi: irq 0x%x setup for axon_msi\n", virq);
+
+	/* Enable the MSIC hardware */
+	msic_dcr_write(msic, MSIC_BASE_ADDR_HI_REG, msic->fifo_phys >> 32);
+	msic_dcr_write(msic, MSIC_BASE_ADDR_LO_REG,
+				  msic->fifo_phys & 0xFFFFFFFF);
+	msic_dcr_write(msic, MSIC_CTRL_REG,
+			MSIC_CTRL_IRQ_ENABLE | MSIC_CTRL_ENABLE |
+			MSIC_CTRL_FIFO_SIZE);
+
+	msic->read_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG)
+				& MSIC_FIFO_SIZE_MASK;
+
+	dev_set_drvdata(&device->dev, msic);
+
+	cell_pci_controller_ops.setup_msi_irqs = axon_msi_setup_msi_irqs;
+	cell_pci_controller_ops.teardown_msi_irqs = axon_msi_teardown_msi_irqs;
+
+	axon_msi_debug_setup(dn, msic);
+
+	printk(KERN_DEBUG "axon_msi: setup MSIC on %pOF\n", dn);
+
+	return 0;
+
+out_free_fifo:
+	dma_free_coherent(&device->dev, MSIC_FIFO_SIZE_BYTES, msic->fifo_virt,
+			  msic->fifo_phys);
+out_free_msic:
+	kfree(msic);
+out:
+
+	return -1;
+}
+
+static const struct of_device_id axon_msi_device_id[] = {
+	{
+		.compatible	= "ibm,axon-msic"
+	},
+	{}
+};
+
+static struct platform_driver axon_msi_driver = {
+	.probe		= axon_msi_probe,
+	.shutdown	= axon_msi_shutdown,
+	.driver = {
+		.name = "axon-msi",
+		.of_match_table = axon_msi_device_id,
+	},
+};
+
+static int __init axon_msi_init(void)
+{
+	return platform_driver_register(&axon_msi_driver);
+}
+subsys_initcall(axon_msi_init);
+
+
+#ifdef DEBUG
+static int msic_set(void *data, u64 val)
+{
+	struct axon_msic *msic = data;
+	out_le32(msic->trigger, val);
+	return 0;
+}
+
+static int msic_get(void *data, u64 *val)
+{
+	*val = 0;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_msic, msic_get, msic_set, "%llu\n");
+
+void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic)
+{
+	char name[8];
+	u64 addr;
+
+	addr = of_translate_address(dn, of_get_property(dn, "reg", NULL));
+	if (addr == OF_BAD_ADDR) {
+		pr_devel("axon_msi: couldn't translate reg property\n");
+		return;
+	}
+
+	msic->trigger = ioremap(addr, 0x4);
+	if (!msic->trigger) {
+		pr_devel("axon_msi: ioremap failed\n");
+		return;
+	}
+
+	snprintf(name, sizeof(name), "msic_%d", of_node_to_nid(dn));
+
+	if (!debugfs_create_file(name, 0600, powerpc_debugfs_root,
+				 msic, &fops_msic)) {
+		pr_devel("axon_msi: debugfs_create_file failed!\n");
+		return;
+	}
+}
+#endif /* DEBUG */
diff --git a/arch/powerpc/platforms/cell/cbe_powerbutton.c b/arch/powerpc/platforms/cell/cbe_powerbutton.c
new file mode 100644
index 000000000..2bb803130
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_powerbutton.c
@@ -0,0 +1,118 @@
+/*
+ * driver for powerbutton on IBM cell blades
+ *
+ * (C) Copyright IBM Corp. 2005-2008
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/input.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <asm/pmi.h>
+#include <asm/prom.h>
+
+static struct input_dev *button_dev;
+static struct platform_device *button_pdev;
+
+static void cbe_powerbutton_handle_pmi(pmi_message_t pmi_msg)
+{
+	BUG_ON(pmi_msg.type != PMI_TYPE_POWER_BUTTON);
+
+	input_report_key(button_dev, KEY_POWER, 1);
+	input_sync(button_dev);
+	input_report_key(button_dev, KEY_POWER, 0);
+	input_sync(button_dev);
+}
+
+static struct pmi_handler cbe_pmi_handler = {
+	.type			= PMI_TYPE_POWER_BUTTON,
+	.handle_pmi_message	= cbe_powerbutton_handle_pmi,
+};
+
+static int __init cbe_powerbutton_init(void)
+{
+	int ret = 0;
+	struct input_dev *dev;
+
+	if (!of_machine_is_compatible("IBM,CBPLUS-1.0")) {
+		printk(KERN_ERR "%s: Not a cell blade.\n", __func__);
+		ret = -ENODEV;
+		goto out;
+	}
+
+	dev = input_allocate_device();
+	if (!dev) {
+		ret = -ENOMEM;
+		printk(KERN_ERR "%s: Not enough memory.\n", __func__);
+		goto out;
+	}
+
+	set_bit(EV_KEY, dev->evbit);
+	set_bit(KEY_POWER, dev->keybit);
+
+	dev->name = "Power Button";
+	dev->id.bustype = BUS_HOST;
+
+	/* this makes the button look like an acpi power button
+	 * no clue whether anyone relies on that though */
+	dev->id.product = 0x02;
+	dev->phys = "LNXPWRBN/button/input0";
+
+	button_pdev = platform_device_register_simple("power_button", 0, NULL, 0);
+	if (IS_ERR(button_pdev)) {
+		ret = PTR_ERR(button_pdev);
+		goto out_free_input;
+	}
+
+	dev->dev.parent = &button_pdev->dev;
+	ret = input_register_device(dev);
+	if (ret) {
+		printk(KERN_ERR "%s: Failed to register device\n", __func__);
+		goto out_free_pdev;
+	}
+
+	button_dev = dev;
+
+	ret = pmi_register_handler(&cbe_pmi_handler);
+	if (ret) {
+		printk(KERN_ERR "%s: Failed to register with pmi.\n", __func__);
+		goto out_free_pdev;
+	}
+
+	goto out;
+
+out_free_pdev:
+	platform_device_unregister(button_pdev);
+out_free_input:
+	input_free_device(dev);
+out:
+	return ret;
+}
+
+static void __exit cbe_powerbutton_exit(void)
+{
+	pmi_unregister_handler(&cbe_pmi_handler);
+	platform_device_unregister(button_pdev);
+	input_free_device(button_dev);
+}
+
+module_init(cbe_powerbutton_init);
+module_exit(cbe_powerbutton_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c
new file mode 100644
index 000000000..b926438d7
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_regs.c
@@ -0,0 +1,281 @@
+/*
+ * cbe_regs.c
+ *
+ * Accessor routines for the various MMIO register blocks of the CBE
+ *
+ * (c) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp.
+ */
+
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/prom.h>
+#include <asm/ptrace.h>
+#include <asm/cell-regs.h>
+
+/*
+ * Current implementation uses "cpu" nodes. We build our own mapping
+ * array of cpu numbers to cpu nodes locally for now to allow interrupt
+ * time code to have a fast path rather than call of_get_cpu_node(). If
+ * we implement cpu hotplug, we'll have to install an appropriate norifier
+ * in order to release references to the cpu going away
+ */
+static struct cbe_regs_map
+{
+	struct device_node *cpu_node;
+	struct device_node *be_node;
+	struct cbe_pmd_regs __iomem *pmd_regs;
+	struct cbe_iic_regs __iomem *iic_regs;
+	struct cbe_mic_tm_regs __iomem *mic_tm_regs;
+	struct cbe_pmd_shadow_regs pmd_shadow_regs;
+} cbe_regs_maps[MAX_CBE];
+static int cbe_regs_map_count;
+
+static struct cbe_thread_map
+{
+	struct device_node *cpu_node;
+	struct device_node *be_node;
+	struct cbe_regs_map *regs;
+	unsigned int thread_id;
+	unsigned int cbe_id;
+} cbe_thread_map[NR_CPUS];
+
+static cpumask_t cbe_local_mask[MAX_CBE] = { [0 ... MAX_CBE-1] = {CPU_BITS_NONE} };
+static cpumask_t cbe_first_online_cpu = { CPU_BITS_NONE };
+
+static struct cbe_regs_map *cbe_find_map(struct device_node *np)
+{
+	int i;
+	struct device_node *tmp_np;
+
+	if (strcasecmp(np->type, "spe")) {
+		for (i = 0; i < cbe_regs_map_count; i++)
+			if (cbe_regs_maps[i].cpu_node == np ||
+			    cbe_regs_maps[i].be_node == np)
+				return &cbe_regs_maps[i];
+		return NULL;
+	}
+
+	if (np->data)
+		return np->data;
+
+	/* walk up path until cpu or be node was found */
+	tmp_np = np;
+	do {
+		tmp_np = tmp_np->parent;
+		/* on a correct devicetree we wont get up to root */
+		BUG_ON(!tmp_np);
+	} while (strcasecmp(tmp_np->type, "cpu") &&
+		 strcasecmp(tmp_np->type, "be"));
+
+	np->data = cbe_find_map(tmp_np);
+
+	return np->data;
+}
+
+struct cbe_pmd_regs __iomem *cbe_get_pmd_regs(struct device_node *np)
+{
+	struct cbe_regs_map *map = cbe_find_map(np);
+	if (map == NULL)
+		return NULL;
+	return map->pmd_regs;
+}
+EXPORT_SYMBOL_GPL(cbe_get_pmd_regs);
+
+struct cbe_pmd_regs __iomem *cbe_get_cpu_pmd_regs(int cpu)
+{
+	struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
+	if (map == NULL)
+		return NULL;
+	return map->pmd_regs;
+}
+EXPORT_SYMBOL_GPL(cbe_get_cpu_pmd_regs);
+
+struct cbe_pmd_shadow_regs *cbe_get_pmd_shadow_regs(struct device_node *np)
+{
+	struct cbe_regs_map *map = cbe_find_map(np);
+	if (map == NULL)
+		return NULL;
+	return &map->pmd_shadow_regs;
+}
+
+struct cbe_pmd_shadow_regs *cbe_get_cpu_pmd_shadow_regs(int cpu)
+{
+	struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
+	if (map == NULL)
+		return NULL;
+	return &map->pmd_shadow_regs;
+}
+
+struct cbe_iic_regs __iomem *cbe_get_iic_regs(struct device_node *np)
+{
+	struct cbe_regs_map *map = cbe_find_map(np);
+	if (map == NULL)
+		return NULL;
+	return map->iic_regs;
+}
+
+struct cbe_iic_regs __iomem *cbe_get_cpu_iic_regs(int cpu)
+{
+	struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
+	if (map == NULL)
+		return NULL;
+	return map->iic_regs;
+}
+
+struct cbe_mic_tm_regs __iomem *cbe_get_mic_tm_regs(struct device_node *np)
+{
+	struct cbe_regs_map *map = cbe_find_map(np);
+	if (map == NULL)
+		return NULL;
+	return map->mic_tm_regs;
+}
+
+struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu)
+{
+	struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
+	if (map == NULL)
+		return NULL;
+	return map->mic_tm_regs;
+}
+EXPORT_SYMBOL_GPL(cbe_get_cpu_mic_tm_regs);
+
+u32 cbe_get_hw_thread_id(int cpu)
+{
+	return cbe_thread_map[cpu].thread_id;
+}
+EXPORT_SYMBOL_GPL(cbe_get_hw_thread_id);
+
+u32 cbe_cpu_to_node(int cpu)
+{
+	return cbe_thread_map[cpu].cbe_id;
+}
+EXPORT_SYMBOL_GPL(cbe_cpu_to_node);
+
+u32 cbe_node_to_cpu(int node)
+{
+	return cpumask_first(&cbe_local_mask[node]);
+
+}
+EXPORT_SYMBOL_GPL(cbe_node_to_cpu);
+
+static struct device_node *cbe_get_be_node(int cpu_id)
+{
+	struct device_node *np;
+
+	for_each_node_by_type (np, "be") {
+		int len,i;
+		const phandle *cpu_handle;
+
+		cpu_handle = of_get_property(np, "cpus", &len);
+
+		/*
+		 * the CAB SLOF tree is non compliant, so we just assume
+		 * there is only one node
+		 */
+		if (WARN_ON_ONCE(!cpu_handle))
+			return np;
+
+		for (i=0; i<len; i++)
+			if (of_find_node_by_phandle(cpu_handle[i]) == of_get_cpu_node(cpu_id, NULL))
+				return np;
+	}
+
+	return NULL;
+}
+
+static void __init cbe_fill_regs_map(struct cbe_regs_map *map)
+{
+	if(map->be_node) {
+		struct device_node *be, *np;
+
+		be = map->be_node;
+
+		for_each_node_by_type(np, "pervasive")
+			if (of_get_parent(np) == be)
+				map->pmd_regs = of_iomap(np, 0);
+
+		for_each_node_by_type(np, "CBEA-Internal-Interrupt-Controller")
+			if (of_get_parent(np) == be)
+				map->iic_regs = of_iomap(np, 2);
+
+		for_each_node_by_type(np, "mic-tm")
+			if (of_get_parent(np) == be)
+				map->mic_tm_regs = of_iomap(np, 0);
+	} else {
+		struct device_node *cpu;
+		/* That hack must die die die ! */
+		const struct address_prop {
+			unsigned long address;
+			unsigned int len;
+		} __attribute__((packed)) *prop;
+
+		cpu = map->cpu_node;
+
+		prop = of_get_property(cpu, "pervasive", NULL);
+		if (prop != NULL)
+			map->pmd_regs = ioremap(prop->address, prop->len);
+
+		prop = of_get_property(cpu, "iic", NULL);
+		if (prop != NULL)
+			map->iic_regs = ioremap(prop->address, prop->len);
+
+		prop = of_get_property(cpu, "mic-tm", NULL);
+		if (prop != NULL)
+			map->mic_tm_regs = ioremap(prop->address, prop->len);
+	}
+}
+
+
+void __init cbe_regs_init(void)
+{
+	int i;
+	unsigned int thread_id;
+	struct device_node *cpu;
+
+	/* Build local fast map of CPUs */
+	for_each_possible_cpu(i) {
+		cbe_thread_map[i].cpu_node = of_get_cpu_node(i, &thread_id);
+		cbe_thread_map[i].be_node = cbe_get_be_node(i);
+		cbe_thread_map[i].thread_id = thread_id;
+	}
+
+	/* Find maps for each device tree CPU */
+	for_each_node_by_type(cpu, "cpu") {
+		struct cbe_regs_map *map;
+		unsigned int cbe_id;
+
+		cbe_id = cbe_regs_map_count++;
+		map = &cbe_regs_maps[cbe_id];
+
+		if (cbe_regs_map_count > MAX_CBE) {
+			printk(KERN_ERR "cbe_regs: More BE chips than supported"
+			       "!\n");
+			cbe_regs_map_count--;
+			of_node_put(cpu);
+			return;
+		}
+		map->cpu_node = cpu;
+
+		for_each_possible_cpu(i) {
+			struct cbe_thread_map *thread = &cbe_thread_map[i];
+
+			if (thread->cpu_node == cpu) {
+				thread->regs = map;
+				thread->cbe_id = cbe_id;
+				map->be_node = thread->be_node;
+				cpumask_set_cpu(i, &cbe_local_mask[cbe_id]);
+				if(thread->thread_id == 0)
+					cpumask_set_cpu(i, &cbe_first_online_cpu);
+			}
+		}
+
+		cbe_fill_regs_map(map);
+	}
+}
+
diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c
new file mode 100644
index 000000000..55aac74e1
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_thermal.c
@@ -0,0 +1,400 @@
+/*
+ * thermal support for the cell processor
+ *
+ * This module adds some sysfs attributes to cpu and spu nodes.
+ * Base for measurements are the digital thermal sensors (DTS)
+ * located on the chip.
+ * The accuracy is 2 degrees, starting from 65 up to 125 degrees celsius
+ * The attributes can be found under
+ * /sys/devices/system/cpu/cpuX/thermal
+ * /sys/devices/system/spu/spuX/thermal
+ *
+ * The following attributes are added for each node:
+ * temperature:
+ *	contains the current temperature measured by the DTS
+ * throttle_begin:
+ *	throttling begins when temperature is greater or equal to
+ *	throttle_begin. Setting this value to 125 prevents throttling.
+ * throttle_end:
+ *	throttling is being ceased, if the temperature is lower than
+ *	throttle_end. Due to a delay between applying throttling and
+ *	a reduced temperature this value should be less than throttle_begin.
+ *	A value equal to throttle_begin provides only a very little hysteresis.
+ * throttle_full_stop:
+ *	If the temperatrue is greater or equal to throttle_full_stop,
+ *	full throttling is applied to the cpu or spu. This value should be
+ *	greater than throttle_begin and throttle_end. Setting this value to
+ *	65 prevents the unit from running code at all.
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <linux/stringify.h>
+#include <asm/spu.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/cell-regs.h>
+
+#include "spu_priv1_mmio.h"
+
+#define TEMP_MIN 65
+#define TEMP_MAX 125
+
+#define DEVICE_PREFIX_ATTR(_prefix,_name,_mode)			\
+struct device_attribute attr_ ## _prefix ## _ ## _name = {	\
+	.attr = { .name = __stringify(_name), .mode = _mode },	\
+	.show	= _prefix ## _show_ ## _name,			\
+	.store	= _prefix ## _store_ ## _name,			\
+};
+
+static inline u8 reg_to_temp(u8 reg_value)
+{
+	return ((reg_value & 0x3f) << 1) + TEMP_MIN;
+}
+
+static inline u8 temp_to_reg(u8 temp)
+{
+	return ((temp - TEMP_MIN) >> 1) & 0x3f;
+}
+
+static struct cbe_pmd_regs __iomem *get_pmd_regs(struct device *dev)
+{
+	struct spu *spu;
+
+	spu = container_of(dev, struct spu, dev);
+
+	return cbe_get_pmd_regs(spu_devnode(spu));
+}
+
+/* returns the value for a given spu in a given register */
+static u8 spu_read_register_value(struct device *dev, union spe_reg __iomem *reg)
+{
+	union spe_reg value;
+	struct spu *spu;
+
+	spu = container_of(dev, struct spu, dev);
+	value.val = in_be64(&reg->val);
+
+	return value.spe[spu->spe_id];
+}
+
+static ssize_t spu_show_temp(struct device *dev, struct device_attribute *attr,
+			char *buf)
+{
+	u8 value;
+	struct cbe_pmd_regs __iomem *pmd_regs;
+
+	pmd_regs = get_pmd_regs(dev);
+
+	value = spu_read_register_value(dev, &pmd_regs->ts_ctsr1);
+
+	return sprintf(buf, "%d\n", reg_to_temp(value));
+}
+
+static ssize_t show_throttle(struct cbe_pmd_regs __iomem *pmd_regs, char *buf, int pos)
+{
+	u64 value;
+
+	value = in_be64(&pmd_regs->tm_tpr.val);
+	/* access the corresponding byte */
+	value >>= pos;
+	value &= 0x3F;
+
+	return sprintf(buf, "%d\n", reg_to_temp(value));
+}
+
+static ssize_t store_throttle(struct cbe_pmd_regs __iomem *pmd_regs, const char *buf, size_t size, int pos)
+{
+	u64 reg_value;
+	unsigned int temp;
+	u64 new_value;
+	int ret;
+
+	ret = sscanf(buf, "%u", &temp);
+
+	if (ret != 1 || temp < TEMP_MIN || temp > TEMP_MAX)
+		return -EINVAL;
+
+	new_value = temp_to_reg(temp);
+
+	reg_value = in_be64(&pmd_regs->tm_tpr.val);
+
+	/* zero out bits for new value */
+	reg_value &= ~(0xffull << pos);
+	/* set bits to new value */
+	reg_value |= new_value << pos;
+
+	out_be64(&pmd_regs->tm_tpr.val, reg_value);
+	return size;
+}
+
+static ssize_t spu_show_throttle_end(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	return show_throttle(get_pmd_regs(dev), buf, 0);
+}
+
+static ssize_t spu_show_throttle_begin(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	return show_throttle(get_pmd_regs(dev), buf, 8);
+}
+
+static ssize_t spu_show_throttle_full_stop(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	return show_throttle(get_pmd_regs(dev), buf, 16);
+}
+
+static ssize_t spu_store_throttle_end(struct device *dev,
+			struct device_attribute *attr, const char *buf, size_t size)
+{
+	return store_throttle(get_pmd_regs(dev), buf, size, 0);
+}
+
+static ssize_t spu_store_throttle_begin(struct device *dev,
+			struct device_attribute *attr, const char *buf, size_t size)
+{
+	return store_throttle(get_pmd_regs(dev), buf, size, 8);
+}
+
+static ssize_t spu_store_throttle_full_stop(struct device *dev,
+			struct device_attribute *attr, const char *buf, size_t size)
+{
+	return store_throttle(get_pmd_regs(dev), buf, size, 16);
+}
+
+static ssize_t ppe_show_temp(struct device *dev, char *buf, int pos)
+{
+	struct cbe_pmd_regs __iomem *pmd_regs;
+	u64 value;
+
+	pmd_regs = cbe_get_cpu_pmd_regs(dev->id);
+	value = in_be64(&pmd_regs->ts_ctsr2);
+
+	value = (value >> pos) & 0x3f;
+
+	return sprintf(buf, "%d\n", reg_to_temp(value));
+}
+
+
+/* shows the temperature of the DTS on the PPE,
+ * located near the linear thermal sensor */
+static ssize_t ppe_show_temp0(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	return ppe_show_temp(dev, buf, 32);
+}
+
+/* shows the temperature of the second DTS on the PPE */
+static ssize_t ppe_show_temp1(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	return ppe_show_temp(dev, buf, 0);
+}
+
+static ssize_t ppe_show_throttle_end(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 32);
+}
+
+static ssize_t ppe_show_throttle_begin(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 40);
+}
+
+static ssize_t ppe_show_throttle_full_stop(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 48);
+}
+
+static ssize_t ppe_store_throttle_end(struct device *dev,
+			struct device_attribute *attr, const char *buf, size_t size)
+{
+	return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 32);
+}
+
+static ssize_t ppe_store_throttle_begin(struct device *dev,
+			struct device_attribute *attr, const char *buf, size_t size)
+{
+	return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 40);
+}
+
+static ssize_t ppe_store_throttle_full_stop(struct device *dev,
+			struct device_attribute *attr, const char *buf, size_t size)
+{
+	return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 48);
+}
+
+
+static struct device_attribute attr_spu_temperature = {
+	.attr = {.name = "temperature", .mode = 0400 },
+	.show = spu_show_temp,
+};
+
+static DEVICE_PREFIX_ATTR(spu, throttle_end, 0600);
+static DEVICE_PREFIX_ATTR(spu, throttle_begin, 0600);
+static DEVICE_PREFIX_ATTR(spu, throttle_full_stop, 0600);
+
+
+static struct attribute *spu_attributes[] = {
+	&attr_spu_temperature.attr,
+	&attr_spu_throttle_end.attr,
+	&attr_spu_throttle_begin.attr,
+	&attr_spu_throttle_full_stop.attr,
+	NULL,
+};
+
+static struct attribute_group spu_attribute_group = {
+	.name	= "thermal",
+	.attrs	= spu_attributes,
+};
+
+static struct device_attribute attr_ppe_temperature0 = {
+	.attr = {.name = "temperature0", .mode = 0400 },
+	.show = ppe_show_temp0,
+};
+
+static struct device_attribute attr_ppe_temperature1 = {
+	.attr = {.name = "temperature1", .mode = 0400 },
+	.show = ppe_show_temp1,
+};
+
+static DEVICE_PREFIX_ATTR(ppe, throttle_end, 0600);
+static DEVICE_PREFIX_ATTR(ppe, throttle_begin, 0600);
+static DEVICE_PREFIX_ATTR(ppe, throttle_full_stop, 0600);
+
+static struct attribute *ppe_attributes[] = {
+	&attr_ppe_temperature0.attr,
+	&attr_ppe_temperature1.attr,
+	&attr_ppe_throttle_end.attr,
+	&attr_ppe_throttle_begin.attr,
+	&attr_ppe_throttle_full_stop.attr,
+	NULL,
+};
+
+static struct attribute_group ppe_attribute_group = {
+	.name	= "thermal",
+	.attrs	= ppe_attributes,
+};
+
+/*
+ * initialize throttling with default values
+ */
+static int __init init_default_values(void)
+{
+	int cpu;
+	struct cbe_pmd_regs __iomem *pmd_regs;
+	struct device *dev;
+	union ppe_spe_reg tpr;
+	union spe_reg str1;
+	u64 str2;
+	union spe_reg cr1;
+	u64 cr2;
+
+	/* TPR defaults */
+	/* ppe
+	 *	1F - no full stop
+	 *	08 - dynamic throttling starts if over 80 degrees
+	 *	03 - dynamic throttling ceases if below 70 degrees */
+	tpr.ppe = 0x1F0803;
+	/* spe
+	 *	10 - full stopped when over 96 degrees
+	 *	08 - dynamic throttling starts if over 80 degrees
+	 *	03 - dynamic throttling ceases if below 70 degrees
+	 */
+	tpr.spe = 0x100803;
+
+	/* STR defaults */
+	/* str1
+	 *	10 - stop 16 of 32 cycles
+	 */
+	str1.val = 0x1010101010101010ull;
+	/* str2
+	 *	10 - stop 16 of 32 cycles
+	 */
+	str2 = 0x10;
+
+	/* CR defaults */
+	/* cr1
+	 *	4 - normal operation
+	 */
+	cr1.val = 0x0404040404040404ull;
+	/* cr2
+	 *	4 - normal operation
+	 */
+	cr2 = 0x04;
+
+	for_each_possible_cpu (cpu) {
+		pr_debug("processing cpu %d\n", cpu);
+		dev = get_cpu_device(cpu);
+
+		if (!dev) {
+			pr_info("invalid dev pointer for cbe_thermal\n");
+			return -EINVAL;
+		}
+
+		pmd_regs = cbe_get_cpu_pmd_regs(dev->id);
+
+		if (!pmd_regs) {
+			pr_info("invalid CBE regs pointer for cbe_thermal\n");
+			return -EINVAL;
+		}
+
+		out_be64(&pmd_regs->tm_str2, str2);
+		out_be64(&pmd_regs->tm_str1.val, str1.val);
+		out_be64(&pmd_regs->tm_tpr.val, tpr.val);
+		out_be64(&pmd_regs->tm_cr1.val, cr1.val);
+		out_be64(&pmd_regs->tm_cr2, cr2);
+	}
+
+	return 0;
+}
+
+
+static int __init thermal_init(void)
+{
+	int rc = init_default_values();
+
+	if (rc == 0) {
+		spu_add_dev_attr_group(&spu_attribute_group);
+		cpu_add_dev_attr_group(&ppe_attribute_group);
+	}
+
+	return rc;
+}
+module_init(thermal_init);
+
+static void __exit thermal_exit(void)
+{
+	spu_remove_dev_attr_group(&spu_attribute_group);
+	cpu_remove_dev_attr_group(&ppe_attribute_group);
+}
+module_exit(thermal_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
+
diff --git a/arch/powerpc/platforms/cell/cell.h b/arch/powerpc/platforms/cell/cell.h
new file mode 100644
index 000000000..ef143dfee
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cell.h
@@ -0,0 +1,24 @@
+/*
+ * Cell Platform common data structures
+ *
+ * Copyright 2015, Daniel Axtens, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef CELL_H
+#define CELL_H
+
+#include <asm/pci-bridge.h>
+
+extern struct pci_controller_ops cell_pci_controller_ops;
+
+#endif
diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c
new file mode 100644
index 000000000..882944c36
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c
@@ -0,0 +1,168 @@
+/*
+ * spu aware cpufreq governor for the cell processor
+ *
+ * © Copyright IBM Corporation 2006-2008
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/sched.h>
+#include <linux/sched/loadavg.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <linux/atomic.h>
+#include <asm/machdep.h>
+#include <asm/spu.h>
+
+#define POLL_TIME	100000		/* in µs */
+#define EXP		753		/* exp(-1) in fixed-point */
+
+struct spu_gov_info_struct {
+	unsigned long busy_spus;	/* fixed-point */
+	struct cpufreq_policy *policy;
+	struct delayed_work work;
+	unsigned int poll_int;		/* µs */
+};
+static DEFINE_PER_CPU(struct spu_gov_info_struct, spu_gov_info);
+
+static int calc_freq(struct spu_gov_info_struct *info)
+{
+	int cpu;
+	int busy_spus;
+
+	cpu = info->policy->cpu;
+	busy_spus = atomic_read(&cbe_spu_info[cpu_to_node(cpu)].busy_spus);
+
+	CALC_LOAD(info->busy_spus, EXP, busy_spus * FIXED_1);
+	pr_debug("cpu %d: busy_spus=%d, info->busy_spus=%ld\n",
+			cpu, busy_spus, info->busy_spus);
+
+	return info->policy->max * info->busy_spus / FIXED_1;
+}
+
+static void spu_gov_work(struct work_struct *work)
+{
+	struct spu_gov_info_struct *info;
+	int delay;
+	unsigned long target_freq;
+
+	info = container_of(work, struct spu_gov_info_struct, work.work);
+
+	/* after cancel_delayed_work_sync we unset info->policy */
+	BUG_ON(info->policy == NULL);
+
+	target_freq = calc_freq(info);
+	__cpufreq_driver_target(info->policy, target_freq, CPUFREQ_RELATION_H);
+
+	delay = usecs_to_jiffies(info->poll_int);
+	schedule_delayed_work_on(info->policy->cpu, &info->work, delay);
+}
+
+static void spu_gov_init_work(struct spu_gov_info_struct *info)
+{
+	int delay = usecs_to_jiffies(info->poll_int);
+	INIT_DEFERRABLE_WORK(&info->work, spu_gov_work);
+	schedule_delayed_work_on(info->policy->cpu, &info->work, delay);
+}
+
+static void spu_gov_cancel_work(struct spu_gov_info_struct *info)
+{
+	cancel_delayed_work_sync(&info->work);
+}
+
+static int spu_gov_start(struct cpufreq_policy *policy)
+{
+	unsigned int cpu = policy->cpu;
+	struct spu_gov_info_struct *info = &per_cpu(spu_gov_info, cpu);
+	struct spu_gov_info_struct *affected_info;
+	int i;
+
+	if (!cpu_online(cpu)) {
+		printk(KERN_ERR "cpu %d is not online\n", cpu);
+		return -EINVAL;
+	}
+
+	if (!policy->cur) {
+		printk(KERN_ERR "no cpu specified in policy\n");
+		return -EINVAL;
+	}
+
+	/* initialize spu_gov_info for all affected cpus */
+	for_each_cpu(i, policy->cpus) {
+		affected_info = &per_cpu(spu_gov_info, i);
+		affected_info->policy = policy;
+	}
+
+	info->poll_int = POLL_TIME;
+
+	/* setup timer */
+	spu_gov_init_work(info);
+
+	return 0;
+}
+
+static void spu_gov_stop(struct cpufreq_policy *policy)
+{
+	unsigned int cpu = policy->cpu;
+	struct spu_gov_info_struct *info = &per_cpu(spu_gov_info, cpu);
+	int i;
+
+	/* cancel timer */
+	spu_gov_cancel_work(info);
+
+	/* clean spu_gov_info for all affected cpus */
+	for_each_cpu (i, policy->cpus) {
+		info = &per_cpu(spu_gov_info, i);
+		info->policy = NULL;
+	}
+}
+
+static struct cpufreq_governor spu_governor = {
+	.name = "spudemand",
+	.start = spu_gov_start,
+	.stop = spu_gov_stop,
+	.owner = THIS_MODULE,
+};
+
+/*
+ * module init and destoy
+ */
+
+static int __init spu_gov_init(void)
+{
+	int ret;
+
+	ret = cpufreq_register_governor(&spu_governor);
+	if (ret)
+		printk(KERN_ERR "registration of governor failed\n");
+	return ret;
+}
+
+static void __exit spu_gov_exit(void)
+{
+	cpufreq_unregister_governor(&spu_governor);
+}
+
+
+module_init(spu_gov_init);
+module_exit(spu_gov_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
+
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
new file mode 100644
index 000000000..5d4bf9aed
--- /dev/null
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -0,0 +1,406 @@
+/*
+ * Cell Internal Interrupt Controller
+ *
+ * Copyright (C) 2006 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *                    IBM, Corp.
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * TODO:
+ * - Fix various assumptions related to HW CPU numbers vs. linux CPU numbers
+ *   vs node numbers in the setup code
+ * - Implement proper handling of maxcpus=1/2 (that is, routing of irqs from
+ *   a non-active node to the active node)
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/export.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+#include <linux/kernel_stat.h>
+
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/prom.h>
+#include <asm/ptrace.h>
+#include <asm/machdep.h>
+#include <asm/cell-regs.h>
+
+#include "interrupt.h"
+
+struct iic {
+	struct cbe_iic_thread_regs __iomem *regs;
+	u8 target_id;
+	u8 eoi_stack[16];
+	int eoi_ptr;
+	struct device_node *node;
+};
+
+static DEFINE_PER_CPU(struct iic, cpu_iic);
+#define IIC_NODE_COUNT	2
+static struct irq_domain *iic_host;
+
+/* Convert between "pending" bits and hw irq number */
+static irq_hw_number_t iic_pending_to_hwnum(struct cbe_iic_pending_bits bits)
+{
+	unsigned char unit = bits.source & 0xf;
+	unsigned char node = bits.source >> 4;
+	unsigned char class = bits.class & 3;
+
+	/* Decode IPIs */
+	if (bits.flags & CBE_IIC_IRQ_IPI)
+		return IIC_IRQ_TYPE_IPI | (bits.prio >> 4);
+	else
+		return (node << IIC_IRQ_NODE_SHIFT) | (class << 4) | unit;
+}
+
+static void iic_mask(struct irq_data *d)
+{
+}
+
+static void iic_unmask(struct irq_data *d)
+{
+}
+
+static void iic_eoi(struct irq_data *d)
+{
+	struct iic *iic = this_cpu_ptr(&cpu_iic);
+	out_be64(&iic->regs->prio, iic->eoi_stack[--iic->eoi_ptr]);
+	BUG_ON(iic->eoi_ptr < 0);
+}
+
+static struct irq_chip iic_chip = {
+	.name = "CELL-IIC",
+	.irq_mask = iic_mask,
+	.irq_unmask = iic_unmask,
+	.irq_eoi = iic_eoi,
+};
+
+
+static void iic_ioexc_eoi(struct irq_data *d)
+{
+}
+
+static void iic_ioexc_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct cbe_iic_regs __iomem *node_iic =
+		(void __iomem *)irq_desc_get_handler_data(desc);
+	unsigned int irq = irq_desc_get_irq(desc);
+	unsigned int base = (irq & 0xffffff00) | IIC_IRQ_TYPE_IOEXC;
+	unsigned long bits, ack;
+	int cascade;
+
+	for (;;) {
+		bits = in_be64(&node_iic->iic_is);
+		if (bits == 0)
+			break;
+		/* pre-ack edge interrupts */
+		ack = bits & IIC_ISR_EDGE_MASK;
+		if (ack)
+			out_be64(&node_iic->iic_is, ack);
+		/* handle them */
+		for (cascade = 63; cascade >= 0; cascade--)
+			if (bits & (0x8000000000000000UL >> cascade)) {
+				unsigned int cirq =
+					irq_linear_revmap(iic_host,
+							  base | cascade);
+				if (cirq)
+					generic_handle_irq(cirq);
+			}
+		/* post-ack level interrupts */
+		ack = bits & ~IIC_ISR_EDGE_MASK;
+		if (ack)
+			out_be64(&node_iic->iic_is, ack);
+	}
+	chip->irq_eoi(&desc->irq_data);
+}
+
+
+static struct irq_chip iic_ioexc_chip = {
+	.name = "CELL-IOEX",
+	.irq_mask = iic_mask,
+	.irq_unmask = iic_unmask,
+	.irq_eoi = iic_ioexc_eoi,
+};
+
+/* Get an IRQ number from the pending state register of the IIC */
+static unsigned int iic_get_irq(void)
+{
+	struct cbe_iic_pending_bits pending;
+	struct iic *iic;
+	unsigned int virq;
+
+	iic = this_cpu_ptr(&cpu_iic);
+	*(unsigned long *) &pending =
+		in_be64((u64 __iomem *) &iic->regs->pending_destr);
+	if (!(pending.flags & CBE_IIC_IRQ_VALID))
+		return 0;
+	virq = irq_linear_revmap(iic_host, iic_pending_to_hwnum(pending));
+	if (!virq)
+		return 0;
+	iic->eoi_stack[++iic->eoi_ptr] = pending.prio;
+	BUG_ON(iic->eoi_ptr > 15);
+	return virq;
+}
+
+void iic_setup_cpu(void)
+{
+	out_be64(&this_cpu_ptr(&cpu_iic)->regs->prio, 0xff);
+}
+
+u8 iic_get_target_id(int cpu)
+{
+	return per_cpu(cpu_iic, cpu).target_id;
+}
+
+EXPORT_SYMBOL_GPL(iic_get_target_id);
+
+#ifdef CONFIG_SMP
+
+/* Use the highest interrupt priorities for IPI */
+static inline int iic_msg_to_irq(int msg)
+{
+	return IIC_IRQ_TYPE_IPI + 0xf - msg;
+}
+
+void iic_message_pass(int cpu, int msg)
+{
+	out_be64(&per_cpu(cpu_iic, cpu).regs->generate, (0xf - msg) << 4);
+}
+
+static void iic_request_ipi(int msg)
+{
+	int virq;
+
+	virq = irq_create_mapping(iic_host, iic_msg_to_irq(msg));
+	if (!virq) {
+		printk(KERN_ERR
+		       "iic: failed to map IPI %s\n", smp_ipi_name[msg]);
+		return;
+	}
+
+	/*
+	 * If smp_request_message_ipi encounters an error it will notify
+	 * the error.  If a message is not needed it will return non-zero.
+	 */
+	if (smp_request_message_ipi(virq, msg))
+		irq_dispose_mapping(virq);
+}
+
+void iic_request_IPIs(void)
+{
+	iic_request_ipi(PPC_MSG_CALL_FUNCTION);
+	iic_request_ipi(PPC_MSG_RESCHEDULE);
+	iic_request_ipi(PPC_MSG_TICK_BROADCAST);
+	iic_request_ipi(PPC_MSG_NMI_IPI);
+}
+
+#endif /* CONFIG_SMP */
+
+
+static int iic_host_match(struct irq_domain *h, struct device_node *node,
+			  enum irq_domain_bus_token bus_token)
+{
+	return of_device_is_compatible(node,
+				    "IBM,CBEA-Internal-Interrupt-Controller");
+}
+
+static int iic_host_map(struct irq_domain *h, unsigned int virq,
+			irq_hw_number_t hw)
+{
+	switch (hw & IIC_IRQ_TYPE_MASK) {
+	case IIC_IRQ_TYPE_IPI:
+		irq_set_chip_and_handler(virq, &iic_chip, handle_percpu_irq);
+		break;
+	case IIC_IRQ_TYPE_IOEXC:
+		irq_set_chip_and_handler(virq, &iic_ioexc_chip,
+					 handle_edge_eoi_irq);
+		break;
+	default:
+		irq_set_chip_and_handler(virq, &iic_chip, handle_edge_eoi_irq);
+	}
+	return 0;
+}
+
+static int iic_host_xlate(struct irq_domain *h, struct device_node *ct,
+			   const u32 *intspec, unsigned int intsize,
+			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+	unsigned int node, ext, unit, class;
+	const u32 *val;
+
+	if (!of_device_is_compatible(ct,
+				     "IBM,CBEA-Internal-Interrupt-Controller"))
+		return -ENODEV;
+	if (intsize != 1)
+		return -ENODEV;
+	val = of_get_property(ct, "#interrupt-cells", NULL);
+	if (val == NULL || *val != 1)
+		return -ENODEV;
+
+	node = intspec[0] >> 24;
+	ext = (intspec[0] >> 16) & 0xff;
+	class = (intspec[0] >> 8) & 0xff;
+	unit = intspec[0] & 0xff;
+
+	/* Check if node is in supported range */
+	if (node > 1)
+		return -EINVAL;
+
+	/* Build up interrupt number, special case for IO exceptions */
+	*out_hwirq = (node << IIC_IRQ_NODE_SHIFT);
+	if (unit == IIC_UNIT_IIC && class == 1)
+		*out_hwirq |= IIC_IRQ_TYPE_IOEXC | ext;
+	else
+		*out_hwirq |= IIC_IRQ_TYPE_NORMAL |
+			(class << IIC_IRQ_CLASS_SHIFT) | unit;
+
+	/* Dummy flags, ignored by iic code */
+	*out_flags = IRQ_TYPE_EDGE_RISING;
+
+	return 0;
+}
+
+static const struct irq_domain_ops iic_host_ops = {
+	.match = iic_host_match,
+	.map = iic_host_map,
+	.xlate = iic_host_xlate,
+};
+
+static void __init init_one_iic(unsigned int hw_cpu, unsigned long addr,
+				struct device_node *node)
+{
+	/* XXX FIXME: should locate the linux CPU number from the HW cpu
+	 * number properly. We are lucky for now
+	 */
+	struct iic *iic = &per_cpu(cpu_iic, hw_cpu);
+
+	iic->regs = ioremap(addr, sizeof(struct cbe_iic_thread_regs));
+	BUG_ON(iic->regs == NULL);
+
+	iic->target_id = ((hw_cpu & 2) << 3) | ((hw_cpu & 1) ? 0xf : 0xe);
+	iic->eoi_stack[0] = 0xff;
+	iic->node = of_node_get(node);
+	out_be64(&iic->regs->prio, 0);
+
+	printk(KERN_INFO "IIC for CPU %d target id 0x%x : %pOF\n",
+	       hw_cpu, iic->target_id, node);
+}
+
+static int __init setup_iic(void)
+{
+	struct device_node *dn;
+	struct resource r0, r1;
+	unsigned int node, cascade, found = 0;
+	struct cbe_iic_regs __iomem *node_iic;
+	const u32 *np;
+
+	for_each_node_by_name(dn, "interrupt-controller") {
+		if (!of_device_is_compatible(dn,
+				     "IBM,CBEA-Internal-Interrupt-Controller"))
+			continue;
+		np = of_get_property(dn, "ibm,interrupt-server-ranges", NULL);
+		if (np == NULL) {
+			printk(KERN_WARNING "IIC: CPU association not found\n");
+			of_node_put(dn);
+			return -ENODEV;
+		}
+		if (of_address_to_resource(dn, 0, &r0) ||
+		    of_address_to_resource(dn, 1, &r1)) {
+			printk(KERN_WARNING "IIC: Can't resolve addresses\n");
+			of_node_put(dn);
+			return -ENODEV;
+		}
+		found++;
+		init_one_iic(np[0], r0.start, dn);
+		init_one_iic(np[1], r1.start, dn);
+
+		/* Setup cascade for IO exceptions. XXX cleanup tricks to get
+		 * node vs CPU etc...
+		 * Note that we configure the IIC_IRR here with a hard coded
+		 * priority of 1. We might want to improve that later.
+		 */
+		node = np[0] >> 1;
+		node_iic = cbe_get_cpu_iic_regs(np[0]);
+		cascade = node << IIC_IRQ_NODE_SHIFT;
+		cascade |= 1 << IIC_IRQ_CLASS_SHIFT;
+		cascade |= IIC_UNIT_IIC;
+		cascade = irq_create_mapping(iic_host, cascade);
+		if (!cascade)
+			continue;
+		/*
+		 * irq_data is a generic pointer that gets passed back
+		 * to us later, so the forced cast is fine.
+		 */
+		irq_set_handler_data(cascade, (void __force *)node_iic);
+		irq_set_chained_handler(cascade, iic_ioexc_cascade);
+		out_be64(&node_iic->iic_ir,
+			 (1 << 12)		/* priority */ |
+			 (node << 4)		/* dest node */ |
+			 IIC_UNIT_THREAD_0	/* route them to thread 0 */);
+		/* Flush pending (make sure it triggers if there is
+		 * anything pending
+		 */
+		out_be64(&node_iic->iic_is, 0xfffffffffffffffful);
+	}
+
+	if (found)
+		return 0;
+	else
+		return -ENODEV;
+}
+
+void __init iic_init_IRQ(void)
+{
+	/* Setup an irq host data structure */
+	iic_host = irq_domain_add_linear(NULL, IIC_SOURCE_COUNT, &iic_host_ops,
+					 NULL);
+	BUG_ON(iic_host == NULL);
+	irq_set_default_host(iic_host);
+
+	/* Discover and initialize iics */
+	if (setup_iic() < 0)
+		panic("IIC: Failed to initialize !\n");
+
+	/* Set master interrupt handling function */
+	ppc_md.get_irq = iic_get_irq;
+
+	/* Enable on current CPU */
+	iic_setup_cpu();
+}
+
+void iic_set_interrupt_routing(int cpu, int thread, int priority)
+{
+	struct cbe_iic_regs __iomem *iic_regs = cbe_get_cpu_iic_regs(cpu);
+	u64 iic_ir = 0;
+	int node = cpu >> 1;
+
+	/* Set which node and thread will handle the next interrupt */
+	iic_ir |= CBE_IIC_IR_PRIO(priority) |
+		  CBE_IIC_IR_DEST_NODE(node);
+	if (thread == 0)
+		iic_ir |= CBE_IIC_IR_DEST_UNIT(CBE_IIC_IR_PT_0);
+	else
+		iic_ir |= CBE_IIC_IR_DEST_UNIT(CBE_IIC_IR_PT_1);
+	out_be64(&iic_regs->iic_ir, iic_ir);
+}
diff --git a/arch/powerpc/platforms/cell/interrupt.h b/arch/powerpc/platforms/cell/interrupt.h
new file mode 100644
index 000000000..a47902248
--- /dev/null
+++ b/arch/powerpc/platforms/cell/interrupt.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ASM_CELL_PIC_H
+#define ASM_CELL_PIC_H
+#ifdef __KERNEL__
+/*
+ * Mapping of IIC pending bits into per-node interrupt numbers.
+ *
+ * Interrupt numbers are in the range 0...0x1ff where the top bit
+ * (0x100) represent the source node. Only 2 nodes are supported with
+ * the current code though it's trivial to extend that if necessary using
+ * higher level bits
+ *
+ * The bottom 8 bits are split into 2 type bits and 6 data bits that
+ * depend on the type:
+ *
+ * 00 (0x00 | data) : normal interrupt. data is (class << 4) | source
+ * 01 (0x40 | data) : IO exception. data is the exception number as
+ *                    defined by bit numbers in IIC_SR
+ * 10 (0x80 | data) : IPI. data is the IPI number (obtained from the priority)
+ *                    and node is always 0 (IPIs are per-cpu, their source is
+ *                    not relevant)
+ * 11 (0xc0 | data) : reserved
+ *
+ * In addition, interrupt number 0x80000000 is defined as always invalid
+ * (that is the node field is expected to never extend to move than 23 bits)
+ *
+ */
+
+enum {
+	IIC_IRQ_INVALID		= 0x80000000u,
+	IIC_IRQ_NODE_MASK	= 0x100,
+	IIC_IRQ_NODE_SHIFT	= 8,
+	IIC_IRQ_MAX		= 0x1ff,
+	IIC_IRQ_TYPE_MASK	= 0xc0,
+	IIC_IRQ_TYPE_NORMAL	= 0x00,
+	IIC_IRQ_TYPE_IOEXC	= 0x40,
+	IIC_IRQ_TYPE_IPI	= 0x80,
+	IIC_IRQ_CLASS_SHIFT	= 4,
+	IIC_IRQ_CLASS_0		= 0x00,
+	IIC_IRQ_CLASS_1		= 0x10,
+	IIC_IRQ_CLASS_2		= 0x20,
+	IIC_SOURCE_COUNT	= 0x200,
+
+	/* Here are defined the various source/dest units. Avoid using those
+	 * definitions if you can, they are mostly here for reference
+	 */
+	IIC_UNIT_SPU_0		= 0x4,
+	IIC_UNIT_SPU_1		= 0x7,
+	IIC_UNIT_SPU_2		= 0x3,
+	IIC_UNIT_SPU_3		= 0x8,
+	IIC_UNIT_SPU_4		= 0x2,
+	IIC_UNIT_SPU_5		= 0x9,
+	IIC_UNIT_SPU_6		= 0x1,
+	IIC_UNIT_SPU_7		= 0xa,
+	IIC_UNIT_IOC_0		= 0x0,
+	IIC_UNIT_IOC_1		= 0xb,
+	IIC_UNIT_THREAD_0	= 0xe, /* target only */
+	IIC_UNIT_THREAD_1	= 0xf, /* target only */
+	IIC_UNIT_IIC		= 0xe, /* source only (IO exceptions) */
+
+	/* Base numbers for the external interrupts */
+	IIC_IRQ_EXT_IOIF0	=
+		IIC_IRQ_TYPE_NORMAL | IIC_IRQ_CLASS_2 | IIC_UNIT_IOC_0,
+	IIC_IRQ_EXT_IOIF1	=
+		IIC_IRQ_TYPE_NORMAL | IIC_IRQ_CLASS_2 | IIC_UNIT_IOC_1,
+
+	/* Base numbers for the IIC_ISR interrupts */
+	IIC_IRQ_IOEX_TMI	= IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 63,
+	IIC_IRQ_IOEX_PMI	= IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 62,
+	IIC_IRQ_IOEX_ATI	= IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 61,
+	IIC_IRQ_IOEX_MATBFI	= IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 60,
+	IIC_IRQ_IOEX_ELDI	= IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 59,
+
+	/* Which bits in IIC_ISR are edge sensitive */
+	IIC_ISR_EDGE_MASK	= 0x4ul,
+};
+
+extern void iic_init_IRQ(void);
+extern void iic_message_pass(int cpu, int msg);
+extern void iic_request_IPIs(void);
+extern void iic_setup_cpu(void);
+
+extern u8 iic_get_target_id(int cpu);
+
+extern void spider_init_IRQ(void);
+
+extern void iic_set_interrupt_routing(int cpu, int thread, int priority);
+
+#endif
+#endif /* ASM_CELL_PIC_H */
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
new file mode 100644
index 000000000..d9c2c4cc6
--- /dev/null
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -0,0 +1,1223 @@
+/*
+ * IOMMU implementation for Cell Broadband Processor Architecture
+ *
+ * (C) Copyright IBM Corporation 2006-2008
+ *
+ * Author: Jeremy Kerr <jk@ozlabs.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include <linux/memblock.h>
+
+#include <asm/prom.h>
+#include <asm/iommu.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/udbg.h>
+#include <asm/firmware.h>
+#include <asm/cell-regs.h>
+
+#include "cell.h"
+#include "interrupt.h"
+
+/* Define CELL_IOMMU_REAL_UNMAP to actually unmap non-used pages
+ * instead of leaving them mapped to some dummy page. This can be
+ * enabled once the appropriate workarounds for spider bugs have
+ * been enabled
+ */
+#define CELL_IOMMU_REAL_UNMAP
+
+/* Define CELL_IOMMU_STRICT_PROTECTION to enforce protection of
+ * IO PTEs based on the transfer direction. That can be enabled
+ * once spider-net has been fixed to pass the correct direction
+ * to the DMA mapping functions
+ */
+#define CELL_IOMMU_STRICT_PROTECTION
+
+
+#define NR_IOMMUS			2
+
+/* IOC mmap registers */
+#define IOC_Reg_Size			0x2000
+
+#define IOC_IOPT_CacheInvd		0x908
+#define IOC_IOPT_CacheInvd_NE_Mask	0xffe0000000000000ul
+#define IOC_IOPT_CacheInvd_IOPTE_Mask	0x000003fffffffff8ul
+#define IOC_IOPT_CacheInvd_Busy		0x0000000000000001ul
+
+#define IOC_IOST_Origin			0x918
+#define IOC_IOST_Origin_E		0x8000000000000000ul
+#define IOC_IOST_Origin_HW		0x0000000000000800ul
+#define IOC_IOST_Origin_HL		0x0000000000000400ul
+
+#define IOC_IO_ExcpStat			0x920
+#define IOC_IO_ExcpStat_V		0x8000000000000000ul
+#define IOC_IO_ExcpStat_SPF_Mask	0x6000000000000000ul
+#define IOC_IO_ExcpStat_SPF_S		0x6000000000000000ul
+#define IOC_IO_ExcpStat_SPF_P		0x2000000000000000ul
+#define IOC_IO_ExcpStat_ADDR_Mask	0x00000007fffff000ul
+#define IOC_IO_ExcpStat_RW_Mask		0x0000000000000800ul
+#define IOC_IO_ExcpStat_IOID_Mask	0x00000000000007fful
+
+#define IOC_IO_ExcpMask			0x928
+#define IOC_IO_ExcpMask_SFE		0x4000000000000000ul
+#define IOC_IO_ExcpMask_PFE		0x2000000000000000ul
+
+#define IOC_IOCmd_Offset		0x1000
+
+#define IOC_IOCmd_Cfg			0xc00
+#define IOC_IOCmd_Cfg_TE		0x0000800000000000ul
+
+
+/* Segment table entries */
+#define IOSTE_V			0x8000000000000000ul /* valid */
+#define IOSTE_H			0x4000000000000000ul /* cache hint */
+#define IOSTE_PT_Base_RPN_Mask  0x3ffffffffffff000ul /* base RPN of IOPT */
+#define IOSTE_NPPT_Mask		0x0000000000000fe0ul /* no. pages in IOPT */
+#define IOSTE_PS_Mask		0x0000000000000007ul /* page size */
+#define IOSTE_PS_4K		0x0000000000000001ul /*   - 4kB  */
+#define IOSTE_PS_64K		0x0000000000000003ul /*   - 64kB */
+#define IOSTE_PS_1M		0x0000000000000005ul /*   - 1MB  */
+#define IOSTE_PS_16M		0x0000000000000007ul /*   - 16MB */
+
+
+/* IOMMU sizing */
+#define IO_SEGMENT_SHIFT	28
+#define IO_PAGENO_BITS(shift)	(IO_SEGMENT_SHIFT - (shift))
+
+/* The high bit needs to be set on every DMA address */
+#define SPIDER_DMA_OFFSET	0x80000000ul
+
+struct iommu_window {
+	struct list_head list;
+	struct cbe_iommu *iommu;
+	unsigned long offset;
+	unsigned long size;
+	unsigned int ioid;
+	struct iommu_table table;
+};
+
+#define NAMESIZE 8
+struct cbe_iommu {
+	int nid;
+	char name[NAMESIZE];
+	void __iomem *xlate_regs;
+	void __iomem *cmd_regs;
+	unsigned long *stab;
+	unsigned long *ptab;
+	void *pad_page;
+	struct list_head windows;
+};
+
+/* Static array of iommus, one per node
+ *   each contains a list of windows, keyed from dma_window property
+ *   - on bus setup, look for a matching window, or create one
+ *   - on dev setup, assign iommu_table ptr
+ */
+static struct cbe_iommu iommus[NR_IOMMUS];
+static int cbe_nr_iommus;
+
+static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte,
+		long n_ptes)
+{
+	u64 __iomem *reg;
+	u64 val;
+	long n;
+
+	reg = iommu->xlate_regs + IOC_IOPT_CacheInvd;
+
+	while (n_ptes > 0) {
+		/* we can invalidate up to 1 << 11 PTEs at once */
+		n = min(n_ptes, 1l << 11);
+		val = (((n /*- 1*/) << 53) & IOC_IOPT_CacheInvd_NE_Mask)
+			| (__pa(pte) & IOC_IOPT_CacheInvd_IOPTE_Mask)
+		        | IOC_IOPT_CacheInvd_Busy;
+
+		out_be64(reg, val);
+		while (in_be64(reg) & IOC_IOPT_CacheInvd_Busy)
+			;
+
+		n_ptes -= n;
+		pte += n;
+	}
+}
+
+static int tce_build_cell(struct iommu_table *tbl, long index, long npages,
+		unsigned long uaddr, enum dma_data_direction direction,
+		unsigned long attrs)
+{
+	int i;
+	unsigned long *io_pte, base_pte;
+	struct iommu_window *window =
+		container_of(tbl, struct iommu_window, table);
+
+	/* implementing proper protection causes problems with the spidernet
+	 * driver - check mapping directions later, but allow read & write by
+	 * default for now.*/
+#ifdef CELL_IOMMU_STRICT_PROTECTION
+	/* to avoid referencing a global, we use a trick here to setup the
+	 * protection bit. "prot" is setup to be 3 fields of 4 bits appended
+	 * together for each of the 3 supported direction values. It is then
+	 * shifted left so that the fields matching the desired direction
+	 * lands on the appropriate bits, and other bits are masked out.
+	 */
+	const unsigned long prot = 0xc48;
+	base_pte =
+		((prot << (52 + 4 * direction)) &
+		 (CBE_IOPTE_PP_W | CBE_IOPTE_PP_R)) |
+		CBE_IOPTE_M | CBE_IOPTE_SO_RW |
+		(window->ioid & CBE_IOPTE_IOID_Mask);
+#else
+	base_pte = CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_M |
+		CBE_IOPTE_SO_RW | (window->ioid & CBE_IOPTE_IOID_Mask);
+#endif
+	if (unlikely(attrs & DMA_ATTR_WEAK_ORDERING))
+		base_pte &= ~CBE_IOPTE_SO_RW;
+
+	io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset);
+
+	for (i = 0; i < npages; i++, uaddr += (1 << tbl->it_page_shift))
+		io_pte[i] = base_pte | (__pa(uaddr) & CBE_IOPTE_RPN_Mask);
+
+	mb();
+
+	invalidate_tce_cache(window->iommu, io_pte, npages);
+
+	pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n",
+		 index, npages, direction, base_pte);
+	return 0;
+}
+
+static void tce_free_cell(struct iommu_table *tbl, long index, long npages)
+{
+
+	int i;
+	unsigned long *io_pte, pte;
+	struct iommu_window *window =
+		container_of(tbl, struct iommu_window, table);
+
+	pr_debug("tce_free_cell(index=%lx,n=%lx)\n", index, npages);
+
+#ifdef CELL_IOMMU_REAL_UNMAP
+	pte = 0;
+#else
+	/* spider bridge does PCI reads after freeing - insert a mapping
+	 * to a scratch page instead of an invalid entry */
+	pte = CBE_IOPTE_PP_R | CBE_IOPTE_M | CBE_IOPTE_SO_RW |
+		__pa(window->iommu->pad_page) |
+		(window->ioid & CBE_IOPTE_IOID_Mask);
+#endif
+
+	io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset);
+
+	for (i = 0; i < npages; i++)
+		io_pte[i] = pte;
+
+	mb();
+
+	invalidate_tce_cache(window->iommu, io_pte, npages);
+}
+
+static irqreturn_t ioc_interrupt(int irq, void *data)
+{
+	unsigned long stat, spf;
+	struct cbe_iommu *iommu = data;
+
+	stat = in_be64(iommu->xlate_regs + IOC_IO_ExcpStat);
+	spf = stat & IOC_IO_ExcpStat_SPF_Mask;
+
+	/* Might want to rate limit it */
+	printk(KERN_ERR "iommu: DMA exception 0x%016lx\n", stat);
+	printk(KERN_ERR "  V=%d, SPF=[%c%c], RW=%s, IOID=0x%04x\n",
+	       !!(stat & IOC_IO_ExcpStat_V),
+	       (spf == IOC_IO_ExcpStat_SPF_S) ? 'S' : ' ',
+	       (spf == IOC_IO_ExcpStat_SPF_P) ? 'P' : ' ',
+	       (stat & IOC_IO_ExcpStat_RW_Mask) ? "Read" : "Write",
+	       (unsigned int)(stat & IOC_IO_ExcpStat_IOID_Mask));
+	printk(KERN_ERR "  page=0x%016lx\n",
+	       stat & IOC_IO_ExcpStat_ADDR_Mask);
+
+	/* clear interrupt */
+	stat &= ~IOC_IO_ExcpStat_V;
+	out_be64(iommu->xlate_regs + IOC_IO_ExcpStat, stat);
+
+	return IRQ_HANDLED;
+}
+
+static int cell_iommu_find_ioc(int nid, unsigned long *base)
+{
+	struct device_node *np;
+	struct resource r;
+
+	*base = 0;
+
+	/* First look for new style /be nodes */
+	for_each_node_by_name(np, "ioc") {
+		if (of_node_to_nid(np) != nid)
+			continue;
+		if (of_address_to_resource(np, 0, &r)) {
+			printk(KERN_ERR "iommu: can't get address for %pOF\n",
+			       np);
+			continue;
+		}
+		*base = r.start;
+		of_node_put(np);
+		return 0;
+	}
+
+	/* Ok, let's try the old way */
+	for_each_node_by_type(np, "cpu") {
+		const unsigned int *nidp;
+		const unsigned long *tmp;
+
+		nidp = of_get_property(np, "node-id", NULL);
+		if (nidp && *nidp == nid) {
+			tmp = of_get_property(np, "ioc-translation", NULL);
+			if (tmp) {
+				*base = *tmp;
+				of_node_put(np);
+				return 0;
+			}
+		}
+	}
+
+	return -ENODEV;
+}
+
+static void cell_iommu_setup_stab(struct cbe_iommu *iommu,
+				unsigned long dbase, unsigned long dsize,
+				unsigned long fbase, unsigned long fsize)
+{
+	struct page *page;
+	unsigned long segments, stab_size;
+
+	segments = max(dbase + dsize, fbase + fsize) >> IO_SEGMENT_SHIFT;
+
+	pr_debug("%s: iommu[%d]: segments: %lu\n",
+			__func__, iommu->nid, segments);
+
+	/* set up the segment table */
+	stab_size = segments * sizeof(unsigned long);
+	page = alloc_pages_node(iommu->nid, GFP_KERNEL, get_order(stab_size));
+	BUG_ON(!page);
+	iommu->stab = page_address(page);
+	memset(iommu->stab, 0, stab_size);
+}
+
+static unsigned long *cell_iommu_alloc_ptab(struct cbe_iommu *iommu,
+		unsigned long base, unsigned long size, unsigned long gap_base,
+		unsigned long gap_size, unsigned long page_shift)
+{
+	struct page *page;
+	int i;
+	unsigned long reg, segments, pages_per_segment, ptab_size,
+		      n_pte_pages, start_seg, *ptab;
+
+	start_seg = base >> IO_SEGMENT_SHIFT;
+	segments  = size >> IO_SEGMENT_SHIFT;
+	pages_per_segment = 1ull << IO_PAGENO_BITS(page_shift);
+	/* PTEs for each segment must start on a 4K boundary */
+	pages_per_segment = max(pages_per_segment,
+				(1 << 12) / sizeof(unsigned long));
+
+	ptab_size = segments * pages_per_segment * sizeof(unsigned long);
+	pr_debug("%s: iommu[%d]: ptab_size: %lu, order: %d\n", __func__,
+			iommu->nid, ptab_size, get_order(ptab_size));
+	page = alloc_pages_node(iommu->nid, GFP_KERNEL, get_order(ptab_size));
+	BUG_ON(!page);
+
+	ptab = page_address(page);
+	memset(ptab, 0, ptab_size);
+
+	/* number of 4K pages needed for a page table */
+	n_pte_pages = (pages_per_segment * sizeof(unsigned long)) >> 12;
+
+	pr_debug("%s: iommu[%d]: stab at %p, ptab at %p, n_pte_pages: %lu\n",
+			__func__, iommu->nid, iommu->stab, ptab,
+			n_pte_pages);
+
+	/* initialise the STEs */
+	reg = IOSTE_V | ((n_pte_pages - 1) << 5);
+
+	switch (page_shift) {
+	case 12: reg |= IOSTE_PS_4K;  break;
+	case 16: reg |= IOSTE_PS_64K; break;
+	case 20: reg |= IOSTE_PS_1M;  break;
+	case 24: reg |= IOSTE_PS_16M; break;
+	default: BUG();
+	}
+
+	gap_base = gap_base >> IO_SEGMENT_SHIFT;
+	gap_size = gap_size >> IO_SEGMENT_SHIFT;
+
+	pr_debug("Setting up IOMMU stab:\n");
+	for (i = start_seg; i < (start_seg + segments); i++) {
+		if (i >= gap_base && i < (gap_base + gap_size)) {
+			pr_debug("\toverlap at %d, skipping\n", i);
+			continue;
+		}
+		iommu->stab[i] = reg | (__pa(ptab) + (n_pte_pages << 12) *
+					(i - start_seg));
+		pr_debug("\t[%d] 0x%016lx\n", i, iommu->stab[i]);
+	}
+
+	return ptab;
+}
+
+static void cell_iommu_enable_hardware(struct cbe_iommu *iommu)
+{
+	int ret;
+	unsigned long reg, xlate_base;
+	unsigned int virq;
+
+	if (cell_iommu_find_ioc(iommu->nid, &xlate_base))
+		panic("%s: missing IOC register mappings for node %d\n",
+		      __func__, iommu->nid);
+
+	iommu->xlate_regs = ioremap(xlate_base, IOC_Reg_Size);
+	iommu->cmd_regs = iommu->xlate_regs + IOC_IOCmd_Offset;
+
+	/* ensure that the STEs have updated */
+	mb();
+
+	/* setup interrupts for the iommu. */
+	reg = in_be64(iommu->xlate_regs + IOC_IO_ExcpStat);
+	out_be64(iommu->xlate_regs + IOC_IO_ExcpStat,
+			reg & ~IOC_IO_ExcpStat_V);
+	out_be64(iommu->xlate_regs + IOC_IO_ExcpMask,
+			IOC_IO_ExcpMask_PFE | IOC_IO_ExcpMask_SFE);
+
+	virq = irq_create_mapping(NULL,
+			IIC_IRQ_IOEX_ATI | (iommu->nid << IIC_IRQ_NODE_SHIFT));
+	BUG_ON(!virq);
+
+	ret = request_irq(virq, ioc_interrupt, 0, iommu->name, iommu);
+	BUG_ON(ret);
+
+	/* set the IOC segment table origin register (and turn on the iommu) */
+	reg = IOC_IOST_Origin_E | __pa(iommu->stab) | IOC_IOST_Origin_HW;
+	out_be64(iommu->xlate_regs + IOC_IOST_Origin, reg);
+	in_be64(iommu->xlate_regs + IOC_IOST_Origin);
+
+	/* turn on IO translation */
+	reg = in_be64(iommu->cmd_regs + IOC_IOCmd_Cfg) | IOC_IOCmd_Cfg_TE;
+	out_be64(iommu->cmd_regs + IOC_IOCmd_Cfg, reg);
+}
+
+static void cell_iommu_setup_hardware(struct cbe_iommu *iommu,
+	unsigned long base, unsigned long size)
+{
+	cell_iommu_setup_stab(iommu, base, size, 0, 0);
+	iommu->ptab = cell_iommu_alloc_ptab(iommu, base, size, 0, 0,
+					    IOMMU_PAGE_SHIFT_4K);
+	cell_iommu_enable_hardware(iommu);
+}
+
+#if 0/* Unused for now */
+static struct iommu_window *find_window(struct cbe_iommu *iommu,
+		unsigned long offset, unsigned long size)
+{
+	struct iommu_window *window;
+
+	/* todo: check for overlapping (but not equal) windows) */
+
+	list_for_each_entry(window, &(iommu->windows), list) {
+		if (window->offset == offset && window->size == size)
+			return window;
+	}
+
+	return NULL;
+}
+#endif
+
+static inline u32 cell_iommu_get_ioid(struct device_node *np)
+{
+	const u32 *ioid;
+
+	ioid = of_get_property(np, "ioid", NULL);
+	if (ioid == NULL) {
+		printk(KERN_WARNING "iommu: missing ioid for %pOF using 0\n",
+		       np);
+		return 0;
+	}
+
+	return *ioid;
+}
+
+static struct iommu_table_ops cell_iommu_ops = {
+	.set = tce_build_cell,
+	.clear = tce_free_cell
+};
+
+static struct iommu_window * __init
+cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np,
+			unsigned long offset, unsigned long size,
+			unsigned long pte_offset)
+{
+	struct iommu_window *window;
+	struct page *page;
+	u32 ioid;
+
+	ioid = cell_iommu_get_ioid(np);
+
+	window = kzalloc_node(sizeof(*window), GFP_KERNEL, iommu->nid);
+	BUG_ON(window == NULL);
+
+	window->offset = offset;
+	window->size = size;
+	window->ioid = ioid;
+	window->iommu = iommu;
+
+	window->table.it_blocksize = 16;
+	window->table.it_base = (unsigned long)iommu->ptab;
+	window->table.it_index = iommu->nid;
+	window->table.it_page_shift = IOMMU_PAGE_SHIFT_4K;
+	window->table.it_offset =
+		(offset >> window->table.it_page_shift) + pte_offset;
+	window->table.it_size = size >> window->table.it_page_shift;
+	window->table.it_ops = &cell_iommu_ops;
+
+	iommu_init_table(&window->table, iommu->nid);
+
+	pr_debug("\tioid      %d\n", window->ioid);
+	pr_debug("\tblocksize %ld\n", window->table.it_blocksize);
+	pr_debug("\tbase      0x%016lx\n", window->table.it_base);
+	pr_debug("\toffset    0x%lx\n", window->table.it_offset);
+	pr_debug("\tsize      %ld\n", window->table.it_size);
+
+	list_add(&window->list, &iommu->windows);
+
+	if (offset != 0)
+		return window;
+
+	/* We need to map and reserve the first IOMMU page since it's used
+	 * by the spider workaround. In theory, we only need to do that when
+	 * running on spider but it doesn't really matter.
+	 *
+	 * This code also assumes that we have a window that starts at 0,
+	 * which is the case on all spider based blades.
+	 */
+	page = alloc_pages_node(iommu->nid, GFP_KERNEL, 0);
+	BUG_ON(!page);
+	iommu->pad_page = page_address(page);
+	clear_page(iommu->pad_page);
+
+	__set_bit(0, window->table.it_map);
+	tce_build_cell(&window->table, window->table.it_offset, 1,
+		       (unsigned long)iommu->pad_page, DMA_TO_DEVICE, 0);
+
+	return window;
+}
+
+static struct cbe_iommu *cell_iommu_for_node(int nid)
+{
+	int i;
+
+	for (i = 0; i < cbe_nr_iommus; i++)
+		if (iommus[i].nid == nid)
+			return &iommus[i];
+	return NULL;
+}
+
+static unsigned long cell_dma_nommu_offset;
+
+static unsigned long dma_iommu_fixed_base;
+
+/* iommu_fixed_is_weak is set if booted with iommu_fixed=weak */
+static int iommu_fixed_is_weak;
+
+static struct iommu_table *cell_get_iommu_table(struct device *dev)
+{
+	struct iommu_window *window;
+	struct cbe_iommu *iommu;
+
+	/* Current implementation uses the first window available in that
+	 * node's iommu. We -might- do something smarter later though it may
+	 * never be necessary
+	 */
+	iommu = cell_iommu_for_node(dev_to_node(dev));
+	if (iommu == NULL || list_empty(&iommu->windows)) {
+		dev_err(dev, "iommu: missing iommu for %pOF (node %d)\n",
+		       dev->of_node, dev_to_node(dev));
+		return NULL;
+	}
+	window = list_entry(iommu->windows.next, struct iommu_window, list);
+
+	return &window->table;
+}
+
+/* A coherent allocation implies strong ordering */
+
+static void *dma_fixed_alloc_coherent(struct device *dev, size_t size,
+				      dma_addr_t *dma_handle, gfp_t flag,
+				      unsigned long attrs)
+{
+	if (iommu_fixed_is_weak)
+		return iommu_alloc_coherent(dev, cell_get_iommu_table(dev),
+					    size, dma_handle,
+					    device_to_mask(dev), flag,
+					    dev_to_node(dev));
+	else
+		return dma_nommu_ops.alloc(dev, size, dma_handle, flag,
+					    attrs);
+}
+
+static void dma_fixed_free_coherent(struct device *dev, size_t size,
+				    void *vaddr, dma_addr_t dma_handle,
+				    unsigned long attrs)
+{
+	if (iommu_fixed_is_weak)
+		iommu_free_coherent(cell_get_iommu_table(dev), size, vaddr,
+				    dma_handle);
+	else
+		dma_nommu_ops.free(dev, size, vaddr, dma_handle, attrs);
+}
+
+static dma_addr_t dma_fixed_map_page(struct device *dev, struct page *page,
+				     unsigned long offset, size_t size,
+				     enum dma_data_direction direction,
+				     unsigned long attrs)
+{
+	if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING))
+		return dma_nommu_ops.map_page(dev, page, offset, size,
+					       direction, attrs);
+	else
+		return iommu_map_page(dev, cell_get_iommu_table(dev), page,
+				      offset, size, device_to_mask(dev),
+				      direction, attrs);
+}
+
+static void dma_fixed_unmap_page(struct device *dev, dma_addr_t dma_addr,
+				 size_t size, enum dma_data_direction direction,
+				 unsigned long attrs)
+{
+	if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING))
+		dma_nommu_ops.unmap_page(dev, dma_addr, size, direction,
+					  attrs);
+	else
+		iommu_unmap_page(cell_get_iommu_table(dev), dma_addr, size,
+				 direction, attrs);
+}
+
+static int dma_fixed_map_sg(struct device *dev, struct scatterlist *sg,
+			   int nents, enum dma_data_direction direction,
+			   unsigned long attrs)
+{
+	if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING))
+		return dma_nommu_ops.map_sg(dev, sg, nents, direction, attrs);
+	else
+		return ppc_iommu_map_sg(dev, cell_get_iommu_table(dev), sg,
+					nents, device_to_mask(dev),
+					direction, attrs);
+}
+
+static void dma_fixed_unmap_sg(struct device *dev, struct scatterlist *sg,
+			       int nents, enum dma_data_direction direction,
+			       unsigned long attrs)
+{
+	if (iommu_fixed_is_weak == (attrs & DMA_ATTR_WEAK_ORDERING))
+		dma_nommu_ops.unmap_sg(dev, sg, nents, direction, attrs);
+	else
+		ppc_iommu_unmap_sg(cell_get_iommu_table(dev), sg, nents,
+				   direction, attrs);
+}
+
+static int dma_suported_and_switch(struct device *dev, u64 dma_mask);
+
+static const struct dma_map_ops dma_iommu_fixed_ops = {
+	.alloc          = dma_fixed_alloc_coherent,
+	.free           = dma_fixed_free_coherent,
+	.map_sg         = dma_fixed_map_sg,
+	.unmap_sg       = dma_fixed_unmap_sg,
+	.dma_supported  = dma_suported_and_switch,
+	.map_page       = dma_fixed_map_page,
+	.unmap_page     = dma_fixed_unmap_page,
+	.mapping_error	= dma_iommu_mapping_error,
+};
+
+static void cell_dma_dev_setup(struct device *dev)
+{
+	if (get_pci_dma_ops() == &dma_iommu_ops)
+		set_iommu_table_base(dev, cell_get_iommu_table(dev));
+	else if (get_pci_dma_ops() == &dma_nommu_ops)
+		set_dma_offset(dev, cell_dma_nommu_offset);
+	else
+		BUG();
+}
+
+static void cell_pci_dma_dev_setup(struct pci_dev *dev)
+{
+	cell_dma_dev_setup(&dev->dev);
+}
+
+static int cell_of_bus_notify(struct notifier_block *nb, unsigned long action,
+			      void *data)
+{
+	struct device *dev = data;
+
+	/* We are only intereted in device addition */
+	if (action != BUS_NOTIFY_ADD_DEVICE)
+		return 0;
+
+	/* We use the PCI DMA ops */
+	dev->dma_ops = get_pci_dma_ops();
+
+	cell_dma_dev_setup(dev);
+
+	return 0;
+}
+
+static struct notifier_block cell_of_bus_notifier = {
+	.notifier_call = cell_of_bus_notify
+};
+
+static int __init cell_iommu_get_window(struct device_node *np,
+					 unsigned long *base,
+					 unsigned long *size)
+{
+	const __be32 *dma_window;
+	unsigned long index;
+
+	/* Use ibm,dma-window if available, else, hard code ! */
+	dma_window = of_get_property(np, "ibm,dma-window", NULL);
+	if (dma_window == NULL) {
+		*base = 0;
+		*size = 0x80000000u;
+		return -ENODEV;
+	}
+
+	of_parse_dma_window(np, dma_window, &index, base, size);
+	return 0;
+}
+
+static struct cbe_iommu * __init cell_iommu_alloc(struct device_node *np)
+{
+	struct cbe_iommu *iommu;
+	int nid, i;
+
+	/* Get node ID */
+	nid = of_node_to_nid(np);
+	if (nid < 0) {
+		printk(KERN_ERR "iommu: failed to get node for %pOF\n",
+		       np);
+		return NULL;
+	}
+	pr_debug("iommu: setting up iommu for node %d (%pOF)\n",
+		 nid, np);
+
+	/* XXX todo: If we can have multiple windows on the same IOMMU, which
+	 * isn't the case today, we probably want here to check whether the
+	 * iommu for that node is already setup.
+	 * However, there might be issue with getting the size right so let's
+	 * ignore that for now. We might want to completely get rid of the
+	 * multiple window support since the cell iommu supports per-page ioids
+	 */
+
+	if (cbe_nr_iommus >= NR_IOMMUS) {
+		printk(KERN_ERR "iommu: too many IOMMUs detected ! (%pOF)\n",
+		       np);
+		return NULL;
+	}
+
+	/* Init base fields */
+	i = cbe_nr_iommus++;
+	iommu = &iommus[i];
+	iommu->stab = NULL;
+	iommu->nid = nid;
+	snprintf(iommu->name, sizeof(iommu->name), "iommu%d", i);
+	INIT_LIST_HEAD(&iommu->windows);
+
+	return iommu;
+}
+
+static void __init cell_iommu_init_one(struct device_node *np,
+				       unsigned long offset)
+{
+	struct cbe_iommu *iommu;
+	unsigned long base, size;
+
+	iommu = cell_iommu_alloc(np);
+	if (!iommu)
+		return;
+
+	/* Obtain a window for it */
+	cell_iommu_get_window(np, &base, &size);
+
+	pr_debug("\ttranslating window 0x%lx...0x%lx\n",
+		 base, base + size - 1);
+
+	/* Initialize the hardware */
+	cell_iommu_setup_hardware(iommu, base, size);
+
+	/* Setup the iommu_table */
+	cell_iommu_setup_window(iommu, np, base, size,
+				offset >> IOMMU_PAGE_SHIFT_4K);
+}
+
+static void __init cell_disable_iommus(void)
+{
+	int node;
+	unsigned long base, val;
+	void __iomem *xregs, *cregs;
+
+	/* Make sure IOC translation is disabled on all nodes */
+	for_each_online_node(node) {
+		if (cell_iommu_find_ioc(node, &base))
+			continue;
+		xregs = ioremap(base, IOC_Reg_Size);
+		if (xregs == NULL)
+			continue;
+		cregs = xregs + IOC_IOCmd_Offset;
+
+		pr_debug("iommu: cleaning up iommu on node %d\n", node);
+
+		out_be64(xregs + IOC_IOST_Origin, 0);
+		(void)in_be64(xregs + IOC_IOST_Origin);
+		val = in_be64(cregs + IOC_IOCmd_Cfg);
+		val &= ~IOC_IOCmd_Cfg_TE;
+		out_be64(cregs + IOC_IOCmd_Cfg, val);
+		(void)in_be64(cregs + IOC_IOCmd_Cfg);
+
+		iounmap(xregs);
+	}
+}
+
+static int __init cell_iommu_init_disabled(void)
+{
+	struct device_node *np = NULL;
+	unsigned long base = 0, size;
+
+	/* When no iommu is present, we use direct DMA ops */
+	set_pci_dma_ops(&dma_nommu_ops);
+
+	/* First make sure all IOC translation is turned off */
+	cell_disable_iommus();
+
+	/* If we have no Axon, we set up the spider DMA magic offset */
+	if (of_find_node_by_name(NULL, "axon") == NULL)
+		cell_dma_nommu_offset = SPIDER_DMA_OFFSET;
+
+	/* Now we need to check to see where the memory is mapped
+	 * in PCI space. We assume that all busses use the same dma
+	 * window which is always the case so far on Cell, thus we
+	 * pick up the first pci-internal node we can find and check
+	 * the DMA window from there.
+	 */
+	for_each_node_by_name(np, "axon") {
+		if (np->parent == NULL || np->parent->parent != NULL)
+			continue;
+		if (cell_iommu_get_window(np, &base, &size) == 0)
+			break;
+	}
+	if (np == NULL) {
+		for_each_node_by_name(np, "pci-internal") {
+			if (np->parent == NULL || np->parent->parent != NULL)
+				continue;
+			if (cell_iommu_get_window(np, &base, &size) == 0)
+				break;
+		}
+	}
+	of_node_put(np);
+
+	/* If we found a DMA window, we check if it's big enough to enclose
+	 * all of physical memory. If not, we force enable IOMMU
+	 */
+	if (np && size < memblock_end_of_DRAM()) {
+		printk(KERN_WARNING "iommu: force-enabled, dma window"
+		       " (%ldMB) smaller than total memory (%lldMB)\n",
+		       size >> 20, memblock_end_of_DRAM() >> 20);
+		return -ENODEV;
+	}
+
+	cell_dma_nommu_offset += base;
+
+	if (cell_dma_nommu_offset != 0)
+		cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup;
+
+	printk("iommu: disabled, direct DMA offset is 0x%lx\n",
+	       cell_dma_nommu_offset);
+
+	return 0;
+}
+
+/*
+ *  Fixed IOMMU mapping support
+ *
+ *  This code adds support for setting up a fixed IOMMU mapping on certain
+ *  cell machines. For 64-bit devices this avoids the performance overhead of
+ *  mapping and unmapping pages at runtime. 32-bit devices are unable to use
+ *  the fixed mapping.
+ *
+ *  The fixed mapping is established at boot, and maps all of physical memory
+ *  1:1 into device space at some offset. On machines with < 30 GB of memory
+ *  we setup the fixed mapping immediately above the normal IOMMU window.
+ *
+ *  For example a machine with 4GB of memory would end up with the normal
+ *  IOMMU window from 0-2GB and the fixed mapping window from 2GB to 6GB. In
+ *  this case a 64-bit device wishing to DMA to 1GB would be told to DMA to
+ *  3GB, plus any offset required by firmware. The firmware offset is encoded
+ *  in the "dma-ranges" property.
+ *
+ *  On machines with 30GB or more of memory, we are unable to place the fixed
+ *  mapping above the normal IOMMU window as we would run out of address space.
+ *  Instead we move the normal IOMMU window to coincide with the hash page
+ *  table, this region does not need to be part of the fixed mapping as no
+ *  device should ever be DMA'ing to it. We then setup the fixed mapping
+ *  from 0 to 32GB.
+ */
+
+static u64 cell_iommu_get_fixed_address(struct device *dev)
+{
+	u64 cpu_addr, size, best_size, dev_addr = OF_BAD_ADDR;
+	struct device_node *np;
+	const u32 *ranges = NULL;
+	int i, len, best, naddr, nsize, pna, range_size;
+
+	np = of_node_get(dev->of_node);
+	while (1) {
+		naddr = of_n_addr_cells(np);
+		nsize = of_n_size_cells(np);
+		np = of_get_next_parent(np);
+		if (!np)
+			break;
+
+		ranges = of_get_property(np, "dma-ranges", &len);
+
+		/* Ignore empty ranges, they imply no translation required */
+		if (ranges && len > 0)
+			break;
+	}
+
+	if (!ranges) {
+		dev_dbg(dev, "iommu: no dma-ranges found\n");
+		goto out;
+	}
+
+	len /= sizeof(u32);
+
+	pna = of_n_addr_cells(np);
+	range_size = naddr + nsize + pna;
+
+	/* dma-ranges format:
+	 * child addr	: naddr cells
+	 * parent addr	: pna cells
+	 * size		: nsize cells
+	 */
+	for (i = 0, best = -1, best_size = 0; i < len; i += range_size) {
+		cpu_addr = of_translate_dma_address(np, ranges + i + naddr);
+		size = of_read_number(ranges + i + naddr + pna, nsize);
+
+		if (cpu_addr == 0 && size > best_size) {
+			best = i;
+			best_size = size;
+		}
+	}
+
+	if (best >= 0) {
+		dev_addr = of_read_number(ranges + best, naddr);
+	} else
+		dev_dbg(dev, "iommu: no suitable range found!\n");
+
+out:
+	of_node_put(np);
+
+	return dev_addr;
+}
+
+static int dma_suported_and_switch(struct device *dev, u64 dma_mask)
+{
+	if (dma_mask == DMA_BIT_MASK(64) &&
+	    cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR) {
+		u64 addr = cell_iommu_get_fixed_address(dev) +
+			dma_iommu_fixed_base;
+		dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
+		dev_dbg(dev, "iommu: fixed addr = %llx\n", addr);
+		set_dma_ops(dev, &dma_iommu_fixed_ops);
+		set_dma_offset(dev, addr);
+		return 1;
+	}
+
+	if (dma_iommu_dma_supported(dev, dma_mask)) {
+		dev_dbg(dev, "iommu: not 64-bit, using default ops\n");
+		set_dma_ops(dev, get_pci_dma_ops());
+		cell_dma_dev_setup(dev);
+		return 1;
+	}
+
+	return 0;
+}
+
+static void insert_16M_pte(unsigned long addr, unsigned long *ptab,
+			   unsigned long base_pte)
+{
+	unsigned long segment, offset;
+
+	segment = addr >> IO_SEGMENT_SHIFT;
+	offset = (addr >> 24) - (segment << IO_PAGENO_BITS(24));
+	ptab = ptab + (segment * (1 << 12) / sizeof(unsigned long));
+
+	pr_debug("iommu: addr %lx ptab %p segment %lx offset %lx\n",
+		  addr, ptab, segment, offset);
+
+	ptab[offset] = base_pte | (__pa(addr) & CBE_IOPTE_RPN_Mask);
+}
+
+static void cell_iommu_setup_fixed_ptab(struct cbe_iommu *iommu,
+	struct device_node *np, unsigned long dbase, unsigned long dsize,
+	unsigned long fbase, unsigned long fsize)
+{
+	unsigned long base_pte, uaddr, ioaddr, *ptab;
+
+	ptab = cell_iommu_alloc_ptab(iommu, fbase, fsize, dbase, dsize, 24);
+
+	dma_iommu_fixed_base = fbase;
+
+	pr_debug("iommu: mapping 0x%lx pages from 0x%lx\n", fsize, fbase);
+
+	base_pte = CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_M |
+		(cell_iommu_get_ioid(np) & CBE_IOPTE_IOID_Mask);
+
+	if (iommu_fixed_is_weak)
+		pr_info("IOMMU: Using weak ordering for fixed mapping\n");
+	else {
+		pr_info("IOMMU: Using strong ordering for fixed mapping\n");
+		base_pte |= CBE_IOPTE_SO_RW;
+	}
+
+	for (uaddr = 0; uaddr < fsize; uaddr += (1 << 24)) {
+		/* Don't touch the dynamic region */
+		ioaddr = uaddr + fbase;
+		if (ioaddr >= dbase && ioaddr < (dbase + dsize)) {
+			pr_debug("iommu: fixed/dynamic overlap, skipping\n");
+			continue;
+		}
+
+		insert_16M_pte(uaddr, ptab, base_pte);
+	}
+
+	mb();
+}
+
+static int __init cell_iommu_fixed_mapping_init(void)
+{
+	unsigned long dbase, dsize, fbase, fsize, hbase, hend;
+	struct cbe_iommu *iommu;
+	struct device_node *np;
+
+	/* The fixed mapping is only supported on axon machines */
+	np = of_find_node_by_name(NULL, "axon");
+	of_node_put(np);
+
+	if (!np) {
+		pr_debug("iommu: fixed mapping disabled, no axons found\n");
+		return -1;
+	}
+
+	/* We must have dma-ranges properties for fixed mapping to work */
+	np = of_find_node_with_property(NULL, "dma-ranges");
+	of_node_put(np);
+
+	if (!np) {
+		pr_debug("iommu: no dma-ranges found, no fixed mapping\n");
+		return -1;
+	}
+
+	/* The default setup is to have the fixed mapping sit after the
+	 * dynamic region, so find the top of the largest IOMMU window
+	 * on any axon, then add the size of RAM and that's our max value.
+	 * If that is > 32GB we have to do other shennanigans.
+	 */
+	fbase = 0;
+	for_each_node_by_name(np, "axon") {
+		cell_iommu_get_window(np, &dbase, &dsize);
+		fbase = max(fbase, dbase + dsize);
+	}
+
+	fbase = _ALIGN_UP(fbase, 1 << IO_SEGMENT_SHIFT);
+	fsize = memblock_phys_mem_size();
+
+	if ((fbase + fsize) <= 0x800000000ul)
+		hbase = 0; /* use the device tree window */
+	else {
+		/* If we're over 32 GB we need to cheat. We can't map all of
+		 * RAM with the fixed mapping, and also fit the dynamic
+		 * region. So try to place the dynamic region where the hash
+		 * table sits, drivers never need to DMA to it, we don't
+		 * need a fixed mapping for that area.
+		 */
+		if (!htab_address) {
+			pr_debug("iommu: htab is NULL, on LPAR? Huh?\n");
+			return -1;
+		}
+		hbase = __pa(htab_address);
+		hend  = hbase + htab_size_bytes;
+
+		/* The window must start and end on a segment boundary */
+		if ((hbase != _ALIGN_UP(hbase, 1 << IO_SEGMENT_SHIFT)) ||
+		    (hend != _ALIGN_UP(hend, 1 << IO_SEGMENT_SHIFT))) {
+			pr_debug("iommu: hash window not segment aligned\n");
+			return -1;
+		}
+
+		/* Check the hash window fits inside the real DMA window */
+		for_each_node_by_name(np, "axon") {
+			cell_iommu_get_window(np, &dbase, &dsize);
+
+			if (hbase < dbase || (hend > (dbase + dsize))) {
+				pr_debug("iommu: hash window doesn't fit in"
+					 "real DMA window\n");
+				of_node_put(np);
+				return -1;
+			}
+		}
+
+		fbase = 0;
+	}
+
+	/* Setup the dynamic regions */
+	for_each_node_by_name(np, "axon") {
+		iommu = cell_iommu_alloc(np);
+		BUG_ON(!iommu);
+
+		if (hbase == 0)
+			cell_iommu_get_window(np, &dbase, &dsize);
+		else {
+			dbase = hbase;
+			dsize = htab_size_bytes;
+		}
+
+		printk(KERN_DEBUG "iommu: node %d, dynamic window 0x%lx-0x%lx "
+			"fixed window 0x%lx-0x%lx\n", iommu->nid, dbase,
+			 dbase + dsize, fbase, fbase + fsize);
+
+		cell_iommu_setup_stab(iommu, dbase, dsize, fbase, fsize);
+		iommu->ptab = cell_iommu_alloc_ptab(iommu, dbase, dsize, 0, 0,
+						    IOMMU_PAGE_SHIFT_4K);
+		cell_iommu_setup_fixed_ptab(iommu, np, dbase, dsize,
+					     fbase, fsize);
+		cell_iommu_enable_hardware(iommu);
+		cell_iommu_setup_window(iommu, np, dbase, dsize, 0);
+	}
+
+	dma_iommu_ops.dma_supported = dma_suported_and_switch;
+	set_pci_dma_ops(&dma_iommu_ops);
+
+	return 0;
+}
+
+static int iommu_fixed_disabled;
+
+static int __init setup_iommu_fixed(char *str)
+{
+	struct device_node *pciep;
+
+	if (strcmp(str, "off") == 0)
+		iommu_fixed_disabled = 1;
+
+	/* If we can find a pcie-endpoint in the device tree assume that
+	 * we're on a triblade or a CAB so by default the fixed mapping
+	 * should be set to be weakly ordered; but only if the boot
+	 * option WASN'T set for strong ordering
+	 */
+	pciep = of_find_node_by_type(NULL, "pcie-endpoint");
+
+	if (strcmp(str, "weak") == 0 || (pciep && strcmp(str, "strong") != 0))
+		iommu_fixed_is_weak = DMA_ATTR_WEAK_ORDERING;
+
+	of_node_put(pciep);
+
+	return 1;
+}
+__setup("iommu_fixed=", setup_iommu_fixed);
+
+static u64 cell_dma_get_required_mask(struct device *dev)
+{
+	const struct dma_map_ops *dma_ops;
+
+	if (!dev->dma_mask)
+		return 0;
+
+	if (!iommu_fixed_disabled &&
+			cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR)
+		return DMA_BIT_MASK(64);
+
+	dma_ops = get_dma_ops(dev);
+	if (dma_ops->get_required_mask)
+		return dma_ops->get_required_mask(dev);
+
+	WARN_ONCE(1, "no get_required_mask in %p ops", dma_ops);
+
+	return DMA_BIT_MASK(64);
+}
+
+static int __init cell_iommu_init(void)
+{
+	struct device_node *np;
+
+	/* If IOMMU is disabled or we have little enough RAM to not need
+	 * to enable it, we setup a direct mapping.
+	 *
+	 * Note: should we make sure we have the IOMMU actually disabled ?
+	 */
+	if (iommu_is_off ||
+	    (!iommu_force_on && memblock_end_of_DRAM() <= 0x80000000ull))
+		if (cell_iommu_init_disabled() == 0)
+			goto bail;
+
+	/* Setup various callbacks */
+	cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup;
+	ppc_md.dma_get_required_mask = cell_dma_get_required_mask;
+
+	if (!iommu_fixed_disabled && cell_iommu_fixed_mapping_init() == 0)
+		goto bail;
+
+	/* Create an iommu for each /axon node.  */
+	for_each_node_by_name(np, "axon") {
+		if (np->parent == NULL || np->parent->parent != NULL)
+			continue;
+		cell_iommu_init_one(np, 0);
+	}
+
+	/* Create an iommu for each toplevel /pci-internal node for
+	 * old hardware/firmware
+	 */
+	for_each_node_by_name(np, "pci-internal") {
+		if (np->parent == NULL || np->parent->parent != NULL)
+			continue;
+		cell_iommu_init_one(np, SPIDER_DMA_OFFSET);
+	}
+
+	/* Setup default PCI iommu ops */
+	set_pci_dma_ops(&dma_iommu_ops);
+
+ bail:
+	/* Register callbacks on OF platform device addition/removal
+	 * to handle linking them to the right DMA operations
+	 */
+	bus_register_notifier(&platform_bus_type, &cell_of_bus_notifier);
+
+	return 0;
+}
+machine_arch_initcall(cell, cell_iommu_init);
diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c
new file mode 100644
index 000000000..97ac2622e
--- /dev/null
+++ b/arch/powerpc/platforms/cell/pervasive.c
@@ -0,0 +1,138 @@
+/*
+ * CBE Pervasive Monitor and Debug
+ *
+ * (C) Copyright IBM Corporation 2005
+ *
+ * Authors: Maximino Aguilar (maguilar@us.ibm.com)
+ *          Michael N. Day (mnday@us.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/kallsyms.h>
+
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/pgtable.h>
+#include <asm/reg.h>
+#include <asm/cell-regs.h>
+#include <asm/cpu_has_feature.h>
+
+#include "pervasive.h"
+
+static void cbe_power_save(void)
+{
+	unsigned long ctrl, thread_switch_control;
+
+	/* Ensure our interrupt state is properly tracked */
+	if (!prep_irq_for_idle())
+		return;
+
+	ctrl = mfspr(SPRN_CTRLF);
+
+	/* Enable DEC and EE interrupt request */
+	thread_switch_control  = mfspr(SPRN_TSC_CELL);
+	thread_switch_control |= TSC_CELL_EE_ENABLE | TSC_CELL_EE_BOOST;
+
+	switch (ctrl & CTRL_CT) {
+	case CTRL_CT0:
+		thread_switch_control |= TSC_CELL_DEC_ENABLE_0;
+		break;
+	case CTRL_CT1:
+		thread_switch_control |= TSC_CELL_DEC_ENABLE_1;
+		break;
+	default:
+		printk(KERN_WARNING "%s: unknown configuration\n",
+			__func__);
+		break;
+	}
+	mtspr(SPRN_TSC_CELL, thread_switch_control);
+
+	/*
+	 * go into low thread priority, medium priority will be
+	 * restored for us after wake-up.
+	 */
+	HMT_low();
+
+	/*
+	 * atomically disable thread execution and runlatch.
+	 * External and Decrementer exceptions are still handled when the
+	 * thread is disabled but now enter in cbe_system_reset_exception()
+	 */
+	ctrl &= ~(CTRL_RUNLATCH | CTRL_TE);
+	mtspr(SPRN_CTRLT, ctrl);
+
+	/* Re-enable interrupts in MSR */
+	__hard_irq_enable();
+}
+
+static int cbe_system_reset_exception(struct pt_regs *regs)
+{
+	switch (regs->msr & SRR1_WAKEMASK) {
+	case SRR1_WAKEDEC:
+		set_dec(1);
+		break;
+	case SRR1_WAKEEE:
+		/*
+		 * Handle these when interrupts get re-enabled and we take
+		 * them as regular exceptions. We are in an NMI context
+		 * and can't handle these here.
+		 */
+		break;
+	case SRR1_WAKEMT:
+		return cbe_sysreset_hack();
+#ifdef CONFIG_CBE_RAS
+	case SRR1_WAKESYSERR:
+		cbe_system_error_exception(regs);
+		break;
+	case SRR1_WAKETHERM:
+		cbe_thermal_exception(regs);
+		break;
+#endif /* CONFIG_CBE_RAS */
+	default:
+		/* do system reset */
+		return 0;
+	}
+	/* everything handled */
+	return 1;
+}
+
+void __init cbe_pervasive_init(void)
+{
+	int cpu;
+
+	if (!cpu_has_feature(CPU_FTR_PAUSE_ZERO))
+		return;
+
+	for_each_possible_cpu(cpu) {
+		struct cbe_pmd_regs __iomem *regs = cbe_get_cpu_pmd_regs(cpu);
+		if (!regs)
+			continue;
+
+		 /* Enable Pause(0) control bit */
+		out_be64(&regs->pmcr, in_be64(&regs->pmcr) |
+					    CBE_PMD_PAUSE_ZERO_CONTROL);
+	}
+
+	ppc_md.power_save = cbe_power_save;
+	ppc_md.system_reset_exception = cbe_system_reset_exception;
+}
diff --git a/arch/powerpc/platforms/cell/pervasive.h b/arch/powerpc/platforms/cell/pervasive.h
new file mode 100644
index 000000000..fd4d7b709
--- /dev/null
+++ b/arch/powerpc/platforms/cell/pervasive.h
@@ -0,0 +1,42 @@
+/*
+ * Cell Pervasive Monitor and Debug interface and HW structures
+ *
+ * (C) Copyright IBM Corporation 2005
+ *
+ * Authors: Maximino Aguilar (maguilar@us.ibm.com)
+ *          David J. Erb (djerb@us.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+#ifndef PERVASIVE_H
+#define PERVASIVE_H
+
+extern void cbe_pervasive_init(void);
+extern void cbe_system_error_exception(struct pt_regs *regs);
+extern void cbe_maintenance_exception(struct pt_regs *regs);
+extern void cbe_thermal_exception(struct pt_regs *regs);
+
+#ifdef CONFIG_PPC_IBM_CELL_RESETBUTTON
+extern int cbe_sysreset_hack(void);
+#else
+static inline int cbe_sysreset_hack(void)
+{
+	return 1;
+}
+#endif /* CONFIG_PPC_IBM_CELL_RESETBUTTON */
+
+#endif
diff --git a/arch/powerpc/platforms/cell/pmu.c b/arch/powerpc/platforms/cell/pmu.c
new file mode 100644
index 000000000..e3ad0c38f
--- /dev/null
+++ b/arch/powerpc/platforms/cell/pmu.c
@@ -0,0 +1,424 @@
+/*
+ * Cell Broadband Engine Performance Monitor
+ *
+ * (C) Copyright IBM Corporation 2001,2006
+ *
+ * Author:
+ *    David Erb (djerb@us.ibm.com)
+ *    Kevin Corry (kevcorry@us.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/types.h>
+#include <linux/export.h>
+#include <asm/io.h>
+#include <asm/irq_regs.h>
+#include <asm/machdep.h>
+#include <asm/pmc.h>
+#include <asm/reg.h>
+#include <asm/spu.h>
+#include <asm/cell-regs.h>
+
+#include "interrupt.h"
+
+/*
+ * When writing to write-only mmio addresses, save a shadow copy. All of the
+ * registers are 32-bit, but stored in the upper-half of a 64-bit field in
+ * pmd_regs.
+ */
+
+#define WRITE_WO_MMIO(reg, x)					\
+	do {							\
+		u32 _x = (x);					\
+		struct cbe_pmd_regs __iomem *pmd_regs;		\
+		struct cbe_pmd_shadow_regs *shadow_regs;	\
+		pmd_regs = cbe_get_cpu_pmd_regs(cpu);		\
+		shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu);	\
+		out_be64(&(pmd_regs->reg), (((u64)_x) << 32));	\
+		shadow_regs->reg = _x;				\
+	} while (0)
+
+#define READ_SHADOW_REG(val, reg)				\
+	do {							\
+		struct cbe_pmd_shadow_regs *shadow_regs;	\
+		shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu);	\
+		(val) = shadow_regs->reg;			\
+	} while (0)
+
+#define READ_MMIO_UPPER32(val, reg)				\
+	do {							\
+		struct cbe_pmd_regs __iomem *pmd_regs;		\
+		pmd_regs = cbe_get_cpu_pmd_regs(cpu);		\
+		(val) = (u32)(in_be64(&pmd_regs->reg) >> 32);	\
+	} while (0)
+
+/*
+ * Physical counter registers.
+ * Each physical counter can act as one 32-bit counter or two 16-bit counters.
+ */
+
+u32 cbe_read_phys_ctr(u32 cpu, u32 phys_ctr)
+{
+	u32 val_in_latch, val = 0;
+
+	if (phys_ctr < NR_PHYS_CTRS) {
+		READ_SHADOW_REG(val_in_latch, counter_value_in_latch);
+
+		/* Read the latch or the actual counter, whichever is newer. */
+		if (val_in_latch & (1 << phys_ctr)) {
+			READ_SHADOW_REG(val, pm_ctr[phys_ctr]);
+		} else {
+			READ_MMIO_UPPER32(val, pm_ctr[phys_ctr]);
+		}
+	}
+
+	return val;
+}
+EXPORT_SYMBOL_GPL(cbe_read_phys_ctr);
+
+void cbe_write_phys_ctr(u32 cpu, u32 phys_ctr, u32 val)
+{
+	struct cbe_pmd_shadow_regs *shadow_regs;
+	u32 pm_ctrl;
+
+	if (phys_ctr < NR_PHYS_CTRS) {
+		/* Writing to a counter only writes to a hardware latch.
+		 * The new value is not propagated to the actual counter
+		 * until the performance monitor is enabled.
+		 */
+		WRITE_WO_MMIO(pm_ctr[phys_ctr], val);
+
+		pm_ctrl = cbe_read_pm(cpu, pm_control);
+		if (pm_ctrl & CBE_PM_ENABLE_PERF_MON) {
+			/* The counters are already active, so we need to
+			 * rewrite the pm_control register to "re-enable"
+			 * the PMU.
+			 */
+			cbe_write_pm(cpu, pm_control, pm_ctrl);
+		} else {
+			shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu);
+			shadow_regs->counter_value_in_latch |= (1 << phys_ctr);
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(cbe_write_phys_ctr);
+
+/*
+ * "Logical" counter registers.
+ * These will read/write 16-bits or 32-bits depending on the
+ * current size of the counter. Counters 4 - 7 are always 16-bit.
+ */
+
+u32 cbe_read_ctr(u32 cpu, u32 ctr)
+{
+	u32 val;
+	u32 phys_ctr = ctr & (NR_PHYS_CTRS - 1);
+
+	val = cbe_read_phys_ctr(cpu, phys_ctr);
+
+	if (cbe_get_ctr_size(cpu, phys_ctr) == 16)
+		val = (ctr < NR_PHYS_CTRS) ? (val >> 16) : (val & 0xffff);
+
+	return val;
+}
+EXPORT_SYMBOL_GPL(cbe_read_ctr);
+
+void cbe_write_ctr(u32 cpu, u32 ctr, u32 val)
+{
+	u32 phys_ctr;
+	u32 phys_val;
+
+	phys_ctr = ctr & (NR_PHYS_CTRS - 1);
+
+	if (cbe_get_ctr_size(cpu, phys_ctr) == 16) {
+		phys_val = cbe_read_phys_ctr(cpu, phys_ctr);
+
+		if (ctr < NR_PHYS_CTRS)
+			val = (val << 16) | (phys_val & 0xffff);
+		else
+			val = (val & 0xffff) | (phys_val & 0xffff0000);
+	}
+
+	cbe_write_phys_ctr(cpu, phys_ctr, val);
+}
+EXPORT_SYMBOL_GPL(cbe_write_ctr);
+
+/*
+ * Counter-control registers.
+ * Each "logical" counter has a corresponding control register.
+ */
+
+u32 cbe_read_pm07_control(u32 cpu, u32 ctr)
+{
+	u32 pm07_control = 0;
+
+	if (ctr < NR_CTRS)
+		READ_SHADOW_REG(pm07_control, pm07_control[ctr]);
+
+	return pm07_control;
+}
+EXPORT_SYMBOL_GPL(cbe_read_pm07_control);
+
+void cbe_write_pm07_control(u32 cpu, u32 ctr, u32 val)
+{
+	if (ctr < NR_CTRS)
+		WRITE_WO_MMIO(pm07_control[ctr], val);
+}
+EXPORT_SYMBOL_GPL(cbe_write_pm07_control);
+
+/*
+ * Other PMU control registers. Most of these are write-only.
+ */
+
+u32 cbe_read_pm(u32 cpu, enum pm_reg_name reg)
+{
+	u32 val = 0;
+
+	switch (reg) {
+	case group_control:
+		READ_SHADOW_REG(val, group_control);
+		break;
+
+	case debug_bus_control:
+		READ_SHADOW_REG(val, debug_bus_control);
+		break;
+
+	case trace_address:
+		READ_MMIO_UPPER32(val, trace_address);
+		break;
+
+	case ext_tr_timer:
+		READ_SHADOW_REG(val, ext_tr_timer);
+		break;
+
+	case pm_status:
+		READ_MMIO_UPPER32(val, pm_status);
+		break;
+
+	case pm_control:
+		READ_SHADOW_REG(val, pm_control);
+		break;
+
+	case pm_interval:
+		READ_MMIO_UPPER32(val, pm_interval);
+		break;
+
+	case pm_start_stop:
+		READ_SHADOW_REG(val, pm_start_stop);
+		break;
+	}
+
+	return val;
+}
+EXPORT_SYMBOL_GPL(cbe_read_pm);
+
+void cbe_write_pm(u32 cpu, enum pm_reg_name reg, u32 val)
+{
+	switch (reg) {
+	case group_control:
+		WRITE_WO_MMIO(group_control, val);
+		break;
+
+	case debug_bus_control:
+		WRITE_WO_MMIO(debug_bus_control, val);
+		break;
+
+	case trace_address:
+		WRITE_WO_MMIO(trace_address, val);
+		break;
+
+	case ext_tr_timer:
+		WRITE_WO_MMIO(ext_tr_timer, val);
+		break;
+
+	case pm_status:
+		WRITE_WO_MMIO(pm_status, val);
+		break;
+
+	case pm_control:
+		WRITE_WO_MMIO(pm_control, val);
+		break;
+
+	case pm_interval:
+		WRITE_WO_MMIO(pm_interval, val);
+		break;
+
+	case pm_start_stop:
+		WRITE_WO_MMIO(pm_start_stop, val);
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(cbe_write_pm);
+
+/*
+ * Get/set the size of a physical counter to either 16 or 32 bits.
+ */
+
+u32 cbe_get_ctr_size(u32 cpu, u32 phys_ctr)
+{
+	u32 pm_ctrl, size = 0;
+
+	if (phys_ctr < NR_PHYS_CTRS) {
+		pm_ctrl = cbe_read_pm(cpu, pm_control);
+		size = (pm_ctrl & CBE_PM_16BIT_CTR(phys_ctr)) ? 16 : 32;
+	}
+
+	return size;
+}
+EXPORT_SYMBOL_GPL(cbe_get_ctr_size);
+
+void cbe_set_ctr_size(u32 cpu, u32 phys_ctr, u32 ctr_size)
+{
+	u32 pm_ctrl;
+
+	if (phys_ctr < NR_PHYS_CTRS) {
+		pm_ctrl = cbe_read_pm(cpu, pm_control);
+		switch (ctr_size) {
+		case 16:
+			pm_ctrl |= CBE_PM_16BIT_CTR(phys_ctr);
+			break;
+
+		case 32:
+			pm_ctrl &= ~CBE_PM_16BIT_CTR(phys_ctr);
+			break;
+		}
+		cbe_write_pm(cpu, pm_control, pm_ctrl);
+	}
+}
+EXPORT_SYMBOL_GPL(cbe_set_ctr_size);
+
+/*
+ * Enable/disable the entire performance monitoring unit.
+ * When we enable the PMU, all pending writes to counters get committed.
+ */
+
+void cbe_enable_pm(u32 cpu)
+{
+	struct cbe_pmd_shadow_regs *shadow_regs;
+	u32 pm_ctrl;
+
+	shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu);
+	shadow_regs->counter_value_in_latch = 0;
+
+	pm_ctrl = cbe_read_pm(cpu, pm_control) | CBE_PM_ENABLE_PERF_MON;
+	cbe_write_pm(cpu, pm_control, pm_ctrl);
+}
+EXPORT_SYMBOL_GPL(cbe_enable_pm);
+
+void cbe_disable_pm(u32 cpu)
+{
+	u32 pm_ctrl;
+	pm_ctrl = cbe_read_pm(cpu, pm_control) & ~CBE_PM_ENABLE_PERF_MON;
+	cbe_write_pm(cpu, pm_control, pm_ctrl);
+}
+EXPORT_SYMBOL_GPL(cbe_disable_pm);
+
+/*
+ * Reading from the trace_buffer.
+ * The trace buffer is two 64-bit registers. Reading from
+ * the second half automatically increments the trace_address.
+ */
+
+void cbe_read_trace_buffer(u32 cpu, u64 *buf)
+{
+	struct cbe_pmd_regs __iomem *pmd_regs = cbe_get_cpu_pmd_regs(cpu);
+
+	*buf++ = in_be64(&pmd_regs->trace_buffer_0_63);
+	*buf++ = in_be64(&pmd_regs->trace_buffer_64_127);
+}
+EXPORT_SYMBOL_GPL(cbe_read_trace_buffer);
+
+/*
+ * Enabling/disabling interrupts for the entire performance monitoring unit.
+ */
+
+u32 cbe_get_and_clear_pm_interrupts(u32 cpu)
+{
+	/* Reading pm_status clears the interrupt bits. */
+	return cbe_read_pm(cpu, pm_status);
+}
+EXPORT_SYMBOL_GPL(cbe_get_and_clear_pm_interrupts);
+
+void cbe_enable_pm_interrupts(u32 cpu, u32 thread, u32 mask)
+{
+	/* Set which node and thread will handle the next interrupt. */
+	iic_set_interrupt_routing(cpu, thread, 0);
+
+	/* Enable the interrupt bits in the pm_status register. */
+	if (mask)
+		cbe_write_pm(cpu, pm_status, mask);
+}
+EXPORT_SYMBOL_GPL(cbe_enable_pm_interrupts);
+
+void cbe_disable_pm_interrupts(u32 cpu)
+{
+	cbe_get_and_clear_pm_interrupts(cpu);
+	cbe_write_pm(cpu, pm_status, 0);
+}
+EXPORT_SYMBOL_GPL(cbe_disable_pm_interrupts);
+
+static irqreturn_t cbe_pm_irq(int irq, void *dev_id)
+{
+	perf_irq(get_irq_regs());
+	return IRQ_HANDLED;
+}
+
+static int __init cbe_init_pm_irq(void)
+{
+	unsigned int irq;
+	int rc, node;
+
+	for_each_online_node(node) {
+		irq = irq_create_mapping(NULL, IIC_IRQ_IOEX_PMI |
+					       (node << IIC_IRQ_NODE_SHIFT));
+		if (!irq) {
+			printk("ERROR: Unable to allocate irq for node %d\n",
+			       node);
+			return -EINVAL;
+		}
+
+		rc = request_irq(irq, cbe_pm_irq,
+				 0, "cbe-pmu-0", NULL);
+		if (rc) {
+			printk("ERROR: Request for irq on node %d failed\n",
+			       node);
+			return rc;
+		}
+	}
+
+	return 0;
+}
+machine_arch_initcall(cell, cbe_init_pm_irq);
+
+void cbe_sync_irq(int node)
+{
+	unsigned int irq;
+
+	irq = irq_find_mapping(NULL,
+			       IIC_IRQ_IOEX_PMI
+			       | (node << IIC_IRQ_NODE_SHIFT));
+
+	if (!irq) {
+		printk(KERN_WARNING "ERROR, unable to get existing irq %d " \
+		"for node %d\n", irq, node);
+		return;
+	}
+
+	synchronize_irq(irq);
+}
+EXPORT_SYMBOL_GPL(cbe_sync_irq);
+
diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c
new file mode 100644
index 000000000..2f704afe9
--- /dev/null
+++ b/arch/powerpc/platforms/cell/ras.c
@@ -0,0 +1,356 @@
+/*
+ * Copyright 2006-2008, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/reboot.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
+
+#include <asm/kexec.h>
+#include <asm/reg.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+#include <asm/cell-regs.h>
+
+#include "ras.h"
+
+
+static void dump_fir(int cpu)
+{
+	struct cbe_pmd_regs __iomem *pregs = cbe_get_cpu_pmd_regs(cpu);
+	struct cbe_iic_regs __iomem *iregs = cbe_get_cpu_iic_regs(cpu);
+
+	if (pregs == NULL)
+		return;
+
+	/* Todo: do some nicer parsing of bits and based on them go down
+	 * to other sub-units FIRs and not only IIC
+	 */
+	printk(KERN_ERR "Global Checkstop FIR    : 0x%016llx\n",
+	       in_be64(&pregs->checkstop_fir));
+	printk(KERN_ERR "Global Recoverable FIR  : 0x%016llx\n",
+	       in_be64(&pregs->checkstop_fir));
+	printk(KERN_ERR "Global MachineCheck FIR : 0x%016llx\n",
+	       in_be64(&pregs->spec_att_mchk_fir));
+
+	if (iregs == NULL)
+		return;
+	printk(KERN_ERR "IOC FIR                 : 0x%016llx\n",
+	       in_be64(&iregs->ioc_fir));
+
+}
+
+void cbe_system_error_exception(struct pt_regs *regs)
+{
+	int cpu = smp_processor_id();
+
+	printk(KERN_ERR "System Error Interrupt on CPU %d !\n", cpu);
+	dump_fir(cpu);
+	dump_stack();
+}
+
+void cbe_maintenance_exception(struct pt_regs *regs)
+{
+	int cpu = smp_processor_id();
+
+	/*
+	 * Nothing implemented for the maintenance interrupt at this point
+	 */
+
+	printk(KERN_ERR "Unhandled Maintenance interrupt on CPU %d !\n", cpu);
+	dump_stack();
+}
+
+void cbe_thermal_exception(struct pt_regs *regs)
+{
+	int cpu = smp_processor_id();
+
+	/*
+	 * Nothing implemented for the thermal interrupt at this point
+	 */
+
+	printk(KERN_ERR "Unhandled Thermal interrupt on CPU %d !\n", cpu);
+	dump_stack();
+}
+
+static int cbe_machine_check_handler(struct pt_regs *regs)
+{
+	int cpu = smp_processor_id();
+
+	printk(KERN_ERR "Machine Check Interrupt on CPU %d !\n", cpu);
+	dump_fir(cpu);
+
+	/* No recovery from this code now, lets continue */
+	return 0;
+}
+
+struct ptcal_area {
+	struct list_head list;
+	int nid;
+	int order;
+	struct page *pages;
+};
+
+static LIST_HEAD(ptcal_list);
+
+static int ptcal_start_tok, ptcal_stop_tok;
+
+static int __init cbe_ptcal_enable_on_node(int nid, int order)
+{
+	struct ptcal_area *area;
+	int ret = -ENOMEM;
+	unsigned long addr;
+
+	if (is_kdump_kernel())
+		rtas_call(ptcal_stop_tok, 1, 1, NULL, nid);
+
+	area = kmalloc(sizeof(*area), GFP_KERNEL);
+	if (!area)
+		goto out_err;
+
+	area->nid = nid;
+	area->order = order;
+	area->pages = __alloc_pages_node(area->nid,
+						GFP_KERNEL|__GFP_THISNODE,
+						area->order);
+
+	if (!area->pages) {
+		printk(KERN_WARNING "%s: no page on node %d\n",
+			__func__, area->nid);
+		goto out_free_area;
+	}
+
+	/*
+	 * We move the ptcal area to the middle of the allocated
+	 * page, in order to avoid prefetches in memcpy and similar
+	 * functions stepping on it.
+	 */
+	addr = __pa(page_address(area->pages)) + (PAGE_SIZE >> 1);
+	printk(KERN_DEBUG "%s: enabling PTCAL on node %d address=0x%016lx\n",
+			__func__, area->nid, addr);
+
+	ret = -EIO;
+	if (rtas_call(ptcal_start_tok, 3, 1, NULL, area->nid,
+				(unsigned int)(addr >> 32),
+				(unsigned int)(addr & 0xffffffff))) {
+		printk(KERN_ERR "%s: error enabling PTCAL on node %d!\n",
+				__func__, nid);
+		goto out_free_pages;
+	}
+
+	list_add(&area->list, &ptcal_list);
+
+	return 0;
+
+out_free_pages:
+	__free_pages(area->pages, area->order);
+out_free_area:
+	kfree(area);
+out_err:
+	return ret;
+}
+
+static int __init cbe_ptcal_enable(void)
+{
+	const u32 *size;
+	struct device_node *np;
+	int order, found_mic = 0;
+
+	np = of_find_node_by_path("/rtas");
+	if (!np)
+		return -ENODEV;
+
+	size = of_get_property(np, "ibm,cbe-ptcal-size", NULL);
+	if (!size) {
+		of_node_put(np);
+		return -ENODEV;
+	}
+
+	pr_debug("%s: enabling PTCAL, size = 0x%x\n", __func__, *size);
+	order = get_order(*size);
+	of_node_put(np);
+
+	/* support for malta device trees, with be@/mic@ nodes */
+	for_each_node_by_type(np, "mic-tm") {
+		cbe_ptcal_enable_on_node(of_node_to_nid(np), order);
+		found_mic = 1;
+	}
+
+	if (found_mic)
+		return 0;
+
+	/* support for older device tree - use cpu nodes */
+	for_each_node_by_type(np, "cpu") {
+		const u32 *nid = of_get_property(np, "node-id", NULL);
+		if (!nid) {
+			printk(KERN_ERR "%s: node %pOF is missing node-id?\n",
+					__func__, np);
+			continue;
+		}
+		cbe_ptcal_enable_on_node(*nid, order);
+		found_mic = 1;
+	}
+
+	return found_mic ? 0 : -ENODEV;
+}
+
+static int cbe_ptcal_disable(void)
+{
+	struct ptcal_area *area, *tmp;
+	int ret = 0;
+
+	pr_debug("%s: disabling PTCAL\n", __func__);
+
+	list_for_each_entry_safe(area, tmp, &ptcal_list, list) {
+		/* disable ptcal on this node */
+		if (rtas_call(ptcal_stop_tok, 1, 1, NULL, area->nid)) {
+			printk(KERN_ERR "%s: error disabling PTCAL "
+					"on node %d!\n", __func__,
+					area->nid);
+			ret = -EIO;
+			continue;
+		}
+
+		/* ensure we can access the PTCAL area */
+		memset(page_address(area->pages), 0,
+				1 << (area->order + PAGE_SHIFT));
+
+		/* clean up */
+		list_del(&area->list);
+		__free_pages(area->pages, area->order);
+		kfree(area);
+	}
+
+	return ret;
+}
+
+static int cbe_ptcal_notify_reboot(struct notifier_block *nb,
+		unsigned long code, void *data)
+{
+	return cbe_ptcal_disable();
+}
+
+static void cbe_ptcal_crash_shutdown(void)
+{
+	cbe_ptcal_disable();
+}
+
+static struct notifier_block cbe_ptcal_reboot_notifier = {
+	.notifier_call = cbe_ptcal_notify_reboot
+};
+
+#ifdef CONFIG_PPC_IBM_CELL_RESETBUTTON
+static int sysreset_hack;
+
+static int __init cbe_sysreset_init(void)
+{
+	struct cbe_pmd_regs __iomem *regs;
+
+	sysreset_hack = of_machine_is_compatible("IBM,CBPLUS-1.0");
+	if (!sysreset_hack)
+		return 0;
+
+	regs = cbe_get_cpu_pmd_regs(0);
+	if (!regs)
+		return 0;
+
+	/* Enable JTAG system-reset hack */
+	out_be32(&regs->fir_mode_reg,
+		in_be32(&regs->fir_mode_reg) |
+		CBE_PMD_FIR_MODE_M8);
+
+	return 0;
+}
+device_initcall(cbe_sysreset_init);
+
+int cbe_sysreset_hack(void)
+{
+	struct cbe_pmd_regs __iomem *regs;
+
+	/*
+	 * The BMC can inject user triggered system reset exceptions,
+	 * but cannot set the system reset reason in srr1,
+	 * so check an extra register here.
+	 */
+	if (sysreset_hack && (smp_processor_id() == 0)) {
+		regs = cbe_get_cpu_pmd_regs(0);
+		if (!regs)
+			return 0;
+		if (in_be64(&regs->ras_esc_0) & 0x0000ffff) {
+			out_be64(&regs->ras_esc_0, 0);
+			return 0;
+		}
+	}
+	return 1;
+}
+#endif /* CONFIG_PPC_IBM_CELL_RESETBUTTON */
+
+static int __init cbe_ptcal_init(void)
+{
+	int ret;
+	ptcal_start_tok = rtas_token("ibm,cbe-start-ptcal");
+	ptcal_stop_tok = rtas_token("ibm,cbe-stop-ptcal");
+
+	if (ptcal_start_tok == RTAS_UNKNOWN_SERVICE
+			|| ptcal_stop_tok == RTAS_UNKNOWN_SERVICE)
+		return -ENODEV;
+
+	ret = register_reboot_notifier(&cbe_ptcal_reboot_notifier);
+	if (ret)
+		goto out1;
+
+	ret = crash_shutdown_register(&cbe_ptcal_crash_shutdown);
+	if (ret)
+		goto out2;
+
+	return cbe_ptcal_enable();
+
+out2:
+	unregister_reboot_notifier(&cbe_ptcal_reboot_notifier);
+out1:
+	printk(KERN_ERR "Can't disable PTCAL, so not enabling\n");
+	return ret;
+}
+
+arch_initcall(cbe_ptcal_init);
+
+void __init cbe_ras_init(void)
+{
+	unsigned long hid0;
+
+	/*
+	 * Enable System Error & thermal interrupts and wakeup conditions
+	 */
+
+	hid0 = mfspr(SPRN_HID0);
+	hid0 |= HID0_CBE_THERM_INT_EN | HID0_CBE_THERM_WAKEUP |
+		HID0_CBE_SYSERR_INT_EN | HID0_CBE_SYSERR_WAKEUP;
+	mtspr(SPRN_HID0, hid0);
+	mb();
+
+	/*
+	 * Install machine check handler. Leave setting of precise mode to
+	 * what the firmware did for now
+	 */
+	ppc_md.machine_check_exception = cbe_machine_check_handler;
+	mb();
+
+	/*
+	 * For now, we assume that IOC_FIR is already set to forward some
+	 * error conditions to the System Error handler. If that is not true
+	 * then it will have to be fixed up here.
+	 */
+}
diff --git a/arch/powerpc/platforms/cell/ras.h b/arch/powerpc/platforms/cell/ras.h
new file mode 100644
index 000000000..6c2e6bc00
--- /dev/null
+++ b/arch/powerpc/platforms/cell/ras.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef RAS_H
+#define RAS_H
+
+extern void cbe_system_error_exception(struct pt_regs *regs);
+extern void cbe_maintenance_exception(struct pt_regs *regs);
+extern void cbe_thermal_exception(struct pt_regs *regs);
+extern void cbe_ras_init(void);
+
+#endif /* RAS_H */
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
new file mode 100644
index 000000000..7d31b8d14
--- /dev/null
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -0,0 +1,282 @@
+/*
+ *  linux/arch/powerpc/platforms/cell/cell_setup.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ *  Adapted from 'alpha' version by Gary Thomas
+ *  Modified by Cort Dougan (cort@cs.nmt.edu)
+ *  Modified by PPC64 Team, IBM Corp
+ *  Modified by Cell Team, IBM Deutschland Entwicklung GmbH
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#undef DEBUG
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/export.h>
+#include <linux/unistd.h>
+#include <linux/user.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/console.h>
+#include <linux/mutex.h>
+#include <linux/memory_hotplug.h>
+#include <linux/of_platform.h>
+
+#include <asm/mmu.h>
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/pci-bridge.h>
+#include <asm/iommu.h>
+#include <asm/dma.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/nvram.h>
+#include <asm/cputable.h>
+#include <asm/ppc-pci.h>
+#include <asm/irq.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <asm/cell-regs.h>
+#include <asm/io-workarounds.h>
+
+#include "cell.h"
+#include "interrupt.h"
+#include "pervasive.h"
+#include "ras.h"
+
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+static void cell_show_cpuinfo(struct seq_file *m)
+{
+	struct device_node *root;
+	const char *model = "";
+
+	root = of_find_node_by_path("/");
+	if (root)
+		model = of_get_property(root, "model", NULL);
+	seq_printf(m, "machine\t\t: CHRP %s\n", model);
+	of_node_put(root);
+}
+
+static void cell_progress(char *s, unsigned short hex)
+{
+	printk("*** %04x : %s\n", hex, s ? s : "");
+}
+
+static void cell_fixup_pcie_rootcomplex(struct pci_dev *dev)
+{
+	struct pci_controller *hose;
+	const char *s;
+	int i;
+
+	if (!machine_is(cell))
+		return;
+
+	/* We're searching for a direct child of the PHB */
+	if (dev->bus->self != NULL || dev->devfn != 0)
+		return;
+
+	hose = pci_bus_to_host(dev->bus);
+	if (hose == NULL)
+		return;
+
+	/* Only on PCIE */
+	if (!of_device_is_compatible(hose->dn, "pciex"))
+		return;
+
+	/* And only on axon */
+	s = of_get_property(hose->dn, "model", NULL);
+	if (!s || strcmp(s, "Axon") != 0)
+		return;
+
+	for (i = 0; i < PCI_BRIDGE_RESOURCES; i++) {
+		dev->resource[i].start = dev->resource[i].end = 0;
+		dev->resource[i].flags = 0;
+	}
+
+	printk(KERN_DEBUG "PCI: Hiding resources on Axon PCIE RC %s\n",
+	       pci_name(dev));
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, cell_fixup_pcie_rootcomplex);
+
+static int cell_setup_phb(struct pci_controller *phb)
+{
+	const char *model;
+	struct device_node *np;
+
+	int rc = rtas_setup_phb(phb);
+	if (rc)
+		return rc;
+
+	phb->controller_ops = cell_pci_controller_ops;
+
+	np = phb->dn;
+	model = of_get_property(np, "model", NULL);
+	if (model == NULL || strcmp(np->name, "pci"))
+		return 0;
+
+	/* Setup workarounds for spider */
+	if (strcmp(model, "Spider"))
+		return 0;
+
+	iowa_register_bus(phb, &spiderpci_ops, &spiderpci_iowa_init,
+				  (void *)SPIDER_PCI_REG_BASE);
+	return 0;
+}
+
+static const struct of_device_id cell_bus_ids[] __initconst = {
+	{ .type = "soc", },
+	{ .compatible = "soc", },
+	{ .type = "spider", },
+	{ .type = "axon", },
+	{ .type = "plb5", },
+	{ .type = "plb4", },
+	{ .type = "opb", },
+	{ .type = "ebc", },
+	{},
+};
+
+static int __init cell_publish_devices(void)
+{
+	struct device_node *root = of_find_node_by_path("/");
+	struct device_node *np;
+	int node;
+
+	/* Publish OF platform devices for southbridge IOs */
+	of_platform_bus_probe(NULL, cell_bus_ids, NULL);
+
+	/* On spider based blades, we need to manually create the OF
+	 * platform devices for the PCI host bridges
+	 */
+	for_each_child_of_node(root, np) {
+		if (np->type == NULL || (strcmp(np->type, "pci") != 0 &&
+					 strcmp(np->type, "pciex") != 0))
+			continue;
+		of_platform_device_create(np, NULL, NULL);
+	}
+
+	/* There is no device for the MIC memory controller, thus we create
+	 * a platform device for it to attach the EDAC driver to.
+	 */
+	for_each_online_node(node) {
+		if (cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(node)) == NULL)
+			continue;
+		platform_device_register_simple("cbe-mic", node, NULL, 0);
+	}
+
+	return 0;
+}
+machine_subsys_initcall(cell, cell_publish_devices);
+
+static void __init mpic_init_IRQ(void)
+{
+	struct device_node *dn;
+	struct mpic *mpic;
+
+	for_each_node_by_name(dn, "interrupt-controller") {
+		if (!of_device_is_compatible(dn, "CBEA,platform-open-pic"))
+			continue;
+
+		/* The MPIC driver will get everything it needs from the
+		 * device-tree, just pass 0 to all arguments
+		 */
+		mpic = mpic_alloc(dn, 0, MPIC_SECONDARY | MPIC_NO_RESET,
+				0, 0, " MPIC     ");
+		if (mpic == NULL)
+			continue;
+		mpic_init(mpic);
+	}
+}
+
+
+static void __init cell_init_irq(void)
+{
+	iic_init_IRQ();
+	spider_init_IRQ();
+	mpic_init_IRQ();
+}
+
+static void __init cell_set_dabrx(void)
+{
+	mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER);
+}
+
+static void __init cell_setup_arch(void)
+{
+#ifdef CONFIG_SPU_BASE
+	spu_priv1_ops = &spu_priv1_mmio_ops;
+	spu_management_ops = &spu_management_of_ops;
+#endif
+
+	cbe_regs_init();
+
+	cell_set_dabrx();
+
+#ifdef CONFIG_CBE_RAS
+	cbe_ras_init();
+#endif
+
+#ifdef CONFIG_SMP
+	smp_init_cell();
+#endif
+	/* init to some ~sane value until calibrate_delay() runs */
+	loops_per_jiffy = 50000000;
+
+	/* Find and initialize PCI host bridges */
+	init_pci_config_tokens();
+
+	cbe_pervasive_init();
+#ifdef CONFIG_DUMMY_CONSOLE
+	conswitchp = &dummy_con;
+#endif
+
+	mmio_nvram_init();
+}
+
+static int __init cell_probe(void)
+{
+	if (!of_machine_is_compatible("IBM,CBEA") &&
+	    !of_machine_is_compatible("IBM,CPBW-1.0"))
+		return 0;
+
+	pm_power_off = rtas_power_off;
+
+	return 1;
+}
+
+define_machine(cell) {
+	.name			= "Cell",
+	.probe			= cell_probe,
+	.setup_arch		= cell_setup_arch,
+	.show_cpuinfo		= cell_show_cpuinfo,
+	.restart		= rtas_restart,
+	.halt			= rtas_halt,
+	.get_boot_time		= rtas_get_boot_time,
+	.get_rtc_time		= rtas_get_rtc_time,
+	.set_rtc_time		= rtas_set_rtc_time,
+	.calibrate_decr		= generic_calibrate_decr,
+	.progress		= cell_progress,
+	.init_IRQ       	= cell_init_irq,
+	.pci_setup_phb		= cell_setup_phb,
+};
+
+struct pci_controller_ops cell_pci_controller_ops;
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
new file mode 100644
index 000000000..1aeac5761
--- /dev/null
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -0,0 +1,169 @@
+/*
+ * SMP support for BPA machines.
+ *
+ * Dave Engebretsen, Peter Bergner, and
+ * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
+ *
+ * Plus various changes from other IBM teams...
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/cache.h>
+#include <linux/err.h>
+#include <linux/device.h>
+#include <linux/cpu.h>
+
+#include <asm/ptrace.h>
+#include <linux/atomic.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/smp.h>
+#include <asm/paca.h>
+#include <asm/machdep.h>
+#include <asm/cputable.h>
+#include <asm/firmware.h>
+#include <asm/rtas.h>
+#include <asm/cputhreads.h>
+#include <asm/code-patching.h>
+
+#include "interrupt.h"
+#include <asm/udbg.h>
+
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/*
+ * The Primary thread of each non-boot processor was started from the OF client
+ * interface by prom_hold_cpus and is spinning on secondary_hold_spinloop.
+ */
+static cpumask_t of_spin_map;
+
+/**
+ * smp_startup_cpu() - start the given cpu
+ *
+ * At boot time, there is nothing to do for primary threads which were
+ * started from Open Firmware.  For anything else, call RTAS with the
+ * appropriate start location.
+ *
+ * Returns:
+ *	0	- failure
+ *	1	- success
+ */
+static inline int smp_startup_cpu(unsigned int lcpu)
+{
+	int status;
+	unsigned long start_here =
+			__pa(ppc_function_entry(generic_secondary_smp_init));
+	unsigned int pcpu;
+	int start_cpu;
+
+	if (cpumask_test_cpu(lcpu, &of_spin_map))
+		/* Already started by OF and sitting in spin loop */
+		return 1;
+
+	pcpu = get_hard_smp_processor_id(lcpu);
+
+	/* Fixup atomic count: it exited inside IRQ handler. */
+	task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count	= 0;
+
+	/*
+	 * If the RTAS start-cpu token does not exist then presume the
+	 * cpu is already spinning.
+	 */
+	start_cpu = rtas_token("start-cpu");
+	if (start_cpu == RTAS_UNKNOWN_SERVICE)
+		return 1;
+
+	status = rtas_call(start_cpu, 3, 1, NULL, pcpu, start_here, lcpu);
+	if (status != 0) {
+		printk(KERN_ERR "start-cpu failed: %i\n", status);
+		return 0;
+	}
+
+	return 1;
+}
+
+static void smp_cell_setup_cpu(int cpu)
+{
+	if (cpu != boot_cpuid)
+		iic_setup_cpu();
+
+	/*
+	 * change default DABRX to allow user watchpoints
+	 */
+	mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER);
+}
+
+static int smp_cell_kick_cpu(int nr)
+{
+	if (nr < 0 || nr >= nr_cpu_ids)
+		return -EINVAL;
+
+	if (!smp_startup_cpu(nr))
+		return -ENOENT;
+
+	/*
+	 * The processor is currently spinning, waiting for the
+	 * cpu_start field to become non-zero After we set cpu_start,
+	 * the processor will continue on to secondary_start
+	 */
+	paca_ptrs[nr]->cpu_start = 1;
+
+	return 0;
+}
+
+static struct smp_ops_t bpa_iic_smp_ops = {
+	.message_pass	= iic_message_pass,
+	.probe		= iic_request_IPIs,
+	.kick_cpu	= smp_cell_kick_cpu,
+	.setup_cpu	= smp_cell_setup_cpu,
+	.cpu_bootable	= smp_generic_cpu_bootable,
+};
+
+/* This is called very early */
+void __init smp_init_cell(void)
+{
+	int i;
+
+	DBG(" -> smp_init_cell()\n");
+
+	smp_ops = &bpa_iic_smp_ops;
+
+	/* Mark threads which are still spinning in hold loops. */
+	if (cpu_has_feature(CPU_FTR_SMT)) {
+		for_each_present_cpu(i) {
+			if (cpu_thread_in_core(i) == 0)
+				cpumask_set_cpu(i, &of_spin_map);
+		}
+	} else
+		cpumask_copy(&of_spin_map, cpu_present_mask);
+
+	cpumask_clear_cpu(boot_cpuid, &of_spin_map);
+
+	/* Non-lpar has additional take/give timebase */
+	if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) {
+		smp_ops->give_timebase = rtas_give_timebase;
+		smp_ops->take_timebase = rtas_take_timebase;
+	}
+
+	DBG(" <- smp_init_cell()\n");
+}
diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c
new file mode 100644
index 000000000..1200d0dea
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spider-pci.c
@@ -0,0 +1,184 @@
+/*
+ * IO workarounds for PCI on Celleb/Cell platform
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+
+#include <asm/ppc-pci.h>
+#include <asm/pci-bridge.h>
+#include <asm/io-workarounds.h>
+
+#define SPIDER_PCI_DISABLE_PREFETCH
+
+struct spiderpci_iowa_private {
+	void __iomem *regs;
+};
+
+static void spiderpci_io_flush(struct iowa_bus *bus)
+{
+	struct spiderpci_iowa_private *priv;
+	u32 val;
+
+	priv = bus->private;
+	val = in_be32(priv->regs + SPIDER_PCI_DUMMY_READ);
+	iosync();
+}
+
+#define SPIDER_PCI_MMIO_READ(name, ret)					\
+static ret spiderpci_##name(const PCI_IO_ADDR addr)			\
+{									\
+	ret val = __do_##name(addr);					\
+	spiderpci_io_flush(iowa_mem_find_bus(addr));			\
+	return val;							\
+}
+
+#define SPIDER_PCI_MMIO_READ_STR(name)					\
+static void spiderpci_##name(const PCI_IO_ADDR addr, void *buf, 	\
+			     unsigned long count)			\
+{									\
+	__do_##name(addr, buf, count);					\
+	spiderpci_io_flush(iowa_mem_find_bus(addr));			\
+}
+
+SPIDER_PCI_MMIO_READ(readb, u8)
+SPIDER_PCI_MMIO_READ(readw, u16)
+SPIDER_PCI_MMIO_READ(readl, u32)
+SPIDER_PCI_MMIO_READ(readq, u64)
+SPIDER_PCI_MMIO_READ(readw_be, u16)
+SPIDER_PCI_MMIO_READ(readl_be, u32)
+SPIDER_PCI_MMIO_READ(readq_be, u64)
+SPIDER_PCI_MMIO_READ_STR(readsb)
+SPIDER_PCI_MMIO_READ_STR(readsw)
+SPIDER_PCI_MMIO_READ_STR(readsl)
+
+static void spiderpci_memcpy_fromio(void *dest, const PCI_IO_ADDR src,
+				    unsigned long n)
+{
+	__do_memcpy_fromio(dest, src, n);
+	spiderpci_io_flush(iowa_mem_find_bus(src));
+}
+
+static int __init spiderpci_pci_setup_chip(struct pci_controller *phb,
+					   void __iomem *regs)
+{
+	void *dummy_page_va;
+	dma_addr_t dummy_page_da;
+
+#ifdef SPIDER_PCI_DISABLE_PREFETCH
+	u32 val = in_be32(regs + SPIDER_PCI_VCI_CNTL_STAT);
+	pr_debug("SPIDER_IOWA:PVCI_Control_Status was 0x%08x\n", val);
+	out_be32(regs + SPIDER_PCI_VCI_CNTL_STAT, val | 0x8);
+#endif /* SPIDER_PCI_DISABLE_PREFETCH */
+
+	/* setup dummy read */
+	/*
+	 * On CellBlade, we can't know that which XDR memory is used by
+	 * kmalloc() to allocate dummy_page_va.
+	 * In order to imporve the performance, the XDR which is used to
+	 * allocate dummy_page_va is the nearest the spider-pci.
+	 * We have to select the CBE which is the nearest the spider-pci
+	 * to allocate memory from the best XDR, but I don't know that
+	 * how to do.
+	 *
+	 * Celleb does not have this problem, because it has only one XDR.
+	 */
+	dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!dummy_page_va) {
+		pr_err("SPIDERPCI-IOWA:Alloc dummy_page_va failed.\n");
+		return -1;
+	}
+
+	dummy_page_da = dma_map_single(phb->parent, dummy_page_va,
+				       PAGE_SIZE, DMA_FROM_DEVICE);
+	if (dma_mapping_error(phb->parent, dummy_page_da)) {
+		pr_err("SPIDER-IOWA:Map dummy page filed.\n");
+		kfree(dummy_page_va);
+		return -1;
+	}
+
+	out_be32(regs + SPIDER_PCI_DUMMY_READ_BASE, dummy_page_da);
+
+	return 0;
+}
+
+int __init spiderpci_iowa_init(struct iowa_bus *bus, void *data)
+{
+	void __iomem *regs = NULL;
+	struct spiderpci_iowa_private *priv;
+	struct device_node *np = bus->phb->dn;
+	struct resource r;
+	unsigned long offset = (unsigned long)data;
+
+	pr_debug("SPIDERPCI-IOWA:Bus initialize for spider(%pOF)\n",
+		 np);
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		pr_err("SPIDERPCI-IOWA:"
+		       "Can't allocate struct spiderpci_iowa_private");
+		return -1;
+	}
+
+	if (of_address_to_resource(np, 0, &r)) {
+		pr_err("SPIDERPCI-IOWA:Can't get resource.\n");
+		goto error;
+	}
+
+	regs = ioremap(r.start + offset, SPIDER_PCI_REG_SIZE);
+	if (!regs) {
+		pr_err("SPIDERPCI-IOWA:ioremap failed.\n");
+		goto error;
+	}
+	priv->regs = regs;
+	bus->private = priv;
+
+	if (spiderpci_pci_setup_chip(bus->phb, regs))
+		goto error;
+
+	return 0;
+
+error:
+	kfree(priv);
+	bus->private = NULL;
+
+	if (regs)
+		iounmap(regs);
+
+	return -1;
+}
+
+struct ppc_pci_io spiderpci_ops = {
+	.readb = spiderpci_readb,
+	.readw = spiderpci_readw,
+	.readl = spiderpci_readl,
+	.readq = spiderpci_readq,
+	.readw_be = spiderpci_readw_be,
+	.readl_be = spiderpci_readl_be,
+	.readq_be = spiderpci_readq_be,
+	.readsb = spiderpci_readsb,
+	.readsw = spiderpci_readsw,
+	.readsl = spiderpci_readsl,
+	.memcpy_fromio = spiderpci_memcpy_fromio,
+};
+
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
new file mode 100644
index 000000000..c137f0cb4
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -0,0 +1,361 @@
+/*
+ * External Interrupt Controller on Spider South Bridge
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/ioport.h>
+
+#include <asm/pgtable.h>
+#include <asm/prom.h>
+#include <asm/io.h>
+
+#include "interrupt.h"
+
+/* register layout taken from Spider spec, table 7.4-4 */
+enum {
+	TIR_DEN		= 0x004, /* Detection Enable Register */
+	TIR_MSK		= 0x084, /* Mask Level Register */
+	TIR_EDC		= 0x0c0, /* Edge Detection Clear Register */
+	TIR_PNDA	= 0x100, /* Pending Register A */
+	TIR_PNDB	= 0x104, /* Pending Register B */
+	TIR_CS		= 0x144, /* Current Status Register */
+	TIR_LCSA	= 0x150, /* Level Current Status Register A */
+	TIR_LCSB	= 0x154, /* Level Current Status Register B */
+	TIR_LCSC	= 0x158, /* Level Current Status Register C */
+	TIR_LCSD	= 0x15c, /* Level Current Status Register D */
+	TIR_CFGA	= 0x200, /* Setting Register A0 */
+	TIR_CFGB	= 0x204, /* Setting Register B0 */
+			/* 0x208 ... 0x3ff Setting Register An/Bn */
+	TIR_PPNDA	= 0x400, /* Packet Pending Register A */
+	TIR_PPNDB	= 0x404, /* Packet Pending Register B */
+	TIR_PIERA	= 0x408, /* Packet Output Error Register A */
+	TIR_PIERB	= 0x40c, /* Packet Output Error Register B */
+	TIR_PIEN	= 0x444, /* Packet Output Enable Register */
+	TIR_PIPND	= 0x454, /* Packet Output Pending Register */
+	TIRDID		= 0x484, /* Spider Device ID Register */
+	REISTIM		= 0x500, /* Reissue Command Timeout Time Setting */
+	REISTIMEN	= 0x504, /* Reissue Command Timeout Setting */
+	REISWAITEN	= 0x508, /* Reissue Wait Control*/
+};
+
+#define SPIDER_CHIP_COUNT	4
+#define SPIDER_SRC_COUNT	64
+#define SPIDER_IRQ_INVALID	63
+
+struct spider_pic {
+	struct irq_domain		*host;
+	void __iomem		*regs;
+	unsigned int		node_id;
+};
+static struct spider_pic spider_pics[SPIDER_CHIP_COUNT];
+
+static struct spider_pic *spider_irq_data_to_pic(struct irq_data *d)
+{
+	return irq_data_get_irq_chip_data(d);
+}
+
+static void __iomem *spider_get_irq_config(struct spider_pic *pic,
+					   unsigned int src)
+{
+	return pic->regs + TIR_CFGA + 8 * src;
+}
+
+static void spider_unmask_irq(struct irq_data *d)
+{
+	struct spider_pic *pic = spider_irq_data_to_pic(d);
+	void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d));
+
+	out_be32(cfg, in_be32(cfg) | 0x30000000u);
+}
+
+static void spider_mask_irq(struct irq_data *d)
+{
+	struct spider_pic *pic = spider_irq_data_to_pic(d);
+	void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d));
+
+	out_be32(cfg, in_be32(cfg) & ~0x30000000u);
+}
+
+static void spider_ack_irq(struct irq_data *d)
+{
+	struct spider_pic *pic = spider_irq_data_to_pic(d);
+	unsigned int src = irqd_to_hwirq(d);
+
+	/* Reset edge detection logic if necessary
+	 */
+	if (irqd_is_level_type(d))
+		return;
+
+	/* Only interrupts 47 to 50 can be set to edge */
+	if (src < 47 || src > 50)
+		return;
+
+	/* Perform the clear of the edge logic */
+	out_be32(pic->regs + TIR_EDC, 0x100 | (src & 0xf));
+}
+
+static int spider_set_irq_type(struct irq_data *d, unsigned int type)
+{
+	unsigned int sense = type & IRQ_TYPE_SENSE_MASK;
+	struct spider_pic *pic = spider_irq_data_to_pic(d);
+	unsigned int hw = irqd_to_hwirq(d);
+	void __iomem *cfg = spider_get_irq_config(pic, hw);
+	u32 old_mask;
+	u32 ic;
+
+	/* Note that only level high is supported for most interrupts */
+	if (sense != IRQ_TYPE_NONE && sense != IRQ_TYPE_LEVEL_HIGH &&
+	    (hw < 47 || hw > 50))
+		return -EINVAL;
+
+	/* Decode sense type */
+	switch(sense) {
+	case IRQ_TYPE_EDGE_RISING:
+		ic = 0x3;
+		break;
+	case IRQ_TYPE_EDGE_FALLING:
+		ic = 0x2;
+		break;
+	case IRQ_TYPE_LEVEL_LOW:
+		ic = 0x0;
+		break;
+	case IRQ_TYPE_LEVEL_HIGH:
+	case IRQ_TYPE_NONE:
+		ic = 0x1;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Configure the source. One gross hack that was there before and
+	 * that I've kept around is the priority to the BE which I set to
+	 * be the same as the interrupt source number. I don't know whether
+	 * that's supposed to make any kind of sense however, we'll have to
+	 * decide that, but for now, I'm not changing the behaviour.
+	 */
+	old_mask = in_be32(cfg) & 0x30000000u;
+	out_be32(cfg, old_mask | (ic << 24) | (0x7 << 16) |
+		 (pic->node_id << 4) | 0xe);
+	out_be32(cfg + 4, (0x2 << 16) | (hw & 0xff));
+
+	return 0;
+}
+
+static struct irq_chip spider_pic = {
+	.name = "SPIDER",
+	.irq_unmask = spider_unmask_irq,
+	.irq_mask = spider_mask_irq,
+	.irq_ack = spider_ack_irq,
+	.irq_set_type = spider_set_irq_type,
+};
+
+static int spider_host_map(struct irq_domain *h, unsigned int virq,
+			irq_hw_number_t hw)
+{
+	irq_set_chip_data(virq, h->host_data);
+	irq_set_chip_and_handler(virq, &spider_pic, handle_level_irq);
+
+	/* Set default irq type */
+	irq_set_irq_type(virq, IRQ_TYPE_NONE);
+
+	return 0;
+}
+
+static int spider_host_xlate(struct irq_domain *h, struct device_node *ct,
+			   const u32 *intspec, unsigned int intsize,
+			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+	/* Spider interrupts have 2 cells, first is the interrupt source,
+	 * second, well, I don't know for sure yet ... We mask the top bits
+	 * because old device-trees encode a node number in there
+	 */
+	*out_hwirq = intspec[0] & 0x3f;
+	*out_flags = IRQ_TYPE_LEVEL_HIGH;
+	return 0;
+}
+
+static const struct irq_domain_ops spider_host_ops = {
+	.map = spider_host_map,
+	.xlate = spider_host_xlate,
+};
+
+static void spider_irq_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct spider_pic *pic = irq_desc_get_handler_data(desc);
+	unsigned int cs, virq;
+
+	cs = in_be32(pic->regs + TIR_CS) >> 24;
+	if (cs == SPIDER_IRQ_INVALID)
+		virq = 0;
+	else
+		virq = irq_linear_revmap(pic->host, cs);
+
+	if (virq)
+		generic_handle_irq(virq);
+
+	chip->irq_eoi(&desc->irq_data);
+}
+
+/* For hooking up the cascade we have a problem. Our device-tree is
+ * crap and we don't know on which BE iic interrupt we are hooked on at
+ * least not the "standard" way. We can reconstitute it based on two
+ * informations though: which BE node we are connected to and whether
+ * we are connected to IOIF0 or IOIF1. Right now, we really only care
+ * about the IBM cell blade and we know that its firmware gives us an
+ * interrupt-map property which is pretty strange.
+ */
+static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic)
+{
+	unsigned int virq;
+	const u32 *imap, *tmp;
+	int imaplen, intsize, unit;
+	struct device_node *iic;
+	struct device_node *of_node;
+
+	of_node = irq_domain_get_of_node(pic->host);
+
+	/* First, we check whether we have a real "interrupts" in the device
+	 * tree in case the device-tree is ever fixed
+	 */
+	virq = irq_of_parse_and_map(of_node, 0);
+	if (virq)
+		return virq;
+
+	/* Now do the horrible hacks */
+	tmp = of_get_property(of_node, "#interrupt-cells", NULL);
+	if (tmp == NULL)
+		return 0;
+	intsize = *tmp;
+	imap = of_get_property(of_node, "interrupt-map", &imaplen);
+	if (imap == NULL || imaplen < (intsize + 1))
+		return 0;
+	iic = of_find_node_by_phandle(imap[intsize]);
+	if (iic == NULL)
+		return 0;
+	imap += intsize + 1;
+	tmp = of_get_property(iic, "#interrupt-cells", NULL);
+	if (tmp == NULL) {
+		of_node_put(iic);
+		return 0;
+	}
+	intsize = *tmp;
+	/* Assume unit is last entry of interrupt specifier */
+	unit = imap[intsize - 1];
+	/* Ok, we have a unit, now let's try to get the node */
+	tmp = of_get_property(iic, "ibm,interrupt-server-ranges", NULL);
+	if (tmp == NULL) {
+		of_node_put(iic);
+		return 0;
+	}
+	/* ugly as hell but works for now */
+	pic->node_id = (*tmp) >> 1;
+	of_node_put(iic);
+
+	/* Ok, now let's get cracking. You may ask me why I just didn't match
+	 * the iic host from the iic OF node, but that way I'm still compatible
+	 * with really really old old firmwares for which we don't have a node
+	 */
+	/* Manufacture an IIC interrupt number of class 2 */
+	virq = irq_create_mapping(NULL,
+				  (pic->node_id << IIC_IRQ_NODE_SHIFT) |
+				  (2 << IIC_IRQ_CLASS_SHIFT) |
+				  unit);
+	if (!virq)
+		printk(KERN_ERR "spider_pic: failed to map cascade !");
+	return virq;
+}
+
+
+static void __init spider_init_one(struct device_node *of_node, int chip,
+				   unsigned long addr)
+{
+	struct spider_pic *pic = &spider_pics[chip];
+	int i, virq;
+
+	/* Map registers */
+	pic->regs = ioremap(addr, 0x1000);
+	if (pic->regs == NULL)
+		panic("spider_pic: can't map registers !");
+
+	/* Allocate a host */
+	pic->host = irq_domain_add_linear(of_node, SPIDER_SRC_COUNT,
+					  &spider_host_ops, pic);
+	if (pic->host == NULL)
+		panic("spider_pic: can't allocate irq host !");
+
+	/* Go through all sources and disable them */
+	for (i = 0; i < SPIDER_SRC_COUNT; i++) {
+		void __iomem *cfg = pic->regs + TIR_CFGA + 8 * i;
+		out_be32(cfg, in_be32(cfg) & ~0x30000000u);
+	}
+
+	/* do not mask any interrupts because of level */
+	out_be32(pic->regs + TIR_MSK, 0x0);
+
+	/* enable interrupt packets to be output */
+	out_be32(pic->regs + TIR_PIEN, in_be32(pic->regs + TIR_PIEN) | 0x1);
+
+	/* Hook up the cascade interrupt to the iic and nodeid */
+	virq = spider_find_cascade_and_node(pic);
+	if (!virq)
+		return;
+	irq_set_handler_data(virq, pic);
+	irq_set_chained_handler(virq, spider_irq_cascade);
+
+	printk(KERN_INFO "spider_pic: node %d, addr: 0x%lx %pOF\n",
+	       pic->node_id, addr, of_node);
+
+	/* Enable the interrupt detection enable bit. Do this last! */
+	out_be32(pic->regs + TIR_DEN, in_be32(pic->regs + TIR_DEN) | 0x1);
+}
+
+void __init spider_init_IRQ(void)
+{
+	struct resource r;
+	struct device_node *dn;
+	int chip = 0;
+
+	/* XXX node numbers are totally bogus. We _hope_ we get the device
+	 * nodes in the right order here but that's definitely not guaranteed,
+	 * we need to get the node from the device tree instead.
+	 * There is currently no proper property for it (but our whole
+	 * device-tree is bogus anyway) so all we can do is pray or maybe test
+	 * the address and deduce the node-id
+	 */
+	for_each_node_by_name(dn, "interrupt-controller") {
+		if (of_device_is_compatible(dn, "CBEA,platform-spider-pic")) {
+			if (of_address_to_resource(dn, 0, &r)) {
+				printk(KERN_WARNING "spider-pic: Failed\n");
+				continue;
+			}
+		} else if (of_device_is_compatible(dn, "sti,platform-spider-pic")
+			   && (chip < 2)) {
+			static long hard_coded_pics[] =
+				{ 0x24000008000ul, 0x34000008000ul};
+			r.start = hard_coded_pics[chip];
+		} else
+			continue;
+		spider_init_one(dn, chip++, r.start);
+	}
+}
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
new file mode 100644
index 000000000..0c45cdbac
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -0,0 +1,810 @@
+/*
+ * Low-level SPU handling
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/init.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+#include <linux/linux_logo.h>
+#include <linux/syscore_ops.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/spu_csa.h>
+#include <asm/xmon.h>
+#include <asm/prom.h>
+#include <asm/kexec.h>
+
+const struct spu_management_ops *spu_management_ops;
+EXPORT_SYMBOL_GPL(spu_management_ops);
+
+const struct spu_priv1_ops *spu_priv1_ops;
+EXPORT_SYMBOL_GPL(spu_priv1_ops);
+
+struct cbe_spu_info cbe_spu_info[MAX_NUMNODES];
+EXPORT_SYMBOL_GPL(cbe_spu_info);
+
+/*
+ * The spufs fault-handling code needs to call force_sig_info to raise signals
+ * on DMA errors. Export it here to avoid general kernel-wide access to this
+ * function
+ */
+EXPORT_SYMBOL_GPL(force_sig_info);
+
+/*
+ * Protects cbe_spu_info and spu->number.
+ */
+static DEFINE_SPINLOCK(spu_lock);
+
+/*
+ * List of all spus in the system.
+ *
+ * This list is iterated by callers from irq context and callers that
+ * want to sleep.  Thus modifications need to be done with both
+ * spu_full_list_lock and spu_full_list_mutex held, while iterating
+ * through it requires either of these locks.
+ *
+ * In addition spu_full_list_lock protects all assignments to
+ * spu->mm.
+ */
+static LIST_HEAD(spu_full_list);
+static DEFINE_SPINLOCK(spu_full_list_lock);
+static DEFINE_MUTEX(spu_full_list_mutex);
+
+void spu_invalidate_slbs(struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	unsigned long flags;
+
+	spin_lock_irqsave(&spu->register_lock, flags);
+	if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK)
+		out_be64(&priv2->slb_invalidate_all_W, 0UL);
+	spin_unlock_irqrestore(&spu->register_lock, flags);
+}
+EXPORT_SYMBOL_GPL(spu_invalidate_slbs);
+
+/* This is called by the MM core when a segment size is changed, to
+ * request a flush of all the SPEs using a given mm
+ */
+void spu_flush_all_slbs(struct mm_struct *mm)
+{
+	struct spu *spu;
+	unsigned long flags;
+
+	spin_lock_irqsave(&spu_full_list_lock, flags);
+	list_for_each_entry(spu, &spu_full_list, full_list) {
+		if (spu->mm == mm)
+			spu_invalidate_slbs(spu);
+	}
+	spin_unlock_irqrestore(&spu_full_list_lock, flags);
+}
+
+/* The hack below stinks... try to do something better one of
+ * these days... Does it even work properly with NR_CPUS == 1 ?
+ */
+static inline void mm_needs_global_tlbie(struct mm_struct *mm)
+{
+	int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1;
+
+	/* Global TLBIE broadcast required with SPEs. */
+	bitmap_fill(cpumask_bits(mm_cpumask(mm)), nr);
+}
+
+void spu_associate_mm(struct spu *spu, struct mm_struct *mm)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&spu_full_list_lock, flags);
+	spu->mm = mm;
+	spin_unlock_irqrestore(&spu_full_list_lock, flags);
+	if (mm)
+		mm_needs_global_tlbie(mm);
+}
+EXPORT_SYMBOL_GPL(spu_associate_mm);
+
+int spu_64k_pages_available(void)
+{
+	return mmu_psize_defs[MMU_PAGE_64K].shift != 0;
+}
+EXPORT_SYMBOL_GPL(spu_64k_pages_available);
+
+static void spu_restart_dma(struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags))
+		out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND);
+	else {
+		set_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags);
+		mb();
+	}
+}
+
+static inline void spu_load_slb(struct spu *spu, int slbe, struct copro_slb *slb)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	pr_debug("%s: adding SLB[%d] 0x%016llx 0x%016llx\n",
+			__func__, slbe, slb->vsid, slb->esid);
+
+	out_be64(&priv2->slb_index_W, slbe);
+	/* set invalid before writing vsid */
+	out_be64(&priv2->slb_esid_RW, 0);
+	/* now it's safe to write the vsid */
+	out_be64(&priv2->slb_vsid_RW, slb->vsid);
+	/* setting the new esid makes the entry valid again */
+	out_be64(&priv2->slb_esid_RW, slb->esid);
+}
+
+static int __spu_trap_data_seg(struct spu *spu, unsigned long ea)
+{
+	struct copro_slb slb;
+	int ret;
+
+	ret = copro_calculate_slb(spu->mm, ea, &slb);
+	if (ret)
+		return ret;
+
+	spu_load_slb(spu, spu->slb_replace, &slb);
+
+	spu->slb_replace++;
+	if (spu->slb_replace >= 8)
+		spu->slb_replace = 0;
+
+	spu_restart_dma(spu);
+	spu->stats.slb_flt++;
+	return 0;
+}
+
+extern int hash_page(unsigned long ea, unsigned long access,
+		     unsigned long trap, unsigned long dsisr); //XXX
+static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr)
+{
+	int ret;
+
+	pr_debug("%s, %llx, %lx\n", __func__, dsisr, ea);
+
+	/*
+	 * Handle kernel space hash faults immediately. User hash
+	 * faults need to be deferred to process context.
+	 */
+	if ((dsisr & MFC_DSISR_PTE_NOT_FOUND) &&
+	    (REGION_ID(ea) != USER_REGION_ID)) {
+
+		spin_unlock(&spu->register_lock);
+		ret = hash_page(ea,
+				_PAGE_PRESENT | _PAGE_READ | _PAGE_PRIVILEGED,
+				0x300, dsisr);
+		spin_lock(&spu->register_lock);
+
+		if (!ret) {
+			spu_restart_dma(spu);
+			return 0;
+		}
+	}
+
+	spu->class_1_dar = ea;
+	spu->class_1_dsisr = dsisr;
+
+	spu->stop_callback(spu, 1);
+
+	spu->class_1_dar = 0;
+	spu->class_1_dsisr = 0;
+
+	return 0;
+}
+
+static void __spu_kernel_slb(void *addr, struct copro_slb *slb)
+{
+	unsigned long ea = (unsigned long)addr;
+	u64 llp;
+
+	if (REGION_ID(ea) == KERNEL_REGION_ID)
+		llp = mmu_psize_defs[mmu_linear_psize].sllp;
+	else
+		llp = mmu_psize_defs[mmu_virtual_psize].sllp;
+
+	slb->vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) << SLB_VSID_SHIFT) |
+		SLB_VSID_KERNEL | llp;
+	slb->esid = (ea & ESID_MASK) | SLB_ESID_V;
+}
+
+/**
+ * Given an array of @nr_slbs SLB entries, @slbs, return non-zero if the
+ * address @new_addr is present.
+ */
+static inline int __slb_present(struct copro_slb *slbs, int nr_slbs,
+		void *new_addr)
+{
+	unsigned long ea = (unsigned long)new_addr;
+	int i;
+
+	for (i = 0; i < nr_slbs; i++)
+		if (!((slbs[i].esid ^ ea) & ESID_MASK))
+			return 1;
+
+	return 0;
+}
+
+/**
+ * Setup the SPU kernel SLBs, in preparation for a context save/restore. We
+ * need to map both the context save area, and the save/restore code.
+ *
+ * Because the lscsa and code may cross segment boundaries, we check to see
+ * if mappings are required for the start and end of each range. We currently
+ * assume that the mappings are smaller that one segment - if not, something
+ * is seriously wrong.
+ */
+void spu_setup_kernel_slbs(struct spu *spu, struct spu_lscsa *lscsa,
+		void *code, int code_size)
+{
+	struct copro_slb slbs[4];
+	int i, nr_slbs = 0;
+	/* start and end addresses of both mappings */
+	void *addrs[] = {
+		lscsa, (void *)lscsa + sizeof(*lscsa) - 1,
+		code, code + code_size - 1
+	};
+
+	/* check the set of addresses, and create a new entry in the slbs array
+	 * if there isn't already a SLB for that address */
+	for (i = 0; i < ARRAY_SIZE(addrs); i++) {
+		if (__slb_present(slbs, nr_slbs, addrs[i]))
+			continue;
+
+		__spu_kernel_slb(addrs[i], &slbs[nr_slbs]);
+		nr_slbs++;
+	}
+
+	spin_lock_irq(&spu->register_lock);
+	/* Add the set of SLBs */
+	for (i = 0; i < nr_slbs; i++)
+		spu_load_slb(spu, i, &slbs[i]);
+	spin_unlock_irq(&spu->register_lock);
+}
+EXPORT_SYMBOL_GPL(spu_setup_kernel_slbs);
+
+static irqreturn_t
+spu_irq_class_0(int irq, void *data)
+{
+	struct spu *spu;
+	unsigned long stat, mask;
+
+	spu = data;
+
+	spin_lock(&spu->register_lock);
+	mask = spu_int_mask_get(spu, 0);
+	stat = spu_int_stat_get(spu, 0) & mask;
+
+	spu->class_0_pending |= stat;
+	spu->class_0_dar = spu_mfc_dar_get(spu);
+	spu->stop_callback(spu, 0);
+	spu->class_0_pending = 0;
+	spu->class_0_dar = 0;
+
+	spu_int_stat_clear(spu, 0, stat);
+	spin_unlock(&spu->register_lock);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t
+spu_irq_class_1(int irq, void *data)
+{
+	struct spu *spu;
+	unsigned long stat, mask, dar, dsisr;
+
+	spu = data;
+
+	/* atomically read & clear class1 status. */
+	spin_lock(&spu->register_lock);
+	mask  = spu_int_mask_get(spu, 1);
+	stat  = spu_int_stat_get(spu, 1) & mask;
+	dar   = spu_mfc_dar_get(spu);
+	dsisr = spu_mfc_dsisr_get(spu);
+	if (stat & CLASS1_STORAGE_FAULT_INTR)
+		spu_mfc_dsisr_set(spu, 0ul);
+	spu_int_stat_clear(spu, 1, stat);
+
+	pr_debug("%s: %lx %lx %lx %lx\n", __func__, mask, stat,
+			dar, dsisr);
+
+	if (stat & CLASS1_SEGMENT_FAULT_INTR)
+		__spu_trap_data_seg(spu, dar);
+
+	if (stat & CLASS1_STORAGE_FAULT_INTR)
+		__spu_trap_data_map(spu, dar, dsisr);
+
+	if (stat & CLASS1_LS_COMPARE_SUSPEND_ON_GET_INTR)
+		;
+
+	if (stat & CLASS1_LS_COMPARE_SUSPEND_ON_PUT_INTR)
+		;
+
+	spu->class_1_dsisr = 0;
+	spu->class_1_dar = 0;
+
+	spin_unlock(&spu->register_lock);
+
+	return stat ? IRQ_HANDLED : IRQ_NONE;
+}
+
+static irqreturn_t
+spu_irq_class_2(int irq, void *data)
+{
+	struct spu *spu;
+	unsigned long stat;
+	unsigned long mask;
+	const int mailbox_intrs =
+		CLASS2_MAILBOX_THRESHOLD_INTR | CLASS2_MAILBOX_INTR;
+
+	spu = data;
+	spin_lock(&spu->register_lock);
+	stat = spu_int_stat_get(spu, 2);
+	mask = spu_int_mask_get(spu, 2);
+	/* ignore interrupts we're not waiting for */
+	stat &= mask;
+	/* mailbox interrupts are level triggered. mask them now before
+	 * acknowledging */
+	if (stat & mailbox_intrs)
+		spu_int_mask_and(spu, 2, ~(stat & mailbox_intrs));
+	/* acknowledge all interrupts before the callbacks */
+	spu_int_stat_clear(spu, 2, stat);
+
+	pr_debug("class 2 interrupt %d, %lx, %lx\n", irq, stat, mask);
+
+	if (stat & CLASS2_MAILBOX_INTR)
+		spu->ibox_callback(spu);
+
+	if (stat & CLASS2_SPU_STOP_INTR)
+		spu->stop_callback(spu, 2);
+
+	if (stat & CLASS2_SPU_HALT_INTR)
+		spu->stop_callback(spu, 2);
+
+	if (stat & CLASS2_SPU_DMA_TAG_GROUP_COMPLETE_INTR)
+		spu->mfc_callback(spu);
+
+	if (stat & CLASS2_MAILBOX_THRESHOLD_INTR)
+		spu->wbox_callback(spu);
+
+	spu->stats.class2_intr++;
+
+	spin_unlock(&spu->register_lock);
+
+	return stat ? IRQ_HANDLED : IRQ_NONE;
+}
+
+static int spu_request_irqs(struct spu *spu)
+{
+	int ret = 0;
+
+	if (spu->irqs[0]) {
+		snprintf(spu->irq_c0, sizeof (spu->irq_c0), "spe%02d.0",
+			 spu->number);
+		ret = request_irq(spu->irqs[0], spu_irq_class_0,
+				  0, spu->irq_c0, spu);
+		if (ret)
+			goto bail0;
+	}
+	if (spu->irqs[1]) {
+		snprintf(spu->irq_c1, sizeof (spu->irq_c1), "spe%02d.1",
+			 spu->number);
+		ret = request_irq(spu->irqs[1], spu_irq_class_1,
+				  0, spu->irq_c1, spu);
+		if (ret)
+			goto bail1;
+	}
+	if (spu->irqs[2]) {
+		snprintf(spu->irq_c2, sizeof (spu->irq_c2), "spe%02d.2",
+			 spu->number);
+		ret = request_irq(spu->irqs[2], spu_irq_class_2,
+				  0, spu->irq_c2, spu);
+		if (ret)
+			goto bail2;
+	}
+	return 0;
+
+bail2:
+	if (spu->irqs[1])
+		free_irq(spu->irqs[1], spu);
+bail1:
+	if (spu->irqs[0])
+		free_irq(spu->irqs[0], spu);
+bail0:
+	return ret;
+}
+
+static void spu_free_irqs(struct spu *spu)
+{
+	if (spu->irqs[0])
+		free_irq(spu->irqs[0], spu);
+	if (spu->irqs[1])
+		free_irq(spu->irqs[1], spu);
+	if (spu->irqs[2])
+		free_irq(spu->irqs[2], spu);
+}
+
+void spu_init_channels(struct spu *spu)
+{
+	static const struct {
+		 unsigned channel;
+		 unsigned count;
+	} zero_list[] = {
+		{ 0x00, 1, }, { 0x01, 1, }, { 0x03, 1, }, { 0x04, 1, },
+		{ 0x18, 1, }, { 0x19, 1, }, { 0x1b, 1, }, { 0x1d, 1, },
+	}, count_list[] = {
+		{ 0x00, 0, }, { 0x03, 0, }, { 0x04, 0, }, { 0x15, 16, },
+		{ 0x17, 1, }, { 0x18, 0, }, { 0x19, 0, }, { 0x1b, 0, },
+		{ 0x1c, 1, }, { 0x1d, 0, }, { 0x1e, 1, },
+	};
+	struct spu_priv2 __iomem *priv2;
+	int i;
+
+	priv2 = spu->priv2;
+
+	/* initialize all channel data to zero */
+	for (i = 0; i < ARRAY_SIZE(zero_list); i++) {
+		int count;
+
+		out_be64(&priv2->spu_chnlcntptr_RW, zero_list[i].channel);
+		for (count = 0; count < zero_list[i].count; count++)
+			out_be64(&priv2->spu_chnldata_RW, 0);
+	}
+
+	/* initialize channel counts to meaningful values */
+	for (i = 0; i < ARRAY_SIZE(count_list); i++) {
+		out_be64(&priv2->spu_chnlcntptr_RW, count_list[i].channel);
+		out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count);
+	}
+}
+EXPORT_SYMBOL_GPL(spu_init_channels);
+
+static struct bus_type spu_subsys = {
+	.name = "spu",
+	.dev_name = "spu",
+};
+
+int spu_add_dev_attr(struct device_attribute *attr)
+{
+	struct spu *spu;
+
+	mutex_lock(&spu_full_list_mutex);
+	list_for_each_entry(spu, &spu_full_list, full_list)
+		device_create_file(&spu->dev, attr);
+	mutex_unlock(&spu_full_list_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(spu_add_dev_attr);
+
+int spu_add_dev_attr_group(struct attribute_group *attrs)
+{
+	struct spu *spu;
+	int rc = 0;
+
+	mutex_lock(&spu_full_list_mutex);
+	list_for_each_entry(spu, &spu_full_list, full_list) {
+		rc = sysfs_create_group(&spu->dev.kobj, attrs);
+
+		/* we're in trouble here, but try unwinding anyway */
+		if (rc) {
+			printk(KERN_ERR "%s: can't create sysfs group '%s'\n",
+					__func__, attrs->name);
+
+			list_for_each_entry_continue_reverse(spu,
+					&spu_full_list, full_list)
+				sysfs_remove_group(&spu->dev.kobj, attrs);
+			break;
+		}
+	}
+
+	mutex_unlock(&spu_full_list_mutex);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(spu_add_dev_attr_group);
+
+
+void spu_remove_dev_attr(struct device_attribute *attr)
+{
+	struct spu *spu;
+
+	mutex_lock(&spu_full_list_mutex);
+	list_for_each_entry(spu, &spu_full_list, full_list)
+		device_remove_file(&spu->dev, attr);
+	mutex_unlock(&spu_full_list_mutex);
+}
+EXPORT_SYMBOL_GPL(spu_remove_dev_attr);
+
+void spu_remove_dev_attr_group(struct attribute_group *attrs)
+{
+	struct spu *spu;
+
+	mutex_lock(&spu_full_list_mutex);
+	list_for_each_entry(spu, &spu_full_list, full_list)
+		sysfs_remove_group(&spu->dev.kobj, attrs);
+	mutex_unlock(&spu_full_list_mutex);
+}
+EXPORT_SYMBOL_GPL(spu_remove_dev_attr_group);
+
+static int spu_create_dev(struct spu *spu)
+{
+	int ret;
+
+	spu->dev.id = spu->number;
+	spu->dev.bus = &spu_subsys;
+	ret = device_register(&spu->dev);
+	if (ret) {
+		printk(KERN_ERR "Can't register SPU %d with sysfs\n",
+				spu->number);
+		return ret;
+	}
+
+	sysfs_add_device_to_node(&spu->dev, spu->node);
+
+	return 0;
+}
+
+static int __init create_spu(void *data)
+{
+	struct spu *spu;
+	int ret;
+	static int number;
+	unsigned long flags;
+
+	ret = -ENOMEM;
+	spu = kzalloc(sizeof (*spu), GFP_KERNEL);
+	if (!spu)
+		goto out;
+
+	spu->alloc_state = SPU_FREE;
+
+	spin_lock_init(&spu->register_lock);
+	spin_lock(&spu_lock);
+	spu->number = number++;
+	spin_unlock(&spu_lock);
+
+	ret = spu_create_spu(spu, data);
+
+	if (ret)
+		goto out_free;
+
+	spu_mfc_sdr_setup(spu);
+	spu_mfc_sr1_set(spu, 0x33);
+	ret = spu_request_irqs(spu);
+	if (ret)
+		goto out_destroy;
+
+	ret = spu_create_dev(spu);
+	if (ret)
+		goto out_free_irqs;
+
+	mutex_lock(&cbe_spu_info[spu->node].list_mutex);
+	list_add(&spu->cbe_list, &cbe_spu_info[spu->node].spus);
+	cbe_spu_info[spu->node].n_spus++;
+	mutex_unlock(&cbe_spu_info[spu->node].list_mutex);
+
+	mutex_lock(&spu_full_list_mutex);
+	spin_lock_irqsave(&spu_full_list_lock, flags);
+	list_add(&spu->full_list, &spu_full_list);
+	spin_unlock_irqrestore(&spu_full_list_lock, flags);
+	mutex_unlock(&spu_full_list_mutex);
+
+	spu->stats.util_state = SPU_UTIL_IDLE_LOADED;
+	spu->stats.tstamp = ktime_get_ns();
+
+	INIT_LIST_HEAD(&spu->aff_list);
+
+	goto out;
+
+out_free_irqs:
+	spu_free_irqs(spu);
+out_destroy:
+	spu_destroy_spu(spu);
+out_free:
+	kfree(spu);
+out:
+	return ret;
+}
+
+static const char *spu_state_names[] = {
+	"user", "system", "iowait", "idle"
+};
+
+static unsigned long long spu_acct_time(struct spu *spu,
+		enum spu_utilization_state state)
+{
+	unsigned long long time = spu->stats.times[state];
+
+	/*
+	 * If the spu is idle or the context is stopped, utilization
+	 * statistics are not updated.  Apply the time delta from the
+	 * last recorded state of the spu.
+	 */
+	if (spu->stats.util_state == state)
+		time += ktime_get_ns() - spu->stats.tstamp;
+
+	return time / NSEC_PER_MSEC;
+}
+
+
+static ssize_t spu_stat_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct spu *spu = container_of(dev, struct spu, dev);
+
+	return sprintf(buf, "%s %llu %llu %llu %llu "
+		      "%llu %llu %llu %llu %llu %llu %llu %llu\n",
+		spu_state_names[spu->stats.util_state],
+		spu_acct_time(spu, SPU_UTIL_USER),
+		spu_acct_time(spu, SPU_UTIL_SYSTEM),
+		spu_acct_time(spu, SPU_UTIL_IOWAIT),
+		spu_acct_time(spu, SPU_UTIL_IDLE_LOADED),
+		spu->stats.vol_ctx_switch,
+		spu->stats.invol_ctx_switch,
+		spu->stats.slb_flt,
+		spu->stats.hash_flt,
+		spu->stats.min_flt,
+		spu->stats.maj_flt,
+		spu->stats.class2_intr,
+		spu->stats.libassist);
+}
+
+static DEVICE_ATTR(stat, 0444, spu_stat_show, NULL);
+
+#ifdef CONFIG_KEXEC_CORE
+
+struct crash_spu_info {
+	struct spu *spu;
+	u32 saved_spu_runcntl_RW;
+	u32 saved_spu_status_R;
+	u32 saved_spu_npc_RW;
+	u64 saved_mfc_sr1_RW;
+	u64 saved_mfc_dar;
+	u64 saved_mfc_dsisr;
+};
+
+#define CRASH_NUM_SPUS	16	/* Enough for current hardware */
+static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS];
+
+static void crash_kexec_stop_spus(void)
+{
+	struct spu *spu;
+	int i;
+	u64 tmp;
+
+	for (i = 0; i < CRASH_NUM_SPUS; i++) {
+		if (!crash_spu_info[i].spu)
+			continue;
+
+		spu = crash_spu_info[i].spu;
+
+		crash_spu_info[i].saved_spu_runcntl_RW =
+			in_be32(&spu->problem->spu_runcntl_RW);
+		crash_spu_info[i].saved_spu_status_R =
+			in_be32(&spu->problem->spu_status_R);
+		crash_spu_info[i].saved_spu_npc_RW =
+			in_be32(&spu->problem->spu_npc_RW);
+
+		crash_spu_info[i].saved_mfc_dar    = spu_mfc_dar_get(spu);
+		crash_spu_info[i].saved_mfc_dsisr  = spu_mfc_dsisr_get(spu);
+		tmp = spu_mfc_sr1_get(spu);
+		crash_spu_info[i].saved_mfc_sr1_RW = tmp;
+
+		tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+		spu_mfc_sr1_set(spu, tmp);
+
+		__delay(200);
+	}
+}
+
+static void crash_register_spus(struct list_head *list)
+{
+	struct spu *spu;
+	int ret;
+
+	list_for_each_entry(spu, list, full_list) {
+		if (WARN_ON(spu->number >= CRASH_NUM_SPUS))
+			continue;
+
+		crash_spu_info[spu->number].spu = spu;
+	}
+
+	ret = crash_shutdown_register(&crash_kexec_stop_spus);
+	if (ret)
+		printk(KERN_ERR "Could not register SPU crash handler");
+}
+
+#else
+static inline void crash_register_spus(struct list_head *list)
+{
+}
+#endif
+
+static void spu_shutdown(void)
+{
+	struct spu *spu;
+
+	mutex_lock(&spu_full_list_mutex);
+	list_for_each_entry(spu, &spu_full_list, full_list) {
+		spu_free_irqs(spu);
+		spu_destroy_spu(spu);
+	}
+	mutex_unlock(&spu_full_list_mutex);
+}
+
+static struct syscore_ops spu_syscore_ops = {
+	.shutdown = spu_shutdown,
+};
+
+static int __init init_spu_base(void)
+{
+	int i, ret = 0;
+
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		mutex_init(&cbe_spu_info[i].list_mutex);
+		INIT_LIST_HEAD(&cbe_spu_info[i].spus);
+	}
+
+	if (!spu_management_ops)
+		goto out;
+
+	/* create system subsystem for spus */
+	ret = subsys_system_register(&spu_subsys, NULL);
+	if (ret)
+		goto out;
+
+	ret = spu_enumerate_spus(create_spu);
+
+	if (ret < 0) {
+		printk(KERN_WARNING "%s: Error initializing spus\n",
+			__func__);
+		goto out_unregister_subsys;
+	}
+
+	if (ret > 0)
+		fb_append_extra_logo(&logo_spe_clut224, ret);
+
+	mutex_lock(&spu_full_list_mutex);
+	xmon_register_spus(&spu_full_list);
+	crash_register_spus(&spu_full_list);
+	mutex_unlock(&spu_full_list_mutex);
+	spu_add_dev_attr(&dev_attr_stat);
+	register_syscore_ops(&spu_syscore_ops);
+
+	spu_init_affinity();
+
+	return 0;
+
+ out_unregister_subsys:
+	bus_unregister(&spu_subsys);
+ out:
+	return ret;
+}
+device_initcall(init_spu_base);
diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c
new file mode 100644
index 000000000..8ae86200e
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_callbacks.c
@@ -0,0 +1,74 @@
+/*
+ * System call callback functions for SPUs
+ */
+
+#undef DEBUG
+
+#include <linux/kallsyms.h>
+#include <linux/export.h>
+#include <linux/syscalls.h>
+
+#include <asm/spu.h>
+#include <asm/syscalls.h>
+#include <asm/unistd.h>
+
+/*
+ * This table defines the system calls that an SPU can call.
+ * It is currently a subset of the 64 bit powerpc system calls,
+ * with the exact semantics.
+ *
+ * The reasons for disabling some of the system calls are:
+ * 1. They interact with the way SPU syscalls are handled
+ *    and we can't let them execute ever:
+ *	restart_syscall, exit, for, execve, ptrace, ...
+ * 2. They are deprecated and replaced by other means:
+ *	uselib, pciconfig_*, sysfs, ...
+ * 3. They are somewhat interacting with the system in a way
+ *    we don't want an SPU to:
+ *	reboot, init_module, mount, kexec_load
+ * 4. They are optional and we can't rely on them being
+ *    linked into the kernel. Unfortunately, the cond_syscall
+ *    helper does not work here as it does not add the necessary
+ *    opd symbols:
+ *	mbind, mq_open, ipc, ...
+ */
+
+static void *spu_syscall_table[] = {
+#define SYSCALL(func)		sys_ni_syscall,
+#define COMPAT_SYS(func)	sys_ni_syscall,
+#define PPC_SYS(func)		sys_ni_syscall,
+#define OLDSYS(func)		sys_ni_syscall,
+#define SYS32ONLY(func)		sys_ni_syscall,
+#define PPC64ONLY(func)		sys_ni_syscall,
+#define SYSX(f, f3264, f32)	sys_ni_syscall,
+
+#define SYSCALL_SPU(func)	sys_##func,
+#define COMPAT_SYS_SPU(func)	sys_##func,
+#define COMPAT_SPU_NEW(func)	sys_##func,
+#define SYSX_SPU(f, f3264, f32)	f,
+
+#include <asm/systbl.h>
+};
+
+long spu_sys_callback(struct spu_syscall_block *s)
+{
+	long (*syscall)(u64 a1, u64 a2, u64 a3, u64 a4, u64 a5, u64 a6);
+
+	if (s->nr_ret >= ARRAY_SIZE(spu_syscall_table)) {
+		pr_debug("%s: invalid syscall #%lld", __func__, s->nr_ret);
+		return -ENOSYS;
+	}
+
+	syscall = spu_syscall_table[s->nr_ret];
+
+	pr_debug("SPU-syscall "
+		 "%pSR:syscall%lld(%llx, %llx, %llx, %llx, %llx, %llx)\n",
+		 syscall,
+		 s->nr_ret,
+		 s->parm[0], s->parm[1], s->parm[2],
+		 s->parm[3], s->parm[4], s->parm[5]);
+
+	return syscall(s->parm[0], s->parm[1], s->parm[2],
+		       s->parm[3], s->parm[4], s->parm[5]);
+}
+EXPORT_SYMBOL_GPL(spu_sys_callback);
diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c
new file mode 100644
index 000000000..5c409c98c
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -0,0 +1,557 @@
+/*
+ * spu management operations for of based platforms
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ * Copyright 2006 Sony Corp.
+ * (C) Copyright 2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/export.h>
+#include <linux/ptrace.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+#include <linux/device.h>
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/firmware.h>
+#include <asm/prom.h>
+
+#include "spufs/spufs.h"
+#include "interrupt.h"
+
+struct device_node *spu_devnode(struct spu *spu)
+{
+	return spu->devnode;
+}
+
+EXPORT_SYMBOL_GPL(spu_devnode);
+
+static u64 __init find_spu_unit_number(struct device_node *spe)
+{
+	const unsigned int *prop;
+	int proplen;
+
+	/* new device trees should provide the physical-id attribute */
+	prop = of_get_property(spe, "physical-id", &proplen);
+	if (proplen == 4)
+		return (u64)*prop;
+
+	/* celleb device tree provides the unit-id */
+	prop = of_get_property(spe, "unit-id", &proplen);
+	if (proplen == 4)
+		return (u64)*prop;
+
+	/* legacy device trees provide the id in the reg attribute */
+	prop = of_get_property(spe, "reg", &proplen);
+	if (proplen == 4)
+		return (u64)*prop;
+
+	return 0;
+}
+
+static void spu_unmap(struct spu *spu)
+{
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		iounmap(spu->priv1);
+	iounmap(spu->priv2);
+	iounmap(spu->problem);
+	iounmap((__force u8 __iomem *)spu->local_store);
+}
+
+static int __init spu_map_interrupts_old(struct spu *spu,
+	struct device_node *np)
+{
+	unsigned int isrc;
+	const u32 *tmp;
+	int nid;
+
+	/* Get the interrupt source unit from the device-tree */
+	tmp = of_get_property(np, "isrc", NULL);
+	if (!tmp)
+		return -ENODEV;
+	isrc = tmp[0];
+
+	tmp = of_get_property(np->parent->parent, "node-id", NULL);
+	if (!tmp) {
+		printk(KERN_WARNING "%s: can't find node-id\n", __func__);
+		nid = spu->node;
+	} else
+		nid = tmp[0];
+
+	/* Add the node number */
+	isrc |= nid << IIC_IRQ_NODE_SHIFT;
+
+	/* Now map interrupts of all 3 classes */
+	spu->irqs[0] = irq_create_mapping(NULL, IIC_IRQ_CLASS_0 | isrc);
+	spu->irqs[1] = irq_create_mapping(NULL, IIC_IRQ_CLASS_1 | isrc);
+	spu->irqs[2] = irq_create_mapping(NULL, IIC_IRQ_CLASS_2 | isrc);
+
+	/* Right now, we only fail if class 2 failed */
+	if (!spu->irqs[2])
+		return -EINVAL;
+
+	return 0;
+}
+
+static void __iomem * __init spu_map_prop_old(struct spu *spu,
+					      struct device_node *n,
+					      const char *name)
+{
+	const struct address_prop {
+		unsigned long address;
+		unsigned int len;
+	} __attribute__((packed)) *prop;
+	int proplen;
+
+	prop = of_get_property(n, name, &proplen);
+	if (prop == NULL || proplen != sizeof (struct address_prop))
+		return NULL;
+
+	return ioremap(prop->address, prop->len);
+}
+
+static int __init spu_map_device_old(struct spu *spu)
+{
+	struct device_node *node = spu->devnode;
+	const char *prop;
+	int ret;
+
+	ret = -ENODEV;
+	spu->name = of_get_property(node, "name", NULL);
+	if (!spu->name)
+		goto out;
+
+	prop = of_get_property(node, "local-store", NULL);
+	if (!prop)
+		goto out;
+	spu->local_store_phys = *(unsigned long *)prop;
+
+	/* we use local store as ram, not io memory */
+	spu->local_store = (void __force *)
+		spu_map_prop_old(spu, node, "local-store");
+	if (!spu->local_store)
+		goto out;
+
+	prop = of_get_property(node, "problem", NULL);
+	if (!prop)
+		goto out_unmap;
+	spu->problem_phys = *(unsigned long *)prop;
+
+	spu->problem = spu_map_prop_old(spu, node, "problem");
+	if (!spu->problem)
+		goto out_unmap;
+
+	spu->priv2 = spu_map_prop_old(spu, node, "priv2");
+	if (!spu->priv2)
+		goto out_unmap;
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+		spu->priv1 = spu_map_prop_old(spu, node, "priv1");
+		if (!spu->priv1)
+			goto out_unmap;
+	}
+
+	ret = 0;
+	goto out;
+
+out_unmap:
+	spu_unmap(spu);
+out:
+	return ret;
+}
+
+static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
+{
+	struct of_phandle_args oirq;
+	int ret;
+	int i;
+
+	for (i=0; i < 3; i++) {
+		ret = of_irq_parse_one(np, i, &oirq);
+		if (ret) {
+			pr_debug("spu_new: failed to get irq %d\n", i);
+			goto err;
+		}
+		ret = -EINVAL;
+		pr_debug("  irq %d no 0x%x on %pOF\n", i, oirq.args[0],
+			 oirq.np);
+		spu->irqs[i] = irq_create_of_mapping(&oirq);
+		if (!spu->irqs[i]) {
+			pr_debug("spu_new: failed to map it !\n");
+			goto err;
+		}
+	}
+	return 0;
+
+err:
+	pr_debug("failed to map irq %x for spu %s\n", *oirq.args,
+		spu->name);
+	for (; i >= 0; i--) {
+		if (spu->irqs[i])
+			irq_dispose_mapping(spu->irqs[i]);
+	}
+	return ret;
+}
+
+static int spu_map_resource(struct spu *spu, int nr,
+			    void __iomem** virt, unsigned long *phys)
+{
+	struct device_node *np = spu->devnode;
+	struct resource resource = { };
+	unsigned long len;
+	int ret;
+
+	ret = of_address_to_resource(np, nr, &resource);
+	if (ret)
+		return ret;
+	if (phys)
+		*phys = resource.start;
+	len = resource_size(&resource);
+	*virt = ioremap(resource.start, len);
+	if (!*virt)
+		return -EINVAL;
+	return 0;
+}
+
+static int __init spu_map_device(struct spu *spu)
+{
+	struct device_node *np = spu->devnode;
+	int ret = -ENODEV;
+
+	spu->name = of_get_property(np, "name", NULL);
+	if (!spu->name)
+		goto out;
+
+	ret = spu_map_resource(spu, 0, (void __iomem**)&spu->local_store,
+			       &spu->local_store_phys);
+	if (ret) {
+		pr_debug("spu_new: failed to map %pOF resource 0\n",
+			 np);
+		goto out;
+	}
+	ret = spu_map_resource(spu, 1, (void __iomem**)&spu->problem,
+			       &spu->problem_phys);
+	if (ret) {
+		pr_debug("spu_new: failed to map %pOF resource 1\n",
+			 np);
+		goto out_unmap;
+	}
+	ret = spu_map_resource(spu, 2, (void __iomem**)&spu->priv2, NULL);
+	if (ret) {
+		pr_debug("spu_new: failed to map %pOF resource 2\n",
+			 np);
+		goto out_unmap;
+	}
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		ret = spu_map_resource(spu, 3,
+			       (void __iomem**)&spu->priv1, NULL);
+	if (ret) {
+		pr_debug("spu_new: failed to map %pOF resource 3\n",
+			 np);
+		goto out_unmap;
+	}
+	pr_debug("spu_new: %pOF maps:\n", np);
+	pr_debug("  local store   : 0x%016lx -> 0x%p\n",
+		 spu->local_store_phys, spu->local_store);
+	pr_debug("  problem state : 0x%016lx -> 0x%p\n",
+		 spu->problem_phys, spu->problem);
+	pr_debug("  priv2         :                       0x%p\n", spu->priv2);
+	pr_debug("  priv1         :                       0x%p\n", spu->priv1);
+
+	return 0;
+
+out_unmap:
+	spu_unmap(spu);
+out:
+	pr_debug("failed to map spe %s: %d\n", spu->name, ret);
+	return ret;
+}
+
+static int __init of_enumerate_spus(int (*fn)(void *data))
+{
+	int ret;
+	struct device_node *node;
+	unsigned int n = 0;
+
+	ret = -ENODEV;
+	for_each_node_by_type(node, "spe") {
+		ret = fn(node);
+		if (ret) {
+			printk(KERN_WARNING "%s: Error initializing %s\n",
+				__func__, node->name);
+			of_node_put(node);
+			break;
+		}
+		n++;
+	}
+	return ret ? ret : n;
+}
+
+static int __init of_create_spu(struct spu *spu, void *data)
+{
+	int ret;
+	struct device_node *spe = (struct device_node *)data;
+	static int legacy_map = 0, legacy_irq = 0;
+
+	spu->devnode = of_node_get(spe);
+	spu->spe_id = find_spu_unit_number(spe);
+
+	spu->node = of_node_to_nid(spe);
+	if (spu->node >= MAX_NUMNODES) {
+		printk(KERN_WARNING "SPE %pOF on node %d ignored,"
+		       " node number too big\n", spe, spu->node);
+		printk(KERN_WARNING "Check if CONFIG_NUMA is enabled.\n");
+		ret = -ENODEV;
+		goto out;
+	}
+
+	ret = spu_map_device(spu);
+	if (ret) {
+		if (!legacy_map) {
+			legacy_map = 1;
+			printk(KERN_WARNING "%s: Legacy device tree found, "
+				"trying to map old style\n", __func__);
+		}
+		ret = spu_map_device_old(spu);
+		if (ret) {
+			printk(KERN_ERR "Unable to map %s\n",
+				spu->name);
+			goto out;
+		}
+	}
+
+	ret = spu_map_interrupts(spu, spe);
+	if (ret) {
+		if (!legacy_irq) {
+			legacy_irq = 1;
+			printk(KERN_WARNING "%s: Legacy device tree found, "
+				"trying old style irq\n", __func__);
+		}
+		ret = spu_map_interrupts_old(spu, spe);
+		if (ret) {
+			printk(KERN_ERR "%s: could not map interrupts\n",
+				spu->name);
+			goto out_unmap;
+		}
+	}
+
+	pr_debug("Using SPE %s %p %p %p %p %d\n", spu->name,
+		spu->local_store, spu->problem, spu->priv1,
+		spu->priv2, spu->number);
+	goto out;
+
+out_unmap:
+	spu_unmap(spu);
+out:
+	return ret;
+}
+
+static int of_destroy_spu(struct spu *spu)
+{
+	spu_unmap(spu);
+	of_node_put(spu->devnode);
+	return 0;
+}
+
+static void enable_spu_by_master_run(struct spu_context *ctx)
+{
+	ctx->ops->master_start(ctx);
+}
+
+static void disable_spu_by_master_run(struct spu_context *ctx)
+{
+	ctx->ops->master_stop(ctx);
+}
+
+/* Hardcoded affinity idxs for qs20 */
+#define QS20_SPES_PER_BE 8
+static int qs20_reg_idxs[QS20_SPES_PER_BE] =   { 0, 2, 4, 6, 7, 5, 3, 1 };
+static int qs20_reg_memory[QS20_SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 };
+
+static struct spu *spu_lookup_reg(int node, u32 reg)
+{
+	struct spu *spu;
+	const u32 *spu_reg;
+
+	list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+		spu_reg = of_get_property(spu_devnode(spu), "reg", NULL);
+		if (*spu_reg == reg)
+			return spu;
+	}
+	return NULL;
+}
+
+static void init_affinity_qs20_harcoded(void)
+{
+	int node, i;
+	struct spu *last_spu, *spu;
+	u32 reg;
+
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		last_spu = NULL;
+		for (i = 0; i < QS20_SPES_PER_BE; i++) {
+			reg = qs20_reg_idxs[i];
+			spu = spu_lookup_reg(node, reg);
+			if (!spu)
+				continue;
+			spu->has_mem_affinity = qs20_reg_memory[reg];
+			if (last_spu)
+				list_add_tail(&spu->aff_list,
+						&last_spu->aff_list);
+			last_spu = spu;
+		}
+	}
+}
+
+static int of_has_vicinity(void)
+{
+	struct device_node *dn;
+
+	for_each_node_by_type(dn, "spe") {
+		if (of_find_property(dn, "vicinity", NULL))  {
+			of_node_put(dn);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static struct spu *devnode_spu(int cbe, struct device_node *dn)
+{
+	struct spu *spu;
+
+	list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list)
+		if (spu_devnode(spu) == dn)
+			return spu;
+	return NULL;
+}
+
+static struct spu *
+neighbour_spu(int cbe, struct device_node *target, struct device_node *avoid)
+{
+	struct spu *spu;
+	struct device_node *spu_dn;
+	const phandle *vic_handles;
+	int lenp, i;
+
+	list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) {
+		spu_dn = spu_devnode(spu);
+		if (spu_dn == avoid)
+			continue;
+		vic_handles = of_get_property(spu_dn, "vicinity", &lenp);
+		for (i=0; i < (lenp / sizeof(phandle)); i++) {
+			if (vic_handles[i] == target->phandle)
+				return spu;
+		}
+	}
+	return NULL;
+}
+
+static void init_affinity_node(int cbe)
+{
+	struct spu *spu, *last_spu;
+	struct device_node *vic_dn, *last_spu_dn;
+	phandle avoid_ph;
+	const phandle *vic_handles;
+	const char *name;
+	int lenp, i, added;
+
+	last_spu = list_first_entry(&cbe_spu_info[cbe].spus, struct spu,
+								cbe_list);
+	avoid_ph = 0;
+	for (added = 1; added < cbe_spu_info[cbe].n_spus; added++) {
+		last_spu_dn = spu_devnode(last_spu);
+		vic_handles = of_get_property(last_spu_dn, "vicinity", &lenp);
+
+		/*
+		 * Walk through each phandle in vicinity property of the spu
+		 * (tipically two vicinity phandles per spe node)
+		 */
+		for (i = 0; i < (lenp / sizeof(phandle)); i++) {
+			if (vic_handles[i] == avoid_ph)
+				continue;
+
+			vic_dn = of_find_node_by_phandle(vic_handles[i]);
+			if (!vic_dn)
+				continue;
+
+			/* a neighbour might be spe, mic-tm, or bif0 */
+			name = of_get_property(vic_dn, "name", NULL);
+			if (!name)
+				continue;
+
+			if (strcmp(name, "spe") == 0) {
+				spu = devnode_spu(cbe, vic_dn);
+				avoid_ph = last_spu_dn->phandle;
+			} else {
+				/*
+				 * "mic-tm" and "bif0" nodes do not have
+				 * vicinity property. So we need to find the
+				 * spe which has vic_dn as neighbour, but
+				 * skipping the one we came from (last_spu_dn)
+				 */
+				spu = neighbour_spu(cbe, vic_dn, last_spu_dn);
+				if (!spu)
+					continue;
+				if (!strcmp(name, "mic-tm")) {
+					last_spu->has_mem_affinity = 1;
+					spu->has_mem_affinity = 1;
+				}
+				avoid_ph = vic_dn->phandle;
+			}
+
+			list_add_tail(&spu->aff_list, &last_spu->aff_list);
+			last_spu = spu;
+			break;
+		}
+	}
+}
+
+static void init_affinity_fw(void)
+{
+	int cbe;
+
+	for (cbe = 0; cbe < MAX_NUMNODES; cbe++)
+		init_affinity_node(cbe);
+}
+
+static int __init init_affinity(void)
+{
+	if (of_has_vicinity()) {
+		init_affinity_fw();
+	} else {
+		if (of_machine_is_compatible("IBM,CPBW-1.0"))
+			init_affinity_qs20_harcoded();
+		else
+			printk("No affinity configuration found\n");
+	}
+
+	return 0;
+}
+
+const struct spu_management_ops spu_management_of_ops = {
+	.enumerate_spus = of_enumerate_spus,
+	.create_spu = of_create_spu,
+	.destroy_spu = of_destroy_spu,
+	.enable_spu = enable_spu_by_master_run,
+	.disable_spu = disable_spu_by_master_run,
+	.init_affinity = init_affinity,
+};
diff --git a/arch/powerpc/platforms/cell/spu_notify.c b/arch/powerpc/platforms/cell/spu_notify.c
new file mode 100644
index 000000000..afdf857c3
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_notify.c
@@ -0,0 +1,68 @@
+/*
+ * Move OProfile dependencies from spufs module to the kernel so it
+ * can run on non-cell PPC.
+ *
+ * Copyright (C) IBM 2005
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/export.h>
+#include <linux/notifier.h>
+#include <asm/spu.h>
+#include "spufs/spufs.h"
+
+static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);
+
+void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
+{
+	blocking_notifier_call_chain(&spu_switch_notifier,
+				     ctx ? ctx->object_id : 0, spu);
+}
+EXPORT_SYMBOL_GPL(spu_switch_notify);
+
+int spu_switch_event_register(struct notifier_block *n)
+{
+	int ret;
+	ret = blocking_notifier_chain_register(&spu_switch_notifier, n);
+	if (!ret)
+		notify_spus_active();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(spu_switch_event_register);
+
+int spu_switch_event_unregister(struct notifier_block *n)
+{
+	return blocking_notifier_chain_unregister(&spu_switch_notifier, n);
+}
+EXPORT_SYMBOL_GPL(spu_switch_event_unregister);
+
+void spu_set_profile_private_kref(struct spu_context *ctx,
+				  struct kref *prof_info_kref,
+				  void (* prof_info_release) (struct kref *kref))
+{
+	ctx->prof_priv_kref = prof_info_kref;
+	ctx->prof_priv_release = prof_info_release;
+}
+EXPORT_SYMBOL_GPL(spu_set_profile_private_kref);
+
+void *spu_get_profile_private_kref(struct spu_context *ctx)
+{
+	return ctx->prof_priv_kref;
+}
+EXPORT_SYMBOL_GPL(spu_get_profile_private_kref);
+
diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.c b/arch/powerpc/platforms/cell/spu_priv1_mmio.c
new file mode 100644
index 000000000..66d33724f
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.c
@@ -0,0 +1,180 @@
+/*
+ * spu hypervisor abstraction for direct hardware access.
+ *
+ *  (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *  Copyright 2006 Sony Corp.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/ptrace.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+#include <linux/device.h>
+#include <linux/sched.h>
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/firmware.h>
+#include <asm/prom.h>
+
+#include "interrupt.h"
+#include "spu_priv1_mmio.h"
+
+static void int_mask_and(struct spu *spu, int class, u64 mask)
+{
+	u64 old_mask;
+
+	old_mask = in_be64(&spu->priv1->int_mask_RW[class]);
+	out_be64(&spu->priv1->int_mask_RW[class], old_mask & mask);
+}
+
+static void int_mask_or(struct spu *spu, int class, u64 mask)
+{
+	u64 old_mask;
+
+	old_mask = in_be64(&spu->priv1->int_mask_RW[class]);
+	out_be64(&spu->priv1->int_mask_RW[class], old_mask | mask);
+}
+
+static void int_mask_set(struct spu *spu, int class, u64 mask)
+{
+	out_be64(&spu->priv1->int_mask_RW[class], mask);
+}
+
+static u64 int_mask_get(struct spu *spu, int class)
+{
+	return in_be64(&spu->priv1->int_mask_RW[class]);
+}
+
+static void int_stat_clear(struct spu *spu, int class, u64 stat)
+{
+	out_be64(&spu->priv1->int_stat_RW[class], stat);
+}
+
+static u64 int_stat_get(struct spu *spu, int class)
+{
+	return in_be64(&spu->priv1->int_stat_RW[class]);
+}
+
+static void cpu_affinity_set(struct spu *spu, int cpu)
+{
+	u64 target;
+	u64 route;
+
+	if (nr_cpus_node(spu->node)) {
+		const struct cpumask *spumask = cpumask_of_node(spu->node),
+			*cpumask = cpumask_of_node(cpu_to_node(cpu));
+
+		if (!cpumask_intersects(spumask, cpumask))
+			return;
+	}
+
+	target = iic_get_target_id(cpu);
+	route = target << 48 | target << 32 | target << 16;
+	out_be64(&spu->priv1->int_route_RW, route);
+}
+
+static u64 mfc_dar_get(struct spu *spu)
+{
+	return in_be64(&spu->priv1->mfc_dar_RW);
+}
+
+static u64 mfc_dsisr_get(struct spu *spu)
+{
+	return in_be64(&spu->priv1->mfc_dsisr_RW);
+}
+
+static void mfc_dsisr_set(struct spu *spu, u64 dsisr)
+{
+	out_be64(&spu->priv1->mfc_dsisr_RW, dsisr);
+}
+
+static void mfc_sdr_setup(struct spu *spu)
+{
+	out_be64(&spu->priv1->mfc_sdr_RW, mfspr(SPRN_SDR1));
+}
+
+static void mfc_sr1_set(struct spu *spu, u64 sr1)
+{
+	out_be64(&spu->priv1->mfc_sr1_RW, sr1);
+}
+
+static u64 mfc_sr1_get(struct spu *spu)
+{
+	return in_be64(&spu->priv1->mfc_sr1_RW);
+}
+
+static void mfc_tclass_id_set(struct spu *spu, u64 tclass_id)
+{
+	out_be64(&spu->priv1->mfc_tclass_id_RW, tclass_id);
+}
+
+static u64 mfc_tclass_id_get(struct spu *spu)
+{
+	return in_be64(&spu->priv1->mfc_tclass_id_RW);
+}
+
+static void tlb_invalidate(struct spu *spu)
+{
+	out_be64(&spu->priv1->tlb_invalidate_entry_W, 0ul);
+}
+
+static void resource_allocation_groupID_set(struct spu *spu, u64 id)
+{
+	out_be64(&spu->priv1->resource_allocation_groupID_RW, id);
+}
+
+static u64 resource_allocation_groupID_get(struct spu *spu)
+{
+	return in_be64(&spu->priv1->resource_allocation_groupID_RW);
+}
+
+static void resource_allocation_enable_set(struct spu *spu, u64 enable)
+{
+	out_be64(&spu->priv1->resource_allocation_enable_RW, enable);
+}
+
+static u64 resource_allocation_enable_get(struct spu *spu)
+{
+	return in_be64(&spu->priv1->resource_allocation_enable_RW);
+}
+
+const struct spu_priv1_ops spu_priv1_mmio_ops =
+{
+	.int_mask_and = int_mask_and,
+	.int_mask_or = int_mask_or,
+	.int_mask_set = int_mask_set,
+	.int_mask_get = int_mask_get,
+	.int_stat_clear = int_stat_clear,
+	.int_stat_get = int_stat_get,
+	.cpu_affinity_set = cpu_affinity_set,
+	.mfc_dar_get = mfc_dar_get,
+	.mfc_dsisr_get = mfc_dsisr_get,
+	.mfc_dsisr_set = mfc_dsisr_set,
+	.mfc_sdr_setup = mfc_sdr_setup,
+	.mfc_sr1_set = mfc_sr1_set,
+	.mfc_sr1_get = mfc_sr1_get,
+	.mfc_tclass_id_set = mfc_tclass_id_set,
+	.mfc_tclass_id_get = mfc_tclass_id_get,
+	.tlb_invalidate = tlb_invalidate,
+	.resource_allocation_groupID_set = resource_allocation_groupID_set,
+	.resource_allocation_groupID_get = resource_allocation_groupID_get,
+	.resource_allocation_enable_set = resource_allocation_enable_set,
+	.resource_allocation_enable_get = resource_allocation_enable_get,
+};
diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.h b/arch/powerpc/platforms/cell/spu_priv1_mmio.h
new file mode 100644
index 000000000..7b62bd1cc
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.h
@@ -0,0 +1,26 @@
+/*
+ * spu hypervisor abstraction for direct hardware access.
+ *
+ *  Copyright (C) 2006 Sony Computer Entertainment Inc.
+ *  Copyright 2006 Sony Corp.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef SPU_PRIV1_MMIO_H
+#define SPU_PRIV1_MMIO_H
+
+struct device_node *spu_devnode(struct spu *spu);
+
+#endif /* SPU_PRIV1_MMIO_H */
diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c
new file mode 100644
index 000000000..263413a34
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_syscalls.c
@@ -0,0 +1,179 @@
+/*
+ * SPU file system -- system call stubs
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ * (C) Copyright 2006-2007, IBM Corporation
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/syscalls.h>
+#include <linux/rcupdate.h>
+#include <linux/binfmts.h>
+#include <linux/syscalls.h>
+
+#include <asm/spu.h>
+
+/* protected by rcu */
+static struct spufs_calls *spufs_calls;
+
+#ifdef CONFIG_SPU_FS_MODULE
+
+static inline struct spufs_calls *spufs_calls_get(void)
+{
+	struct spufs_calls *calls = NULL;
+
+	rcu_read_lock();
+	calls = rcu_dereference(spufs_calls);
+	if (calls && !try_module_get(calls->owner))
+		calls = NULL;
+	rcu_read_unlock();
+
+	return calls;
+}
+
+static inline void spufs_calls_put(struct spufs_calls *calls)
+{
+	BUG_ON(calls != spufs_calls);
+
+	/* we don't need to rcu this, as we hold a reference to the module */
+	module_put(spufs_calls->owner);
+}
+
+#else /* !defined CONFIG_SPU_FS_MODULE */
+
+static inline struct spufs_calls *spufs_calls_get(void)
+{
+	return spufs_calls;
+}
+
+static inline void spufs_calls_put(struct spufs_calls *calls) { }
+
+#endif /* CONFIG_SPU_FS_MODULE */
+
+SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags,
+	umode_t, mode, int, neighbor_fd)
+{
+	long ret;
+	struct spufs_calls *calls;
+
+	calls = spufs_calls_get();
+	if (!calls)
+		return -ENOSYS;
+
+	if (flags & SPU_CREATE_AFFINITY_SPU) {
+		struct fd neighbor = fdget(neighbor_fd);
+		ret = -EBADF;
+		if (neighbor.file) {
+			ret = calls->create_thread(name, flags, mode, neighbor.file);
+			fdput(neighbor);
+		}
+	} else
+		ret = calls->create_thread(name, flags, mode, NULL);
+
+	spufs_calls_put(calls);
+	return ret;
+}
+
+SYSCALL_DEFINE3(spu_run,int, fd, __u32 __user *, unpc, __u32 __user *, ustatus)
+{
+	long ret;
+	struct fd arg;
+	struct spufs_calls *calls;
+
+	calls = spufs_calls_get();
+	if (!calls)
+		return -ENOSYS;
+
+	ret = -EBADF;
+	arg = fdget(fd);
+	if (arg.file) {
+		ret = calls->spu_run(arg.file, unpc, ustatus);
+		fdput(arg);
+	}
+
+	spufs_calls_put(calls);
+	return ret;
+}
+
+#ifdef CONFIG_COREDUMP
+int elf_coredump_extra_notes_size(void)
+{
+	struct spufs_calls *calls;
+	int ret;
+
+	calls = spufs_calls_get();
+	if (!calls)
+		return 0;
+
+	ret = calls->coredump_extra_notes_size();
+
+	spufs_calls_put(calls);
+
+	return ret;
+}
+
+int elf_coredump_extra_notes_write(struct coredump_params *cprm)
+{
+	struct spufs_calls *calls;
+	int ret;
+
+	calls = spufs_calls_get();
+	if (!calls)
+		return 0;
+
+	ret = calls->coredump_extra_notes_write(cprm);
+
+	spufs_calls_put(calls);
+
+	return ret;
+}
+#endif
+
+void notify_spus_active(void)
+{
+	struct spufs_calls *calls;
+
+	calls = spufs_calls_get();
+	if (!calls)
+		return;
+
+	calls->notify_spus_active();
+	spufs_calls_put(calls);
+
+	return;
+}
+
+int register_spu_syscalls(struct spufs_calls *calls)
+{
+	if (spufs_calls)
+		return -EBUSY;
+
+	rcu_assign_pointer(spufs_calls, calls);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(register_spu_syscalls);
+
+void unregister_spu_syscalls(struct spufs_calls *calls)
+{
+	BUG_ON(spufs_calls->owner != calls->owner);
+	RCU_INIT_POINTER(spufs_calls, NULL);
+	synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(unregister_spu_syscalls);
diff --git a/arch/powerpc/platforms/cell/spufs/.gitignore b/arch/powerpc/platforms/cell/spufs/.gitignore
new file mode 100644
index 000000000..a09ee8d84
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/.gitignore
@@ -0,0 +1,2 @@
+spu_save_dump.h
+spu_restore_dump.h
diff --git a/arch/powerpc/platforms/cell/spufs/Makefile b/arch/powerpc/platforms/cell/spufs/Makefile
new file mode 100644
index 000000000..52e4c80ec
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/Makefile
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_SPU_FS) += spufs.o
+spufs-y += inode.o file.o context.o syscalls.o
+spufs-y += sched.o backing_ops.o hw_ops.o run.o gang.o
+spufs-y += switch.o fault.o lscsa_alloc.o
+spufs-$(CONFIG_COREDUMP) += coredump.o
+
+# magic for the trace events
+CFLAGS_sched.o := -I$(src)
+
+# Rules to build switch.o with the help of SPU tool chain
+SPU_CROSS	:= spu-
+SPU_CC		:= $(SPU_CROSS)gcc
+SPU_AS		:= $(SPU_CROSS)gcc
+SPU_LD		:= $(SPU_CROSS)ld
+SPU_OBJCOPY	:= $(SPU_CROSS)objcopy
+SPU_CFLAGS	:= -O2 -Wall -I$(srctree)/include -D__KERNEL__
+SPU_AFLAGS	:= -c -D__ASSEMBLY__ -I$(srctree)/include -D__KERNEL__
+SPU_LDFLAGS	:= -N -Ttext=0x0
+
+$(obj)/switch.o: $(obj)/spu_save_dump.h $(obj)/spu_restore_dump.h
+clean-files := spu_save_dump.h spu_restore_dump.h
+
+# Compile SPU files
+      cmd_spu_cc = $(SPU_CC) $(SPU_CFLAGS) -c -o $@ $<
+quiet_cmd_spu_cc = SPU_CC  $@
+$(obj)/spu_%.o: $(src)/spu_%.c
+	$(call if_changed,spu_cc)
+
+# Assemble SPU files
+      cmd_spu_as = $(SPU_AS) $(SPU_AFLAGS) -o $@ $<
+quiet_cmd_spu_as = SPU_AS  $@
+$(obj)/spu_%.o: $(src)/spu_%.S
+	$(call if_changed,spu_as)
+
+# Link SPU Executables
+      cmd_spu_ld = $(SPU_LD) $(SPU_LDFLAGS) -o $@ $^
+quiet_cmd_spu_ld = SPU_LD  $@
+$(obj)/spu_%: $(obj)/spu_%_crt0.o $(obj)/spu_%.o
+	$(call if_changed,spu_ld)
+
+# Copy into binary format
+      cmd_spu_objcopy = $(SPU_OBJCOPY) -O binary $< $@
+quiet_cmd_spu_objcopy = OBJCOPY $@
+$(obj)/spu_%.bin: $(src)/spu_%
+	$(call if_changed,spu_objcopy)
+
+# create C code from ELF executable
+cmd_hexdump   = ( \
+		echo "/*" ; \
+		echo " * $*_dump.h: Copyright (C) 2005 IBM." ; \
+		echo " * Hex-dump auto generated from $*.c." ; \
+		echo " * Do not edit!" ; \
+		echo " */" ; \
+		echo "static unsigned int $*_code[] " \
+			"__attribute__((__aligned__(128))) = {" ; \
+		hexdump -v -e '"0x" 4/1 "%02x" "," "\n"' $< ; \
+		echo "};" ; \
+		) > $@
+quiet_cmd_hexdump = HEXDUMP $@
+$(obj)/%_dump.h: $(obj)/%.bin
+	$(call if_changed,hexdump)
diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c
new file mode 100644
index 000000000..857580a78
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c
@@ -0,0 +1,413 @@
+/* backing_ops.c - query/set operations on saved SPU context.
+ *
+ * Copyright (C) IBM 2005
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * These register operations allow SPUFS to operate on saved
+ * SPU contexts rather than hardware.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/poll.h>
+
+#include <asm/io.h>
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/spu_info.h>
+#include <asm/mmu_context.h>
+#include "spufs.h"
+
+/*
+ * Reads/writes to various problem and priv2 registers require
+ * state changes, i.e.  generate SPU events, modify channel
+ * counts, etc.
+ */
+
+static void gen_spu_event(struct spu_context *ctx, u32 event)
+{
+	u64 ch0_cnt;
+	u64 ch0_data;
+	u64 ch1_data;
+
+	ch0_cnt = ctx->csa.spu_chnlcnt_RW[0];
+	ch0_data = ctx->csa.spu_chnldata_RW[0];
+	ch1_data = ctx->csa.spu_chnldata_RW[1];
+	ctx->csa.spu_chnldata_RW[0] |= event;
+	if ((ch0_cnt == 0) && !(ch0_data & event) && (ch1_data & event)) {
+		ctx->csa.spu_chnlcnt_RW[0] = 1;
+	}
+}
+
+static int spu_backing_mbox_read(struct spu_context *ctx, u32 * data)
+{
+	u32 mbox_stat;
+	int ret = 0;
+
+	spin_lock(&ctx->csa.register_lock);
+	mbox_stat = ctx->csa.prob.mb_stat_R;
+	if (mbox_stat & 0x0000ff) {
+		/* Read the first available word.
+		 * Implementation note: the depth
+		 * of pu_mb_R is currently 1.
+		 */
+		*data = ctx->csa.prob.pu_mb_R;
+		ctx->csa.prob.mb_stat_R &= ~(0x0000ff);
+		ctx->csa.spu_chnlcnt_RW[28] = 1;
+		gen_spu_event(ctx, MFC_PU_MAILBOX_AVAILABLE_EVENT);
+		ret = 4;
+	}
+	spin_unlock(&ctx->csa.register_lock);
+	return ret;
+}
+
+static u32 spu_backing_mbox_stat_read(struct spu_context *ctx)
+{
+	return ctx->csa.prob.mb_stat_R;
+}
+
+static __poll_t spu_backing_mbox_stat_poll(struct spu_context *ctx,
+					  __poll_t events)
+{
+	__poll_t ret;
+	u32 stat;
+
+	ret = 0;
+	spin_lock_irq(&ctx->csa.register_lock);
+	stat = ctx->csa.prob.mb_stat_R;
+
+	/* if the requested event is there, return the poll
+	   mask, otherwise enable the interrupt to get notified,
+	   but first mark any pending interrupts as done so
+	   we don't get woken up unnecessarily */
+
+	if (events & (EPOLLIN | EPOLLRDNORM)) {
+		if (stat & 0xff0000)
+			ret |= EPOLLIN | EPOLLRDNORM;
+		else {
+			ctx->csa.priv1.int_stat_class2_RW &=
+				~CLASS2_MAILBOX_INTR;
+			ctx->csa.priv1.int_mask_class2_RW |=
+				CLASS2_ENABLE_MAILBOX_INTR;
+		}
+	}
+	if (events & (EPOLLOUT | EPOLLWRNORM)) {
+		if (stat & 0x00ff00)
+			ret = EPOLLOUT | EPOLLWRNORM;
+		else {
+			ctx->csa.priv1.int_stat_class2_RW &=
+				~CLASS2_MAILBOX_THRESHOLD_INTR;
+			ctx->csa.priv1.int_mask_class2_RW |=
+				CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR;
+		}
+	}
+	spin_unlock_irq(&ctx->csa.register_lock);
+	return ret;
+}
+
+static int spu_backing_ibox_read(struct spu_context *ctx, u32 * data)
+{
+	int ret;
+
+	spin_lock(&ctx->csa.register_lock);
+	if (ctx->csa.prob.mb_stat_R & 0xff0000) {
+		/* Read the first available word.
+		 * Implementation note: the depth
+		 * of puint_mb_R is currently 1.
+		 */
+		*data = ctx->csa.priv2.puint_mb_R;
+		ctx->csa.prob.mb_stat_R &= ~(0xff0000);
+		ctx->csa.spu_chnlcnt_RW[30] = 1;
+		gen_spu_event(ctx, MFC_PU_INT_MAILBOX_AVAILABLE_EVENT);
+		ret = 4;
+	} else {
+		/* make sure we get woken up by the interrupt */
+		ctx->csa.priv1.int_mask_class2_RW |= CLASS2_ENABLE_MAILBOX_INTR;
+		ret = 0;
+	}
+	spin_unlock(&ctx->csa.register_lock);
+	return ret;
+}
+
+static int spu_backing_wbox_write(struct spu_context *ctx, u32 data)
+{
+	int ret;
+
+	spin_lock(&ctx->csa.register_lock);
+	if ((ctx->csa.prob.mb_stat_R) & 0x00ff00) {
+		int slot = ctx->csa.spu_chnlcnt_RW[29];
+		int avail = (ctx->csa.prob.mb_stat_R & 0x00ff00) >> 8;
+
+		/* We have space to write wbox_data.
+		 * Implementation note: the depth
+		 * of spu_mb_W is currently 4.
+		 */
+		BUG_ON(avail != (4 - slot));
+		ctx->csa.spu_mailbox_data[slot] = data;
+		ctx->csa.spu_chnlcnt_RW[29] = ++slot;
+		ctx->csa.prob.mb_stat_R &= ~(0x00ff00);
+		ctx->csa.prob.mb_stat_R |= (((4 - slot) & 0xff) << 8);
+		gen_spu_event(ctx, MFC_SPU_MAILBOX_WRITTEN_EVENT);
+		ret = 4;
+	} else {
+		/* make sure we get woken up by the interrupt when space
+		   becomes available */
+		ctx->csa.priv1.int_mask_class2_RW |=
+			CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR;
+		ret = 0;
+	}
+	spin_unlock(&ctx->csa.register_lock);
+	return ret;
+}
+
+static u32 spu_backing_signal1_read(struct spu_context *ctx)
+{
+	return ctx->csa.spu_chnldata_RW[3];
+}
+
+static void spu_backing_signal1_write(struct spu_context *ctx, u32 data)
+{
+	spin_lock(&ctx->csa.register_lock);
+	if (ctx->csa.priv2.spu_cfg_RW & 0x1)
+		ctx->csa.spu_chnldata_RW[3] |= data;
+	else
+		ctx->csa.spu_chnldata_RW[3] = data;
+	ctx->csa.spu_chnlcnt_RW[3] = 1;
+	gen_spu_event(ctx, MFC_SIGNAL_1_EVENT);
+	spin_unlock(&ctx->csa.register_lock);
+}
+
+static u32 spu_backing_signal2_read(struct spu_context *ctx)
+{
+	return ctx->csa.spu_chnldata_RW[4];
+}
+
+static void spu_backing_signal2_write(struct spu_context *ctx, u32 data)
+{
+	spin_lock(&ctx->csa.register_lock);
+	if (ctx->csa.priv2.spu_cfg_RW & 0x2)
+		ctx->csa.spu_chnldata_RW[4] |= data;
+	else
+		ctx->csa.spu_chnldata_RW[4] = data;
+	ctx->csa.spu_chnlcnt_RW[4] = 1;
+	gen_spu_event(ctx, MFC_SIGNAL_2_EVENT);
+	spin_unlock(&ctx->csa.register_lock);
+}
+
+static void spu_backing_signal1_type_set(struct spu_context *ctx, u64 val)
+{
+	u64 tmp;
+
+	spin_lock(&ctx->csa.register_lock);
+	tmp = ctx->csa.priv2.spu_cfg_RW;
+	if (val)
+		tmp |= 1;
+	else
+		tmp &= ~1;
+	ctx->csa.priv2.spu_cfg_RW = tmp;
+	spin_unlock(&ctx->csa.register_lock);
+}
+
+static u64 spu_backing_signal1_type_get(struct spu_context *ctx)
+{
+	return ((ctx->csa.priv2.spu_cfg_RW & 1) != 0);
+}
+
+static void spu_backing_signal2_type_set(struct spu_context *ctx, u64 val)
+{
+	u64 tmp;
+
+	spin_lock(&ctx->csa.register_lock);
+	tmp = ctx->csa.priv2.spu_cfg_RW;
+	if (val)
+		tmp |= 2;
+	else
+		tmp &= ~2;
+	ctx->csa.priv2.spu_cfg_RW = tmp;
+	spin_unlock(&ctx->csa.register_lock);
+}
+
+static u64 spu_backing_signal2_type_get(struct spu_context *ctx)
+{
+	return ((ctx->csa.priv2.spu_cfg_RW & 2) != 0);
+}
+
+static u32 spu_backing_npc_read(struct spu_context *ctx)
+{
+	return ctx->csa.prob.spu_npc_RW;
+}
+
+static void spu_backing_npc_write(struct spu_context *ctx, u32 val)
+{
+	ctx->csa.prob.spu_npc_RW = val;
+}
+
+static u32 spu_backing_status_read(struct spu_context *ctx)
+{
+	return ctx->csa.prob.spu_status_R;
+}
+
+static char *spu_backing_get_ls(struct spu_context *ctx)
+{
+	return ctx->csa.lscsa->ls;
+}
+
+static void spu_backing_privcntl_write(struct spu_context *ctx, u64 val)
+{
+	ctx->csa.priv2.spu_privcntl_RW = val;
+}
+
+static u32 spu_backing_runcntl_read(struct spu_context *ctx)
+{
+	return ctx->csa.prob.spu_runcntl_RW;
+}
+
+static void spu_backing_runcntl_write(struct spu_context *ctx, u32 val)
+{
+	spin_lock(&ctx->csa.register_lock);
+	ctx->csa.prob.spu_runcntl_RW = val;
+	if (val & SPU_RUNCNTL_RUNNABLE) {
+		ctx->csa.prob.spu_status_R &=
+			~SPU_STATUS_STOPPED_BY_STOP &
+			~SPU_STATUS_STOPPED_BY_HALT &
+			~SPU_STATUS_SINGLE_STEP &
+			~SPU_STATUS_INVALID_INSTR &
+			~SPU_STATUS_INVALID_CH;
+		ctx->csa.prob.spu_status_R |= SPU_STATUS_RUNNING;
+	} else {
+		ctx->csa.prob.spu_status_R &= ~SPU_STATUS_RUNNING;
+	}
+	spin_unlock(&ctx->csa.register_lock);
+}
+
+static void spu_backing_runcntl_stop(struct spu_context *ctx)
+{
+	spu_backing_runcntl_write(ctx, SPU_RUNCNTL_STOP);
+}
+
+static void spu_backing_master_start(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+	u64 sr1;
+
+	spin_lock(&csa->register_lock);
+	sr1 = csa->priv1.mfc_sr1_RW | MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+	csa->priv1.mfc_sr1_RW = sr1;
+	spin_unlock(&csa->register_lock);
+}
+
+static void spu_backing_master_stop(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+	u64 sr1;
+
+	spin_lock(&csa->register_lock);
+	sr1 = csa->priv1.mfc_sr1_RW & ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+	csa->priv1.mfc_sr1_RW = sr1;
+	spin_unlock(&csa->register_lock);
+}
+
+static int spu_backing_set_mfc_query(struct spu_context * ctx, u32 mask,
+					u32 mode)
+{
+	struct spu_problem_collapsed *prob = &ctx->csa.prob;
+	int ret;
+
+	spin_lock(&ctx->csa.register_lock);
+	ret = -EAGAIN;
+	if (prob->dma_querytype_RW)
+		goto out;
+	ret = 0;
+	/* FIXME: what are the side-effects of this? */
+	prob->dma_querymask_RW = mask;
+	prob->dma_querytype_RW = mode;
+	/* In the current implementation, the SPU context is always
+	 * acquired in runnable state when new bits are added to the
+	 * mask (tagwait), so it's sufficient just to mask
+	 * dma_tagstatus_R with the 'mask' parameter here.
+	 */
+	ctx->csa.prob.dma_tagstatus_R &= mask;
+out:
+	spin_unlock(&ctx->csa.register_lock);
+
+	return ret;
+}
+
+static u32 spu_backing_read_mfc_tagstatus(struct spu_context * ctx)
+{
+	return ctx->csa.prob.dma_tagstatus_R;
+}
+
+static u32 spu_backing_get_mfc_free_elements(struct spu_context *ctx)
+{
+	return ctx->csa.prob.dma_qstatus_R;
+}
+
+static int spu_backing_send_mfc_command(struct spu_context *ctx,
+					struct mfc_dma_command *cmd)
+{
+	int ret;
+
+	spin_lock(&ctx->csa.register_lock);
+	ret = -EAGAIN;
+	/* FIXME: set up priv2->puq */
+	spin_unlock(&ctx->csa.register_lock);
+
+	return ret;
+}
+
+static void spu_backing_restart_dma(struct spu_context *ctx)
+{
+	ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_RESTART_DMA_COMMAND;
+}
+
+struct spu_context_ops spu_backing_ops = {
+	.mbox_read = spu_backing_mbox_read,
+	.mbox_stat_read = spu_backing_mbox_stat_read,
+	.mbox_stat_poll = spu_backing_mbox_stat_poll,
+	.ibox_read = spu_backing_ibox_read,
+	.wbox_write = spu_backing_wbox_write,
+	.signal1_read = spu_backing_signal1_read,
+	.signal1_write = spu_backing_signal1_write,
+	.signal2_read = spu_backing_signal2_read,
+	.signal2_write = spu_backing_signal2_write,
+	.signal1_type_set = spu_backing_signal1_type_set,
+	.signal1_type_get = spu_backing_signal1_type_get,
+	.signal2_type_set = spu_backing_signal2_type_set,
+	.signal2_type_get = spu_backing_signal2_type_get,
+	.npc_read = spu_backing_npc_read,
+	.npc_write = spu_backing_npc_write,
+	.status_read = spu_backing_status_read,
+	.get_ls = spu_backing_get_ls,
+	.privcntl_write = spu_backing_privcntl_write,
+	.runcntl_read = spu_backing_runcntl_read,
+	.runcntl_write = spu_backing_runcntl_write,
+	.runcntl_stop = spu_backing_runcntl_stop,
+	.master_start = spu_backing_master_start,
+	.master_stop = spu_backing_master_stop,
+	.set_mfc_query = spu_backing_set_mfc_query,
+	.read_mfc_tagstatus = spu_backing_read_mfc_tagstatus,
+	.get_mfc_free_elements = spu_backing_get_mfc_free_elements,
+	.send_mfc_command = spu_backing_send_mfc_command,
+	.restart_dma = spu_backing_restart_dma,
+};
diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c
new file mode 100644
index 000000000..b500b1725
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/context.c
@@ -0,0 +1,188 @@
+/*
+ * SPU file system -- SPU context management
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/atomic.h>
+#include <linux/sched.h>
+#include <linux/sched/mm.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include "spufs.h"
+#include "sputrace.h"
+
+
+atomic_t nr_spu_contexts = ATOMIC_INIT(0);
+
+struct spu_context *alloc_spu_context(struct spu_gang *gang)
+{
+	struct spu_context *ctx;
+
+	ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
+	if (!ctx)
+		goto out;
+	/* Binding to physical processor deferred
+	 * until spu_activate().
+	 */
+	if (spu_init_csa(&ctx->csa))
+		goto out_free;
+	spin_lock_init(&ctx->mmio_lock);
+	mutex_init(&ctx->mapping_lock);
+	kref_init(&ctx->kref);
+	mutex_init(&ctx->state_mutex);
+	mutex_init(&ctx->run_mutex);
+	init_waitqueue_head(&ctx->ibox_wq);
+	init_waitqueue_head(&ctx->wbox_wq);
+	init_waitqueue_head(&ctx->stop_wq);
+	init_waitqueue_head(&ctx->mfc_wq);
+	init_waitqueue_head(&ctx->run_wq);
+	ctx->state = SPU_STATE_SAVED;
+	ctx->ops = &spu_backing_ops;
+	ctx->owner = get_task_mm(current);
+	INIT_LIST_HEAD(&ctx->rq);
+	INIT_LIST_HEAD(&ctx->aff_list);
+	if (gang)
+		spu_gang_add_ctx(gang, ctx);
+
+	__spu_update_sched_info(ctx);
+	spu_set_timeslice(ctx);
+	ctx->stats.util_state = SPU_UTIL_IDLE_LOADED;
+	ctx->stats.tstamp = ktime_get_ns();
+
+	atomic_inc(&nr_spu_contexts);
+	goto out;
+out_free:
+	kfree(ctx);
+	ctx = NULL;
+out:
+	return ctx;
+}
+
+void destroy_spu_context(struct kref *kref)
+{
+	struct spu_context *ctx;
+	ctx = container_of(kref, struct spu_context, kref);
+	spu_context_nospu_trace(destroy_spu_context__enter, ctx);
+	mutex_lock(&ctx->state_mutex);
+	spu_deactivate(ctx);
+	mutex_unlock(&ctx->state_mutex);
+	spu_fini_csa(&ctx->csa);
+	if (ctx->gang)
+		spu_gang_remove_ctx(ctx->gang, ctx);
+	if (ctx->prof_priv_kref)
+		kref_put(ctx->prof_priv_kref, ctx->prof_priv_release);
+	BUG_ON(!list_empty(&ctx->rq));
+	atomic_dec(&nr_spu_contexts);
+	kfree(ctx->switch_log);
+	kfree(ctx);
+}
+
+struct spu_context * get_spu_context(struct spu_context *ctx)
+{
+	kref_get(&ctx->kref);
+	return ctx;
+}
+
+int put_spu_context(struct spu_context *ctx)
+{
+	return kref_put(&ctx->kref, &destroy_spu_context);
+}
+
+/* give up the mm reference when the context is about to be destroyed */
+void spu_forget(struct spu_context *ctx)
+{
+	struct mm_struct *mm;
+
+	/*
+	 * This is basically an open-coded spu_acquire_saved, except that
+	 * we don't acquire the state mutex interruptible, and we don't
+	 * want this context to be rescheduled on release.
+	 */
+	mutex_lock(&ctx->state_mutex);
+	if (ctx->state != SPU_STATE_SAVED)
+		spu_deactivate(ctx);
+
+	mm = ctx->owner;
+	ctx->owner = NULL;
+	mmput(mm);
+	spu_release(ctx);
+}
+
+void spu_unmap_mappings(struct spu_context *ctx)
+{
+	mutex_lock(&ctx->mapping_lock);
+	if (ctx->local_store)
+		unmap_mapping_range(ctx->local_store, 0, LS_SIZE, 1);
+	if (ctx->mfc)
+		unmap_mapping_range(ctx->mfc, 0, SPUFS_MFC_MAP_SIZE, 1);
+	if (ctx->cntl)
+		unmap_mapping_range(ctx->cntl, 0, SPUFS_CNTL_MAP_SIZE, 1);
+	if (ctx->signal1)
+		unmap_mapping_range(ctx->signal1, 0, SPUFS_SIGNAL_MAP_SIZE, 1);
+	if (ctx->signal2)
+		unmap_mapping_range(ctx->signal2, 0, SPUFS_SIGNAL_MAP_SIZE, 1);
+	if (ctx->mss)
+		unmap_mapping_range(ctx->mss, 0, SPUFS_MSS_MAP_SIZE, 1);
+	if (ctx->psmap)
+		unmap_mapping_range(ctx->psmap, 0, SPUFS_PS_MAP_SIZE, 1);
+	mutex_unlock(&ctx->mapping_lock);
+}
+
+/**
+ * spu_acquire_saved - lock spu contex and make sure it is in saved state
+ * @ctx:	spu contex to lock
+ */
+int spu_acquire_saved(struct spu_context *ctx)
+{
+	int ret;
+
+	spu_context_nospu_trace(spu_acquire_saved__enter, ctx);
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+
+	if (ctx->state != SPU_STATE_SAVED) {
+		set_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags);
+		spu_deactivate(ctx);
+	}
+
+	return 0;
+}
+
+/**
+ * spu_release_saved - unlock spu context and return it to the runqueue
+ * @ctx:	context to unlock
+ */
+void spu_release_saved(struct spu_context *ctx)
+{
+	BUG_ON(ctx->state != SPU_STATE_SAVED);
+
+	if (test_and_clear_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags) &&
+			test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags))
+		spu_activate(ctx, 0);
+
+	spu_release(ctx);
+}
+
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
new file mode 100644
index 000000000..84b7ac926
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -0,0 +1,214 @@
+/*
+ * SPU core dump code
+ *
+ * (C) Copyright 2006 IBM Corp.
+ *
+ * Author: Dwayne Grant McConnell <decimal@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/elf.h>
+#include <linux/file.h>
+#include <linux/fdtable.h>
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/list.h>
+#include <linux/syscalls.h>
+#include <linux/coredump.h>
+#include <linux/binfmts.h>
+
+#include <linux/uaccess.h>
+
+#include "spufs.h"
+
+static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer,
+				size_t size, loff_t *off)
+{
+	u64 data;
+	int ret;
+
+	if (spufs_coredump_read[num].read)
+		return spufs_coredump_read[num].read(ctx, buffer, size, off);
+
+	data = spufs_coredump_read[num].get(ctx);
+	ret = snprintf(buffer, size, "0x%.16llx", data);
+	if (ret >= size)
+		return size;
+	return ++ret; /* count trailing NULL */
+}
+
+static int spufs_ctx_note_size(struct spu_context *ctx, int dfd)
+{
+	int i, sz, total = 0;
+	char *name;
+	char fullname[80];
+
+	for (i = 0; spufs_coredump_read[i].name != NULL; i++) {
+		name = spufs_coredump_read[i].name;
+		sz = spufs_coredump_read[i].size;
+
+		sprintf(fullname, "SPU/%d/%s", dfd, name);
+
+		total += sizeof(struct elf_note);
+		total += roundup(strlen(fullname) + 1, 4);
+		total += roundup(sz, 4);
+	}
+
+	return total;
+}
+
+static int match_context(const void *v, struct file *file, unsigned fd)
+{
+	struct spu_context *ctx;
+	if (file->f_op != &spufs_context_fops)
+		return 0;
+	ctx = SPUFS_I(file_inode(file))->i_ctx;
+	if (ctx->flags & SPU_CREATE_NOSCHED)
+		return 0;
+	return fd + 1;
+}
+
+/*
+ * The additional architecture-specific notes for Cell are various
+ * context files in the spu context.
+ *
+ * This function iterates over all open file descriptors and sees
+ * if they are a directory in spufs.  In that case we use spufs
+ * internal functionality to dump them without needing to actually
+ * open the files.
+ */
+/*
+ * descriptor table is not shared, so files can't change or go away.
+ */
+static struct spu_context *coredump_next_context(int *fd)
+{
+	struct file *file;
+	int n = iterate_fd(current->files, *fd, match_context, NULL);
+	if (!n)
+		return NULL;
+	*fd = n - 1;
+	file = fcheck(*fd);
+	return SPUFS_I(file_inode(file))->i_ctx;
+}
+
+int spufs_coredump_extra_notes_size(void)
+{
+	struct spu_context *ctx;
+	int size = 0, rc, fd;
+
+	fd = 0;
+	while ((ctx = coredump_next_context(&fd)) != NULL) {
+		rc = spu_acquire_saved(ctx);
+		if (rc)
+			break;
+		rc = spufs_ctx_note_size(ctx, fd);
+		spu_release_saved(ctx);
+		if (rc < 0)
+			break;
+
+		size += rc;
+
+		/* start searching the next fd next time */
+		fd++;
+	}
+
+	return size;
+}
+
+static int spufs_arch_write_note(struct spu_context *ctx, int i,
+				  struct coredump_params *cprm, int dfd)
+{
+	loff_t pos = 0;
+	int sz, rc, total = 0;
+	const int bufsz = PAGE_SIZE;
+	char *name;
+	char fullname[80], *buf;
+	struct elf_note en;
+	size_t skip;
+
+	buf = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	name = spufs_coredump_read[i].name;
+	sz = spufs_coredump_read[i].size;
+
+	sprintf(fullname, "SPU/%d/%s", dfd, name);
+	en.n_namesz = strlen(fullname) + 1;
+	en.n_descsz = sz;
+	en.n_type = NT_SPU;
+
+	if (!dump_emit(cprm, &en, sizeof(en)))
+		goto Eio;
+
+	if (!dump_emit(cprm, fullname, en.n_namesz))
+		goto Eio;
+
+	if (!dump_align(cprm, 4))
+		goto Eio;
+
+	do {
+		rc = do_coredump_read(i, ctx, buf, bufsz, &pos);
+		if (rc > 0) {
+			if (!dump_emit(cprm, buf, rc))
+				goto Eio;
+			total += rc;
+		}
+	} while (rc == bufsz && total < sz);
+
+	if (rc < 0)
+		goto out;
+
+	skip = roundup(cprm->pos - total + sz, 4) - cprm->pos;
+	if (!dump_skip(cprm, skip))
+		goto Eio;
+
+	rc = 0;
+out:
+	free_page((unsigned long)buf);
+	return rc;
+Eio:
+	free_page((unsigned long)buf);
+	return -EIO;
+}
+
+int spufs_coredump_extra_notes_write(struct coredump_params *cprm)
+{
+	struct spu_context *ctx;
+	int fd, j, rc;
+
+	fd = 0;
+	while ((ctx = coredump_next_context(&fd)) != NULL) {
+		rc = spu_acquire_saved(ctx);
+		if (rc)
+			return rc;
+
+		for (j = 0; spufs_coredump_read[j].name != NULL; j++) {
+			rc = spufs_arch_write_note(ctx, j, cprm, fd);
+			if (rc) {
+				spu_release_saved(ctx);
+				return rc;
+			}
+		}
+
+		spu_release_saved(ctx);
+
+		/* start searching the next fd next time */
+		fd++;
+	}
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
new file mode 100644
index 000000000..83cf58daa
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
@@ -0,0 +1,191 @@
+/*
+ * Low-level SPU handling
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/sched/signal.h>
+#include <linux/mm.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+
+#include "spufs.h"
+
+/**
+ * Handle an SPE event, depending on context SPU_CREATE_EVENTS_ENABLED flag.
+ *
+ * If the context was created with events, we just set the return event.
+ * Otherwise, send an appropriate signal to the process.
+ */
+static void spufs_handle_event(struct spu_context *ctx,
+				unsigned long ea, int type)
+{
+	siginfo_t info;
+
+	if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) {
+		ctx->event_return |= type;
+		wake_up_all(&ctx->stop_wq);
+		return;
+	}
+
+	clear_siginfo(&info);
+
+	switch (type) {
+	case SPE_EVENT_INVALID_DMA:
+		info.si_signo = SIGBUS;
+		info.si_code = BUS_OBJERR;
+		break;
+	case SPE_EVENT_SPE_DATA_STORAGE:
+		info.si_signo = SIGSEGV;
+		info.si_addr = (void __user *)ea;
+		info.si_code = SEGV_ACCERR;
+		ctx->ops->restart_dma(ctx);
+		break;
+	case SPE_EVENT_DMA_ALIGNMENT:
+		info.si_signo = SIGBUS;
+		/* DAR isn't set for an alignment fault :( */
+		info.si_code = BUS_ADRALN;
+		break;
+	case SPE_EVENT_SPE_ERROR:
+		info.si_signo = SIGILL;
+		info.si_addr = (void __user *)(unsigned long)
+			ctx->ops->npc_read(ctx) - 4;
+		info.si_code = ILL_ILLOPC;
+		break;
+	}
+
+	if (info.si_signo)
+		force_sig_info(info.si_signo, &info, current);
+}
+
+int spufs_handle_class0(struct spu_context *ctx)
+{
+	unsigned long stat = ctx->csa.class_0_pending & CLASS0_INTR_MASK;
+
+	if (likely(!stat))
+		return 0;
+
+	if (stat & CLASS0_DMA_ALIGNMENT_INTR)
+		spufs_handle_event(ctx, ctx->csa.class_0_dar,
+			SPE_EVENT_DMA_ALIGNMENT);
+
+	if (stat & CLASS0_INVALID_DMA_COMMAND_INTR)
+		spufs_handle_event(ctx, ctx->csa.class_0_dar,
+			SPE_EVENT_INVALID_DMA);
+
+	if (stat & CLASS0_SPU_ERROR_INTR)
+		spufs_handle_event(ctx, ctx->csa.class_0_dar,
+			SPE_EVENT_SPE_ERROR);
+
+	ctx->csa.class_0_pending = 0;
+
+	return -EIO;
+}
+
+/*
+ * bottom half handler for page faults, we can't do this from
+ * interrupt context, since we might need to sleep.
+ * we also need to give up the mutex so we can get scheduled
+ * out while waiting for the backing store.
+ *
+ * TODO: try calling hash_page from the interrupt handler first
+ *       in order to speed up the easy case.
+ */
+int spufs_handle_class1(struct spu_context *ctx)
+{
+	u64 ea, dsisr, access;
+	unsigned long flags;
+	vm_fault_t flt = 0;
+	int ret;
+
+	/*
+	 * dar and dsisr get passed from the registers
+	 * to the spu_context, to this function, but not
+	 * back to the spu if it gets scheduled again.
+	 *
+	 * if we don't handle the fault for a saved context
+	 * in time, we can still expect to get the same fault
+	 * the immediately after the context restore.
+	 */
+	ea = ctx->csa.class_1_dar;
+	dsisr = ctx->csa.class_1_dsisr;
+
+	if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)))
+		return 0;
+
+	spuctx_switch_state(ctx, SPU_UTIL_IOWAIT);
+
+	pr_debug("ctx %p: ea %016llx, dsisr %016llx state %d\n", ctx, ea,
+		dsisr, ctx->state);
+
+	ctx->stats.hash_flt++;
+	if (ctx->state == SPU_STATE_RUNNABLE)
+		ctx->spu->stats.hash_flt++;
+
+	/* we must not hold the lock when entering copro_handle_mm_fault */
+	spu_release(ctx);
+
+	access = (_PAGE_PRESENT | _PAGE_READ);
+	access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_WRITE : 0UL;
+	local_irq_save(flags);
+	ret = hash_page(ea, access, 0x300, dsisr);
+	local_irq_restore(flags);
+
+	/* hashing failed, so try the actual fault handler */
+	if (ret)
+		ret = copro_handle_mm_fault(current->mm, ea, dsisr, &flt);
+
+	/*
+	 * This is nasty: we need the state_mutex for all the bookkeeping even
+	 * if the syscall was interrupted by a signal. ewww.
+	 */
+	mutex_lock(&ctx->state_mutex);
+
+	/*
+	 * Clear dsisr under ctxt lock after handling the fault, so that
+	 * time slicing will not preempt the context while the page fault
+	 * handler is running. Context switch code removes mappings.
+	 */
+	ctx->csa.class_1_dar = ctx->csa.class_1_dsisr = 0;
+
+	/*
+	 * If we handled the fault successfully and are in runnable
+	 * state, restart the DMA.
+	 * In case of unhandled error report the problem to user space.
+	 */
+	if (!ret) {
+		if (flt & VM_FAULT_MAJOR)
+			ctx->stats.maj_flt++;
+		else
+			ctx->stats.min_flt++;
+		if (ctx->state == SPU_STATE_RUNNABLE) {
+			if (flt & VM_FAULT_MAJOR)
+				ctx->spu->stats.maj_flt++;
+			else
+				ctx->spu->stats.min_flt++;
+		}
+
+		if (ctx->spu)
+			ctx->ops->restart_dma(ctx);
+	} else
+		spufs_handle_event(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE);
+
+	spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+	return ret;
+}
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
new file mode 100644
index 000000000..d16adcd93
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -0,0 +1,2700 @@
+/*
+ * SPU file system -- file contents
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/fs.h>
+#include <linux/ioctl.h>
+#include <linux/export.h>
+#include <linux/pagemap.h>
+#include <linux/poll.h>
+#include <linux/ptrace.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+
+#include <asm/io.h>
+#include <asm/time.h>
+#include <asm/spu.h>
+#include <asm/spu_info.h>
+#include <linux/uaccess.h>
+
+#include "spufs.h"
+#include "sputrace.h"
+
+#define SPUFS_MMAP_4K (PAGE_SIZE == 0x1000)
+
+/* Simple attribute files */
+struct spufs_attr {
+	int (*get)(void *, u64 *);
+	int (*set)(void *, u64);
+	char get_buf[24];       /* enough to store a u64 and "\n\0" */
+	char set_buf[24];
+	void *data;
+	const char *fmt;        /* format for read operation */
+	struct mutex mutex;     /* protects access to these buffers */
+};
+
+static int spufs_attr_open(struct inode *inode, struct file *file,
+		int (*get)(void *, u64 *), int (*set)(void *, u64),
+		const char *fmt)
+{
+	struct spufs_attr *attr;
+
+	attr = kmalloc(sizeof(*attr), GFP_KERNEL);
+	if (!attr)
+		return -ENOMEM;
+
+	attr->get = get;
+	attr->set = set;
+	attr->data = inode->i_private;
+	attr->fmt = fmt;
+	mutex_init(&attr->mutex);
+	file->private_data = attr;
+
+	return nonseekable_open(inode, file);
+}
+
+static int spufs_attr_release(struct inode *inode, struct file *file)
+{
+       kfree(file->private_data);
+	return 0;
+}
+
+static ssize_t spufs_attr_read(struct file *file, char __user *buf,
+		size_t len, loff_t *ppos)
+{
+	struct spufs_attr *attr;
+	size_t size;
+	ssize_t ret;
+
+	attr = file->private_data;
+	if (!attr->get)
+		return -EACCES;
+
+	ret = mutex_lock_interruptible(&attr->mutex);
+	if (ret)
+		return ret;
+
+	if (*ppos) {		/* continued read */
+		size = strlen(attr->get_buf);
+	} else {		/* first read */
+		u64 val;
+		ret = attr->get(attr->data, &val);
+		if (ret)
+			goto out;
+
+		size = scnprintf(attr->get_buf, sizeof(attr->get_buf),
+				 attr->fmt, (unsigned long long)val);
+	}
+
+	ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size);
+out:
+	mutex_unlock(&attr->mutex);
+	return ret;
+}
+
+static ssize_t spufs_attr_write(struct file *file, const char __user *buf,
+		size_t len, loff_t *ppos)
+{
+	struct spufs_attr *attr;
+	u64 val;
+	size_t size;
+	ssize_t ret;
+
+	attr = file->private_data;
+	if (!attr->set)
+		return -EACCES;
+
+	ret = mutex_lock_interruptible(&attr->mutex);
+	if (ret)
+		return ret;
+
+	ret = -EFAULT;
+	size = min(sizeof(attr->set_buf) - 1, len);
+	if (copy_from_user(attr->set_buf, buf, size))
+		goto out;
+
+	ret = len; /* claim we got the whole input */
+	attr->set_buf[size] = '\0';
+	val = simple_strtol(attr->set_buf, NULL, 0);
+	attr->set(attr->data, val);
+out:
+	mutex_unlock(&attr->mutex);
+	return ret;
+}
+
+#define DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt)	\
+static int __fops ## _open(struct inode *inode, struct file *file)	\
+{									\
+	__simple_attr_check_format(__fmt, 0ull);			\
+	return spufs_attr_open(inode, file, __get, __set, __fmt);	\
+}									\
+static const struct file_operations __fops = {				\
+	.open	 = __fops ## _open,					\
+	.release = spufs_attr_release,					\
+	.read	 = spufs_attr_read,					\
+	.write	 = spufs_attr_write,					\
+	.llseek  = generic_file_llseek,					\
+};
+
+
+static int
+spufs_mem_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (!i->i_openers++)
+		ctx->local_store = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
+static int
+spufs_mem_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->local_store = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
+static ssize_t
+__spufs_mem_read(struct spu_context *ctx, char __user *buffer,
+			size_t size, loff_t *pos)
+{
+	char *local_store = ctx->ops->get_ls(ctx);
+	return simple_read_from_buffer(buffer, size, pos, local_store,
+					LS_SIZE);
+}
+
+static ssize_t
+spufs_mem_read(struct file *file, char __user *buffer,
+				size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	ssize_t ret;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+	ret = __spufs_mem_read(ctx, buffer, size, pos);
+	spu_release(ctx);
+
+	return ret;
+}
+
+static ssize_t
+spufs_mem_write(struct file *file, const char __user *buffer,
+					size_t size, loff_t *ppos)
+{
+	struct spu_context *ctx = file->private_data;
+	char *local_store;
+	loff_t pos = *ppos;
+	int ret;
+
+	if (pos > LS_SIZE)
+		return -EFBIG;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+
+	local_store = ctx->ops->get_ls(ctx);
+	size = simple_write_to_buffer(local_store, LS_SIZE, ppos, buffer, size);
+	spu_release(ctx);
+
+	return size;
+}
+
+static vm_fault_t
+spufs_mem_mmap_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct spu_context *ctx	= vma->vm_file->private_data;
+	unsigned long pfn, offset;
+	vm_fault_t ret;
+
+	offset = vmf->pgoff << PAGE_SHIFT;
+	if (offset >= LS_SIZE)
+		return VM_FAULT_SIGBUS;
+
+	pr_debug("spufs_mem_mmap_fault address=0x%lx, offset=0x%lx\n",
+			vmf->address, offset);
+
+	if (spu_acquire(ctx))
+		return VM_FAULT_NOPAGE;
+
+	if (ctx->state == SPU_STATE_SAVED) {
+		vma->vm_page_prot = pgprot_cached(vma->vm_page_prot);
+		pfn = vmalloc_to_pfn(ctx->csa.lscsa->ls + offset);
+	} else {
+		vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
+		pfn = (ctx->spu->local_store_phys + offset) >> PAGE_SHIFT;
+	}
+	ret = vmf_insert_pfn(vma, vmf->address, pfn);
+
+	spu_release(ctx);
+
+	return ret;
+}
+
+static int spufs_mem_mmap_access(struct vm_area_struct *vma,
+				unsigned long address,
+				void *buf, int len, int write)
+{
+	struct spu_context *ctx = vma->vm_file->private_data;
+	unsigned long offset = address - vma->vm_start;
+	char *local_store;
+
+	if (write && !(vma->vm_flags & VM_WRITE))
+		return -EACCES;
+	if (spu_acquire(ctx))
+		return -EINTR;
+	if ((offset + len) > vma->vm_end)
+		len = vma->vm_end - offset;
+	local_store = ctx->ops->get_ls(ctx);
+	if (write)
+		memcpy_toio(local_store + offset, buf, len);
+	else
+		memcpy_fromio(buf, local_store + offset, len);
+	spu_release(ctx);
+	return len;
+}
+
+static const struct vm_operations_struct spufs_mem_mmap_vmops = {
+	.fault = spufs_mem_mmap_fault,
+	.access = spufs_mem_mmap_access,
+};
+
+static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
+
+	vma->vm_ops = &spufs_mem_mmap_vmops;
+	return 0;
+}
+
+static const struct file_operations spufs_mem_fops = {
+	.open			= spufs_mem_open,
+	.release		= spufs_mem_release,
+	.read			= spufs_mem_read,
+	.write			= spufs_mem_write,
+	.llseek			= generic_file_llseek,
+	.mmap			= spufs_mem_mmap,
+};
+
+static vm_fault_t spufs_ps_fault(struct vm_fault *vmf,
+				    unsigned long ps_offs,
+				    unsigned long ps_size)
+{
+	struct spu_context *ctx = vmf->vma->vm_file->private_data;
+	unsigned long area, offset = vmf->pgoff << PAGE_SHIFT;
+	int err = 0;
+	vm_fault_t ret = VM_FAULT_NOPAGE;
+
+	spu_context_nospu_trace(spufs_ps_fault__enter, ctx);
+
+	if (offset >= ps_size)
+		return VM_FAULT_SIGBUS;
+
+	if (fatal_signal_pending(current))
+		return VM_FAULT_SIGBUS;
+
+	/*
+	 * Because we release the mmap_sem, the context may be destroyed while
+	 * we're in spu_wait. Grab an extra reference so it isn't destroyed
+	 * in the meantime.
+	 */
+	get_spu_context(ctx);
+
+	/*
+	 * We have to wait for context to be loaded before we have
+	 * pages to hand out to the user, but we don't want to wait
+	 * with the mmap_sem held.
+	 * It is possible to drop the mmap_sem here, but then we need
+	 * to return VM_FAULT_NOPAGE because the mappings may have
+	 * hanged.
+	 */
+	if (spu_acquire(ctx))
+		goto refault;
+
+	if (ctx->state == SPU_STATE_SAVED) {
+		up_read(&current->mm->mmap_sem);
+		spu_context_nospu_trace(spufs_ps_fault__sleep, ctx);
+		err = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE);
+		spu_context_trace(spufs_ps_fault__wake, ctx, ctx->spu);
+		down_read(&current->mm->mmap_sem);
+	} else {
+		area = ctx->spu->problem_phys + ps_offs;
+		ret = vmf_insert_pfn(vmf->vma, vmf->address,
+				(area + offset) >> PAGE_SHIFT);
+		spu_context_trace(spufs_ps_fault__insert, ctx, ctx->spu);
+	}
+
+	if (!err)
+		spu_release(ctx);
+
+refault:
+	put_spu_context(ctx);
+	return ret;
+}
+
+#if SPUFS_MMAP_4K
+static vm_fault_t spufs_cntl_mmap_fault(struct vm_fault *vmf)
+{
+	return spufs_ps_fault(vmf, 0x4000, SPUFS_CNTL_MAP_SIZE);
+}
+
+static const struct vm_operations_struct spufs_cntl_mmap_vmops = {
+	.fault = spufs_cntl_mmap_fault,
+};
+
+/*
+ * mmap support for problem state control area [0x4000 - 0x4fff].
+ */
+static int spufs_cntl_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	vma->vm_ops = &spufs_cntl_mmap_vmops;
+	return 0;
+}
+#else /* SPUFS_MMAP_4K */
+#define spufs_cntl_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+static int spufs_cntl_get(void *data, u64 *val)
+{
+	struct spu_context *ctx = data;
+	int ret;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+	*val = ctx->ops->status_read(ctx);
+	spu_release(ctx);
+
+	return 0;
+}
+
+static int spufs_cntl_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	int ret;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+	ctx->ops->runcntl_write(ctx, val);
+	spu_release(ctx);
+
+	return 0;
+}
+
+static int spufs_cntl_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (!i->i_openers++)
+		ctx->cntl = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+	return simple_attr_open(inode, file, spufs_cntl_get,
+					spufs_cntl_set, "0x%08lx");
+}
+
+static int
+spufs_cntl_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	simple_attr_release(inode, file);
+
+	mutex_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->cntl = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
+static const struct file_operations spufs_cntl_fops = {
+	.open = spufs_cntl_open,
+	.release = spufs_cntl_release,
+	.read = simple_attr_read,
+	.write = simple_attr_write,
+	.llseek	= generic_file_llseek,
+	.mmap = spufs_cntl_mmap,
+};
+
+static int
+spufs_regs_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	file->private_data = i->i_ctx;
+	return 0;
+}
+
+static ssize_t
+__spufs_regs_read(struct spu_context *ctx, char __user *buffer,
+			size_t size, loff_t *pos)
+{
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	return simple_read_from_buffer(buffer, size, pos,
+				      lscsa->gprs, sizeof lscsa->gprs);
+}
+
+static ssize_t
+spufs_regs_read(struct file *file, char __user *buffer,
+		size_t size, loff_t *pos)
+{
+	int ret;
+	struct spu_context *ctx = file->private_data;
+
+	/* pre-check for file position: if we'd return EOF, there's no point
+	 * causing a deschedule */
+	if (*pos >= sizeof(ctx->csa.lscsa->gprs))
+		return 0;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	ret = __spufs_regs_read(ctx, buffer, size, pos);
+	spu_release_saved(ctx);
+	return ret;
+}
+
+static ssize_t
+spufs_regs_write(struct file *file, const char __user *buffer,
+		 size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	int ret;
+
+	if (*pos >= sizeof(lscsa->gprs))
+		return -EFBIG;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+
+	size = simple_write_to_buffer(lscsa->gprs, sizeof(lscsa->gprs), pos,
+					buffer, size);
+
+	spu_release_saved(ctx);
+	return size;
+}
+
+static const struct file_operations spufs_regs_fops = {
+	.open	 = spufs_regs_open,
+	.read    = spufs_regs_read,
+	.write   = spufs_regs_write,
+	.llseek  = generic_file_llseek,
+};
+
+static ssize_t
+__spufs_fpcr_read(struct spu_context *ctx, char __user * buffer,
+			size_t size, loff_t * pos)
+{
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	return simple_read_from_buffer(buffer, size, pos,
+				      &lscsa->fpcr, sizeof(lscsa->fpcr));
+}
+
+static ssize_t
+spufs_fpcr_read(struct file *file, char __user * buffer,
+		size_t size, loff_t * pos)
+{
+	int ret;
+	struct spu_context *ctx = file->private_data;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	ret = __spufs_fpcr_read(ctx, buffer, size, pos);
+	spu_release_saved(ctx);
+	return ret;
+}
+
+static ssize_t
+spufs_fpcr_write(struct file *file, const char __user * buffer,
+		 size_t size, loff_t * pos)
+{
+	struct spu_context *ctx = file->private_data;
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	int ret;
+
+	if (*pos >= sizeof(lscsa->fpcr))
+		return -EFBIG;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+
+	size = simple_write_to_buffer(&lscsa->fpcr, sizeof(lscsa->fpcr), pos,
+					buffer, size);
+
+	spu_release_saved(ctx);
+	return size;
+}
+
+static const struct file_operations spufs_fpcr_fops = {
+	.open = spufs_regs_open,
+	.read = spufs_fpcr_read,
+	.write = spufs_fpcr_write,
+	.llseek = generic_file_llseek,
+};
+
+/* generic open function for all pipe-like files */
+static int spufs_pipe_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	file->private_data = i->i_ctx;
+
+	return nonseekable_open(inode, file);
+}
+
+/*
+ * Read as many bytes from the mailbox as possible, until
+ * one of the conditions becomes true:
+ *
+ * - no more data available in the mailbox
+ * - end of the user provided buffer
+ * - end of the mapped area
+ */
+static ssize_t spufs_mbox_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 mbox_data, __user *udata;
+	ssize_t count;
+
+	if (len < 4)
+		return -EINVAL;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	udata = (void __user *)buf;
+
+	count = spu_acquire(ctx);
+	if (count)
+		return count;
+
+	for (count = 0; (count + 4) <= len; count += 4, udata++) {
+		int ret;
+		ret = ctx->ops->mbox_read(ctx, &mbox_data);
+		if (ret == 0)
+			break;
+
+		/*
+		 * at the end of the mapped area, we can fault
+		 * but still need to return the data we have
+		 * read successfully so far.
+		 */
+		ret = __put_user(mbox_data, udata);
+		if (ret) {
+			if (!count)
+				count = -EFAULT;
+			break;
+		}
+	}
+	spu_release(ctx);
+
+	if (!count)
+		count = -EAGAIN;
+
+	return count;
+}
+
+static const struct file_operations spufs_mbox_fops = {
+	.open	= spufs_pipe_open,
+	.read	= spufs_mbox_read,
+	.llseek	= no_llseek,
+};
+
+static ssize_t spufs_mbox_stat_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	ssize_t ret;
+	u32 mbox_stat;
+
+	if (len < 4)
+		return -EINVAL;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+
+	mbox_stat = ctx->ops->mbox_stat_read(ctx) & 0xff;
+
+	spu_release(ctx);
+
+	if (copy_to_user(buf, &mbox_stat, sizeof mbox_stat))
+		return -EFAULT;
+
+	return 4;
+}
+
+static const struct file_operations spufs_mbox_stat_fops = {
+	.open	= spufs_pipe_open,
+	.read	= spufs_mbox_stat_read,
+	.llseek = no_llseek,
+};
+
+/* low-level ibox access function */
+size_t spu_ibox_read(struct spu_context *ctx, u32 *data)
+{
+	return ctx->ops->ibox_read(ctx, data);
+}
+
+/* interrupt-level ibox callback function. */
+void spufs_ibox_callback(struct spu *spu)
+{
+	struct spu_context *ctx = spu->ctx;
+
+	if (ctx)
+		wake_up_all(&ctx->ibox_wq);
+}
+
+/*
+ * Read as many bytes from the interrupt mailbox as possible, until
+ * one of the conditions becomes true:
+ *
+ * - no more data available in the mailbox
+ * - end of the user provided buffer
+ * - end of the mapped area
+ *
+ * If the file is opened without O_NONBLOCK, we wait here until
+ * any data is available, but return when we have been able to
+ * read something.
+ */
+static ssize_t spufs_ibox_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 ibox_data, __user *udata;
+	ssize_t count;
+
+	if (len < 4)
+		return -EINVAL;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	udata = (void __user *)buf;
+
+	count = spu_acquire(ctx);
+	if (count)
+		goto out;
+
+	/* wait only for the first element */
+	count = 0;
+	if (file->f_flags & O_NONBLOCK) {
+		if (!spu_ibox_read(ctx, &ibox_data)) {
+			count = -EAGAIN;
+			goto out_unlock;
+		}
+	} else {
+		count = spufs_wait(ctx->ibox_wq, spu_ibox_read(ctx, &ibox_data));
+		if (count)
+			goto out;
+	}
+
+	/* if we can't write at all, return -EFAULT */
+	count = __put_user(ibox_data, udata);
+	if (count)
+		goto out_unlock;
+
+	for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) {
+		int ret;
+		ret = ctx->ops->ibox_read(ctx, &ibox_data);
+		if (ret == 0)
+			break;
+		/*
+		 * at the end of the mapped area, we can fault
+		 * but still need to return the data we have
+		 * read successfully so far.
+		 */
+		ret = __put_user(ibox_data, udata);
+		if (ret)
+			break;
+	}
+
+out_unlock:
+	spu_release(ctx);
+out:
+	return count;
+}
+
+static __poll_t spufs_ibox_poll(struct file *file, poll_table *wait)
+{
+	struct spu_context *ctx = file->private_data;
+	__poll_t mask;
+
+	poll_wait(file, &ctx->ibox_wq, wait);
+
+	/*
+	 * For now keep this uninterruptible and also ignore the rule
+	 * that poll should not sleep.  Will be fixed later.
+	 */
+	mutex_lock(&ctx->state_mutex);
+	mask = ctx->ops->mbox_stat_poll(ctx, EPOLLIN | EPOLLRDNORM);
+	spu_release(ctx);
+
+	return mask;
+}
+
+static const struct file_operations spufs_ibox_fops = {
+	.open	= spufs_pipe_open,
+	.read	= spufs_ibox_read,
+	.poll	= spufs_ibox_poll,
+	.llseek = no_llseek,
+};
+
+static ssize_t spufs_ibox_stat_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	ssize_t ret;
+	u32 ibox_stat;
+
+	if (len < 4)
+		return -EINVAL;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+	ibox_stat = (ctx->ops->mbox_stat_read(ctx) >> 16) & 0xff;
+	spu_release(ctx);
+
+	if (copy_to_user(buf, &ibox_stat, sizeof ibox_stat))
+		return -EFAULT;
+
+	return 4;
+}
+
+static const struct file_operations spufs_ibox_stat_fops = {
+	.open	= spufs_pipe_open,
+	.read	= spufs_ibox_stat_read,
+	.llseek = no_llseek,
+};
+
+/* low-level mailbox write */
+size_t spu_wbox_write(struct spu_context *ctx, u32 data)
+{
+	return ctx->ops->wbox_write(ctx, data);
+}
+
+/* interrupt-level wbox callback function. */
+void spufs_wbox_callback(struct spu *spu)
+{
+	struct spu_context *ctx = spu->ctx;
+
+	if (ctx)
+		wake_up_all(&ctx->wbox_wq);
+}
+
+/*
+ * Write as many bytes to the interrupt mailbox as possible, until
+ * one of the conditions becomes true:
+ *
+ * - the mailbox is full
+ * - end of the user provided buffer
+ * - end of the mapped area
+ *
+ * If the file is opened without O_NONBLOCK, we wait here until
+ * space is available, but return when we have been able to
+ * write something.
+ */
+static ssize_t spufs_wbox_write(struct file *file, const char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 wbox_data, __user *udata;
+	ssize_t count;
+
+	if (len < 4)
+		return -EINVAL;
+
+	udata = (void __user *)buf;
+	if (!access_ok(VERIFY_READ, buf, len))
+		return -EFAULT;
+
+	if (__get_user(wbox_data, udata))
+		return -EFAULT;
+
+	count = spu_acquire(ctx);
+	if (count)
+		goto out;
+
+	/*
+	 * make sure we can at least write one element, by waiting
+	 * in case of !O_NONBLOCK
+	 */
+	count = 0;
+	if (file->f_flags & O_NONBLOCK) {
+		if (!spu_wbox_write(ctx, wbox_data)) {
+			count = -EAGAIN;
+			goto out_unlock;
+		}
+	} else {
+		count = spufs_wait(ctx->wbox_wq, spu_wbox_write(ctx, wbox_data));
+		if (count)
+			goto out;
+	}
+
+
+	/* write as much as possible */
+	for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) {
+		int ret;
+		ret = __get_user(wbox_data, udata);
+		if (ret)
+			break;
+
+		ret = spu_wbox_write(ctx, wbox_data);
+		if (ret == 0)
+			break;
+	}
+
+out_unlock:
+	spu_release(ctx);
+out:
+	return count;
+}
+
+static __poll_t spufs_wbox_poll(struct file *file, poll_table *wait)
+{
+	struct spu_context *ctx = file->private_data;
+	__poll_t mask;
+
+	poll_wait(file, &ctx->wbox_wq, wait);
+
+	/*
+	 * For now keep this uninterruptible and also ignore the rule
+	 * that poll should not sleep.  Will be fixed later.
+	 */
+	mutex_lock(&ctx->state_mutex);
+	mask = ctx->ops->mbox_stat_poll(ctx, EPOLLOUT | EPOLLWRNORM);
+	spu_release(ctx);
+
+	return mask;
+}
+
+static const struct file_operations spufs_wbox_fops = {
+	.open	= spufs_pipe_open,
+	.write	= spufs_wbox_write,
+	.poll	= spufs_wbox_poll,
+	.llseek = no_llseek,
+};
+
+static ssize_t spufs_wbox_stat_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	ssize_t ret;
+	u32 wbox_stat;
+
+	if (len < 4)
+		return -EINVAL;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+	wbox_stat = (ctx->ops->mbox_stat_read(ctx) >> 8) & 0xff;
+	spu_release(ctx);
+
+	if (copy_to_user(buf, &wbox_stat, sizeof wbox_stat))
+		return -EFAULT;
+
+	return 4;
+}
+
+static const struct file_operations spufs_wbox_stat_fops = {
+	.open	= spufs_pipe_open,
+	.read	= spufs_wbox_stat_read,
+	.llseek = no_llseek,
+};
+
+static int spufs_signal1_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (!i->i_openers++)
+		ctx->signal1 = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+	return nonseekable_open(inode, file);
+}
+
+static int
+spufs_signal1_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->signal1 = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
+static ssize_t __spufs_signal1_read(struct spu_context *ctx, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	int ret = 0;
+	u32 data;
+
+	if (len < 4)
+		return -EINVAL;
+
+	if (ctx->csa.spu_chnlcnt_RW[3]) {
+		data = ctx->csa.spu_chnldata_RW[3];
+		ret = 4;
+	}
+
+	if (!ret)
+		goto out;
+
+	if (copy_to_user(buf, &data, 4))
+		return -EFAULT;
+
+out:
+	return ret;
+}
+
+static ssize_t spufs_signal1_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	int ret;
+	struct spu_context *ctx = file->private_data;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	ret = __spufs_signal1_read(ctx, buf, len, pos);
+	spu_release_saved(ctx);
+
+	return ret;
+}
+
+static ssize_t spufs_signal1_write(struct file *file, const char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx;
+	ssize_t ret;
+	u32 data;
+
+	ctx = file->private_data;
+
+	if (len < 4)
+		return -EINVAL;
+
+	if (copy_from_user(&data, buf, 4))
+		return -EFAULT;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+	ctx->ops->signal1_write(ctx, data);
+	spu_release(ctx);
+
+	return 4;
+}
+
+static vm_fault_t
+spufs_signal1_mmap_fault(struct vm_fault *vmf)
+{
+#if SPUFS_SIGNAL_MAP_SIZE == 0x1000
+	return spufs_ps_fault(vmf, 0x14000, SPUFS_SIGNAL_MAP_SIZE);
+#elif SPUFS_SIGNAL_MAP_SIZE == 0x10000
+	/* For 64k pages, both signal1 and signal2 can be used to mmap the whole
+	 * signal 1 and 2 area
+	 */
+	return spufs_ps_fault(vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE);
+#else
+#error unsupported page size
+#endif
+}
+
+static const struct vm_operations_struct spufs_signal1_mmap_vmops = {
+	.fault = spufs_signal1_mmap_fault,
+};
+
+static int spufs_signal1_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	vma->vm_ops = &spufs_signal1_mmap_vmops;
+	return 0;
+}
+
+static const struct file_operations spufs_signal1_fops = {
+	.open = spufs_signal1_open,
+	.release = spufs_signal1_release,
+	.read = spufs_signal1_read,
+	.write = spufs_signal1_write,
+	.mmap = spufs_signal1_mmap,
+	.llseek = no_llseek,
+};
+
+static const struct file_operations spufs_signal1_nosched_fops = {
+	.open = spufs_signal1_open,
+	.release = spufs_signal1_release,
+	.write = spufs_signal1_write,
+	.mmap = spufs_signal1_mmap,
+	.llseek = no_llseek,
+};
+
+static int spufs_signal2_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (!i->i_openers++)
+		ctx->signal2 = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+	return nonseekable_open(inode, file);
+}
+
+static int
+spufs_signal2_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->signal2 = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
+static ssize_t __spufs_signal2_read(struct spu_context *ctx, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	int ret = 0;
+	u32 data;
+
+	if (len < 4)
+		return -EINVAL;
+
+	if (ctx->csa.spu_chnlcnt_RW[4]) {
+		data =  ctx->csa.spu_chnldata_RW[4];
+		ret = 4;
+	}
+
+	if (!ret)
+		goto out;
+
+	if (copy_to_user(buf, &data, 4))
+		return -EFAULT;
+
+out:
+	return ret;
+}
+
+static ssize_t spufs_signal2_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	ret = __spufs_signal2_read(ctx, buf, len, pos);
+	spu_release_saved(ctx);
+
+	return ret;
+}
+
+static ssize_t spufs_signal2_write(struct file *file, const char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx;
+	ssize_t ret;
+	u32 data;
+
+	ctx = file->private_data;
+
+	if (len < 4)
+		return -EINVAL;
+
+	if (copy_from_user(&data, buf, 4))
+		return -EFAULT;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+	ctx->ops->signal2_write(ctx, data);
+	spu_release(ctx);
+
+	return 4;
+}
+
+#if SPUFS_MMAP_4K
+static vm_fault_t
+spufs_signal2_mmap_fault(struct vm_fault *vmf)
+{
+#if SPUFS_SIGNAL_MAP_SIZE == 0x1000
+	return spufs_ps_fault(vmf, 0x1c000, SPUFS_SIGNAL_MAP_SIZE);
+#elif SPUFS_SIGNAL_MAP_SIZE == 0x10000
+	/* For 64k pages, both signal1 and signal2 can be used to mmap the whole
+	 * signal 1 and 2 area
+	 */
+	return spufs_ps_fault(vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE);
+#else
+#error unsupported page size
+#endif
+}
+
+static const struct vm_operations_struct spufs_signal2_mmap_vmops = {
+	.fault = spufs_signal2_mmap_fault,
+};
+
+static int spufs_signal2_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	vma->vm_ops = &spufs_signal2_mmap_vmops;
+	return 0;
+}
+#else /* SPUFS_MMAP_4K */
+#define spufs_signal2_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+static const struct file_operations spufs_signal2_fops = {
+	.open = spufs_signal2_open,
+	.release = spufs_signal2_release,
+	.read = spufs_signal2_read,
+	.write = spufs_signal2_write,
+	.mmap = spufs_signal2_mmap,
+	.llseek = no_llseek,
+};
+
+static const struct file_operations spufs_signal2_nosched_fops = {
+	.open = spufs_signal2_open,
+	.release = spufs_signal2_release,
+	.write = spufs_signal2_write,
+	.mmap = spufs_signal2_mmap,
+	.llseek = no_llseek,
+};
+
+/*
+ * This is a wrapper around DEFINE_SIMPLE_ATTRIBUTE which does the
+ * work of acquiring (or not) the SPU context before calling through
+ * to the actual get routine. The set routine is called directly.
+ */
+#define SPU_ATTR_NOACQUIRE	0
+#define SPU_ATTR_ACQUIRE	1
+#define SPU_ATTR_ACQUIRE_SAVED	2
+
+#define DEFINE_SPUFS_ATTRIBUTE(__name, __get, __set, __fmt, __acquire)	\
+static int __##__get(void *data, u64 *val)				\
+{									\
+	struct spu_context *ctx = data;					\
+	int ret = 0;							\
+									\
+	if (__acquire == SPU_ATTR_ACQUIRE) {				\
+		ret = spu_acquire(ctx);					\
+		if (ret)						\
+			return ret;					\
+		*val = __get(ctx);					\
+		spu_release(ctx);					\
+	} else if (__acquire == SPU_ATTR_ACQUIRE_SAVED)	{		\
+		ret = spu_acquire_saved(ctx);				\
+		if (ret)						\
+			return ret;					\
+		*val = __get(ctx);					\
+		spu_release_saved(ctx);					\
+	} else								\
+		*val = __get(ctx);					\
+									\
+	return 0;							\
+}									\
+DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__name, __##__get, __set, __fmt);
+
+static int spufs_signal1_type_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	int ret;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+	ctx->ops->signal1_type_set(ctx, val);
+	spu_release(ctx);
+
+	return 0;
+}
+
+static u64 spufs_signal1_type_get(struct spu_context *ctx)
+{
+	return ctx->ops->signal1_type_get(ctx);
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_signal1_type, spufs_signal1_type_get,
+		       spufs_signal1_type_set, "%llu\n", SPU_ATTR_ACQUIRE);
+
+
+static int spufs_signal2_type_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	int ret;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+	ctx->ops->signal2_type_set(ctx, val);
+	spu_release(ctx);
+
+	return 0;
+}
+
+static u64 spufs_signal2_type_get(struct spu_context *ctx)
+{
+	return ctx->ops->signal2_type_get(ctx);
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_signal2_type, spufs_signal2_type_get,
+		       spufs_signal2_type_set, "%llu\n", SPU_ATTR_ACQUIRE);
+
+#if SPUFS_MMAP_4K
+static vm_fault_t
+spufs_mss_mmap_fault(struct vm_fault *vmf)
+{
+	return spufs_ps_fault(vmf, 0x0000, SPUFS_MSS_MAP_SIZE);
+}
+
+static const struct vm_operations_struct spufs_mss_mmap_vmops = {
+	.fault = spufs_mss_mmap_fault,
+};
+
+/*
+ * mmap support for problem state MFC DMA area [0x0000 - 0x0fff].
+ */
+static int spufs_mss_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	vma->vm_ops = &spufs_mss_mmap_vmops;
+	return 0;
+}
+#else /* SPUFS_MMAP_4K */
+#define spufs_mss_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+static int spufs_mss_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	file->private_data = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (!i->i_openers++)
+		ctx->mss = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+	return nonseekable_open(inode, file);
+}
+
+static int
+spufs_mss_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->mss = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
+static const struct file_operations spufs_mss_fops = {
+	.open	 = spufs_mss_open,
+	.release = spufs_mss_release,
+	.mmap	 = spufs_mss_mmap,
+	.llseek  = no_llseek,
+};
+
+static vm_fault_t
+spufs_psmap_mmap_fault(struct vm_fault *vmf)
+{
+	return spufs_ps_fault(vmf, 0x0000, SPUFS_PS_MAP_SIZE);
+}
+
+static const struct vm_operations_struct spufs_psmap_mmap_vmops = {
+	.fault = spufs_psmap_mmap_fault,
+};
+
+/*
+ * mmap support for full problem state area [0x00000 - 0x1ffff].
+ */
+static int spufs_psmap_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	vma->vm_ops = &spufs_psmap_mmap_vmops;
+	return 0;
+}
+
+static int spufs_psmap_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = i->i_ctx;
+	if (!i->i_openers++)
+		ctx->psmap = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+	return nonseekable_open(inode, file);
+}
+
+static int
+spufs_psmap_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->psmap = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
+static const struct file_operations spufs_psmap_fops = {
+	.open	 = spufs_psmap_open,
+	.release = spufs_psmap_release,
+	.mmap	 = spufs_psmap_mmap,
+	.llseek  = no_llseek,
+};
+
+
+#if SPUFS_MMAP_4K
+static vm_fault_t
+spufs_mfc_mmap_fault(struct vm_fault *vmf)
+{
+	return spufs_ps_fault(vmf, 0x3000, SPUFS_MFC_MAP_SIZE);
+}
+
+static const struct vm_operations_struct spufs_mfc_mmap_vmops = {
+	.fault = spufs_mfc_mmap_fault,
+};
+
+/*
+ * mmap support for problem state MFC DMA area [0x0000 - 0x0fff].
+ */
+static int spufs_mfc_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	vma->vm_ops = &spufs_mfc_mmap_vmops;
+	return 0;
+}
+#else /* SPUFS_MMAP_4K */
+#define spufs_mfc_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+static int spufs_mfc_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	/* we don't want to deal with DMA into other processes */
+	if (ctx->owner != current->mm)
+		return -EINVAL;
+
+	if (atomic_read(&inode->i_count) != 1)
+		return -EBUSY;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (!i->i_openers++)
+		ctx->mfc = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+	return nonseekable_open(inode, file);
+}
+
+static int
+spufs_mfc_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (!--i->i_openers)
+		ctx->mfc = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+	return 0;
+}
+
+/* interrupt-level mfc callback function. */
+void spufs_mfc_callback(struct spu *spu)
+{
+	struct spu_context *ctx = spu->ctx;
+
+	if (ctx)
+		wake_up_all(&ctx->mfc_wq);
+}
+
+static int spufs_read_mfc_tagstatus(struct spu_context *ctx, u32 *status)
+{
+	/* See if there is one tag group is complete */
+	/* FIXME we need locking around tagwait */
+	*status = ctx->ops->read_mfc_tagstatus(ctx) & ctx->tagwait;
+	ctx->tagwait &= ~*status;
+	if (*status)
+		return 1;
+
+	/* enable interrupt waiting for any tag group,
+	   may silently fail if interrupts are already enabled */
+	ctx->ops->set_mfc_query(ctx, ctx->tagwait, 1);
+	return 0;
+}
+
+static ssize_t spufs_mfc_read(struct file *file, char __user *buffer,
+			size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret = -EINVAL;
+	u32 status;
+
+	if (size != 4)
+		goto out;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+
+	ret = -EINVAL;
+	if (file->f_flags & O_NONBLOCK) {
+		status = ctx->ops->read_mfc_tagstatus(ctx);
+		if (!(status & ctx->tagwait))
+			ret = -EAGAIN;
+		else
+			/* XXX(hch): shouldn't we clear ret here? */
+			ctx->tagwait &= ~status;
+	} else {
+		ret = spufs_wait(ctx->mfc_wq,
+			   spufs_read_mfc_tagstatus(ctx, &status));
+		if (ret)
+			goto out;
+	}
+	spu_release(ctx);
+
+	ret = 4;
+	if (copy_to_user(buffer, &status, 4))
+		ret = -EFAULT;
+
+out:
+	return ret;
+}
+
+static int spufs_check_valid_dma(struct mfc_dma_command *cmd)
+{
+	pr_debug("queueing DMA %x %llx %x %x %x\n", cmd->lsa,
+		 cmd->ea, cmd->size, cmd->tag, cmd->cmd);
+
+	switch (cmd->cmd) {
+	case MFC_PUT_CMD:
+	case MFC_PUTF_CMD:
+	case MFC_PUTB_CMD:
+	case MFC_GET_CMD:
+	case MFC_GETF_CMD:
+	case MFC_GETB_CMD:
+		break;
+	default:
+		pr_debug("invalid DMA opcode %x\n", cmd->cmd);
+		return -EIO;
+	}
+
+	if ((cmd->lsa & 0xf) != (cmd->ea &0xf)) {
+		pr_debug("invalid DMA alignment, ea %llx lsa %x\n",
+				cmd->ea, cmd->lsa);
+		return -EIO;
+	}
+
+	switch (cmd->size & 0xf) {
+	case 1:
+		break;
+	case 2:
+		if (cmd->lsa & 1)
+			goto error;
+		break;
+	case 4:
+		if (cmd->lsa & 3)
+			goto error;
+		break;
+	case 8:
+		if (cmd->lsa & 7)
+			goto error;
+		break;
+	case 0:
+		if (cmd->lsa & 15)
+			goto error;
+		break;
+	error:
+	default:
+		pr_debug("invalid DMA alignment %x for size %x\n",
+			cmd->lsa & 0xf, cmd->size);
+		return -EIO;
+	}
+
+	if (cmd->size > 16 * 1024) {
+		pr_debug("invalid DMA size %x\n", cmd->size);
+		return -EIO;
+	}
+
+	if (cmd->tag & 0xfff0) {
+		/* we reserve the higher tag numbers for kernel use */
+		pr_debug("invalid DMA tag\n");
+		return -EIO;
+	}
+
+	if (cmd->class) {
+		/* not supported in this version */
+		pr_debug("invalid DMA class\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int spu_send_mfc_command(struct spu_context *ctx,
+				struct mfc_dma_command cmd,
+				int *error)
+{
+	*error = ctx->ops->send_mfc_command(ctx, &cmd);
+	if (*error == -EAGAIN) {
+		/* wait for any tag group to complete
+		   so we have space for the new command */
+		ctx->ops->set_mfc_query(ctx, ctx->tagwait, 1);
+		/* try again, because the queue might be
+		   empty again */
+		*error = ctx->ops->send_mfc_command(ctx, &cmd);
+		if (*error == -EAGAIN)
+			return 0;
+	}
+	return 1;
+}
+
+static ssize_t spufs_mfc_write(struct file *file, const char __user *buffer,
+			size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	struct mfc_dma_command cmd;
+	int ret = -EINVAL;
+
+	if (size != sizeof cmd)
+		goto out;
+
+	ret = -EFAULT;
+	if (copy_from_user(&cmd, buffer, sizeof cmd))
+		goto out;
+
+	ret = spufs_check_valid_dma(&cmd);
+	if (ret)
+		goto out;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		goto out;
+
+	ret = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE);
+	if (ret)
+		goto out;
+
+	if (file->f_flags & O_NONBLOCK) {
+		ret = ctx->ops->send_mfc_command(ctx, &cmd);
+	} else {
+		int status;
+		ret = spufs_wait(ctx->mfc_wq,
+				 spu_send_mfc_command(ctx, cmd, &status));
+		if (ret)
+			goto out;
+		if (status)
+			ret = status;
+	}
+
+	if (ret)
+		goto out_unlock;
+
+	ctx->tagwait |= 1 << cmd.tag;
+	ret = size;
+
+out_unlock:
+	spu_release(ctx);
+out:
+	return ret;
+}
+
+static __poll_t spufs_mfc_poll(struct file *file,poll_table *wait)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 free_elements, tagstatus;
+	__poll_t mask;
+
+	poll_wait(file, &ctx->mfc_wq, wait);
+
+	/*
+	 * For now keep this uninterruptible and also ignore the rule
+	 * that poll should not sleep.  Will be fixed later.
+	 */
+	mutex_lock(&ctx->state_mutex);
+	ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2);
+	free_elements = ctx->ops->get_mfc_free_elements(ctx);
+	tagstatus = ctx->ops->read_mfc_tagstatus(ctx);
+	spu_release(ctx);
+
+	mask = 0;
+	if (free_elements & 0xffff)
+		mask |= EPOLLOUT | EPOLLWRNORM;
+	if (tagstatus & ctx->tagwait)
+		mask |= EPOLLIN | EPOLLRDNORM;
+
+	pr_debug("%s: free %d tagstatus %d tagwait %d\n", __func__,
+		free_elements, tagstatus, ctx->tagwait);
+
+	return mask;
+}
+
+static int spufs_mfc_flush(struct file *file, fl_owner_t id)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		goto out;
+#if 0
+/* this currently hangs */
+	ret = spufs_wait(ctx->mfc_wq,
+			 ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2));
+	if (ret)
+		goto out;
+	ret = spufs_wait(ctx->mfc_wq,
+			 ctx->ops->read_mfc_tagstatus(ctx) == ctx->tagwait);
+	if (ret)
+		goto out;
+#else
+	ret = 0;
+#endif
+	spu_release(ctx);
+out:
+	return ret;
+}
+
+static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+	struct inode *inode = file_inode(file);
+	int err = file_write_and_wait_range(file, start, end);
+	if (!err) {
+		inode_lock(inode);
+		err = spufs_mfc_flush(file, NULL);
+		inode_unlock(inode);
+	}
+	return err;
+}
+
+static const struct file_operations spufs_mfc_fops = {
+	.open	 = spufs_mfc_open,
+	.release = spufs_mfc_release,
+	.read	 = spufs_mfc_read,
+	.write	 = spufs_mfc_write,
+	.poll	 = spufs_mfc_poll,
+	.flush	 = spufs_mfc_flush,
+	.fsync	 = spufs_mfc_fsync,
+	.mmap	 = spufs_mfc_mmap,
+	.llseek  = no_llseek,
+};
+
+static int spufs_npc_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	int ret;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+	ctx->ops->npc_write(ctx, val);
+	spu_release(ctx);
+
+	return 0;
+}
+
+static u64 spufs_npc_get(struct spu_context *ctx)
+{
+	return ctx->ops->npc_read(ctx);
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_npc_ops, spufs_npc_get, spufs_npc_set,
+		       "0x%llx\n", SPU_ATTR_ACQUIRE);
+
+static int spufs_decr_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	int ret;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	lscsa->decr.slot[0] = (u32) val;
+	spu_release_saved(ctx);
+
+	return 0;
+}
+
+static u64 spufs_decr_get(struct spu_context *ctx)
+{
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	return lscsa->decr.slot[0];
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set,
+		       "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED);
+
+static int spufs_decr_status_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	int ret;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	if (val)
+		ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING;
+	else
+		ctx->csa.priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING;
+	spu_release_saved(ctx);
+
+	return 0;
+}
+
+static u64 spufs_decr_status_get(struct spu_context *ctx)
+{
+	if (ctx->csa.priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING)
+		return SPU_DECR_STATUS_RUNNING;
+	else
+		return 0;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get,
+		       spufs_decr_status_set, "0x%llx\n",
+		       SPU_ATTR_ACQUIRE_SAVED);
+
+static int spufs_event_mask_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	int ret;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	lscsa->event_mask.slot[0] = (u32) val;
+	spu_release_saved(ctx);
+
+	return 0;
+}
+
+static u64 spufs_event_mask_get(struct spu_context *ctx)
+{
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	return lscsa->event_mask.slot[0];
+}
+
+DEFINE_SPUFS_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get,
+		       spufs_event_mask_set, "0x%llx\n",
+		       SPU_ATTR_ACQUIRE_SAVED);
+
+static u64 spufs_event_status_get(struct spu_context *ctx)
+{
+	struct spu_state *state = &ctx->csa;
+	u64 stat;
+	stat = state->spu_chnlcnt_RW[0];
+	if (stat)
+		return state->spu_chnldata_RW[0];
+	return 0;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get,
+		       NULL, "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED)
+
+static int spufs_srr0_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	int ret;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	lscsa->srr0.slot[0] = (u32) val;
+	spu_release_saved(ctx);
+
+	return 0;
+}
+
+static u64 spufs_srr0_get(struct spu_context *ctx)
+{
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	return lscsa->srr0.slot[0];
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set,
+		       "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED)
+
+static u64 spufs_id_get(struct spu_context *ctx)
+{
+	u64 num;
+
+	if (ctx->state == SPU_STATE_RUNNABLE)
+		num = ctx->spu->number;
+	else
+		num = (unsigned int)-1;
+
+	return num;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_id_ops, spufs_id_get, NULL, "0x%llx\n",
+		       SPU_ATTR_ACQUIRE)
+
+static u64 spufs_object_id_get(struct spu_context *ctx)
+{
+	/* FIXME: Should there really be no locking here? */
+	return ctx->object_id;
+}
+
+static int spufs_object_id_set(void *data, u64 id)
+{
+	struct spu_context *ctx = data;
+	ctx->object_id = id;
+
+	return 0;
+}
+
+DEFINE_SPUFS_ATTRIBUTE(spufs_object_id_ops, spufs_object_id_get,
+		       spufs_object_id_set, "0x%llx\n", SPU_ATTR_NOACQUIRE);
+
+static u64 spufs_lslr_get(struct spu_context *ctx)
+{
+	return ctx->csa.priv2.spu_lslr_RW;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_lslr_ops, spufs_lslr_get, NULL, "0x%llx\n",
+		       SPU_ATTR_ACQUIRE_SAVED);
+
+static int spufs_info_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+	file->private_data = ctx;
+	return 0;
+}
+
+static int spufs_caps_show(struct seq_file *s, void *private)
+{
+	struct spu_context *ctx = s->private;
+
+	if (!(ctx->flags & SPU_CREATE_NOSCHED))
+		seq_puts(s, "sched\n");
+	if (!(ctx->flags & SPU_CREATE_ISOLATE))
+		seq_puts(s, "step\n");
+	return 0;
+}
+
+static int spufs_caps_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, spufs_caps_show, SPUFS_I(inode)->i_ctx);
+}
+
+static const struct file_operations spufs_caps_fops = {
+	.open		= spufs_caps_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static ssize_t __spufs_mbox_info_read(struct spu_context *ctx,
+			char __user *buf, size_t len, loff_t *pos)
+{
+	u32 data;
+
+	/* EOF if there's no entry in the mbox */
+	if (!(ctx->csa.prob.mb_stat_R & 0x0000ff))
+		return 0;
+
+	data = ctx->csa.prob.pu_mb_R;
+
+	return simple_read_from_buffer(buf, len, pos, &data, sizeof data);
+}
+
+static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 stat, data;
+	int ret;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	spin_lock(&ctx->csa.register_lock);
+	stat = ctx->csa.prob.mb_stat_R;
+	data = ctx->csa.prob.pu_mb_R;
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release_saved(ctx);
+
+	/* EOF if there's no entry in the mbox */
+	if (!(stat & 0x0000ff))
+		return 0;
+
+	return simple_read_from_buffer(buf, len, pos, &data, sizeof(data));
+}
+
+static const struct file_operations spufs_mbox_info_fops = {
+	.open = spufs_info_open,
+	.read = spufs_mbox_info_read,
+	.llseek  = generic_file_llseek,
+};
+
+static ssize_t __spufs_ibox_info_read(struct spu_context *ctx,
+				char __user *buf, size_t len, loff_t *pos)
+{
+	u32 data;
+
+	/* EOF if there's no entry in the ibox */
+	if (!(ctx->csa.prob.mb_stat_R & 0xff0000))
+		return 0;
+
+	data = ctx->csa.priv2.puint_mb_R;
+
+	return simple_read_from_buffer(buf, len, pos, &data, sizeof data);
+}
+
+static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 stat, data;
+	int ret;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	spin_lock(&ctx->csa.register_lock);
+	stat = ctx->csa.prob.mb_stat_R;
+	data = ctx->csa.priv2.puint_mb_R;
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release_saved(ctx);
+
+	/* EOF if there's no entry in the ibox */
+	if (!(stat & 0xff0000))
+		return 0;
+
+	return simple_read_from_buffer(buf, len, pos, &data, sizeof(data));
+}
+
+static const struct file_operations spufs_ibox_info_fops = {
+	.open = spufs_info_open,
+	.read = spufs_ibox_info_read,
+	.llseek  = generic_file_llseek,
+};
+
+static size_t spufs_wbox_info_cnt(struct spu_context *ctx)
+{
+	return (4 - ((ctx->csa.prob.mb_stat_R & 0x00ff00) >> 8)) * sizeof(u32);
+}
+
+static ssize_t __spufs_wbox_info_read(struct spu_context *ctx,
+			char __user *buf, size_t len, loff_t *pos)
+{
+	int i, cnt;
+	u32 data[4];
+	u32 wbox_stat;
+
+	wbox_stat = ctx->csa.prob.mb_stat_R;
+	cnt = spufs_wbox_info_cnt(ctx);
+	for (i = 0; i < cnt; i++) {
+		data[i] = ctx->csa.spu_mailbox_data[i];
+	}
+
+	return simple_read_from_buffer(buf, len, pos, &data,
+				cnt * sizeof(u32));
+}
+
+static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 data[ARRAY_SIZE(ctx->csa.spu_mailbox_data)];
+	int ret, count;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	spin_lock(&ctx->csa.register_lock);
+	count = spufs_wbox_info_cnt(ctx);
+	memcpy(&data, &ctx->csa.spu_mailbox_data, sizeof(data));
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release_saved(ctx);
+
+	return simple_read_from_buffer(buf, len, pos, &data,
+				count * sizeof(u32));
+}
+
+static const struct file_operations spufs_wbox_info_fops = {
+	.open = spufs_info_open,
+	.read = spufs_wbox_info_read,
+	.llseek  = generic_file_llseek,
+};
+
+static void spufs_get_dma_info(struct spu_context *ctx,
+		struct spu_dma_info *info)
+{
+	int i;
+
+	info->dma_info_type = ctx->csa.priv2.spu_tag_status_query_RW;
+	info->dma_info_mask = ctx->csa.lscsa->tag_mask.slot[0];
+	info->dma_info_status = ctx->csa.spu_chnldata_RW[24];
+	info->dma_info_stall_and_notify = ctx->csa.spu_chnldata_RW[25];
+	info->dma_info_atomic_command_status = ctx->csa.spu_chnldata_RW[27];
+	for (i = 0; i < 16; i++) {
+		struct mfc_cq_sr *qp = &info->dma_info_command_data[i];
+		struct mfc_cq_sr *spuqp = &ctx->csa.priv2.spuq[i];
+
+		qp->mfc_cq_data0_RW = spuqp->mfc_cq_data0_RW;
+		qp->mfc_cq_data1_RW = spuqp->mfc_cq_data1_RW;
+		qp->mfc_cq_data2_RW = spuqp->mfc_cq_data2_RW;
+		qp->mfc_cq_data3_RW = spuqp->mfc_cq_data3_RW;
+	}
+}
+
+static ssize_t __spufs_dma_info_read(struct spu_context *ctx,
+			char __user *buf, size_t len, loff_t *pos)
+{
+	struct spu_dma_info info;
+
+	spufs_get_dma_info(ctx, &info);
+
+	return simple_read_from_buffer(buf, len, pos, &info,
+				sizeof info);
+}
+
+static ssize_t spufs_dma_info_read(struct file *file, char __user *buf,
+			      size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	struct spu_dma_info info;
+	int ret;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	spin_lock(&ctx->csa.register_lock);
+	spufs_get_dma_info(ctx, &info);
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release_saved(ctx);
+
+	return simple_read_from_buffer(buf, len, pos, &info,
+				sizeof(info));
+}
+
+static const struct file_operations spufs_dma_info_fops = {
+	.open = spufs_info_open,
+	.read = spufs_dma_info_read,
+	.llseek = no_llseek,
+};
+
+static void spufs_get_proxydma_info(struct spu_context *ctx,
+		struct spu_proxydma_info *info)
+{
+	int i;
+
+	info->proxydma_info_type = ctx->csa.prob.dma_querytype_RW;
+	info->proxydma_info_mask = ctx->csa.prob.dma_querymask_RW;
+	info->proxydma_info_status = ctx->csa.prob.dma_tagstatus_R;
+
+	for (i = 0; i < 8; i++) {
+		struct mfc_cq_sr *qp = &info->proxydma_info_command_data[i];
+		struct mfc_cq_sr *puqp = &ctx->csa.priv2.puq[i];
+
+		qp->mfc_cq_data0_RW = puqp->mfc_cq_data0_RW;
+		qp->mfc_cq_data1_RW = puqp->mfc_cq_data1_RW;
+		qp->mfc_cq_data2_RW = puqp->mfc_cq_data2_RW;
+		qp->mfc_cq_data3_RW = puqp->mfc_cq_data3_RW;
+	}
+}
+
+static ssize_t __spufs_proxydma_info_read(struct spu_context *ctx,
+			char __user *buf, size_t len, loff_t *pos)
+{
+	struct spu_proxydma_info info;
+	int ret = sizeof info;
+
+	if (len < ret)
+		return -EINVAL;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	spufs_get_proxydma_info(ctx, &info);
+
+	return simple_read_from_buffer(buf, len, pos, &info,
+				sizeof info);
+}
+
+static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	struct spu_proxydma_info info;
+	int ret;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	spin_lock(&ctx->csa.register_lock);
+	spufs_get_proxydma_info(ctx, &info);
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release_saved(ctx);
+
+	return simple_read_from_buffer(buf, len, pos, &info,
+				sizeof(info));
+}
+
+static const struct file_operations spufs_proxydma_info_fops = {
+	.open = spufs_info_open,
+	.read = spufs_proxydma_info_read,
+	.llseek = no_llseek,
+};
+
+static int spufs_show_tid(struct seq_file *s, void *private)
+{
+	struct spu_context *ctx = s->private;
+
+	seq_printf(s, "%d\n", ctx->tid);
+	return 0;
+}
+
+static int spufs_tid_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, spufs_show_tid, SPUFS_I(inode)->i_ctx);
+}
+
+static const struct file_operations spufs_tid_fops = {
+	.open		= spufs_tid_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static const char *ctx_state_names[] = {
+	"user", "system", "iowait", "loaded"
+};
+
+static unsigned long long spufs_acct_time(struct spu_context *ctx,
+		enum spu_utilization_state state)
+{
+	unsigned long long time = ctx->stats.times[state];
+
+	/*
+	 * In general, utilization statistics are updated by the controlling
+	 * thread as the spu context moves through various well defined
+	 * state transitions, but if the context is lazily loaded its
+	 * utilization statistics are not updated as the controlling thread
+	 * is not tightly coupled with the execution of the spu context.  We
+	 * calculate and apply the time delta from the last recorded state
+	 * of the spu context.
+	 */
+	if (ctx->spu && ctx->stats.util_state == state) {
+		time += ktime_get_ns() - ctx->stats.tstamp;
+	}
+
+	return time / NSEC_PER_MSEC;
+}
+
+static unsigned long long spufs_slb_flts(struct spu_context *ctx)
+{
+	unsigned long long slb_flts = ctx->stats.slb_flt;
+
+	if (ctx->state == SPU_STATE_RUNNABLE) {
+		slb_flts += (ctx->spu->stats.slb_flt -
+			     ctx->stats.slb_flt_base);
+	}
+
+	return slb_flts;
+}
+
+static unsigned long long spufs_class2_intrs(struct spu_context *ctx)
+{
+	unsigned long long class2_intrs = ctx->stats.class2_intr;
+
+	if (ctx->state == SPU_STATE_RUNNABLE) {
+		class2_intrs += (ctx->spu->stats.class2_intr -
+				 ctx->stats.class2_intr_base);
+	}
+
+	return class2_intrs;
+}
+
+
+static int spufs_show_stat(struct seq_file *s, void *private)
+{
+	struct spu_context *ctx = s->private;
+	int ret;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+
+	seq_printf(s, "%s %llu %llu %llu %llu "
+		      "%llu %llu %llu %llu %llu %llu %llu %llu\n",
+		ctx_state_names[ctx->stats.util_state],
+		spufs_acct_time(ctx, SPU_UTIL_USER),
+		spufs_acct_time(ctx, SPU_UTIL_SYSTEM),
+		spufs_acct_time(ctx, SPU_UTIL_IOWAIT),
+		spufs_acct_time(ctx, SPU_UTIL_IDLE_LOADED),
+		ctx->stats.vol_ctx_switch,
+		ctx->stats.invol_ctx_switch,
+		spufs_slb_flts(ctx),
+		ctx->stats.hash_flt,
+		ctx->stats.min_flt,
+		ctx->stats.maj_flt,
+		spufs_class2_intrs(ctx),
+		ctx->stats.libassist);
+	spu_release(ctx);
+	return 0;
+}
+
+static int spufs_stat_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, spufs_show_stat, SPUFS_I(inode)->i_ctx);
+}
+
+static const struct file_operations spufs_stat_fops = {
+	.open		= spufs_stat_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static inline int spufs_switch_log_used(struct spu_context *ctx)
+{
+	return (ctx->switch_log->head - ctx->switch_log->tail) %
+		SWITCH_LOG_BUFSIZE;
+}
+
+static inline int spufs_switch_log_avail(struct spu_context *ctx)
+{
+	return SWITCH_LOG_BUFSIZE - spufs_switch_log_used(ctx);
+}
+
+static int spufs_switch_log_open(struct inode *inode, struct file *file)
+{
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+	int rc;
+
+	rc = spu_acquire(ctx);
+	if (rc)
+		return rc;
+
+	if (ctx->switch_log) {
+		rc = -EBUSY;
+		goto out;
+	}
+
+	ctx->switch_log = kmalloc(sizeof(struct switch_log) +
+		SWITCH_LOG_BUFSIZE * sizeof(struct switch_log_entry),
+		GFP_KERNEL);
+
+	if (!ctx->switch_log) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	ctx->switch_log->head = ctx->switch_log->tail = 0;
+	init_waitqueue_head(&ctx->switch_log->wait);
+	rc = 0;
+
+out:
+	spu_release(ctx);
+	return rc;
+}
+
+static int spufs_switch_log_release(struct inode *inode, struct file *file)
+{
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+	int rc;
+
+	rc = spu_acquire(ctx);
+	if (rc)
+		return rc;
+
+	kfree(ctx->switch_log);
+	ctx->switch_log = NULL;
+	spu_release(ctx);
+
+	return 0;
+}
+
+static int switch_log_sprint(struct spu_context *ctx, char *tbuf, int n)
+{
+	struct switch_log_entry *p;
+
+	p = ctx->switch_log->log + ctx->switch_log->tail % SWITCH_LOG_BUFSIZE;
+
+	return snprintf(tbuf, n, "%llu.%09u %d %u %u %llu\n",
+			(unsigned long long) p->tstamp.tv_sec,
+			(unsigned int) p->tstamp.tv_nsec,
+			p->spu_id,
+			(unsigned int) p->type,
+			(unsigned int) p->val,
+			(unsigned long long) p->timebase);
+}
+
+static ssize_t spufs_switch_log_read(struct file *file, char __user *buf,
+			     size_t len, loff_t *ppos)
+{
+	struct inode *inode = file_inode(file);
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+	int error = 0, cnt = 0;
+
+	if (!buf)
+		return -EINVAL;
+
+	error = spu_acquire(ctx);
+	if (error)
+		return error;
+
+	while (cnt < len) {
+		char tbuf[128];
+		int width;
+
+		if (spufs_switch_log_used(ctx) == 0) {
+			if (cnt > 0) {
+				/* If there's data ready to go, we can
+				 * just return straight away */
+				break;
+
+			} else if (file->f_flags & O_NONBLOCK) {
+				error = -EAGAIN;
+				break;
+
+			} else {
+				/* spufs_wait will drop the mutex and
+				 * re-acquire, but since we're in read(), the
+				 * file cannot be _released (and so
+				 * ctx->switch_log is stable).
+				 */
+				error = spufs_wait(ctx->switch_log->wait,
+						spufs_switch_log_used(ctx) > 0);
+
+				/* On error, spufs_wait returns without the
+				 * state mutex held */
+				if (error)
+					return error;
+
+				/* We may have had entries read from underneath
+				 * us while we dropped the mutex in spufs_wait,
+				 * so re-check */
+				if (spufs_switch_log_used(ctx) == 0)
+					continue;
+			}
+		}
+
+		width = switch_log_sprint(ctx, tbuf, sizeof(tbuf));
+		if (width < len)
+			ctx->switch_log->tail =
+				(ctx->switch_log->tail + 1) %
+				 SWITCH_LOG_BUFSIZE;
+		else
+			/* If the record is greater than space available return
+			 * partial buffer (so far) */
+			break;
+
+		error = copy_to_user(buf + cnt, tbuf, width);
+		if (error)
+			break;
+		cnt += width;
+	}
+
+	spu_release(ctx);
+
+	return cnt == 0 ? error : cnt;
+}
+
+static __poll_t spufs_switch_log_poll(struct file *file, poll_table *wait)
+{
+	struct inode *inode = file_inode(file);
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+	__poll_t mask = 0;
+	int rc;
+
+	poll_wait(file, &ctx->switch_log->wait, wait);
+
+	rc = spu_acquire(ctx);
+	if (rc)
+		return rc;
+
+	if (spufs_switch_log_used(ctx) > 0)
+		mask |= EPOLLIN;
+
+	spu_release(ctx);
+
+	return mask;
+}
+
+static const struct file_operations spufs_switch_log_fops = {
+	.open		= spufs_switch_log_open,
+	.read		= spufs_switch_log_read,
+	.poll		= spufs_switch_log_poll,
+	.release	= spufs_switch_log_release,
+	.llseek		= no_llseek,
+};
+
+/**
+ * Log a context switch event to a switch log reader.
+ *
+ * Must be called with ctx->state_mutex held.
+ */
+void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx,
+		u32 type, u32 val)
+{
+	if (!ctx->switch_log)
+		return;
+
+	if (spufs_switch_log_avail(ctx) > 1) {
+		struct switch_log_entry *p;
+
+		p = ctx->switch_log->log + ctx->switch_log->head;
+		ktime_get_ts64(&p->tstamp);
+		p->timebase = get_tb();
+		p->spu_id = spu ? spu->number : -1;
+		p->type = type;
+		p->val = val;
+
+		ctx->switch_log->head =
+			(ctx->switch_log->head + 1) % SWITCH_LOG_BUFSIZE;
+	}
+
+	wake_up(&ctx->switch_log->wait);
+}
+
+static int spufs_show_ctx(struct seq_file *s, void *private)
+{
+	struct spu_context *ctx = s->private;
+	u64 mfc_control_RW;
+
+	mutex_lock(&ctx->state_mutex);
+	if (ctx->spu) {
+		struct spu *spu = ctx->spu;
+		struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+		spin_lock_irq(&spu->register_lock);
+		mfc_control_RW = in_be64(&priv2->mfc_control_RW);
+		spin_unlock_irq(&spu->register_lock);
+	} else {
+		struct spu_state *csa = &ctx->csa;
+
+		mfc_control_RW = csa->priv2.mfc_control_RW;
+	}
+
+	seq_printf(s, "%c flgs(%lx) sflgs(%lx) pri(%d) ts(%d) spu(%02d)"
+		" %c %llx %llx %llx %llx %x %x\n",
+		ctx->state == SPU_STATE_SAVED ? 'S' : 'R',
+		ctx->flags,
+		ctx->sched_flags,
+		ctx->prio,
+		ctx->time_slice,
+		ctx->spu ? ctx->spu->number : -1,
+		!list_empty(&ctx->rq) ? 'q' : ' ',
+		ctx->csa.class_0_pending,
+		ctx->csa.class_0_dar,
+		ctx->csa.class_1_dsisr,
+		mfc_control_RW,
+		ctx->ops->runcntl_read(ctx),
+		ctx->ops->status_read(ctx));
+
+	mutex_unlock(&ctx->state_mutex);
+
+	return 0;
+}
+
+static int spufs_ctx_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, spufs_show_ctx, SPUFS_I(inode)->i_ctx);
+}
+
+static const struct file_operations spufs_ctx_fops = {
+	.open           = spufs_ctx_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = single_release,
+};
+
+const struct spufs_tree_descr spufs_dir_contents[] = {
+	{ "capabilities", &spufs_caps_fops, 0444, },
+	{ "mem",  &spufs_mem_fops,  0666, LS_SIZE, },
+	{ "regs", &spufs_regs_fops,  0666, sizeof(struct spu_reg128[128]), },
+	{ "mbox", &spufs_mbox_fops, 0444, },
+	{ "ibox", &spufs_ibox_fops, 0444, },
+	{ "wbox", &spufs_wbox_fops, 0222, },
+	{ "mbox_stat", &spufs_mbox_stat_fops, 0444, sizeof(u32), },
+	{ "ibox_stat", &spufs_ibox_stat_fops, 0444, sizeof(u32), },
+	{ "wbox_stat", &spufs_wbox_stat_fops, 0444, sizeof(u32), },
+	{ "signal1", &spufs_signal1_fops, 0666, },
+	{ "signal2", &spufs_signal2_fops, 0666, },
+	{ "signal1_type", &spufs_signal1_type, 0666, },
+	{ "signal2_type", &spufs_signal2_type, 0666, },
+	{ "cntl", &spufs_cntl_fops,  0666, },
+	{ "fpcr", &spufs_fpcr_fops, 0666, sizeof(struct spu_reg128), },
+	{ "lslr", &spufs_lslr_ops, 0444, },
+	{ "mfc", &spufs_mfc_fops, 0666, },
+	{ "mss", &spufs_mss_fops, 0666, },
+	{ "npc", &spufs_npc_ops, 0666, },
+	{ "srr0", &spufs_srr0_ops, 0666, },
+	{ "decr", &spufs_decr_ops, 0666, },
+	{ "decr_status", &spufs_decr_status_ops, 0666, },
+	{ "event_mask", &spufs_event_mask_ops, 0666, },
+	{ "event_status", &spufs_event_status_ops, 0444, },
+	{ "psmap", &spufs_psmap_fops, 0666, SPUFS_PS_MAP_SIZE, },
+	{ "phys-id", &spufs_id_ops, 0666, },
+	{ "object-id", &spufs_object_id_ops, 0666, },
+	{ "mbox_info", &spufs_mbox_info_fops, 0444, sizeof(u32), },
+	{ "ibox_info", &spufs_ibox_info_fops, 0444, sizeof(u32), },
+	{ "wbox_info", &spufs_wbox_info_fops, 0444, sizeof(u32), },
+	{ "dma_info", &spufs_dma_info_fops, 0444,
+		sizeof(struct spu_dma_info), },
+	{ "proxydma_info", &spufs_proxydma_info_fops, 0444,
+		sizeof(struct spu_proxydma_info)},
+	{ "tid", &spufs_tid_fops, 0444, },
+	{ "stat", &spufs_stat_fops, 0444, },
+	{ "switch_log", &spufs_switch_log_fops, 0444 },
+	{},
+};
+
+const struct spufs_tree_descr spufs_dir_nosched_contents[] = {
+	{ "capabilities", &spufs_caps_fops, 0444, },
+	{ "mem",  &spufs_mem_fops,  0666, LS_SIZE, },
+	{ "mbox", &spufs_mbox_fops, 0444, },
+	{ "ibox", &spufs_ibox_fops, 0444, },
+	{ "wbox", &spufs_wbox_fops, 0222, },
+	{ "mbox_stat", &spufs_mbox_stat_fops, 0444, sizeof(u32), },
+	{ "ibox_stat", &spufs_ibox_stat_fops, 0444, sizeof(u32), },
+	{ "wbox_stat", &spufs_wbox_stat_fops, 0444, sizeof(u32), },
+	{ "signal1", &spufs_signal1_nosched_fops, 0222, },
+	{ "signal2", &spufs_signal2_nosched_fops, 0222, },
+	{ "signal1_type", &spufs_signal1_type, 0666, },
+	{ "signal2_type", &spufs_signal2_type, 0666, },
+	{ "mss", &spufs_mss_fops, 0666, },
+	{ "mfc", &spufs_mfc_fops, 0666, },
+	{ "cntl", &spufs_cntl_fops,  0666, },
+	{ "npc", &spufs_npc_ops, 0666, },
+	{ "psmap", &spufs_psmap_fops, 0666, SPUFS_PS_MAP_SIZE, },
+	{ "phys-id", &spufs_id_ops, 0666, },
+	{ "object-id", &spufs_object_id_ops, 0666, },
+	{ "tid", &spufs_tid_fops, 0444, },
+	{ "stat", &spufs_stat_fops, 0444, },
+	{},
+};
+
+const struct spufs_tree_descr spufs_dir_debug_contents[] = {
+	{ ".ctx", &spufs_ctx_fops, 0444, },
+	{},
+};
+
+const struct spufs_coredump_reader spufs_coredump_read[] = {
+	{ "regs", __spufs_regs_read, NULL, sizeof(struct spu_reg128[128])},
+	{ "fpcr", __spufs_fpcr_read, NULL, sizeof(struct spu_reg128) },
+	{ "lslr", NULL, spufs_lslr_get, 19 },
+	{ "decr", NULL, spufs_decr_get, 19 },
+	{ "decr_status", NULL, spufs_decr_status_get, 19 },
+	{ "mem", __spufs_mem_read, NULL, LS_SIZE, },
+	{ "signal1", __spufs_signal1_read, NULL, sizeof(u32) },
+	{ "signal1_type", NULL, spufs_signal1_type_get, 19 },
+	{ "signal2", __spufs_signal2_read, NULL, sizeof(u32) },
+	{ "signal2_type", NULL, spufs_signal2_type_get, 19 },
+	{ "event_mask", NULL, spufs_event_mask_get, 19 },
+	{ "event_status", NULL, spufs_event_status_get, 19 },
+	{ "mbox_info", __spufs_mbox_info_read, NULL, sizeof(u32) },
+	{ "ibox_info", __spufs_ibox_info_read, NULL, sizeof(u32) },
+	{ "wbox_info", __spufs_wbox_info_read, NULL, 4 * sizeof(u32)},
+	{ "dma_info", __spufs_dma_info_read, NULL, sizeof(struct spu_dma_info)},
+	{ "proxydma_info", __spufs_proxydma_info_read,
+			   NULL, sizeof(struct spu_proxydma_info)},
+	{ "object-id", NULL, spufs_object_id_get, 19 },
+	{ "npc", NULL, spufs_npc_get, 19 },
+	{ NULL },
+};
diff --git a/arch/powerpc/platforms/cell/spufs/gang.c b/arch/powerpc/platforms/cell/spufs/gang.c
new file mode 100644
index 000000000..71a443253
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/gang.c
@@ -0,0 +1,87 @@
+/*
+ * SPU file system
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+
+#include "spufs.h"
+
+struct spu_gang *alloc_spu_gang(void)
+{
+	struct spu_gang *gang;
+
+	gang = kzalloc(sizeof *gang, GFP_KERNEL);
+	if (!gang)
+		goto out;
+
+	kref_init(&gang->kref);
+	mutex_init(&gang->mutex);
+	mutex_init(&gang->aff_mutex);
+	INIT_LIST_HEAD(&gang->list);
+	INIT_LIST_HEAD(&gang->aff_list_head);
+
+out:
+	return gang;
+}
+
+static void destroy_spu_gang(struct kref *kref)
+{
+	struct spu_gang *gang;
+	gang = container_of(kref, struct spu_gang, kref);
+	WARN_ON(gang->contexts || !list_empty(&gang->list));
+	kfree(gang);
+}
+
+struct spu_gang *get_spu_gang(struct spu_gang *gang)
+{
+	kref_get(&gang->kref);
+	return gang;
+}
+
+int put_spu_gang(struct spu_gang *gang)
+{
+	return kref_put(&gang->kref, &destroy_spu_gang);
+}
+
+void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx)
+{
+	mutex_lock(&gang->mutex);
+	ctx->gang = get_spu_gang(gang);
+	list_add(&ctx->gang_list, &gang->list);
+	gang->contexts++;
+	mutex_unlock(&gang->mutex);
+}
+
+void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx)
+{
+	mutex_lock(&gang->mutex);
+	WARN_ON(ctx->gang != gang);
+	if (!list_empty(&ctx->aff_list)) {
+		list_del_init(&ctx->aff_list);
+		gang->aff_flags &= ~AFF_OFFSETS_SET;
+	}
+	list_del_init(&ctx->gang_list);
+	gang->contexts--;
+	mutex_unlock(&gang->mutex);
+
+	put_spu_gang(gang);
+}
diff --git a/arch/powerpc/platforms/cell/spufs/hw_ops.c b/arch/powerpc/platforms/cell/spufs/hw_ops.c
new file mode 100644
index 000000000..ae9d24d31
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/hw_ops.c
@@ -0,0 +1,348 @@
+/* hw_ops.c - query/set operations on active SPU context.
+ *
+ * Copyright (C) IBM 2005
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/poll.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+
+#include <asm/io.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/spu_csa.h>
+#include <asm/mmu_context.h>
+#include "spufs.h"
+
+static int spu_hw_mbox_read(struct spu_context *ctx, u32 * data)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_problem __iomem *prob = spu->problem;
+	u32 mbox_stat;
+	int ret = 0;
+
+	spin_lock_irq(&spu->register_lock);
+	mbox_stat = in_be32(&prob->mb_stat_R);
+	if (mbox_stat & 0x0000ff) {
+		*data = in_be32(&prob->pu_mb_R);
+		ret = 4;
+	}
+	spin_unlock_irq(&spu->register_lock);
+	return ret;
+}
+
+static u32 spu_hw_mbox_stat_read(struct spu_context *ctx)
+{
+	return in_be32(&ctx->spu->problem->mb_stat_R);
+}
+
+static __poll_t spu_hw_mbox_stat_poll(struct spu_context *ctx, __poll_t events)
+{
+	struct spu *spu = ctx->spu;
+	__poll_t ret = 0;
+	u32 stat;
+
+	spin_lock_irq(&spu->register_lock);
+	stat = in_be32(&spu->problem->mb_stat_R);
+
+	/* if the requested event is there, return the poll
+	   mask, otherwise enable the interrupt to get notified,
+	   but first mark any pending interrupts as done so
+	   we don't get woken up unnecessarily */
+
+	if (events & (EPOLLIN | EPOLLRDNORM)) {
+		if (stat & 0xff0000)
+			ret |= EPOLLIN | EPOLLRDNORM;
+		else {
+			spu_int_stat_clear(spu, 2, CLASS2_MAILBOX_INTR);
+			spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_INTR);
+		}
+	}
+	if (events & (EPOLLOUT | EPOLLWRNORM)) {
+		if (stat & 0x00ff00)
+			ret = EPOLLOUT | EPOLLWRNORM;
+		else {
+			spu_int_stat_clear(spu, 2,
+					CLASS2_MAILBOX_THRESHOLD_INTR);
+			spu_int_mask_or(spu, 2,
+					CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR);
+		}
+	}
+	spin_unlock_irq(&spu->register_lock);
+	return ret;
+}
+
+static int spu_hw_ibox_read(struct spu_context *ctx, u32 * data)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_problem __iomem *prob = spu->problem;
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	int ret;
+
+	spin_lock_irq(&spu->register_lock);
+	if (in_be32(&prob->mb_stat_R) & 0xff0000) {
+		/* read the first available word */
+		*data = in_be64(&priv2->puint_mb_R);
+		ret = 4;
+	} else {
+		/* make sure we get woken up by the interrupt */
+		spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_INTR);
+		ret = 0;
+	}
+	spin_unlock_irq(&spu->register_lock);
+	return ret;
+}
+
+static int spu_hw_wbox_write(struct spu_context *ctx, u32 data)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_problem __iomem *prob = spu->problem;
+	int ret;
+
+	spin_lock_irq(&spu->register_lock);
+	if (in_be32(&prob->mb_stat_R) & 0x00ff00) {
+		/* we have space to write wbox_data to */
+		out_be32(&prob->spu_mb_W, data);
+		ret = 4;
+	} else {
+		/* make sure we get woken up by the interrupt when space
+		   becomes available */
+		spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR);
+		ret = 0;
+	}
+	spin_unlock_irq(&spu->register_lock);
+	return ret;
+}
+
+static void spu_hw_signal1_write(struct spu_context *ctx, u32 data)
+{
+	out_be32(&ctx->spu->problem->signal_notify1, data);
+}
+
+static void spu_hw_signal2_write(struct spu_context *ctx, u32 data)
+{
+	out_be32(&ctx->spu->problem->signal_notify2, data);
+}
+
+static void spu_hw_signal1_type_set(struct spu_context *ctx, u64 val)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	u64 tmp;
+
+	spin_lock_irq(&spu->register_lock);
+	tmp = in_be64(&priv2->spu_cfg_RW);
+	if (val)
+		tmp |= 1;
+	else
+		tmp &= ~1;
+	out_be64(&priv2->spu_cfg_RW, tmp);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+static u64 spu_hw_signal1_type_get(struct spu_context *ctx)
+{
+	return ((in_be64(&ctx->spu->priv2->spu_cfg_RW) & 1) != 0);
+}
+
+static void spu_hw_signal2_type_set(struct spu_context *ctx, u64 val)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	u64 tmp;
+
+	spin_lock_irq(&spu->register_lock);
+	tmp = in_be64(&priv2->spu_cfg_RW);
+	if (val)
+		tmp |= 2;
+	else
+		tmp &= ~2;
+	out_be64(&priv2->spu_cfg_RW, tmp);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+static u64 spu_hw_signal2_type_get(struct spu_context *ctx)
+{
+	return ((in_be64(&ctx->spu->priv2->spu_cfg_RW) & 2) != 0);
+}
+
+static u32 spu_hw_npc_read(struct spu_context *ctx)
+{
+	return in_be32(&ctx->spu->problem->spu_npc_RW);
+}
+
+static void spu_hw_npc_write(struct spu_context *ctx, u32 val)
+{
+	out_be32(&ctx->spu->problem->spu_npc_RW, val);
+}
+
+static u32 spu_hw_status_read(struct spu_context *ctx)
+{
+	return in_be32(&ctx->spu->problem->spu_status_R);
+}
+
+static char *spu_hw_get_ls(struct spu_context *ctx)
+{
+	return ctx->spu->local_store;
+}
+
+static void spu_hw_privcntl_write(struct spu_context *ctx, u64 val)
+{
+	out_be64(&ctx->spu->priv2->spu_privcntl_RW, val);
+}
+
+static u32 spu_hw_runcntl_read(struct spu_context *ctx)
+{
+	return in_be32(&ctx->spu->problem->spu_runcntl_RW);
+}
+
+static void spu_hw_runcntl_write(struct spu_context *ctx, u32 val)
+{
+	spin_lock_irq(&ctx->spu->register_lock);
+	if (val & SPU_RUNCNTL_ISOLATE)
+		spu_hw_privcntl_write(ctx,
+			SPU_PRIVCNT_LOAD_REQUEST_ENABLE_MASK);
+	out_be32(&ctx->spu->problem->spu_runcntl_RW, val);
+	spin_unlock_irq(&ctx->spu->register_lock);
+}
+
+static void spu_hw_runcntl_stop(struct spu_context *ctx)
+{
+	spin_lock_irq(&ctx->spu->register_lock);
+	out_be32(&ctx->spu->problem->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+	while (in_be32(&ctx->spu->problem->spu_status_R) & SPU_STATUS_RUNNING)
+		cpu_relax();
+	spin_unlock_irq(&ctx->spu->register_lock);
+}
+
+static void spu_hw_master_start(struct spu_context *ctx)
+{
+	struct spu *spu = ctx->spu;
+	u64 sr1;
+
+	spin_lock_irq(&spu->register_lock);
+	sr1 = spu_mfc_sr1_get(spu) | MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+	spu_mfc_sr1_set(spu, sr1);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+static void spu_hw_master_stop(struct spu_context *ctx)
+{
+	struct spu *spu = ctx->spu;
+	u64 sr1;
+
+	spin_lock_irq(&spu->register_lock);
+	sr1 = spu_mfc_sr1_get(spu) & ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+	spu_mfc_sr1_set(spu, sr1);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+static int spu_hw_set_mfc_query(struct spu_context * ctx, u32 mask, u32 mode)
+{
+	struct spu_problem __iomem *prob = ctx->spu->problem;
+	int ret;
+
+	spin_lock_irq(&ctx->spu->register_lock);
+	ret = -EAGAIN;
+	if (in_be32(&prob->dma_querytype_RW))
+		goto out;
+	ret = 0;
+	out_be32(&prob->dma_querymask_RW, mask);
+	out_be32(&prob->dma_querytype_RW, mode);
+out:
+	spin_unlock_irq(&ctx->spu->register_lock);
+	return ret;
+}
+
+static u32 spu_hw_read_mfc_tagstatus(struct spu_context * ctx)
+{
+	return in_be32(&ctx->spu->problem->dma_tagstatus_R);
+}
+
+static u32 spu_hw_get_mfc_free_elements(struct spu_context *ctx)
+{
+	return in_be32(&ctx->spu->problem->dma_qstatus_R);
+}
+
+static int spu_hw_send_mfc_command(struct spu_context *ctx,
+					struct mfc_dma_command *cmd)
+{
+	u32 status;
+	struct spu_problem __iomem *prob = ctx->spu->problem;
+
+	spin_lock_irq(&ctx->spu->register_lock);
+	out_be32(&prob->mfc_lsa_W, cmd->lsa);
+	out_be64(&prob->mfc_ea_W, cmd->ea);
+	out_be32(&prob->mfc_union_W.by32.mfc_size_tag32,
+				cmd->size << 16 | cmd->tag);
+	out_be32(&prob->mfc_union_W.by32.mfc_class_cmd32,
+				cmd->class << 16 | cmd->cmd);
+	status = in_be32(&prob->mfc_union_W.by32.mfc_class_cmd32);
+	spin_unlock_irq(&ctx->spu->register_lock);
+
+	switch (status & 0xffff) {
+	case 0:
+		return 0;
+	case 2:
+		return -EAGAIN;
+	default:
+		return -EINVAL;
+	}
+}
+
+static void spu_hw_restart_dma(struct spu_context *ctx)
+{
+	struct spu_priv2 __iomem *priv2 = ctx->spu->priv2;
+
+	if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &ctx->spu->flags))
+		out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND);
+}
+
+struct spu_context_ops spu_hw_ops = {
+	.mbox_read = spu_hw_mbox_read,
+	.mbox_stat_read = spu_hw_mbox_stat_read,
+	.mbox_stat_poll = spu_hw_mbox_stat_poll,
+	.ibox_read = spu_hw_ibox_read,
+	.wbox_write = spu_hw_wbox_write,
+	.signal1_write = spu_hw_signal1_write,
+	.signal2_write = spu_hw_signal2_write,
+	.signal1_type_set = spu_hw_signal1_type_set,
+	.signal1_type_get = spu_hw_signal1_type_get,
+	.signal2_type_set = spu_hw_signal2_type_set,
+	.signal2_type_get = spu_hw_signal2_type_get,
+	.npc_read = spu_hw_npc_read,
+	.npc_write = spu_hw_npc_write,
+	.status_read = spu_hw_status_read,
+	.get_ls = spu_hw_get_ls,
+	.privcntl_write = spu_hw_privcntl_write,
+	.runcntl_read = spu_hw_runcntl_read,
+	.runcntl_write = spu_hw_runcntl_write,
+	.runcntl_stop = spu_hw_runcntl_stop,
+	.master_start = spu_hw_master_start,
+	.master_stop = spu_hw_master_stop,
+	.set_mfc_query = spu_hw_set_mfc_query,
+	.read_mfc_tagstatus = spu_hw_read_mfc_tagstatus,
+	.get_mfc_free_elements = spu_hw_get_mfc_free_elements,
+	.send_mfc_command = spu_hw_send_mfc_command,
+	.restart_dma = spu_hw_restart_dma,
+};
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
new file mode 100644
index 000000000..db329d4bf
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -0,0 +1,829 @@
+
+/*
+ * SPU file system
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/fsnotify.h>
+#include <linux/backing-dev.h>
+#include <linux/init.h>
+#include <linux/ioctl.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/parser.h>
+
+#include <asm/prom.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <linux/uaccess.h>
+
+#include "spufs.h"
+
+struct spufs_sb_info {
+	int debug;
+};
+
+static struct kmem_cache *spufs_inode_cache;
+char *isolated_loader;
+static int isolated_loader_size;
+
+static struct spufs_sb_info *spufs_get_sb_info(struct super_block *sb)
+{
+	return sb->s_fs_info;
+}
+
+static struct inode *
+spufs_alloc_inode(struct super_block *sb)
+{
+	struct spufs_inode_info *ei;
+
+	ei = kmem_cache_alloc(spufs_inode_cache, GFP_KERNEL);
+	if (!ei)
+		return NULL;
+
+	ei->i_gang = NULL;
+	ei->i_ctx = NULL;
+	ei->i_openers = 0;
+
+	return &ei->vfs_inode;
+}
+
+static void spufs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	kmem_cache_free(spufs_inode_cache, SPUFS_I(inode));
+}
+
+static void spufs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, spufs_i_callback);
+}
+
+static void
+spufs_init_once(void *p)
+{
+	struct spufs_inode_info *ei = p;
+
+	inode_init_once(&ei->vfs_inode);
+}
+
+static struct inode *
+spufs_new_inode(struct super_block *sb, umode_t mode)
+{
+	struct inode *inode;
+
+	inode = new_inode(sb);
+	if (!inode)
+		goto out;
+
+	inode->i_ino = get_next_ino();
+	inode->i_mode = mode;
+	inode->i_uid = current_fsuid();
+	inode->i_gid = current_fsgid();
+	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+out:
+	return inode;
+}
+
+static int
+spufs_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = d_inode(dentry);
+
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    (attr->ia_size != inode->i_size))
+		return -EINVAL;
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
+}
+
+
+static int
+spufs_new_file(struct super_block *sb, struct dentry *dentry,
+		const struct file_operations *fops, umode_t mode,
+		size_t size, struct spu_context *ctx)
+{
+	static const struct inode_operations spufs_file_iops = {
+		.setattr = spufs_setattr,
+	};
+	struct inode *inode;
+	int ret;
+
+	ret = -ENOSPC;
+	inode = spufs_new_inode(sb, S_IFREG | mode);
+	if (!inode)
+		goto out;
+
+	ret = 0;
+	inode->i_op = &spufs_file_iops;
+	inode->i_fop = fops;
+	inode->i_size = size;
+	inode->i_private = SPUFS_I(inode)->i_ctx = get_spu_context(ctx);
+	d_add(dentry, inode);
+out:
+	return ret;
+}
+
+static void
+spufs_evict_inode(struct inode *inode)
+{
+	struct spufs_inode_info *ei = SPUFS_I(inode);
+	clear_inode(inode);
+	if (ei->i_ctx)
+		put_spu_context(ei->i_ctx);
+	if (ei->i_gang)
+		put_spu_gang(ei->i_gang);
+}
+
+static void spufs_prune_dir(struct dentry *dir)
+{
+	struct dentry *dentry, *tmp;
+
+	inode_lock(d_inode(dir));
+	list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) {
+		spin_lock(&dentry->d_lock);
+		if (simple_positive(dentry)) {
+			dget_dlock(dentry);
+			__d_drop(dentry);
+			spin_unlock(&dentry->d_lock);
+			simple_unlink(d_inode(dir), dentry);
+			/* XXX: what was dcache_lock protecting here? Other
+			 * filesystems (IB, configfs) release dcache_lock
+			 * before unlink */
+			dput(dentry);
+		} else {
+			spin_unlock(&dentry->d_lock);
+		}
+	}
+	shrink_dcache_parent(dir);
+	inode_unlock(d_inode(dir));
+}
+
+/* Caller must hold parent->i_mutex */
+static int spufs_rmdir(struct inode *parent, struct dentry *dir)
+{
+	/* remove all entries */
+	int res;
+	spufs_prune_dir(dir);
+	d_drop(dir);
+	res = simple_rmdir(parent, dir);
+	/* We have to give up the mm_struct */
+	spu_forget(SPUFS_I(d_inode(dir))->i_ctx);
+	return res;
+}
+
+static int spufs_fill_dir(struct dentry *dir,
+		const struct spufs_tree_descr *files, umode_t mode,
+		struct spu_context *ctx)
+{
+	while (files->name && files->name[0]) {
+		int ret;
+		struct dentry *dentry = d_alloc_name(dir, files->name);
+		if (!dentry)
+			return -ENOMEM;
+		ret = spufs_new_file(dir->d_sb, dentry, files->ops,
+					files->mode & mode, files->size, ctx);
+		if (ret)
+			return ret;
+		files++;
+	}
+	return 0;
+}
+
+static int spufs_dir_close(struct inode *inode, struct file *file)
+{
+	struct spu_context *ctx;
+	struct inode *parent;
+	struct dentry *dir;
+	int ret;
+
+	dir = file->f_path.dentry;
+	parent = d_inode(dir->d_parent);
+	ctx = SPUFS_I(d_inode(dir))->i_ctx;
+
+	inode_lock_nested(parent, I_MUTEX_PARENT);
+	ret = spufs_rmdir(parent, dir);
+	inode_unlock(parent);
+	WARN_ON(ret);
+
+	return dcache_dir_close(inode, file);
+}
+
+const struct file_operations spufs_context_fops = {
+	.open		= dcache_dir_open,
+	.release	= spufs_dir_close,
+	.llseek		= dcache_dir_lseek,
+	.read		= generic_read_dir,
+	.iterate_shared	= dcache_readdir,
+	.fsync		= noop_fsync,
+};
+EXPORT_SYMBOL_GPL(spufs_context_fops);
+
+static int
+spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags,
+		umode_t mode)
+{
+	int ret;
+	struct inode *inode;
+	struct spu_context *ctx;
+
+	inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR);
+	if (!inode)
+		return -ENOSPC;
+
+	if (dir->i_mode & S_ISGID) {
+		inode->i_gid = dir->i_gid;
+		inode->i_mode &= S_ISGID;
+	}
+	ctx = alloc_spu_context(SPUFS_I(dir)->i_gang); /* XXX gang */
+	SPUFS_I(inode)->i_ctx = ctx;
+	if (!ctx) {
+		iput(inode);
+		return -ENOSPC;
+	}
+
+	ctx->flags = flags;
+	inode->i_op = &simple_dir_inode_operations;
+	inode->i_fop = &simple_dir_operations;
+
+	inode_lock(inode);
+
+	dget(dentry);
+	inc_nlink(dir);
+	inc_nlink(inode);
+
+	d_instantiate(dentry, inode);
+
+	if (flags & SPU_CREATE_NOSCHED)
+		ret = spufs_fill_dir(dentry, spufs_dir_nosched_contents,
+					 mode, ctx);
+	else
+		ret = spufs_fill_dir(dentry, spufs_dir_contents, mode, ctx);
+
+	if (!ret && spufs_get_sb_info(dir->i_sb)->debug)
+		ret = spufs_fill_dir(dentry, spufs_dir_debug_contents,
+				mode, ctx);
+
+	if (ret)
+		spufs_rmdir(dir, dentry);
+
+	inode_unlock(inode);
+
+	return ret;
+}
+
+static int spufs_context_open(struct path *path)
+{
+	int ret;
+	struct file *filp;
+
+	ret = get_unused_fd_flags(0);
+	if (ret < 0)
+		return ret;
+
+	filp = dentry_open(path, O_RDONLY, current_cred());
+	if (IS_ERR(filp)) {
+		put_unused_fd(ret);
+		return PTR_ERR(filp);
+	}
+
+	filp->f_op = &spufs_context_fops;
+	fd_install(ret, filp);
+	return ret;
+}
+
+static struct spu_context *
+spufs_assert_affinity(unsigned int flags, struct spu_gang *gang,
+						struct file *filp)
+{
+	struct spu_context *tmp, *neighbor, *err;
+	int count, node;
+	int aff_supp;
+
+	aff_supp = !list_empty(&(list_entry(cbe_spu_info[0].spus.next,
+					struct spu, cbe_list))->aff_list);
+
+	if (!aff_supp)
+		return ERR_PTR(-EINVAL);
+
+	if (flags & SPU_CREATE_GANG)
+		return ERR_PTR(-EINVAL);
+
+	if (flags & SPU_CREATE_AFFINITY_MEM &&
+	    gang->aff_ref_ctx &&
+	    gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM)
+		return ERR_PTR(-EEXIST);
+
+	if (gang->aff_flags & AFF_MERGED)
+		return ERR_PTR(-EBUSY);
+
+	neighbor = NULL;
+	if (flags & SPU_CREATE_AFFINITY_SPU) {
+		if (!filp || filp->f_op != &spufs_context_fops)
+			return ERR_PTR(-EINVAL);
+
+		neighbor = get_spu_context(
+				SPUFS_I(file_inode(filp))->i_ctx);
+
+		if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) &&
+		    !list_is_last(&neighbor->aff_list, &gang->aff_list_head) &&
+		    !list_entry(neighbor->aff_list.next, struct spu_context,
+		    aff_list)->aff_head) {
+			err = ERR_PTR(-EEXIST);
+			goto out_put_neighbor;
+		}
+
+		if (gang != neighbor->gang) {
+			err = ERR_PTR(-EINVAL);
+			goto out_put_neighbor;
+		}
+
+		count = 1;
+		list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
+			count++;
+		if (list_empty(&neighbor->aff_list))
+			count++;
+
+		for (node = 0; node < MAX_NUMNODES; node++) {
+			if ((cbe_spu_info[node].n_spus - atomic_read(
+				&cbe_spu_info[node].reserved_spus)) >= count)
+				break;
+		}
+
+		if (node == MAX_NUMNODES) {
+			err = ERR_PTR(-EEXIST);
+			goto out_put_neighbor;
+		}
+	}
+
+	return neighbor;
+
+out_put_neighbor:
+	put_spu_context(neighbor);
+	return err;
+}
+
+static void
+spufs_set_affinity(unsigned int flags, struct spu_context *ctx,
+					struct spu_context *neighbor)
+{
+	if (flags & SPU_CREATE_AFFINITY_MEM)
+		ctx->gang->aff_ref_ctx = ctx;
+
+	if (flags & SPU_CREATE_AFFINITY_SPU) {
+		if (list_empty(&neighbor->aff_list)) {
+			list_add_tail(&neighbor->aff_list,
+				&ctx->gang->aff_list_head);
+			neighbor->aff_head = 1;
+		}
+
+		if (list_is_last(&neighbor->aff_list, &ctx->gang->aff_list_head)
+		    || list_entry(neighbor->aff_list.next, struct spu_context,
+							aff_list)->aff_head) {
+			list_add(&ctx->aff_list, &neighbor->aff_list);
+		} else  {
+			list_add_tail(&ctx->aff_list, &neighbor->aff_list);
+			if (neighbor->aff_head) {
+				neighbor->aff_head = 0;
+				ctx->aff_head = 1;
+			}
+		}
+
+		if (!ctx->gang->aff_ref_ctx)
+			ctx->gang->aff_ref_ctx = ctx;
+	}
+}
+
+static int
+spufs_create_context(struct inode *inode, struct dentry *dentry,
+			struct vfsmount *mnt, int flags, umode_t mode,
+			struct file *aff_filp)
+{
+	int ret;
+	int affinity;
+	struct spu_gang *gang;
+	struct spu_context *neighbor;
+	struct path path = {.mnt = mnt, .dentry = dentry};
+
+	if ((flags & SPU_CREATE_NOSCHED) &&
+	    !capable(CAP_SYS_NICE))
+		return -EPERM;
+
+	if ((flags & (SPU_CREATE_NOSCHED | SPU_CREATE_ISOLATE))
+	    == SPU_CREATE_ISOLATE)
+		return -EINVAL;
+
+	if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader)
+		return -ENODEV;
+
+	gang = NULL;
+	neighbor = NULL;
+	affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU);
+	if (affinity) {
+		gang = SPUFS_I(inode)->i_gang;
+		if (!gang)
+			return -EINVAL;
+		mutex_lock(&gang->aff_mutex);
+		neighbor = spufs_assert_affinity(flags, gang, aff_filp);
+		if (IS_ERR(neighbor)) {
+			ret = PTR_ERR(neighbor);
+			goto out_aff_unlock;
+		}
+	}
+
+	ret = spufs_mkdir(inode, dentry, flags, mode & 0777);
+	if (ret)
+		goto out_aff_unlock;
+
+	if (affinity) {
+		spufs_set_affinity(flags, SPUFS_I(d_inode(dentry))->i_ctx,
+								neighbor);
+		if (neighbor)
+			put_spu_context(neighbor);
+	}
+
+	ret = spufs_context_open(&path);
+	if (ret < 0)
+		WARN_ON(spufs_rmdir(inode, dentry));
+
+out_aff_unlock:
+	if (affinity)
+		mutex_unlock(&gang->aff_mutex);
+	return ret;
+}
+
+static int
+spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	int ret;
+	struct inode *inode;
+	struct spu_gang *gang;
+
+	ret = -ENOSPC;
+	inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR);
+	if (!inode)
+		goto out;
+
+	ret = 0;
+	if (dir->i_mode & S_ISGID) {
+		inode->i_gid = dir->i_gid;
+		inode->i_mode &= S_ISGID;
+	}
+	gang = alloc_spu_gang();
+	SPUFS_I(inode)->i_ctx = NULL;
+	SPUFS_I(inode)->i_gang = gang;
+	if (!gang) {
+		ret = -ENOMEM;
+		goto out_iput;
+	}
+
+	inode->i_op = &simple_dir_inode_operations;
+	inode->i_fop = &simple_dir_operations;
+
+	d_instantiate(dentry, inode);
+	inc_nlink(dir);
+	inc_nlink(d_inode(dentry));
+	return ret;
+
+out_iput:
+	iput(inode);
+out:
+	return ret;
+}
+
+static int spufs_gang_open(struct path *path)
+{
+	int ret;
+	struct file *filp;
+
+	ret = get_unused_fd_flags(0);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * get references for dget and mntget, will be released
+	 * in error path of *_open().
+	 */
+	filp = dentry_open(path, O_RDONLY, current_cred());
+	if (IS_ERR(filp)) {
+		put_unused_fd(ret);
+		return PTR_ERR(filp);
+	}
+
+	filp->f_op = &simple_dir_operations;
+	fd_install(ret, filp);
+	return ret;
+}
+
+static int spufs_create_gang(struct inode *inode,
+			struct dentry *dentry,
+			struct vfsmount *mnt, umode_t mode)
+{
+	struct path path = {.mnt = mnt, .dentry = dentry};
+	int ret;
+
+	ret = spufs_mkgang(inode, dentry, mode & 0777);
+	if (!ret) {
+		ret = spufs_gang_open(&path);
+		if (ret < 0) {
+			int err = simple_rmdir(inode, dentry);
+			WARN_ON(err);
+		}
+	}
+	return ret;
+}
+
+
+static struct file_system_type spufs_type;
+
+long spufs_create(struct path *path, struct dentry *dentry,
+		unsigned int flags, umode_t mode, struct file *filp)
+{
+	struct inode *dir = d_inode(path->dentry);
+	int ret;
+
+	/* check if we are on spufs */
+	if (path->dentry->d_sb->s_type != &spufs_type)
+		return -EINVAL;
+
+	/* don't accept undefined flags */
+	if (flags & (~SPU_CREATE_FLAG_ALL))
+		return -EINVAL;
+
+	/* only threads can be underneath a gang */
+	if (path->dentry != path->dentry->d_sb->s_root)
+		if ((flags & SPU_CREATE_GANG) || !SPUFS_I(dir)->i_gang)
+			return -EINVAL;
+
+	mode &= ~current_umask();
+
+	if (flags & SPU_CREATE_GANG)
+		ret = spufs_create_gang(dir, dentry, path->mnt, mode);
+	else
+		ret = spufs_create_context(dir, dentry, path->mnt, flags, mode,
+					    filp);
+	if (ret >= 0)
+		fsnotify_mkdir(dir, dentry);
+
+	return ret;
+}
+
+/* File system initialization */
+enum {
+	Opt_uid, Opt_gid, Opt_mode, Opt_debug, Opt_err,
+};
+
+static const match_table_t spufs_tokens = {
+	{ Opt_uid,   "uid=%d" },
+	{ Opt_gid,   "gid=%d" },
+	{ Opt_mode,  "mode=%o" },
+	{ Opt_debug, "debug" },
+	{ Opt_err,    NULL  },
+};
+
+static int spufs_show_options(struct seq_file *m, struct dentry *root)
+{
+	struct spufs_sb_info *sbi = spufs_get_sb_info(root->d_sb);
+	struct inode *inode = root->d_inode;
+
+	if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID))
+		seq_printf(m, ",uid=%u",
+			   from_kuid_munged(&init_user_ns, inode->i_uid));
+	if (!gid_eq(inode->i_gid, GLOBAL_ROOT_GID))
+		seq_printf(m, ",gid=%u",
+			   from_kgid_munged(&init_user_ns, inode->i_gid));
+	if ((inode->i_mode & S_IALLUGO) != 0775)
+		seq_printf(m, ",mode=%o", inode->i_mode);
+	if (sbi->debug)
+		seq_puts(m, ",debug");
+	return 0;
+}
+
+static int
+spufs_parse_options(struct super_block *sb, char *options, struct inode *root)
+{
+	char *p;
+	substring_t args[MAX_OPT_ARGS];
+
+	while ((p = strsep(&options, ",")) != NULL) {
+		int token, option;
+
+		if (!*p)
+			continue;
+
+		token = match_token(p, spufs_tokens, args);
+		switch (token) {
+		case Opt_uid:
+			if (match_int(&args[0], &option))
+				return 0;
+			root->i_uid = make_kuid(current_user_ns(), option);
+			if (!uid_valid(root->i_uid))
+				return 0;
+			break;
+		case Opt_gid:
+			if (match_int(&args[0], &option))
+				return 0;
+			root->i_gid = make_kgid(current_user_ns(), option);
+			if (!gid_valid(root->i_gid))
+				return 0;
+			break;
+		case Opt_mode:
+			if (match_octal(&args[0], &option))
+				return 0;
+			root->i_mode = option | S_IFDIR;
+			break;
+		case Opt_debug:
+			spufs_get_sb_info(sb)->debug = 1;
+			break;
+		default:
+			return 0;
+		}
+	}
+	return 1;
+}
+
+static void spufs_exit_isolated_loader(void)
+{
+	free_pages((unsigned long) isolated_loader,
+			get_order(isolated_loader_size));
+}
+
+static void
+spufs_init_isolated_loader(void)
+{
+	struct device_node *dn;
+	const char *loader;
+	int size;
+
+	dn = of_find_node_by_path("/spu-isolation");
+	if (!dn)
+		return;
+
+	loader = of_get_property(dn, "loader", &size);
+	if (!loader)
+		return;
+
+	/* the loader must be align on a 16 byte boundary */
+	isolated_loader = (char *)__get_free_pages(GFP_KERNEL, get_order(size));
+	if (!isolated_loader)
+		return;
+
+	isolated_loader_size = size;
+	memcpy(isolated_loader, loader, size);
+	printk(KERN_INFO "spufs: SPU isolation mode enabled\n");
+}
+
+static int
+spufs_create_root(struct super_block *sb, void *data)
+{
+	struct inode *inode;
+	int ret;
+
+	ret = -ENODEV;
+	if (!spu_management_ops)
+		goto out;
+
+	ret = -ENOMEM;
+	inode = spufs_new_inode(sb, S_IFDIR | 0775);
+	if (!inode)
+		goto out;
+
+	inode->i_op = &simple_dir_inode_operations;
+	inode->i_fop = &simple_dir_operations;
+	SPUFS_I(inode)->i_ctx = NULL;
+	inc_nlink(inode);
+
+	ret = -EINVAL;
+	if (!spufs_parse_options(sb, data, inode))
+		goto out_iput;
+
+	ret = -ENOMEM;
+	sb->s_root = d_make_root(inode);
+	if (!sb->s_root)
+		goto out;
+
+	return 0;
+out_iput:
+	iput(inode);
+out:
+	return ret;
+}
+
+static int
+spufs_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct spufs_sb_info *info;
+	static const struct super_operations s_ops = {
+		.alloc_inode = spufs_alloc_inode,
+		.destroy_inode = spufs_destroy_inode,
+		.statfs = simple_statfs,
+		.evict_inode = spufs_evict_inode,
+		.show_options = spufs_show_options,
+	};
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	sb->s_blocksize = PAGE_SIZE;
+	sb->s_blocksize_bits = PAGE_SHIFT;
+	sb->s_magic = SPUFS_MAGIC;
+	sb->s_op = &s_ops;
+	sb->s_fs_info = info;
+
+	return spufs_create_root(sb, data);
+}
+
+static struct dentry *
+spufs_mount(struct file_system_type *fstype, int flags,
+		const char *name, void *data)
+{
+	return mount_single(fstype, flags, data, spufs_fill_super);
+}
+
+static struct file_system_type spufs_type = {
+	.owner = THIS_MODULE,
+	.name = "spufs",
+	.mount = spufs_mount,
+	.kill_sb = kill_litter_super,
+};
+MODULE_ALIAS_FS("spufs");
+
+static int __init spufs_init(void)
+{
+	int ret;
+
+	ret = -ENODEV;
+	if (!spu_management_ops)
+		goto out;
+
+	ret = -ENOMEM;
+	spufs_inode_cache = kmem_cache_create("spufs_inode_cache",
+			sizeof(struct spufs_inode_info), 0,
+			SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT, spufs_init_once);
+
+	if (!spufs_inode_cache)
+		goto out;
+	ret = spu_sched_init();
+	if (ret)
+		goto out_cache;
+	ret = register_spu_syscalls(&spufs_calls);
+	if (ret)
+		goto out_sched;
+	ret = register_filesystem(&spufs_type);
+	if (ret)
+		goto out_syscalls;
+
+	spufs_init_isolated_loader();
+
+	return 0;
+
+out_syscalls:
+	unregister_spu_syscalls(&spufs_calls);
+out_sched:
+	spu_sched_exit();
+out_cache:
+	kmem_cache_destroy(spufs_inode_cache);
+out:
+	return ret;
+}
+module_init(spufs_init);
+
+static void __exit spufs_exit(void)
+{
+	spu_sched_exit();
+	spufs_exit_isolated_loader();
+	unregister_spu_syscalls(&spufs_calls);
+	unregister_filesystem(&spufs_type);
+	kmem_cache_destroy(spufs_inode_cache);
+}
+module_exit(spufs_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arnd Bergmann <arndb@de.ibm.com>");
+
diff --git a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
new file mode 100644
index 000000000..d9de848da
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
@@ -0,0 +1,63 @@
+/*
+ * SPU local store allocation routines
+ *
+ * Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/mmu.h>
+
+#include "spufs.h"
+
+int spu_alloc_lscsa(struct spu_state *csa)
+{
+	struct spu_lscsa *lscsa;
+	unsigned char *p;
+
+	lscsa = vzalloc(sizeof(*lscsa));
+	if (!lscsa)
+		return -ENOMEM;
+	csa->lscsa = lscsa;
+
+	/* Set LS pages reserved to allow for user-space mapping. */
+	for (p = lscsa->ls; p < lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+		SetPageReserved(vmalloc_to_page(p));
+
+	return 0;
+}
+
+void spu_free_lscsa(struct spu_state *csa)
+{
+	/* Clear reserved bit before vfree. */
+	unsigned char *p;
+
+	if (csa->lscsa == NULL)
+		return;
+
+	for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+		ClearPageReserved(vmalloc_to_page(p));
+
+	vfree(csa->lscsa);
+}
diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c
new file mode 100644
index 000000000..07f82d739
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/run.c
@@ -0,0 +1,455 @@
+// SPDX-License-Identifier: GPL-2.0
+#define DEBUG
+
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/io.h>
+#include <asm/unistd.h>
+
+#include "spufs.h"
+
+/* interrupt-level stop callback function. */
+void spufs_stop_callback(struct spu *spu, int irq)
+{
+	struct spu_context *ctx = spu->ctx;
+
+	/*
+	 * It should be impossible to preempt a context while an exception
+	 * is being processed, since the context switch code is specially
+	 * coded to deal with interrupts ... But, just in case, sanity check
+	 * the context pointer.  It is OK to return doing nothing since
+	 * the exception will be regenerated when the context is resumed.
+	 */
+	if (ctx) {
+		/* Copy exception arguments into module specific structure */
+		switch(irq) {
+		case 0 :
+			ctx->csa.class_0_pending = spu->class_0_pending;
+			ctx->csa.class_0_dar = spu->class_0_dar;
+			break;
+		case 1 :
+			ctx->csa.class_1_dsisr = spu->class_1_dsisr;
+			ctx->csa.class_1_dar = spu->class_1_dar;
+			break;
+		case 2 :
+			break;
+		}
+
+		/* ensure that the exception status has hit memory before a
+		 * thread waiting on the context's stop queue is woken */
+		smp_wmb();
+
+		wake_up_all(&ctx->stop_wq);
+	}
+}
+
+int spu_stopped(struct spu_context *ctx, u32 *stat)
+{
+	u64 dsisr;
+	u32 stopped;
+
+	stopped = SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP |
+		SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP;
+
+top:
+	*stat = ctx->ops->status_read(ctx);
+	if (*stat & stopped) {
+		/*
+		 * If the spu hasn't finished stopping, we need to
+		 * re-read the register to get the stopped value.
+		 */
+		if (*stat & SPU_STATUS_RUNNING)
+			goto top;
+		return 1;
+	}
+
+	if (test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags))
+		return 1;
+
+	dsisr = ctx->csa.class_1_dsisr;
+	if (dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED))
+		return 1;
+
+	if (ctx->csa.class_0_pending)
+		return 1;
+
+	return 0;
+}
+
+static int spu_setup_isolated(struct spu_context *ctx)
+{
+	int ret;
+	u64 __iomem *mfc_cntl;
+	u64 sr1;
+	u32 status;
+	unsigned long timeout;
+	const u32 status_loading = SPU_STATUS_RUNNING
+		| SPU_STATUS_ISOLATED_STATE | SPU_STATUS_ISOLATED_LOAD_STATUS;
+
+	ret = -ENODEV;
+	if (!isolated_loader)
+		goto out;
+
+	/*
+	 * We need to exclude userspace access to the context.
+	 *
+	 * To protect against memory access we invalidate all ptes
+	 * and make sure the pagefault handlers block on the mutex.
+	 */
+	spu_unmap_mappings(ctx);
+
+	mfc_cntl = &ctx->spu->priv2->mfc_control_RW;
+
+	/* purge the MFC DMA queue to ensure no spurious accesses before we
+	 * enter kernel mode */
+	timeout = jiffies + HZ;
+	out_be64(mfc_cntl, MFC_CNTL_PURGE_DMA_REQUEST);
+	while ((in_be64(mfc_cntl) & MFC_CNTL_PURGE_DMA_STATUS_MASK)
+			!= MFC_CNTL_PURGE_DMA_COMPLETE) {
+		if (time_after(jiffies, timeout)) {
+			printk(KERN_ERR "%s: timeout flushing MFC DMA queue\n",
+					__func__);
+			ret = -EIO;
+			goto out;
+		}
+		cond_resched();
+	}
+
+	/* clear purge status */
+	out_be64(mfc_cntl, 0);
+
+	/* put the SPE in kernel mode to allow access to the loader */
+	sr1 = spu_mfc_sr1_get(ctx->spu);
+	sr1 &= ~MFC_STATE1_PROBLEM_STATE_MASK;
+	spu_mfc_sr1_set(ctx->spu, sr1);
+
+	/* start the loader */
+	ctx->ops->signal1_write(ctx, (unsigned long)isolated_loader >> 32);
+	ctx->ops->signal2_write(ctx,
+			(unsigned long)isolated_loader & 0xffffffff);
+
+	ctx->ops->runcntl_write(ctx,
+			SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE);
+
+	ret = 0;
+	timeout = jiffies + HZ;
+	while (((status = ctx->ops->status_read(ctx)) & status_loading) ==
+				status_loading) {
+		if (time_after(jiffies, timeout)) {
+			printk(KERN_ERR "%s: timeout waiting for loader\n",
+					__func__);
+			ret = -EIO;
+			goto out_drop_priv;
+		}
+		cond_resched();
+	}
+
+	if (!(status & SPU_STATUS_RUNNING)) {
+		/* If isolated LOAD has failed: run SPU, we will get a stop-and
+		 * signal later. */
+		pr_debug("%s: isolated LOAD failed\n", __func__);
+		ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
+		ret = -EACCES;
+		goto out_drop_priv;
+	}
+
+	if (!(status & SPU_STATUS_ISOLATED_STATE)) {
+		/* This isn't allowed by the CBEA, but check anyway */
+		pr_debug("%s: SPU fell out of isolated mode?\n", __func__);
+		ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_STOP);
+		ret = -EINVAL;
+		goto out_drop_priv;
+	}
+
+out_drop_priv:
+	/* Finished accessing the loader. Drop kernel mode */
+	sr1 |= MFC_STATE1_PROBLEM_STATE_MASK;
+	spu_mfc_sr1_set(ctx->spu, sr1);
+
+out:
+	return ret;
+}
+
+static int spu_run_init(struct spu_context *ctx, u32 *npc)
+{
+	unsigned long runcntl = SPU_RUNCNTL_RUNNABLE;
+	int ret;
+
+	spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+	/*
+	 * NOSCHED is synchronous scheduling with respect to the caller.
+	 * The caller waits for the context to be loaded.
+	 */
+	if (ctx->flags & SPU_CREATE_NOSCHED) {
+		if (ctx->state == SPU_STATE_SAVED) {
+			ret = spu_activate(ctx, 0);
+			if (ret)
+				return ret;
+		}
+	}
+
+	/*
+	 * Apply special setup as required.
+	 */
+	if (ctx->flags & SPU_CREATE_ISOLATE) {
+		if (!(ctx->ops->status_read(ctx) & SPU_STATUS_ISOLATED_STATE)) {
+			ret = spu_setup_isolated(ctx);
+			if (ret)
+				return ret;
+		}
+
+		/*
+		 * If userspace has set the runcntrl register (eg, to
+		 * issue an isolated exit), we need to re-set it here
+		 */
+		runcntl = ctx->ops->runcntl_read(ctx) &
+			(SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE);
+		if (runcntl == 0)
+			runcntl = SPU_RUNCNTL_RUNNABLE;
+	} else {
+		unsigned long privcntl;
+
+		if (test_thread_flag(TIF_SINGLESTEP))
+			privcntl = SPU_PRIVCNTL_MODE_SINGLE_STEP;
+		else
+			privcntl = SPU_PRIVCNTL_MODE_NORMAL;
+
+		ctx->ops->privcntl_write(ctx, privcntl);
+		ctx->ops->npc_write(ctx, *npc);
+	}
+
+	ctx->ops->runcntl_write(ctx, runcntl);
+
+	if (ctx->flags & SPU_CREATE_NOSCHED) {
+		spuctx_switch_state(ctx, SPU_UTIL_USER);
+	} else {
+
+		if (ctx->state == SPU_STATE_SAVED) {
+			ret = spu_activate(ctx, 0);
+			if (ret)
+				return ret;
+		} else {
+			spuctx_switch_state(ctx, SPU_UTIL_USER);
+		}
+	}
+
+	set_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags);
+	return 0;
+}
+
+static int spu_run_fini(struct spu_context *ctx, u32 *npc,
+			       u32 *status)
+{
+	int ret = 0;
+
+	spu_del_from_rq(ctx);
+
+	*status = ctx->ops->status_read(ctx);
+	*npc = ctx->ops->npc_read(ctx);
+
+	spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
+	clear_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags);
+	spu_switch_log_notify(NULL, ctx, SWITCH_LOG_EXIT, *status);
+	spu_release(ctx);
+
+	if (signal_pending(current))
+		ret = -ERESTARTSYS;
+
+	return ret;
+}
+
+/*
+ * SPU syscall restarting is tricky because we violate the basic
+ * assumption that the signal handler is running on the interrupted
+ * thread. Here instead, the handler runs on PowerPC user space code,
+ * while the syscall was called from the SPU.
+ * This means we can only do a very rough approximation of POSIX
+ * signal semantics.
+ */
+static int spu_handle_restartsys(struct spu_context *ctx, long *spu_ret,
+			  unsigned int *npc)
+{
+	int ret;
+
+	switch (*spu_ret) {
+	case -ERESTARTSYS:
+	case -ERESTARTNOINTR:
+		/*
+		 * Enter the regular syscall restarting for
+		 * sys_spu_run, then restart the SPU syscall
+		 * callback.
+		 */
+		*npc -= 8;
+		ret = -ERESTARTSYS;
+		break;
+	case -ERESTARTNOHAND:
+	case -ERESTART_RESTARTBLOCK:
+		/*
+		 * Restart block is too hard for now, just return -EINTR
+		 * to the SPU.
+		 * ERESTARTNOHAND comes from sys_pause, we also return
+		 * -EINTR from there.
+		 * Assume that we need to be restarted ourselves though.
+		 */
+		*spu_ret = -EINTR;
+		ret = -ERESTARTSYS;
+		break;
+	default:
+		printk(KERN_WARNING "%s: unexpected return code %ld\n",
+			__func__, *spu_ret);
+		ret = 0;
+	}
+	return ret;
+}
+
+static int spu_process_callback(struct spu_context *ctx)
+{
+	struct spu_syscall_block s;
+	u32 ls_pointer, npc;
+	void __iomem *ls;
+	long spu_ret;
+	int ret;
+
+	/* get syscall block from local store */
+	npc = ctx->ops->npc_read(ctx) & ~3;
+	ls = (void __iomem *)ctx->ops->get_ls(ctx);
+	ls_pointer = in_be32(ls + npc);
+	if (ls_pointer > (LS_SIZE - sizeof(s)))
+		return -EFAULT;
+	memcpy_fromio(&s, ls + ls_pointer, sizeof(s));
+
+	/* do actual syscall without pinning the spu */
+	ret = 0;
+	spu_ret = -ENOSYS;
+	npc += 4;
+
+	if (s.nr_ret < NR_syscalls) {
+		spu_release(ctx);
+		/* do actual system call from here */
+		spu_ret = spu_sys_callback(&s);
+		if (spu_ret <= -ERESTARTSYS) {
+			ret = spu_handle_restartsys(ctx, &spu_ret, &npc);
+		}
+		mutex_lock(&ctx->state_mutex);
+		if (ret == -ERESTARTSYS)
+			return ret;
+	}
+
+	/* need to re-get the ls, as it may have changed when we released the
+	 * spu */
+	ls = (void __iomem *)ctx->ops->get_ls(ctx);
+
+	/* write result, jump over indirect pointer */
+	memcpy_toio(ls + ls_pointer, &spu_ret, sizeof(spu_ret));
+	ctx->ops->npc_write(ctx, npc);
+	ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
+	return ret;
+}
+
+long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event)
+{
+	int ret;
+	struct spu *spu;
+	u32 status;
+
+	if (mutex_lock_interruptible(&ctx->run_mutex))
+		return -ERESTARTSYS;
+
+	ctx->event_return = 0;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		goto out_unlock;
+
+	spu_enable_spu(ctx);
+
+	spu_update_sched_info(ctx);
+
+	ret = spu_run_init(ctx, npc);
+	if (ret) {
+		spu_release(ctx);
+		goto out;
+	}
+
+	do {
+		ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status));
+		if (unlikely(ret)) {
+			/*
+			 * This is nasty: we need the state_mutex for all the
+			 * bookkeeping even if the syscall was interrupted by
+			 * a signal. ewww.
+			 */
+			mutex_lock(&ctx->state_mutex);
+			break;
+		}
+		spu = ctx->spu;
+		if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE,
+						&ctx->sched_flags))) {
+			if (!(status & SPU_STATUS_STOPPED_BY_STOP)) {
+				spu_switch_notify(spu, ctx);
+				continue;
+			}
+		}
+
+		spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+		if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+		    (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) {
+			ret = spu_process_callback(ctx);
+			if (ret)
+				break;
+			status &= ~SPU_STATUS_STOPPED_BY_STOP;
+		}
+		ret = spufs_handle_class1(ctx);
+		if (ret)
+			break;
+
+		ret = spufs_handle_class0(ctx);
+		if (ret)
+			break;
+
+		if (signal_pending(current))
+			ret = -ERESTARTSYS;
+	} while (!ret && !(status & (SPU_STATUS_STOPPED_BY_STOP |
+				      SPU_STATUS_STOPPED_BY_HALT |
+				       SPU_STATUS_SINGLE_STEP)));
+
+	spu_disable_spu(ctx);
+	ret = spu_run_fini(ctx, npc, &status);
+	spu_yield(ctx);
+
+	if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+	    (((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100))
+		ctx->stats.libassist++;
+
+	if ((ret == 0) ||
+	    ((ret == -ERESTARTSYS) &&
+	     ((status & SPU_STATUS_STOPPED_BY_HALT) ||
+	      (status & SPU_STATUS_SINGLE_STEP) ||
+	      ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+	       (status >> SPU_STOP_STATUS_SHIFT != 0x2104)))))
+		ret = status;
+
+	/* Note: we don't need to force_sig SIGTRAP on single-step
+	 * since we have TIF_SINGLESTEP set, thus the kernel will do
+	 * it upon return from the syscall anyway.
+	 */
+	if (unlikely(status & SPU_STATUS_SINGLE_STEP))
+		ret = -ERESTARTSYS;
+
+	else if (unlikely((status & SPU_STATUS_STOPPED_BY_STOP)
+	    && (status >> SPU_STOP_STATUS_SHIFT) == 0x3fff)) {
+		force_sig(SIGTRAP, current);
+		ret = -ERESTARTSYS;
+	}
+
+out:
+	*event = ctx->event_return;
+out_unlock:
+	mutex_unlock(&ctx->run_mutex);
+	return ret;
+}
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
new file mode 100644
index 000000000..c9ef3c532
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -0,0 +1,1161 @@
+/* sched.c - SPU scheduler.
+ *
+ * Copyright (C) IBM 2005
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * 2006-03-31	NUMA domains added.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/errno.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/loadavg.h>
+#include <linux/sched/rt.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/completion.h>
+#include <linux/vmalloc.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/numa.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/kthread.h>
+#include <linux/pid_namespace.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/spu_priv1.h>
+#include "spufs.h"
+#define CREATE_TRACE_POINTS
+#include "sputrace.h"
+
+struct spu_prio_array {
+	DECLARE_BITMAP(bitmap, MAX_PRIO);
+	struct list_head runq[MAX_PRIO];
+	spinlock_t runq_lock;
+	int nr_waiting;
+};
+
+static unsigned long spu_avenrun[3];
+static struct spu_prio_array *spu_prio;
+static struct task_struct *spusched_task;
+static struct timer_list spusched_timer;
+static struct timer_list spuloadavg_timer;
+
+/*
+ * Priority of a normal, non-rt, non-niced'd process (aka nice level 0).
+ */
+#define NORMAL_PRIO		120
+
+/*
+ * Frequency of the spu scheduler tick.  By default we do one SPU scheduler
+ * tick for every 10 CPU scheduler ticks.
+ */
+#define SPUSCHED_TICK		(10)
+
+/*
+ * These are the 'tuning knobs' of the scheduler:
+ *
+ * Minimum timeslice is 5 msecs (or 1 spu scheduler tick, whichever is
+ * larger), default timeslice is 100 msecs, maximum timeslice is 800 msecs.
+ */
+#define MIN_SPU_TIMESLICE	max(5 * HZ / (1000 * SPUSCHED_TICK), 1)
+#define DEF_SPU_TIMESLICE	(100 * HZ / (1000 * SPUSCHED_TICK))
+
+#define SCALE_PRIO(x, prio) \
+	max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_SPU_TIMESLICE)
+
+/*
+ * scale user-nice values [ -20 ... 0 ... 19 ] to time slice values:
+ * [800ms ... 100ms ... 5ms]
+ *
+ * The higher a thread's priority, the bigger timeslices
+ * it gets during one round of execution. But even the lowest
+ * priority thread gets MIN_TIMESLICE worth of execution time.
+ */
+void spu_set_timeslice(struct spu_context *ctx)
+{
+	if (ctx->prio < NORMAL_PRIO)
+		ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE * 4, ctx->prio);
+	else
+		ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE, ctx->prio);
+}
+
+/*
+ * Update scheduling information from the owning thread.
+ */
+void __spu_update_sched_info(struct spu_context *ctx)
+{
+	/*
+	 * assert that the context is not on the runqueue, so it is safe
+	 * to change its scheduling parameters.
+	 */
+	BUG_ON(!list_empty(&ctx->rq));
+
+	/*
+	 * 32-Bit assignments are atomic on powerpc, and we don't care about
+	 * memory ordering here because retrieving the controlling thread is
+	 * per definition racy.
+	 */
+	ctx->tid = current->pid;
+
+	/*
+	 * We do our own priority calculations, so we normally want
+	 * ->static_prio to start with. Unfortunately this field
+	 * contains junk for threads with a realtime scheduling
+	 * policy so we have to look at ->prio in this case.
+	 */
+	if (rt_prio(current->prio))
+		ctx->prio = current->prio;
+	else
+		ctx->prio = current->static_prio;
+	ctx->policy = current->policy;
+
+	/*
+	 * TO DO: the context may be loaded, so we may need to activate
+	 * it again on a different node. But it shouldn't hurt anything
+	 * to update its parameters, because we know that the scheduler
+	 * is not actively looking at this field, since it is not on the
+	 * runqueue. The context will be rescheduled on the proper node
+	 * if it is timesliced or preempted.
+	 */
+	cpumask_copy(&ctx->cpus_allowed, &current->cpus_allowed);
+
+	/* Save the current cpu id for spu interrupt routing. */
+	ctx->last_ran = raw_smp_processor_id();
+}
+
+void spu_update_sched_info(struct spu_context *ctx)
+{
+	int node;
+
+	if (ctx->state == SPU_STATE_RUNNABLE) {
+		node = ctx->spu->node;
+
+		/*
+		 * Take list_mutex to sync with find_victim().
+		 */
+		mutex_lock(&cbe_spu_info[node].list_mutex);
+		__spu_update_sched_info(ctx);
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
+	} else {
+		__spu_update_sched_info(ctx);
+	}
+}
+
+static int __node_allowed(struct spu_context *ctx, int node)
+{
+	if (nr_cpus_node(node)) {
+		const struct cpumask *mask = cpumask_of_node(node);
+
+		if (cpumask_intersects(mask, &ctx->cpus_allowed))
+			return 1;
+	}
+
+	return 0;
+}
+
+static int node_allowed(struct spu_context *ctx, int node)
+{
+	int rval;
+
+	spin_lock(&spu_prio->runq_lock);
+	rval = __node_allowed(ctx, node);
+	spin_unlock(&spu_prio->runq_lock);
+
+	return rval;
+}
+
+void do_notify_spus_active(void)
+{
+	int node;
+
+	/*
+	 * Wake up the active spu_contexts.
+	 *
+	 * When the awakened processes see their "notify_active" flag is set,
+	 * they will call spu_switch_notify().
+	 */
+	for_each_online_node(node) {
+		struct spu *spu;
+
+		mutex_lock(&cbe_spu_info[node].list_mutex);
+		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+			if (spu->alloc_state != SPU_FREE) {
+				struct spu_context *ctx = spu->ctx;
+				set_bit(SPU_SCHED_NOTIFY_ACTIVE,
+					&ctx->sched_flags);
+				mb();
+				wake_up_all(&ctx->stop_wq);
+			}
+		}
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
+	}
+}
+
+/**
+ * spu_bind_context - bind spu context to physical spu
+ * @spu:	physical spu to bind to
+ * @ctx:	context to bind
+ */
+static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
+{
+	spu_context_trace(spu_bind_context__enter, ctx, spu);
+
+	spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+	if (ctx->flags & SPU_CREATE_NOSCHED)
+		atomic_inc(&cbe_spu_info[spu->node].reserved_spus);
+
+	ctx->stats.slb_flt_base = spu->stats.slb_flt;
+	ctx->stats.class2_intr_base = spu->stats.class2_intr;
+
+	spu_associate_mm(spu, ctx->owner);
+
+	spin_lock_irq(&spu->register_lock);
+	spu->ctx = ctx;
+	spu->flags = 0;
+	ctx->spu = spu;
+	ctx->ops = &spu_hw_ops;
+	spu->pid = current->pid;
+	spu->tgid = current->tgid;
+	spu->ibox_callback = spufs_ibox_callback;
+	spu->wbox_callback = spufs_wbox_callback;
+	spu->stop_callback = spufs_stop_callback;
+	spu->mfc_callback = spufs_mfc_callback;
+	spin_unlock_irq(&spu->register_lock);
+
+	spu_unmap_mappings(ctx);
+
+	spu_switch_log_notify(spu, ctx, SWITCH_LOG_START, 0);
+	spu_restore(&ctx->csa, spu);
+	spu->timestamp = jiffies;
+	spu_switch_notify(spu, ctx);
+	ctx->state = SPU_STATE_RUNNABLE;
+
+	spuctx_switch_state(ctx, SPU_UTIL_USER);
+}
+
+/*
+ * Must be used with the list_mutex held.
+ */
+static inline int sched_spu(struct spu *spu)
+{
+	BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex));
+
+	return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED));
+}
+
+static void aff_merge_remaining_ctxs(struct spu_gang *gang)
+{
+	struct spu_context *ctx;
+
+	list_for_each_entry(ctx, &gang->aff_list_head, aff_list) {
+		if (list_empty(&ctx->aff_list))
+			list_add(&ctx->aff_list, &gang->aff_list_head);
+	}
+	gang->aff_flags |= AFF_MERGED;
+}
+
+static void aff_set_offsets(struct spu_gang *gang)
+{
+	struct spu_context *ctx;
+	int offset;
+
+	offset = -1;
+	list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list,
+								aff_list) {
+		if (&ctx->aff_list == &gang->aff_list_head)
+			break;
+		ctx->aff_offset = offset--;
+	}
+
+	offset = 0;
+	list_for_each_entry(ctx, gang->aff_ref_ctx->aff_list.prev, aff_list) {
+		if (&ctx->aff_list == &gang->aff_list_head)
+			break;
+		ctx->aff_offset = offset++;
+	}
+
+	gang->aff_flags |= AFF_OFFSETS_SET;
+}
+
+static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff,
+		 int group_size, int lowest_offset)
+{
+	struct spu *spu;
+	int node, n;
+
+	/*
+	 * TODO: A better algorithm could be used to find a good spu to be
+	 *       used as reference location for the ctxs chain.
+	 */
+	node = cpu_to_node(raw_smp_processor_id());
+	for (n = 0; n < MAX_NUMNODES; n++, node++) {
+		/*
+		 * "available_spus" counts how many spus are not potentially
+		 * going to be used by other affinity gangs whose reference
+		 * context is already in place. Although this code seeks to
+		 * avoid having affinity gangs with a summed amount of
+		 * contexts bigger than the amount of spus in the node,
+		 * this may happen sporadically. In this case, available_spus
+		 * becomes negative, which is harmless.
+		 */
+		int available_spus;
+
+		node = (node < MAX_NUMNODES) ? node : 0;
+		if (!node_allowed(ctx, node))
+			continue;
+
+		available_spus = 0;
+		mutex_lock(&cbe_spu_info[node].list_mutex);
+		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+			if (spu->ctx && spu->ctx->gang && !spu->ctx->aff_offset
+					&& spu->ctx->gang->aff_ref_spu)
+				available_spus -= spu->ctx->gang->contexts;
+			available_spus++;
+		}
+		if (available_spus < ctx->gang->contexts) {
+			mutex_unlock(&cbe_spu_info[node].list_mutex);
+			continue;
+		}
+
+		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+			if ((!mem_aff || spu->has_mem_affinity) &&
+							sched_spu(spu)) {
+				mutex_unlock(&cbe_spu_info[node].list_mutex);
+				return spu;
+			}
+		}
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
+	}
+	return NULL;
+}
+
+static void aff_set_ref_point_location(struct spu_gang *gang)
+{
+	int mem_aff, gs, lowest_offset;
+	struct spu_context *ctx;
+	struct spu *tmp;
+
+	mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM;
+	lowest_offset = 0;
+	gs = 0;
+
+	list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
+		gs++;
+
+	list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list,
+								aff_list) {
+		if (&ctx->aff_list == &gang->aff_list_head)
+			break;
+		lowest_offset = ctx->aff_offset;
+	}
+
+	gang->aff_ref_spu = aff_ref_location(gang->aff_ref_ctx, mem_aff, gs,
+							lowest_offset);
+}
+
+static struct spu *ctx_location(struct spu *ref, int offset, int node)
+{
+	struct spu *spu;
+
+	spu = NULL;
+	if (offset >= 0) {
+		list_for_each_entry(spu, ref->aff_list.prev, aff_list) {
+			BUG_ON(spu->node != node);
+			if (offset == 0)
+				break;
+			if (sched_spu(spu))
+				offset--;
+		}
+	} else {
+		list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) {
+			BUG_ON(spu->node != node);
+			if (offset == 0)
+				break;
+			if (sched_spu(spu))
+				offset++;
+		}
+	}
+
+	return spu;
+}
+
+/*
+ * affinity_check is called each time a context is going to be scheduled.
+ * It returns the spu ptr on which the context must run.
+ */
+static int has_affinity(struct spu_context *ctx)
+{
+	struct spu_gang *gang = ctx->gang;
+
+	if (list_empty(&ctx->aff_list))
+		return 0;
+
+	if (atomic_read(&ctx->gang->aff_sched_count) == 0)
+		ctx->gang->aff_ref_spu = NULL;
+
+	if (!gang->aff_ref_spu) {
+		if (!(gang->aff_flags & AFF_MERGED))
+			aff_merge_remaining_ctxs(gang);
+		if (!(gang->aff_flags & AFF_OFFSETS_SET))
+			aff_set_offsets(gang);
+		aff_set_ref_point_location(gang);
+	}
+
+	return gang->aff_ref_spu != NULL;
+}
+
+/**
+ * spu_unbind_context - unbind spu context from physical spu
+ * @spu:	physical spu to unbind from
+ * @ctx:	context to unbind
+ */
+static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
+{
+	u32 status;
+
+	spu_context_trace(spu_unbind_context__enter, ctx, spu);
+
+	spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+ 	if (spu->ctx->flags & SPU_CREATE_NOSCHED)
+		atomic_dec(&cbe_spu_info[spu->node].reserved_spus);
+
+	if (ctx->gang)
+		/*
+		 * If ctx->gang->aff_sched_count is positive, SPU affinity is
+		 * being considered in this gang. Using atomic_dec_if_positive
+		 * allow us to skip an explicit check for affinity in this gang
+		 */
+		atomic_dec_if_positive(&ctx->gang->aff_sched_count);
+
+	spu_switch_notify(spu, NULL);
+	spu_unmap_mappings(ctx);
+	spu_save(&ctx->csa, spu);
+	spu_switch_log_notify(spu, ctx, SWITCH_LOG_STOP, 0);
+
+	spin_lock_irq(&spu->register_lock);
+	spu->timestamp = jiffies;
+	ctx->state = SPU_STATE_SAVED;
+	spu->ibox_callback = NULL;
+	spu->wbox_callback = NULL;
+	spu->stop_callback = NULL;
+	spu->mfc_callback = NULL;
+	spu->pid = 0;
+	spu->tgid = 0;
+	ctx->ops = &spu_backing_ops;
+	spu->flags = 0;
+	spu->ctx = NULL;
+	spin_unlock_irq(&spu->register_lock);
+
+	spu_associate_mm(spu, NULL);
+
+	ctx->stats.slb_flt +=
+		(spu->stats.slb_flt - ctx->stats.slb_flt_base);
+	ctx->stats.class2_intr +=
+		(spu->stats.class2_intr - ctx->stats.class2_intr_base);
+
+	/* This maps the underlying spu state to idle */
+	spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
+	ctx->spu = NULL;
+
+	if (spu_stopped(ctx, &status))
+		wake_up_all(&ctx->stop_wq);
+}
+
+/**
+ * spu_add_to_rq - add a context to the runqueue
+ * @ctx:       context to add
+ */
+static void __spu_add_to_rq(struct spu_context *ctx)
+{
+	/*
+	 * Unfortunately this code path can be called from multiple threads
+	 * on behalf of a single context due to the way the problem state
+	 * mmap support works.
+	 *
+	 * Fortunately we need to wake up all these threads at the same time
+	 * and can simply skip the runqueue addition for every but the first
+	 * thread getting into this codepath.
+	 *
+	 * It's still quite hacky, and long-term we should proxy all other
+	 * threads through the owner thread so that spu_run is in control
+	 * of all the scheduling activity for a given context.
+	 */
+	if (list_empty(&ctx->rq)) {
+		list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]);
+		set_bit(ctx->prio, spu_prio->bitmap);
+		if (!spu_prio->nr_waiting++)
+			mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK);
+	}
+}
+
+static void spu_add_to_rq(struct spu_context *ctx)
+{
+	spin_lock(&spu_prio->runq_lock);
+	__spu_add_to_rq(ctx);
+	spin_unlock(&spu_prio->runq_lock);
+}
+
+static void __spu_del_from_rq(struct spu_context *ctx)
+{
+	int prio = ctx->prio;
+
+	if (!list_empty(&ctx->rq)) {
+		if (!--spu_prio->nr_waiting)
+			del_timer(&spusched_timer);
+		list_del_init(&ctx->rq);
+
+		if (list_empty(&spu_prio->runq[prio]))
+			clear_bit(prio, spu_prio->bitmap);
+	}
+}
+
+void spu_del_from_rq(struct spu_context *ctx)
+{
+	spin_lock(&spu_prio->runq_lock);
+	__spu_del_from_rq(ctx);
+	spin_unlock(&spu_prio->runq_lock);
+}
+
+static void spu_prio_wait(struct spu_context *ctx)
+{
+	DEFINE_WAIT(wait);
+
+	/*
+	 * The caller must explicitly wait for a context to be loaded
+	 * if the nosched flag is set.  If NOSCHED is not set, the caller
+	 * queues the context and waits for an spu event or error.
+	 */
+	BUG_ON(!(ctx->flags & SPU_CREATE_NOSCHED));
+
+	spin_lock(&spu_prio->runq_lock);
+	prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE);
+	if (!signal_pending(current)) {
+		__spu_add_to_rq(ctx);
+		spin_unlock(&spu_prio->runq_lock);
+		mutex_unlock(&ctx->state_mutex);
+		schedule();
+		mutex_lock(&ctx->state_mutex);
+		spin_lock(&spu_prio->runq_lock);
+		__spu_del_from_rq(ctx);
+	}
+	spin_unlock(&spu_prio->runq_lock);
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(&ctx->stop_wq, &wait);
+}
+
+static struct spu *spu_get_idle(struct spu_context *ctx)
+{
+	struct spu *spu, *aff_ref_spu;
+	int node, n;
+
+	spu_context_nospu_trace(spu_get_idle__enter, ctx);
+
+	if (ctx->gang) {
+		mutex_lock(&ctx->gang->aff_mutex);
+		if (has_affinity(ctx)) {
+			aff_ref_spu = ctx->gang->aff_ref_spu;
+			atomic_inc(&ctx->gang->aff_sched_count);
+			mutex_unlock(&ctx->gang->aff_mutex);
+			node = aff_ref_spu->node;
+
+			mutex_lock(&cbe_spu_info[node].list_mutex);
+			spu = ctx_location(aff_ref_spu, ctx->aff_offset, node);
+			if (spu && spu->alloc_state == SPU_FREE)
+				goto found;
+			mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+			atomic_dec(&ctx->gang->aff_sched_count);
+			goto not_found;
+		}
+		mutex_unlock(&ctx->gang->aff_mutex);
+	}
+	node = cpu_to_node(raw_smp_processor_id());
+	for (n = 0; n < MAX_NUMNODES; n++, node++) {
+		node = (node < MAX_NUMNODES) ? node : 0;
+		if (!node_allowed(ctx, node))
+			continue;
+
+		mutex_lock(&cbe_spu_info[node].list_mutex);
+		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+			if (spu->alloc_state == SPU_FREE)
+				goto found;
+		}
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
+	}
+
+ not_found:
+	spu_context_nospu_trace(spu_get_idle__not_found, ctx);
+	return NULL;
+
+ found:
+	spu->alloc_state = SPU_USED;
+	mutex_unlock(&cbe_spu_info[node].list_mutex);
+	spu_context_trace(spu_get_idle__found, ctx, spu);
+	spu_init_channels(spu);
+	return spu;
+}
+
+/**
+ * find_victim - find a lower priority context to preempt
+ * @ctx:	candidate context for running
+ *
+ * Returns the freed physical spu to run the new context on.
+ */
+static struct spu *find_victim(struct spu_context *ctx)
+{
+	struct spu_context *victim = NULL;
+	struct spu *spu;
+	int node, n;
+
+	spu_context_nospu_trace(spu_find_victim__enter, ctx);
+
+	/*
+	 * Look for a possible preemption candidate on the local node first.
+	 * If there is no candidate look at the other nodes.  This isn't
+	 * exactly fair, but so far the whole spu scheduler tries to keep
+	 * a strong node affinity.  We might want to fine-tune this in
+	 * the future.
+	 */
+ restart:
+	node = cpu_to_node(raw_smp_processor_id());
+	for (n = 0; n < MAX_NUMNODES; n++, node++) {
+		node = (node < MAX_NUMNODES) ? node : 0;
+		if (!node_allowed(ctx, node))
+			continue;
+
+		mutex_lock(&cbe_spu_info[node].list_mutex);
+		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+			struct spu_context *tmp = spu->ctx;
+
+			if (tmp && tmp->prio > ctx->prio &&
+			    !(tmp->flags & SPU_CREATE_NOSCHED) &&
+			    (!victim || tmp->prio > victim->prio)) {
+				victim = spu->ctx;
+			}
+		}
+		if (victim)
+			get_spu_context(victim);
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+		if (victim) {
+			/*
+			 * This nests ctx->state_mutex, but we always lock
+			 * higher priority contexts before lower priority
+			 * ones, so this is safe until we introduce
+			 * priority inheritance schemes.
+			 *
+			 * XXX if the highest priority context is locked,
+			 * this can loop a long time.  Might be better to
+			 * look at another context or give up after X retries.
+			 */
+			if (!mutex_trylock(&victim->state_mutex)) {
+				put_spu_context(victim);
+				victim = NULL;
+				goto restart;
+			}
+
+			spu = victim->spu;
+			if (!spu || victim->prio <= ctx->prio) {
+				/*
+				 * This race can happen because we've dropped
+				 * the active list mutex.  Not a problem, just
+				 * restart the search.
+				 */
+				mutex_unlock(&victim->state_mutex);
+				put_spu_context(victim);
+				victim = NULL;
+				goto restart;
+			}
+
+			spu_context_trace(__spu_deactivate__unload, ctx, spu);
+
+			mutex_lock(&cbe_spu_info[node].list_mutex);
+			cbe_spu_info[node].nr_active--;
+			spu_unbind_context(spu, victim);
+			mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+			victim->stats.invol_ctx_switch++;
+			spu->stats.invol_ctx_switch++;
+			if (test_bit(SPU_SCHED_SPU_RUN, &victim->sched_flags))
+				spu_add_to_rq(victim);
+
+			mutex_unlock(&victim->state_mutex);
+			put_spu_context(victim);
+
+			return spu;
+		}
+	}
+
+	return NULL;
+}
+
+static void __spu_schedule(struct spu *spu, struct spu_context *ctx)
+{
+	int node = spu->node;
+	int success = 0;
+
+	spu_set_timeslice(ctx);
+
+	mutex_lock(&cbe_spu_info[node].list_mutex);
+	if (spu->ctx == NULL) {
+		spu_bind_context(spu, ctx);
+		cbe_spu_info[node].nr_active++;
+		spu->alloc_state = SPU_USED;
+		success = 1;
+	}
+	mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+	if (success)
+		wake_up_all(&ctx->run_wq);
+	else
+		spu_add_to_rq(ctx);
+}
+
+static void spu_schedule(struct spu *spu, struct spu_context *ctx)
+{
+	/* not a candidate for interruptible because it's called either
+	   from the scheduler thread or from spu_deactivate */
+	mutex_lock(&ctx->state_mutex);
+	if (ctx->state == SPU_STATE_SAVED)
+		__spu_schedule(spu, ctx);
+	spu_release(ctx);
+}
+
+/**
+ * spu_unschedule - remove a context from a spu, and possibly release it.
+ * @spu:	The SPU to unschedule from
+ * @ctx:	The context currently scheduled on the SPU
+ * @free_spu	Whether to free the SPU for other contexts
+ *
+ * Unbinds the context @ctx from the SPU @spu. If @free_spu is non-zero, the
+ * SPU is made available for other contexts (ie, may be returned by
+ * spu_get_idle). If this is zero, the caller is expected to schedule another
+ * context to this spu.
+ *
+ * Should be called with ctx->state_mutex held.
+ */
+static void spu_unschedule(struct spu *spu, struct spu_context *ctx,
+		int free_spu)
+{
+	int node = spu->node;
+
+	mutex_lock(&cbe_spu_info[node].list_mutex);
+	cbe_spu_info[node].nr_active--;
+	if (free_spu)
+		spu->alloc_state = SPU_FREE;
+	spu_unbind_context(spu, ctx);
+	ctx->stats.invol_ctx_switch++;
+	spu->stats.invol_ctx_switch++;
+	mutex_unlock(&cbe_spu_info[node].list_mutex);
+}
+
+/**
+ * spu_activate - find a free spu for a context and execute it
+ * @ctx:	spu context to schedule
+ * @flags:	flags (currently ignored)
+ *
+ * Tries to find a free spu to run @ctx.  If no free spu is available
+ * add the context to the runqueue so it gets woken up once an spu
+ * is available.
+ */
+int spu_activate(struct spu_context *ctx, unsigned long flags)
+{
+	struct spu *spu;
+
+	/*
+	 * If there are multiple threads waiting for a single context
+	 * only one actually binds the context while the others will
+	 * only be able to acquire the state_mutex once the context
+	 * already is in runnable state.
+	 */
+	if (ctx->spu)
+		return 0;
+
+spu_activate_top:
+	if (signal_pending(current))
+		return -ERESTARTSYS;
+
+	spu = spu_get_idle(ctx);
+	/*
+	 * If this is a realtime thread we try to get it running by
+	 * preempting a lower priority thread.
+	 */
+	if (!spu && rt_prio(ctx->prio))
+		spu = find_victim(ctx);
+	if (spu) {
+		unsigned long runcntl;
+
+		runcntl = ctx->ops->runcntl_read(ctx);
+		__spu_schedule(spu, ctx);
+		if (runcntl & SPU_RUNCNTL_RUNNABLE)
+			spuctx_switch_state(ctx, SPU_UTIL_USER);
+
+		return 0;
+	}
+
+	if (ctx->flags & SPU_CREATE_NOSCHED) {
+		spu_prio_wait(ctx);
+		goto spu_activate_top;
+	}
+
+	spu_add_to_rq(ctx);
+
+	return 0;
+}
+
+/**
+ * grab_runnable_context - try to find a runnable context
+ *
+ * Remove the highest priority context on the runqueue and return it
+ * to the caller.  Returns %NULL if no runnable context was found.
+ */
+static struct spu_context *grab_runnable_context(int prio, int node)
+{
+	struct spu_context *ctx;
+	int best;
+
+	spin_lock(&spu_prio->runq_lock);
+	best = find_first_bit(spu_prio->bitmap, prio);
+	while (best < prio) {
+		struct list_head *rq = &spu_prio->runq[best];
+
+		list_for_each_entry(ctx, rq, rq) {
+			/* XXX(hch): check for affinity here as well */
+			if (__node_allowed(ctx, node)) {
+				__spu_del_from_rq(ctx);
+				goto found;
+			}
+		}
+		best++;
+	}
+	ctx = NULL;
+ found:
+	spin_unlock(&spu_prio->runq_lock);
+	return ctx;
+}
+
+static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_context *new = NULL;
+
+	if (spu) {
+		new = grab_runnable_context(max_prio, spu->node);
+		if (new || force) {
+			spu_unschedule(spu, ctx, new == NULL);
+			if (new) {
+				if (new->flags & SPU_CREATE_NOSCHED)
+					wake_up(&new->stop_wq);
+				else {
+					spu_release(ctx);
+					spu_schedule(spu, new);
+					/* this one can't easily be made
+					   interruptible */
+					mutex_lock(&ctx->state_mutex);
+				}
+			}
+		}
+	}
+
+	return new != NULL;
+}
+
+/**
+ * spu_deactivate - unbind a context from it's physical spu
+ * @ctx:	spu context to unbind
+ *
+ * Unbind @ctx from the physical spu it is running on and schedule
+ * the highest priority context to run on the freed physical spu.
+ */
+void spu_deactivate(struct spu_context *ctx)
+{
+	spu_context_nospu_trace(spu_deactivate__enter, ctx);
+	__spu_deactivate(ctx, 1, MAX_PRIO);
+}
+
+/**
+ * spu_yield -	yield a physical spu if others are waiting
+ * @ctx:	spu context to yield
+ *
+ * Check if there is a higher priority context waiting and if yes
+ * unbind @ctx from the physical spu and schedule the highest
+ * priority context to run on the freed physical spu instead.
+ */
+void spu_yield(struct spu_context *ctx)
+{
+	spu_context_nospu_trace(spu_yield__enter, ctx);
+	if (!(ctx->flags & SPU_CREATE_NOSCHED)) {
+		mutex_lock(&ctx->state_mutex);
+		__spu_deactivate(ctx, 0, MAX_PRIO);
+		mutex_unlock(&ctx->state_mutex);
+	}
+}
+
+static noinline void spusched_tick(struct spu_context *ctx)
+{
+	struct spu_context *new = NULL;
+	struct spu *spu = NULL;
+
+	if (spu_acquire(ctx))
+		BUG();	/* a kernel thread never has signals pending */
+
+	if (ctx->state != SPU_STATE_RUNNABLE)
+		goto out;
+	if (ctx->flags & SPU_CREATE_NOSCHED)
+		goto out;
+	if (ctx->policy == SCHED_FIFO)
+		goto out;
+
+	if (--ctx->time_slice && test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags))
+		goto out;
+
+	spu = ctx->spu;
+
+	spu_context_trace(spusched_tick__preempt, ctx, spu);
+
+	new = grab_runnable_context(ctx->prio + 1, spu->node);
+	if (new) {
+		spu_unschedule(spu, ctx, 0);
+		if (test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags))
+			spu_add_to_rq(ctx);
+	} else {
+		spu_context_nospu_trace(spusched_tick__newslice, ctx);
+		if (!ctx->time_slice)
+			ctx->time_slice++;
+	}
+out:
+	spu_release(ctx);
+
+	if (new)
+		spu_schedule(spu, new);
+}
+
+/**
+ * count_active_contexts - count nr of active tasks
+ *
+ * Return the number of tasks currently running or waiting to run.
+ *
+ * Note that we don't take runq_lock / list_mutex here.  Reading
+ * a single 32bit value is atomic on powerpc, and we don't care
+ * about memory ordering issues here.
+ */
+static unsigned long count_active_contexts(void)
+{
+	int nr_active = 0, node;
+
+	for (node = 0; node < MAX_NUMNODES; node++)
+		nr_active += cbe_spu_info[node].nr_active;
+	nr_active += spu_prio->nr_waiting;
+
+	return nr_active;
+}
+
+/**
+ * spu_calc_load - update the avenrun load estimates.
+ *
+ * No locking against reading these values from userspace, as for
+ * the CPU loadavg code.
+ */
+static void spu_calc_load(void)
+{
+	unsigned long active_tasks; /* fixed-point */
+
+	active_tasks = count_active_contexts() * FIXED_1;
+	CALC_LOAD(spu_avenrun[0], EXP_1, active_tasks);
+	CALC_LOAD(spu_avenrun[1], EXP_5, active_tasks);
+	CALC_LOAD(spu_avenrun[2], EXP_15, active_tasks);
+}
+
+static void spusched_wake(struct timer_list *unused)
+{
+	mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK);
+	wake_up_process(spusched_task);
+}
+
+static void spuloadavg_wake(struct timer_list *unused)
+{
+	mod_timer(&spuloadavg_timer, jiffies + LOAD_FREQ);
+	spu_calc_load();
+}
+
+static int spusched_thread(void *unused)
+{
+	struct spu *spu;
+	int node;
+
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule();
+		for (node = 0; node < MAX_NUMNODES; node++) {
+			struct mutex *mtx = &cbe_spu_info[node].list_mutex;
+
+			mutex_lock(mtx);
+			list_for_each_entry(spu, &cbe_spu_info[node].spus,
+					cbe_list) {
+				struct spu_context *ctx = spu->ctx;
+
+				if (ctx) {
+					get_spu_context(ctx);
+					mutex_unlock(mtx);
+					spusched_tick(ctx);
+					mutex_lock(mtx);
+					put_spu_context(ctx);
+				}
+			}
+			mutex_unlock(mtx);
+		}
+	}
+
+	return 0;
+}
+
+void spuctx_switch_state(struct spu_context *ctx,
+		enum spu_utilization_state new_state)
+{
+	unsigned long long curtime;
+	signed long long delta;
+	struct spu *spu;
+	enum spu_utilization_state old_state;
+	int node;
+
+	curtime = ktime_get_ns();
+	delta = curtime - ctx->stats.tstamp;
+
+	WARN_ON(!mutex_is_locked(&ctx->state_mutex));
+	WARN_ON(delta < 0);
+
+	spu = ctx->spu;
+	old_state = ctx->stats.util_state;
+	ctx->stats.util_state = new_state;
+	ctx->stats.tstamp = curtime;
+
+	/*
+	 * Update the physical SPU utilization statistics.
+	 */
+	if (spu) {
+		ctx->stats.times[old_state] += delta;
+		spu->stats.times[old_state] += delta;
+		spu->stats.util_state = new_state;
+		spu->stats.tstamp = curtime;
+		node = spu->node;
+		if (old_state == SPU_UTIL_USER)
+			atomic_dec(&cbe_spu_info[node].busy_spus);
+		if (new_state == SPU_UTIL_USER)
+			atomic_inc(&cbe_spu_info[node].busy_spus);
+	}
+}
+
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+
+static int show_spu_loadavg(struct seq_file *s, void *private)
+{
+	int a, b, c;
+
+	a = spu_avenrun[0] + (FIXED_1/200);
+	b = spu_avenrun[1] + (FIXED_1/200);
+	c = spu_avenrun[2] + (FIXED_1/200);
+
+	/*
+	 * Note that last_pid doesn't really make much sense for the
+	 * SPU loadavg (it even seems very odd on the CPU side...),
+	 * but we include it here to have a 100% compatible interface.
+	 */
+	seq_printf(s, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
+		LOAD_INT(a), LOAD_FRAC(a),
+		LOAD_INT(b), LOAD_FRAC(b),
+		LOAD_INT(c), LOAD_FRAC(c),
+		count_active_contexts(),
+		atomic_read(&nr_spu_contexts),
+		idr_get_cursor(&task_active_pid_ns(current)->idr) - 1);
+	return 0;
+};
+
+int __init spu_sched_init(void)
+{
+	struct proc_dir_entry *entry;
+	int err = -ENOMEM, i;
+
+	spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL);
+	if (!spu_prio)
+		goto out;
+
+	for (i = 0; i < MAX_PRIO; i++) {
+		INIT_LIST_HEAD(&spu_prio->runq[i]);
+		__clear_bit(i, spu_prio->bitmap);
+	}
+	spin_lock_init(&spu_prio->runq_lock);
+
+	timer_setup(&spusched_timer, spusched_wake, 0);
+	timer_setup(&spuloadavg_timer, spuloadavg_wake, 0);
+
+	spusched_task = kthread_run(spusched_thread, NULL, "spusched");
+	if (IS_ERR(spusched_task)) {
+		err = PTR_ERR(spusched_task);
+		goto out_free_spu_prio;
+	}
+
+	mod_timer(&spuloadavg_timer, 0);
+
+	entry = proc_create_single("spu_loadavg", 0, NULL, show_spu_loadavg);
+	if (!entry)
+		goto out_stop_kthread;
+
+	pr_debug("spusched: tick: %d, min ticks: %d, default ticks: %d\n",
+			SPUSCHED_TICK, MIN_SPU_TIMESLICE, DEF_SPU_TIMESLICE);
+	return 0;
+
+ out_stop_kthread:
+	kthread_stop(spusched_task);
+ out_free_spu_prio:
+	kfree(spu_prio);
+ out:
+	return err;
+}
+
+void spu_sched_exit(void)
+{
+	struct spu *spu;
+	int node;
+
+	remove_proc_entry("spu_loadavg", NULL);
+
+	del_timer_sync(&spusched_timer);
+	del_timer_sync(&spuloadavg_timer);
+	kthread_stop(spusched_task);
+
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		mutex_lock(&cbe_spu_info[node].list_mutex);
+		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list)
+			if (spu->alloc_state != SPU_FREE)
+				spu->alloc_state = SPU_FREE;
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
+	}
+	kfree(spu_prio);
+}
diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore.c b/arch/powerpc/platforms/cell/spufs/spu_restore.c
new file mode 100644
index 000000000..72c905f1e
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_restore.c
@@ -0,0 +1,336 @@
+/*
+ * spu_restore.c
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * SPU-side context restore sequence outlined in
+ * Synergistic Processor Element Book IV
+ *
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+
+#ifndef LS_SIZE
+#define LS_SIZE                 0x40000	/* 256K (in bytes) */
+#endif
+
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+#include <spu_intrinsics.h>
+#include <asm/spu_csa.h>
+#include "spu_utils.h"
+
+#define BR_INSTR		0x327fff80	/* br -4         */
+#define NOP_INSTR		0x40200000	/* nop           */
+#define HEQ_INSTR		0x7b000000	/* heq $0, $0    */
+#define STOP_INSTR		0x00000000	/* stop 0x0      */
+#define ILLEGAL_INSTR		0x00800000	/* illegal instr */
+#define RESTORE_COMPLETE	0x00003ffc	/* stop 0x3ffc   */
+
+static inline void fetch_regs_from_mem(addr64 lscsa_ea)
+{
+	unsigned int ls = (unsigned int)&regs_spill[0];
+	unsigned int size = sizeof(regs_spill);
+	unsigned int tag_id = 0;
+	unsigned int cmd = 0x40;	/* GET */
+
+	spu_writech(MFC_LSA, ls);
+	spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+	spu_writech(MFC_EAL, lscsa_ea.ui[1]);
+	spu_writech(MFC_Size, size);
+	spu_writech(MFC_TagID, tag_id);
+	spu_writech(MFC_Cmd, cmd);
+}
+
+static inline void restore_upper_240kb(addr64 lscsa_ea)
+{
+	unsigned int ls = 16384;
+	unsigned int list = (unsigned int)&dma_list[0];
+	unsigned int size = sizeof(dma_list);
+	unsigned int tag_id = 0;
+	unsigned int cmd = 0x44;	/* GETL */
+
+	/* Restore, Step 4:
+	 *    Enqueue the GETL command (tag 0) to the MFC SPU command
+	 *    queue to transfer the upper 240 kb of LS from CSA.
+	 */
+	spu_writech(MFC_LSA, ls);
+	spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+	spu_writech(MFC_EAL, list);
+	spu_writech(MFC_Size, size);
+	spu_writech(MFC_TagID, tag_id);
+	spu_writech(MFC_Cmd, cmd);
+}
+
+static inline void restore_decr(void)
+{
+	unsigned int offset;
+	unsigned int decr_running;
+	unsigned int decr;
+
+	/* Restore, Step 6(moved):
+	 *    If the LSCSA "decrementer running" flag is set
+	 *    then write the SPU_WrDec channel with the
+	 *    decrementer value from LSCSA.
+	 */
+	offset = LSCSA_QW_OFFSET(decr_status);
+	decr_running = regs_spill[offset].slot[0] & SPU_DECR_STATUS_RUNNING;
+	if (decr_running) {
+		offset = LSCSA_QW_OFFSET(decr);
+		decr = regs_spill[offset].slot[0];
+		spu_writech(SPU_WrDec, decr);
+	}
+}
+
+static inline void write_ppu_mb(void)
+{
+	unsigned int offset;
+	unsigned int data;
+
+	/* Restore, Step 11:
+	 *    Write the MFC_WrOut_MB channel with the PPU_MB
+	 *    data from LSCSA.
+	 */
+	offset = LSCSA_QW_OFFSET(ppu_mb);
+	data = regs_spill[offset].slot[0];
+	spu_writech(SPU_WrOutMbox, data);
+}
+
+static inline void write_ppuint_mb(void)
+{
+	unsigned int offset;
+	unsigned int data;
+
+	/* Restore, Step 12:
+	 *    Write the MFC_WrInt_MB channel with the PPUINT_MB
+	 *    data from LSCSA.
+	 */
+	offset = LSCSA_QW_OFFSET(ppuint_mb);
+	data = regs_spill[offset].slot[0];
+	spu_writech(SPU_WrOutIntrMbox, data);
+}
+
+static inline void restore_fpcr(void)
+{
+	unsigned int offset;
+	vector unsigned int fpcr;
+
+	/* Restore, Step 13:
+	 *    Restore the floating-point status and control
+	 *    register from the LSCSA.
+	 */
+	offset = LSCSA_QW_OFFSET(fpcr);
+	fpcr = regs_spill[offset].v;
+	spu_mtfpscr(fpcr);
+}
+
+static inline void restore_srr0(void)
+{
+	unsigned int offset;
+	unsigned int srr0;
+
+	/* Restore, Step 14:
+	 *    Restore the SPU SRR0 data from the LSCSA.
+	 */
+	offset = LSCSA_QW_OFFSET(srr0);
+	srr0 = regs_spill[offset].slot[0];
+	spu_writech(SPU_WrSRR0, srr0);
+}
+
+static inline void restore_event_mask(void)
+{
+	unsigned int offset;
+	unsigned int event_mask;
+
+	/* Restore, Step 15:
+	 *    Restore the SPU_RdEventMsk data from the LSCSA.
+	 */
+	offset = LSCSA_QW_OFFSET(event_mask);
+	event_mask = regs_spill[offset].slot[0];
+	spu_writech(SPU_WrEventMask, event_mask);
+}
+
+static inline void restore_tag_mask(void)
+{
+	unsigned int offset;
+	unsigned int tag_mask;
+
+	/* Restore, Step 16:
+	 *    Restore the SPU_RdTagMsk data from the LSCSA.
+	 */
+	offset = LSCSA_QW_OFFSET(tag_mask);
+	tag_mask = regs_spill[offset].slot[0];
+	spu_writech(MFC_WrTagMask, tag_mask);
+}
+
+static inline void restore_complete(void)
+{
+	extern void exit_fini(void);
+	unsigned int *exit_instrs = (unsigned int *)exit_fini;
+	unsigned int offset;
+	unsigned int stopped_status;
+	unsigned int stopped_code;
+
+	/* Restore, Step 18:
+	 *    Issue a stop-and-signal instruction with
+	 *    "good context restore" signal value.
+	 *
+	 * Restore, Step 19:
+	 *    There may be additional instructions placed
+	 *    here by the PPE Sequence for SPU Context
+	 *    Restore in order to restore the correct
+	 *    "stopped state".
+	 *
+	 *    This step is handled here by analyzing the
+	 *    LSCSA.stopped_status and then modifying the
+	 *    exit() function to behave appropriately.
+	 */
+
+	offset = LSCSA_QW_OFFSET(stopped_status);
+	stopped_status = regs_spill[offset].slot[0];
+	stopped_code = regs_spill[offset].slot[1];
+
+	switch (stopped_status) {
+	case SPU_STOPPED_STATUS_P_I:
+		/* SPU_Status[P,I]=1.  Add illegal instruction
+		 * followed by stop-and-signal instruction after
+		 * end of restore code.
+		 */
+		exit_instrs[0] = RESTORE_COMPLETE;
+		exit_instrs[1] = ILLEGAL_INSTR;
+		exit_instrs[2] = STOP_INSTR | stopped_code;
+		break;
+	case SPU_STOPPED_STATUS_P_H:
+		/* SPU_Status[P,H]=1.  Add 'heq $0, $0' followed
+		 * by stop-and-signal instruction after end of
+		 * restore code.
+		 */
+		exit_instrs[0] = RESTORE_COMPLETE;
+		exit_instrs[1] = HEQ_INSTR;
+		exit_instrs[2] = STOP_INSTR | stopped_code;
+		break;
+	case SPU_STOPPED_STATUS_S_P:
+		/* SPU_Status[S,P]=1.  Add nop instruction
+		 * followed by 'br -4' after end of restore
+		 * code.
+		 */
+		exit_instrs[0] = RESTORE_COMPLETE;
+		exit_instrs[1] = STOP_INSTR | stopped_code;
+		exit_instrs[2] = NOP_INSTR;
+		exit_instrs[3] = BR_INSTR;
+		break;
+	case SPU_STOPPED_STATUS_S_I:
+		/* SPU_Status[S,I]=1.  Add  illegal instruction
+		 * followed by 'br -4' after end of restore code.
+		 */
+		exit_instrs[0] = RESTORE_COMPLETE;
+		exit_instrs[1] = ILLEGAL_INSTR;
+		exit_instrs[2] = NOP_INSTR;
+		exit_instrs[3] = BR_INSTR;
+		break;
+	case SPU_STOPPED_STATUS_I:
+		/* SPU_Status[I]=1. Add illegal instruction followed
+		 * by infinite loop after end of restore sequence.
+		 */
+		exit_instrs[0] = RESTORE_COMPLETE;
+		exit_instrs[1] = ILLEGAL_INSTR;
+		exit_instrs[2] = NOP_INSTR;
+		exit_instrs[3] = BR_INSTR;
+		break;
+	case SPU_STOPPED_STATUS_S:
+		/* SPU_Status[S]=1. Add two 'nop' instructions. */
+		exit_instrs[0] = RESTORE_COMPLETE;
+		exit_instrs[1] = NOP_INSTR;
+		exit_instrs[2] = NOP_INSTR;
+		exit_instrs[3] = BR_INSTR;
+		break;
+	case SPU_STOPPED_STATUS_H:
+		/* SPU_Status[H]=1. Add 'heq $0, $0' instruction
+		 * after end of restore code.
+		 */
+		exit_instrs[0] = RESTORE_COMPLETE;
+		exit_instrs[1] = HEQ_INSTR;
+		exit_instrs[2] = NOP_INSTR;
+		exit_instrs[3] = BR_INSTR;
+		break;
+	case SPU_STOPPED_STATUS_P:
+		/* SPU_Status[P]=1. Add stop-and-signal instruction
+		 * after end of restore code.
+		 */
+		exit_instrs[0] = RESTORE_COMPLETE;
+		exit_instrs[1] = STOP_INSTR | stopped_code;
+		break;
+	case SPU_STOPPED_STATUS_R:
+		/* SPU_Status[I,S,H,P,R]=0. Add infinite loop. */
+		exit_instrs[0] = RESTORE_COMPLETE;
+		exit_instrs[1] = NOP_INSTR;
+		exit_instrs[2] = NOP_INSTR;
+		exit_instrs[3] = BR_INSTR;
+		break;
+	default:
+		/* SPU_Status[R]=1. No additional instructions. */
+		break;
+	}
+	spu_sync();
+}
+
+/**
+ * main - entry point for SPU-side context restore.
+ *
+ * This code deviates from the documented sequence in the
+ * following aspects:
+ *
+ *	1. The EA for LSCSA is passed from PPE in the
+ *	   signal notification channels.
+ *	2. The register spill area is pulled by SPU
+ *	   into LS, rather than pushed by PPE.
+ *	3. All 128 registers are restored by exit().
+ *	4. The exit() function is modified at run
+ *	   time in order to properly restore the
+ *	   SPU_Status register.
+ */
+int main()
+{
+	addr64 lscsa_ea;
+
+	lscsa_ea.ui[0] = spu_readch(SPU_RdSigNotify1);
+	lscsa_ea.ui[1] = spu_readch(SPU_RdSigNotify2);
+	fetch_regs_from_mem(lscsa_ea);
+
+	set_event_mask();		/* Step 1.  */
+	set_tag_mask();			/* Step 2.  */
+	build_dma_list(lscsa_ea);	/* Step 3.  */
+	restore_upper_240kb(lscsa_ea);	/* Step 4.  */
+					/* Step 5: done by 'exit'. */
+	enqueue_putllc(lscsa_ea);	/* Step 7. */
+	set_tag_update();		/* Step 8. */
+	read_tag_status();		/* Step 9. */
+	restore_decr();			/* moved Step 6. */
+	read_llar_status();		/* Step 10. */
+	write_ppu_mb();			/* Step 11. */
+	write_ppuint_mb();		/* Step 12. */
+	restore_fpcr();			/* Step 13. */
+	restore_srr0();			/* Step 14. */
+	restore_event_mask();		/* Step 15. */
+	restore_tag_mask();		/* Step 16. */
+					/* Step 17. done by 'exit'. */
+	restore_complete();		/* Step 18. */
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S b/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S
new file mode 100644
index 000000000..2905949de
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S
@@ -0,0 +1,116 @@
+/*
+ * crt0_r.S: Entry function for SPU-side context restore.
+ *
+ * Copyright (C) 2005 IBM
+ *
+ * Entry and exit function for SPU-side of the context restore
+ * sequence.  Sets up an initial stack frame, then branches to
+ * 'main'.  On return, restores all 128 registers from the LSCSA
+ * and exits.
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <asm/spu_csa.h>
+
+.data
+.align 7
+.globl regs_spill
+regs_spill:
+.space SIZEOF_SPU_SPILL_REGS, 0x0
+
+.text
+.global _start
+_start:
+	/* Initialize the stack pointer to point to 16368
+	 * (16kb-16). The back chain pointer is initialized
+	 * to NULL.
+	 */
+	il      $0, 0
+	il      $SP, 16368
+	stqd    $0, 0($SP)
+
+	/* Allocate a minimum stack frame for the called main.
+	 * This is needed so that main has a place to save the
+	 * link register when it calls another function.
+	 */
+	stqd    $SP, -160($SP)
+	ai      $SP, $SP, -160
+
+	/* Call the program's main function. */
+	brsl    $0, main
+
+.global exit
+.global	_exit
+exit:
+_exit:
+	/* SPU Context Restore, Step 5: Restore the remaining 112 GPRs. */
+	ila     $3, regs_spill + 256
+restore_regs:
+	lqr     $4, restore_reg_insts
+restore_reg_loop:
+	ai      $4, $4, 4
+	.balignl 16, 0x40200000
+restore_reg_insts:       /* must be quad-word aligned. */
+	lqd     $16, 0($3)
+	lqd     $17, 16($3)
+	lqd     $18, 32($3)
+	lqd     $19, 48($3)
+	andi    $5, $4, 0x7F
+	stqr    $4, restore_reg_insts
+	ai      $3, $3, 64
+	brnz    $5, restore_reg_loop
+
+	/* SPU Context Restore Step 17: Restore the first 16 GPRs. */
+	lqa $0, regs_spill + 0
+	lqa $1, regs_spill + 16
+	lqa $2, regs_spill + 32
+	lqa $3, regs_spill + 48
+	lqa $4, regs_spill + 64
+	lqa $5, regs_spill + 80
+	lqa $6, regs_spill + 96
+	lqa $7, regs_spill + 112
+	lqa $8, regs_spill + 128
+	lqa $9, regs_spill + 144
+	lqa $10, regs_spill + 160
+	lqa $11, regs_spill + 176
+	lqa $12, regs_spill + 192
+	lqa $13, regs_spill + 208
+	lqa $14, regs_spill + 224
+	lqa $15, regs_spill + 240
+
+	/* Under normal circumstances, the 'exit' function
+	 * terminates with 'stop SPU_RESTORE_COMPLETE',
+	 * indicating that the SPU-side restore code has
+	 * completed.
+	 *
+	 * However it is possible that instructions immediately
+	 * following the 'stop 0x3ffc' have been modified at run
+	 * time so as to recreate the exact SPU_Status settings
+	 * from the application, e.g. illegal instruciton, halt,
+	 * etc.
+	 */
+.global exit_fini
+.global	_exit_fini
+exit_fini:
+_exit_fini:
+	stop	SPU_RESTORE_COMPLETE
+	stop	0
+	stop	0
+	stop	0
+
+	/* Pad the size of this crt0.o to be multiple of 16 bytes. */
+.balignl 16, 0x0
diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped
new file mode 100644
index 000000000..f383b027e
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped
@@ -0,0 +1,935 @@
+/*
+ * spu_restore_dump.h: Copyright (C) 2005 IBM.
+ * Hex-dump auto generated from spu_restore.c.
+ * Do not edit!
+ */
+static unsigned int spu_restore_code[]  __attribute__((__aligned__(128))) = {
+0x40800000,
+0x409ff801,
+0x24000080,
+0x24fd8081,
+0x1cd80081,
+0x33001180,
+0x42034003,
+0x33800284,
+0x1c010204,
+0x40200000,
+0x40200000,
+0x40200000,
+0x34000190,
+0x34004191,
+0x34008192,
+0x3400c193,
+0x141fc205,
+0x23fffd84,
+0x1c100183,
+0x217ffa85,
+0x3080b000,
+0x3080b201,
+0x3080b402,
+0x3080b603,
+0x3080b804,
+0x3080ba05,
+0x3080bc06,
+0x3080be07,
+0x3080c008,
+0x3080c209,
+0x3080c40a,
+0x3080c60b,
+0x3080c80c,
+0x3080ca0d,
+0x3080cc0e,
+0x3080ce0f,
+0x00003ffc,
+0x00000000,
+0x00000000,
+0x00000000,
+0x01a00182,
+0x3ec00083,
+0xb0a14103,
+0x01a00204,
+0x3ec10083,
+0x4202c002,
+0xb0a14203,
+0x21a00802,
+0x3fbf028a,
+0x3f20050a,
+0x3fbe0502,
+0x3fe30102,
+0x21a00882,
+0x3f82028b,
+0x3fe3058b,
+0x3fbf0584,
+0x3f200204,
+0x3fbe0204,
+0x3fe30204,
+0x04000203,
+0x21a00903,
+0x40848002,
+0x21a00982,
+0x40800003,
+0x21a00a03,
+0x40802002,
+0x21a00a82,
+0x21a00083,
+0x40800082,
+0x21a00b02,
+0x10002612,
+0x42a00003,
+0x42074006,
+0x1800c204,
+0x40a00008,
+0x40800789,
+0x1c010305,
+0x34000302,
+0x1cffc489,
+0x3ec00303,
+0x3ec00287,
+0xb0408403,
+0x24000302,
+0x34000282,
+0x1c020306,
+0xb0408207,
+0x18020204,
+0x24000282,
+0x217ffa09,
+0x04000402,
+0x21a00802,
+0x3fbe0504,
+0x3fe30204,
+0x21a00884,
+0x42074002,
+0x21a00902,
+0x40803c03,
+0x21a00983,
+0x04000485,
+0x21a00a05,
+0x40802202,
+0x21a00a82,
+0x21a00805,
+0x21a00884,
+0x3fbf0582,
+0x3f200102,
+0x3fbe0102,
+0x3fe30102,
+0x21a00902,
+0x40804003,
+0x21a00983,
+0x21a00a05,
+0x40805a02,
+0x21a00a82,
+0x40800083,
+0x21a00b83,
+0x01a00c02,
+0x30809c03,
+0x34000182,
+0x14004102,
+0x21002082,
+0x01a00d82,
+0x3080a003,
+0x34000182,
+0x21a00e02,
+0x3080a203,
+0x34000182,
+0x21a00f02,
+0x3080a403,
+0x34000182,
+0x77400100,
+0x3080a603,
+0x34000182,
+0x21a00702,
+0x3080a803,
+0x34000182,
+0x21a00082,
+0x3080aa03,
+0x34000182,
+0x21a00b02,
+0x4020007f,
+0x3080ae02,
+0x42004805,
+0x3080ac04,
+0x34000103,
+0x34000202,
+0x1cffc183,
+0x3b810106,
+0x0f608184,
+0x42013802,
+0x5c020183,
+0x38810102,
+0x3b810102,
+0x21000e83,
+0x4020007f,
+0x35000100,
+0x00000470,
+0x000002f8,
+0x00000430,
+0x00000360,
+0x000002f8,
+0x000003c8,
+0x000004a8,
+0x00000298,
+0x00000360,
+0x00200000,
+0x409ffe02,
+0x30801203,
+0x40800208,
+0x3ec40084,
+0x40800407,
+0x3ac20289,
+0xb060c104,
+0x3ac1c284,
+0x20801203,
+0x38820282,
+0x41004003,
+0xb0408189,
+0x28820282,
+0x3881c282,
+0xb0408304,
+0x2881c282,
+0x00400000,
+0x40800003,
+0x35000000,
+0x30809e03,
+0x34000182,
+0x21a00382,
+0x4020007f,
+0x327fde00,
+0x409ffe02,
+0x30801203,
+0x40800206,
+0x3ec40084,
+0x40800407,
+0x40800608,
+0x3ac1828a,
+0x3ac20289,
+0xb060c104,
+0x3ac1c284,
+0x20801203,
+0x38818282,
+0x41004003,
+0xb040818a,
+0x10005b0b,
+0x41201003,
+0x28818282,
+0x3881c282,
+0xb0408184,
+0x41193f83,
+0x60ffc003,
+0x2881c282,
+0x38820282,
+0xb0408189,
+0x28820282,
+0x327fef80,
+0x409ffe02,
+0x30801203,
+0x40800207,
+0x3ec40086,
+0x4120100b,
+0x10005b14,
+0x40800404,
+0x3ac1c289,
+0x40800608,
+0xb060c106,
+0x3ac10286,
+0x3ac2028a,
+0x20801203,
+0x3881c282,
+0x41193f83,
+0x60ffc003,
+0xb0408589,
+0x2881c282,
+0x38810282,
+0xb0408586,
+0x28810282,
+0x38820282,
+0xb040818a,
+0x28820282,
+0x4020007f,
+0x327fe280,
+0x409ffe02,
+0x30801203,
+0x40800207,
+0x3ec40084,
+0x40800408,
+0x10005b14,
+0x40800609,
+0x3ac1c28a,
+0x3ac2028b,
+0xb060c104,
+0x3ac24284,
+0x20801203,
+0x41201003,
+0x3881c282,
+0xb040830a,
+0x2881c282,
+0x38820282,
+0xb040818b,
+0x41193f83,
+0x60ffc003,
+0x28820282,
+0x38824282,
+0xb0408184,
+0x28824282,
+0x4020007f,
+0x327fd580,
+0x409ffe02,
+0x1000658e,
+0x40800206,
+0x30801203,
+0x40800407,
+0x3ec40084,
+0x40800608,
+0x3ac1828a,
+0x3ac20289,
+0xb060c104,
+0x3ac1c284,
+0x20801203,
+0x413d8003,
+0x38818282,
+0x4020007f,
+0x327fd800,
+0x409ffe03,
+0x30801202,
+0x40800207,
+0x3ec40084,
+0x10005b09,
+0x3ac1c288,
+0xb0408184,
+0x4020007f,
+0x4020007f,
+0x20801202,
+0x3881c282,
+0xb0408308,
+0x2881c282,
+0x327fc680,
+0x409ffe02,
+0x1000588b,
+0x40800208,
+0x30801203,
+0x40800407,
+0x3ec40084,
+0x3ac20289,
+0xb060c104,
+0x3ac1c284,
+0x20801203,
+0x413d8003,
+0x38820282,
+0x327fbd80,
+0x00200000,
+0x00000da0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000d90,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000db0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000dc0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000d80,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000df0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000de0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000dd0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000e04,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000e00,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+};
diff --git a/arch/powerpc/platforms/cell/spufs/spu_save.c b/arch/powerpc/platforms/cell/spufs/spu_save.c
new file mode 100644
index 000000000..ae95cc170
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_save.c
@@ -0,0 +1,195 @@
+/*
+ * spu_save.c
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * SPU-side context save sequence outlined in
+ * Synergistic Processor Element Book IV
+ *
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+
+#ifndef LS_SIZE
+#define LS_SIZE                 0x40000	/* 256K (in bytes) */
+#endif
+
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+#include <spu_intrinsics.h>
+#include <asm/spu_csa.h>
+#include "spu_utils.h"
+
+static inline void save_event_mask(void)
+{
+	unsigned int offset;
+
+	/* Save, Step 2:
+	 *    Read the SPU_RdEventMsk channel and save to the LSCSA.
+	 */
+	offset = LSCSA_QW_OFFSET(event_mask);
+	regs_spill[offset].slot[0] = spu_readch(SPU_RdEventMask);
+}
+
+static inline void save_tag_mask(void)
+{
+	unsigned int offset;
+
+	/* Save, Step 3:
+	 *    Read the SPU_RdTagMsk channel and save to the LSCSA.
+	 */
+	offset = LSCSA_QW_OFFSET(tag_mask);
+	regs_spill[offset].slot[0] = spu_readch(MFC_RdTagMask);
+}
+
+static inline void save_upper_240kb(addr64 lscsa_ea)
+{
+	unsigned int ls = 16384;
+	unsigned int list = (unsigned int)&dma_list[0];
+	unsigned int size = sizeof(dma_list);
+	unsigned int tag_id = 0;
+	unsigned int cmd = 0x24;	/* PUTL */
+
+	/* Save, Step 7:
+	 *    Enqueue the PUTL command (tag 0) to the MFC SPU command
+	 *    queue to transfer the remaining 240 kb of LS to CSA.
+	 */
+	spu_writech(MFC_LSA, ls);
+	spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+	spu_writech(MFC_EAL, list);
+	spu_writech(MFC_Size, size);
+	spu_writech(MFC_TagID, tag_id);
+	spu_writech(MFC_Cmd, cmd);
+}
+
+static inline void save_fpcr(void)
+{
+	// vector unsigned int fpcr;
+	unsigned int offset;
+
+	/* Save, Step 9:
+	 *    Issue the floating-point status and control register
+	 *    read instruction, and save to the LSCSA.
+	 */
+	offset = LSCSA_QW_OFFSET(fpcr);
+	regs_spill[offset].v = spu_mffpscr();
+}
+
+static inline void save_decr(void)
+{
+	unsigned int offset;
+
+	/* Save, Step 10:
+	 *    Read and save the SPU_RdDec channel data to
+	 *    the LSCSA.
+	 */
+	offset = LSCSA_QW_OFFSET(decr);
+	regs_spill[offset].slot[0] = spu_readch(SPU_RdDec);
+}
+
+static inline void save_srr0(void)
+{
+	unsigned int offset;
+
+	/* Save, Step 11:
+	 *    Read and save the SPU_WSRR0 channel data to
+	 *    the LSCSA.
+	 */
+	offset = LSCSA_QW_OFFSET(srr0);
+	regs_spill[offset].slot[0] = spu_readch(SPU_RdSRR0);
+}
+
+static inline void spill_regs_to_mem(addr64 lscsa_ea)
+{
+	unsigned int ls = (unsigned int)&regs_spill[0];
+	unsigned int size = sizeof(regs_spill);
+	unsigned int tag_id = 0;
+	unsigned int cmd = 0x20;	/* PUT */
+
+	/* Save, Step 13:
+	 *    Enqueue a PUT command (tag 0) to send the LSCSA
+	 *    to the CSA.
+	 */
+	spu_writech(MFC_LSA, ls);
+	spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+	spu_writech(MFC_EAL, lscsa_ea.ui[1]);
+	spu_writech(MFC_Size, size);
+	spu_writech(MFC_TagID, tag_id);
+	spu_writech(MFC_Cmd, cmd);
+}
+
+static inline void enqueue_sync(addr64 lscsa_ea)
+{
+	unsigned int tag_id = 0;
+	unsigned int cmd = 0xCC;
+
+	/* Save, Step 14:
+	 *    Enqueue an MFC_SYNC command (tag 0).
+	 */
+	spu_writech(MFC_TagID, tag_id);
+	spu_writech(MFC_Cmd, cmd);
+}
+
+static inline void save_complete(void)
+{
+	/* Save, Step 18:
+	 *    Issue a stop-and-signal instruction indicating
+	 *    "save complete".  Note: This function will not
+	 *    return!!
+	 */
+	spu_stop(SPU_SAVE_COMPLETE);
+}
+
+/**
+ * main - entry point for SPU-side context save.
+ *
+ * This code deviates from the documented sequence as follows:
+ *
+ *      1. The EA for LSCSA is passed from PPE in the
+ *         signal notification channels.
+ *      2. All 128 registers are saved by crt0.o.
+ */
+int main()
+{
+	addr64 lscsa_ea;
+
+	lscsa_ea.ui[0] = spu_readch(SPU_RdSigNotify1);
+	lscsa_ea.ui[1] = spu_readch(SPU_RdSigNotify2);
+
+	/* Step 1: done by exit(). */
+	save_event_mask();	/* Step 2.  */
+	save_tag_mask();	/* Step 3.  */
+	set_event_mask();	/* Step 4.  */
+	set_tag_mask();		/* Step 5.  */
+	build_dma_list(lscsa_ea);	/* Step 6.  */
+	save_upper_240kb(lscsa_ea);	/* Step 7.  */
+	/* Step 8: done by exit(). */
+	save_fpcr();		/* Step 9.  */
+	save_decr();		/* Step 10. */
+	save_srr0();		/* Step 11. */
+	enqueue_putllc(lscsa_ea);	/* Step 12. */
+	spill_regs_to_mem(lscsa_ea);	/* Step 13. */
+	enqueue_sync(lscsa_ea);	/* Step 14. */
+	set_tag_update();	/* Step 15. */
+	read_tag_status();	/* Step 16. */
+	read_llar_status();	/* Step 17. */
+	save_complete();	/* Step 18. */
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S b/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S
new file mode 100644
index 000000000..6659d6a66
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S
@@ -0,0 +1,102 @@
+/*
+ * crt0_s.S: Entry function for SPU-side context save.
+ *
+ * Copyright (C) 2005 IBM
+ *
+ * Entry function for SPU-side of the context save sequence.
+ * Saves all 128 GPRs, sets up an initial stack frame, then
+ * branches to 'main'.
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <asm/spu_csa.h>
+
+.data
+.align 7
+.globl regs_spill
+regs_spill:
+.space SIZEOF_SPU_SPILL_REGS, 0x0
+
+.text
+.global _start
+_start:
+	/* SPU Context Save Step 1: Save the first 16 GPRs. */
+	stqa $0, regs_spill + 0
+	stqa $1, regs_spill + 16
+	stqa $2, regs_spill + 32
+	stqa $3, regs_spill + 48
+	stqa $4, regs_spill + 64
+	stqa $5, regs_spill + 80
+	stqa $6, regs_spill + 96
+	stqa $7, regs_spill + 112
+	stqa $8, regs_spill + 128
+	stqa $9, regs_spill + 144
+	stqa $10, regs_spill + 160
+	stqa $11, regs_spill + 176
+	stqa $12, regs_spill + 192
+	stqa $13, regs_spill + 208
+	stqa $14, regs_spill + 224
+	stqa $15, regs_spill + 240
+
+	/* SPU Context Save, Step 8: Save the remaining 112 GPRs. */
+	ila     $3, regs_spill + 256
+save_regs:
+	lqr     $4, save_reg_insts
+save_reg_loop:
+	ai      $4, $4, 4
+	.balignl 16, 0x40200000
+save_reg_insts:       /* must be quad-word aligned. */
+	stqd    $16, 0($3)
+	stqd    $17, 16($3)
+	stqd    $18, 32($3)
+	stqd    $19, 48($3)
+	andi    $5, $4, 0x7F
+	stqr    $4, save_reg_insts
+	ai      $3, $3, 64
+	brnz    $5, save_reg_loop
+
+	/* Initialize the stack pointer to point to 16368
+	 * (16kb-16). The back chain pointer is initialized
+	 * to NULL.
+	 */
+	il	$0, 0
+	il	$SP, 16368
+	stqd	$0, 0($SP)
+
+	/* Allocate a minimum stack frame for the called main.
+	 * This is needed so that main has a place to save the
+	 * link register when it calls another function.
+	 */
+	stqd	$SP, -160($SP)
+	ai	$SP, $SP, -160
+
+	/* Call the program's main function. */
+	brsl	$0, main
+
+	/* In this case main should not return; if it does
+	 * there has been an error in the sequence.  Execute
+	 * stop-and-signal with code=0.
+	 */
+.global exit
+.global	_exit
+exit:
+_exit:
+	stop	0x0
+
+	/* Pad the size of this crt0.o to be multiple of 16 bytes. */
+.balignl 16, 0x0
+
diff --git a/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped b/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped
new file mode 100644
index 000000000..b9f81ac8a
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped
@@ -0,0 +1,743 @@
+/*
+ * spu_save_dump.h: Copyright (C) 2005 IBM.
+ * Hex-dump auto generated from spu_save.c.
+ * Do not edit!
+ */
+static unsigned int spu_save_code[]  __attribute__((__aligned__(128))) = {
+0x20805000,
+0x20805201,
+0x20805402,
+0x20805603,
+0x20805804,
+0x20805a05,
+0x20805c06,
+0x20805e07,
+0x20806008,
+0x20806209,
+0x2080640a,
+0x2080660b,
+0x2080680c,
+0x20806a0d,
+0x20806c0e,
+0x20806e0f,
+0x4201c003,
+0x33800184,
+0x1c010204,
+0x40200000,
+0x24000190,
+0x24004191,
+0x24008192,
+0x2400c193,
+0x141fc205,
+0x23fffd84,
+0x1c100183,
+0x217ffb85,
+0x40800000,
+0x409ff801,
+0x24000080,
+0x24fd8081,
+0x1cd80081,
+0x33000180,
+0x00000000,
+0x00000000,
+0x01a00182,
+0x3ec00083,
+0xb1c38103,
+0x01a00204,
+0x3ec10082,
+0x4201400d,
+0xb1c38202,
+0x01a00583,
+0x34218682,
+0x3ed80684,
+0xb0408184,
+0x24218682,
+0x01a00603,
+0x00200000,
+0x34214682,
+0x3ed40684,
+0xb0408184,
+0x40800003,
+0x24214682,
+0x21a00083,
+0x40800082,
+0x21a00b02,
+0x4020007f,
+0x1000251e,
+0x42a00002,
+0x32800008,
+0x4205c00c,
+0x00200000,
+0x40a0000b,
+0x3f82070f,
+0x4080020a,
+0x40800709,
+0x3fe3078f,
+0x3fbf0783,
+0x3f200183,
+0x3fbe0183,
+0x3fe30187,
+0x18008387,
+0x4205c002,
+0x3ac30404,
+0x1cffc489,
+0x00200000,
+0x18008403,
+0x38830402,
+0x4cffc486,
+0x3ac28185,
+0xb0408584,
+0x28830402,
+0x1c020408,
+0x38828182,
+0xb0408385,
+0x1802c387,
+0x28828182,
+0x217ff886,
+0x04000582,
+0x32800007,
+0x21a00802,
+0x3fbf0705,
+0x3f200285,
+0x3fbe0285,
+0x3fe30285,
+0x21a00885,
+0x04000603,
+0x21a00903,
+0x40803c02,
+0x21a00982,
+0x04000386,
+0x21a00a06,
+0x40801202,
+0x21a00a82,
+0x73000003,
+0x24200683,
+0x01a00404,
+0x00200000,
+0x34204682,
+0x3ec40683,
+0xb0408203,
+0x24204682,
+0x01a00783,
+0x00200000,
+0x3421c682,
+0x3edc0684,
+0xb0408184,
+0x2421c682,
+0x21a00806,
+0x21a00885,
+0x3fbf0784,
+0x3f200204,
+0x3fbe0204,
+0x3fe30204,
+0x21a00904,
+0x40804002,
+0x21a00982,
+0x21a00a06,
+0x40805a02,
+0x21a00a82,
+0x04000683,
+0x21a00803,
+0x21a00885,
+0x21a00904,
+0x40848002,
+0x21a00982,
+0x21a00a06,
+0x40801002,
+0x21a00a82,
+0x21a00a06,
+0x40806602,
+0x00200000,
+0x35800009,
+0x21a00a82,
+0x40800083,
+0x21a00b83,
+0x01a00c02,
+0x01a00d83,
+0x00003ffb,
+0x40800003,
+0x4020007f,
+0x35000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+};
diff --git a/arch/powerpc/platforms/cell/spufs/spu_utils.h b/arch/powerpc/platforms/cell/spufs/spu_utils.h
new file mode 100644
index 000000000..58359feb6
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_utils.h
@@ -0,0 +1,160 @@
+/*
+ * utils.h: Utilities for SPU-side of the context switch operation.
+ *
+ * (C) Copyright IBM 2005
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _SPU_CONTEXT_UTILS_H_
+#define _SPU_CONTEXT_UTILS_H_
+
+/*
+ * 64-bit safe EA.
+ */
+typedef union {
+	unsigned long long ull;
+	unsigned int ui[2];
+} addr64;
+
+/*
+ * 128-bit register template.
+ */
+typedef union {
+	unsigned int slot[4];
+	vector unsigned int v;
+} spu_reg128v;
+
+/*
+ * DMA list structure.
+ */
+struct dma_list_elem {
+	unsigned int size;
+	unsigned int ea_low;
+};
+
+/*
+ * Declare storage for 8-byte aligned DMA list.
+ */
+struct dma_list_elem dma_list[15] __attribute__ ((aligned(8)));
+
+/*
+ * External definition for storage
+ * declared in crt0.
+ */
+extern spu_reg128v regs_spill[NR_SPU_SPILL_REGS];
+
+/*
+ * Compute LSCSA byte offset for a given field.
+ */
+static struct spu_lscsa *dummy = (struct spu_lscsa *)0;
+#define LSCSA_BYTE_OFFSET(_field)  \
+	((char *)(&(dummy->_field)) - (char *)(&(dummy->gprs[0].slot[0])))
+#define LSCSA_QW_OFFSET(_field)  (LSCSA_BYTE_OFFSET(_field) >> 4)
+
+static inline void set_event_mask(void)
+{
+	unsigned int event_mask = 0;
+
+	/* Save, Step 4:
+	 * Restore, Step 1:
+	 *    Set the SPU_RdEventMsk channel to zero to mask
+	 *    all events.
+	 */
+	spu_writech(SPU_WrEventMask, event_mask);
+}
+
+static inline void set_tag_mask(void)
+{
+	unsigned int tag_mask = 1;
+
+	/* Save, Step 5:
+	 * Restore, Step 2:
+	 *    Set the SPU_WrTagMsk channel to '01' to unmask
+	 *    only tag group 0.
+	 */
+	spu_writech(MFC_WrTagMask, tag_mask);
+}
+
+static inline void build_dma_list(addr64 lscsa_ea)
+{
+	unsigned int ea_low;
+	int i;
+
+	/* Save, Step 6:
+	 * Restore, Step 3:
+	 *    Update the effective address for the CSA in the
+	 *    pre-canned DMA-list in local storage.
+	 */
+	ea_low = lscsa_ea.ui[1];
+	ea_low += LSCSA_BYTE_OFFSET(ls[16384]);
+
+	for (i = 0; i < 15; i++, ea_low += 16384) {
+		dma_list[i].size = 16384;
+		dma_list[i].ea_low = ea_low;
+	}
+}
+
+static inline void enqueue_putllc(addr64 lscsa_ea)
+{
+	unsigned int ls = 0;
+	unsigned int size = 128;
+	unsigned int tag_id = 0;
+	unsigned int cmd = 0xB4;	/* PUTLLC */
+
+	/* Save, Step 12:
+	 * Restore, Step 7:
+	 *    Send a PUTLLC (tag 0) command to the MFC using
+	 *    an effective address in the CSA in order to
+	 *    remove any possible lock-line reservation.
+	 */
+	spu_writech(MFC_LSA, ls);
+	spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+	spu_writech(MFC_EAL, lscsa_ea.ui[1]);
+	spu_writech(MFC_Size, size);
+	spu_writech(MFC_TagID, tag_id);
+	spu_writech(MFC_Cmd, cmd);
+}
+
+static inline void set_tag_update(void)
+{
+	unsigned int update_any = 1;
+
+	/* Save, Step 15:
+	 * Restore, Step 8:
+	 *    Write the MFC_TagUpdate channel with '01'.
+	 */
+	spu_writech(MFC_WrTagUpdate, update_any);
+}
+
+static inline void read_tag_status(void)
+{
+	/* Save, Step 16:
+	 * Restore, Step 9:
+	 *    Read the MFC_TagStat channel data.
+	 */
+	spu_readch(MFC_RdTagStat);
+}
+
+static inline void read_llar_status(void)
+{
+	/* Save, Step 17:
+	 * Restore, Step 10:
+	 *    Read the MFC_AtomicStat channel data.
+	 */
+	spu_readch(MFC_RdAtomicStat);
+}
+
+#endif				/* _SPU_CONTEXT_UTILS_H_ */
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
new file mode 100644
index 000000000..b5fc1b3fe
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -0,0 +1,373 @@
+/*
+ * SPU file system
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef SPUFS_H
+#define SPUFS_H
+
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/fs.h>
+#include <linux/cpumask.h>
+#include <linux/sched/signal.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/spu_info.h>
+
+#define SPUFS_PS_MAP_SIZE	0x20000
+#define SPUFS_MFC_MAP_SIZE	0x1000
+#define SPUFS_CNTL_MAP_SIZE	0x1000
+#define SPUFS_SIGNAL_MAP_SIZE	PAGE_SIZE
+#define SPUFS_MSS_MAP_SIZE	0x1000
+
+/* The magic number for our file system */
+enum {
+	SPUFS_MAGIC = 0x23c9b64e,
+};
+
+struct spu_context_ops;
+struct spu_gang;
+
+/* ctx->sched_flags */
+enum {
+	SPU_SCHED_NOTIFY_ACTIVE,
+	SPU_SCHED_WAS_ACTIVE,	/* was active upon spu_acquire_saved()  */
+	SPU_SCHED_SPU_RUN,	/* context is within spu_run */
+};
+
+enum {
+	SWITCH_LOG_BUFSIZE = 4096,
+};
+
+enum {
+	SWITCH_LOG_START,
+	SWITCH_LOG_STOP,
+	SWITCH_LOG_EXIT,
+};
+
+struct switch_log {
+	wait_queue_head_t	wait;
+	unsigned long		head;
+	unsigned long		tail;
+	struct switch_log_entry {
+		struct timespec64 tstamp;
+		s32		spu_id;
+		u32		type;
+		u32		val;
+		u64		timebase;
+	} log[];
+};
+
+struct spu_context {
+	struct spu *spu;		  /* pointer to a physical SPU */
+	struct spu_state csa;		  /* SPU context save area. */
+	spinlock_t mmio_lock;		  /* protects mmio access */
+	struct address_space *local_store; /* local store mapping.  */
+	struct address_space *mfc;	   /* 'mfc' area mappings. */
+	struct address_space *cntl;	   /* 'control' area mappings. */
+	struct address_space *signal1;	   /* 'signal1' area mappings. */
+	struct address_space *signal2;	   /* 'signal2' area mappings. */
+	struct address_space *mss;	   /* 'mss' area mappings. */
+	struct address_space *psmap;	   /* 'psmap' area mappings. */
+	struct mutex mapping_lock;
+	u64 object_id;		   /* user space pointer for oprofile */
+
+	enum { SPU_STATE_RUNNABLE, SPU_STATE_SAVED } state;
+	struct mutex state_mutex;
+	struct mutex run_mutex;
+
+	struct mm_struct *owner;
+
+	struct kref kref;
+	wait_queue_head_t ibox_wq;
+	wait_queue_head_t wbox_wq;
+	wait_queue_head_t stop_wq;
+	wait_queue_head_t mfc_wq;
+	wait_queue_head_t run_wq;
+	u32 tagwait;
+	struct spu_context_ops *ops;
+	struct work_struct reap_work;
+	unsigned long flags;
+	unsigned long event_return;
+
+	struct list_head gang_list;
+	struct spu_gang *gang;
+	struct kref *prof_priv_kref;
+	void ( * prof_priv_release) (struct kref *kref);
+
+	/* owner thread */
+	pid_t tid;
+
+	/* scheduler fields */
+	struct list_head rq;
+	unsigned int time_slice;
+	unsigned long sched_flags;
+	cpumask_t cpus_allowed;
+	int policy;
+	int prio;
+	int last_ran;
+
+	/* statistics */
+	struct {
+		/* updates protected by ctx->state_mutex */
+		enum spu_utilization_state util_state;
+		unsigned long long tstamp;	/* time of last state switch */
+		unsigned long long times[SPU_UTIL_MAX];
+		unsigned long long vol_ctx_switch;
+		unsigned long long invol_ctx_switch;
+		unsigned long long min_flt;
+		unsigned long long maj_flt;
+		unsigned long long hash_flt;
+		unsigned long long slb_flt;
+		unsigned long long slb_flt_base; /* # at last ctx switch */
+		unsigned long long class2_intr;
+		unsigned long long class2_intr_base; /* # at last ctx switch */
+		unsigned long long libassist;
+	} stats;
+
+	/* context switch log */
+	struct switch_log *switch_log;
+
+	struct list_head aff_list;
+	int aff_head;
+	int aff_offset;
+};
+
+struct spu_gang {
+	struct list_head list;
+	struct mutex mutex;
+	struct kref kref;
+	int contexts;
+
+	struct spu_context *aff_ref_ctx;
+	struct list_head aff_list_head;
+	struct mutex aff_mutex;
+	int aff_flags;
+	struct spu *aff_ref_spu;
+	atomic_t aff_sched_count;
+};
+
+/* Flag bits for spu_gang aff_flags */
+#define AFF_OFFSETS_SET		1
+#define AFF_MERGED		2
+
+struct mfc_dma_command {
+	int32_t pad;	/* reserved */
+	uint32_t lsa;	/* local storage address */
+	uint64_t ea;	/* effective address */
+	uint16_t size;	/* transfer size */
+	uint16_t tag;	/* command tag */
+	uint16_t class;	/* class ID */
+	uint16_t cmd;	/* command opcode */
+};
+
+
+/* SPU context query/set operations. */
+struct spu_context_ops {
+	int (*mbox_read) (struct spu_context * ctx, u32 * data);
+	 u32(*mbox_stat_read) (struct spu_context * ctx);
+	__poll_t (*mbox_stat_poll)(struct spu_context *ctx, __poll_t events);
+	int (*ibox_read) (struct spu_context * ctx, u32 * data);
+	int (*wbox_write) (struct spu_context * ctx, u32 data);
+	 u32(*signal1_read) (struct spu_context * ctx);
+	void (*signal1_write) (struct spu_context * ctx, u32 data);
+	 u32(*signal2_read) (struct spu_context * ctx);
+	void (*signal2_write) (struct spu_context * ctx, u32 data);
+	void (*signal1_type_set) (struct spu_context * ctx, u64 val);
+	 u64(*signal1_type_get) (struct spu_context * ctx);
+	void (*signal2_type_set) (struct spu_context * ctx, u64 val);
+	 u64(*signal2_type_get) (struct spu_context * ctx);
+	 u32(*npc_read) (struct spu_context * ctx);
+	void (*npc_write) (struct spu_context * ctx, u32 data);
+	 u32(*status_read) (struct spu_context * ctx);
+	char*(*get_ls) (struct spu_context * ctx);
+	void (*privcntl_write) (struct spu_context *ctx, u64 data);
+	 u32 (*runcntl_read) (struct spu_context * ctx);
+	void (*runcntl_write) (struct spu_context * ctx, u32 data);
+	void (*runcntl_stop) (struct spu_context * ctx);
+	void (*master_start) (struct spu_context * ctx);
+	void (*master_stop) (struct spu_context * ctx);
+	int (*set_mfc_query)(struct spu_context * ctx, u32 mask, u32 mode);
+	u32 (*read_mfc_tagstatus)(struct spu_context * ctx);
+	u32 (*get_mfc_free_elements)(struct spu_context *ctx);
+	int (*send_mfc_command)(struct spu_context * ctx,
+				struct mfc_dma_command * cmd);
+	void (*dma_info_read) (struct spu_context * ctx,
+			       struct spu_dma_info * info);
+	void (*proxydma_info_read) (struct spu_context * ctx,
+				    struct spu_proxydma_info * info);
+	void (*restart_dma)(struct spu_context *ctx);
+};
+
+extern struct spu_context_ops spu_hw_ops;
+extern struct spu_context_ops spu_backing_ops;
+
+struct spufs_inode_info {
+	struct spu_context *i_ctx;
+	struct spu_gang *i_gang;
+	struct inode vfs_inode;
+	int i_openers;
+};
+#define SPUFS_I(inode) \
+	container_of(inode, struct spufs_inode_info, vfs_inode)
+
+struct spufs_tree_descr {
+	const char *name;
+	const struct file_operations *ops;
+	umode_t mode;
+	size_t size;
+};
+
+extern const struct spufs_tree_descr spufs_dir_contents[];
+extern const struct spufs_tree_descr spufs_dir_nosched_contents[];
+extern const struct spufs_tree_descr spufs_dir_debug_contents[];
+
+/* system call implementation */
+extern struct spufs_calls spufs_calls;
+struct coredump_params;
+long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status);
+long spufs_create(struct path *nd, struct dentry *dentry, unsigned int flags,
+			umode_t mode, struct file *filp);
+/* ELF coredump callbacks for writing SPU ELF notes */
+extern int spufs_coredump_extra_notes_size(void);
+extern int spufs_coredump_extra_notes_write(struct coredump_params *cprm);
+
+extern const struct file_operations spufs_context_fops;
+
+/* gang management */
+struct spu_gang *alloc_spu_gang(void);
+struct spu_gang *get_spu_gang(struct spu_gang *gang);
+int put_spu_gang(struct spu_gang *gang);
+void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx);
+void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx);
+
+/* fault handling */
+int spufs_handle_class1(struct spu_context *ctx);
+int spufs_handle_class0(struct spu_context *ctx);
+
+/* affinity */
+struct spu *affinity_check(struct spu_context *ctx);
+
+/* context management */
+extern atomic_t nr_spu_contexts;
+static inline int __must_check spu_acquire(struct spu_context *ctx)
+{
+	return mutex_lock_interruptible(&ctx->state_mutex);
+}
+
+static inline void spu_release(struct spu_context *ctx)
+{
+	mutex_unlock(&ctx->state_mutex);
+}
+
+struct spu_context * alloc_spu_context(struct spu_gang *gang);
+void destroy_spu_context(struct kref *kref);
+struct spu_context * get_spu_context(struct spu_context *ctx);
+int put_spu_context(struct spu_context *ctx);
+void spu_unmap_mappings(struct spu_context *ctx);
+
+void spu_forget(struct spu_context *ctx);
+int __must_check spu_acquire_saved(struct spu_context *ctx);
+void spu_release_saved(struct spu_context *ctx);
+
+int spu_stopped(struct spu_context *ctx, u32 * stat);
+void spu_del_from_rq(struct spu_context *ctx);
+int spu_activate(struct spu_context *ctx, unsigned long flags);
+void spu_deactivate(struct spu_context *ctx);
+void spu_yield(struct spu_context *ctx);
+void spu_switch_notify(struct spu *spu, struct spu_context *ctx);
+void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx,
+		u32 type, u32 val);
+void spu_set_timeslice(struct spu_context *ctx);
+void spu_update_sched_info(struct spu_context *ctx);
+void __spu_update_sched_info(struct spu_context *ctx);
+int __init spu_sched_init(void);
+void spu_sched_exit(void);
+
+extern char *isolated_loader;
+
+/*
+ * spufs_wait
+ *	Same as wait_event_interruptible(), except that here
+ *	we need to call spu_release(ctx) before sleeping, and
+ *	then spu_acquire(ctx) when awoken.
+ *
+ * 	Returns with state_mutex re-acquired when successful or
+ * 	with -ERESTARTSYS and the state_mutex dropped when interrupted.
+ */
+
+#define spufs_wait(wq, condition)					\
+({									\
+	int __ret = 0;							\
+	DEFINE_WAIT(__wait);						\
+	for (;;) {							\
+		prepare_to_wait(&(wq), &__wait, TASK_INTERRUPTIBLE);	\
+		if (condition)						\
+			break;						\
+		spu_release(ctx);					\
+		if (signal_pending(current)) {				\
+			__ret = -ERESTARTSYS;				\
+			break;						\
+		}							\
+		schedule();						\
+		__ret = spu_acquire(ctx);				\
+		if (__ret)						\
+			break;						\
+	}								\
+	finish_wait(&(wq), &__wait);					\
+	__ret;								\
+})
+
+size_t spu_wbox_write(struct spu_context *ctx, u32 data);
+size_t spu_ibox_read(struct spu_context *ctx, u32 *data);
+
+/* irq callback funcs. */
+void spufs_ibox_callback(struct spu *spu);
+void spufs_wbox_callback(struct spu *spu);
+void spufs_stop_callback(struct spu *spu, int irq);
+void spufs_mfc_callback(struct spu *spu);
+void spufs_dma_callback(struct spu *spu, int type);
+
+extern struct spu_coredump_calls spufs_coredump_calls;
+struct spufs_coredump_reader {
+	char *name;
+	ssize_t (*read)(struct spu_context *ctx,
+			char __user *buffer, size_t size, loff_t *pos);
+	u64 (*get)(struct spu_context *ctx);
+	size_t size;
+};
+extern const struct spufs_coredump_reader spufs_coredump_read[];
+extern int spufs_coredump_num_notes;
+
+extern int spu_init_csa(struct spu_state *csa);
+extern void spu_fini_csa(struct spu_state *csa);
+extern int spu_save(struct spu_state *prev, struct spu *spu);
+extern int spu_restore(struct spu_state *new, struct spu *spu);
+extern int spu_switch(struct spu_state *prev, struct spu_state *new,
+		      struct spu *spu);
+extern int spu_alloc_lscsa(struct spu_state *csa);
+extern void spu_free_lscsa(struct spu_state *csa);
+
+extern void spuctx_switch_state(struct spu_context *ctx,
+		enum spu_utilization_state new_state);
+
+#endif
diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.h b/arch/powerpc/platforms/cell/spufs/sputrace.h
new file mode 100644
index 000000000..1def11e91
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/sputrace.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(_TRACE_SPUFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SPUFS_H
+
+#include <linux/tracepoint.h>
+#include <linux/stringify.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM spufs
+
+TRACE_EVENT(spufs_context,
+	TP_PROTO(struct spu_context *ctx, struct spu *spu, const char *name),
+	TP_ARGS(ctx, spu, name),
+
+	TP_STRUCT__entry(
+		__field(const char *, name)
+		__field(int, owner_tid)
+		__field(int, number)
+	),
+
+	TP_fast_assign(
+		__entry->name = name;
+		__entry->owner_tid = ctx->tid;
+		__entry->number = spu ? spu->number : -1;
+	),
+
+	TP_printk("%s (ctxthread = %d, spu = %d)",
+		__entry->name, __entry->owner_tid, __entry->number)
+);
+
+#define spu_context_trace(name, ctx, spu) \
+	trace_spufs_context(ctx, spu, __stringify(name))
+#define spu_context_nospu_trace(name, ctx) \
+	trace_spufs_context(ctx, NULL, __stringify(name))
+
+#endif /* _TRACE_SPUFS_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE sputrace
+#include <trace/define_trace.h>
diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c
new file mode 100644
index 000000000..dde35551e
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/switch.c
@@ -0,0 +1,2222 @@
+/*
+ * spu_switch.c
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * Host-side part of SPU context switch sequence outlined in
+ * Synergistic Processor Element, Book IV.
+ *
+ * A fully premptive switch of an SPE is very expensive in terms
+ * of time and system resources.  SPE Book IV indicates that SPE
+ * allocation should follow a "serially reusable device" model,
+ * in which the SPE is assigned a task until it completes.  When
+ * this is not possible, this sequence may be used to premptively
+ * save, and then later (optionally) restore the context of a
+ * program executing on an SPE.
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/export.h>
+#include <linux/errno.h>
+#include <linux/hardirq.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+
+#include <asm/io.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/spu_csa.h>
+#include <asm/mmu_context.h>
+
+#include "spufs.h"
+
+#include "spu_save_dump.h"
+#include "spu_restore_dump.h"
+
+#if 0
+#define POLL_WHILE_TRUE(_c) {				\
+    do {						\
+    } while (_c);					\
+  }
+#else
+#define RELAX_SPIN_COUNT				1000
+#define POLL_WHILE_TRUE(_c) {				\
+    do {						\
+	int _i;						\
+	for (_i=0; _i<RELAX_SPIN_COUNT && (_c); _i++) { \
+	    cpu_relax();				\
+	}						\
+	if (unlikely(_c)) yield();			\
+	else break;					\
+    } while (_c);					\
+  }
+#endif				/* debug */
+
+#define POLL_WHILE_FALSE(_c)	POLL_WHILE_TRUE(!(_c))
+
+static inline void acquire_spu_lock(struct spu *spu)
+{
+	/* Save, Step 1:
+	 * Restore, Step 1:
+	 *    Acquire SPU-specific mutual exclusion lock.
+	 *    TBD.
+	 */
+}
+
+static inline void release_spu_lock(struct spu *spu)
+{
+	/* Restore, Step 76:
+	 *    Release SPU-specific mutual exclusion lock.
+	 *    TBD.
+	 */
+}
+
+static inline int check_spu_isolate(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	u32 isolate_state;
+
+	/* Save, Step 2:
+	 * Save, Step 6:
+	 *     If SPU_Status[E,L,IS] any field is '1', this
+	 *     SPU is in isolate state and cannot be context
+	 *     saved at this time.
+	 */
+	isolate_state = SPU_STATUS_ISOLATED_STATE |
+	    SPU_STATUS_ISOLATED_LOAD_STATUS | SPU_STATUS_ISOLATED_EXIT_STATUS;
+	return (in_be32(&prob->spu_status_R) & isolate_state) ? 1 : 0;
+}
+
+static inline void disable_interrupts(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 3:
+	 * Restore, Step 2:
+	 *     Save INT_Mask_class0 in CSA.
+	 *     Write INT_MASK_class0 with value of 0.
+	 *     Save INT_Mask_class1 in CSA.
+	 *     Write INT_MASK_class1 with value of 0.
+	 *     Save INT_Mask_class2 in CSA.
+	 *     Write INT_MASK_class2 with value of 0.
+	 *     Synchronize all three interrupts to be sure
+	 *     we no longer execute a handler on another CPU.
+	 */
+	spin_lock_irq(&spu->register_lock);
+	if (csa) {
+		csa->priv1.int_mask_class0_RW = spu_int_mask_get(spu, 0);
+		csa->priv1.int_mask_class1_RW = spu_int_mask_get(spu, 1);
+		csa->priv1.int_mask_class2_RW = spu_int_mask_get(spu, 2);
+	}
+	spu_int_mask_set(spu, 0, 0ul);
+	spu_int_mask_set(spu, 1, 0ul);
+	spu_int_mask_set(spu, 2, 0ul);
+	eieio();
+	spin_unlock_irq(&spu->register_lock);
+
+	/*
+	 * This flag needs to be set before calling synchronize_irq so
+	 * that the update will be visible to the relevant handlers
+	 * via a simple load.
+	 */
+	set_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags);
+	clear_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags);
+	synchronize_irq(spu->irqs[0]);
+	synchronize_irq(spu->irqs[1]);
+	synchronize_irq(spu->irqs[2]);
+}
+
+static inline void set_watchdog_timer(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 4:
+	 * Restore, Step 25.
+	 *    Set a software watchdog timer, which specifies the
+	 *    maximum allowable time for a context save sequence.
+	 *
+	 *    For present, this implementation will not set a global
+	 *    watchdog timer, as virtualization & variable system load
+	 *    may cause unpredictable execution times.
+	 */
+}
+
+static inline void inhibit_user_access(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 5:
+	 * Restore, Step 3:
+	 *     Inhibit user-space access (if provided) to this
+	 *     SPU by unmapping the virtual pages assigned to
+	 *     the SPU memory-mapped I/O (MMIO) for problem
+	 *     state. TBD.
+	 */
+}
+
+static inline void set_switch_pending(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 7:
+	 * Restore, Step 5:
+	 *     Set a software context switch pending flag.
+	 *     Done above in Step 3 - disable_interrupts().
+	 */
+}
+
+static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 8:
+	 *     Suspend DMA and save MFC_CNTL.
+	 */
+	switch (in_be64(&priv2->mfc_control_RW) &
+	       MFC_CNTL_SUSPEND_DMA_STATUS_MASK) {
+	case MFC_CNTL_SUSPEND_IN_PROGRESS:
+		POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) &
+				  MFC_CNTL_SUSPEND_DMA_STATUS_MASK) ==
+				 MFC_CNTL_SUSPEND_COMPLETE);
+		/* fall through */
+	case MFC_CNTL_SUSPEND_COMPLETE:
+		if (csa)
+			csa->priv2.mfc_control_RW =
+				in_be64(&priv2->mfc_control_RW) |
+				MFC_CNTL_SUSPEND_DMA_QUEUE;
+		break;
+	case MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION:
+		out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE);
+		POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) &
+				  MFC_CNTL_SUSPEND_DMA_STATUS_MASK) ==
+				 MFC_CNTL_SUSPEND_COMPLETE);
+		if (csa)
+			csa->priv2.mfc_control_RW =
+				in_be64(&priv2->mfc_control_RW) &
+				~MFC_CNTL_SUSPEND_DMA_QUEUE &
+				~MFC_CNTL_SUSPEND_MASK;
+		break;
+	}
+}
+
+static inline void save_spu_runcntl(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Save, Step 9:
+	 *     Save SPU_Runcntl in the CSA.  This value contains
+	 *     the "Application Desired State".
+	 */
+	csa->prob.spu_runcntl_RW = in_be32(&prob->spu_runcntl_RW);
+}
+
+static inline void save_mfc_sr1(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 10:
+	 *     Save MFC_SR1 in the CSA.
+	 */
+	csa->priv1.mfc_sr1_RW = spu_mfc_sr1_get(spu);
+}
+
+static inline void save_spu_status(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Save, Step 11:
+	 *     Read SPU_Status[R], and save to CSA.
+	 */
+	if ((in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING) == 0) {
+		csa->prob.spu_status_R = in_be32(&prob->spu_status_R);
+	} else {
+		u32 stopped;
+
+		out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+		eieio();
+		POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+				SPU_STATUS_RUNNING);
+		stopped =
+		    SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP |
+		    SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP;
+		if ((in_be32(&prob->spu_status_R) & stopped) == 0)
+			csa->prob.spu_status_R = SPU_STATUS_RUNNING;
+		else
+			csa->prob.spu_status_R = in_be32(&prob->spu_status_R);
+	}
+}
+
+static inline void save_mfc_stopped_status(struct spu_state *csa,
+		struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	const u64 mask = MFC_CNTL_DECREMENTER_RUNNING |
+			MFC_CNTL_DMA_QUEUES_EMPTY;
+
+	/* Save, Step 12:
+	 *     Read MFC_CNTL[Ds].  Update saved copy of
+	 *     CSA.MFC_CNTL[Ds].
+	 *
+	 * update: do the same with MFC_CNTL[Q].
+	 */
+	csa->priv2.mfc_control_RW &= ~mask;
+	csa->priv2.mfc_control_RW |= in_be64(&priv2->mfc_control_RW) & mask;
+}
+
+static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 13:
+	 *     Write MFC_CNTL[Dh] set to a '1' to halt
+	 *     the decrementer.
+	 */
+	out_be64(&priv2->mfc_control_RW,
+		 MFC_CNTL_DECREMENTER_HALTED | MFC_CNTL_SUSPEND_MASK);
+	eieio();
+}
+
+static inline void save_timebase(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 14:
+	 *    Read PPE Timebase High and Timebase low registers
+	 *    and save in CSA.  TBD.
+	 */
+	csa->suspend_time = get_cycles();
+}
+
+static inline void remove_other_spu_access(struct spu_state *csa,
+					   struct spu *spu)
+{
+	/* Save, Step 15:
+	 *     Remove other SPU access to this SPU by unmapping
+	 *     this SPU's pages from their address space.  TBD.
+	 */
+}
+
+static inline void do_mfc_mssync(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Save, Step 16:
+	 * Restore, Step 11.
+	 *     Write SPU_MSSync register. Poll SPU_MSSync[P]
+	 *     for a value of 0.
+	 */
+	out_be64(&prob->spc_mssync_RW, 1UL);
+	POLL_WHILE_TRUE(in_be64(&prob->spc_mssync_RW) & MS_SYNC_PENDING);
+}
+
+static inline void issue_mfc_tlbie(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 17:
+	 * Restore, Step 12.
+	 * Restore, Step 48.
+	 *     Write TLB_Invalidate_Entry[IS,VPN,L,Lp]=0 register.
+	 *     Then issue a PPE sync instruction.
+	 */
+	spu_tlb_invalidate(spu);
+	mb();
+}
+
+static inline void handle_pending_interrupts(struct spu_state *csa,
+					     struct spu *spu)
+{
+	/* Save, Step 18:
+	 *     Handle any pending interrupts from this SPU
+	 *     here.  This is OS or hypervisor specific.  One
+	 *     option is to re-enable interrupts to handle any
+	 *     pending interrupts, with the interrupt handlers
+	 *     recognizing the software Context Switch Pending
+	 *     flag, to ensure the SPU execution or MFC command
+	 *     queue is not restarted.  TBD.
+	 */
+}
+
+static inline void save_mfc_queues(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	int i;
+
+	/* Save, Step 19:
+	 *     If MFC_Cntl[Se]=0 then save
+	 *     MFC command queues.
+	 */
+	if ((in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DMA_QUEUES_EMPTY) == 0) {
+		for (i = 0; i < 8; i++) {
+			csa->priv2.puq[i].mfc_cq_data0_RW =
+			    in_be64(&priv2->puq[i].mfc_cq_data0_RW);
+			csa->priv2.puq[i].mfc_cq_data1_RW =
+			    in_be64(&priv2->puq[i].mfc_cq_data1_RW);
+			csa->priv2.puq[i].mfc_cq_data2_RW =
+			    in_be64(&priv2->puq[i].mfc_cq_data2_RW);
+			csa->priv2.puq[i].mfc_cq_data3_RW =
+			    in_be64(&priv2->puq[i].mfc_cq_data3_RW);
+		}
+		for (i = 0; i < 16; i++) {
+			csa->priv2.spuq[i].mfc_cq_data0_RW =
+			    in_be64(&priv2->spuq[i].mfc_cq_data0_RW);
+			csa->priv2.spuq[i].mfc_cq_data1_RW =
+			    in_be64(&priv2->spuq[i].mfc_cq_data1_RW);
+			csa->priv2.spuq[i].mfc_cq_data2_RW =
+			    in_be64(&priv2->spuq[i].mfc_cq_data2_RW);
+			csa->priv2.spuq[i].mfc_cq_data3_RW =
+			    in_be64(&priv2->spuq[i].mfc_cq_data3_RW);
+		}
+	}
+}
+
+static inline void save_ppu_querymask(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Save, Step 20:
+	 *     Save the PPU_QueryMask register
+	 *     in the CSA.
+	 */
+	csa->prob.dma_querymask_RW = in_be32(&prob->dma_querymask_RW);
+}
+
+static inline void save_ppu_querytype(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Save, Step 21:
+	 *     Save the PPU_QueryType register
+	 *     in the CSA.
+	 */
+	csa->prob.dma_querytype_RW = in_be32(&prob->dma_querytype_RW);
+}
+
+static inline void save_ppu_tagstatus(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Save the Prxy_TagStatus register in the CSA.
+	 *
+	 * It is unnecessary to restore dma_tagstatus_R, however,
+	 * dma_tagstatus_R in the CSA is accessed via backing_ops, so
+	 * we must save it.
+	 */
+	csa->prob.dma_tagstatus_R = in_be32(&prob->dma_tagstatus_R);
+}
+
+static inline void save_mfc_csr_tsq(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 22:
+	 *     Save the MFC_CSR_TSQ register
+	 *     in the LSCSA.
+	 */
+	csa->priv2.spu_tag_status_query_RW =
+	    in_be64(&priv2->spu_tag_status_query_RW);
+}
+
+static inline void save_mfc_csr_cmd(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 23:
+	 *     Save the MFC_CSR_CMD1 and MFC_CSR_CMD2
+	 *     registers in the CSA.
+	 */
+	csa->priv2.spu_cmd_buf1_RW = in_be64(&priv2->spu_cmd_buf1_RW);
+	csa->priv2.spu_cmd_buf2_RW = in_be64(&priv2->spu_cmd_buf2_RW);
+}
+
+static inline void save_mfc_csr_ato(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 24:
+	 *     Save the MFC_CSR_ATO register in
+	 *     the CSA.
+	 */
+	csa->priv2.spu_atomic_status_RW = in_be64(&priv2->spu_atomic_status_RW);
+}
+
+static inline void save_mfc_tclass_id(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 25:
+	 *     Save the MFC_TCLASS_ID register in
+	 *     the CSA.
+	 */
+	csa->priv1.mfc_tclass_id_RW = spu_mfc_tclass_id_get(spu);
+}
+
+static inline void set_mfc_tclass_id(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 26:
+	 * Restore, Step 23.
+	 *     Write the MFC_TCLASS_ID register with
+	 *     the value 0x10000000.
+	 */
+	spu_mfc_tclass_id_set(spu, 0x10000000);
+	eieio();
+}
+
+static inline void purge_mfc_queue(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 27:
+	 * Restore, Step 14.
+	 *     Write MFC_CNTL[Pc]=1 (purge queue).
+	 */
+	out_be64(&priv2->mfc_control_RW,
+			MFC_CNTL_PURGE_DMA_REQUEST |
+			MFC_CNTL_SUSPEND_MASK);
+	eieio();
+}
+
+static inline void wait_purge_complete(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 28:
+	 *     Poll MFC_CNTL[Ps] until value '11' is read
+	 *     (purge complete).
+	 */
+	POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) &
+			 MFC_CNTL_PURGE_DMA_STATUS_MASK) ==
+			 MFC_CNTL_PURGE_DMA_COMPLETE);
+}
+
+static inline void setup_mfc_sr1(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 30:
+	 * Restore, Step 18:
+	 *     Write MFC_SR1 with MFC_SR1[D=0,S=1] and
+	 *     MFC_SR1[TL,R,Pr,T] set correctly for the
+	 *     OS specific environment.
+	 *
+	 *     Implementation note: The SPU-side code
+	 *     for save/restore is privileged, so the
+	 *     MFC_SR1[Pr] bit is not set.
+	 *
+	 */
+	spu_mfc_sr1_set(spu, (MFC_STATE1_MASTER_RUN_CONTROL_MASK |
+			      MFC_STATE1_RELOCATE_MASK |
+			      MFC_STATE1_BUS_TLBIE_MASK));
+}
+
+static inline void save_spu_npc(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Save, Step 31:
+	 *     Save SPU_NPC in the CSA.
+	 */
+	csa->prob.spu_npc_RW = in_be32(&prob->spu_npc_RW);
+}
+
+static inline void save_spu_privcntl(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 32:
+	 *     Save SPU_PrivCntl in the CSA.
+	 */
+	csa->priv2.spu_privcntl_RW = in_be64(&priv2->spu_privcntl_RW);
+}
+
+static inline void reset_spu_privcntl(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 33:
+	 * Restore, Step 16:
+	 *     Write SPU_PrivCntl[S,Le,A] fields reset to 0.
+	 */
+	out_be64(&priv2->spu_privcntl_RW, 0UL);
+	eieio();
+}
+
+static inline void save_spu_lslr(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 34:
+	 *     Save SPU_LSLR in the CSA.
+	 */
+	csa->priv2.spu_lslr_RW = in_be64(&priv2->spu_lslr_RW);
+}
+
+static inline void reset_spu_lslr(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 35:
+	 * Restore, Step 17.
+	 *     Reset SPU_LSLR.
+	 */
+	out_be64(&priv2->spu_lslr_RW, LS_ADDR_MASK);
+	eieio();
+}
+
+static inline void save_spu_cfg(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 36:
+	 *     Save SPU_Cfg in the CSA.
+	 */
+	csa->priv2.spu_cfg_RW = in_be64(&priv2->spu_cfg_RW);
+}
+
+static inline void save_pm_trace(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 37:
+	 *     Save PM_Trace_Tag_Wait_Mask in the CSA.
+	 *     Not performed by this implementation.
+	 */
+}
+
+static inline void save_mfc_rag(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 38:
+	 *     Save RA_GROUP_ID register and the
+	 *     RA_ENABLE reigster in the CSA.
+	 */
+	csa->priv1.resource_allocation_groupID_RW =
+		spu_resource_allocation_groupID_get(spu);
+	csa->priv1.resource_allocation_enable_RW =
+		spu_resource_allocation_enable_get(spu);
+}
+
+static inline void save_ppu_mb_stat(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Save, Step 39:
+	 *     Save MB_Stat register in the CSA.
+	 */
+	csa->prob.mb_stat_R = in_be32(&prob->mb_stat_R);
+}
+
+static inline void save_ppu_mb(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Save, Step 40:
+	 *     Save the PPU_MB register in the CSA.
+	 */
+	csa->prob.pu_mb_R = in_be32(&prob->pu_mb_R);
+}
+
+static inline void save_ppuint_mb(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 41:
+	 *     Save the PPUINT_MB register in the CSA.
+	 */
+	csa->priv2.puint_mb_R = in_be64(&priv2->puint_mb_R);
+}
+
+static inline void save_ch_part1(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
+	int i;
+
+	/* Save, Step 42:
+	 */
+
+	/* Save CH 1, without channel count */
+	out_be64(&priv2->spu_chnlcntptr_RW, 1);
+	csa->spu_chnldata_RW[1] = in_be64(&priv2->spu_chnldata_RW);
+
+	/* Save the following CH: [0,3,4,24,25,27] */
+	for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
+		idx = ch_indices[i];
+		out_be64(&priv2->spu_chnlcntptr_RW, idx);
+		eieio();
+		csa->spu_chnldata_RW[idx] = in_be64(&priv2->spu_chnldata_RW);
+		csa->spu_chnlcnt_RW[idx] = in_be64(&priv2->spu_chnlcnt_RW);
+		out_be64(&priv2->spu_chnldata_RW, 0UL);
+		out_be64(&priv2->spu_chnlcnt_RW, 0UL);
+		eieio();
+	}
+}
+
+static inline void save_spu_mb(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	int i;
+
+	/* Save, Step 43:
+	 *     Save SPU Read Mailbox Channel.
+	 */
+	out_be64(&priv2->spu_chnlcntptr_RW, 29UL);
+	eieio();
+	csa->spu_chnlcnt_RW[29] = in_be64(&priv2->spu_chnlcnt_RW);
+	for (i = 0; i < 4; i++) {
+		csa->spu_mailbox_data[i] = in_be64(&priv2->spu_chnldata_RW);
+	}
+	out_be64(&priv2->spu_chnlcnt_RW, 0UL);
+	eieio();
+}
+
+static inline void save_mfc_cmd(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 44:
+	 *     Save MFC_CMD Channel.
+	 */
+	out_be64(&priv2->spu_chnlcntptr_RW, 21UL);
+	eieio();
+	csa->spu_chnlcnt_RW[21] = in_be64(&priv2->spu_chnlcnt_RW);
+	eieio();
+}
+
+static inline void reset_ch(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	u64 ch_indices[4] = { 21UL, 23UL, 28UL, 30UL };
+	u64 ch_counts[4] = { 16UL, 1UL, 1UL, 1UL };
+	u64 idx;
+	int i;
+
+	/* Save, Step 45:
+	 *     Reset the following CH: [21, 23, 28, 30]
+	 */
+	for (i = 0; i < 4; i++) {
+		idx = ch_indices[i];
+		out_be64(&priv2->spu_chnlcntptr_RW, idx);
+		eieio();
+		out_be64(&priv2->spu_chnlcnt_RW, ch_counts[i]);
+		eieio();
+	}
+}
+
+static inline void resume_mfc_queue(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Save, Step 46:
+	 * Restore, Step 25.
+	 *     Write MFC_CNTL[Sc]=0 (resume queue processing).
+	 */
+	out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESUME_DMA_QUEUE);
+}
+
+static inline void setup_mfc_slbs(struct spu_state *csa, struct spu *spu,
+		unsigned int *code, int code_size)
+{
+	/* Save, Step 47:
+	 * Restore, Step 30.
+	 *     If MFC_SR1[R]=1, write 0 to SLB_Invalidate_All
+	 *     register, then initialize SLB_VSID and SLB_ESID
+	 *     to provide access to SPU context save code and
+	 *     LSCSA.
+	 *
+	 *     This implementation places both the context
+	 *     switch code and LSCSA in kernel address space.
+	 *
+	 *     Further this implementation assumes that the
+	 *     MFC_SR1[R]=1 (in other words, assume that
+	 *     translation is desired by OS environment).
+	 */
+	spu_invalidate_slbs(spu);
+	spu_setup_kernel_slbs(spu, csa->lscsa, code, code_size);
+}
+
+static inline void set_switch_active(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 48:
+	 * Restore, Step 23.
+	 *     Change the software context switch pending flag
+	 *     to context switch active.  This implementation does
+	 *     not uses a switch active flag.
+	 *
+	 * Now that we have saved the mfc in the csa, we can add in the
+	 * restart command if an exception occurred.
+	 */
+	if (test_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags))
+		csa->priv2.mfc_control_RW |= MFC_CNTL_RESTART_DMA_COMMAND;
+	clear_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags);
+	mb();
+}
+
+static inline void enable_interrupts(struct spu_state *csa, struct spu *spu)
+{
+	unsigned long class1_mask = CLASS1_ENABLE_SEGMENT_FAULT_INTR |
+	    CLASS1_ENABLE_STORAGE_FAULT_INTR;
+
+	/* Save, Step 49:
+	 * Restore, Step 22:
+	 *     Reset and then enable interrupts, as
+	 *     needed by OS.
+	 *
+	 *     This implementation enables only class1
+	 *     (translation) interrupts.
+	 */
+	spin_lock_irq(&spu->register_lock);
+	spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK);
+	spu_int_stat_clear(spu, 1, CLASS1_INTR_MASK);
+	spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK);
+	spu_int_mask_set(spu, 0, 0ul);
+	spu_int_mask_set(spu, 1, class1_mask);
+	spu_int_mask_set(spu, 2, 0ul);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+static inline int send_mfc_dma(struct spu *spu, unsigned long ea,
+			       unsigned int ls_offset, unsigned int size,
+			       unsigned int tag, unsigned int rclass,
+			       unsigned int cmd)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	union mfc_tag_size_class_cmd command;
+	unsigned int transfer_size;
+	volatile unsigned int status = 0x0;
+
+	while (size > 0) {
+		transfer_size =
+		    (size > MFC_MAX_DMA_SIZE) ? MFC_MAX_DMA_SIZE : size;
+		command.u.mfc_size = transfer_size;
+		command.u.mfc_tag = tag;
+		command.u.mfc_rclassid = rclass;
+		command.u.mfc_cmd = cmd;
+		do {
+			out_be32(&prob->mfc_lsa_W, ls_offset);
+			out_be64(&prob->mfc_ea_W, ea);
+			out_be64(&prob->mfc_union_W.all64, command.all64);
+			status =
+			    in_be32(&prob->mfc_union_W.by32.mfc_class_cmd32);
+			if (unlikely(status & 0x2)) {
+				cpu_relax();
+			}
+		} while (status & 0x3);
+		size -= transfer_size;
+		ea += transfer_size;
+		ls_offset += transfer_size;
+	}
+	return 0;
+}
+
+static inline void save_ls_16kb(struct spu_state *csa, struct spu *spu)
+{
+	unsigned long addr = (unsigned long)&csa->lscsa->ls[0];
+	unsigned int ls_offset = 0x0;
+	unsigned int size = 16384;
+	unsigned int tag = 0;
+	unsigned int rclass = 0;
+	unsigned int cmd = MFC_PUT_CMD;
+
+	/* Save, Step 50:
+	 *     Issue a DMA command to copy the first 16K bytes
+	 *     of local storage to the CSA.
+	 */
+	send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd);
+}
+
+static inline void set_spu_npc(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Save, Step 51:
+	 * Restore, Step 31.
+	 *     Write SPU_NPC[IE]=0 and SPU_NPC[LSA] to entry
+	 *     point address of context save code in local
+	 *     storage.
+	 *
+	 *     This implementation uses SPU-side save/restore
+	 *     programs with entry points at LSA of 0.
+	 */
+	out_be32(&prob->spu_npc_RW, 0);
+	eieio();
+}
+
+static inline void set_signot1(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	union {
+		u64 ull;
+		u32 ui[2];
+	} addr64;
+
+	/* Save, Step 52:
+	 * Restore, Step 32:
+	 *    Write SPU_Sig_Notify_1 register with upper 32-bits
+	 *    of the CSA.LSCSA effective address.
+	 */
+	addr64.ull = (u64) csa->lscsa;
+	out_be32(&prob->signal_notify1, addr64.ui[0]);
+	eieio();
+}
+
+static inline void set_signot2(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	union {
+		u64 ull;
+		u32 ui[2];
+	} addr64;
+
+	/* Save, Step 53:
+	 * Restore, Step 33:
+	 *    Write SPU_Sig_Notify_2 register with lower 32-bits
+	 *    of the CSA.LSCSA effective address.
+	 */
+	addr64.ull = (u64) csa->lscsa;
+	out_be32(&prob->signal_notify2, addr64.ui[1]);
+	eieio();
+}
+
+static inline void send_save_code(struct spu_state *csa, struct spu *spu)
+{
+	unsigned long addr = (unsigned long)&spu_save_code[0];
+	unsigned int ls_offset = 0x0;
+	unsigned int size = sizeof(spu_save_code);
+	unsigned int tag = 0;
+	unsigned int rclass = 0;
+	unsigned int cmd = MFC_GETFS_CMD;
+
+	/* Save, Step 54:
+	 *     Issue a DMA command to copy context save code
+	 *     to local storage and start SPU.
+	 */
+	send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd);
+}
+
+static inline void set_ppu_querymask(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Save, Step 55:
+	 * Restore, Step 38.
+	 *     Write PPU_QueryMask=1 (enable Tag Group 0)
+	 *     and issue eieio instruction.
+	 */
+	out_be32(&prob->dma_querymask_RW, MFC_TAGID_TO_TAGMASK(0));
+	eieio();
+}
+
+static inline void wait_tag_complete(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	u32 mask = MFC_TAGID_TO_TAGMASK(0);
+	unsigned long flags;
+
+	/* Save, Step 56:
+	 * Restore, Step 39.
+	 * Restore, Step 39.
+	 * Restore, Step 46.
+	 *     Poll PPU_TagStatus[gn] until 01 (Tag group 0 complete)
+	 *     or write PPU_QueryType[TS]=01 and wait for Tag Group
+	 *     Complete Interrupt.  Write INT_Stat_Class0 or
+	 *     INT_Stat_Class2 with value of 'handled'.
+	 */
+	POLL_WHILE_FALSE(in_be32(&prob->dma_tagstatus_R) & mask);
+
+	local_irq_save(flags);
+	spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK);
+	spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK);
+	local_irq_restore(flags);
+}
+
+static inline void wait_spu_stopped(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	unsigned long flags;
+
+	/* Save, Step 57:
+	 * Restore, Step 40.
+	 *     Poll until SPU_Status[R]=0 or wait for SPU Class 0
+	 *     or SPU Class 2 interrupt.  Write INT_Stat_class0
+	 *     or INT_Stat_class2 with value of handled.
+	 */
+	POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING);
+
+	local_irq_save(flags);
+	spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK);
+	spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK);
+	local_irq_restore(flags);
+}
+
+static inline int check_save_status(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	u32 complete;
+
+	/* Save, Step 54:
+	 *     If SPU_Status[P]=1 and SPU_Status[SC] = "success",
+	 *     context save succeeded, otherwise context save
+	 *     failed.
+	 */
+	complete = ((SPU_SAVE_COMPLETE << SPU_STOP_STATUS_SHIFT) |
+		    SPU_STATUS_STOPPED_BY_STOP);
+	return (in_be32(&prob->spu_status_R) != complete) ? 1 : 0;
+}
+
+static inline void terminate_spu_app(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 4:
+	 *    If required, notify the "using application" that
+	 *    the SPU task has been terminated.  TBD.
+	 */
+}
+
+static inline void suspend_mfc_and_halt_decr(struct spu_state *csa,
+		struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 7:
+	 *     Write MFC_Cntl[Dh,Sc,Sm]='1','1','0' to suspend
+	 *     the queue and halt the decrementer.
+	 */
+	out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE |
+		 MFC_CNTL_DECREMENTER_HALTED);
+	eieio();
+}
+
+static inline void wait_suspend_mfc_complete(struct spu_state *csa,
+					     struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 8:
+	 * Restore, Step 47.
+	 *     Poll MFC_CNTL[Ss] until 11 is returned.
+	 */
+	POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) &
+			 MFC_CNTL_SUSPEND_DMA_STATUS_MASK) ==
+			 MFC_CNTL_SUSPEND_COMPLETE);
+}
+
+static inline int suspend_spe(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Restore, Step 9:
+	 *    If SPU_Status[R]=1, stop SPU execution
+	 *    and wait for stop to complete.
+	 *
+	 *    Returns       1 if SPU_Status[R]=1 on entry.
+	 *                  0 otherwise
+	 */
+	if (in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING) {
+		if (in_be32(&prob->spu_status_R) &
+		    SPU_STATUS_ISOLATED_EXIT_STATUS) {
+			POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+					SPU_STATUS_RUNNING);
+		}
+		if ((in_be32(&prob->spu_status_R) &
+		     SPU_STATUS_ISOLATED_LOAD_STATUS)
+		    || (in_be32(&prob->spu_status_R) &
+			SPU_STATUS_ISOLATED_STATE)) {
+			out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+			eieio();
+			POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+					SPU_STATUS_RUNNING);
+			out_be32(&prob->spu_runcntl_RW, 0x2);
+			eieio();
+			POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+					SPU_STATUS_RUNNING);
+		}
+		if (in_be32(&prob->spu_status_R) &
+		    SPU_STATUS_WAITING_FOR_CHANNEL) {
+			out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+			eieio();
+			POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+					SPU_STATUS_RUNNING);
+		}
+		return 1;
+	}
+	return 0;
+}
+
+static inline void clear_spu_status(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Restore, Step 10:
+	 *    If SPU_Status[R]=0 and SPU_Status[E,L,IS]=1,
+	 *    release SPU from isolate state.
+	 */
+	if (!(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING)) {
+		if (in_be32(&prob->spu_status_R) &
+		    SPU_STATUS_ISOLATED_EXIT_STATUS) {
+			spu_mfc_sr1_set(spu,
+					MFC_STATE1_MASTER_RUN_CONTROL_MASK);
+			eieio();
+			out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE);
+			eieio();
+			POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+					SPU_STATUS_RUNNING);
+		}
+		if ((in_be32(&prob->spu_status_R) &
+		     SPU_STATUS_ISOLATED_LOAD_STATUS)
+		    || (in_be32(&prob->spu_status_R) &
+			SPU_STATUS_ISOLATED_STATE)) {
+			spu_mfc_sr1_set(spu,
+					MFC_STATE1_MASTER_RUN_CONTROL_MASK);
+			eieio();
+			out_be32(&prob->spu_runcntl_RW, 0x2);
+			eieio();
+			POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+					SPU_STATUS_RUNNING);
+		}
+	}
+}
+
+static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	u64 ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
+	u64 idx;
+	int i;
+
+	/* Restore, Step 20:
+	 */
+
+	/* Reset CH 1 */
+	out_be64(&priv2->spu_chnlcntptr_RW, 1);
+	out_be64(&priv2->spu_chnldata_RW, 0UL);
+
+	/* Reset the following CH: [0,3,4,24,25,27] */
+	for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
+		idx = ch_indices[i];
+		out_be64(&priv2->spu_chnlcntptr_RW, idx);
+		eieio();
+		out_be64(&priv2->spu_chnldata_RW, 0UL);
+		out_be64(&priv2->spu_chnlcnt_RW, 0UL);
+		eieio();
+	}
+}
+
+static inline void reset_ch_part2(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	u64 ch_indices[5] = { 21UL, 23UL, 28UL, 29UL, 30UL };
+	u64 ch_counts[5] = { 16UL, 1UL, 1UL, 0UL, 1UL };
+	u64 idx;
+	int i;
+
+	/* Restore, Step 21:
+	 *     Reset the following CH: [21, 23, 28, 29, 30]
+	 */
+	for (i = 0; i < 5; i++) {
+		idx = ch_indices[i];
+		out_be64(&priv2->spu_chnlcntptr_RW, idx);
+		eieio();
+		out_be64(&priv2->spu_chnlcnt_RW, ch_counts[i]);
+		eieio();
+	}
+}
+
+static inline void setup_spu_status_part1(struct spu_state *csa,
+					  struct spu *spu)
+{
+	u32 status_P = SPU_STATUS_STOPPED_BY_STOP;
+	u32 status_I = SPU_STATUS_INVALID_INSTR;
+	u32 status_H = SPU_STATUS_STOPPED_BY_HALT;
+	u32 status_S = SPU_STATUS_SINGLE_STEP;
+	u32 status_S_I = SPU_STATUS_SINGLE_STEP | SPU_STATUS_INVALID_INSTR;
+	u32 status_S_P = SPU_STATUS_SINGLE_STEP | SPU_STATUS_STOPPED_BY_STOP;
+	u32 status_P_H = SPU_STATUS_STOPPED_BY_HALT |SPU_STATUS_STOPPED_BY_STOP;
+	u32 status_P_I = SPU_STATUS_STOPPED_BY_STOP |SPU_STATUS_INVALID_INSTR;
+	u32 status_code;
+
+	/* Restore, Step 27:
+	 *     If the CSA.SPU_Status[I,S,H,P]=1 then add the correct
+	 *     instruction sequence to the end of the SPU based restore
+	 *     code (after the "context restored" stop and signal) to
+	 *     restore the correct SPU status.
+	 *
+	 *     NOTE: Rather than modifying the SPU executable, we
+	 *     instead add a new 'stopped_status' field to the
+	 *     LSCSA.  The SPU-side restore reads this field and
+	 *     takes the appropriate action when exiting.
+	 */
+
+	status_code =
+	    (csa->prob.spu_status_R >> SPU_STOP_STATUS_SHIFT) & 0xFFFF;
+	if ((csa->prob.spu_status_R & status_P_I) == status_P_I) {
+
+		/* SPU_Status[P,I]=1 - Illegal Instruction followed
+		 * by Stop and Signal instruction, followed by 'br -4'.
+		 *
+		 */
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P_I;
+		csa->lscsa->stopped_status.slot[1] = status_code;
+
+	} else if ((csa->prob.spu_status_R & status_P_H) == status_P_H) {
+
+		/* SPU_Status[P,H]=1 - Halt Conditional, followed
+		 * by Stop and Signal instruction, followed by
+		 * 'br -4'.
+		 */
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P_H;
+		csa->lscsa->stopped_status.slot[1] = status_code;
+
+	} else if ((csa->prob.spu_status_R & status_S_P) == status_S_P) {
+
+		/* SPU_Status[S,P]=1 - Stop and Signal instruction
+		 * followed by 'br -4'.
+		 */
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_S_P;
+		csa->lscsa->stopped_status.slot[1] = status_code;
+
+	} else if ((csa->prob.spu_status_R & status_S_I) == status_S_I) {
+
+		/* SPU_Status[S,I]=1 - Illegal instruction followed
+		 * by 'br -4'.
+		 */
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_S_I;
+		csa->lscsa->stopped_status.slot[1] = status_code;
+
+	} else if ((csa->prob.spu_status_R & status_P) == status_P) {
+
+		/* SPU_Status[P]=1 - Stop and Signal instruction
+		 * followed by 'br -4'.
+		 */
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P;
+		csa->lscsa->stopped_status.slot[1] = status_code;
+
+	} else if ((csa->prob.spu_status_R & status_H) == status_H) {
+
+		/* SPU_Status[H]=1 - Halt Conditional, followed
+		 * by 'br -4'.
+		 */
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_H;
+
+	} else if ((csa->prob.spu_status_R & status_S) == status_S) {
+
+		/* SPU_Status[S]=1 - Two nop instructions.
+		 */
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_S;
+
+	} else if ((csa->prob.spu_status_R & status_I) == status_I) {
+
+		/* SPU_Status[I]=1 - Illegal instruction followed
+		 * by 'br -4'.
+		 */
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_I;
+
+	}
+}
+
+static inline void setup_spu_status_part2(struct spu_state *csa,
+					  struct spu *spu)
+{
+	u32 mask;
+
+	/* Restore, Step 28:
+	 *     If the CSA.SPU_Status[I,S,H,P,R]=0 then
+	 *     add a 'br *' instruction to the end of
+	 *     the SPU based restore code.
+	 *
+	 *     NOTE: Rather than modifying the SPU executable, we
+	 *     instead add a new 'stopped_status' field to the
+	 *     LSCSA.  The SPU-side restore reads this field and
+	 *     takes the appropriate action when exiting.
+	 */
+	mask = SPU_STATUS_INVALID_INSTR |
+	    SPU_STATUS_SINGLE_STEP |
+	    SPU_STATUS_STOPPED_BY_HALT |
+	    SPU_STATUS_STOPPED_BY_STOP | SPU_STATUS_RUNNING;
+	if (!(csa->prob.spu_status_R & mask)) {
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_R;
+	}
+}
+
+static inline void restore_mfc_rag(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 29:
+	 *     Restore RA_GROUP_ID register and the
+	 *     RA_ENABLE reigster from the CSA.
+	 */
+	spu_resource_allocation_groupID_set(spu,
+			csa->priv1.resource_allocation_groupID_RW);
+	spu_resource_allocation_enable_set(spu,
+			csa->priv1.resource_allocation_enable_RW);
+}
+
+static inline void send_restore_code(struct spu_state *csa, struct spu *spu)
+{
+	unsigned long addr = (unsigned long)&spu_restore_code[0];
+	unsigned int ls_offset = 0x0;
+	unsigned int size = sizeof(spu_restore_code);
+	unsigned int tag = 0;
+	unsigned int rclass = 0;
+	unsigned int cmd = MFC_GETFS_CMD;
+
+	/* Restore, Step 37:
+	 *     Issue MFC DMA command to copy context
+	 *     restore code to local storage.
+	 */
+	send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd);
+}
+
+static inline void setup_decr(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 34:
+	 *     If CSA.MFC_CNTL[Ds]=1 (decrementer was
+	 *     running) then adjust decrementer, set
+	 *     decrementer running status in LSCSA,
+	 *     and set decrementer "wrapped" status
+	 *     in LSCSA.
+	 */
+	if (csa->priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING) {
+		cycles_t resume_time = get_cycles();
+		cycles_t delta_time = resume_time - csa->suspend_time;
+
+		csa->lscsa->decr_status.slot[0] = SPU_DECR_STATUS_RUNNING;
+		if (csa->lscsa->decr.slot[0] < delta_time) {
+			csa->lscsa->decr_status.slot[0] |=
+				 SPU_DECR_STATUS_WRAPPED;
+		}
+
+		csa->lscsa->decr.slot[0] -= delta_time;
+	} else {
+		csa->lscsa->decr_status.slot[0] = 0;
+	}
+}
+
+static inline void setup_ppu_mb(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 35:
+	 *     Copy the CSA.PU_MB data into the LSCSA.
+	 */
+	csa->lscsa->ppu_mb.slot[0] = csa->prob.pu_mb_R;
+}
+
+static inline void setup_ppuint_mb(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 36:
+	 *     Copy the CSA.PUINT_MB data into the LSCSA.
+	 */
+	csa->lscsa->ppuint_mb.slot[0] = csa->priv2.puint_mb_R;
+}
+
+static inline int check_restore_status(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	u32 complete;
+
+	/* Restore, Step 40:
+	 *     If SPU_Status[P]=1 and SPU_Status[SC] = "success",
+	 *     context restore succeeded, otherwise context restore
+	 *     failed.
+	 */
+	complete = ((SPU_RESTORE_COMPLETE << SPU_STOP_STATUS_SHIFT) |
+		    SPU_STATUS_STOPPED_BY_STOP);
+	return (in_be32(&prob->spu_status_R) != complete) ? 1 : 0;
+}
+
+static inline void restore_spu_privcntl(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 41:
+	 *     Restore SPU_PrivCntl from the CSA.
+	 */
+	out_be64(&priv2->spu_privcntl_RW, csa->priv2.spu_privcntl_RW);
+	eieio();
+}
+
+static inline void restore_status_part1(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	u32 mask;
+
+	/* Restore, Step 42:
+	 *     If any CSA.SPU_Status[I,S,H,P]=1, then
+	 *     restore the error or single step state.
+	 */
+	mask = SPU_STATUS_INVALID_INSTR |
+	    SPU_STATUS_SINGLE_STEP |
+	    SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP;
+	if (csa->prob.spu_status_R & mask) {
+		out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE);
+		eieio();
+		POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+				SPU_STATUS_RUNNING);
+	}
+}
+
+static inline void restore_status_part2(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	u32 mask;
+
+	/* Restore, Step 43:
+	 *     If all CSA.SPU_Status[I,S,H,P,R]=0 then write
+	 *     SPU_RunCntl[R0R1]='01', wait for SPU_Status[R]=1,
+	 *     then write '00' to SPU_RunCntl[R0R1] and wait
+	 *     for SPU_Status[R]=0.
+	 */
+	mask = SPU_STATUS_INVALID_INSTR |
+	    SPU_STATUS_SINGLE_STEP |
+	    SPU_STATUS_STOPPED_BY_HALT |
+	    SPU_STATUS_STOPPED_BY_STOP | SPU_STATUS_RUNNING;
+	if (!(csa->prob.spu_status_R & mask)) {
+		out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE);
+		eieio();
+		POLL_WHILE_FALSE(in_be32(&prob->spu_status_R) &
+				 SPU_STATUS_RUNNING);
+		out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+		eieio();
+		POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+				SPU_STATUS_RUNNING);
+	}
+}
+
+static inline void restore_ls_16kb(struct spu_state *csa, struct spu *spu)
+{
+	unsigned long addr = (unsigned long)&csa->lscsa->ls[0];
+	unsigned int ls_offset = 0x0;
+	unsigned int size = 16384;
+	unsigned int tag = 0;
+	unsigned int rclass = 0;
+	unsigned int cmd = MFC_GET_CMD;
+
+	/* Restore, Step 44:
+	 *     Issue a DMA command to restore the first
+	 *     16kb of local storage from CSA.
+	 */
+	send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd);
+}
+
+static inline void suspend_mfc(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 47.
+	 *     Write MFC_Cntl[Sc,Sm]='1','0' to suspend
+	 *     the queue.
+	 */
+	out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE);
+	eieio();
+}
+
+static inline void clear_interrupts(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 49:
+	 *     Write INT_MASK_class0 with value of 0.
+	 *     Write INT_MASK_class1 with value of 0.
+	 *     Write INT_MASK_class2 with value of 0.
+	 *     Write INT_STAT_class0 with value of -1.
+	 *     Write INT_STAT_class1 with value of -1.
+	 *     Write INT_STAT_class2 with value of -1.
+	 */
+	spin_lock_irq(&spu->register_lock);
+	spu_int_mask_set(spu, 0, 0ul);
+	spu_int_mask_set(spu, 1, 0ul);
+	spu_int_mask_set(spu, 2, 0ul);
+	spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK);
+	spu_int_stat_clear(spu, 1, CLASS1_INTR_MASK);
+	spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+static inline void restore_mfc_queues(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	int i;
+
+	/* Restore, Step 50:
+	 *     If MFC_Cntl[Se]!=0 then restore
+	 *     MFC command queues.
+	 */
+	if ((csa->priv2.mfc_control_RW & MFC_CNTL_DMA_QUEUES_EMPTY_MASK) == 0) {
+		for (i = 0; i < 8; i++) {
+			out_be64(&priv2->puq[i].mfc_cq_data0_RW,
+				 csa->priv2.puq[i].mfc_cq_data0_RW);
+			out_be64(&priv2->puq[i].mfc_cq_data1_RW,
+				 csa->priv2.puq[i].mfc_cq_data1_RW);
+			out_be64(&priv2->puq[i].mfc_cq_data2_RW,
+				 csa->priv2.puq[i].mfc_cq_data2_RW);
+			out_be64(&priv2->puq[i].mfc_cq_data3_RW,
+				 csa->priv2.puq[i].mfc_cq_data3_RW);
+		}
+		for (i = 0; i < 16; i++) {
+			out_be64(&priv2->spuq[i].mfc_cq_data0_RW,
+				 csa->priv2.spuq[i].mfc_cq_data0_RW);
+			out_be64(&priv2->spuq[i].mfc_cq_data1_RW,
+				 csa->priv2.spuq[i].mfc_cq_data1_RW);
+			out_be64(&priv2->spuq[i].mfc_cq_data2_RW,
+				 csa->priv2.spuq[i].mfc_cq_data2_RW);
+			out_be64(&priv2->spuq[i].mfc_cq_data3_RW,
+				 csa->priv2.spuq[i].mfc_cq_data3_RW);
+		}
+	}
+	eieio();
+}
+
+static inline void restore_ppu_querymask(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Restore, Step 51:
+	 *     Restore the PPU_QueryMask register from CSA.
+	 */
+	out_be32(&prob->dma_querymask_RW, csa->prob.dma_querymask_RW);
+	eieio();
+}
+
+static inline void restore_ppu_querytype(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Restore, Step 52:
+	 *     Restore the PPU_QueryType register from CSA.
+	 */
+	out_be32(&prob->dma_querytype_RW, csa->prob.dma_querytype_RW);
+	eieio();
+}
+
+static inline void restore_mfc_csr_tsq(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 53:
+	 *     Restore the MFC_CSR_TSQ register from CSA.
+	 */
+	out_be64(&priv2->spu_tag_status_query_RW,
+		 csa->priv2.spu_tag_status_query_RW);
+	eieio();
+}
+
+static inline void restore_mfc_csr_cmd(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 54:
+	 *     Restore the MFC_CSR_CMD1 and MFC_CSR_CMD2
+	 *     registers from CSA.
+	 */
+	out_be64(&priv2->spu_cmd_buf1_RW, csa->priv2.spu_cmd_buf1_RW);
+	out_be64(&priv2->spu_cmd_buf2_RW, csa->priv2.spu_cmd_buf2_RW);
+	eieio();
+}
+
+static inline void restore_mfc_csr_ato(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 55:
+	 *     Restore the MFC_CSR_ATO register from CSA.
+	 */
+	out_be64(&priv2->spu_atomic_status_RW, csa->priv2.spu_atomic_status_RW);
+}
+
+static inline void restore_mfc_tclass_id(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 56:
+	 *     Restore the MFC_TCLASS_ID register from CSA.
+	 */
+	spu_mfc_tclass_id_set(spu, csa->priv1.mfc_tclass_id_RW);
+	eieio();
+}
+
+static inline void set_llr_event(struct spu_state *csa, struct spu *spu)
+{
+	u64 ch0_cnt, ch0_data;
+	u64 ch1_data;
+
+	/* Restore, Step 57:
+	 *    Set the Lock Line Reservation Lost Event by:
+	 *      1. OR CSA.SPU_Event_Status with bit 21 (Lr) set to 1.
+	 *      2. If CSA.SPU_Channel_0_Count=0 and
+	 *         CSA.SPU_Wr_Event_Mask[Lr]=1 and
+	 *         CSA.SPU_Event_Status[Lr]=0 then set
+	 *         CSA.SPU_Event_Status_Count=1.
+	 */
+	ch0_cnt = csa->spu_chnlcnt_RW[0];
+	ch0_data = csa->spu_chnldata_RW[0];
+	ch1_data = csa->spu_chnldata_RW[1];
+	csa->spu_chnldata_RW[0] |= MFC_LLR_LOST_EVENT;
+	if ((ch0_cnt == 0) && !(ch0_data & MFC_LLR_LOST_EVENT) &&
+	    (ch1_data & MFC_LLR_LOST_EVENT)) {
+		csa->spu_chnlcnt_RW[0] = 1;
+	}
+}
+
+static inline void restore_decr_wrapped(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 58:
+	 *     If the status of the CSA software decrementer
+	 *     "wrapped" flag is set, OR in a '1' to
+	 *     CSA.SPU_Event_Status[Tm].
+	 */
+	if (!(csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED))
+		return;
+
+	if ((csa->spu_chnlcnt_RW[0] == 0) &&
+	    (csa->spu_chnldata_RW[1] & 0x20) &&
+	    !(csa->spu_chnldata_RW[0] & 0x20))
+		csa->spu_chnlcnt_RW[0] = 1;
+
+	csa->spu_chnldata_RW[0] |= 0x20;
+}
+
+static inline void restore_ch_part1(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
+	int i;
+
+	/* Restore, Step 59:
+	 *	Restore the following CH: [0,3,4,24,25,27]
+	 */
+	for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
+		idx = ch_indices[i];
+		out_be64(&priv2->spu_chnlcntptr_RW, idx);
+		eieio();
+		out_be64(&priv2->spu_chnldata_RW, csa->spu_chnldata_RW[idx]);
+		out_be64(&priv2->spu_chnlcnt_RW, csa->spu_chnlcnt_RW[idx]);
+		eieio();
+	}
+}
+
+static inline void restore_ch_part2(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	u64 ch_indices[3] = { 9UL, 21UL, 23UL };
+	u64 ch_counts[3] = { 1UL, 16UL, 1UL };
+	u64 idx;
+	int i;
+
+	/* Restore, Step 60:
+	 *     Restore the following CH: [9,21,23].
+	 */
+	ch_counts[0] = 1UL;
+	ch_counts[1] = csa->spu_chnlcnt_RW[21];
+	ch_counts[2] = 1UL;
+	for (i = 0; i < 3; i++) {
+		idx = ch_indices[i];
+		out_be64(&priv2->spu_chnlcntptr_RW, idx);
+		eieio();
+		out_be64(&priv2->spu_chnlcnt_RW, ch_counts[i]);
+		eieio();
+	}
+}
+
+static inline void restore_spu_lslr(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 61:
+	 *     Restore the SPU_LSLR register from CSA.
+	 */
+	out_be64(&priv2->spu_lslr_RW, csa->priv2.spu_lslr_RW);
+	eieio();
+}
+
+static inline void restore_spu_cfg(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 62:
+	 *     Restore the SPU_Cfg register from CSA.
+	 */
+	out_be64(&priv2->spu_cfg_RW, csa->priv2.spu_cfg_RW);
+	eieio();
+}
+
+static inline void restore_pm_trace(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 63:
+	 *     Restore PM_Trace_Tag_Wait_Mask from CSA.
+	 *     Not performed by this implementation.
+	 */
+}
+
+static inline void restore_spu_npc(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Restore, Step 64:
+	 *     Restore SPU_NPC from CSA.
+	 */
+	out_be32(&prob->spu_npc_RW, csa->prob.spu_npc_RW);
+	eieio();
+}
+
+static inline void restore_spu_mb(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	int i;
+
+	/* Restore, Step 65:
+	 *     Restore MFC_RdSPU_MB from CSA.
+	 */
+	out_be64(&priv2->spu_chnlcntptr_RW, 29UL);
+	eieio();
+	out_be64(&priv2->spu_chnlcnt_RW, csa->spu_chnlcnt_RW[29]);
+	for (i = 0; i < 4; i++) {
+		out_be64(&priv2->spu_chnldata_RW, csa->spu_mailbox_data[i]);
+	}
+	eieio();
+}
+
+static inline void check_ppu_mb_stat(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	u32 dummy = 0;
+
+	/* Restore, Step 66:
+	 *     If CSA.MB_Stat[P]=0 (mailbox empty) then
+	 *     read from the PPU_MB register.
+	 */
+	if ((csa->prob.mb_stat_R & 0xFF) == 0) {
+		dummy = in_be32(&prob->pu_mb_R);
+		eieio();
+	}
+}
+
+static inline void check_ppuint_mb_stat(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	u64 dummy = 0UL;
+
+	/* Restore, Step 66:
+	 *     If CSA.MB_Stat[I]=0 (mailbox empty) then
+	 *     read from the PPUINT_MB register.
+	 */
+	if ((csa->prob.mb_stat_R & 0xFF0000) == 0) {
+		dummy = in_be64(&priv2->puint_mb_R);
+		eieio();
+		spu_int_stat_clear(spu, 2, CLASS2_ENABLE_MAILBOX_INTR);
+		eieio();
+	}
+}
+
+static inline void restore_mfc_sr1(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 69:
+	 *     Restore the MFC_SR1 register from CSA.
+	 */
+	spu_mfc_sr1_set(spu, csa->priv1.mfc_sr1_RW);
+	eieio();
+}
+
+static inline void set_int_route(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_context *ctx = spu->ctx;
+
+	spu_cpu_affinity_set(spu, ctx->last_ran);
+}
+
+static inline void restore_other_spu_access(struct spu_state *csa,
+					    struct spu *spu)
+{
+	/* Restore, Step 70:
+	 *     Restore other SPU mappings to this SPU. TBD.
+	 */
+}
+
+static inline void restore_spu_runcntl(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	/* Restore, Step 71:
+	 *     If CSA.SPU_Status[R]=1 then write
+	 *     SPU_RunCntl[R0R1]='01'.
+	 */
+	if (csa->prob.spu_status_R & SPU_STATUS_RUNNING) {
+		out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE);
+		eieio();
+	}
+}
+
+static inline void restore_mfc_cntl(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Restore, Step 72:
+	 *    Restore the MFC_CNTL register for the CSA.
+	 */
+	out_be64(&priv2->mfc_control_RW, csa->priv2.mfc_control_RW);
+	eieio();
+
+	/*
+	 * The queue is put back into the same state that was evident prior to
+	 * the context switch. The suspend flag is added to the saved state in
+	 * the csa, if the operational state was suspending or suspended. In
+	 * this case, the code that suspended the mfc is responsible for
+	 * continuing it. Note that SPE faults do not change the operational
+	 * state of the spu.
+	 */
+}
+
+static inline void enable_user_access(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 73:
+	 *     Enable user-space access (if provided) to this
+	 *     SPU by mapping the virtual pages assigned to
+	 *     the SPU memory-mapped I/O (MMIO) for problem
+	 *     state. TBD.
+	 */
+}
+
+static inline void reset_switch_active(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 74:
+	 *     Reset the "context switch active" flag.
+	 *     Not performed by this implementation.
+	 */
+}
+
+static inline void reenable_interrupts(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 75:
+	 *     Re-enable SPU interrupts.
+	 */
+	spin_lock_irq(&spu->register_lock);
+	spu_int_mask_set(spu, 0, csa->priv1.int_mask_class0_RW);
+	spu_int_mask_set(spu, 1, csa->priv1.int_mask_class1_RW);
+	spu_int_mask_set(spu, 2, csa->priv1.int_mask_class2_RW);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+static int quiece_spu(struct spu_state *prev, struct spu *spu)
+{
+	/*
+	 * Combined steps 2-18 of SPU context save sequence, which
+	 * quiesce the SPU state (disable SPU execution, MFC command
+	 * queues, decrementer, SPU interrupts, etc.).
+	 *
+	 * Returns      0 on success.
+	 *              2 if failed step 2.
+	 *              6 if failed step 6.
+	 */
+
+	if (check_spu_isolate(prev, spu)) {	/* Step 2. */
+		return 2;
+	}
+	disable_interrupts(prev, spu);	        /* Step 3. */
+	set_watchdog_timer(prev, spu);	        /* Step 4. */
+	inhibit_user_access(prev, spu);	        /* Step 5. */
+	if (check_spu_isolate(prev, spu)) {	/* Step 6. */
+		return 6;
+	}
+	set_switch_pending(prev, spu);	        /* Step 7. */
+	save_mfc_cntl(prev, spu);		/* Step 8. */
+	save_spu_runcntl(prev, spu);	        /* Step 9. */
+	save_mfc_sr1(prev, spu);	        /* Step 10. */
+	save_spu_status(prev, spu);	        /* Step 11. */
+	save_mfc_stopped_status(prev, spu);     /* Step 12. */
+	halt_mfc_decr(prev, spu);	        /* Step 13. */
+	save_timebase(prev, spu);		/* Step 14. */
+	remove_other_spu_access(prev, spu);	/* Step 15. */
+	do_mfc_mssync(prev, spu);	        /* Step 16. */
+	issue_mfc_tlbie(prev, spu);	        /* Step 17. */
+	handle_pending_interrupts(prev, spu);	/* Step 18. */
+
+	return 0;
+}
+
+static void save_csa(struct spu_state *prev, struct spu *spu)
+{
+	/*
+	 * Combine steps 19-44 of SPU context save sequence, which
+	 * save regions of the privileged & problem state areas.
+	 */
+
+	save_mfc_queues(prev, spu);	/* Step 19. */
+	save_ppu_querymask(prev, spu);	/* Step 20. */
+	save_ppu_querytype(prev, spu);	/* Step 21. */
+	save_ppu_tagstatus(prev, spu);  /* NEW.     */
+	save_mfc_csr_tsq(prev, spu);	/* Step 22. */
+	save_mfc_csr_cmd(prev, spu);	/* Step 23. */
+	save_mfc_csr_ato(prev, spu);	/* Step 24. */
+	save_mfc_tclass_id(prev, spu);	/* Step 25. */
+	set_mfc_tclass_id(prev, spu);	/* Step 26. */
+	save_mfc_cmd(prev, spu);	/* Step 26a - moved from 44. */
+	purge_mfc_queue(prev, spu);	/* Step 27. */
+	wait_purge_complete(prev, spu);	/* Step 28. */
+	setup_mfc_sr1(prev, spu);	/* Step 30. */
+	save_spu_npc(prev, spu);	/* Step 31. */
+	save_spu_privcntl(prev, spu);	/* Step 32. */
+	reset_spu_privcntl(prev, spu);	/* Step 33. */
+	save_spu_lslr(prev, spu);	/* Step 34. */
+	reset_spu_lslr(prev, spu);	/* Step 35. */
+	save_spu_cfg(prev, spu);	/* Step 36. */
+	save_pm_trace(prev, spu);	/* Step 37. */
+	save_mfc_rag(prev, spu);	/* Step 38. */
+	save_ppu_mb_stat(prev, spu);	/* Step 39. */
+	save_ppu_mb(prev, spu);	        /* Step 40. */
+	save_ppuint_mb(prev, spu);	/* Step 41. */
+	save_ch_part1(prev, spu);	/* Step 42. */
+	save_spu_mb(prev, spu);	        /* Step 43. */
+	reset_ch(prev, spu);	        /* Step 45. */
+}
+
+static void save_lscsa(struct spu_state *prev, struct spu *spu)
+{
+	/*
+	 * Perform steps 46-57 of SPU context save sequence,
+	 * which save regions of the local store and register
+	 * file.
+	 */
+
+	resume_mfc_queue(prev, spu);	/* Step 46. */
+	/* Step 47. */
+	setup_mfc_slbs(prev, spu, spu_save_code, sizeof(spu_save_code));
+	set_switch_active(prev, spu);	/* Step 48. */
+	enable_interrupts(prev, spu);	/* Step 49. */
+	save_ls_16kb(prev, spu);	/* Step 50. */
+	set_spu_npc(prev, spu);	        /* Step 51. */
+	set_signot1(prev, spu);		/* Step 52. */
+	set_signot2(prev, spu);		/* Step 53. */
+	send_save_code(prev, spu);	/* Step 54. */
+	set_ppu_querymask(prev, spu);	/* Step 55. */
+	wait_tag_complete(prev, spu);	/* Step 56. */
+	wait_spu_stopped(prev, spu);	/* Step 57. */
+}
+
+static void force_spu_isolate_exit(struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+	/* Stop SPE execution and wait for completion. */
+	out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+	iobarrier_rw();
+	POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING);
+
+	/* Restart SPE master runcntl. */
+	spu_mfc_sr1_set(spu, MFC_STATE1_MASTER_RUN_CONTROL_MASK);
+	iobarrier_w();
+
+	/* Initiate isolate exit request and wait for completion. */
+	out_be64(&priv2->spu_privcntl_RW, 4LL);
+	iobarrier_w();
+	out_be32(&prob->spu_runcntl_RW, 2);
+	iobarrier_rw();
+	POLL_WHILE_FALSE((in_be32(&prob->spu_status_R)
+				& SPU_STATUS_STOPPED_BY_STOP));
+
+	/* Reset load request to normal. */
+	out_be64(&priv2->spu_privcntl_RW, SPU_PRIVCNT_LOAD_REQUEST_NORMAL);
+	iobarrier_w();
+}
+
+/**
+ * stop_spu_isolate
+ *	Check SPU run-control state and force isolated
+ *	exit function as necessary.
+ */
+static void stop_spu_isolate(struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+
+	if (in_be32(&prob->spu_status_R) & SPU_STATUS_ISOLATED_STATE) {
+		/* The SPU is in isolated state; the only way
+		 * to get it out is to perform an isolated
+		 * exit (clean) operation.
+		 */
+		force_spu_isolate_exit(spu);
+	}
+}
+
+static void harvest(struct spu_state *prev, struct spu *spu)
+{
+	/*
+	 * Perform steps 2-25 of SPU context restore sequence,
+	 * which resets an SPU either after a failed save, or
+	 * when using SPU for first time.
+	 */
+
+	disable_interrupts(prev, spu);	        /* Step 2.  */
+	inhibit_user_access(prev, spu);	        /* Step 3.  */
+	terminate_spu_app(prev, spu);	        /* Step 4.  */
+	set_switch_pending(prev, spu);	        /* Step 5.  */
+	stop_spu_isolate(spu);			/* NEW.     */
+	remove_other_spu_access(prev, spu);	/* Step 6.  */
+	suspend_mfc_and_halt_decr(prev, spu);	/* Step 7.  */
+	wait_suspend_mfc_complete(prev, spu);	/* Step 8.  */
+	if (!suspend_spe(prev, spu))	        /* Step 9.  */
+		clear_spu_status(prev, spu);	/* Step 10. */
+	do_mfc_mssync(prev, spu);	        /* Step 11. */
+	issue_mfc_tlbie(prev, spu);	        /* Step 12. */
+	handle_pending_interrupts(prev, spu);	/* Step 13. */
+	purge_mfc_queue(prev, spu);	        /* Step 14. */
+	wait_purge_complete(prev, spu);	        /* Step 15. */
+	reset_spu_privcntl(prev, spu);	        /* Step 16. */
+	reset_spu_lslr(prev, spu);              /* Step 17. */
+	setup_mfc_sr1(prev, spu);	        /* Step 18. */
+	spu_invalidate_slbs(spu);		/* Step 19. */
+	reset_ch_part1(prev, spu);	        /* Step 20. */
+	reset_ch_part2(prev, spu);	        /* Step 21. */
+	enable_interrupts(prev, spu);	        /* Step 22. */
+	set_switch_active(prev, spu);	        /* Step 23. */
+	set_mfc_tclass_id(prev, spu);	        /* Step 24. */
+	resume_mfc_queue(prev, spu);	        /* Step 25. */
+}
+
+static void restore_lscsa(struct spu_state *next, struct spu *spu)
+{
+	/*
+	 * Perform steps 26-40 of SPU context restore sequence,
+	 * which restores regions of the local store and register
+	 * file.
+	 */
+
+	set_watchdog_timer(next, spu);	        /* Step 26. */
+	setup_spu_status_part1(next, spu);	/* Step 27. */
+	setup_spu_status_part2(next, spu);	/* Step 28. */
+	restore_mfc_rag(next, spu);	        /* Step 29. */
+	/* Step 30. */
+	setup_mfc_slbs(next, spu, spu_restore_code, sizeof(spu_restore_code));
+	set_spu_npc(next, spu);	                /* Step 31. */
+	set_signot1(next, spu);	                /* Step 32. */
+	set_signot2(next, spu);	                /* Step 33. */
+	setup_decr(next, spu);	                /* Step 34. */
+	setup_ppu_mb(next, spu);	        /* Step 35. */
+	setup_ppuint_mb(next, spu);	        /* Step 36. */
+	send_restore_code(next, spu);	        /* Step 37. */
+	set_ppu_querymask(next, spu);	        /* Step 38. */
+	wait_tag_complete(next, spu);	        /* Step 39. */
+	wait_spu_stopped(next, spu);	        /* Step 40. */
+}
+
+static void restore_csa(struct spu_state *next, struct spu *spu)
+{
+	/*
+	 * Combine steps 41-76 of SPU context restore sequence, which
+	 * restore regions of the privileged & problem state areas.
+	 */
+
+	restore_spu_privcntl(next, spu);	/* Step 41. */
+	restore_status_part1(next, spu);	/* Step 42. */
+	restore_status_part2(next, spu);	/* Step 43. */
+	restore_ls_16kb(next, spu);	        /* Step 44. */
+	wait_tag_complete(next, spu);	        /* Step 45. */
+	suspend_mfc(next, spu);	                /* Step 46. */
+	wait_suspend_mfc_complete(next, spu);	/* Step 47. */
+	issue_mfc_tlbie(next, spu);	        /* Step 48. */
+	clear_interrupts(next, spu);	        /* Step 49. */
+	restore_mfc_queues(next, spu);	        /* Step 50. */
+	restore_ppu_querymask(next, spu);	/* Step 51. */
+	restore_ppu_querytype(next, spu);	/* Step 52. */
+	restore_mfc_csr_tsq(next, spu);	        /* Step 53. */
+	restore_mfc_csr_cmd(next, spu);	        /* Step 54. */
+	restore_mfc_csr_ato(next, spu);	        /* Step 55. */
+	restore_mfc_tclass_id(next, spu);	/* Step 56. */
+	set_llr_event(next, spu);	        /* Step 57. */
+	restore_decr_wrapped(next, spu);	/* Step 58. */
+	restore_ch_part1(next, spu);	        /* Step 59. */
+	restore_ch_part2(next, spu);	        /* Step 60. */
+	restore_spu_lslr(next, spu);	        /* Step 61. */
+	restore_spu_cfg(next, spu);	        /* Step 62. */
+	restore_pm_trace(next, spu);	        /* Step 63. */
+	restore_spu_npc(next, spu);	        /* Step 64. */
+	restore_spu_mb(next, spu);	        /* Step 65. */
+	check_ppu_mb_stat(next, spu);	        /* Step 66. */
+	check_ppuint_mb_stat(next, spu);	/* Step 67. */
+	spu_invalidate_slbs(spu);		/* Modified Step 68. */
+	restore_mfc_sr1(next, spu);	        /* Step 69. */
+	set_int_route(next, spu);		/* NEW      */
+	restore_other_spu_access(next, spu);	/* Step 70. */
+	restore_spu_runcntl(next, spu);	        /* Step 71. */
+	restore_mfc_cntl(next, spu);	        /* Step 72. */
+	enable_user_access(next, spu);	        /* Step 73. */
+	reset_switch_active(next, spu);	        /* Step 74. */
+	reenable_interrupts(next, spu);	        /* Step 75. */
+}
+
+static int __do_spu_save(struct spu_state *prev, struct spu *spu)
+{
+	int rc;
+
+	/*
+	 * SPU context save can be broken into three phases:
+	 *
+	 *     (a) quiesce [steps 2-16].
+	 *     (b) save of CSA, performed by PPE [steps 17-42]
+	 *     (c) save of LSCSA, mostly performed by SPU [steps 43-52].
+	 *
+	 * Returns      0 on success.
+	 *              2,6 if failed to quiece SPU
+	 *              53 if SPU-side of save failed.
+	 */
+
+	rc = quiece_spu(prev, spu);	        /* Steps 2-16. */
+	switch (rc) {
+	default:
+	case 2:
+	case 6:
+		harvest(prev, spu);
+		return rc;
+		break;
+	case 0:
+		break;
+	}
+	save_csa(prev, spu);	                /* Steps 17-43. */
+	save_lscsa(prev, spu);	                /* Steps 44-53. */
+	return check_save_status(prev, spu);	/* Step 54.     */
+}
+
+static int __do_spu_restore(struct spu_state *next, struct spu *spu)
+{
+	int rc;
+
+	/*
+	 * SPU context restore can be broken into three phases:
+	 *
+	 *    (a) harvest (or reset) SPU [steps 2-24].
+	 *    (b) restore LSCSA [steps 25-40], mostly performed by SPU.
+	 *    (c) restore CSA [steps 41-76], performed by PPE.
+	 *
+	 * The 'harvest' step is not performed here, but rather
+	 * as needed below.
+	 */
+
+	restore_lscsa(next, spu);	        /* Steps 24-39. */
+	rc = check_restore_status(next, spu);	/* Step 40.     */
+	switch (rc) {
+	default:
+		/* Failed. Return now. */
+		return rc;
+		break;
+	case 0:
+		/* Fall through to next step. */
+		break;
+	}
+	restore_csa(next, spu);
+
+	return 0;
+}
+
+/**
+ * spu_save - SPU context save, with locking.
+ * @prev: pointer to SPU context save area, to be saved.
+ * @spu: pointer to SPU iomem structure.
+ *
+ * Acquire locks, perform the save operation then return.
+ */
+int spu_save(struct spu_state *prev, struct spu *spu)
+{
+	int rc;
+
+	acquire_spu_lock(spu);	        /* Step 1.     */
+	rc = __do_spu_save(prev, spu);	/* Steps 2-53. */
+	release_spu_lock(spu);
+	if (rc != 0 && rc != 2 && rc != 6) {
+		panic("%s failed on SPU[%d], rc=%d.\n",
+		      __func__, spu->number, rc);
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(spu_save);
+
+/**
+ * spu_restore - SPU context restore, with harvest and locking.
+ * @new: pointer to SPU context save area, to be restored.
+ * @spu: pointer to SPU iomem structure.
+ *
+ * Perform harvest + restore, as we may not be coming
+ * from a previous successful save operation, and the
+ * hardware state is unknown.
+ */
+int spu_restore(struct spu_state *new, struct spu *spu)
+{
+	int rc;
+
+	acquire_spu_lock(spu);
+	harvest(NULL, spu);
+	spu->slb_replace = 0;
+	rc = __do_spu_restore(new, spu);
+	release_spu_lock(spu);
+	if (rc) {
+		panic("%s failed on SPU[%d] rc=%d.\n",
+		       __func__, spu->number, rc);
+	}
+	return rc;
+}
+EXPORT_SYMBOL_GPL(spu_restore);
+
+static void init_prob(struct spu_state *csa)
+{
+	csa->spu_chnlcnt_RW[9] = 1;
+	csa->spu_chnlcnt_RW[21] = 16;
+	csa->spu_chnlcnt_RW[23] = 1;
+	csa->spu_chnlcnt_RW[28] = 1;
+	csa->spu_chnlcnt_RW[30] = 1;
+	csa->prob.spu_runcntl_RW = SPU_RUNCNTL_STOP;
+	csa->prob.mb_stat_R = 0x000400;
+}
+
+static void init_priv1(struct spu_state *csa)
+{
+	/* Enable decode, relocate, tlbie response, master runcntl. */
+	csa->priv1.mfc_sr1_RW = MFC_STATE1_LOCAL_STORAGE_DECODE_MASK |
+	    MFC_STATE1_MASTER_RUN_CONTROL_MASK |
+	    MFC_STATE1_PROBLEM_STATE_MASK |
+	    MFC_STATE1_RELOCATE_MASK | MFC_STATE1_BUS_TLBIE_MASK;
+
+	/* Enable OS-specific set of interrupts. */
+	csa->priv1.int_mask_class0_RW = CLASS0_ENABLE_DMA_ALIGNMENT_INTR |
+	    CLASS0_ENABLE_INVALID_DMA_COMMAND_INTR |
+	    CLASS0_ENABLE_SPU_ERROR_INTR;
+	csa->priv1.int_mask_class1_RW = CLASS1_ENABLE_SEGMENT_FAULT_INTR |
+	    CLASS1_ENABLE_STORAGE_FAULT_INTR;
+	csa->priv1.int_mask_class2_RW = CLASS2_ENABLE_SPU_STOP_INTR |
+	    CLASS2_ENABLE_SPU_HALT_INTR |
+	    CLASS2_ENABLE_SPU_DMA_TAG_GROUP_COMPLETE_INTR;
+}
+
+static void init_priv2(struct spu_state *csa)
+{
+	csa->priv2.spu_lslr_RW = LS_ADDR_MASK;
+	csa->priv2.mfc_control_RW = MFC_CNTL_RESUME_DMA_QUEUE |
+	    MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION |
+	    MFC_CNTL_DMA_QUEUES_EMPTY_MASK;
+}
+
+/**
+ * spu_alloc_csa - allocate and initialize an SPU context save area.
+ *
+ * Allocate and initialize the contents of an SPU context save area.
+ * This includes enabling address translation, interrupt masks, etc.,
+ * as appropriate for the given OS environment.
+ *
+ * Note that storage for the 'lscsa' is allocated separately,
+ * as it is by far the largest of the context save regions,
+ * and may need to be pinned or otherwise specially aligned.
+ */
+int spu_init_csa(struct spu_state *csa)
+{
+	int rc;
+
+	if (!csa)
+		return -EINVAL;
+	memset(csa, 0, sizeof(struct spu_state));
+
+	rc = spu_alloc_lscsa(csa);
+	if (rc)
+		return rc;
+
+	spin_lock_init(&csa->register_lock);
+
+	init_prob(csa);
+	init_priv1(csa);
+	init_priv2(csa);
+
+	return 0;
+}
+
+void spu_fini_csa(struct spu_state *csa)
+{
+	spu_free_lscsa(csa);
+}
diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c
new file mode 100644
index 000000000..157e046e6
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/export.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/slab.h>
+
+#include <linux/uaccess.h>
+
+#include "spufs.h"
+
+/**
+ * sys_spu_run - run code loaded into an SPU
+ *
+ * @unpc:    next program counter for the SPU
+ * @ustatus: status of the SPU
+ *
+ * This system call transfers the control of execution of a
+ * user space thread to an SPU. It will return when the
+ * SPU has finished executing or when it hits an error
+ * condition and it will be interrupted if a signal needs
+ * to be delivered to a handler in user space.
+ *
+ * The next program counter is set to the passed value
+ * before the SPU starts fetching code and the user space
+ * pointer gets updated with the new value when returning
+ * from kernel space.
+ *
+ * The status value returned from spu_run reflects the
+ * value of the spu_status register after the SPU has stopped.
+ *
+ */
+static long do_spu_run(struct file *filp,
+			__u32 __user *unpc,
+			__u32 __user *ustatus)
+{
+	long ret;
+	struct spufs_inode_info *i;
+	u32 npc, status;
+
+	ret = -EFAULT;
+	if (get_user(npc, unpc))
+		goto out;
+
+	/* check if this file was created by spu_create */
+	ret = -EINVAL;
+	if (filp->f_op != &spufs_context_fops)
+		goto out;
+
+	i = SPUFS_I(file_inode(filp));
+	ret = spufs_run_spu(i->i_ctx, &npc, &status);
+
+	if (put_user(npc, unpc))
+		ret = -EFAULT;
+
+	if (ustatus && put_user(status, ustatus))
+		ret = -EFAULT;
+out:
+	return ret;
+}
+
+static long do_spu_create(const char __user *pathname, unsigned int flags,
+		umode_t mode, struct file *neighbor)
+{
+	struct path path;
+	struct dentry *dentry;
+	int ret;
+
+	dentry = user_path_create(AT_FDCWD, pathname, &path, LOOKUP_DIRECTORY);
+	ret = PTR_ERR(dentry);
+	if (!IS_ERR(dentry)) {
+		ret = spufs_create(&path, dentry, flags, mode, neighbor);
+		done_path_create(&path, dentry);
+	}
+
+	return ret;
+}
+
+struct spufs_calls spufs_calls = {
+	.create_thread = do_spu_create,
+	.spu_run = do_spu_run,
+	.notify_spus_active = do_notify_spus_active,
+	.owner = THIS_MODULE,
+#ifdef CONFIG_COREDUMP
+	.coredump_extra_notes_size = spufs_coredump_extra_notes_size,
+	.coredump_extra_notes_write = spufs_coredump_extra_notes_write,
+#endif
+};