65 files changed, 22046 insertions, 0 deletions
diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig
new file mode 100644
index 000000000..227cc7443
--- /dev/null
+++ b/drivers/misc/mic/Kconfig
@@ -0,0 +1,156 @@
+menu "Intel MIC & related support"
+
+comment "Intel MIC Bus Driver"
+
+config INTEL_MIC_BUS
+	tristate "Intel MIC Bus Driver"
+	depends on 64BIT && PCI && X86 && X86_DEV_DMA_OPS
+	help
+	  This option is selected by any driver which registers a
+	  device or driver on the MIC Bus, such as CONFIG_INTEL_MIC_HOST,
+	  CONFIG_INTEL_MIC_CARD, CONFIG_INTEL_MIC_X100_DMA etc.
+
+	  If you are building a host/card kernel with an Intel MIC device
+	  then say M (recommended) or Y, else say N. If unsure say N.
+
+	  More information about the Intel MIC family as well as the Linux
+	  OS and tools for MIC to use with this driver are available from
+	  <http://software.intel.com/en-us/mic-developer>.
+
+comment "SCIF Bus Driver"
+
+config SCIF_BUS
+	tristate "SCIF Bus Driver"
+	depends on 64BIT && PCI && X86 && X86_DEV_DMA_OPS
+	help
+	  This option is selected by any driver which registers a
+	  device or driver on the SCIF Bus, such as CONFIG_INTEL_MIC_HOST
+	  and CONFIG_INTEL_MIC_CARD.
+
+	  If you are building a host/card kernel with an Intel MIC device
+	  then say M (recommended) or Y, else say N. If unsure say N.
+
+	  More information about the Intel MIC family as well as the Linux
+	  OS and tools for MIC to use with this driver are available from
+	  <http://software.intel.com/en-us/mic-developer>.
+
+comment "VOP Bus Driver"
+
+config VOP_BUS
+	tristate "VOP Bus Driver"
+	depends on 64BIT && PCI && X86 && X86_DEV_DMA_OPS
+	help
+	  This option is selected by any driver which registers a
+	  device or driver on the VOP Bus, such as CONFIG_INTEL_MIC_HOST
+	  and CONFIG_INTEL_MIC_CARD.
+
+	  If you are building a host/card kernel with an Intel MIC device
+	  then say M (recommended) or Y, else say N. If unsure say N.
+
+	  More information about the Intel MIC family as well as the Linux
+	  OS and tools for MIC to use with this driver are available from
+	  <http://software.intel.com/en-us/mic-developer>.
+
+comment "Intel MIC Host Driver"
+
+config INTEL_MIC_HOST
+	tristate "Intel MIC Host Driver"
+	depends on 64BIT && PCI && X86
+	depends on INTEL_MIC_BUS && SCIF_BUS && MIC_COSM && VOP_BUS
+	help
+	  This enables Host Driver support for the Intel Many Integrated
+	  Core (MIC) family of PCIe form factor coprocessor devices that
+	  run a 64 bit Linux OS. The driver manages card OS state and
+	  enables communication between host and card. Intel MIC X100
+	  devices are currently supported.
+
+	  If you are building a host kernel with an Intel MIC device then
+	  say M (recommended) or Y, else say N. If unsure say N.
+
+	  More information about the Intel MIC family as well as the Linux
+	  OS and tools for MIC to use with this driver are available from
+	  <http://software.intel.com/en-us/mic-developer>.
+
+comment "Intel MIC Card Driver"
+
+config INTEL_MIC_CARD
+	tristate "Intel MIC Card Driver"
+	depends on 64BIT && X86
+	depends on INTEL_MIC_BUS && SCIF_BUS && MIC_COSM && VOP_BUS
+	select VIRTIO
+	help
+	  This enables card driver support for the Intel Many Integrated
+	  Core (MIC) device family. The card driver communicates shutdown/
+	  crash events to the host and allows registration/configuration of
+	  virtio devices. Intel MIC X100 devices are currently supported.
+
+	  If you are building a card kernel for an Intel MIC device then
+	  say M (recommended) or Y, else say N. If unsure say N.
+
+	  For more information see
+	  <http://software.intel.com/en-us/mic-developer>.
+
+comment "SCIF Driver"
+
+config SCIF
+	tristate "SCIF Driver"
+	depends on 64BIT && PCI && X86 && SCIF_BUS && IOMMU_SUPPORT
+	select IOMMU_IOVA
+	help
+	  This enables SCIF Driver support for the Intel Many Integrated
+	  Core (MIC) family of PCIe form factor coprocessor devices that
+	  run a 64 bit Linux OS. The Symmetric Communication Interface
+	  (SCIF (pronounced as skiff)) is a low level communications API
+	  across PCIe currently implemented for MIC.
+
+	  If you are building a host kernel with an Intel MIC device then
+	  say M (recommended) or Y, else say N. If unsure say N.
+
+	  More information about the Intel MIC family as well as the Linux
+	  OS and tools for MIC to use with this driver are available from
+	  <http://software.intel.com/en-us/mic-developer>.
+
+comment "Intel MIC Coprocessor State Management (COSM) Drivers"
+
+config MIC_COSM
+	tristate "Intel MIC Coprocessor State Management (COSM) Drivers"
+	depends on 64BIT && PCI && X86 && SCIF
+	help
+	  This enables COSM driver support for the Intel Many
+	  Integrated Core (MIC) family of PCIe form factor coprocessor
+	  devices. COSM drivers implement functions such as boot,
+	  shutdown, reset and reboot of MIC devices.
+
+	  If you are building a host kernel with an Intel MIC device then
+	  say M (recommended) or Y, else say N. If unsure say N.
+
+	  More information about the Intel MIC family as well as the Linux
+	  OS and tools for MIC to use with this driver are available from
+	  <http://software.intel.com/en-us/mic-developer>.
+
+comment "VOP Driver"
+
+config VOP
+	tristate "VOP Driver"
+	depends on 64BIT && PCI && X86 && VOP_BUS
+	select VHOST_RING
+	select VIRTIO
+	help
+	  This enables VOP (Virtio over PCIe) Driver support for the Intel
+	  Many Integrated Core (MIC) family of PCIe form factor coprocessor
+	  devices. The VOP driver allows virtio drivers, e.g. net, console
+	  and block drivers, on the card connect to user space virtio
+	  devices on the host.
+
+	  If you are building a host kernel with an Intel MIC device then
+	  say M (recommended) or Y, else say N. If unsure say N.
+
+	  More information about the Intel MIC family as well as the Linux
+	  OS and tools for MIC to use with this driver are available from
+	  <http://software.intel.com/en-us/mic-developer>.
+
+if VOP
+source "drivers/vhost/Kconfig.vringh"
+endif
+
+endmenu
diff --git a/drivers/misc/mic/Makefile b/drivers/misc/mic/Makefile
new file mode 100644
index 000000000..1a43622b1
--- /dev/null
+++ b/drivers/misc/mic/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile - Intel MIC Linux driver.
+# Copyright(c) 2013, Intel Corporation.
+#
+obj-$(CONFIG_INTEL_MIC_HOST) += host/
+obj-$(CONFIG_INTEL_MIC_CARD) += card/
+obj-y += bus/
+obj-$(CONFIG_SCIF) += scif/
+obj-$(CONFIG_MIC_COSM) += cosm/
+obj-$(CONFIG_MIC_COSM) += cosm_client/
+obj-$(CONFIG_VOP) += vop/
diff --git a/drivers/misc/mic/bus/Makefile b/drivers/misc/mic/bus/Makefile
new file mode 100644
index 000000000..8758a7daa
--- /dev/null
+++ b/drivers/misc/mic/bus/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile - Intel MIC Linux driver.
+# Copyright(c) 2014, Intel Corporation.
+#
+obj-$(CONFIG_INTEL_MIC_BUS) += mic_bus.o
+obj-$(CONFIG_SCIF_BUS) += scif_bus.o
+obj-$(CONFIG_MIC_COSM) += cosm_bus.o
+obj-$(CONFIG_VOP_BUS) += vop_bus.o
diff --git a/drivers/misc/mic/bus/cosm_bus.c b/drivers/misc/mic/bus/cosm_bus.c
new file mode 100644
index 000000000..d31d6c6e6
--- /dev/null
+++ b/drivers/misc/mic/bus/cosm_bus.c
@@ -0,0 +1,141 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC COSM Bus Driver
+ */
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/idr.h>
+#include "cosm_bus.h"
+
+/* Unique numbering for cosm devices. */
+static DEFINE_IDA(cosm_index_ida);
+
+static int cosm_dev_probe(struct device *d)
+{
+	struct cosm_device *dev = dev_to_cosm(d);
+	struct cosm_driver *drv = drv_to_cosm(dev->dev.driver);
+
+	return drv->probe(dev);
+}
+
+static int cosm_dev_remove(struct device *d)
+{
+	struct cosm_device *dev = dev_to_cosm(d);
+	struct cosm_driver *drv = drv_to_cosm(dev->dev.driver);
+
+	drv->remove(dev);
+	return 0;
+}
+
+static struct bus_type cosm_bus = {
+	.name  = "cosm_bus",
+	.probe = cosm_dev_probe,
+	.remove = cosm_dev_remove,
+};
+
+int cosm_register_driver(struct cosm_driver *driver)
+{
+	driver->driver.bus = &cosm_bus;
+	return driver_register(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(cosm_register_driver);
+
+void cosm_unregister_driver(struct cosm_driver *driver)
+{
+	driver_unregister(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(cosm_unregister_driver);
+
+static inline void cosm_release_dev(struct device *d)
+{
+	struct cosm_device *cdev = dev_to_cosm(d);
+
+	kfree(cdev);
+}
+
+struct cosm_device *
+cosm_register_device(struct device *pdev, struct cosm_hw_ops *hw_ops)
+{
+	struct cosm_device *cdev;
+	int ret;
+
+	cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
+	if (!cdev)
+		return ERR_PTR(-ENOMEM);
+
+	cdev->dev.parent = pdev;
+	cdev->dev.release = cosm_release_dev;
+	cdev->hw_ops = hw_ops;
+	dev_set_drvdata(&cdev->dev, cdev);
+	cdev->dev.bus = &cosm_bus;
+
+	/* Assign a unique device index and hence name */
+	ret = ida_simple_get(&cosm_index_ida, 0, 0, GFP_KERNEL);
+	if (ret < 0)
+		goto free_cdev;
+
+	cdev->index = ret;
+	cdev->dev.id = ret;
+	dev_set_name(&cdev->dev, "cosm-dev%u", cdev->index);
+
+	ret = device_register(&cdev->dev);
+	if (ret)
+		goto ida_remove;
+	return cdev;
+ida_remove:
+	ida_simple_remove(&cosm_index_ida, cdev->index);
+free_cdev:
+	put_device(&cdev->dev);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(cosm_register_device);
+
+void cosm_unregister_device(struct cosm_device *dev)
+{
+	int index = dev->index; /* save for after device release */
+
+	device_unregister(&dev->dev);
+	ida_simple_remove(&cosm_index_ida, index);
+}
+EXPORT_SYMBOL_GPL(cosm_unregister_device);
+
+struct cosm_device *cosm_find_cdev_by_id(int id)
+{
+	struct device *dev = subsys_find_device_by_id(&cosm_bus, id, NULL);
+
+	return dev ? container_of(dev, struct cosm_device, dev) : NULL;
+}
+EXPORT_SYMBOL_GPL(cosm_find_cdev_by_id);
+
+static int __init cosm_init(void)
+{
+	return bus_register(&cosm_bus);
+}
+
+static void __exit cosm_exit(void)
+{
+	bus_unregister(&cosm_bus);
+	ida_destroy(&cosm_index_ida);
+}
+
+core_initcall(cosm_init);
+module_exit(cosm_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC card OS state management bus driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/bus/cosm_bus.h b/drivers/misc/mic/bus/cosm_bus.h
new file mode 100644
index 000000000..8b6341855
--- /dev/null
+++ b/drivers/misc/mic/bus/cosm_bus.h
@@ -0,0 +1,136 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC COSM Bus Driver
+ */
+#ifndef _COSM_BUS_H_
+#define _COSM_BUS_H_
+
+#include <linux/scif.h>
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+
+/**
+ * cosm_device - representation of a cosm device
+ *
+ * @attr_group: Pointer to list of sysfs attribute groups.
+ * @sdev: Device for sysfs entries.
+ * @state: MIC state.
+ * @prev_state: MIC state previous to MIC_RESETTING
+ * @shutdown_status: MIC status reported by card for shutdown/crashes.
+ * @shutdown_status_int: Internal shutdown status maintained by the driver
+ * @cosm_mutex: Mutex for synchronizing access to data structures.
+ * @reset_trigger_work: Work for triggering reset requests.
+ * @scif_work: Work for handling per device SCIF connections
+ * @cmdline: Kernel command line.
+ * @firmware: Firmware file name.
+ * @ramdisk: Ramdisk file name.
+ * @bootmode: Boot mode i.e. "linux" or "elf" for flash updates.
+ * @log_buf_addr: Log buffer address for MIC.
+ * @log_buf_len: Log buffer length address for MIC.
+ * @state_sysfs: Sysfs dirent for notifying ring 3 about MIC state changes.
+ * @hw_ops: the hardware bus ops for this device.
+ * @dev: underlying device.
+ * @index: unique position on the cosm bus
+ * @dbg_dir: debug fs directory
+ * @newepd: new endpoint from scif accept to be assigned to this cdev
+ * @epd: SCIF endpoint for this cdev
+ * @heartbeat_watchdog_enable: if heartbeat watchdog is enabled for this cdev
+ * @sysfs_heartbeat_enable: sysfs setting for disabling heartbeat notification
+ */
+struct cosm_device {
+	const struct attribute_group **attr_group;
+	struct device *sdev;
+	u8 state;
+	u8 prev_state;
+	u8 shutdown_status;
+	u8 shutdown_status_int;
+	struct mutex cosm_mutex;
+	struct work_struct reset_trigger_work;
+	struct work_struct scif_work;
+	char *cmdline;
+	char *firmware;
+	char *ramdisk;
+	char *bootmode;
+	void *log_buf_addr;
+	int *log_buf_len;
+	struct kernfs_node *state_sysfs;
+	struct cosm_hw_ops *hw_ops;
+	struct device dev;
+	int index;
+	struct dentry *dbg_dir;
+	scif_epd_t newepd;
+	scif_epd_t epd;
+	bool heartbeat_watchdog_enable;
+	bool sysfs_heartbeat_enable;
+};
+
+/**
+ * cosm_driver - operations for a cosm driver
+ *
+ * @driver: underlying device driver (populate name and owner).
+ * @probe: the function to call when a device is found.  Returns 0 or -errno.
+ * @remove: the function to call when a device is removed.
+ */
+struct cosm_driver {
+	struct device_driver driver;
+	int (*probe)(struct cosm_device *dev);
+	void (*remove)(struct cosm_device *dev);
+};
+
+/**
+ * cosm_hw_ops - cosm bus ops
+ *
+ * @reset: trigger MIC reset
+ * @force_reset: force MIC reset
+ * @post_reset: inform MIC reset is complete
+ * @ready: is MIC ready for OS download
+ * @start: boot MIC
+ * @stop: prepare MIC for reset
+ * @family: return MIC HW family string
+ * @stepping: return MIC HW stepping string
+ * @aper: return MIC PCIe aperture
+ */
+struct cosm_hw_ops {
+	void (*reset)(struct cosm_device *cdev);
+	void (*force_reset)(struct cosm_device *cdev);
+	void (*post_reset)(struct cosm_device *cdev, enum mic_states state);
+	bool (*ready)(struct cosm_device *cdev);
+	int (*start)(struct cosm_device *cdev, int id);
+	void (*stop)(struct cosm_device *cdev, bool force);
+	ssize_t (*family)(struct cosm_device *cdev, char *buf);
+	ssize_t (*stepping)(struct cosm_device *cdev, char *buf);
+	struct mic_mw *(*aper)(struct cosm_device *cdev);
+};
+
+struct cosm_device *
+cosm_register_device(struct device *pdev, struct cosm_hw_ops *hw_ops);
+void cosm_unregister_device(struct cosm_device *dev);
+int cosm_register_driver(struct cosm_driver *drv);
+void cosm_unregister_driver(struct cosm_driver *drv);
+struct cosm_device *cosm_find_cdev_by_id(int id);
+
+static inline struct cosm_device *dev_to_cosm(struct device *dev)
+{
+	return container_of(dev, struct cosm_device, dev);
+}
+
+static inline struct cosm_driver *drv_to_cosm(struct device_driver *drv)
+{
+	return container_of(drv, struct cosm_driver, driver);
+}
+#endif /* _COSM_BUS_H */
diff --git a/drivers/misc/mic/bus/mic_bus.c b/drivers/misc/mic/bus/mic_bus.c
new file mode 100644
index 000000000..77b16ca66
--- /dev/null
+++ b/drivers/misc/mic/bus/mic_bus.c
@@ -0,0 +1,204 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Bus driver.
+ *
+ * This implementation is very similar to the the virtio bus driver
+ * implementation @ drivers/virtio/virtio.c
+ */
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/idr.h>
+#include <linux/mic_bus.h>
+
+static ssize_t device_show(struct device *d,
+			   struct device_attribute *attr, char *buf)
+{
+	struct mbus_device *dev = dev_to_mbus(d);
+	return sprintf(buf, "0x%04x\n", dev->id.device);
+}
+static DEVICE_ATTR_RO(device);
+
+static ssize_t vendor_show(struct device *d,
+			   struct device_attribute *attr, char *buf)
+{
+	struct mbus_device *dev = dev_to_mbus(d);
+	return sprintf(buf, "0x%04x\n", dev->id.vendor);
+}
+static DEVICE_ATTR_RO(vendor);
+
+static ssize_t modalias_show(struct device *d,
+			     struct device_attribute *attr, char *buf)
+{
+	struct mbus_device *dev = dev_to_mbus(d);
+	return sprintf(buf, "mbus:d%08Xv%08X\n",
+		       dev->id.device, dev->id.vendor);
+}
+static DEVICE_ATTR_RO(modalias);
+
+static struct attribute *mbus_dev_attrs[] = {
+	&dev_attr_device.attr,
+	&dev_attr_vendor.attr,
+	&dev_attr_modalias.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(mbus_dev);
+
+static inline int mbus_id_match(const struct mbus_device *dev,
+				const struct mbus_device_id *id)
+{
+	if (id->device != dev->id.device && id->device != MBUS_DEV_ANY_ID)
+		return 0;
+
+	return id->vendor == MBUS_DEV_ANY_ID || id->vendor == dev->id.vendor;
+}
+
+/*
+ * This looks through all the IDs a driver claims to support.  If any of them
+ * match, we return 1 and the kernel will call mbus_dev_probe().
+ */
+static int mbus_dev_match(struct device *dv, struct device_driver *dr)
+{
+	unsigned int i;
+	struct mbus_device *dev = dev_to_mbus(dv);
+	const struct mbus_device_id *ids;
+
+	ids = drv_to_mbus(dr)->id_table;
+	for (i = 0; ids[i].device; i++)
+		if (mbus_id_match(dev, &ids[i]))
+			return 1;
+	return 0;
+}
+
+static int mbus_uevent(struct device *dv, struct kobj_uevent_env *env)
+{
+	struct mbus_device *dev = dev_to_mbus(dv);
+
+	return add_uevent_var(env, "MODALIAS=mbus:d%08Xv%08X",
+			      dev->id.device, dev->id.vendor);
+}
+
+static int mbus_dev_probe(struct device *d)
+{
+	int err;
+	struct mbus_device *dev = dev_to_mbus(d);
+	struct mbus_driver *drv = drv_to_mbus(dev->dev.driver);
+
+	err = drv->probe(dev);
+	if (!err)
+		if (drv->scan)
+			drv->scan(dev);
+	return err;
+}
+
+static int mbus_dev_remove(struct device *d)
+{
+	struct mbus_device *dev = dev_to_mbus(d);
+	struct mbus_driver *drv = drv_to_mbus(dev->dev.driver);
+
+	drv->remove(dev);
+	return 0;
+}
+
+static struct bus_type mic_bus = {
+	.name  = "mic_bus",
+	.match = mbus_dev_match,
+	.dev_groups = mbus_dev_groups,
+	.uevent = mbus_uevent,
+	.probe = mbus_dev_probe,
+	.remove = mbus_dev_remove,
+};
+
+int mbus_register_driver(struct mbus_driver *driver)
+{
+	driver->driver.bus = &mic_bus;
+	return driver_register(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(mbus_register_driver);
+
+void mbus_unregister_driver(struct mbus_driver *driver)
+{
+	driver_unregister(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(mbus_unregister_driver);
+
+static void mbus_release_dev(struct device *d)
+{
+	struct mbus_device *mbdev = dev_to_mbus(d);
+	kfree(mbdev);
+}
+
+struct mbus_device *
+mbus_register_device(struct device *pdev, int id, const struct dma_map_ops *dma_ops,
+		     struct mbus_hw_ops *hw_ops, int index,
+		     void __iomem *mmio_va)
+{
+	int ret;
+	struct mbus_device *mbdev;
+
+	mbdev = kzalloc(sizeof(*mbdev), GFP_KERNEL);
+	if (!mbdev)
+		return ERR_PTR(-ENOMEM);
+
+	mbdev->mmio_va = mmio_va;
+	mbdev->dev.parent = pdev;
+	mbdev->id.device = id;
+	mbdev->id.vendor = MBUS_DEV_ANY_ID;
+	mbdev->dev.dma_ops = dma_ops;
+	mbdev->dev.dma_mask = &mbdev->dev.coherent_dma_mask;
+	dma_set_mask(&mbdev->dev, DMA_BIT_MASK(64));
+	mbdev->dev.release = mbus_release_dev;
+	mbdev->hw_ops = hw_ops;
+	mbdev->dev.bus = &mic_bus;
+	mbdev->index = index;
+	dev_set_name(&mbdev->dev, "mbus-dev%u", mbdev->index);
+	/*
+	 * device_register() causes the bus infrastructure to look for a
+	 * matching driver.
+	 */
+	ret = device_register(&mbdev->dev);
+	if (ret)
+		goto free_mbdev;
+	return mbdev;
+free_mbdev:
+	put_device(&mbdev->dev);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(mbus_register_device);
+
+void mbus_unregister_device(struct mbus_device *mbdev)
+{
+	device_unregister(&mbdev->dev);
+}
+EXPORT_SYMBOL_GPL(mbus_unregister_device);
+
+static int __init mbus_init(void)
+{
+	return bus_register(&mic_bus);
+}
+
+static void __exit mbus_exit(void)
+{
+	bus_unregister(&mic_bus);
+}
+
+core_initcall(mbus_init);
+module_exit(mbus_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC Bus driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/bus/scif_bus.c b/drivers/misc/mic/bus/scif_bus.c
new file mode 100644
index 000000000..a444db5f6
--- /dev/null
+++ b/drivers/misc/mic/bus/scif_bus.c
@@ -0,0 +1,209 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel Symmetric Communications Interface Bus driver.
+ */
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/idr.h>
+#include <linux/dma-mapping.h>
+
+#include "scif_bus.h"
+
+static ssize_t device_show(struct device *d,
+			   struct device_attribute *attr, char *buf)
+{
+	struct scif_hw_dev *dev = dev_to_scif(d);
+
+	return sprintf(buf, "0x%04x\n", dev->id.device);
+}
+static DEVICE_ATTR_RO(device);
+
+static ssize_t vendor_show(struct device *d,
+			   struct device_attribute *attr, char *buf)
+{
+	struct scif_hw_dev *dev = dev_to_scif(d);
+
+	return sprintf(buf, "0x%04x\n", dev->id.vendor);
+}
+static DEVICE_ATTR_RO(vendor);
+
+static ssize_t modalias_show(struct device *d,
+			     struct device_attribute *attr, char *buf)
+{
+	struct scif_hw_dev *dev = dev_to_scif(d);
+
+	return sprintf(buf, "scif:d%08Xv%08X\n",
+		       dev->id.device, dev->id.vendor);
+}
+static DEVICE_ATTR_RO(modalias);
+
+static struct attribute *scif_dev_attrs[] = {
+	&dev_attr_device.attr,
+	&dev_attr_vendor.attr,
+	&dev_attr_modalias.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(scif_dev);
+
+static inline int scif_id_match(const struct scif_hw_dev *dev,
+				const struct scif_hw_dev_id *id)
+{
+	if (id->device != dev->id.device && id->device != SCIF_DEV_ANY_ID)
+		return 0;
+
+	return id->vendor == SCIF_DEV_ANY_ID || id->vendor == dev->id.vendor;
+}
+
+/*
+ * This looks through all the IDs a driver claims to support.  If any of them
+ * match, we return 1 and the kernel will call scif_dev_probe().
+ */
+static int scif_dev_match(struct device *dv, struct device_driver *dr)
+{
+	unsigned int i;
+	struct scif_hw_dev *dev = dev_to_scif(dv);
+	const struct scif_hw_dev_id *ids;
+
+	ids = drv_to_scif(dr)->id_table;
+	for (i = 0; ids[i].device; i++)
+		if (scif_id_match(dev, &ids[i]))
+			return 1;
+	return 0;
+}
+
+static int scif_uevent(struct device *dv, struct kobj_uevent_env *env)
+{
+	struct scif_hw_dev *dev = dev_to_scif(dv);
+
+	return add_uevent_var(env, "MODALIAS=scif:d%08Xv%08X",
+			      dev->id.device, dev->id.vendor);
+}
+
+static int scif_dev_probe(struct device *d)
+{
+	struct scif_hw_dev *dev = dev_to_scif(d);
+	struct scif_driver *drv = drv_to_scif(dev->dev.driver);
+
+	return drv->probe(dev);
+}
+
+static int scif_dev_remove(struct device *d)
+{
+	struct scif_hw_dev *dev = dev_to_scif(d);
+	struct scif_driver *drv = drv_to_scif(dev->dev.driver);
+
+	drv->remove(dev);
+	return 0;
+}
+
+static struct bus_type scif_bus = {
+	.name  = "scif_bus",
+	.match = scif_dev_match,
+	.dev_groups = scif_dev_groups,
+	.uevent = scif_uevent,
+	.probe = scif_dev_probe,
+	.remove = scif_dev_remove,
+};
+
+int scif_register_driver(struct scif_driver *driver)
+{
+	driver->driver.bus = &scif_bus;
+	return driver_register(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(scif_register_driver);
+
+void scif_unregister_driver(struct scif_driver *driver)
+{
+	driver_unregister(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(scif_unregister_driver);
+
+static void scif_release_dev(struct device *d)
+{
+	struct scif_hw_dev *sdev = dev_to_scif(d);
+
+	kfree(sdev);
+}
+
+struct scif_hw_dev *
+scif_register_device(struct device *pdev, int id, const struct dma_map_ops *dma_ops,
+		     struct scif_hw_ops *hw_ops, u8 dnode, u8 snode,
+		     struct mic_mw *mmio, struct mic_mw *aper, void *dp,
+		     void __iomem *rdp, struct dma_chan **chan, int num_chan,
+		     bool card_rel_da)
+{
+	int ret;
+	struct scif_hw_dev *sdev;
+
+	sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
+	if (!sdev)
+		return ERR_PTR(-ENOMEM);
+
+	sdev->dev.parent = pdev;
+	sdev->id.device = id;
+	sdev->id.vendor = SCIF_DEV_ANY_ID;
+	sdev->dev.dma_ops = dma_ops;
+	sdev->dev.release = scif_release_dev;
+	sdev->hw_ops = hw_ops;
+	sdev->dnode = dnode;
+	sdev->snode = snode;
+	dev_set_drvdata(&sdev->dev, sdev);
+	sdev->dev.bus = &scif_bus;
+	sdev->mmio = mmio;
+	sdev->aper = aper;
+	sdev->dp = dp;
+	sdev->rdp = rdp;
+	sdev->dev.dma_mask = &sdev->dev.coherent_dma_mask;
+	dma_set_mask(&sdev->dev, DMA_BIT_MASK(64));
+	sdev->dma_ch = chan;
+	sdev->num_dma_ch = num_chan;
+	sdev->card_rel_da = card_rel_da;
+	dev_set_name(&sdev->dev, "scif-dev%u", sdev->dnode);
+	/*
+	 * device_register() causes the bus infrastructure to look for a
+	 * matching driver.
+	 */
+	ret = device_register(&sdev->dev);
+	if (ret)
+		goto free_sdev;
+	return sdev;
+free_sdev:
+	put_device(&sdev->dev);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(scif_register_device);
+
+void scif_unregister_device(struct scif_hw_dev *sdev)
+{
+	device_unregister(&sdev->dev);
+}
+EXPORT_SYMBOL_GPL(scif_unregister_device);
+
+static int __init scif_init(void)
+{
+	return bus_register(&scif_bus);
+}
+
+static void __exit scif_exit(void)
+{
+	bus_unregister(&scif_bus);
+}
+
+core_initcall(scif_init);
+module_exit(scif_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) SCIF Bus driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/bus/scif_bus.h b/drivers/misc/mic/bus/scif_bus.h
new file mode 100644
index 000000000..ff5956821
--- /dev/null
+++ b/drivers/misc/mic/bus/scif_bus.h
@@ -0,0 +1,133 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel Symmetric Communications Interface Bus driver.
+ */
+#ifndef _SCIF_BUS_H_
+#define _SCIF_BUS_H_
+/*
+ * Everything a scif driver needs to work with any particular scif
+ * hardware abstraction layer.
+ */
+#include <linux/dma-mapping.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+
+struct scif_hw_dev_id {
+	u32 device;
+	u32 vendor;
+};
+
+#define MIC_SCIF_DEV 1
+#define SCIF_DEV_ANY_ID 0xffffffff
+
+/**
+ * scif_hw_dev - representation of a hardware device abstracted for scif
+ * @hw_ops: the hardware ops supported by this device
+ * @id: the device type identification (used to match it with a driver)
+ * @mmio: MMIO memory window
+ * @aper: Aperture memory window
+ * @dev: underlying device
+ * @dnode - The destination node which this device will communicate with.
+ * @snode - The source node for this device.
+ * @dp - Self device page
+ * @rdp - Remote device page
+ * @dma_ch - Array of DMA channels
+ * @num_dma_ch - Number of DMA channels available
+ * @card_rel_da - Set to true if DMA addresses programmed in the DMA engine
+ *		are relative to the card point of view
+ */
+struct scif_hw_dev {
+	struct scif_hw_ops *hw_ops;
+	struct scif_hw_dev_id id;
+	struct mic_mw *mmio;
+	struct mic_mw *aper;
+	struct device dev;
+	u8 dnode;
+	u8 snode;
+	void *dp;
+	void __iomem *rdp;
+	struct dma_chan **dma_ch;
+	int num_dma_ch;
+	bool card_rel_da;
+};
+
+/**
+ * scif_driver - operations for a scif I/O driver
+ * @driver: underlying device driver (populate name and owner).
+ * @id_table: the ids serviced by this driver.
+ * @probe: the function to call when a device is found.  Returns 0 or -errno.
+ * @remove: the function to call when a device is removed.
+ */
+struct scif_driver {
+	struct device_driver driver;
+	const struct scif_hw_dev_id *id_table;
+	int (*probe)(struct scif_hw_dev *dev);
+	void (*remove)(struct scif_hw_dev *dev);
+};
+
+/**
+ * scif_hw_ops - Hardware operations for accessing a SCIF device on the SCIF bus.
+ *
+ * @next_db: Obtain the next available doorbell.
+ * @request_irq: Request an interrupt on a particular doorbell.
+ * @free_irq: Free an interrupt requested previously.
+ * @ack_interrupt: acknowledge an interrupt in the ISR.
+ * @send_intr: Send an interrupt to the remote node on a specified doorbell.
+ * @send_p2p_intr: Send an interrupt to the peer node on a specified doorbell
+ * which is specifically targeted for a peer to peer node.
+ * @ioremap: Map a buffer with the specified physical address and length.
+ * @iounmap: Unmap a buffer previously mapped.
+ */
+struct scif_hw_ops {
+	int (*next_db)(struct scif_hw_dev *sdev);
+	struct mic_irq * (*request_irq)(struct scif_hw_dev *sdev,
+					irqreturn_t (*func)(int irq,
+							    void *data),
+					const char *name, void *data,
+					int db);
+	void (*free_irq)(struct scif_hw_dev *sdev,
+			 struct mic_irq *cookie, void *data);
+	void (*ack_interrupt)(struct scif_hw_dev *sdev, int num);
+	void (*send_intr)(struct scif_hw_dev *sdev, int db);
+	void (*send_p2p_intr)(struct scif_hw_dev *sdev, int db,
+			      struct mic_mw *mw);
+	void __iomem * (*ioremap)(struct scif_hw_dev *sdev,
+				  phys_addr_t pa, size_t len);
+	void (*iounmap)(struct scif_hw_dev *sdev, void __iomem *va);
+};
+
+int scif_register_driver(struct scif_driver *driver);
+void scif_unregister_driver(struct scif_driver *driver);
+struct scif_hw_dev *
+scif_register_device(struct device *pdev, int id,
+		     const struct dma_map_ops *dma_ops,
+		     struct scif_hw_ops *hw_ops, u8 dnode, u8 snode,
+		     struct mic_mw *mmio, struct mic_mw *aper,
+		     void *dp, void __iomem *rdp,
+		     struct dma_chan **chan, int num_chan,
+		     bool card_rel_da);
+void scif_unregister_device(struct scif_hw_dev *sdev);
+
+static inline struct scif_hw_dev *dev_to_scif(struct device *dev)
+{
+	return container_of(dev, struct scif_hw_dev, dev);
+}
+
+static inline struct scif_driver *drv_to_scif(struct device_driver *drv)
+{
+	return container_of(drv, struct scif_driver, driver);
+}
+#endif /* _SCIF_BUS_H */
diff --git a/drivers/misc/mic/bus/vop_bus.c b/drivers/misc/mic/bus/vop_bus.c
new file mode 100644
index 000000000..e5bb9c749
--- /dev/null
+++ b/drivers/misc/mic/bus/vop_bus.c
@@ -0,0 +1,205 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel Virtio Over PCIe (VOP) Bus driver.
+ */
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/idr.h>
+#include <linux/dma-mapping.h>
+
+#include "vop_bus.h"
+
+static ssize_t device_show(struct device *d,
+			   struct device_attribute *attr, char *buf)
+{
+	struct vop_device *dev = dev_to_vop(d);
+
+	return sprintf(buf, "0x%04x\n", dev->id.device);
+}
+static DEVICE_ATTR_RO(device);
+
+static ssize_t vendor_show(struct device *d,
+			   struct device_attribute *attr, char *buf)
+{
+	struct vop_device *dev = dev_to_vop(d);
+
+	return sprintf(buf, "0x%04x\n", dev->id.vendor);
+}
+static DEVICE_ATTR_RO(vendor);
+
+static ssize_t modalias_show(struct device *d,
+			     struct device_attribute *attr, char *buf)
+{
+	struct vop_device *dev = dev_to_vop(d);
+
+	return sprintf(buf, "vop:d%08Xv%08X\n",
+		       dev->id.device, dev->id.vendor);
+}
+static DEVICE_ATTR_RO(modalias);
+
+static struct attribute *vop_dev_attrs[] = {
+	&dev_attr_device.attr,
+	&dev_attr_vendor.attr,
+	&dev_attr_modalias.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(vop_dev);
+
+static inline int vop_id_match(const struct vop_device *dev,
+			       const struct vop_device_id *id)
+{
+	if (id->device != dev->id.device && id->device != VOP_DEV_ANY_ID)
+		return 0;
+
+	return id->vendor == VOP_DEV_ANY_ID || id->vendor == dev->id.vendor;
+}
+
+/*
+ * This looks through all the IDs a driver claims to support.  If any of them
+ * match, we return 1 and the kernel will call vop_dev_probe().
+ */
+static int vop_dev_match(struct device *dv, struct device_driver *dr)
+{
+	unsigned int i;
+	struct vop_device *dev = dev_to_vop(dv);
+	const struct vop_device_id *ids;
+
+	ids = drv_to_vop(dr)->id_table;
+	for (i = 0; ids[i].device; i++)
+		if (vop_id_match(dev, &ids[i]))
+			return 1;
+	return 0;
+}
+
+static int vop_uevent(struct device *dv, struct kobj_uevent_env *env)
+{
+	struct vop_device *dev = dev_to_vop(dv);
+
+	return add_uevent_var(env, "MODALIAS=vop:d%08Xv%08X",
+			      dev->id.device, dev->id.vendor);
+}
+
+static int vop_dev_probe(struct device *d)
+{
+	struct vop_device *dev = dev_to_vop(d);
+	struct vop_driver *drv = drv_to_vop(dev->dev.driver);
+
+	return drv->probe(dev);
+}
+
+static int vop_dev_remove(struct device *d)
+{
+	struct vop_device *dev = dev_to_vop(d);
+	struct vop_driver *drv = drv_to_vop(dev->dev.driver);
+
+	drv->remove(dev);
+	return 0;
+}
+
+static struct bus_type vop_bus = {
+	.name  = "vop_bus",
+	.match = vop_dev_match,
+	.dev_groups = vop_dev_groups,
+	.uevent = vop_uevent,
+	.probe = vop_dev_probe,
+	.remove = vop_dev_remove,
+};
+
+int vop_register_driver(struct vop_driver *driver)
+{
+	driver->driver.bus = &vop_bus;
+	return driver_register(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(vop_register_driver);
+
+void vop_unregister_driver(struct vop_driver *driver)
+{
+	driver_unregister(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(vop_unregister_driver);
+
+static void vop_release_dev(struct device *d)
+{
+	struct vop_device *dev = dev_to_vop(d);
+
+	kfree(dev);
+}
+
+struct vop_device *
+vop_register_device(struct device *pdev, int id,
+		    const struct dma_map_ops *dma_ops,
+		    struct vop_hw_ops *hw_ops, u8 dnode, struct mic_mw *aper,
+		    struct dma_chan *chan)
+{
+	int ret;
+	struct vop_device *vdev;
+
+	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
+	if (!vdev)
+		return ERR_PTR(-ENOMEM);
+
+	vdev->dev.parent = pdev;
+	vdev->id.device = id;
+	vdev->id.vendor = VOP_DEV_ANY_ID;
+	vdev->dev.dma_ops = dma_ops;
+	vdev->dev.dma_mask = &vdev->dev.coherent_dma_mask;
+	dma_set_mask(&vdev->dev, DMA_BIT_MASK(64));
+	vdev->dev.release = vop_release_dev;
+	vdev->hw_ops = hw_ops;
+	vdev->dev.bus = &vop_bus;
+	vdev->dnode = dnode;
+	vdev->aper = aper;
+	vdev->dma_ch = chan;
+	vdev->index = dnode - 1;
+	dev_set_name(&vdev->dev, "vop-dev%u", vdev->index);
+	/*
+	 * device_register() causes the bus infrastructure to look for a
+	 * matching driver.
+	 */
+	ret = device_register(&vdev->dev);
+	if (ret)
+		goto free_vdev;
+	return vdev;
+free_vdev:
+	put_device(&vdev->dev);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(vop_register_device);
+
+void vop_unregister_device(struct vop_device *dev)
+{
+	device_unregister(&dev->dev);
+}
+EXPORT_SYMBOL_GPL(vop_unregister_device);
+
+static int __init vop_init(void)
+{
+	return bus_register(&vop_bus);
+}
+
+static void __exit vop_exit(void)
+{
+	bus_unregister(&vop_bus);
+}
+
+core_initcall(vop_init);
+module_exit(vop_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) VOP Bus driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/bus/vop_bus.h b/drivers/misc/mic/bus/vop_bus.h
new file mode 100644
index 000000000..fff7a865d
--- /dev/null
+++ b/drivers/misc/mic/bus/vop_bus.h
@@ -0,0 +1,140 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel Virtio over PCIe Bus driver.
+ */
+#ifndef _VOP_BUS_H_
+#define _VOP_BUS_H_
+/*
+ * Everything a vop driver needs to work with any particular vop
+ * implementation.
+ */
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+
+#include "../common/mic_dev.h"
+
+struct vop_device_id {
+	u32 device;
+	u32 vendor;
+};
+
+#define VOP_DEV_TRNSP 1
+#define VOP_DEV_ANY_ID 0xffffffff
+/*
+ * Size of the internal buffer used during DMA's as an intermediate buffer
+ * for copy to/from user. Must be an integral number of pages.
+ */
+#define VOP_INT_DMA_BUF_SIZE PAGE_ALIGN(64 * 1024ULL)
+
+/**
+ * vop_device - representation of a device using vop
+ * @hw_ops: the hardware ops supported by this device.
+ * @id: the device type identification (used to match it with a driver).
+ * @dev: underlying device.
+ * @dnode - The destination node which this device will communicate with.
+ * @aper: Aperture memory window
+ * @dma_ch - DMA channel
+ * @index: unique position on the vop bus
+ */
+struct vop_device {
+	struct vop_hw_ops *hw_ops;
+	struct vop_device_id id;
+	struct device dev;
+	u8 dnode;
+	struct mic_mw *aper;
+	struct dma_chan *dma_ch;
+	int index;
+};
+
+/**
+ * vop_driver - operations for a vop I/O driver
+ * @driver: underlying device driver (populate name and owner).
+ * @id_table: the ids serviced by this driver.
+ * @probe: the function to call when a device is found.  Returns 0 or -errno.
+ * @remove: the function to call when a device is removed.
+ */
+struct vop_driver {
+	struct device_driver driver;
+	const struct vop_device_id *id_table;
+	int (*probe)(struct vop_device *dev);
+	void (*remove)(struct vop_device *dev);
+};
+
+/**
+ * vop_hw_ops - Hardware operations for accessing a VOP device on the VOP bus.
+ *
+ * @next_db: Obtain the next available doorbell.
+ * @request_irq: Request an interrupt on a particular doorbell.
+ * @free_irq: Free an interrupt requested previously.
+ * @ack_interrupt: acknowledge an interrupt in the ISR.
+ * @get_remote_dp: Get access to the virtio device page used by the remote
+ *                 node to add/remove/configure virtio devices.
+ * @get_dp: Get access to the virtio device page used by the self
+ *          node to add/remove/configure virtio devices.
+ * @send_intr: Send an interrupt to the peer node on a specified doorbell.
+ * @ioremap: Map a buffer with the specified DMA address and length.
+ * @iounmap: Unmap a buffer previously mapped.
+ * @dma_filter: The DMA filter function to use for obtaining access to
+ *		a DMA channel on the peer node.
+ */
+struct vop_hw_ops {
+	int (*next_db)(struct vop_device *vpdev);
+	struct mic_irq *(*request_irq)(struct vop_device *vpdev,
+				       irqreturn_t (*func)(int irq, void *data),
+				       const char *name, void *data,
+				       int intr_src);
+	void (*free_irq)(struct vop_device *vpdev,
+			 struct mic_irq *cookie, void *data);
+	void (*ack_interrupt)(struct vop_device *vpdev, int num);
+	void __iomem * (*get_remote_dp)(struct vop_device *vpdev);
+	void * (*get_dp)(struct vop_device *vpdev);
+	void (*send_intr)(struct vop_device *vpdev, int db);
+	void __iomem * (*ioremap)(struct vop_device *vpdev,
+				  dma_addr_t pa, size_t len);
+	void (*iounmap)(struct vop_device *vpdev, void __iomem *va);
+};
+
+struct vop_device *
+vop_register_device(struct device *pdev, int id,
+		    const struct dma_map_ops *dma_ops,
+		    struct vop_hw_ops *hw_ops, u8 dnode, struct mic_mw *aper,
+		    struct dma_chan *chan);
+void vop_unregister_device(struct vop_device *dev);
+int vop_register_driver(struct vop_driver *drv);
+void vop_unregister_driver(struct vop_driver *drv);
+
+/*
+ * module_vop_driver() - Helper macro for drivers that don't do
+ * anything special in module init/exit.  This eliminates a lot of
+ * boilerplate.  Each module may only use this macro once, and
+ * calling it replaces module_init() and module_exit()
+ */
+#define module_vop_driver(__vop_driver) \
+	module_driver(__vop_driver, vop_register_driver, \
+			vop_unregister_driver)
+
+static inline struct vop_device *dev_to_vop(struct device *dev)
+{
+	return container_of(dev, struct vop_device, dev);
+}
+
+static inline struct vop_driver *drv_to_vop(struct device_driver *drv)
+{
+	return container_of(drv, struct vop_driver, driver);
+}
+#endif /* _VOP_BUS_H */
diff --git a/drivers/misc/mic/card/Makefile b/drivers/misc/mic/card/Makefile
new file mode 100644
index 000000000..921a7e7e0
--- /dev/null
+++ b/drivers/misc/mic/card/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile - Intel MIC Linux driver.
+# Copyright(c) 2013, Intel Corporation.
+#
+ccflags-y += -DINTEL_MIC_CARD
+
+obj-$(CONFIG_INTEL_MIC_CARD) += mic_card.o
+mic_card-y += mic_x100.o
+mic_card-y += mic_device.o
+mic_card-y += mic_debugfs.o
diff --git a/drivers/misc/mic/card/mic_debugfs.c b/drivers/misc/mic/card/mic_debugfs.c
new file mode 100644
index 000000000..421b3d791
--- /dev/null
+++ b/drivers/misc/mic/card/mic_debugfs.c
@@ -0,0 +1,130 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Disclaimer: The codes contained in these modules may be specific to
+ * the Intel Software Development Platform codenamed: Knights Ferry, and
+ * the Intel product codenamed: Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel MIC Card driver.
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+
+/* Debugfs parent dir */
+static struct dentry *mic_dbg;
+
+/**
+ * mic_intr_test - Send interrupts to host.
+ */
+static int mic_intr_test(struct seq_file *s, void *unused)
+{
+	struct mic_driver *mdrv = s->private;
+	struct mic_device *mdev = &mdrv->mdev;
+
+	mic_send_intr(mdev, 0);
+	msleep(1000);
+	mic_send_intr(mdev, 1);
+	msleep(1000);
+	mic_send_intr(mdev, 2);
+	msleep(1000);
+	mic_send_intr(mdev, 3);
+	msleep(1000);
+
+	return 0;
+}
+
+static int mic_intr_test_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mic_intr_test, inode->i_private);
+}
+
+static int mic_intr_test_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file);
+}
+
+static const struct file_operations intr_test_ops = {
+	.owner   = THIS_MODULE,
+	.open    = mic_intr_test_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = mic_intr_test_release
+};
+
+/**
+ * mic_create_card_debug_dir - Initialize MIC debugfs entries.
+ */
+void __init mic_create_card_debug_dir(struct mic_driver *mdrv)
+{
+	struct dentry *d;
+
+	if (!mic_dbg)
+		return;
+
+	mdrv->dbg_dir = debugfs_create_dir(mdrv->name, mic_dbg);
+	if (!mdrv->dbg_dir) {
+		dev_err(mdrv->dev, "Cant create dbg_dir %s\n", mdrv->name);
+		return;
+	}
+
+	d = debugfs_create_file("intr_test", 0444, mdrv->dbg_dir,
+		mdrv, &intr_test_ops);
+
+	if (!d) {
+		dev_err(mdrv->dev,
+			"Cant create dbg intr_test %s\n", mdrv->name);
+		return;
+	}
+}
+
+/**
+ * mic_delete_card_debug_dir - Uninitialize MIC debugfs entries.
+ */
+void mic_delete_card_debug_dir(struct mic_driver *mdrv)
+{
+	if (!mdrv->dbg_dir)
+		return;
+
+	debugfs_remove_recursive(mdrv->dbg_dir);
+}
+
+/**
+ * mic_init_card_debugfs - Initialize global debugfs entry.
+ */
+void __init mic_init_card_debugfs(void)
+{
+	mic_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
+	if (!mic_dbg)
+		pr_err("can't create debugfs dir\n");
+}
+
+/**
+ * mic_exit_card_debugfs - Uninitialize global debugfs entry
+ */
+void mic_exit_card_debugfs(void)
+{
+	debugfs_remove(mic_dbg);
+}
diff --git a/drivers/misc/mic/card/mic_device.c b/drivers/misc/mic/card/mic_device.c
new file mode 100644
index 000000000..e749af48f
--- /dev/null
+++ b/drivers/misc/mic/card/mic_device.c
@@ -0,0 +1,429 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Disclaimer: The codes contained in these modules may be specific to
+ * the Intel Software Development Platform codenamed: Knights Ferry, and
+ * the Intel product codenamed: Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel MIC Card driver.
+ *
+ */
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/reboot.h>
+#include <linux/dmaengine.h>
+#include <linux/kmod.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+
+static struct mic_driver *g_drv;
+
+static int __init mic_dp_init(void)
+{
+	struct mic_driver *mdrv = g_drv;
+	struct mic_device *mdev = &mdrv->mdev;
+	struct mic_bootparam __iomem *bootparam;
+	u64 lo, hi, dp_dma_addr;
+	u32 magic;
+
+	lo = mic_read_spad(&mdrv->mdev, MIC_DPLO_SPAD);
+	hi = mic_read_spad(&mdrv->mdev, MIC_DPHI_SPAD);
+
+	dp_dma_addr = lo | (hi << 32);
+	mdrv->dp = mic_card_map(mdev, dp_dma_addr, MIC_DP_SIZE);
+	if (!mdrv->dp) {
+		dev_err(mdrv->dev, "Cannot remap Aperture BAR\n");
+		return -ENOMEM;
+	}
+	bootparam = mdrv->dp;
+	magic = ioread32(&bootparam->magic);
+	if (MIC_MAGIC != magic) {
+		dev_err(mdrv->dev, "bootparam magic mismatch 0x%x\n", magic);
+		return -EIO;
+	}
+	return 0;
+}
+
+/* Uninitialize the device page */
+static void mic_dp_uninit(void)
+{
+	mic_card_unmap(&g_drv->mdev, g_drv->dp);
+}
+
+/**
+ * mic_request_card_irq - request an irq.
+ *
+ * @handler: interrupt handler passed to request_threaded_irq.
+ * @thread_fn: thread fn. passed to request_threaded_irq.
+ * @name: The ASCII name of the callee requesting the irq.
+ * @data: private data that is returned back when calling the
+ * function handler.
+ * @index: The doorbell index of the requester.
+ *
+ * returns: The cookie that is transparent to the caller. Passed
+ * back when calling mic_free_irq. An appropriate error code
+ * is returned on failure. Caller needs to use IS_ERR(return_val)
+ * to check for failure and PTR_ERR(return_val) to obtained the
+ * error code.
+ *
+ */
+struct mic_irq *
+mic_request_card_irq(irq_handler_t handler,
+		     irq_handler_t thread_fn, const char *name,
+		     void *data, int index)
+{
+	int rc = 0;
+	unsigned long cookie;
+	struct mic_driver *mdrv = g_drv;
+
+	rc  = request_threaded_irq(mic_db_to_irq(mdrv, index), handler,
+				   thread_fn, 0, name, data);
+	if (rc) {
+		dev_err(mdrv->dev, "request_threaded_irq failed rc = %d\n", rc);
+		goto err;
+	}
+	mdrv->irq_info.irq_usage_count[index]++;
+	cookie = index;
+	return (struct mic_irq *)cookie;
+err:
+	return ERR_PTR(rc);
+}
+
+/**
+ * mic_free_card_irq - free irq.
+ *
+ * @cookie: cookie obtained during a successful call to mic_request_threaded_irq
+ * @data: private data specified by the calling function during the
+ * mic_request_threaded_irq
+ *
+ * returns: none.
+ */
+void mic_free_card_irq(struct mic_irq *cookie, void *data)
+{
+	int index;
+	struct mic_driver *mdrv = g_drv;
+
+	index = (unsigned long)cookie & 0xFFFFU;
+	free_irq(mic_db_to_irq(mdrv, index), data);
+	mdrv->irq_info.irq_usage_count[index]--;
+}
+
+/**
+ * mic_next_card_db - Get the doorbell with minimum usage count.
+ *
+ * Returns the irq index.
+ */
+int mic_next_card_db(void)
+{
+	int i;
+	int index = 0;
+	struct mic_driver *mdrv = g_drv;
+
+	for (i = 0; i < mdrv->intr_info.num_intr; i++) {
+		if (mdrv->irq_info.irq_usage_count[i] <
+			mdrv->irq_info.irq_usage_count[index])
+			index = i;
+	}
+
+	return index;
+}
+
+/**
+ * mic_init_irq - Initialize irq information.
+ *
+ * Returns 0 in success. Appropriate error code on failure.
+ */
+static int mic_init_irq(void)
+{
+	struct mic_driver *mdrv = g_drv;
+
+	mdrv->irq_info.irq_usage_count = kzalloc((sizeof(u32) *
+			mdrv->intr_info.num_intr),
+			GFP_KERNEL);
+	if (!mdrv->irq_info.irq_usage_count)
+		return -ENOMEM;
+	return 0;
+}
+
+/**
+ * mic_uninit_irq - Uninitialize irq information.
+ *
+ * None.
+ */
+static void mic_uninit_irq(void)
+{
+	struct mic_driver *mdrv = g_drv;
+
+	kfree(mdrv->irq_info.irq_usage_count);
+}
+
+static inline struct mic_driver *scdev_to_mdrv(struct scif_hw_dev *scdev)
+{
+	return dev_get_drvdata(scdev->dev.parent);
+}
+
+static struct mic_irq *
+___mic_request_irq(struct scif_hw_dev *scdev,
+		   irqreturn_t (*func)(int irq, void *data),
+				       const char *name, void *data,
+				       int db)
+{
+	return mic_request_card_irq(func, NULL, name, data, db);
+}
+
+static void
+___mic_free_irq(struct scif_hw_dev *scdev,
+		struct mic_irq *cookie, void *data)
+{
+	return mic_free_card_irq(cookie, data);
+}
+
+static void ___mic_ack_interrupt(struct scif_hw_dev *scdev, int num)
+{
+	struct mic_driver *mdrv = scdev_to_mdrv(scdev);
+
+	mic_ack_interrupt(&mdrv->mdev);
+}
+
+static int ___mic_next_db(struct scif_hw_dev *scdev)
+{
+	return mic_next_card_db();
+}
+
+static void ___mic_send_intr(struct scif_hw_dev *scdev, int db)
+{
+	struct mic_driver *mdrv = scdev_to_mdrv(scdev);
+
+	mic_send_intr(&mdrv->mdev, db);
+}
+
+static void ___mic_send_p2p_intr(struct scif_hw_dev *scdev, int db,
+				 struct mic_mw *mw)
+{
+	mic_send_p2p_intr(db, mw);
+}
+
+static void __iomem *
+___mic_ioremap(struct scif_hw_dev *scdev,
+	       phys_addr_t pa, size_t len)
+{
+	struct mic_driver *mdrv = scdev_to_mdrv(scdev);
+
+	return mic_card_map(&mdrv->mdev, pa, len);
+}
+
+static void ___mic_iounmap(struct scif_hw_dev *scdev, void __iomem *va)
+{
+	struct mic_driver *mdrv = scdev_to_mdrv(scdev);
+
+	mic_card_unmap(&mdrv->mdev, va);
+}
+
+static struct scif_hw_ops scif_hw_ops = {
+	.request_irq = ___mic_request_irq,
+	.free_irq = ___mic_free_irq,
+	.ack_interrupt = ___mic_ack_interrupt,
+	.next_db = ___mic_next_db,
+	.send_intr = ___mic_send_intr,
+	.send_p2p_intr = ___mic_send_p2p_intr,
+	.ioremap = ___mic_ioremap,
+	.iounmap = ___mic_iounmap,
+};
+
+static inline struct mic_driver *vpdev_to_mdrv(struct vop_device *vpdev)
+{
+	return dev_get_drvdata(vpdev->dev.parent);
+}
+
+static struct mic_irq *
+__mic_request_irq(struct vop_device *vpdev,
+		  irqreturn_t (*func)(int irq, void *data),
+		   const char *name, void *data, int intr_src)
+{
+	return mic_request_card_irq(func, NULL, name, data, intr_src);
+}
+
+static void __mic_free_irq(struct vop_device *vpdev,
+			   struct mic_irq *cookie, void *data)
+{
+	return mic_free_card_irq(cookie, data);
+}
+
+static void __mic_ack_interrupt(struct vop_device *vpdev, int num)
+{
+	struct mic_driver *mdrv = vpdev_to_mdrv(vpdev);
+
+	mic_ack_interrupt(&mdrv->mdev);
+}
+
+static int __mic_next_db(struct vop_device *vpdev)
+{
+	return mic_next_card_db();
+}
+
+static void __iomem *__mic_get_remote_dp(struct vop_device *vpdev)
+{
+	struct mic_driver *mdrv = vpdev_to_mdrv(vpdev);
+
+	return mdrv->dp;
+}
+
+static void __mic_send_intr(struct vop_device *vpdev, int db)
+{
+	struct mic_driver *mdrv = vpdev_to_mdrv(vpdev);
+
+	mic_send_intr(&mdrv->mdev, db);
+}
+
+static void __iomem *__mic_ioremap(struct vop_device *vpdev,
+				   dma_addr_t pa, size_t len)
+{
+	struct mic_driver *mdrv = vpdev_to_mdrv(vpdev);
+
+	return mic_card_map(&mdrv->mdev, pa, len);
+}
+
+static void __mic_iounmap(struct vop_device *vpdev, void __iomem *va)
+{
+	struct mic_driver *mdrv = vpdev_to_mdrv(vpdev);
+
+	mic_card_unmap(&mdrv->mdev, va);
+}
+
+static struct vop_hw_ops vop_hw_ops = {
+	.request_irq = __mic_request_irq,
+	.free_irq = __mic_free_irq,
+	.ack_interrupt = __mic_ack_interrupt,
+	.next_db = __mic_next_db,
+	.get_remote_dp = __mic_get_remote_dp,
+	.send_intr = __mic_send_intr,
+	.ioremap = __mic_ioremap,
+	.iounmap = __mic_iounmap,
+};
+
+static int mic_request_dma_chans(struct mic_driver *mdrv)
+{
+	dma_cap_mask_t mask;
+	struct dma_chan *chan;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_MEMCPY, mask);
+
+	do {
+		chan = dma_request_channel(mask, NULL, NULL);
+		if (chan) {
+			mdrv->dma_ch[mdrv->num_dma_ch++] = chan;
+			if (mdrv->num_dma_ch >= MIC_MAX_DMA_CHAN)
+				break;
+		}
+	} while (chan);
+	dev_info(mdrv->dev, "DMA channels # %d\n", mdrv->num_dma_ch);
+	return mdrv->num_dma_ch;
+}
+
+static void mic_free_dma_chans(struct mic_driver *mdrv)
+{
+	int i = 0;
+
+	for (i = 0; i < mdrv->num_dma_ch; i++) {
+		dma_release_channel(mdrv->dma_ch[i]);
+		mdrv->dma_ch[i] = NULL;
+	}
+	mdrv->num_dma_ch = 0;
+}
+
+/*
+ * mic_driver_init - MIC driver initialization tasks.
+ *
+ * Returns 0 in success. Appropriate error code on failure.
+ */
+int __init mic_driver_init(struct mic_driver *mdrv)
+{
+	int rc;
+	struct mic_bootparam __iomem *bootparam;
+	u8 node_id;
+
+	g_drv = mdrv;
+	/* Unloading the card module is not supported. */
+	if (!try_module_get(mdrv->dev->driver->owner)) {
+		rc = -ENODEV;
+		goto done;
+	}
+	rc = mic_dp_init();
+	if (rc)
+		goto put;
+	rc = mic_init_irq();
+	if (rc)
+		goto dp_uninit;
+	if (!mic_request_dma_chans(mdrv)) {
+		rc = -ENODEV;
+		goto irq_uninit;
+	}
+	mdrv->vpdev = vop_register_device(mdrv->dev, VOP_DEV_TRNSP,
+					  NULL, &vop_hw_ops, 0,
+					  NULL, mdrv->dma_ch[0]);
+	if (IS_ERR(mdrv->vpdev)) {
+		rc = PTR_ERR(mdrv->vpdev);
+		goto dma_free;
+	}
+	bootparam = mdrv->dp;
+	node_id = ioread8(&bootparam->node_id);
+	mdrv->scdev = scif_register_device(mdrv->dev, MIC_SCIF_DEV,
+					   NULL, &scif_hw_ops,
+					   0, node_id, &mdrv->mdev.mmio, NULL,
+					   NULL, mdrv->dp, mdrv->dma_ch,
+					   mdrv->num_dma_ch, true);
+	if (IS_ERR(mdrv->scdev)) {
+		rc = PTR_ERR(mdrv->scdev);
+		goto vop_remove;
+	}
+	mic_create_card_debug_dir(mdrv);
+done:
+	return rc;
+vop_remove:
+	vop_unregister_device(mdrv->vpdev);
+dma_free:
+	mic_free_dma_chans(mdrv);
+irq_uninit:
+	mic_uninit_irq();
+dp_uninit:
+	mic_dp_uninit();
+put:
+	module_put(mdrv->dev->driver->owner);
+	return rc;
+}
+
+/*
+ * mic_driver_uninit - MIC driver uninitialization tasks.
+ *
+ * Returns None
+ */
+void mic_driver_uninit(struct mic_driver *mdrv)
+{
+	mic_delete_card_debug_dir(mdrv);
+	scif_unregister_device(mdrv->scdev);
+	vop_unregister_device(mdrv->vpdev);
+	mic_free_dma_chans(mdrv);
+	mic_uninit_irq();
+	mic_dp_uninit();
+	module_put(mdrv->dev->driver->owner);
+}
diff --git a/drivers/misc/mic/card/mic_device.h b/drivers/misc/mic/card/mic_device.h
new file mode 100644
index 000000000..333dbed97
--- /dev/null
+++ b/drivers/misc/mic/card/mic_device.h
@@ -0,0 +1,149 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Disclaimer: The codes contained in these modules may be specific to
+ * the Intel Software Development Platform codenamed: Knights Ferry, and
+ * the Intel product codenamed: Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel MIC Card driver.
+ *
+ */
+#ifndef _MIC_CARD_DEVICE_H_
+#define _MIC_CARD_DEVICE_H_
+
+#include <linux/workqueue.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/mic_bus.h>
+#include "../bus/scif_bus.h"
+#include "../bus/vop_bus.h"
+
+/**
+ * struct mic_intr_info - Contains h/w specific interrupt sources info
+ *
+ * @num_intr: The number of irqs available
+ */
+struct mic_intr_info {
+	u32 num_intr;
+};
+
+/**
+ * struct mic_irq_info - OS specific irq information
+ *
+ * @irq_usage_count: usage count array tracking the number of sources
+ * assigned for each irq.
+ */
+struct mic_irq_info {
+	int *irq_usage_count;
+};
+
+/**
+ * struct mic_device -  MIC device information.
+ *
+ * @mmio: MMIO bar information.
+ */
+struct mic_device {
+	struct mic_mw mmio;
+};
+
+/**
+ * struct mic_driver - MIC card driver information.
+ *
+ * @name: Name for MIC driver.
+ * @dbg_dir: debugfs directory of this MIC device.
+ * @dev: The device backing this MIC.
+ * @dp: The pointer to the virtio device page.
+ * @mdev: MIC device information for the host.
+ * @hotplug_work: Hot plug work for adding/removing virtio devices.
+ * @irq_info: The OS specific irq information
+ * @intr_info: H/W specific interrupt information.
+ * @dma_mbdev: dma device on the MIC virtual bus.
+ * @dma_ch - Array of DMA channels
+ * @num_dma_ch - Number of DMA channels available
+ * @scdev: SCIF device on the SCIF virtual bus.
+ * @vpdev: Virtio over PCIe device on the VOP virtual bus.
+ */
+struct mic_driver {
+	char name[20];
+	struct dentry *dbg_dir;
+	struct device *dev;
+	void __iomem *dp;
+	struct mic_device mdev;
+	struct work_struct hotplug_work;
+	struct mic_irq_info irq_info;
+	struct mic_intr_info intr_info;
+	struct mbus_device *dma_mbdev;
+	struct dma_chan *dma_ch[MIC_MAX_DMA_CHAN];
+	int num_dma_ch;
+	struct scif_hw_dev *scdev;
+	struct vop_device *vpdev;
+};
+
+/**
+ * struct mic_irq - opaque pointer used as cookie
+ */
+struct mic_irq;
+
+/**
+ * mic_mmio_read - read from an MMIO register.
+ * @mw: MMIO register base virtual address.
+ * @offset: register offset.
+ *
+ * RETURNS: register value.
+ */
+static inline u32 mic_mmio_read(struct mic_mw *mw, u32 offset)
+{
+	return ioread32(mw->va + offset);
+}
+
+/**
+ * mic_mmio_write - write to an MMIO register.
+ * @mw: MMIO register base virtual address.
+ * @val: the data value to put into the register
+ * @offset: register offset.
+ *
+ * RETURNS: none.
+ */
+static inline void
+mic_mmio_write(struct mic_mw *mw, u32 val, u32 offset)
+{
+	iowrite32(val, mw->va + offset);
+}
+
+int mic_driver_init(struct mic_driver *mdrv);
+void mic_driver_uninit(struct mic_driver *mdrv);
+int mic_next_card_db(void);
+struct mic_irq *
+mic_request_card_irq(irq_handler_t handler, irq_handler_t thread_fn,
+		     const char *name, void *data, int db);
+void mic_free_card_irq(struct mic_irq *cookie, void *data);
+u32 mic_read_spad(struct mic_device *mdev, unsigned int idx);
+void mic_send_intr(struct mic_device *mdev, int doorbell);
+void mic_send_p2p_intr(int doorbell, struct mic_mw *mw);
+int mic_db_to_irq(struct mic_driver *mdrv, int db);
+u32 mic_ack_interrupt(struct mic_device *mdev);
+void mic_hw_intr_init(struct mic_driver *mdrv);
+void __iomem *
+mic_card_map(struct mic_device *mdev, dma_addr_t addr, size_t size);
+void mic_card_unmap(struct mic_device *mdev, void __iomem *addr);
+void __init mic_create_card_debug_dir(struct mic_driver *mdrv);
+void mic_delete_card_debug_dir(struct mic_driver *mdrv);
+void __init mic_init_card_debugfs(void);
+void mic_exit_card_debugfs(void);
+#endif
diff --git a/drivers/misc/mic/card/mic_x100.c b/drivers/misc/mic/card/mic_x100.c
new file mode 100644
index 000000000..4007adc66
--- /dev/null
+++ b/drivers/misc/mic/card/mic_x100.c
@@ -0,0 +1,359 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Disclaimer: The codes contained in these modules may be specific to
+ * the Intel Software Development Platform codenamed: Knights Ferry, and
+ * the Intel product codenamed: Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel MIC Card driver.
+ *
+ */
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+#include "mic_x100.h"
+
+static const char mic_driver_name[] = "mic";
+
+static struct mic_driver g_drv;
+
+/**
+ * mic_read_spad - read from the scratchpad register
+ * @mdev: pointer to mic_device instance
+ * @idx: index to scratchpad register, 0 based
+ *
+ * This function allows reading of the 32bit scratchpad register.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+u32 mic_read_spad(struct mic_device *mdev, unsigned int idx)
+{
+	return mic_mmio_read(&mdev->mmio,
+		MIC_X100_SBOX_BASE_ADDRESS +
+		MIC_X100_SBOX_SPAD0 + idx * 4);
+}
+
+/**
+ * __mic_send_intr - Send interrupt to Host.
+ * @mdev: pointer to mic_device instance
+ * @doorbell: Doorbell number.
+ */
+void mic_send_intr(struct mic_device *mdev, int doorbell)
+{
+	struct mic_mw *mw = &mdev->mmio;
+
+	if (doorbell > MIC_X100_MAX_DOORBELL_IDX)
+		return;
+	/* Ensure that the interrupt is ordered w.r.t previous stores. */
+	wmb();
+	mic_mmio_write(mw, MIC_X100_SBOX_SDBIC0_DBREQ_BIT,
+		       MIC_X100_SBOX_BASE_ADDRESS +
+		       (MIC_X100_SBOX_SDBIC0 + (4 * doorbell)));
+}
+
+/*
+ * mic_x100_send_sbox_intr - Send an MIC_X100_SBOX interrupt to MIC.
+ */
+static void mic_x100_send_sbox_intr(struct mic_mw *mw, int doorbell)
+{
+	u64 apic_icr_offset = MIC_X100_SBOX_APICICR0 + doorbell * 8;
+	u32 apicicr_low = mic_mmio_read(mw, MIC_X100_SBOX_BASE_ADDRESS +
+					apic_icr_offset);
+
+	/* for MIC we need to make sure we "hit" the send_icr bit (13) */
+	apicicr_low = (apicicr_low | (1 << 13));
+	/*
+	 * Ensure that the interrupt is ordered w.r.t. previous stores
+	 * to main memory. Fence instructions are not implemented in X100
+	 * since execution is in order but a compiler barrier is still
+	 * required.
+	 */
+	wmb();
+	mic_mmio_write(mw, apicicr_low,
+		       MIC_X100_SBOX_BASE_ADDRESS + apic_icr_offset);
+}
+
+static void mic_x100_send_rdmasr_intr(struct mic_mw *mw, int doorbell)
+{
+	int rdmasr_offset = MIC_X100_SBOX_RDMASR0 + (doorbell << 2);
+	/*
+	 * Ensure that the interrupt is ordered w.r.t. previous stores
+	 * to main memory. Fence instructions are not implemented in X100
+	 * since execution is in order but a compiler barrier is still
+	 * required.
+	 */
+	wmb();
+	mic_mmio_write(mw, 0, MIC_X100_SBOX_BASE_ADDRESS + rdmasr_offset);
+}
+
+/**
+ * mic_ack_interrupt - Device specific interrupt handling.
+ * @mdev: pointer to mic_device instance
+ *
+ * Returns: bitmask of doorbell events triggered.
+ */
+u32 mic_ack_interrupt(struct mic_device *mdev)
+{
+	return 0;
+}
+
+static inline int mic_get_sbox_irq(int db)
+{
+	return MIC_X100_IRQ_BASE + db;
+}
+
+static inline int mic_get_rdmasr_irq(int index)
+{
+	return  MIC_X100_RDMASR_IRQ_BASE + index;
+}
+
+void mic_send_p2p_intr(int db, struct mic_mw *mw)
+{
+	int rdmasr_index;
+
+	if (db < MIC_X100_NUM_SBOX_IRQ) {
+		mic_x100_send_sbox_intr(mw, db);
+	} else {
+		rdmasr_index = db - MIC_X100_NUM_SBOX_IRQ;
+		mic_x100_send_rdmasr_intr(mw, rdmasr_index);
+	}
+}
+
+/**
+ * mic_hw_intr_init - Initialize h/w specific interrupt
+ * information.
+ * @mdrv: pointer to mic_driver
+ */
+void mic_hw_intr_init(struct mic_driver *mdrv)
+{
+	mdrv->intr_info.num_intr = MIC_X100_NUM_SBOX_IRQ +
+				MIC_X100_NUM_RDMASR_IRQ;
+}
+
+/**
+ * mic_db_to_irq - Retrieve irq number corresponding to a doorbell.
+ * @mdrv: pointer to mic_driver
+ * @db: The doorbell obtained for which the irq is needed. Doorbell
+ * may correspond to an sbox doorbell or an rdmasr index.
+ *
+ * Returns the irq corresponding to the doorbell.
+ */
+int mic_db_to_irq(struct mic_driver *mdrv, int db)
+{
+	int rdmasr_index;
+
+	/*
+	 * The total number of doorbell interrupts on the card are 16. Indices
+	 * 0-8 falls in the SBOX category and 8-15 fall in the RDMASR category.
+	 */
+	if (db < MIC_X100_NUM_SBOX_IRQ) {
+		return mic_get_sbox_irq(db);
+	} else {
+		rdmasr_index = db - MIC_X100_NUM_SBOX_IRQ;
+		return mic_get_rdmasr_irq(rdmasr_index);
+	}
+}
+
+/*
+ * mic_card_map - Allocate virtual address for a remote memory region.
+ * @mdev: pointer to mic_device instance.
+ * @addr: Remote DMA address.
+ * @size: Size of the region.
+ *
+ * Returns: Virtual address backing the remote memory region.
+ */
+void __iomem *
+mic_card_map(struct mic_device *mdev, dma_addr_t addr, size_t size)
+{
+	return ioremap(addr, size);
+}
+
+/*
+ * mic_card_unmap - Unmap the virtual address for a remote memory region.
+ * @mdev: pointer to mic_device instance.
+ * @addr: Virtual address for remote memory region.
+ *
+ * Returns: None.
+ */
+void mic_card_unmap(struct mic_device *mdev, void __iomem *addr)
+{
+	iounmap(addr);
+}
+
+static inline struct mic_driver *mbdev_to_mdrv(struct mbus_device *mbdev)
+{
+	return dev_get_drvdata(mbdev->dev.parent);
+}
+
+static struct mic_irq *
+_mic_request_threaded_irq(struct mbus_device *mbdev,
+			  irq_handler_t handler, irq_handler_t thread_fn,
+			  const char *name, void *data, int intr_src)
+{
+	int rc = 0;
+	unsigned int irq = intr_src;
+	unsigned long cookie = irq;
+
+	rc  = request_threaded_irq(irq, handler, thread_fn, 0, name, data);
+	if (rc) {
+		dev_err(mbdev_to_mdrv(mbdev)->dev,
+			"request_threaded_irq failed rc = %d\n", rc);
+		return ERR_PTR(rc);
+	}
+	return (struct mic_irq *)cookie;
+}
+
+static void _mic_free_irq(struct mbus_device *mbdev,
+			  struct mic_irq *cookie, void *data)
+{
+	unsigned long irq = (unsigned long)cookie;
+	free_irq(irq, data);
+}
+
+static void _mic_ack_interrupt(struct mbus_device *mbdev, int num)
+{
+	mic_ack_interrupt(&mbdev_to_mdrv(mbdev)->mdev);
+}
+
+static struct mbus_hw_ops mbus_hw_ops = {
+	.request_threaded_irq = _mic_request_threaded_irq,
+	.free_irq = _mic_free_irq,
+	.ack_interrupt = _mic_ack_interrupt,
+};
+
+static int __init mic_probe(struct platform_device *pdev)
+{
+	struct mic_driver *mdrv = &g_drv;
+	struct mic_device *mdev = &mdrv->mdev;
+	int rc = 0;
+
+	mdrv->dev = &pdev->dev;
+	snprintf(mdrv->name, sizeof(mic_driver_name), mic_driver_name);
+
+	/* FIXME: use dma_set_mask_and_coherent() and check result */
+	dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+
+	mdev->mmio.pa = MIC_X100_MMIO_BASE;
+	mdev->mmio.len = MIC_X100_MMIO_LEN;
+	mdev->mmio.va = devm_ioremap(&pdev->dev, MIC_X100_MMIO_BASE,
+				     MIC_X100_MMIO_LEN);
+	if (!mdev->mmio.va) {
+		dev_err(&pdev->dev, "Cannot remap MMIO BAR\n");
+		rc = -EIO;
+		goto done;
+	}
+	mic_hw_intr_init(mdrv);
+	platform_set_drvdata(pdev, mdrv);
+	mdrv->dma_mbdev = mbus_register_device(mdrv->dev, MBUS_DEV_DMA_MIC,
+					       NULL, &mbus_hw_ops, 0,
+					       mdrv->mdev.mmio.va);
+	if (IS_ERR(mdrv->dma_mbdev)) {
+		rc = PTR_ERR(mdrv->dma_mbdev);
+		dev_err(&pdev->dev, "mbus_add_device failed rc %d\n", rc);
+		goto done;
+	}
+	rc = mic_driver_init(mdrv);
+	if (rc) {
+		dev_err(&pdev->dev, "mic_driver_init failed rc %d\n", rc);
+		goto remove_dma;
+	}
+done:
+	return rc;
+remove_dma:
+	mbus_unregister_device(mdrv->dma_mbdev);
+	return rc;
+}
+
+static int mic_remove(struct platform_device *pdev)
+{
+	struct mic_driver *mdrv = &g_drv;
+
+	mic_driver_uninit(mdrv);
+	mbus_unregister_device(mdrv->dma_mbdev);
+	return 0;
+}
+
+static void mic_platform_shutdown(struct platform_device *pdev)
+{
+	mic_remove(pdev);
+}
+
+static struct platform_driver __refdata mic_platform_driver = {
+	.probe = mic_probe,
+	.remove = mic_remove,
+	.shutdown = mic_platform_shutdown,
+	.driver         = {
+		.name   = mic_driver_name,
+	},
+};
+
+static struct platform_device *mic_platform_dev;
+
+static int __init mic_init(void)
+{
+	int ret;
+	struct cpuinfo_x86 *c = &cpu_data(0);
+
+	if (!(c->x86 == 11 && c->x86_model == 1)) {
+		ret = -ENODEV;
+		pr_err("%s not running on X100 ret %d\n", __func__, ret);
+		goto done;
+	}
+
+	request_module("mic_x100_dma");
+	mic_init_card_debugfs();
+
+	mic_platform_dev = platform_device_register_simple(mic_driver_name,
+							   0, NULL, 0);
+	ret = PTR_ERR_OR_ZERO(mic_platform_dev);
+	if (ret) {
+		pr_err("platform_device_register_full ret %d\n", ret);
+		goto cleanup_debugfs;
+	}
+	ret = platform_driver_register(&mic_platform_driver);
+	if (ret) {
+		pr_err("platform_driver_register ret %d\n", ret);
+		goto device_unregister;
+	}
+	return ret;
+
+device_unregister:
+	platform_device_unregister(mic_platform_dev);
+cleanup_debugfs:
+	mic_exit_card_debugfs();
+done:
+	return ret;
+}
+
+static void __exit mic_exit(void)
+{
+	platform_driver_unregister(&mic_platform_driver);
+	platform_device_unregister(mic_platform_dev);
+	mic_exit_card_debugfs();
+}
+
+module_init(mic_init);
+module_exit(mic_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC X100 Card driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/card/mic_x100.h b/drivers/misc/mic/card/mic_x100.h
new file mode 100644
index 000000000..7e2224934
--- /dev/null
+++ b/drivers/misc/mic/card/mic_x100.h
@@ -0,0 +1,49 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Disclaimer: The codes contained in these modules may be specific to
+ * the Intel Software Development Platform codenamed: Knights Ferry, and
+ * the Intel product codenamed: Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel MIC Card driver.
+ *
+ */
+#ifndef _MIC_X100_CARD_H_
+#define _MIC_X100_CARD_H_
+
+#define MIC_X100_MMIO_BASE 0x08007C0000ULL
+#define MIC_X100_MMIO_LEN 0x00020000ULL
+#define MIC_X100_SBOX_BASE_ADDRESS 0x00010000ULL
+
+#define MIC_X100_SBOX_SPAD0 0x0000AB20
+#define MIC_X100_SBOX_SDBIC0 0x0000CC90
+#define MIC_X100_SBOX_SDBIC0_DBREQ_BIT 0x80000000
+#define MIC_X100_SBOX_RDMASR0	0x0000B180
+#define MIC_X100_SBOX_APICICR0 0x0000A9D0
+
+#define MIC_X100_MAX_DOORBELL_IDX 8
+
+#define MIC_X100_NUM_SBOX_IRQ 8
+#define MIC_X100_NUM_RDMASR_IRQ 8
+#define MIC_X100_SBOX_IRQ_BASE 0
+#define MIC_X100_RDMASR_IRQ_BASE 17
+
+#define MIC_X100_IRQ_BASE 26
+
+#endif
diff --git a/drivers/misc/mic/common/mic_dev.h b/drivers/misc/mic/common/mic_dev.h
new file mode 100644
index 000000000..50776772e
--- /dev/null
+++ b/drivers/misc/mic/common/mic_dev.h
@@ -0,0 +1,67 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC driver.
+ *
+ */
+#ifndef __MIC_DEV_H__
+#define __MIC_DEV_H__
+
+/* The maximum number of MIC devices supported in a single host system. */
+#define MIC_MAX_NUM_DEVS 128
+
+/**
+ * enum mic_hw_family - The hardware family to which a device belongs.
+ */
+enum mic_hw_family {
+	MIC_FAMILY_X100 = 0,
+	MIC_FAMILY_X200,
+	MIC_FAMILY_UNKNOWN,
+	MIC_FAMILY_LAST
+};
+
+/**
+ * struct mic_mw - MIC memory window
+ *
+ * @pa: Base physical address.
+ * @va: Base ioremap'd virtual address.
+ * @len: Size of the memory window.
+ */
+struct mic_mw {
+	phys_addr_t pa;
+	void __iomem *va;
+	resource_size_t len;
+};
+
+/*
+ * Scratch pad register offsets used by the host to communicate
+ * device page DMA address to the card.
+ */
+#define MIC_DPLO_SPAD 14
+#define MIC_DPHI_SPAD 15
+
+/*
+ * These values are supposed to be in the config_change field of the
+ * device page when the host sends a config change interrupt to the card.
+ */
+#define MIC_VIRTIO_PARAM_DEV_REMOVE 0x1
+#define MIC_VIRTIO_PARAM_CONFIG_CHANGED 0x2
+
+/* Maximum number of DMA channels */
+#define MIC_MAX_DMA_CHAN 4
+
+#endif
diff --git a/drivers/misc/mic/cosm/Makefile b/drivers/misc/mic/cosm/Makefile
new file mode 100644
index 000000000..97d74cb12
--- /dev/null
+++ b/drivers/misc/mic/cosm/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile - Intel MIC Coprocessor State Management (COSM) Driver
+# Copyright(c) 2015, Intel Corporation.
+#
+obj-$(CONFIG_MIC_COSM) += mic_cosm.o
+
+mic_cosm-objs := cosm_main.o
+mic_cosm-objs += cosm_debugfs.o
+mic_cosm-objs += cosm_sysfs.o
+mic_cosm-objs += cosm_scif_server.o
diff --git a/drivers/misc/mic/cosm/cosm_debugfs.c b/drivers/misc/mic/cosm/cosm_debugfs.c
new file mode 100644
index 000000000..216cb3cd2
--- /dev/null
+++ b/drivers/misc/mic/cosm/cosm_debugfs.c
@@ -0,0 +1,156 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Coprocessor State Management (COSM) Driver
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include "cosm_main.h"
+
+/* Debugfs parent dir */
+static struct dentry *cosm_dbg;
+
+/**
+ * cosm_log_buf_show - Display MIC kernel log buffer
+ *
+ * log_buf addr/len is read from System.map by user space
+ * and populated in sysfs entries.
+ */
+static int cosm_log_buf_show(struct seq_file *s, void *unused)
+{
+	void __iomem *log_buf_va;
+	int __iomem *log_buf_len_va;
+	struct cosm_device *cdev = s->private;
+	void *kva;
+	int size;
+	u64 aper_offset;
+
+	if (!cdev || !cdev->log_buf_addr || !cdev->log_buf_len)
+		goto done;
+
+	mutex_lock(&cdev->cosm_mutex);
+	switch (cdev->state) {
+	case MIC_BOOTING:
+	case MIC_ONLINE:
+	case MIC_SHUTTING_DOWN:
+		break;
+	default:
+		goto unlock;
+	}
+
+	/*
+	 * Card kernel will never be relocated and any kernel text/data mapping
+	 * can be translated to phys address by subtracting __START_KERNEL_map.
+	 */
+	aper_offset = (u64)cdev->log_buf_len - __START_KERNEL_map;
+	log_buf_len_va = cdev->hw_ops->aper(cdev)->va + aper_offset;
+	aper_offset = (u64)cdev->log_buf_addr - __START_KERNEL_map;
+	log_buf_va = cdev->hw_ops->aper(cdev)->va + aper_offset;
+
+	size = ioread32(log_buf_len_va);
+	kva = kmalloc(size, GFP_KERNEL);
+	if (!kva)
+		goto unlock;
+
+	memcpy_fromio(kva, log_buf_va, size);
+	seq_write(s, kva, size);
+	kfree(kva);
+unlock:
+	mutex_unlock(&cdev->cosm_mutex);
+done:
+	return 0;
+}
+
+static int cosm_log_buf_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, cosm_log_buf_show, inode->i_private);
+}
+
+static const struct file_operations log_buf_ops = {
+	.owner   = THIS_MODULE,
+	.open    = cosm_log_buf_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release
+};
+
+/**
+ * cosm_force_reset_show - Force MIC reset
+ *
+ * Invokes the force_reset COSM bus op instead of the standard reset
+ * op in case a force reset of the MIC device is required
+ */
+static int cosm_force_reset_show(struct seq_file *s, void *pos)
+{
+	struct cosm_device *cdev = s->private;
+
+	cosm_stop(cdev, true);
+	return 0;
+}
+
+static int cosm_force_reset_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, cosm_force_reset_show, inode->i_private);
+}
+
+static const struct file_operations force_reset_ops = {
+	.owner   = THIS_MODULE,
+	.open    = cosm_force_reset_debug_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release
+};
+
+void cosm_create_debug_dir(struct cosm_device *cdev)
+{
+	char name[16];
+
+	if (!cosm_dbg)
+		return;
+
+	scnprintf(name, sizeof(name), "mic%d", cdev->index);
+	cdev->dbg_dir = debugfs_create_dir(name, cosm_dbg);
+	if (!cdev->dbg_dir)
+		return;
+
+	debugfs_create_file("log_buf", 0444, cdev->dbg_dir, cdev, &log_buf_ops);
+	debugfs_create_file("force_reset", 0444, cdev->dbg_dir, cdev,
+			    &force_reset_ops);
+}
+
+void cosm_delete_debug_dir(struct cosm_device *cdev)
+{
+	if (!cdev->dbg_dir)
+		return;
+
+	debugfs_remove_recursive(cdev->dbg_dir);
+}
+
+void cosm_init_debugfs(void)
+{
+	cosm_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
+	if (!cosm_dbg)
+		pr_err("can't create debugfs dir\n");
+}
+
+void cosm_exit_debugfs(void)
+{
+	debugfs_remove(cosm_dbg);
+}
diff --git a/drivers/misc/mic/cosm/cosm_main.c b/drivers/misc/mic/cosm/cosm_main.c
new file mode 100644
index 000000000..7005cb1e0
--- /dev/null
+++ b/drivers/misc/mic/cosm/cosm_main.c
@@ -0,0 +1,393 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Coprocessor State Management (COSM) Driver
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/idr.h>
+#include <linux/slab.h>
+#include <linux/cred.h>
+#include "cosm_main.h"
+
+static const char cosm_driver_name[] = "mic";
+
+/* COSM ID allocator */
+static struct ida g_cosm_ida;
+/* Class of MIC devices for sysfs accessibility. */
+static struct class *g_cosm_class;
+/* Number of MIC devices */
+static atomic_t g_num_dev;
+
+/**
+ * cosm_hw_reset - Issue a HW reset for the MIC device
+ * @cdev: pointer to cosm_device instance
+ */
+static void cosm_hw_reset(struct cosm_device *cdev, bool force)
+{
+	int i;
+
+#define MIC_RESET_TO (45)
+	if (force && cdev->hw_ops->force_reset)
+		cdev->hw_ops->force_reset(cdev);
+	else
+		cdev->hw_ops->reset(cdev);
+
+	for (i = 0; i < MIC_RESET_TO; i++) {
+		if (cdev->hw_ops->ready(cdev)) {
+			cosm_set_state(cdev, MIC_READY);
+			return;
+		}
+		/*
+		 * Resets typically take 10s of seconds to complete.
+		 * Since an MMIO read is required to check if the
+		 * firmware is ready or not, a 1 second delay works nicely.
+		 */
+		msleep(1000);
+	}
+	cosm_set_state(cdev, MIC_RESET_FAILED);
+}
+
+/**
+ * cosm_start - Start the MIC
+ * @cdev: pointer to cosm_device instance
+ *
+ * This function prepares an MIC for boot and initiates boot.
+ * RETURNS: An appropriate -ERRNO error value on error, or 0 for success.
+ */
+int cosm_start(struct cosm_device *cdev)
+{
+	const struct cred *orig_cred;
+	struct cred *override_cred;
+	int rc;
+
+	mutex_lock(&cdev->cosm_mutex);
+	if (!cdev->bootmode) {
+		dev_err(&cdev->dev, "%s %d bootmode not set\n",
+			__func__, __LINE__);
+		rc = -EINVAL;
+		goto unlock_ret;
+	}
+retry:
+	if (cdev->state != MIC_READY) {
+		dev_err(&cdev->dev, "%s %d MIC state not READY\n",
+			__func__, __LINE__);
+		rc = -EINVAL;
+		goto unlock_ret;
+	}
+	if (!cdev->hw_ops->ready(cdev)) {
+		cosm_hw_reset(cdev, false);
+		/*
+		 * The state will either be MIC_READY if the reset succeeded
+		 * or MIC_RESET_FAILED if the firmware reset failed.
+		 */
+		goto retry;
+	}
+
+	/*
+	 * Set credentials to root to allow non-root user to download initramsfs
+	 * with 600 permissions
+	 */
+	override_cred = prepare_creds();
+	if (!override_cred) {
+		dev_err(&cdev->dev, "%s %d prepare_creds failed\n",
+			__func__, __LINE__);
+		rc = -ENOMEM;
+		goto unlock_ret;
+	}
+	override_cred->fsuid = GLOBAL_ROOT_UID;
+	orig_cred = override_creds(override_cred);
+
+	rc = cdev->hw_ops->start(cdev, cdev->index);
+
+	revert_creds(orig_cred);
+	put_cred(override_cred);
+	if (rc)
+		goto unlock_ret;
+
+	/*
+	 * If linux is being booted, card is treated 'online' only
+	 * when the scif interface in the card is up. If anything else
+	 * is booted, we set card to 'online' immediately.
+	 */
+	if (!strcmp(cdev->bootmode, "linux"))
+		cosm_set_state(cdev, MIC_BOOTING);
+	else
+		cosm_set_state(cdev, MIC_ONLINE);
+unlock_ret:
+	mutex_unlock(&cdev->cosm_mutex);
+	if (rc)
+		dev_err(&cdev->dev, "cosm_start failed rc %d\n", rc);
+	return rc;
+}
+
+/**
+ * cosm_stop - Prepare the MIC for reset and trigger reset
+ * @cdev: pointer to cosm_device instance
+ * @force: force a MIC to reset even if it is already reset and ready.
+ *
+ * RETURNS: None
+ */
+void cosm_stop(struct cosm_device *cdev, bool force)
+{
+	mutex_lock(&cdev->cosm_mutex);
+	if (cdev->state != MIC_READY || force) {
+		/*
+		 * Don't call hw_ops if they have been called previously.
+		 * stop(..) calls device_unregister and will crash the system if
+		 * called multiple times.
+		 */
+		u8 state = cdev->state == MIC_RESETTING ?
+					cdev->prev_state : cdev->state;
+		bool call_hw_ops = state != MIC_RESET_FAILED &&
+					state != MIC_READY;
+
+		if (cdev->state != MIC_RESETTING)
+			cosm_set_state(cdev, MIC_RESETTING);
+		cdev->heartbeat_watchdog_enable = false;
+		if (call_hw_ops)
+			cdev->hw_ops->stop(cdev, force);
+		cosm_hw_reset(cdev, force);
+		cosm_set_shutdown_status(cdev, MIC_NOP);
+		if (call_hw_ops && cdev->hw_ops->post_reset)
+			cdev->hw_ops->post_reset(cdev, cdev->state);
+	}
+	mutex_unlock(&cdev->cosm_mutex);
+	flush_work(&cdev->scif_work);
+}
+
+/**
+ * cosm_reset_trigger_work - Trigger MIC reset
+ * @work: The work structure
+ *
+ * This work is scheduled whenever the host wants to reset the MIC.
+ */
+static void cosm_reset_trigger_work(struct work_struct *work)
+{
+	struct cosm_device *cdev = container_of(work, struct cosm_device,
+						reset_trigger_work);
+	cosm_stop(cdev, false);
+}
+
+/**
+ * cosm_reset - Schedule MIC reset
+ * @cdev: pointer to cosm_device instance
+ *
+ * RETURNS: An -EINVAL if the card is already READY or 0 for success.
+ */
+int cosm_reset(struct cosm_device *cdev)
+{
+	int rc = 0;
+
+	mutex_lock(&cdev->cosm_mutex);
+	if (cdev->state != MIC_READY) {
+		if (cdev->state != MIC_RESETTING) {
+			cdev->prev_state = cdev->state;
+			cosm_set_state(cdev, MIC_RESETTING);
+			schedule_work(&cdev->reset_trigger_work);
+		}
+	} else {
+		dev_err(&cdev->dev, "%s %d MIC is READY\n", __func__, __LINE__);
+		rc = -EINVAL;
+	}
+	mutex_unlock(&cdev->cosm_mutex);
+	return rc;
+}
+
+/**
+ * cosm_shutdown - Initiate MIC shutdown.
+ * @cdev: pointer to cosm_device instance
+ *
+ * RETURNS: None
+ */
+int cosm_shutdown(struct cosm_device *cdev)
+{
+	struct cosm_msg msg = { .id = COSM_MSG_SHUTDOWN };
+	int rc = 0;
+
+	mutex_lock(&cdev->cosm_mutex);
+	if (cdev->state != MIC_ONLINE) {
+		rc = -EINVAL;
+		dev_err(&cdev->dev, "%s %d skipping shutdown in state: %s\n",
+			__func__, __LINE__, cosm_state_string[cdev->state]);
+		goto err;
+	}
+
+	if (!cdev->epd) {
+		rc = -ENOTCONN;
+		dev_err(&cdev->dev, "%s %d scif endpoint not connected rc %d\n",
+			__func__, __LINE__, rc);
+		goto err;
+	}
+
+	rc = scif_send(cdev->epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
+	if (rc < 0) {
+		dev_err(&cdev->dev, "%s %d scif_send failed rc %d\n",
+			__func__, __LINE__, rc);
+		goto err;
+	}
+	cdev->heartbeat_watchdog_enable = false;
+	cosm_set_state(cdev, MIC_SHUTTING_DOWN);
+	rc = 0;
+err:
+	mutex_unlock(&cdev->cosm_mutex);
+	return rc;
+}
+
+static int cosm_driver_probe(struct cosm_device *cdev)
+{
+	int rc;
+
+	/* Initialize SCIF server at first probe */
+	if (atomic_add_return(1, &g_num_dev) == 1) {
+		rc = cosm_scif_init();
+		if (rc)
+			goto scif_exit;
+	}
+	mutex_init(&cdev->cosm_mutex);
+	INIT_WORK(&cdev->reset_trigger_work, cosm_reset_trigger_work);
+	INIT_WORK(&cdev->scif_work, cosm_scif_work);
+	cdev->sysfs_heartbeat_enable = true;
+	cosm_sysfs_init(cdev);
+	cdev->sdev = device_create_with_groups(g_cosm_class, cdev->dev.parent,
+			       MKDEV(0, cdev->index), cdev, cdev->attr_group,
+			       "mic%d", cdev->index);
+	if (IS_ERR(cdev->sdev)) {
+		rc = PTR_ERR(cdev->sdev);
+		dev_err(&cdev->dev, "device_create_with_groups failed rc %d\n",
+			rc);
+		goto scif_exit;
+	}
+
+	cdev->state_sysfs = sysfs_get_dirent(cdev->sdev->kobj.sd,
+		"state");
+	if (!cdev->state_sysfs) {
+		rc = -ENODEV;
+		dev_err(&cdev->dev, "sysfs_get_dirent failed rc %d\n", rc);
+		goto destroy_device;
+	}
+	cosm_create_debug_dir(cdev);
+	return 0;
+destroy_device:
+	device_destroy(g_cosm_class, MKDEV(0, cdev->index));
+scif_exit:
+	if (atomic_dec_and_test(&g_num_dev))
+		cosm_scif_exit();
+	return rc;
+}
+
+static void cosm_driver_remove(struct cosm_device *cdev)
+{
+	cosm_delete_debug_dir(cdev);
+	sysfs_put(cdev->state_sysfs);
+	device_destroy(g_cosm_class, MKDEV(0, cdev->index));
+	flush_work(&cdev->reset_trigger_work);
+	cosm_stop(cdev, false);
+	if (atomic_dec_and_test(&g_num_dev))
+		cosm_scif_exit();
+
+	/* These sysfs entries might have allocated */
+	kfree(cdev->cmdline);
+	kfree(cdev->firmware);
+	kfree(cdev->ramdisk);
+	kfree(cdev->bootmode);
+}
+
+static int cosm_suspend(struct device *dev)
+{
+	struct cosm_device *cdev = dev_to_cosm(dev);
+
+	mutex_lock(&cdev->cosm_mutex);
+	switch (cdev->state) {
+	/**
+	 * Suspend/freeze hooks in userspace have already shutdown the card.
+	 * Card should be 'ready' in most cases. It is however possible that
+	 * some userspace application initiated a boot. In those cases, we
+	 * simply reset the card.
+	 */
+	case MIC_ONLINE:
+	case MIC_BOOTING:
+	case MIC_SHUTTING_DOWN:
+		mutex_unlock(&cdev->cosm_mutex);
+		cosm_stop(cdev, false);
+		break;
+	default:
+		mutex_unlock(&cdev->cosm_mutex);
+		break;
+	}
+	return 0;
+}
+
+static const struct dev_pm_ops cosm_pm_ops = {
+	.suspend = cosm_suspend,
+	.freeze = cosm_suspend
+};
+
+static struct cosm_driver cosm_driver = {
+	.driver = {
+		.name =  KBUILD_MODNAME,
+		.owner = THIS_MODULE,
+		.pm = &cosm_pm_ops,
+	},
+	.probe = cosm_driver_probe,
+	.remove = cosm_driver_remove
+};
+
+static int __init cosm_init(void)
+{
+	int ret;
+
+	cosm_init_debugfs();
+
+	g_cosm_class = class_create(THIS_MODULE, cosm_driver_name);
+	if (IS_ERR(g_cosm_class)) {
+		ret = PTR_ERR(g_cosm_class);
+		pr_err("class_create failed ret %d\n", ret);
+		goto cleanup_debugfs;
+	}
+
+	ida_init(&g_cosm_ida);
+	ret = cosm_register_driver(&cosm_driver);
+	if (ret) {
+		pr_err("cosm_register_driver failed ret %d\n", ret);
+		goto ida_destroy;
+	}
+	return 0;
+ida_destroy:
+	ida_destroy(&g_cosm_ida);
+	class_destroy(g_cosm_class);
+cleanup_debugfs:
+	cosm_exit_debugfs();
+	return ret;
+}
+
+static void __exit cosm_exit(void)
+{
+	cosm_unregister_driver(&cosm_driver);
+	ida_destroy(&g_cosm_ida);
+	class_destroy(g_cosm_class);
+	cosm_exit_debugfs();
+}
+
+module_init(cosm_init);
+module_exit(cosm_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC Coprocessor State Management (COSM) Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/cosm/cosm_main.h b/drivers/misc/mic/cosm/cosm_main.h
new file mode 100644
index 000000000..aa78cdf25
--- /dev/null
+++ b/drivers/misc/mic/cosm/cosm_main.h
@@ -0,0 +1,73 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Coprocessor State Management (COSM) Driver
+ *
+ */
+#ifndef _COSM_COSM_H_
+#define _COSM_COSM_H_
+
+#include <linux/scif.h>
+#include "../bus/cosm_bus.h"
+
+#define COSM_HEARTBEAT_SEND_SEC 30
+#define SCIF_COSM_LISTEN_PORT  201
+
+/**
+ * enum COSM msg id's
+ * @COSM_MSG_SHUTDOWN: host->card trigger shutdown
+ * @COSM_MSG_SYNC_TIME: host->card send host time to card to sync time
+ * @COSM_MSG_HEARTBEAT: card->host heartbeat
+ * @COSM_MSG_SHUTDOWN_STATUS: card->host with shutdown status as payload
+ */
+enum cosm_msg_id {
+	COSM_MSG_SHUTDOWN,
+	COSM_MSG_SYNC_TIME,
+	COSM_MSG_HEARTBEAT,
+	COSM_MSG_SHUTDOWN_STATUS,
+};
+
+struct cosm_msg {
+	u64 id;
+	union {
+		u64 shutdown_status;
+		struct {
+			u64 tv_sec;
+			u64 tv_nsec;
+		} timespec;
+	};
+};
+
+extern const char * const cosm_state_string[];
+extern const char * const cosm_shutdown_status_string[];
+
+void cosm_sysfs_init(struct cosm_device *cdev);
+int cosm_start(struct cosm_device *cdev);
+void cosm_stop(struct cosm_device *cdev, bool force);
+int cosm_reset(struct cosm_device *cdev);
+int cosm_shutdown(struct cosm_device *cdev);
+void cosm_set_state(struct cosm_device *cdev, u8 state);
+void cosm_set_shutdown_status(struct cosm_device *cdev, u8 status);
+void cosm_init_debugfs(void);
+void cosm_exit_debugfs(void);
+void cosm_create_debug_dir(struct cosm_device *cdev);
+void cosm_delete_debug_dir(struct cosm_device *cdev);
+int cosm_scif_init(void);
+void cosm_scif_exit(void);
+void cosm_scif_work(struct work_struct *work);
+
+#endif
diff --git a/drivers/misc/mic/cosm/cosm_scif_server.c b/drivers/misc/mic/cosm/cosm_scif_server.c
new file mode 100644
index 000000000..e94b7eac4
--- /dev/null
+++ b/drivers/misc/mic/cosm/cosm_scif_server.c
@@ -0,0 +1,411 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Coprocessor State Management (COSM) Driver
+ *
+ */
+#include <linux/kthread.h>
+#include <linux/sched/signal.h>
+
+#include "cosm_main.h"
+
+/*
+ * The COSM driver uses SCIF to communicate between the management node and the
+ * MIC cards. SCIF is used to (a) Send a shutdown command to the card (b)
+ * receive a shutdown status back from the card upon completion of shutdown and
+ * (c) receive periodic heartbeat messages from the card used to deduce if the
+ * card has crashed.
+ *
+ * A COSM server consisting of a SCIF listening endpoint waits for incoming
+ * connections from the card. Upon acceptance of the connection, a separate
+ * work-item is scheduled to handle SCIF message processing for that card. The
+ * life-time of this work-item is therefore the time from which the connection
+ * from a card is accepted to the time at which the connection is closed. A new
+ * work-item starts each time the card boots and is alive till the card (a)
+ * shuts down (b) is reset (c) crashes (d) cosm_client driver on the card is
+ * unloaded.
+ *
+ * From the point of view of COSM interactions with SCIF during card
+ * shutdown, reset and crash are as follows:
+ *
+ * Card shutdown
+ * -------------
+ * 1. COSM client on the card invokes orderly_poweroff() in response to SHUTDOWN
+ *    message from the host.
+ * 2. Card driver shutdown callback invokes scif_unregister_device(..) resulting
+ *    in scif_remove(..) getting called on the card
+ * 3. scif_remove -> scif_stop -> scif_handle_remove_node ->
+ *    scif_peer_unregister_device -> device_unregister for the host peer device
+ * 4. During device_unregister remove(..) method of cosm_client is invoked which
+ *    closes the COSM SCIF endpoint on the card. This results in a SCIF_DISCNCT
+ *    message being sent to host SCIF. SCIF_DISCNCT message processing on the
+ *    host SCIF sets the host COSM SCIF endpoint state to DISCONNECTED and wakes
+ *    up the host COSM thread blocked in scif_poll(..) resulting in
+ *    scif_poll(..)  returning EPOLLHUP.
+ * 5. On the card, scif_peer_release_dev is next called which results in an
+ *    SCIF_EXIT message being sent to the host and after receiving the
+ *    SCIF_EXIT_ACK from the host the peer device teardown on the card is
+ *    complete.
+ * 6. As part of the SCIF_EXIT message processing on the host, host sends a
+ *    SCIF_REMOVE_NODE to itself corresponding to the card being removed. This
+ *    starts a similar SCIF peer device teardown sequence on the host
+ *    corresponding to the card being shut down.
+ *
+ * Card reset
+ * ----------
+ * The case of interest here is when the card has not been previously shut down
+ * since most of the steps below are skipped in that case:
+
+ * 1. cosm_stop(..) invokes hw_ops->stop(..) method of the base PCIe driver
+ *    which unregisters the SCIF HW device resulting in scif_remove(..) being
+ *    called on the host.
+ * 2. scif_remove(..) calls scif_disconnect_node(..) which results in a
+ *    SCIF_EXIT message being sent to the card.
+ * 3. The card executes scif_stop() as part of SCIF_EXIT message
+ *    processing. This results in the COSM endpoint on the card being closed and
+ *    the SCIF host peer device on the card getting unregistered similar to
+ *    steps 3, 4 and 5 for the card shutdown case above. scif_poll(..) on the
+ *    host returns EPOLLHUP as a result.
+ * 4. On the host, card peer device unregister and SCIF HW remove(..) also
+ *    subsequently complete.
+ *
+ * Card crash
+ * ----------
+ * If a reset is issued after the card has crashed, there is no SCIF_DISCNT
+ * message from the card which would result in scif_poll(..) returning
+ * EPOLLHUP. In this case when the host SCIF driver sends a SCIF_REMOVE_NODE
+ * message to itself resulting in the card SCIF peer device being unregistered,
+ * this results in a scif_peer_release_dev -> scif_cleanup_scifdev->
+ * scif_invalidate_ep call sequence which sets the endpoint state to
+ * DISCONNECTED and results in scif_poll(..) returning EPOLLHUP.
+ */
+
+#define COSM_SCIF_BACKLOG 16
+#define COSM_HEARTBEAT_CHECK_DELTA_SEC 10
+#define COSM_HEARTBEAT_TIMEOUT_SEC \
+		(COSM_HEARTBEAT_SEND_SEC + COSM_HEARTBEAT_CHECK_DELTA_SEC)
+#define COSM_HEARTBEAT_TIMEOUT_MSEC (COSM_HEARTBEAT_TIMEOUT_SEC * MSEC_PER_SEC)
+
+static struct task_struct *server_thread;
+static scif_epd_t listen_epd;
+
+/* Publish MIC card's shutdown status to user space MIC daemon */
+static void cosm_update_mic_status(struct cosm_device *cdev)
+{
+	if (cdev->shutdown_status_int != MIC_NOP) {
+		cosm_set_shutdown_status(cdev, cdev->shutdown_status_int);
+		cdev->shutdown_status_int = MIC_NOP;
+	}
+}
+
+/* Store MIC card's shutdown status internally when it is received */
+static void cosm_shutdown_status_int(struct cosm_device *cdev,
+				     enum mic_status shutdown_status)
+{
+	switch (shutdown_status) {
+	case MIC_HALTED:
+	case MIC_POWER_OFF:
+	case MIC_RESTART:
+	case MIC_CRASHED:
+		break;
+	default:
+		dev_err(&cdev->dev, "%s %d Unexpected shutdown_status %d\n",
+			__func__, __LINE__, shutdown_status);
+		return;
+	};
+	cdev->shutdown_status_int = shutdown_status;
+	cdev->heartbeat_watchdog_enable = false;
+
+	if (cdev->state != MIC_SHUTTING_DOWN)
+		cosm_set_state(cdev, MIC_SHUTTING_DOWN);
+}
+
+/* Non-blocking recv. Read and process all available messages */
+static void cosm_scif_recv(struct cosm_device *cdev)
+{
+	struct cosm_msg msg;
+	int rc;
+
+	while (1) {
+		rc = scif_recv(cdev->epd, &msg, sizeof(msg), 0);
+		if (!rc) {
+			break;
+		} else if (rc < 0) {
+			dev_dbg(&cdev->dev, "%s: %d rc %d\n",
+				__func__, __LINE__, rc);
+			break;
+		}
+		dev_dbg(&cdev->dev, "%s: %d rc %d id 0x%llx\n",
+			__func__, __LINE__, rc, msg.id);
+
+		switch (msg.id) {
+		case COSM_MSG_SHUTDOWN_STATUS:
+			cosm_shutdown_status_int(cdev, msg.shutdown_status);
+			break;
+		case COSM_MSG_HEARTBEAT:
+			/* Nothing to do, heartbeat only unblocks scif_poll */
+			break;
+		default:
+			dev_err(&cdev->dev, "%s: %d unknown msg.id %lld\n",
+				__func__, __LINE__, msg.id);
+			break;
+		}
+	}
+}
+
+/* Publish crashed status for this MIC card */
+static void cosm_set_crashed(struct cosm_device *cdev)
+{
+	dev_err(&cdev->dev, "node alive timeout\n");
+	cosm_shutdown_status_int(cdev, MIC_CRASHED);
+	cosm_update_mic_status(cdev);
+}
+
+/* Send host time to the MIC card to sync system time between host and MIC */
+static void cosm_send_time(struct cosm_device *cdev)
+{
+	struct cosm_msg msg = { .id = COSM_MSG_SYNC_TIME };
+	struct timespec64 ts;
+	int rc;
+
+	ktime_get_real_ts64(&ts);
+	msg.timespec.tv_sec = ts.tv_sec;
+	msg.timespec.tv_nsec = ts.tv_nsec;
+
+	rc = scif_send(cdev->epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
+	if (rc < 0)
+		dev_err(&cdev->dev, "%s %d scif_send failed rc %d\n",
+			__func__, __LINE__, rc);
+}
+
+/*
+ * Close this cosm_device's endpoint after its peer endpoint on the card has
+ * been closed. In all cases except MIC card crash EPOLLHUP on the host is
+ * triggered by the client's endpoint being closed.
+ */
+static void cosm_scif_close(struct cosm_device *cdev)
+{
+	/*
+	 * Because SHUTDOWN_STATUS message is sent by the MIC cards in the
+	 * reboot notifier when shutdown is still not complete, we notify mpssd
+	 * to reset the card when SCIF endpoint is closed.
+	 */
+	cosm_update_mic_status(cdev);
+	scif_close(cdev->epd);
+	cdev->epd = NULL;
+	dev_dbg(&cdev->dev, "%s %d\n", __func__, __LINE__);
+}
+
+/*
+ * Set card state to ONLINE when a new SCIF connection from a MIC card is
+ * received. Normally the state is BOOTING when the connection comes in, but can
+ * be ONLINE if cosm_client driver on the card was unloaded and then reloaded.
+ */
+static int cosm_set_online(struct cosm_device *cdev)
+{
+	int rc = 0;
+
+	if (MIC_BOOTING == cdev->state || MIC_ONLINE == cdev->state) {
+		cdev->heartbeat_watchdog_enable = cdev->sysfs_heartbeat_enable;
+		cdev->epd = cdev->newepd;
+		if (cdev->state == MIC_BOOTING)
+			cosm_set_state(cdev, MIC_ONLINE);
+		cosm_send_time(cdev);
+		dev_dbg(&cdev->dev, "%s %d\n", __func__, __LINE__);
+	} else {
+		dev_warn(&cdev->dev, "%s %d not going online in state: %s\n",
+			 __func__, __LINE__, cosm_state_string[cdev->state]);
+		rc = -EINVAL;
+	}
+	/* Drop reference acquired by bus_find_device in the server thread */
+	put_device(&cdev->dev);
+	return rc;
+}
+
+/*
+ * Work function for handling work for a SCIF connection from a particular MIC
+ * card. It first sets the card state to ONLINE and then calls scif_poll to
+ * block on activity such as incoming messages on the SCIF endpoint. When the
+ * endpoint is closed, the work function exits, completing its life cycle, from
+ * MIC card boot to card shutdown/reset/crash.
+ */
+void cosm_scif_work(struct work_struct *work)
+{
+	struct cosm_device *cdev = container_of(work, struct cosm_device,
+						scif_work);
+	struct scif_pollepd pollepd;
+	int rc;
+
+	mutex_lock(&cdev->cosm_mutex);
+	if (cosm_set_online(cdev))
+		goto exit;
+
+	while (1) {
+		pollepd.epd = cdev->epd;
+		pollepd.events = EPOLLIN;
+
+		/* Drop the mutex before blocking in scif_poll(..) */
+		mutex_unlock(&cdev->cosm_mutex);
+		/* poll(..) with timeout on our endpoint */
+		rc = scif_poll(&pollepd, 1, COSM_HEARTBEAT_TIMEOUT_MSEC);
+		mutex_lock(&cdev->cosm_mutex);
+		if (rc < 0) {
+			dev_err(&cdev->dev, "%s %d scif_poll rc %d\n",
+				__func__, __LINE__, rc);
+			continue;
+		}
+
+		/* There is a message from the card */
+		if (pollepd.revents & EPOLLIN)
+			cosm_scif_recv(cdev);
+
+		/* The peer endpoint is closed or this endpoint disconnected */
+		if (pollepd.revents & EPOLLHUP) {
+			cosm_scif_close(cdev);
+			break;
+		}
+
+		/* Did we timeout from poll? */
+		if (!rc && cdev->heartbeat_watchdog_enable)
+			cosm_set_crashed(cdev);
+	}
+exit:
+	dev_dbg(&cdev->dev, "%s %d exiting\n", __func__, __LINE__);
+	mutex_unlock(&cdev->cosm_mutex);
+}
+
+/*
+ * COSM SCIF server thread function. Accepts incoming SCIF connections from MIC
+ * cards, finds the correct cosm_device to associate that connection with and
+ * schedules individual work items for each MIC card.
+ */
+static int cosm_scif_server(void *unused)
+{
+	struct cosm_device *cdev;
+	scif_epd_t newepd;
+	struct scif_port_id port_id;
+	int rc;
+
+	allow_signal(SIGKILL);
+
+	while (!kthread_should_stop()) {
+		rc = scif_accept(listen_epd, &port_id, &newepd,
+				 SCIF_ACCEPT_SYNC);
+		if (rc < 0) {
+			if (-ERESTARTSYS != rc)
+				pr_err("%s %d rc %d\n", __func__, __LINE__, rc);
+			continue;
+		}
+
+		/*
+		 * Associate the incoming connection with a particular
+		 * cosm_device, COSM device ID == SCIF node ID - 1
+		 */
+		cdev = cosm_find_cdev_by_id(port_id.node - 1);
+		if (!cdev)
+			continue;
+		cdev->newepd = newepd;
+		schedule_work(&cdev->scif_work);
+	}
+
+	pr_debug("%s %d Server thread stopped\n", __func__, __LINE__);
+	return 0;
+}
+
+static int cosm_scif_listen(void)
+{
+	int rc;
+
+	listen_epd = scif_open();
+	if (!listen_epd) {
+		pr_err("%s %d scif_open failed\n", __func__, __LINE__);
+		return -ENOMEM;
+	}
+
+	rc = scif_bind(listen_epd, SCIF_COSM_LISTEN_PORT);
+	if (rc < 0) {
+		pr_err("%s %d scif_bind failed rc %d\n",
+		       __func__, __LINE__, rc);
+		goto err;
+	}
+
+	rc = scif_listen(listen_epd, COSM_SCIF_BACKLOG);
+	if (rc < 0) {
+		pr_err("%s %d scif_listen rc %d\n", __func__, __LINE__, rc);
+		goto err;
+	}
+	pr_debug("%s %d listen_epd set up\n", __func__, __LINE__);
+	return 0;
+err:
+	scif_close(listen_epd);
+	listen_epd = NULL;
+	return rc;
+}
+
+static void cosm_scif_listen_exit(void)
+{
+	pr_debug("%s %d closing listen_epd\n", __func__, __LINE__);
+	if (listen_epd) {
+		scif_close(listen_epd);
+		listen_epd = NULL;
+	}
+}
+
+/*
+ * Create a listening SCIF endpoint and a server kthread which accepts incoming
+ * SCIF connections from MIC cards
+ */
+int cosm_scif_init(void)
+{
+	int rc = cosm_scif_listen();
+
+	if (rc) {
+		pr_err("%s %d cosm_scif_listen rc %d\n",
+		       __func__, __LINE__, rc);
+		goto err;
+	}
+
+	server_thread = kthread_run(cosm_scif_server, NULL, "cosm_server");
+	if (IS_ERR(server_thread)) {
+		rc = PTR_ERR(server_thread);
+		pr_err("%s %d kthread_run rc %d\n", __func__, __LINE__, rc);
+		goto listen_exit;
+	}
+	return 0;
+listen_exit:
+	cosm_scif_listen_exit();
+err:
+	return rc;
+}
+
+/* Stop the running server thread and close the listening SCIF endpoint */
+void cosm_scif_exit(void)
+{
+	int rc;
+
+	if (!IS_ERR_OR_NULL(server_thread)) {
+		rc = send_sig(SIGKILL, server_thread, 0);
+		if (rc) {
+			pr_err("%s %d send_sig rc %d\n",
+			       __func__, __LINE__, rc);
+			return;
+		}
+		kthread_stop(server_thread);
+	}
+
+	cosm_scif_listen_exit();
+}
diff --git a/drivers/misc/mic/cosm/cosm_sysfs.c b/drivers/misc/mic/cosm/cosm_sysfs.c
new file mode 100644
index 000000000..29d6863b6
--- /dev/null
+++ b/drivers/misc/mic/cosm/cosm_sysfs.c
@@ -0,0 +1,461 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Coprocessor State Management (COSM) Driver
+ *
+ */
+#include <linux/slab.h>
+#include "cosm_main.h"
+
+/*
+ * A state-to-string lookup table, for exposing a human readable state
+ * via sysfs. Always keep in sync with enum cosm_states
+ */
+const char * const cosm_state_string[] = {
+	[MIC_READY] = "ready",
+	[MIC_BOOTING] = "booting",
+	[MIC_ONLINE] = "online",
+	[MIC_SHUTTING_DOWN] = "shutting_down",
+	[MIC_RESETTING] = "resetting",
+	[MIC_RESET_FAILED] = "reset_failed",
+};
+
+/*
+ * A shutdown-status-to-string lookup table, for exposing a human
+ * readable state via sysfs. Always keep in sync with enum cosm_shutdown_status
+ */
+const char * const cosm_shutdown_status_string[] = {
+	[MIC_NOP] = "nop",
+	[MIC_CRASHED] = "crashed",
+	[MIC_HALTED] = "halted",
+	[MIC_POWER_OFF] = "poweroff",
+	[MIC_RESTART] = "restart",
+};
+
+void cosm_set_shutdown_status(struct cosm_device *cdev, u8 shutdown_status)
+{
+	dev_dbg(&cdev->dev, "Shutdown Status %s -> %s\n",
+		cosm_shutdown_status_string[cdev->shutdown_status],
+		cosm_shutdown_status_string[shutdown_status]);
+	cdev->shutdown_status = shutdown_status;
+}
+
+void cosm_set_state(struct cosm_device *cdev, u8 state)
+{
+	dev_dbg(&cdev->dev, "State %s -> %s\n",
+		cosm_state_string[cdev->state],
+		cosm_state_string[state]);
+	cdev->state = state;
+	sysfs_notify_dirent(cdev->state_sysfs);
+}
+
+static ssize_t
+family_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev)
+		return -EINVAL;
+
+	return cdev->hw_ops->family(cdev, buf);
+}
+static DEVICE_ATTR_RO(family);
+
+static ssize_t
+stepping_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev)
+		return -EINVAL;
+
+	return cdev->hw_ops->stepping(cdev, buf);
+}
+static DEVICE_ATTR_RO(stepping);
+
+static ssize_t
+state_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev || cdev->state >= MIC_LAST)
+		return -EINVAL;
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n",
+		cosm_state_string[cdev->state]);
+}
+
+static ssize_t
+state_store(struct device *dev, struct device_attribute *attr,
+	    const char *buf, size_t count)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+	int rc;
+
+	if (!cdev)
+		return -EINVAL;
+
+	if (sysfs_streq(buf, "boot")) {
+		rc = cosm_start(cdev);
+		goto done;
+	}
+	if (sysfs_streq(buf, "reset")) {
+		rc = cosm_reset(cdev);
+		goto done;
+	}
+
+	if (sysfs_streq(buf, "shutdown")) {
+		rc = cosm_shutdown(cdev);
+		goto done;
+	}
+	rc = -EINVAL;
+done:
+	if (rc)
+		count = rc;
+	return count;
+}
+static DEVICE_ATTR_RW(state);
+
+static ssize_t shutdown_status_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev || cdev->shutdown_status >= MIC_STATUS_LAST)
+		return -EINVAL;
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n",
+		cosm_shutdown_status_string[cdev->shutdown_status]);
+}
+static DEVICE_ATTR_RO(shutdown_status);
+
+static ssize_t
+heartbeat_enable_show(struct device *dev,
+		      struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev)
+		return -EINVAL;
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", cdev->sysfs_heartbeat_enable);
+}
+
+static ssize_t
+heartbeat_enable_store(struct device *dev,
+		       struct device_attribute *attr,
+		       const char *buf, size_t count)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+	int enable;
+	int ret;
+
+	if (!cdev)
+		return -EINVAL;
+
+	mutex_lock(&cdev->cosm_mutex);
+	ret = kstrtoint(buf, 10, &enable);
+	if (ret)
+		goto unlock;
+
+	cdev->sysfs_heartbeat_enable = enable;
+	/* if state is not online, cdev->heartbeat_watchdog_enable is 0 */
+	if (cdev->state == MIC_ONLINE)
+		cdev->heartbeat_watchdog_enable = enable;
+	ret = count;
+unlock:
+	mutex_unlock(&cdev->cosm_mutex);
+	return ret;
+}
+static DEVICE_ATTR_RW(heartbeat_enable);
+
+static ssize_t
+cmdline_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+	char *cmdline;
+
+	if (!cdev)
+		return -EINVAL;
+
+	cmdline = cdev->cmdline;
+
+	if (cmdline)
+		return scnprintf(buf, PAGE_SIZE, "%s\n", cmdline);
+	return 0;
+}
+
+static ssize_t
+cmdline_store(struct device *dev, struct device_attribute *attr,
+	      const char *buf, size_t count)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev)
+		return -EINVAL;
+
+	mutex_lock(&cdev->cosm_mutex);
+	kfree(cdev->cmdline);
+
+	cdev->cmdline = kmalloc(count + 1, GFP_KERNEL);
+	if (!cdev->cmdline) {
+		count = -ENOMEM;
+		goto unlock;
+	}
+
+	strncpy(cdev->cmdline, buf, count);
+
+	if (cdev->cmdline[count - 1] == '\n')
+		cdev->cmdline[count - 1] = '\0';
+	else
+		cdev->cmdline[count] = '\0';
+unlock:
+	mutex_unlock(&cdev->cosm_mutex);
+	return count;
+}
+static DEVICE_ATTR_RW(cmdline);
+
+static ssize_t
+firmware_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+	char *firmware;
+
+	if (!cdev)
+		return -EINVAL;
+
+	firmware = cdev->firmware;
+
+	if (firmware)
+		return scnprintf(buf, PAGE_SIZE, "%s\n", firmware);
+	return 0;
+}
+
+static ssize_t
+firmware_store(struct device *dev, struct device_attribute *attr,
+	       const char *buf, size_t count)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev)
+		return -EINVAL;
+
+	mutex_lock(&cdev->cosm_mutex);
+	kfree(cdev->firmware);
+
+	cdev->firmware = kmalloc(count + 1, GFP_KERNEL);
+	if (!cdev->firmware) {
+		count = -ENOMEM;
+		goto unlock;
+	}
+	strncpy(cdev->firmware, buf, count);
+
+	if (cdev->firmware[count - 1] == '\n')
+		cdev->firmware[count - 1] = '\0';
+	else
+		cdev->firmware[count] = '\0';
+unlock:
+	mutex_unlock(&cdev->cosm_mutex);
+	return count;
+}
+static DEVICE_ATTR_RW(firmware);
+
+static ssize_t
+ramdisk_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+	char *ramdisk;
+
+	if (!cdev)
+		return -EINVAL;
+
+	ramdisk = cdev->ramdisk;
+
+	if (ramdisk)
+		return scnprintf(buf, PAGE_SIZE, "%s\n", ramdisk);
+	return 0;
+}
+
+static ssize_t
+ramdisk_store(struct device *dev, struct device_attribute *attr,
+	      const char *buf, size_t count)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev)
+		return -EINVAL;
+
+	mutex_lock(&cdev->cosm_mutex);
+	kfree(cdev->ramdisk);
+
+	cdev->ramdisk = kmalloc(count + 1, GFP_KERNEL);
+	if (!cdev->ramdisk) {
+		count = -ENOMEM;
+		goto unlock;
+	}
+
+	strncpy(cdev->ramdisk, buf, count);
+
+	if (cdev->ramdisk[count - 1] == '\n')
+		cdev->ramdisk[count - 1] = '\0';
+	else
+		cdev->ramdisk[count] = '\0';
+unlock:
+	mutex_unlock(&cdev->cosm_mutex);
+	return count;
+}
+static DEVICE_ATTR_RW(ramdisk);
+
+static ssize_t
+bootmode_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+	char *bootmode;
+
+	if (!cdev)
+		return -EINVAL;
+
+	bootmode = cdev->bootmode;
+
+	if (bootmode)
+		return scnprintf(buf, PAGE_SIZE, "%s\n", bootmode);
+	return 0;
+}
+
+static ssize_t
+bootmode_store(struct device *dev, struct device_attribute *attr,
+	       const char *buf, size_t count)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev)
+		return -EINVAL;
+
+	if (!sysfs_streq(buf, "linux") && !sysfs_streq(buf, "flash"))
+		return -EINVAL;
+
+	mutex_lock(&cdev->cosm_mutex);
+	kfree(cdev->bootmode);
+
+	cdev->bootmode = kmalloc(count + 1, GFP_KERNEL);
+	if (!cdev->bootmode) {
+		count = -ENOMEM;
+		goto unlock;
+	}
+
+	strncpy(cdev->bootmode, buf, count);
+
+	if (cdev->bootmode[count - 1] == '\n')
+		cdev->bootmode[count - 1] = '\0';
+	else
+		cdev->bootmode[count] = '\0';
+unlock:
+	mutex_unlock(&cdev->cosm_mutex);
+	return count;
+}
+static DEVICE_ATTR_RW(bootmode);
+
+static ssize_t
+log_buf_addr_show(struct device *dev, struct device_attribute *attr,
+		  char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev)
+		return -EINVAL;
+
+	return scnprintf(buf, PAGE_SIZE, "%p\n", cdev->log_buf_addr);
+}
+
+static ssize_t
+log_buf_addr_store(struct device *dev, struct device_attribute *attr,
+		   const char *buf, size_t count)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+	int ret;
+	unsigned long addr;
+
+	if (!cdev)
+		return -EINVAL;
+
+	ret = kstrtoul(buf, 16, &addr);
+	if (ret)
+		goto exit;
+
+	cdev->log_buf_addr = (void *)addr;
+	ret = count;
+exit:
+	return ret;
+}
+static DEVICE_ATTR_RW(log_buf_addr);
+
+static ssize_t
+log_buf_len_show(struct device *dev, struct device_attribute *attr,
+		 char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev)
+		return -EINVAL;
+
+	return scnprintf(buf, PAGE_SIZE, "%p\n", cdev->log_buf_len);
+}
+
+static ssize_t
+log_buf_len_store(struct device *dev, struct device_attribute *attr,
+		  const char *buf, size_t count)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+	int ret;
+	unsigned long addr;
+
+	if (!cdev)
+		return -EINVAL;
+
+	ret = kstrtoul(buf, 16, &addr);
+	if (ret)
+		goto exit;
+
+	cdev->log_buf_len = (int *)addr;
+	ret = count;
+exit:
+	return ret;
+}
+static DEVICE_ATTR_RW(log_buf_len);
+
+static struct attribute *cosm_default_attrs[] = {
+	&dev_attr_family.attr,
+	&dev_attr_stepping.attr,
+	&dev_attr_state.attr,
+	&dev_attr_shutdown_status.attr,
+	&dev_attr_heartbeat_enable.attr,
+	&dev_attr_cmdline.attr,
+	&dev_attr_firmware.attr,
+	&dev_attr_ramdisk.attr,
+	&dev_attr_bootmode.attr,
+	&dev_attr_log_buf_addr.attr,
+	&dev_attr_log_buf_len.attr,
+
+	NULL
+};
+
+ATTRIBUTE_GROUPS(cosm_default);
+
+void cosm_sysfs_init(struct cosm_device *cdev)
+{
+	cdev->attr_group = cosm_default_groups;
+}
diff --git a/drivers/misc/mic/cosm_client/Makefile b/drivers/misc/mic/cosm_client/Makefile
new file mode 100644
index 000000000..6f751a519
--- /dev/null
+++ b/drivers/misc/mic/cosm_client/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile - Intel MIC COSM Client Driver
+# Copyright(c) 2015, Intel Corporation.
+#
+obj-$(CONFIG_MIC_COSM) += cosm_client.o
+
+cosm_client-objs += cosm_scif_client.o
diff --git a/drivers/misc/mic/cosm_client/cosm_scif_client.c b/drivers/misc/mic/cosm_client/cosm_scif_client.c
new file mode 100644
index 000000000..225078cb5
--- /dev/null
+++ b/drivers/misc/mic/cosm_client/cosm_scif_client.c
@@ -0,0 +1,281 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC COSM Client Driver
+ *
+ */
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/kthread.h>
+#include <linux/sched/signal.h>
+
+#include "../cosm/cosm_main.h"
+
+#define COSM_SCIF_MAX_RETRIES 10
+#define COSM_HEARTBEAT_SEND_MSEC (COSM_HEARTBEAT_SEND_SEC * MSEC_PER_SEC)
+
+static struct task_struct *client_thread;
+static scif_epd_t client_epd;
+static struct scif_peer_dev *client_spdev;
+
+/*
+ * Reboot notifier: receives shutdown status from the OS and communicates it
+ * back to the COSM process on the host
+ */
+static int cosm_reboot_event(struct notifier_block *this, unsigned long event,
+			     void *ptr)
+{
+	struct cosm_msg msg = { .id = COSM_MSG_SHUTDOWN_STATUS };
+	int rc;
+
+	event = (event == SYS_RESTART) ? SYSTEM_RESTART : event;
+	dev_info(&client_spdev->dev, "%s %d received event %ld\n",
+		 __func__, __LINE__, event);
+
+	msg.shutdown_status = event;
+	rc = scif_send(client_epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
+	if (rc < 0)
+		dev_err(&client_spdev->dev, "%s %d scif_send rc %d\n",
+			__func__, __LINE__, rc);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block cosm_reboot = {
+	.notifier_call  = cosm_reboot_event,
+};
+
+/* Set system time from timespec value received from the host */
+static void cosm_set_time(struct cosm_msg *msg)
+{
+	struct timespec64 ts = {
+		.tv_sec = msg->timespec.tv_sec,
+		.tv_nsec = msg->timespec.tv_nsec,
+	};
+	int rc = do_settimeofday64(&ts);
+
+	if (rc)
+		dev_err(&client_spdev->dev, "%s: %d settimeofday rc %d\n",
+			__func__, __LINE__, rc);
+}
+
+/* COSM client receive message processing */
+static void cosm_client_recv(void)
+{
+	struct cosm_msg msg;
+	int rc;
+
+	while (1) {
+		rc = scif_recv(client_epd, &msg, sizeof(msg), 0);
+		if (!rc) {
+			return;
+		} else if (rc < 0) {
+			dev_err(&client_spdev->dev, "%s: %d rc %d\n",
+				__func__, __LINE__, rc);
+			return;
+		}
+
+		dev_dbg(&client_spdev->dev, "%s: %d rc %d id 0x%llx\n",
+			__func__, __LINE__, rc, msg.id);
+
+		switch (msg.id) {
+		case COSM_MSG_SYNC_TIME:
+			cosm_set_time(&msg);
+			break;
+		case COSM_MSG_SHUTDOWN:
+			orderly_poweroff(true);
+			break;
+		default:
+			dev_err(&client_spdev->dev, "%s: %d unknown id %lld\n",
+				__func__, __LINE__, msg.id);
+			break;
+		}
+	}
+}
+
+/* Initiate connection to the COSM server on the host */
+static int cosm_scif_connect(void)
+{
+	struct scif_port_id port_id;
+	int i, rc;
+
+	client_epd = scif_open();
+	if (!client_epd) {
+		dev_err(&client_spdev->dev, "%s %d scif_open failed\n",
+			__func__, __LINE__);
+		return -ENOMEM;
+	}
+
+	port_id.node = 0;
+	port_id.port = SCIF_COSM_LISTEN_PORT;
+
+	for (i = 0; i < COSM_SCIF_MAX_RETRIES; i++) {
+		rc = scif_connect(client_epd, &port_id);
+		if (rc < 0)
+			msleep(1000);
+		else
+			break;
+	}
+
+	if (rc < 0) {
+		dev_err(&client_spdev->dev, "%s %d scif_connect rc %d\n",
+			__func__, __LINE__, rc);
+		scif_close(client_epd);
+		client_epd = NULL;
+	}
+	return rc < 0 ? rc : 0;
+}
+
+/* Close host SCIF connection */
+static void cosm_scif_connect_exit(void)
+{
+	if (client_epd) {
+		scif_close(client_epd);
+		client_epd = NULL;
+	}
+}
+
+/*
+ * COSM SCIF client thread function: waits for messages from the host and sends
+ * a heartbeat to the host
+ */
+static int cosm_scif_client(void *unused)
+{
+	struct cosm_msg msg = { .id = COSM_MSG_HEARTBEAT };
+	struct scif_pollepd pollepd;
+	int rc;
+
+	allow_signal(SIGKILL);
+
+	while (!kthread_should_stop()) {
+		pollepd.epd = client_epd;
+		pollepd.events = EPOLLIN;
+
+		rc = scif_poll(&pollepd, 1, COSM_HEARTBEAT_SEND_MSEC);
+		if (rc < 0) {
+			if (-EINTR != rc)
+				dev_err(&client_spdev->dev,
+					"%s %d scif_poll rc %d\n",
+					__func__, __LINE__, rc);
+			continue;
+		}
+
+		if (pollepd.revents & EPOLLIN)
+			cosm_client_recv();
+
+		msg.id = COSM_MSG_HEARTBEAT;
+		rc = scif_send(client_epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
+		if (rc < 0)
+			dev_err(&client_spdev->dev, "%s %d scif_send rc %d\n",
+				__func__, __LINE__, rc);
+	}
+
+	dev_dbg(&client_spdev->dev, "%s %d Client thread stopped\n",
+		__func__, __LINE__);
+	return 0;
+}
+
+static void cosm_scif_probe(struct scif_peer_dev *spdev)
+{
+	int rc;
+
+	dev_dbg(&spdev->dev, "%s %d: dnode %d\n",
+		__func__, __LINE__, spdev->dnode);
+
+	/* We are only interested in the host with spdev->dnode == 0 */
+	if (spdev->dnode)
+		return;
+
+	client_spdev = spdev;
+	rc = cosm_scif_connect();
+	if (rc)
+		goto exit;
+
+	rc = register_reboot_notifier(&cosm_reboot);
+	if (rc) {
+		dev_err(&spdev->dev,
+			"reboot notifier registration failed rc %d\n", rc);
+		goto connect_exit;
+	}
+
+	client_thread = kthread_run(cosm_scif_client, NULL, "cosm_client");
+	if (IS_ERR(client_thread)) {
+		rc = PTR_ERR(client_thread);
+		dev_err(&spdev->dev, "%s %d kthread_run rc %d\n",
+			__func__, __LINE__, rc);
+		goto unreg_reboot;
+	}
+	return;
+unreg_reboot:
+	unregister_reboot_notifier(&cosm_reboot);
+connect_exit:
+	cosm_scif_connect_exit();
+exit:
+	client_spdev = NULL;
+}
+
+static void cosm_scif_remove(struct scif_peer_dev *spdev)
+{
+	int rc;
+
+	dev_dbg(&spdev->dev, "%s %d: dnode %d\n",
+		__func__, __LINE__, spdev->dnode);
+
+	if (spdev->dnode)
+		return;
+
+	if (!IS_ERR_OR_NULL(client_thread)) {
+		rc = send_sig(SIGKILL, client_thread, 0);
+		if (rc) {
+			pr_err("%s %d send_sig rc %d\n",
+			       __func__, __LINE__, rc);
+			return;
+		}
+		kthread_stop(client_thread);
+	}
+	unregister_reboot_notifier(&cosm_reboot);
+	cosm_scif_connect_exit();
+	client_spdev = NULL;
+}
+
+static struct scif_client scif_client_cosm = {
+	.name = KBUILD_MODNAME,
+	.probe = cosm_scif_probe,
+	.remove = cosm_scif_remove,
+};
+
+static int __init cosm_client_init(void)
+{
+	int rc = scif_client_register(&scif_client_cosm);
+
+	if (rc)
+		pr_err("scif_client_register failed rc %d\n", rc);
+	return rc;
+}
+
+static void __exit cosm_client_exit(void)
+{
+	scif_client_unregister(&scif_client_cosm);
+}
+
+module_init(cosm_client_init);
+module_exit(cosm_client_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC card OS state management client driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/host/Makefile b/drivers/misc/mic/host/Makefile
new file mode 100644
index 000000000..25f153367
--- /dev/null
+++ b/drivers/misc/mic/host/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile - Intel MIC Linux driver.
+# Copyright(c) 2013, Intel Corporation.
+#
+obj-$(CONFIG_INTEL_MIC_HOST) += mic_host.o
+mic_host-objs := mic_main.o
+mic_host-objs += mic_x100.o
+mic_host-objs += mic_smpt.o
+mic_host-objs += mic_intr.o
+mic_host-objs += mic_boot.o
+mic_host-objs += mic_debugfs.o
diff --git a/drivers/misc/mic/host/mic_boot.c b/drivers/misc/mic/host/mic_boot.c
new file mode 100644
index 000000000..c327985c9
--- /dev/null
+++ b/drivers/misc/mic/host/mic_boot.c
@@ -0,0 +1,599 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/pci.h>
+#include <linux/kmod.h>
+#include <linux/mic_common.h>
+#include <linux/mic_bus.h>
+#include "../bus/scif_bus.h"
+#include "../bus/vop_bus.h"
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+#include "mic_smpt.h"
+
+static inline struct mic_device *vpdev_to_mdev(struct device *dev)
+{
+	return dev_get_drvdata(dev->parent);
+}
+
+static dma_addr_t
+_mic_dma_map_page(struct device *dev, struct page *page,
+		  unsigned long offset, size_t size,
+		  enum dma_data_direction dir, unsigned long attrs)
+{
+	void *va = phys_to_virt(page_to_phys(page)) + offset;
+	struct mic_device *mdev = vpdev_to_mdev(dev);
+
+	return mic_map_single(mdev, va, size);
+}
+
+static void _mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
+				size_t size, enum dma_data_direction dir,
+				unsigned long attrs)
+{
+	struct mic_device *mdev = vpdev_to_mdev(dev);
+
+	mic_unmap_single(mdev, dma_addr, size);
+}
+
+static const struct dma_map_ops _mic_dma_ops = {
+	.map_page = _mic_dma_map_page,
+	.unmap_page = _mic_dma_unmap_page,
+};
+
+static struct mic_irq *
+__mic_request_irq(struct vop_device *vpdev,
+		  irqreturn_t (*func)(int irq, void *data),
+		  const char *name, void *data, int intr_src)
+{
+	struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev);
+
+	return mic_request_threaded_irq(mdev, func, NULL, name, data,
+					intr_src, MIC_INTR_DB);
+}
+
+static void __mic_free_irq(struct vop_device *vpdev,
+			   struct mic_irq *cookie, void *data)
+{
+	struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev);
+
+	mic_free_irq(mdev, cookie, data);
+}
+
+static void __mic_ack_interrupt(struct vop_device *vpdev, int num)
+{
+	struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev);
+
+	mdev->ops->intr_workarounds(mdev);
+}
+
+static int __mic_next_db(struct vop_device *vpdev)
+{
+	struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev);
+
+	return mic_next_db(mdev);
+}
+
+static void *__mic_get_dp(struct vop_device *vpdev)
+{
+	struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev);
+
+	return mdev->dp;
+}
+
+static void __iomem *__mic_get_remote_dp(struct vop_device *vpdev)
+{
+	return NULL;
+}
+
+static void __mic_send_intr(struct vop_device *vpdev, int db)
+{
+	struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev);
+
+	mdev->ops->send_intr(mdev, db);
+}
+
+static void __iomem *__mic_ioremap(struct vop_device *vpdev,
+				   dma_addr_t pa, size_t len)
+{
+	struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev);
+
+	return mdev->aper.va + pa;
+}
+
+static void __mic_iounmap(struct vop_device *vpdev, void __iomem *va)
+{
+	/* nothing to do */
+}
+
+static struct vop_hw_ops vop_hw_ops = {
+	.request_irq = __mic_request_irq,
+	.free_irq = __mic_free_irq,
+	.ack_interrupt = __mic_ack_interrupt,
+	.next_db = __mic_next_db,
+	.get_dp = __mic_get_dp,
+	.get_remote_dp = __mic_get_remote_dp,
+	.send_intr = __mic_send_intr,
+	.ioremap = __mic_ioremap,
+	.iounmap = __mic_iounmap,
+};
+
+static inline struct mic_device *scdev_to_mdev(struct scif_hw_dev *scdev)
+{
+	return dev_get_drvdata(scdev->dev.parent);
+}
+
+static void *__mic_dma_alloc(struct device *dev, size_t size,
+			     dma_addr_t *dma_handle, gfp_t gfp,
+			     unsigned long attrs)
+{
+	struct scif_hw_dev *scdev = dev_get_drvdata(dev);
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+	dma_addr_t tmp;
+	void *va = kmalloc(size, gfp);
+
+	if (va) {
+		tmp = mic_map_single(mdev, va, size);
+		if (dma_mapping_error(dev, tmp)) {
+			kfree(va);
+			va = NULL;
+		} else {
+			*dma_handle = tmp;
+		}
+	}
+	return va;
+}
+
+static void __mic_dma_free(struct device *dev, size_t size, void *vaddr,
+			   dma_addr_t dma_handle, unsigned long attrs)
+{
+	struct scif_hw_dev *scdev = dev_get_drvdata(dev);
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	mic_unmap_single(mdev, dma_handle, size);
+	kfree(vaddr);
+}
+
+static dma_addr_t
+__mic_dma_map_page(struct device *dev, struct page *page, unsigned long offset,
+		   size_t size, enum dma_data_direction dir,
+		   unsigned long attrs)
+{
+	void *va = phys_to_virt(page_to_phys(page)) + offset;
+	struct scif_hw_dev *scdev = dev_get_drvdata(dev);
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	return mic_map_single(mdev, va, size);
+}
+
+static void
+__mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
+		     size_t size, enum dma_data_direction dir,
+		     unsigned long attrs)
+{
+	struct scif_hw_dev *scdev = dev_get_drvdata(dev);
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	mic_unmap_single(mdev, dma_addr, size);
+}
+
+static int __mic_dma_map_sg(struct device *dev, struct scatterlist *sg,
+			    int nents, enum dma_data_direction dir,
+			    unsigned long attrs)
+{
+	struct scif_hw_dev *scdev = dev_get_drvdata(dev);
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+	struct scatterlist *s;
+	int i, j, ret;
+	dma_addr_t da;
+
+	ret = dma_map_sg(&mdev->pdev->dev, sg, nents, dir);
+	if (ret <= 0)
+		return 0;
+
+	for_each_sg(sg, s, nents, i) {
+		da = mic_map(mdev, sg_dma_address(s) + s->offset, s->length);
+		if (!da)
+			goto err;
+		sg_dma_address(s) = da;
+	}
+	return nents;
+err:
+	for_each_sg(sg, s, i, j) {
+		mic_unmap(mdev, sg_dma_address(s), s->length);
+		sg_dma_address(s) = mic_to_dma_addr(mdev, sg_dma_address(s));
+	}
+	dma_unmap_sg(&mdev->pdev->dev, sg, nents, dir);
+	return 0;
+}
+
+static void __mic_dma_unmap_sg(struct device *dev,
+			       struct scatterlist *sg, int nents,
+			       enum dma_data_direction dir,
+			       unsigned long attrs)
+{
+	struct scif_hw_dev *scdev = dev_get_drvdata(dev);
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+	struct scatterlist *s;
+	dma_addr_t da;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		da = mic_to_dma_addr(mdev, sg_dma_address(s));
+		mic_unmap(mdev, sg_dma_address(s), s->length);
+		sg_dma_address(s) = da;
+	}
+	dma_unmap_sg(&mdev->pdev->dev, sg, nents, dir);
+}
+
+static const struct dma_map_ops __mic_dma_ops = {
+	.alloc = __mic_dma_alloc,
+	.free = __mic_dma_free,
+	.map_page = __mic_dma_map_page,
+	.unmap_page = __mic_dma_unmap_page,
+	.map_sg = __mic_dma_map_sg,
+	.unmap_sg = __mic_dma_unmap_sg,
+};
+
+static struct mic_irq *
+___mic_request_irq(struct scif_hw_dev *scdev,
+		   irqreturn_t (*func)(int irq, void *data),
+				       const char *name,
+				       void *data, int db)
+{
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	return mic_request_threaded_irq(mdev, func, NULL, name, data,
+					db, MIC_INTR_DB);
+}
+
+static void
+___mic_free_irq(struct scif_hw_dev *scdev,
+		struct mic_irq *cookie, void *data)
+{
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	mic_free_irq(mdev, cookie, data);
+}
+
+static void ___mic_ack_interrupt(struct scif_hw_dev *scdev, int num)
+{
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	mdev->ops->intr_workarounds(mdev);
+}
+
+static int ___mic_next_db(struct scif_hw_dev *scdev)
+{
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	return mic_next_db(mdev);
+}
+
+static void ___mic_send_intr(struct scif_hw_dev *scdev, int db)
+{
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	mdev->ops->send_intr(mdev, db);
+}
+
+static void __iomem *___mic_ioremap(struct scif_hw_dev *scdev,
+				    phys_addr_t pa, size_t len)
+{
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	return mdev->aper.va + pa;
+}
+
+static void ___mic_iounmap(struct scif_hw_dev *scdev, void __iomem *va)
+{
+	/* nothing to do */
+}
+
+static struct scif_hw_ops scif_hw_ops = {
+	.request_irq = ___mic_request_irq,
+	.free_irq = ___mic_free_irq,
+	.ack_interrupt = ___mic_ack_interrupt,
+	.next_db = ___mic_next_db,
+	.send_intr = ___mic_send_intr,
+	.ioremap = ___mic_ioremap,
+	.iounmap = ___mic_iounmap,
+};
+
+static inline struct mic_device *mbdev_to_mdev(struct mbus_device *mbdev)
+{
+	return dev_get_drvdata(mbdev->dev.parent);
+}
+
+static dma_addr_t
+mic_dma_map_page(struct device *dev, struct page *page,
+		 unsigned long offset, size_t size, enum dma_data_direction dir,
+		 unsigned long attrs)
+{
+	void *va = phys_to_virt(page_to_phys(page)) + offset;
+	struct mic_device *mdev = dev_get_drvdata(dev->parent);
+
+	return mic_map_single(mdev, va, size);
+}
+
+static void
+mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
+		   size_t size, enum dma_data_direction dir,
+		   unsigned long attrs)
+{
+	struct mic_device *mdev = dev_get_drvdata(dev->parent);
+	mic_unmap_single(mdev, dma_addr, size);
+}
+
+static const struct dma_map_ops mic_dma_ops = {
+	.map_page = mic_dma_map_page,
+	.unmap_page = mic_dma_unmap_page,
+};
+
+static struct mic_irq *
+_mic_request_threaded_irq(struct mbus_device *mbdev,
+			  irq_handler_t handler, irq_handler_t thread_fn,
+			  const char *name, void *data, int intr_src)
+{
+	return mic_request_threaded_irq(mbdev_to_mdev(mbdev), handler,
+					thread_fn, name, data,
+					intr_src, MIC_INTR_DMA);
+}
+
+static void _mic_free_irq(struct mbus_device *mbdev,
+			  struct mic_irq *cookie, void *data)
+{
+	mic_free_irq(mbdev_to_mdev(mbdev), cookie, data);
+}
+
+static void _mic_ack_interrupt(struct mbus_device *mbdev, int num)
+{
+	struct mic_device *mdev = mbdev_to_mdev(mbdev);
+	mdev->ops->intr_workarounds(mdev);
+}
+
+static struct mbus_hw_ops mbus_hw_ops = {
+	.request_threaded_irq = _mic_request_threaded_irq,
+	.free_irq = _mic_free_irq,
+	.ack_interrupt = _mic_ack_interrupt,
+};
+
+/* Initialize the MIC bootparams */
+void mic_bootparam_init(struct mic_device *mdev)
+{
+	struct mic_bootparam *bootparam = mdev->dp;
+
+	bootparam->magic = cpu_to_le32(MIC_MAGIC);
+	bootparam->h2c_config_db = -1;
+	bootparam->node_id = mdev->id + 1;
+	bootparam->scif_host_dma_addr = 0x0;
+	bootparam->scif_card_dma_addr = 0x0;
+	bootparam->c2h_scif_db = -1;
+	bootparam->h2c_scif_db = -1;
+}
+
+static inline struct mic_device *cosmdev_to_mdev(struct cosm_device *cdev)
+{
+	return dev_get_drvdata(cdev->dev.parent);
+}
+
+static void _mic_reset(struct cosm_device *cdev)
+{
+	struct mic_device *mdev = cosmdev_to_mdev(cdev);
+
+	mdev->ops->reset_fw_ready(mdev);
+	mdev->ops->reset(mdev);
+}
+
+static bool _mic_ready(struct cosm_device *cdev)
+{
+	struct mic_device *mdev = cosmdev_to_mdev(cdev);
+
+	return mdev->ops->is_fw_ready(mdev);
+}
+
+/**
+ * mic_request_dma_chans - Request DMA channels
+ * @mdev: pointer to mic_device instance
+ *
+ * returns number of DMA channels acquired
+ */
+static int mic_request_dma_chans(struct mic_device *mdev)
+{
+	dma_cap_mask_t mask;
+	struct dma_chan *chan;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_MEMCPY, mask);
+
+	do {
+		chan = dma_request_channel(mask, mdev->ops->dma_filter,
+					   &mdev->pdev->dev);
+		if (chan) {
+			mdev->dma_ch[mdev->num_dma_ch++] = chan;
+			if (mdev->num_dma_ch >= MIC_MAX_DMA_CHAN)
+				break;
+		}
+	} while (chan);
+	dev_info(&mdev->pdev->dev, "DMA channels # %d\n", mdev->num_dma_ch);
+	return mdev->num_dma_ch;
+}
+
+/**
+ * mic_free_dma_chans - release DMA channels
+ * @mdev: pointer to mic_device instance
+ *
+ * returns none
+ */
+static void mic_free_dma_chans(struct mic_device *mdev)
+{
+	int i = 0;
+
+	for (i = 0; i < mdev->num_dma_ch; i++) {
+		dma_release_channel(mdev->dma_ch[i]);
+		mdev->dma_ch[i] = NULL;
+	}
+	mdev->num_dma_ch = 0;
+}
+
+/**
+ * _mic_start - Start the MIC.
+ * @cdev: pointer to cosm_device instance
+ * @id: MIC device id/index provided by COSM used in other drivers like SCIF
+ *
+ * This function prepares an MIC for boot and initiates boot.
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ *
+ * For all cosm_hw_ops the caller holds a mutex to ensure serialization.
+ */
+static int _mic_start(struct cosm_device *cdev, int id)
+{
+	struct mic_device *mdev = cosmdev_to_mdev(cdev);
+	int rc;
+
+	mic_bootparam_init(mdev);
+	mdev->dma_mbdev = mbus_register_device(&mdev->pdev->dev,
+					       MBUS_DEV_DMA_HOST, &mic_dma_ops,
+					       &mbus_hw_ops, id, mdev->mmio.va);
+	if (IS_ERR(mdev->dma_mbdev)) {
+		rc = PTR_ERR(mdev->dma_mbdev);
+		goto unlock_ret;
+	}
+	if (!mic_request_dma_chans(mdev)) {
+		rc = -ENODEV;
+		goto dma_remove;
+	}
+	mdev->scdev = scif_register_device(&mdev->pdev->dev, MIC_SCIF_DEV,
+					   &__mic_dma_ops, &scif_hw_ops,
+					   id + 1, 0, &mdev->mmio,
+					   &mdev->aper, mdev->dp, NULL,
+					   mdev->dma_ch, mdev->num_dma_ch,
+					   true);
+	if (IS_ERR(mdev->scdev)) {
+		rc = PTR_ERR(mdev->scdev);
+		goto dma_free;
+	}
+
+	mdev->vpdev = vop_register_device(&mdev->pdev->dev,
+					  VOP_DEV_TRNSP, &_mic_dma_ops,
+					  &vop_hw_ops, id + 1, &mdev->aper,
+					  mdev->dma_ch[0]);
+	if (IS_ERR(mdev->vpdev)) {
+		rc = PTR_ERR(mdev->vpdev);
+		goto scif_remove;
+	}
+
+	rc = mdev->ops->load_mic_fw(mdev, NULL);
+	if (rc)
+		goto vop_remove;
+	mic_smpt_restore(mdev);
+	mic_intr_restore(mdev);
+	mdev->intr_ops->enable_interrupts(mdev);
+	mdev->ops->write_spad(mdev, MIC_DPLO_SPAD, mdev->dp_dma_addr);
+	mdev->ops->write_spad(mdev, MIC_DPHI_SPAD, mdev->dp_dma_addr >> 32);
+	mdev->ops->send_firmware_intr(mdev);
+	goto unlock_ret;
+vop_remove:
+	vop_unregister_device(mdev->vpdev);
+scif_remove:
+	scif_unregister_device(mdev->scdev);
+dma_free:
+	mic_free_dma_chans(mdev);
+dma_remove:
+	mbus_unregister_device(mdev->dma_mbdev);
+unlock_ret:
+	return rc;
+}
+
+/**
+ * _mic_stop - Prepare the MIC for reset and trigger reset.
+ * @cdev: pointer to cosm_device instance
+ * @force: force a MIC to reset even if it is already offline.
+ *
+ * RETURNS: None.
+ */
+static void _mic_stop(struct cosm_device *cdev, bool force)
+{
+	struct mic_device *mdev = cosmdev_to_mdev(cdev);
+
+	/*
+	 * Since SCIF handles card shutdown and reset (using COSM), it will
+	 * will be the first to be registered and the last to be
+	 * unregistered.
+	 */
+	vop_unregister_device(mdev->vpdev);
+	scif_unregister_device(mdev->scdev);
+	mic_free_dma_chans(mdev);
+	mbus_unregister_device(mdev->dma_mbdev);
+	mic_bootparam_init(mdev);
+}
+
+static ssize_t _mic_family(struct cosm_device *cdev, char *buf)
+{
+	struct mic_device *mdev = cosmdev_to_mdev(cdev);
+	static const char *family[MIC_FAMILY_LAST] = { "x100", "Unknown" };
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", family[mdev->family]);
+}
+
+static ssize_t _mic_stepping(struct cosm_device *cdev, char *buf)
+{
+	struct mic_device *mdev = cosmdev_to_mdev(cdev);
+	const char *string = "??";
+
+	switch (mdev->stepping) {
+	case MIC_A0_STEP:
+		string = "A0";
+		break;
+	case MIC_B0_STEP:
+		string = "B0";
+		break;
+	case MIC_B1_STEP:
+		string = "B1";
+		break;
+	case MIC_C0_STEP:
+		string = "C0";
+		break;
+	default:
+		break;
+	}
+	return scnprintf(buf, PAGE_SIZE, "%s\n", string);
+}
+
+static struct mic_mw *_mic_aper(struct cosm_device *cdev)
+{
+	struct mic_device *mdev = cosmdev_to_mdev(cdev);
+
+	return &mdev->aper;
+}
+
+struct cosm_hw_ops cosm_hw_ops = {
+	.reset = _mic_reset,
+	.force_reset = _mic_reset,
+	.post_reset = NULL,
+	.ready = _mic_ready,
+	.start = _mic_start,
+	.stop = _mic_stop,
+	.family = _mic_family,
+	.stepping = _mic_stepping,
+	.aper = _mic_aper,
+};
diff --git a/drivers/misc/mic/host/mic_debugfs.c b/drivers/misc/mic/host/mic_debugfs.c
new file mode 100644
index 000000000..0a9daba8b
--- /dev/null
+++ b/drivers/misc/mic/host/mic_debugfs.c
@@ -0,0 +1,216 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/pci.h>
+#include <linux/seq_file.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+#include "mic_smpt.h"
+
+/* Debugfs parent dir */
+static struct dentry *mic_dbg;
+
+static int mic_smpt_show(struct seq_file *s, void *pos)
+{
+	int i;
+	struct mic_device *mdev = s->private;
+	unsigned long flags;
+
+	seq_printf(s, "MIC %-2d |%-10s| %-14s %-10s\n",
+		   mdev->id, "SMPT entry", "SW DMA addr", "RefCount");
+	seq_puts(s, "====================================================\n");
+
+	if (mdev->smpt) {
+		struct mic_smpt_info *smpt_info = mdev->smpt;
+		spin_lock_irqsave(&smpt_info->smpt_lock, flags);
+		for (i = 0; i < smpt_info->info.num_reg; i++) {
+			seq_printf(s, "%9s|%-10d| %-#14llx %-10lld\n",
+				   " ",  i, smpt_info->entry[i].dma_addr,
+				   smpt_info->entry[i].ref_count);
+		}
+		spin_unlock_irqrestore(&smpt_info->smpt_lock, flags);
+	}
+	seq_puts(s, "====================================================\n");
+	return 0;
+}
+
+static int mic_smpt_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mic_smpt_show, inode->i_private);
+}
+
+static int mic_smpt_debug_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file);
+}
+
+static const struct file_operations smpt_file_ops = {
+	.owner   = THIS_MODULE,
+	.open    = mic_smpt_debug_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = mic_smpt_debug_release
+};
+
+static int mic_post_code_show(struct seq_file *s, void *pos)
+{
+	struct mic_device *mdev = s->private;
+	u32 reg = mdev->ops->get_postcode(mdev);
+
+	seq_printf(s, "%c%c", reg & 0xff, (reg >> 8) & 0xff);
+	return 0;
+}
+
+static int mic_post_code_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mic_post_code_show, inode->i_private);
+}
+
+static int mic_post_code_debug_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file);
+}
+
+static const struct file_operations post_code_ops = {
+	.owner   = THIS_MODULE,
+	.open    = mic_post_code_debug_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = mic_post_code_debug_release
+};
+
+static int mic_msi_irq_info_show(struct seq_file *s, void *pos)
+{
+	struct mic_device *mdev  = s->private;
+	int reg;
+	int i, j;
+	u16 entry;
+	u16 vector;
+	struct pci_dev *pdev = mdev->pdev;
+
+	if (pci_dev_msi_enabled(pdev)) {
+		for (i = 0; i < mdev->irq_info.num_vectors; i++) {
+			if (pdev->msix_enabled) {
+				entry = mdev->irq_info.msix_entries[i].entry;
+				vector = mdev->irq_info.msix_entries[i].vector;
+			} else {
+				entry = 0;
+				vector = pdev->irq;
+			}
+
+			reg = mdev->intr_ops->read_msi_to_src_map(mdev, entry);
+
+			seq_printf(s, "%s %-10d %s %-10d MXAR[%d]: %08X\n",
+				   "IRQ:", vector, "Entry:", entry, i, reg);
+
+			seq_printf(s, "%-10s", "offset:");
+			for (j = (MIC_NUM_OFFSETS - 1); j >= 0; j--)
+				seq_printf(s, "%4d ", j);
+			seq_puts(s, "\n");
+
+
+			seq_printf(s, "%-10s", "count:");
+			for (j = (MIC_NUM_OFFSETS - 1); j >= 0; j--)
+				seq_printf(s, "%4d ",
+					   (mdev->irq_info.mic_msi_map[i] &
+					   BIT(j)) ? 1 : 0);
+			seq_puts(s, "\n\n");
+		}
+	} else {
+		seq_puts(s, "MSI/MSIx interrupts not enabled\n");
+	}
+
+	return 0;
+}
+
+static int mic_msi_irq_info_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mic_msi_irq_info_show, inode->i_private);
+}
+
+static int
+mic_msi_irq_info_debug_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file);
+}
+
+static const struct file_operations msi_irq_info_ops = {
+	.owner   = THIS_MODULE,
+	.open    = mic_msi_irq_info_debug_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = mic_msi_irq_info_debug_release
+};
+
+/**
+ * mic_create_debug_dir - Initialize MIC debugfs entries.
+ */
+void mic_create_debug_dir(struct mic_device *mdev)
+{
+	char name[16];
+
+	if (!mic_dbg)
+		return;
+
+	scnprintf(name, sizeof(name), "mic%d", mdev->id);
+	mdev->dbg_dir = debugfs_create_dir(name, mic_dbg);
+	if (!mdev->dbg_dir)
+		return;
+
+	debugfs_create_file("smpt", 0444, mdev->dbg_dir, mdev, &smpt_file_ops);
+
+	debugfs_create_file("post_code", 0444, mdev->dbg_dir, mdev,
+			    &post_code_ops);
+
+	debugfs_create_file("msi_irq_info", 0444, mdev->dbg_dir, mdev,
+			    &msi_irq_info_ops);
+}
+
+/**
+ * mic_delete_debug_dir - Uninitialize MIC debugfs entries.
+ */
+void mic_delete_debug_dir(struct mic_device *mdev)
+{
+	if (!mdev->dbg_dir)
+		return;
+
+	debugfs_remove_recursive(mdev->dbg_dir);
+}
+
+/**
+ * mic_init_debugfs - Initialize global debugfs entry.
+ */
+void __init mic_init_debugfs(void)
+{
+	mic_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
+	if (!mic_dbg)
+		pr_err("can't create debugfs dir\n");
+}
+
+/**
+ * mic_exit_debugfs - Uninitialize global debugfs entry
+ */
+void mic_exit_debugfs(void)
+{
+	debugfs_remove(mic_dbg);
+}
diff --git a/drivers/misc/mic/host/mic_device.h b/drivers/misc/mic/host/mic_device.h
new file mode 100644
index 000000000..52b12b22f
--- /dev/null
+++ b/drivers/misc/mic/host/mic_device.h
@@ -0,0 +1,169 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#ifndef _MIC_DEVICE_H_
+#define _MIC_DEVICE_H_
+
+#include <linux/cdev.h>
+#include <linux/idr.h>
+#include <linux/notifier.h>
+#include <linux/irqreturn.h>
+#include <linux/dmaengine.h>
+#include <linux/miscdevice.h>
+#include <linux/mic_bus.h>
+#include "../bus/scif_bus.h"
+#include "../bus/vop_bus.h"
+#include "../bus/cosm_bus.h"
+#include "mic_intr.h"
+
+/**
+ * enum mic_stepping - MIC stepping ids.
+ */
+enum mic_stepping {
+	MIC_A0_STEP = 0x0,
+	MIC_B0_STEP = 0x10,
+	MIC_B1_STEP = 0x11,
+	MIC_C0_STEP = 0x20,
+};
+
+extern struct cosm_hw_ops cosm_hw_ops;
+
+/**
+ * struct mic_device -  MIC device information for each card.
+ *
+ * @mmio: MMIO bar information.
+ * @aper: Aperture bar information.
+ * @family: The MIC family to which this device belongs.
+ * @ops: MIC HW specific operations.
+ * @id: The unique device id for this MIC device.
+ * @stepping: Stepping ID.
+ * @pdev: Underlying PCI device.
+ * @mic_mutex: Mutex for synchronizing access to mic_device.
+ * @intr_ops: HW specific interrupt operations.
+ * @smpt_ops: Hardware specific SMPT operations.
+ * @smpt: MIC SMPT information.
+ * @intr_info: H/W specific interrupt information.
+ * @irq_info: The OS specific irq information
+ * @dbg_dir: debugfs directory of this MIC device.
+ * @bootaddr: MIC boot address.
+ * @dp: virtio device page
+ * @dp_dma_addr: virtio device page DMA address.
+ * @dma_mbdev: MIC BUS DMA device.
+ * @dma_ch - Array of DMA channels
+ * @num_dma_ch - Number of DMA channels available
+ * @scdev: SCIF device on the SCIF virtual bus.
+ * @vpdev: Virtio over PCIe device on the VOP virtual bus.
+ * @cosm_dev: COSM device
+ */
+struct mic_device {
+	struct mic_mw mmio;
+	struct mic_mw aper;
+	enum mic_hw_family family;
+	struct mic_hw_ops *ops;
+	int id;
+	enum mic_stepping stepping;
+	struct pci_dev *pdev;
+	struct mutex mic_mutex;
+	struct mic_hw_intr_ops *intr_ops;
+	struct mic_smpt_ops *smpt_ops;
+	struct mic_smpt_info *smpt;
+	struct mic_intr_info *intr_info;
+	struct mic_irq_info irq_info;
+	struct dentry *dbg_dir;
+	u32 bootaddr;
+	void *dp;
+	dma_addr_t dp_dma_addr;
+	struct mbus_device *dma_mbdev;
+	struct dma_chan *dma_ch[MIC_MAX_DMA_CHAN];
+	int num_dma_ch;
+	struct scif_hw_dev *scdev;
+	struct vop_device *vpdev;
+	struct cosm_device *cosm_dev;
+};
+
+/**
+ * struct mic_hw_ops - MIC HW specific operations.
+ * @aper_bar: Aperture bar resource number.
+ * @mmio_bar: MMIO bar resource number.
+ * @read_spad: Read from scratch pad register.
+ * @write_spad: Write to scratch pad register.
+ * @send_intr: Send an interrupt for a particular doorbell on the card.
+ * @ack_interrupt: Hardware specific operations to ack the h/w on
+ * receipt of an interrupt.
+ * @intr_workarounds: Hardware specific workarounds needed after
+ * handling an interrupt.
+ * @reset: Reset the remote processor.
+ * @reset_fw_ready: Reset firmware ready field.
+ * @is_fw_ready: Check if firmware is ready for OS download.
+ * @send_firmware_intr: Send an interrupt to the card firmware.
+ * @load_mic_fw: Load firmware segments required to boot the card
+ * into card memory. This includes the kernel, command line, ramdisk etc.
+ * @get_postcode: Get post code status from firmware.
+ * @dma_filter: DMA filter function to be used.
+ */
+struct mic_hw_ops {
+	u8 aper_bar;
+	u8 mmio_bar;
+	u32 (*read_spad)(struct mic_device *mdev, unsigned int idx);
+	void (*write_spad)(struct mic_device *mdev, unsigned int idx, u32 val);
+	void (*send_intr)(struct mic_device *mdev, int doorbell);
+	u32 (*ack_interrupt)(struct mic_device *mdev);
+	void (*intr_workarounds)(struct mic_device *mdev);
+	void (*reset)(struct mic_device *mdev);
+	void (*reset_fw_ready)(struct mic_device *mdev);
+	bool (*is_fw_ready)(struct mic_device *mdev);
+	void (*send_firmware_intr)(struct mic_device *mdev);
+	int (*load_mic_fw)(struct mic_device *mdev, const char *buf);
+	u32 (*get_postcode)(struct mic_device *mdev);
+	bool (*dma_filter)(struct dma_chan *chan, void *param);
+};
+
+/**
+ * mic_mmio_read - read from an MMIO register.
+ * @mw: MMIO register base virtual address.
+ * @offset: register offset.
+ *
+ * RETURNS: register value.
+ */
+static inline u32 mic_mmio_read(struct mic_mw *mw, u32 offset)
+{
+	return ioread32(mw->va + offset);
+}
+
+/**
+ * mic_mmio_write - write to an MMIO register.
+ * @mw: MMIO register base virtual address.
+ * @val: the data value to put into the register
+ * @offset: register offset.
+ *
+ * RETURNS: none.
+ */
+static inline void
+mic_mmio_write(struct mic_mw *mw, u32 val, u32 offset)
+{
+	iowrite32(val, mw->va + offset);
+}
+
+void mic_bootparam_init(struct mic_device *mdev);
+void mic_create_debug_dir(struct mic_device *dev);
+void mic_delete_debug_dir(struct mic_device *dev);
+void __init mic_init_debugfs(void);
+void mic_exit_debugfs(void);
+#endif
diff --git a/drivers/misc/mic/host/mic_intr.c b/drivers/misc/mic/host/mic_intr.c
new file mode 100644
index 000000000..08ca3e372
--- /dev/null
+++ b/drivers/misc/mic/host/mic_intr.c
@@ -0,0 +1,645 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+
+static irqreturn_t mic_thread_fn(int irq, void *dev)
+{
+	struct mic_device *mdev = dev;
+	struct mic_intr_info *intr_info = mdev->intr_info;
+	struct mic_irq_info *irq_info = &mdev->irq_info;
+	struct mic_intr_cb *intr_cb;
+	struct pci_dev *pdev = mdev->pdev;
+	int i;
+
+	spin_lock(&irq_info->mic_thread_lock);
+	for (i = intr_info->intr_start_idx[MIC_INTR_DB];
+			i < intr_info->intr_len[MIC_INTR_DB]; i++)
+		if (test_and_clear_bit(i, &irq_info->mask)) {
+			list_for_each_entry(intr_cb, &irq_info->cb_list[i],
+					    list)
+				if (intr_cb->thread_fn)
+					intr_cb->thread_fn(pdev->irq,
+							 intr_cb->data);
+		}
+	spin_unlock(&irq_info->mic_thread_lock);
+	return IRQ_HANDLED;
+}
+/**
+ * mic_interrupt - Generic interrupt handler for
+ * MSI and INTx based interrupts.
+ */
+static irqreturn_t mic_interrupt(int irq, void *dev)
+{
+	struct mic_device *mdev = dev;
+	struct mic_intr_info *intr_info = mdev->intr_info;
+	struct mic_irq_info *irq_info = &mdev->irq_info;
+	struct mic_intr_cb *intr_cb;
+	struct pci_dev *pdev = mdev->pdev;
+	u32 mask;
+	int i;
+
+	mask = mdev->ops->ack_interrupt(mdev);
+	if (!mask)
+		return IRQ_NONE;
+
+	spin_lock(&irq_info->mic_intr_lock);
+	for (i = intr_info->intr_start_idx[MIC_INTR_DB];
+			i < intr_info->intr_len[MIC_INTR_DB]; i++)
+		if (mask & BIT(i)) {
+			list_for_each_entry(intr_cb, &irq_info->cb_list[i],
+					    list)
+				if (intr_cb->handler)
+					intr_cb->handler(pdev->irq,
+							 intr_cb->data);
+			set_bit(i, &irq_info->mask);
+		}
+	spin_unlock(&irq_info->mic_intr_lock);
+	return IRQ_WAKE_THREAD;
+}
+
+/* Return the interrupt offset from the index. Index is 0 based. */
+static u16 mic_map_src_to_offset(struct mic_device *mdev,
+				 int intr_src, enum mic_intr_type type)
+{
+	if (type >= MIC_NUM_INTR_TYPES)
+		return MIC_NUM_OFFSETS;
+	if (intr_src >= mdev->intr_info->intr_len[type])
+		return MIC_NUM_OFFSETS;
+
+	return mdev->intr_info->intr_start_idx[type] + intr_src;
+}
+
+/* Return next available msix_entry. */
+static struct msix_entry *mic_get_available_vector(struct mic_device *mdev)
+{
+	int i;
+	struct mic_irq_info *info = &mdev->irq_info;
+
+	for (i = 0; i < info->num_vectors; i++)
+		if (!info->mic_msi_map[i])
+			return &info->msix_entries[i];
+	return NULL;
+}
+
+/**
+ * mic_register_intr_callback - Register a callback handler for the
+ * given source id.
+ *
+ * @mdev: pointer to the mic_device instance
+ * @idx: The source id to be registered.
+ * @handler: The function to be called when the source id receives
+ * the interrupt.
+ * @thread_fn: thread fn. corresponding to the handler
+ * @data: Private data of the requester.
+ * Return the callback structure that was registered or an
+ * appropriate error on failure.
+ */
+static struct mic_intr_cb *mic_register_intr_callback(struct mic_device *mdev,
+			u8 idx, irq_handler_t handler, irq_handler_t thread_fn,
+			void *data)
+{
+	struct mic_intr_cb *intr_cb;
+	unsigned long flags;
+	int rc;
+	intr_cb = kmalloc(sizeof(*intr_cb), GFP_KERNEL);
+
+	if (!intr_cb)
+		return ERR_PTR(-ENOMEM);
+
+	intr_cb->handler = handler;
+	intr_cb->thread_fn = thread_fn;
+	intr_cb->data = data;
+	intr_cb->cb_id = ida_simple_get(&mdev->irq_info.cb_ida,
+		0, 0, GFP_KERNEL);
+	if (intr_cb->cb_id < 0) {
+		rc = intr_cb->cb_id;
+		goto ida_fail;
+	}
+
+	spin_lock(&mdev->irq_info.mic_thread_lock);
+	spin_lock_irqsave(&mdev->irq_info.mic_intr_lock, flags);
+	list_add_tail(&intr_cb->list, &mdev->irq_info.cb_list[idx]);
+	spin_unlock_irqrestore(&mdev->irq_info.mic_intr_lock, flags);
+	spin_unlock(&mdev->irq_info.mic_thread_lock);
+
+	return intr_cb;
+ida_fail:
+	kfree(intr_cb);
+	return ERR_PTR(rc);
+}
+
+/**
+ * mic_unregister_intr_callback - Unregister the callback handler
+ * identified by its callback id.
+ *
+ * @mdev: pointer to the mic_device instance
+ * @idx: The callback structure id to be unregistered.
+ * Return the source id that was unregistered or MIC_NUM_OFFSETS if no
+ * such callback handler was found.
+ */
+static u8 mic_unregister_intr_callback(struct mic_device *mdev, u32 idx)
+{
+	struct list_head *pos, *tmp;
+	struct mic_intr_cb *intr_cb;
+	unsigned long flags;
+	int i;
+
+	spin_lock(&mdev->irq_info.mic_thread_lock);
+	spin_lock_irqsave(&mdev->irq_info.mic_intr_lock, flags);
+	for (i = 0;  i < MIC_NUM_OFFSETS; i++) {
+		list_for_each_safe(pos, tmp, &mdev->irq_info.cb_list[i]) {
+			intr_cb = list_entry(pos, struct mic_intr_cb, list);
+			if (intr_cb->cb_id == idx) {
+				list_del(pos);
+				ida_simple_remove(&mdev->irq_info.cb_ida,
+						  intr_cb->cb_id);
+				kfree(intr_cb);
+				spin_unlock_irqrestore(
+					&mdev->irq_info.mic_intr_lock, flags);
+				spin_unlock(&mdev->irq_info.mic_thread_lock);
+				return i;
+			}
+		}
+	}
+	spin_unlock_irqrestore(&mdev->irq_info.mic_intr_lock, flags);
+	spin_unlock(&mdev->irq_info.mic_thread_lock);
+	return MIC_NUM_OFFSETS;
+}
+
+/**
+ * mic_setup_msix - Initializes MSIx interrupts.
+ *
+ * @mdev: pointer to mic_device instance
+ *
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static int mic_setup_msix(struct mic_device *mdev, struct pci_dev *pdev)
+{
+	int rc, i;
+	int entry_size = sizeof(*mdev->irq_info.msix_entries);
+
+	mdev->irq_info.msix_entries = kmalloc_array(MIC_MIN_MSIX,
+						    entry_size, GFP_KERNEL);
+	if (!mdev->irq_info.msix_entries) {
+		rc = -ENOMEM;
+		goto err_nomem1;
+	}
+
+	for (i = 0; i < MIC_MIN_MSIX; i++)
+		mdev->irq_info.msix_entries[i].entry = i;
+
+	rc = pci_enable_msix_exact(pdev, mdev->irq_info.msix_entries,
+				   MIC_MIN_MSIX);
+	if (rc) {
+		dev_dbg(&pdev->dev, "Error enabling MSIx. rc = %d\n", rc);
+		goto err_enable_msix;
+	}
+
+	mdev->irq_info.num_vectors = MIC_MIN_MSIX;
+	mdev->irq_info.mic_msi_map = kzalloc((sizeof(u32) *
+		mdev->irq_info.num_vectors), GFP_KERNEL);
+
+	if (!mdev->irq_info.mic_msi_map) {
+		rc = -ENOMEM;
+		goto err_nomem2;
+	}
+
+	dev_dbg(&mdev->pdev->dev,
+		"%d MSIx irqs setup\n", mdev->irq_info.num_vectors);
+	return 0;
+err_nomem2:
+	pci_disable_msix(pdev);
+err_enable_msix:
+	kfree(mdev->irq_info.msix_entries);
+err_nomem1:
+	mdev->irq_info.num_vectors = 0;
+	return rc;
+}
+
+/**
+ * mic_setup_callbacks - Initialize data structures needed
+ * to handle callbacks.
+ *
+ * @mdev: pointer to mic_device instance
+ */
+static int mic_setup_callbacks(struct mic_device *mdev)
+{
+	int i;
+
+	mdev->irq_info.cb_list = kmalloc_array(MIC_NUM_OFFSETS,
+					       sizeof(*mdev->irq_info.cb_list),
+					       GFP_KERNEL);
+	if (!mdev->irq_info.cb_list)
+		return -ENOMEM;
+
+	for (i = 0; i < MIC_NUM_OFFSETS; i++)
+		INIT_LIST_HEAD(&mdev->irq_info.cb_list[i]);
+	ida_init(&mdev->irq_info.cb_ida);
+	spin_lock_init(&mdev->irq_info.mic_intr_lock);
+	spin_lock_init(&mdev->irq_info.mic_thread_lock);
+	return 0;
+}
+
+/**
+ * mic_release_callbacks - Uninitialize data structures needed
+ * to handle callbacks.
+ *
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_release_callbacks(struct mic_device *mdev)
+{
+	unsigned long flags;
+	struct list_head *pos, *tmp;
+	struct mic_intr_cb *intr_cb;
+	int i;
+
+	spin_lock(&mdev->irq_info.mic_thread_lock);
+	spin_lock_irqsave(&mdev->irq_info.mic_intr_lock, flags);
+	for (i = 0; i < MIC_NUM_OFFSETS; i++) {
+		if (list_empty(&mdev->irq_info.cb_list[i]))
+			break;
+
+		list_for_each_safe(pos, tmp, &mdev->irq_info.cb_list[i]) {
+			intr_cb = list_entry(pos, struct mic_intr_cb, list);
+			list_del(pos);
+			ida_simple_remove(&mdev->irq_info.cb_ida,
+					  intr_cb->cb_id);
+			kfree(intr_cb);
+		}
+	}
+	spin_unlock_irqrestore(&mdev->irq_info.mic_intr_lock, flags);
+	spin_unlock(&mdev->irq_info.mic_thread_lock);
+	ida_destroy(&mdev->irq_info.cb_ida);
+	kfree(mdev->irq_info.cb_list);
+}
+
+/**
+ * mic_setup_msi - Initializes MSI interrupts.
+ *
+ * @mdev: pointer to mic_device instance
+ * @pdev: PCI device structure
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static int mic_setup_msi(struct mic_device *mdev, struct pci_dev *pdev)
+{
+	int rc;
+
+	rc = pci_enable_msi(pdev);
+	if (rc) {
+		dev_dbg(&pdev->dev, "Error enabling MSI. rc = %d\n", rc);
+		return rc;
+	}
+
+	mdev->irq_info.num_vectors = 1;
+	mdev->irq_info.mic_msi_map = kzalloc((sizeof(u32) *
+		mdev->irq_info.num_vectors), GFP_KERNEL);
+
+	if (!mdev->irq_info.mic_msi_map) {
+		rc = -ENOMEM;
+		goto err_nomem1;
+	}
+
+	rc = mic_setup_callbacks(mdev);
+	if (rc) {
+		dev_err(&pdev->dev, "Error setting up callbacks\n");
+		goto err_nomem2;
+	}
+
+	rc = request_threaded_irq(pdev->irq, mic_interrupt, mic_thread_fn,
+				  0, "mic-msi", mdev);
+	if (rc) {
+		dev_err(&pdev->dev, "Error allocating MSI interrupt\n");
+		goto err_irq_req_fail;
+	}
+
+	dev_dbg(&pdev->dev, "%d MSI irqs setup\n", mdev->irq_info.num_vectors);
+	return 0;
+err_irq_req_fail:
+	mic_release_callbacks(mdev);
+err_nomem2:
+	kfree(mdev->irq_info.mic_msi_map);
+err_nomem1:
+	pci_disable_msi(pdev);
+	mdev->irq_info.num_vectors = 0;
+	return rc;
+}
+
+/**
+ * mic_setup_intx - Initializes legacy interrupts.
+ *
+ * @mdev: pointer to mic_device instance
+ * @pdev: PCI device structure
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static int mic_setup_intx(struct mic_device *mdev, struct pci_dev *pdev)
+{
+	int rc;
+
+	/* Enable intx */
+	pci_intx(pdev, 1);
+	rc = mic_setup_callbacks(mdev);
+	if (rc) {
+		dev_err(&pdev->dev, "Error setting up callbacks\n");
+		goto err_nomem;
+	}
+
+	rc = request_threaded_irq(pdev->irq, mic_interrupt, mic_thread_fn,
+				  IRQF_SHARED, "mic-intx", mdev);
+	if (rc)
+		goto err;
+
+	dev_dbg(&pdev->dev, "intx irq setup\n");
+	return 0;
+err:
+	mic_release_callbacks(mdev);
+err_nomem:
+	return rc;
+}
+
+/**
+ * mic_next_db - Retrieve the next doorbell interrupt source id.
+ * The id is picked sequentially from the available pool of
+ * doorlbell ids.
+ *
+ * @mdev: pointer to the mic_device instance.
+ *
+ * Returns the next doorbell interrupt source.
+ */
+int mic_next_db(struct mic_device *mdev)
+{
+	int next_db;
+
+	next_db = mdev->irq_info.next_avail_src %
+		mdev->intr_info->intr_len[MIC_INTR_DB];
+	mdev->irq_info.next_avail_src++;
+	return next_db;
+}
+
+#define COOKIE_ID_SHIFT 16
+#define GET_ENTRY(cookie) ((cookie) & 0xFFFF)
+#define GET_OFFSET(cookie) ((cookie) >> COOKIE_ID_SHIFT)
+#define MK_COOKIE(x, y) ((x) | (y) << COOKIE_ID_SHIFT)
+
+/**
+ * mic_request_threaded_irq - request an irq. mic_mutex needs
+ * to be held before calling this function.
+ *
+ * @mdev: pointer to mic_device instance
+ * @handler: The callback function that handles the interrupt.
+ * The function needs to call ack_interrupts
+ * (mdev->ops->ack_interrupt(mdev)) when handling the interrupts.
+ * @thread_fn: thread fn required by request_threaded_irq.
+ * @name: The ASCII name of the callee requesting the irq.
+ * @data: private data that is returned back when calling the
+ * function handler.
+ * @intr_src: The source id of the requester. Its the doorbell id
+ * for Doorbell interrupts and DMA channel id for DMA interrupts.
+ * @type: The type of interrupt. Values defined in mic_intr_type
+ *
+ * returns: The cookie that is transparent to the caller. Passed
+ * back when calling mic_free_irq. An appropriate error code
+ * is returned on failure. Caller needs to use IS_ERR(return_val)
+ * to check for failure and PTR_ERR(return_val) to obtained the
+ * error code.
+ *
+ */
+struct mic_irq *
+mic_request_threaded_irq(struct mic_device *mdev,
+			 irq_handler_t handler, irq_handler_t thread_fn,
+			 const char *name, void *data, int intr_src,
+			 enum mic_intr_type type)
+{
+	u16 offset;
+	int rc = 0;
+	struct msix_entry *msix = NULL;
+	unsigned long cookie = 0;
+	u16 entry;
+	struct mic_intr_cb *intr_cb;
+	struct pci_dev *pdev = mdev->pdev;
+
+	offset = mic_map_src_to_offset(mdev, intr_src, type);
+	if (offset >= MIC_NUM_OFFSETS) {
+		dev_err(&mdev->pdev->dev,
+			"Error mapping index %d to a valid source id.\n",
+			intr_src);
+		rc = -EINVAL;
+		goto err;
+	}
+
+	if (mdev->irq_info.num_vectors > 1) {
+		msix = mic_get_available_vector(mdev);
+		if (!msix) {
+			dev_err(&mdev->pdev->dev,
+				"No MSIx vectors available for use.\n");
+			rc = -ENOSPC;
+			goto err;
+		}
+
+		rc = request_threaded_irq(msix->vector, handler, thread_fn,
+					  0, name, data);
+		if (rc) {
+			dev_dbg(&mdev->pdev->dev,
+				"request irq failed rc = %d\n", rc);
+			goto err;
+		}
+		entry = msix->entry;
+		mdev->irq_info.mic_msi_map[entry] |= BIT(offset);
+		mdev->intr_ops->program_msi_to_src_map(mdev,
+				entry, offset, true);
+		cookie = MK_COOKIE(entry, offset);
+		dev_dbg(&mdev->pdev->dev, "irq: %d assigned for src: %d\n",
+			msix->vector, intr_src);
+	} else {
+		intr_cb = mic_register_intr_callback(mdev, offset, handler,
+						     thread_fn, data);
+		if (IS_ERR(intr_cb)) {
+			dev_err(&mdev->pdev->dev,
+				"No available callback entries for use\n");
+			rc = PTR_ERR(intr_cb);
+			goto err;
+		}
+
+		entry = 0;
+		if (pci_dev_msi_enabled(pdev)) {
+			mdev->irq_info.mic_msi_map[entry] |= (1 << offset);
+			mdev->intr_ops->program_msi_to_src_map(mdev,
+				entry, offset, true);
+		}
+		cookie = MK_COOKIE(entry, intr_cb->cb_id);
+		dev_dbg(&mdev->pdev->dev, "callback %d registered for src: %d\n",
+			intr_cb->cb_id, intr_src);
+	}
+	return (struct mic_irq *)cookie;
+err:
+	return ERR_PTR(rc);
+}
+
+/**
+ * mic_free_irq - free irq. mic_mutex
+ *  needs to be held before calling this function.
+ *
+ * @mdev: pointer to mic_device instance
+ * @cookie: cookie obtained during a successful call to mic_request_threaded_irq
+ * @data: private data specified by the calling function during the
+ * mic_request_threaded_irq
+ *
+ * returns: none.
+ */
+void mic_free_irq(struct mic_device *mdev,
+		  struct mic_irq *cookie, void *data)
+{
+	u32 offset;
+	u32 entry;
+	u8 src_id;
+	unsigned int irq;
+	struct pci_dev *pdev = mdev->pdev;
+
+	entry = GET_ENTRY((unsigned long)cookie);
+	offset = GET_OFFSET((unsigned long)cookie);
+	if (mdev->irq_info.num_vectors > 1) {
+		if (entry >= mdev->irq_info.num_vectors) {
+			dev_warn(&mdev->pdev->dev,
+				 "entry %d should be < num_irq %d\n",
+				entry, mdev->irq_info.num_vectors);
+			return;
+		}
+		irq = mdev->irq_info.msix_entries[entry].vector;
+		free_irq(irq, data);
+		mdev->irq_info.mic_msi_map[entry] &= ~(BIT(offset));
+		mdev->intr_ops->program_msi_to_src_map(mdev,
+			entry, offset, false);
+
+		dev_dbg(&mdev->pdev->dev, "irq: %d freed\n", irq);
+	} else {
+		irq = pdev->irq;
+		src_id = mic_unregister_intr_callback(mdev, offset);
+		if (src_id >= MIC_NUM_OFFSETS) {
+			dev_warn(&mdev->pdev->dev, "Error unregistering callback\n");
+			return;
+		}
+		if (pci_dev_msi_enabled(pdev)) {
+			mdev->irq_info.mic_msi_map[entry] &= ~(BIT(src_id));
+			mdev->intr_ops->program_msi_to_src_map(mdev,
+				entry, src_id, false);
+		}
+		dev_dbg(&mdev->pdev->dev, "callback %d unregistered for src: %d\n",
+			offset, src_id);
+	}
+}
+
+/**
+ * mic_setup_interrupts - Initializes interrupts.
+ *
+ * @mdev: pointer to mic_device instance
+ * @pdev: PCI device structure
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+int mic_setup_interrupts(struct mic_device *mdev, struct pci_dev *pdev)
+{
+	int rc;
+
+	rc = mic_setup_msix(mdev, pdev);
+	if (!rc)
+		goto done;
+
+	rc = mic_setup_msi(mdev, pdev);
+	if (!rc)
+		goto done;
+
+	rc = mic_setup_intx(mdev, pdev);
+	if (rc) {
+		dev_err(&mdev->pdev->dev, "no usable interrupts\n");
+		return rc;
+	}
+done:
+	mdev->intr_ops->enable_interrupts(mdev);
+	return 0;
+}
+
+/**
+ * mic_free_interrupts - Frees interrupts setup by mic_setup_interrupts
+ *
+ * @mdev: pointer to mic_device instance
+ * @pdev: PCI device structure
+ *
+ * returns none.
+ */
+void mic_free_interrupts(struct mic_device *mdev, struct pci_dev *pdev)
+{
+	int i;
+
+	mdev->intr_ops->disable_interrupts(mdev);
+	if (mdev->irq_info.num_vectors > 1) {
+		for (i = 0; i < mdev->irq_info.num_vectors; i++) {
+			if (mdev->irq_info.mic_msi_map[i])
+				dev_warn(&pdev->dev, "irq %d may still be in use.\n",
+					 mdev->irq_info.msix_entries[i].vector);
+		}
+		kfree(mdev->irq_info.mic_msi_map);
+		kfree(mdev->irq_info.msix_entries);
+		pci_disable_msix(pdev);
+	} else {
+		if (pci_dev_msi_enabled(pdev)) {
+			free_irq(pdev->irq, mdev);
+			kfree(mdev->irq_info.mic_msi_map);
+			pci_disable_msi(pdev);
+		} else {
+			free_irq(pdev->irq, mdev);
+		}
+		mic_release_callbacks(mdev);
+	}
+}
+
+/**
+ * mic_intr_restore - Restore MIC interrupt registers.
+ *
+ * @mdev: pointer to mic_device instance.
+ *
+ * Restore the interrupt registers to values previously
+ * stored in the SW data structures. mic_mutex needs to
+ * be held before calling this function.
+ *
+ * returns None.
+ */
+void mic_intr_restore(struct mic_device *mdev)
+{
+	int entry, offset;
+	struct pci_dev *pdev = mdev->pdev;
+
+	if (!pci_dev_msi_enabled(pdev))
+		return;
+
+	for (entry = 0; entry < mdev->irq_info.num_vectors; entry++) {
+		for (offset = 0; offset < MIC_NUM_OFFSETS; offset++) {
+			if (mdev->irq_info.mic_msi_map[entry] & BIT(offset))
+				mdev->intr_ops->program_msi_to_src_map(mdev,
+					entry, offset, true);
+		}
+	}
+}
diff --git a/drivers/misc/mic/host/mic_intr.h b/drivers/misc/mic/host/mic_intr.h
new file mode 100644
index 000000000..cce28824d
--- /dev/null
+++ b/drivers/misc/mic/host/mic_intr.h
@@ -0,0 +1,149 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#ifndef _MIC_INTR_H_
+#define _MIC_INTR_H_
+
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+/*
+ * The minimum number of msix vectors required for normal operation.
+ * 3 for virtio network, console and block devices.
+ * 1 for card shutdown notifications.
+ * 4 for host owned DMA channels.
+ * 1 for SCIF
+ */
+#define MIC_MIN_MSIX 9
+#define MIC_NUM_OFFSETS 32
+
+/**
+ * mic_intr_source - The type of source that will generate
+ * the interrupt.The number of types needs to be in sync with
+ * MIC_NUM_INTR_TYPES
+ *
+ * MIC_INTR_DB: The source is a doorbell
+ * MIC_INTR_DMA: The source is a DMA channel
+ * MIC_INTR_ERR: The source is an error interrupt e.g. SBOX ERR
+ * MIC_NUM_INTR_TYPES: Total number of interrupt sources.
+ */
+enum mic_intr_type {
+	MIC_INTR_DB = 0,
+	MIC_INTR_DMA,
+	MIC_INTR_ERR,
+	MIC_NUM_INTR_TYPES
+};
+
+/**
+ * struct mic_intr_info - Contains h/w specific interrupt sources
+ * information.
+ *
+ * @intr_start_idx: Contains the starting indexes of the
+ * interrupt types.
+ * @intr_len: Contains the length of the interrupt types.
+ */
+struct mic_intr_info {
+	u16 intr_start_idx[MIC_NUM_INTR_TYPES];
+	u16 intr_len[MIC_NUM_INTR_TYPES];
+};
+
+/**
+ * struct mic_irq_info - OS specific irq information
+ *
+ * @next_avail_src: next available doorbell that can be assigned.
+ * @msix_entries: msix entries allocated while setting up MSI-x
+ * @mic_msi_map: The MSI/MSI-x mapping information.
+ * @num_vectors: The number of MSI/MSI-x vectors that have been allocated.
+ * @cb_ida: callback ID allocator to track the callbacks registered.
+ * @mic_intr_lock: spinlock to protect the interrupt callback list.
+ * @mic_thread_lock: spinlock to protect the thread callback list.
+ *		   This lock is used to protect against thread_fn while
+ *		   mic_intr_lock is used to protect against interrupt handler.
+ * @cb_list: Array of callback lists one for each source.
+ * @mask: Mask used by the main thread fn to call the underlying thread fns.
+ */
+struct mic_irq_info {
+	int next_avail_src;
+	struct msix_entry *msix_entries;
+	u32 *mic_msi_map;
+	u16 num_vectors;
+	struct ida cb_ida;
+	spinlock_t mic_intr_lock;
+	spinlock_t mic_thread_lock;
+	struct list_head *cb_list;
+	unsigned long mask;
+};
+
+/**
+ * struct mic_intr_cb - Interrupt callback structure.
+ *
+ * @handler: The callback function
+ * @thread_fn: The thread_fn.
+ * @data: Private data of the requester.
+ * @cb_id: The callback id. Identifies this callback.
+ * @list: list head pointing to the next callback structure.
+ */
+struct mic_intr_cb {
+	irq_handler_t handler;
+	irq_handler_t thread_fn;
+	void *data;
+	int cb_id;
+	struct list_head list;
+};
+
+/**
+ * struct mic_irq - opaque pointer used as cookie
+ */
+struct mic_irq;
+
+/* Forward declaration */
+struct mic_device;
+
+/**
+ * struct mic_hw_intr_ops: MIC HW specific interrupt operations
+ * @intr_init: Initialize H/W specific interrupt information.
+ * @enable_interrupts: Enable interrupts from the hardware.
+ * @disable_interrupts: Disable interrupts from the hardware.
+ * @program_msi_to_src_map: Update MSI mapping registers with
+ * irq information.
+ * @read_msi_to_src_map: Read MSI mapping registers containing
+ * irq information.
+ */
+struct mic_hw_intr_ops {
+	void (*intr_init)(struct mic_device *mdev);
+	void (*enable_interrupts)(struct mic_device *mdev);
+	void (*disable_interrupts)(struct mic_device *mdev);
+	void (*program_msi_to_src_map) (struct mic_device *mdev,
+			int idx, int intr_src, bool set);
+	u32 (*read_msi_to_src_map) (struct mic_device *mdev,
+			int idx);
+};
+
+int mic_next_db(struct mic_device *mdev);
+struct mic_irq *
+mic_request_threaded_irq(struct mic_device *mdev,
+			 irq_handler_t handler, irq_handler_t thread_fn,
+			 const char *name, void *data, int intr_src,
+			 enum mic_intr_type type);
+void mic_free_irq(struct mic_device *mdev,
+		struct mic_irq *cookie, void *data);
+int mic_setup_interrupts(struct mic_device *mdev, struct pci_dev *pdev);
+void mic_free_interrupts(struct mic_device *mdev, struct pci_dev *pdev);
+void mic_intr_restore(struct mic_device *mdev);
+#endif
diff --git a/drivers/misc/mic/host/mic_main.c b/drivers/misc/mic/host/mic_main.c
new file mode 100644
index 000000000..035be3e9c
--- /dev/null
+++ b/drivers/misc/mic/host/mic_main.c
@@ -0,0 +1,347 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ */
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/poll.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+#include "mic_x100.h"
+#include "mic_smpt.h"
+
+static const char mic_driver_name[] = "mic";
+
+static const struct pci_device_id mic_pci_tbl[] = {
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2250)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2251)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2252)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2253)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2254)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2255)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2256)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2257)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2258)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2259)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225a)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225b)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225c)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225d)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225e)},
+
+	/* required last entry */
+	{ 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, mic_pci_tbl);
+
+/* ID allocator for MIC devices */
+static struct ida g_mic_ida;
+
+/* Initialize the device page */
+static int mic_dp_init(struct mic_device *mdev)
+{
+	mdev->dp = kzalloc(MIC_DP_SIZE, GFP_KERNEL);
+	if (!mdev->dp)
+		return -ENOMEM;
+
+	mdev->dp_dma_addr = mic_map_single(mdev,
+		mdev->dp, MIC_DP_SIZE);
+	if (mic_map_error(mdev->dp_dma_addr)) {
+		kfree(mdev->dp);
+		dev_err(&mdev->pdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, -ENOMEM);
+		return -ENOMEM;
+	}
+	mdev->ops->write_spad(mdev, MIC_DPLO_SPAD, mdev->dp_dma_addr);
+	mdev->ops->write_spad(mdev, MIC_DPHI_SPAD, mdev->dp_dma_addr >> 32);
+	return 0;
+}
+
+/* Uninitialize the device page */
+static void mic_dp_uninit(struct mic_device *mdev)
+{
+	mic_unmap_single(mdev, mdev->dp_dma_addr, MIC_DP_SIZE);
+	kfree(mdev->dp);
+}
+
+/**
+ * mic_ops_init: Initialize HW specific operation tables.
+ *
+ * @mdev: pointer to mic_device instance
+ *
+ * returns none.
+ */
+static void mic_ops_init(struct mic_device *mdev)
+{
+	switch (mdev->family) {
+	case MIC_FAMILY_X100:
+		mdev->ops = &mic_x100_ops;
+		mdev->intr_ops = &mic_x100_intr_ops;
+		mdev->smpt_ops = &mic_x100_smpt_ops;
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * mic_get_family - Determine hardware family to which this MIC belongs.
+ *
+ * @pdev: The pci device structure
+ *
+ * returns family.
+ */
+static enum mic_hw_family mic_get_family(struct pci_dev *pdev)
+{
+	enum mic_hw_family family;
+
+	switch (pdev->device) {
+	case MIC_X100_PCI_DEVICE_2250:
+	case MIC_X100_PCI_DEVICE_2251:
+	case MIC_X100_PCI_DEVICE_2252:
+	case MIC_X100_PCI_DEVICE_2253:
+	case MIC_X100_PCI_DEVICE_2254:
+	case MIC_X100_PCI_DEVICE_2255:
+	case MIC_X100_PCI_DEVICE_2256:
+	case MIC_X100_PCI_DEVICE_2257:
+	case MIC_X100_PCI_DEVICE_2258:
+	case MIC_X100_PCI_DEVICE_2259:
+	case MIC_X100_PCI_DEVICE_225a:
+	case MIC_X100_PCI_DEVICE_225b:
+	case MIC_X100_PCI_DEVICE_225c:
+	case MIC_X100_PCI_DEVICE_225d:
+	case MIC_X100_PCI_DEVICE_225e:
+		family = MIC_FAMILY_X100;
+		break;
+	default:
+		family = MIC_FAMILY_UNKNOWN;
+		break;
+	}
+	return family;
+}
+
+/**
+ * mic_device_init - Allocates and initializes the MIC device structure
+ *
+ * @mdev: pointer to mic_device instance
+ * @pdev: The pci device structure
+ *
+ * returns none.
+ */
+static void
+mic_device_init(struct mic_device *mdev, struct pci_dev *pdev)
+{
+	mdev->pdev = pdev;
+	mdev->family = mic_get_family(pdev);
+	mdev->stepping = pdev->revision;
+	mic_ops_init(mdev);
+	mutex_init(&mdev->mic_mutex);
+	mdev->irq_info.next_avail_src = 0;
+}
+
+/**
+ * mic_probe - Device Initialization Routine
+ *
+ * @pdev: PCI device structure
+ * @ent: entry in mic_pci_tbl
+ *
+ * returns 0 on success, < 0 on failure.
+ */
+static int mic_probe(struct pci_dev *pdev,
+		     const struct pci_device_id *ent)
+{
+	int rc;
+	struct mic_device *mdev;
+
+	mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
+	if (!mdev) {
+		rc = -ENOMEM;
+		dev_err(&pdev->dev, "mdev kmalloc failed rc %d\n", rc);
+		goto mdev_alloc_fail;
+	}
+	mdev->id = ida_simple_get(&g_mic_ida, 0, MIC_MAX_NUM_DEVS, GFP_KERNEL);
+	if (mdev->id < 0) {
+		rc = mdev->id;
+		dev_err(&pdev->dev, "ida_simple_get failed rc %d\n", rc);
+		goto ida_fail;
+	}
+
+	mic_device_init(mdev, pdev);
+
+	rc = pci_enable_device(pdev);
+	if (rc) {
+		dev_err(&pdev->dev, "failed to enable pci device.\n");
+		goto ida_remove;
+	}
+
+	pci_set_master(pdev);
+
+	rc = pci_request_regions(pdev, mic_driver_name);
+	if (rc) {
+		dev_err(&pdev->dev, "failed to get pci regions.\n");
+		goto disable_device;
+	}
+
+	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (rc) {
+		dev_err(&pdev->dev, "Cannot set DMA mask\n");
+		goto release_regions;
+	}
+
+	mdev->mmio.pa = pci_resource_start(pdev, mdev->ops->mmio_bar);
+	mdev->mmio.len = pci_resource_len(pdev, mdev->ops->mmio_bar);
+	mdev->mmio.va = pci_ioremap_bar(pdev, mdev->ops->mmio_bar);
+	if (!mdev->mmio.va) {
+		dev_err(&pdev->dev, "Cannot remap MMIO BAR\n");
+		rc = -EIO;
+		goto release_regions;
+	}
+
+	mdev->aper.pa = pci_resource_start(pdev, mdev->ops->aper_bar);
+	mdev->aper.len = pci_resource_len(pdev, mdev->ops->aper_bar);
+	mdev->aper.va = ioremap_wc(mdev->aper.pa, mdev->aper.len);
+	if (!mdev->aper.va) {
+		dev_err(&pdev->dev, "Cannot remap Aperture BAR\n");
+		rc = -EIO;
+		goto unmap_mmio;
+	}
+
+	mdev->intr_ops->intr_init(mdev);
+	rc = mic_setup_interrupts(mdev, pdev);
+	if (rc) {
+		dev_err(&pdev->dev, "mic_setup_interrupts failed %d\n", rc);
+		goto unmap_aper;
+	}
+	rc = mic_smpt_init(mdev);
+	if (rc) {
+		dev_err(&pdev->dev, "smpt_init failed %d\n", rc);
+		goto free_interrupts;
+	}
+
+	pci_set_drvdata(pdev, mdev);
+
+	rc = mic_dp_init(mdev);
+	if (rc) {
+		dev_err(&pdev->dev, "mic_dp_init failed rc %d\n", rc);
+		goto smpt_uninit;
+	}
+	mic_bootparam_init(mdev);
+	mic_create_debug_dir(mdev);
+
+	mdev->cosm_dev = cosm_register_device(&mdev->pdev->dev, &cosm_hw_ops);
+	if (IS_ERR(mdev->cosm_dev)) {
+		rc = PTR_ERR(mdev->cosm_dev);
+		dev_err(&pdev->dev, "cosm_add_device failed rc %d\n", rc);
+		goto cleanup_debug_dir;
+	}
+	return 0;
+cleanup_debug_dir:
+	mic_delete_debug_dir(mdev);
+	mic_dp_uninit(mdev);
+smpt_uninit:
+	mic_smpt_uninit(mdev);
+free_interrupts:
+	mic_free_interrupts(mdev, pdev);
+unmap_aper:
+	iounmap(mdev->aper.va);
+unmap_mmio:
+	iounmap(mdev->mmio.va);
+release_regions:
+	pci_release_regions(pdev);
+disable_device:
+	pci_disable_device(pdev);
+ida_remove:
+	ida_simple_remove(&g_mic_ida, mdev->id);
+ida_fail:
+	kfree(mdev);
+mdev_alloc_fail:
+	dev_err(&pdev->dev, "Probe failed rc %d\n", rc);
+	return rc;
+}
+
+/**
+ * mic_remove - Device Removal Routine
+ * mic_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.
+ *
+ * @pdev: PCI device structure
+ */
+static void mic_remove(struct pci_dev *pdev)
+{
+	struct mic_device *mdev;
+
+	mdev = pci_get_drvdata(pdev);
+	if (!mdev)
+		return;
+
+	cosm_unregister_device(mdev->cosm_dev);
+	mic_delete_debug_dir(mdev);
+	mic_dp_uninit(mdev);
+	mic_smpt_uninit(mdev);
+	mic_free_interrupts(mdev, pdev);
+	iounmap(mdev->aper.va);
+	iounmap(mdev->mmio.va);
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+	ida_simple_remove(&g_mic_ida, mdev->id);
+	kfree(mdev);
+}
+
+static struct pci_driver mic_driver = {
+	.name = mic_driver_name,
+	.id_table = mic_pci_tbl,
+	.probe = mic_probe,
+	.remove = mic_remove
+};
+
+static int __init mic_init(void)
+{
+	int ret;
+
+	request_module("mic_x100_dma");
+	mic_init_debugfs();
+	ida_init(&g_mic_ida);
+	ret = pci_register_driver(&mic_driver);
+	if (ret) {
+		pr_err("pci_register_driver failed ret %d\n", ret);
+		goto cleanup_debugfs;
+	}
+	return 0;
+cleanup_debugfs:
+	ida_destroy(&g_mic_ida);
+	mic_exit_debugfs();
+	return ret;
+}
+
+static void __exit mic_exit(void)
+{
+	pci_unregister_driver(&mic_driver);
+	ida_destroy(&g_mic_ida);
+	mic_exit_debugfs();
+}
+
+module_init(mic_init);
+module_exit(mic_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC X100 Host driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/host/mic_smpt.c b/drivers/misc/mic/host/mic_smpt.c
new file mode 100644
index 000000000..c3f958580
--- /dev/null
+++ b/drivers/misc/mic/host/mic_smpt.c
@@ -0,0 +1,439 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#include <linux/pci.h>
+
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+#include "mic_smpt.h"
+
+static inline u64 mic_system_page_mask(struct mic_device *mdev)
+{
+	return (1ULL << mdev->smpt->info.page_shift) - 1ULL;
+}
+
+static inline u8 mic_sys_addr_to_smpt(struct mic_device *mdev, dma_addr_t pa)
+{
+	return (pa - mdev->smpt->info.base) >> mdev->smpt->info.page_shift;
+}
+
+static inline u64 mic_smpt_to_pa(struct mic_device *mdev, u8 index)
+{
+	return mdev->smpt->info.base + (index * mdev->smpt->info.page_size);
+}
+
+static inline u64 mic_smpt_offset(struct mic_device *mdev, dma_addr_t pa)
+{
+	return pa & mic_system_page_mask(mdev);
+}
+
+static inline u64 mic_smpt_align_low(struct mic_device *mdev, dma_addr_t pa)
+{
+	return ALIGN(pa - mic_system_page_mask(mdev),
+		mdev->smpt->info.page_size);
+}
+
+static inline u64 mic_smpt_align_high(struct mic_device *mdev, dma_addr_t pa)
+{
+	return ALIGN(pa, mdev->smpt->info.page_size);
+}
+
+/* Total Cumulative system memory accessible by MIC across all SMPT entries */
+static inline u64 mic_max_system_memory(struct mic_device *mdev)
+{
+	return mdev->smpt->info.num_reg * mdev->smpt->info.page_size;
+}
+
+/* Maximum system memory address accessible by MIC */
+static inline u64 mic_max_system_addr(struct mic_device *mdev)
+{
+	return mdev->smpt->info.base + mic_max_system_memory(mdev) - 1ULL;
+}
+
+/* Check if the DMA address is a MIC system memory address */
+static inline bool
+mic_is_system_addr(struct mic_device *mdev, dma_addr_t pa)
+{
+	return pa >= mdev->smpt->info.base && pa <= mic_max_system_addr(mdev);
+}
+
+/* Populate an SMPT entry and update the reference counts. */
+static void mic_add_smpt_entry(int spt, s64 *ref, u64 addr,
+			       int entries, struct mic_device *mdev)
+{
+	struct mic_smpt_info *smpt_info = mdev->smpt;
+	int i;
+
+	for (i = spt; i < spt + entries; i++,
+		addr += smpt_info->info.page_size) {
+		if (!smpt_info->entry[i].ref_count &&
+		    (smpt_info->entry[i].dma_addr != addr)) {
+			mdev->smpt_ops->set(mdev, addr, i);
+			smpt_info->entry[i].dma_addr = addr;
+		}
+		smpt_info->entry[i].ref_count += ref[i - spt];
+	}
+}
+
+/*
+ * Find an available MIC address in MIC SMPT address space
+ * for a given DMA address and size.
+ */
+static dma_addr_t mic_smpt_op(struct mic_device *mdev, u64 dma_addr,
+			      int entries, s64 *ref, size_t size)
+{
+	int spt;
+	int ae = 0;
+	int i;
+	unsigned long flags;
+	dma_addr_t mic_addr = 0;
+	dma_addr_t addr = dma_addr;
+	struct mic_smpt_info *smpt_info = mdev->smpt;
+
+	spin_lock_irqsave(&smpt_info->smpt_lock, flags);
+
+	/* find existing entries */
+	for (i = 0; i < smpt_info->info.num_reg; i++) {
+		if (smpt_info->entry[i].dma_addr == addr) {
+			ae++;
+			addr += smpt_info->info.page_size;
+		} else if (ae) /* cannot find contiguous entries */
+			goto not_found;
+
+		if (ae == entries)
+			goto found;
+	}
+
+	/* find free entry */
+	for (ae = 0, i = 0; i < smpt_info->info.num_reg; i++) {
+		ae = (smpt_info->entry[i].ref_count == 0) ? ae + 1 : 0;
+		if (ae == entries)
+			goto found;
+	}
+
+not_found:
+	spin_unlock_irqrestore(&smpt_info->smpt_lock, flags);
+	return mic_addr;
+
+found:
+	spt = i - entries + 1;
+	mic_addr = mic_smpt_to_pa(mdev, spt);
+	mic_add_smpt_entry(spt, ref, dma_addr, entries, mdev);
+	smpt_info->map_count++;
+	smpt_info->ref_count += (s64)size;
+	spin_unlock_irqrestore(&smpt_info->smpt_lock, flags);
+	return mic_addr;
+}
+
+/*
+ * Returns number of smpt entries needed for dma_addr to dma_addr + size
+ * also returns the reference count array for each of those entries
+ * and the starting smpt address
+ */
+static int mic_get_smpt_ref_count(struct mic_device *mdev, dma_addr_t dma_addr,
+				  size_t size, s64 *ref,  u64 *smpt_start)
+{
+	u64 start =  dma_addr;
+	u64 end = dma_addr + size;
+	int i = 0;
+
+	while (start < end) {
+		ref[i++] = min(mic_smpt_align_high(mdev, start + 1),
+			end) - start;
+		start = mic_smpt_align_high(mdev, start + 1);
+	}
+
+	if (smpt_start)
+		*smpt_start = mic_smpt_align_low(mdev, dma_addr);
+
+	return i;
+}
+
+/*
+ * mic_to_dma_addr - Converts a MIC address to a DMA address.
+ *
+ * @mdev: pointer to mic_device instance.
+ * @mic_addr: MIC address.
+ *
+ * returns a DMA address.
+ */
+dma_addr_t mic_to_dma_addr(struct mic_device *mdev, dma_addr_t mic_addr)
+{
+	struct mic_smpt_info *smpt_info = mdev->smpt;
+	int spt;
+	dma_addr_t dma_addr;
+
+	if (!mic_is_system_addr(mdev, mic_addr)) {
+		dev_err(&mdev->pdev->dev,
+			"mic_addr is invalid. mic_addr = 0x%llx\n", mic_addr);
+		return -EINVAL;
+	}
+	spt = mic_sys_addr_to_smpt(mdev, mic_addr);
+	dma_addr = smpt_info->entry[spt].dma_addr +
+		mic_smpt_offset(mdev, mic_addr);
+	return dma_addr;
+}
+
+/**
+ * mic_map - Maps a DMA address to a MIC physical address.
+ *
+ * @mdev: pointer to mic_device instance.
+ * @dma_addr: DMA address.
+ * @size: Size of the region to be mapped.
+ *
+ * This API converts the DMA address provided to a DMA address understood
+ * by MIC. Caller should check for errors by calling mic_map_error(..).
+ *
+ * returns DMA address as required by MIC.
+ */
+dma_addr_t mic_map(struct mic_device *mdev, dma_addr_t dma_addr, size_t size)
+{
+	dma_addr_t mic_addr = 0;
+	int num_entries;
+	s64 *ref;
+	u64 smpt_start;
+
+	if (!size || size > mic_max_system_memory(mdev))
+		return mic_addr;
+
+	ref = kmalloc_array(mdev->smpt->info.num_reg, sizeof(s64), GFP_ATOMIC);
+	if (!ref)
+		return mic_addr;
+
+	num_entries = mic_get_smpt_ref_count(mdev, dma_addr, size,
+					     ref, &smpt_start);
+
+	/* Set the smpt table appropriately and get 16G aligned mic address */
+	mic_addr = mic_smpt_op(mdev, smpt_start, num_entries, ref, size);
+
+	kfree(ref);
+
+	/*
+	 * If mic_addr is zero then its an error case
+	 * since mic_addr can never be zero.
+	 * else generate mic_addr by adding the 16G offset in dma_addr
+	 */
+	if (!mic_addr && MIC_FAMILY_X100 == mdev->family) {
+		dev_err(&mdev->pdev->dev,
+			"mic_map failed dma_addr 0x%llx size 0x%lx\n",
+			dma_addr, size);
+		return mic_addr;
+	} else {
+		return mic_addr + mic_smpt_offset(mdev, dma_addr);
+	}
+}
+
+/**
+ * mic_unmap - Unmaps a MIC physical address.
+ *
+ * @mdev: pointer to mic_device instance.
+ * @mic_addr: MIC physical address.
+ * @size: Size of the region to be unmapped.
+ *
+ * This API unmaps the mappings created by mic_map(..).
+ *
+ * returns None.
+ */
+void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size)
+{
+	struct mic_smpt_info *smpt_info = mdev->smpt;
+	s64 *ref;
+	int num_smpt;
+	int spt;
+	int i;
+	unsigned long flags;
+
+	if (!size)
+		return;
+
+	if (!mic_is_system_addr(mdev, mic_addr)) {
+		dev_err(&mdev->pdev->dev,
+			"invalid address: 0x%llx\n", mic_addr);
+		return;
+	}
+
+	spt = mic_sys_addr_to_smpt(mdev, mic_addr);
+	ref = kmalloc_array(mdev->smpt->info.num_reg, sizeof(s64), GFP_ATOMIC);
+	if (!ref)
+		return;
+
+	/* Get number of smpt entries to be mapped, ref count array */
+	num_smpt = mic_get_smpt_ref_count(mdev, mic_addr, size, ref, NULL);
+
+	spin_lock_irqsave(&smpt_info->smpt_lock, flags);
+	smpt_info->unmap_count++;
+	smpt_info->ref_count -= (s64)size;
+
+	for (i = spt; i < spt + num_smpt; i++) {
+		smpt_info->entry[i].ref_count -= ref[i - spt];
+		if (smpt_info->entry[i].ref_count < 0)
+			dev_warn(&mdev->pdev->dev,
+				 "ref count for entry %d is negative\n", i);
+	}
+	spin_unlock_irqrestore(&smpt_info->smpt_lock, flags);
+	kfree(ref);
+}
+
+/**
+ * mic_map_single - Maps a virtual address to a MIC physical address.
+ *
+ * @mdev: pointer to mic_device instance.
+ * @va: Kernel direct mapped virtual address.
+ * @size: Size of the region to be mapped.
+ *
+ * This API calls pci_map_single(..) for the direct mapped virtual address
+ * and then converts the DMA address provided to a DMA address understood
+ * by MIC. Caller should check for errors by calling mic_map_error(..).
+ *
+ * returns DMA address as required by MIC.
+ */
+dma_addr_t mic_map_single(struct mic_device *mdev, void *va, size_t size)
+{
+	dma_addr_t mic_addr = 0;
+	struct pci_dev *pdev = mdev->pdev;
+	dma_addr_t dma_addr =
+		pci_map_single(pdev, va, size, PCI_DMA_BIDIRECTIONAL);
+
+	if (!pci_dma_mapping_error(pdev, dma_addr)) {
+		mic_addr = mic_map(mdev, dma_addr, size);
+		if (!mic_addr) {
+			dev_err(&mdev->pdev->dev,
+				"mic_map failed dma_addr 0x%llx size 0x%lx\n",
+				dma_addr, size);
+			pci_unmap_single(pdev, dma_addr,
+					 size, PCI_DMA_BIDIRECTIONAL);
+		}
+	}
+	return mic_addr;
+}
+
+/**
+ * mic_unmap_single - Unmaps a MIC physical address.
+ *
+ * @mdev: pointer to mic_device instance.
+ * @mic_addr: MIC physical address.
+ * @size: Size of the region to be unmapped.
+ *
+ * This API unmaps the mappings created by mic_map_single(..).
+ *
+ * returns None.
+ */
+void
+mic_unmap_single(struct mic_device *mdev, dma_addr_t mic_addr, size_t size)
+{
+	struct pci_dev *pdev = mdev->pdev;
+	dma_addr_t dma_addr = mic_to_dma_addr(mdev, mic_addr);
+	mic_unmap(mdev, mic_addr, size);
+	pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL);
+}
+
+/**
+ * mic_smpt_init - Initialize MIC System Memory Page Tables.
+ *
+ * @mdev: pointer to mic_device instance.
+ *
+ * returns 0 for success and -errno for error.
+ */
+int mic_smpt_init(struct mic_device *mdev)
+{
+	int i, err = 0;
+	dma_addr_t dma_addr;
+	struct mic_smpt_info *smpt_info;
+
+	mdev->smpt = kmalloc(sizeof(*mdev->smpt), GFP_KERNEL);
+	if (!mdev->smpt)
+		return -ENOMEM;
+
+	smpt_info = mdev->smpt;
+	mdev->smpt_ops->init(mdev);
+	smpt_info->entry = kmalloc_array(smpt_info->info.num_reg,
+					 sizeof(*smpt_info->entry), GFP_KERNEL);
+	if (!smpt_info->entry) {
+		err = -ENOMEM;
+		goto free_smpt;
+	}
+	spin_lock_init(&smpt_info->smpt_lock);
+	for (i = 0; i < smpt_info->info.num_reg; i++) {
+		dma_addr = i * smpt_info->info.page_size;
+		smpt_info->entry[i].dma_addr = dma_addr;
+		smpt_info->entry[i].ref_count = 0;
+		mdev->smpt_ops->set(mdev, dma_addr, i);
+	}
+	smpt_info->ref_count = 0;
+	smpt_info->map_count = 0;
+	smpt_info->unmap_count = 0;
+	return 0;
+free_smpt:
+	kfree(smpt_info);
+	return err;
+}
+
+/**
+ * mic_smpt_uninit - UnInitialize MIC System Memory Page Tables.
+ *
+ * @mdev: pointer to mic_device instance.
+ *
+ * returns None.
+ */
+void mic_smpt_uninit(struct mic_device *mdev)
+{
+	struct mic_smpt_info *smpt_info = mdev->smpt;
+	int i;
+
+	dev_dbg(&mdev->pdev->dev,
+		"nodeid %d SMPT ref count %lld map %lld unmap %lld\n",
+		mdev->id, smpt_info->ref_count,
+		smpt_info->map_count, smpt_info->unmap_count);
+
+	for (i = 0; i < smpt_info->info.num_reg; i++) {
+		dev_dbg(&mdev->pdev->dev,
+			"SMPT entry[%d] dma_addr = 0x%llx ref_count = %lld\n",
+			i, smpt_info->entry[i].dma_addr,
+			smpt_info->entry[i].ref_count);
+		if (smpt_info->entry[i].ref_count)
+			dev_warn(&mdev->pdev->dev,
+				 "ref count for entry %d is not zero\n", i);
+	}
+	kfree(smpt_info->entry);
+	kfree(smpt_info);
+}
+
+/**
+ * mic_smpt_restore - Restore MIC System Memory Page Tables.
+ *
+ * @mdev: pointer to mic_device instance.
+ *
+ * Restore the SMPT registers to values previously stored in the
+ * SW data structures. Some MIC steppings lose register state
+ * across resets and this API should be called for performing
+ * a restore operation if required.
+ *
+ * returns None.
+ */
+void mic_smpt_restore(struct mic_device *mdev)
+{
+	int i;
+	dma_addr_t dma_addr;
+
+	for (i = 0; i < mdev->smpt->info.num_reg; i++) {
+		dma_addr = mdev->smpt->entry[i].dma_addr;
+		mdev->smpt_ops->set(mdev, dma_addr, i);
+	}
+}
diff --git a/drivers/misc/mic/host/mic_smpt.h b/drivers/misc/mic/host/mic_smpt.h
new file mode 100644
index 000000000..68721c6e7
--- /dev/null
+++ b/drivers/misc/mic/host/mic_smpt.h
@@ -0,0 +1,99 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#ifndef MIC_SMPT_H
+#define MIC_SMPT_H
+/**
+ * struct mic_smpt_ops - MIC HW specific SMPT operations.
+ * @init: Initialize hardware specific SMPT information in mic_smpt_hw_info.
+ * @set: Set the value for a particular SMPT entry.
+ */
+struct mic_smpt_ops {
+	void (*init)(struct mic_device *mdev);
+	void (*set)(struct mic_device *mdev, dma_addr_t dma_addr, u8 index);
+};
+
+/**
+ * struct mic_smpt - MIC SMPT entry information.
+ * @dma_addr: Base DMA address for this SMPT entry.
+ * @ref_count: Number of active mappings for this SMPT entry in bytes.
+ */
+struct mic_smpt {
+	dma_addr_t dma_addr;
+	s64 ref_count;
+};
+
+/**
+ * struct mic_smpt_hw_info - MIC SMPT hardware specific information.
+ * @num_reg: Number of SMPT registers.
+ * @page_shift: System memory page shift.
+ * @page_size: System memory page size.
+ * @base: System address base.
+ */
+struct mic_smpt_hw_info {
+	u8 num_reg;
+	u8 page_shift;
+	u64 page_size;
+	u64 base;
+};
+
+/**
+ * struct mic_smpt_info - MIC SMPT information.
+ * @entry: Array of SMPT entries.
+ * @smpt_lock: Spin lock protecting access to SMPT data structures.
+ * @info: Hardware specific SMPT information.
+ * @ref_count: Number of active SMPT mappings (for debug).
+ * @map_count: Number of SMPT mappings created (for debug).
+ * @unmap_count: Number of SMPT mappings destroyed (for debug).
+ */
+struct mic_smpt_info {
+	struct mic_smpt *entry;
+	spinlock_t smpt_lock;
+	struct mic_smpt_hw_info info;
+	s64 ref_count;
+	s64 map_count;
+	s64 unmap_count;
+};
+
+dma_addr_t mic_map_single(struct mic_device *mdev, void *va, size_t size);
+void mic_unmap_single(struct mic_device *mdev,
+	dma_addr_t mic_addr, size_t size);
+dma_addr_t mic_map(struct mic_device *mdev,
+	dma_addr_t dma_addr, size_t size);
+void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size);
+dma_addr_t mic_to_dma_addr(struct mic_device *mdev, dma_addr_t mic_addr);
+
+/**
+ * mic_map_error - Check a MIC address for errors.
+ *
+ * @mdev: pointer to mic_device instance.
+ *
+ * returns Whether there was an error during mic_map..(..) APIs.
+ */
+static inline bool mic_map_error(dma_addr_t mic_addr)
+{
+	return !mic_addr;
+}
+
+int mic_smpt_init(struct mic_device *mdev);
+void mic_smpt_uninit(struct mic_device *mdev);
+void mic_smpt_restore(struct mic_device *mdev);
+
+#endif
diff --git a/drivers/misc/mic/host/mic_x100.c b/drivers/misc/mic/host/mic_x100.c
new file mode 100644
index 000000000..82a973c85
--- /dev/null
+++ b/drivers/misc/mic/host/mic_x100.c
@@ -0,0 +1,584 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#include <linux/fs.h>
+#include <linux/pci.h>
+#include <linux/sched.h>
+#include <linux/firmware.h>
+#include <linux/delay.h>
+
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+#include "mic_x100.h"
+#include "mic_smpt.h"
+
+/**
+ * mic_x100_write_spad - write to the scratchpad register
+ * @mdev: pointer to mic_device instance
+ * @idx: index to the scratchpad register, 0 based
+ * @val: the data value to put into the register
+ *
+ * This function allows writing of a 32bit value to the indexed scratchpad
+ * register.
+ *
+ * RETURNS: none.
+ */
+static void
+mic_x100_write_spad(struct mic_device *mdev, unsigned int idx, u32 val)
+{
+	dev_dbg(&mdev->pdev->dev, "Writing 0x%x to scratch pad index %d\n",
+		val, idx);
+	mic_mmio_write(&mdev->mmio, val,
+		       MIC_X100_SBOX_BASE_ADDRESS +
+		       MIC_X100_SBOX_SPAD0 + idx * 4);
+}
+
+/**
+ * mic_x100_read_spad - read from the scratchpad register
+ * @mdev: pointer to mic_device instance
+ * @idx: index to scratchpad register, 0 based
+ *
+ * This function allows reading of the 32bit scratchpad register.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static u32
+mic_x100_read_spad(struct mic_device *mdev, unsigned int idx)
+{
+	u32 val = mic_mmio_read(&mdev->mmio,
+		MIC_X100_SBOX_BASE_ADDRESS +
+		MIC_X100_SBOX_SPAD0 + idx * 4);
+
+	dev_dbg(&mdev->pdev->dev,
+		"Reading 0x%x from scratch pad index %d\n", val, idx);
+	return val;
+}
+
+/**
+ * mic_x100_enable_interrupts - Enable interrupts.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_enable_interrupts(struct mic_device *mdev)
+{
+	u32 reg;
+	struct mic_mw *mw = &mdev->mmio;
+	u32 sice0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SICE0;
+	u32 siac0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SIAC0;
+
+	reg = mic_mmio_read(mw, sice0);
+	reg |= MIC_X100_SBOX_DBR_BITS(0xf) | MIC_X100_SBOX_DMA_BITS(0xff);
+	mic_mmio_write(mw, reg, sice0);
+
+	/*
+	 * Enable auto-clear when enabling interrupts. Applicable only for
+	 * MSI-x. Legacy and MSI mode cannot have auto-clear enabled.
+	 */
+	if (mdev->irq_info.num_vectors > 1) {
+		reg = mic_mmio_read(mw, siac0);
+		reg |= MIC_X100_SBOX_DBR_BITS(0xf) |
+			MIC_X100_SBOX_DMA_BITS(0xff);
+		mic_mmio_write(mw, reg, siac0);
+	}
+}
+
+/**
+ * mic_x100_disable_interrupts - Disable interrupts.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_disable_interrupts(struct mic_device *mdev)
+{
+	u32 reg;
+	struct mic_mw *mw = &mdev->mmio;
+	u32 sice0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SICE0;
+	u32 siac0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SIAC0;
+	u32 sicc0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SICC0;
+
+	reg = mic_mmio_read(mw, sice0);
+	mic_mmio_write(mw, reg, sicc0);
+
+	if (mdev->irq_info.num_vectors > 1) {
+		reg = mic_mmio_read(mw, siac0);
+		reg &= ~(MIC_X100_SBOX_DBR_BITS(0xf) |
+			MIC_X100_SBOX_DMA_BITS(0xff));
+		mic_mmio_write(mw, reg, siac0);
+	}
+}
+
+/**
+ * mic_x100_send_sbox_intr - Send an MIC_X100_SBOX interrupt to MIC.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_send_sbox_intr(struct mic_device *mdev,
+				    int doorbell)
+{
+	struct mic_mw *mw = &mdev->mmio;
+	u64 apic_icr_offset = MIC_X100_SBOX_APICICR0 + doorbell * 8;
+	u32 apicicr_low = mic_mmio_read(mw, MIC_X100_SBOX_BASE_ADDRESS +
+					apic_icr_offset);
+
+	/* for MIC we need to make sure we "hit" the send_icr bit (13) */
+	apicicr_low = (apicicr_low | (1 << 13));
+
+	/* Ensure that the interrupt is ordered w.r.t. previous stores. */
+	wmb();
+	mic_mmio_write(mw, apicicr_low,
+		       MIC_X100_SBOX_BASE_ADDRESS + apic_icr_offset);
+}
+
+/**
+ * mic_x100_send_rdmasr_intr - Send an RDMASR interrupt to MIC.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_send_rdmasr_intr(struct mic_device *mdev,
+				      int doorbell)
+{
+	int rdmasr_offset = MIC_X100_SBOX_RDMASR0 + (doorbell << 2);
+	/* Ensure that the interrupt is ordered w.r.t. previous stores. */
+	wmb();
+	mic_mmio_write(&mdev->mmio, 0,
+		       MIC_X100_SBOX_BASE_ADDRESS + rdmasr_offset);
+}
+
+/**
+ * __mic_x100_send_intr - Send interrupt to MIC.
+ * @mdev: pointer to mic_device instance
+ * @doorbell: doorbell number.
+ */
+static void mic_x100_send_intr(struct mic_device *mdev, int doorbell)
+{
+	int rdmasr_db;
+	if (doorbell < MIC_X100_NUM_SBOX_IRQ) {
+		mic_x100_send_sbox_intr(mdev, doorbell);
+	} else {
+		rdmasr_db = doorbell - MIC_X100_NUM_SBOX_IRQ;
+		mic_x100_send_rdmasr_intr(mdev, rdmasr_db);
+	}
+}
+
+/**
+ * mic_x100_ack_interrupt - Read the interrupt sources register and
+ * clear it. This function will be called in the MSI/INTx case.
+ * @mdev: Pointer to mic_device instance.
+ *
+ * Returns: bitmask of interrupt sources triggered.
+ */
+static u32 mic_x100_ack_interrupt(struct mic_device *mdev)
+{
+	u32 sicr0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SICR0;
+	u32 reg = mic_mmio_read(&mdev->mmio, sicr0);
+	mic_mmio_write(&mdev->mmio, reg, sicr0);
+	return reg;
+}
+
+/**
+ * mic_x100_intr_workarounds - These hardware specific workarounds are
+ * to be invoked everytime an interrupt is handled.
+ * @mdev: Pointer to mic_device instance.
+ *
+ * Returns: none
+ */
+static void mic_x100_intr_workarounds(struct mic_device *mdev)
+{
+	struct mic_mw *mw = &mdev->mmio;
+
+	/* Clear pending bit array. */
+	if (MIC_A0_STEP == mdev->stepping)
+		mic_mmio_write(mw, 1, MIC_X100_SBOX_BASE_ADDRESS +
+			MIC_X100_SBOX_MSIXPBACR);
+
+	if (mdev->stepping >= MIC_B0_STEP)
+		mdev->intr_ops->enable_interrupts(mdev);
+}
+
+/**
+ * mic_x100_hw_intr_init - Initialize h/w specific interrupt
+ * information.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_hw_intr_init(struct mic_device *mdev)
+{
+	mdev->intr_info = (struct mic_intr_info *)mic_x100_intr_init;
+}
+
+/**
+ * mic_x100_read_msi_to_src_map - read from the MSI mapping registers
+ * @mdev: pointer to mic_device instance
+ * @idx: index to the mapping register, 0 based
+ *
+ * This function allows reading of the 32bit MSI mapping register.
+ *
+ * RETURNS: The value in the register.
+ */
+static u32
+mic_x100_read_msi_to_src_map(struct mic_device *mdev, int idx)
+{
+	return mic_mmio_read(&mdev->mmio,
+		MIC_X100_SBOX_BASE_ADDRESS +
+		MIC_X100_SBOX_MXAR0 + idx * 4);
+}
+
+/**
+ * mic_x100_program_msi_to_src_map - program the MSI mapping registers
+ * @mdev: pointer to mic_device instance
+ * @idx: index to the mapping register, 0 based
+ * @offset: The bit offset in the register that needs to be updated.
+ * @set: boolean specifying if the bit in the specified offset needs
+ * to be set or cleared.
+ *
+ * RETURNS: None.
+ */
+static void
+mic_x100_program_msi_to_src_map(struct mic_device *mdev,
+				int idx, int offset, bool set)
+{
+	unsigned long reg;
+	struct mic_mw *mw = &mdev->mmio;
+	u32 mxar = MIC_X100_SBOX_BASE_ADDRESS +
+		MIC_X100_SBOX_MXAR0 + idx * 4;
+
+	reg = mic_mmio_read(mw, mxar);
+	if (set)
+		__set_bit(offset, &reg);
+	else
+		__clear_bit(offset, &reg);
+	mic_mmio_write(mw, reg, mxar);
+}
+
+/*
+ * mic_x100_reset_fw_ready - Reset Firmware ready status field.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_reset_fw_ready(struct mic_device *mdev)
+{
+	mdev->ops->write_spad(mdev, MIC_X100_DOWNLOAD_INFO, 0);
+}
+
+/*
+ * mic_x100_is_fw_ready - Check if firmware is ready.
+ * @mdev: pointer to mic_device instance
+ */
+static bool mic_x100_is_fw_ready(struct mic_device *mdev)
+{
+	u32 scratch2 = mdev->ops->read_spad(mdev, MIC_X100_DOWNLOAD_INFO);
+	return MIC_X100_SPAD2_DOWNLOAD_STATUS(scratch2) ? true : false;
+}
+
+/**
+ * mic_x100_get_apic_id - Get bootstrap APIC ID.
+ * @mdev: pointer to mic_device instance
+ */
+static u32 mic_x100_get_apic_id(struct mic_device *mdev)
+{
+	u32 scratch2 = 0;
+
+	scratch2 = mdev->ops->read_spad(mdev, MIC_X100_DOWNLOAD_INFO);
+	return MIC_X100_SPAD2_APIC_ID(scratch2);
+}
+
+/**
+ * mic_x100_send_firmware_intr - Send an interrupt to the firmware on MIC.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_send_firmware_intr(struct mic_device *mdev)
+{
+	u32 apicicr_low;
+	u64 apic_icr_offset = MIC_X100_SBOX_APICICR7;
+	int vector = MIC_X100_BSP_INTERRUPT_VECTOR;
+	struct mic_mw *mw = &mdev->mmio;
+
+	/*
+	 * For MIC we need to make sure we "hit"
+	 * the send_icr bit (13).
+	 */
+	apicicr_low = (vector | (1 << 13));
+
+	mic_mmio_write(mw, mic_x100_get_apic_id(mdev),
+		       MIC_X100_SBOX_BASE_ADDRESS + apic_icr_offset + 4);
+
+	/* Ensure that the interrupt is ordered w.r.t. previous stores. */
+	wmb();
+	mic_mmio_write(mw, apicicr_low,
+		       MIC_X100_SBOX_BASE_ADDRESS + apic_icr_offset);
+}
+
+/**
+ * mic_x100_hw_reset - Reset the MIC device.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_hw_reset(struct mic_device *mdev)
+{
+	u32 reset_reg;
+	u32 rgcr = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_RGCR;
+	struct mic_mw *mw = &mdev->mmio;
+
+	/* Ensure that the reset is ordered w.r.t. previous loads and stores */
+	mb();
+	/* Trigger reset */
+	reset_reg = mic_mmio_read(mw, rgcr);
+	reset_reg |= 0x1;
+	mic_mmio_write(mw, reset_reg, rgcr);
+	/*
+	 * It seems we really want to delay at least 1 second
+	 * after touching reset to prevent a lot of problems.
+	 */
+	msleep(1000);
+}
+
+/**
+ * mic_x100_load_command_line - Load command line to MIC.
+ * @mdev: pointer to mic_device instance
+ * @fw: the firmware image
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static int
+mic_x100_load_command_line(struct mic_device *mdev, const struct firmware *fw)
+{
+	u32 len = 0;
+	u32 boot_mem;
+	char *buf;
+	void __iomem *cmd_line_va = mdev->aper.va + mdev->bootaddr + fw->size;
+#define CMDLINE_SIZE 2048
+
+	boot_mem = mdev->aper.len >> 20;
+	buf = kzalloc(CMDLINE_SIZE, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	len += snprintf(buf, CMDLINE_SIZE - len,
+		" mem=%dM", boot_mem);
+	if (mdev->cosm_dev->cmdline)
+		snprintf(buf + len, CMDLINE_SIZE - len, " %s",
+			 mdev->cosm_dev->cmdline);
+	memcpy_toio(cmd_line_va, buf, strlen(buf) + 1);
+	kfree(buf);
+	return 0;
+}
+
+/**
+ * mic_x100_load_ramdisk - Load ramdisk to MIC.
+ * @mdev: pointer to mic_device instance
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static int
+mic_x100_load_ramdisk(struct mic_device *mdev)
+{
+	const struct firmware *fw;
+	int rc;
+	struct boot_params __iomem *bp = mdev->aper.va + mdev->bootaddr;
+
+	rc = request_firmware(&fw, mdev->cosm_dev->ramdisk, &mdev->pdev->dev);
+	if (rc < 0) {
+		dev_err(&mdev->pdev->dev,
+			"ramdisk request_firmware failed: %d %s\n",
+			rc, mdev->cosm_dev->ramdisk);
+		goto error;
+	}
+	/*
+	 * Typically the bootaddr for card OS is 64M
+	 * so copy over the ramdisk @ 128M.
+	 */
+	memcpy_toio(mdev->aper.va + (mdev->bootaddr << 1), fw->data, fw->size);
+	iowrite32(mdev->bootaddr << 1, &bp->hdr.ramdisk_image);
+	iowrite32(fw->size, &bp->hdr.ramdisk_size);
+	release_firmware(fw);
+error:
+	return rc;
+}
+
+/**
+ * mic_x100_get_boot_addr - Get MIC boot address.
+ * @mdev: pointer to mic_device instance
+ *
+ * This function is called during firmware load to determine
+ * the address at which the OS should be downloaded in card
+ * memory i.e. GDDR.
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static int
+mic_x100_get_boot_addr(struct mic_device *mdev)
+{
+	u32 scratch2, boot_addr;
+	int rc = 0;
+
+	scratch2 = mdev->ops->read_spad(mdev, MIC_X100_DOWNLOAD_INFO);
+	boot_addr = MIC_X100_SPAD2_DOWNLOAD_ADDR(scratch2);
+	dev_dbg(&mdev->pdev->dev, "%s %d boot_addr 0x%x\n",
+		__func__, __LINE__, boot_addr);
+	if (boot_addr > (1 << 31)) {
+		dev_err(&mdev->pdev->dev,
+			"incorrect bootaddr 0x%x\n",
+			boot_addr);
+		rc = -EINVAL;
+		goto error;
+	}
+	mdev->bootaddr = boot_addr;
+error:
+	return rc;
+}
+
+/**
+ * mic_x100_load_firmware - Load firmware to MIC.
+ * @mdev: pointer to mic_device instance
+ * @buf: buffer containing boot string including firmware/ramdisk path.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static int
+mic_x100_load_firmware(struct mic_device *mdev, const char *buf)
+{
+	int rc;
+	const struct firmware *fw;
+
+	rc = mic_x100_get_boot_addr(mdev);
+	if (rc)
+		return rc;
+	/* load OS */
+	rc = request_firmware(&fw, mdev->cosm_dev->firmware, &mdev->pdev->dev);
+	if (rc < 0) {
+		dev_err(&mdev->pdev->dev,
+			"ramdisk request_firmware failed: %d %s\n",
+			rc, mdev->cosm_dev->firmware);
+		return rc;
+	}
+	if (mdev->bootaddr > mdev->aper.len - fw->size) {
+		rc = -EINVAL;
+		dev_err(&mdev->pdev->dev, "%s %d rc %d bootaddr 0x%x\n",
+			__func__, __LINE__, rc, mdev->bootaddr);
+		goto error;
+	}
+	memcpy_toio(mdev->aper.va + mdev->bootaddr, fw->data, fw->size);
+	mdev->ops->write_spad(mdev, MIC_X100_FW_SIZE, fw->size);
+	if (!strcmp(mdev->cosm_dev->bootmode, "flash")) {
+		rc = -EINVAL;
+		dev_err(&mdev->pdev->dev, "%s %d rc %d\n",
+			__func__, __LINE__, rc);
+		goto error;
+	}
+	/* load command line */
+	rc = mic_x100_load_command_line(mdev, fw);
+	if (rc) {
+		dev_err(&mdev->pdev->dev, "%s %d rc %d\n",
+			__func__, __LINE__, rc);
+		goto error;
+	}
+	release_firmware(fw);
+	/* load ramdisk */
+	if (mdev->cosm_dev->ramdisk)
+		rc = mic_x100_load_ramdisk(mdev);
+
+	return rc;
+
+error:
+	release_firmware(fw);
+	return rc;
+}
+
+/**
+ * mic_x100_get_postcode - Get postcode status from firmware.
+ * @mdev: pointer to mic_device instance
+ *
+ * RETURNS: postcode.
+ */
+static u32 mic_x100_get_postcode(struct mic_device *mdev)
+{
+	return mic_mmio_read(&mdev->mmio, MIC_X100_POSTCODE);
+}
+
+/**
+ * mic_x100_smpt_set - Update an SMPT entry with a DMA address.
+ * @mdev: pointer to mic_device instance
+ *
+ * RETURNS: none.
+ */
+static void
+mic_x100_smpt_set(struct mic_device *mdev, dma_addr_t dma_addr, u8 index)
+{
+#define SNOOP_ON	(0 << 0)
+#define SNOOP_OFF	(1 << 0)
+/*
+ * Sbox Smpt Reg Bits:
+ * Bits	31:2	Host address
+ * Bits	1	RSVD
+ * Bits	0	No snoop
+ */
+#define BUILD_SMPT(NO_SNOOP, HOST_ADDR)  \
+	(u32)(((HOST_ADDR) << 2) | ((NO_SNOOP) & 0x01))
+
+	uint32_t smpt_reg_val = BUILD_SMPT(SNOOP_ON,
+			dma_addr >> mdev->smpt->info.page_shift);
+	mic_mmio_write(&mdev->mmio, smpt_reg_val,
+		       MIC_X100_SBOX_BASE_ADDRESS +
+		       MIC_X100_SBOX_SMPT00 + (4 * index));
+}
+
+/**
+ * mic_x100_smpt_hw_init - Initialize SMPT X100 specific fields.
+ * @mdev: pointer to mic_device instance
+ *
+ * RETURNS: none.
+ */
+static void mic_x100_smpt_hw_init(struct mic_device *mdev)
+{
+	struct mic_smpt_hw_info *info = &mdev->smpt->info;
+
+	info->num_reg = 32;
+	info->page_shift = 34;
+	info->page_size = (1ULL << info->page_shift);
+	info->base = 0x8000000000ULL;
+}
+
+struct mic_smpt_ops mic_x100_smpt_ops = {
+	.init = mic_x100_smpt_hw_init,
+	.set = mic_x100_smpt_set,
+};
+
+static bool mic_x100_dma_filter(struct dma_chan *chan, void *param)
+{
+	if (chan->device->dev->parent == (struct device *)param)
+		return true;
+	return false;
+}
+
+struct mic_hw_ops mic_x100_ops = {
+	.aper_bar = MIC_X100_APER_BAR,
+	.mmio_bar = MIC_X100_MMIO_BAR,
+	.read_spad = mic_x100_read_spad,
+	.write_spad = mic_x100_write_spad,
+	.send_intr = mic_x100_send_intr,
+	.ack_interrupt = mic_x100_ack_interrupt,
+	.intr_workarounds = mic_x100_intr_workarounds,
+	.reset = mic_x100_hw_reset,
+	.reset_fw_ready = mic_x100_reset_fw_ready,
+	.is_fw_ready = mic_x100_is_fw_ready,
+	.send_firmware_intr = mic_x100_send_firmware_intr,
+	.load_mic_fw = mic_x100_load_firmware,
+	.get_postcode = mic_x100_get_postcode,
+	.dma_filter = mic_x100_dma_filter,
+};
+
+struct mic_hw_intr_ops mic_x100_intr_ops = {
+	.intr_init = mic_x100_hw_intr_init,
+	.enable_interrupts = mic_x100_enable_interrupts,
+	.disable_interrupts = mic_x100_disable_interrupts,
+	.program_msi_to_src_map = mic_x100_program_msi_to_src_map,
+	.read_msi_to_src_map = mic_x100_read_msi_to_src_map,
+};
diff --git a/drivers/misc/mic/host/mic_x100.h b/drivers/misc/mic/host/mic_x100.h
new file mode 100644
index 000000000..8b7daa182
--- /dev/null
+++ b/drivers/misc/mic/host/mic_x100.h
@@ -0,0 +1,98 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#ifndef _MIC_X100_HW_H_
+#define _MIC_X100_HW_H_
+
+#define MIC_X100_PCI_DEVICE_2250 0x2250
+#define MIC_X100_PCI_DEVICE_2251 0x2251
+#define MIC_X100_PCI_DEVICE_2252 0x2252
+#define MIC_X100_PCI_DEVICE_2253 0x2253
+#define MIC_X100_PCI_DEVICE_2254 0x2254
+#define MIC_X100_PCI_DEVICE_2255 0x2255
+#define MIC_X100_PCI_DEVICE_2256 0x2256
+#define MIC_X100_PCI_DEVICE_2257 0x2257
+#define MIC_X100_PCI_DEVICE_2258 0x2258
+#define MIC_X100_PCI_DEVICE_2259 0x2259
+#define MIC_X100_PCI_DEVICE_225a 0x225a
+#define MIC_X100_PCI_DEVICE_225b 0x225b
+#define MIC_X100_PCI_DEVICE_225c 0x225c
+#define MIC_X100_PCI_DEVICE_225d 0x225d
+#define MIC_X100_PCI_DEVICE_225e 0x225e
+
+#define MIC_X100_APER_BAR 0
+#define MIC_X100_MMIO_BAR 4
+
+#define MIC_X100_SBOX_BASE_ADDRESS 0x00010000
+#define MIC_X100_SBOX_SPAD0 0x0000AB20
+#define MIC_X100_SBOX_SICR0_DBR(x) ((x) & 0xf)
+#define MIC_X100_SBOX_SICR0_DMA(x) (((x) >> 8) & 0xff)
+#define MIC_X100_SBOX_SICE0_DBR(x) ((x) & 0xf)
+#define MIC_X100_SBOX_DBR_BITS(x) ((x) & 0xf)
+#define MIC_X100_SBOX_SICE0_DMA(x) (((x) >> 8) & 0xff)
+#define MIC_X100_SBOX_DMA_BITS(x) (((x) & 0xff) << 8)
+
+#define MIC_X100_SBOX_APICICR0 0x0000A9D0
+#define MIC_X100_SBOX_SICR0 0x00009004
+#define MIC_X100_SBOX_SICE0 0x0000900C
+#define MIC_X100_SBOX_SICC0 0x00009010
+#define MIC_X100_SBOX_SIAC0 0x00009014
+#define MIC_X100_SBOX_MSIXPBACR 0x00009084
+#define MIC_X100_SBOX_MXAR0 0x00009044
+#define MIC_X100_SBOX_SMPT00 0x00003100
+#define MIC_X100_SBOX_RDMASR0 0x0000B180
+
+#define MIC_X100_DOORBELL_IDX_START 0
+#define MIC_X100_NUM_DOORBELL 4
+#define MIC_X100_DMA_IDX_START 8
+#define MIC_X100_NUM_DMA 8
+#define MIC_X100_ERR_IDX_START 30
+#define MIC_X100_NUM_ERR 1
+
+#define MIC_X100_NUM_SBOX_IRQ 8
+#define MIC_X100_NUM_RDMASR_IRQ 8
+#define MIC_X100_RDMASR_IRQ_BASE 17
+#define MIC_X100_SPAD2_DOWNLOAD_STATUS(x) ((x) & 0x1)
+#define MIC_X100_SPAD2_APIC_ID(x)	(((x) >> 1) & 0x1ff)
+#define MIC_X100_SPAD2_DOWNLOAD_ADDR(x) ((x) & 0xfffff000)
+#define MIC_X100_SBOX_APICICR7 0x0000AA08
+#define MIC_X100_SBOX_RGCR 0x00004010
+#define MIC_X100_SBOX_SDBIC0 0x0000CC90
+#define MIC_X100_DOWNLOAD_INFO 2
+#define MIC_X100_FW_SIZE 5
+#define MIC_X100_POSTCODE 0x242c
+
+static const u16 mic_x100_intr_init[] = {
+		MIC_X100_DOORBELL_IDX_START,
+		MIC_X100_DMA_IDX_START,
+		MIC_X100_ERR_IDX_START,
+		MIC_X100_NUM_DOORBELL,
+		MIC_X100_NUM_DMA,
+		MIC_X100_NUM_ERR,
+};
+
+/* Host->Card(bootstrap) Interrupt Vector */
+#define MIC_X100_BSP_INTERRUPT_VECTOR 229
+
+extern struct mic_hw_ops mic_x100_ops;
+extern struct mic_smpt_ops mic_x100_smpt_ops;
+extern struct mic_hw_intr_ops mic_x100_intr_ops;
+
+#endif
diff --git a/drivers/misc/mic/scif/Makefile b/drivers/misc/mic/scif/Makefile
new file mode 100644
index 000000000..ff372555d
--- /dev/null
+++ b/drivers/misc/mic/scif/Makefile
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile - SCIF driver.
+# Copyright(c) 2014, Intel Corporation.
+#
+obj-$(CONFIG_SCIF) += scif.o
+scif-objs := scif_main.o
+scif-objs += scif_peer_bus.o
+scif-objs += scif_ports.o
+scif-objs += scif_debugfs.o
+scif-objs += scif_fd.o
+scif-objs += scif_api.o
+scif-objs += scif_epd.o
+scif-objs += scif_rb.o
+scif-objs += scif_nodeqp.o
+scif-objs += scif_nm.o
+scif-objs += scif_dma.o
+scif-objs += scif_fence.o
+scif-objs += scif_mmap.o
+scif-objs += scif_rma.o
+scif-objs += scif_rma_list.o
diff --git a/drivers/misc/mic/scif/scif_api.c b/drivers/misc/mic/scif/scif_api.c
new file mode 100644
index 000000000..8dd0ccede
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_api.c
@@ -0,0 +1,1496 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/scif.h>
+#include "scif_main.h"
+#include "scif_map.h"
+
+static const char * const scif_ep_states[] = {
+	"Unbound",
+	"Bound",
+	"Listening",
+	"Connected",
+	"Connecting",
+	"Mapping",
+	"Closing",
+	"Close Listening",
+	"Disconnected",
+	"Zombie"};
+
+enum conn_async_state {
+	ASYNC_CONN_IDLE = 1,	/* ep setup for async connect */
+	ASYNC_CONN_INPROGRESS,	/* async connect in progress */
+	ASYNC_CONN_FLUSH_WORK	/* async work flush in progress  */
+};
+
+/*
+ * File operations for anonymous inode file associated with a SCIF endpoint,
+ * used in kernel mode SCIF poll. Kernel mode SCIF poll calls portions of the
+ * poll API in the kernel and these take in a struct file *. Since a struct
+ * file is not available to kernel mode SCIF, it uses an anonymous file for
+ * this purpose.
+ */
+const struct file_operations scif_anon_fops = {
+	.owner = THIS_MODULE,
+};
+
+scif_epd_t scif_open(void)
+{
+	struct scif_endpt *ep;
+	int err;
+
+	might_sleep();
+	ep = kzalloc(sizeof(*ep), GFP_KERNEL);
+	if (!ep)
+		goto err_ep_alloc;
+
+	ep->qp_info.qp = kzalloc(sizeof(*ep->qp_info.qp), GFP_KERNEL);
+	if (!ep->qp_info.qp)
+		goto err_qp_alloc;
+
+	err = scif_anon_inode_getfile(ep);
+	if (err)
+		goto err_anon_inode;
+
+	spin_lock_init(&ep->lock);
+	mutex_init(&ep->sendlock);
+	mutex_init(&ep->recvlock);
+
+	scif_rma_ep_init(ep);
+	ep->state = SCIFEP_UNBOUND;
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI open: ep %p success\n", ep);
+	return ep;
+
+err_anon_inode:
+	kfree(ep->qp_info.qp);
+err_qp_alloc:
+	kfree(ep);
+err_ep_alloc:
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(scif_open);
+
+/*
+ * scif_disconnect_ep - Disconnects the endpoint if found
+ * @epd: The end point returned from scif_open()
+ */
+static struct scif_endpt *scif_disconnect_ep(struct scif_endpt *ep)
+{
+	struct scifmsg msg;
+	struct scif_endpt *fep = NULL;
+	struct scif_endpt *tmpep;
+	struct list_head *pos, *tmpq;
+	int err;
+
+	/*
+	 * Wake up any threads blocked in send()/recv() before closing
+	 * out the connection. Grabbing and releasing the send/recv lock
+	 * will ensure that any blocked senders/receivers have exited for
+	 * Ring 0 endpoints. It is a Ring 0 bug to call send/recv after
+	 * close. Ring 3 endpoints are not affected since close will not
+	 * be called while there are IOCTLs executing.
+	 */
+	wake_up_interruptible(&ep->sendwq);
+	wake_up_interruptible(&ep->recvwq);
+	mutex_lock(&ep->sendlock);
+	mutex_unlock(&ep->sendlock);
+	mutex_lock(&ep->recvlock);
+	mutex_unlock(&ep->recvlock);
+
+	/* Remove from the connected list */
+	mutex_lock(&scif_info.connlock);
+	list_for_each_safe(pos, tmpq, &scif_info.connected) {
+		tmpep = list_entry(pos, struct scif_endpt, list);
+		if (tmpep == ep) {
+			list_del(pos);
+			fep = tmpep;
+			spin_lock(&ep->lock);
+			break;
+		}
+	}
+
+	if (!fep) {
+		/*
+		 * The other side has completed the disconnect before
+		 * the end point can be removed from the list. Therefore
+		 * the ep lock is not locked, traverse the disconnected
+		 * list to find the endpoint and release the conn lock.
+		 */
+		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
+			tmpep = list_entry(pos, struct scif_endpt, list);
+			if (tmpep == ep) {
+				list_del(pos);
+				break;
+			}
+		}
+		mutex_unlock(&scif_info.connlock);
+		return NULL;
+	}
+
+	init_completion(&ep->discon);
+	msg.uop = SCIF_DISCNCT;
+	msg.src = ep->port;
+	msg.dst = ep->peer;
+	msg.payload[0] = (u64)ep;
+	msg.payload[1] = ep->remote_ep;
+
+	err = scif_nodeqp_send(ep->remote_dev, &msg);
+	spin_unlock(&ep->lock);
+	mutex_unlock(&scif_info.connlock);
+
+	if (!err)
+		/* Wait for the remote node to respond with SCIF_DISCNT_ACK */
+		wait_for_completion_timeout(&ep->discon,
+					    SCIF_NODE_ALIVE_TIMEOUT);
+	return ep;
+}
+
+int scif_close(scif_epd_t epd)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scif_endpt *tmpep;
+	struct list_head *pos, *tmpq;
+	enum scif_epd_state oldstate;
+	bool flush_conn;
+
+	dev_dbg(scif_info.mdev.this_device, "SCIFAPI close: ep %p %s\n",
+		ep, scif_ep_states[ep->state]);
+	might_sleep();
+	spin_lock(&ep->lock);
+	flush_conn = (ep->conn_async_state == ASYNC_CONN_INPROGRESS);
+	spin_unlock(&ep->lock);
+
+	if (flush_conn)
+		flush_work(&scif_info.conn_work);
+
+	spin_lock(&ep->lock);
+	oldstate = ep->state;
+
+	ep->state = SCIFEP_CLOSING;
+
+	switch (oldstate) {
+	case SCIFEP_ZOMBIE:
+		dev_err(scif_info.mdev.this_device,
+			"SCIFAPI close: zombie state unexpected\n");
+		/* fall through */
+	case SCIFEP_DISCONNECTED:
+		spin_unlock(&ep->lock);
+		scif_unregister_all_windows(epd);
+		/* Remove from the disconnected list */
+		mutex_lock(&scif_info.connlock);
+		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
+			tmpep = list_entry(pos, struct scif_endpt, list);
+			if (tmpep == ep) {
+				list_del(pos);
+				break;
+			}
+		}
+		mutex_unlock(&scif_info.connlock);
+		break;
+	case SCIFEP_UNBOUND:
+	case SCIFEP_BOUND:
+	case SCIFEP_CONNECTING:
+		spin_unlock(&ep->lock);
+		break;
+	case SCIFEP_MAPPING:
+	case SCIFEP_CONNECTED:
+	case SCIFEP_CLOSING:
+	{
+		spin_unlock(&ep->lock);
+		scif_unregister_all_windows(epd);
+		scif_disconnect_ep(ep);
+		break;
+	}
+	case SCIFEP_LISTENING:
+	case SCIFEP_CLLISTEN:
+	{
+		struct scif_conreq *conreq;
+		struct scifmsg msg;
+		struct scif_endpt *aep;
+
+		spin_unlock(&ep->lock);
+		mutex_lock(&scif_info.eplock);
+
+		/* remove from listen list */
+		list_for_each_safe(pos, tmpq, &scif_info.listen) {
+			tmpep = list_entry(pos, struct scif_endpt, list);
+			if (tmpep == ep)
+				list_del(pos);
+		}
+		/* Remove any dangling accepts */
+		while (ep->acceptcnt) {
+			aep = list_first_entry(&ep->li_accept,
+					       struct scif_endpt, liacceptlist);
+			list_del(&aep->liacceptlist);
+			scif_put_port(aep->port.port);
+			list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
+				tmpep = list_entry(pos, struct scif_endpt,
+						   miacceptlist);
+				if (tmpep == aep) {
+					list_del(pos);
+					break;
+				}
+			}
+			mutex_unlock(&scif_info.eplock);
+			mutex_lock(&scif_info.connlock);
+			list_for_each_safe(pos, tmpq, &scif_info.connected) {
+				tmpep = list_entry(pos,
+						   struct scif_endpt, list);
+				if (tmpep == aep) {
+					list_del(pos);
+					break;
+				}
+			}
+			list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
+				tmpep = list_entry(pos,
+						   struct scif_endpt, list);
+				if (tmpep == aep) {
+					list_del(pos);
+					break;
+				}
+			}
+			mutex_unlock(&scif_info.connlock);
+			scif_teardown_ep(aep);
+			mutex_lock(&scif_info.eplock);
+			scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD);
+			ep->acceptcnt--;
+		}
+
+		spin_lock(&ep->lock);
+		mutex_unlock(&scif_info.eplock);
+
+		/* Remove and reject any pending connection requests. */
+		while (ep->conreqcnt) {
+			conreq = list_first_entry(&ep->conlist,
+						  struct scif_conreq, list);
+			list_del(&conreq->list);
+
+			msg.uop = SCIF_CNCT_REJ;
+			msg.dst.node = conreq->msg.src.node;
+			msg.dst.port = conreq->msg.src.port;
+			msg.payload[0] = conreq->msg.payload[0];
+			msg.payload[1] = conreq->msg.payload[1];
+			/*
+			 * No Error Handling on purpose for scif_nodeqp_send().
+			 * If the remote node is lost we still want free the
+			 * connection requests on the self node.
+			 */
+			scif_nodeqp_send(&scif_dev[conreq->msg.src.node],
+					 &msg);
+			ep->conreqcnt--;
+			kfree(conreq);
+		}
+
+		spin_unlock(&ep->lock);
+		/* If a kSCIF accept is waiting wake it up */
+		wake_up_interruptible(&ep->conwq);
+		break;
+	}
+	}
+	scif_put_port(ep->port.port);
+	scif_anon_inode_fput(ep);
+	scif_teardown_ep(ep);
+	scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(scif_close);
+
+/**
+ * scif_flush() - Wakes up any blocking accepts. The endpoint will no longer
+ *			accept new connections.
+ * @epd: The end point returned from scif_open()
+ */
+int __scif_flush(scif_epd_t epd)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+
+	switch (ep->state) {
+	case SCIFEP_LISTENING:
+	{
+		ep->state = SCIFEP_CLLISTEN;
+
+		/* If an accept is waiting wake it up */
+		wake_up_interruptible(&ep->conwq);
+		break;
+	}
+	default:
+		break;
+	}
+	return 0;
+}
+
+int scif_bind(scif_epd_t epd, u16 pn)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int ret = 0;
+	int tmp;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI bind: ep %p %s requested port number %d\n",
+		ep, scif_ep_states[ep->state], pn);
+	if (pn) {
+		/*
+		 * Similar to IETF RFC 1700, SCIF ports below
+		 * SCIF_ADMIN_PORT_END can only be bound by system (or root)
+		 * processes or by processes executed by privileged users.
+		 */
+		if (pn < SCIF_ADMIN_PORT_END && !capable(CAP_SYS_ADMIN)) {
+			ret = -EACCES;
+			goto scif_bind_admin_exit;
+		}
+	}
+
+	spin_lock(&ep->lock);
+	if (ep->state == SCIFEP_BOUND) {
+		ret = -EINVAL;
+		goto scif_bind_exit;
+	} else if (ep->state != SCIFEP_UNBOUND) {
+		ret = -EISCONN;
+		goto scif_bind_exit;
+	}
+
+	if (pn) {
+		tmp = scif_rsrv_port(pn);
+		if (tmp != pn) {
+			ret = -EINVAL;
+			goto scif_bind_exit;
+		}
+	} else {
+		ret = scif_get_new_port();
+		if (ret < 0)
+			goto scif_bind_exit;
+		pn = ret;
+	}
+
+	ep->state = SCIFEP_BOUND;
+	ep->port.node = scif_info.nodeid;
+	ep->port.port = pn;
+	ep->conn_async_state = ASYNC_CONN_IDLE;
+	ret = pn;
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI bind: bound to port number %d\n", pn);
+scif_bind_exit:
+	spin_unlock(&ep->lock);
+scif_bind_admin_exit:
+	return ret;
+}
+EXPORT_SYMBOL_GPL(scif_bind);
+
+int scif_listen(scif_epd_t epd, int backlog)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI listen: ep %p %s\n", ep, scif_ep_states[ep->state]);
+	spin_lock(&ep->lock);
+	switch (ep->state) {
+	case SCIFEP_ZOMBIE:
+	case SCIFEP_CLOSING:
+	case SCIFEP_CLLISTEN:
+	case SCIFEP_UNBOUND:
+	case SCIFEP_DISCONNECTED:
+		spin_unlock(&ep->lock);
+		return -EINVAL;
+	case SCIFEP_LISTENING:
+	case SCIFEP_CONNECTED:
+	case SCIFEP_CONNECTING:
+	case SCIFEP_MAPPING:
+		spin_unlock(&ep->lock);
+		return -EISCONN;
+	case SCIFEP_BOUND:
+		break;
+	}
+
+	ep->state = SCIFEP_LISTENING;
+	ep->backlog = backlog;
+
+	ep->conreqcnt = 0;
+	ep->acceptcnt = 0;
+	INIT_LIST_HEAD(&ep->conlist);
+	init_waitqueue_head(&ep->conwq);
+	INIT_LIST_HEAD(&ep->li_accept);
+	spin_unlock(&ep->lock);
+
+	/*
+	 * Listen status is complete so delete the qp information not needed
+	 * on a listen before placing on the list of listening ep's
+	 */
+	scif_teardown_ep(ep);
+	ep->qp_info.qp = NULL;
+
+	mutex_lock(&scif_info.eplock);
+	list_add_tail(&ep->list, &scif_info.listen);
+	mutex_unlock(&scif_info.eplock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(scif_listen);
+
+/*
+ ************************************************************************
+ * SCIF connection flow:
+ *
+ * 1) A SCIF listening endpoint can call scif_accept(..) to wait for SCIF
+ *	connections via a SCIF_CNCT_REQ message
+ * 2) A SCIF endpoint can initiate a SCIF connection by calling
+ *	scif_connect(..) which calls scif_setup_qp_connect(..) which
+ *	allocates the local qp for the endpoint ring buffer and then sends
+ *	a SCIF_CNCT_REQ to the remote node and waits for a SCIF_CNCT_GNT or
+ *	a SCIF_CNCT_REJ message
+ * 3) The peer node handles a SCIF_CNCT_REQ via scif_cnctreq_resp(..) which
+ *	wakes up any threads blocked in step 1 or sends a SCIF_CNCT_REJ
+ *	message otherwise
+ * 4) A thread blocked waiting for incoming connections allocates its local
+ *	endpoint QP and ring buffer following which it sends a SCIF_CNCT_GNT
+ *	and waits for a SCIF_CNCT_GNT(N)ACK. If the allocation fails then
+ *	the node sends a SCIF_CNCT_REJ message
+ * 5) Upon receipt of a SCIF_CNCT_GNT or a SCIF_CNCT_REJ message the
+ *	connecting endpoint is woken up as part of handling
+ *	scif_cnctgnt_resp(..) following which it maps the remote endpoints'
+ *	QP, updates its outbound QP and sends a SCIF_CNCT_GNTACK message on
+ *	success or a SCIF_CNCT_GNTNACK message on failure and completes
+ *	the scif_connect(..) API
+ * 6) Upon receipt of a SCIF_CNCT_GNT(N)ACK the accepting endpoint blocked
+ *	in step 4 is woken up and completes the scif_accept(..) API
+ * 7) The SCIF connection is now established between the two SCIF endpoints.
+ */
+static int scif_conn_func(struct scif_endpt *ep)
+{
+	int err = 0;
+	struct scifmsg msg;
+	struct device *spdev;
+
+	err = scif_reserve_dma_chan(ep);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		ep->state = SCIFEP_BOUND;
+		goto connect_error_simple;
+	}
+	/* Initiate the first part of the endpoint QP setup */
+	err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset,
+				    SCIF_ENDPT_QP_SIZE, ep->remote_dev);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s err %d qp_offset 0x%llx\n",
+			__func__, err, ep->qp_info.qp_offset);
+		ep->state = SCIFEP_BOUND;
+		goto connect_error_simple;
+	}
+
+	spdev = scif_get_peer_dev(ep->remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		goto cleanup_qp;
+	}
+	/* Format connect message and send it */
+	msg.src = ep->port;
+	msg.dst = ep->conn_port;
+	msg.uop = SCIF_CNCT_REQ;
+	msg.payload[0] = (u64)ep;
+	msg.payload[1] = ep->qp_info.qp_offset;
+	err = _scif_nodeqp_send(ep->remote_dev, &msg);
+	if (err)
+		goto connect_error_dec;
+	scif_put_peer_dev(spdev);
+	/*
+	 * Wait for the remote node to respond with SCIF_CNCT_GNT or
+	 * SCIF_CNCT_REJ message.
+	 */
+	err = wait_event_timeout(ep->conwq, ep->state != SCIFEP_CONNECTING,
+				 SCIF_NODE_ALIVE_TIMEOUT);
+	if (!err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d timeout\n", __func__, __LINE__);
+		ep->state = SCIFEP_BOUND;
+	}
+	spdev = scif_get_peer_dev(ep->remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		goto cleanup_qp;
+	}
+	if (ep->state == SCIFEP_MAPPING) {
+		err = scif_setup_qp_connect_response(ep->remote_dev,
+						     ep->qp_info.qp,
+						     ep->qp_info.gnt_pld);
+		/*
+		 * If the resource to map the queue are not available then
+		 * we need to tell the other side to terminate the accept
+		 */
+		if (err) {
+			dev_err(&ep->remote_dev->sdev->dev,
+				"%s %d err %d\n", __func__, __LINE__, err);
+			msg.uop = SCIF_CNCT_GNTNACK;
+			msg.payload[0] = ep->remote_ep;
+			_scif_nodeqp_send(ep->remote_dev, &msg);
+			ep->state = SCIFEP_BOUND;
+			goto connect_error_dec;
+		}
+
+		msg.uop = SCIF_CNCT_GNTACK;
+		msg.payload[0] = ep->remote_ep;
+		err = _scif_nodeqp_send(ep->remote_dev, &msg);
+		if (err) {
+			ep->state = SCIFEP_BOUND;
+			goto connect_error_dec;
+		}
+		ep->state = SCIFEP_CONNECTED;
+		mutex_lock(&scif_info.connlock);
+		list_add_tail(&ep->list, &scif_info.connected);
+		mutex_unlock(&scif_info.connlock);
+		dev_dbg(&ep->remote_dev->sdev->dev,
+			"SCIFAPI connect: ep %p connected\n", ep);
+	} else if (ep->state == SCIFEP_BOUND) {
+		dev_dbg(&ep->remote_dev->sdev->dev,
+			"SCIFAPI connect: ep %p connection refused\n", ep);
+		err = -ECONNREFUSED;
+		goto connect_error_dec;
+	}
+	scif_put_peer_dev(spdev);
+	return err;
+connect_error_dec:
+	scif_put_peer_dev(spdev);
+cleanup_qp:
+	scif_cleanup_ep_qp(ep);
+connect_error_simple:
+	return err;
+}
+
+/*
+ * scif_conn_handler:
+ *
+ * Workqueue handler for servicing non-blocking SCIF connect
+ *
+ */
+void scif_conn_handler(struct work_struct *work)
+{
+	struct scif_endpt *ep;
+
+	do {
+		ep = NULL;
+		spin_lock(&scif_info.nb_connect_lock);
+		if (!list_empty(&scif_info.nb_connect_list)) {
+			ep = list_first_entry(&scif_info.nb_connect_list,
+					      struct scif_endpt, conn_list);
+			list_del(&ep->conn_list);
+		}
+		spin_unlock(&scif_info.nb_connect_lock);
+		if (ep) {
+			ep->conn_err = scif_conn_func(ep);
+			wake_up_interruptible(&ep->conn_pend_wq);
+		}
+	} while (ep);
+}
+
+int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int err = 0;
+	struct scif_dev *remote_dev;
+	struct device *spdev;
+
+	dev_dbg(scif_info.mdev.this_device, "SCIFAPI connect: ep %p %s\n", ep,
+		scif_ep_states[ep->state]);
+
+	if (!scif_dev || dst->node > scif_info.maxid)
+		return -ENODEV;
+
+	might_sleep();
+
+	remote_dev = &scif_dev[dst->node];
+	spdev = scif_get_peer_dev(remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		return err;
+	}
+
+	spin_lock(&ep->lock);
+	switch (ep->state) {
+	case SCIFEP_ZOMBIE:
+	case SCIFEP_CLOSING:
+		err = -EINVAL;
+		break;
+	case SCIFEP_DISCONNECTED:
+		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
+			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
+		else
+			err = -EINVAL;
+		break;
+	case SCIFEP_LISTENING:
+	case SCIFEP_CLLISTEN:
+		err = -EOPNOTSUPP;
+		break;
+	case SCIFEP_CONNECTING:
+	case SCIFEP_MAPPING:
+		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
+			err = -EINPROGRESS;
+		else
+			err = -EISCONN;
+		break;
+	case SCIFEP_CONNECTED:
+		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
+			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
+		else
+			err = -EISCONN;
+		break;
+	case SCIFEP_UNBOUND:
+		err = scif_get_new_port();
+		if (err < 0)
+			break;
+		ep->port.port = err;
+		ep->port.node = scif_info.nodeid;
+		ep->conn_async_state = ASYNC_CONN_IDLE;
+		/* Fall through */
+	case SCIFEP_BOUND:
+		/*
+		 * If a non-blocking connect has been already initiated
+		 * (conn_async_state is either ASYNC_CONN_INPROGRESS or
+		 * ASYNC_CONN_FLUSH_WORK), the end point could end up in
+		 * SCIF_BOUND due an error in the connection process
+		 * (e.g., connection refused) If conn_async_state is
+		 * ASYNC_CONN_INPROGRESS - transition to ASYNC_CONN_FLUSH_WORK
+		 * so that the error status can be collected. If the state is
+		 * already ASYNC_CONN_FLUSH_WORK - then set the error to
+		 * EINPROGRESS since some other thread is waiting to collect
+		 * error status.
+		 */
+		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
+		} else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
+			err = -EINPROGRESS;
+		} else {
+			ep->conn_port = *dst;
+			init_waitqueue_head(&ep->sendwq);
+			init_waitqueue_head(&ep->recvwq);
+			init_waitqueue_head(&ep->conwq);
+			ep->conn_async_state = 0;
+
+			if (unlikely(non_block))
+				ep->conn_async_state = ASYNC_CONN_INPROGRESS;
+		}
+		break;
+	}
+
+	if (err || ep->conn_async_state == ASYNC_CONN_FLUSH_WORK)
+			goto connect_simple_unlock1;
+
+	ep->state = SCIFEP_CONNECTING;
+	ep->remote_dev = &scif_dev[dst->node];
+	ep->qp_info.qp->magic = SCIFEP_MAGIC;
+	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+		init_waitqueue_head(&ep->conn_pend_wq);
+		spin_lock(&scif_info.nb_connect_lock);
+		list_add_tail(&ep->conn_list, &scif_info.nb_connect_list);
+		spin_unlock(&scif_info.nb_connect_lock);
+		err = -EINPROGRESS;
+		schedule_work(&scif_info.conn_work);
+	}
+connect_simple_unlock1:
+	spin_unlock(&ep->lock);
+	scif_put_peer_dev(spdev);
+	if (err) {
+		return err;
+	} else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
+		flush_work(&scif_info.conn_work);
+		err = ep->conn_err;
+		spin_lock(&ep->lock);
+		ep->conn_async_state = ASYNC_CONN_IDLE;
+		spin_unlock(&ep->lock);
+	} else {
+		err = scif_conn_func(ep);
+	}
+	return err;
+}
+
+int scif_connect(scif_epd_t epd, struct scif_port_id *dst)
+{
+	return __scif_connect(epd, dst, false);
+}
+EXPORT_SYMBOL_GPL(scif_connect);
+
+/**
+ * scif_accept() - Accept a connection request from the remote node
+ *
+ * The function accepts a connection request from the remote node.  Successful
+ * complete is indicate by a new end point being created and passed back
+ * to the caller for future reference.
+ *
+ * Upon successful complete a zero will be returned and the peer information
+ * will be filled in.
+ *
+ * If the end point is not in the listening state -EINVAL will be returned.
+ *
+ * If during the connection sequence resource allocation fails the -ENOMEM
+ * will be returned.
+ *
+ * If the function is called with the ASYNC flag set and no connection requests
+ * are pending it will return -EAGAIN.
+ *
+ * If the remote side is not sending any connection requests the caller may
+ * terminate this function with a signal.  If so a -EINTR will be returned.
+ */
+int scif_accept(scif_epd_t epd, struct scif_port_id *peer,
+		scif_epd_t *newepd, int flags)
+{
+	struct scif_endpt *lep = (struct scif_endpt *)epd;
+	struct scif_endpt *cep;
+	struct scif_conreq *conreq;
+	struct scifmsg msg;
+	int err;
+	struct device *spdev;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI accept: ep %p %s\n", lep, scif_ep_states[lep->state]);
+
+	if (flags & ~SCIF_ACCEPT_SYNC)
+		return -EINVAL;
+
+	if (!peer || !newepd)
+		return -EINVAL;
+
+	might_sleep();
+	spin_lock(&lep->lock);
+	if (lep->state != SCIFEP_LISTENING) {
+		spin_unlock(&lep->lock);
+		return -EINVAL;
+	}
+
+	if (!lep->conreqcnt && !(flags & SCIF_ACCEPT_SYNC)) {
+		/* No connection request present and we do not want to wait */
+		spin_unlock(&lep->lock);
+		return -EAGAIN;
+	}
+
+	lep->files = current->files;
+retry_connection:
+	spin_unlock(&lep->lock);
+	/* Wait for the remote node to send us a SCIF_CNCT_REQ */
+	err = wait_event_interruptible(lep->conwq,
+				       (lep->conreqcnt ||
+				       (lep->state != SCIFEP_LISTENING)));
+	if (err)
+		return err;
+
+	if (lep->state != SCIFEP_LISTENING)
+		return -EINTR;
+
+	spin_lock(&lep->lock);
+
+	if (!lep->conreqcnt)
+		goto retry_connection;
+
+	/* Get the first connect request off the list */
+	conreq = list_first_entry(&lep->conlist, struct scif_conreq, list);
+	list_del(&conreq->list);
+	lep->conreqcnt--;
+	spin_unlock(&lep->lock);
+
+	/* Fill in the peer information */
+	peer->node = conreq->msg.src.node;
+	peer->port = conreq->msg.src.port;
+
+	cep = kzalloc(sizeof(*cep), GFP_KERNEL);
+	if (!cep) {
+		err = -ENOMEM;
+		goto scif_accept_error_epalloc;
+	}
+	spin_lock_init(&cep->lock);
+	mutex_init(&cep->sendlock);
+	mutex_init(&cep->recvlock);
+	cep->state = SCIFEP_CONNECTING;
+	cep->remote_dev = &scif_dev[peer->node];
+	cep->remote_ep = conreq->msg.payload[0];
+
+	scif_rma_ep_init(cep);
+
+	err = scif_reserve_dma_chan(cep);
+	if (err) {
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto scif_accept_error_qpalloc;
+	}
+
+	cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL);
+	if (!cep->qp_info.qp) {
+		err = -ENOMEM;
+		goto scif_accept_error_qpalloc;
+	}
+
+	err = scif_anon_inode_getfile(cep);
+	if (err)
+		goto scif_accept_error_anon_inode;
+
+	cep->qp_info.qp->magic = SCIFEP_MAGIC;
+	spdev = scif_get_peer_dev(cep->remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		goto scif_accept_error_map;
+	}
+	err = scif_setup_qp_accept(cep->qp_info.qp, &cep->qp_info.qp_offset,
+				   conreq->msg.payload[1], SCIF_ENDPT_QP_SIZE,
+				   cep->remote_dev);
+	if (err) {
+		dev_dbg(&cep->remote_dev->sdev->dev,
+			"SCIFAPI accept: ep %p new %p scif_setup_qp_accept %d qp_offset 0x%llx\n",
+			lep, cep, err, cep->qp_info.qp_offset);
+		scif_put_peer_dev(spdev);
+		goto scif_accept_error_map;
+	}
+
+	cep->port.node = lep->port.node;
+	cep->port.port = lep->port.port;
+	cep->peer.node = peer->node;
+	cep->peer.port = peer->port;
+	init_waitqueue_head(&cep->sendwq);
+	init_waitqueue_head(&cep->recvwq);
+	init_waitqueue_head(&cep->conwq);
+
+	msg.uop = SCIF_CNCT_GNT;
+	msg.src = cep->port;
+	msg.payload[0] = cep->remote_ep;
+	msg.payload[1] = cep->qp_info.qp_offset;
+	msg.payload[2] = (u64)cep;
+
+	err = _scif_nodeqp_send(cep->remote_dev, &msg);
+	scif_put_peer_dev(spdev);
+	if (err)
+		goto scif_accept_error_map;
+retry:
+	/* Wait for the remote node to respond with SCIF_CNCT_GNT(N)ACK */
+	err = wait_event_timeout(cep->conwq, cep->state != SCIFEP_CONNECTING,
+				 SCIF_NODE_ACCEPT_TIMEOUT);
+	if (!err && scifdev_alive(cep))
+		goto retry;
+	err = !err ? -ENODEV : 0;
+	if (err)
+		goto scif_accept_error_map;
+	kfree(conreq);
+
+	spin_lock(&cep->lock);
+
+	if (cep->state == SCIFEP_CLOSING) {
+		/*
+		 * Remote failed to allocate resources and NAKed the grant.
+		 * There is at this point nothing referencing the new end point.
+		 */
+		spin_unlock(&cep->lock);
+		scif_teardown_ep(cep);
+		kfree(cep);
+
+		/* If call with sync flag then go back and wait. */
+		if (flags & SCIF_ACCEPT_SYNC) {
+			spin_lock(&lep->lock);
+			goto retry_connection;
+		}
+		return -EAGAIN;
+	}
+
+	scif_get_port(cep->port.port);
+	*newepd = (scif_epd_t)cep;
+	spin_unlock(&cep->lock);
+	return 0;
+scif_accept_error_map:
+	scif_anon_inode_fput(cep);
+scif_accept_error_anon_inode:
+	scif_teardown_ep(cep);
+scif_accept_error_qpalloc:
+	kfree(cep);
+scif_accept_error_epalloc:
+	msg.uop = SCIF_CNCT_REJ;
+	msg.dst.node = conreq->msg.src.node;
+	msg.dst.port = conreq->msg.src.port;
+	msg.payload[0] = conreq->msg.payload[0];
+	msg.payload[1] = conreq->msg.payload[1];
+	scif_nodeqp_send(&scif_dev[conreq->msg.src.node], &msg);
+	kfree(conreq);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_accept);
+
+/*
+ * scif_msg_param_check:
+ * @epd: The end point returned from scif_open()
+ * @len: Length to receive
+ * @flags: blocking or non blocking
+ *
+ * Validate parameters for messaging APIs scif_send(..)/scif_recv(..).
+ */
+static inline int scif_msg_param_check(scif_epd_t epd, int len, int flags)
+{
+	int ret = -EINVAL;
+
+	if (len < 0)
+		goto err_ret;
+	if (flags && (!(flags & SCIF_RECV_BLOCK)))
+		goto err_ret;
+	ret = 0;
+err_ret:
+	return ret;
+}
+
+static int _scif_send(scif_epd_t epd, void *msg, int len, int flags)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scifmsg notif_msg;
+	int curr_xfer_len = 0, sent_len = 0, write_count;
+	int ret = 0;
+	struct scif_qp *qp = ep->qp_info.qp;
+
+	if (flags & SCIF_SEND_BLOCK)
+		might_sleep();
+
+	spin_lock(&ep->lock);
+	while (sent_len != len && SCIFEP_CONNECTED == ep->state) {
+		write_count = scif_rb_space(&qp->outbound_q);
+		if (write_count) {
+			/* Best effort to send as much data as possible */
+			curr_xfer_len = min(len - sent_len, write_count);
+			ret = scif_rb_write(&qp->outbound_q, msg,
+					    curr_xfer_len);
+			if (ret < 0)
+				break;
+			/* Success. Update write pointer */
+			scif_rb_commit(&qp->outbound_q);
+			/*
+			 * Send a notification to the peer about the
+			 * produced data message.
+			 */
+			notif_msg.src = ep->port;
+			notif_msg.uop = SCIF_CLIENT_SENT;
+			notif_msg.payload[0] = ep->remote_ep;
+			ret = _scif_nodeqp_send(ep->remote_dev, &notif_msg);
+			if (ret)
+				break;
+			sent_len += curr_xfer_len;
+			msg = msg + curr_xfer_len;
+			continue;
+		}
+		curr_xfer_len = min(len - sent_len, SCIF_ENDPT_QP_SIZE - 1);
+		/* Not enough RB space. return for the Non Blocking case */
+		if (!(flags & SCIF_SEND_BLOCK))
+			break;
+
+		spin_unlock(&ep->lock);
+		/* Wait for a SCIF_CLIENT_RCVD message in the Blocking case */
+		ret =
+		wait_event_interruptible(ep->sendwq,
+					 (SCIFEP_CONNECTED != ep->state) ||
+					 (scif_rb_space(&qp->outbound_q) >=
+					 curr_xfer_len));
+		spin_lock(&ep->lock);
+		if (ret)
+			break;
+	}
+	if (sent_len)
+		ret = sent_len;
+	else if (!ret && SCIFEP_CONNECTED != ep->state)
+		ret = SCIFEP_DISCONNECTED == ep->state ?
+			-ECONNRESET : -ENOTCONN;
+	spin_unlock(&ep->lock);
+	return ret;
+}
+
+static int _scif_recv(scif_epd_t epd, void *msg, int len, int flags)
+{
+	int read_size;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scifmsg notif_msg;
+	int curr_recv_len = 0, remaining_len = len, read_count;
+	int ret = 0;
+	struct scif_qp *qp = ep->qp_info.qp;
+
+	if (flags & SCIF_RECV_BLOCK)
+		might_sleep();
+	spin_lock(&ep->lock);
+	while (remaining_len && (SCIFEP_CONNECTED == ep->state ||
+				 SCIFEP_DISCONNECTED == ep->state)) {
+		read_count = scif_rb_count(&qp->inbound_q, remaining_len);
+		if (read_count) {
+			/*
+			 * Best effort to recv as much data as there
+			 * are bytes to read in the RB particularly
+			 * important for the Non Blocking case.
+			 */
+			curr_recv_len = min(remaining_len, read_count);
+			read_size = scif_rb_get_next(&qp->inbound_q,
+						     msg, curr_recv_len);
+			if (ep->state == SCIFEP_CONNECTED) {
+				/*
+				 * Update the read pointer only if the endpoint
+				 * is still connected else the read pointer
+				 * might no longer exist since the peer has
+				 * freed resources!
+				 */
+				scif_rb_update_read_ptr(&qp->inbound_q);
+				/*
+				 * Send a notification to the peer about the
+				 * consumed data message only if the EP is in
+				 * SCIFEP_CONNECTED state.
+				 */
+				notif_msg.src = ep->port;
+				notif_msg.uop = SCIF_CLIENT_RCVD;
+				notif_msg.payload[0] = ep->remote_ep;
+				ret = _scif_nodeqp_send(ep->remote_dev,
+							&notif_msg);
+				if (ret)
+					break;
+			}
+			remaining_len -= curr_recv_len;
+			msg = msg + curr_recv_len;
+			continue;
+		}
+		/*
+		 * Bail out now if the EP is in SCIFEP_DISCONNECTED state else
+		 * we will keep looping forever.
+		 */
+		if (ep->state == SCIFEP_DISCONNECTED)
+			break;
+		/*
+		 * Return in the Non Blocking case if there is no data
+		 * to read in this iteration.
+		 */
+		if (!(flags & SCIF_RECV_BLOCK))
+			break;
+		curr_recv_len = min(remaining_len, SCIF_ENDPT_QP_SIZE - 1);
+		spin_unlock(&ep->lock);
+		/*
+		 * Wait for a SCIF_CLIENT_SEND message in the blocking case
+		 * or until other side disconnects.
+		 */
+		ret =
+		wait_event_interruptible(ep->recvwq,
+					 SCIFEP_CONNECTED != ep->state ||
+					 scif_rb_count(&qp->inbound_q,
+						       curr_recv_len)
+					 >= curr_recv_len);
+		spin_lock(&ep->lock);
+		if (ret)
+			break;
+	}
+	if (len - remaining_len)
+		ret = len - remaining_len;
+	else if (!ret && ep->state != SCIFEP_CONNECTED)
+		ret = ep->state == SCIFEP_DISCONNECTED ?
+			-ECONNRESET : -ENOTCONN;
+	spin_unlock(&ep->lock);
+	return ret;
+}
+
+/**
+ * scif_user_send() - Send data to connection queue
+ * @epd: The end point returned from scif_open()
+ * @msg: Address to place data
+ * @len: Length to receive
+ * @flags: blocking or non blocking
+ *
+ * This function is called from the driver IOCTL entry point
+ * only and is a wrapper for _scif_send().
+ */
+int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int err = 0;
+	int sent_len = 0;
+	char *tmp;
+	int loop_len;
+	int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI send (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
+	if (!len)
+		return 0;
+
+	err = scif_msg_param_check(epd, len, flags);
+	if (err)
+		goto send_err;
+
+	tmp = kmalloc(chunk_len, GFP_KERNEL);
+	if (!tmp) {
+		err = -ENOMEM;
+		goto send_err;
+	}
+	/*
+	 * Grabbing the lock before breaking up the transfer in
+	 * multiple chunks is required to ensure that messages do
+	 * not get fragmented and reordered.
+	 */
+	mutex_lock(&ep->sendlock);
+	while (sent_len != len) {
+		loop_len = len - sent_len;
+		loop_len = min(chunk_len, loop_len);
+		if (copy_from_user(tmp, msg, loop_len)) {
+			err = -EFAULT;
+			goto send_free_err;
+		}
+		err = _scif_send(epd, tmp, loop_len, flags);
+		if (err < 0)
+			goto send_free_err;
+		sent_len += err;
+		msg += err;
+		if (err != loop_len)
+			goto send_free_err;
+	}
+send_free_err:
+	mutex_unlock(&ep->sendlock);
+	kfree(tmp);
+send_err:
+	return err < 0 ? err : sent_len;
+}
+
+/**
+ * scif_user_recv() - Receive data from connection queue
+ * @epd: The end point returned from scif_open()
+ * @msg: Address to place data
+ * @len: Length to receive
+ * @flags: blocking or non blocking
+ *
+ * This function is called from the driver IOCTL entry point
+ * only and is a wrapper for _scif_recv().
+ */
+int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int err = 0;
+	int recv_len = 0;
+	char *tmp;
+	int loop_len;
+	int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI recv (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
+	if (!len)
+		return 0;
+
+	err = scif_msg_param_check(epd, len, flags);
+	if (err)
+		goto recv_err;
+
+	tmp = kmalloc(chunk_len, GFP_KERNEL);
+	if (!tmp) {
+		err = -ENOMEM;
+		goto recv_err;
+	}
+	/*
+	 * Grabbing the lock before breaking up the transfer in
+	 * multiple chunks is required to ensure that messages do
+	 * not get fragmented and reordered.
+	 */
+	mutex_lock(&ep->recvlock);
+	while (recv_len != len) {
+		loop_len = len - recv_len;
+		loop_len = min(chunk_len, loop_len);
+		err = _scif_recv(epd, tmp, loop_len, flags);
+		if (err < 0)
+			goto recv_free_err;
+		if (copy_to_user(msg, tmp, err)) {
+			err = -EFAULT;
+			goto recv_free_err;
+		}
+		recv_len += err;
+		msg += err;
+		if (err != loop_len)
+			goto recv_free_err;
+	}
+recv_free_err:
+	mutex_unlock(&ep->recvlock);
+	kfree(tmp);
+recv_err:
+	return err < 0 ? err : recv_len;
+}
+
+/**
+ * scif_send() - Send data to connection queue
+ * @epd: The end point returned from scif_open()
+ * @msg: Address to place data
+ * @len: Length to receive
+ * @flags: blocking or non blocking
+ *
+ * This function is called from the kernel mode only and is
+ * a wrapper for _scif_send().
+ */
+int scif_send(scif_epd_t epd, void *msg, int len, int flags)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int ret;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI send (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
+	if (!len)
+		return 0;
+
+	ret = scif_msg_param_check(epd, len, flags);
+	if (ret)
+		return ret;
+	if (!ep->remote_dev)
+		return -ENOTCONN;
+	/*
+	 * Grab the mutex lock in the blocking case only
+	 * to ensure messages do not get fragmented/reordered.
+	 * The non blocking mode is protected using spin locks
+	 * in _scif_send().
+	 */
+	if (flags & SCIF_SEND_BLOCK)
+		mutex_lock(&ep->sendlock);
+
+	ret = _scif_send(epd, msg, len, flags);
+
+	if (flags & SCIF_SEND_BLOCK)
+		mutex_unlock(&ep->sendlock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(scif_send);
+
+/**
+ * scif_recv() - Receive data from connection queue
+ * @epd: The end point returned from scif_open()
+ * @msg: Address to place data
+ * @len: Length to receive
+ * @flags: blocking or non blocking
+ *
+ * This function is called from the kernel mode only and is
+ * a wrapper for _scif_recv().
+ */
+int scif_recv(scif_epd_t epd, void *msg, int len, int flags)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int ret;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI recv (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
+	if (!len)
+		return 0;
+
+	ret = scif_msg_param_check(epd, len, flags);
+	if (ret)
+		return ret;
+	/*
+	 * Grab the mutex lock in the blocking case only
+	 * to ensure messages do not get fragmented/reordered.
+	 * The non blocking mode is protected using spin locks
+	 * in _scif_send().
+	 */
+	if (flags & SCIF_RECV_BLOCK)
+		mutex_lock(&ep->recvlock);
+
+	ret = _scif_recv(epd, msg, len, flags);
+
+	if (flags & SCIF_RECV_BLOCK)
+		mutex_unlock(&ep->recvlock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(scif_recv);
+
+static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq,
+				   poll_table *p, struct scif_endpt *ep)
+{
+	/*
+	 * Because poll_wait makes a GFP_KERNEL allocation, give up the lock
+	 * and regrab it afterwards. Because the endpoint state might have
+	 * changed while the lock was given up, the state must be checked
+	 * again after re-acquiring the lock. The code in __scif_pollfd(..)
+	 * does this.
+	 */
+	spin_unlock(&ep->lock);
+	poll_wait(f, wq, p);
+	spin_lock(&ep->lock);
+}
+
+__poll_t
+__scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep)
+{
+	__poll_t mask = 0;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]);
+
+	spin_lock(&ep->lock);
+
+	/* Endpoint is waiting for a non-blocking connect to complete */
+	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+		_scif_poll_wait(f, &ep->conn_pend_wq, wait, ep);
+		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+			if (ep->state == SCIFEP_CONNECTED ||
+			    ep->state == SCIFEP_DISCONNECTED ||
+			    ep->conn_err)
+				mask |= EPOLLOUT;
+			goto exit;
+		}
+	}
+
+	/* Endpoint is listening for incoming connection requests */
+	if (ep->state == SCIFEP_LISTENING) {
+		_scif_poll_wait(f, &ep->conwq, wait, ep);
+		if (ep->state == SCIFEP_LISTENING) {
+			if (ep->conreqcnt)
+				mask |= EPOLLIN;
+			goto exit;
+		}
+	}
+
+	/* Endpoint is connected or disconnected */
+	if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) {
+		if (poll_requested_events(wait) & EPOLLIN)
+			_scif_poll_wait(f, &ep->recvwq, wait, ep);
+		if (poll_requested_events(wait) & EPOLLOUT)
+			_scif_poll_wait(f, &ep->sendwq, wait, ep);
+		if (ep->state == SCIFEP_CONNECTED ||
+		    ep->state == SCIFEP_DISCONNECTED) {
+			/* Data can be read without blocking */
+			if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1))
+				mask |= EPOLLIN;
+			/* Data can be written without blocking */
+			if (scif_rb_space(&ep->qp_info.qp->outbound_q))
+				mask |= EPOLLOUT;
+			/* Return EPOLLHUP if endpoint is disconnected */
+			if (ep->state == SCIFEP_DISCONNECTED)
+				mask |= EPOLLHUP;
+			goto exit;
+		}
+	}
+
+	/* Return EPOLLERR if the endpoint is in none of the above states */
+	mask |= EPOLLERR;
+exit:
+	spin_unlock(&ep->lock);
+	return mask;
+}
+
+/**
+ * scif_poll() - Kernel mode SCIF poll
+ * @ufds: Array of scif_pollepd structures containing the end points
+ *	  and events to poll on
+ * @nfds: Size of the ufds array
+ * @timeout_msecs: Timeout in msecs, -ve implies infinite timeout
+ *
+ * The code flow in this function is based on do_poll(..) in select.c
+ *
+ * Returns the number of endpoints which have pending events or 0 in
+ * the event of a timeout. If a signal is used for wake up, -EINTR is
+ * returned.
+ */
+int
+scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs)
+{
+	struct poll_wqueues table;
+	poll_table *pt;
+	int i, count = 0, timed_out = timeout_msecs == 0;
+	__poll_t mask;
+	u64 timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT
+		: msecs_to_jiffies(timeout_msecs);
+
+	poll_initwait(&table);
+	pt = &table.pt;
+	while (1) {
+		for (i = 0; i < nfds; i++) {
+			pt->_key = ufds[i].events | EPOLLERR | EPOLLHUP;
+			mask = __scif_pollfd(ufds[i].epd->anon,
+					     pt, ufds[i].epd);
+			mask &= ufds[i].events | EPOLLERR | EPOLLHUP;
+			if (mask) {
+				count++;
+				pt->_qproc = NULL;
+			}
+			ufds[i].revents = mask;
+		}
+		pt->_qproc = NULL;
+		if (!count) {
+			count = table.error;
+			if (signal_pending(current))
+				count = -EINTR;
+		}
+		if (count || timed_out)
+			break;
+
+		if (!schedule_timeout_interruptible(timeout))
+			timed_out = 1;
+	}
+	poll_freewait(&table);
+	return count;
+}
+EXPORT_SYMBOL_GPL(scif_poll);
+
+int scif_get_node_ids(u16 *nodes, int len, u16 *self)
+{
+	int online = 0;
+	int offset = 0;
+	int node;
+
+	if (!scif_is_mgmt_node())
+		scif_get_node_info();
+
+	*self = scif_info.nodeid;
+	mutex_lock(&scif_info.conflock);
+	len = min_t(int, len, scif_info.total);
+	for (node = 0; node <= scif_info.maxid; node++) {
+		if (_scifdev_alive(&scif_dev[node])) {
+			online++;
+			if (offset < len)
+				nodes[offset++] = node;
+		}
+	}
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI get_node_ids total %d online %d filled in %d nodes\n",
+		scif_info.total, online, offset);
+	mutex_unlock(&scif_info.conflock);
+
+	return online;
+}
+EXPORT_SYMBOL_GPL(scif_get_node_ids);
+
+static int scif_add_client_dev(struct device *dev, struct subsys_interface *si)
+{
+	struct scif_client *client =
+		container_of(si, struct scif_client, si);
+	struct scif_peer_dev *spdev =
+		container_of(dev, struct scif_peer_dev, dev);
+
+	if (client->probe)
+		client->probe(spdev);
+	return 0;
+}
+
+static void scif_remove_client_dev(struct device *dev,
+				   struct subsys_interface *si)
+{
+	struct scif_client *client =
+		container_of(si, struct scif_client, si);
+	struct scif_peer_dev *spdev =
+		container_of(dev, struct scif_peer_dev, dev);
+
+	if (client->remove)
+		client->remove(spdev);
+}
+
+void scif_client_unregister(struct scif_client *client)
+{
+	subsys_interface_unregister(&client->si);
+}
+EXPORT_SYMBOL_GPL(scif_client_unregister);
+
+int scif_client_register(struct scif_client *client)
+{
+	struct subsys_interface *si = &client->si;
+
+	si->name = client->name;
+	si->subsys = &scif_peer_bus;
+	si->add_dev = scif_add_client_dev;
+	si->remove_dev = scif_remove_client_dev;
+
+	return subsys_interface_register(&client->si);
+}
+EXPORT_SYMBOL_GPL(scif_client_register);
diff --git a/drivers/misc/mic/scif/scif_debugfs.c b/drivers/misc/mic/scif/scif_debugfs.c
new file mode 100644
index 000000000..6884dad97
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_debugfs.c
@@ -0,0 +1,162 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "../common/mic_dev.h"
+#include "scif_main.h"
+
+/* Debugfs parent dir */
+static struct dentry *scif_dbg;
+
+static int scif_dev_test(struct seq_file *s, void *unused)
+{
+	int node;
+
+	seq_printf(s, "Total Nodes %d Self Node Id %d Maxid %d\n",
+		   scif_info.total, scif_info.nodeid,
+		   scif_info.maxid);
+
+	if (!scif_dev)
+		return 0;
+
+	seq_printf(s, "%-16s\t%-16s\n", "node_id", "state");
+
+	for (node = 0; node <= scif_info.maxid; node++)
+		seq_printf(s, "%-16d\t%-16s\n", scif_dev[node].node,
+			   _scifdev_alive(&scif_dev[node]) ?
+			   "Running" : "Offline");
+	return 0;
+}
+
+static int scif_dev_test_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, scif_dev_test, inode->i_private);
+}
+
+static int scif_dev_test_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file);
+}
+
+static const struct file_operations scif_dev_ops = {
+	.owner   = THIS_MODULE,
+	.open    = scif_dev_test_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = scif_dev_test_release
+};
+
+static void scif_display_window(struct scif_window *window, struct seq_file *s)
+{
+	int j;
+	struct scatterlist *sg;
+	scif_pinned_pages_t pin = window->pinned_pages;
+
+	seq_printf(s, "window %p type %d temp %d offset 0x%llx ",
+		   window, window->type, window->temp, window->offset);
+	seq_printf(s, "nr_pages 0x%llx nr_contig_chunks 0x%x prot %d ",
+		   window->nr_pages, window->nr_contig_chunks, window->prot);
+	seq_printf(s, "ref_count %d magic 0x%llx peer_window 0x%llx ",
+		   window->ref_count, window->magic, window->peer_window);
+	seq_printf(s, "unreg_state 0x%x va_for_temp 0x%lx\n",
+		   window->unreg_state, window->va_for_temp);
+
+	for (j = 0; j < window->nr_contig_chunks; j++)
+		seq_printf(s, "page[%d] dma_addr 0x%llx num_pages 0x%llx\n", j,
+			   window->dma_addr[j], window->num_pages[j]);
+
+	if (window->type == SCIF_WINDOW_SELF && pin)
+		for (j = 0; j < window->nr_pages; j++)
+			seq_printf(s, "page[%d] = pinned_pages %p address %p\n",
+				   j, pin->pages[j],
+				   page_address(pin->pages[j]));
+
+	if (window->st)
+		for_each_sg(window->st->sgl, sg, window->st->nents, j)
+			seq_printf(s, "sg[%d] dma addr 0x%llx length 0x%x\n",
+				   j, sg_dma_address(sg), sg_dma_len(sg));
+}
+
+static void scif_display_all_windows(struct list_head *head, struct seq_file *s)
+{
+	struct list_head *item;
+	struct scif_window *window;
+
+	list_for_each(item, head) {
+		window = list_entry(item, struct scif_window, list);
+		scif_display_window(window, s);
+	}
+}
+
+static int scif_rma_test(struct seq_file *s, void *unused)
+{
+	struct scif_endpt *ep;
+	struct list_head *pos;
+
+	mutex_lock(&scif_info.connlock);
+	list_for_each(pos, &scif_info.connected) {
+		ep = list_entry(pos, struct scif_endpt, list);
+		seq_printf(s, "ep %p self windows\n", ep);
+		mutex_lock(&ep->rma_info.rma_lock);
+		scif_display_all_windows(&ep->rma_info.reg_list, s);
+		seq_printf(s, "ep %p remote windows\n", ep);
+		scif_display_all_windows(&ep->rma_info.remote_reg_list, s);
+		mutex_unlock(&ep->rma_info.rma_lock);
+	}
+	mutex_unlock(&scif_info.connlock);
+	return 0;
+}
+
+static int scif_rma_test_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, scif_rma_test, inode->i_private);
+}
+
+static int scif_rma_test_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file);
+}
+
+static const struct file_operations scif_rma_ops = {
+	.owner   = THIS_MODULE,
+	.open    = scif_rma_test_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = scif_rma_test_release
+};
+
+void __init scif_init_debugfs(void)
+{
+	scif_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
+	if (!scif_dbg) {
+		dev_err(scif_info.mdev.this_device,
+			"can't create debugfs dir scif\n");
+		return;
+	}
+
+	debugfs_create_file("scif_dev", 0444, scif_dbg, NULL, &scif_dev_ops);
+	debugfs_create_file("scif_rma", 0444, scif_dbg, NULL, &scif_rma_ops);
+	debugfs_create_u8("en_msg_log", 0666, scif_dbg, &scif_info.en_msg_log);
+	debugfs_create_u8("p2p_enable", 0666, scif_dbg, &scif_info.p2p_enable);
+}
+
+void scif_exit_debugfs(void)
+{
+	debugfs_remove_recursive(scif_dbg);
+}
diff --git a/drivers/misc/mic/scif/scif_dma.c b/drivers/misc/mic/scif/scif_dma.c
new file mode 100644
index 000000000..6369aeaa7
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_dma.c
@@ -0,0 +1,1960 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+#include "scif_map.h"
+
+/*
+ * struct scif_dma_comp_cb - SCIF DMA completion callback
+ *
+ * @dma_completion_func: DMA completion callback
+ * @cb_cookie: DMA completion callback cookie
+ * @temp_buf: Temporary buffer
+ * @temp_buf_to_free: Temporary buffer to be freed
+ * @is_cache: Is a kmem_cache allocated buffer
+ * @dst_offset: Destination registration offset
+ * @dst_window: Destination registration window
+ * @len: Length of the temp buffer
+ * @temp_phys: DMA address of the temp buffer
+ * @sdev: The SCIF device
+ * @header_padding: padding for cache line alignment
+ */
+struct scif_dma_comp_cb {
+	void (*dma_completion_func)(void *cookie);
+	void *cb_cookie;
+	u8 *temp_buf;
+	u8 *temp_buf_to_free;
+	bool is_cache;
+	s64 dst_offset;
+	struct scif_window *dst_window;
+	size_t len;
+	dma_addr_t temp_phys;
+	struct scif_dev *sdev;
+	int header_padding;
+};
+
+/**
+ * struct scif_copy_work - Work for DMA copy
+ *
+ * @src_offset: Starting source offset
+ * @dst_offset: Starting destination offset
+ * @src_window: Starting src registered window
+ * @dst_window: Starting dst registered window
+ * @loopback: true if this is a loopback DMA transfer
+ * @len: Length of the transfer
+ * @comp_cb: DMA copy completion callback
+ * @remote_dev: The remote SCIF peer device
+ * @fence_type: polling or interrupt based
+ * @ordered: is this a tail byte ordered DMA transfer
+ */
+struct scif_copy_work {
+	s64 src_offset;
+	s64 dst_offset;
+	struct scif_window *src_window;
+	struct scif_window *dst_window;
+	int loopback;
+	size_t len;
+	struct scif_dma_comp_cb   *comp_cb;
+	struct scif_dev	*remote_dev;
+	int fence_type;
+	bool ordered;
+};
+
+/**
+ * scif_reserve_dma_chan:
+ * @ep: Endpoint Descriptor.
+ *
+ * This routine reserves a DMA channel for a particular
+ * endpoint. All DMA transfers for an endpoint are always
+ * programmed on the same DMA channel.
+ */
+int scif_reserve_dma_chan(struct scif_endpt *ep)
+{
+	int err = 0;
+	struct scif_dev *scifdev;
+	struct scif_hw_dev *sdev;
+	struct dma_chan *chan;
+
+	/* Loopback DMAs are not supported on the management node */
+	if (!scif_info.nodeid && scifdev_self(ep->remote_dev))
+		return 0;
+	if (scif_info.nodeid)
+		scifdev = &scif_dev[0];
+	else
+		scifdev = ep->remote_dev;
+	sdev = scifdev->sdev;
+	if (!sdev->num_dma_ch)
+		return -ENODEV;
+	chan = sdev->dma_ch[scifdev->dma_ch_idx];
+	scifdev->dma_ch_idx = (scifdev->dma_ch_idx + 1) % sdev->num_dma_ch;
+	mutex_lock(&ep->rma_info.rma_lock);
+	ep->rma_info.dma_chan = chan;
+	mutex_unlock(&ep->rma_info.rma_lock);
+	return err;
+}
+
+#ifdef CONFIG_MMU_NOTIFIER
+/**
+ * scif_rma_destroy_tcw:
+ *
+ * This routine destroys temporary cached windows
+ */
+static
+void __scif_rma_destroy_tcw(struct scif_mmu_notif *mmn,
+			    u64 start, u64 len)
+{
+	struct list_head *item, *tmp;
+	struct scif_window *window;
+	u64 start_va, end_va;
+	u64 end = start + len;
+
+	if (end <= start)
+		return;
+
+	list_for_each_safe(item, tmp, &mmn->tc_reg_list) {
+		window = list_entry(item, struct scif_window, list);
+		if (!len)
+			break;
+		start_va = window->va_for_temp;
+		end_va = start_va + (window->nr_pages << PAGE_SHIFT);
+		if (start < start_va && end <= start_va)
+			break;
+		if (start >= end_va)
+			continue;
+		__scif_rma_destroy_tcw_helper(window);
+	}
+}
+
+static void scif_rma_destroy_tcw(struct scif_mmu_notif *mmn, u64 start, u64 len)
+{
+	struct scif_endpt *ep = mmn->ep;
+
+	spin_lock(&ep->rma_info.tc_lock);
+	__scif_rma_destroy_tcw(mmn, start, len);
+	spin_unlock(&ep->rma_info.tc_lock);
+}
+
+static void scif_rma_destroy_tcw_ep(struct scif_endpt *ep)
+{
+	struct list_head *item, *tmp;
+	struct scif_mmu_notif *mmn;
+
+	list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) {
+		mmn = list_entry(item, struct scif_mmu_notif, list);
+		scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
+	}
+}
+
+static void __scif_rma_destroy_tcw_ep(struct scif_endpt *ep)
+{
+	struct list_head *item, *tmp;
+	struct scif_mmu_notif *mmn;
+
+	spin_lock(&ep->rma_info.tc_lock);
+	list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) {
+		mmn = list_entry(item, struct scif_mmu_notif, list);
+		__scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
+	}
+	spin_unlock(&ep->rma_info.tc_lock);
+}
+
+static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes)
+{
+	if ((cur_bytes >> PAGE_SHIFT) > scif_info.rma_tc_limit)
+		return false;
+	if ((atomic_read(&ep->rma_info.tcw_total_pages)
+			+ (cur_bytes >> PAGE_SHIFT)) >
+			scif_info.rma_tc_limit) {
+		dev_info(scif_info.mdev.this_device,
+			 "%s %d total=%d, current=%zu reached max\n",
+			 __func__, __LINE__,
+			 atomic_read(&ep->rma_info.tcw_total_pages),
+			 (1 + (cur_bytes >> PAGE_SHIFT)));
+		scif_rma_destroy_tcw_invalid();
+		__scif_rma_destroy_tcw_ep(ep);
+	}
+	return true;
+}
+
+static void scif_mmu_notifier_release(struct mmu_notifier *mn,
+				      struct mm_struct *mm)
+{
+	struct scif_mmu_notif	*mmn;
+
+	mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
+	scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
+	schedule_work(&scif_info.misc_work);
+}
+
+static int scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
+						     struct mm_struct *mm,
+						     unsigned long start,
+						     unsigned long end,
+						     bool blockable)
+{
+	struct scif_mmu_notif	*mmn;
+
+	mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
+	scif_rma_destroy_tcw(mmn, start, end - start);
+
+	return 0;
+}
+
+static void scif_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
+						   struct mm_struct *mm,
+						   unsigned long start,
+						   unsigned long end)
+{
+	/*
+	 * Nothing to do here, everything needed was done in
+	 * invalidate_range_start.
+	 */
+}
+
+static const struct mmu_notifier_ops scif_mmu_notifier_ops = {
+	.release = scif_mmu_notifier_release,
+	.clear_flush_young = NULL,
+	.invalidate_range_start = scif_mmu_notifier_invalidate_range_start,
+	.invalidate_range_end = scif_mmu_notifier_invalidate_range_end};
+
+static void scif_ep_unregister_mmu_notifier(struct scif_endpt *ep)
+{
+	struct scif_endpt_rma_info *rma = &ep->rma_info;
+	struct scif_mmu_notif *mmn = NULL;
+	struct list_head *item, *tmp;
+
+	mutex_lock(&ep->rma_info.mmn_lock);
+	list_for_each_safe(item, tmp, &rma->mmn_list) {
+		mmn = list_entry(item, struct scif_mmu_notif, list);
+		mmu_notifier_unregister(&mmn->ep_mmu_notifier, mmn->mm);
+		list_del(item);
+		kfree(mmn);
+	}
+	mutex_unlock(&ep->rma_info.mmn_lock);
+}
+
+static void scif_init_mmu_notifier(struct scif_mmu_notif *mmn,
+				   struct mm_struct *mm, struct scif_endpt *ep)
+{
+	mmn->ep = ep;
+	mmn->mm = mm;
+	mmn->ep_mmu_notifier.ops = &scif_mmu_notifier_ops;
+	INIT_LIST_HEAD(&mmn->list);
+	INIT_LIST_HEAD(&mmn->tc_reg_list);
+}
+
+static struct scif_mmu_notif *
+scif_find_mmu_notifier(struct mm_struct *mm, struct scif_endpt_rma_info *rma)
+{
+	struct scif_mmu_notif *mmn;
+
+	list_for_each_entry(mmn, &rma->mmn_list, list)
+		if (mmn->mm == mm)
+			return mmn;
+	return NULL;
+}
+
+static struct scif_mmu_notif *
+scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep)
+{
+	struct scif_mmu_notif *mmn
+		 = kzalloc(sizeof(*mmn), GFP_KERNEL);
+
+	if (!mmn)
+		return ERR_PTR(-ENOMEM);
+
+	scif_init_mmu_notifier(mmn, current->mm, ep);
+	if (mmu_notifier_register(&mmn->ep_mmu_notifier, current->mm)) {
+		kfree(mmn);
+		return ERR_PTR(-EBUSY);
+	}
+	list_add(&mmn->list, &ep->rma_info.mmn_list);
+	return mmn;
+}
+
+/*
+ * Called from the misc thread to destroy temporary cached windows and
+ * unregister the MMU notifier for the SCIF endpoint.
+ */
+void scif_mmu_notif_handler(struct work_struct *work)
+{
+	struct list_head *pos, *tmpq;
+	struct scif_endpt *ep;
+restart:
+	scif_rma_destroy_tcw_invalid();
+	spin_lock(&scif_info.rmalock);
+	list_for_each_safe(pos, tmpq, &scif_info.mmu_notif_cleanup) {
+		ep = list_entry(pos, struct scif_endpt, mmu_list);
+		list_del(&ep->mmu_list);
+		spin_unlock(&scif_info.rmalock);
+		scif_rma_destroy_tcw_ep(ep);
+		scif_ep_unregister_mmu_notifier(ep);
+		goto restart;
+	}
+	spin_unlock(&scif_info.rmalock);
+}
+
+static bool scif_is_set_reg_cache(int flags)
+{
+	return !!(flags & SCIF_RMA_USECACHE);
+}
+#else
+static struct scif_mmu_notif *
+scif_find_mmu_notifier(struct mm_struct *mm,
+		       struct scif_endpt_rma_info *rma)
+{
+	return NULL;
+}
+
+static struct scif_mmu_notif *
+scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep)
+{
+	return NULL;
+}
+
+void scif_mmu_notif_handler(struct work_struct *work)
+{
+}
+
+static bool scif_is_set_reg_cache(int flags)
+{
+	return false;
+}
+
+static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes)
+{
+	return false;
+}
+#endif
+
+/**
+ * scif_register_temp:
+ * @epd: End Point Descriptor.
+ * @addr: virtual address to/from which to copy
+ * @len: length of range to copy
+ * @out_offset: computed offset returned by reference.
+ * @out_window: allocated registered window returned by reference.
+ *
+ * Create a temporary registered window. The peer will not know about this
+ * window. This API is used for scif_vreadfrom()/scif_vwriteto() API's.
+ */
+static int
+scif_register_temp(scif_epd_t epd, unsigned long addr, size_t len, int prot,
+		   off_t *out_offset, struct scif_window **out_window)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int err;
+	scif_pinned_pages_t pinned_pages;
+	size_t aligned_len;
+
+	aligned_len = ALIGN(len, PAGE_SIZE);
+
+	err = __scif_pin_pages((void *)(addr & PAGE_MASK),
+			       aligned_len, &prot, 0, &pinned_pages);
+	if (err)
+		return err;
+
+	pinned_pages->prot = prot;
+
+	/* Compute the offset for this registration */
+	err = scif_get_window_offset(ep, 0, 0,
+				     aligned_len >> PAGE_SHIFT,
+				     (s64 *)out_offset);
+	if (err)
+		goto error_unpin;
+
+	/* Allocate and prepare self registration window */
+	*out_window = scif_create_window(ep, aligned_len >> PAGE_SHIFT,
+					*out_offset, true);
+	if (!*out_window) {
+		scif_free_window_offset(ep, NULL, *out_offset);
+		err = -ENOMEM;
+		goto error_unpin;
+	}
+
+	(*out_window)->pinned_pages = pinned_pages;
+	(*out_window)->nr_pages = pinned_pages->nr_pages;
+	(*out_window)->prot = pinned_pages->prot;
+
+	(*out_window)->va_for_temp = addr & PAGE_MASK;
+	err = scif_map_window(ep->remote_dev, *out_window);
+	if (err) {
+		/* Something went wrong! Rollback */
+		scif_destroy_window(ep, *out_window);
+		*out_window = NULL;
+	} else {
+		*out_offset |= (addr - (*out_window)->va_for_temp);
+	}
+	return err;
+error_unpin:
+	if (err)
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	scif_unpin_pages(pinned_pages);
+	return err;
+}
+
+#define SCIF_DMA_TO (3 * HZ)
+
+/*
+ * scif_sync_dma - Program a DMA without an interrupt descriptor
+ *
+ * @dev - The address of the pointer to the device instance used
+ * for DMA registration.
+ * @chan - DMA channel to be used.
+ * @sync_wait: Wait for DMA to complete?
+ *
+ * Return 0 on success and -errno on error.
+ */
+static int scif_sync_dma(struct scif_hw_dev *sdev, struct dma_chan *chan,
+			 bool sync_wait)
+{
+	int err = 0;
+	struct dma_async_tx_descriptor *tx = NULL;
+	enum dma_ctrl_flags flags = DMA_PREP_FENCE;
+	dma_cookie_t cookie;
+	struct dma_device *ddev;
+
+	if (!chan) {
+		err = -EIO;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+	ddev = chan->device;
+
+	tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags);
+	if (!tx) {
+		err = -ENOMEM;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto release;
+	}
+	cookie = tx->tx_submit(tx);
+
+	if (dma_submit_error(cookie)) {
+		err = -ENOMEM;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto release;
+	}
+	if (!sync_wait) {
+		dma_async_issue_pending(chan);
+	} else {
+		if (dma_sync_wait(chan, cookie) == DMA_COMPLETE) {
+			err = 0;
+		} else {
+			err = -EIO;
+			dev_err(&sdev->dev, "%s %d err %d\n",
+				__func__, __LINE__, err);
+		}
+	}
+release:
+	return err;
+}
+
+static void scif_dma_callback(void *arg)
+{
+	struct completion *done = (struct completion *)arg;
+
+	complete(done);
+}
+
+#define SCIF_DMA_SYNC_WAIT true
+#define SCIF_DMA_POLL BIT(0)
+#define SCIF_DMA_INTR BIT(1)
+
+/*
+ * scif_async_dma - Program a DMA with an interrupt descriptor
+ *
+ * @dev - The address of the pointer to the device instance used
+ * for DMA registration.
+ * @chan - DMA channel to be used.
+ * Return 0 on success and -errno on error.
+ */
+static int scif_async_dma(struct scif_hw_dev *sdev, struct dma_chan *chan)
+{
+	int err = 0;
+	struct dma_device *ddev;
+	struct dma_async_tx_descriptor *tx = NULL;
+	enum dma_ctrl_flags flags = DMA_PREP_INTERRUPT | DMA_PREP_FENCE;
+	DECLARE_COMPLETION_ONSTACK(done_wait);
+	dma_cookie_t cookie;
+	enum dma_status status;
+
+	if (!chan) {
+		err = -EIO;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+	ddev = chan->device;
+
+	tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags);
+	if (!tx) {
+		err = -ENOMEM;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto release;
+	}
+	reinit_completion(&done_wait);
+	tx->callback = scif_dma_callback;
+	tx->callback_param = &done_wait;
+	cookie = tx->tx_submit(tx);
+
+	if (dma_submit_error(cookie)) {
+		err = -ENOMEM;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto release;
+	}
+	dma_async_issue_pending(chan);
+
+	err = wait_for_completion_timeout(&done_wait, SCIF_DMA_TO);
+	if (!err) {
+		err = -EIO;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto release;
+	}
+	err = 0;
+	status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
+	if (status != DMA_COMPLETE) {
+		err = -EIO;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto release;
+	}
+release:
+	return err;
+}
+
+/*
+ * scif_drain_dma_poll - Drain all outstanding DMA operations for a particular
+ * DMA channel via polling.
+ *
+ * @sdev - The SCIF device
+ * @chan - DMA channel
+ * Return 0 on success and -errno on error.
+ */
+static int scif_drain_dma_poll(struct scif_hw_dev *sdev, struct dma_chan *chan)
+{
+	if (!chan)
+		return -EINVAL;
+	return scif_sync_dma(sdev, chan, SCIF_DMA_SYNC_WAIT);
+}
+
+/*
+ * scif_drain_dma_intr - Drain all outstanding DMA operations for a particular
+ * DMA channel via interrupt based blocking wait.
+ *
+ * @sdev - The SCIF device
+ * @chan - DMA channel
+ * Return 0 on success and -errno on error.
+ */
+int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan)
+{
+	if (!chan)
+		return -EINVAL;
+	return scif_async_dma(sdev, chan);
+}
+
+/**
+ * scif_rma_destroy_windows:
+ *
+ * This routine destroys all windows queued for cleanup
+ */
+void scif_rma_destroy_windows(void)
+{
+	struct list_head *item, *tmp;
+	struct scif_window *window;
+	struct scif_endpt *ep;
+	struct dma_chan *chan;
+
+	might_sleep();
+restart:
+	spin_lock(&scif_info.rmalock);
+	list_for_each_safe(item, tmp, &scif_info.rma) {
+		window = list_entry(item, struct scif_window,
+				    list);
+		ep = (struct scif_endpt *)window->ep;
+		chan = ep->rma_info.dma_chan;
+
+		list_del_init(&window->list);
+		spin_unlock(&scif_info.rmalock);
+		if (!chan || !scifdev_alive(ep) ||
+		    !scif_drain_dma_intr(ep->remote_dev->sdev,
+					 ep->rma_info.dma_chan))
+			/* Remove window from global list */
+			window->unreg_state = OP_COMPLETED;
+		else
+			dev_warn(&ep->remote_dev->sdev->dev,
+				 "DMA engine hung?\n");
+		if (window->unreg_state == OP_COMPLETED) {
+			if (window->type == SCIF_WINDOW_SELF)
+				scif_destroy_window(ep, window);
+			else
+				scif_destroy_remote_window(window);
+			atomic_dec(&ep->rma_info.tw_refcount);
+		}
+		goto restart;
+	}
+	spin_unlock(&scif_info.rmalock);
+}
+
+/**
+ * scif_rma_destroy_tcw:
+ *
+ * This routine destroys temporary cached registered windows
+ * which have been queued for cleanup.
+ */
+void scif_rma_destroy_tcw_invalid(void)
+{
+	struct list_head *item, *tmp;
+	struct scif_window *window;
+	struct scif_endpt *ep;
+	struct dma_chan *chan;
+
+	might_sleep();
+restart:
+	spin_lock(&scif_info.rmalock);
+	list_for_each_safe(item, tmp, &scif_info.rma_tc) {
+		window = list_entry(item, struct scif_window, list);
+		ep = (struct scif_endpt *)window->ep;
+		chan = ep->rma_info.dma_chan;
+		list_del_init(&window->list);
+		spin_unlock(&scif_info.rmalock);
+		mutex_lock(&ep->rma_info.rma_lock);
+		if (!chan || !scifdev_alive(ep) ||
+		    !scif_drain_dma_intr(ep->remote_dev->sdev,
+					 ep->rma_info.dma_chan)) {
+			atomic_sub(window->nr_pages,
+				   &ep->rma_info.tcw_total_pages);
+			scif_destroy_window(ep, window);
+			atomic_dec(&ep->rma_info.tcw_refcount);
+		} else {
+			dev_warn(&ep->remote_dev->sdev->dev,
+				 "DMA engine hung?\n");
+		}
+		mutex_unlock(&ep->rma_info.rma_lock);
+		goto restart;
+	}
+	spin_unlock(&scif_info.rmalock);
+}
+
+static inline
+void *_get_local_va(off_t off, struct scif_window *window, size_t len)
+{
+	int page_nr = (off - window->offset) >> PAGE_SHIFT;
+	off_t page_off = off & ~PAGE_MASK;
+	void *va = NULL;
+
+	if (window->type == SCIF_WINDOW_SELF) {
+		struct page **pages = window->pinned_pages->pages;
+
+		va = page_address(pages[page_nr]) + page_off;
+	}
+	return va;
+}
+
+static inline
+void *ioremap_remote(off_t off, struct scif_window *window,
+		     size_t len, struct scif_dev *dev,
+		     struct scif_window_iter *iter)
+{
+	dma_addr_t phys = scif_off_to_dma_addr(window, off, NULL, iter);
+
+	/*
+	 * If the DMA address is not card relative then we need the DMA
+	 * addresses to be an offset into the bar. The aperture base was already
+	 * added so subtract it here since scif_ioremap is going to add it again
+	 */
+	if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER &&
+	    dev->sdev->aper && !dev->sdev->card_rel_da)
+		phys = phys - dev->sdev->aper->pa;
+	return scif_ioremap(phys, len, dev);
+}
+
+static inline void
+iounmap_remote(void *virt, size_t size, struct scif_copy_work *work)
+{
+	scif_iounmap(virt, size, work->remote_dev);
+}
+
+/*
+ * Takes care of ordering issue caused by
+ * 1. Hardware:  Only in the case of cpu copy from mgmt node to card
+ * because of WC memory.
+ * 2. Software: If memcpy reorders copy instructions for optimization.
+ * This could happen at both mgmt node and card.
+ */
+static inline void
+scif_ordered_memcpy_toio(char *dst, const char *src, size_t count)
+{
+	if (!count)
+		return;
+
+	memcpy_toio((void __iomem __force *)dst, src, --count);
+	/* Order the last byte with the previous stores */
+	wmb();
+	*(dst + count) = *(src + count);
+}
+
+static inline void scif_unaligned_cpy_toio(char *dst, const char *src,
+					   size_t count, bool ordered)
+{
+	if (ordered)
+		scif_ordered_memcpy_toio(dst, src, count);
+	else
+		memcpy_toio((void __iomem __force *)dst, src, count);
+}
+
+static inline
+void scif_ordered_memcpy_fromio(char *dst, const char *src, size_t count)
+{
+	if (!count)
+		return;
+
+	memcpy_fromio(dst, (void __iomem __force *)src, --count);
+	/* Order the last byte with the previous loads */
+	rmb();
+	*(dst + count) = *(src + count);
+}
+
+static inline void scif_unaligned_cpy_fromio(char *dst, const char *src,
+					     size_t count, bool ordered)
+{
+	if (ordered)
+		scif_ordered_memcpy_fromio(dst, src, count);
+	else
+		memcpy_fromio(dst, (void __iomem __force *)src, count);
+}
+
+#define SCIF_RMA_ERROR_CODE (~(dma_addr_t)0x0)
+
+/*
+ * scif_off_to_dma_addr:
+ * Obtain the dma_addr given the window and the offset.
+ * @window: Registered window.
+ * @off: Window offset.
+ * @nr_bytes: Return the number of contiguous bytes till next DMA addr index.
+ * @index: Return the index of the dma_addr array found.
+ * @start_off: start offset of index of the dma addr array found.
+ * The nr_bytes provides the callee an estimate of the maximum possible
+ * DMA xfer possible while the index/start_off provide faster lookups
+ * for the next iteration.
+ */
+dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off,
+				size_t *nr_bytes, struct scif_window_iter *iter)
+{
+	int i, page_nr;
+	s64 start, end;
+	off_t page_off;
+
+	if (window->nr_pages == window->nr_contig_chunks) {
+		page_nr = (off - window->offset) >> PAGE_SHIFT;
+		page_off = off & ~PAGE_MASK;
+
+		if (nr_bytes)
+			*nr_bytes = PAGE_SIZE - page_off;
+		return window->dma_addr[page_nr] | page_off;
+	}
+	if (iter) {
+		i = iter->index;
+		start = iter->offset;
+	} else {
+		i =  0;
+		start =  window->offset;
+	}
+	for (; i < window->nr_contig_chunks; i++) {
+		end = start + (window->num_pages[i] << PAGE_SHIFT);
+		if (off >= start && off < end) {
+			if (iter) {
+				iter->index = i;
+				iter->offset = start;
+			}
+			if (nr_bytes)
+				*nr_bytes = end - off;
+			return (window->dma_addr[i] + (off - start));
+		}
+		start += (window->num_pages[i] << PAGE_SHIFT);
+	}
+	dev_err(scif_info.mdev.this_device,
+		"%s %d BUG. Addr not found? window %p off 0x%llx\n",
+		__func__, __LINE__, window, off);
+	return SCIF_RMA_ERROR_CODE;
+}
+
+/*
+ * Copy between rma window and temporary buffer
+ */
+static void scif_rma_local_cpu_copy(s64 offset, struct scif_window *window,
+				    u8 *temp, size_t rem_len, bool to_temp)
+{
+	void *window_virt;
+	size_t loop_len;
+	int offset_in_page;
+	s64 end_offset;
+
+	offset_in_page = offset & ~PAGE_MASK;
+	loop_len = PAGE_SIZE - offset_in_page;
+
+	if (rem_len < loop_len)
+		loop_len = rem_len;
+
+	window_virt = _get_local_va(offset, window, loop_len);
+	if (!window_virt)
+		return;
+	if (to_temp)
+		memcpy(temp, window_virt, loop_len);
+	else
+		memcpy(window_virt, temp, loop_len);
+
+	offset += loop_len;
+	temp += loop_len;
+	rem_len -= loop_len;
+
+	end_offset = window->offset +
+		(window->nr_pages << PAGE_SHIFT);
+	while (rem_len) {
+		if (offset == end_offset) {
+			window = list_next_entry(window, list);
+			end_offset = window->offset +
+				(window->nr_pages << PAGE_SHIFT);
+		}
+		loop_len = min(PAGE_SIZE, rem_len);
+		window_virt = _get_local_va(offset, window, loop_len);
+		if (!window_virt)
+			return;
+		if (to_temp)
+			memcpy(temp, window_virt, loop_len);
+		else
+			memcpy(window_virt, temp, loop_len);
+		offset	+= loop_len;
+		temp	+= loop_len;
+		rem_len	-= loop_len;
+	}
+}
+
+/**
+ * scif_rma_completion_cb:
+ * @data: RMA cookie
+ *
+ * RMA interrupt completion callback.
+ */
+static void scif_rma_completion_cb(void *data)
+{
+	struct scif_dma_comp_cb *comp_cb = data;
+
+	/* Free DMA Completion CB. */
+	if (comp_cb->dst_window)
+		scif_rma_local_cpu_copy(comp_cb->dst_offset,
+					comp_cb->dst_window,
+					comp_cb->temp_buf +
+					comp_cb->header_padding,
+					comp_cb->len, false);
+	scif_unmap_single(comp_cb->temp_phys, comp_cb->sdev,
+			  SCIF_KMEM_UNALIGNED_BUF_SIZE);
+	if (comp_cb->is_cache)
+		kmem_cache_free(unaligned_cache,
+				comp_cb->temp_buf_to_free);
+	else
+		kfree(comp_cb->temp_buf_to_free);
+}
+
+/* Copies between temporary buffer and offsets provided in work */
+static int
+scif_rma_list_dma_copy_unaligned(struct scif_copy_work *work,
+				 u8 *temp, struct dma_chan *chan,
+				 bool src_local)
+{
+	struct scif_dma_comp_cb *comp_cb = work->comp_cb;
+	dma_addr_t window_dma_addr, temp_dma_addr;
+	dma_addr_t temp_phys = comp_cb->temp_phys;
+	size_t loop_len, nr_contig_bytes = 0, remaining_len = work->len;
+	int offset_in_ca, ret = 0;
+	s64 end_offset, offset;
+	struct scif_window *window;
+	void *window_virt_addr;
+	size_t tail_len;
+	struct dma_async_tx_descriptor *tx;
+	struct dma_device *dev = chan->device;
+	dma_cookie_t cookie;
+
+	if (src_local) {
+		offset = work->dst_offset;
+		window = work->dst_window;
+	} else {
+		offset = work->src_offset;
+		window = work->src_window;
+	}
+
+	offset_in_ca = offset & (L1_CACHE_BYTES - 1);
+	if (offset_in_ca) {
+		loop_len = L1_CACHE_BYTES - offset_in_ca;
+		loop_len = min(loop_len, remaining_len);
+		window_virt_addr = ioremap_remote(offset, window,
+						  loop_len,
+						  work->remote_dev,
+						  NULL);
+		if (!window_virt_addr)
+			return -ENOMEM;
+		if (src_local)
+			scif_unaligned_cpy_toio(window_virt_addr, temp,
+						loop_len,
+						work->ordered &&
+						!(remaining_len - loop_len));
+		else
+			scif_unaligned_cpy_fromio(temp, window_virt_addr,
+						  loop_len, work->ordered &&
+						  !(remaining_len - loop_len));
+		iounmap_remote(window_virt_addr, loop_len, work);
+
+		offset += loop_len;
+		temp += loop_len;
+		temp_phys += loop_len;
+		remaining_len -= loop_len;
+	}
+
+	offset_in_ca = offset & ~PAGE_MASK;
+	end_offset = window->offset +
+		(window->nr_pages << PAGE_SHIFT);
+
+	tail_len = remaining_len & (L1_CACHE_BYTES - 1);
+	remaining_len -= tail_len;
+	while (remaining_len) {
+		if (offset == end_offset) {
+			window = list_next_entry(window, list);
+			end_offset = window->offset +
+				(window->nr_pages << PAGE_SHIFT);
+		}
+		if (scif_is_mgmt_node())
+			temp_dma_addr = temp_phys;
+		else
+			/* Fix if we ever enable IOMMU on the card */
+			temp_dma_addr = (dma_addr_t)virt_to_phys(temp);
+		window_dma_addr = scif_off_to_dma_addr(window, offset,
+						       &nr_contig_bytes,
+						       NULL);
+		loop_len = min(nr_contig_bytes, remaining_len);
+		if (src_local) {
+			if (work->ordered && !tail_len &&
+			    !(remaining_len - loop_len) &&
+			    loop_len != L1_CACHE_BYTES) {
+				/*
+				 * Break up the last chunk of the transfer into
+				 * two steps. if there is no tail to guarantee
+				 * DMA ordering. SCIF_DMA_POLLING inserts
+				 * a status update descriptor in step 1 which
+				 * acts as a double sided synchronization fence
+				 * for the DMA engine to ensure that the last
+				 * cache line in step 2 is updated last.
+				 */
+				/* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
+				tx =
+				dev->device_prep_dma_memcpy(chan,
+							    window_dma_addr,
+							    temp_dma_addr,
+							    loop_len -
+							    L1_CACHE_BYTES,
+							    DMA_PREP_FENCE);
+				if (!tx) {
+					ret = -ENOMEM;
+					goto err;
+				}
+				cookie = tx->tx_submit(tx);
+				if (dma_submit_error(cookie)) {
+					ret = -ENOMEM;
+					goto err;
+				}
+				dma_async_issue_pending(chan);
+				offset += (loop_len - L1_CACHE_BYTES);
+				temp_dma_addr += (loop_len - L1_CACHE_BYTES);
+				window_dma_addr += (loop_len - L1_CACHE_BYTES);
+				remaining_len -= (loop_len - L1_CACHE_BYTES);
+				loop_len = remaining_len;
+
+				/* Step 2) DMA: L1_CACHE_BYTES */
+				tx =
+				dev->device_prep_dma_memcpy(chan,
+							    window_dma_addr,
+							    temp_dma_addr,
+							    loop_len, 0);
+				if (!tx) {
+					ret = -ENOMEM;
+					goto err;
+				}
+				cookie = tx->tx_submit(tx);
+				if (dma_submit_error(cookie)) {
+					ret = -ENOMEM;
+					goto err;
+				}
+				dma_async_issue_pending(chan);
+			} else {
+				tx =
+				dev->device_prep_dma_memcpy(chan,
+							    window_dma_addr,
+							    temp_dma_addr,
+							    loop_len, 0);
+				if (!tx) {
+					ret = -ENOMEM;
+					goto err;
+				}
+				cookie = tx->tx_submit(tx);
+				if (dma_submit_error(cookie)) {
+					ret = -ENOMEM;
+					goto err;
+				}
+				dma_async_issue_pending(chan);
+			}
+		} else {
+			tx = dev->device_prep_dma_memcpy(chan, temp_dma_addr,
+					window_dma_addr, loop_len, 0);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			dma_async_issue_pending(chan);
+		}
+		if (ret < 0)
+			goto err;
+		offset += loop_len;
+		temp += loop_len;
+		temp_phys += loop_len;
+		remaining_len -= loop_len;
+		offset_in_ca = 0;
+	}
+	if (tail_len) {
+		if (offset == end_offset) {
+			window = list_next_entry(window, list);
+			end_offset = window->offset +
+				(window->nr_pages << PAGE_SHIFT);
+		}
+		window_virt_addr = ioremap_remote(offset, window, tail_len,
+						  work->remote_dev,
+						  NULL);
+		if (!window_virt_addr)
+			return -ENOMEM;
+		/*
+		 * The CPU copy for the tail bytes must be initiated only once
+		 * previous DMA transfers for this endpoint have completed
+		 * to guarantee ordering.
+		 */
+		if (work->ordered) {
+			struct scif_dev *rdev = work->remote_dev;
+
+			ret = scif_drain_dma_intr(rdev->sdev, chan);
+			if (ret)
+				return ret;
+		}
+		if (src_local)
+			scif_unaligned_cpy_toio(window_virt_addr, temp,
+						tail_len, work->ordered);
+		else
+			scif_unaligned_cpy_fromio(temp, window_virt_addr,
+						  tail_len, work->ordered);
+		iounmap_remote(window_virt_addr, tail_len, work);
+	}
+	tx = dev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_INTERRUPT);
+	if (!tx) {
+		ret = -ENOMEM;
+		return ret;
+	}
+	tx->callback = &scif_rma_completion_cb;
+	tx->callback_param = comp_cb;
+	cookie = tx->tx_submit(tx);
+
+	if (dma_submit_error(cookie)) {
+		ret = -ENOMEM;
+		return ret;
+	}
+	dma_async_issue_pending(chan);
+	return 0;
+err:
+	dev_err(scif_info.mdev.this_device,
+		"%s %d Desc Prog Failed ret %d\n",
+		__func__, __LINE__, ret);
+	return ret;
+}
+
+/*
+ * _scif_rma_list_dma_copy_aligned:
+ *
+ * Traverse all the windows and perform DMA copy.
+ */
+static int _scif_rma_list_dma_copy_aligned(struct scif_copy_work *work,
+					   struct dma_chan *chan)
+{
+	dma_addr_t src_dma_addr, dst_dma_addr;
+	size_t loop_len, remaining_len, src_contig_bytes = 0;
+	size_t dst_contig_bytes = 0;
+	struct scif_window_iter src_win_iter;
+	struct scif_window_iter dst_win_iter;
+	s64 end_src_offset, end_dst_offset;
+	struct scif_window *src_window = work->src_window;
+	struct scif_window *dst_window = work->dst_window;
+	s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
+	int ret = 0;
+	struct dma_async_tx_descriptor *tx;
+	struct dma_device *dev = chan->device;
+	dma_cookie_t cookie;
+
+	remaining_len = work->len;
+
+	scif_init_window_iter(src_window, &src_win_iter);
+	scif_init_window_iter(dst_window, &dst_win_iter);
+	end_src_offset = src_window->offset +
+		(src_window->nr_pages << PAGE_SHIFT);
+	end_dst_offset = dst_window->offset +
+		(dst_window->nr_pages << PAGE_SHIFT);
+	while (remaining_len) {
+		if (src_offset == end_src_offset) {
+			src_window = list_next_entry(src_window, list);
+			end_src_offset = src_window->offset +
+				(src_window->nr_pages << PAGE_SHIFT);
+			scif_init_window_iter(src_window, &src_win_iter);
+		}
+		if (dst_offset == end_dst_offset) {
+			dst_window = list_next_entry(dst_window, list);
+			end_dst_offset = dst_window->offset +
+				(dst_window->nr_pages << PAGE_SHIFT);
+			scif_init_window_iter(dst_window, &dst_win_iter);
+		}
+
+		/* compute dma addresses for transfer */
+		src_dma_addr = scif_off_to_dma_addr(src_window, src_offset,
+						    &src_contig_bytes,
+						    &src_win_iter);
+		dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset,
+						    &dst_contig_bytes,
+						    &dst_win_iter);
+		loop_len = min(src_contig_bytes, dst_contig_bytes);
+		loop_len = min(loop_len, remaining_len);
+		if (work->ordered && !(remaining_len - loop_len)) {
+			/*
+			 * Break up the last chunk of the transfer into two
+			 * steps to ensure that the last byte in step 2 is
+			 * updated last.
+			 */
+			/* Step 1) DMA: Body Length - 1 */
+			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+							 src_dma_addr,
+							 loop_len - 1,
+							 DMA_PREP_FENCE);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			src_offset += (loop_len - 1);
+			dst_offset += (loop_len - 1);
+			src_dma_addr += (loop_len - 1);
+			dst_dma_addr += (loop_len - 1);
+			remaining_len -= (loop_len - 1);
+			loop_len = remaining_len;
+
+			/* Step 2) DMA: 1 BYTES */
+			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+					src_dma_addr, loop_len, 0);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			dma_async_issue_pending(chan);
+		} else {
+			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+					src_dma_addr, loop_len, 0);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+		}
+		src_offset += loop_len;
+		dst_offset += loop_len;
+		remaining_len -= loop_len;
+	}
+	return ret;
+err:
+	dev_err(scif_info.mdev.this_device,
+		"%s %d Desc Prog Failed ret %d\n",
+		__func__, __LINE__, ret);
+	return ret;
+}
+
+/*
+ * scif_rma_list_dma_copy_aligned:
+ *
+ * Traverse all the windows and perform DMA copy.
+ */
+static int scif_rma_list_dma_copy_aligned(struct scif_copy_work *work,
+					  struct dma_chan *chan)
+{
+	dma_addr_t src_dma_addr, dst_dma_addr;
+	size_t loop_len, remaining_len, tail_len, src_contig_bytes = 0;
+	size_t dst_contig_bytes = 0;
+	int src_cache_off;
+	s64 end_src_offset, end_dst_offset;
+	struct scif_window_iter src_win_iter;
+	struct scif_window_iter dst_win_iter;
+	void *src_virt, *dst_virt;
+	struct scif_window *src_window = work->src_window;
+	struct scif_window *dst_window = work->dst_window;
+	s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
+	int ret = 0;
+	struct dma_async_tx_descriptor *tx;
+	struct dma_device *dev = chan->device;
+	dma_cookie_t cookie;
+
+	remaining_len = work->len;
+	scif_init_window_iter(src_window, &src_win_iter);
+	scif_init_window_iter(dst_window, &dst_win_iter);
+
+	src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
+	if (src_cache_off != 0) {
+		/* Head */
+		loop_len = L1_CACHE_BYTES - src_cache_off;
+		loop_len = min(loop_len, remaining_len);
+		src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset);
+		dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset);
+		if (src_window->type == SCIF_WINDOW_SELF)
+			src_virt = _get_local_va(src_offset, src_window,
+						 loop_len);
+		else
+			src_virt = ioremap_remote(src_offset, src_window,
+						  loop_len,
+						  work->remote_dev, NULL);
+		if (!src_virt)
+			return -ENOMEM;
+		if (dst_window->type == SCIF_WINDOW_SELF)
+			dst_virt = _get_local_va(dst_offset, dst_window,
+						 loop_len);
+		else
+			dst_virt = ioremap_remote(dst_offset, dst_window,
+						  loop_len,
+						  work->remote_dev, NULL);
+		if (!dst_virt) {
+			if (src_window->type != SCIF_WINDOW_SELF)
+				iounmap_remote(src_virt, loop_len, work);
+			return -ENOMEM;
+		}
+		if (src_window->type == SCIF_WINDOW_SELF)
+			scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len,
+						remaining_len == loop_len ?
+						work->ordered : false);
+		else
+			scif_unaligned_cpy_fromio(dst_virt, src_virt, loop_len,
+						  remaining_len == loop_len ?
+						  work->ordered : false);
+		if (src_window->type != SCIF_WINDOW_SELF)
+			iounmap_remote(src_virt, loop_len, work);
+		if (dst_window->type != SCIF_WINDOW_SELF)
+			iounmap_remote(dst_virt, loop_len, work);
+		src_offset += loop_len;
+		dst_offset += loop_len;
+		remaining_len -= loop_len;
+	}
+
+	end_src_offset = src_window->offset +
+		(src_window->nr_pages << PAGE_SHIFT);
+	end_dst_offset = dst_window->offset +
+		(dst_window->nr_pages << PAGE_SHIFT);
+	tail_len = remaining_len & (L1_CACHE_BYTES - 1);
+	remaining_len -= tail_len;
+	while (remaining_len) {
+		if (src_offset == end_src_offset) {
+			src_window = list_next_entry(src_window, list);
+			end_src_offset = src_window->offset +
+				(src_window->nr_pages << PAGE_SHIFT);
+			scif_init_window_iter(src_window, &src_win_iter);
+		}
+		if (dst_offset == end_dst_offset) {
+			dst_window = list_next_entry(dst_window, list);
+			end_dst_offset = dst_window->offset +
+				(dst_window->nr_pages << PAGE_SHIFT);
+			scif_init_window_iter(dst_window, &dst_win_iter);
+		}
+
+		/* compute dma addresses for transfer */
+		src_dma_addr = scif_off_to_dma_addr(src_window, src_offset,
+						    &src_contig_bytes,
+						    &src_win_iter);
+		dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset,
+						    &dst_contig_bytes,
+						    &dst_win_iter);
+		loop_len = min(src_contig_bytes, dst_contig_bytes);
+		loop_len = min(loop_len, remaining_len);
+		if (work->ordered && !tail_len &&
+		    !(remaining_len - loop_len)) {
+			/*
+			 * Break up the last chunk of the transfer into two
+			 * steps. if there is no tail to gurantee DMA ordering.
+			 * Passing SCIF_DMA_POLLING inserts a status update
+			 * descriptor in step 1 which acts as a double sided
+			 * synchronization fence for the DMA engine to ensure
+			 * that the last cache line in step 2 is updated last.
+			 */
+			/* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
+			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+							 src_dma_addr,
+							 loop_len -
+							 L1_CACHE_BYTES,
+							 DMA_PREP_FENCE);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			dma_async_issue_pending(chan);
+			src_offset += (loop_len - L1_CACHE_BYTES);
+			dst_offset += (loop_len - L1_CACHE_BYTES);
+			src_dma_addr += (loop_len - L1_CACHE_BYTES);
+			dst_dma_addr += (loop_len - L1_CACHE_BYTES);
+			remaining_len -= (loop_len - L1_CACHE_BYTES);
+			loop_len = remaining_len;
+
+			/* Step 2) DMA: L1_CACHE_BYTES */
+			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+							 src_dma_addr,
+							 loop_len, 0);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			dma_async_issue_pending(chan);
+		} else {
+			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+							 src_dma_addr,
+							 loop_len, 0);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			dma_async_issue_pending(chan);
+		}
+		src_offset += loop_len;
+		dst_offset += loop_len;
+		remaining_len -= loop_len;
+	}
+	remaining_len = tail_len;
+	if (remaining_len) {
+		loop_len = remaining_len;
+		if (src_offset == end_src_offset)
+			src_window = list_next_entry(src_window, list);
+		if (dst_offset == end_dst_offset)
+			dst_window = list_next_entry(dst_window, list);
+
+		src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset);
+		dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset);
+		/*
+		 * The CPU copy for the tail bytes must be initiated only once
+		 * previous DMA transfers for this endpoint have completed to
+		 * guarantee ordering.
+		 */
+		if (work->ordered) {
+			struct scif_dev *rdev = work->remote_dev;
+
+			ret = scif_drain_dma_poll(rdev->sdev, chan);
+			if (ret)
+				return ret;
+		}
+		if (src_window->type == SCIF_WINDOW_SELF)
+			src_virt = _get_local_va(src_offset, src_window,
+						 loop_len);
+		else
+			src_virt = ioremap_remote(src_offset, src_window,
+						  loop_len,
+						  work->remote_dev, NULL);
+		if (!src_virt)
+			return -ENOMEM;
+
+		if (dst_window->type == SCIF_WINDOW_SELF)
+			dst_virt = _get_local_va(dst_offset, dst_window,
+						 loop_len);
+		else
+			dst_virt = ioremap_remote(dst_offset, dst_window,
+						  loop_len,
+						  work->remote_dev, NULL);
+		if (!dst_virt) {
+			if (src_window->type != SCIF_WINDOW_SELF)
+				iounmap_remote(src_virt, loop_len, work);
+			return -ENOMEM;
+		}
+
+		if (src_window->type == SCIF_WINDOW_SELF)
+			scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len,
+						work->ordered);
+		else
+			scif_unaligned_cpy_fromio(dst_virt, src_virt,
+						  loop_len, work->ordered);
+		if (src_window->type != SCIF_WINDOW_SELF)
+			iounmap_remote(src_virt, loop_len, work);
+
+		if (dst_window->type != SCIF_WINDOW_SELF)
+			iounmap_remote(dst_virt, loop_len, work);
+		remaining_len -= loop_len;
+	}
+	return ret;
+err:
+	dev_err(scif_info.mdev.this_device,
+		"%s %d Desc Prog Failed ret %d\n",
+		__func__, __LINE__, ret);
+	return ret;
+}
+
+/*
+ * scif_rma_list_cpu_copy:
+ *
+ * Traverse all the windows and perform CPU copy.
+ */
+static int scif_rma_list_cpu_copy(struct scif_copy_work *work)
+{
+	void *src_virt, *dst_virt;
+	size_t loop_len, remaining_len;
+	int src_page_off, dst_page_off;
+	s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
+	struct scif_window *src_window = work->src_window;
+	struct scif_window *dst_window = work->dst_window;
+	s64 end_src_offset, end_dst_offset;
+	int ret = 0;
+	struct scif_window_iter src_win_iter;
+	struct scif_window_iter dst_win_iter;
+
+	remaining_len = work->len;
+
+	scif_init_window_iter(src_window, &src_win_iter);
+	scif_init_window_iter(dst_window, &dst_win_iter);
+	while (remaining_len) {
+		src_page_off = src_offset & ~PAGE_MASK;
+		dst_page_off = dst_offset & ~PAGE_MASK;
+		loop_len = min(PAGE_SIZE -
+			       max(src_page_off, dst_page_off),
+			       remaining_len);
+
+		if (src_window->type == SCIF_WINDOW_SELF)
+			src_virt = _get_local_va(src_offset, src_window,
+						 loop_len);
+		else
+			src_virt = ioremap_remote(src_offset, src_window,
+						  loop_len,
+						  work->remote_dev,
+						  &src_win_iter);
+		if (!src_virt) {
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		if (dst_window->type == SCIF_WINDOW_SELF)
+			dst_virt = _get_local_va(dst_offset, dst_window,
+						 loop_len);
+		else
+			dst_virt = ioremap_remote(dst_offset, dst_window,
+						  loop_len,
+						  work->remote_dev,
+						  &dst_win_iter);
+		if (!dst_virt) {
+			if (src_window->type == SCIF_WINDOW_PEER)
+				iounmap_remote(src_virt, loop_len, work);
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		if (work->loopback) {
+			memcpy(dst_virt, src_virt, loop_len);
+		} else {
+			if (src_window->type == SCIF_WINDOW_SELF)
+				memcpy_toio((void __iomem __force *)dst_virt,
+					    src_virt, loop_len);
+			else
+				memcpy_fromio(dst_virt,
+					      (void __iomem __force *)src_virt,
+					      loop_len);
+		}
+		if (src_window->type == SCIF_WINDOW_PEER)
+			iounmap_remote(src_virt, loop_len, work);
+
+		if (dst_window->type == SCIF_WINDOW_PEER)
+			iounmap_remote(dst_virt, loop_len, work);
+
+		src_offset += loop_len;
+		dst_offset += loop_len;
+		remaining_len -= loop_len;
+		if (remaining_len) {
+			end_src_offset = src_window->offset +
+				(src_window->nr_pages << PAGE_SHIFT);
+			end_dst_offset = dst_window->offset +
+				(dst_window->nr_pages << PAGE_SHIFT);
+			if (src_offset == end_src_offset) {
+				src_window = list_next_entry(src_window, list);
+				scif_init_window_iter(src_window,
+						      &src_win_iter);
+			}
+			if (dst_offset == end_dst_offset) {
+				dst_window = list_next_entry(dst_window, list);
+				scif_init_window_iter(dst_window,
+						      &dst_win_iter);
+			}
+		}
+	}
+error:
+	return ret;
+}
+
+static int scif_rma_list_dma_copy_wrapper(struct scif_endpt *epd,
+					  struct scif_copy_work *work,
+					  struct dma_chan *chan, off_t loffset)
+{
+	int src_cache_off, dst_cache_off;
+	s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
+	u8 *temp = NULL;
+	bool src_local = true, dst_local = false;
+	struct scif_dma_comp_cb *comp_cb;
+	dma_addr_t src_dma_addr, dst_dma_addr;
+	int err;
+
+	if (is_dma_copy_aligned(chan->device, 1, 1, 1))
+		return _scif_rma_list_dma_copy_aligned(work, chan);
+
+	src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
+	dst_cache_off = dst_offset & (L1_CACHE_BYTES - 1);
+
+	if (dst_cache_off == src_cache_off)
+		return scif_rma_list_dma_copy_aligned(work, chan);
+
+	if (work->loopback)
+		return scif_rma_list_cpu_copy(work);
+	src_dma_addr = __scif_off_to_dma_addr(work->src_window, src_offset);
+	dst_dma_addr = __scif_off_to_dma_addr(work->dst_window, dst_offset);
+	src_local = work->src_window->type == SCIF_WINDOW_SELF;
+	dst_local = work->dst_window->type == SCIF_WINDOW_SELF;
+
+	dst_local = dst_local;
+	/* Allocate dma_completion cb */
+	comp_cb = kzalloc(sizeof(*comp_cb), GFP_KERNEL);
+	if (!comp_cb)
+		goto error;
+
+	work->comp_cb = comp_cb;
+	comp_cb->cb_cookie = comp_cb;
+	comp_cb->dma_completion_func = &scif_rma_completion_cb;
+
+	if (work->len + (L1_CACHE_BYTES << 1) < SCIF_KMEM_UNALIGNED_BUF_SIZE) {
+		comp_cb->is_cache = false;
+		/* Allocate padding bytes to align to a cache line */
+		temp = kmalloc(work->len + (L1_CACHE_BYTES << 1),
+			       GFP_KERNEL);
+		if (!temp)
+			goto free_comp_cb;
+		comp_cb->temp_buf_to_free = temp;
+		/* kmalloc(..) does not guarantee cache line alignment */
+		if (!IS_ALIGNED((u64)temp, L1_CACHE_BYTES))
+			temp = PTR_ALIGN(temp, L1_CACHE_BYTES);
+	} else {
+		comp_cb->is_cache = true;
+		temp = kmem_cache_alloc(unaligned_cache, GFP_KERNEL);
+		if (!temp)
+			goto free_comp_cb;
+		comp_cb->temp_buf_to_free = temp;
+	}
+
+	if (src_local) {
+		temp += dst_cache_off;
+		scif_rma_local_cpu_copy(work->src_offset, work->src_window,
+					temp, work->len, true);
+	} else {
+		comp_cb->dst_window = work->dst_window;
+		comp_cb->dst_offset = work->dst_offset;
+		work->src_offset = work->src_offset - src_cache_off;
+		comp_cb->len = work->len;
+		work->len = ALIGN(work->len + src_cache_off, L1_CACHE_BYTES);
+		comp_cb->header_padding = src_cache_off;
+	}
+	comp_cb->temp_buf = temp;
+
+	err = scif_map_single(&comp_cb->temp_phys, temp,
+			      work->remote_dev, SCIF_KMEM_UNALIGNED_BUF_SIZE);
+	if (err)
+		goto free_temp_buf;
+	comp_cb->sdev = work->remote_dev;
+	if (scif_rma_list_dma_copy_unaligned(work, temp, chan, src_local) < 0)
+		goto free_temp_buf;
+	if (!src_local)
+		work->fence_type = SCIF_DMA_INTR;
+	return 0;
+free_temp_buf:
+	if (comp_cb->is_cache)
+		kmem_cache_free(unaligned_cache, comp_cb->temp_buf_to_free);
+	else
+		kfree(comp_cb->temp_buf_to_free);
+free_comp_cb:
+	kfree(comp_cb);
+error:
+	return -ENOMEM;
+}
+
+/**
+ * scif_rma_copy:
+ * @epd: end point descriptor.
+ * @loffset: offset in local registered address space to/from which to copy
+ * @addr: user virtual address to/from which to copy
+ * @len: length of range to copy
+ * @roffset: offset in remote registered address space to/from which to copy
+ * @flags: flags
+ * @dir: LOCAL->REMOTE or vice versa.
+ * @last_chunk: true if this is the last chunk of a larger transfer
+ *
+ * Validate parameters, check if src/dst registered ranges requested for copy
+ * are valid and initiate either CPU or DMA copy.
+ */
+static int scif_rma_copy(scif_epd_t epd, off_t loffset, unsigned long addr,
+			 size_t len, off_t roffset, int flags,
+			 enum scif_rma_dir dir, bool last_chunk)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scif_rma_req remote_req;
+	struct scif_rma_req req;
+	struct scif_window *local_window = NULL;
+	struct scif_window *remote_window = NULL;
+	struct scif_copy_work copy_work;
+	bool loopback;
+	int err = 0;
+	struct dma_chan *chan;
+	struct scif_mmu_notif *mmn = NULL;
+	bool cache = false;
+	struct device *spdev;
+
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	if (flags && !(flags & (SCIF_RMA_USECPU | SCIF_RMA_USECACHE |
+				SCIF_RMA_SYNC | SCIF_RMA_ORDERED)))
+		return -EINVAL;
+
+	loopback = scifdev_self(ep->remote_dev) ? true : false;
+	copy_work.fence_type = ((flags & SCIF_RMA_SYNC) && last_chunk) ?
+				SCIF_DMA_POLL : 0;
+	copy_work.ordered = !!((flags & SCIF_RMA_ORDERED) && last_chunk);
+
+	/* Use CPU for Mgmt node <-> Mgmt node copies */
+	if (loopback && scif_is_mgmt_node()) {
+		flags |= SCIF_RMA_USECPU;
+		copy_work.fence_type = 0x0;
+	}
+
+	cache = scif_is_set_reg_cache(flags);
+
+	remote_req.out_window = &remote_window;
+	remote_req.offset = roffset;
+	remote_req.nr_bytes = len;
+	/*
+	 * If transfer is from local to remote then the remote window
+	 * must be writeable and vice versa.
+	 */
+	remote_req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_WRITE : VM_READ;
+	remote_req.type = SCIF_WINDOW_PARTIAL;
+	remote_req.head = &ep->rma_info.remote_reg_list;
+
+	spdev = scif_get_peer_dev(ep->remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		return err;
+	}
+
+	if (addr && cache) {
+		mutex_lock(&ep->rma_info.mmn_lock);
+		mmn = scif_find_mmu_notifier(current->mm, &ep->rma_info);
+		if (!mmn)
+			mmn = scif_add_mmu_notifier(current->mm, ep);
+		mutex_unlock(&ep->rma_info.mmn_lock);
+		if (IS_ERR(mmn)) {
+			scif_put_peer_dev(spdev);
+			return PTR_ERR(mmn);
+		}
+		cache = cache && !scif_rma_tc_can_cache(ep, len);
+	}
+	mutex_lock(&ep->rma_info.rma_lock);
+	if (addr) {
+		req.out_window = &local_window;
+		req.nr_bytes = ALIGN(len + (addr & ~PAGE_MASK),
+				     PAGE_SIZE);
+		req.va_for_temp = addr & PAGE_MASK;
+		req.prot = (dir == SCIF_LOCAL_TO_REMOTE ?
+			    VM_READ : VM_WRITE | VM_READ);
+		/* Does a valid local window exist? */
+		if (mmn) {
+			spin_lock(&ep->rma_info.tc_lock);
+			req.head = &mmn->tc_reg_list;
+			err = scif_query_tcw(ep, &req);
+			spin_unlock(&ep->rma_info.tc_lock);
+		}
+		if (!mmn || err) {
+			err = scif_register_temp(epd, req.va_for_temp,
+						 req.nr_bytes, req.prot,
+						 &loffset, &local_window);
+			if (err) {
+				mutex_unlock(&ep->rma_info.rma_lock);
+				goto error;
+			}
+			if (!cache)
+				goto skip_cache;
+			atomic_inc(&ep->rma_info.tcw_refcount);
+			atomic_add_return(local_window->nr_pages,
+					  &ep->rma_info.tcw_total_pages);
+			if (mmn) {
+				spin_lock(&ep->rma_info.tc_lock);
+				scif_insert_tcw(local_window,
+						&mmn->tc_reg_list);
+				spin_unlock(&ep->rma_info.tc_lock);
+			}
+		}
+skip_cache:
+		loffset = local_window->offset +
+				(addr - local_window->va_for_temp);
+	} else {
+		req.out_window = &local_window;
+		req.offset = loffset;
+		/*
+		 * If transfer is from local to remote then the self window
+		 * must be readable and vice versa.
+		 */
+		req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_READ : VM_WRITE;
+		req.nr_bytes = len;
+		req.type = SCIF_WINDOW_PARTIAL;
+		req.head = &ep->rma_info.reg_list;
+		/* Does a valid local window exist? */
+		err = scif_query_window(&req);
+		if (err) {
+			mutex_unlock(&ep->rma_info.rma_lock);
+			goto error;
+		}
+	}
+
+	/* Does a valid remote window exist? */
+	err = scif_query_window(&remote_req);
+	if (err) {
+		mutex_unlock(&ep->rma_info.rma_lock);
+		goto error;
+	}
+
+	/*
+	 * Prepare copy_work for submitting work to the DMA kernel thread
+	 * or CPU copy routine.
+	 */
+	copy_work.len = len;
+	copy_work.loopback = loopback;
+	copy_work.remote_dev = ep->remote_dev;
+	if (dir == SCIF_LOCAL_TO_REMOTE) {
+		copy_work.src_offset = loffset;
+		copy_work.src_window = local_window;
+		copy_work.dst_offset = roffset;
+		copy_work.dst_window = remote_window;
+	} else {
+		copy_work.src_offset = roffset;
+		copy_work.src_window = remote_window;
+		copy_work.dst_offset = loffset;
+		copy_work.dst_window = local_window;
+	}
+
+	if (flags & SCIF_RMA_USECPU) {
+		scif_rma_list_cpu_copy(&copy_work);
+	} else {
+		chan = ep->rma_info.dma_chan;
+		err = scif_rma_list_dma_copy_wrapper(epd, &copy_work,
+						     chan, loffset);
+	}
+	if (addr && !cache)
+		atomic_inc(&ep->rma_info.tw_refcount);
+
+	mutex_unlock(&ep->rma_info.rma_lock);
+
+	if (last_chunk) {
+		struct scif_dev *rdev = ep->remote_dev;
+
+		if (copy_work.fence_type == SCIF_DMA_POLL)
+			err = scif_drain_dma_poll(rdev->sdev,
+						  ep->rma_info.dma_chan);
+		else if (copy_work.fence_type == SCIF_DMA_INTR)
+			err = scif_drain_dma_intr(rdev->sdev,
+						  ep->rma_info.dma_chan);
+	}
+
+	if (addr && !cache)
+		scif_queue_for_cleanup(local_window, &scif_info.rma);
+	scif_put_peer_dev(spdev);
+	return err;
+error:
+	if (err) {
+		if (addr && local_window && !cache)
+			scif_destroy_window(ep, local_window);
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d len 0x%lx\n",
+			__func__, __LINE__, err, len);
+	}
+	scif_put_peer_dev(spdev);
+	return err;
+}
+
+int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len,
+		  off_t roffset, int flags)
+{
+	int err;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI readfrom: ep %p loffset 0x%lx len 0x%lx offset 0x%lx flags 0x%x\n",
+		epd, loffset, len, roffset, flags);
+	if (scif_unaligned(loffset, roffset)) {
+		while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
+			err = scif_rma_copy(epd, loffset, 0x0,
+					    SCIF_MAX_UNALIGNED_BUF_SIZE,
+					    roffset, flags,
+					    SCIF_REMOTE_TO_LOCAL, false);
+			if (err)
+				goto readfrom_err;
+			loffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
+		}
+	}
+	err = scif_rma_copy(epd, loffset, 0x0, len,
+			    roffset, flags, SCIF_REMOTE_TO_LOCAL, true);
+readfrom_err:
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_readfrom);
+
+int scif_writeto(scif_epd_t epd, off_t loffset, size_t len,
+		 off_t roffset, int flags)
+{
+	int err;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI writeto: ep %p loffset 0x%lx len 0x%lx roffset 0x%lx flags 0x%x\n",
+		epd, loffset, len, roffset, flags);
+	if (scif_unaligned(loffset, roffset)) {
+		while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
+			err = scif_rma_copy(epd, loffset, 0x0,
+					    SCIF_MAX_UNALIGNED_BUF_SIZE,
+					    roffset, flags,
+					    SCIF_LOCAL_TO_REMOTE, false);
+			if (err)
+				goto writeto_err;
+			loffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
+		}
+	}
+	err = scif_rma_copy(epd, loffset, 0x0, len,
+			    roffset, flags, SCIF_LOCAL_TO_REMOTE, true);
+writeto_err:
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_writeto);
+
+int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len,
+		   off_t roffset, int flags)
+{
+	int err;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI vreadfrom: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n",
+		epd, addr, len, roffset, flags);
+	if (scif_unaligned((off_t __force)addr, roffset)) {
+		if (len > SCIF_MAX_UNALIGNED_BUF_SIZE)
+			flags &= ~SCIF_RMA_USECACHE;
+
+		while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
+			err = scif_rma_copy(epd, 0, (u64)addr,
+					    SCIF_MAX_UNALIGNED_BUF_SIZE,
+					    roffset, flags,
+					    SCIF_REMOTE_TO_LOCAL, false);
+			if (err)
+				goto vreadfrom_err;
+			addr += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
+		}
+	}
+	err = scif_rma_copy(epd, 0, (u64)addr, len,
+			    roffset, flags, SCIF_REMOTE_TO_LOCAL, true);
+vreadfrom_err:
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_vreadfrom);
+
+int scif_vwriteto(scif_epd_t epd, void *addr, size_t len,
+		  off_t roffset, int flags)
+{
+	int err;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI vwriteto: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n",
+		epd, addr, len, roffset, flags);
+	if (scif_unaligned((off_t __force)addr, roffset)) {
+		if (len > SCIF_MAX_UNALIGNED_BUF_SIZE)
+			flags &= ~SCIF_RMA_USECACHE;
+
+		while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
+			err = scif_rma_copy(epd, 0, (u64)addr,
+					    SCIF_MAX_UNALIGNED_BUF_SIZE,
+					    roffset, flags,
+					    SCIF_LOCAL_TO_REMOTE, false);
+			if (err)
+				goto vwriteto_err;
+			addr += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
+		}
+	}
+	err = scif_rma_copy(epd, 0, (u64)addr, len,
+			    roffset, flags, SCIF_LOCAL_TO_REMOTE, true);
+vwriteto_err:
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_vwriteto);
diff --git a/drivers/misc/mic/scif/scif_epd.c b/drivers/misc/mic/scif/scif_epd.c
new file mode 100644
index 000000000..00e5d6d66
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_epd.c
@@ -0,0 +1,357 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+#include "scif_map.h"
+
+void scif_cleanup_ep_qp(struct scif_endpt *ep)
+{
+	struct scif_qp *qp = ep->qp_info.qp;
+
+	if (qp->outbound_q.rb_base) {
+		scif_iounmap((void *)qp->outbound_q.rb_base,
+			     qp->outbound_q.size, ep->remote_dev);
+		qp->outbound_q.rb_base = NULL;
+	}
+	if (qp->remote_qp) {
+		scif_iounmap((void *)qp->remote_qp,
+			     sizeof(struct scif_qp), ep->remote_dev);
+		qp->remote_qp = NULL;
+	}
+	if (qp->local_qp) {
+		scif_unmap_single(qp->local_qp, ep->remote_dev,
+				  sizeof(struct scif_qp));
+		qp->local_qp = 0x0;
+	}
+	if (qp->local_buf) {
+		scif_unmap_single(qp->local_buf, ep->remote_dev,
+				  SCIF_ENDPT_QP_SIZE);
+		qp->local_buf = 0;
+	}
+}
+
+void scif_teardown_ep(void *endpt)
+{
+	struct scif_endpt *ep = endpt;
+	struct scif_qp *qp = ep->qp_info.qp;
+
+	if (qp) {
+		spin_lock(&ep->lock);
+		scif_cleanup_ep_qp(ep);
+		spin_unlock(&ep->lock);
+		kfree(qp->inbound_q.rb_base);
+		kfree(qp);
+	}
+}
+
+/*
+ * Enqueue the endpoint to the zombie list for cleanup.
+ * The endpoint should not be accessed once this API returns.
+ */
+void scif_add_epd_to_zombie_list(struct scif_endpt *ep, bool eplock_held)
+{
+	if (!eplock_held)
+		mutex_lock(&scif_info.eplock);
+	spin_lock(&ep->lock);
+	ep->state = SCIFEP_ZOMBIE;
+	spin_unlock(&ep->lock);
+	list_add_tail(&ep->list, &scif_info.zombie);
+	scif_info.nr_zombies++;
+	if (!eplock_held)
+		mutex_unlock(&scif_info.eplock);
+	schedule_work(&scif_info.misc_work);
+}
+
+static struct scif_endpt *scif_find_listen_ep(u16 port)
+{
+	struct scif_endpt *ep = NULL;
+	struct list_head *pos, *tmpq;
+
+	mutex_lock(&scif_info.eplock);
+	list_for_each_safe(pos, tmpq, &scif_info.listen) {
+		ep = list_entry(pos, struct scif_endpt, list);
+		if (ep->port.port == port) {
+			mutex_unlock(&scif_info.eplock);
+			return ep;
+		}
+	}
+	mutex_unlock(&scif_info.eplock);
+	return NULL;
+}
+
+void scif_cleanup_zombie_epd(void)
+{
+	struct list_head *pos, *tmpq;
+	struct scif_endpt *ep;
+
+	mutex_lock(&scif_info.eplock);
+	list_for_each_safe(pos, tmpq, &scif_info.zombie) {
+		ep = list_entry(pos, struct scif_endpt, list);
+		if (scif_rma_ep_can_uninit(ep)) {
+			list_del(pos);
+			scif_info.nr_zombies--;
+			put_iova_domain(&ep->rma_info.iovad);
+			kfree(ep);
+		}
+	}
+	mutex_unlock(&scif_info.eplock);
+}
+
+/**
+ * scif_cnctreq() - Respond to SCIF_CNCT_REQ interrupt message
+ * @msg:        Interrupt message
+ *
+ * This message is initiated by the remote node to request a connection
+ * to the local node.  This function looks for an end point in the
+ * listen state on the requested port id.
+ *
+ * If it finds a listening port it places the connect request on the
+ * listening end points queue and wakes up any pending accept calls.
+ *
+ * If it does not find a listening end point it sends a connection
+ * reject message to the remote node.
+ */
+void scif_cnctreq(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = NULL;
+	struct scif_conreq *conreq;
+
+	conreq = kmalloc(sizeof(*conreq), GFP_KERNEL);
+	if (!conreq)
+		/* Lack of resources so reject the request. */
+		goto conreq_sendrej;
+
+	ep = scif_find_listen_ep(msg->dst.port);
+	if (!ep)
+		/*  Send reject due to no listening ports */
+		goto conreq_sendrej_free;
+	else
+		spin_lock(&ep->lock);
+
+	if (ep->backlog <= ep->conreqcnt) {
+		/*  Send reject due to too many pending requests */
+		spin_unlock(&ep->lock);
+		goto conreq_sendrej_free;
+	}
+
+	conreq->msg = *msg;
+	list_add_tail(&conreq->list, &ep->conlist);
+	ep->conreqcnt++;
+	wake_up_interruptible(&ep->conwq);
+	spin_unlock(&ep->lock);
+	return;
+
+conreq_sendrej_free:
+	kfree(conreq);
+conreq_sendrej:
+	msg->uop = SCIF_CNCT_REJ;
+	scif_nodeqp_send(&scif_dev[msg->src.node], msg);
+}
+
+/**
+ * scif_cnctgnt() - Respond to SCIF_CNCT_GNT interrupt message
+ * @msg:        Interrupt message
+ *
+ * An accept() on the remote node has occurred and sent this message
+ * to indicate success.  Place the end point in the MAPPING state and
+ * save the remote nodes memory information.  Then wake up the connect
+ * request so it can finish.
+ */
+void scif_cnctgnt(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+	spin_lock(&ep->lock);
+	if (SCIFEP_CONNECTING == ep->state) {
+		ep->peer.node = msg->src.node;
+		ep->peer.port = msg->src.port;
+		ep->qp_info.gnt_pld = msg->payload[1];
+		ep->remote_ep = msg->payload[2];
+		ep->state = SCIFEP_MAPPING;
+
+		wake_up(&ep->conwq);
+	}
+	spin_unlock(&ep->lock);
+}
+
+/**
+ * scif_cnctgnt_ack() - Respond to SCIF_CNCT_GNTACK interrupt message
+ * @msg:        Interrupt message
+ *
+ * The remote connection request has finished mapping the local memory.
+ * Place the connection in the connected state and wake up the pending
+ * accept() call.
+ */
+void scif_cnctgnt_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+	mutex_lock(&scif_info.connlock);
+	spin_lock(&ep->lock);
+	/* New ep is now connected with all resources set. */
+	ep->state = SCIFEP_CONNECTED;
+	list_add_tail(&ep->list, &scif_info.connected);
+	wake_up(&ep->conwq);
+	spin_unlock(&ep->lock);
+	mutex_unlock(&scif_info.connlock);
+}
+
+/**
+ * scif_cnctgnt_nack() - Respond to SCIF_CNCT_GNTNACK interrupt message
+ * @msg:        Interrupt message
+ *
+ * The remote connection request failed to map the local memory it was sent.
+ * Place the end point in the CLOSING state to indicate it and wake up
+ * the pending accept();
+ */
+void scif_cnctgnt_nack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+	spin_lock(&ep->lock);
+	ep->state = SCIFEP_CLOSING;
+	wake_up(&ep->conwq);
+	spin_unlock(&ep->lock);
+}
+
+/**
+ * scif_cnctrej() - Respond to SCIF_CNCT_REJ interrupt message
+ * @msg:        Interrupt message
+ *
+ * The remote end has rejected the connection request.  Set the end
+ * point back to the bound state and wake up the pending connect().
+ */
+void scif_cnctrej(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+	spin_lock(&ep->lock);
+	if (SCIFEP_CONNECTING == ep->state) {
+		ep->state = SCIFEP_BOUND;
+		wake_up(&ep->conwq);
+	}
+	spin_unlock(&ep->lock);
+}
+
+/**
+ * scif_discnct() - Respond to SCIF_DISCNCT interrupt message
+ * @msg:        Interrupt message
+ *
+ * The remote node has indicated close() has been called on its end
+ * point.  Remove the local end point from the connected list, set its
+ * state to disconnected and ensure accesses to the remote node are
+ * shutdown.
+ *
+ * When all accesses to the remote end have completed then send a
+ * DISCNT_ACK to indicate it can remove its resources and complete
+ * the close routine.
+ */
+void scif_discnct(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = NULL;
+	struct scif_endpt *tmpep;
+	struct list_head *pos, *tmpq;
+
+	mutex_lock(&scif_info.connlock);
+	list_for_each_safe(pos, tmpq, &scif_info.connected) {
+		tmpep = list_entry(pos, struct scif_endpt, list);
+		/*
+		 * The local ep may have sent a disconnect and and been closed
+		 * due to a message response time out. It may have been
+		 * allocated again and formed a new connection so we want to
+		 * check if the remote ep matches
+		 */
+		if (((u64)tmpep == msg->payload[1]) &&
+		    ((u64)tmpep->remote_ep == msg->payload[0])) {
+			list_del(pos);
+			ep = tmpep;
+			spin_lock(&ep->lock);
+			break;
+		}
+	}
+
+	/*
+	 * If the terminated end is not found then this side started closing
+	 * before the other side sent the disconnect.  If so the ep will no
+	 * longer be on the connected list.  Regardless the other side
+	 * needs to be acked to let it know close is complete.
+	 */
+	if (!ep) {
+		mutex_unlock(&scif_info.connlock);
+		goto discnct_ack;
+	}
+
+	ep->state = SCIFEP_DISCONNECTED;
+	list_add_tail(&ep->list, &scif_info.disconnected);
+
+	wake_up_interruptible(&ep->sendwq);
+	wake_up_interruptible(&ep->recvwq);
+	spin_unlock(&ep->lock);
+	mutex_unlock(&scif_info.connlock);
+
+discnct_ack:
+	msg->uop = SCIF_DISCNT_ACK;
+	scif_nodeqp_send(&scif_dev[msg->src.node], msg);
+}
+
+/**
+ * scif_discnct_ack() - Respond to SCIF_DISCNT_ACK interrupt message
+ * @msg:        Interrupt message
+ *
+ * Remote side has indicated it has not more references to local resources
+ */
+void scif_discnt_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+	spin_lock(&ep->lock);
+	ep->state = SCIFEP_DISCONNECTED;
+	spin_unlock(&ep->lock);
+	complete(&ep->discon);
+}
+
+/**
+ * scif_clientsend() - Respond to SCIF_CLIENT_SEND interrupt message
+ * @msg:        Interrupt message
+ *
+ * Remote side is confirming send or receive interrupt handling is complete.
+ */
+void scif_clientsend(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+	spin_lock(&ep->lock);
+	if (SCIFEP_CONNECTED == ep->state)
+		wake_up_interruptible(&ep->recvwq);
+	spin_unlock(&ep->lock);
+}
+
+/**
+ * scif_clientrcvd() - Respond to SCIF_CLIENT_RCVD interrupt message
+ * @msg:        Interrupt message
+ *
+ * Remote side is confirming send or receive interrupt handling is complete.
+ */
+void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+	spin_lock(&ep->lock);
+	if (SCIFEP_CONNECTED == ep->state)
+		wake_up_interruptible(&ep->sendwq);
+	spin_unlock(&ep->lock);
+}
diff --git a/drivers/misc/mic/scif/scif_epd.h b/drivers/misc/mic/scif/scif_epd.h
new file mode 100644
index 000000000..f39b663da
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_epd.h
@@ -0,0 +1,210 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_EPD_H
+#define SCIF_EPD_H
+
+#include <linux/delay.h>
+#include <linux/scif.h>
+#include <linux/scif_ioctl.h>
+
+#define SCIF_EPLOCK_HELD true
+
+enum scif_epd_state {
+	SCIFEP_UNBOUND,
+	SCIFEP_BOUND,
+	SCIFEP_LISTENING,
+	SCIFEP_CONNECTED,
+	SCIFEP_CONNECTING,
+	SCIFEP_MAPPING,
+	SCIFEP_CLOSING,
+	SCIFEP_CLLISTEN,
+	SCIFEP_DISCONNECTED,
+	SCIFEP_ZOMBIE
+};
+
+/*
+ * struct scif_conreq - Data structure added to the connection list.
+ *
+ * @msg: connection request message received
+ * @list: link to list of connection requests
+ */
+struct scif_conreq {
+	struct scifmsg msg;
+	struct list_head list;
+};
+
+/* Size of the RB for the Endpoint QP */
+#define SCIF_ENDPT_QP_SIZE 0x1000
+
+/*
+ * scif_endpt_qp_info - SCIF endpoint queue pair
+ *
+ * @qp - Qpair for this endpoint
+ * @qp_offset - DMA address of the QP
+ * @gnt_pld - Payload in a SCIF_CNCT_GNT message containing the
+ * physical address of the remote_qp.
+ */
+struct scif_endpt_qp_info {
+	struct scif_qp *qp;
+	dma_addr_t qp_offset;
+	dma_addr_t gnt_pld;
+};
+
+/*
+ * struct scif_endpt - The SCIF endpoint data structure
+ *
+ * @state: end point state
+ * @lock: lock synchronizing access to endpoint fields like state etc
+ * @port: self port information
+ * @peer: peer port information
+ * @backlog: maximum pending connection requests
+ * @qp_info: Endpoint QP information for SCIF messaging
+ * @remote_dev: scifdev used by this endpt to communicate with remote node.
+ * @remote_ep: remote endpoint
+ * @conreqcnt: Keep track of number of connection requests.
+ * @files: Open file information used to match the id passed in with
+ *         the flush routine.
+ * @conlist: list of connection requests
+ * @conwq: waitqueue for connection processing
+ * @discon: completion used during disconnection
+ * @sendwq: waitqueue used during sending messages
+ * @recvwq: waitqueue used during message receipt
+ * @sendlock: Synchronize ordering of messages sent
+ * @recvlock: Synchronize ordering of messages received
+ * @list: link to list of various endpoints like connected, listening etc
+ * @li_accept: pending ACCEPTREG
+ * @acceptcnt: pending ACCEPTREG cnt
+ * @liacceptlist: link to listen accept
+ * @miacceptlist: link to uaccept
+ * @listenep: associated listen ep
+ * @conn_work: Non blocking connect work
+ * @conn_port: Connection port
+ * @conn_err: Errors during connection
+ * @conn_async_state: Async connection
+ * @conn_pend_wq: Used by poll while waiting for incoming connections
+ * @conn_list: List of async connection requests
+ * @rma_info: Information for triggering SCIF RMA and DMA operations
+ * @mmu_list: link to list of MMU notifier cleanup work
+ * @anon: anonymous file for use in kernel mode scif poll
+ */
+struct scif_endpt {
+	enum scif_epd_state state;
+	spinlock_t lock;
+	struct scif_port_id port;
+	struct scif_port_id peer;
+	int backlog;
+	struct scif_endpt_qp_info qp_info;
+	struct scif_dev *remote_dev;
+	u64 remote_ep;
+	int conreqcnt;
+	struct files_struct *files;
+	struct list_head conlist;
+	wait_queue_head_t conwq;
+	struct completion discon;
+	wait_queue_head_t sendwq;
+	wait_queue_head_t recvwq;
+	struct mutex sendlock;
+	struct mutex recvlock;
+	struct list_head list;
+	struct list_head li_accept;
+	int acceptcnt;
+	struct list_head liacceptlist;
+	struct list_head miacceptlist;
+	struct scif_endpt *listenep;
+	struct scif_port_id conn_port;
+	int conn_err;
+	int conn_async_state;
+	wait_queue_head_t conn_pend_wq;
+	struct list_head conn_list;
+	struct scif_endpt_rma_info rma_info;
+	struct list_head mmu_list;
+	struct file *anon;
+};
+
+static inline int scifdev_alive(struct scif_endpt *ep)
+{
+	return _scifdev_alive(ep->remote_dev);
+}
+
+/*
+ * scif_verify_epd:
+ * ep: SCIF endpoint
+ *
+ * Checks several generic error conditions and returns the
+ * appropriate error.
+ */
+static inline int scif_verify_epd(struct scif_endpt *ep)
+{
+	if (ep->state == SCIFEP_DISCONNECTED)
+		return -ECONNRESET;
+
+	if (ep->state != SCIFEP_CONNECTED)
+		return -ENOTCONN;
+
+	if (!scifdev_alive(ep))
+		return -ENODEV;
+
+	return 0;
+}
+
+static inline int scif_anon_inode_getfile(scif_epd_t epd)
+{
+	epd->anon = anon_inode_getfile("scif", &scif_anon_fops, NULL, 0);
+	if (IS_ERR(epd->anon))
+		return PTR_ERR(epd->anon);
+	return 0;
+}
+
+static inline void scif_anon_inode_fput(scif_epd_t epd)
+{
+	if (epd->anon) {
+		fput(epd->anon);
+		epd->anon = NULL;
+	}
+}
+
+void scif_cleanup_zombie_epd(void);
+void scif_teardown_ep(void *endpt);
+void scif_cleanup_ep_qp(struct scif_endpt *ep);
+void scif_add_epd_to_zombie_list(struct scif_endpt *ep, bool eplock_held);
+void scif_get_node_info(void);
+void scif_send_acks(struct scif_dev *dev);
+void scif_conn_handler(struct work_struct *work);
+int scif_rsrv_port(u16 port);
+void scif_get_port(u16 port);
+int scif_get_new_port(void);
+void scif_put_port(u16 port);
+int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags);
+int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags);
+void scif_cnctreq(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_cnctgnt(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_cnctgnt_ack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_cnctgnt_nack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_cnctrej(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_discnct(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_discnt_ack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_clientsend(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg);
+int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block);
+int __scif_flush(scif_epd_t epd);
+int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd);
+__poll_t __scif_pollfd(struct file *f, poll_table *wait,
+			   struct scif_endpt *ep);
+int __scif_pin_pages(void *addr, size_t len, int *out_prot,
+		     int map_flags, scif_pinned_pages_t *pages);
+#endif /* SCIF_EPD_H */
diff --git a/drivers/misc/mic/scif/scif_fd.c b/drivers/misc/mic/scif/scif_fd.c
new file mode 100644
index 000000000..5c2a57ae4
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_fd.c
@@ -0,0 +1,471 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+
+static int scif_fdopen(struct inode *inode, struct file *f)
+{
+	struct scif_endpt *priv = scif_open();
+
+	if (!priv)
+		return -ENOMEM;
+	f->private_data = priv;
+	return 0;
+}
+
+static int scif_fdclose(struct inode *inode, struct file *f)
+{
+	struct scif_endpt *priv = f->private_data;
+
+	return scif_close(priv);
+}
+
+static int scif_fdmmap(struct file *f, struct vm_area_struct *vma)
+{
+	struct scif_endpt *priv = f->private_data;
+
+	return scif_mmap(vma, priv);
+}
+
+static __poll_t scif_fdpoll(struct file *f, poll_table *wait)
+{
+	struct scif_endpt *priv = f->private_data;
+
+	return __scif_pollfd(f, wait, priv);
+}
+
+static int scif_fdflush(struct file *f, fl_owner_t id)
+{
+	struct scif_endpt *ep = f->private_data;
+
+	spin_lock(&ep->lock);
+	/*
+	 * The listening endpoint stashes the open file information before
+	 * waiting for incoming connections. The release callback would never be
+	 * called if the application closed the endpoint, while waiting for
+	 * incoming connections from a separate thread since the file descriptor
+	 * reference count is bumped up in the accept IOCTL. Call the flush
+	 * routine if the id matches the endpoint open file information so that
+	 * the listening endpoint can be woken up and the fd released.
+	 */
+	if (ep->files == id)
+		__scif_flush(ep);
+	spin_unlock(&ep->lock);
+	return 0;
+}
+
+static __always_inline void scif_err_debug(int err, const char *str)
+{
+	/*
+	 * ENOTCONN is a common uninteresting error which is
+	 * flooding debug messages to the console unnecessarily.
+	 */
+	if (err < 0 && err != -ENOTCONN)
+		dev_dbg(scif_info.mdev.this_device, "%s err %d\n", str, err);
+}
+
+static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg)
+{
+	struct scif_endpt *priv = f->private_data;
+	void __user *argp = (void __user *)arg;
+	int err = 0;
+	struct scifioctl_msg request;
+	bool non_block = false;
+
+	non_block = !!(f->f_flags & O_NONBLOCK);
+
+	switch (cmd) {
+	case SCIF_BIND:
+	{
+		int pn;
+
+		if (copy_from_user(&pn, argp, sizeof(pn)))
+			return -EFAULT;
+
+		pn = scif_bind(priv, pn);
+		if (pn < 0)
+			return pn;
+
+		if (copy_to_user(argp, &pn, sizeof(pn)))
+			return -EFAULT;
+
+		return 0;
+	}
+	case SCIF_LISTEN:
+		return scif_listen(priv, arg);
+	case SCIF_CONNECT:
+	{
+		struct scifioctl_connect req;
+		struct scif_endpt *ep = (struct scif_endpt *)priv;
+
+		if (copy_from_user(&req, argp, sizeof(req)))
+			return -EFAULT;
+
+		err = __scif_connect(priv, &req.peer, non_block);
+		if (err < 0)
+			return err;
+
+		req.self.node = ep->port.node;
+		req.self.port = ep->port.port;
+
+		if (copy_to_user(argp, &req, sizeof(req)))
+			return -EFAULT;
+
+		return 0;
+	}
+	/*
+	 * Accept is done in two halves.  The request ioctl does the basic
+	 * functionality of accepting the request and returning the information
+	 * about it including the internal ID of the end point.  The register
+	 * is done with the internal ID on a new file descriptor opened by the
+	 * requesting process.
+	 */
+	case SCIF_ACCEPTREQ:
+	{
+		struct scifioctl_accept request;
+		scif_epd_t *ep = (scif_epd_t *)&request.endpt;
+
+		if (copy_from_user(&request, argp, sizeof(request)))
+			return -EFAULT;
+
+		err = scif_accept(priv, &request.peer, ep, request.flags);
+		if (err < 0)
+			return err;
+
+		if (copy_to_user(argp, &request, sizeof(request))) {
+			scif_close(*ep);
+			return -EFAULT;
+		}
+		/*
+		 * Add to the list of user mode eps where the second half
+		 * of the accept is not yet completed.
+		 */
+		mutex_lock(&scif_info.eplock);
+		list_add_tail(&((*ep)->miacceptlist), &scif_info.uaccept);
+		list_add_tail(&((*ep)->liacceptlist), &priv->li_accept);
+		(*ep)->listenep = priv;
+		priv->acceptcnt++;
+		mutex_unlock(&scif_info.eplock);
+
+		return 0;
+	}
+	case SCIF_ACCEPTREG:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scif_endpt *newep;
+		struct scif_endpt *lisep;
+		struct scif_endpt *fep = NULL;
+		struct scif_endpt *tmpep;
+		struct list_head *pos, *tmpq;
+
+		/* Finally replace the pointer to the accepted endpoint */
+		if (copy_from_user(&newep, argp, sizeof(void *)))
+			return -EFAULT;
+
+		/* Remove form the user accept queue */
+		mutex_lock(&scif_info.eplock);
+		list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
+			tmpep = list_entry(pos,
+					   struct scif_endpt, miacceptlist);
+			if (tmpep == newep) {
+				list_del(pos);
+				fep = tmpep;
+				break;
+			}
+		}
+
+		if (!fep) {
+			mutex_unlock(&scif_info.eplock);
+			return -ENOENT;
+		}
+
+		lisep = newep->listenep;
+		list_for_each_safe(pos, tmpq, &lisep->li_accept) {
+			tmpep = list_entry(pos,
+					   struct scif_endpt, liacceptlist);
+			if (tmpep == newep) {
+				list_del(pos);
+				lisep->acceptcnt--;
+				break;
+			}
+		}
+
+		mutex_unlock(&scif_info.eplock);
+
+		/* Free the resources automatically created from the open. */
+		scif_anon_inode_fput(priv);
+		scif_teardown_ep(priv);
+		scif_add_epd_to_zombie_list(priv, !SCIF_EPLOCK_HELD);
+		f->private_data = newep;
+		return 0;
+	}
+	case SCIF_SEND:
+	{
+		struct scif_endpt *priv = f->private_data;
+
+		if (copy_from_user(&request, argp,
+				   sizeof(struct scifioctl_msg))) {
+			err = -EFAULT;
+			goto send_err;
+		}
+		err = scif_user_send(priv, (void __user *)request.msg,
+				     request.len, request.flags);
+		if (err < 0)
+			goto send_err;
+		if (copy_to_user(&
+				 ((struct scifioctl_msg __user *)argp)->out_len,
+				 &err, sizeof(err))) {
+			err = -EFAULT;
+			goto send_err;
+		}
+		err = 0;
+send_err:
+		scif_err_debug(err, "scif_send");
+		return err;
+	}
+	case SCIF_RECV:
+	{
+		struct scif_endpt *priv = f->private_data;
+
+		if (copy_from_user(&request, argp,
+				   sizeof(struct scifioctl_msg))) {
+			err = -EFAULT;
+			goto recv_err;
+		}
+
+		err = scif_user_recv(priv, (void __user *)request.msg,
+				     request.len, request.flags);
+		if (err < 0)
+			goto recv_err;
+
+		if (copy_to_user(&
+				 ((struct scifioctl_msg __user *)argp)->out_len,
+			&err, sizeof(err))) {
+			err = -EFAULT;
+			goto recv_err;
+		}
+		err = 0;
+recv_err:
+		scif_err_debug(err, "scif_recv");
+		return err;
+	}
+	case SCIF_GET_NODEIDS:
+	{
+		struct scifioctl_node_ids node_ids;
+		int entries;
+		u16 *nodes;
+		void __user *unodes, *uself;
+		u16 self;
+
+		if (copy_from_user(&node_ids, argp, sizeof(node_ids))) {
+			err = -EFAULT;
+			goto getnodes_err2;
+		}
+
+		entries = min_t(int, scif_info.maxid, node_ids.len);
+		nodes = kmalloc_array(entries, sizeof(u16), GFP_KERNEL);
+		if (entries && !nodes) {
+			err = -ENOMEM;
+			goto getnodes_err2;
+		}
+		node_ids.len = scif_get_node_ids(nodes, entries, &self);
+
+		unodes = (void __user *)node_ids.nodes;
+		if (copy_to_user(unodes, nodes, sizeof(u16) * entries)) {
+			err = -EFAULT;
+			goto getnodes_err1;
+		}
+
+		uself = (void __user *)node_ids.self;
+		if (copy_to_user(uself, &self, sizeof(u16))) {
+			err = -EFAULT;
+			goto getnodes_err1;
+		}
+
+		if (copy_to_user(argp, &node_ids, sizeof(node_ids))) {
+			err = -EFAULT;
+			goto getnodes_err1;
+		}
+getnodes_err1:
+		kfree(nodes);
+getnodes_err2:
+		return err;
+	}
+	case SCIF_REG:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_reg reg;
+		off_t ret;
+
+		if (copy_from_user(&reg, argp, sizeof(reg))) {
+			err = -EFAULT;
+			goto reg_err;
+		}
+		if (reg.flags & SCIF_MAP_KERNEL) {
+			err = -EINVAL;
+			goto reg_err;
+		}
+		ret = scif_register(priv, (void *)reg.addr, reg.len,
+				    reg.offset, reg.prot, reg.flags);
+		if (ret < 0) {
+			err = (int)ret;
+			goto reg_err;
+		}
+
+		if (copy_to_user(&((struct scifioctl_reg __user *)argp)
+				 ->out_offset, &ret, sizeof(reg.out_offset))) {
+			err = -EFAULT;
+			goto reg_err;
+		}
+		err = 0;
+reg_err:
+		scif_err_debug(err, "scif_register");
+		return err;
+	}
+	case SCIF_UNREG:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_unreg unreg;
+
+		if (copy_from_user(&unreg, argp, sizeof(unreg))) {
+			err = -EFAULT;
+			goto unreg_err;
+		}
+		err = scif_unregister(priv, unreg.offset, unreg.len);
+unreg_err:
+		scif_err_debug(err, "scif_unregister");
+		return err;
+	}
+	case SCIF_READFROM:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_copy copy;
+
+		if (copy_from_user(&copy, argp, sizeof(copy))) {
+			err = -EFAULT;
+			goto readfrom_err;
+		}
+		err = scif_readfrom(priv, copy.loffset, copy.len, copy.roffset,
+				    copy.flags);
+readfrom_err:
+		scif_err_debug(err, "scif_readfrom");
+		return err;
+	}
+	case SCIF_WRITETO:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_copy copy;
+
+		if (copy_from_user(&copy, argp, sizeof(copy))) {
+			err = -EFAULT;
+			goto writeto_err;
+		}
+		err = scif_writeto(priv, copy.loffset, copy.len, copy.roffset,
+				   copy.flags);
+writeto_err:
+		scif_err_debug(err, "scif_writeto");
+		return err;
+	}
+	case SCIF_VREADFROM:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_copy copy;
+
+		if (copy_from_user(&copy, argp, sizeof(copy))) {
+			err = -EFAULT;
+			goto vreadfrom_err;
+		}
+		err = scif_vreadfrom(priv, (void __force *)copy.addr, copy.len,
+				     copy.roffset, copy.flags);
+vreadfrom_err:
+		scif_err_debug(err, "scif_vreadfrom");
+		return err;
+	}
+	case SCIF_VWRITETO:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_copy copy;
+
+		if (copy_from_user(&copy, argp, sizeof(copy))) {
+			err = -EFAULT;
+			goto vwriteto_err;
+		}
+		err = scif_vwriteto(priv, (void __force *)copy.addr, copy.len,
+				    copy.roffset, copy.flags);
+vwriteto_err:
+		scif_err_debug(err, "scif_vwriteto");
+		return err;
+	}
+	case SCIF_FENCE_MARK:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_fence_mark mark;
+		int tmp_mark = 0;
+
+		if (copy_from_user(&mark, argp, sizeof(mark))) {
+			err = -EFAULT;
+			goto fence_mark_err;
+		}
+		err = scif_fence_mark(priv, mark.flags, &tmp_mark);
+		if (err)
+			goto fence_mark_err;
+		if (copy_to_user((void __user *)mark.mark, &tmp_mark,
+				 sizeof(tmp_mark))) {
+			err = -EFAULT;
+			goto fence_mark_err;
+		}
+fence_mark_err:
+		scif_err_debug(err, "scif_fence_mark");
+		return err;
+	}
+	case SCIF_FENCE_WAIT:
+	{
+		struct scif_endpt *priv = f->private_data;
+
+		err = scif_fence_wait(priv, arg);
+		scif_err_debug(err, "scif_fence_wait");
+		return err;
+	}
+	case SCIF_FENCE_SIGNAL:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_fence_signal signal;
+
+		if (copy_from_user(&signal, argp, sizeof(signal))) {
+			err = -EFAULT;
+			goto fence_signal_err;
+		}
+
+		err = scif_fence_signal(priv, signal.loff, signal.lval,
+					signal.roff, signal.rval, signal.flags);
+fence_signal_err:
+		scif_err_debug(err, "scif_fence_signal");
+		return err;
+	}
+	}
+	return -EINVAL;
+}
+
+const struct file_operations scif_fops = {
+	.open = scif_fdopen,
+	.release = scif_fdclose,
+	.unlocked_ioctl = scif_fdioctl,
+	.mmap = scif_fdmmap,
+	.poll = scif_fdpoll,
+	.flush = scif_fdflush,
+	.owner = THIS_MODULE,
+};
diff --git a/drivers/misc/mic/scif/scif_fence.c b/drivers/misc/mic/scif/scif_fence.c
new file mode 100644
index 000000000..7bb929f05
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_fence.c
@@ -0,0 +1,772 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+
+#include "scif_main.h"
+
+/**
+ * scif_recv_mark: Handle SCIF_MARK request
+ * @msg:	Interrupt message
+ *
+ * The peer has requested a mark.
+ */
+void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	int mark = 0;
+	int err;
+
+	err = _scif_fence_mark(ep, &mark);
+	if (err)
+		msg->uop = SCIF_MARK_NACK;
+	else
+		msg->uop = SCIF_MARK_ACK;
+	msg->payload[0] = ep->remote_ep;
+	msg->payload[2] = mark;
+	scif_nodeqp_send(ep->remote_dev, msg);
+}
+
+/**
+ * scif_recv_mark_resp: Handle SCIF_MARK_(N)ACK messages.
+ * @msg:	Interrupt message
+ *
+ * The peer has responded to a SCIF_MARK message.
+ */
+void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	struct scif_fence_info *fence_req =
+		(struct scif_fence_info *)msg->payload[1];
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	if (msg->uop == SCIF_MARK_ACK) {
+		fence_req->state = OP_COMPLETED;
+		fence_req->dma_mark = (int)msg->payload[2];
+	} else {
+		fence_req->state = OP_FAILED;
+	}
+	mutex_unlock(&ep->rma_info.rma_lock);
+	complete(&fence_req->comp);
+}
+
+/**
+ * scif_recv_wait: Handle SCIF_WAIT request
+ * @msg:	Interrupt message
+ *
+ * The peer has requested waiting on a fence.
+ */
+void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	struct scif_remote_fence_info *fence;
+
+	/*
+	 * Allocate structure for remote fence information and
+	 * send a NACK if the allocation failed. The peer will
+	 * return ENOMEM upon receiving a NACK.
+	 */
+	fence = kmalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence) {
+		msg->payload[0] = ep->remote_ep;
+		msg->uop = SCIF_WAIT_NACK;
+		scif_nodeqp_send(ep->remote_dev, msg);
+		return;
+	}
+
+	/* Prepare the fence request */
+	memcpy(&fence->msg, msg, sizeof(struct scifmsg));
+	INIT_LIST_HEAD(&fence->list);
+
+	/* Insert to the global remote fence request list */
+	mutex_lock(&scif_info.fencelock);
+	atomic_inc(&ep->rma_info.fence_refcount);
+	list_add_tail(&fence->list, &scif_info.fence);
+	mutex_unlock(&scif_info.fencelock);
+
+	schedule_work(&scif_info.misc_work);
+}
+
+/**
+ * scif_recv_wait_resp: Handle SCIF_WAIT_(N)ACK messages.
+ * @msg:	Interrupt message
+ *
+ * The peer has responded to a SCIF_WAIT message.
+ */
+void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	struct scif_fence_info *fence_req =
+		(struct scif_fence_info *)msg->payload[1];
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	if (msg->uop == SCIF_WAIT_ACK)
+		fence_req->state = OP_COMPLETED;
+	else
+		fence_req->state = OP_FAILED;
+	mutex_unlock(&ep->rma_info.rma_lock);
+	complete(&fence_req->comp);
+}
+
+/**
+ * scif_recv_sig_local: Handle SCIF_SIG_LOCAL request
+ * @msg:	Interrupt message
+ *
+ * The peer has requested a signal on a local offset.
+ */
+void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	int err;
+
+	err = scif_prog_signal(ep, msg->payload[1], msg->payload[2],
+			       SCIF_WINDOW_SELF);
+	if (err)
+		msg->uop = SCIF_SIG_NACK;
+	else
+		msg->uop = SCIF_SIG_ACK;
+	msg->payload[0] = ep->remote_ep;
+	scif_nodeqp_send(ep->remote_dev, msg);
+}
+
+/**
+ * scif_recv_sig_remote: Handle SCIF_SIGNAL_REMOTE request
+ * @msg:	Interrupt message
+ *
+ * The peer has requested a signal on a remote offset.
+ */
+void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	int err;
+
+	err = scif_prog_signal(ep, msg->payload[1], msg->payload[2],
+			       SCIF_WINDOW_PEER);
+	if (err)
+		msg->uop = SCIF_SIG_NACK;
+	else
+		msg->uop = SCIF_SIG_ACK;
+	msg->payload[0] = ep->remote_ep;
+	scif_nodeqp_send(ep->remote_dev, msg);
+}
+
+/**
+ * scif_recv_sig_resp: Handle SCIF_SIG_(N)ACK messages.
+ * @msg:	Interrupt message
+ *
+ * The peer has responded to a signal request.
+ */
+void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	struct scif_fence_info *fence_req =
+		(struct scif_fence_info *)msg->payload[3];
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	if (msg->uop == SCIF_SIG_ACK)
+		fence_req->state = OP_COMPLETED;
+	else
+		fence_req->state = OP_FAILED;
+	mutex_unlock(&ep->rma_info.rma_lock);
+	complete(&fence_req->comp);
+}
+
+static inline void *scif_get_local_va(off_t off, struct scif_window *window)
+{
+	struct page **pages = window->pinned_pages->pages;
+	int page_nr = (off - window->offset) >> PAGE_SHIFT;
+	off_t page_off = off & ~PAGE_MASK;
+
+	return page_address(pages[page_nr]) + page_off;
+}
+
+static void scif_prog_signal_cb(void *arg)
+{
+	struct scif_status *status = arg;
+
+	dma_pool_free(status->ep->remote_dev->signal_pool, status,
+		      status->src_dma_addr);
+}
+
+static int _scif_prog_signal(scif_epd_t epd, dma_addr_t dst, u64 val)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct dma_chan *chan = ep->rma_info.dma_chan;
+	struct dma_device *ddev = chan->device;
+	bool x100 = !is_dma_copy_aligned(chan->device, 1, 1, 1);
+	struct dma_async_tx_descriptor *tx;
+	struct scif_status *status = NULL;
+	dma_addr_t src;
+	dma_cookie_t cookie;
+	int err;
+
+	tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE);
+	if (!tx) {
+		err = -ENOMEM;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto alloc_fail;
+	}
+	cookie = tx->tx_submit(tx);
+	if (dma_submit_error(cookie)) {
+		err = (int)cookie;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto alloc_fail;
+	}
+	dma_async_issue_pending(chan);
+	if (x100) {
+		/*
+		 * For X100 use the status descriptor to write the value to
+		 * the destination.
+		 */
+		tx = ddev->device_prep_dma_imm_data(chan, dst, val, 0);
+	} else {
+		status = dma_pool_alloc(ep->remote_dev->signal_pool, GFP_KERNEL,
+					&src);
+		if (!status) {
+			err = -ENOMEM;
+			dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+				__func__, __LINE__, err);
+			goto alloc_fail;
+		}
+		status->val = val;
+		status->src_dma_addr = src;
+		status->ep = ep;
+		src += offsetof(struct scif_status, val);
+		tx = ddev->device_prep_dma_memcpy(chan, dst, src, sizeof(val),
+						  DMA_PREP_INTERRUPT);
+	}
+	if (!tx) {
+		err = -ENOMEM;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto dma_fail;
+	}
+	if (!x100) {
+		tx->callback = scif_prog_signal_cb;
+		tx->callback_param = status;
+	}
+	cookie = tx->tx_submit(tx);
+	if (dma_submit_error(cookie)) {
+		err = -EIO;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto dma_fail;
+	}
+	dma_async_issue_pending(chan);
+	return 0;
+dma_fail:
+	if (!x100)
+		dma_pool_free(ep->remote_dev->signal_pool, status,
+			      src - offsetof(struct scif_status, val));
+alloc_fail:
+	return err;
+}
+
+/*
+ * scif_prog_signal:
+ * @epd - Endpoint Descriptor
+ * @offset - registered address to write @val to
+ * @val - Value to be written at @offset
+ * @type - Type of the window.
+ *
+ * Arrange to write a value to the registered offset after ensuring that the
+ * offset provided is indeed valid.
+ */
+int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val,
+		     enum scif_window_type type)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scif_window *window = NULL;
+	struct scif_rma_req req;
+	dma_addr_t dst_dma_addr;
+	int err;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	req.out_window = &window;
+	req.offset = offset;
+	req.nr_bytes = sizeof(u64);
+	req.prot = SCIF_PROT_WRITE;
+	req.type = SCIF_WINDOW_SINGLE;
+	if (type == SCIF_WINDOW_SELF)
+		req.head = &ep->rma_info.reg_list;
+	else
+		req.head = &ep->rma_info.remote_reg_list;
+	/* Does a valid window exist? */
+	err = scif_query_window(&req);
+	if (err) {
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto unlock_ret;
+	}
+
+	if (scif_is_mgmt_node() && scifdev_self(ep->remote_dev)) {
+		u64 *dst_virt;
+
+		if (type == SCIF_WINDOW_SELF)
+			dst_virt = scif_get_local_va(offset, window);
+		else
+			dst_virt =
+			scif_get_local_va(offset, (struct scif_window *)
+					  window->peer_window);
+		*dst_virt = val;
+	} else {
+		dst_dma_addr = __scif_off_to_dma_addr(window, offset);
+		err = _scif_prog_signal(epd, dst_dma_addr, val);
+	}
+unlock_ret:
+	mutex_unlock(&ep->rma_info.rma_lock);
+	return err;
+}
+
+static int _scif_fence_wait(scif_epd_t epd, int mark)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	dma_cookie_t cookie = mark & ~SCIF_REMOTE_FENCE;
+	int err;
+
+	/* Wait for DMA callback in scif_fence_mark_cb(..) */
+	err = wait_event_interruptible_timeout(ep->rma_info.markwq,
+					       dma_async_is_tx_complete(
+					       ep->rma_info.dma_chan,
+					       cookie, NULL, NULL) ==
+					       DMA_COMPLETE,
+					       SCIF_NODE_ALIVE_TIMEOUT);
+	if (!err)
+		err = -ETIMEDOUT;
+	else if (err > 0)
+		err = 0;
+	return err;
+}
+
+/**
+ * scif_rma_handle_remote_fences:
+ *
+ * This routine services remote fence requests.
+ */
+void scif_rma_handle_remote_fences(void)
+{
+	struct list_head *item, *tmp;
+	struct scif_remote_fence_info *fence;
+	struct scif_endpt *ep;
+	int mark, err;
+
+	might_sleep();
+	mutex_lock(&scif_info.fencelock);
+	list_for_each_safe(item, tmp, &scif_info.fence) {
+		fence = list_entry(item, struct scif_remote_fence_info,
+				   list);
+		/* Remove fence from global list */
+		list_del(&fence->list);
+
+		/* Initiate the fence operation */
+		ep = (struct scif_endpt *)fence->msg.payload[0];
+		mark = fence->msg.payload[2];
+		err = _scif_fence_wait(ep, mark);
+		if (err)
+			fence->msg.uop = SCIF_WAIT_NACK;
+		else
+			fence->msg.uop = SCIF_WAIT_ACK;
+		fence->msg.payload[0] = ep->remote_ep;
+		scif_nodeqp_send(ep->remote_dev, &fence->msg);
+		kfree(fence);
+		if (!atomic_sub_return(1, &ep->rma_info.fence_refcount))
+			schedule_work(&scif_info.misc_work);
+	}
+	mutex_unlock(&scif_info.fencelock);
+}
+
+static int _scif_send_fence(scif_epd_t epd, int uop, int mark, int *out_mark)
+{
+	int err;
+	struct scifmsg msg;
+	struct scif_fence_info *fence_req;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+
+	fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL);
+	if (!fence_req) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	fence_req->state = OP_IN_PROGRESS;
+	init_completion(&fence_req->comp);
+
+	msg.src = ep->port;
+	msg.uop = uop;
+	msg.payload[0] = ep->remote_ep;
+	msg.payload[1] = (u64)fence_req;
+	if (uop == SCIF_WAIT)
+		msg.payload[2] = mark;
+	spin_lock(&ep->lock);
+	if (ep->state == SCIFEP_CONNECTED)
+		err = scif_nodeqp_send(ep->remote_dev, &msg);
+	else
+		err = -ENOTCONN;
+	spin_unlock(&ep->lock);
+	if (err)
+		goto error_free;
+retry:
+	/* Wait for a SCIF_WAIT_(N)ACK message */
+	err = wait_for_completion_timeout(&fence_req->comp,
+					  SCIF_NODE_ALIVE_TIMEOUT);
+	if (!err && scifdev_alive(ep))
+		goto retry;
+	if (!err)
+		err = -ENODEV;
+	if (err > 0)
+		err = 0;
+	mutex_lock(&ep->rma_info.rma_lock);
+	if (err < 0) {
+		if (fence_req->state == OP_IN_PROGRESS)
+			fence_req->state = OP_FAILED;
+	}
+	if (fence_req->state == OP_FAILED && !err)
+		err = -ENOMEM;
+	if (uop == SCIF_MARK && fence_req->state == OP_COMPLETED)
+		*out_mark = SCIF_REMOTE_FENCE | fence_req->dma_mark;
+	mutex_unlock(&ep->rma_info.rma_lock);
+error_free:
+	kfree(fence_req);
+error:
+	return err;
+}
+
+/**
+ * scif_send_fence_mark:
+ * @epd: end point descriptor.
+ * @out_mark: Output DMA mark reported by peer.
+ *
+ * Send a remote fence mark request.
+ */
+static int scif_send_fence_mark(scif_epd_t epd, int *out_mark)
+{
+	return _scif_send_fence(epd, SCIF_MARK, 0, out_mark);
+}
+
+/**
+ * scif_send_fence_wait:
+ * @epd: end point descriptor.
+ * @mark: DMA mark to wait for.
+ *
+ * Send a remote fence wait request.
+ */
+static int scif_send_fence_wait(scif_epd_t epd, int mark)
+{
+	return _scif_send_fence(epd, SCIF_WAIT, mark, NULL);
+}
+
+static int _scif_send_fence_signal_wait(struct scif_endpt *ep,
+					struct scif_fence_info *fence_req)
+{
+	int err;
+
+retry:
+	/* Wait for a SCIF_SIG_(N)ACK message */
+	err = wait_for_completion_timeout(&fence_req->comp,
+					  SCIF_NODE_ALIVE_TIMEOUT);
+	if (!err && scifdev_alive(ep))
+		goto retry;
+	if (!err)
+		err = -ENODEV;
+	if (err > 0)
+		err = 0;
+	if (err < 0) {
+		mutex_lock(&ep->rma_info.rma_lock);
+		if (fence_req->state == OP_IN_PROGRESS)
+			fence_req->state = OP_FAILED;
+		mutex_unlock(&ep->rma_info.rma_lock);
+	}
+	if (fence_req->state == OP_FAILED && !err)
+		err = -ENXIO;
+	return err;
+}
+
+/**
+ * scif_send_fence_signal:
+ * @epd - endpoint descriptor
+ * @loff - local offset
+ * @lval - local value to write to loffset
+ * @roff - remote offset
+ * @rval - remote value to write to roffset
+ * @flags - flags
+ *
+ * Sends a remote fence signal request
+ */
+static int scif_send_fence_signal(scif_epd_t epd, off_t roff, u64 rval,
+				  off_t loff, u64 lval, int flags)
+{
+	int err = 0;
+	struct scifmsg msg;
+	struct scif_fence_info *fence_req;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+
+	fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL);
+	if (!fence_req) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	fence_req->state = OP_IN_PROGRESS;
+	init_completion(&fence_req->comp);
+	msg.src = ep->port;
+	if (flags & SCIF_SIGNAL_LOCAL) {
+		msg.uop = SCIF_SIG_LOCAL;
+		msg.payload[0] = ep->remote_ep;
+		msg.payload[1] = roff;
+		msg.payload[2] = rval;
+		msg.payload[3] = (u64)fence_req;
+		spin_lock(&ep->lock);
+		if (ep->state == SCIFEP_CONNECTED)
+			err = scif_nodeqp_send(ep->remote_dev, &msg);
+		else
+			err = -ENOTCONN;
+		spin_unlock(&ep->lock);
+		if (err)
+			goto error_free;
+		err = _scif_send_fence_signal_wait(ep, fence_req);
+		if (err)
+			goto error_free;
+	}
+	fence_req->state = OP_IN_PROGRESS;
+
+	if (flags & SCIF_SIGNAL_REMOTE) {
+		msg.uop = SCIF_SIG_REMOTE;
+		msg.payload[0] = ep->remote_ep;
+		msg.payload[1] = loff;
+		msg.payload[2] = lval;
+		msg.payload[3] = (u64)fence_req;
+		spin_lock(&ep->lock);
+		if (ep->state == SCIFEP_CONNECTED)
+			err = scif_nodeqp_send(ep->remote_dev, &msg);
+		else
+			err = -ENOTCONN;
+		spin_unlock(&ep->lock);
+		if (err)
+			goto error_free;
+		err = _scif_send_fence_signal_wait(ep, fence_req);
+	}
+error_free:
+	kfree(fence_req);
+error:
+	return err;
+}
+
+static void scif_fence_mark_cb(void *arg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)arg;
+
+	wake_up_interruptible(&ep->rma_info.markwq);
+	atomic_dec(&ep->rma_info.fence_refcount);
+}
+
+/*
+ * _scif_fence_mark:
+ *
+ * @epd - endpoint descriptor
+ * Set up a mark for this endpoint and return the value of the mark.
+ */
+int _scif_fence_mark(scif_epd_t epd, int *mark)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct dma_chan *chan = ep->rma_info.dma_chan;
+	struct dma_device *ddev = chan->device;
+	struct dma_async_tx_descriptor *tx;
+	dma_cookie_t cookie;
+	int err;
+
+	tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE);
+	if (!tx) {
+		err = -ENOMEM;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+	cookie = tx->tx_submit(tx);
+	if (dma_submit_error(cookie)) {
+		err = (int)cookie;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+	dma_async_issue_pending(chan);
+	tx = ddev->device_prep_dma_interrupt(chan, DMA_PREP_INTERRUPT);
+	if (!tx) {
+		err = -ENOMEM;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+	tx->callback = scif_fence_mark_cb;
+	tx->callback_param = ep;
+	*mark = cookie = tx->tx_submit(tx);
+	if (dma_submit_error(cookie)) {
+		err = (int)cookie;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+	atomic_inc(&ep->rma_info.fence_refcount);
+	dma_async_issue_pending(chan);
+	return 0;
+}
+
+#define SCIF_LOOPB_MAGIC_MARK 0xdead
+
+int scif_fence_mark(scif_epd_t epd, int flags, int *mark)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int err = 0;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI fence_mark: ep %p flags 0x%x mark 0x%x\n",
+		ep, flags, *mark);
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	/* Invalid flags? */
+	if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER))
+		return -EINVAL;
+
+	/* At least one of init self or peer RMA should be set */
+	if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER)))
+		return -EINVAL;
+
+	/* Exactly one of init self or peer RMA should be set but not both */
+	if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER))
+		return -EINVAL;
+
+	/*
+	 * Management node loopback does not need to use DMA.
+	 * Return a valid mark to be symmetric.
+	 */
+	if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) {
+		*mark = SCIF_LOOPB_MAGIC_MARK;
+		return 0;
+	}
+
+	if (flags & SCIF_FENCE_INIT_SELF)
+		err = _scif_fence_mark(epd, mark);
+	else
+		err = scif_send_fence_mark(ep, mark);
+
+	if (err)
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI fence_mark: ep %p flags 0x%x mark 0x%x err %d\n",
+		ep, flags, *mark, err);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_fence_mark);
+
+int scif_fence_wait(scif_epd_t epd, int mark)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int err = 0;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI fence_wait: ep %p mark 0x%x\n",
+		ep, mark);
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+	/*
+	 * Management node loopback does not need to use DMA.
+	 * The only valid mark provided is 0 so simply
+	 * return success if the mark is valid.
+	 */
+	if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) {
+		if (mark == SCIF_LOOPB_MAGIC_MARK)
+			return 0;
+		else
+			return -EINVAL;
+	}
+	if (mark & SCIF_REMOTE_FENCE)
+		err = scif_send_fence_wait(epd, mark);
+	else
+		err = _scif_fence_wait(epd, mark);
+	if (err < 0)
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_fence_wait);
+
+int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval,
+		      off_t roff, u64 rval, int flags)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int err = 0;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI fence_signal: ep %p loff 0x%lx lval 0x%llx roff 0x%lx rval 0x%llx flags 0x%x\n",
+		ep, loff, lval, roff, rval, flags);
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	/* Invalid flags? */
+	if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER |
+			SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE))
+		return -EINVAL;
+
+	/* At least one of init self or peer RMA should be set */
+	if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER)))
+		return -EINVAL;
+
+	/* Exactly one of init self or peer RMA should be set but not both */
+	if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER))
+		return -EINVAL;
+
+	/* At least one of SCIF_SIGNAL_LOCAL or SCIF_SIGNAL_REMOTE required */
+	if (!(flags & (SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE)))
+		return -EINVAL;
+
+	/* Only Dword offsets allowed */
+	if ((flags & SCIF_SIGNAL_LOCAL) && (loff & (sizeof(u32) - 1)))
+		return -EINVAL;
+
+	/* Only Dword aligned offsets allowed */
+	if ((flags & SCIF_SIGNAL_REMOTE) && (roff & (sizeof(u32) - 1)))
+		return -EINVAL;
+
+	if (flags & SCIF_FENCE_INIT_PEER) {
+		err = scif_send_fence_signal(epd, roff, rval, loff,
+					     lval, flags);
+	} else {
+		/* Local Signal in Local RAS */
+		if (flags & SCIF_SIGNAL_LOCAL) {
+			err = scif_prog_signal(epd, loff, lval,
+					       SCIF_WINDOW_SELF);
+			if (err)
+				goto error_ret;
+		}
+
+		/* Signal in Remote RAS */
+		if (flags & SCIF_SIGNAL_REMOTE)
+			err = scif_prog_signal(epd, roff,
+					       rval, SCIF_WINDOW_PEER);
+	}
+error_ret:
+	if (err)
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_fence_signal);
diff --git a/drivers/misc/mic/scif/scif_main.c b/drivers/misc/mic/scif/scif_main.c
new file mode 100644
index 000000000..36d847af1
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_main.c
@@ -0,0 +1,359 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/module.h>
+#include <linux/idr.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+#include "../bus/scif_bus.h"
+#include "scif_peer_bus.h"
+#include "scif_main.h"
+#include "scif_map.h"
+
+struct scif_info scif_info = {
+	.mdev = {
+		.minor = MISC_DYNAMIC_MINOR,
+		.name = "scif",
+		.fops = &scif_fops,
+	}
+};
+
+struct scif_dev *scif_dev;
+struct kmem_cache *unaligned_cache;
+static atomic_t g_loopb_cnt;
+
+/* Runs in the context of intr_wq */
+static void scif_intr_bh_handler(struct work_struct *work)
+{
+	struct scif_dev *scifdev =
+			container_of(work, struct scif_dev, intr_bh);
+
+	if (scifdev_self(scifdev))
+		scif_loopb_msg_handler(scifdev, scifdev->qpairs);
+	else
+		scif_nodeqp_intrhandler(scifdev, scifdev->qpairs);
+}
+
+int scif_setup_intr_wq(struct scif_dev *scifdev)
+{
+	if (!scifdev->intr_wq) {
+		snprintf(scifdev->intr_wqname, sizeof(scifdev->intr_wqname),
+			 "SCIF INTR %d", scifdev->node);
+		scifdev->intr_wq =
+			alloc_ordered_workqueue(scifdev->intr_wqname, 0);
+		if (!scifdev->intr_wq)
+			return -ENOMEM;
+		INIT_WORK(&scifdev->intr_bh, scif_intr_bh_handler);
+	}
+	return 0;
+}
+
+void scif_destroy_intr_wq(struct scif_dev *scifdev)
+{
+	if (scifdev->intr_wq) {
+		destroy_workqueue(scifdev->intr_wq);
+		scifdev->intr_wq = NULL;
+	}
+}
+
+irqreturn_t scif_intr_handler(int irq, void *data)
+{
+	struct scif_dev *scifdev = data;
+	struct scif_hw_dev *sdev = scifdev->sdev;
+
+	sdev->hw_ops->ack_interrupt(sdev, scifdev->db);
+	queue_work(scifdev->intr_wq, &scifdev->intr_bh);
+	return IRQ_HANDLED;
+}
+
+static void scif_qp_setup_handler(struct work_struct *work)
+{
+	struct scif_dev *scifdev = container_of(work, struct scif_dev,
+						qp_dwork.work);
+	struct scif_hw_dev *sdev = scifdev->sdev;
+	dma_addr_t da = 0;
+	int err;
+
+	if (scif_is_mgmt_node()) {
+		struct mic_bootparam *bp = sdev->dp;
+
+		da = bp->scif_card_dma_addr;
+		scifdev->rdb = bp->h2c_scif_db;
+	} else {
+		struct mic_bootparam __iomem *bp = sdev->rdp;
+
+		da = readq(&bp->scif_host_dma_addr);
+		scifdev->rdb = ioread8(&bp->c2h_scif_db);
+	}
+	if (da) {
+		err = scif_qp_response(da, scifdev);
+		if (err)
+			dev_err(&scifdev->sdev->dev,
+				"scif_qp_response err %d\n", err);
+	} else {
+		schedule_delayed_work(&scifdev->qp_dwork,
+				      msecs_to_jiffies(1000));
+	}
+}
+
+static int scif_setup_scifdev(void)
+{
+	/* We support a maximum of 129 SCIF nodes including the mgmt node */
+#define MAX_SCIF_NODES 129
+	int i;
+	u8 num_nodes = MAX_SCIF_NODES;
+
+	scif_dev = kcalloc(num_nodes, sizeof(*scif_dev), GFP_KERNEL);
+	if (!scif_dev)
+		return -ENOMEM;
+	for (i = 0; i < num_nodes; i++) {
+		struct scif_dev *scifdev = &scif_dev[i];
+
+		scifdev->node = i;
+		scifdev->exit = OP_IDLE;
+		init_waitqueue_head(&scifdev->disconn_wq);
+		mutex_init(&scifdev->lock);
+		INIT_WORK(&scifdev->peer_add_work, scif_add_peer_device);
+		INIT_DELAYED_WORK(&scifdev->p2p_dwork,
+				  scif_poll_qp_state);
+		INIT_DELAYED_WORK(&scifdev->qp_dwork,
+				  scif_qp_setup_handler);
+		INIT_LIST_HEAD(&scifdev->p2p);
+		RCU_INIT_POINTER(scifdev->spdev, NULL);
+	}
+	return 0;
+}
+
+static void scif_destroy_scifdev(void)
+{
+	kfree(scif_dev);
+}
+
+static int scif_probe(struct scif_hw_dev *sdev)
+{
+	struct scif_dev *scifdev = &scif_dev[sdev->dnode];
+	int rc;
+
+	dev_set_drvdata(&sdev->dev, sdev);
+	scifdev->sdev = sdev;
+
+	if (1 == atomic_add_return(1, &g_loopb_cnt)) {
+		struct scif_dev *loopb_dev = &scif_dev[sdev->snode];
+
+		loopb_dev->sdev = sdev;
+		rc = scif_setup_loopback_qp(loopb_dev);
+		if (rc)
+			goto exit;
+	}
+
+	rc = scif_setup_intr_wq(scifdev);
+	if (rc)
+		goto destroy_loopb;
+	rc = scif_setup_qp(scifdev);
+	if (rc)
+		goto destroy_intr;
+	scifdev->db = sdev->hw_ops->next_db(sdev);
+	scifdev->cookie = sdev->hw_ops->request_irq(sdev, scif_intr_handler,
+						    "SCIF_INTR", scifdev,
+						    scifdev->db);
+	if (IS_ERR(scifdev->cookie)) {
+		rc = PTR_ERR(scifdev->cookie);
+		goto free_qp;
+	}
+	if (scif_is_mgmt_node()) {
+		struct mic_bootparam *bp = sdev->dp;
+
+		bp->c2h_scif_db = scifdev->db;
+		bp->scif_host_dma_addr = scifdev->qp_dma_addr;
+	} else {
+		struct mic_bootparam __iomem *bp = sdev->rdp;
+
+		iowrite8(scifdev->db, &bp->h2c_scif_db);
+		writeq(scifdev->qp_dma_addr, &bp->scif_card_dma_addr);
+	}
+	schedule_delayed_work(&scifdev->qp_dwork,
+			      msecs_to_jiffies(1000));
+	return rc;
+free_qp:
+	scif_free_qp(scifdev);
+destroy_intr:
+	scif_destroy_intr_wq(scifdev);
+destroy_loopb:
+	if (atomic_dec_and_test(&g_loopb_cnt))
+		scif_destroy_loopback_qp(&scif_dev[sdev->snode]);
+exit:
+	return rc;
+}
+
+void scif_stop(struct scif_dev *scifdev)
+{
+	struct scif_dev *dev;
+	int i;
+
+	for (i = scif_info.maxid; i >= 0; i--) {
+		dev = &scif_dev[i];
+		if (scifdev_self(dev))
+			continue;
+		scif_handle_remove_node(i);
+	}
+}
+
+static void scif_remove(struct scif_hw_dev *sdev)
+{
+	struct scif_dev *scifdev = &scif_dev[sdev->dnode];
+
+	if (scif_is_mgmt_node()) {
+		struct mic_bootparam *bp = sdev->dp;
+
+		bp->c2h_scif_db = -1;
+		bp->scif_host_dma_addr = 0x0;
+	} else {
+		struct mic_bootparam __iomem *bp = sdev->rdp;
+
+		iowrite8(-1, &bp->h2c_scif_db);
+		writeq(0x0, &bp->scif_card_dma_addr);
+	}
+	if (scif_is_mgmt_node()) {
+		scif_disconnect_node(scifdev->node, true);
+	} else {
+		scif_info.card_initiated_exit = true;
+		scif_stop(scifdev);
+	}
+	if (atomic_dec_and_test(&g_loopb_cnt))
+		scif_destroy_loopback_qp(&scif_dev[sdev->snode]);
+	if (scifdev->cookie) {
+		sdev->hw_ops->free_irq(sdev, scifdev->cookie, scifdev);
+		scifdev->cookie = NULL;
+	}
+	scif_destroy_intr_wq(scifdev);
+	cancel_delayed_work(&scifdev->qp_dwork);
+	scif_free_qp(scifdev);
+	scifdev->rdb = -1;
+	scifdev->sdev = NULL;
+}
+
+static struct scif_hw_dev_id id_table[] = {
+	{ MIC_SCIF_DEV, SCIF_DEV_ANY_ID },
+	{ 0 },
+};
+
+static struct scif_driver scif_driver = {
+	.driver.name =	KBUILD_MODNAME,
+	.driver.owner =	THIS_MODULE,
+	.id_table = id_table,
+	.probe = scif_probe,
+	.remove = scif_remove,
+};
+
+static int _scif_init(void)
+{
+	int rc;
+
+	mutex_init(&scif_info.eplock);
+	spin_lock_init(&scif_info.rmalock);
+	spin_lock_init(&scif_info.nb_connect_lock);
+	spin_lock_init(&scif_info.port_lock);
+	mutex_init(&scif_info.conflock);
+	mutex_init(&scif_info.connlock);
+	mutex_init(&scif_info.fencelock);
+	INIT_LIST_HEAD(&scif_info.uaccept);
+	INIT_LIST_HEAD(&scif_info.listen);
+	INIT_LIST_HEAD(&scif_info.zombie);
+	INIT_LIST_HEAD(&scif_info.connected);
+	INIT_LIST_HEAD(&scif_info.disconnected);
+	INIT_LIST_HEAD(&scif_info.rma);
+	INIT_LIST_HEAD(&scif_info.rma_tc);
+	INIT_LIST_HEAD(&scif_info.mmu_notif_cleanup);
+	INIT_LIST_HEAD(&scif_info.fence);
+	INIT_LIST_HEAD(&scif_info.nb_connect_list);
+	init_waitqueue_head(&scif_info.exitwq);
+	scif_info.rma_tc_limit = SCIF_RMA_TEMP_CACHE_LIMIT;
+	scif_info.en_msg_log = 0;
+	scif_info.p2p_enable = 1;
+	rc = scif_setup_scifdev();
+	if (rc)
+		goto error;
+	unaligned_cache = kmem_cache_create("Unaligned_DMA",
+					    SCIF_KMEM_UNALIGNED_BUF_SIZE,
+					    0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!unaligned_cache) {
+		rc = -ENOMEM;
+		goto free_sdev;
+	}
+	INIT_WORK(&scif_info.misc_work, scif_misc_handler);
+	INIT_WORK(&scif_info.mmu_notif_work, scif_mmu_notif_handler);
+	INIT_WORK(&scif_info.conn_work, scif_conn_handler);
+	idr_init(&scif_ports);
+	return 0;
+free_sdev:
+	scif_destroy_scifdev();
+error:
+	return rc;
+}
+
+static void _scif_exit(void)
+{
+	idr_destroy(&scif_ports);
+	kmem_cache_destroy(unaligned_cache);
+	scif_destroy_scifdev();
+}
+
+static int __init scif_init(void)
+{
+	struct miscdevice *mdev = &scif_info.mdev;
+	int rc;
+
+	_scif_init();
+	iova_cache_get();
+	rc = scif_peer_bus_init();
+	if (rc)
+		goto exit;
+	rc = scif_register_driver(&scif_driver);
+	if (rc)
+		goto peer_bus_exit;
+	rc = misc_register(mdev);
+	if (rc)
+		goto unreg_scif;
+	scif_init_debugfs();
+	return 0;
+unreg_scif:
+	scif_unregister_driver(&scif_driver);
+peer_bus_exit:
+	scif_peer_bus_exit();
+exit:
+	_scif_exit();
+	return rc;
+}
+
+static void __exit scif_exit(void)
+{
+	scif_exit_debugfs();
+	misc_deregister(&scif_info.mdev);
+	scif_unregister_driver(&scif_driver);
+	scif_peer_bus_exit();
+	iova_cache_put();
+	_scif_exit();
+}
+
+module_init(scif_init);
+module_exit(scif_exit);
+
+MODULE_DEVICE_TABLE(scif, id_table);
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) SCIF driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/scif/scif_main.h b/drivers/misc/mic/scif/scif_main.h
new file mode 100644
index 000000000..0e5eff9ad
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_main.h
@@ -0,0 +1,283 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_MAIN_H
+#define SCIF_MAIN_H
+
+#include <linux/sched/signal.h>
+#include <linux/pci.h>
+#include <linux/miscdevice.h>
+#include <linux/dmaengine.h>
+#include <linux/iova.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <linux/vmalloc.h>
+#include <linux/scif.h>
+#include "../common/mic_dev.h"
+
+#define SCIF_MGMT_NODE 0
+#define SCIF_DEFAULT_WATCHDOG_TO 30
+#define SCIF_NODE_ACCEPT_TIMEOUT (3 * HZ)
+#define SCIF_NODE_ALIVE_TIMEOUT (SCIF_DEFAULT_WATCHDOG_TO * HZ)
+#define SCIF_RMA_TEMP_CACHE_LIMIT 0x20000
+
+/*
+ * Generic state used for certain node QP message exchanges
+ * like Unregister, Alloc etc.
+ */
+enum scif_msg_state {
+	OP_IDLE = 1,
+	OP_IN_PROGRESS,
+	OP_COMPLETED,
+	OP_FAILED
+};
+
+/*
+ * struct scif_info - Global SCIF information
+ *
+ * @nodeid: Node ID this node is to others
+ * @maxid: Max known node ID
+ * @total: Total number of SCIF nodes
+ * @nr_zombies: number of zombie endpoints
+ * @eplock: Lock to synchronize listening, zombie endpoint lists
+ * @connlock: Lock to synchronize connected and disconnected lists
+ * @nb_connect_lock: Synchronize non blocking connect operations
+ * @port_lock: Synchronize access to SCIF ports
+ * @uaccept: List of user acceptreq waiting for acceptreg
+ * @listen: List of listening end points
+ * @zombie: List of zombie end points with pending RMA's
+ * @connected: List of end points in connected state
+ * @disconnected: List of end points in disconnected state
+ * @nb_connect_list: List for non blocking connections
+ * @misc_work: miscellaneous SCIF tasks
+ * @conflock: Lock to synchronize SCIF node configuration changes
+ * @en_msg_log: Enable debug message logging
+ * @p2p_enable: Enable P2P SCIF network
+ * @mdev: The MISC device
+ * @conn_work: Work for workqueue handling all connections
+ * @exitwq: Wait queue for waiting for an EXIT node QP message response
+ * @loopb_dev: Dummy SCIF device used for loopback
+ * @loopb_wq: Workqueue used for handling loopback messages
+ * @loopb_wqname[16]: Name of loopback workqueue
+ * @loopb_work: Used for submitting work to loopb_wq
+ * @loopb_recv_q: List of messages received on the loopb_wq
+ * @card_initiated_exit: set when the card has initiated the exit
+ * @rmalock: Synchronize access to RMA operations
+ * @fencelock: Synchronize access to list of remote fences requested.
+ * @rma: List of temporary registered windows to be destroyed.
+ * @rma_tc: List of temporary registered & cached Windows to be destroyed
+ * @fence: List of remote fence requests
+ * @mmu_notif_work: Work for registration caching MMU notifier workqueue
+ * @mmu_notif_cleanup: List of temporary cached windows for reg cache
+ * @rma_tc_limit: RMA temporary cache limit
+ */
+struct scif_info {
+	u8 nodeid;
+	u8 maxid;
+	u8 total;
+	u32 nr_zombies;
+	struct mutex eplock;
+	struct mutex connlock;
+	spinlock_t nb_connect_lock;
+	spinlock_t port_lock;
+	struct list_head uaccept;
+	struct list_head listen;
+	struct list_head zombie;
+	struct list_head connected;
+	struct list_head disconnected;
+	struct list_head nb_connect_list;
+	struct work_struct misc_work;
+	struct mutex conflock;
+	u8 en_msg_log;
+	u8 p2p_enable;
+	struct miscdevice mdev;
+	struct work_struct conn_work;
+	wait_queue_head_t exitwq;
+	struct scif_dev *loopb_dev;
+	struct workqueue_struct *loopb_wq;
+	char loopb_wqname[16];
+	struct work_struct loopb_work;
+	struct list_head loopb_recv_q;
+	bool card_initiated_exit;
+	spinlock_t rmalock;
+	struct mutex fencelock;
+	struct list_head rma;
+	struct list_head rma_tc;
+	struct list_head fence;
+	struct work_struct mmu_notif_work;
+	struct list_head mmu_notif_cleanup;
+	unsigned long rma_tc_limit;
+};
+
+/*
+ * struct scif_p2p_info - SCIF mapping information used for P2P
+ *
+ * @ppi_peer_id - SCIF peer node id
+ * @ppi_sg - Scatter list for bar information (One for mmio and one for aper)
+ * @sg_nentries - Number of entries in the scatterlist
+ * @ppi_da: DMA address for MMIO and APER bars
+ * @ppi_len: Length of MMIO and APER bars
+ * @ppi_list: Link in list of mapping information
+ */
+struct scif_p2p_info {
+	u8 ppi_peer_id;
+	struct scatterlist *ppi_sg[2];
+	u64 sg_nentries[2];
+	dma_addr_t ppi_da[2];
+	u64 ppi_len[2];
+#define SCIF_PPI_MMIO 0
+#define SCIF_PPI_APER 1
+	struct list_head ppi_list;
+};
+
+/*
+ * struct scif_dev - SCIF remote device specific fields
+ *
+ * @node: Node id
+ * @p2p: List of P2P mapping information
+ * @qpairs: The node queue pair for exchanging control messages
+ * @intr_wq: Workqueue for handling Node QP messages
+ * @intr_wqname: Name of node QP workqueue for handling interrupts
+ * @intr_bh: Used for submitting work to intr_wq
+ * @lock: Lock used for synchronizing access to the scif device
+ * @sdev: SCIF hardware device on the SCIF hardware bus
+ * @db: doorbell the peer will trigger to generate an interrupt on self
+ * @rdb: Doorbell to trigger on the peer to generate an interrupt on the peer
+ * @cookie: Cookie received while registering the interrupt handler
+ * @peer_add_work: Work for handling device_add for peer devices
+ * @p2p_dwork: Delayed work to enable polling for P2P state
+ * @qp_dwork: Delayed work for enabling polling for remote QP information
+ * @p2p_retry: Number of times to retry polling of P2P state
+ * @base_addr: P2P aperture bar base address
+ * @mic_mw mmio: The peer MMIO information used for P2P
+ * @spdev: SCIF peer device on the SCIF peer bus
+ * @node_remove_ack_pending: True if a node_remove_ack is pending
+ * @exit_ack_pending: true if an exit_ack is pending
+ * @disconn_wq: Used while waiting for a node remove response
+ * @disconn_rescnt: Keeps track of number of node remove requests sent
+ * @exit: Status of exit message
+ * @qp_dma_addr: Queue pair DMA address passed to the peer
+ * @dma_ch_idx: Round robin index for DMA channels
+ * @signal_pool: DMA pool used for scheduling scif_fence_signal DMA's
+*/
+struct scif_dev {
+	u8 node;
+	struct list_head p2p;
+	struct scif_qp *qpairs;
+	struct workqueue_struct *intr_wq;
+	char intr_wqname[16];
+	struct work_struct intr_bh;
+	struct mutex lock;
+	struct scif_hw_dev *sdev;
+	int db;
+	int rdb;
+	struct mic_irq *cookie;
+	struct work_struct peer_add_work;
+	struct delayed_work p2p_dwork;
+	struct delayed_work qp_dwork;
+	int p2p_retry;
+	dma_addr_t base_addr;
+	struct mic_mw mmio;
+	struct scif_peer_dev __rcu *spdev;
+	bool node_remove_ack_pending;
+	bool exit_ack_pending;
+	wait_queue_head_t disconn_wq;
+	atomic_t disconn_rescnt;
+	enum scif_msg_state exit;
+	dma_addr_t qp_dma_addr;
+	int dma_ch_idx;
+	struct dma_pool *signal_pool;
+};
+
+extern bool scif_reg_cache_enable;
+extern bool scif_ulimit_check;
+extern struct scif_info scif_info;
+extern struct idr scif_ports;
+extern struct bus_type scif_peer_bus;
+extern struct scif_dev *scif_dev;
+extern const struct file_operations scif_fops;
+extern const struct file_operations scif_anon_fops;
+
+/* Size of the RB for the Node QP */
+#define SCIF_NODE_QP_SIZE 0x10000
+
+#include "scif_nodeqp.h"
+#include "scif_rma.h"
+#include "scif_rma_list.h"
+
+/*
+ * scifdev_self:
+ * @dev: The remote SCIF Device
+ *
+ * Returns true if the SCIF Device passed is the self aka Loopback SCIF device.
+ */
+static inline int scifdev_self(struct scif_dev *dev)
+{
+	return dev->node == scif_info.nodeid;
+}
+
+static inline bool scif_is_mgmt_node(void)
+{
+	return !scif_info.nodeid;
+}
+
+/*
+ * scifdev_is_p2p:
+ * @dev: The remote SCIF Device
+ *
+ * Returns true if the SCIF Device is a MIC Peer to Peer SCIF device.
+ */
+static inline bool scifdev_is_p2p(struct scif_dev *dev)
+{
+	if (scif_is_mgmt_node())
+		return false;
+	else
+		return dev != &scif_dev[SCIF_MGMT_NODE] &&
+			!scifdev_self(dev);
+}
+
+/*
+ * scifdev_alive:
+ * @scifdev: The remote SCIF Device
+ *
+ * Returns true if the remote SCIF Device is running or sleeping for
+ * this endpoint.
+ */
+static inline int _scifdev_alive(struct scif_dev *scifdev)
+{
+	struct scif_peer_dev *spdev;
+
+	rcu_read_lock();
+	spdev = rcu_dereference(scifdev->spdev);
+	rcu_read_unlock();
+	return !!spdev;
+}
+
+#include "scif_epd.h"
+
+void __init scif_init_debugfs(void);
+void scif_exit_debugfs(void);
+int scif_setup_intr_wq(struct scif_dev *scifdev);
+void scif_destroy_intr_wq(struct scif_dev *scifdev);
+void scif_cleanup_scifdev(struct scif_dev *dev);
+void scif_handle_remove_node(int node);
+void scif_disconnect_node(u32 node_id, bool mgmt_initiated);
+void scif_free_qp(struct scif_dev *dev);
+void scif_misc_handler(struct work_struct *work);
+void scif_stop(struct scif_dev *scifdev);
+irqreturn_t scif_intr_handler(int irq, void *data);
+#endif /* SCIF_MAIN_H */
diff --git a/drivers/misc/mic/scif/scif_map.h b/drivers/misc/mic/scif/scif_map.h
new file mode 100644
index 000000000..3e86360ba
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_map.h
@@ -0,0 +1,136 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_MAP_H
+#define SCIF_MAP_H
+
+#include "../bus/scif_bus.h"
+
+static __always_inline void *
+scif_alloc_coherent(dma_addr_t *dma_handle,
+		    struct scif_dev *scifdev, size_t size,
+		    gfp_t gfp)
+{
+	void *va;
+
+	if (scifdev_self(scifdev)) {
+		va = kmalloc(size, gfp);
+		if (va)
+			*dma_handle = virt_to_phys(va);
+	} else {
+		va = dma_alloc_coherent(&scifdev->sdev->dev,
+					size, dma_handle, gfp);
+		if (va && scifdev_is_p2p(scifdev))
+			*dma_handle = *dma_handle + scifdev->base_addr;
+	}
+	return va;
+}
+
+static __always_inline void
+scif_free_coherent(void *va, dma_addr_t local,
+		   struct scif_dev *scifdev, size_t size)
+{
+	if (scifdev_self(scifdev)) {
+		kfree(va);
+	} else {
+		if (scifdev_is_p2p(scifdev) && local > scifdev->base_addr)
+			local = local - scifdev->base_addr;
+		dma_free_coherent(&scifdev->sdev->dev,
+				  size, va, local);
+	}
+}
+
+static __always_inline int
+scif_map_single(dma_addr_t *dma_handle,
+		void *local, struct scif_dev *scifdev, size_t size)
+{
+	int err = 0;
+
+	if (scifdev_self(scifdev)) {
+		*dma_handle = virt_to_phys((local));
+	} else {
+		*dma_handle = dma_map_single(&scifdev->sdev->dev,
+					     local, size, DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(&scifdev->sdev->dev, *dma_handle))
+			err = -ENOMEM;
+		else if (scifdev_is_p2p(scifdev))
+			*dma_handle = *dma_handle + scifdev->base_addr;
+	}
+	if (err)
+		*dma_handle = 0;
+	return err;
+}
+
+static __always_inline void
+scif_unmap_single(dma_addr_t local, struct scif_dev *scifdev,
+		  size_t size)
+{
+	if (!scifdev_self(scifdev)) {
+		if (scifdev_is_p2p(scifdev))
+			local = local - scifdev->base_addr;
+		dma_unmap_single(&scifdev->sdev->dev, local,
+				 size, DMA_BIDIRECTIONAL);
+	}
+}
+
+static __always_inline void *
+scif_ioremap(dma_addr_t phys, size_t size, struct scif_dev *scifdev)
+{
+	void *out_virt;
+	struct scif_hw_dev *sdev = scifdev->sdev;
+
+	if (scifdev_self(scifdev))
+		out_virt = phys_to_virt(phys);
+	else
+		out_virt = (void __force *)
+			   sdev->hw_ops->ioremap(sdev, phys, size);
+	return out_virt;
+}
+
+static __always_inline void
+scif_iounmap(void *virt, size_t len, struct scif_dev *scifdev)
+{
+	if (!scifdev_self(scifdev)) {
+		struct scif_hw_dev *sdev = scifdev->sdev;
+
+		sdev->hw_ops->iounmap(sdev, (void __force __iomem *)virt);
+	}
+}
+
+static __always_inline int
+scif_map_page(dma_addr_t *dma_handle, struct page *page,
+	      struct scif_dev *scifdev)
+{
+	int err = 0;
+
+	if (scifdev_self(scifdev)) {
+		*dma_handle = page_to_phys(page);
+	} else {
+		struct scif_hw_dev *sdev = scifdev->sdev;
+		*dma_handle = dma_map_page(&sdev->dev,
+					   page, 0x0, PAGE_SIZE,
+					   DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(&sdev->dev, *dma_handle))
+			err = -ENOMEM;
+		else if (scifdev_is_p2p(scifdev))
+			*dma_handle = *dma_handle + scifdev->base_addr;
+	}
+	if (err)
+		*dma_handle = 0;
+	return err;
+}
+#endif  /* SCIF_MAP_H */
diff --git a/drivers/misc/mic/scif/scif_mmap.c b/drivers/misc/mic/scif/scif_mmap.c
new file mode 100644
index 000000000..928211677
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_mmap.c
@@ -0,0 +1,699 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+
+/*
+ * struct scif_vma_info - Information about a remote memory mapping
+ *			  created via scif_mmap(..)
+ * @vma: VM area struct
+ * @list: link to list of active vmas
+ */
+struct scif_vma_info {
+	struct vm_area_struct *vma;
+	struct list_head list;
+};
+
+void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_rma_req req;
+	struct scif_window *window = NULL;
+	struct scif_window *recv_window =
+		(struct scif_window *)msg->payload[0];
+	struct scif_endpt *ep;
+
+	ep = (struct scif_endpt *)recv_window->ep;
+	req.out_window = &window;
+	req.offset = recv_window->offset;
+	req.prot = recv_window->prot;
+	req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
+	req.type = SCIF_WINDOW_FULL;
+	req.head = &ep->rma_info.reg_list;
+	msg->payload[0] = ep->remote_ep;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Does a valid window exist? */
+	if (scif_query_window(&req)) {
+		dev_err(&scifdev->sdev->dev,
+			"%s %d -ENXIO\n", __func__, __LINE__);
+		msg->uop = SCIF_UNREGISTER_ACK;
+		goto error;
+	}
+
+	scif_put_window(window, window->nr_pages);
+
+	if (!window->ref_count) {
+		atomic_inc(&ep->rma_info.tw_refcount);
+		ep->rma_info.async_list_del = 1;
+		list_del_init(&window->list);
+		scif_free_window_offset(ep, window, window->offset);
+	}
+error:
+	mutex_unlock(&ep->rma_info.rma_lock);
+	if (window && !window->ref_count)
+		scif_queue_for_cleanup(window, &scif_info.rma);
+}
+
+/*
+ * Remove valid remote memory mappings created via scif_mmap(..) from the
+ * process address space since the remote node is lost
+ */
+static void __scif_zap_mmaps(struct scif_endpt *ep)
+{
+	struct list_head *item;
+	struct scif_vma_info *info;
+	struct vm_area_struct *vma;
+	unsigned long size;
+
+	spin_lock(&ep->lock);
+	list_for_each(item, &ep->rma_info.vma_list) {
+		info = list_entry(item, struct scif_vma_info, list);
+		vma = info->vma;
+		size = vma->vm_end - vma->vm_start;
+		zap_vma_ptes(vma, vma->vm_start, size);
+		dev_dbg(scif_info.mdev.this_device,
+			"%s ep %p zap vma %p size 0x%lx\n",
+			__func__, ep, info->vma, size);
+	}
+	spin_unlock(&ep->lock);
+}
+
+/*
+ * Traverse the list of endpoints for a particular remote node and
+ * zap valid remote memory mappings since the remote node is lost
+ */
+static void _scif_zap_mmaps(int node, struct list_head *head)
+{
+	struct scif_endpt *ep;
+	struct list_head *item;
+
+	mutex_lock(&scif_info.connlock);
+	list_for_each(item, head) {
+		ep = list_entry(item, struct scif_endpt, list);
+		if (ep->remote_dev->node == node)
+			__scif_zap_mmaps(ep);
+	}
+	mutex_unlock(&scif_info.connlock);
+}
+
+/*
+ * Wrapper for removing remote memory mappings for a particular node. This API
+ * is called by peer nodes as part of handling a lost node.
+ */
+void scif_zap_mmaps(int node)
+{
+	_scif_zap_mmaps(node, &scif_info.connected);
+	_scif_zap_mmaps(node, &scif_info.disconnected);
+}
+
+/*
+ * This API is only called while handling a lost node:
+ * a) Remote node is dead.
+ * b) Remote memory mappings have been zapped
+ * So we can traverse the remote_reg_list without any locks. Since
+ * the window has not yet been unregistered we can drop the ref count
+ * and queue it to the cleanup thread.
+ */
+static void __scif_cleanup_rma_for_zombies(struct scif_endpt *ep)
+{
+	struct list_head *pos, *tmp;
+	struct scif_window *window;
+
+	list_for_each_safe(pos, tmp, &ep->rma_info.remote_reg_list) {
+		window = list_entry(pos, struct scif_window, list);
+		if (window->ref_count)
+			scif_put_window(window, window->nr_pages);
+		else
+			dev_err(scif_info.mdev.this_device,
+				"%s %d unexpected\n",
+				__func__, __LINE__);
+		if (!window->ref_count) {
+			atomic_inc(&ep->rma_info.tw_refcount);
+			list_del_init(&window->list);
+			scif_queue_for_cleanup(window, &scif_info.rma);
+		}
+	}
+}
+
+/* Cleanup remote registration lists for zombie endpoints */
+void scif_cleanup_rma_for_zombies(int node)
+{
+	struct scif_endpt *ep;
+	struct list_head *item;
+
+	mutex_lock(&scif_info.eplock);
+	list_for_each(item, &scif_info.zombie) {
+		ep = list_entry(item, struct scif_endpt, list);
+		if (ep->remote_dev && ep->remote_dev->node == node)
+			__scif_cleanup_rma_for_zombies(ep);
+	}
+	mutex_unlock(&scif_info.eplock);
+	flush_work(&scif_info.misc_work);
+}
+
+/* Insert the VMA into the per endpoint VMA list */
+static int scif_insert_vma(struct scif_endpt *ep, struct vm_area_struct *vma)
+{
+	struct scif_vma_info *info;
+	int err = 0;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		err = -ENOMEM;
+		goto done;
+	}
+	info->vma = vma;
+	spin_lock(&ep->lock);
+	list_add_tail(&info->list, &ep->rma_info.vma_list);
+	spin_unlock(&ep->lock);
+done:
+	return err;
+}
+
+/* Delete the VMA from the per endpoint VMA list */
+static void scif_delete_vma(struct scif_endpt *ep, struct vm_area_struct *vma)
+{
+	struct list_head *item;
+	struct scif_vma_info *info;
+
+	spin_lock(&ep->lock);
+	list_for_each(item, &ep->rma_info.vma_list) {
+		info = list_entry(item, struct scif_vma_info, list);
+		if (info->vma == vma) {
+			list_del(&info->list);
+			kfree(info);
+			break;
+		}
+	}
+	spin_unlock(&ep->lock);
+}
+
+static phys_addr_t scif_get_phys(phys_addr_t phys, struct scif_endpt *ep)
+{
+	struct scif_dev *scifdev = (struct scif_dev *)ep->remote_dev;
+	struct scif_hw_dev *sdev = scifdev->sdev;
+	phys_addr_t out_phys, apt_base = 0;
+
+	/*
+	 * If the DMA address is card relative then we need to add the
+	 * aperture base for mmap to work correctly
+	 */
+	if (!scifdev_self(scifdev) && sdev->aper && sdev->card_rel_da)
+		apt_base = sdev->aper->pa;
+	out_phys = apt_base + phys;
+	return out_phys;
+}
+
+int scif_get_pages(scif_epd_t epd, off_t offset, size_t len,
+		   struct scif_range **pages)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scif_rma_req req;
+	struct scif_window *window = NULL;
+	int nr_pages, err, i;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI get_pinned_pages: ep %p offset 0x%lx len 0x%lx\n",
+		ep, offset, len);
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	if (!len || (offset < 0) ||
+	    (offset + len < offset) ||
+	    (ALIGN(offset, PAGE_SIZE) != offset) ||
+	    (ALIGN(len, PAGE_SIZE) != len))
+		return -EINVAL;
+
+	nr_pages = len >> PAGE_SHIFT;
+
+	req.out_window = &window;
+	req.offset = offset;
+	req.prot = 0;
+	req.nr_bytes = len;
+	req.type = SCIF_WINDOW_SINGLE;
+	req.head = &ep->rma_info.remote_reg_list;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Does a valid window exist? */
+	err = scif_query_window(&req);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error;
+	}
+
+	/* Allocate scif_range */
+	*pages = kzalloc(sizeof(**pages), GFP_KERNEL);
+	if (!*pages) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	/* Allocate phys addr array */
+	(*pages)->phys_addr = scif_zalloc(nr_pages * sizeof(dma_addr_t));
+	if (!((*pages)->phys_addr)) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev)) {
+		/* Allocate virtual address array */
+		((*pages)->va = scif_zalloc(nr_pages * sizeof(void *)));
+		if (!(*pages)->va) {
+			err = -ENOMEM;
+			goto error;
+		}
+	}
+	/* Populate the values */
+	(*pages)->cookie = window;
+	(*pages)->nr_pages = nr_pages;
+	(*pages)->prot_flags = window->prot;
+
+	for (i = 0; i < nr_pages; i++) {
+		(*pages)->phys_addr[i] =
+			__scif_off_to_dma_addr(window, offset +
+					       (i * PAGE_SIZE));
+		(*pages)->phys_addr[i] = scif_get_phys((*pages)->phys_addr[i],
+							ep);
+		if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev))
+			(*pages)->va[i] =
+				ep->remote_dev->sdev->aper->va +
+				(*pages)->phys_addr[i] -
+				ep->remote_dev->sdev->aper->pa;
+	}
+
+	scif_get_window(window, nr_pages);
+error:
+	mutex_unlock(&ep->rma_info.rma_lock);
+	if (err) {
+		if (*pages) {
+			scif_free((*pages)->phys_addr,
+				  nr_pages * sizeof(dma_addr_t));
+			scif_free((*pages)->va,
+				  nr_pages * sizeof(void *));
+			kfree(*pages);
+			*pages = NULL;
+		}
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	}
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_get_pages);
+
+int scif_put_pages(struct scif_range *pages)
+{
+	struct scif_endpt *ep;
+	struct scif_window *window;
+	struct scifmsg msg;
+
+	if (!pages || !pages->cookie)
+		return -EINVAL;
+
+	window = pages->cookie;
+
+	if (!window || window->magic != SCIFEP_MAGIC)
+		return -EINVAL;
+
+	ep = (struct scif_endpt *)window->ep;
+	/*
+	 * If the state is SCIFEP_CONNECTED or SCIFEP_DISCONNECTED then the
+	 * callee should be allowed to release references to the pages,
+	 * else the endpoint was not connected in the first place,
+	 * hence the ENOTCONN.
+	 */
+	if (ep->state != SCIFEP_CONNECTED && ep->state != SCIFEP_DISCONNECTED)
+		return -ENOTCONN;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+
+	scif_put_window(window, pages->nr_pages);
+
+	/* Initiate window destruction if ref count is zero */
+	if (!window->ref_count) {
+		list_del(&window->list);
+		mutex_unlock(&ep->rma_info.rma_lock);
+		scif_drain_dma_intr(ep->remote_dev->sdev,
+				    ep->rma_info.dma_chan);
+		/* Inform the peer about this window being destroyed. */
+		msg.uop = SCIF_MUNMAP;
+		msg.src = ep->port;
+		msg.payload[0] = window->peer_window;
+		/* No error handling for notification messages */
+		scif_nodeqp_send(ep->remote_dev, &msg);
+		/* Destroy this window from the peer's registered AS */
+		scif_destroy_remote_window(window);
+	} else {
+		mutex_unlock(&ep->rma_info.rma_lock);
+	}
+
+	scif_free(pages->phys_addr, pages->nr_pages * sizeof(dma_addr_t));
+	scif_free(pages->va, pages->nr_pages * sizeof(void *));
+	kfree(pages);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(scif_put_pages);
+
+/*
+ * scif_rma_list_mmap:
+ *
+ * Traverse the remote registration list starting from start_window:
+ * 1) Create VtoP mappings via remap_pfn_range(..)
+ * 2) Once step 1) and 2) complete successfully then traverse the range of
+ *    windows again and bump the reference count.
+ * RMA lock must be held.
+ */
+static int scif_rma_list_mmap(struct scif_window *start_window, s64 offset,
+			      int nr_pages, struct vm_area_struct *vma)
+{
+	s64 end_offset, loop_offset = offset;
+	struct scif_window *window = start_window;
+	int loop_nr_pages, nr_pages_left = nr_pages;
+	struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
+	struct list_head *head = &ep->rma_info.remote_reg_list;
+	int i, err = 0;
+	dma_addr_t phys_addr;
+	struct scif_window_iter src_win_iter;
+	size_t contig_bytes = 0;
+
+	might_sleep();
+	list_for_each_entry_from(window, head, list) {
+		end_offset = window->offset +
+			(window->nr_pages << PAGE_SHIFT);
+		loop_nr_pages = min_t(int,
+				      (end_offset - loop_offset) >> PAGE_SHIFT,
+				      nr_pages_left);
+		scif_init_window_iter(window, &src_win_iter);
+		for (i = 0; i < loop_nr_pages; i++) {
+			phys_addr = scif_off_to_dma_addr(window, loop_offset,
+							 &contig_bytes,
+							 &src_win_iter);
+			phys_addr = scif_get_phys(phys_addr, ep);
+			err = remap_pfn_range(vma,
+					      vma->vm_start +
+					      loop_offset - offset,
+					      phys_addr >> PAGE_SHIFT,
+					      PAGE_SIZE,
+					      vma->vm_page_prot);
+			if (err)
+				goto error;
+			loop_offset += PAGE_SIZE;
+		}
+		nr_pages_left -= loop_nr_pages;
+		if (!nr_pages_left)
+			break;
+	}
+	/*
+	 * No more failures expected. Bump up the ref count for all
+	 * the windows. Another traversal from start_window required
+	 * for handling errors encountered across windows during
+	 * remap_pfn_range(..).
+	 */
+	loop_offset = offset;
+	nr_pages_left = nr_pages;
+	window = start_window;
+	head = &ep->rma_info.remote_reg_list;
+	list_for_each_entry_from(window, head, list) {
+		end_offset = window->offset +
+			(window->nr_pages << PAGE_SHIFT);
+		loop_nr_pages = min_t(int,
+				      (end_offset - loop_offset) >> PAGE_SHIFT,
+				      nr_pages_left);
+		scif_get_window(window, loop_nr_pages);
+		nr_pages_left -= loop_nr_pages;
+		loop_offset += (loop_nr_pages << PAGE_SHIFT);
+		if (!nr_pages_left)
+			break;
+	}
+error:
+	if (err)
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	return err;
+}
+
+/*
+ * scif_rma_list_munmap:
+ *
+ * Traverse the remote registration list starting from window:
+ * 1) Decrement ref count.
+ * 2) If the ref count drops to zero then send a SCIF_MUNMAP message to peer.
+ * RMA lock must be held.
+ */
+static void scif_rma_list_munmap(struct scif_window *start_window,
+				 s64 offset, int nr_pages)
+{
+	struct scifmsg msg;
+	s64 loop_offset = offset, end_offset;
+	int loop_nr_pages, nr_pages_left = nr_pages;
+	struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
+	struct list_head *head = &ep->rma_info.remote_reg_list;
+	struct scif_window *window = start_window, *_window;
+
+	msg.uop = SCIF_MUNMAP;
+	msg.src = ep->port;
+	loop_offset = offset;
+	nr_pages_left = nr_pages;
+	list_for_each_entry_safe_from(window, _window, head, list) {
+		end_offset = window->offset +
+			(window->nr_pages << PAGE_SHIFT);
+		loop_nr_pages = min_t(int,
+				      (end_offset - loop_offset) >> PAGE_SHIFT,
+				      nr_pages_left);
+		scif_put_window(window, loop_nr_pages);
+		if (!window->ref_count) {
+			struct scif_dev *rdev = ep->remote_dev;
+
+			scif_drain_dma_intr(rdev->sdev,
+					    ep->rma_info.dma_chan);
+			/* Inform the peer about this munmap */
+			msg.payload[0] = window->peer_window;
+			/* No error handling for Notification messages. */
+			scif_nodeqp_send(ep->remote_dev, &msg);
+			list_del(&window->list);
+			/* Destroy this window from the peer's registered AS */
+			scif_destroy_remote_window(window);
+		}
+		nr_pages_left -= loop_nr_pages;
+		loop_offset += (loop_nr_pages << PAGE_SHIFT);
+		if (!nr_pages_left)
+			break;
+	}
+}
+
+/*
+ * The private data field of each VMA used to mmap a remote window
+ * points to an instance of struct vma_pvt
+ */
+struct vma_pvt {
+	struct scif_endpt *ep;	/* End point for remote window */
+	s64 offset;		/* offset within remote window */
+	bool valid_offset;	/* offset is valid only if the original
+				 * mmap request was for a single page
+				 * else the offset within the vma is
+				 * the correct offset
+				 */
+	struct kref ref;
+};
+
+static void vma_pvt_release(struct kref *ref)
+{
+	struct vma_pvt *vmapvt = container_of(ref, struct vma_pvt, ref);
+
+	kfree(vmapvt);
+}
+
+/**
+ * scif_vma_open - VMA open driver callback
+ * @vma: VMM memory area.
+ * The open method is called by the kernel to allow the subsystem implementing
+ * the VMA to initialize the area. This method is invoked any time a new
+ * reference to the VMA is made (when a process forks, for example).
+ * The one exception happens when the VMA is first created by mmap;
+ * in this case, the driver's mmap method is called instead.
+ * This function is also invoked when an existing VMA is split by the kernel
+ * due to a call to munmap on a subset of the VMA resulting in two VMAs.
+ * The kernel invokes this function only on one of the two VMAs.
+ */
+static void scif_vma_open(struct vm_area_struct *vma)
+{
+	struct vma_pvt *vmapvt = vma->vm_private_data;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI vma open: vma_start 0x%lx vma_end 0x%lx\n",
+		vma->vm_start, vma->vm_end);
+	scif_insert_vma(vmapvt->ep, vma);
+	kref_get(&vmapvt->ref);
+}
+
+/**
+ * scif_munmap - VMA close driver callback.
+ * @vma: VMM memory area.
+ * When an area is destroyed, the kernel calls its close operation.
+ * Note that there's no usage count associated with VMA's; the area
+ * is opened and closed exactly once by each process that uses it.
+ */
+static void scif_munmap(struct vm_area_struct *vma)
+{
+	struct scif_endpt *ep;
+	struct vma_pvt *vmapvt = vma->vm_private_data;
+	int nr_pages = vma_pages(vma);
+	s64 offset;
+	struct scif_rma_req req;
+	struct scif_window *window = NULL;
+	int err;
+
+	might_sleep();
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI munmap: vma_start 0x%lx vma_end 0x%lx\n",
+		vma->vm_start, vma->vm_end);
+	ep = vmapvt->ep;
+	offset = vmapvt->valid_offset ? vmapvt->offset :
+		(vma->vm_pgoff) << PAGE_SHIFT;
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI munmap: ep %p nr_pages 0x%x offset 0x%llx\n",
+		ep, nr_pages, offset);
+	req.out_window = &window;
+	req.offset = offset;
+	req.nr_bytes = vma->vm_end - vma->vm_start;
+	req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
+	req.type = SCIF_WINDOW_PARTIAL;
+	req.head = &ep->rma_info.remote_reg_list;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+
+	err = scif_query_window(&req);
+	if (err)
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	else
+		scif_rma_list_munmap(window, offset, nr_pages);
+
+	mutex_unlock(&ep->rma_info.rma_lock);
+	/*
+	 * The kernel probably zeroes these out but we still want
+	 * to clean up our own mess just in case.
+	 */
+	vma->vm_ops = NULL;
+	vma->vm_private_data = NULL;
+	kref_put(&vmapvt->ref, vma_pvt_release);
+	scif_delete_vma(ep, vma);
+}
+
+static const struct vm_operations_struct scif_vm_ops = {
+	.open = scif_vma_open,
+	.close = scif_munmap,
+};
+
+/**
+ * scif_mmap - Map pages in virtual address space to a remote window.
+ * @vma: VMM memory area.
+ * @epd: endpoint descriptor
+ *
+ * Return: Upon successful completion, scif_mmap() returns zero
+ * else an apt error is returned as documented in scif.h
+ */
+int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd)
+{
+	struct scif_rma_req req;
+	struct scif_window *window = NULL;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	s64 start_offset = vma->vm_pgoff << PAGE_SHIFT;
+	int nr_pages = vma_pages(vma);
+	int err;
+	struct vma_pvt *vmapvt;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI mmap: ep %p start_offset 0x%llx nr_pages 0x%x\n",
+		ep, start_offset, nr_pages);
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	might_sleep();
+
+	err = scif_insert_vma(ep, vma);
+	if (err)
+		return err;
+
+	vmapvt = kzalloc(sizeof(*vmapvt), GFP_KERNEL);
+	if (!vmapvt) {
+		scif_delete_vma(ep, vma);
+		return -ENOMEM;
+	}
+
+	vmapvt->ep = ep;
+	kref_init(&vmapvt->ref);
+
+	req.out_window = &window;
+	req.offset = start_offset;
+	req.nr_bytes = vma->vm_end - vma->vm_start;
+	req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
+	req.type = SCIF_WINDOW_PARTIAL;
+	req.head = &ep->rma_info.remote_reg_list;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Does a valid window exist? */
+	err = scif_query_window(&req);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error_unlock;
+	}
+
+	/* Default prot for loopback */
+	if (!scifdev_self(ep->remote_dev))
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+
+	/*
+	 * VM_DONTCOPY - Do not copy this vma on fork
+	 * VM_DONTEXPAND - Cannot expand with mremap()
+	 * VM_RESERVED - Count as reserved_vm like IO
+	 * VM_PFNMAP - Page-ranges managed without "struct page"
+	 * VM_IO - Memory mapped I/O or similar
+	 *
+	 * We do not want to copy this VMA automatically on a fork(),
+	 * expand this VMA due to mremap() or swap out these pages since
+	 * the VMA is actually backed by physical pages in the remote
+	 * node's physical memory and not via a struct page.
+	 */
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
+
+	if (!scifdev_self(ep->remote_dev))
+		vma->vm_flags |= VM_IO | VM_PFNMAP;
+
+	/* Map this range of windows */
+	err = scif_rma_list_mmap(window, start_offset, nr_pages, vma);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error_unlock;
+	}
+	/* Set up the driver call back */
+	vma->vm_ops = &scif_vm_ops;
+	vma->vm_private_data = vmapvt;
+error_unlock:
+	mutex_unlock(&ep->rma_info.rma_lock);
+	if (err) {
+		kfree(vmapvt);
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		scif_delete_vma(ep, vma);
+	}
+	return err;
+}
diff --git a/drivers/misc/mic/scif/scif_nm.c b/drivers/misc/mic/scif/scif_nm.c
new file mode 100644
index 000000000..79f26a02a
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_nm.c
@@ -0,0 +1,237 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_peer_bus.h"
+
+#include "scif_main.h"
+#include "scif_map.h"
+
+/**
+ * scif_invalidate_ep() - Set state for all connected endpoints
+ * to disconnected and wake up all send/recv waitqueues
+ */
+static void scif_invalidate_ep(int node)
+{
+	struct scif_endpt *ep;
+	struct list_head *pos, *tmpq;
+
+	flush_work(&scif_info.conn_work);
+	mutex_lock(&scif_info.connlock);
+	list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
+		ep = list_entry(pos, struct scif_endpt, list);
+		if (ep->remote_dev->node == node) {
+			scif_unmap_all_windows(ep);
+			spin_lock(&ep->lock);
+			scif_cleanup_ep_qp(ep);
+			spin_unlock(&ep->lock);
+		}
+	}
+	list_for_each_safe(pos, tmpq, &scif_info.connected) {
+		ep = list_entry(pos, struct scif_endpt, list);
+		if (ep->remote_dev->node == node) {
+			list_del(pos);
+			spin_lock(&ep->lock);
+			ep->state = SCIFEP_DISCONNECTED;
+			list_add_tail(&ep->list, &scif_info.disconnected);
+			scif_cleanup_ep_qp(ep);
+			wake_up_interruptible(&ep->sendwq);
+			wake_up_interruptible(&ep->recvwq);
+			spin_unlock(&ep->lock);
+			scif_unmap_all_windows(ep);
+		}
+	}
+	mutex_unlock(&scif_info.connlock);
+}
+
+void scif_free_qp(struct scif_dev *scifdev)
+{
+	struct scif_qp *qp = scifdev->qpairs;
+
+	if (!qp)
+		return;
+	scif_unmap_single(qp->local_buf, scifdev, qp->inbound_q.size);
+	kfree(qp->inbound_q.rb_base);
+	scif_unmap_single(qp->local_qp, scifdev, sizeof(struct scif_qp));
+	kfree(scifdev->qpairs);
+	scifdev->qpairs = NULL;
+}
+
+static void scif_cleanup_qp(struct scif_dev *dev)
+{
+	struct scif_qp *qp = &dev->qpairs[0];
+
+	if (!qp)
+		return;
+	scif_iounmap((void *)qp->remote_qp, sizeof(struct scif_qp), dev);
+	scif_iounmap((void *)qp->outbound_q.rb_base,
+		     sizeof(struct scif_qp), dev);
+	qp->remote_qp = NULL;
+	qp->local_write = 0;
+	qp->inbound_q.current_write_offset = 0;
+	qp->inbound_q.current_read_offset = 0;
+	if (scifdev_is_p2p(dev))
+		scif_free_qp(dev);
+}
+
+void scif_send_acks(struct scif_dev *dev)
+{
+	struct scifmsg msg;
+
+	if (dev->node_remove_ack_pending) {
+		msg.uop = SCIF_NODE_REMOVE_ACK;
+		msg.src.node = scif_info.nodeid;
+		msg.dst.node = SCIF_MGMT_NODE;
+		msg.payload[0] = dev->node;
+		scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], &msg);
+		dev->node_remove_ack_pending = false;
+	}
+	if (dev->exit_ack_pending) {
+		msg.uop = SCIF_EXIT_ACK;
+		msg.src.node = scif_info.nodeid;
+		msg.dst.node = dev->node;
+		scif_nodeqp_send(dev, &msg);
+		dev->exit_ack_pending = false;
+	}
+}
+
+/*
+ * scif_cleanup_scifdev
+ *
+ * @dev: Remote SCIF device.
+ * Uninitialize SCIF data structures for remote SCIF device.
+ */
+void scif_cleanup_scifdev(struct scif_dev *dev)
+{
+	struct scif_hw_dev *sdev = dev->sdev;
+
+	if (!dev->sdev)
+		return;
+	if (scifdev_is_p2p(dev)) {
+		if (dev->cookie) {
+			sdev->hw_ops->free_irq(sdev, dev->cookie, dev);
+			dev->cookie = NULL;
+		}
+		scif_destroy_intr_wq(dev);
+	}
+	flush_work(&scif_info.misc_work);
+	scif_destroy_p2p(dev);
+	scif_invalidate_ep(dev->node);
+	scif_zap_mmaps(dev->node);
+	scif_cleanup_rma_for_zombies(dev->node);
+	flush_work(&scif_info.misc_work);
+	scif_send_acks(dev);
+	if (!dev->node && scif_info.card_initiated_exit) {
+		/*
+		 * Send an SCIF_EXIT message which is the last message from MIC
+		 * to the Host and wait for a SCIF_EXIT_ACK
+		 */
+		scif_send_exit(dev);
+		scif_info.card_initiated_exit = false;
+	}
+	scif_cleanup_qp(dev);
+}
+
+/*
+ * scif_remove_node:
+ *
+ * @node: Node to remove
+ */
+void scif_handle_remove_node(int node)
+{
+	struct scif_dev *scifdev = &scif_dev[node];
+
+	if (scif_peer_unregister_device(scifdev))
+		scif_send_acks(scifdev);
+}
+
+static int scif_send_rmnode_msg(int node, int remove_node)
+{
+	struct scifmsg notif_msg;
+	struct scif_dev *dev = &scif_dev[node];
+
+	notif_msg.uop = SCIF_NODE_REMOVE;
+	notif_msg.src.node = scif_info.nodeid;
+	notif_msg.dst.node = node;
+	notif_msg.payload[0] = remove_node;
+	return scif_nodeqp_send(dev, &notif_msg);
+}
+
+/**
+ * scif_node_disconnect:
+ *
+ * @node_id[in]: source node id.
+ * @mgmt_initiated: Disconnection initiated from the mgmt node
+ *
+ * Disconnect a node from the scif network.
+ */
+void scif_disconnect_node(u32 node_id, bool mgmt_initiated)
+{
+	int ret;
+	int msg_cnt = 0;
+	u32 i = 0;
+	struct scif_dev *scifdev = &scif_dev[node_id];
+
+	if (!node_id)
+		return;
+
+	atomic_set(&scifdev->disconn_rescnt, 0);
+
+	/* Destroy p2p network */
+	for (i = 1; i <= scif_info.maxid; i++) {
+		if (i == node_id)
+			continue;
+		ret = scif_send_rmnode_msg(i, node_id);
+		if (!ret)
+			msg_cnt++;
+	}
+	/* Wait for the remote nodes to respond with SCIF_NODE_REMOVE_ACK */
+	ret = wait_event_timeout(scifdev->disconn_wq,
+				 (atomic_read(&scifdev->disconn_rescnt)
+				 == msg_cnt), SCIF_NODE_ALIVE_TIMEOUT);
+	/* Tell the card to clean up */
+	if (mgmt_initiated && _scifdev_alive(scifdev))
+		/*
+		 * Send an SCIF_EXIT message which is the last message from Host
+		 * to the MIC and wait for a SCIF_EXIT_ACK
+		 */
+		scif_send_exit(scifdev);
+	atomic_set(&scifdev->disconn_rescnt, 0);
+	/* Tell the mgmt node to clean up */
+	ret = scif_send_rmnode_msg(SCIF_MGMT_NODE, node_id);
+	if (!ret)
+		/* Wait for mgmt node to respond with SCIF_NODE_REMOVE_ACK */
+		wait_event_timeout(scifdev->disconn_wq,
+				   (atomic_read(&scifdev->disconn_rescnt) == 1),
+				   SCIF_NODE_ALIVE_TIMEOUT);
+}
+
+void scif_get_node_info(void)
+{
+	struct scifmsg msg;
+	DECLARE_COMPLETION_ONSTACK(node_info);
+
+	msg.uop = SCIF_GET_NODE_INFO;
+	msg.src.node = scif_info.nodeid;
+	msg.dst.node = SCIF_MGMT_NODE;
+	msg.payload[3] = (u64)&node_info;
+
+	if ((scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], &msg)))
+		return;
+
+	/* Wait for a response with SCIF_GET_NODE_INFO */
+	wait_for_completion(&node_info);
+}
diff --git a/drivers/misc/mic/scif/scif_nodeqp.c b/drivers/misc/mic/scif/scif_nodeqp.c
new file mode 100644
index 000000000..c66ca1a58
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_nodeqp.c
@@ -0,0 +1,1354 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "../bus/scif_bus.h"
+#include "scif_peer_bus.h"
+#include "scif_main.h"
+#include "scif_nodeqp.h"
+#include "scif_map.h"
+
+/*
+ ************************************************************************
+ * SCIF node Queue Pair (QP) setup flow:
+ *
+ * 1) SCIF driver gets probed with a scif_hw_dev via the scif_hw_bus
+ * 2) scif_setup_qp(..) allocates the local qp and calls
+ *	scif_setup_qp_connect(..) which allocates and maps the local
+ *	buffer for the inbound QP
+ * 3) The local node updates the device page with the DMA address of the QP
+ * 4) A delayed work is scheduled (qp_dwork) which periodically reads if
+ *	the peer node has updated its QP DMA address
+ * 5) Once a valid non zero address is found in the QP DMA address field
+ *	in the device page, the local node maps the remote node's QP,
+ *	updates its outbound QP and sends a SCIF_INIT message to the peer
+ * 6) The SCIF_INIT message is received by the peer node QP interrupt bottom
+ *	half handler by calling scif_init(..)
+ * 7) scif_init(..) registers a new SCIF peer node by calling
+ *	scif_peer_register_device(..) which signifies the addition of a new
+ *	SCIF node
+ * 8) On the mgmt node, P2P network setup/teardown is initiated if all the
+ *	remote nodes are online via scif_p2p_setup(..)
+ * 9) For P2P setup, the host maps the remote nodes' aperture and memory
+ *	bars and sends a SCIF_NODE_ADD message to both nodes
+ * 10) As part of scif_nodeadd, both nodes set up their local inbound
+ *	QPs and send a SCIF_NODE_ADD_ACK to the mgmt node
+ * 11) As part of scif_node_add_ack(..) the mgmt node forwards the
+ *	SCIF_NODE_ADD_ACK to the remote nodes
+ * 12) As part of scif_node_add_ack(..) the remote nodes update their
+ *	outbound QPs, make sure they can access memory on the remote node
+ *	and then add a new SCIF peer node by calling
+ *	scif_peer_register_device(..) which signifies the addition of a new
+ *	SCIF node.
+ * 13) The SCIF network is now established across all nodes.
+ *
+ ************************************************************************
+ * SCIF node QP teardown flow (initiated by non mgmt node):
+ *
+ * 1) SCIF driver gets a remove callback with a scif_hw_dev via the scif_hw_bus
+ * 2) The device page QP DMA address field is updated with 0x0
+ * 3) A non mgmt node now cleans up all local data structures and sends a
+ *	SCIF_EXIT message to the peer and waits for a SCIF_EXIT_ACK
+ * 4) As part of scif_exit(..) handling scif_disconnect_node(..) is called
+ * 5) scif_disconnect_node(..) sends a SCIF_NODE_REMOVE message to all the
+ *	peers and waits for a SCIF_NODE_REMOVE_ACK
+ * 6) As part of scif_node_remove(..) a remote node unregisters the peer
+ *	node from the SCIF network and sends a SCIF_NODE_REMOVE_ACK
+ * 7) When the mgmt node has received all the SCIF_NODE_REMOVE_ACKs
+ *	it sends itself a node remove message whose handling cleans up local
+ *	data structures and unregisters the peer node from the SCIF network
+ * 8) The mgmt node sends a SCIF_EXIT_ACK
+ * 9) Upon receipt of the SCIF_EXIT_ACK the node initiating the teardown
+ *	completes the SCIF remove routine
+ * 10) The SCIF network is now torn down for the node initiating the
+ *	teardown sequence
+ *
+ ************************************************************************
+ * SCIF node QP teardown flow (initiated by mgmt node):
+ *
+ * 1) SCIF driver gets a remove callback with a scif_hw_dev via the scif_hw_bus
+ * 2) The device page QP DMA address field is updated with 0x0
+ * 3) The mgmt node calls scif_disconnect_node(..)
+ * 4) scif_disconnect_node(..) sends a SCIF_NODE_REMOVE message to all the peers
+ *	and waits for a SCIF_NODE_REMOVE_ACK
+ * 5) As part of scif_node_remove(..) a remote node unregisters the peer
+ *	node from the SCIF network and sends a SCIF_NODE_REMOVE_ACK
+ * 6) When the mgmt node has received all the SCIF_NODE_REMOVE_ACKs
+ *	it unregisters the peer node from the SCIF network
+ * 7) The mgmt node sends a SCIF_EXIT message and waits for a SCIF_EXIT_ACK.
+ * 8) A non mgmt node upon receipt of a SCIF_EXIT message calls scif_stop(..)
+ *	which would clean up local data structures for all SCIF nodes and
+ *	then send a SCIF_EXIT_ACK back to the mgmt node
+ * 9) Upon receipt of the SCIF_EXIT_ACK the the mgmt node sends itself a node
+ *	remove message whose handling cleans up local data structures and
+ *	destroys any P2P mappings.
+ * 10) The SCIF hardware device for which a remove callback was received is now
+ *	disconnected from the SCIF network.
+ */
+/*
+ * Initializes "local" data structures for the QP. Allocates the QP
+ * ring buffer (rb) and initializes the "in bound" queue.
+ */
+int scif_setup_qp_connect(struct scif_qp *qp, dma_addr_t *qp_offset,
+			  int local_size, struct scif_dev *scifdev)
+{
+	void *local_q = qp->inbound_q.rb_base;
+	int err = 0;
+	u32 tmp_rd = 0;
+
+	spin_lock_init(&qp->send_lock);
+	spin_lock_init(&qp->recv_lock);
+
+	/* Allocate rb only if not already allocated */
+	if (!local_q) {
+		local_q = kzalloc(local_size, GFP_KERNEL);
+		if (!local_q) {
+			err = -ENOMEM;
+			return err;
+		}
+	}
+
+	err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size);
+	if (err)
+		goto kfree;
+	/*
+	 * To setup the inbound_q, the buffer lives locally, the read pointer
+	 * is remote and the write pointer is local.
+	 */
+	scif_rb_init(&qp->inbound_q,
+		     &tmp_rd,
+		     &qp->local_write,
+		     local_q, get_count_order(local_size));
+	/*
+	 * The read pointer is NULL initially and it is unsafe to use the ring
+	 * buffer til this changes!
+	 */
+	qp->inbound_q.read_ptr = NULL;
+	err = scif_map_single(qp_offset, qp,
+			      scifdev, sizeof(struct scif_qp));
+	if (err)
+		goto unmap;
+	qp->local_qp = *qp_offset;
+	return err;
+unmap:
+	scif_unmap_single(qp->local_buf, scifdev, local_size);
+	qp->local_buf = 0;
+kfree:
+	kfree(local_q);
+	return err;
+}
+
+/* When the other side has already done it's allocation, this is called */
+int scif_setup_qp_accept(struct scif_qp *qp, dma_addr_t *qp_offset,
+			 dma_addr_t phys, int local_size,
+			 struct scif_dev *scifdev)
+{
+	void *local_q;
+	void *remote_q;
+	struct scif_qp *remote_qp;
+	int remote_size;
+	int err = 0;
+
+	spin_lock_init(&qp->send_lock);
+	spin_lock_init(&qp->recv_lock);
+	/* Start by figuring out where we need to point */
+	remote_qp = scif_ioremap(phys, sizeof(struct scif_qp), scifdev);
+	if (!remote_qp)
+		return -EIO;
+	qp->remote_qp = remote_qp;
+	if (qp->remote_qp->magic != SCIFEP_MAGIC) {
+		err = -EIO;
+		goto iounmap;
+	}
+	qp->remote_buf = remote_qp->local_buf;
+	remote_size = qp->remote_qp->inbound_q.size;
+	remote_q = scif_ioremap(qp->remote_buf, remote_size, scifdev);
+	if (!remote_q) {
+		err = -EIO;
+		goto iounmap;
+	}
+	qp->remote_qp->local_write = 0;
+	/*
+	 * To setup the outbound_q, the buffer lives in remote memory,
+	 * the read pointer is local, the write pointer is remote
+	 */
+	scif_rb_init(&qp->outbound_q,
+		     &qp->local_read,
+		     &qp->remote_qp->local_write,
+		     remote_q,
+		     get_count_order(remote_size));
+	local_q = kzalloc(local_size, GFP_KERNEL);
+	if (!local_q) {
+		err = -ENOMEM;
+		goto iounmap_1;
+	}
+	err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size);
+	if (err)
+		goto kfree;
+	qp->remote_qp->local_read = 0;
+	/*
+	 * To setup the inbound_q, the buffer lives locally, the read pointer
+	 * is remote and the write pointer is local
+	 */
+	scif_rb_init(&qp->inbound_q,
+		     &qp->remote_qp->local_read,
+		     &qp->local_write,
+		     local_q, get_count_order(local_size));
+	err = scif_map_single(qp_offset, qp, scifdev,
+			      sizeof(struct scif_qp));
+	if (err)
+		goto unmap;
+	qp->local_qp = *qp_offset;
+	return err;
+unmap:
+	scif_unmap_single(qp->local_buf, scifdev, local_size);
+	qp->local_buf = 0;
+kfree:
+	kfree(local_q);
+iounmap_1:
+	scif_iounmap(remote_q, remote_size, scifdev);
+	qp->outbound_q.rb_base = NULL;
+iounmap:
+	scif_iounmap(qp->remote_qp, sizeof(struct scif_qp), scifdev);
+	qp->remote_qp = NULL;
+	return err;
+}
+
+int scif_setup_qp_connect_response(struct scif_dev *scifdev,
+				   struct scif_qp *qp, u64 payload)
+{
+	int err = 0;
+	void *r_buf;
+	int remote_size;
+	phys_addr_t tmp_phys;
+
+	qp->remote_qp = scif_ioremap(payload, sizeof(struct scif_qp), scifdev);
+
+	if (!qp->remote_qp) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	if (qp->remote_qp->magic != SCIFEP_MAGIC) {
+		dev_err(&scifdev->sdev->dev,
+			"SCIFEP_MAGIC mismatch between self %d remote %d\n",
+			scif_dev[scif_info.nodeid].node, scifdev->node);
+		err = -ENODEV;
+		goto error;
+	}
+
+	tmp_phys = qp->remote_qp->local_buf;
+	remote_size = qp->remote_qp->inbound_q.size;
+	r_buf = scif_ioremap(tmp_phys, remote_size, scifdev);
+
+	if (!r_buf)
+		return -EIO;
+
+	qp->local_read = 0;
+	scif_rb_init(&qp->outbound_q,
+		     &qp->local_read,
+		     &qp->remote_qp->local_write,
+		     r_buf,
+		     get_count_order(remote_size));
+	/*
+	 * Because the node QP may already be processing an INIT message, set
+	 * the read pointer so the cached read offset isn't lost
+	 */
+	qp->remote_qp->local_read = qp->inbound_q.current_read_offset;
+	/*
+	 * resetup the inbound_q now that we know where the
+	 * inbound_read really is.
+	 */
+	scif_rb_init(&qp->inbound_q,
+		     &qp->remote_qp->local_read,
+		     &qp->local_write,
+		     qp->inbound_q.rb_base,
+		     get_count_order(qp->inbound_q.size));
+error:
+	return err;
+}
+
+static __always_inline void
+scif_send_msg_intr(struct scif_dev *scifdev)
+{
+	struct scif_hw_dev *sdev = scifdev->sdev;
+
+	if (scifdev_is_p2p(scifdev))
+		sdev->hw_ops->send_p2p_intr(sdev, scifdev->rdb, &scifdev->mmio);
+	else
+		sdev->hw_ops->send_intr(sdev, scifdev->rdb);
+}
+
+int scif_qp_response(phys_addr_t phys, struct scif_dev *scifdev)
+{
+	int err = 0;
+	struct scifmsg msg;
+
+	err = scif_setup_qp_connect_response(scifdev, scifdev->qpairs, phys);
+	if (!err) {
+		/*
+		 * Now that everything is setup and mapped, we're ready
+		 * to tell the peer about our queue's location
+		 */
+		msg.uop = SCIF_INIT;
+		msg.dst.node = scifdev->node;
+		err = scif_nodeqp_send(scifdev, &msg);
+	}
+	return err;
+}
+
+void scif_send_exit(struct scif_dev *scifdev)
+{
+	struct scifmsg msg;
+	int ret;
+
+	scifdev->exit = OP_IN_PROGRESS;
+	msg.uop = SCIF_EXIT;
+	msg.src.node = scif_info.nodeid;
+	msg.dst.node = scifdev->node;
+	ret = scif_nodeqp_send(scifdev, &msg);
+	if (ret)
+		goto done;
+	/* Wait for a SCIF_EXIT_ACK message */
+	wait_event_timeout(scif_info.exitwq, scifdev->exit == OP_COMPLETED,
+			   SCIF_NODE_ALIVE_TIMEOUT);
+done:
+	scifdev->exit = OP_IDLE;
+}
+
+int scif_setup_qp(struct scif_dev *scifdev)
+{
+	int err = 0;
+	int local_size;
+	struct scif_qp *qp;
+
+	local_size = SCIF_NODE_QP_SIZE;
+
+	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+	if (!qp) {
+		err = -ENOMEM;
+		return err;
+	}
+	qp->magic = SCIFEP_MAGIC;
+	scifdev->qpairs = qp;
+	err = scif_setup_qp_connect(qp, &scifdev->qp_dma_addr,
+				    local_size, scifdev);
+	if (err)
+		goto free_qp;
+	/*
+	 * We're as setup as we can be. The inbound_q is setup, w/o a usable
+	 * outbound q.  When we get a message, the read_ptr will be updated,
+	 * and we will pull the message.
+	 */
+	return err;
+free_qp:
+	kfree(scifdev->qpairs);
+	scifdev->qpairs = NULL;
+	return err;
+}
+
+static void scif_p2p_freesg(struct scatterlist *sg)
+{
+	kfree(sg);
+}
+
+static struct scatterlist *
+scif_p2p_setsg(phys_addr_t pa, int page_size, int page_cnt)
+{
+	struct scatterlist *sg;
+	struct page *page;
+	int i;
+
+	sg = kcalloc(page_cnt, sizeof(struct scatterlist), GFP_KERNEL);
+	if (!sg)
+		return NULL;
+	sg_init_table(sg, page_cnt);
+	for (i = 0; i < page_cnt; i++) {
+		page = pfn_to_page(pa >> PAGE_SHIFT);
+		sg_set_page(&sg[i], page, page_size, 0);
+		pa += page_size;
+	}
+	return sg;
+}
+
+/* Init p2p mappings required to access peerdev from scifdev */
+static struct scif_p2p_info *
+scif_init_p2p_info(struct scif_dev *scifdev, struct scif_dev *peerdev)
+{
+	struct scif_p2p_info *p2p;
+	int num_mmio_pages, num_aper_pages, sg_page_shift, err, num_aper_chunks;
+	struct scif_hw_dev *psdev = peerdev->sdev;
+	struct scif_hw_dev *sdev = scifdev->sdev;
+
+	num_mmio_pages = psdev->mmio->len >> PAGE_SHIFT;
+	num_aper_pages = psdev->aper->len >> PAGE_SHIFT;
+
+	p2p = kzalloc(sizeof(*p2p), GFP_KERNEL);
+	if (!p2p)
+		return NULL;
+	p2p->ppi_sg[SCIF_PPI_MMIO] = scif_p2p_setsg(psdev->mmio->pa,
+						    PAGE_SIZE, num_mmio_pages);
+	if (!p2p->ppi_sg[SCIF_PPI_MMIO])
+		goto free_p2p;
+	p2p->sg_nentries[SCIF_PPI_MMIO] = num_mmio_pages;
+	sg_page_shift = get_order(min(psdev->aper->len, (u64)(1 << 30)));
+	num_aper_chunks = num_aper_pages >> (sg_page_shift - PAGE_SHIFT);
+	p2p->ppi_sg[SCIF_PPI_APER] = scif_p2p_setsg(psdev->aper->pa,
+						    1 << sg_page_shift,
+						    num_aper_chunks);
+	p2p->sg_nentries[SCIF_PPI_APER] = num_aper_chunks;
+	err = dma_map_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+			 num_mmio_pages, PCI_DMA_BIDIRECTIONAL);
+	if (err != num_mmio_pages)
+		goto scif_p2p_free;
+	err = dma_map_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_APER],
+			 num_aper_chunks, PCI_DMA_BIDIRECTIONAL);
+	if (err != num_aper_chunks)
+		goto dma_unmap;
+	p2p->ppi_da[SCIF_PPI_MMIO] = sg_dma_address(p2p->ppi_sg[SCIF_PPI_MMIO]);
+	p2p->ppi_da[SCIF_PPI_APER] = sg_dma_address(p2p->ppi_sg[SCIF_PPI_APER]);
+	p2p->ppi_len[SCIF_PPI_MMIO] = num_mmio_pages;
+	p2p->ppi_len[SCIF_PPI_APER] = num_aper_pages;
+	p2p->ppi_peer_id = peerdev->node;
+	return p2p;
+dma_unmap:
+	dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+		     p2p->sg_nentries[SCIF_PPI_MMIO], DMA_BIDIRECTIONAL);
+scif_p2p_free:
+	scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
+	scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
+free_p2p:
+	kfree(p2p);
+	return NULL;
+}
+
+/* Uninitialize and release resources from a p2p mapping */
+static void scif_deinit_p2p_info(struct scif_dev *scifdev,
+				 struct scif_p2p_info *p2p)
+{
+	struct scif_hw_dev *sdev = scifdev->sdev;
+
+	dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+		     p2p->sg_nentries[SCIF_PPI_MMIO], DMA_BIDIRECTIONAL);
+	dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_APER],
+		     p2p->sg_nentries[SCIF_PPI_APER], DMA_BIDIRECTIONAL);
+	scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
+	scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
+	kfree(p2p);
+}
+
+/**
+ * scif_node_connect: Respond to SCIF_NODE_CONNECT interrupt message
+ * @dst: Destination node
+ *
+ * Connect the src and dst node by setting up the p2p connection
+ * between them. Management node here acts like a proxy.
+ */
+static void scif_node_connect(struct scif_dev *scifdev, int dst)
+{
+	struct scif_dev *dev_j = scifdev;
+	struct scif_dev *dev_i = NULL;
+	struct scif_p2p_info *p2p_ij = NULL;    /* bus addr for j from i */
+	struct scif_p2p_info *p2p_ji = NULL;    /* bus addr for i from j */
+	struct scif_p2p_info *p2p;
+	struct list_head *pos, *tmp;
+	struct scifmsg msg;
+	int err;
+	u64 tmppayload;
+
+	if (dst < 1 || dst > scif_info.maxid)
+		return;
+
+	dev_i = &scif_dev[dst];
+
+	if (!_scifdev_alive(dev_i))
+		return;
+	/*
+	 * If the p2p connection is already setup or in the process of setting
+	 * up then just ignore this request. The requested node will get
+	 * informed by SCIF_NODE_ADD_ACK or SCIF_NODE_ADD_NACK
+	 */
+	if (!list_empty(&dev_i->p2p)) {
+		list_for_each_safe(pos, tmp, &dev_i->p2p) {
+			p2p = list_entry(pos, struct scif_p2p_info, ppi_list);
+			if (p2p->ppi_peer_id == dev_j->node)
+				return;
+		}
+	}
+	p2p_ij = scif_init_p2p_info(dev_i, dev_j);
+	if (!p2p_ij)
+		return;
+	p2p_ji = scif_init_p2p_info(dev_j, dev_i);
+	if (!p2p_ji) {
+		scif_deinit_p2p_info(dev_i, p2p_ij);
+		return;
+	}
+	list_add_tail(&p2p_ij->ppi_list, &dev_i->p2p);
+	list_add_tail(&p2p_ji->ppi_list, &dev_j->p2p);
+
+	/*
+	 * Send a SCIF_NODE_ADD to dev_i, pass it its bus address
+	 * as seen from dev_j
+	 */
+	msg.uop = SCIF_NODE_ADD;
+	msg.src.node = dev_j->node;
+	msg.dst.node = dev_i->node;
+
+	msg.payload[0] = p2p_ji->ppi_da[SCIF_PPI_APER];
+	msg.payload[1] = p2p_ij->ppi_da[SCIF_PPI_MMIO];
+	msg.payload[2] = p2p_ij->ppi_da[SCIF_PPI_APER];
+	msg.payload[3] = p2p_ij->ppi_len[SCIF_PPI_APER] << PAGE_SHIFT;
+
+	err = scif_nodeqp_send(dev_i,  &msg);
+	if (err) {
+		dev_err(&scifdev->sdev->dev,
+			"%s %d error %d\n", __func__, __LINE__, err);
+		return;
+	}
+
+	/* Same as above but to dev_j */
+	msg.uop = SCIF_NODE_ADD;
+	msg.src.node = dev_i->node;
+	msg.dst.node = dev_j->node;
+
+	tmppayload = msg.payload[0];
+	msg.payload[0] = msg.payload[2];
+	msg.payload[2] = tmppayload;
+	msg.payload[1] = p2p_ji->ppi_da[SCIF_PPI_MMIO];
+	msg.payload[3] = p2p_ji->ppi_len[SCIF_PPI_APER] << PAGE_SHIFT;
+
+	scif_nodeqp_send(dev_j, &msg);
+}
+
+static void scif_p2p_setup(void)
+{
+	int i, j;
+
+	if (!scif_info.p2p_enable)
+		return;
+
+	for (i = 1; i <= scif_info.maxid; i++)
+		if (!_scifdev_alive(&scif_dev[i]))
+			return;
+
+	for (i = 1; i <= scif_info.maxid; i++) {
+		for (j = 1; j <= scif_info.maxid; j++) {
+			struct scif_dev *scifdev = &scif_dev[i];
+
+			if (i == j)
+				continue;
+			scif_node_connect(scifdev, j);
+		}
+	}
+}
+
+static char *message_types[] = {"BAD",
+				"INIT",
+				"EXIT",
+				"SCIF_EXIT_ACK",
+				"SCIF_NODE_ADD",
+				"SCIF_NODE_ADD_ACK",
+				"SCIF_NODE_ADD_NACK",
+				"REMOVE_NODE",
+				"REMOVE_NODE_ACK",
+				"CNCT_REQ",
+				"CNCT_GNT",
+				"CNCT_GNTACK",
+				"CNCT_GNTNACK",
+				"CNCT_REJ",
+				"DISCNCT",
+				"DISCNT_ACK",
+				"CLIENT_SENT",
+				"CLIENT_RCVD",
+				"SCIF_GET_NODE_INFO",
+				"REGISTER",
+				"REGISTER_ACK",
+				"REGISTER_NACK",
+				"UNREGISTER",
+				"UNREGISTER_ACK",
+				"UNREGISTER_NACK",
+				"ALLOC_REQ",
+				"ALLOC_GNT",
+				"ALLOC_REJ",
+				"FREE_PHYS",
+				"FREE_VIRT",
+				"MUNMAP",
+				"MARK",
+				"MARK_ACK",
+				"MARK_NACK",
+				"WAIT",
+				"WAIT_ACK",
+				"WAIT_NACK",
+				"SIGNAL_LOCAL",
+				"SIGNAL_REMOTE",
+				"SIG_ACK",
+				"SIG_NACK"};
+
+static void
+scif_display_message(struct scif_dev *scifdev, struct scifmsg *msg,
+		     const char *label)
+{
+	if (!scif_info.en_msg_log)
+		return;
+	if (msg->uop > SCIF_MAX_MSG) {
+		dev_err(&scifdev->sdev->dev,
+			"%s: unknown msg type %d\n", label, msg->uop);
+		return;
+	}
+	dev_info(&scifdev->sdev->dev,
+		 "%s: msg type %s, src %d:%d, dest %d:%d payload 0x%llx:0x%llx:0x%llx:0x%llx\n",
+		 label, message_types[msg->uop], msg->src.node, msg->src.port,
+		 msg->dst.node, msg->dst.port, msg->payload[0], msg->payload[1],
+		 msg->payload[2], msg->payload[3]);
+}
+
+int _scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_qp *qp = scifdev->qpairs;
+	int err = -ENOMEM, loop_cnt = 0;
+
+	scif_display_message(scifdev, msg, "Sent");
+	if (!qp) {
+		err = -EINVAL;
+		goto error;
+	}
+	spin_lock(&qp->send_lock);
+
+	while ((err = scif_rb_write(&qp->outbound_q,
+				    msg, sizeof(struct scifmsg)))) {
+		mdelay(1);
+#define SCIF_NODEQP_SEND_TO_MSEC (3 * 1000)
+		if (loop_cnt++ > (SCIF_NODEQP_SEND_TO_MSEC)) {
+			err = -ENODEV;
+			break;
+		}
+	}
+	if (!err)
+		scif_rb_commit(&qp->outbound_q);
+	spin_unlock(&qp->send_lock);
+	if (!err) {
+		if (scifdev_self(scifdev))
+			/*
+			 * For loopback we need to emulate an interrupt by
+			 * queuing work for the queue handling real node
+			 * Qp interrupts.
+			 */
+			queue_work(scifdev->intr_wq, &scifdev->intr_bh);
+		else
+			scif_send_msg_intr(scifdev);
+	}
+error:
+	if (err)
+		dev_dbg(&scifdev->sdev->dev,
+			"%s %d error %d uop %d\n",
+			 __func__, __LINE__, err, msg->uop);
+	return err;
+}
+
+/**
+ * scif_nodeqp_send - Send a message on the node queue pair
+ * @scifdev: Scif Device.
+ * @msg: The message to be sent.
+ */
+int scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	int err;
+	struct device *spdev = NULL;
+
+	if (msg->uop > SCIF_EXIT_ACK) {
+		/* Dont send messages once the exit flow has begun */
+		if (OP_IDLE != scifdev->exit)
+			return -ENODEV;
+		spdev = scif_get_peer_dev(scifdev);
+		if (IS_ERR(spdev)) {
+			err = PTR_ERR(spdev);
+			return err;
+		}
+	}
+	err = _scif_nodeqp_send(scifdev, msg);
+	if (msg->uop > SCIF_EXIT_ACK)
+		scif_put_peer_dev(spdev);
+	return err;
+}
+
+/*
+ * scif_misc_handler:
+ *
+ * Work queue handler for servicing miscellaneous SCIF tasks.
+ * Examples include:
+ * 1) Remote fence requests.
+ * 2) Destruction of temporary registered windows
+ *    created during scif_vreadfrom()/scif_vwriteto().
+ * 3) Cleanup of zombie endpoints.
+ */
+void scif_misc_handler(struct work_struct *work)
+{
+	scif_rma_handle_remote_fences();
+	scif_rma_destroy_windows();
+	scif_rma_destroy_tcw_invalid();
+	scif_cleanup_zombie_epd();
+}
+
+/**
+ * scif_init() - Respond to SCIF_INIT interrupt message
+ * @scifdev:    Remote SCIF device node
+ * @msg:        Interrupt message
+ */
+static __always_inline void
+scif_init(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	/*
+	 * Allow the thread waiting for device page updates for the peer QP DMA
+	 * address to complete initializing the inbound_q.
+	 */
+	flush_delayed_work(&scifdev->qp_dwork);
+
+	scif_peer_register_device(scifdev);
+
+	if (scif_is_mgmt_node()) {
+		mutex_lock(&scif_info.conflock);
+		scif_p2p_setup();
+		mutex_unlock(&scif_info.conflock);
+	}
+}
+
+/**
+ * scif_exit() - Respond to SCIF_EXIT interrupt message
+ * @scifdev:    Remote SCIF device node
+ * @msg:        Interrupt message
+ *
+ * This function stops the SCIF interface for the node which sent
+ * the SCIF_EXIT message and starts waiting for that node to
+ * resetup the queue pair again.
+ */
+static __always_inline void
+scif_exit(struct scif_dev *scifdev, struct scifmsg *unused)
+{
+	scifdev->exit_ack_pending = true;
+	if (scif_is_mgmt_node())
+		scif_disconnect_node(scifdev->node, false);
+	else
+		scif_stop(scifdev);
+	schedule_delayed_work(&scifdev->qp_dwork,
+			      msecs_to_jiffies(1000));
+}
+
+/**
+ * scif_exitack() - Respond to SCIF_EXIT_ACK interrupt message
+ * @scifdev:    Remote SCIF device node
+ * @msg:        Interrupt message
+ *
+ */
+static __always_inline void
+scif_exit_ack(struct scif_dev *scifdev, struct scifmsg *unused)
+{
+	scifdev->exit = OP_COMPLETED;
+	wake_up(&scif_info.exitwq);
+}
+
+/**
+ * scif_node_add() - Respond to SCIF_NODE_ADD interrupt message
+ * @scifdev:    Remote SCIF device node
+ * @msg:        Interrupt message
+ *
+ * When the mgmt node driver has finished initializing a MIC node queue pair it
+ * marks the node as online. It then looks for all currently online MIC cards
+ * and send a SCIF_NODE_ADD message to identify the ID of the new card for
+ * peer to peer initialization
+ *
+ * The local node allocates its incoming queue and sends its address in the
+ * SCIF_NODE_ADD_ACK message back to the mgmt node, the mgmt node "reflects"
+ * this message to the new node
+ */
+static __always_inline void
+scif_node_add(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_dev *newdev;
+	dma_addr_t qp_offset;
+	int qp_connect;
+	struct scif_hw_dev *sdev;
+
+	dev_dbg(&scifdev->sdev->dev,
+		"Scifdev %d:%d received NODE_ADD msg for node %d\n",
+		scifdev->node, msg->dst.node, msg->src.node);
+	dev_dbg(&scifdev->sdev->dev,
+		"Remote address for this node's aperture %llx\n",
+		msg->payload[0]);
+	newdev = &scif_dev[msg->src.node];
+	newdev->node = msg->src.node;
+	newdev->sdev = scif_dev[SCIF_MGMT_NODE].sdev;
+	sdev = newdev->sdev;
+
+	if (scif_setup_intr_wq(newdev)) {
+		dev_err(&scifdev->sdev->dev,
+			"failed to setup interrupts for %d\n", msg->src.node);
+		goto interrupt_setup_error;
+	}
+	newdev->mmio.va = ioremap_nocache(msg->payload[1], sdev->mmio->len);
+	if (!newdev->mmio.va) {
+		dev_err(&scifdev->sdev->dev,
+			"failed to map mmio for %d\n", msg->src.node);
+		goto mmio_map_error;
+	}
+	newdev->qpairs = kzalloc(sizeof(*newdev->qpairs), GFP_KERNEL);
+	if (!newdev->qpairs)
+		goto qp_alloc_error;
+	/*
+	 * Set the base address of the remote node's memory since it gets
+	 * added to qp_offset
+	 */
+	newdev->base_addr = msg->payload[0];
+
+	qp_connect = scif_setup_qp_connect(newdev->qpairs, &qp_offset,
+					   SCIF_NODE_QP_SIZE, newdev);
+	if (qp_connect) {
+		dev_err(&scifdev->sdev->dev,
+			"failed to setup qp_connect %d\n", qp_connect);
+		goto qp_connect_error;
+	}
+
+	newdev->db = sdev->hw_ops->next_db(sdev);
+	newdev->cookie = sdev->hw_ops->request_irq(sdev, scif_intr_handler,
+						   "SCIF_INTR", newdev,
+						   newdev->db);
+	if (IS_ERR(newdev->cookie))
+		goto qp_connect_error;
+	newdev->qpairs->magic = SCIFEP_MAGIC;
+	newdev->qpairs->qp_state = SCIF_QP_OFFLINE;
+
+	msg->uop = SCIF_NODE_ADD_ACK;
+	msg->dst.node = msg->src.node;
+	msg->src.node = scif_info.nodeid;
+	msg->payload[0] = qp_offset;
+	msg->payload[2] = newdev->db;
+	scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], msg);
+	return;
+qp_connect_error:
+	kfree(newdev->qpairs);
+	newdev->qpairs = NULL;
+qp_alloc_error:
+	iounmap(newdev->mmio.va);
+	newdev->mmio.va = NULL;
+mmio_map_error:
+interrupt_setup_error:
+	dev_err(&scifdev->sdev->dev,
+		"node add failed for node %d\n", msg->src.node);
+	msg->uop = SCIF_NODE_ADD_NACK;
+	msg->dst.node = msg->src.node;
+	msg->src.node = scif_info.nodeid;
+	scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], msg);
+}
+
+void scif_poll_qp_state(struct work_struct *work)
+{
+#define SCIF_NODE_QP_RETRY 100
+#define SCIF_NODE_QP_TIMEOUT 100
+	struct scif_dev *peerdev = container_of(work, struct scif_dev,
+							p2p_dwork.work);
+	struct scif_qp *qp = &peerdev->qpairs[0];
+
+	if (qp->qp_state != SCIF_QP_ONLINE ||
+	    qp->remote_qp->qp_state != SCIF_QP_ONLINE) {
+		if (peerdev->p2p_retry++ == SCIF_NODE_QP_RETRY) {
+			dev_err(&peerdev->sdev->dev,
+				"Warning: QP check timeout with state %d\n",
+				qp->qp_state);
+			goto timeout;
+		}
+		schedule_delayed_work(&peerdev->p2p_dwork,
+				      msecs_to_jiffies(SCIF_NODE_QP_TIMEOUT));
+		return;
+	}
+	return;
+timeout:
+	dev_err(&peerdev->sdev->dev,
+		"%s %d remote node %d offline,  state = 0x%x\n",
+		__func__, __LINE__, peerdev->node, qp->qp_state);
+	qp->remote_qp->qp_state = SCIF_QP_OFFLINE;
+	scif_peer_unregister_device(peerdev);
+	scif_cleanup_scifdev(peerdev);
+}
+
+/**
+ * scif_node_add_ack() - Respond to SCIF_NODE_ADD_ACK interrupt message
+ * @scifdev:    Remote SCIF device node
+ * @msg:        Interrupt message
+ *
+ * After a MIC node receives the SCIF_NODE_ADD_ACK message it send this
+ * message to the mgmt node to confirm the sequence is finished.
+ *
+ */
+static __always_inline void
+scif_node_add_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_dev *peerdev;
+	struct scif_qp *qp;
+	struct scif_dev *dst_dev = &scif_dev[msg->dst.node];
+
+	dev_dbg(&scifdev->sdev->dev,
+		"Scifdev %d received SCIF_NODE_ADD_ACK msg src %d dst %d\n",
+		scifdev->node, msg->src.node, msg->dst.node);
+	dev_dbg(&scifdev->sdev->dev,
+		"payload %llx %llx %llx %llx\n", msg->payload[0],
+		msg->payload[1], msg->payload[2], msg->payload[3]);
+	if (scif_is_mgmt_node()) {
+		/*
+		 * the lock serializes with scif_qp_response_ack. The mgmt node
+		 * is forwarding the NODE_ADD_ACK message from src to dst we
+		 * need to make sure that the dst has already received a
+		 * NODE_ADD for src and setup its end of the qp to dst
+		 */
+		mutex_lock(&scif_info.conflock);
+		msg->payload[1] = scif_info.maxid;
+		scif_nodeqp_send(dst_dev, msg);
+		mutex_unlock(&scif_info.conflock);
+		return;
+	}
+	peerdev = &scif_dev[msg->src.node];
+	peerdev->sdev = scif_dev[SCIF_MGMT_NODE].sdev;
+	peerdev->node = msg->src.node;
+
+	qp = &peerdev->qpairs[0];
+
+	if ((scif_setup_qp_connect_response(peerdev, &peerdev->qpairs[0],
+					    msg->payload[0])))
+		goto local_error;
+	peerdev->rdb = msg->payload[2];
+	qp->remote_qp->qp_state = SCIF_QP_ONLINE;
+
+	scif_peer_register_device(peerdev);
+
+	schedule_delayed_work(&peerdev->p2p_dwork, 0);
+	return;
+local_error:
+	scif_cleanup_scifdev(peerdev);
+}
+
+/**
+ * scif_node_add_nack: Respond to SCIF_NODE_ADD_NACK interrupt message
+ * @msg:        Interrupt message
+ *
+ * SCIF_NODE_ADD failed, so inform the waiting wq.
+ */
+static __always_inline void
+scif_node_add_nack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	if (scif_is_mgmt_node()) {
+		struct scif_dev *dst_dev = &scif_dev[msg->dst.node];
+
+		dev_dbg(&scifdev->sdev->dev,
+			"SCIF_NODE_ADD_NACK received from %d\n", scifdev->node);
+		scif_nodeqp_send(dst_dev, msg);
+	}
+}
+
+/*
+ * scif_node_remove: Handle SCIF_NODE_REMOVE message
+ * @msg: Interrupt message
+ *
+ * Handle node removal.
+ */
+static __always_inline void
+scif_node_remove(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	int node = msg->payload[0];
+	struct scif_dev *scdev = &scif_dev[node];
+
+	scdev->node_remove_ack_pending = true;
+	scif_handle_remove_node(node);
+}
+
+/*
+ * scif_node_remove_ack: Handle SCIF_NODE_REMOVE_ACK message
+ * @msg: Interrupt message
+ *
+ * The peer has acked a SCIF_NODE_REMOVE message.
+ */
+static __always_inline void
+scif_node_remove_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_dev *sdev = &scif_dev[msg->payload[0]];
+
+	atomic_inc(&sdev->disconn_rescnt);
+	wake_up(&sdev->disconn_wq);
+}
+
+/**
+ * scif_get_node_info: Respond to SCIF_GET_NODE_INFO interrupt message
+ * @msg:        Interrupt message
+ *
+ * Retrieve node info i.e maxid and total from the mgmt node.
+ */
+static __always_inline void
+scif_get_node_info_resp(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	if (scif_is_mgmt_node()) {
+		swap(msg->dst.node, msg->src.node);
+		mutex_lock(&scif_info.conflock);
+		msg->payload[1] = scif_info.maxid;
+		msg->payload[2] = scif_info.total;
+		mutex_unlock(&scif_info.conflock);
+		scif_nodeqp_send(scifdev, msg);
+	} else {
+		struct completion *node_info =
+			(struct completion *)msg->payload[3];
+
+		mutex_lock(&scif_info.conflock);
+		scif_info.maxid = msg->payload[1];
+		scif_info.total = msg->payload[2];
+		complete_all(node_info);
+		mutex_unlock(&scif_info.conflock);
+	}
+}
+
+static void
+scif_msg_unknown(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	/* Bogus Node Qp Message? */
+	dev_err(&scifdev->sdev->dev,
+		"Unknown message 0x%xn scifdev->node 0x%x\n",
+		msg->uop, scifdev->node);
+}
+
+static void (*scif_intr_func[SCIF_MAX_MSG + 1])
+	    (struct scif_dev *, struct scifmsg *msg) = {
+	scif_msg_unknown,	/* Error */
+	scif_init,		/* SCIF_INIT */
+	scif_exit,		/* SCIF_EXIT */
+	scif_exit_ack,		/* SCIF_EXIT_ACK */
+	scif_node_add,		/* SCIF_NODE_ADD */
+	scif_node_add_ack,	/* SCIF_NODE_ADD_ACK */
+	scif_node_add_nack,	/* SCIF_NODE_ADD_NACK */
+	scif_node_remove,	/* SCIF_NODE_REMOVE */
+	scif_node_remove_ack,	/* SCIF_NODE_REMOVE_ACK */
+	scif_cnctreq,		/* SCIF_CNCT_REQ */
+	scif_cnctgnt,		/* SCIF_CNCT_GNT */
+	scif_cnctgnt_ack,	/* SCIF_CNCT_GNTACK */
+	scif_cnctgnt_nack,	/* SCIF_CNCT_GNTNACK */
+	scif_cnctrej,		/* SCIF_CNCT_REJ */
+	scif_discnct,		/* SCIF_DISCNCT */
+	scif_discnt_ack,	/* SCIF_DISCNT_ACK */
+	scif_clientsend,	/* SCIF_CLIENT_SENT */
+	scif_clientrcvd,	/* SCIF_CLIENT_RCVD */
+	scif_get_node_info_resp,/* SCIF_GET_NODE_INFO */
+	scif_recv_reg,		/* SCIF_REGISTER */
+	scif_recv_reg_ack,	/* SCIF_REGISTER_ACK */
+	scif_recv_reg_nack,	/* SCIF_REGISTER_NACK */
+	scif_recv_unreg,	/* SCIF_UNREGISTER */
+	scif_recv_unreg_ack,	/* SCIF_UNREGISTER_ACK */
+	scif_recv_unreg_nack,	/* SCIF_UNREGISTER_NACK */
+	scif_alloc_req,		/* SCIF_ALLOC_REQ */
+	scif_alloc_gnt_rej,	/* SCIF_ALLOC_GNT */
+	scif_alloc_gnt_rej,	/* SCIF_ALLOC_REJ */
+	scif_free_virt,		/* SCIF_FREE_VIRT */
+	scif_recv_munmap,	/* SCIF_MUNMAP */
+	scif_recv_mark,		/* SCIF_MARK */
+	scif_recv_mark_resp,	/* SCIF_MARK_ACK */
+	scif_recv_mark_resp,	/* SCIF_MARK_NACK */
+	scif_recv_wait,		/* SCIF_WAIT */
+	scif_recv_wait_resp,	/* SCIF_WAIT_ACK */
+	scif_recv_wait_resp,	/* SCIF_WAIT_NACK */
+	scif_recv_sig_local,	/* SCIF_SIG_LOCAL */
+	scif_recv_sig_remote,	/* SCIF_SIG_REMOTE */
+	scif_recv_sig_resp,	/* SCIF_SIG_ACK */
+	scif_recv_sig_resp,	/* SCIF_SIG_NACK */
+};
+
+/**
+ * scif_nodeqp_msg_handler() - Common handler for node messages
+ * @scifdev: Remote device to respond to
+ * @qp: Remote memory pointer
+ * @msg: The message to be handled.
+ *
+ * This routine calls the appropriate routine to handle a Node Qp
+ * message receipt
+ */
+static int scif_max_msg_id = SCIF_MAX_MSG;
+
+static void
+scif_nodeqp_msg_handler(struct scif_dev *scifdev,
+			struct scif_qp *qp, struct scifmsg *msg)
+{
+	scif_display_message(scifdev, msg, "Rcvd");
+
+	if (msg->uop > (u32)scif_max_msg_id) {
+		/* Bogus Node Qp Message? */
+		dev_err(&scifdev->sdev->dev,
+			"Unknown message 0x%xn scifdev->node 0x%x\n",
+			msg->uop, scifdev->node);
+		return;
+	}
+
+	scif_intr_func[msg->uop](scifdev, msg);
+}
+
+/**
+ * scif_nodeqp_intrhandler() - Interrupt handler for node messages
+ * @scifdev:    Remote device to respond to
+ * @qp:         Remote memory pointer
+ *
+ * This routine is triggered by the interrupt mechanism.  It reads
+ * messages from the node queue RB and calls the Node QP Message handling
+ * routine.
+ */
+void scif_nodeqp_intrhandler(struct scif_dev *scifdev, struct scif_qp *qp)
+{
+	struct scifmsg msg;
+	int read_size;
+
+	do {
+		read_size = scif_rb_get_next(&qp->inbound_q, &msg, sizeof(msg));
+		if (!read_size)
+			break;
+		scif_nodeqp_msg_handler(scifdev, qp, &msg);
+		/*
+		 * The node queue pair is unmapped so skip the read pointer
+		 * update after receipt of a SCIF_EXIT_ACK
+		 */
+		if (SCIF_EXIT_ACK == msg.uop)
+			break;
+		scif_rb_update_read_ptr(&qp->inbound_q);
+	} while (1);
+}
+
+/**
+ * scif_loopb_wq_handler - Loopback Workqueue Handler.
+ * @work: loop back work
+ *
+ * This work queue routine is invoked by the loopback work queue handler.
+ * It grabs the recv lock, dequeues any available messages from the head
+ * of the loopback message list, calls the node QP message handler,
+ * waits for it to return, then frees up this message and dequeues more
+ * elements of the list if available.
+ */
+static void scif_loopb_wq_handler(struct work_struct *unused)
+{
+	struct scif_dev *scifdev = scif_info.loopb_dev;
+	struct scif_qp *qp = scifdev->qpairs;
+	struct scif_loopb_msg *msg;
+
+	do {
+		msg = NULL;
+		spin_lock(&qp->recv_lock);
+		if (!list_empty(&scif_info.loopb_recv_q)) {
+			msg = list_first_entry(&scif_info.loopb_recv_q,
+					       struct scif_loopb_msg,
+					       list);
+			list_del(&msg->list);
+		}
+		spin_unlock(&qp->recv_lock);
+
+		if (msg) {
+			scif_nodeqp_msg_handler(scifdev, qp, &msg->msg);
+			kfree(msg);
+		}
+	} while (msg);
+}
+
+/**
+ * scif_loopb_msg_handler() - Workqueue handler for loopback messages.
+ * @scifdev: SCIF device
+ * @qp: Queue pair.
+ *
+ * This work queue routine is triggered when a loopback message is received.
+ *
+ * We need special handling for receiving Node Qp messages on a loopback SCIF
+ * device via two workqueues for receiving messages.
+ *
+ * The reason we need the extra workqueue which is not required with *normal*
+ * non-loopback SCIF devices is the potential classic deadlock described below:
+ *
+ * Thread A tries to send a message on a loopback SCIF device and blocks since
+ * there is no space in the RB while it has the send_lock held or another
+ * lock called lock X for example.
+ *
+ * Thread B: The Loopback Node QP message receive workqueue receives the message
+ * and tries to send a message (eg an ACK) to the loopback SCIF device. It tries
+ * to grab the send lock again or lock X and deadlocks with Thread A. The RB
+ * cannot be drained any further due to this classic deadlock.
+ *
+ * In order to avoid deadlocks as mentioned above we have an extra level of
+ * indirection achieved by having two workqueues.
+ * 1) The first workqueue whose handler is scif_loopb_msg_handler reads
+ * messages from the Node QP RB, adds them to a list and queues work for the
+ * second workqueue.
+ *
+ * 2) The second workqueue whose handler is scif_loopb_wq_handler dequeues
+ * messages from the list, handles them, frees up the memory and dequeues
+ * more elements from the list if possible.
+ */
+int
+scif_loopb_msg_handler(struct scif_dev *scifdev, struct scif_qp *qp)
+{
+	int read_size;
+	struct scif_loopb_msg *msg;
+
+	do {
+		msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+		if (!msg)
+			return -ENOMEM;
+		read_size = scif_rb_get_next(&qp->inbound_q, &msg->msg,
+					     sizeof(struct scifmsg));
+		if (read_size != sizeof(struct scifmsg)) {
+			kfree(msg);
+			scif_rb_update_read_ptr(&qp->inbound_q);
+			break;
+		}
+		spin_lock(&qp->recv_lock);
+		list_add_tail(&msg->list, &scif_info.loopb_recv_q);
+		spin_unlock(&qp->recv_lock);
+		queue_work(scif_info.loopb_wq, &scif_info.loopb_work);
+		scif_rb_update_read_ptr(&qp->inbound_q);
+	} while (read_size == sizeof(struct scifmsg));
+	return read_size;
+}
+
+/**
+ * scif_setup_loopback_qp - One time setup work for Loopback Node Qp.
+ * @scifdev: SCIF device
+ *
+ * Sets up the required loopback workqueues, queue pairs and ring buffers
+ */
+int scif_setup_loopback_qp(struct scif_dev *scifdev)
+{
+	int err = 0;
+	void *local_q;
+	struct scif_qp *qp;
+
+	err = scif_setup_intr_wq(scifdev);
+	if (err)
+		goto exit;
+	INIT_LIST_HEAD(&scif_info.loopb_recv_q);
+	snprintf(scif_info.loopb_wqname, sizeof(scif_info.loopb_wqname),
+		 "SCIF LOOPB %d", scifdev->node);
+	scif_info.loopb_wq =
+		alloc_ordered_workqueue(scif_info.loopb_wqname, 0);
+	if (!scif_info.loopb_wq) {
+		err = -ENOMEM;
+		goto destroy_intr;
+	}
+	INIT_WORK(&scif_info.loopb_work, scif_loopb_wq_handler);
+	/* Allocate Self Qpair */
+	scifdev->qpairs = kzalloc(sizeof(*scifdev->qpairs), GFP_KERNEL);
+	if (!scifdev->qpairs) {
+		err = -ENOMEM;
+		goto destroy_loopb_wq;
+	}
+
+	qp = scifdev->qpairs;
+	qp->magic = SCIFEP_MAGIC;
+	spin_lock_init(&qp->send_lock);
+	spin_lock_init(&qp->recv_lock);
+
+	local_q = kzalloc(SCIF_NODE_QP_SIZE, GFP_KERNEL);
+	if (!local_q) {
+		err = -ENOMEM;
+		goto free_qpairs;
+	}
+	/*
+	 * For loopback the inbound_q and outbound_q are essentially the same
+	 * since the Node sends a message on the loopback interface to the
+	 * outbound_q which is then received on the inbound_q.
+	 */
+	scif_rb_init(&qp->outbound_q,
+		     &qp->local_read,
+		     &qp->local_write,
+		     local_q, get_count_order(SCIF_NODE_QP_SIZE));
+
+	scif_rb_init(&qp->inbound_q,
+		     &qp->local_read,
+		     &qp->local_write,
+		     local_q, get_count_order(SCIF_NODE_QP_SIZE));
+	scif_info.nodeid = scifdev->node;
+
+	scif_peer_register_device(scifdev);
+
+	scif_info.loopb_dev = scifdev;
+	return err;
+free_qpairs:
+	kfree(scifdev->qpairs);
+destroy_loopb_wq:
+	destroy_workqueue(scif_info.loopb_wq);
+destroy_intr:
+	scif_destroy_intr_wq(scifdev);
+exit:
+	return err;
+}
+
+/**
+ * scif_destroy_loopback_qp - One time uninit work for Loopback Node Qp
+ * @scifdev: SCIF device
+ *
+ * Destroys the workqueues and frees up the Ring Buffer and Queue Pair memory.
+ */
+int scif_destroy_loopback_qp(struct scif_dev *scifdev)
+{
+	scif_peer_unregister_device(scifdev);
+	destroy_workqueue(scif_info.loopb_wq);
+	scif_destroy_intr_wq(scifdev);
+	kfree(scifdev->qpairs->outbound_q.rb_base);
+	kfree(scifdev->qpairs);
+	scifdev->sdev = NULL;
+	scif_info.loopb_dev = NULL;
+	return 0;
+}
+
+void scif_destroy_p2p(struct scif_dev *scifdev)
+{
+	struct scif_dev *peer_dev;
+	struct scif_p2p_info *p2p;
+	struct list_head *pos, *tmp;
+	int bd;
+
+	mutex_lock(&scif_info.conflock);
+	/* Free P2P mappings in the given node for all its peer nodes */
+	list_for_each_safe(pos, tmp, &scifdev->p2p) {
+		p2p = list_entry(pos, struct scif_p2p_info, ppi_list);
+		dma_unmap_sg(&scifdev->sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+			     p2p->sg_nentries[SCIF_PPI_MMIO],
+			     DMA_BIDIRECTIONAL);
+		dma_unmap_sg(&scifdev->sdev->dev, p2p->ppi_sg[SCIF_PPI_APER],
+			     p2p->sg_nentries[SCIF_PPI_APER],
+			     DMA_BIDIRECTIONAL);
+		scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
+		scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
+		list_del(pos);
+		kfree(p2p);
+	}
+
+	/* Free P2P mapping created in the peer nodes for the given node */
+	for (bd = SCIF_MGMT_NODE + 1; bd <= scif_info.maxid; bd++) {
+		peer_dev = &scif_dev[bd];
+		list_for_each_safe(pos, tmp, &peer_dev->p2p) {
+			p2p = list_entry(pos, struct scif_p2p_info, ppi_list);
+			if (p2p->ppi_peer_id == scifdev->node) {
+				dma_unmap_sg(&peer_dev->sdev->dev,
+					     p2p->ppi_sg[SCIF_PPI_MMIO],
+					     p2p->sg_nentries[SCIF_PPI_MMIO],
+					     DMA_BIDIRECTIONAL);
+				dma_unmap_sg(&peer_dev->sdev->dev,
+					     p2p->ppi_sg[SCIF_PPI_APER],
+					     p2p->sg_nentries[SCIF_PPI_APER],
+					     DMA_BIDIRECTIONAL);
+				scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
+				scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
+				list_del(pos);
+				kfree(p2p);
+			}
+		}
+	}
+	mutex_unlock(&scif_info.conflock);
+}
diff --git a/drivers/misc/mic/scif/scif_nodeqp.h b/drivers/misc/mic/scif/scif_nodeqp.h
new file mode 100644
index 000000000..958962731
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_nodeqp.h
@@ -0,0 +1,221 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_NODEQP
+#define SCIF_NODEQP
+
+#include "scif_rb.h"
+#include "scif_peer_bus.h"
+
+#define SCIF_INIT 1  /* First message sent to the peer node for discovery */
+#define SCIF_EXIT 2  /* Last message from the peer informing intent to exit */
+#define SCIF_EXIT_ACK 3 /* Response to SCIF_EXIT message */
+#define SCIF_NODE_ADD 4  /* Tell Online nodes a new node exits */
+#define SCIF_NODE_ADD_ACK 5  /* Confirm to mgmt node sequence is finished */
+#define SCIF_NODE_ADD_NACK 6 /* SCIF_NODE_ADD failed */
+#define SCIF_NODE_REMOVE 7 /* Request to deactivate a SCIF node */
+#define SCIF_NODE_REMOVE_ACK 8 /* Response to a SCIF_NODE_REMOVE message */
+#define SCIF_CNCT_REQ 9  /* Phys addr of Request connection to a port */
+#define SCIF_CNCT_GNT 10  /* Phys addr of new Grant connection request */
+#define SCIF_CNCT_GNTACK 11  /* Error type Reject a connection request */
+#define SCIF_CNCT_GNTNACK 12  /* Error type Reject a connection request */
+#define SCIF_CNCT_REJ 13  /* Error type Reject a connection request */
+#define SCIF_DISCNCT 14 /* Notify peer that connection is being terminated */
+#define SCIF_DISCNT_ACK 15 /* Notify peer that connection is being terminated */
+#define SCIF_CLIENT_SENT 16 /* Notify the peer that data has been written */
+#define SCIF_CLIENT_RCVD 17 /* Notify the peer that data has been read */
+#define SCIF_GET_NODE_INFO 18 /* Get current node mask from the mgmt node*/
+#define SCIF_REGISTER 19 /* Tell peer about a new registered window */
+#define SCIF_REGISTER_ACK 20 /* Notify peer about unregistration success */
+#define SCIF_REGISTER_NACK 21 /* Notify peer about registration success */
+#define SCIF_UNREGISTER 22 /* Tell peer about unregistering a window */
+#define SCIF_UNREGISTER_ACK 23 /* Notify peer about registration failure */
+#define SCIF_UNREGISTER_NACK 24 /* Notify peer about unregistration failure */
+#define SCIF_ALLOC_REQ 25 /* Request a mapped buffer */
+#define SCIF_ALLOC_GNT 26 /* Notify peer about allocation success */
+#define SCIF_ALLOC_REJ 27 /* Notify peer about allocation failure */
+#define SCIF_FREE_VIRT 28 /* Free previously allocated virtual memory */
+#define SCIF_MUNMAP 29 /* Acknowledgment for a SCIF_MMAP request */
+#define SCIF_MARK 30 /* SCIF Remote Fence Mark Request */
+#define SCIF_MARK_ACK 31 /* SCIF Remote Fence Mark Success */
+#define SCIF_MARK_NACK 32 /* SCIF Remote Fence Mark Failure */
+#define SCIF_WAIT 33 /* SCIF Remote Fence Wait Request */
+#define SCIF_WAIT_ACK 34 /* SCIF Remote Fence Wait Success */
+#define SCIF_WAIT_NACK 35 /* SCIF Remote Fence Wait Failure */
+#define SCIF_SIG_LOCAL 36 /* SCIF Remote Fence Local Signal Request */
+#define SCIF_SIG_REMOTE 37 /* SCIF Remote Fence Remote Signal Request */
+#define SCIF_SIG_ACK 38 /* SCIF Remote Fence Remote Signal Success */
+#define SCIF_SIG_NACK 39 /* SCIF Remote Fence Remote Signal Failure */
+#define SCIF_MAX_MSG SCIF_SIG_NACK
+
+/*
+ * struct scifmsg - Node QP message format
+ *
+ * @src: Source information
+ * @dst: Destination information
+ * @uop: The message opcode
+ * @payload: Unique payload format for each message
+ */
+struct scifmsg {
+	struct scif_port_id src;
+	struct scif_port_id dst;
+	u32 uop;
+	u64 payload[4];
+} __packed;
+
+/*
+ * struct scif_allocmsg - Used with SCIF_ALLOC_REQ to request
+ * the remote note to allocate memory
+ *
+ * phys_addr: Physical address of the buffer
+ * vaddr: Virtual address of the buffer
+ * size: Size of the buffer
+ * state: Current state
+ * allocwq: wait queue for status
+ */
+struct scif_allocmsg {
+	dma_addr_t phys_addr;
+	unsigned long vaddr;
+	size_t size;
+	enum scif_msg_state state;
+	wait_queue_head_t allocwq;
+};
+
+/*
+ * struct scif_qp - Node Queue Pair
+ *
+ * Interesting structure -- a little difficult because we can only
+ * write across the PCIe, so any r/w pointer we need to read is
+ * local. We only need to read the read pointer on the inbound_q
+ * and read the write pointer in the outbound_q
+ *
+ * @magic: Magic value to ensure the peer sees the QP correctly
+ * @outbound_q: The outbound ring buffer for sending messages
+ * @inbound_q: The inbound ring buffer for receiving messages
+ * @local_write: Local write index
+ * @local_read: Local read index
+ * @remote_qp: The remote queue pair
+ * @local_buf: DMA address of local ring buffer
+ * @local_qp: DMA address of the local queue pair data structure
+ * @remote_buf: DMA address of remote ring buffer
+ * @qp_state: QP state i.e. online or offline used for P2P
+ * @send_lock: synchronize access to outbound queue
+ * @recv_lock: Synchronize access to inbound queue
+ */
+struct scif_qp {
+	u64 magic;
+#define SCIFEP_MAGIC 0x5c1f000000005c1fULL
+	struct scif_rb outbound_q;
+	struct scif_rb inbound_q;
+
+	u32 local_write __aligned(64);
+	u32 local_read __aligned(64);
+	struct scif_qp *remote_qp;
+	dma_addr_t local_buf;
+	dma_addr_t local_qp;
+	dma_addr_t remote_buf;
+	u32 qp_state;
+#define SCIF_QP_OFFLINE 0xdead
+#define SCIF_QP_ONLINE 0xc0de
+	spinlock_t send_lock;
+	spinlock_t recv_lock;
+};
+
+/*
+ * struct scif_loopb_msg - An element in the loopback Node QP message list.
+ *
+ * @msg - The SCIF node QP message
+ * @list - link in the list of messages
+ */
+struct scif_loopb_msg {
+	struct scifmsg msg;
+	struct list_head list;
+};
+
+int scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg);
+int _scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_nodeqp_intrhandler(struct scif_dev *scifdev, struct scif_qp *qp);
+int scif_loopb_msg_handler(struct scif_dev *scifdev, struct scif_qp *qp);
+int scif_setup_qp(struct scif_dev *scifdev);
+int scif_qp_response(phys_addr_t phys, struct scif_dev *dev);
+int scif_setup_qp_connect(struct scif_qp *qp, dma_addr_t *qp_offset,
+			  int local_size, struct scif_dev *scifdev);
+int scif_setup_qp_accept(struct scif_qp *qp, dma_addr_t *qp_offset,
+			 dma_addr_t phys, int local_size,
+			 struct scif_dev *scifdev);
+int scif_setup_qp_connect_response(struct scif_dev *scifdev,
+				   struct scif_qp *qp, u64 payload);
+int scif_setup_loopback_qp(struct scif_dev *scifdev);
+int scif_destroy_loopback_qp(struct scif_dev *scifdev);
+void scif_poll_qp_state(struct work_struct *work);
+void scif_destroy_p2p(struct scif_dev *scifdev);
+void scif_send_exit(struct scif_dev *scifdev);
+static inline struct device *scif_get_peer_dev(struct scif_dev *scifdev)
+{
+	struct scif_peer_dev *spdev;
+	struct device *spdev_ret;
+
+	rcu_read_lock();
+	spdev = rcu_dereference(scifdev->spdev);
+	if (spdev)
+		spdev_ret = get_device(&spdev->dev);
+	else
+		spdev_ret = ERR_PTR(-ENODEV);
+	rcu_read_unlock();
+	return spdev_ret;
+}
+
+static inline void scif_put_peer_dev(struct device *dev)
+{
+	put_device(dev);
+}
+#endif  /* SCIF_NODEQP */
diff --git a/drivers/misc/mic/scif/scif_peer_bus.c b/drivers/misc/mic/scif/scif_peer_bus.c
new file mode 100644
index 000000000..6ffa3bdbd
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_peer_bus.c
@@ -0,0 +1,183 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ */
+#include "scif_main.h"
+#include "../bus/scif_bus.h"
+#include "scif_peer_bus.h"
+
+static inline struct scif_peer_dev *
+dev_to_scif_peer(struct device *dev)
+{
+	return container_of(dev, struct scif_peer_dev, dev);
+}
+
+struct bus_type scif_peer_bus = {
+	.name  = "scif_peer_bus",
+};
+
+static void scif_peer_release_dev(struct device *d)
+{
+	struct scif_peer_dev *sdev = dev_to_scif_peer(d);
+	struct scif_dev *scifdev = &scif_dev[sdev->dnode];
+
+	scif_cleanup_scifdev(scifdev);
+	kfree(sdev);
+}
+
+static int scif_peer_initialize_device(struct scif_dev *scifdev)
+{
+	struct scif_peer_dev *spdev;
+	int ret;
+
+	spdev = kzalloc(sizeof(*spdev), GFP_KERNEL);
+	if (!spdev) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	spdev->dev.parent = scifdev->sdev->dev.parent;
+	spdev->dev.release = scif_peer_release_dev;
+	spdev->dnode = scifdev->node;
+	spdev->dev.bus = &scif_peer_bus;
+	dev_set_name(&spdev->dev, "scif_peer-dev%u", spdev->dnode);
+
+	device_initialize(&spdev->dev);
+	get_device(&spdev->dev);
+	rcu_assign_pointer(scifdev->spdev, spdev);
+
+	mutex_lock(&scif_info.conflock);
+	scif_info.total++;
+	scif_info.maxid = max_t(u32, spdev->dnode, scif_info.maxid);
+	mutex_unlock(&scif_info.conflock);
+	return 0;
+err:
+	dev_err(&scifdev->sdev->dev,
+		"dnode %d: initialize_device rc %d\n", scifdev->node, ret);
+	return ret;
+}
+
+static int scif_peer_add_device(struct scif_dev *scifdev)
+{
+	struct scif_peer_dev *spdev = rcu_dereference(scifdev->spdev);
+	char pool_name[16];
+	int ret;
+
+	ret = device_add(&spdev->dev);
+	put_device(&spdev->dev);
+	if (ret) {
+		dev_err(&scifdev->sdev->dev,
+			"dnode %d: peer device_add failed\n", scifdev->node);
+		goto put_spdev;
+	}
+
+	scnprintf(pool_name, sizeof(pool_name), "scif-%d", spdev->dnode);
+	scifdev->signal_pool = dmam_pool_create(pool_name, &scifdev->sdev->dev,
+						sizeof(struct scif_status), 1,
+						0);
+	if (!scifdev->signal_pool) {
+		dev_err(&scifdev->sdev->dev,
+			"dnode %d: dmam_pool_create failed\n", scifdev->node);
+		ret = -ENOMEM;
+		goto del_spdev;
+	}
+	dev_dbg(&spdev->dev, "Added peer dnode %d\n", spdev->dnode);
+	return 0;
+del_spdev:
+	device_del(&spdev->dev);
+put_spdev:
+	RCU_INIT_POINTER(scifdev->spdev, NULL);
+	synchronize_rcu();
+	put_device(&spdev->dev);
+
+	mutex_lock(&scif_info.conflock);
+	scif_info.total--;
+	mutex_unlock(&scif_info.conflock);
+	return ret;
+}
+
+void scif_add_peer_device(struct work_struct *work)
+{
+	struct scif_dev *scifdev = container_of(work, struct scif_dev,
+						peer_add_work);
+
+	scif_peer_add_device(scifdev);
+}
+
+/*
+ * Peer device registration is split into a device_initialize and a device_add.
+ * The reason for doing this is as follows: First, peer device registration
+ * itself cannot be done in the message processing thread and must be delegated
+ * to another workqueue, otherwise if SCIF client probe, called during peer
+ * device registration, calls scif_connect(..), it will block the message
+ * processing thread causing a deadlock. Next, device_initialize is done in the
+ * "top-half" message processing thread and device_add in the "bottom-half"
+ * workqueue. If this is not done, SCIF_CNCT_REQ message processing executing
+ * concurrently with SCIF_INIT message processing is unable to get a reference
+ * on the peer device, thereby failing the connect request.
+ */
+void scif_peer_register_device(struct scif_dev *scifdev)
+{
+	int ret;
+
+	mutex_lock(&scifdev->lock);
+	ret = scif_peer_initialize_device(scifdev);
+	if (ret)
+		goto exit;
+	schedule_work(&scifdev->peer_add_work);
+exit:
+	mutex_unlock(&scifdev->lock);
+}
+
+int scif_peer_unregister_device(struct scif_dev *scifdev)
+{
+	struct scif_peer_dev *spdev;
+
+	mutex_lock(&scifdev->lock);
+	/* Flush work to ensure device register is complete */
+	flush_work(&scifdev->peer_add_work);
+
+	/*
+	 * Continue holding scifdev->lock since theoretically unregister_device
+	 * can be called simultaneously from multiple threads
+	 */
+	spdev = rcu_dereference(scifdev->spdev);
+	if (!spdev) {
+		mutex_unlock(&scifdev->lock);
+		return -ENODEV;
+	}
+
+	RCU_INIT_POINTER(scifdev->spdev, NULL);
+	synchronize_rcu();
+	mutex_unlock(&scifdev->lock);
+
+	dev_dbg(&spdev->dev, "Removing peer dnode %d\n", spdev->dnode);
+	device_unregister(&spdev->dev);
+
+	mutex_lock(&scif_info.conflock);
+	scif_info.total--;
+	mutex_unlock(&scif_info.conflock);
+	return 0;
+}
+
+int scif_peer_bus_init(void)
+{
+	return bus_register(&scif_peer_bus);
+}
+
+void scif_peer_bus_exit(void)
+{
+	bus_unregister(&scif_peer_bus);
+}
diff --git a/drivers/misc/mic/scif/scif_peer_bus.h b/drivers/misc/mic/scif/scif_peer_bus.h
new file mode 100644
index 000000000..a3b8dd2ed
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_peer_bus.h
@@ -0,0 +1,31 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ */
+#ifndef _SCIF_PEER_BUS_H_
+#define _SCIF_PEER_BUS_H_
+
+#include <linux/device.h>
+#include <linux/mic_common.h>
+#include <linux/scif.h>
+
+struct scif_dev;
+
+void scif_add_peer_device(struct work_struct *work);
+void scif_peer_register_device(struct scif_dev *sdev);
+int scif_peer_unregister_device(struct scif_dev *scifdev);
+int scif_peer_bus_init(void);
+void scif_peer_bus_exit(void);
+#endif /* _SCIF_PEER_BUS_H */
diff --git a/drivers/misc/mic/scif/scif_ports.c b/drivers/misc/mic/scif/scif_ports.c
new file mode 100644
index 000000000..594e18d27
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_ports.c
@@ -0,0 +1,124 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/idr.h>
+
+#include "scif_main.h"
+
+#define SCIF_PORT_COUNT	0x10000	/* Ports available */
+
+struct idr scif_ports;
+
+/*
+ * struct scif_port - SCIF port information
+ *
+ * @ref_cnt - Reference count since there can be multiple endpoints
+ *		created via scif_accept(..) simultaneously using a port.
+ */
+struct scif_port {
+	int ref_cnt;
+};
+
+/**
+ * __scif_get_port - Reserve a specified port # for SCIF and add it
+ * to the global list.
+ * @port : port # to be reserved.
+ *
+ * @return : Allocated SCIF port #, or -ENOSPC if port unavailable.
+ *		On memory allocation failure, returns -ENOMEM.
+ */
+static int __scif_get_port(int start, int end)
+{
+	int id;
+	struct scif_port *port = kzalloc(sizeof(*port), GFP_ATOMIC);
+
+	if (!port)
+		return -ENOMEM;
+	spin_lock(&scif_info.port_lock);
+	id = idr_alloc(&scif_ports, port, start, end, GFP_ATOMIC);
+	if (id >= 0)
+		port->ref_cnt++;
+	spin_unlock(&scif_info.port_lock);
+	return id;
+}
+
+/**
+ * scif_rsrv_port - Reserve a specified port # for SCIF.
+ * @port : port # to be reserved.
+ *
+ * @return : Allocated SCIF port #, or -ENOSPC if port unavailable.
+ *		On memory allocation failure, returns -ENOMEM.
+ */
+int scif_rsrv_port(u16 port)
+{
+	return __scif_get_port(port, port + 1);
+}
+
+/**
+ * scif_get_new_port - Get and reserve any port # for SCIF in the range
+ *			SCIF_PORT_RSVD + 1 to SCIF_PORT_COUNT - 1.
+ *
+ * @return : Allocated SCIF port #, or -ENOSPC if no ports available.
+ *		On memory allocation failure, returns -ENOMEM.
+ */
+int scif_get_new_port(void)
+{
+	return __scif_get_port(SCIF_PORT_RSVD + 1, SCIF_PORT_COUNT);
+}
+
+/**
+ * scif_get_port - Increment the reference count for a SCIF port
+ * @id : SCIF port
+ *
+ * @return : None
+ */
+void scif_get_port(u16 id)
+{
+	struct scif_port *port;
+
+	if (!id)
+		return;
+	spin_lock(&scif_info.port_lock);
+	port = idr_find(&scif_ports, id);
+	if (port)
+		port->ref_cnt++;
+	spin_unlock(&scif_info.port_lock);
+}
+
+/**
+ * scif_put_port - Release a reserved SCIF port
+ * @id : SCIF port to be released.
+ *
+ * @return : None
+ */
+void scif_put_port(u16 id)
+{
+	struct scif_port *port;
+
+	if (!id)
+		return;
+	spin_lock(&scif_info.port_lock);
+	port = idr_find(&scif_ports, id);
+	if (port) {
+		port->ref_cnt--;
+		if (!port->ref_cnt) {
+			idr_remove(&scif_ports, id);
+			kfree(port);
+		}
+	}
+	spin_unlock(&scif_info.port_lock);
+}
diff --git a/drivers/misc/mic/scif/scif_rb.c b/drivers/misc/mic/scif/scif_rb.c
new file mode 100644
index 000000000..b665757ca
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rb.c
@@ -0,0 +1,249 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/circ_buf.h>
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/errno.h>
+
+#include "scif_rb.h"
+
+#define scif_rb_ring_cnt(head, tail, size) CIRC_CNT(head, tail, size)
+#define scif_rb_ring_space(head, tail, size) CIRC_SPACE(head, tail, size)
+
+/**
+ * scif_rb_init - Initializes the ring buffer
+ * @rb: ring buffer
+ * @read_ptr: A pointer to the read offset
+ * @write_ptr: A pointer to the write offset
+ * @rb_base: A pointer to the base of the ring buffer
+ * @size: The size of the ring buffer in powers of two
+ */
+void scif_rb_init(struct scif_rb *rb, u32 *read_ptr, u32 *write_ptr,
+		  void *rb_base, u8 size)
+{
+	rb->rb_base = rb_base;
+	rb->size = (1 << size);
+	rb->read_ptr = read_ptr;
+	rb->write_ptr = write_ptr;
+	rb->current_read_offset = *read_ptr;
+	rb->current_write_offset = *write_ptr;
+}
+
+/* Copies a message to the ring buffer -- handles the wrap around case */
+static void memcpy_torb(struct scif_rb *rb, void *header,
+			void *msg, u32 size)
+{
+	u32 size1, size2;
+
+	if (header + size >= rb->rb_base + rb->size) {
+		/* Need to call two copies if it wraps around */
+		size1 = (u32)(rb->rb_base + rb->size - header);
+		size2 = size - size1;
+		memcpy_toio((void __iomem __force *)header, msg, size1);
+		memcpy_toio((void __iomem __force *)rb->rb_base,
+			    msg + size1, size2);
+	} else {
+		memcpy_toio((void __iomem __force *)header, msg, size);
+	}
+}
+
+/* Copies a message from the ring buffer -- handles the wrap around case */
+static void memcpy_fromrb(struct scif_rb *rb, void *header,
+			  void *msg, u32 size)
+{
+	u32 size1, size2;
+
+	if (header + size >= rb->rb_base + rb->size) {
+		/* Need to call two copies if it wraps around */
+		size1 = (u32)(rb->rb_base + rb->size - header);
+		size2 = size - size1;
+		memcpy_fromio(msg, (void __iomem __force *)header, size1);
+		memcpy_fromio(msg + size1,
+			      (void __iomem __force *)rb->rb_base, size2);
+	} else {
+		memcpy_fromio(msg, (void __iomem __force *)header, size);
+	}
+}
+
+/**
+ * scif_rb_space - Query space available for writing to the RB
+ * @rb: ring buffer
+ *
+ * Return: size available for writing to RB in bytes.
+ */
+u32 scif_rb_space(struct scif_rb *rb)
+{
+	rb->current_read_offset = *rb->read_ptr;
+	/*
+	 * Update from the HW read pointer only once the peer has exposed the
+	 * new empty slot. This barrier is paired with the memory barrier
+	 * scif_rb_update_read_ptr()
+	 */
+	mb();
+	return scif_rb_ring_space(rb->current_write_offset,
+				  rb->current_read_offset, rb->size);
+}
+
+/**
+ * scif_rb_write - Write a message to the RB
+ * @rb: ring buffer
+ * @msg: buffer to send the message.  Must be at least size bytes long
+ * @size: the size (in bytes) to be copied to the RB
+ *
+ * This API does not block if there isn't enough space in the RB.
+ * Returns: 0 on success or -ENOMEM on failure
+ */
+int scif_rb_write(struct scif_rb *rb, void *msg, u32 size)
+{
+	void *header;
+
+	if (scif_rb_space(rb) < size)
+		return -ENOMEM;
+	header = rb->rb_base + rb->current_write_offset;
+	memcpy_torb(rb, header, msg, size);
+	/*
+	 * Wait until scif_rb_commit(). Update the local ring
+	 * buffer data, not the shared data until commit.
+	 */
+	rb->current_write_offset =
+		(rb->current_write_offset + size) & (rb->size - 1);
+	return 0;
+}
+
+/**
+ * scif_rb_commit - To submit the message to let the peer fetch it
+ * @rb: ring buffer
+ */
+void scif_rb_commit(struct scif_rb *rb)
+{
+	/*
+	 * We must ensure ordering between the all the data committed
+	 * previously before we expose the new message to the peer by
+	 * updating the write_ptr. This write barrier is paired with
+	 * the read barrier in scif_rb_count(..)
+	 */
+	wmb();
+	WRITE_ONCE(*rb->write_ptr, rb->current_write_offset);
+#ifdef CONFIG_INTEL_MIC_CARD
+	/*
+	 * X100 Si bug: For the case where a Core is performing an EXT_WR
+	 * followed by a Doorbell Write, the Core must perform two EXT_WR to the
+	 * same address with the same data before it does the Doorbell Write.
+	 * This way, if ordering is violated for the Interrupt Message, it will
+	 * fall just behind the first Posted associated with the first EXT_WR.
+	 */
+	WRITE_ONCE(*rb->write_ptr, rb->current_write_offset);
+#endif
+}
+
+/**
+ * scif_rb_get - To get next message from the ring buffer
+ * @rb: ring buffer
+ * @size: Number of bytes to be read
+ *
+ * Return: NULL if no bytes to be read from the ring buffer, otherwise the
+ *	pointer to the next byte
+ */
+static void *scif_rb_get(struct scif_rb *rb, u32 size)
+{
+	void *header = NULL;
+
+	if (scif_rb_count(rb, size) >= size)
+		header = rb->rb_base + rb->current_read_offset;
+	return header;
+}
+
+/*
+ * scif_rb_get_next - Read from ring buffer.
+ * @rb: ring buffer
+ * @msg: buffer to hold the message.  Must be at least size bytes long
+ * @size: Number of bytes to be read
+ *
+ * Return: number of bytes read if available bytes are >= size, otherwise
+ * returns zero.
+ */
+u32 scif_rb_get_next(struct scif_rb *rb, void *msg, u32 size)
+{
+	void *header = NULL;
+	int read_size = 0;
+
+	header = scif_rb_get(rb, size);
+	if (header) {
+		u32 next_cmd_offset =
+			(rb->current_read_offset + size) & (rb->size - 1);
+
+		read_size = size;
+		rb->current_read_offset = next_cmd_offset;
+		memcpy_fromrb(rb, header, msg, size);
+	}
+	return read_size;
+}
+
+/**
+ * scif_rb_update_read_ptr
+ * @rb: ring buffer
+ */
+void scif_rb_update_read_ptr(struct scif_rb *rb)
+{
+	u32 new_offset;
+
+	new_offset = rb->current_read_offset;
+	/*
+	 * We must ensure ordering between the all the data committed or read
+	 * previously before we expose the empty slot to the peer by updating
+	 * the read_ptr. This barrier is paired with the memory barrier in
+	 * scif_rb_space(..)
+	 */
+	mb();
+	WRITE_ONCE(*rb->read_ptr, new_offset);
+#ifdef CONFIG_INTEL_MIC_CARD
+	/*
+	 * X100 Si Bug: For the case where a Core is performing an EXT_WR
+	 * followed by a Doorbell Write, the Core must perform two EXT_WR to the
+	 * same address with the same data before it does the Doorbell Write.
+	 * This way, if ordering is violated for the Interrupt Message, it will
+	 * fall just behind the first Posted associated with the first EXT_WR.
+	 */
+	WRITE_ONCE(*rb->read_ptr, new_offset);
+#endif
+}
+
+/**
+ * scif_rb_count
+ * @rb: ring buffer
+ * @size: Number of bytes expected to be read
+ *
+ * Return: number of bytes that can be read from the RB
+ */
+u32 scif_rb_count(struct scif_rb *rb, u32 size)
+{
+	if (scif_rb_ring_cnt(rb->current_write_offset,
+			     rb->current_read_offset,
+			     rb->size) < size) {
+		rb->current_write_offset = *rb->write_ptr;
+		/*
+		 * Update from the HW write pointer if empty only once the peer
+		 * has exposed the new message. This read barrier is paired
+		 * with the write barrier in scif_rb_commit(..)
+		 */
+		smp_rmb();
+	}
+	return scif_rb_ring_cnt(rb->current_write_offset,
+				rb->current_read_offset,
+				rb->size);
+}
diff --git a/drivers/misc/mic/scif/scif_rb.h b/drivers/misc/mic/scif/scif_rb.h
new file mode 100644
index 000000000..166dffe30
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rb.h
@@ -0,0 +1,100 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel SCIF driver.
+ */
+#ifndef SCIF_RB_H
+#define SCIF_RB_H
+/*
+ * This file describes a general purpose, byte based ring buffer. Writers to the
+ * ring buffer need to synchronize using a lock. The same is true for readers,
+ * although in practice, the ring buffer has a single reader. It is lockless
+ * between producer and consumer so it can handle being used across the PCIe
+ * bus. The ring buffer ensures that there are no reads across the PCIe bus for
+ * performance reasons. Two of these are used to form a single bidirectional
+ * queue-pair across PCIe.
+ */
+/*
+ * struct scif_rb - SCIF Ring Buffer
+ *
+ * @rb_base: The base of the memory used for storing RB messages
+ * @read_ptr: Pointer to the read offset
+ * @write_ptr: Pointer to the write offset
+ * @size: Size of the memory in rb_base
+ * @current_read_offset: Cached read offset for performance
+ * @current_write_offset: Cached write offset for performance
+ */
+struct scif_rb {
+	void *rb_base;
+	u32 *read_ptr;
+	u32 *write_ptr;
+	u32 size;
+	u32 current_read_offset;
+	u32 current_write_offset;
+};
+
+/* methods used by both */
+void scif_rb_init(struct scif_rb *rb, u32 *read_ptr, u32 *write_ptr,
+		  void *rb_base, u8 size);
+/* writer only methods */
+/* write a new command, then scif_rb_commit() */
+int scif_rb_write(struct scif_rb *rb, void *msg, u32 size);
+/* after write(), then scif_rb_commit() */
+void scif_rb_commit(struct scif_rb *rb);
+/* query space available for writing to a RB. */
+u32 scif_rb_space(struct scif_rb *rb);
+
+/* reader only methods */
+/* read a new message from the ring buffer of size bytes */
+u32 scif_rb_get_next(struct scif_rb *rb, void *msg, u32 size);
+/* update the read pointer so that the space can be reused */
+void scif_rb_update_read_ptr(struct scif_rb *rb);
+/* count the number of bytes that can be read */
+u32 scif_rb_count(struct scif_rb *rb, u32 size);
+#endif
diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c
new file mode 100644
index 000000000..e1f59b177
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rma.c
@@ -0,0 +1,1775 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/dma_remapping.h>
+#include <linux/pagemap.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/signal.h>
+
+#include "scif_main.h"
+#include "scif_map.h"
+
+/* Used to skip ulimit checks for registrations with SCIF_MAP_KERNEL flag */
+#define SCIF_MAP_ULIMIT 0x40
+
+bool scif_ulimit_check = 1;
+
+/**
+ * scif_rma_ep_init:
+ * @ep: end point
+ *
+ * Initialize RMA per EP data structures.
+ */
+void scif_rma_ep_init(struct scif_endpt *ep)
+{
+	struct scif_endpt_rma_info *rma = &ep->rma_info;
+
+	mutex_init(&rma->rma_lock);
+	init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN);
+	spin_lock_init(&rma->tc_lock);
+	mutex_init(&rma->mmn_lock);
+	INIT_LIST_HEAD(&rma->reg_list);
+	INIT_LIST_HEAD(&rma->remote_reg_list);
+	atomic_set(&rma->tw_refcount, 0);
+	atomic_set(&rma->tcw_refcount, 0);
+	atomic_set(&rma->tcw_total_pages, 0);
+	atomic_set(&rma->fence_refcount, 0);
+
+	rma->async_list_del = 0;
+	rma->dma_chan = NULL;
+	INIT_LIST_HEAD(&rma->mmn_list);
+	INIT_LIST_HEAD(&rma->vma_list);
+	init_waitqueue_head(&rma->markwq);
+}
+
+/**
+ * scif_rma_ep_can_uninit:
+ * @ep: end point
+ *
+ * Returns 1 if an endpoint can be uninitialized and 0 otherwise.
+ */
+int scif_rma_ep_can_uninit(struct scif_endpt *ep)
+{
+	int ret = 0;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Destroy RMA Info only if both lists are empty */
+	if (list_empty(&ep->rma_info.reg_list) &&
+	    list_empty(&ep->rma_info.remote_reg_list) &&
+	    list_empty(&ep->rma_info.mmn_list) &&
+	    !atomic_read(&ep->rma_info.tw_refcount) &&
+	    !atomic_read(&ep->rma_info.tcw_refcount) &&
+	    !atomic_read(&ep->rma_info.fence_refcount))
+		ret = 1;
+	mutex_unlock(&ep->rma_info.rma_lock);
+	return ret;
+}
+
+/**
+ * scif_create_pinned_pages:
+ * @nr_pages: number of pages in window
+ * @prot: read/write protection
+ *
+ * Allocate and prepare a set of pinned pages.
+ */
+static struct scif_pinned_pages *
+scif_create_pinned_pages(int nr_pages, int prot)
+{
+	struct scif_pinned_pages *pin;
+
+	might_sleep();
+	pin = scif_zalloc(sizeof(*pin));
+	if (!pin)
+		goto error;
+
+	pin->pages = scif_zalloc(nr_pages * sizeof(*pin->pages));
+	if (!pin->pages)
+		goto error_free_pinned_pages;
+
+	pin->prot = prot;
+	pin->magic = SCIFEP_MAGIC;
+	return pin;
+
+error_free_pinned_pages:
+	scif_free(pin, sizeof(*pin));
+error:
+	return NULL;
+}
+
+/**
+ * scif_destroy_pinned_pages:
+ * @pin: A set of pinned pages.
+ *
+ * Deallocate resources for pinned pages.
+ */
+static int scif_destroy_pinned_pages(struct scif_pinned_pages *pin)
+{
+	int j;
+	int writeable = pin->prot & SCIF_PROT_WRITE;
+	int kernel = SCIF_MAP_KERNEL & pin->map_flags;
+
+	for (j = 0; j < pin->nr_pages; j++) {
+		if (pin->pages[j] && !kernel) {
+			if (writeable)
+				SetPageDirty(pin->pages[j]);
+			put_page(pin->pages[j]);
+		}
+	}
+
+	scif_free(pin->pages,
+		  pin->nr_pages * sizeof(*pin->pages));
+	scif_free(pin, sizeof(*pin));
+	return 0;
+}
+
+/*
+ * scif_create_window:
+ * @ep: end point
+ * @nr_pages: number of pages
+ * @offset: registration offset
+ * @temp: true if a temporary window is being created
+ *
+ * Allocate and prepare a self registration window.
+ */
+struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
+				       s64 offset, bool temp)
+{
+	struct scif_window *window;
+
+	might_sleep();
+	window = scif_zalloc(sizeof(*window));
+	if (!window)
+		goto error;
+
+	window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
+	if (!window->dma_addr)
+		goto error_free_window;
+
+	window->num_pages = scif_zalloc(nr_pages * sizeof(*window->num_pages));
+	if (!window->num_pages)
+		goto error_free_window;
+
+	window->offset = offset;
+	window->ep = (u64)ep;
+	window->magic = SCIFEP_MAGIC;
+	window->reg_state = OP_IDLE;
+	init_waitqueue_head(&window->regwq);
+	window->unreg_state = OP_IDLE;
+	init_waitqueue_head(&window->unregwq);
+	INIT_LIST_HEAD(&window->list);
+	window->type = SCIF_WINDOW_SELF;
+	window->temp = temp;
+	return window;
+
+error_free_window:
+	scif_free(window->dma_addr,
+		  nr_pages * sizeof(*window->dma_addr));
+	scif_free(window, sizeof(*window));
+error:
+	return NULL;
+}
+
+/**
+ * scif_destroy_incomplete_window:
+ * @ep: end point
+ * @window: registration window
+ *
+ * Deallocate resources for self window.
+ */
+static void scif_destroy_incomplete_window(struct scif_endpt *ep,
+					   struct scif_window *window)
+{
+	int err;
+	int nr_pages = window->nr_pages;
+	struct scif_allocmsg *alloc = &window->alloc_handle;
+	struct scifmsg msg;
+
+retry:
+	/* Wait for a SCIF_ALLOC_GNT/REJ message */
+	err = wait_event_timeout(alloc->allocwq,
+				 alloc->state != OP_IN_PROGRESS,
+				 SCIF_NODE_ALIVE_TIMEOUT);
+	if (!err && scifdev_alive(ep))
+		goto retry;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	if (alloc->state == OP_COMPLETED) {
+		msg.uop = SCIF_FREE_VIRT;
+		msg.src = ep->port;
+		msg.payload[0] = ep->remote_ep;
+		msg.payload[1] = window->alloc_handle.vaddr;
+		msg.payload[2] = (u64)window;
+		msg.payload[3] = SCIF_REGISTER;
+		_scif_nodeqp_send(ep->remote_dev, &msg);
+	}
+	mutex_unlock(&ep->rma_info.rma_lock);
+
+	scif_free_window_offset(ep, window, window->offset);
+	scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
+	scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
+	scif_free(window, sizeof(*window));
+}
+
+/**
+ * scif_unmap_window:
+ * @remote_dev: SCIF remote device
+ * @window: registration window
+ *
+ * Delete any DMA mappings created for a registered self window
+ */
+void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window)
+{
+	int j;
+
+	if (scif_is_iommu_enabled() && !scifdev_self(remote_dev)) {
+		if (window->st) {
+			dma_unmap_sg(&remote_dev->sdev->dev,
+				     window->st->sgl, window->st->nents,
+				     DMA_BIDIRECTIONAL);
+			sg_free_table(window->st);
+			kfree(window->st);
+			window->st = NULL;
+		}
+	} else {
+		for (j = 0; j < window->nr_contig_chunks; j++) {
+			if (window->dma_addr[j]) {
+				scif_unmap_single(window->dma_addr[j],
+						  remote_dev,
+						  window->num_pages[j] <<
+						  PAGE_SHIFT);
+				window->dma_addr[j] = 0x0;
+			}
+		}
+	}
+}
+
+static inline struct mm_struct *__scif_acquire_mm(void)
+{
+	if (scif_ulimit_check)
+		return get_task_mm(current);
+	return NULL;
+}
+
+static inline void __scif_release_mm(struct mm_struct *mm)
+{
+	if (mm)
+		mmput(mm);
+}
+
+static inline int
+__scif_dec_pinned_vm_lock(struct mm_struct *mm,
+			  int nr_pages, bool try_lock)
+{
+	if (!mm || !nr_pages || !scif_ulimit_check)
+		return 0;
+	if (try_lock) {
+		if (!down_write_trylock(&mm->mmap_sem)) {
+			dev_err(scif_info.mdev.this_device,
+				"%s %d err\n", __func__, __LINE__);
+			return -1;
+		}
+	} else {
+		down_write(&mm->mmap_sem);
+	}
+	mm->pinned_vm -= nr_pages;
+	up_write(&mm->mmap_sem);
+	return 0;
+}
+
+static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
+					     int nr_pages)
+{
+	unsigned long locked, lock_limit;
+
+	if (!mm || !nr_pages || !scif_ulimit_check)
+		return 0;
+
+	locked = nr_pages;
+	locked += mm->pinned_vm;
+	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+		dev_err(scif_info.mdev.this_device,
+			"locked(%lu) > lock_limit(%lu)\n",
+			locked, lock_limit);
+		return -ENOMEM;
+	}
+	mm->pinned_vm = locked;
+	return 0;
+}
+
+/**
+ * scif_destroy_window:
+ * @ep: end point
+ * @window: registration window
+ *
+ * Deallocate resources for self window.
+ */
+int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window)
+{
+	int j;
+	struct scif_pinned_pages *pinned_pages = window->pinned_pages;
+	int nr_pages = window->nr_pages;
+
+	might_sleep();
+	if (!window->temp && window->mm) {
+		__scif_dec_pinned_vm_lock(window->mm, window->nr_pages, 0);
+		__scif_release_mm(window->mm);
+		window->mm = NULL;
+	}
+
+	scif_free_window_offset(ep, window, window->offset);
+	scif_unmap_window(ep->remote_dev, window);
+	/*
+	 * Decrement references for this set of pinned pages from
+	 * this window.
+	 */
+	j = atomic_sub_return(1, &pinned_pages->ref_count);
+	if (j < 0)
+		dev_err(scif_info.mdev.this_device,
+			"%s %d incorrect ref count %d\n",
+			__func__, __LINE__, j);
+	/*
+	 * If the ref count for pinned_pages is zero then someone
+	 * has already called scif_unpin_pages() for it and we should
+	 * destroy the page cache.
+	 */
+	if (!j)
+		scif_destroy_pinned_pages(window->pinned_pages);
+	scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
+	scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
+	window->magic = 0;
+	scif_free(window, sizeof(*window));
+	return 0;
+}
+
+/**
+ * scif_create_remote_lookup:
+ * @remote_dev: SCIF remote device
+ * @window: remote window
+ *
+ * Allocate and prepare lookup entries for the remote
+ * end to copy over the physical addresses.
+ * Returns 0 on success and appropriate errno on failure.
+ */
+static int scif_create_remote_lookup(struct scif_dev *remote_dev,
+				     struct scif_window *window)
+{
+	int i, j, err = 0;
+	int nr_pages = window->nr_pages;
+	bool vmalloc_dma_phys, vmalloc_num_pages;
+
+	might_sleep();
+	/* Map window */
+	err = scif_map_single(&window->mapped_offset,
+			      window, remote_dev, sizeof(*window));
+	if (err)
+		goto error_window;
+
+	/* Compute the number of lookup entries. 21 == 2MB Shift */
+	window->nr_lookup = ALIGN(nr_pages * PAGE_SIZE,
+					((2) * 1024 * 1024)) >> 21;
+
+	window->dma_addr_lookup.lookup =
+		scif_alloc_coherent(&window->dma_addr_lookup.offset,
+				    remote_dev, window->nr_lookup *
+				    sizeof(*window->dma_addr_lookup.lookup),
+				    GFP_KERNEL | __GFP_ZERO);
+	if (!window->dma_addr_lookup.lookup) {
+		err = -ENOMEM;
+		goto error_window;
+	}
+
+	window->num_pages_lookup.lookup =
+		scif_alloc_coherent(&window->num_pages_lookup.offset,
+				    remote_dev, window->nr_lookup *
+				    sizeof(*window->num_pages_lookup.lookup),
+				    GFP_KERNEL | __GFP_ZERO);
+	if (!window->num_pages_lookup.lookup) {
+		err = -ENOMEM;
+		goto error_window;
+	}
+
+	vmalloc_dma_phys = is_vmalloc_addr(&window->dma_addr[0]);
+	vmalloc_num_pages = is_vmalloc_addr(&window->num_pages[0]);
+
+	/* Now map each of the pages containing physical addresses */
+	for (i = 0, j = 0; i < nr_pages; i += SCIF_NR_ADDR_IN_PAGE, j++) {
+		err = scif_map_page(&window->dma_addr_lookup.lookup[j],
+				    vmalloc_dma_phys ?
+				    vmalloc_to_page(&window->dma_addr[i]) :
+				    virt_to_page(&window->dma_addr[i]),
+				    remote_dev);
+		if (err)
+			goto error_window;
+		err = scif_map_page(&window->num_pages_lookup.lookup[j],
+				    vmalloc_num_pages ?
+				    vmalloc_to_page(&window->num_pages[i]) :
+				    virt_to_page(&window->num_pages[i]),
+				    remote_dev);
+		if (err)
+			goto error_window;
+	}
+	return 0;
+error_window:
+	return err;
+}
+
+/**
+ * scif_destroy_remote_lookup:
+ * @remote_dev: SCIF remote device
+ * @window: remote window
+ *
+ * Destroy lookup entries used for the remote
+ * end to copy over the physical addresses.
+ */
+static void scif_destroy_remote_lookup(struct scif_dev *remote_dev,
+				       struct scif_window *window)
+{
+	int i, j;
+
+	if (window->nr_lookup) {
+		struct scif_rma_lookup *lup = &window->dma_addr_lookup;
+		struct scif_rma_lookup *npup = &window->num_pages_lookup;
+
+		for (i = 0, j = 0; i < window->nr_pages;
+			i += SCIF_NR_ADDR_IN_PAGE, j++) {
+			if (lup->lookup && lup->lookup[j])
+				scif_unmap_single(lup->lookup[j],
+						  remote_dev,
+						  PAGE_SIZE);
+			if (npup->lookup && npup->lookup[j])
+				scif_unmap_single(npup->lookup[j],
+						  remote_dev,
+						  PAGE_SIZE);
+		}
+		if (lup->lookup)
+			scif_free_coherent(lup->lookup, lup->offset,
+					   remote_dev, window->nr_lookup *
+					   sizeof(*lup->lookup));
+		if (npup->lookup)
+			scif_free_coherent(npup->lookup, npup->offset,
+					   remote_dev, window->nr_lookup *
+					   sizeof(*npup->lookup));
+		if (window->mapped_offset)
+			scif_unmap_single(window->mapped_offset,
+					  remote_dev, sizeof(*window));
+		window->nr_lookup = 0;
+	}
+}
+
+/**
+ * scif_create_remote_window:
+ * @ep: end point
+ * @nr_pages: number of pages in window
+ *
+ * Allocate and prepare a remote registration window.
+ */
+static struct scif_window *
+scif_create_remote_window(struct scif_dev *scifdev, int nr_pages)
+{
+	struct scif_window *window;
+
+	might_sleep();
+	window = scif_zalloc(sizeof(*window));
+	if (!window)
+		goto error_ret;
+
+	window->magic = SCIFEP_MAGIC;
+	window->nr_pages = nr_pages;
+
+	window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
+	if (!window->dma_addr)
+		goto error_window;
+
+	window->num_pages = scif_zalloc(nr_pages *
+					sizeof(*window->num_pages));
+	if (!window->num_pages)
+		goto error_window;
+
+	if (scif_create_remote_lookup(scifdev, window))
+		goto error_window;
+
+	window->type = SCIF_WINDOW_PEER;
+	window->unreg_state = OP_IDLE;
+	INIT_LIST_HEAD(&window->list);
+	return window;
+error_window:
+	scif_destroy_remote_window(window);
+error_ret:
+	return NULL;
+}
+
+/**
+ * scif_destroy_remote_window:
+ * @ep: end point
+ * @window: remote registration window
+ *
+ * Deallocate resources for remote window.
+ */
+void
+scif_destroy_remote_window(struct scif_window *window)
+{
+	scif_free(window->dma_addr, window->nr_pages *
+		  sizeof(*window->dma_addr));
+	scif_free(window->num_pages, window->nr_pages *
+		  sizeof(*window->num_pages));
+	window->magic = 0;
+	scif_free(window, sizeof(*window));
+}
+
+/**
+ * scif_iommu_map: create DMA mappings if the IOMMU is enabled
+ * @remote_dev: SCIF remote device
+ * @window: remote registration window
+ *
+ * Map the physical pages using dma_map_sg(..) and then detect the number
+ * of contiguous DMA mappings allocated
+ */
+static int scif_iommu_map(struct scif_dev *remote_dev,
+			  struct scif_window *window)
+{
+	struct scatterlist *sg;
+	int i, err;
+	scif_pinned_pages_t pin = window->pinned_pages;
+
+	window->st = kzalloc(sizeof(*window->st), GFP_KERNEL);
+	if (!window->st)
+		return -ENOMEM;
+
+	err = sg_alloc_table(window->st, window->nr_pages, GFP_KERNEL);
+	if (err)
+		return err;
+
+	for_each_sg(window->st->sgl, sg, window->st->nents, i)
+		sg_set_page(sg, pin->pages[i], PAGE_SIZE, 0x0);
+
+	err = dma_map_sg(&remote_dev->sdev->dev, window->st->sgl,
+			 window->st->nents, DMA_BIDIRECTIONAL);
+	if (!err)
+		return -ENOMEM;
+	/* Detect contiguous ranges of DMA mappings */
+	sg = window->st->sgl;
+	for (i = 0; sg; i++) {
+		dma_addr_t last_da;
+
+		window->dma_addr[i] = sg_dma_address(sg);
+		window->num_pages[i] = sg_dma_len(sg) >> PAGE_SHIFT;
+		last_da = sg_dma_address(sg) + sg_dma_len(sg);
+		while ((sg = sg_next(sg)) && sg_dma_address(sg) == last_da) {
+			window->num_pages[i] +=
+				(sg_dma_len(sg) >> PAGE_SHIFT);
+			last_da = window->dma_addr[i] +
+				sg_dma_len(sg);
+		}
+		window->nr_contig_chunks++;
+	}
+	return 0;
+}
+
+/**
+ * scif_map_window:
+ * @remote_dev: SCIF remote device
+ * @window: self registration window
+ *
+ * Map pages of a window into the aperture/PCI.
+ * Also determine addresses required for DMA.
+ */
+int
+scif_map_window(struct scif_dev *remote_dev, struct scif_window *window)
+{
+	int i, j, k, err = 0, nr_contig_pages;
+	scif_pinned_pages_t pin;
+	phys_addr_t phys_prev, phys_curr;
+
+	might_sleep();
+
+	pin = window->pinned_pages;
+
+	if (intel_iommu_enabled && !scifdev_self(remote_dev))
+		return scif_iommu_map(remote_dev, window);
+
+	for (i = 0, j = 0; i < window->nr_pages; i += nr_contig_pages, j++) {
+		phys_prev = page_to_phys(pin->pages[i]);
+		nr_contig_pages = 1;
+
+		/* Detect physically contiguous chunks */
+		for (k = i + 1; k < window->nr_pages; k++) {
+			phys_curr = page_to_phys(pin->pages[k]);
+			if (phys_curr != (phys_prev + PAGE_SIZE))
+				break;
+			phys_prev = phys_curr;
+			nr_contig_pages++;
+		}
+		window->num_pages[j] = nr_contig_pages;
+		window->nr_contig_chunks++;
+		if (scif_is_mgmt_node()) {
+			/*
+			 * Management node has to deal with SMPT on X100 and
+			 * hence the DMA mapping is required
+			 */
+			err = scif_map_single(&window->dma_addr[j],
+					      phys_to_virt(page_to_phys(
+							   pin->pages[i])),
+					      remote_dev,
+					      nr_contig_pages << PAGE_SHIFT);
+			if (err)
+				return err;
+		} else {
+			window->dma_addr[j] = page_to_phys(pin->pages[i]);
+		}
+	}
+	return err;
+}
+
+/**
+ * scif_send_scif_unregister:
+ * @ep: end point
+ * @window: self registration window
+ *
+ * Send a SCIF_UNREGISTER message.
+ */
+static int scif_send_scif_unregister(struct scif_endpt *ep,
+				     struct scif_window *window)
+{
+	struct scifmsg msg;
+
+	msg.uop = SCIF_UNREGISTER;
+	msg.src = ep->port;
+	msg.payload[0] = window->alloc_handle.vaddr;
+	msg.payload[1] = (u64)window;
+	return scif_nodeqp_send(ep->remote_dev, &msg);
+}
+
+/**
+ * scif_unregister_window:
+ * @window: self registration window
+ *
+ * Send an unregistration request and wait for a response.
+ */
+int scif_unregister_window(struct scif_window *window)
+{
+	int err = 0;
+	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+	bool send_msg = false;
+
+	might_sleep();
+	switch (window->unreg_state) {
+	case OP_IDLE:
+	{
+		window->unreg_state = OP_IN_PROGRESS;
+		send_msg = true;
+		/* fall through */
+	}
+	case OP_IN_PROGRESS:
+	{
+		scif_get_window(window, 1);
+		mutex_unlock(&ep->rma_info.rma_lock);
+		if (send_msg) {
+			err = scif_send_scif_unregister(ep, window);
+			if (err) {
+				window->unreg_state = OP_COMPLETED;
+				goto done;
+			}
+		} else {
+			/* Return ENXIO since unregistration is in progress */
+			mutex_lock(&ep->rma_info.rma_lock);
+			return -ENXIO;
+		}
+retry:
+		/* Wait for a SCIF_UNREGISTER_(N)ACK message */
+		err = wait_event_timeout(window->unregwq,
+					 window->unreg_state != OP_IN_PROGRESS,
+					 SCIF_NODE_ALIVE_TIMEOUT);
+		if (!err && scifdev_alive(ep))
+			goto retry;
+		if (!err) {
+			err = -ENODEV;
+			window->unreg_state = OP_COMPLETED;
+			dev_err(scif_info.mdev.this_device,
+				"%s %d err %d\n", __func__, __LINE__, err);
+		}
+		if (err > 0)
+			err = 0;
+done:
+		mutex_lock(&ep->rma_info.rma_lock);
+		scif_put_window(window, 1);
+		break;
+	}
+	case OP_FAILED:
+	{
+		if (!scifdev_alive(ep)) {
+			err = -ENODEV;
+			window->unreg_state = OP_COMPLETED;
+		}
+		break;
+	}
+	case OP_COMPLETED:
+		break;
+	default:
+		err = -ENODEV;
+	}
+
+	if (window->unreg_state == OP_COMPLETED && window->ref_count)
+		scif_put_window(window, window->nr_pages);
+
+	if (!window->ref_count) {
+		atomic_inc(&ep->rma_info.tw_refcount);
+		list_del_init(&window->list);
+		scif_free_window_offset(ep, window, window->offset);
+		mutex_unlock(&ep->rma_info.rma_lock);
+		if ((!!(window->pinned_pages->map_flags & SCIF_MAP_KERNEL)) &&
+		    scifdev_alive(ep)) {
+			scif_drain_dma_intr(ep->remote_dev->sdev,
+					    ep->rma_info.dma_chan);
+		} else {
+			if (!__scif_dec_pinned_vm_lock(window->mm,
+						       window->nr_pages, 1)) {
+				__scif_release_mm(window->mm);
+				window->mm = NULL;
+			}
+		}
+		scif_queue_for_cleanup(window, &scif_info.rma);
+		mutex_lock(&ep->rma_info.rma_lock);
+	}
+	return err;
+}
+
+/**
+ * scif_send_alloc_request:
+ * @ep: end point
+ * @window: self registration window
+ *
+ * Send a remote window allocation request
+ */
+static int scif_send_alloc_request(struct scif_endpt *ep,
+				   struct scif_window *window)
+{
+	struct scifmsg msg;
+	struct scif_allocmsg *alloc = &window->alloc_handle;
+
+	/* Set up the Alloc Handle */
+	alloc->state = OP_IN_PROGRESS;
+	init_waitqueue_head(&alloc->allocwq);
+
+	/* Send out an allocation request */
+	msg.uop = SCIF_ALLOC_REQ;
+	msg.payload[1] = window->nr_pages;
+	msg.payload[2] = (u64)&window->alloc_handle;
+	return _scif_nodeqp_send(ep->remote_dev, &msg);
+}
+
+/**
+ * scif_prep_remote_window:
+ * @ep: end point
+ * @window: self registration window
+ *
+ * Send a remote window allocation request, wait for an allocation response,
+ * and prepares the remote window by copying over the page lists
+ */
+static int scif_prep_remote_window(struct scif_endpt *ep,
+				   struct scif_window *window)
+{
+	struct scifmsg msg;
+	struct scif_window *remote_window;
+	struct scif_allocmsg *alloc = &window->alloc_handle;
+	dma_addr_t *dma_phys_lookup, *tmp, *num_pages_lookup, *tmp1;
+	int i = 0, j = 0;
+	int nr_contig_chunks, loop_nr_contig_chunks;
+	int remaining_nr_contig_chunks, nr_lookup;
+	int err, map_err;
+
+	map_err = scif_map_window(ep->remote_dev, window);
+	if (map_err)
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d map_err %d\n", __func__, __LINE__, map_err);
+	remaining_nr_contig_chunks = window->nr_contig_chunks;
+	nr_contig_chunks = window->nr_contig_chunks;
+retry:
+	/* Wait for a SCIF_ALLOC_GNT/REJ message */
+	err = wait_event_timeout(alloc->allocwq,
+				 alloc->state != OP_IN_PROGRESS,
+				 SCIF_NODE_ALIVE_TIMEOUT);
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Synchronize with the thread waking up allocwq */
+	mutex_unlock(&ep->rma_info.rma_lock);
+	if (!err && scifdev_alive(ep))
+		goto retry;
+
+	if (!err)
+		err = -ENODEV;
+
+	if (err > 0)
+		err = 0;
+	else
+		return err;
+
+	/* Bail out. The remote end rejected this request */
+	if (alloc->state == OP_FAILED)
+		return -ENOMEM;
+
+	if (map_err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, map_err);
+		msg.uop = SCIF_FREE_VIRT;
+		msg.src = ep->port;
+		msg.payload[0] = ep->remote_ep;
+		msg.payload[1] = window->alloc_handle.vaddr;
+		msg.payload[2] = (u64)window;
+		msg.payload[3] = SCIF_REGISTER;
+		spin_lock(&ep->lock);
+		if (ep->state == SCIFEP_CONNECTED)
+			err = _scif_nodeqp_send(ep->remote_dev, &msg);
+		else
+			err = -ENOTCONN;
+		spin_unlock(&ep->lock);
+		return err;
+	}
+
+	remote_window = scif_ioremap(alloc->phys_addr, sizeof(*window),
+				     ep->remote_dev);
+
+	/* Compute the number of lookup entries. 21 == 2MB Shift */
+	nr_lookup = ALIGN(nr_contig_chunks, SCIF_NR_ADDR_IN_PAGE)
+			  >> ilog2(SCIF_NR_ADDR_IN_PAGE);
+
+	dma_phys_lookup =
+		scif_ioremap(remote_window->dma_addr_lookup.offset,
+			     nr_lookup *
+			     sizeof(*remote_window->dma_addr_lookup.lookup),
+			     ep->remote_dev);
+	num_pages_lookup =
+		scif_ioremap(remote_window->num_pages_lookup.offset,
+			     nr_lookup *
+			     sizeof(*remote_window->num_pages_lookup.lookup),
+			     ep->remote_dev);
+
+	while (remaining_nr_contig_chunks) {
+		loop_nr_contig_chunks = min_t(int, remaining_nr_contig_chunks,
+					      (int)SCIF_NR_ADDR_IN_PAGE);
+		/* #1/2 - Copy  physical addresses over to the remote side */
+
+		/* #2/2 - Copy DMA addresses (addresses that are fed into the
+		 * DMA engine) We transfer bus addresses which are then
+		 * converted into a MIC physical address on the remote
+		 * side if it is a MIC, if the remote node is a mgmt node we
+		 * transfer the MIC physical address
+		 */
+		tmp = scif_ioremap(dma_phys_lookup[j],
+				   loop_nr_contig_chunks *
+				   sizeof(*window->dma_addr),
+				   ep->remote_dev);
+		tmp1 = scif_ioremap(num_pages_lookup[j],
+				    loop_nr_contig_chunks *
+				    sizeof(*window->num_pages),
+				    ep->remote_dev);
+		if (scif_is_mgmt_node()) {
+			memcpy_toio((void __force __iomem *)tmp,
+				    &window->dma_addr[i], loop_nr_contig_chunks
+				    * sizeof(*window->dma_addr));
+			memcpy_toio((void __force __iomem *)tmp1,
+				    &window->num_pages[i], loop_nr_contig_chunks
+				    * sizeof(*window->num_pages));
+		} else {
+			if (scifdev_is_p2p(ep->remote_dev)) {
+				/*
+				 * add remote node's base address for this node
+				 * to convert it into a MIC address
+				 */
+				int m;
+				dma_addr_t dma_addr;
+
+				for (m = 0; m < loop_nr_contig_chunks; m++) {
+					dma_addr = window->dma_addr[i + m] +
+						ep->remote_dev->base_addr;
+					writeq(dma_addr,
+					       (void __force __iomem *)&tmp[m]);
+				}
+				memcpy_toio((void __force __iomem *)tmp1,
+					    &window->num_pages[i],
+					    loop_nr_contig_chunks
+					    * sizeof(*window->num_pages));
+			} else {
+				/* Mgmt node or loopback - transfer DMA
+				 * addresses as is, this is the same as a
+				 * MIC physical address (we use the dma_addr
+				 * and not the phys_addr array since the
+				 * phys_addr is only setup if there is a mmap()
+				 * request from the mgmt node)
+				 */
+				memcpy_toio((void __force __iomem *)tmp,
+					    &window->dma_addr[i],
+					    loop_nr_contig_chunks *
+					    sizeof(*window->dma_addr));
+				memcpy_toio((void __force __iomem *)tmp1,
+					    &window->num_pages[i],
+					    loop_nr_contig_chunks *
+					    sizeof(*window->num_pages));
+			}
+		}
+		remaining_nr_contig_chunks -= loop_nr_contig_chunks;
+		i += loop_nr_contig_chunks;
+		j++;
+		scif_iounmap(tmp, loop_nr_contig_chunks *
+			     sizeof(*window->dma_addr), ep->remote_dev);
+		scif_iounmap(tmp1, loop_nr_contig_chunks *
+			     sizeof(*window->num_pages), ep->remote_dev);
+	}
+
+	/* Prepare the remote window for the peer */
+	remote_window->peer_window = (u64)window;
+	remote_window->offset = window->offset;
+	remote_window->prot = window->prot;
+	remote_window->nr_contig_chunks = nr_contig_chunks;
+	remote_window->ep = ep->remote_ep;
+	scif_iounmap(num_pages_lookup,
+		     nr_lookup *
+		     sizeof(*remote_window->num_pages_lookup.lookup),
+		     ep->remote_dev);
+	scif_iounmap(dma_phys_lookup,
+		     nr_lookup *
+		     sizeof(*remote_window->dma_addr_lookup.lookup),
+		     ep->remote_dev);
+	scif_iounmap(remote_window, sizeof(*remote_window), ep->remote_dev);
+	window->peer_window = alloc->vaddr;
+	return err;
+}
+
+/**
+ * scif_send_scif_register:
+ * @ep: end point
+ * @window: self registration window
+ *
+ * Send a SCIF_REGISTER message if EP is connected and wait for a
+ * SCIF_REGISTER_(N)ACK message else send a SCIF_FREE_VIRT
+ * message so that the peer can free its remote window allocated earlier.
+ */
+static int scif_send_scif_register(struct scif_endpt *ep,
+				   struct scif_window *window)
+{
+	int err = 0;
+	struct scifmsg msg;
+
+	msg.src = ep->port;
+	msg.payload[0] = ep->remote_ep;
+	msg.payload[1] = window->alloc_handle.vaddr;
+	msg.payload[2] = (u64)window;
+	spin_lock(&ep->lock);
+	if (ep->state == SCIFEP_CONNECTED) {
+		msg.uop = SCIF_REGISTER;
+		window->reg_state = OP_IN_PROGRESS;
+		err = _scif_nodeqp_send(ep->remote_dev, &msg);
+		spin_unlock(&ep->lock);
+		if (!err) {
+retry:
+			/* Wait for a SCIF_REGISTER_(N)ACK message */
+			err = wait_event_timeout(window->regwq,
+						 window->reg_state !=
+						 OP_IN_PROGRESS,
+						 SCIF_NODE_ALIVE_TIMEOUT);
+			if (!err && scifdev_alive(ep))
+				goto retry;
+			err = !err ? -ENODEV : 0;
+			if (window->reg_state == OP_FAILED)
+				err = -ENOTCONN;
+		}
+	} else {
+		msg.uop = SCIF_FREE_VIRT;
+		msg.payload[3] = SCIF_REGISTER;
+		err = _scif_nodeqp_send(ep->remote_dev, &msg);
+		spin_unlock(&ep->lock);
+		if (!err)
+			err = -ENOTCONN;
+	}
+	return err;
+}
+
+/**
+ * scif_get_window_offset:
+ * @ep: end point descriptor
+ * @flags: flags
+ * @offset: offset hint
+ * @num_pages: number of pages
+ * @out_offset: computed offset returned by reference.
+ *
+ * Compute/Claim a new offset for this EP.
+ */
+int scif_get_window_offset(struct scif_endpt *ep, int flags, s64 offset,
+			   int num_pages, s64 *out_offset)
+{
+	s64 page_index;
+	struct iova *iova_ptr;
+	int err = 0;
+
+	if (flags & SCIF_MAP_FIXED) {
+		page_index = SCIF_IOVA_PFN(offset);
+		iova_ptr = reserve_iova(&ep->rma_info.iovad, page_index,
+					page_index + num_pages - 1);
+		if (!iova_ptr)
+			err = -EADDRINUSE;
+	} else {
+		iova_ptr = alloc_iova(&ep->rma_info.iovad, num_pages,
+				      SCIF_DMA_63BIT_PFN - 1, 0);
+		if (!iova_ptr)
+			err = -ENOMEM;
+	}
+	if (!err)
+		*out_offset = (iova_ptr->pfn_lo) << PAGE_SHIFT;
+	return err;
+}
+
+/**
+ * scif_free_window_offset:
+ * @ep: end point descriptor
+ * @window: registration window
+ * @offset: Offset to be freed
+ *
+ * Free offset for this EP. The callee is supposed to grab
+ * the RMA mutex before calling this API.
+ */
+void scif_free_window_offset(struct scif_endpt *ep,
+			     struct scif_window *window, s64 offset)
+{
+	if ((window && !window->offset_freed) || !window) {
+		free_iova(&ep->rma_info.iovad, offset >> PAGE_SHIFT);
+		if (window)
+			window->offset_freed = true;
+	}
+}
+
+/**
+ * scif_alloc_req: Respond to SCIF_ALLOC_REQ interrupt message
+ * @msg:        Interrupt message
+ *
+ * Remote side is requesting a memory allocation.
+ */
+void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	int err;
+	struct scif_window *window = NULL;
+	int nr_pages = msg->payload[1];
+
+	window = scif_create_remote_window(scifdev, nr_pages);
+	if (!window) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	/* The peer's allocation request is granted */
+	msg->uop = SCIF_ALLOC_GNT;
+	msg->payload[0] = (u64)window;
+	msg->payload[1] = window->mapped_offset;
+	err = scif_nodeqp_send(scifdev, msg);
+	if (err)
+		scif_destroy_remote_window(window);
+	return;
+error:
+	/* The peer's allocation request is rejected */
+	dev_err(&scifdev->sdev->dev,
+		"%s %d error %d alloc_ptr %p nr_pages 0x%x\n",
+		__func__, __LINE__, err, window, nr_pages);
+	msg->uop = SCIF_ALLOC_REJ;
+	scif_nodeqp_send(scifdev, msg);
+}
+
+/**
+ * scif_alloc_gnt_rej: Respond to SCIF_ALLOC_GNT/REJ interrupt message
+ * @msg:        Interrupt message
+ *
+ * Remote side responded to a memory allocation.
+ */
+void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_allocmsg *handle = (struct scif_allocmsg *)msg->payload[2];
+	struct scif_window *window = container_of(handle, struct scif_window,
+						  alloc_handle);
+	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	handle->vaddr = msg->payload[0];
+	handle->phys_addr = msg->payload[1];
+	if (msg->uop == SCIF_ALLOC_GNT)
+		handle->state = OP_COMPLETED;
+	else
+		handle->state = OP_FAILED;
+	wake_up(&handle->allocwq);
+	mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/**
+ * scif_free_virt: Respond to SCIF_FREE_VIRT interrupt message
+ * @msg:        Interrupt message
+ *
+ * Free up memory kmalloc'd earlier.
+ */
+void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_window *window = (struct scif_window *)msg->payload[1];
+
+	scif_destroy_remote_window(window);
+}
+
+static void
+scif_fixup_aper_base(struct scif_dev *dev, struct scif_window *window)
+{
+	int j;
+	struct scif_hw_dev *sdev = dev->sdev;
+	phys_addr_t apt_base = 0;
+
+	/*
+	 * Add the aperture base if the DMA address is not card relative
+	 * since the DMA addresses need to be an offset into the bar
+	 */
+	if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER &&
+	    sdev->aper && !sdev->card_rel_da)
+		apt_base = sdev->aper->pa;
+	else
+		return;
+
+	for (j = 0; j < window->nr_contig_chunks; j++) {
+		if (window->num_pages[j])
+			window->dma_addr[j] += apt_base;
+		else
+			break;
+	}
+}
+
+/**
+ * scif_recv_reg: Respond to SCIF_REGISTER interrupt message
+ * @msg:        Interrupt message
+ *
+ * Update remote window list with a new registered window.
+ */
+void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	struct scif_window *window =
+		(struct scif_window *)msg->payload[1];
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	spin_lock(&ep->lock);
+	if (ep->state == SCIFEP_CONNECTED) {
+		msg->uop = SCIF_REGISTER_ACK;
+		scif_nodeqp_send(ep->remote_dev, msg);
+		scif_fixup_aper_base(ep->remote_dev, window);
+		/* No further failures expected. Insert new window */
+		scif_insert_window(window, &ep->rma_info.remote_reg_list);
+	} else {
+		msg->uop = SCIF_REGISTER_NACK;
+		scif_nodeqp_send(ep->remote_dev, msg);
+	}
+	spin_unlock(&ep->lock);
+	mutex_unlock(&ep->rma_info.rma_lock);
+	/* free up any lookup resources now that page lists are transferred */
+	scif_destroy_remote_lookup(ep->remote_dev, window);
+	/*
+	 * We could not insert the window but we need to
+	 * destroy the window.
+	 */
+	if (msg->uop == SCIF_REGISTER_NACK)
+		scif_destroy_remote_window(window);
+}
+
+/**
+ * scif_recv_unreg: Respond to SCIF_UNREGISTER interrupt message
+ * @msg:        Interrupt message
+ *
+ * Remove window from remote registration list;
+ */
+void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_rma_req req;
+	struct scif_window *window = NULL;
+	struct scif_window *recv_window =
+		(struct scif_window *)msg->payload[0];
+	struct scif_endpt *ep;
+	int del_window = 0;
+
+	ep = (struct scif_endpt *)recv_window->ep;
+	req.out_window = &window;
+	req.offset = recv_window->offset;
+	req.prot = 0;
+	req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
+	req.type = SCIF_WINDOW_FULL;
+	req.head = &ep->rma_info.remote_reg_list;
+	msg->payload[0] = ep->remote_ep;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Does a valid window exist? */
+	if (scif_query_window(&req)) {
+		dev_err(&scifdev->sdev->dev,
+			"%s %d -ENXIO\n", __func__, __LINE__);
+		msg->uop = SCIF_UNREGISTER_ACK;
+		goto error;
+	}
+	if (window) {
+		if (window->ref_count)
+			scif_put_window(window, window->nr_pages);
+		else
+			dev_err(&scifdev->sdev->dev,
+				"%s %d ref count should be +ve\n",
+				__func__, __LINE__);
+		window->unreg_state = OP_COMPLETED;
+		if (!window->ref_count) {
+			msg->uop = SCIF_UNREGISTER_ACK;
+			atomic_inc(&ep->rma_info.tw_refcount);
+			ep->rma_info.async_list_del = 1;
+			list_del_init(&window->list);
+			del_window = 1;
+		} else {
+			/* NACK! There are valid references to this window */
+			msg->uop = SCIF_UNREGISTER_NACK;
+		}
+	} else {
+		/* The window did not make its way to the list at all. ACK */
+		msg->uop = SCIF_UNREGISTER_ACK;
+		scif_destroy_remote_window(recv_window);
+	}
+error:
+	mutex_unlock(&ep->rma_info.rma_lock);
+	if (del_window)
+		scif_drain_dma_intr(ep->remote_dev->sdev,
+				    ep->rma_info.dma_chan);
+	scif_nodeqp_send(ep->remote_dev, msg);
+	if (del_window)
+		scif_queue_for_cleanup(window, &scif_info.rma);
+}
+
+/**
+ * scif_recv_reg_ack: Respond to SCIF_REGISTER_ACK interrupt message
+ * @msg:        Interrupt message
+ *
+ * Wake up the window waiting to complete registration.
+ */
+void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_window *window =
+		(struct scif_window *)msg->payload[2];
+	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	window->reg_state = OP_COMPLETED;
+	wake_up(&window->regwq);
+	mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/**
+ * scif_recv_reg_nack: Respond to SCIF_REGISTER_NACK interrupt message
+ * @msg:        Interrupt message
+ *
+ * Wake up the window waiting to inform it that registration
+ * cannot be completed.
+ */
+void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_window *window =
+		(struct scif_window *)msg->payload[2];
+	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	window->reg_state = OP_FAILED;
+	wake_up(&window->regwq);
+	mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/**
+ * scif_recv_unreg_ack: Respond to SCIF_UNREGISTER_ACK interrupt message
+ * @msg:        Interrupt message
+ *
+ * Wake up the window waiting to complete unregistration.
+ */
+void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_window *window =
+		(struct scif_window *)msg->payload[1];
+	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	window->unreg_state = OP_COMPLETED;
+	wake_up(&window->unregwq);
+	mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/**
+ * scif_recv_unreg_nack: Respond to SCIF_UNREGISTER_NACK interrupt message
+ * @msg:        Interrupt message
+ *
+ * Wake up the window waiting to inform it that unregistration
+ * cannot be completed immediately.
+ */
+void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_window *window =
+		(struct scif_window *)msg->payload[1];
+	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	window->unreg_state = OP_FAILED;
+	wake_up(&window->unregwq);
+	mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+int __scif_pin_pages(void *addr, size_t len, int *out_prot,
+		     int map_flags, scif_pinned_pages_t *pages)
+{
+	struct scif_pinned_pages *pinned_pages;
+	int nr_pages, err = 0, i;
+	bool vmalloc_addr = false;
+	bool try_upgrade = false;
+	int prot = *out_prot;
+	int ulimit = 0;
+	struct mm_struct *mm = NULL;
+
+	/* Unsupported flags */
+	if (map_flags & ~(SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT))
+		return -EINVAL;
+	ulimit = !!(map_flags & SCIF_MAP_ULIMIT);
+
+	/* Unsupported protection requested */
+	if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
+		return -EINVAL;
+
+	/* addr/len must be page aligned. len should be non zero */
+	if (!len ||
+	    (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
+	    (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
+		return -EINVAL;
+
+	might_sleep();
+
+	nr_pages = len >> PAGE_SHIFT;
+
+	/* Allocate a set of pinned pages */
+	pinned_pages = scif_create_pinned_pages(nr_pages, prot);
+	if (!pinned_pages)
+		return -ENOMEM;
+
+	if (map_flags & SCIF_MAP_KERNEL) {
+		if (is_vmalloc_addr(addr))
+			vmalloc_addr = true;
+
+		for (i = 0; i < nr_pages; i++) {
+			if (vmalloc_addr)
+				pinned_pages->pages[i] =
+					vmalloc_to_page(addr + (i * PAGE_SIZE));
+			else
+				pinned_pages->pages[i] =
+					virt_to_page(addr + (i * PAGE_SIZE));
+		}
+		pinned_pages->nr_pages = nr_pages;
+		pinned_pages->map_flags = SCIF_MAP_KERNEL;
+	} else {
+		/*
+		 * SCIF supports registration caching. If a registration has
+		 * been requested with read only permissions, then we try
+		 * to pin the pages with RW permissions so that a subsequent
+		 * transfer with RW permission can hit the cache instead of
+		 * invalidating it. If the upgrade fails with RW then we
+		 * revert back to R permission and retry
+		 */
+		if (prot == SCIF_PROT_READ)
+			try_upgrade = true;
+		prot |= SCIF_PROT_WRITE;
+retry:
+		mm = current->mm;
+		down_write(&mm->mmap_sem);
+		if (ulimit) {
+			err = __scif_check_inc_pinned_vm(mm, nr_pages);
+			if (err) {
+				up_write(&mm->mmap_sem);
+				pinned_pages->nr_pages = 0;
+				goto error_unmap;
+			}
+		}
+
+		pinned_pages->nr_pages = get_user_pages(
+				(u64)addr,
+				nr_pages,
+				(prot & SCIF_PROT_WRITE) ? FOLL_WRITE : 0,
+				pinned_pages->pages,
+				NULL);
+		up_write(&mm->mmap_sem);
+		if (nr_pages != pinned_pages->nr_pages) {
+			if (pinned_pages->nr_pages < 0)
+				pinned_pages->nr_pages = 0;
+			if (try_upgrade) {
+				if (ulimit)
+					__scif_dec_pinned_vm_lock(mm,
+								  nr_pages, 0);
+				/* Roll back any pinned pages */
+				for (i = 0; i < pinned_pages->nr_pages; i++) {
+					if (pinned_pages->pages[i])
+						put_page(
+						pinned_pages->pages[i]);
+				}
+				prot &= ~SCIF_PROT_WRITE;
+				try_upgrade = false;
+				goto retry;
+			}
+		}
+		pinned_pages->map_flags = 0;
+	}
+
+	if (pinned_pages->nr_pages < nr_pages) {
+		err = -EFAULT;
+		goto dec_pinned;
+	}
+
+	*out_prot = prot;
+	atomic_set(&pinned_pages->ref_count, 1);
+	*pages = pinned_pages;
+	return err;
+dec_pinned:
+	if (ulimit)
+		__scif_dec_pinned_vm_lock(mm, nr_pages, 0);
+	/* Something went wrong! Rollback */
+error_unmap:
+	scif_destroy_pinned_pages(pinned_pages);
+	*pages = NULL;
+	dev_dbg(scif_info.mdev.this_device,
+		"%s %d err %d len 0x%lx\n", __func__, __LINE__, err, len);
+	return err;
+}
+
+int scif_pin_pages(void *addr, size_t len, int prot,
+		   int map_flags, scif_pinned_pages_t *pages)
+{
+	return __scif_pin_pages(addr, len, &prot, map_flags, pages);
+}
+EXPORT_SYMBOL_GPL(scif_pin_pages);
+
+int scif_unpin_pages(scif_pinned_pages_t pinned_pages)
+{
+	int err = 0, ret;
+
+	if (!pinned_pages || SCIFEP_MAGIC != pinned_pages->magic)
+		return -EINVAL;
+
+	ret = atomic_sub_return(1, &pinned_pages->ref_count);
+	if (ret < 0) {
+		dev_err(scif_info.mdev.this_device,
+			"%s %d scif_unpin_pages called without pinning? rc %d\n",
+			__func__, __LINE__, ret);
+		return -EINVAL;
+	}
+	/*
+	 * Destroy the window if the ref count for this set of pinned
+	 * pages has dropped to zero. If it is positive then there is
+	 * a valid registered window which is backed by these pages and
+	 * it will be destroyed once all such windows are unregistered.
+	 */
+	if (!ret)
+		err = scif_destroy_pinned_pages(pinned_pages);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_unpin_pages);
+
+static inline void
+scif_insert_local_window(struct scif_window *window, struct scif_endpt *ep)
+{
+	mutex_lock(&ep->rma_info.rma_lock);
+	scif_insert_window(window, &ep->rma_info.reg_list);
+	mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+off_t scif_register_pinned_pages(scif_epd_t epd,
+				 scif_pinned_pages_t pinned_pages,
+				 off_t offset, int map_flags)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	s64 computed_offset;
+	struct scif_window *window;
+	int err;
+	size_t len;
+	struct device *spdev;
+
+	/* Unsupported flags */
+	if (map_flags & ~SCIF_MAP_FIXED)
+		return -EINVAL;
+
+	len = pinned_pages->nr_pages << PAGE_SHIFT;
+
+	/*
+	 * Offset is not page aligned/negative or offset+len
+	 * wraps around with SCIF_MAP_FIXED.
+	 */
+	if ((map_flags & SCIF_MAP_FIXED) &&
+	    ((ALIGN(offset, PAGE_SIZE) != offset) ||
+	    (offset < 0) ||
+	    (len > LONG_MAX - offset)))
+		return -EINVAL;
+
+	might_sleep();
+
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+	/*
+	 * It is an error to pass pinned_pages to scif_register_pinned_pages()
+	 * after calling scif_unpin_pages().
+	 */
+	if (!atomic_add_unless(&pinned_pages->ref_count, 1, 0))
+		return -EINVAL;
+
+	/* Compute the offset for this registration */
+	err = scif_get_window_offset(ep, map_flags, offset,
+				     len, &computed_offset);
+	if (err) {
+		atomic_sub(1, &pinned_pages->ref_count);
+		return err;
+	}
+
+	/* Allocate and prepare self registration window */
+	window = scif_create_window(ep, pinned_pages->nr_pages,
+				    computed_offset, false);
+	if (!window) {
+		atomic_sub(1, &pinned_pages->ref_count);
+		scif_free_window_offset(ep, NULL, computed_offset);
+		return -ENOMEM;
+	}
+
+	window->pinned_pages = pinned_pages;
+	window->nr_pages = pinned_pages->nr_pages;
+	window->prot = pinned_pages->prot;
+
+	spdev = scif_get_peer_dev(ep->remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		scif_destroy_window(ep, window);
+		return err;
+	}
+	err = scif_send_alloc_request(ep, window);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error_unmap;
+	}
+
+	/* Prepare the remote registration window */
+	err = scif_prep_remote_window(ep, window);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error_unmap;
+	}
+
+	/* Tell the peer about the new window */
+	err = scif_send_scif_register(ep, window);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error_unmap;
+	}
+
+	scif_put_peer_dev(spdev);
+	/* No further failures expected. Insert new window */
+	scif_insert_local_window(window, ep);
+	return computed_offset;
+error_unmap:
+	scif_destroy_window(ep, window);
+	scif_put_peer_dev(spdev);
+	dev_err(&ep->remote_dev->sdev->dev,
+		"%s %d err %d\n", __func__, __LINE__, err);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_register_pinned_pages);
+
+off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
+		    int prot, int map_flags)
+{
+	scif_pinned_pages_t pinned_pages;
+	off_t err;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	s64 computed_offset;
+	struct scif_window *window;
+	struct mm_struct *mm = NULL;
+	struct device *spdev;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI register: ep %p addr %p len 0x%lx offset 0x%lx prot 0x%x map_flags 0x%x\n",
+		epd, addr, len, offset, prot, map_flags);
+	/* Unsupported flags */
+	if (map_flags & ~(SCIF_MAP_FIXED | SCIF_MAP_KERNEL))
+		return -EINVAL;
+
+	/*
+	 * Offset is not page aligned/negative or offset+len
+	 * wraps around with SCIF_MAP_FIXED.
+	 */
+	if ((map_flags & SCIF_MAP_FIXED) &&
+	    ((ALIGN(offset, PAGE_SIZE) != offset) ||
+	    (offset < 0) ||
+	    (len > LONG_MAX - offset)))
+		return -EINVAL;
+
+	/* Unsupported protection requested */
+	if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
+		return -EINVAL;
+
+	/* addr/len must be page aligned. len should be non zero */
+	if (!len || (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
+	    (ALIGN(len, PAGE_SIZE) != len))
+		return -EINVAL;
+
+	might_sleep();
+
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	/* Compute the offset for this registration */
+	err = scif_get_window_offset(ep, map_flags, offset,
+				     len >> PAGE_SHIFT, &computed_offset);
+	if (err)
+		return err;
+
+	spdev = scif_get_peer_dev(ep->remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		scif_free_window_offset(ep, NULL, computed_offset);
+		return err;
+	}
+	/* Allocate and prepare self registration window */
+	window = scif_create_window(ep, len >> PAGE_SHIFT,
+				    computed_offset, false);
+	if (!window) {
+		scif_free_window_offset(ep, NULL, computed_offset);
+		scif_put_peer_dev(spdev);
+		return -ENOMEM;
+	}
+
+	window->nr_pages = len >> PAGE_SHIFT;
+
+	err = scif_send_alloc_request(ep, window);
+	if (err) {
+		scif_destroy_incomplete_window(ep, window);
+		scif_put_peer_dev(spdev);
+		return err;
+	}
+
+	if (!(map_flags & SCIF_MAP_KERNEL)) {
+		mm = __scif_acquire_mm();
+		map_flags |= SCIF_MAP_ULIMIT;
+	}
+	/* Pin down the pages */
+	err = __scif_pin_pages(addr, len, &prot,
+			       map_flags & (SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT),
+			       &pinned_pages);
+	if (err) {
+		scif_destroy_incomplete_window(ep, window);
+		__scif_release_mm(mm);
+		goto error;
+	}
+
+	window->pinned_pages = pinned_pages;
+	window->prot = pinned_pages->prot;
+	window->mm = mm;
+
+	/* Prepare the remote registration window */
+	err = scif_prep_remote_window(ep, window);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %ld\n", __func__, __LINE__, err);
+		goto error_unmap;
+	}
+
+	/* Tell the peer about the new window */
+	err = scif_send_scif_register(ep, window);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %ld\n", __func__, __LINE__, err);
+		goto error_unmap;
+	}
+
+	scif_put_peer_dev(spdev);
+	/* No further failures expected. Insert new window */
+	scif_insert_local_window(window, ep);
+	dev_dbg(&ep->remote_dev->sdev->dev,
+		"SCIFAPI register: ep %p addr %p len 0x%lx computed_offset 0x%llx\n",
+		epd, addr, len, computed_offset);
+	return computed_offset;
+error_unmap:
+	scif_destroy_window(ep, window);
+error:
+	scif_put_peer_dev(spdev);
+	dev_err(&ep->remote_dev->sdev->dev,
+		"%s %d err %ld\n", __func__, __LINE__, err);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_register);
+
+int
+scif_unregister(scif_epd_t epd, off_t offset, size_t len)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scif_window *window = NULL;
+	struct scif_rma_req req;
+	int nr_pages, err;
+	struct device *spdev;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI unregister: ep %p offset 0x%lx len 0x%lx\n",
+		ep, offset, len);
+	/* len must be page aligned. len should be non zero */
+	if (!len ||
+	    (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
+		return -EINVAL;
+
+	/* Offset is not page aligned or offset+len wraps around */
+	if ((ALIGN(offset, PAGE_SIZE) != offset) ||
+	    (offset < 0) ||
+	    (len > LONG_MAX - offset))
+		return -EINVAL;
+
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	might_sleep();
+	nr_pages = len >> PAGE_SHIFT;
+
+	req.out_window = &window;
+	req.offset = offset;
+	req.prot = 0;
+	req.nr_bytes = len;
+	req.type = SCIF_WINDOW_FULL;
+	req.head = &ep->rma_info.reg_list;
+
+	spdev = scif_get_peer_dev(ep->remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		return err;
+	}
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Does a valid window exist? */
+	err = scif_query_window(&req);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error;
+	}
+	/* Unregister all the windows in this range */
+	err = scif_rma_list_unregister(window, offset, nr_pages);
+	if (err)
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+error:
+	mutex_unlock(&ep->rma_info.rma_lock);
+	scif_put_peer_dev(spdev);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_unregister);
diff --git a/drivers/misc/mic/scif/scif_rma.h b/drivers/misc/mic/scif/scif_rma.h
new file mode 100644
index 000000000..fa6722279
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rma.h
@@ -0,0 +1,464 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_RMA_H
+#define SCIF_RMA_H
+
+#include <linux/dma_remapping.h>
+#include <linux/mmu_notifier.h>
+
+#include "../bus/scif_bus.h"
+
+/* If this bit is set then the mark is a remote fence mark */
+#define SCIF_REMOTE_FENCE_BIT          31
+/* Magic value used to indicate a remote fence request */
+#define SCIF_REMOTE_FENCE BIT_ULL(SCIF_REMOTE_FENCE_BIT)
+
+#define SCIF_MAX_UNALIGNED_BUF_SIZE (1024 * 1024ULL)
+#define SCIF_KMEM_UNALIGNED_BUF_SIZE (SCIF_MAX_UNALIGNED_BUF_SIZE + \
+				      (L1_CACHE_BYTES << 1))
+
+#define SCIF_IOVA_START_PFN		(1)
+#define SCIF_IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
+#define SCIF_DMA_64BIT_PFN SCIF_IOVA_PFN(DMA_BIT_MASK(64))
+#define SCIF_DMA_63BIT_PFN SCIF_IOVA_PFN(DMA_BIT_MASK(63))
+
+/*
+ * struct scif_endpt_rma_info - Per Endpoint Remote Memory Access Information
+ *
+ * @reg_list: List of registration windows for self
+ * @remote_reg_list: List of registration windows for peer
+ * @iovad: Offset generator
+ * @rma_lock: Synchronizes access to self/remote list and also protects the
+ *	      window from being destroyed while RMAs are in progress.
+ * @tc_lock: Synchronizes access to temporary cached windows list
+ *	     for SCIF Registration Caching.
+ * @mmn_lock: Synchronizes access to the list of MMU notifiers registered
+ * @tw_refcount: Keeps track of number of outstanding temporary registered
+ *		 windows created by scif_vreadfrom/scif_vwriteto which have
+ *		 not been destroyed.
+ * @tcw_refcount: Same as tw_refcount but for temporary cached windows
+ * @tcw_total_pages: Same as tcw_refcount but in terms of pages pinned
+ * @mmn_list: MMU notifier so that we can destroy the windows when required
+ * @fence_refcount: Keeps track of number of outstanding remote fence
+ *		    requests which have been received by the peer.
+ * @dma_chan: DMA channel used for all DMA transfers for this endpoint.
+ * @async_list_del: Detect asynchronous list entry deletion
+ * @vma_list: List of vmas with remote memory mappings
+ * @markwq: Wait queue used for scif_fence_mark/scif_fence_wait
+*/
+struct scif_endpt_rma_info {
+	struct list_head reg_list;
+	struct list_head remote_reg_list;
+	struct iova_domain iovad;
+	struct mutex rma_lock;
+	spinlock_t tc_lock;
+	struct mutex mmn_lock;
+	atomic_t tw_refcount;
+	atomic_t tcw_refcount;
+	atomic_t tcw_total_pages;
+	struct list_head mmn_list;
+	atomic_t fence_refcount;
+	struct dma_chan	*dma_chan;
+	int async_list_del;
+	struct list_head vma_list;
+	wait_queue_head_t markwq;
+};
+
+/*
+ * struct scif_fence_info - used for tracking fence requests
+ *
+ * @state: State of this transfer
+ * @wq: Fences wait on this queue
+ * @dma_mark: Used for storing the DMA mark
+ */
+struct scif_fence_info {
+	enum scif_msg_state state;
+	struct completion comp;
+	int dma_mark;
+};
+
+/*
+ * struct scif_remote_fence_info - used for tracking remote fence requests
+ *
+ * @msg: List of SCIF node QP fence messages
+ * @list: Link to list of remote fence requests
+ */
+struct scif_remote_fence_info {
+	struct scifmsg msg;
+	struct list_head list;
+};
+
+/*
+ * Specifies whether an RMA operation can span across partial windows, a single
+ * window or multiple contiguous windows. Mmaps can span across partial windows.
+ * Unregistration can span across complete windows. scif_get_pages() can span a
+ * single window. A window can also be of type self or peer.
+ */
+enum scif_window_type {
+	SCIF_WINDOW_PARTIAL,
+	SCIF_WINDOW_SINGLE,
+	SCIF_WINDOW_FULL,
+	SCIF_WINDOW_SELF,
+	SCIF_WINDOW_PEER
+};
+
+/* The number of physical addresses that can be stored in a PAGE. */
+#define SCIF_NR_ADDR_IN_PAGE   (0x1000 >> 3)
+
+/*
+ * struct scif_rma_lookup - RMA lookup data structure for page list transfers
+ *
+ * Store an array of lookup offsets. Each offset in this array maps
+ * one 4K page containing 512 physical addresses i.e. 2MB. 512 such
+ * offsets in a 4K page will correspond to 1GB of registered address space.
+
+ * @lookup: Array of offsets
+ * @offset: DMA offset of lookup array
+ */
+struct scif_rma_lookup {
+	dma_addr_t *lookup;
+	dma_addr_t offset;
+};
+
+/*
+ * struct scif_pinned_pages - A set of pinned pages obtained with
+ * scif_pin_pages() which could be part of multiple registered
+ * windows across different end points.
+ *
+ * @nr_pages: Number of pages which is defined as a s64 instead of an int
+ * to avoid sign extension with buffers >= 2GB
+ * @prot: read/write protections
+ * @map_flags: Flags specified during the pin operation
+ * @ref_count: Reference count bumped in terms of number of pages
+ * @magic: A magic value
+ * @pages: Array of pointers to struct pages populated with get_user_pages(..)
+ */
+struct scif_pinned_pages {
+	s64 nr_pages;
+	int prot;
+	int map_flags;
+	atomic_t ref_count;
+	u64 magic;
+	struct page **pages;
+};
+
+/*
+ * struct scif_status - Stores DMA status update information
+ *
+ * @src_dma_addr: Source buffer DMA address
+ * @val: src location for value to be written to the destination
+ * @ep: SCIF endpoint
+ */
+struct scif_status {
+	dma_addr_t src_dma_addr;
+	u64 val;
+	struct scif_endpt *ep;
+};
+
+/*
+ * struct scif_window - Registration Window for Self and Remote
+ *
+ * @nr_pages: Number of pages which is defined as a s64 instead of an int
+ * to avoid sign extension with buffers >= 2GB
+ * @nr_contig_chunks: Number of contiguous physical chunks
+ * @prot: read/write protections
+ * @ref_count: reference count in terms of number of pages
+ * @magic: Cookie to detect corruption
+ * @offset: registered offset
+ * @va_for_temp: va address that this window represents
+ * @dma_mark: Used to determine if all DMAs against the window are done
+ * @ep: Pointer to EP. Useful for passing EP around with messages to
+	avoid expensive list traversals.
+ * @list: link to list of windows for the endpoint
+ * @type: self or peer window
+ * @peer_window: Pointer to peer window. Useful for sending messages to peer
+ *		 without requiring an extra list traversal
+ * @unreg_state: unregistration state
+ * @offset_freed: True if the offset has been freed
+ * @temp: True for temporary windows created via scif_vreadfrom/scif_vwriteto
+ * @mm: memory descriptor for the task_struct which initiated the RMA
+ * @st: scatter gather table for DMA mappings with IOMMU enabled
+ * @pinned_pages: The set of pinned_pages backing this window
+ * @alloc_handle: Handle for sending ALLOC_REQ
+ * @regwq: Wait Queue for an registration (N)ACK
+ * @reg_state: Registration state
+ * @unregwq: Wait Queue for an unregistration (N)ACK
+ * @dma_addr_lookup: Lookup for physical addresses used for DMA
+ * @nr_lookup: Number of entries in lookup
+ * @mapped_offset: Offset used to map the window by the peer
+ * @dma_addr: Array of physical addresses used for Mgmt node & MIC initiated DMA
+ * @num_pages: Array specifying number of pages for each physical address
+ */
+struct scif_window {
+	s64 nr_pages;
+	int nr_contig_chunks;
+	int prot;
+	int ref_count;
+	u64 magic;
+	s64 offset;
+	unsigned long va_for_temp;
+	int dma_mark;
+	u64 ep;
+	struct list_head list;
+	enum scif_window_type type;
+	u64 peer_window;
+	enum scif_msg_state unreg_state;
+	bool offset_freed;
+	bool temp;
+	struct mm_struct *mm;
+	struct sg_table *st;
+	union {
+		struct {
+			struct scif_pinned_pages *pinned_pages;
+			struct scif_allocmsg alloc_handle;
+			wait_queue_head_t regwq;
+			enum scif_msg_state reg_state;
+			wait_queue_head_t unregwq;
+		};
+		struct {
+			struct scif_rma_lookup dma_addr_lookup;
+			struct scif_rma_lookup num_pages_lookup;
+			int nr_lookup;
+			dma_addr_t mapped_offset;
+		};
+	};
+	dma_addr_t *dma_addr;
+	u64 *num_pages;
+} __packed;
+
+/*
+ * scif_mmu_notif - SCIF mmu notifier information
+ *
+ * @mmu_notifier ep_mmu_notifier: MMU notifier operations
+ * @tc_reg_list: List of temp registration windows for self
+ * @mm: memory descriptor for the task_struct which initiated the RMA
+ * @ep: SCIF endpoint
+ * @list: link to list of MMU notifier information
+ */
+struct scif_mmu_notif {
+#ifdef CONFIG_MMU_NOTIFIER
+	struct mmu_notifier ep_mmu_notifier;
+#endif
+	struct list_head tc_reg_list;
+	struct mm_struct *mm;
+	struct scif_endpt *ep;
+	struct list_head list;
+};
+
+enum scif_rma_dir {
+	SCIF_LOCAL_TO_REMOTE,
+	SCIF_REMOTE_TO_LOCAL
+};
+
+extern struct kmem_cache *unaligned_cache;
+/* Initialize RMA for this EP */
+void scif_rma_ep_init(struct scif_endpt *ep);
+/* Check if epd can be uninitialized */
+int scif_rma_ep_can_uninit(struct scif_endpt *ep);
+/* Obtain a new offset. Callee must grab RMA lock */
+int scif_get_window_offset(struct scif_endpt *ep, int flags,
+			   s64 offset, int nr_pages, s64 *out_offset);
+/* Free offset. Callee must grab RMA lock */
+void scif_free_window_offset(struct scif_endpt *ep,
+			     struct scif_window *window, s64 offset);
+/* Create self registration window */
+struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
+				       s64 offset, bool temp);
+/* Destroy self registration window.*/
+int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window);
+void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window);
+/* Map pages of self window to Aperture/PCI */
+int scif_map_window(struct scif_dev *remote_dev,
+		    struct scif_window *window);
+/* Unregister a self window */
+int scif_unregister_window(struct scif_window *window);
+/* Destroy remote registration window */
+void
+scif_destroy_remote_window(struct scif_window *window);
+/* remove valid remote memory mappings from process address space */
+void scif_zap_mmaps(int node);
+/* Query if any applications have remote memory mappings */
+bool scif_rma_do_apps_have_mmaps(int node);
+/* Cleanup remote registration lists for zombie endpoints */
+void scif_cleanup_rma_for_zombies(int node);
+/* Reserve a DMA channel for a particular endpoint */
+int scif_reserve_dma_chan(struct scif_endpt *ep);
+/* Setup a DMA mark for an endpoint */
+int _scif_fence_mark(scif_epd_t epd, int *mark);
+int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val,
+		     enum scif_window_type type);
+void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_mmu_notif_handler(struct work_struct *work);
+void scif_rma_handle_remote_fences(void);
+void scif_rma_destroy_windows(void);
+void scif_rma_destroy_tcw_invalid(void);
+int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan);
+
+struct scif_window_iter {
+	s64 offset;
+	int index;
+};
+
+static inline void
+scif_init_window_iter(struct scif_window *window, struct scif_window_iter *iter)
+{
+	iter->offset = window->offset;
+	iter->index = 0;
+}
+
+dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off,
+				size_t *nr_bytes,
+				struct scif_window_iter *iter);
+static inline
+dma_addr_t __scif_off_to_dma_addr(struct scif_window *window, s64 off)
+{
+	return scif_off_to_dma_addr(window, off, NULL, NULL);
+}
+
+static inline bool scif_unaligned(off_t src_offset, off_t dst_offset)
+{
+	src_offset = src_offset & (L1_CACHE_BYTES - 1);
+	dst_offset = dst_offset & (L1_CACHE_BYTES - 1);
+	return !(src_offset == dst_offset);
+}
+
+/*
+ * scif_zalloc:
+ * @size: Size of the allocation request.
+ *
+ * Helper API which attempts to allocate zeroed pages via
+ * __get_free_pages(..) first and then falls back on
+ * vzalloc(..) if that fails.
+ */
+static inline void *scif_zalloc(size_t size)
+{
+	void *ret = NULL;
+	size_t align = ALIGN(size, PAGE_SIZE);
+
+	if (align && get_order(align) < MAX_ORDER)
+		ret = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+					       get_order(align));
+	return ret ? ret : vzalloc(align);
+}
+
+/*
+ * scif_free:
+ * @addr: Address to be freed.
+ * @size: Size of the allocation.
+ * Helper API which frees memory allocated via scif_zalloc().
+ */
+static inline void scif_free(void *addr, size_t size)
+{
+	size_t align = ALIGN(size, PAGE_SIZE);
+
+	if (is_vmalloc_addr(addr))
+		vfree(addr);
+	else
+		free_pages((unsigned long)addr, get_order(align));
+}
+
+static inline void scif_get_window(struct scif_window *window, int nr_pages)
+{
+	window->ref_count += nr_pages;
+}
+
+static inline void scif_put_window(struct scif_window *window, int nr_pages)
+{
+	window->ref_count -= nr_pages;
+}
+
+static inline void scif_set_window_ref(struct scif_window *window, int nr_pages)
+{
+	window->ref_count = nr_pages;
+}
+
+static inline void
+scif_queue_for_cleanup(struct scif_window *window, struct list_head *list)
+{
+	spin_lock(&scif_info.rmalock);
+	list_add_tail(&window->list, list);
+	spin_unlock(&scif_info.rmalock);
+	schedule_work(&scif_info.misc_work);
+}
+
+static inline void __scif_rma_destroy_tcw_helper(struct scif_window *window)
+{
+	list_del_init(&window->list);
+	scif_queue_for_cleanup(window, &scif_info.rma_tc);
+}
+
+static inline bool scif_is_iommu_enabled(void)
+{
+#ifdef CONFIG_INTEL_IOMMU
+	return intel_iommu_enabled;
+#else
+	return false;
+#endif
+}
+#endif /* SCIF_RMA_H */
diff --git a/drivers/misc/mic/scif/scif_rma_list.c b/drivers/misc/mic/scif/scif_rma_list.c
new file mode 100644
index 000000000..a036dbb41
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rma_list.c
@@ -0,0 +1,291 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+#include <linux/mmu_notifier.h>
+#include <linux/highmem.h>
+
+/*
+ * scif_insert_tcw:
+ *
+ * Insert a temp window to the temp registration list sorted by va_for_temp.
+ * RMA lock must be held.
+ */
+void scif_insert_tcw(struct scif_window *window, struct list_head *head)
+{
+	struct scif_window *curr = NULL;
+	struct scif_window *prev = list_entry(head, struct scif_window, list);
+	struct list_head *item;
+
+	INIT_LIST_HEAD(&window->list);
+	/* Compare with tail and if the entry is new tail add it to the end */
+	if (!list_empty(head)) {
+		curr = list_entry(head->prev, struct scif_window, list);
+		if (curr->va_for_temp < window->va_for_temp) {
+			list_add_tail(&window->list, head);
+			return;
+		}
+	}
+	list_for_each(item, head) {
+		curr = list_entry(item, struct scif_window, list);
+		if (curr->va_for_temp > window->va_for_temp)
+			break;
+		prev = curr;
+	}
+	list_add(&window->list, &prev->list);
+}
+
+/*
+ * scif_insert_window:
+ *
+ * Insert a window to the self registration list sorted by offset.
+ * RMA lock must be held.
+ */
+void scif_insert_window(struct scif_window *window, struct list_head *head)
+{
+	struct scif_window *curr = NULL, *prev = NULL;
+	struct list_head *item;
+
+	INIT_LIST_HEAD(&window->list);
+	list_for_each(item, head) {
+		curr = list_entry(item, struct scif_window, list);
+		if (curr->offset > window->offset)
+			break;
+		prev = curr;
+	}
+	if (!prev)
+		list_add(&window->list, head);
+	else
+		list_add(&window->list, &prev->list);
+	scif_set_window_ref(window, window->nr_pages);
+}
+
+/*
+ * scif_query_tcw:
+ *
+ * Query the temp cached registration list of ep for an overlapping window
+ * in case of permission mismatch, destroy the previous window. if permissions
+ * match and overlap is partial, destroy the window but return the new range
+ * RMA lock must be held.
+ */
+int scif_query_tcw(struct scif_endpt *ep, struct scif_rma_req *req)
+{
+	struct list_head *item, *temp, *head = req->head;
+	struct scif_window *window;
+	u64 start_va_window, start_va_req = req->va_for_temp;
+	u64 end_va_window, end_va_req = start_va_req + req->nr_bytes;
+
+	if (!req->nr_bytes)
+		return -EINVAL;
+	/*
+	 * Avoid traversing the entire list to find out that there
+	 * is no entry that matches
+	 */
+	if (!list_empty(head)) {
+		window = list_last_entry(head, struct scif_window, list);
+		end_va_window = window->va_for_temp +
+			(window->nr_pages << PAGE_SHIFT);
+		if (start_va_req > end_va_window)
+			return -ENXIO;
+	}
+	list_for_each_safe(item, temp, head) {
+		window = list_entry(item, struct scif_window, list);
+		start_va_window = window->va_for_temp;
+		end_va_window = window->va_for_temp +
+			(window->nr_pages << PAGE_SHIFT);
+		if (start_va_req < start_va_window &&
+		    end_va_req < start_va_window)
+			break;
+		if (start_va_req >= end_va_window)
+			continue;
+		if ((window->prot & req->prot) == req->prot) {
+			if (start_va_req >= start_va_window &&
+			    end_va_req <= end_va_window) {
+				*req->out_window = window;
+				return 0;
+			}
+			/* expand window */
+			if (start_va_req < start_va_window) {
+				req->nr_bytes +=
+					start_va_window - start_va_req;
+				req->va_for_temp = start_va_window;
+			}
+			if (end_va_req >= end_va_window)
+				req->nr_bytes += end_va_window - end_va_req;
+		}
+		/* Destroy the old window to create a new one */
+		__scif_rma_destroy_tcw_helper(window);
+		break;
+	}
+	return -ENXIO;
+}
+
+/*
+ * scif_query_window:
+ *
+ * Query the registration list and check if a valid contiguous
+ * range of windows exist.
+ * RMA lock must be held.
+ */
+int scif_query_window(struct scif_rma_req *req)
+{
+	struct list_head *item;
+	struct scif_window *window;
+	s64 end_offset, offset = req->offset;
+	u64 tmp_min, nr_bytes_left = req->nr_bytes;
+
+	if (!req->nr_bytes)
+		return -EINVAL;
+
+	list_for_each(item, req->head) {
+		window = list_entry(item, struct scif_window, list);
+		end_offset = window->offset +
+			(window->nr_pages << PAGE_SHIFT);
+		if (offset < window->offset)
+			/* Offset not found! */
+			return -ENXIO;
+		if (offset >= end_offset)
+			continue;
+		/* Check read/write protections. */
+		if ((window->prot & req->prot) != req->prot)
+			return -EPERM;
+		if (nr_bytes_left == req->nr_bytes)
+			/* Store the first window */
+			*req->out_window = window;
+		tmp_min = min((u64)end_offset - offset, nr_bytes_left);
+		nr_bytes_left -= tmp_min;
+		offset += tmp_min;
+		/*
+		 * Range requested encompasses
+		 * multiple windows contiguously.
+		 */
+		if (!nr_bytes_left) {
+			/* Done for partial window */
+			if (req->type == SCIF_WINDOW_PARTIAL ||
+			    req->type == SCIF_WINDOW_SINGLE)
+				return 0;
+			/* Extra logic for full windows */
+			if (offset == end_offset)
+				/* Spanning multiple whole windows */
+				return 0;
+				/* Not spanning multiple whole windows */
+			return -ENXIO;
+		}
+		if (req->type == SCIF_WINDOW_SINGLE)
+			break;
+	}
+	dev_err(scif_info.mdev.this_device,
+		"%s %d ENXIO\n", __func__, __LINE__);
+	return -ENXIO;
+}
+
+/*
+ * scif_rma_list_unregister:
+ *
+ * Traverse the self registration list starting from window:
+ * 1) Call scif_unregister_window(..)
+ * RMA lock must be held.
+ */
+int scif_rma_list_unregister(struct scif_window *window,
+			     s64 offset, int nr_pages)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+	struct list_head *head = &ep->rma_info.reg_list;
+	s64 end_offset;
+	int err = 0;
+	int loop_nr_pages;
+	struct scif_window *_window;
+
+	list_for_each_entry_safe_from(window, _window, head, list) {
+		end_offset = window->offset + (window->nr_pages << PAGE_SHIFT);
+		loop_nr_pages = min((int)((end_offset - offset) >> PAGE_SHIFT),
+				    nr_pages);
+		err = scif_unregister_window(window);
+		if (err)
+			return err;
+		nr_pages -= loop_nr_pages;
+		offset += (loop_nr_pages << PAGE_SHIFT);
+		if (!nr_pages)
+			break;
+	}
+	return 0;
+}
+
+/*
+ * scif_unmap_all_window:
+ *
+ * Traverse all the windows in the self registration list and:
+ * 1) Delete any DMA mappings created
+ */
+void scif_unmap_all_windows(scif_epd_t epd)
+{
+	struct list_head *item, *tmp;
+	struct scif_window *window;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct list_head *head = &ep->rma_info.reg_list;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	list_for_each_safe(item, tmp, head) {
+		window = list_entry(item, struct scif_window, list);
+		scif_unmap_window(ep->remote_dev, window);
+	}
+	mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/*
+ * scif_unregister_all_window:
+ *
+ * Traverse all the windows in the self registration list and:
+ * 1) Call scif_unregister_window(..)
+ * RMA lock must be held.
+ */
+int scif_unregister_all_windows(scif_epd_t epd)
+{
+	struct list_head *item, *tmp;
+	struct scif_window *window;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct list_head *head = &ep->rma_info.reg_list;
+	int err = 0;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+retry:
+	item = NULL;
+	tmp = NULL;
+	list_for_each_safe(item, tmp, head) {
+		window = list_entry(item, struct scif_window, list);
+		ep->rma_info.async_list_del = 0;
+		err = scif_unregister_window(window);
+		if (err)
+			dev_err(scif_info.mdev.this_device,
+				"%s %d err %d\n",
+				__func__, __LINE__, err);
+		/*
+		 * Need to restart list traversal if there has been
+		 * an asynchronous list entry deletion.
+		 */
+		if (READ_ONCE(ep->rma_info.async_list_del))
+			goto retry;
+	}
+	mutex_unlock(&ep->rma_info.rma_lock);
+	if (!list_empty(&ep->rma_info.mmn_list)) {
+		spin_lock(&scif_info.rmalock);
+		list_add_tail(&ep->mmu_list, &scif_info.mmu_notif_cleanup);
+		spin_unlock(&scif_info.rmalock);
+		schedule_work(&scif_info.mmu_notif_work);
+	}
+	return err;
+}
diff --git a/drivers/misc/mic/scif/scif_rma_list.h b/drivers/misc/mic/scif/scif_rma_list.h
new file mode 100644
index 000000000..7d58d1d55
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rma_list.h
@@ -0,0 +1,57 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_RMA_LIST_H
+#define SCIF_RMA_LIST_H
+
+/*
+ * struct scif_rma_req - Self Registration list RMA Request query
+ *
+ * @out_window - Returns the window if found
+ * @offset: Starting offset
+ * @nr_bytes: number of bytes
+ * @prot: protection requested i.e. read or write or both
+ * @type: Specify single, partial or multiple windows
+ * @head: Head of list on which to search
+ * @va_for_temp: VA for searching temporary cached windows
+ */
+struct scif_rma_req {
+	struct scif_window **out_window;
+	union {
+		s64 offset;
+		unsigned long va_for_temp;
+	};
+	size_t nr_bytes;
+	int prot;
+	enum scif_window_type type;
+	struct list_head *head;
+};
+
+/* Insert */
+void scif_insert_window(struct scif_window *window, struct list_head *head);
+void scif_insert_tcw(struct scif_window *window,
+		     struct list_head *head);
+/* Query */
+int scif_query_window(struct scif_rma_req *request);
+int scif_query_tcw(struct scif_endpt *ep, struct scif_rma_req *request);
+/* Called from close to unregister all self windows */
+int scif_unregister_all_windows(scif_epd_t epd);
+void scif_unmap_all_windows(scif_epd_t epd);
+/* Traverse list and unregister */
+int scif_rma_list_unregister(struct scif_window *window, s64 offset,
+			     int nr_pages);
+#endif /* SCIF_RMA_LIST_H */
diff --git a/drivers/misc/mic/vop/Makefile b/drivers/misc/mic/vop/Makefile
new file mode 100644
index 000000000..78819c899
--- /dev/null
+++ b/drivers/misc/mic/vop/Makefile
@@ -0,0 +1,9 @@
+#
+# Makefile - Intel MIC Linux driver.
+# Copyright(c) 2016, Intel Corporation.
+#
+obj-m := vop.o
+
+vop-objs += vop_main.o
+vop-objs += vop_debugfs.o
+vop-objs += vop_vringh.o
diff --git a/drivers/misc/mic/vop/vop_debugfs.c b/drivers/misc/mic/vop/vop_debugfs.c
new file mode 100644
index 000000000..ab43884e5
--- /dev/null
+++ b/drivers/misc/mic/vop/vop_debugfs.c
@@ -0,0 +1,232 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel Virtio Over PCIe (VOP) driver.
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "vop_main.h"
+
+static int vop_dp_show(struct seq_file *s, void *pos)
+{
+	struct mic_device_desc *d;
+	struct mic_device_ctrl *dc;
+	struct mic_vqconfig *vqconfig;
+	__u32 *features;
+	__u8 *config;
+	struct vop_info *vi = s->private;
+	struct vop_device *vpdev = vi->vpdev;
+	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
+	int j, k;
+
+	seq_printf(s, "Bootparam: magic 0x%x\n",
+		   bootparam->magic);
+	seq_printf(s, "Bootparam: h2c_config_db %d\n",
+		   bootparam->h2c_config_db);
+	seq_printf(s, "Bootparam: node_id %d\n",
+		   bootparam->node_id);
+	seq_printf(s, "Bootparam: c2h_scif_db %d\n",
+		   bootparam->c2h_scif_db);
+	seq_printf(s, "Bootparam: h2c_scif_db %d\n",
+		   bootparam->h2c_scif_db);
+	seq_printf(s, "Bootparam: scif_host_dma_addr 0x%llx\n",
+		   bootparam->scif_host_dma_addr);
+	seq_printf(s, "Bootparam: scif_card_dma_addr 0x%llx\n",
+		   bootparam->scif_card_dma_addr);
+
+	for (j = sizeof(*bootparam);
+		j < MIC_DP_SIZE; j += mic_total_desc_size(d)) {
+		d = (void *)bootparam + j;
+		dc = (void *)d + mic_aligned_desc_size(d);
+
+		/* end of list */
+		if (d->type == 0)
+			break;
+
+		if (d->type == -1)
+			continue;
+
+		seq_printf(s, "Type %d ", d->type);
+		seq_printf(s, "Num VQ %d ", d->num_vq);
+		seq_printf(s, "Feature Len %d\n", d->feature_len);
+		seq_printf(s, "Config Len %d ", d->config_len);
+		seq_printf(s, "Shutdown Status %d\n", d->status);
+
+		for (k = 0; k < d->num_vq; k++) {
+			vqconfig = mic_vq_config(d) + k;
+			seq_printf(s, "vqconfig[%d]: ", k);
+			seq_printf(s, "address 0x%llx ",
+				   vqconfig->address);
+			seq_printf(s, "num %d ", vqconfig->num);
+			seq_printf(s, "used address 0x%llx\n",
+				   vqconfig->used_address);
+		}
+
+		features = (__u32 *)mic_vq_features(d);
+		seq_printf(s, "Features: Host 0x%x ", features[0]);
+		seq_printf(s, "Guest 0x%x\n", features[1]);
+
+		config = mic_vq_configspace(d);
+		for (k = 0; k < d->config_len; k++)
+			seq_printf(s, "config[%d]=%d\n", k, config[k]);
+
+		seq_puts(s, "Device control:\n");
+		seq_printf(s, "Config Change %d ", dc->config_change);
+		seq_printf(s, "Vdev reset %d\n", dc->vdev_reset);
+		seq_printf(s, "Guest Ack %d ", dc->guest_ack);
+		seq_printf(s, "Host ack %d\n", dc->host_ack);
+		seq_printf(s, "Used address updated %d ",
+			   dc->used_address_updated);
+		seq_printf(s, "Vdev 0x%llx\n", dc->vdev);
+		seq_printf(s, "c2h doorbell %d ", dc->c2h_vdev_db);
+		seq_printf(s, "h2c doorbell %d\n", dc->h2c_vdev_db);
+	}
+	schedule_work(&vi->hotplug_work);
+	return 0;
+}
+
+static int vop_dp_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, vop_dp_show, inode->i_private);
+}
+
+static int vop_dp_debug_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file);
+}
+
+static const struct file_operations dp_ops = {
+	.owner   = THIS_MODULE,
+	.open    = vop_dp_debug_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = vop_dp_debug_release
+};
+
+static int vop_vdev_info_show(struct seq_file *s, void *unused)
+{
+	struct vop_info *vi = s->private;
+	struct list_head *pos, *tmp;
+	struct vop_vdev *vdev;
+	int i, j;
+
+	mutex_lock(&vi->vop_mutex);
+	list_for_each_safe(pos, tmp, &vi->vdev_list) {
+		vdev = list_entry(pos, struct vop_vdev, list);
+		seq_printf(s, "VDEV type %d state %s in %ld out %ld in_dma %ld out_dma %ld\n",
+			   vdev->virtio_id,
+			   vop_vdevup(vdev) ? "UP" : "DOWN",
+			   vdev->in_bytes,
+			   vdev->out_bytes,
+			   vdev->in_bytes_dma,
+			   vdev->out_bytes_dma);
+		for (i = 0; i < MIC_MAX_VRINGS; i++) {
+			struct vring_desc *desc;
+			struct vring_avail *avail;
+			struct vring_used *used;
+			struct vop_vringh *vvr = &vdev->vvr[i];
+			struct vringh *vrh = &vvr->vrh;
+			int num = vrh->vring.num;
+
+			if (!num)
+				continue;
+			desc = vrh->vring.desc;
+			seq_printf(s, "vring i %d avail_idx %d",
+				   i, vvr->vring.info->avail_idx & (num - 1));
+			seq_printf(s, " vring i %d avail_idx %d\n",
+				   i, vvr->vring.info->avail_idx);
+			seq_printf(s, "vrh i %d weak_barriers %d",
+				   i, vrh->weak_barriers);
+			seq_printf(s, " last_avail_idx %d last_used_idx %d",
+				   vrh->last_avail_idx, vrh->last_used_idx);
+			seq_printf(s, " completed %d\n", vrh->completed);
+			for (j = 0; j < num; j++) {
+				seq_printf(s, "desc[%d] addr 0x%llx len %d",
+					   j, desc->addr, desc->len);
+				seq_printf(s, " flags 0x%x next %d\n",
+					   desc->flags, desc->next);
+				desc++;
+			}
+			avail = vrh->vring.avail;
+			seq_printf(s, "avail flags 0x%x idx %d\n",
+				   vringh16_to_cpu(vrh, avail->flags),
+				   vringh16_to_cpu(vrh,
+						   avail->idx) & (num - 1));
+			seq_printf(s, "avail flags 0x%x idx %d\n",
+				   vringh16_to_cpu(vrh, avail->flags),
+				   vringh16_to_cpu(vrh, avail->idx));
+			for (j = 0; j < num; j++)
+				seq_printf(s, "avail ring[%d] %d\n",
+					   j, avail->ring[j]);
+			used = vrh->vring.used;
+			seq_printf(s, "used flags 0x%x idx %d\n",
+				   vringh16_to_cpu(vrh, used->flags),
+				   vringh16_to_cpu(vrh, used->idx) & (num - 1));
+			seq_printf(s, "used flags 0x%x idx %d\n",
+				   vringh16_to_cpu(vrh, used->flags),
+				   vringh16_to_cpu(vrh, used->idx));
+			for (j = 0; j < num; j++)
+				seq_printf(s, "used ring[%d] id %d len %d\n",
+					   j, vringh32_to_cpu(vrh,
+							      used->ring[j].id),
+					   vringh32_to_cpu(vrh,
+							   used->ring[j].len));
+		}
+	}
+	mutex_unlock(&vi->vop_mutex);
+
+	return 0;
+}
+
+static int vop_vdev_info_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, vop_vdev_info_show, inode->i_private);
+}
+
+static int vop_vdev_info_debug_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file);
+}
+
+static const struct file_operations vdev_info_ops = {
+	.owner   = THIS_MODULE,
+	.open    = vop_vdev_info_debug_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = vop_vdev_info_debug_release
+};
+
+void vop_init_debugfs(struct vop_info *vi)
+{
+	char name[16];
+
+	snprintf(name, sizeof(name), "%s%d", KBUILD_MODNAME, vi->vpdev->dnode);
+	vi->dbg = debugfs_create_dir(name, NULL);
+	if (!vi->dbg) {
+		pr_err("can't create debugfs dir vop\n");
+		return;
+	}
+	debugfs_create_file("dp", 0444, vi->dbg, vi, &dp_ops);
+	debugfs_create_file("vdev_info", 0444, vi->dbg, vi, &vdev_info_ops);
+}
+
+void vop_exit_debugfs(struct vop_info *vi)
+{
+	debugfs_remove_recursive(vi->dbg);
+}
diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c
new file mode 100644
index 000000000..f4332a97c
--- /dev/null
+++ b/drivers/misc/mic/vop/vop_main.c
@@ -0,0 +1,766 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Adapted from:
+ *
+ * virtio for kvm on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ *
+ * Intel Virtio Over PCIe (VOP) driver.
+ *
+ */
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/dma-mapping.h>
+
+#include "vop_main.h"
+
+#define VOP_MAX_VRINGS 4
+
+/*
+ * _vop_vdev - Allocated per virtio device instance injected by the peer.
+ *
+ * @vdev: Virtio device
+ * @desc: Virtio device page descriptor
+ * @dc: Virtio device control
+ * @vpdev: VOP device which is the parent for this virtio device
+ * @vr: Buffer for accessing the VRING
+ * @used: Buffer for used
+ * @used_size: Size of the used buffer
+ * @reset_done: Track whether VOP reset is complete
+ * @virtio_cookie: Cookie returned upon requesting a interrupt
+ * @c2h_vdev_db: The doorbell used by the guest to interrupt the host
+ * @h2c_vdev_db: The doorbell used by the host to interrupt the guest
+ * @dnode: The destination node
+ */
+struct _vop_vdev {
+	struct virtio_device vdev;
+	struct mic_device_desc __iomem *desc;
+	struct mic_device_ctrl __iomem *dc;
+	struct vop_device *vpdev;
+	void __iomem *vr[VOP_MAX_VRINGS];
+	dma_addr_t used[VOP_MAX_VRINGS];
+	int used_size[VOP_MAX_VRINGS];
+	struct completion reset_done;
+	struct mic_irq *virtio_cookie;
+	int c2h_vdev_db;
+	int h2c_vdev_db;
+	int dnode;
+};
+
+#define to_vopvdev(vd) container_of(vd, struct _vop_vdev, vdev)
+
+#define _vop_aligned_desc_size(d) __mic_align(_vop_desc_size(d), 8)
+
+/* Helper API to obtain the parent of the virtio device */
+static inline struct device *_vop_dev(struct _vop_vdev *vdev)
+{
+	return vdev->vdev.dev.parent;
+}
+
+static inline unsigned _vop_desc_size(struct mic_device_desc __iomem *desc)
+{
+	return sizeof(*desc)
+		+ ioread8(&desc->num_vq) * sizeof(struct mic_vqconfig)
+		+ ioread8(&desc->feature_len) * 2
+		+ ioread8(&desc->config_len);
+}
+
+static inline struct mic_vqconfig __iomem *
+_vop_vq_config(struct mic_device_desc __iomem *desc)
+{
+	return (struct mic_vqconfig __iomem *)(desc + 1);
+}
+
+static inline u8 __iomem *
+_vop_vq_features(struct mic_device_desc __iomem *desc)
+{
+	return (u8 __iomem *)(_vop_vq_config(desc) + ioread8(&desc->num_vq));
+}
+
+static inline u8 __iomem *
+_vop_vq_configspace(struct mic_device_desc __iomem *desc)
+{
+	return _vop_vq_features(desc) + ioread8(&desc->feature_len) * 2;
+}
+
+static inline unsigned
+_vop_total_desc_size(struct mic_device_desc __iomem *desc)
+{
+	return _vop_aligned_desc_size(desc) + sizeof(struct mic_device_ctrl);
+}
+
+/* This gets the device's feature bits. */
+static u64 vop_get_features(struct virtio_device *vdev)
+{
+	unsigned int i, bits;
+	u32 features = 0;
+	struct mic_device_desc __iomem *desc = to_vopvdev(vdev)->desc;
+	u8 __iomem *in_features = _vop_vq_features(desc);
+	int feature_len = ioread8(&desc->feature_len);
+
+	bits = min_t(unsigned, feature_len, sizeof(vdev->features)) * 8;
+	for (i = 0; i < bits; i++)
+		if (ioread8(&in_features[i / 8]) & (BIT(i % 8)))
+			features |= BIT(i);
+
+	return features;
+}
+
+static int vop_finalize_features(struct virtio_device *vdev)
+{
+	unsigned int i, bits;
+	struct mic_device_desc __iomem *desc = to_vopvdev(vdev)->desc;
+	u8 feature_len = ioread8(&desc->feature_len);
+	/* Second half of bitmap is features we accept. */
+	u8 __iomem *out_features =
+		_vop_vq_features(desc) + feature_len;
+
+	/* Give virtio_ring a chance to accept features. */
+	vring_transport_features(vdev);
+
+	memset_io(out_features, 0, feature_len);
+	bits = min_t(unsigned, feature_len,
+		     sizeof(vdev->features)) * 8;
+	for (i = 0; i < bits; i++) {
+		if (__virtio_test_bit(vdev, i))
+			iowrite8(ioread8(&out_features[i / 8]) | (1 << (i % 8)),
+				 &out_features[i / 8]);
+	}
+	return 0;
+}
+
+/*
+ * Reading and writing elements in config space
+ */
+static void vop_get(struct virtio_device *vdev, unsigned int offset,
+		    void *buf, unsigned len)
+{
+	struct mic_device_desc __iomem *desc = to_vopvdev(vdev)->desc;
+
+	if (offset + len > ioread8(&desc->config_len))
+		return;
+	memcpy_fromio(buf, _vop_vq_configspace(desc) + offset, len);
+}
+
+static void vop_set(struct virtio_device *vdev, unsigned int offset,
+		    const void *buf, unsigned len)
+{
+	struct mic_device_desc __iomem *desc = to_vopvdev(vdev)->desc;
+
+	if (offset + len > ioread8(&desc->config_len))
+		return;
+	memcpy_toio(_vop_vq_configspace(desc) + offset, buf, len);
+}
+
+/*
+ * The operations to get and set the status word just access the status
+ * field of the device descriptor. set_status also interrupts the host
+ * to tell about status changes.
+ */
+static u8 vop_get_status(struct virtio_device *vdev)
+{
+	return ioread8(&to_vopvdev(vdev)->desc->status);
+}
+
+static void vop_set_status(struct virtio_device *dev, u8 status)
+{
+	struct _vop_vdev *vdev = to_vopvdev(dev);
+	struct vop_device *vpdev = vdev->vpdev;
+
+	if (!status)
+		return;
+	iowrite8(status, &vdev->desc->status);
+	vpdev->hw_ops->send_intr(vpdev, vdev->c2h_vdev_db);
+}
+
+/* Inform host on a virtio device reset and wait for ack from host */
+static void vop_reset_inform_host(struct virtio_device *dev)
+{
+	struct _vop_vdev *vdev = to_vopvdev(dev);
+	struct mic_device_ctrl __iomem *dc = vdev->dc;
+	struct vop_device *vpdev = vdev->vpdev;
+	int retry;
+
+	iowrite8(0, &dc->host_ack);
+	iowrite8(1, &dc->vdev_reset);
+	vpdev->hw_ops->send_intr(vpdev, vdev->c2h_vdev_db);
+
+	/* Wait till host completes all card accesses and acks the reset */
+	for (retry = 100; retry--;) {
+		if (ioread8(&dc->host_ack))
+			break;
+		msleep(100);
+	};
+
+	dev_dbg(_vop_dev(vdev), "%s: retry: %d\n", __func__, retry);
+
+	/* Reset status to 0 in case we timed out */
+	iowrite8(0, &vdev->desc->status);
+}
+
+static void vop_reset(struct virtio_device *dev)
+{
+	struct _vop_vdev *vdev = to_vopvdev(dev);
+
+	dev_dbg(_vop_dev(vdev), "%s: virtio id %d\n",
+		__func__, dev->id.device);
+
+	vop_reset_inform_host(dev);
+	complete_all(&vdev->reset_done);
+}
+
+/*
+ * The virtio_ring code calls this API when it wants to notify the Host.
+ */
+static bool vop_notify(struct virtqueue *vq)
+{
+	struct _vop_vdev *vdev = vq->priv;
+	struct vop_device *vpdev = vdev->vpdev;
+
+	vpdev->hw_ops->send_intr(vpdev, vdev->c2h_vdev_db);
+	return true;
+}
+
+static void vop_del_vq(struct virtqueue *vq, int n)
+{
+	struct _vop_vdev *vdev = to_vopvdev(vq->vdev);
+	struct vring *vr = (struct vring *)(vq + 1);
+	struct vop_device *vpdev = vdev->vpdev;
+
+	dma_unmap_single(&vpdev->dev, vdev->used[n],
+			 vdev->used_size[n], DMA_BIDIRECTIONAL);
+	free_pages((unsigned long)vr->used, get_order(vdev->used_size[n]));
+	vring_del_virtqueue(vq);
+	vpdev->hw_ops->iounmap(vpdev, vdev->vr[n]);
+	vdev->vr[n] = NULL;
+}
+
+static void vop_del_vqs(struct virtio_device *dev)
+{
+	struct _vop_vdev *vdev = to_vopvdev(dev);
+	struct virtqueue *vq, *n;
+	int idx = 0;
+
+	dev_dbg(_vop_dev(vdev), "%s\n", __func__);
+
+	list_for_each_entry_safe(vq, n, &dev->vqs, list)
+		vop_del_vq(vq, idx++);
+}
+
+/*
+ * This routine will assign vring's allocated in host/io memory. Code in
+ * virtio_ring.c however continues to access this io memory as if it were local
+ * memory without io accessors.
+ */
+static struct virtqueue *vop_find_vq(struct virtio_device *dev,
+				     unsigned index,
+				     void (*callback)(struct virtqueue *vq),
+				     const char *name, bool ctx)
+{
+	struct _vop_vdev *vdev = to_vopvdev(dev);
+	struct vop_device *vpdev = vdev->vpdev;
+	struct mic_vqconfig __iomem *vqconfig;
+	struct mic_vqconfig config;
+	struct virtqueue *vq;
+	void __iomem *va;
+	struct _mic_vring_info __iomem *info;
+	void *used;
+	int vr_size, _vr_size, err, magic;
+	struct vring *vr;
+	u8 type = ioread8(&vdev->desc->type);
+
+	if (index >= ioread8(&vdev->desc->num_vq))
+		return ERR_PTR(-ENOENT);
+
+	if (!name)
+		return ERR_PTR(-ENOENT);
+
+	/* First assign the vring's allocated in host memory */
+	vqconfig = _vop_vq_config(vdev->desc) + index;
+	memcpy_fromio(&config, vqconfig, sizeof(config));
+	_vr_size = round_up(vring_size(le16_to_cpu(config.num), MIC_VIRTIO_RING_ALIGN), 4);
+	vr_size = PAGE_ALIGN(_vr_size + sizeof(struct _mic_vring_info));
+	va = vpdev->hw_ops->ioremap(vpdev, le64_to_cpu(config.address),
+			vr_size);
+	if (!va)
+		return ERR_PTR(-ENOMEM);
+	vdev->vr[index] = va;
+	memset_io(va, 0x0, _vr_size);
+	vq = vring_new_virtqueue(
+				index,
+				le16_to_cpu(config.num), MIC_VIRTIO_RING_ALIGN,
+				dev,
+				false,
+				ctx,
+				(void __force *)va, vop_notify, callback, name);
+	if (!vq) {
+		err = -ENOMEM;
+		goto unmap;
+	}
+	info = va + _vr_size;
+	magic = ioread32(&info->magic);
+
+	if (WARN(magic != MIC_MAGIC + type + index, "magic mismatch")) {
+		err = -EIO;
+		goto unmap;
+	}
+
+	/* Allocate and reassign used ring now */
+	vdev->used_size[index] = PAGE_ALIGN(sizeof(__u16) * 3 +
+					     sizeof(struct vring_used_elem) *
+					     le16_to_cpu(config.num));
+	used = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+					get_order(vdev->used_size[index]));
+	if (!used) {
+		err = -ENOMEM;
+		dev_err(_vop_dev(vdev), "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto del_vq;
+	}
+	vdev->used[index] = dma_map_single(&vpdev->dev, used,
+					    vdev->used_size[index],
+					    DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(&vpdev->dev, vdev->used[index])) {
+		err = -ENOMEM;
+		dev_err(_vop_dev(vdev), "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto free_used;
+	}
+	writeq(vdev->used[index], &vqconfig->used_address);
+	/*
+	 * To reassign the used ring here we are directly accessing
+	 * struct vring_virtqueue which is a private data structure
+	 * in virtio_ring.c. At the minimum, a BUILD_BUG_ON() in
+	 * vring_new_virtqueue() would ensure that
+	 *  (&vq->vring == (struct vring *) (&vq->vq + 1));
+	 */
+	vr = (struct vring *)(vq + 1);
+	vr->used = used;
+
+	vq->priv = vdev;
+	return vq;
+free_used:
+	free_pages((unsigned long)used,
+		   get_order(vdev->used_size[index]));
+del_vq:
+	vring_del_virtqueue(vq);
+unmap:
+	vpdev->hw_ops->iounmap(vpdev, vdev->vr[index]);
+	return ERR_PTR(err);
+}
+
+static int vop_find_vqs(struct virtio_device *dev, unsigned nvqs,
+			struct virtqueue *vqs[],
+			vq_callback_t *callbacks[],
+			const char * const names[], const bool *ctx,
+			struct irq_affinity *desc)
+{
+	struct _vop_vdev *vdev = to_vopvdev(dev);
+	struct vop_device *vpdev = vdev->vpdev;
+	struct mic_device_ctrl __iomem *dc = vdev->dc;
+	int i, err, retry;
+
+	/* We must have this many virtqueues. */
+	if (nvqs > ioread8(&vdev->desc->num_vq))
+		return -ENOENT;
+
+	for (i = 0; i < nvqs; ++i) {
+		dev_dbg(_vop_dev(vdev), "%s: %d: %s\n",
+			__func__, i, names[i]);
+		vqs[i] = vop_find_vq(dev, i, callbacks[i], names[i],
+				     ctx ? ctx[i] : false);
+		if (IS_ERR(vqs[i])) {
+			err = PTR_ERR(vqs[i]);
+			goto error;
+		}
+	}
+
+	iowrite8(1, &dc->used_address_updated);
+	/*
+	 * Send an interrupt to the host to inform it that used
+	 * rings have been re-assigned.
+	 */
+	vpdev->hw_ops->send_intr(vpdev, vdev->c2h_vdev_db);
+	for (retry = 100; --retry;) {
+		if (!ioread8(&dc->used_address_updated))
+			break;
+		msleep(100);
+	};
+
+	dev_dbg(_vop_dev(vdev), "%s: retry: %d\n", __func__, retry);
+	if (!retry) {
+		err = -ENODEV;
+		goto error;
+	}
+
+	return 0;
+error:
+	vop_del_vqs(dev);
+	return err;
+}
+
+/*
+ * The config ops structure as defined by virtio config
+ */
+static struct virtio_config_ops vop_vq_config_ops = {
+	.get_features = vop_get_features,
+	.finalize_features = vop_finalize_features,
+	.get = vop_get,
+	.set = vop_set,
+	.get_status = vop_get_status,
+	.set_status = vop_set_status,
+	.reset = vop_reset,
+	.find_vqs = vop_find_vqs,
+	.del_vqs = vop_del_vqs,
+};
+
+static irqreturn_t vop_virtio_intr_handler(int irq, void *data)
+{
+	struct _vop_vdev *vdev = data;
+	struct vop_device *vpdev = vdev->vpdev;
+	struct virtqueue *vq;
+
+	vpdev->hw_ops->ack_interrupt(vpdev, vdev->h2c_vdev_db);
+	list_for_each_entry(vq, &vdev->vdev.vqs, list)
+		vring_interrupt(0, vq);
+
+	return IRQ_HANDLED;
+}
+
+static void vop_virtio_release_dev(struct device *_d)
+{
+	struct virtio_device *vdev =
+			container_of(_d, struct virtio_device, dev);
+	struct _vop_vdev *vop_vdev =
+			container_of(vdev, struct _vop_vdev, vdev);
+
+	kfree(vop_vdev);
+}
+
+/*
+ * adds a new device and register it with virtio
+ * appropriate drivers are loaded by the device model
+ */
+static int _vop_add_device(struct mic_device_desc __iomem *d,
+			   unsigned int offset, struct vop_device *vpdev,
+			   int dnode)
+{
+	struct _vop_vdev *vdev, *reg_dev = NULL;
+	int ret;
+	u8 type = ioread8(&d->type);
+
+	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
+	if (!vdev)
+		return -ENOMEM;
+
+	vdev->vpdev = vpdev;
+	vdev->vdev.dev.parent = &vpdev->dev;
+	vdev->vdev.dev.release = vop_virtio_release_dev;
+	vdev->vdev.id.device = type;
+	vdev->vdev.config = &vop_vq_config_ops;
+	vdev->desc = d;
+	vdev->dc = (void __iomem *)d + _vop_aligned_desc_size(d);
+	vdev->dnode = dnode;
+	vdev->vdev.priv = (void *)(u64)dnode;
+	init_completion(&vdev->reset_done);
+
+	vdev->h2c_vdev_db = vpdev->hw_ops->next_db(vpdev);
+	vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev,
+			vop_virtio_intr_handler, "virtio intr",
+			vdev, vdev->h2c_vdev_db);
+	if (IS_ERR(vdev->virtio_cookie)) {
+		ret = PTR_ERR(vdev->virtio_cookie);
+		goto kfree;
+	}
+	iowrite8((u8)vdev->h2c_vdev_db, &vdev->dc->h2c_vdev_db);
+	vdev->c2h_vdev_db = ioread8(&vdev->dc->c2h_vdev_db);
+
+	ret = register_virtio_device(&vdev->vdev);
+	reg_dev = vdev;
+	if (ret) {
+		dev_err(_vop_dev(vdev),
+			"Failed to register vop device %u type %u\n",
+			offset, type);
+		goto free_irq;
+	}
+	writeq((u64)vdev, &vdev->dc->vdev);
+	dev_dbg(_vop_dev(vdev), "%s: registered vop device %u type %u vdev %p\n",
+		__func__, offset, type, vdev);
+
+	return 0;
+
+free_irq:
+	vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
+kfree:
+	if (reg_dev)
+		put_device(&vdev->vdev.dev);
+	else
+		kfree(vdev);
+	return ret;
+}
+
+/*
+ * match for a vop device with a specific desc pointer
+ */
+static int vop_match_desc(struct device *dev, void *data)
+{
+	struct virtio_device *_dev = dev_to_virtio(dev);
+	struct _vop_vdev *vdev = to_vopvdev(_dev);
+
+	return vdev->desc == (void __iomem *)data;
+}
+
+static void _vop_handle_config_change(struct mic_device_desc __iomem *d,
+				      unsigned int offset,
+				      struct vop_device *vpdev)
+{
+	struct mic_device_ctrl __iomem *dc
+		= (void __iomem *)d + _vop_aligned_desc_size(d);
+	struct _vop_vdev *vdev = (struct _vop_vdev *)readq(&dc->vdev);
+
+	if (ioread8(&dc->config_change) != MIC_VIRTIO_PARAM_CONFIG_CHANGED)
+		return;
+
+	dev_dbg(&vpdev->dev, "%s %d\n", __func__, __LINE__);
+	virtio_config_changed(&vdev->vdev);
+	iowrite8(1, &dc->guest_ack);
+}
+
+/*
+ * removes a virtio device if a hot remove event has been
+ * requested by the host.
+ */
+static int _vop_remove_device(struct mic_device_desc __iomem *d,
+			      unsigned int offset, struct vop_device *vpdev)
+{
+	struct mic_device_ctrl __iomem *dc
+		= (void __iomem *)d + _vop_aligned_desc_size(d);
+	struct _vop_vdev *vdev = (struct _vop_vdev *)readq(&dc->vdev);
+	u8 status;
+	int ret = -1;
+
+	if (ioread8(&dc->config_change) == MIC_VIRTIO_PARAM_DEV_REMOVE) {
+		struct device *dev = get_device(&vdev->vdev.dev);
+
+		dev_dbg(&vpdev->dev,
+			"%s %d config_change %d type %d vdev %p\n",
+			__func__, __LINE__,
+			ioread8(&dc->config_change), ioread8(&d->type), vdev);
+		status = ioread8(&d->status);
+		reinit_completion(&vdev->reset_done);
+		unregister_virtio_device(&vdev->vdev);
+		vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
+		iowrite8(-1, &dc->h2c_vdev_db);
+		if (status & VIRTIO_CONFIG_S_DRIVER_OK)
+			wait_for_completion(&vdev->reset_done);
+		put_device(dev);
+		iowrite8(1, &dc->guest_ack);
+		dev_dbg(&vpdev->dev, "%s %d guest_ack %d\n",
+			__func__, __LINE__, ioread8(&dc->guest_ack));
+		iowrite8(-1, &d->type);
+		ret = 0;
+	}
+	return ret;
+}
+
+#define REMOVE_DEVICES true
+
+static void _vop_scan_devices(void __iomem *dp, struct vop_device *vpdev,
+			      bool remove, int dnode)
+{
+	s8 type;
+	unsigned int i;
+	struct mic_device_desc __iomem *d;
+	struct mic_device_ctrl __iomem *dc;
+	struct device *dev;
+	int ret;
+
+	for (i = sizeof(struct mic_bootparam);
+			i < MIC_DP_SIZE; i += _vop_total_desc_size(d)) {
+		d = dp + i;
+		dc = (void __iomem *)d + _vop_aligned_desc_size(d);
+		/*
+		 * This read barrier is paired with the corresponding write
+		 * barrier on the host which is inserted before adding or
+		 * removing a virtio device descriptor, by updating the type.
+		 */
+		rmb();
+		type = ioread8(&d->type);
+
+		/* end of list */
+		if (type == 0)
+			break;
+
+		if (type == -1)
+			continue;
+
+		/* device already exists */
+		dev = device_find_child(&vpdev->dev, (void __force *)d,
+					vop_match_desc);
+		if (dev) {
+			if (remove)
+				iowrite8(MIC_VIRTIO_PARAM_DEV_REMOVE,
+					 &dc->config_change);
+			put_device(dev);
+			_vop_handle_config_change(d, i, vpdev);
+			ret = _vop_remove_device(d, i, vpdev);
+			if (remove) {
+				iowrite8(0, &dc->config_change);
+				iowrite8(0, &dc->guest_ack);
+			}
+			continue;
+		}
+
+		/* new device */
+		dev_dbg(&vpdev->dev, "%s %d Adding new virtio device %p\n",
+			__func__, __LINE__, d);
+		if (!remove)
+			_vop_add_device(d, i, vpdev, dnode);
+	}
+}
+
+static void vop_scan_devices(struct vop_info *vi,
+			     struct vop_device *vpdev, bool remove)
+{
+	void __iomem *dp = vpdev->hw_ops->get_remote_dp(vpdev);
+
+	if (!dp)
+		return;
+	mutex_lock(&vi->vop_mutex);
+	_vop_scan_devices(dp, vpdev, remove, vpdev->dnode);
+	mutex_unlock(&vi->vop_mutex);
+}
+
+/*
+ * vop_hotplug_device tries to find changes in the device page.
+ */
+static void vop_hotplug_devices(struct work_struct *work)
+{
+	struct vop_info *vi = container_of(work, struct vop_info,
+					     hotplug_work);
+
+	vop_scan_devices(vi, vi->vpdev, !REMOVE_DEVICES);
+}
+
+/*
+ * Interrupt handler for hot plug/config changes etc.
+ */
+static irqreturn_t vop_extint_handler(int irq, void *data)
+{
+	struct vop_info *vi = data;
+	struct mic_bootparam __iomem *bp;
+	struct vop_device *vpdev = vi->vpdev;
+
+	bp = vpdev->hw_ops->get_remote_dp(vpdev);
+	dev_dbg(&vpdev->dev, "%s %d hotplug work\n",
+		__func__, __LINE__);
+	vpdev->hw_ops->ack_interrupt(vpdev, ioread8(&bp->h2c_config_db));
+	schedule_work(&vi->hotplug_work);
+	return IRQ_HANDLED;
+}
+
+static int vop_driver_probe(struct vop_device *vpdev)
+{
+	struct vop_info *vi;
+	int rc;
+
+	vi = kzalloc(sizeof(*vi), GFP_KERNEL);
+	if (!vi) {
+		rc = -ENOMEM;
+		goto exit;
+	}
+	dev_set_drvdata(&vpdev->dev, vi);
+	vi->vpdev = vpdev;
+
+	mutex_init(&vi->vop_mutex);
+	INIT_WORK(&vi->hotplug_work, vop_hotplug_devices);
+	if (vpdev->dnode) {
+		rc = vop_host_init(vi);
+		if (rc < 0)
+			goto free;
+	} else {
+		struct mic_bootparam __iomem *bootparam;
+
+		vop_scan_devices(vi, vpdev, !REMOVE_DEVICES);
+
+		vi->h2c_config_db = vpdev->hw_ops->next_db(vpdev);
+		vi->cookie = vpdev->hw_ops->request_irq(vpdev,
+							vop_extint_handler,
+							"virtio_config_intr",
+							vi, vi->h2c_config_db);
+		if (IS_ERR(vi->cookie)) {
+			rc = PTR_ERR(vi->cookie);
+			goto free;
+		}
+		bootparam = vpdev->hw_ops->get_remote_dp(vpdev);
+		iowrite8(vi->h2c_config_db, &bootparam->h2c_config_db);
+	}
+	vop_init_debugfs(vi);
+	return 0;
+free:
+	kfree(vi);
+exit:
+	return rc;
+}
+
+static void vop_driver_remove(struct vop_device *vpdev)
+{
+	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
+
+	if (vpdev->dnode) {
+		vop_host_uninit(vi);
+	} else {
+		struct mic_bootparam __iomem *bootparam =
+			vpdev->hw_ops->get_remote_dp(vpdev);
+		if (bootparam)
+			iowrite8(-1, &bootparam->h2c_config_db);
+		vpdev->hw_ops->free_irq(vpdev, vi->cookie, vi);
+		flush_work(&vi->hotplug_work);
+		vop_scan_devices(vi, vpdev, REMOVE_DEVICES);
+	}
+	vop_exit_debugfs(vi);
+	kfree(vi);
+}
+
+static struct vop_device_id id_table[] = {
+	{ VOP_DEV_TRNSP, VOP_DEV_ANY_ID },
+	{ 0 },
+};
+
+static struct vop_driver vop_driver = {
+	.driver.name =	KBUILD_MODNAME,
+	.driver.owner =	THIS_MODULE,
+	.id_table = id_table,
+	.probe = vop_driver_probe,
+	.remove = vop_driver_remove,
+};
+
+module_vop_driver(vop_driver);
+
+MODULE_DEVICE_TABLE(mbus, id_table);
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) Virtio Over PCIe (VOP) driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/vop/vop_main.h b/drivers/misc/mic/vop/vop_main.h
new file mode 100644
index 000000000..ba47ec7a6
--- /dev/null
+++ b/drivers/misc/mic/vop/vop_main.h
@@ -0,0 +1,170 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel Virtio Over PCIe (VOP) driver.
+ *
+ */
+#ifndef _VOP_MAIN_H_
+#define _VOP_MAIN_H_
+
+#include <linux/vringh.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio.h>
+#include <linux/miscdevice.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+
+#include "../bus/vop_bus.h"
+
+/*
+ * Note on endianness.
+ * 1. Host can be both BE or LE
+ * 2. Guest/card is LE. Host uses le_to_cpu to access desc/avail
+ *    rings and ioreadXX/iowriteXX to access used ring.
+ * 3. Device page exposed by host to guest contains LE values. Guest
+ *    accesses these using ioreadXX/iowriteXX etc. This way in general we
+ *    obey the virtio spec according to which guest works with native
+ *    endianness and host is aware of guest endianness and does all
+ *    required endianness conversion.
+ * 4. Data provided from user space to guest (in ADD_DEVICE and
+ *    CONFIG_CHANGE ioctl's) is not interpreted by the driver and should be
+ *    in guest endianness.
+ */
+
+/*
+ * vop_info - Allocated per invocation of VOP probe
+ *
+ * @vpdev: VOP device
+ * @hotplug_work: Handle virtio device creation, deletion and configuration
+ * @cookie: Cookie received upon requesting a virtio configuration interrupt
+ * @h2c_config_db: The doorbell used by the peer to indicate a config change
+ * @vdev_list: List of "active" virtio devices injected in the peer node
+ * @vop_mutex: Synchronize access to the device page as well as serialize
+ *             creation/deletion of virtio devices on the peer node
+ * @dp: Peer device page information
+ * @dbg: Debugfs entry
+ * @dma_ch: The DMA channel used by this transport for data transfers.
+ * @name: Name for this transport used in misc device creation.
+ * @miscdev: The misc device registered.
+ */
+struct vop_info {
+	struct vop_device *vpdev;
+	struct work_struct hotplug_work;
+	struct mic_irq *cookie;
+	int h2c_config_db;
+	struct list_head vdev_list;
+	struct mutex vop_mutex;
+	void __iomem *dp;
+	struct dentry *dbg;
+	struct dma_chan *dma_ch;
+	char name[16];
+	struct miscdevice miscdev;
+};
+
+/**
+ * struct vop_vringh - Virtio ring host information.
+ *
+ * @vring: The VOP vring used for setting up user space mappings.
+ * @vrh: The host VRINGH used for accessing the card vrings.
+ * @riov: The VRINGH read kernel IOV.
+ * @wiov: The VRINGH write kernel IOV.
+ * @head: The VRINGH head index address passed to vringh_getdesc_kern(..).
+ * @vr_mutex: Mutex for synchronizing access to the VRING.
+ * @buf: Temporary kernel buffer used to copy in/out data
+ * from/to the card via DMA.
+ * @buf_da: dma address of buf.
+ * @vdev: Back pointer to VOP virtio device for vringh_notify(..).
+ */
+struct vop_vringh {
+	struct mic_vring vring;
+	struct vringh vrh;
+	struct vringh_kiov riov;
+	struct vringh_kiov wiov;
+	u16 head;
+	struct mutex vr_mutex;
+	void *buf;
+	dma_addr_t buf_da;
+	struct vop_vdev *vdev;
+};
+
+/**
+ * struct vop_vdev - Host information for a card Virtio device.
+ *
+ * @virtio_id - Virtio device id.
+ * @waitq - Waitqueue to allow ring3 apps to poll.
+ * @vpdev - pointer to VOP bus device.
+ * @poll_wake - Used for waking up threads blocked in poll.
+ * @out_bytes - Debug stats for number of bytes copied from host to card.
+ * @in_bytes - Debug stats for number of bytes copied from card to host.
+ * @out_bytes_dma - Debug stats for number of bytes copied from host to card
+ * using DMA.
+ * @in_bytes_dma - Debug stats for number of bytes copied from card to host
+ * using DMA.
+ * @tx_len_unaligned - Debug stats for number of bytes copied to the card where
+ * the transfer length did not have the required DMA alignment.
+ * @tx_dst_unaligned - Debug stats for number of bytes copied where the
+ * destination address on the card did not have the required DMA alignment.
+ * @vvr - Store per VRING data structures.
+ * @virtio_bh_work - Work struct used to schedule virtio bottom half handling.
+ * @dd - Virtio device descriptor.
+ * @dc - Virtio device control fields.
+ * @list - List of Virtio devices.
+ * @virtio_db - The doorbell used by the card to interrupt the host.
+ * @virtio_cookie - The cookie returned while requesting interrupts.
+ * @vi: Transport information.
+ * @vdev_mutex: Mutex synchronizing virtio device injection,
+ *              removal and data transfers.
+ * @destroy: Track if a virtio device is being destroyed.
+ * @deleted: The virtio device has been deleted.
+ */
+struct vop_vdev {
+	int virtio_id;
+	wait_queue_head_t waitq;
+	struct vop_device *vpdev;
+	int poll_wake;
+	unsigned long out_bytes;
+	unsigned long in_bytes;
+	unsigned long out_bytes_dma;
+	unsigned long in_bytes_dma;
+	unsigned long tx_len_unaligned;
+	unsigned long tx_dst_unaligned;
+	unsigned long rx_dst_unaligned;
+	struct vop_vringh vvr[MIC_MAX_VRINGS];
+	struct work_struct virtio_bh_work;
+	struct mic_device_desc *dd;
+	struct mic_device_ctrl *dc;
+	struct list_head list;
+	int virtio_db;
+	struct mic_irq *virtio_cookie;
+	struct vop_info *vi;
+	struct mutex vdev_mutex;
+	struct completion destroy;
+	bool deleted;
+};
+
+/* Helper API to check if a virtio device is running */
+static inline bool vop_vdevup(struct vop_vdev *vdev)
+{
+	return !!vdev->dd->status;
+}
+
+void vop_init_debugfs(struct vop_info *vi);
+void vop_exit_debugfs(struct vop_info *vi);
+int vop_host_init(struct vop_info *vi);
+void vop_host_uninit(struct vop_info *vi);
+#endif
diff --git a/drivers/misc/mic/vop/vop_vringh.c b/drivers/misc/mic/vop/vop_vringh.c
new file mode 100644
index 000000000..a252c2199
--- /dev/null
+++ b/drivers/misc/mic/vop/vop_vringh.c
@@ -0,0 +1,1169 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel Virtio Over PCIe (VOP) driver.
+ *
+ */
+#include <linux/sched.h>
+#include <linux/poll.h>
+#include <linux/dma-mapping.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+
+#include <linux/mic_ioctl.h>
+#include "vop_main.h"
+
+/* Helper API to obtain the VOP PCIe device */
+static inline struct device *vop_dev(struct vop_vdev *vdev)
+{
+	return vdev->vpdev->dev.parent;
+}
+
+/* Helper API to check if a virtio device is initialized */
+static inline int vop_vdev_inited(struct vop_vdev *vdev)
+{
+	if (!vdev)
+		return -EINVAL;
+	/* Device has not been created yet */
+	if (!vdev->dd || !vdev->dd->type) {
+		dev_err(vop_dev(vdev), "%s %d err %d\n",
+			__func__, __LINE__, -EINVAL);
+		return -EINVAL;
+	}
+	/* Device has been removed/deleted */
+	if (vdev->dd->type == -1) {
+		dev_dbg(vop_dev(vdev), "%s %d err %d\n",
+			__func__, __LINE__, -ENODEV);
+		return -ENODEV;
+	}
+	return 0;
+}
+
+static void _vop_notify(struct vringh *vrh)
+{
+	struct vop_vringh *vvrh = container_of(vrh, struct vop_vringh, vrh);
+	struct vop_vdev *vdev = vvrh->vdev;
+	struct vop_device *vpdev = vdev->vpdev;
+	s8 db = vdev->dc->h2c_vdev_db;
+
+	if (db != -1)
+		vpdev->hw_ops->send_intr(vpdev, db);
+}
+
+static void vop_virtio_init_post(struct vop_vdev *vdev)
+{
+	struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd);
+	struct vop_device *vpdev = vdev->vpdev;
+	int i, used_size;
+
+	for (i = 0; i < vdev->dd->num_vq; i++) {
+		used_size = PAGE_ALIGN(sizeof(u16) * 3 +
+				sizeof(struct vring_used_elem) *
+				le16_to_cpu(vqconfig->num));
+		if (!le64_to_cpu(vqconfig[i].used_address)) {
+			dev_warn(vop_dev(vdev), "used_address zero??\n");
+			continue;
+		}
+		vdev->vvr[i].vrh.vring.used =
+			(void __force *)vpdev->hw_ops->ioremap(
+			vpdev,
+			le64_to_cpu(vqconfig[i].used_address),
+			used_size);
+	}
+
+	vdev->dc->used_address_updated = 0;
+
+	dev_info(vop_dev(vdev), "%s: device type %d LINKUP\n",
+		 __func__, vdev->virtio_id);
+}
+
+static inline void vop_virtio_device_reset(struct vop_vdev *vdev)
+{
+	int i;
+
+	dev_dbg(vop_dev(vdev), "%s: status %d device type %d RESET\n",
+		__func__, vdev->dd->status, vdev->virtio_id);
+
+	for (i = 0; i < vdev->dd->num_vq; i++)
+		/*
+		 * Avoid lockdep false positive. The + 1 is for the vop
+		 * mutex which is held in the reset devices code path.
+		 */
+		mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);
+
+	/* 0 status means "reset" */
+	vdev->dd->status = 0;
+	vdev->dc->vdev_reset = 0;
+	vdev->dc->host_ack = 1;
+
+	for (i = 0; i < vdev->dd->num_vq; i++) {
+		struct vringh *vrh = &vdev->vvr[i].vrh;
+
+		vdev->vvr[i].vring.info->avail_idx = 0;
+		vrh->completed = 0;
+		vrh->last_avail_idx = 0;
+		vrh->last_used_idx = 0;
+	}
+
+	for (i = 0; i < vdev->dd->num_vq; i++)
+		mutex_unlock(&vdev->vvr[i].vr_mutex);
+}
+
+static void vop_virtio_reset_devices(struct vop_info *vi)
+{
+	struct list_head *pos, *tmp;
+	struct vop_vdev *vdev;
+
+	list_for_each_safe(pos, tmp, &vi->vdev_list) {
+		vdev = list_entry(pos, struct vop_vdev, list);
+		vop_virtio_device_reset(vdev);
+		vdev->poll_wake = 1;
+		wake_up(&vdev->waitq);
+	}
+}
+
+static void vop_bh_handler(struct work_struct *work)
+{
+	struct vop_vdev *vdev = container_of(work, struct vop_vdev,
+			virtio_bh_work);
+
+	if (vdev->dc->used_address_updated)
+		vop_virtio_init_post(vdev);
+
+	if (vdev->dc->vdev_reset)
+		vop_virtio_device_reset(vdev);
+
+	vdev->poll_wake = 1;
+	wake_up(&vdev->waitq);
+}
+
+static irqreturn_t _vop_virtio_intr_handler(int irq, void *data)
+{
+	struct vop_vdev *vdev = data;
+	struct vop_device *vpdev = vdev->vpdev;
+
+	vpdev->hw_ops->ack_interrupt(vpdev, vdev->virtio_db);
+	schedule_work(&vdev->virtio_bh_work);
+	return IRQ_HANDLED;
+}
+
+static int vop_virtio_config_change(struct vop_vdev *vdev, void *argp)
+{
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+	int ret = 0, retry, i;
+	struct vop_device *vpdev = vdev->vpdev;
+	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
+	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
+	s8 db = bootparam->h2c_config_db;
+
+	mutex_lock(&vi->vop_mutex);
+	for (i = 0; i < vdev->dd->num_vq; i++)
+		mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);
+
+	if (db == -1 || vdev->dd->type == -1) {
+		ret = -EIO;
+		goto exit;
+	}
+
+	memcpy(mic_vq_configspace(vdev->dd), argp, vdev->dd->config_len);
+	vdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED;
+	vpdev->hw_ops->send_intr(vpdev, db);
+
+	for (retry = 100; retry--;) {
+		ret = wait_event_timeout(wake, vdev->dc->guest_ack,
+					 msecs_to_jiffies(100));
+		if (ret)
+			break;
+	}
+
+	dev_dbg(vop_dev(vdev),
+		"%s %d retry: %d\n", __func__, __LINE__, retry);
+	vdev->dc->config_change = 0;
+	vdev->dc->guest_ack = 0;
+exit:
+	for (i = 0; i < vdev->dd->num_vq; i++)
+		mutex_unlock(&vdev->vvr[i].vr_mutex);
+	mutex_unlock(&vi->vop_mutex);
+	return ret;
+}
+
+static int vop_copy_dp_entry(struct vop_vdev *vdev,
+			     struct mic_device_desc *argp, __u8 *type,
+			     struct mic_device_desc **devpage)
+{
+	struct vop_device *vpdev = vdev->vpdev;
+	struct mic_device_desc *devp;
+	struct mic_vqconfig *vqconfig;
+	int ret = 0, i;
+	bool slot_found = false;
+
+	vqconfig = mic_vq_config(argp);
+	for (i = 0; i < argp->num_vq; i++) {
+		if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) {
+			ret =  -EINVAL;
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, ret);
+			goto exit;
+		}
+	}
+
+	/* Find the first free device page entry */
+	for (i = sizeof(struct mic_bootparam);
+		i < MIC_DP_SIZE - mic_total_desc_size(argp);
+		i += mic_total_desc_size(devp)) {
+		devp = vpdev->hw_ops->get_dp(vpdev) + i;
+		if (devp->type == 0 || devp->type == -1) {
+			slot_found = true;
+			break;
+		}
+	}
+	if (!slot_found) {
+		ret =  -EINVAL;
+		dev_err(vop_dev(vdev), "%s %d err %d\n",
+			__func__, __LINE__, ret);
+		goto exit;
+	}
+	/*
+	 * Save off the type before doing the memcpy. Type will be set in the
+	 * end after completing all initialization for the new device.
+	 */
+	*type = argp->type;
+	argp->type = 0;
+	memcpy(devp, argp, mic_desc_size(argp));
+
+	*devpage = devp;
+exit:
+	return ret;
+}
+
+static void vop_init_device_ctrl(struct vop_vdev *vdev,
+				 struct mic_device_desc *devpage)
+{
+	struct mic_device_ctrl *dc;
+
+	dc = (void *)devpage + mic_aligned_desc_size(devpage);
+
+	dc->config_change = 0;
+	dc->guest_ack = 0;
+	dc->vdev_reset = 0;
+	dc->host_ack = 0;
+	dc->used_address_updated = 0;
+	dc->c2h_vdev_db = -1;
+	dc->h2c_vdev_db = -1;
+	vdev->dc = dc;
+}
+
+static int vop_virtio_add_device(struct vop_vdev *vdev,
+				 struct mic_device_desc *argp)
+{
+	struct vop_info *vi = vdev->vi;
+	struct vop_device *vpdev = vi->vpdev;
+	struct mic_device_desc *dd = NULL;
+	struct mic_vqconfig *vqconfig;
+	int vr_size, i, j, ret;
+	u8 type = 0;
+	s8 db = -1;
+	char irqname[16];
+	struct mic_bootparam *bootparam;
+	u16 num;
+	dma_addr_t vr_addr;
+
+	bootparam = vpdev->hw_ops->get_dp(vpdev);
+	init_waitqueue_head(&vdev->waitq);
+	INIT_LIST_HEAD(&vdev->list);
+	vdev->vpdev = vpdev;
+
+	ret = vop_copy_dp_entry(vdev, argp, &type, &dd);
+	if (ret) {
+		dev_err(vop_dev(vdev), "%s %d err %d\n",
+			__func__, __LINE__, ret);
+		return ret;
+	}
+
+	vop_init_device_ctrl(vdev, dd);
+
+	vdev->dd = dd;
+	vdev->virtio_id = type;
+	vqconfig = mic_vq_config(dd);
+	INIT_WORK(&vdev->virtio_bh_work, vop_bh_handler);
+
+	for (i = 0; i < dd->num_vq; i++) {
+		struct vop_vringh *vvr = &vdev->vvr[i];
+		struct mic_vring *vr = &vdev->vvr[i].vring;
+
+		num = le16_to_cpu(vqconfig[i].num);
+		mutex_init(&vvr->vr_mutex);
+		vr_size = PAGE_ALIGN(round_up(vring_size(num, MIC_VIRTIO_RING_ALIGN), 4) +
+			sizeof(struct _mic_vring_info));
+		vr->va = (void *)
+			__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+					 get_order(vr_size));
+		if (!vr->va) {
+			ret = -ENOMEM;
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, ret);
+			goto err;
+		}
+		vr->len = vr_size;
+		vr->info = vr->va + round_up(vring_size(num, MIC_VIRTIO_RING_ALIGN), 4);
+		vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id + i);
+		vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size,
+					 DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(&vpdev->dev, vr_addr)) {
+			free_pages((unsigned long)vr->va, get_order(vr_size));
+			ret = -ENOMEM;
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, ret);
+			goto err;
+		}
+		vqconfig[i].address = cpu_to_le64(vr_addr);
+
+		vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
+		ret = vringh_init_kern(&vvr->vrh,
+				       *(u32 *)mic_vq_features(vdev->dd),
+				       num, false, vr->vr.desc, vr->vr.avail,
+				       vr->vr.used);
+		if (ret) {
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, ret);
+			goto err;
+		}
+		vringh_kiov_init(&vvr->riov, NULL, 0);
+		vringh_kiov_init(&vvr->wiov, NULL, 0);
+		vvr->head = USHRT_MAX;
+		vvr->vdev = vdev;
+		vvr->vrh.notify = _vop_notify;
+		dev_dbg(&vpdev->dev,
+			"%s %d index %d va %p info %p vr_size 0x%x\n",
+			__func__, __LINE__, i, vr->va, vr->info, vr_size);
+		vvr->buf = (void *)__get_free_pages(GFP_KERNEL,
+					get_order(VOP_INT_DMA_BUF_SIZE));
+		vvr->buf_da = dma_map_single(&vpdev->dev,
+					  vvr->buf, VOP_INT_DMA_BUF_SIZE,
+					  DMA_BIDIRECTIONAL);
+	}
+
+	snprintf(irqname, sizeof(irqname), "vop%dvirtio%d", vpdev->index,
+		 vdev->virtio_id);
+	vdev->virtio_db = vpdev->hw_ops->next_db(vpdev);
+	vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev,
+			_vop_virtio_intr_handler, irqname, vdev,
+			vdev->virtio_db);
+	if (IS_ERR(vdev->virtio_cookie)) {
+		ret = PTR_ERR(vdev->virtio_cookie);
+		dev_dbg(&vpdev->dev, "request irq failed\n");
+		goto err;
+	}
+
+	vdev->dc->c2h_vdev_db = vdev->virtio_db;
+
+	/*
+	 * Order the type update with previous stores. This write barrier
+	 * is paired with the corresponding read barrier before the uncached
+	 * system memory read of the type, on the card while scanning the
+	 * device page.
+	 */
+	smp_wmb();
+	dd->type = type;
+	argp->type = type;
+
+	if (bootparam) {
+		db = bootparam->h2c_config_db;
+		if (db != -1)
+			vpdev->hw_ops->send_intr(vpdev, db);
+	}
+	dev_dbg(&vpdev->dev, "Added virtio id %d db %d\n", dd->type, db);
+	return 0;
+err:
+	vqconfig = mic_vq_config(dd);
+	for (j = 0; j < i; j++) {
+		struct vop_vringh *vvr = &vdev->vvr[j];
+
+		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[j].address),
+				 vvr->vring.len, DMA_BIDIRECTIONAL);
+		free_pages((unsigned long)vvr->vring.va,
+			   get_order(vvr->vring.len));
+	}
+	return ret;
+}
+
+static void vop_dev_remove(struct vop_info *pvi, struct mic_device_ctrl *devp,
+			   struct vop_device *vpdev)
+{
+	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
+	s8 db;
+	int ret, retry;
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+
+	devp->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
+	db = bootparam->h2c_config_db;
+	if (db != -1)
+		vpdev->hw_ops->send_intr(vpdev, db);
+	else
+		goto done;
+	for (retry = 15; retry--;) {
+		ret = wait_event_timeout(wake, devp->guest_ack,
+					 msecs_to_jiffies(1000));
+		if (ret)
+			break;
+	}
+done:
+	devp->config_change = 0;
+	devp->guest_ack = 0;
+}
+
+static void vop_virtio_del_device(struct vop_vdev *vdev)
+{
+	struct vop_info *vi = vdev->vi;
+	struct vop_device *vpdev = vdev->vpdev;
+	int i;
+	struct mic_vqconfig *vqconfig;
+	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
+
+	if (!bootparam)
+		goto skip_hot_remove;
+	vop_dev_remove(vi, vdev->dc, vpdev);
+skip_hot_remove:
+	vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
+	flush_work(&vdev->virtio_bh_work);
+	vqconfig = mic_vq_config(vdev->dd);
+	for (i = 0; i < vdev->dd->num_vq; i++) {
+		struct vop_vringh *vvr = &vdev->vvr[i];
+
+		dma_unmap_single(&vpdev->dev,
+				 vvr->buf_da, VOP_INT_DMA_BUF_SIZE,
+				 DMA_BIDIRECTIONAL);
+		free_pages((unsigned long)vvr->buf,
+			   get_order(VOP_INT_DMA_BUF_SIZE));
+		vringh_kiov_cleanup(&vvr->riov);
+		vringh_kiov_cleanup(&vvr->wiov);
+		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[i].address),
+				 vvr->vring.len, DMA_BIDIRECTIONAL);
+		free_pages((unsigned long)vvr->vring.va,
+			   get_order(vvr->vring.len));
+	}
+	/*
+	 * Order the type update with previous stores. This write barrier
+	 * is paired with the corresponding read barrier before the uncached
+	 * system memory read of the type, on the card while scanning the
+	 * device page.
+	 */
+	smp_wmb();
+	vdev->dd->type = -1;
+}
+
+/*
+ * vop_sync_dma - Wrapper for synchronous DMAs.
+ *
+ * @dev - The address of the pointer to the device instance used
+ * for DMA registration.
+ * @dst - destination DMA address.
+ * @src - source DMA address.
+ * @len - size of the transfer.
+ *
+ * Return DMA_SUCCESS on success
+ */
+static int vop_sync_dma(struct vop_vdev *vdev, dma_addr_t dst, dma_addr_t src,
+			size_t len)
+{
+	int err = 0;
+	struct dma_device *ddev;
+	struct dma_async_tx_descriptor *tx;
+	struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
+	struct dma_chan *vop_ch = vi->dma_ch;
+
+	if (!vop_ch) {
+		err = -EBUSY;
+		goto error;
+	}
+	ddev = vop_ch->device;
+	tx = ddev->device_prep_dma_memcpy(vop_ch, dst, src, len,
+		DMA_PREP_FENCE);
+	if (!tx) {
+		err = -ENOMEM;
+		goto error;
+	} else {
+		dma_cookie_t cookie;
+
+		cookie = tx->tx_submit(tx);
+		if (dma_submit_error(cookie)) {
+			err = -ENOMEM;
+			goto error;
+		}
+		dma_async_issue_pending(vop_ch);
+		err = dma_sync_wait(vop_ch, cookie);
+	}
+error:
+	if (err)
+		dev_err(&vi->vpdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+	return err;
+}
+
+#define VOP_USE_DMA true
+
+/*
+ * Initiates the copies across the PCIe bus from card memory to a user
+ * space buffer. When transfers are done using DMA, source/destination
+ * addresses and transfer length must follow the alignment requirements of
+ * the MIC DMA engine.
+ */
+static int vop_virtio_copy_to_user(struct vop_vdev *vdev, void __user *ubuf,
+				   size_t len, u64 daddr, size_t dlen,
+				   int vr_idx)
+{
+	struct vop_device *vpdev = vdev->vpdev;
+	void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
+	struct vop_vringh *vvr = &vdev->vvr[vr_idx];
+	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
+	size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
+	bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
+	size_t dma_offset, partlen;
+	int err;
+
+	if (!VOP_USE_DMA) {
+		if (copy_to_user(ubuf, (void __force *)dbuf, len)) {
+			err = -EFAULT;
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, err);
+			goto err;
+		}
+		vdev->in_bytes += len;
+		err = 0;
+		goto err;
+	}
+
+	dma_offset = daddr - round_down(daddr, dma_alignment);
+	daddr -= dma_offset;
+	len += dma_offset;
+	/*
+	 * X100 uses DMA addresses as seen by the card so adding
+	 * the aperture base is not required for DMA. However x200
+	 * requires DMA addresses to be an offset into the bar so
+	 * add the aperture base for x200.
+	 */
+	if (x200)
+		daddr += vpdev->aper->pa;
+	while (len) {
+		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
+		err = vop_sync_dma(vdev, vvr->buf_da, daddr,
+				   ALIGN(partlen, dma_alignment));
+		if (err) {
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, err);
+			goto err;
+		}
+		if (copy_to_user(ubuf, vvr->buf + dma_offset,
+				 partlen - dma_offset)) {
+			err = -EFAULT;
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, err);
+			goto err;
+		}
+		daddr += partlen;
+		ubuf += partlen;
+		dbuf += partlen;
+		vdev->in_bytes_dma += partlen;
+		vdev->in_bytes += partlen;
+		len -= partlen;
+		dma_offset = 0;
+	}
+	err = 0;
+err:
+	vpdev->hw_ops->iounmap(vpdev, dbuf);
+	dev_dbg(vop_dev(vdev),
+		"%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
+		__func__, ubuf, dbuf, len, vr_idx);
+	return err;
+}
+
+/*
+ * Initiates copies across the PCIe bus from a user space buffer to card
+ * memory. When transfers are done using DMA, source/destination addresses
+ * and transfer length must follow the alignment requirements of the MIC
+ * DMA engine.
+ */
+static int vop_virtio_copy_from_user(struct vop_vdev *vdev, void __user *ubuf,
+				     size_t len, u64 daddr, size_t dlen,
+				     int vr_idx)
+{
+	struct vop_device *vpdev = vdev->vpdev;
+	void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
+	struct vop_vringh *vvr = &vdev->vvr[vr_idx];
+	struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
+	size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
+	bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
+	size_t partlen;
+	bool dma = VOP_USE_DMA;
+	int err = 0;
+	size_t offset = 0;
+
+	if (daddr & (dma_alignment - 1)) {
+		vdev->tx_dst_unaligned += len;
+		dma = false;
+	} else if (ALIGN(len, dma_alignment) > dlen) {
+		vdev->tx_len_unaligned += len;
+		dma = false;
+	}
+
+	if (!dma)
+		goto memcpy;
+
+	/*
+	 * X100 uses DMA addresses as seen by the card so adding
+	 * the aperture base is not required for DMA. However x200
+	 * requires DMA addresses to be an offset into the bar so
+	 * add the aperture base for x200.
+	 */
+	if (x200)
+		daddr += vpdev->aper->pa;
+	while (len) {
+		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
+
+		if (copy_from_user(vvr->buf, ubuf, partlen)) {
+			err = -EFAULT;
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, err);
+			goto err;
+		}
+		err = vop_sync_dma(vdev, daddr, vvr->buf_da,
+				   ALIGN(partlen, dma_alignment));
+		if (err) {
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, err);
+			goto err;
+		}
+		daddr += partlen;
+		ubuf += partlen;
+		dbuf += partlen;
+		vdev->out_bytes_dma += partlen;
+		vdev->out_bytes += partlen;
+		len -= partlen;
+	}
+memcpy:
+	/*
+	 * We are copying to IO below and should ideally use something
+	 * like copy_from_user_toio(..) if it existed.
+	 */
+	while (len) {
+		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
+
+		if (copy_from_user(vvr->buf, ubuf + offset, partlen)) {
+			err = -EFAULT;
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, err);
+			goto err;
+		}
+		memcpy_toio(dbuf + offset, vvr->buf, partlen);
+		offset += partlen;
+		vdev->out_bytes += partlen;
+		len -= partlen;
+	}
+	err = 0;
+err:
+	vpdev->hw_ops->iounmap(vpdev, dbuf);
+	dev_dbg(vop_dev(vdev),
+		"%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
+		__func__, ubuf, dbuf, len, vr_idx);
+	return err;
+}
+
+#define MIC_VRINGH_READ true
+
+/* Determine the total number of bytes consumed in a VRINGH KIOV */
+static inline u32 vop_vringh_iov_consumed(struct vringh_kiov *iov)
+{
+	int i;
+	u32 total = iov->consumed;
+
+	for (i = 0; i < iov->i; i++)
+		total += iov->iov[i].iov_len;
+	return total;
+}
+
+/*
+ * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
+ * This API is heavily based on the vringh_iov_xfer(..) implementation
+ * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
+ * and vringh_iov_push_kern(..) directly is because there is no
+ * way to override the VRINGH xfer(..) routines as of v3.10.
+ */
+static int vop_vringh_copy(struct vop_vdev *vdev, struct vringh_kiov *iov,
+			   void __user *ubuf, size_t len, bool read, int vr_idx,
+			   size_t *out_len)
+{
+	int ret = 0;
+	size_t partlen, tot_len = 0;
+
+	while (len && iov->i < iov->used) {
+		struct kvec *kiov = &iov->iov[iov->i];
+
+		partlen = min(kiov->iov_len, len);
+		if (read)
+			ret = vop_virtio_copy_to_user(vdev, ubuf, partlen,
+						      (u64)kiov->iov_base,
+						      kiov->iov_len,
+						      vr_idx);
+		else
+			ret = vop_virtio_copy_from_user(vdev, ubuf, partlen,
+							(u64)kiov->iov_base,
+							kiov->iov_len,
+							vr_idx);
+		if (ret) {
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, ret);
+			break;
+		}
+		len -= partlen;
+		ubuf += partlen;
+		tot_len += partlen;
+		iov->consumed += partlen;
+		kiov->iov_len -= partlen;
+		kiov->iov_base += partlen;
+		if (!kiov->iov_len) {
+			/* Fix up old iov element then increment. */
+			kiov->iov_len = iov->consumed;
+			kiov->iov_base -= iov->consumed;
+
+			iov->consumed = 0;
+			iov->i++;
+		}
+	}
+	*out_len = tot_len;
+	return ret;
+}
+
+/*
+ * Use the standard VRINGH infrastructure in the kernel to fetch new
+ * descriptors, initiate the copies and update the used ring.
+ */
+static int _vop_virtio_copy(struct vop_vdev *vdev, struct mic_copy_desc *copy)
+{
+	int ret = 0;
+	u32 iovcnt = copy->iovcnt;
+	struct iovec iov;
+	struct iovec __user *u_iov = copy->iov;
+	void __user *ubuf = NULL;
+	struct vop_vringh *vvr = &vdev->vvr[copy->vr_idx];
+	struct vringh_kiov *riov = &vvr->riov;
+	struct vringh_kiov *wiov = &vvr->wiov;
+	struct vringh *vrh = &vvr->vrh;
+	u16 *head = &vvr->head;
+	struct mic_vring *vr = &vvr->vring;
+	size_t len = 0, out_len;
+
+	copy->out_len = 0;
+	/* Fetch a new IOVEC if all previous elements have been processed */
+	if (riov->i == riov->used && wiov->i == wiov->used) {
+		ret = vringh_getdesc_kern(vrh, riov, wiov,
+					  head, GFP_KERNEL);
+		/* Check if there are available descriptors */
+		if (ret <= 0)
+			return ret;
+	}
+	while (iovcnt) {
+		if (!len) {
+			/* Copy over a new iovec from user space. */
+			ret = copy_from_user(&iov, u_iov, sizeof(*u_iov));
+			if (ret) {
+				ret = -EINVAL;
+				dev_err(vop_dev(vdev), "%s %d err %d\n",
+					__func__, __LINE__, ret);
+				break;
+			}
+			len = iov.iov_len;
+			ubuf = iov.iov_base;
+		}
+		/* Issue all the read descriptors first */
+		ret = vop_vringh_copy(vdev, riov, ubuf, len,
+				      MIC_VRINGH_READ, copy->vr_idx, &out_len);
+		if (ret) {
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, ret);
+			break;
+		}
+		len -= out_len;
+		ubuf += out_len;
+		copy->out_len += out_len;
+		/* Issue the write descriptors next */
+		ret = vop_vringh_copy(vdev, wiov, ubuf, len,
+				      !MIC_VRINGH_READ, copy->vr_idx, &out_len);
+		if (ret) {
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, ret);
+			break;
+		}
+		len -= out_len;
+		ubuf += out_len;
+		copy->out_len += out_len;
+		if (!len) {
+			/* One user space iovec is now completed */
+			iovcnt--;
+			u_iov++;
+		}
+		/* Exit loop if all elements in KIOVs have been processed. */
+		if (riov->i == riov->used && wiov->i == wiov->used)
+			break;
+	}
+	/*
+	 * Update the used ring if a descriptor was available and some data was
+	 * copied in/out and the user asked for a used ring update.
+	 */
+	if (*head != USHRT_MAX && copy->out_len && copy->update_used) {
+		u32 total = 0;
+
+		/* Determine the total data consumed */
+		total += vop_vringh_iov_consumed(riov);
+		total += vop_vringh_iov_consumed(wiov);
+		vringh_complete_kern(vrh, *head, total);
+		*head = USHRT_MAX;
+		if (vringh_need_notify_kern(vrh) > 0)
+			vringh_notify(vrh);
+		vringh_kiov_cleanup(riov);
+		vringh_kiov_cleanup(wiov);
+		/* Update avail idx for user space */
+		vr->info->avail_idx = vrh->last_avail_idx;
+	}
+	return ret;
+}
+
+static inline int vop_verify_copy_args(struct vop_vdev *vdev,
+				       struct mic_copy_desc *copy)
+{
+	if (!vdev || copy->vr_idx >= vdev->dd->num_vq)
+		return -EINVAL;
+	return 0;
+}
+
+/* Copy a specified number of virtio descriptors in a chain */
+static int vop_virtio_copy_desc(struct vop_vdev *vdev,
+				struct mic_copy_desc *copy)
+{
+	int err;
+	struct vop_vringh *vvr;
+
+	err = vop_verify_copy_args(vdev, copy);
+	if (err)
+		return err;
+
+	vvr = &vdev->vvr[copy->vr_idx];
+	mutex_lock(&vvr->vr_mutex);
+	if (!vop_vdevup(vdev)) {
+		err = -ENODEV;
+		dev_err(vop_dev(vdev), "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto err;
+	}
+	err = _vop_virtio_copy(vdev, copy);
+	if (err) {
+		dev_err(vop_dev(vdev), "%s %d err %d\n",
+			__func__, __LINE__, err);
+	}
+err:
+	mutex_unlock(&vvr->vr_mutex);
+	return err;
+}
+
+static int vop_open(struct inode *inode, struct file *f)
+{
+	struct vop_vdev *vdev;
+	struct vop_info *vi = container_of(f->private_data,
+		struct vop_info, miscdev);
+
+	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
+	if (!vdev)
+		return -ENOMEM;
+	vdev->vi = vi;
+	mutex_init(&vdev->vdev_mutex);
+	f->private_data = vdev;
+	init_completion(&vdev->destroy);
+	complete(&vdev->destroy);
+	return 0;
+}
+
+static int vop_release(struct inode *inode, struct file *f)
+{
+	struct vop_vdev *vdev = f->private_data, *vdev_tmp;
+	struct vop_info *vi = vdev->vi;
+	struct list_head *pos, *tmp;
+	bool found = false;
+
+	mutex_lock(&vdev->vdev_mutex);
+	if (vdev->deleted)
+		goto unlock;
+	mutex_lock(&vi->vop_mutex);
+	list_for_each_safe(pos, tmp, &vi->vdev_list) {
+		vdev_tmp = list_entry(pos, struct vop_vdev, list);
+		if (vdev == vdev_tmp) {
+			vop_virtio_del_device(vdev);
+			list_del(pos);
+			found = true;
+			break;
+		}
+	}
+	mutex_unlock(&vi->vop_mutex);
+unlock:
+	mutex_unlock(&vdev->vdev_mutex);
+	if (!found)
+		wait_for_completion(&vdev->destroy);
+	f->private_data = NULL;
+	kfree(vdev);
+	return 0;
+}
+
+static long vop_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+{
+	struct vop_vdev *vdev = f->private_data;
+	struct vop_info *vi = vdev->vi;
+	void __user *argp = (void __user *)arg;
+	int ret;
+
+	switch (cmd) {
+	case MIC_VIRTIO_ADD_DEVICE:
+	{
+		struct mic_device_desc dd, *dd_config;
+
+		if (copy_from_user(&dd, argp, sizeof(dd)))
+			return -EFAULT;
+
+		if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE ||
+		    dd.num_vq > MIC_MAX_VRINGS)
+			return -EINVAL;
+
+		dd_config = memdup_user(argp, mic_desc_size(&dd));
+		if (IS_ERR(dd_config))
+			return PTR_ERR(dd_config);
+
+		/* Ensure desc has not changed between the two reads */
+		if (memcmp(&dd, dd_config, sizeof(dd))) {
+			ret = -EINVAL;
+			goto free_ret;
+		}
+		mutex_lock(&vdev->vdev_mutex);
+		mutex_lock(&vi->vop_mutex);
+		ret = vop_virtio_add_device(vdev, dd_config);
+		if (ret)
+			goto unlock_ret;
+		list_add_tail(&vdev->list, &vi->vdev_list);
+unlock_ret:
+		mutex_unlock(&vi->vop_mutex);
+		mutex_unlock(&vdev->vdev_mutex);
+free_ret:
+		kfree(dd_config);
+		return ret;
+	}
+	case MIC_VIRTIO_COPY_DESC:
+	{
+		struct mic_copy_desc copy;
+
+		mutex_lock(&vdev->vdev_mutex);
+		ret = vop_vdev_inited(vdev);
+		if (ret)
+			goto _unlock_ret;
+
+		if (copy_from_user(&copy, argp, sizeof(copy))) {
+			ret = -EFAULT;
+			goto _unlock_ret;
+		}
+
+		ret = vop_virtio_copy_desc(vdev, &copy);
+		if (ret < 0)
+			goto _unlock_ret;
+		if (copy_to_user(
+			&((struct mic_copy_desc __user *)argp)->out_len,
+			&copy.out_len, sizeof(copy.out_len)))
+			ret = -EFAULT;
+_unlock_ret:
+		mutex_unlock(&vdev->vdev_mutex);
+		return ret;
+	}
+	case MIC_VIRTIO_CONFIG_CHANGE:
+	{
+		void *buf;
+
+		mutex_lock(&vdev->vdev_mutex);
+		ret = vop_vdev_inited(vdev);
+		if (ret)
+			goto __unlock_ret;
+		buf = memdup_user(argp, vdev->dd->config_len);
+		if (IS_ERR(buf)) {
+			ret = PTR_ERR(buf);
+			goto __unlock_ret;
+		}
+		ret = vop_virtio_config_change(vdev, buf);
+		kfree(buf);
+__unlock_ret:
+		mutex_unlock(&vdev->vdev_mutex);
+		return ret;
+	}
+	default:
+		return -ENOIOCTLCMD;
+	};
+	return 0;
+}
+
+/*
+ * We return EPOLLIN | EPOLLOUT from poll when new buffers are enqueued, and
+ * not when previously enqueued buffers may be available. This means that
+ * in the card->host (TX) path, when userspace is unblocked by poll it
+ * must drain all available descriptors or it can stall.
+ */
+static __poll_t vop_poll(struct file *f, poll_table *wait)
+{
+	struct vop_vdev *vdev = f->private_data;
+	__poll_t mask = 0;
+
+	mutex_lock(&vdev->vdev_mutex);
+	if (vop_vdev_inited(vdev)) {
+		mask = EPOLLERR;
+		goto done;
+	}
+	poll_wait(f, &vdev->waitq, wait);
+	if (vop_vdev_inited(vdev)) {
+		mask = EPOLLERR;
+	} else if (vdev->poll_wake) {
+		vdev->poll_wake = 0;
+		mask = EPOLLIN | EPOLLOUT;
+	}
+done:
+	mutex_unlock(&vdev->vdev_mutex);
+	return mask;
+}
+
+static inline int
+vop_query_offset(struct vop_vdev *vdev, unsigned long offset,
+		 unsigned long *size, unsigned long *pa)
+{
+	struct vop_device *vpdev = vdev->vpdev;
+	unsigned long start = MIC_DP_SIZE;
+	int i;
+
+	/*
+	 * MMAP interface is as follows:
+	 * offset				region
+	 * 0x0					virtio device_page
+	 * 0x1000				first vring
+	 * 0x1000 + size of 1st vring		second vring
+	 * ....
+	 */
+	if (!offset) {
+		*pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
+		*size = MIC_DP_SIZE;
+		return 0;
+	}
+
+	for (i = 0; i < vdev->dd->num_vq; i++) {
+		struct vop_vringh *vvr = &vdev->vvr[i];
+
+		if (offset == start) {
+			*pa = virt_to_phys(vvr->vring.va);
+			*size = vvr->vring.len;
+			return 0;
+		}
+		start += vvr->vring.len;
+	}
+	return -1;
+}
+
+/*
+ * Maps the device page and virtio rings to user space for readonly access.
+ */
+static int vop_mmap(struct file *f, struct vm_area_struct *vma)
+{
+	struct vop_vdev *vdev = f->private_data;
+	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+	unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size;
+	int i, err;
+
+	err = vop_vdev_inited(vdev);
+	if (err)
+		goto ret;
+	if (vma->vm_flags & VM_WRITE) {
+		err = -EACCES;
+		goto ret;
+	}
+	while (size_rem) {
+		i = vop_query_offset(vdev, offset, &size, &pa);
+		if (i < 0) {
+			err = -EINVAL;
+			goto ret;
+		}
+		err = remap_pfn_range(vma, vma->vm_start + offset,
+				      pa >> PAGE_SHIFT, size,
+				      vma->vm_page_prot);
+		if (err)
+			goto ret;
+		size_rem -= size;
+		offset += size;
+	}
+ret:
+	return err;
+}
+
+static const struct file_operations vop_fops = {
+	.open = vop_open,
+	.release = vop_release,
+	.unlocked_ioctl = vop_ioctl,
+	.poll = vop_poll,
+	.mmap = vop_mmap,
+	.owner = THIS_MODULE,
+};
+
+int vop_host_init(struct vop_info *vi)
+{
+	int rc;
+	struct miscdevice *mdev;
+	struct vop_device *vpdev = vi->vpdev;
+
+	INIT_LIST_HEAD(&vi->vdev_list);
+	vi->dma_ch = vpdev->dma_ch;
+	mdev = &vi->miscdev;
+	mdev->minor = MISC_DYNAMIC_MINOR;
+	snprintf(vi->name, sizeof(vi->name), "vop_virtio%d", vpdev->index);
+	mdev->name = vi->name;
+	mdev->fops = &vop_fops;
+	mdev->parent = &vpdev->dev;
+
+	rc = misc_register(mdev);
+	if (rc)
+		dev_err(&vpdev->dev, "%s failed rc %d\n", __func__, rc);
+	return rc;
+}
+
+void vop_host_uninit(struct vop_info *vi)
+{
+	struct list_head *pos, *tmp;
+	struct vop_vdev *vdev;
+
+	mutex_lock(&vi->vop_mutex);
+	vop_virtio_reset_devices(vi);
+	list_for_each_safe(pos, tmp, &vi->vdev_list) {
+		vdev = list_entry(pos, struct vop_vdev, list);
+		list_del(pos);
+		reinit_completion(&vdev->destroy);
+		mutex_unlock(&vi->vop_mutex);
+		mutex_lock(&vdev->vdev_mutex);
+		vop_virtio_del_device(vdev);
+		vdev->deleted = true;
+		mutex_unlock(&vdev->vdev_mutex);
+		complete(&vdev->destroy);
+		mutex_lock(&vi->vop_mutex);
+	}
+	mutex_unlock(&vi->vop_mutex);
+	misc_deregister(&vi->miscdev);
+}