From 76cb841cb886eef6b3bee341a2266c76578724ad Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Mon, 6 May 2024 03:02:30 +0200
Subject: Adding upstream version 4.19.249.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 drivers/misc/Kconfig                            |  530 ++++
 drivers/misc/Makefile                           |   60 +
 drivers/misc/ad525x_dpot-i2c.c                  |  119 +
 drivers/misc/ad525x_dpot-spi.c                  |  146 +
 drivers/misc/ad525x_dpot.c                      |  765 ++++++
 drivers/misc/ad525x_dpot.h                      |  215 ++
 drivers/misc/altera-stapl/Kconfig               |    9 +
 drivers/misc/altera-stapl/Makefile              |    3 +
 drivers/misc/altera-stapl/altera-comp.c         |  142 +
 drivers/misc/altera-stapl/altera-exprt.h        |   33 +
 drivers/misc/altera-stapl/altera-jtag.c         | 1021 +++++++
 drivers/misc/altera-stapl/altera-jtag.h         |  113 +
 drivers/misc/altera-stapl/altera-lpt.c          |   70 +
 drivers/misc/altera-stapl/altera.c              | 2536 ++++++++++++++++++
 drivers/misc/apds9802als.c                      |  322 +++
 drivers/misc/apds990x.c                         | 1300 +++++++++
 drivers/misc/aspeed-lpc-ctrl.c                  |  300 +++
 drivers/misc/aspeed-lpc-snoop.c                 |  375 +++
 drivers/misc/atmel-ssc.c                        |  285 ++
 drivers/misc/atmel_tclib.c                      |  198 ++
 drivers/misc/bh1770glc.c                        | 1410 ++++++++++
 drivers/misc/c2port/Kconfig                     |   34 +
 drivers/misc/c2port/Makefile                    |    3 +
 drivers/misc/c2port/c2port-duramar2150.c        |  159 ++
 drivers/misc/c2port/core.c                      | 1003 +++++++
 drivers/misc/cardreader/Kconfig                 |   20 +
 drivers/misc/cardreader/Makefile                |    4 +
 drivers/misc/cardreader/rtl8411.c               |  508 ++++
 drivers/misc/cardreader/rts5209.c               |  277 ++
 drivers/misc/cardreader/rts5227.c               |  380 +++
 drivers/misc/cardreader/rts5229.c               |  273 ++
 drivers/misc/cardreader/rts5249.c               |  742 +++++
 drivers/misc/cardreader/rts5260.c               |  748 ++++++
 drivers/misc/cardreader/rts5260.h               |   45 +
 drivers/misc/cardreader/rtsx_pcr.c              | 1698 ++++++++++++
 drivers/misc/cardreader/rtsx_pcr.h              |  113 +
 drivers/misc/cardreader/rtsx_usb.c              |  792 ++++++
 drivers/misc/cb710/Kconfig                      |   25 +
 drivers/misc/cb710/Makefile                     |    6 +
 drivers/misc/cb710/core.c                       |  346 +++
 drivers/misc/cb710/debug.c                      |  118 +
 drivers/misc/cb710/sgbuf2.c                     |  146 +
 drivers/misc/cs5535-mfgpt.c                     |  383 +++
 drivers/misc/cxl/Kconfig                        |   35 +
 drivers/misc/cxl/Makefile                       |   14 +
 drivers/misc/cxl/api.c                          |  540 ++++
 drivers/misc/cxl/base.c                         |  129 +
 drivers/misc/cxl/context.c                      |  366 +++
 drivers/misc/cxl/cxl.h                          | 1139 ++++++++
 drivers/misc/cxl/cxllib.c                       |  277 ++
 drivers/misc/cxl/debugfs.c                      |  152 ++
 drivers/misc/cxl/fault.c                        |  360 +++
 drivers/misc/cxl/file.c                         |  703 +++++
 drivers/misc/cxl/flash.c                        |  543 ++++
 drivers/misc/cxl/guest.c                        | 1202 +++++++++
 drivers/misc/cxl/hcalls.c                       |  647 +++++
 drivers/misc/cxl/hcalls.h                       |  204 ++
 drivers/misc/cxl/irq.c                          |  453 ++++
 drivers/misc/cxl/main.c                         |  382 +++
 drivers/misc/cxl/native.c                       | 1600 +++++++++++
 drivers/misc/cxl/of.c                           |  511 ++++
 drivers/misc/cxl/pci.c                          | 2104 +++++++++++++++
 drivers/misc/cxl/sysfs.c                        |  774 ++++++
 drivers/misc/cxl/trace.c                        |   13 +
 drivers/misc/cxl/trace.h                        |  695 +++++
 drivers/misc/cxl/vphb.c                         |  333 +++
 drivers/misc/ds1682.c                           |  267 ++
 drivers/misc/dummy-irq.c                        |   64 +
 drivers/misc/echo/Kconfig                       |    9 +
 drivers/misc/echo/Makefile                      |    1 +
 drivers/misc/echo/echo.c                        |  601 +++++
 drivers/misc/echo/echo.h                        |  187 ++
 drivers/misc/echo/fir.h                         |  166 ++
 drivers/misc/echo/oslec.h                       |   94 +
 drivers/misc/eeprom/Kconfig                     |  114 +
 drivers/misc/eeprom/Makefile                    |    9 +
 drivers/misc/eeprom/at24.c                      |  805 ++++++
 drivers/misc/eeprom/at25.c                      |  414 +++
 drivers/misc/eeprom/digsy_mtc_eeprom.c          |  106 +
 drivers/misc/eeprom/eeprom.c                    |  222 ++
 drivers/misc/eeprom/eeprom_93cx6.c              |  381 +++
 drivers/misc/eeprom/eeprom_93xx46.c             |  541 ++++
 drivers/misc/eeprom/idt_89hpesx.c               | 1620 +++++++++++
 drivers/misc/eeprom/max6875.c                   |  210 ++
 drivers/misc/enclosure.c                        |  695 +++++
 drivers/misc/fsa9480.c                          |  550 ++++
 drivers/misc/genwqe/Kconfig                     |   19 +
 drivers/misc/genwqe/Makefile                    |    7 +
 drivers/misc/genwqe/card_base.c                 | 1412 ++++++++++
 drivers/misc/genwqe/card_base.h                 |  585 ++++
 drivers/misc/genwqe/card_ddcb.c                 | 1410 ++++++++++
 drivers/misc/genwqe/card_ddcb.h                 |  188 ++
 drivers/misc/genwqe/card_debugfs.c              |  504 ++++
 drivers/misc/genwqe/card_dev.c                  | 1412 ++++++++++
 drivers/misc/genwqe/card_sysfs.c                |  303 +++
 drivers/misc/genwqe/card_utils.c                | 1061 ++++++++
 drivers/misc/genwqe/genwqe_driver.h             |   77 +
 drivers/misc/hmc6352.c                          |  157 ++
 drivers/misc/hpilo.c                            |  918 +++++++
 drivers/misc/hpilo.h                            |  211 ++
 drivers/misc/ibmasm/Makefile                    |   16 +
 drivers/misc/ibmasm/command.c                   |  187 ++
 drivers/misc/ibmasm/dot_command.c               |  152 ++
 drivers/misc/ibmasm/dot_command.h               |   78 +
 drivers/misc/ibmasm/event.c                     |  177 ++
 drivers/misc/ibmasm/heartbeat.c                 |  101 +
 drivers/misc/ibmasm/i2o.h                       |   77 +
 drivers/misc/ibmasm/ibmasm.h                    |  223 ++
 drivers/misc/ibmasm/ibmasmfs.c                  |  608 +++++
 drivers/misc/ibmasm/lowlevel.c                  |   85 +
 drivers/misc/ibmasm/lowlevel.h                  |  137 +
 drivers/misc/ibmasm/module.c                    |  238 ++
 drivers/misc/ibmasm/r_heartbeat.c               |   99 +
 drivers/misc/ibmasm/remote.c                    |  282 ++
 drivers/misc/ibmasm/remote.h                    |  270 ++
 drivers/misc/ibmasm/uart.c                      |   72 +
 drivers/misc/ibmvmc.c                           | 2421 +++++++++++++++++
 drivers/misc/ibmvmc.h                           |  209 ++
 drivers/misc/ics932s401.c                       |  495 ++++
 drivers/misc/ioc4.c                             |  500 ++++
 drivers/misc/isl29003.c                         |  487 ++++
 drivers/misc/isl29020.c                         |  238 ++
 drivers/misc/kgdbts.c                           | 1193 +++++++++
 drivers/misc/lattice-ecp3-config.c              |  249 ++
 drivers/misc/lis3lv02d/Kconfig                  |   37 +
 drivers/misc/lis3lv02d/Makefile                 |    7 +
 drivers/misc/lis3lv02d/lis3lv02d.c              | 1274 +++++++++
 drivers/misc/lis3lv02d/lis3lv02d.h              |  332 +++
 drivers/misc/lis3lv02d/lis3lv02d_i2c.c          |  289 ++
 drivers/misc/lis3lv02d/lis3lv02d_spi.c          |  153 ++
 drivers/misc/lkdtm/Makefile                     |   19 +
 drivers/misc/lkdtm/bugs.c                       |  257 ++
 drivers/misc/lkdtm/core.c                       |  507 ++++
 drivers/misc/lkdtm/heap.c                       |  148 +
 drivers/misc/lkdtm/lkdtm.h                      |   88 +
 drivers/misc/lkdtm/perms.c                      |  221 ++
 drivers/misc/lkdtm/refcount.c                   |  401 +++
 drivers/misc/lkdtm/rodata.c                     |   11 +
 drivers/misc/lkdtm/usercopy.c                   |  350 +++
 drivers/misc/mei/Kconfig                        |   45 +
 drivers/misc/mei/Makefile                       |   25 +
 drivers/misc/mei/bus-fixup.c                    |  505 ++++
 drivers/misc/mei/bus.c                          | 1153 ++++++++
 drivers/misc/mei/client.c                       | 1844 +++++++++++++
 drivers/misc/mei/client.h                       |  236 ++
 drivers/misc/mei/debugfs.c                      |  288 ++
 drivers/misc/mei/hbm.c                          | 1287 +++++++++
 drivers/misc/mei/hbm.h                          |   62 +
 drivers/misc/mei/hw-me-regs.h                   |  245 ++
 drivers/misc/mei/hw-me.c                        | 1500 +++++++++++
 drivers/misc/mei/hw-me.h                        |  115 +
 drivers/misc/mei/hw-txe-regs.h                  |  294 ++
 drivers/misc/mei/hw-txe.c                       | 1267 +++++++++
 drivers/misc/mei/hw-txe.h                       |   75 +
 drivers/misc/mei/hw.h                           |  515 ++++
 drivers/misc/mei/init.c                         |  406 +++
 drivers/misc/mei/interrupt.c                    |  535 ++++
 drivers/misc/mei/main.c                         | 1022 +++++++
 drivers/misc/mei/mei-trace.c                    |   26 +
 drivers/misc/mei/mei-trace.h                    |   93 +
 drivers/misc/mei/mei_dev.h                      |  756 ++++++
 drivers/misc/mei/pci-me.c                       |  534 ++++
 drivers/misc/mei/pci-txe.c                      |  422 +++
 drivers/misc/mic/Kconfig                        |  156 ++
 drivers/misc/mic/Makefile                       |   12 +
 drivers/misc/mic/bus/Makefile                   |    8 +
 drivers/misc/mic/bus/cosm_bus.c                 |  141 +
 drivers/misc/mic/bus/cosm_bus.h                 |  136 +
 drivers/misc/mic/bus/mic_bus.c                  |  204 ++
 drivers/misc/mic/bus/scif_bus.c                 |  209 ++
 drivers/misc/mic/bus/scif_bus.h                 |  133 +
 drivers/misc/mic/bus/vop_bus.c                  |  205 ++
 drivers/misc/mic/bus/vop_bus.h                  |  140 +
 drivers/misc/mic/card/Makefile                  |   11 +
 drivers/misc/mic/card/mic_debugfs.c             |  130 +
 drivers/misc/mic/card/mic_device.c              |  429 +++
 drivers/misc/mic/card/mic_device.h              |  149 ++
 drivers/misc/mic/card/mic_x100.c                |  359 +++
 drivers/misc/mic/card/mic_x100.h                |   49 +
 drivers/misc/mic/common/mic_dev.h               |   67 +
 drivers/misc/mic/cosm/Makefile                  |   11 +
 drivers/misc/mic/cosm/cosm_debugfs.c            |  156 ++
 drivers/misc/mic/cosm/cosm_main.c               |  393 +++
 drivers/misc/mic/cosm/cosm_main.h               |   73 +
 drivers/misc/mic/cosm/cosm_scif_server.c        |  411 +++
 drivers/misc/mic/cosm/cosm_sysfs.c              |  461 ++++
 drivers/misc/mic/cosm_client/Makefile           |    7 +
 drivers/misc/mic/cosm_client/cosm_scif_client.c |  281 ++
 drivers/misc/mic/host/Makefile                  |   12 +
 drivers/misc/mic/host/mic_boot.c                |  599 +++++
 drivers/misc/mic/host/mic_debugfs.c             |  216 ++
 drivers/misc/mic/host/mic_device.h              |  169 ++
 drivers/misc/mic/host/mic_intr.c                |  645 +++++
 drivers/misc/mic/host/mic_intr.h                |  149 ++
 drivers/misc/mic/host/mic_main.c                |  347 +++
 drivers/misc/mic/host/mic_smpt.c                |  439 +++
 drivers/misc/mic/host/mic_smpt.h                |   99 +
 drivers/misc/mic/host/mic_x100.c                |  584 ++++
 drivers/misc/mic/host/mic_x100.h                |   98 +
 drivers/misc/mic/scif/Makefile                  |   21 +
 drivers/misc/mic/scif/scif_api.c                | 1496 +++++++++++
 drivers/misc/mic/scif/scif_debugfs.c            |  162 ++
 drivers/misc/mic/scif/scif_dma.c                | 1960 ++++++++++++++
 drivers/misc/mic/scif/scif_epd.c                |  357 +++
 drivers/misc/mic/scif/scif_epd.h                |  210 ++
 drivers/misc/mic/scif/scif_fd.c                 |  471 ++++
 drivers/misc/mic/scif/scif_fence.c              |  772 ++++++
 drivers/misc/mic/scif/scif_main.c               |  359 +++
 drivers/misc/mic/scif/scif_main.h               |  283 ++
 drivers/misc/mic/scif/scif_map.h                |  136 +
 drivers/misc/mic/scif/scif_mmap.c               |  699 +++++
 drivers/misc/mic/scif/scif_nm.c                 |  237 ++
 drivers/misc/mic/scif/scif_nodeqp.c             | 1354 ++++++++++
 drivers/misc/mic/scif/scif_nodeqp.h             |  221 ++
 drivers/misc/mic/scif/scif_peer_bus.c           |  183 ++
 drivers/misc/mic/scif/scif_peer_bus.h           |   31 +
 drivers/misc/mic/scif/scif_ports.c              |  124 +
 drivers/misc/mic/scif/scif_rb.c                 |  249 ++
 drivers/misc/mic/scif/scif_rb.h                 |  100 +
 drivers/misc/mic/scif/scif_rma.c                | 1775 ++++++++++++
 drivers/misc/mic/scif/scif_rma.h                |  464 ++++
 drivers/misc/mic/scif/scif_rma_list.c           |  291 ++
 drivers/misc/mic/scif/scif_rma_list.h           |   57 +
 drivers/misc/mic/vop/Makefile                   |    9 +
 drivers/misc/mic/vop/vop_debugfs.c              |  232 ++
 drivers/misc/mic/vop/vop_main.c                 |  766 ++++++
 drivers/misc/mic/vop/vop_main.h                 |  170 ++
 drivers/misc/mic/vop/vop_vringh.c               | 1169 ++++++++
 drivers/misc/ocxl/Kconfig                       |   31 +
 drivers/misc/ocxl/Makefile                      |   11 +
 drivers/misc/ocxl/afu_irq.c                     |  202 ++
 drivers/misc/ocxl/config.c                      |  725 +++++
 drivers/misc/ocxl/context.c                     |  282 ++
 drivers/misc/ocxl/file.c                        |  541 ++++
 drivers/misc/ocxl/link.c                        |  690 +++++
 drivers/misc/ocxl/main.c                        |   33 +
 drivers/misc/ocxl/ocxl_internal.h               |  132 +
 drivers/misc/ocxl/pasid.c                       |  107 +
 drivers/misc/ocxl/pci.c                         |  585 ++++
 drivers/misc/ocxl/sysfs.c                       |  141 +
 drivers/misc/ocxl/trace.c                       |    6 +
 drivers/misc/ocxl/trace.h                       |  182 ++
 drivers/misc/pch_phub.c                         |  915 +++++++
 drivers/misc/pci_endpoint_test.c                |  831 ++++++
 drivers/misc/phantom.c                          |  571 ++++
 drivers/misc/pti.c                              |  988 +++++++
 drivers/misc/qcom-coincell.c                    |  153 ++
 drivers/misc/sgi-gru/Makefile                   |    5 +
 drivers/misc/sgi-gru/gru.h                      |   78 +
 drivers/misc/sgi-gru/gru_instructions.h         |  736 +++++
 drivers/misc/sgi-gru/grufault.c                 |  907 +++++++
 drivers/misc/sgi-gru/grufile.c                  |  623 +++++
 drivers/misc/sgi-gru/gruhandles.c               |  210 ++
 drivers/misc/sgi-gru/gruhandles.h               |  530 ++++
 drivers/misc/sgi-gru/grukdump.c                 |  236 ++
 drivers/misc/sgi-gru/grukservices.c             | 1171 ++++++++
 drivers/misc/sgi-gru/grukservices.h             |  214 ++
 drivers/misc/sgi-gru/grulib.h                   |  153 ++
 drivers/misc/sgi-gru/grumain.c                  |  976 +++++++
 drivers/misc/sgi-gru/gruprocfs.c                |  324 +++
 drivers/misc/sgi-gru/grutables.h                |  679 +++++
 drivers/misc/sgi-gru/grutlbpurge.c              |  370 +++
 drivers/misc/sgi-xp/Makefile                    |   20 +
 drivers/misc/sgi-xp/xp.h                        |  368 +++
 drivers/misc/sgi-xp/xp_main.c                   |  264 ++
 drivers/misc/sgi-xp/xp_nofault.S                |   35 +
 drivers/misc/sgi-xp/xp_sn2.c                    |  190 ++
 drivers/misc/sgi-xp/xp_uv.c                     |  171 ++
 drivers/misc/sgi-xp/xpc.h                       | 1004 +++++++
 drivers/misc/sgi-xp/xpc_channel.c               | 1011 +++++++
 drivers/misc/sgi-xp/xpc_main.c                  | 1373 ++++++++++
 drivers/misc/sgi-xp/xpc_partition.c             |  540 ++++
 drivers/misc/sgi-xp/xpc_sn2.c                   | 2459 +++++++++++++++++
 drivers/misc/sgi-xp/xpc_uv.c                    | 1813 +++++++++++++
 drivers/misc/sgi-xp/xpnet.c                     |  596 +++++
 drivers/misc/spear13xx_pcie_gadget.c            |  797 ++++++
 drivers/misc/sram-exec.c                        |  119 +
 drivers/misc/sram.c                             |  456 ++++
 drivers/misc/sram.h                             |   58 +
 drivers/misc/ti-st/Kconfig                      |   18 +
 drivers/misc/ti-st/Makefile                     |    6 +
 drivers/misc/ti-st/st_core.c                    |  922 +++++++
 drivers/misc/ti-st/st_kim.c                     |  868 ++++++
 drivers/misc/ti-st/st_ll.c                      |  169 ++
 drivers/misc/tifm_7xx1.c                        |  446 +++
 drivers/misc/tifm_core.c                        |  371 +++
 drivers/misc/tsl2550.c                          |  468 ++++
 drivers/misc/vexpress-syscfg.c                  |  287 ++
 drivers/misc/vmw_balloon.c                      | 1236 +++++++++
 drivers/misc/vmw_vmci/Kconfig                   |   16 +
 drivers/misc/vmw_vmci/Makefile                  |    4 +
 drivers/misc/vmw_vmci/vmci_context.c            | 1225 +++++++++
 drivers/misc/vmw_vmci/vmci_context.h            |  182 ++
 drivers/misc/vmw_vmci/vmci_datagram.c           |  502 ++++
 drivers/misc/vmw_vmci/vmci_datagram.h           |   52 +
 drivers/misc/vmw_vmci/vmci_doorbell.c           |  609 +++++
 drivers/misc/vmw_vmci/vmci_doorbell.h           |   51 +
 drivers/misc/vmw_vmci/vmci_driver.c             |  117 +
 drivers/misc/vmw_vmci/vmci_driver.h             |   57 +
 drivers/misc/vmw_vmci/vmci_event.c              |  225 ++
 drivers/misc/vmw_vmci/vmci_event.h              |   25 +
 drivers/misc/vmw_vmci/vmci_guest.c              |  736 +++++
 drivers/misc/vmw_vmci/vmci_handle_array.c       |  154 ++
 drivers/misc/vmw_vmci/vmci_handle_array.h       |   61 +
 drivers/misc/vmw_vmci/vmci_host.c               | 1036 +++++++
 drivers/misc/vmw_vmci/vmci_queue_pair.c         | 3272 +++++++++++++++++++++++
 drivers/misc/vmw_vmci/vmci_queue_pair.h         |  173 ++
 drivers/misc/vmw_vmci/vmci_resource.c           |  229 ++
 drivers/misc/vmw_vmci/vmci_resource.h           |   59 +
 drivers/misc/vmw_vmci/vmci_route.c              |  226 ++
 drivers/misc/vmw_vmci/vmci_route.h              |   30 +
 311 files changed, 133699 insertions(+)
 create mode 100644 drivers/misc/Kconfig
 create mode 100644 drivers/misc/Makefile
 create mode 100644 drivers/misc/ad525x_dpot-i2c.c
 create mode 100644 drivers/misc/ad525x_dpot-spi.c
 create mode 100644 drivers/misc/ad525x_dpot.c
 create mode 100644 drivers/misc/ad525x_dpot.h
 create mode 100644 drivers/misc/altera-stapl/Kconfig
 create mode 100644 drivers/misc/altera-stapl/Makefile
 create mode 100644 drivers/misc/altera-stapl/altera-comp.c
 create mode 100644 drivers/misc/altera-stapl/altera-exprt.h
 create mode 100644 drivers/misc/altera-stapl/altera-jtag.c
 create mode 100644 drivers/misc/altera-stapl/altera-jtag.h
 create mode 100644 drivers/misc/altera-stapl/altera-lpt.c
 create mode 100644 drivers/misc/altera-stapl/altera.c
 create mode 100644 drivers/misc/apds9802als.c
 create mode 100644 drivers/misc/apds990x.c
 create mode 100644 drivers/misc/aspeed-lpc-ctrl.c
 create mode 100644 drivers/misc/aspeed-lpc-snoop.c
 create mode 100644 drivers/misc/atmel-ssc.c
 create mode 100644 drivers/misc/atmel_tclib.c
 create mode 100644 drivers/misc/bh1770glc.c
 create mode 100644 drivers/misc/c2port/Kconfig
 create mode 100644 drivers/misc/c2port/Makefile
 create mode 100644 drivers/misc/c2port/c2port-duramar2150.c
 create mode 100644 drivers/misc/c2port/core.c
 create mode 100644 drivers/misc/cardreader/Kconfig
 create mode 100644 drivers/misc/cardreader/Makefile
 create mode 100644 drivers/misc/cardreader/rtl8411.c
 create mode 100644 drivers/misc/cardreader/rts5209.c
 create mode 100644 drivers/misc/cardreader/rts5227.c
 create mode 100644 drivers/misc/cardreader/rts5229.c
 create mode 100644 drivers/misc/cardreader/rts5249.c
 create mode 100644 drivers/misc/cardreader/rts5260.c
 create mode 100644 drivers/misc/cardreader/rts5260.h
 create mode 100644 drivers/misc/cardreader/rtsx_pcr.c
 create mode 100644 drivers/misc/cardreader/rtsx_pcr.h
 create mode 100644 drivers/misc/cardreader/rtsx_usb.c
 create mode 100644 drivers/misc/cb710/Kconfig
 create mode 100644 drivers/misc/cb710/Makefile
 create mode 100644 drivers/misc/cb710/core.c
 create mode 100644 drivers/misc/cb710/debug.c
 create mode 100644 drivers/misc/cb710/sgbuf2.c
 create mode 100644 drivers/misc/cs5535-mfgpt.c
 create mode 100644 drivers/misc/cxl/Kconfig
 create mode 100644 drivers/misc/cxl/Makefile
 create mode 100644 drivers/misc/cxl/api.c
 create mode 100644 drivers/misc/cxl/base.c
 create mode 100644 drivers/misc/cxl/context.c
 create mode 100644 drivers/misc/cxl/cxl.h
 create mode 100644 drivers/misc/cxl/cxllib.c
 create mode 100644 drivers/misc/cxl/debugfs.c
 create mode 100644 drivers/misc/cxl/fault.c
 create mode 100644 drivers/misc/cxl/file.c
 create mode 100644 drivers/misc/cxl/flash.c
 create mode 100644 drivers/misc/cxl/guest.c
 create mode 100644 drivers/misc/cxl/hcalls.c
 create mode 100644 drivers/misc/cxl/hcalls.h
 create mode 100644 drivers/misc/cxl/irq.c
 create mode 100644 drivers/misc/cxl/main.c
 create mode 100644 drivers/misc/cxl/native.c
 create mode 100644 drivers/misc/cxl/of.c
 create mode 100644 drivers/misc/cxl/pci.c
 create mode 100644 drivers/misc/cxl/sysfs.c
 create mode 100644 drivers/misc/cxl/trace.c
 create mode 100644 drivers/misc/cxl/trace.h
 create mode 100644 drivers/misc/cxl/vphb.c
 create mode 100644 drivers/misc/ds1682.c
 create mode 100644 drivers/misc/dummy-irq.c
 create mode 100644 drivers/misc/echo/Kconfig
 create mode 100644 drivers/misc/echo/Makefile
 create mode 100644 drivers/misc/echo/echo.c
 create mode 100644 drivers/misc/echo/echo.h
 create mode 100644 drivers/misc/echo/fir.h
 create mode 100644 drivers/misc/echo/oslec.h
 create mode 100644 drivers/misc/eeprom/Kconfig
 create mode 100644 drivers/misc/eeprom/Makefile
 create mode 100644 drivers/misc/eeprom/at24.c
 create mode 100644 drivers/misc/eeprom/at25.c
 create mode 100644 drivers/misc/eeprom/digsy_mtc_eeprom.c
 create mode 100644 drivers/misc/eeprom/eeprom.c
 create mode 100644 drivers/misc/eeprom/eeprom_93cx6.c
 create mode 100644 drivers/misc/eeprom/eeprom_93xx46.c
 create mode 100644 drivers/misc/eeprom/idt_89hpesx.c
 create mode 100644 drivers/misc/eeprom/max6875.c
 create mode 100644 drivers/misc/enclosure.c
 create mode 100644 drivers/misc/fsa9480.c
 create mode 100644 drivers/misc/genwqe/Kconfig
 create mode 100644 drivers/misc/genwqe/Makefile
 create mode 100644 drivers/misc/genwqe/card_base.c
 create mode 100644 drivers/misc/genwqe/card_base.h
 create mode 100644 drivers/misc/genwqe/card_ddcb.c
 create mode 100644 drivers/misc/genwqe/card_ddcb.h
 create mode 100644 drivers/misc/genwqe/card_debugfs.c
 create mode 100644 drivers/misc/genwqe/card_dev.c
 create mode 100644 drivers/misc/genwqe/card_sysfs.c
 create mode 100644 drivers/misc/genwqe/card_utils.c
 create mode 100644 drivers/misc/genwqe/genwqe_driver.h
 create mode 100644 drivers/misc/hmc6352.c
 create mode 100644 drivers/misc/hpilo.c
 create mode 100644 drivers/misc/hpilo.h
 create mode 100644 drivers/misc/ibmasm/Makefile
 create mode 100644 drivers/misc/ibmasm/command.c
 create mode 100644 drivers/misc/ibmasm/dot_command.c
 create mode 100644 drivers/misc/ibmasm/dot_command.h
 create mode 100644 drivers/misc/ibmasm/event.c
 create mode 100644 drivers/misc/ibmasm/heartbeat.c
 create mode 100644 drivers/misc/ibmasm/i2o.h
 create mode 100644 drivers/misc/ibmasm/ibmasm.h
 create mode 100644 drivers/misc/ibmasm/ibmasmfs.c
 create mode 100644 drivers/misc/ibmasm/lowlevel.c
 create mode 100644 drivers/misc/ibmasm/lowlevel.h
 create mode 100644 drivers/misc/ibmasm/module.c
 create mode 100644 drivers/misc/ibmasm/r_heartbeat.c
 create mode 100644 drivers/misc/ibmasm/remote.c
 create mode 100644 drivers/misc/ibmasm/remote.h
 create mode 100644 drivers/misc/ibmasm/uart.c
 create mode 100644 drivers/misc/ibmvmc.c
 create mode 100644 drivers/misc/ibmvmc.h
 create mode 100644 drivers/misc/ics932s401.c
 create mode 100644 drivers/misc/ioc4.c
 create mode 100644 drivers/misc/isl29003.c
 create mode 100644 drivers/misc/isl29020.c
 create mode 100644 drivers/misc/kgdbts.c
 create mode 100644 drivers/misc/lattice-ecp3-config.c
 create mode 100644 drivers/misc/lis3lv02d/Kconfig
 create mode 100644 drivers/misc/lis3lv02d/Makefile
 create mode 100644 drivers/misc/lis3lv02d/lis3lv02d.c
 create mode 100644 drivers/misc/lis3lv02d/lis3lv02d.h
 create mode 100644 drivers/misc/lis3lv02d/lis3lv02d_i2c.c
 create mode 100644 drivers/misc/lis3lv02d/lis3lv02d_spi.c
 create mode 100644 drivers/misc/lkdtm/Makefile
 create mode 100644 drivers/misc/lkdtm/bugs.c
 create mode 100644 drivers/misc/lkdtm/core.c
 create mode 100644 drivers/misc/lkdtm/heap.c
 create mode 100644 drivers/misc/lkdtm/lkdtm.h
 create mode 100644 drivers/misc/lkdtm/perms.c
 create mode 100644 drivers/misc/lkdtm/refcount.c
 create mode 100644 drivers/misc/lkdtm/rodata.c
 create mode 100644 drivers/misc/lkdtm/usercopy.c
 create mode 100644 drivers/misc/mei/Kconfig
 create mode 100644 drivers/misc/mei/Makefile
 create mode 100644 drivers/misc/mei/bus-fixup.c
 create mode 100644 drivers/misc/mei/bus.c
 create mode 100644 drivers/misc/mei/client.c
 create mode 100644 drivers/misc/mei/client.h
 create mode 100644 drivers/misc/mei/debugfs.c
 create mode 100644 drivers/misc/mei/hbm.c
 create mode 100644 drivers/misc/mei/hbm.h
 create mode 100644 drivers/misc/mei/hw-me-regs.h
 create mode 100644 drivers/misc/mei/hw-me.c
 create mode 100644 drivers/misc/mei/hw-me.h
 create mode 100644 drivers/misc/mei/hw-txe-regs.h
 create mode 100644 drivers/misc/mei/hw-txe.c
 create mode 100644 drivers/misc/mei/hw-txe.h
 create mode 100644 drivers/misc/mei/hw.h
 create mode 100644 drivers/misc/mei/init.c
 create mode 100644 drivers/misc/mei/interrupt.c
 create mode 100644 drivers/misc/mei/main.c
 create mode 100644 drivers/misc/mei/mei-trace.c
 create mode 100644 drivers/misc/mei/mei-trace.h
 create mode 100644 drivers/misc/mei/mei_dev.h
 create mode 100644 drivers/misc/mei/pci-me.c
 create mode 100644 drivers/misc/mei/pci-txe.c
 create mode 100644 drivers/misc/mic/Kconfig
 create mode 100644 drivers/misc/mic/Makefile
 create mode 100644 drivers/misc/mic/bus/Makefile
 create mode 100644 drivers/misc/mic/bus/cosm_bus.c
 create mode 100644 drivers/misc/mic/bus/cosm_bus.h
 create mode 100644 drivers/misc/mic/bus/mic_bus.c
 create mode 100644 drivers/misc/mic/bus/scif_bus.c
 create mode 100644 drivers/misc/mic/bus/scif_bus.h
 create mode 100644 drivers/misc/mic/bus/vop_bus.c
 create mode 100644 drivers/misc/mic/bus/vop_bus.h
 create mode 100644 drivers/misc/mic/card/Makefile
 create mode 100644 drivers/misc/mic/card/mic_debugfs.c
 create mode 100644 drivers/misc/mic/card/mic_device.c
 create mode 100644 drivers/misc/mic/card/mic_device.h
 create mode 100644 drivers/misc/mic/card/mic_x100.c
 create mode 100644 drivers/misc/mic/card/mic_x100.h
 create mode 100644 drivers/misc/mic/common/mic_dev.h
 create mode 100644 drivers/misc/mic/cosm/Makefile
 create mode 100644 drivers/misc/mic/cosm/cosm_debugfs.c
 create mode 100644 drivers/misc/mic/cosm/cosm_main.c
 create mode 100644 drivers/misc/mic/cosm/cosm_main.h
 create mode 100644 drivers/misc/mic/cosm/cosm_scif_server.c
 create mode 100644 drivers/misc/mic/cosm/cosm_sysfs.c
 create mode 100644 drivers/misc/mic/cosm_client/Makefile
 create mode 100644 drivers/misc/mic/cosm_client/cosm_scif_client.c
 create mode 100644 drivers/misc/mic/host/Makefile
 create mode 100644 drivers/misc/mic/host/mic_boot.c
 create mode 100644 drivers/misc/mic/host/mic_debugfs.c
 create mode 100644 drivers/misc/mic/host/mic_device.h
 create mode 100644 drivers/misc/mic/host/mic_intr.c
 create mode 100644 drivers/misc/mic/host/mic_intr.h
 create mode 100644 drivers/misc/mic/host/mic_main.c
 create mode 100644 drivers/misc/mic/host/mic_smpt.c
 create mode 100644 drivers/misc/mic/host/mic_smpt.h
 create mode 100644 drivers/misc/mic/host/mic_x100.c
 create mode 100644 drivers/misc/mic/host/mic_x100.h
 create mode 100644 drivers/misc/mic/scif/Makefile
 create mode 100644 drivers/misc/mic/scif/scif_api.c
 create mode 100644 drivers/misc/mic/scif/scif_debugfs.c
 create mode 100644 drivers/misc/mic/scif/scif_dma.c
 create mode 100644 drivers/misc/mic/scif/scif_epd.c
 create mode 100644 drivers/misc/mic/scif/scif_epd.h
 create mode 100644 drivers/misc/mic/scif/scif_fd.c
 create mode 100644 drivers/misc/mic/scif/scif_fence.c
 create mode 100644 drivers/misc/mic/scif/scif_main.c
 create mode 100644 drivers/misc/mic/scif/scif_main.h
 create mode 100644 drivers/misc/mic/scif/scif_map.h
 create mode 100644 drivers/misc/mic/scif/scif_mmap.c
 create mode 100644 drivers/misc/mic/scif/scif_nm.c
 create mode 100644 drivers/misc/mic/scif/scif_nodeqp.c
 create mode 100644 drivers/misc/mic/scif/scif_nodeqp.h
 create mode 100644 drivers/misc/mic/scif/scif_peer_bus.c
 create mode 100644 drivers/misc/mic/scif/scif_peer_bus.h
 create mode 100644 drivers/misc/mic/scif/scif_ports.c
 create mode 100644 drivers/misc/mic/scif/scif_rb.c
 create mode 100644 drivers/misc/mic/scif/scif_rb.h
 create mode 100644 drivers/misc/mic/scif/scif_rma.c
 create mode 100644 drivers/misc/mic/scif/scif_rma.h
 create mode 100644 drivers/misc/mic/scif/scif_rma_list.c
 create mode 100644 drivers/misc/mic/scif/scif_rma_list.h
 create mode 100644 drivers/misc/mic/vop/Makefile
 create mode 100644 drivers/misc/mic/vop/vop_debugfs.c
 create mode 100644 drivers/misc/mic/vop/vop_main.c
 create mode 100644 drivers/misc/mic/vop/vop_main.h
 create mode 100644 drivers/misc/mic/vop/vop_vringh.c
 create mode 100644 drivers/misc/ocxl/Kconfig
 create mode 100644 drivers/misc/ocxl/Makefile
 create mode 100644 drivers/misc/ocxl/afu_irq.c
 create mode 100644 drivers/misc/ocxl/config.c
 create mode 100644 drivers/misc/ocxl/context.c
 create mode 100644 drivers/misc/ocxl/file.c
 create mode 100644 drivers/misc/ocxl/link.c
 create mode 100644 drivers/misc/ocxl/main.c
 create mode 100644 drivers/misc/ocxl/ocxl_internal.h
 create mode 100644 drivers/misc/ocxl/pasid.c
 create mode 100644 drivers/misc/ocxl/pci.c
 create mode 100644 drivers/misc/ocxl/sysfs.c
 create mode 100644 drivers/misc/ocxl/trace.c
 create mode 100644 drivers/misc/ocxl/trace.h
 create mode 100644 drivers/misc/pch_phub.c
 create mode 100644 drivers/misc/pci_endpoint_test.c
 create mode 100644 drivers/misc/phantom.c
 create mode 100644 drivers/misc/pti.c
 create mode 100644 drivers/misc/qcom-coincell.c
 create mode 100644 drivers/misc/sgi-gru/Makefile
 create mode 100644 drivers/misc/sgi-gru/gru.h
 create mode 100644 drivers/misc/sgi-gru/gru_instructions.h
 create mode 100644 drivers/misc/sgi-gru/grufault.c
 create mode 100644 drivers/misc/sgi-gru/grufile.c
 create mode 100644 drivers/misc/sgi-gru/gruhandles.c
 create mode 100644 drivers/misc/sgi-gru/gruhandles.h
 create mode 100644 drivers/misc/sgi-gru/grukdump.c
 create mode 100644 drivers/misc/sgi-gru/grukservices.c
 create mode 100644 drivers/misc/sgi-gru/grukservices.h
 create mode 100644 drivers/misc/sgi-gru/grulib.h
 create mode 100644 drivers/misc/sgi-gru/grumain.c
 create mode 100644 drivers/misc/sgi-gru/gruprocfs.c
 create mode 100644 drivers/misc/sgi-gru/grutables.h
 create mode 100644 drivers/misc/sgi-gru/grutlbpurge.c
 create mode 100644 drivers/misc/sgi-xp/Makefile
 create mode 100644 drivers/misc/sgi-xp/xp.h
 create mode 100644 drivers/misc/sgi-xp/xp_main.c
 create mode 100644 drivers/misc/sgi-xp/xp_nofault.S
 create mode 100644 drivers/misc/sgi-xp/xp_sn2.c
 create mode 100644 drivers/misc/sgi-xp/xp_uv.c
 create mode 100644 drivers/misc/sgi-xp/xpc.h
 create mode 100644 drivers/misc/sgi-xp/xpc_channel.c
 create mode 100644 drivers/misc/sgi-xp/xpc_main.c
 create mode 100644 drivers/misc/sgi-xp/xpc_partition.c
 create mode 100644 drivers/misc/sgi-xp/xpc_sn2.c
 create mode 100644 drivers/misc/sgi-xp/xpc_uv.c
 create mode 100644 drivers/misc/sgi-xp/xpnet.c
 create mode 100644 drivers/misc/spear13xx_pcie_gadget.c
 create mode 100644 drivers/misc/sram-exec.c
 create mode 100644 drivers/misc/sram.c
 create mode 100644 drivers/misc/sram.h
 create mode 100644 drivers/misc/ti-st/Kconfig
 create mode 100644 drivers/misc/ti-st/Makefile
 create mode 100644 drivers/misc/ti-st/st_core.c
 create mode 100644 drivers/misc/ti-st/st_kim.c
 create mode 100644 drivers/misc/ti-st/st_ll.c
 create mode 100644 drivers/misc/tifm_7xx1.c
 create mode 100644 drivers/misc/tifm_core.c
 create mode 100644 drivers/misc/tsl2550.c
 create mode 100644 drivers/misc/vexpress-syscfg.c
 create mode 100644 drivers/misc/vmw_balloon.c
 create mode 100644 drivers/misc/vmw_vmci/Kconfig
 create mode 100644 drivers/misc/vmw_vmci/Makefile
 create mode 100644 drivers/misc/vmw_vmci/vmci_context.c
 create mode 100644 drivers/misc/vmw_vmci/vmci_context.h
 create mode 100644 drivers/misc/vmw_vmci/vmci_datagram.c
 create mode 100644 drivers/misc/vmw_vmci/vmci_datagram.h
 create mode 100644 drivers/misc/vmw_vmci/vmci_doorbell.c
 create mode 100644 drivers/misc/vmw_vmci/vmci_doorbell.h
 create mode 100644 drivers/misc/vmw_vmci/vmci_driver.c
 create mode 100644 drivers/misc/vmw_vmci/vmci_driver.h
 create mode 100644 drivers/misc/vmw_vmci/vmci_event.c
 create mode 100644 drivers/misc/vmw_vmci/vmci_event.h
 create mode 100644 drivers/misc/vmw_vmci/vmci_guest.c
 create mode 100644 drivers/misc/vmw_vmci/vmci_handle_array.c
 create mode 100644 drivers/misc/vmw_vmci/vmci_handle_array.h
 create mode 100644 drivers/misc/vmw_vmci/vmci_host.c
 create mode 100644 drivers/misc/vmw_vmci/vmci_queue_pair.c
 create mode 100644 drivers/misc/vmw_vmci/vmci_queue_pair.h
 create mode 100644 drivers/misc/vmw_vmci/vmci_resource.c
 create mode 100644 drivers/misc/vmw_vmci/vmci_resource.h
 create mode 100644 drivers/misc/vmw_vmci/vmci_route.c
 create mode 100644 drivers/misc/vmw_vmci/vmci_route.h

(limited to 'drivers/misc')

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
new file mode 100644
index 000000000..3726eacdf
--- /dev/null
+++ b/drivers/misc/Kconfig
@@ -0,0 +1,530 @@
+#
+# Misc strange devices
+#
+
+menu "Misc devices"
+
+config SENSORS_LIS3LV02D
+	tristate
+	depends on INPUT
+	select INPUT_POLLDEV
+	default n
+
+config AD525X_DPOT
+	tristate "Analog Devices Digital Potentiometers"
+	depends on (I2C || SPI) && SYSFS
+	help
+	  If you say yes here, you get support for the Analog Devices
+	  AD5258, AD5259, AD5251, AD5252, AD5253, AD5254, AD5255
+	  AD5160, AD5161, AD5162, AD5165, AD5200, AD5201, AD5203,
+	  AD5204, AD5206, AD5207, AD5231, AD5232, AD5233, AD5235,
+	  AD5260, AD5262, AD5263, AD5290, AD5291, AD5292, AD5293,
+	  AD7376, AD8400, AD8402, AD8403, ADN2850, AD5241, AD5242,
+	  AD5243, AD5245, AD5246, AD5247, AD5248, AD5280, AD5282,
+	  ADN2860, AD5273, AD5171, AD5170, AD5172, AD5173, AD5270,
+	  AD5271, AD5272, AD5274
+	  digital potentiometer chips.
+
+	  See Documentation/misc-devices/ad525x_dpot.txt for the
+	  userspace interface.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called ad525x_dpot.
+
+config AD525X_DPOT_I2C
+	tristate "support I2C bus connection"
+	depends on AD525X_DPOT && I2C
+	help
+	  Say Y here if you have a digital potentiometers hooked to an I2C bus.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called ad525x_dpot-i2c.
+
+config AD525X_DPOT_SPI
+	tristate "support SPI bus connection"
+	depends on AD525X_DPOT && SPI_MASTER
+	help
+	  Say Y here if you have a digital potentiometers hooked to an SPI bus.
+
+	  If unsure, say N (but it's safe to say "Y").
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called ad525x_dpot-spi.
+
+config ATMEL_TCLIB
+	bool "Atmel AT32/AT91 Timer/Counter Library"
+	depends on ARCH_AT91
+	help
+	  Select this if you want a library to allocate the Timer/Counter
+	  blocks found on many Atmel processors.  This facilitates using
+	  these blocks by different drivers despite processor differences.
+
+config ATMEL_TCB_CLKSRC
+	bool "TC Block Clocksource"
+	depends on ATMEL_TCLIB
+	default y
+	help
+	  Select this to get a high precision clocksource based on a
+	  TC block with a 5+ MHz base clock rate.  Two timer channels
+	  are combined to make a single 32-bit timer.
+
+	  When GENERIC_CLOCKEVENTS is defined, the third timer channel
+	  may be used as a clock event device supporting oneshot mode
+	  (delays of up to two seconds) based on the 32 KiHz clock.
+
+config ATMEL_TCB_CLKSRC_BLOCK
+	int
+	depends on ATMEL_TCB_CLKSRC
+	default 0
+	range 0 1
+	help
+	  Some chips provide more than one TC block, so you have the
+	  choice of which one to use for the clock framework.  The other
+	  TC can be used for other purposes, such as PWM generation and
+	  interval timing.
+
+config DUMMY_IRQ
+	tristate "Dummy IRQ handler"
+	default n
+	---help---
+	  This module accepts a single 'irq' parameter, which it should register for.
+	  The sole purpose of this module is to help with debugging of systems on
+	  which spurious IRQs would happen on disabled IRQ vector.
+
+config IBM_ASM
+	tristate "Device driver for IBM RSA service processor"
+	depends on X86 && PCI && INPUT
+	depends on SERIAL_8250 || SERIAL_8250=n
+	---help---
+	  This option enables device driver support for in-band access to the
+	  IBM RSA (Condor) service processor in eServer xSeries systems.
+	  The ibmasm device driver allows user space application to access
+	  ASM (Advanced Systems Management) functions on the service
+	  processor. The driver is meant to be used in conjunction with
+	  a user space API.
+	  The ibmasm driver also enables the OS to use the UART on the
+	  service processor board as a regular serial port. To make use of
+	  this feature serial driver support (CONFIG_SERIAL_8250) must be
+	  enabled.
+
+	  WARNING: This software may not be supported or function
+	  correctly on your IBM server. Please consult the IBM ServerProven
+	  website <http://www-03.ibm.com/systems/info/x86servers/serverproven/compat/us/>
+	  for information on the specific driver level and support statement
+	  for your IBM server.
+
+config IBMVMC
+	tristate "IBM Virtual Management Channel support"
+	depends on PPC_PSERIES
+	help
+	  This is the IBM POWER Virtual Management Channel
+
+	  This driver is to be used for the POWER Virtual
+	  Management Channel virtual adapter on the PowerVM
+	  platform. It provides both request/response and
+	  async message support through the /dev/ibmvmc node.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called ibmvmc.
+
+config PHANTOM
+	tristate "Sensable PHANToM (PCI)"
+	depends on PCI
+	help
+	  Say Y here if you want to build a driver for Sensable PHANToM device.
+
+	  This driver is only for PCI PHANToMs.
+
+	  If you choose to build module, its name will be phantom. If unsure,
+	  say N here.
+
+config INTEL_MID_PTI
+	tristate "Parallel Trace Interface for MIPI P1149.7 cJTAG standard"
+	depends on PCI && TTY && (X86_INTEL_MID || COMPILE_TEST)
+	default n
+	help
+	  The PTI (Parallel Trace Interface) driver directs
+	  trace data routed from various parts in the system out
+	  through an Intel Penwell PTI port and out of the mobile
+	  device for analysis with a debugging tool (Lauterbach or Fido).
+
+	  You should select this driver if the target kernel is meant for
+	  an Intel Atom (non-netbook) mobile device containing a MIPI
+	  P1149.7 standard implementation.
+
+config SGI_IOC4
+	tristate "SGI IOC4 Base IO support"
+	depends on PCI
+	---help---
+	  This option enables basic support for the IOC4 chip on certain
+	  SGI IO controller cards (IO9, IO10, and PCI-RT).  This option
+	  does not enable any specific functions on such a card, but provides
+	  necessary infrastructure for other drivers to utilize.
+
+	  If you have an SGI Altix with an IOC4-based card say Y.
+	  Otherwise say N.
+
+config TIFM_CORE
+	tristate "TI Flash Media interface support"
+	depends on PCI
+	help
+	  If you want support for Texas Instruments(R) Flash Media adapters
+	  you should select this option and then also choose an appropriate
+	  host adapter, such as 'TI Flash Media PCI74xx/PCI76xx host adapter
+	  support', if you have a TI PCI74xx compatible card reader, for
+	  example.
+	  You will also have to select some flash card format drivers. MMC/SD
+	  cards are supported via 'MMC/SD Card support: TI Flash Media MMC/SD
+	  Interface support (MMC_TIFM_SD)'.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called tifm_core.
+
+config TIFM_7XX1
+	tristate "TI Flash Media PCI74xx/PCI76xx host adapter support"
+	depends on PCI && TIFM_CORE
+	default TIFM_CORE
+	help
+	  This option enables support for Texas Instruments(R) PCI74xx and
+	  PCI76xx families of Flash Media adapters, found in many laptops.
+	  To make actual use of the device, you will have to select some
+	  flash card format drivers, as outlined in the TIFM_CORE Help.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called tifm_7xx1.
+
+config ICS932S401
+	tristate "Integrated Circuits ICS932S401"
+	depends on I2C
+	help
+	  If you say yes here you get support for the Integrated Circuits
+	  ICS932S401 clock control chips.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called ics932s401.
+
+config ATMEL_SSC
+	tristate "Device driver for Atmel SSC peripheral"
+	depends on HAS_IOMEM && (ARCH_AT91 || COMPILE_TEST)
+	---help---
+	  This option enables device driver support for Atmel Synchronized
+	  Serial Communication peripheral (SSC).
+
+	  The SSC peripheral supports a wide variety of serial frame based
+	  communications, i.e. I2S, SPI, etc.
+
+	  If unsure, say N.
+
+config ENCLOSURE_SERVICES
+	tristate "Enclosure Services"
+	default n
+	help
+	  Provides support for intelligent enclosures (bays which
+	  contain storage devices).  You also need either a host
+	  driver (SCSI/ATA) which supports enclosures
+	  or a SCSI enclosure device (SES) to use these services.
+
+config SGI_XP
+	tristate "Support communication between SGI SSIs"
+	depends on NET
+	depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_UV) && SMP
+	select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
+	select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
+	select SGI_GRU if X86_64 && SMP
+	---help---
+	  An SGI machine can be divided into multiple Single System
+	  Images which act independently of each other and have
+	  hardware based memory protection from the others.  Enabling
+	  this feature will allow for direct communication between SSIs
+	  based on a network adapter and DMA messaging.
+
+config CS5535_MFGPT
+	tristate "CS5535/CS5536 Geode Multi-Function General Purpose Timer (MFGPT) support"
+	depends on MFD_CS5535
+	default n
+	help
+	  This driver provides access to MFGPT functionality for other
+	  drivers that need timers.  MFGPTs are available in the CS5535 and
+	  CS5536 companion chips that are found in AMD Geode and several
+	  other platforms.  They have a better resolution and max interval
+	  than the generic PIT, and are suitable for use as high-res timers.
+	  You probably don't want to enable this manually; other drivers that
+	  make use of it should enable it.
+
+config CS5535_MFGPT_DEFAULT_IRQ
+	int
+	depends on CS5535_MFGPT
+	default 7
+	help
+	  MFGPTs on the CS5535 require an interrupt.  The selected IRQ
+	  can be overridden as a module option as well as by driver that
+	  use the cs5535_mfgpt_ API; however, different architectures might
+	  want to use a different IRQ by default.  This is here for
+	  architectures to set as necessary.
+
+config CS5535_CLOCK_EVENT_SRC
+	tristate "CS5535/CS5536 high-res timer (MFGPT) events"
+	depends on GENERIC_CLOCKEVENTS && CS5535_MFGPT
+	help
+	  This driver provides a clock event source based on the MFGPT
+	  timer(s) in the CS5535 and CS5536 companion chips.
+	  MFGPTs have a better resolution and max interval than the
+	  generic PIT, and are suitable for use as high-res timers.
+
+config HP_ILO
+	tristate "Channel interface driver for the HP iLO processor"
+	depends on PCI
+	default n
+	help
+	  The channel interface driver allows applications to communicate
+	  with iLO management processors present on HP ProLiant servers.
+	  Upon loading, the driver creates /dev/hpilo/dXccbN files, which
+	  can be used to gather data from the management processor, via
+	  read and write system calls.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called hpilo.
+
+config QCOM_COINCELL
+	tristate "Qualcomm coincell charger support"
+	depends on MFD_SPMI_PMIC || COMPILE_TEST
+	help
+	  This driver supports the coincell block found inside of
+	  Qualcomm PMICs.  The coincell charger provides a means to
+	  charge a coincell battery or backup capacitor which is used
+	  to maintain PMIC register and RTC state in the absence of
+	  external power.
+
+config SGI_GRU
+	tristate "SGI GRU driver"
+	depends on X86_UV && SMP
+	default n
+	select MMU_NOTIFIER
+	---help---
+	The GRU is a hardware resource located in the system chipset. The GRU
+	contains memory that can be mmapped into the user address space. This memory is
+	used to communicate with the GRU to perform functions such as load/store,
+	scatter/gather, bcopy, AMOs, etc.  The GRU is directly accessed by user
+	instructions using user virtual addresses. GRU instructions (ex., bcopy) use
+	user virtual addresses for operands.
+
+	If you are not running on a SGI UV system, say N.
+
+config SGI_GRU_DEBUG
+	bool  "SGI GRU driver debug"
+	depends on SGI_GRU
+	default n
+	---help---
+	This option enables additional debugging code for the SGI GRU driver.
+	If you are unsure, say N.
+
+config APDS9802ALS
+	tristate "Medfield Avago APDS9802 ALS Sensor module"
+	depends on I2C
+	help
+	  If you say yes here you get support for the ALS APDS9802 ambient
+	  light sensor.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called apds9802als.
+
+config ISL29003
+	tristate "Intersil ISL29003 ambient light sensor"
+	depends on I2C && SYSFS
+	help
+	  If you say yes here you get support for the Intersil ISL29003
+	  ambient light sensor.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called isl29003.
+
+config ISL29020
+	tristate "Intersil ISL29020 ambient light sensor"
+	depends on I2C
+	help
+	  If you say yes here you get support for the Intersil ISL29020
+	  ambient light sensor.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called isl29020.
+
+config SENSORS_TSL2550
+	tristate "Taos TSL2550 ambient light sensor"
+	depends on I2C && SYSFS
+	help
+	  If you say yes here you get support for the Taos TSL2550
+	  ambient light sensor.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called tsl2550.
+
+config SENSORS_BH1770
+         tristate "BH1770GLC / SFH7770 combined ALS - Proximity sensor"
+         depends on I2C
+         ---help---
+           Say Y here if you want to build a driver for BH1770GLC (ROHM) or
+	   SFH7770 (Osram) combined ambient light and proximity sensor chip.
+
+           To compile this driver as a module, choose M here: the
+           module will be called bh1770glc. If unsure, say N here.
+
+config SENSORS_APDS990X
+	 tristate "APDS990X combined als and proximity sensors"
+	 depends on I2C
+	 default n
+	 ---help---
+	   Say Y here if you want to build a driver for Avago APDS990x
+	   combined ambient light and proximity sensor chip.
+
+	   To compile this driver as a module, choose M here: the
+	   module will be called apds990x. If unsure, say N here.
+
+config HMC6352
+	tristate "Honeywell HMC6352 compass"
+	depends on I2C
+	help
+	  This driver provides support for the Honeywell HMC6352 compass,
+	  providing configuration and heading data via sysfs.
+
+config DS1682
+	tristate "Dallas DS1682 Total Elapsed Time Recorder with Alarm"
+	depends on I2C
+	help
+	  If you say yes here you get support for Dallas Semiconductor
+	  DS1682 Total Elapsed Time Recorder.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called ds1682.
+
+config SPEAR13XX_PCIE_GADGET
+	bool "PCIe gadget support for SPEAr13XX platform"
+	depends on ARCH_SPEAR13XX && BROKEN
+	default n
+	help
+	 This option enables gadget support for PCIe controller. If
+	 board file defines any controller as PCIe endpoint then a sysfs
+	 entry will be created for that controller. User can use these
+	 sysfs node to configure PCIe EP as per his requirements.
+
+config VMWARE_BALLOON
+	tristate "VMware Balloon Driver"
+	depends on VMWARE_VMCI && X86 && HYPERVISOR_GUEST
+	help
+	  This is VMware physical memory management driver which acts
+	  like a "balloon" that can be inflated to reclaim physical pages
+	  by reserving them in the guest and invalidating them in the
+	  monitor, freeing up the underlying machine pages so they can
+	  be allocated to other guests. The balloon can also be deflated
+	  to allow the guest to use more physical memory.
+
+	  If unsure, say N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called vmw_balloon.
+
+config PCH_PHUB
+	tristate "Intel EG20T PCH/LAPIS Semicon IOH(ML7213/ML7223/ML7831) PHUB"
+	select GENERIC_NET_UTILS
+	depends on PCI && (X86_32 || MIPS || COMPILE_TEST)
+	help
+	  This driver is for PCH(Platform controller Hub) PHUB(Packet Hub) of
+	  Intel Topcliff which is an IOH(Input/Output Hub) for x86 embedded
+	  processor. The Topcliff has MAC address and Option ROM data in SROM.
+	  This driver can access MAC address and Option ROM data in SROM.
+
+	  This driver also can be used for LAPIS Semiconductor's IOH,
+	  ML7213/ML7223/ML7831.
+	  ML7213 which is for IVI(In-Vehicle Infotainment) use.
+	  ML7223 IOH is for MP(Media Phone) use.
+	  ML7831 IOH is for general purpose use.
+	  ML7213/ML7223/ML7831 is companion chip for Intel Atom E6xx series.
+	  ML7213/ML7223/ML7831 is completely compatible for Intel EG20T PCH.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called pch_phub.
+
+config USB_SWITCH_FSA9480
+	tristate "FSA9480 USB Switch"
+	depends on I2C
+	help
+	  The FSA9480 is a USB port accessory detector and switch.
+	  The FSA9480 is fully controlled using I2C and enables USB data,
+	  stereo and mono audio, video, microphone and UART data to use
+	  a common connector port.
+
+config LATTICE_ECP3_CONFIG
+	tristate "Lattice ECP3 FPGA bitstream configuration via SPI"
+	depends on SPI && SYSFS
+	select FW_LOADER
+	default	n
+	help
+	  This option enables support for bitstream configuration (programming
+	  or loading) of the Lattice ECP3 FPGA family via SPI.
+
+	  If unsure, say N.
+
+config SRAM
+	bool "Generic on-chip SRAM driver"
+	depends on HAS_IOMEM
+	select GENERIC_ALLOCATOR
+	select SRAM_EXEC if ARM
+	help
+	  This driver allows you to declare a memory region to be managed by
+	  the genalloc API. It is supposed to be used for small on-chip SRAM
+	  areas found on many SoCs.
+
+config SRAM_EXEC
+	bool
+
+config VEXPRESS_SYSCFG
+	bool "Versatile Express System Configuration driver"
+	depends on VEXPRESS_CONFIG
+	default y
+	help
+	  ARM Ltd. Versatile Express uses specialised platform configuration
+	  bus. System Configuration interface is one of the possible means
+	  of generating transactions on this bus.
+
+config ASPEED_LPC_CTRL
+	depends on (ARCH_ASPEED || COMPILE_TEST) && REGMAP && MFD_SYSCON
+	tristate "Aspeed ast2400/2500 HOST LPC to BMC bridge control"
+	---help---
+	  Control Aspeed ast2400/2500 HOST LPC to BMC mappings through
+	  ioctl()s, the driver also provides a read/write interface to a BMC ram
+	  region where the host LPC read/write region can be buffered.
+
+config ASPEED_LPC_SNOOP
+	tristate "Aspeed ast2500 HOST LPC snoop support"
+	depends on (ARCH_ASPEED || COMPILE_TEST) && REGMAP && MFD_SYSCON
+	help
+	  Provides a driver to control the LPC snoop interface which
+	  allows the BMC to listen on and save the data written by
+	  the host to an arbitrary LPC I/O port.
+
+config PCI_ENDPOINT_TEST
+	depends on PCI
+	select CRC32
+	tristate "PCI Endpoint Test driver"
+	---help---
+           Enable this configuration option to enable the host side test driver
+           for PCI Endpoint.
+
+config MISC_RTSX
+	tristate
+	default MISC_RTSX_PCI || MISC_RTSX_USB
+
+source "drivers/misc/c2port/Kconfig"
+source "drivers/misc/eeprom/Kconfig"
+source "drivers/misc/cb710/Kconfig"
+source "drivers/misc/ti-st/Kconfig"
+source "drivers/misc/lis3lv02d/Kconfig"
+source "drivers/misc/altera-stapl/Kconfig"
+source "drivers/misc/mei/Kconfig"
+source "drivers/misc/vmw_vmci/Kconfig"
+source "drivers/misc/mic/Kconfig"
+source "drivers/misc/genwqe/Kconfig"
+source "drivers/misc/echo/Kconfig"
+source "drivers/misc/cxl/Kconfig"
+source "drivers/misc/ocxl/Kconfig"
+source "drivers/misc/cardreader/Kconfig"
+endmenu
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
new file mode 100644
index 000000000..af22bbc3d
--- /dev/null
+++ b/drivers/misc/Makefile
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for misc devices that really don't fit anywhere else.
+#
+
+obj-$(CONFIG_IBM_ASM)		+= ibmasm/
+obj-$(CONFIG_IBMVMC)		+= ibmvmc.o
+obj-$(CONFIG_AD525X_DPOT)	+= ad525x_dpot.o
+obj-$(CONFIG_AD525X_DPOT_I2C)	+= ad525x_dpot-i2c.o
+obj-$(CONFIG_AD525X_DPOT_SPI)	+= ad525x_dpot-spi.o
+obj-$(CONFIG_INTEL_MID_PTI)	+= pti.o
+obj-$(CONFIG_ATMEL_SSC)		+= atmel-ssc.o
+obj-$(CONFIG_ATMEL_TCLIB)	+= atmel_tclib.o
+obj-$(CONFIG_DUMMY_IRQ)		+= dummy-irq.o
+obj-$(CONFIG_ICS932S401)	+= ics932s401.o
+obj-$(CONFIG_LKDTM)		+= lkdtm/
+obj-$(CONFIG_TIFM_CORE)       	+= tifm_core.o
+obj-$(CONFIG_TIFM_7XX1)       	+= tifm_7xx1.o
+obj-$(CONFIG_PHANTOM)		+= phantom.o
+obj-$(CONFIG_QCOM_COINCELL)	+= qcom-coincell.o
+obj-$(CONFIG_SENSORS_BH1770)	+= bh1770glc.o
+obj-$(CONFIG_SENSORS_APDS990X)	+= apds990x.o
+obj-$(CONFIG_SGI_IOC4)		+= ioc4.o
+obj-$(CONFIG_ENCLOSURE_SERVICES) += enclosure.o
+obj-$(CONFIG_KGDB_TESTS)	+= kgdbts.o
+obj-$(CONFIG_SGI_XP)		+= sgi-xp/
+obj-$(CONFIG_SGI_GRU)		+= sgi-gru/
+obj-$(CONFIG_CS5535_MFGPT)	+= cs5535-mfgpt.o
+obj-$(CONFIG_HP_ILO)		+= hpilo.o
+obj-$(CONFIG_APDS9802ALS)	+= apds9802als.o
+obj-$(CONFIG_ISL29003)		+= isl29003.o
+obj-$(CONFIG_ISL29020)		+= isl29020.o
+obj-$(CONFIG_SENSORS_TSL2550)	+= tsl2550.o
+obj-$(CONFIG_DS1682)		+= ds1682.o
+obj-$(CONFIG_C2PORT)		+= c2port/
+obj-$(CONFIG_HMC6352)		+= hmc6352.o
+obj-y				+= eeprom/
+obj-y				+= cb710/
+obj-$(CONFIG_SPEAR13XX_PCIE_GADGET)	+= spear13xx_pcie_gadget.o
+obj-$(CONFIG_VMWARE_BALLOON)	+= vmw_balloon.o
+obj-$(CONFIG_PCH_PHUB)		+= pch_phub.o
+obj-y				+= ti-st/
+obj-y				+= lis3lv02d/
+obj-$(CONFIG_USB_SWITCH_FSA9480) += fsa9480.o
+obj-$(CONFIG_ALTERA_STAPL)	+=altera-stapl/
+obj-$(CONFIG_INTEL_MEI)		+= mei/
+obj-$(CONFIG_VMWARE_VMCI)	+= vmw_vmci/
+obj-$(CONFIG_LATTICE_ECP3_CONFIG)	+= lattice-ecp3-config.o
+obj-$(CONFIG_SRAM)		+= sram.o
+obj-$(CONFIG_SRAM_EXEC)		+= sram-exec.o
+obj-y				+= mic/
+obj-$(CONFIG_GENWQE)		+= genwqe/
+obj-$(CONFIG_ECHO)		+= echo/
+obj-$(CONFIG_VEXPRESS_SYSCFG)	+= vexpress-syscfg.o
+obj-$(CONFIG_CXL_BASE)		+= cxl/
+obj-$(CONFIG_ASPEED_LPC_CTRL)	+= aspeed-lpc-ctrl.o
+obj-$(CONFIG_ASPEED_LPC_SNOOP)	+= aspeed-lpc-snoop.o
+obj-$(CONFIG_PCI_ENDPOINT_TEST)	+= pci_endpoint_test.o
+obj-$(CONFIG_OCXL)		+= ocxl/
+obj-$(CONFIG_MISC_RTSX)		+= cardreader/
diff --git a/drivers/misc/ad525x_dpot-i2c.c b/drivers/misc/ad525x_dpot-i2c.c
new file mode 100644
index 000000000..4f832002d
--- /dev/null
+++ b/drivers/misc/ad525x_dpot-i2c.c
@@ -0,0 +1,119 @@
+/*
+ * Driver for the Analog Devices digital potentiometers (I2C bus)
+ *
+ * Copyright (C) 2010-2011 Michael Hennerich, Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/i2c.h>
+#include <linux/module.h>
+
+#include "ad525x_dpot.h"
+
+/* I2C bus functions */
+static int write_d8(void *client, u8 val)
+{
+	return i2c_smbus_write_byte(client, val);
+}
+
+static int write_r8d8(void *client, u8 reg, u8 val)
+{
+	return i2c_smbus_write_byte_data(client, reg, val);
+}
+
+static int write_r8d16(void *client, u8 reg, u16 val)
+{
+	return i2c_smbus_write_word_data(client, reg, val);
+}
+
+static int read_d8(void *client)
+{
+	return i2c_smbus_read_byte(client);
+}
+
+static int read_r8d8(void *client, u8 reg)
+{
+	return i2c_smbus_read_byte_data(client, reg);
+}
+
+static int read_r8d16(void *client, u8 reg)
+{
+	return i2c_smbus_read_word_data(client, reg);
+}
+
+static const struct ad_dpot_bus_ops bops = {
+	.read_d8	= read_d8,
+	.read_r8d8	= read_r8d8,
+	.read_r8d16	= read_r8d16,
+	.write_d8	= write_d8,
+	.write_r8d8	= write_r8d8,
+	.write_r8d16	= write_r8d16,
+};
+
+static int ad_dpot_i2c_probe(struct i2c_client *client,
+				      const struct i2c_device_id *id)
+{
+	struct ad_dpot_bus_data bdata = {
+		.client = client,
+		.bops = &bops,
+	};
+
+	if (!i2c_check_functionality(client->adapter,
+				     I2C_FUNC_SMBUS_WORD_DATA)) {
+		dev_err(&client->dev, "SMBUS Word Data not Supported\n");
+		return -EIO;
+	}
+
+	return ad_dpot_probe(&client->dev, &bdata, id->driver_data, id->name);
+}
+
+static int ad_dpot_i2c_remove(struct i2c_client *client)
+{
+	return ad_dpot_remove(&client->dev);
+}
+
+static const struct i2c_device_id ad_dpot_id[] = {
+	{"ad5258", AD5258_ID},
+	{"ad5259", AD5259_ID},
+	{"ad5251", AD5251_ID},
+	{"ad5252", AD5252_ID},
+	{"ad5253", AD5253_ID},
+	{"ad5254", AD5254_ID},
+	{"ad5255", AD5255_ID},
+	{"ad5241", AD5241_ID},
+	{"ad5242", AD5242_ID},
+	{"ad5243", AD5243_ID},
+	{"ad5245", AD5245_ID},
+	{"ad5246", AD5246_ID},
+	{"ad5247", AD5247_ID},
+	{"ad5248", AD5248_ID},
+	{"ad5280", AD5280_ID},
+	{"ad5282", AD5282_ID},
+	{"adn2860", ADN2860_ID},
+	{"ad5273", AD5273_ID},
+	{"ad5161", AD5161_ID},
+	{"ad5171", AD5171_ID},
+	{"ad5170", AD5170_ID},
+	{"ad5172", AD5172_ID},
+	{"ad5173", AD5173_ID},
+	{"ad5272", AD5272_ID},
+	{"ad5274", AD5274_ID},
+	{}
+};
+MODULE_DEVICE_TABLE(i2c, ad_dpot_id);
+
+static struct i2c_driver ad_dpot_i2c_driver = {
+	.driver = {
+		.name	= "ad_dpot",
+	},
+	.probe		= ad_dpot_i2c_probe,
+	.remove		= ad_dpot_i2c_remove,
+	.id_table	= ad_dpot_id,
+};
+
+module_i2c_driver(ad_dpot_i2c_driver);
+
+MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
+MODULE_DESCRIPTION("digital potentiometer I2C bus driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/ad525x_dpot-spi.c b/drivers/misc/ad525x_dpot-spi.c
new file mode 100644
index 000000000..39a7f517e
--- /dev/null
+++ b/drivers/misc/ad525x_dpot-spi.c
@@ -0,0 +1,146 @@
+/*
+ * Driver for the Analog Devices digital potentiometers (SPI bus)
+ *
+ * Copyright (C) 2010-2011 Michael Hennerich, Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/spi/spi.h>
+#include <linux/module.h>
+
+#include "ad525x_dpot.h"
+
+/* SPI bus functions */
+static int write8(void *client, u8 val)
+{
+	u8 data = val;
+
+	return spi_write(client, &data, 1);
+}
+
+static int write16(void *client, u8 reg, u8 val)
+{
+	u8 data[2] = {reg, val};
+
+	return spi_write(client, data, 2);
+}
+
+static int write24(void *client, u8 reg, u16 val)
+{
+	u8 data[3] = {reg, val >> 8, val};
+
+	return spi_write(client, data, 3);
+}
+
+static int read8(void *client)
+{
+	int ret;
+	u8 data;
+
+	ret = spi_read(client, &data, 1);
+	if (ret < 0)
+		return ret;
+
+	return data;
+}
+
+static int read16(void *client, u8 reg)
+{
+	int ret;
+	u8 buf_rx[2];
+
+	write16(client, reg, 0);
+	ret = spi_read(client, buf_rx, 2);
+	if (ret < 0)
+		return ret;
+
+	return (buf_rx[0] << 8) |  buf_rx[1];
+}
+
+static int read24(void *client, u8 reg)
+{
+	int ret;
+	u8 buf_rx[3];
+
+	write24(client, reg, 0);
+	ret = spi_read(client, buf_rx, 3);
+	if (ret < 0)
+		return ret;
+
+	return (buf_rx[1] << 8) |  buf_rx[2];
+}
+
+static const struct ad_dpot_bus_ops bops = {
+	.read_d8	= read8,
+	.read_r8d8	= read16,
+	.read_r8d16	= read24,
+	.write_d8	= write8,
+	.write_r8d8	= write16,
+	.write_r8d16	= write24,
+};
+static int ad_dpot_spi_probe(struct spi_device *spi)
+{
+	struct ad_dpot_bus_data bdata = {
+		.client = spi,
+		.bops = &bops,
+	};
+
+	return ad_dpot_probe(&spi->dev, &bdata,
+			     spi_get_device_id(spi)->driver_data,
+			     spi_get_device_id(spi)->name);
+}
+
+static int ad_dpot_spi_remove(struct spi_device *spi)
+{
+	return ad_dpot_remove(&spi->dev);
+}
+
+static const struct spi_device_id ad_dpot_spi_id[] = {
+	{"ad5160", AD5160_ID},
+	{"ad5161", AD5161_ID},
+	{"ad5162", AD5162_ID},
+	{"ad5165", AD5165_ID},
+	{"ad5200", AD5200_ID},
+	{"ad5201", AD5201_ID},
+	{"ad5203", AD5203_ID},
+	{"ad5204", AD5204_ID},
+	{"ad5206", AD5206_ID},
+	{"ad5207", AD5207_ID},
+	{"ad5231", AD5231_ID},
+	{"ad5232", AD5232_ID},
+	{"ad5233", AD5233_ID},
+	{"ad5235", AD5235_ID},
+	{"ad5260", AD5260_ID},
+	{"ad5262", AD5262_ID},
+	{"ad5263", AD5263_ID},
+	{"ad5290", AD5290_ID},
+	{"ad5291", AD5291_ID},
+	{"ad5292", AD5292_ID},
+	{"ad5293", AD5293_ID},
+	{"ad7376", AD7376_ID},
+	{"ad8400", AD8400_ID},
+	{"ad8402", AD8402_ID},
+	{"ad8403", AD8403_ID},
+	{"adn2850", ADN2850_ID},
+	{"ad5270", AD5270_ID},
+	{"ad5271", AD5271_ID},
+	{}
+};
+MODULE_DEVICE_TABLE(spi, ad_dpot_spi_id);
+
+static struct spi_driver ad_dpot_spi_driver = {
+	.driver = {
+		.name	= "ad_dpot",
+	},
+	.probe		= ad_dpot_spi_probe,
+	.remove		= ad_dpot_spi_remove,
+	.id_table	= ad_dpot_spi_id,
+};
+
+module_spi_driver(ad_dpot_spi_driver);
+
+MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
+MODULE_DESCRIPTION("digital potentiometer SPI bus driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("spi:ad_dpot");
diff --git a/drivers/misc/ad525x_dpot.c b/drivers/misc/ad525x_dpot.c
new file mode 100644
index 000000000..bc591b716
--- /dev/null
+++ b/drivers/misc/ad525x_dpot.c
@@ -0,0 +1,765 @@
+/*
+ * ad525x_dpot: Driver for the Analog Devices digital potentiometers
+ * Copyright (c) 2009-2010 Analog Devices, Inc.
+ * Author: Michael Hennerich <hennerich@blackfin.uclinux.org>
+ *
+ * DEVID		#Wipers		#Positions	Resistor Options (kOhm)
+ * AD5258		1		64		1, 10, 50, 100
+ * AD5259		1		256		5, 10, 50, 100
+ * AD5251		2		64		1, 10, 50, 100
+ * AD5252		2		256		1, 10, 50, 100
+ * AD5255		3		512		25, 250
+ * AD5253		4		64		1, 10, 50, 100
+ * AD5254		4		256		1, 10, 50, 100
+ * AD5160		1		256		5, 10, 50, 100
+ * AD5161		1		256		5, 10, 50, 100
+ * AD5162		2		256		2.5, 10, 50, 100
+ * AD5165		1		256		100
+ * AD5200		1		256		10, 50
+ * AD5201		1		33		10, 50
+ * AD5203		4		64		10, 100
+ * AD5204		4		256		10, 50, 100
+ * AD5206		6		256		10, 50, 100
+ * AD5207		2		256		10, 50, 100
+ * AD5231		1		1024		10, 50, 100
+ * AD5232		2		256		10, 50, 100
+ * AD5233		4		64		10, 50, 100
+ * AD5235		2		1024		25, 250
+ * AD5260		1		256		20, 50, 200
+ * AD5262		2		256		20, 50, 200
+ * AD5263		4		256		20, 50, 200
+ * AD5290		1		256		10, 50, 100
+ * AD5291		1		256		20, 50, 100  (20-TP)
+ * AD5292		1		1024		20, 50, 100  (20-TP)
+ * AD5293		1		1024		20, 50, 100
+ * AD7376		1		128		10, 50, 100, 1M
+ * AD8400		1		256		1, 10, 50, 100
+ * AD8402		2		256		1, 10, 50, 100
+ * AD8403		4		256		1, 10, 50, 100
+ * ADN2850		3		512		25, 250
+ * AD5241		1		256		10, 100, 1M
+ * AD5246		1		128		5, 10, 50, 100
+ * AD5247		1		128		5, 10, 50, 100
+ * AD5245		1		256		5, 10, 50, 100
+ * AD5243		2		256		2.5, 10, 50, 100
+ * AD5248		2		256		2.5, 10, 50, 100
+ * AD5242		2		256		20, 50, 200
+ * AD5280		1		256		20, 50, 200
+ * AD5282		2		256		20, 50, 200
+ * ADN2860		3		512		25, 250
+ * AD5273		1		64		1, 10, 50, 100 (OTP)
+ * AD5171		1		64		5, 10, 50, 100 (OTP)
+ * AD5170		1		256		2.5, 10, 50, 100 (OTP)
+ * AD5172		2		256		2.5, 10, 50, 100 (OTP)
+ * AD5173		2		256		2.5, 10, 50, 100 (OTP)
+ * AD5270		1		1024		20, 50, 100 (50-TP)
+ * AD5271		1		256		20, 50, 100 (50-TP)
+ * AD5272		1		1024		20, 50, 100 (50-TP)
+ * AD5274		1		256		20, 50, 100 (50-TP)
+ *
+ * See Documentation/misc-devices/ad525x_dpot.txt for more info.
+ *
+ * derived from ad5258.c
+ * Copyright (c) 2009 Cyber Switching, Inc.
+ * Author: Chris Verges <chrisv@cyberswitching.com>
+ *
+ * derived from ad5252.c
+ * Copyright (c) 2006-2011 Michael Hennerich <hennerich@blackfin.uclinux.org>
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+
+#include "ad525x_dpot.h"
+
+/*
+ * Client data (each client gets its own)
+ */
+
+struct dpot_data {
+	struct ad_dpot_bus_data	bdata;
+	struct mutex update_lock;
+	unsigned int rdac_mask;
+	unsigned int max_pos;
+	unsigned long devid;
+	unsigned int uid;
+	unsigned int feat;
+	unsigned int wipers;
+	u16 rdac_cache[MAX_RDACS];
+	DECLARE_BITMAP(otp_en_mask, MAX_RDACS);
+};
+
+static inline int dpot_read_d8(struct dpot_data *dpot)
+{
+	return dpot->bdata.bops->read_d8(dpot->bdata.client);
+}
+
+static inline int dpot_read_r8d8(struct dpot_data *dpot, u8 reg)
+{
+	return dpot->bdata.bops->read_r8d8(dpot->bdata.client, reg);
+}
+
+static inline int dpot_read_r8d16(struct dpot_data *dpot, u8 reg)
+{
+	return dpot->bdata.bops->read_r8d16(dpot->bdata.client, reg);
+}
+
+static inline int dpot_write_d8(struct dpot_data *dpot, u8 val)
+{
+	return dpot->bdata.bops->write_d8(dpot->bdata.client, val);
+}
+
+static inline int dpot_write_r8d8(struct dpot_data *dpot, u8 reg, u16 val)
+{
+	return dpot->bdata.bops->write_r8d8(dpot->bdata.client, reg, val);
+}
+
+static inline int dpot_write_r8d16(struct dpot_data *dpot, u8 reg, u16 val)
+{
+	return dpot->bdata.bops->write_r8d16(dpot->bdata.client, reg, val);
+}
+
+static s32 dpot_read_spi(struct dpot_data *dpot, u8 reg)
+{
+	unsigned int ctrl = 0;
+	int value;
+
+	if (!(reg & (DPOT_ADDR_EEPROM | DPOT_ADDR_CMD))) {
+
+		if (dpot->feat & F_RDACS_WONLY)
+			return dpot->rdac_cache[reg & DPOT_RDAC_MASK];
+		if (dpot->uid == DPOT_UID(AD5291_ID) ||
+			dpot->uid == DPOT_UID(AD5292_ID) ||
+			dpot->uid == DPOT_UID(AD5293_ID)) {
+
+			value = dpot_read_r8d8(dpot,
+				DPOT_AD5291_READ_RDAC << 2);
+
+			if (dpot->uid == DPOT_UID(AD5291_ID))
+				value = value >> 2;
+
+			return value;
+		} else if (dpot->uid == DPOT_UID(AD5270_ID) ||
+			dpot->uid == DPOT_UID(AD5271_ID)) {
+
+			value = dpot_read_r8d8(dpot,
+				DPOT_AD5270_1_2_4_READ_RDAC << 2);
+
+			if (value < 0)
+				return value;
+
+			if (dpot->uid == DPOT_UID(AD5271_ID))
+				value = value >> 2;
+
+			return value;
+		}
+
+		ctrl = DPOT_SPI_READ_RDAC;
+	} else if (reg & DPOT_ADDR_EEPROM) {
+		ctrl = DPOT_SPI_READ_EEPROM;
+	}
+
+	if (dpot->feat & F_SPI_16BIT)
+		return dpot_read_r8d8(dpot, ctrl);
+	else if (dpot->feat & F_SPI_24BIT)
+		return dpot_read_r8d16(dpot, ctrl);
+
+	return -EFAULT;
+}
+
+static s32 dpot_read_i2c(struct dpot_data *dpot, u8 reg)
+{
+	int value;
+	unsigned int ctrl = 0;
+
+	switch (dpot->uid) {
+	case DPOT_UID(AD5246_ID):
+	case DPOT_UID(AD5247_ID):
+		return dpot_read_d8(dpot);
+	case DPOT_UID(AD5245_ID):
+	case DPOT_UID(AD5241_ID):
+	case DPOT_UID(AD5242_ID):
+	case DPOT_UID(AD5243_ID):
+	case DPOT_UID(AD5248_ID):
+	case DPOT_UID(AD5280_ID):
+	case DPOT_UID(AD5282_ID):
+		ctrl = ((reg & DPOT_RDAC_MASK) == DPOT_RDAC0) ?
+			0 : DPOT_AD5282_RDAC_AB;
+		return dpot_read_r8d8(dpot, ctrl);
+	case DPOT_UID(AD5170_ID):
+	case DPOT_UID(AD5171_ID):
+	case DPOT_UID(AD5273_ID):
+			return dpot_read_d8(dpot);
+	case DPOT_UID(AD5172_ID):
+	case DPOT_UID(AD5173_ID):
+		ctrl = ((reg & DPOT_RDAC_MASK) == DPOT_RDAC0) ?
+			0 : DPOT_AD5172_3_A0;
+		return dpot_read_r8d8(dpot, ctrl);
+	case DPOT_UID(AD5272_ID):
+	case DPOT_UID(AD5274_ID):
+			dpot_write_r8d8(dpot,
+				(DPOT_AD5270_1_2_4_READ_RDAC << 2), 0);
+
+			value = dpot_read_r8d16(dpot,
+				DPOT_AD5270_1_2_4_RDAC << 2);
+
+			if (value < 0)
+				return value;
+			/*
+			 * AD5272/AD5274 returns high byte first, however
+			 * underling smbus expects low byte first.
+			 */
+			value = swab16(value);
+
+			if (dpot->uid == DPOT_UID(AD5274_ID))
+				value = value >> 2;
+		return value;
+	default:
+		if ((reg & DPOT_REG_TOL) || (dpot->max_pos > 256))
+			return dpot_read_r8d16(dpot, (reg & 0xF8) |
+					((reg & 0x7) << 1));
+		else
+			return dpot_read_r8d8(dpot, reg);
+	}
+}
+
+static s32 dpot_read(struct dpot_data *dpot, u8 reg)
+{
+	if (dpot->feat & F_SPI)
+		return dpot_read_spi(dpot, reg);
+	else
+		return dpot_read_i2c(dpot, reg);
+}
+
+static s32 dpot_write_spi(struct dpot_data *dpot, u8 reg, u16 value)
+{
+	unsigned int val = 0;
+
+	if (!(reg & (DPOT_ADDR_EEPROM | DPOT_ADDR_CMD | DPOT_ADDR_OTP))) {
+		if (dpot->feat & F_RDACS_WONLY)
+			dpot->rdac_cache[reg & DPOT_RDAC_MASK] = value;
+
+		if (dpot->feat & F_AD_APPDATA) {
+			if (dpot->feat & F_SPI_8BIT) {
+				val = ((reg & DPOT_RDAC_MASK) <<
+					DPOT_MAX_POS(dpot->devid)) |
+					value;
+				return dpot_write_d8(dpot, val);
+			} else if (dpot->feat & F_SPI_16BIT) {
+				val = ((reg & DPOT_RDAC_MASK) <<
+					DPOT_MAX_POS(dpot->devid)) |
+					value;
+				return dpot_write_r8d8(dpot, val >> 8,
+					val & 0xFF);
+			} else
+				BUG();
+		} else {
+			if (dpot->uid == DPOT_UID(AD5291_ID) ||
+				dpot->uid == DPOT_UID(AD5292_ID) ||
+				dpot->uid == DPOT_UID(AD5293_ID)) {
+
+				dpot_write_r8d8(dpot, DPOT_AD5291_CTRLREG << 2,
+						DPOT_AD5291_UNLOCK_CMD);
+
+				if (dpot->uid == DPOT_UID(AD5291_ID))
+					value = value << 2;
+
+				return dpot_write_r8d8(dpot,
+					(DPOT_AD5291_RDAC << 2) |
+					(value >> 8), value & 0xFF);
+			} else if (dpot->uid == DPOT_UID(AD5270_ID) ||
+				dpot->uid == DPOT_UID(AD5271_ID)) {
+				dpot_write_r8d8(dpot,
+						DPOT_AD5270_1_2_4_CTRLREG << 2,
+						DPOT_AD5270_1_2_4_UNLOCK_CMD);
+
+				if (dpot->uid == DPOT_UID(AD5271_ID))
+					value = value << 2;
+
+				return dpot_write_r8d8(dpot,
+					(DPOT_AD5270_1_2_4_RDAC << 2) |
+					(value >> 8), value & 0xFF);
+			}
+			val = DPOT_SPI_RDAC | (reg & DPOT_RDAC_MASK);
+		}
+	} else if (reg & DPOT_ADDR_EEPROM) {
+		val = DPOT_SPI_EEPROM | (reg & DPOT_RDAC_MASK);
+	} else if (reg & DPOT_ADDR_CMD) {
+		switch (reg) {
+		case DPOT_DEC_ALL_6DB:
+			val = DPOT_SPI_DEC_ALL_6DB;
+			break;
+		case DPOT_INC_ALL_6DB:
+			val = DPOT_SPI_INC_ALL_6DB;
+			break;
+		case DPOT_DEC_ALL:
+			val = DPOT_SPI_DEC_ALL;
+			break;
+		case DPOT_INC_ALL:
+			val = DPOT_SPI_INC_ALL;
+			break;
+		}
+	} else if (reg & DPOT_ADDR_OTP) {
+		if (dpot->uid == DPOT_UID(AD5291_ID) ||
+			dpot->uid == DPOT_UID(AD5292_ID)) {
+			return dpot_write_r8d8(dpot,
+				DPOT_AD5291_STORE_XTPM << 2, 0);
+		} else if (dpot->uid == DPOT_UID(AD5270_ID) ||
+			dpot->uid == DPOT_UID(AD5271_ID)) {
+			return dpot_write_r8d8(dpot,
+				DPOT_AD5270_1_2_4_STORE_XTPM << 2, 0);
+		}
+	} else
+		BUG();
+
+	if (dpot->feat & F_SPI_16BIT)
+		return dpot_write_r8d8(dpot, val, value);
+	else if (dpot->feat & F_SPI_24BIT)
+		return dpot_write_r8d16(dpot, val, value);
+
+	return -EFAULT;
+}
+
+static s32 dpot_write_i2c(struct dpot_data *dpot, u8 reg, u16 value)
+{
+	/* Only write the instruction byte for certain commands */
+	unsigned int tmp = 0, ctrl = 0;
+
+	switch (dpot->uid) {
+	case DPOT_UID(AD5246_ID):
+	case DPOT_UID(AD5247_ID):
+		return dpot_write_d8(dpot, value);
+
+	case DPOT_UID(AD5245_ID):
+	case DPOT_UID(AD5241_ID):
+	case DPOT_UID(AD5242_ID):
+	case DPOT_UID(AD5243_ID):
+	case DPOT_UID(AD5248_ID):
+	case DPOT_UID(AD5280_ID):
+	case DPOT_UID(AD5282_ID):
+		ctrl = ((reg & DPOT_RDAC_MASK) == DPOT_RDAC0) ?
+			0 : DPOT_AD5282_RDAC_AB;
+		return dpot_write_r8d8(dpot, ctrl, value);
+	case DPOT_UID(AD5171_ID):
+	case DPOT_UID(AD5273_ID):
+		if (reg & DPOT_ADDR_OTP) {
+			tmp = dpot_read_d8(dpot);
+			if (tmp >> 6) /* Ready to Program? */
+				return -EFAULT;
+			ctrl = DPOT_AD5273_FUSE;
+		}
+		return dpot_write_r8d8(dpot, ctrl, value);
+	case DPOT_UID(AD5172_ID):
+	case DPOT_UID(AD5173_ID):
+		ctrl = ((reg & DPOT_RDAC_MASK) == DPOT_RDAC0) ?
+			0 : DPOT_AD5172_3_A0;
+		if (reg & DPOT_ADDR_OTP) {
+			tmp = dpot_read_r8d16(dpot, ctrl);
+			if (tmp >> 14) /* Ready to Program? */
+				return -EFAULT;
+			ctrl |= DPOT_AD5170_2_3_FUSE;
+		}
+		return dpot_write_r8d8(dpot, ctrl, value);
+	case DPOT_UID(AD5170_ID):
+		if (reg & DPOT_ADDR_OTP) {
+			tmp = dpot_read_r8d16(dpot, tmp);
+			if (tmp >> 14) /* Ready to Program? */
+				return -EFAULT;
+			ctrl = DPOT_AD5170_2_3_FUSE;
+		}
+		return dpot_write_r8d8(dpot, ctrl, value);
+	case DPOT_UID(AD5272_ID):
+	case DPOT_UID(AD5274_ID):
+		dpot_write_r8d8(dpot, DPOT_AD5270_1_2_4_CTRLREG << 2,
+				DPOT_AD5270_1_2_4_UNLOCK_CMD);
+
+		if (reg & DPOT_ADDR_OTP)
+			return dpot_write_r8d8(dpot,
+					DPOT_AD5270_1_2_4_STORE_XTPM << 2, 0);
+
+		if (dpot->uid == DPOT_UID(AD5274_ID))
+			value = value << 2;
+
+		return dpot_write_r8d8(dpot, (DPOT_AD5270_1_2_4_RDAC << 2) |
+				       (value >> 8), value & 0xFF);
+	default:
+		if (reg & DPOT_ADDR_CMD)
+			return dpot_write_d8(dpot, reg);
+
+		if (dpot->max_pos > 256)
+			return dpot_write_r8d16(dpot, (reg & 0xF8) |
+						((reg & 0x7) << 1), value);
+		else
+			/* All other registers require instruction + data bytes */
+			return dpot_write_r8d8(dpot, reg, value);
+	}
+}
+
+static s32 dpot_write(struct dpot_data *dpot, u8 reg, u16 value)
+{
+	if (dpot->feat & F_SPI)
+		return dpot_write_spi(dpot, reg, value);
+	else
+		return dpot_write_i2c(dpot, reg, value);
+}
+
+/* sysfs functions */
+
+static ssize_t sysfs_show_reg(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf, u32 reg)
+{
+	struct dpot_data *data = dev_get_drvdata(dev);
+	s32 value;
+
+	if (reg & DPOT_ADDR_OTP_EN)
+		return sprintf(buf, "%s\n",
+			test_bit(DPOT_RDAC_MASK & reg, data->otp_en_mask) ?
+			"enabled" : "disabled");
+
+
+	mutex_lock(&data->update_lock);
+	value = dpot_read(data, reg);
+	mutex_unlock(&data->update_lock);
+
+	if (value < 0)
+		return -EINVAL;
+	/*
+	 * Let someone else deal with converting this ...
+	 * the tolerance is a two-byte value where the MSB
+	 * is a sign + integer value, and the LSB is a
+	 * decimal value.  See page 18 of the AD5258
+	 * datasheet (Rev. A) for more details.
+	 */
+
+	if (reg & DPOT_REG_TOL)
+		return sprintf(buf, "0x%04x\n", value & 0xFFFF);
+	else
+		return sprintf(buf, "%u\n", value & data->rdac_mask);
+}
+
+static ssize_t sysfs_set_reg(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buf, size_t count, u32 reg)
+{
+	struct dpot_data *data = dev_get_drvdata(dev);
+	unsigned long value;
+	int err;
+
+	if (reg & DPOT_ADDR_OTP_EN) {
+		if (sysfs_streq(buf, "enabled"))
+			set_bit(DPOT_RDAC_MASK & reg, data->otp_en_mask);
+		else
+			clear_bit(DPOT_RDAC_MASK & reg, data->otp_en_mask);
+
+		return count;
+	}
+
+	if ((reg & DPOT_ADDR_OTP) &&
+		!test_bit(DPOT_RDAC_MASK & reg, data->otp_en_mask))
+		return -EPERM;
+
+	err = kstrtoul(buf, 10, &value);
+	if (err)
+		return err;
+
+	if (value > data->rdac_mask)
+		value = data->rdac_mask;
+
+	mutex_lock(&data->update_lock);
+	dpot_write(data, reg, value);
+	if (reg & DPOT_ADDR_EEPROM)
+		msleep(26);	/* Sleep while the EEPROM updates */
+	else if (reg & DPOT_ADDR_OTP)
+		msleep(400);	/* Sleep while the OTP updates */
+	mutex_unlock(&data->update_lock);
+
+	return count;
+}
+
+static ssize_t sysfs_do_cmd(struct device *dev,
+			    struct device_attribute *attr,
+			    const char *buf, size_t count, u32 reg)
+{
+	struct dpot_data *data = dev_get_drvdata(dev);
+
+	mutex_lock(&data->update_lock);
+	dpot_write(data, reg, 0);
+	mutex_unlock(&data->update_lock);
+
+	return count;
+}
+
+/* ------------------------------------------------------------------------- */
+
+#define DPOT_DEVICE_SHOW(_name, _reg) static ssize_t \
+show_##_name(struct device *dev, \
+			  struct device_attribute *attr, char *buf) \
+{ \
+	return sysfs_show_reg(dev, attr, buf, _reg); \
+}
+
+#define DPOT_DEVICE_SET(_name, _reg) static ssize_t \
+set_##_name(struct device *dev, \
+			 struct device_attribute *attr, \
+			 const char *buf, size_t count) \
+{ \
+	return sysfs_set_reg(dev, attr, buf, count, _reg); \
+}
+
+#define DPOT_DEVICE_SHOW_SET(name, reg) \
+DPOT_DEVICE_SHOW(name, reg) \
+DPOT_DEVICE_SET(name, reg) \
+static DEVICE_ATTR(name, S_IWUSR | S_IRUGO, show_##name, set_##name)
+
+#define DPOT_DEVICE_SHOW_ONLY(name, reg) \
+DPOT_DEVICE_SHOW(name, reg) \
+static DEVICE_ATTR(name, S_IWUSR | S_IRUGO, show_##name, NULL)
+
+DPOT_DEVICE_SHOW_SET(rdac0, DPOT_ADDR_RDAC | DPOT_RDAC0);
+DPOT_DEVICE_SHOW_SET(eeprom0, DPOT_ADDR_EEPROM | DPOT_RDAC0);
+DPOT_DEVICE_SHOW_ONLY(tolerance0, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC0);
+DPOT_DEVICE_SHOW_SET(otp0, DPOT_ADDR_OTP | DPOT_RDAC0);
+DPOT_DEVICE_SHOW_SET(otp0en, DPOT_ADDR_OTP_EN | DPOT_RDAC0);
+
+DPOT_DEVICE_SHOW_SET(rdac1, DPOT_ADDR_RDAC | DPOT_RDAC1);
+DPOT_DEVICE_SHOW_SET(eeprom1, DPOT_ADDR_EEPROM | DPOT_RDAC1);
+DPOT_DEVICE_SHOW_ONLY(tolerance1, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC1);
+DPOT_DEVICE_SHOW_SET(otp1, DPOT_ADDR_OTP | DPOT_RDAC1);
+DPOT_DEVICE_SHOW_SET(otp1en, DPOT_ADDR_OTP_EN | DPOT_RDAC1);
+
+DPOT_DEVICE_SHOW_SET(rdac2, DPOT_ADDR_RDAC | DPOT_RDAC2);
+DPOT_DEVICE_SHOW_SET(eeprom2, DPOT_ADDR_EEPROM | DPOT_RDAC2);
+DPOT_DEVICE_SHOW_ONLY(tolerance2, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC2);
+DPOT_DEVICE_SHOW_SET(otp2, DPOT_ADDR_OTP | DPOT_RDAC2);
+DPOT_DEVICE_SHOW_SET(otp2en, DPOT_ADDR_OTP_EN | DPOT_RDAC2);
+
+DPOT_DEVICE_SHOW_SET(rdac3, DPOT_ADDR_RDAC | DPOT_RDAC3);
+DPOT_DEVICE_SHOW_SET(eeprom3, DPOT_ADDR_EEPROM | DPOT_RDAC3);
+DPOT_DEVICE_SHOW_ONLY(tolerance3, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC3);
+DPOT_DEVICE_SHOW_SET(otp3, DPOT_ADDR_OTP | DPOT_RDAC3);
+DPOT_DEVICE_SHOW_SET(otp3en, DPOT_ADDR_OTP_EN | DPOT_RDAC3);
+
+DPOT_DEVICE_SHOW_SET(rdac4, DPOT_ADDR_RDAC | DPOT_RDAC4);
+DPOT_DEVICE_SHOW_SET(eeprom4, DPOT_ADDR_EEPROM | DPOT_RDAC4);
+DPOT_DEVICE_SHOW_ONLY(tolerance4, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC4);
+DPOT_DEVICE_SHOW_SET(otp4, DPOT_ADDR_OTP | DPOT_RDAC4);
+DPOT_DEVICE_SHOW_SET(otp4en, DPOT_ADDR_OTP_EN | DPOT_RDAC4);
+
+DPOT_DEVICE_SHOW_SET(rdac5, DPOT_ADDR_RDAC | DPOT_RDAC5);
+DPOT_DEVICE_SHOW_SET(eeprom5, DPOT_ADDR_EEPROM | DPOT_RDAC5);
+DPOT_DEVICE_SHOW_ONLY(tolerance5, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC5);
+DPOT_DEVICE_SHOW_SET(otp5, DPOT_ADDR_OTP | DPOT_RDAC5);
+DPOT_DEVICE_SHOW_SET(otp5en, DPOT_ADDR_OTP_EN | DPOT_RDAC5);
+
+static const struct attribute *dpot_attrib_wipers[] = {
+	&dev_attr_rdac0.attr,
+	&dev_attr_rdac1.attr,
+	&dev_attr_rdac2.attr,
+	&dev_attr_rdac3.attr,
+	&dev_attr_rdac4.attr,
+	&dev_attr_rdac5.attr,
+	NULL
+};
+
+static const struct attribute *dpot_attrib_eeprom[] = {
+	&dev_attr_eeprom0.attr,
+	&dev_attr_eeprom1.attr,
+	&dev_attr_eeprom2.attr,
+	&dev_attr_eeprom3.attr,
+	&dev_attr_eeprom4.attr,
+	&dev_attr_eeprom5.attr,
+	NULL
+};
+
+static const struct attribute *dpot_attrib_otp[] = {
+	&dev_attr_otp0.attr,
+	&dev_attr_otp1.attr,
+	&dev_attr_otp2.attr,
+	&dev_attr_otp3.attr,
+	&dev_attr_otp4.attr,
+	&dev_attr_otp5.attr,
+	NULL
+};
+
+static const struct attribute *dpot_attrib_otp_en[] = {
+	&dev_attr_otp0en.attr,
+	&dev_attr_otp1en.attr,
+	&dev_attr_otp2en.attr,
+	&dev_attr_otp3en.attr,
+	&dev_attr_otp4en.attr,
+	&dev_attr_otp5en.attr,
+	NULL
+};
+
+static const struct attribute *dpot_attrib_tolerance[] = {
+	&dev_attr_tolerance0.attr,
+	&dev_attr_tolerance1.attr,
+	&dev_attr_tolerance2.attr,
+	&dev_attr_tolerance3.attr,
+	&dev_attr_tolerance4.attr,
+	&dev_attr_tolerance5.attr,
+	NULL
+};
+
+/* ------------------------------------------------------------------------- */
+
+#define DPOT_DEVICE_DO_CMD(_name, _cmd) static ssize_t \
+set_##_name(struct device *dev, \
+			 struct device_attribute *attr, \
+			 const char *buf, size_t count) \
+{ \
+	return sysfs_do_cmd(dev, attr, buf, count, _cmd); \
+} \
+static DEVICE_ATTR(_name, S_IWUSR | S_IRUGO, NULL, set_##_name)
+
+DPOT_DEVICE_DO_CMD(inc_all, DPOT_INC_ALL);
+DPOT_DEVICE_DO_CMD(dec_all, DPOT_DEC_ALL);
+DPOT_DEVICE_DO_CMD(inc_all_6db, DPOT_INC_ALL_6DB);
+DPOT_DEVICE_DO_CMD(dec_all_6db, DPOT_DEC_ALL_6DB);
+
+static struct attribute *ad525x_attributes_commands[] = {
+	&dev_attr_inc_all.attr,
+	&dev_attr_dec_all.attr,
+	&dev_attr_inc_all_6db.attr,
+	&dev_attr_dec_all_6db.attr,
+	NULL
+};
+
+static const struct attribute_group ad525x_group_commands = {
+	.attrs = ad525x_attributes_commands,
+};
+
+static int ad_dpot_add_files(struct device *dev,
+		unsigned int features, unsigned int rdac)
+{
+	int err = sysfs_create_file(&dev->kobj,
+		dpot_attrib_wipers[rdac]);
+	if (features & F_CMD_EEP)
+		err |= sysfs_create_file(&dev->kobj,
+			dpot_attrib_eeprom[rdac]);
+	if (features & F_CMD_TOL)
+		err |= sysfs_create_file(&dev->kobj,
+			dpot_attrib_tolerance[rdac]);
+	if (features & F_CMD_OTP) {
+		err |= sysfs_create_file(&dev->kobj,
+			dpot_attrib_otp_en[rdac]);
+		err |= sysfs_create_file(&dev->kobj,
+			dpot_attrib_otp[rdac]);
+	}
+
+	if (err)
+		dev_err(dev, "failed to register sysfs hooks for RDAC%d\n",
+			rdac);
+
+	return err;
+}
+
+static inline void ad_dpot_remove_files(struct device *dev,
+		unsigned int features, unsigned int rdac)
+{
+	sysfs_remove_file(&dev->kobj,
+		dpot_attrib_wipers[rdac]);
+	if (features & F_CMD_EEP)
+		sysfs_remove_file(&dev->kobj,
+			dpot_attrib_eeprom[rdac]);
+	if (features & F_CMD_TOL)
+		sysfs_remove_file(&dev->kobj,
+			dpot_attrib_tolerance[rdac]);
+	if (features & F_CMD_OTP) {
+		sysfs_remove_file(&dev->kobj,
+			dpot_attrib_otp_en[rdac]);
+		sysfs_remove_file(&dev->kobj,
+			dpot_attrib_otp[rdac]);
+	}
+}
+
+int ad_dpot_probe(struct device *dev,
+		struct ad_dpot_bus_data *bdata, unsigned long devid,
+			    const char *name)
+{
+
+	struct dpot_data *data;
+	int i, err = 0;
+
+	data = kzalloc(sizeof(struct dpot_data), GFP_KERNEL);
+	if (!data) {
+		err = -ENOMEM;
+		goto exit;
+	}
+
+	dev_set_drvdata(dev, data);
+	mutex_init(&data->update_lock);
+
+	data->bdata = *bdata;
+	data->devid = devid;
+
+	data->max_pos = 1 << DPOT_MAX_POS(devid);
+	data->rdac_mask = data->max_pos - 1;
+	data->feat = DPOT_FEAT(devid);
+	data->uid = DPOT_UID(devid);
+	data->wipers = DPOT_WIPERS(devid);
+
+	for (i = DPOT_RDAC0; i < MAX_RDACS; i++)
+		if (data->wipers & (1 << i)) {
+			err = ad_dpot_add_files(dev, data->feat, i);
+			if (err)
+				goto exit_remove_files;
+			/* power-up midscale */
+			if (data->feat & F_RDACS_WONLY)
+				data->rdac_cache[i] = data->max_pos / 2;
+		}
+
+	if (data->feat & F_CMD_INC)
+		err = sysfs_create_group(&dev->kobj, &ad525x_group_commands);
+
+	if (err) {
+		dev_err(dev, "failed to register sysfs hooks\n");
+		goto exit_free;
+	}
+
+	dev_info(dev, "%s %d-Position Digital Potentiometer registered\n",
+		 name, data->max_pos);
+
+	return 0;
+
+exit_remove_files:
+	for (i = DPOT_RDAC0; i < MAX_RDACS; i++)
+		if (data->wipers & (1 << i))
+			ad_dpot_remove_files(dev, data->feat, i);
+
+exit_free:
+	kfree(data);
+	dev_set_drvdata(dev, NULL);
+exit:
+	dev_err(dev, "failed to create client for %s ID 0x%lX\n",
+		name, devid);
+	return err;
+}
+EXPORT_SYMBOL(ad_dpot_probe);
+
+int ad_dpot_remove(struct device *dev)
+{
+	struct dpot_data *data = dev_get_drvdata(dev);
+	int i;
+
+	for (i = DPOT_RDAC0; i < MAX_RDACS; i++)
+		if (data->wipers & (1 << i))
+			ad_dpot_remove_files(dev, data->feat, i);
+
+	kfree(data);
+
+	return 0;
+}
+EXPORT_SYMBOL(ad_dpot_remove);
+
+
+MODULE_AUTHOR("Chris Verges <chrisv@cyberswitching.com>, "
+	      "Michael Hennerich <hennerich@blackfin.uclinux.org>");
+MODULE_DESCRIPTION("Digital potentiometer driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/ad525x_dpot.h b/drivers/misc/ad525x_dpot.h
new file mode 100644
index 000000000..443a51fd5
--- /dev/null
+++ b/drivers/misc/ad525x_dpot.h
@@ -0,0 +1,215 @@
+/*
+ * Driver for the Analog Devices digital potentiometers
+ *
+ * Copyright (C) 2010 Michael Hennerich, Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#ifndef _AD_DPOT_H_
+#define _AD_DPOT_H_
+
+#include <linux/types.h>
+
+#define DPOT_CONF(features, wipers, max_pos, uid) \
+		(((features) << 18) | (((wipers) & 0xFF) << 10) | \
+		((max_pos & 0xF) << 6) | (uid & 0x3F))
+
+#define DPOT_UID(conf)		(conf & 0x3F)
+#define DPOT_MAX_POS(conf)	((conf >> 6) & 0xF)
+#define DPOT_WIPERS(conf)	((conf >> 10) & 0xFF)
+#define DPOT_FEAT(conf)		(conf >> 18)
+
+#define BRDAC0			(1 << 0)
+#define BRDAC1			(1 << 1)
+#define BRDAC2			(1 << 2)
+#define BRDAC3			(1 << 3)
+#define BRDAC4			(1 << 4)
+#define BRDAC5			(1 << 5)
+#define MAX_RDACS		6
+
+#define F_CMD_INC		(1 << 0)	/* Features INC/DEC ALL, 6dB */
+#define F_CMD_EEP		(1 << 1)	/* Features EEPROM */
+#define F_CMD_OTP		(1 << 2)	/* Features OTP */
+#define F_CMD_TOL		(1 << 3)	/* RDACS feature Tolerance REG */
+#define F_RDACS_RW		(1 << 4)	/* RDACS are Read/Write  */
+#define F_RDACS_WONLY		(1 << 5)	/* RDACS are Write only */
+#define F_AD_APPDATA		(1 << 6)	/* RDAC Address append to data */
+#define F_SPI_8BIT		(1 << 7)	/* All SPI XFERS are 8-bit */
+#define F_SPI_16BIT		(1 << 8)	/* All SPI XFERS are 16-bit */
+#define F_SPI_24BIT		(1 << 9)	/* All SPI XFERS are 24-bit */
+
+#define F_RDACS_RW_TOL	(F_RDACS_RW | F_CMD_EEP | F_CMD_TOL)
+#define F_RDACS_RW_EEP	(F_RDACS_RW | F_CMD_EEP)
+#define F_SPI		(F_SPI_8BIT | F_SPI_16BIT | F_SPI_24BIT)
+
+enum dpot_devid {
+	AD5258_ID = DPOT_CONF(F_RDACS_RW_TOL, BRDAC0, 6, 0), /* I2C */
+	AD5259_ID = DPOT_CONF(F_RDACS_RW_TOL, BRDAC0, 8, 1),
+	AD5251_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC,
+			BRDAC1 | BRDAC3, 6, 2),
+	AD5252_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC,
+			BRDAC1 | BRDAC3, 8, 3),
+	AD5253_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC,
+			BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 6, 4),
+	AD5254_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC,
+			BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 8, 5),
+	AD5255_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC,
+			BRDAC0 | BRDAC1 | BRDAC2, 9, 6),
+	AD5160_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT,
+			BRDAC0, 8, 7), /* SPI */
+	AD5161_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT,
+			BRDAC0, 8, 8),
+	AD5162_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT,
+			BRDAC0 | BRDAC1, 8, 9),
+	AD5165_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT,
+			BRDAC0, 8, 10),
+	AD5200_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT,
+			BRDAC0, 8, 11),
+	AD5201_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT,
+			BRDAC0, 5, 12),
+	AD5203_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT,
+			BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 6, 13),
+	AD5204_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT,
+			BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 8, 14),
+	AD5206_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT,
+			BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3 | BRDAC4 | BRDAC5,
+			8, 15),
+	AD5207_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT,
+			BRDAC0 | BRDAC1, 8, 16),
+	AD5231_ID = DPOT_CONF(F_RDACS_RW_EEP | F_CMD_INC | F_SPI_24BIT,
+			BRDAC0, 10, 17),
+	AD5232_ID = DPOT_CONF(F_RDACS_RW_EEP | F_CMD_INC | F_SPI_16BIT,
+			BRDAC0 | BRDAC1, 8, 18),
+	AD5233_ID = DPOT_CONF(F_RDACS_RW_EEP | F_CMD_INC | F_SPI_16BIT,
+			BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 6, 19),
+	AD5235_ID = DPOT_CONF(F_RDACS_RW_EEP | F_CMD_INC | F_SPI_24BIT,
+			BRDAC0 | BRDAC1, 10, 20),
+	AD5260_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT,
+			BRDAC0, 8, 21),
+	AD5262_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT,
+			BRDAC0 | BRDAC1, 8, 22),
+	AD5263_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT,
+			BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 8, 23),
+	AD5290_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT,
+			BRDAC0, 8, 24),
+	AD5291_ID = DPOT_CONF(F_RDACS_RW | F_SPI_16BIT | F_CMD_OTP,
+			BRDAC0, 8, 25),
+	AD5292_ID = DPOT_CONF(F_RDACS_RW | F_SPI_16BIT | F_CMD_OTP,
+			BRDAC0, 10, 26),
+	AD5293_ID = DPOT_CONF(F_RDACS_RW | F_SPI_16BIT, BRDAC0, 10, 27),
+	AD7376_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT,
+			BRDAC0, 7, 28),
+	AD8400_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT,
+			BRDAC0, 8, 29),
+	AD8402_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT,
+			BRDAC0 | BRDAC1, 8, 30),
+	AD8403_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT,
+			BRDAC0 | BRDAC1 | BRDAC2, 8, 31),
+	ADN2850_ID = DPOT_CONF(F_RDACS_RW_EEP | F_CMD_INC | F_SPI_24BIT,
+			BRDAC0 | BRDAC1, 10, 32),
+	AD5241_ID = DPOT_CONF(F_RDACS_RW, BRDAC0, 8, 33),
+	AD5242_ID = DPOT_CONF(F_RDACS_RW, BRDAC0 | BRDAC1, 8, 34),
+	AD5243_ID = DPOT_CONF(F_RDACS_RW, BRDAC0 | BRDAC1, 8, 35),
+	AD5245_ID = DPOT_CONF(F_RDACS_RW, BRDAC0, 8, 36),
+	AD5246_ID = DPOT_CONF(F_RDACS_RW, BRDAC0, 7, 37),
+	AD5247_ID = DPOT_CONF(F_RDACS_RW, BRDAC0, 7, 38),
+	AD5248_ID = DPOT_CONF(F_RDACS_RW, BRDAC0 | BRDAC1, 8, 39),
+	AD5280_ID = DPOT_CONF(F_RDACS_RW, BRDAC0, 8, 40),
+	AD5282_ID = DPOT_CONF(F_RDACS_RW, BRDAC0 | BRDAC1, 8, 41),
+	ADN2860_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC,
+			BRDAC0 | BRDAC1 | BRDAC2, 9, 42),
+	AD5273_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0, 6, 43),
+	AD5171_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0, 6, 44),
+	AD5170_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0, 8, 45),
+	AD5172_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0 | BRDAC1, 8, 46),
+	AD5173_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0 | BRDAC1, 8, 47),
+	AD5270_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP | F_SPI_16BIT,
+			BRDAC0, 10, 48),
+	AD5271_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP | F_SPI_16BIT,
+			BRDAC0, 8, 49),
+	AD5272_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0, 10, 50),
+	AD5274_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0, 8, 51),
+};
+
+#define DPOT_RDAC0		0
+#define DPOT_RDAC1		1
+#define DPOT_RDAC2		2
+#define DPOT_RDAC3		3
+#define DPOT_RDAC4		4
+#define DPOT_RDAC5		5
+
+#define DPOT_RDAC_MASK		0x1F
+
+#define DPOT_REG_TOL		0x18
+#define DPOT_TOL_RDAC0		(DPOT_REG_TOL | DPOT_RDAC0)
+#define DPOT_TOL_RDAC1		(DPOT_REG_TOL | DPOT_RDAC1)
+#define DPOT_TOL_RDAC2		(DPOT_REG_TOL | DPOT_RDAC2)
+#define DPOT_TOL_RDAC3		(DPOT_REG_TOL | DPOT_RDAC3)
+#define DPOT_TOL_RDAC4		(DPOT_REG_TOL | DPOT_RDAC4)
+#define DPOT_TOL_RDAC5		(DPOT_REG_TOL | DPOT_RDAC5)
+
+/* RDAC-to-EEPROM Interface Commands */
+#define DPOT_ADDR_RDAC		(0x0 << 5)
+#define DPOT_ADDR_EEPROM	(0x1 << 5)
+#define DPOT_ADDR_OTP		(0x1 << 6)
+#define DPOT_ADDR_CMD		(0x1 << 7)
+#define DPOT_ADDR_OTP_EN	(0x1 << 9)
+
+#define DPOT_DEC_ALL_6DB	(DPOT_ADDR_CMD | (0x4 << 3))
+#define DPOT_INC_ALL_6DB	(DPOT_ADDR_CMD | (0x9 << 3))
+#define DPOT_DEC_ALL		(DPOT_ADDR_CMD | (0x6 << 3))
+#define DPOT_INC_ALL		(DPOT_ADDR_CMD | (0xB << 3))
+
+#define DPOT_SPI_RDAC		0xB0
+#define DPOT_SPI_EEPROM		0x30
+#define DPOT_SPI_READ_RDAC	0xA0
+#define DPOT_SPI_READ_EEPROM	0x90
+#define DPOT_SPI_DEC_ALL_6DB	0x50
+#define DPOT_SPI_INC_ALL_6DB	0xD0
+#define DPOT_SPI_DEC_ALL	0x70
+#define DPOT_SPI_INC_ALL	0xF0
+
+/* AD5291/2/3 use special commands */
+#define DPOT_AD5291_RDAC	0x01
+#define DPOT_AD5291_READ_RDAC	0x02
+#define DPOT_AD5291_STORE_XTPM	0x03
+#define DPOT_AD5291_CTRLREG	0x06
+#define DPOT_AD5291_UNLOCK_CMD	0x03
+
+/* AD5270/1/2/4 use special commands */
+#define DPOT_AD5270_1_2_4_RDAC		0x01
+#define DPOT_AD5270_1_2_4_READ_RDAC	0x02
+#define DPOT_AD5270_1_2_4_STORE_XTPM	0x03
+#define DPOT_AD5270_1_2_4_CTRLREG	0x07
+#define DPOT_AD5270_1_2_4_UNLOCK_CMD	0x03
+
+#define DPOT_AD5282_RDAC_AB	0x80
+
+#define DPOT_AD5273_FUSE	0x80
+#define DPOT_AD5170_2_3_FUSE	0x20
+#define DPOT_AD5170_2_3_OW	0x08
+#define DPOT_AD5172_3_A0	0x08
+#define DPOT_AD5170_2FUSE	0x80
+
+struct dpot_data;
+
+struct ad_dpot_bus_ops {
+	int (*read_d8)(void *client);
+	int (*read_r8d8)(void *client, u8 reg);
+	int (*read_r8d16)(void *client, u8 reg);
+	int (*write_d8)(void *client, u8 val);
+	int (*write_r8d8)(void *client, u8 reg, u8 val);
+	int (*write_r8d16)(void *client, u8 reg, u16 val);
+};
+
+struct ad_dpot_bus_data {
+	void *client;
+	const struct ad_dpot_bus_ops *bops;
+};
+
+int ad_dpot_probe(struct device *dev, struct ad_dpot_bus_data *bdata,
+		  unsigned long devid, const char *name);
+int ad_dpot_remove(struct device *dev);
+
+#endif
diff --git a/drivers/misc/altera-stapl/Kconfig b/drivers/misc/altera-stapl/Kconfig
new file mode 100644
index 000000000..8a828fe41
--- /dev/null
+++ b/drivers/misc/altera-stapl/Kconfig
@@ -0,0 +1,9 @@
+comment "Altera FPGA firmware download module (requires I2C)"
+	depends on !I2C
+
+config ALTERA_STAPL
+	tristate "Altera FPGA firmware download module"
+	depends on I2C
+	default n
+	help
+	  An Altera FPGA module. Say Y when you want to support this tool.
diff --git a/drivers/misc/altera-stapl/Makefile b/drivers/misc/altera-stapl/Makefile
new file mode 100644
index 000000000..055f61ee7
--- /dev/null
+++ b/drivers/misc/altera-stapl/Makefile
@@ -0,0 +1,3 @@
+altera-stapl-objs = altera-lpt.o altera-jtag.o altera-comp.o altera.o
+
+obj-$(CONFIG_ALTERA_STAPL) += altera-stapl.o
diff --git a/drivers/misc/altera-stapl/altera-comp.c b/drivers/misc/altera-stapl/altera-comp.c
new file mode 100644
index 000000000..49b103bed
--- /dev/null
+++ b/drivers/misc/altera-stapl/altera-comp.c
@@ -0,0 +1,142 @@
+/*
+ * altera-comp.c
+ *
+ * altera FPGA driver
+ *
+ * Copyright (C) Altera Corporation 1998-2001
+ * Copyright (C) 2010 NetUP Inc.
+ * Copyright (C) 2010 Igor M. Liplianin <liplianin@netup.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include "altera-exprt.h"
+
+#define	SHORT_BITS		16
+#define	CHAR_BITS		8
+#define	DATA_BLOB_LENGTH	3
+#define	MATCH_DATA_LENGTH	8192
+#define ALTERA_REQUEST_SIZE	1024
+#define ALTERA_BUFFER_SIZE	(MATCH_DATA_LENGTH + ALTERA_REQUEST_SIZE)
+
+static u32 altera_bits_req(u32 n)
+{
+	u32 result = SHORT_BITS;
+
+	if (n == 0)
+		result = 1;
+	else {
+		/* Look for the highest non-zero bit position */
+		while ((n & (1 << (SHORT_BITS - 1))) == 0) {
+			n <<= 1;
+			--result;
+		}
+	}
+
+	return result;
+}
+
+static u32 altera_read_packed(u8 *buffer, u32 bits, u32 *bits_avail,
+							u32 *in_index)
+{
+	u32 result = 0;
+	u32 shift = 0;
+	u32 databyte = 0;
+
+	while (bits > 0) {
+		databyte = buffer[*in_index];
+		result |= (((databyte >> (CHAR_BITS - *bits_avail))
+			& (0xff >> (CHAR_BITS - *bits_avail))) << shift);
+
+		if (bits <= *bits_avail) {
+			result &= (0xffff >> (SHORT_BITS - (bits + shift)));
+			*bits_avail -= bits;
+			bits = 0;
+		} else {
+			++(*in_index);
+			shift += *bits_avail;
+			bits -= *bits_avail;
+			*bits_avail = CHAR_BITS;
+		}
+	}
+
+	return result;
+}
+
+u32 altera_shrink(u8 *in, u32 in_length, u8 *out, u32 out_length, s32 version)
+{
+	u32 i, j, data_length = 0L;
+	u32 offset, length;
+	u32 match_data_length = MATCH_DATA_LENGTH;
+	u32 bits_avail = CHAR_BITS;
+	u32 in_index = 0L;
+
+	if (version > 0)
+		--match_data_length;
+
+	for (i = 0; i < out_length; ++i)
+		out[i] = 0;
+
+	/* Read number of bytes in data. */
+	for (i = 0; i < sizeof(in_length); ++i) {
+		data_length = data_length | (
+			altera_read_packed(in,
+					CHAR_BITS,
+					&bits_avail,
+					&in_index) << (i * CHAR_BITS));
+	}
+
+	if (data_length > out_length) {
+		data_length = 0L;
+		return data_length;
+	}
+
+	i = 0;
+	while (i < data_length) {
+		/* A 0 bit indicates literal data. */
+		if (altera_read_packed(in, 1, &bits_avail,
+						&in_index) == 0) {
+			for (j = 0; j < DATA_BLOB_LENGTH; ++j) {
+				if (i < data_length) {
+					out[i] = (u8)altera_read_packed(in,
+							CHAR_BITS,
+							&bits_avail,
+							&in_index);
+					i++;
+				}
+			}
+		} else {
+			/* A 1 bit indicates offset/length to follow. */
+			offset = altera_read_packed(in, altera_bits_req((s16)
+					(i > match_data_length ?
+						match_data_length : i)),
+					&bits_avail,
+					&in_index);
+			length = altera_read_packed(in, CHAR_BITS,
+					&bits_avail,
+					&in_index);
+			for (j = 0; j < length; ++j) {
+				if (i < data_length) {
+					out[i] = out[i - offset];
+					i++;
+				}
+			}
+		}
+	}
+
+	return data_length;
+}
diff --git a/drivers/misc/altera-stapl/altera-exprt.h b/drivers/misc/altera-stapl/altera-exprt.h
new file mode 100644
index 000000000..39c38d84a
--- /dev/null
+++ b/drivers/misc/altera-stapl/altera-exprt.h
@@ -0,0 +1,33 @@
+/*
+ * altera-exprt.h
+ *
+ * altera FPGA driver
+ *
+ * Copyright (C) Altera Corporation 1998-2001
+ * Copyright (C) 2010 NetUP Inc.
+ * Copyright (C) 2010 Igor M. Liplianin <liplianin@netup.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef ALTERA_EXPRT_H
+#define ALTERA_EXPRT_H
+
+
+u32 altera_shrink(u8 *in, u32 in_length, u8 *out, u32 out_length, s32 version);
+int netup_jtag_io_lpt(void *device, int tms, int tdi, int read_tdo);
+
+#endif /* ALTERA_EXPRT_H */
diff --git a/drivers/misc/altera-stapl/altera-jtag.c b/drivers/misc/altera-stapl/altera-jtag.c
new file mode 100644
index 000000000..f4bf20096
--- /dev/null
+++ b/drivers/misc/altera-stapl/altera-jtag.c
@@ -0,0 +1,1021 @@
+/*
+ * altera-jtag.c
+ *
+ * altera FPGA driver
+ *
+ * Copyright (C) Altera Corporation 1998-2001
+ * Copyright (C) 2010 NetUP Inc.
+ * Copyright (C) 2010 Igor M. Liplianin <liplianin@netup.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/slab.h>
+#include <misc/altera.h>
+#include "altera-exprt.h"
+#include "altera-jtag.h"
+
+#define	alt_jtag_io(a, b, c)\
+		astate->config->jtag_io(astate->config->dev, a, b, c);
+
+#define	alt_malloc(a)	kzalloc(a, GFP_KERNEL);
+
+/*
+ * This structure shows, for each JTAG state, which state is reached after
+ * a single TCK clock cycle with TMS high or TMS low, respectively.  This
+ * describes all possible state transitions in the JTAG state machine.
+ */
+struct altera_jtag_machine {
+	enum altera_jtag_state tms_high;
+	enum altera_jtag_state tms_low;
+};
+
+static const struct altera_jtag_machine altera_transitions[] = {
+	/* RESET     */	{ RESET,	IDLE },
+	/* IDLE      */	{ DRSELECT,	IDLE },
+	/* DRSELECT  */	{ IRSELECT,	DRCAPTURE },
+	/* DRCAPTURE */	{ DREXIT1,	DRSHIFT },
+	/* DRSHIFT   */	{ DREXIT1,	DRSHIFT },
+	/* DREXIT1   */	{ DRUPDATE,	DRPAUSE },
+	/* DRPAUSE   */	{ DREXIT2,	DRPAUSE },
+	/* DREXIT2   */	{ DRUPDATE,	DRSHIFT },
+	/* DRUPDATE  */	{ DRSELECT,	IDLE },
+	/* IRSELECT  */	{ RESET,	IRCAPTURE },
+	/* IRCAPTURE */	{ IREXIT1,	IRSHIFT },
+	/* IRSHIFT   */	{ IREXIT1,	IRSHIFT },
+	/* IREXIT1   */	{ IRUPDATE,	IRPAUSE },
+	/* IRPAUSE   */	{ IREXIT2,	IRPAUSE },
+	/* IREXIT2   */	{ IRUPDATE,	IRSHIFT },
+	/* IRUPDATE  */	{ DRSELECT,	IDLE }
+};
+
+/*
+ * This table contains the TMS value to be used to take the NEXT STEP on
+ * the path to the desired state.  The array index is the current state,
+ * and the bit position is the desired endstate.  To find out which state
+ * is used as the intermediate state, look up the TMS value in the
+ * altera_transitions[] table.
+ */
+static const u16 altera_jtag_path_map[16] = {
+	/* RST	RTI	SDRS	CDR	SDR	E1DR	PDR	E2DR */
+	0x0001,	0xFFFD,	0xFE01,	0xFFE7,	0xFFEF,	0xFF0F,	0xFFBF,	0xFFFF,
+	/* UDR	SIRS	CIR	SIR	E1IR	PIR	E2IR	UIR */
+	0xFEFD,	0x0001,	0xF3FF,	0xF7FF,	0x87FF,	0xDFFF,	0xFFFF,	0x7FFD
+};
+
+/* Flag bits for alt_jtag_io() function */
+#define TMS_HIGH   1
+#define TMS_LOW    0
+#define TDI_HIGH   1
+#define TDI_LOW    0
+#define READ_TDO   1
+#define IGNORE_TDO 0
+
+int altera_jinit(struct altera_state *astate)
+{
+	struct altera_jtag *js = &astate->js;
+
+	/* initial JTAG state is unknown */
+	js->jtag_state = ILLEGAL_JTAG_STATE;
+
+	/* initialize to default state */
+	js->drstop_state = IDLE;
+	js->irstop_state = IDLE;
+	js->dr_pre  = 0;
+	js->dr_post = 0;
+	js->ir_pre  = 0;
+	js->ir_post = 0;
+	js->dr_length    = 0;
+	js->ir_length    = 0;
+
+	js->dr_pre_data  = NULL;
+	js->dr_post_data = NULL;
+	js->ir_pre_data  = NULL;
+	js->ir_post_data = NULL;
+	js->dr_buffer	 = NULL;
+	js->ir_buffer	 = NULL;
+
+	return 0;
+}
+
+int altera_set_drstop(struct altera_jtag *js, enum altera_jtag_state state)
+{
+	js->drstop_state = state;
+
+	return 0;
+}
+
+int altera_set_irstop(struct altera_jtag *js, enum altera_jtag_state state)
+{
+	js->irstop_state = state;
+
+	return 0;
+}
+
+int altera_set_dr_pre(struct altera_jtag *js,
+				u32 count, u32 start_index,
+				u8 *preamble_data)
+{
+	int status = 0;
+	u32 i;
+	u32 j;
+
+	if (count > js->dr_pre) {
+		kfree(js->dr_pre_data);
+		js->dr_pre_data = (u8 *)alt_malloc((count + 7) >> 3);
+		if (js->dr_pre_data == NULL)
+			status = -ENOMEM;
+		else
+			js->dr_pre = count;
+	} else
+		js->dr_pre = count;
+
+	if (status == 0) {
+		for (i = 0; i < count; ++i) {
+			j = i + start_index;
+
+			if (preamble_data == NULL)
+				js->dr_pre_data[i >> 3] |= (1 << (i & 7));
+			else {
+				if (preamble_data[j >> 3] & (1 << (j & 7)))
+					js->dr_pre_data[i >> 3] |=
+							(1 << (i & 7));
+				else
+					js->dr_pre_data[i >> 3] &=
+							~(u32)(1 << (i & 7));
+
+			}
+		}
+	}
+
+	return status;
+}
+
+int altera_set_ir_pre(struct altera_jtag *js, u32 count, u32 start_index,
+							u8 *preamble_data)
+{
+	int status = 0;
+	u32 i;
+	u32 j;
+
+	if (count > js->ir_pre) {
+		kfree(js->ir_pre_data);
+		js->ir_pre_data = (u8 *)alt_malloc((count + 7) >> 3);
+		if (js->ir_pre_data == NULL)
+			status = -ENOMEM;
+		else
+			js->ir_pre = count;
+
+	} else
+		js->ir_pre = count;
+
+	if (status == 0) {
+		for (i = 0; i < count; ++i) {
+			j = i + start_index;
+			if (preamble_data == NULL)
+				js->ir_pre_data[i >> 3] |= (1 << (i & 7));
+			else {
+				if (preamble_data[j >> 3] & (1 << (j & 7)))
+					js->ir_pre_data[i >> 3] |=
+							(1 << (i & 7));
+				else
+					js->ir_pre_data[i >> 3] &=
+							~(u32)(1 << (i & 7));
+
+			}
+		}
+	}
+
+	return status;
+}
+
+int altera_set_dr_post(struct altera_jtag *js, u32 count, u32 start_index,
+						u8 *postamble_data)
+{
+	int status = 0;
+	u32 i;
+	u32 j;
+
+	if (count > js->dr_post) {
+		kfree(js->dr_post_data);
+		js->dr_post_data = (u8 *)alt_malloc((count + 7) >> 3);
+
+		if (js->dr_post_data == NULL)
+			status = -ENOMEM;
+		else
+			js->dr_post = count;
+
+	} else
+		js->dr_post = count;
+
+	if (status == 0) {
+		for (i = 0; i < count; ++i) {
+			j = i + start_index;
+
+			if (postamble_data == NULL)
+				js->dr_post_data[i >> 3] |= (1 << (i & 7));
+			else {
+				if (postamble_data[j >> 3] & (1 << (j & 7)))
+					js->dr_post_data[i >> 3] |=
+								(1 << (i & 7));
+				else
+					js->dr_post_data[i >> 3] &=
+					    ~(u32)(1 << (i & 7));
+
+			}
+		}
+	}
+
+	return status;
+}
+
+int altera_set_ir_post(struct altera_jtag *js, u32 count, u32 start_index,
+						u8 *postamble_data)
+{
+	int status = 0;
+	u32 i;
+	u32 j;
+
+	if (count > js->ir_post) {
+		kfree(js->ir_post_data);
+		js->ir_post_data = (u8 *)alt_malloc((count + 7) >> 3);
+		if (js->ir_post_data == NULL)
+			status = -ENOMEM;
+		else
+			js->ir_post = count;
+
+	} else
+		js->ir_post = count;
+
+	if (status != 0)
+		return status;
+
+	for (i = 0; i < count; ++i) {
+		j = i + start_index;
+
+		if (postamble_data == NULL)
+			js->ir_post_data[i >> 3] |= (1 << (i & 7));
+		else {
+			if (postamble_data[j >> 3] & (1 << (j & 7)))
+				js->ir_post_data[i >> 3] |= (1 << (i & 7));
+			else
+				js->ir_post_data[i >> 3] &=
+				    ~(u32)(1 << (i & 7));
+
+		}
+	}
+
+	return status;
+}
+
+static void altera_jreset_idle(struct altera_state *astate)
+{
+	struct altera_jtag *js = &astate->js;
+	int i;
+	/* Go to Test Logic Reset (no matter what the starting state may be) */
+	for (i = 0; i < 5; ++i)
+		alt_jtag_io(TMS_HIGH, TDI_LOW, IGNORE_TDO);
+
+	/* Now step to Run Test / Idle */
+	alt_jtag_io(TMS_LOW, TDI_LOW, IGNORE_TDO);
+	js->jtag_state = IDLE;
+}
+
+int altera_goto_jstate(struct altera_state *astate,
+					enum altera_jtag_state state)
+{
+	struct altera_jtag *js = &astate->js;
+	int tms;
+	int count = 0;
+	int status = 0;
+
+	if (js->jtag_state == ILLEGAL_JTAG_STATE)
+		/* initialize JTAG chain to known state */
+		altera_jreset_idle(astate);
+
+	if (js->jtag_state == state) {
+		/*
+		 * We are already in the desired state.
+		 * If it is a stable state, loop here.
+		 * Otherwise do nothing (no clock cycles).
+		 */
+		if ((state == IDLE) || (state == DRSHIFT) ||
+			(state == DRPAUSE) || (state == IRSHIFT) ||
+				(state == IRPAUSE)) {
+			alt_jtag_io(TMS_LOW, TDI_LOW, IGNORE_TDO);
+		} else if (state == RESET)
+			alt_jtag_io(TMS_HIGH, TDI_LOW, IGNORE_TDO);
+
+	} else {
+		while ((js->jtag_state != state) && (count < 9)) {
+			/* Get TMS value to take a step toward desired state */
+			tms = (altera_jtag_path_map[js->jtag_state] &
+							(1 << state))
+							? TMS_HIGH : TMS_LOW;
+
+			/* Take a step */
+			alt_jtag_io(tms, TDI_LOW, IGNORE_TDO);
+
+			if (tms)
+				js->jtag_state =
+					altera_transitions[js->jtag_state].tms_high;
+			else
+				js->jtag_state =
+					altera_transitions[js->jtag_state].tms_low;
+
+			++count;
+		}
+	}
+
+	if (js->jtag_state != state)
+		status = -EREMOTEIO;
+
+	return status;
+}
+
+int altera_wait_cycles(struct altera_state *astate,
+					s32 cycles,
+					enum altera_jtag_state wait_state)
+{
+	struct altera_jtag *js = &astate->js;
+	int tms;
+	s32 count;
+	int status = 0;
+
+	if (js->jtag_state != wait_state)
+		status = altera_goto_jstate(astate, wait_state);
+
+	if (status == 0) {
+		/*
+		 * Set TMS high to loop in RESET state
+		 * Set TMS low to loop in any other stable state
+		 */
+		tms = (wait_state == RESET) ? TMS_HIGH : TMS_LOW;
+
+		for (count = 0L; count < cycles; count++)
+			alt_jtag_io(tms, TDI_LOW, IGNORE_TDO);
+
+	}
+
+	return status;
+}
+
+int altera_wait_msecs(struct altera_state *astate,
+			s32 microseconds, enum altera_jtag_state wait_state)
+/*
+ * Causes JTAG hardware to sit in the specified stable
+ * state for the specified duration of real time.  If
+ * no JTAG operations have been performed yet, then only
+ * a delay is performed.  This permits the WAIT USECS
+ * statement to be used in VECTOR programs without causing
+ * any JTAG operations.
+ * Returns 0 for success, else appropriate error code.
+ */
+{
+	struct altera_jtag *js = &astate->js;
+	int status = 0;
+
+	if ((js->jtag_state != ILLEGAL_JTAG_STATE) &&
+	    (js->jtag_state != wait_state))
+		status = altera_goto_jstate(astate, wait_state);
+
+	if (status == 0)
+		/* Wait for specified time interval */
+		udelay(microseconds);
+
+	return status;
+}
+
+static void altera_concatenate_data(u8 *buffer,
+				u8 *preamble_data,
+				u32 preamble_count,
+				u8 *target_data,
+				u32 start_index,
+				u32 target_count,
+				u8 *postamble_data,
+				u32 postamble_count)
+/*
+ * Copies preamble data, target data, and postamble data
+ * into one buffer for IR or DR scans.
+ */
+{
+	u32 i, j, k;
+
+	for (i = 0L; i < preamble_count; ++i) {
+		if (preamble_data[i >> 3L] & (1L << (i & 7L)))
+			buffer[i >> 3L] |= (1L << (i & 7L));
+		else
+			buffer[i >> 3L] &= ~(u32)(1L << (i & 7L));
+
+	}
+
+	j = start_index;
+	k = preamble_count + target_count;
+	for (; i < k; ++i, ++j) {
+		if (target_data[j >> 3L] & (1L << (j & 7L)))
+			buffer[i >> 3L] |= (1L << (i & 7L));
+		else
+			buffer[i >> 3L] &= ~(u32)(1L << (i & 7L));
+
+	}
+
+	j = 0L;
+	k = preamble_count + target_count + postamble_count;
+	for (; i < k; ++i, ++j) {
+		if (postamble_data[j >> 3L] & (1L << (j & 7L)))
+			buffer[i >> 3L] |= (1L << (i & 7L));
+		else
+			buffer[i >> 3L] &= ~(u32)(1L << (i & 7L));
+
+	}
+}
+
+static int alt_jtag_drscan(struct altera_state *astate,
+			int start_state,
+			int count,
+			u8 *tdi,
+			u8 *tdo)
+{
+	int i = 0;
+	int tdo_bit = 0;
+	int status = 1;
+
+	/* First go to DRSHIFT state */
+	switch (start_state) {
+	case 0:						/* IDLE */
+		alt_jtag_io(1, 0, 0);	/* DRSELECT */
+		alt_jtag_io(0, 0, 0);	/* DRCAPTURE */
+		alt_jtag_io(0, 0, 0);	/* DRSHIFT */
+		break;
+
+	case 1:						/* DRPAUSE */
+		alt_jtag_io(1, 0, 0);	/* DREXIT2 */
+		alt_jtag_io(1, 0, 0);	/* DRUPDATE */
+		alt_jtag_io(1, 0, 0);	/* DRSELECT */
+		alt_jtag_io(0, 0, 0);	/* DRCAPTURE */
+		alt_jtag_io(0, 0, 0);	/* DRSHIFT */
+		break;
+
+	case 2:						/* IRPAUSE */
+		alt_jtag_io(1, 0, 0);	/* IREXIT2 */
+		alt_jtag_io(1, 0, 0);	/* IRUPDATE */
+		alt_jtag_io(1, 0, 0);	/* DRSELECT */
+		alt_jtag_io(0, 0, 0);	/* DRCAPTURE */
+		alt_jtag_io(0, 0, 0);	/* DRSHIFT */
+		break;
+
+	default:
+		status = 0;
+	}
+
+	if (status) {
+		/* loop in the SHIFT-DR state */
+		for (i = 0; i < count; i++) {
+			tdo_bit = alt_jtag_io(
+					(i == count - 1),
+					tdi[i >> 3] & (1 << (i & 7)),
+					(tdo != NULL));
+
+			if (tdo != NULL) {
+				if (tdo_bit)
+					tdo[i >> 3] |= (1 << (i & 7));
+				else
+					tdo[i >> 3] &= ~(u32)(1 << (i & 7));
+
+			}
+		}
+
+		alt_jtag_io(0, 0, 0);	/* DRPAUSE */
+	}
+
+	return status;
+}
+
+static int alt_jtag_irscan(struct altera_state *astate,
+		    int start_state,
+		    int count,
+		    u8 *tdi,
+		    u8 *tdo)
+{
+	int i = 0;
+	int tdo_bit = 0;
+	int status = 1;
+
+	/* First go to IRSHIFT state */
+	switch (start_state) {
+	case 0:						/* IDLE */
+		alt_jtag_io(1, 0, 0);	/* DRSELECT */
+		alt_jtag_io(1, 0, 0);	/* IRSELECT */
+		alt_jtag_io(0, 0, 0);	/* IRCAPTURE */
+		alt_jtag_io(0, 0, 0);	/* IRSHIFT */
+		break;
+
+	case 1:						/* DRPAUSE */
+		alt_jtag_io(1, 0, 0);	/* DREXIT2 */
+		alt_jtag_io(1, 0, 0);	/* DRUPDATE */
+		alt_jtag_io(1, 0, 0);	/* DRSELECT */
+		alt_jtag_io(1, 0, 0);	/* IRSELECT */
+		alt_jtag_io(0, 0, 0);	/* IRCAPTURE */
+		alt_jtag_io(0, 0, 0);	/* IRSHIFT */
+		break;
+
+	case 2:						/* IRPAUSE */
+		alt_jtag_io(1, 0, 0);	/* IREXIT2 */
+		alt_jtag_io(1, 0, 0);	/* IRUPDATE */
+		alt_jtag_io(1, 0, 0);	/* DRSELECT */
+		alt_jtag_io(1, 0, 0);	/* IRSELECT */
+		alt_jtag_io(0, 0, 0);	/* IRCAPTURE */
+		alt_jtag_io(0, 0, 0);	/* IRSHIFT */
+		break;
+
+	default:
+		status = 0;
+	}
+
+	if (status) {
+		/* loop in the SHIFT-IR state */
+		for (i = 0; i < count; i++) {
+			tdo_bit = alt_jtag_io(
+				      (i == count - 1),
+				      tdi[i >> 3] & (1 << (i & 7)),
+				      (tdo != NULL));
+			if (tdo != NULL) {
+				if (tdo_bit)
+					tdo[i >> 3] |= (1 << (i & 7));
+				else
+					tdo[i >> 3] &= ~(u32)(1 << (i & 7));
+
+			}
+		}
+
+		alt_jtag_io(0, 0, 0);	/* IRPAUSE */
+	}
+
+	return status;
+}
+
+static void altera_extract_target_data(u8 *buffer,
+				u8 *target_data,
+				u32 start_index,
+				u32 preamble_count,
+				u32 target_count)
+/*
+ * Copies target data from scan buffer, filtering out
+ * preamble and postamble data.
+ */
+{
+	u32 i;
+	u32 j;
+	u32 k;
+
+	j = preamble_count;
+	k = start_index + target_count;
+	for (i = start_index; i < k; ++i, ++j) {
+		if (buffer[j >> 3] & (1 << (j & 7)))
+			target_data[i >> 3] |= (1 << (i & 7));
+		else
+			target_data[i >> 3] &= ~(u32)(1 << (i & 7));
+
+	}
+}
+
+int altera_irscan(struct altera_state *astate,
+				u32 count,
+				u8 *tdi_data,
+				u32 start_index)
+/* Shifts data into instruction register */
+{
+	struct altera_jtag *js = &astate->js;
+	int start_code = 0;
+	u32 alloc_chars = 0;
+	u32 shift_count = js->ir_pre + count + js->ir_post;
+	int status = 0;
+	enum altera_jtag_state start_state = ILLEGAL_JTAG_STATE;
+
+	switch (js->jtag_state) {
+	case ILLEGAL_JTAG_STATE:
+	case RESET:
+	case IDLE:
+		start_code = 0;
+		start_state = IDLE;
+		break;
+
+	case DRSELECT:
+	case DRCAPTURE:
+	case DRSHIFT:
+	case DREXIT1:
+	case DRPAUSE:
+	case DREXIT2:
+	case DRUPDATE:
+		start_code = 1;
+		start_state = DRPAUSE;
+		break;
+
+	case IRSELECT:
+	case IRCAPTURE:
+	case IRSHIFT:
+	case IREXIT1:
+	case IRPAUSE:
+	case IREXIT2:
+	case IRUPDATE:
+		start_code = 2;
+		start_state = IRPAUSE;
+		break;
+
+	default:
+		status = -EREMOTEIO;
+		break;
+	}
+
+	if (status == 0)
+		if (js->jtag_state != start_state)
+			status = altera_goto_jstate(astate, start_state);
+
+	if (status == 0) {
+		if (shift_count > js->ir_length) {
+			alloc_chars = (shift_count + 7) >> 3;
+			kfree(js->ir_buffer);
+			js->ir_buffer = (u8 *)alt_malloc(alloc_chars);
+			if (js->ir_buffer == NULL)
+				status = -ENOMEM;
+			else
+				js->ir_length = alloc_chars * 8;
+
+		}
+	}
+
+	if (status == 0) {
+		/*
+		 * Copy preamble data, IR data,
+		 * and postamble data into a buffer
+		 */
+		altera_concatenate_data(js->ir_buffer,
+					js->ir_pre_data,
+					js->ir_pre,
+					tdi_data,
+					start_index,
+					count,
+					js->ir_post_data,
+					js->ir_post);
+		/* Do the IRSCAN */
+		alt_jtag_irscan(astate,
+				start_code,
+				shift_count,
+				js->ir_buffer,
+				NULL);
+
+		/* alt_jtag_irscan() always ends in IRPAUSE state */
+		js->jtag_state = IRPAUSE;
+	}
+
+	if (status == 0)
+		if (js->irstop_state != IRPAUSE)
+			status = altera_goto_jstate(astate, js->irstop_state);
+
+
+	return status;
+}
+
+int altera_swap_ir(struct altera_state *astate,
+			    u32 count,
+			    u8 *in_data,
+			    u32 in_index,
+			    u8 *out_data,
+			    u32 out_index)
+/* Shifts data into instruction register, capturing output data */
+{
+	struct altera_jtag *js = &astate->js;
+	int start_code = 0;
+	u32 alloc_chars = 0;
+	u32 shift_count = js->ir_pre + count + js->ir_post;
+	int status = 0;
+	enum altera_jtag_state start_state = ILLEGAL_JTAG_STATE;
+
+	switch (js->jtag_state) {
+	case ILLEGAL_JTAG_STATE:
+	case RESET:
+	case IDLE:
+		start_code = 0;
+		start_state = IDLE;
+		break;
+
+	case DRSELECT:
+	case DRCAPTURE:
+	case DRSHIFT:
+	case DREXIT1:
+	case DRPAUSE:
+	case DREXIT2:
+	case DRUPDATE:
+		start_code = 1;
+		start_state = DRPAUSE;
+		break;
+
+	case IRSELECT:
+	case IRCAPTURE:
+	case IRSHIFT:
+	case IREXIT1:
+	case IRPAUSE:
+	case IREXIT2:
+	case IRUPDATE:
+		start_code = 2;
+		start_state = IRPAUSE;
+		break;
+
+	default:
+		status = -EREMOTEIO;
+		break;
+	}
+
+	if (status == 0)
+		if (js->jtag_state != start_state)
+			status = altera_goto_jstate(astate, start_state);
+
+	if (status == 0) {
+		if (shift_count > js->ir_length) {
+			alloc_chars = (shift_count + 7) >> 3;
+			kfree(js->ir_buffer);
+			js->ir_buffer = (u8 *)alt_malloc(alloc_chars);
+			if (js->ir_buffer == NULL)
+				status = -ENOMEM;
+			else
+				js->ir_length = alloc_chars * 8;
+
+		}
+	}
+
+	if (status == 0) {
+		/*
+		 * Copy preamble data, IR data,
+		 * and postamble data into a buffer
+		 */
+		altera_concatenate_data(js->ir_buffer,
+					js->ir_pre_data,
+					js->ir_pre,
+					in_data,
+					in_index,
+					count,
+					js->ir_post_data,
+					js->ir_post);
+
+		/* Do the IRSCAN */
+		alt_jtag_irscan(astate,
+				start_code,
+				shift_count,
+				js->ir_buffer,
+				js->ir_buffer);
+
+		/* alt_jtag_irscan() always ends in IRPAUSE state */
+		js->jtag_state = IRPAUSE;
+	}
+
+	if (status == 0)
+		if (js->irstop_state != IRPAUSE)
+			status = altera_goto_jstate(astate, js->irstop_state);
+
+
+	if (status == 0)
+		/* Now extract the returned data from the buffer */
+		altera_extract_target_data(js->ir_buffer,
+					out_data, out_index,
+					js->ir_pre, count);
+
+	return status;
+}
+
+int altera_drscan(struct altera_state *astate,
+				u32 count,
+				u8 *tdi_data,
+				u32 start_index)
+/* Shifts data into data register (ignoring output data) */
+{
+	struct altera_jtag *js = &astate->js;
+	int start_code = 0;
+	u32 alloc_chars = 0;
+	u32 shift_count = js->dr_pre + count + js->dr_post;
+	int status = 0;
+	enum altera_jtag_state start_state = ILLEGAL_JTAG_STATE;
+
+	switch (js->jtag_state) {
+	case ILLEGAL_JTAG_STATE:
+	case RESET:
+	case IDLE:
+		start_code = 0;
+		start_state = IDLE;
+		break;
+
+	case DRSELECT:
+	case DRCAPTURE:
+	case DRSHIFT:
+	case DREXIT1:
+	case DRPAUSE:
+	case DREXIT2:
+	case DRUPDATE:
+		start_code = 1;
+		start_state = DRPAUSE;
+		break;
+
+	case IRSELECT:
+	case IRCAPTURE:
+	case IRSHIFT:
+	case IREXIT1:
+	case IRPAUSE:
+	case IREXIT2:
+	case IRUPDATE:
+		start_code = 2;
+		start_state = IRPAUSE;
+		break;
+
+	default:
+		status = -EREMOTEIO;
+		break;
+	}
+
+	if (status == 0)
+		if (js->jtag_state != start_state)
+			status = altera_goto_jstate(astate, start_state);
+
+	if (status == 0) {
+		if (shift_count > js->dr_length) {
+			alloc_chars = (shift_count + 7) >> 3;
+			kfree(js->dr_buffer);
+			js->dr_buffer = (u8 *)alt_malloc(alloc_chars);
+			if (js->dr_buffer == NULL)
+				status = -ENOMEM;
+			else
+				js->dr_length = alloc_chars * 8;
+
+		}
+	}
+
+	if (status == 0) {
+		/*
+		 * Copy preamble data, DR data,
+		 * and postamble data into a buffer
+		 */
+		altera_concatenate_data(js->dr_buffer,
+					js->dr_pre_data,
+					js->dr_pre,
+					tdi_data,
+					start_index,
+					count,
+					js->dr_post_data,
+					js->dr_post);
+		/* Do the DRSCAN */
+		alt_jtag_drscan(astate, start_code, shift_count,
+				js->dr_buffer, NULL);
+		/* alt_jtag_drscan() always ends in DRPAUSE state */
+		js->jtag_state = DRPAUSE;
+	}
+
+	if (status == 0)
+		if (js->drstop_state != DRPAUSE)
+			status = altera_goto_jstate(astate, js->drstop_state);
+
+	return status;
+}
+
+int altera_swap_dr(struct altera_state *astate, u32 count,
+				u8 *in_data, u32 in_index,
+				u8 *out_data, u32 out_index)
+/* Shifts data into data register, capturing output data */
+{
+	struct altera_jtag *js = &astate->js;
+	int start_code = 0;
+	u32 alloc_chars = 0;
+	u32 shift_count = js->dr_pre + count + js->dr_post;
+	int status = 0;
+	enum altera_jtag_state start_state = ILLEGAL_JTAG_STATE;
+
+	switch (js->jtag_state) {
+	case ILLEGAL_JTAG_STATE:
+	case RESET:
+	case IDLE:
+		start_code = 0;
+		start_state = IDLE;
+		break;
+
+	case DRSELECT:
+	case DRCAPTURE:
+	case DRSHIFT:
+	case DREXIT1:
+	case DRPAUSE:
+	case DREXIT2:
+	case DRUPDATE:
+		start_code = 1;
+		start_state = DRPAUSE;
+		break;
+
+	case IRSELECT:
+	case IRCAPTURE:
+	case IRSHIFT:
+	case IREXIT1:
+	case IRPAUSE:
+	case IREXIT2:
+	case IRUPDATE:
+		start_code = 2;
+		start_state = IRPAUSE;
+		break;
+
+	default:
+		status = -EREMOTEIO;
+		break;
+	}
+
+	if (status == 0)
+		if (js->jtag_state != start_state)
+			status = altera_goto_jstate(astate, start_state);
+
+	if (status == 0) {
+		if (shift_count > js->dr_length) {
+			alloc_chars = (shift_count + 7) >> 3;
+			kfree(js->dr_buffer);
+			js->dr_buffer = (u8 *)alt_malloc(alloc_chars);
+
+			if (js->dr_buffer == NULL)
+				status = -ENOMEM;
+			else
+				js->dr_length = alloc_chars * 8;
+
+		}
+	}
+
+	if (status == 0) {
+		/*
+		 * Copy preamble data, DR data,
+		 * and postamble data into a buffer
+		 */
+		altera_concatenate_data(js->dr_buffer,
+				js->dr_pre_data,
+				js->dr_pre,
+				in_data,
+				in_index,
+				count,
+				js->dr_post_data,
+				js->dr_post);
+
+		/* Do the DRSCAN */
+		alt_jtag_drscan(astate,
+				start_code,
+				shift_count,
+				js->dr_buffer,
+				js->dr_buffer);
+
+		/* alt_jtag_drscan() always ends in DRPAUSE state */
+		js->jtag_state = DRPAUSE;
+	}
+
+	if (status == 0)
+		if (js->drstop_state != DRPAUSE)
+			status = altera_goto_jstate(astate, js->drstop_state);
+
+	if (status == 0)
+		/* Now extract the returned data from the buffer */
+		altera_extract_target_data(js->dr_buffer,
+					out_data,
+					out_index,
+					js->dr_pre,
+					count);
+
+	return status;
+}
+
+void altera_free_buffers(struct altera_state *astate)
+{
+	struct altera_jtag *js = &astate->js;
+	/* If the JTAG interface was used, reset it to TLR */
+	if (js->jtag_state != ILLEGAL_JTAG_STATE)
+		altera_jreset_idle(astate);
+
+	kfree(js->dr_pre_data);
+	js->dr_pre_data = NULL;
+
+	kfree(js->dr_post_data);
+	js->dr_post_data = NULL;
+
+	kfree(js->dr_buffer);
+	js->dr_buffer = NULL;
+
+	kfree(js->ir_pre_data);
+	js->ir_pre_data = NULL;
+
+	kfree(js->ir_post_data);
+	js->ir_post_data = NULL;
+
+	kfree(js->ir_buffer);
+	js->ir_buffer = NULL;
+}
diff --git a/drivers/misc/altera-stapl/altera-jtag.h b/drivers/misc/altera-stapl/altera-jtag.h
new file mode 100644
index 000000000..2f97e36a2
--- /dev/null
+++ b/drivers/misc/altera-stapl/altera-jtag.h
@@ -0,0 +1,113 @@
+/*
+ * altera-jtag.h
+ *
+ * altera FPGA driver
+ *
+ * Copyright (C) Altera Corporation 1998-2001
+ * Copyright (C) 2010 NetUP Inc.
+ * Copyright (C) 2010 Igor M. Liplianin <liplianin@netup.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef ALTERA_JTAG_H
+#define ALTERA_JTAG_H
+
+/* Function Prototypes */
+enum altera_jtag_state {
+	ILLEGAL_JTAG_STATE = -1,
+	RESET = 0,
+	IDLE = 1,
+	DRSELECT = 2,
+	DRCAPTURE = 3,
+	DRSHIFT = 4,
+	DREXIT1 = 5,
+	DRPAUSE = 6,
+	DREXIT2 = 7,
+	DRUPDATE = 8,
+	IRSELECT = 9,
+	IRCAPTURE = 10,
+	IRSHIFT = 11,
+	IREXIT1 = 12,
+	IRPAUSE = 13,
+	IREXIT2 = 14,
+	IRUPDATE = 15
+
+};
+
+struct altera_jtag {
+	/* Global variable to store the current JTAG state */
+	enum altera_jtag_state jtag_state;
+
+	/* Store current stop-state for DR and IR scan commands */
+	enum altera_jtag_state drstop_state;
+	enum altera_jtag_state irstop_state;
+
+	/* Store current padding values */
+	u32 dr_pre;
+	u32 dr_post;
+	u32 ir_pre;
+	u32 ir_post;
+	u32 dr_length;
+	u32 ir_length;
+	u8 *dr_pre_data;
+	u8 *dr_post_data;
+	u8 *ir_pre_data;
+	u8 *ir_post_data;
+	u8 *dr_buffer;
+	u8 *ir_buffer;
+};
+
+#define ALTERA_STACK_SIZE 128
+#define ALTERA_MESSAGE_LENGTH 1024
+
+struct altera_state {
+	struct altera_config	*config;
+	struct altera_jtag	js;
+	char			msg_buff[ALTERA_MESSAGE_LENGTH + 1];
+	long			stack[ALTERA_STACK_SIZE];
+};
+
+int altera_jinit(struct altera_state *astate);
+int altera_set_drstop(struct altera_jtag *js, enum altera_jtag_state state);
+int altera_set_irstop(struct altera_jtag *js, enum altera_jtag_state state);
+int altera_set_dr_pre(struct altera_jtag *js, u32 count, u32 start_index,
+				u8 *preamble_data);
+int altera_set_ir_pre(struct altera_jtag *js, u32 count, u32 start_index,
+				u8 *preamble_data);
+int altera_set_dr_post(struct altera_jtag *js, u32 count, u32 start_index,
+				u8 *postamble_data);
+int altera_set_ir_post(struct altera_jtag *js, u32 count, u32 start_index,
+				u8 *postamble_data);
+int altera_goto_jstate(struct altera_state *astate,
+				enum altera_jtag_state state);
+int altera_wait_cycles(struct altera_state *astate, s32 cycles,
+				enum altera_jtag_state wait_state);
+int altera_wait_msecs(struct altera_state *astate, s32 microseconds,
+				enum altera_jtag_state wait_state);
+int altera_irscan(struct altera_state *astate, u32 count,
+				u8 *tdi_data, u32 start_index);
+int altera_swap_ir(struct altera_state *astate,
+				u32 count, u8 *in_data,
+				u32 in_index, u8 *out_data,
+				u32 out_index);
+int altera_drscan(struct altera_state *astate, u32 count,
+				u8 *tdi_data, u32 start_index);
+int altera_swap_dr(struct altera_state *astate, u32 count,
+				u8 *in_data, u32 in_index,
+				u8 *out_data, u32 out_index);
+void altera_free_buffers(struct altera_state *astate);
+#endif /* ALTERA_JTAG_H */
diff --git a/drivers/misc/altera-stapl/altera-lpt.c b/drivers/misc/altera-stapl/altera-lpt.c
new file mode 100644
index 000000000..91456a036
--- /dev/null
+++ b/drivers/misc/altera-stapl/altera-lpt.c
@@ -0,0 +1,70 @@
+/*
+ * altera-lpt.c
+ *
+ * altera FPGA driver
+ *
+ * Copyright (C) Altera Corporation 1998-2001
+ * Copyright (C) 2010 NetUP Inc.
+ * Copyright (C) 2010 Abylay Ospan <aospan@netup.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include "altera-exprt.h"
+
+static int lpt_hardware_initialized;
+
+static void byteblaster_write(int port, int data)
+{
+	outb((u8)data, (u16)(port + 0x378));
+};
+
+static int byteblaster_read(int port)
+{
+	int data = 0;
+	data = inb((u16)(port + 0x378));
+	return data & 0xff;
+};
+
+int netup_jtag_io_lpt(void *device, int tms, int tdi, int read_tdo)
+{
+	int data = 0;
+	int tdo = 0;
+	int initial_lpt_ctrl = 0;
+
+	if (!lpt_hardware_initialized) {
+		initial_lpt_ctrl = byteblaster_read(2);
+		byteblaster_write(2, (initial_lpt_ctrl | 0x02) & 0xdf);
+		lpt_hardware_initialized = 1;
+	}
+
+	data = ((tdi ? 0x40 : 0) | (tms ? 0x02 : 0));
+
+	byteblaster_write(0, data);
+
+	if (read_tdo) {
+		tdo = byteblaster_read(1);
+		tdo = ((tdo & 0x80) ? 0 : 1);
+	}
+
+	byteblaster_write(0, data | 0x01);
+
+	byteblaster_write(0, data);
+
+	return tdo;
+}
diff --git a/drivers/misc/altera-stapl/altera.c b/drivers/misc/altera-stapl/altera.c
new file mode 100644
index 000000000..94bde09d9
--- /dev/null
+++ b/drivers/misc/altera-stapl/altera.c
@@ -0,0 +1,2536 @@
+/*
+ * altera.c
+ *
+ * altera FPGA driver
+ *
+ * Copyright (C) Altera Corporation 1998-2001
+ * Copyright (C) 2010,2011 NetUP Inc.
+ * Copyright (C) 2010,2011 Igor M. Liplianin <liplianin@netup.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <asm/unaligned.h>
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/firmware.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <misc/altera.h>
+#include "altera-exprt.h"
+#include "altera-jtag.h"
+
+static int debug = 1;
+module_param(debug, int, 0644);
+MODULE_PARM_DESC(debug, "enable debugging information");
+
+MODULE_DESCRIPTION("altera FPGA kernel module");
+MODULE_AUTHOR("Igor M. Liplianin  <liplianin@netup.ru>");
+MODULE_LICENSE("GPL");
+
+#define dprintk(args...) \
+	if (debug) { \
+		printk(KERN_DEBUG args); \
+	}
+
+enum altera_fpga_opcode {
+	OP_NOP = 0,
+	OP_DUP,
+	OP_SWP,
+	OP_ADD,
+	OP_SUB,
+	OP_MULT,
+	OP_DIV,
+	OP_MOD,
+	OP_SHL,
+	OP_SHR,
+	OP_NOT,
+	OP_AND,
+	OP_OR,
+	OP_XOR,
+	OP_INV,
+	OP_GT,
+	OP_LT,
+	OP_RET,
+	OP_CMPS,
+	OP_PINT,
+	OP_PRNT,
+	OP_DSS,
+	OP_DSSC,
+	OP_ISS,
+	OP_ISSC,
+	OP_DPR = 0x1c,
+	OP_DPRL,
+	OP_DPO,
+	OP_DPOL,
+	OP_IPR,
+	OP_IPRL,
+	OP_IPO,
+	OP_IPOL,
+	OP_PCHR,
+	OP_EXIT,
+	OP_EQU,
+	OP_POPT,
+	OP_ABS = 0x2c,
+	OP_BCH0,
+	OP_PSH0 = 0x2f,
+	OP_PSHL = 0x40,
+	OP_PSHV,
+	OP_JMP,
+	OP_CALL,
+	OP_NEXT,
+	OP_PSTR,
+	OP_SINT = 0x47,
+	OP_ST,
+	OP_ISTP,
+	OP_DSTP,
+	OP_SWPN,
+	OP_DUPN,
+	OP_POPV,
+	OP_POPE,
+	OP_POPA,
+	OP_JMPZ,
+	OP_DS,
+	OP_IS,
+	OP_DPRA,
+	OP_DPOA,
+	OP_IPRA,
+	OP_IPOA,
+	OP_EXPT,
+	OP_PSHE,
+	OP_PSHA,
+	OP_DYNA,
+	OP_EXPV = 0x5c,
+	OP_COPY = 0x80,
+	OP_REVA,
+	OP_DSC,
+	OP_ISC,
+	OP_WAIT,
+	OP_VS,
+	OP_CMPA = 0xc0,
+	OP_VSC,
+};
+
+struct altera_procinfo {
+	char			*name;
+	u8			attrs;
+	struct altera_procinfo	*next;
+};
+
+/* This function checks if enough parameters are available on the stack. */
+static int altera_check_stack(int stack_ptr, int count, int *status)
+{
+	if (stack_ptr < count) {
+		*status = -EOVERFLOW;
+		return 0;
+	}
+
+	return 1;
+}
+
+static void altera_export_int(char *key, s32 value)
+{
+	dprintk("Export: key = \"%s\", value = %d\n", key, value);
+}
+
+#define HEX_LINE_CHARS 72
+#define HEX_LINE_BITS (HEX_LINE_CHARS * 4)
+
+static void altera_export_bool_array(char *key, u8 *data, s32 count)
+{
+	char string[HEX_LINE_CHARS + 1];
+	s32 i, offset;
+	u32 size, line, lines, linebits, value, j, k;
+
+	if (count > HEX_LINE_BITS) {
+		dprintk("Export: key = \"%s\", %d bits, value = HEX\n",
+							key, count);
+		lines = (count + (HEX_LINE_BITS - 1)) / HEX_LINE_BITS;
+
+		for (line = 0; line < lines; ++line) {
+			if (line < (lines - 1)) {
+				linebits = HEX_LINE_BITS;
+				size = HEX_LINE_CHARS;
+				offset = count - ((line + 1) * HEX_LINE_BITS);
+			} else {
+				linebits =
+					count - ((lines - 1) * HEX_LINE_BITS);
+				size = (linebits + 3) / 4;
+				offset = 0L;
+			}
+
+			string[size] = '\0';
+			j = size - 1;
+			value = 0;
+
+			for (k = 0; k < linebits; ++k) {
+				i = k + offset;
+				if (data[i >> 3] & (1 << (i & 7)))
+					value |= (1 << (i & 3));
+				if ((i & 3) == 3) {
+					sprintf(&string[j], "%1x", value);
+					value = 0;
+					--j;
+				}
+			}
+			if ((k & 3) > 0)
+				sprintf(&string[j], "%1x", value);
+
+			dprintk("%s\n", string);
+		}
+
+	} else {
+		size = (count + 3) / 4;
+		string[size] = '\0';
+		j = size - 1;
+		value = 0;
+
+		for (i = 0; i < count; ++i) {
+			if (data[i >> 3] & (1 << (i & 7)))
+				value |= (1 << (i & 3));
+			if ((i & 3) == 3) {
+				sprintf(&string[j], "%1x", value);
+				value = 0;
+				--j;
+			}
+		}
+		if ((i & 3) > 0)
+			sprintf(&string[j], "%1x", value);
+
+		dprintk("Export: key = \"%s\", %d bits, value = HEX %s\n",
+			key, count, string);
+	}
+}
+
+static int altera_execute(struct altera_state *astate,
+				u8 *p,
+				s32 program_size,
+				s32 *error_address,
+				int *exit_code,
+				int *format_version)
+{
+	struct altera_config *aconf = astate->config;
+	char *msg_buff = astate->msg_buff;
+	long *stack = astate->stack;
+	int status = 0;
+	u32 first_word = 0L;
+	u32 action_table = 0L;
+	u32 proc_table = 0L;
+	u32 str_table = 0L;
+	u32 sym_table = 0L;
+	u32 data_sect = 0L;
+	u32 code_sect = 0L;
+	u32 debug_sect = 0L;
+	u32 action_count = 0L;
+	u32 proc_count = 0L;
+	u32 sym_count = 0L;
+	long *vars = NULL;
+	s32 *var_size = NULL;
+	char *attrs = NULL;
+	u8 *proc_attributes = NULL;
+	u32 pc;
+	u32 opcode_address;
+	u32 args[3];
+	u32 opcode;
+	u32 name_id;
+	u8 charbuf[4];
+	long long_tmp;
+	u32 variable_id;
+	u8 *charptr_tmp;
+	u8 *charptr_tmp2;
+	long *longptr_tmp;
+	int version = 0;
+	int delta = 0;
+	int stack_ptr = 0;
+	u32 arg_count;
+	int done = 0;
+	int bad_opcode = 0;
+	u32 count;
+	u32 index;
+	u32 index2;
+	s32 long_count;
+	s32 long_idx;
+	s32 long_idx2;
+	u32 i;
+	u32 j;
+	u32 uncomp_size;
+	u32 offset;
+	u32 value;
+	int current_proc = 0;
+	int reverse;
+
+	char *name;
+
+	dprintk("%s\n", __func__);
+
+	/* Read header information */
+	if (program_size > 52L) {
+		first_word    = get_unaligned_be32(&p[0]);
+		version = (first_word & 1L);
+		*format_version = version + 1;
+		delta = version * 8;
+
+		action_table  = get_unaligned_be32(&p[4]);
+		proc_table    = get_unaligned_be32(&p[8]);
+		str_table  = get_unaligned_be32(&p[4 + delta]);
+		sym_table  = get_unaligned_be32(&p[16 + delta]);
+		data_sect  = get_unaligned_be32(&p[20 + delta]);
+		code_sect  = get_unaligned_be32(&p[24 + delta]);
+		debug_sect = get_unaligned_be32(&p[28 + delta]);
+		action_count  = get_unaligned_be32(&p[40 + delta]);
+		proc_count    = get_unaligned_be32(&p[44 + delta]);
+		sym_count  = get_unaligned_be32(&p[48 + (2 * delta)]);
+	}
+
+	if ((first_word != 0x4A414D00L) && (first_word != 0x4A414D01L)) {
+		done = 1;
+		status = -EIO;
+		goto exit_done;
+	}
+
+	if (sym_count <= 0)
+		goto exit_done;
+
+	vars = kcalloc(sym_count, sizeof(long), GFP_KERNEL);
+
+	if (vars == NULL)
+		status = -ENOMEM;
+
+	if (status == 0) {
+		var_size = kcalloc(sym_count, sizeof(s32), GFP_KERNEL);
+
+		if (var_size == NULL)
+			status = -ENOMEM;
+	}
+
+	if (status == 0) {
+		attrs = kzalloc(sym_count, GFP_KERNEL);
+
+		if (attrs == NULL)
+			status = -ENOMEM;
+	}
+
+	if ((status == 0) && (version > 0)) {
+		proc_attributes = kzalloc(proc_count, GFP_KERNEL);
+
+		if (proc_attributes == NULL)
+			status = -ENOMEM;
+	}
+
+	if (status != 0)
+		goto exit_done;
+
+	delta = version * 2;
+
+	for (i = 0; i < sym_count; ++i) {
+		offset = (sym_table + ((11 + delta) * i));
+
+		value = get_unaligned_be32(&p[offset + 3 + delta]);
+
+		attrs[i] = p[offset];
+
+		/*
+		 * use bit 7 of attribute byte to indicate that
+		 * this buffer was dynamically allocated
+		 * and should be freed later
+		 */
+		attrs[i] &= 0x7f;
+
+		var_size[i] = get_unaligned_be32(&p[offset + 7 + delta]);
+
+		/*
+		 * Attribute bits:
+		 * bit 0: 0 = read-only, 1 = read-write
+		 * bit 1: 0 = not compressed, 1 = compressed
+		 * bit 2: 0 = not initialized, 1 = initialized
+		 * bit 3: 0 = scalar, 1 = array
+		 * bit 4: 0 = Boolean, 1 = integer
+		 * bit 5: 0 = declared variable,
+		 *	1 = compiler created temporary variable
+		 */
+
+		if ((attrs[i] & 0x0c) == 0x04)
+			/* initialized scalar variable */
+			vars[i] = value;
+		else if ((attrs[i] & 0x1e) == 0x0e) {
+			/* initialized compressed Boolean array */
+			uncomp_size = get_unaligned_le32(&p[data_sect + value]);
+
+			/* allocate a buffer for the uncompressed data */
+			vars[i] = (long)kzalloc(uncomp_size, GFP_KERNEL);
+			if (vars[i] == 0L)
+				status = -ENOMEM;
+			else {
+				/* set flag so buffer will be freed later */
+				attrs[i] |= 0x80;
+
+				/* uncompress the data */
+				if (altera_shrink(&p[data_sect + value],
+						var_size[i],
+						(u8 *)vars[i],
+						uncomp_size,
+						version) != uncomp_size)
+					/* decompression failed */
+					status = -EIO;
+				else
+					var_size[i] = uncomp_size * 8L;
+
+			}
+		} else if ((attrs[i] & 0x1e) == 0x0c) {
+			/* initialized Boolean array */
+			vars[i] = value + data_sect + (long)p;
+		} else if ((attrs[i] & 0x1c) == 0x1c) {
+			/* initialized integer array */
+			vars[i] = value + data_sect;
+		} else if ((attrs[i] & 0x0c) == 0x08) {
+			/* uninitialized array */
+
+			/* flag attrs so that memory is freed */
+			attrs[i] |= 0x80;
+
+			if (var_size[i] > 0) {
+				u32 size;
+
+				if (attrs[i] & 0x10)
+					/* integer array */
+					size = (var_size[i] * sizeof(s32));
+				else
+					/* Boolean array */
+					size = ((var_size[i] + 7L) / 8L);
+
+				vars[i] = (long)kzalloc(size, GFP_KERNEL);
+
+				if (vars[i] == 0) {
+					status = -ENOMEM;
+				} else {
+					/* zero out memory */
+					for (j = 0; j < size; ++j)
+						((u8 *)(vars[i]))[j] = 0;
+
+				}
+			} else
+				vars[i] = 0;
+
+		} else
+			vars[i] = 0;
+
+	}
+
+exit_done:
+	if (status != 0)
+		done = 1;
+
+	altera_jinit(astate);
+
+	pc = code_sect;
+	msg_buff[0] = '\0';
+
+	/*
+	 * For JBC version 2, we will execute the procedures corresponding to
+	 * the selected ACTION
+	 */
+	if (version > 0) {
+		if (aconf->action == NULL) {
+			status = -EINVAL;
+			done = 1;
+		} else {
+			int action_found = 0;
+			for (i = 0; (i < action_count) && !action_found; ++i) {
+				name_id = get_unaligned_be32(&p[action_table +
+								(12 * i)]);
+
+				name = &p[str_table + name_id];
+
+				if (strncasecmp(aconf->action, name, strlen(name)) == 0) {
+					action_found = 1;
+					current_proc =
+						get_unaligned_be32(&p[action_table +
+								(12 * i) + 8]);
+				}
+			}
+
+			if (!action_found) {
+				status = -EINVAL;
+				done = 1;
+			}
+		}
+
+		if (status == 0) {
+			int first_time = 1;
+			i = current_proc;
+			while ((i != 0) || first_time) {
+				first_time = 0;
+				/* check procedure attribute byte */
+				proc_attributes[i] =
+						(p[proc_table +
+								(13 * i) + 8] &
+									0x03);
+
+				/*
+				 * BIT0 - OPTIONAL
+				 * BIT1 - RECOMMENDED
+				 * BIT6 - FORCED OFF
+				 * BIT7 - FORCED ON
+				 */
+
+				i = get_unaligned_be32(&p[proc_table +
+							(13 * i) + 4]);
+			}
+
+			/*
+			 * Set current_proc to the first procedure
+			 * to be executed
+			 */
+			i = current_proc;
+			while ((i != 0) &&
+				((proc_attributes[i] == 1) ||
+				((proc_attributes[i] & 0xc0) == 0x40))) {
+				i = get_unaligned_be32(&p[proc_table +
+							(13 * i) + 4]);
+			}
+
+			if ((i != 0) || ((i == 0) && (current_proc == 0) &&
+				((proc_attributes[0] != 1) &&
+				((proc_attributes[0] & 0xc0) != 0x40)))) {
+				current_proc = i;
+				pc = code_sect +
+					get_unaligned_be32(&p[proc_table +
+								(13 * i) + 9]);
+				if ((pc < code_sect) || (pc >= debug_sect))
+					status = -ERANGE;
+			} else
+				/* there are no procedures to execute! */
+				done = 1;
+
+		}
+	}
+
+	msg_buff[0] = '\0';
+
+	while (!done) {
+		opcode = (p[pc] & 0xff);
+		opcode_address = pc;
+		++pc;
+
+		if (debug > 1)
+			printk("opcode: %02x\n", opcode);
+
+		arg_count = (opcode >> 6) & 3;
+		for (i = 0; i < arg_count; ++i) {
+			args[i] = get_unaligned_be32(&p[pc]);
+			pc += 4;
+		}
+
+		switch (opcode) {
+		case OP_NOP:
+			break;
+		case OP_DUP:
+			if (altera_check_stack(stack_ptr, 1, &status)) {
+				stack[stack_ptr] = stack[stack_ptr - 1];
+				++stack_ptr;
+			}
+			break;
+		case OP_SWP:
+			if (altera_check_stack(stack_ptr, 2, &status)) {
+				long_tmp = stack[stack_ptr - 2];
+				stack[stack_ptr - 2] = stack[stack_ptr - 1];
+				stack[stack_ptr - 1] = long_tmp;
+			}
+			break;
+		case OP_ADD:
+			if (altera_check_stack(stack_ptr, 2, &status)) {
+				--stack_ptr;
+				stack[stack_ptr - 1] += stack[stack_ptr];
+			}
+			break;
+		case OP_SUB:
+			if (altera_check_stack(stack_ptr, 2, &status)) {
+				--stack_ptr;
+				stack[stack_ptr - 1] -= stack[stack_ptr];
+			}
+			break;
+		case OP_MULT:
+			if (altera_check_stack(stack_ptr, 2, &status)) {
+				--stack_ptr;
+				stack[stack_ptr - 1] *= stack[stack_ptr];
+			}
+			break;
+		case OP_DIV:
+			if (altera_check_stack(stack_ptr, 2, &status)) {
+				--stack_ptr;
+				stack[stack_ptr - 1] /= stack[stack_ptr];
+			}
+			break;
+		case OP_MOD:
+			if (altera_check_stack(stack_ptr, 2, &status)) {
+				--stack_ptr;
+				stack[stack_ptr - 1] %= stack[stack_ptr];
+			}
+			break;
+		case OP_SHL:
+			if (altera_check_stack(stack_ptr, 2, &status)) {
+				--stack_ptr;
+				stack[stack_ptr - 1] <<= stack[stack_ptr];
+			}
+			break;
+		case OP_SHR:
+			if (altera_check_stack(stack_ptr, 2, &status)) {
+				--stack_ptr;
+				stack[stack_ptr - 1] >>= stack[stack_ptr];
+			}
+			break;
+		case OP_NOT:
+			if (altera_check_stack(stack_ptr, 1, &status))
+				stack[stack_ptr - 1] ^= (-1L);
+
+			break;
+		case OP_AND:
+			if (altera_check_stack(stack_ptr, 2, &status)) {
+				--stack_ptr;
+				stack[stack_ptr - 1] &= stack[stack_ptr];
+			}
+			break;
+		case OP_OR:
+			if (altera_check_stack(stack_ptr, 2, &status)) {
+				--stack_ptr;
+				stack[stack_ptr - 1] |= stack[stack_ptr];
+			}
+			break;
+		case OP_XOR:
+			if (altera_check_stack(stack_ptr, 2, &status)) {
+				--stack_ptr;
+				stack[stack_ptr - 1] ^= stack[stack_ptr];
+			}
+			break;
+		case OP_INV:
+			if (!altera_check_stack(stack_ptr, 1, &status))
+				break;
+			stack[stack_ptr - 1] = stack[stack_ptr - 1] ? 0L : 1L;
+			break;
+		case OP_GT:
+			if (!altera_check_stack(stack_ptr, 2, &status))
+				break;
+			--stack_ptr;
+			stack[stack_ptr - 1] =
+				(stack[stack_ptr - 1] > stack[stack_ptr]) ?
+									1L : 0L;
+
+			break;
+		case OP_LT:
+			if (!altera_check_stack(stack_ptr, 2, &status))
+				break;
+			--stack_ptr;
+			stack[stack_ptr - 1] =
+				(stack[stack_ptr - 1] < stack[stack_ptr]) ?
+									1L : 0L;
+
+			break;
+		case OP_RET:
+			if ((version > 0) && (stack_ptr == 0)) {
+				/*
+				 * We completed one of the main procedures
+				 * of an ACTION.
+				 * Find the next procedure
+				 * to be executed and jump to it.
+				 * If there are no more procedures, then EXIT.
+				 */
+				i = get_unaligned_be32(&p[proc_table +
+						(13 * current_proc) + 4]);
+				while ((i != 0) &&
+					((proc_attributes[i] == 1) ||
+					((proc_attributes[i] & 0xc0) == 0x40)))
+					i = get_unaligned_be32(&p[proc_table +
+								(13 * i) + 4]);
+
+				if (i == 0) {
+					/* no procedures to execute! */
+					done = 1;
+					*exit_code = 0;	/* success */
+				} else {
+					current_proc = i;
+					pc = code_sect + get_unaligned_be32(
+								&p[proc_table +
+								(13 * i) + 9]);
+					if ((pc < code_sect) ||
+					    (pc >= debug_sect))
+						status = -ERANGE;
+				}
+
+			} else
+				if (altera_check_stack(stack_ptr, 1, &status)) {
+					pc = stack[--stack_ptr] + code_sect;
+					if ((pc <= code_sect) ||
+					    (pc >= debug_sect))
+						status = -ERANGE;
+
+				}
+
+			break;
+		case OP_CMPS:
+			/*
+			 * Array short compare
+			 * ...stack 0 is source 1 value
+			 * ...stack 1 is source 2 value
+			 * ...stack 2 is mask value
+			 * ...stack 3 is count
+			 */
+			if (altera_check_stack(stack_ptr, 4, &status)) {
+				s32 a = stack[--stack_ptr];
+				s32 b = stack[--stack_ptr];
+				long_tmp = stack[--stack_ptr];
+				count = stack[stack_ptr - 1];
+
+				if ((count < 1) || (count > 32))
+					status = -ERANGE;
+				else {
+					long_tmp &= ((-1L) >> (32 - count));
+
+					stack[stack_ptr - 1] =
+					((a & long_tmp) == (b & long_tmp))
+								? 1L : 0L;
+				}
+			}
+			break;
+		case OP_PINT:
+			/*
+			 * PRINT add integer
+			 * ...stack 0 is integer value
+			 */
+			if (!altera_check_stack(stack_ptr, 1, &status))
+				break;
+			sprintf(&msg_buff[strlen(msg_buff)],
+					"%ld", stack[--stack_ptr]);
+			break;
+		case OP_PRNT:
+			/* PRINT finish */
+			if (debug)
+				printk(msg_buff, "\n");
+
+			msg_buff[0] = '\0';
+			break;
+		case OP_DSS:
+			/*
+			 * DRSCAN short
+			 * ...stack 0 is scan data
+			 * ...stack 1 is count
+			 */
+			if (!altera_check_stack(stack_ptr, 2, &status))
+				break;
+			long_tmp = stack[--stack_ptr];
+			count = stack[--stack_ptr];
+			put_unaligned_le32(long_tmp, &charbuf[0]);
+			status = altera_drscan(astate, count, charbuf, 0);
+			break;
+		case OP_DSSC:
+			/*
+			 * DRSCAN short with capture
+			 * ...stack 0 is scan data
+			 * ...stack 1 is count
+			 */
+			if (!altera_check_stack(stack_ptr, 2, &status))
+				break;
+			long_tmp = stack[--stack_ptr];
+			count = stack[stack_ptr - 1];
+			put_unaligned_le32(long_tmp, &charbuf[0]);
+			status = altera_swap_dr(astate, count, charbuf,
+							0, charbuf, 0);
+			stack[stack_ptr - 1] = get_unaligned_le32(&charbuf[0]);
+			break;
+		case OP_ISS:
+			/*
+			 * IRSCAN short
+			 * ...stack 0 is scan data
+			 * ...stack 1 is count
+			 */
+			if (!altera_check_stack(stack_ptr, 2, &status))
+				break;
+			long_tmp = stack[--stack_ptr];
+			count = stack[--stack_ptr];
+			put_unaligned_le32(long_tmp, &charbuf[0]);
+			status = altera_irscan(astate, count, charbuf, 0);
+			break;
+		case OP_ISSC:
+			/*
+			 * IRSCAN short with capture
+			 * ...stack 0 is scan data
+			 * ...stack 1 is count
+			 */
+			if (!altera_check_stack(stack_ptr, 2, &status))
+				break;
+			long_tmp = stack[--stack_ptr];
+			count = stack[stack_ptr - 1];
+			put_unaligned_le32(long_tmp, &charbuf[0]);
+			status = altera_swap_ir(astate, count, charbuf,
+							0, charbuf, 0);
+			stack[stack_ptr - 1] = get_unaligned_le32(&charbuf[0]);
+			break;
+		case OP_DPR:
+			if (!altera_check_stack(stack_ptr, 1, &status))
+				break;
+			count = stack[--stack_ptr];
+			status = altera_set_dr_pre(&astate->js, count, 0, NULL);
+			break;
+		case OP_DPRL:
+			/*
+			 * DRPRE with literal data
+			 * ...stack 0 is count
+			 * ...stack 1 is literal data
+			 */
+			if (!altera_check_stack(stack_ptr, 2, &status))
+				break;
+			count = stack[--stack_ptr];
+			long_tmp = stack[--stack_ptr];
+			put_unaligned_le32(long_tmp, &charbuf[0]);
+			status = altera_set_dr_pre(&astate->js, count, 0,
+						charbuf);
+			break;
+		case OP_DPO:
+			/*
+			 * DRPOST
+			 * ...stack 0 is count
+			 */
+			if (altera_check_stack(stack_ptr, 1, &status)) {
+				count = stack[--stack_ptr];
+				status = altera_set_dr_post(&astate->js, count,
+								0, NULL);
+			}
+			break;
+		case OP_DPOL:
+			/*
+			 * DRPOST with literal data
+			 * ...stack 0 is count
+			 * ...stack 1 is literal data
+			 */
+			if (!altera_check_stack(stack_ptr, 2, &status))
+				break;
+			count = stack[--stack_ptr];
+			long_tmp = stack[--stack_ptr];
+			put_unaligned_le32(long_tmp, &charbuf[0]);
+			status = altera_set_dr_post(&astate->js, count, 0,
+							charbuf);
+			break;
+		case OP_IPR:
+			if (altera_check_stack(stack_ptr, 1, &status)) {
+				count = stack[--stack_ptr];
+				status = altera_set_ir_pre(&astate->js, count,
+								0, NULL);
+			}
+			break;
+		case OP_IPRL:
+			/*
+			 * IRPRE with literal data
+			 * ...stack 0 is count
+			 * ...stack 1 is literal data
+			 */
+			if (altera_check_stack(stack_ptr, 2, &status)) {
+				count = stack[--stack_ptr];
+				long_tmp = stack[--stack_ptr];
+				put_unaligned_le32(long_tmp, &charbuf[0]);
+				status = altera_set_ir_pre(&astate->js, count,
+							0, charbuf);
+			}
+			break;
+		case OP_IPO:
+			/*
+			 * IRPOST
+			 * ...stack 0 is count
+			 */
+			if (altera_check_stack(stack_ptr, 1, &status)) {
+				count = stack[--stack_ptr];
+				status = altera_set_ir_post(&astate->js, count,
+							0, NULL);
+			}
+			break;
+		case OP_IPOL:
+			/*
+			 * IRPOST with literal data
+			 * ...stack 0 is count
+			 * ...stack 1 is literal data
+			 */
+			if (!altera_check_stack(stack_ptr, 2, &status))
+				break;
+			count = stack[--stack_ptr];
+			long_tmp = stack[--stack_ptr];
+			put_unaligned_le32(long_tmp, &charbuf[0]);
+			status = altera_set_ir_post(&astate->js, count, 0,
+							charbuf);
+			break;
+		case OP_PCHR:
+			if (altera_check_stack(stack_ptr, 1, &status)) {
+				u8 ch;
+				count = strlen(msg_buff);
+				ch = (char) stack[--stack_ptr];
+				if ((ch < 1) || (ch > 127)) {
+					/*
+					 * character code out of range
+					 * instead of flagging an error,
+					 * force the value to 127
+					 */
+					ch = 127;
+				}
+				msg_buff[count] = ch;
+				msg_buff[count + 1] = '\0';
+			}
+			break;
+		case OP_EXIT:
+			if (altera_check_stack(stack_ptr, 1, &status))
+				*exit_code = stack[--stack_ptr];
+
+			done = 1;
+			break;
+		case OP_EQU:
+			if (!altera_check_stack(stack_ptr, 2, &status))
+				break;
+			--stack_ptr;
+			stack[stack_ptr - 1] =
+				(stack[stack_ptr - 1] == stack[stack_ptr]) ?
+									1L : 0L;
+			break;
+		case OP_POPT:
+			if (altera_check_stack(stack_ptr, 1, &status))
+				--stack_ptr;
+
+			break;
+		case OP_ABS:
+			if (!altera_check_stack(stack_ptr, 1, &status))
+				break;
+			if (stack[stack_ptr - 1] < 0)
+				stack[stack_ptr - 1] = 0 - stack[stack_ptr - 1];
+
+			break;
+		case OP_BCH0:
+			/*
+			 * Batch operation 0
+			 * SWP
+			 * SWPN 7
+			 * SWP
+			 * SWPN 6
+			 * DUPN 8
+			 * SWPN 2
+			 * SWP
+			 * DUPN 6
+			 * DUPN 6
+			 */
+
+			/* SWP  */
+			if (altera_check_stack(stack_ptr, 2, &status)) {
+				long_tmp = stack[stack_ptr - 2];
+				stack[stack_ptr - 2] = stack[stack_ptr - 1];
+				stack[stack_ptr - 1] = long_tmp;
+			}
+
+			/* SWPN 7 */
+			index = 7 + 1;
+			if (altera_check_stack(stack_ptr, index, &status)) {
+				long_tmp = stack[stack_ptr - index];
+				stack[stack_ptr - index] = stack[stack_ptr - 1];
+				stack[stack_ptr - 1] = long_tmp;
+			}
+
+			/* SWP  */
+			if (altera_check_stack(stack_ptr, 2, &status)) {
+				long_tmp = stack[stack_ptr - 2];
+				stack[stack_ptr - 2] = stack[stack_ptr - 1];
+				stack[stack_ptr - 1] = long_tmp;
+			}
+
+			/* SWPN 6 */
+			index = 6 + 1;
+			if (altera_check_stack(stack_ptr, index, &status)) {
+				long_tmp = stack[stack_ptr - index];
+				stack[stack_ptr - index] = stack[stack_ptr - 1];
+				stack[stack_ptr - 1] = long_tmp;
+			}
+
+			/* DUPN 8 */
+			index = 8 + 1;
+			if (altera_check_stack(stack_ptr, index, &status)) {
+				stack[stack_ptr] = stack[stack_ptr - index];
+				++stack_ptr;
+			}
+
+			/* SWPN 2 */
+			index = 2 + 1;
+			if (altera_check_stack(stack_ptr, index, &status)) {
+				long_tmp = stack[stack_ptr - index];
+				stack[stack_ptr - index] = stack[stack_ptr - 1];
+				stack[stack_ptr - 1] = long_tmp;
+			}
+
+			/* SWP  */
+			if (altera_check_stack(stack_ptr, 2, &status)) {
+				long_tmp = stack[stack_ptr - 2];
+				stack[stack_ptr - 2] = stack[stack_ptr - 1];
+				stack[stack_ptr - 1] = long_tmp;
+			}
+
+			/* DUPN 6 */
+			index = 6 + 1;
+			if (altera_check_stack(stack_ptr, index, &status)) {
+				stack[stack_ptr] = stack[stack_ptr - index];
+				++stack_ptr;
+			}
+
+			/* DUPN 6 */
+			index = 6 + 1;
+			if (altera_check_stack(stack_ptr, index, &status)) {
+				stack[stack_ptr] = stack[stack_ptr - index];
+				++stack_ptr;
+			}
+			break;
+		case OP_PSH0:
+			stack[stack_ptr++] = 0;
+			break;
+		case OP_PSHL:
+			stack[stack_ptr++] = (s32) args[0];
+			break;
+		case OP_PSHV:
+			stack[stack_ptr++] = vars[args[0]];
+			break;
+		case OP_JMP:
+			pc = args[0] + code_sect;
+			if ((pc < code_sect) || (pc >= debug_sect))
+				status = -ERANGE;
+			break;
+		case OP_CALL:
+			stack[stack_ptr++] = pc;
+			pc = args[0] + code_sect;
+			if ((pc < code_sect) || (pc >= debug_sect))
+				status = -ERANGE;
+			break;
+		case OP_NEXT:
+			/*
+			 * Process FOR / NEXT loop
+			 * ...argument 0 is variable ID
+			 * ...stack 0 is step value
+			 * ...stack 1 is end value
+			 * ...stack 2 is top address
+			 */
+			if (altera_check_stack(stack_ptr, 3, &status)) {
+				s32 step = stack[stack_ptr - 1];
+				s32 end = stack[stack_ptr - 2];
+				s32 top = stack[stack_ptr - 3];
+				s32 iterator = vars[args[0]];
+				int break_out = 0;
+
+				if (step < 0) {
+					if (iterator <= end)
+						break_out = 1;
+				} else if (iterator >= end)
+					break_out = 1;
+
+				if (break_out) {
+					stack_ptr -= 3;
+				} else {
+					vars[args[0]] = iterator + step;
+					pc = top + code_sect;
+					if ((pc < code_sect) ||
+					    (pc >= debug_sect))
+						status = -ERANGE;
+				}
+			}
+			break;
+		case OP_PSTR:
+			/*
+			 * PRINT add string
+			 * ...argument 0 is string ID
+			 */
+			count = strlen(msg_buff);
+			strlcpy(&msg_buff[count],
+				&p[str_table + args[0]],
+				ALTERA_MESSAGE_LENGTH - count);
+			break;
+		case OP_SINT:
+			/*
+			 * STATE intermediate state
+			 * ...argument 0 is state code
+			 */
+			status = altera_goto_jstate(astate, args[0]);
+			break;
+		case OP_ST:
+			/*
+			 * STATE final state
+			 * ...argument 0 is state code
+			 */
+			status = altera_goto_jstate(astate, args[0]);
+			break;
+		case OP_ISTP:
+			/*
+			 * IRSTOP state
+			 * ...argument 0 is state code
+			 */
+			status = altera_set_irstop(&astate->js, args[0]);
+			break;
+		case OP_DSTP:
+			/*
+			 * DRSTOP state
+			 * ...argument 0 is state code
+			 */
+			status = altera_set_drstop(&astate->js, args[0]);
+			break;
+
+		case OP_SWPN:
+			/*
+			 * Exchange top with Nth stack value
+			 * ...argument 0 is 0-based stack entry
+			 * to swap with top element
+			 */
+			index = (args[0]) + 1;
+			if (altera_check_stack(stack_ptr, index, &status)) {
+				long_tmp = stack[stack_ptr - index];
+				stack[stack_ptr - index] = stack[stack_ptr - 1];
+				stack[stack_ptr - 1] = long_tmp;
+			}
+			break;
+		case OP_DUPN:
+			/*
+			 * Duplicate Nth stack value
+			 * ...argument 0 is 0-based stack entry to duplicate
+			 */
+			index = (args[0]) + 1;
+			if (altera_check_stack(stack_ptr, index, &status)) {
+				stack[stack_ptr] = stack[stack_ptr - index];
+				++stack_ptr;
+			}
+			break;
+		case OP_POPV:
+			/*
+			 * Pop stack into scalar variable
+			 * ...argument 0 is variable ID
+			 * ...stack 0 is value
+			 */
+			if (altera_check_stack(stack_ptr, 1, &status))
+				vars[args[0]] = stack[--stack_ptr];
+
+			break;
+		case OP_POPE:
+			/*
+			 * Pop stack into integer array element
+			 * ...argument 0 is variable ID
+			 * ...stack 0 is array index
+			 * ...stack 1 is value
+			 */
+			if (!altera_check_stack(stack_ptr, 2, &status))
+				break;
+			variable_id = args[0];
+
+			/*
+			 * If variable is read-only,
+			 * convert to writable array
+			 */
+			if ((version > 0) &&
+				((attrs[variable_id] & 0x9c) == 0x1c)) {
+				/* Allocate a writable buffer for this array */
+				count = var_size[variable_id];
+				long_tmp = vars[variable_id];
+				longptr_tmp = kcalloc(count, sizeof(long),
+								GFP_KERNEL);
+				vars[variable_id] = (long)longptr_tmp;
+
+				if (vars[variable_id] == 0) {
+					status = -ENOMEM;
+					break;
+				}
+
+				/* copy previous contents into buffer */
+				for (i = 0; i < count; ++i) {
+					longptr_tmp[i] =
+						get_unaligned_be32(&p[long_tmp]);
+					long_tmp += sizeof(long);
+				}
+
+				/*
+				 * set bit 7 - buffer was
+				 * dynamically allocated
+				 */
+				attrs[variable_id] |= 0x80;
+
+				/* clear bit 2 - variable is writable */
+				attrs[variable_id] &= ~0x04;
+				attrs[variable_id] |= 0x01;
+
+			}
+
+			/* check that variable is a writable integer array */
+			if ((attrs[variable_id] & 0x1c) != 0x18)
+				status = -ERANGE;
+			else {
+				longptr_tmp = (long *)vars[variable_id];
+
+				/* pop the array index */
+				index = stack[--stack_ptr];
+
+				/* pop the value and store it into the array */
+				longptr_tmp[index] = stack[--stack_ptr];
+			}
+
+			break;
+		case OP_POPA:
+			/*
+			 * Pop stack into Boolean array
+			 * ...argument 0 is variable ID
+			 * ...stack 0 is count
+			 * ...stack 1 is array index
+			 * ...stack 2 is value
+			 */
+			if (!altera_check_stack(stack_ptr, 3, &status))
+				break;
+			variable_id = args[0];
+
+			/*
+			 * If variable is read-only,
+			 * convert to writable array
+			 */
+			if ((version > 0) &&
+				((attrs[variable_id] & 0x9c) == 0x0c)) {
+				/* Allocate a writable buffer for this array */
+				long_tmp =
+					(var_size[variable_id] + 7L) >> 3L;
+				charptr_tmp2 = (u8 *)vars[variable_id];
+				charptr_tmp =
+					kzalloc(long_tmp, GFP_KERNEL);
+				vars[variable_id] = (long)charptr_tmp;
+
+				if (vars[variable_id] == 0) {
+					status = -ENOMEM;
+					break;
+				}
+
+				/* zero the buffer */
+				for (long_idx = 0L;
+					long_idx < long_tmp;
+					++long_idx) {
+					charptr_tmp[long_idx] = 0;
+				}
+
+				/* copy previous contents into buffer */
+				for (long_idx = 0L;
+					long_idx < var_size[variable_id];
+					++long_idx) {
+					long_idx2 = long_idx;
+
+					if (charptr_tmp2[long_idx2 >> 3] &
+						(1 << (long_idx2 & 7))) {
+						charptr_tmp[long_idx >> 3] |=
+							(1 << (long_idx & 7));
+					}
+				}
+
+				/*
+				 * set bit 7 - buffer was
+				 * dynamically allocated
+				 */
+				attrs[variable_id] |= 0x80;
+
+				/* clear bit 2 - variable is writable */
+				attrs[variable_id] &= ~0x04;
+				attrs[variable_id] |= 0x01;
+
+			}
+
+			/*
+			 * check that variable is
+			 * a writable Boolean array
+			 */
+			if ((attrs[variable_id] & 0x1c) != 0x08) {
+				status = -ERANGE;
+				break;
+			}
+
+			charptr_tmp = (u8 *)vars[variable_id];
+
+			/* pop the count (number of bits to copy) */
+			long_count = stack[--stack_ptr];
+
+			/* pop the array index */
+			long_idx = stack[--stack_ptr];
+
+			reverse = 0;
+
+			if (version > 0) {
+				/*
+				 * stack 0 = array right index
+				 * stack 1 = array left index
+				 */
+
+				if (long_idx > long_count) {
+					reverse = 1;
+					long_tmp = long_count;
+					long_count = 1 + long_idx -
+								long_count;
+					long_idx = long_tmp;
+
+					/* reverse POPA is not supported */
+					status = -ERANGE;
+					break;
+				} else
+					long_count = 1 + long_count -
+								long_idx;
+
+			}
+
+			/* pop the data */
+			long_tmp = stack[--stack_ptr];
+
+			if (long_count < 1) {
+				status = -ERANGE;
+				break;
+			}
+
+			for (i = 0; i < long_count; ++i) {
+				if (long_tmp & (1L << (s32) i))
+					charptr_tmp[long_idx >> 3L] |=
+						(1L << (long_idx & 7L));
+				else
+					charptr_tmp[long_idx >> 3L] &=
+						~(1L << (long_idx & 7L));
+
+				++long_idx;
+			}
+
+			break;
+		case OP_JMPZ:
+			/*
+			 * Pop stack and branch if zero
+			 * ...argument 0 is address
+			 * ...stack 0 is condition value
+			 */
+			if (altera_check_stack(stack_ptr, 1, &status)) {
+				if (stack[--stack_ptr] == 0) {
+					pc = args[0] + code_sect;
+					if ((pc < code_sect) ||
+					    (pc >= debug_sect))
+						status = -ERANGE;
+				}
+			}
+			break;
+		case OP_DS:
+		case OP_IS:
+			/*
+			 * DRSCAN
+			 * IRSCAN
+			 * ...argument 0 is scan data variable ID
+			 * ...stack 0 is array index
+			 * ...stack 1 is count
+			 */
+			if (!altera_check_stack(stack_ptr, 2, &status))
+				break;
+			long_idx = stack[--stack_ptr];
+			long_count = stack[--stack_ptr];
+			reverse = 0;
+			if (version > 0) {
+				/*
+				 * stack 0 = array right index
+				 * stack 1 = array left index
+				 * stack 2 = count
+				 */
+				long_tmp = long_count;
+				long_count = stack[--stack_ptr];
+
+				if (long_idx > long_tmp) {
+					reverse = 1;
+					long_idx = long_tmp;
+				}
+			}
+
+			charptr_tmp = (u8 *)vars[args[0]];
+
+			if (reverse) {
+				/*
+				 * allocate a buffer
+				 * and reverse the data order
+				 */
+				charptr_tmp2 = charptr_tmp;
+				charptr_tmp = kzalloc((long_count >> 3) + 1,
+								GFP_KERNEL);
+				if (charptr_tmp == NULL) {
+					status = -ENOMEM;
+					break;
+				}
+
+				long_tmp = long_idx + long_count - 1;
+				long_idx2 = 0;
+				while (long_idx2 < long_count) {
+					if (charptr_tmp2[long_tmp >> 3] &
+							(1 << (long_tmp & 7)))
+						charptr_tmp[long_idx2 >> 3] |=
+							(1 << (long_idx2 & 7));
+					else
+						charptr_tmp[long_idx2 >> 3] &=
+							~(1 << (long_idx2 & 7));
+
+					--long_tmp;
+					++long_idx2;
+				}
+			}
+
+			if (opcode == 0x51) /* DS */
+				status = altera_drscan(astate, long_count,
+						charptr_tmp, long_idx);
+			else /* IS */
+				status = altera_irscan(astate, long_count,
+						charptr_tmp, long_idx);
+
+			if (reverse)
+				kfree(charptr_tmp);
+
+			break;
+		case OP_DPRA:
+			/*
+			 * DRPRE with array data
+			 * ...argument 0 is variable ID
+			 * ...stack 0 is array index
+			 * ...stack 1 is count
+			 */
+			if (!altera_check_stack(stack_ptr, 2, &status))
+				break;
+			index = stack[--stack_ptr];
+			count = stack[--stack_ptr];
+
+			if (version > 0)
+				/*
+				 * stack 0 = array right index
+				 * stack 1 = array left index
+				 */
+				count = 1 + count - index;
+
+			charptr_tmp = (u8 *)vars[args[0]];
+			status = altera_set_dr_pre(&astate->js, count, index,
+							charptr_tmp);
+			break;
+		case OP_DPOA:
+			/*
+			 * DRPOST with array data
+			 * ...argument 0 is variable ID
+			 * ...stack 0 is array index
+			 * ...stack 1 is count
+			 */
+			if (!altera_check_stack(stack_ptr, 2, &status))
+				break;
+			index = stack[--stack_ptr];
+			count = stack[--stack_ptr];
+
+			if (version > 0)
+				/*
+				 * stack 0 = array right index
+				 * stack 1 = array left index
+				 */
+				count = 1 + count - index;
+
+			charptr_tmp = (u8 *)vars[args[0]];
+			status = altera_set_dr_post(&astate->js, count, index,
+							charptr_tmp);
+			break;
+		case OP_IPRA:
+			/*
+			 * IRPRE with array data
+			 * ...argument 0 is variable ID
+			 * ...stack 0 is array index
+			 * ...stack 1 is count
+			 */
+			if (!altera_check_stack(stack_ptr, 2, &status))
+				break;
+			index = stack[--stack_ptr];
+			count = stack[--stack_ptr];
+
+			if (version > 0)
+				/*
+				 * stack 0 = array right index
+				 * stack 1 = array left index
+				 */
+				count = 1 + count - index;
+
+			charptr_tmp = (u8 *)vars[args[0]];
+			status = altera_set_ir_pre(&astate->js, count, index,
+							charptr_tmp);
+
+			break;
+		case OP_IPOA:
+			/*
+			 * IRPOST with array data
+			 * ...argument 0 is variable ID
+			 * ...stack 0 is array index
+			 * ...stack 1 is count
+			 */
+			if (!altera_check_stack(stack_ptr, 2, &status))
+				break;
+			index = stack[--stack_ptr];
+			count = stack[--stack_ptr];
+
+			if (version > 0)
+				/*
+				 * stack 0 = array right index
+				 * stack 1 = array left index
+				 */
+				count = 1 + count - index;
+
+			charptr_tmp = (u8 *)vars[args[0]];
+			status = altera_set_ir_post(&astate->js, count, index,
+							charptr_tmp);
+
+			break;
+		case OP_EXPT:
+			/*
+			 * EXPORT
+			 * ...argument 0 is string ID
+			 * ...stack 0 is integer expression
+			 */
+			if (altera_check_stack(stack_ptr, 1, &status)) {
+				name = &p[str_table + args[0]];
+				long_tmp = stack[--stack_ptr];
+				altera_export_int(name, long_tmp);
+			}
+			break;
+		case OP_PSHE:
+			/*
+			 * Push integer array element
+			 * ...argument 0 is variable ID
+			 * ...stack 0 is array index
+			 */
+			if (!altera_check_stack(stack_ptr, 1, &status))
+				break;
+			variable_id = args[0];
+			index = stack[stack_ptr - 1];
+
+			/* check variable type */
+			if ((attrs[variable_id] & 0x1f) == 0x19) {
+				/* writable integer array */
+				longptr_tmp = (long *)vars[variable_id];
+				stack[stack_ptr - 1] = longptr_tmp[index];
+			} else if ((attrs[variable_id] & 0x1f) == 0x1c) {
+				/* read-only integer array */
+				long_tmp = vars[variable_id] +
+						(index * sizeof(long));
+				stack[stack_ptr - 1] =
+					get_unaligned_be32(&p[long_tmp]);
+			} else
+				status = -ERANGE;
+
+			break;
+		case OP_PSHA:
+			/*
+			 * Push Boolean array
+			 * ...argument 0 is variable ID
+			 * ...stack 0 is count
+			 * ...stack 1 is array index
+			 */
+			if (!altera_check_stack(stack_ptr, 2, &status))
+				break;
+			variable_id = args[0];
+
+			/* check that variable is a Boolean array */
+			if ((attrs[variable_id] & 0x18) != 0x08) {
+				status = -ERANGE;
+				break;
+			}
+
+			charptr_tmp = (u8 *)vars[variable_id];
+
+			/* pop the count (number of bits to copy) */
+			count = stack[--stack_ptr];
+
+			/* pop the array index */
+			index = stack[stack_ptr - 1];
+
+			if (version > 0)
+				/*
+				 * stack 0 = array right index
+				 * stack 1 = array left index
+				 */
+				count = 1 + count - index;
+
+			if ((count < 1) || (count > 32)) {
+				status = -ERANGE;
+				break;
+			}
+
+			long_tmp = 0L;
+
+			for (i = 0; i < count; ++i)
+				if (charptr_tmp[(i + index) >> 3] &
+						(1 << ((i + index) & 7)))
+					long_tmp |= (1L << i);
+
+			stack[stack_ptr - 1] = long_tmp;
+
+			break;
+		case OP_DYNA:
+			/*
+			 * Dynamically change size of array
+			 * ...argument 0 is variable ID
+			 * ...stack 0 is new size
+			 */
+			if (!altera_check_stack(stack_ptr, 1, &status))
+				break;
+			variable_id = args[0];
+			long_tmp = stack[--stack_ptr];
+
+			if (long_tmp > var_size[variable_id]) {
+				var_size[variable_id] = long_tmp;
+
+				if (attrs[variable_id] & 0x10)
+					/* allocate integer array */
+					long_tmp *= sizeof(long);
+				else
+					/* allocate Boolean array */
+					long_tmp = (long_tmp + 7) >> 3;
+
+				/*
+				 * If the buffer was previously allocated,
+				 * free it
+				 */
+				if (attrs[variable_id] & 0x80) {
+					kfree((void *)vars[variable_id]);
+					vars[variable_id] = 0;
+				}
+
+				/*
+				 * Allocate a new buffer
+				 * of the requested size
+				 */
+				vars[variable_id] = (long)
+					kzalloc(long_tmp, GFP_KERNEL);
+
+				if (vars[variable_id] == 0) {
+					status = -ENOMEM;
+					break;
+				}
+
+				/*
+				 * Set the attribute bit to indicate that
+				 * this buffer was dynamically allocated and
+				 * should be freed later
+				 */
+				attrs[variable_id] |= 0x80;
+
+				/* zero out memory */
+				count = ((var_size[variable_id] + 7L) /
+									8L);
+				charptr_tmp = (u8 *)(vars[variable_id]);
+				for (index = 0; index < count; ++index)
+					charptr_tmp[index] = 0;
+
+			}
+
+			break;
+		case OP_EXPV:
+			/*
+			 * Export Boolean array
+			 * ...argument 0 is string ID
+			 * ...stack 0 is variable ID
+			 * ...stack 1 is array right index
+			 * ...stack 2 is array left index
+			 */
+			if (!altera_check_stack(stack_ptr, 3, &status))
+				break;
+			if (version == 0) {
+				/* EXPV is not supported in JBC 1.0 */
+				bad_opcode = 1;
+				break;
+			}
+			name = &p[str_table + args[0]];
+			variable_id = stack[--stack_ptr];
+			long_idx = stack[--stack_ptr];/* right indx */
+			long_idx2 = stack[--stack_ptr];/* left indx */
+
+			if (long_idx > long_idx2) {
+				/* reverse indices not supported */
+				status = -ERANGE;
+				break;
+			}
+
+			long_count = 1 + long_idx2 - long_idx;
+
+			charptr_tmp = (u8 *)vars[variable_id];
+			charptr_tmp2 = NULL;
+
+			if ((long_idx & 7L) != 0) {
+				s32 k = long_idx;
+				charptr_tmp2 =
+					kzalloc(((long_count + 7L) / 8L),
+							GFP_KERNEL);
+				if (charptr_tmp2 == NULL) {
+					status = -ENOMEM;
+					break;
+				}
+
+				for (i = 0; i < long_count; ++i) {
+					if (charptr_tmp[k >> 3] &
+							(1 << (k & 7)))
+						charptr_tmp2[i >> 3] |=
+								(1 << (i & 7));
+					else
+						charptr_tmp2[i >> 3] &=
+								~(1 << (i & 7));
+
+					++k;
+				}
+				charptr_tmp = charptr_tmp2;
+
+			} else if (long_idx != 0)
+				charptr_tmp = &charptr_tmp[long_idx >> 3];
+
+			altera_export_bool_array(name, charptr_tmp,
+							long_count);
+
+			/* free allocated buffer */
+			if ((long_idx & 7L) != 0)
+				kfree(charptr_tmp2);
+
+			break;
+		case OP_COPY: {
+			/*
+			 * Array copy
+			 * ...argument 0 is dest ID
+			 * ...argument 1 is source ID
+			 * ...stack 0 is count
+			 * ...stack 1 is dest index
+			 * ...stack 2 is source index
+			 */
+			s32 copy_count;
+			s32 copy_index;
+			s32 copy_index2;
+			s32 destleft;
+			s32 src_count;
+			s32 dest_count;
+			int src_reverse = 0;
+			int dest_reverse = 0;
+
+			if (!altera_check_stack(stack_ptr, 3, &status))
+				break;
+
+			copy_count = stack[--stack_ptr];
+			copy_index = stack[--stack_ptr];
+			copy_index2 = stack[--stack_ptr];
+			reverse = 0;
+
+			if (version > 0) {
+				/*
+				 * stack 0 = source right index
+				 * stack 1 = source left index
+				 * stack 2 = destination right index
+				 * stack 3 = destination left index
+				 */
+				destleft = stack[--stack_ptr];
+
+				if (copy_count > copy_index) {
+					src_reverse = 1;
+					reverse = 1;
+					src_count = 1 + copy_count - copy_index;
+					/* copy_index = source start index */
+				} else {
+					src_count = 1 + copy_index - copy_count;
+					/* source start index */
+					copy_index = copy_count;
+				}
+
+				if (copy_index2 > destleft) {
+					dest_reverse = 1;
+					reverse = !reverse;
+					dest_count = 1 + copy_index2 - destleft;
+					/* destination start index */
+					copy_index2 = destleft;
+				} else
+					dest_count = 1 + destleft - copy_index2;
+
+				copy_count = (src_count < dest_count) ?
+							src_count : dest_count;
+
+				if ((src_reverse || dest_reverse) &&
+					(src_count != dest_count))
+					/*
+					 * If either the source or destination
+					 * is reversed, we can't tolerate
+					 * a length mismatch, because we
+					 * "left justify" arrays when copying.
+					 * This won't work correctly
+					 * with reversed arrays.
+					 */
+					status = -ERANGE;
+
+			}
+
+			count = copy_count;
+			index = copy_index;
+			index2 = copy_index2;
+
+			/*
+			 * If destination is a read-only array,
+			 * allocate a buffer and convert it to a writable array
+			 */
+			variable_id = args[1];
+			if ((version > 0) &&
+				((attrs[variable_id] & 0x9c) == 0x0c)) {
+				/* Allocate a writable buffer for this array */
+				long_tmp =
+					(var_size[variable_id] + 7L) >> 3L;
+				charptr_tmp2 = (u8 *)vars[variable_id];
+				charptr_tmp =
+					kzalloc(long_tmp, GFP_KERNEL);
+				vars[variable_id] = (long)charptr_tmp;
+
+				if (vars[variable_id] == 0) {
+					status = -ENOMEM;
+					break;
+				}
+
+				/* zero the buffer */
+				for (long_idx = 0L; long_idx < long_tmp;
+								++long_idx)
+					charptr_tmp[long_idx] = 0;
+
+				/* copy previous contents into buffer */
+				for (long_idx = 0L;
+					long_idx < var_size[variable_id];
+								++long_idx) {
+					long_idx2 = long_idx;
+
+					if (charptr_tmp2[long_idx2 >> 3] &
+						(1 << (long_idx2 & 7)))
+						charptr_tmp[long_idx >> 3] |=
+							(1 << (long_idx & 7));
+
+				}
+
+				/*
+				set bit 7 - buffer was dynamically allocated */
+				attrs[variable_id] |= 0x80;
+
+				/* clear bit 2 - variable is writable */
+				attrs[variable_id] &= ~0x04;
+				attrs[variable_id] |= 0x01;
+			}
+
+			charptr_tmp = (u8 *)vars[args[1]];
+			charptr_tmp2 = (u8 *)vars[args[0]];
+
+			/* check if destination is a writable Boolean array */
+			if ((attrs[args[1]] & 0x1c) != 0x08) {
+				status = -ERANGE;
+				break;
+			}
+
+			if (count < 1) {
+				status = -ERANGE;
+				break;
+			}
+
+			if (reverse)
+				index2 += (count - 1);
+
+			for (i = 0; i < count; ++i) {
+				if (charptr_tmp2[index >> 3] &
+							(1 << (index & 7)))
+					charptr_tmp[index2 >> 3] |=
+							(1 << (index2 & 7));
+				else
+					charptr_tmp[index2 >> 3] &=
+						~(1 << (index2 & 7));
+
+				++index;
+				if (reverse)
+					--index2;
+				else
+					++index2;
+			}
+
+			break;
+		}
+		case OP_DSC:
+		case OP_ISC: {
+			/*
+			 * DRSCAN with capture
+			 * IRSCAN with capture
+			 * ...argument 0 is scan data variable ID
+			 * ...argument 1 is capture variable ID
+			 * ...stack 0 is capture index
+			 * ...stack 1 is scan data index
+			 * ...stack 2 is count
+			 */
+			s32 scan_right, scan_left;
+			s32 capture_count = 0;
+			s32 scan_count = 0;
+			s32 capture_index;
+			s32 scan_index;
+
+			if (!altera_check_stack(stack_ptr, 3, &status))
+				break;
+
+			capture_index = stack[--stack_ptr];
+			scan_index = stack[--stack_ptr];
+
+			if (version > 0) {
+				/*
+				 * stack 0 = capture right index
+				 * stack 1 = capture left index
+				 * stack 2 = scan right index
+				 * stack 3 = scan left index
+				 * stack 4 = count
+				 */
+				scan_right = stack[--stack_ptr];
+				scan_left = stack[--stack_ptr];
+				capture_count = 1 + scan_index - capture_index;
+				scan_count = 1 + scan_left - scan_right;
+				scan_index = scan_right;
+			}
+
+			long_count = stack[--stack_ptr];
+			/*
+			 * If capture array is read-only, allocate a buffer
+			 * and convert it to a writable array
+			 */
+			variable_id = args[1];
+			if ((version > 0) &&
+				((attrs[variable_id] & 0x9c) == 0x0c)) {
+				/* Allocate a writable buffer for this array */
+				long_tmp =
+					(var_size[variable_id] + 7L) >> 3L;
+				charptr_tmp2 = (u8 *)vars[variable_id];
+				charptr_tmp =
+					kzalloc(long_tmp, GFP_KERNEL);
+				vars[variable_id] = (long)charptr_tmp;
+
+				if (vars[variable_id] == 0) {
+					status = -ENOMEM;
+					break;
+				}
+
+				/* zero the buffer */
+				for (long_idx = 0L; long_idx < long_tmp;
+								++long_idx)
+					charptr_tmp[long_idx] = 0;
+
+				/* copy previous contents into buffer */
+				for (long_idx = 0L;
+					long_idx < var_size[variable_id];
+								++long_idx) {
+					long_idx2 = long_idx;
+
+					if (charptr_tmp2[long_idx2 >> 3] &
+						(1 << (long_idx2 & 7)))
+						charptr_tmp[long_idx >> 3] |=
+							(1 << (long_idx & 7));
+
+				}
+
+				/*
+				 * set bit 7 - buffer was
+				 * dynamically allocated
+				 */
+				attrs[variable_id] |= 0x80;
+
+				/* clear bit 2 - variable is writable */
+				attrs[variable_id] &= ~0x04;
+				attrs[variable_id] |= 0x01;
+
+			}
+
+			charptr_tmp = (u8 *)vars[args[0]];
+			charptr_tmp2 = (u8 *)vars[args[1]];
+
+			if ((version > 0) &&
+					((long_count > capture_count) ||
+					(long_count > scan_count))) {
+				status = -ERANGE;
+				break;
+			}
+
+			/*
+			 * check that capture array
+			 * is a writable Boolean array
+			 */
+			if ((attrs[args[1]] & 0x1c) != 0x08) {
+				status = -ERANGE;
+				break;
+			}
+
+			if (status == 0) {
+				if (opcode == 0x82) /* DSC */
+					status = altera_swap_dr(astate,
+							long_count,
+							charptr_tmp,
+							scan_index,
+							charptr_tmp2,
+							capture_index);
+				else /* ISC */
+					status = altera_swap_ir(astate,
+							long_count,
+							charptr_tmp,
+							scan_index,
+							charptr_tmp2,
+							capture_index);
+
+			}
+
+			break;
+		}
+		case OP_WAIT:
+			/*
+			 * WAIT
+			 * ...argument 0 is wait state
+			 * ...argument 1 is end state
+			 * ...stack 0 is cycles
+			 * ...stack 1 is microseconds
+			 */
+			if (!altera_check_stack(stack_ptr, 2, &status))
+				break;
+			long_tmp = stack[--stack_ptr];
+
+			if (long_tmp != 0L)
+				status = altera_wait_cycles(astate, long_tmp,
+								args[0]);
+
+			long_tmp = stack[--stack_ptr];
+
+			if ((status == 0) && (long_tmp != 0L))
+				status = altera_wait_msecs(astate,
+								long_tmp,
+								args[0]);
+
+			if ((status == 0) && (args[1] != args[0]))
+				status = altera_goto_jstate(astate,
+								args[1]);
+
+			if (version > 0) {
+				--stack_ptr; /* throw away MAX cycles */
+				--stack_ptr; /* throw away MAX microseconds */
+			}
+			break;
+		case OP_CMPA: {
+			/*
+			 * Array compare
+			 * ...argument 0 is source 1 ID
+			 * ...argument 1 is source 2 ID
+			 * ...argument 2 is mask ID
+			 * ...stack 0 is source 1 index
+			 * ...stack 1 is source 2 index
+			 * ...stack 2 is mask index
+			 * ...stack 3 is count
+			 */
+			s32 a, b;
+			u8 *source1 = (u8 *)vars[args[0]];
+			u8 *source2 = (u8 *)vars[args[1]];
+			u8 *mask = (u8 *)vars[args[2]];
+			u32 index1;
+			u32 index2;
+			u32 mask_index;
+
+			if (!altera_check_stack(stack_ptr, 4, &status))
+				break;
+
+			index1 = stack[--stack_ptr];
+			index2 = stack[--stack_ptr];
+			mask_index = stack[--stack_ptr];
+			long_count = stack[--stack_ptr];
+
+			if (version > 0) {
+				/*
+				 * stack 0 = source 1 right index
+				 * stack 1 = source 1 left index
+				 * stack 2 = source 2 right index
+				 * stack 3 = source 2 left index
+				 * stack 4 = mask right index
+				 * stack 5 = mask left index
+				 */
+				s32 mask_right = stack[--stack_ptr];
+				s32 mask_left = stack[--stack_ptr];
+				/* source 1 count */
+				a = 1 + index2 - index1;
+				/* source 2 count */
+				b = 1 + long_count - mask_index;
+				a = (a < b) ? a : b;
+				/* mask count */
+				b = 1 + mask_left - mask_right;
+				a = (a < b) ? a : b;
+				/* source 2 start index */
+				index2 = mask_index;
+				/* mask start index */
+				mask_index = mask_right;
+				long_count = a;
+			}
+
+			long_tmp = 1L;
+
+			if (long_count < 1)
+				status = -ERANGE;
+			else {
+				count = long_count;
+
+				for (i = 0; i < count; ++i) {
+					if (mask[mask_index >> 3] &
+						(1 << (mask_index & 7))) {
+						a = source1[index1 >> 3] &
+							(1 << (index1 & 7))
+								? 1 : 0;
+						b = source2[index2 >> 3] &
+							(1 << (index2 & 7))
+								? 1 : 0;
+
+						if (a != b) /* failure */
+							long_tmp = 0L;
+					}
+					++index1;
+					++index2;
+					++mask_index;
+				}
+			}
+
+			stack[stack_ptr++] = long_tmp;
+
+			break;
+		}
+		default:
+			/* Unrecognized opcode -- ERROR! */
+			bad_opcode = 1;
+			break;
+		}
+
+		if (bad_opcode)
+			status = -ENOSYS;
+
+		if ((stack_ptr < 0) || (stack_ptr >= ALTERA_STACK_SIZE))
+			status = -EOVERFLOW;
+
+		if (status != 0) {
+			done = 1;
+			*error_address = (s32)(opcode_address - code_sect);
+		}
+	}
+
+	altera_free_buffers(astate);
+
+	/* Free all dynamically allocated arrays */
+	if ((attrs != NULL) && (vars != NULL))
+		for (i = 0; i < sym_count; ++i)
+			if (attrs[i] & 0x80)
+				kfree((void *)vars[i]);
+
+	kfree(vars);
+	kfree(var_size);
+	kfree(attrs);
+	kfree(proc_attributes);
+
+	return status;
+}
+
+static int altera_get_note(u8 *p, s32 program_size, s32 *offset,
+			   char *key, char *value, int keylen, int vallen)
+/*
+ * Gets key and value of NOTE fields in the JBC file.
+ * Can be called in two modes:  if offset pointer is NULL,
+ * then the function searches for note fields which match
+ * the key string provided.  If offset is not NULL, then
+ * the function finds the next note field of any key,
+ * starting at the offset specified by the offset pointer.
+ * Returns 0 for success, else appropriate error code
+ */
+{
+	int status = -ENODATA;
+	u32 note_strings = 0L;
+	u32 note_table = 0L;
+	u32 note_count = 0L;
+	u32 first_word = 0L;
+	int version = 0;
+	int delta = 0;
+	char *key_ptr;
+	char *value_ptr;
+	int i;
+
+	/* Read header information */
+	if (program_size > 52L) {
+		first_word    = get_unaligned_be32(&p[0]);
+		version = (first_word & 1L);
+		delta = version * 8;
+
+		note_strings  = get_unaligned_be32(&p[8 + delta]);
+		note_table    = get_unaligned_be32(&p[12 + delta]);
+		note_count    = get_unaligned_be32(&p[44 + (2 * delta)]);
+	}
+
+	if ((first_word != 0x4A414D00L) && (first_word != 0x4A414D01L))
+		return -EIO;
+
+	if (note_count <= 0L)
+		return status;
+
+	if (offset == NULL) {
+		/*
+		 * We will search for the first note with a specific key,
+		 * and return only the value
+		 */
+		for (i = 0; (i < note_count) &&
+						(status != 0); ++i) {
+			key_ptr = &p[note_strings +
+					get_unaligned_be32(
+					&p[note_table + (8 * i)])];
+			if (key && !strncasecmp(key, key_ptr, strlen(key_ptr))) {
+				status = 0;
+
+				value_ptr = &p[note_strings +
+						get_unaligned_be32(
+						&p[note_table + (8 * i) + 4])];
+
+				if (value != NULL)
+					strlcpy(value, value_ptr, vallen);
+
+			}
+		}
+	} else {
+		/*
+		 * We will search for the next note, regardless of the key,
+		 * and return both the value and the key
+		 */
+
+		i = *offset;
+
+		if ((i >= 0) && (i < note_count)) {
+			status = 0;
+
+			if (key != NULL)
+				strlcpy(key, &p[note_strings +
+						get_unaligned_be32(
+						&p[note_table + (8 * i)])],
+					keylen);
+
+			if (value != NULL)
+				strlcpy(value, &p[note_strings +
+						get_unaligned_be32(
+						&p[note_table + (8 * i) + 4])],
+					vallen);
+
+			*offset = i + 1;
+		}
+	}
+
+	return status;
+}
+
+static int altera_check_crc(u8 *p, s32 program_size)
+{
+	int status = 0;
+	u16 local_expected = 0,
+	    local_actual = 0,
+	    shift_reg = 0xffff;
+	int bit, feedback;
+	u8 databyte;
+	u32 i;
+	u32 crc_section = 0L;
+	u32 first_word = 0L;
+	int version = 0;
+	int delta = 0;
+
+	if (program_size > 52L) {
+		first_word  = get_unaligned_be32(&p[0]);
+		version = (first_word & 1L);
+		delta = version * 8;
+
+		crc_section = get_unaligned_be32(&p[32 + delta]);
+	}
+
+	if ((first_word != 0x4A414D00L) && (first_word != 0x4A414D01L))
+		status = -EIO;
+
+	if (crc_section >= program_size)
+		status = -EIO;
+
+	if (status == 0) {
+		local_expected = (u16)get_unaligned_be16(&p[crc_section]);
+
+		for (i = 0; i < crc_section; ++i) {
+			databyte = p[i];
+			for (bit = 0; bit < 8; bit++) {
+				feedback = (databyte ^ shift_reg) & 0x01;
+				shift_reg >>= 1;
+				if (feedback)
+					shift_reg ^= 0x8408;
+
+				databyte >>= 1;
+			}
+		}
+
+		local_actual = (u16)~shift_reg;
+
+		if (local_expected != local_actual)
+			status = -EILSEQ;
+
+	}
+
+	if (debug || status) {
+		switch (status) {
+		case 0:
+			printk(KERN_INFO "%s: CRC matched: %04x\n", __func__,
+				local_actual);
+			break;
+		case -EILSEQ:
+			printk(KERN_ERR "%s: CRC mismatch: expected %04x, "
+				"actual %04x\n", __func__, local_expected,
+				local_actual);
+			break;
+		case -ENODATA:
+			printk(KERN_ERR "%s: expected CRC not found, "
+				"actual CRC = %04x\n", __func__,
+				local_actual);
+			break;
+		case -EIO:
+			printk(KERN_ERR "%s: error: format isn't "
+				"recognized.\n", __func__);
+			break;
+		default:
+			printk(KERN_ERR "%s: CRC function returned error "
+				"code %d\n", __func__, status);
+			break;
+		}
+	}
+
+	return status;
+}
+
+static int altera_get_file_info(u8 *p,
+					s32 program_size,
+					int *format_version,
+					int *action_count,
+					int *procedure_count)
+{
+	int status = -EIO;
+	u32 first_word = 0;
+	int version = 0;
+
+	if (program_size <= 52L)
+		return status;
+
+	first_word = get_unaligned_be32(&p[0]);
+
+	if ((first_word == 0x4A414D00L) || (first_word == 0x4A414D01L)) {
+		status = 0;
+
+		version = (first_word & 1L);
+		*format_version = version + 1;
+
+		if (version > 0) {
+			*action_count = get_unaligned_be32(&p[48]);
+			*procedure_count = get_unaligned_be32(&p[52]);
+		}
+	}
+
+	return status;
+}
+
+static int altera_get_act_info(u8 *p,
+					s32 program_size,
+					int index,
+					char **name,
+					char **description,
+					struct altera_procinfo **proc_list)
+{
+	int status = -EIO;
+	struct altera_procinfo *procptr = NULL;
+	struct altera_procinfo *tmpptr = NULL;
+	u32 first_word = 0L;
+	u32 action_table = 0L;
+	u32 proc_table = 0L;
+	u32 str_table = 0L;
+	u32 note_strings = 0L;
+	u32 action_count = 0L;
+	u32 proc_count = 0L;
+	u32 act_name_id = 0L;
+	u32 act_desc_id = 0L;
+	u32 act_proc_id = 0L;
+	u32 act_proc_name = 0L;
+	u8 act_proc_attribute = 0;
+
+	if (program_size <= 52L)
+		return status;
+	/* Read header information */
+	first_word = get_unaligned_be32(&p[0]);
+
+	if (first_word != 0x4A414D01L)
+		return status;
+
+	action_table = get_unaligned_be32(&p[4]);
+	proc_table   = get_unaligned_be32(&p[8]);
+	str_table = get_unaligned_be32(&p[12]);
+	note_strings = get_unaligned_be32(&p[16]);
+	action_count = get_unaligned_be32(&p[48]);
+	proc_count   = get_unaligned_be32(&p[52]);
+
+	if (index >= action_count)
+		return status;
+
+	act_name_id = get_unaligned_be32(&p[action_table + (12 * index)]);
+	act_desc_id = get_unaligned_be32(&p[action_table + (12 * index) + 4]);
+	act_proc_id = get_unaligned_be32(&p[action_table + (12 * index) + 8]);
+
+	*name = &p[str_table + act_name_id];
+
+	if (act_desc_id < (note_strings - str_table))
+		*description = &p[str_table + act_desc_id];
+
+	do {
+		act_proc_name = get_unaligned_be32(
+					&p[proc_table + (13 * act_proc_id)]);
+		act_proc_attribute =
+			(p[proc_table + (13 * act_proc_id) + 8] & 0x03);
+
+		procptr =
+				kzalloc(sizeof(struct altera_procinfo),
+								GFP_KERNEL);
+
+		if (procptr == NULL)
+			status = -ENOMEM;
+		else {
+			procptr->name = &p[str_table + act_proc_name];
+			procptr->attrs = act_proc_attribute;
+			procptr->next = NULL;
+
+			/* add record to end of linked list */
+			if (*proc_list == NULL)
+				*proc_list = procptr;
+			else {
+				tmpptr = *proc_list;
+				while (tmpptr->next != NULL)
+					tmpptr = tmpptr->next;
+				tmpptr->next = procptr;
+			}
+		}
+
+		act_proc_id = get_unaligned_be32(
+				&p[proc_table + (13 * act_proc_id) + 4]);
+	} while ((act_proc_id != 0) && (act_proc_id < proc_count));
+
+	return status;
+}
+
+int altera_init(struct altera_config *config, const struct firmware *fw)
+{
+	struct altera_state *astate = NULL;
+	struct altera_procinfo *proc_list = NULL;
+	struct altera_procinfo *procptr = NULL;
+	char *key = NULL;
+	char *value = NULL;
+	char *action_name = NULL;
+	char *description = NULL;
+	int exec_result = 0;
+	int exit_code = 0;
+	int format_version = 0;
+	int action_count = 0;
+	int procedure_count = 0;
+	int index = 0;
+	s32 offset = 0L;
+	s32 error_address = 0L;
+	int retval = 0;
+
+	key = kzalloc(33, GFP_KERNEL);
+	if (!key) {
+		retval = -ENOMEM;
+		goto out;
+	}
+	value = kzalloc(257, GFP_KERNEL);
+	if (!value) {
+		retval = -ENOMEM;
+		goto free_key;
+	}
+	astate = kzalloc(sizeof(struct altera_state), GFP_KERNEL);
+	if (!astate) {
+		retval = -ENOMEM;
+		goto free_value;
+	}
+
+	astate->config = config;
+	if (!astate->config->jtag_io) {
+		dprintk("%s: using byteblaster!\n", __func__);
+		astate->config->jtag_io = netup_jtag_io_lpt;
+	}
+
+	altera_check_crc((u8 *)fw->data, fw->size);
+
+	if (debug) {
+		altera_get_file_info((u8 *)fw->data, fw->size, &format_version,
+					&action_count, &procedure_count);
+		printk(KERN_INFO "%s: File format is %s ByteCode format\n",
+			__func__, (format_version == 2) ? "Jam STAPL" :
+						"pre-standardized Jam 1.1");
+		while (altera_get_note((u8 *)fw->data, fw->size,
+					&offset, key, value, 32, 256) == 0)
+			printk(KERN_INFO "%s: NOTE \"%s\" = \"%s\"\n",
+					__func__, key, value);
+	}
+
+	if (debug && (format_version == 2) && (action_count > 0)) {
+		printk(KERN_INFO "%s: Actions available:\n", __func__);
+		for (index = 0; index < action_count; ++index) {
+			altera_get_act_info((u8 *)fw->data, fw->size,
+						index, &action_name,
+						&description,
+						&proc_list);
+
+			if (description == NULL)
+				printk(KERN_INFO "%s: %s\n",
+						__func__,
+						action_name);
+			else
+				printk(KERN_INFO "%s: %s \"%s\"\n",
+						__func__,
+						action_name,
+						description);
+
+			procptr = proc_list;
+			while (procptr != NULL) {
+				if (procptr->attrs != 0)
+					printk(KERN_INFO "%s:    %s (%s)\n",
+						__func__,
+						procptr->name,
+						(procptr->attrs == 1) ?
+						"optional" : "recommended");
+
+				proc_list = procptr->next;
+				kfree(procptr);
+				procptr = proc_list;
+			}
+		}
+
+		printk(KERN_INFO "\n");
+	}
+
+	exec_result = altera_execute(astate, (u8 *)fw->data, fw->size,
+				&error_address, &exit_code, &format_version);
+
+	if (exit_code)
+		exec_result = -EREMOTEIO;
+
+	if ((format_version == 2) && (exec_result == -EINVAL)) {
+		if (astate->config->action == NULL)
+			printk(KERN_ERR "%s: error: no action specified for "
+				"Jam STAPL file.\nprogram terminated.\n",
+				__func__);
+		else
+			printk(KERN_ERR "%s: error: action \"%s\""
+				" is not supported "
+				"for this Jam STAPL file.\n"
+				"Program terminated.\n", __func__,
+				astate->config->action);
+
+	} else if (exec_result)
+		printk(KERN_ERR "%s: error %d\n", __func__, exec_result);
+
+	kfree(astate);
+free_value:
+	kfree(value);
+free_key:
+	kfree(key);
+out:
+	return retval;
+}
+EXPORT_SYMBOL(altera_init);
diff --git a/drivers/misc/apds9802als.c b/drivers/misc/apds9802als.c
new file mode 100644
index 000000000..d8ac036f0
--- /dev/null
+++ b/drivers/misc/apds9802als.c
@@ -0,0 +1,322 @@
+/*
+ * apds9802als.c - apds9802  ALS Driver
+ *
+ * Copyright (C) 2009 Intel Corp
+ *
+ *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/mutex.h>
+#include <linux/sysfs.h>
+#include <linux/pm_runtime.h>
+
+#define ALS_MIN_RANGE_VAL 1
+#define ALS_MAX_RANGE_VAL 2
+#define POWER_STA_ENABLE 1
+#define POWER_STA_DISABLE 0
+
+#define DRIVER_NAME "apds9802als"
+
+struct als_data {
+	struct mutex mutex;
+};
+
+static ssize_t als_sensing_range_show(struct device *dev,
+			struct device_attribute *attr,  char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	int  val;
+
+	val = i2c_smbus_read_byte_data(client, 0x81);
+	if (val < 0)
+		return val;
+	if (val & 1)
+		return sprintf(buf, "4095\n");
+	else
+		return sprintf(buf, "65535\n");
+}
+
+static int als_wait_for_data_ready(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	int ret;
+	int retry = 10;
+
+	do {
+		msleep(30);
+		ret = i2c_smbus_read_byte_data(client, 0x86);
+	} while (!(ret & 0x80) && retry--);
+
+	if (retry < 0) {
+		dev_warn(dev, "timeout waiting for data ready\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static ssize_t als_lux0_input_data_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct als_data *data = i2c_get_clientdata(client);
+	int ret_val;
+	int temp;
+
+	/* Protect against parallel reads */
+	pm_runtime_get_sync(dev);
+	mutex_lock(&data->mutex);
+
+	/* clear EOC interrupt status */
+	i2c_smbus_write_byte(client, 0x40);
+	/* start measurement */
+	temp = i2c_smbus_read_byte_data(client, 0x81);
+	i2c_smbus_write_byte_data(client, 0x81, temp | 0x08);
+
+	ret_val = als_wait_for_data_ready(dev);
+	if (ret_val < 0)
+		goto failed;
+
+	temp = i2c_smbus_read_byte_data(client, 0x8C); /* LSB data */
+	if (temp < 0) {
+		ret_val = temp;
+		goto failed;
+	}
+	ret_val = i2c_smbus_read_byte_data(client, 0x8D); /* MSB data */
+	if (ret_val < 0)
+		goto failed;
+
+	mutex_unlock(&data->mutex);
+	pm_runtime_put_sync(dev);
+
+	temp = (ret_val << 8) | temp;
+	return sprintf(buf, "%d\n", temp);
+failed:
+	mutex_unlock(&data->mutex);
+	pm_runtime_put_sync(dev);
+	return ret_val;
+}
+
+static ssize_t als_sensing_range_store(struct device *dev,
+		struct device_attribute *attr, const  char *buf, size_t count)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct als_data *data = i2c_get_clientdata(client);
+	int ret_val;
+	unsigned long val;
+
+	ret_val = kstrtoul(buf, 10, &val);
+	if (ret_val)
+		return ret_val;
+
+	if (val < 4096)
+		val = 1;
+	else if (val < 65536)
+		val = 2;
+	else
+		return -ERANGE;
+
+	pm_runtime_get_sync(dev);
+
+	/* Make sure nobody else reads/modifies/writes 0x81 while we
+	   are active */
+	mutex_lock(&data->mutex);
+
+	ret_val = i2c_smbus_read_byte_data(client, 0x81);
+	if (ret_val < 0)
+		goto fail;
+
+	/* Reset the bits before setting them */
+	ret_val = ret_val & 0xFA;
+
+	if (val == 1) /* Setting detection range up to 4k LUX */
+		ret_val = (ret_val | 0x01);
+	else /* Setting detection range up to 64k LUX*/
+		ret_val = (ret_val | 0x00);
+
+	ret_val = i2c_smbus_write_byte_data(client, 0x81, ret_val);
+
+	if (ret_val >= 0) {
+		/* All OK */
+		mutex_unlock(&data->mutex);
+		pm_runtime_put_sync(dev);
+		return count;
+	}
+fail:
+	mutex_unlock(&data->mutex);
+	pm_runtime_put_sync(dev);
+	return ret_val;
+}
+
+static int als_set_power_state(struct i2c_client *client, bool on_off)
+{
+	int ret_val;
+	struct als_data *data = i2c_get_clientdata(client);
+
+	mutex_lock(&data->mutex);
+	ret_val = i2c_smbus_read_byte_data(client, 0x80);
+	if (ret_val < 0)
+		goto fail;
+	if (on_off)
+		ret_val = ret_val | 0x01;
+	else
+		ret_val = ret_val & 0xFE;
+	ret_val = i2c_smbus_write_byte_data(client, 0x80, ret_val);
+fail:
+	mutex_unlock(&data->mutex);
+	return ret_val;
+}
+
+static DEVICE_ATTR(lux0_sensor_range, S_IRUGO | S_IWUSR,
+	als_sensing_range_show, als_sensing_range_store);
+static DEVICE_ATTR(lux0_input, S_IRUGO, als_lux0_input_data_show, NULL);
+
+static struct attribute *mid_att_als[] = {
+	&dev_attr_lux0_sensor_range.attr,
+	&dev_attr_lux0_input.attr,
+	NULL
+};
+
+static const struct attribute_group m_als_gr = {
+	.name = "apds9802als",
+	.attrs = mid_att_als
+};
+
+static int als_set_default_config(struct i2c_client *client)
+{
+	int ret_val;
+	/* Write the command and then switch on */
+	ret_val = i2c_smbus_write_byte_data(client, 0x80, 0x01);
+	if (ret_val < 0) {
+		dev_err(&client->dev, "failed default switch on write\n");
+		return ret_val;
+	}
+	/* detection range: 1~64K Lux, maunal measurement */
+	ret_val = i2c_smbus_write_byte_data(client, 0x81, 0x08);
+	if (ret_val < 0)
+		dev_err(&client->dev, "failed default LUX on write\n");
+
+	/*  We always get 0 for the 1st measurement after system power on,
+	 *  so make sure it is finished before user asks for data.
+	 */
+	als_wait_for_data_ready(&client->dev);
+
+	return ret_val;
+}
+
+static int apds9802als_probe(struct i2c_client *client,
+			     const struct i2c_device_id *id)
+{
+	int res;
+	struct als_data *data;
+
+	data = kzalloc(sizeof(struct als_data), GFP_KERNEL);
+	if (data == NULL) {
+		dev_err(&client->dev, "Memory allocation failed\n");
+		return -ENOMEM;
+	}
+	i2c_set_clientdata(client, data);
+	res = sysfs_create_group(&client->dev.kobj, &m_als_gr);
+	if (res) {
+		dev_err(&client->dev, "device create file failed\n");
+		goto als_error1;
+	}
+	dev_info(&client->dev, "ALS chip found\n");
+	als_set_default_config(client);
+	mutex_init(&data->mutex);
+
+	pm_runtime_set_active(&client->dev);
+	pm_runtime_enable(&client->dev);
+
+	return res;
+als_error1:
+	kfree(data);
+	return res;
+}
+
+static int apds9802als_remove(struct i2c_client *client)
+{
+	struct als_data *data = i2c_get_clientdata(client);
+
+	pm_runtime_get_sync(&client->dev);
+
+	als_set_power_state(client, false);
+	sysfs_remove_group(&client->dev.kobj, &m_als_gr);
+
+	pm_runtime_disable(&client->dev);
+	pm_runtime_set_suspended(&client->dev);
+	pm_runtime_put_noidle(&client->dev);
+
+	kfree(data);
+	return 0;
+}
+
+#ifdef CONFIG_PM
+
+static int apds9802als_suspend(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+
+	als_set_power_state(client, false);
+	return 0;
+}
+
+static int apds9802als_resume(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+
+	als_set_power_state(client, true);
+	return 0;
+}
+
+static UNIVERSAL_DEV_PM_OPS(apds9802als_pm_ops, apds9802als_suspend,
+	apds9802als_resume, NULL);
+
+#define APDS9802ALS_PM_OPS (&apds9802als_pm_ops)
+
+#else	/* CONFIG_PM */
+#define APDS9802ALS_PM_OPS NULL
+#endif	/* CONFIG_PM */
+
+static const struct i2c_device_id apds9802als_id[] = {
+	{ DRIVER_NAME, 0 },
+	{ }
+};
+
+MODULE_DEVICE_TABLE(i2c, apds9802als_id);
+
+static struct i2c_driver apds9802als_driver = {
+	.driver = {
+		.name = DRIVER_NAME,
+		.pm = APDS9802ALS_PM_OPS,
+	},
+	.probe = apds9802als_probe,
+	.remove = apds9802als_remove,
+	.id_table = apds9802als_id,
+};
+
+module_i2c_driver(apds9802als_driver);
+
+MODULE_AUTHOR("Anantha Narayanan <Anantha.Narayanan@intel.com");
+MODULE_DESCRIPTION("Avago apds9802als ALS Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/apds990x.c b/drivers/misc/apds990x.c
new file mode 100644
index 000000000..ed9412d75
--- /dev/null
+++ b/drivers/misc/apds990x.c
@@ -0,0 +1,1300 @@
+/*
+ * This file is part of the APDS990x sensor driver.
+ * Chip is combined proximity and ambient light sensor.
+ *
+ * Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
+ *
+ * Contact: Samu Onkalo <samu.p.onkalo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/mutex.h>
+#include <linux/regulator/consumer.h>
+#include <linux/pm_runtime.h>
+#include <linux/delay.h>
+#include <linux/wait.h>
+#include <linux/slab.h>
+#include <linux/platform_data/apds990x.h>
+
+/* Register map */
+#define APDS990X_ENABLE	 0x00 /* Enable of states and interrupts */
+#define APDS990X_ATIME	 0x01 /* ALS ADC time  */
+#define APDS990X_PTIME	 0x02 /* Proximity ADC time  */
+#define APDS990X_WTIME	 0x03 /* Wait time  */
+#define APDS990X_AILTL	 0x04 /* ALS interrupt low threshold low byte */
+#define APDS990X_AILTH	 0x05 /* ALS interrupt low threshold hi byte */
+#define APDS990X_AIHTL	 0x06 /* ALS interrupt hi threshold low byte */
+#define APDS990X_AIHTH	 0x07 /* ALS interrupt hi threshold hi byte */
+#define APDS990X_PILTL	 0x08 /* Proximity interrupt low threshold low byte */
+#define APDS990X_PILTH	 0x09 /* Proximity interrupt low threshold hi byte */
+#define APDS990X_PIHTL	 0x0a /* Proximity interrupt hi threshold low byte */
+#define APDS990X_PIHTH	 0x0b /* Proximity interrupt hi threshold hi byte */
+#define APDS990X_PERS	 0x0c /* Interrupt persistence filters */
+#define APDS990X_CONFIG	 0x0d /* Configuration */
+#define APDS990X_PPCOUNT 0x0e /* Proximity pulse count */
+#define APDS990X_CONTROL 0x0f /* Gain control register */
+#define APDS990X_REV	 0x11 /* Revision Number */
+#define APDS990X_ID	 0x12 /* Device ID */
+#define APDS990X_STATUS	 0x13 /* Device status */
+#define APDS990X_CDATAL	 0x14 /* Clear ADC low data register */
+#define APDS990X_CDATAH	 0x15 /* Clear ADC high data register */
+#define APDS990X_IRDATAL 0x16 /* IR ADC low data register */
+#define APDS990X_IRDATAH 0x17 /* IR ADC high data register */
+#define APDS990X_PDATAL	 0x18 /* Proximity ADC low data register */
+#define APDS990X_PDATAH	 0x19 /* Proximity ADC high data register */
+
+/* Control */
+#define APDS990X_MAX_AGAIN	3
+
+/* Enable register */
+#define APDS990X_EN_PIEN	(0x1 << 5)
+#define APDS990X_EN_AIEN	(0x1 << 4)
+#define APDS990X_EN_WEN		(0x1 << 3)
+#define APDS990X_EN_PEN		(0x1 << 2)
+#define APDS990X_EN_AEN		(0x1 << 1)
+#define APDS990X_EN_PON		(0x1 << 0)
+#define APDS990X_EN_DISABLE_ALL 0
+
+/* Status register */
+#define APDS990X_ST_PINT	(0x1 << 5)
+#define APDS990X_ST_AINT	(0x1 << 4)
+
+/* I2C access types */
+#define APDS990x_CMD_TYPE_MASK	(0x03 << 5)
+#define APDS990x_CMD_TYPE_RB	(0x00 << 5) /* Repeated byte */
+#define APDS990x_CMD_TYPE_INC	(0x01 << 5) /* Auto increment */
+#define APDS990x_CMD_TYPE_SPE	(0x03 << 5) /* Special function */
+
+#define APDS990x_ADDR_SHIFT	0
+#define APDS990x_CMD		0x80
+
+/* Interrupt ack commands */
+#define APDS990X_INT_ACK_ALS	0x6
+#define APDS990X_INT_ACK_PS	0x5
+#define APDS990X_INT_ACK_BOTH	0x7
+
+/* ptime */
+#define APDS990X_PTIME_DEFAULT	0xff /* Recommended conversion time 2.7ms*/
+
+/* wtime */
+#define APDS990X_WTIME_DEFAULT	0xee /* ~50ms wait time */
+
+#define APDS990X_TIME_TO_ADC	1024 /* One timetick as ADC count value */
+
+/* Persistence */
+#define APDS990X_APERS_SHIFT	0
+#define APDS990X_PPERS_SHIFT	4
+
+/* Supported ID:s */
+#define APDS990X_ID_0		0x0
+#define APDS990X_ID_4		0x4
+#define APDS990X_ID_29		0x29
+
+/* pgain and pdiode settings */
+#define APDS_PGAIN_1X	       0x0
+#define APDS_PDIODE_IR	       0x2
+
+#define APDS990X_LUX_OUTPUT_SCALE 10
+
+/* Reverse chip factors for threshold calculation */
+struct reverse_factors {
+	u32 afactor;
+	int cf1;
+	int irf1;
+	int cf2;
+	int irf2;
+};
+
+struct apds990x_chip {
+	struct apds990x_platform_data	*pdata;
+	struct i2c_client		*client;
+	struct mutex			mutex; /* avoid parallel access */
+	struct regulator_bulk_data	regs[2];
+	wait_queue_head_t		wait;
+
+	int	prox_en;
+	bool	prox_continuous_mode;
+	bool	lux_wait_fresh_res;
+
+	/* Chip parameters */
+	struct	apds990x_chip_factors	cf;
+	struct	reverse_factors		rcf;
+	u16	atime;		/* als integration time */
+	u16	arate;		/* als reporting rate */
+	u16	a_max_result;	/* Max possible ADC value with current atime */
+	u8	again_meas;	/* Gain used in last measurement */
+	u8	again_next;	/* Next calculated gain */
+	u8	pgain;
+	u8	pdiode;
+	u8	pdrive;
+	u8	lux_persistence;
+	u8	prox_persistence;
+
+	u32	lux_raw;
+	u32	lux;
+	u16	lux_clear;
+	u16	lux_ir;
+	u16	lux_calib;
+	u32	lux_thres_hi;
+	u32	lux_thres_lo;
+
+	u32	prox_thres;
+	u16	prox_data;
+	u16	prox_calib;
+
+	char	chipname[10];
+	u8	revision;
+};
+
+#define APDS_CALIB_SCALER		8192
+#define APDS_LUX_NEUTRAL_CALIB_VALUE	(1 * APDS_CALIB_SCALER)
+#define APDS_PROX_NEUTRAL_CALIB_VALUE	(1 * APDS_CALIB_SCALER)
+
+#define APDS_PROX_DEF_THRES		600
+#define APDS_PROX_HYSTERESIS		50
+#define APDS_LUX_DEF_THRES_HI		101
+#define APDS_LUX_DEF_THRES_LO		100
+#define APDS_DEFAULT_PROX_PERS		1
+
+#define APDS_TIMEOUT			2000
+#define APDS_STARTUP_DELAY		25000 /* us */
+#define APDS_RANGE			65535
+#define APDS_PROX_RANGE			1023
+#define APDS_LUX_GAIN_LO_LIMIT		100
+#define APDS_LUX_GAIN_LO_LIMIT_STRICT	25
+
+#define TIMESTEP			87 /* 2.7ms is about 87 / 32 */
+#define TIME_STEP_SCALER		32
+
+#define APDS_LUX_AVERAGING_TIME		50 /* tolerates 50/60Hz ripple */
+#define APDS_LUX_DEFAULT_RATE		200
+
+static const u8 again[]	= {1, 8, 16, 120}; /* ALS gain steps */
+static const u8 ir_currents[]	= {100, 50, 25, 12}; /* IRled currents in mA */
+
+/* Following two tables must match i.e 10Hz rate means 1 as persistence value */
+static const u16 arates_hz[] = {10, 5, 2, 1};
+static const u8 apersis[] = {1, 2, 4, 5};
+
+/* Regulators */
+static const char reg_vcc[] = "Vdd";
+static const char reg_vled[] = "Vled";
+
+static int apds990x_read_byte(struct apds990x_chip *chip, u8 reg, u8 *data)
+{
+	struct i2c_client *client = chip->client;
+	s32 ret;
+
+	reg &= ~APDS990x_CMD_TYPE_MASK;
+	reg |= APDS990x_CMD | APDS990x_CMD_TYPE_RB;
+
+	ret = i2c_smbus_read_byte_data(client, reg);
+	*data = ret;
+	return (int)ret;
+}
+
+static int apds990x_read_word(struct apds990x_chip *chip, u8 reg, u16 *data)
+{
+	struct i2c_client *client = chip->client;
+	s32 ret;
+
+	reg &= ~APDS990x_CMD_TYPE_MASK;
+	reg |= APDS990x_CMD | APDS990x_CMD_TYPE_INC;
+
+	ret = i2c_smbus_read_word_data(client, reg);
+	*data = ret;
+	return (int)ret;
+}
+
+static int apds990x_write_byte(struct apds990x_chip *chip, u8 reg, u8 data)
+{
+	struct i2c_client *client = chip->client;
+	s32 ret;
+
+	reg &= ~APDS990x_CMD_TYPE_MASK;
+	reg |= APDS990x_CMD | APDS990x_CMD_TYPE_RB;
+
+	ret = i2c_smbus_write_byte_data(client, reg, data);
+	return (int)ret;
+}
+
+static int apds990x_write_word(struct apds990x_chip *chip, u8 reg, u16 data)
+{
+	struct i2c_client *client = chip->client;
+	s32 ret;
+
+	reg &= ~APDS990x_CMD_TYPE_MASK;
+	reg |= APDS990x_CMD | APDS990x_CMD_TYPE_INC;
+
+	ret = i2c_smbus_write_word_data(client, reg, data);
+	return (int)ret;
+}
+
+static int apds990x_mode_on(struct apds990x_chip *chip)
+{
+	/* ALS is mandatory, proximity optional */
+	u8 reg = APDS990X_EN_AIEN | APDS990X_EN_PON | APDS990X_EN_AEN |
+		APDS990X_EN_WEN;
+
+	if (chip->prox_en)
+		reg |= APDS990X_EN_PIEN | APDS990X_EN_PEN;
+
+	return apds990x_write_byte(chip, APDS990X_ENABLE, reg);
+}
+
+static u16 apds990x_lux_to_threshold(struct apds990x_chip *chip, u32 lux)
+{
+	u32 thres;
+	u32 cpl;
+	u32 ir;
+
+	if (lux == 0)
+		return 0;
+	else if (lux == APDS_RANGE)
+		return APDS_RANGE;
+
+	/*
+	 * Reported LUX value is a combination of the IR and CLEAR channel
+	 * values. However, interrupt threshold is only for clear channel.
+	 * This function approximates needed HW threshold value for a given
+	 * LUX value in the current lightning type.
+	 * IR level compared to visible light varies heavily depending on the
+	 * source of the light
+	 *
+	 * Calculate threshold value for the next measurement period.
+	 * Math: threshold = lux * cpl where
+	 * cpl = atime * again / (glass_attenuation * device_factor)
+	 * (count-per-lux)
+	 *
+	 * First remove calibration. Division by four is to avoid overflow
+	 */
+	lux = lux * (APDS_CALIB_SCALER / 4) / (chip->lux_calib / 4);
+
+	/* Multiplication by 64 is to increase accuracy */
+	cpl = ((u32)chip->atime * (u32)again[chip->again_next] *
+		APDS_PARAM_SCALE * 64) / (chip->cf.ga * chip->cf.df);
+
+	thres = lux * cpl / 64;
+	/*
+	 * Convert IR light from the latest result to match with
+	 * new gain step. This helps to adapt with the current
+	 * source of light.
+	 */
+	ir = (u32)chip->lux_ir * (u32)again[chip->again_next] /
+		(u32)again[chip->again_meas];
+
+	/*
+	 * Compensate count with IR light impact
+	 * IAC1 > IAC2 (see apds990x_get_lux for formulas)
+	 */
+	if (chip->lux_clear * APDS_PARAM_SCALE >=
+		chip->rcf.afactor * chip->lux_ir)
+		thres = (chip->rcf.cf1 * thres + chip->rcf.irf1 * ir) /
+			APDS_PARAM_SCALE;
+	else
+		thres = (chip->rcf.cf2 * thres + chip->rcf.irf2 * ir) /
+			APDS_PARAM_SCALE;
+
+	if (thres >= chip->a_max_result)
+		thres = chip->a_max_result - 1;
+	return thres;
+}
+
+static inline int apds990x_set_atime(struct apds990x_chip *chip, u32 time_ms)
+{
+	u8 reg_value;
+
+	chip->atime = time_ms;
+	/* Formula is specified in the data sheet */
+	reg_value = 256 - ((time_ms * TIME_STEP_SCALER) / TIMESTEP);
+	/* Calculate max ADC value for given integration time */
+	chip->a_max_result = (u16)(256 - reg_value) * APDS990X_TIME_TO_ADC;
+	return apds990x_write_byte(chip, APDS990X_ATIME, reg_value);
+}
+
+/* Called always with mutex locked */
+static int apds990x_refresh_pthres(struct apds990x_chip *chip, int data)
+{
+	int ret, lo, hi;
+
+	/* If the chip is not in use, don't try to access it */
+	if (pm_runtime_suspended(&chip->client->dev))
+		return 0;
+
+	if (data < chip->prox_thres) {
+		lo = 0;
+		hi = chip->prox_thres;
+	} else {
+		lo = chip->prox_thres - APDS_PROX_HYSTERESIS;
+		if (chip->prox_continuous_mode)
+			hi = chip->prox_thres;
+		else
+			hi = APDS_RANGE;
+	}
+
+	ret = apds990x_write_word(chip, APDS990X_PILTL, lo);
+	ret |= apds990x_write_word(chip, APDS990X_PIHTL, hi);
+	return ret;
+}
+
+/* Called always with mutex locked */
+static int apds990x_refresh_athres(struct apds990x_chip *chip)
+{
+	int ret;
+	/* If the chip is not in use, don't try to access it */
+	if (pm_runtime_suspended(&chip->client->dev))
+		return 0;
+
+	ret = apds990x_write_word(chip, APDS990X_AILTL,
+			apds990x_lux_to_threshold(chip, chip->lux_thres_lo));
+	ret |= apds990x_write_word(chip, APDS990X_AIHTL,
+			apds990x_lux_to_threshold(chip, chip->lux_thres_hi));
+
+	return ret;
+}
+
+/* Called always with mutex locked */
+static void apds990x_force_a_refresh(struct apds990x_chip *chip)
+{
+	/* This will force ALS interrupt after the next measurement. */
+	apds990x_write_word(chip, APDS990X_AILTL, APDS_LUX_DEF_THRES_LO);
+	apds990x_write_word(chip, APDS990X_AIHTL, APDS_LUX_DEF_THRES_HI);
+}
+
+/* Called always with mutex locked */
+static void apds990x_force_p_refresh(struct apds990x_chip *chip)
+{
+	/* This will force proximity interrupt after the next measurement. */
+	apds990x_write_word(chip, APDS990X_PILTL, APDS_PROX_DEF_THRES - 1);
+	apds990x_write_word(chip, APDS990X_PIHTL, APDS_PROX_DEF_THRES);
+}
+
+/* Called always with mutex locked */
+static int apds990x_calc_again(struct apds990x_chip *chip)
+{
+	int curr_again = chip->again_meas;
+	int next_again = chip->again_meas;
+	int ret = 0;
+
+	/* Calculate suitable als gain */
+	if (chip->lux_clear == chip->a_max_result)
+		next_again -= 2; /* ALS saturated. Decrease gain by 2 steps */
+	else if (chip->lux_clear > chip->a_max_result / 2)
+		next_again--;
+	else if (chip->lux_clear < APDS_LUX_GAIN_LO_LIMIT_STRICT)
+		next_again += 2; /* Too dark. Increase gain by 2 steps */
+	else if (chip->lux_clear < APDS_LUX_GAIN_LO_LIMIT)
+		next_again++;
+
+	/* Limit gain to available range */
+	if (next_again < 0)
+		next_again = 0;
+	else if (next_again > APDS990X_MAX_AGAIN)
+		next_again = APDS990X_MAX_AGAIN;
+
+	/* Let's check can we trust the measured result */
+	if (chip->lux_clear == chip->a_max_result)
+		/* Result can be totally garbage due to saturation */
+		ret = -ERANGE;
+	else if (next_again != curr_again &&
+		chip->lux_clear < APDS_LUX_GAIN_LO_LIMIT_STRICT)
+		/*
+		 * Gain is changed and measurement result is very small.
+		 * Result can be totally garbage due to underflow
+		 */
+		ret = -ERANGE;
+
+	chip->again_next = next_again;
+	apds990x_write_byte(chip, APDS990X_CONTROL,
+			(chip->pdrive << 6) |
+			(chip->pdiode << 4) |
+			(chip->pgain << 2) |
+			(chip->again_next << 0));
+
+	/*
+	 * Error means bad result -> re-measurement is needed. The forced
+	 * refresh uses fastest possible persistence setting to get result
+	 * as soon as possible.
+	 */
+	if (ret < 0)
+		apds990x_force_a_refresh(chip);
+	else
+		apds990x_refresh_athres(chip);
+
+	return ret;
+}
+
+/* Called always with mutex locked */
+static int apds990x_get_lux(struct apds990x_chip *chip, int clear, int ir)
+{
+	int iac, iac1, iac2; /* IR adjusted counts */
+	u32 lpc; /* Lux per count */
+
+	/* Formulas:
+	 * iac1 = CF1 * CLEAR_CH - IRF1 * IR_CH
+	 * iac2 = CF2 * CLEAR_CH - IRF2 * IR_CH
+	 */
+	iac1 = (chip->cf.cf1 * clear - chip->cf.irf1 * ir) / APDS_PARAM_SCALE;
+	iac2 = (chip->cf.cf2 * clear - chip->cf.irf2 * ir) / APDS_PARAM_SCALE;
+
+	iac = max(iac1, iac2);
+	iac = max(iac, 0);
+
+	lpc = APDS990X_LUX_OUTPUT_SCALE * (chip->cf.df * chip->cf.ga) /
+		(u32)(again[chip->again_meas] * (u32)chip->atime);
+
+	return (iac * lpc) / APDS_PARAM_SCALE;
+}
+
+static int apds990x_ack_int(struct apds990x_chip *chip, u8 mode)
+{
+	struct i2c_client *client = chip->client;
+	s32 ret;
+	u8 reg = APDS990x_CMD | APDS990x_CMD_TYPE_SPE;
+
+	switch (mode & (APDS990X_ST_AINT | APDS990X_ST_PINT)) {
+	case APDS990X_ST_AINT:
+		reg |= APDS990X_INT_ACK_ALS;
+		break;
+	case APDS990X_ST_PINT:
+		reg |= APDS990X_INT_ACK_PS;
+		break;
+	default:
+		reg |= APDS990X_INT_ACK_BOTH;
+		break;
+	}
+
+	ret = i2c_smbus_read_byte_data(client, reg);
+	return (int)ret;
+}
+
+static irqreturn_t apds990x_irq(int irq, void *data)
+{
+	struct apds990x_chip *chip = data;
+	u8 status;
+
+	apds990x_read_byte(chip, APDS990X_STATUS, &status);
+	apds990x_ack_int(chip, status);
+
+	mutex_lock(&chip->mutex);
+	if (!pm_runtime_suspended(&chip->client->dev)) {
+		if (status & APDS990X_ST_AINT) {
+			apds990x_read_word(chip, APDS990X_CDATAL,
+					&chip->lux_clear);
+			apds990x_read_word(chip, APDS990X_IRDATAL,
+					&chip->lux_ir);
+			/* Store used gain for calculations */
+			chip->again_meas = chip->again_next;
+
+			chip->lux_raw = apds990x_get_lux(chip,
+							chip->lux_clear,
+							chip->lux_ir);
+
+			if (apds990x_calc_again(chip) == 0) {
+				/* Result is valid */
+				chip->lux = chip->lux_raw;
+				chip->lux_wait_fresh_res = false;
+				wake_up(&chip->wait);
+				sysfs_notify(&chip->client->dev.kobj,
+					NULL, "lux0_input");
+			}
+		}
+
+		if ((status & APDS990X_ST_PINT) && chip->prox_en) {
+			u16 clr_ch;
+
+			apds990x_read_word(chip, APDS990X_CDATAL, &clr_ch);
+			/*
+			 * If ALS channel is saturated at min gain,
+			 * proximity gives false posivite values.
+			 * Just ignore them.
+			 */
+			if (chip->again_meas == 0 &&
+				clr_ch == chip->a_max_result)
+				chip->prox_data = 0;
+			else
+				apds990x_read_word(chip,
+						APDS990X_PDATAL,
+						&chip->prox_data);
+
+			apds990x_refresh_pthres(chip, chip->prox_data);
+			if (chip->prox_data < chip->prox_thres)
+				chip->prox_data = 0;
+			else if (!chip->prox_continuous_mode)
+				chip->prox_data = APDS_PROX_RANGE;
+			sysfs_notify(&chip->client->dev.kobj,
+				NULL, "prox0_raw");
+		}
+	}
+	mutex_unlock(&chip->mutex);
+	return IRQ_HANDLED;
+}
+
+static int apds990x_configure(struct apds990x_chip *chip)
+{
+	/* It is recommended to use disabled mode during these operations */
+	apds990x_write_byte(chip, APDS990X_ENABLE, APDS990X_EN_DISABLE_ALL);
+
+	/* conversion and wait times for different state machince states */
+	apds990x_write_byte(chip, APDS990X_PTIME, APDS990X_PTIME_DEFAULT);
+	apds990x_write_byte(chip, APDS990X_WTIME, APDS990X_WTIME_DEFAULT);
+	apds990x_set_atime(chip, APDS_LUX_AVERAGING_TIME);
+
+	apds990x_write_byte(chip, APDS990X_CONFIG, 0);
+
+	/* Persistence levels */
+	apds990x_write_byte(chip, APDS990X_PERS,
+			(chip->lux_persistence << APDS990X_APERS_SHIFT) |
+			(chip->prox_persistence << APDS990X_PPERS_SHIFT));
+
+	apds990x_write_byte(chip, APDS990X_PPCOUNT, chip->pdata->ppcount);
+
+	/* Start with relatively small gain */
+	chip->again_meas = 1;
+	chip->again_next = 1;
+	apds990x_write_byte(chip, APDS990X_CONTROL,
+			(chip->pdrive << 6) |
+			(chip->pdiode << 4) |
+			(chip->pgain << 2) |
+			(chip->again_next << 0));
+	return 0;
+}
+
+static int apds990x_detect(struct apds990x_chip *chip)
+{
+	struct i2c_client *client = chip->client;
+	int ret;
+	u8 id;
+
+	ret = apds990x_read_byte(chip, APDS990X_ID, &id);
+	if (ret < 0) {
+		dev_err(&client->dev, "ID read failed\n");
+		return ret;
+	}
+
+	ret = apds990x_read_byte(chip, APDS990X_REV, &chip->revision);
+	if (ret < 0) {
+		dev_err(&client->dev, "REV read failed\n");
+		return ret;
+	}
+
+	switch (id) {
+	case APDS990X_ID_0:
+	case APDS990X_ID_4:
+	case APDS990X_ID_29:
+		snprintf(chip->chipname, sizeof(chip->chipname), "APDS-990x");
+		break;
+	default:
+		ret = -ENODEV;
+		break;
+	}
+	return ret;
+}
+
+#ifdef CONFIG_PM
+static int apds990x_chip_on(struct apds990x_chip *chip)
+{
+	int err	 = regulator_bulk_enable(ARRAY_SIZE(chip->regs),
+					chip->regs);
+	if (err < 0)
+		return err;
+
+	usleep_range(APDS_STARTUP_DELAY, 2 * APDS_STARTUP_DELAY);
+
+	/* Refresh all configs in case of regulators were off */
+	chip->prox_data = 0;
+	apds990x_configure(chip);
+	apds990x_mode_on(chip);
+	return 0;
+}
+#endif
+
+static int apds990x_chip_off(struct apds990x_chip *chip)
+{
+	apds990x_write_byte(chip, APDS990X_ENABLE, APDS990X_EN_DISABLE_ALL);
+	regulator_bulk_disable(ARRAY_SIZE(chip->regs), chip->regs);
+	return 0;
+}
+
+static ssize_t apds990x_lux_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct apds990x_chip *chip = dev_get_drvdata(dev);
+	ssize_t ret;
+	u32 result;
+	long timeout;
+
+	if (pm_runtime_suspended(dev))
+		return -EIO;
+
+	timeout = wait_event_interruptible_timeout(chip->wait,
+						!chip->lux_wait_fresh_res,
+						msecs_to_jiffies(APDS_TIMEOUT));
+	if (!timeout)
+		return -EIO;
+
+	mutex_lock(&chip->mutex);
+	result = (chip->lux * chip->lux_calib) / APDS_CALIB_SCALER;
+	if (result > (APDS_RANGE * APDS990X_LUX_OUTPUT_SCALE))
+		result = APDS_RANGE * APDS990X_LUX_OUTPUT_SCALE;
+
+	ret = sprintf(buf, "%d.%d\n",
+		result / APDS990X_LUX_OUTPUT_SCALE,
+		result % APDS990X_LUX_OUTPUT_SCALE);
+	mutex_unlock(&chip->mutex);
+	return ret;
+}
+
+static DEVICE_ATTR(lux0_input, S_IRUGO, apds990x_lux_show, NULL);
+
+static ssize_t apds990x_lux_range_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", APDS_RANGE);
+}
+
+static DEVICE_ATTR(lux0_sensor_range, S_IRUGO, apds990x_lux_range_show, NULL);
+
+static ssize_t apds990x_lux_calib_format_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", APDS_CALIB_SCALER);
+}
+
+static DEVICE_ATTR(lux0_calibscale_default, S_IRUGO,
+		apds990x_lux_calib_format_show, NULL);
+
+static ssize_t apds990x_lux_calib_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct apds990x_chip *chip = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%u\n", chip->lux_calib);
+}
+
+static ssize_t apds990x_lux_calib_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t len)
+{
+	struct apds990x_chip *chip = dev_get_drvdata(dev);
+	unsigned long value;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &value);
+	if (ret)
+		return ret;
+
+	chip->lux_calib = value;
+
+	return len;
+}
+
+static DEVICE_ATTR(lux0_calibscale, S_IRUGO | S_IWUSR, apds990x_lux_calib_show,
+		apds990x_lux_calib_store);
+
+static ssize_t apds990x_rate_avail(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	int i;
+	int pos = 0;
+
+	for (i = 0; i < ARRAY_SIZE(arates_hz); i++)
+		pos += sprintf(buf + pos, "%d ", arates_hz[i]);
+	sprintf(buf + pos - 1, "\n");
+	return pos;
+}
+
+static ssize_t apds990x_rate_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct apds990x_chip *chip =  dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", chip->arate);
+}
+
+static int apds990x_set_arate(struct apds990x_chip *chip, int rate)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(arates_hz); i++)
+		if (rate >= arates_hz[i])
+			break;
+
+	if (i == ARRAY_SIZE(arates_hz))
+		return -EINVAL;
+
+	/* Pick up corresponding persistence value */
+	chip->lux_persistence = apersis[i];
+	chip->arate = arates_hz[i];
+
+	/* If the chip is not in use, don't try to access it */
+	if (pm_runtime_suspended(&chip->client->dev))
+		return 0;
+
+	/* Persistence levels */
+	return apds990x_write_byte(chip, APDS990X_PERS,
+			(chip->lux_persistence << APDS990X_APERS_SHIFT) |
+			(chip->prox_persistence << APDS990X_PPERS_SHIFT));
+}
+
+static ssize_t apds990x_rate_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t len)
+{
+	struct apds990x_chip *chip =  dev_get_drvdata(dev);
+	unsigned long value;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &value);
+	if (ret)
+		return ret;
+
+	mutex_lock(&chip->mutex);
+	ret = apds990x_set_arate(chip, value);
+	mutex_unlock(&chip->mutex);
+
+	if (ret < 0)
+		return ret;
+	return len;
+}
+
+static DEVICE_ATTR(lux0_rate_avail, S_IRUGO, apds990x_rate_avail, NULL);
+
+static DEVICE_ATTR(lux0_rate, S_IRUGO | S_IWUSR, apds990x_rate_show,
+						 apds990x_rate_store);
+
+static ssize_t apds990x_prox_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	ssize_t ret;
+	struct apds990x_chip *chip =  dev_get_drvdata(dev);
+
+	if (pm_runtime_suspended(dev) || !chip->prox_en)
+		return -EIO;
+
+	mutex_lock(&chip->mutex);
+	ret = sprintf(buf, "%d\n", chip->prox_data);
+	mutex_unlock(&chip->mutex);
+	return ret;
+}
+
+static DEVICE_ATTR(prox0_raw, S_IRUGO, apds990x_prox_show, NULL);
+
+static ssize_t apds990x_prox_range_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", APDS_PROX_RANGE);
+}
+
+static DEVICE_ATTR(prox0_sensor_range, S_IRUGO, apds990x_prox_range_show, NULL);
+
+static ssize_t apds990x_prox_enable_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct apds990x_chip *chip =  dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", chip->prox_en);
+}
+
+static ssize_t apds990x_prox_enable_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t len)
+{
+	struct apds990x_chip *chip =  dev_get_drvdata(dev);
+	unsigned long value;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &value);
+	if (ret)
+		return ret;
+
+	mutex_lock(&chip->mutex);
+
+	if (!chip->prox_en)
+		chip->prox_data = 0;
+
+	if (value)
+		chip->prox_en++;
+	else if (chip->prox_en > 0)
+		chip->prox_en--;
+
+	if (!pm_runtime_suspended(dev))
+		apds990x_mode_on(chip);
+	mutex_unlock(&chip->mutex);
+	return len;
+}
+
+static DEVICE_ATTR(prox0_raw_en, S_IRUGO | S_IWUSR, apds990x_prox_enable_show,
+						   apds990x_prox_enable_store);
+
+static const char *reporting_modes[] = {"trigger", "periodic"};
+
+static ssize_t apds990x_prox_reporting_mode_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct apds990x_chip *chip =  dev_get_drvdata(dev);
+
+	return sprintf(buf, "%s\n",
+		reporting_modes[!!chip->prox_continuous_mode]);
+}
+
+static ssize_t apds990x_prox_reporting_mode_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t len)
+{
+	struct apds990x_chip *chip =  dev_get_drvdata(dev);
+	int ret;
+
+	ret = sysfs_match_string(reporting_modes, buf);
+	if (ret < 0)
+		return ret;
+
+	chip->prox_continuous_mode = ret;
+	return len;
+}
+
+static DEVICE_ATTR(prox0_reporting_mode, S_IRUGO | S_IWUSR,
+		apds990x_prox_reporting_mode_show,
+		apds990x_prox_reporting_mode_store);
+
+static ssize_t apds990x_prox_reporting_avail_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%s %s\n", reporting_modes[0], reporting_modes[1]);
+}
+
+static DEVICE_ATTR(prox0_reporting_mode_avail, S_IRUGO | S_IWUSR,
+		apds990x_prox_reporting_avail_show, NULL);
+
+
+static ssize_t apds990x_lux_thresh_above_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct apds990x_chip *chip =  dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", chip->lux_thres_hi);
+}
+
+static ssize_t apds990x_lux_thresh_below_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct apds990x_chip *chip =  dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", chip->lux_thres_lo);
+}
+
+static ssize_t apds990x_set_lux_thresh(struct apds990x_chip *chip, u32 *target,
+				const char *buf)
+{
+	unsigned long thresh;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &thresh);
+	if (ret)
+		return ret;
+
+	if (thresh > APDS_RANGE)
+		return -EINVAL;
+
+	mutex_lock(&chip->mutex);
+	*target = thresh;
+	/*
+	 * Don't update values in HW if we are still waiting for
+	 * first interrupt to come after device handle open call.
+	 */
+	if (!chip->lux_wait_fresh_res)
+		apds990x_refresh_athres(chip);
+	mutex_unlock(&chip->mutex);
+	return ret;
+
+}
+
+static ssize_t apds990x_lux_thresh_above_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t len)
+{
+	struct apds990x_chip *chip =  dev_get_drvdata(dev);
+	int ret = apds990x_set_lux_thresh(chip, &chip->lux_thres_hi, buf);
+
+	if (ret < 0)
+		return ret;
+	return len;
+}
+
+static ssize_t apds990x_lux_thresh_below_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t len)
+{
+	struct apds990x_chip *chip =  dev_get_drvdata(dev);
+	int ret = apds990x_set_lux_thresh(chip, &chip->lux_thres_lo, buf);
+
+	if (ret < 0)
+		return ret;
+	return len;
+}
+
+static DEVICE_ATTR(lux0_thresh_above_value, S_IRUGO | S_IWUSR,
+		apds990x_lux_thresh_above_show,
+		apds990x_lux_thresh_above_store);
+
+static DEVICE_ATTR(lux0_thresh_below_value, S_IRUGO | S_IWUSR,
+		apds990x_lux_thresh_below_show,
+		apds990x_lux_thresh_below_store);
+
+static ssize_t apds990x_prox_threshold_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct apds990x_chip *chip =  dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", chip->prox_thres);
+}
+
+static ssize_t apds990x_prox_threshold_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t len)
+{
+	struct apds990x_chip *chip =  dev_get_drvdata(dev);
+	unsigned long value;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &value);
+	if (ret)
+		return ret;
+
+	if ((value > APDS_RANGE) || (value == 0) ||
+		(value < APDS_PROX_HYSTERESIS))
+		return -EINVAL;
+
+	mutex_lock(&chip->mutex);
+	chip->prox_thres = value;
+
+	apds990x_force_p_refresh(chip);
+	mutex_unlock(&chip->mutex);
+	return len;
+}
+
+static DEVICE_ATTR(prox0_thresh_above_value, S_IRUGO | S_IWUSR,
+		apds990x_prox_threshold_show,
+		apds990x_prox_threshold_store);
+
+static ssize_t apds990x_power_state_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", !pm_runtime_suspended(dev));
+	return 0;
+}
+
+static ssize_t apds990x_power_state_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t len)
+{
+	struct apds990x_chip *chip =  dev_get_drvdata(dev);
+	unsigned long value;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &value);
+	if (ret)
+		return ret;
+
+	if (value) {
+		pm_runtime_get_sync(dev);
+		mutex_lock(&chip->mutex);
+		chip->lux_wait_fresh_res = true;
+		apds990x_force_a_refresh(chip);
+		apds990x_force_p_refresh(chip);
+		mutex_unlock(&chip->mutex);
+	} else {
+		if (!pm_runtime_suspended(dev))
+			pm_runtime_put(dev);
+	}
+	return len;
+}
+
+static DEVICE_ATTR(power_state, S_IRUGO | S_IWUSR,
+		apds990x_power_state_show,
+		apds990x_power_state_store);
+
+static ssize_t apds990x_chip_id_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct apds990x_chip *chip =  dev_get_drvdata(dev);
+
+	return sprintf(buf, "%s %d\n", chip->chipname, chip->revision);
+}
+
+static DEVICE_ATTR(chip_id, S_IRUGO, apds990x_chip_id_show, NULL);
+
+static struct attribute *sysfs_attrs_ctrl[] = {
+	&dev_attr_lux0_calibscale.attr,
+	&dev_attr_lux0_calibscale_default.attr,
+	&dev_attr_lux0_input.attr,
+	&dev_attr_lux0_sensor_range.attr,
+	&dev_attr_lux0_rate.attr,
+	&dev_attr_lux0_rate_avail.attr,
+	&dev_attr_lux0_thresh_above_value.attr,
+	&dev_attr_lux0_thresh_below_value.attr,
+	&dev_attr_prox0_raw_en.attr,
+	&dev_attr_prox0_raw.attr,
+	&dev_attr_prox0_sensor_range.attr,
+	&dev_attr_prox0_thresh_above_value.attr,
+	&dev_attr_prox0_reporting_mode.attr,
+	&dev_attr_prox0_reporting_mode_avail.attr,
+	&dev_attr_chip_id.attr,
+	&dev_attr_power_state.attr,
+	NULL
+};
+
+static const struct attribute_group apds990x_attribute_group[] = {
+	{.attrs = sysfs_attrs_ctrl },
+};
+
+static int apds990x_probe(struct i2c_client *client,
+				const struct i2c_device_id *id)
+{
+	struct apds990x_chip *chip;
+	int err;
+
+	chip = kzalloc(sizeof *chip, GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	i2c_set_clientdata(client, chip);
+	chip->client  = client;
+
+	init_waitqueue_head(&chip->wait);
+	mutex_init(&chip->mutex);
+	chip->pdata	= client->dev.platform_data;
+
+	if (chip->pdata == NULL) {
+		dev_err(&client->dev, "platform data is mandatory\n");
+		err = -EINVAL;
+		goto fail1;
+	}
+
+	if (chip->pdata->cf.ga == 0) {
+		/* set uncovered sensor default parameters */
+		chip->cf.ga = 1966; /* 0.48 * APDS_PARAM_SCALE */
+		chip->cf.cf1 = 4096; /* 1.00 * APDS_PARAM_SCALE */
+		chip->cf.irf1 = 9134; /* 2.23 * APDS_PARAM_SCALE */
+		chip->cf.cf2 = 2867; /* 0.70 * APDS_PARAM_SCALE */
+		chip->cf.irf2 = 5816; /* 1.42 * APDS_PARAM_SCALE */
+		chip->cf.df = 52;
+	} else {
+		chip->cf = chip->pdata->cf;
+	}
+
+	/* precalculate inverse chip factors for threshold control */
+	chip->rcf.afactor =
+		(chip->cf.irf1 - chip->cf.irf2) * APDS_PARAM_SCALE /
+		(chip->cf.cf1 - chip->cf.cf2);
+	chip->rcf.cf1 = APDS_PARAM_SCALE * APDS_PARAM_SCALE /
+		chip->cf.cf1;
+	chip->rcf.irf1 = chip->cf.irf1 * APDS_PARAM_SCALE /
+		chip->cf.cf1;
+	chip->rcf.cf2 = APDS_PARAM_SCALE * APDS_PARAM_SCALE /
+		chip->cf.cf2;
+	chip->rcf.irf2 = chip->cf.irf2 * APDS_PARAM_SCALE /
+		chip->cf.cf2;
+
+	/* Set something to start with */
+	chip->lux_thres_hi = APDS_LUX_DEF_THRES_HI;
+	chip->lux_thres_lo = APDS_LUX_DEF_THRES_LO;
+	chip->lux_calib = APDS_LUX_NEUTRAL_CALIB_VALUE;
+
+	chip->prox_thres = APDS_PROX_DEF_THRES;
+	chip->pdrive = chip->pdata->pdrive;
+	chip->pdiode = APDS_PDIODE_IR;
+	chip->pgain = APDS_PGAIN_1X;
+	chip->prox_calib = APDS_PROX_NEUTRAL_CALIB_VALUE;
+	chip->prox_persistence = APDS_DEFAULT_PROX_PERS;
+	chip->prox_continuous_mode = false;
+
+	chip->regs[0].supply = reg_vcc;
+	chip->regs[1].supply = reg_vled;
+
+	err = regulator_bulk_get(&client->dev,
+				 ARRAY_SIZE(chip->regs), chip->regs);
+	if (err < 0) {
+		dev_err(&client->dev, "Cannot get regulators\n");
+		goto fail1;
+	}
+
+	err = regulator_bulk_enable(ARRAY_SIZE(chip->regs), chip->regs);
+	if (err < 0) {
+		dev_err(&client->dev, "Cannot enable regulators\n");
+		goto fail2;
+	}
+
+	usleep_range(APDS_STARTUP_DELAY, 2 * APDS_STARTUP_DELAY);
+
+	err = apds990x_detect(chip);
+	if (err < 0) {
+		dev_err(&client->dev, "APDS990X not found\n");
+		goto fail3;
+	}
+
+	pm_runtime_set_active(&client->dev);
+
+	apds990x_configure(chip);
+	apds990x_set_arate(chip, APDS_LUX_DEFAULT_RATE);
+	apds990x_mode_on(chip);
+
+	pm_runtime_enable(&client->dev);
+
+	if (chip->pdata->setup_resources) {
+		err = chip->pdata->setup_resources();
+		if (err) {
+			err = -EINVAL;
+			goto fail3;
+		}
+	}
+
+	err = sysfs_create_group(&chip->client->dev.kobj,
+				apds990x_attribute_group);
+	if (err < 0) {
+		dev_err(&chip->client->dev, "Sysfs registration failed\n");
+		goto fail4;
+	}
+
+	err = request_threaded_irq(client->irq, NULL,
+				apds990x_irq,
+				IRQF_TRIGGER_FALLING | IRQF_TRIGGER_LOW |
+				IRQF_ONESHOT,
+				"apds990x", chip);
+	if (err) {
+		dev_err(&client->dev, "could not get IRQ %d\n",
+			client->irq);
+		goto fail5;
+	}
+	return err;
+fail5:
+	sysfs_remove_group(&chip->client->dev.kobj,
+			&apds990x_attribute_group[0]);
+fail4:
+	if (chip->pdata && chip->pdata->release_resources)
+		chip->pdata->release_resources();
+fail3:
+	regulator_bulk_disable(ARRAY_SIZE(chip->regs), chip->regs);
+fail2:
+	regulator_bulk_free(ARRAY_SIZE(chip->regs), chip->regs);
+fail1:
+	kfree(chip);
+	return err;
+}
+
+static int apds990x_remove(struct i2c_client *client)
+{
+	struct apds990x_chip *chip = i2c_get_clientdata(client);
+
+	free_irq(client->irq, chip);
+	sysfs_remove_group(&chip->client->dev.kobj,
+			apds990x_attribute_group);
+
+	if (chip->pdata && chip->pdata->release_resources)
+		chip->pdata->release_resources();
+
+	if (!pm_runtime_suspended(&client->dev))
+		apds990x_chip_off(chip);
+
+	pm_runtime_disable(&client->dev);
+	pm_runtime_set_suspended(&client->dev);
+
+	regulator_bulk_free(ARRAY_SIZE(chip->regs), chip->regs);
+
+	kfree(chip);
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int apds990x_suspend(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct apds990x_chip *chip = i2c_get_clientdata(client);
+
+	apds990x_chip_off(chip);
+	return 0;
+}
+
+static int apds990x_resume(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct apds990x_chip *chip = i2c_get_clientdata(client);
+
+	/*
+	 * If we were enabled at suspend time, it is expected
+	 * everything works nice and smoothly. Chip_on is enough
+	 */
+	apds990x_chip_on(chip);
+
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_PM
+static int apds990x_runtime_suspend(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct apds990x_chip *chip = i2c_get_clientdata(client);
+
+	apds990x_chip_off(chip);
+	return 0;
+}
+
+static int apds990x_runtime_resume(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct apds990x_chip *chip = i2c_get_clientdata(client);
+
+	apds990x_chip_on(chip);
+	return 0;
+}
+
+#endif
+
+static const struct i2c_device_id apds990x_id[] = {
+	{"apds990x", 0 },
+	{}
+};
+
+MODULE_DEVICE_TABLE(i2c, apds990x_id);
+
+static const struct dev_pm_ops apds990x_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(apds990x_suspend, apds990x_resume)
+	SET_RUNTIME_PM_OPS(apds990x_runtime_suspend,
+			apds990x_runtime_resume,
+			NULL)
+};
+
+static struct i2c_driver apds990x_driver = {
+	.driver	 = {
+		.name	= "apds990x",
+		.pm	= &apds990x_pm_ops,
+	},
+	.probe	  = apds990x_probe,
+	.remove	  = apds990x_remove,
+	.id_table = apds990x_id,
+};
+
+module_i2c_driver(apds990x_driver);
+
+MODULE_DESCRIPTION("APDS990X combined ALS and proximity sensor");
+MODULE_AUTHOR("Samu Onkalo, Nokia Corporation");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/aspeed-lpc-ctrl.c b/drivers/misc/aspeed-lpc-ctrl.c
new file mode 100644
index 000000000..870ab0dfc
--- /dev/null
+++ b/drivers/misc/aspeed-lpc-ctrl.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright 2017 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/clk.h>
+#include <linux/mfd/syscon.h>
+#include <linux/miscdevice.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/poll.h>
+#include <linux/regmap.h>
+
+#include <linux/aspeed-lpc-ctrl.h>
+
+#define DEVICE_NAME	"aspeed-lpc-ctrl"
+
+#define HICR5 0x0
+#define HICR5_ENL2H	BIT(8)
+#define HICR5_ENFWH	BIT(10)
+
+#define HICR7 0x8
+#define HICR8 0xc
+
+struct aspeed_lpc_ctrl {
+	struct miscdevice	miscdev;
+	struct regmap		*regmap;
+	struct clk		*clk;
+	phys_addr_t		mem_base;
+	resource_size_t		mem_size;
+	u32		pnor_size;
+	u32		pnor_base;
+};
+
+static struct aspeed_lpc_ctrl *file_aspeed_lpc_ctrl(struct file *file)
+{
+	return container_of(file->private_data, struct aspeed_lpc_ctrl,
+			miscdev);
+}
+
+static int aspeed_lpc_ctrl_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct aspeed_lpc_ctrl *lpc_ctrl = file_aspeed_lpc_ctrl(file);
+	unsigned long vsize = vma->vm_end - vma->vm_start;
+	pgprot_t prot = vma->vm_page_prot;
+
+	if (vma->vm_pgoff + vma_pages(vma) > lpc_ctrl->mem_size >> PAGE_SHIFT)
+		return -EINVAL;
+
+	/* ast2400/2500 AHB accesses are not cache coherent */
+	prot = pgprot_noncached(prot);
+
+	if (remap_pfn_range(vma, vma->vm_start,
+		(lpc_ctrl->mem_base >> PAGE_SHIFT) + vma->vm_pgoff,
+		vsize, prot))
+		return -EAGAIN;
+
+	return 0;
+}
+
+static long aspeed_lpc_ctrl_ioctl(struct file *file, unsigned int cmd,
+		unsigned long param)
+{
+	struct aspeed_lpc_ctrl *lpc_ctrl = file_aspeed_lpc_ctrl(file);
+	void __user *p = (void __user *)param;
+	struct aspeed_lpc_ctrl_mapping map;
+	u32 addr;
+	u32 size;
+	long rc;
+
+	if (copy_from_user(&map, p, sizeof(map)))
+		return -EFAULT;
+
+	if (map.flags != 0)
+		return -EINVAL;
+
+	switch (cmd) {
+	case ASPEED_LPC_CTRL_IOCTL_GET_SIZE:
+		/* The flash windows don't report their size */
+		if (map.window_type != ASPEED_LPC_CTRL_WINDOW_MEMORY)
+			return -EINVAL;
+
+		/* Support more than one window id in the future */
+		if (map.window_id != 0)
+			return -EINVAL;
+
+		map.size = lpc_ctrl->mem_size;
+
+		return copy_to_user(p, &map, sizeof(map)) ? -EFAULT : 0;
+	case ASPEED_LPC_CTRL_IOCTL_MAP:
+
+		/*
+		 * The top half of HICR7 is the MSB of the BMC address of the
+		 * mapping.
+		 * The bottom half of HICR7 is the MSB of the HOST LPC
+		 * firmware space address of the mapping.
+		 *
+		 * The 1 bits in the top of half of HICR8 represent the bits
+		 * (in the requested address) that should be ignored and
+		 * replaced with those from the top half of HICR7.
+		 * The 1 bits in the bottom half of HICR8 represent the bits
+		 * (in the requested address) that should be kept and pass
+		 * into the BMC address space.
+		 */
+
+		/*
+		 * It doesn't make sense to talk about a size or offset with
+		 * low 16 bits set. Both HICR7 and HICR8 talk about the top 16
+		 * bits of addresses and sizes.
+		 */
+
+		if ((map.size & 0x0000ffff) || (map.offset & 0x0000ffff))
+			return -EINVAL;
+
+		/*
+		 * Because of the way the masks work in HICR8 offset has to
+		 * be a multiple of size.
+		 */
+		if (map.offset & (map.size - 1))
+			return -EINVAL;
+
+		if (map.window_type == ASPEED_LPC_CTRL_WINDOW_FLASH) {
+			addr = lpc_ctrl->pnor_base;
+			size = lpc_ctrl->pnor_size;
+		} else if (map.window_type == ASPEED_LPC_CTRL_WINDOW_MEMORY) {
+			addr = lpc_ctrl->mem_base;
+			size = lpc_ctrl->mem_size;
+		} else {
+			return -EINVAL;
+		}
+
+		/* Check overflow first! */
+		if (map.offset + map.size < map.offset ||
+			map.offset + map.size > size)
+			return -EINVAL;
+
+		if (map.size == 0 || map.size > size)
+			return -EINVAL;
+
+		addr += map.offset;
+
+		/*
+		 * addr (host lpc address) is safe regardless of values. This
+		 * simply changes the address the host has to request on its
+		 * side of the LPC bus. This cannot impact the hosts own
+		 * memory space by surprise as LPC specific accessors are
+		 * required. The only strange thing that could be done is
+		 * setting the lower 16 bits but the shift takes care of that.
+		 */
+
+		rc = regmap_write(lpc_ctrl->regmap, HICR7,
+				(addr | (map.addr >> 16)));
+		if (rc)
+			return rc;
+
+		rc = regmap_write(lpc_ctrl->regmap, HICR8,
+				(~(map.size - 1)) | ((map.size >> 16) - 1));
+		if (rc)
+			return rc;
+
+		/*
+		 * Enable LPC FHW cycles. This is required for the host to
+		 * access the regions specified.
+		 */
+		return regmap_update_bits(lpc_ctrl->regmap, HICR5,
+				HICR5_ENFWH | HICR5_ENL2H,
+				HICR5_ENFWH | HICR5_ENL2H);
+	}
+
+	return -EINVAL;
+}
+
+static const struct file_operations aspeed_lpc_ctrl_fops = {
+	.owner		= THIS_MODULE,
+	.mmap		= aspeed_lpc_ctrl_mmap,
+	.unlocked_ioctl	= aspeed_lpc_ctrl_ioctl,
+};
+
+static int aspeed_lpc_ctrl_probe(struct platform_device *pdev)
+{
+	struct aspeed_lpc_ctrl *lpc_ctrl;
+	struct device_node *node;
+	struct resource resm;
+	struct device *dev;
+	int rc;
+
+	dev = &pdev->dev;
+
+	lpc_ctrl = devm_kzalloc(dev, sizeof(*lpc_ctrl), GFP_KERNEL);
+	if (!lpc_ctrl)
+		return -ENOMEM;
+
+	node = of_parse_phandle(dev->of_node, "flash", 0);
+	if (!node) {
+		dev_err(dev, "Didn't find host pnor flash node\n");
+		return -ENODEV;
+	}
+
+	rc = of_address_to_resource(node, 1, &resm);
+	of_node_put(node);
+	if (rc) {
+		dev_err(dev, "Couldn't address to resource for flash\n");
+		return rc;
+	}
+
+	lpc_ctrl->pnor_size = resource_size(&resm);
+	lpc_ctrl->pnor_base = resm.start;
+
+	dev_set_drvdata(&pdev->dev, lpc_ctrl);
+
+	node = of_parse_phandle(dev->of_node, "memory-region", 0);
+	if (!node) {
+		dev_err(dev, "Didn't find reserved memory\n");
+		return -EINVAL;
+	}
+
+	rc = of_address_to_resource(node, 0, &resm);
+	of_node_put(node);
+	if (rc) {
+		dev_err(dev, "Couldn't address to resource for reserved memory\n");
+		return -ENOMEM;
+	}
+
+	lpc_ctrl->mem_size = resource_size(&resm);
+	lpc_ctrl->mem_base = resm.start;
+
+	lpc_ctrl->regmap = syscon_node_to_regmap(
+			pdev->dev.parent->of_node);
+	if (IS_ERR(lpc_ctrl->regmap)) {
+		dev_err(dev, "Couldn't get regmap\n");
+		return -ENODEV;
+	}
+
+	lpc_ctrl->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(lpc_ctrl->clk)) {
+		dev_err(dev, "couldn't get clock\n");
+		return PTR_ERR(lpc_ctrl->clk);
+	}
+	rc = clk_prepare_enable(lpc_ctrl->clk);
+	if (rc) {
+		dev_err(dev, "couldn't enable clock\n");
+		return rc;
+	}
+
+	lpc_ctrl->miscdev.minor = MISC_DYNAMIC_MINOR;
+	lpc_ctrl->miscdev.name = DEVICE_NAME;
+	lpc_ctrl->miscdev.fops = &aspeed_lpc_ctrl_fops;
+	lpc_ctrl->miscdev.parent = dev;
+	rc = misc_register(&lpc_ctrl->miscdev);
+	if (rc) {
+		dev_err(dev, "Unable to register device\n");
+		goto err;
+	}
+
+	dev_info(dev, "Loaded at %pr\n", &resm);
+
+	return 0;
+
+err:
+	clk_disable_unprepare(lpc_ctrl->clk);
+	return rc;
+}
+
+static int aspeed_lpc_ctrl_remove(struct platform_device *pdev)
+{
+	struct aspeed_lpc_ctrl *lpc_ctrl = dev_get_drvdata(&pdev->dev);
+
+	misc_deregister(&lpc_ctrl->miscdev);
+	clk_disable_unprepare(lpc_ctrl->clk);
+
+	return 0;
+}
+
+static const struct of_device_id aspeed_lpc_ctrl_match[] = {
+	{ .compatible = "aspeed,ast2400-lpc-ctrl" },
+	{ .compatible = "aspeed,ast2500-lpc-ctrl" },
+	{ },
+};
+
+static struct platform_driver aspeed_lpc_ctrl_driver = {
+	.driver = {
+		.name		= DEVICE_NAME,
+		.of_match_table = aspeed_lpc_ctrl_match,
+	},
+	.probe = aspeed_lpc_ctrl_probe,
+	.remove = aspeed_lpc_ctrl_remove,
+};
+
+module_platform_driver(aspeed_lpc_ctrl_driver);
+
+MODULE_DEVICE_TABLE(of, aspeed_lpc_ctrl_match);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Cyril Bur <cyrilbur@gmail.com>");
+MODULE_DESCRIPTION("Control for aspeed 2400/2500 LPC HOST to BMC mappings");
diff --git a/drivers/misc/aspeed-lpc-snoop.c b/drivers/misc/aspeed-lpc-snoop.c
new file mode 100644
index 000000000..e2cb0b960
--- /dev/null
+++ b/drivers/misc/aspeed-lpc-snoop.c
@@ -0,0 +1,375 @@
+/*
+ * Copyright 2017 Google Inc
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Provides a simple driver to control the ASPEED LPC snoop interface which
+ * allows the BMC to listen on and save the data written by
+ * the host to an arbitrary LPC I/O port.
+ *
+ * Typically used by the BMC to "watch" host boot progress via port
+ * 0x80 writes made by the BIOS during the boot process.
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/interrupt.h>
+#include <linux/fs.h>
+#include <linux/kfifo.h>
+#include <linux/mfd/syscon.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/poll.h>
+#include <linux/regmap.h>
+
+#define DEVICE_NAME	"aspeed-lpc-snoop"
+
+#define NUM_SNOOP_CHANNELS 2
+#define SNOOP_FIFO_SIZE 2048
+
+#define HICR5	0x0
+#define HICR5_EN_SNP0W		BIT(0)
+#define HICR5_ENINT_SNP0W	BIT(1)
+#define HICR5_EN_SNP1W		BIT(2)
+#define HICR5_ENINT_SNP1W	BIT(3)
+
+#define HICR6	0x4
+#define HICR6_STR_SNP0W		BIT(0)
+#define HICR6_STR_SNP1W		BIT(1)
+#define SNPWADR	0x10
+#define SNPWADR_CH0_MASK	GENMASK(15, 0)
+#define SNPWADR_CH0_SHIFT	0
+#define SNPWADR_CH1_MASK	GENMASK(31, 16)
+#define SNPWADR_CH1_SHIFT	16
+#define SNPWDR	0x14
+#define SNPWDR_CH0_MASK		GENMASK(7, 0)
+#define SNPWDR_CH0_SHIFT	0
+#define SNPWDR_CH1_MASK		GENMASK(15, 8)
+#define SNPWDR_CH1_SHIFT	8
+#define HICRB	0x80
+#define HICRB_ENSNP0D		BIT(14)
+#define HICRB_ENSNP1D		BIT(15)
+
+struct aspeed_lpc_snoop_model_data {
+	/* The ast2400 has bits 14 and 15 as reserved, whereas the ast2500
+	 * can use them.
+	 */
+	unsigned int has_hicrb_ensnp;
+};
+
+struct aspeed_lpc_snoop_channel {
+	struct kfifo		fifo;
+	wait_queue_head_t	wq;
+	struct miscdevice	miscdev;
+};
+
+struct aspeed_lpc_snoop {
+	struct regmap		*regmap;
+	int			irq;
+	struct clk		*clk;
+	struct aspeed_lpc_snoop_channel chan[NUM_SNOOP_CHANNELS];
+};
+
+static struct aspeed_lpc_snoop_channel *snoop_file_to_chan(struct file *file)
+{
+	return container_of(file->private_data,
+			    struct aspeed_lpc_snoop_channel,
+			    miscdev);
+}
+
+static ssize_t snoop_file_read(struct file *file, char __user *buffer,
+				size_t count, loff_t *ppos)
+{
+	struct aspeed_lpc_snoop_channel *chan = snoop_file_to_chan(file);
+	unsigned int copied;
+	int ret = 0;
+
+	if (kfifo_is_empty(&chan->fifo)) {
+		if (file->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+		ret = wait_event_interruptible(chan->wq,
+				!kfifo_is_empty(&chan->fifo));
+		if (ret == -ERESTARTSYS)
+			return -EINTR;
+	}
+	ret = kfifo_to_user(&chan->fifo, buffer, count, &copied);
+	if (ret)
+		return ret;
+
+	return copied;
+}
+
+static __poll_t snoop_file_poll(struct file *file,
+				    struct poll_table_struct *pt)
+{
+	struct aspeed_lpc_snoop_channel *chan = snoop_file_to_chan(file);
+
+	poll_wait(file, &chan->wq, pt);
+	return !kfifo_is_empty(&chan->fifo) ? EPOLLIN : 0;
+}
+
+static const struct file_operations snoop_fops = {
+	.owner  = THIS_MODULE,
+	.read   = snoop_file_read,
+	.poll   = snoop_file_poll,
+	.llseek = noop_llseek,
+};
+
+/* Save a byte to a FIFO and discard the oldest byte if FIFO is full */
+static void put_fifo_with_discard(struct aspeed_lpc_snoop_channel *chan, u8 val)
+{
+	if (!kfifo_initialized(&chan->fifo))
+		return;
+	if (kfifo_is_full(&chan->fifo))
+		kfifo_skip(&chan->fifo);
+	kfifo_put(&chan->fifo, val);
+	wake_up_interruptible(&chan->wq);
+}
+
+static irqreturn_t aspeed_lpc_snoop_irq(int irq, void *arg)
+{
+	struct aspeed_lpc_snoop *lpc_snoop = arg;
+	u32 reg, data;
+
+	if (regmap_read(lpc_snoop->regmap, HICR6, &reg))
+		return IRQ_NONE;
+
+	/* Check if one of the snoop channels is interrupting */
+	reg &= (HICR6_STR_SNP0W | HICR6_STR_SNP1W);
+	if (!reg)
+		return IRQ_NONE;
+
+	/* Ack pending IRQs */
+	regmap_write(lpc_snoop->regmap, HICR6, reg);
+
+	/* Read and save most recent snoop'ed data byte to FIFO */
+	regmap_read(lpc_snoop->regmap, SNPWDR, &data);
+
+	if (reg & HICR6_STR_SNP0W) {
+		u8 val = (data & SNPWDR_CH0_MASK) >> SNPWDR_CH0_SHIFT;
+
+		put_fifo_with_discard(&lpc_snoop->chan[0], val);
+	}
+	if (reg & HICR6_STR_SNP1W) {
+		u8 val = (data & SNPWDR_CH1_MASK) >> SNPWDR_CH1_SHIFT;
+
+		put_fifo_with_discard(&lpc_snoop->chan[1], val);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int aspeed_lpc_snoop_config_irq(struct aspeed_lpc_snoop *lpc_snoop,
+				       struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	int rc;
+
+	lpc_snoop->irq = platform_get_irq(pdev, 0);
+	if (!lpc_snoop->irq)
+		return -ENODEV;
+
+	rc = devm_request_irq(dev, lpc_snoop->irq,
+			      aspeed_lpc_snoop_irq, IRQF_SHARED,
+			      DEVICE_NAME, lpc_snoop);
+	if (rc < 0) {
+		dev_warn(dev, "Unable to request IRQ %d\n", lpc_snoop->irq);
+		lpc_snoop->irq = 0;
+		return rc;
+	}
+
+	return 0;
+}
+
+static int aspeed_lpc_enable_snoop(struct aspeed_lpc_snoop *lpc_snoop,
+				   struct device *dev,
+				   int channel, u16 lpc_port)
+{
+	int rc = 0;
+	u32 hicr5_en, snpwadr_mask, snpwadr_shift, hicrb_en;
+	const struct aspeed_lpc_snoop_model_data *model_data =
+		of_device_get_match_data(dev);
+
+	init_waitqueue_head(&lpc_snoop->chan[channel].wq);
+	/* Create FIFO datastructure */
+	rc = kfifo_alloc(&lpc_snoop->chan[channel].fifo,
+			 SNOOP_FIFO_SIZE, GFP_KERNEL);
+	if (rc)
+		return rc;
+
+	lpc_snoop->chan[channel].miscdev.minor = MISC_DYNAMIC_MINOR;
+	lpc_snoop->chan[channel].miscdev.name =
+		devm_kasprintf(dev, GFP_KERNEL, "%s%d", DEVICE_NAME, channel);
+	lpc_snoop->chan[channel].miscdev.fops = &snoop_fops;
+	lpc_snoop->chan[channel].miscdev.parent = dev;
+	rc = misc_register(&lpc_snoop->chan[channel].miscdev);
+	if (rc)
+		return rc;
+
+	/* Enable LPC snoop channel at requested port */
+	switch (channel) {
+	case 0:
+		hicr5_en = HICR5_EN_SNP0W | HICR5_ENINT_SNP0W;
+		snpwadr_mask = SNPWADR_CH0_MASK;
+		snpwadr_shift = SNPWADR_CH0_SHIFT;
+		hicrb_en = HICRB_ENSNP0D;
+		break;
+	case 1:
+		hicr5_en = HICR5_EN_SNP1W | HICR5_ENINT_SNP1W;
+		snpwadr_mask = SNPWADR_CH1_MASK;
+		snpwadr_shift = SNPWADR_CH1_SHIFT;
+		hicrb_en = HICRB_ENSNP1D;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	regmap_update_bits(lpc_snoop->regmap, HICR5, hicr5_en, hicr5_en);
+	regmap_update_bits(lpc_snoop->regmap, SNPWADR, snpwadr_mask,
+			   lpc_port << snpwadr_shift);
+	if (model_data->has_hicrb_ensnp)
+		regmap_update_bits(lpc_snoop->regmap, HICRB,
+				hicrb_en, hicrb_en);
+
+	return rc;
+}
+
+static void aspeed_lpc_disable_snoop(struct aspeed_lpc_snoop *lpc_snoop,
+				     int channel)
+{
+	switch (channel) {
+	case 0:
+		regmap_update_bits(lpc_snoop->regmap, HICR5,
+				   HICR5_EN_SNP0W | HICR5_ENINT_SNP0W,
+				   0);
+		break;
+	case 1:
+		regmap_update_bits(lpc_snoop->regmap, HICR5,
+				   HICR5_EN_SNP1W | HICR5_ENINT_SNP1W,
+				   0);
+		break;
+	default:
+		return;
+	}
+
+	kfifo_free(&lpc_snoop->chan[channel].fifo);
+	misc_deregister(&lpc_snoop->chan[channel].miscdev);
+}
+
+static int aspeed_lpc_snoop_probe(struct platform_device *pdev)
+{
+	struct aspeed_lpc_snoop *lpc_snoop;
+	struct device *dev;
+	u32 port;
+	int rc;
+
+	dev = &pdev->dev;
+
+	lpc_snoop = devm_kzalloc(dev, sizeof(*lpc_snoop), GFP_KERNEL);
+	if (!lpc_snoop)
+		return -ENOMEM;
+
+	lpc_snoop->regmap = syscon_node_to_regmap(
+			pdev->dev.parent->of_node);
+	if (IS_ERR(lpc_snoop->regmap)) {
+		dev_err(dev, "Couldn't get regmap\n");
+		return -ENODEV;
+	}
+
+	dev_set_drvdata(&pdev->dev, lpc_snoop);
+
+	rc = of_property_read_u32_index(dev->of_node, "snoop-ports", 0, &port);
+	if (rc) {
+		dev_err(dev, "no snoop ports configured\n");
+		return -ENODEV;
+	}
+
+	lpc_snoop->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(lpc_snoop->clk)) {
+		rc = PTR_ERR(lpc_snoop->clk);
+		if (rc != -EPROBE_DEFER)
+			dev_err(dev, "couldn't get clock\n");
+		return rc;
+	}
+	rc = clk_prepare_enable(lpc_snoop->clk);
+	if (rc) {
+		dev_err(dev, "couldn't enable clock\n");
+		return rc;
+	}
+
+	rc = aspeed_lpc_snoop_config_irq(lpc_snoop, pdev);
+	if (rc)
+		goto err;
+
+	rc = aspeed_lpc_enable_snoop(lpc_snoop, dev, 0, port);
+	if (rc)
+		goto err;
+
+	/* Configuration of 2nd snoop channel port is optional */
+	if (of_property_read_u32_index(dev->of_node, "snoop-ports",
+				       1, &port) == 0) {
+		rc = aspeed_lpc_enable_snoop(lpc_snoop, dev, 1, port);
+		if (rc) {
+			aspeed_lpc_disable_snoop(lpc_snoop, 0);
+			goto err;
+		}
+	}
+
+	return 0;
+
+err:
+	clk_disable_unprepare(lpc_snoop->clk);
+
+	return rc;
+}
+
+static int aspeed_lpc_snoop_remove(struct platform_device *pdev)
+{
+	struct aspeed_lpc_snoop *lpc_snoop = dev_get_drvdata(&pdev->dev);
+
+	/* Disable both snoop channels */
+	aspeed_lpc_disable_snoop(lpc_snoop, 0);
+	aspeed_lpc_disable_snoop(lpc_snoop, 1);
+
+	clk_disable_unprepare(lpc_snoop->clk);
+
+	return 0;
+}
+
+static const struct aspeed_lpc_snoop_model_data ast2400_model_data = {
+	.has_hicrb_ensnp = 0,
+};
+
+static const struct aspeed_lpc_snoop_model_data ast2500_model_data = {
+	.has_hicrb_ensnp = 1,
+};
+
+static const struct of_device_id aspeed_lpc_snoop_match[] = {
+	{ .compatible = "aspeed,ast2400-lpc-snoop",
+	  .data = &ast2400_model_data },
+	{ .compatible = "aspeed,ast2500-lpc-snoop",
+	  .data = &ast2500_model_data },
+	{ },
+};
+
+static struct platform_driver aspeed_lpc_snoop_driver = {
+	.driver = {
+		.name		= DEVICE_NAME,
+		.of_match_table = aspeed_lpc_snoop_match,
+	},
+	.probe = aspeed_lpc_snoop_probe,
+	.remove = aspeed_lpc_snoop_remove,
+};
+
+module_platform_driver(aspeed_lpc_snoop_driver);
+
+MODULE_DEVICE_TABLE(of, aspeed_lpc_snoop_match);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Robert Lippert <rlippert@google.com>");
+MODULE_DESCRIPTION("Linux driver to control Aspeed LPC snoop functionality");
diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c
new file mode 100644
index 000000000..48521861b
--- /dev/null
+++ b/drivers/misc/atmel-ssc.c
@@ -0,0 +1,285 @@
+/*
+ * Atmel SSC driver
+ *
+ * Copyright (C) 2007 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/platform_device.h>
+#include <linux/list.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+#include <linux/atmel-ssc.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+#include <linux/of.h>
+
+#include "../../sound/soc/atmel/atmel_ssc_dai.h"
+
+/* Serialize access to ssc_list and user count */
+static DEFINE_MUTEX(user_lock);
+static LIST_HEAD(ssc_list);
+
+struct ssc_device *ssc_request(unsigned int ssc_num)
+{
+	int ssc_valid = 0;
+	struct ssc_device *ssc;
+
+	mutex_lock(&user_lock);
+	list_for_each_entry(ssc, &ssc_list, list) {
+		if (ssc->pdev->dev.of_node) {
+			if (of_alias_get_id(ssc->pdev->dev.of_node, "ssc")
+				== ssc_num) {
+				ssc->pdev->id = ssc_num;
+				ssc_valid = 1;
+				break;
+			}
+		} else if (ssc->pdev->id == ssc_num) {
+			ssc_valid = 1;
+			break;
+		}
+	}
+
+	if (!ssc_valid) {
+		mutex_unlock(&user_lock);
+		pr_err("ssc: ssc%d platform device is missing\n", ssc_num);
+		return ERR_PTR(-ENODEV);
+	}
+
+	if (ssc->user) {
+		mutex_unlock(&user_lock);
+		dev_dbg(&ssc->pdev->dev, "module busy\n");
+		return ERR_PTR(-EBUSY);
+	}
+	ssc->user++;
+	mutex_unlock(&user_lock);
+
+	clk_prepare(ssc->clk);
+
+	return ssc;
+}
+EXPORT_SYMBOL(ssc_request);
+
+void ssc_free(struct ssc_device *ssc)
+{
+	bool disable_clk = true;
+
+	mutex_lock(&user_lock);
+	if (ssc->user)
+		ssc->user--;
+	else {
+		disable_clk = false;
+		dev_dbg(&ssc->pdev->dev, "device already free\n");
+	}
+	mutex_unlock(&user_lock);
+
+	if (disable_clk)
+		clk_unprepare(ssc->clk);
+}
+EXPORT_SYMBOL(ssc_free);
+
+static struct atmel_ssc_platform_data at91rm9200_config = {
+	.use_dma = 0,
+	.has_fslen_ext = 0,
+};
+
+static struct atmel_ssc_platform_data at91sam9rl_config = {
+	.use_dma = 0,
+	.has_fslen_ext = 1,
+};
+
+static struct atmel_ssc_platform_data at91sam9g45_config = {
+	.use_dma = 1,
+	.has_fslen_ext = 1,
+};
+
+static const struct platform_device_id atmel_ssc_devtypes[] = {
+	{
+		.name = "at91rm9200_ssc",
+		.driver_data = (unsigned long) &at91rm9200_config,
+	}, {
+		.name = "at91sam9rl_ssc",
+		.driver_data = (unsigned long) &at91sam9rl_config,
+	}, {
+		.name = "at91sam9g45_ssc",
+		.driver_data = (unsigned long) &at91sam9g45_config,
+	}, {
+		/* sentinel */
+	}
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id atmel_ssc_dt_ids[] = {
+	{
+		.compatible = "atmel,at91rm9200-ssc",
+		.data = &at91rm9200_config,
+	}, {
+		.compatible = "atmel,at91sam9rl-ssc",
+		.data = &at91sam9rl_config,
+	}, {
+		.compatible = "atmel,at91sam9g45-ssc",
+		.data = &at91sam9g45_config,
+	}, {
+		/* sentinel */
+	}
+};
+MODULE_DEVICE_TABLE(of, atmel_ssc_dt_ids);
+#endif
+
+static inline const struct atmel_ssc_platform_data *
+	atmel_ssc_get_driver_data(struct platform_device *pdev)
+{
+	if (pdev->dev.of_node) {
+		const struct of_device_id *match;
+		match = of_match_node(atmel_ssc_dt_ids, pdev->dev.of_node);
+		if (match == NULL)
+			return NULL;
+		return match->data;
+	}
+
+	return (struct atmel_ssc_platform_data *)
+		platform_get_device_id(pdev)->driver_data;
+}
+
+#ifdef CONFIG_SND_ATMEL_SOC_SSC
+static int ssc_sound_dai_probe(struct ssc_device *ssc)
+{
+	struct device_node *np = ssc->pdev->dev.of_node;
+	int ret;
+	int id;
+
+	ssc->sound_dai = false;
+
+	if (!of_property_read_bool(np, "#sound-dai-cells"))
+		return 0;
+
+	id = of_alias_get_id(np, "ssc");
+	if (id < 0)
+		return id;
+
+	ret = atmel_ssc_set_audio(id);
+	ssc->sound_dai = !ret;
+
+	return ret;
+}
+
+static void ssc_sound_dai_remove(struct ssc_device *ssc)
+{
+	if (!ssc->sound_dai)
+		return;
+
+	atmel_ssc_put_audio(of_alias_get_id(ssc->pdev->dev.of_node, "ssc"));
+}
+#else
+static inline int ssc_sound_dai_probe(struct ssc_device *ssc)
+{
+	if (of_property_read_bool(ssc->pdev->dev.of_node, "#sound-dai-cells"))
+		return -ENOTSUPP;
+
+	return 0;
+}
+
+static inline void ssc_sound_dai_remove(struct ssc_device *ssc)
+{
+}
+#endif
+
+static int ssc_probe(struct platform_device *pdev)
+{
+	struct resource *regs;
+	struct ssc_device *ssc;
+	const struct atmel_ssc_platform_data *plat_dat;
+
+	ssc = devm_kzalloc(&pdev->dev, sizeof(struct ssc_device), GFP_KERNEL);
+	if (!ssc) {
+		dev_dbg(&pdev->dev, "out of memory\n");
+		return -ENOMEM;
+	}
+
+	ssc->pdev = pdev;
+
+	plat_dat = atmel_ssc_get_driver_data(pdev);
+	if (!plat_dat)
+		return -ENODEV;
+	ssc->pdata = (struct atmel_ssc_platform_data *)plat_dat;
+
+	if (pdev->dev.of_node) {
+		struct device_node *np = pdev->dev.of_node;
+		ssc->clk_from_rk_pin =
+			of_property_read_bool(np, "atmel,clk-from-rk-pin");
+	}
+
+	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	ssc->regs = devm_ioremap_resource(&pdev->dev, regs);
+	if (IS_ERR(ssc->regs))
+		return PTR_ERR(ssc->regs);
+
+	ssc->phybase = regs->start;
+
+	ssc->clk = devm_clk_get(&pdev->dev, "pclk");
+	if (IS_ERR(ssc->clk)) {
+		dev_dbg(&pdev->dev, "no pclk clock defined\n");
+		return -ENXIO;
+	}
+
+	/* disable all interrupts */
+	clk_prepare_enable(ssc->clk);
+	ssc_writel(ssc->regs, IDR, -1);
+	ssc_readl(ssc->regs, SR);
+	clk_disable_unprepare(ssc->clk);
+
+	ssc->irq = platform_get_irq(pdev, 0);
+	if (ssc->irq < 0) {
+		dev_dbg(&pdev->dev, "could not get irq\n");
+		return ssc->irq;
+	}
+
+	mutex_lock(&user_lock);
+	list_add_tail(&ssc->list, &ssc_list);
+	mutex_unlock(&user_lock);
+
+	platform_set_drvdata(pdev, ssc);
+
+	dev_info(&pdev->dev, "Atmel SSC device at 0x%p (irq %d)\n",
+			ssc->regs, ssc->irq);
+
+	if (ssc_sound_dai_probe(ssc))
+		dev_err(&pdev->dev, "failed to auto-setup ssc for audio\n");
+
+	return 0;
+}
+
+static int ssc_remove(struct platform_device *pdev)
+{
+	struct ssc_device *ssc = platform_get_drvdata(pdev);
+
+	ssc_sound_dai_remove(ssc);
+
+	mutex_lock(&user_lock);
+	list_del(&ssc->list);
+	mutex_unlock(&user_lock);
+
+	return 0;
+}
+
+static struct platform_driver ssc_driver = {
+	.driver		= {
+		.name		= "ssc",
+		.of_match_table	= of_match_ptr(atmel_ssc_dt_ids),
+	},
+	.id_table	= atmel_ssc_devtypes,
+	.probe		= ssc_probe,
+	.remove		= ssc_remove,
+};
+module_platform_driver(ssc_driver);
+
+MODULE_AUTHOR("Hans-Christian Egtvedt <hcegtvedt@atmel.com>");
+MODULE_DESCRIPTION("SSC driver for Atmel AVR32 and AT91");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:ssc");
diff --git a/drivers/misc/atmel_tclib.c b/drivers/misc/atmel_tclib.c
new file mode 100644
index 000000000..ac24a4bd6
--- /dev/null
+++ b/drivers/misc/atmel_tclib.c
@@ -0,0 +1,198 @@
+#include <linux/atmel_tc.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/of.h>
+
+/*
+ * This is a thin library to solve the problem of how to portably allocate
+ * one of the TC blocks.  For simplicity, it doesn't currently expect to
+ * share individual timers between different drivers.
+ */
+
+#if defined(CONFIG_AVR32)
+/* AVR32 has these divide PBB */
+const u8 atmel_tc_divisors[5] = { 0, 4, 8, 16, 32, };
+EXPORT_SYMBOL(atmel_tc_divisors);
+
+#elif defined(CONFIG_ARCH_AT91)
+/* AT91 has these divide MCK */
+const u8 atmel_tc_divisors[5] = { 2, 8, 32, 128, 0, };
+EXPORT_SYMBOL(atmel_tc_divisors);
+
+#endif
+
+static DEFINE_SPINLOCK(tc_list_lock);
+static LIST_HEAD(tc_list);
+
+/**
+ * atmel_tc_alloc - allocate a specified TC block
+ * @block: which block to allocate
+ *
+ * Caller allocates a block.  If it is available, a pointer to a
+ * pre-initialized struct atmel_tc is returned. The caller can access
+ * the registers directly through the "regs" field.
+ */
+struct atmel_tc *atmel_tc_alloc(unsigned block)
+{
+	struct atmel_tc		*tc;
+	struct platform_device	*pdev = NULL;
+
+	spin_lock(&tc_list_lock);
+	list_for_each_entry(tc, &tc_list, node) {
+		if (tc->allocated)
+			continue;
+
+		if ((tc->pdev->dev.of_node && tc->id == block) ||
+		    (tc->pdev->id == block)) {
+			pdev = tc->pdev;
+			tc->allocated = true;
+			break;
+		}
+	}
+	spin_unlock(&tc_list_lock);
+
+	return pdev ? tc : NULL;
+}
+EXPORT_SYMBOL_GPL(atmel_tc_alloc);
+
+/**
+ * atmel_tc_free - release a specified TC block
+ * @tc: Timer/counter block that was returned by atmel_tc_alloc()
+ *
+ * This reverses the effect of atmel_tc_alloc(), invalidating the resource
+ * returned by that routine and making the TC available to other drivers.
+ */
+void atmel_tc_free(struct atmel_tc *tc)
+{
+	spin_lock(&tc_list_lock);
+	if (tc->allocated)
+		tc->allocated = false;
+	spin_unlock(&tc_list_lock);
+}
+EXPORT_SYMBOL_GPL(atmel_tc_free);
+
+#if defined(CONFIG_OF)
+static struct atmel_tcb_config tcb_rm9200_config = {
+	.counter_width = 16,
+};
+
+static struct atmel_tcb_config tcb_sam9x5_config = {
+	.counter_width = 32,
+};
+
+static const struct of_device_id atmel_tcb_dt_ids[] = {
+	{
+		.compatible = "atmel,at91rm9200-tcb",
+		.data = &tcb_rm9200_config,
+	}, {
+		.compatible = "atmel,at91sam9x5-tcb",
+		.data = &tcb_sam9x5_config,
+	}, {
+		/* sentinel */
+	}
+};
+
+MODULE_DEVICE_TABLE(of, atmel_tcb_dt_ids);
+#endif
+
+static int __init tc_probe(struct platform_device *pdev)
+{
+	struct atmel_tc *tc;
+	struct clk	*clk;
+	int		irq;
+	struct resource	*r;
+	unsigned int	i;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return -EINVAL;
+
+	tc = devm_kzalloc(&pdev->dev, sizeof(struct atmel_tc), GFP_KERNEL);
+	if (!tc)
+		return -ENOMEM;
+
+	tc->pdev = pdev;
+
+	clk = devm_clk_get(&pdev->dev, "t0_clk");
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+
+	tc->slow_clk = devm_clk_get(&pdev->dev, "slow_clk");
+	if (IS_ERR(tc->slow_clk))
+		return PTR_ERR(tc->slow_clk);
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tc->regs = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(tc->regs))
+		return PTR_ERR(tc->regs);
+
+	/* Now take SoC information if available */
+	if (pdev->dev.of_node) {
+		const struct of_device_id *match;
+		match = of_match_node(atmel_tcb_dt_ids, pdev->dev.of_node);
+		if (match)
+			tc->tcb_config = match->data;
+
+		tc->id = of_alias_get_id(tc->pdev->dev.of_node, "tcb");
+	} else {
+		tc->id = pdev->id;
+	}
+
+	tc->clk[0] = clk;
+	tc->clk[1] = devm_clk_get(&pdev->dev, "t1_clk");
+	if (IS_ERR(tc->clk[1]))
+		tc->clk[1] = clk;
+	tc->clk[2] = devm_clk_get(&pdev->dev, "t2_clk");
+	if (IS_ERR(tc->clk[2]))
+		tc->clk[2] = clk;
+
+	tc->irq[0] = irq;
+	tc->irq[1] = platform_get_irq(pdev, 1);
+	if (tc->irq[1] < 0)
+		tc->irq[1] = irq;
+	tc->irq[2] = platform_get_irq(pdev, 2);
+	if (tc->irq[2] < 0)
+		tc->irq[2] = irq;
+
+	for (i = 0; i < 3; i++)
+		writel(ATMEL_TC_ALL_IRQ, tc->regs + ATMEL_TC_REG(i, IDR));
+
+	spin_lock(&tc_list_lock);
+	list_add_tail(&tc->node, &tc_list);
+	spin_unlock(&tc_list_lock);
+
+	platform_set_drvdata(pdev, tc);
+
+	return 0;
+}
+
+static void tc_shutdown(struct platform_device *pdev)
+{
+	int i;
+	struct atmel_tc *tc = platform_get_drvdata(pdev);
+
+	for (i = 0; i < 3; i++)
+		writel(ATMEL_TC_ALL_IRQ, tc->regs + ATMEL_TC_REG(i, IDR));
+}
+
+static struct platform_driver tc_driver = {
+	.driver = {
+		.name	= "atmel_tcb",
+		.of_match_table	= of_match_ptr(atmel_tcb_dt_ids),
+	},
+	.shutdown = tc_shutdown,
+};
+
+static int __init tc_init(void)
+{
+	return platform_driver_probe(&tc_driver, tc_probe);
+}
+arch_initcall(tc_init);
diff --git a/drivers/misc/bh1770glc.c b/drivers/misc/bh1770glc.c
new file mode 100644
index 000000000..9c62bf064
--- /dev/null
+++ b/drivers/misc/bh1770glc.c
@@ -0,0 +1,1410 @@
+/*
+ * This file is part of the ROHM BH1770GLC / OSRAM SFH7770 sensor driver.
+ * Chip is combined proximity and ambient light sensor.
+ *
+ * Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
+ *
+ * Contact: Samu Onkalo <samu.p.onkalo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/mutex.h>
+#include <linux/platform_data/bh1770glc.h>
+#include <linux/regulator/consumer.h>
+#include <linux/pm_runtime.h>
+#include <linux/workqueue.h>
+#include <linux/delay.h>
+#include <linux/wait.h>
+#include <linux/slab.h>
+
+#define BH1770_ALS_CONTROL	0x80 /* ALS operation mode control */
+#define BH1770_PS_CONTROL	0x81 /* PS operation mode control */
+#define BH1770_I_LED		0x82 /* active LED and LED1, LED2 current */
+#define BH1770_I_LED3		0x83 /* LED3 current setting */
+#define BH1770_ALS_PS_MEAS	0x84 /* Forced mode trigger */
+#define BH1770_PS_MEAS_RATE	0x85 /* PS meas. rate at stand alone mode */
+#define BH1770_ALS_MEAS_RATE	0x86 /* ALS meas. rate at stand alone mode */
+#define BH1770_PART_ID		0x8a /* Part number and revision ID */
+#define BH1770_MANUFACT_ID	0x8b /* Manufacturerer ID */
+#define BH1770_ALS_DATA_0	0x8c /* ALS DATA low byte */
+#define BH1770_ALS_DATA_1	0x8d /* ALS DATA high byte */
+#define BH1770_ALS_PS_STATUS	0x8e /* Measurement data and int status */
+#define BH1770_PS_DATA_LED1	0x8f /* PS data from LED1 */
+#define BH1770_PS_DATA_LED2	0x90 /* PS data from LED2 */
+#define BH1770_PS_DATA_LED3	0x91 /* PS data from LED3 */
+#define BH1770_INTERRUPT	0x92 /* Interrupt setting */
+#define BH1770_PS_TH_LED1	0x93 /* PS interrupt threshold for LED1 */
+#define BH1770_PS_TH_LED2	0x94 /* PS interrupt threshold for LED2 */
+#define BH1770_PS_TH_LED3	0x95 /* PS interrupt threshold for LED3 */
+#define BH1770_ALS_TH_UP_0	0x96 /* ALS upper threshold low byte */
+#define BH1770_ALS_TH_UP_1	0x97 /* ALS upper threshold high byte */
+#define BH1770_ALS_TH_LOW_0	0x98 /* ALS lower threshold low byte */
+#define BH1770_ALS_TH_LOW_1	0x99 /* ALS lower threshold high byte */
+
+/* MANUFACT_ID */
+#define BH1770_MANUFACT_ROHM	0x01
+#define BH1770_MANUFACT_OSRAM	0x03
+
+/* PART_ID */
+#define BH1770_PART		0x90
+#define BH1770_PART_MASK	0xf0
+#define BH1770_REV_MASK		0x0f
+#define BH1770_REV_SHIFT	0
+#define BH1770_REV_0		0x00
+#define BH1770_REV_1		0x01
+
+/* Operating modes for both */
+#define BH1770_STANDBY		0x00
+#define BH1770_FORCED		0x02
+#define BH1770_STANDALONE	0x03
+#define BH1770_SWRESET		(0x01 << 2)
+
+#define BH1770_PS_TRIG_MEAS	(1 << 0)
+#define BH1770_ALS_TRIG_MEAS	(1 << 1)
+
+/* Interrupt control */
+#define BH1770_INT_OUTPUT_MODE	(1 << 3) /* 0 = latched */
+#define BH1770_INT_POLARITY	(1 << 2) /* 1 = active high */
+#define BH1770_INT_ALS_ENA	(1 << 1)
+#define BH1770_INT_PS_ENA	(1 << 0)
+
+/* Interrupt status */
+#define BH1770_INT_LED1_DATA	(1 << 0)
+#define BH1770_INT_LED1_INT	(1 << 1)
+#define BH1770_INT_LED2_DATA	(1 << 2)
+#define BH1770_INT_LED2_INT	(1 << 3)
+#define BH1770_INT_LED3_DATA	(1 << 4)
+#define BH1770_INT_LED3_INT	(1 << 5)
+#define BH1770_INT_LEDS_INT	((1 << 1) | (1 << 3) | (1 << 5))
+#define BH1770_INT_ALS_DATA	(1 << 6)
+#define BH1770_INT_ALS_INT	(1 << 7)
+
+/* Led channels */
+#define BH1770_LED1		0x00
+
+#define BH1770_DISABLE		0
+#define BH1770_ENABLE		1
+#define BH1770_PROX_CHANNELS	1
+
+#define BH1770_LUX_DEFAULT_RATE	1 /* Index to lux rate table */
+#define BH1770_PROX_DEFAULT_RATE 1 /* Direct HW value =~ 50Hz */
+#define BH1770_PROX_DEF_RATE_THRESH 6 /* Direct HW value =~ 5 Hz */
+#define BH1770_STARTUP_DELAY	50
+#define BH1770_RESET_TIME	10
+#define BH1770_TIMEOUT		2100 /* Timeout in 2.1 seconds */
+
+#define BH1770_LUX_RANGE	65535
+#define BH1770_PROX_RANGE	255
+#define BH1770_COEF_SCALER	1024
+#define BH1770_CALIB_SCALER	8192
+#define BH1770_LUX_NEUTRAL_CALIB_VALUE (1 * BH1770_CALIB_SCALER)
+#define BH1770_LUX_DEF_THRES	1000
+#define BH1770_PROX_DEF_THRES	70
+#define BH1770_PROX_DEF_ABS_THRES   100
+#define BH1770_DEFAULT_PERSISTENCE  10
+#define BH1770_PROX_MAX_PERSISTENCE 50
+#define BH1770_LUX_GA_SCALE	16384
+#define BH1770_LUX_CF_SCALE	2048 /* CF ChipFactor */
+#define BH1770_NEUTRAL_CF	BH1770_LUX_CF_SCALE
+#define BH1770_LUX_CORR_SCALE	4096
+
+#define PROX_ABOVE_THRESHOLD	1
+#define PROX_BELOW_THRESHOLD	0
+
+#define PROX_IGNORE_LUX_LIMIT	500
+
+struct bh1770_chip {
+	struct bh1770_platform_data	*pdata;
+	char				chipname[10];
+	u8				revision;
+	struct i2c_client		*client;
+	struct regulator_bulk_data	regs[2];
+	struct mutex			mutex; /* avoid parallel access */
+	wait_queue_head_t		wait;
+
+	bool			int_mode_prox;
+	bool			int_mode_lux;
+	struct delayed_work	prox_work;
+	u32	lux_cf; /* Chip specific factor */
+	u32	lux_ga;
+	u32	lux_calib;
+	int	lux_rate_index;
+	u32	lux_corr;
+	u16	lux_data_raw;
+	u16	lux_threshold_hi;
+	u16	lux_threshold_lo;
+	u16	lux_thres_hi_onchip;
+	u16	lux_thres_lo_onchip;
+	bool	lux_wait_result;
+
+	int	prox_enable_count;
+	u16	prox_coef;
+	u16	prox_const;
+	int	prox_rate;
+	int	prox_rate_threshold;
+	u8	prox_persistence;
+	u8	prox_persistence_counter;
+	u8	prox_data;
+	u8	prox_threshold;
+	u8	prox_threshold_hw;
+	bool	prox_force_update;
+	u8	prox_abs_thres;
+	u8	prox_led;
+};
+
+static const char reg_vcc[] = "Vcc";
+static const char reg_vleds[] = "Vleds";
+
+/*
+ * Supported stand alone rates in ms from chip data sheet
+ * {10, 20, 30, 40, 70, 100, 200, 500, 1000, 2000};
+ */
+static const s16 prox_rates_hz[] = {100, 50, 33, 25, 14, 10, 5, 2};
+static const s16 prox_rates_ms[] = {10, 20, 30, 40, 70, 100, 200, 500};
+
+/* Supported IR-led currents in mA */
+static const u8 prox_curr_ma[] = {5, 10, 20, 50, 100, 150, 200};
+
+/*
+ * Supported stand alone rates in ms from chip data sheet
+ * {100, 200, 500, 1000, 2000};
+ */
+static const s16 lux_rates_hz[] = {10, 5, 2, 1, 0};
+
+/*
+ * interrupt control functions are called while keeping chip->mutex
+ * excluding module probe / remove
+ */
+static inline int bh1770_lux_interrupt_control(struct bh1770_chip *chip,
+					int lux)
+{
+	chip->int_mode_lux = lux;
+	/* Set interrupt modes, interrupt active low, latched */
+	return i2c_smbus_write_byte_data(chip->client,
+					BH1770_INTERRUPT,
+					(lux << 1) | chip->int_mode_prox);
+}
+
+static inline int bh1770_prox_interrupt_control(struct bh1770_chip *chip,
+					int ps)
+{
+	chip->int_mode_prox = ps;
+	return i2c_smbus_write_byte_data(chip->client,
+					BH1770_INTERRUPT,
+					(chip->int_mode_lux << 1) | (ps << 0));
+}
+
+/* chip->mutex is always kept here */
+static int bh1770_lux_rate(struct bh1770_chip *chip, int rate_index)
+{
+	/* sysfs may call this when the chip is powered off */
+	if (pm_runtime_suspended(&chip->client->dev))
+		return 0;
+
+	/* Proper proximity response needs fastest lux rate (100ms) */
+	if (chip->prox_enable_count)
+		rate_index = 0;
+
+	return i2c_smbus_write_byte_data(chip->client,
+					BH1770_ALS_MEAS_RATE,
+					rate_index);
+}
+
+static int bh1770_prox_rate(struct bh1770_chip *chip, int mode)
+{
+	int rate;
+
+	rate = (mode == PROX_ABOVE_THRESHOLD) ?
+		chip->prox_rate_threshold : chip->prox_rate;
+
+	return i2c_smbus_write_byte_data(chip->client,
+					BH1770_PS_MEAS_RATE,
+					rate);
+}
+
+/* InfraredLED is controlled by the chip during proximity scanning */
+static inline int bh1770_led_cfg(struct bh1770_chip *chip)
+{
+	/* LED cfg, current for leds 1 and 2 */
+	return i2c_smbus_write_byte_data(chip->client,
+					BH1770_I_LED,
+					(BH1770_LED1 << 6) |
+					(BH1770_LED_5mA << 3) |
+					chip->prox_led);
+}
+
+/*
+ * Following two functions converts raw ps values from HW to normalized
+ * values. Purpose is to compensate differences between different sensor
+ * versions and variants so that result means about the same between
+ * versions.
+ */
+static inline u8 bh1770_psraw_to_adjusted(struct bh1770_chip *chip, u8 psraw)
+{
+	u16 adjusted;
+	adjusted = (u16)(((u32)(psraw + chip->prox_const) * chip->prox_coef) /
+		BH1770_COEF_SCALER);
+	if (adjusted > BH1770_PROX_RANGE)
+		adjusted = BH1770_PROX_RANGE;
+	return adjusted;
+}
+
+static inline u8 bh1770_psadjusted_to_raw(struct bh1770_chip *chip, u8 ps)
+{
+	u16 raw;
+
+	raw = (((u32)ps * BH1770_COEF_SCALER) / chip->prox_coef);
+	if (raw > chip->prox_const)
+		raw = raw - chip->prox_const;
+	else
+		raw = 0;
+	return raw;
+}
+
+/*
+ * Following two functions converts raw lux values from HW to normalized
+ * values. Purpose is to compensate differences between different sensor
+ * versions and variants so that result means about the same between
+ * versions. Chip->mutex is kept when this is called.
+ */
+static int bh1770_prox_set_threshold(struct bh1770_chip *chip)
+{
+	u8 tmp = 0;
+
+	/* sysfs may call this when the chip is powered off */
+	if (pm_runtime_suspended(&chip->client->dev))
+		return 0;
+
+	tmp = bh1770_psadjusted_to_raw(chip, chip->prox_threshold);
+	chip->prox_threshold_hw = tmp;
+
+	return	i2c_smbus_write_byte_data(chip->client, BH1770_PS_TH_LED1,
+					tmp);
+}
+
+static inline u16 bh1770_lux_raw_to_adjusted(struct bh1770_chip *chip, u16 raw)
+{
+	u32 lux;
+	lux = ((u32)raw * chip->lux_corr) / BH1770_LUX_CORR_SCALE;
+	return min(lux, (u32)BH1770_LUX_RANGE);
+}
+
+static inline u16 bh1770_lux_adjusted_to_raw(struct bh1770_chip *chip,
+					u16 adjusted)
+{
+	return (u32)adjusted * BH1770_LUX_CORR_SCALE / chip->lux_corr;
+}
+
+/* chip->mutex is kept when this is called */
+static int bh1770_lux_update_thresholds(struct bh1770_chip *chip,
+					u16 threshold_hi, u16 threshold_lo)
+{
+	u8 data[4];
+	int ret;
+
+	/* sysfs may call this when the chip is powered off */
+	if (pm_runtime_suspended(&chip->client->dev))
+		return 0;
+
+	/*
+	 * Compensate threshold values with the correction factors if not
+	 * set to minimum or maximum.
+	 * Min & max values disables interrupts.
+	 */
+	if (threshold_hi != BH1770_LUX_RANGE && threshold_hi != 0)
+		threshold_hi = bh1770_lux_adjusted_to_raw(chip, threshold_hi);
+
+	if (threshold_lo != BH1770_LUX_RANGE && threshold_lo != 0)
+		threshold_lo = bh1770_lux_adjusted_to_raw(chip, threshold_lo);
+
+	if (chip->lux_thres_hi_onchip == threshold_hi &&
+	    chip->lux_thres_lo_onchip == threshold_lo)
+		return 0;
+
+	chip->lux_thres_hi_onchip = threshold_hi;
+	chip->lux_thres_lo_onchip = threshold_lo;
+
+	data[0] = threshold_hi;
+	data[1] = threshold_hi >> 8;
+	data[2] = threshold_lo;
+	data[3] = threshold_lo >> 8;
+
+	ret = i2c_smbus_write_i2c_block_data(chip->client,
+					BH1770_ALS_TH_UP_0,
+					ARRAY_SIZE(data),
+					data);
+	return ret;
+}
+
+static int bh1770_lux_get_result(struct bh1770_chip *chip)
+{
+	u16 data;
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(chip->client, BH1770_ALS_DATA_0);
+	if (ret < 0)
+		return ret;
+
+	data = ret & 0xff;
+	ret = i2c_smbus_read_byte_data(chip->client, BH1770_ALS_DATA_1);
+	if (ret < 0)
+		return ret;
+
+	chip->lux_data_raw = data | ((ret & 0xff) << 8);
+
+	return 0;
+}
+
+/* Calculate correction value which contains chip and device specific parts */
+static u32 bh1770_get_corr_value(struct bh1770_chip *chip)
+{
+	u32 tmp;
+	/* Impact of glass attenuation correction */
+	tmp = (BH1770_LUX_CORR_SCALE * chip->lux_ga) / BH1770_LUX_GA_SCALE;
+	/* Impact of chip factor correction */
+	tmp = (tmp * chip->lux_cf) / BH1770_LUX_CF_SCALE;
+	/* Impact of Device specific calibration correction */
+	tmp = (tmp * chip->lux_calib) / BH1770_CALIB_SCALER;
+	return tmp;
+}
+
+static int bh1770_lux_read_result(struct bh1770_chip *chip)
+{
+	bh1770_lux_get_result(chip);
+	return bh1770_lux_raw_to_adjusted(chip, chip->lux_data_raw);
+}
+
+/*
+ * Chip on / off functions are called while keeping mutex except probe
+ * or remove phase
+ */
+static int bh1770_chip_on(struct bh1770_chip *chip)
+{
+	int ret = regulator_bulk_enable(ARRAY_SIZE(chip->regs),
+					chip->regs);
+	if (ret < 0)
+		return ret;
+
+	usleep_range(BH1770_STARTUP_DELAY, BH1770_STARTUP_DELAY * 2);
+
+	/* Reset the chip */
+	i2c_smbus_write_byte_data(chip->client, BH1770_ALS_CONTROL,
+				BH1770_SWRESET);
+	usleep_range(BH1770_RESET_TIME, BH1770_RESET_TIME * 2);
+
+	/*
+	 * ALS is started always since proximity needs als results
+	 * for realibility estimation.
+	 * Let's assume dark until the first ALS measurement is ready.
+	 */
+	chip->lux_data_raw = 0;
+	chip->prox_data = 0;
+	ret = i2c_smbus_write_byte_data(chip->client,
+					BH1770_ALS_CONTROL, BH1770_STANDALONE);
+
+	/* Assume reset defaults */
+	chip->lux_thres_hi_onchip = BH1770_LUX_RANGE;
+	chip->lux_thres_lo_onchip = 0;
+
+	return ret;
+}
+
+static void bh1770_chip_off(struct bh1770_chip *chip)
+{
+	i2c_smbus_write_byte_data(chip->client,
+					BH1770_INTERRUPT, BH1770_DISABLE);
+	i2c_smbus_write_byte_data(chip->client,
+				BH1770_ALS_CONTROL, BH1770_STANDBY);
+	i2c_smbus_write_byte_data(chip->client,
+				BH1770_PS_CONTROL, BH1770_STANDBY);
+	regulator_bulk_disable(ARRAY_SIZE(chip->regs), chip->regs);
+}
+
+/* chip->mutex is kept when this is called */
+static int bh1770_prox_mode_control(struct bh1770_chip *chip)
+{
+	if (chip->prox_enable_count) {
+		chip->prox_force_update = true; /* Force immediate update */
+
+		bh1770_lux_rate(chip, chip->lux_rate_index);
+		bh1770_prox_set_threshold(chip);
+		bh1770_led_cfg(chip);
+		bh1770_prox_rate(chip, PROX_BELOW_THRESHOLD);
+		bh1770_prox_interrupt_control(chip, BH1770_ENABLE);
+		i2c_smbus_write_byte_data(chip->client,
+					BH1770_PS_CONTROL, BH1770_STANDALONE);
+	} else {
+		chip->prox_data = 0;
+		bh1770_lux_rate(chip, chip->lux_rate_index);
+		bh1770_prox_interrupt_control(chip, BH1770_DISABLE);
+		i2c_smbus_write_byte_data(chip->client,
+					BH1770_PS_CONTROL, BH1770_STANDBY);
+	}
+	return 0;
+}
+
+/* chip->mutex is kept when this is called */
+static int bh1770_prox_read_result(struct bh1770_chip *chip)
+{
+	int ret;
+	bool above;
+	u8 mode;
+
+	ret = i2c_smbus_read_byte_data(chip->client, BH1770_PS_DATA_LED1);
+	if (ret < 0)
+		goto out;
+
+	if (ret > chip->prox_threshold_hw)
+		above = true;
+	else
+		above = false;
+
+	/*
+	 * when ALS levels goes above limit, proximity result may be
+	 * false proximity. Thus ignore the result. With real proximity
+	 * there is a shadow causing low als levels.
+	 */
+	if (chip->lux_data_raw > PROX_IGNORE_LUX_LIMIT)
+		ret = 0;
+
+	chip->prox_data = bh1770_psraw_to_adjusted(chip, ret);
+
+	/* Strong proximity level or force mode requires immediate response */
+	if (chip->prox_data >= chip->prox_abs_thres ||
+	    chip->prox_force_update)
+		chip->prox_persistence_counter = chip->prox_persistence;
+
+	chip->prox_force_update = false;
+
+	/* Persistence filttering to reduce false proximity events */
+	if (likely(above)) {
+		if (chip->prox_persistence_counter < chip->prox_persistence) {
+			chip->prox_persistence_counter++;
+			ret = -ENODATA;
+		} else {
+			mode = PROX_ABOVE_THRESHOLD;
+			ret = 0;
+		}
+	} else {
+		chip->prox_persistence_counter = 0;
+		mode = PROX_BELOW_THRESHOLD;
+		chip->prox_data = 0;
+		ret = 0;
+	}
+
+	/* Set proximity detection rate based on above or below value */
+	if (ret == 0) {
+		bh1770_prox_rate(chip, mode);
+		sysfs_notify(&chip->client->dev.kobj, NULL, "prox0_raw");
+	}
+out:
+	return ret;
+}
+
+static int bh1770_detect(struct bh1770_chip *chip)
+{
+	struct i2c_client *client = chip->client;
+	s32 ret;
+	u8 manu, part;
+
+	ret = i2c_smbus_read_byte_data(client, BH1770_MANUFACT_ID);
+	if (ret < 0)
+		goto error;
+	manu = (u8)ret;
+
+	ret = i2c_smbus_read_byte_data(client, BH1770_PART_ID);
+	if (ret < 0)
+		goto error;
+	part = (u8)ret;
+
+	chip->revision = (part & BH1770_REV_MASK) >> BH1770_REV_SHIFT;
+	chip->prox_coef = BH1770_COEF_SCALER;
+	chip->prox_const = 0;
+	chip->lux_cf = BH1770_NEUTRAL_CF;
+
+	if ((manu == BH1770_MANUFACT_ROHM) &&
+	    ((part & BH1770_PART_MASK) == BH1770_PART)) {
+		snprintf(chip->chipname, sizeof(chip->chipname), "BH1770GLC");
+		return 0;
+	}
+
+	if ((manu == BH1770_MANUFACT_OSRAM) &&
+	    ((part & BH1770_PART_MASK) == BH1770_PART)) {
+		snprintf(chip->chipname, sizeof(chip->chipname), "SFH7770");
+		/* Values selected by comparing different versions */
+		chip->prox_coef = 819; /* 0.8 * BH1770_COEF_SCALER */
+		chip->prox_const = 40;
+		return 0;
+	}
+
+	ret = -ENODEV;
+error:
+	dev_dbg(&client->dev, "BH1770 or SFH7770 not found\n");
+
+	return ret;
+}
+
+/*
+ * This work is re-scheduled at every proximity interrupt.
+ * If this work is running, it means that there hasn't been any
+ * proximity interrupt in time. Situation is handled as no-proximity.
+ * It would be nice to have low-threshold interrupt or interrupt
+ * when measurement and hi-threshold are both 0. But neither of those exists.
+ * This is a workaroud for missing HW feature.
+ */
+
+static void bh1770_prox_work(struct work_struct *work)
+{
+	struct bh1770_chip *chip =
+		container_of(work, struct bh1770_chip, prox_work.work);
+
+	mutex_lock(&chip->mutex);
+	bh1770_prox_read_result(chip);
+	mutex_unlock(&chip->mutex);
+}
+
+/* This is threaded irq handler */
+static irqreturn_t bh1770_irq(int irq, void *data)
+{
+	struct bh1770_chip *chip = data;
+	int status;
+	int rate = 0;
+
+	mutex_lock(&chip->mutex);
+	status = i2c_smbus_read_byte_data(chip->client, BH1770_ALS_PS_STATUS);
+
+	/* Acknowledge interrupt by reading this register */
+	i2c_smbus_read_byte_data(chip->client, BH1770_INTERRUPT);
+
+	/*
+	 * Check if there is fresh data available for als.
+	 * If this is the very first data, update thresholds after that.
+	 */
+	if (status & BH1770_INT_ALS_DATA) {
+		bh1770_lux_get_result(chip);
+		if (unlikely(chip->lux_wait_result)) {
+			chip->lux_wait_result = false;
+			wake_up(&chip->wait);
+			bh1770_lux_update_thresholds(chip,
+						chip->lux_threshold_hi,
+						chip->lux_threshold_lo);
+		}
+	}
+
+	/* Disable interrupt logic to guarantee acknowledgement */
+	i2c_smbus_write_byte_data(chip->client, BH1770_INTERRUPT,
+				  (0 << 1) | (0 << 0));
+
+	if ((status & BH1770_INT_ALS_INT))
+		sysfs_notify(&chip->client->dev.kobj, NULL, "lux0_input");
+
+	if (chip->int_mode_prox && (status & BH1770_INT_LEDS_INT)) {
+		rate = prox_rates_ms[chip->prox_rate_threshold];
+		bh1770_prox_read_result(chip);
+	}
+
+	/* Re-enable interrupt logic */
+	i2c_smbus_write_byte_data(chip->client, BH1770_INTERRUPT,
+				  (chip->int_mode_lux << 1) |
+				  (chip->int_mode_prox << 0));
+	mutex_unlock(&chip->mutex);
+
+	/*
+	 * Can't cancel work while keeping mutex since the work uses the
+	 * same mutex.
+	 */
+	if (rate) {
+		/*
+		 * Simulate missing no-proximity interrupt 50ms after the
+		 * next expected interrupt time.
+		 */
+		cancel_delayed_work_sync(&chip->prox_work);
+		schedule_delayed_work(&chip->prox_work,
+				msecs_to_jiffies(rate + 50));
+	}
+	return IRQ_HANDLED;
+}
+
+static ssize_t bh1770_power_state_store(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct bh1770_chip *chip =  dev_get_drvdata(dev);
+	unsigned long value;
+	ssize_t ret;
+
+	ret = kstrtoul(buf, 0, &value);
+	if (ret)
+		return ret;
+
+	mutex_lock(&chip->mutex);
+	if (value) {
+		pm_runtime_get_sync(dev);
+
+		ret = bh1770_lux_rate(chip, chip->lux_rate_index);
+		if (ret < 0) {
+			pm_runtime_put(dev);
+			goto leave;
+		}
+
+		ret = bh1770_lux_interrupt_control(chip, BH1770_ENABLE);
+		if (ret < 0) {
+			pm_runtime_put(dev);
+			goto leave;
+		}
+
+		/* This causes interrupt after the next measurement cycle */
+		bh1770_lux_update_thresholds(chip, BH1770_LUX_DEF_THRES,
+					BH1770_LUX_DEF_THRES);
+		/* Inform that we are waiting for a result from ALS */
+		chip->lux_wait_result = true;
+		bh1770_prox_mode_control(chip);
+	} else if (!pm_runtime_suspended(dev)) {
+		pm_runtime_put(dev);
+	}
+	ret = count;
+leave:
+	mutex_unlock(&chip->mutex);
+	return ret;
+}
+
+static ssize_t bh1770_power_state_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", !pm_runtime_suspended(dev));
+}
+
+static ssize_t bh1770_lux_result_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct bh1770_chip *chip =  dev_get_drvdata(dev);
+	ssize_t ret;
+	long timeout;
+
+	if (pm_runtime_suspended(dev))
+		return -EIO; /* Chip is not enabled at all */
+
+	timeout = wait_event_interruptible_timeout(chip->wait,
+					!chip->lux_wait_result,
+					msecs_to_jiffies(BH1770_TIMEOUT));
+	if (!timeout)
+		return -EIO;
+
+	mutex_lock(&chip->mutex);
+	ret = sprintf(buf, "%d\n", bh1770_lux_read_result(chip));
+	mutex_unlock(&chip->mutex);
+
+	return ret;
+}
+
+static ssize_t bh1770_lux_range_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", BH1770_LUX_RANGE);
+}
+
+static ssize_t bh1770_prox_enable_store(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct bh1770_chip *chip =  dev_get_drvdata(dev);
+	unsigned long value;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &value);
+	if (ret)
+		return ret;
+
+	mutex_lock(&chip->mutex);
+	/* Assume no proximity. Sensor will tell real state soon */
+	if (!chip->prox_enable_count)
+		chip->prox_data = 0;
+
+	if (value)
+		chip->prox_enable_count++;
+	else if (chip->prox_enable_count > 0)
+		chip->prox_enable_count--;
+	else
+		goto leave;
+
+	/* Run control only when chip is powered on */
+	if (!pm_runtime_suspended(dev))
+		bh1770_prox_mode_control(chip);
+leave:
+	mutex_unlock(&chip->mutex);
+	return count;
+}
+
+static ssize_t bh1770_prox_enable_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct bh1770_chip *chip =  dev_get_drvdata(dev);
+	ssize_t len;
+
+	mutex_lock(&chip->mutex);
+	len = sprintf(buf, "%d\n", chip->prox_enable_count);
+	mutex_unlock(&chip->mutex);
+	return len;
+}
+
+static ssize_t bh1770_prox_result_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct bh1770_chip *chip =  dev_get_drvdata(dev);
+	ssize_t ret;
+
+	mutex_lock(&chip->mutex);
+	if (chip->prox_enable_count && !pm_runtime_suspended(dev))
+		ret = sprintf(buf, "%d\n", chip->prox_data);
+	else
+		ret = -EIO;
+	mutex_unlock(&chip->mutex);
+	return ret;
+}
+
+static ssize_t bh1770_prox_range_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", BH1770_PROX_RANGE);
+}
+
+static ssize_t bh1770_get_prox_rate_avail(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	int i;
+	int pos = 0;
+	for (i = 0; i < ARRAY_SIZE(prox_rates_hz); i++)
+		pos += sprintf(buf + pos, "%d ", prox_rates_hz[i]);
+	sprintf(buf + pos - 1, "\n");
+	return pos;
+}
+
+static ssize_t bh1770_get_prox_rate_above(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct bh1770_chip *chip =  dev_get_drvdata(dev);
+	return sprintf(buf, "%d\n", prox_rates_hz[chip->prox_rate_threshold]);
+}
+
+static ssize_t bh1770_get_prox_rate_below(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct bh1770_chip *chip =  dev_get_drvdata(dev);
+	return sprintf(buf, "%d\n", prox_rates_hz[chip->prox_rate]);
+}
+
+static int bh1770_prox_rate_validate(int rate)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(prox_rates_hz) - 1; i++)
+		if (rate >= prox_rates_hz[i])
+			break;
+	return i;
+}
+
+static ssize_t bh1770_set_prox_rate_above(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	struct bh1770_chip *chip =  dev_get_drvdata(dev);
+	unsigned long value;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &value);
+	if (ret)
+		return ret;
+
+	mutex_lock(&chip->mutex);
+	chip->prox_rate_threshold = bh1770_prox_rate_validate(value);
+	mutex_unlock(&chip->mutex);
+	return count;
+}
+
+static ssize_t bh1770_set_prox_rate_below(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	struct bh1770_chip *chip =  dev_get_drvdata(dev);
+	unsigned long value;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &value);
+	if (ret)
+		return ret;
+
+	mutex_lock(&chip->mutex);
+	chip->prox_rate = bh1770_prox_rate_validate(value);
+	mutex_unlock(&chip->mutex);
+	return count;
+}
+
+static ssize_t bh1770_get_prox_thres(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct bh1770_chip *chip =  dev_get_drvdata(dev);
+	return sprintf(buf, "%d\n", chip->prox_threshold);
+}
+
+static ssize_t bh1770_set_prox_thres(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct bh1770_chip *chip =  dev_get_drvdata(dev);
+	unsigned long value;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &value);
+	if (ret)
+		return ret;
+
+	if (value > BH1770_PROX_RANGE)
+		return -EINVAL;
+
+	mutex_lock(&chip->mutex);
+	chip->prox_threshold = value;
+	ret = bh1770_prox_set_threshold(chip);
+	mutex_unlock(&chip->mutex);
+	if (ret < 0)
+		return ret;
+	return count;
+}
+
+static ssize_t bh1770_prox_persistence_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct bh1770_chip *chip = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%u\n", chip->prox_persistence);
+}
+
+static ssize_t bh1770_prox_persistence_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t len)
+{
+	struct bh1770_chip *chip = dev_get_drvdata(dev);
+	unsigned long value;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &value);
+	if (ret)
+		return ret;
+
+	if (value > BH1770_PROX_MAX_PERSISTENCE)
+		return -EINVAL;
+
+	chip->prox_persistence = value;
+
+	return len;
+}
+
+static ssize_t bh1770_prox_abs_thres_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct bh1770_chip *chip = dev_get_drvdata(dev);
+	return sprintf(buf, "%u\n", chip->prox_abs_thres);
+}
+
+static ssize_t bh1770_prox_abs_thres_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t len)
+{
+	struct bh1770_chip *chip = dev_get_drvdata(dev);
+	unsigned long value;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &value);
+	if (ret)
+		return ret;
+
+	if (value > BH1770_PROX_RANGE)
+		return -EINVAL;
+
+	chip->prox_abs_thres = value;
+
+	return len;
+}
+
+static ssize_t bh1770_chip_id_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct bh1770_chip *chip =  dev_get_drvdata(dev);
+	return sprintf(buf, "%s rev %d\n", chip->chipname, chip->revision);
+}
+
+static ssize_t bh1770_lux_calib_default_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", BH1770_CALIB_SCALER);
+}
+
+static ssize_t bh1770_lux_calib_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct bh1770_chip *chip = dev_get_drvdata(dev);
+	ssize_t len;
+
+	mutex_lock(&chip->mutex);
+	len = sprintf(buf, "%u\n", chip->lux_calib);
+	mutex_unlock(&chip->mutex);
+	return len;
+}
+
+static ssize_t bh1770_lux_calib_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t len)
+{
+	struct bh1770_chip *chip = dev_get_drvdata(dev);
+	unsigned long value;
+	u32 old_calib;
+	u32 new_corr;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &value);
+	if (ret)
+		return ret;
+
+	mutex_lock(&chip->mutex);
+	old_calib = chip->lux_calib;
+	chip->lux_calib = value;
+	new_corr = bh1770_get_corr_value(chip);
+	if (new_corr == 0) {
+		chip->lux_calib = old_calib;
+		mutex_unlock(&chip->mutex);
+		return -EINVAL;
+	}
+	chip->lux_corr = new_corr;
+	/* Refresh thresholds on HW after changing correction value */
+	bh1770_lux_update_thresholds(chip, chip->lux_threshold_hi,
+				chip->lux_threshold_lo);
+
+	mutex_unlock(&chip->mutex);
+
+	return len;
+}
+
+static ssize_t bh1770_get_lux_rate_avail(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	int i;
+	int pos = 0;
+	for (i = 0; i < ARRAY_SIZE(lux_rates_hz); i++)
+		pos += sprintf(buf + pos, "%d ", lux_rates_hz[i]);
+	sprintf(buf + pos - 1, "\n");
+	return pos;
+}
+
+static ssize_t bh1770_get_lux_rate(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct bh1770_chip *chip =  dev_get_drvdata(dev);
+	return sprintf(buf, "%d\n", lux_rates_hz[chip->lux_rate_index]);
+}
+
+static ssize_t bh1770_set_lux_rate(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct bh1770_chip *chip =  dev_get_drvdata(dev);
+	unsigned long rate_hz;
+	int ret, i;
+
+	ret = kstrtoul(buf, 0, &rate_hz);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < ARRAY_SIZE(lux_rates_hz) - 1; i++)
+		if (rate_hz >= lux_rates_hz[i])
+			break;
+
+	mutex_lock(&chip->mutex);
+	chip->lux_rate_index = i;
+	ret = bh1770_lux_rate(chip, i);
+	mutex_unlock(&chip->mutex);
+
+	if (ret < 0)
+		return ret;
+
+	return count;
+}
+
+static ssize_t bh1770_get_lux_thresh_above(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct bh1770_chip *chip =  dev_get_drvdata(dev);
+	return sprintf(buf, "%d\n", chip->lux_threshold_hi);
+}
+
+static ssize_t bh1770_get_lux_thresh_below(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct bh1770_chip *chip =  dev_get_drvdata(dev);
+	return sprintf(buf, "%d\n", chip->lux_threshold_lo);
+}
+
+static ssize_t bh1770_set_lux_thresh(struct bh1770_chip *chip, u16 *target,
+				const char *buf)
+{
+	unsigned long thresh;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &thresh);
+	if (ret)
+		return ret;
+
+	if (thresh > BH1770_LUX_RANGE)
+		return -EINVAL;
+
+	mutex_lock(&chip->mutex);
+	*target = thresh;
+	/*
+	 * Don't update values in HW if we are still waiting for
+	 * first interrupt to come after device handle open call.
+	 */
+	if (!chip->lux_wait_result)
+		ret = bh1770_lux_update_thresholds(chip,
+						chip->lux_threshold_hi,
+						chip->lux_threshold_lo);
+	mutex_unlock(&chip->mutex);
+	return ret;
+
+}
+
+static ssize_t bh1770_set_lux_thresh_above(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t len)
+{
+	struct bh1770_chip *chip =  dev_get_drvdata(dev);
+	int ret = bh1770_set_lux_thresh(chip, &chip->lux_threshold_hi, buf);
+	if (ret < 0)
+		return ret;
+	return len;
+}
+
+static ssize_t bh1770_set_lux_thresh_below(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t len)
+{
+	struct bh1770_chip *chip =  dev_get_drvdata(dev);
+	int ret = bh1770_set_lux_thresh(chip, &chip->lux_threshold_lo, buf);
+	if (ret < 0)
+		return ret;
+	return len;
+}
+
+static DEVICE_ATTR(prox0_raw_en, S_IRUGO | S_IWUSR, bh1770_prox_enable_show,
+						bh1770_prox_enable_store);
+static DEVICE_ATTR(prox0_thresh_above1_value, S_IRUGO | S_IWUSR,
+						bh1770_prox_abs_thres_show,
+						bh1770_prox_abs_thres_store);
+static DEVICE_ATTR(prox0_thresh_above0_value, S_IRUGO | S_IWUSR,
+						bh1770_get_prox_thres,
+						bh1770_set_prox_thres);
+static DEVICE_ATTR(prox0_raw, S_IRUGO, bh1770_prox_result_show, NULL);
+static DEVICE_ATTR(prox0_sensor_range, S_IRUGO, bh1770_prox_range_show, NULL);
+static DEVICE_ATTR(prox0_thresh_above_count, S_IRUGO | S_IWUSR,
+						bh1770_prox_persistence_show,
+						bh1770_prox_persistence_store);
+static DEVICE_ATTR(prox0_rate_above, S_IRUGO | S_IWUSR,
+						bh1770_get_prox_rate_above,
+						bh1770_set_prox_rate_above);
+static DEVICE_ATTR(prox0_rate_below, S_IRUGO | S_IWUSR,
+						bh1770_get_prox_rate_below,
+						bh1770_set_prox_rate_below);
+static DEVICE_ATTR(prox0_rate_avail, S_IRUGO, bh1770_get_prox_rate_avail, NULL);
+
+static DEVICE_ATTR(lux0_calibscale, S_IRUGO | S_IWUSR, bh1770_lux_calib_show,
+						bh1770_lux_calib_store);
+static DEVICE_ATTR(lux0_calibscale_default, S_IRUGO,
+						bh1770_lux_calib_default_show,
+						NULL);
+static DEVICE_ATTR(lux0_input, S_IRUGO, bh1770_lux_result_show, NULL);
+static DEVICE_ATTR(lux0_sensor_range, S_IRUGO, bh1770_lux_range_show, NULL);
+static DEVICE_ATTR(lux0_rate, S_IRUGO | S_IWUSR, bh1770_get_lux_rate,
+						bh1770_set_lux_rate);
+static DEVICE_ATTR(lux0_rate_avail, S_IRUGO, bh1770_get_lux_rate_avail, NULL);
+static DEVICE_ATTR(lux0_thresh_above_value, S_IRUGO | S_IWUSR,
+						bh1770_get_lux_thresh_above,
+						bh1770_set_lux_thresh_above);
+static DEVICE_ATTR(lux0_thresh_below_value, S_IRUGO | S_IWUSR,
+						bh1770_get_lux_thresh_below,
+						bh1770_set_lux_thresh_below);
+static DEVICE_ATTR(chip_id, S_IRUGO, bh1770_chip_id_show, NULL);
+static DEVICE_ATTR(power_state, S_IRUGO | S_IWUSR, bh1770_power_state_show,
+						 bh1770_power_state_store);
+
+
+static struct attribute *sysfs_attrs[] = {
+	&dev_attr_lux0_calibscale.attr,
+	&dev_attr_lux0_calibscale_default.attr,
+	&dev_attr_lux0_input.attr,
+	&dev_attr_lux0_sensor_range.attr,
+	&dev_attr_lux0_rate.attr,
+	&dev_attr_lux0_rate_avail.attr,
+	&dev_attr_lux0_thresh_above_value.attr,
+	&dev_attr_lux0_thresh_below_value.attr,
+	&dev_attr_prox0_raw.attr,
+	&dev_attr_prox0_sensor_range.attr,
+	&dev_attr_prox0_raw_en.attr,
+	&dev_attr_prox0_thresh_above_count.attr,
+	&dev_attr_prox0_rate_above.attr,
+	&dev_attr_prox0_rate_below.attr,
+	&dev_attr_prox0_rate_avail.attr,
+	&dev_attr_prox0_thresh_above0_value.attr,
+	&dev_attr_prox0_thresh_above1_value.attr,
+	&dev_attr_chip_id.attr,
+	&dev_attr_power_state.attr,
+	NULL
+};
+
+static const struct attribute_group bh1770_attribute_group = {
+	.attrs = sysfs_attrs
+};
+
+static int bh1770_probe(struct i2c_client *client,
+				const struct i2c_device_id *id)
+{
+	struct bh1770_chip *chip;
+	int err;
+
+	chip = devm_kzalloc(&client->dev, sizeof *chip, GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	i2c_set_clientdata(client, chip);
+	chip->client  = client;
+
+	mutex_init(&chip->mutex);
+	init_waitqueue_head(&chip->wait);
+	INIT_DELAYED_WORK(&chip->prox_work, bh1770_prox_work);
+
+	if (client->dev.platform_data == NULL) {
+		dev_err(&client->dev, "platform data is mandatory\n");
+		return -EINVAL;
+	}
+
+	chip->pdata		= client->dev.platform_data;
+	chip->lux_calib		= BH1770_LUX_NEUTRAL_CALIB_VALUE;
+	chip->lux_rate_index	= BH1770_LUX_DEFAULT_RATE;
+	chip->lux_threshold_lo	= BH1770_LUX_DEF_THRES;
+	chip->lux_threshold_hi	= BH1770_LUX_DEF_THRES;
+
+	if (chip->pdata->glass_attenuation == 0)
+		chip->lux_ga = BH1770_NEUTRAL_GA;
+	else
+		chip->lux_ga = chip->pdata->glass_attenuation;
+
+	chip->prox_threshold	= BH1770_PROX_DEF_THRES;
+	chip->prox_led		= chip->pdata->led_def_curr;
+	chip->prox_abs_thres	= BH1770_PROX_DEF_ABS_THRES;
+	chip->prox_persistence	= BH1770_DEFAULT_PERSISTENCE;
+	chip->prox_rate_threshold = BH1770_PROX_DEF_RATE_THRESH;
+	chip->prox_rate		= BH1770_PROX_DEFAULT_RATE;
+	chip->prox_data		= 0;
+
+	chip->regs[0].supply = reg_vcc;
+	chip->regs[1].supply = reg_vleds;
+
+	err = devm_regulator_bulk_get(&client->dev,
+				      ARRAY_SIZE(chip->regs), chip->regs);
+	if (err < 0) {
+		dev_err(&client->dev, "Cannot get regulators\n");
+		return err;
+	}
+
+	err = regulator_bulk_enable(ARRAY_SIZE(chip->regs),
+				chip->regs);
+	if (err < 0) {
+		dev_err(&client->dev, "Cannot enable regulators\n");
+		return err;
+	}
+
+	usleep_range(BH1770_STARTUP_DELAY, BH1770_STARTUP_DELAY * 2);
+	err = bh1770_detect(chip);
+	if (err < 0)
+		goto fail0;
+
+	/* Start chip */
+	bh1770_chip_on(chip);
+	pm_runtime_set_active(&client->dev);
+	pm_runtime_enable(&client->dev);
+
+	chip->lux_corr = bh1770_get_corr_value(chip);
+	if (chip->lux_corr == 0) {
+		dev_err(&client->dev, "Improper correction values\n");
+		err = -EINVAL;
+		goto fail0;
+	}
+
+	if (chip->pdata->setup_resources) {
+		err = chip->pdata->setup_resources();
+		if (err) {
+			err = -EINVAL;
+			goto fail0;
+		}
+	}
+
+	err = sysfs_create_group(&chip->client->dev.kobj,
+				&bh1770_attribute_group);
+	if (err < 0) {
+		dev_err(&chip->client->dev, "Sysfs registration failed\n");
+		goto fail1;
+	}
+
+	/*
+	 * Chip needs level triggered interrupt to work. However,
+	 * level triggering doesn't work always correctly with power
+	 * management. Select both
+	 */
+	err = request_threaded_irq(client->irq, NULL,
+				bh1770_irq,
+				IRQF_TRIGGER_FALLING | IRQF_ONESHOT |
+				IRQF_TRIGGER_LOW,
+				"bh1770", chip);
+	if (err) {
+		dev_err(&client->dev, "could not get IRQ %d\n",
+			client->irq);
+		goto fail2;
+	}
+	regulator_bulk_disable(ARRAY_SIZE(chip->regs), chip->regs);
+	return err;
+fail2:
+	sysfs_remove_group(&chip->client->dev.kobj,
+			&bh1770_attribute_group);
+fail1:
+	if (chip->pdata->release_resources)
+		chip->pdata->release_resources();
+fail0:
+	regulator_bulk_disable(ARRAY_SIZE(chip->regs), chip->regs);
+	return err;
+}
+
+static int bh1770_remove(struct i2c_client *client)
+{
+	struct bh1770_chip *chip = i2c_get_clientdata(client);
+
+	free_irq(client->irq, chip);
+
+	sysfs_remove_group(&chip->client->dev.kobj,
+			&bh1770_attribute_group);
+
+	if (chip->pdata->release_resources)
+		chip->pdata->release_resources();
+
+	cancel_delayed_work_sync(&chip->prox_work);
+
+	if (!pm_runtime_suspended(&client->dev))
+		bh1770_chip_off(chip);
+
+	pm_runtime_disable(&client->dev);
+	pm_runtime_set_suspended(&client->dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int bh1770_suspend(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct bh1770_chip *chip = i2c_get_clientdata(client);
+
+	bh1770_chip_off(chip);
+
+	return 0;
+}
+
+static int bh1770_resume(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct bh1770_chip *chip = i2c_get_clientdata(client);
+	int ret = 0;
+
+	bh1770_chip_on(chip);
+
+	if (!pm_runtime_suspended(dev)) {
+		/*
+		 * If we were enabled at suspend time, it is expected
+		 * everything works nice and smoothly
+		 */
+		ret = bh1770_lux_rate(chip, chip->lux_rate_index);
+		ret |= bh1770_lux_interrupt_control(chip, BH1770_ENABLE);
+
+		/* This causes interrupt after the next measurement cycle */
+		bh1770_lux_update_thresholds(chip, BH1770_LUX_DEF_THRES,
+					BH1770_LUX_DEF_THRES);
+		/* Inform that we are waiting for a result from ALS */
+		chip->lux_wait_result = true;
+		bh1770_prox_mode_control(chip);
+	}
+	return ret;
+}
+#endif
+
+#ifdef CONFIG_PM
+static int bh1770_runtime_suspend(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct bh1770_chip *chip = i2c_get_clientdata(client);
+
+	bh1770_chip_off(chip);
+
+	return 0;
+}
+
+static int bh1770_runtime_resume(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct bh1770_chip *chip = i2c_get_clientdata(client);
+
+	bh1770_chip_on(chip);
+
+	return 0;
+}
+#endif
+
+static const struct i2c_device_id bh1770_id[] = {
+	{"bh1770glc", 0 },
+	{"sfh7770", 0 },
+	{}
+};
+
+MODULE_DEVICE_TABLE(i2c, bh1770_id);
+
+static const struct dev_pm_ops bh1770_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(bh1770_suspend, bh1770_resume)
+	SET_RUNTIME_PM_OPS(bh1770_runtime_suspend, bh1770_runtime_resume, NULL)
+};
+
+static struct i2c_driver bh1770_driver = {
+	.driver	 = {
+		.name	= "bh1770glc",
+		.pm	= &bh1770_pm_ops,
+	},
+	.probe	  = bh1770_probe,
+	.remove	  = bh1770_remove,
+	.id_table = bh1770_id,
+};
+
+module_i2c_driver(bh1770_driver);
+
+MODULE_DESCRIPTION("BH1770GLC / SFH7770 combined ALS and proximity sensor");
+MODULE_AUTHOR("Samu Onkalo, Nokia Corporation");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/c2port/Kconfig b/drivers/misc/c2port/Kconfig
new file mode 100644
index 000000000..0dd690e61
--- /dev/null
+++ b/drivers/misc/c2port/Kconfig
@@ -0,0 +1,34 @@
+#
+# C2 port devices
+#
+
+menuconfig C2PORT
+	tristate "Silicon Labs C2 port support"
+	default n
+	help
+	  This option enables support for Silicon Labs C2 port used to
+	  program Silicon micro controller chips (and other 8051 compatible).
+
+	  If your board have no such micro controllers you don't need this
+	  interface at all.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called c2port_core. Note that you also need a client module
+	  usually called c2port-*.
+
+	  If you are not sure, say N here.
+
+if C2PORT
+
+config C2PORT_DURAMAR_2150
+	tristate "C2 port support for Eurotech's Duramar 2150"
+	depends on X86
+	default n
+	help
+	  This option enables C2 support for the Eurotech's Duramar 2150
+	  on board micro controller.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called c2port-duramar2150.
+
+endif # C2PORT
diff --git a/drivers/misc/c2port/Makefile b/drivers/misc/c2port/Makefile
new file mode 100644
index 000000000..3b2cf43d6
--- /dev/null
+++ b/drivers/misc/c2port/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_C2PORT)		+= core.o
+
+obj-$(CONFIG_C2PORT_DURAMAR_2150)	+= c2port-duramar2150.o
diff --git a/drivers/misc/c2port/c2port-duramar2150.c b/drivers/misc/c2port/c2port-duramar2150.c
new file mode 100644
index 000000000..3dc61ea7d
--- /dev/null
+++ b/drivers/misc/c2port/c2port-duramar2150.c
@@ -0,0 +1,159 @@
+/*
+ *  Silicon Labs C2 port Linux support for Eurotech Duramar 2150
+ *
+ *  Copyright (c) 2008 Rodolfo Giometti <giometti@linux.it>
+ *  Copyright (c) 2008 Eurotech S.p.A. <info@eurotech.it>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation
+ */
+
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/c2port.h>
+
+#define DATA_PORT	0x325
+#define DIR_PORT	0x326
+#define    C2D		   (1 << 0)
+#define    C2CK		   (1 << 1)
+
+static DEFINE_MUTEX(update_lock);
+
+/*
+ * C2 port operations
+ */
+
+static void duramar2150_c2port_access(struct c2port_device *dev, int status)
+{
+	u8 v;
+
+	mutex_lock(&update_lock);
+
+	v = inb(DIR_PORT);
+
+	/* 0 = input, 1 = output */
+	if (status)
+		outb(v | (C2D | C2CK), DIR_PORT);
+	else
+		/* When access is "off" is important that both lines are set
+		 * as inputs or hi-impedance */
+		outb(v & ~(C2D | C2CK), DIR_PORT);
+
+	mutex_unlock(&update_lock);
+}
+
+static void duramar2150_c2port_c2d_dir(struct c2port_device *dev, int dir)
+{
+	u8 v;
+
+	mutex_lock(&update_lock);
+
+	v = inb(DIR_PORT);
+
+	if (dir)
+		outb(v & ~C2D, DIR_PORT);
+	else
+		outb(v | C2D, DIR_PORT);
+
+	mutex_unlock(&update_lock);
+}
+
+static int duramar2150_c2port_c2d_get(struct c2port_device *dev)
+{
+	return inb(DATA_PORT) & C2D;
+}
+
+static void duramar2150_c2port_c2d_set(struct c2port_device *dev, int status)
+{
+	u8 v;
+
+	mutex_lock(&update_lock);
+
+	v = inb(DATA_PORT);
+
+	if (status)
+		outb(v | C2D, DATA_PORT);
+	else
+		outb(v & ~C2D, DATA_PORT);
+
+	mutex_unlock(&update_lock);
+}
+
+static void duramar2150_c2port_c2ck_set(struct c2port_device *dev, int status)
+{
+	u8 v;
+
+	mutex_lock(&update_lock);
+
+	v = inb(DATA_PORT);
+
+	if (status)
+		outb(v | C2CK, DATA_PORT);
+	else
+		outb(v & ~C2CK, DATA_PORT);
+
+	mutex_unlock(&update_lock);
+}
+
+static struct c2port_ops duramar2150_c2port_ops = {
+	.block_size	= 512,	/* bytes */
+	.blocks_num	= 30,	/* total flash size: 15360 bytes */
+
+	.access		= duramar2150_c2port_access,
+	.c2d_dir	= duramar2150_c2port_c2d_dir,
+	.c2d_get	= duramar2150_c2port_c2d_get,
+	.c2d_set	= duramar2150_c2port_c2d_set,
+	.c2ck_set	= duramar2150_c2port_c2ck_set,
+};
+
+static struct c2port_device *duramar2150_c2port_dev;
+
+/*
+ * Module stuff
+ */
+
+static int __init duramar2150_c2port_init(void)
+{
+	struct resource *res;
+	int ret = 0;
+
+	res = request_region(0x325, 2, "c2port");
+	if (!res)
+		return -EBUSY;
+
+	duramar2150_c2port_dev = c2port_device_register("uc",
+					&duramar2150_c2port_ops, NULL);
+	if (IS_ERR(duramar2150_c2port_dev)) {
+		ret = PTR_ERR(duramar2150_c2port_dev);
+		goto free_region;
+	}
+
+	return 0;
+
+free_region:
+	release_region(0x325, 2);
+	return ret;
+}
+
+static void __exit duramar2150_c2port_exit(void)
+{
+	/* Setup the GPIOs as input by default (access = 0) */
+	duramar2150_c2port_access(duramar2150_c2port_dev, 0);
+
+	c2port_device_unregister(duramar2150_c2port_dev);
+
+	release_region(0x325, 2);
+}
+
+module_init(duramar2150_c2port_init);
+module_exit(duramar2150_c2port_exit);
+
+MODULE_AUTHOR("Rodolfo Giometti <giometti@linux.it>");
+MODULE_DESCRIPTION("Silicon Labs C2 port Linux support for Duramar 2150");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/c2port/core.c b/drivers/misc/c2port/core.c
new file mode 100644
index 000000000..1c5b7aec1
--- /dev/null
+++ b/drivers/misc/c2port/core.c
@@ -0,0 +1,1003 @@
+/*
+ *  Silicon Labs C2 port core Linux support
+ *
+ *  Copyright (c) 2007 Rodolfo Giometti <giometti@linux.it>
+ *  Copyright (c) 2007 Eurotech S.p.A. <info@eurotech.it>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/delay.h>
+#include <linux/idr.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <linux/c2port.h>
+
+#define DRIVER_NAME             "c2port"
+#define DRIVER_VERSION          "0.51.0"
+
+static DEFINE_SPINLOCK(c2port_idr_lock);
+static DEFINE_IDR(c2port_idr);
+
+/*
+ * Local variables
+ */
+
+static struct class *c2port_class;
+
+/*
+ * C2 registers & commands defines
+ */
+
+/* C2 registers */
+#define C2PORT_DEVICEID		0x00
+#define C2PORT_REVID		0x01
+#define C2PORT_FPCTL		0x02
+#define C2PORT_FPDAT		0xB4
+
+/* C2 interface commands */
+#define C2PORT_GET_VERSION	0x01
+#define C2PORT_DEVICE_ERASE	0x03
+#define C2PORT_BLOCK_READ	0x06
+#define C2PORT_BLOCK_WRITE	0x07
+#define C2PORT_PAGE_ERASE	0x08
+
+/* C2 status return codes */
+#define C2PORT_INVALID_COMMAND	0x00
+#define C2PORT_COMMAND_FAILED	0x02
+#define C2PORT_COMMAND_OK	0x0d
+
+/*
+ * C2 port low level signal managements
+ */
+
+static void c2port_reset(struct c2port_device *dev)
+{
+	struct c2port_ops *ops = dev->ops;
+
+	/* To reset the device we have to keep clock line low for at least
+	 * 20us.
+	 */
+	local_irq_disable();
+	ops->c2ck_set(dev, 0);
+	udelay(25);
+	ops->c2ck_set(dev, 1);
+	local_irq_enable();
+
+	udelay(1);
+}
+
+static void c2port_strobe_ck(struct c2port_device *dev)
+{
+	struct c2port_ops *ops = dev->ops;
+
+	/* During hi-low-hi transition we disable local IRQs to avoid
+	 * interructions since C2 port specification says that it must be
+	 * shorter than 5us, otherwise the microcontroller may consider
+	 * it as a reset signal!
+	 */
+	local_irq_disable();
+	ops->c2ck_set(dev, 0);
+	udelay(1);
+	ops->c2ck_set(dev, 1);
+	local_irq_enable();
+
+	udelay(1);
+}
+
+/*
+ * C2 port basic functions
+ */
+
+static void c2port_write_ar(struct c2port_device *dev, u8 addr)
+{
+	struct c2port_ops *ops = dev->ops;
+	int i;
+
+	/* START field */
+	c2port_strobe_ck(dev);
+
+	/* INS field (11b, LSB first) */
+	ops->c2d_dir(dev, 0);
+	ops->c2d_set(dev, 1);
+	c2port_strobe_ck(dev);
+	ops->c2d_set(dev, 1);
+	c2port_strobe_ck(dev);
+
+	/* ADDRESS field */
+	for (i = 0; i < 8; i++) {
+		ops->c2d_set(dev, addr & 0x01);
+		c2port_strobe_ck(dev);
+
+		addr >>= 1;
+	}
+
+	/* STOP field */
+	ops->c2d_dir(dev, 1);
+	c2port_strobe_ck(dev);
+}
+
+static int c2port_read_ar(struct c2port_device *dev, u8 *addr)
+{
+	struct c2port_ops *ops = dev->ops;
+	int i;
+
+	/* START field */
+	c2port_strobe_ck(dev);
+
+	/* INS field (10b, LSB first) */
+	ops->c2d_dir(dev, 0);
+	ops->c2d_set(dev, 0);
+	c2port_strobe_ck(dev);
+	ops->c2d_set(dev, 1);
+	c2port_strobe_ck(dev);
+
+	/* ADDRESS field */
+	ops->c2d_dir(dev, 1);
+	*addr = 0;
+	for (i = 0; i < 8; i++) {
+		*addr >>= 1;	/* shift in 8-bit ADDRESS field LSB first */
+
+		c2port_strobe_ck(dev);
+		if (ops->c2d_get(dev))
+			*addr |= 0x80;
+	}
+
+	/* STOP field */
+	c2port_strobe_ck(dev);
+
+	return 0;
+}
+
+static int c2port_write_dr(struct c2port_device *dev, u8 data)
+{
+	struct c2port_ops *ops = dev->ops;
+	int timeout, i;
+
+	/* START field */
+	c2port_strobe_ck(dev);
+
+	/* INS field (01b, LSB first) */
+	ops->c2d_dir(dev, 0);
+	ops->c2d_set(dev, 1);
+	c2port_strobe_ck(dev);
+	ops->c2d_set(dev, 0);
+	c2port_strobe_ck(dev);
+
+	/* LENGTH field (00b, LSB first -> 1 byte) */
+	ops->c2d_set(dev, 0);
+	c2port_strobe_ck(dev);
+	ops->c2d_set(dev, 0);
+	c2port_strobe_ck(dev);
+
+	/* DATA field */
+	for (i = 0; i < 8; i++) {
+		ops->c2d_set(dev, data & 0x01);
+		c2port_strobe_ck(dev);
+
+		data >>= 1;
+	}
+
+	/* WAIT field */
+	ops->c2d_dir(dev, 1);
+	timeout = 20;
+	do {
+		c2port_strobe_ck(dev);
+		if (ops->c2d_get(dev))
+			break;
+
+		udelay(1);
+	} while (--timeout > 0);
+	if (timeout == 0)
+		return -EIO;
+
+	/* STOP field */
+	c2port_strobe_ck(dev);
+
+	return 0;
+}
+
+static int c2port_read_dr(struct c2port_device *dev, u8 *data)
+{
+	struct c2port_ops *ops = dev->ops;
+	int timeout, i;
+
+	/* START field */
+	c2port_strobe_ck(dev);
+
+	/* INS field (00b, LSB first) */
+	ops->c2d_dir(dev, 0);
+	ops->c2d_set(dev, 0);
+	c2port_strobe_ck(dev);
+	ops->c2d_set(dev, 0);
+	c2port_strobe_ck(dev);
+
+	/* LENGTH field (00b, LSB first -> 1 byte) */
+	ops->c2d_set(dev, 0);
+	c2port_strobe_ck(dev);
+	ops->c2d_set(dev, 0);
+	c2port_strobe_ck(dev);
+
+	/* WAIT field */
+	ops->c2d_dir(dev, 1);
+	timeout = 20;
+	do {
+		c2port_strobe_ck(dev);
+		if (ops->c2d_get(dev))
+			break;
+
+		udelay(1);
+	} while (--timeout > 0);
+	if (timeout == 0)
+		return -EIO;
+
+	/* DATA field */
+	*data = 0;
+	for (i = 0; i < 8; i++) {
+		*data >>= 1;	/* shift in 8-bit DATA field LSB first */
+
+		c2port_strobe_ck(dev);
+		if (ops->c2d_get(dev))
+			*data |= 0x80;
+	}
+
+	/* STOP field */
+	c2port_strobe_ck(dev);
+
+	return 0;
+}
+
+static int c2port_poll_in_busy(struct c2port_device *dev)
+{
+	u8 addr;
+	int ret, timeout = 20;
+
+	do {
+		ret = (c2port_read_ar(dev, &addr));
+		if (ret < 0)
+			return -EIO;
+
+		if (!(addr & 0x02))
+			break;
+
+		udelay(1);
+	} while (--timeout > 0);
+	if (timeout == 0)
+		return -EIO;
+
+	return 0;
+}
+
+static int c2port_poll_out_ready(struct c2port_device *dev)
+{
+	u8 addr;
+	int ret, timeout = 10000; /* erase flash needs long time... */
+
+	do {
+		ret = (c2port_read_ar(dev, &addr));
+		if (ret < 0)
+			return -EIO;
+
+		if (addr & 0x01)
+			break;
+
+		udelay(1);
+	} while (--timeout > 0);
+	if (timeout == 0)
+		return -EIO;
+
+	return 0;
+}
+
+/*
+ * sysfs methods
+ */
+
+static ssize_t c2port_show_name(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%s\n", c2dev->name);
+}
+static DEVICE_ATTR(name, 0444, c2port_show_name, NULL);
+
+static ssize_t c2port_show_flash_blocks_num(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(dev);
+	struct c2port_ops *ops = c2dev->ops;
+
+	return sprintf(buf, "%d\n", ops->blocks_num);
+}
+static DEVICE_ATTR(flash_blocks_num, 0444, c2port_show_flash_blocks_num, NULL);
+
+static ssize_t c2port_show_flash_block_size(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(dev);
+	struct c2port_ops *ops = c2dev->ops;
+
+	return sprintf(buf, "%d\n", ops->block_size);
+}
+static DEVICE_ATTR(flash_block_size, 0444, c2port_show_flash_block_size, NULL);
+
+static ssize_t c2port_show_flash_size(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(dev);
+	struct c2port_ops *ops = c2dev->ops;
+
+	return sprintf(buf, "%d\n", ops->blocks_num * ops->block_size);
+}
+static DEVICE_ATTR(flash_size, 0444, c2port_show_flash_size, NULL);
+
+static ssize_t access_show(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", c2dev->access);
+}
+
+static ssize_t access_store(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(dev);
+	struct c2port_ops *ops = c2dev->ops;
+	int status, ret;
+
+	ret = sscanf(buf, "%d", &status);
+	if (ret != 1)
+		return -EINVAL;
+
+	mutex_lock(&c2dev->mutex);
+
+	c2dev->access = !!status;
+
+	/* If access is "on" clock should be HIGH _before_ setting the line
+	 * as output and data line should be set as INPUT anyway */
+	if (c2dev->access)
+		ops->c2ck_set(c2dev, 1);
+	ops->access(c2dev, c2dev->access);
+	if (c2dev->access)
+		ops->c2d_dir(c2dev, 1);
+
+	mutex_unlock(&c2dev->mutex);
+
+	return count;
+}
+static DEVICE_ATTR_RW(access);
+
+static ssize_t c2port_store_reset(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(dev);
+
+	/* Check the device access status */
+	if (!c2dev->access)
+		return -EBUSY;
+
+	mutex_lock(&c2dev->mutex);
+
+	c2port_reset(c2dev);
+	c2dev->flash_access = 0;
+
+	mutex_unlock(&c2dev->mutex);
+
+	return count;
+}
+static DEVICE_ATTR(reset, 0200, NULL, c2port_store_reset);
+
+static ssize_t __c2port_show_dev_id(struct c2port_device *dev, char *buf)
+{
+	u8 data;
+	int ret;
+
+	/* Select DEVICEID register for C2 data register accesses */
+	c2port_write_ar(dev, C2PORT_DEVICEID);
+
+	/* Read and return the device ID register */
+	ret = c2port_read_dr(dev, &data);
+	if (ret < 0)
+		return ret;
+
+	return sprintf(buf, "%d\n", data);
+}
+
+static ssize_t c2port_show_dev_id(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(dev);
+	ssize_t ret;
+
+	/* Check the device access status */
+	if (!c2dev->access)
+		return -EBUSY;
+
+	mutex_lock(&c2dev->mutex);
+	ret = __c2port_show_dev_id(c2dev, buf);
+	mutex_unlock(&c2dev->mutex);
+
+	if (ret < 0)
+		dev_err(dev, "cannot read from %s\n", c2dev->name);
+
+	return ret;
+}
+static DEVICE_ATTR(dev_id, 0444, c2port_show_dev_id, NULL);
+
+static ssize_t __c2port_show_rev_id(struct c2port_device *dev, char *buf)
+{
+	u8 data;
+	int ret;
+
+	/* Select REVID register for C2 data register accesses */
+	c2port_write_ar(dev, C2PORT_REVID);
+
+	/* Read and return the revision ID register */
+	ret = c2port_read_dr(dev, &data);
+	if (ret < 0)
+		return ret;
+
+	return sprintf(buf, "%d\n", data);
+}
+
+static ssize_t c2port_show_rev_id(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(dev);
+	ssize_t ret;
+
+	/* Check the device access status */
+	if (!c2dev->access)
+		return -EBUSY;
+
+	mutex_lock(&c2dev->mutex);
+	ret = __c2port_show_rev_id(c2dev, buf);
+	mutex_unlock(&c2dev->mutex);
+
+	if (ret < 0)
+		dev_err(c2dev->dev, "cannot read from %s\n", c2dev->name);
+
+	return ret;
+}
+static DEVICE_ATTR(rev_id, 0444, c2port_show_rev_id, NULL);
+
+static ssize_t c2port_show_flash_access(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", c2dev->flash_access);
+}
+
+static ssize_t __c2port_store_flash_access(struct c2port_device *dev,
+						int status)
+{
+	int ret;
+
+	/* Check the device access status */
+	if (!dev->access)
+		return -EBUSY;
+
+	dev->flash_access = !!status;
+
+	/* If flash_access is off we have nothing to do... */
+	if (dev->flash_access == 0)
+		return 0;
+
+	/* Target the C2 flash programming control register for C2 data
+	 * register access */
+	c2port_write_ar(dev, C2PORT_FPCTL);
+
+	/* Write the first keycode to enable C2 Flash programming */
+	ret = c2port_write_dr(dev, 0x02);
+	if (ret < 0)
+		return ret;
+
+	/* Write the second keycode to enable C2 Flash programming */
+	ret = c2port_write_dr(dev, 0x01);
+	if (ret < 0)
+		return ret;
+
+	/* Delay for at least 20ms to ensure the target is ready for
+	 * C2 flash programming */
+	mdelay(25);
+
+	return 0;
+}
+
+static ssize_t c2port_store_flash_access(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(dev);
+	int status;
+	ssize_t ret;
+
+	ret = sscanf(buf, "%d", &status);
+	if (ret != 1)
+		return -EINVAL;
+
+	mutex_lock(&c2dev->mutex);
+	ret = __c2port_store_flash_access(c2dev, status);
+	mutex_unlock(&c2dev->mutex);
+
+	if (ret < 0) {
+		dev_err(c2dev->dev, "cannot enable %s flash programming\n",
+			c2dev->name);
+		return ret;
+	}
+
+	return count;
+}
+static DEVICE_ATTR(flash_access, 0644, c2port_show_flash_access,
+		   c2port_store_flash_access);
+
+static ssize_t __c2port_write_flash_erase(struct c2port_device *dev)
+{
+	u8 status;
+	int ret;
+
+	/* Target the C2 flash programming data register for C2 data register
+	 * access.
+	 */
+	c2port_write_ar(dev, C2PORT_FPDAT);
+
+	/* Send device erase command */
+	c2port_write_dr(dev, C2PORT_DEVICE_ERASE);
+
+	/* Wait for input acknowledge */
+	ret = c2port_poll_in_busy(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Should check status before starting FLASH access sequence */
+
+	/* Wait for status information */
+	ret = c2port_poll_out_ready(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Read flash programming interface status */
+	ret = c2port_read_dr(dev, &status);
+	if (ret < 0)
+		return ret;
+	if (status != C2PORT_COMMAND_OK)
+		return -EBUSY;
+
+	/* Send a three-byte arming sequence to enable the device erase.
+	 * If the sequence is not received correctly, the command will be
+	 * ignored.
+	 * Sequence is: 0xde, 0xad, 0xa5.
+	 */
+	c2port_write_dr(dev, 0xde);
+	ret = c2port_poll_in_busy(dev);
+	if (ret < 0)
+		return ret;
+	c2port_write_dr(dev, 0xad);
+	ret = c2port_poll_in_busy(dev);
+	if (ret < 0)
+		return ret;
+	c2port_write_dr(dev, 0xa5);
+	ret = c2port_poll_in_busy(dev);
+	if (ret < 0)
+		return ret;
+
+	ret = c2port_poll_out_ready(dev);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static ssize_t c2port_store_flash_erase(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(dev);
+	int ret;
+
+	/* Check the device and flash access status */
+	if (!c2dev->access || !c2dev->flash_access)
+		return -EBUSY;
+
+	mutex_lock(&c2dev->mutex);
+	ret = __c2port_write_flash_erase(c2dev);
+	mutex_unlock(&c2dev->mutex);
+
+	if (ret < 0) {
+		dev_err(c2dev->dev, "cannot erase %s flash\n", c2dev->name);
+		return ret;
+	}
+
+	return count;
+}
+static DEVICE_ATTR(flash_erase, 0200, NULL, c2port_store_flash_erase);
+
+static ssize_t __c2port_read_flash_data(struct c2port_device *dev,
+				char *buffer, loff_t offset, size_t count)
+{
+	struct c2port_ops *ops = dev->ops;
+	u8 status, nread = 128;
+	int i, ret;
+
+	/* Check for flash end */
+	if (offset >= ops->block_size * ops->blocks_num)
+		return 0;
+
+	if (ops->block_size * ops->blocks_num - offset < nread)
+		nread = ops->block_size * ops->blocks_num - offset;
+	if (count < nread)
+		nread = count;
+	if (nread == 0)
+		return nread;
+
+	/* Target the C2 flash programming data register for C2 data register
+	 * access */
+	c2port_write_ar(dev, C2PORT_FPDAT);
+
+	/* Send flash block read command */
+	c2port_write_dr(dev, C2PORT_BLOCK_READ);
+
+	/* Wait for input acknowledge */
+	ret = c2port_poll_in_busy(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Should check status before starting FLASH access sequence */
+
+	/* Wait for status information */
+	ret = c2port_poll_out_ready(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Read flash programming interface status */
+	ret = c2port_read_dr(dev, &status);
+	if (ret < 0)
+		return ret;
+	if (status != C2PORT_COMMAND_OK)
+		return -EBUSY;
+
+	/* Send address high byte */
+	c2port_write_dr(dev, offset >> 8);
+	ret = c2port_poll_in_busy(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Send address low byte */
+	c2port_write_dr(dev, offset & 0x00ff);
+	ret = c2port_poll_in_busy(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Send address block size */
+	c2port_write_dr(dev, nread);
+	ret = c2port_poll_in_busy(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Should check status before reading FLASH block */
+
+	/* Wait for status information */
+	ret = c2port_poll_out_ready(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Read flash programming interface status */
+	ret = c2port_read_dr(dev, &status);
+	if (ret < 0)
+		return ret;
+	if (status != C2PORT_COMMAND_OK)
+		return -EBUSY;
+
+	/* Read flash block */
+	for (i = 0; i < nread; i++) {
+		ret = c2port_poll_out_ready(dev);
+		if (ret < 0)
+			return ret;
+
+		ret = c2port_read_dr(dev, buffer+i);
+		if (ret < 0)
+			return ret;
+	}
+
+	return nread;
+}
+
+static ssize_t c2port_read_flash_data(struct file *filp, struct kobject *kobj,
+				struct bin_attribute *attr,
+				char *buffer, loff_t offset, size_t count)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(kobj_to_dev(kobj));
+	ssize_t ret;
+
+	/* Check the device and flash access status */
+	if (!c2dev->access || !c2dev->flash_access)
+		return -EBUSY;
+
+	mutex_lock(&c2dev->mutex);
+	ret = __c2port_read_flash_data(c2dev, buffer, offset, count);
+	mutex_unlock(&c2dev->mutex);
+
+	if (ret < 0)
+		dev_err(c2dev->dev, "cannot read %s flash\n", c2dev->name);
+
+	return ret;
+}
+
+static ssize_t __c2port_write_flash_data(struct c2port_device *dev,
+				char *buffer, loff_t offset, size_t count)
+{
+	struct c2port_ops *ops = dev->ops;
+	u8 status, nwrite = 128;
+	int i, ret;
+
+	if (nwrite > count)
+		nwrite = count;
+	if (ops->block_size * ops->blocks_num - offset < nwrite)
+		nwrite = ops->block_size * ops->blocks_num - offset;
+
+	/* Check for flash end */
+	if (offset >= ops->block_size * ops->blocks_num)
+		return -EINVAL;
+
+	/* Target the C2 flash programming data register for C2 data register
+	 * access */
+	c2port_write_ar(dev, C2PORT_FPDAT);
+
+	/* Send flash block write command */
+	c2port_write_dr(dev, C2PORT_BLOCK_WRITE);
+
+	/* Wait for input acknowledge */
+	ret = c2port_poll_in_busy(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Should check status before starting FLASH access sequence */
+
+	/* Wait for status information */
+	ret = c2port_poll_out_ready(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Read flash programming interface status */
+	ret = c2port_read_dr(dev, &status);
+	if (ret < 0)
+		return ret;
+	if (status != C2PORT_COMMAND_OK)
+		return -EBUSY;
+
+	/* Send address high byte */
+	c2port_write_dr(dev, offset >> 8);
+	ret = c2port_poll_in_busy(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Send address low byte */
+	c2port_write_dr(dev, offset & 0x00ff);
+	ret = c2port_poll_in_busy(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Send address block size */
+	c2port_write_dr(dev, nwrite);
+	ret = c2port_poll_in_busy(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Should check status before writing FLASH block */
+
+	/* Wait for status information */
+	ret = c2port_poll_out_ready(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Read flash programming interface status */
+	ret = c2port_read_dr(dev, &status);
+	if (ret < 0)
+		return ret;
+	if (status != C2PORT_COMMAND_OK)
+		return -EBUSY;
+
+	/* Write flash block */
+	for (i = 0; i < nwrite; i++) {
+		ret = c2port_write_dr(dev, *(buffer+i));
+		if (ret < 0)
+			return ret;
+
+		ret = c2port_poll_in_busy(dev);
+		if (ret < 0)
+			return ret;
+
+	}
+
+	/* Wait for last flash write to complete */
+	ret = c2port_poll_out_ready(dev);
+	if (ret < 0)
+		return ret;
+
+	return nwrite;
+}
+
+static ssize_t c2port_write_flash_data(struct file *filp, struct kobject *kobj,
+				struct bin_attribute *attr,
+				char *buffer, loff_t offset, size_t count)
+{
+	struct c2port_device *c2dev = dev_get_drvdata(kobj_to_dev(kobj));
+	int ret;
+
+	/* Check the device access status */
+	if (!c2dev->access || !c2dev->flash_access)
+		return -EBUSY;
+
+	mutex_lock(&c2dev->mutex);
+	ret = __c2port_write_flash_data(c2dev, buffer, offset, count);
+	mutex_unlock(&c2dev->mutex);
+
+	if (ret < 0)
+		dev_err(c2dev->dev, "cannot write %s flash\n", c2dev->name);
+
+	return ret;
+}
+/* size is computed at run-time */
+static BIN_ATTR(flash_data, 0644, c2port_read_flash_data,
+		c2port_write_flash_data, 0);
+
+/*
+ * Class attributes
+ */
+static struct attribute *c2port_attrs[] = {
+	&dev_attr_name.attr,
+	&dev_attr_flash_blocks_num.attr,
+	&dev_attr_flash_block_size.attr,
+	&dev_attr_flash_size.attr,
+	&dev_attr_access.attr,
+	&dev_attr_reset.attr,
+	&dev_attr_dev_id.attr,
+	&dev_attr_rev_id.attr,
+	&dev_attr_flash_access.attr,
+	&dev_attr_flash_erase.attr,
+	NULL,
+};
+
+static struct bin_attribute *c2port_bin_attrs[] = {
+	&bin_attr_flash_data,
+	NULL,
+};
+
+static const struct attribute_group c2port_group = {
+	.attrs = c2port_attrs,
+	.bin_attrs = c2port_bin_attrs,
+};
+
+static const struct attribute_group *c2port_groups[] = {
+	&c2port_group,
+	NULL,
+};
+
+/*
+ * Exported functions
+ */
+
+struct c2port_device *c2port_device_register(char *name,
+					struct c2port_ops *ops, void *devdata)
+{
+	struct c2port_device *c2dev;
+	int ret;
+
+	if (unlikely(!ops) || unlikely(!ops->access) || \
+		unlikely(!ops->c2d_dir) || unlikely(!ops->c2ck_set) || \
+		unlikely(!ops->c2d_get) || unlikely(!ops->c2d_set))
+		return ERR_PTR(-EINVAL);
+
+	c2dev = kmalloc(sizeof(struct c2port_device), GFP_KERNEL);
+	if (unlikely(!c2dev))
+		return ERR_PTR(-ENOMEM);
+
+	idr_preload(GFP_KERNEL);
+	spin_lock_irq(&c2port_idr_lock);
+	ret = idr_alloc(&c2port_idr, c2dev, 0, 0, GFP_NOWAIT);
+	spin_unlock_irq(&c2port_idr_lock);
+	idr_preload_end();
+
+	if (ret < 0)
+		goto error_idr_alloc;
+	c2dev->id = ret;
+
+	bin_attr_flash_data.size = ops->blocks_num * ops->block_size;
+
+	c2dev->dev = device_create(c2port_class, NULL, 0, c2dev,
+				   "c2port%d", c2dev->id);
+	if (IS_ERR(c2dev->dev)) {
+		ret = PTR_ERR(c2dev->dev);
+		goto error_device_create;
+	}
+	dev_set_drvdata(c2dev->dev, c2dev);
+
+	strncpy(c2dev->name, name, C2PORT_NAME_LEN);
+	c2dev->ops = ops;
+	mutex_init(&c2dev->mutex);
+
+	/* By default C2 port access is off */
+	c2dev->access = c2dev->flash_access = 0;
+	ops->access(c2dev, 0);
+
+	dev_info(c2dev->dev, "C2 port %s added\n", name);
+	dev_info(c2dev->dev, "%s flash has %d blocks x %d bytes "
+				"(%d bytes total)\n",
+				name, ops->blocks_num, ops->block_size,
+				ops->blocks_num * ops->block_size);
+
+	return c2dev;
+
+error_device_create:
+	spin_lock_irq(&c2port_idr_lock);
+	idr_remove(&c2port_idr, c2dev->id);
+	spin_unlock_irq(&c2port_idr_lock);
+
+error_idr_alloc:
+	kfree(c2dev);
+
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(c2port_device_register);
+
+void c2port_device_unregister(struct c2port_device *c2dev)
+{
+	if (!c2dev)
+		return;
+
+	dev_info(c2dev->dev, "C2 port %s removed\n", c2dev->name);
+
+	spin_lock_irq(&c2port_idr_lock);
+	idr_remove(&c2port_idr, c2dev->id);
+	spin_unlock_irq(&c2port_idr_lock);
+
+	device_destroy(c2port_class, c2dev->id);
+
+	kfree(c2dev);
+}
+EXPORT_SYMBOL(c2port_device_unregister);
+
+/*
+ * Module stuff
+ */
+
+static int __init c2port_init(void)
+{
+	printk(KERN_INFO "Silicon Labs C2 port support v. " DRIVER_VERSION
+		" - (C) 2007 Rodolfo Giometti\n");
+
+	c2port_class = class_create(THIS_MODULE, "c2port");
+	if (IS_ERR(c2port_class)) {
+		printk(KERN_ERR "c2port: failed to allocate class\n");
+		return PTR_ERR(c2port_class);
+	}
+	c2port_class->dev_groups = c2port_groups;
+
+	return 0;
+}
+
+static void __exit c2port_exit(void)
+{
+	class_destroy(c2port_class);
+}
+
+module_init(c2port_init);
+module_exit(c2port_exit);
+
+MODULE_AUTHOR("Rodolfo Giometti <giometti@linux.it>");
+MODULE_DESCRIPTION("Silicon Labs C2 port support v. " DRIVER_VERSION);
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/cardreader/Kconfig b/drivers/misc/cardreader/Kconfig
new file mode 100644
index 000000000..69e815e32
--- /dev/null
+++ b/drivers/misc/cardreader/Kconfig
@@ -0,0 +1,20 @@
+config MISC_RTSX_PCI
+	tristate "Realtek PCI-E card reader"
+	depends on PCI
+	select MFD_CORE
+	help
+	  This supports for Realtek PCI-Express card reader including rts5209,
+	  rts5227, rts522A, rts5229, rts5249, rts524A, rts525A, rtl8411, rts5260.
+	  Realtek card readers support access to many types of memory cards,
+	  such as Memory Stick, Memory Stick Pro, Secure Digital and
+	  MultiMediaCard.
+
+config MISC_RTSX_USB
+	tristate "Realtek USB card reader"
+	depends on USB
+	select MFD_CORE
+	help
+	  Select this option to get support for Realtek USB 2.0 card readers
+	  including RTS5129, RTS5139, RTS5179 and RTS5170.
+	  Realtek card reader supports access to many types of memory cards,
+	  such as Memory Stick Pro, Secure Digital and MultiMediaCard.
diff --git a/drivers/misc/cardreader/Makefile b/drivers/misc/cardreader/Makefile
new file mode 100644
index 000000000..9fabfcc6f
--- /dev/null
+++ b/drivers/misc/cardreader/Makefile
@@ -0,0 +1,4 @@
+rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o rts5249.o rts5260.o
+
+obj-$(CONFIG_MISC_RTSX_PCI)	+= rtsx_pci.o
+obj-$(CONFIG_MISC_RTSX_USB)	+= rtsx_usb.o
diff --git a/drivers/misc/cardreader/rtl8411.c b/drivers/misc/cardreader/rtl8411.c
new file mode 100644
index 000000000..434fd070d
--- /dev/null
+++ b/drivers/misc/cardreader/rtl8411.c
@@ -0,0 +1,508 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ *   Roger Tseng <rogerable@realtek.com>
+ */
+
+#include <linux/module.h>
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/rtsx_pci.h>
+
+#include "rtsx_pcr.h"
+
+static u8 rtl8411_get_ic_version(struct rtsx_pcr *pcr)
+{
+	u8 val;
+
+	rtsx_pci_read_register(pcr, SYS_VER, &val);
+	return val & 0x0F;
+}
+
+static int rtl8411b_is_qfn48(struct rtsx_pcr *pcr)
+{
+	u8 val = 0;
+
+	rtsx_pci_read_register(pcr, RTL8411B_PACKAGE_MODE, &val);
+
+	if (val & 0x2)
+		return 1;
+	else
+		return 0;
+}
+
+static void rtl8411_fetch_vendor_settings(struct rtsx_pcr *pcr)
+{
+	u32 reg1 = 0;
+	u8 reg3 = 0;
+
+	rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, &reg1);
+	pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg1);
+
+	if (!rtsx_vendor_setting_valid(reg1))
+		return;
+
+	pcr->aspm_en = rtsx_reg_to_aspm(reg1);
+	pcr->sd30_drive_sel_1v8 =
+		map_sd_drive(rtsx_reg_to_sd30_drive_sel_1v8(reg1));
+	pcr->card_drive_sel &= 0x3F;
+	pcr->card_drive_sel |= rtsx_reg_to_card_drive_sel(reg1);
+
+	rtsx_pci_read_config_byte(pcr, PCR_SETTING_REG3, &reg3);
+	pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG3, reg3);
+	pcr->sd30_drive_sel_3v3 = rtl8411_reg_to_sd30_drive_sel_3v3(reg3);
+}
+
+static void rtl8411b_fetch_vendor_settings(struct rtsx_pcr *pcr)
+{
+	u32 reg = 0;
+
+	rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, &reg);
+	pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg);
+
+	if (!rtsx_vendor_setting_valid(reg))
+		return;
+
+	pcr->aspm_en = rtsx_reg_to_aspm(reg);
+	pcr->sd30_drive_sel_1v8 =
+		map_sd_drive(rtsx_reg_to_sd30_drive_sel_1v8(reg));
+	pcr->sd30_drive_sel_3v3 =
+		map_sd_drive(rtl8411b_reg_to_sd30_drive_sel_3v3(reg));
+}
+
+static void rtl8411_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
+{
+	rtsx_pci_write_register(pcr, FPDCTL, 0x07, 0x07);
+}
+
+static int rtl8411_extra_init_hw(struct rtsx_pcr *pcr)
+{
+	rtsx_pci_init_cmd(pcr);
+
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DRIVE_SEL,
+			0xFF, pcr->sd30_drive_sel_3v3);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CD_PAD_CTL,
+			CD_DISABLE_MASK | CD_AUTO_DISABLE, CD_ENABLE);
+
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static int rtl8411b_extra_init_hw(struct rtsx_pcr *pcr)
+{
+	rtsx_pci_init_cmd(pcr);
+
+	if (rtl8411b_is_qfn48(pcr))
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD,
+				CARD_PULL_CTL3, 0xFF, 0xF5);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DRIVE_SEL,
+			0xFF, pcr->sd30_drive_sel_3v3);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CD_PAD_CTL,
+			CD_DISABLE_MASK | CD_AUTO_DISABLE, CD_ENABLE);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, FUNC_FORCE_CTL,
+			0x06, 0x00);
+
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static int rtl8411_turn_on_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x00);
+}
+
+static int rtl8411_turn_off_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x01);
+}
+
+static int rtl8411_enable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0xFF, 0x0D);
+}
+
+static int rtl8411_disable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0x08, 0x00);
+}
+
+static int rtl8411_card_power_on(struct rtsx_pcr *pcr, int card)
+{
+	int err;
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			BPP_POWER_MASK, BPP_POWER_5_PERCENT_ON);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_CTL,
+			BPP_LDO_POWB, BPP_LDO_SUSPEND);
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	/* To avoid too large in-rush current */
+	udelay(150);
+
+	err = rtsx_pci_write_register(pcr, CARD_PWR_CTL,
+			BPP_POWER_MASK, BPP_POWER_10_PERCENT_ON);
+	if (err < 0)
+		return err;
+
+	udelay(150);
+
+	err = rtsx_pci_write_register(pcr, CARD_PWR_CTL,
+			BPP_POWER_MASK, BPP_POWER_15_PERCENT_ON);
+	if (err < 0)
+		return err;
+
+	udelay(150);
+
+	err = rtsx_pci_write_register(pcr, CARD_PWR_CTL,
+			BPP_POWER_MASK, BPP_POWER_ON);
+	if (err < 0)
+		return err;
+
+	return rtsx_pci_write_register(pcr, LDO_CTL, BPP_LDO_POWB, BPP_LDO_ON);
+}
+
+static int rtl8411_card_power_off(struct rtsx_pcr *pcr, int card)
+{
+	int err;
+
+	err = rtsx_pci_write_register(pcr, CARD_PWR_CTL,
+			BPP_POWER_MASK, BPP_POWER_OFF);
+	if (err < 0)
+		return err;
+
+	return rtsx_pci_write_register(pcr, LDO_CTL,
+			BPP_LDO_POWB, BPP_LDO_SUSPEND);
+}
+
+static int rtl8411_do_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage,
+		int bpp_tuned18_shift, int bpp_asic_1v8)
+{
+	u8 mask, val;
+	int err;
+
+	mask = (BPP_REG_TUNED18 << bpp_tuned18_shift) | BPP_PAD_MASK;
+	if (voltage == OUTPUT_3V3) {
+		err = rtsx_pci_write_register(pcr,
+				SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_3v3);
+		if (err < 0)
+			return err;
+		val = (BPP_ASIC_3V3 << bpp_tuned18_shift) | BPP_PAD_3V3;
+	} else if (voltage == OUTPUT_1V8) {
+		err = rtsx_pci_write_register(pcr,
+				SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_1v8);
+		if (err < 0)
+			return err;
+		val = (bpp_asic_1v8 << bpp_tuned18_shift) | BPP_PAD_1V8;
+	} else {
+		return -EINVAL;
+	}
+
+	return rtsx_pci_write_register(pcr, LDO_CTL, mask, val);
+}
+
+static int rtl8411_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage)
+{
+	return rtl8411_do_switch_output_voltage(pcr, voltage,
+			BPP_TUNED18_SHIFT_8411, BPP_ASIC_1V8);
+}
+
+static int rtl8402_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage)
+{
+	return rtl8411_do_switch_output_voltage(pcr, voltage,
+			BPP_TUNED18_SHIFT_8402, BPP_ASIC_2V0);
+}
+
+static unsigned int rtl8411_cd_deglitch(struct rtsx_pcr *pcr)
+{
+	unsigned int card_exist;
+
+	card_exist = rtsx_pci_readl(pcr, RTSX_BIPR);
+	card_exist &= CARD_EXIST;
+	if (!card_exist) {
+		/* Enable card CD */
+		rtsx_pci_write_register(pcr, CD_PAD_CTL,
+				CD_DISABLE_MASK, CD_ENABLE);
+		/* Enable card interrupt */
+		rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x00);
+		return 0;
+	}
+
+	if (hweight32(card_exist) > 1) {
+		rtsx_pci_write_register(pcr, CARD_PWR_CTL,
+				BPP_POWER_MASK, BPP_POWER_5_PERCENT_ON);
+		msleep(100);
+
+		card_exist = rtsx_pci_readl(pcr, RTSX_BIPR);
+		if (card_exist & MS_EXIST)
+			card_exist = MS_EXIST;
+		else if (card_exist & SD_EXIST)
+			card_exist = SD_EXIST;
+		else
+			card_exist = 0;
+
+		rtsx_pci_write_register(pcr, CARD_PWR_CTL,
+				BPP_POWER_MASK, BPP_POWER_OFF);
+
+		pcr_dbg(pcr, "After CD deglitch, card_exist = 0x%x\n",
+			card_exist);
+	}
+
+	if (card_exist & MS_EXIST) {
+		/* Disable SD interrupt */
+		rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x40);
+		rtsx_pci_write_register(pcr, CD_PAD_CTL,
+				CD_DISABLE_MASK, MS_CD_EN_ONLY);
+	} else if (card_exist & SD_EXIST) {
+		/* Disable MS interrupt */
+		rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x80);
+		rtsx_pci_write_register(pcr, CD_PAD_CTL,
+				CD_DISABLE_MASK, SD_CD_EN_ONLY);
+	}
+
+	return card_exist;
+}
+
+static int rtl8411_conv_clk_and_div_n(int input, int dir)
+{
+	int output;
+
+	if (dir == CLK_TO_DIV_N)
+		output = input * 4 / 5 - 2;
+	else
+		output = (input + 2) * 5 / 4;
+
+	return output;
+}
+
+static const struct pcr_ops rtl8411_pcr_ops = {
+	.fetch_vendor_settings = rtl8411_fetch_vendor_settings,
+	.extra_init_hw = rtl8411_extra_init_hw,
+	.optimize_phy = NULL,
+	.turn_on_led = rtl8411_turn_on_led,
+	.turn_off_led = rtl8411_turn_off_led,
+	.enable_auto_blink = rtl8411_enable_auto_blink,
+	.disable_auto_blink = rtl8411_disable_auto_blink,
+	.card_power_on = rtl8411_card_power_on,
+	.card_power_off = rtl8411_card_power_off,
+	.switch_output_voltage = rtl8411_switch_output_voltage,
+	.cd_deglitch = rtl8411_cd_deglitch,
+	.conv_clk_and_div_n = rtl8411_conv_clk_and_div_n,
+	.force_power_down = rtl8411_force_power_down,
+};
+
+static const struct pcr_ops rtl8402_pcr_ops = {
+	.fetch_vendor_settings = rtl8411_fetch_vendor_settings,
+	.extra_init_hw = rtl8411_extra_init_hw,
+	.optimize_phy = NULL,
+	.turn_on_led = rtl8411_turn_on_led,
+	.turn_off_led = rtl8411_turn_off_led,
+	.enable_auto_blink = rtl8411_enable_auto_blink,
+	.disable_auto_blink = rtl8411_disable_auto_blink,
+	.card_power_on = rtl8411_card_power_on,
+	.card_power_off = rtl8411_card_power_off,
+	.switch_output_voltage = rtl8402_switch_output_voltage,
+	.cd_deglitch = rtl8411_cd_deglitch,
+	.conv_clk_and_div_n = rtl8411_conv_clk_and_div_n,
+	.force_power_down = rtl8411_force_power_down,
+};
+
+static const struct pcr_ops rtl8411b_pcr_ops = {
+	.fetch_vendor_settings = rtl8411b_fetch_vendor_settings,
+	.extra_init_hw = rtl8411b_extra_init_hw,
+	.optimize_phy = NULL,
+	.turn_on_led = rtl8411_turn_on_led,
+	.turn_off_led = rtl8411_turn_off_led,
+	.enable_auto_blink = rtl8411_enable_auto_blink,
+	.disable_auto_blink = rtl8411_disable_auto_blink,
+	.card_power_on = rtl8411_card_power_on,
+	.card_power_off = rtl8411_card_power_off,
+	.switch_output_voltage = rtl8411_switch_output_voltage,
+	.cd_deglitch = rtl8411_cd_deglitch,
+	.conv_clk_and_div_n = rtl8411_conv_clk_and_div_n,
+	.force_power_down = rtl8411_force_power_down,
+};
+
+/* SD Pull Control Enable:
+ *     SD_DAT[3:0] ==> pull up
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull up
+ *     SD_CMD      ==> pull up
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rtl8411_sd_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xA9),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x09),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04),
+	0,
+};
+
+/* SD Pull Control Disable:
+ *     SD_DAT[3:0] ==> pull down
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull down
+ *     SD_CMD      ==> pull down
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rtl8411_sd_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04),
+	0,
+};
+
+/* MS Pull Control Enable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rtl8411_ms_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x05),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04),
+	0,
+};
+
+/* MS Pull Control Disable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rtl8411_ms_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04),
+	0,
+};
+
+static const u32 rtl8411b_qfn64_sd_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0x09 | 0xD0),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09 | 0x50),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05 | 0x50),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11),
+	0,
+};
+
+static const u32 rtl8411b_qfn48_sd_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0x69 | 0x90),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x08 | 0x11),
+	0,
+};
+
+static const u32 rtl8411b_qfn64_sd_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0x05 | 0xD0),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09 | 0x50),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05 | 0x50),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11),
+	0,
+};
+
+static const u32 rtl8411b_qfn48_sd_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0x65 | 0x90),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11),
+	0,
+};
+
+static const u32 rtl8411b_qfn64_ms_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0x05 | 0xD0),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x05 | 0x50),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05 | 0x50),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11),
+	0,
+};
+
+static const u32 rtl8411b_qfn48_ms_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0x65 | 0x90),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11),
+	0,
+};
+
+static const u32 rtl8411b_qfn64_ms_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0x05 | 0xD0),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09 | 0x50),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05 | 0x50),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11),
+	0,
+};
+
+static const u32 rtl8411b_qfn48_ms_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0x65 | 0x90),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11),
+	0,
+};
+
+static void rtl8411_init_common_params(struct rtsx_pcr *pcr)
+{
+	pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104;
+	pcr->num_slots = 2;
+	pcr->flags = 0;
+	pcr->card_drive_sel = RTL8411_CARD_DRIVE_DEFAULT;
+	pcr->sd30_drive_sel_1v8 = DRIVER_TYPE_B;
+	pcr->sd30_drive_sel_3v3 = DRIVER_TYPE_D;
+	pcr->aspm_en = ASPM_L1_EN;
+	pcr->tx_initial_phase = SET_CLOCK_PHASE(23, 7, 14);
+	pcr->rx_initial_phase = SET_CLOCK_PHASE(4, 3, 10);
+	pcr->ic_version = rtl8411_get_ic_version(pcr);
+}
+
+void rtl8411_init_params(struct rtsx_pcr *pcr)
+{
+	rtl8411_init_common_params(pcr);
+	pcr->ops = &rtl8411_pcr_ops;
+	set_pull_ctrl_tables(pcr, rtl8411);
+}
+
+void rtl8411b_init_params(struct rtsx_pcr *pcr)
+{
+	rtl8411_init_common_params(pcr);
+	pcr->ops = &rtl8411b_pcr_ops;
+	if (rtl8411b_is_qfn48(pcr))
+		set_pull_ctrl_tables(pcr, rtl8411b_qfn48);
+	else
+		set_pull_ctrl_tables(pcr, rtl8411b_qfn64);
+}
+
+void rtl8402_init_params(struct rtsx_pcr *pcr)
+{
+	rtl8411_init_common_params(pcr);
+	pcr->ops = &rtl8402_pcr_ops;
+	set_pull_ctrl_tables(pcr, rtl8411);
+}
diff --git a/drivers/misc/cardreader/rts5209.c b/drivers/misc/cardreader/rts5209.c
new file mode 100644
index 000000000..ce68c48d8
--- /dev/null
+++ b/drivers/misc/cardreader/rts5209.c
@@ -0,0 +1,277 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/rtsx_pci.h>
+
+#include "rtsx_pcr.h"
+
+static u8 rts5209_get_ic_version(struct rtsx_pcr *pcr)
+{
+	u8 val;
+
+	val = rtsx_pci_readb(pcr, 0x1C);
+	return val & 0x0F;
+}
+
+static void rts5209_fetch_vendor_settings(struct rtsx_pcr *pcr)
+{
+	u32 reg;
+
+	rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, &reg);
+	pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg);
+
+	if (rts5209_vendor_setting1_valid(reg)) {
+		if (rts5209_reg_check_ms_pmos(reg))
+			pcr->flags |= PCR_MS_PMOS;
+		pcr->aspm_en = rts5209_reg_to_aspm(reg);
+	}
+
+	rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, &reg);
+	pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg);
+
+	if (rts5209_vendor_setting2_valid(reg)) {
+		pcr->sd30_drive_sel_1v8 =
+			rts5209_reg_to_sd30_drive_sel_1v8(reg);
+		pcr->sd30_drive_sel_3v3 =
+			rts5209_reg_to_sd30_drive_sel_3v3(reg);
+		pcr->card_drive_sel = rts5209_reg_to_card_drive_sel(reg);
+	}
+}
+
+static void rts5209_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
+{
+	rtsx_pci_write_register(pcr, FPDCTL, 0x07, 0x07);
+}
+
+static int rts5209_extra_init_hw(struct rtsx_pcr *pcr)
+{
+	rtsx_pci_init_cmd(pcr);
+
+	/* Turn off LED */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_GPIO, 0xFF, 0x03);
+	/* Reset ASPM state to default value */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, ASPM_FORCE_CTL, 0x3F, 0);
+	/* Force CLKREQ# PIN to drive 0 to request clock */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x08, 0x08);
+	/* Configure GPIO as output */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_GPIO_DIR, 0xFF, 0x03);
+	/* Configure driving */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DRIVE_SEL,
+			0xFF, pcr->sd30_drive_sel_3v3);
+
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static int rts5209_optimize_phy(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_phy_register(pcr, 0x00, 0xB966);
+}
+
+static int rts5209_turn_on_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x00);
+}
+
+static int rts5209_turn_off_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x01);
+}
+
+static int rts5209_enable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0xFF, 0x0D);
+}
+
+static int rts5209_disable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0x08, 0x00);
+}
+
+static int rts5209_card_power_on(struct rtsx_pcr *pcr, int card)
+{
+	int err;
+	u8 pwr_mask, partial_pwr_on, pwr_on;
+
+	pwr_mask = SD_POWER_MASK;
+	partial_pwr_on = SD_PARTIAL_POWER_ON;
+	pwr_on = SD_POWER_ON;
+
+	if ((pcr->flags & PCR_MS_PMOS) && (card == RTSX_MS_CARD)) {
+		pwr_mask = MS_POWER_MASK;
+		partial_pwr_on = MS_PARTIAL_POWER_ON;
+		pwr_on = MS_POWER_ON;
+	}
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			pwr_mask, partial_pwr_on);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x04);
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	/* To avoid too large in-rush current */
+	udelay(150);
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, pwr_mask, pwr_on);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x00);
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static int rts5209_card_power_off(struct rtsx_pcr *pcr, int card)
+{
+	u8 pwr_mask, pwr_off;
+
+	pwr_mask = SD_POWER_MASK;
+	pwr_off = SD_POWER_OFF;
+
+	if ((pcr->flags & PCR_MS_PMOS) && (card == RTSX_MS_CARD)) {
+		pwr_mask = MS_POWER_MASK;
+		pwr_off = MS_POWER_OFF;
+	}
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			pwr_mask | PMOS_STRG_MASK, pwr_off | PMOS_STRG_400mA);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x06);
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static int rts5209_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage)
+{
+	int err;
+
+	if (voltage == OUTPUT_3V3) {
+		err = rtsx_pci_write_register(pcr,
+				SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_3v3);
+		if (err < 0)
+			return err;
+		err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4FC0 | 0x24);
+		if (err < 0)
+			return err;
+	} else if (voltage == OUTPUT_1V8) {
+		err = rtsx_pci_write_register(pcr,
+				SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_1v8);
+		if (err < 0)
+			return err;
+		err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4C40 | 0x24);
+		if (err < 0)
+			return err;
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const struct pcr_ops rts5209_pcr_ops = {
+	.fetch_vendor_settings = rts5209_fetch_vendor_settings,
+	.extra_init_hw = rts5209_extra_init_hw,
+	.optimize_phy = rts5209_optimize_phy,
+	.turn_on_led = rts5209_turn_on_led,
+	.turn_off_led = rts5209_turn_off_led,
+	.enable_auto_blink = rts5209_enable_auto_blink,
+	.disable_auto_blink = rts5209_disable_auto_blink,
+	.card_power_on = rts5209_card_power_on,
+	.card_power_off = rts5209_card_power_off,
+	.switch_output_voltage = rts5209_switch_output_voltage,
+	.cd_deglitch = NULL,
+	.conv_clk_and_div_n = NULL,
+	.force_power_down = rts5209_force_power_down,
+};
+
+/* SD Pull Control Enable:
+ *     SD_DAT[3:0] ==> pull up
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull up
+ *     SD_CMD      ==> pull up
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5209_sd_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9),
+	0,
+};
+
+/* SD Pull Control Disable:
+ *     SD_DAT[3:0] ==> pull down
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull down
+ *     SD_CMD      ==> pull down
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5209_sd_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5),
+	0,
+};
+
+/* MS Pull Control Enable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5209_ms_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+/* MS Pull Control Disable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5209_ms_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+void rts5209_init_params(struct rtsx_pcr *pcr)
+{
+	pcr->extra_caps = EXTRA_CAPS_SD_SDR50 |
+		EXTRA_CAPS_SD_SDR104 | EXTRA_CAPS_MMC_8BIT;
+	pcr->num_slots = 2;
+	pcr->ops = &rts5209_pcr_ops;
+
+	pcr->flags = 0;
+	pcr->card_drive_sel = RTS5209_CARD_DRIVE_DEFAULT;
+	pcr->sd30_drive_sel_1v8 = DRIVER_TYPE_B;
+	pcr->sd30_drive_sel_3v3 = DRIVER_TYPE_D;
+	pcr->aspm_en = ASPM_L1_EN;
+	pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 16);
+	pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5);
+
+	pcr->ic_version = rts5209_get_ic_version(pcr);
+	pcr->sd_pull_ctl_enable_tbl = rts5209_sd_pull_ctl_enable_tbl;
+	pcr->sd_pull_ctl_disable_tbl = rts5209_sd_pull_ctl_disable_tbl;
+	pcr->ms_pull_ctl_enable_tbl = rts5209_ms_pull_ctl_enable_tbl;
+	pcr->ms_pull_ctl_disable_tbl = rts5209_ms_pull_ctl_disable_tbl;
+}
diff --git a/drivers/misc/cardreader/rts5227.c b/drivers/misc/cardreader/rts5227.c
new file mode 100644
index 000000000..d3ce9fe08
--- /dev/null
+++ b/drivers/misc/cardreader/rts5227.c
@@ -0,0 +1,380 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ *   Roger Tseng <rogerable@realtek.com>
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/rtsx_pci.h>
+
+#include "rtsx_pcr.h"
+
+static u8 rts5227_get_ic_version(struct rtsx_pcr *pcr)
+{
+	u8 val;
+
+	rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val);
+	return val & 0x0F;
+}
+
+static void rts5227_fill_driving(struct rtsx_pcr *pcr, u8 voltage)
+{
+	u8 driving_3v3[4][3] = {
+		{0x13, 0x13, 0x13},
+		{0x96, 0x96, 0x96},
+		{0x7F, 0x7F, 0x7F},
+		{0x96, 0x96, 0x96},
+	};
+	u8 driving_1v8[4][3] = {
+		{0x99, 0x99, 0x99},
+		{0xAA, 0xAA, 0xAA},
+		{0xFE, 0xFE, 0xFE},
+		{0xB3, 0xB3, 0xB3},
+	};
+	u8 (*driving)[3], drive_sel;
+
+	if (voltage == OUTPUT_3V3) {
+		driving = driving_3v3;
+		drive_sel = pcr->sd30_drive_sel_3v3;
+	} else {
+		driving = driving_1v8;
+		drive_sel = pcr->sd30_drive_sel_1v8;
+	}
+
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CLK_DRIVE_SEL,
+			0xFF, driving[drive_sel][0]);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CMD_DRIVE_SEL,
+			0xFF, driving[drive_sel][1]);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DAT_DRIVE_SEL,
+			0xFF, driving[drive_sel][2]);
+}
+
+static void rts5227_fetch_vendor_settings(struct rtsx_pcr *pcr)
+{
+	u32 reg;
+
+	rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, &reg);
+	pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg);
+
+	if (!rtsx_vendor_setting_valid(reg))
+		return;
+
+	pcr->aspm_en = rtsx_reg_to_aspm(reg);
+	pcr->sd30_drive_sel_1v8 = rtsx_reg_to_sd30_drive_sel_1v8(reg);
+	pcr->card_drive_sel &= 0x3F;
+	pcr->card_drive_sel |= rtsx_reg_to_card_drive_sel(reg);
+
+	rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, &reg);
+	pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg);
+	pcr->sd30_drive_sel_3v3 = rtsx_reg_to_sd30_drive_sel_3v3(reg);
+	if (rtsx_reg_check_reverse_socket(reg))
+		pcr->flags |= PCR_REVERSE_SOCKET;
+}
+
+static void rts5227_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
+{
+	/* Set relink_time to 0 */
+	rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, 0xFF, 0);
+	rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, 0xFF, 0);
+	rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3, 0x01, 0);
+
+	if (pm_state == HOST_ENTER_S3)
+		rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x10);
+
+	rtsx_pci_write_register(pcr, FPDCTL, 0x03, 0x03);
+}
+
+static int rts5227_extra_init_hw(struct rtsx_pcr *pcr)
+{
+	u16 cap;
+
+	rtsx_pci_init_cmd(pcr);
+
+	/* Configure GPIO as output */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, GPIO_CTL, 0x02, 0x02);
+	/* Reset ASPM state to default value */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, ASPM_FORCE_CTL, 0x3F, 0);
+	/* Switch LDO3318 source from DV33 to card_3v3 */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x00);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x01);
+	/* LED shine disabled, set initial shine cycle period */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OLT_LED_CTL, 0x0F, 0x02);
+	/* Configure LTR */
+	pcie_capability_read_word(pcr->pci, PCI_EXP_DEVCTL2, &cap);
+	if (cap & PCI_EXP_DEVCTL2_LTR_EN)
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LTR_CTL, 0xFF, 0xA3);
+	/* Configure OBFF */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OBFF_CFG, 0x03, 0x03);
+	/* Configure driving */
+	rts5227_fill_driving(pcr, OUTPUT_3V3);
+	/* Configure force_clock_req */
+	if (pcr->flags & PCR_REVERSE_SOCKET)
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB8, 0xB8);
+	else
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB8, 0x88);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, pcr->reg_pm_ctrl3, 0x10, 0x00);
+
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static int rts5227_optimize_phy(struct rtsx_pcr *pcr)
+{
+	int err;
+
+	err = rtsx_pci_write_register(pcr, PM_CTRL3, D3_DELINK_MODE_EN, 0x00);
+	if (err < 0)
+		return err;
+
+	/* Optimize RX sensitivity */
+	return rtsx_pci_write_phy_register(pcr, 0x00, 0xBA42);
+}
+
+static int rts5227_turn_on_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x02);
+}
+
+static int rts5227_turn_off_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x00);
+}
+
+static int rts5227_enable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x08);
+}
+
+static int rts5227_disable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x00);
+}
+
+static int rts5227_card_power_on(struct rtsx_pcr *pcr, int card)
+{
+	int err;
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			SD_POWER_MASK, SD_PARTIAL_POWER_ON);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x02);
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	/* To avoid too large in-rush current */
+	udelay(150);
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			SD_POWER_MASK, SD_POWER_ON);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x06);
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static int rts5227_card_power_off(struct rtsx_pcr *pcr, int card)
+{
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			SD_POWER_MASK | PMOS_STRG_MASK,
+			SD_POWER_OFF | PMOS_STRG_400mA);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0X00);
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static int rts5227_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage)
+{
+	int err;
+
+	if (voltage == OUTPUT_3V3) {
+		err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4FC0 | 0x24);
+		if (err < 0)
+			return err;
+	} else if (voltage == OUTPUT_1V8) {
+		err = rtsx_pci_write_phy_register(pcr, 0x11, 0x3C02);
+		if (err < 0)
+			return err;
+		err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4C80 | 0x24);
+		if (err < 0)
+			return err;
+	} else {
+		return -EINVAL;
+	}
+
+	/* set pad drive */
+	rtsx_pci_init_cmd(pcr);
+	rts5227_fill_driving(pcr, voltage);
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static const struct pcr_ops rts5227_pcr_ops = {
+	.fetch_vendor_settings = rts5227_fetch_vendor_settings,
+	.extra_init_hw = rts5227_extra_init_hw,
+	.optimize_phy = rts5227_optimize_phy,
+	.turn_on_led = rts5227_turn_on_led,
+	.turn_off_led = rts5227_turn_off_led,
+	.enable_auto_blink = rts5227_enable_auto_blink,
+	.disable_auto_blink = rts5227_disable_auto_blink,
+	.card_power_on = rts5227_card_power_on,
+	.card_power_off = rts5227_card_power_off,
+	.switch_output_voltage = rts5227_switch_output_voltage,
+	.cd_deglitch = NULL,
+	.conv_clk_and_div_n = NULL,
+	.force_power_down = rts5227_force_power_down,
+};
+
+/* SD Pull Control Enable:
+ *     SD_DAT[3:0] ==> pull up
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull up
+ *     SD_CMD      ==> pull up
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5227_sd_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9),
+	0,
+};
+
+/* SD Pull Control Disable:
+ *     SD_DAT[3:0] ==> pull down
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull down
+ *     SD_CMD      ==> pull down
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5227_sd_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5),
+	0,
+};
+
+/* MS Pull Control Enable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5227_ms_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+/* MS Pull Control Disable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5227_ms_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+void rts5227_init_params(struct rtsx_pcr *pcr)
+{
+	pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104;
+	pcr->num_slots = 2;
+	pcr->ops = &rts5227_pcr_ops;
+
+	pcr->flags = 0;
+	pcr->card_drive_sel = RTSX_CARD_DRIVE_DEFAULT;
+	pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B;
+	pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B;
+	pcr->aspm_en = ASPM_L1_EN;
+	pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 15);
+	pcr->rx_initial_phase = SET_CLOCK_PHASE(30, 7, 7);
+
+	pcr->ic_version = rts5227_get_ic_version(pcr);
+	pcr->sd_pull_ctl_enable_tbl = rts5227_sd_pull_ctl_enable_tbl;
+	pcr->sd_pull_ctl_disable_tbl = rts5227_sd_pull_ctl_disable_tbl;
+	pcr->ms_pull_ctl_enable_tbl = rts5227_ms_pull_ctl_enable_tbl;
+	pcr->ms_pull_ctl_disable_tbl = rts5227_ms_pull_ctl_disable_tbl;
+
+	pcr->reg_pm_ctrl3 = PM_CTRL3;
+}
+
+static int rts522a_optimize_phy(struct rtsx_pcr *pcr)
+{
+	int err;
+
+	err = rtsx_pci_write_register(pcr, RTS522A_PM_CTRL3, D3_DELINK_MODE_EN,
+		0x00);
+	if (err < 0)
+		return err;
+
+	if (is_version(pcr, 0x522A, IC_VER_A)) {
+		err = rtsx_pci_write_phy_register(pcr, PHY_RCR2,
+			PHY_RCR2_INIT_27S);
+		if (err)
+			return err;
+
+		rtsx_pci_write_phy_register(pcr, PHY_RCR1, PHY_RCR1_INIT_27S);
+		rtsx_pci_write_phy_register(pcr, PHY_FLD0, PHY_FLD0_INIT_27S);
+		rtsx_pci_write_phy_register(pcr, PHY_FLD3, PHY_FLD3_INIT_27S);
+		rtsx_pci_write_phy_register(pcr, PHY_FLD4, PHY_FLD4_INIT_27S);
+	}
+
+	return 0;
+}
+
+static int rts522a_extra_init_hw(struct rtsx_pcr *pcr)
+{
+	rts5227_extra_init_hw(pcr);
+
+	/* Power down OCP for power consumption */
+	if (!pcr->card_exist)
+		rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN,
+				OC_POWER_DOWN);
+
+	rtsx_pci_write_register(pcr, FUNC_FORCE_CTL, FUNC_FORCE_UPME_XMT_DBG,
+		FUNC_FORCE_UPME_XMT_DBG);
+	rtsx_pci_write_register(pcr, PCLK_CTL, 0x04, 0x04);
+	rtsx_pci_write_register(pcr, PM_EVENT_DEBUG, PME_DEBUG_0, PME_DEBUG_0);
+	rtsx_pci_write_register(pcr, PM_CLK_FORCE_CTL, 0xFF, 0x11);
+
+	return 0;
+}
+
+/* rts522a operations mainly derived from rts5227, except phy/hw init setting.
+ */
+static const struct pcr_ops rts522a_pcr_ops = {
+	.fetch_vendor_settings = rts5227_fetch_vendor_settings,
+	.extra_init_hw = rts522a_extra_init_hw,
+	.optimize_phy = rts522a_optimize_phy,
+	.turn_on_led = rts5227_turn_on_led,
+	.turn_off_led = rts5227_turn_off_led,
+	.enable_auto_blink = rts5227_enable_auto_blink,
+	.disable_auto_blink = rts5227_disable_auto_blink,
+	.card_power_on = rts5227_card_power_on,
+	.card_power_off = rts5227_card_power_off,
+	.switch_output_voltage = rts5227_switch_output_voltage,
+	.cd_deglitch = NULL,
+	.conv_clk_and_div_n = NULL,
+	.force_power_down = rts5227_force_power_down,
+};
+
+void rts522a_init_params(struct rtsx_pcr *pcr)
+{
+	rts5227_init_params(pcr);
+	pcr->ops = &rts522a_pcr_ops;
+	pcr->tx_initial_phase = SET_CLOCK_PHASE(20, 20, 11);
+	pcr->reg_pm_ctrl3 = RTS522A_PM_CTRL3;
+}
diff --git a/drivers/misc/cardreader/rts5229.c b/drivers/misc/cardreader/rts5229.c
new file mode 100644
index 000000000..911926133
--- /dev/null
+++ b/drivers/misc/cardreader/rts5229.c
@@ -0,0 +1,273 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/rtsx_pci.h>
+
+#include "rtsx_pcr.h"
+
+static u8 rts5229_get_ic_version(struct rtsx_pcr *pcr)
+{
+	u8 val;
+
+	rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val);
+	return val & 0x0F;
+}
+
+static void rts5229_fetch_vendor_settings(struct rtsx_pcr *pcr)
+{
+	u32 reg;
+
+	rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, &reg);
+	pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg);
+
+	if (!rtsx_vendor_setting_valid(reg))
+		return;
+
+	pcr->aspm_en = rtsx_reg_to_aspm(reg);
+	pcr->sd30_drive_sel_1v8 =
+		map_sd_drive(rtsx_reg_to_sd30_drive_sel_1v8(reg));
+	pcr->card_drive_sel &= 0x3F;
+	pcr->card_drive_sel |= rtsx_reg_to_card_drive_sel(reg);
+
+	rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, &reg);
+	pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg);
+	pcr->sd30_drive_sel_3v3 =
+		map_sd_drive(rtsx_reg_to_sd30_drive_sel_3v3(reg));
+}
+
+static void rts5229_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
+{
+	rtsx_pci_write_register(pcr, FPDCTL, 0x03, 0x03);
+}
+
+static int rts5229_extra_init_hw(struct rtsx_pcr *pcr)
+{
+	rtsx_pci_init_cmd(pcr);
+
+	/* Configure GPIO as output */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, GPIO_CTL, 0x02, 0x02);
+	/* Reset ASPM state to default value */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, ASPM_FORCE_CTL, 0x3F, 0);
+	/* Force CLKREQ# PIN to drive 0 to request clock */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x08, 0x08);
+	/* Switch LDO3318 source from DV33 to card_3v3 */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x00);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x01);
+	/* LED shine disabled, set initial shine cycle period */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OLT_LED_CTL, 0x0F, 0x02);
+	/* Configure driving */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DRIVE_SEL,
+			0xFF, pcr->sd30_drive_sel_3v3);
+
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static int rts5229_optimize_phy(struct rtsx_pcr *pcr)
+{
+	/* Optimize RX sensitivity */
+	return rtsx_pci_write_phy_register(pcr, 0x00, 0xBA42);
+}
+
+static int rts5229_turn_on_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x02);
+}
+
+static int rts5229_turn_off_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x00);
+}
+
+static int rts5229_enable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x08);
+}
+
+static int rts5229_disable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x00);
+}
+
+static int rts5229_card_power_on(struct rtsx_pcr *pcr, int card)
+{
+	int err;
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			SD_POWER_MASK, SD_PARTIAL_POWER_ON);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x02);
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	/* To avoid too large in-rush current */
+	udelay(150);
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			SD_POWER_MASK, SD_POWER_ON);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x06);
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static int rts5229_card_power_off(struct rtsx_pcr *pcr, int card)
+{
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			SD_POWER_MASK | PMOS_STRG_MASK,
+			SD_POWER_OFF | PMOS_STRG_400mA);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x00);
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static int rts5229_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage)
+{
+	int err;
+
+	if (voltage == OUTPUT_3V3) {
+		err = rtsx_pci_write_register(pcr,
+				SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_3v3);
+		if (err < 0)
+			return err;
+		err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4FC0 | 0x24);
+		if (err < 0)
+			return err;
+	} else if (voltage == OUTPUT_1V8) {
+		err = rtsx_pci_write_register(pcr,
+				SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_1v8);
+		if (err < 0)
+			return err;
+		err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4C40 | 0x24);
+		if (err < 0)
+			return err;
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const struct pcr_ops rts5229_pcr_ops = {
+	.fetch_vendor_settings = rts5229_fetch_vendor_settings,
+	.extra_init_hw = rts5229_extra_init_hw,
+	.optimize_phy = rts5229_optimize_phy,
+	.turn_on_led = rts5229_turn_on_led,
+	.turn_off_led = rts5229_turn_off_led,
+	.enable_auto_blink = rts5229_enable_auto_blink,
+	.disable_auto_blink = rts5229_disable_auto_blink,
+	.card_power_on = rts5229_card_power_on,
+	.card_power_off = rts5229_card_power_off,
+	.switch_output_voltage = rts5229_switch_output_voltage,
+	.cd_deglitch = NULL,
+	.conv_clk_and_div_n = NULL,
+	.force_power_down = rts5229_force_power_down,
+};
+
+/* SD Pull Control Enable:
+ *     SD_DAT[3:0] ==> pull up
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull up
+ *     SD_CMD      ==> pull up
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5229_sd_pull_ctl_enable_tbl1[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9),
+	0,
+};
+
+/* For RTS5229 version C */
+static const u32 rts5229_sd_pull_ctl_enable_tbl2[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD9),
+	0,
+};
+
+/* SD Pull Control Disable:
+ *     SD_DAT[3:0] ==> pull down
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull down
+ *     SD_CMD      ==> pull down
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5229_sd_pull_ctl_disable_tbl1[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5),
+	0,
+};
+
+/* For RTS5229 version C */
+static const u32 rts5229_sd_pull_ctl_disable_tbl2[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE5),
+	0,
+};
+
+/* MS Pull Control Enable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5229_ms_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+/* MS Pull Control Disable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5229_ms_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+void rts5229_init_params(struct rtsx_pcr *pcr)
+{
+	pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104;
+	pcr->num_slots = 2;
+	pcr->ops = &rts5229_pcr_ops;
+
+	pcr->flags = 0;
+	pcr->card_drive_sel = RTSX_CARD_DRIVE_DEFAULT;
+	pcr->sd30_drive_sel_1v8 = DRIVER_TYPE_B;
+	pcr->sd30_drive_sel_3v3 = DRIVER_TYPE_D;
+	pcr->aspm_en = ASPM_L1_EN;
+	pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 15);
+	pcr->rx_initial_phase = SET_CLOCK_PHASE(30, 6, 6);
+
+	pcr->ic_version = rts5229_get_ic_version(pcr);
+	if (pcr->ic_version == IC_VER_C) {
+		pcr->sd_pull_ctl_enable_tbl = rts5229_sd_pull_ctl_enable_tbl2;
+		pcr->sd_pull_ctl_disable_tbl = rts5229_sd_pull_ctl_disable_tbl2;
+	} else {
+		pcr->sd_pull_ctl_enable_tbl = rts5229_sd_pull_ctl_enable_tbl1;
+		pcr->sd_pull_ctl_disable_tbl = rts5229_sd_pull_ctl_disable_tbl1;
+	}
+	pcr->ms_pull_ctl_enable_tbl = rts5229_ms_pull_ctl_enable_tbl;
+	pcr->ms_pull_ctl_disable_tbl = rts5229_ms_pull_ctl_disable_tbl;
+}
diff --git a/drivers/misc/cardreader/rts5249.c b/drivers/misc/cardreader/rts5249.c
new file mode 100644
index 000000000..45cd75041
--- /dev/null
+++ b/drivers/misc/cardreader/rts5249.c
@@ -0,0 +1,742 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/rtsx_pci.h>
+
+#include "rtsx_pcr.h"
+
+static u8 rts5249_get_ic_version(struct rtsx_pcr *pcr)
+{
+	u8 val;
+
+	rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val);
+	return val & 0x0F;
+}
+
+static void rts5249_fill_driving(struct rtsx_pcr *pcr, u8 voltage)
+{
+	u8 driving_3v3[4][3] = {
+		{0x11, 0x11, 0x18},
+		{0x55, 0x55, 0x5C},
+		{0xFF, 0xFF, 0xFF},
+		{0x96, 0x96, 0x96},
+	};
+	u8 driving_1v8[4][3] = {
+		{0xC4, 0xC4, 0xC4},
+		{0x3C, 0x3C, 0x3C},
+		{0xFE, 0xFE, 0xFE},
+		{0xB3, 0xB3, 0xB3},
+	};
+	u8 (*driving)[3], drive_sel;
+
+	if (voltage == OUTPUT_3V3) {
+		driving = driving_3v3;
+		drive_sel = pcr->sd30_drive_sel_3v3;
+	} else {
+		driving = driving_1v8;
+		drive_sel = pcr->sd30_drive_sel_1v8;
+	}
+
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CLK_DRIVE_SEL,
+			0xFF, driving[drive_sel][0]);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CMD_DRIVE_SEL,
+			0xFF, driving[drive_sel][1]);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DAT_DRIVE_SEL,
+			0xFF, driving[drive_sel][2]);
+}
+
+static void rtsx_base_fetch_vendor_settings(struct rtsx_pcr *pcr)
+{
+	u32 reg;
+
+	rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, &reg);
+	pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg);
+
+	if (!rtsx_vendor_setting_valid(reg)) {
+		pcr_dbg(pcr, "skip fetch vendor setting\n");
+		return;
+	}
+
+	pcr->aspm_en = rtsx_reg_to_aspm(reg);
+	pcr->sd30_drive_sel_1v8 = rtsx_reg_to_sd30_drive_sel_1v8(reg);
+	pcr->card_drive_sel &= 0x3F;
+	pcr->card_drive_sel |= rtsx_reg_to_card_drive_sel(reg);
+
+	rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, &reg);
+	pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg);
+	pcr->sd30_drive_sel_3v3 = rtsx_reg_to_sd30_drive_sel_3v3(reg);
+	if (rtsx_reg_check_reverse_socket(reg))
+		pcr->flags |= PCR_REVERSE_SOCKET;
+}
+
+static void rtsx_base_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
+{
+	/* Set relink_time to 0 */
+	rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, 0xFF, 0);
+	rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, 0xFF, 0);
+	rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3, 0x01, 0);
+
+	if (pm_state == HOST_ENTER_S3)
+		rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3,
+			D3_DELINK_MODE_EN, D3_DELINK_MODE_EN);
+
+	rtsx_pci_write_register(pcr, FPDCTL, 0x03, 0x03);
+}
+
+static void rts5249_init_from_cfg(struct rtsx_pcr *pcr)
+{
+	struct rtsx_cr_option *option = &(pcr->option);
+	u32 lval;
+
+	if (CHK_PCI_PID(pcr, PID_524A))
+		rtsx_pci_read_config_dword(pcr,
+			PCR_ASPM_SETTING_REG1, &lval);
+	else
+		rtsx_pci_read_config_dword(pcr,
+			PCR_ASPM_SETTING_REG2, &lval);
+
+	if (lval & ASPM_L1_1_EN_MASK)
+		rtsx_set_dev_flag(pcr, ASPM_L1_1_EN);
+
+	if (lval & ASPM_L1_2_EN_MASK)
+		rtsx_set_dev_flag(pcr, ASPM_L1_2_EN);
+
+	if (lval & PM_L1_1_EN_MASK)
+		rtsx_set_dev_flag(pcr, PM_L1_1_EN);
+
+	if (lval & PM_L1_2_EN_MASK)
+		rtsx_set_dev_flag(pcr, PM_L1_2_EN);
+
+	if (option->ltr_en) {
+		u16 val;
+
+		pcie_capability_read_word(pcr->pci, PCI_EXP_DEVCTL2, &val);
+		if (val & PCI_EXP_DEVCTL2_LTR_EN) {
+			option->ltr_enabled = true;
+			option->ltr_active = true;
+			rtsx_set_ltr_latency(pcr, option->ltr_active_latency);
+		} else {
+			option->ltr_enabled = false;
+		}
+	}
+}
+
+static int rts5249_init_from_hw(struct rtsx_pcr *pcr)
+{
+	struct rtsx_cr_option *option = &(pcr->option);
+
+	if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN
+				| PM_L1_1_EN | PM_L1_2_EN))
+		option->force_clkreq_0 = false;
+	else
+		option->force_clkreq_0 = true;
+
+	return 0;
+}
+
+static int rts5249_extra_init_hw(struct rtsx_pcr *pcr)
+{
+	struct rtsx_cr_option *option = &(pcr->option);
+
+	rts5249_init_from_cfg(pcr);
+	rts5249_init_from_hw(pcr);
+
+	rtsx_pci_init_cmd(pcr);
+
+	/* Rest L1SUB Config */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, L1SUB_CONFIG3, 0xFF, 0x00);
+	/* Configure GPIO as output */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, GPIO_CTL, 0x02, 0x02);
+	/* Reset ASPM state to default value */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, ASPM_FORCE_CTL, 0x3F, 0);
+	/* Switch LDO3318 source from DV33 to card_3v3 */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x00);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x01);
+	/* LED shine disabled, set initial shine cycle period */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OLT_LED_CTL, 0x0F, 0x02);
+	/* Configure driving */
+	rts5249_fill_driving(pcr, OUTPUT_3V3);
+	if (pcr->flags & PCR_REVERSE_SOCKET)
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB0, 0xB0);
+	else
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB0, 0x80);
+
+	/*
+	 * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced
+	 * to drive low, and we forcibly request clock.
+	 */
+	if (option->force_clkreq_0)
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG,
+			FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
+	else
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG,
+			FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);
+
+	return rtsx_pci_send_cmd(pcr, CMD_TIMEOUT_DEF);
+}
+
+static int rts5249_optimize_phy(struct rtsx_pcr *pcr)
+{
+	int err;
+
+	err = rtsx_pci_write_register(pcr, PM_CTRL3, D3_DELINK_MODE_EN, 0x00);
+	if (err < 0)
+		return err;
+
+	err = rtsx_pci_write_phy_register(pcr, PHY_REV,
+			PHY_REV_RESV | PHY_REV_RXIDLE_LATCHED |
+			PHY_REV_P1_EN | PHY_REV_RXIDLE_EN |
+			PHY_REV_CLKREQ_TX_EN | PHY_REV_RX_PWST |
+			PHY_REV_CLKREQ_DT_1_0 | PHY_REV_STOP_CLKRD |
+			PHY_REV_STOP_CLKWR);
+	if (err < 0)
+		return err;
+
+	msleep(1);
+
+	err = rtsx_pci_write_phy_register(pcr, PHY_BPCR,
+			PHY_BPCR_IBRXSEL | PHY_BPCR_IBTXSEL |
+			PHY_BPCR_IB_FILTER | PHY_BPCR_CMIRROR_EN);
+	if (err < 0)
+		return err;
+
+	err = rtsx_pci_write_phy_register(pcr, PHY_PCR,
+			PHY_PCR_FORCE_CODE | PHY_PCR_OOBS_CALI_50 |
+			PHY_PCR_OOBS_VCM_08 | PHY_PCR_OOBS_SEN_90 |
+			PHY_PCR_RSSI_EN | PHY_PCR_RX10K);
+	if (err < 0)
+		return err;
+
+	err = rtsx_pci_write_phy_register(pcr, PHY_RCR2,
+			PHY_RCR2_EMPHASE_EN | PHY_RCR2_NADJR |
+			PHY_RCR2_CDR_SR_2 | PHY_RCR2_FREQSEL_12 |
+			PHY_RCR2_CDR_SC_12P | PHY_RCR2_CALIB_LATE);
+	if (err < 0)
+		return err;
+
+	err = rtsx_pci_write_phy_register(pcr, PHY_FLD4,
+			PHY_FLD4_FLDEN_SEL | PHY_FLD4_REQ_REF |
+			PHY_FLD4_RXAMP_OFF | PHY_FLD4_REQ_ADDA |
+			PHY_FLD4_BER_COUNT | PHY_FLD4_BER_TIMER |
+			PHY_FLD4_BER_CHK_EN);
+	if (err < 0)
+		return err;
+	err = rtsx_pci_write_phy_register(pcr, PHY_RDR,
+			PHY_RDR_RXDSEL_1_9 | PHY_SSC_AUTO_PWD);
+	if (err < 0)
+		return err;
+	err = rtsx_pci_write_phy_register(pcr, PHY_RCR1,
+			PHY_RCR1_ADP_TIME_4 | PHY_RCR1_VCO_COARSE);
+	if (err < 0)
+		return err;
+	err = rtsx_pci_write_phy_register(pcr, PHY_FLD3,
+			PHY_FLD3_TIMER_4 | PHY_FLD3_TIMER_6 |
+			PHY_FLD3_RXDELINK);
+	if (err < 0)
+		return err;
+
+	return rtsx_pci_write_phy_register(pcr, PHY_TUNE,
+			PHY_TUNE_TUNEREF_1_0 | PHY_TUNE_VBGSEL_1252 |
+			PHY_TUNE_SDBUS_33 | PHY_TUNE_TUNED18 |
+			PHY_TUNE_TUNED12 | PHY_TUNE_TUNEA12);
+}
+
+static int rtsx_base_turn_on_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x02);
+}
+
+static int rtsx_base_turn_off_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x00);
+}
+
+static int rtsx_base_enable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x08);
+}
+
+static int rtsx_base_disable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x00);
+}
+
+static int rtsx_base_card_power_on(struct rtsx_pcr *pcr, int card)
+{
+	int err;
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			SD_POWER_MASK, SD_VCC_PARTIAL_POWER_ON);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x02);
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	msleep(5);
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			SD_POWER_MASK, SD_VCC_POWER_ON);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x06);
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static int rtsx_base_card_power_off(struct rtsx_pcr *pcr, int card)
+{
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			SD_POWER_MASK, SD_POWER_OFF);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x00);
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static int rtsx_base_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage)
+{
+	int err;
+	u16 append;
+
+	switch (voltage) {
+	case OUTPUT_3V3:
+		err = rtsx_pci_update_phy(pcr, PHY_TUNE, PHY_TUNE_VOLTAGE_MASK,
+			PHY_TUNE_VOLTAGE_3V3);
+		if (err < 0)
+			return err;
+		break;
+	case OUTPUT_1V8:
+		append = PHY_TUNE_D18_1V8;
+		if (CHK_PCI_PID(pcr, 0x5249)) {
+			err = rtsx_pci_update_phy(pcr, PHY_BACR,
+				PHY_BACR_BASIC_MASK, 0);
+			if (err < 0)
+				return err;
+			append = PHY_TUNE_D18_1V7;
+		}
+
+		err = rtsx_pci_update_phy(pcr, PHY_TUNE, PHY_TUNE_VOLTAGE_MASK,
+			append);
+		if (err < 0)
+			return err;
+		break;
+	default:
+		pcr_dbg(pcr, "unknown output voltage %d\n", voltage);
+		return -EINVAL;
+	}
+
+	/* set pad drive */
+	rtsx_pci_init_cmd(pcr);
+	rts5249_fill_driving(pcr, voltage);
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static void rts5249_set_aspm(struct rtsx_pcr *pcr, bool enable)
+{
+	struct rtsx_cr_option *option = &pcr->option;
+	u8 val = 0;
+
+	if (pcr->aspm_enabled == enable)
+		return;
+
+	if (option->dev_aspm_mode == DEV_ASPM_DYNAMIC) {
+		if (enable)
+			val = pcr->aspm_en;
+		rtsx_pci_update_cfg_byte(pcr,
+			pcr->pcie_cap + PCI_EXP_LNKCTL,
+			ASPM_MASK_NEG, val);
+	} else if (option->dev_aspm_mode == DEV_ASPM_BACKDOOR) {
+		u8 mask = FORCE_ASPM_VAL_MASK | FORCE_ASPM_CTL0;
+
+		if (!enable)
+			val = FORCE_ASPM_CTL0;
+		rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, mask, val);
+	}
+
+	pcr->aspm_enabled = enable;
+}
+
+static const struct pcr_ops rts5249_pcr_ops = {
+	.fetch_vendor_settings = rtsx_base_fetch_vendor_settings,
+	.extra_init_hw = rts5249_extra_init_hw,
+	.optimize_phy = rts5249_optimize_phy,
+	.turn_on_led = rtsx_base_turn_on_led,
+	.turn_off_led = rtsx_base_turn_off_led,
+	.enable_auto_blink = rtsx_base_enable_auto_blink,
+	.disable_auto_blink = rtsx_base_disable_auto_blink,
+	.card_power_on = rtsx_base_card_power_on,
+	.card_power_off = rtsx_base_card_power_off,
+	.switch_output_voltage = rtsx_base_switch_output_voltage,
+	.force_power_down = rtsx_base_force_power_down,
+	.set_aspm = rts5249_set_aspm,
+};
+
+/* SD Pull Control Enable:
+ *     SD_DAT[3:0] ==> pull up
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull up
+ *     SD_CMD      ==> pull up
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5249_sd_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0x66),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0xAA),
+	0,
+};
+
+/* SD Pull Control Disable:
+ *     SD_DAT[3:0] ==> pull down
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull down
+ *     SD_CMD      ==> pull down
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5249_sd_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0x66),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55),
+	0,
+};
+
+/* MS Pull Control Enable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5249_ms_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+/* MS Pull Control Disable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5249_ms_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+void rts5249_init_params(struct rtsx_pcr *pcr)
+{
+	struct rtsx_cr_option *option = &(pcr->option);
+
+	pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104;
+	pcr->num_slots = 2;
+	pcr->ops = &rts5249_pcr_ops;
+
+	pcr->flags = 0;
+	pcr->card_drive_sel = RTSX_CARD_DRIVE_DEFAULT;
+	pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B;
+	pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B;
+	pcr->aspm_en = ASPM_L1_EN;
+	pcr->tx_initial_phase = SET_CLOCK_PHASE(1, 29, 16);
+	pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5);
+
+	pcr->ic_version = rts5249_get_ic_version(pcr);
+	pcr->sd_pull_ctl_enable_tbl = rts5249_sd_pull_ctl_enable_tbl;
+	pcr->sd_pull_ctl_disable_tbl = rts5249_sd_pull_ctl_disable_tbl;
+	pcr->ms_pull_ctl_enable_tbl = rts5249_ms_pull_ctl_enable_tbl;
+	pcr->ms_pull_ctl_disable_tbl = rts5249_ms_pull_ctl_disable_tbl;
+
+	pcr->reg_pm_ctrl3 = PM_CTRL3;
+
+	option->dev_flags = (LTR_L1SS_PWR_GATE_CHECK_CARD_EN
+				| LTR_L1SS_PWR_GATE_EN);
+	option->ltr_en = true;
+
+	/* Init latency of active, idle, L1OFF to 60us, 300us, 3ms */
+	option->ltr_active_latency = LTR_ACTIVE_LATENCY_DEF;
+	option->ltr_idle_latency = LTR_IDLE_LATENCY_DEF;
+	option->ltr_l1off_latency = LTR_L1OFF_LATENCY_DEF;
+	option->dev_aspm_mode = DEV_ASPM_DYNAMIC;
+	option->l1_snooze_delay = L1_SNOOZE_DELAY_DEF;
+	option->ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5249_DEF;
+	option->ltr_l1off_snooze_sspwrgate =
+		LTR_L1OFF_SNOOZE_SSPWRGATE_5249_DEF;
+}
+
+static int rts524a_write_phy(struct rtsx_pcr *pcr, u8 addr, u16 val)
+{
+	addr = addr & 0x80 ? (addr & 0x7F) | 0x40 : addr;
+
+	return __rtsx_pci_write_phy_register(pcr, addr, val);
+}
+
+static int rts524a_read_phy(struct rtsx_pcr *pcr, u8 addr, u16 *val)
+{
+	addr = addr & 0x80 ? (addr & 0x7F) | 0x40 : addr;
+
+	return __rtsx_pci_read_phy_register(pcr, addr, val);
+}
+
+static int rts524a_optimize_phy(struct rtsx_pcr *pcr)
+{
+	int err;
+
+	err = rtsx_pci_write_register(pcr, RTS524A_PM_CTRL3,
+		D3_DELINK_MODE_EN, 0x00);
+	if (err < 0)
+		return err;
+
+	rtsx_pci_write_phy_register(pcr, PHY_PCR,
+		PHY_PCR_FORCE_CODE | PHY_PCR_OOBS_CALI_50 |
+		PHY_PCR_OOBS_VCM_08 | PHY_PCR_OOBS_SEN_90 | PHY_PCR_RSSI_EN);
+	rtsx_pci_write_phy_register(pcr, PHY_SSCCR3,
+		PHY_SSCCR3_STEP_IN | PHY_SSCCR3_CHECK_DELAY);
+
+	if (is_version(pcr, 0x524A, IC_VER_A)) {
+		rtsx_pci_write_phy_register(pcr, PHY_SSCCR3,
+			PHY_SSCCR3_STEP_IN | PHY_SSCCR3_CHECK_DELAY);
+		rtsx_pci_write_phy_register(pcr, PHY_SSCCR2,
+			PHY_SSCCR2_PLL_NCODE | PHY_SSCCR2_TIME0 |
+			PHY_SSCCR2_TIME2_WIDTH);
+		rtsx_pci_write_phy_register(pcr, PHY_ANA1A,
+			PHY_ANA1A_TXR_LOOPBACK | PHY_ANA1A_RXT_BIST |
+			PHY_ANA1A_TXR_BIST | PHY_ANA1A_REV);
+		rtsx_pci_write_phy_register(pcr, PHY_ANA1D,
+			PHY_ANA1D_DEBUG_ADDR);
+		rtsx_pci_write_phy_register(pcr, PHY_DIG1E,
+			PHY_DIG1E_REV | PHY_DIG1E_D0_X_D1 |
+			PHY_DIG1E_RX_ON_HOST | PHY_DIG1E_RCLK_REF_HOST |
+			PHY_DIG1E_RCLK_TX_EN_KEEP |
+			PHY_DIG1E_RCLK_TX_TERM_KEEP |
+			PHY_DIG1E_RCLK_RX_EIDLE_ON | PHY_DIG1E_TX_TERM_KEEP |
+			PHY_DIG1E_RX_TERM_KEEP | PHY_DIG1E_TX_EN_KEEP |
+			PHY_DIG1E_RX_EN_KEEP);
+	}
+
+	rtsx_pci_write_phy_register(pcr, PHY_ANA08,
+		PHY_ANA08_RX_EQ_DCGAIN | PHY_ANA08_SEL_RX_EN |
+		PHY_ANA08_RX_EQ_VAL | PHY_ANA08_SCP | PHY_ANA08_SEL_IPI);
+
+	return 0;
+}
+
+static int rts524a_extra_init_hw(struct rtsx_pcr *pcr)
+{
+	rts5249_extra_init_hw(pcr);
+
+	rtsx_pci_write_register(pcr, FUNC_FORCE_CTL,
+		FORCE_ASPM_L1_EN, FORCE_ASPM_L1_EN);
+	rtsx_pci_write_register(pcr, PM_EVENT_DEBUG, PME_DEBUG_0, PME_DEBUG_0);
+	rtsx_pci_write_register(pcr, LDO_VCC_CFG1, LDO_VCC_LMT_EN,
+		LDO_VCC_LMT_EN);
+	rtsx_pci_write_register(pcr, PCLK_CTL, PCLK_MODE_SEL, PCLK_MODE_SEL);
+	if (is_version(pcr, 0x524A, IC_VER_A)) {
+		rtsx_pci_write_register(pcr, LDO_DV18_CFG,
+			LDO_DV18_SR_MASK, LDO_DV18_SR_DF);
+		rtsx_pci_write_register(pcr, LDO_VCC_CFG1,
+			LDO_VCC_REF_TUNE_MASK, LDO_VCC_REF_1V2);
+		rtsx_pci_write_register(pcr, LDO_VIO_CFG,
+			LDO_VIO_REF_TUNE_MASK, LDO_VIO_REF_1V2);
+		rtsx_pci_write_register(pcr, LDO_VIO_CFG,
+			LDO_VIO_SR_MASK, LDO_VIO_SR_DF);
+		rtsx_pci_write_register(pcr, LDO_DV12S_CFG,
+			LDO_REF12_TUNE_MASK, LDO_REF12_TUNE_DF);
+		rtsx_pci_write_register(pcr, SD40_LDO_CTL1,
+			SD40_VIO_TUNE_MASK, SD40_VIO_TUNE_1V7);
+	}
+
+	return 0;
+}
+
+static void rts5250_set_l1off_cfg_sub_d0(struct rtsx_pcr *pcr, int active)
+{
+	struct rtsx_cr_option *option = &(pcr->option);
+
+	u32 interrupt = rtsx_pci_readl(pcr, RTSX_BIPR);
+	int card_exist = (interrupt & SD_EXIST) | (interrupt & MS_EXIST);
+	int aspm_L1_1, aspm_L1_2;
+	u8 val = 0;
+
+	aspm_L1_1 = rtsx_check_dev_flag(pcr, ASPM_L1_1_EN);
+	aspm_L1_2 = rtsx_check_dev_flag(pcr, ASPM_L1_2_EN);
+
+	if (active) {
+		/* Run, latency: 60us */
+		if (aspm_L1_1)
+			val = option->ltr_l1off_snooze_sspwrgate;
+	} else {
+		/* L1off, latency: 300us */
+		if (aspm_L1_2)
+			val = option->ltr_l1off_sspwrgate;
+	}
+
+	if (aspm_L1_1 || aspm_L1_2) {
+		if (rtsx_check_dev_flag(pcr,
+					LTR_L1SS_PWR_GATE_CHECK_CARD_EN)) {
+			if (card_exist)
+				val &= ~L1OFF_MBIAS2_EN_5250;
+			else
+				val |= L1OFF_MBIAS2_EN_5250;
+		}
+	}
+	rtsx_set_l1off_sub(pcr, val);
+}
+
+static const struct pcr_ops rts524a_pcr_ops = {
+	.write_phy = rts524a_write_phy,
+	.read_phy = rts524a_read_phy,
+	.fetch_vendor_settings = rtsx_base_fetch_vendor_settings,
+	.extra_init_hw = rts524a_extra_init_hw,
+	.optimize_phy = rts524a_optimize_phy,
+	.turn_on_led = rtsx_base_turn_on_led,
+	.turn_off_led = rtsx_base_turn_off_led,
+	.enable_auto_blink = rtsx_base_enable_auto_blink,
+	.disable_auto_blink = rtsx_base_disable_auto_blink,
+	.card_power_on = rtsx_base_card_power_on,
+	.card_power_off = rtsx_base_card_power_off,
+	.switch_output_voltage = rtsx_base_switch_output_voltage,
+	.force_power_down = rtsx_base_force_power_down,
+	.set_l1off_cfg_sub_d0 = rts5250_set_l1off_cfg_sub_d0,
+	.set_aspm = rts5249_set_aspm,
+};
+
+void rts524a_init_params(struct rtsx_pcr *pcr)
+{
+	rts5249_init_params(pcr);
+	pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 29, 11);
+	pcr->option.ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF;
+	pcr->option.ltr_l1off_snooze_sspwrgate =
+		LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF;
+
+	pcr->reg_pm_ctrl3 = RTS524A_PM_CTRL3;
+	pcr->ops = &rts524a_pcr_ops;
+}
+
+static int rts525a_card_power_on(struct rtsx_pcr *pcr, int card)
+{
+	rtsx_pci_write_register(pcr, LDO_VCC_CFG1,
+		LDO_VCC_TUNE_MASK, LDO_VCC_3V3);
+	return rtsx_base_card_power_on(pcr, card);
+}
+
+static int rts525a_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage)
+{
+	switch (voltage) {
+	case OUTPUT_3V3:
+		rtsx_pci_write_register(pcr, LDO_CONFIG2,
+			LDO_D3318_MASK, LDO_D3318_33V);
+		rtsx_pci_write_register(pcr, SD_PAD_CTL, SD_IO_USING_1V8, 0);
+		break;
+	case OUTPUT_1V8:
+		rtsx_pci_write_register(pcr, LDO_CONFIG2,
+			LDO_D3318_MASK, LDO_D3318_18V);
+		rtsx_pci_write_register(pcr, SD_PAD_CTL, SD_IO_USING_1V8,
+			SD_IO_USING_1V8);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	rtsx_pci_init_cmd(pcr);
+	rts5249_fill_driving(pcr, voltage);
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static int rts525a_optimize_phy(struct rtsx_pcr *pcr)
+{
+	int err;
+
+	err = rtsx_pci_write_register(pcr, RTS524A_PM_CTRL3,
+		D3_DELINK_MODE_EN, 0x00);
+	if (err < 0)
+		return err;
+
+	rtsx_pci_write_phy_register(pcr, _PHY_FLD0,
+		_PHY_FLD0_CLK_REQ_20C | _PHY_FLD0_RX_IDLE_EN |
+		_PHY_FLD0_BIT_ERR_RSTN | _PHY_FLD0_BER_COUNT |
+		_PHY_FLD0_BER_TIMER | _PHY_FLD0_CHECK_EN);
+
+	rtsx_pci_write_phy_register(pcr, _PHY_ANA03,
+		_PHY_ANA03_TIMER_MAX | _PHY_ANA03_OOBS_DEB_EN |
+		_PHY_CMU_DEBUG_EN);
+
+	if (is_version(pcr, 0x525A, IC_VER_A))
+		rtsx_pci_write_phy_register(pcr, _PHY_REV0,
+			_PHY_REV0_FILTER_OUT | _PHY_REV0_CDR_BYPASS_PFD |
+			_PHY_REV0_CDR_RX_IDLE_BYPASS);
+
+	return 0;
+}
+
+static int rts525a_extra_init_hw(struct rtsx_pcr *pcr)
+{
+	rts5249_extra_init_hw(pcr);
+
+	rtsx_pci_write_register(pcr, PCLK_CTL, PCLK_MODE_SEL, PCLK_MODE_SEL);
+	if (is_version(pcr, 0x525A, IC_VER_A)) {
+		rtsx_pci_write_register(pcr, L1SUB_CONFIG2,
+			L1SUB_AUTO_CFG, L1SUB_AUTO_CFG);
+		rtsx_pci_write_register(pcr, RREF_CFG,
+			RREF_VBGSEL_MASK, RREF_VBGSEL_1V25);
+		rtsx_pci_write_register(pcr, LDO_VIO_CFG,
+			LDO_VIO_TUNE_MASK, LDO_VIO_1V7);
+		rtsx_pci_write_register(pcr, LDO_DV12S_CFG,
+			LDO_D12_TUNE_MASK, LDO_D12_TUNE_DF);
+		rtsx_pci_write_register(pcr, LDO_AV12S_CFG,
+			LDO_AV12S_TUNE_MASK, LDO_AV12S_TUNE_DF);
+		rtsx_pci_write_register(pcr, LDO_VCC_CFG0,
+			LDO_VCC_LMTVTH_MASK, LDO_VCC_LMTVTH_2A);
+		rtsx_pci_write_register(pcr, OOBS_CONFIG,
+			OOBS_AUTOK_DIS | OOBS_VAL_MASK, 0x89);
+	}
+
+	return 0;
+}
+
+static const struct pcr_ops rts525a_pcr_ops = {
+	.fetch_vendor_settings = rtsx_base_fetch_vendor_settings,
+	.extra_init_hw = rts525a_extra_init_hw,
+	.optimize_phy = rts525a_optimize_phy,
+	.turn_on_led = rtsx_base_turn_on_led,
+	.turn_off_led = rtsx_base_turn_off_led,
+	.enable_auto_blink = rtsx_base_enable_auto_blink,
+	.disable_auto_blink = rtsx_base_disable_auto_blink,
+	.card_power_on = rts525a_card_power_on,
+	.card_power_off = rtsx_base_card_power_off,
+	.switch_output_voltage = rts525a_switch_output_voltage,
+	.force_power_down = rtsx_base_force_power_down,
+	.set_l1off_cfg_sub_d0 = rts5250_set_l1off_cfg_sub_d0,
+	.set_aspm = rts5249_set_aspm,
+};
+
+void rts525a_init_params(struct rtsx_pcr *pcr)
+{
+	rts5249_init_params(pcr);
+	pcr->tx_initial_phase = SET_CLOCK_PHASE(25, 29, 11);
+	pcr->option.ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF;
+	pcr->option.ltr_l1off_snooze_sspwrgate =
+		LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF;
+
+	pcr->reg_pm_ctrl3 = RTS524A_PM_CTRL3;
+	pcr->ops = &rts525a_pcr_ops;
+}
diff --git a/drivers/misc/cardreader/rts5260.c b/drivers/misc/cardreader/rts5260.c
new file mode 100644
index 000000000..958b19f7c
--- /dev/null
+++ b/drivers/misc/cardreader/rts5260.c
@@ -0,0 +1,748 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2016-2017 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Steven FENG <steven_feng@realsil.com.cn>
+ *   Rui FENG <rui_feng@realsil.com.cn>
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/rtsx_pci.h>
+
+#include "rts5260.h"
+#include "rtsx_pcr.h"
+
+static u8 rts5260_get_ic_version(struct rtsx_pcr *pcr)
+{
+	u8 val;
+
+	rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val);
+	return val & IC_VERSION_MASK;
+}
+
+static void rts5260_fill_driving(struct rtsx_pcr *pcr, u8 voltage)
+{
+	u8 driving_3v3[6][3] = {
+		{0x94, 0x94, 0x94},
+		{0x11, 0x11, 0x18},
+		{0x55, 0x55, 0x5C},
+		{0x94, 0x94, 0x94},
+		{0x94, 0x94, 0x94},
+		{0xFF, 0xFF, 0xFF},
+	};
+	u8 driving_1v8[6][3] = {
+		{0x9A, 0x89, 0x89},
+		{0xC4, 0xC4, 0xC4},
+		{0x3C, 0x3C, 0x3C},
+		{0x9B, 0x99, 0x99},
+		{0x9A, 0x89, 0x89},
+		{0xFE, 0xFE, 0xFE},
+	};
+	u8 (*driving)[3], drive_sel;
+
+	if (voltage == OUTPUT_3V3) {
+		driving = driving_3v3;
+		drive_sel = pcr->sd30_drive_sel_3v3;
+	} else {
+		driving = driving_1v8;
+		drive_sel = pcr->sd30_drive_sel_1v8;
+	}
+
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CLK_DRIVE_SEL,
+			 0xFF, driving[drive_sel][0]);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CMD_DRIVE_SEL,
+			 0xFF, driving[drive_sel][1]);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DAT_DRIVE_SEL,
+			 0xFF, driving[drive_sel][2]);
+}
+
+static void rtsx_base_fetch_vendor_settings(struct rtsx_pcr *pcr)
+{
+	u32 reg;
+
+	rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, &reg);
+	pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg);
+
+	if (!rtsx_vendor_setting_valid(reg)) {
+		pcr_dbg(pcr, "skip fetch vendor setting\n");
+		return;
+	}
+
+	pcr->aspm_en = rtsx_reg_to_aspm(reg);
+	pcr->sd30_drive_sel_1v8 = rtsx_reg_to_sd30_drive_sel_1v8(reg);
+	pcr->card_drive_sel &= 0x3F;
+	pcr->card_drive_sel |= rtsx_reg_to_card_drive_sel(reg);
+
+	rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, &reg);
+	pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg);
+	pcr->sd30_drive_sel_3v3 = rtsx_reg_to_sd30_drive_sel_3v3(reg);
+	if (rtsx_reg_check_reverse_socket(reg))
+		pcr->flags |= PCR_REVERSE_SOCKET;
+}
+
+static void rtsx_base_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
+{
+	/* Set relink_time to 0 */
+	rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0);
+	rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, MASK_8_BIT_DEF, 0);
+	rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3,
+				RELINK_TIME_MASK, 0);
+
+	if (pm_state == HOST_ENTER_S3)
+		rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3,
+					D3_DELINK_MODE_EN, D3_DELINK_MODE_EN);
+
+	rtsx_pci_write_register(pcr, FPDCTL, ALL_POWER_DOWN, ALL_POWER_DOWN);
+}
+
+static int rtsx_base_enable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, OLT_LED_CTL,
+		LED_SHINE_MASK, LED_SHINE_EN);
+}
+
+static int rtsx_base_disable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, OLT_LED_CTL,
+		LED_SHINE_MASK, LED_SHINE_DISABLE);
+}
+
+static int rts5260_turn_on_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, RTS5260_REG_GPIO_CTL0,
+		RTS5260_REG_GPIO_MASK, RTS5260_REG_GPIO_ON);
+}
+
+static int rts5260_turn_off_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, RTS5260_REG_GPIO_CTL0,
+		RTS5260_REG_GPIO_MASK, RTS5260_REG_GPIO_OFF);
+}
+
+/* SD Pull Control Enable:
+ *     SD_DAT[3:0] ==> pull up
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull up
+ *     SD_CMD      ==> pull up
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5260_sd_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0x66),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0xAA),
+	0,
+};
+
+/* SD Pull Control Disable:
+ *     SD_DAT[3:0] ==> pull down
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull down
+ *     SD_CMD      ==> pull down
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5260_sd_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0x66),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55),
+	0,
+};
+
+/* MS Pull Control Enable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5260_ms_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+/* MS Pull Control Disable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5260_ms_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+static int sd_set_sample_push_timing_sd30(struct rtsx_pcr *pcr)
+{
+	rtsx_pci_write_register(pcr, SD_CFG1, SD_MODE_SELECT_MASK
+		| SD_ASYNC_FIFO_NOT_RST, SD_30_MODE | SD_ASYNC_FIFO_NOT_RST);
+	rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, CLK_LOW_FREQ);
+	rtsx_pci_write_register(pcr, CARD_CLK_SOURCE, 0xFF,
+				CRC_VAR_CLK0 | SD30_FIX_CLK | SAMPLE_VAR_CLK1);
+	rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0);
+
+	return 0;
+}
+
+static int rts5260_card_power_on(struct rtsx_pcr *pcr, int card)
+{
+	int err = 0;
+	struct rtsx_cr_option *option = &pcr->option;
+
+	if (option->ocp_en)
+		rtsx_pci_enable_ocp(pcr);
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_CONFIG2,
+			 DV331812_VDD1, DV331812_VDD1);
+	err = rtsx_pci_send_cmd(pcr, CMD_TIMEOUT_DEF);
+	if (err < 0)
+		return err;
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_VCC_CFG0,
+			 RTS5260_DVCC_TUNE_MASK, RTS5260_DVCC_33);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_VCC_CFG1,
+			 LDO_POW_SDVDD1_MASK, LDO_POW_SDVDD1_ON);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_CONFIG2,
+			 DV331812_POWERON, DV331812_POWERON);
+	err = rtsx_pci_send_cmd(pcr, CMD_TIMEOUT_DEF);
+
+	msleep(20);
+
+	if (pcr->extra_caps & EXTRA_CAPS_SD_SDR50 ||
+	    pcr->extra_caps & EXTRA_CAPS_SD_SDR104)
+		sd_set_sample_push_timing_sd30(pcr);
+
+	/* Initialize SD_CFG1 register */
+	rtsx_pci_write_register(pcr, SD_CFG1, 0xFF,
+				SD_CLK_DIVIDE_128 | SD_20_MODE);
+
+	rtsx_pci_write_register(pcr, SD_SAMPLE_POINT_CTL,
+				0xFF, SD20_RX_POS_EDGE);
+	rtsx_pci_write_register(pcr, SD_PUSH_POINT_CTL, 0xFF, 0);
+	rtsx_pci_write_register(pcr, CARD_STOP, SD_STOP | SD_CLR_ERR,
+				SD_STOP | SD_CLR_ERR);
+
+	/* Reset SD_CFG3 register */
+	rtsx_pci_write_register(pcr, SD_CFG3, SD30_CLK_END_EN, 0);
+	rtsx_pci_write_register(pcr, REG_SD_STOP_SDCLK_CFG,
+				SD30_CLK_STOP_CFG_EN | SD30_CLK_STOP_CFG1 |
+				SD30_CLK_STOP_CFG0, 0);
+
+	rtsx_pci_write_register(pcr, REG_PRE_RW_MODE, EN_INFINITE_MODE, 0);
+
+	return err;
+}
+
+static int rts5260_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage)
+{
+	switch (voltage) {
+	case OUTPUT_3V3:
+		rtsx_pci_write_register(pcr, LDO_CONFIG2,
+					DV331812_VDD1, DV331812_VDD1);
+		rtsx_pci_write_register(pcr, LDO_DV18_CFG,
+					DV331812_MASK, DV331812_33);
+		rtsx_pci_write_register(pcr, SD_PAD_CTL, SD_IO_USING_1V8, 0);
+		break;
+	case OUTPUT_1V8:
+		rtsx_pci_write_register(pcr, LDO_CONFIG2,
+					DV331812_VDD1, DV331812_VDD1);
+		rtsx_pci_write_register(pcr, LDO_DV18_CFG,
+					DV331812_MASK, DV331812_17);
+		rtsx_pci_write_register(pcr, SD_PAD_CTL, SD_IO_USING_1V8,
+					SD_IO_USING_1V8);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* set pad drive */
+	rtsx_pci_init_cmd(pcr);
+	rts5260_fill_driving(pcr, voltage);
+	return rtsx_pci_send_cmd(pcr, CMD_TIMEOUT_DEF);
+}
+
+static void rts5260_stop_cmd(struct rtsx_pcr *pcr)
+{
+	rtsx_pci_writel(pcr, RTSX_HCBCTLR, STOP_CMD);
+	rtsx_pci_writel(pcr, RTSX_HDBCTLR, STOP_DMA);
+	rtsx_pci_write_register(pcr, RTS5260_DMA_RST_CTL_0,
+				RTS5260_DMA_RST | RTS5260_ADMA3_RST,
+				RTS5260_DMA_RST | RTS5260_ADMA3_RST);
+	rtsx_pci_write_register(pcr, RBCTL, RB_FLUSH, RB_FLUSH);
+}
+
+static void rts5260_card_before_power_off(struct rtsx_pcr *pcr)
+{
+	struct rtsx_cr_option *option = &pcr->option;
+
+	rts5260_stop_cmd(pcr);
+	rts5260_switch_output_voltage(pcr, OUTPUT_3V3);
+
+	if (option->ocp_en)
+		rtsx_pci_disable_ocp(pcr);
+}
+
+static int rts5260_card_power_off(struct rtsx_pcr *pcr, int card)
+{
+	int err = 0;
+
+	rts5260_card_before_power_off(pcr);
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_VCC_CFG1,
+			 LDO_POW_SDVDD1_MASK, LDO_POW_SDVDD1_OFF);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_CONFIG2,
+			 DV331812_POWERON, DV331812_POWEROFF);
+	err = rtsx_pci_send_cmd(pcr, CMD_TIMEOUT_DEF);
+
+	return err;
+}
+
+static void rts5260_init_ocp(struct rtsx_pcr *pcr)
+{
+	struct rtsx_cr_option *option = &pcr->option;
+
+	if (option->ocp_en) {
+		u8 mask, val;
+
+		rtsx_pci_write_register(pcr, RTS5260_DVCC_CTRL,
+					RTS5260_DVCC_OCP_EN |
+					RTS5260_DVCC_OCP_CL_EN,
+					RTS5260_DVCC_OCP_EN |
+					RTS5260_DVCC_OCP_CL_EN);
+		rtsx_pci_write_register(pcr, RTS5260_DVIO_CTRL,
+					RTS5260_DVIO_OCP_EN |
+					RTS5260_DVIO_OCP_CL_EN,
+					RTS5260_DVIO_OCP_EN |
+					RTS5260_DVIO_OCP_CL_EN);
+
+		rtsx_pci_write_register(pcr, RTS5260_DVCC_CTRL,
+					RTS5260_DVCC_OCP_THD_MASK,
+					option->sd_400mA_ocp_thd);
+
+		rtsx_pci_write_register(pcr, RTS5260_DVIO_CTRL,
+					RTS5260_DVIO_OCP_THD_MASK,
+					RTS5260_DVIO_OCP_THD_350);
+
+		rtsx_pci_write_register(pcr, RTS5260_DV331812_CFG,
+					RTS5260_DV331812_OCP_THD_MASK,
+					RTS5260_DV331812_OCP_THD_210);
+
+		mask = SD_OCP_GLITCH_MASK | SDVIO_OCP_GLITCH_MASK;
+		val = pcr->hw_param.ocp_glitch;
+		rtsx_pci_write_register(pcr, REG_OCPGLITCH, mask, val);
+
+		rtsx_pci_enable_ocp(pcr);
+	} else {
+		rtsx_pci_write_register(pcr, RTS5260_DVCC_CTRL,
+					RTS5260_DVCC_OCP_EN |
+					RTS5260_DVCC_OCP_CL_EN, 0);
+		rtsx_pci_write_register(pcr, RTS5260_DVIO_CTRL,
+					RTS5260_DVIO_OCP_EN |
+					RTS5260_DVIO_OCP_CL_EN, 0);
+	}
+}
+
+static void rts5260_enable_ocp(struct rtsx_pcr *pcr)
+{
+	u8 val = 0;
+
+	rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN, 0);
+
+	val = SD_OCP_INT_EN | SD_DETECT_EN;
+	val |= SDVIO_OCP_INT_EN | SDVIO_DETECT_EN;
+	rtsx_pci_write_register(pcr, REG_OCPCTL, 0xFF, val);
+	rtsx_pci_write_register(pcr, REG_DV3318_OCPCTL,
+				DV3318_DETECT_EN | DV3318_OCP_INT_EN,
+				DV3318_DETECT_EN | DV3318_OCP_INT_EN);
+}
+
+static void rts5260_disable_ocp(struct rtsx_pcr *pcr)
+{
+	u8 mask = 0;
+
+	mask = SD_OCP_INT_EN | SD_DETECT_EN;
+	mask |= SDVIO_OCP_INT_EN | SDVIO_DETECT_EN;
+	rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0);
+	rtsx_pci_write_register(pcr, REG_DV3318_OCPCTL,
+				DV3318_DETECT_EN | DV3318_OCP_INT_EN, 0);
+
+	rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN,
+				OC_POWER_DOWN);
+}
+
+static int rts5260_get_ocpstat(struct rtsx_pcr *pcr, u8 *val)
+{
+	return rtsx_pci_read_register(pcr, REG_OCPSTAT, val);
+}
+
+static int rts5260_get_ocpstat2(struct rtsx_pcr *pcr, u8 *val)
+{
+	return rtsx_pci_read_register(pcr, REG_DV3318_OCPSTAT, val);
+}
+
+static void rts5260_clear_ocpstat(struct rtsx_pcr *pcr)
+{
+	u8 mask = 0;
+	u8 val = 0;
+
+	mask = SD_OCP_INT_CLR | SD_OC_CLR;
+	mask |= SDVIO_OCP_INT_CLR | SDVIO_OC_CLR;
+	val = SD_OCP_INT_CLR | SD_OC_CLR;
+	val |= SDVIO_OCP_INT_CLR | SDVIO_OC_CLR;
+
+	rtsx_pci_write_register(pcr, REG_OCPCTL, mask, val);
+	rtsx_pci_write_register(pcr, REG_DV3318_OCPCTL,
+				DV3318_OCP_INT_CLR | DV3318_OCP_CLR,
+				DV3318_OCP_INT_CLR | DV3318_OCP_CLR);
+	udelay(10);
+	rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0);
+	rtsx_pci_write_register(pcr, REG_DV3318_OCPCTL,
+				DV3318_OCP_INT_CLR | DV3318_OCP_CLR, 0);
+}
+
+static void rts5260_process_ocp(struct rtsx_pcr *pcr)
+{
+	if (!pcr->option.ocp_en)
+		return;
+
+	rtsx_pci_get_ocpstat(pcr, &pcr->ocp_stat);
+	rts5260_get_ocpstat2(pcr, &pcr->ocp_stat2);
+	if (pcr->card_exist & SD_EXIST)
+		rtsx_sd_power_off_card3v3(pcr);
+	else if (pcr->card_exist & MS_EXIST)
+		rtsx_ms_power_off_card3v3(pcr);
+
+	if (!(pcr->card_exist & MS_EXIST) && !(pcr->card_exist & SD_EXIST)) {
+		if ((pcr->ocp_stat & (SD_OC_NOW | SD_OC_EVER |
+			SDVIO_OC_NOW | SDVIO_OC_EVER)) ||
+			(pcr->ocp_stat2 & (DV3318_OCP_NOW | DV3318_OCP_EVER)))
+			rtsx_pci_clear_ocpstat(pcr);
+		pcr->ocp_stat = 0;
+		pcr->ocp_stat2 = 0;
+	}
+
+	if ((pcr->ocp_stat & (SD_OC_NOW | SD_OC_EVER |
+			SDVIO_OC_NOW | SDVIO_OC_EVER)) ||
+			(pcr->ocp_stat2 & (DV3318_OCP_NOW | DV3318_OCP_EVER))) {
+		if (pcr->card_exist & SD_EXIST)
+			rtsx_pci_write_register(pcr, CARD_OE, SD_OUTPUT_EN, 0);
+		else if (pcr->card_exist & MS_EXIST)
+			rtsx_pci_write_register(pcr, CARD_OE, MS_OUTPUT_EN, 0);
+	}
+}
+
+static int rts5260_init_hw(struct rtsx_pcr *pcr)
+{
+	int err;
+
+	rtsx_pci_init_ocp(pcr);
+
+	rtsx_pci_init_cmd(pcr);
+
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, L1SUB_CONFIG1,
+			 AUX_CLK_ACTIVE_SEL_MASK, MAC_CKSW_DONE);
+	/* Rest L1SUB Config */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, L1SUB_CONFIG3, 0xFF, 0x00);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PM_CLK_FORCE_CTL,
+			 CLK_PM_EN, CLK_PM_EN);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWD_SUSPEND_EN, 0xFF, 0xFF);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			 PWR_GATE_EN, PWR_GATE_EN);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, REG_VREF,
+			 PWD_SUSPND_EN, PWD_SUSPND_EN);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RBCTL,
+			 U_AUTO_DMA_EN_MASK, U_AUTO_DMA_DISABLE);
+
+	if (pcr->flags & PCR_REVERSE_SOCKET)
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB0, 0xB0);
+	else
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB0, 0x80);
+
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OBFF_CFG,
+			 OBFF_EN_MASK, OBFF_DISABLE);
+
+	err = rtsx_pci_send_cmd(pcr, CMD_TIMEOUT_DEF);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static void rts5260_pwr_saving_setting(struct rtsx_pcr *pcr)
+{
+	int lss_l1_1, lss_l1_2;
+
+	lss_l1_1 = rtsx_check_dev_flag(pcr, ASPM_L1_1_EN)
+			| rtsx_check_dev_flag(pcr, PM_L1_1_EN);
+	lss_l1_2 = rtsx_check_dev_flag(pcr, ASPM_L1_2_EN)
+			| rtsx_check_dev_flag(pcr, PM_L1_2_EN);
+
+	if (lss_l1_2) {
+		pcr_dbg(pcr, "Set parameters for L1.2.");
+		rtsx_pci_write_register(pcr, PWR_GLOBAL_CTRL,
+					0xFF, PCIE_L1_2_EN);
+		rtsx_pci_write_register(pcr, PWR_FE_CTL,
+					0xFF, PCIE_L1_2_PD_FE_EN);
+	} else if (lss_l1_1) {
+		pcr_dbg(pcr, "Set parameters for L1.1.");
+		rtsx_pci_write_register(pcr, PWR_GLOBAL_CTRL,
+					0xFF, PCIE_L1_1_EN);
+		rtsx_pci_write_register(pcr, PWR_FE_CTL,
+					0xFF, PCIE_L1_1_PD_FE_EN);
+	} else {
+		pcr_dbg(pcr, "Set parameters for L1.");
+		rtsx_pci_write_register(pcr, PWR_GLOBAL_CTRL,
+					0xFF, PCIE_L1_0_EN);
+		rtsx_pci_write_register(pcr, PWR_FE_CTL,
+					0xFF, PCIE_L1_0_PD_FE_EN);
+	}
+
+	rtsx_pci_write_register(pcr, CFG_L1_0_PCIE_DPHY_RET_VALUE,
+				0xFF, CFG_L1_0_RET_VALUE_DEFAULT);
+	rtsx_pci_write_register(pcr, CFG_L1_0_PCIE_MAC_RET_VALUE,
+				0xFF, CFG_L1_0_RET_VALUE_DEFAULT);
+	rtsx_pci_write_register(pcr, CFG_L1_0_CRC_SD30_RET_VALUE,
+				0xFF, CFG_L1_0_RET_VALUE_DEFAULT);
+	rtsx_pci_write_register(pcr, CFG_L1_0_CRC_SD40_RET_VALUE,
+				0xFF, CFG_L1_0_RET_VALUE_DEFAULT);
+	rtsx_pci_write_register(pcr, CFG_L1_0_SYS_RET_VALUE,
+				0xFF, CFG_L1_0_RET_VALUE_DEFAULT);
+	/*Option cut APHY*/
+	rtsx_pci_write_register(pcr, CFG_PCIE_APHY_OFF_0,
+				0xFF, CFG_PCIE_APHY_OFF_0_DEFAULT);
+	rtsx_pci_write_register(pcr, CFG_PCIE_APHY_OFF_1,
+				0xFF, CFG_PCIE_APHY_OFF_1_DEFAULT);
+	rtsx_pci_write_register(pcr, CFG_PCIE_APHY_OFF_2,
+				0xFF, CFG_PCIE_APHY_OFF_2_DEFAULT);
+	rtsx_pci_write_register(pcr, CFG_PCIE_APHY_OFF_3,
+				0xFF, CFG_PCIE_APHY_OFF_3_DEFAULT);
+	/*CDR DEC*/
+	rtsx_pci_write_register(pcr, PWC_CDR, 0xFF, PWC_CDR_DEFAULT);
+	/*PWMPFM*/
+	rtsx_pci_write_register(pcr, CFG_LP_FPWM_VALUE,
+				0xFF, CFG_LP_FPWM_VALUE_DEFAULT);
+	/*No Power Saving WA*/
+	rtsx_pci_write_register(pcr, CFG_L1_0_CRC_MISC_RET_VALUE,
+				0xFF, CFG_L1_0_CRC_MISC_RET_VALUE_DEFAULT);
+}
+
+static void rts5260_init_from_cfg(struct rtsx_pcr *pcr)
+{
+	struct rtsx_cr_option *option = &pcr->option;
+	u32 lval;
+
+	rtsx_pci_read_config_dword(pcr, PCR_ASPM_SETTING_5260, &lval);
+
+	if (lval & ASPM_L1_1_EN_MASK)
+		rtsx_set_dev_flag(pcr, ASPM_L1_1_EN);
+
+	if (lval & ASPM_L1_2_EN_MASK)
+		rtsx_set_dev_flag(pcr, ASPM_L1_2_EN);
+
+	if (lval & PM_L1_1_EN_MASK)
+		rtsx_set_dev_flag(pcr, PM_L1_1_EN);
+
+	if (lval & PM_L1_2_EN_MASK)
+		rtsx_set_dev_flag(pcr, PM_L1_2_EN);
+
+	rts5260_pwr_saving_setting(pcr);
+
+	if (option->ltr_en) {
+		u16 val;
+
+		pcie_capability_read_word(pcr->pci, PCI_EXP_DEVCTL2, &val);
+		if (val & PCI_EXP_DEVCTL2_LTR_EN) {
+			option->ltr_enabled = true;
+			option->ltr_active = true;
+			rtsx_set_ltr_latency(pcr, option->ltr_active_latency);
+		} else {
+			option->ltr_enabled = false;
+		}
+	}
+
+	if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN
+				| PM_L1_1_EN | PM_L1_2_EN))
+		option->force_clkreq_0 = false;
+	else
+		option->force_clkreq_0 = true;
+}
+
+static int rts5260_extra_init_hw(struct rtsx_pcr *pcr)
+{
+	struct rtsx_cr_option *option = &pcr->option;
+
+	/* Set mcu_cnt to 7 to ensure data can be sampled properly */
+	rtsx_pci_write_register(pcr, 0xFC03, 0x7F, 0x07);
+	rtsx_pci_write_register(pcr, SSC_DIV_N_0, 0xFF, 0x5D);
+
+	rts5260_init_from_cfg(pcr);
+
+	/* force no MDIO*/
+	rtsx_pci_write_register(pcr, RTS5260_AUTOLOAD_CFG4,
+				0xFF, RTS5260_MIMO_DISABLE);
+	/*Modify SDVCC Tune Default Parameters!*/
+	rtsx_pci_write_register(pcr, LDO_VCC_CFG0,
+				RTS5260_DVCC_TUNE_MASK, RTS5260_DVCC_33);
+
+	rtsx_pci_write_register(pcr, PCLK_CTL, PCLK_MODE_SEL, PCLK_MODE_SEL);
+
+	rts5260_init_hw(pcr);
+
+	/*
+	 * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced
+	 * to drive low, and we forcibly request clock.
+	 */
+	if (option->force_clkreq_0)
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG,
+				 FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
+	else
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG,
+				 FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);
+
+	return 0;
+}
+
+static void rts5260_set_aspm(struct rtsx_pcr *pcr, bool enable)
+{
+	struct rtsx_cr_option *option = &pcr->option;
+	u8 val = 0;
+
+	if (pcr->aspm_enabled == enable)
+		return;
+
+	if (option->dev_aspm_mode == DEV_ASPM_DYNAMIC) {
+		if (enable)
+			val = pcr->aspm_en;
+		rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL,
+					 ASPM_MASK_NEG, val);
+	} else if (option->dev_aspm_mode == DEV_ASPM_BACKDOOR) {
+		u8 mask = FORCE_ASPM_VAL_MASK | FORCE_ASPM_CTL0;
+
+		if (!enable)
+			val = FORCE_ASPM_CTL0;
+		rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, mask, val);
+	}
+
+	pcr->aspm_enabled = enable;
+}
+
+static void rts5260_set_l1off_cfg_sub_d0(struct rtsx_pcr *pcr, int active)
+{
+	struct rtsx_cr_option *option = &pcr->option;
+	u32 interrupt = rtsx_pci_readl(pcr, RTSX_BIPR);
+	int card_exist = (interrupt & SD_EXIST) | (interrupt & MS_EXIST);
+	int aspm_L1_1, aspm_L1_2;
+	u8 val = 0;
+
+	aspm_L1_1 = rtsx_check_dev_flag(pcr, ASPM_L1_1_EN);
+	aspm_L1_2 = rtsx_check_dev_flag(pcr, ASPM_L1_2_EN);
+
+	if (active) {
+		/* run, latency: 60us */
+		if (aspm_L1_1)
+			val = option->ltr_l1off_snooze_sspwrgate;
+	} else {
+		/* l1off, latency: 300us */
+		if (aspm_L1_2)
+			val = option->ltr_l1off_sspwrgate;
+	}
+
+	if (aspm_L1_1 || aspm_L1_2) {
+		if (rtsx_check_dev_flag(pcr,
+					LTR_L1SS_PWR_GATE_CHECK_CARD_EN)) {
+			if (card_exist)
+				val &= ~L1OFF_MBIAS2_EN_5250;
+			else
+				val |= L1OFF_MBIAS2_EN_5250;
+		}
+	}
+	rtsx_set_l1off_sub(pcr, val);
+}
+
+static const struct pcr_ops rts5260_pcr_ops = {
+	.fetch_vendor_settings = rtsx_base_fetch_vendor_settings,
+	.turn_on_led = rts5260_turn_on_led,
+	.turn_off_led = rts5260_turn_off_led,
+	.extra_init_hw = rts5260_extra_init_hw,
+	.enable_auto_blink = rtsx_base_enable_auto_blink,
+	.disable_auto_blink = rtsx_base_disable_auto_blink,
+	.card_power_on = rts5260_card_power_on,
+	.card_power_off = rts5260_card_power_off,
+	.switch_output_voltage = rts5260_switch_output_voltage,
+	.force_power_down = rtsx_base_force_power_down,
+	.stop_cmd = rts5260_stop_cmd,
+	.set_aspm = rts5260_set_aspm,
+	.set_l1off_cfg_sub_d0 = rts5260_set_l1off_cfg_sub_d0,
+	.enable_ocp = rts5260_enable_ocp,
+	.disable_ocp = rts5260_disable_ocp,
+	.init_ocp = rts5260_init_ocp,
+	.process_ocp = rts5260_process_ocp,
+	.get_ocpstat = rts5260_get_ocpstat,
+	.clear_ocpstat = rts5260_clear_ocpstat,
+};
+
+void rts5260_init_params(struct rtsx_pcr *pcr)
+{
+	struct rtsx_cr_option *option = &pcr->option;
+	struct rtsx_hw_param *hw_param = &pcr->hw_param;
+
+	pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104;
+	pcr->num_slots = 2;
+
+	pcr->flags = 0;
+	pcr->card_drive_sel = RTSX_CARD_DRIVE_DEFAULT;
+	pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B;
+	pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B;
+	pcr->aspm_en = ASPM_L1_EN;
+	pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 29, 11);
+	pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5);
+
+	pcr->ic_version = rts5260_get_ic_version(pcr);
+	pcr->sd_pull_ctl_enable_tbl = rts5260_sd_pull_ctl_enable_tbl;
+	pcr->sd_pull_ctl_disable_tbl = rts5260_sd_pull_ctl_disable_tbl;
+	pcr->ms_pull_ctl_enable_tbl = rts5260_ms_pull_ctl_enable_tbl;
+	pcr->ms_pull_ctl_disable_tbl = rts5260_ms_pull_ctl_disable_tbl;
+
+	pcr->reg_pm_ctrl3 = RTS524A_PM_CTRL3;
+
+	pcr->ops = &rts5260_pcr_ops;
+
+	option->dev_flags = (LTR_L1SS_PWR_GATE_CHECK_CARD_EN
+				| LTR_L1SS_PWR_GATE_EN);
+	option->ltr_en = true;
+
+	/* init latency of active, idle, L1OFF to 60us, 300us, 3ms */
+	option->ltr_active_latency = LTR_ACTIVE_LATENCY_DEF;
+	option->ltr_idle_latency = LTR_IDLE_LATENCY_DEF;
+	option->ltr_l1off_latency = LTR_L1OFF_LATENCY_DEF;
+	option->dev_aspm_mode = DEV_ASPM_DYNAMIC;
+	option->l1_snooze_delay = L1_SNOOZE_DELAY_DEF;
+	option->ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF;
+	option->ltr_l1off_snooze_sspwrgate =
+		LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF;
+
+	option->ocp_en = 1;
+	if (option->ocp_en)
+		hw_param->interrupt_en |= SD_OC_INT_EN;
+	hw_param->ocp_glitch = SD_OCP_GLITCH_10M | SDVIO_OCP_GLITCH_800U;
+	option->sd_400mA_ocp_thd = RTS5260_DVCC_OCP_THD_550;
+	option->sd_800mA_ocp_thd = RTS5260_DVCC_OCP_THD_970;
+}
diff --git a/drivers/misc/cardreader/rts5260.h b/drivers/misc/cardreader/rts5260.h
new file mode 100644
index 000000000..53a1411c8
--- /dev/null
+++ b/drivers/misc/cardreader/rts5260.h
@@ -0,0 +1,45 @@
+#ifndef __RTS5260_H__
+#define __RTS5260_H__
+
+#define RTS5260_DVCC_CTRL		0xFF73
+#define RTS5260_DVCC_OCP_EN		(0x01 << 7)
+#define RTS5260_DVCC_OCP_THD_MASK	(0x07 << 4)
+#define RTS5260_DVCC_POWERON		(0x01 << 3)
+#define RTS5260_DVCC_OCP_CL_EN		(0x01 << 2)
+
+#define RTS5260_DVIO_CTRL		0xFF75
+#define RTS5260_DVIO_OCP_EN		(0x01 << 7)
+#define RTS5260_DVIO_OCP_THD_MASK	(0x07 << 4)
+#define RTS5260_DVIO_POWERON		(0x01 << 3)
+#define RTS5260_DVIO_OCP_CL_EN		(0x01 << 2)
+
+#define RTS5260_DV331812_CFG		0xFF71
+#define RTS5260_DV331812_OCP_EN		(0x01 << 7)
+#define RTS5260_DV331812_OCP_THD_MASK	(0x07 << 4)
+#define RTS5260_DV331812_POWERON	(0x01 << 3)
+#define RTS5260_DV331812_SEL		(0x01 << 2)
+#define RTS5260_DV331812_VDD1		(0x01 << 2)
+#define RTS5260_DV331812_VDD2		(0x00 << 2)
+
+#define RTS5260_DV331812_OCP_THD_120	(0x00 << 4)
+#define RTS5260_DV331812_OCP_THD_140	(0x01 << 4)
+#define RTS5260_DV331812_OCP_THD_160	(0x02 << 4)
+#define RTS5260_DV331812_OCP_THD_180	(0x03 << 4)
+#define RTS5260_DV331812_OCP_THD_210	(0x04 << 4)
+#define RTS5260_DV331812_OCP_THD_240	(0x05 << 4)
+#define RTS5260_DV331812_OCP_THD_270	(0x06 << 4)
+#define RTS5260_DV331812_OCP_THD_300	(0x07 << 4)
+
+#define RTS5260_DVIO_OCP_THD_250	(0x00 << 4)
+#define RTS5260_DVIO_OCP_THD_300	(0x01 << 4)
+#define RTS5260_DVIO_OCP_THD_350	(0x02 << 4)
+#define RTS5260_DVIO_OCP_THD_400	(0x03 << 4)
+#define RTS5260_DVIO_OCP_THD_450	(0x04 << 4)
+#define RTS5260_DVIO_OCP_THD_500	(0x05 << 4)
+#define RTS5260_DVIO_OCP_THD_550	(0x06 << 4)
+#define RTS5260_DVIO_OCP_THD_600	(0x07 << 4)
+
+#define RTS5260_DVCC_OCP_THD_550	(0x00 << 4)
+#define RTS5260_DVCC_OCP_THD_970	(0x05 << 4)
+
+#endif
diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c
new file mode 100644
index 000000000..3eb3c237f
--- /dev/null
+++ b/drivers/misc/cardreader/rtsx_pcr.c
@@ -0,0 +1,1698 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ */
+
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/idr.h>
+#include <linux/platform_device.h>
+#include <linux/mfd/core.h>
+#include <linux/rtsx_pci.h>
+#include <linux/mmc/card.h>
+#include <asm/unaligned.h>
+
+#include "rtsx_pcr.h"
+
+static bool msi_en = true;
+module_param(msi_en, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(msi_en, "Enable MSI");
+
+static DEFINE_IDR(rtsx_pci_idr);
+static DEFINE_SPINLOCK(rtsx_pci_lock);
+
+static struct mfd_cell rtsx_pcr_cells[] = {
+	[RTSX_SD_CARD] = {
+		.name = DRV_NAME_RTSX_PCI_SDMMC,
+	},
+	[RTSX_MS_CARD] = {
+		.name = DRV_NAME_RTSX_PCI_MS,
+	},
+};
+
+static const struct pci_device_id rtsx_pci_ids[] = {
+	{ PCI_DEVICE(0x10EC, 0x5209), PCI_CLASS_OTHERS << 16, 0xFF0000 },
+	{ PCI_DEVICE(0x10EC, 0x5229), PCI_CLASS_OTHERS << 16, 0xFF0000 },
+	{ PCI_DEVICE(0x10EC, 0x5289), PCI_CLASS_OTHERS << 16, 0xFF0000 },
+	{ PCI_DEVICE(0x10EC, 0x5227), PCI_CLASS_OTHERS << 16, 0xFF0000 },
+	{ PCI_DEVICE(0x10EC, 0x522A), PCI_CLASS_OTHERS << 16, 0xFF0000 },
+	{ PCI_DEVICE(0x10EC, 0x5249), PCI_CLASS_OTHERS << 16, 0xFF0000 },
+	{ PCI_DEVICE(0x10EC, 0x5287), PCI_CLASS_OTHERS << 16, 0xFF0000 },
+	{ PCI_DEVICE(0x10EC, 0x5286), PCI_CLASS_OTHERS << 16, 0xFF0000 },
+	{ PCI_DEVICE(0x10EC, 0x524A), PCI_CLASS_OTHERS << 16, 0xFF0000 },
+	{ PCI_DEVICE(0x10EC, 0x525A), PCI_CLASS_OTHERS << 16, 0xFF0000 },
+	{ PCI_DEVICE(0x10EC, 0x5260), PCI_CLASS_OTHERS << 16, 0xFF0000 },
+	{ 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, rtsx_pci_ids);
+
+static inline void rtsx_pci_enable_aspm(struct rtsx_pcr *pcr)
+{
+	rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL,
+		0xFC, pcr->aspm_en);
+}
+
+static inline void rtsx_pci_disable_aspm(struct rtsx_pcr *pcr)
+{
+	rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL,
+		0xFC, 0);
+}
+
+static int rtsx_comm_set_ltr_latency(struct rtsx_pcr *pcr, u32 latency)
+{
+	rtsx_pci_write_register(pcr, MSGTXDATA0,
+				MASK_8_BIT_DEF, (u8) (latency & 0xFF));
+	rtsx_pci_write_register(pcr, MSGTXDATA1,
+				MASK_8_BIT_DEF, (u8)((latency >> 8) & 0xFF));
+	rtsx_pci_write_register(pcr, MSGTXDATA2,
+				MASK_8_BIT_DEF, (u8)((latency >> 16) & 0xFF));
+	rtsx_pci_write_register(pcr, MSGTXDATA3,
+				MASK_8_BIT_DEF, (u8)((latency >> 24) & 0xFF));
+	rtsx_pci_write_register(pcr, LTR_CTL, LTR_TX_EN_MASK |
+		LTR_LATENCY_MODE_MASK, LTR_TX_EN_1 | LTR_LATENCY_MODE_SW);
+
+	return 0;
+}
+
+int rtsx_set_ltr_latency(struct rtsx_pcr *pcr, u32 latency)
+{
+	if (pcr->ops->set_ltr_latency)
+		return pcr->ops->set_ltr_latency(pcr, latency);
+	else
+		return rtsx_comm_set_ltr_latency(pcr, latency);
+}
+
+static void rtsx_comm_set_aspm(struct rtsx_pcr *pcr, bool enable)
+{
+	struct rtsx_cr_option *option = &pcr->option;
+
+	if (pcr->aspm_enabled == enable)
+		return;
+
+	if (option->dev_aspm_mode == DEV_ASPM_DYNAMIC) {
+		if (enable)
+			rtsx_pci_enable_aspm(pcr);
+		else
+			rtsx_pci_disable_aspm(pcr);
+	} else if (option->dev_aspm_mode == DEV_ASPM_BACKDOOR) {
+		u8 mask = FORCE_ASPM_VAL_MASK;
+		u8 val = 0;
+
+		if (enable)
+			val = pcr->aspm_en;
+		rtsx_pci_write_register(pcr, ASPM_FORCE_CTL,  mask, val);
+	}
+
+	pcr->aspm_enabled = enable;
+}
+
+static void rtsx_disable_aspm(struct rtsx_pcr *pcr)
+{
+	if (pcr->ops->set_aspm)
+		pcr->ops->set_aspm(pcr, false);
+	else
+		rtsx_comm_set_aspm(pcr, false);
+}
+
+int rtsx_set_l1off_sub(struct rtsx_pcr *pcr, u8 val)
+{
+	rtsx_pci_write_register(pcr, L1SUB_CONFIG3, 0xFF, val);
+
+	return 0;
+}
+
+static void rtsx_set_l1off_sub_cfg_d0(struct rtsx_pcr *pcr, int active)
+{
+	if (pcr->ops->set_l1off_cfg_sub_d0)
+		pcr->ops->set_l1off_cfg_sub_d0(pcr, active);
+}
+
+static void rtsx_comm_pm_full_on(struct rtsx_pcr *pcr)
+{
+	struct rtsx_cr_option *option = &pcr->option;
+
+	rtsx_disable_aspm(pcr);
+
+	/* Fixes DMA transfer timout issue after disabling ASPM on RTS5260 */
+	msleep(1);
+
+	if (option->ltr_enabled)
+		rtsx_set_ltr_latency(pcr, option->ltr_active_latency);
+
+	if (rtsx_check_dev_flag(pcr, LTR_L1SS_PWR_GATE_EN))
+		rtsx_set_l1off_sub_cfg_d0(pcr, 1);
+}
+
+static void rtsx_pm_full_on(struct rtsx_pcr *pcr)
+{
+	if (pcr->ops->full_on)
+		pcr->ops->full_on(pcr);
+	else
+		rtsx_comm_pm_full_on(pcr);
+}
+
+void rtsx_pci_start_run(struct rtsx_pcr *pcr)
+{
+	/* If pci device removed, don't queue idle work any more */
+	if (pcr->remove_pci)
+		return;
+
+	if (pcr->state != PDEV_STAT_RUN) {
+		pcr->state = PDEV_STAT_RUN;
+		if (pcr->ops->enable_auto_blink)
+			pcr->ops->enable_auto_blink(pcr);
+		rtsx_pm_full_on(pcr);
+	}
+
+	mod_delayed_work(system_wq, &pcr->idle_work, msecs_to_jiffies(200));
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_start_run);
+
+int rtsx_pci_write_register(struct rtsx_pcr *pcr, u16 addr, u8 mask, u8 data)
+{
+	int i;
+	u32 val = HAIMR_WRITE_START;
+
+	val |= (u32)(addr & 0x3FFF) << 16;
+	val |= (u32)mask << 8;
+	val |= (u32)data;
+
+	rtsx_pci_writel(pcr, RTSX_HAIMR, val);
+
+	for (i = 0; i < MAX_RW_REG_CNT; i++) {
+		val = rtsx_pci_readl(pcr, RTSX_HAIMR);
+		if ((val & HAIMR_TRANS_END) == 0) {
+			if (data != (u8)val)
+				return -EIO;
+			return 0;
+		}
+	}
+
+	return -ETIMEDOUT;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_write_register);
+
+int rtsx_pci_read_register(struct rtsx_pcr *pcr, u16 addr, u8 *data)
+{
+	u32 val = HAIMR_READ_START;
+	int i;
+
+	val |= (u32)(addr & 0x3FFF) << 16;
+	rtsx_pci_writel(pcr, RTSX_HAIMR, val);
+
+	for (i = 0; i < MAX_RW_REG_CNT; i++) {
+		val = rtsx_pci_readl(pcr, RTSX_HAIMR);
+		if ((val & HAIMR_TRANS_END) == 0)
+			break;
+	}
+
+	if (i >= MAX_RW_REG_CNT)
+		return -ETIMEDOUT;
+
+	if (data)
+		*data = (u8)(val & 0xFF);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_read_register);
+
+int __rtsx_pci_write_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 val)
+{
+	int err, i, finished = 0;
+	u8 tmp;
+
+	rtsx_pci_init_cmd(pcr);
+
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYDATA0, 0xFF, (u8)val);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYDATA1, 0xFF, (u8)(val >> 8));
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYADDR, 0xFF, addr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYRWCTL, 0xFF, 0x81);
+
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i < 100000; i++) {
+		err = rtsx_pci_read_register(pcr, PHYRWCTL, &tmp);
+		if (err < 0)
+			return err;
+
+		if (!(tmp & 0x80)) {
+			finished = 1;
+			break;
+		}
+	}
+
+	if (!finished)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+int rtsx_pci_write_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 val)
+{
+	if (pcr->ops->write_phy)
+		return pcr->ops->write_phy(pcr, addr, val);
+
+	return __rtsx_pci_write_phy_register(pcr, addr, val);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_write_phy_register);
+
+int __rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val)
+{
+	int err, i, finished = 0;
+	u16 data;
+	u8 *ptr, tmp;
+
+	rtsx_pci_init_cmd(pcr);
+
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYADDR, 0xFF, addr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYRWCTL, 0xFF, 0x80);
+
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i < 100000; i++) {
+		err = rtsx_pci_read_register(pcr, PHYRWCTL, &tmp);
+		if (err < 0)
+			return err;
+
+		if (!(tmp & 0x80)) {
+			finished = 1;
+			break;
+		}
+	}
+
+	if (!finished)
+		return -ETIMEDOUT;
+
+	rtsx_pci_init_cmd(pcr);
+
+	rtsx_pci_add_cmd(pcr, READ_REG_CMD, PHYDATA0, 0, 0);
+	rtsx_pci_add_cmd(pcr, READ_REG_CMD, PHYDATA1, 0, 0);
+
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	ptr = rtsx_pci_get_cmd_data(pcr);
+	data = ((u16)ptr[1] << 8) | ptr[0];
+
+	if (val)
+		*val = data;
+
+	return 0;
+}
+
+int rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val)
+{
+	if (pcr->ops->read_phy)
+		return pcr->ops->read_phy(pcr, addr, val);
+
+	return __rtsx_pci_read_phy_register(pcr, addr, val);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_read_phy_register);
+
+void rtsx_pci_stop_cmd(struct rtsx_pcr *pcr)
+{
+	if (pcr->ops->stop_cmd)
+		return pcr->ops->stop_cmd(pcr);
+
+	rtsx_pci_writel(pcr, RTSX_HCBCTLR, STOP_CMD);
+	rtsx_pci_writel(pcr, RTSX_HDBCTLR, STOP_DMA);
+
+	rtsx_pci_write_register(pcr, DMACTL, 0x80, 0x80);
+	rtsx_pci_write_register(pcr, RBCTL, 0x80, 0x80);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_stop_cmd);
+
+void rtsx_pci_add_cmd(struct rtsx_pcr *pcr,
+		u8 cmd_type, u16 reg_addr, u8 mask, u8 data)
+{
+	unsigned long flags;
+	u32 val = 0;
+	u32 *ptr = (u32 *)(pcr->host_cmds_ptr);
+
+	val |= (u32)(cmd_type & 0x03) << 30;
+	val |= (u32)(reg_addr & 0x3FFF) << 16;
+	val |= (u32)mask << 8;
+	val |= (u32)data;
+
+	spin_lock_irqsave(&pcr->lock, flags);
+	ptr += pcr->ci;
+	if (pcr->ci < (HOST_CMDS_BUF_LEN / 4)) {
+		put_unaligned_le32(val, ptr);
+		ptr++;
+		pcr->ci++;
+	}
+	spin_unlock_irqrestore(&pcr->lock, flags);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_add_cmd);
+
+void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr)
+{
+	u32 val = 1 << 31;
+
+	rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr);
+
+	val |= (u32)(pcr->ci * 4) & 0x00FFFFFF;
+	/* Hardware Auto Response */
+	val |= 0x40000000;
+	rtsx_pci_writel(pcr, RTSX_HCBCTLR, val);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_send_cmd_no_wait);
+
+int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout)
+{
+	struct completion trans_done;
+	u32 val = 1 << 31;
+	long timeleft;
+	unsigned long flags;
+	int err = 0;
+
+	spin_lock_irqsave(&pcr->lock, flags);
+
+	/* set up data structures for the wakeup system */
+	pcr->done = &trans_done;
+	pcr->trans_result = TRANS_NOT_READY;
+	init_completion(&trans_done);
+
+	rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr);
+
+	val |= (u32)(pcr->ci * 4) & 0x00FFFFFF;
+	/* Hardware Auto Response */
+	val |= 0x40000000;
+	rtsx_pci_writel(pcr, RTSX_HCBCTLR, val);
+
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+	/* Wait for TRANS_OK_INT */
+	timeleft = wait_for_completion_interruptible_timeout(
+			&trans_done, msecs_to_jiffies(timeout));
+	if (timeleft <= 0) {
+		pcr_dbg(pcr, "Timeout (%s %d)\n", __func__, __LINE__);
+		err = -ETIMEDOUT;
+		goto finish_send_cmd;
+	}
+
+	spin_lock_irqsave(&pcr->lock, flags);
+	if (pcr->trans_result == TRANS_RESULT_FAIL)
+		err = -EINVAL;
+	else if (pcr->trans_result == TRANS_RESULT_OK)
+		err = 0;
+	else if (pcr->trans_result == TRANS_NO_DEVICE)
+		err = -ENODEV;
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+finish_send_cmd:
+	spin_lock_irqsave(&pcr->lock, flags);
+	pcr->done = NULL;
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+	if ((err < 0) && (err != -ENODEV))
+		rtsx_pci_stop_cmd(pcr);
+
+	if (pcr->finish_me)
+		complete(pcr->finish_me);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_send_cmd);
+
+static void rtsx_pci_add_sg_tbl(struct rtsx_pcr *pcr,
+		dma_addr_t addr, unsigned int len, int end)
+{
+	u64 *ptr = (u64 *)(pcr->host_sg_tbl_ptr) + pcr->sgi;
+	u64 val;
+	u8 option = RTSX_SG_VALID | RTSX_SG_TRANS_DATA;
+
+	pcr_dbg(pcr, "DMA addr: 0x%x, Len: 0x%x\n", (unsigned int)addr, len);
+
+	if (end)
+		option |= RTSX_SG_END;
+	val = ((u64)addr << 32) | ((u64)len << 12) | option;
+
+	put_unaligned_le64(val, ptr);
+	pcr->sgi++;
+}
+
+int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int num_sg, bool read, int timeout)
+{
+	int err = 0, count;
+
+	pcr_dbg(pcr, "--> %s: num_sg = %d\n", __func__, num_sg);
+	count = rtsx_pci_dma_map_sg(pcr, sglist, num_sg, read);
+	if (count < 1)
+		return -EINVAL;
+	pcr_dbg(pcr, "DMA mapping count: %d\n", count);
+
+	err = rtsx_pci_dma_transfer(pcr, sglist, count, read, timeout);
+
+	rtsx_pci_dma_unmap_sg(pcr, sglist, num_sg, read);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_transfer_data);
+
+int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int num_sg, bool read)
+{
+	enum dma_data_direction dir = read ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+
+	if (pcr->remove_pci)
+		return -EINVAL;
+
+	if ((sglist == NULL) || (num_sg <= 0))
+		return -EINVAL;
+
+	return dma_map_sg(&(pcr->pci->dev), sglist, num_sg, dir);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_dma_map_sg);
+
+void rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int num_sg, bool read)
+{
+	enum dma_data_direction dir = read ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+
+	dma_unmap_sg(&(pcr->pci->dev), sglist, num_sg, dir);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_dma_unmap_sg);
+
+int rtsx_pci_dma_transfer(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int count, bool read, int timeout)
+{
+	struct completion trans_done;
+	struct scatterlist *sg;
+	dma_addr_t addr;
+	long timeleft;
+	unsigned long flags;
+	unsigned int len;
+	int i, err = 0;
+	u32 val;
+	u8 dir = read ? DEVICE_TO_HOST : HOST_TO_DEVICE;
+
+	if (pcr->remove_pci)
+		return -ENODEV;
+
+	if ((sglist == NULL) || (count < 1))
+		return -EINVAL;
+
+	val = ((u32)(dir & 0x01) << 29) | TRIG_DMA | ADMA_MODE;
+	pcr->sgi = 0;
+	for_each_sg(sglist, sg, count, i) {
+		addr = sg_dma_address(sg);
+		len = sg_dma_len(sg);
+		rtsx_pci_add_sg_tbl(pcr, addr, len, i == count - 1);
+	}
+
+	spin_lock_irqsave(&pcr->lock, flags);
+
+	pcr->done = &trans_done;
+	pcr->trans_result = TRANS_NOT_READY;
+	init_completion(&trans_done);
+	rtsx_pci_writel(pcr, RTSX_HDBAR, pcr->host_sg_tbl_addr);
+	rtsx_pci_writel(pcr, RTSX_HDBCTLR, val);
+
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+	timeleft = wait_for_completion_interruptible_timeout(
+			&trans_done, msecs_to_jiffies(timeout));
+	if (timeleft <= 0) {
+		pcr_dbg(pcr, "Timeout (%s %d)\n", __func__, __LINE__);
+		err = -ETIMEDOUT;
+		goto out;
+	}
+
+	spin_lock_irqsave(&pcr->lock, flags);
+	if (pcr->trans_result == TRANS_RESULT_FAIL) {
+		err = -EILSEQ;
+		if (pcr->dma_error_count < RTS_MAX_TIMES_FREQ_REDUCTION)
+			pcr->dma_error_count++;
+	}
+
+	else if (pcr->trans_result == TRANS_NO_DEVICE)
+		err = -ENODEV;
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+out:
+	spin_lock_irqsave(&pcr->lock, flags);
+	pcr->done = NULL;
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+	if ((err < 0) && (err != -ENODEV))
+		rtsx_pci_stop_cmd(pcr);
+
+	if (pcr->finish_me)
+		complete(pcr->finish_me);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_dma_transfer);
+
+int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len)
+{
+	int err;
+	int i, j;
+	u16 reg;
+	u8 *ptr;
+
+	if (buf_len > 512)
+		buf_len = 512;
+
+	ptr = buf;
+	reg = PPBUF_BASE2;
+	for (i = 0; i < buf_len / 256; i++) {
+		rtsx_pci_init_cmd(pcr);
+
+		for (j = 0; j < 256; j++)
+			rtsx_pci_add_cmd(pcr, READ_REG_CMD, reg++, 0, 0);
+
+		err = rtsx_pci_send_cmd(pcr, 250);
+		if (err < 0)
+			return err;
+
+		memcpy(ptr, rtsx_pci_get_cmd_data(pcr), 256);
+		ptr += 256;
+	}
+
+	if (buf_len % 256) {
+		rtsx_pci_init_cmd(pcr);
+
+		for (j = 0; j < buf_len % 256; j++)
+			rtsx_pci_add_cmd(pcr, READ_REG_CMD, reg++, 0, 0);
+
+		err = rtsx_pci_send_cmd(pcr, 250);
+		if (err < 0)
+			return err;
+	}
+
+	memcpy(ptr, rtsx_pci_get_cmd_data(pcr), buf_len % 256);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_read_ppbuf);
+
+int rtsx_pci_write_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len)
+{
+	int err;
+	int i, j;
+	u16 reg;
+	u8 *ptr;
+
+	if (buf_len > 512)
+		buf_len = 512;
+
+	ptr = buf;
+	reg = PPBUF_BASE2;
+	for (i = 0; i < buf_len / 256; i++) {
+		rtsx_pci_init_cmd(pcr);
+
+		for (j = 0; j < 256; j++) {
+			rtsx_pci_add_cmd(pcr, WRITE_REG_CMD,
+					reg++, 0xFF, *ptr);
+			ptr++;
+		}
+
+		err = rtsx_pci_send_cmd(pcr, 250);
+		if (err < 0)
+			return err;
+	}
+
+	if (buf_len % 256) {
+		rtsx_pci_init_cmd(pcr);
+
+		for (j = 0; j < buf_len % 256; j++) {
+			rtsx_pci_add_cmd(pcr, WRITE_REG_CMD,
+					reg++, 0xFF, *ptr);
+			ptr++;
+		}
+
+		err = rtsx_pci_send_cmd(pcr, 250);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_write_ppbuf);
+
+static int rtsx_pci_set_pull_ctl(struct rtsx_pcr *pcr, const u32 *tbl)
+{
+	rtsx_pci_init_cmd(pcr);
+
+	while (*tbl & 0xFFFF0000) {
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD,
+				(u16)(*tbl >> 16), 0xFF, (u8)(*tbl));
+		tbl++;
+	}
+
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+int rtsx_pci_card_pull_ctl_enable(struct rtsx_pcr *pcr, int card)
+{
+	const u32 *tbl;
+
+	if (card == RTSX_SD_CARD)
+		tbl = pcr->sd_pull_ctl_enable_tbl;
+	else if (card == RTSX_MS_CARD)
+		tbl = pcr->ms_pull_ctl_enable_tbl;
+	else
+		return -EINVAL;
+
+	return rtsx_pci_set_pull_ctl(pcr, tbl);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_card_pull_ctl_enable);
+
+int rtsx_pci_card_pull_ctl_disable(struct rtsx_pcr *pcr, int card)
+{
+	const u32 *tbl;
+
+	if (card == RTSX_SD_CARD)
+		tbl = pcr->sd_pull_ctl_disable_tbl;
+	else if (card == RTSX_MS_CARD)
+		tbl = pcr->ms_pull_ctl_disable_tbl;
+	else
+		return -EINVAL;
+
+
+	return rtsx_pci_set_pull_ctl(pcr, tbl);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_card_pull_ctl_disable);
+
+static void rtsx_pci_enable_bus_int(struct rtsx_pcr *pcr)
+{
+	pcr->bier = TRANS_OK_INT_EN | TRANS_FAIL_INT_EN | SD_INT_EN;
+
+	if (pcr->num_slots > 1)
+		pcr->bier |= MS_INT_EN;
+
+	/* Enable Bus Interrupt */
+	rtsx_pci_writel(pcr, RTSX_BIER, pcr->bier);
+
+	pcr_dbg(pcr, "RTSX_BIER: 0x%08x\n", pcr->bier);
+}
+
+static inline u8 double_ssc_depth(u8 depth)
+{
+	return ((depth > 1) ? (depth - 1) : depth);
+}
+
+static u8 revise_ssc_depth(u8 ssc_depth, u8 div)
+{
+	if (div > CLK_DIV_1) {
+		if (ssc_depth > (div - 1))
+			ssc_depth -= (div - 1);
+		else
+			ssc_depth = SSC_DEPTH_4M;
+	}
+
+	return ssc_depth;
+}
+
+int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock,
+		u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk)
+{
+	int err, clk;
+	u8 n, clk_divider, mcu_cnt, div;
+	static const u8 depth[] = {
+		[RTSX_SSC_DEPTH_4M] = SSC_DEPTH_4M,
+		[RTSX_SSC_DEPTH_2M] = SSC_DEPTH_2M,
+		[RTSX_SSC_DEPTH_1M] = SSC_DEPTH_1M,
+		[RTSX_SSC_DEPTH_500K] = SSC_DEPTH_500K,
+		[RTSX_SSC_DEPTH_250K] = SSC_DEPTH_250K,
+	};
+
+	if (initial_mode) {
+		/* We use 250k(around) here, in initial stage */
+		clk_divider = SD_CLK_DIVIDE_128;
+		card_clock = 30000000;
+	} else {
+		clk_divider = SD_CLK_DIVIDE_0;
+	}
+	err = rtsx_pci_write_register(pcr, SD_CFG1,
+			SD_CLK_DIVIDE_MASK, clk_divider);
+	if (err < 0)
+		return err;
+
+	/* Reduce card clock by 20MHz each time a DMA transfer error occurs */
+	if (card_clock == UHS_SDR104_MAX_DTR &&
+	    pcr->dma_error_count &&
+	    PCI_PID(pcr) == RTS5227_DEVICE_ID)
+		card_clock = UHS_SDR104_MAX_DTR -
+			(pcr->dma_error_count * 20000000);
+
+	card_clock /= 1000000;
+	pcr_dbg(pcr, "Switch card clock to %dMHz\n", card_clock);
+
+	clk = card_clock;
+	if (!initial_mode && double_clk)
+		clk = card_clock * 2;
+	pcr_dbg(pcr, "Internal SSC clock: %dMHz (cur_clock = %d)\n",
+		clk, pcr->cur_clock);
+
+	if (clk == pcr->cur_clock)
+		return 0;
+
+	if (pcr->ops->conv_clk_and_div_n)
+		n = (u8)pcr->ops->conv_clk_and_div_n(clk, CLK_TO_DIV_N);
+	else
+		n = (u8)(clk - 2);
+	if ((clk <= 2) || (n > MAX_DIV_N_PCR))
+		return -EINVAL;
+
+	mcu_cnt = (u8)(125/clk + 3);
+	if (mcu_cnt > 15)
+		mcu_cnt = 15;
+
+	/* Make sure that the SSC clock div_n is not less than MIN_DIV_N_PCR */
+	div = CLK_DIV_1;
+	while ((n < MIN_DIV_N_PCR) && (div < CLK_DIV_8)) {
+		if (pcr->ops->conv_clk_and_div_n) {
+			int dbl_clk = pcr->ops->conv_clk_and_div_n(n,
+					DIV_N_TO_CLK) * 2;
+			n = (u8)pcr->ops->conv_clk_and_div_n(dbl_clk,
+					CLK_TO_DIV_N);
+		} else {
+			n = (n + 2) * 2 - 2;
+		}
+		div++;
+	}
+	pcr_dbg(pcr, "n = %d, div = %d\n", n, div);
+
+	ssc_depth = depth[ssc_depth];
+	if (double_clk)
+		ssc_depth = double_ssc_depth(ssc_depth);
+
+	ssc_depth = revise_ssc_depth(ssc_depth, div);
+	pcr_dbg(pcr, "ssc_depth = %d\n", ssc_depth);
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_CTL,
+			CLK_LOW_FREQ, CLK_LOW_FREQ);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_DIV,
+			0xFF, (div << 4) | mcu_cnt);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, 0);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2,
+			SSC_DEPTH_MASK, ssc_depth);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_DIV_N_0, 0xFF, n);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, SSC_RSTB);
+	if (vpclk) {
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL,
+				PHASE_NOT_RESET, 0);
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL,
+				PHASE_NOT_RESET, PHASE_NOT_RESET);
+	}
+
+	err = rtsx_pci_send_cmd(pcr, 2000);
+	if (err < 0)
+		return err;
+
+	/* Wait SSC clock stable */
+	udelay(SSC_CLOCK_STABLE_WAIT);
+	err = rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0);
+	if (err < 0)
+		return err;
+
+	pcr->cur_clock = clk;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_switch_clock);
+
+int rtsx_pci_card_power_on(struct rtsx_pcr *pcr, int card)
+{
+	if (pcr->ops->card_power_on)
+		return pcr->ops->card_power_on(pcr, card);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_card_power_on);
+
+int rtsx_pci_card_power_off(struct rtsx_pcr *pcr, int card)
+{
+	if (pcr->ops->card_power_off)
+		return pcr->ops->card_power_off(pcr, card);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_card_power_off);
+
+int rtsx_pci_card_exclusive_check(struct rtsx_pcr *pcr, int card)
+{
+	static const unsigned int cd_mask[] = {
+		[RTSX_SD_CARD] = SD_EXIST,
+		[RTSX_MS_CARD] = MS_EXIST
+	};
+
+	if (!(pcr->flags & PCR_MS_PMOS)) {
+		/* When using single PMOS, accessing card is not permitted
+		 * if the existing card is not the designated one.
+		 */
+		if (pcr->card_exist & (~cd_mask[card]))
+			return -EIO;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_card_exclusive_check);
+
+int rtsx_pci_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage)
+{
+	if (pcr->ops->switch_output_voltage)
+		return pcr->ops->switch_output_voltage(pcr, voltage);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_switch_output_voltage);
+
+unsigned int rtsx_pci_card_exist(struct rtsx_pcr *pcr)
+{
+	unsigned int val;
+
+	val = rtsx_pci_readl(pcr, RTSX_BIPR);
+	if (pcr->ops->cd_deglitch)
+		val = pcr->ops->cd_deglitch(pcr);
+
+	return val;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_card_exist);
+
+void rtsx_pci_complete_unfinished_transfer(struct rtsx_pcr *pcr)
+{
+	struct completion finish;
+
+	pcr->finish_me = &finish;
+	init_completion(&finish);
+
+	if (pcr->done)
+		complete(pcr->done);
+
+	if (!pcr->remove_pci)
+		rtsx_pci_stop_cmd(pcr);
+
+	wait_for_completion_interruptible_timeout(&finish,
+			msecs_to_jiffies(2));
+	pcr->finish_me = NULL;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_complete_unfinished_transfer);
+
+static void rtsx_pci_card_detect(struct work_struct *work)
+{
+	struct delayed_work *dwork;
+	struct rtsx_pcr *pcr;
+	unsigned long flags;
+	unsigned int card_detect = 0, card_inserted, card_removed;
+	u32 irq_status;
+
+	dwork = to_delayed_work(work);
+	pcr = container_of(dwork, struct rtsx_pcr, carddet_work);
+
+	pcr_dbg(pcr, "--> %s\n", __func__);
+
+	mutex_lock(&pcr->pcr_mutex);
+	spin_lock_irqsave(&pcr->lock, flags);
+
+	irq_status = rtsx_pci_readl(pcr, RTSX_BIPR);
+	pcr_dbg(pcr, "irq_status: 0x%08x\n", irq_status);
+
+	irq_status &= CARD_EXIST;
+	card_inserted = pcr->card_inserted & irq_status;
+	card_removed = pcr->card_removed;
+	pcr->card_inserted = 0;
+	pcr->card_removed = 0;
+
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+	if (card_inserted || card_removed) {
+		pcr_dbg(pcr, "card_inserted: 0x%x, card_removed: 0x%x\n",
+			card_inserted, card_removed);
+
+		if (pcr->ops->cd_deglitch)
+			card_inserted = pcr->ops->cd_deglitch(pcr);
+
+		card_detect = card_inserted | card_removed;
+
+		pcr->card_exist |= card_inserted;
+		pcr->card_exist &= ~card_removed;
+	}
+
+	mutex_unlock(&pcr->pcr_mutex);
+
+	if ((card_detect & SD_EXIST) && pcr->slots[RTSX_SD_CARD].card_event)
+		pcr->slots[RTSX_SD_CARD].card_event(
+				pcr->slots[RTSX_SD_CARD].p_dev);
+	if ((card_detect & MS_EXIST) && pcr->slots[RTSX_MS_CARD].card_event)
+		pcr->slots[RTSX_MS_CARD].card_event(
+				pcr->slots[RTSX_MS_CARD].p_dev);
+}
+
+static void rtsx_pci_process_ocp(struct rtsx_pcr *pcr)
+{
+	if (pcr->ops->process_ocp)
+		pcr->ops->process_ocp(pcr);
+}
+
+static int rtsx_pci_process_ocp_interrupt(struct rtsx_pcr *pcr)
+{
+	if (pcr->option.ocp_en)
+		rtsx_pci_process_ocp(pcr);
+
+	return 0;
+}
+
+static irqreturn_t rtsx_pci_isr(int irq, void *dev_id)
+{
+	struct rtsx_pcr *pcr = dev_id;
+	u32 int_reg;
+
+	if (!pcr)
+		return IRQ_NONE;
+
+	spin_lock(&pcr->lock);
+
+	int_reg = rtsx_pci_readl(pcr, RTSX_BIPR);
+	/* Clear interrupt flag */
+	rtsx_pci_writel(pcr, RTSX_BIPR, int_reg);
+	if ((int_reg & pcr->bier) == 0) {
+		spin_unlock(&pcr->lock);
+		return IRQ_NONE;
+	}
+	if (int_reg == 0xFFFFFFFF) {
+		spin_unlock(&pcr->lock);
+		return IRQ_HANDLED;
+	}
+
+	int_reg &= (pcr->bier | 0x7FFFFF);
+
+	if (int_reg & SD_OC_INT)
+		rtsx_pci_process_ocp_interrupt(pcr);
+
+	if (int_reg & SD_INT) {
+		if (int_reg & SD_EXIST) {
+			pcr->card_inserted |= SD_EXIST;
+		} else {
+			pcr->card_removed |= SD_EXIST;
+			pcr->card_inserted &= ~SD_EXIST;
+		}
+		pcr->dma_error_count = 0;
+	}
+
+	if (int_reg & MS_INT) {
+		if (int_reg & MS_EXIST) {
+			pcr->card_inserted |= MS_EXIST;
+		} else {
+			pcr->card_removed |= MS_EXIST;
+			pcr->card_inserted &= ~MS_EXIST;
+		}
+	}
+
+	if (int_reg & (NEED_COMPLETE_INT | DELINK_INT)) {
+		if (int_reg & (TRANS_FAIL_INT | DELINK_INT)) {
+			pcr->trans_result = TRANS_RESULT_FAIL;
+			if (pcr->done)
+				complete(pcr->done);
+		} else if (int_reg & TRANS_OK_INT) {
+			pcr->trans_result = TRANS_RESULT_OK;
+			if (pcr->done)
+				complete(pcr->done);
+		}
+	}
+
+	if (pcr->card_inserted || pcr->card_removed)
+		schedule_delayed_work(&pcr->carddet_work,
+				msecs_to_jiffies(200));
+
+	spin_unlock(&pcr->lock);
+	return IRQ_HANDLED;
+}
+
+static int rtsx_pci_acquire_irq(struct rtsx_pcr *pcr)
+{
+	pcr_dbg(pcr, "%s: pcr->msi_en = %d, pci->irq = %d\n",
+			__func__, pcr->msi_en, pcr->pci->irq);
+
+	if (request_irq(pcr->pci->irq, rtsx_pci_isr,
+			pcr->msi_en ? 0 : IRQF_SHARED,
+			DRV_NAME_RTSX_PCI, pcr)) {
+		dev_err(&(pcr->pci->dev),
+			"rtsx_sdmmc: unable to grab IRQ %d, disabling device\n",
+			pcr->pci->irq);
+		return -1;
+	}
+
+	pcr->irq = pcr->pci->irq;
+	pci_intx(pcr->pci, !pcr->msi_en);
+
+	return 0;
+}
+
+static void rtsx_enable_aspm(struct rtsx_pcr *pcr)
+{
+	if (pcr->ops->set_aspm)
+		pcr->ops->set_aspm(pcr, true);
+	else
+		rtsx_comm_set_aspm(pcr, true);
+}
+
+static void rtsx_comm_pm_power_saving(struct rtsx_pcr *pcr)
+{
+	struct rtsx_cr_option *option = &pcr->option;
+
+	if (option->ltr_enabled) {
+		u32 latency = option->ltr_l1off_latency;
+
+		if (rtsx_check_dev_flag(pcr, L1_SNOOZE_TEST_EN))
+			mdelay(option->l1_snooze_delay);
+
+		rtsx_set_ltr_latency(pcr, latency);
+	}
+
+	if (rtsx_check_dev_flag(pcr, LTR_L1SS_PWR_GATE_EN))
+		rtsx_set_l1off_sub_cfg_d0(pcr, 0);
+
+	rtsx_enable_aspm(pcr);
+}
+
+static void rtsx_pm_power_saving(struct rtsx_pcr *pcr)
+{
+	if (pcr->ops->power_saving)
+		pcr->ops->power_saving(pcr);
+	else
+		rtsx_comm_pm_power_saving(pcr);
+}
+
+static void rtsx_pci_idle_work(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct rtsx_pcr *pcr = container_of(dwork, struct rtsx_pcr, idle_work);
+
+	pcr_dbg(pcr, "--> %s\n", __func__);
+
+	mutex_lock(&pcr->pcr_mutex);
+
+	pcr->state = PDEV_STAT_IDLE;
+
+	if (pcr->ops->disable_auto_blink)
+		pcr->ops->disable_auto_blink(pcr);
+	if (pcr->ops->turn_off_led)
+		pcr->ops->turn_off_led(pcr);
+
+	rtsx_pm_power_saving(pcr);
+
+	mutex_unlock(&pcr->pcr_mutex);
+}
+
+#ifdef CONFIG_PM
+static void rtsx_pci_power_off(struct rtsx_pcr *pcr, u8 pm_state)
+{
+	if (pcr->ops->turn_off_led)
+		pcr->ops->turn_off_led(pcr);
+
+	rtsx_pci_writel(pcr, RTSX_BIER, 0);
+	pcr->bier = 0;
+
+	rtsx_pci_write_register(pcr, PETXCFG, 0x08, 0x08);
+	rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, pm_state);
+
+	if (pcr->ops->force_power_down)
+		pcr->ops->force_power_down(pcr, pm_state);
+}
+#endif
+
+void rtsx_pci_enable_ocp(struct rtsx_pcr *pcr)
+{
+	u8 val = SD_OCP_INT_EN | SD_DETECT_EN;
+
+	if (pcr->ops->enable_ocp)
+		pcr->ops->enable_ocp(pcr);
+	else
+		rtsx_pci_write_register(pcr, REG_OCPCTL, 0xFF, val);
+
+}
+
+void rtsx_pci_disable_ocp(struct rtsx_pcr *pcr)
+{
+	u8 mask = SD_OCP_INT_EN | SD_DETECT_EN;
+
+	if (pcr->ops->disable_ocp)
+		pcr->ops->disable_ocp(pcr);
+	else
+		rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0);
+}
+
+void rtsx_pci_init_ocp(struct rtsx_pcr *pcr)
+{
+	if (pcr->ops->init_ocp) {
+		pcr->ops->init_ocp(pcr);
+	} else {
+		struct rtsx_cr_option *option = &(pcr->option);
+
+		if (option->ocp_en) {
+			u8 val = option->sd_400mA_ocp_thd;
+
+			rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN, 0);
+			rtsx_pci_write_register(pcr, REG_OCPPARA1,
+				SD_OCP_TIME_MASK, SD_OCP_TIME_800);
+			rtsx_pci_write_register(pcr, REG_OCPPARA2,
+				SD_OCP_THD_MASK, val);
+			rtsx_pci_write_register(pcr, REG_OCPGLITCH,
+				SD_OCP_GLITCH_MASK, pcr->hw_param.ocp_glitch);
+			rtsx_pci_enable_ocp(pcr);
+		} else {
+			/* OC power down */
+			rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN,
+				OC_POWER_DOWN);
+		}
+	}
+}
+
+int rtsx_pci_get_ocpstat(struct rtsx_pcr *pcr, u8 *val)
+{
+	if (pcr->ops->get_ocpstat)
+		return pcr->ops->get_ocpstat(pcr, val);
+	else
+		return rtsx_pci_read_register(pcr, REG_OCPSTAT, val);
+}
+
+void rtsx_pci_clear_ocpstat(struct rtsx_pcr *pcr)
+{
+	if (pcr->ops->clear_ocpstat) {
+		pcr->ops->clear_ocpstat(pcr);
+	} else {
+		u8 mask = SD_OCP_INT_CLR | SD_OC_CLR;
+		u8 val = SD_OCP_INT_CLR | SD_OC_CLR;
+
+		rtsx_pci_write_register(pcr, REG_OCPCTL, mask, val);
+		rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0);
+	}
+}
+
+int rtsx_sd_power_off_card3v3(struct rtsx_pcr *pcr)
+{
+	rtsx_pci_write_register(pcr, CARD_CLK_EN, SD_CLK_EN |
+		MS_CLK_EN | SD40_CLK_EN, 0);
+	rtsx_pci_write_register(pcr, CARD_OE, SD_OUTPUT_EN, 0);
+
+	rtsx_pci_card_power_off(pcr, RTSX_SD_CARD);
+
+	msleep(50);
+
+	rtsx_pci_card_pull_ctl_disable(pcr, RTSX_SD_CARD);
+
+	return 0;
+}
+
+int rtsx_ms_power_off_card3v3(struct rtsx_pcr *pcr)
+{
+	rtsx_pci_write_register(pcr, CARD_CLK_EN, SD_CLK_EN |
+		MS_CLK_EN | SD40_CLK_EN, 0);
+
+	rtsx_pci_card_pull_ctl_disable(pcr, RTSX_MS_CARD);
+
+	rtsx_pci_write_register(pcr, CARD_OE, MS_OUTPUT_EN, 0);
+	rtsx_pci_card_power_off(pcr, RTSX_MS_CARD);
+
+	return 0;
+}
+
+static int rtsx_pci_init_hw(struct rtsx_pcr *pcr)
+{
+	int err;
+
+	pcr->pcie_cap = pci_find_capability(pcr->pci, PCI_CAP_ID_EXP);
+	rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr);
+
+	rtsx_pci_enable_bus_int(pcr);
+
+	/* Power on SSC */
+	err = rtsx_pci_write_register(pcr, FPDCTL, SSC_POWER_DOWN, 0);
+	if (err < 0)
+		return err;
+
+	/* Wait SSC power stable */
+	udelay(200);
+
+	rtsx_pci_disable_aspm(pcr);
+	if (pcr->ops->optimize_phy) {
+		err = pcr->ops->optimize_phy(pcr);
+		if (err < 0)
+			return err;
+	}
+
+	rtsx_pci_init_cmd(pcr);
+
+	/* Set mcu_cnt to 7 to ensure data can be sampled properly */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_DIV, 0x07, 0x07);
+
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, HOST_SLEEP_STATE, 0x03, 0x00);
+	/* Disable card clock */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_CLK_EN, 0x1E, 0);
+	/* Reset delink mode */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CHANGE_LINK_STATE, 0x0A, 0);
+	/* Card driving select */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_DRIVE_SEL,
+			0xFF, pcr->card_drive_sel);
+	/* Enable SSC Clock */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1,
+			0xFF, SSC_8X_EN | SSC_SEL_4M);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, 0xFF, 0x12);
+	/* Disable cd_pwr_save */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CHANGE_LINK_STATE, 0x16, 0x10);
+	/* Clear Link Ready Interrupt */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, IRQSTAT0,
+			LINK_RDY_INT, LINK_RDY_INT);
+	/* Enlarge the estimation window of PERST# glitch
+	 * to reduce the chance of invalid card interrupt
+	 */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PERST_GLITCH_WIDTH, 0xFF, 0x80);
+	/* Update RC oscillator to 400k
+	 * bit[0] F_HIGH: for RC oscillator, Rst_value is 1'b1
+	 *                1: 2M  0: 400k
+	 */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RCCTL, 0x01, 0x00);
+	/* Set interrupt write clear
+	 * bit 1: U_elbi_if_rd_clr_en
+	 *	1: Enable ELBI interrupt[31:22] & [7:0] flag read clear
+	 *	0: ELBI interrupt flag[31:22] & [7:0] only can be write clear
+	 */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, NFTS_TX_CTRL, 0x02, 0);
+
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	switch (PCI_PID(pcr)) {
+	case PID_5250:
+	case PID_524A:
+	case PID_525A:
+	case PID_5260:
+		rtsx_pci_write_register(pcr, PM_CLK_FORCE_CTL, 1, 1);
+		break;
+	default:
+		break;
+	}
+
+	/* Enable clk_request_n to enable clock power management */
+	rtsx_pci_write_config_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL + 1, 1);
+	/* Enter L1 when host tx idle */
+	rtsx_pci_write_config_byte(pcr, 0x70F, 0x5B);
+
+	if (pcr->ops->extra_init_hw) {
+		err = pcr->ops->extra_init_hw(pcr);
+		if (err < 0)
+			return err;
+	}
+
+	/* No CD interrupt if probing driver with card inserted.
+	 * So we need to initialize pcr->card_exist here.
+	 */
+	if (pcr->ops->cd_deglitch)
+		pcr->card_exist = pcr->ops->cd_deglitch(pcr);
+	else
+		pcr->card_exist = rtsx_pci_readl(pcr, RTSX_BIPR) & CARD_EXIST;
+
+	return 0;
+}
+
+static int rtsx_pci_init_chip(struct rtsx_pcr *pcr)
+{
+	int err;
+
+	spin_lock_init(&pcr->lock);
+	mutex_init(&pcr->pcr_mutex);
+
+	switch (PCI_PID(pcr)) {
+	default:
+	case 0x5209:
+		rts5209_init_params(pcr);
+		break;
+
+	case 0x5229:
+		rts5229_init_params(pcr);
+		break;
+
+	case 0x5289:
+		rtl8411_init_params(pcr);
+		break;
+
+	case 0x5227:
+		rts5227_init_params(pcr);
+		break;
+
+	case 0x522A:
+		rts522a_init_params(pcr);
+		break;
+
+	case 0x5249:
+		rts5249_init_params(pcr);
+		break;
+
+	case 0x524A:
+		rts524a_init_params(pcr);
+		break;
+
+	case 0x525A:
+		rts525a_init_params(pcr);
+		break;
+
+	case 0x5287:
+		rtl8411b_init_params(pcr);
+		break;
+
+	case 0x5286:
+		rtl8402_init_params(pcr);
+		break;
+	case 0x5260:
+		rts5260_init_params(pcr);
+		break;
+	}
+
+	pcr_dbg(pcr, "PID: 0x%04x, IC version: 0x%02x\n",
+			PCI_PID(pcr), pcr->ic_version);
+
+	pcr->slots = kcalloc(pcr->num_slots, sizeof(struct rtsx_slot),
+			GFP_KERNEL);
+	if (!pcr->slots)
+		return -ENOMEM;
+
+	if (pcr->ops->fetch_vendor_settings)
+		pcr->ops->fetch_vendor_settings(pcr);
+
+	pcr_dbg(pcr, "pcr->aspm_en = 0x%x\n", pcr->aspm_en);
+	pcr_dbg(pcr, "pcr->sd30_drive_sel_1v8 = 0x%x\n",
+			pcr->sd30_drive_sel_1v8);
+	pcr_dbg(pcr, "pcr->sd30_drive_sel_3v3 = 0x%x\n",
+			pcr->sd30_drive_sel_3v3);
+	pcr_dbg(pcr, "pcr->card_drive_sel = 0x%x\n",
+			pcr->card_drive_sel);
+	pcr_dbg(pcr, "pcr->flags = 0x%x\n", pcr->flags);
+
+	pcr->state = PDEV_STAT_IDLE;
+	err = rtsx_pci_init_hw(pcr);
+	if (err < 0) {
+		kfree(pcr->slots);
+		return err;
+	}
+
+	return 0;
+}
+
+static int rtsx_pci_probe(struct pci_dev *pcidev,
+			  const struct pci_device_id *id)
+{
+	struct rtsx_pcr *pcr;
+	struct pcr_handle *handle;
+	u32 base, len;
+	int ret, i, bar = 0;
+
+	dev_dbg(&(pcidev->dev),
+		": Realtek PCI-E Card Reader found at %s [%04x:%04x] (rev %x)\n",
+		pci_name(pcidev), (int)pcidev->vendor, (int)pcidev->device,
+		(int)pcidev->revision);
+
+	ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(32));
+	if (ret < 0)
+		return ret;
+
+	ret = pci_enable_device(pcidev);
+	if (ret)
+		return ret;
+
+	ret = pci_request_regions(pcidev, DRV_NAME_RTSX_PCI);
+	if (ret)
+		goto disable;
+
+	pcr = kzalloc(sizeof(*pcr), GFP_KERNEL);
+	if (!pcr) {
+		ret = -ENOMEM;
+		goto release_pci;
+	}
+
+	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+	if (!handle) {
+		ret = -ENOMEM;
+		goto free_pcr;
+	}
+	handle->pcr = pcr;
+
+	idr_preload(GFP_KERNEL);
+	spin_lock(&rtsx_pci_lock);
+	ret = idr_alloc(&rtsx_pci_idr, pcr, 0, 0, GFP_NOWAIT);
+	if (ret >= 0)
+		pcr->id = ret;
+	spin_unlock(&rtsx_pci_lock);
+	idr_preload_end();
+	if (ret < 0)
+		goto free_handle;
+
+	pcr->pci = pcidev;
+	dev_set_drvdata(&pcidev->dev, handle);
+
+	if (CHK_PCI_PID(pcr, 0x525A))
+		bar = 1;
+	len = pci_resource_len(pcidev, bar);
+	base = pci_resource_start(pcidev, bar);
+	pcr->remap_addr = ioremap_nocache(base, len);
+	if (!pcr->remap_addr) {
+		ret = -ENOMEM;
+		goto free_handle;
+	}
+
+	pcr->rtsx_resv_buf = dma_alloc_coherent(&(pcidev->dev),
+			RTSX_RESV_BUF_LEN, &(pcr->rtsx_resv_buf_addr),
+			GFP_KERNEL);
+	if (pcr->rtsx_resv_buf == NULL) {
+		ret = -ENXIO;
+		goto unmap;
+	}
+	pcr->host_cmds_ptr = pcr->rtsx_resv_buf;
+	pcr->host_cmds_addr = pcr->rtsx_resv_buf_addr;
+	pcr->host_sg_tbl_ptr = pcr->rtsx_resv_buf + HOST_CMDS_BUF_LEN;
+	pcr->host_sg_tbl_addr = pcr->rtsx_resv_buf_addr + HOST_CMDS_BUF_LEN;
+
+	pcr->card_inserted = 0;
+	pcr->card_removed = 0;
+	INIT_DELAYED_WORK(&pcr->carddet_work, rtsx_pci_card_detect);
+	INIT_DELAYED_WORK(&pcr->idle_work, rtsx_pci_idle_work);
+
+	pcr->msi_en = msi_en;
+	if (pcr->msi_en) {
+		ret = pci_enable_msi(pcidev);
+		if (ret)
+			pcr->msi_en = false;
+	}
+
+	ret = rtsx_pci_acquire_irq(pcr);
+	if (ret < 0)
+		goto disable_msi;
+
+	pci_set_master(pcidev);
+	synchronize_irq(pcr->irq);
+
+	ret = rtsx_pci_init_chip(pcr);
+	if (ret < 0)
+		goto disable_irq;
+
+	for (i = 0; i < ARRAY_SIZE(rtsx_pcr_cells); i++) {
+		rtsx_pcr_cells[i].platform_data = handle;
+		rtsx_pcr_cells[i].pdata_size = sizeof(*handle);
+	}
+	ret = mfd_add_devices(&pcidev->dev, pcr->id, rtsx_pcr_cells,
+			ARRAY_SIZE(rtsx_pcr_cells), NULL, 0, NULL);
+	if (ret < 0)
+		goto free_slots;
+
+	schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200));
+
+	return 0;
+
+free_slots:
+	kfree(pcr->slots);
+disable_irq:
+	free_irq(pcr->irq, (void *)pcr);
+disable_msi:
+	if (pcr->msi_en)
+		pci_disable_msi(pcr->pci);
+	dma_free_coherent(&(pcr->pci->dev), RTSX_RESV_BUF_LEN,
+			pcr->rtsx_resv_buf, pcr->rtsx_resv_buf_addr);
+unmap:
+	iounmap(pcr->remap_addr);
+free_handle:
+	kfree(handle);
+free_pcr:
+	kfree(pcr);
+release_pci:
+	pci_release_regions(pcidev);
+disable:
+	pci_disable_device(pcidev);
+
+	return ret;
+}
+
+static void rtsx_pci_remove(struct pci_dev *pcidev)
+{
+	struct pcr_handle *handle = pci_get_drvdata(pcidev);
+	struct rtsx_pcr *pcr = handle->pcr;
+
+	pcr->remove_pci = true;
+
+	/* Disable interrupts at the pcr level */
+	spin_lock_irq(&pcr->lock);
+	rtsx_pci_writel(pcr, RTSX_BIER, 0);
+	pcr->bier = 0;
+	spin_unlock_irq(&pcr->lock);
+
+	cancel_delayed_work_sync(&pcr->carddet_work);
+	cancel_delayed_work_sync(&pcr->idle_work);
+
+	mfd_remove_devices(&pcidev->dev);
+
+	dma_free_coherent(&(pcr->pci->dev), RTSX_RESV_BUF_LEN,
+			pcr->rtsx_resv_buf, pcr->rtsx_resv_buf_addr);
+	free_irq(pcr->irq, (void *)pcr);
+	if (pcr->msi_en)
+		pci_disable_msi(pcr->pci);
+	iounmap(pcr->remap_addr);
+
+	pci_release_regions(pcidev);
+	pci_disable_device(pcidev);
+
+	spin_lock(&rtsx_pci_lock);
+	idr_remove(&rtsx_pci_idr, pcr->id);
+	spin_unlock(&rtsx_pci_lock);
+
+	kfree(pcr->slots);
+	kfree(pcr);
+	kfree(handle);
+
+	dev_dbg(&(pcidev->dev),
+		": Realtek PCI-E Card Reader at %s [%04x:%04x] has been removed\n",
+		pci_name(pcidev), (int)pcidev->vendor, (int)pcidev->device);
+}
+
+#ifdef CONFIG_PM
+
+static int rtsx_pci_suspend(struct pci_dev *pcidev, pm_message_t state)
+{
+	struct pcr_handle *handle;
+	struct rtsx_pcr *pcr;
+
+	dev_dbg(&(pcidev->dev), "--> %s\n", __func__);
+
+	handle = pci_get_drvdata(pcidev);
+	pcr = handle->pcr;
+
+	cancel_delayed_work(&pcr->carddet_work);
+	cancel_delayed_work(&pcr->idle_work);
+
+	mutex_lock(&pcr->pcr_mutex);
+
+	rtsx_pci_power_off(pcr, HOST_ENTER_S3);
+
+	pci_save_state(pcidev);
+	pci_enable_wake(pcidev, pci_choose_state(pcidev, state), 0);
+	pci_disable_device(pcidev);
+	pci_set_power_state(pcidev, pci_choose_state(pcidev, state));
+
+	mutex_unlock(&pcr->pcr_mutex);
+	return 0;
+}
+
+static int rtsx_pci_resume(struct pci_dev *pcidev)
+{
+	struct pcr_handle *handle;
+	struct rtsx_pcr *pcr;
+	int ret = 0;
+
+	dev_dbg(&(pcidev->dev), "--> %s\n", __func__);
+
+	handle = pci_get_drvdata(pcidev);
+	pcr = handle->pcr;
+
+	mutex_lock(&pcr->pcr_mutex);
+
+	pci_set_power_state(pcidev, PCI_D0);
+	pci_restore_state(pcidev);
+	ret = pci_enable_device(pcidev);
+	if (ret)
+		goto out;
+	pci_set_master(pcidev);
+
+	ret = rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, 0x00);
+	if (ret)
+		goto out;
+
+	ret = rtsx_pci_init_hw(pcr);
+	if (ret)
+		goto out;
+
+	schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200));
+
+out:
+	mutex_unlock(&pcr->pcr_mutex);
+	return ret;
+}
+
+static void rtsx_pci_shutdown(struct pci_dev *pcidev)
+{
+	struct pcr_handle *handle;
+	struct rtsx_pcr *pcr;
+
+	dev_dbg(&(pcidev->dev), "--> %s\n", __func__);
+
+	handle = pci_get_drvdata(pcidev);
+	pcr = handle->pcr;
+	rtsx_pci_power_off(pcr, HOST_ENTER_S1);
+
+	pci_disable_device(pcidev);
+	free_irq(pcr->irq, (void *)pcr);
+	if (pcr->msi_en)
+		pci_disable_msi(pcr->pci);
+}
+
+#else /* CONFIG_PM */
+
+#define rtsx_pci_suspend NULL
+#define rtsx_pci_resume NULL
+#define rtsx_pci_shutdown NULL
+
+#endif /* CONFIG_PM */
+
+static struct pci_driver rtsx_pci_driver = {
+	.name = DRV_NAME_RTSX_PCI,
+	.id_table = rtsx_pci_ids,
+	.probe = rtsx_pci_probe,
+	.remove = rtsx_pci_remove,
+	.suspend = rtsx_pci_suspend,
+	.resume = rtsx_pci_resume,
+	.shutdown = rtsx_pci_shutdown,
+};
+module_pci_driver(rtsx_pci_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Wei WANG <wei_wang@realsil.com.cn>");
+MODULE_DESCRIPTION("Realtek PCI-E Card Reader Driver");
diff --git a/drivers/misc/cardreader/rtsx_pcr.h b/drivers/misc/cardreader/rtsx_pcr.h
new file mode 100644
index 000000000..6ea1655db
--- /dev/null
+++ b/drivers/misc/cardreader/rtsx_pcr.h
@@ -0,0 +1,113 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ */
+
+#ifndef __RTSX_PCR_H
+#define __RTSX_PCR_H
+
+#include <linux/rtsx_pci.h>
+
+#define MIN_DIV_N_PCR		80
+#define MAX_DIV_N_PCR		208
+
+#define RTS522A_PM_CTRL3		0xFF7E
+
+#define RTS524A_PME_FORCE_CTL		0xFF78
+#define RTS524A_PM_CTRL3		0xFF7E
+
+#define LTR_ACTIVE_LATENCY_DEF		0x883C
+#define LTR_IDLE_LATENCY_DEF		0x892C
+#define LTR_L1OFF_LATENCY_DEF		0x9003
+#define L1_SNOOZE_DELAY_DEF		1
+#define LTR_L1OFF_SSPWRGATE_5249_DEF		0xAF
+#define LTR_L1OFF_SSPWRGATE_5250_DEF		0xFF
+#define LTR_L1OFF_SNOOZE_SSPWRGATE_5249_DEF	0xAC
+#define LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF	0xF8
+#define CMD_TIMEOUT_DEF		100
+#define ASPM_MASK_NEG		0xFC
+#define MASK_8_BIT_DEF		0xFF
+
+#define SSC_CLOCK_STABLE_WAIT	130
+
+int __rtsx_pci_write_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 val);
+int __rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val);
+
+void rts5209_init_params(struct rtsx_pcr *pcr);
+void rts5229_init_params(struct rtsx_pcr *pcr);
+void rtl8411_init_params(struct rtsx_pcr *pcr);
+void rtl8402_init_params(struct rtsx_pcr *pcr);
+void rts5227_init_params(struct rtsx_pcr *pcr);
+void rts522a_init_params(struct rtsx_pcr *pcr);
+void rts5249_init_params(struct rtsx_pcr *pcr);
+void rts524a_init_params(struct rtsx_pcr *pcr);
+void rts525a_init_params(struct rtsx_pcr *pcr);
+void rtl8411b_init_params(struct rtsx_pcr *pcr);
+void rts5260_init_params(struct rtsx_pcr *pcr);
+
+static inline u8 map_sd_drive(int idx)
+{
+	u8 sd_drive[4] = {
+		0x01,	/* Type D */
+		0x02,	/* Type C */
+		0x05,	/* Type A */
+		0x03	/* Type B */
+	};
+
+	return sd_drive[idx];
+}
+
+#define rtsx_vendor_setting_valid(reg)		(!((reg) & 0x1000000))
+#define rts5209_vendor_setting1_valid(reg)	(!((reg) & 0x80))
+#define rts5209_vendor_setting2_valid(reg)	((reg) & 0x80)
+
+#define rtsx_reg_to_aspm(reg)			(((reg) >> 28) & 0x03)
+#define rtsx_reg_to_sd30_drive_sel_1v8(reg)	(((reg) >> 26) & 0x03)
+#define rtsx_reg_to_sd30_drive_sel_3v3(reg)	(((reg) >> 5) & 0x03)
+#define rtsx_reg_to_card_drive_sel(reg)		((((reg) >> 25) & 0x01) << 6)
+#define rtsx_reg_check_reverse_socket(reg)	((reg) & 0x4000)
+#define rts5209_reg_to_aspm(reg)		(((reg) >> 5) & 0x03)
+#define rts5209_reg_check_ms_pmos(reg)		(!((reg) & 0x08))
+#define rts5209_reg_to_sd30_drive_sel_1v8(reg)	(((reg) >> 3) & 0x07)
+#define rts5209_reg_to_sd30_drive_sel_3v3(reg)	((reg) & 0x07)
+#define rts5209_reg_to_card_drive_sel(reg)	((reg) >> 8)
+#define rtl8411_reg_to_sd30_drive_sel_3v3(reg)	(((reg) >> 5) & 0x07)
+#define rtl8411b_reg_to_sd30_drive_sel_3v3(reg)	((reg) & 0x03)
+
+#define set_pull_ctrl_tables(pcr, __device)				\
+do {									\
+	pcr->sd_pull_ctl_enable_tbl  = __device##_sd_pull_ctl_enable_tbl;  \
+	pcr->sd_pull_ctl_disable_tbl = __device##_sd_pull_ctl_disable_tbl; \
+	pcr->ms_pull_ctl_enable_tbl  = __device##_ms_pull_ctl_enable_tbl;  \
+	pcr->ms_pull_ctl_disable_tbl = __device##_ms_pull_ctl_disable_tbl; \
+} while (0)
+
+/* generic operations */
+int rtsx_gops_pm_reset(struct rtsx_pcr *pcr);
+int rtsx_set_ltr_latency(struct rtsx_pcr *pcr, u32 latency);
+int rtsx_set_l1off_sub(struct rtsx_pcr *pcr, u8 val);
+void rtsx_pci_init_ocp(struct rtsx_pcr *pcr);
+void rtsx_pci_disable_ocp(struct rtsx_pcr *pcr);
+void rtsx_pci_enable_ocp(struct rtsx_pcr *pcr);
+int rtsx_pci_get_ocpstat(struct rtsx_pcr *pcr, u8 *val);
+void rtsx_pci_clear_ocpstat(struct rtsx_pcr *pcr);
+int rtsx_sd_power_off_card3v3(struct rtsx_pcr *pcr);
+int rtsx_ms_power_off_card3v3(struct rtsx_pcr *pcr);
+
+#endif
diff --git a/drivers/misc/cardreader/rtsx_usb.c b/drivers/misc/cardreader/rtsx_usb.c
new file mode 100644
index 000000000..869c176d4
--- /dev/null
+++ b/drivers/misc/cardreader/rtsx_usb.c
@@ -0,0 +1,792 @@
+/* Driver for Realtek USB card reader
+ *
+ * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Roger Tseng <rogerable@realtek.com>
+ */
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/usb.h>
+#include <linux/platform_device.h>
+#include <linux/mfd/core.h>
+#include <linux/rtsx_usb.h>
+
+static int polling_pipe = 1;
+module_param(polling_pipe, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(polling_pipe, "polling pipe (0: ctl, 1: bulk)");
+
+static const struct mfd_cell rtsx_usb_cells[] = {
+	[RTSX_USB_SD_CARD] = {
+		.name = "rtsx_usb_sdmmc",
+		.pdata_size = 0,
+	},
+	[RTSX_USB_MS_CARD] = {
+		.name = "rtsx_usb_ms",
+		.pdata_size = 0,
+	},
+};
+
+static void rtsx_usb_sg_timed_out(struct timer_list *t)
+{
+	struct rtsx_ucr *ucr = from_timer(ucr, t, sg_timer);
+
+	dev_dbg(&ucr->pusb_intf->dev, "%s: sg transfer timed out", __func__);
+	usb_sg_cancel(&ucr->current_sg);
+}
+
+static int rtsx_usb_bulk_transfer_sglist(struct rtsx_ucr *ucr,
+		unsigned int pipe, struct scatterlist *sg, int num_sg,
+		unsigned int length, unsigned int *act_len, int timeout)
+{
+	int ret;
+
+	dev_dbg(&ucr->pusb_intf->dev, "%s: xfer %u bytes, %d entries\n",
+			__func__, length, num_sg);
+	ret = usb_sg_init(&ucr->current_sg, ucr->pusb_dev, pipe, 0,
+			sg, num_sg, length, GFP_NOIO);
+	if (ret)
+		return ret;
+
+	ucr->sg_timer.expires = jiffies + msecs_to_jiffies(timeout);
+	add_timer(&ucr->sg_timer);
+	usb_sg_wait(&ucr->current_sg);
+	if (!del_timer_sync(&ucr->sg_timer))
+		ret = -ETIMEDOUT;
+	else
+		ret = ucr->current_sg.status;
+
+	if (act_len)
+		*act_len = ucr->current_sg.bytes;
+
+	return ret;
+}
+
+int rtsx_usb_transfer_data(struct rtsx_ucr *ucr, unsigned int pipe,
+			      void *buf, unsigned int len, int num_sg,
+			      unsigned int *act_len, int timeout)
+{
+	if (timeout < 600)
+		timeout = 600;
+
+	if (num_sg)
+		return rtsx_usb_bulk_transfer_sglist(ucr, pipe,
+				(struct scatterlist *)buf, num_sg, len, act_len,
+				timeout);
+	else
+		return usb_bulk_msg(ucr->pusb_dev, pipe, buf, len, act_len,
+				timeout);
+}
+EXPORT_SYMBOL_GPL(rtsx_usb_transfer_data);
+
+static inline void rtsx_usb_seq_cmd_hdr(struct rtsx_ucr *ucr,
+		u16 addr, u16 len, u8 seq_type)
+{
+	rtsx_usb_cmd_hdr_tag(ucr);
+
+	ucr->cmd_buf[PACKET_TYPE] = seq_type;
+	ucr->cmd_buf[5] = (u8)(len >> 8);
+	ucr->cmd_buf[6] = (u8)len;
+	ucr->cmd_buf[8] = (u8)(addr >> 8);
+	ucr->cmd_buf[9] = (u8)addr;
+
+	if (seq_type == SEQ_WRITE)
+		ucr->cmd_buf[STAGE_FLAG] = 0;
+	else
+		ucr->cmd_buf[STAGE_FLAG] = STAGE_R;
+}
+
+static int rtsx_usb_seq_write_register(struct rtsx_ucr *ucr,
+		u16 addr, u16 len, u8 *data)
+{
+	u16 cmd_len = ALIGN(SEQ_WRITE_DATA_OFFSET + len, 4);
+
+	if (!data)
+		return -EINVAL;
+
+	if (cmd_len > IOBUF_SIZE)
+		return -EINVAL;
+
+	rtsx_usb_seq_cmd_hdr(ucr, addr, len, SEQ_WRITE);
+	memcpy(ucr->cmd_buf + SEQ_WRITE_DATA_OFFSET, data, len);
+
+	return rtsx_usb_transfer_data(ucr,
+			usb_sndbulkpipe(ucr->pusb_dev, EP_BULK_OUT),
+			ucr->cmd_buf, cmd_len, 0, NULL, 100);
+}
+
+static int rtsx_usb_seq_read_register(struct rtsx_ucr *ucr,
+		u16 addr, u16 len, u8 *data)
+{
+	int i, ret;
+	u16 rsp_len = round_down(len, 4);
+	u16 res_len = len - rsp_len;
+
+	if (!data)
+		return -EINVAL;
+
+	/* 4-byte aligned part */
+	if (rsp_len) {
+		rtsx_usb_seq_cmd_hdr(ucr, addr, len, SEQ_READ);
+		ret = rtsx_usb_transfer_data(ucr,
+				usb_sndbulkpipe(ucr->pusb_dev, EP_BULK_OUT),
+				ucr->cmd_buf, 12, 0, NULL, 100);
+		if (ret)
+			return ret;
+
+		ret = rtsx_usb_transfer_data(ucr,
+				usb_rcvbulkpipe(ucr->pusb_dev, EP_BULK_IN),
+				data, rsp_len, 0, NULL, 100);
+		if (ret)
+			return ret;
+	}
+
+	/* unaligned part */
+	for (i = 0; i < res_len; i++) {
+		ret = rtsx_usb_read_register(ucr, addr + rsp_len + i,
+				data + rsp_len + i);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int rtsx_usb_read_ppbuf(struct rtsx_ucr *ucr, u8 *buf, int buf_len)
+{
+	return rtsx_usb_seq_read_register(ucr, PPBUF_BASE2, (u16)buf_len, buf);
+}
+EXPORT_SYMBOL_GPL(rtsx_usb_read_ppbuf);
+
+int rtsx_usb_write_ppbuf(struct rtsx_ucr *ucr, u8 *buf, int buf_len)
+{
+	return rtsx_usb_seq_write_register(ucr, PPBUF_BASE2, (u16)buf_len, buf);
+}
+EXPORT_SYMBOL_GPL(rtsx_usb_write_ppbuf);
+
+int rtsx_usb_ep0_write_register(struct rtsx_ucr *ucr, u16 addr,
+		u8 mask, u8 data)
+{
+	u16 value, index;
+
+	addr |= EP0_WRITE_REG_CMD << EP0_OP_SHIFT;
+	value = swab16(addr);
+	index = mask | data << 8;
+
+	return usb_control_msg(ucr->pusb_dev,
+			usb_sndctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_REG_OP,
+			USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
+			value, index, NULL, 0, 100);
+}
+EXPORT_SYMBOL_GPL(rtsx_usb_ep0_write_register);
+
+int rtsx_usb_ep0_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data)
+{
+	u16 value;
+	u8 *buf;
+	int ret;
+
+	if (!data)
+		return -EINVAL;
+
+	buf = kzalloc(sizeof(u8), GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	addr |= EP0_READ_REG_CMD << EP0_OP_SHIFT;
+	value = swab16(addr);
+
+	ret = usb_control_msg(ucr->pusb_dev,
+			usb_rcvctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_REG_OP,
+			USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
+			value, 0, buf, 1, 100);
+	*data = *buf;
+
+	kfree(buf);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(rtsx_usb_ep0_read_register);
+
+void rtsx_usb_add_cmd(struct rtsx_ucr *ucr, u8 cmd_type, u16 reg_addr,
+		u8 mask, u8 data)
+{
+	int i;
+
+	if (ucr->cmd_idx < (IOBUF_SIZE - CMD_OFFSET) / 4) {
+		i = CMD_OFFSET + ucr->cmd_idx * 4;
+
+		ucr->cmd_buf[i++] = ((cmd_type & 0x03) << 6) |
+			(u8)((reg_addr >> 8) & 0x3F);
+		ucr->cmd_buf[i++] = (u8)reg_addr;
+		ucr->cmd_buf[i++] = mask;
+		ucr->cmd_buf[i++] = data;
+
+		ucr->cmd_idx++;
+	}
+}
+EXPORT_SYMBOL_GPL(rtsx_usb_add_cmd);
+
+int rtsx_usb_send_cmd(struct rtsx_ucr *ucr, u8 flag, int timeout)
+{
+	int ret;
+
+	ucr->cmd_buf[CNT_H] = (u8)(ucr->cmd_idx >> 8);
+	ucr->cmd_buf[CNT_L] = (u8)(ucr->cmd_idx);
+	ucr->cmd_buf[STAGE_FLAG] = flag;
+
+	ret = rtsx_usb_transfer_data(ucr,
+			usb_sndbulkpipe(ucr->pusb_dev, EP_BULK_OUT),
+			ucr->cmd_buf, ucr->cmd_idx * 4 + CMD_OFFSET,
+			0, NULL, timeout);
+	if (ret) {
+		rtsx_usb_clear_fsm_err(ucr);
+		return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_usb_send_cmd);
+
+int rtsx_usb_get_rsp(struct rtsx_ucr *ucr, int rsp_len, int timeout)
+{
+	if (rsp_len <= 0)
+		return -EINVAL;
+
+	rsp_len = ALIGN(rsp_len, 4);
+
+	return rtsx_usb_transfer_data(ucr,
+			usb_rcvbulkpipe(ucr->pusb_dev, EP_BULK_IN),
+			ucr->rsp_buf, rsp_len, 0, NULL, timeout);
+}
+EXPORT_SYMBOL_GPL(rtsx_usb_get_rsp);
+
+static int rtsx_usb_get_status_with_bulk(struct rtsx_ucr *ucr, u16 *status)
+{
+	int ret;
+
+	rtsx_usb_init_cmd(ucr);
+	rtsx_usb_add_cmd(ucr, READ_REG_CMD, CARD_EXIST, 0x00, 0x00);
+	rtsx_usb_add_cmd(ucr, READ_REG_CMD, OCPSTAT, 0x00, 0x00);
+	ret = rtsx_usb_send_cmd(ucr, MODE_CR, 100);
+	if (ret)
+		return ret;
+
+	ret = rtsx_usb_get_rsp(ucr, 2, 100);
+	if (ret)
+		return ret;
+
+	*status = ((ucr->rsp_buf[0] >> 2) & 0x0f) |
+		  ((ucr->rsp_buf[1] & 0x03) << 4);
+
+	return 0;
+}
+
+int rtsx_usb_get_card_status(struct rtsx_ucr *ucr, u16 *status)
+{
+	int ret;
+	u16 *buf;
+
+	if (!status)
+		return -EINVAL;
+
+	if (polling_pipe == 0) {
+		buf = kzalloc(sizeof(u16), GFP_KERNEL);
+		if (!buf)
+			return -ENOMEM;
+
+		ret = usb_control_msg(ucr->pusb_dev,
+				usb_rcvctrlpipe(ucr->pusb_dev, 0),
+				RTSX_USB_REQ_POLL,
+				USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
+				0, 0, buf, 2, 100);
+		*status = *buf;
+
+		kfree(buf);
+	} else {
+		ret = rtsx_usb_get_status_with_bulk(ucr, status);
+	}
+
+	/* usb_control_msg may return positive when success */
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_usb_get_card_status);
+
+static int rtsx_usb_write_phy_register(struct rtsx_ucr *ucr, u8 addr, u8 val)
+{
+	dev_dbg(&ucr->pusb_intf->dev, "Write 0x%x to phy register 0x%x\n",
+			val, addr);
+
+	rtsx_usb_init_cmd(ucr);
+
+	rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VSTAIN, 0xFF, val);
+	rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VCONTROL, 0xFF, addr & 0x0F);
+	rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00);
+	rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00);
+	rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x01);
+	rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VCONTROL,
+			0xFF, (addr >> 4) & 0x0F);
+	rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00);
+	rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00);
+	rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x01);
+
+	return rtsx_usb_send_cmd(ucr, MODE_C, 100);
+}
+
+int rtsx_usb_write_register(struct rtsx_ucr *ucr, u16 addr, u8 mask, u8 data)
+{
+	rtsx_usb_init_cmd(ucr);
+	rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, addr, mask, data);
+	return rtsx_usb_send_cmd(ucr, MODE_C, 100);
+}
+EXPORT_SYMBOL_GPL(rtsx_usb_write_register);
+
+int rtsx_usb_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data)
+{
+	int ret;
+
+	if (data != NULL)
+		*data = 0;
+
+	rtsx_usb_init_cmd(ucr);
+	rtsx_usb_add_cmd(ucr, READ_REG_CMD, addr, 0, 0);
+	ret = rtsx_usb_send_cmd(ucr, MODE_CR, 100);
+	if (ret)
+		return ret;
+
+	ret = rtsx_usb_get_rsp(ucr, 1, 100);
+	if (ret)
+		return ret;
+
+	if (data != NULL)
+		*data = ucr->rsp_buf[0];
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_usb_read_register);
+
+static inline u8 double_ssc_depth(u8 depth)
+{
+	return (depth > 1) ? (depth - 1) : depth;
+}
+
+static u8 revise_ssc_depth(u8 ssc_depth, u8 div)
+{
+	if (div > CLK_DIV_1) {
+		if (ssc_depth > div - 1)
+			ssc_depth -= (div - 1);
+		else
+			ssc_depth = SSC_DEPTH_2M;
+	}
+
+	return ssc_depth;
+}
+
+int rtsx_usb_switch_clock(struct rtsx_ucr *ucr, unsigned int card_clock,
+		u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk)
+{
+	int ret;
+	u8 n, clk_divider, mcu_cnt, div;
+
+	if (!card_clock) {
+		ucr->cur_clk = 0;
+		return 0;
+	}
+
+	if (initial_mode) {
+		/* We use 250k(around) here, in initial stage */
+		clk_divider = SD_CLK_DIVIDE_128;
+		card_clock = 30000000;
+	} else {
+		clk_divider = SD_CLK_DIVIDE_0;
+	}
+
+	ret = rtsx_usb_write_register(ucr, SD_CFG1,
+			SD_CLK_DIVIDE_MASK, clk_divider);
+	if (ret < 0)
+		return ret;
+
+	card_clock /= 1000000;
+	dev_dbg(&ucr->pusb_intf->dev,
+			"Switch card clock to %dMHz\n", card_clock);
+
+	if (!initial_mode && double_clk)
+		card_clock *= 2;
+	dev_dbg(&ucr->pusb_intf->dev,
+			"Internal SSC clock: %dMHz (cur_clk = %d)\n",
+			card_clock, ucr->cur_clk);
+
+	if (card_clock == ucr->cur_clk)
+		return 0;
+
+	/* Converting clock value into internal settings: n and div */
+	n = card_clock - 2;
+	if ((card_clock <= 2) || (n > MAX_DIV_N))
+		return -EINVAL;
+
+	mcu_cnt = 60/card_clock + 3;
+	if (mcu_cnt > 15)
+		mcu_cnt = 15;
+
+	/* Make sure that the SSC clock div_n is not less than MIN_DIV_N */
+
+	div = CLK_DIV_1;
+	while (n < MIN_DIV_N && div < CLK_DIV_4) {
+		n = (n + 2) * 2 - 2;
+		div++;
+	}
+	dev_dbg(&ucr->pusb_intf->dev, "n = %d, div = %d\n", n, div);
+
+	if (double_clk)
+		ssc_depth = double_ssc_depth(ssc_depth);
+
+	ssc_depth = revise_ssc_depth(ssc_depth, div);
+	dev_dbg(&ucr->pusb_intf->dev, "ssc_depth = %d\n", ssc_depth);
+
+	rtsx_usb_init_cmd(ucr);
+	rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CLK_DIV, CLK_CHANGE, CLK_CHANGE);
+	rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CLK_DIV,
+			0x3F, (div << 4) | mcu_cnt);
+	rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, 0);
+	rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_CTL2,
+			SSC_DEPTH_MASK, ssc_depth);
+	rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_DIV_N_0, 0xFF, n);
+	rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, SSC_RSTB);
+	if (vpclk) {
+		rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SD_VPCLK0_CTL,
+				PHASE_NOT_RESET, 0);
+		rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SD_VPCLK0_CTL,
+				PHASE_NOT_RESET, PHASE_NOT_RESET);
+	}
+
+	ret = rtsx_usb_send_cmd(ucr, MODE_C, 2000);
+	if (ret < 0)
+		return ret;
+
+	ret = rtsx_usb_write_register(ucr, SSC_CTL1, 0xff,
+			SSC_RSTB | SSC_8X_EN | SSC_SEL_4M);
+	if (ret < 0)
+		return ret;
+
+	/* Wait SSC clock stable */
+	usleep_range(100, 1000);
+
+	ret = rtsx_usb_write_register(ucr, CLK_DIV, CLK_CHANGE, 0);
+	if (ret < 0)
+		return ret;
+
+	ucr->cur_clk = card_clock;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_usb_switch_clock);
+
+int rtsx_usb_card_exclusive_check(struct rtsx_ucr *ucr, int card)
+{
+	int ret;
+	u16 val;
+	u16 cd_mask[] = {
+		[RTSX_USB_SD_CARD] = (CD_MASK & ~SD_CD),
+		[RTSX_USB_MS_CARD] = (CD_MASK & ~MS_CD)
+	};
+
+	ret = rtsx_usb_get_card_status(ucr, &val);
+	/*
+	 * If get status fails, return 0 (ok) for the exclusive check
+	 * and let the flow fail at somewhere else.
+	 */
+	if (ret)
+		return 0;
+
+	if (val & cd_mask[card])
+		return -EIO;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_usb_card_exclusive_check);
+
+static int rtsx_usb_reset_chip(struct rtsx_ucr *ucr)
+{
+	int ret;
+	u8 val;
+
+	rtsx_usb_init_cmd(ucr);
+
+	if (CHECK_PKG(ucr, LQFP48)) {
+		rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PWR_CTL,
+				LDO3318_PWR_MASK, LDO_SUSPEND);
+		rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PWR_CTL,
+				FORCE_LDO_POWERB, FORCE_LDO_POWERB);
+		rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PULL_CTL1,
+				0x30, 0x10);
+		rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PULL_CTL5,
+				0x03, 0x01);
+		rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PULL_CTL6,
+				0x0C, 0x04);
+	}
+
+	rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SYS_DUMMY0, NYET_MSAK, NYET_EN);
+	rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CD_DEGLITCH_WIDTH, 0xFF, 0x08);
+	rtsx_usb_add_cmd(ucr, WRITE_REG_CMD,
+			CD_DEGLITCH_EN, XD_CD_DEGLITCH_EN, 0x0);
+	rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SD30_DRIVE_SEL,
+			SD30_DRIVE_MASK, DRIVER_TYPE_D);
+	rtsx_usb_add_cmd(ucr, WRITE_REG_CMD,
+			CARD_DRIVE_SEL, SD20_DRIVE_MASK, 0x0);
+	rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, LDO_POWER_CFG, 0xE0, 0x0);
+
+	if (ucr->is_rts5179)
+		rtsx_usb_add_cmd(ucr, WRITE_REG_CMD,
+				CARD_PULL_CTL5, 0x03, 0x01);
+
+	rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_DMA1_CTL,
+		       EXTEND_DMA1_ASYNC_SIGNAL, EXTEND_DMA1_ASYNC_SIGNAL);
+	rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_INT_PEND,
+			XD_INT | MS_INT | SD_INT,
+			XD_INT | MS_INT | SD_INT);
+
+	ret = rtsx_usb_send_cmd(ucr, MODE_C, 100);
+	if (ret)
+		return ret;
+
+	/* config non-crystal mode */
+	rtsx_usb_read_register(ucr, CFG_MODE, &val);
+	if ((val & XTAL_FREE) || ((val & CLK_MODE_MASK) == CLK_MODE_NON_XTAL)) {
+		ret = rtsx_usb_write_phy_register(ucr, 0xC2, 0x7C);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int rtsx_usb_init_chip(struct rtsx_ucr *ucr)
+{
+	int ret;
+	u8 val;
+
+	rtsx_usb_clear_fsm_err(ucr);
+
+	/* power on SSC */
+	ret = rtsx_usb_write_register(ucr,
+			FPDCTL, SSC_POWER_MASK, SSC_POWER_ON);
+	if (ret)
+		return ret;
+
+	usleep_range(100, 1000);
+	ret = rtsx_usb_write_register(ucr, CLK_DIV, CLK_CHANGE, 0x00);
+	if (ret)
+		return ret;
+
+	/* determine IC version */
+	ret = rtsx_usb_read_register(ucr, HW_VERSION, &val);
+	if (ret)
+		return ret;
+
+	ucr->ic_version = val & HW_VER_MASK;
+
+	/* determine package */
+	ret = rtsx_usb_read_register(ucr, CARD_SHARE_MODE, &val);
+	if (ret)
+		return ret;
+
+	if (val & CARD_SHARE_LQFP_SEL) {
+		ucr->package = LQFP48;
+		dev_dbg(&ucr->pusb_intf->dev, "Package: LQFP48\n");
+	} else {
+		ucr->package = QFN24;
+		dev_dbg(&ucr->pusb_intf->dev, "Package: QFN24\n");
+	}
+
+	/* determine IC variations */
+	rtsx_usb_read_register(ucr, CFG_MODE_1, &val);
+	if (val & RTS5179) {
+		ucr->is_rts5179 = true;
+		dev_dbg(&ucr->pusb_intf->dev, "Device is rts5179\n");
+	} else {
+		ucr->is_rts5179 = false;
+	}
+
+	return rtsx_usb_reset_chip(ucr);
+}
+
+static int rtsx_usb_probe(struct usb_interface *intf,
+			 const struct usb_device_id *id)
+{
+	struct usb_device *usb_dev = interface_to_usbdev(intf);
+	struct rtsx_ucr *ucr;
+	int ret;
+
+	dev_dbg(&intf->dev,
+		": Realtek USB Card Reader found at bus %03d address %03d\n",
+		 usb_dev->bus->busnum, usb_dev->devnum);
+
+	ucr = devm_kzalloc(&intf->dev, sizeof(*ucr), GFP_KERNEL);
+	if (!ucr)
+		return -ENOMEM;
+
+	ucr->pusb_dev = usb_dev;
+
+	ucr->iobuf = usb_alloc_coherent(ucr->pusb_dev, IOBUF_SIZE,
+			GFP_KERNEL, &ucr->iobuf_dma);
+	if (!ucr->iobuf)
+		return -ENOMEM;
+
+	usb_set_intfdata(intf, ucr);
+
+	ucr->vendor_id = id->idVendor;
+	ucr->product_id = id->idProduct;
+	ucr->cmd_buf = ucr->rsp_buf = ucr->iobuf;
+
+	mutex_init(&ucr->dev_mutex);
+
+	ucr->pusb_intf = intf;
+
+	/* initialize */
+	ret = rtsx_usb_init_chip(ucr);
+	if (ret)
+		goto out_init_fail;
+
+	/* initialize USB SG transfer timer */
+	timer_setup(&ucr->sg_timer, rtsx_usb_sg_timed_out, 0);
+
+	ret = mfd_add_hotplug_devices(&intf->dev, rtsx_usb_cells,
+				      ARRAY_SIZE(rtsx_usb_cells));
+	if (ret)
+		goto out_init_fail;
+
+#ifdef CONFIG_PM
+	intf->needs_remote_wakeup = 1;
+	usb_enable_autosuspend(usb_dev);
+#endif
+
+	return 0;
+
+out_init_fail:
+	usb_set_intfdata(ucr->pusb_intf, NULL);
+	usb_free_coherent(ucr->pusb_dev, IOBUF_SIZE, ucr->iobuf,
+			ucr->iobuf_dma);
+	return ret;
+}
+
+static void rtsx_usb_disconnect(struct usb_interface *intf)
+{
+	struct rtsx_ucr *ucr = (struct rtsx_ucr *)usb_get_intfdata(intf);
+
+	dev_dbg(&intf->dev, "%s called\n", __func__);
+
+	mfd_remove_devices(&intf->dev);
+
+	usb_set_intfdata(ucr->pusb_intf, NULL);
+	usb_free_coherent(ucr->pusb_dev, IOBUF_SIZE, ucr->iobuf,
+			ucr->iobuf_dma);
+}
+
+#ifdef CONFIG_PM
+static int rtsx_usb_suspend(struct usb_interface *intf, pm_message_t message)
+{
+	struct rtsx_ucr *ucr =
+		(struct rtsx_ucr *)usb_get_intfdata(intf);
+	u16 val = 0;
+
+	dev_dbg(&intf->dev, "%s called with pm message 0x%04x\n",
+			__func__, message.event);
+
+	if (PMSG_IS_AUTO(message)) {
+		if (mutex_trylock(&ucr->dev_mutex)) {
+			rtsx_usb_get_card_status(ucr, &val);
+			mutex_unlock(&ucr->dev_mutex);
+
+			/* Defer the autosuspend if card exists */
+			if (val & (SD_CD | MS_CD))
+				return -EAGAIN;
+		} else {
+			/* There is an ongoing operation*/
+			return -EAGAIN;
+		}
+	}
+
+	return 0;
+}
+
+static int rtsx_usb_resume(struct usb_interface *intf)
+{
+	return 0;
+}
+
+static int rtsx_usb_reset_resume(struct usb_interface *intf)
+{
+	struct rtsx_ucr *ucr =
+		(struct rtsx_ucr *)usb_get_intfdata(intf);
+
+	rtsx_usb_reset_chip(ucr);
+	return 0;
+}
+
+#else /* CONFIG_PM */
+
+#define rtsx_usb_suspend NULL
+#define rtsx_usb_resume NULL
+#define rtsx_usb_reset_resume NULL
+
+#endif /* CONFIG_PM */
+
+
+static int rtsx_usb_pre_reset(struct usb_interface *intf)
+{
+	struct rtsx_ucr *ucr = (struct rtsx_ucr *)usb_get_intfdata(intf);
+
+	mutex_lock(&ucr->dev_mutex);
+	return 0;
+}
+
+static int rtsx_usb_post_reset(struct usb_interface *intf)
+{
+	struct rtsx_ucr *ucr = (struct rtsx_ucr *)usb_get_intfdata(intf);
+
+	mutex_unlock(&ucr->dev_mutex);
+	return 0;
+}
+
+static struct usb_device_id rtsx_usb_usb_ids[] = {
+	{ USB_DEVICE(0x0BDA, 0x0129) },
+	{ USB_DEVICE(0x0BDA, 0x0139) },
+	{ USB_DEVICE(0x0BDA, 0x0140) },
+	{ }
+};
+MODULE_DEVICE_TABLE(usb, rtsx_usb_usb_ids);
+
+static struct usb_driver rtsx_usb_driver = {
+	.name			= "rtsx_usb",
+	.probe			= rtsx_usb_probe,
+	.disconnect		= rtsx_usb_disconnect,
+	.suspend		= rtsx_usb_suspend,
+	.resume			= rtsx_usb_resume,
+	.reset_resume		= rtsx_usb_reset_resume,
+	.pre_reset		= rtsx_usb_pre_reset,
+	.post_reset		= rtsx_usb_post_reset,
+	.id_table		= rtsx_usb_usb_ids,
+	.supports_autosuspend	= 1,
+	.soft_unbind		= 1,
+};
+
+module_usb_driver(rtsx_usb_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Roger Tseng <rogerable@realtek.com>");
+MODULE_DESCRIPTION("Realtek USB Card Reader Driver");
diff --git a/drivers/misc/cb710/Kconfig b/drivers/misc/cb710/Kconfig
new file mode 100644
index 000000000..22429b8b1
--- /dev/null
+++ b/drivers/misc/cb710/Kconfig
@@ -0,0 +1,25 @@
+config CB710_CORE
+	tristate "ENE CB710/720 Flash memory card reader support"
+	depends on PCI
+	help
+	  This option enables support for PCI ENE CB710/720 Flash memory card
+	  reader found in some laptops (ie. some versions of HP Compaq nx9500).
+
+	  You will also have to select some flash card format drivers (MMC/SD,
+	  MemoryStick).
+
+	  This driver can also be built as a module. If so, the module
+	  will be called cb710.
+
+config CB710_DEBUG
+	bool "Enable driver debugging"
+	depends on CB710_CORE != n
+	default n
+	help
+	  This is an option for use by developers; most people should
+	  say N here.  This adds a lot of debugging output to dmesg.
+
+config CB710_DEBUG_ASSUMPTIONS
+	bool
+	depends on CB710_CORE != n
+	default y
diff --git a/drivers/misc/cb710/Makefile b/drivers/misc/cb710/Makefile
new file mode 100644
index 000000000..467c8e9ca
--- /dev/null
+++ b/drivers/misc/cb710/Makefile
@@ -0,0 +1,6 @@
+ccflags-$(CONFIG_CB710_DEBUG)	:= -DDEBUG
+
+obj-$(CONFIG_CB710_CORE)	+= cb710.o
+
+cb710-y				:= core.o sgbuf2.o
+cb710-$(CONFIG_CB710_DEBUG)	+= debug.o
diff --git a/drivers/misc/cb710/core.c b/drivers/misc/cb710/core.c
new file mode 100644
index 000000000..2c43fd09d
--- /dev/null
+++ b/drivers/misc/cb710/core.c
@@ -0,0 +1,346 @@
+/*
+ *  cb710/core.c
+ *
+ *  Copyright by Michał Mirosław, 2008-2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/idr.h>
+#include <linux/cb710.h>
+#include <linux/gfp.h>
+
+static DEFINE_IDA(cb710_ida);
+
+void cb710_pci_update_config_reg(struct pci_dev *pdev,
+	int reg, uint32_t mask, uint32_t xor)
+{
+	u32 rval;
+
+	pci_read_config_dword(pdev, reg, &rval);
+	rval = (rval & mask) ^ xor;
+	pci_write_config_dword(pdev, reg, rval);
+}
+EXPORT_SYMBOL_GPL(cb710_pci_update_config_reg);
+
+/* Some magic writes based on Windows driver init code */
+static int cb710_pci_configure(struct pci_dev *pdev)
+{
+	unsigned int devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0);
+	struct pci_dev *pdev0;
+	u32 val;
+
+	cb710_pci_update_config_reg(pdev, 0x48,
+		~0x000000FF, 0x0000003F);
+
+	pci_read_config_dword(pdev, 0x48, &val);
+	if (val & 0x80000000)
+		return 0;
+
+	pdev0 = pci_get_slot(pdev->bus, devfn);
+	if (!pdev0)
+		return -ENODEV;
+
+	if (pdev0->vendor == PCI_VENDOR_ID_ENE
+	    && pdev0->device == PCI_DEVICE_ID_ENE_720) {
+		cb710_pci_update_config_reg(pdev0, 0x8C,
+			~0x00F00000, 0x00100000);
+		cb710_pci_update_config_reg(pdev0, 0xB0,
+			~0x08000000, 0x08000000);
+	}
+
+	cb710_pci_update_config_reg(pdev0, 0x8C,
+		~0x00000F00, 0x00000200);
+	cb710_pci_update_config_reg(pdev0, 0x90,
+		~0x00060000, 0x00040000);
+
+	pci_dev_put(pdev0);
+
+	return 0;
+}
+
+static irqreturn_t cb710_irq_handler(int irq, void *data)
+{
+	struct cb710_chip *chip = data;
+	struct cb710_slot *slot = &chip->slot[0];
+	irqreturn_t handled = IRQ_NONE;
+	unsigned nr;
+
+	spin_lock(&chip->irq_lock); /* incl. smp_rmb() */
+
+	for (nr = chip->slots; nr; ++slot, --nr) {
+		cb710_irq_handler_t handler_func = slot->irq_handler;
+		if (handler_func && handler_func(slot))
+			handled = IRQ_HANDLED;
+	}
+
+	spin_unlock(&chip->irq_lock);
+
+	return handled;
+}
+
+static void cb710_release_slot(struct device *dev)
+{
+#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS
+	struct cb710_slot *slot = cb710_pdev_to_slot(to_platform_device(dev));
+	struct cb710_chip *chip = cb710_slot_to_chip(slot);
+
+	/* slot struct can be freed now */
+	atomic_dec(&chip->slot_refs_count);
+#endif
+}
+
+static int cb710_register_slot(struct cb710_chip *chip,
+	unsigned slot_mask, unsigned io_offset, const char *name)
+{
+	int nr = chip->slots;
+	struct cb710_slot *slot = &chip->slot[nr];
+	int err;
+
+	dev_dbg(cb710_chip_dev(chip),
+		"register: %s.%d; slot %d; mask %d; IO offset: 0x%02X\n",
+		name, chip->platform_id, nr, slot_mask, io_offset);
+
+	/* slot->irq_handler == NULL here; this needs to be
+	 * seen before platform_device_register() */
+	++chip->slots;
+	smp_wmb();
+
+	slot->iobase = chip->iobase + io_offset;
+	slot->pdev.name = name;
+	slot->pdev.id = chip->platform_id;
+	slot->pdev.dev.parent = &chip->pdev->dev;
+	slot->pdev.dev.release = cb710_release_slot;
+
+	err = platform_device_register(&slot->pdev);
+
+#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS
+	atomic_inc(&chip->slot_refs_count);
+#endif
+
+	if (err) {
+		/* device_initialize() called from platform_device_register()
+		 * wants this on error path */
+		platform_device_put(&slot->pdev);
+
+		/* slot->irq_handler == NULL here anyway, so no lock needed */
+		--chip->slots;
+		return err;
+	}
+
+	chip->slot_mask |= slot_mask;
+
+	return 0;
+}
+
+static void cb710_unregister_slot(struct cb710_chip *chip,
+	unsigned slot_mask)
+{
+	int nr = chip->slots - 1;
+
+	if (!(chip->slot_mask & slot_mask))
+		return;
+
+	platform_device_unregister(&chip->slot[nr].pdev);
+
+	/* complementary to spin_unlock() in cb710_set_irq_handler() */
+	smp_rmb();
+	BUG_ON(chip->slot[nr].irq_handler != NULL);
+
+	/* slot->irq_handler == NULL here, so no lock needed */
+	--chip->slots;
+	chip->slot_mask &= ~slot_mask;
+}
+
+void cb710_set_irq_handler(struct cb710_slot *slot,
+	cb710_irq_handler_t handler)
+{
+	struct cb710_chip *chip = cb710_slot_to_chip(slot);
+	unsigned long flags;
+
+	spin_lock_irqsave(&chip->irq_lock, flags);
+	slot->irq_handler = handler;
+	spin_unlock_irqrestore(&chip->irq_lock, flags);
+}
+EXPORT_SYMBOL_GPL(cb710_set_irq_handler);
+
+#ifdef CONFIG_PM
+
+static int cb710_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct cb710_chip *chip = pci_get_drvdata(pdev);
+
+	devm_free_irq(&pdev->dev, pdev->irq, chip);
+	pci_save_state(pdev);
+	pci_disable_device(pdev);
+	if (state.event & PM_EVENT_SLEEP)
+		pci_set_power_state(pdev, PCI_D3hot);
+	return 0;
+}
+
+static int cb710_resume(struct pci_dev *pdev)
+{
+	struct cb710_chip *chip = pci_get_drvdata(pdev);
+	int err;
+
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+	err = pcim_enable_device(pdev);
+	if (err)
+		return err;
+
+	return devm_request_irq(&pdev->dev, pdev->irq,
+		cb710_irq_handler, IRQF_SHARED, KBUILD_MODNAME, chip);
+}
+
+#endif /* CONFIG_PM */
+
+static int cb710_probe(struct pci_dev *pdev,
+	const struct pci_device_id *ent)
+{
+	struct cb710_chip *chip;
+	u32 val;
+	int err;
+	int n = 0;
+
+	err = cb710_pci_configure(pdev);
+	if (err)
+		return err;
+
+	/* this is actually magic... */
+	pci_read_config_dword(pdev, 0x48, &val);
+	if (!(val & 0x80000000)) {
+		pci_write_config_dword(pdev, 0x48, val|0x71000000);
+		pci_read_config_dword(pdev, 0x48, &val);
+	}
+
+	dev_dbg(&pdev->dev, "PCI config[0x48] = 0x%08X\n", val);
+	if (!(val & 0x70000000))
+		return -ENODEV;
+	val = (val >> 28) & 7;
+	if (val & CB710_SLOT_MMC)
+		++n;
+	if (val & CB710_SLOT_MS)
+		++n;
+	if (val & CB710_SLOT_SM)
+		++n;
+
+	chip = devm_kzalloc(&pdev->dev, struct_size(chip, slot, n),
+			    GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	err = pcim_enable_device(pdev);
+	if (err)
+		return err;
+
+	err = pcim_iomap_regions(pdev, 0x0001, KBUILD_MODNAME);
+	if (err)
+		return err;
+
+	spin_lock_init(&chip->irq_lock);
+	chip->pdev = pdev;
+	chip->iobase = pcim_iomap_table(pdev)[0];
+
+	pci_set_drvdata(pdev, chip);
+
+	err = devm_request_irq(&pdev->dev, pdev->irq,
+		cb710_irq_handler, IRQF_SHARED, KBUILD_MODNAME, chip);
+	if (err)
+		return err;
+
+	err = ida_alloc(&cb710_ida, GFP_KERNEL);
+	if (err < 0)
+		return err;
+	chip->platform_id = err;
+
+	dev_info(&pdev->dev, "id %d, IO 0x%p, IRQ %d\n",
+		chip->platform_id, chip->iobase, pdev->irq);
+
+	if (val & CB710_SLOT_MMC) {	/* MMC/SD slot */
+		err = cb710_register_slot(chip,
+			CB710_SLOT_MMC, 0x00, "cb710-mmc");
+		if (err)
+			return err;
+	}
+
+	if (val & CB710_SLOT_MS) {	/* MemoryStick slot */
+		err = cb710_register_slot(chip,
+			CB710_SLOT_MS, 0x40, "cb710-ms");
+		if (err)
+			goto unreg_mmc;
+	}
+
+	if (val & CB710_SLOT_SM) {	/* SmartMedia slot */
+		err = cb710_register_slot(chip,
+			CB710_SLOT_SM, 0x60, "cb710-sm");
+		if (err)
+			goto unreg_ms;
+	}
+
+	return 0;
+unreg_ms:
+	cb710_unregister_slot(chip, CB710_SLOT_MS);
+unreg_mmc:
+	cb710_unregister_slot(chip, CB710_SLOT_MMC);
+
+#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS
+	BUG_ON(atomic_read(&chip->slot_refs_count) != 0);
+#endif
+	return err;
+}
+
+static void cb710_remove_one(struct pci_dev *pdev)
+{
+	struct cb710_chip *chip = pci_get_drvdata(pdev);
+
+	cb710_unregister_slot(chip, CB710_SLOT_SM);
+	cb710_unregister_slot(chip, CB710_SLOT_MS);
+	cb710_unregister_slot(chip, CB710_SLOT_MMC);
+#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS
+	BUG_ON(atomic_read(&chip->slot_refs_count) != 0);
+#endif
+
+	ida_free(&cb710_ida, chip->platform_id);
+}
+
+static const struct pci_device_id cb710_pci_tbl[] = {
+	{ PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_CB710_FLASH,
+		PCI_ANY_ID, PCI_ANY_ID, },
+	{ 0, }
+};
+
+static struct pci_driver cb710_driver = {
+	.name = KBUILD_MODNAME,
+	.id_table = cb710_pci_tbl,
+	.probe = cb710_probe,
+	.remove = cb710_remove_one,
+#ifdef CONFIG_PM
+	.suspend = cb710_suspend,
+	.resume = cb710_resume,
+#endif
+};
+
+static int __init cb710_init_module(void)
+{
+	return pci_register_driver(&cb710_driver);
+}
+
+static void __exit cb710_cleanup_module(void)
+{
+	pci_unregister_driver(&cb710_driver);
+	ida_destroy(&cb710_ida);
+}
+
+module_init(cb710_init_module);
+module_exit(cb710_cleanup_module);
+
+MODULE_AUTHOR("Michał Mirosław <mirq-linux@rere.qmqm.pl>");
+MODULE_DESCRIPTION("ENE CB710 memory card reader driver");
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(pci, cb710_pci_tbl);
diff --git a/drivers/misc/cb710/debug.c b/drivers/misc/cb710/debug.c
new file mode 100644
index 000000000..fcb3b8e30
--- /dev/null
+++ b/drivers/misc/cb710/debug.c
@@ -0,0 +1,118 @@
+/*
+ *  cb710/debug.c
+ *
+ *  Copyright by Michał Mirosław, 2008-2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/cb710.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#define CB710_REG_COUNT		0x80
+
+static const u16 allow[CB710_REG_COUNT/16] = {
+	0xFFF0, 0xFFFF, 0xFFFF, 0xFFFF,
+	0xFFF0, 0xFFFF, 0xFFFF, 0xFFFF,
+};
+static const char *const prefix[ARRAY_SIZE(allow)] = {
+	"MMC", "MMC", "MMC", "MMC",
+	"MS?", "MS?", "SM?", "SM?"
+};
+
+static inline int allow_reg_read(unsigned block, unsigned offset, unsigned bits)
+{
+	unsigned mask = (1 << bits/8) - 1;
+	offset *= bits/8;
+	return ((allow[block] >> offset) & mask) == mask;
+}
+
+#define CB710_READ_REGS_TEMPLATE(t)					\
+static void cb710_read_regs_##t(void __iomem *iobase,			\
+	u##t *reg, unsigned select)					\
+{									\
+	unsigned i, j;							\
+									\
+	for (i = 0; i < ARRAY_SIZE(allow); ++i, reg += 16/(t/8)) {	\
+		if (!(select & (1 << i)))					\
+			continue;					\
+									\
+		for (j = 0; j < 0x10/(t/8); ++j) {			\
+			if (!allow_reg_read(i, j, t))			\
+				continue;				\
+			reg[j] = ioread##t(iobase			\
+				+ (i << 4) + (j * (t/8)));		\
+		}							\
+	}								\
+}
+
+static const char cb710_regf_8[] = "%02X";
+static const char cb710_regf_16[] = "%04X";
+static const char cb710_regf_32[] = "%08X";
+static const char cb710_xes[] = "xxxxxxxx";
+
+#define CB710_DUMP_REGS_TEMPLATE(t)					\
+static void cb710_dump_regs_##t(struct device *dev,			\
+	const u##t *reg, unsigned select)				\
+{									\
+	const char *const xp = &cb710_xes[8 - t/4];			\
+	const char *const format = cb710_regf_##t;			\
+									\
+	char msg[100], *p;						\
+	unsigned i, j;							\
+									\
+	for (i = 0; i < ARRAY_SIZE(allow); ++i, reg += 16/(t/8)) {	\
+		if (!(select & (1 << i)))				\
+			continue;					\
+		p = msg;						\
+		for (j = 0; j < 0x10/(t/8); ++j) {			\
+			*p++ = ' ';					\
+			if (j == 8/(t/8))				\
+				*p++ = ' ';				\
+			if (allow_reg_read(i, j, t))			\
+				p += sprintf(p, format, reg[j]);	\
+			else						\
+				p += sprintf(p, "%s", xp);		\
+		}							\
+		dev_dbg(dev, "%s 0x%02X %s\n", prefix[i], i << 4, msg);	\
+	}								\
+}
+
+#define CB710_READ_AND_DUMP_REGS_TEMPLATE(t)				\
+static void cb710_read_and_dump_regs_##t(struct cb710_chip *chip,	\
+	unsigned select)						\
+{									\
+	u##t regs[CB710_REG_COUNT/sizeof(u##t)];			\
+									\
+	memset(&regs, 0, sizeof(regs));					\
+	cb710_read_regs_##t(chip->iobase, regs, select);		\
+	cb710_dump_regs_##t(cb710_chip_dev(chip), regs, select);	\
+}
+
+#define CB710_REG_ACCESS_TEMPLATES(t)		\
+  CB710_READ_REGS_TEMPLATE(t)			\
+  CB710_DUMP_REGS_TEMPLATE(t)			\
+  CB710_READ_AND_DUMP_REGS_TEMPLATE(t)
+
+CB710_REG_ACCESS_TEMPLATES(8)
+CB710_REG_ACCESS_TEMPLATES(16)
+CB710_REG_ACCESS_TEMPLATES(32)
+
+void cb710_dump_regs(struct cb710_chip *chip, unsigned select)
+{
+	if (!(select & CB710_DUMP_REGS_MASK))
+		select = CB710_DUMP_REGS_ALL;
+	if (!(select & CB710_DUMP_ACCESS_MASK))
+		select |= CB710_DUMP_ACCESS_8;
+
+	if (select & CB710_DUMP_ACCESS_32)
+		cb710_read_and_dump_regs_32(chip, select);
+	if (select & CB710_DUMP_ACCESS_16)
+		cb710_read_and_dump_regs_16(chip, select);
+	if (select & CB710_DUMP_ACCESS_8)
+		cb710_read_and_dump_regs_8(chip, select);
+}
+EXPORT_SYMBOL_GPL(cb710_dump_regs);
+
diff --git a/drivers/misc/cb710/sgbuf2.c b/drivers/misc/cb710/sgbuf2.c
new file mode 100644
index 000000000..4d2a72a53
--- /dev/null
+++ b/drivers/misc/cb710/sgbuf2.c
@@ -0,0 +1,146 @@
+/*
+ *  cb710/sgbuf2.c
+ *
+ *  Copyright by Michał Mirosław, 2008-2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cb710.h>
+
+static bool sg_dwiter_next(struct sg_mapping_iter *miter)
+{
+	if (sg_miter_next(miter)) {
+		miter->consumed = 0;
+		return true;
+	} else
+		return false;
+}
+
+static bool sg_dwiter_is_at_end(struct sg_mapping_iter *miter)
+{
+	return miter->length == miter->consumed && !sg_dwiter_next(miter);
+}
+
+static uint32_t sg_dwiter_read_buffer(struct sg_mapping_iter *miter)
+{
+	size_t len, left = 4;
+	uint32_t data;
+	void *addr = &data;
+
+	do {
+		len = min(miter->length - miter->consumed, left);
+		memcpy(addr, miter->addr + miter->consumed, len);
+		miter->consumed += len;
+		left -= len;
+		if (!left)
+			return data;
+		addr += len;
+	} while (sg_dwiter_next(miter));
+
+	memset(addr, 0, left);
+	return data;
+}
+
+static inline bool needs_unaligned_copy(const void *ptr)
+{
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	return false;
+#else
+	return ((uintptr_t)ptr & 3) != 0;
+#endif
+}
+
+static bool sg_dwiter_get_next_block(struct sg_mapping_iter *miter, uint32_t **ptr)
+{
+	size_t len;
+
+	if (sg_dwiter_is_at_end(miter))
+		return true;
+
+	len = miter->length - miter->consumed;
+
+	if (likely(len >= 4 && !needs_unaligned_copy(
+			miter->addr + miter->consumed))) {
+		*ptr = miter->addr + miter->consumed;
+		miter->consumed += 4;
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * cb710_sg_dwiter_read_next_block() - get next 32-bit word from sg buffer
+ * @miter: sg mapping iterator used for reading
+ *
+ * Description:
+ *   Returns 32-bit word starting at byte pointed to by @miter@
+ *   handling any alignment issues.  Bytes past the buffer's end
+ *   are not accessed (read) but are returned as zeroes.  @miter@
+ *   is advanced by 4 bytes or to the end of buffer whichever is
+ *   closer.
+ *
+ * Context:
+ *   Same requirements as in sg_miter_next().
+ *
+ * Returns:
+ *   32-bit word just read.
+ */
+uint32_t cb710_sg_dwiter_read_next_block(struct sg_mapping_iter *miter)
+{
+	uint32_t *ptr = NULL;
+
+	if (likely(sg_dwiter_get_next_block(miter, &ptr)))
+		return ptr ? *ptr : 0;
+
+	return sg_dwiter_read_buffer(miter);
+}
+EXPORT_SYMBOL_GPL(cb710_sg_dwiter_read_next_block);
+
+static void sg_dwiter_write_slow(struct sg_mapping_iter *miter, uint32_t data)
+{
+	size_t len, left = 4;
+	void *addr = &data;
+
+	do {
+		len = min(miter->length - miter->consumed, left);
+		memcpy(miter->addr, addr, len);
+		miter->consumed += len;
+		left -= len;
+		if (!left)
+			return;
+		addr += len;
+	} while (sg_dwiter_next(miter));
+}
+
+/**
+ * cb710_sg_dwiter_write_next_block() - write next 32-bit word to sg buffer
+ * @miter: sg mapping iterator used for writing
+ *
+ * Description:
+ *   Writes 32-bit word starting at byte pointed to by @miter@
+ *   handling any alignment issues.  Bytes which would be written
+ *   past the buffer's end are silently discarded. @miter@ is
+ *   advanced by 4 bytes or to the end of buffer whichever is closer.
+ *
+ * Context:
+ *   Same requirements as in sg_miter_next().
+ */
+void cb710_sg_dwiter_write_next_block(struct sg_mapping_iter *miter, uint32_t data)
+{
+	uint32_t *ptr = NULL;
+
+	if (likely(sg_dwiter_get_next_block(miter, &ptr))) {
+		if (ptr)
+			*ptr = data;
+		else
+			return;
+	} else
+		sg_dwiter_write_slow(miter, data);
+}
+EXPORT_SYMBOL_GPL(cb710_sg_dwiter_write_next_block);
+
diff --git a/drivers/misc/cs5535-mfgpt.c b/drivers/misc/cs5535-mfgpt.c
new file mode 100644
index 000000000..347f08f2f
--- /dev/null
+++ b/drivers/misc/cs5535-mfgpt.c
@@ -0,0 +1,383 @@
+/*
+ * Driver for the CS5535/CS5536 Multi-Function General Purpose Timers (MFGPT)
+ *
+ * Copyright (C) 2006, Advanced Micro Devices, Inc.
+ * Copyright (C) 2007  Andres Salomon <dilinger@debian.org>
+ * Copyright (C) 2009  Andres Salomon <dilinger@collabora.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * The MFGPTs are documented in AMD Geode CS5536 Companion Device Data Book.
+ */
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/cs5535.h>
+#include <linux/slab.h>
+
+#define DRV_NAME "cs5535-mfgpt"
+
+static int mfgpt_reset_timers;
+module_param_named(mfgptfix, mfgpt_reset_timers, int, 0644);
+MODULE_PARM_DESC(mfgptfix, "Try to reset the MFGPT timers during init; "
+		"required by some broken BIOSes (ie, TinyBIOS < 0.99) or kexec "
+		"(1 = reset the MFGPT using an undocumented bit, "
+		"2 = perform a soft reset by unconfiguring all timers); "
+		"use what works best for you.");
+
+struct cs5535_mfgpt_timer {
+	struct cs5535_mfgpt_chip *chip;
+	int nr;
+};
+
+static struct cs5535_mfgpt_chip {
+	DECLARE_BITMAP(avail, MFGPT_MAX_TIMERS);
+	resource_size_t base;
+
+	struct platform_device *pdev;
+	spinlock_t lock;
+	int initialized;
+} cs5535_mfgpt_chip;
+
+int cs5535_mfgpt_toggle_event(struct cs5535_mfgpt_timer *timer, int cmp,
+		int event, int enable)
+{
+	uint32_t msr, mask, value, dummy;
+	int shift = (cmp == MFGPT_CMP1) ? 0 : 8;
+
+	if (!timer) {
+		WARN_ON(1);
+		return -EIO;
+	}
+
+	/*
+	 * The register maps for these are described in sections 6.17.1.x of
+	 * the AMD Geode CS5536 Companion Device Data Book.
+	 */
+	switch (event) {
+	case MFGPT_EVENT_RESET:
+		/*
+		 * XXX: According to the docs, we cannot reset timers above
+		 * 6; that is, resets for 7 and 8 will be ignored.  Is this
+		 * a problem?   -dilinger
+		 */
+		msr = MSR_MFGPT_NR;
+		mask = 1 << (timer->nr + 24);
+		break;
+
+	case MFGPT_EVENT_NMI:
+		msr = MSR_MFGPT_NR;
+		mask = 1 << (timer->nr + shift);
+		break;
+
+	case MFGPT_EVENT_IRQ:
+		msr = MSR_MFGPT_IRQ;
+		mask = 1 << (timer->nr + shift);
+		break;
+
+	default:
+		return -EIO;
+	}
+
+	rdmsr(msr, value, dummy);
+
+	if (enable)
+		value |= mask;
+	else
+		value &= ~mask;
+
+	wrmsr(msr, value, dummy);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cs5535_mfgpt_toggle_event);
+
+int cs5535_mfgpt_set_irq(struct cs5535_mfgpt_timer *timer, int cmp, int *irq,
+		int enable)
+{
+	uint32_t zsel, lpc, dummy;
+	int shift;
+
+	if (!timer) {
+		WARN_ON(1);
+		return -EIO;
+	}
+
+	/*
+	 * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA
+	 * is using the same CMP of the timer's Siamese twin, the IRQ is set to
+	 * 2, and we mustn't use nor change it.
+	 * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the
+	 * IRQ of the 1st. This can only happen if forcing an IRQ, calling this
+	 * with *irq==0 is safe. Currently there _are_ no 2 drivers.
+	 */
+	rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
+	shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer->nr % 4) * 4;
+	if (((zsel >> shift) & 0xF) == 2)
+		return -EIO;
+
+	/* Choose IRQ: if none supplied, keep IRQ already set or use default */
+	if (!*irq)
+		*irq = (zsel >> shift) & 0xF;
+	if (!*irq)
+		*irq = CONFIG_CS5535_MFGPT_DEFAULT_IRQ;
+
+	/* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */
+	if (*irq < 1 || *irq == 2 || *irq > 15)
+		return -EIO;
+	rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy);
+	if (lpc & (1 << *irq))
+		return -EIO;
+
+	/* All chosen and checked - go for it */
+	if (cs5535_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable))
+		return -EIO;
+	if (enable) {
+		zsel = (zsel & ~(0xF << shift)) | (*irq << shift);
+		wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cs5535_mfgpt_set_irq);
+
+struct cs5535_mfgpt_timer *cs5535_mfgpt_alloc_timer(int timer_nr, int domain)
+{
+	struct cs5535_mfgpt_chip *mfgpt = &cs5535_mfgpt_chip;
+	struct cs5535_mfgpt_timer *timer = NULL;
+	unsigned long flags;
+	int max;
+
+	if (!mfgpt->initialized)
+		goto done;
+
+	/* only allocate timers from the working domain if requested */
+	if (domain == MFGPT_DOMAIN_WORKING)
+		max = 6;
+	else
+		max = MFGPT_MAX_TIMERS;
+
+	if (timer_nr >= max) {
+		/* programmer error.  silly programmers! */
+		WARN_ON(1);
+		goto done;
+	}
+
+	spin_lock_irqsave(&mfgpt->lock, flags);
+	if (timer_nr < 0) {
+		unsigned long t;
+
+		/* try to find any available timer */
+		t = find_first_bit(mfgpt->avail, max);
+		/* set timer_nr to -1 if no timers available */
+		timer_nr = t < max ? (int) t : -1;
+	} else {
+		/* check if the requested timer's available */
+		if (!test_bit(timer_nr, mfgpt->avail))
+			timer_nr = -1;
+	}
+
+	if (timer_nr >= 0)
+		/* if timer_nr is not -1, it's an available timer */
+		__clear_bit(timer_nr, mfgpt->avail);
+	spin_unlock_irqrestore(&mfgpt->lock, flags);
+
+	if (timer_nr < 0)
+		goto done;
+
+	timer = kmalloc(sizeof(*timer), GFP_KERNEL);
+	if (!timer) {
+		/* aw hell */
+		spin_lock_irqsave(&mfgpt->lock, flags);
+		__set_bit(timer_nr, mfgpt->avail);
+		spin_unlock_irqrestore(&mfgpt->lock, flags);
+		goto done;
+	}
+	timer->chip = mfgpt;
+	timer->nr = timer_nr;
+	dev_info(&mfgpt->pdev->dev, "registered timer %d\n", timer_nr);
+
+done:
+	return timer;
+}
+EXPORT_SYMBOL_GPL(cs5535_mfgpt_alloc_timer);
+
+/*
+ * XXX: This frees the timer memory, but never resets the actual hardware
+ * timer.  The old geode_mfgpt code did this; it would be good to figure
+ * out a way to actually release the hardware timer.  See comments below.
+ */
+void cs5535_mfgpt_free_timer(struct cs5535_mfgpt_timer *timer)
+{
+	unsigned long flags;
+	uint16_t val;
+
+	/* timer can be made available again only if never set up */
+	val = cs5535_mfgpt_read(timer, MFGPT_REG_SETUP);
+	if (!(val & MFGPT_SETUP_SETUP)) {
+		spin_lock_irqsave(&timer->chip->lock, flags);
+		__set_bit(timer->nr, timer->chip->avail);
+		spin_unlock_irqrestore(&timer->chip->lock, flags);
+	}
+
+	kfree(timer);
+}
+EXPORT_SYMBOL_GPL(cs5535_mfgpt_free_timer);
+
+uint16_t cs5535_mfgpt_read(struct cs5535_mfgpt_timer *timer, uint16_t reg)
+{
+	return inw(timer->chip->base + reg + (timer->nr * 8));
+}
+EXPORT_SYMBOL_GPL(cs5535_mfgpt_read);
+
+void cs5535_mfgpt_write(struct cs5535_mfgpt_timer *timer, uint16_t reg,
+		uint16_t value)
+{
+	outw(value, timer->chip->base + reg + (timer->nr * 8));
+}
+EXPORT_SYMBOL_GPL(cs5535_mfgpt_write);
+
+/*
+ * This is a sledgehammer that resets all MFGPT timers. This is required by
+ * some broken BIOSes which leave the system in an unstable state
+ * (TinyBIOS 0.98, for example; fixed in 0.99).  It's uncertain as to
+ * whether or not this secret MSR can be used to release individual timers.
+ * Jordan tells me that he and Mitch once played w/ it, but it's unclear
+ * what the results of that were (and they experienced some instability).
+ */
+static void reset_all_timers(void)
+{
+	uint32_t val, dummy;
+
+	/* The following undocumented bit resets the MFGPT timers */
+	val = 0xFF; dummy = 0;
+	wrmsr(MSR_MFGPT_SETUP, val, dummy);
+}
+
+/*
+ * This is another sledgehammer to reset all MFGPT timers.
+ * Instead of using the undocumented bit method it clears
+ * IRQ, NMI and RESET settings.
+ */
+static void soft_reset(void)
+{
+	int i;
+	struct cs5535_mfgpt_timer t;
+
+	for (i = 0; i < MFGPT_MAX_TIMERS; i++) {
+		t.nr = i;
+
+		cs5535_mfgpt_toggle_event(&t, MFGPT_CMP1, MFGPT_EVENT_RESET, 0);
+		cs5535_mfgpt_toggle_event(&t, MFGPT_CMP2, MFGPT_EVENT_RESET, 0);
+		cs5535_mfgpt_toggle_event(&t, MFGPT_CMP1, MFGPT_EVENT_NMI, 0);
+		cs5535_mfgpt_toggle_event(&t, MFGPT_CMP2, MFGPT_EVENT_NMI, 0);
+		cs5535_mfgpt_toggle_event(&t, MFGPT_CMP1, MFGPT_EVENT_IRQ, 0);
+		cs5535_mfgpt_toggle_event(&t, MFGPT_CMP2, MFGPT_EVENT_IRQ, 0);
+	}
+}
+
+/*
+ * Check whether any MFGPTs are available for the kernel to use.  In most
+ * cases, firmware that uses AMD's VSA code will claim all timers during
+ * bootup; we certainly don't want to take them if they're already in use.
+ * In other cases (such as with VSAless OpenFirmware), the system firmware
+ * leaves timers available for us to use.
+ */
+static int scan_timers(struct cs5535_mfgpt_chip *mfgpt)
+{
+	struct cs5535_mfgpt_timer timer = { .chip = mfgpt };
+	unsigned long flags;
+	int timers = 0;
+	uint16_t val;
+	int i;
+
+	/* bios workaround */
+	if (mfgpt_reset_timers == 1)
+		reset_all_timers();
+	else if (mfgpt_reset_timers == 2)
+		soft_reset();
+
+	/* just to be safe, protect this section w/ lock */
+	spin_lock_irqsave(&mfgpt->lock, flags);
+	for (i = 0; i < MFGPT_MAX_TIMERS; i++) {
+		timer.nr = i;
+		val = cs5535_mfgpt_read(&timer, MFGPT_REG_SETUP);
+		if (!(val & MFGPT_SETUP_SETUP) || mfgpt_reset_timers == 2) {
+			__set_bit(i, mfgpt->avail);
+			timers++;
+		}
+	}
+	spin_unlock_irqrestore(&mfgpt->lock, flags);
+
+	return timers;
+}
+
+static int cs5535_mfgpt_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	int err = -EIO, t;
+
+	if (mfgpt_reset_timers < 0 || mfgpt_reset_timers > 2) {
+		dev_err(&pdev->dev, "Bad mfgpt_reset_timers value: %i\n",
+			mfgpt_reset_timers);
+		goto done;
+	}
+
+	/* There are two ways to get the MFGPT base address; one is by
+	 * fetching it from MSR_LBAR_MFGPT, the other is by reading the
+	 * PCI BAR info.  The latter method is easier (especially across
+	 * different architectures), so we'll stick with that for now.  If
+	 * it turns out to be unreliable in the face of crappy BIOSes, we
+	 * can always go back to using MSRs.. */
+
+	res = platform_get_resource(pdev, IORESOURCE_IO, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "can't fetch device resource info\n");
+		goto done;
+	}
+
+	if (!request_region(res->start, resource_size(res), pdev->name)) {
+		dev_err(&pdev->dev, "can't request region\n");
+		goto done;
+	}
+
+	/* set up the driver-specific struct */
+	cs5535_mfgpt_chip.base = res->start;
+	cs5535_mfgpt_chip.pdev = pdev;
+	spin_lock_init(&cs5535_mfgpt_chip.lock);
+
+	dev_info(&pdev->dev, "reserved resource region %pR\n", res);
+
+	/* detect the available timers */
+	t = scan_timers(&cs5535_mfgpt_chip);
+	dev_info(&pdev->dev, "%d MFGPT timers available\n", t);
+	cs5535_mfgpt_chip.initialized = 1;
+	return 0;
+
+done:
+	return err;
+}
+
+static struct platform_driver cs5535_mfgpt_driver = {
+	.driver = {
+		.name = DRV_NAME,
+	},
+	.probe = cs5535_mfgpt_probe,
+};
+
+
+static int __init cs5535_mfgpt_init(void)
+{
+	return platform_driver_register(&cs5535_mfgpt_driver);
+}
+
+module_init(cs5535_mfgpt_init);
+
+MODULE_AUTHOR("Andres Salomon <dilinger@queued.net>");
+MODULE_DESCRIPTION("CS5535/CS5536 MFGPT timer driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:" DRV_NAME);
diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig
new file mode 100644
index 000000000..3ce933707
--- /dev/null
+++ b/drivers/misc/cxl/Kconfig
@@ -0,0 +1,35 @@
+#
+# IBM Coherent Accelerator (CXL) compatible devices
+#
+
+config CXL_BASE
+	bool
+	default n
+	select PPC_COPRO_BASE
+
+config CXL_AFU_DRIVER_OPS
+	bool
+	default n
+
+config CXL_LIB
+	bool
+	default n
+
+config CXL
+	tristate "Support for IBM Coherent Accelerators (CXL)"
+	depends on PPC_POWERNV && PCI_MSI && EEH
+	select CXL_BASE
+	select CXL_AFU_DRIVER_OPS
+	select CXL_LIB
+	default m
+	help
+	  Select this option to enable driver support for IBM Coherent
+	  Accelerators (CXL).  CXL is otherwise known as Coherent Accelerator
+	  Processor Interface (CAPI).  CAPI allows accelerators in FPGAs to be
+	  coherently attached to a CPU via an MMU.  This driver enables
+	  userspace programs to access these accelerators via /dev/cxl/afuM.N
+	  devices.
+
+	  CAPI adapters are found in POWER8 based systems.
+
+	  If unsure, say N.
diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile
new file mode 100644
index 000000000..5eea61b95
--- /dev/null
+++ b/drivers/misc/cxl/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+ccflags-y			:= $(call cc-disable-warning, unused-const-variable)
+ccflags-$(CONFIG_PPC_WERROR)	+= -Werror
+
+cxl-y				+= main.o file.o irq.o fault.o native.o
+cxl-y				+= context.o sysfs.o pci.o trace.o
+cxl-y				+= vphb.o api.o cxllib.o
+cxl-$(CONFIG_PPC_PSERIES)	+= flash.o guest.o of.o hcalls.o
+cxl-$(CONFIG_DEBUG_FS)		+= debugfs.o
+obj-$(CONFIG_CXL)		+= cxl.o
+obj-$(CONFIG_CXL_BASE)		+= base.o
+
+# For tracepoints to include our trace.h from tracepoint infrastructure:
+CFLAGS_trace.o := -I$(src)
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
new file mode 100644
index 000000000..750470ef2
--- /dev/null
+++ b/drivers/misc/cxl/api.c
@@ -0,0 +1,540 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <misc/cxl.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/sched/mm.h>
+#include <linux/mmu_context.h>
+
+#include "cxl.h"
+
+/*
+ * Since we want to track memory mappings to be able to force-unmap
+ * when the AFU is no longer reachable, we need an inode. For devices
+ * opened through the cxl user API, this is not a problem, but a
+ * userland process can also get a cxl fd through the cxl_get_fd()
+ * API, which is used by the cxlflash driver.
+ *
+ * Therefore we implement our own simple pseudo-filesystem and inode
+ * allocator. We don't use the anonymous inode, as we need the
+ * meta-data associated with it (address_space) and it is shared by
+ * other drivers/processes, so it could lead to cxl unmapping VMAs
+ * from random processes.
+ */
+
+#define CXL_PSEUDO_FS_MAGIC	0x1697697f
+
+static int cxl_fs_cnt;
+static struct vfsmount *cxl_vfs_mount;
+
+static const struct dentry_operations cxl_fs_dops = {
+	.d_dname	= simple_dname,
+};
+
+static struct dentry *cxl_fs_mount(struct file_system_type *fs_type, int flags,
+				const char *dev_name, void *data)
+{
+	return mount_pseudo(fs_type, "cxl:", NULL, &cxl_fs_dops,
+			CXL_PSEUDO_FS_MAGIC);
+}
+
+static struct file_system_type cxl_fs_type = {
+	.name		= "cxl",
+	.owner		= THIS_MODULE,
+	.mount		= cxl_fs_mount,
+	.kill_sb	= kill_anon_super,
+};
+
+
+void cxl_release_mapping(struct cxl_context *ctx)
+{
+	if (ctx->kernelapi && ctx->mapping)
+		simple_release_fs(&cxl_vfs_mount, &cxl_fs_cnt);
+}
+
+static struct file *cxl_getfile(const char *name,
+				const struct file_operations *fops,
+				void *priv, int flags)
+{
+	struct file *file;
+	struct inode *inode;
+	int rc;
+
+	/* strongly inspired by anon_inode_getfile() */
+
+	if (fops->owner && !try_module_get(fops->owner))
+		return ERR_PTR(-ENOENT);
+
+	rc = simple_pin_fs(&cxl_fs_type, &cxl_vfs_mount, &cxl_fs_cnt);
+	if (rc < 0) {
+		pr_err("Cannot mount cxl pseudo filesystem: %d\n", rc);
+		file = ERR_PTR(rc);
+		goto err_module;
+	}
+
+	inode = alloc_anon_inode(cxl_vfs_mount->mnt_sb);
+	if (IS_ERR(inode)) {
+		file = ERR_CAST(inode);
+		goto err_fs;
+	}
+
+	file = alloc_file_pseudo(inode, cxl_vfs_mount, name,
+				 flags & (O_ACCMODE | O_NONBLOCK), fops);
+	if (IS_ERR(file))
+		goto err_inode;
+
+	file->private_data = priv;
+
+	return file;
+
+err_inode:
+	iput(inode);
+err_fs:
+	simple_release_fs(&cxl_vfs_mount, &cxl_fs_cnt);
+err_module:
+	module_put(fops->owner);
+	return file;
+}
+
+struct cxl_context *cxl_dev_context_init(struct pci_dev *dev)
+{
+	struct cxl_afu *afu;
+	struct cxl_context  *ctx;
+	int rc;
+
+	afu = cxl_pci_to_afu(dev);
+	if (IS_ERR(afu))
+		return ERR_CAST(afu);
+
+	ctx = cxl_context_alloc();
+	if (!ctx)
+		return ERR_PTR(-ENOMEM);
+
+	ctx->kernelapi = true;
+
+	/* Make it a slave context.  We can promote it later? */
+	rc = cxl_context_init(ctx, afu, false);
+	if (rc)
+		goto err_ctx;
+
+	return ctx;
+
+err_ctx:
+	kfree(ctx);
+	return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_GPL(cxl_dev_context_init);
+
+struct cxl_context *cxl_get_context(struct pci_dev *dev)
+{
+	return dev->dev.archdata.cxl_ctx;
+}
+EXPORT_SYMBOL_GPL(cxl_get_context);
+
+int cxl_release_context(struct cxl_context *ctx)
+{
+	if (ctx->status >= STARTED)
+		return -EBUSY;
+
+	cxl_context_free(ctx);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_release_context);
+
+static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num)
+{
+	__u16 range;
+	int r;
+
+	for (r = 0; r < CXL_IRQ_RANGES; r++) {
+		range = ctx->irqs.range[r];
+		if (num < range) {
+			return ctx->irqs.offset[r] + num;
+		}
+		num -= range;
+	}
+	return 0;
+}
+
+
+int cxl_set_priv(struct cxl_context *ctx, void *priv)
+{
+	if (!ctx)
+		return -EINVAL;
+
+	ctx->priv = priv;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_set_priv);
+
+void *cxl_get_priv(struct cxl_context *ctx)
+{
+	if (!ctx)
+		return ERR_PTR(-EINVAL);
+
+	return ctx->priv;
+}
+EXPORT_SYMBOL_GPL(cxl_get_priv);
+
+int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num)
+{
+	int res;
+	irq_hw_number_t hwirq;
+
+	if (num == 0)
+		num = ctx->afu->pp_irqs;
+	res = afu_allocate_irqs(ctx, num);
+	if (res)
+		return res;
+
+	if (!cpu_has_feature(CPU_FTR_HVMODE)) {
+		/* In a guest, the PSL interrupt is not multiplexed. It was
+		 * allocated above, and we need to set its handler
+		 */
+		hwirq = cxl_find_afu_irq(ctx, 0);
+		if (hwirq)
+			cxl_map_irq(ctx->afu->adapter, hwirq, cxl_ops->psl_interrupt, ctx, "psl");
+	}
+
+	if (ctx->status == STARTED) {
+		if (cxl_ops->update_ivtes)
+			cxl_ops->update_ivtes(ctx);
+		else WARN(1, "BUG: cxl_allocate_afu_irqs must be called prior to starting the context on this platform\n");
+	}
+
+	return res;
+}
+EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs);
+
+void cxl_free_afu_irqs(struct cxl_context *ctx)
+{
+	irq_hw_number_t hwirq;
+	unsigned int virq;
+
+	if (!cpu_has_feature(CPU_FTR_HVMODE)) {
+		hwirq = cxl_find_afu_irq(ctx, 0);
+		if (hwirq) {
+			virq = irq_find_mapping(NULL, hwirq);
+			if (virq)
+				cxl_unmap_irq(virq, ctx);
+		}
+	}
+	afu_irq_name_free(ctx);
+	cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
+}
+EXPORT_SYMBOL_GPL(cxl_free_afu_irqs);
+
+int cxl_map_afu_irq(struct cxl_context *ctx, int num,
+		    irq_handler_t handler, void *cookie, char *name)
+{
+	irq_hw_number_t hwirq;
+
+	/*
+	 * Find interrupt we are to register.
+	 */
+	hwirq = cxl_find_afu_irq(ctx, num);
+	if (!hwirq)
+		return -ENOENT;
+
+	return cxl_map_irq(ctx->afu->adapter, hwirq, handler, cookie, name);
+}
+EXPORT_SYMBOL_GPL(cxl_map_afu_irq);
+
+void cxl_unmap_afu_irq(struct cxl_context *ctx, int num, void *cookie)
+{
+	irq_hw_number_t hwirq;
+	unsigned int virq;
+
+	hwirq = cxl_find_afu_irq(ctx, num);
+	if (!hwirq)
+		return;
+
+	virq = irq_find_mapping(NULL, hwirq);
+	if (virq)
+		cxl_unmap_irq(virq, cookie);
+}
+EXPORT_SYMBOL_GPL(cxl_unmap_afu_irq);
+
+/*
+ * Start a context
+ * Code here similar to afu_ioctl_start_work().
+ */
+int cxl_start_context(struct cxl_context *ctx, u64 wed,
+		      struct task_struct *task)
+{
+	int rc = 0;
+	bool kernel = true;
+
+	pr_devel("%s: pe: %i\n", __func__, ctx->pe);
+
+	mutex_lock(&ctx->status_mutex);
+	if (ctx->status == STARTED)
+		goto out; /* already started */
+
+	/*
+	 * Increment the mapped context count for adapter. This also checks
+	 * if adapter_context_lock is taken.
+	 */
+	rc = cxl_adapter_context_get(ctx->afu->adapter);
+	if (rc)
+		goto out;
+
+	if (task) {
+		ctx->pid = get_task_pid(task, PIDTYPE_PID);
+		kernel = false;
+
+		/* acquire a reference to the task's mm */
+		ctx->mm = get_task_mm(current);
+
+		/* ensure this mm_struct can't be freed */
+		cxl_context_mm_count_get(ctx);
+
+		if (ctx->mm) {
+			/* decrement the use count from above */
+			mmput(ctx->mm);
+			/* make TLBIs for this context global */
+			mm_context_add_copro(ctx->mm);
+		}
+	}
+
+	/*
+	 * Increment driver use count. Enables global TLBIs for hash
+	 * and callbacks to handle the segment table
+	 */
+	cxl_ctx_get();
+
+	/* See the comment in afu_ioctl_start_work() */
+	smp_mb();
+
+	if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) {
+		put_pid(ctx->pid);
+		ctx->pid = NULL;
+		cxl_adapter_context_put(ctx->afu->adapter);
+		cxl_ctx_put();
+		if (task) {
+			cxl_context_mm_count_put(ctx);
+			if (ctx->mm)
+				mm_context_remove_copro(ctx->mm);
+		}
+		goto out;
+	}
+
+	ctx->status = STARTED;
+out:
+	mutex_unlock(&ctx->status_mutex);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(cxl_start_context);
+
+int cxl_process_element(struct cxl_context *ctx)
+{
+	return ctx->external_pe;
+}
+EXPORT_SYMBOL_GPL(cxl_process_element);
+
+/* Stop a context.  Returns 0 on success, otherwise -Errno */
+int cxl_stop_context(struct cxl_context *ctx)
+{
+	return __detach_context(ctx);
+}
+EXPORT_SYMBOL_GPL(cxl_stop_context);
+
+void cxl_set_master(struct cxl_context *ctx)
+{
+	ctx->master = true;
+}
+EXPORT_SYMBOL_GPL(cxl_set_master);
+
+/* wrappers around afu_* file ops which are EXPORTED */
+int cxl_fd_open(struct inode *inode, struct file *file)
+{
+	return afu_open(inode, file);
+}
+EXPORT_SYMBOL_GPL(cxl_fd_open);
+int cxl_fd_release(struct inode *inode, struct file *file)
+{
+	return afu_release(inode, file);
+}
+EXPORT_SYMBOL_GPL(cxl_fd_release);
+long cxl_fd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	return afu_ioctl(file, cmd, arg);
+}
+EXPORT_SYMBOL_GPL(cxl_fd_ioctl);
+int cxl_fd_mmap(struct file *file, struct vm_area_struct *vm)
+{
+	return afu_mmap(file, vm);
+}
+EXPORT_SYMBOL_GPL(cxl_fd_mmap);
+__poll_t cxl_fd_poll(struct file *file, struct poll_table_struct *poll)
+{
+	return afu_poll(file, poll);
+}
+EXPORT_SYMBOL_GPL(cxl_fd_poll);
+ssize_t cxl_fd_read(struct file *file, char __user *buf, size_t count,
+			loff_t *off)
+{
+	return afu_read(file, buf, count, off);
+}
+EXPORT_SYMBOL_GPL(cxl_fd_read);
+
+#define PATCH_FOPS(NAME) if (!fops->NAME) fops->NAME = afu_fops.NAME
+
+/* Get a struct file and fd for a context and attach the ops */
+struct file *cxl_get_fd(struct cxl_context *ctx, struct file_operations *fops,
+			int *fd)
+{
+	struct file *file;
+	int rc, flags, fdtmp;
+	char *name = NULL;
+
+	/* only allow one per context */
+	if (ctx->mapping)
+		return ERR_PTR(-EEXIST);
+
+	flags = O_RDWR | O_CLOEXEC;
+
+	/* This code is similar to anon_inode_getfd() */
+	rc = get_unused_fd_flags(flags);
+	if (rc < 0)
+		return ERR_PTR(rc);
+	fdtmp = rc;
+
+	/*
+	 * Patch the file ops.  Needs to be careful that this is rentrant safe.
+	 */
+	if (fops) {
+		PATCH_FOPS(open);
+		PATCH_FOPS(poll);
+		PATCH_FOPS(read);
+		PATCH_FOPS(release);
+		PATCH_FOPS(unlocked_ioctl);
+		PATCH_FOPS(compat_ioctl);
+		PATCH_FOPS(mmap);
+	} else /* use default ops */
+		fops = (struct file_operations *)&afu_fops;
+
+	name = kasprintf(GFP_KERNEL, "cxl:%d", ctx->pe);
+	file = cxl_getfile(name, fops, ctx, flags);
+	kfree(name);
+	if (IS_ERR(file))
+		goto err_fd;
+
+	cxl_context_set_mapping(ctx, file->f_mapping);
+	*fd = fdtmp;
+	return file;
+
+err_fd:
+	put_unused_fd(fdtmp);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(cxl_get_fd);
+
+struct cxl_context *cxl_fops_get_context(struct file *file)
+{
+	return file->private_data;
+}
+EXPORT_SYMBOL_GPL(cxl_fops_get_context);
+
+void cxl_set_driver_ops(struct cxl_context *ctx,
+			struct cxl_afu_driver_ops *ops)
+{
+	WARN_ON(!ops->fetch_event || !ops->event_delivered);
+	atomic_set(&ctx->afu_driver_events, 0);
+	ctx->afu_driver_ops = ops;
+}
+EXPORT_SYMBOL_GPL(cxl_set_driver_ops);
+
+void cxl_context_events_pending(struct cxl_context *ctx,
+				unsigned int new_events)
+{
+	atomic_add(new_events, &ctx->afu_driver_events);
+	wake_up_all(&ctx->wq);
+}
+EXPORT_SYMBOL_GPL(cxl_context_events_pending);
+
+int cxl_start_work(struct cxl_context *ctx,
+		   struct cxl_ioctl_start_work *work)
+{
+	int rc;
+
+	/* code taken from afu_ioctl_start_work */
+	if (!(work->flags & CXL_START_WORK_NUM_IRQS))
+		work->num_interrupts = ctx->afu->pp_irqs;
+	else if ((work->num_interrupts < ctx->afu->pp_irqs) ||
+		 (work->num_interrupts > ctx->afu->irqs_max)) {
+		return -EINVAL;
+	}
+
+	rc = afu_register_irqs(ctx, work->num_interrupts);
+	if (rc)
+		return rc;
+
+	rc = cxl_start_context(ctx, work->work_element_descriptor, current);
+	if (rc < 0) {
+		afu_release_irqs(ctx, ctx);
+		return rc;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_start_work);
+
+void __iomem *cxl_psa_map(struct cxl_context *ctx)
+{
+	if (ctx->status != STARTED)
+		return NULL;
+
+	pr_devel("%s: psn_phys%llx size:%llx\n",
+		__func__, ctx->psn_phys, ctx->psn_size);
+	return ioremap(ctx->psn_phys, ctx->psn_size);
+}
+EXPORT_SYMBOL_GPL(cxl_psa_map);
+
+void cxl_psa_unmap(void __iomem *addr)
+{
+	iounmap(addr);
+}
+EXPORT_SYMBOL_GPL(cxl_psa_unmap);
+
+int cxl_afu_reset(struct cxl_context *ctx)
+{
+	struct cxl_afu *afu = ctx->afu;
+	int rc;
+
+	rc = cxl_ops->afu_reset(afu);
+	if (rc)
+		return rc;
+
+	return cxl_ops->afu_check_and_enable(afu);
+}
+EXPORT_SYMBOL_GPL(cxl_afu_reset);
+
+void cxl_perst_reloads_same_image(struct cxl_afu *afu,
+				  bool perst_reloads_same_image)
+{
+	afu->adapter->perst_same_image = perst_reloads_same_image;
+}
+EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image);
+
+ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count)
+{
+	struct cxl_afu *afu = cxl_pci_to_afu(dev);
+	if (IS_ERR(afu))
+		return -ENODEV;
+
+	return cxl_ops->read_adapter_vpd(afu->adapter, buf, count);
+}
+EXPORT_SYMBOL_GPL(cxl_read_adapter_vpd);
diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c
new file mode 100644
index 000000000..7557835cd
--- /dev/null
+++ b/drivers/misc/cxl/base.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/rcupdate.h>
+#include <asm/errno.h>
+#include <misc/cxl-base.h>
+#include <linux/of_platform.h>
+#include "cxl.h"
+
+/* protected by rcu */
+static struct cxl_calls *cxl_calls;
+
+atomic_t cxl_use_count = ATOMIC_INIT(0);
+EXPORT_SYMBOL(cxl_use_count);
+
+#ifdef CONFIG_CXL_MODULE
+
+static inline struct cxl_calls *cxl_calls_get(void)
+{
+	struct cxl_calls *calls = NULL;
+
+	rcu_read_lock();
+	calls = rcu_dereference(cxl_calls);
+	if (calls && !try_module_get(calls->owner))
+		calls = NULL;
+	rcu_read_unlock();
+
+	return calls;
+}
+
+static inline void cxl_calls_put(struct cxl_calls *calls)
+{
+	BUG_ON(calls != cxl_calls);
+
+	/* we don't need to rcu this, as we hold a reference to the module */
+	module_put(cxl_calls->owner);
+}
+
+#else /* !defined CONFIG_CXL_MODULE */
+
+static inline struct cxl_calls *cxl_calls_get(void)
+{
+	return cxl_calls;
+}
+
+static inline void cxl_calls_put(struct cxl_calls *calls) { }
+
+#endif /* CONFIG_CXL_MODULE */
+
+/* AFU refcount management */
+struct cxl_afu *cxl_afu_get(struct cxl_afu *afu)
+{
+	return (get_device(&afu->dev) == NULL) ? NULL : afu;
+}
+EXPORT_SYMBOL_GPL(cxl_afu_get);
+
+void cxl_afu_put(struct cxl_afu *afu)
+{
+	put_device(&afu->dev);
+}
+EXPORT_SYMBOL_GPL(cxl_afu_put);
+
+void cxl_slbia(struct mm_struct *mm)
+{
+	struct cxl_calls *calls;
+
+	calls = cxl_calls_get();
+	if (!calls)
+		return;
+
+	if (cxl_ctx_in_use())
+	    calls->cxl_slbia(mm);
+
+	cxl_calls_put(calls);
+}
+
+int register_cxl_calls(struct cxl_calls *calls)
+{
+	if (cxl_calls)
+		return -EBUSY;
+
+	rcu_assign_pointer(cxl_calls, calls);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(register_cxl_calls);
+
+void unregister_cxl_calls(struct cxl_calls *calls)
+{
+	BUG_ON(cxl_calls->owner != calls->owner);
+	RCU_INIT_POINTER(cxl_calls, NULL);
+	synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(unregister_cxl_calls);
+
+int cxl_update_properties(struct device_node *dn,
+			  struct property *new_prop)
+{
+	return of_update_property(dn, new_prop);
+}
+EXPORT_SYMBOL_GPL(cxl_update_properties);
+
+static int __init cxl_base_init(void)
+{
+	struct device_node *np;
+	struct platform_device *dev;
+	int count = 0;
+
+	/*
+	 * Scan for compatible devices in guest only
+	 */
+	if (cpu_has_feature(CPU_FTR_HVMODE))
+		return 0;
+
+	for_each_compatible_node(np, NULL, "ibm,coherent-platform-facility") {
+		dev = of_platform_device_create(np, NULL, NULL);
+		if (dev)
+			count++;
+	}
+	pr_devel("Found %d cxl device(s)\n", count);
+	return 0;
+}
+device_initcall(cxl_base_init);
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
new file mode 100644
index 000000000..5fe529b43
--- /dev/null
+++ b/drivers/misc/cxl/context.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/bitmap.h>
+#include <linux/sched.h>
+#include <linux/pid.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+#include <linux/idr.h>
+#include <linux/sched/mm.h>
+#include <linux/mmu_context.h>
+#include <asm/cputable.h>
+#include <asm/current.h>
+#include <asm/copro.h>
+
+#include "cxl.h"
+
+/*
+ * Allocates space for a CXL context.
+ */
+struct cxl_context *cxl_context_alloc(void)
+{
+	return kzalloc(sizeof(struct cxl_context), GFP_KERNEL);
+}
+
+/*
+ * Initialises a CXL context.
+ */
+int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master)
+{
+	int i;
+
+	ctx->afu = afu;
+	ctx->master = master;
+	ctx->pid = NULL; /* Set in start work ioctl */
+	mutex_init(&ctx->mapping_lock);
+	ctx->mapping = NULL;
+	ctx->tidr = 0;
+	ctx->assign_tidr = false;
+
+	if (cxl_is_power8()) {
+		spin_lock_init(&ctx->sste_lock);
+
+		/*
+		 * Allocate the segment table before we put it in the IDR so that we
+		 * can always access it when dereferenced from IDR. For the same
+		 * reason, the segment table is only destroyed after the context is
+		 * removed from the IDR.  Access to this in the IOCTL is protected by
+		 * Linux filesytem symantics (can't IOCTL until open is complete).
+		 */
+		i = cxl_alloc_sst(ctx);
+		if (i)
+			return i;
+	}
+
+	INIT_WORK(&ctx->fault_work, cxl_handle_fault);
+
+	init_waitqueue_head(&ctx->wq);
+	spin_lock_init(&ctx->lock);
+
+	ctx->irq_bitmap = NULL;
+	ctx->pending_irq = false;
+	ctx->pending_fault = false;
+	ctx->pending_afu_err = false;
+
+	INIT_LIST_HEAD(&ctx->irq_names);
+
+	/*
+	 * When we have to destroy all contexts in cxl_context_detach_all() we
+	 * end up with afu_release_irqs() called from inside a
+	 * idr_for_each_entry(). Hence we need to make sure that anything
+	 * dereferenced from this IDR is ok before we allocate the IDR here.
+	 * This clears out the IRQ ranges to ensure this.
+	 */
+	for (i = 0; i < CXL_IRQ_RANGES; i++)
+		ctx->irqs.range[i] = 0;
+
+	mutex_init(&ctx->status_mutex);
+
+	ctx->status = OPENED;
+
+	/*
+	 * Allocating IDR! We better make sure everything's setup that
+	 * dereferences from it.
+	 */
+	mutex_lock(&afu->contexts_lock);
+	idr_preload(GFP_KERNEL);
+	i = idr_alloc(&ctx->afu->contexts_idr, ctx, 0,
+		      ctx->afu->num_procs, GFP_NOWAIT);
+	idr_preload_end();
+	mutex_unlock(&afu->contexts_lock);
+	if (i < 0)
+		return i;
+
+	ctx->pe = i;
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		ctx->elem = &ctx->afu->native->spa[i];
+		ctx->external_pe = ctx->pe;
+	} else {
+		ctx->external_pe = -1; /* assigned when attaching */
+	}
+	ctx->pe_inserted = false;
+
+	/*
+	 * take a ref on the afu so that it stays alive at-least till
+	 * this context is reclaimed inside reclaim_ctx.
+	 */
+	cxl_afu_get(afu);
+	return 0;
+}
+
+void cxl_context_set_mapping(struct cxl_context *ctx,
+			struct address_space *mapping)
+{
+	mutex_lock(&ctx->mapping_lock);
+	ctx->mapping = mapping;
+	mutex_unlock(&ctx->mapping_lock);
+}
+
+static vm_fault_t cxl_mmap_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct cxl_context *ctx = vma->vm_file->private_data;
+	u64 area, offset;
+	vm_fault_t ret;
+
+	offset = vmf->pgoff << PAGE_SHIFT;
+
+	pr_devel("%s: pe: %i address: 0x%lx offset: 0x%llx\n",
+			__func__, ctx->pe, vmf->address, offset);
+
+	if (ctx->afu->current_mode == CXL_MODE_DEDICATED) {
+		area = ctx->afu->psn_phys;
+		if (offset >= ctx->afu->adapter->ps_size)
+			return VM_FAULT_SIGBUS;
+	} else {
+		area = ctx->psn_phys;
+		if (offset >= ctx->psn_size)
+			return VM_FAULT_SIGBUS;
+	}
+
+	mutex_lock(&ctx->status_mutex);
+
+	if (ctx->status != STARTED) {
+		mutex_unlock(&ctx->status_mutex);
+		pr_devel("%s: Context not started, failing problem state access\n", __func__);
+		if (ctx->mmio_err_ff) {
+			if (!ctx->ff_page) {
+				ctx->ff_page = alloc_page(GFP_USER);
+				if (!ctx->ff_page)
+					return VM_FAULT_OOM;
+				memset(page_address(ctx->ff_page), 0xff, PAGE_SIZE);
+			}
+			get_page(ctx->ff_page);
+			vmf->page = ctx->ff_page;
+			vma->vm_page_prot = pgprot_cached(vma->vm_page_prot);
+			return 0;
+		}
+		return VM_FAULT_SIGBUS;
+	}
+
+	ret = vmf_insert_pfn(vma, vmf->address, (area + offset) >> PAGE_SHIFT);
+
+	mutex_unlock(&ctx->status_mutex);
+
+	return ret;
+}
+
+static const struct vm_operations_struct cxl_mmap_vmops = {
+	.fault = cxl_mmap_fault,
+};
+
+/*
+ * Map a per-context mmio space into the given vma.
+ */
+int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma)
+{
+	u64 start = vma->vm_pgoff << PAGE_SHIFT;
+	u64 len = vma->vm_end - vma->vm_start;
+
+	if (ctx->afu->current_mode == CXL_MODE_DEDICATED) {
+		if (start + len > ctx->afu->adapter->ps_size)
+			return -EINVAL;
+
+		if (cxl_is_power9()) {
+			/*
+			 * Make sure there is a valid problem state
+			 * area space for this AFU.
+			 */
+			if (ctx->master && !ctx->afu->psa) {
+				pr_devel("AFU doesn't support mmio space\n");
+				return -EINVAL;
+			}
+
+			/* Can't mmap until the AFU is enabled */
+			if (!ctx->afu->enabled)
+				return -EBUSY;
+		}
+	} else {
+		if (start + len > ctx->psn_size)
+			return -EINVAL;
+
+		/* Make sure there is a valid per process space for this AFU */
+		if ((ctx->master && !ctx->afu->psa) || (!ctx->afu->pp_psa)) {
+			pr_devel("AFU doesn't support mmio space\n");
+			return -EINVAL;
+		}
+
+		/* Can't mmap until the AFU is enabled */
+		if (!ctx->afu->enabled)
+			return -EBUSY;
+	}
+
+	pr_devel("%s: mmio physical: %llx pe: %i master:%i\n", __func__,
+		 ctx->psn_phys, ctx->pe , ctx->master);
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_ops = &cxl_mmap_vmops;
+	return 0;
+}
+
+/*
+ * Detach a context from the hardware. This disables interrupts and doesn't
+ * return until all outstanding interrupts for this context have completed. The
+ * hardware should no longer access *ctx after this has returned.
+ */
+int __detach_context(struct cxl_context *ctx)
+{
+	enum cxl_context_status status;
+
+	mutex_lock(&ctx->status_mutex);
+	status = ctx->status;
+	ctx->status = CLOSED;
+	mutex_unlock(&ctx->status_mutex);
+	if (status != STARTED)
+		return -EBUSY;
+
+	/* Only warn if we detached while the link was OK.
+	 * If detach fails when hw is down, we don't care.
+	 */
+	WARN_ON(cxl_ops->detach_process(ctx) &&
+		cxl_ops->link_ok(ctx->afu->adapter, ctx->afu));
+	flush_work(&ctx->fault_work); /* Only needed for dedicated process */
+
+	/*
+	 * Wait until no further interrupts are presented by the PSL
+	 * for this context.
+	 */
+	if (cxl_ops->irq_wait)
+		cxl_ops->irq_wait(ctx);
+
+	/* release the reference to the group leader and mm handling pid */
+	put_pid(ctx->pid);
+
+	cxl_ctx_put();
+
+	/* Decrease the attached context count on the adapter */
+	cxl_adapter_context_put(ctx->afu->adapter);
+
+	/* Decrease the mm count on the context */
+	cxl_context_mm_count_put(ctx);
+	if (ctx->mm)
+		mm_context_remove_copro(ctx->mm);
+	ctx->mm = NULL;
+
+	return 0;
+}
+
+/*
+ * Detach the given context from the AFU. This doesn't actually
+ * free the context but it should stop the context running in hardware
+ * (ie. prevent this context from generating any further interrupts
+ * so that it can be freed).
+ */
+void cxl_context_detach(struct cxl_context *ctx)
+{
+	int rc;
+
+	rc = __detach_context(ctx);
+	if (rc)
+		return;
+
+	afu_release_irqs(ctx, ctx);
+	wake_up_all(&ctx->wq);
+}
+
+/*
+ * Detach all contexts on the given AFU.
+ */
+void cxl_context_detach_all(struct cxl_afu *afu)
+{
+	struct cxl_context *ctx;
+	int tmp;
+
+	mutex_lock(&afu->contexts_lock);
+	idr_for_each_entry(&afu->contexts_idr, ctx, tmp) {
+		/*
+		 * Anything done in here needs to be setup before the IDR is
+		 * created and torn down after the IDR removed
+		 */
+		cxl_context_detach(ctx);
+
+		/*
+		 * We are force detaching - remove any active PSA mappings so
+		 * userspace cannot interfere with the card if it comes back.
+		 * Easiest way to exercise this is to unbind and rebind the
+		 * driver via sysfs while it is in use.
+		 */
+		mutex_lock(&ctx->mapping_lock);
+		if (ctx->mapping)
+			unmap_mapping_range(ctx->mapping, 0, 0, 1);
+		mutex_unlock(&ctx->mapping_lock);
+	}
+	mutex_unlock(&afu->contexts_lock);
+}
+
+static void reclaim_ctx(struct rcu_head *rcu)
+{
+	struct cxl_context *ctx = container_of(rcu, struct cxl_context, rcu);
+
+	if (cxl_is_power8())
+		free_page((u64)ctx->sstp);
+	if (ctx->ff_page)
+		__free_page(ctx->ff_page);
+	ctx->sstp = NULL;
+
+	kfree(ctx->irq_bitmap);
+
+	/* Drop ref to the afu device taken during cxl_context_init */
+	cxl_afu_put(ctx->afu);
+
+	kfree(ctx);
+}
+
+void cxl_context_free(struct cxl_context *ctx)
+{
+	if (ctx->kernelapi && ctx->mapping)
+		cxl_release_mapping(ctx);
+	mutex_lock(&ctx->afu->contexts_lock);
+	idr_remove(&ctx->afu->contexts_idr, ctx->pe);
+	mutex_unlock(&ctx->afu->contexts_lock);
+	call_rcu(&ctx->rcu, reclaim_ctx);
+}
+
+void cxl_context_mm_count_get(struct cxl_context *ctx)
+{
+	if (ctx->mm)
+		atomic_inc(&ctx->mm->mm_count);
+}
+
+void cxl_context_mm_count_put(struct cxl_context *ctx)
+{
+	if (ctx->mm)
+		mmdrop(ctx->mm);
+}
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
new file mode 100644
index 000000000..d1d927ccb
--- /dev/null
+++ b/drivers/misc/cxl/cxl.h
@@ -0,0 +1,1139 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _CXL_H_
+#define _CXL_H_
+
+#include <linux/interrupt.h>
+#include <linux/semaphore.h>
+#include <linux/device.h>
+#include <linux/types.h>
+#include <linux/cdev.h>
+#include <linux/pid.h>
+#include <linux/io.h>
+#include <linux/pci.h>
+#include <linux/fs.h>
+#include <asm/cputable.h>
+#include <asm/mmu.h>
+#include <asm/reg.h>
+#include <misc/cxl-base.h>
+
+#include <misc/cxl.h>
+#include <uapi/misc/cxl.h>
+
+extern uint cxl_verbose;
+
+#define CXL_TIMEOUT 5
+
+/*
+ * Bump version each time a user API change is made, whether it is
+ * backwards compatible ot not.
+ */
+#define CXL_API_VERSION 3
+#define CXL_API_VERSION_COMPATIBLE 1
+
+/*
+ * Opaque types to avoid accidentally passing registers for the wrong MMIO
+ *
+ * At the end of the day, I'm not married to using typedef here, but it might
+ * (and has!) help avoid bugs like mixing up CXL_PSL_CtxTime and
+ * CXL_PSL_CtxTime_An, or calling cxl_p1n_write instead of cxl_p1_write.
+ *
+ * I'm quite happy if these are changed back to #defines before upstreaming, it
+ * should be little more than a regexp search+replace operation in this file.
+ */
+typedef struct {
+	const int x;
+} cxl_p1_reg_t;
+typedef struct {
+	const int x;
+} cxl_p1n_reg_t;
+typedef struct {
+	const int x;
+} cxl_p2n_reg_t;
+#define cxl_reg_off(reg) \
+	(reg.x)
+
+/* Memory maps. Ref CXL Appendix A */
+
+/* PSL Privilege 1 Memory Map */
+/* Configuration and Control area - CAIA 1&2 */
+static const cxl_p1_reg_t CXL_PSL_CtxTime = {0x0000};
+static const cxl_p1_reg_t CXL_PSL_ErrIVTE = {0x0008};
+static const cxl_p1_reg_t CXL_PSL_KEY1    = {0x0010};
+static const cxl_p1_reg_t CXL_PSL_KEY2    = {0x0018};
+static const cxl_p1_reg_t CXL_PSL_Control = {0x0020};
+/* Downloading */
+static const cxl_p1_reg_t CXL_PSL_DLCNTL  = {0x0060};
+static const cxl_p1_reg_t CXL_PSL_DLADDR  = {0x0068};
+
+/* PSL Lookaside Buffer Management Area - CAIA 1 */
+static const cxl_p1_reg_t CXL_PSL_LBISEL  = {0x0080};
+static const cxl_p1_reg_t CXL_PSL_SLBIE   = {0x0088};
+static const cxl_p1_reg_t CXL_PSL_SLBIA   = {0x0090};
+static const cxl_p1_reg_t CXL_PSL_TLBIE   = {0x00A0};
+static const cxl_p1_reg_t CXL_PSL_TLBIA   = {0x00A8};
+static const cxl_p1_reg_t CXL_PSL_AFUSEL  = {0x00B0};
+
+/* 0x00C0:7EFF Implementation dependent area */
+/* PSL registers - CAIA 1 */
+static const cxl_p1_reg_t CXL_PSL_FIR1      = {0x0100};
+static const cxl_p1_reg_t CXL_PSL_FIR2      = {0x0108};
+static const cxl_p1_reg_t CXL_PSL_Timebase  = {0x0110};
+static const cxl_p1_reg_t CXL_PSL_VERSION   = {0x0118};
+static const cxl_p1_reg_t CXL_PSL_RESLCKTO  = {0x0128};
+static const cxl_p1_reg_t CXL_PSL_TB_CTLSTAT = {0x0140};
+static const cxl_p1_reg_t CXL_PSL_FIR_CNTL  = {0x0148};
+static const cxl_p1_reg_t CXL_PSL_DSNDCTL   = {0x0150};
+static const cxl_p1_reg_t CXL_PSL_SNWRALLOC = {0x0158};
+static const cxl_p1_reg_t CXL_PSL_TRACE     = {0x0170};
+/* PSL registers - CAIA 2 */
+static const cxl_p1_reg_t CXL_PSL9_CONTROL  = {0x0020};
+static const cxl_p1_reg_t CXL_XSL9_INV      = {0x0110};
+static const cxl_p1_reg_t CXL_XSL9_DBG      = {0x0130};
+static const cxl_p1_reg_t CXL_XSL9_DEF      = {0x0140};
+static const cxl_p1_reg_t CXL_XSL9_DSNCTL   = {0x0168};
+static const cxl_p1_reg_t CXL_PSL9_FIR1     = {0x0300};
+static const cxl_p1_reg_t CXL_PSL9_FIR_MASK = {0x0308};
+static const cxl_p1_reg_t CXL_PSL9_Timebase = {0x0310};
+static const cxl_p1_reg_t CXL_PSL9_DEBUG    = {0x0320};
+static const cxl_p1_reg_t CXL_PSL9_FIR_CNTL = {0x0348};
+static const cxl_p1_reg_t CXL_PSL9_DSNDCTL  = {0x0350};
+static const cxl_p1_reg_t CXL_PSL9_TB_CTLSTAT = {0x0340};
+static const cxl_p1_reg_t CXL_PSL9_TRACECFG = {0x0368};
+static const cxl_p1_reg_t CXL_PSL9_APCDEDALLOC = {0x0378};
+static const cxl_p1_reg_t CXL_PSL9_APCDEDTYPE = {0x0380};
+static const cxl_p1_reg_t CXL_PSL9_TNR_ADDR = {0x0388};
+static const cxl_p1_reg_t CXL_PSL9_CTCCFG = {0x0390};
+static const cxl_p1_reg_t CXL_PSL9_GP_CT = {0x0398};
+static const cxl_p1_reg_t CXL_XSL9_IERAT = {0x0588};
+static const cxl_p1_reg_t CXL_XSL9_ILPP  = {0x0590};
+
+/* 0x7F00:7FFF Reserved PCIe MSI-X Pending Bit Array area */
+/* 0x8000:FFFF Reserved PCIe MSI-X Table Area */
+
+/* PSL Slice Privilege 1 Memory Map */
+/* Configuration Area - CAIA 1&2 */
+static const cxl_p1n_reg_t CXL_PSL_SR_An          = {0x00};
+static const cxl_p1n_reg_t CXL_PSL_LPID_An        = {0x08};
+static const cxl_p1n_reg_t CXL_PSL_AMBAR_An       = {0x10};
+static const cxl_p1n_reg_t CXL_PSL_SPOffset_An    = {0x18};
+static const cxl_p1n_reg_t CXL_PSL_ID_An          = {0x20};
+static const cxl_p1n_reg_t CXL_PSL_SERR_An        = {0x28};
+/* Memory Management and Lookaside Buffer Management - CAIA 1*/
+static const cxl_p1n_reg_t CXL_PSL_SDR_An         = {0x30};
+/* Memory Management and Lookaside Buffer Management - CAIA 1&2 */
+static const cxl_p1n_reg_t CXL_PSL_AMOR_An        = {0x38};
+/* Pointer Area - CAIA 1&2 */
+static const cxl_p1n_reg_t CXL_HAURP_An           = {0x80};
+static const cxl_p1n_reg_t CXL_PSL_SPAP_An        = {0x88};
+static const cxl_p1n_reg_t CXL_PSL_LLCMD_An       = {0x90};
+/* Control Area - CAIA 1&2 */
+static const cxl_p1n_reg_t CXL_PSL_SCNTL_An       = {0xA0};
+static const cxl_p1n_reg_t CXL_PSL_CtxTime_An     = {0xA8};
+static const cxl_p1n_reg_t CXL_PSL_IVTE_Offset_An = {0xB0};
+static const cxl_p1n_reg_t CXL_PSL_IVTE_Limit_An  = {0xB8};
+/* 0xC0:FF Implementation Dependent Area - CAIA 1&2 */
+static const cxl_p1n_reg_t CXL_PSL_FIR_SLICE_An   = {0xC0};
+static const cxl_p1n_reg_t CXL_AFU_DEBUG_An       = {0xC8};
+/* 0xC0:FF Implementation Dependent Area - CAIA 1 */
+static const cxl_p1n_reg_t CXL_PSL_APCALLOC_A     = {0xD0};
+static const cxl_p1n_reg_t CXL_PSL_COALLOC_A      = {0xD8};
+static const cxl_p1n_reg_t CXL_PSL_RXCTL_A        = {0xE0};
+static const cxl_p1n_reg_t CXL_PSL_SLICE_TRACE    = {0xE8};
+
+/* PSL Slice Privilege 2 Memory Map */
+/* Configuration and Control Area - CAIA 1&2 */
+static const cxl_p2n_reg_t CXL_PSL_PID_TID_An = {0x000};
+static const cxl_p2n_reg_t CXL_CSRP_An        = {0x008};
+/* Configuration and Control Area - CAIA 1 */
+static const cxl_p2n_reg_t CXL_AURP0_An       = {0x010};
+static const cxl_p2n_reg_t CXL_AURP1_An       = {0x018};
+static const cxl_p2n_reg_t CXL_SSTP0_An       = {0x020};
+static const cxl_p2n_reg_t CXL_SSTP1_An       = {0x028};
+/* Configuration and Control Area - CAIA 1 */
+static const cxl_p2n_reg_t CXL_PSL_AMR_An     = {0x030};
+/* Segment Lookaside Buffer Management - CAIA 1 */
+static const cxl_p2n_reg_t CXL_SLBIE_An       = {0x040};
+static const cxl_p2n_reg_t CXL_SLBIA_An       = {0x048};
+static const cxl_p2n_reg_t CXL_SLBI_Select_An = {0x050};
+/* Interrupt Registers - CAIA 1&2 */
+static const cxl_p2n_reg_t CXL_PSL_DSISR_An   = {0x060};
+static const cxl_p2n_reg_t CXL_PSL_DAR_An     = {0x068};
+static const cxl_p2n_reg_t CXL_PSL_DSR_An     = {0x070};
+static const cxl_p2n_reg_t CXL_PSL_TFC_An     = {0x078};
+static const cxl_p2n_reg_t CXL_PSL_PEHandle_An = {0x080};
+static const cxl_p2n_reg_t CXL_PSL_ErrStat_An = {0x088};
+/* AFU Registers - CAIA 1&2 */
+static const cxl_p2n_reg_t CXL_AFU_Cntl_An    = {0x090};
+static const cxl_p2n_reg_t CXL_AFU_ERR_An     = {0x098};
+/* Work Element Descriptor - CAIA 1&2 */
+static const cxl_p2n_reg_t CXL_PSL_WED_An     = {0x0A0};
+/* 0x0C0:FFF Implementation Dependent Area */
+
+#define CXL_PSL_SPAP_Addr 0x0ffffffffffff000ULL
+#define CXL_PSL_SPAP_Size 0x0000000000000ff0ULL
+#define CXL_PSL_SPAP_Size_Shift 4
+#define CXL_PSL_SPAP_V    0x0000000000000001ULL
+
+/****** CXL_PSL_Control ****************************************************/
+#define CXL_PSL_Control_tb              (0x1ull << (63-63))
+#define CXL_PSL_Control_Fr              (0x1ull << (63-31))
+#define CXL_PSL_Control_Fs_MASK         (0x3ull << (63-29))
+#define CXL_PSL_Control_Fs_Complete     (0x3ull << (63-29))
+
+/****** CXL_PSL_DLCNTL *****************************************************/
+#define CXL_PSL_DLCNTL_D (0x1ull << (63-28))
+#define CXL_PSL_DLCNTL_C (0x1ull << (63-29))
+#define CXL_PSL_DLCNTL_E (0x1ull << (63-30))
+#define CXL_PSL_DLCNTL_S (0x1ull << (63-31))
+#define CXL_PSL_DLCNTL_CE (CXL_PSL_DLCNTL_C | CXL_PSL_DLCNTL_E)
+#define CXL_PSL_DLCNTL_DCES (CXL_PSL_DLCNTL_D | CXL_PSL_DLCNTL_CE | CXL_PSL_DLCNTL_S)
+
+/****** CXL_PSL_SR_An ******************************************************/
+#define CXL_PSL_SR_An_SF  MSR_SF            /* 64bit */
+#define CXL_PSL_SR_An_TA  (1ull << (63-1))  /* Tags active,   GA1: 0 */
+#define CXL_PSL_SR_An_HV  MSR_HV            /* Hypervisor,    GA1: 0 */
+#define CXL_PSL_SR_An_XLAT_hpt (0ull << (63-6))/* Hashed page table (HPT) mode */
+#define CXL_PSL_SR_An_XLAT_roh (2ull << (63-6))/* Radix on HPT mode */
+#define CXL_PSL_SR_An_XLAT_ror (3ull << (63-6))/* Radix on Radix mode */
+#define CXL_PSL_SR_An_BOT (1ull << (63-10)) /* Use the in-memory segment table */
+#define CXL_PSL_SR_An_PR  MSR_PR            /* Problem state, GA1: 1 */
+#define CXL_PSL_SR_An_ISL (1ull << (63-53)) /* Ignore Segment Large Page */
+#define CXL_PSL_SR_An_TC  (1ull << (63-54)) /* Page Table secondary hash */
+#define CXL_PSL_SR_An_US  (1ull << (63-56)) /* User state,    GA1: X */
+#define CXL_PSL_SR_An_SC  (1ull << (63-58)) /* Segment Table secondary hash */
+#define CXL_PSL_SR_An_R   MSR_DR            /* Relocate,      GA1: 1 */
+#define CXL_PSL_SR_An_MP  (1ull << (63-62)) /* Master Process */
+#define CXL_PSL_SR_An_LE  (1ull << (63-63)) /* Little Endian */
+
+/****** CXL_PSL_ID_An ****************************************************/
+#define CXL_PSL_ID_An_F	(1ull << (63-31))
+#define CXL_PSL_ID_An_L	(1ull << (63-30))
+
+/****** CXL_PSL_SERR_An ****************************************************/
+#define CXL_PSL_SERR_An_afuto	(1ull << (63-0))
+#define CXL_PSL_SERR_An_afudis	(1ull << (63-1))
+#define CXL_PSL_SERR_An_afuov	(1ull << (63-2))
+#define CXL_PSL_SERR_An_badsrc	(1ull << (63-3))
+#define CXL_PSL_SERR_An_badctx	(1ull << (63-4))
+#define CXL_PSL_SERR_An_llcmdis	(1ull << (63-5))
+#define CXL_PSL_SERR_An_llcmdto	(1ull << (63-6))
+#define CXL_PSL_SERR_An_afupar	(1ull << (63-7))
+#define CXL_PSL_SERR_An_afudup	(1ull << (63-8))
+#define CXL_PSL_SERR_An_IRQS	( \
+	CXL_PSL_SERR_An_afuto | CXL_PSL_SERR_An_afudis | CXL_PSL_SERR_An_afuov | \
+	CXL_PSL_SERR_An_badsrc | CXL_PSL_SERR_An_badctx | CXL_PSL_SERR_An_llcmdis | \
+	CXL_PSL_SERR_An_llcmdto | CXL_PSL_SERR_An_afupar | CXL_PSL_SERR_An_afudup)
+#define CXL_PSL_SERR_An_afuto_mask	(1ull << (63-32))
+#define CXL_PSL_SERR_An_afudis_mask	(1ull << (63-33))
+#define CXL_PSL_SERR_An_afuov_mask	(1ull << (63-34))
+#define CXL_PSL_SERR_An_badsrc_mask	(1ull << (63-35))
+#define CXL_PSL_SERR_An_badctx_mask	(1ull << (63-36))
+#define CXL_PSL_SERR_An_llcmdis_mask	(1ull << (63-37))
+#define CXL_PSL_SERR_An_llcmdto_mask	(1ull << (63-38))
+#define CXL_PSL_SERR_An_afupar_mask	(1ull << (63-39))
+#define CXL_PSL_SERR_An_afudup_mask	(1ull << (63-40))
+#define CXL_PSL_SERR_An_IRQ_MASKS	( \
+	CXL_PSL_SERR_An_afuto_mask | CXL_PSL_SERR_An_afudis_mask | CXL_PSL_SERR_An_afuov_mask | \
+	CXL_PSL_SERR_An_badsrc_mask | CXL_PSL_SERR_An_badctx_mask | CXL_PSL_SERR_An_llcmdis_mask | \
+	CXL_PSL_SERR_An_llcmdto_mask | CXL_PSL_SERR_An_afupar_mask | CXL_PSL_SERR_An_afudup_mask)
+
+#define CXL_PSL_SERR_An_AE	(1ull << (63-30))
+
+/****** CXL_PSL_SCNTL_An ****************************************************/
+#define CXL_PSL_SCNTL_An_CR          (0x1ull << (63-15))
+/* Programming Modes: */
+#define CXL_PSL_SCNTL_An_PM_MASK     (0xffffull << (63-31))
+#define CXL_PSL_SCNTL_An_PM_Shared   (0x0000ull << (63-31))
+#define CXL_PSL_SCNTL_An_PM_OS       (0x0001ull << (63-31))
+#define CXL_PSL_SCNTL_An_PM_Process  (0x0002ull << (63-31))
+#define CXL_PSL_SCNTL_An_PM_AFU      (0x0004ull << (63-31))
+#define CXL_PSL_SCNTL_An_PM_AFU_PBT  (0x0104ull << (63-31))
+/* Purge Status (ro) */
+#define CXL_PSL_SCNTL_An_Ps_MASK     (0x3ull << (63-39))
+#define CXL_PSL_SCNTL_An_Ps_Pending  (0x1ull << (63-39))
+#define CXL_PSL_SCNTL_An_Ps_Complete (0x3ull << (63-39))
+/* Purge */
+#define CXL_PSL_SCNTL_An_Pc          (0x1ull << (63-48))
+/* Suspend Status (ro) */
+#define CXL_PSL_SCNTL_An_Ss_MASK     (0x3ull << (63-55))
+#define CXL_PSL_SCNTL_An_Ss_Pending  (0x1ull << (63-55))
+#define CXL_PSL_SCNTL_An_Ss_Complete (0x3ull << (63-55))
+/* Suspend Control */
+#define CXL_PSL_SCNTL_An_Sc          (0x1ull << (63-63))
+
+/* AFU Slice Enable Status (ro) */
+#define CXL_AFU_Cntl_An_ES_MASK     (0x7ull << (63-2))
+#define CXL_AFU_Cntl_An_ES_Disabled (0x0ull << (63-2))
+#define CXL_AFU_Cntl_An_ES_Enabled  (0x4ull << (63-2))
+/* AFU Slice Enable */
+#define CXL_AFU_Cntl_An_E           (0x1ull << (63-3))
+/* AFU Slice Reset status (ro) */
+#define CXL_AFU_Cntl_An_RS_MASK     (0x3ull << (63-5))
+#define CXL_AFU_Cntl_An_RS_Pending  (0x1ull << (63-5))
+#define CXL_AFU_Cntl_An_RS_Complete (0x2ull << (63-5))
+/* AFU Slice Reset */
+#define CXL_AFU_Cntl_An_RA          (0x1ull << (63-7))
+
+/****** CXL_SSTP0/1_An ******************************************************/
+/* These top bits are for the segment that CONTAINS the segment table */
+#define CXL_SSTP0_An_B_SHIFT    SLB_VSID_SSIZE_SHIFT
+#define CXL_SSTP0_An_KS             (1ull << (63-2))
+#define CXL_SSTP0_An_KP             (1ull << (63-3))
+#define CXL_SSTP0_An_N              (1ull << (63-4))
+#define CXL_SSTP0_An_L              (1ull << (63-5))
+#define CXL_SSTP0_An_C              (1ull << (63-6))
+#define CXL_SSTP0_An_TA             (1ull << (63-7))
+#define CXL_SSTP0_An_LP_SHIFT                (63-9)  /* 2 Bits */
+/* And finally, the virtual address & size of the segment table: */
+#define CXL_SSTP0_An_SegTableSize_SHIFT      (63-31) /* 12 Bits */
+#define CXL_SSTP0_An_SegTableSize_MASK \
+	(((1ull << 12) - 1) << CXL_SSTP0_An_SegTableSize_SHIFT)
+#define CXL_SSTP0_An_STVA_U_MASK   ((1ull << (63-49))-1)
+#define CXL_SSTP1_An_STVA_L_MASK (~((1ull << (63-55))-1))
+#define CXL_SSTP1_An_V              (1ull << (63-63))
+
+/****** CXL_PSL_SLBIE_[An] - CAIA 1 **************************************************/
+/* write: */
+#define CXL_SLBIE_C        PPC_BIT(36)         /* Class */
+#define CXL_SLBIE_SS       PPC_BITMASK(37, 38) /* Segment Size */
+#define CXL_SLBIE_SS_SHIFT PPC_BITLSHIFT(38)
+#define CXL_SLBIE_TA       PPC_BIT(38)         /* Tags Active */
+/* read: */
+#define CXL_SLBIE_MAX      PPC_BITMASK(24, 31)
+#define CXL_SLBIE_PENDING  PPC_BITMASK(56, 63)
+
+/****** Common to all CXL_TLBIA/SLBIA_[An] - CAIA 1 **********************************/
+#define CXL_TLB_SLB_P          (1ull) /* Pending (read) */
+
+/****** Common to all CXL_TLB/SLB_IA/IE_[An] registers - CAIA 1 **********************/
+#define CXL_TLB_SLB_IQ_ALL     (0ull) /* Inv qualifier */
+#define CXL_TLB_SLB_IQ_LPID    (1ull) /* Inv qualifier */
+#define CXL_TLB_SLB_IQ_LPIDPID (3ull) /* Inv qualifier */
+
+/****** CXL_PSL_AFUSEL ******************************************************/
+#define CXL_PSL_AFUSEL_A (1ull << (63-55)) /* Adapter wide invalidates affect all AFUs */
+
+/****** CXL_PSL_DSISR_An - CAIA 1 ****************************************************/
+#define CXL_PSL_DSISR_An_DS (1ull << (63-0))  /* Segment not found */
+#define CXL_PSL_DSISR_An_DM (1ull << (63-1))  /* PTE not found (See also: M) or protection fault */
+#define CXL_PSL_DSISR_An_ST (1ull << (63-2))  /* Segment Table PTE not found */
+#define CXL_PSL_DSISR_An_UR (1ull << (63-3))  /* AURP PTE not found */
+#define CXL_PSL_DSISR_TRANS (CXL_PSL_DSISR_An_DS | CXL_PSL_DSISR_An_DM | CXL_PSL_DSISR_An_ST | CXL_PSL_DSISR_An_UR)
+#define CXL_PSL_DSISR_An_PE (1ull << (63-4))  /* PSL Error (implementation specific) */
+#define CXL_PSL_DSISR_An_AE (1ull << (63-5))  /* AFU Error */
+#define CXL_PSL_DSISR_An_OC (1ull << (63-6))  /* OS Context Warning */
+#define CXL_PSL_DSISR_PENDING (CXL_PSL_DSISR_TRANS | CXL_PSL_DSISR_An_PE | CXL_PSL_DSISR_An_AE | CXL_PSL_DSISR_An_OC)
+/* NOTE: Bits 32:63 are undefined if DSISR[DS] = 1 */
+#define CXL_PSL_DSISR_An_M  DSISR_NOHPTE      /* PTE not found */
+#define CXL_PSL_DSISR_An_P  DSISR_PROTFAULT   /* Storage protection violation */
+#define CXL_PSL_DSISR_An_A  (1ull << (63-37)) /* AFU lock access to write through or cache inhibited storage */
+#define CXL_PSL_DSISR_An_S  DSISR_ISSTORE     /* Access was afu_wr or afu_zero */
+#define CXL_PSL_DSISR_An_K  DSISR_KEYFAULT    /* Access not permitted by virtual page class key protection */
+
+/****** CXL_PSL_DSISR_An - CAIA 2 ****************************************************/
+#define CXL_PSL9_DSISR_An_TF (1ull << (63-3))  /* Translation fault */
+#define CXL_PSL9_DSISR_An_PE (1ull << (63-4))  /* PSL Error (implementation specific) */
+#define CXL_PSL9_DSISR_An_AE (1ull << (63-5))  /* AFU Error */
+#define CXL_PSL9_DSISR_An_OC (1ull << (63-6))  /* OS Context Warning */
+#define CXL_PSL9_DSISR_An_S (1ull << (63-38))  /* TF for a write operation */
+#define CXL_PSL9_DSISR_PENDING (CXL_PSL9_DSISR_An_TF | CXL_PSL9_DSISR_An_PE | CXL_PSL9_DSISR_An_AE | CXL_PSL9_DSISR_An_OC)
+/*
+ * NOTE: Bits 56:63 (Checkout Response Status) are valid when DSISR_An[TF] = 1
+ * Status (0:7) Encoding
+ */
+#define CXL_PSL9_DSISR_An_CO_MASK 0x00000000000000ffULL
+#define CXL_PSL9_DSISR_An_SF      0x0000000000000080ULL  /* Segment Fault                        0b10000000 */
+#define CXL_PSL9_DSISR_An_PF_SLR  0x0000000000000088ULL  /* PTE not found (Single Level Radix)   0b10001000 */
+#define CXL_PSL9_DSISR_An_PF_RGC  0x000000000000008CULL  /* PTE not found (Radix Guest (child))  0b10001100 */
+#define CXL_PSL9_DSISR_An_PF_RGP  0x0000000000000090ULL  /* PTE not found (Radix Guest (parent)) 0b10010000 */
+#define CXL_PSL9_DSISR_An_PF_HRH  0x0000000000000094ULL  /* PTE not found (HPT/Radix Host)       0b10010100 */
+#define CXL_PSL9_DSISR_An_PF_STEG 0x000000000000009CULL  /* PTE not found (STEG VA)              0b10011100 */
+#define CXL_PSL9_DSISR_An_URTCH   0x00000000000000B4ULL  /* Unsupported Radix Tree Configuration 0b10110100 */
+
+/****** CXL_PSL_TFC_An ******************************************************/
+#define CXL_PSL_TFC_An_A  (1ull << (63-28)) /* Acknowledge non-translation fault */
+#define CXL_PSL_TFC_An_C  (1ull << (63-29)) /* Continue (abort transaction) */
+#define CXL_PSL_TFC_An_AE (1ull << (63-30)) /* Restart PSL with address error */
+#define CXL_PSL_TFC_An_R  (1ull << (63-31)) /* Restart PSL transaction */
+
+/****** CXL_PSL_DEBUG *****************************************************/
+#define CXL_PSL_DEBUG_CDC  (1ull << (63-27)) /* Coherent Data cache support */
+
+/****** CXL_XSL9_IERAT_ERAT - CAIA 2 **********************************/
+#define CXL_XSL9_IERAT_MLPID    (1ull << (63-0))  /* Match LPID */
+#define CXL_XSL9_IERAT_MPID     (1ull << (63-1))  /* Match PID */
+#define CXL_XSL9_IERAT_PRS      (1ull << (63-4))  /* PRS bit for Radix invalidations */
+#define CXL_XSL9_IERAT_INVR     (1ull << (63-3))  /* Invalidate Radix */
+#define CXL_XSL9_IERAT_IALL     (1ull << (63-8))  /* Invalidate All */
+#define CXL_XSL9_IERAT_IINPROG  (1ull << (63-63)) /* Invalidate in progress */
+
+/* cxl_process_element->software_status */
+#define CXL_PE_SOFTWARE_STATE_V (1ul << (31 -  0)) /* Valid */
+#define CXL_PE_SOFTWARE_STATE_C (1ul << (31 - 29)) /* Complete */
+#define CXL_PE_SOFTWARE_STATE_S (1ul << (31 - 30)) /* Suspend */
+#define CXL_PE_SOFTWARE_STATE_T (1ul << (31 - 31)) /* Terminate */
+
+/****** CXL_PSL_RXCTL_An (Implementation Specific) **************************
+ * Controls AFU Hang Pulse, which sets the timeout for the AFU to respond to
+ * the PSL for any response (except MMIO). Timeouts will occur between 1x to 2x
+ * of the hang pulse frequency.
+ */
+#define CXL_PSL_RXCTL_AFUHP_4S      0x7000000000000000ULL
+
+/* SPA->sw_command_status */
+#define CXL_SPA_SW_CMD_MASK         0xffff000000000000ULL
+#define CXL_SPA_SW_CMD_TERMINATE    0x0001000000000000ULL
+#define CXL_SPA_SW_CMD_REMOVE       0x0002000000000000ULL
+#define CXL_SPA_SW_CMD_SUSPEND      0x0003000000000000ULL
+#define CXL_SPA_SW_CMD_RESUME       0x0004000000000000ULL
+#define CXL_SPA_SW_CMD_ADD          0x0005000000000000ULL
+#define CXL_SPA_SW_CMD_UPDATE       0x0006000000000000ULL
+#define CXL_SPA_SW_STATE_MASK       0x0000ffff00000000ULL
+#define CXL_SPA_SW_STATE_TERMINATED 0x0000000100000000ULL
+#define CXL_SPA_SW_STATE_REMOVED    0x0000000200000000ULL
+#define CXL_SPA_SW_STATE_SUSPENDED  0x0000000300000000ULL
+#define CXL_SPA_SW_STATE_RESUMED    0x0000000400000000ULL
+#define CXL_SPA_SW_STATE_ADDED      0x0000000500000000ULL
+#define CXL_SPA_SW_STATE_UPDATED    0x0000000600000000ULL
+#define CXL_SPA_SW_PSL_ID_MASK      0x00000000ffff0000ULL
+#define CXL_SPA_SW_LINK_MASK        0x000000000000ffffULL
+
+#define CXL_MAX_SLICES 4
+#define MAX_AFU_MMIO_REGS 3
+
+#define CXL_MODE_TIME_SLICED 0x4
+#define CXL_SUPPORTED_MODES (CXL_MODE_DEDICATED | CXL_MODE_DIRECTED)
+
+#define CXL_DEV_MINORS 13   /* 1 control + 4 AFUs * 3 (dedicated/master/shared) */
+#define CXL_CARD_MINOR(adapter) (adapter->adapter_num * CXL_DEV_MINORS)
+#define CXL_DEVT_ADAPTER(dev) (MINOR(dev) / CXL_DEV_MINORS)
+
+#define CXL_PSL9_TRACEID_MAX 0xAU
+#define CXL_PSL9_TRACESTATE_FIN 0x3U
+
+enum cxl_context_status {
+	CLOSED,
+	OPENED,
+	STARTED
+};
+
+enum prefault_modes {
+	CXL_PREFAULT_NONE,
+	CXL_PREFAULT_WED,
+	CXL_PREFAULT_ALL,
+};
+
+enum cxl_attrs {
+	CXL_ADAPTER_ATTRS,
+	CXL_AFU_MASTER_ATTRS,
+	CXL_AFU_ATTRS,
+};
+
+struct cxl_sste {
+	__be64 esid_data;
+	__be64 vsid_data;
+};
+
+#define to_cxl_adapter(d) container_of(d, struct cxl, dev)
+#define to_cxl_afu(d) container_of(d, struct cxl_afu, dev)
+
+struct cxl_afu_native {
+	void __iomem *p1n_mmio;
+	void __iomem *afu_desc_mmio;
+	irq_hw_number_t psl_hwirq;
+	unsigned int psl_virq;
+	struct mutex spa_mutex;
+	/*
+	 * Only the first part of the SPA is used for the process element
+	 * linked list. The only other part that software needs to worry about
+	 * is sw_command_status, which we store a separate pointer to.
+	 * Everything else in the SPA is only used by hardware
+	 */
+	struct cxl_process_element *spa;
+	__be64 *sw_command_status;
+	unsigned int spa_size;
+	int spa_order;
+	int spa_max_procs;
+	u64 pp_offset;
+};
+
+struct cxl_afu_guest {
+	struct cxl_afu *parent;
+	u64 handle;
+	phys_addr_t p2n_phys;
+	u64 p2n_size;
+	int max_ints;
+	bool handle_err;
+	struct delayed_work work_err;
+	int previous_state;
+};
+
+struct cxl_afu {
+	struct cxl_afu_native *native;
+	struct cxl_afu_guest *guest;
+	irq_hw_number_t serr_hwirq;
+	unsigned int serr_virq;
+	char *psl_irq_name;
+	char *err_irq_name;
+	void __iomem *p2n_mmio;
+	phys_addr_t psn_phys;
+	u64 pp_size;
+
+	struct cxl *adapter;
+	struct device dev;
+	struct cdev afu_cdev_s, afu_cdev_m, afu_cdev_d;
+	struct device *chardev_s, *chardev_m, *chardev_d;
+	struct idr contexts_idr;
+	struct dentry *debugfs;
+	struct mutex contexts_lock;
+	spinlock_t afu_cntl_lock;
+
+	/* -1: AFU deconfigured/locked, >= 0: number of readers */
+	atomic_t configured_state;
+
+	/* AFU error buffer fields and bin attribute for sysfs */
+	u64 eb_len, eb_offset;
+	struct bin_attribute attr_eb;
+
+	/* pointer to the vphb */
+	struct pci_controller *phb;
+
+	int pp_irqs;
+	int irqs_max;
+	int num_procs;
+	int max_procs_virtualised;
+	int slice;
+	int modes_supported;
+	int current_mode;
+	int crs_num;
+	u64 crs_len;
+	u64 crs_offset;
+	struct list_head crs;
+	enum prefault_modes prefault_mode;
+	bool psa;
+	bool pp_psa;
+	bool enabled;
+};
+
+
+struct cxl_irq_name {
+	struct list_head list;
+	char *name;
+};
+
+struct irq_avail {
+	irq_hw_number_t offset;
+	irq_hw_number_t range;
+	unsigned long   *bitmap;
+};
+
+/*
+ * This is a cxl context.  If the PSL is in dedicated mode, there will be one
+ * of these per AFU.  If in AFU directed there can be lots of these.
+ */
+struct cxl_context {
+	struct cxl_afu *afu;
+
+	/* Problem state MMIO */
+	phys_addr_t psn_phys;
+	u64 psn_size;
+
+	/* Used to unmap any mmaps when force detaching */
+	struct address_space *mapping;
+	struct mutex mapping_lock;
+	struct page *ff_page;
+	bool mmio_err_ff;
+	bool kernelapi;
+
+	spinlock_t sste_lock; /* Protects segment table entries */
+	struct cxl_sste *sstp;
+	u64 sstp0, sstp1;
+	unsigned int sst_size, sst_lru;
+
+	wait_queue_head_t wq;
+	/* use mm context associated with this pid for ds faults */
+	struct pid *pid;
+	spinlock_t lock; /* Protects pending_irq_mask, pending_fault and fault_addr */
+	/* Only used in PR mode */
+	u64 process_token;
+
+	/* driver private data */
+	void *priv;
+
+	unsigned long *irq_bitmap; /* Accessed from IRQ context */
+	struct cxl_irq_ranges irqs;
+	struct list_head irq_names;
+	u64 fault_addr;
+	u64 fault_dsisr;
+	u64 afu_err;
+
+	/*
+	 * This status and it's lock pretects start and detach context
+	 * from racing.  It also prevents detach from racing with
+	 * itself
+	 */
+	enum cxl_context_status status;
+	struct mutex status_mutex;
+
+
+	/* XXX: Is it possible to need multiple work items at once? */
+	struct work_struct fault_work;
+	u64 dsisr;
+	u64 dar;
+
+	struct cxl_process_element *elem;
+
+	/*
+	 * pe is the process element handle, assigned by this driver when the
+	 * context is initialized.
+	 *
+	 * external_pe is the PE shown outside of cxl.
+	 * On bare-metal, pe=external_pe, because we decide what the handle is.
+	 * In a guest, we only find out about the pe used by pHyp when the
+	 * context is attached, and that's the value we want to report outside
+	 * of cxl.
+	 */
+	int pe;
+	int external_pe;
+
+	u32 irq_count;
+	bool pe_inserted;
+	bool master;
+	bool kernel;
+	bool pending_irq;
+	bool pending_fault;
+	bool pending_afu_err;
+
+	/* Used by AFU drivers for driver specific event delivery */
+	struct cxl_afu_driver_ops *afu_driver_ops;
+	atomic_t afu_driver_events;
+
+	struct rcu_head rcu;
+
+	struct mm_struct *mm;
+
+	u16 tidr;
+	bool assign_tidr;
+};
+
+struct cxl_irq_info;
+
+struct cxl_service_layer_ops {
+	int (*adapter_regs_init)(struct cxl *adapter, struct pci_dev *dev);
+	int (*invalidate_all)(struct cxl *adapter);
+	int (*afu_regs_init)(struct cxl_afu *afu);
+	int (*sanitise_afu_regs)(struct cxl_afu *afu);
+	int (*register_serr_irq)(struct cxl_afu *afu);
+	void (*release_serr_irq)(struct cxl_afu *afu);
+	irqreturn_t (*handle_interrupt)(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info);
+	irqreturn_t (*fail_irq)(struct cxl_afu *afu, struct cxl_irq_info *irq_info);
+	int (*activate_dedicated_process)(struct cxl_afu *afu);
+	int (*attach_afu_directed)(struct cxl_context *ctx, u64 wed, u64 amr);
+	int (*attach_dedicated_process)(struct cxl_context *ctx, u64 wed, u64 amr);
+	void (*update_dedicated_ivtes)(struct cxl_context *ctx);
+	void (*debugfs_add_adapter_regs)(struct cxl *adapter, struct dentry *dir);
+	void (*debugfs_add_afu_regs)(struct cxl_afu *afu, struct dentry *dir);
+	void (*psl_irq_dump_registers)(struct cxl_context *ctx);
+	void (*err_irq_dump_registers)(struct cxl *adapter);
+	void (*debugfs_stop_trace)(struct cxl *adapter);
+	void (*write_timebase_ctrl)(struct cxl *adapter);
+	u64 (*timebase_read)(struct cxl *adapter);
+	int capi_mode;
+	bool needs_reset_before_disable;
+};
+
+struct cxl_native {
+	u64 afu_desc_off;
+	u64 afu_desc_size;
+	void __iomem *p1_mmio;
+	void __iomem *p2_mmio;
+	irq_hw_number_t err_hwirq;
+	unsigned int err_virq;
+	u64 ps_off;
+	bool no_data_cache; /* set if no data cache on the card */
+	const struct cxl_service_layer_ops *sl_ops;
+};
+
+struct cxl_guest {
+	struct platform_device *pdev;
+	int irq_nranges;
+	struct cdev cdev;
+	irq_hw_number_t irq_base_offset;
+	struct irq_avail *irq_avail;
+	spinlock_t irq_alloc_lock;
+	u64 handle;
+	char *status;
+	u16 vendor;
+	u16 device;
+	u16 subsystem_vendor;
+	u16 subsystem;
+};
+
+struct cxl {
+	struct cxl_native *native;
+	struct cxl_guest *guest;
+	spinlock_t afu_list_lock;
+	struct cxl_afu *afu[CXL_MAX_SLICES];
+	struct device dev;
+	struct dentry *trace;
+	struct dentry *psl_err_chk;
+	struct dentry *debugfs;
+	char *irq_name;
+	struct bin_attribute cxl_attr;
+	int adapter_num;
+	int user_irqs;
+	u64 ps_size;
+	u16 psl_rev;
+	u16 base_image;
+	u8 vsec_status;
+	u8 caia_major;
+	u8 caia_minor;
+	u8 slices;
+	bool user_image_loaded;
+	bool perst_loads_image;
+	bool perst_select_user;
+	bool perst_same_image;
+	bool psl_timebase_synced;
+	bool tunneled_ops_supported;
+
+	/*
+	 * number of contexts mapped on to this card. Possible values are:
+	 * >0: Number of contexts mapped and new one can be mapped.
+	 *  0: No active contexts and new ones can be mapped.
+	 * -1: No contexts mapped and new ones cannot be mapped.
+	 */
+	atomic_t contexts_num;
+};
+
+int cxl_pci_alloc_one_irq(struct cxl *adapter);
+void cxl_pci_release_one_irq(struct cxl *adapter, int hwirq);
+int cxl_pci_alloc_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter, unsigned int num);
+void cxl_pci_release_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter);
+int cxl_pci_setup_irq(struct cxl *adapter, unsigned int hwirq, unsigned int virq);
+int cxl_update_image_control(struct cxl *adapter);
+int cxl_pci_reset(struct cxl *adapter);
+void cxl_pci_release_afu(struct device *dev);
+ssize_t cxl_pci_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len);
+
+/* common == phyp + powernv - CAIA 1&2 */
+struct cxl_process_element_common {
+	__be32 tid;
+	__be32 pid;
+	__be64 csrp;
+	union {
+		struct {
+			__be64 aurp0;
+			__be64 aurp1;
+			__be64 sstp0;
+			__be64 sstp1;
+		} psl8;  /* CAIA 1 */
+		struct {
+			u8     reserved2[8];
+			u8     reserved3[8];
+			u8     reserved4[8];
+			u8     reserved5[8];
+		} psl9;  /* CAIA 2 */
+	} u;
+	__be64 amr;
+	u8     reserved6[4];
+	__be64 wed;
+} __packed;
+
+/* just powernv - CAIA 1&2 */
+struct cxl_process_element {
+	__be64 sr;
+	__be64 SPOffset;
+	union {
+		__be64 sdr;          /* CAIA 1 */
+		u8     reserved1[8]; /* CAIA 2 */
+	} u;
+	__be64 haurp;
+	__be32 ctxtime;
+	__be16 ivte_offsets[4];
+	__be16 ivte_ranges[4];
+	__be32 lpid;
+	struct cxl_process_element_common common;
+	__be32 software_state;
+} __packed;
+
+static inline bool cxl_adapter_link_ok(struct cxl *cxl, struct cxl_afu *afu)
+{
+	struct pci_dev *pdev;
+
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		pdev = to_pci_dev(cxl->dev.parent);
+		return !pci_channel_offline(pdev);
+	}
+	return true;
+}
+
+static inline void __iomem *_cxl_p1_addr(struct cxl *cxl, cxl_p1_reg_t reg)
+{
+	WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE));
+	return cxl->native->p1_mmio + cxl_reg_off(reg);
+}
+
+static inline void cxl_p1_write(struct cxl *cxl, cxl_p1_reg_t reg, u64 val)
+{
+	if (likely(cxl_adapter_link_ok(cxl, NULL)))
+		out_be64(_cxl_p1_addr(cxl, reg), val);
+}
+
+static inline u64 cxl_p1_read(struct cxl *cxl, cxl_p1_reg_t reg)
+{
+	if (likely(cxl_adapter_link_ok(cxl, NULL)))
+		return in_be64(_cxl_p1_addr(cxl, reg));
+	else
+		return ~0ULL;
+}
+
+static inline void __iomem *_cxl_p1n_addr(struct cxl_afu *afu, cxl_p1n_reg_t reg)
+{
+	WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE));
+	return afu->native->p1n_mmio + cxl_reg_off(reg);
+}
+
+static inline void cxl_p1n_write(struct cxl_afu *afu, cxl_p1n_reg_t reg, u64 val)
+{
+	if (likely(cxl_adapter_link_ok(afu->adapter, afu)))
+		out_be64(_cxl_p1n_addr(afu, reg), val);
+}
+
+static inline u64 cxl_p1n_read(struct cxl_afu *afu, cxl_p1n_reg_t reg)
+{
+	if (likely(cxl_adapter_link_ok(afu->adapter, afu)))
+		return in_be64(_cxl_p1n_addr(afu, reg));
+	else
+		return ~0ULL;
+}
+
+static inline void __iomem *_cxl_p2n_addr(struct cxl_afu *afu, cxl_p2n_reg_t reg)
+{
+	return afu->p2n_mmio + cxl_reg_off(reg);
+}
+
+static inline void cxl_p2n_write(struct cxl_afu *afu, cxl_p2n_reg_t reg, u64 val)
+{
+	if (likely(cxl_adapter_link_ok(afu->adapter, afu)))
+		out_be64(_cxl_p2n_addr(afu, reg), val);
+}
+
+static inline u64 cxl_p2n_read(struct cxl_afu *afu, cxl_p2n_reg_t reg)
+{
+	if (likely(cxl_adapter_link_ok(afu->adapter, afu)))
+		return in_be64(_cxl_p2n_addr(afu, reg));
+	else
+		return ~0ULL;
+}
+
+static inline bool cxl_is_power8(void)
+{
+	if ((pvr_version_is(PVR_POWER8E)) ||
+	    (pvr_version_is(PVR_POWER8NVL)) ||
+	    (pvr_version_is(PVR_POWER8)))
+		return true;
+	return false;
+}
+
+static inline bool cxl_is_power9(void)
+{
+	if (pvr_version_is(PVR_POWER9))
+		return true;
+	return false;
+}
+
+ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf,
+				loff_t off, size_t count);
+
+
+struct cxl_calls {
+	void (*cxl_slbia)(struct mm_struct *mm);
+	struct module *owner;
+};
+int register_cxl_calls(struct cxl_calls *calls);
+void unregister_cxl_calls(struct cxl_calls *calls);
+int cxl_update_properties(struct device_node *dn, struct property *new_prop);
+
+void cxl_remove_adapter_nr(struct cxl *adapter);
+
+void cxl_release_spa(struct cxl_afu *afu);
+
+dev_t cxl_get_dev(void);
+int cxl_file_init(void);
+void cxl_file_exit(void);
+int cxl_register_adapter(struct cxl *adapter);
+int cxl_register_afu(struct cxl_afu *afu);
+int cxl_chardev_d_afu_add(struct cxl_afu *afu);
+int cxl_chardev_m_afu_add(struct cxl_afu *afu);
+int cxl_chardev_s_afu_add(struct cxl_afu *afu);
+void cxl_chardev_afu_remove(struct cxl_afu *afu);
+
+void cxl_context_detach_all(struct cxl_afu *afu);
+void cxl_context_free(struct cxl_context *ctx);
+void cxl_context_detach(struct cxl_context *ctx);
+
+int cxl_sysfs_adapter_add(struct cxl *adapter);
+void cxl_sysfs_adapter_remove(struct cxl *adapter);
+int cxl_sysfs_afu_add(struct cxl_afu *afu);
+void cxl_sysfs_afu_remove(struct cxl_afu *afu);
+int cxl_sysfs_afu_m_add(struct cxl_afu *afu);
+void cxl_sysfs_afu_m_remove(struct cxl_afu *afu);
+
+struct cxl *cxl_alloc_adapter(void);
+struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice);
+int cxl_afu_select_best_mode(struct cxl_afu *afu);
+
+int cxl_native_register_psl_irq(struct cxl_afu *afu);
+void cxl_native_release_psl_irq(struct cxl_afu *afu);
+int cxl_native_register_psl_err_irq(struct cxl *adapter);
+void cxl_native_release_psl_err_irq(struct cxl *adapter);
+int cxl_native_register_serr_irq(struct cxl_afu *afu);
+void cxl_native_release_serr_irq(struct cxl_afu *afu);
+int afu_register_irqs(struct cxl_context *ctx, u32 count);
+void afu_release_irqs(struct cxl_context *ctx, void *cookie);
+void afu_irq_name_free(struct cxl_context *ctx);
+
+int cxl_attach_afu_directed_psl9(struct cxl_context *ctx, u64 wed, u64 amr);
+int cxl_attach_afu_directed_psl8(struct cxl_context *ctx, u64 wed, u64 amr);
+int cxl_activate_dedicated_process_psl9(struct cxl_afu *afu);
+int cxl_activate_dedicated_process_psl8(struct cxl_afu *afu);
+int cxl_attach_dedicated_process_psl9(struct cxl_context *ctx, u64 wed, u64 amr);
+int cxl_attach_dedicated_process_psl8(struct cxl_context *ctx, u64 wed, u64 amr);
+void cxl_update_dedicated_ivtes_psl9(struct cxl_context *ctx);
+void cxl_update_dedicated_ivtes_psl8(struct cxl_context *ctx);
+
+#ifdef CONFIG_DEBUG_FS
+
+int cxl_debugfs_init(void);
+void cxl_debugfs_exit(void);
+int cxl_debugfs_adapter_add(struct cxl *adapter);
+void cxl_debugfs_adapter_remove(struct cxl *adapter);
+int cxl_debugfs_afu_add(struct cxl_afu *afu);
+void cxl_debugfs_afu_remove(struct cxl_afu *afu);
+void cxl_debugfs_add_adapter_regs_psl9(struct cxl *adapter, struct dentry *dir);
+void cxl_debugfs_add_adapter_regs_psl8(struct cxl *adapter, struct dentry *dir);
+void cxl_debugfs_add_afu_regs_psl9(struct cxl_afu *afu, struct dentry *dir);
+void cxl_debugfs_add_afu_regs_psl8(struct cxl_afu *afu, struct dentry *dir);
+
+#else /* CONFIG_DEBUG_FS */
+
+static inline int __init cxl_debugfs_init(void)
+{
+	return 0;
+}
+
+static inline void cxl_debugfs_exit(void)
+{
+}
+
+static inline int cxl_debugfs_adapter_add(struct cxl *adapter)
+{
+	return 0;
+}
+
+static inline void cxl_debugfs_adapter_remove(struct cxl *adapter)
+{
+}
+
+static inline int cxl_debugfs_afu_add(struct cxl_afu *afu)
+{
+	return 0;
+}
+
+static inline void cxl_debugfs_afu_remove(struct cxl_afu *afu)
+{
+}
+
+static inline void cxl_debugfs_add_adapter_regs_psl9(struct cxl *adapter,
+						    struct dentry *dir)
+{
+}
+
+static inline void cxl_debugfs_add_adapter_regs_psl8(struct cxl *adapter,
+						    struct dentry *dir)
+{
+}
+
+static inline void cxl_debugfs_add_afu_regs_psl9(struct cxl_afu *afu, struct dentry *dir)
+{
+}
+
+static inline void cxl_debugfs_add_afu_regs_psl8(struct cxl_afu *afu, struct dentry *dir)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
+
+void cxl_handle_fault(struct work_struct *work);
+void cxl_prefault(struct cxl_context *ctx, u64 wed);
+int cxl_handle_mm_fault(struct mm_struct *mm, u64 dsisr, u64 dar);
+
+struct cxl *get_cxl_adapter(int num);
+int cxl_alloc_sst(struct cxl_context *ctx);
+void cxl_dump_debug_buffer(void *addr, size_t size);
+
+void init_cxl_native(void);
+
+struct cxl_context *cxl_context_alloc(void);
+int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master);
+void cxl_context_set_mapping(struct cxl_context *ctx,
+			struct address_space *mapping);
+void cxl_context_free(struct cxl_context *ctx);
+int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma);
+unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq,
+			 irq_handler_t handler, void *cookie, const char *name);
+void cxl_unmap_irq(unsigned int virq, void *cookie);
+int __detach_context(struct cxl_context *ctx);
+
+/*
+ * This must match the layout of the H_COLLECT_CA_INT_INFO retbuf defined
+ * in PAPR.
+ * Field pid_tid is now 'reserved' because it's no more used on bare-metal.
+ * On a guest environment, PSL_PID_An is located on the upper 32 bits and
+ * PSL_TID_An register in the lower 32 bits.
+ */
+struct cxl_irq_info {
+	u64 dsisr;
+	u64 dar;
+	u64 dsr;
+	u64 reserved;
+	u64 afu_err;
+	u64 errstat;
+	u64 proc_handle;
+	u64 padding[2]; /* to match the expected retbuf size for plpar_hcall9 */
+};
+
+void cxl_assign_psn_space(struct cxl_context *ctx);
+int cxl_invalidate_all_psl9(struct cxl *adapter);
+int cxl_invalidate_all_psl8(struct cxl *adapter);
+irqreturn_t cxl_irq_psl9(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info);
+irqreturn_t cxl_irq_psl8(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info);
+irqreturn_t cxl_fail_irq_psl(struct cxl_afu *afu, struct cxl_irq_info *irq_info);
+int cxl_register_one_irq(struct cxl *adapter, irq_handler_t handler,
+			void *cookie, irq_hw_number_t *dest_hwirq,
+			unsigned int *dest_virq, const char *name);
+
+int cxl_check_error(struct cxl_afu *afu);
+int cxl_afu_slbia(struct cxl_afu *afu);
+int cxl_data_cache_flush(struct cxl *adapter);
+int cxl_afu_disable(struct cxl_afu *afu);
+int cxl_psl_purge(struct cxl_afu *afu);
+int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
+			  u32 *phb_index, u64 *capp_unit_id);
+int cxl_slot_is_switched(struct pci_dev *dev);
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg);
+u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9);
+
+void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx);
+void cxl_native_irq_dump_regs_psl8(struct cxl_context *ctx);
+void cxl_native_err_irq_dump_regs_psl8(struct cxl *adapter);
+void cxl_native_err_irq_dump_regs_psl9(struct cxl *adapter);
+int cxl_pci_vphb_add(struct cxl_afu *afu);
+void cxl_pci_vphb_remove(struct cxl_afu *afu);
+void cxl_release_mapping(struct cxl_context *ctx);
+
+extern struct pci_driver cxl_pci_driver;
+extern struct platform_driver cxl_of_driver;
+int afu_allocate_irqs(struct cxl_context *ctx, u32 count);
+
+int afu_open(struct inode *inode, struct file *file);
+int afu_release(struct inode *inode, struct file *file);
+long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+int afu_mmap(struct file *file, struct vm_area_struct *vm);
+__poll_t afu_poll(struct file *file, struct poll_table_struct *poll);
+ssize_t afu_read(struct file *file, char __user *buf, size_t count, loff_t *off);
+extern const struct file_operations afu_fops;
+
+struct cxl *cxl_guest_init_adapter(struct device_node *np, struct platform_device *dev);
+void cxl_guest_remove_adapter(struct cxl *adapter);
+int cxl_of_read_adapter_handle(struct cxl *adapter, struct device_node *np);
+int cxl_of_read_adapter_properties(struct cxl *adapter, struct device_node *np);
+ssize_t cxl_guest_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len);
+ssize_t cxl_guest_read_afu_vpd(struct cxl_afu *afu, void *buf, size_t len);
+int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_np);
+void cxl_guest_remove_afu(struct cxl_afu *afu);
+int cxl_of_read_afu_handle(struct cxl_afu *afu, struct device_node *afu_np);
+int cxl_of_read_afu_properties(struct cxl_afu *afu, struct device_node *afu_np);
+int cxl_guest_add_chardev(struct cxl *adapter);
+void cxl_guest_remove_chardev(struct cxl *adapter);
+void cxl_guest_reload_module(struct cxl *adapter);
+int cxl_of_probe(struct platform_device *pdev);
+
+struct cxl_backend_ops {
+	struct module *module;
+	int (*adapter_reset)(struct cxl *adapter);
+	int (*alloc_one_irq)(struct cxl *adapter);
+	void (*release_one_irq)(struct cxl *adapter, int hwirq);
+	int (*alloc_irq_ranges)(struct cxl_irq_ranges *irqs,
+				struct cxl *adapter, unsigned int num);
+	void (*release_irq_ranges)(struct cxl_irq_ranges *irqs,
+				struct cxl *adapter);
+	int (*setup_irq)(struct cxl *adapter, unsigned int hwirq,
+			unsigned int virq);
+	irqreturn_t (*handle_psl_slice_error)(struct cxl_context *ctx,
+					u64 dsisr, u64 errstat);
+	irqreturn_t (*psl_interrupt)(int irq, void *data);
+	int (*ack_irq)(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask);
+	void (*irq_wait)(struct cxl_context *ctx);
+	int (*attach_process)(struct cxl_context *ctx, bool kernel,
+			u64 wed, u64 amr);
+	int (*detach_process)(struct cxl_context *ctx);
+	void (*update_ivtes)(struct cxl_context *ctx);
+	bool (*support_attributes)(const char *attr_name, enum cxl_attrs type);
+	bool (*link_ok)(struct cxl *cxl, struct cxl_afu *afu);
+	void (*release_afu)(struct device *dev);
+	ssize_t (*afu_read_err_buffer)(struct cxl_afu *afu, char *buf,
+				loff_t off, size_t count);
+	int (*afu_check_and_enable)(struct cxl_afu *afu);
+	int (*afu_activate_mode)(struct cxl_afu *afu, int mode);
+	int (*afu_deactivate_mode)(struct cxl_afu *afu, int mode);
+	int (*afu_reset)(struct cxl_afu *afu);
+	int (*afu_cr_read8)(struct cxl_afu *afu, int cr_idx, u64 offset, u8 *val);
+	int (*afu_cr_read16)(struct cxl_afu *afu, int cr_idx, u64 offset, u16 *val);
+	int (*afu_cr_read32)(struct cxl_afu *afu, int cr_idx, u64 offset, u32 *val);
+	int (*afu_cr_read64)(struct cxl_afu *afu, int cr_idx, u64 offset, u64 *val);
+	int (*afu_cr_write8)(struct cxl_afu *afu, int cr_idx, u64 offset, u8 val);
+	int (*afu_cr_write16)(struct cxl_afu *afu, int cr_idx, u64 offset, u16 val);
+	int (*afu_cr_write32)(struct cxl_afu *afu, int cr_idx, u64 offset, u32 val);
+	ssize_t (*read_adapter_vpd)(struct cxl *adapter, void *buf, size_t count);
+};
+extern const struct cxl_backend_ops cxl_native_ops;
+extern const struct cxl_backend_ops cxl_guest_ops;
+extern const struct cxl_backend_ops *cxl_ops;
+
+/* check if the given pci_dev is on the the cxl vphb bus */
+bool cxl_pci_is_vphb_device(struct pci_dev *dev);
+
+/* decode AFU error bits in the PSL register PSL_SERR_An */
+void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr);
+
+/*
+ * Increments the number of attached contexts on an adapter.
+ * In case an adapter_context_lock is taken the return -EBUSY.
+ */
+int cxl_adapter_context_get(struct cxl *adapter);
+
+/* Decrements the number of attached contexts on an adapter */
+void cxl_adapter_context_put(struct cxl *adapter);
+
+/* If no active contexts then prevents contexts from being attached */
+int cxl_adapter_context_lock(struct cxl *adapter);
+
+/* Unlock the contexts-lock if taken. Warn and force unlock otherwise */
+void cxl_adapter_context_unlock(struct cxl *adapter);
+
+/* Increases the reference count to "struct mm_struct" */
+void cxl_context_mm_count_get(struct cxl_context *ctx);
+
+/* Decrements the reference count to "struct mm_struct" */
+void cxl_context_mm_count_put(struct cxl_context *ctx);
+
+#endif
diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c
new file mode 100644
index 000000000..5a3f91255
--- /dev/null
+++ b/drivers/misc/cxl/cxllib.c
@@ -0,0 +1,277 @@
+/*
+ * Copyright 2017 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/hugetlb.h>
+#include <linux/sched/mm.h>
+#include <asm/pnv-pci.h>
+#include <misc/cxllib.h>
+
+#include "cxl.h"
+
+#define CXL_INVALID_DRA                 ~0ull
+#define CXL_DUMMY_READ_SIZE             128
+#define CXL_DUMMY_READ_ALIGN            8
+#define CXL_CAPI_WINDOW_START           0x2000000000000ull
+#define CXL_CAPI_WINDOW_LOG_SIZE        48
+#define CXL_XSL_CONFIG_CURRENT_VERSION  CXL_XSL_CONFIG_VERSION1
+
+
+bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags)
+{
+	int rc;
+	u32 phb_index;
+	u64 chip_id, capp_unit_id;
+
+	/* No flags currently supported */
+	if (flags)
+		return false;
+
+	if (!cpu_has_feature(CPU_FTR_HVMODE))
+		return false;
+
+	if (!cxl_is_power9())
+		return false;
+
+	if (cxl_slot_is_switched(dev))
+		return false;
+
+	/* on p9, some pci slots are not connected to a CAPP unit */
+	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
+	if (rc)
+		return false;
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(cxllib_slot_is_supported);
+
+static DEFINE_MUTEX(dra_mutex);
+static u64 dummy_read_addr = CXL_INVALID_DRA;
+
+static int allocate_dummy_read_buf(void)
+{
+	u64 buf, vaddr;
+	size_t buf_size;
+
+	/*
+	 * Dummy read buffer is 128-byte long, aligned on a
+	 * 256-byte boundary and we need the physical address.
+	 */
+	buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN);
+	buf = (u64) kzalloc(buf_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) &
+					(~0ull << CXL_DUMMY_READ_ALIGN);
+
+	WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size),
+		"Dummy read buffer alignment issue");
+	dummy_read_addr = virt_to_phys((void *) vaddr);
+	return 0;
+}
+
+int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
+{
+	int rc;
+	u32 phb_index;
+	u64 chip_id, capp_unit_id;
+
+	if (!cpu_has_feature(CPU_FTR_HVMODE))
+		return -EINVAL;
+
+	mutex_lock(&dra_mutex);
+	if (dummy_read_addr == CXL_INVALID_DRA) {
+		rc = allocate_dummy_read_buf();
+		if (rc) {
+			mutex_unlock(&dra_mutex);
+			return rc;
+		}
+	}
+	mutex_unlock(&dra_mutex);
+
+	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
+	if (rc)
+		return rc;
+
+	rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
+	if (rc)
+		return rc;
+
+	cfg->version  = CXL_XSL_CONFIG_CURRENT_VERSION;
+	cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE;
+	cfg->bar_addr = CXL_CAPI_WINDOW_START;
+	cfg->dra = dummy_read_addr;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cxllib_get_xsl_config);
+
+int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode,
+			unsigned long flags)
+{
+	int rc = 0;
+
+	if (!cpu_has_feature(CPU_FTR_HVMODE))
+		return -EINVAL;
+
+	switch (mode) {
+	case CXL_MODE_PCI:
+		/*
+		 * We currently don't support going back to PCI mode
+		 * However, we'll turn the invalidations off, so that
+		 * the firmware doesn't have to ack them and can do
+		 * things like reset, etc.. with no worries.
+		 * So always return EPERM (can't go back to PCI) or
+		 * EBUSY if we couldn't even turn off snooping
+		 */
+		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF);
+		if (rc)
+			rc = -EBUSY;
+		else
+			rc = -EPERM;
+		break;
+	case CXL_MODE_CXL:
+		/* DMA only supported on TVT1 for the time being */
+		if (flags != CXL_MODE_DMA_TVT1)
+			return -EINVAL;
+		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1);
+		if (rc)
+			return rc;
+		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON);
+		break;
+	default:
+		rc = -EINVAL;
+	}
+	return rc;
+}
+EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode);
+
+/*
+ * When switching the PHB to capi mode, the TVT#1 entry for
+ * the Partitionable Endpoint is set in bypass mode, like
+ * in PCI mode.
+ * Configure the device dma to use TVT#1, which is done
+ * by calling dma_set_mask() with a mask large enough.
+ */
+int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags)
+{
+	int rc;
+
+	if (flags)
+		return -EINVAL;
+
+	rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
+	return rc;
+}
+EXPORT_SYMBOL_GPL(cxllib_set_device_dma);
+
+int cxllib_get_PE_attributes(struct task_struct *task,
+			     unsigned long translation_mode,
+			     struct cxllib_pe_attributes *attr)
+{
+	struct mm_struct *mm = NULL;
+
+	if (translation_mode != CXL_TRANSLATED_MODE &&
+		translation_mode != CXL_REAL_MODE)
+		return -EINVAL;
+
+	attr->sr = cxl_calculate_sr(false,
+				task == NULL,
+				translation_mode == CXL_REAL_MODE,
+				true);
+	attr->lpid = mfspr(SPRN_LPID);
+	if (task) {
+		mm = get_task_mm(task);
+		if (mm == NULL)
+			return -EINVAL;
+		/*
+		 * Caller is keeping a reference on mm_users for as long
+		 * as XSL uses the memory context
+		 */
+		attr->pid = mm->context.id;
+		mmput(mm);
+		attr->tid = task->thread.tidr;
+	} else {
+		attr->pid = 0;
+		attr->tid = 0;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
+
+static int get_vma_info(struct mm_struct *mm, u64 addr,
+			u64 *vma_start, u64 *vma_end,
+			unsigned long *page_size)
+{
+	struct vm_area_struct *vma = NULL;
+	int rc = 0;
+
+	down_read(&mm->mmap_sem);
+
+	vma = find_vma(mm, addr);
+	if (!vma) {
+		rc = -EFAULT;
+		goto out;
+	}
+	*page_size = vma_kernel_pagesize(vma);
+	*vma_start = vma->vm_start;
+	*vma_end = vma->vm_end;
+out:
+	up_read(&mm->mmap_sem);
+	return rc;
+}
+
+int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
+{
+	int rc;
+	u64 dar, vma_start, vma_end;
+	unsigned long page_size;
+
+	if (mm == NULL)
+		return -EFAULT;
+
+	/*
+	 * The buffer we have to process can extend over several pages
+	 * and may also cover several VMAs.
+	 * We iterate over all the pages. The page size could vary
+	 * between VMAs.
+	 */
+	rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size);
+	if (rc)
+		return rc;
+
+	for (dar = (addr & ~(page_size - 1)); dar < (addr + size);
+	     dar += page_size) {
+		if (dar < vma_start || dar >= vma_end) {
+			/*
+			 * We don't hold the mm->mmap_sem semaphore
+			 * while iterating, since the semaphore is
+			 * required by one of the lower-level page
+			 * fault processing functions and it could
+			 * create a deadlock.
+			 *
+			 * It means the VMAs can be altered between 2
+			 * loop iterations and we could theoretically
+			 * miss a page (however unlikely). But that's
+			 * not really a problem, as the driver will
+			 * retry access, get another page fault on the
+			 * missing page and call us again.
+			 */
+			rc = get_vma_info(mm, dar, &vma_start, &vma_end,
+					&page_size);
+			if (rc)
+				return rc;
+		}
+
+		rc = cxl_handle_mm_fault(mm, flags, dar);
+		if (rc)
+			return -EFAULT;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cxllib_handle_fault);
diff --git a/drivers/misc/cxl/debugfs.c b/drivers/misc/cxl/debugfs.c
new file mode 100644
index 000000000..a1921d815
--- /dev/null
+++ b/drivers/misc/cxl/debugfs.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include "cxl.h"
+
+static struct dentry *cxl_debugfs;
+
+/* Helpers to export CXL mmaped IO registers via debugfs */
+static int debugfs_io_u64_get(void *data, u64 *val)
+{
+	*val = in_be64((u64 __iomem *)data);
+	return 0;
+}
+
+static int debugfs_io_u64_set(void *data, u64 val)
+{
+	out_be64((u64 __iomem *)data, val);
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(fops_io_x64, debugfs_io_u64_get, debugfs_io_u64_set,
+			 "0x%016llx\n");
+
+static struct dentry *debugfs_create_io_x64(const char *name, umode_t mode,
+					    struct dentry *parent, u64 __iomem *value)
+{
+	return debugfs_create_file_unsafe(name, mode, parent,
+					  (void __force *)value, &fops_io_x64);
+}
+
+void cxl_debugfs_add_adapter_regs_psl9(struct cxl *adapter, struct dentry *dir)
+{
+	debugfs_create_io_x64("fir1", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL9_FIR1));
+	debugfs_create_io_x64("fir_mask", 0400, dir,
+			      _cxl_p1_addr(adapter, CXL_PSL9_FIR_MASK));
+	debugfs_create_io_x64("fir_cntl", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL9_FIR_CNTL));
+	debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1_addr(adapter, CXL_PSL9_TRACECFG));
+	debugfs_create_io_x64("debug", 0600, dir,
+			      _cxl_p1_addr(adapter, CXL_PSL9_DEBUG));
+	debugfs_create_io_x64("xsl-debug", 0600, dir,
+			      _cxl_p1_addr(adapter, CXL_XSL9_DBG));
+}
+
+void cxl_debugfs_add_adapter_regs_psl8(struct cxl *adapter, struct dentry *dir)
+{
+	debugfs_create_io_x64("fir1", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR1));
+	debugfs_create_io_x64("fir2", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR2));
+	debugfs_create_io_x64("fir_cntl", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR_CNTL));
+	debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_TRACE));
+}
+
+int cxl_debugfs_adapter_add(struct cxl *adapter)
+{
+	struct dentry *dir;
+	char buf[32];
+
+	if (!cxl_debugfs)
+		return -ENODEV;
+
+	snprintf(buf, 32, "card%i", adapter->adapter_num);
+	dir = debugfs_create_dir(buf, cxl_debugfs);
+	if (IS_ERR(dir))
+		return PTR_ERR(dir);
+	adapter->debugfs = dir;
+
+	debugfs_create_io_x64("err_ivte", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_ErrIVTE));
+
+	if (adapter->native->sl_ops->debugfs_add_adapter_regs)
+		adapter->native->sl_ops->debugfs_add_adapter_regs(adapter, dir);
+	return 0;
+}
+
+void cxl_debugfs_adapter_remove(struct cxl *adapter)
+{
+	debugfs_remove_recursive(adapter->debugfs);
+}
+
+void cxl_debugfs_add_afu_regs_psl9(struct cxl_afu *afu, struct dentry *dir)
+{
+	debugfs_create_io_x64("serr", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SERR_An));
+}
+
+void cxl_debugfs_add_afu_regs_psl8(struct cxl_afu *afu, struct dentry *dir)
+{
+	debugfs_create_io_x64("sstp0", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_SSTP0_An));
+	debugfs_create_io_x64("sstp1", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_SSTP1_An));
+
+	debugfs_create_io_x64("fir", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_FIR_SLICE_An));
+	debugfs_create_io_x64("serr", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SERR_An));
+	debugfs_create_io_x64("afu_debug", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_AFU_DEBUG_An));
+	debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SLICE_TRACE));
+}
+
+int cxl_debugfs_afu_add(struct cxl_afu *afu)
+{
+	struct dentry *dir;
+	char buf[32];
+
+	if (!afu->adapter->debugfs)
+		return -ENODEV;
+
+	snprintf(buf, 32, "psl%i.%i", afu->adapter->adapter_num, afu->slice);
+	dir = debugfs_create_dir(buf, afu->adapter->debugfs);
+	if (IS_ERR(dir))
+		return PTR_ERR(dir);
+	afu->debugfs = dir;
+
+	debugfs_create_io_x64("sr",         S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SR_An));
+	debugfs_create_io_x64("dsisr",      S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_PSL_DSISR_An));
+	debugfs_create_io_x64("dar",        S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_PSL_DAR_An));
+
+	debugfs_create_io_x64("err_status", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_PSL_ErrStat_An));
+
+	if (afu->adapter->native->sl_ops->debugfs_add_afu_regs)
+		afu->adapter->native->sl_ops->debugfs_add_afu_regs(afu, dir);
+
+	return 0;
+}
+
+void cxl_debugfs_afu_remove(struct cxl_afu *afu)
+{
+	debugfs_remove_recursive(afu->debugfs);
+}
+
+int __init cxl_debugfs_init(void)
+{
+	struct dentry *ent;
+
+	if (!cpu_has_feature(CPU_FTR_HVMODE))
+		return 0;
+
+	ent = debugfs_create_dir("cxl", NULL);
+	if (IS_ERR(ent))
+		return PTR_ERR(ent);
+	cxl_debugfs = ent;
+
+	return 0;
+}
+
+void cxl_debugfs_exit(void)
+{
+	debugfs_remove_recursive(cxl_debugfs);
+}
diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
new file mode 100644
index 000000000..dc7b34174
--- /dev/null
+++ b/drivers/misc/cxl/fault.c
@@ -0,0 +1,360 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/workqueue.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
+#include <linux/pid.h>
+#include <linux/mm.h>
+#include <linux/moduleparam.h>
+
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "cxl" "."
+#include <asm/current.h>
+#include <asm/copro.h>
+#include <asm/mmu.h>
+
+#include "cxl.h"
+#include "trace.h"
+
+static bool sste_matches(struct cxl_sste *sste, struct copro_slb *slb)
+{
+	return ((sste->vsid_data == cpu_to_be64(slb->vsid)) &&
+		(sste->esid_data == cpu_to_be64(slb->esid)));
+}
+
+/*
+ * This finds a free SSTE for the given SLB, or returns NULL if it's already in
+ * the segment table.
+ */
+static struct cxl_sste *find_free_sste(struct cxl_context *ctx,
+				       struct copro_slb *slb)
+{
+	struct cxl_sste *primary, *sste, *ret = NULL;
+	unsigned int mask = (ctx->sst_size >> 7) - 1; /* SSTP0[SegTableSize] */
+	unsigned int entry;
+	unsigned int hash;
+
+	if (slb->vsid & SLB_VSID_B_1T)
+		hash = (slb->esid >> SID_SHIFT_1T) & mask;
+	else /* 256M */
+		hash = (slb->esid >> SID_SHIFT) & mask;
+
+	primary = ctx->sstp + (hash << 3);
+
+	for (entry = 0, sste = primary; entry < 8; entry++, sste++) {
+		if (!ret && !(be64_to_cpu(sste->esid_data) & SLB_ESID_V))
+			ret = sste;
+		if (sste_matches(sste, slb))
+			return NULL;
+	}
+	if (ret)
+		return ret;
+
+	/* Nothing free, select an entry to cast out */
+	ret = primary + ctx->sst_lru;
+	ctx->sst_lru = (ctx->sst_lru + 1) & 0x7;
+
+	return ret;
+}
+
+static void cxl_load_segment(struct cxl_context *ctx, struct copro_slb *slb)
+{
+	/* mask is the group index, we search primary and secondary here. */
+	struct cxl_sste *sste;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctx->sste_lock, flags);
+	sste = find_free_sste(ctx, slb);
+	if (!sste)
+		goto out_unlock;
+
+	pr_devel("CXL Populating SST[%li]: %#llx %#llx\n",
+			sste - ctx->sstp, slb->vsid, slb->esid);
+	trace_cxl_ste_write(ctx, sste - ctx->sstp, slb->esid, slb->vsid);
+
+	sste->vsid_data = cpu_to_be64(slb->vsid);
+	sste->esid_data = cpu_to_be64(slb->esid);
+out_unlock:
+	spin_unlock_irqrestore(&ctx->sste_lock, flags);
+}
+
+static int cxl_fault_segment(struct cxl_context *ctx, struct mm_struct *mm,
+			     u64 ea)
+{
+	struct copro_slb slb = {0,0};
+	int rc;
+
+	if (!(rc = copro_calculate_slb(mm, ea, &slb))) {
+		cxl_load_segment(ctx, &slb);
+	}
+
+	return rc;
+}
+
+static void cxl_ack_ae(struct cxl_context *ctx)
+{
+	unsigned long flags;
+
+	cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_AE, 0);
+
+	spin_lock_irqsave(&ctx->lock, flags);
+	ctx->pending_fault = true;
+	ctx->fault_addr = ctx->dar;
+	ctx->fault_dsisr = ctx->dsisr;
+	spin_unlock_irqrestore(&ctx->lock, flags);
+
+	wake_up_all(&ctx->wq);
+}
+
+static int cxl_handle_segment_miss(struct cxl_context *ctx,
+				   struct mm_struct *mm, u64 ea)
+{
+	int rc;
+
+	pr_devel("CXL interrupt: Segment fault pe: %i ea: %#llx\n", ctx->pe, ea);
+	trace_cxl_ste_miss(ctx, ea);
+
+	if ((rc = cxl_fault_segment(ctx, mm, ea)))
+		cxl_ack_ae(ctx);
+	else {
+
+		mb(); /* Order seg table write to TFC MMIO write */
+		cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0);
+	}
+
+	return IRQ_HANDLED;
+}
+
+int cxl_handle_mm_fault(struct mm_struct *mm, u64 dsisr, u64 dar)
+{
+	vm_fault_t flt = 0;
+	int result;
+	unsigned long access, flags, inv_flags = 0;
+
+	/*
+	 * Add the fault handling cpu to task mm cpumask so that we
+	 * can do a safe lockless page table walk when inserting the
+	 * hash page table entry. This function get called with a
+	 * valid mm for user space addresses. Hence using the if (mm)
+	 * check is sufficient here.
+	 */
+	if (mm && !cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) {
+		cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+		/*
+		 * We need to make sure we walk the table only after
+		 * we update the cpumask. The other side of the barrier
+		 * is explained in serialize_against_pte_lookup()
+		 */
+		smp_mb();
+	}
+	if ((result = copro_handle_mm_fault(mm, dar, dsisr, &flt))) {
+		pr_devel("copro_handle_mm_fault failed: %#x\n", result);
+		return result;
+	}
+
+	if (!radix_enabled()) {
+		/*
+		 * update_mmu_cache() will not have loaded the hash since current->trap
+		 * is not a 0x400 or 0x300, so just call hash_page_mm() here.
+		 */
+		access = _PAGE_PRESENT | _PAGE_READ;
+		if (dsisr & CXL_PSL_DSISR_An_S)
+			access |= _PAGE_WRITE;
+
+		if (!mm && (REGION_ID(dar) != USER_REGION_ID))
+			access |= _PAGE_PRIVILEGED;
+
+		if (dsisr & DSISR_NOHPTE)
+			inv_flags |= HPTE_NOHPTE_UPDATE;
+
+		local_irq_save(flags);
+		hash_page_mm(mm, dar, access, 0x300, inv_flags);
+		local_irq_restore(flags);
+	}
+	return 0;
+}
+
+static void cxl_handle_page_fault(struct cxl_context *ctx,
+				  struct mm_struct *mm,
+				  u64 dsisr, u64 dar)
+{
+	trace_cxl_pte_miss(ctx, dsisr, dar);
+
+	if (cxl_handle_mm_fault(mm, dsisr, dar)) {
+		cxl_ack_ae(ctx);
+	} else {
+		pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe);
+		cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0);
+	}
+}
+
+/*
+ * Returns the mm_struct corresponding to the context ctx.
+ * mm_users == 0, the context may be in the process of being closed.
+ */
+static struct mm_struct *get_mem_context(struct cxl_context *ctx)
+{
+	if (ctx->mm == NULL)
+		return NULL;
+
+	if (!atomic_inc_not_zero(&ctx->mm->mm_users))
+		return NULL;
+
+	return ctx->mm;
+}
+
+static bool cxl_is_segment_miss(struct cxl_context *ctx, u64 dsisr)
+{
+	if ((cxl_is_power8() && (dsisr & CXL_PSL_DSISR_An_DS)))
+		return true;
+
+	return false;
+}
+
+static bool cxl_is_page_fault(struct cxl_context *ctx, u64 dsisr)
+{
+	if ((cxl_is_power8()) && (dsisr & CXL_PSL_DSISR_An_DM))
+		return true;
+
+	if (cxl_is_power9())
+		return true;
+
+	return false;
+}
+
+void cxl_handle_fault(struct work_struct *fault_work)
+{
+	struct cxl_context *ctx =
+		container_of(fault_work, struct cxl_context, fault_work);
+	u64 dsisr = ctx->dsisr;
+	u64 dar = ctx->dar;
+	struct mm_struct *mm = NULL;
+
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr ||
+		    cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An) != dar ||
+		    cxl_p2n_read(ctx->afu, CXL_PSL_PEHandle_An) != ctx->pe) {
+			/* Most likely explanation is harmless - a dedicated
+			 * process has detached and these were cleared by the
+			 * PSL purge, but warn about it just in case
+			 */
+			dev_notice(&ctx->afu->dev, "cxl_handle_fault: Translation fault regs changed\n");
+			return;
+		}
+	}
+
+	/* Early return if the context is being / has been detached */
+	if (ctx->status == CLOSED) {
+		cxl_ack_ae(ctx);
+		return;
+	}
+
+	pr_devel("CXL BOTTOM HALF handling fault for afu pe: %i. "
+		"DSISR: %#llx DAR: %#llx\n", ctx->pe, dsisr, dar);
+
+	if (!ctx->kernel) {
+
+		mm = get_mem_context(ctx);
+		if (mm == NULL) {
+			pr_devel("%s: unable to get mm for pe=%d pid=%i\n",
+				 __func__, ctx->pe, pid_nr(ctx->pid));
+			cxl_ack_ae(ctx);
+			return;
+		} else {
+			pr_devel("Handling page fault for pe=%d pid=%i\n",
+				 ctx->pe, pid_nr(ctx->pid));
+		}
+	}
+
+	if (cxl_is_segment_miss(ctx, dsisr))
+		cxl_handle_segment_miss(ctx, mm, dar);
+	else if (cxl_is_page_fault(ctx, dsisr))
+		cxl_handle_page_fault(ctx, mm, dsisr, dar);
+	else
+		WARN(1, "cxl_handle_fault has nothing to handle\n");
+
+	if (mm)
+		mmput(mm);
+}
+
+static void cxl_prefault_one(struct cxl_context *ctx, u64 ea)
+{
+	struct mm_struct *mm;
+
+	mm = get_mem_context(ctx);
+	if (mm == NULL) {
+		pr_devel("cxl_prefault_one unable to get mm %i\n",
+			 pid_nr(ctx->pid));
+		return;
+	}
+
+	cxl_fault_segment(ctx, mm, ea);
+
+	mmput(mm);
+}
+
+static u64 next_segment(u64 ea, u64 vsid)
+{
+	if (vsid & SLB_VSID_B_1T)
+		ea |= (1ULL << 40) - 1;
+	else
+		ea |= (1ULL << 28) - 1;
+
+	return ea + 1;
+}
+
+static void cxl_prefault_vma(struct cxl_context *ctx)
+{
+	u64 ea, last_esid = 0;
+	struct copro_slb slb;
+	struct vm_area_struct *vma;
+	int rc;
+	struct mm_struct *mm;
+
+	mm = get_mem_context(ctx);
+	if (mm == NULL) {
+		pr_devel("cxl_prefault_vm unable to get mm %i\n",
+			 pid_nr(ctx->pid));
+		return;
+	}
+
+	down_read(&mm->mmap_sem);
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		for (ea = vma->vm_start; ea < vma->vm_end;
+				ea = next_segment(ea, slb.vsid)) {
+			rc = copro_calculate_slb(mm, ea, &slb);
+			if (rc)
+				continue;
+
+			if (last_esid == slb.esid)
+				continue;
+
+			cxl_load_segment(ctx, &slb);
+			last_esid = slb.esid;
+		}
+	}
+	up_read(&mm->mmap_sem);
+
+	mmput(mm);
+}
+
+void cxl_prefault(struct cxl_context *ctx, u64 wed)
+{
+	switch (ctx->afu->prefault_mode) {
+	case CXL_PREFAULT_WED:
+		cxl_prefault_one(ctx, wed);
+		break;
+	case CXL_PREFAULT_ALL:
+		cxl_prefault_vma(ctx);
+		break;
+	default:
+		break;
+	}
+}
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
new file mode 100644
index 000000000..bd6ddbdb5
--- /dev/null
+++ b/drivers/misc/cxl/file.c
@@ -0,0 +1,703 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/bitmap.h>
+#include <linux/sched/signal.h>
+#include <linux/poll.h>
+#include <linux/pid.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/sched/mm.h>
+#include <linux/mmu_context.h>
+#include <asm/cputable.h>
+#include <asm/current.h>
+#include <asm/copro.h>
+
+#include "cxl.h"
+#include "trace.h"
+
+#define CXL_NUM_MINORS 256 /* Total to reserve */
+
+#define CXL_AFU_MINOR_D(afu) (CXL_CARD_MINOR(afu->adapter) + 1 + (3 * afu->slice))
+#define CXL_AFU_MINOR_M(afu) (CXL_AFU_MINOR_D(afu) + 1)
+#define CXL_AFU_MINOR_S(afu) (CXL_AFU_MINOR_D(afu) + 2)
+#define CXL_AFU_MKDEV_D(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_D(afu))
+#define CXL_AFU_MKDEV_M(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_M(afu))
+#define CXL_AFU_MKDEV_S(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_S(afu))
+
+#define CXL_DEVT_AFU(dev) ((MINOR(dev) % CXL_DEV_MINORS - 1) / 3)
+
+#define CXL_DEVT_IS_CARD(dev) (MINOR(dev) % CXL_DEV_MINORS == 0)
+
+static dev_t cxl_dev;
+
+static struct class *cxl_class;
+
+static int __afu_open(struct inode *inode, struct file *file, bool master)
+{
+	struct cxl *adapter;
+	struct cxl_afu *afu;
+	struct cxl_context *ctx;
+	int adapter_num = CXL_DEVT_ADAPTER(inode->i_rdev);
+	int slice = CXL_DEVT_AFU(inode->i_rdev);
+	int rc = -ENODEV;
+
+	pr_devel("afu_open afu%i.%i\n", slice, adapter_num);
+
+	if (!(adapter = get_cxl_adapter(adapter_num)))
+		return -ENODEV;
+
+	if (slice > adapter->slices)
+		goto err_put_adapter;
+
+	spin_lock(&adapter->afu_list_lock);
+	if (!(afu = adapter->afu[slice])) {
+		spin_unlock(&adapter->afu_list_lock);
+		goto err_put_adapter;
+	}
+
+	/*
+	 * taking a ref to the afu so that it doesn't go away
+	 * for rest of the function. This ref is released before
+	 * we return.
+	 */
+	cxl_afu_get(afu);
+	spin_unlock(&adapter->afu_list_lock);
+
+	if (!afu->current_mode)
+		goto err_put_afu;
+
+	if (!cxl_ops->link_ok(adapter, afu)) {
+		rc = -EIO;
+		goto err_put_afu;
+	}
+
+	if (!(ctx = cxl_context_alloc())) {
+		rc = -ENOMEM;
+		goto err_put_afu;
+	}
+
+	rc = cxl_context_init(ctx, afu, master);
+	if (rc)
+		goto err_put_afu;
+
+	cxl_context_set_mapping(ctx, inode->i_mapping);
+
+	pr_devel("afu_open pe: %i\n", ctx->pe);
+	file->private_data = ctx;
+
+	/* indicate success */
+	rc = 0;
+
+err_put_afu:
+	/* release the ref taken earlier */
+	cxl_afu_put(afu);
+err_put_adapter:
+	put_device(&adapter->dev);
+	return rc;
+}
+
+int afu_open(struct inode *inode, struct file *file)
+{
+	return __afu_open(inode, file, false);
+}
+
+static int afu_master_open(struct inode *inode, struct file *file)
+{
+	return __afu_open(inode, file, true);
+}
+
+int afu_release(struct inode *inode, struct file *file)
+{
+	struct cxl_context *ctx = file->private_data;
+
+	pr_devel("%s: closing cxl file descriptor. pe: %i\n",
+		 __func__, ctx->pe);
+	cxl_context_detach(ctx);
+
+
+	/*
+	 * Delete the context's mapping pointer, unless it's created by the
+	 * kernel API, in which case leave it so it can be freed by reclaim_ctx()
+	 */
+	if (!ctx->kernelapi) {
+		mutex_lock(&ctx->mapping_lock);
+		ctx->mapping = NULL;
+		mutex_unlock(&ctx->mapping_lock);
+	}
+
+	/*
+	 * At this this point all bottom halfs have finished and we should be
+	 * getting no more IRQs from the hardware for this context.  Once it's
+	 * removed from the IDR (and RCU synchronised) it's safe to free the
+	 * sstp and context.
+	 */
+	cxl_context_free(ctx);
+
+	return 0;
+}
+
+static long afu_ioctl_start_work(struct cxl_context *ctx,
+				 struct cxl_ioctl_start_work __user *uwork)
+{
+	struct cxl_ioctl_start_work work;
+	u64 amr = 0;
+	int rc;
+
+	pr_devel("%s: pe: %i\n", __func__, ctx->pe);
+
+	/* Do this outside the status_mutex to avoid a circular dependency with
+	 * the locking in cxl_mmap_fault() */
+	if (copy_from_user(&work, uwork, sizeof(work)))
+		return -EFAULT;
+
+	mutex_lock(&ctx->status_mutex);
+	if (ctx->status != OPENED) {
+		rc = -EIO;
+		goto out;
+	}
+
+	/*
+	 * if any of the reserved fields are set or any of the unused
+	 * flags are set it's invalid
+	 */
+	if (work.reserved1 || work.reserved2 || work.reserved3 ||
+	    work.reserved4 || work.reserved5 ||
+	    (work.flags & ~CXL_START_WORK_ALL)) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	if (!(work.flags & CXL_START_WORK_NUM_IRQS))
+		work.num_interrupts = ctx->afu->pp_irqs;
+	else if ((work.num_interrupts < ctx->afu->pp_irqs) ||
+		 (work.num_interrupts > ctx->afu->irqs_max)) {
+		rc =  -EINVAL;
+		goto out;
+	}
+
+	if ((rc = afu_register_irqs(ctx, work.num_interrupts)))
+		goto out;
+
+	if (work.flags & CXL_START_WORK_AMR)
+		amr = work.amr & mfspr(SPRN_UAMOR);
+
+	if (work.flags & CXL_START_WORK_TID)
+		ctx->assign_tidr = true;
+
+	ctx->mmio_err_ff = !!(work.flags & CXL_START_WORK_ERR_FF);
+
+	/*
+	 * Increment the mapped context count for adapter. This also checks
+	 * if adapter_context_lock is taken.
+	 */
+	rc = cxl_adapter_context_get(ctx->afu->adapter);
+	if (rc) {
+		afu_release_irqs(ctx, ctx);
+		goto out;
+	}
+
+	/*
+	 * We grab the PID here and not in the file open to allow for the case
+	 * where a process (master, some daemon, etc) has opened the chardev on
+	 * behalf of another process, so the AFU's mm gets bound to the process
+	 * that performs this ioctl and not the process that opened the file.
+	 * Also we grab the PID of the group leader so that if the task that
+	 * has performed the attach operation exits the mm context of the
+	 * process is still accessible.
+	 */
+	ctx->pid = get_task_pid(current, PIDTYPE_PID);
+
+	/* acquire a reference to the task's mm */
+	ctx->mm = get_task_mm(current);
+
+	/* ensure this mm_struct can't be freed */
+	cxl_context_mm_count_get(ctx);
+
+	if (ctx->mm) {
+		/* decrement the use count from above */
+		mmput(ctx->mm);
+		/* make TLBIs for this context global */
+		mm_context_add_copro(ctx->mm);
+	}
+
+	/*
+	 * Increment driver use count. Enables global TLBIs for hash
+	 * and callbacks to handle the segment table
+	 */
+	cxl_ctx_get();
+
+	/*
+	 * A barrier is needed to make sure all TLBIs are global
+	 * before we attach and the context starts being used by the
+	 * adapter.
+	 *
+	 * Needed after mm_context_add_copro() for radix and
+	 * cxl_ctx_get() for hash/p8.
+	 *
+	 * The barrier should really be mb(), since it involves a
+	 * device. However, it's only useful when we have local
+	 * vs. global TLBIs, i.e SMP=y. So keep smp_mb().
+	 */
+	smp_mb();
+
+	trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr);
+
+	if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor,
+							amr))) {
+		afu_release_irqs(ctx, ctx);
+		cxl_adapter_context_put(ctx->afu->adapter);
+		put_pid(ctx->pid);
+		ctx->pid = NULL;
+		cxl_ctx_put();
+		cxl_context_mm_count_put(ctx);
+		if (ctx->mm)
+			mm_context_remove_copro(ctx->mm);
+		goto out;
+	}
+
+	rc = 0;
+	if (work.flags & CXL_START_WORK_TID) {
+		work.tid = ctx->tidr;
+		if (copy_to_user(uwork, &work, sizeof(work)))
+			rc = -EFAULT;
+	}
+
+	ctx->status = STARTED;
+
+out:
+	mutex_unlock(&ctx->status_mutex);
+	return rc;
+}
+
+static long afu_ioctl_process_element(struct cxl_context *ctx,
+				      int __user *upe)
+{
+	pr_devel("%s: pe: %i\n", __func__, ctx->pe);
+
+	if (copy_to_user(upe, &ctx->external_pe, sizeof(__u32)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static long afu_ioctl_get_afu_id(struct cxl_context *ctx,
+				 struct cxl_afu_id __user *upafuid)
+{
+	struct cxl_afu_id afuid = { 0 };
+
+	afuid.card_id = ctx->afu->adapter->adapter_num;
+	afuid.afu_offset = ctx->afu->slice;
+	afuid.afu_mode = ctx->afu->current_mode;
+
+	/* set the flag bit in case the afu is a slave */
+	if (ctx->afu->current_mode == CXL_MODE_DIRECTED && !ctx->master)
+		afuid.flags |= CXL_AFUID_FLAG_SLAVE;
+
+	if (copy_to_user(upafuid, &afuid, sizeof(afuid)))
+		return -EFAULT;
+
+	return 0;
+}
+
+long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct cxl_context *ctx = file->private_data;
+
+	if (ctx->status == CLOSED)
+		return -EIO;
+
+	if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu))
+		return -EIO;
+
+	pr_devel("afu_ioctl\n");
+	switch (cmd) {
+	case CXL_IOCTL_START_WORK:
+		return afu_ioctl_start_work(ctx, (struct cxl_ioctl_start_work __user *)arg);
+	case CXL_IOCTL_GET_PROCESS_ELEMENT:
+		return afu_ioctl_process_element(ctx, (__u32 __user *)arg);
+	case CXL_IOCTL_GET_AFU_ID:
+		return afu_ioctl_get_afu_id(ctx, (struct cxl_afu_id __user *)
+					    arg);
+	}
+	return -EINVAL;
+}
+
+static long afu_compat_ioctl(struct file *file, unsigned int cmd,
+			     unsigned long arg)
+{
+	return afu_ioctl(file, cmd, arg);
+}
+
+int afu_mmap(struct file *file, struct vm_area_struct *vm)
+{
+	struct cxl_context *ctx = file->private_data;
+
+	/* AFU must be started before we can MMIO */
+	if (ctx->status != STARTED)
+		return -EIO;
+
+	if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu))
+		return -EIO;
+
+	return cxl_context_iomap(ctx, vm);
+}
+
+static inline bool ctx_event_pending(struct cxl_context *ctx)
+{
+	if (ctx->pending_irq || ctx->pending_fault || ctx->pending_afu_err)
+		return true;
+
+	if (ctx->afu_driver_ops && atomic_read(&ctx->afu_driver_events))
+		return true;
+
+	return false;
+}
+
+__poll_t afu_poll(struct file *file, struct poll_table_struct *poll)
+{
+	struct cxl_context *ctx = file->private_data;
+	__poll_t mask = 0;
+	unsigned long flags;
+
+
+	poll_wait(file, &ctx->wq, poll);
+
+	pr_devel("afu_poll wait done pe: %i\n", ctx->pe);
+
+	spin_lock_irqsave(&ctx->lock, flags);
+	if (ctx_event_pending(ctx))
+		mask |= EPOLLIN | EPOLLRDNORM;
+	else if (ctx->status == CLOSED)
+		/* Only error on closed when there are no futher events pending
+		 */
+		mask |= EPOLLERR;
+	spin_unlock_irqrestore(&ctx->lock, flags);
+
+	pr_devel("afu_poll pe: %i returning %#x\n", ctx->pe, mask);
+
+	return mask;
+}
+
+static ssize_t afu_driver_event_copy(struct cxl_context *ctx,
+				     char __user *buf,
+				     struct cxl_event *event,
+				     struct cxl_event_afu_driver_reserved *pl)
+{
+	/* Check event */
+	if (!pl) {
+		ctx->afu_driver_ops->event_delivered(ctx, pl, -EINVAL);
+		return -EFAULT;
+	}
+
+	/* Check event size */
+	event->header.size += pl->data_size;
+	if (event->header.size > CXL_READ_MIN_SIZE) {
+		ctx->afu_driver_ops->event_delivered(ctx, pl, -EINVAL);
+		return -EFAULT;
+	}
+
+	/* Copy event header */
+	if (copy_to_user(buf, event, sizeof(struct cxl_event_header))) {
+		ctx->afu_driver_ops->event_delivered(ctx, pl, -EFAULT);
+		return -EFAULT;
+	}
+
+	/* Copy event data */
+	buf += sizeof(struct cxl_event_header);
+	if (copy_to_user(buf, &pl->data, pl->data_size)) {
+		ctx->afu_driver_ops->event_delivered(ctx, pl, -EFAULT);
+		return -EFAULT;
+	}
+
+	ctx->afu_driver_ops->event_delivered(ctx, pl, 0); /* Success */
+	return event->header.size;
+}
+
+ssize_t afu_read(struct file *file, char __user *buf, size_t count,
+			loff_t *off)
+{
+	struct cxl_context *ctx = file->private_data;
+	struct cxl_event_afu_driver_reserved *pl = NULL;
+	struct cxl_event event;
+	unsigned long flags;
+	int rc;
+	DEFINE_WAIT(wait);
+
+	if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu))
+		return -EIO;
+
+	if (count < CXL_READ_MIN_SIZE)
+		return -EINVAL;
+
+	spin_lock_irqsave(&ctx->lock, flags);
+
+	for (;;) {
+		prepare_to_wait(&ctx->wq, &wait, TASK_INTERRUPTIBLE);
+		if (ctx_event_pending(ctx) || (ctx->status == CLOSED))
+			break;
+
+		if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) {
+			rc = -EIO;
+			goto out;
+		}
+
+		if (file->f_flags & O_NONBLOCK) {
+			rc = -EAGAIN;
+			goto out;
+		}
+
+		if (signal_pending(current)) {
+			rc = -ERESTARTSYS;
+			goto out;
+		}
+
+		spin_unlock_irqrestore(&ctx->lock, flags);
+		pr_devel("afu_read going to sleep...\n");
+		schedule();
+		pr_devel("afu_read woken up\n");
+		spin_lock_irqsave(&ctx->lock, flags);
+	}
+
+	finish_wait(&ctx->wq, &wait);
+
+	memset(&event, 0, sizeof(event));
+	event.header.process_element = ctx->pe;
+	event.header.size = sizeof(struct cxl_event_header);
+	if (ctx->afu_driver_ops && atomic_read(&ctx->afu_driver_events)) {
+		pr_devel("afu_read delivering AFU driver specific event\n");
+		pl = ctx->afu_driver_ops->fetch_event(ctx);
+		atomic_dec(&ctx->afu_driver_events);
+		event.header.type = CXL_EVENT_AFU_DRIVER;
+	} else if (ctx->pending_irq) {
+		pr_devel("afu_read delivering AFU interrupt\n");
+		event.header.size += sizeof(struct cxl_event_afu_interrupt);
+		event.header.type = CXL_EVENT_AFU_INTERRUPT;
+		event.irq.irq = find_first_bit(ctx->irq_bitmap, ctx->irq_count) + 1;
+		clear_bit(event.irq.irq - 1, ctx->irq_bitmap);
+		if (bitmap_empty(ctx->irq_bitmap, ctx->irq_count))
+			ctx->pending_irq = false;
+	} else if (ctx->pending_fault) {
+		pr_devel("afu_read delivering data storage fault\n");
+		event.header.size += sizeof(struct cxl_event_data_storage);
+		event.header.type = CXL_EVENT_DATA_STORAGE;
+		event.fault.addr = ctx->fault_addr;
+		event.fault.dsisr = ctx->fault_dsisr;
+		ctx->pending_fault = false;
+	} else if (ctx->pending_afu_err) {
+		pr_devel("afu_read delivering afu error\n");
+		event.header.size += sizeof(struct cxl_event_afu_error);
+		event.header.type = CXL_EVENT_AFU_ERROR;
+		event.afu_error.error = ctx->afu_err;
+		ctx->pending_afu_err = false;
+	} else if (ctx->status == CLOSED) {
+		pr_devel("afu_read fatal error\n");
+		spin_unlock_irqrestore(&ctx->lock, flags);
+		return -EIO;
+	} else
+		WARN(1, "afu_read must be buggy\n");
+
+	spin_unlock_irqrestore(&ctx->lock, flags);
+
+	if (event.header.type == CXL_EVENT_AFU_DRIVER)
+		return afu_driver_event_copy(ctx, buf, &event, pl);
+
+	if (copy_to_user(buf, &event, event.header.size))
+		return -EFAULT;
+	return event.header.size;
+
+out:
+	finish_wait(&ctx->wq, &wait);
+	spin_unlock_irqrestore(&ctx->lock, flags);
+	return rc;
+}
+
+/* 
+ * Note: if this is updated, we need to update api.c to patch the new ones in
+ * too
+ */
+const struct file_operations afu_fops = {
+	.owner		= THIS_MODULE,
+	.open           = afu_open,
+	.poll		= afu_poll,
+	.read		= afu_read,
+	.release        = afu_release,
+	.unlocked_ioctl = afu_ioctl,
+	.compat_ioctl   = afu_compat_ioctl,
+	.mmap           = afu_mmap,
+};
+
+static const struct file_operations afu_master_fops = {
+	.owner		= THIS_MODULE,
+	.open           = afu_master_open,
+	.poll		= afu_poll,
+	.read		= afu_read,
+	.release        = afu_release,
+	.unlocked_ioctl = afu_ioctl,
+	.compat_ioctl   = afu_compat_ioctl,
+	.mmap           = afu_mmap,
+};
+
+
+static char *cxl_devnode(struct device *dev, umode_t *mode)
+{
+	if (cpu_has_feature(CPU_FTR_HVMODE) &&
+	    CXL_DEVT_IS_CARD(dev->devt)) {
+		/*
+		 * These minor numbers will eventually be used to program the
+		 * PSL and AFUs once we have dynamic reprogramming support
+		 */
+		return NULL;
+	}
+	return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev));
+}
+
+extern struct class *cxl_class;
+
+static int cxl_add_chardev(struct cxl_afu *afu, dev_t devt, struct cdev *cdev,
+			   struct device **chardev, char *postfix, char *desc,
+			   const struct file_operations *fops)
+{
+	struct device *dev;
+	int rc;
+
+	cdev_init(cdev, fops);
+	if ((rc = cdev_add(cdev, devt, 1))) {
+		dev_err(&afu->dev, "Unable to add %s chardev: %i\n", desc, rc);
+		return rc;
+	}
+
+	dev = device_create(cxl_class, &afu->dev, devt, afu,
+			"afu%i.%i%s", afu->adapter->adapter_num, afu->slice, postfix);
+	if (IS_ERR(dev)) {
+		dev_err(&afu->dev, "Unable to create %s chardev in sysfs: %i\n", desc, rc);
+		rc = PTR_ERR(dev);
+		goto err;
+	}
+
+	*chardev = dev;
+
+	return 0;
+err:
+	cdev_del(cdev);
+	return rc;
+}
+
+int cxl_chardev_d_afu_add(struct cxl_afu *afu)
+{
+	return cxl_add_chardev(afu, CXL_AFU_MKDEV_D(afu), &afu->afu_cdev_d,
+			       &afu->chardev_d, "d", "dedicated",
+			       &afu_master_fops); /* Uses master fops */
+}
+
+int cxl_chardev_m_afu_add(struct cxl_afu *afu)
+{
+	return cxl_add_chardev(afu, CXL_AFU_MKDEV_M(afu), &afu->afu_cdev_m,
+			       &afu->chardev_m, "m", "master",
+			       &afu_master_fops);
+}
+
+int cxl_chardev_s_afu_add(struct cxl_afu *afu)
+{
+	return cxl_add_chardev(afu, CXL_AFU_MKDEV_S(afu), &afu->afu_cdev_s,
+			       &afu->chardev_s, "s", "shared",
+			       &afu_fops);
+}
+
+void cxl_chardev_afu_remove(struct cxl_afu *afu)
+{
+	if (afu->chardev_d) {
+		cdev_del(&afu->afu_cdev_d);
+		device_unregister(afu->chardev_d);
+		afu->chardev_d = NULL;
+	}
+	if (afu->chardev_m) {
+		cdev_del(&afu->afu_cdev_m);
+		device_unregister(afu->chardev_m);
+		afu->chardev_m = NULL;
+	}
+	if (afu->chardev_s) {
+		cdev_del(&afu->afu_cdev_s);
+		device_unregister(afu->chardev_s);
+		afu->chardev_s = NULL;
+	}
+}
+
+int cxl_register_afu(struct cxl_afu *afu)
+{
+	afu->dev.class = cxl_class;
+
+	return device_register(&afu->dev);
+}
+
+int cxl_register_adapter(struct cxl *adapter)
+{
+	adapter->dev.class = cxl_class;
+
+	/*
+	 * Future: When we support dynamically reprogramming the PSL & AFU we
+	 * will expose the interface to do that via a chardev:
+	 * adapter->dev.devt = CXL_CARD_MKDEV(adapter);
+	 */
+
+	return device_register(&adapter->dev);
+}
+
+dev_t cxl_get_dev(void)
+{
+	return cxl_dev;
+}
+
+int __init cxl_file_init(void)
+{
+	int rc;
+
+	/*
+	 * If these change we really need to update API.  Either change some
+	 * flags or update API version number CXL_API_VERSION.
+	 */
+	BUILD_BUG_ON(CXL_API_VERSION != 3);
+	BUILD_BUG_ON(sizeof(struct cxl_ioctl_start_work) != 64);
+	BUILD_BUG_ON(sizeof(struct cxl_event_header) != 8);
+	BUILD_BUG_ON(sizeof(struct cxl_event_afu_interrupt) != 8);
+	BUILD_BUG_ON(sizeof(struct cxl_event_data_storage) != 32);
+	BUILD_BUG_ON(sizeof(struct cxl_event_afu_error) != 16);
+
+	if ((rc = alloc_chrdev_region(&cxl_dev, 0, CXL_NUM_MINORS, "cxl"))) {
+		pr_err("Unable to allocate CXL major number: %i\n", rc);
+		return rc;
+	}
+
+	pr_devel("CXL device allocated, MAJOR %i\n", MAJOR(cxl_dev));
+
+	cxl_class = class_create(THIS_MODULE, "cxl");
+	if (IS_ERR(cxl_class)) {
+		pr_err("Unable to create CXL class\n");
+		rc = PTR_ERR(cxl_class);
+		goto err;
+	}
+	cxl_class->devnode = cxl_devnode;
+
+	return 0;
+
+err:
+	unregister_chrdev_region(cxl_dev, CXL_NUM_MINORS);
+	return rc;
+}
+
+void cxl_file_exit(void)
+{
+	unregister_chrdev_region(cxl_dev, CXL_NUM_MINORS);
+	class_destroy(cxl_class);
+}
diff --git a/drivers/misc/cxl/flash.c b/drivers/misc/cxl/flash.c
new file mode 100644
index 000000000..43917898f
--- /dev/null
+++ b/drivers/misc/cxl/flash.c
@@ -0,0 +1,543 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/semaphore.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <asm/rtas.h>
+
+#include "cxl.h"
+#include "hcalls.h"
+
+#define DOWNLOAD_IMAGE 1
+#define VALIDATE_IMAGE 2
+
+struct ai_header {
+	u16 version;
+	u8  reserved0[6];
+	u16 vendor;
+	u16 device;
+	u16 subsystem_vendor;
+	u16 subsystem;
+	u64 image_offset;
+	u64 image_length;
+	u8  reserved1[96];
+};
+
+static struct semaphore sem;
+static unsigned long *buffer[CXL_AI_MAX_ENTRIES];
+static struct sg_list *le;
+static u64 continue_token;
+static unsigned int transfer;
+
+struct update_props_workarea {
+	__be32 phandle;
+	__be32 state;
+	__be64 reserved;
+	__be32 nprops;
+} __packed;
+
+struct update_nodes_workarea {
+	__be32 state;
+	__be64 unit_address;
+	__be32 reserved;
+} __packed;
+
+#define DEVICE_SCOPE 3
+#define NODE_ACTION_MASK	0xff000000
+#define NODE_COUNT_MASK		0x00ffffff
+#define OPCODE_DELETE	0x01000000
+#define OPCODE_UPDATE	0x02000000
+#define OPCODE_ADD	0x03000000
+
+static int rcall(int token, char *buf, s32 scope)
+{
+	int rc;
+
+	spin_lock(&rtas_data_buf_lock);
+
+	memcpy(rtas_data_buf, buf, RTAS_DATA_BUF_SIZE);
+	rc = rtas_call(token, 2, 1, NULL, rtas_data_buf, scope);
+	memcpy(buf, rtas_data_buf, RTAS_DATA_BUF_SIZE);
+
+	spin_unlock(&rtas_data_buf_lock);
+	return rc;
+}
+
+static int update_property(struct device_node *dn, const char *name,
+			   u32 vd, char *value)
+{
+	struct property *new_prop;
+	u32 *val;
+	int rc;
+
+	new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL);
+	if (!new_prop)
+		return -ENOMEM;
+
+	new_prop->name = kstrdup(name, GFP_KERNEL);
+	if (!new_prop->name) {
+		kfree(new_prop);
+		return -ENOMEM;
+	}
+
+	new_prop->length = vd;
+	new_prop->value = kzalloc(new_prop->length, GFP_KERNEL);
+	if (!new_prop->value) {
+		kfree(new_prop->name);
+		kfree(new_prop);
+		return -ENOMEM;
+	}
+	memcpy(new_prop->value, value, vd);
+
+	val = (u32 *)new_prop->value;
+	rc = cxl_update_properties(dn, new_prop);
+	pr_devel("%s: update property (%s, length: %i, value: %#x)\n",
+		  dn->name, name, vd, be32_to_cpu(*val));
+
+	if (rc) {
+		kfree(new_prop->name);
+		kfree(new_prop->value);
+		kfree(new_prop);
+	}
+	return rc;
+}
+
+static int update_node(__be32 phandle, s32 scope)
+{
+	struct update_props_workarea *upwa;
+	struct device_node *dn;
+	int i, rc, ret;
+	char *prop_data;
+	char *buf;
+	int token;
+	u32 nprops;
+	u32 vd;
+
+	token = rtas_token("ibm,update-properties");
+	if (token == RTAS_UNKNOWN_SERVICE)
+		return -EINVAL;
+
+	buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	dn = of_find_node_by_phandle(be32_to_cpu(phandle));
+	if (!dn) {
+		kfree(buf);
+		return -ENOENT;
+	}
+
+	upwa = (struct update_props_workarea *)&buf[0];
+	upwa->phandle = phandle;
+	do {
+		rc = rcall(token, buf, scope);
+		if (rc < 0)
+			break;
+
+		prop_data = buf + sizeof(*upwa);
+		nprops = be32_to_cpu(upwa->nprops);
+
+		if (*prop_data == 0) {
+			prop_data++;
+			vd = be32_to_cpu(*(__be32 *)prop_data);
+			prop_data += vd + sizeof(vd);
+			nprops--;
+		}
+
+		for (i = 0; i < nprops; i++) {
+			char *prop_name;
+
+			prop_name = prop_data;
+			prop_data += strlen(prop_name) + 1;
+			vd = be32_to_cpu(*(__be32 *)prop_data);
+			prop_data += sizeof(vd);
+
+			if ((vd != 0x00000000) && (vd != 0x80000000)) {
+				ret = update_property(dn, prop_name, vd,
+						prop_data);
+				if (ret)
+					pr_err("cxl: Could not update property %s - %i\n",
+					       prop_name, ret);
+
+				prop_data += vd;
+			}
+		}
+	} while (rc == 1);
+
+	of_node_put(dn);
+	kfree(buf);
+	return rc;
+}
+
+static int update_devicetree(struct cxl *adapter, s32 scope)
+{
+	struct update_nodes_workarea *unwa;
+	u32 action, node_count;
+	int token, rc, i;
+	__be32 *data, drc_index, phandle;
+	char *buf;
+
+	token = rtas_token("ibm,update-nodes");
+	if (token == RTAS_UNKNOWN_SERVICE)
+		return -EINVAL;
+
+	buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	unwa = (struct update_nodes_workarea *)&buf[0];
+	unwa->unit_address = cpu_to_be64(adapter->guest->handle);
+	do {
+		rc = rcall(token, buf, scope);
+		if (rc && rc != 1)
+			break;
+
+		data = (__be32 *)buf + 4;
+		while (be32_to_cpu(*data) & NODE_ACTION_MASK) {
+			action = be32_to_cpu(*data) & NODE_ACTION_MASK;
+			node_count = be32_to_cpu(*data) & NODE_COUNT_MASK;
+			pr_devel("device reconfiguration - action: %#x, nodes: %#x\n",
+				 action, node_count);
+			data++;
+
+			for (i = 0; i < node_count; i++) {
+				phandle = *data++;
+
+				switch (action) {
+				case OPCODE_DELETE:
+					/* nothing to do */
+					break;
+				case OPCODE_UPDATE:
+					update_node(phandle, scope);
+					break;
+				case OPCODE_ADD:
+					/* nothing to do, just move pointer */
+					drc_index = *data++;
+					break;
+				}
+			}
+		}
+	} while (rc == 1);
+
+	kfree(buf);
+	return 0;
+}
+
+static int handle_image(struct cxl *adapter, int operation,
+			long (*fct)(u64, u64, u64, u64 *),
+			struct cxl_adapter_image *ai)
+{
+	size_t mod, s_copy, len_chunk = 0;
+	struct ai_header *header = NULL;
+	unsigned int entries = 0, i;
+	void *dest, *from;
+	int rc = 0, need_header;
+
+	/* base adapter image header */
+	need_header = (ai->flags & CXL_AI_NEED_HEADER);
+	if (need_header) {
+		header = kzalloc(sizeof(struct ai_header), GFP_KERNEL);
+		if (!header)
+			return -ENOMEM;
+		header->version = cpu_to_be16(1);
+		header->vendor = cpu_to_be16(adapter->guest->vendor);
+		header->device = cpu_to_be16(adapter->guest->device);
+		header->subsystem_vendor = cpu_to_be16(adapter->guest->subsystem_vendor);
+		header->subsystem = cpu_to_be16(adapter->guest->subsystem);
+		header->image_offset = cpu_to_be64(CXL_AI_HEADER_SIZE);
+		header->image_length = cpu_to_be64(ai->len_image);
+	}
+
+	/* number of entries in the list */
+	len_chunk = ai->len_data;
+	if (need_header)
+		len_chunk += CXL_AI_HEADER_SIZE;
+
+	entries = len_chunk / CXL_AI_BUFFER_SIZE;
+	mod = len_chunk % CXL_AI_BUFFER_SIZE;
+	if (mod)
+		entries++;
+
+	if (entries > CXL_AI_MAX_ENTRIES) {
+		rc = -EINVAL;
+		goto err;
+	}
+
+	/*          < -- MAX_CHUNK_SIZE = 4096 * 256 = 1048576 bytes -->
+	 * chunk 0  ----------------------------------------------------
+	 *          | header   |  data                                 |
+	 *          ----------------------------------------------------
+	 * chunk 1  ----------------------------------------------------
+	 *          | data                                             |
+	 *          ----------------------------------------------------
+	 * ....
+	 * chunk n  ----------------------------------------------------
+	 *          | data                                             |
+	 *          ----------------------------------------------------
+	 */
+	from = (void *) ai->data;
+	for (i = 0; i < entries; i++) {
+		dest = buffer[i];
+		s_copy = CXL_AI_BUFFER_SIZE;
+
+		if ((need_header) && (i == 0)) {
+			/* add adapter image header */
+			memcpy(buffer[i], header, sizeof(struct ai_header));
+			s_copy = CXL_AI_BUFFER_SIZE - CXL_AI_HEADER_SIZE;
+			dest += CXL_AI_HEADER_SIZE; /* image offset */
+		}
+		if ((i == (entries - 1)) && mod)
+			s_copy = mod;
+
+		/* copy data */
+		if (copy_from_user(dest, from, s_copy))
+			goto err;
+
+		/* fill in the list */
+		le[i].phys_addr = cpu_to_be64(virt_to_phys(buffer[i]));
+		le[i].len = cpu_to_be64(CXL_AI_BUFFER_SIZE);
+		if ((i == (entries - 1)) && mod)
+			le[i].len = cpu_to_be64(mod);
+		from += s_copy;
+	}
+	pr_devel("%s (op: %i, need header: %i, entries: %i, token: %#llx)\n",
+		 __func__, operation, need_header, entries, continue_token);
+
+	/*
+	 * download/validate the adapter image to the coherent
+	 * platform facility
+	 */
+	rc = fct(adapter->guest->handle, virt_to_phys(le), entries,
+		&continue_token);
+	if (rc == 0) /* success of download/validation operation */
+		continue_token = 0;
+
+err:
+	kfree(header);
+
+	return rc;
+}
+
+static int transfer_image(struct cxl *adapter, int operation,
+			struct cxl_adapter_image *ai)
+{
+	int rc = 0;
+	int afu;
+
+	switch (operation) {
+	case DOWNLOAD_IMAGE:
+		rc = handle_image(adapter, operation,
+				&cxl_h_download_adapter_image, ai);
+		if (rc < 0) {
+			pr_devel("resetting adapter\n");
+			cxl_h_reset_adapter(adapter->guest->handle);
+		}
+		return rc;
+
+	case VALIDATE_IMAGE:
+		rc = handle_image(adapter, operation,
+				&cxl_h_validate_adapter_image, ai);
+		if (rc < 0) {
+			pr_devel("resetting adapter\n");
+			cxl_h_reset_adapter(adapter->guest->handle);
+			return rc;
+		}
+		if (rc == 0) {
+			pr_devel("remove current afu\n");
+			for (afu = 0; afu < adapter->slices; afu++)
+				cxl_guest_remove_afu(adapter->afu[afu]);
+
+			pr_devel("resetting adapter\n");
+			cxl_h_reset_adapter(adapter->guest->handle);
+
+			/* The entire image has now been
+			 * downloaded and the validation has
+			 * been successfully performed.
+			 * After that, the partition should call
+			 * ibm,update-nodes and
+			 * ibm,update-properties to receive the
+			 * current configuration
+			 */
+			rc = update_devicetree(adapter, DEVICE_SCOPE);
+			transfer = 1;
+		}
+		return rc;
+	}
+
+	return -EINVAL;
+}
+
+static long ioctl_transfer_image(struct cxl *adapter, int operation,
+				struct cxl_adapter_image __user *uai)
+{
+	struct cxl_adapter_image ai;
+
+	pr_devel("%s\n", __func__);
+
+	if (copy_from_user(&ai, uai, sizeof(struct cxl_adapter_image)))
+		return -EFAULT;
+
+	/*
+	 * Make sure reserved fields and bits are set to 0
+	 */
+	if (ai.reserved1 || ai.reserved2 || ai.reserved3 || ai.reserved4 ||
+		(ai.flags & ~CXL_AI_ALL))
+		return -EINVAL;
+
+	return transfer_image(adapter, operation, &ai);
+}
+
+static int device_open(struct inode *inode, struct file *file)
+{
+	int adapter_num = CXL_DEVT_ADAPTER(inode->i_rdev);
+	struct cxl *adapter;
+	int rc = 0, i;
+
+	pr_devel("in %s\n", __func__);
+
+	BUG_ON(sizeof(struct ai_header) != CXL_AI_HEADER_SIZE);
+
+	/* Allows one process to open the device by using a semaphore */
+	if (down_interruptible(&sem) != 0)
+		return -EPERM;
+
+	if (!(adapter = get_cxl_adapter(adapter_num))) {
+		rc = -ENODEV;
+		goto err_unlock;
+	}
+
+	file->private_data = adapter;
+	continue_token = 0;
+	transfer = 0;
+
+	for (i = 0; i < CXL_AI_MAX_ENTRIES; i++)
+		buffer[i] = NULL;
+
+	/* aligned buffer containing list entries which describes up to
+	 * 1 megabyte of data (256 entries of 4096 bytes each)
+	 *  Logical real address of buffer 0  -  Buffer 0 length in bytes
+	 *  Logical real address of buffer 1  -  Buffer 1 length in bytes
+	 *  Logical real address of buffer 2  -  Buffer 2 length in bytes
+	 *  ....
+	 *  ....
+	 *  Logical real address of buffer N  -  Buffer N length in bytes
+	 */
+	le = (struct sg_list *)get_zeroed_page(GFP_KERNEL);
+	if (!le) {
+		rc = -ENOMEM;
+		goto err;
+	}
+
+	for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) {
+		buffer[i] = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+		if (!buffer[i]) {
+			rc = -ENOMEM;
+			goto err1;
+		}
+	}
+
+	return 0;
+
+err1:
+	for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) {
+		if (buffer[i])
+			free_page((unsigned long) buffer[i]);
+	}
+
+	if (le)
+		free_page((unsigned long) le);
+err:
+	put_device(&adapter->dev);
+err_unlock:
+	up(&sem);
+
+	return rc;
+}
+
+static long device_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct cxl *adapter = file->private_data;
+
+	pr_devel("in %s\n", __func__);
+
+	if (cmd == CXL_IOCTL_DOWNLOAD_IMAGE)
+		return ioctl_transfer_image(adapter,
+					DOWNLOAD_IMAGE,
+					(struct cxl_adapter_image __user *)arg);
+	else if (cmd == CXL_IOCTL_VALIDATE_IMAGE)
+		return ioctl_transfer_image(adapter,
+					VALIDATE_IMAGE,
+					(struct cxl_adapter_image __user *)arg);
+	else
+		return -EINVAL;
+}
+
+static long device_compat_ioctl(struct file *file, unsigned int cmd,
+				unsigned long arg)
+{
+	return device_ioctl(file, cmd, arg);
+}
+
+static int device_close(struct inode *inode, struct file *file)
+{
+	struct cxl *adapter = file->private_data;
+	int i;
+
+	pr_devel("in %s\n", __func__);
+
+	for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) {
+		if (buffer[i])
+			free_page((unsigned long) buffer[i]);
+	}
+
+	if (le)
+		free_page((unsigned long) le);
+
+	up(&sem);
+	put_device(&adapter->dev);
+	continue_token = 0;
+
+	/* reload the module */
+	if (transfer)
+		cxl_guest_reload_module(adapter);
+	else {
+		pr_devel("resetting adapter\n");
+		cxl_h_reset_adapter(adapter->guest->handle);
+	}
+
+	transfer = 0;
+	return 0;
+}
+
+static const struct file_operations fops = {
+	.owner		= THIS_MODULE,
+	.open		= device_open,
+	.unlocked_ioctl	= device_ioctl,
+	.compat_ioctl	= device_compat_ioctl,
+	.release	= device_close,
+};
+
+void cxl_guest_remove_chardev(struct cxl *adapter)
+{
+	cdev_del(&adapter->guest->cdev);
+}
+
+int cxl_guest_add_chardev(struct cxl *adapter)
+{
+	dev_t devt;
+	int rc;
+
+	devt = MKDEV(MAJOR(cxl_get_dev()), CXL_CARD_MINOR(adapter));
+	cdev_init(&adapter->guest->cdev, &fops);
+	if ((rc = cdev_add(&adapter->guest->cdev, devt, 1))) {
+		dev_err(&adapter->dev,
+			"Unable to add chardev on adapter (card%i): %i\n",
+			adapter->adapter_num, rc);
+		goto err;
+	}
+	adapter->dev.devt = devt;
+	sema_init(&sem, 1);
+err:
+	return rc;
+}
diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c
new file mode 100644
index 000000000..08f4a512a
--- /dev/null
+++ b/drivers/misc/cxl/guest.c
@@ -0,0 +1,1202 @@
+/*
+ * Copyright 2015 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/delay.h>
+
+#include "cxl.h"
+#include "hcalls.h"
+#include "trace.h"
+
+#define CXL_ERROR_DETECTED_EVENT	1
+#define CXL_SLOT_RESET_EVENT		2
+#define CXL_RESUME_EVENT		3
+
+static void pci_error_handlers(struct cxl_afu *afu,
+				int bus_error_event,
+				pci_channel_state_t state)
+{
+	struct pci_dev *afu_dev;
+
+	if (afu->phb == NULL)
+		return;
+
+	list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
+		if (!afu_dev->driver)
+			continue;
+
+		switch (bus_error_event) {
+		case CXL_ERROR_DETECTED_EVENT:
+			afu_dev->error_state = state;
+
+			if (afu_dev->driver->err_handler &&
+			    afu_dev->driver->err_handler->error_detected)
+				afu_dev->driver->err_handler->error_detected(afu_dev, state);
+		break;
+		case CXL_SLOT_RESET_EVENT:
+			afu_dev->error_state = state;
+
+			if (afu_dev->driver->err_handler &&
+			    afu_dev->driver->err_handler->slot_reset)
+				afu_dev->driver->err_handler->slot_reset(afu_dev);
+		break;
+		case CXL_RESUME_EVENT:
+			if (afu_dev->driver->err_handler &&
+			    afu_dev->driver->err_handler->resume)
+				afu_dev->driver->err_handler->resume(afu_dev);
+		break;
+		}
+	}
+}
+
+static irqreturn_t guest_handle_psl_slice_error(struct cxl_context *ctx, u64 dsisr,
+					u64 errstat)
+{
+	pr_devel("in %s\n", __func__);
+	dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%.16llx\n", errstat);
+
+	return cxl_ops->ack_irq(ctx, 0, errstat);
+}
+
+static ssize_t guest_collect_vpd(struct cxl *adapter, struct cxl_afu *afu,
+			void *buf, size_t len)
+{
+	unsigned int entries, mod;
+	unsigned long **vpd_buf = NULL;
+	struct sg_list *le;
+	int rc = 0, i, tocopy;
+	u64 out = 0;
+
+	if (buf == NULL)
+		return -EINVAL;
+
+	/* number of entries in the list */
+	entries = len / SG_BUFFER_SIZE;
+	mod = len % SG_BUFFER_SIZE;
+	if (mod)
+		entries++;
+
+	if (entries > SG_MAX_ENTRIES) {
+		entries = SG_MAX_ENTRIES;
+		len = SG_MAX_ENTRIES * SG_BUFFER_SIZE;
+		mod = 0;
+	}
+
+	vpd_buf = kcalloc(entries, sizeof(unsigned long *), GFP_KERNEL);
+	if (!vpd_buf)
+		return -ENOMEM;
+
+	le = (struct sg_list *)get_zeroed_page(GFP_KERNEL);
+	if (!le) {
+		rc = -ENOMEM;
+		goto err1;
+	}
+
+	for (i = 0; i < entries; i++) {
+		vpd_buf[i] = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+		if (!vpd_buf[i]) {
+			rc = -ENOMEM;
+			goto err2;
+		}
+		le[i].phys_addr = cpu_to_be64(virt_to_phys(vpd_buf[i]));
+		le[i].len = cpu_to_be64(SG_BUFFER_SIZE);
+		if ((i == (entries - 1)) && mod)
+			le[i].len = cpu_to_be64(mod);
+	}
+
+	if (adapter)
+		rc = cxl_h_collect_vpd_adapter(adapter->guest->handle,
+					virt_to_phys(le), entries, &out);
+	else
+		rc = cxl_h_collect_vpd(afu->guest->handle, 0,
+				virt_to_phys(le), entries, &out);
+	pr_devel("length of available (entries: %i), vpd: %#llx\n",
+		entries, out);
+
+	if (!rc) {
+		/*
+		 * hcall returns in 'out' the size of available VPDs.
+		 * It fills the buffer with as much data as possible.
+		 */
+		if (out < len)
+			len = out;
+		rc = len;
+		if (out) {
+			for (i = 0; i < entries; i++) {
+				if (len < SG_BUFFER_SIZE)
+					tocopy = len;
+				else
+					tocopy = SG_BUFFER_SIZE;
+				memcpy(buf, vpd_buf[i], tocopy);
+				buf += tocopy;
+				len -= tocopy;
+			}
+		}
+	}
+err2:
+	for (i = 0; i < entries; i++) {
+		if (vpd_buf[i])
+			free_page((unsigned long) vpd_buf[i]);
+	}
+	free_page((unsigned long) le);
+err1:
+	kfree(vpd_buf);
+	return rc;
+}
+
+static int guest_get_irq_info(struct cxl_context *ctx, struct cxl_irq_info *info)
+{
+	return cxl_h_collect_int_info(ctx->afu->guest->handle, ctx->process_token, info);
+}
+
+static irqreturn_t guest_psl_irq(int irq, void *data)
+{
+	struct cxl_context *ctx = data;
+	struct cxl_irq_info irq_info;
+	int rc;
+
+	pr_devel("%d: received PSL interrupt %i\n", ctx->pe, irq);
+	rc = guest_get_irq_info(ctx, &irq_info);
+	if (rc) {
+		WARN(1, "Unable to get IRQ info: %i\n", rc);
+		return IRQ_HANDLED;
+	}
+
+	rc = cxl_irq_psl8(irq, ctx, &irq_info);
+	return rc;
+}
+
+static int afu_read_error_state(struct cxl_afu *afu, int *state_out)
+{
+	u64 state;
+	int rc = 0;
+
+	if (!afu)
+		return -EIO;
+
+	rc = cxl_h_read_error_state(afu->guest->handle, &state);
+	if (!rc) {
+		WARN_ON(state != H_STATE_NORMAL &&
+			state != H_STATE_DISABLE &&
+			state != H_STATE_TEMP_UNAVAILABLE &&
+			state != H_STATE_PERM_UNAVAILABLE);
+		*state_out = state & 0xffffffff;
+	}
+	return rc;
+}
+
+static irqreturn_t guest_slice_irq_err(int irq, void *data)
+{
+	struct cxl_afu *afu = data;
+	int rc;
+	u64 serr, afu_error, dsisr;
+
+	rc = cxl_h_get_fn_error_interrupt(afu->guest->handle, &serr);
+	if (rc) {
+		dev_crit(&afu->dev, "Couldn't read PSL_SERR_An: %d\n", rc);
+		return IRQ_HANDLED;
+	}
+	afu_error = cxl_p2n_read(afu, CXL_AFU_ERR_An);
+	dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
+	cxl_afu_decode_psl_serr(afu, serr);
+	dev_crit(&afu->dev, "AFU_ERR_An: 0x%.16llx\n", afu_error);
+	dev_crit(&afu->dev, "PSL_DSISR_An: 0x%.16llx\n", dsisr);
+
+	rc = cxl_h_ack_fn_error_interrupt(afu->guest->handle, serr);
+	if (rc)
+		dev_crit(&afu->dev, "Couldn't ack slice error interrupt: %d\n",
+			rc);
+
+	return IRQ_HANDLED;
+}
+
+
+static int irq_alloc_range(struct cxl *adapter, int len, int *irq)
+{
+	int i, n;
+	struct irq_avail *cur;
+
+	for (i = 0; i < adapter->guest->irq_nranges; i++) {
+		cur = &adapter->guest->irq_avail[i];
+		n = bitmap_find_next_zero_area(cur->bitmap, cur->range,
+					0, len, 0);
+		if (n < cur->range) {
+			bitmap_set(cur->bitmap, n, len);
+			*irq = cur->offset + n;
+			pr_devel("guest: allocate IRQs %#x->%#x\n",
+				*irq, *irq + len - 1);
+
+			return 0;
+		}
+	}
+	return -ENOSPC;
+}
+
+static int irq_free_range(struct cxl *adapter, int irq, int len)
+{
+	int i, n;
+	struct irq_avail *cur;
+
+	if (len == 0)
+		return -ENOENT;
+
+	for (i = 0; i < adapter->guest->irq_nranges; i++) {
+		cur = &adapter->guest->irq_avail[i];
+		if (irq >= cur->offset &&
+			(irq + len) <= (cur->offset + cur->range)) {
+			n = irq - cur->offset;
+			bitmap_clear(cur->bitmap, n, len);
+			pr_devel("guest: release IRQs %#x->%#x\n",
+				irq, irq + len - 1);
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
+static int guest_reset(struct cxl *adapter)
+{
+	struct cxl_afu *afu = NULL;
+	int i, rc;
+
+	pr_devel("Adapter reset request\n");
+	spin_lock(&adapter->afu_list_lock);
+	for (i = 0; i < adapter->slices; i++) {
+		if ((afu = adapter->afu[i])) {
+			pci_error_handlers(afu, CXL_ERROR_DETECTED_EVENT,
+					pci_channel_io_frozen);
+			cxl_context_detach_all(afu);
+		}
+	}
+
+	rc = cxl_h_reset_adapter(adapter->guest->handle);
+	for (i = 0; i < adapter->slices; i++) {
+		if (!rc && (afu = adapter->afu[i])) {
+			pci_error_handlers(afu, CXL_SLOT_RESET_EVENT,
+					pci_channel_io_normal);
+			pci_error_handlers(afu, CXL_RESUME_EVENT, 0);
+		}
+	}
+	spin_unlock(&adapter->afu_list_lock);
+	return rc;
+}
+
+static int guest_alloc_one_irq(struct cxl *adapter)
+{
+	int irq;
+
+	spin_lock(&adapter->guest->irq_alloc_lock);
+	if (irq_alloc_range(adapter, 1, &irq))
+		irq = -ENOSPC;
+	spin_unlock(&adapter->guest->irq_alloc_lock);
+	return irq;
+}
+
+static void guest_release_one_irq(struct cxl *adapter, int irq)
+{
+	spin_lock(&adapter->guest->irq_alloc_lock);
+	irq_free_range(adapter, irq, 1);
+	spin_unlock(&adapter->guest->irq_alloc_lock);
+}
+
+static int guest_alloc_irq_ranges(struct cxl_irq_ranges *irqs,
+				struct cxl *adapter, unsigned int num)
+{
+	int i, try, irq;
+
+	memset(irqs, 0, sizeof(struct cxl_irq_ranges));
+
+	spin_lock(&adapter->guest->irq_alloc_lock);
+	for (i = 0; i < CXL_IRQ_RANGES && num; i++) {
+		try = num;
+		while (try) {
+			if (irq_alloc_range(adapter, try, &irq) == 0)
+				break;
+			try /= 2;
+		}
+		if (!try)
+			goto error;
+		irqs->offset[i] = irq;
+		irqs->range[i] = try;
+		num -= try;
+	}
+	if (num)
+		goto error;
+	spin_unlock(&adapter->guest->irq_alloc_lock);
+	return 0;
+
+error:
+	for (i = 0; i < CXL_IRQ_RANGES; i++)
+		irq_free_range(adapter, irqs->offset[i], irqs->range[i]);
+	spin_unlock(&adapter->guest->irq_alloc_lock);
+	return -ENOSPC;
+}
+
+static void guest_release_irq_ranges(struct cxl_irq_ranges *irqs,
+				struct cxl *adapter)
+{
+	int i;
+
+	spin_lock(&adapter->guest->irq_alloc_lock);
+	for (i = 0; i < CXL_IRQ_RANGES; i++)
+		irq_free_range(adapter, irqs->offset[i], irqs->range[i]);
+	spin_unlock(&adapter->guest->irq_alloc_lock);
+}
+
+static int guest_register_serr_irq(struct cxl_afu *afu)
+{
+	afu->err_irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err",
+				      dev_name(&afu->dev));
+	if (!afu->err_irq_name)
+		return -ENOMEM;
+
+	if (!(afu->serr_virq = cxl_map_irq(afu->adapter, afu->serr_hwirq,
+				 guest_slice_irq_err, afu, afu->err_irq_name))) {
+		kfree(afu->err_irq_name);
+		afu->err_irq_name = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void guest_release_serr_irq(struct cxl_afu *afu)
+{
+	cxl_unmap_irq(afu->serr_virq, afu);
+	cxl_ops->release_one_irq(afu->adapter, afu->serr_hwirq);
+	kfree(afu->err_irq_name);
+}
+
+static int guest_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask)
+{
+	return cxl_h_control_faults(ctx->afu->guest->handle, ctx->process_token,
+				tfc >> 32, (psl_reset_mask != 0));
+}
+
+static void disable_afu_irqs(struct cxl_context *ctx)
+{
+	irq_hw_number_t hwirq;
+	unsigned int virq;
+	int r, i;
+
+	pr_devel("Disabling AFU(%d) interrupts\n", ctx->afu->slice);
+	for (r = 0; r < CXL_IRQ_RANGES; r++) {
+		hwirq = ctx->irqs.offset[r];
+		for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) {
+			virq = irq_find_mapping(NULL, hwirq);
+			disable_irq(virq);
+		}
+	}
+}
+
+static void enable_afu_irqs(struct cxl_context *ctx)
+{
+	irq_hw_number_t hwirq;
+	unsigned int virq;
+	int r, i;
+
+	pr_devel("Enabling AFU(%d) interrupts\n", ctx->afu->slice);
+	for (r = 0; r < CXL_IRQ_RANGES; r++) {
+		hwirq = ctx->irqs.offset[r];
+		for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) {
+			virq = irq_find_mapping(NULL, hwirq);
+			enable_irq(virq);
+		}
+	}
+}
+
+static int _guest_afu_cr_readXX(int sz, struct cxl_afu *afu, int cr_idx,
+			u64 offset, u64 *val)
+{
+	unsigned long cr;
+	char c;
+	int rc = 0;
+
+	if (afu->crs_len < sz)
+		return -ENOENT;
+
+	if (unlikely(offset >= afu->crs_len))
+		return -ERANGE;
+
+	cr = get_zeroed_page(GFP_KERNEL);
+	if (!cr)
+		return -ENOMEM;
+
+	rc = cxl_h_get_config(afu->guest->handle, cr_idx, offset,
+			virt_to_phys((void *)cr), sz);
+	if (rc)
+		goto err;
+
+	switch (sz) {
+	case 1:
+		c = *((char *) cr);
+		*val = c;
+		break;
+	case 2:
+		*val = in_le16((u16 *)cr);
+		break;
+	case 4:
+		*val = in_le32((unsigned *)cr);
+		break;
+	case 8:
+		*val = in_le64((u64 *)cr);
+		break;
+	default:
+		WARN_ON(1);
+	}
+err:
+	free_page(cr);
+	return rc;
+}
+
+static int guest_afu_cr_read32(struct cxl_afu *afu, int cr_idx, u64 offset,
+			u32 *out)
+{
+	int rc;
+	u64 val;
+
+	rc = _guest_afu_cr_readXX(4, afu, cr_idx, offset, &val);
+	if (!rc)
+		*out = (u32) val;
+	return rc;
+}
+
+static int guest_afu_cr_read16(struct cxl_afu *afu, int cr_idx, u64 offset,
+			u16 *out)
+{
+	int rc;
+	u64 val;
+
+	rc = _guest_afu_cr_readXX(2, afu, cr_idx, offset, &val);
+	if (!rc)
+		*out = (u16) val;
+	return rc;
+}
+
+static int guest_afu_cr_read8(struct cxl_afu *afu, int cr_idx, u64 offset,
+			u8 *out)
+{
+	int rc;
+	u64 val;
+
+	rc = _guest_afu_cr_readXX(1, afu, cr_idx, offset, &val);
+	if (!rc)
+		*out = (u8) val;
+	return rc;
+}
+
+static int guest_afu_cr_read64(struct cxl_afu *afu, int cr_idx, u64 offset,
+			u64 *out)
+{
+	return _guest_afu_cr_readXX(8, afu, cr_idx, offset, out);
+}
+
+static int guest_afu_cr_write32(struct cxl_afu *afu, int cr, u64 off, u32 in)
+{
+	/* config record is not writable from guest */
+	return -EPERM;
+}
+
+static int guest_afu_cr_write16(struct cxl_afu *afu, int cr, u64 off, u16 in)
+{
+	/* config record is not writable from guest */
+	return -EPERM;
+}
+
+static int guest_afu_cr_write8(struct cxl_afu *afu, int cr, u64 off, u8 in)
+{
+	/* config record is not writable from guest */
+	return -EPERM;
+}
+
+static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr)
+{
+	struct cxl_process_element_hcall *elem;
+	struct cxl *adapter = ctx->afu->adapter;
+	const struct cred *cred;
+	u32 pid, idx;
+	int rc, r, i;
+	u64 mmio_addr, mmio_size;
+	__be64 flags = 0;
+
+	/* Must be 8 byte aligned and cannot cross a 4096 byte boundary */
+	if (!(elem = (struct cxl_process_element_hcall *)
+			get_zeroed_page(GFP_KERNEL)))
+		return -ENOMEM;
+
+	elem->version = cpu_to_be64(CXL_PROCESS_ELEMENT_VERSION);
+	if (ctx->kernel) {
+		pid = 0;
+		flags |= CXL_PE_TRANSLATION_ENABLED;
+		flags |= CXL_PE_PRIVILEGED_PROCESS;
+		if (mfmsr() & MSR_SF)
+			flags |= CXL_PE_64_BIT;
+	} else {
+		pid = current->pid;
+		flags |= CXL_PE_PROBLEM_STATE;
+		flags |= CXL_PE_TRANSLATION_ENABLED;
+		if (!test_tsk_thread_flag(current, TIF_32BIT))
+			flags |= CXL_PE_64_BIT;
+		cred = get_current_cred();
+		if (uid_eq(cred->euid, GLOBAL_ROOT_UID))
+			flags |= CXL_PE_PRIVILEGED_PROCESS;
+		put_cred(cred);
+	}
+	elem->flags         = cpu_to_be64(flags);
+	elem->common.tid    = cpu_to_be32(0); /* Unused */
+	elem->common.pid    = cpu_to_be32(pid);
+	elem->common.csrp   = cpu_to_be64(0); /* disable */
+	elem->common.u.psl8.aurp0  = cpu_to_be64(0); /* disable */
+	elem->common.u.psl8.aurp1  = cpu_to_be64(0); /* disable */
+
+	cxl_prefault(ctx, wed);
+
+	elem->common.u.psl8.sstp0  = cpu_to_be64(ctx->sstp0);
+	elem->common.u.psl8.sstp1  = cpu_to_be64(ctx->sstp1);
+
+	/*
+	 * Ensure we have at least one interrupt allocated to take faults for
+	 * kernel contexts that may not have allocated any AFU IRQs at all:
+	 */
+	if (ctx->irqs.range[0] == 0) {
+		rc = afu_register_irqs(ctx, 0);
+		if (rc)
+			goto out_free;
+	}
+
+	for (r = 0; r < CXL_IRQ_RANGES; r++) {
+		for (i = 0; i < ctx->irqs.range[r]; i++) {
+			if (r == 0 && i == 0) {
+				elem->pslVirtualIsn = cpu_to_be32(ctx->irqs.offset[0]);
+			} else {
+				idx = ctx->irqs.offset[r] + i - adapter->guest->irq_base_offset;
+				elem->applicationVirtualIsnBitmap[idx / 8] |= 0x80 >> (idx % 8);
+			}
+		}
+	}
+	elem->common.amr = cpu_to_be64(amr);
+	elem->common.wed = cpu_to_be64(wed);
+
+	disable_afu_irqs(ctx);
+
+	rc = cxl_h_attach_process(ctx->afu->guest->handle, elem,
+				&ctx->process_token, &mmio_addr, &mmio_size);
+	if (rc == H_SUCCESS) {
+		if (ctx->master || !ctx->afu->pp_psa) {
+			ctx->psn_phys = ctx->afu->psn_phys;
+			ctx->psn_size = ctx->afu->adapter->ps_size;
+		} else {
+			ctx->psn_phys = mmio_addr;
+			ctx->psn_size = mmio_size;
+		}
+		if (ctx->afu->pp_psa && mmio_size &&
+			ctx->afu->pp_size == 0) {
+			/*
+			 * There's no property in the device tree to read the
+			 * pp_size. We only find out at the 1st attach.
+			 * Compared to bare-metal, it is too late and we
+			 * should really lock here. However, on powerVM,
+			 * pp_size is really only used to display in /sys.
+			 * Being discussed with pHyp for their next release.
+			 */
+			ctx->afu->pp_size = mmio_size;
+		}
+		/* from PAPR: process element is bytes 4-7 of process token */
+		ctx->external_pe = ctx->process_token & 0xFFFFFFFF;
+		pr_devel("CXL pe=%i is known as %i for pHyp, mmio_size=%#llx",
+			ctx->pe, ctx->external_pe, ctx->psn_size);
+		ctx->pe_inserted = true;
+		enable_afu_irqs(ctx);
+	}
+
+out_free:
+	free_page((u64)elem);
+	return rc;
+}
+
+static int guest_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr)
+{
+	pr_devel("in %s\n", __func__);
+
+	ctx->kernel = kernel;
+	if (ctx->afu->current_mode == CXL_MODE_DIRECTED)
+		return attach_afu_directed(ctx, wed, amr);
+
+	/* dedicated mode not supported on FW840 */
+
+	return -EINVAL;
+}
+
+static int detach_afu_directed(struct cxl_context *ctx)
+{
+	if (!ctx->pe_inserted)
+		return 0;
+	if (cxl_h_detach_process(ctx->afu->guest->handle, ctx->process_token))
+		return -1;
+	return 0;
+}
+
+static int guest_detach_process(struct cxl_context *ctx)
+{
+	pr_devel("in %s\n", __func__);
+	trace_cxl_detach(ctx);
+
+	if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu))
+		return -EIO;
+
+	if (ctx->afu->current_mode == CXL_MODE_DIRECTED)
+		return detach_afu_directed(ctx);
+
+	return -EINVAL;
+}
+
+static void guest_release_afu(struct device *dev)
+{
+	struct cxl_afu *afu = to_cxl_afu(dev);
+
+	pr_devel("%s\n", __func__);
+
+	idr_destroy(&afu->contexts_idr);
+
+	kfree(afu->guest);
+	kfree(afu);
+}
+
+ssize_t cxl_guest_read_afu_vpd(struct cxl_afu *afu, void *buf, size_t len)
+{
+	return guest_collect_vpd(NULL, afu, buf, len);
+}
+
+#define ERR_BUFF_MAX_COPY_SIZE PAGE_SIZE
+static ssize_t guest_afu_read_err_buffer(struct cxl_afu *afu, char *buf,
+					loff_t off, size_t count)
+{
+	void *tbuf = NULL;
+	int rc = 0;
+
+	tbuf = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!tbuf)
+		return -ENOMEM;
+
+	rc = cxl_h_get_afu_err(afu->guest->handle,
+			       off & 0x7,
+			       virt_to_phys(tbuf),
+			       count);
+	if (rc)
+		goto err;
+
+	if (count > ERR_BUFF_MAX_COPY_SIZE)
+		count = ERR_BUFF_MAX_COPY_SIZE - (off & 0x7);
+	memcpy(buf, tbuf, count);
+err:
+	free_page((u64)tbuf);
+
+	return rc;
+}
+
+static int guest_afu_check_and_enable(struct cxl_afu *afu)
+{
+	return 0;
+}
+
+static bool guest_support_attributes(const char *attr_name,
+				     enum cxl_attrs type)
+{
+	switch (type) {
+	case CXL_ADAPTER_ATTRS:
+		if ((strcmp(attr_name, "base_image") == 0) ||
+			(strcmp(attr_name, "load_image_on_perst") == 0) ||
+			(strcmp(attr_name, "perst_reloads_same_image") == 0) ||
+			(strcmp(attr_name, "image_loaded") == 0))
+			return false;
+		break;
+	case CXL_AFU_MASTER_ATTRS:
+		if ((strcmp(attr_name, "pp_mmio_off") == 0))
+			return false;
+		break;
+	case CXL_AFU_ATTRS:
+		break;
+	default:
+		break;
+	}
+
+	return true;
+}
+
+static int activate_afu_directed(struct cxl_afu *afu)
+{
+	int rc;
+
+	dev_info(&afu->dev, "Activating AFU(%d) directed mode\n", afu->slice);
+
+	afu->current_mode = CXL_MODE_DIRECTED;
+
+	afu->num_procs = afu->max_procs_virtualised;
+
+	if ((rc = cxl_chardev_m_afu_add(afu)))
+		return rc;
+
+	if ((rc = cxl_sysfs_afu_m_add(afu)))
+		goto err;
+
+	if ((rc = cxl_chardev_s_afu_add(afu)))
+		goto err1;
+
+	return 0;
+err1:
+	cxl_sysfs_afu_m_remove(afu);
+err:
+	cxl_chardev_afu_remove(afu);
+	return rc;
+}
+
+static int guest_afu_activate_mode(struct cxl_afu *afu, int mode)
+{
+	if (!mode)
+		return 0;
+	if (!(mode & afu->modes_supported))
+		return -EINVAL;
+
+	if (mode == CXL_MODE_DIRECTED)
+		return activate_afu_directed(afu);
+
+	if (mode == CXL_MODE_DEDICATED)
+		dev_err(&afu->dev, "Dedicated mode not supported\n");
+
+	return -EINVAL;
+}
+
+static int deactivate_afu_directed(struct cxl_afu *afu)
+{
+	dev_info(&afu->dev, "Deactivating AFU(%d) directed mode\n", afu->slice);
+
+	afu->current_mode = 0;
+	afu->num_procs = 0;
+
+	cxl_sysfs_afu_m_remove(afu);
+	cxl_chardev_afu_remove(afu);
+
+	cxl_ops->afu_reset(afu);
+
+	return 0;
+}
+
+static int guest_afu_deactivate_mode(struct cxl_afu *afu, int mode)
+{
+	if (!mode)
+		return 0;
+	if (!(mode & afu->modes_supported))
+		return -EINVAL;
+
+	if (mode == CXL_MODE_DIRECTED)
+		return deactivate_afu_directed(afu);
+	return 0;
+}
+
+static int guest_afu_reset(struct cxl_afu *afu)
+{
+	pr_devel("AFU(%d) reset request\n", afu->slice);
+	return cxl_h_reset_afu(afu->guest->handle);
+}
+
+static int guest_map_slice_regs(struct cxl_afu *afu)
+{
+	if (!(afu->p2n_mmio = ioremap(afu->guest->p2n_phys, afu->guest->p2n_size))) {
+		dev_err(&afu->dev, "Error mapping AFU(%d) MMIO regions\n",
+			afu->slice);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static void guest_unmap_slice_regs(struct cxl_afu *afu)
+{
+	if (afu->p2n_mmio)
+		iounmap(afu->p2n_mmio);
+}
+
+static int afu_update_state(struct cxl_afu *afu)
+{
+	int rc, cur_state;
+
+	rc = afu_read_error_state(afu, &cur_state);
+	if (rc)
+		return rc;
+
+	if (afu->guest->previous_state == cur_state)
+		return 0;
+
+	pr_devel("AFU(%d) update state to %#x\n", afu->slice, cur_state);
+
+	switch (cur_state) {
+	case H_STATE_NORMAL:
+		afu->guest->previous_state = cur_state;
+		break;
+
+	case H_STATE_DISABLE:
+		pci_error_handlers(afu, CXL_ERROR_DETECTED_EVENT,
+				pci_channel_io_frozen);
+
+		cxl_context_detach_all(afu);
+		if ((rc = cxl_ops->afu_reset(afu)))
+			pr_devel("reset hcall failed %d\n", rc);
+
+		rc = afu_read_error_state(afu, &cur_state);
+		if (!rc && cur_state == H_STATE_NORMAL) {
+			pci_error_handlers(afu, CXL_SLOT_RESET_EVENT,
+					pci_channel_io_normal);
+			pci_error_handlers(afu, CXL_RESUME_EVENT, 0);
+		}
+		afu->guest->previous_state = 0;
+		break;
+
+	case H_STATE_TEMP_UNAVAILABLE:
+		afu->guest->previous_state = cur_state;
+		break;
+
+	case H_STATE_PERM_UNAVAILABLE:
+		dev_err(&afu->dev, "AFU is in permanent error state\n");
+		pci_error_handlers(afu, CXL_ERROR_DETECTED_EVENT,
+				pci_channel_io_perm_failure);
+		afu->guest->previous_state = cur_state;
+		break;
+
+	default:
+		pr_err("Unexpected AFU(%d) error state: %#x\n",
+		       afu->slice, cur_state);
+		return -EINVAL;
+	}
+
+	return rc;
+}
+
+static void afu_handle_errstate(struct work_struct *work)
+{
+	struct cxl_afu_guest *afu_guest =
+		container_of(to_delayed_work(work), struct cxl_afu_guest, work_err);
+
+	if (!afu_update_state(afu_guest->parent) &&
+	    afu_guest->previous_state == H_STATE_PERM_UNAVAILABLE)
+		return;
+
+	if (afu_guest->handle_err)
+		schedule_delayed_work(&afu_guest->work_err,
+				      msecs_to_jiffies(3000));
+}
+
+static bool guest_link_ok(struct cxl *cxl, struct cxl_afu *afu)
+{
+	int state;
+
+	if (afu && (!afu_read_error_state(afu, &state))) {
+		if (state == H_STATE_NORMAL)
+			return true;
+	}
+
+	return false;
+}
+
+static int afu_properties_look_ok(struct cxl_afu *afu)
+{
+	if (afu->pp_irqs < 0) {
+		dev_err(&afu->dev, "Unexpected per-process minimum interrupt value\n");
+		return -EINVAL;
+	}
+
+	if (afu->max_procs_virtualised < 1) {
+		dev_err(&afu->dev, "Unexpected max number of processes virtualised value\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_np)
+{
+	struct cxl_afu *afu;
+	bool free = true;
+	int rc;
+
+	pr_devel("in %s - AFU(%d)\n", __func__, slice);
+	if (!(afu = cxl_alloc_afu(adapter, slice)))
+		return -ENOMEM;
+
+	if (!(afu->guest = kzalloc(sizeof(struct cxl_afu_guest), GFP_KERNEL))) {
+		kfree(afu);
+		return -ENOMEM;
+	}
+
+	if ((rc = dev_set_name(&afu->dev, "afu%i.%i",
+					  adapter->adapter_num,
+					  slice)))
+		goto err1;
+
+	adapter->slices++;
+
+	if ((rc = cxl_of_read_afu_handle(afu, afu_np)))
+		goto err1;
+
+	if ((rc = cxl_ops->afu_reset(afu)))
+		goto err1;
+
+	if ((rc = cxl_of_read_afu_properties(afu, afu_np)))
+		goto err1;
+
+	if ((rc = afu_properties_look_ok(afu)))
+		goto err1;
+
+	if ((rc = guest_map_slice_regs(afu)))
+		goto err1;
+
+	if ((rc = guest_register_serr_irq(afu)))
+		goto err2;
+
+	/*
+	 * After we call this function we must not free the afu directly, even
+	 * if it returns an error!
+	 */
+	if ((rc = cxl_register_afu(afu)))
+		goto err_put1;
+
+	if ((rc = cxl_sysfs_afu_add(afu)))
+		goto err_put1;
+
+	/*
+	 * pHyp doesn't expose the programming models supported by the
+	 * AFU. pHyp currently only supports directed mode. If it adds
+	 * dedicated mode later, this version of cxl has no way to
+	 * detect it. So we'll initialize the driver, but the first
+	 * attach will fail.
+	 * Being discussed with pHyp to do better (likely new property)
+	 */
+	if (afu->max_procs_virtualised == 1)
+		afu->modes_supported = CXL_MODE_DEDICATED;
+	else
+		afu->modes_supported = CXL_MODE_DIRECTED;
+
+	if ((rc = cxl_afu_select_best_mode(afu)))
+		goto err_put2;
+
+	adapter->afu[afu->slice] = afu;
+
+	afu->enabled = true;
+
+	/*
+	 * wake up the cpu periodically to check the state
+	 * of the AFU using "afu" stored in the guest structure.
+	 */
+	afu->guest->parent = afu;
+	afu->guest->handle_err = true;
+	INIT_DELAYED_WORK(&afu->guest->work_err, afu_handle_errstate);
+	schedule_delayed_work(&afu->guest->work_err, msecs_to_jiffies(1000));
+
+	if ((rc = cxl_pci_vphb_add(afu)))
+		dev_info(&afu->dev, "Can't register vPHB\n");
+
+	return 0;
+
+err_put2:
+	cxl_sysfs_afu_remove(afu);
+err_put1:
+	device_unregister(&afu->dev);
+	free = false;
+	guest_release_serr_irq(afu);
+err2:
+	guest_unmap_slice_regs(afu);
+err1:
+	if (free) {
+		kfree(afu->guest);
+		kfree(afu);
+	}
+	return rc;
+}
+
+void cxl_guest_remove_afu(struct cxl_afu *afu)
+{
+	if (!afu)
+		return;
+
+	/* flush and stop pending job */
+	afu->guest->handle_err = false;
+	flush_delayed_work(&afu->guest->work_err);
+
+	cxl_pci_vphb_remove(afu);
+	cxl_sysfs_afu_remove(afu);
+
+	spin_lock(&afu->adapter->afu_list_lock);
+	afu->adapter->afu[afu->slice] = NULL;
+	spin_unlock(&afu->adapter->afu_list_lock);
+
+	cxl_context_detach_all(afu);
+	cxl_ops->afu_deactivate_mode(afu, afu->current_mode);
+	guest_release_serr_irq(afu);
+	guest_unmap_slice_regs(afu);
+
+	device_unregister(&afu->dev);
+}
+
+static void free_adapter(struct cxl *adapter)
+{
+	struct irq_avail *cur;
+	int i;
+
+	if (adapter->guest) {
+		if (adapter->guest->irq_avail) {
+			for (i = 0; i < adapter->guest->irq_nranges; i++) {
+				cur = &adapter->guest->irq_avail[i];
+				kfree(cur->bitmap);
+			}
+			kfree(adapter->guest->irq_avail);
+		}
+		kfree(adapter->guest->status);
+		kfree(adapter->guest);
+	}
+	cxl_remove_adapter_nr(adapter);
+	kfree(adapter);
+}
+
+static int properties_look_ok(struct cxl *adapter)
+{
+	/* The absence of this property means that the operational
+	 * status is unknown or okay
+	 */
+	if (strlen(adapter->guest->status) &&
+	    strcmp(adapter->guest->status, "okay")) {
+		pr_err("ABORTING:Bad operational status of the device\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+ssize_t cxl_guest_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len)
+{
+	return guest_collect_vpd(adapter, NULL, buf, len);
+}
+
+void cxl_guest_remove_adapter(struct cxl *adapter)
+{
+	pr_devel("in %s\n", __func__);
+
+	cxl_sysfs_adapter_remove(adapter);
+
+	cxl_guest_remove_chardev(adapter);
+	device_unregister(&adapter->dev);
+}
+
+static void release_adapter(struct device *dev)
+{
+	free_adapter(to_cxl_adapter(dev));
+}
+
+struct cxl *cxl_guest_init_adapter(struct device_node *np, struct platform_device *pdev)
+{
+	struct cxl *adapter;
+	bool free = true;
+	int rc;
+
+	if (!(adapter = cxl_alloc_adapter()))
+		return ERR_PTR(-ENOMEM);
+
+	if (!(adapter->guest = kzalloc(sizeof(struct cxl_guest), GFP_KERNEL))) {
+		free_adapter(adapter);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	adapter->slices = 0;
+	adapter->guest->pdev = pdev;
+	adapter->dev.parent = &pdev->dev;
+	adapter->dev.release = release_adapter;
+	dev_set_drvdata(&pdev->dev, adapter);
+
+	/*
+	 * Hypervisor controls PSL timebase initialization (p1 register).
+	 * On FW840, PSL is initialized.
+	 */
+	adapter->psl_timebase_synced = true;
+
+	if ((rc = cxl_of_read_adapter_handle(adapter, np)))
+		goto err1;
+
+	if ((rc = cxl_of_read_adapter_properties(adapter, np)))
+		goto err1;
+
+	if ((rc = properties_look_ok(adapter)))
+		goto err1;
+
+	if ((rc = cxl_guest_add_chardev(adapter)))
+		goto err1;
+
+	/*
+	 * After we call this function we must not free the adapter directly,
+	 * even if it returns an error!
+	 */
+	if ((rc = cxl_register_adapter(adapter)))
+		goto err_put1;
+
+	if ((rc = cxl_sysfs_adapter_add(adapter)))
+		goto err_put1;
+
+	/* release the context lock as the adapter is configured */
+	cxl_adapter_context_unlock(adapter);
+
+	return adapter;
+
+err_put1:
+	device_unregister(&adapter->dev);
+	free = false;
+	cxl_guest_remove_chardev(adapter);
+err1:
+	if (free)
+		free_adapter(adapter);
+	return ERR_PTR(rc);
+}
+
+void cxl_guest_reload_module(struct cxl *adapter)
+{
+	struct platform_device *pdev;
+
+	pdev = adapter->guest->pdev;
+	cxl_guest_remove_adapter(adapter);
+
+	cxl_of_probe(pdev);
+}
+
+const struct cxl_backend_ops cxl_guest_ops = {
+	.module = THIS_MODULE,
+	.adapter_reset = guest_reset,
+	.alloc_one_irq = guest_alloc_one_irq,
+	.release_one_irq = guest_release_one_irq,
+	.alloc_irq_ranges = guest_alloc_irq_ranges,
+	.release_irq_ranges = guest_release_irq_ranges,
+	.setup_irq = NULL,
+	.handle_psl_slice_error = guest_handle_psl_slice_error,
+	.psl_interrupt = guest_psl_irq,
+	.ack_irq = guest_ack_irq,
+	.attach_process = guest_attach_process,
+	.detach_process = guest_detach_process,
+	.update_ivtes = NULL,
+	.support_attributes = guest_support_attributes,
+	.link_ok = guest_link_ok,
+	.release_afu = guest_release_afu,
+	.afu_read_err_buffer = guest_afu_read_err_buffer,
+	.afu_check_and_enable = guest_afu_check_and_enable,
+	.afu_activate_mode = guest_afu_activate_mode,
+	.afu_deactivate_mode = guest_afu_deactivate_mode,
+	.afu_reset = guest_afu_reset,
+	.afu_cr_read8 = guest_afu_cr_read8,
+	.afu_cr_read16 = guest_afu_cr_read16,
+	.afu_cr_read32 = guest_afu_cr_read32,
+	.afu_cr_read64 = guest_afu_cr_read64,
+	.afu_cr_write8 = guest_afu_cr_write8,
+	.afu_cr_write16 = guest_afu_cr_write16,
+	.afu_cr_write32 = guest_afu_cr_write32,
+	.read_adapter_vpd = cxl_guest_read_adapter_vpd,
+};
diff --git a/drivers/misc/cxl/hcalls.c b/drivers/misc/cxl/hcalls.c
new file mode 100644
index 000000000..9b8bb0f80
--- /dev/null
+++ b/drivers/misc/cxl/hcalls.c
@@ -0,0 +1,647 @@
+/*
+ * Copyright 2015 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <asm/byteorder.h>
+#include "hcalls.h"
+#include "trace.h"
+
+#define CXL_HCALL_TIMEOUT 60000
+#define CXL_HCALL_TIMEOUT_DOWNLOAD 120000
+
+#define H_ATTACH_CA_PROCESS    0x344
+#define H_CONTROL_CA_FUNCTION  0x348
+#define H_DETACH_CA_PROCESS    0x34C
+#define H_COLLECT_CA_INT_INFO  0x350
+#define H_CONTROL_CA_FAULTS    0x354
+#define H_DOWNLOAD_CA_FUNCTION 0x35C
+#define H_DOWNLOAD_CA_FACILITY 0x364
+#define H_CONTROL_CA_FACILITY  0x368
+
+#define H_CONTROL_CA_FUNCTION_RESET                   1 /* perform a reset */
+#define H_CONTROL_CA_FUNCTION_SUSPEND_PROCESS         2 /* suspend a process from being executed */
+#define H_CONTROL_CA_FUNCTION_RESUME_PROCESS          3 /* resume a process to be executed */
+#define H_CONTROL_CA_FUNCTION_READ_ERR_STATE          4 /* read the error state */
+#define H_CONTROL_CA_FUNCTION_GET_AFU_ERR             5 /* collect the AFU error buffer */
+#define H_CONTROL_CA_FUNCTION_GET_CONFIG              6 /* collect configuration record */
+#define H_CONTROL_CA_FUNCTION_GET_DOWNLOAD_STATE      7 /* query to return download status */
+#define H_CONTROL_CA_FUNCTION_TERMINATE_PROCESS       8 /* terminate the process before completion */
+#define H_CONTROL_CA_FUNCTION_COLLECT_VPD             9 /* collect VPD */
+#define H_CONTROL_CA_FUNCTION_GET_FUNCTION_ERR_INT   11 /* read the function-wide error data based on an interrupt */
+#define H_CONTROL_CA_FUNCTION_ACK_FUNCTION_ERR_INT   12 /* acknowledge function-wide error data based on an interrupt */
+#define H_CONTROL_CA_FUNCTION_GET_ERROR_LOG          13 /* retrieve the Platform Log ID (PLID) of an error log */
+
+#define H_CONTROL_CA_FAULTS_RESPOND_PSL         1
+#define H_CONTROL_CA_FAULTS_RESPOND_AFU         2
+
+#define H_CONTROL_CA_FACILITY_RESET             1 /* perform a reset */
+#define H_CONTROL_CA_FACILITY_COLLECT_VPD       2 /* collect VPD */
+
+#define H_DOWNLOAD_CA_FACILITY_DOWNLOAD         1 /* download adapter image */
+#define H_DOWNLOAD_CA_FACILITY_VALIDATE         2 /* validate adapter image */
+
+
+#define _CXL_LOOP_HCALL(call, rc, retbuf, fn, ...)			\
+	{								\
+		unsigned int delay, total_delay = 0;			\
+		u64 token = 0;						\
+									\
+		memset(retbuf, 0, sizeof(retbuf));			\
+		while (1) {						\
+			rc = call(fn, retbuf, __VA_ARGS__, token);	\
+			token = retbuf[0];				\
+			if (rc != H_BUSY && !H_IS_LONG_BUSY(rc))	\
+				break;					\
+									\
+			if (rc == H_BUSY)				\
+				delay = 10;				\
+			else						\
+				delay = get_longbusy_msecs(rc);		\
+									\
+			total_delay += delay;				\
+			if (total_delay > CXL_HCALL_TIMEOUT) {		\
+				WARN(1, "Warning: Giving up waiting for CXL hcall " \
+					"%#x after %u msec\n", fn, total_delay); \
+				rc = H_BUSY;				\
+				break;					\
+			}						\
+			msleep(delay);					\
+		}							\
+	}
+#define CXL_H_WAIT_UNTIL_DONE(...)  _CXL_LOOP_HCALL(plpar_hcall, __VA_ARGS__)
+#define CXL_H9_WAIT_UNTIL_DONE(...) _CXL_LOOP_HCALL(plpar_hcall9, __VA_ARGS__)
+
+#define _PRINT_MSG(rc, format, ...)					\
+	{								\
+		if ((rc != H_SUCCESS) && (rc != H_CONTINUE))		\
+			pr_err(format, __VA_ARGS__);			\
+		else							\
+			pr_devel(format, __VA_ARGS__);			\
+	}								\
+
+
+static char *afu_op_names[] = {
+	"UNKNOWN_OP",		/* 0 undefined */
+	"RESET",		/* 1 */
+	"SUSPEND_PROCESS",	/* 2 */
+	"RESUME_PROCESS",	/* 3 */
+	"READ_ERR_STATE",	/* 4 */
+	"GET_AFU_ERR",		/* 5 */
+	"GET_CONFIG",		/* 6 */
+	"GET_DOWNLOAD_STATE",	/* 7 */
+	"TERMINATE_PROCESS",	/* 8 */
+	"COLLECT_VPD",		/* 9 */
+	"UNKNOWN_OP",		/* 10 undefined */
+	"GET_FUNCTION_ERR_INT",	/* 11 */
+	"ACK_FUNCTION_ERR_INT",	/* 12 */
+	"GET_ERROR_LOG",	/* 13 */
+};
+
+static char *control_adapter_op_names[] = {
+	"UNKNOWN_OP",		/* 0 undefined */
+	"RESET",		/* 1 */
+	"COLLECT_VPD",		/* 2 */
+};
+
+static char *download_op_names[] = {
+	"UNKNOWN_OP",		/* 0 undefined */
+	"DOWNLOAD",		/* 1 */
+	"VALIDATE",		/* 2 */
+};
+
+static char *op_str(unsigned int op, char *name_array[], int array_len)
+{
+	if (op >= array_len)
+		return "UNKNOWN_OP";
+	return name_array[op];
+}
+
+#define OP_STR(op, name_array)      op_str(op, name_array, ARRAY_SIZE(name_array))
+
+#define OP_STR_AFU(op)              OP_STR(op, afu_op_names)
+#define OP_STR_CONTROL_ADAPTER(op)  OP_STR(op, control_adapter_op_names)
+#define OP_STR_DOWNLOAD_ADAPTER(op) OP_STR(op, download_op_names)
+
+
+long cxl_h_attach_process(u64 unit_address,
+			struct cxl_process_element_hcall *element,
+			u64 *process_token, u64 *mmio_addr, u64 *mmio_size)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc;
+
+	CXL_H_WAIT_UNTIL_DONE(rc, retbuf, H_ATTACH_CA_PROCESS, unit_address, virt_to_phys(element));
+	_PRINT_MSG(rc, "cxl_h_attach_process(%#.16llx, %#.16lx): %li\n",
+		unit_address, virt_to_phys(element), rc);
+	trace_cxl_hcall_attach(unit_address, virt_to_phys(element), retbuf[0], retbuf[1], retbuf[2], rc);
+
+	pr_devel("token: 0x%.8lx mmio_addr: 0x%lx mmio_size: 0x%lx\nProcess Element Structure:\n",
+		retbuf[0], retbuf[1], retbuf[2]);
+	cxl_dump_debug_buffer(element, sizeof(*element));
+
+	switch (rc) {
+	case H_SUCCESS:       /* The process info is attached to the coherent platform function */
+		*process_token = retbuf[0];
+		if (mmio_addr)
+			*mmio_addr = retbuf[1];
+		if (mmio_size)
+			*mmio_size = retbuf[2];
+		return 0;
+	case H_PARAMETER:     /* An incorrect parameter was supplied. */
+	case H_FUNCTION:      /* The function is not supported. */
+		return -EINVAL;
+	case H_AUTHORITY:     /* The partition does not have authority to perform this hcall */
+	case H_RESOURCE:      /* The coherent platform function does not have enough additional resource to attach the process */
+	case H_HARDWARE:      /* A hardware event prevented the attach operation */
+	case H_STATE:         /* The coherent platform function is not in a valid state */
+	case H_BUSY:
+		return -EBUSY;
+	default:
+		WARN(1, "Unexpected return code: %lx", rc);
+		return -EINVAL;
+	}
+}
+
+/**
+ * cxl_h_detach_process - Detach a process element from a coherent
+ *                        platform function.
+ */
+long cxl_h_detach_process(u64 unit_address, u64 process_token)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc;
+
+	CXL_H_WAIT_UNTIL_DONE(rc, retbuf, H_DETACH_CA_PROCESS, unit_address, process_token);
+	_PRINT_MSG(rc, "cxl_h_detach_process(%#.16llx, 0x%.8llx): %li\n", unit_address, process_token, rc);
+	trace_cxl_hcall_detach(unit_address, process_token, rc);
+
+	switch (rc) {
+	case H_SUCCESS:       /* The process was detached from the coherent platform function */
+		return 0;
+	case H_PARAMETER:     /* An incorrect parameter was supplied. */
+		return -EINVAL;
+	case H_AUTHORITY:     /* The partition does not have authority to perform this hcall */
+	case H_RESOURCE:      /* The function has page table mappings for MMIO */
+	case H_HARDWARE:      /* A hardware event prevented the detach operation */
+	case H_STATE:         /* The coherent platform function is not in a valid state */
+	case H_BUSY:
+		return -EBUSY;
+	default:
+		WARN(1, "Unexpected return code: %lx", rc);
+		return -EINVAL;
+	}
+}
+
+/**
+ * cxl_h_control_function - This H_CONTROL_CA_FUNCTION hypervisor call allows
+ *                          the partition to manipulate or query
+ *                          certain coherent platform function behaviors.
+ */
+static long cxl_h_control_function(u64 unit_address, u64 op,
+				   u64 p1, u64 p2, u64 p3, u64 p4, u64 *out)
+{
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+	long rc;
+
+	CXL_H9_WAIT_UNTIL_DONE(rc, retbuf, H_CONTROL_CA_FUNCTION, unit_address, op, p1, p2, p3, p4);
+	_PRINT_MSG(rc, "cxl_h_control_function(%#.16llx, %s(%#llx, %#llx, %#llx, %#llx, R4: %#lx)): %li\n",
+		unit_address, OP_STR_AFU(op), p1, p2, p3, p4, retbuf[0], rc);
+	trace_cxl_hcall_control_function(unit_address, OP_STR_AFU(op), p1, p2, p3, p4, retbuf[0], rc);
+
+	switch (rc) {
+	case H_SUCCESS:       /* The operation is completed for the coherent platform function */
+		if ((op == H_CONTROL_CA_FUNCTION_GET_FUNCTION_ERR_INT ||
+		     op == H_CONTROL_CA_FUNCTION_READ_ERR_STATE ||
+		     op == H_CONTROL_CA_FUNCTION_COLLECT_VPD))
+			*out = retbuf[0];
+		return 0;
+	case H_PARAMETER:     /* An incorrect parameter was supplied. */
+	case H_FUNCTION:      /* The function is not supported. */
+	case H_NOT_FOUND:     /* The operation supplied was not valid */
+	case H_NOT_AVAILABLE: /* The operation cannot be performed because the AFU has not been downloaded */
+	case H_SG_LIST:       /* An block list entry was invalid */
+		return -EINVAL;
+	case H_AUTHORITY:     /* The partition does not have authority to perform this hcall */
+	case H_RESOURCE:      /* The function has page table mappings for MMIO */
+	case H_HARDWARE:      /* A hardware event prevented the attach operation */
+	case H_STATE:         /* The coherent platform function is not in a valid state */
+	case H_BUSY:
+		return -EBUSY;
+	default:
+		WARN(1, "Unexpected return code: %lx", rc);
+		return -EINVAL;
+	}
+}
+
+/**
+ * cxl_h_reset_afu - Perform a reset to the coherent platform function.
+ */
+long cxl_h_reset_afu(u64 unit_address)
+{
+	return cxl_h_control_function(unit_address,
+				H_CONTROL_CA_FUNCTION_RESET,
+				0, 0, 0, 0,
+				NULL);
+}
+
+/**
+ * cxl_h_suspend_process - Suspend a process from being executed
+ * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when
+ *              process was attached.
+ */
+long cxl_h_suspend_process(u64 unit_address, u64 process_token)
+{
+	return cxl_h_control_function(unit_address,
+				H_CONTROL_CA_FUNCTION_SUSPEND_PROCESS,
+				process_token, 0, 0, 0,
+				NULL);
+}
+
+/**
+ * cxl_h_resume_process - Resume a process to be executed
+ * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when
+ *              process was attached.
+ */
+long cxl_h_resume_process(u64 unit_address, u64 process_token)
+{
+	return cxl_h_control_function(unit_address,
+				H_CONTROL_CA_FUNCTION_RESUME_PROCESS,
+				process_token, 0, 0, 0,
+				NULL);
+}
+
+/**
+ * cxl_h_read_error_state - Checks the error state of the coherent
+ *                          platform function.
+ * R4 contains the error state
+ */
+long cxl_h_read_error_state(u64 unit_address, u64 *state)
+{
+	return cxl_h_control_function(unit_address,
+				H_CONTROL_CA_FUNCTION_READ_ERR_STATE,
+				0, 0, 0, 0,
+				state);
+}
+
+/**
+ * cxl_h_get_afu_err - collect the AFU error buffer
+ * Parameter1 = byte offset into error buffer to retrieve, valid values
+ *              are between 0 and (ibm,error-buffer-size - 1)
+ * Parameter2 = 4K aligned real address of error buffer, to be filled in
+ * Parameter3 = length of error buffer, valid values are 4K or less
+ */
+long cxl_h_get_afu_err(u64 unit_address, u64 offset,
+		u64 buf_address, u64 len)
+{
+	return cxl_h_control_function(unit_address,
+				H_CONTROL_CA_FUNCTION_GET_AFU_ERR,
+				offset, buf_address, len, 0,
+				NULL);
+}
+
+/**
+ * cxl_h_get_config - collect configuration record for the
+ *                    coherent platform function
+ * Parameter1 = # of configuration record to retrieve, valid values are
+ *              between 0 and (ibm,#config-records - 1)
+ * Parameter2 = byte offset into configuration record to retrieve,
+ *              valid values are between 0 and (ibm,config-record-size - 1)
+ * Parameter3 = 4K aligned real address of configuration record buffer,
+ *              to be filled in
+ * Parameter4 = length of configuration buffer, valid values are 4K or less
+ */
+long cxl_h_get_config(u64 unit_address, u64 cr_num, u64 offset,
+		u64 buf_address, u64 len)
+{
+	return cxl_h_control_function(unit_address,
+				H_CONTROL_CA_FUNCTION_GET_CONFIG,
+				cr_num, offset, buf_address, len,
+				NULL);
+}
+
+/**
+ * cxl_h_terminate_process - Terminate the process before completion
+ * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when
+ *              process was attached.
+ */
+long cxl_h_terminate_process(u64 unit_address, u64 process_token)
+{
+	return cxl_h_control_function(unit_address,
+				H_CONTROL_CA_FUNCTION_TERMINATE_PROCESS,
+				process_token, 0, 0, 0,
+				NULL);
+}
+
+/**
+ * cxl_h_collect_vpd - Collect VPD for the coherent platform function.
+ * Parameter1 = # of VPD record to retrieve, valid values are between 0
+ *              and (ibm,#config-records - 1).
+ * Parameter2 = 4K naturally aligned real buffer containing block
+ *              list entries
+ * Parameter3 = number of block list entries in the block list, valid
+ *              values are between 0 and 256
+ */
+long cxl_h_collect_vpd(u64 unit_address, u64 record, u64 list_address,
+		       u64 num, u64 *out)
+{
+	return cxl_h_control_function(unit_address,
+				H_CONTROL_CA_FUNCTION_COLLECT_VPD,
+				record, list_address, num, 0,
+				out);
+}
+
+/**
+ * cxl_h_get_fn_error_interrupt - Read the function-wide error data based on an interrupt
+ */
+long cxl_h_get_fn_error_interrupt(u64 unit_address, u64 *reg)
+{
+	return cxl_h_control_function(unit_address,
+				H_CONTROL_CA_FUNCTION_GET_FUNCTION_ERR_INT,
+				0, 0, 0, 0, reg);
+}
+
+/**
+ * cxl_h_ack_fn_error_interrupt - Acknowledge function-wide error data
+ *                                based on an interrupt
+ * Parameter1 = value to write to the function-wide error interrupt register
+ */
+long cxl_h_ack_fn_error_interrupt(u64 unit_address, u64 value)
+{
+	return cxl_h_control_function(unit_address,
+				H_CONTROL_CA_FUNCTION_ACK_FUNCTION_ERR_INT,
+				value, 0, 0, 0,
+				NULL);
+}
+
+/**
+ * cxl_h_get_error_log - Retrieve the Platform Log ID (PLID) of
+ *                       an error log
+ */
+long cxl_h_get_error_log(u64 unit_address, u64 value)
+{
+	return cxl_h_control_function(unit_address,
+				H_CONTROL_CA_FUNCTION_GET_ERROR_LOG,
+				0, 0, 0, 0,
+				NULL);
+}
+
+/**
+ * cxl_h_collect_int_info - Collect interrupt info about a coherent
+ *                          platform function after an interrupt occurred.
+ */
+long cxl_h_collect_int_info(u64 unit_address, u64 process_token,
+			    struct cxl_irq_info *info)
+{
+	long rc;
+
+	BUG_ON(sizeof(*info) != sizeof(unsigned long[PLPAR_HCALL9_BUFSIZE]));
+
+	rc = plpar_hcall9(H_COLLECT_CA_INT_INFO, (unsigned long *) info,
+			unit_address, process_token);
+	_PRINT_MSG(rc, "cxl_h_collect_int_info(%#.16llx, 0x%llx): %li\n",
+		unit_address, process_token, rc);
+	trace_cxl_hcall_collect_int_info(unit_address, process_token, rc);
+
+	switch (rc) {
+	case H_SUCCESS:     /* The interrupt info is returned in return registers. */
+		pr_devel("dsisr:%#llx, dar:%#llx, dsr:%#llx, pid_tid:%#llx, afu_err:%#llx, errstat:%#llx\n",
+			info->dsisr, info->dar, info->dsr, info->reserved,
+			info->afu_err, info->errstat);
+		return 0;
+	case H_PARAMETER:   /* An incorrect parameter was supplied. */
+		return -EINVAL;
+	case H_AUTHORITY:   /* The partition does not have authority to perform this hcall. */
+	case H_HARDWARE:    /* A hardware event prevented the collection of the interrupt info.*/
+	case H_STATE:       /* The coherent platform function is not in a valid state to collect interrupt info. */
+		return -EBUSY;
+	default:
+		WARN(1, "Unexpected return code: %lx", rc);
+		return -EINVAL;
+	}
+}
+
+/**
+ * cxl_h_control_faults - Control the operation of a coherent platform
+ *                        function after a fault occurs.
+ *
+ * Parameters
+ *    control-mask: value to control the faults
+ *                  looks like PSL_TFC_An shifted >> 32
+ *    reset-mask: mask to control reset of function faults
+ *                Set reset_mask = 1 to reset PSL errors
+ */
+long cxl_h_control_faults(u64 unit_address, u64 process_token,
+			  u64 control_mask, u64 reset_mask)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc;
+
+	memset(retbuf, 0, sizeof(retbuf));
+
+	rc = plpar_hcall(H_CONTROL_CA_FAULTS, retbuf, unit_address,
+			H_CONTROL_CA_FAULTS_RESPOND_PSL, process_token,
+			control_mask, reset_mask);
+	_PRINT_MSG(rc, "cxl_h_control_faults(%#.16llx, 0x%llx, %#llx, %#llx): %li (%#lx)\n",
+		unit_address, process_token, control_mask, reset_mask,
+		rc, retbuf[0]);
+	trace_cxl_hcall_control_faults(unit_address, process_token,
+				control_mask, reset_mask, retbuf[0], rc);
+
+	switch (rc) {
+	case H_SUCCESS:    /* Faults were successfully controlled for the function. */
+		return 0;
+	case H_PARAMETER:  /* An incorrect parameter was supplied. */
+		return -EINVAL;
+	case H_HARDWARE:   /* A hardware event prevented the control of faults. */
+	case H_STATE:      /* The function was in an invalid state. */
+	case H_AUTHORITY:  /* The partition does not have authority to perform this hcall; the coherent platform facilities may need to be licensed. */
+		return -EBUSY;
+	case H_FUNCTION:   /* The function is not supported */
+	case H_NOT_FOUND:  /* The operation supplied was not valid */
+		return -EINVAL;
+	default:
+		WARN(1, "Unexpected return code: %lx", rc);
+		return -EINVAL;
+	}
+}
+
+/**
+ * cxl_h_control_facility - This H_CONTROL_CA_FACILITY hypervisor call
+ *                          allows the partition to manipulate or query
+ *                          certain coherent platform facility behaviors.
+ */
+static long cxl_h_control_facility(u64 unit_address, u64 op,
+				   u64 p1, u64 p2, u64 p3, u64 p4, u64 *out)
+{
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+	long rc;
+
+	CXL_H9_WAIT_UNTIL_DONE(rc, retbuf, H_CONTROL_CA_FACILITY, unit_address, op, p1, p2, p3, p4);
+	_PRINT_MSG(rc, "cxl_h_control_facility(%#.16llx, %s(%#llx, %#llx, %#llx, %#llx, R4: %#lx)): %li\n",
+		unit_address, OP_STR_CONTROL_ADAPTER(op), p1, p2, p3, p4, retbuf[0], rc);
+	trace_cxl_hcall_control_facility(unit_address, OP_STR_CONTROL_ADAPTER(op), p1, p2, p3, p4, retbuf[0], rc);
+
+	switch (rc) {
+	case H_SUCCESS:       /* The operation is completed for the coherent platform facility */
+		if (op == H_CONTROL_CA_FACILITY_COLLECT_VPD)
+			*out = retbuf[0];
+		return 0;
+	case H_PARAMETER:     /* An incorrect parameter was supplied. */
+	case H_FUNCTION:      /* The function is not supported. */
+	case H_NOT_FOUND:     /* The operation supplied was not valid */
+	case H_NOT_AVAILABLE: /* The operation cannot be performed because the AFU has not been downloaded */
+	case H_SG_LIST:       /* An block list entry was invalid */
+		return -EINVAL;
+	case H_AUTHORITY:     /* The partition does not have authority to perform this hcall */
+	case H_RESOURCE:      /* The function has page table mappings for MMIO */
+	case H_HARDWARE:      /* A hardware event prevented the attach operation */
+	case H_STATE:         /* The coherent platform facility is not in a valid state */
+	case H_BUSY:
+		return -EBUSY;
+	default:
+		WARN(1, "Unexpected return code: %lx", rc);
+		return -EINVAL;
+	}
+}
+
+/**
+ * cxl_h_reset_adapter - Perform a reset to the coherent platform facility.
+ */
+long cxl_h_reset_adapter(u64 unit_address)
+{
+	return cxl_h_control_facility(unit_address,
+				H_CONTROL_CA_FACILITY_RESET,
+				0, 0, 0, 0,
+				NULL);
+}
+
+/**
+ * cxl_h_collect_vpd - Collect VPD for the coherent platform function.
+ * Parameter1 = 4K naturally aligned real buffer containing block
+ *              list entries
+ * Parameter2 = number of block list entries in the block list, valid
+ *              values are between 0 and 256
+ */
+long cxl_h_collect_vpd_adapter(u64 unit_address, u64 list_address,
+			       u64 num, u64 *out)
+{
+	return cxl_h_control_facility(unit_address,
+				H_CONTROL_CA_FACILITY_COLLECT_VPD,
+				list_address, num, 0, 0,
+				out);
+}
+
+/**
+ * cxl_h_download_facility - This H_DOWNLOAD_CA_FACILITY
+ *                    hypervisor call provide platform support for
+ *                    downloading a base adapter image to the coherent
+ *                    platform facility, and for validating the entire
+ *                    image after the download.
+ * Parameters
+ *    op: operation to perform to the coherent platform function
+ *      Download: operation = 1, the base image in the coherent platform
+ *                               facility is first erased, and then
+ *                               programmed using the image supplied
+ *                               in the scatter/gather list.
+ *      Validate: operation = 2, the base image in the coherent platform
+ *                               facility is compared with the image
+ *                               supplied in the scatter/gather list.
+ *    list_address: 4K naturally aligned real buffer containing
+ *                  scatter/gather list entries.
+ *    num: number of block list entries in the scatter/gather list.
+ */
+static long cxl_h_download_facility(u64 unit_address, u64 op,
+				    u64 list_address, u64 num,
+				    u64 *out)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	unsigned int delay, total_delay = 0;
+	u64 token = 0;
+	long rc;
+
+	if (*out != 0)
+		token = *out;
+
+	memset(retbuf, 0, sizeof(retbuf));
+	while (1) {
+		rc = plpar_hcall(H_DOWNLOAD_CA_FACILITY, retbuf,
+				 unit_address, op, list_address, num,
+				 token);
+		token = retbuf[0];
+		if (rc != H_BUSY && !H_IS_LONG_BUSY(rc))
+			break;
+
+		if (rc != H_BUSY) {
+			delay = get_longbusy_msecs(rc);
+			total_delay += delay;
+			if (total_delay > CXL_HCALL_TIMEOUT_DOWNLOAD) {
+				WARN(1, "Warning: Giving up waiting for CXL hcall "
+					"%#x after %u msec\n",
+					H_DOWNLOAD_CA_FACILITY, total_delay);
+				rc = H_BUSY;
+				break;
+			}
+			msleep(delay);
+		}
+	}
+	_PRINT_MSG(rc, "cxl_h_download_facility(%#.16llx, %s(%#llx, %#llx), %#lx): %li\n",
+		 unit_address, OP_STR_DOWNLOAD_ADAPTER(op), list_address, num, retbuf[0], rc);
+	trace_cxl_hcall_download_facility(unit_address, OP_STR_DOWNLOAD_ADAPTER(op), list_address, num, retbuf[0], rc);
+
+	switch (rc) {
+	case H_SUCCESS:       /* The operation is completed for the coherent platform facility */
+		return 0;
+	case H_PARAMETER:     /* An incorrect parameter was supplied */
+	case H_FUNCTION:      /* The function is not supported. */
+	case H_SG_LIST:       /* An block list entry was invalid */
+	case H_BAD_DATA:      /* Image verification failed */
+		return -EINVAL;
+	case H_AUTHORITY:     /* The partition does not have authority to perform this hcall */
+	case H_RESOURCE:      /* The function has page table mappings for MMIO */
+	case H_HARDWARE:      /* A hardware event prevented the attach operation */
+	case H_STATE:         /* The coherent platform facility is not in a valid state */
+	case H_BUSY:
+		return -EBUSY;
+	case H_CONTINUE:
+		*out = retbuf[0];
+		return 1;  /* More data is needed for the complete image */
+	default:
+		WARN(1, "Unexpected return code: %lx", rc);
+		return -EINVAL;
+	}
+}
+
+/**
+ * cxl_h_download_adapter_image - Download the base image to the coherent
+ *                                platform facility.
+ */
+long cxl_h_download_adapter_image(u64 unit_address,
+				  u64 list_address, u64 num,
+				  u64 *out)
+{
+	return cxl_h_download_facility(unit_address,
+				       H_DOWNLOAD_CA_FACILITY_DOWNLOAD,
+				       list_address, num, out);
+}
+
+/**
+ * cxl_h_validate_adapter_image - Validate the base image in the coherent
+ *                                platform facility.
+ */
+long cxl_h_validate_adapter_image(u64 unit_address,
+				  u64 list_address, u64 num,
+				  u64 *out)
+{
+	return cxl_h_download_facility(unit_address,
+				       H_DOWNLOAD_CA_FACILITY_VALIDATE,
+				       list_address, num, out);
+}
diff --git a/drivers/misc/cxl/hcalls.h b/drivers/misc/cxl/hcalls.h
new file mode 100644
index 000000000..3e25522a5
--- /dev/null
+++ b/drivers/misc/cxl/hcalls.h
@@ -0,0 +1,204 @@
+/*
+ * Copyright 2015 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _HCALLS_H
+#define _HCALLS_H
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include <asm/hvcall.h>
+#include "cxl.h"
+
+#define SG_BUFFER_SIZE 4096
+#define SG_MAX_ENTRIES 256
+
+struct sg_list {
+	u64 phys_addr;
+	u64 len;
+};
+
+/*
+ * This is straight out of PAPR, but replacing some of the compound fields with
+ * a single field, where they were identical to the register layout.
+ *
+ * The 'flags' parameter regroups the various bit-fields
+ */
+#define CXL_PE_CSRP_VALID			(1ULL << 63)
+#define CXL_PE_PROBLEM_STATE			(1ULL << 62)
+#define CXL_PE_SECONDARY_SEGMENT_TBL_SRCH	(1ULL << 61)
+#define CXL_PE_TAGS_ACTIVE			(1ULL << 60)
+#define CXL_PE_USER_STATE			(1ULL << 59)
+#define CXL_PE_TRANSLATION_ENABLED		(1ULL << 58)
+#define CXL_PE_64_BIT				(1ULL << 57)
+#define CXL_PE_PRIVILEGED_PROCESS		(1ULL << 56)
+
+#define CXL_PROCESS_ELEMENT_VERSION 1
+struct cxl_process_element_hcall {
+	__be64 version;
+	__be64 flags;
+	u8     reserved0[12];
+	__be32 pslVirtualIsn;
+	u8     applicationVirtualIsnBitmap[256];
+	u8     reserved1[144];
+	struct cxl_process_element_common common;
+	u8     reserved4[12];
+} __packed;
+
+#define H_STATE_NORMAL              1
+#define H_STATE_DISABLE             2
+#define H_STATE_TEMP_UNAVAILABLE    3
+#define H_STATE_PERM_UNAVAILABLE    4
+
+/* NOTE: element must be a logical real address, and must be pinned */
+long cxl_h_attach_process(u64 unit_address, struct cxl_process_element_hcall *element,
+			u64 *process_token, u64 *mmio_addr, u64 *mmio_size);
+
+/**
+ * cxl_h_detach_process - Detach a process element from a coherent
+ *                        platform function.
+ */
+long cxl_h_detach_process(u64 unit_address, u64 process_token);
+
+/**
+ * cxl_h_reset_afu - Perform a reset to the coherent platform function.
+ */
+long cxl_h_reset_afu(u64 unit_address);
+
+/**
+ * cxl_h_suspend_process - Suspend a process from being executed
+ * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when
+ *              process was attached.
+ */
+long cxl_h_suspend_process(u64 unit_address, u64 process_token);
+
+/**
+ * cxl_h_resume_process - Resume a process to be executed
+ * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when
+ *              process was attached.
+ */
+long cxl_h_resume_process(u64 unit_address, u64 process_token);
+
+/**
+ * cxl_h_read_error_state - Reads the error state of the coherent
+ *                          platform function.
+ * R4 contains the error state
+ */
+long cxl_h_read_error_state(u64 unit_address, u64 *state);
+
+/**
+ * cxl_h_get_afu_err - collect the AFU error buffer
+ * Parameter1 = byte offset into error buffer to retrieve, valid values
+ *              are between 0 and (ibm,error-buffer-size - 1)
+ * Parameter2 = 4K aligned real address of error buffer, to be filled in
+ * Parameter3 = length of error buffer, valid values are 4K or less
+ */
+long cxl_h_get_afu_err(u64 unit_address, u64 offset, u64 buf_address, u64 len);
+
+/**
+ * cxl_h_get_config - collect configuration record for the
+ *                    coherent platform function
+ * Parameter1 = # of configuration record to retrieve, valid values are
+ *              between 0 and (ibm,#config-records - 1)
+ * Parameter2 = byte offset into configuration record to retrieve,
+ *              valid values are between 0 and (ibm,config-record-size - 1)
+ * Parameter3 = 4K aligned real address of configuration record buffer,
+ *              to be filled in
+ * Parameter4 = length of configuration buffer, valid values are 4K or less
+ */
+long cxl_h_get_config(u64 unit_address, u64 cr_num, u64 offset,
+		u64 buf_address, u64 len);
+
+/**
+ * cxl_h_terminate_process - Terminate the process before completion
+ * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when
+ *              process was attached.
+ */
+long cxl_h_terminate_process(u64 unit_address, u64 process_token);
+
+/**
+ * cxl_h_collect_vpd - Collect VPD for the coherent platform function.
+ * Parameter1 = # of VPD record to retrieve, valid values are between 0
+ *              and (ibm,#config-records - 1).
+ * Parameter2 = 4K naturally aligned real buffer containing block
+ *              list entries
+ * Parameter3 = number of block list entries in the block list, valid
+ *              values are between 0 and 256
+ */
+long cxl_h_collect_vpd(u64 unit_address, u64 record, u64 list_address,
+		       u64 num, u64 *out);
+
+/**
+ * cxl_h_get_fn_error_interrupt - Read the function-wide error data based on an interrupt
+ */
+long cxl_h_get_fn_error_interrupt(u64 unit_address, u64 *reg);
+
+/**
+ * cxl_h_ack_fn_error_interrupt - Acknowledge function-wide error data
+ *                                based on an interrupt
+ * Parameter1 = value to write to the function-wide error interrupt register
+ */
+long cxl_h_ack_fn_error_interrupt(u64 unit_address, u64 value);
+
+/**
+ * cxl_h_get_error_log - Retrieve the Platform Log ID (PLID) of
+ *                       an error log
+ */
+long cxl_h_get_error_log(u64 unit_address, u64 value);
+
+/**
+ * cxl_h_collect_int_info - Collect interrupt info about a coherent
+ *                          platform function after an interrupt occurred.
+ */
+long cxl_h_collect_int_info(u64 unit_address, u64 process_token,
+			struct cxl_irq_info *info);
+
+/**
+ * cxl_h_control_faults - Control the operation of a coherent platform
+ *                        function after a fault occurs.
+ *
+ * Parameters
+ *    control-mask: value to control the faults
+ *                  looks like PSL_TFC_An shifted >> 32
+ *    reset-mask: mask to control reset of function faults
+ *                Set reset_mask = 1 to reset PSL errors
+ */
+long cxl_h_control_faults(u64 unit_address, u64 process_token,
+			u64 control_mask, u64 reset_mask);
+
+/**
+ * cxl_h_reset_adapter - Perform a reset to the coherent platform facility.
+ */
+long cxl_h_reset_adapter(u64 unit_address);
+
+/**
+ * cxl_h_collect_vpd - Collect VPD for the coherent platform function.
+ * Parameter1 = 4K naturally aligned real buffer containing block
+ *              list entries
+ * Parameter2 = number of block list entries in the block list, valid
+ *              values are between 0 and 256
+ */
+long cxl_h_collect_vpd_adapter(u64 unit_address, u64 list_address,
+			       u64 num, u64 *out);
+
+/**
+ * cxl_h_download_adapter_image - Download the base image to the coherent
+ *                                platform facility.
+ */
+long cxl_h_download_adapter_image(u64 unit_address,
+				  u64 list_address, u64 num,
+				  u64 *out);
+
+/**
+ * cxl_h_validate_adapter_image - Validate the base image in the coherent
+ *                                platform facility.
+ */
+long cxl_h_validate_adapter_image(u64 unit_address,
+				  u64 list_address, u64 num,
+				  u64 *out);
+#endif /* _HCALLS_H */
diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c
new file mode 100644
index 000000000..ce08a9f22
--- /dev/null
+++ b/drivers/misc/cxl/irq.c
@@ -0,0 +1,453 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/workqueue.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/slab.h>
+#include <linux/pid.h>
+#include <asm/cputable.h>
+#include <misc/cxl-base.h>
+
+#include "cxl.h"
+#include "trace.h"
+
+static int afu_irq_range_start(void)
+{
+	if (cpu_has_feature(CPU_FTR_HVMODE))
+		return 1;
+	return 0;
+}
+
+static irqreturn_t schedule_cxl_fault(struct cxl_context *ctx, u64 dsisr, u64 dar)
+{
+	ctx->dsisr = dsisr;
+	ctx->dar = dar;
+	schedule_work(&ctx->fault_work);
+	return IRQ_HANDLED;
+}
+
+irqreturn_t cxl_irq_psl9(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info)
+{
+	u64 dsisr, dar;
+
+	dsisr = irq_info->dsisr;
+	dar = irq_info->dar;
+
+	trace_cxl_psl9_irq(ctx, irq, dsisr, dar);
+
+	pr_devel("CXL interrupt %i for afu pe: %i DSISR: %#llx DAR: %#llx\n", irq, ctx->pe, dsisr, dar);
+
+	if (dsisr & CXL_PSL9_DSISR_An_TF) {
+		pr_devel("CXL interrupt: Scheduling translation fault handling for later (pe: %i)\n", ctx->pe);
+		return schedule_cxl_fault(ctx, dsisr, dar);
+	}
+
+	if (dsisr & CXL_PSL9_DSISR_An_PE)
+		return cxl_ops->handle_psl_slice_error(ctx, dsisr,
+						irq_info->errstat);
+	if (dsisr & CXL_PSL9_DSISR_An_AE) {
+		pr_devel("CXL interrupt: AFU Error 0x%016llx\n", irq_info->afu_err);
+
+		if (ctx->pending_afu_err) {
+			/*
+			 * This shouldn't happen - the PSL treats these errors
+			 * as fatal and will have reset the AFU, so there's not
+			 * much point buffering multiple AFU errors.
+			 * OTOH if we DO ever see a storm of these come in it's
+			 * probably best that we log them somewhere:
+			 */
+			dev_err_ratelimited(&ctx->afu->dev, "CXL AFU Error undelivered to pe %i: 0x%016llx\n",
+					    ctx->pe, irq_info->afu_err);
+		} else {
+			spin_lock(&ctx->lock);
+			ctx->afu_err = irq_info->afu_err;
+			ctx->pending_afu_err = 1;
+			spin_unlock(&ctx->lock);
+
+			wake_up_all(&ctx->wq);
+		}
+
+		cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_A, 0);
+		return IRQ_HANDLED;
+	}
+	if (dsisr & CXL_PSL9_DSISR_An_OC)
+		pr_devel("CXL interrupt: OS Context Warning\n");
+
+	WARN(1, "Unhandled CXL PSL IRQ\n");
+	return IRQ_HANDLED;
+}
+
+irqreturn_t cxl_irq_psl8(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info)
+{
+	u64 dsisr, dar;
+
+	dsisr = irq_info->dsisr;
+	dar = irq_info->dar;
+
+	trace_cxl_psl_irq(ctx, irq, dsisr, dar);
+
+	pr_devel("CXL interrupt %i for afu pe: %i DSISR: %#llx DAR: %#llx\n", irq, ctx->pe, dsisr, dar);
+
+	if (dsisr & CXL_PSL_DSISR_An_DS) {
+		/*
+		 * We don't inherently need to sleep to handle this, but we do
+		 * need to get a ref to the task's mm, which we can't do from
+		 * irq context without the potential for a deadlock since it
+		 * takes the task_lock. An alternate option would be to keep a
+		 * reference to the task's mm the entire time it has cxl open,
+		 * but to do that we need to solve the issue where we hold a
+		 * ref to the mm, but the mm can hold a ref to the fd after an
+		 * mmap preventing anything from being cleaned up.
+		 */
+		pr_devel("Scheduling segment miss handling for later pe: %i\n", ctx->pe);
+		return schedule_cxl_fault(ctx, dsisr, dar);
+	}
+
+	if (dsisr & CXL_PSL_DSISR_An_M)
+		pr_devel("CXL interrupt: PTE not found\n");
+	if (dsisr & CXL_PSL_DSISR_An_P)
+		pr_devel("CXL interrupt: Storage protection violation\n");
+	if (dsisr & CXL_PSL_DSISR_An_A)
+		pr_devel("CXL interrupt: AFU lock access to write through or cache inhibited storage\n");
+	if (dsisr & CXL_PSL_DSISR_An_S)
+		pr_devel("CXL interrupt: Access was afu_wr or afu_zero\n");
+	if (dsisr & CXL_PSL_DSISR_An_K)
+		pr_devel("CXL interrupt: Access not permitted by virtual page class key protection\n");
+
+	if (dsisr & CXL_PSL_DSISR_An_DM) {
+		/*
+		 * In some cases we might be able to handle the fault
+		 * immediately if hash_page would succeed, but we still need
+		 * the task's mm, which as above we can't get without a lock
+		 */
+		pr_devel("Scheduling page fault handling for later pe: %i\n", ctx->pe);
+		return schedule_cxl_fault(ctx, dsisr, dar);
+	}
+	if (dsisr & CXL_PSL_DSISR_An_ST)
+		WARN(1, "CXL interrupt: Segment Table PTE not found\n");
+	if (dsisr & CXL_PSL_DSISR_An_UR)
+		pr_devel("CXL interrupt: AURP PTE not found\n");
+	if (dsisr & CXL_PSL_DSISR_An_PE)
+		return cxl_ops->handle_psl_slice_error(ctx, dsisr,
+						irq_info->errstat);
+	if (dsisr & CXL_PSL_DSISR_An_AE) {
+		pr_devel("CXL interrupt: AFU Error 0x%016llx\n", irq_info->afu_err);
+
+		if (ctx->pending_afu_err) {
+			/*
+			 * This shouldn't happen - the PSL treats these errors
+			 * as fatal and will have reset the AFU, so there's not
+			 * much point buffering multiple AFU errors.
+			 * OTOH if we DO ever see a storm of these come in it's
+			 * probably best that we log them somewhere:
+			 */
+			dev_err_ratelimited(&ctx->afu->dev, "CXL AFU Error "
+					    "undelivered to pe %i: 0x%016llx\n",
+					    ctx->pe, irq_info->afu_err);
+		} else {
+			spin_lock(&ctx->lock);
+			ctx->afu_err = irq_info->afu_err;
+			ctx->pending_afu_err = true;
+			spin_unlock(&ctx->lock);
+
+			wake_up_all(&ctx->wq);
+		}
+
+		cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_A, 0);
+		return IRQ_HANDLED;
+	}
+	if (dsisr & CXL_PSL_DSISR_An_OC)
+		pr_devel("CXL interrupt: OS Context Warning\n");
+
+	WARN(1, "Unhandled CXL PSL IRQ\n");
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t cxl_irq_afu(int irq, void *data)
+{
+	struct cxl_context *ctx = data;
+	irq_hw_number_t hwirq = irqd_to_hwirq(irq_get_irq_data(irq));
+	int irq_off, afu_irq = 0;
+	__u16 range;
+	int r;
+
+	/*
+	 * Look for the interrupt number.
+	 * On bare-metal, we know range 0 only contains the PSL
+	 * interrupt so we could start counting at range 1 and initialize
+	 * afu_irq at 1.
+	 * In a guest, range 0 also contains AFU interrupts, so it must
+	 * be counted for. Therefore we initialize afu_irq at 0 to take into
+	 * account the PSL interrupt.
+	 *
+	 * For code-readability, it just seems easier to go over all
+	 * the ranges on bare-metal and guest. The end result is the same.
+	 */
+	for (r = 0; r < CXL_IRQ_RANGES; r++) {
+		irq_off = hwirq - ctx->irqs.offset[r];
+		range = ctx->irqs.range[r];
+		if (irq_off >= 0 && irq_off < range) {
+			afu_irq += irq_off;
+			break;
+		}
+		afu_irq += range;
+	}
+	if (unlikely(r >= CXL_IRQ_RANGES)) {
+		WARN(1, "Received AFU IRQ out of range for pe %i (virq %i hwirq %lx)\n",
+		     ctx->pe, irq, hwirq);
+		return IRQ_HANDLED;
+	}
+
+	trace_cxl_afu_irq(ctx, afu_irq, irq, hwirq);
+	pr_devel("Received AFU interrupt %i for pe: %i (virq %i hwirq %lx)\n",
+	       afu_irq, ctx->pe, irq, hwirq);
+
+	if (unlikely(!ctx->irq_bitmap)) {
+		WARN(1, "Received AFU IRQ for context with no IRQ bitmap\n");
+		return IRQ_HANDLED;
+	}
+	spin_lock(&ctx->lock);
+	set_bit(afu_irq - 1, ctx->irq_bitmap);
+	ctx->pending_irq = true;
+	spin_unlock(&ctx->lock);
+
+	wake_up_all(&ctx->wq);
+
+	return IRQ_HANDLED;
+}
+
+unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq,
+			 irq_handler_t handler, void *cookie, const char *name)
+{
+	unsigned int virq;
+	int result;
+
+	/* IRQ Domain? */
+	virq = irq_create_mapping(NULL, hwirq);
+	if (!virq) {
+		dev_warn(&adapter->dev, "cxl_map_irq: irq_create_mapping failed\n");
+		return 0;
+	}
+
+	if (cxl_ops->setup_irq)
+		cxl_ops->setup_irq(adapter, hwirq, virq);
+
+	pr_devel("hwirq %#lx mapped to virq %u\n", hwirq, virq);
+
+	result = request_irq(virq, handler, 0, name, cookie);
+	if (result) {
+		dev_warn(&adapter->dev, "cxl_map_irq: request_irq failed: %i\n", result);
+		return 0;
+	}
+
+	return virq;
+}
+
+void cxl_unmap_irq(unsigned int virq, void *cookie)
+{
+	free_irq(virq, cookie);
+}
+
+int cxl_register_one_irq(struct cxl *adapter,
+			irq_handler_t handler,
+			void *cookie,
+			irq_hw_number_t *dest_hwirq,
+			unsigned int *dest_virq,
+			const char *name)
+{
+	int hwirq, virq;
+
+	if ((hwirq = cxl_ops->alloc_one_irq(adapter)) < 0)
+		return hwirq;
+
+	if (!(virq = cxl_map_irq(adapter, hwirq, handler, cookie, name)))
+		goto err;
+
+	*dest_hwirq = hwirq;
+	*dest_virq = virq;
+
+	return 0;
+
+err:
+	cxl_ops->release_one_irq(adapter, hwirq);
+	return -ENOMEM;
+}
+
+void afu_irq_name_free(struct cxl_context *ctx)
+{
+	struct cxl_irq_name *irq_name, *tmp;
+
+	list_for_each_entry_safe(irq_name, tmp, &ctx->irq_names, list) {
+		kfree(irq_name->name);
+		list_del(&irq_name->list);
+		kfree(irq_name);
+	}
+}
+
+int afu_allocate_irqs(struct cxl_context *ctx, u32 count)
+{
+	int rc, r, i, j = 1;
+	struct cxl_irq_name *irq_name;
+	int alloc_count;
+
+	/*
+	 * In native mode, range 0 is reserved for the multiplexed
+	 * PSL interrupt. It has been allocated when the AFU was initialized.
+	 *
+	 * In a guest, the PSL interrupt is not mutliplexed, but per-context,
+	 * and is the first interrupt from range 0. It still needs to be
+	 * allocated, so bump the count by one.
+	 */
+	if (cpu_has_feature(CPU_FTR_HVMODE))
+		alloc_count = count;
+	else
+		alloc_count = count + 1;
+
+	if ((rc = cxl_ops->alloc_irq_ranges(&ctx->irqs, ctx->afu->adapter,
+							alloc_count)))
+		return rc;
+
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		/* Multiplexed PSL Interrupt */
+		ctx->irqs.offset[0] = ctx->afu->native->psl_hwirq;
+		ctx->irqs.range[0] = 1;
+	}
+
+	ctx->irq_count = count;
+	ctx->irq_bitmap = kcalloc(BITS_TO_LONGS(count),
+				  sizeof(*ctx->irq_bitmap), GFP_KERNEL);
+	if (!ctx->irq_bitmap)
+		goto out;
+
+	/*
+	 * Allocate names first.  If any fail, bail out before allocating
+	 * actual hardware IRQs.
+	 */
+	for (r = afu_irq_range_start(); r < CXL_IRQ_RANGES; r++) {
+		for (i = 0; i < ctx->irqs.range[r]; i++) {
+			irq_name = kmalloc(sizeof(struct cxl_irq_name),
+					   GFP_KERNEL);
+			if (!irq_name)
+				goto out;
+			irq_name->name = kasprintf(GFP_KERNEL, "cxl-%s-pe%i-%i",
+						   dev_name(&ctx->afu->dev),
+						   ctx->pe, j);
+			if (!irq_name->name) {
+				kfree(irq_name);
+				goto out;
+			}
+			/* Add to tail so next look get the correct order */
+			list_add_tail(&irq_name->list, &ctx->irq_names);
+			j++;
+		}
+	}
+	return 0;
+
+out:
+	cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
+	afu_irq_name_free(ctx);
+	return -ENOMEM;
+}
+
+static void afu_register_hwirqs(struct cxl_context *ctx)
+{
+	irq_hw_number_t hwirq;
+	struct cxl_irq_name *irq_name;
+	int r, i;
+	irqreturn_t (*handler)(int irq, void *data);
+
+	/* We've allocated all memory now, so let's do the irq allocations */
+	irq_name = list_first_entry(&ctx->irq_names, struct cxl_irq_name, list);
+	for (r = afu_irq_range_start(); r < CXL_IRQ_RANGES; r++) {
+		hwirq = ctx->irqs.offset[r];
+		for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) {
+			if (r == 0 && i == 0)
+				/*
+				 * The very first interrupt of range 0 is
+				 * always the PSL interrupt, but we only
+				 * need to connect a handler for guests,
+				 * because there's one PSL interrupt per
+				 * context.
+				 * On bare-metal, the PSL interrupt is
+				 * multiplexed and was setup when the AFU
+				 * was configured.
+				 */
+				handler = cxl_ops->psl_interrupt;
+			else
+				handler = cxl_irq_afu;
+			cxl_map_irq(ctx->afu->adapter, hwirq, handler, ctx,
+				irq_name->name);
+			irq_name = list_next_entry(irq_name, list);
+		}
+	}
+}
+
+int afu_register_irqs(struct cxl_context *ctx, u32 count)
+{
+	int rc;
+
+	rc = afu_allocate_irqs(ctx, count);
+	if (rc)
+		return rc;
+
+	afu_register_hwirqs(ctx);
+	return 0;
+}
+
+void afu_release_irqs(struct cxl_context *ctx, void *cookie)
+{
+	irq_hw_number_t hwirq;
+	unsigned int virq;
+	int r, i;
+
+	for (r = afu_irq_range_start(); r < CXL_IRQ_RANGES; r++) {
+		hwirq = ctx->irqs.offset[r];
+		for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) {
+			virq = irq_find_mapping(NULL, hwirq);
+			if (virq)
+				cxl_unmap_irq(virq, cookie);
+		}
+	}
+
+	afu_irq_name_free(ctx);
+	cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
+
+	ctx->irq_count = 0;
+}
+
+void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr)
+{
+	dev_crit(&afu->dev,
+		 "PSL Slice error received. Check AFU for root cause.\n");
+	dev_crit(&afu->dev, "PSL_SERR_An: 0x%016llx\n", serr);
+	if (serr & CXL_PSL_SERR_An_afuto)
+		dev_crit(&afu->dev, "AFU MMIO Timeout\n");
+	if (serr & CXL_PSL_SERR_An_afudis)
+		dev_crit(&afu->dev,
+			 "MMIO targeted Accelerator that was not enabled\n");
+	if (serr & CXL_PSL_SERR_An_afuov)
+		dev_crit(&afu->dev, "AFU CTAG Overflow\n");
+	if (serr & CXL_PSL_SERR_An_badsrc)
+		dev_crit(&afu->dev, "Bad Interrupt Source\n");
+	if (serr & CXL_PSL_SERR_An_badctx)
+		dev_crit(&afu->dev, "Bad Context Handle\n");
+	if (serr & CXL_PSL_SERR_An_llcmdis)
+		dev_crit(&afu->dev, "LLCMD to Disabled AFU\n");
+	if (serr & CXL_PSL_SERR_An_llcmdto)
+		dev_crit(&afu->dev, "LLCMD Timeout to AFU\n");
+	if (serr & CXL_PSL_SERR_An_afupar)
+		dev_crit(&afu->dev, "AFU MMIO Parity Error\n");
+	if (serr & CXL_PSL_SERR_An_afudup)
+		dev_crit(&afu->dev, "AFU MMIO Duplicate CTAG Error\n");
+	if (serr & CXL_PSL_SERR_An_AE)
+		dev_crit(&afu->dev,
+			 "AFU asserted JDONE with JERROR in AFU Directed Mode\n");
+}
diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c
new file mode 100644
index 000000000..f35406be4
--- /dev/null
+++ b/drivers/misc/cxl/main.c
@@ -0,0 +1,382 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/idr.h>
+#include <linux/pci.h>
+#include <linux/sched/task.h>
+
+#include <asm/cputable.h>
+#include <misc/cxl-base.h>
+
+#include "cxl.h"
+#include "trace.h"
+
+static DEFINE_SPINLOCK(adapter_idr_lock);
+static DEFINE_IDR(cxl_adapter_idr);
+
+uint cxl_verbose;
+module_param_named(verbose, cxl_verbose, uint, 0600);
+MODULE_PARM_DESC(verbose, "Enable verbose dmesg output");
+
+const struct cxl_backend_ops *cxl_ops;
+
+int cxl_afu_slbia(struct cxl_afu *afu)
+{
+	unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
+
+	pr_devel("cxl_afu_slbia issuing SLBIA command\n");
+	cxl_p2n_write(afu, CXL_SLBIA_An, CXL_TLB_SLB_IQ_ALL);
+	while (cxl_p2n_read(afu, CXL_SLBIA_An) & CXL_TLB_SLB_P) {
+		if (time_after_eq(jiffies, timeout)) {
+			dev_warn(&afu->dev, "WARNING: CXL AFU SLBIA timed out!\n");
+			return -EBUSY;
+		}
+		/* If the adapter has gone down, we can assume that we
+		 * will PERST it and that will invalidate everything.
+		 */
+		if (!cxl_ops->link_ok(afu->adapter, afu))
+			return -EIO;
+		cpu_relax();
+	}
+	return 0;
+}
+
+static inline void _cxl_slbia(struct cxl_context *ctx, struct mm_struct *mm)
+{
+	unsigned long flags;
+
+	if (ctx->mm != mm)
+		return;
+
+	pr_devel("%s matched mm - card: %i afu: %i pe: %i\n", __func__,
+		 ctx->afu->adapter->adapter_num, ctx->afu->slice, ctx->pe);
+
+	spin_lock_irqsave(&ctx->sste_lock, flags);
+	trace_cxl_slbia(ctx);
+	memset(ctx->sstp, 0, ctx->sst_size);
+	spin_unlock_irqrestore(&ctx->sste_lock, flags);
+	mb();
+	cxl_afu_slbia(ctx->afu);
+}
+
+static inline void cxl_slbia_core(struct mm_struct *mm)
+{
+	struct cxl *adapter;
+	struct cxl_afu *afu;
+	struct cxl_context *ctx;
+	int card, slice, id;
+
+	pr_devel("%s called\n", __func__);
+
+	spin_lock(&adapter_idr_lock);
+	idr_for_each_entry(&cxl_adapter_idr, adapter, card) {
+		/* XXX: Make this lookup faster with link from mm to ctx */
+		spin_lock(&adapter->afu_list_lock);
+		for (slice = 0; slice < adapter->slices; slice++) {
+			afu = adapter->afu[slice];
+			if (!afu || !afu->enabled)
+				continue;
+			rcu_read_lock();
+			idr_for_each_entry(&afu->contexts_idr, ctx, id)
+				_cxl_slbia(ctx, mm);
+			rcu_read_unlock();
+		}
+		spin_unlock(&adapter->afu_list_lock);
+	}
+	spin_unlock(&adapter_idr_lock);
+}
+
+static struct cxl_calls cxl_calls = {
+	.cxl_slbia = cxl_slbia_core,
+	.owner = THIS_MODULE,
+};
+
+int cxl_alloc_sst(struct cxl_context *ctx)
+{
+	unsigned long vsid;
+	u64 ea_mask, size, sstp0, sstp1;
+
+	sstp0 = 0;
+	sstp1 = 0;
+
+	ctx->sst_size = PAGE_SIZE;
+	ctx->sst_lru = 0;
+	ctx->sstp = (struct cxl_sste *)get_zeroed_page(GFP_KERNEL);
+	if (!ctx->sstp) {
+		pr_err("cxl_alloc_sst: Unable to allocate segment table\n");
+		return -ENOMEM;
+	}
+	pr_devel("SSTP allocated at 0x%p\n", ctx->sstp);
+
+	vsid  = get_kernel_vsid((u64)ctx->sstp, mmu_kernel_ssize) << 12;
+
+	sstp0 |= (u64)mmu_kernel_ssize << CXL_SSTP0_An_B_SHIFT;
+	sstp0 |= (SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp) << 50;
+
+	size = (((u64)ctx->sst_size >> 8) - 1) << CXL_SSTP0_An_SegTableSize_SHIFT;
+	if (unlikely(size & ~CXL_SSTP0_An_SegTableSize_MASK)) {
+		WARN(1, "Impossible segment table size\n");
+		return -EINVAL;
+	}
+	sstp0 |= size;
+
+	if (mmu_kernel_ssize == MMU_SEGSIZE_256M)
+		ea_mask = 0xfffff00ULL;
+	else
+		ea_mask = 0xffffffff00ULL;
+
+	sstp0 |=  vsid >>     (50-14);  /*   Top 14 bits of VSID */
+	sstp1 |= (vsid << (64-(50-14))) & ~ea_mask;
+	sstp1 |= (u64)ctx->sstp & ea_mask;
+	sstp1 |= CXL_SSTP1_An_V;
+
+	pr_devel("Looked up %#llx: slbfee. %#llx (ssize: %x, vsid: %#lx), copied to SSTP0: %#llx, SSTP1: %#llx\n",
+			(u64)ctx->sstp, (u64)ctx->sstp & ESID_MASK, mmu_kernel_ssize, vsid, sstp0, sstp1);
+
+	/* Store calculated sstp hardware points for use later */
+	ctx->sstp0 = sstp0;
+	ctx->sstp1 = sstp1;
+
+	return 0;
+}
+
+/* print buffer content as integers when debugging */
+void cxl_dump_debug_buffer(void *buf, size_t buf_len)
+{
+#ifdef DEBUG
+	int i, *ptr;
+
+	/*
+	 * We want to regroup up to 4 integers per line, which means they
+	 * need to be in the same pr_devel() statement
+	 */
+	ptr = (int *) buf;
+	for (i = 0; i * 4 < buf_len; i += 4) {
+		if ((i + 3) * 4 < buf_len)
+			pr_devel("%.8x %.8x %.8x %.8x\n", ptr[i], ptr[i + 1],
+				ptr[i + 2], ptr[i + 3]);
+		else if ((i + 2) * 4 < buf_len)
+			pr_devel("%.8x %.8x %.8x\n", ptr[i], ptr[i + 1],
+				ptr[i + 2]);
+		else if ((i + 1) * 4 < buf_len)
+			pr_devel("%.8x %.8x\n", ptr[i], ptr[i + 1]);
+		else
+			pr_devel("%.8x\n", ptr[i]);
+	}
+#endif /* DEBUG */
+}
+
+/* Find a CXL adapter by it's number and increase it's refcount */
+struct cxl *get_cxl_adapter(int num)
+{
+	struct cxl *adapter;
+
+	spin_lock(&adapter_idr_lock);
+	if ((adapter = idr_find(&cxl_adapter_idr, num)))
+		get_device(&adapter->dev);
+	spin_unlock(&adapter_idr_lock);
+
+	return adapter;
+}
+
+static int cxl_alloc_adapter_nr(struct cxl *adapter)
+{
+	int i;
+
+	idr_preload(GFP_KERNEL);
+	spin_lock(&adapter_idr_lock);
+	i = idr_alloc(&cxl_adapter_idr, adapter, 0, 0, GFP_NOWAIT);
+	spin_unlock(&adapter_idr_lock);
+	idr_preload_end();
+	if (i < 0)
+		return i;
+
+	adapter->adapter_num = i;
+
+	return 0;
+}
+
+void cxl_remove_adapter_nr(struct cxl *adapter)
+{
+	idr_remove(&cxl_adapter_idr, adapter->adapter_num);
+}
+
+struct cxl *cxl_alloc_adapter(void)
+{
+	struct cxl *adapter;
+
+	if (!(adapter = kzalloc(sizeof(struct cxl), GFP_KERNEL)))
+		return NULL;
+
+	spin_lock_init(&adapter->afu_list_lock);
+
+	if (cxl_alloc_adapter_nr(adapter))
+		goto err1;
+
+	if (dev_set_name(&adapter->dev, "card%i", adapter->adapter_num))
+		goto err2;
+
+	/* start with context lock taken */
+	atomic_set(&adapter->contexts_num, -1);
+
+	return adapter;
+err2:
+	cxl_remove_adapter_nr(adapter);
+err1:
+	kfree(adapter);
+	return NULL;
+}
+
+struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice)
+{
+	struct cxl_afu *afu;
+
+	if (!(afu = kzalloc(sizeof(struct cxl_afu), GFP_KERNEL)))
+		return NULL;
+
+	afu->adapter = adapter;
+	afu->dev.parent = &adapter->dev;
+	afu->dev.release = cxl_ops->release_afu;
+	afu->slice = slice;
+	idr_init(&afu->contexts_idr);
+	mutex_init(&afu->contexts_lock);
+	spin_lock_init(&afu->afu_cntl_lock);
+	atomic_set(&afu->configured_state, -1);
+	afu->prefault_mode = CXL_PREFAULT_NONE;
+	afu->irqs_max = afu->adapter->user_irqs;
+
+	return afu;
+}
+
+int cxl_afu_select_best_mode(struct cxl_afu *afu)
+{
+	if (afu->modes_supported & CXL_MODE_DIRECTED)
+		return cxl_ops->afu_activate_mode(afu, CXL_MODE_DIRECTED);
+
+	if (afu->modes_supported & CXL_MODE_DEDICATED)
+		return cxl_ops->afu_activate_mode(afu, CXL_MODE_DEDICATED);
+
+	dev_warn(&afu->dev, "No supported programming modes available\n");
+	/* We don't fail this so the user can inspect sysfs */
+	return 0;
+}
+
+int cxl_adapter_context_get(struct cxl *adapter)
+{
+	int rc;
+
+	rc = atomic_inc_unless_negative(&adapter->contexts_num);
+	return rc ? 0 : -EBUSY;
+}
+
+void cxl_adapter_context_put(struct cxl *adapter)
+{
+	atomic_dec_if_positive(&adapter->contexts_num);
+}
+
+int cxl_adapter_context_lock(struct cxl *adapter)
+{
+	int rc;
+	/* no active contexts -> contexts_num == 0 */
+	rc = atomic_cmpxchg(&adapter->contexts_num, 0, -1);
+	return rc ? -EBUSY : 0;
+}
+
+void cxl_adapter_context_unlock(struct cxl *adapter)
+{
+	int val = atomic_cmpxchg(&adapter->contexts_num, -1, 0);
+
+	/*
+	 * contexts lock taken -> contexts_num == -1
+	 * If not true then show a warning and force reset the lock.
+	 * This will happen when context_unlock was requested without
+	 * doing a context_lock.
+	 */
+	if (val != -1) {
+		atomic_set(&adapter->contexts_num, 0);
+		WARN(1, "Adapter context unlocked with %d active contexts",
+		     val);
+	}
+}
+
+static int __init init_cxl(void)
+{
+	int rc = 0;
+
+	if ((rc = cxl_file_init()))
+		return rc;
+
+	cxl_debugfs_init();
+
+	/*
+	 * we don't register the callback on P9. slb callack is only
+	 * used for the PSL8 MMU and CX4.
+	 */
+	if (cxl_is_power8()) {
+		rc = register_cxl_calls(&cxl_calls);
+		if (rc)
+			goto err;
+	}
+
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		cxl_ops = &cxl_native_ops;
+		rc = pci_register_driver(&cxl_pci_driver);
+	}
+#ifdef CONFIG_PPC_PSERIES
+	else {
+		cxl_ops = &cxl_guest_ops;
+		rc = platform_driver_register(&cxl_of_driver);
+	}
+#endif
+	if (rc)
+		goto err1;
+
+	return 0;
+err1:
+	if (cxl_is_power8())
+		unregister_cxl_calls(&cxl_calls);
+err:
+	cxl_debugfs_exit();
+	cxl_file_exit();
+
+	return rc;
+}
+
+static void exit_cxl(void)
+{
+	if (cpu_has_feature(CPU_FTR_HVMODE))
+		pci_unregister_driver(&cxl_pci_driver);
+#ifdef CONFIG_PPC_PSERIES
+	else
+		platform_driver_unregister(&cxl_of_driver);
+#endif
+
+	cxl_debugfs_exit();
+	cxl_file_exit();
+	if (cxl_is_power8())
+		unregister_cxl_calls(&cxl_calls);
+	idr_destroy(&cxl_adapter_idr);
+}
+
+module_init(init_cxl);
+module_exit(exit_cxl);
+
+MODULE_DESCRIPTION("IBM Coherent Accelerator");
+MODULE_AUTHOR("Ian Munsie <imunsie@au1.ibm.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
new file mode 100644
index 000000000..c9d5d82dc
--- /dev/null
+++ b/drivers/misc/cxl/native.c
@@ -0,0 +1,1600 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/sched/clock.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <linux/delay.h>
+#include <asm/synch.h>
+#include <asm/switch_to.h>
+#include <misc/cxl-base.h>
+
+#include "cxl.h"
+#include "trace.h"
+
+static int afu_control(struct cxl_afu *afu, u64 command, u64 clear,
+		       u64 result, u64 mask, bool enabled)
+{
+	u64 AFU_Cntl;
+	unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
+	int rc = 0;
+
+	spin_lock(&afu->afu_cntl_lock);
+	pr_devel("AFU command starting: %llx\n", command);
+
+	trace_cxl_afu_ctrl(afu, command);
+
+	AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An);
+	cxl_p2n_write(afu, CXL_AFU_Cntl_An, (AFU_Cntl & ~clear) | command);
+
+	AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An);
+	while ((AFU_Cntl & mask) != result) {
+		if (time_after_eq(jiffies, timeout)) {
+			dev_warn(&afu->dev, "WARNING: AFU control timed out!\n");
+			rc = -EBUSY;
+			goto out;
+		}
+
+		if (!cxl_ops->link_ok(afu->adapter, afu)) {
+			afu->enabled = enabled;
+			rc = -EIO;
+			goto out;
+		}
+
+		pr_devel_ratelimited("AFU control... (0x%016llx)\n",
+				     AFU_Cntl | command);
+		cpu_relax();
+		AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An);
+	}
+
+	if (AFU_Cntl & CXL_AFU_Cntl_An_RA) {
+		/*
+		 * Workaround for a bug in the XSL used in the Mellanox CX4
+		 * that fails to clear the RA bit after an AFU reset,
+		 * preventing subsequent AFU resets from working.
+		 */
+		cxl_p2n_write(afu, CXL_AFU_Cntl_An, AFU_Cntl & ~CXL_AFU_Cntl_An_RA);
+	}
+
+	pr_devel("AFU command complete: %llx\n", command);
+	afu->enabled = enabled;
+out:
+	trace_cxl_afu_ctrl_done(afu, command, rc);
+	spin_unlock(&afu->afu_cntl_lock);
+
+	return rc;
+}
+
+static int afu_enable(struct cxl_afu *afu)
+{
+	pr_devel("AFU enable request\n");
+
+	return afu_control(afu, CXL_AFU_Cntl_An_E, 0,
+			   CXL_AFU_Cntl_An_ES_Enabled,
+			   CXL_AFU_Cntl_An_ES_MASK, true);
+}
+
+int cxl_afu_disable(struct cxl_afu *afu)
+{
+	pr_devel("AFU disable request\n");
+
+	return afu_control(afu, 0, CXL_AFU_Cntl_An_E,
+			   CXL_AFU_Cntl_An_ES_Disabled,
+			   CXL_AFU_Cntl_An_ES_MASK, false);
+}
+
+/* This will disable as well as reset */
+static int native_afu_reset(struct cxl_afu *afu)
+{
+	int rc;
+	u64 serr;
+
+	pr_devel("AFU reset request\n");
+
+	rc = afu_control(afu, CXL_AFU_Cntl_An_RA, 0,
+			   CXL_AFU_Cntl_An_RS_Complete | CXL_AFU_Cntl_An_ES_Disabled,
+			   CXL_AFU_Cntl_An_RS_MASK | CXL_AFU_Cntl_An_ES_MASK,
+			   false);
+
+	/*
+	 * Re-enable any masked interrupts when the AFU is not
+	 * activated to avoid side effects after attaching a process
+	 * in dedicated mode.
+	 */
+	if (afu->current_mode == 0) {
+		serr = cxl_p1n_read(afu, CXL_PSL_SERR_An);
+		serr &= ~CXL_PSL_SERR_An_IRQ_MASKS;
+		cxl_p1n_write(afu, CXL_PSL_SERR_An, serr);
+	}
+
+	return rc;
+}
+
+static int native_afu_check_and_enable(struct cxl_afu *afu)
+{
+	if (!cxl_ops->link_ok(afu->adapter, afu)) {
+		WARN(1, "Refusing to enable afu while link down!\n");
+		return -EIO;
+	}
+	if (afu->enabled)
+		return 0;
+	return afu_enable(afu);
+}
+
+int cxl_psl_purge(struct cxl_afu *afu)
+{
+	u64 PSL_CNTL = cxl_p1n_read(afu, CXL_PSL_SCNTL_An);
+	u64 AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An);
+	u64 dsisr, dar;
+	u64 start, end;
+	u64 trans_fault = 0x0ULL;
+	unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
+	int rc = 0;
+
+	trace_cxl_psl_ctrl(afu, CXL_PSL_SCNTL_An_Pc);
+
+	pr_devel("PSL purge request\n");
+
+	if (cxl_is_power8())
+		trans_fault = CXL_PSL_DSISR_TRANS;
+	if (cxl_is_power9())
+		trans_fault = CXL_PSL9_DSISR_An_TF;
+
+	if (!cxl_ops->link_ok(afu->adapter, afu)) {
+		dev_warn(&afu->dev, "PSL Purge called with link down, ignoring\n");
+		rc = -EIO;
+		goto out;
+	}
+
+	if ((AFU_Cntl & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) {
+		WARN(1, "psl_purge request while AFU not disabled!\n");
+		cxl_afu_disable(afu);
+	}
+
+	cxl_p1n_write(afu, CXL_PSL_SCNTL_An,
+		       PSL_CNTL | CXL_PSL_SCNTL_An_Pc);
+	start = local_clock();
+	PSL_CNTL = cxl_p1n_read(afu, CXL_PSL_SCNTL_An);
+	while ((PSL_CNTL &  CXL_PSL_SCNTL_An_Ps_MASK)
+			== CXL_PSL_SCNTL_An_Ps_Pending) {
+		if (time_after_eq(jiffies, timeout)) {
+			dev_warn(&afu->dev, "WARNING: PSL Purge timed out!\n");
+			rc = -EBUSY;
+			goto out;
+		}
+		if (!cxl_ops->link_ok(afu->adapter, afu)) {
+			rc = -EIO;
+			goto out;
+		}
+
+		dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
+		pr_devel_ratelimited("PSL purging... PSL_CNTL: 0x%016llx  PSL_DSISR: 0x%016llx\n",
+				     PSL_CNTL, dsisr);
+
+		if (dsisr & trans_fault) {
+			dar = cxl_p2n_read(afu, CXL_PSL_DAR_An);
+			dev_notice(&afu->dev, "PSL purge terminating pending translation, DSISR: 0x%016llx, DAR: 0x%016llx\n",
+				   dsisr, dar);
+			cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE);
+		} else if (dsisr) {
+			dev_notice(&afu->dev, "PSL purge acknowledging pending non-translation fault, DSISR: 0x%016llx\n",
+				   dsisr);
+			cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A);
+		} else {
+			cpu_relax();
+		}
+		PSL_CNTL = cxl_p1n_read(afu, CXL_PSL_SCNTL_An);
+	}
+	end = local_clock();
+	pr_devel("PSL purged in %lld ns\n", end - start);
+
+	cxl_p1n_write(afu, CXL_PSL_SCNTL_An,
+		       PSL_CNTL & ~CXL_PSL_SCNTL_An_Pc);
+out:
+	trace_cxl_psl_ctrl_done(afu, CXL_PSL_SCNTL_An_Pc, rc);
+	return rc;
+}
+
+static int spa_max_procs(int spa_size)
+{
+	/*
+	 * From the CAIA:
+	 *    end_of_SPA_area = SPA_Base + ((n+4) * 128) + (( ((n*8) + 127) >> 7) * 128) + 255
+	 * Most of that junk is really just an overly-complicated way of saying
+	 * the last 256 bytes are __aligned(128), so it's really:
+	 *    end_of_SPA_area = end_of_PSL_queue_area + __aligned(128) 255
+	 * and
+	 *    end_of_PSL_queue_area = SPA_Base + ((n+4) * 128) + (n*8) - 1
+	 * so
+	 *    sizeof(SPA) = ((n+4) * 128) + (n*8) + __aligned(128) 256
+	 * Ignore the alignment (which is safe in this case as long as we are
+	 * careful with our rounding) and solve for n:
+	 */
+	return ((spa_size / 8) - 96) / 17;
+}
+
+static int cxl_alloc_spa(struct cxl_afu *afu, int mode)
+{
+	unsigned spa_size;
+
+	/* Work out how many pages to allocate */
+	afu->native->spa_order = -1;
+	do {
+		afu->native->spa_order++;
+		spa_size = (1 << afu->native->spa_order) * PAGE_SIZE;
+
+		if (spa_size > 0x100000) {
+			dev_warn(&afu->dev, "num_of_processes too large for the SPA, limiting to %i (0x%x)\n",
+					afu->native->spa_max_procs, afu->native->spa_size);
+			if (mode != CXL_MODE_DEDICATED)
+				afu->num_procs = afu->native->spa_max_procs;
+			break;
+		}
+
+		afu->native->spa_size = spa_size;
+		afu->native->spa_max_procs = spa_max_procs(afu->native->spa_size);
+	} while (afu->native->spa_max_procs < afu->num_procs);
+
+	if (!(afu->native->spa = (struct cxl_process_element *)
+	      __get_free_pages(GFP_KERNEL | __GFP_ZERO, afu->native->spa_order))) {
+		pr_err("cxl_alloc_spa: Unable to allocate scheduled process area\n");
+		return -ENOMEM;
+	}
+	pr_devel("spa pages: %i afu->spa_max_procs: %i   afu->num_procs: %i\n",
+		 1<<afu->native->spa_order, afu->native->spa_max_procs, afu->num_procs);
+
+	return 0;
+}
+
+static void attach_spa(struct cxl_afu *afu)
+{
+	u64 spap;
+
+	afu->native->sw_command_status = (__be64 *)((char *)afu->native->spa +
+					    ((afu->native->spa_max_procs + 3) * 128));
+
+	spap = virt_to_phys(afu->native->spa) & CXL_PSL_SPAP_Addr;
+	spap |= ((afu->native->spa_size >> (12 - CXL_PSL_SPAP_Size_Shift)) - 1) & CXL_PSL_SPAP_Size;
+	spap |= CXL_PSL_SPAP_V;
+	pr_devel("cxl: SPA allocated at 0x%p. Max processes: %i, sw_command_status: 0x%p CXL_PSL_SPAP_An=0x%016llx\n",
+		afu->native->spa, afu->native->spa_max_procs,
+		afu->native->sw_command_status, spap);
+	cxl_p1n_write(afu, CXL_PSL_SPAP_An, spap);
+}
+
+static inline void detach_spa(struct cxl_afu *afu)
+{
+	cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0);
+}
+
+void cxl_release_spa(struct cxl_afu *afu)
+{
+	if (afu->native->spa) {
+		free_pages((unsigned long) afu->native->spa,
+			afu->native->spa_order);
+		afu->native->spa = NULL;
+	}
+}
+
+/*
+ * Invalidation of all ERAT entries is no longer required by CAIA2. Use
+ * only for debug.
+ */
+int cxl_invalidate_all_psl9(struct cxl *adapter)
+{
+	unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
+	u64 ierat;
+
+	pr_devel("CXL adapter - invalidation of all ERAT entries\n");
+
+	/* Invalidates all ERAT entries for Radix or HPT */
+	ierat = CXL_XSL9_IERAT_IALL;
+	if (radix_enabled())
+		ierat |= CXL_XSL9_IERAT_INVR;
+	cxl_p1_write(adapter, CXL_XSL9_IERAT, ierat);
+
+	while (cxl_p1_read(adapter, CXL_XSL9_IERAT) & CXL_XSL9_IERAT_IINPROG) {
+		if (time_after_eq(jiffies, timeout)) {
+			dev_warn(&adapter->dev,
+			"WARNING: CXL adapter invalidation of all ERAT entries timed out!\n");
+			return -EBUSY;
+		}
+		if (!cxl_ops->link_ok(adapter, NULL))
+			return -EIO;
+		cpu_relax();
+	}
+	return 0;
+}
+
+int cxl_invalidate_all_psl8(struct cxl *adapter)
+{
+	unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
+
+	pr_devel("CXL adapter wide TLBIA & SLBIA\n");
+
+	cxl_p1_write(adapter, CXL_PSL_AFUSEL, CXL_PSL_AFUSEL_A);
+
+	cxl_p1_write(adapter, CXL_PSL_TLBIA, CXL_TLB_SLB_IQ_ALL);
+	while (cxl_p1_read(adapter, CXL_PSL_TLBIA) & CXL_TLB_SLB_P) {
+		if (time_after_eq(jiffies, timeout)) {
+			dev_warn(&adapter->dev, "WARNING: CXL adapter wide TLBIA timed out!\n");
+			return -EBUSY;
+		}
+		if (!cxl_ops->link_ok(adapter, NULL))
+			return -EIO;
+		cpu_relax();
+	}
+
+	cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_ALL);
+	while (cxl_p1_read(adapter, CXL_PSL_SLBIA) & CXL_TLB_SLB_P) {
+		if (time_after_eq(jiffies, timeout)) {
+			dev_warn(&adapter->dev, "WARNING: CXL adapter wide SLBIA timed out!\n");
+			return -EBUSY;
+		}
+		if (!cxl_ops->link_ok(adapter, NULL))
+			return -EIO;
+		cpu_relax();
+	}
+	return 0;
+}
+
+int cxl_data_cache_flush(struct cxl *adapter)
+{
+	u64 reg;
+	unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
+
+	/*
+	 * Do a datacache flush only if datacache is available.
+	 * In case of PSL9D datacache absent hence flush operation.
+	 * would timeout.
+	 */
+	if (adapter->native->no_data_cache) {
+		pr_devel("No PSL data cache. Ignoring cache flush req.\n");
+		return 0;
+	}
+
+	pr_devel("Flushing data cache\n");
+	reg = cxl_p1_read(adapter, CXL_PSL_Control);
+	reg |= CXL_PSL_Control_Fr;
+	cxl_p1_write(adapter, CXL_PSL_Control, reg);
+
+	reg = cxl_p1_read(adapter, CXL_PSL_Control);
+	while ((reg & CXL_PSL_Control_Fs_MASK) != CXL_PSL_Control_Fs_Complete) {
+		if (time_after_eq(jiffies, timeout)) {
+			dev_warn(&adapter->dev, "WARNING: cache flush timed out!\n");
+			return -EBUSY;
+		}
+
+		if (!cxl_ops->link_ok(adapter, NULL)) {
+			dev_warn(&adapter->dev, "WARNING: link down when flushing cache\n");
+			return -EIO;
+		}
+		cpu_relax();
+		reg = cxl_p1_read(adapter, CXL_PSL_Control);
+	}
+
+	reg &= ~CXL_PSL_Control_Fr;
+	cxl_p1_write(adapter, CXL_PSL_Control, reg);
+	return 0;
+}
+
+static int cxl_write_sstp(struct cxl_afu *afu, u64 sstp0, u64 sstp1)
+{
+	int rc;
+
+	/* 1. Disable SSTP by writing 0 to SSTP1[V] */
+	cxl_p2n_write(afu, CXL_SSTP1_An, 0);
+
+	/* 2. Invalidate all SLB entries */
+	if ((rc = cxl_afu_slbia(afu)))
+		return rc;
+
+	/* 3. Set SSTP0_An */
+	cxl_p2n_write(afu, CXL_SSTP0_An, sstp0);
+
+	/* 4. Set SSTP1_An */
+	cxl_p2n_write(afu, CXL_SSTP1_An, sstp1);
+
+	return 0;
+}
+
+/* Using per slice version may improve performance here. (ie. SLBIA_An) */
+static void slb_invalid(struct cxl_context *ctx)
+{
+	struct cxl *adapter = ctx->afu->adapter;
+	u64 slbia;
+
+	WARN_ON(!mutex_is_locked(&ctx->afu->native->spa_mutex));
+
+	cxl_p1_write(adapter, CXL_PSL_LBISEL,
+			((u64)be32_to_cpu(ctx->elem->common.pid) << 32) |
+			be32_to_cpu(ctx->elem->lpid));
+	cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_LPIDPID);
+
+	while (1) {
+		if (!cxl_ops->link_ok(adapter, NULL))
+			break;
+		slbia = cxl_p1_read(adapter, CXL_PSL_SLBIA);
+		if (!(slbia & CXL_TLB_SLB_P))
+			break;
+		cpu_relax();
+	}
+}
+
+static int do_process_element_cmd(struct cxl_context *ctx,
+				  u64 cmd, u64 pe_state)
+{
+	u64 state;
+	unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
+	int rc = 0;
+
+	trace_cxl_llcmd(ctx, cmd);
+
+	WARN_ON(!ctx->afu->enabled);
+
+	ctx->elem->software_state = cpu_to_be32(pe_state);
+	smp_wmb();
+	*(ctx->afu->native->sw_command_status) = cpu_to_be64(cmd | 0 | ctx->pe);
+	smp_mb();
+	cxl_p1n_write(ctx->afu, CXL_PSL_LLCMD_An, cmd | ctx->pe);
+	while (1) {
+		if (time_after_eq(jiffies, timeout)) {
+			dev_warn(&ctx->afu->dev, "WARNING: Process Element Command timed out!\n");
+			rc = -EBUSY;
+			goto out;
+		}
+		if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) {
+			dev_warn(&ctx->afu->dev, "WARNING: Device link down, aborting Process Element Command!\n");
+			rc = -EIO;
+			goto out;
+		}
+		state = be64_to_cpup(ctx->afu->native->sw_command_status);
+		if (state == ~0ULL) {
+			pr_err("cxl: Error adding process element to AFU\n");
+			rc = -1;
+			goto out;
+		}
+		if ((state & (CXL_SPA_SW_CMD_MASK | CXL_SPA_SW_STATE_MASK  | CXL_SPA_SW_LINK_MASK)) ==
+		    (cmd | (cmd >> 16) | ctx->pe))
+			break;
+		/*
+		 * The command won't finish in the PSL if there are
+		 * outstanding DSIs.  Hence we need to yield here in
+		 * case there are outstanding DSIs that we need to
+		 * service.  Tuning possiblity: we could wait for a
+		 * while before sched
+		 */
+		schedule();
+
+	}
+out:
+	trace_cxl_llcmd_done(ctx, cmd, rc);
+	return rc;
+}
+
+static int add_process_element(struct cxl_context *ctx)
+{
+	int rc = 0;
+
+	mutex_lock(&ctx->afu->native->spa_mutex);
+	pr_devel("%s Adding pe: %i started\n", __func__, ctx->pe);
+	if (!(rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_ADD, CXL_PE_SOFTWARE_STATE_V)))
+		ctx->pe_inserted = true;
+	pr_devel("%s Adding pe: %i finished\n", __func__, ctx->pe);
+	mutex_unlock(&ctx->afu->native->spa_mutex);
+	return rc;
+}
+
+static int terminate_process_element(struct cxl_context *ctx)
+{
+	int rc = 0;
+
+	/* fast path terminate if it's already invalid */
+	if (!(ctx->elem->software_state & cpu_to_be32(CXL_PE_SOFTWARE_STATE_V)))
+		return rc;
+
+	mutex_lock(&ctx->afu->native->spa_mutex);
+	pr_devel("%s Terminate pe: %i started\n", __func__, ctx->pe);
+	/* We could be asked to terminate when the hw is down. That
+	 * should always succeed: it's not running if the hw has gone
+	 * away and is being reset.
+	 */
+	if (cxl_ops->link_ok(ctx->afu->adapter, ctx->afu))
+		rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_TERMINATE,
+					    CXL_PE_SOFTWARE_STATE_V | CXL_PE_SOFTWARE_STATE_T);
+	ctx->elem->software_state = 0;	/* Remove Valid bit */
+	pr_devel("%s Terminate pe: %i finished\n", __func__, ctx->pe);
+	mutex_unlock(&ctx->afu->native->spa_mutex);
+	return rc;
+}
+
+static int remove_process_element(struct cxl_context *ctx)
+{
+	int rc = 0;
+
+	mutex_lock(&ctx->afu->native->spa_mutex);
+	pr_devel("%s Remove pe: %i started\n", __func__, ctx->pe);
+
+	/* We could be asked to remove when the hw is down. Again, if
+	 * the hw is down, the PE is gone, so we succeed.
+	 */
+	if (cxl_ops->link_ok(ctx->afu->adapter, ctx->afu))
+		rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_REMOVE, 0);
+
+	if (!rc)
+		ctx->pe_inserted = false;
+	if (cxl_is_power8())
+		slb_invalid(ctx);
+	pr_devel("%s Remove pe: %i finished\n", __func__, ctx->pe);
+	mutex_unlock(&ctx->afu->native->spa_mutex);
+
+	return rc;
+}
+
+void cxl_assign_psn_space(struct cxl_context *ctx)
+{
+	if (!ctx->afu->pp_size || ctx->master) {
+		ctx->psn_phys = ctx->afu->psn_phys;
+		ctx->psn_size = ctx->afu->adapter->ps_size;
+	} else {
+		ctx->psn_phys = ctx->afu->psn_phys +
+			(ctx->afu->native->pp_offset + ctx->afu->pp_size * ctx->pe);
+		ctx->psn_size = ctx->afu->pp_size;
+	}
+}
+
+static int activate_afu_directed(struct cxl_afu *afu)
+{
+	int rc;
+
+	dev_info(&afu->dev, "Activating AFU directed mode\n");
+
+	afu->num_procs = afu->max_procs_virtualised;
+	if (afu->native->spa == NULL) {
+		if (cxl_alloc_spa(afu, CXL_MODE_DIRECTED))
+			return -ENOMEM;
+	}
+	attach_spa(afu);
+
+	cxl_p1n_write(afu, CXL_PSL_SCNTL_An, CXL_PSL_SCNTL_An_PM_AFU);
+	if (cxl_is_power8())
+		cxl_p1n_write(afu, CXL_PSL_AMOR_An, 0xFFFFFFFFFFFFFFFFULL);
+	cxl_p1n_write(afu, CXL_PSL_ID_An, CXL_PSL_ID_An_F | CXL_PSL_ID_An_L);
+
+	afu->current_mode = CXL_MODE_DIRECTED;
+
+	if ((rc = cxl_chardev_m_afu_add(afu)))
+		return rc;
+
+	if ((rc = cxl_sysfs_afu_m_add(afu)))
+		goto err;
+
+	if ((rc = cxl_chardev_s_afu_add(afu)))
+		goto err1;
+
+	return 0;
+err1:
+	cxl_sysfs_afu_m_remove(afu);
+err:
+	cxl_chardev_afu_remove(afu);
+	return rc;
+}
+
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+#define set_endian(sr) ((sr) |= CXL_PSL_SR_An_LE)
+#else
+#define set_endian(sr) ((sr) &= ~(CXL_PSL_SR_An_LE))
+#endif
+
+u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9)
+{
+	u64 sr = 0;
+
+	set_endian(sr);
+	if (master)
+		sr |= CXL_PSL_SR_An_MP;
+	if (mfspr(SPRN_LPCR) & LPCR_TC)
+		sr |= CXL_PSL_SR_An_TC;
+
+	if (kernel) {
+		if (!real_mode)
+			sr |= CXL_PSL_SR_An_R;
+		sr |= (mfmsr() & MSR_SF) | CXL_PSL_SR_An_HV;
+	} else {
+		sr |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R;
+		if (radix_enabled())
+			sr |= CXL_PSL_SR_An_HV;
+		else
+			sr &= ~(CXL_PSL_SR_An_HV);
+		if (!test_tsk_thread_flag(current, TIF_32BIT))
+			sr |= CXL_PSL_SR_An_SF;
+	}
+	if (p9) {
+		if (radix_enabled())
+			sr |= CXL_PSL_SR_An_XLAT_ror;
+		else
+			sr |= CXL_PSL_SR_An_XLAT_hpt;
+	}
+	return sr;
+}
+
+static u64 calculate_sr(struct cxl_context *ctx)
+{
+	return cxl_calculate_sr(ctx->master, ctx->kernel, false,
+				cxl_is_power9());
+}
+
+static void update_ivtes_directed(struct cxl_context *ctx)
+{
+	bool need_update = (ctx->status == STARTED);
+	int r;
+
+	if (need_update) {
+		WARN_ON(terminate_process_element(ctx));
+		WARN_ON(remove_process_element(ctx));
+	}
+
+	for (r = 0; r < CXL_IRQ_RANGES; r++) {
+		ctx->elem->ivte_offsets[r] = cpu_to_be16(ctx->irqs.offset[r]);
+		ctx->elem->ivte_ranges[r] = cpu_to_be16(ctx->irqs.range[r]);
+	}
+
+	/*
+	 * Theoretically we could use the update llcmd, instead of a
+	 * terminate/remove/add (or if an atomic update was required we could
+	 * do a suspend/update/resume), however it seems there might be issues
+	 * with the update llcmd on some cards (including those using an XSL on
+	 * an ASIC) so for now it's safest to go with the commands that are
+	 * known to work. In the future if we come across a situation where the
+	 * card may be performing transactions using the same PE while we are
+	 * doing this update we might need to revisit this.
+	 */
+	if (need_update)
+		WARN_ON(add_process_element(ctx));
+}
+
+static int process_element_entry_psl9(struct cxl_context *ctx, u64 wed, u64 amr)
+{
+	u32 pid;
+	int rc;
+
+	cxl_assign_psn_space(ctx);
+
+	ctx->elem->ctxtime = 0; /* disable */
+	ctx->elem->lpid = cpu_to_be32(mfspr(SPRN_LPID));
+	ctx->elem->haurp = 0; /* disable */
+
+	if (ctx->kernel)
+		pid = 0;
+	else {
+		if (ctx->mm == NULL) {
+			pr_devel("%s: unable to get mm for pe=%d pid=%i\n",
+				__func__, ctx->pe, pid_nr(ctx->pid));
+			return -EINVAL;
+		}
+		pid = ctx->mm->context.id;
+	}
+
+	/* Assign a unique TIDR (thread id) for the current thread */
+	if (!(ctx->tidr) && (ctx->assign_tidr)) {
+		rc = set_thread_tidr(current);
+		if (rc)
+			return -ENODEV;
+		ctx->tidr = current->thread.tidr;
+		pr_devel("%s: current tidr: %d\n", __func__, ctx->tidr);
+	}
+
+	ctx->elem->common.tid = cpu_to_be32(ctx->tidr);
+	ctx->elem->common.pid = cpu_to_be32(pid);
+
+	ctx->elem->sr = cpu_to_be64(calculate_sr(ctx));
+
+	ctx->elem->common.csrp = 0; /* disable */
+
+	cxl_prefault(ctx, wed);
+
+	/*
+	 * Ensure we have the multiplexed PSL interrupt set up to take faults
+	 * for kernel contexts that may not have allocated any AFU IRQs at all:
+	 */
+	if (ctx->irqs.range[0] == 0) {
+		ctx->irqs.offset[0] = ctx->afu->native->psl_hwirq;
+		ctx->irqs.range[0] = 1;
+	}
+
+	ctx->elem->common.amr = cpu_to_be64(amr);
+	ctx->elem->common.wed = cpu_to_be64(wed);
+
+	return 0;
+}
+
+int cxl_attach_afu_directed_psl9(struct cxl_context *ctx, u64 wed, u64 amr)
+{
+	int result;
+
+	/* fill the process element entry */
+	result = process_element_entry_psl9(ctx, wed, amr);
+	if (result)
+		return result;
+
+	update_ivtes_directed(ctx);
+
+	/* first guy needs to enable */
+	result = cxl_ops->afu_check_and_enable(ctx->afu);
+	if (result)
+		return result;
+
+	return add_process_element(ctx);
+}
+
+int cxl_attach_afu_directed_psl8(struct cxl_context *ctx, u64 wed, u64 amr)
+{
+	u32 pid;
+	int result;
+
+	cxl_assign_psn_space(ctx);
+
+	ctx->elem->ctxtime = 0; /* disable */
+	ctx->elem->lpid = cpu_to_be32(mfspr(SPRN_LPID));
+	ctx->elem->haurp = 0; /* disable */
+	ctx->elem->u.sdr = cpu_to_be64(mfspr(SPRN_SDR1));
+
+	pid = current->pid;
+	if (ctx->kernel)
+		pid = 0;
+	ctx->elem->common.tid = 0;
+	ctx->elem->common.pid = cpu_to_be32(pid);
+
+	ctx->elem->sr = cpu_to_be64(calculate_sr(ctx));
+
+	ctx->elem->common.csrp = 0; /* disable */
+	ctx->elem->common.u.psl8.aurp0 = 0; /* disable */
+	ctx->elem->common.u.psl8.aurp1 = 0; /* disable */
+
+	cxl_prefault(ctx, wed);
+
+	ctx->elem->common.u.psl8.sstp0 = cpu_to_be64(ctx->sstp0);
+	ctx->elem->common.u.psl8.sstp1 = cpu_to_be64(ctx->sstp1);
+
+	/*
+	 * Ensure we have the multiplexed PSL interrupt set up to take faults
+	 * for kernel contexts that may not have allocated any AFU IRQs at all:
+	 */
+	if (ctx->irqs.range[0] == 0) {
+		ctx->irqs.offset[0] = ctx->afu->native->psl_hwirq;
+		ctx->irqs.range[0] = 1;
+	}
+
+	update_ivtes_directed(ctx);
+
+	ctx->elem->common.amr = cpu_to_be64(amr);
+	ctx->elem->common.wed = cpu_to_be64(wed);
+
+	/* first guy needs to enable */
+	if ((result = cxl_ops->afu_check_and_enable(ctx->afu)))
+		return result;
+
+	return add_process_element(ctx);
+}
+
+static int deactivate_afu_directed(struct cxl_afu *afu)
+{
+	dev_info(&afu->dev, "Deactivating AFU directed mode\n");
+
+	afu->current_mode = 0;
+	afu->num_procs = 0;
+
+	cxl_sysfs_afu_m_remove(afu);
+	cxl_chardev_afu_remove(afu);
+
+	/*
+	 * The CAIA section 2.2.1 indicates that the procedure for starting and
+	 * stopping an AFU in AFU directed mode is AFU specific, which is not
+	 * ideal since this code is generic and with one exception has no
+	 * knowledge of the AFU. This is in contrast to the procedure for
+	 * disabling a dedicated process AFU, which is documented to just
+	 * require a reset. The architecture does indicate that both an AFU
+	 * reset and an AFU disable should result in the AFU being disabled and
+	 * we do both followed by a PSL purge for safety.
+	 *
+	 * Notably we used to have some issues with the disable sequence on PSL
+	 * cards, which is why we ended up using this heavy weight procedure in
+	 * the first place, however a bug was discovered that had rendered the
+	 * disable operation ineffective, so it is conceivable that was the
+	 * sole explanation for those difficulties. Careful regression testing
+	 * is recommended if anyone attempts to remove or reorder these
+	 * operations.
+	 *
+	 * The XSL on the Mellanox CX4 behaves a little differently from the
+	 * PSL based cards and will time out an AFU reset if the AFU is still
+	 * enabled. That card is special in that we do have a means to identify
+	 * it from this code, so in that case we skip the reset and just use a
+	 * disable/purge to avoid the timeout and corresponding noise in the
+	 * kernel log.
+	 */
+	if (afu->adapter->native->sl_ops->needs_reset_before_disable)
+		cxl_ops->afu_reset(afu);
+	cxl_afu_disable(afu);
+	cxl_psl_purge(afu);
+
+	return 0;
+}
+
+int cxl_activate_dedicated_process_psl9(struct cxl_afu *afu)
+{
+	dev_info(&afu->dev, "Activating dedicated process mode\n");
+
+	/*
+	 * If XSL is set to dedicated mode (Set in PSL_SCNTL reg), the
+	 * XSL and AFU are programmed to work with a single context.
+	 * The context information should be configured in the SPA area
+	 * index 0 (so PSL_SPAP must be configured before enabling the
+	 * AFU).
+	 */
+	afu->num_procs = 1;
+	if (afu->native->spa == NULL) {
+		if (cxl_alloc_spa(afu, CXL_MODE_DEDICATED))
+			return -ENOMEM;
+	}
+	attach_spa(afu);
+
+	cxl_p1n_write(afu, CXL_PSL_SCNTL_An, CXL_PSL_SCNTL_An_PM_Process);
+	cxl_p1n_write(afu, CXL_PSL_ID_An, CXL_PSL_ID_An_F | CXL_PSL_ID_An_L);
+
+	afu->current_mode = CXL_MODE_DEDICATED;
+
+	return cxl_chardev_d_afu_add(afu);
+}
+
+int cxl_activate_dedicated_process_psl8(struct cxl_afu *afu)
+{
+	dev_info(&afu->dev, "Activating dedicated process mode\n");
+
+	cxl_p1n_write(afu, CXL_PSL_SCNTL_An, CXL_PSL_SCNTL_An_PM_Process);
+
+	cxl_p1n_write(afu, CXL_PSL_CtxTime_An, 0); /* disable */
+	cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0);    /* disable */
+	cxl_p1n_write(afu, CXL_PSL_AMOR_An, 0xFFFFFFFFFFFFFFFFULL);
+	cxl_p1n_write(afu, CXL_PSL_LPID_An, mfspr(SPRN_LPID));
+	cxl_p1n_write(afu, CXL_HAURP_An, 0);       /* disable */
+	cxl_p1n_write(afu, CXL_PSL_SDR_An, mfspr(SPRN_SDR1));
+
+	cxl_p2n_write(afu, CXL_CSRP_An, 0);        /* disable */
+	cxl_p2n_write(afu, CXL_AURP0_An, 0);       /* disable */
+	cxl_p2n_write(afu, CXL_AURP1_An, 0);       /* disable */
+
+	afu->current_mode = CXL_MODE_DEDICATED;
+	afu->num_procs = 1;
+
+	return cxl_chardev_d_afu_add(afu);
+}
+
+void cxl_update_dedicated_ivtes_psl9(struct cxl_context *ctx)
+{
+	int r;
+
+	for (r = 0; r < CXL_IRQ_RANGES; r++) {
+		ctx->elem->ivte_offsets[r] = cpu_to_be16(ctx->irqs.offset[r]);
+		ctx->elem->ivte_ranges[r] = cpu_to_be16(ctx->irqs.range[r]);
+	}
+}
+
+void cxl_update_dedicated_ivtes_psl8(struct cxl_context *ctx)
+{
+	struct cxl_afu *afu = ctx->afu;
+
+	cxl_p1n_write(afu, CXL_PSL_IVTE_Offset_An,
+		       (((u64)ctx->irqs.offset[0] & 0xffff) << 48) |
+		       (((u64)ctx->irqs.offset[1] & 0xffff) << 32) |
+		       (((u64)ctx->irqs.offset[2] & 0xffff) << 16) |
+			((u64)ctx->irqs.offset[3] & 0xffff));
+	cxl_p1n_write(afu, CXL_PSL_IVTE_Limit_An, (u64)
+		       (((u64)ctx->irqs.range[0] & 0xffff) << 48) |
+		       (((u64)ctx->irqs.range[1] & 0xffff) << 32) |
+		       (((u64)ctx->irqs.range[2] & 0xffff) << 16) |
+			((u64)ctx->irqs.range[3] & 0xffff));
+}
+
+int cxl_attach_dedicated_process_psl9(struct cxl_context *ctx, u64 wed, u64 amr)
+{
+	struct cxl_afu *afu = ctx->afu;
+	int result;
+
+	/* fill the process element entry */
+	result = process_element_entry_psl9(ctx, wed, amr);
+	if (result)
+		return result;
+
+	if (ctx->afu->adapter->native->sl_ops->update_dedicated_ivtes)
+		afu->adapter->native->sl_ops->update_dedicated_ivtes(ctx);
+
+	ctx->elem->software_state = cpu_to_be32(CXL_PE_SOFTWARE_STATE_V);
+	/*
+	 * Ideally we should do a wmb() here to make sure the changes to the
+	 * PE are visible to the card before we call afu_enable.
+	 * On ppc64 though all mmios are preceded by a 'sync' instruction hence
+	 * we dont dont need one here.
+	 */
+
+	result = cxl_ops->afu_reset(afu);
+	if (result)
+		return result;
+
+	return afu_enable(afu);
+}
+
+int cxl_attach_dedicated_process_psl8(struct cxl_context *ctx, u64 wed, u64 amr)
+{
+	struct cxl_afu *afu = ctx->afu;
+	u64 pid;
+	int rc;
+
+	pid = (u64)current->pid << 32;
+	if (ctx->kernel)
+		pid = 0;
+	cxl_p2n_write(afu, CXL_PSL_PID_TID_An, pid);
+
+	cxl_p1n_write(afu, CXL_PSL_SR_An, calculate_sr(ctx));
+
+	if ((rc = cxl_write_sstp(afu, ctx->sstp0, ctx->sstp1)))
+		return rc;
+
+	cxl_prefault(ctx, wed);
+
+	if (ctx->afu->adapter->native->sl_ops->update_dedicated_ivtes)
+		afu->adapter->native->sl_ops->update_dedicated_ivtes(ctx);
+
+	cxl_p2n_write(afu, CXL_PSL_AMR_An, amr);
+
+	/* master only context for dedicated */
+	cxl_assign_psn_space(ctx);
+
+	if ((rc = cxl_ops->afu_reset(afu)))
+		return rc;
+
+	cxl_p2n_write(afu, CXL_PSL_WED_An, wed);
+
+	return afu_enable(afu);
+}
+
+static int deactivate_dedicated_process(struct cxl_afu *afu)
+{
+	dev_info(&afu->dev, "Deactivating dedicated process mode\n");
+
+	afu->current_mode = 0;
+	afu->num_procs = 0;
+
+	cxl_chardev_afu_remove(afu);
+
+	return 0;
+}
+
+static int native_afu_deactivate_mode(struct cxl_afu *afu, int mode)
+{
+	if (mode == CXL_MODE_DIRECTED)
+		return deactivate_afu_directed(afu);
+	if (mode == CXL_MODE_DEDICATED)
+		return deactivate_dedicated_process(afu);
+	return 0;
+}
+
+static int native_afu_activate_mode(struct cxl_afu *afu, int mode)
+{
+	if (!mode)
+		return 0;
+	if (!(mode & afu->modes_supported))
+		return -EINVAL;
+
+	if (!cxl_ops->link_ok(afu->adapter, afu)) {
+		WARN(1, "Device link is down, refusing to activate!\n");
+		return -EIO;
+	}
+
+	if (mode == CXL_MODE_DIRECTED)
+		return activate_afu_directed(afu);
+	if ((mode == CXL_MODE_DEDICATED) &&
+	    (afu->adapter->native->sl_ops->activate_dedicated_process))
+		return afu->adapter->native->sl_ops->activate_dedicated_process(afu);
+
+	return -EINVAL;
+}
+
+static int native_attach_process(struct cxl_context *ctx, bool kernel,
+				u64 wed, u64 amr)
+{
+	if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) {
+		WARN(1, "Device link is down, refusing to attach process!\n");
+		return -EIO;
+	}
+
+	ctx->kernel = kernel;
+	if ((ctx->afu->current_mode == CXL_MODE_DIRECTED) &&
+	    (ctx->afu->adapter->native->sl_ops->attach_afu_directed))
+		return ctx->afu->adapter->native->sl_ops->attach_afu_directed(ctx, wed, amr);
+
+	if ((ctx->afu->current_mode == CXL_MODE_DEDICATED) &&
+	    (ctx->afu->adapter->native->sl_ops->attach_dedicated_process))
+		return ctx->afu->adapter->native->sl_ops->attach_dedicated_process(ctx, wed, amr);
+
+	return -EINVAL;
+}
+
+static inline int detach_process_native_dedicated(struct cxl_context *ctx)
+{
+	/*
+	 * The CAIA section 2.1.1 indicates that we need to do an AFU reset to
+	 * stop the AFU in dedicated mode (we therefore do not make that
+	 * optional like we do in the afu directed path). It does not indicate
+	 * that we need to do an explicit disable (which should occur
+	 * implicitly as part of the reset) or purge, but we do these as well
+	 * to be on the safe side.
+	 *
+	 * Notably we used to have some issues with the disable sequence
+	 * (before the sequence was spelled out in the architecture) which is
+	 * why we were so heavy weight in the first place, however a bug was
+	 * discovered that had rendered the disable operation ineffective, so
+	 * it is conceivable that was the sole explanation for those
+	 * difficulties. Point is, we should be careful and do some regression
+	 * testing if we ever attempt to remove any part of this procedure.
+	 */
+	cxl_ops->afu_reset(ctx->afu);
+	cxl_afu_disable(ctx->afu);
+	cxl_psl_purge(ctx->afu);
+	return 0;
+}
+
+static void native_update_ivtes(struct cxl_context *ctx)
+{
+	if (ctx->afu->current_mode == CXL_MODE_DIRECTED)
+		return update_ivtes_directed(ctx);
+	if ((ctx->afu->current_mode == CXL_MODE_DEDICATED) &&
+	    (ctx->afu->adapter->native->sl_ops->update_dedicated_ivtes))
+		return ctx->afu->adapter->native->sl_ops->update_dedicated_ivtes(ctx);
+	WARN(1, "native_update_ivtes: Bad mode\n");
+}
+
+static inline int detach_process_native_afu_directed(struct cxl_context *ctx)
+{
+	if (!ctx->pe_inserted)
+		return 0;
+	if (terminate_process_element(ctx))
+		return -1;
+	if (remove_process_element(ctx))
+		return -1;
+
+	return 0;
+}
+
+static int native_detach_process(struct cxl_context *ctx)
+{
+	trace_cxl_detach(ctx);
+
+	if (ctx->afu->current_mode == CXL_MODE_DEDICATED)
+		return detach_process_native_dedicated(ctx);
+
+	return detach_process_native_afu_directed(ctx);
+}
+
+static int native_get_irq_info(struct cxl_afu *afu, struct cxl_irq_info *info)
+{
+	/* If the adapter has gone away, we can't get any meaningful
+	 * information.
+	 */
+	if (!cxl_ops->link_ok(afu->adapter, afu))
+		return -EIO;
+
+	info->dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
+	info->dar = cxl_p2n_read(afu, CXL_PSL_DAR_An);
+	if (cxl_is_power8())
+		info->dsr = cxl_p2n_read(afu, CXL_PSL_DSR_An);
+	info->afu_err = cxl_p2n_read(afu, CXL_AFU_ERR_An);
+	info->errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An);
+	info->proc_handle = 0;
+
+	return 0;
+}
+
+void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx)
+{
+	u64 fir1, serr;
+
+	fir1 = cxl_p1_read(ctx->afu->adapter, CXL_PSL9_FIR1);
+
+	dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%016llx\n", fir1);
+	if (ctx->afu->adapter->native->sl_ops->register_serr_irq) {
+		serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An);
+		cxl_afu_decode_psl_serr(ctx->afu, serr);
+	}
+}
+
+void cxl_native_irq_dump_regs_psl8(struct cxl_context *ctx)
+{
+	u64 fir1, fir2, fir_slice, serr, afu_debug;
+
+	fir1 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR1);
+	fir2 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR2);
+	fir_slice = cxl_p1n_read(ctx->afu, CXL_PSL_FIR_SLICE_An);
+	afu_debug = cxl_p1n_read(ctx->afu, CXL_AFU_DEBUG_An);
+
+	dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%016llx\n", fir1);
+	dev_crit(&ctx->afu->dev, "PSL_FIR2: 0x%016llx\n", fir2);
+	if (ctx->afu->adapter->native->sl_ops->register_serr_irq) {
+		serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An);
+		cxl_afu_decode_psl_serr(ctx->afu, serr);
+	}
+	dev_crit(&ctx->afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice);
+	dev_crit(&ctx->afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug);
+}
+
+static irqreturn_t native_handle_psl_slice_error(struct cxl_context *ctx,
+						u64 dsisr, u64 errstat)
+{
+
+	dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%016llx\n", errstat);
+
+	if (ctx->afu->adapter->native->sl_ops->psl_irq_dump_registers)
+		ctx->afu->adapter->native->sl_ops->psl_irq_dump_registers(ctx);
+
+	if (ctx->afu->adapter->native->sl_ops->debugfs_stop_trace) {
+		dev_crit(&ctx->afu->dev, "STOPPING CXL TRACE\n");
+		ctx->afu->adapter->native->sl_ops->debugfs_stop_trace(ctx->afu->adapter);
+	}
+
+	return cxl_ops->ack_irq(ctx, 0, errstat);
+}
+
+static bool cxl_is_translation_fault(struct cxl_afu *afu, u64 dsisr)
+{
+	if ((cxl_is_power8()) && (dsisr & CXL_PSL_DSISR_TRANS))
+		return true;
+
+	if ((cxl_is_power9()) && (dsisr & CXL_PSL9_DSISR_An_TF))
+		return true;
+
+	return false;
+}
+
+irqreturn_t cxl_fail_irq_psl(struct cxl_afu *afu, struct cxl_irq_info *irq_info)
+{
+	if (cxl_is_translation_fault(afu, irq_info->dsisr))
+		cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE);
+	else
+		cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t native_irq_multiplexed(int irq, void *data)
+{
+	struct cxl_afu *afu = data;
+	struct cxl_context *ctx;
+	struct cxl_irq_info irq_info;
+	u64 phreg = cxl_p2n_read(afu, CXL_PSL_PEHandle_An);
+	int ph, ret = IRQ_HANDLED, res;
+
+	/* check if eeh kicked in while the interrupt was in flight */
+	if (unlikely(phreg == ~0ULL)) {
+		dev_warn(&afu->dev,
+			 "Ignoring slice interrupt(%d) due to fenced card",
+			 irq);
+		return IRQ_HANDLED;
+	}
+	/* Mask the pe-handle from register value */
+	ph = phreg & 0xffff;
+	if ((res = native_get_irq_info(afu, &irq_info))) {
+		WARN(1, "Unable to get CXL IRQ Info: %i\n", res);
+		if (afu->adapter->native->sl_ops->fail_irq)
+			return afu->adapter->native->sl_ops->fail_irq(afu, &irq_info);
+		return ret;
+	}
+
+	rcu_read_lock();
+	ctx = idr_find(&afu->contexts_idr, ph);
+	if (ctx) {
+		if (afu->adapter->native->sl_ops->handle_interrupt)
+			ret = afu->adapter->native->sl_ops->handle_interrupt(irq, ctx, &irq_info);
+		rcu_read_unlock();
+		return ret;
+	}
+	rcu_read_unlock();
+
+	WARN(1, "Unable to demultiplex CXL PSL IRQ for PE %i DSISR %016llx DAR"
+		" %016llx\n(Possible AFU HW issue - was a term/remove acked"
+		" with outstanding transactions?)\n", ph, irq_info.dsisr,
+		irq_info.dar);
+	if (afu->adapter->native->sl_ops->fail_irq)
+		ret = afu->adapter->native->sl_ops->fail_irq(afu, &irq_info);
+	return ret;
+}
+
+static void native_irq_wait(struct cxl_context *ctx)
+{
+	u64 dsisr;
+	int timeout = 1000;
+	int ph;
+
+	/*
+	 * Wait until no further interrupts are presented by the PSL
+	 * for this context.
+	 */
+	while (timeout--) {
+		ph = cxl_p2n_read(ctx->afu, CXL_PSL_PEHandle_An) & 0xffff;
+		if (ph != ctx->pe)
+			return;
+		dsisr = cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An);
+		if (cxl_is_power8() &&
+		   ((dsisr & CXL_PSL_DSISR_PENDING) == 0))
+			return;
+		if (cxl_is_power9() &&
+		   ((dsisr & CXL_PSL9_DSISR_PENDING) == 0))
+			return;
+		/*
+		 * We are waiting for the workqueue to process our
+		 * irq, so need to let that run here.
+		 */
+		msleep(1);
+	}
+
+	dev_warn(&ctx->afu->dev, "WARNING: waiting on DSI for PE %i"
+		 " DSISR %016llx!\n", ph, dsisr);
+	return;
+}
+
+static irqreturn_t native_slice_irq_err(int irq, void *data)
+{
+	struct cxl_afu *afu = data;
+	u64 errstat, serr, afu_error, dsisr;
+	u64 fir_slice, afu_debug, irq_mask;
+
+	/*
+	 * slice err interrupt is only used with full PSL (no XSL)
+	 */
+	serr = cxl_p1n_read(afu, CXL_PSL_SERR_An);
+	errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An);
+	afu_error = cxl_p2n_read(afu, CXL_AFU_ERR_An);
+	dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
+	cxl_afu_decode_psl_serr(afu, serr);
+
+	if (cxl_is_power8()) {
+		fir_slice = cxl_p1n_read(afu, CXL_PSL_FIR_SLICE_An);
+		afu_debug = cxl_p1n_read(afu, CXL_AFU_DEBUG_An);
+		dev_crit(&afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice);
+		dev_crit(&afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug);
+	}
+	dev_crit(&afu->dev, "CXL_PSL_ErrStat_An: 0x%016llx\n", errstat);
+	dev_crit(&afu->dev, "AFU_ERR_An: 0x%.16llx\n", afu_error);
+	dev_crit(&afu->dev, "PSL_DSISR_An: 0x%.16llx\n", dsisr);
+
+	/* mask off the IRQ so it won't retrigger until the AFU is reset */
+	irq_mask = (serr & CXL_PSL_SERR_An_IRQS) >> 32;
+	serr |= irq_mask;
+	cxl_p1n_write(afu, CXL_PSL_SERR_An, serr);
+	dev_info(&afu->dev, "Further such interrupts will be masked until the AFU is reset\n");
+
+	return IRQ_HANDLED;
+}
+
+void cxl_native_err_irq_dump_regs_psl9(struct cxl *adapter)
+{
+	u64 fir1;
+
+	fir1 = cxl_p1_read(adapter, CXL_PSL9_FIR1);
+	dev_crit(&adapter->dev, "PSL_FIR: 0x%016llx\n", fir1);
+}
+
+void cxl_native_err_irq_dump_regs_psl8(struct cxl *adapter)
+{
+	u64 fir1, fir2;
+
+	fir1 = cxl_p1_read(adapter, CXL_PSL_FIR1);
+	fir2 = cxl_p1_read(adapter, CXL_PSL_FIR2);
+	dev_crit(&adapter->dev,
+		 "PSL_FIR1: 0x%016llx\nPSL_FIR2: 0x%016llx\n",
+		 fir1, fir2);
+}
+
+static irqreturn_t native_irq_err(int irq, void *data)
+{
+	struct cxl *adapter = data;
+	u64 err_ivte;
+
+	WARN(1, "CXL ERROR interrupt %i\n", irq);
+
+	err_ivte = cxl_p1_read(adapter, CXL_PSL_ErrIVTE);
+	dev_crit(&adapter->dev, "PSL_ErrIVTE: 0x%016llx\n", err_ivte);
+
+	if (adapter->native->sl_ops->debugfs_stop_trace) {
+		dev_crit(&adapter->dev, "STOPPING CXL TRACE\n");
+		adapter->native->sl_ops->debugfs_stop_trace(adapter);
+	}
+
+	if (adapter->native->sl_ops->err_irq_dump_registers)
+		adapter->native->sl_ops->err_irq_dump_registers(adapter);
+
+	return IRQ_HANDLED;
+}
+
+int cxl_native_register_psl_err_irq(struct cxl *adapter)
+{
+	int rc;
+
+	adapter->irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err",
+				      dev_name(&adapter->dev));
+	if (!adapter->irq_name)
+		return -ENOMEM;
+
+	if ((rc = cxl_register_one_irq(adapter, native_irq_err, adapter,
+				       &adapter->native->err_hwirq,
+				       &adapter->native->err_virq,
+				       adapter->irq_name))) {
+		kfree(adapter->irq_name);
+		adapter->irq_name = NULL;
+		return rc;
+	}
+
+	cxl_p1_write(adapter, CXL_PSL_ErrIVTE, adapter->native->err_hwirq & 0xffff);
+
+	return 0;
+}
+
+void cxl_native_release_psl_err_irq(struct cxl *adapter)
+{
+	if (adapter->native->err_virq == 0 ||
+	    adapter->native->err_virq !=
+	    irq_find_mapping(NULL, adapter->native->err_hwirq))
+		return;
+
+	cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000);
+	cxl_unmap_irq(adapter->native->err_virq, adapter);
+	cxl_ops->release_one_irq(adapter, adapter->native->err_hwirq);
+	kfree(adapter->irq_name);
+	adapter->native->err_virq = 0;
+}
+
+int cxl_native_register_serr_irq(struct cxl_afu *afu)
+{
+	u64 serr;
+	int rc;
+
+	afu->err_irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err",
+				      dev_name(&afu->dev));
+	if (!afu->err_irq_name)
+		return -ENOMEM;
+
+	if ((rc = cxl_register_one_irq(afu->adapter, native_slice_irq_err, afu,
+				       &afu->serr_hwirq,
+				       &afu->serr_virq, afu->err_irq_name))) {
+		kfree(afu->err_irq_name);
+		afu->err_irq_name = NULL;
+		return rc;
+	}
+
+	serr = cxl_p1n_read(afu, CXL_PSL_SERR_An);
+	if (cxl_is_power8())
+		serr = (serr & 0x00ffffffffff0000ULL) | (afu->serr_hwirq & 0xffff);
+	if (cxl_is_power9()) {
+		/*
+		 * By default, all errors are masked. So don't set all masks.
+		 * Slice errors will be transfered.
+		 */
+		serr = (serr & ~0xff0000007fffffffULL) | (afu->serr_hwirq & 0xffff);
+	}
+	cxl_p1n_write(afu, CXL_PSL_SERR_An, serr);
+
+	return 0;
+}
+
+void cxl_native_release_serr_irq(struct cxl_afu *afu)
+{
+	if (afu->serr_virq == 0 ||
+	    afu->serr_virq != irq_find_mapping(NULL, afu->serr_hwirq))
+		return;
+
+	cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x0000000000000000);
+	cxl_unmap_irq(afu->serr_virq, afu);
+	cxl_ops->release_one_irq(afu->adapter, afu->serr_hwirq);
+	kfree(afu->err_irq_name);
+	afu->serr_virq = 0;
+}
+
+int cxl_native_register_psl_irq(struct cxl_afu *afu)
+{
+	int rc;
+
+	afu->psl_irq_name = kasprintf(GFP_KERNEL, "cxl-%s",
+				      dev_name(&afu->dev));
+	if (!afu->psl_irq_name)
+		return -ENOMEM;
+
+	if ((rc = cxl_register_one_irq(afu->adapter, native_irq_multiplexed,
+				    afu, &afu->native->psl_hwirq, &afu->native->psl_virq,
+				    afu->psl_irq_name))) {
+		kfree(afu->psl_irq_name);
+		afu->psl_irq_name = NULL;
+	}
+	return rc;
+}
+
+void cxl_native_release_psl_irq(struct cxl_afu *afu)
+{
+	if (afu->native->psl_virq == 0 ||
+	    afu->native->psl_virq !=
+	    irq_find_mapping(NULL, afu->native->psl_hwirq))
+		return;
+
+	cxl_unmap_irq(afu->native->psl_virq, afu);
+	cxl_ops->release_one_irq(afu->adapter, afu->native->psl_hwirq);
+	kfree(afu->psl_irq_name);
+	afu->native->psl_virq = 0;
+}
+
+static void recover_psl_err(struct cxl_afu *afu, u64 errstat)
+{
+	u64 dsisr;
+
+	pr_devel("RECOVERING FROM PSL ERROR... (0x%016llx)\n", errstat);
+
+	/* Clear PSL_DSISR[PE] */
+	dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
+	cxl_p2n_write(afu, CXL_PSL_DSISR_An, dsisr & ~CXL_PSL_DSISR_An_PE);
+
+	/* Write 1s to clear error status bits */
+	cxl_p2n_write(afu, CXL_PSL_ErrStat_An, errstat);
+}
+
+static int native_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask)
+{
+	trace_cxl_psl_irq_ack(ctx, tfc);
+	if (tfc)
+		cxl_p2n_write(ctx->afu, CXL_PSL_TFC_An, tfc);
+	if (psl_reset_mask)
+		recover_psl_err(ctx->afu, psl_reset_mask);
+
+	return 0;
+}
+
+int cxl_check_error(struct cxl_afu *afu)
+{
+	return (cxl_p1n_read(afu, CXL_PSL_SCNTL_An) == ~0ULL);
+}
+
+static bool native_support_attributes(const char *attr_name,
+				      enum cxl_attrs type)
+{
+	return true;
+}
+
+static int native_afu_cr_read64(struct cxl_afu *afu, int cr, u64 off, u64 *out)
+{
+	if (unlikely(!cxl_ops->link_ok(afu->adapter, afu)))
+		return -EIO;
+	if (unlikely(off >= afu->crs_len))
+		return -ERANGE;
+	*out = in_le64(afu->native->afu_desc_mmio + afu->crs_offset +
+		(cr * afu->crs_len) + off);
+	return 0;
+}
+
+static int native_afu_cr_read32(struct cxl_afu *afu, int cr, u64 off, u32 *out)
+{
+	if (unlikely(!cxl_ops->link_ok(afu->adapter, afu)))
+		return -EIO;
+	if (unlikely(off >= afu->crs_len))
+		return -ERANGE;
+	*out = in_le32(afu->native->afu_desc_mmio + afu->crs_offset +
+		(cr * afu->crs_len) + off);
+	return 0;
+}
+
+static int native_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off, u16 *out)
+{
+	u64 aligned_off = off & ~0x3L;
+	u32 val;
+	int rc;
+
+	rc = native_afu_cr_read32(afu, cr, aligned_off, &val);
+	if (!rc)
+		*out = (val >> ((off & 0x3) * 8)) & 0xffff;
+	return rc;
+}
+
+static int native_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off, u8 *out)
+{
+	u64 aligned_off = off & ~0x3L;
+	u32 val;
+	int rc;
+
+	rc = native_afu_cr_read32(afu, cr, aligned_off, &val);
+	if (!rc)
+		*out = (val >> ((off & 0x3) * 8)) & 0xff;
+	return rc;
+}
+
+static int native_afu_cr_write32(struct cxl_afu *afu, int cr, u64 off, u32 in)
+{
+	if (unlikely(!cxl_ops->link_ok(afu->adapter, afu)))
+		return -EIO;
+	if (unlikely(off >= afu->crs_len))
+		return -ERANGE;
+	out_le32(afu->native->afu_desc_mmio + afu->crs_offset +
+		(cr * afu->crs_len) + off, in);
+	return 0;
+}
+
+static int native_afu_cr_write16(struct cxl_afu *afu, int cr, u64 off, u16 in)
+{
+	u64 aligned_off = off & ~0x3L;
+	u32 val32, mask, shift;
+	int rc;
+
+	rc = native_afu_cr_read32(afu, cr, aligned_off, &val32);
+	if (rc)
+		return rc;
+	shift = (off & 0x3) * 8;
+	WARN_ON(shift == 24);
+	mask = 0xffff << shift;
+	val32 = (val32 & ~mask) | (in << shift);
+
+	rc = native_afu_cr_write32(afu, cr, aligned_off, val32);
+	return rc;
+}
+
+static int native_afu_cr_write8(struct cxl_afu *afu, int cr, u64 off, u8 in)
+{
+	u64 aligned_off = off & ~0x3L;
+	u32 val32, mask, shift;
+	int rc;
+
+	rc = native_afu_cr_read32(afu, cr, aligned_off, &val32);
+	if (rc)
+		return rc;
+	shift = (off & 0x3) * 8;
+	mask = 0xff << shift;
+	val32 = (val32 & ~mask) | (in << shift);
+
+	rc = native_afu_cr_write32(afu, cr, aligned_off, val32);
+	return rc;
+}
+
+const struct cxl_backend_ops cxl_native_ops = {
+	.module = THIS_MODULE,
+	.adapter_reset = cxl_pci_reset,
+	.alloc_one_irq = cxl_pci_alloc_one_irq,
+	.release_one_irq = cxl_pci_release_one_irq,
+	.alloc_irq_ranges = cxl_pci_alloc_irq_ranges,
+	.release_irq_ranges = cxl_pci_release_irq_ranges,
+	.setup_irq = cxl_pci_setup_irq,
+	.handle_psl_slice_error = native_handle_psl_slice_error,
+	.psl_interrupt = NULL,
+	.ack_irq = native_ack_irq,
+	.irq_wait = native_irq_wait,
+	.attach_process = native_attach_process,
+	.detach_process = native_detach_process,
+	.update_ivtes = native_update_ivtes,
+	.support_attributes = native_support_attributes,
+	.link_ok = cxl_adapter_link_ok,
+	.release_afu = cxl_pci_release_afu,
+	.afu_read_err_buffer = cxl_pci_afu_read_err_buffer,
+	.afu_check_and_enable = native_afu_check_and_enable,
+	.afu_activate_mode = native_afu_activate_mode,
+	.afu_deactivate_mode = native_afu_deactivate_mode,
+	.afu_reset = native_afu_reset,
+	.afu_cr_read8 = native_afu_cr_read8,
+	.afu_cr_read16 = native_afu_cr_read16,
+	.afu_cr_read32 = native_afu_cr_read32,
+	.afu_cr_read64 = native_afu_cr_read64,
+	.afu_cr_write8 = native_afu_cr_write8,
+	.afu_cr_write16 = native_afu_cr_write16,
+	.afu_cr_write32 = native_afu_cr_write32,
+	.read_adapter_vpd = cxl_pci_read_adapter_vpd,
+};
diff --git a/drivers/misc/cxl/of.c b/drivers/misc/cxl/of.c
new file mode 100644
index 000000000..aff181cd0
--- /dev/null
+++ b/drivers/misc/cxl/of.c
@@ -0,0 +1,511 @@
+/*
+ * Copyright 2015 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+
+#include "cxl.h"
+
+
+static const __be32 *read_prop_string(const struct device_node *np,
+				const char *prop_name)
+{
+	const __be32 *prop;
+
+	prop = of_get_property(np, prop_name, NULL);
+	if (cxl_verbose && prop)
+		pr_info("%s: %s\n", prop_name, (char *) prop);
+	return prop;
+}
+
+static const __be32 *read_prop_dword(const struct device_node *np,
+				const char *prop_name, u32 *val)
+{
+	const __be32 *prop;
+
+	prop = of_get_property(np, prop_name, NULL);
+	if (prop)
+		*val = be32_to_cpu(prop[0]);
+	if (cxl_verbose && prop)
+		pr_info("%s: %#x (%u)\n", prop_name, *val, *val);
+	return prop;
+}
+
+static const __be64 *read_prop64_dword(const struct device_node *np,
+				const char *prop_name, u64 *val)
+{
+	const __be64 *prop;
+
+	prop = of_get_property(np, prop_name, NULL);
+	if (prop)
+		*val = be64_to_cpu(prop[0]);
+	if (cxl_verbose && prop)
+		pr_info("%s: %#llx (%llu)\n", prop_name, *val, *val);
+	return prop;
+}
+
+
+static int read_handle(struct device_node *np, u64 *handle)
+{
+	const __be32 *prop;
+	u64 size;
+
+	/* Get address and size of the node */
+	prop = of_get_address(np, 0, &size, NULL);
+	if (size)
+		return -EINVAL;
+
+	/* Helper to read a big number; size is in cells (not bytes) */
+	*handle = of_read_number(prop, of_n_addr_cells(np));
+	return 0;
+}
+
+static int read_phys_addr(struct device_node *np, char *prop_name,
+			struct cxl_afu *afu)
+{
+	int i, len, entry_size, naddr, nsize, type;
+	u64 addr, size;
+	const __be32 *prop;
+
+	naddr = of_n_addr_cells(np);
+	nsize = of_n_size_cells(np);
+
+	prop = of_get_property(np, prop_name, &len);
+	if (prop) {
+		entry_size = naddr + nsize;
+		for (i = 0; i < (len / 4); i += entry_size, prop += entry_size) {
+			type = be32_to_cpu(prop[0]);
+			addr = of_read_number(prop, naddr);
+			size = of_read_number(&prop[naddr], nsize);
+			switch (type) {
+			case 0: /* unit address */
+				afu->guest->handle = addr;
+				break;
+			case 1: /* p2 area */
+				afu->guest->p2n_phys += addr;
+				afu->guest->p2n_size = size;
+				break;
+			case 2: /* problem state area */
+				afu->psn_phys += addr;
+				afu->adapter->ps_size = size;
+				break;
+			default:
+				pr_err("Invalid address type %d found in %s property of AFU\n",
+					type, prop_name);
+				return -EINVAL;
+			}
+			if (cxl_verbose)
+				pr_info("%s: %#x %#llx (size %#llx)\n",
+					prop_name, type, addr, size);
+		}
+	}
+	return 0;
+}
+
+static int read_vpd(struct cxl *adapter, struct cxl_afu *afu)
+{
+	char vpd[256];
+	int rc;
+	size_t len = sizeof(vpd);
+
+	memset(vpd, 0, len);
+
+	if (adapter)
+		rc = cxl_guest_read_adapter_vpd(adapter, vpd, len);
+	else
+		rc = cxl_guest_read_afu_vpd(afu, vpd, len);
+
+	if (rc > 0) {
+		cxl_dump_debug_buffer(vpd, rc);
+		rc = 0;
+	}
+	return rc;
+}
+
+int cxl_of_read_afu_handle(struct cxl_afu *afu, struct device_node *afu_np)
+{
+	if (read_handle(afu_np, &afu->guest->handle))
+		return -EINVAL;
+	pr_devel("AFU handle: 0x%.16llx\n", afu->guest->handle);
+
+	return 0;
+}
+
+int cxl_of_read_afu_properties(struct cxl_afu *afu, struct device_node *np)
+{
+	int i, len, rc;
+	char *p;
+	const __be32 *prop;
+	u16 device_id, vendor_id;
+	u32 val = 0, class_code;
+
+	/* Properties are read in the same order as listed in PAPR */
+
+	if (cxl_verbose) {
+		pr_info("Dump of the 'ibm,coherent-platform-function' node properties:\n");
+
+		prop = of_get_property(np, "compatible", &len);
+		i = 0;
+		while (i < len) {
+			p = (char *) prop + i;
+			pr_info("compatible: %s\n", p);
+			i += strlen(p) + 1;
+		}
+		read_prop_string(np, "name");
+	}
+
+	rc = read_phys_addr(np, "reg", afu);
+	if (rc)
+		return rc;
+
+	rc = read_phys_addr(np, "assigned-addresses", afu);
+	if (rc)
+		return rc;
+
+	if (afu->psn_phys == 0)
+		afu->psa = false;
+	else
+		afu->psa = true;
+
+	if (cxl_verbose) {
+		read_prop_string(np, "ibm,loc-code");
+		read_prop_string(np, "device_type");
+	}
+
+	read_prop_dword(np, "ibm,#processes", &afu->max_procs_virtualised);
+
+	if (cxl_verbose) {
+		read_prop_dword(np, "ibm,scratchpad-size", &val);
+		read_prop_dword(np, "ibm,programmable", &val);
+		read_prop_string(np, "ibm,phandle");
+		read_vpd(NULL, afu);
+	}
+
+	read_prop_dword(np, "ibm,max-ints-per-process", &afu->guest->max_ints);
+	afu->irqs_max = afu->guest->max_ints;
+
+	prop = read_prop_dword(np, "ibm,min-ints-per-process", &afu->pp_irqs);
+	if (prop) {
+		/* One extra interrupt for the PSL interrupt is already
+		 * included. Remove it now to keep only AFU interrupts and
+		 * match the native case.
+		 */
+		afu->pp_irqs--;
+	}
+
+	if (cxl_verbose) {
+		read_prop_dword(np, "ibm,max-ints", &val);
+		read_prop_dword(np, "ibm,vpd-size", &val);
+	}
+
+	read_prop64_dword(np, "ibm,error-buffer-size", &afu->eb_len);
+	afu->eb_offset = 0;
+
+	if (cxl_verbose)
+		read_prop_dword(np, "ibm,config-record-type", &val);
+
+	read_prop64_dword(np, "ibm,config-record-size", &afu->crs_len);
+	afu->crs_offset = 0;
+
+	read_prop_dword(np, "ibm,#config-records", &afu->crs_num);
+
+	if (cxl_verbose) {
+		for (i = 0; i < afu->crs_num; i++) {
+			rc = cxl_ops->afu_cr_read16(afu, i, PCI_DEVICE_ID,
+						&device_id);
+			if (!rc)
+				pr_info("record %d - device-id: %#x\n",
+					i, device_id);
+			rc = cxl_ops->afu_cr_read16(afu, i, PCI_VENDOR_ID,
+						&vendor_id);
+			if (!rc)
+				pr_info("record %d - vendor-id: %#x\n",
+					i, vendor_id);
+			rc = cxl_ops->afu_cr_read32(afu, i, PCI_CLASS_REVISION,
+						&class_code);
+			if (!rc) {
+				class_code >>= 8;
+				pr_info("record %d - class-code: %#x\n",
+					i, class_code);
+			}
+		}
+
+		read_prop_dword(np, "ibm,function-number", &val);
+		read_prop_dword(np, "ibm,privileged-function", &val);
+		read_prop_dword(np, "vendor-id", &val);
+		read_prop_dword(np, "device-id", &val);
+		read_prop_dword(np, "revision-id", &val);
+		read_prop_dword(np, "class-code", &val);
+		read_prop_dword(np, "subsystem-vendor-id", &val);
+		read_prop_dword(np, "subsystem-id", &val);
+	}
+	/*
+	 * if "ibm,process-mmio" doesn't exist then per-process mmio is
+	 * not supported
+	 */
+	val = 0;
+	prop = read_prop_dword(np, "ibm,process-mmio", &val);
+	if (prop && val == 1)
+		afu->pp_psa = true;
+	else
+		afu->pp_psa = false;
+
+	if (cxl_verbose) {
+		read_prop_dword(np, "ibm,supports-aur", &val);
+		read_prop_dword(np, "ibm,supports-csrp", &val);
+		read_prop_dword(np, "ibm,supports-prr", &val);
+	}
+
+	prop = read_prop_dword(np, "ibm,function-error-interrupt", &val);
+	if (prop)
+		afu->serr_hwirq = val;
+
+	pr_devel("AFU handle: %#llx\n", afu->guest->handle);
+	pr_devel("p2n_phys: %#llx (size %#llx)\n",
+		afu->guest->p2n_phys, afu->guest->p2n_size);
+	pr_devel("psn_phys: %#llx (size %#llx)\n",
+		afu->psn_phys, afu->adapter->ps_size);
+	pr_devel("Max number of processes virtualised=%i\n",
+		afu->max_procs_virtualised);
+	pr_devel("Per-process irqs min=%i, max=%i\n", afu->pp_irqs,
+		 afu->irqs_max);
+	pr_devel("Slice error interrupt=%#lx\n", afu->serr_hwirq);
+
+	return 0;
+}
+
+static int read_adapter_irq_config(struct cxl *adapter, struct device_node *np)
+{
+	const __be32 *ranges;
+	int len, nranges, i;
+	struct irq_avail *cur;
+
+	ranges = of_get_property(np, "interrupt-ranges", &len);
+	if (ranges == NULL || len < (2 * sizeof(int)))
+		return -EINVAL;
+
+	/*
+	 * encoded array of two cells per entry, each cell encoded as
+	 * with encode-int
+	 */
+	nranges = len / (2 * sizeof(int));
+	if (nranges == 0 || (nranges * 2 * sizeof(int)) != len)
+		return -EINVAL;
+
+	adapter->guest->irq_avail = kcalloc(nranges, sizeof(struct irq_avail),
+					    GFP_KERNEL);
+	if (adapter->guest->irq_avail == NULL)
+		return -ENOMEM;
+
+	adapter->guest->irq_base_offset = be32_to_cpu(ranges[0]);
+	for (i = 0; i < nranges; i++) {
+		cur = &adapter->guest->irq_avail[i];
+		cur->offset = be32_to_cpu(ranges[i * 2]);
+		cur->range  = be32_to_cpu(ranges[i * 2 + 1]);
+		cur->bitmap = kcalloc(BITS_TO_LONGS(cur->range),
+				sizeof(*cur->bitmap), GFP_KERNEL);
+		if (cur->bitmap == NULL)
+			goto err;
+		if (cur->offset < adapter->guest->irq_base_offset)
+			adapter->guest->irq_base_offset = cur->offset;
+		if (cxl_verbose)
+			pr_info("available IRQ range: %#lx-%#lx (%lu)\n",
+				cur->offset, cur->offset + cur->range - 1,
+				cur->range);
+	}
+	adapter->guest->irq_nranges = nranges;
+	spin_lock_init(&adapter->guest->irq_alloc_lock);
+
+	return 0;
+err:
+	for (i--; i >= 0; i--) {
+		cur = &adapter->guest->irq_avail[i];
+		kfree(cur->bitmap);
+	}
+	kfree(adapter->guest->irq_avail);
+	adapter->guest->irq_avail = NULL;
+	return -ENOMEM;
+}
+
+int cxl_of_read_adapter_handle(struct cxl *adapter, struct device_node *np)
+{
+	if (read_handle(np, &adapter->guest->handle))
+		return -EINVAL;
+	pr_devel("Adapter handle: 0x%.16llx\n", adapter->guest->handle);
+
+	return 0;
+}
+
+int cxl_of_read_adapter_properties(struct cxl *adapter, struct device_node *np)
+{
+	int rc, len, naddr, i;
+	char *p;
+	const __be32 *prop;
+	u32 val = 0;
+
+	/* Properties are read in the same order as listed in PAPR */
+
+	naddr = of_n_addr_cells(np);
+
+	if (cxl_verbose) {
+		pr_info("Dump of the 'ibm,coherent-platform-facility' node properties:\n");
+
+		read_prop_dword(np, "#address-cells", &val);
+		read_prop_dword(np, "#size-cells", &val);
+
+		prop = of_get_property(np, "compatible", &len);
+		i = 0;
+		while (i < len) {
+			p = (char *) prop + i;
+			pr_info("compatible: %s\n", p);
+			i += strlen(p) + 1;
+		}
+		read_prop_string(np, "name");
+		read_prop_string(np, "model");
+
+		prop = of_get_property(np, "reg", NULL);
+		if (prop) {
+			pr_info("reg: addr:%#llx size:%#x\n",
+				of_read_number(prop, naddr),
+				be32_to_cpu(prop[naddr]));
+		}
+
+		read_prop_string(np, "ibm,loc-code");
+	}
+
+	if ((rc = read_adapter_irq_config(adapter, np)))
+		return rc;
+
+	if (cxl_verbose) {
+		read_prop_string(np, "device_type");
+		read_prop_string(np, "ibm,phandle");
+	}
+
+	prop = read_prop_dword(np, "ibm,caia-version", &val);
+	if (prop) {
+		adapter->caia_major = (val & 0xFF00) >> 8;
+		adapter->caia_minor = val & 0xFF;
+	}
+
+	prop = read_prop_dword(np, "ibm,psl-revision", &val);
+	if (prop)
+		adapter->psl_rev = val;
+
+	prop = read_prop_string(np, "status");
+	if (prop) {
+		adapter->guest->status = kasprintf(GFP_KERNEL, "%s", (char *) prop);
+		if (adapter->guest->status == NULL)
+			return -ENOMEM;
+	}
+
+	prop = read_prop_dword(np, "vendor-id", &val);
+	if (prop)
+		adapter->guest->vendor = val;
+
+	prop = read_prop_dword(np, "device-id", &val);
+	if (prop)
+		adapter->guest->device = val;
+
+	if (cxl_verbose) {
+		read_prop_dword(np, "ibm,privileged-facility", &val);
+		read_prop_dword(np, "revision-id", &val);
+		read_prop_dword(np, "class-code", &val);
+	}
+
+	prop = read_prop_dword(np, "subsystem-vendor-id", &val);
+	if (prop)
+		adapter->guest->subsystem_vendor = val;
+
+	prop = read_prop_dword(np, "subsystem-id", &val);
+	if (prop)
+		adapter->guest->subsystem = val;
+
+	if (cxl_verbose)
+		read_vpd(adapter, NULL);
+
+	return 0;
+}
+
+static int cxl_of_remove(struct platform_device *pdev)
+{
+	struct cxl *adapter;
+	int afu;
+
+	adapter = dev_get_drvdata(&pdev->dev);
+	for (afu = 0; afu < adapter->slices; afu++)
+		cxl_guest_remove_afu(adapter->afu[afu]);
+
+	cxl_guest_remove_adapter(adapter);
+	return 0;
+}
+
+static void cxl_of_shutdown(struct platform_device *pdev)
+{
+	cxl_of_remove(pdev);
+}
+
+int cxl_of_probe(struct platform_device *pdev)
+{
+	struct device_node *np = NULL;
+	struct device_node *afu_np = NULL;
+	struct cxl *adapter = NULL;
+	int ret;
+	int slice = 0, slice_ok = 0;
+
+	pr_devel("in %s\n", __func__);
+
+	np = pdev->dev.of_node;
+	if (np == NULL)
+		return -ENODEV;
+
+	/* init adapter */
+	adapter = cxl_guest_init_adapter(np, pdev);
+	if (IS_ERR(adapter)) {
+		dev_err(&pdev->dev, "guest_init_adapter failed: %li\n", PTR_ERR(adapter));
+		return PTR_ERR(adapter);
+	}
+
+	/* init afu */
+	for_each_child_of_node(np, afu_np) {
+		if ((ret = cxl_guest_init_afu(adapter, slice, afu_np)))
+			dev_err(&pdev->dev, "AFU %i failed to initialise: %i\n",
+				slice, ret);
+		else
+			slice_ok++;
+		slice++;
+	}
+
+	if (slice_ok == 0) {
+		dev_info(&pdev->dev, "No active AFU");
+		adapter->slices = 0;
+	}
+
+	return 0;
+}
+
+static const struct of_device_id cxl_of_match[] = {
+	{ .compatible = "ibm,coherent-platform-facility",},
+	{},
+};
+MODULE_DEVICE_TABLE(of, cxl_of_match);
+
+struct platform_driver cxl_of_driver = {
+	.driver = {
+		.name = "cxl_of",
+		.of_match_table = cxl_of_match,
+		.owner = THIS_MODULE
+	},
+	.probe = cxl_of_probe,
+	.remove = cxl_of_remove,
+	.shutdown = cxl_of_shutdown,
+};
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
new file mode 100644
index 000000000..b9d3c87e9
--- /dev/null
+++ b/drivers/misc/cxl/pci.c
@@ -0,0 +1,2104 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/pci_regs.h>
+#include <linux/pci_ids.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/sort.h>
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <linux/delay.h>
+#include <asm/opal.h>
+#include <asm/msi_bitmap.h>
+#include <asm/pnv-pci.h>
+#include <asm/io.h>
+#include <asm/reg.h>
+
+#include "cxl.h"
+#include <misc/cxl.h>
+
+
+#define CXL_PCI_VSEC_ID	0x1280
+#define CXL_VSEC_MIN_SIZE 0x80
+
+#define CXL_READ_VSEC_LENGTH(dev, vsec, dest)			\
+	{							\
+		pci_read_config_word(dev, vsec + 0x6, dest);	\
+		*dest >>= 4;					\
+	}
+#define CXL_READ_VSEC_NAFUS(dev, vsec, dest) \
+	pci_read_config_byte(dev, vsec + 0x8, dest)
+
+#define CXL_READ_VSEC_STATUS(dev, vsec, dest) \
+	pci_read_config_byte(dev, vsec + 0x9, dest)
+#define CXL_STATUS_SECOND_PORT  0x80
+#define CXL_STATUS_MSI_X_FULL   0x40
+#define CXL_STATUS_MSI_X_SINGLE 0x20
+#define CXL_STATUS_FLASH_RW     0x08
+#define CXL_STATUS_FLASH_RO     0x04
+#define CXL_STATUS_LOADABLE_AFU 0x02
+#define CXL_STATUS_LOADABLE_PSL 0x01
+/* If we see these features we won't try to use the card */
+#define CXL_UNSUPPORTED_FEATURES \
+	(CXL_STATUS_MSI_X_FULL | CXL_STATUS_MSI_X_SINGLE)
+
+#define CXL_READ_VSEC_MODE_CONTROL(dev, vsec, dest) \
+	pci_read_config_byte(dev, vsec + 0xa, dest)
+#define CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val) \
+	pci_write_config_byte(dev, vsec + 0xa, val)
+#define CXL_VSEC_PROTOCOL_MASK   0xe0
+#define CXL_VSEC_PROTOCOL_1024TB 0x80
+#define CXL_VSEC_PROTOCOL_512TB  0x40
+#define CXL_VSEC_PROTOCOL_256TB  0x20 /* Power 8/9 uses this */
+#define CXL_VSEC_PROTOCOL_ENABLE 0x01
+
+#define CXL_READ_VSEC_PSL_REVISION(dev, vsec, dest) \
+	pci_read_config_word(dev, vsec + 0xc, dest)
+#define CXL_READ_VSEC_CAIA_MINOR(dev, vsec, dest) \
+	pci_read_config_byte(dev, vsec + 0xe, dest)
+#define CXL_READ_VSEC_CAIA_MAJOR(dev, vsec, dest) \
+	pci_read_config_byte(dev, vsec + 0xf, dest)
+#define CXL_READ_VSEC_BASE_IMAGE(dev, vsec, dest) \
+	pci_read_config_word(dev, vsec + 0x10, dest)
+
+#define CXL_READ_VSEC_IMAGE_STATE(dev, vsec, dest) \
+	pci_read_config_byte(dev, vsec + 0x13, dest)
+#define CXL_WRITE_VSEC_IMAGE_STATE(dev, vsec, val) \
+	pci_write_config_byte(dev, vsec + 0x13, val)
+#define CXL_VSEC_USER_IMAGE_LOADED 0x80 /* RO */
+#define CXL_VSEC_PERST_LOADS_IMAGE 0x20 /* RW */
+#define CXL_VSEC_PERST_SELECT_USER 0x10 /* RW */
+
+#define CXL_READ_VSEC_AFU_DESC_OFF(dev, vsec, dest) \
+	pci_read_config_dword(dev, vsec + 0x20, dest)
+#define CXL_READ_VSEC_AFU_DESC_SIZE(dev, vsec, dest) \
+	pci_read_config_dword(dev, vsec + 0x24, dest)
+#define CXL_READ_VSEC_PS_OFF(dev, vsec, dest) \
+	pci_read_config_dword(dev, vsec + 0x28, dest)
+#define CXL_READ_VSEC_PS_SIZE(dev, vsec, dest) \
+	pci_read_config_dword(dev, vsec + 0x2c, dest)
+
+
+/* This works a little different than the p1/p2 register accesses to make it
+ * easier to pull out individual fields */
+#define AFUD_READ(afu, off)		in_be64(afu->native->afu_desc_mmio + off)
+#define AFUD_READ_LE(afu, off)		in_le64(afu->native->afu_desc_mmio + off)
+#define EXTRACT_PPC_BIT(val, bit)	(!!(val & PPC_BIT(bit)))
+#define EXTRACT_PPC_BITS(val, bs, be)	((val & PPC_BITMASK(bs, be)) >> PPC_BITLSHIFT(be))
+
+#define AFUD_READ_INFO(afu)		AFUD_READ(afu, 0x0)
+#define   AFUD_NUM_INTS_PER_PROC(val)	EXTRACT_PPC_BITS(val,  0, 15)
+#define   AFUD_NUM_PROCS(val)		EXTRACT_PPC_BITS(val, 16, 31)
+#define   AFUD_NUM_CRS(val)		EXTRACT_PPC_BITS(val, 32, 47)
+#define   AFUD_MULTIMODE(val)		EXTRACT_PPC_BIT(val, 48)
+#define   AFUD_PUSH_BLOCK_TRANSFER(val)	EXTRACT_PPC_BIT(val, 55)
+#define   AFUD_DEDICATED_PROCESS(val)	EXTRACT_PPC_BIT(val, 59)
+#define   AFUD_AFU_DIRECTED(val)	EXTRACT_PPC_BIT(val, 61)
+#define   AFUD_TIME_SLICED(val)		EXTRACT_PPC_BIT(val, 63)
+#define AFUD_READ_CR(afu)		AFUD_READ(afu, 0x20)
+#define   AFUD_CR_LEN(val)		EXTRACT_PPC_BITS(val, 8, 63)
+#define AFUD_READ_CR_OFF(afu)		AFUD_READ(afu, 0x28)
+#define AFUD_READ_PPPSA(afu)		AFUD_READ(afu, 0x30)
+#define   AFUD_PPPSA_PP(val)		EXTRACT_PPC_BIT(val, 6)
+#define   AFUD_PPPSA_PSA(val)		EXTRACT_PPC_BIT(val, 7)
+#define   AFUD_PPPSA_LEN(val)		EXTRACT_PPC_BITS(val, 8, 63)
+#define AFUD_READ_PPPSA_OFF(afu)	AFUD_READ(afu, 0x38)
+#define AFUD_READ_EB(afu)		AFUD_READ(afu, 0x40)
+#define   AFUD_EB_LEN(val)		EXTRACT_PPC_BITS(val, 8, 63)
+#define AFUD_READ_EB_OFF(afu)		AFUD_READ(afu, 0x48)
+
+static const struct pci_device_id cxl_pci_tbl[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0477), },
+	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x044b), },
+	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x04cf), },
+	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0601), },
+	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0623), },
+	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0628), },
+	{ }
+};
+MODULE_DEVICE_TABLE(pci, cxl_pci_tbl);
+
+
+/*
+ * Mostly using these wrappers to avoid confusion:
+ * priv 1 is BAR2, while priv 2 is BAR0
+ */
+static inline resource_size_t p1_base(struct pci_dev *dev)
+{
+	return pci_resource_start(dev, 2);
+}
+
+static inline resource_size_t p1_size(struct pci_dev *dev)
+{
+	return pci_resource_len(dev, 2);
+}
+
+static inline resource_size_t p2_base(struct pci_dev *dev)
+{
+	return pci_resource_start(dev, 0);
+}
+
+static inline resource_size_t p2_size(struct pci_dev *dev)
+{
+	return pci_resource_len(dev, 0);
+}
+
+static int find_cxl_vsec(struct pci_dev *dev)
+{
+	int vsec = 0;
+	u16 val;
+
+	while ((vsec = pci_find_next_ext_capability(dev, vsec, PCI_EXT_CAP_ID_VNDR))) {
+		pci_read_config_word(dev, vsec + 0x4, &val);
+		if (val == CXL_PCI_VSEC_ID)
+			return vsec;
+	}
+	return 0;
+
+}
+
+static void dump_cxl_config_space(struct pci_dev *dev)
+{
+	int vsec;
+	u32 val;
+
+	dev_info(&dev->dev, "dump_cxl_config_space\n");
+
+	pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, &val);
+	dev_info(&dev->dev, "BAR0: %#.8x\n", val);
+	pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, &val);
+	dev_info(&dev->dev, "BAR1: %#.8x\n", val);
+	pci_read_config_dword(dev, PCI_BASE_ADDRESS_2, &val);
+	dev_info(&dev->dev, "BAR2: %#.8x\n", val);
+	pci_read_config_dword(dev, PCI_BASE_ADDRESS_3, &val);
+	dev_info(&dev->dev, "BAR3: %#.8x\n", val);
+	pci_read_config_dword(dev, PCI_BASE_ADDRESS_4, &val);
+	dev_info(&dev->dev, "BAR4: %#.8x\n", val);
+	pci_read_config_dword(dev, PCI_BASE_ADDRESS_5, &val);
+	dev_info(&dev->dev, "BAR5: %#.8x\n", val);
+
+	dev_info(&dev->dev, "p1 regs: %#llx, len: %#llx\n",
+		p1_base(dev), p1_size(dev));
+	dev_info(&dev->dev, "p2 regs: %#llx, len: %#llx\n",
+		p2_base(dev), p2_size(dev));
+	dev_info(&dev->dev, "BAR 4/5: %#llx, len: %#llx\n",
+		pci_resource_start(dev, 4), pci_resource_len(dev, 4));
+
+	if (!(vsec = find_cxl_vsec(dev)))
+		return;
+
+#define show_reg(name, what) \
+	dev_info(&dev->dev, "cxl vsec: %30s: %#x\n", name, what)
+
+	pci_read_config_dword(dev, vsec + 0x0, &val);
+	show_reg("Cap ID", (val >> 0) & 0xffff);
+	show_reg("Cap Ver", (val >> 16) & 0xf);
+	show_reg("Next Cap Ptr", (val >> 20) & 0xfff);
+	pci_read_config_dword(dev, vsec + 0x4, &val);
+	show_reg("VSEC ID", (val >> 0) & 0xffff);
+	show_reg("VSEC Rev", (val >> 16) & 0xf);
+	show_reg("VSEC Length",	(val >> 20) & 0xfff);
+	pci_read_config_dword(dev, vsec + 0x8, &val);
+	show_reg("Num AFUs", (val >> 0) & 0xff);
+	show_reg("Status", (val >> 8) & 0xff);
+	show_reg("Mode Control", (val >> 16) & 0xff);
+	show_reg("Reserved", (val >> 24) & 0xff);
+	pci_read_config_dword(dev, vsec + 0xc, &val);
+	show_reg("PSL Rev", (val >> 0) & 0xffff);
+	show_reg("CAIA Ver", (val >> 16) & 0xffff);
+	pci_read_config_dword(dev, vsec + 0x10, &val);
+	show_reg("Base Image Rev", (val >> 0) & 0xffff);
+	show_reg("Reserved", (val >> 16) & 0x0fff);
+	show_reg("Image Control", (val >> 28) & 0x3);
+	show_reg("Reserved", (val >> 30) & 0x1);
+	show_reg("Image Loaded", (val >> 31) & 0x1);
+
+	pci_read_config_dword(dev, vsec + 0x14, &val);
+	show_reg("Reserved", val);
+	pci_read_config_dword(dev, vsec + 0x18, &val);
+	show_reg("Reserved", val);
+	pci_read_config_dword(dev, vsec + 0x1c, &val);
+	show_reg("Reserved", val);
+
+	pci_read_config_dword(dev, vsec + 0x20, &val);
+	show_reg("AFU Descriptor Offset", val);
+	pci_read_config_dword(dev, vsec + 0x24, &val);
+	show_reg("AFU Descriptor Size", val);
+	pci_read_config_dword(dev, vsec + 0x28, &val);
+	show_reg("Problem State Offset", val);
+	pci_read_config_dword(dev, vsec + 0x2c, &val);
+	show_reg("Problem State Size", val);
+
+	pci_read_config_dword(dev, vsec + 0x30, &val);
+	show_reg("Reserved", val);
+	pci_read_config_dword(dev, vsec + 0x34, &val);
+	show_reg("Reserved", val);
+	pci_read_config_dword(dev, vsec + 0x38, &val);
+	show_reg("Reserved", val);
+	pci_read_config_dword(dev, vsec + 0x3c, &val);
+	show_reg("Reserved", val);
+
+	pci_read_config_dword(dev, vsec + 0x40, &val);
+	show_reg("PSL Programming Port", val);
+	pci_read_config_dword(dev, vsec + 0x44, &val);
+	show_reg("PSL Programming Control", val);
+
+	pci_read_config_dword(dev, vsec + 0x48, &val);
+	show_reg("Reserved", val);
+	pci_read_config_dword(dev, vsec + 0x4c, &val);
+	show_reg("Reserved", val);
+
+	pci_read_config_dword(dev, vsec + 0x50, &val);
+	show_reg("Flash Address Register", val);
+	pci_read_config_dword(dev, vsec + 0x54, &val);
+	show_reg("Flash Size Register", val);
+	pci_read_config_dword(dev, vsec + 0x58, &val);
+	show_reg("Flash Status/Control Register", val);
+	pci_read_config_dword(dev, vsec + 0x58, &val);
+	show_reg("Flash Data Port", val);
+
+#undef show_reg
+}
+
+static void dump_afu_descriptor(struct cxl_afu *afu)
+{
+	u64 val, afu_cr_num, afu_cr_off, afu_cr_len;
+	int i;
+
+#define show_reg(name, what) \
+	dev_info(&afu->dev, "afu desc: %30s: %#llx\n", name, what)
+
+	val = AFUD_READ_INFO(afu);
+	show_reg("num_ints_per_process", AFUD_NUM_INTS_PER_PROC(val));
+	show_reg("num_of_processes", AFUD_NUM_PROCS(val));
+	show_reg("num_of_afu_CRs", AFUD_NUM_CRS(val));
+	show_reg("req_prog_mode", val & 0xffffULL);
+	afu_cr_num = AFUD_NUM_CRS(val);
+
+	val = AFUD_READ(afu, 0x8);
+	show_reg("Reserved", val);
+	val = AFUD_READ(afu, 0x10);
+	show_reg("Reserved", val);
+	val = AFUD_READ(afu, 0x18);
+	show_reg("Reserved", val);
+
+	val = AFUD_READ_CR(afu);
+	show_reg("Reserved", (val >> (63-7)) & 0xff);
+	show_reg("AFU_CR_len", AFUD_CR_LEN(val));
+	afu_cr_len = AFUD_CR_LEN(val) * 256;
+
+	val = AFUD_READ_CR_OFF(afu);
+	afu_cr_off = val;
+	show_reg("AFU_CR_offset", val);
+
+	val = AFUD_READ_PPPSA(afu);
+	show_reg("PerProcessPSA_control", (val >> (63-7)) & 0xff);
+	show_reg("PerProcessPSA Length", AFUD_PPPSA_LEN(val));
+
+	val = AFUD_READ_PPPSA_OFF(afu);
+	show_reg("PerProcessPSA_offset", val);
+
+	val = AFUD_READ_EB(afu);
+	show_reg("Reserved", (val >> (63-7)) & 0xff);
+	show_reg("AFU_EB_len", AFUD_EB_LEN(val));
+
+	val = AFUD_READ_EB_OFF(afu);
+	show_reg("AFU_EB_offset", val);
+
+	for (i = 0; i < afu_cr_num; i++) {
+		val = AFUD_READ_LE(afu, afu_cr_off + i * afu_cr_len);
+		show_reg("CR Vendor", val & 0xffff);
+		show_reg("CR Device", (val >> 16) & 0xffff);
+	}
+#undef show_reg
+}
+
+#define P8_CAPP_UNIT0_ID 0xBA
+#define P8_CAPP_UNIT1_ID 0XBE
+#define P9_CAPP_UNIT0_ID 0xC0
+#define P9_CAPP_UNIT1_ID 0xE0
+
+static int get_phb_index(struct device_node *np, u32 *phb_index)
+{
+	if (of_property_read_u32(np, "ibm,phb-index", phb_index))
+		return -ENODEV;
+	return 0;
+}
+
+static u64 get_capp_unit_id(struct device_node *np, u32 phb_index)
+{
+	/*
+	 * POWER 8:
+	 *  - For chips other than POWER8NVL, we only have CAPP 0,
+	 *    irrespective of which PHB is used.
+	 *  - For POWER8NVL, assume CAPP 0 is attached to PHB0 and
+	 *    CAPP 1 is attached to PHB1.
+	 */
+	if (cxl_is_power8()) {
+		if (!pvr_version_is(PVR_POWER8NVL))
+			return P8_CAPP_UNIT0_ID;
+
+		if (phb_index == 0)
+			return P8_CAPP_UNIT0_ID;
+
+		if (phb_index == 1)
+			return P8_CAPP_UNIT1_ID;
+	}
+
+	/*
+	 * POWER 9:
+	 *   PEC0 (PHB0). Capp ID = CAPP0 (0b1100_0000)
+	 *   PEC1 (PHB1 - PHB2). No capi mode
+	 *   PEC2 (PHB3 - PHB4 - PHB5): Capi mode on PHB3 only. Capp ID = CAPP1 (0b1110_0000)
+	 */
+	if (cxl_is_power9()) {
+		if (phb_index == 0)
+			return P9_CAPP_UNIT0_ID;
+
+		if (phb_index == 3)
+			return P9_CAPP_UNIT1_ID;
+	}
+
+	return 0;
+}
+
+int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
+			     u32 *phb_index, u64 *capp_unit_id)
+{
+	int rc;
+	struct device_node *np;
+	const __be32 *prop;
+
+	if (!(np = pnv_pci_get_phb_node(dev)))
+		return -ENODEV;
+
+	while (np && !(prop = of_get_property(np, "ibm,chip-id", NULL)))
+		np = of_get_next_parent(np);
+	if (!np)
+		return -ENODEV;
+
+	*chipid = be32_to_cpup(prop);
+
+	rc = get_phb_index(np, phb_index);
+	if (rc) {
+		pr_err("cxl: invalid phb index\n");
+		return rc;
+	}
+
+	*capp_unit_id = get_capp_unit_id(np, *phb_index);
+	of_node_put(np);
+	if (!*capp_unit_id) {
+		pr_err("cxl: No capp unit found for PHB[%lld,%d]. Make sure the adapter is on a capi-compatible slot\n",
+		       *chipid, *phb_index);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static DEFINE_MUTEX(indications_mutex);
+
+static int get_phb_indications(struct pci_dev *dev, u64 *capiind, u64 *asnind,
+			       u64 *nbwind)
+{
+	static u64 nbw, asn, capi = 0;
+	struct device_node *np;
+	const __be32 *prop;
+
+	mutex_lock(&indications_mutex);
+	if (!capi) {
+		if (!(np = pnv_pci_get_phb_node(dev))) {
+			mutex_unlock(&indications_mutex);
+			return -ENODEV;
+		}
+
+		prop = of_get_property(np, "ibm,phb-indications", NULL);
+		if (!prop) {
+			nbw = 0x0300UL; /* legacy values */
+			asn = 0x0400UL;
+			capi = 0x0200UL;
+		} else {
+			nbw = (u64)be32_to_cpu(prop[2]);
+			asn = (u64)be32_to_cpu(prop[1]);
+			capi = (u64)be32_to_cpu(prop[0]);
+		}
+		of_node_put(np);
+	}
+	*capiind = capi;
+	*asnind = asn;
+	*nbwind = nbw;
+	mutex_unlock(&indications_mutex);
+	return 0;
+}
+
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg)
+{
+	u64 xsl_dsnctl;
+	u64 capiind, asnind, nbwind;
+
+	/*
+	 * CAPI Identifier bits [0:7]
+	 * bit 61:60 MSI bits --> 0
+	 * bit 59 TVT selector --> 0
+	 */
+	if (get_phb_indications(dev, &capiind, &asnind, &nbwind))
+		return -ENODEV;
+
+	/*
+	 * Tell XSL where to route data to.
+	 * The field chipid should match the PHB CAPI_CMPM register
+	 */
+	xsl_dsnctl = (capiind << (63-15)); /* Bit 57 */
+	xsl_dsnctl |= (capp_unit_id << (63-15));
+
+	/* nMMU_ID Defaults to: b’000001001’*/
+	xsl_dsnctl |= ((u64)0x09 << (63-28));
+
+	/*
+	 * Used to identify CAPI packets which should be sorted into
+	 * the Non-Blocking queues by the PHB. This field should match
+	 * the PHB PBL_NBW_CMPM register
+	 * nbwind=0x03, bits [57:58], must include capi indicator.
+	 * Not supported on P9 DD1.
+	 */
+	xsl_dsnctl |= (nbwind << (63-55));
+
+	/*
+	 * Upper 16b address bits of ASB_Notify messages sent to the
+	 * system. Need to match the PHB’s ASN Compare/Mask Register.
+	 * Not supported on P9 DD1.
+	 */
+	xsl_dsnctl |= asnind;
+
+	*reg = xsl_dsnctl;
+	return 0;
+}
+
+static int init_implementation_adapter_regs_psl9(struct cxl *adapter,
+						 struct pci_dev *dev)
+{
+	u64 xsl_dsnctl, psl_fircntl;
+	u64 chipid;
+	u32 phb_index;
+	u64 capp_unit_id;
+	u64 psl_debug;
+	int rc;
+
+	rc = cxl_calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id);
+	if (rc)
+		return rc;
+
+	rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &xsl_dsnctl);
+	if (rc)
+		return rc;
+
+	cxl_p1_write(adapter, CXL_XSL9_DSNCTL, xsl_dsnctl);
+
+	/* Set fir_cntl to recommended value for production env */
+	psl_fircntl = (0x2ULL << (63-3)); /* ce_report */
+	psl_fircntl |= (0x1ULL << (63-6)); /* FIR_report */
+	psl_fircntl |= 0x1ULL; /* ce_thresh */
+	cxl_p1_write(adapter, CXL_PSL9_FIR_CNTL, psl_fircntl);
+
+	/* Setup the PSL to transmit packets on the PCIe before the
+	 * CAPP is enabled. Make sure that CAPP virtual machines are disabled
+	 */
+	cxl_p1_write(adapter, CXL_PSL9_DSNDCTL, 0x0001001000012A10ULL);
+
+	/*
+	 * A response to an ASB_Notify request is returned by the
+	 * system as an MMIO write to the address defined in
+	 * the PSL_TNR_ADDR register.
+	 * keep the Reset Value: 0x00020000E0000000
+	 */
+
+	/* Enable XSL rty limit */
+	cxl_p1_write(adapter, CXL_XSL9_DEF, 0x51F8000000000005ULL);
+
+	/* Change XSL_INV dummy read threshold */
+	cxl_p1_write(adapter, CXL_XSL9_INV, 0x0000040007FFC200ULL);
+
+	if (phb_index == 3) {
+		/* disable machines 31-47 and 20-27 for DMA */
+		cxl_p1_write(adapter, CXL_PSL9_APCDEDTYPE, 0x40000FF3FFFF0000ULL);
+	}
+
+	/* Snoop machines */
+	cxl_p1_write(adapter, CXL_PSL9_APCDEDALLOC, 0x800F000200000000ULL);
+
+	/* Enable NORST and DD2 features */
+	cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0xC000000000000000ULL);
+
+	/*
+	 * Check if PSL has data-cache. We need to flush adapter datacache
+	 * when as its about to be removed.
+	 */
+	psl_debug = cxl_p1_read(adapter, CXL_PSL9_DEBUG);
+	if (psl_debug & CXL_PSL_DEBUG_CDC) {
+		dev_dbg(&dev->dev, "No data-cache present\n");
+		adapter->native->no_data_cache = true;
+	}
+
+	return 0;
+}
+
+static int init_implementation_adapter_regs_psl8(struct cxl *adapter, struct pci_dev *dev)
+{
+	u64 psl_dsnctl, psl_fircntl;
+	u64 chipid;
+	u32 phb_index;
+	u64 capp_unit_id;
+	int rc;
+
+	rc = cxl_calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id);
+	if (rc)
+		return rc;
+
+	psl_dsnctl = 0x0000900000000000ULL; /* pteupd ttype, scdone */
+	psl_dsnctl |= (0x2ULL << (63-38)); /* MMIO hang pulse: 256 us */
+	/* Tell PSL where to route data to */
+	psl_dsnctl |= (chipid << (63-5));
+	psl_dsnctl |= (capp_unit_id << (63-13));
+
+	cxl_p1_write(adapter, CXL_PSL_DSNDCTL, psl_dsnctl);
+	cxl_p1_write(adapter, CXL_PSL_RESLCKTO, 0x20000000200ULL);
+	/* snoop write mask */
+	cxl_p1_write(adapter, CXL_PSL_SNWRALLOC, 0x00000000FFFFFFFFULL);
+	/* set fir_cntl to recommended value for production env */
+	psl_fircntl = (0x2ULL << (63-3)); /* ce_report */
+	psl_fircntl |= (0x1ULL << (63-6)); /* FIR_report */
+	psl_fircntl |= 0x1ULL; /* ce_thresh */
+	cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, psl_fircntl);
+	/* for debugging with trace arrays */
+	cxl_p1_write(adapter, CXL_PSL_TRACE, 0x0000FF7C00000000ULL);
+
+	return 0;
+}
+
+/* PSL */
+#define TBSYNC_CAL(n) (((u64)n & 0x7) << (63-3))
+#define TBSYNC_CNT(n) (((u64)n & 0x7) << (63-6))
+/* For the PSL this is a multiple for 0 < n <= 7: */
+#define PSL_2048_250MHZ_CYCLES 1
+
+static void write_timebase_ctrl_psl8(struct cxl *adapter)
+{
+	cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT,
+		     TBSYNC_CNT(2 * PSL_2048_250MHZ_CYCLES));
+}
+
+static u64 timebase_read_psl9(struct cxl *adapter)
+{
+	return cxl_p1_read(adapter, CXL_PSL9_Timebase);
+}
+
+static u64 timebase_read_psl8(struct cxl *adapter)
+{
+	return cxl_p1_read(adapter, CXL_PSL_Timebase);
+}
+
+static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev)
+{
+	struct device_node *np;
+
+	adapter->psl_timebase_synced = false;
+
+	if (!(np = pnv_pci_get_phb_node(dev)))
+		return;
+
+	/* Do not fail when CAPP timebase sync is not supported by OPAL */
+	of_node_get(np);
+	if (! of_get_property(np, "ibm,capp-timebase-sync", NULL)) {
+		of_node_put(np);
+		dev_info(&dev->dev, "PSL timebase inactive: OPAL support missing\n");
+		return;
+	}
+	of_node_put(np);
+
+	/*
+	 * Setup PSL Timebase Control and Status register
+	 * with the recommended Timebase Sync Count value
+	 */
+	if (adapter->native->sl_ops->write_timebase_ctrl)
+		adapter->native->sl_ops->write_timebase_ctrl(adapter);
+
+	/* Enable PSL Timebase */
+	cxl_p1_write(adapter, CXL_PSL_Control, 0x0000000000000000);
+	cxl_p1_write(adapter, CXL_PSL_Control, CXL_PSL_Control_tb);
+
+	return;
+}
+
+static int init_implementation_afu_regs_psl9(struct cxl_afu *afu)
+{
+	return 0;
+}
+
+static int init_implementation_afu_regs_psl8(struct cxl_afu *afu)
+{
+	/* read/write masks for this slice */
+	cxl_p1n_write(afu, CXL_PSL_APCALLOC_A, 0xFFFFFFFEFEFEFEFEULL);
+	/* APC read/write masks for this slice */
+	cxl_p1n_write(afu, CXL_PSL_COALLOC_A, 0xFF000000FEFEFEFEULL);
+	/* for debugging with trace arrays */
+	cxl_p1n_write(afu, CXL_PSL_SLICE_TRACE, 0x0000FFFF00000000ULL);
+	cxl_p1n_write(afu, CXL_PSL_RXCTL_A, CXL_PSL_RXCTL_AFUHP_4S);
+
+	return 0;
+}
+
+int cxl_pci_setup_irq(struct cxl *adapter, unsigned int hwirq,
+		unsigned int virq)
+{
+	struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
+
+	return pnv_cxl_ioda_msi_setup(dev, hwirq, virq);
+}
+
+int cxl_update_image_control(struct cxl *adapter)
+{
+	struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
+	int rc;
+	int vsec;
+	u8 image_state;
+
+	if (!(vsec = find_cxl_vsec(dev))) {
+		dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n");
+		return -ENODEV;
+	}
+
+	if ((rc = CXL_READ_VSEC_IMAGE_STATE(dev, vsec, &image_state))) {
+		dev_err(&dev->dev, "failed to read image state: %i\n", rc);
+		return rc;
+	}
+
+	if (adapter->perst_loads_image)
+		image_state |= CXL_VSEC_PERST_LOADS_IMAGE;
+	else
+		image_state &= ~CXL_VSEC_PERST_LOADS_IMAGE;
+
+	if (adapter->perst_select_user)
+		image_state |= CXL_VSEC_PERST_SELECT_USER;
+	else
+		image_state &= ~CXL_VSEC_PERST_SELECT_USER;
+
+	if ((rc = CXL_WRITE_VSEC_IMAGE_STATE(dev, vsec, image_state))) {
+		dev_err(&dev->dev, "failed to update image control: %i\n", rc);
+		return rc;
+	}
+
+	return 0;
+}
+
+int cxl_pci_alloc_one_irq(struct cxl *adapter)
+{
+	struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
+
+	return pnv_cxl_alloc_hwirqs(dev, 1);
+}
+
+void cxl_pci_release_one_irq(struct cxl *adapter, int hwirq)
+{
+	struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
+
+	return pnv_cxl_release_hwirqs(dev, hwirq, 1);
+}
+
+int cxl_pci_alloc_irq_ranges(struct cxl_irq_ranges *irqs,
+			struct cxl *adapter, unsigned int num)
+{
+	struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
+
+	return pnv_cxl_alloc_hwirq_ranges(irqs, dev, num);
+}
+
+void cxl_pci_release_irq_ranges(struct cxl_irq_ranges *irqs,
+				struct cxl *adapter)
+{
+	struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
+
+	pnv_cxl_release_hwirq_ranges(irqs, dev);
+}
+
+static int setup_cxl_bars(struct pci_dev *dev)
+{
+	/* Safety check in case we get backported to < 3.17 without M64 */
+	if ((p1_base(dev) < 0x100000000ULL) ||
+	    (p2_base(dev) < 0x100000000ULL)) {
+		dev_err(&dev->dev, "ABORTING: M32 BAR assignment incompatible with CXL\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * BAR 4/5 has a special meaning for CXL and must be programmed with a
+	 * special value corresponding to the CXL protocol address range.
+	 * For POWER 8/9 that means bits 48:49 must be set to 10
+	 */
+	pci_write_config_dword(dev, PCI_BASE_ADDRESS_4, 0x00000000);
+	pci_write_config_dword(dev, PCI_BASE_ADDRESS_5, 0x00020000);
+
+	return 0;
+}
+
+/* pciex node: ibm,opal-m64-window = <0x3d058 0x0 0x3d058 0x0 0x8 0x0>; */
+static int switch_card_to_cxl(struct pci_dev *dev)
+{
+	int vsec;
+	u8 val;
+	int rc;
+
+	dev_info(&dev->dev, "switch card to CXL\n");
+
+	if (!(vsec = find_cxl_vsec(dev))) {
+		dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n");
+		return -ENODEV;
+	}
+
+	if ((rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val))) {
+		dev_err(&dev->dev, "failed to read current mode control: %i", rc);
+		return rc;
+	}
+	val &= ~CXL_VSEC_PROTOCOL_MASK;
+	val |= CXL_VSEC_PROTOCOL_256TB | CXL_VSEC_PROTOCOL_ENABLE;
+	if ((rc = CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val))) {
+		dev_err(&dev->dev, "failed to enable CXL protocol: %i", rc);
+		return rc;
+	}
+	/*
+	 * The CAIA spec (v0.12 11.6 Bi-modal Device Support) states
+	 * we must wait 100ms after this mode switch before touching
+	 * PCIe config space.
+	 */
+	msleep(100);
+
+	return 0;
+}
+
+static int pci_map_slice_regs(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev)
+{
+	u64 p1n_base, p2n_base, afu_desc;
+	const u64 p1n_size = 0x100;
+	const u64 p2n_size = 0x1000;
+
+	p1n_base = p1_base(dev) + 0x10000 + (afu->slice * p1n_size);
+	p2n_base = p2_base(dev) + (afu->slice * p2n_size);
+	afu->psn_phys = p2_base(dev) + (adapter->native->ps_off + (afu->slice * adapter->ps_size));
+	afu_desc = p2_base(dev) + adapter->native->afu_desc_off + (afu->slice * adapter->native->afu_desc_size);
+
+	if (!(afu->native->p1n_mmio = ioremap(p1n_base, p1n_size)))
+		goto err;
+	if (!(afu->p2n_mmio = ioremap(p2n_base, p2n_size)))
+		goto err1;
+	if (afu_desc) {
+		if (!(afu->native->afu_desc_mmio = ioremap(afu_desc, adapter->native->afu_desc_size)))
+			goto err2;
+	}
+
+	return 0;
+err2:
+	iounmap(afu->p2n_mmio);
+err1:
+	iounmap(afu->native->p1n_mmio);
+err:
+	dev_err(&afu->dev, "Error mapping AFU MMIO regions\n");
+	return -ENOMEM;
+}
+
+static void pci_unmap_slice_regs(struct cxl_afu *afu)
+{
+	if (afu->p2n_mmio) {
+		iounmap(afu->p2n_mmio);
+		afu->p2n_mmio = NULL;
+	}
+	if (afu->native->p1n_mmio) {
+		iounmap(afu->native->p1n_mmio);
+		afu->native->p1n_mmio = NULL;
+	}
+	if (afu->native->afu_desc_mmio) {
+		iounmap(afu->native->afu_desc_mmio);
+		afu->native->afu_desc_mmio = NULL;
+	}
+}
+
+void cxl_pci_release_afu(struct device *dev)
+{
+	struct cxl_afu *afu = to_cxl_afu(dev);
+
+	pr_devel("%s\n", __func__);
+
+	idr_destroy(&afu->contexts_idr);
+	cxl_release_spa(afu);
+
+	kfree(afu->native);
+	kfree(afu);
+}
+
+/* Expects AFU struct to have recently been zeroed out */
+static int cxl_read_afu_descriptor(struct cxl_afu *afu)
+{
+	u64 val;
+
+	val = AFUD_READ_INFO(afu);
+	afu->pp_irqs = AFUD_NUM_INTS_PER_PROC(val);
+	afu->max_procs_virtualised = AFUD_NUM_PROCS(val);
+	afu->crs_num = AFUD_NUM_CRS(val);
+
+	if (AFUD_AFU_DIRECTED(val))
+		afu->modes_supported |= CXL_MODE_DIRECTED;
+	if (AFUD_DEDICATED_PROCESS(val))
+		afu->modes_supported |= CXL_MODE_DEDICATED;
+	if (AFUD_TIME_SLICED(val))
+		afu->modes_supported |= CXL_MODE_TIME_SLICED;
+
+	val = AFUD_READ_PPPSA(afu);
+	afu->pp_size = AFUD_PPPSA_LEN(val) * 4096;
+	afu->psa = AFUD_PPPSA_PSA(val);
+	if ((afu->pp_psa = AFUD_PPPSA_PP(val)))
+		afu->native->pp_offset = AFUD_READ_PPPSA_OFF(afu);
+
+	val = AFUD_READ_CR(afu);
+	afu->crs_len = AFUD_CR_LEN(val) * 256;
+	afu->crs_offset = AFUD_READ_CR_OFF(afu);
+
+
+	/* eb_len is in multiple of 4K */
+	afu->eb_len = AFUD_EB_LEN(AFUD_READ_EB(afu)) * 4096;
+	afu->eb_offset = AFUD_READ_EB_OFF(afu);
+
+	/* eb_off is 4K aligned so lower 12 bits are always zero */
+	if (EXTRACT_PPC_BITS(afu->eb_offset, 0, 11) != 0) {
+		dev_warn(&afu->dev,
+			 "Invalid AFU error buffer offset %Lx\n",
+			 afu->eb_offset);
+		dev_info(&afu->dev,
+			 "Ignoring AFU error buffer in the descriptor\n");
+		/* indicate that no afu buffer exists */
+		afu->eb_len = 0;
+	}
+
+	return 0;
+}
+
+static int cxl_afu_descriptor_looks_ok(struct cxl_afu *afu)
+{
+	int i, rc;
+	u32 val;
+
+	if (afu->psa && afu->adapter->ps_size <
+			(afu->native->pp_offset + afu->pp_size*afu->max_procs_virtualised)) {
+		dev_err(&afu->dev, "per-process PSA can't fit inside the PSA!\n");
+		return -ENODEV;
+	}
+
+	if (afu->pp_psa && (afu->pp_size < PAGE_SIZE))
+		dev_warn(&afu->dev, "AFU uses pp_size(%#016llx) < PAGE_SIZE per-process PSA!\n", afu->pp_size);
+
+	for (i = 0; i < afu->crs_num; i++) {
+		rc = cxl_ops->afu_cr_read32(afu, i, 0, &val);
+		if (rc || val == 0) {
+			dev_err(&afu->dev, "ABORTING: AFU configuration record %i is invalid\n", i);
+			return -EINVAL;
+		}
+	}
+
+	if ((afu->modes_supported & ~CXL_MODE_DEDICATED) && afu->max_procs_virtualised == 0) {
+		/*
+		 * We could also check this for the dedicated process model
+		 * since the architecture indicates it should be set to 1, but
+		 * in that case we ignore the value and I'd rather not risk
+		 * breaking any existing dedicated process AFUs that left it as
+		 * 0 (not that I'm aware of any). It is clearly an error for an
+		 * AFU directed AFU to set this to 0, and would have previously
+		 * triggered a bug resulting in the maximum not being enforced
+		 * at all since idr_alloc treats 0 as no maximum.
+		 */
+		dev_err(&afu->dev, "AFU does not support any processes\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int sanitise_afu_regs_psl9(struct cxl_afu *afu)
+{
+	u64 reg;
+
+	/*
+	 * Clear out any regs that contain either an IVTE or address or may be
+	 * waiting on an acknowledgment to try to be a bit safer as we bring
+	 * it online
+	 */
+	reg = cxl_p2n_read(afu, CXL_AFU_Cntl_An);
+	if ((reg & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) {
+		dev_warn(&afu->dev, "WARNING: AFU was not disabled: %#016llx\n", reg);
+		if (cxl_ops->afu_reset(afu))
+			return -EIO;
+		if (cxl_afu_disable(afu))
+			return -EIO;
+		if (cxl_psl_purge(afu))
+			return -EIO;
+	}
+	cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0x0000000000000000);
+	cxl_p1n_write(afu, CXL_PSL_AMBAR_An, 0x0000000000000000);
+	reg = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
+	if (reg) {
+		dev_warn(&afu->dev, "AFU had pending DSISR: %#016llx\n", reg);
+		if (reg & CXL_PSL9_DSISR_An_TF)
+			cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE);
+		else
+			cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A);
+	}
+	if (afu->adapter->native->sl_ops->register_serr_irq) {
+		reg = cxl_p1n_read(afu, CXL_PSL_SERR_An);
+		if (reg) {
+			if (reg & ~0x000000007fffffff)
+				dev_warn(&afu->dev, "AFU had pending SERR: %#016llx\n", reg);
+			cxl_p1n_write(afu, CXL_PSL_SERR_An, reg & ~0xffff);
+		}
+	}
+	reg = cxl_p2n_read(afu, CXL_PSL_ErrStat_An);
+	if (reg) {
+		dev_warn(&afu->dev, "AFU had pending error status: %#016llx\n", reg);
+		cxl_p2n_write(afu, CXL_PSL_ErrStat_An, reg);
+	}
+
+	return 0;
+}
+
+static int sanitise_afu_regs_psl8(struct cxl_afu *afu)
+{
+	u64 reg;
+
+	/*
+	 * Clear out any regs that contain either an IVTE or address or may be
+	 * waiting on an acknowledgement to try to be a bit safer as we bring
+	 * it online
+	 */
+	reg = cxl_p2n_read(afu, CXL_AFU_Cntl_An);
+	if ((reg & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) {
+		dev_warn(&afu->dev, "WARNING: AFU was not disabled: %#016llx\n", reg);
+		if (cxl_ops->afu_reset(afu))
+			return -EIO;
+		if (cxl_afu_disable(afu))
+			return -EIO;
+		if (cxl_psl_purge(afu))
+			return -EIO;
+	}
+	cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0x0000000000000000);
+	cxl_p1n_write(afu, CXL_PSL_IVTE_Limit_An, 0x0000000000000000);
+	cxl_p1n_write(afu, CXL_PSL_IVTE_Offset_An, 0x0000000000000000);
+	cxl_p1n_write(afu, CXL_PSL_AMBAR_An, 0x0000000000000000);
+	cxl_p1n_write(afu, CXL_PSL_SPOffset_An, 0x0000000000000000);
+	cxl_p1n_write(afu, CXL_HAURP_An, 0x0000000000000000);
+	cxl_p2n_write(afu, CXL_CSRP_An, 0x0000000000000000);
+	cxl_p2n_write(afu, CXL_AURP1_An, 0x0000000000000000);
+	cxl_p2n_write(afu, CXL_AURP0_An, 0x0000000000000000);
+	cxl_p2n_write(afu, CXL_SSTP1_An, 0x0000000000000000);
+	cxl_p2n_write(afu, CXL_SSTP0_An, 0x0000000000000000);
+	reg = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
+	if (reg) {
+		dev_warn(&afu->dev, "AFU had pending DSISR: %#016llx\n", reg);
+		if (reg & CXL_PSL_DSISR_TRANS)
+			cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE);
+		else
+			cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A);
+	}
+	if (afu->adapter->native->sl_ops->register_serr_irq) {
+		reg = cxl_p1n_read(afu, CXL_PSL_SERR_An);
+		if (reg) {
+			if (reg & ~0xffff)
+				dev_warn(&afu->dev, "AFU had pending SERR: %#016llx\n", reg);
+			cxl_p1n_write(afu, CXL_PSL_SERR_An, reg & ~0xffff);
+		}
+	}
+	reg = cxl_p2n_read(afu, CXL_PSL_ErrStat_An);
+	if (reg) {
+		dev_warn(&afu->dev, "AFU had pending error status: %#016llx\n", reg);
+		cxl_p2n_write(afu, CXL_PSL_ErrStat_An, reg);
+	}
+
+	return 0;
+}
+
+#define ERR_BUFF_MAX_COPY_SIZE PAGE_SIZE
+/*
+ * afu_eb_read:
+ * Called from sysfs and reads the afu error info buffer. The h/w only supports
+ * 4/8 bytes aligned access. So in case the requested offset/count arent 8 byte
+ * aligned the function uses a bounce buffer which can be max PAGE_SIZE.
+ */
+ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf,
+				loff_t off, size_t count)
+{
+	loff_t aligned_start, aligned_end;
+	size_t aligned_length;
+	void *tbuf;
+	const void __iomem *ebuf = afu->native->afu_desc_mmio + afu->eb_offset;
+
+	if (count == 0 || off < 0 || (size_t)off >= afu->eb_len)
+		return 0;
+
+	/* calculate aligned read window */
+	count = min((size_t)(afu->eb_len - off), count);
+	aligned_start = round_down(off, 8);
+	aligned_end = round_up(off + count, 8);
+	aligned_length = aligned_end - aligned_start;
+
+	/* max we can copy in one read is PAGE_SIZE */
+	if (aligned_length > ERR_BUFF_MAX_COPY_SIZE) {
+		aligned_length = ERR_BUFF_MAX_COPY_SIZE;
+		count = ERR_BUFF_MAX_COPY_SIZE - (off & 0x7);
+	}
+
+	/* use bounce buffer for copy */
+	tbuf = (void *)__get_free_page(GFP_KERNEL);
+	if (!tbuf)
+		return -ENOMEM;
+
+	/* perform aligned read from the mmio region */
+	memcpy_fromio(tbuf, ebuf + aligned_start, aligned_length);
+	memcpy(buf, tbuf + (off & 0x7), count);
+
+	free_page((unsigned long)tbuf);
+
+	return count;
+}
+
+static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev)
+{
+	int rc;
+
+	if ((rc = pci_map_slice_regs(afu, adapter, dev)))
+		return rc;
+
+	if (adapter->native->sl_ops->sanitise_afu_regs) {
+		rc = adapter->native->sl_ops->sanitise_afu_regs(afu);
+		if (rc)
+			goto err1;
+	}
+
+	/* We need to reset the AFU before we can read the AFU descriptor */
+	if ((rc = cxl_ops->afu_reset(afu)))
+		goto err1;
+
+	if (cxl_verbose)
+		dump_afu_descriptor(afu);
+
+	if ((rc = cxl_read_afu_descriptor(afu)))
+		goto err1;
+
+	if ((rc = cxl_afu_descriptor_looks_ok(afu)))
+		goto err1;
+
+	if (adapter->native->sl_ops->afu_regs_init)
+		if ((rc = adapter->native->sl_ops->afu_regs_init(afu)))
+			goto err1;
+
+	if (adapter->native->sl_ops->register_serr_irq)
+		if ((rc = adapter->native->sl_ops->register_serr_irq(afu)))
+			goto err1;
+
+	if ((rc = cxl_native_register_psl_irq(afu)))
+		goto err2;
+
+	atomic_set(&afu->configured_state, 0);
+	return 0;
+
+err2:
+	if (adapter->native->sl_ops->release_serr_irq)
+		adapter->native->sl_ops->release_serr_irq(afu);
+err1:
+	pci_unmap_slice_regs(afu);
+	return rc;
+}
+
+static void pci_deconfigure_afu(struct cxl_afu *afu)
+{
+	/*
+	 * It's okay to deconfigure when AFU is already locked, otherwise wait
+	 * until there are no readers
+	 */
+	if (atomic_read(&afu->configured_state) != -1) {
+		while (atomic_cmpxchg(&afu->configured_state, 0, -1) != -1)
+			schedule();
+	}
+	cxl_native_release_psl_irq(afu);
+	if (afu->adapter->native->sl_ops->release_serr_irq)
+		afu->adapter->native->sl_ops->release_serr_irq(afu);
+	pci_unmap_slice_regs(afu);
+}
+
+static int pci_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev)
+{
+	struct cxl_afu *afu;
+	int rc = -ENOMEM;
+
+	afu = cxl_alloc_afu(adapter, slice);
+	if (!afu)
+		return -ENOMEM;
+
+	afu->native = kzalloc(sizeof(struct cxl_afu_native), GFP_KERNEL);
+	if (!afu->native)
+		goto err_free_afu;
+
+	mutex_init(&afu->native->spa_mutex);
+
+	rc = dev_set_name(&afu->dev, "afu%i.%i", adapter->adapter_num, slice);
+	if (rc)
+		goto err_free_native;
+
+	rc = pci_configure_afu(afu, adapter, dev);
+	if (rc)
+		goto err_free_native;
+
+	/* Don't care if this fails */
+	cxl_debugfs_afu_add(afu);
+
+	/*
+	 * After we call this function we must not free the afu directly, even
+	 * if it returns an error!
+	 */
+	if ((rc = cxl_register_afu(afu)))
+		goto err_put1;
+
+	if ((rc = cxl_sysfs_afu_add(afu)))
+		goto err_put1;
+
+	adapter->afu[afu->slice] = afu;
+
+	if ((rc = cxl_pci_vphb_add(afu)))
+		dev_info(&afu->dev, "Can't register vPHB\n");
+
+	return 0;
+
+err_put1:
+	pci_deconfigure_afu(afu);
+	cxl_debugfs_afu_remove(afu);
+	device_unregister(&afu->dev);
+	return rc;
+
+err_free_native:
+	kfree(afu->native);
+err_free_afu:
+	kfree(afu);
+	return rc;
+
+}
+
+static void cxl_pci_remove_afu(struct cxl_afu *afu)
+{
+	pr_devel("%s\n", __func__);
+
+	if (!afu)
+		return;
+
+	cxl_pci_vphb_remove(afu);
+	cxl_sysfs_afu_remove(afu);
+	cxl_debugfs_afu_remove(afu);
+
+	spin_lock(&afu->adapter->afu_list_lock);
+	afu->adapter->afu[afu->slice] = NULL;
+	spin_unlock(&afu->adapter->afu_list_lock);
+
+	cxl_context_detach_all(afu);
+	cxl_ops->afu_deactivate_mode(afu, afu->current_mode);
+
+	pci_deconfigure_afu(afu);
+	device_unregister(&afu->dev);
+}
+
+int cxl_pci_reset(struct cxl *adapter)
+{
+	struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
+	int rc;
+
+	if (adapter->perst_same_image) {
+		dev_warn(&dev->dev,
+			 "cxl: refusing to reset/reflash when perst_reloads_same_image is set.\n");
+		return -EINVAL;
+	}
+
+	dev_info(&dev->dev, "CXL reset\n");
+
+	/*
+	 * The adapter is about to be reset, so ignore errors.
+	 */
+	cxl_data_cache_flush(adapter);
+
+	/* pcie_warm_reset requests a fundamental pci reset which includes a
+	 * PERST assert/deassert.  PERST triggers a loading of the image
+	 * if "user" or "factory" is selected in sysfs */
+	if ((rc = pci_set_pcie_reset_state(dev, pcie_warm_reset))) {
+		dev_err(&dev->dev, "cxl: pcie_warm_reset failed\n");
+		return rc;
+	}
+
+	return rc;
+}
+
+static int cxl_map_adapter_regs(struct cxl *adapter, struct pci_dev *dev)
+{
+	if (pci_request_region(dev, 2, "priv 2 regs"))
+		goto err1;
+	if (pci_request_region(dev, 0, "priv 1 regs"))
+		goto err2;
+
+	pr_devel("cxl_map_adapter_regs: p1: %#016llx %#llx, p2: %#016llx %#llx",
+			p1_base(dev), p1_size(dev), p2_base(dev), p2_size(dev));
+
+	if (!(adapter->native->p1_mmio = ioremap(p1_base(dev), p1_size(dev))))
+		goto err3;
+
+	if (!(adapter->native->p2_mmio = ioremap(p2_base(dev), p2_size(dev))))
+		goto err4;
+
+	return 0;
+
+err4:
+	iounmap(adapter->native->p1_mmio);
+	adapter->native->p1_mmio = NULL;
+err3:
+	pci_release_region(dev, 0);
+err2:
+	pci_release_region(dev, 2);
+err1:
+	return -ENOMEM;
+}
+
+static void cxl_unmap_adapter_regs(struct cxl *adapter)
+{
+	if (adapter->native->p1_mmio) {
+		iounmap(adapter->native->p1_mmio);
+		adapter->native->p1_mmio = NULL;
+		pci_release_region(to_pci_dev(adapter->dev.parent), 2);
+	}
+	if (adapter->native->p2_mmio) {
+		iounmap(adapter->native->p2_mmio);
+		adapter->native->p2_mmio = NULL;
+		pci_release_region(to_pci_dev(adapter->dev.parent), 0);
+	}
+}
+
+static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev)
+{
+	int vsec;
+	u32 afu_desc_off, afu_desc_size;
+	u32 ps_off, ps_size;
+	u16 vseclen;
+	u8 image_state;
+
+	if (!(vsec = find_cxl_vsec(dev))) {
+		dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n");
+		return -ENODEV;
+	}
+
+	CXL_READ_VSEC_LENGTH(dev, vsec, &vseclen);
+	if (vseclen < CXL_VSEC_MIN_SIZE) {
+		dev_err(&dev->dev, "ABORTING: CXL VSEC too short\n");
+		return -EINVAL;
+	}
+
+	CXL_READ_VSEC_STATUS(dev, vsec, &adapter->vsec_status);
+	CXL_READ_VSEC_PSL_REVISION(dev, vsec, &adapter->psl_rev);
+	CXL_READ_VSEC_CAIA_MAJOR(dev, vsec, &adapter->caia_major);
+	CXL_READ_VSEC_CAIA_MINOR(dev, vsec, &adapter->caia_minor);
+	CXL_READ_VSEC_BASE_IMAGE(dev, vsec, &adapter->base_image);
+	CXL_READ_VSEC_IMAGE_STATE(dev, vsec, &image_state);
+	adapter->user_image_loaded = !!(image_state & CXL_VSEC_USER_IMAGE_LOADED);
+	adapter->perst_select_user = !!(image_state & CXL_VSEC_USER_IMAGE_LOADED);
+	adapter->perst_loads_image = !!(image_state & CXL_VSEC_PERST_LOADS_IMAGE);
+
+	CXL_READ_VSEC_NAFUS(dev, vsec, &adapter->slices);
+	CXL_READ_VSEC_AFU_DESC_OFF(dev, vsec, &afu_desc_off);
+	CXL_READ_VSEC_AFU_DESC_SIZE(dev, vsec, &afu_desc_size);
+	CXL_READ_VSEC_PS_OFF(dev, vsec, &ps_off);
+	CXL_READ_VSEC_PS_SIZE(dev, vsec, &ps_size);
+
+	/* Convert everything to bytes, because there is NO WAY I'd look at the
+	 * code a month later and forget what units these are in ;-) */
+	adapter->native->ps_off = ps_off * 64 * 1024;
+	adapter->ps_size = ps_size * 64 * 1024;
+	adapter->native->afu_desc_off = afu_desc_off * 64 * 1024;
+	adapter->native->afu_desc_size = afu_desc_size * 64 * 1024;
+
+	/* Total IRQs - 1 PSL ERROR - #AFU*(1 slice error + 1 DSI) */
+	adapter->user_irqs = pnv_cxl_get_irq_count(dev) - 1 - 2*adapter->slices;
+
+	return 0;
+}
+
+/*
+ * Workaround a PCIe Host Bridge defect on some cards, that can cause
+ * malformed Transaction Layer Packet (TLP) errors to be erroneously
+ * reported. Mask this error in the Uncorrectable Error Mask Register.
+ *
+ * The upper nibble of the PSL revision is used to distinguish between
+ * different cards. The affected ones have it set to 0.
+ */
+static void cxl_fixup_malformed_tlp(struct cxl *adapter, struct pci_dev *dev)
+{
+	int aer;
+	u32 data;
+
+	if (adapter->psl_rev & 0xf000)
+		return;
+	if (!(aer = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR)))
+		return;
+	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &data);
+	if (data & PCI_ERR_UNC_MALF_TLP)
+		if (data & PCI_ERR_UNC_INTN)
+			return;
+	data |= PCI_ERR_UNC_MALF_TLP;
+	data |= PCI_ERR_UNC_INTN;
+	pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, data);
+}
+
+static bool cxl_compatible_caia_version(struct cxl *adapter)
+{
+	if (cxl_is_power8() && (adapter->caia_major == 1))
+		return true;
+
+	if (cxl_is_power9() && (adapter->caia_major == 2))
+		return true;
+
+	return false;
+}
+
+static int cxl_vsec_looks_ok(struct cxl *adapter, struct pci_dev *dev)
+{
+	if (adapter->vsec_status & CXL_STATUS_SECOND_PORT)
+		return -EBUSY;
+
+	if (adapter->vsec_status & CXL_UNSUPPORTED_FEATURES) {
+		dev_err(&dev->dev, "ABORTING: CXL requires unsupported features\n");
+		return -EINVAL;
+	}
+
+	if (!cxl_compatible_caia_version(adapter)) {
+		dev_info(&dev->dev, "Ignoring card. PSL type is not supported (caia version: %d)\n",
+			 adapter->caia_major);
+		return -ENODEV;
+	}
+
+	if (!adapter->slices) {
+		/* Once we support dynamic reprogramming we can use the card if
+		 * it supports loadable AFUs */
+		dev_err(&dev->dev, "ABORTING: Device has no AFUs\n");
+		return -EINVAL;
+	}
+
+	if (!adapter->native->afu_desc_off || !adapter->native->afu_desc_size) {
+		dev_err(&dev->dev, "ABORTING: VSEC shows no AFU descriptors\n");
+		return -EINVAL;
+	}
+
+	if (adapter->ps_size > p2_size(dev) - adapter->native->ps_off) {
+		dev_err(&dev->dev, "ABORTING: Problem state size larger than "
+				   "available in BAR2: 0x%llx > 0x%llx\n",
+			 adapter->ps_size, p2_size(dev) - adapter->native->ps_off);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+ssize_t cxl_pci_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len)
+{
+	return pci_read_vpd(to_pci_dev(adapter->dev.parent), 0, len, buf);
+}
+
+static void cxl_release_adapter(struct device *dev)
+{
+	struct cxl *adapter = to_cxl_adapter(dev);
+
+	pr_devel("cxl_release_adapter\n");
+
+	cxl_remove_adapter_nr(adapter);
+
+	kfree(adapter->native);
+	kfree(adapter);
+}
+
+#define CXL_PSL_ErrIVTE_tberror (0x1ull << (63-31))
+
+static int sanitise_adapter_regs(struct cxl *adapter)
+{
+	int rc = 0;
+
+	/* Clear PSL tberror bit by writing 1 to it */
+	cxl_p1_write(adapter, CXL_PSL_ErrIVTE, CXL_PSL_ErrIVTE_tberror);
+
+	if (adapter->native->sl_ops->invalidate_all) {
+		/* do not invalidate ERAT entries when not reloading on PERST */
+		if (cxl_is_power9() && (adapter->perst_loads_image))
+			return 0;
+		rc = adapter->native->sl_ops->invalidate_all(adapter);
+	}
+
+	return rc;
+}
+
+/* This should contain *only* operations that can safely be done in
+ * both creation and recovery.
+ */
+static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev)
+{
+	int rc;
+
+	adapter->dev.parent = &dev->dev;
+	adapter->dev.release = cxl_release_adapter;
+	pci_set_drvdata(dev, adapter);
+
+	rc = pci_enable_device(dev);
+	if (rc) {
+		dev_err(&dev->dev, "pci_enable_device failed: %i\n", rc);
+		return rc;
+	}
+
+	if ((rc = cxl_read_vsec(adapter, dev)))
+		return rc;
+
+	if ((rc = cxl_vsec_looks_ok(adapter, dev)))
+	        return rc;
+
+	cxl_fixup_malformed_tlp(adapter, dev);
+
+	if ((rc = setup_cxl_bars(dev)))
+		return rc;
+
+	if ((rc = switch_card_to_cxl(dev)))
+		return rc;
+
+	if ((rc = cxl_update_image_control(adapter)))
+		return rc;
+
+	if ((rc = cxl_map_adapter_regs(adapter, dev)))
+		return rc;
+
+	if ((rc = sanitise_adapter_regs(adapter)))
+		goto err;
+
+	if ((rc = adapter->native->sl_ops->adapter_regs_init(adapter, dev)))
+		goto err;
+
+	/* Required for devices using CAPP DMA mode, harmless for others */
+	pci_set_master(dev);
+
+	adapter->tunneled_ops_supported = false;
+
+	if (cxl_is_power9()) {
+		if (pnv_pci_set_tunnel_bar(dev, 0x00020000E0000000ull, 1))
+			dev_info(&dev->dev, "Tunneled operations unsupported\n");
+		else
+			adapter->tunneled_ops_supported = true;
+	}
+
+	if ((rc = pnv_phb_to_cxl_mode(dev, adapter->native->sl_ops->capi_mode)))
+		goto err;
+
+	/* If recovery happened, the last step is to turn on snooping.
+	 * In the non-recovery case this has no effect */
+	if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON)))
+		goto err;
+
+	/* Ignore error, adapter init is not dependant on timebase sync */
+	cxl_setup_psl_timebase(adapter, dev);
+
+	if ((rc = cxl_native_register_psl_err_irq(adapter)))
+		goto err;
+
+	return 0;
+
+err:
+	cxl_unmap_adapter_regs(adapter);
+	return rc;
+
+}
+
+static void cxl_deconfigure_adapter(struct cxl *adapter)
+{
+	struct pci_dev *pdev = to_pci_dev(adapter->dev.parent);
+
+	if (cxl_is_power9())
+		pnv_pci_set_tunnel_bar(pdev, 0x00020000E0000000ull, 0);
+
+	cxl_native_release_psl_err_irq(adapter);
+	cxl_unmap_adapter_regs(adapter);
+
+	pci_disable_device(pdev);
+}
+
+static void cxl_stop_trace_psl9(struct cxl *adapter)
+{
+	int traceid;
+	u64 trace_state, trace_mask;
+	struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
+
+	/* read each tracearray state and issue mmio to stop them is needed */
+	for (traceid = 0; traceid <= CXL_PSL9_TRACEID_MAX; ++traceid) {
+		trace_state = cxl_p1_read(adapter, CXL_PSL9_CTCCFG);
+		trace_mask = (0x3ULL << (62 - traceid * 2));
+		trace_state = (trace_state & trace_mask) >> (62 - traceid * 2);
+		dev_dbg(&dev->dev, "cxl: Traceid-%d trace_state=0x%0llX\n",
+			traceid, trace_state);
+
+		/* issue mmio if the trace array isn't in FIN state */
+		if (trace_state != CXL_PSL9_TRACESTATE_FIN)
+			cxl_p1_write(adapter, CXL_PSL9_TRACECFG,
+				     0x8400000000000000ULL | traceid);
+	}
+}
+
+static void cxl_stop_trace_psl8(struct cxl *adapter)
+{
+	int slice;
+
+	/* Stop the trace */
+	cxl_p1_write(adapter, CXL_PSL_TRACE, 0x8000000000000017LL);
+
+	/* Stop the slice traces */
+	spin_lock(&adapter->afu_list_lock);
+	for (slice = 0; slice < adapter->slices; slice++) {
+		if (adapter->afu[slice])
+			cxl_p1n_write(adapter->afu[slice], CXL_PSL_SLICE_TRACE,
+				      0x8000000000000000LL);
+	}
+	spin_unlock(&adapter->afu_list_lock);
+}
+
+static const struct cxl_service_layer_ops psl9_ops = {
+	.adapter_regs_init = init_implementation_adapter_regs_psl9,
+	.invalidate_all = cxl_invalidate_all_psl9,
+	.afu_regs_init = init_implementation_afu_regs_psl9,
+	.sanitise_afu_regs = sanitise_afu_regs_psl9,
+	.register_serr_irq = cxl_native_register_serr_irq,
+	.release_serr_irq = cxl_native_release_serr_irq,
+	.handle_interrupt = cxl_irq_psl9,
+	.fail_irq = cxl_fail_irq_psl,
+	.activate_dedicated_process = cxl_activate_dedicated_process_psl9,
+	.attach_afu_directed = cxl_attach_afu_directed_psl9,
+	.attach_dedicated_process = cxl_attach_dedicated_process_psl9,
+	.update_dedicated_ivtes = cxl_update_dedicated_ivtes_psl9,
+	.debugfs_add_adapter_regs = cxl_debugfs_add_adapter_regs_psl9,
+	.debugfs_add_afu_regs = cxl_debugfs_add_afu_regs_psl9,
+	.psl_irq_dump_registers = cxl_native_irq_dump_regs_psl9,
+	.err_irq_dump_registers = cxl_native_err_irq_dump_regs_psl9,
+	.debugfs_stop_trace = cxl_stop_trace_psl9,
+	.timebase_read = timebase_read_psl9,
+	.capi_mode = OPAL_PHB_CAPI_MODE_CAPI,
+	.needs_reset_before_disable = true,
+};
+
+static const struct cxl_service_layer_ops psl8_ops = {
+	.adapter_regs_init = init_implementation_adapter_regs_psl8,
+	.invalidate_all = cxl_invalidate_all_psl8,
+	.afu_regs_init = init_implementation_afu_regs_psl8,
+	.sanitise_afu_regs = sanitise_afu_regs_psl8,
+	.register_serr_irq = cxl_native_register_serr_irq,
+	.release_serr_irq = cxl_native_release_serr_irq,
+	.handle_interrupt = cxl_irq_psl8,
+	.fail_irq = cxl_fail_irq_psl,
+	.activate_dedicated_process = cxl_activate_dedicated_process_psl8,
+	.attach_afu_directed = cxl_attach_afu_directed_psl8,
+	.attach_dedicated_process = cxl_attach_dedicated_process_psl8,
+	.update_dedicated_ivtes = cxl_update_dedicated_ivtes_psl8,
+	.debugfs_add_adapter_regs = cxl_debugfs_add_adapter_regs_psl8,
+	.debugfs_add_afu_regs = cxl_debugfs_add_afu_regs_psl8,
+	.psl_irq_dump_registers = cxl_native_irq_dump_regs_psl8,
+	.err_irq_dump_registers = cxl_native_err_irq_dump_regs_psl8,
+	.debugfs_stop_trace = cxl_stop_trace_psl8,
+	.write_timebase_ctrl = write_timebase_ctrl_psl8,
+	.timebase_read = timebase_read_psl8,
+	.capi_mode = OPAL_PHB_CAPI_MODE_CAPI,
+	.needs_reset_before_disable = true,
+};
+
+static void set_sl_ops(struct cxl *adapter, struct pci_dev *dev)
+{
+	if (cxl_is_power8()) {
+		dev_info(&dev->dev, "Device uses a PSL8\n");
+		adapter->native->sl_ops = &psl8_ops;
+	} else {
+		dev_info(&dev->dev, "Device uses a PSL9\n");
+		adapter->native->sl_ops = &psl9_ops;
+	}
+}
+
+
+static struct cxl *cxl_pci_init_adapter(struct pci_dev *dev)
+{
+	struct cxl *adapter;
+	int rc;
+
+	adapter = cxl_alloc_adapter();
+	if (!adapter)
+		return ERR_PTR(-ENOMEM);
+
+	adapter->native = kzalloc(sizeof(struct cxl_native), GFP_KERNEL);
+	if (!adapter->native) {
+		rc = -ENOMEM;
+		goto err_release;
+	}
+
+	set_sl_ops(adapter, dev);
+
+	/* Set defaults for parameters which need to persist over
+	 * configure/reconfigure
+	 */
+	adapter->perst_loads_image = true;
+	adapter->perst_same_image = false;
+
+	rc = cxl_configure_adapter(adapter, dev);
+	if (rc) {
+		pci_disable_device(dev);
+		goto err_release;
+	}
+
+	/* Don't care if this one fails: */
+	cxl_debugfs_adapter_add(adapter);
+
+	/*
+	 * After we call this function we must not free the adapter directly,
+	 * even if it returns an error!
+	 */
+	if ((rc = cxl_register_adapter(adapter)))
+		goto err_put1;
+
+	if ((rc = cxl_sysfs_adapter_add(adapter)))
+		goto err_put1;
+
+	/* Release the context lock as adapter is configured */
+	cxl_adapter_context_unlock(adapter);
+
+	return adapter;
+
+err_put1:
+	/* This should mirror cxl_remove_adapter, except without the
+	 * sysfs parts
+	 */
+	cxl_debugfs_adapter_remove(adapter);
+	cxl_deconfigure_adapter(adapter);
+	device_unregister(&adapter->dev);
+	return ERR_PTR(rc);
+
+err_release:
+	cxl_release_adapter(&adapter->dev);
+	return ERR_PTR(rc);
+}
+
+static void cxl_pci_remove_adapter(struct cxl *adapter)
+{
+	pr_devel("cxl_remove_adapter\n");
+
+	cxl_sysfs_adapter_remove(adapter);
+	cxl_debugfs_adapter_remove(adapter);
+
+	/*
+	 * Flush adapter datacache as its about to be removed.
+	 */
+	cxl_data_cache_flush(adapter);
+
+	cxl_deconfigure_adapter(adapter);
+
+	device_unregister(&adapter->dev);
+}
+
+#define CXL_MAX_PCIEX_PARENT 2
+
+int cxl_slot_is_switched(struct pci_dev *dev)
+{
+	struct device_node *np;
+	int depth = 0;
+	const __be32 *prop;
+
+	if (!(np = pci_device_to_OF_node(dev))) {
+		pr_err("cxl: np = NULL\n");
+		return -ENODEV;
+	}
+	of_node_get(np);
+	while (np) {
+		np = of_get_next_parent(np);
+		prop = of_get_property(np, "device_type", NULL);
+		if (!prop || strcmp((char *)prop, "pciex"))
+			break;
+		depth++;
+	}
+	of_node_put(np);
+	return (depth > CXL_MAX_PCIEX_PARENT);
+}
+
+static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+	struct cxl *adapter;
+	int slice;
+	int rc;
+
+	if (cxl_pci_is_vphb_device(dev)) {
+		dev_dbg(&dev->dev, "cxl_init_adapter: Ignoring cxl vphb device\n");
+		return -ENODEV;
+	}
+
+	if (cxl_slot_is_switched(dev)) {
+		dev_info(&dev->dev, "Ignoring card on incompatible PCI slot\n");
+		return -ENODEV;
+	}
+
+	if (cxl_is_power9() && !radix_enabled()) {
+		dev_info(&dev->dev, "Only Radix mode supported\n");
+		return -ENODEV;
+	}
+
+	if (cxl_verbose)
+		dump_cxl_config_space(dev);
+
+	adapter = cxl_pci_init_adapter(dev);
+	if (IS_ERR(adapter)) {
+		dev_err(&dev->dev, "cxl_init_adapter failed: %li\n", PTR_ERR(adapter));
+		return PTR_ERR(adapter);
+	}
+
+	for (slice = 0; slice < adapter->slices; slice++) {
+		if ((rc = pci_init_afu(adapter, slice, dev))) {
+			dev_err(&dev->dev, "AFU %i failed to initialise: %i\n", slice, rc);
+			continue;
+		}
+
+		rc = cxl_afu_select_best_mode(adapter->afu[slice]);
+		if (rc)
+			dev_err(&dev->dev, "AFU %i failed to start: %i\n", slice, rc);
+	}
+
+	return 0;
+}
+
+static void cxl_remove(struct pci_dev *dev)
+{
+	struct cxl *adapter = pci_get_drvdata(dev);
+	struct cxl_afu *afu;
+	int i;
+
+	/*
+	 * Lock to prevent someone grabbing a ref through the adapter list as
+	 * we are removing it
+	 */
+	for (i = 0; i < adapter->slices; i++) {
+		afu = adapter->afu[i];
+		cxl_pci_remove_afu(afu);
+	}
+	cxl_pci_remove_adapter(adapter);
+}
+
+static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu,
+						pci_channel_state_t state)
+{
+	struct pci_dev *afu_dev;
+	pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET;
+	pci_ers_result_t afu_result = PCI_ERS_RESULT_NEED_RESET;
+
+	/* There should only be one entry, but go through the list
+	 * anyway
+	 */
+	if (afu == NULL || afu->phb == NULL)
+		return result;
+
+	list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
+		if (!afu_dev->driver)
+			continue;
+
+		afu_dev->error_state = state;
+
+		if (afu_dev->driver->err_handler)
+			afu_result = afu_dev->driver->err_handler->error_detected(afu_dev,
+										  state);
+		/* Disconnect trumps all, NONE trumps NEED_RESET */
+		if (afu_result == PCI_ERS_RESULT_DISCONNECT)
+			result = PCI_ERS_RESULT_DISCONNECT;
+		else if ((afu_result == PCI_ERS_RESULT_NONE) &&
+			 (result == PCI_ERS_RESULT_NEED_RESET))
+			result = PCI_ERS_RESULT_NONE;
+	}
+	return result;
+}
+
+static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev,
+					       pci_channel_state_t state)
+{
+	struct cxl *adapter = pci_get_drvdata(pdev);
+	struct cxl_afu *afu;
+	pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET;
+	pci_ers_result_t afu_result = PCI_ERS_RESULT_NEED_RESET;
+	int i;
+
+	/* At this point, we could still have an interrupt pending.
+	 * Let's try to get them out of the way before they do
+	 * anything we don't like.
+	 */
+	schedule();
+
+	/* If we're permanently dead, give up. */
+	if (state == pci_channel_io_perm_failure) {
+		spin_lock(&adapter->afu_list_lock);
+		for (i = 0; i < adapter->slices; i++) {
+			afu = adapter->afu[i];
+			/*
+			 * Tell the AFU drivers; but we don't care what they
+			 * say, we're going away.
+			 */
+			cxl_vphb_error_detected(afu, state);
+		}
+		spin_unlock(&adapter->afu_list_lock);
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	/* Are we reflashing?
+	 *
+	 * If we reflash, we could come back as something entirely
+	 * different, including a non-CAPI card. As such, by default
+	 * we don't participate in the process. We'll be unbound and
+	 * the slot re-probed. (TODO: check EEH doesn't blindly rebind
+	 * us!)
+	 *
+	 * However, this isn't the entire story: for reliablity
+	 * reasons, we usually want to reflash the FPGA on PERST in
+	 * order to get back to a more reliable known-good state.
+	 *
+	 * This causes us a bit of a problem: if we reflash we can't
+	 * trust that we'll come back the same - we could have a new
+	 * image and been PERSTed in order to load that
+	 * image. However, most of the time we actually *will* come
+	 * back the same - for example a regular EEH event.
+	 *
+	 * Therefore, we allow the user to assert that the image is
+	 * indeed the same and that we should continue on into EEH
+	 * anyway.
+	 */
+	if (adapter->perst_loads_image && !adapter->perst_same_image) {
+		/* TODO take the PHB out of CXL mode */
+		dev_info(&pdev->dev, "reflashing, so opting out of EEH!\n");
+		return PCI_ERS_RESULT_NONE;
+	}
+
+	/*
+	 * At this point, we want to try to recover.  We'll always
+	 * need a complete slot reset: we don't trust any other reset.
+	 *
+	 * Now, we go through each AFU:
+	 *  - We send the driver, if bound, an error_detected callback.
+	 *    We expect it to clean up, but it can also tell us to give
+	 *    up and permanently detach the card. To simplify things, if
+	 *    any bound AFU driver doesn't support EEH, we give up on EEH.
+	 *
+	 *  - We detach all contexts associated with the AFU. This
+	 *    does not free them, but puts them into a CLOSED state
+	 *    which causes any the associated files to return useful
+	 *    errors to userland. It also unmaps, but does not free,
+	 *    any IRQs.
+	 *
+	 *  - We clean up our side: releasing and unmapping resources we hold
+	 *    so we can wire them up again when the hardware comes back up.
+	 *
+	 * Driver authors should note:
+	 *
+	 *  - Any contexts you create in your kernel driver (except
+	 *    those associated with anonymous file descriptors) are
+	 *    your responsibility to free and recreate. Likewise with
+	 *    any attached resources.
+	 *
+	 *  - We will take responsibility for re-initialising the
+	 *    device context (the one set up for you in
+	 *    cxl_pci_enable_device_hook and accessed through
+	 *    cxl_get_context). If you've attached IRQs or other
+	 *    resources to it, they remains yours to free.
+	 *
+	 * You can call the same functions to release resources as you
+	 * normally would: we make sure that these functions continue
+	 * to work when the hardware is down.
+	 *
+	 * Two examples:
+	 *
+	 * 1) If you normally free all your resources at the end of
+	 *    each request, or if you use anonymous FDs, your
+	 *    error_detected callback can simply set a flag to tell
+	 *    your driver not to start any new calls. You can then
+	 *    clear the flag in the resume callback.
+	 *
+	 * 2) If you normally allocate your resources on startup:
+	 *     * Set a flag in error_detected as above.
+	 *     * Let CXL detach your contexts.
+	 *     * In slot_reset, free the old resources and allocate new ones.
+	 *     * In resume, clear the flag to allow things to start.
+	 */
+
+	/* Make sure no one else changes the afu list */
+	spin_lock(&adapter->afu_list_lock);
+
+	for (i = 0; i < adapter->slices; i++) {
+		afu = adapter->afu[i];
+
+		if (afu == NULL)
+			continue;
+
+		afu_result = cxl_vphb_error_detected(afu, state);
+		cxl_context_detach_all(afu);
+		cxl_ops->afu_deactivate_mode(afu, afu->current_mode);
+		pci_deconfigure_afu(afu);
+
+		/* Disconnect trumps all, NONE trumps NEED_RESET */
+		if (afu_result == PCI_ERS_RESULT_DISCONNECT)
+			result = PCI_ERS_RESULT_DISCONNECT;
+		else if ((afu_result == PCI_ERS_RESULT_NONE) &&
+			 (result == PCI_ERS_RESULT_NEED_RESET))
+			result = PCI_ERS_RESULT_NONE;
+	}
+	spin_unlock(&adapter->afu_list_lock);
+
+	/* should take the context lock here */
+	if (cxl_adapter_context_lock(adapter) != 0)
+		dev_warn(&adapter->dev,
+			 "Couldn't take context lock with %d active-contexts\n",
+			 atomic_read(&adapter->contexts_num));
+
+	cxl_deconfigure_adapter(adapter);
+
+	return result;
+}
+
+static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev)
+{
+	struct cxl *adapter = pci_get_drvdata(pdev);
+	struct cxl_afu *afu;
+	struct cxl_context *ctx;
+	struct pci_dev *afu_dev;
+	pci_ers_result_t afu_result = PCI_ERS_RESULT_RECOVERED;
+	pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED;
+	int i;
+
+	if (cxl_configure_adapter(adapter, pdev))
+		goto err;
+
+	/*
+	 * Unlock context activation for the adapter. Ideally this should be
+	 * done in cxl_pci_resume but cxlflash module tries to activate the
+	 * master context as part of slot_reset callback.
+	 */
+	cxl_adapter_context_unlock(adapter);
+
+	spin_lock(&adapter->afu_list_lock);
+	for (i = 0; i < adapter->slices; i++) {
+		afu = adapter->afu[i];
+
+		if (afu == NULL)
+			continue;
+
+		if (pci_configure_afu(afu, adapter, pdev))
+			goto err_unlock;
+
+		if (cxl_afu_select_best_mode(afu))
+			goto err_unlock;
+
+		if (afu->phb == NULL)
+			continue;
+
+		list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
+			/* Reset the device context.
+			 * TODO: make this less disruptive
+			 */
+			ctx = cxl_get_context(afu_dev);
+
+			if (ctx && cxl_release_context(ctx))
+				goto err_unlock;
+
+			ctx = cxl_dev_context_init(afu_dev);
+			if (IS_ERR(ctx))
+				goto err_unlock;
+
+			afu_dev->dev.archdata.cxl_ctx = ctx;
+
+			if (cxl_ops->afu_check_and_enable(afu))
+				goto err_unlock;
+
+			afu_dev->error_state = pci_channel_io_normal;
+
+			/* If there's a driver attached, allow it to
+			 * chime in on recovery. Drivers should check
+			 * if everything has come back OK, but
+			 * shouldn't start new work until we call
+			 * their resume function.
+			 */
+			if (!afu_dev->driver)
+				continue;
+
+			if (afu_dev->driver->err_handler &&
+			    afu_dev->driver->err_handler->slot_reset)
+				afu_result = afu_dev->driver->err_handler->slot_reset(afu_dev);
+
+			if (afu_result == PCI_ERS_RESULT_DISCONNECT)
+				result = PCI_ERS_RESULT_DISCONNECT;
+		}
+	}
+
+	spin_unlock(&adapter->afu_list_lock);
+	return result;
+
+err_unlock:
+	spin_unlock(&adapter->afu_list_lock);
+
+err:
+	/* All the bits that happen in both error_detected and cxl_remove
+	 * should be idempotent, so we don't need to worry about leaving a mix
+	 * of unconfigured and reconfigured resources.
+	 */
+	dev_err(&pdev->dev, "EEH recovery failed. Asking to be disconnected.\n");
+	return PCI_ERS_RESULT_DISCONNECT;
+}
+
+static void cxl_pci_resume(struct pci_dev *pdev)
+{
+	struct cxl *adapter = pci_get_drvdata(pdev);
+	struct cxl_afu *afu;
+	struct pci_dev *afu_dev;
+	int i;
+
+	/* Everything is back now. Drivers should restart work now.
+	 * This is not the place to be checking if everything came back up
+	 * properly, because there's no return value: do that in slot_reset.
+	 */
+	spin_lock(&adapter->afu_list_lock);
+	for (i = 0; i < adapter->slices; i++) {
+		afu = adapter->afu[i];
+
+		if (afu == NULL || afu->phb == NULL)
+			continue;
+
+		list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
+			if (afu_dev->driver && afu_dev->driver->err_handler &&
+			    afu_dev->driver->err_handler->resume)
+				afu_dev->driver->err_handler->resume(afu_dev);
+		}
+	}
+	spin_unlock(&adapter->afu_list_lock);
+}
+
+static const struct pci_error_handlers cxl_err_handler = {
+	.error_detected = cxl_pci_error_detected,
+	.slot_reset = cxl_pci_slot_reset,
+	.resume = cxl_pci_resume,
+};
+
+struct pci_driver cxl_pci_driver = {
+	.name = "cxl-pci",
+	.id_table = cxl_pci_tbl,
+	.probe = cxl_probe,
+	.remove = cxl_remove,
+	.shutdown = cxl_remove,
+	.err_handler = &cxl_err_handler,
+};
diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c
new file mode 100644
index 000000000..0baa229d2
--- /dev/null
+++ b/drivers/misc/cxl/sysfs.c
@@ -0,0 +1,774 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/sysfs.h>
+#include <linux/pci_regs.h>
+
+#include "cxl.h"
+
+#define to_afu_chardev_m(d) dev_get_drvdata(d)
+
+/*********  Adapter attributes  **********************************************/
+
+static ssize_t caia_version_show(struct device *device,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	struct cxl *adapter = to_cxl_adapter(device);
+
+	return scnprintf(buf, PAGE_SIZE, "%i.%i\n", adapter->caia_major,
+			 adapter->caia_minor);
+}
+
+static ssize_t psl_revision_show(struct device *device,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	struct cxl *adapter = to_cxl_adapter(device);
+
+	return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->psl_rev);
+}
+
+static ssize_t base_image_show(struct device *device,
+			       struct device_attribute *attr,
+			       char *buf)
+{
+	struct cxl *adapter = to_cxl_adapter(device);
+
+	return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->base_image);
+}
+
+static ssize_t image_loaded_show(struct device *device,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	struct cxl *adapter = to_cxl_adapter(device);
+
+	if (adapter->user_image_loaded)
+		return scnprintf(buf, PAGE_SIZE, "user\n");
+	return scnprintf(buf, PAGE_SIZE, "factory\n");
+}
+
+static ssize_t psl_timebase_synced_show(struct device *device,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct cxl *adapter = to_cxl_adapter(device);
+	u64 psl_tb, delta;
+
+	/* Recompute the status only in native mode */
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		psl_tb = adapter->native->sl_ops->timebase_read(adapter);
+		delta = abs(mftb() - psl_tb);
+
+		/* CORE TB and PSL TB difference <= 16usecs ? */
+		adapter->psl_timebase_synced = (tb_to_ns(delta) < 16000) ? true : false;
+		pr_devel("PSL timebase %s - delta: 0x%016llx\n",
+			 (tb_to_ns(delta) < 16000) ? "synchronized" :
+			 "not synchronized", tb_to_ns(delta));
+	}
+	return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->psl_timebase_synced);
+}
+
+static ssize_t tunneled_ops_supported_show(struct device *device,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct cxl *adapter = to_cxl_adapter(device);
+
+	return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->tunneled_ops_supported);
+}
+
+static ssize_t reset_adapter_store(struct device *device,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct cxl *adapter = to_cxl_adapter(device);
+	int rc;
+	int val;
+
+	rc = sscanf(buf, "%i", &val);
+	if ((rc != 1) || (val != 1 && val != -1))
+		return -EINVAL;
+
+	/*
+	 * See if we can lock the context mapping that's only allowed
+	 * when there are no contexts attached to the adapter. Once
+	 * taken this will also prevent any context from getting activated.
+	 */
+	if (val == 1) {
+		rc =  cxl_adapter_context_lock(adapter);
+		if (rc)
+			goto out;
+
+		rc = cxl_ops->adapter_reset(adapter);
+		/* In case reset failed release context lock */
+		if (rc)
+			cxl_adapter_context_unlock(adapter);
+
+	} else if (val == -1) {
+		/* Perform a forced adapter reset */
+		rc = cxl_ops->adapter_reset(adapter);
+	}
+
+out:
+	return rc ? rc : count;
+}
+
+static ssize_t load_image_on_perst_show(struct device *device,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	struct cxl *adapter = to_cxl_adapter(device);
+
+	if (!adapter->perst_loads_image)
+		return scnprintf(buf, PAGE_SIZE, "none\n");
+
+	if (adapter->perst_select_user)
+		return scnprintf(buf, PAGE_SIZE, "user\n");
+	return scnprintf(buf, PAGE_SIZE, "factory\n");
+}
+
+static ssize_t load_image_on_perst_store(struct device *device,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct cxl *adapter = to_cxl_adapter(device);
+	int rc;
+
+	if (!strncmp(buf, "none", 4))
+		adapter->perst_loads_image = false;
+	else if (!strncmp(buf, "user", 4)) {
+		adapter->perst_select_user = true;
+		adapter->perst_loads_image = true;
+	} else if (!strncmp(buf, "factory", 7)) {
+		adapter->perst_select_user = false;
+		adapter->perst_loads_image = true;
+	} else
+		return -EINVAL;
+
+	if ((rc = cxl_update_image_control(adapter)))
+		return rc;
+
+	return count;
+}
+
+static ssize_t perst_reloads_same_image_show(struct device *device,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	struct cxl *adapter = to_cxl_adapter(device);
+
+	return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->perst_same_image);
+}
+
+static ssize_t perst_reloads_same_image_store(struct device *device,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct cxl *adapter = to_cxl_adapter(device);
+	int rc;
+	int val;
+
+	rc = sscanf(buf, "%i", &val);
+	if ((rc != 1) || !(val == 1 || val == 0))
+		return -EINVAL;
+
+	adapter->perst_same_image = (val == 1 ? true : false);
+	return count;
+}
+
+static struct device_attribute adapter_attrs[] = {
+	__ATTR_RO(caia_version),
+	__ATTR_RO(psl_revision),
+	__ATTR_RO(base_image),
+	__ATTR_RO(image_loaded),
+	__ATTR_RO(psl_timebase_synced),
+	__ATTR_RO(tunneled_ops_supported),
+	__ATTR_RW(load_image_on_perst),
+	__ATTR_RW(perst_reloads_same_image),
+	__ATTR(reset, S_IWUSR, NULL, reset_adapter_store),
+};
+
+
+/*********  AFU master specific attributes  **********************************/
+
+static ssize_t mmio_size_show_master(struct device *device,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	struct cxl_afu *afu = to_afu_chardev_m(device);
+
+	return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->adapter->ps_size);
+}
+
+static ssize_t pp_mmio_off_show(struct device *device,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct cxl_afu *afu = to_afu_chardev_m(device);
+
+	return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->native->pp_offset);
+}
+
+static ssize_t pp_mmio_len_show(struct device *device,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct cxl_afu *afu = to_afu_chardev_m(device);
+
+	return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->pp_size);
+}
+
+static struct device_attribute afu_master_attrs[] = {
+	__ATTR(mmio_size, S_IRUGO, mmio_size_show_master, NULL),
+	__ATTR_RO(pp_mmio_off),
+	__ATTR_RO(pp_mmio_len),
+};
+
+
+/*********  AFU attributes  **************************************************/
+
+static ssize_t mmio_size_show(struct device *device,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	struct cxl_afu *afu = to_cxl_afu(device);
+
+	if (afu->pp_size)
+		return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->pp_size);
+	return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->adapter->ps_size);
+}
+
+static ssize_t reset_store_afu(struct device *device,
+			       struct device_attribute *attr,
+			       const char *buf, size_t count)
+{
+	struct cxl_afu *afu = to_cxl_afu(device);
+	int rc;
+
+	/* Not safe to reset if it is currently in use */
+	mutex_lock(&afu->contexts_lock);
+	if (!idr_is_empty(&afu->contexts_idr)) {
+		rc = -EBUSY;
+		goto err;
+	}
+
+	if ((rc = cxl_ops->afu_reset(afu)))
+		goto err;
+
+	rc = count;
+err:
+	mutex_unlock(&afu->contexts_lock);
+	return rc;
+}
+
+static ssize_t irqs_min_show(struct device *device,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	struct cxl_afu *afu = to_cxl_afu(device);
+
+	return scnprintf(buf, PAGE_SIZE, "%i\n", afu->pp_irqs);
+}
+
+static ssize_t irqs_max_show(struct device *device,
+				  struct device_attribute *attr,
+				  char *buf)
+{
+	struct cxl_afu *afu = to_cxl_afu(device);
+
+	return scnprintf(buf, PAGE_SIZE, "%i\n", afu->irqs_max);
+}
+
+static ssize_t irqs_max_store(struct device *device,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct cxl_afu *afu = to_cxl_afu(device);
+	ssize_t ret;
+	int irqs_max;
+
+	ret = sscanf(buf, "%i", &irqs_max);
+	if (ret != 1)
+		return -EINVAL;
+
+	if (irqs_max < afu->pp_irqs)
+		return -EINVAL;
+
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		if (irqs_max > afu->adapter->user_irqs)
+			return -EINVAL;
+	} else {
+		/* pHyp sets a per-AFU limit */
+		if (irqs_max > afu->guest->max_ints)
+			return -EINVAL;
+	}
+
+	afu->irqs_max = irqs_max;
+	return count;
+}
+
+static ssize_t modes_supported_show(struct device *device,
+				    struct device_attribute *attr, char *buf)
+{
+	struct cxl_afu *afu = to_cxl_afu(device);
+	char *p = buf, *end = buf + PAGE_SIZE;
+
+	if (afu->modes_supported & CXL_MODE_DEDICATED)
+		p += scnprintf(p, end - p, "dedicated_process\n");
+	if (afu->modes_supported & CXL_MODE_DIRECTED)
+		p += scnprintf(p, end - p, "afu_directed\n");
+	return (p - buf);
+}
+
+static ssize_t prefault_mode_show(struct device *device,
+				  struct device_attribute *attr,
+				  char *buf)
+{
+	struct cxl_afu *afu = to_cxl_afu(device);
+
+	switch (afu->prefault_mode) {
+	case CXL_PREFAULT_WED:
+		return scnprintf(buf, PAGE_SIZE, "work_element_descriptor\n");
+	case CXL_PREFAULT_ALL:
+		return scnprintf(buf, PAGE_SIZE, "all\n");
+	default:
+		return scnprintf(buf, PAGE_SIZE, "none\n");
+	}
+}
+
+static ssize_t prefault_mode_store(struct device *device,
+			  struct device_attribute *attr,
+			  const char *buf, size_t count)
+{
+	struct cxl_afu *afu = to_cxl_afu(device);
+	enum prefault_modes mode = -1;
+
+	if (!strncmp(buf, "none", 4))
+		mode = CXL_PREFAULT_NONE;
+	else {
+		if (!radix_enabled()) {
+
+			/* only allowed when not in radix mode */
+			if (!strncmp(buf, "work_element_descriptor", 23))
+				mode = CXL_PREFAULT_WED;
+			if (!strncmp(buf, "all", 3))
+				mode = CXL_PREFAULT_ALL;
+		} else {
+			dev_err(device, "Cannot prefault with radix enabled\n");
+		}
+	}
+
+	if (mode == -1)
+		return -EINVAL;
+
+	afu->prefault_mode = mode;
+	return count;
+}
+
+static ssize_t mode_show(struct device *device,
+			 struct device_attribute *attr,
+			 char *buf)
+{
+	struct cxl_afu *afu = to_cxl_afu(device);
+
+	if (afu->current_mode == CXL_MODE_DEDICATED)
+		return scnprintf(buf, PAGE_SIZE, "dedicated_process\n");
+	if (afu->current_mode == CXL_MODE_DIRECTED)
+		return scnprintf(buf, PAGE_SIZE, "afu_directed\n");
+	return scnprintf(buf, PAGE_SIZE, "none\n");
+}
+
+static ssize_t mode_store(struct device *device, struct device_attribute *attr,
+			  const char *buf, size_t count)
+{
+	struct cxl_afu *afu = to_cxl_afu(device);
+	int old_mode, mode = -1;
+	int rc = -EBUSY;
+
+	/* can't change this if we have a user */
+	mutex_lock(&afu->contexts_lock);
+	if (!idr_is_empty(&afu->contexts_idr))
+		goto err;
+
+	if (!strncmp(buf, "dedicated_process", 17))
+		mode = CXL_MODE_DEDICATED;
+	if (!strncmp(buf, "afu_directed", 12))
+		mode = CXL_MODE_DIRECTED;
+	if (!strncmp(buf, "none", 4))
+		mode = 0;
+
+	if (mode == -1) {
+		rc = -EINVAL;
+		goto err;
+	}
+
+	/*
+	 * afu_deactivate_mode needs to be done outside the lock, prevent
+	 * other contexts coming in before we are ready:
+	 */
+	old_mode = afu->current_mode;
+	afu->current_mode = 0;
+	afu->num_procs = 0;
+
+	mutex_unlock(&afu->contexts_lock);
+
+	if ((rc = cxl_ops->afu_deactivate_mode(afu, old_mode)))
+		return rc;
+	if ((rc = cxl_ops->afu_activate_mode(afu, mode)))
+		return rc;
+
+	return count;
+err:
+	mutex_unlock(&afu->contexts_lock);
+	return rc;
+}
+
+static ssize_t api_version_show(struct device *device,
+				struct device_attribute *attr,
+				char *buf)
+{
+	return scnprintf(buf, PAGE_SIZE, "%i\n", CXL_API_VERSION);
+}
+
+static ssize_t api_version_compatible_show(struct device *device,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	return scnprintf(buf, PAGE_SIZE, "%i\n", CXL_API_VERSION_COMPATIBLE);
+}
+
+static ssize_t afu_eb_read(struct file *filp, struct kobject *kobj,
+			       struct bin_attribute *bin_attr, char *buf,
+			       loff_t off, size_t count)
+{
+	struct cxl_afu *afu = to_cxl_afu(kobj_to_dev(kobj));
+
+	return cxl_ops->afu_read_err_buffer(afu, buf, off, count);
+}
+
+static struct device_attribute afu_attrs[] = {
+	__ATTR_RO(mmio_size),
+	__ATTR_RO(irqs_min),
+	__ATTR_RW(irqs_max),
+	__ATTR_RO(modes_supported),
+	__ATTR_RW(mode),
+	__ATTR_RW(prefault_mode),
+	__ATTR_RO(api_version),
+	__ATTR_RO(api_version_compatible),
+	__ATTR(reset, S_IWUSR, NULL, reset_store_afu),
+};
+
+int cxl_sysfs_adapter_add(struct cxl *adapter)
+{
+	struct device_attribute *dev_attr;
+	int i, rc;
+
+	for (i = 0; i < ARRAY_SIZE(adapter_attrs); i++) {
+		dev_attr = &adapter_attrs[i];
+		if (cxl_ops->support_attributes(dev_attr->attr.name,
+						CXL_ADAPTER_ATTRS)) {
+			if ((rc = device_create_file(&adapter->dev, dev_attr)))
+				goto err;
+		}
+	}
+	return 0;
+err:
+	for (i--; i >= 0; i--) {
+		dev_attr = &adapter_attrs[i];
+		if (cxl_ops->support_attributes(dev_attr->attr.name,
+						CXL_ADAPTER_ATTRS))
+			device_remove_file(&adapter->dev, dev_attr);
+	}
+	return rc;
+}
+
+void cxl_sysfs_adapter_remove(struct cxl *adapter)
+{
+	struct device_attribute *dev_attr;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(adapter_attrs); i++) {
+		dev_attr = &adapter_attrs[i];
+		if (cxl_ops->support_attributes(dev_attr->attr.name,
+						CXL_ADAPTER_ATTRS))
+			device_remove_file(&adapter->dev, dev_attr);
+	}
+}
+
+struct afu_config_record {
+	struct kobject kobj;
+	struct bin_attribute config_attr;
+	struct list_head list;
+	int cr;
+	u16 device;
+	u16 vendor;
+	u32 class;
+};
+
+#define to_cr(obj) container_of(obj, struct afu_config_record, kobj)
+
+static ssize_t vendor_show(struct kobject *kobj,
+			   struct kobj_attribute *attr, char *buf)
+{
+	struct afu_config_record *cr = to_cr(kobj);
+
+	return scnprintf(buf, PAGE_SIZE, "0x%.4x\n", cr->vendor);
+}
+
+static ssize_t device_show(struct kobject *kobj,
+			   struct kobj_attribute *attr, char *buf)
+{
+	struct afu_config_record *cr = to_cr(kobj);
+
+	return scnprintf(buf, PAGE_SIZE, "0x%.4x\n", cr->device);
+}
+
+static ssize_t class_show(struct kobject *kobj,
+			  struct kobj_attribute *attr, char *buf)
+{
+	struct afu_config_record *cr = to_cr(kobj);
+
+	return scnprintf(buf, PAGE_SIZE, "0x%.6x\n", cr->class);
+}
+
+static ssize_t afu_read_config(struct file *filp, struct kobject *kobj,
+			       struct bin_attribute *bin_attr, char *buf,
+			       loff_t off, size_t count)
+{
+	struct afu_config_record *cr = to_cr(kobj);
+	struct cxl_afu *afu = to_cxl_afu(kobj_to_dev(kobj->parent));
+
+	u64 i, j, val, rc;
+
+	for (i = 0; i < count;) {
+		rc = cxl_ops->afu_cr_read64(afu, cr->cr, off & ~0x7, &val);
+		if (rc)
+			val = ~0ULL;
+		for (j = off & 0x7; j < 8 && i < count; i++, j++, off++)
+			buf[i] = (val >> (j * 8)) & 0xff;
+	}
+
+	return count;
+}
+
+static struct kobj_attribute vendor_attribute =
+	__ATTR_RO(vendor);
+static struct kobj_attribute device_attribute =
+	__ATTR_RO(device);
+static struct kobj_attribute class_attribute =
+	__ATTR_RO(class);
+
+static struct attribute *afu_cr_attrs[] = {
+	&vendor_attribute.attr,
+	&device_attribute.attr,
+	&class_attribute.attr,
+	NULL,
+};
+
+static void release_afu_config_record(struct kobject *kobj)
+{
+	struct afu_config_record *cr = to_cr(kobj);
+
+	kfree(cr);
+}
+
+static struct kobj_type afu_config_record_type = {
+	.sysfs_ops = &kobj_sysfs_ops,
+	.release = release_afu_config_record,
+	.default_attrs = afu_cr_attrs,
+};
+
+static struct afu_config_record *cxl_sysfs_afu_new_cr(struct cxl_afu *afu, int cr_idx)
+{
+	struct afu_config_record *cr;
+	int rc;
+
+	cr = kzalloc(sizeof(struct afu_config_record), GFP_KERNEL);
+	if (!cr)
+		return ERR_PTR(-ENOMEM);
+
+	cr->cr = cr_idx;
+
+	rc = cxl_ops->afu_cr_read16(afu, cr_idx, PCI_DEVICE_ID, &cr->device);
+	if (rc)
+		goto err;
+	rc = cxl_ops->afu_cr_read16(afu, cr_idx, PCI_VENDOR_ID, &cr->vendor);
+	if (rc)
+		goto err;
+	rc = cxl_ops->afu_cr_read32(afu, cr_idx, PCI_CLASS_REVISION, &cr->class);
+	if (rc)
+		goto err;
+	cr->class >>= 8;
+
+	/*
+	 * Export raw AFU PCIe like config record. For now this is read only by
+	 * root - we can expand that later to be readable by non-root and maybe
+	 * even writable provided we have a good use-case. Once we support
+	 * exposing AFUs through a virtual PHB they will get that for free from
+	 * Linux' PCI infrastructure, but until then it's not clear that we
+	 * need it for anything since the main use case is just identifying
+	 * AFUs, which can be done via the vendor, device and class attributes.
+	 */
+	sysfs_bin_attr_init(&cr->config_attr);
+	cr->config_attr.attr.name = "config";
+	cr->config_attr.attr.mode = S_IRUSR;
+	cr->config_attr.size = afu->crs_len;
+	cr->config_attr.read = afu_read_config;
+
+	rc = kobject_init_and_add(&cr->kobj, &afu_config_record_type,
+				  &afu->dev.kobj, "cr%i", cr->cr);
+	if (rc)
+		goto err1;
+
+	rc = sysfs_create_bin_file(&cr->kobj, &cr->config_attr);
+	if (rc)
+		goto err1;
+
+	rc = kobject_uevent(&cr->kobj, KOBJ_ADD);
+	if (rc)
+		goto err2;
+
+	return cr;
+err2:
+	sysfs_remove_bin_file(&cr->kobj, &cr->config_attr);
+err1:
+	kobject_put(&cr->kobj);
+	return ERR_PTR(rc);
+err:
+	kfree(cr);
+	return ERR_PTR(rc);
+}
+
+void cxl_sysfs_afu_remove(struct cxl_afu *afu)
+{
+	struct device_attribute *dev_attr;
+	struct afu_config_record *cr, *tmp;
+	int i;
+
+	/* remove the err buffer bin attribute */
+	if (afu->eb_len)
+		device_remove_bin_file(&afu->dev, &afu->attr_eb);
+
+	for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) {
+		dev_attr = &afu_attrs[i];
+		if (cxl_ops->support_attributes(dev_attr->attr.name,
+						CXL_AFU_ATTRS))
+			device_remove_file(&afu->dev, &afu_attrs[i]);
+	}
+
+	list_for_each_entry_safe(cr, tmp, &afu->crs, list) {
+		sysfs_remove_bin_file(&cr->kobj, &cr->config_attr);
+		kobject_put(&cr->kobj);
+	}
+}
+
+int cxl_sysfs_afu_add(struct cxl_afu *afu)
+{
+	struct device_attribute *dev_attr;
+	struct afu_config_record *cr;
+	int i, rc;
+
+	INIT_LIST_HEAD(&afu->crs);
+
+	for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) {
+		dev_attr = &afu_attrs[i];
+		if (cxl_ops->support_attributes(dev_attr->attr.name,
+						CXL_AFU_ATTRS)) {
+			if ((rc = device_create_file(&afu->dev, &afu_attrs[i])))
+				goto err;
+		}
+	}
+
+	/* conditionally create the add the binary file for error info buffer */
+	if (afu->eb_len) {
+		sysfs_attr_init(&afu->attr_eb.attr);
+
+		afu->attr_eb.attr.name = "afu_err_buff";
+		afu->attr_eb.attr.mode = S_IRUGO;
+		afu->attr_eb.size = afu->eb_len;
+		afu->attr_eb.read = afu_eb_read;
+
+		rc = device_create_bin_file(&afu->dev, &afu->attr_eb);
+		if (rc) {
+			dev_err(&afu->dev,
+				"Unable to create eb attr for the afu. Err(%d)\n",
+				rc);
+			goto err;
+		}
+	}
+
+	for (i = 0; i < afu->crs_num; i++) {
+		cr = cxl_sysfs_afu_new_cr(afu, i);
+		if (IS_ERR(cr)) {
+			rc = PTR_ERR(cr);
+			goto err1;
+		}
+		list_add(&cr->list, &afu->crs);
+	}
+
+	return 0;
+
+err1:
+	cxl_sysfs_afu_remove(afu);
+	return rc;
+err:
+	/* reset the eb_len as we havent created the bin attr */
+	afu->eb_len = 0;
+
+	for (i--; i >= 0; i--) {
+		dev_attr = &afu_attrs[i];
+		if (cxl_ops->support_attributes(dev_attr->attr.name,
+						CXL_AFU_ATTRS))
+		device_remove_file(&afu->dev, &afu_attrs[i]);
+	}
+	return rc;
+}
+
+int cxl_sysfs_afu_m_add(struct cxl_afu *afu)
+{
+	struct device_attribute *dev_attr;
+	int i, rc;
+
+	for (i = 0; i < ARRAY_SIZE(afu_master_attrs); i++) {
+		dev_attr = &afu_master_attrs[i];
+		if (cxl_ops->support_attributes(dev_attr->attr.name,
+						CXL_AFU_MASTER_ATTRS)) {
+			if ((rc = device_create_file(afu->chardev_m, &afu_master_attrs[i])))
+				goto err;
+		}
+	}
+
+	return 0;
+
+err:
+	for (i--; i >= 0; i--) {
+		dev_attr = &afu_master_attrs[i];
+		if (cxl_ops->support_attributes(dev_attr->attr.name,
+						CXL_AFU_MASTER_ATTRS))
+			device_remove_file(afu->chardev_m, &afu_master_attrs[i]);
+	}
+	return rc;
+}
+
+void cxl_sysfs_afu_m_remove(struct cxl_afu *afu)
+{
+	struct device_attribute *dev_attr;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(afu_master_attrs); i++) {
+		dev_attr = &afu_master_attrs[i];
+		if (cxl_ops->support_attributes(dev_attr->attr.name,
+						CXL_AFU_MASTER_ATTRS))
+			device_remove_file(afu->chardev_m, &afu_master_attrs[i]);
+	}
+}
diff --git a/drivers/misc/cxl/trace.c b/drivers/misc/cxl/trace.c
new file mode 100644
index 000000000..c2b06d319
--- /dev/null
+++ b/drivers/misc/cxl/trace.c
@@ -0,0 +1,13 @@
+/*
+ * Copyright 2015 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+#endif
diff --git a/drivers/misc/cxl/trace.h b/drivers/misc/cxl/trace.h
new file mode 100644
index 000000000..b8e300af0
--- /dev/null
+++ b/drivers/misc/cxl/trace.h
@@ -0,0 +1,695 @@
+/*
+ * Copyright 2015 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM cxl
+
+#if !defined(_CXL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _CXL_TRACE_H
+
+#include <linux/tracepoint.h>
+
+#include "cxl.h"
+
+#define dsisr_psl9_flags(flags) \
+	__print_flags(flags, "|", \
+		{ CXL_PSL9_DSISR_An_CO_MASK,	"FR" }, \
+		{ CXL_PSL9_DSISR_An_TF,		"TF" }, \
+		{ CXL_PSL9_DSISR_An_PE,		"PE" }, \
+		{ CXL_PSL9_DSISR_An_AE,		"AE" }, \
+		{ CXL_PSL9_DSISR_An_OC,		"OC" }, \
+		{ CXL_PSL9_DSISR_An_S,		"S" })
+
+#define DSISR_FLAGS \
+	{ CXL_PSL_DSISR_An_DS,	"DS" }, \
+	{ CXL_PSL_DSISR_An_DM,	"DM" }, \
+	{ CXL_PSL_DSISR_An_ST,	"ST" }, \
+	{ CXL_PSL_DSISR_An_UR,	"UR" }, \
+	{ CXL_PSL_DSISR_An_PE,	"PE" }, \
+	{ CXL_PSL_DSISR_An_AE,	"AE" }, \
+	{ CXL_PSL_DSISR_An_OC,	"OC" }, \
+	{ CXL_PSL_DSISR_An_M,	"M" }, \
+	{ CXL_PSL_DSISR_An_P,	"P" }, \
+	{ CXL_PSL_DSISR_An_A,	"A" }, \
+	{ CXL_PSL_DSISR_An_S,	"S" }, \
+	{ CXL_PSL_DSISR_An_K,	"K" }
+
+#define TFC_FLAGS \
+	{ CXL_PSL_TFC_An_A,	"A" }, \
+	{ CXL_PSL_TFC_An_C,	"C" }, \
+	{ CXL_PSL_TFC_An_AE,	"AE" }, \
+	{ CXL_PSL_TFC_An_R,	"R" }
+
+#define LLCMD_NAMES \
+	{ CXL_SPA_SW_CMD_TERMINATE,	"TERMINATE" }, \
+	{ CXL_SPA_SW_CMD_REMOVE,	"REMOVE" }, \
+	{ CXL_SPA_SW_CMD_SUSPEND,	"SUSPEND" }, \
+	{ CXL_SPA_SW_CMD_RESUME,	"RESUME" }, \
+	{ CXL_SPA_SW_CMD_ADD,		"ADD" }, \
+	{ CXL_SPA_SW_CMD_UPDATE,	"UPDATE" }
+
+#define AFU_COMMANDS \
+	{ 0,			"DISABLE" }, \
+	{ CXL_AFU_Cntl_An_E,	"ENABLE" }, \
+	{ CXL_AFU_Cntl_An_RA,	"RESET" }
+
+#define PSL_COMMANDS \
+	{ CXL_PSL_SCNTL_An_Pc,	"PURGE" }, \
+	{ CXL_PSL_SCNTL_An_Sc,	"SUSPEND" }
+
+
+DECLARE_EVENT_CLASS(cxl_pe_class,
+	TP_PROTO(struct cxl_context *ctx),
+
+	TP_ARGS(ctx),
+
+	TP_STRUCT__entry(
+		__field(u8, card)
+		__field(u8, afu)
+		__field(u16, pe)
+	),
+
+	TP_fast_assign(
+		__entry->card = ctx->afu->adapter->adapter_num;
+		__entry->afu = ctx->afu->slice;
+		__entry->pe = ctx->pe;
+	),
+
+	TP_printk("afu%i.%i pe=%i",
+		__entry->card,
+		__entry->afu,
+		__entry->pe
+	)
+);
+
+
+TRACE_EVENT(cxl_attach,
+	TP_PROTO(struct cxl_context *ctx, u64 wed, s16 num_interrupts, u64 amr),
+
+	TP_ARGS(ctx, wed, num_interrupts, amr),
+
+	TP_STRUCT__entry(
+		__field(u8, card)
+		__field(u8, afu)
+		__field(u16, pe)
+		__field(pid_t, pid)
+		__field(u64, wed)
+		__field(u64, amr)
+		__field(s16, num_interrupts)
+	),
+
+	TP_fast_assign(
+		__entry->card = ctx->afu->adapter->adapter_num;
+		__entry->afu = ctx->afu->slice;
+		__entry->pe = ctx->pe;
+		__entry->pid = pid_nr(ctx->pid);
+		__entry->wed = wed;
+		__entry->amr = amr;
+		__entry->num_interrupts = num_interrupts;
+	),
+
+	TP_printk("afu%i.%i pid=%i pe=%i wed=0x%016llx irqs=%i amr=0x%llx",
+		__entry->card,
+		__entry->afu,
+		__entry->pid,
+		__entry->pe,
+		__entry->wed,
+		__entry->num_interrupts,
+		__entry->amr
+	)
+);
+
+DEFINE_EVENT(cxl_pe_class, cxl_detach,
+	TP_PROTO(struct cxl_context *ctx),
+	TP_ARGS(ctx)
+);
+
+TRACE_EVENT(cxl_afu_irq,
+	TP_PROTO(struct cxl_context *ctx, int afu_irq, int virq, irq_hw_number_t hwirq),
+
+	TP_ARGS(ctx, afu_irq, virq, hwirq),
+
+	TP_STRUCT__entry(
+		__field(u8, card)
+		__field(u8, afu)
+		__field(u16, pe)
+		__field(u16, afu_irq)
+		__field(int, virq)
+		__field(irq_hw_number_t, hwirq)
+	),
+
+	TP_fast_assign(
+		__entry->card = ctx->afu->adapter->adapter_num;
+		__entry->afu = ctx->afu->slice;
+		__entry->pe = ctx->pe;
+		__entry->afu_irq = afu_irq;
+		__entry->virq = virq;
+		__entry->hwirq = hwirq;
+	),
+
+	TP_printk("afu%i.%i pe=%i afu_irq=%i virq=%i hwirq=0x%lx",
+		__entry->card,
+		__entry->afu,
+		__entry->pe,
+		__entry->afu_irq,
+		__entry->virq,
+		__entry->hwirq
+	)
+);
+
+TRACE_EVENT(cxl_psl9_irq,
+	TP_PROTO(struct cxl_context *ctx, int irq, u64 dsisr, u64 dar),
+
+	TP_ARGS(ctx, irq, dsisr, dar),
+
+	TP_STRUCT__entry(
+		__field(u8, card)
+		__field(u8, afu)
+		__field(u16, pe)
+		__field(int, irq)
+		__field(u64, dsisr)
+		__field(u64, dar)
+	),
+
+	TP_fast_assign(
+		__entry->card = ctx->afu->adapter->adapter_num;
+		__entry->afu = ctx->afu->slice;
+		__entry->pe = ctx->pe;
+		__entry->irq = irq;
+		__entry->dsisr = dsisr;
+		__entry->dar = dar;
+	),
+
+	TP_printk("afu%i.%i pe=%i irq=%i dsisr=0x%016llx dsisr=%s dar=0x%016llx",
+		__entry->card,
+		__entry->afu,
+		__entry->pe,
+		__entry->irq,
+		__entry->dsisr,
+		dsisr_psl9_flags(__entry->dsisr),
+		__entry->dar
+	)
+);
+
+TRACE_EVENT(cxl_psl_irq,
+	TP_PROTO(struct cxl_context *ctx, int irq, u64 dsisr, u64 dar),
+
+	TP_ARGS(ctx, irq, dsisr, dar),
+
+	TP_STRUCT__entry(
+		__field(u8, card)
+		__field(u8, afu)
+		__field(u16, pe)
+		__field(int, irq)
+		__field(u64, dsisr)
+		__field(u64, dar)
+	),
+
+	TP_fast_assign(
+		__entry->card = ctx->afu->adapter->adapter_num;
+		__entry->afu = ctx->afu->slice;
+		__entry->pe = ctx->pe;
+		__entry->irq = irq;
+		__entry->dsisr = dsisr;
+		__entry->dar = dar;
+	),
+
+	TP_printk("afu%i.%i pe=%i irq=%i dsisr=%s dar=0x%016llx",
+		__entry->card,
+		__entry->afu,
+		__entry->pe,
+		__entry->irq,
+		__print_flags(__entry->dsisr, "|", DSISR_FLAGS),
+		__entry->dar
+	)
+);
+
+TRACE_EVENT(cxl_psl_irq_ack,
+	TP_PROTO(struct cxl_context *ctx, u64 tfc),
+
+	TP_ARGS(ctx, tfc),
+
+	TP_STRUCT__entry(
+		__field(u8, card)
+		__field(u8, afu)
+		__field(u16, pe)
+		__field(u64, tfc)
+	),
+
+	TP_fast_assign(
+		__entry->card = ctx->afu->adapter->adapter_num;
+		__entry->afu = ctx->afu->slice;
+		__entry->pe = ctx->pe;
+		__entry->tfc = tfc;
+	),
+
+	TP_printk("afu%i.%i pe=%i tfc=%s",
+		__entry->card,
+		__entry->afu,
+		__entry->pe,
+		__print_flags(__entry->tfc, "|", TFC_FLAGS)
+	)
+);
+
+TRACE_EVENT(cxl_ste_miss,
+	TP_PROTO(struct cxl_context *ctx, u64 dar),
+
+	TP_ARGS(ctx, dar),
+
+	TP_STRUCT__entry(
+		__field(u8, card)
+		__field(u8, afu)
+		__field(u16, pe)
+		__field(u64, dar)
+	),
+
+	TP_fast_assign(
+		__entry->card = ctx->afu->adapter->adapter_num;
+		__entry->afu = ctx->afu->slice;
+		__entry->pe = ctx->pe;
+		__entry->dar = dar;
+	),
+
+	TP_printk("afu%i.%i pe=%i dar=0x%016llx",
+		__entry->card,
+		__entry->afu,
+		__entry->pe,
+		__entry->dar
+	)
+);
+
+TRACE_EVENT(cxl_ste_write,
+	TP_PROTO(struct cxl_context *ctx, unsigned int idx, u64 e, u64 v),
+
+	TP_ARGS(ctx, idx, e, v),
+
+	TP_STRUCT__entry(
+		__field(u8, card)
+		__field(u8, afu)
+		__field(u16, pe)
+		__field(unsigned int, idx)
+		__field(u64, e)
+		__field(u64, v)
+	),
+
+	TP_fast_assign(
+		__entry->card = ctx->afu->adapter->adapter_num;
+		__entry->afu = ctx->afu->slice;
+		__entry->pe = ctx->pe;
+		__entry->idx = idx;
+		__entry->e = e;
+		__entry->v = v;
+	),
+
+	TP_printk("afu%i.%i pe=%i SSTE[%i] E=0x%016llx V=0x%016llx",
+		__entry->card,
+		__entry->afu,
+		__entry->pe,
+		__entry->idx,
+		__entry->e,
+		__entry->v
+	)
+);
+
+TRACE_EVENT(cxl_pte_miss,
+	TP_PROTO(struct cxl_context *ctx, u64 dsisr, u64 dar),
+
+	TP_ARGS(ctx, dsisr, dar),
+
+	TP_STRUCT__entry(
+		__field(u8, card)
+		__field(u8, afu)
+		__field(u16, pe)
+		__field(u64, dsisr)
+		__field(u64, dar)
+	),
+
+	TP_fast_assign(
+		__entry->card = ctx->afu->adapter->adapter_num;
+		__entry->afu = ctx->afu->slice;
+		__entry->pe = ctx->pe;
+		__entry->dsisr = dsisr;
+		__entry->dar = dar;
+	),
+
+	TP_printk("afu%i.%i pe=%i dsisr=%s dar=0x%016llx",
+		__entry->card,
+		__entry->afu,
+		__entry->pe,
+		__print_flags(__entry->dsisr, "|", DSISR_FLAGS),
+		__entry->dar
+	)
+);
+
+TRACE_EVENT(cxl_llcmd,
+	TP_PROTO(struct cxl_context *ctx, u64 cmd),
+
+	TP_ARGS(ctx, cmd),
+
+	TP_STRUCT__entry(
+		__field(u8, card)
+		__field(u8, afu)
+		__field(u16, pe)
+		__field(u64, cmd)
+	),
+
+	TP_fast_assign(
+		__entry->card = ctx->afu->adapter->adapter_num;
+		__entry->afu = ctx->afu->slice;
+		__entry->pe = ctx->pe;
+		__entry->cmd = cmd;
+	),
+
+	TP_printk("afu%i.%i pe=%i cmd=%s",
+		__entry->card,
+		__entry->afu,
+		__entry->pe,
+		__print_symbolic_u64(__entry->cmd, LLCMD_NAMES)
+	)
+);
+
+TRACE_EVENT(cxl_llcmd_done,
+	TP_PROTO(struct cxl_context *ctx, u64 cmd, int rc),
+
+	TP_ARGS(ctx, cmd, rc),
+
+	TP_STRUCT__entry(
+		__field(u8, card)
+		__field(u8, afu)
+		__field(u16, pe)
+		__field(u64, cmd)
+		__field(int, rc)
+	),
+
+	TP_fast_assign(
+		__entry->card = ctx->afu->adapter->adapter_num;
+		__entry->afu = ctx->afu->slice;
+		__entry->pe = ctx->pe;
+		__entry->rc = rc;
+		__entry->cmd = cmd;
+	),
+
+	TP_printk("afu%i.%i pe=%i cmd=%s rc=%i",
+		__entry->card,
+		__entry->afu,
+		__entry->pe,
+		__print_symbolic_u64(__entry->cmd, LLCMD_NAMES),
+		__entry->rc
+	)
+);
+
+DECLARE_EVENT_CLASS(cxl_afu_psl_ctrl,
+	TP_PROTO(struct cxl_afu *afu, u64 cmd),
+
+	TP_ARGS(afu, cmd),
+
+	TP_STRUCT__entry(
+		__field(u8, card)
+		__field(u8, afu)
+		__field(u64, cmd)
+	),
+
+	TP_fast_assign(
+		__entry->card = afu->adapter->adapter_num;
+		__entry->afu = afu->slice;
+		__entry->cmd = cmd;
+	),
+
+	TP_printk("afu%i.%i cmd=%s",
+		__entry->card,
+		__entry->afu,
+		__print_symbolic_u64(__entry->cmd, AFU_COMMANDS)
+	)
+);
+
+DECLARE_EVENT_CLASS(cxl_afu_psl_ctrl_done,
+	TP_PROTO(struct cxl_afu *afu, u64 cmd, int rc),
+
+	TP_ARGS(afu, cmd, rc),
+
+	TP_STRUCT__entry(
+		__field(u8, card)
+		__field(u8, afu)
+		__field(u64, cmd)
+		__field(int, rc)
+	),
+
+	TP_fast_assign(
+		__entry->card = afu->adapter->adapter_num;
+		__entry->afu = afu->slice;
+		__entry->rc = rc;
+		__entry->cmd = cmd;
+	),
+
+	TP_printk("afu%i.%i cmd=%s rc=%i",
+		__entry->card,
+		__entry->afu,
+		__print_symbolic_u64(__entry->cmd, AFU_COMMANDS),
+		__entry->rc
+	)
+);
+
+DEFINE_EVENT(cxl_afu_psl_ctrl, cxl_afu_ctrl,
+	TP_PROTO(struct cxl_afu *afu, u64 cmd),
+	TP_ARGS(afu, cmd)
+);
+
+DEFINE_EVENT(cxl_afu_psl_ctrl_done, cxl_afu_ctrl_done,
+	TP_PROTO(struct cxl_afu *afu, u64 cmd, int rc),
+	TP_ARGS(afu, cmd, rc)
+);
+
+DEFINE_EVENT_PRINT(cxl_afu_psl_ctrl, cxl_psl_ctrl,
+	TP_PROTO(struct cxl_afu *afu, u64 cmd),
+	TP_ARGS(afu, cmd),
+
+	TP_printk("psl%i.%i cmd=%s",
+		__entry->card,
+		__entry->afu,
+		__print_symbolic_u64(__entry->cmd, PSL_COMMANDS)
+	)
+);
+
+DEFINE_EVENT_PRINT(cxl_afu_psl_ctrl_done, cxl_psl_ctrl_done,
+	TP_PROTO(struct cxl_afu *afu, u64 cmd, int rc),
+	TP_ARGS(afu, cmd, rc),
+
+	TP_printk("psl%i.%i cmd=%s rc=%i",
+		__entry->card,
+		__entry->afu,
+		__print_symbolic_u64(__entry->cmd, PSL_COMMANDS),
+		__entry->rc
+	)
+);
+
+DEFINE_EVENT(cxl_pe_class, cxl_slbia,
+	TP_PROTO(struct cxl_context *ctx),
+	TP_ARGS(ctx)
+);
+
+TRACE_EVENT(cxl_hcall,
+	TP_PROTO(u64 unit_address, u64 process_token, long rc),
+
+	TP_ARGS(unit_address, process_token, rc),
+
+	TP_STRUCT__entry(
+		__field(u64, unit_address)
+		__field(u64, process_token)
+		__field(long, rc)
+	),
+
+	TP_fast_assign(
+		__entry->unit_address = unit_address;
+		__entry->process_token = process_token;
+		__entry->rc = rc;
+	),
+
+	TP_printk("unit_address=0x%016llx process_token=0x%016llx rc=%li",
+		__entry->unit_address,
+		__entry->process_token,
+		__entry->rc
+	)
+);
+
+TRACE_EVENT(cxl_hcall_control,
+	TP_PROTO(u64 unit_address, char *fct, u64 p1, u64 p2, u64 p3,
+	u64 p4, unsigned long r4, long rc),
+
+	TP_ARGS(unit_address, fct, p1, p2, p3, p4, r4, rc),
+
+	TP_STRUCT__entry(
+		__field(u64, unit_address)
+		__field(char *, fct)
+		__field(u64, p1)
+		__field(u64, p2)
+		__field(u64, p3)
+		__field(u64, p4)
+		__field(unsigned long, r4)
+		__field(long, rc)
+	),
+
+	TP_fast_assign(
+		__entry->unit_address = unit_address;
+		__entry->fct = fct;
+		__entry->p1 = p1;
+		__entry->p2 = p2;
+		__entry->p3 = p3;
+		__entry->p4 = p4;
+		__entry->r4 = r4;
+		__entry->rc = rc;
+	),
+
+	TP_printk("unit_address=%#.16llx %s(%#llx, %#llx, %#llx, %#llx, R4: %#lx)): %li",
+		__entry->unit_address,
+		__entry->fct,
+		__entry->p1,
+		__entry->p2,
+		__entry->p3,
+		__entry->p4,
+		__entry->r4,
+		__entry->rc
+	)
+);
+
+TRACE_EVENT(cxl_hcall_attach,
+	TP_PROTO(u64 unit_address, u64 phys_addr, unsigned long process_token,
+		unsigned long mmio_addr, unsigned long mmio_size, long rc),
+
+	TP_ARGS(unit_address, phys_addr, process_token,
+		mmio_addr, mmio_size, rc),
+
+	TP_STRUCT__entry(
+		__field(u64, unit_address)
+		__field(u64, phys_addr)
+		__field(unsigned long, process_token)
+		__field(unsigned long, mmio_addr)
+		__field(unsigned long, mmio_size)
+		__field(long, rc)
+	),
+
+	TP_fast_assign(
+		__entry->unit_address = unit_address;
+		__entry->phys_addr = phys_addr;
+		__entry->process_token = process_token;
+		__entry->mmio_addr = mmio_addr;
+		__entry->mmio_size = mmio_size;
+		__entry->rc = rc;
+	),
+
+	TP_printk("unit_address=0x%016llx phys_addr=0x%016llx "
+		"token=0x%.8lx mmio_addr=0x%lx mmio_size=0x%lx rc=%li",
+		__entry->unit_address,
+		__entry->phys_addr,
+		__entry->process_token,
+		__entry->mmio_addr,
+		__entry->mmio_size,
+		__entry->rc
+	)
+);
+
+DEFINE_EVENT(cxl_hcall, cxl_hcall_detach,
+	TP_PROTO(u64 unit_address, u64 process_token, long rc),
+	TP_ARGS(unit_address, process_token, rc)
+);
+
+DEFINE_EVENT(cxl_hcall_control, cxl_hcall_control_function,
+	TP_PROTO(u64 unit_address, char *fct, u64 p1, u64 p2, u64 p3,
+	u64 p4, unsigned long r4, long rc),
+	TP_ARGS(unit_address, fct, p1, p2, p3, p4, r4, rc)
+);
+
+DEFINE_EVENT(cxl_hcall, cxl_hcall_collect_int_info,
+	TP_PROTO(u64 unit_address, u64 process_token, long rc),
+	TP_ARGS(unit_address, process_token, rc)
+);
+
+TRACE_EVENT(cxl_hcall_control_faults,
+	TP_PROTO(u64 unit_address, u64 process_token,
+		u64 control_mask, u64 reset_mask, unsigned long r4,
+		long rc),
+
+	TP_ARGS(unit_address, process_token,
+		control_mask, reset_mask, r4, rc),
+
+	TP_STRUCT__entry(
+		__field(u64, unit_address)
+		__field(u64, process_token)
+		__field(u64, control_mask)
+		__field(u64, reset_mask)
+		__field(unsigned long, r4)
+		__field(long, rc)
+	),
+
+	TP_fast_assign(
+		__entry->unit_address = unit_address;
+		__entry->process_token = process_token;
+		__entry->control_mask = control_mask;
+		__entry->reset_mask = reset_mask;
+		__entry->r4 = r4;
+		__entry->rc = rc;
+	),
+
+	TP_printk("unit_address=0x%016llx process_token=0x%llx "
+		"control_mask=%#llx reset_mask=%#llx r4=%#lx rc=%li",
+		__entry->unit_address,
+		__entry->process_token,
+		__entry->control_mask,
+		__entry->reset_mask,
+		__entry->r4,
+		__entry->rc
+	)
+);
+
+DEFINE_EVENT(cxl_hcall_control, cxl_hcall_control_facility,
+	TP_PROTO(u64 unit_address, char *fct, u64 p1, u64 p2, u64 p3,
+	u64 p4, unsigned long r4, long rc),
+	TP_ARGS(unit_address, fct, p1, p2, p3, p4, r4, rc)
+);
+
+TRACE_EVENT(cxl_hcall_download_facility,
+	TP_PROTO(u64 unit_address, char *fct, u64 list_address, u64 num,
+	unsigned long r4, long rc),
+
+	TP_ARGS(unit_address, fct, list_address, num, r4, rc),
+
+	TP_STRUCT__entry(
+		__field(u64, unit_address)
+		__field(char *, fct)
+		__field(u64, list_address)
+		__field(u64, num)
+		__field(unsigned long, r4)
+		__field(long, rc)
+	),
+
+	TP_fast_assign(
+		__entry->unit_address = unit_address;
+		__entry->fct = fct;
+		__entry->list_address = list_address;
+		__entry->num = num;
+		__entry->r4 = r4;
+		__entry->rc = rc;
+	),
+
+	TP_printk("%#.16llx, %s(%#llx, %#llx), %#lx): %li",
+		__entry->unit_address,
+		__entry->fct,
+		__entry->list_address,
+		__entry->num,
+		__entry->r4,
+		__entry->rc
+	)
+);
+
+#endif /* _CXL_TRACE_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>
diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c
new file mode 100644
index 000000000..7908633d9
--- /dev/null
+++ b/drivers/misc/cxl/vphb.c
@@ -0,0 +1,333 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/pci.h>
+#include <misc/cxl.h>
+#include "cxl.h"
+
+static int cxl_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
+{
+	if (dma_mask < DMA_BIT_MASK(64)) {
+		pr_info("%s only 64bit DMA supported on CXL", __func__);
+		return -EIO;
+	}
+
+	*(pdev->dev.dma_mask) = dma_mask;
+	return 0;
+}
+
+static int cxl_pci_probe_mode(struct pci_bus *bus)
+{
+	return PCI_PROBE_NORMAL;
+}
+
+static int cxl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{
+	return -ENODEV;
+}
+
+static void cxl_teardown_msi_irqs(struct pci_dev *pdev)
+{
+	/*
+	 * MSI should never be set but need still need to provide this call
+	 * back.
+	 */
+}
+
+static bool cxl_pci_enable_device_hook(struct pci_dev *dev)
+{
+	struct pci_controller *phb;
+	struct cxl_afu *afu;
+	struct cxl_context *ctx;
+
+	phb = pci_bus_to_host(dev->bus);
+	afu = (struct cxl_afu *)phb->private_data;
+
+	if (!cxl_ops->link_ok(afu->adapter, afu)) {
+		dev_warn(&dev->dev, "%s: Device link is down, refusing to enable AFU\n", __func__);
+		return false;
+	}
+
+	set_dma_ops(&dev->dev, &dma_nommu_ops);
+	set_dma_offset(&dev->dev, PAGE_OFFSET);
+
+	/*
+	 * Allocate a context to do cxl things too.  If we eventually do real
+	 * DMA ops, we'll need a default context to attach them to
+	 */
+	ctx = cxl_dev_context_init(dev);
+	if (IS_ERR(ctx))
+		return false;
+	dev->dev.archdata.cxl_ctx = ctx;
+
+	return (cxl_ops->afu_check_and_enable(afu) == 0);
+}
+
+static void cxl_pci_disable_device(struct pci_dev *dev)
+{
+	struct cxl_context *ctx = cxl_get_context(dev);
+
+	if (ctx) {
+		if (ctx->status == STARTED) {
+			dev_err(&dev->dev, "Default context started\n");
+			return;
+		}
+		dev->dev.archdata.cxl_ctx = NULL;
+		cxl_release_context(ctx);
+	}
+}
+
+static resource_size_t cxl_pci_window_alignment(struct pci_bus *bus,
+						unsigned long type)
+{
+	return 1;
+}
+
+static void cxl_pci_reset_secondary_bus(struct pci_dev *dev)
+{
+	/* Should we do an AFU reset here ? */
+}
+
+static int cxl_pcie_cfg_record(u8 bus, u8 devfn)
+{
+	return (bus << 8) + devfn;
+}
+
+static inline struct cxl_afu *pci_bus_to_afu(struct pci_bus *bus)
+{
+	struct pci_controller *phb = bus ? pci_bus_to_host(bus) : NULL;
+
+	return phb ? phb->private_data : NULL;
+}
+
+static void cxl_afu_configured_put(struct cxl_afu *afu)
+{
+	atomic_dec_if_positive(&afu->configured_state);
+}
+
+static bool cxl_afu_configured_get(struct cxl_afu *afu)
+{
+	return atomic_inc_unless_negative(&afu->configured_state);
+}
+
+static inline int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn,
+				       struct cxl_afu *afu, int *_record)
+{
+	int record;
+
+	record = cxl_pcie_cfg_record(bus->number, devfn);
+	if (record > afu->crs_num)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	*_record = record;
+	return 0;
+}
+
+static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
+				int offset, int len, u32 *val)
+{
+	int rc, record;
+	struct cxl_afu *afu;
+	u8 val8;
+	u16 val16;
+	u32 val32;
+
+	afu = pci_bus_to_afu(bus);
+	/* Grab a reader lock on afu. */
+	if (afu == NULL || !cxl_afu_configured_get(afu))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	rc = cxl_pcie_config_info(bus, devfn, afu, &record);
+	if (rc)
+		goto out;
+
+	switch (len) {
+	case 1:
+		rc = cxl_ops->afu_cr_read8(afu, record, offset,	&val8);
+		*val = val8;
+		break;
+	case 2:
+		rc = cxl_ops->afu_cr_read16(afu, record, offset, &val16);
+		*val = val16;
+		break;
+	case 4:
+		rc = cxl_ops->afu_cr_read32(afu, record, offset, &val32);
+		*val = val32;
+		break;
+	default:
+		WARN_ON(1);
+	}
+
+out:
+	cxl_afu_configured_put(afu);
+	return rc ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
+}
+
+static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
+				 int offset, int len, u32 val)
+{
+	int rc, record;
+	struct cxl_afu *afu;
+
+	afu = pci_bus_to_afu(bus);
+	/* Grab a reader lock on afu. */
+	if (afu == NULL || !cxl_afu_configured_get(afu))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	rc = cxl_pcie_config_info(bus, devfn, afu, &record);
+	if (rc)
+		goto out;
+
+	switch (len) {
+	case 1:
+		rc = cxl_ops->afu_cr_write8(afu, record, offset, val & 0xff);
+		break;
+	case 2:
+		rc = cxl_ops->afu_cr_write16(afu, record, offset, val & 0xffff);
+		break;
+	case 4:
+		rc = cxl_ops->afu_cr_write32(afu, record, offset, val);
+		break;
+	default:
+		WARN_ON(1);
+	}
+
+out:
+	cxl_afu_configured_put(afu);
+	return rc ? PCIBIOS_SET_FAILED : PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops cxl_pcie_pci_ops =
+{
+	.read = cxl_pcie_read_config,
+	.write = cxl_pcie_write_config,
+};
+
+
+static struct pci_controller_ops cxl_pci_controller_ops =
+{
+	.probe_mode = cxl_pci_probe_mode,
+	.enable_device_hook = cxl_pci_enable_device_hook,
+	.disable_device = cxl_pci_disable_device,
+	.release_device = cxl_pci_disable_device,
+	.window_alignment = cxl_pci_window_alignment,
+	.reset_secondary_bus = cxl_pci_reset_secondary_bus,
+	.setup_msi_irqs = cxl_setup_msi_irqs,
+	.teardown_msi_irqs = cxl_teardown_msi_irqs,
+	.dma_set_mask = cxl_dma_set_mask,
+};
+
+int cxl_pci_vphb_add(struct cxl_afu *afu)
+{
+	struct pci_controller *phb;
+	struct device_node *vphb_dn;
+	struct device *parent;
+
+	/*
+	 * If there are no AFU configuration records we won't have anything to
+	 * expose under the vPHB, so skip creating one, returning success since
+	 * this is still a valid case. This will also opt us out of EEH
+	 * handling since we won't have anything special to do if there are no
+	 * kernel drivers attached to the vPHB, and EEH handling is not yet
+	 * supported in the peer model.
+	 */
+	if (!afu->crs_num)
+		return 0;
+
+	/* The parent device is the adapter. Reuse the device node of
+	 * the adapter.
+	 * We don't seem to care what device node is used for the vPHB,
+	 * but tools such as lsvpd walk up the device parents looking
+	 * for a valid location code, so we might as well show devices
+	 * attached to the adapter as being located on that adapter.
+	 */
+	parent = afu->adapter->dev.parent;
+	vphb_dn = parent->of_node;
+
+	/* Alloc and setup PHB data structure */
+	phb = pcibios_alloc_controller(vphb_dn);
+	if (!phb)
+		return -ENODEV;
+
+	/* Setup parent in sysfs */
+	phb->parent = parent;
+
+	/* Setup the PHB using arch provided callback */
+	phb->ops = &cxl_pcie_pci_ops;
+	phb->cfg_addr = NULL;
+	phb->cfg_data = NULL;
+	phb->private_data = afu;
+	phb->controller_ops = cxl_pci_controller_ops;
+
+	/* Scan the bus */
+	pcibios_scan_phb(phb);
+	if (phb->bus == NULL)
+		return -ENXIO;
+
+	/* Set release hook on root bus */
+	pci_set_host_bridge_release(to_pci_host_bridge(phb->bus->bridge),
+				    pcibios_free_controller_deferred,
+				    (void *) phb);
+
+	/* Claim resources. This might need some rework as well depending
+	 * whether we are doing probe-only or not, like assigning unassigned
+	 * resources etc...
+	 */
+	pcibios_claim_one_bus(phb->bus);
+
+	/* Add probed PCI devices to the device model */
+	pci_bus_add_devices(phb->bus);
+
+	afu->phb = phb;
+
+	return 0;
+}
+
+void cxl_pci_vphb_remove(struct cxl_afu *afu)
+{
+	struct pci_controller *phb;
+
+	/* If there is no configuration record we won't have one of these */
+	if (!afu || !afu->phb)
+		return;
+
+	phb = afu->phb;
+	afu->phb = NULL;
+
+	pci_remove_root_bus(phb->bus);
+	/*
+	 * We don't free phb here - that's handled by
+	 * pcibios_free_controller_deferred()
+	 */
+}
+
+bool cxl_pci_is_vphb_device(struct pci_dev *dev)
+{
+	struct pci_controller *phb;
+
+	phb = pci_bus_to_host(dev->bus);
+
+	return (phb->ops == &cxl_pcie_pci_ops);
+}
+
+struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev)
+{
+	struct pci_controller *phb;
+
+	phb = pci_bus_to_host(dev->bus);
+
+	return (struct cxl_afu *)phb->private_data;
+}
+EXPORT_SYMBOL_GPL(cxl_pci_to_afu);
+
+unsigned int cxl_pci_to_cfg_record(struct pci_dev *dev)
+{
+	return cxl_pcie_cfg_record(dev->bus->number, dev->devfn);
+}
+EXPORT_SYMBOL_GPL(cxl_pci_to_cfg_record);
diff --git a/drivers/misc/ds1682.c b/drivers/misc/ds1682.c
new file mode 100644
index 000000000..98a921ea9
--- /dev/null
+++ b/drivers/misc/ds1682.c
@@ -0,0 +1,267 @@
+/*
+ * Dallas Semiconductor DS1682 Elapsed Time Recorder device driver
+ *
+ * Written by: Grant Likely <grant.likely@secretlab.ca>
+ *
+ * Copyright (C) 2007 Secret Lab Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * The DS1682 elapsed timer recorder is a simple device that implements
+ * one elapsed time counter, one event counter, an alarm signal and 10
+ * bytes of general purpose EEPROM.
+ *
+ * This driver provides access to the DS1682 counters and user data via
+ * the sysfs.  The following attributes are added to the device node:
+ *     elapsed_time (u32): Total elapsed event time in ms resolution
+ *     alarm_time (u32): When elapsed time exceeds the value in alarm_time,
+ *                       then the alarm pin is asserted.
+ *     event_count (u16): number of times the event pin has gone low.
+ *     eeprom (u8[10]): general purpose EEPROM
+ *
+ * Counter registers and user data are both read/write unless the device
+ * has been write protected.  This driver does not support turning off write
+ * protection.  Once write protection is turned on, it is impossible to
+ * turn it off again, so I have left the feature out of this driver to avoid
+ * accidental enabling, but it is trivial to add write protect support.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/sysfs.h>
+#include <linux/ctype.h>
+#include <linux/hwmon-sysfs.h>
+
+/* Device registers */
+#define DS1682_REG_CONFIG		0x00
+#define DS1682_REG_ALARM		0x01
+#define DS1682_REG_ELAPSED		0x05
+#define DS1682_REG_EVT_CNTR		0x09
+#define DS1682_REG_EEPROM		0x0b
+#define DS1682_REG_RESET		0x1d
+#define DS1682_REG_WRITE_DISABLE	0x1e
+#define DS1682_REG_WRITE_MEM_DISABLE	0x1f
+
+#define DS1682_EEPROM_SIZE		10
+
+/*
+ * Generic counter attributes
+ */
+static ssize_t ds1682_show(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr);
+	struct i2c_client *client = to_i2c_client(dev);
+	unsigned long long val, check;
+	__le32 val_le = 0;
+	int rc;
+
+	dev_dbg(dev, "ds1682_show() called on %s\n", attr->attr.name);
+
+	/* Read the register */
+	rc = i2c_smbus_read_i2c_block_data(client, sattr->index, sattr->nr,
+					   (u8 *)&val_le);
+	if (rc < 0)
+		return -EIO;
+
+	val = le32_to_cpu(val_le);
+
+	if (sattr->index == DS1682_REG_ELAPSED) {
+		int retries = 5;
+
+		/* Detect and retry when a tick occurs mid-read */
+		do {
+			rc = i2c_smbus_read_i2c_block_data(client, sattr->index,
+							   sattr->nr,
+							   (u8 *)&val_le);
+			if (rc < 0 || retries <= 0)
+				return -EIO;
+
+			check = val;
+			val = le32_to_cpu(val_le);
+			retries--;
+		} while (val != check && val != (check + 1));
+	}
+
+	/* Format the output string and return # of bytes
+	 * Special case: the 32 bit regs are time values with 1/4s
+	 * resolution, scale them up to milliseconds
+	 */
+	return sprintf(buf, "%llu\n", (sattr->nr == 4) ? (val * 250) : val);
+}
+
+static ssize_t ds1682_store(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr);
+	struct i2c_client *client = to_i2c_client(dev);
+	u64 val;
+	__le32 val_le;
+	int rc;
+
+	dev_dbg(dev, "ds1682_store() called on %s\n", attr->attr.name);
+
+	/* Decode input */
+	rc = kstrtoull(buf, 0, &val);
+	if (rc < 0) {
+		dev_dbg(dev, "input string not a number\n");
+		return -EINVAL;
+	}
+
+	/* Special case: the 32 bit regs are time values with 1/4s
+	 * resolution, scale input down to quarter-seconds */
+	if (sattr->nr == 4)
+		do_div(val, 250);
+
+	/* write out the value */
+	val_le = cpu_to_le32(val);
+	rc = i2c_smbus_write_i2c_block_data(client, sattr->index, sattr->nr,
+					    (u8 *) & val_le);
+	if (rc < 0) {
+		dev_err(dev, "register write failed; reg=0x%x, size=%i\n",
+			sattr->index, sattr->nr);
+		return -EIO;
+	}
+
+	return count;
+}
+
+/*
+ * Simple register attributes
+ */
+static SENSOR_DEVICE_ATTR_2(elapsed_time, S_IRUGO | S_IWUSR, ds1682_show,
+			    ds1682_store, 4, DS1682_REG_ELAPSED);
+static SENSOR_DEVICE_ATTR_2(alarm_time, S_IRUGO | S_IWUSR, ds1682_show,
+			    ds1682_store, 4, DS1682_REG_ALARM);
+static SENSOR_DEVICE_ATTR_2(event_count, S_IRUGO | S_IWUSR, ds1682_show,
+			    ds1682_store, 2, DS1682_REG_EVT_CNTR);
+
+static const struct attribute_group ds1682_group = {
+	.attrs = (struct attribute *[]) {
+		&sensor_dev_attr_elapsed_time.dev_attr.attr,
+		&sensor_dev_attr_alarm_time.dev_attr.attr,
+		&sensor_dev_attr_event_count.dev_attr.attr,
+		NULL,
+	},
+};
+
+/*
+ * User data attribute
+ */
+static ssize_t ds1682_eeprom_read(struct file *filp, struct kobject *kobj,
+				  struct bin_attribute *attr,
+				  char *buf, loff_t off, size_t count)
+{
+	struct i2c_client *client = kobj_to_i2c_client(kobj);
+	int rc;
+
+	dev_dbg(&client->dev, "ds1682_eeprom_read(p=%p, off=%lli, c=%zi)\n",
+		buf, off, count);
+
+	rc = i2c_smbus_read_i2c_block_data(client, DS1682_REG_EEPROM + off,
+					   count, buf);
+	if (rc < 0)
+		return -EIO;
+
+	return count;
+}
+
+static ssize_t ds1682_eeprom_write(struct file *filp, struct kobject *kobj,
+				   struct bin_attribute *attr,
+				   char *buf, loff_t off, size_t count)
+{
+	struct i2c_client *client = kobj_to_i2c_client(kobj);
+
+	dev_dbg(&client->dev, "ds1682_eeprom_write(p=%p, off=%lli, c=%zi)\n",
+		buf, off, count);
+
+	/* Write out to the device */
+	if (i2c_smbus_write_i2c_block_data(client, DS1682_REG_EEPROM + off,
+					   count, buf) < 0)
+		return -EIO;
+
+	return count;
+}
+
+static const struct bin_attribute ds1682_eeprom_attr = {
+	.attr = {
+		.name = "eeprom",
+		.mode = S_IRUGO | S_IWUSR,
+	},
+	.size = DS1682_EEPROM_SIZE,
+	.read = ds1682_eeprom_read,
+	.write = ds1682_eeprom_write,
+};
+
+/*
+ * Called when a ds1682 device is matched with this driver
+ */
+static int ds1682_probe(struct i2c_client *client,
+			const struct i2c_device_id *id)
+{
+	int rc;
+
+	if (!i2c_check_functionality(client->adapter,
+				     I2C_FUNC_SMBUS_I2C_BLOCK)) {
+		dev_err(&client->dev, "i2c bus does not support the ds1682\n");
+		rc = -ENODEV;
+		goto exit;
+	}
+
+	rc = sysfs_create_group(&client->dev.kobj, &ds1682_group);
+	if (rc)
+		goto exit;
+
+	rc = sysfs_create_bin_file(&client->dev.kobj, &ds1682_eeprom_attr);
+	if (rc)
+		goto exit_bin_attr;
+
+	return 0;
+
+ exit_bin_attr:
+	sysfs_remove_group(&client->dev.kobj, &ds1682_group);
+ exit:
+	return rc;
+}
+
+static int ds1682_remove(struct i2c_client *client)
+{
+	sysfs_remove_bin_file(&client->dev.kobj, &ds1682_eeprom_attr);
+	sysfs_remove_group(&client->dev.kobj, &ds1682_group);
+	return 0;
+}
+
+static const struct i2c_device_id ds1682_id[] = {
+	{ "ds1682", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, ds1682_id);
+
+static const struct of_device_id ds1682_of_match[] = {
+	{ .compatible = "dallas,ds1682", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, ds1682_of_match);
+
+static struct i2c_driver ds1682_driver = {
+	.driver = {
+		.name = "ds1682",
+		.of_match_table = ds1682_of_match,
+	},
+	.probe = ds1682_probe,
+	.remove = ds1682_remove,
+	.id_table = ds1682_id,
+};
+
+module_i2c_driver(ds1682_driver);
+
+MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>");
+MODULE_DESCRIPTION("DS1682 Elapsed Time Indicator driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/dummy-irq.c b/drivers/misc/dummy-irq.c
new file mode 100644
index 000000000..76a1015d5
--- /dev/null
+++ b/drivers/misc/dummy-irq.c
@@ -0,0 +1,64 @@
+/*
+ * Dummy IRQ handler driver.
+ *
+ * This module only registers itself as a handler that is specified to it
+ * by the 'irq' parameter.
+ *
+ * The sole purpose of this module is to help with debugging of systems on
+ * which spurious IRQs would happen on disabled IRQ vector.
+ *
+ * Copyright (C) 2013 Jiri Kosina
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+
+static int irq = -1;
+
+static irqreturn_t dummy_interrupt(int irq, void *dev_id)
+{
+	static int count = 0;
+
+	if (count == 0) {
+		printk(KERN_INFO "dummy-irq: interrupt occurred on IRQ %d\n",
+				irq);
+		count++;
+	}
+
+	return IRQ_NONE;
+}
+
+static int __init dummy_irq_init(void)
+{
+	if (irq < 0) {
+		printk(KERN_ERR "dummy-irq: no IRQ given.  Use irq=N\n");
+		return -EIO;
+	}
+	if (request_irq(irq, &dummy_interrupt, IRQF_SHARED, "dummy_irq", &irq)) {
+		printk(KERN_ERR "dummy-irq: cannot register IRQ %d\n", irq);
+		return -EIO;
+	}
+	printk(KERN_INFO "dummy-irq: registered for IRQ %d\n", irq);
+	return 0;
+}
+
+static void __exit dummy_irq_exit(void)
+{
+	printk(KERN_INFO "dummy-irq unloaded\n");
+	free_irq(irq, &irq);
+}
+
+module_init(dummy_irq_init);
+module_exit(dummy_irq_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jiri Kosina");
+module_param_hw(irq, uint, irq, 0444);
+MODULE_PARM_DESC(irq, "The IRQ to register for");
+MODULE_DESCRIPTION("Dummy IRQ handler driver");
diff --git a/drivers/misc/echo/Kconfig b/drivers/misc/echo/Kconfig
new file mode 100644
index 000000000..f1d41ea9c
--- /dev/null
+++ b/drivers/misc/echo/Kconfig
@@ -0,0 +1,9 @@
+config ECHO
+	tristate "Line Echo Canceller support"
+	default n
+	---help---
+	  This driver provides line echo cancelling support for mISDN and
+	  Zaptel drivers.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called echo.
diff --git a/drivers/misc/echo/Makefile b/drivers/misc/echo/Makefile
new file mode 100644
index 000000000..7d4caac12
--- /dev/null
+++ b/drivers/misc/echo/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_ECHO) += echo.o
diff --git a/drivers/misc/echo/echo.c b/drivers/misc/echo/echo.c
new file mode 100644
index 000000000..3ebe5d75a
--- /dev/null
+++ b/drivers/misc/echo/echo.c
@@ -0,0 +1,601 @@
+/*
+ * SpanDSP - a series of DSP components for telephony
+ *
+ * echo.c - A line echo canceller.  This code is being developed
+ *          against and partially complies with G168.
+ *
+ * Written by Steve Underwood <steveu@coppice.org>
+ *         and David Rowe <david_at_rowetel_dot_com>
+ *
+ * Copyright (C) 2001, 2003 Steve Underwood, 2007 David Rowe
+ *
+ * Based on a bit from here, a bit from there, eye of toad, ear of
+ * bat, 15 years of failed attempts by David and a few fried brain
+ * cells.
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*! \file */
+
+/* Implementation Notes
+   David Rowe
+   April 2007
+
+   This code started life as Steve's NLMS algorithm with a tap
+   rotation algorithm to handle divergence during double talk.  I
+   added a Geigel Double Talk Detector (DTD) [2] and performed some
+   G168 tests.  However I had trouble meeting the G168 requirements,
+   especially for double talk - there were always cases where my DTD
+   failed, for example where near end speech was under the 6dB
+   threshold required for declaring double talk.
+
+   So I tried a two path algorithm [1], which has so far given better
+   results.  The original tap rotation/Geigel algorithm is available
+   in SVN http://svn.rowetel.com/software/oslec/tags/before_16bit.
+   It's probably possible to make it work if some one wants to put some
+   serious work into it.
+
+   At present no special treatment is provided for tones, which
+   generally cause NLMS algorithms to diverge.  Initial runs of a
+   subset of the G168 tests for tones (e.g ./echo_test 6) show the
+   current algorithm is passing OK, which is kind of surprising.  The
+   full set of tests needs to be performed to confirm this result.
+
+   One other interesting change is that I have managed to get the NLMS
+   code to work with 16 bit coefficients, rather than the original 32
+   bit coefficents.  This reduces the MIPs and storage required.
+   I evaulated the 16 bit port using g168_tests.sh and listening tests
+   on 4 real-world samples.
+
+   I also attempted the implementation of a block based NLMS update
+   [2] but although this passes g168_tests.sh it didn't converge well
+   on the real-world samples.  I have no idea why, perhaps a scaling
+   problem.  The block based code is also available in SVN
+   http://svn.rowetel.com/software/oslec/tags/before_16bit.  If this
+   code can be debugged, it will lead to further reduction in MIPS, as
+   the block update code maps nicely onto DSP instruction sets (it's a
+   dot product) compared to the current sample-by-sample update.
+
+   Steve also has some nice notes on echo cancellers in echo.h
+
+   References:
+
+   [1] Ochiai, Areseki, and Ogihara, "Echo Canceller with Two Echo
+       Path Models", IEEE Transactions on communications, COM-25,
+       No. 6, June
+       1977.
+       http://www.rowetel.com/images/echo/dual_path_paper.pdf
+
+   [2] The classic, very useful paper that tells you how to
+       actually build a real world echo canceller:
+	 Messerschmitt, Hedberg, Cole, Haoui, Winship, "Digital Voice
+	 Echo Canceller with a TMS320020,
+	 http://www.rowetel.com/images/echo/spra129.pdf
+
+   [3] I have written a series of blog posts on this work, here is
+       Part 1: http://www.rowetel.com/blog/?p=18
+
+   [4] The source code http://svn.rowetel.com/software/oslec/
+
+   [5] A nice reference on LMS filters:
+	 http://en.wikipedia.org/wiki/Least_mean_squares_filter
+
+   Credits:
+
+   Thanks to Steve Underwood, Jean-Marc Valin, and Ramakrishnan
+   Muthukrishnan for their suggestions and email discussions.  Thanks
+   also to those people who collected echo samples for me such as
+   Mark, Pawel, and Pavel.
+*/
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include "echo.h"
+
+#define MIN_TX_POWER_FOR_ADAPTION	64
+#define MIN_RX_POWER_FOR_ADAPTION	64
+#define DTD_HANGOVER			600	/* 600 samples, or 75ms     */
+#define DC_LOG2BETA			3	/* log2() of DC filter Beta */
+
+/* adapting coeffs using the traditional stochastic descent (N)LMS algorithm */
+
+static inline void lms_adapt_bg(struct oslec_state *ec, int clean, int shift)
+{
+	int i;
+
+	int offset1;
+	int offset2;
+	int factor;
+	int exp;
+
+	if (shift > 0)
+		factor = clean << shift;
+	else
+		factor = clean >> -shift;
+
+	/* Update the FIR taps */
+
+	offset2 = ec->curr_pos;
+	offset1 = ec->taps - offset2;
+
+	for (i = ec->taps - 1; i >= offset1; i--) {
+		exp = (ec->fir_state_bg.history[i - offset1] * factor);
+		ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
+	}
+	for (; i >= 0; i--) {
+		exp = (ec->fir_state_bg.history[i + offset2] * factor);
+		ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
+	}
+}
+
+static inline int top_bit(unsigned int bits)
+{
+	if (bits == 0)
+		return -1;
+	else
+		return (int)fls((int32_t) bits) - 1;
+}
+
+struct oslec_state *oslec_create(int len, int adaption_mode)
+{
+	struct oslec_state *ec;
+	int i;
+	const int16_t *history;
+
+	ec = kzalloc(sizeof(*ec), GFP_KERNEL);
+	if (!ec)
+		return NULL;
+
+	ec->taps = len;
+	ec->log2taps = top_bit(len);
+	ec->curr_pos = ec->taps - 1;
+
+	ec->fir_taps16[0] =
+	    kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL);
+	if (!ec->fir_taps16[0])
+		goto error_oom_0;
+
+	ec->fir_taps16[1] =
+	    kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL);
+	if (!ec->fir_taps16[1])
+		goto error_oom_1;
+
+	history = fir16_create(&ec->fir_state, ec->fir_taps16[0], ec->taps);
+	if (!history)
+		goto error_state;
+	history = fir16_create(&ec->fir_state_bg, ec->fir_taps16[1], ec->taps);
+	if (!history)
+		goto error_state_bg;
+
+	for (i = 0; i < 5; i++)
+		ec->xvtx[i] = ec->yvtx[i] = ec->xvrx[i] = ec->yvrx[i] = 0;
+
+	ec->cng_level = 1000;
+	oslec_adaption_mode(ec, adaption_mode);
+
+	ec->snapshot = kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL);
+	if (!ec->snapshot)
+		goto error_snap;
+
+	ec->cond_met = 0;
+	ec->pstates = 0;
+	ec->ltxacc = ec->lrxacc = ec->lcleanacc = ec->lclean_bgacc = 0;
+	ec->ltx = ec->lrx = ec->lclean = ec->lclean_bg = 0;
+	ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
+	ec->lbgn = ec->lbgn_acc = 0;
+	ec->lbgn_upper = 200;
+	ec->lbgn_upper_acc = ec->lbgn_upper << 13;
+
+	return ec;
+
+error_snap:
+	fir16_free(&ec->fir_state_bg);
+error_state_bg:
+	fir16_free(&ec->fir_state);
+error_state:
+	kfree(ec->fir_taps16[1]);
+error_oom_1:
+	kfree(ec->fir_taps16[0]);
+error_oom_0:
+	kfree(ec);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(oslec_create);
+
+void oslec_free(struct oslec_state *ec)
+{
+	int i;
+
+	fir16_free(&ec->fir_state);
+	fir16_free(&ec->fir_state_bg);
+	for (i = 0; i < 2; i++)
+		kfree(ec->fir_taps16[i]);
+	kfree(ec->snapshot);
+	kfree(ec);
+}
+EXPORT_SYMBOL_GPL(oslec_free);
+
+void oslec_adaption_mode(struct oslec_state *ec, int adaption_mode)
+{
+	ec->adaption_mode = adaption_mode;
+}
+EXPORT_SYMBOL_GPL(oslec_adaption_mode);
+
+void oslec_flush(struct oslec_state *ec)
+{
+	int i;
+
+	ec->ltxacc = ec->lrxacc = ec->lcleanacc = ec->lclean_bgacc = 0;
+	ec->ltx = ec->lrx = ec->lclean = ec->lclean_bg = 0;
+	ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
+
+	ec->lbgn = ec->lbgn_acc = 0;
+	ec->lbgn_upper = 200;
+	ec->lbgn_upper_acc = ec->lbgn_upper << 13;
+
+	ec->nonupdate_dwell = 0;
+
+	fir16_flush(&ec->fir_state);
+	fir16_flush(&ec->fir_state_bg);
+	ec->fir_state.curr_pos = ec->taps - 1;
+	ec->fir_state_bg.curr_pos = ec->taps - 1;
+	for (i = 0; i < 2; i++)
+		memset(ec->fir_taps16[i], 0, ec->taps * sizeof(int16_t));
+
+	ec->curr_pos = ec->taps - 1;
+	ec->pstates = 0;
+}
+EXPORT_SYMBOL_GPL(oslec_flush);
+
+void oslec_snapshot(struct oslec_state *ec)
+{
+	memcpy(ec->snapshot, ec->fir_taps16[0], ec->taps * sizeof(int16_t));
+}
+EXPORT_SYMBOL_GPL(oslec_snapshot);
+
+/* Dual Path Echo Canceller */
+
+int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
+{
+	int32_t echo_value;
+	int clean_bg;
+	int tmp;
+	int tmp1;
+
+	/*
+	 * Input scaling was found be required to prevent problems when tx
+	 * starts clipping.  Another possible way to handle this would be the
+	 * filter coefficent scaling.
+	 */
+
+	ec->tx = tx;
+	ec->rx = rx;
+	tx >>= 1;
+	rx >>= 1;
+
+	/*
+	 * Filter DC, 3dB point is 160Hz (I think), note 32 bit precision
+	 * required otherwise values do not track down to 0. Zero at DC, Pole
+	 * at (1-Beta) on real axis.  Some chip sets (like Si labs) don't
+	 * need this, but something like a $10 X100P card does.  Any DC really
+	 * slows down convergence.
+	 *
+	 * Note: removes some low frequency from the signal, this reduces the
+	 * speech quality when listening to samples through headphones but may
+	 * not be obvious through a telephone handset.
+	 *
+	 * Note that the 3dB frequency in radians is approx Beta, e.g. for Beta
+	 * = 2^(-3) = 0.125, 3dB freq is 0.125 rads = 159Hz.
+	 */
+
+	if (ec->adaption_mode & ECHO_CAN_USE_RX_HPF) {
+		tmp = rx << 15;
+
+		/*
+		 * Make sure the gain of the HPF is 1.0. This can still
+		 * saturate a little under impulse conditions, and it might
+		 * roll to 32768 and need clipping on sustained peak level
+		 * signals. However, the scale of such clipping is small, and
+		 * the error due to any saturation should not markedly affect
+		 * the downstream processing.
+		 */
+		tmp -= (tmp >> 4);
+
+		ec->rx_1 += -(ec->rx_1 >> DC_LOG2BETA) + tmp - ec->rx_2;
+
+		/*
+		 * hard limit filter to prevent clipping.  Note that at this
+		 * stage rx should be limited to +/- 16383 due to right shift
+		 * above
+		 */
+		tmp1 = ec->rx_1 >> 15;
+		if (tmp1 > 16383)
+			tmp1 = 16383;
+		if (tmp1 < -16383)
+			tmp1 = -16383;
+		rx = tmp1;
+		ec->rx_2 = tmp;
+	}
+
+	/* Block average of power in the filter states.  Used for
+	   adaption power calculation. */
+
+	{
+		int new, old;
+
+		/* efficient "out with the old and in with the new" algorithm so
+		   we don't have to recalculate over the whole block of
+		   samples. */
+		new = (int)tx * (int)tx;
+		old = (int)ec->fir_state.history[ec->fir_state.curr_pos] *
+		    (int)ec->fir_state.history[ec->fir_state.curr_pos];
+		ec->pstates +=
+		    ((new - old) + (1 << (ec->log2taps - 1))) >> ec->log2taps;
+		if (ec->pstates < 0)
+			ec->pstates = 0;
+	}
+
+	/* Calculate short term average levels using simple single pole IIRs */
+
+	ec->ltxacc += abs(tx) - ec->ltx;
+	ec->ltx = (ec->ltxacc + (1 << 4)) >> 5;
+	ec->lrxacc += abs(rx) - ec->lrx;
+	ec->lrx = (ec->lrxacc + (1 << 4)) >> 5;
+
+	/* Foreground filter */
+
+	ec->fir_state.coeffs = ec->fir_taps16[0];
+	echo_value = fir16(&ec->fir_state, tx);
+	ec->clean = rx - echo_value;
+	ec->lcleanacc += abs(ec->clean) - ec->lclean;
+	ec->lclean = (ec->lcleanacc + (1 << 4)) >> 5;
+
+	/* Background filter */
+
+	echo_value = fir16(&ec->fir_state_bg, tx);
+	clean_bg = rx - echo_value;
+	ec->lclean_bgacc += abs(clean_bg) - ec->lclean_bg;
+	ec->lclean_bg = (ec->lclean_bgacc + (1 << 4)) >> 5;
+
+	/* Background Filter adaption */
+
+	/* Almost always adap bg filter, just simple DT and energy
+	   detection to minimise adaption in cases of strong double talk.
+	   However this is not critical for the dual path algorithm.
+	 */
+	ec->factor = 0;
+	ec->shift = 0;
+	if (!ec->nonupdate_dwell) {
+		int p, logp, shift;
+
+		/* Determine:
+
+		   f = Beta * clean_bg_rx/P ------ (1)
+
+		   where P is the total power in the filter states.
+
+		   The Boffins have shown that if we obey (1) we converge
+		   quickly and avoid instability.
+
+		   The correct factor f must be in Q30, as this is the fixed
+		   point format required by the lms_adapt_bg() function,
+		   therefore the scaled version of (1) is:
+
+		   (2^30) * f  = (2^30) * Beta * clean_bg_rx/P
+		   factor      = (2^30) * Beta * clean_bg_rx/P     ----- (2)
+
+		   We have chosen Beta = 0.25 by experiment, so:
+
+		   factor      = (2^30) * (2^-2) * clean_bg_rx/P
+
+		   (30 - 2 - log2(P))
+		   factor      = clean_bg_rx 2                     ----- (3)
+
+		   To avoid a divide we approximate log2(P) as top_bit(P),
+		   which returns the position of the highest non-zero bit in
+		   P.  This approximation introduces an error as large as a
+		   factor of 2, but the algorithm seems to handle it OK.
+
+		   Come to think of it a divide may not be a big deal on a
+		   modern DSP, so its probably worth checking out the cycles
+		   for a divide versus a top_bit() implementation.
+		 */
+
+		p = MIN_TX_POWER_FOR_ADAPTION + ec->pstates;
+		logp = top_bit(p) + ec->log2taps;
+		shift = 30 - 2 - logp;
+		ec->shift = shift;
+
+		lms_adapt_bg(ec, clean_bg, shift);
+	}
+
+	/* very simple DTD to make sure we dont try and adapt with strong
+	   near end speech */
+
+	ec->adapt = 0;
+	if ((ec->lrx > MIN_RX_POWER_FOR_ADAPTION) && (ec->lrx > ec->ltx))
+		ec->nonupdate_dwell = DTD_HANGOVER;
+	if (ec->nonupdate_dwell)
+		ec->nonupdate_dwell--;
+
+	/* Transfer logic */
+
+	/* These conditions are from the dual path paper [1], I messed with
+	   them a bit to improve performance. */
+
+	if ((ec->adaption_mode & ECHO_CAN_USE_ADAPTION) &&
+	    (ec->nonupdate_dwell == 0) &&
+	    /* (ec->Lclean_bg < 0.875*ec->Lclean) */
+	    (8 * ec->lclean_bg < 7 * ec->lclean) &&
+	    /* (ec->Lclean_bg < 0.125*ec->Ltx) */
+	    (8 * ec->lclean_bg < ec->ltx)) {
+		if (ec->cond_met == 6) {
+			/*
+			 * BG filter has had better results for 6 consecutive
+			 * samples
+			 */
+			ec->adapt = 1;
+			memcpy(ec->fir_taps16[0], ec->fir_taps16[1],
+			       ec->taps * sizeof(int16_t));
+		} else
+			ec->cond_met++;
+	} else
+		ec->cond_met = 0;
+
+	/* Non-Linear Processing */
+
+	ec->clean_nlp = ec->clean;
+	if (ec->adaption_mode & ECHO_CAN_USE_NLP) {
+		/*
+		 * Non-linear processor - a fancy way to say "zap small
+		 * signals, to avoid residual echo due to (uLaw/ALaw)
+		 * non-linearity in the channel.".
+		 */
+
+		if ((16 * ec->lclean < ec->ltx)) {
+			/*
+			 * Our e/c has improved echo by at least 24 dB (each
+			 * factor of 2 is 6dB, so 2*2*2*2=16 is the same as
+			 * 6+6+6+6=24dB)
+			 */
+			if (ec->adaption_mode & ECHO_CAN_USE_CNG) {
+				ec->cng_level = ec->lbgn;
+
+				/*
+				 * Very elementary comfort noise generation.
+				 * Just random numbers rolled off very vaguely
+				 * Hoth-like.  DR: This noise doesn't sound
+				 * quite right to me - I suspect there are some
+				 * overflow issues in the filtering as it's too
+				 * "crackly".
+				 * TODO: debug this, maybe just play noise at
+				 * high level or look at spectrum.
+				 */
+
+				ec->cng_rndnum =
+				    1664525U * ec->cng_rndnum + 1013904223U;
+				ec->cng_filter =
+				    ((ec->cng_rndnum & 0xFFFF) - 32768 +
+				     5 * ec->cng_filter) >> 3;
+				ec->clean_nlp =
+				    (ec->cng_filter * ec->cng_level * 8) >> 14;
+
+			} else if (ec->adaption_mode & ECHO_CAN_USE_CLIP) {
+				/* This sounds much better than CNG */
+				if (ec->clean_nlp > ec->lbgn)
+					ec->clean_nlp = ec->lbgn;
+				if (ec->clean_nlp < -ec->lbgn)
+					ec->clean_nlp = -ec->lbgn;
+			} else {
+				/*
+				 * just mute the residual, doesn't sound very
+				 * good, used mainly in G168 tests
+				 */
+				ec->clean_nlp = 0;
+			}
+		} else {
+			/*
+			 * Background noise estimator.  I tried a few
+			 * algorithms here without much luck.  This very simple
+			 * one seems to work best, we just average the level
+			 * using a slow (1 sec time const) filter if the
+			 * current level is less than a (experimentally
+			 * derived) constant.  This means we dont include high
+			 * level signals like near end speech.  When combined
+			 * with CNG or especially CLIP seems to work OK.
+			 */
+			if (ec->lclean < 40) {
+				ec->lbgn_acc += abs(ec->clean) - ec->lbgn;
+				ec->lbgn = (ec->lbgn_acc + (1 << 11)) >> 12;
+			}
+		}
+	}
+
+	/* Roll around the taps buffer */
+	if (ec->curr_pos <= 0)
+		ec->curr_pos = ec->taps;
+	ec->curr_pos--;
+
+	if (ec->adaption_mode & ECHO_CAN_DISABLE)
+		ec->clean_nlp = rx;
+
+	/* Output scaled back up again to match input scaling */
+
+	return (int16_t) ec->clean_nlp << 1;
+}
+EXPORT_SYMBOL_GPL(oslec_update);
+
+/* This function is separated from the echo canceller is it is usually called
+   as part of the tx process.  See rx HP (DC blocking) filter above, it's
+   the same design.
+
+   Some soft phones send speech signals with a lot of low frequency
+   energy, e.g. down to 20Hz.  This can make the hybrid non-linear
+   which causes the echo canceller to fall over.  This filter can help
+   by removing any low frequency before it gets to the tx port of the
+   hybrid.
+
+   It can also help by removing and DC in the tx signal.  DC is bad
+   for LMS algorithms.
+
+   This is one of the classic DC removal filters, adjusted to provide
+   sufficient bass rolloff to meet the above requirement to protect hybrids
+   from things that upset them. The difference between successive samples
+   produces a lousy HPF, and then a suitably placed pole flattens things out.
+   The final result is a nicely rolled off bass end. The filtering is
+   implemented with extended fractional precision, which noise shapes things,
+   giving very clean DC removal.
+*/
+
+int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx)
+{
+	int tmp;
+	int tmp1;
+
+	if (ec->adaption_mode & ECHO_CAN_USE_TX_HPF) {
+		tmp = tx << 15;
+
+		/*
+		 * Make sure the gain of the HPF is 1.0. The first can still
+		 * saturate a little under impulse conditions, and it might
+		 * roll to 32768 and need clipping on sustained peak level
+		 * signals. However, the scale of such clipping is small, and
+		 * the error due to any saturation should not markedly affect
+		 * the downstream processing.
+		 */
+		tmp -= (tmp >> 4);
+
+		ec->tx_1 += -(ec->tx_1 >> DC_LOG2BETA) + tmp - ec->tx_2;
+		tmp1 = ec->tx_1 >> 15;
+		if (tmp1 > 32767)
+			tmp1 = 32767;
+		if (tmp1 < -32767)
+			tmp1 = -32767;
+		tx = tmp1;
+		ec->tx_2 = tmp;
+	}
+
+	return tx;
+}
+EXPORT_SYMBOL_GPL(oslec_hpf_tx);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("David Rowe");
+MODULE_DESCRIPTION("Open Source Line Echo Canceller");
+MODULE_VERSION("0.3.0");
diff --git a/drivers/misc/echo/echo.h b/drivers/misc/echo/echo.h
new file mode 100644
index 000000000..9b08c63e6
--- /dev/null
+++ b/drivers/misc/echo/echo.h
@@ -0,0 +1,187 @@
+/*
+ * SpanDSP - a series of DSP components for telephony
+ *
+ * echo.c - A line echo canceller.  This code is being developed
+ *          against and partially complies with G168.
+ *
+ * Written by Steve Underwood <steveu@coppice.org>
+ *         and David Rowe <david_at_rowetel_dot_com>
+ *
+ * Copyright (C) 2001 Steve Underwood and 2007 David Rowe
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __ECHO_H
+#define __ECHO_H
+
+/*
+Line echo cancellation for voice
+
+What does it do?
+
+This module aims to provide G.168-2002 compliant echo cancellation, to remove
+electrical echoes (e.g. from 2-4 wire hybrids) from voice calls.
+
+How does it work?
+
+The heart of the echo cancellor is FIR filter. This is adapted to match the
+echo impulse response of the telephone line. It must be long enough to
+adequately cover the duration of that impulse response. The signal transmitted
+to the telephone line is passed through the FIR filter. Once the FIR is
+properly adapted, the resulting output is an estimate of the echo signal
+received from the line. This is subtracted from the received signal. The result
+is an estimate of the signal which originated at the far end of the line, free
+from echos of our own transmitted signal.
+
+The least mean squares (LMS) algorithm is attributed to Widrow and Hoff, and
+was introduced in 1960. It is the commonest form of filter adaption used in
+things like modem line equalisers and line echo cancellers. There it works very
+well.  However, it only works well for signals of constant amplitude. It works
+very poorly for things like speech echo cancellation, where the signal level
+varies widely.  This is quite easy to fix. If the signal level is normalised -
+similar to applying AGC - LMS can work as well for a signal of varying
+amplitude as it does for a modem signal. This normalised least mean squares
+(NLMS) algorithm is the commonest one used for speech echo cancellation. Many
+other algorithms exist - e.g. RLS (essentially the same as Kalman filtering),
+FAP, etc. Some perform significantly better than NLMS.  However, factors such
+as computational complexity and patents favour the use of NLMS.
+
+A simple refinement to NLMS can improve its performance with speech. NLMS tends
+to adapt best to the strongest parts of a signal. If the signal is white noise,
+the NLMS algorithm works very well. However, speech has more low frequency than
+high frequency content. Pre-whitening (i.e. filtering the signal to flatten its
+spectrum) the echo signal improves the adapt rate for speech, and ensures the
+final residual signal is not heavily biased towards high frequencies. A very
+low complexity filter is adequate for this, so pre-whitening adds little to the
+compute requirements of the echo canceller.
+
+An FIR filter adapted using pre-whitened NLMS performs well, provided certain
+conditions are met:
+
+    - The transmitted signal has poor self-correlation.
+    - There is no signal being generated within the environment being
+      cancelled.
+
+The difficulty is that neither of these can be guaranteed.
+
+If the adaption is performed while transmitting noise (or something fairly
+noise like, such as voice) the adaption works very well. If the adaption is
+performed while transmitting something highly correlative (typically narrow
+band energy such as signalling tones or DTMF), the adaption can go seriously
+wrong. The reason is there is only one solution for the adaption on a near
+random signal - the impulse response of the line. For a repetitive signal,
+there are any number of solutions which converge the adaption, and nothing
+guides the adaption to choose the generalised one. Allowing an untrained
+canceller to converge on this kind of narrowband energy probably a good thing,
+since at least it cancels the tones. Allowing a well converged canceller to
+continue converging on such energy is just a way to ruin its generalised
+adaption. A narrowband detector is needed, so adapation can be suspended at
+appropriate times.
+
+The adaption process is based on trying to eliminate the received signal. When
+there is any signal from within the environment being cancelled it may upset
+the adaption process. Similarly, if the signal we are transmitting is small,
+noise may dominate and disturb the adaption process. If we can ensure that the
+adaption is only performed when we are transmitting a significant signal level,
+and the environment is not, things will be OK. Clearly, it is easy to tell when
+we are sending a significant signal. Telling, if the environment is generating
+a significant signal, and doing it with sufficient speed that the adaption will
+not have diverged too much more we stop it, is a little harder.
+
+The key problem in detecting when the environment is sourcing significant
+energy is that we must do this very quickly. Given a reasonably long sample of
+the received signal, there are a number of strategies which may be used to
+assess whether that signal contains a strong far end component. However, by the
+time that assessment is complete the far end signal will have already caused
+major mis-convergence in the adaption process. An assessment algorithm is
+needed which produces a fairly accurate result from a very short burst of far
+end energy.
+
+How do I use it?
+
+The echo cancellor processes both the transmit and receive streams sample by
+sample. The processing function is not declared inline. Unfortunately,
+cancellation requires many operations per sample, so the call overhead is only
+a minor burden.
+*/
+
+#include "fir.h"
+#include "oslec.h"
+
+/*
+    G.168 echo canceller descriptor. This defines the working state for a line
+    echo canceller.
+*/
+struct oslec_state {
+	int16_t tx;
+	int16_t rx;
+	int16_t clean;
+	int16_t clean_nlp;
+
+	int nonupdate_dwell;
+	int curr_pos;
+	int taps;
+	int log2taps;
+	int adaption_mode;
+
+	int cond_met;
+	int32_t pstates;
+	int16_t adapt;
+	int32_t factor;
+	int16_t shift;
+
+	/* Average levels and averaging filter states */
+	int ltxacc;
+	int lrxacc;
+	int lcleanacc;
+	int lclean_bgacc;
+	int ltx;
+	int lrx;
+	int lclean;
+	int lclean_bg;
+	int lbgn;
+	int lbgn_acc;
+	int lbgn_upper;
+	int lbgn_upper_acc;
+
+	/* foreground and background filter states */
+	struct fir16_state_t fir_state;
+	struct fir16_state_t fir_state_bg;
+	int16_t *fir_taps16[2];
+
+	/* DC blocking filter states */
+	int tx_1;
+	int tx_2;
+	int rx_1;
+	int rx_2;
+
+	/* optional High Pass Filter states */
+	int32_t xvtx[5];
+	int32_t yvtx[5];
+	int32_t xvrx[5];
+	int32_t yvrx[5];
+
+	/* Parameters for the optional Hoth noise generator */
+	int cng_level;
+	int cng_rndnum;
+	int cng_filter;
+
+	/* snapshot sample of coeffs used for development */
+	int16_t *snapshot;
+};
+
+#endif /* __ECHO_H */
diff --git a/drivers/misc/echo/fir.h b/drivers/misc/echo/fir.h
new file mode 100644
index 000000000..4e0f365f0
--- /dev/null
+++ b/drivers/misc/echo/fir.h
@@ -0,0 +1,166 @@
+/*
+ * SpanDSP - a series of DSP components for telephony
+ *
+ * fir.h - General telephony FIR routines
+ *
+ * Written by Steve Underwood <steveu@coppice.org>
+ *
+ * Copyright (C) 2002 Steve Underwood
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#if !defined(_FIR_H_)
+#define _FIR_H_
+
+/*
+   Ideas for improvement:
+
+   1/ Rewrite filter for dual MAC inner loop.  The issue here is handling
+   history sample offsets that are 16 bit aligned - the dual MAC needs
+   32 bit aligmnent.  There are some good examples in libbfdsp.
+
+   2/ Use the hardware circular buffer facility tohalve memory usage.
+
+   3/ Consider using internal memory.
+
+   Using less memory might also improve speed as cache misses will be
+   reduced. A drop in MIPs and memory approaching 50% should be
+   possible.
+
+   The foreground and background filters currenlty use a total of
+   about 10 MIPs/ch as measured with speedtest.c on a 256 TAP echo
+   can.
+*/
+
+/*
+ * 16 bit integer FIR descriptor. This defines the working state for a single
+ * instance of an FIR filter using 16 bit integer coefficients.
+ */
+struct fir16_state_t {
+	int taps;
+	int curr_pos;
+	const int16_t *coeffs;
+	int16_t *history;
+};
+
+/*
+ * 32 bit integer FIR descriptor. This defines the working state for a single
+ * instance of an FIR filter using 32 bit integer coefficients, and filtering
+ * 16 bit integer data.
+ */
+struct fir32_state_t {
+	int taps;
+	int curr_pos;
+	const int32_t *coeffs;
+	int16_t *history;
+};
+
+/*
+ * Floating point FIR descriptor. This defines the working state for a single
+ * instance of an FIR filter using floating point coefficients and data.
+ */
+struct fir_float_state_t {
+	int taps;
+	int curr_pos;
+	const float *coeffs;
+	float *history;
+};
+
+static inline const int16_t *fir16_create(struct fir16_state_t *fir,
+					      const int16_t *coeffs, int taps)
+{
+	fir->taps = taps;
+	fir->curr_pos = taps - 1;
+	fir->coeffs = coeffs;
+	fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL);
+	return fir->history;
+}
+
+static inline void fir16_flush(struct fir16_state_t *fir)
+{
+	memset(fir->history, 0, fir->taps * sizeof(int16_t));
+}
+
+static inline void fir16_free(struct fir16_state_t *fir)
+{
+	kfree(fir->history);
+}
+
+static inline int16_t fir16(struct fir16_state_t *fir, int16_t sample)
+{
+	int32_t y;
+	int i;
+	int offset1;
+	int offset2;
+
+	fir->history[fir->curr_pos] = sample;
+
+	offset2 = fir->curr_pos;
+	offset1 = fir->taps - offset2;
+	y = 0;
+	for (i = fir->taps - 1; i >= offset1; i--)
+		y += fir->coeffs[i] * fir->history[i - offset1];
+	for (; i >= 0; i--)
+		y += fir->coeffs[i] * fir->history[i + offset2];
+	if (fir->curr_pos <= 0)
+		fir->curr_pos = fir->taps;
+	fir->curr_pos--;
+	return (int16_t) (y >> 15);
+}
+
+static inline const int16_t *fir32_create(struct fir32_state_t *fir,
+					      const int32_t *coeffs, int taps)
+{
+	fir->taps = taps;
+	fir->curr_pos = taps - 1;
+	fir->coeffs = coeffs;
+	fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL);
+	return fir->history;
+}
+
+static inline void fir32_flush(struct fir32_state_t *fir)
+{
+	memset(fir->history, 0, fir->taps * sizeof(int16_t));
+}
+
+static inline void fir32_free(struct fir32_state_t *fir)
+{
+	kfree(fir->history);
+}
+
+static inline int16_t fir32(struct fir32_state_t *fir, int16_t sample)
+{
+	int i;
+	int32_t y;
+	int offset1;
+	int offset2;
+
+	fir->history[fir->curr_pos] = sample;
+	offset2 = fir->curr_pos;
+	offset1 = fir->taps - offset2;
+	y = 0;
+	for (i = fir->taps - 1; i >= offset1; i--)
+		y += fir->coeffs[i] * fir->history[i - offset1];
+	for (; i >= 0; i--)
+		y += fir->coeffs[i] * fir->history[i + offset2];
+	if (fir->curr_pos <= 0)
+		fir->curr_pos = fir->taps;
+	fir->curr_pos--;
+	return (int16_t) (y >> 15);
+}
+
+#endif
diff --git a/drivers/misc/echo/oslec.h b/drivers/misc/echo/oslec.h
new file mode 100644
index 000000000..f4175360c
--- /dev/null
+++ b/drivers/misc/echo/oslec.h
@@ -0,0 +1,94 @@
+/*
+ *  OSLEC - A line echo canceller.  This code is being developed
+ *          against and partially complies with G168. Using code from SpanDSP
+ *
+ * Written by Steve Underwood <steveu@coppice.org>
+ *         and David Rowe <david_at_rowetel_dot_com>
+ *
+ * Copyright (C) 2001 Steve Underwood and 2007-2008 David Rowe
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __OSLEC_H
+#define __OSLEC_H
+
+/* Mask bits for the adaption mode */
+#define ECHO_CAN_USE_ADAPTION	0x01
+#define ECHO_CAN_USE_NLP	0x02
+#define ECHO_CAN_USE_CNG	0x04
+#define ECHO_CAN_USE_CLIP	0x08
+#define ECHO_CAN_USE_TX_HPF	0x10
+#define ECHO_CAN_USE_RX_HPF	0x20
+#define ECHO_CAN_DISABLE	0x40
+
+/**
+ * oslec_state: G.168 echo canceller descriptor.
+ *
+ * This defines the working state for a line echo canceller.
+ */
+struct oslec_state;
+
+/**
+ * oslec_create - Create a voice echo canceller context.
+ * @len: The length of the canceller, in samples.
+ * @return: The new canceller context, or NULL if the canceller could not be
+ * created.
+ */
+struct oslec_state *oslec_create(int len, int adaption_mode);
+
+/**
+ * oslec_free - Free a voice echo canceller context.
+ * @ec: The echo canceller context.
+ */
+void oslec_free(struct oslec_state *ec);
+
+/**
+ * oslec_flush - Flush (reinitialise) a voice echo canceller context.
+ * @ec: The echo canceller context.
+ */
+void oslec_flush(struct oslec_state *ec);
+
+/**
+ * oslec_adaption_mode - set the adaption mode of a voice echo canceller context.
+ * @ec The echo canceller context.
+ * @adaption_mode: The mode.
+ */
+void oslec_adaption_mode(struct oslec_state *ec, int adaption_mode);
+
+void oslec_snapshot(struct oslec_state *ec);
+
+/**
+ * oslec_update: Process a sample through a voice echo canceller.
+ * @ec: The echo canceller context.
+ * @tx: The transmitted audio sample.
+ * @rx: The received audio sample.
+ *
+ * The return value is the clean (echo cancelled) received sample.
+ */
+int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx);
+
+/**
+ * oslec_hpf_tx: Process to high pass filter the tx signal.
+ * @ec: The echo canceller context.
+ * @tx: The transmitted auio sample.
+ *
+ * The return value is the HP filtered transmit sample, send this to your D/A.
+ */
+int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx);
+
+#endif /* __OSLEC_H */
diff --git a/drivers/misc/eeprom/Kconfig b/drivers/misc/eeprom/Kconfig
new file mode 100644
index 000000000..d382b13c2
--- /dev/null
+++ b/drivers/misc/eeprom/Kconfig
@@ -0,0 +1,114 @@
+menu "EEPROM support"
+
+config EEPROM_AT24
+	tristate "I2C EEPROMs / RAMs / ROMs from most vendors"
+	depends on I2C && SYSFS
+	select NVMEM
+	select REGMAP_I2C
+	help
+	  Enable this driver to get read/write support to most I2C EEPROMs
+	  and compatible devices like FRAMs, SRAMs, ROMs etc. After you
+	  configure the driver to know about each chip on your target
+	  board.  Use these generic chip names, instead of vendor-specific
+	  ones like at24c64, 24lc02 or fm24c04:
+
+	     24c00, 24c01, 24c02, spd (readonly 24c02), 24c04, 24c08,
+	     24c16, 24c32, 24c64, 24c128, 24c256, 24c512, 24c1024, 24c2048
+
+	  Unless you like data loss puzzles, always be sure that any chip
+	  you configure as a 24c32 (32 kbit) or larger is NOT really a
+	  24c16 (16 kbit) or smaller, and vice versa. Marking the chip
+	  as read-only won't help recover from this. Also, if your chip
+	  has any software write-protect mechanism you may want to review the
+	  code to make sure this driver won't turn it on by accident.
+
+	  If you use this with an SMBus adapter instead of an I2C adapter,
+	  full functionality is not available.  Only smaller devices are
+	  supported (24c16 and below, max 4 kByte).
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called at24.
+
+config EEPROM_AT25
+	tristate "SPI EEPROMs from most vendors"
+	depends on SPI && SYSFS
+	select NVMEM
+	help
+	  Enable this driver to get read/write support to most SPI EEPROMs,
+	  after you configure the board init code to know about each eeprom
+	  on your target board.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called at25.
+
+config EEPROM_LEGACY
+	tristate "Old I2C EEPROM reader"
+	depends on I2C && SYSFS
+	help
+	  If you say yes here you get read-only access to the EEPROM data
+	  available on modern memory DIMMs and Sony Vaio laptops via I2C. Such
+	  EEPROMs could theoretically be available on other devices as well.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called eeprom.
+
+config EEPROM_MAX6875
+	tristate "Maxim MAX6874/5 power supply supervisor"
+	depends on I2C
+	help
+	  If you say yes here you get read-only support for the user EEPROM of
+	  the Maxim MAX6874/5 EEPROM-programmable, quad power-supply
+	  sequencer/supervisor.
+
+	  All other features of this chip should be accessed via i2c-dev.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called max6875.
+
+
+config EEPROM_93CX6
+	tristate "EEPROM 93CX6 support"
+	help
+	  This is a driver for the EEPROM chipsets 93c46 and 93c66.
+	  The driver supports both read as well as write commands.
+
+	  If unsure, say N.
+
+config EEPROM_93XX46
+	tristate "Microwire EEPROM 93XX46 support"
+	depends on SPI && SYSFS
+	select REGMAP
+	select NVMEM
+	help
+	  Driver for the microwire EEPROM chipsets 93xx46x. The driver
+	  supports both read and write commands and also the command to
+	  erase the whole EEPROM.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called eeprom_93xx46.
+
+	  If unsure, say N.
+
+config EEPROM_DIGSY_MTC_CFG
+	bool "DigsyMTC display configuration EEPROMs device"
+	depends on GPIO_MPC5200 && SPI_GPIO
+	help
+	  This option enables access to display configuration EEPROMs
+	  on digsy_mtc board. You have to additionally select Microwire
+	  EEPROM 93XX46 driver. sysfs entries will be created for that
+	  EEPROM allowing to read/write the configuration data or to
+	  erase the whole EEPROM.
+
+	  If unsure, say N.
+
+config EEPROM_IDT_89HPESX
+	tristate "IDT 89HPESx PCIe-swtiches EEPROM / CSR support"
+	depends on I2C && SYSFS
+	help
+	  Enable this driver to get read/write access to EEPROM / CSRs
+	  over IDT PCIe-swtich i2c-slave interface.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called idt_89hpesx.
+
+endmenu
diff --git a/drivers/misc/eeprom/Makefile b/drivers/misc/eeprom/Makefile
new file mode 100644
index 000000000..2aab60ef3
--- /dev/null
+++ b/drivers/misc/eeprom/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_EEPROM_AT24)	+= at24.o
+obj-$(CONFIG_EEPROM_AT25)	+= at25.o
+obj-$(CONFIG_EEPROM_LEGACY)	+= eeprom.o
+obj-$(CONFIG_EEPROM_MAX6875)	+= max6875.o
+obj-$(CONFIG_EEPROM_93CX6)	+= eeprom_93cx6.o
+obj-$(CONFIG_EEPROM_93XX46)	+= eeprom_93xx46.o
+obj-$(CONFIG_EEPROM_DIGSY_MTC_CFG) += digsy_mtc_eeprom.o
+obj-$(CONFIG_EEPROM_IDT_89HPESX) += idt_89hpesx.o
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
new file mode 100644
index 000000000..dc3537651
--- /dev/null
+++ b/drivers/misc/eeprom/at24.c
@@ -0,0 +1,805 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * at24.c - handle most I2C EEPROMs
+ *
+ * Copyright (C) 2005-2007 David Brownell
+ * Copyright (C) 2008 Wolfram Sang, Pengutronix
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/mutex.h>
+#include <linux/mod_devicetable.h>
+#include <linux/log2.h>
+#include <linux/bitops.h>
+#include <linux/jiffies.h>
+#include <linux/property.h>
+#include <linux/acpi.h>
+#include <linux/i2c.h>
+#include <linux/nvmem-provider.h>
+#include <linux/regmap.h>
+#include <linux/platform_data/at24.h>
+#include <linux/pm_runtime.h>
+#include <linux/gpio/consumer.h>
+
+/*
+ * I2C EEPROMs from most vendors are inexpensive and mostly interchangeable.
+ * Differences between different vendor product lines (like Atmel AT24C or
+ * MicroChip 24LC, etc) won't much matter for typical read/write access.
+ * There are also I2C RAM chips, likewise interchangeable. One example
+ * would be the PCF8570, which acts like a 24c02 EEPROM (256 bytes).
+ *
+ * However, misconfiguration can lose data. "Set 16-bit memory address"
+ * to a part with 8-bit addressing will overwrite data. Writing with too
+ * big a page size also loses data. And it's not safe to assume that the
+ * conventional addresses 0x50..0x57 only hold eeproms; a PCF8563 RTC
+ * uses 0x51, for just one example.
+ *
+ * Accordingly, explicit board-specific configuration data should be used
+ * in almost all cases. (One partial exception is an SMBus used to access
+ * "SPD" data for DRAM sticks. Those only use 24c02 EEPROMs.)
+ *
+ * So this driver uses "new style" I2C driver binding, expecting to be
+ * told what devices exist. That may be in arch/X/mach-Y/board-Z.c or
+ * similar kernel-resident tables; or, configuration data coming from
+ * a bootloader.
+ *
+ * Other than binding model, current differences from "eeprom" driver are
+ * that this one handles write access and isn't restricted to 24c02 devices.
+ * It also handles larger devices (32 kbit and up) with two-byte addresses,
+ * which won't work on pure SMBus systems.
+ */
+
+struct at24_client {
+	struct i2c_client *client;
+	struct regmap *regmap;
+};
+
+struct at24_data {
+	/*
+	 * Lock protects against activities from other Linux tasks,
+	 * but not from changes by other I2C masters.
+	 */
+	struct mutex lock;
+
+	unsigned int write_max;
+	unsigned int num_addresses;
+	unsigned int offset_adj;
+
+	u32 byte_len;
+	u16 page_size;
+	u8 flags;
+
+	struct nvmem_device *nvmem;
+
+	struct gpio_desc *wp_gpio;
+
+	/*
+	 * Some chips tie up multiple I2C addresses; dummy devices reserve
+	 * them for us, and we'll use them with SMBus calls.
+	 */
+	struct at24_client client[];
+};
+
+/*
+ * This parameter is to help this driver avoid blocking other drivers out
+ * of I2C for potentially troublesome amounts of time. With a 100 kHz I2C
+ * clock, one 256 byte read takes about 1/43 second which is excessive;
+ * but the 1/170 second it takes at 400 kHz may be quite reasonable; and
+ * at 1 MHz (Fm+) a 1/430 second delay could easily be invisible.
+ *
+ * This value is forced to be a power of two so that writes align on pages.
+ */
+static unsigned int at24_io_limit = 128;
+module_param_named(io_limit, at24_io_limit, uint, 0);
+MODULE_PARM_DESC(at24_io_limit, "Maximum bytes per I/O (default 128)");
+
+/*
+ * Specs often allow 5 msec for a page write, sometimes 20 msec;
+ * it's important to recover from write timeouts.
+ */
+static unsigned int at24_write_timeout = 25;
+module_param_named(write_timeout, at24_write_timeout, uint, 0);
+MODULE_PARM_DESC(at24_write_timeout, "Time (in ms) to try writes (default 25)");
+
+struct at24_chip_data {
+	/*
+	 * these fields mirror their equivalents in
+	 * struct at24_platform_data
+	 */
+	u32 byte_len;
+	u8 flags;
+};
+
+#define AT24_CHIP_DATA(_name, _len, _flags)				\
+	static const struct at24_chip_data _name = {			\
+		.byte_len = _len, .flags = _flags,			\
+	}
+
+/* needs 8 addresses as A0-A2 are ignored */
+AT24_CHIP_DATA(at24_data_24c00, 128 / 8, AT24_FLAG_TAKE8ADDR);
+/* old variants can't be handled with this generic entry! */
+AT24_CHIP_DATA(at24_data_24c01, 1024 / 8, 0);
+AT24_CHIP_DATA(at24_data_24cs01, 16,
+	AT24_FLAG_SERIAL | AT24_FLAG_READONLY);
+AT24_CHIP_DATA(at24_data_24c02, 2048 / 8, 0);
+AT24_CHIP_DATA(at24_data_24cs02, 16,
+	AT24_FLAG_SERIAL | AT24_FLAG_READONLY);
+AT24_CHIP_DATA(at24_data_24mac402, 48 / 8,
+	AT24_FLAG_MAC | AT24_FLAG_READONLY);
+AT24_CHIP_DATA(at24_data_24mac602, 64 / 8,
+	AT24_FLAG_MAC | AT24_FLAG_READONLY);
+/* spd is a 24c02 in memory DIMMs */
+AT24_CHIP_DATA(at24_data_spd, 2048 / 8,
+	AT24_FLAG_READONLY | AT24_FLAG_IRUGO);
+AT24_CHIP_DATA(at24_data_24c04, 4096 / 8, 0);
+AT24_CHIP_DATA(at24_data_24cs04, 16,
+	AT24_FLAG_SERIAL | AT24_FLAG_READONLY);
+/* 24rf08 quirk is handled at i2c-core */
+AT24_CHIP_DATA(at24_data_24c08, 8192 / 8, 0);
+AT24_CHIP_DATA(at24_data_24cs08, 16,
+	AT24_FLAG_SERIAL | AT24_FLAG_READONLY);
+AT24_CHIP_DATA(at24_data_24c16, 16384 / 8, 0);
+AT24_CHIP_DATA(at24_data_24cs16, 16,
+	AT24_FLAG_SERIAL | AT24_FLAG_READONLY);
+AT24_CHIP_DATA(at24_data_24c32, 32768 / 8, AT24_FLAG_ADDR16);
+AT24_CHIP_DATA(at24_data_24cs32, 16,
+	AT24_FLAG_ADDR16 | AT24_FLAG_SERIAL | AT24_FLAG_READONLY);
+AT24_CHIP_DATA(at24_data_24c64, 65536 / 8, AT24_FLAG_ADDR16);
+AT24_CHIP_DATA(at24_data_24cs64, 16,
+	AT24_FLAG_ADDR16 | AT24_FLAG_SERIAL | AT24_FLAG_READONLY);
+AT24_CHIP_DATA(at24_data_24c128, 131072 / 8, AT24_FLAG_ADDR16);
+AT24_CHIP_DATA(at24_data_24c256, 262144 / 8, AT24_FLAG_ADDR16);
+AT24_CHIP_DATA(at24_data_24c512, 524288 / 8, AT24_FLAG_ADDR16);
+AT24_CHIP_DATA(at24_data_24c1024, 1048576 / 8, AT24_FLAG_ADDR16);
+AT24_CHIP_DATA(at24_data_24c2048, 2097152 / 8, AT24_FLAG_ADDR16);
+/* identical to 24c08 ? */
+AT24_CHIP_DATA(at24_data_INT3499, 8192 / 8, 0);
+
+static const struct i2c_device_id at24_ids[] = {
+	{ "24c00",	(kernel_ulong_t)&at24_data_24c00 },
+	{ "24c01",	(kernel_ulong_t)&at24_data_24c01 },
+	{ "24cs01",	(kernel_ulong_t)&at24_data_24cs01 },
+	{ "24c02",	(kernel_ulong_t)&at24_data_24c02 },
+	{ "24cs02",	(kernel_ulong_t)&at24_data_24cs02 },
+	{ "24mac402",	(kernel_ulong_t)&at24_data_24mac402 },
+	{ "24mac602",	(kernel_ulong_t)&at24_data_24mac602 },
+	{ "spd",	(kernel_ulong_t)&at24_data_spd },
+	{ "24c04",	(kernel_ulong_t)&at24_data_24c04 },
+	{ "24cs04",	(kernel_ulong_t)&at24_data_24cs04 },
+	{ "24c08",	(kernel_ulong_t)&at24_data_24c08 },
+	{ "24cs08",	(kernel_ulong_t)&at24_data_24cs08 },
+	{ "24c16",	(kernel_ulong_t)&at24_data_24c16 },
+	{ "24cs16",	(kernel_ulong_t)&at24_data_24cs16 },
+	{ "24c32",	(kernel_ulong_t)&at24_data_24c32 },
+	{ "24cs32",	(kernel_ulong_t)&at24_data_24cs32 },
+	{ "24c64",	(kernel_ulong_t)&at24_data_24c64 },
+	{ "24cs64",	(kernel_ulong_t)&at24_data_24cs64 },
+	{ "24c128",	(kernel_ulong_t)&at24_data_24c128 },
+	{ "24c256",	(kernel_ulong_t)&at24_data_24c256 },
+	{ "24c512",	(kernel_ulong_t)&at24_data_24c512 },
+	{ "24c1024",	(kernel_ulong_t)&at24_data_24c1024 },
+	{ "24c2048",    (kernel_ulong_t)&at24_data_24c2048 },
+	{ "at24",	0 },
+	{ /* END OF LIST */ }
+};
+MODULE_DEVICE_TABLE(i2c, at24_ids);
+
+static const struct of_device_id at24_of_match[] = {
+	{ .compatible = "atmel,24c00",		.data = &at24_data_24c00 },
+	{ .compatible = "atmel,24c01",		.data = &at24_data_24c01 },
+	{ .compatible = "atmel,24cs01",		.data = &at24_data_24cs01 },
+	{ .compatible = "atmel,24c02",		.data = &at24_data_24c02 },
+	{ .compatible = "atmel,24cs02",		.data = &at24_data_24cs02 },
+	{ .compatible = "atmel,24mac402",	.data = &at24_data_24mac402 },
+	{ .compatible = "atmel,24mac602",	.data = &at24_data_24mac602 },
+	{ .compatible = "atmel,spd",		.data = &at24_data_spd },
+	{ .compatible = "atmel,24c04",		.data = &at24_data_24c04 },
+	{ .compatible = "atmel,24cs04",		.data = &at24_data_24cs04 },
+	{ .compatible = "atmel,24c08",		.data = &at24_data_24c08 },
+	{ .compatible = "atmel,24cs08",		.data = &at24_data_24cs08 },
+	{ .compatible = "atmel,24c16",		.data = &at24_data_24c16 },
+	{ .compatible = "atmel,24cs16",		.data = &at24_data_24cs16 },
+	{ .compatible = "atmel,24c32",		.data = &at24_data_24c32 },
+	{ .compatible = "atmel,24cs32",		.data = &at24_data_24cs32 },
+	{ .compatible = "atmel,24c64",		.data = &at24_data_24c64 },
+	{ .compatible = "atmel,24cs64",		.data = &at24_data_24cs64 },
+	{ .compatible = "atmel,24c128",		.data = &at24_data_24c128 },
+	{ .compatible = "atmel,24c256",		.data = &at24_data_24c256 },
+	{ .compatible = "atmel,24c512",		.data = &at24_data_24c512 },
+	{ .compatible = "atmel,24c1024",	.data = &at24_data_24c1024 },
+	{ .compatible = "atmel,24c2048",	.data = &at24_data_24c2048 },
+	{ /* END OF LIST */ },
+};
+MODULE_DEVICE_TABLE(of, at24_of_match);
+
+static const struct acpi_device_id at24_acpi_ids[] = {
+	{ "INT3499",	(kernel_ulong_t)&at24_data_INT3499 },
+	{ /* END OF LIST */ }
+};
+MODULE_DEVICE_TABLE(acpi, at24_acpi_ids);
+
+/*
+ * This routine supports chips which consume multiple I2C addresses. It
+ * computes the addressing information to be used for a given r/w request.
+ * Assumes that sanity checks for offset happened at sysfs-layer.
+ *
+ * Slave address and byte offset derive from the offset. Always
+ * set the byte address; on a multi-master board, another master
+ * may have changed the chip's "current" address pointer.
+ */
+static struct at24_client *at24_translate_offset(struct at24_data *at24,
+						 unsigned int *offset)
+{
+	unsigned int i;
+
+	if (at24->flags & AT24_FLAG_ADDR16) {
+		i = *offset >> 16;
+		*offset &= 0xffff;
+	} else {
+		i = *offset >> 8;
+		*offset &= 0xff;
+	}
+
+	return &at24->client[i];
+}
+
+static struct device *at24_base_client_dev(struct at24_data *at24)
+{
+	return &at24->client[0].client->dev;
+}
+
+static size_t at24_adjust_read_count(struct at24_data *at24,
+				      unsigned int offset, size_t count)
+{
+	unsigned int bits;
+	size_t remainder;
+
+	/*
+	 * In case of multi-address chips that don't rollover reads to
+	 * the next slave address: truncate the count to the slave boundary,
+	 * so that the read never straddles slaves.
+	 */
+	if (at24->flags & AT24_FLAG_NO_RDROL) {
+		bits = (at24->flags & AT24_FLAG_ADDR16) ? 16 : 8;
+		remainder = BIT(bits) - offset;
+		if (count > remainder)
+			count = remainder;
+	}
+
+	if (count > at24_io_limit)
+		count = at24_io_limit;
+
+	return count;
+}
+
+static ssize_t at24_regmap_read(struct at24_data *at24, char *buf,
+				unsigned int offset, size_t count)
+{
+	unsigned long timeout, read_time;
+	struct at24_client *at24_client;
+	struct i2c_client *client;
+	struct regmap *regmap;
+	int ret;
+
+	at24_client = at24_translate_offset(at24, &offset);
+	regmap = at24_client->regmap;
+	client = at24_client->client;
+	count = at24_adjust_read_count(at24, offset, count);
+
+	/* adjust offset for mac and serial read ops */
+	offset += at24->offset_adj;
+
+	timeout = jiffies + msecs_to_jiffies(at24_write_timeout);
+	do {
+		/*
+		 * The timestamp shall be taken before the actual operation
+		 * to avoid a premature timeout in case of high CPU load.
+		 */
+		read_time = jiffies;
+
+		ret = regmap_bulk_read(regmap, offset, buf, count);
+		dev_dbg(&client->dev, "read %zu@%d --> %d (%ld)\n",
+			count, offset, ret, jiffies);
+		if (!ret)
+			return count;
+
+		usleep_range(1000, 1500);
+	} while (time_before(read_time, timeout));
+
+	return -ETIMEDOUT;
+}
+
+/*
+ * Note that if the hardware write-protect pin is pulled high, the whole
+ * chip is normally write protected. But there are plenty of product
+ * variants here, including OTP fuses and partial chip protect.
+ *
+ * We only use page mode writes; the alternative is sloooow. These routines
+ * write at most one page.
+ */
+
+static size_t at24_adjust_write_count(struct at24_data *at24,
+				      unsigned int offset, size_t count)
+{
+	unsigned int next_page;
+
+	/* write_max is at most a page */
+	if (count > at24->write_max)
+		count = at24->write_max;
+
+	/* Never roll over backwards, to the start of this page */
+	next_page = roundup(offset + 1, at24->page_size);
+	if (offset + count > next_page)
+		count = next_page - offset;
+
+	return count;
+}
+
+static ssize_t at24_regmap_write(struct at24_data *at24, const char *buf,
+				 unsigned int offset, size_t count)
+{
+	unsigned long timeout, write_time;
+	struct at24_client *at24_client;
+	struct i2c_client *client;
+	struct regmap *regmap;
+	int ret;
+
+	at24_client = at24_translate_offset(at24, &offset);
+	regmap = at24_client->regmap;
+	client = at24_client->client;
+	count = at24_adjust_write_count(at24, offset, count);
+	timeout = jiffies + msecs_to_jiffies(at24_write_timeout);
+
+	do {
+		/*
+		 * The timestamp shall be taken before the actual operation
+		 * to avoid a premature timeout in case of high CPU load.
+		 */
+		write_time = jiffies;
+
+		ret = regmap_bulk_write(regmap, offset, buf, count);
+		dev_dbg(&client->dev, "write %zu@%d --> %d (%ld)\n",
+			count, offset, ret, jiffies);
+		if (!ret)
+			return count;
+
+		usleep_range(1000, 1500);
+	} while (time_before(write_time, timeout));
+
+	return -ETIMEDOUT;
+}
+
+static int at24_read(void *priv, unsigned int off, void *val, size_t count)
+{
+	struct at24_data *at24;
+	struct device *dev;
+	char *buf = val;
+	int ret;
+
+	at24 = priv;
+	dev = at24_base_client_dev(at24);
+
+	if (unlikely(!count))
+		return count;
+
+	if (off + count > at24->byte_len)
+		return -EINVAL;
+
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0) {
+		pm_runtime_put_noidle(dev);
+		return ret;
+	}
+
+	/*
+	 * Read data from chip, protecting against concurrent updates
+	 * from this host, but not from other I2C masters.
+	 */
+	mutex_lock(&at24->lock);
+
+	while (count) {
+		ret = at24_regmap_read(at24, buf, off, count);
+		if (ret < 0) {
+			mutex_unlock(&at24->lock);
+			pm_runtime_put(dev);
+			return ret;
+		}
+		buf += ret;
+		off += ret;
+		count -= ret;
+	}
+
+	mutex_unlock(&at24->lock);
+
+	pm_runtime_put(dev);
+
+	return 0;
+}
+
+static int at24_write(void *priv, unsigned int off, void *val, size_t count)
+{
+	struct at24_data *at24;
+	struct device *dev;
+	char *buf = val;
+	int ret;
+
+	at24 = priv;
+	dev = at24_base_client_dev(at24);
+
+	if (unlikely(!count))
+		return -EINVAL;
+
+	if (off + count > at24->byte_len)
+		return -EINVAL;
+
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0) {
+		pm_runtime_put_noidle(dev);
+		return ret;
+	}
+
+	/*
+	 * Write data to chip, protecting against concurrent updates
+	 * from this host, but not from other I2C masters.
+	 */
+	mutex_lock(&at24->lock);
+	gpiod_set_value_cansleep(at24->wp_gpio, 0);
+
+	while (count) {
+		ret = at24_regmap_write(at24, buf, off, count);
+		if (ret < 0) {
+			gpiod_set_value_cansleep(at24->wp_gpio, 1);
+			mutex_unlock(&at24->lock);
+			pm_runtime_put(dev);
+			return ret;
+		}
+		buf += ret;
+		off += ret;
+		count -= ret;
+	}
+
+	gpiod_set_value_cansleep(at24->wp_gpio, 1);
+	mutex_unlock(&at24->lock);
+
+	pm_runtime_put(dev);
+
+	return 0;
+}
+
+static void at24_properties_to_pdata(struct device *dev,
+				     struct at24_platform_data *chip)
+{
+	int err;
+	u32 val;
+
+	if (device_property_present(dev, "read-only"))
+		chip->flags |= AT24_FLAG_READONLY;
+	if (device_property_present(dev, "no-read-rollover"))
+		chip->flags |= AT24_FLAG_NO_RDROL;
+
+	err = device_property_read_u32(dev, "address-width", &val);
+	if (!err) {
+		switch (val) {
+		case 8:
+			if (chip->flags & AT24_FLAG_ADDR16)
+				dev_warn(dev, "Override address width to be 8, while default is 16\n");
+			chip->flags &= ~AT24_FLAG_ADDR16;
+			break;
+		case 16:
+			chip->flags |= AT24_FLAG_ADDR16;
+			break;
+		default:
+			dev_warn(dev, "Bad \"address-width\" property: %u\n",
+				 val);
+		}
+	}
+
+	err = device_property_read_u32(dev, "size", &val);
+	if (!err)
+		chip->byte_len = val;
+
+	err = device_property_read_u32(dev, "pagesize", &val);
+	if (!err) {
+		chip->page_size = val;
+	} else {
+		/*
+		 * This is slow, but we can't know all eeproms, so we better
+		 * play safe. Specifying custom eeprom-types via platform_data
+		 * is recommended anyhow.
+		 */
+		chip->page_size = 1;
+	}
+}
+
+static int at24_get_pdata(struct device *dev, struct at24_platform_data *pdata)
+{
+	struct device_node *of_node = dev->of_node;
+	const struct at24_chip_data *cdata;
+	const struct i2c_device_id *id;
+	struct at24_platform_data *pd;
+
+	pd = dev_get_platdata(dev);
+	if (pd) {
+		memcpy(pdata, pd, sizeof(*pdata));
+		return 0;
+	}
+
+	id = i2c_match_id(at24_ids, to_i2c_client(dev));
+
+	/*
+	 * The I2C core allows OF nodes compatibles to match against the
+	 * I2C device ID table as a fallback, so check not only if an OF
+	 * node is present but also if it matches an OF device ID entry.
+	 */
+	if (of_node && of_match_device(at24_of_match, dev))
+		cdata = of_device_get_match_data(dev);
+	else if (id)
+		cdata = (void *)id->driver_data;
+	else
+		cdata = acpi_device_get_match_data(dev);
+
+	if (!cdata)
+		return -ENODEV;
+
+	pdata->byte_len = cdata->byte_len;
+	pdata->flags = cdata->flags;
+	at24_properties_to_pdata(dev, pdata);
+
+	return 0;
+}
+
+static void at24_remove_dummy_clients(struct at24_data *at24)
+{
+	int i;
+
+	for (i = 1; i < at24->num_addresses; i++)
+		i2c_unregister_device(at24->client[i].client);
+}
+
+static int at24_make_dummy_client(struct at24_data *at24, unsigned int index,
+				  struct regmap_config *regmap_config)
+{
+	struct i2c_client *base_client, *dummy_client;
+	unsigned short int addr;
+	struct regmap *regmap;
+	struct device *dev;
+
+	base_client = at24->client[0].client;
+	dev = &base_client->dev;
+	addr = base_client->addr + index;
+
+	dummy_client = i2c_new_dummy(base_client->adapter,
+				     base_client->addr + index);
+	if (!dummy_client) {
+		dev_err(dev, "address 0x%02x unavailable\n", addr);
+		return -EADDRINUSE;
+	}
+
+	regmap = devm_regmap_init_i2c(dummy_client, regmap_config);
+	if (IS_ERR(regmap)) {
+		i2c_unregister_device(dummy_client);
+		return PTR_ERR(regmap);
+	}
+
+	at24->client[index].client = dummy_client;
+	at24->client[index].regmap = regmap;
+
+	return 0;
+}
+
+static unsigned int at24_get_offset_adj(u8 flags, unsigned int byte_len)
+{
+	if (flags & AT24_FLAG_MAC) {
+		/* EUI-48 starts from 0x9a, EUI-64 from 0x98 */
+		return 0xa0 - byte_len;
+	} else if (flags & AT24_FLAG_SERIAL && flags & AT24_FLAG_ADDR16) {
+		/*
+		 * For 16 bit address pointers, the word address must contain
+		 * a '10' sequence in bits 11 and 10 regardless of the
+		 * intended position of the address pointer.
+		 */
+		return 0x0800;
+	} else if (flags & AT24_FLAG_SERIAL) {
+		/*
+		 * Otherwise the word address must begin with a '10' sequence,
+		 * regardless of the intended address.
+		 */
+		return 0x0080;
+	} else {
+		return 0;
+	}
+}
+
+static int at24_probe(struct i2c_client *client)
+{
+	struct regmap_config regmap_config = { };
+	struct nvmem_config nvmem_config = { };
+	struct at24_platform_data pdata = { };
+	struct device *dev = &client->dev;
+	bool i2c_fn_i2c, i2c_fn_block;
+	unsigned int i, num_addresses;
+	struct at24_data *at24;
+	struct regmap *regmap;
+	size_t at24_size;
+	bool writable;
+	u8 test_byte;
+	int err;
+
+	i2c_fn_i2c = i2c_check_functionality(client->adapter, I2C_FUNC_I2C);
+	i2c_fn_block = i2c_check_functionality(client->adapter,
+					       I2C_FUNC_SMBUS_WRITE_I2C_BLOCK);
+
+	err = at24_get_pdata(dev, &pdata);
+	if (err)
+		return err;
+
+	if (!i2c_fn_i2c && !i2c_fn_block)
+		pdata.page_size = 1;
+
+	if (!pdata.page_size) {
+		dev_err(dev, "page_size must not be 0!\n");
+		return -EINVAL;
+	}
+
+	if (!is_power_of_2(pdata.page_size))
+		dev_warn(dev, "page_size looks suspicious (no power of 2)!\n");
+
+	if (pdata.flags & AT24_FLAG_TAKE8ADDR)
+		num_addresses = 8;
+	else
+		num_addresses =	DIV_ROUND_UP(pdata.byte_len,
+			(pdata.flags & AT24_FLAG_ADDR16) ? 65536 : 256);
+
+	if ((pdata.flags & AT24_FLAG_SERIAL) && (pdata.flags & AT24_FLAG_MAC)) {
+		dev_err(dev,
+			"invalid device data - cannot have both AT24_FLAG_SERIAL & AT24_FLAG_MAC.");
+		return -EINVAL;
+	}
+
+	regmap_config.val_bits = 8;
+	regmap_config.reg_bits = (pdata.flags & AT24_FLAG_ADDR16) ? 16 : 8;
+	regmap_config.disable_locking = true;
+
+	regmap = devm_regmap_init_i2c(client, &regmap_config);
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
+
+	at24_size = sizeof(*at24) + num_addresses * sizeof(struct at24_client);
+	at24 = devm_kzalloc(dev, at24_size, GFP_KERNEL);
+	if (!at24)
+		return -ENOMEM;
+
+	mutex_init(&at24->lock);
+	at24->byte_len = pdata.byte_len;
+	at24->page_size = pdata.page_size;
+	at24->flags = pdata.flags;
+	at24->num_addresses = num_addresses;
+	at24->offset_adj = at24_get_offset_adj(pdata.flags, pdata.byte_len);
+	at24->client[0].client = client;
+	at24->client[0].regmap = regmap;
+
+	at24->wp_gpio = devm_gpiod_get_optional(dev, "wp", GPIOD_OUT_HIGH);
+	if (IS_ERR(at24->wp_gpio))
+		return PTR_ERR(at24->wp_gpio);
+
+	writable = !(pdata.flags & AT24_FLAG_READONLY);
+	if (writable) {
+		at24->write_max = min_t(unsigned int,
+					pdata.page_size, at24_io_limit);
+		if (!i2c_fn_i2c && at24->write_max > I2C_SMBUS_BLOCK_MAX)
+			at24->write_max = I2C_SMBUS_BLOCK_MAX;
+	}
+
+	/* use dummy devices for multiple-address chips */
+	for (i = 1; i < num_addresses; i++) {
+		err = at24_make_dummy_client(at24, i, &regmap_config);
+		if (err) {
+			at24_remove_dummy_clients(at24);
+			return err;
+		}
+	}
+
+	i2c_set_clientdata(client, at24);
+
+	/* enable runtime pm */
+	pm_runtime_set_active(dev);
+	pm_runtime_enable(dev);
+
+	/*
+	 * Perform a one-byte test read to verify that the
+	 * chip is functional.
+	 */
+	err = at24_read(at24, 0, &test_byte, 1);
+	pm_runtime_idle(dev);
+	if (err) {
+		err = -ENODEV;
+		goto err_clients;
+	}
+
+	nvmem_config.name = dev_name(dev);
+	nvmem_config.dev = dev;
+	nvmem_config.read_only = !writable;
+	nvmem_config.root_only = !(pdata.flags & AT24_FLAG_IRUGO);
+	nvmem_config.owner = THIS_MODULE;
+	nvmem_config.compat = true;
+	nvmem_config.base_dev = dev;
+	nvmem_config.reg_read = at24_read;
+	nvmem_config.reg_write = at24_write;
+	nvmem_config.priv = at24;
+	nvmem_config.stride = 1;
+	nvmem_config.word_size = 1;
+	nvmem_config.size = pdata.byte_len;
+
+	at24->nvmem = devm_nvmem_register(dev, &nvmem_config);
+	if (IS_ERR(at24->nvmem)) {
+		err = PTR_ERR(at24->nvmem);
+		goto err_clients;
+	}
+
+	dev_info(dev, "%u byte %s EEPROM, %s, %u bytes/write\n",
+		 pdata.byte_len, client->name,
+		 writable ? "writable" : "read-only", at24->write_max);
+
+	/* export data to kernel code */
+	if (pdata.setup)
+		pdata.setup(at24->nvmem, pdata.context);
+
+	return 0;
+
+err_clients:
+	at24_remove_dummy_clients(at24);
+	pm_runtime_disable(dev);
+
+	return err;
+}
+
+static int at24_remove(struct i2c_client *client)
+{
+	struct at24_data *at24;
+
+	at24 = i2c_get_clientdata(client);
+
+	at24_remove_dummy_clients(at24);
+	pm_runtime_disable(&client->dev);
+	pm_runtime_set_suspended(&client->dev);
+
+	return 0;
+}
+
+static struct i2c_driver at24_driver = {
+	.driver = {
+		.name = "at24",
+		.of_match_table = at24_of_match,
+		.acpi_match_table = ACPI_PTR(at24_acpi_ids),
+	},
+	.probe_new = at24_probe,
+	.remove = at24_remove,
+	.id_table = at24_ids,
+};
+
+static int __init at24_init(void)
+{
+	if (!at24_io_limit) {
+		pr_err("at24: at24_io_limit must not be 0!\n");
+		return -EINVAL;
+	}
+
+	at24_io_limit = rounddown_pow_of_two(at24_io_limit);
+	return i2c_add_driver(&at24_driver);
+}
+module_init(at24_init);
+
+static void __exit at24_exit(void)
+{
+	i2c_del_driver(&at24_driver);
+}
+module_exit(at24_exit);
+
+MODULE_DESCRIPTION("Driver for most I2C EEPROMs");
+MODULE_AUTHOR("David Brownell and Wolfram Sang");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c
new file mode 100644
index 000000000..8dd5c610c
--- /dev/null
+++ b/drivers/misc/eeprom/at25.c
@@ -0,0 +1,414 @@
+/*
+ * at25.c -- support most SPI EEPROMs, such as Atmel AT25 models
+ *
+ * Copyright (C) 2006 David Brownell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/sched.h>
+
+#include <linux/nvmem-provider.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/eeprom.h>
+#include <linux/property.h>
+
+/*
+ * NOTE: this is an *EEPROM* driver.  The vagaries of product naming
+ * mean that some AT25 products are EEPROMs, and others are FLASH.
+ * Handle FLASH chips with the drivers/mtd/devices/m25p80.c driver,
+ * not this one!
+ */
+
+struct at25_data {
+	struct spi_device	*spi;
+	struct mutex		lock;
+	struct spi_eeprom	chip;
+	unsigned		addrlen;
+	struct nvmem_config	nvmem_config;
+	struct nvmem_device	*nvmem;
+};
+
+#define	AT25_WREN	0x06		/* latch the write enable */
+#define	AT25_WRDI	0x04		/* reset the write enable */
+#define	AT25_RDSR	0x05		/* read status register */
+#define	AT25_WRSR	0x01		/* write status register */
+#define	AT25_READ	0x03		/* read byte(s) */
+#define	AT25_WRITE	0x02		/* write byte(s)/sector */
+
+#define	AT25_SR_nRDY	0x01		/* nRDY = write-in-progress */
+#define	AT25_SR_WEN	0x02		/* write enable (latched) */
+#define	AT25_SR_BP0	0x04		/* BP for software writeprotect */
+#define	AT25_SR_BP1	0x08
+#define	AT25_SR_WPEN	0x80		/* writeprotect enable */
+
+#define	AT25_INSTR_BIT3	0x08		/* Additional address bit in instr */
+
+#define EE_MAXADDRLEN	3		/* 24 bit addresses, up to 2 MBytes */
+
+/* Specs often allow 5 msec for a page write, sometimes 20 msec;
+ * it's important to recover from write timeouts.
+ */
+#define	EE_TIMEOUT	25
+
+/*-------------------------------------------------------------------------*/
+
+#define	io_limit	PAGE_SIZE	/* bytes */
+
+static int at25_ee_read(void *priv, unsigned int offset,
+			void *val, size_t count)
+{
+	struct at25_data *at25 = priv;
+	char *buf = val;
+	u8			command[EE_MAXADDRLEN + 1];
+	u8			*cp;
+	ssize_t			status;
+	struct spi_transfer	t[2];
+	struct spi_message	m;
+	u8			instr;
+
+	if (unlikely(offset >= at25->chip.byte_len))
+		return -EINVAL;
+	if ((offset + count) > at25->chip.byte_len)
+		count = at25->chip.byte_len - offset;
+	if (unlikely(!count))
+		return -EINVAL;
+
+	cp = command;
+
+	instr = AT25_READ;
+	if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR)
+		if (offset >= (1U << (at25->addrlen * 8)))
+			instr |= AT25_INSTR_BIT3;
+	*cp++ = instr;
+
+	/* 8/16/24-bit address is written MSB first */
+	switch (at25->addrlen) {
+	default:	/* case 3 */
+		*cp++ = offset >> 16;
+		/* fall through */
+	case 2:
+		*cp++ = offset >> 8;
+		/* fall through */
+	case 1:
+	case 0:	/* can't happen: for better codegen */
+		*cp++ = offset >> 0;
+	}
+
+	spi_message_init(&m);
+	memset(t, 0, sizeof(t));
+
+	t[0].tx_buf = command;
+	t[0].len = at25->addrlen + 1;
+	spi_message_add_tail(&t[0], &m);
+
+	t[1].rx_buf = buf;
+	t[1].len = count;
+	spi_message_add_tail(&t[1], &m);
+
+	mutex_lock(&at25->lock);
+
+	/* Read it all at once.
+	 *
+	 * REVISIT that's potentially a problem with large chips, if
+	 * other devices on the bus need to be accessed regularly or
+	 * this chip is clocked very slowly
+	 */
+	status = spi_sync(at25->spi, &m);
+	dev_dbg(&at25->spi->dev, "read %zu bytes at %d --> %zd\n",
+		count, offset, status);
+
+	mutex_unlock(&at25->lock);
+	return status;
+}
+
+static int at25_ee_write(void *priv, unsigned int off, void *val, size_t count)
+{
+	struct at25_data *at25 = priv;
+	const char *buf = val;
+	int			status = 0;
+	unsigned		buf_size;
+	u8			*bounce;
+
+	if (unlikely(off >= at25->chip.byte_len))
+		return -EFBIG;
+	if ((off + count) > at25->chip.byte_len)
+		count = at25->chip.byte_len - off;
+	if (unlikely(!count))
+		return -EINVAL;
+
+	/* Temp buffer starts with command and address */
+	buf_size = at25->chip.page_size;
+	if (buf_size > io_limit)
+		buf_size = io_limit;
+	bounce = kmalloc(buf_size + at25->addrlen + 1, GFP_KERNEL);
+	if (!bounce)
+		return -ENOMEM;
+
+	/* For write, rollover is within the page ... so we write at
+	 * most one page, then manually roll over to the next page.
+	 */
+	mutex_lock(&at25->lock);
+	do {
+		unsigned long	timeout, retries;
+		unsigned	segment;
+		unsigned	offset = (unsigned) off;
+		u8		*cp = bounce;
+		int		sr;
+		u8		instr;
+
+		*cp = AT25_WREN;
+		status = spi_write(at25->spi, cp, 1);
+		if (status < 0) {
+			dev_dbg(&at25->spi->dev, "WREN --> %d\n", status);
+			break;
+		}
+
+		instr = AT25_WRITE;
+		if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR)
+			if (offset >= (1U << (at25->addrlen * 8)))
+				instr |= AT25_INSTR_BIT3;
+		*cp++ = instr;
+
+		/* 8/16/24-bit address is written MSB first */
+		switch (at25->addrlen) {
+		default:	/* case 3 */
+			*cp++ = offset >> 16;
+			/* fall through */
+		case 2:
+			*cp++ = offset >> 8;
+			/* fall through */
+		case 1:
+		case 0:	/* can't happen: for better codegen */
+			*cp++ = offset >> 0;
+		}
+
+		/* Write as much of a page as we can */
+		segment = buf_size - (offset % buf_size);
+		if (segment > count)
+			segment = count;
+		memcpy(cp, buf, segment);
+		status = spi_write(at25->spi, bounce,
+				segment + at25->addrlen + 1);
+		dev_dbg(&at25->spi->dev, "write %u bytes at %u --> %d\n",
+			segment, offset, status);
+		if (status < 0)
+			break;
+
+		/* REVISIT this should detect (or prevent) failed writes
+		 * to readonly sections of the EEPROM...
+		 */
+
+		/* Wait for non-busy status */
+		timeout = jiffies + msecs_to_jiffies(EE_TIMEOUT);
+		retries = 0;
+		do {
+
+			sr = spi_w8r8(at25->spi, AT25_RDSR);
+			if (sr < 0 || (sr & AT25_SR_nRDY)) {
+				dev_dbg(&at25->spi->dev,
+					"rdsr --> %d (%02x)\n", sr, sr);
+				/* at HZ=100, this is sloooow */
+				msleep(1);
+				continue;
+			}
+			if (!(sr & AT25_SR_nRDY))
+				break;
+		} while (retries++ < 3 || time_before_eq(jiffies, timeout));
+
+		if ((sr < 0) || (sr & AT25_SR_nRDY)) {
+			dev_err(&at25->spi->dev,
+				"write %u bytes offset %u, timeout after %u msecs\n",
+				segment, offset,
+				jiffies_to_msecs(jiffies -
+					(timeout - EE_TIMEOUT)));
+			status = -ETIMEDOUT;
+			break;
+		}
+
+		off += segment;
+		buf += segment;
+		count -= segment;
+
+	} while (count > 0);
+
+	mutex_unlock(&at25->lock);
+
+	kfree(bounce);
+	return status;
+}
+
+/*-------------------------------------------------------------------------*/
+
+static int at25_fw_to_chip(struct device *dev, struct spi_eeprom *chip)
+{
+	u32 val;
+
+	memset(chip, 0, sizeof(*chip));
+	strncpy(chip->name, "at25", sizeof(chip->name));
+
+	if (device_property_read_u32(dev, "size", &val) == 0 ||
+	    device_property_read_u32(dev, "at25,byte-len", &val) == 0) {
+		chip->byte_len = val;
+	} else {
+		dev_err(dev, "Error: missing \"size\" property\n");
+		return -ENODEV;
+	}
+
+	if (device_property_read_u32(dev, "pagesize", &val) == 0 ||
+	    device_property_read_u32(dev, "at25,page-size", &val) == 0) {
+		chip->page_size = (u16)val;
+	} else {
+		dev_err(dev, "Error: missing \"pagesize\" property\n");
+		return -ENODEV;
+	}
+
+	if (device_property_read_u32(dev, "at25,addr-mode", &val) == 0) {
+		chip->flags = (u16)val;
+	} else {
+		if (device_property_read_u32(dev, "address-width", &val)) {
+			dev_err(dev,
+				"Error: missing \"address-width\" property\n");
+			return -ENODEV;
+		}
+		switch (val) {
+		case 9:
+			chip->flags |= EE_INSTR_BIT3_IS_ADDR;
+			/* fall through */
+		case 8:
+			chip->flags |= EE_ADDR1;
+			break;
+		case 16:
+			chip->flags |= EE_ADDR2;
+			break;
+		case 24:
+			chip->flags |= EE_ADDR3;
+			break;
+		default:
+			dev_err(dev,
+				"Error: bad \"address-width\" property: %u\n",
+				val);
+			return -ENODEV;
+		}
+		if (device_property_present(dev, "read-only"))
+			chip->flags |= EE_READONLY;
+	}
+	return 0;
+}
+
+static int at25_probe(struct spi_device *spi)
+{
+	struct at25_data	*at25 = NULL;
+	struct spi_eeprom	chip;
+	int			err;
+	int			sr;
+	int			addrlen;
+
+	/* Chip description */
+	if (!spi->dev.platform_data) {
+		err = at25_fw_to_chip(&spi->dev, &chip);
+		if (err)
+			return err;
+	} else
+		chip = *(struct spi_eeprom *)spi->dev.platform_data;
+
+	/* For now we only support 8/16/24 bit addressing */
+	if (chip.flags & EE_ADDR1)
+		addrlen = 1;
+	else if (chip.flags & EE_ADDR2)
+		addrlen = 2;
+	else if (chip.flags & EE_ADDR3)
+		addrlen = 3;
+	else {
+		dev_dbg(&spi->dev, "unsupported address type\n");
+		return -EINVAL;
+	}
+
+	/* Ping the chip ... the status register is pretty portable,
+	 * unlike probing manufacturer IDs.  We do expect that system
+	 * firmware didn't write it in the past few milliseconds!
+	 */
+	sr = spi_w8r8(spi, AT25_RDSR);
+	if (sr < 0 || sr & AT25_SR_nRDY) {
+		dev_dbg(&spi->dev, "rdsr --> %d (%02x)\n", sr, sr);
+		return -ENXIO;
+	}
+
+	at25 = devm_kzalloc(&spi->dev, sizeof(struct at25_data), GFP_KERNEL);
+	if (!at25)
+		return -ENOMEM;
+
+	mutex_init(&at25->lock);
+	at25->chip = chip;
+	at25->spi = spi;
+	spi_set_drvdata(spi, at25);
+	at25->addrlen = addrlen;
+
+	at25->nvmem_config.name = dev_name(&spi->dev);
+	at25->nvmem_config.dev = &spi->dev;
+	at25->nvmem_config.read_only = chip.flags & EE_READONLY;
+	at25->nvmem_config.root_only = true;
+	at25->nvmem_config.owner = THIS_MODULE;
+	at25->nvmem_config.compat = true;
+	at25->nvmem_config.base_dev = &spi->dev;
+	at25->nvmem_config.reg_read = at25_ee_read;
+	at25->nvmem_config.reg_write = at25_ee_write;
+	at25->nvmem_config.priv = at25;
+	at25->nvmem_config.stride = 1;
+	at25->nvmem_config.word_size = 1;
+	at25->nvmem_config.size = chip.byte_len;
+
+	at25->nvmem = nvmem_register(&at25->nvmem_config);
+	if (IS_ERR(at25->nvmem))
+		return PTR_ERR(at25->nvmem);
+
+	dev_info(&spi->dev, "%d %s %s eeprom%s, pagesize %u\n",
+		(chip.byte_len < 1024) ? chip.byte_len : (chip.byte_len / 1024),
+		(chip.byte_len < 1024) ? "Byte" : "KByte",
+		at25->chip.name,
+		(chip.flags & EE_READONLY) ? " (readonly)" : "",
+		at25->chip.page_size);
+	return 0;
+}
+
+static int at25_remove(struct spi_device *spi)
+{
+	struct at25_data	*at25;
+
+	at25 = spi_get_drvdata(spi);
+	nvmem_unregister(at25->nvmem);
+
+	return 0;
+}
+
+/*-------------------------------------------------------------------------*/
+
+static const struct of_device_id at25_of_match[] = {
+	{ .compatible = "atmel,at25", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, at25_of_match);
+
+static struct spi_driver at25_driver = {
+	.driver = {
+		.name		= "at25",
+		.of_match_table = at25_of_match,
+	},
+	.probe		= at25_probe,
+	.remove		= at25_remove,
+};
+
+module_spi_driver(at25_driver);
+
+MODULE_DESCRIPTION("Driver for most SPI EEPROMs");
+MODULE_AUTHOR("David Brownell");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("spi:at25");
diff --git a/drivers/misc/eeprom/digsy_mtc_eeprom.c b/drivers/misc/eeprom/digsy_mtc_eeprom.c
new file mode 100644
index 000000000..fbde2516c
--- /dev/null
+++ b/drivers/misc/eeprom/digsy_mtc_eeprom.c
@@ -0,0 +1,106 @@
+/*
+ * EEPROMs access control driver for display configuration EEPROMs
+ * on DigsyMTC board.
+ *
+ * (C) 2011 DENX Software Engineering, Anatolij Gustschin <agust@denx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * FIXME: this driver is used on a device-tree probed platform: it
+ * should be defined as a bit-banged SPI device and probed from the device
+ * tree and not like this with static grabbing of a few numbered GPIO
+ * lines at random.
+ *
+ * Add proper SPI and EEPROM in arch/powerpc/boot/dts/digsy_mtc.dts
+ * and delete this driver.
+ */
+
+#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi_gpio.h>
+#include <linux/eeprom_93xx46.h>
+
+#define GPIO_EEPROM_CLK		216
+#define GPIO_EEPROM_CS		210
+#define GPIO_EEPROM_DI		217
+#define GPIO_EEPROM_DO		249
+#define GPIO_EEPROM_OE		255
+#define EE_SPI_BUS_NUM	1
+
+static void digsy_mtc_op_prepare(void *p)
+{
+	/* enable */
+	gpio_set_value(GPIO_EEPROM_OE, 0);
+}
+
+static void digsy_mtc_op_finish(void *p)
+{
+	/* disable */
+	gpio_set_value(GPIO_EEPROM_OE, 1);
+}
+
+struct eeprom_93xx46_platform_data digsy_mtc_eeprom_data = {
+	.flags		= EE_ADDR8,
+	.prepare	= digsy_mtc_op_prepare,
+	.finish		= digsy_mtc_op_finish,
+};
+
+static struct spi_gpio_platform_data eeprom_spi_gpio_data = {
+	.num_chipselect	= 1,
+};
+
+static struct platform_device digsy_mtc_eeprom = {
+	.name	= "spi_gpio",
+	.id	= EE_SPI_BUS_NUM,
+	.dev	= {
+		.platform_data	= &eeprom_spi_gpio_data,
+	},
+};
+
+static struct gpiod_lookup_table eeprom_spi_gpiod_table = {
+	.dev_id         = "spi_gpio",
+	.table          = {
+		GPIO_LOOKUP("gpio@b00", GPIO_EEPROM_CLK,
+			    "sck", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("gpio@b00", GPIO_EEPROM_DI,
+			    "mosi", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("gpio@b00", GPIO_EEPROM_DO,
+			    "miso", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("gpio@b00", GPIO_EEPROM_CS,
+			    "cs", GPIO_ACTIVE_HIGH),
+		{ },
+	},
+};
+
+static struct spi_board_info digsy_mtc_eeprom_info[] __initdata = {
+	{
+		.modalias		= "93xx46",
+		.max_speed_hz		= 1000000,
+		.bus_num		= EE_SPI_BUS_NUM,
+		.chip_select		= 0,
+		.mode			= SPI_MODE_0,
+		.platform_data		= &digsy_mtc_eeprom_data,
+	},
+};
+
+static int __init digsy_mtc_eeprom_devices_init(void)
+{
+	int ret;
+
+	ret = gpio_request_one(GPIO_EEPROM_OE, GPIOF_OUT_INIT_HIGH,
+				"93xx46 EEPROMs OE");
+	if (ret) {
+		pr_err("can't request gpio %d\n", GPIO_EEPROM_OE);
+		return ret;
+	}
+	gpiod_add_lookup_table(&eeprom_spi_gpiod_table);
+	spi_register_board_info(digsy_mtc_eeprom_info,
+				ARRAY_SIZE(digsy_mtc_eeprom_info));
+	return platform_device_register(&digsy_mtc_eeprom);
+}
+device_initcall(digsy_mtc_eeprom_devices_init);
diff --git a/drivers/misc/eeprom/eeprom.c b/drivers/misc/eeprom/eeprom.c
new file mode 100644
index 000000000..60e3d91b5
--- /dev/null
+++ b/drivers/misc/eeprom/eeprom.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright (C) 1998, 1999  Frodo Looijaard <frodol@dds.nl> and
+ *                           Philip Edelbrock <phil@netroedge.com>
+ * Copyright (C) 2003 Greg Kroah-Hartman <greg@kroah.com>
+ * Copyright (C) 2003 IBM Corp.
+ * Copyright (C) 2004 Jean Delvare <jdelvare@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/capability.h>
+#include <linux/jiffies.h>
+#include <linux/i2c.h>
+#include <linux/mutex.h>
+
+/* Addresses to scan */
+static const unsigned short normal_i2c[] = { 0x50, 0x51, 0x52, 0x53, 0x54,
+					0x55, 0x56, 0x57, I2C_CLIENT_END };
+
+
+/* Size of EEPROM in bytes */
+#define EEPROM_SIZE		256
+
+/* possible types of eeprom devices */
+enum eeprom_nature {
+	UNKNOWN,
+	VAIO,
+};
+
+/* Each client has this additional data */
+struct eeprom_data {
+	struct mutex update_lock;
+	u8 valid;			/* bitfield, bit!=0 if slice is valid */
+	unsigned long last_updated[8];	/* In jiffies, 8 slices */
+	u8 data[EEPROM_SIZE];		/* Register values */
+	enum eeprom_nature nature;
+};
+
+
+static void eeprom_update_client(struct i2c_client *client, u8 slice)
+{
+	struct eeprom_data *data = i2c_get_clientdata(client);
+	int i;
+
+	mutex_lock(&data->update_lock);
+
+	if (!(data->valid & (1 << slice)) ||
+	    time_after(jiffies, data->last_updated[slice] + 300 * HZ)) {
+		dev_dbg(&client->dev, "Starting eeprom update, slice %u\n", slice);
+
+		if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_READ_I2C_BLOCK)) {
+			for (i = slice << 5; i < (slice + 1) << 5; i += 32)
+				if (i2c_smbus_read_i2c_block_data(client, i,
+							32, data->data + i)
+							!= 32)
+					goto exit;
+		} else {
+			for (i = slice << 5; i < (slice + 1) << 5; i += 2) {
+				int word = i2c_smbus_read_word_data(client, i);
+				if (word < 0)
+					goto exit;
+				data->data[i] = word & 0xff;
+				data->data[i + 1] = word >> 8;
+			}
+		}
+		data->last_updated[slice] = jiffies;
+		data->valid |= (1 << slice);
+	}
+exit:
+	mutex_unlock(&data->update_lock);
+}
+
+static ssize_t eeprom_read(struct file *filp, struct kobject *kobj,
+			   struct bin_attribute *bin_attr,
+			   char *buf, loff_t off, size_t count)
+{
+	struct i2c_client *client = to_i2c_client(kobj_to_dev(kobj));
+	struct eeprom_data *data = i2c_get_clientdata(client);
+	u8 slice;
+
+	/* Only refresh slices which contain requested bytes */
+	for (slice = off >> 5; slice <= (off + count - 1) >> 5; slice++)
+		eeprom_update_client(client, slice);
+
+	/* Hide Vaio private settings to regular users:
+	   - BIOS passwords: bytes 0x00 to 0x0f
+	   - UUID: bytes 0x10 to 0x1f
+	   - Serial number: 0xc0 to 0xdf */
+	if (data->nature == VAIO && !capable(CAP_SYS_ADMIN)) {
+		int i;
+
+		for (i = 0; i < count; i++) {
+			if ((off + i <= 0x1f) ||
+			    (off + i >= 0xc0 && off + i <= 0xdf))
+				buf[i] = 0;
+			else
+				buf[i] = data->data[off + i];
+		}
+	} else {
+		memcpy(buf, &data->data[off], count);
+	}
+
+	return count;
+}
+
+static const struct bin_attribute eeprom_attr = {
+	.attr = {
+		.name = "eeprom",
+		.mode = S_IRUGO,
+	},
+	.size = EEPROM_SIZE,
+	.read = eeprom_read,
+};
+
+/* Return 0 if detection is successful, -ENODEV otherwise */
+static int eeprom_detect(struct i2c_client *client, struct i2c_board_info *info)
+{
+	struct i2c_adapter *adapter = client->adapter;
+
+	/* EDID EEPROMs are often 24C00 EEPROMs, which answer to all
+	   addresses 0x50-0x57, but we only care about 0x50. So decline
+	   attaching to addresses >= 0x51 on DDC buses */
+	if (!(adapter->class & I2C_CLASS_SPD) && client->addr >= 0x51)
+		return -ENODEV;
+
+	/* There are four ways we can read the EEPROM data:
+	   (1) I2C block reads (faster, but unsupported by most adapters)
+	   (2) Word reads (128% overhead)
+	   (3) Consecutive byte reads (88% overhead, unsafe)
+	   (4) Regular byte data reads (265% overhead)
+	   The third and fourth methods are not implemented by this driver
+	   because all known adapters support one of the first two. */
+	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_WORD_DATA)
+	 && !i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_I2C_BLOCK))
+		return -ENODEV;
+
+	strlcpy(info->type, "eeprom", I2C_NAME_SIZE);
+
+	return 0;
+}
+
+static int eeprom_probe(struct i2c_client *client,
+			const struct i2c_device_id *id)
+{
+	struct i2c_adapter *adapter = client->adapter;
+	struct eeprom_data *data;
+
+	data = devm_kzalloc(&client->dev, sizeof(struct eeprom_data),
+			    GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	memset(data->data, 0xff, EEPROM_SIZE);
+	i2c_set_clientdata(client, data);
+	mutex_init(&data->update_lock);
+	data->nature = UNKNOWN;
+
+	/* Detect the Vaio nature of EEPROMs.
+	   We use the "PCG-" or "VGN-" prefix as the signature. */
+	if (client->addr == 0x57
+	 && i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_BYTE_DATA)) {
+		char name[4];
+
+		name[0] = i2c_smbus_read_byte_data(client, 0x80);
+		name[1] = i2c_smbus_read_byte_data(client, 0x81);
+		name[2] = i2c_smbus_read_byte_data(client, 0x82);
+		name[3] = i2c_smbus_read_byte_data(client, 0x83);
+
+		if (!memcmp(name, "PCG-", 4) || !memcmp(name, "VGN-", 4)) {
+			dev_info(&client->dev, "Vaio EEPROM detected, "
+				 "enabling privacy protection\n");
+			data->nature = VAIO;
+		}
+	}
+
+	/* create the sysfs eeprom file */
+	return sysfs_create_bin_file(&client->dev.kobj, &eeprom_attr);
+}
+
+static int eeprom_remove(struct i2c_client *client)
+{
+	sysfs_remove_bin_file(&client->dev.kobj, &eeprom_attr);
+
+	return 0;
+}
+
+static const struct i2c_device_id eeprom_id[] = {
+	{ "eeprom", 0 },
+	{ }
+};
+
+static struct i2c_driver eeprom_driver = {
+	.driver = {
+		.name	= "eeprom",
+	},
+	.probe		= eeprom_probe,
+	.remove		= eeprom_remove,
+	.id_table	= eeprom_id,
+
+	.class		= I2C_CLASS_DDC | I2C_CLASS_SPD,
+	.detect		= eeprom_detect,
+	.address_list	= normal_i2c,
+};
+
+module_i2c_driver(eeprom_driver);
+
+MODULE_AUTHOR("Frodo Looijaard <frodol@dds.nl> and "
+		"Philip Edelbrock <phil@netroedge.com> and "
+		"Greg Kroah-Hartman <greg@kroah.com>");
+MODULE_DESCRIPTION("I2C EEPROM driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/eeprom/eeprom_93cx6.c b/drivers/misc/eeprom/eeprom_93cx6.c
new file mode 100644
index 000000000..0cf2c9d67
--- /dev/null
+++ b/drivers/misc/eeprom/eeprom_93cx6.c
@@ -0,0 +1,381 @@
+/*
+ * Copyright (C) 2004 - 2006 rt2x00 SourceForge Project
+ * <http://rt2x00.serialmonkey.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Module: eeprom_93cx6
+ * Abstract: EEPROM reader routines for 93cx6 chipsets.
+ * Supported chipsets: 93c46 & 93c66.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/eeprom_93cx6.h>
+
+MODULE_AUTHOR("http://rt2x00.serialmonkey.com");
+MODULE_VERSION("1.0");
+MODULE_DESCRIPTION("EEPROM 93cx6 chip driver");
+MODULE_LICENSE("GPL");
+
+static inline void eeprom_93cx6_pulse_high(struct eeprom_93cx6 *eeprom)
+{
+	eeprom->reg_data_clock = 1;
+	eeprom->register_write(eeprom);
+
+	/*
+	 * Add a short delay for the pulse to work.
+	 * According to the specifications the "maximum minimum"
+	 * time should be 450ns.
+	 */
+	ndelay(450);
+}
+
+static inline void eeprom_93cx6_pulse_low(struct eeprom_93cx6 *eeprom)
+{
+	eeprom->reg_data_clock = 0;
+	eeprom->register_write(eeprom);
+
+	/*
+	 * Add a short delay for the pulse to work.
+	 * According to the specifications the "maximum minimum"
+	 * time should be 450ns.
+	 */
+	ndelay(450);
+}
+
+static void eeprom_93cx6_startup(struct eeprom_93cx6 *eeprom)
+{
+	/*
+	 * Clear all flags, and enable chip select.
+	 */
+	eeprom->register_read(eeprom);
+	eeprom->reg_data_in = 0;
+	eeprom->reg_data_out = 0;
+	eeprom->reg_data_clock = 0;
+	eeprom->reg_chip_select = 1;
+	eeprom->drive_data = 1;
+	eeprom->register_write(eeprom);
+
+	/*
+	 * kick a pulse.
+	 */
+	eeprom_93cx6_pulse_high(eeprom);
+	eeprom_93cx6_pulse_low(eeprom);
+}
+
+static void eeprom_93cx6_cleanup(struct eeprom_93cx6 *eeprom)
+{
+	/*
+	 * Clear chip_select and data_in flags.
+	 */
+	eeprom->register_read(eeprom);
+	eeprom->reg_data_in = 0;
+	eeprom->reg_chip_select = 0;
+	eeprom->register_write(eeprom);
+
+	/*
+	 * kick a pulse.
+	 */
+	eeprom_93cx6_pulse_high(eeprom);
+	eeprom_93cx6_pulse_low(eeprom);
+}
+
+static void eeprom_93cx6_write_bits(struct eeprom_93cx6 *eeprom,
+	const u16 data, const u16 count)
+{
+	unsigned int i;
+
+	eeprom->register_read(eeprom);
+
+	/*
+	 * Clear data flags.
+	 */
+	eeprom->reg_data_in = 0;
+	eeprom->reg_data_out = 0;
+	eeprom->drive_data = 1;
+
+	/*
+	 * Start writing all bits.
+	 */
+	for (i = count; i > 0; i--) {
+		/*
+		 * Check if this bit needs to be set.
+		 */
+		eeprom->reg_data_in = !!(data & (1 << (i - 1)));
+
+		/*
+		 * Write the bit to the eeprom register.
+		 */
+		eeprom->register_write(eeprom);
+
+		/*
+		 * Kick a pulse.
+		 */
+		eeprom_93cx6_pulse_high(eeprom);
+		eeprom_93cx6_pulse_low(eeprom);
+	}
+
+	eeprom->reg_data_in = 0;
+	eeprom->register_write(eeprom);
+}
+
+static void eeprom_93cx6_read_bits(struct eeprom_93cx6 *eeprom,
+	u16 *data, const u16 count)
+{
+	unsigned int i;
+	u16 buf = 0;
+
+	eeprom->register_read(eeprom);
+
+	/*
+	 * Clear data flags.
+	 */
+	eeprom->reg_data_in = 0;
+	eeprom->reg_data_out = 0;
+	eeprom->drive_data = 0;
+
+	/*
+	 * Start reading all bits.
+	 */
+	for (i = count; i > 0; i--) {
+		eeprom_93cx6_pulse_high(eeprom);
+
+		eeprom->register_read(eeprom);
+
+		/*
+		 * Clear data_in flag.
+		 */
+		eeprom->reg_data_in = 0;
+
+		/*
+		 * Read if the bit has been set.
+		 */
+		if (eeprom->reg_data_out)
+			buf |= (1 << (i - 1));
+
+		eeprom_93cx6_pulse_low(eeprom);
+	}
+
+	*data = buf;
+}
+
+/**
+ * eeprom_93cx6_read - Read a word from eeprom
+ * @eeprom: Pointer to eeprom structure
+ * @word: Word index from where we should start reading
+ * @data: target pointer where the information will have to be stored
+ *
+ * This function will read the eeprom data as host-endian word
+ * into the given data pointer.
+ */
+void eeprom_93cx6_read(struct eeprom_93cx6 *eeprom, const u8 word,
+	u16 *data)
+{
+	u16 command;
+
+	/*
+	 * Initialize the eeprom register
+	 */
+	eeprom_93cx6_startup(eeprom);
+
+	/*
+	 * Select the read opcode and the word to be read.
+	 */
+	command = (PCI_EEPROM_READ_OPCODE << eeprom->width) | word;
+	eeprom_93cx6_write_bits(eeprom, command,
+		PCI_EEPROM_WIDTH_OPCODE + eeprom->width);
+
+	/*
+	 * Read the requested 16 bits.
+	 */
+	eeprom_93cx6_read_bits(eeprom, data, 16);
+
+	/*
+	 * Cleanup eeprom register.
+	 */
+	eeprom_93cx6_cleanup(eeprom);
+}
+EXPORT_SYMBOL_GPL(eeprom_93cx6_read);
+
+/**
+ * eeprom_93cx6_multiread - Read multiple words from eeprom
+ * @eeprom: Pointer to eeprom structure
+ * @word: Word index from where we should start reading
+ * @data: target pointer where the information will have to be stored
+ * @words: Number of words that should be read.
+ *
+ * This function will read all requested words from the eeprom,
+ * this is done by calling eeprom_93cx6_read() multiple times.
+ * But with the additional change that while the eeprom_93cx6_read
+ * will return host ordered bytes, this method will return little
+ * endian words.
+ */
+void eeprom_93cx6_multiread(struct eeprom_93cx6 *eeprom, const u8 word,
+	__le16 *data, const u16 words)
+{
+	unsigned int i;
+	u16 tmp;
+
+	for (i = 0; i < words; i++) {
+		tmp = 0;
+		eeprom_93cx6_read(eeprom, word + i, &tmp);
+		data[i] = cpu_to_le16(tmp);
+	}
+}
+EXPORT_SYMBOL_GPL(eeprom_93cx6_multiread);
+
+/**
+ * eeprom_93cx6_readb - Read a byte from eeprom
+ * @eeprom: Pointer to eeprom structure
+ * @word: Byte index from where we should start reading
+ * @data: target pointer where the information will have to be stored
+ *
+ * This function will read a byte of the eeprom data
+ * into the given data pointer.
+ */
+void eeprom_93cx6_readb(struct eeprom_93cx6 *eeprom, const u8 byte,
+	u8 *data)
+{
+	u16 command;
+	u16 tmp;
+
+	/*
+	 * Initialize the eeprom register
+	 */
+	eeprom_93cx6_startup(eeprom);
+
+	/*
+	 * Select the read opcode and the byte to be read.
+	 */
+	command = (PCI_EEPROM_READ_OPCODE << (eeprom->width + 1)) | byte;
+	eeprom_93cx6_write_bits(eeprom, command,
+		PCI_EEPROM_WIDTH_OPCODE + eeprom->width + 1);
+
+	/*
+	 * Read the requested 8 bits.
+	 */
+	eeprom_93cx6_read_bits(eeprom, &tmp, 8);
+	*data = tmp & 0xff;
+
+	/*
+	 * Cleanup eeprom register.
+	 */
+	eeprom_93cx6_cleanup(eeprom);
+}
+EXPORT_SYMBOL_GPL(eeprom_93cx6_readb);
+
+/**
+ * eeprom_93cx6_multireadb - Read multiple bytes from eeprom
+ * @eeprom: Pointer to eeprom structure
+ * @byte: Index from where we should start reading
+ * @data: target pointer where the information will have to be stored
+ * @words: Number of bytes that should be read.
+ *
+ * This function will read all requested bytes from the eeprom,
+ * this is done by calling eeprom_93cx6_readb() multiple times.
+ */
+void eeprom_93cx6_multireadb(struct eeprom_93cx6 *eeprom, const u8 byte,
+	u8 *data, const u16 bytes)
+{
+	unsigned int i;
+
+	for (i = 0; i < bytes; i++)
+		eeprom_93cx6_readb(eeprom, byte + i, &data[i]);
+}
+EXPORT_SYMBOL_GPL(eeprom_93cx6_multireadb);
+
+/**
+ * eeprom_93cx6_wren - set the write enable state
+ * @eeprom: Pointer to eeprom structure
+ * @enable: true to enable writes, otherwise disable writes
+ *
+ * Set the EEPROM write enable state to either allow or deny
+ * writes depending on the @enable value.
+ */
+void eeprom_93cx6_wren(struct eeprom_93cx6 *eeprom, bool enable)
+{
+	u16 command;
+
+	/* start the command */
+	eeprom_93cx6_startup(eeprom);
+
+	/* create command to enable/disable */
+
+	command = enable ? PCI_EEPROM_EWEN_OPCODE : PCI_EEPROM_EWDS_OPCODE;
+	command <<= (eeprom->width - 2);
+
+	eeprom_93cx6_write_bits(eeprom, command,
+				PCI_EEPROM_WIDTH_OPCODE + eeprom->width);
+
+	eeprom_93cx6_cleanup(eeprom);
+}
+EXPORT_SYMBOL_GPL(eeprom_93cx6_wren);
+
+/**
+ * eeprom_93cx6_write - write data to the EEPROM
+ * @eeprom: Pointer to eeprom structure
+ * @addr: Address to write data to.
+ * @data: The data to write to address @addr.
+ *
+ * Write the @data to the specified @addr in the EEPROM and
+ * waiting for the device to finish writing.
+ *
+ * Note, since we do not expect large number of write operations
+ * we delay in between parts of the operation to avoid using excessive
+ * amounts of CPU time busy waiting.
+ */
+void eeprom_93cx6_write(struct eeprom_93cx6 *eeprom, u8 addr, u16 data)
+{
+	int timeout = 100;
+	u16 command;
+
+	/* start the command */
+	eeprom_93cx6_startup(eeprom);
+
+	command = PCI_EEPROM_WRITE_OPCODE << eeprom->width;
+	command |= addr;
+
+	/* send write command */
+	eeprom_93cx6_write_bits(eeprom, command,
+				PCI_EEPROM_WIDTH_OPCODE + eeprom->width);
+
+	/* send data */
+	eeprom_93cx6_write_bits(eeprom, data, 16);
+
+	/* get ready to check for busy */
+	eeprom->drive_data = 0;
+	eeprom->reg_chip_select = 1;
+	eeprom->register_write(eeprom);
+
+	/* wait at-least 250ns to get DO to be the busy signal */
+	usleep_range(1000, 2000);
+
+	/* wait for DO to go high to signify finish */
+
+	while (true) {
+		eeprom->register_read(eeprom);
+
+		if (eeprom->reg_data_out)
+			break;
+
+		usleep_range(1000, 2000);
+
+		if (--timeout <= 0) {
+			printk(KERN_ERR "%s: timeout\n", __func__);
+			break;
+		}
+	}
+
+	eeprom_93cx6_cleanup(eeprom);
+}
+EXPORT_SYMBOL_GPL(eeprom_93cx6_write);
diff --git a/drivers/misc/eeprom/eeprom_93xx46.c b/drivers/misc/eeprom/eeprom_93xx46.c
new file mode 100644
index 000000000..182feab6d
--- /dev/null
+++ b/drivers/misc/eeprom/eeprom_93xx46.c
@@ -0,0 +1,541 @@
+/*
+ * Driver for 93xx46 EEPROMs
+ *
+ * (C) 2011 DENX Software Engineering, Anatolij Gustschin <agust@denx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/gpio/consumer.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_gpio.h>
+#include <linux/slab.h>
+#include <linux/spi/spi.h>
+#include <linux/nvmem-provider.h>
+#include <linux/eeprom_93xx46.h>
+
+#define OP_START	0x4
+#define OP_WRITE	(OP_START | 0x1)
+#define OP_READ		(OP_START | 0x2)
+#define ADDR_EWDS	0x00
+#define ADDR_ERAL	0x20
+#define ADDR_EWEN	0x30
+
+struct eeprom_93xx46_devtype_data {
+	unsigned int quirks;
+};
+
+static const struct eeprom_93xx46_devtype_data atmel_at93c46d_data = {
+	.quirks = EEPROM_93XX46_QUIRK_SINGLE_WORD_READ |
+		  EEPROM_93XX46_QUIRK_INSTRUCTION_LENGTH,
+};
+
+static const struct eeprom_93xx46_devtype_data microchip_93lc46b_data = {
+	.quirks = EEPROM_93XX46_QUIRK_EXTRA_READ_CYCLE,
+};
+
+struct eeprom_93xx46_dev {
+	struct spi_device *spi;
+	struct eeprom_93xx46_platform_data *pdata;
+	struct mutex lock;
+	struct nvmem_config nvmem_config;
+	struct nvmem_device *nvmem;
+	int addrlen;
+	int size;
+};
+
+static inline bool has_quirk_single_word_read(struct eeprom_93xx46_dev *edev)
+{
+	return edev->pdata->quirks & EEPROM_93XX46_QUIRK_SINGLE_WORD_READ;
+}
+
+static inline bool has_quirk_instruction_length(struct eeprom_93xx46_dev *edev)
+{
+	return edev->pdata->quirks & EEPROM_93XX46_QUIRK_INSTRUCTION_LENGTH;
+}
+
+static inline bool has_quirk_extra_read_cycle(struct eeprom_93xx46_dev *edev)
+{
+	return edev->pdata->quirks & EEPROM_93XX46_QUIRK_EXTRA_READ_CYCLE;
+}
+
+static int eeprom_93xx46_read(void *priv, unsigned int off,
+			      void *val, size_t count)
+{
+	struct eeprom_93xx46_dev *edev = priv;
+	char *buf = val;
+	int err = 0;
+
+	if (unlikely(off >= edev->size))
+		return 0;
+	if ((off + count) > edev->size)
+		count = edev->size - off;
+	if (unlikely(!count))
+		return count;
+
+	mutex_lock(&edev->lock);
+
+	if (edev->pdata->prepare)
+		edev->pdata->prepare(edev);
+
+	while (count) {
+		struct spi_message m;
+		struct spi_transfer t[2] = { { 0 } };
+		u16 cmd_addr = OP_READ << edev->addrlen;
+		size_t nbytes = count;
+		int bits;
+
+		if (edev->addrlen == 7) {
+			cmd_addr |= off & 0x7f;
+			bits = 10;
+			if (has_quirk_single_word_read(edev))
+				nbytes = 1;
+		} else {
+			cmd_addr |= (off >> 1) & 0x3f;
+			bits = 9;
+			if (has_quirk_single_word_read(edev))
+				nbytes = 2;
+		}
+
+		dev_dbg(&edev->spi->dev, "read cmd 0x%x, %d Hz\n",
+			cmd_addr, edev->spi->max_speed_hz);
+
+		if (has_quirk_extra_read_cycle(edev)) {
+			cmd_addr <<= 1;
+			bits += 1;
+		}
+
+		spi_message_init(&m);
+
+		t[0].tx_buf = (char *)&cmd_addr;
+		t[0].len = 2;
+		t[0].bits_per_word = bits;
+		spi_message_add_tail(&t[0], &m);
+
+		t[1].rx_buf = buf;
+		t[1].len = count;
+		t[1].bits_per_word = 8;
+		spi_message_add_tail(&t[1], &m);
+
+		err = spi_sync(edev->spi, &m);
+		/* have to wait at least Tcsl ns */
+		ndelay(250);
+
+		if (err) {
+			dev_err(&edev->spi->dev, "read %zu bytes at %d: err. %d\n",
+				nbytes, (int)off, err);
+			break;
+		}
+
+		buf += nbytes;
+		off += nbytes;
+		count -= nbytes;
+	}
+
+	if (edev->pdata->finish)
+		edev->pdata->finish(edev);
+
+	mutex_unlock(&edev->lock);
+
+	return err;
+}
+
+static int eeprom_93xx46_ew(struct eeprom_93xx46_dev *edev, int is_on)
+{
+	struct spi_message m;
+	struct spi_transfer t;
+	int bits, ret;
+	u16 cmd_addr;
+
+	cmd_addr = OP_START << edev->addrlen;
+	if (edev->addrlen == 7) {
+		cmd_addr |= (is_on ? ADDR_EWEN : ADDR_EWDS) << 1;
+		bits = 10;
+	} else {
+		cmd_addr |= (is_on ? ADDR_EWEN : ADDR_EWDS);
+		bits = 9;
+	}
+
+	if (has_quirk_instruction_length(edev)) {
+		cmd_addr <<= 2;
+		bits += 2;
+	}
+
+	dev_dbg(&edev->spi->dev, "ew%s cmd 0x%04x, %d bits\n",
+			is_on ? "en" : "ds", cmd_addr, bits);
+
+	spi_message_init(&m);
+	memset(&t, 0, sizeof(t));
+
+	t.tx_buf = &cmd_addr;
+	t.len = 2;
+	t.bits_per_word = bits;
+	spi_message_add_tail(&t, &m);
+
+	mutex_lock(&edev->lock);
+
+	if (edev->pdata->prepare)
+		edev->pdata->prepare(edev);
+
+	ret = spi_sync(edev->spi, &m);
+	/* have to wait at least Tcsl ns */
+	ndelay(250);
+	if (ret)
+		dev_err(&edev->spi->dev, "erase/write %sable error %d\n",
+			is_on ? "en" : "dis", ret);
+
+	if (edev->pdata->finish)
+		edev->pdata->finish(edev);
+
+	mutex_unlock(&edev->lock);
+	return ret;
+}
+
+static ssize_t
+eeprom_93xx46_write_word(struct eeprom_93xx46_dev *edev,
+			 const char *buf, unsigned off)
+{
+	struct spi_message m;
+	struct spi_transfer t[2];
+	int bits, data_len, ret;
+	u16 cmd_addr;
+
+	cmd_addr = OP_WRITE << edev->addrlen;
+
+	if (edev->addrlen == 7) {
+		cmd_addr |= off & 0x7f;
+		bits = 10;
+		data_len = 1;
+	} else {
+		cmd_addr |= (off >> 1) & 0x3f;
+		bits = 9;
+		data_len = 2;
+	}
+
+	dev_dbg(&edev->spi->dev, "write cmd 0x%x\n", cmd_addr);
+
+	spi_message_init(&m);
+	memset(t, 0, sizeof(t));
+
+	t[0].tx_buf = (char *)&cmd_addr;
+	t[0].len = 2;
+	t[0].bits_per_word = bits;
+	spi_message_add_tail(&t[0], &m);
+
+	t[1].tx_buf = buf;
+	t[1].len = data_len;
+	t[1].bits_per_word = 8;
+	spi_message_add_tail(&t[1], &m);
+
+	ret = spi_sync(edev->spi, &m);
+	/* have to wait program cycle time Twc ms */
+	mdelay(6);
+	return ret;
+}
+
+static int eeprom_93xx46_write(void *priv, unsigned int off,
+				   void *val, size_t count)
+{
+	struct eeprom_93xx46_dev *edev = priv;
+	char *buf = val;
+	int i, ret, step = 1;
+
+	if (unlikely(off >= edev->size))
+		return -EFBIG;
+	if ((off + count) > edev->size)
+		count = edev->size - off;
+	if (unlikely(!count))
+		return count;
+
+	/* only write even number of bytes on 16-bit devices */
+	if (edev->addrlen == 6) {
+		step = 2;
+		count &= ~1;
+	}
+
+	/* erase/write enable */
+	ret = eeprom_93xx46_ew(edev, 1);
+	if (ret)
+		return ret;
+
+	mutex_lock(&edev->lock);
+
+	if (edev->pdata->prepare)
+		edev->pdata->prepare(edev);
+
+	for (i = 0; i < count; i += step) {
+		ret = eeprom_93xx46_write_word(edev, &buf[i], off + i);
+		if (ret) {
+			dev_err(&edev->spi->dev, "write failed at %d: %d\n",
+				(int)off + i, ret);
+			break;
+		}
+	}
+
+	if (edev->pdata->finish)
+		edev->pdata->finish(edev);
+
+	mutex_unlock(&edev->lock);
+
+	/* erase/write disable */
+	eeprom_93xx46_ew(edev, 0);
+	return ret;
+}
+
+static int eeprom_93xx46_eral(struct eeprom_93xx46_dev *edev)
+{
+	struct eeprom_93xx46_platform_data *pd = edev->pdata;
+	struct spi_message m;
+	struct spi_transfer t;
+	int bits, ret;
+	u16 cmd_addr;
+
+	cmd_addr = OP_START << edev->addrlen;
+	if (edev->addrlen == 7) {
+		cmd_addr |= ADDR_ERAL << 1;
+		bits = 10;
+	} else {
+		cmd_addr |= ADDR_ERAL;
+		bits = 9;
+	}
+
+	if (has_quirk_instruction_length(edev)) {
+		cmd_addr <<= 2;
+		bits += 2;
+	}
+
+	dev_dbg(&edev->spi->dev, "eral cmd 0x%04x, %d bits\n", cmd_addr, bits);
+
+	spi_message_init(&m);
+	memset(&t, 0, sizeof(t));
+
+	t.tx_buf = &cmd_addr;
+	t.len = 2;
+	t.bits_per_word = bits;
+	spi_message_add_tail(&t, &m);
+
+	mutex_lock(&edev->lock);
+
+	if (edev->pdata->prepare)
+		edev->pdata->prepare(edev);
+
+	ret = spi_sync(edev->spi, &m);
+	if (ret)
+		dev_err(&edev->spi->dev, "erase error %d\n", ret);
+	/* have to wait erase cycle time Tec ms */
+	mdelay(6);
+
+	if (pd->finish)
+		pd->finish(edev);
+
+	mutex_unlock(&edev->lock);
+	return ret;
+}
+
+static ssize_t eeprom_93xx46_store_erase(struct device *dev,
+					 struct device_attribute *attr,
+					 const char *buf, size_t count)
+{
+	struct eeprom_93xx46_dev *edev = dev_get_drvdata(dev);
+	int erase = 0, ret;
+
+	sscanf(buf, "%d", &erase);
+	if (erase) {
+		ret = eeprom_93xx46_ew(edev, 1);
+		if (ret)
+			return ret;
+		ret = eeprom_93xx46_eral(edev);
+		if (ret)
+			return ret;
+		ret = eeprom_93xx46_ew(edev, 0);
+		if (ret)
+			return ret;
+	}
+	return count;
+}
+static DEVICE_ATTR(erase, S_IWUSR, NULL, eeprom_93xx46_store_erase);
+
+static void select_assert(void *context)
+{
+	struct eeprom_93xx46_dev *edev = context;
+
+	gpiod_set_value_cansleep(edev->pdata->select, 1);
+}
+
+static void select_deassert(void *context)
+{
+	struct eeprom_93xx46_dev *edev = context;
+
+	gpiod_set_value_cansleep(edev->pdata->select, 0);
+}
+
+static const struct of_device_id eeprom_93xx46_of_table[] = {
+	{ .compatible = "eeprom-93xx46", },
+	{ .compatible = "atmel,at93c46d", .data = &atmel_at93c46d_data, },
+	{ .compatible = "microchip,93lc46b", .data = &microchip_93lc46b_data, },
+	{}
+};
+MODULE_DEVICE_TABLE(of, eeprom_93xx46_of_table);
+
+static int eeprom_93xx46_probe_dt(struct spi_device *spi)
+{
+	const struct of_device_id *of_id =
+		of_match_device(eeprom_93xx46_of_table, &spi->dev);
+	struct device_node *np = spi->dev.of_node;
+	struct eeprom_93xx46_platform_data *pd;
+	u32 tmp;
+	int ret;
+
+	pd = devm_kzalloc(&spi->dev, sizeof(*pd), GFP_KERNEL);
+	if (!pd)
+		return -ENOMEM;
+
+	ret = of_property_read_u32(np, "data-size", &tmp);
+	if (ret < 0) {
+		dev_err(&spi->dev, "data-size property not found\n");
+		return ret;
+	}
+
+	if (tmp == 8) {
+		pd->flags |= EE_ADDR8;
+	} else if (tmp == 16) {
+		pd->flags |= EE_ADDR16;
+	} else {
+		dev_err(&spi->dev, "invalid data-size (%d)\n", tmp);
+		return -EINVAL;
+	}
+
+	if (of_property_read_bool(np, "read-only"))
+		pd->flags |= EE_READONLY;
+
+	pd->select = devm_gpiod_get_optional(&spi->dev, "select",
+					     GPIOD_OUT_LOW);
+	if (IS_ERR(pd->select))
+		return PTR_ERR(pd->select);
+
+	pd->prepare = select_assert;
+	pd->finish = select_deassert;
+	gpiod_direction_output(pd->select, 0);
+
+	if (of_id->data) {
+		const struct eeprom_93xx46_devtype_data *data = of_id->data;
+
+		pd->quirks = data->quirks;
+	}
+
+	spi->dev.platform_data = pd;
+
+	return 0;
+}
+
+static int eeprom_93xx46_probe(struct spi_device *spi)
+{
+	struct eeprom_93xx46_platform_data *pd;
+	struct eeprom_93xx46_dev *edev;
+	int err;
+
+	if (spi->dev.of_node) {
+		err = eeprom_93xx46_probe_dt(spi);
+		if (err < 0)
+			return err;
+	}
+
+	pd = spi->dev.platform_data;
+	if (!pd) {
+		dev_err(&spi->dev, "missing platform data\n");
+		return -ENODEV;
+	}
+
+	edev = kzalloc(sizeof(*edev), GFP_KERNEL);
+	if (!edev)
+		return -ENOMEM;
+
+	if (pd->flags & EE_ADDR8)
+		edev->addrlen = 7;
+	else if (pd->flags & EE_ADDR16)
+		edev->addrlen = 6;
+	else {
+		dev_err(&spi->dev, "unspecified address type\n");
+		err = -EINVAL;
+		goto fail;
+	}
+
+	mutex_init(&edev->lock);
+
+	edev->spi = spi;
+	edev->pdata = pd;
+
+	edev->size = 128;
+	edev->nvmem_config.name = dev_name(&spi->dev);
+	edev->nvmem_config.dev = &spi->dev;
+	edev->nvmem_config.read_only = pd->flags & EE_READONLY;
+	edev->nvmem_config.root_only = true;
+	edev->nvmem_config.owner = THIS_MODULE;
+	edev->nvmem_config.compat = true;
+	edev->nvmem_config.base_dev = &spi->dev;
+	edev->nvmem_config.reg_read = eeprom_93xx46_read;
+	edev->nvmem_config.reg_write = eeprom_93xx46_write;
+	edev->nvmem_config.priv = edev;
+	edev->nvmem_config.stride = 4;
+	edev->nvmem_config.word_size = 1;
+	edev->nvmem_config.size = edev->size;
+
+	edev->nvmem = nvmem_register(&edev->nvmem_config);
+	if (IS_ERR(edev->nvmem)) {
+		err = PTR_ERR(edev->nvmem);
+		goto fail;
+	}
+
+	dev_info(&spi->dev, "%d-bit eeprom %s\n",
+		(pd->flags & EE_ADDR8) ? 8 : 16,
+		(pd->flags & EE_READONLY) ? "(readonly)" : "");
+
+	if (!(pd->flags & EE_READONLY)) {
+		if (device_create_file(&spi->dev, &dev_attr_erase))
+			dev_err(&spi->dev, "can't create erase interface\n");
+	}
+
+	spi_set_drvdata(spi, edev);
+	return 0;
+fail:
+	kfree(edev);
+	return err;
+}
+
+static int eeprom_93xx46_remove(struct spi_device *spi)
+{
+	struct eeprom_93xx46_dev *edev = spi_get_drvdata(spi);
+
+	nvmem_unregister(edev->nvmem);
+
+	if (!(edev->pdata->flags & EE_READONLY))
+		device_remove_file(&spi->dev, &dev_attr_erase);
+
+	kfree(edev);
+	return 0;
+}
+
+static struct spi_driver eeprom_93xx46_driver = {
+	.driver = {
+		.name	= "93xx46",
+		.of_match_table = of_match_ptr(eeprom_93xx46_of_table),
+	},
+	.probe		= eeprom_93xx46_probe,
+	.remove		= eeprom_93xx46_remove,
+};
+
+module_spi_driver(eeprom_93xx46_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Driver for 93xx46 EEPROMs");
+MODULE_AUTHOR("Anatolij Gustschin <agust@denx.de>");
+MODULE_ALIAS("spi:93xx46");
+MODULE_ALIAS("spi:eeprom-93xx46");
diff --git a/drivers/misc/eeprom/idt_89hpesx.c b/drivers/misc/eeprom/idt_89hpesx.c
new file mode 100644
index 000000000..5879ba82c
--- /dev/null
+++ b/drivers/misc/eeprom/idt_89hpesx.c
@@ -0,0 +1,1620 @@
+/*
+ *   This file is provided under a GPLv2 license.  When using or
+ *   redistributing this file, you may do so under that license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify it
+ *   under the terms and conditions of the GNU General Public License,
+ *   version 2, as published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but WITHOUT
+ *   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ *   more details.
+ *
+ *   You should have received a copy of the GNU General Public License along
+ *   with this program; if not, it can be found <http://www.gnu.org/licenses/>.
+ *
+ *   The full GNU General Public License is included in this distribution in
+ *   the file called "COPYING".
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * IDT PCIe-switch NTB Linux driver
+ *
+ * Contact Information:
+ * Serge Semin <fancer.lancer@gmail.com>, <Sergey.Semin@t-platforms.ru>
+ */
+/*
+ *           NOTE of the IDT 89HPESx SMBus-slave interface driver
+ *    This driver primarily is developed to have an access to EEPROM device of
+ * IDT PCIe-switches. IDT provides a simple SMBus interface to perform IO-
+ * operations from/to EEPROM, which is located at private (so called Master)
+ * SMBus of switches. Using that interface this the driver creates a simple
+ * binary sysfs-file in the device directory:
+ * /sys/bus/i2c/devices/<bus>-<devaddr>/eeprom
+ * In case if read-only flag is specified in the dts-node of device desription,
+ * User-space applications won't be able to write to the EEPROM sysfs-node.
+ *    Additionally IDT 89HPESx SMBus interface has an ability to write/read
+ * data of device CSRs. This driver exposes debugf-file to perform simple IO
+ * operations using that ability for just basic debug purpose. Particularly
+ * next file is created in the specific debugfs-directory:
+ * /sys/kernel/debug/idt_csr/
+ * Format of the debugfs-node is:
+ * $ cat /sys/kernel/debug/idt_csr/<bus>-<devaddr>/<devname>;
+ * <CSR address>:<CSR value>
+ * So reading the content of the file gives current CSR address and it value.
+ * If User-space application wishes to change current CSR address,
+ * it can just write a proper value to the sysfs-file:
+ * $ echo "<CSR address>" > /sys/kernel/debug/idt_csr/<bus>-<devaddr>/<devname>
+ * If it wants to change the CSR value as well, the format of the write
+ * operation is:
+ * $ echo "<CSR address>:<CSR value>" > \
+ *        /sys/kernel/debug/idt_csr/<bus>-<devaddr>/<devname>;
+ * CSR address and value can be any of hexadecimal, decimal or octal format.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/sysfs.h>
+#include <linux/debugfs.h>
+#include <linux/mod_devicetable.h>
+#include <linux/property.h>
+#include <linux/i2c.h>
+#include <linux/pci_ids.h>
+#include <linux/delay.h>
+
+#define IDT_NAME		"89hpesx"
+#define IDT_89HPESX_DESC	"IDT 89HPESx SMBus-slave interface driver"
+#define IDT_89HPESX_VER		"1.0"
+
+MODULE_DESCRIPTION(IDT_89HPESX_DESC);
+MODULE_VERSION(IDT_89HPESX_VER);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("T-platforms");
+
+/*
+ * csr_dbgdir - CSR read/write operations Debugfs directory
+ */
+static struct dentry *csr_dbgdir;
+
+/*
+ * struct idt_89hpesx_dev - IDT 89HPESx device data structure
+ * @eesize:	Size of EEPROM in bytes (calculated from "idt,eecompatible")
+ * @eero:	EEPROM Read-only flag
+ * @eeaddr:	EEPROM custom address
+ *
+ * @inieecmd:	Initial cmd value for EEPROM read/write operations
+ * @inicsrcmd:	Initial cmd value for CSR read/write operations
+ * @iniccode:	Initialial command code value for IO-operations
+ *
+ * @csr:	CSR address to perform read operation
+ *
+ * @smb_write:	SMBus write method
+ * @smb_read:	SMBus read method
+ * @smb_mtx:	SMBus mutex
+ *
+ * @client:	i2c client used to perform IO operations
+ *
+ * @ee_file:	EEPROM read/write sysfs-file
+ * @csr_file:	CSR read/write debugfs-node
+ */
+struct idt_smb_seq;
+struct idt_89hpesx_dev {
+	u32 eesize;
+	bool eero;
+	u8 eeaddr;
+
+	u8 inieecmd;
+	u8 inicsrcmd;
+	u8 iniccode;
+
+	u16 csr;
+
+	int (*smb_write)(struct idt_89hpesx_dev *, const struct idt_smb_seq *);
+	int (*smb_read)(struct idt_89hpesx_dev *, struct idt_smb_seq *);
+	struct mutex smb_mtx;
+
+	struct i2c_client *client;
+
+	struct bin_attribute *ee_file;
+	struct dentry *csr_dir;
+	struct dentry *csr_file;
+};
+
+/*
+ * struct idt_smb_seq - sequence of data to be read/written from/to IDT 89HPESx
+ * @ccode:	SMBus command code
+ * @bytecnt:	Byte count of operation
+ * @data:	Data to by written
+ */
+struct idt_smb_seq {
+	u8 ccode;
+	u8 bytecnt;
+	u8 *data;
+};
+
+/*
+ * struct idt_eeprom_seq - sequence of data to be read/written from/to EEPROM
+ * @cmd:	Transaction CMD
+ * @eeaddr:	EEPROM custom address
+ * @memaddr:	Internal memory address of EEPROM
+ * @data:	Data to be written at the memory address
+ */
+struct idt_eeprom_seq {
+	u8 cmd;
+	u8 eeaddr;
+	u16 memaddr;
+	u8 data;
+} __packed;
+
+/*
+ * struct idt_csr_seq - sequence of data to be read/written from/to CSR
+ * @cmd:	Transaction CMD
+ * @csraddr:	Internal IDT device CSR address
+ * @data:	Data to be read/written from/to the CSR address
+ */
+struct idt_csr_seq {
+	u8 cmd;
+	u16 csraddr;
+	u32 data;
+} __packed;
+
+/*
+ * SMBus command code macros
+ * @CCODE_END:		Indicates the end of transaction
+ * @CCODE_START:	Indicates the start of transaction
+ * @CCODE_CSR:		CSR read/write transaction
+ * @CCODE_EEPROM:	EEPROM read/write transaction
+ * @CCODE_BYTE:		Supplied data has BYTE length
+ * @CCODE_WORD:		Supplied data has WORD length
+ * @CCODE_BLOCK:	Supplied data has variable length passed in bytecnt
+ *			byte right following CCODE byte
+ */
+#define CCODE_END	((u8)0x01)
+#define CCODE_START	((u8)0x02)
+#define CCODE_CSR	((u8)0x00)
+#define CCODE_EEPROM	((u8)0x04)
+#define CCODE_BYTE	((u8)0x00)
+#define CCODE_WORD	((u8)0x20)
+#define CCODE_BLOCK	((u8)0x40)
+#define CCODE_PEC	((u8)0x80)
+
+/*
+ * EEPROM command macros
+ * @EEPROM_OP_WRITE:	EEPROM write operation
+ * @EEPROM_OP_READ:	EEPROM read operation
+ * @EEPROM_USA:		Use specified address of EEPROM
+ * @EEPROM_NAERR:	EEPROM device is not ready to respond
+ * @EEPROM_LAERR:	EEPROM arbitration loss error
+ * @EEPROM_MSS:		EEPROM misplace start & stop bits error
+ * @EEPROM_WR_CNT:	Bytes count to perform write operation
+ * @EEPROM_WRRD_CNT:	Bytes count to write before reading
+ * @EEPROM_RD_CNT:	Bytes count to perform read operation
+ * @EEPROM_DEF_SIZE:	Fall back size of EEPROM
+ * @EEPROM_DEF_ADDR:	Defatul EEPROM address
+ * @EEPROM_TOUT:	Timeout before retry read operation if eeprom is busy
+ */
+#define EEPROM_OP_WRITE	((u8)0x00)
+#define EEPROM_OP_READ	((u8)0x01)
+#define EEPROM_USA	((u8)0x02)
+#define EEPROM_NAERR	((u8)0x08)
+#define EEPROM_LAERR    ((u8)0x10)
+#define EEPROM_MSS	((u8)0x20)
+#define EEPROM_WR_CNT	((u8)5)
+#define EEPROM_WRRD_CNT	((u8)4)
+#define EEPROM_RD_CNT	((u8)5)
+#define EEPROM_DEF_SIZE	((u16)4096)
+#define EEPROM_DEF_ADDR	((u8)0x50)
+#define EEPROM_TOUT	(100)
+
+/*
+ * CSR command macros
+ * @CSR_DWE:		Enable all four bytes of the operation
+ * @CSR_OP_WRITE:	CSR write operation
+ * @CSR_OP_READ:	CSR read operation
+ * @CSR_RERR:		Read operation error
+ * @CSR_WERR:		Write operation error
+ * @CSR_WR_CNT:		Bytes count to perform write operation
+ * @CSR_WRRD_CNT:	Bytes count to write before reading
+ * @CSR_RD_CNT:		Bytes count to perform read operation
+ * @CSR_MAX:		Maximum CSR address
+ * @CSR_DEF:		Default CSR address
+ * @CSR_REAL_ADDR:	CSR real unshifted address
+ */
+#define CSR_DWE			((u8)0x0F)
+#define CSR_OP_WRITE		((u8)0x00)
+#define CSR_OP_READ		((u8)0x10)
+#define CSR_RERR		((u8)0x40)
+#define CSR_WERR		((u8)0x80)
+#define CSR_WR_CNT		((u8)7)
+#define CSR_WRRD_CNT		((u8)3)
+#define CSR_RD_CNT		((u8)7)
+#define CSR_MAX			((u32)0x3FFFF)
+#define CSR_DEF			((u16)0x0000)
+#define CSR_REAL_ADDR(val)	((unsigned int)val << 2)
+
+/*
+ * IDT 89HPESx basic register
+ * @IDT_VIDDID_CSR:	PCIe VID and DID of IDT 89HPESx
+ * @IDT_VID_MASK:	Mask of VID
+ */
+#define IDT_VIDDID_CSR	((u32)0x0000)
+#define IDT_VID_MASK	((u32)0xFFFF)
+
+/*
+ * IDT 89HPESx can send NACK when new command is sent before previous one
+ * fininshed execution. In this case driver retries operation
+ * certain times.
+ * @RETRY_CNT:		Number of retries before giving up and fail
+ * @idt_smb_safe:	Generate a retry loop on corresponding SMBus method
+ */
+#define RETRY_CNT (128)
+#define idt_smb_safe(ops, args...) ({ \
+	int __retry = RETRY_CNT; \
+	s32 __sts; \
+	do { \
+		__sts = i2c_smbus_ ## ops ## _data(args); \
+	} while (__retry-- && __sts < 0); \
+	__sts; \
+})
+
+/*===========================================================================
+ *                         i2c bus level IO-operations
+ *===========================================================================
+ */
+
+/*
+ * idt_smb_write_byte() - SMBus write method when I2C_SMBUS_BYTE_DATA operation
+ *                        is only available
+ * @pdev:	Pointer to the driver data
+ * @seq:	Sequence of data to be written
+ */
+static int idt_smb_write_byte(struct idt_89hpesx_dev *pdev,
+			      const struct idt_smb_seq *seq)
+{
+	s32 sts;
+	u8 ccode;
+	int idx;
+
+	/* Loop over the supplied data sending byte one-by-one */
+	for (idx = 0; idx < seq->bytecnt; idx++) {
+		/* Collect the command code byte */
+		ccode = seq->ccode | CCODE_BYTE;
+		if (idx == 0)
+			ccode |= CCODE_START;
+		if (idx == seq->bytecnt - 1)
+			ccode |= CCODE_END;
+
+		/* Send data to the device */
+		sts = idt_smb_safe(write_byte, pdev->client, ccode,
+			seq->data[idx]);
+		if (sts != 0)
+			return (int)sts;
+	}
+
+	return 0;
+}
+
+/*
+ * idt_smb_read_byte() - SMBus read method when I2C_SMBUS_BYTE_DATA operation
+ *                        is only available
+ * @pdev:	Pointer to the driver data
+ * @seq:	Buffer to read data to
+ */
+static int idt_smb_read_byte(struct idt_89hpesx_dev *pdev,
+			     struct idt_smb_seq *seq)
+{
+	s32 sts;
+	u8 ccode;
+	int idx;
+
+	/* Loop over the supplied buffer receiving byte one-by-one */
+	for (idx = 0; idx < seq->bytecnt; idx++) {
+		/* Collect the command code byte */
+		ccode = seq->ccode | CCODE_BYTE;
+		if (idx == 0)
+			ccode |= CCODE_START;
+		if (idx == seq->bytecnt - 1)
+			ccode |= CCODE_END;
+
+		/* Read data from the device */
+		sts = idt_smb_safe(read_byte, pdev->client, ccode);
+		if (sts < 0)
+			return (int)sts;
+
+		seq->data[idx] = (u8)sts;
+	}
+
+	return 0;
+}
+
+/*
+ * idt_smb_write_word() - SMBus write method when I2C_SMBUS_BYTE_DATA and
+ *                        I2C_FUNC_SMBUS_WORD_DATA operations are available
+ * @pdev:	Pointer to the driver data
+ * @seq:	Sequence of data to be written
+ */
+static int idt_smb_write_word(struct idt_89hpesx_dev *pdev,
+			      const struct idt_smb_seq *seq)
+{
+	s32 sts;
+	u8 ccode;
+	int idx, evencnt;
+
+	/* Calculate the even count of data to send */
+	evencnt = seq->bytecnt - (seq->bytecnt % 2);
+
+	/* Loop over the supplied data sending two bytes at a time */
+	for (idx = 0; idx < evencnt; idx += 2) {
+		/* Collect the command code byte */
+		ccode = seq->ccode | CCODE_WORD;
+		if (idx == 0)
+			ccode |= CCODE_START;
+		if (idx == evencnt - 2)
+			ccode |= CCODE_END;
+
+		/* Send word data to the device */
+		sts = idt_smb_safe(write_word, pdev->client, ccode,
+			*(u16 *)&seq->data[idx]);
+		if (sts != 0)
+			return (int)sts;
+	}
+
+	/* If there is odd number of bytes then send just one last byte */
+	if (seq->bytecnt != evencnt) {
+		/* Collect the command code byte */
+		ccode = seq->ccode | CCODE_BYTE | CCODE_END;
+		if (idx == 0)
+			ccode |= CCODE_START;
+
+		/* Send byte data to the device */
+		sts = idt_smb_safe(write_byte, pdev->client, ccode,
+			seq->data[idx]);
+		if (sts != 0)
+			return (int)sts;
+	}
+
+	return 0;
+}
+
+/*
+ * idt_smb_read_word() - SMBus read method when I2C_SMBUS_BYTE_DATA and
+ *                       I2C_FUNC_SMBUS_WORD_DATA operations are available
+ * @pdev:	Pointer to the driver data
+ * @seq:	Buffer to read data to
+ */
+static int idt_smb_read_word(struct idt_89hpesx_dev *pdev,
+			     struct idt_smb_seq *seq)
+{
+	s32 sts;
+	u8 ccode;
+	int idx, evencnt;
+
+	/* Calculate the even count of data to send */
+	evencnt = seq->bytecnt - (seq->bytecnt % 2);
+
+	/* Loop over the supplied data reading two bytes at a time */
+	for (idx = 0; idx < evencnt; idx += 2) {
+		/* Collect the command code byte */
+		ccode = seq->ccode | CCODE_WORD;
+		if (idx == 0)
+			ccode |= CCODE_START;
+		if (idx == evencnt - 2)
+			ccode |= CCODE_END;
+
+		/* Read word data from the device */
+		sts = idt_smb_safe(read_word, pdev->client, ccode);
+		if (sts < 0)
+			return (int)sts;
+
+		*(u16 *)&seq->data[idx] = (u16)sts;
+	}
+
+	/* If there is odd number of bytes then receive just one last byte */
+	if (seq->bytecnt != evencnt) {
+		/* Collect the command code byte */
+		ccode = seq->ccode | CCODE_BYTE | CCODE_END;
+		if (idx == 0)
+			ccode |= CCODE_START;
+
+		/* Read last data byte from the device */
+		sts = idt_smb_safe(read_byte, pdev->client, ccode);
+		if (sts < 0)
+			return (int)sts;
+
+		seq->data[idx] = (u8)sts;
+	}
+
+	return 0;
+}
+
+/*
+ * idt_smb_write_block() - SMBus write method when I2C_SMBUS_BLOCK_DATA
+ *                         operation is available
+ * @pdev:	Pointer to the driver data
+ * @seq:	Sequence of data to be written
+ */
+static int idt_smb_write_block(struct idt_89hpesx_dev *pdev,
+			       const struct idt_smb_seq *seq)
+{
+	u8 ccode;
+
+	/* Return error if too much data passed to send */
+	if (seq->bytecnt > I2C_SMBUS_BLOCK_MAX)
+		return -EINVAL;
+
+	/* Collect the command code byte */
+	ccode = seq->ccode | CCODE_BLOCK | CCODE_START | CCODE_END;
+
+	/* Send block of data to the device */
+	return idt_smb_safe(write_block, pdev->client, ccode, seq->bytecnt,
+		seq->data);
+}
+
+/*
+ * idt_smb_read_block() - SMBus read method when I2C_SMBUS_BLOCK_DATA
+ *                        operation is available
+ * @pdev:	Pointer to the driver data
+ * @seq:	Buffer to read data to
+ */
+static int idt_smb_read_block(struct idt_89hpesx_dev *pdev,
+			      struct idt_smb_seq *seq)
+{
+	s32 sts;
+	u8 ccode;
+
+	/* Return error if too much data passed to send */
+	if (seq->bytecnt > I2C_SMBUS_BLOCK_MAX)
+		return -EINVAL;
+
+	/* Collect the command code byte */
+	ccode = seq->ccode | CCODE_BLOCK | CCODE_START | CCODE_END;
+
+	/* Read block of data from the device */
+	sts = idt_smb_safe(read_block, pdev->client, ccode, seq->data);
+	if (sts != seq->bytecnt)
+		return (sts < 0 ? sts : -ENODATA);
+
+	return 0;
+}
+
+/*
+ * idt_smb_write_i2c_block() - SMBus write method when I2C_SMBUS_I2C_BLOCK_DATA
+ *                             operation is available
+ * @pdev:	Pointer to the driver data
+ * @seq:	Sequence of data to be written
+ *
+ * NOTE It's usual SMBus write block operation, except the actual data length is
+ * sent as first byte of data
+ */
+static int idt_smb_write_i2c_block(struct idt_89hpesx_dev *pdev,
+				   const struct idt_smb_seq *seq)
+{
+	u8 ccode, buf[I2C_SMBUS_BLOCK_MAX + 1];
+
+	/* Return error if too much data passed to send */
+	if (seq->bytecnt > I2C_SMBUS_BLOCK_MAX)
+		return -EINVAL;
+
+	/* Collect the data to send. Length byte must be added prior the data */
+	buf[0] = seq->bytecnt;
+	memcpy(&buf[1], seq->data, seq->bytecnt);
+
+	/* Collect the command code byte */
+	ccode = seq->ccode | CCODE_BLOCK | CCODE_START | CCODE_END;
+
+	/* Send length and block of data to the device */
+	return idt_smb_safe(write_i2c_block, pdev->client, ccode,
+		seq->bytecnt + 1, buf);
+}
+
+/*
+ * idt_smb_read_i2c_block() - SMBus read method when I2C_SMBUS_I2C_BLOCK_DATA
+ *                            operation is available
+ * @pdev:	Pointer to the driver data
+ * @seq:	Buffer to read data to
+ *
+ * NOTE It's usual SMBus read block operation, except the actual data length is
+ * retrieved as first byte of data
+ */
+static int idt_smb_read_i2c_block(struct idt_89hpesx_dev *pdev,
+				  struct idt_smb_seq *seq)
+{
+	u8 ccode, buf[I2C_SMBUS_BLOCK_MAX + 1];
+	s32 sts;
+
+	/* Return error if too much data passed to send */
+	if (seq->bytecnt > I2C_SMBUS_BLOCK_MAX)
+		return -EINVAL;
+
+	/* Collect the command code byte */
+	ccode = seq->ccode | CCODE_BLOCK | CCODE_START | CCODE_END;
+
+	/* Read length and block of data from the device */
+	sts = idt_smb_safe(read_i2c_block, pdev->client, ccode,
+		seq->bytecnt + 1, buf);
+	if (sts != seq->bytecnt + 1)
+		return (sts < 0 ? sts : -ENODATA);
+	if (buf[0] != seq->bytecnt)
+		return -ENODATA;
+
+	/* Copy retrieved data to the output data buffer */
+	memcpy(seq->data, &buf[1], seq->bytecnt);
+
+	return 0;
+}
+
+/*===========================================================================
+ *                          EEPROM IO-operations
+ *===========================================================================
+ */
+
+/*
+ * idt_eeprom_read_byte() - read just one byte from EEPROM
+ * @pdev:	Pointer to the driver data
+ * @memaddr:	Start EEPROM memory address
+ * @data:	Data to be written to EEPROM
+ */
+static int idt_eeprom_read_byte(struct idt_89hpesx_dev *pdev, u16 memaddr,
+				u8 *data)
+{
+	struct device *dev = &pdev->client->dev;
+	struct idt_eeprom_seq eeseq;
+	struct idt_smb_seq smbseq;
+	int ret, retry;
+
+	/* Initialize SMBus sequence fields */
+	smbseq.ccode = pdev->iniccode | CCODE_EEPROM;
+	smbseq.data = (u8 *)&eeseq;
+
+	/*
+	 * Sometimes EEPROM may respond with NACK if it's busy with previous
+	 * operation, so we need to perform a few attempts of read cycle
+	 */
+	retry = RETRY_CNT;
+	do {
+		/* Send EEPROM memory address to read data from */
+		smbseq.bytecnt = EEPROM_WRRD_CNT;
+		eeseq.cmd = pdev->inieecmd | EEPROM_OP_READ;
+		eeseq.eeaddr = pdev->eeaddr;
+		eeseq.memaddr = cpu_to_le16(memaddr);
+		ret = pdev->smb_write(pdev, &smbseq);
+		if (ret != 0) {
+			dev_err(dev, "Failed to init eeprom addr 0x%02hhx",
+				memaddr);
+			break;
+		}
+
+		/* Perform read operation */
+		smbseq.bytecnt = EEPROM_RD_CNT;
+		ret = pdev->smb_read(pdev, &smbseq);
+		if (ret != 0) {
+			dev_err(dev, "Failed to read eeprom data 0x%02hhx",
+				memaddr);
+			break;
+		}
+
+		/* Restart read operation if the device is busy */
+		if (retry && (eeseq.cmd & EEPROM_NAERR)) {
+			dev_dbg(dev, "EEPROM busy, retry reading after %d ms",
+				EEPROM_TOUT);
+			msleep(EEPROM_TOUT);
+			continue;
+		}
+
+		/* Check whether IDT successfully read data from EEPROM */
+		if (eeseq.cmd & (EEPROM_NAERR | EEPROM_LAERR | EEPROM_MSS)) {
+			dev_err(dev,
+				"Communication with eeprom failed, cmd 0x%hhx",
+				eeseq.cmd);
+			ret = -EREMOTEIO;
+			break;
+		}
+
+		/* Save retrieved data and exit the loop */
+		*data = eeseq.data;
+		break;
+	} while (retry--);
+
+	/* Return the status of operation */
+	return ret;
+}
+
+/*
+ * idt_eeprom_write() - EEPROM write operation
+ * @pdev:	Pointer to the driver data
+ * @memaddr:	Start EEPROM memory address
+ * @len:	Length of data to be written
+ * @data:	Data to be written to EEPROM
+ */
+static int idt_eeprom_write(struct idt_89hpesx_dev *pdev, u16 memaddr, u16 len,
+			    const u8 *data)
+{
+	struct device *dev = &pdev->client->dev;
+	struct idt_eeprom_seq eeseq;
+	struct idt_smb_seq smbseq;
+	int ret;
+	u16 idx;
+
+	/* Initialize SMBus sequence fields */
+	smbseq.ccode = pdev->iniccode | CCODE_EEPROM;
+	smbseq.data = (u8 *)&eeseq;
+
+	/* Send data byte-by-byte, checking if it is successfully written */
+	for (idx = 0; idx < len; idx++, memaddr++) {
+		/* Lock IDT SMBus device */
+		mutex_lock(&pdev->smb_mtx);
+
+		/* Perform write operation */
+		smbseq.bytecnt = EEPROM_WR_CNT;
+		eeseq.cmd = pdev->inieecmd | EEPROM_OP_WRITE;
+		eeseq.eeaddr = pdev->eeaddr;
+		eeseq.memaddr = cpu_to_le16(memaddr);
+		eeseq.data = data[idx];
+		ret = pdev->smb_write(pdev, &smbseq);
+		if (ret != 0) {
+			dev_err(dev,
+				"Failed to write 0x%04hx:0x%02hhx to eeprom",
+				memaddr, data[idx]);
+			goto err_mutex_unlock;
+		}
+
+		/*
+		 * Check whether the data is successfully written by reading
+		 * from the same EEPROM memory address.
+		 */
+		eeseq.data = ~data[idx];
+		ret = idt_eeprom_read_byte(pdev, memaddr, &eeseq.data);
+		if (ret != 0)
+			goto err_mutex_unlock;
+
+		/* Check whether the read byte is the same as written one */
+		if (eeseq.data != data[idx]) {
+			dev_err(dev, "Values don't match 0x%02hhx != 0x%02hhx",
+				eeseq.data, data[idx]);
+			ret = -EREMOTEIO;
+			goto err_mutex_unlock;
+		}
+
+		/* Unlock IDT SMBus device */
+err_mutex_unlock:
+		mutex_unlock(&pdev->smb_mtx);
+		if (ret != 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * idt_eeprom_read() - EEPROM read operation
+ * @pdev:	Pointer to the driver data
+ * @memaddr:	Start EEPROM memory address
+ * @len:	Length of data to read
+ * @buf:	Buffer to read data to
+ */
+static int idt_eeprom_read(struct idt_89hpesx_dev *pdev, u16 memaddr, u16 len,
+			   u8 *buf)
+{
+	int ret;
+	u16 idx;
+
+	/* Read data byte-by-byte, retrying if it wasn't successful */
+	for (idx = 0; idx < len; idx++, memaddr++) {
+		/* Lock IDT SMBus device */
+		mutex_lock(&pdev->smb_mtx);
+
+		/* Just read the byte to the buffer */
+		ret = idt_eeprom_read_byte(pdev, memaddr, &buf[idx]);
+
+		/* Unlock IDT SMBus device */
+		mutex_unlock(&pdev->smb_mtx);
+
+		/* Return error if read operation failed */
+		if (ret != 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*===========================================================================
+ *                          CSR IO-operations
+ *===========================================================================
+ */
+
+/*
+ * idt_csr_write() - CSR write operation
+ * @pdev:	Pointer to the driver data
+ * @csraddr:	CSR address (with no two LS bits)
+ * @data:	Data to be written to CSR
+ */
+static int idt_csr_write(struct idt_89hpesx_dev *pdev, u16 csraddr,
+			 const u32 data)
+{
+	struct device *dev = &pdev->client->dev;
+	struct idt_csr_seq csrseq;
+	struct idt_smb_seq smbseq;
+	int ret;
+
+	/* Initialize SMBus sequence fields */
+	smbseq.ccode = pdev->iniccode | CCODE_CSR;
+	smbseq.data = (u8 *)&csrseq;
+
+	/* Lock IDT SMBus device */
+	mutex_lock(&pdev->smb_mtx);
+
+	/* Perform write operation */
+	smbseq.bytecnt = CSR_WR_CNT;
+	csrseq.cmd = pdev->inicsrcmd | CSR_OP_WRITE;
+	csrseq.csraddr = cpu_to_le16(csraddr);
+	csrseq.data = cpu_to_le32(data);
+	ret = pdev->smb_write(pdev, &smbseq);
+	if (ret != 0) {
+		dev_err(dev, "Failed to write 0x%04x: 0x%04x to csr",
+			CSR_REAL_ADDR(csraddr), data);
+		goto err_mutex_unlock;
+	}
+
+	/* Send CSR address to read data from */
+	smbseq.bytecnt = CSR_WRRD_CNT;
+	csrseq.cmd = pdev->inicsrcmd | CSR_OP_READ;
+	ret = pdev->smb_write(pdev, &smbseq);
+	if (ret != 0) {
+		dev_err(dev, "Failed to init csr address 0x%04x",
+			CSR_REAL_ADDR(csraddr));
+		goto err_mutex_unlock;
+	}
+
+	/* Perform read operation */
+	smbseq.bytecnt = CSR_RD_CNT;
+	ret = pdev->smb_read(pdev, &smbseq);
+	if (ret != 0) {
+		dev_err(dev, "Failed to read csr 0x%04x",
+			CSR_REAL_ADDR(csraddr));
+		goto err_mutex_unlock;
+	}
+
+	/* Check whether IDT successfully retrieved CSR data */
+	if (csrseq.cmd & (CSR_RERR | CSR_WERR)) {
+		dev_err(dev, "IDT failed to perform CSR r/w");
+		ret = -EREMOTEIO;
+		goto err_mutex_unlock;
+	}
+
+	/* Unlock IDT SMBus device */
+err_mutex_unlock:
+	mutex_unlock(&pdev->smb_mtx);
+
+	return ret;
+}
+
+/*
+ * idt_csr_read() - CSR read operation
+ * @pdev:	Pointer to the driver data
+ * @csraddr:	CSR address (with no two LS bits)
+ * @data:	Data to be written to CSR
+ */
+static int idt_csr_read(struct idt_89hpesx_dev *pdev, u16 csraddr, u32 *data)
+{
+	struct device *dev = &pdev->client->dev;
+	struct idt_csr_seq csrseq;
+	struct idt_smb_seq smbseq;
+	int ret;
+
+	/* Initialize SMBus sequence fields */
+	smbseq.ccode = pdev->iniccode | CCODE_CSR;
+	smbseq.data = (u8 *)&csrseq;
+
+	/* Lock IDT SMBus device */
+	mutex_lock(&pdev->smb_mtx);
+
+	/* Send CSR register address before reading it */
+	smbseq.bytecnt = CSR_WRRD_CNT;
+	csrseq.cmd = pdev->inicsrcmd | CSR_OP_READ;
+	csrseq.csraddr = cpu_to_le16(csraddr);
+	ret = pdev->smb_write(pdev, &smbseq);
+	if (ret != 0) {
+		dev_err(dev, "Failed to init csr address 0x%04x",
+			CSR_REAL_ADDR(csraddr));
+		goto err_mutex_unlock;
+	}
+
+	/* Perform read operation */
+	smbseq.bytecnt = CSR_RD_CNT;
+	ret = pdev->smb_read(pdev, &smbseq);
+	if (ret != 0) {
+		dev_err(dev, "Failed to read csr 0x%04hx",
+			CSR_REAL_ADDR(csraddr));
+		goto err_mutex_unlock;
+	}
+
+	/* Check whether IDT successfully retrieved CSR data */
+	if (csrseq.cmd & (CSR_RERR | CSR_WERR)) {
+		dev_err(dev, "IDT failed to perform CSR r/w");
+		ret = -EREMOTEIO;
+		goto err_mutex_unlock;
+	}
+
+	/* Save data retrieved from IDT */
+	*data = le32_to_cpu(csrseq.data);
+
+	/* Unlock IDT SMBus device */
+err_mutex_unlock:
+	mutex_unlock(&pdev->smb_mtx);
+
+	return ret;
+}
+
+/*===========================================================================
+ *                          Sysfs/debugfs-nodes IO-operations
+ *===========================================================================
+ */
+
+/*
+ * eeprom_write() - EEPROM sysfs-node write callback
+ * @filep:	Pointer to the file system node
+ * @kobj:	Pointer to the kernel object related to the sysfs-node
+ * @attr:	Attributes of the file
+ * @buf:	Buffer to write data to
+ * @off:	Offset at which data should be written to
+ * @count:	Number of bytes to write
+ */
+static ssize_t eeprom_write(struct file *filp, struct kobject *kobj,
+			    struct bin_attribute *attr,
+			    char *buf, loff_t off, size_t count)
+{
+	struct idt_89hpesx_dev *pdev;
+	int ret;
+
+	/* Retrieve driver data */
+	pdev = dev_get_drvdata(kobj_to_dev(kobj));
+
+	/* Perform EEPROM write operation */
+	ret = idt_eeprom_write(pdev, (u16)off, (u16)count, (u8 *)buf);
+	return (ret != 0 ? ret : count);
+}
+
+/*
+ * eeprom_read() - EEPROM sysfs-node read callback
+ * @filep:	Pointer to the file system node
+ * @kobj:	Pointer to the kernel object related to the sysfs-node
+ * @attr:	Attributes of the file
+ * @buf:	Buffer to write data to
+ * @off:	Offset at which data should be written to
+ * @count:	Number of bytes to write
+ */
+static ssize_t eeprom_read(struct file *filp, struct kobject *kobj,
+			   struct bin_attribute *attr,
+			   char *buf, loff_t off, size_t count)
+{
+	struct idt_89hpesx_dev *pdev;
+	int ret;
+
+	/* Retrieve driver data */
+	pdev = dev_get_drvdata(kobj_to_dev(kobj));
+
+	/* Perform EEPROM read operation */
+	ret = idt_eeprom_read(pdev, (u16)off, (u16)count, (u8 *)buf);
+	return (ret != 0 ? ret : count);
+}
+
+/*
+ * idt_dbgfs_csr_write() - CSR debugfs-node write callback
+ * @filep:	Pointer to the file system file descriptor
+ * @buf:	Buffer to read data from
+ * @count:	Size of the buffer
+ * @offp:	Offset within the file
+ *
+ * It accepts either "0x<reg addr>:0x<value>" for saving register address
+ * and writing value to specified DWORD register or "0x<reg addr>" for
+ * just saving register address in order to perform next read operation.
+ *
+ * WARNING No spaces are allowed. Incoming string must be strictly formated as:
+ * "<reg addr>:<value>". Register address must be aligned within 4 bytes
+ * (one DWORD).
+ */
+static ssize_t idt_dbgfs_csr_write(struct file *filep, const char __user *ubuf,
+				   size_t count, loff_t *offp)
+{
+	struct idt_89hpesx_dev *pdev = filep->private_data;
+	char *colon_ch, *csraddr_str, *csrval_str;
+	int ret, csraddr_len;
+	u32 csraddr, csrval;
+	char *buf;
+
+	/* Copy data from User-space */
+	buf = kmalloc(count + 1, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = simple_write_to_buffer(buf, count, offp, ubuf, count);
+	if (ret < 0)
+		goto free_buf;
+	buf[count] = 0;
+
+	/* Find position of colon in the buffer */
+	colon_ch = strnchr(buf, count, ':');
+
+	/*
+	 * If there is colon passed then new CSR value should be parsed as
+	 * well, so allocate buffer for CSR address substring.
+	 * If no colon is found, then string must have just one number with
+	 * no new CSR value
+	 */
+	if (colon_ch != NULL) {
+		csraddr_len = colon_ch - buf;
+		csraddr_str =
+			kmalloc(csraddr_len + 1, GFP_KERNEL);
+		if (csraddr_str == NULL) {
+			ret = -ENOMEM;
+			goto free_buf;
+		}
+		/* Copy the register address to the substring buffer */
+		strncpy(csraddr_str, buf, csraddr_len);
+		csraddr_str[csraddr_len] = '\0';
+		/* Register value must follow the colon */
+		csrval_str = colon_ch + 1;
+	} else /* if (str_colon == NULL) */ {
+		csraddr_str = (char *)buf; /* Just to shut warning up */
+		csraddr_len = strnlen(csraddr_str, count);
+		csrval_str = NULL;
+	}
+
+	/* Convert CSR address to u32 value */
+	ret = kstrtou32(csraddr_str, 0, &csraddr);
+	if (ret != 0)
+		goto free_csraddr_str;
+
+	/* Check whether passed register address is valid */
+	if (csraddr > CSR_MAX || !IS_ALIGNED(csraddr, SZ_4)) {
+		ret = -EINVAL;
+		goto free_csraddr_str;
+	}
+
+	/* Shift register address to the right so to have u16 address */
+	pdev->csr = (csraddr >> 2);
+
+	/* Parse new CSR value and send it to IDT, if colon has been found */
+	if (colon_ch != NULL) {
+		ret = kstrtou32(csrval_str, 0, &csrval);
+		if (ret != 0)
+			goto free_csraddr_str;
+
+		ret = idt_csr_write(pdev, pdev->csr, csrval);
+		if (ret != 0)
+			goto free_csraddr_str;
+	}
+
+	/* Free memory only if colon has been found */
+free_csraddr_str:
+	if (colon_ch != NULL)
+		kfree(csraddr_str);
+
+	/* Free buffer allocated for data retrieved from User-space */
+free_buf:
+	kfree(buf);
+
+	return (ret != 0 ? ret : count);
+}
+
+/*
+ * idt_dbgfs_csr_read() - CSR debugfs-node read callback
+ * @filep:	Pointer to the file system file descriptor
+ * @buf:	Buffer to write data to
+ * @count:	Size of the buffer
+ * @offp:	Offset within the file
+ *
+ * It just prints the pair "0x<reg addr>:0x<value>" to passed buffer.
+ */
+#define CSRBUF_SIZE	((size_t)32)
+static ssize_t idt_dbgfs_csr_read(struct file *filep, char __user *ubuf,
+				  size_t count, loff_t *offp)
+{
+	struct idt_89hpesx_dev *pdev = filep->private_data;
+	u32 csraddr, csrval;
+	char buf[CSRBUF_SIZE];
+	int ret, size;
+
+	/* Perform CSR read operation */
+	ret = idt_csr_read(pdev, pdev->csr, &csrval);
+	if (ret != 0)
+		return ret;
+
+	/* Shift register address to the left so to have real address */
+	csraddr = ((u32)pdev->csr << 2);
+
+	/* Print the "0x<reg addr>:0x<value>" to buffer */
+	size = snprintf(buf, CSRBUF_SIZE, "0x%05x:0x%08x\n",
+		(unsigned int)csraddr, (unsigned int)csrval);
+
+	/* Copy data to User-space */
+	return simple_read_from_buffer(ubuf, count, offp, buf, size);
+}
+
+/*
+ * eeprom_attribute - EEPROM sysfs-node attributes
+ *
+ * NOTE Size will be changed in compliance with OF node. EEPROM attribute will
+ * be read-only as well if the corresponding flag is specified in OF node.
+ */
+static BIN_ATTR_RW(eeprom, EEPROM_DEF_SIZE);
+
+/*
+ * csr_dbgfs_ops - CSR debugfs-node read/write operations
+ */
+static const struct file_operations csr_dbgfs_ops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.write = idt_dbgfs_csr_write,
+	.read = idt_dbgfs_csr_read
+};
+
+/*===========================================================================
+ *                       Driver init/deinit methods
+ *===========================================================================
+ */
+
+/*
+ * idt_set_defval() - disable EEPROM access by default
+ * @pdev:	Pointer to the driver data
+ */
+static void idt_set_defval(struct idt_89hpesx_dev *pdev)
+{
+	/* If OF info is missing then use next values */
+	pdev->eesize = 0;
+	pdev->eero = true;
+	pdev->inieecmd = 0;
+	pdev->eeaddr = 0;
+}
+
+static const struct i2c_device_id ee_ids[];
+
+/*
+ * idt_ee_match_id() - check whether the node belongs to compatible EEPROMs
+ */
+static const struct i2c_device_id *idt_ee_match_id(struct fwnode_handle *fwnode)
+{
+	const struct i2c_device_id *id = ee_ids;
+	const char *compatible, *p;
+	char devname[I2C_NAME_SIZE];
+	int ret;
+
+	ret = fwnode_property_read_string(fwnode, "compatible", &compatible);
+	if (ret)
+		return NULL;
+
+	p = strchr(compatible, ',');
+	strlcpy(devname, p ? p + 1 : compatible, sizeof(devname));
+	/* Search through the device name */
+	while (id->name[0]) {
+		if (strcmp(devname, id->name) == 0)
+			return id;
+		id++;
+	}
+	return NULL;
+}
+
+/*
+ * idt_get_fw_data() - get IDT i2c-device parameters from device tree
+ * @pdev:	Pointer to the driver data
+ */
+static void idt_get_fw_data(struct idt_89hpesx_dev *pdev)
+{
+	struct device *dev = &pdev->client->dev;
+	struct fwnode_handle *fwnode;
+	const struct i2c_device_id *ee_id = NULL;
+	u32 eeprom_addr;
+	int ret;
+
+	device_for_each_child_node(dev, fwnode) {
+		ee_id = idt_ee_match_id(fwnode);
+		if (ee_id)
+			break;
+
+		dev_warn(dev, "Skip unsupported EEPROM device %pfw\n", fwnode);
+	}
+
+	/* If there is no fwnode EEPROM device, then set zero size */
+	if (!ee_id) {
+		dev_warn(dev, "No fwnode, EEPROM access disabled");
+		idt_set_defval(pdev);
+		return;
+	}
+
+	/* Retrieve EEPROM size */
+	pdev->eesize = (u32)ee_id->driver_data;
+
+	/* Get custom EEPROM address from 'reg' attribute */
+	ret = fwnode_property_read_u32(fwnode, "reg", &eeprom_addr);
+	if (ret || (eeprom_addr == 0)) {
+		dev_warn(dev, "No EEPROM reg found, use default address 0x%x",
+			 EEPROM_DEF_ADDR);
+		pdev->inieecmd = 0;
+		pdev->eeaddr = EEPROM_DEF_ADDR << 1;
+	} else {
+		pdev->inieecmd = EEPROM_USA;
+		pdev->eeaddr = eeprom_addr << 1;
+	}
+
+	/* Check EEPROM 'read-only' flag */
+	if (fwnode_property_read_bool(fwnode, "read-only"))
+		pdev->eero = true;
+	else /* if (!fwnode_property_read_bool(node, "read-only")) */
+		pdev->eero = false;
+
+	fwnode_handle_put(fwnode);
+	dev_info(dev, "EEPROM of %d bytes found by 0x%x",
+		pdev->eesize, pdev->eeaddr);
+}
+
+/*
+ * idt_create_pdev() - create and init data structure of the driver
+ * @client:	i2c client of IDT PCIe-switch device
+ */
+static struct idt_89hpesx_dev *idt_create_pdev(struct i2c_client *client)
+{
+	struct idt_89hpesx_dev *pdev;
+
+	/* Allocate memory for driver data */
+	pdev = devm_kmalloc(&client->dev, sizeof(struct idt_89hpesx_dev),
+		GFP_KERNEL);
+	if (pdev == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	/* Initialize basic fields of the data */
+	pdev->client = client;
+	i2c_set_clientdata(client, pdev);
+
+	/* Read firmware nodes information */
+	idt_get_fw_data(pdev);
+
+	/* Initialize basic CSR CMD field - use full DWORD-sized r/w ops */
+	pdev->inicsrcmd = CSR_DWE;
+	pdev->csr = CSR_DEF;
+
+	/* Enable Packet Error Checking if it's supported by adapter */
+	if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_PEC)) {
+		pdev->iniccode = CCODE_PEC;
+		client->flags |= I2C_CLIENT_PEC;
+	} else /* PEC is unsupported */ {
+		pdev->iniccode = 0;
+	}
+
+	return pdev;
+}
+
+/*
+ * idt_free_pdev() - free data structure of the driver
+ * @pdev:	Pointer to the driver data
+ */
+static void idt_free_pdev(struct idt_89hpesx_dev *pdev)
+{
+	/* Clear driver data from device private field */
+	i2c_set_clientdata(pdev->client, NULL);
+}
+
+/*
+ * idt_set_smbus_ops() - set supported SMBus operations
+ * @pdev:	Pointer to the driver data
+ * Return status of smbus check operations
+ */
+static int idt_set_smbus_ops(struct idt_89hpesx_dev *pdev)
+{
+	struct i2c_adapter *adapter = pdev->client->adapter;
+	struct device *dev = &pdev->client->dev;
+
+	/* Check i2c adapter read functionality */
+	if (i2c_check_functionality(adapter,
+				    I2C_FUNC_SMBUS_READ_BLOCK_DATA)) {
+		pdev->smb_read = idt_smb_read_block;
+		dev_dbg(dev, "SMBus block-read op chosen");
+	} else if (i2c_check_functionality(adapter,
+					   I2C_FUNC_SMBUS_READ_I2C_BLOCK)) {
+		pdev->smb_read = idt_smb_read_i2c_block;
+		dev_dbg(dev, "SMBus i2c-block-read op chosen");
+	} else if (i2c_check_functionality(adapter,
+					   I2C_FUNC_SMBUS_READ_WORD_DATA) &&
+		   i2c_check_functionality(adapter,
+					   I2C_FUNC_SMBUS_READ_BYTE_DATA)) {
+		pdev->smb_read = idt_smb_read_word;
+		dev_warn(dev, "Use slow word/byte SMBus read ops");
+	} else if (i2c_check_functionality(adapter,
+					   I2C_FUNC_SMBUS_READ_BYTE_DATA)) {
+		pdev->smb_read = idt_smb_read_byte;
+		dev_warn(dev, "Use slow byte SMBus read op");
+	} else /* no supported smbus read operations */ {
+		dev_err(dev, "No supported SMBus read op");
+		return -EPFNOSUPPORT;
+	}
+
+	/* Check i2c adapter write functionality */
+	if (i2c_check_functionality(adapter,
+				    I2C_FUNC_SMBUS_WRITE_BLOCK_DATA)) {
+		pdev->smb_write = idt_smb_write_block;
+		dev_dbg(dev, "SMBus block-write op chosen");
+	} else if (i2c_check_functionality(adapter,
+					   I2C_FUNC_SMBUS_WRITE_I2C_BLOCK)) {
+		pdev->smb_write = idt_smb_write_i2c_block;
+		dev_dbg(dev, "SMBus i2c-block-write op chosen");
+	} else if (i2c_check_functionality(adapter,
+					   I2C_FUNC_SMBUS_WRITE_WORD_DATA) &&
+		   i2c_check_functionality(adapter,
+					   I2C_FUNC_SMBUS_WRITE_BYTE_DATA)) {
+		pdev->smb_write = idt_smb_write_word;
+		dev_warn(dev, "Use slow word/byte SMBus write op");
+	} else if (i2c_check_functionality(adapter,
+					   I2C_FUNC_SMBUS_WRITE_BYTE_DATA)) {
+		pdev->smb_write = idt_smb_write_byte;
+		dev_warn(dev, "Use slow byte SMBus write op");
+	} else /* no supported smbus write operations */ {
+		dev_err(dev, "No supported SMBus write op");
+		return -EPFNOSUPPORT;
+	}
+
+	/* Initialize IDT SMBus slave interface mutex */
+	mutex_init(&pdev->smb_mtx);
+
+	return 0;
+}
+
+/*
+ * idt_check_dev() - check whether it's really IDT 89HPESx device
+ * @pdev:	Pointer to the driver data
+ * Return status of i2c adapter check operation
+ */
+static int idt_check_dev(struct idt_89hpesx_dev *pdev)
+{
+	struct device *dev = &pdev->client->dev;
+	u32 viddid;
+	int ret;
+
+	/* Read VID and DID directly from IDT memory space */
+	ret = idt_csr_read(pdev, IDT_VIDDID_CSR, &viddid);
+	if (ret != 0) {
+		dev_err(dev, "Failed to read VID/DID");
+		return ret;
+	}
+
+	/* Check whether it's IDT device */
+	if ((viddid & IDT_VID_MASK) != PCI_VENDOR_ID_IDT) {
+		dev_err(dev, "Got unsupported VID/DID: 0x%08x", viddid);
+		return -ENODEV;
+	}
+
+	dev_info(dev, "Found IDT 89HPES device VID:0x%04x, DID:0x%04x",
+		(viddid & IDT_VID_MASK), (viddid >> 16));
+
+	return 0;
+}
+
+/*
+ * idt_create_sysfs_files() - create sysfs attribute files
+ * @pdev:	Pointer to the driver data
+ * Return status of operation
+ */
+static int idt_create_sysfs_files(struct idt_89hpesx_dev *pdev)
+{
+	struct device *dev = &pdev->client->dev;
+	int ret;
+
+	/* Don't do anything if EEPROM isn't accessible */
+	if (pdev->eesize == 0) {
+		dev_dbg(dev, "Skip creating sysfs-files");
+		return 0;
+	}
+
+	/* Allocate memory for attribute file */
+	pdev->ee_file = devm_kmalloc(dev, sizeof(*pdev->ee_file), GFP_KERNEL);
+	if (!pdev->ee_file)
+		return -ENOMEM;
+
+	/* Copy the declared EEPROM attr structure to change some of fields */
+	memcpy(pdev->ee_file, &bin_attr_eeprom, sizeof(*pdev->ee_file));
+
+	/* In case of read-only EEPROM get rid of write ability */
+	if (pdev->eero) {
+		pdev->ee_file->attr.mode &= ~0200;
+		pdev->ee_file->write = NULL;
+	}
+	/* Create EEPROM sysfs file */
+	pdev->ee_file->size = pdev->eesize;
+	ret = sysfs_create_bin_file(&dev->kobj, pdev->ee_file);
+	if (ret != 0) {
+		dev_err(dev, "Failed to create EEPROM sysfs-node");
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * idt_remove_sysfs_files() - remove sysfs attribute files
+ * @pdev:	Pointer to the driver data
+ */
+static void idt_remove_sysfs_files(struct idt_89hpesx_dev *pdev)
+{
+	struct device *dev = &pdev->client->dev;
+
+	/* Don't do anything if EEPROM wasn't accessible */
+	if (pdev->eesize == 0)
+		return;
+
+	/* Remove EEPROM sysfs file */
+	sysfs_remove_bin_file(&dev->kobj, pdev->ee_file);
+}
+
+/*
+ * idt_create_dbgfs_files() - create debugfs files
+ * @pdev:	Pointer to the driver data
+ */
+#define CSRNAME_LEN	((size_t)32)
+static void idt_create_dbgfs_files(struct idt_89hpesx_dev *pdev)
+{
+	struct i2c_client *cli = pdev->client;
+	char fname[CSRNAME_LEN];
+
+	/* Create Debugfs directory for CSR file */
+	snprintf(fname, CSRNAME_LEN, "%d-%04hx", cli->adapter->nr, cli->addr);
+	pdev->csr_dir = debugfs_create_dir(fname, csr_dbgdir);
+
+	/* Create Debugfs file for CSR read/write operations */
+	pdev->csr_file = debugfs_create_file(cli->name, 0600,
+		pdev->csr_dir, pdev, &csr_dbgfs_ops);
+}
+
+/*
+ * idt_remove_dbgfs_files() - remove debugfs files
+ * @pdev:	Pointer to the driver data
+ */
+static void idt_remove_dbgfs_files(struct idt_89hpesx_dev *pdev)
+{
+	/* Remove CSR directory and it sysfs-node */
+	debugfs_remove_recursive(pdev->csr_dir);
+}
+
+/*
+ * idt_probe() - IDT 89HPESx driver probe() callback method
+ */
+static int idt_probe(struct i2c_client *client, const struct i2c_device_id *id)
+{
+	struct idt_89hpesx_dev *pdev;
+	int ret;
+
+	/* Create driver data */
+	pdev = idt_create_pdev(client);
+	if (IS_ERR(pdev))
+		return PTR_ERR(pdev);
+
+	/* Set SMBus operations */
+	ret = idt_set_smbus_ops(pdev);
+	if (ret != 0)
+		goto err_free_pdev;
+
+	/* Check whether it is truly IDT 89HPESx device */
+	ret = idt_check_dev(pdev);
+	if (ret != 0)
+		goto err_free_pdev;
+
+	/* Create sysfs files */
+	ret = idt_create_sysfs_files(pdev);
+	if (ret != 0)
+		goto err_free_pdev;
+
+	/* Create debugfs files */
+	idt_create_dbgfs_files(pdev);
+
+	return 0;
+
+err_free_pdev:
+	idt_free_pdev(pdev);
+
+	return ret;
+}
+
+/*
+ * idt_remove() - IDT 89HPESx driver remove() callback method
+ */
+static int idt_remove(struct i2c_client *client)
+{
+	struct idt_89hpesx_dev *pdev = i2c_get_clientdata(client);
+
+	/* Remove debugfs files first */
+	idt_remove_dbgfs_files(pdev);
+
+	/* Remove sysfs files */
+	idt_remove_sysfs_files(pdev);
+
+	/* Discard driver data structure */
+	idt_free_pdev(pdev);
+
+	return 0;
+}
+
+/*
+ * ee_ids - array of supported EEPROMs
+ */
+static const struct i2c_device_id ee_ids[] = {
+	{ "24c32",  4096},
+	{ "24c64",  8192},
+	{ "24c128", 16384},
+	{ "24c256", 32768},
+	{ "24c512", 65536},
+	{}
+};
+MODULE_DEVICE_TABLE(i2c, ee_ids);
+
+/*
+ * idt_ids - supported IDT 89HPESx devices
+ */
+static const struct i2c_device_id idt_ids[] = {
+	{ "89hpes8nt2", 0 },
+	{ "89hpes12nt3", 0 },
+
+	{ "89hpes24nt6ag2", 0 },
+	{ "89hpes32nt8ag2", 0 },
+	{ "89hpes32nt8bg2", 0 },
+	{ "89hpes12nt12g2", 0 },
+	{ "89hpes16nt16g2", 0 },
+	{ "89hpes24nt24g2", 0 },
+	{ "89hpes32nt24ag2", 0 },
+	{ "89hpes32nt24bg2", 0 },
+
+	{ "89hpes12n3", 0 },
+	{ "89hpes12n3a", 0 },
+	{ "89hpes24n3", 0 },
+	{ "89hpes24n3a", 0 },
+
+	{ "89hpes32h8", 0 },
+	{ "89hpes32h8g2", 0 },
+	{ "89hpes48h12", 0 },
+	{ "89hpes48h12g2", 0 },
+	{ "89hpes48h12ag2", 0 },
+	{ "89hpes16h16", 0 },
+	{ "89hpes22h16", 0 },
+	{ "89hpes22h16g2", 0 },
+	{ "89hpes34h16", 0 },
+	{ "89hpes34h16g2", 0 },
+	{ "89hpes64h16", 0 },
+	{ "89hpes64h16g2", 0 },
+	{ "89hpes64h16ag2", 0 },
+
+	/* { "89hpes3t3", 0 }, // No SMBus-slave iface */
+	{ "89hpes12t3g2", 0 },
+	{ "89hpes24t3g2", 0 },
+	/* { "89hpes4t4", 0 }, // No SMBus-slave iface */
+	{ "89hpes16t4", 0 },
+	{ "89hpes4t4g2", 0 },
+	{ "89hpes10t4g2", 0 },
+	{ "89hpes16t4g2", 0 },
+	{ "89hpes16t4ag2", 0 },
+	{ "89hpes5t5", 0 },
+	{ "89hpes6t5", 0 },
+	{ "89hpes8t5", 0 },
+	{ "89hpes8t5a", 0 },
+	{ "89hpes24t6", 0 },
+	{ "89hpes6t6g2", 0 },
+	{ "89hpes24t6g2", 0 },
+	{ "89hpes16t7", 0 },
+	{ "89hpes32t8", 0 },
+	{ "89hpes32t8g2", 0 },
+	{ "89hpes48t12", 0 },
+	{ "89hpes48t12g2", 0 },
+	{ /* END OF LIST */ }
+};
+MODULE_DEVICE_TABLE(i2c, idt_ids);
+
+static const struct of_device_id idt_of_match[] = {
+	{ .compatible = "idt,89hpes8nt2", },
+	{ .compatible = "idt,89hpes12nt3", },
+
+	{ .compatible = "idt,89hpes24nt6ag2", },
+	{ .compatible = "idt,89hpes32nt8ag2", },
+	{ .compatible = "idt,89hpes32nt8bg2", },
+	{ .compatible = "idt,89hpes12nt12g2", },
+	{ .compatible = "idt,89hpes16nt16g2", },
+	{ .compatible = "idt,89hpes24nt24g2", },
+	{ .compatible = "idt,89hpes32nt24ag2", },
+	{ .compatible = "idt,89hpes32nt24bg2", },
+
+	{ .compatible = "idt,89hpes12n3", },
+	{ .compatible = "idt,89hpes12n3a", },
+	{ .compatible = "idt,89hpes24n3", },
+	{ .compatible = "idt,89hpes24n3a", },
+
+	{ .compatible = "idt,89hpes32h8", },
+	{ .compatible = "idt,89hpes32h8g2", },
+	{ .compatible = "idt,89hpes48h12", },
+	{ .compatible = "idt,89hpes48h12g2", },
+	{ .compatible = "idt,89hpes48h12ag2", },
+	{ .compatible = "idt,89hpes16h16", },
+	{ .compatible = "idt,89hpes22h16", },
+	{ .compatible = "idt,89hpes22h16g2", },
+	{ .compatible = "idt,89hpes34h16", },
+	{ .compatible = "idt,89hpes34h16g2", },
+	{ .compatible = "idt,89hpes64h16", },
+	{ .compatible = "idt,89hpes64h16g2", },
+	{ .compatible = "idt,89hpes64h16ag2", },
+
+	{ .compatible = "idt,89hpes12t3g2", },
+	{ .compatible = "idt,89hpes24t3g2", },
+
+	{ .compatible = "idt,89hpes16t4", },
+	{ .compatible = "idt,89hpes4t4g2", },
+	{ .compatible = "idt,89hpes10t4g2", },
+	{ .compatible = "idt,89hpes16t4g2", },
+	{ .compatible = "idt,89hpes16t4ag2", },
+	{ .compatible = "idt,89hpes5t5", },
+	{ .compatible = "idt,89hpes6t5", },
+	{ .compatible = "idt,89hpes8t5", },
+	{ .compatible = "idt,89hpes8t5a", },
+	{ .compatible = "idt,89hpes24t6", },
+	{ .compatible = "idt,89hpes6t6g2", },
+	{ .compatible = "idt,89hpes24t6g2", },
+	{ .compatible = "idt,89hpes16t7", },
+	{ .compatible = "idt,89hpes32t8", },
+	{ .compatible = "idt,89hpes32t8g2", },
+	{ .compatible = "idt,89hpes48t12", },
+	{ .compatible = "idt,89hpes48t12g2", },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, idt_of_match);
+
+/*
+ * idt_driver - IDT 89HPESx driver structure
+ */
+static struct i2c_driver idt_driver = {
+	.driver = {
+		.name = IDT_NAME,
+		.of_match_table = idt_of_match,
+	},
+	.probe = idt_probe,
+	.remove = idt_remove,
+	.id_table = idt_ids,
+};
+
+/*
+ * idt_init() - IDT 89HPESx driver init() callback method
+ */
+static int __init idt_init(void)
+{
+	/* Create Debugfs directory first */
+	if (debugfs_initialized())
+		csr_dbgdir = debugfs_create_dir("idt_csr", NULL);
+
+	/* Add new i2c-device driver */
+	return i2c_add_driver(&idt_driver);
+}
+module_init(idt_init);
+
+/*
+ * idt_exit() - IDT 89HPESx driver exit() callback method
+ */
+static void __exit idt_exit(void)
+{
+	/* Discard debugfs directory and all files if any */
+	debugfs_remove_recursive(csr_dbgdir);
+
+	/* Unregister i2c-device driver */
+	i2c_del_driver(&idt_driver);
+}
+module_exit(idt_exit);
diff --git a/drivers/misc/eeprom/max6875.c b/drivers/misc/eeprom/max6875.c
new file mode 100644
index 000000000..fc0cf9a74
--- /dev/null
+++ b/drivers/misc/eeprom/max6875.c
@@ -0,0 +1,210 @@
+/*
+ * max6875.c - driver for MAX6874/MAX6875
+ *
+ * Copyright (C) 2005 Ben Gardner <bgardner@wabtec.com>
+ *
+ * Based on eeprom.c
+ *
+ * The MAX6875 has a bank of registers and two banks of EEPROM.
+ * Address ranges are defined as follows:
+ *  * 0x0000 - 0x0046 = configuration registers
+ *  * 0x8000 - 0x8046 = configuration EEPROM
+ *  * 0x8100 - 0x82FF = user EEPROM
+ *
+ * This driver makes the user EEPROM available for read.
+ *
+ * The registers & config EEPROM should be accessed via i2c-dev.
+ *
+ * The MAX6875 ignores the lowest address bit, so each chip responds to
+ * two addresses - 0x50/0x51 and 0x52/0x53.
+ *
+ * Note that the MAX6875 uses i2c_smbus_write_byte_data() to set the read
+ * address, so this driver is destructive if loaded for the wrong EEPROM chip.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/mutex.h>
+
+/* The MAX6875 can only read/write 16 bytes at a time */
+#define SLICE_SIZE			16
+#define SLICE_BITS			4
+
+/* USER EEPROM is at addresses 0x8100 - 0x82FF */
+#define USER_EEPROM_BASE		0x8100
+#define USER_EEPROM_SIZE		0x0200
+#define USER_EEPROM_SLICES		32
+
+/* MAX6875 commands */
+#define MAX6875_CMD_BLK_READ		0x84
+
+/* Each client has this additional data */
+struct max6875_data {
+	struct i2c_client	*fake_client;
+	struct mutex		update_lock;
+
+	u32			valid;
+	u8			data[USER_EEPROM_SIZE];
+	unsigned long		last_updated[USER_EEPROM_SLICES];
+};
+
+static void max6875_update_slice(struct i2c_client *client, int slice)
+{
+	struct max6875_data *data = i2c_get_clientdata(client);
+	int i, j, addr;
+	u8 *buf;
+
+	if (slice >= USER_EEPROM_SLICES)
+		return;
+
+	mutex_lock(&data->update_lock);
+
+	buf = &data->data[slice << SLICE_BITS];
+
+	if (!(data->valid & (1 << slice)) ||
+	    time_after(jiffies, data->last_updated[slice])) {
+
+		dev_dbg(&client->dev, "Starting update of slice %u\n", slice);
+
+		data->valid &= ~(1 << slice);
+
+		addr = USER_EEPROM_BASE + (slice << SLICE_BITS);
+
+		/* select the eeprom address */
+		if (i2c_smbus_write_byte_data(client, addr >> 8, addr & 0xFF)) {
+			dev_err(&client->dev, "address set failed\n");
+			goto exit_up;
+		}
+
+		if (i2c_check_functionality(client->adapter,
+					    I2C_FUNC_SMBUS_READ_I2C_BLOCK)) {
+			if (i2c_smbus_read_i2c_block_data(client,
+							  MAX6875_CMD_BLK_READ,
+							  SLICE_SIZE,
+							  buf) != SLICE_SIZE) {
+				goto exit_up;
+			}
+		} else {
+			for (i = 0; i < SLICE_SIZE; i++) {
+				j = i2c_smbus_read_byte(client);
+				if (j < 0) {
+					goto exit_up;
+				}
+				buf[i] = j;
+			}
+		}
+		data->last_updated[slice] = jiffies;
+		data->valid |= (1 << slice);
+	}
+exit_up:
+	mutex_unlock(&data->update_lock);
+}
+
+static ssize_t max6875_read(struct file *filp, struct kobject *kobj,
+			    struct bin_attribute *bin_attr,
+			    char *buf, loff_t off, size_t count)
+{
+	struct i2c_client *client = kobj_to_i2c_client(kobj);
+	struct max6875_data *data = i2c_get_clientdata(client);
+	int slice, max_slice;
+
+	/* refresh slices which contain requested bytes */
+	max_slice = (off + count - 1) >> SLICE_BITS;
+	for (slice = (off >> SLICE_BITS); slice <= max_slice; slice++)
+		max6875_update_slice(client, slice);
+
+	memcpy(buf, &data->data[off], count);
+
+	return count;
+}
+
+static const struct bin_attribute user_eeprom_attr = {
+	.attr = {
+		.name = "eeprom",
+		.mode = S_IRUGO,
+	},
+	.size = USER_EEPROM_SIZE,
+	.read = max6875_read,
+};
+
+static int max6875_probe(struct i2c_client *client,
+			 const struct i2c_device_id *id)
+{
+	struct i2c_adapter *adapter = client->adapter;
+	struct max6875_data *data;
+	int err;
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WRITE_BYTE_DATA
+				     | I2C_FUNC_SMBUS_READ_BYTE))
+		return -ENODEV;
+
+	/* Only bind to even addresses */
+	if (client->addr & 1)
+		return -ENODEV;
+
+	data = kzalloc(sizeof(struct max6875_data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	/* A fake client is created on the odd address */
+	data->fake_client = i2c_new_dummy(client->adapter, client->addr + 1);
+	if (!data->fake_client) {
+		err = -ENOMEM;
+		goto exit_kfree;
+	}
+
+	/* Init real i2c_client */
+	i2c_set_clientdata(client, data);
+	mutex_init(&data->update_lock);
+
+	err = sysfs_create_bin_file(&client->dev.kobj, &user_eeprom_attr);
+	if (err)
+		goto exit_remove_fake;
+
+	return 0;
+
+exit_remove_fake:
+	i2c_unregister_device(data->fake_client);
+exit_kfree:
+	kfree(data);
+	return err;
+}
+
+static int max6875_remove(struct i2c_client *client)
+{
+	struct max6875_data *data = i2c_get_clientdata(client);
+
+	i2c_unregister_device(data->fake_client);
+
+	sysfs_remove_bin_file(&client->dev.kobj, &user_eeprom_attr);
+	kfree(data);
+
+	return 0;
+}
+
+static const struct i2c_device_id max6875_id[] = {
+	{ "max6875", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, max6875_id);
+
+static struct i2c_driver max6875_driver = {
+	.driver = {
+		.name	= "max6875",
+	},
+	.probe		= max6875_probe,
+	.remove		= max6875_remove,
+	.id_table	= max6875_id,
+};
+
+module_i2c_driver(max6875_driver);
+
+MODULE_AUTHOR("Ben Gardner <bgardner@wabtec.com>");
+MODULE_DESCRIPTION("MAX6875 driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c
new file mode 100644
index 000000000..471263ffd
--- /dev/null
+++ b/drivers/misc/enclosure.c
@@ -0,0 +1,695 @@
+/*
+ * Enclosure Services
+ *
+ * Copyright (C) 2008 James Bottomley <James.Bottomley@HansenPartnership.com>
+ *
+**-----------------------------------------------------------------------------
+**
+**  This program is free software; you can redistribute it and/or
+**  modify it under the terms of the GNU General Public License
+**  version 2 as published by the Free Software Foundation.
+**
+**  This program is distributed in the hope that it will be useful,
+**  but WITHOUT ANY WARRANTY; without even the implied warranty of
+**  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+**  GNU General Public License for more details.
+**
+**  You should have received a copy of the GNU General Public License
+**  along with this program; if not, write to the Free Software
+**  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+**
+**-----------------------------------------------------------------------------
+*/
+#include <linux/device.h>
+#include <linux/enclosure.h>
+#include <linux/err.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+
+static LIST_HEAD(container_list);
+static DEFINE_MUTEX(container_list_lock);
+static struct class enclosure_class;
+
+/**
+ * enclosure_find - find an enclosure given a parent device
+ * @dev:	the parent to match against
+ * @start:	Optional enclosure device to start from (NULL if none)
+ *
+ * Looks through the list of registered enclosures to find all those
+ * with @dev as a parent.  Returns NULL if no enclosure is
+ * found. @start can be used as a starting point to obtain multiple
+ * enclosures per parent (should begin with NULL and then be set to
+ * each returned enclosure device). Obtains a reference to the
+ * enclosure class device which must be released with device_put().
+ * If @start is not NULL, a reference must be taken on it which is
+ * released before returning (this allows a loop through all
+ * enclosures to exit with only the reference on the enclosure of
+ * interest held).  Note that the @dev may correspond to the actual
+ * device housing the enclosure, in which case no iteration via @start
+ * is required.
+ */
+struct enclosure_device *enclosure_find(struct device *dev,
+					struct enclosure_device *start)
+{
+	struct enclosure_device *edev;
+
+	mutex_lock(&container_list_lock);
+	edev = list_prepare_entry(start, &container_list, node);
+	if (start)
+		put_device(&start->edev);
+
+	list_for_each_entry_continue(edev, &container_list, node) {
+		struct device *parent = edev->edev.parent;
+		/* parent might not be immediate, so iterate up to
+		 * the root of the tree if necessary */
+		while (parent) {
+			if (parent == dev) {
+				get_device(&edev->edev);
+				mutex_unlock(&container_list_lock);
+				return edev;
+			}
+			parent = parent->parent;
+		}
+	}
+	mutex_unlock(&container_list_lock);
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(enclosure_find);
+
+/**
+ * enclosure_for_each_device - calls a function for each enclosure
+ * @fn:		the function to call
+ * @data:	the data to pass to each call
+ *
+ * Loops over all the enclosures calling the function.
+ *
+ * Note, this function uses a mutex which will be held across calls to
+ * @fn, so it must have non atomic context, and @fn may (although it
+ * should not) sleep or otherwise cause the mutex to be held for
+ * indefinite periods
+ */
+int enclosure_for_each_device(int (*fn)(struct enclosure_device *, void *),
+			      void *data)
+{
+	int error = 0;
+	struct enclosure_device *edev;
+
+	mutex_lock(&container_list_lock);
+	list_for_each_entry(edev, &container_list, node) {
+		error = fn(edev, data);
+		if (error)
+			break;
+	}
+	mutex_unlock(&container_list_lock);
+
+	return error;
+}
+EXPORT_SYMBOL_GPL(enclosure_for_each_device);
+
+/**
+ * enclosure_register - register device as an enclosure
+ *
+ * @dev:	device containing the enclosure
+ * @components:	number of components in the enclosure
+ *
+ * This sets up the device for being an enclosure.  Note that @dev does
+ * not have to be a dedicated enclosure device.  It may be some other type
+ * of device that additionally responds to enclosure services
+ */
+struct enclosure_device *
+enclosure_register(struct device *dev, const char *name, int components,
+		   struct enclosure_component_callbacks *cb)
+{
+	struct enclosure_device *edev =
+		kzalloc(sizeof(struct enclosure_device) +
+			sizeof(struct enclosure_component)*components,
+			GFP_KERNEL);
+	int err, i;
+
+	BUG_ON(!cb);
+
+	if (!edev)
+		return ERR_PTR(-ENOMEM);
+
+	edev->components = components;
+
+	edev->edev.class = &enclosure_class;
+	edev->edev.parent = get_device(dev);
+	edev->cb = cb;
+	dev_set_name(&edev->edev, "%s", name);
+	err = device_register(&edev->edev);
+	if (err)
+		goto err;
+
+	for (i = 0; i < components; i++) {
+		edev->component[i].number = -1;
+		edev->component[i].slot = -1;
+		edev->component[i].power_status = -1;
+	}
+
+	mutex_lock(&container_list_lock);
+	list_add_tail(&edev->node, &container_list);
+	mutex_unlock(&container_list_lock);
+
+	return edev;
+
+ err:
+	put_device(edev->edev.parent);
+	kfree(edev);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(enclosure_register);
+
+static struct enclosure_component_callbacks enclosure_null_callbacks;
+
+/**
+ * enclosure_unregister - remove an enclosure
+ *
+ * @edev:	the registered enclosure to remove;
+ */
+void enclosure_unregister(struct enclosure_device *edev)
+{
+	int i;
+
+	mutex_lock(&container_list_lock);
+	list_del(&edev->node);
+	mutex_unlock(&container_list_lock);
+
+	for (i = 0; i < edev->components; i++)
+		if (edev->component[i].number != -1)
+			device_unregister(&edev->component[i].cdev);
+
+	/* prevent any callbacks into service user */
+	edev->cb = &enclosure_null_callbacks;
+	device_unregister(&edev->edev);
+}
+EXPORT_SYMBOL_GPL(enclosure_unregister);
+
+#define ENCLOSURE_NAME_SIZE	64
+#define COMPONENT_NAME_SIZE	64
+
+static void enclosure_link_name(struct enclosure_component *cdev, char *name)
+{
+	strcpy(name, "enclosure_device:");
+	strcat(name, dev_name(&cdev->cdev));
+}
+
+static void enclosure_remove_links(struct enclosure_component *cdev)
+{
+	char name[ENCLOSURE_NAME_SIZE];
+
+	enclosure_link_name(cdev, name);
+
+	/*
+	 * In odd circumstances, like multipath devices, something else may
+	 * already have removed the links, so check for this condition first.
+	 */
+	if (cdev->dev->kobj.sd)
+		sysfs_remove_link(&cdev->dev->kobj, name);
+
+	if (cdev->cdev.kobj.sd)
+		sysfs_remove_link(&cdev->cdev.kobj, "device");
+}
+
+static int enclosure_add_links(struct enclosure_component *cdev)
+{
+	int error;
+	char name[ENCLOSURE_NAME_SIZE];
+
+	error = sysfs_create_link(&cdev->cdev.kobj, &cdev->dev->kobj, "device");
+	if (error)
+		return error;
+
+	enclosure_link_name(cdev, name);
+	error = sysfs_create_link(&cdev->dev->kobj, &cdev->cdev.kobj, name);
+	if (error)
+		sysfs_remove_link(&cdev->cdev.kobj, "device");
+
+	return error;
+}
+
+static void enclosure_release(struct device *cdev)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev);
+
+	put_device(cdev->parent);
+	kfree(edev);
+}
+
+static void enclosure_component_release(struct device *dev)
+{
+	struct enclosure_component *cdev = to_enclosure_component(dev);
+
+	if (cdev->dev) {
+		enclosure_remove_links(cdev);
+		put_device(cdev->dev);
+	}
+	put_device(dev->parent);
+}
+
+static struct enclosure_component *
+enclosure_component_find_by_name(struct enclosure_device *edev,
+				const char *name)
+{
+	int i;
+	const char *cname;
+	struct enclosure_component *ecomp;
+
+	if (!edev || !name || !name[0])
+		return NULL;
+
+	for (i = 0; i < edev->components; i++) {
+		ecomp = &edev->component[i];
+		cname = dev_name(&ecomp->cdev);
+		if (ecomp->number != -1 &&
+		    cname && cname[0] &&
+		    !strcmp(cname, name))
+			return ecomp;
+	}
+
+	return NULL;
+}
+
+static const struct attribute_group *enclosure_component_groups[];
+
+/**
+ * enclosure_component_alloc - prepare a new enclosure component
+ * @edev:	the enclosure to add the component
+ * @num:	the device number
+ * @type:	the type of component being added
+ * @name:	an optional name to appear in sysfs (leave NULL if none)
+ *
+ * The name is optional for enclosures that give their components a unique
+ * name.  If not, leave the field NULL and a name will be assigned.
+ *
+ * Returns a pointer to the enclosure component or an error.
+ */
+struct enclosure_component *
+enclosure_component_alloc(struct enclosure_device *edev,
+			  unsigned int number,
+			  enum enclosure_component_type type,
+			  const char *name)
+{
+	struct enclosure_component *ecomp;
+	struct device *cdev;
+	int i;
+	char newname[COMPONENT_NAME_SIZE];
+
+	if (number >= edev->components)
+		return ERR_PTR(-EINVAL);
+
+	ecomp = &edev->component[number];
+
+	if (ecomp->number != -1)
+		return ERR_PTR(-EINVAL);
+
+	ecomp->type = type;
+	ecomp->number = number;
+	cdev = &ecomp->cdev;
+	cdev->parent = get_device(&edev->edev);
+
+	if (name && name[0]) {
+		/* Some hardware (e.g. enclosure in RX300 S6) has components
+		 * with non unique names. Registering duplicates in sysfs
+		 * will lead to warnings during bootup. So make the names
+		 * unique by appending consecutive numbers -1, -2, ... */
+		i = 1;
+		snprintf(newname, COMPONENT_NAME_SIZE,
+			 "%s", name);
+		while (enclosure_component_find_by_name(edev, newname))
+			snprintf(newname, COMPONENT_NAME_SIZE,
+				 "%s-%i", name, i++);
+		dev_set_name(cdev, "%s", newname);
+	} else
+		dev_set_name(cdev, "%u", number);
+
+	cdev->release = enclosure_component_release;
+	cdev->groups = enclosure_component_groups;
+
+	return ecomp;
+}
+EXPORT_SYMBOL_GPL(enclosure_component_alloc);
+
+/**
+ * enclosure_component_register - publishes an initialized enclosure component
+ * @ecomp:	component to add
+ *
+ * Returns 0 on successful registration, releases the component otherwise
+ */
+int enclosure_component_register(struct enclosure_component *ecomp)
+{
+	struct device *cdev;
+	int err;
+
+	cdev = &ecomp->cdev;
+	err = device_register(cdev);
+	if (err) {
+		ecomp->number = -1;
+		put_device(cdev);
+		return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(enclosure_component_register);
+
+/**
+ * enclosure_add_device - add a device as being part of an enclosure
+ * @edev:	the enclosure device being added to.
+ * @num:	the number of the component
+ * @dev:	the device being added
+ *
+ * Declares a real device to reside in slot (or identifier) @num of an
+ * enclosure.  This will cause the relevant sysfs links to appear.
+ * This function may also be used to change a device associated with
+ * an enclosure without having to call enclosure_remove_device() in
+ * between.
+ *
+ * Returns zero on success or an error.
+ */
+int enclosure_add_device(struct enclosure_device *edev, int component,
+			 struct device *dev)
+{
+	struct enclosure_component *cdev;
+	int err;
+
+	if (!edev || component >= edev->components)
+		return -EINVAL;
+
+	cdev = &edev->component[component];
+
+	if (cdev->dev == dev)
+		return -EEXIST;
+
+	if (cdev->dev) {
+		enclosure_remove_links(cdev);
+		put_device(cdev->dev);
+	}
+	cdev->dev = get_device(dev);
+	err = enclosure_add_links(cdev);
+	if (err) {
+		put_device(cdev->dev);
+		cdev->dev = NULL;
+	}
+	return err;
+}
+EXPORT_SYMBOL_GPL(enclosure_add_device);
+
+/**
+ * enclosure_remove_device - remove a device from an enclosure
+ * @edev:	the enclosure device
+ * @num:	the number of the component to remove
+ *
+ * Returns zero on success or an error.
+ *
+ */
+int enclosure_remove_device(struct enclosure_device *edev, struct device *dev)
+{
+	struct enclosure_component *cdev;
+	int i;
+
+	if (!edev || !dev)
+		return -EINVAL;
+
+	for (i = 0; i < edev->components; i++) {
+		cdev = &edev->component[i];
+		if (cdev->dev == dev) {
+			enclosure_remove_links(cdev);
+			put_device(dev);
+			cdev->dev = NULL;
+			return 0;
+		}
+	}
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(enclosure_remove_device);
+
+/*
+ * sysfs pieces below
+ */
+
+static ssize_t components_show(struct device *cdev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev);
+
+	return snprintf(buf, 40, "%d\n", edev->components);
+}
+static DEVICE_ATTR_RO(components);
+
+static ssize_t id_show(struct device *cdev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev);
+
+	if (edev->cb->show_id)
+		return edev->cb->show_id(edev, buf);
+	return -EINVAL;
+}
+static DEVICE_ATTR_RO(id);
+
+static struct attribute *enclosure_class_attrs[] = {
+	&dev_attr_components.attr,
+	&dev_attr_id.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(enclosure_class);
+
+static struct class enclosure_class = {
+	.name			= "enclosure",
+	.owner			= THIS_MODULE,
+	.dev_release		= enclosure_release,
+	.dev_groups		= enclosure_class_groups,
+};
+
+static const char *const enclosure_status[] = {
+	[ENCLOSURE_STATUS_UNSUPPORTED] = "unsupported",
+	[ENCLOSURE_STATUS_OK] = "OK",
+	[ENCLOSURE_STATUS_CRITICAL] = "critical",
+	[ENCLOSURE_STATUS_NON_CRITICAL] = "non-critical",
+	[ENCLOSURE_STATUS_UNRECOVERABLE] = "unrecoverable",
+	[ENCLOSURE_STATUS_NOT_INSTALLED] = "not installed",
+	[ENCLOSURE_STATUS_UNKNOWN] = "unknown",
+	[ENCLOSURE_STATUS_UNAVAILABLE] = "unavailable",
+	[ENCLOSURE_STATUS_MAX] = NULL,
+};
+
+static const char *const enclosure_type[] = {
+	[ENCLOSURE_COMPONENT_DEVICE] = "device",
+	[ENCLOSURE_COMPONENT_ARRAY_DEVICE] = "array device",
+};
+
+static ssize_t get_component_fault(struct device *cdev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+	if (edev->cb->get_fault)
+		edev->cb->get_fault(edev, ecomp);
+	return snprintf(buf, 40, "%d\n", ecomp->fault);
+}
+
+static ssize_t set_component_fault(struct device *cdev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+	int val = simple_strtoul(buf, NULL, 0);
+
+	if (edev->cb->set_fault)
+		edev->cb->set_fault(edev, ecomp, val);
+	return count;
+}
+
+static ssize_t get_component_status(struct device *cdev,
+				    struct device_attribute *attr,char *buf)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+	if (edev->cb->get_status)
+		edev->cb->get_status(edev, ecomp);
+	return snprintf(buf, 40, "%s\n", enclosure_status[ecomp->status]);
+}
+
+static ssize_t set_component_status(struct device *cdev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+	int i;
+
+	for (i = 0; enclosure_status[i]; i++) {
+		if (strncmp(buf, enclosure_status[i],
+			    strlen(enclosure_status[i])) == 0 &&
+		    (buf[strlen(enclosure_status[i])] == '\n' ||
+		     buf[strlen(enclosure_status[i])] == '\0'))
+			break;
+	}
+
+	if (enclosure_status[i] && edev->cb->set_status) {
+		edev->cb->set_status(edev, ecomp, i);
+		return count;
+	} else
+		return -EINVAL;
+}
+
+static ssize_t get_component_active(struct device *cdev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+	if (edev->cb->get_active)
+		edev->cb->get_active(edev, ecomp);
+	return snprintf(buf, 40, "%d\n", ecomp->active);
+}
+
+static ssize_t set_component_active(struct device *cdev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+	int val = simple_strtoul(buf, NULL, 0);
+
+	if (edev->cb->set_active)
+		edev->cb->set_active(edev, ecomp, val);
+	return count;
+}
+
+static ssize_t get_component_locate(struct device *cdev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+	if (edev->cb->get_locate)
+		edev->cb->get_locate(edev, ecomp);
+	return snprintf(buf, 40, "%d\n", ecomp->locate);
+}
+
+static ssize_t set_component_locate(struct device *cdev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+	int val = simple_strtoul(buf, NULL, 0);
+
+	if (edev->cb->set_locate)
+		edev->cb->set_locate(edev, ecomp, val);
+	return count;
+}
+
+static ssize_t get_component_power_status(struct device *cdev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+	if (edev->cb->get_power_status)
+		edev->cb->get_power_status(edev, ecomp);
+
+	/* If still uninitialized, the callback failed or does not exist. */
+	if (ecomp->power_status == -1)
+		return (edev->cb->get_power_status) ? -EIO : -ENOTTY;
+
+	return snprintf(buf, 40, "%s\n", ecomp->power_status ? "on" : "off");
+}
+
+static ssize_t set_component_power_status(struct device *cdev,
+					  struct device_attribute *attr,
+					  const char *buf, size_t count)
+{
+	struct enclosure_device *edev = to_enclosure_device(cdev->parent);
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+	int val;
+
+	if (strncmp(buf, "on", 2) == 0 &&
+	    (buf[2] == '\n' || buf[2] == '\0'))
+		val = 1;
+	else if (strncmp(buf, "off", 3) == 0 &&
+	    (buf[3] == '\n' || buf[3] == '\0'))
+		val = 0;
+	else
+		return -EINVAL;
+
+	if (edev->cb->set_power_status)
+		edev->cb->set_power_status(edev, ecomp, val);
+	return count;
+}
+
+static ssize_t get_component_type(struct device *cdev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+
+	return snprintf(buf, 40, "%s\n", enclosure_type[ecomp->type]);
+}
+
+static ssize_t get_component_slot(struct device *cdev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct enclosure_component *ecomp = to_enclosure_component(cdev);
+	int slot;
+
+	/* if the enclosure does not override then use 'number' as a stand-in */
+	if (ecomp->slot >= 0)
+		slot = ecomp->slot;
+	else
+		slot = ecomp->number;
+
+	return snprintf(buf, 40, "%d\n", slot);
+}
+
+static DEVICE_ATTR(fault, S_IRUGO | S_IWUSR, get_component_fault,
+		    set_component_fault);
+static DEVICE_ATTR(status, S_IRUGO | S_IWUSR, get_component_status,
+		   set_component_status);
+static DEVICE_ATTR(active, S_IRUGO | S_IWUSR, get_component_active,
+		   set_component_active);
+static DEVICE_ATTR(locate, S_IRUGO | S_IWUSR, get_component_locate,
+		   set_component_locate);
+static DEVICE_ATTR(power_status, S_IRUGO | S_IWUSR, get_component_power_status,
+		   set_component_power_status);
+static DEVICE_ATTR(type, S_IRUGO, get_component_type, NULL);
+static DEVICE_ATTR(slot, S_IRUGO, get_component_slot, NULL);
+
+static struct attribute *enclosure_component_attrs[] = {
+	&dev_attr_fault.attr,
+	&dev_attr_status.attr,
+	&dev_attr_active.attr,
+	&dev_attr_locate.attr,
+	&dev_attr_power_status.attr,
+	&dev_attr_type.attr,
+	&dev_attr_slot.attr,
+	NULL
+};
+ATTRIBUTE_GROUPS(enclosure_component);
+
+static int __init enclosure_init(void)
+{
+	return class_register(&enclosure_class);
+}
+
+static void __exit enclosure_exit(void)
+{
+	class_unregister(&enclosure_class);
+}
+
+module_init(enclosure_init);
+module_exit(enclosure_exit);
+
+MODULE_AUTHOR("James Bottomley");
+MODULE_DESCRIPTION("Enclosure Services");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/fsa9480.c b/drivers/misc/fsa9480.c
new file mode 100644
index 000000000..607b489a6
--- /dev/null
+++ b/drivers/misc/fsa9480.c
@@ -0,0 +1,550 @@
+/*
+ * fsa9480.c - FSA9480 micro USB switch device driver
+ *
+ * Copyright (C) 2010 Samsung Electronics
+ * Minkyu Kang <mk7.kang@samsung.com>
+ * Wonguk Jeong <wonguk.jeong@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/platform_data/fsa9480.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/workqueue.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/pm_runtime.h>
+
+/* FSA9480 I2C registers */
+#define FSA9480_REG_DEVID		0x01
+#define FSA9480_REG_CTRL		0x02
+#define FSA9480_REG_INT1		0x03
+#define FSA9480_REG_INT2		0x04
+#define FSA9480_REG_INT1_MASK		0x05
+#define FSA9480_REG_INT2_MASK		0x06
+#define FSA9480_REG_ADC			0x07
+#define FSA9480_REG_TIMING1		0x08
+#define FSA9480_REG_TIMING2		0x09
+#define FSA9480_REG_DEV_T1		0x0a
+#define FSA9480_REG_DEV_T2		0x0b
+#define FSA9480_REG_BTN1		0x0c
+#define FSA9480_REG_BTN2		0x0d
+#define FSA9480_REG_CK			0x0e
+#define FSA9480_REG_CK_INT1		0x0f
+#define FSA9480_REG_CK_INT2		0x10
+#define FSA9480_REG_CK_INTMASK1		0x11
+#define FSA9480_REG_CK_INTMASK2		0x12
+#define FSA9480_REG_MANSW1		0x13
+#define FSA9480_REG_MANSW2		0x14
+
+/* Control */
+#define CON_SWITCH_OPEN		(1 << 4)
+#define CON_RAW_DATA		(1 << 3)
+#define CON_MANUAL_SW		(1 << 2)
+#define CON_WAIT		(1 << 1)
+#define CON_INT_MASK		(1 << 0)
+#define CON_MASK		(CON_SWITCH_OPEN | CON_RAW_DATA | \
+				CON_MANUAL_SW | CON_WAIT)
+
+/* Device Type 1 */
+#define DEV_USB_OTG		(1 << 7)
+#define DEV_DEDICATED_CHG	(1 << 6)
+#define DEV_USB_CHG		(1 << 5)
+#define DEV_CAR_KIT		(1 << 4)
+#define DEV_UART		(1 << 3)
+#define DEV_USB			(1 << 2)
+#define DEV_AUDIO_2		(1 << 1)
+#define DEV_AUDIO_1		(1 << 0)
+
+#define DEV_T1_USB_MASK		(DEV_USB_OTG | DEV_USB)
+#define DEV_T1_UART_MASK	(DEV_UART)
+#define DEV_T1_CHARGER_MASK	(DEV_DEDICATED_CHG | DEV_USB_CHG)
+
+/* Device Type 2 */
+#define DEV_AV			(1 << 6)
+#define DEV_TTY			(1 << 5)
+#define DEV_PPD			(1 << 4)
+#define DEV_JIG_UART_OFF	(1 << 3)
+#define DEV_JIG_UART_ON		(1 << 2)
+#define DEV_JIG_USB_OFF		(1 << 1)
+#define DEV_JIG_USB_ON		(1 << 0)
+
+#define DEV_T2_USB_MASK		(DEV_JIG_USB_OFF | DEV_JIG_USB_ON)
+#define DEV_T2_UART_MASK	(DEV_JIG_UART_OFF | DEV_JIG_UART_ON)
+#define DEV_T2_JIG_MASK		(DEV_JIG_USB_OFF | DEV_JIG_USB_ON | \
+				DEV_JIG_UART_OFF | DEV_JIG_UART_ON)
+
+/*
+ * Manual Switch
+ * D- [7:5] / D+ [4:2]
+ * 000: Open all / 001: USB / 010: AUDIO / 011: UART / 100: V_AUDIO
+ */
+#define SW_VAUDIO		((4 << 5) | (4 << 2))
+#define SW_UART			((3 << 5) | (3 << 2))
+#define SW_AUDIO		((2 << 5) | (2 << 2))
+#define SW_DHOST		((1 << 5) | (1 << 2))
+#define SW_AUTO			((0 << 5) | (0 << 2))
+
+/* Interrupt 1 */
+#define INT_DETACH		(1 << 1)
+#define INT_ATTACH		(1 << 0)
+
+struct fsa9480_usbsw {
+	struct i2c_client		*client;
+	struct fsa9480_platform_data	*pdata;
+	int				dev1;
+	int				dev2;
+	int				mansw;
+};
+
+static struct fsa9480_usbsw *chip;
+
+static int fsa9480_write_reg(struct i2c_client *client,
+		int reg, int value)
+{
+	int ret;
+
+	ret = i2c_smbus_write_byte_data(client, reg, value);
+
+	if (ret < 0)
+		dev_err(&client->dev, "%s: err %d\n", __func__, ret);
+
+	return ret;
+}
+
+static int fsa9480_read_reg(struct i2c_client *client, int reg)
+{
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(client, reg);
+
+	if (ret < 0)
+		dev_err(&client->dev, "%s: err %d\n", __func__, ret);
+
+	return ret;
+}
+
+static int fsa9480_read_irq(struct i2c_client *client, int *value)
+{
+	int ret;
+
+	ret = i2c_smbus_read_i2c_block_data(client,
+			FSA9480_REG_INT1, 2, (u8 *)value);
+	*value &= 0xffff;
+
+	if (ret < 0)
+		dev_err(&client->dev, "%s: err %d\n", __func__, ret);
+
+	return ret;
+}
+
+static void fsa9480_set_switch(const char *buf)
+{
+	struct fsa9480_usbsw *usbsw = chip;
+	struct i2c_client *client = usbsw->client;
+	unsigned int value;
+	unsigned int path = 0;
+
+	value = fsa9480_read_reg(client, FSA9480_REG_CTRL);
+
+	if (!strncmp(buf, "VAUDIO", 6)) {
+		path = SW_VAUDIO;
+		value &= ~CON_MANUAL_SW;
+	} else if (!strncmp(buf, "UART", 4)) {
+		path = SW_UART;
+		value &= ~CON_MANUAL_SW;
+	} else if (!strncmp(buf, "AUDIO", 5)) {
+		path = SW_AUDIO;
+		value &= ~CON_MANUAL_SW;
+	} else if (!strncmp(buf, "DHOST", 5)) {
+		path = SW_DHOST;
+		value &= ~CON_MANUAL_SW;
+	} else if (!strncmp(buf, "AUTO", 4)) {
+		path = SW_AUTO;
+		value |= CON_MANUAL_SW;
+	} else {
+		printk(KERN_ERR "Wrong command\n");
+		return;
+	}
+
+	usbsw->mansw = path;
+	fsa9480_write_reg(client, FSA9480_REG_MANSW1, path);
+	fsa9480_write_reg(client, FSA9480_REG_CTRL, value);
+}
+
+static ssize_t fsa9480_get_switch(char *buf)
+{
+	struct fsa9480_usbsw *usbsw = chip;
+	struct i2c_client *client = usbsw->client;
+	unsigned int value;
+
+	value = fsa9480_read_reg(client, FSA9480_REG_MANSW1);
+
+	if (value == SW_VAUDIO)
+		return sprintf(buf, "VAUDIO\n");
+	else if (value == SW_UART)
+		return sprintf(buf, "UART\n");
+	else if (value == SW_AUDIO)
+		return sprintf(buf, "AUDIO\n");
+	else if (value == SW_DHOST)
+		return sprintf(buf, "DHOST\n");
+	else if (value == SW_AUTO)
+		return sprintf(buf, "AUTO\n");
+	else
+		return sprintf(buf, "%x", value);
+}
+
+static ssize_t fsa9480_show_device(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	struct fsa9480_usbsw *usbsw = dev_get_drvdata(dev);
+	struct i2c_client *client = usbsw->client;
+	int dev1, dev2;
+
+	dev1 = fsa9480_read_reg(client, FSA9480_REG_DEV_T1);
+	dev2 = fsa9480_read_reg(client, FSA9480_REG_DEV_T2);
+
+	if (!dev1 && !dev2)
+		return sprintf(buf, "NONE\n");
+
+	/* USB */
+	if (dev1 & DEV_T1_USB_MASK || dev2 & DEV_T2_USB_MASK)
+		return sprintf(buf, "USB\n");
+
+	/* UART */
+	if (dev1 & DEV_T1_UART_MASK || dev2 & DEV_T2_UART_MASK)
+		return sprintf(buf, "UART\n");
+
+	/* CHARGER */
+	if (dev1 & DEV_T1_CHARGER_MASK)
+		return sprintf(buf, "CHARGER\n");
+
+	/* JIG */
+	if (dev2 & DEV_T2_JIG_MASK)
+		return sprintf(buf, "JIG\n");
+
+	return sprintf(buf, "UNKNOWN\n");
+}
+
+static ssize_t fsa9480_show_manualsw(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return fsa9480_get_switch(buf);
+
+}
+
+static ssize_t fsa9480_set_manualsw(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	fsa9480_set_switch(buf);
+
+	return count;
+}
+
+static DEVICE_ATTR(device, S_IRUGO, fsa9480_show_device, NULL);
+static DEVICE_ATTR(switch, S_IRUGO | S_IWUSR,
+		fsa9480_show_manualsw, fsa9480_set_manualsw);
+
+static struct attribute *fsa9480_attributes[] = {
+	&dev_attr_device.attr,
+	&dev_attr_switch.attr,
+	NULL
+};
+
+static const struct attribute_group fsa9480_group = {
+	.attrs = fsa9480_attributes,
+};
+
+static void fsa9480_detect_dev(struct fsa9480_usbsw *usbsw, int intr)
+{
+	int val1, val2, ctrl;
+	struct fsa9480_platform_data *pdata = usbsw->pdata;
+	struct i2c_client *client = usbsw->client;
+
+	val1 = fsa9480_read_reg(client, FSA9480_REG_DEV_T1);
+	val2 = fsa9480_read_reg(client, FSA9480_REG_DEV_T2);
+	ctrl = fsa9480_read_reg(client, FSA9480_REG_CTRL);
+
+	dev_info(&client->dev, "intr: 0x%x, dev1: 0x%x, dev2: 0x%x\n",
+			intr, val1, val2);
+
+	if (!intr)
+		goto out;
+
+	if (intr & INT_ATTACH) {	/* Attached */
+		/* USB */
+		if (val1 & DEV_T1_USB_MASK || val2 & DEV_T2_USB_MASK) {
+			if (pdata->usb_cb)
+				pdata->usb_cb(FSA9480_ATTACHED);
+
+			if (usbsw->mansw) {
+				fsa9480_write_reg(client,
+					FSA9480_REG_MANSW1, usbsw->mansw);
+			}
+		}
+
+		/* UART */
+		if (val1 & DEV_T1_UART_MASK || val2 & DEV_T2_UART_MASK) {
+			if (pdata->uart_cb)
+				pdata->uart_cb(FSA9480_ATTACHED);
+
+			if (!(ctrl & CON_MANUAL_SW)) {
+				fsa9480_write_reg(client,
+					FSA9480_REG_MANSW1, SW_UART);
+			}
+		}
+
+		/* CHARGER */
+		if (val1 & DEV_T1_CHARGER_MASK) {
+			if (pdata->charger_cb)
+				pdata->charger_cb(FSA9480_ATTACHED);
+		}
+
+		/* JIG */
+		if (val2 & DEV_T2_JIG_MASK) {
+			if (pdata->jig_cb)
+				pdata->jig_cb(FSA9480_ATTACHED);
+		}
+	} else if (intr & INT_DETACH) {	/* Detached */
+		/* USB */
+		if (usbsw->dev1 & DEV_T1_USB_MASK ||
+			usbsw->dev2 & DEV_T2_USB_MASK) {
+			if (pdata->usb_cb)
+				pdata->usb_cb(FSA9480_DETACHED);
+		}
+
+		/* UART */
+		if (usbsw->dev1 & DEV_T1_UART_MASK ||
+			usbsw->dev2 & DEV_T2_UART_MASK) {
+			if (pdata->uart_cb)
+				pdata->uart_cb(FSA9480_DETACHED);
+		}
+
+		/* CHARGER */
+		if (usbsw->dev1 & DEV_T1_CHARGER_MASK) {
+			if (pdata->charger_cb)
+				pdata->charger_cb(FSA9480_DETACHED);
+		}
+
+		/* JIG */
+		if (usbsw->dev2 & DEV_T2_JIG_MASK) {
+			if (pdata->jig_cb)
+				pdata->jig_cb(FSA9480_DETACHED);
+		}
+	}
+
+	usbsw->dev1 = val1;
+	usbsw->dev2 = val2;
+
+out:
+	ctrl &= ~CON_INT_MASK;
+	fsa9480_write_reg(client, FSA9480_REG_CTRL, ctrl);
+}
+
+static irqreturn_t fsa9480_irq_handler(int irq, void *data)
+{
+	struct fsa9480_usbsw *usbsw = data;
+	struct i2c_client *client = usbsw->client;
+	int intr;
+
+	/* clear interrupt */
+	fsa9480_read_irq(client, &intr);
+
+	/* device detection */
+	fsa9480_detect_dev(usbsw, intr);
+
+	return IRQ_HANDLED;
+}
+
+static int fsa9480_irq_init(struct fsa9480_usbsw *usbsw)
+{
+	struct fsa9480_platform_data *pdata = usbsw->pdata;
+	struct i2c_client *client = usbsw->client;
+	int ret;
+	int intr;
+	unsigned int ctrl = CON_MASK;
+
+	/* clear interrupt */
+	fsa9480_read_irq(client, &intr);
+
+	/* unmask interrupt (attach/detach only) */
+	fsa9480_write_reg(client, FSA9480_REG_INT1_MASK, 0xfc);
+	fsa9480_write_reg(client, FSA9480_REG_INT2_MASK, 0x1f);
+
+	usbsw->mansw = fsa9480_read_reg(client, FSA9480_REG_MANSW1);
+
+	if (usbsw->mansw)
+		ctrl &= ~CON_MANUAL_SW;	/* Manual Switching Mode */
+
+	fsa9480_write_reg(client, FSA9480_REG_CTRL, ctrl);
+
+	if (pdata && pdata->cfg_gpio)
+		pdata->cfg_gpio();
+
+	if (client->irq) {
+		ret = request_threaded_irq(client->irq, NULL,
+				fsa9480_irq_handler,
+				IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+				"fsa9480 micro USB", usbsw);
+		if (ret) {
+			dev_err(&client->dev, "failed to request IRQ\n");
+			return ret;
+		}
+
+		if (pdata)
+			device_init_wakeup(&client->dev, pdata->wakeup);
+	}
+
+	return 0;
+}
+
+static int fsa9480_probe(struct i2c_client *client,
+			 const struct i2c_device_id *id)
+{
+	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+	struct fsa9480_usbsw *usbsw;
+	int ret = 0;
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA))
+		return -EIO;
+
+	usbsw = kzalloc(sizeof(struct fsa9480_usbsw), GFP_KERNEL);
+	if (!usbsw) {
+		dev_err(&client->dev, "failed to allocate driver data\n");
+		return -ENOMEM;
+	}
+
+	usbsw->client = client;
+	usbsw->pdata = client->dev.platform_data;
+
+	chip = usbsw;
+
+	i2c_set_clientdata(client, usbsw);
+
+	ret = fsa9480_irq_init(usbsw);
+	if (ret)
+		goto fail1;
+
+	ret = sysfs_create_group(&client->dev.kobj, &fsa9480_group);
+	if (ret) {
+		dev_err(&client->dev,
+				"failed to create fsa9480 attribute group\n");
+		goto fail2;
+	}
+
+	/* ADC Detect Time: 500ms */
+	fsa9480_write_reg(client, FSA9480_REG_TIMING1, 0x6);
+
+	if (chip->pdata->reset_cb)
+		chip->pdata->reset_cb();
+
+	/* device detection */
+	fsa9480_detect_dev(usbsw, INT_ATTACH);
+
+	pm_runtime_set_active(&client->dev);
+
+	return 0;
+
+fail2:
+	if (client->irq)
+		free_irq(client->irq, usbsw);
+fail1:
+	kfree(usbsw);
+	return ret;
+}
+
+static int fsa9480_remove(struct i2c_client *client)
+{
+	struct fsa9480_usbsw *usbsw = i2c_get_clientdata(client);
+
+	if (client->irq)
+		free_irq(client->irq, usbsw);
+
+	sysfs_remove_group(&client->dev.kobj, &fsa9480_group);
+	device_init_wakeup(&client->dev, 0);
+	kfree(usbsw);
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+
+static int fsa9480_suspend(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct fsa9480_usbsw *usbsw = i2c_get_clientdata(client);
+	struct fsa9480_platform_data *pdata = usbsw->pdata;
+
+	if (device_may_wakeup(&client->dev) && client->irq)
+		enable_irq_wake(client->irq);
+
+	if (pdata->usb_power)
+		pdata->usb_power(0);
+
+	return 0;
+}
+
+static int fsa9480_resume(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct fsa9480_usbsw *usbsw = i2c_get_clientdata(client);
+	int dev1, dev2;
+
+	if (device_may_wakeup(&client->dev) && client->irq)
+		disable_irq_wake(client->irq);
+
+	/*
+	 * Clear Pending interrupt. Note that detect_dev does what
+	 * the interrupt handler does. So, we don't miss pending and
+	 * we reenable interrupt if there is one.
+	 */
+	fsa9480_read_reg(client, FSA9480_REG_INT1);
+	fsa9480_read_reg(client, FSA9480_REG_INT2);
+
+	dev1 = fsa9480_read_reg(client, FSA9480_REG_DEV_T1);
+	dev2 = fsa9480_read_reg(client, FSA9480_REG_DEV_T2);
+
+	/* device detection */
+	fsa9480_detect_dev(usbsw, (dev1 || dev2) ? INT_ATTACH : INT_DETACH);
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(fsa9480_pm_ops, fsa9480_suspend, fsa9480_resume);
+#define FSA9480_PM_OPS (&fsa9480_pm_ops)
+
+#else
+
+#define FSA9480_PM_OPS NULL
+
+#endif /* CONFIG_PM_SLEEP */
+
+static const struct i2c_device_id fsa9480_id[] = {
+	{"fsa9480", 0},
+	{}
+};
+MODULE_DEVICE_TABLE(i2c, fsa9480_id);
+
+static struct i2c_driver fsa9480_i2c_driver = {
+	.driver = {
+		.name = "fsa9480",
+		.pm = FSA9480_PM_OPS,
+	},
+	.probe = fsa9480_probe,
+	.remove = fsa9480_remove,
+	.id_table = fsa9480_id,
+};
+
+module_i2c_driver(fsa9480_i2c_driver);
+
+MODULE_AUTHOR("Minkyu Kang <mk7.kang@samsung.com>");
+MODULE_DESCRIPTION("FSA9480 USB Switch driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/genwqe/Kconfig b/drivers/misc/genwqe/Kconfig
new file mode 100644
index 000000000..4c0a033cb
--- /dev/null
+++ b/drivers/misc/genwqe/Kconfig
@@ -0,0 +1,19 @@
+#
+# IBM Accelerator Family 'GenWQE'
+#
+
+menuconfig GENWQE
+	tristate "GenWQE PCIe Accelerator"
+	depends on PCI && 64BIT
+	select CRC_ITU_T
+	default n
+	help
+	  Enables PCIe card driver for IBM GenWQE accelerators.
+	  The user-space interface is described in
+	  include/linux/genwqe/genwqe_card.h.
+
+config GENWQE_PLATFORM_ERROR_RECOVERY
+	int "Use platform recovery procedures (0=off, 1=on)"
+	depends on GENWQE
+	default 1 if PPC64
+	default 0
diff --git a/drivers/misc/genwqe/Makefile b/drivers/misc/genwqe/Makefile
new file mode 100644
index 000000000..98a2b4f0b
--- /dev/null
+++ b/drivers/misc/genwqe/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for GenWQE driver
+#
+
+obj-$(CONFIG_GENWQE) := genwqe_card.o
+genwqe_card-objs := card_base.o card_dev.o card_ddcb.o card_sysfs.o \
+	card_debugfs.o card_utils.o
diff --git a/drivers/misc/genwqe/card_base.c b/drivers/misc/genwqe/card_base.c
new file mode 100644
index 000000000..c7cd3675b
--- /dev/null
+++ b/drivers/misc/genwqe/card_base.c
@@ -0,0 +1,1412 @@
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Module initialization and PCIe setup. Card health monitoring and
+ * recovery functionality. Character device creation and deletion are
+ * controlled from here.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/err.h>
+#include <linux/aer.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/device.h>
+#include <linux/log2.h>
+
+#include "card_base.h"
+#include "card_ddcb.h"
+
+MODULE_AUTHOR("Frank Haverkamp <haver@linux.vnet.ibm.com>");
+MODULE_AUTHOR("Michael Ruettger <michael@ibmra.de>");
+MODULE_AUTHOR("Joerg-Stephan Vogt <jsvogt@de.ibm.com>");
+MODULE_AUTHOR("Michael Jung <mijung@gmx.net>");
+
+MODULE_DESCRIPTION("GenWQE Card");
+MODULE_VERSION(DRV_VERSION);
+MODULE_LICENSE("GPL");
+
+static char genwqe_driver_name[] = GENWQE_DEVNAME;
+static struct class *class_genwqe;
+static struct dentry *debugfs_genwqe;
+static struct genwqe_dev *genwqe_devices[GENWQE_CARD_NO_MAX];
+
+/* PCI structure for identifying device by PCI vendor and device ID */
+static const struct pci_device_id genwqe_device_table[] = {
+	{ .vendor      = PCI_VENDOR_ID_IBM,
+	  .device      = PCI_DEVICE_GENWQE,
+	  .subvendor   = PCI_SUBVENDOR_ID_IBM,
+	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5,
+	  .class       = (PCI_CLASSCODE_GENWQE5 << 8),
+	  .class_mask  = ~0,
+	  .driver_data = 0 },
+
+	/* Initial SR-IOV bring-up image */
+	{ .vendor      = PCI_VENDOR_ID_IBM,
+	  .device      = PCI_DEVICE_GENWQE,
+	  .subvendor   = PCI_SUBVENDOR_ID_IBM_SRIOV,
+	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5_SRIOV,
+	  .class       = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
+	  .class_mask  = ~0,
+	  .driver_data = 0 },
+
+	{ .vendor      = PCI_VENDOR_ID_IBM,  /* VF Vendor ID */
+	  .device      = 0x0000,  /* VF Device ID */
+	  .subvendor   = PCI_SUBVENDOR_ID_IBM_SRIOV,
+	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5_SRIOV,
+	  .class       = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
+	  .class_mask  = ~0,
+	  .driver_data = 0 },
+
+	/* Fixed up image */
+	{ .vendor      = PCI_VENDOR_ID_IBM,
+	  .device      = PCI_DEVICE_GENWQE,
+	  .subvendor   = PCI_SUBVENDOR_ID_IBM_SRIOV,
+	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5,
+	  .class       = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
+	  .class_mask  = ~0,
+	  .driver_data = 0 },
+
+	{ .vendor      = PCI_VENDOR_ID_IBM,  /* VF Vendor ID */
+	  .device      = 0x0000,  /* VF Device ID */
+	  .subvendor   = PCI_SUBVENDOR_ID_IBM_SRIOV,
+	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5,
+	  .class       = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
+	  .class_mask  = ~0,
+	  .driver_data = 0 },
+
+	/* Even one more ... */
+	{ .vendor      = PCI_VENDOR_ID_IBM,
+	  .device      = PCI_DEVICE_GENWQE,
+	  .subvendor   = PCI_SUBVENDOR_ID_IBM,
+	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5_NEW,
+	  .class       = (PCI_CLASSCODE_GENWQE5 << 8),
+	  .class_mask  = ~0,
+	  .driver_data = 0 },
+
+	{ 0, }			/* 0 terminated list. */
+};
+
+MODULE_DEVICE_TABLE(pci, genwqe_device_table);
+
+/**
+ * genwqe_dev_alloc() - Create and prepare a new card descriptor
+ *
+ * Return: Pointer to card descriptor, or ERR_PTR(err) on error
+ */
+static struct genwqe_dev *genwqe_dev_alloc(void)
+{
+	unsigned int i = 0, j;
+	struct genwqe_dev *cd;
+
+	for (i = 0; i < GENWQE_CARD_NO_MAX; i++) {
+		if (genwqe_devices[i] == NULL)
+			break;
+	}
+	if (i >= GENWQE_CARD_NO_MAX)
+		return ERR_PTR(-ENODEV);
+
+	cd = kzalloc(sizeof(struct genwqe_dev), GFP_KERNEL);
+	if (!cd)
+		return ERR_PTR(-ENOMEM);
+
+	cd->card_idx = i;
+	cd->class_genwqe = class_genwqe;
+	cd->debugfs_genwqe = debugfs_genwqe;
+
+	/*
+	 * This comes from kernel config option and can be overritten via
+	 * debugfs.
+	 */
+	cd->use_platform_recovery = CONFIG_GENWQE_PLATFORM_ERROR_RECOVERY;
+
+	init_waitqueue_head(&cd->queue_waitq);
+
+	spin_lock_init(&cd->file_lock);
+	INIT_LIST_HEAD(&cd->file_list);
+
+	cd->card_state = GENWQE_CARD_UNUSED;
+	spin_lock_init(&cd->print_lock);
+
+	cd->ddcb_software_timeout = GENWQE_DDCB_SOFTWARE_TIMEOUT;
+	cd->kill_timeout = GENWQE_KILL_TIMEOUT;
+
+	for (j = 0; j < GENWQE_MAX_VFS; j++)
+		cd->vf_jobtimeout_msec[j] = GENWQE_VF_JOBTIMEOUT_MSEC;
+
+	genwqe_devices[i] = cd;
+	return cd;
+}
+
+static void genwqe_dev_free(struct genwqe_dev *cd)
+{
+	if (!cd)
+		return;
+
+	genwqe_devices[cd->card_idx] = NULL;
+	kfree(cd);
+}
+
+/**
+ * genwqe_bus_reset() - Card recovery
+ *
+ * pci_reset_function() will recover the device and ensure that the
+ * registers are accessible again when it completes with success. If
+ * not, the card will stay dead and registers will be unaccessible
+ * still.
+ */
+static int genwqe_bus_reset(struct genwqe_dev *cd)
+{
+	int rc = 0;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	void __iomem *mmio;
+
+	if (cd->err_inject & GENWQE_INJECT_BUS_RESET_FAILURE)
+		return -EIO;
+
+	mmio = cd->mmio;
+	cd->mmio = NULL;
+	pci_iounmap(pci_dev, mmio);
+
+	pci_release_mem_regions(pci_dev);
+
+	/*
+	 * Firmware/BIOS might change memory mapping during bus reset.
+	 * Settings like enable bus-mastering, ... are backuped and
+	 * restored by the pci_reset_function().
+	 */
+	dev_dbg(&pci_dev->dev, "[%s] pci_reset function ...\n", __func__);
+	rc = pci_reset_function(pci_dev);
+	if (rc) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: failed reset func (rc %d)\n", __func__, rc);
+		return rc;
+	}
+	dev_dbg(&pci_dev->dev, "[%s] done with rc=%d\n", __func__, rc);
+
+	/*
+	 * Here is the right spot to clear the register read
+	 * failure. pci_bus_reset() does this job in real systems.
+	 */
+	cd->err_inject &= ~(GENWQE_INJECT_HARDWARE_FAILURE |
+			    GENWQE_INJECT_GFIR_FATAL |
+			    GENWQE_INJECT_GFIR_INFO);
+
+	rc = pci_request_mem_regions(pci_dev, genwqe_driver_name);
+	if (rc) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: request bars failed (%d)\n", __func__, rc);
+		return -EIO;
+	}
+
+	cd->mmio = pci_iomap(pci_dev, 0, 0);
+	if (cd->mmio == NULL) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: mapping BAR0 failed\n", __func__);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+/*
+ * Hardware circumvention section. Certain bitstreams in our test-lab
+ * had different kinds of problems. Here is where we adjust those
+ * bitstreams to function will with this version of our device driver.
+ *
+ * Thise circumventions are applied to the physical function only.
+ * The magical numbers below are identifying development/manufacturing
+ * versions of the bitstream used on the card.
+ *
+ * Turn off error reporting for old/manufacturing images.
+ */
+
+bool genwqe_need_err_masking(struct genwqe_dev *cd)
+{
+	return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull;
+}
+
+static void genwqe_tweak_hardware(struct genwqe_dev *cd)
+{
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	/* Mask FIRs for development images */
+	if (((cd->slu_unitcfg & 0xFFFF0ull) >= 0x32000ull) &&
+	    ((cd->slu_unitcfg & 0xFFFF0ull) <= 0x33250ull)) {
+		dev_warn(&pci_dev->dev,
+			 "FIRs masked due to bitstream %016llx.%016llx\n",
+			 cd->slu_unitcfg, cd->app_unitcfg);
+
+		__genwqe_writeq(cd, IO_APP_SEC_LEM_DEBUG_OVR,
+				0xFFFFFFFFFFFFFFFFull);
+
+		__genwqe_writeq(cd, IO_APP_ERR_ACT_MASK,
+				0x0000000000000000ull);
+	}
+}
+
+/**
+ * genwqe_recovery_on_fatal_gfir_required() - Version depended actions
+ *
+ * Bitstreams older than 2013-02-17 have a bug where fatal GFIRs must
+ * be ignored. This is e.g. true for the bitstream we gave to the card
+ * manufacturer, but also for some old bitstreams we released to our
+ * test-lab.
+ */
+int genwqe_recovery_on_fatal_gfir_required(struct genwqe_dev *cd)
+{
+	return (cd->slu_unitcfg & 0xFFFF0ull) >= 0x32170ull;
+}
+
+int genwqe_flash_readback_fails(struct genwqe_dev *cd)
+{
+	return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull;
+}
+
+/**
+ * genwqe_T_psec() - Calculate PF/VF timeout register content
+ *
+ * Note: From a design perspective it turned out to be a bad idea to
+ * use codes here to specifiy the frequency/speed values. An old
+ * driver cannot understand new codes and is therefore always a
+ * problem. Better is to measure out the value or put the
+ * speed/frequency directly into a register which is always a valid
+ * value for old as well as for new software.
+ */
+/* T = 1/f */
+static int genwqe_T_psec(struct genwqe_dev *cd)
+{
+	u16 speed;	/* 1/f -> 250,  200,  166,  175 */
+	static const int T[] = { 4000, 5000, 6000, 5714 };
+
+	speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full);
+	if (speed >= ARRAY_SIZE(T))
+		return -1;	/* illegal value */
+
+	return T[speed];
+}
+
+/**
+ * genwqe_setup_pf_jtimer() - Setup PF hardware timeouts for DDCB execution
+ *
+ * Do this _after_ card_reset() is called. Otherwise the values will
+ * vanish. The settings need to be done when the queues are inactive.
+ *
+ * The max. timeout value is 2^(10+x) * T (6ns for 166MHz) * 15/16.
+ * The min. timeout value is 2^(10+x) * T (6ns for 166MHz) * 14/16.
+ */
+static bool genwqe_setup_pf_jtimer(struct genwqe_dev *cd)
+{
+	u32 T = genwqe_T_psec(cd);
+	u64 x;
+
+	if (GENWQE_PF_JOBTIMEOUT_MSEC == 0)
+		return false;
+
+	/* PF: large value needed, flash update 2sec per block */
+	x = ilog2(GENWQE_PF_JOBTIMEOUT_MSEC *
+		  16000000000uL/(T * 15)) - 10;
+
+	genwqe_write_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT,
+			  0xff00 | (x & 0xff), 0);
+	return true;
+}
+
+/**
+ * genwqe_setup_vf_jtimer() - Setup VF hardware timeouts for DDCB execution
+ */
+static bool genwqe_setup_vf_jtimer(struct genwqe_dev *cd)
+{
+	struct pci_dev *pci_dev = cd->pci_dev;
+	unsigned int vf;
+	u32 T = genwqe_T_psec(cd);
+	u64 x;
+	int totalvfs;
+
+	totalvfs = pci_sriov_get_totalvfs(pci_dev);
+	if (totalvfs <= 0)
+		return false;
+
+	for (vf = 0; vf < totalvfs; vf++) {
+
+		if (cd->vf_jobtimeout_msec[vf] == 0)
+			continue;
+
+		x = ilog2(cd->vf_jobtimeout_msec[vf] *
+			  16000000000uL/(T * 15)) - 10;
+
+		genwqe_write_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT,
+				  0xff00 | (x & 0xff), vf + 1);
+	}
+	return true;
+}
+
+static int genwqe_ffdc_buffs_alloc(struct genwqe_dev *cd)
+{
+	unsigned int type, e = 0;
+
+	for (type = 0; type < GENWQE_DBG_UNITS; type++) {
+		switch (type) {
+		case GENWQE_DBG_UNIT0:
+			e = genwqe_ffdc_buff_size(cd, 0);
+			break;
+		case GENWQE_DBG_UNIT1:
+			e = genwqe_ffdc_buff_size(cd, 1);
+			break;
+		case GENWQE_DBG_UNIT2:
+			e = genwqe_ffdc_buff_size(cd, 2);
+			break;
+		case GENWQE_DBG_REGS:
+			e = GENWQE_FFDC_REGS;
+			break;
+		}
+
+		/* currently support only the debug units mentioned here */
+		cd->ffdc[type].entries = e;
+		cd->ffdc[type].regs =
+			kmalloc_array(e, sizeof(struct genwqe_reg),
+				      GFP_KERNEL);
+		/*
+		 * regs == NULL is ok, the using code treats this as no regs,
+		 * Printing warning is ok in this case.
+		 */
+	}
+	return 0;
+}
+
+static void genwqe_ffdc_buffs_free(struct genwqe_dev *cd)
+{
+	unsigned int type;
+
+	for (type = 0; type < GENWQE_DBG_UNITS; type++) {
+		kfree(cd->ffdc[type].regs);
+		cd->ffdc[type].regs = NULL;
+	}
+}
+
+static int genwqe_read_ids(struct genwqe_dev *cd)
+{
+	int err = 0;
+	int slu_id;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	cd->slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG);
+	if (cd->slu_unitcfg == IO_ILLEGAL_VALUE) {
+		dev_err(&pci_dev->dev,
+			"err: SLUID=%016llx\n", cd->slu_unitcfg);
+		err = -EIO;
+		goto out_err;
+	}
+
+	slu_id = genwqe_get_slu_id(cd);
+	if (slu_id < GENWQE_SLU_ARCH_REQ || slu_id == 0xff) {
+		dev_err(&pci_dev->dev,
+			"err: incompatible SLU Architecture %u\n", slu_id);
+		err = -ENOENT;
+		goto out_err;
+	}
+
+	cd->app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG);
+	if (cd->app_unitcfg == IO_ILLEGAL_VALUE) {
+		dev_err(&pci_dev->dev,
+			"err: APPID=%016llx\n", cd->app_unitcfg);
+		err = -EIO;
+		goto out_err;
+	}
+	genwqe_read_app_id(cd, cd->app_name, sizeof(cd->app_name));
+
+	/*
+	 * Is access to all registers possible? If we are a VF the
+	 * answer is obvious. If we run fully virtualized, we need to
+	 * check if we can access all registers. If we do not have
+	 * full access we will cause an UR and some informational FIRs
+	 * in the PF, but that should not harm.
+	 */
+	if (pci_dev->is_virtfn)
+		cd->is_privileged = 0;
+	else
+		cd->is_privileged = (__genwqe_readq(cd, IO_SLU_BITSTREAM)
+				     != IO_ILLEGAL_VALUE);
+
+ out_err:
+	return err;
+}
+
+static int genwqe_start(struct genwqe_dev *cd)
+{
+	int err;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	err = genwqe_read_ids(cd);
+	if (err)
+		return err;
+
+	if (genwqe_is_privileged(cd)) {
+		/* do this after the tweaks. alloc fail is acceptable */
+		genwqe_ffdc_buffs_alloc(cd);
+		genwqe_stop_traps(cd);
+
+		/* Collect registers e.g. FIRs, UNITIDs, traces ... */
+		genwqe_read_ffdc_regs(cd, cd->ffdc[GENWQE_DBG_REGS].regs,
+				      cd->ffdc[GENWQE_DBG_REGS].entries, 0);
+
+		genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT0,
+				      cd->ffdc[GENWQE_DBG_UNIT0].regs,
+				      cd->ffdc[GENWQE_DBG_UNIT0].entries);
+
+		genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT1,
+				      cd->ffdc[GENWQE_DBG_UNIT1].regs,
+				      cd->ffdc[GENWQE_DBG_UNIT1].entries);
+
+		genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT2,
+				      cd->ffdc[GENWQE_DBG_UNIT2].regs,
+				      cd->ffdc[GENWQE_DBG_UNIT2].entries);
+
+		genwqe_start_traps(cd);
+
+		if (cd->card_state == GENWQE_CARD_FATAL_ERROR) {
+			dev_warn(&pci_dev->dev,
+				 "[%s] chip reload/recovery!\n", __func__);
+
+			/*
+			 * Stealth Mode: Reload chip on either hot
+			 * reset or PERST.
+			 */
+			cd->softreset = 0x7Cull;
+			__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET,
+				       cd->softreset);
+
+			err = genwqe_bus_reset(cd);
+			if (err != 0) {
+				dev_err(&pci_dev->dev,
+					"[%s] err: bus reset failed!\n",
+					__func__);
+				goto out;
+			}
+
+			/*
+			 * Re-read the IDs because
+			 * it could happen that the bitstream load
+			 * failed!
+			 */
+			err = genwqe_read_ids(cd);
+			if (err)
+				goto out;
+		}
+	}
+
+	err = genwqe_setup_service_layer(cd);  /* does a reset to the card */
+	if (err != 0) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: could not setup servicelayer!\n", __func__);
+		err = -ENODEV;
+		goto out;
+	}
+
+	if (genwqe_is_privileged(cd)) {	 /* code is running _after_ reset */
+		genwqe_tweak_hardware(cd);
+
+		genwqe_setup_pf_jtimer(cd);
+		genwqe_setup_vf_jtimer(cd);
+	}
+
+	err = genwqe_device_create(cd);
+	if (err < 0) {
+		dev_err(&pci_dev->dev,
+			"err: chdev init failed! (err=%d)\n", err);
+		goto out_release_service_layer;
+	}
+	return 0;
+
+ out_release_service_layer:
+	genwqe_release_service_layer(cd);
+ out:
+	if (genwqe_is_privileged(cd))
+		genwqe_ffdc_buffs_free(cd);
+	return -EIO;
+}
+
+/**
+ * genwqe_stop() - Stop card operation
+ *
+ * Recovery notes:
+ *   As long as genwqe_thread runs we might access registers during
+ *   error data capture. Same is with the genwqe_health_thread.
+ *   When genwqe_bus_reset() fails this function might called two times:
+ *   first by the genwqe_health_thread() and later by genwqe_remove() to
+ *   unbind the device. We must be able to survive that.
+ *
+ * This function must be robust enough to be called twice.
+ */
+static int genwqe_stop(struct genwqe_dev *cd)
+{
+	genwqe_finish_queue(cd);	    /* no register access */
+	genwqe_device_remove(cd);	    /* device removed, procs killed */
+	genwqe_release_service_layer(cd);   /* here genwqe_thread is stopped */
+
+	if (genwqe_is_privileged(cd)) {
+		pci_disable_sriov(cd->pci_dev);	/* access pci config space */
+		genwqe_ffdc_buffs_free(cd);
+	}
+
+	return 0;
+}
+
+/**
+ * genwqe_recover_card() - Try to recover the card if it is possible
+ *
+ * If fatal_err is set no register access is possible anymore. It is
+ * likely that genwqe_start fails in that situation. Proper error
+ * handling is required in this case.
+ *
+ * genwqe_bus_reset() will cause the pci code to call genwqe_remove()
+ * and later genwqe_probe() for all virtual functions.
+ */
+static int genwqe_recover_card(struct genwqe_dev *cd, int fatal_err)
+{
+	int rc;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	genwqe_stop(cd);
+
+	/*
+	 * Make sure chip is not reloaded to maintain FFDC. Write SLU
+	 * Reset Register, CPLDReset field to 0.
+	 */
+	if (!fatal_err) {
+		cd->softreset = 0x70ull;
+		__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, cd->softreset);
+	}
+
+	rc = genwqe_bus_reset(cd);
+	if (rc != 0) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: card recovery impossible!\n", __func__);
+		return rc;
+	}
+
+	rc = genwqe_start(cd);
+	if (rc < 0) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: failed to launch device!\n", __func__);
+		return rc;
+	}
+	return 0;
+}
+
+static int genwqe_health_check_cond(struct genwqe_dev *cd, u64 *gfir)
+{
+	*gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+	return (*gfir & GFIR_ERR_TRIGGER) &&
+		genwqe_recovery_on_fatal_gfir_required(cd);
+}
+
+/**
+ * genwqe_fir_checking() - Check the fault isolation registers of the card
+ *
+ * If this code works ok, can be tried out with help of the genwqe_poke tool:
+ *   sudo ./tools/genwqe_poke 0x8 0xfefefefefef
+ *
+ * Now the relevant FIRs/sFIRs should be printed out and the driver should
+ * invoke recovery (devices are removed and readded).
+ */
+static u64 genwqe_fir_checking(struct genwqe_dev *cd)
+{
+	int j, iterations = 0;
+	u64 mask, fir, fec, uid, gfir, gfir_masked, sfir, sfec;
+	u32 fir_addr, fir_clr_addr, fec_addr, sfir_addr, sfec_addr;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+ healthMonitor:
+	iterations++;
+	if (iterations > 16) {
+		dev_err(&pci_dev->dev, "* exit looping after %d times\n",
+			iterations);
+		goto fatal_error;
+	}
+
+	gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+	if (gfir != 0x0)
+		dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n",
+				    IO_SLC_CFGREG_GFIR, gfir);
+	if (gfir == IO_ILLEGAL_VALUE)
+		goto fatal_error;
+
+	/*
+	 * Avoid printing when to GFIR bit is on prevents contignous
+	 * printout e.g. for the following bug:
+	 *   FIR set without a 2ndary FIR/FIR cannot be cleared
+	 * Comment out the following if to get the prints:
+	 */
+	if (gfir == 0)
+		return 0;
+
+	gfir_masked = gfir & GFIR_ERR_TRIGGER;  /* fatal errors */
+
+	for (uid = 0; uid < GENWQE_MAX_UNITS; uid++) { /* 0..2 in zEDC */
+
+		/* read the primary FIR (pfir) */
+		fir_addr = (uid << 24) + 0x08;
+		fir = __genwqe_readq(cd, fir_addr);
+		if (fir == 0x0)
+			continue;  /* no error in this unit */
+
+		dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fir_addr, fir);
+		if (fir == IO_ILLEGAL_VALUE)
+			goto fatal_error;
+
+		/* read primary FEC */
+		fec_addr = (uid << 24) + 0x18;
+		fec = __genwqe_readq(cd, fec_addr);
+
+		dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fec_addr, fec);
+		if (fec == IO_ILLEGAL_VALUE)
+			goto fatal_error;
+
+		for (j = 0, mask = 1ULL; j < 64; j++, mask <<= 1) {
+
+			/* secondary fir empty, skip it */
+			if ((fir & mask) == 0x0)
+				continue;
+
+			sfir_addr = (uid << 24) + 0x100 + 0x08 * j;
+			sfir = __genwqe_readq(cd, sfir_addr);
+
+			if (sfir == IO_ILLEGAL_VALUE)
+				goto fatal_error;
+			dev_err(&pci_dev->dev,
+				"* 0x%08x 0x%016llx\n", sfir_addr, sfir);
+
+			sfec_addr = (uid << 24) + 0x300 + 0x08 * j;
+			sfec = __genwqe_readq(cd, sfec_addr);
+
+			if (sfec == IO_ILLEGAL_VALUE)
+				goto fatal_error;
+			dev_err(&pci_dev->dev,
+				"* 0x%08x 0x%016llx\n", sfec_addr, sfec);
+
+			gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+			if (gfir == IO_ILLEGAL_VALUE)
+				goto fatal_error;
+
+			/* gfir turned on during routine! get out and
+			   start over. */
+			if ((gfir_masked == 0x0) &&
+			    (gfir & GFIR_ERR_TRIGGER)) {
+				goto healthMonitor;
+			}
+
+			/* do not clear if we entered with a fatal gfir */
+			if (gfir_masked == 0x0) {
+
+				/* NEW clear by mask the logged bits */
+				sfir_addr = (uid << 24) + 0x100 + 0x08 * j;
+				__genwqe_writeq(cd, sfir_addr, sfir);
+
+				dev_dbg(&pci_dev->dev,
+					"[HM] Clearing  2ndary FIR 0x%08x with 0x%016llx\n",
+					sfir_addr, sfir);
+
+				/*
+				 * note, these cannot be error-Firs
+				 * since gfir_masked is 0 after sfir
+				 * was read. Also, it is safe to do
+				 * this write if sfir=0. Still need to
+				 * clear the primary. This just means
+				 * there is no secondary FIR.
+				 */
+
+				/* clear by mask the logged bit. */
+				fir_clr_addr = (uid << 24) + 0x10;
+				__genwqe_writeq(cd, fir_clr_addr, mask);
+
+				dev_dbg(&pci_dev->dev,
+					"[HM] Clearing primary FIR 0x%08x with 0x%016llx\n",
+					fir_clr_addr, mask);
+			}
+		}
+	}
+	gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+	if (gfir == IO_ILLEGAL_VALUE)
+		goto fatal_error;
+
+	if ((gfir_masked == 0x0) && (gfir & GFIR_ERR_TRIGGER)) {
+		/*
+		 * Check once more that it didn't go on after all the
+		 * FIRS were cleared.
+		 */
+		dev_dbg(&pci_dev->dev, "ACK! Another FIR! Recursing %d!\n",
+			iterations);
+		goto healthMonitor;
+	}
+	return gfir_masked;
+
+ fatal_error:
+	return IO_ILLEGAL_VALUE;
+}
+
+/**
+ * genwqe_pci_fundamental_reset() - trigger a PCIe fundamental reset on the slot
+ *
+ * Note: pci_set_pcie_reset_state() is not implemented on all archs, so this
+ * reset method will not work in all cases.
+ *
+ * Return: 0 on success or error code from pci_set_pcie_reset_state()
+ */
+static int genwqe_pci_fundamental_reset(struct pci_dev *pci_dev)
+{
+	int rc;
+
+	/*
+	 * lock pci config space access from userspace,
+	 * save state and issue PCIe fundamental reset
+	 */
+	pci_cfg_access_lock(pci_dev);
+	pci_save_state(pci_dev);
+	rc = pci_set_pcie_reset_state(pci_dev, pcie_warm_reset);
+	if (!rc) {
+		/* keep PCIe reset asserted for 250ms */
+		msleep(250);
+		pci_set_pcie_reset_state(pci_dev, pcie_deassert_reset);
+		/* Wait for 2s to reload flash and train the link */
+		msleep(2000);
+	}
+	pci_restore_state(pci_dev);
+	pci_cfg_access_unlock(pci_dev);
+	return rc;
+}
+
+
+static int genwqe_platform_recovery(struct genwqe_dev *cd)
+{
+	struct pci_dev *pci_dev = cd->pci_dev;
+	int rc;
+
+	dev_info(&pci_dev->dev,
+		 "[%s] resetting card for error recovery\n", __func__);
+
+	/* Clear out error injection flags */
+	cd->err_inject &= ~(GENWQE_INJECT_HARDWARE_FAILURE |
+			    GENWQE_INJECT_GFIR_FATAL |
+			    GENWQE_INJECT_GFIR_INFO);
+
+	genwqe_stop(cd);
+
+	/* Try recoverying the card with fundamental reset */
+	rc = genwqe_pci_fundamental_reset(pci_dev);
+	if (!rc) {
+		rc = genwqe_start(cd);
+		if (!rc)
+			dev_info(&pci_dev->dev,
+				 "[%s] card recovered\n", __func__);
+		else
+			dev_err(&pci_dev->dev,
+				"[%s] err: cannot start card services! (err=%d)\n",
+				__func__, rc);
+	} else {
+		dev_err(&pci_dev->dev,
+			"[%s] card reset failed\n", __func__);
+	}
+
+	return rc;
+}
+
+/*
+ * genwqe_reload_bistream() - reload card bitstream
+ *
+ * Set the appropriate register and call fundamental reset to reaload the card
+ * bitstream.
+ *
+ * Return: 0 on success, error code otherwise
+ */
+static int genwqe_reload_bistream(struct genwqe_dev *cd)
+{
+	struct pci_dev *pci_dev = cd->pci_dev;
+	int rc;
+
+	dev_info(&pci_dev->dev,
+		 "[%s] resetting card for bitstream reload\n",
+		 __func__);
+
+	genwqe_stop(cd);
+
+	/*
+	 * Cause a CPLD reprogram with the 'next_bitstream'
+	 * partition on PCIe hot or fundamental reset
+	 */
+	__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET,
+			(cd->softreset & 0xcull) | 0x70ull);
+
+	rc = genwqe_pci_fundamental_reset(pci_dev);
+	if (rc) {
+		/*
+		 * A fundamental reset failure can be caused
+		 * by lack of support on the arch, so we just
+		 * log the error and try to start the card
+		 * again.
+		 */
+		dev_err(&pci_dev->dev,
+			"[%s] err: failed to reset card for bitstream reload\n",
+			__func__);
+	}
+
+	rc = genwqe_start(cd);
+	if (rc) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: cannot start card services! (err=%d)\n",
+			__func__, rc);
+		return rc;
+	}
+	dev_info(&pci_dev->dev,
+		 "[%s] card reloaded\n", __func__);
+	return 0;
+}
+
+
+/**
+ * genwqe_health_thread() - Health checking thread
+ *
+ * This thread is only started for the PF of the card.
+ *
+ * This thread monitors the health of the card. A critical situation
+ * is when we read registers which contain -1 (IO_ILLEGAL_VALUE). In
+ * this case we need to be recovered from outside. Writing to
+ * registers will very likely not work either.
+ *
+ * This thread must only exit if kthread_should_stop() becomes true.
+ *
+ * Condition for the health-thread to trigger:
+ *   a) when a kthread_stop() request comes in or
+ *   b) a critical GFIR occured
+ *
+ * Informational GFIRs are checked and potentially printed in
+ * GENWQE_HEALTH_CHECK_INTERVAL seconds.
+ */
+static int genwqe_health_thread(void *data)
+{
+	int rc, should_stop = 0;
+	struct genwqe_dev *cd = data;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	u64 gfir, gfir_masked, slu_unitcfg, app_unitcfg;
+
+ health_thread_begin:
+	while (!kthread_should_stop()) {
+		rc = wait_event_interruptible_timeout(cd->health_waitq,
+			 (genwqe_health_check_cond(cd, &gfir) ||
+			  (should_stop = kthread_should_stop())),
+				GENWQE_HEALTH_CHECK_INTERVAL * HZ);
+
+		if (should_stop)
+			break;
+
+		if (gfir == IO_ILLEGAL_VALUE) {
+			dev_err(&pci_dev->dev,
+				"[%s] GFIR=%016llx\n", __func__, gfir);
+			goto fatal_error;
+		}
+
+		slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG);
+		if (slu_unitcfg == IO_ILLEGAL_VALUE) {
+			dev_err(&pci_dev->dev,
+				"[%s] SLU_UNITCFG=%016llx\n",
+				__func__, slu_unitcfg);
+			goto fatal_error;
+		}
+
+		app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG);
+		if (app_unitcfg == IO_ILLEGAL_VALUE) {
+			dev_err(&pci_dev->dev,
+				"[%s] APP_UNITCFG=%016llx\n",
+				__func__, app_unitcfg);
+			goto fatal_error;
+		}
+
+		gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+		if (gfir == IO_ILLEGAL_VALUE) {
+			dev_err(&pci_dev->dev,
+				"[%s] %s: GFIR=%016llx\n", __func__,
+				(gfir & GFIR_ERR_TRIGGER) ? "err" : "info",
+				gfir);
+			goto fatal_error;
+		}
+
+		gfir_masked = genwqe_fir_checking(cd);
+		if (gfir_masked == IO_ILLEGAL_VALUE)
+			goto fatal_error;
+
+		/*
+		 * GFIR ErrorTrigger bits set => reset the card!
+		 * Never do this for old/manufacturing images!
+		 */
+		if ((gfir_masked) && !cd->skip_recovery &&
+		    genwqe_recovery_on_fatal_gfir_required(cd)) {
+
+			cd->card_state = GENWQE_CARD_FATAL_ERROR;
+
+			rc = genwqe_recover_card(cd, 0);
+			if (rc < 0) {
+				/* FIXME Card is unusable and needs unbind! */
+				goto fatal_error;
+			}
+		}
+
+		if (cd->card_state == GENWQE_CARD_RELOAD_BITSTREAM) {
+			/* Userspace requested card bitstream reload */
+			rc = genwqe_reload_bistream(cd);
+			if (rc)
+				goto fatal_error;
+		}
+
+		cd->last_gfir = gfir;
+		cond_resched();
+	}
+
+	return 0;
+
+ fatal_error:
+	if (cd->use_platform_recovery) {
+		/*
+		 * Since we use raw accessors, EEH errors won't be detected
+		 * by the platform until we do a non-raw MMIO or config space
+		 * read
+		 */
+		readq(cd->mmio + IO_SLC_CFGREG_GFIR);
+
+		/* We do nothing if the card is going over PCI recovery */
+		if (pci_channel_offline(pci_dev))
+			return -EIO;
+
+		/*
+		 * If it's supported by the platform, we try a fundamental reset
+		 * to recover from a fatal error. Otherwise, we continue to wait
+		 * for an external recovery procedure to take care of it.
+		 */
+		rc = genwqe_platform_recovery(cd);
+		if (!rc)
+			goto health_thread_begin;
+	}
+
+	dev_err(&pci_dev->dev,
+		"[%s] card unusable. Please trigger unbind!\n", __func__);
+
+	/* Bring down logical devices to inform user space via udev remove. */
+	cd->card_state = GENWQE_CARD_FATAL_ERROR;
+	genwqe_stop(cd);
+
+	/* genwqe_bus_reset failed(). Now wait for genwqe_remove(). */
+	while (!kthread_should_stop())
+		cond_resched();
+
+	return -EIO;
+}
+
+static int genwqe_health_check_start(struct genwqe_dev *cd)
+{
+	int rc;
+
+	if (GENWQE_HEALTH_CHECK_INTERVAL <= 0)
+		return 0;	/* valid for disabling the service */
+
+	/* moved before request_irq() */
+	/* init_waitqueue_head(&cd->health_waitq); */
+
+	cd->health_thread = kthread_run(genwqe_health_thread, cd,
+					GENWQE_DEVNAME "%d_health",
+					cd->card_idx);
+	if (IS_ERR(cd->health_thread)) {
+		rc = PTR_ERR(cd->health_thread);
+		cd->health_thread = NULL;
+		return rc;
+	}
+	return 0;
+}
+
+static int genwqe_health_thread_running(struct genwqe_dev *cd)
+{
+	return cd->health_thread != NULL;
+}
+
+static int genwqe_health_check_stop(struct genwqe_dev *cd)
+{
+	int rc;
+
+	if (!genwqe_health_thread_running(cd))
+		return -EIO;
+
+	rc = kthread_stop(cd->health_thread);
+	cd->health_thread = NULL;
+	return 0;
+}
+
+/**
+ * genwqe_pci_setup() - Allocate PCIe related resources for our card
+ */
+static int genwqe_pci_setup(struct genwqe_dev *cd)
+{
+	int err;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	err = pci_enable_device_mem(pci_dev);
+	if (err) {
+		dev_err(&pci_dev->dev,
+			"err: failed to enable pci memory (err=%d)\n", err);
+		goto err_out;
+	}
+
+	/* Reserve PCI I/O and memory resources */
+	err = pci_request_mem_regions(pci_dev, genwqe_driver_name);
+	if (err) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: request bars failed (%d)\n", __func__, err);
+		err = -EIO;
+		goto err_disable_device;
+	}
+
+	/* check for 64-bit DMA address supported (DAC) */
+	if (!pci_set_dma_mask(pci_dev, DMA_BIT_MASK(64))) {
+		err = pci_set_consistent_dma_mask(pci_dev, DMA_BIT_MASK(64));
+		if (err) {
+			dev_err(&pci_dev->dev,
+				"err: DMA64 consistent mask error\n");
+			err = -EIO;
+			goto out_release_resources;
+		}
+	/* check for 32-bit DMA address supported (SAC) */
+	} else if (!pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32))) {
+		err = pci_set_consistent_dma_mask(pci_dev, DMA_BIT_MASK(32));
+		if (err) {
+			dev_err(&pci_dev->dev,
+				"err: DMA32 consistent mask error\n");
+			err = -EIO;
+			goto out_release_resources;
+		}
+	} else {
+		dev_err(&pci_dev->dev,
+			"err: neither DMA32 nor DMA64 supported\n");
+		err = -EIO;
+		goto out_release_resources;
+	}
+
+	pci_set_master(pci_dev);
+	pci_enable_pcie_error_reporting(pci_dev);
+
+	/* EEH recovery requires PCIe fundamental reset */
+	pci_dev->needs_freset = 1;
+
+	/* request complete BAR-0 space (length = 0) */
+	cd->mmio_len = pci_resource_len(pci_dev, 0);
+	cd->mmio = pci_iomap(pci_dev, 0, 0);
+	if (cd->mmio == NULL) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: mapping BAR0 failed\n", __func__);
+		err = -ENOMEM;
+		goto out_release_resources;
+	}
+
+	cd->num_vfs = pci_sriov_get_totalvfs(pci_dev);
+	if (cd->num_vfs < 0)
+		cd->num_vfs = 0;
+
+	err = genwqe_read_ids(cd);
+	if (err)
+		goto out_iounmap;
+
+	return 0;
+
+ out_iounmap:
+	pci_iounmap(pci_dev, cd->mmio);
+ out_release_resources:
+	pci_release_mem_regions(pci_dev);
+ err_disable_device:
+	pci_disable_device(pci_dev);
+ err_out:
+	return err;
+}
+
+/**
+ * genwqe_pci_remove() - Free PCIe related resources for our card
+ */
+static void genwqe_pci_remove(struct genwqe_dev *cd)
+{
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (cd->mmio)
+		pci_iounmap(pci_dev, cd->mmio);
+
+	pci_release_mem_regions(pci_dev);
+	pci_disable_device(pci_dev);
+}
+
+/**
+ * genwqe_probe() - Device initialization
+ * @pdev:	PCI device information struct
+ *
+ * Callable for multiple cards. This function is called on bind.
+ *
+ * Return: 0 if succeeded, < 0 when failed
+ */
+static int genwqe_probe(struct pci_dev *pci_dev,
+			const struct pci_device_id *id)
+{
+	int err;
+	struct genwqe_dev *cd;
+
+	genwqe_init_crc32();
+
+	cd = genwqe_dev_alloc();
+	if (IS_ERR(cd)) {
+		dev_err(&pci_dev->dev, "err: could not alloc mem (err=%d)!\n",
+			(int)PTR_ERR(cd));
+		return PTR_ERR(cd);
+	}
+
+	dev_set_drvdata(&pci_dev->dev, cd);
+	cd->pci_dev = pci_dev;
+
+	err = genwqe_pci_setup(cd);
+	if (err < 0) {
+		dev_err(&pci_dev->dev,
+			"err: problems with PCI setup (err=%d)\n", err);
+		goto out_free_dev;
+	}
+
+	err = genwqe_start(cd);
+	if (err < 0) {
+		dev_err(&pci_dev->dev,
+			"err: cannot start card services! (err=%d)\n", err);
+		goto out_pci_remove;
+	}
+
+	if (genwqe_is_privileged(cd)) {
+		err = genwqe_health_check_start(cd);
+		if (err < 0) {
+			dev_err(&pci_dev->dev,
+				"err: cannot start health checking! (err=%d)\n",
+				err);
+			goto out_stop_services;
+		}
+	}
+	return 0;
+
+ out_stop_services:
+	genwqe_stop(cd);
+ out_pci_remove:
+	genwqe_pci_remove(cd);
+ out_free_dev:
+	genwqe_dev_free(cd);
+	return err;
+}
+
+/**
+ * genwqe_remove() - Called when device is removed (hot-plugable)
+ *
+ * Or when driver is unloaded respecitively when unbind is done.
+ */
+static void genwqe_remove(struct pci_dev *pci_dev)
+{
+	struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev);
+
+	genwqe_health_check_stop(cd);
+
+	/*
+	 * genwqe_stop() must survive if it is called twice
+	 * sequentially. This happens when the health thread calls it
+	 * and fails on genwqe_bus_reset().
+	 */
+	genwqe_stop(cd);
+	genwqe_pci_remove(cd);
+	genwqe_dev_free(cd);
+}
+
+/*
+ * genwqe_err_error_detected() - Error detection callback
+ *
+ * This callback is called by the PCI subsystem whenever a PCI bus
+ * error is detected.
+ */
+static pci_ers_result_t genwqe_err_error_detected(struct pci_dev *pci_dev,
+						 enum pci_channel_state state)
+{
+	struct genwqe_dev *cd;
+
+	dev_err(&pci_dev->dev, "[%s] state=%d\n", __func__, state);
+
+	cd = dev_get_drvdata(&pci_dev->dev);
+	if (cd == NULL)
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	/* Stop the card */
+	genwqe_health_check_stop(cd);
+	genwqe_stop(cd);
+
+	/*
+	 * On permanent failure, the PCI code will call device remove
+	 * after the return of this function.
+	 * genwqe_stop() can be called twice.
+	 */
+	if (state == pci_channel_io_perm_failure) {
+		return PCI_ERS_RESULT_DISCONNECT;
+	} else {
+		genwqe_pci_remove(cd);
+		return PCI_ERS_RESULT_NEED_RESET;
+	}
+}
+
+static pci_ers_result_t genwqe_err_slot_reset(struct pci_dev *pci_dev)
+{
+	int rc;
+	struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev);
+
+	rc = genwqe_pci_setup(cd);
+	if (!rc) {
+		return PCI_ERS_RESULT_RECOVERED;
+	} else {
+		dev_err(&pci_dev->dev,
+			"err: problems with PCI setup (err=%d)\n", rc);
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+}
+
+static pci_ers_result_t genwqe_err_result_none(struct pci_dev *dev)
+{
+	return PCI_ERS_RESULT_NONE;
+}
+
+static void genwqe_err_resume(struct pci_dev *pci_dev)
+{
+	int rc;
+	struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev);
+
+	rc = genwqe_start(cd);
+	if (!rc) {
+		rc = genwqe_health_check_start(cd);
+		if (rc)
+			dev_err(&pci_dev->dev,
+				"err: cannot start health checking! (err=%d)\n",
+				rc);
+	} else {
+		dev_err(&pci_dev->dev,
+			"err: cannot start card services! (err=%d)\n", rc);
+	}
+}
+
+static int genwqe_sriov_configure(struct pci_dev *dev, int numvfs)
+{
+	int rc;
+	struct genwqe_dev *cd = dev_get_drvdata(&dev->dev);
+
+	if (numvfs > 0) {
+		genwqe_setup_vf_jtimer(cd);
+		rc = pci_enable_sriov(dev, numvfs);
+		if (rc < 0)
+			return rc;
+		return numvfs;
+	}
+	if (numvfs == 0) {
+		pci_disable_sriov(dev);
+		return 0;
+	}
+	return 0;
+}
+
+static struct pci_error_handlers genwqe_err_handler = {
+	.error_detected = genwqe_err_error_detected,
+	.mmio_enabled	= genwqe_err_result_none,
+	.slot_reset	= genwqe_err_slot_reset,
+	.resume		= genwqe_err_resume,
+};
+
+static struct pci_driver genwqe_driver = {
+	.name	  = genwqe_driver_name,
+	.id_table = genwqe_device_table,
+	.probe	  = genwqe_probe,
+	.remove	  = genwqe_remove,
+	.sriov_configure = genwqe_sriov_configure,
+	.err_handler = &genwqe_err_handler,
+};
+
+/**
+ * genwqe_devnode() - Set default access mode for genwqe devices.
+ *
+ * Default mode should be rw for everybody. Do not change default
+ * device name.
+ */
+static char *genwqe_devnode(struct device *dev, umode_t *mode)
+{
+	if (mode)
+		*mode = 0666;
+	return NULL;
+}
+
+/**
+ * genwqe_init_module() - Driver registration and initialization
+ */
+static int __init genwqe_init_module(void)
+{
+	int rc;
+
+	class_genwqe = class_create(THIS_MODULE, GENWQE_DEVNAME);
+	if (IS_ERR(class_genwqe)) {
+		pr_err("[%s] create class failed\n", __func__);
+		return -ENOMEM;
+	}
+
+	class_genwqe->devnode = genwqe_devnode;
+
+	debugfs_genwqe = debugfs_create_dir(GENWQE_DEVNAME, NULL);
+	if (!debugfs_genwqe) {
+		rc = -ENOMEM;
+		goto err_out;
+	}
+
+	rc = pci_register_driver(&genwqe_driver);
+	if (rc != 0) {
+		pr_err("[%s] pci_reg_driver (rc=%d)\n", __func__, rc);
+		goto err_out0;
+	}
+
+	return rc;
+
+ err_out0:
+	debugfs_remove(debugfs_genwqe);
+ err_out:
+	class_destroy(class_genwqe);
+	return rc;
+}
+
+/**
+ * genwqe_exit_module() - Driver exit
+ */
+static void __exit genwqe_exit_module(void)
+{
+	pci_unregister_driver(&genwqe_driver);
+	debugfs_remove(debugfs_genwqe);
+	class_destroy(class_genwqe);
+}
+
+module_init(genwqe_init_module);
+module_exit(genwqe_exit_module);
diff --git a/drivers/misc/genwqe/card_base.h b/drivers/misc/genwqe/card_base.h
new file mode 100644
index 000000000..77ed3967c
--- /dev/null
+++ b/drivers/misc/genwqe/card_base.h
@@ -0,0 +1,585 @@
+#ifndef __CARD_BASE_H__
+#define __CARD_BASE_H__
+
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Interfaces within the GenWQE module. Defines genwqe_card and
+ * ddcb_queue as well as ddcb_requ.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/cdev.h>
+#include <linux/stringify.h>
+#include <linux/pci.h>
+#include <linux/semaphore.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+
+#include <linux/genwqe/genwqe_card.h>
+#include "genwqe_driver.h"
+
+#define GENWQE_MSI_IRQS			4  /* Just one supported, no MSIx */
+
+#define GENWQE_MAX_VFS			15 /* maximum 15 VFs are possible */
+#define GENWQE_MAX_FUNCS		16 /* 1 PF and 15 VFs */
+#define GENWQE_CARD_NO_MAX		(16 * GENWQE_MAX_FUNCS)
+
+/* Compile parameters, some of them appear in debugfs for later adjustment */
+#define GENWQE_DDCB_MAX			32 /* DDCBs on the work-queue */
+#define GENWQE_POLLING_ENABLED		0  /* in case of irqs not working */
+#define GENWQE_DDCB_SOFTWARE_TIMEOUT	10 /* timeout per DDCB in seconds */
+#define GENWQE_KILL_TIMEOUT		8  /* time until process gets killed */
+#define GENWQE_VF_JOBTIMEOUT_MSEC	250  /* 250 msec */
+#define GENWQE_PF_JOBTIMEOUT_MSEC	8000 /* 8 sec should be ok */
+#define GENWQE_HEALTH_CHECK_INTERVAL	4 /* <= 0: disabled */
+
+/* Sysfs attribute groups used when we create the genwqe device */
+extern const struct attribute_group *genwqe_attribute_groups[];
+
+/*
+ * Config space for Genwqe5 A7:
+ * 00:[14 10 4b 04]40 00 10 00[00 00 00 12]00 00 00 00
+ * 10: 0c 00 00 f0 07 3c 00 00 00 00 00 00 00 00 00 00
+ * 20: 00 00 00 00 00 00 00 00 00 00 00 00[14 10 4b 04]
+ * 30: 00 00 00 00 50 00 00 00 00 00 00 00 00 00 00 00
+ */
+#define PCI_DEVICE_GENWQE		0x044b /* Genwqe DeviceID */
+
+#define PCI_SUBSYSTEM_ID_GENWQE5	0x035f /* Genwqe A5 Subsystem-ID */
+#define PCI_SUBSYSTEM_ID_GENWQE5_NEW	0x044b /* Genwqe A5 Subsystem-ID */
+#define PCI_CLASSCODE_GENWQE5		0x1200 /* UNKNOWN */
+
+#define PCI_SUBVENDOR_ID_IBM_SRIOV	0x0000
+#define PCI_SUBSYSTEM_ID_GENWQE5_SRIOV	0x0000 /* Genwqe A5 Subsystem-ID */
+#define PCI_CLASSCODE_GENWQE5_SRIOV	0x1200 /* UNKNOWN */
+
+#define	GENWQE_SLU_ARCH_REQ		2 /* Required SLU architecture level */
+
+/**
+ * struct genwqe_reg - Genwqe data dump functionality
+ */
+struct genwqe_reg {
+	u32 addr;
+	u32 idx;
+	u64 val;
+};
+
+/*
+ * enum genwqe_dbg_type - Specify chip unit to dump/debug
+ */
+enum genwqe_dbg_type {
+	GENWQE_DBG_UNIT0 = 0,  /* captured before prev errs cleared */
+	GENWQE_DBG_UNIT1 = 1,
+	GENWQE_DBG_UNIT2 = 2,
+	GENWQE_DBG_UNIT3 = 3,
+	GENWQE_DBG_UNIT4 = 4,
+	GENWQE_DBG_UNIT5 = 5,
+	GENWQE_DBG_UNIT6 = 6,
+	GENWQE_DBG_UNIT7 = 7,
+	GENWQE_DBG_REGS  = 8,
+	GENWQE_DBG_DMA   = 9,
+	GENWQE_DBG_UNITS = 10, /* max number of possible debug units  */
+};
+
+/* Software error injection to simulate card failures */
+#define GENWQE_INJECT_HARDWARE_FAILURE	0x00000001 /* injects -1 reg reads */
+#define GENWQE_INJECT_BUS_RESET_FAILURE 0x00000002 /* pci_bus_reset fail */
+#define GENWQE_INJECT_GFIR_FATAL	0x00000004 /* GFIR = 0x0000ffff */
+#define GENWQE_INJECT_GFIR_INFO		0x00000008 /* GFIR = 0xffff0000 */
+
+/*
+ * Genwqe card description and management data.
+ *
+ * Error-handling in case of card malfunction
+ * ------------------------------------------
+ *
+ * If the card is detected to be defective the outside environment
+ * will cause the PCI layer to call deinit (the cleanup function for
+ * probe). This is the same effect like doing a unbind/bind operation
+ * on the card.
+ *
+ * The genwqe card driver implements a health checking thread which
+ * verifies the card function. If this detects a problem the cards
+ * device is being shutdown and restarted again, along with a reset of
+ * the card and queue.
+ *
+ * All functions accessing the card device return either -EIO or -ENODEV
+ * code to indicate the malfunction to the user. The user has to close
+ * the file descriptor and open a new one, once the card becomes
+ * available again.
+ *
+ * If the open file descriptor is setup to receive SIGIO, the signal is
+ * genereated for the application which has to provide a handler to
+ * react on it. If the application does not close the open
+ * file descriptor a SIGKILL is send to enforce freeing the cards
+ * resources.
+ *
+ * I did not find a different way to prevent kernel problems due to
+ * reference counters for the cards character devices getting out of
+ * sync. The character device deallocation does not block, even if
+ * there is still an open file descriptor pending. If this pending
+ * descriptor is closed, the data structures used by the character
+ * device is reinstantiated, which will lead to the reference counter
+ * dropping below the allowed values.
+ *
+ * Card recovery
+ * -------------
+ *
+ * To test the internal driver recovery the following command can be used:
+ *   sudo sh -c 'echo 0xfffff > /sys/class/genwqe/genwqe0_card/err_inject'
+ */
+
+
+/**
+ * struct dma_mapping_type - Mapping type definition
+ *
+ * To avoid memcpying data arround we use user memory directly. To do
+ * this we need to pin/swap-in the memory and request a DMA address
+ * for it.
+ */
+enum dma_mapping_type {
+	GENWQE_MAPPING_RAW = 0,		/* contignous memory buffer */
+	GENWQE_MAPPING_SGL_TEMP,	/* sglist dynamically used */
+	GENWQE_MAPPING_SGL_PINNED,	/* sglist used with pinning */
+};
+
+/**
+ * struct dma_mapping - Information about memory mappings done by the driver
+ */
+struct dma_mapping {
+	enum dma_mapping_type type;
+
+	void *u_vaddr;			/* user-space vaddr/non-aligned */
+	void *k_vaddr;			/* kernel-space vaddr/non-aligned */
+	dma_addr_t dma_addr;		/* physical DMA address */
+
+	struct page **page_list;	/* list of pages used by user buff */
+	dma_addr_t *dma_list;		/* list of dma addresses per page */
+	unsigned int nr_pages;		/* number of pages */
+	unsigned int size;		/* size in bytes */
+
+	struct list_head card_list;	/* list of usr_maps for card */
+	struct list_head pin_list;	/* list of pinned memory for dev */
+	int write;			/* writable map? useful in unmapping */
+};
+
+static inline void genwqe_mapping_init(struct dma_mapping *m,
+				       enum dma_mapping_type type)
+{
+	memset(m, 0, sizeof(*m));
+	m->type = type;
+	m->write = 1; /* Assume the maps we create are R/W */
+}
+
+/**
+ * struct ddcb_queue - DDCB queue data
+ * @ddcb_max:          Number of DDCBs on the queue
+ * @ddcb_next:         Next free DDCB
+ * @ddcb_act:          Next DDCB supposed to finish
+ * @ddcb_seq:          Sequence number of last DDCB
+ * @ddcbs_in_flight:   Currently enqueued DDCBs
+ * @ddcbs_completed:   Number of already completed DDCBs
+ * @return_on_busy:    Number of -EBUSY returns on full queue
+ * @wait_on_busy:      Number of waits on full queue
+ * @ddcb_daddr:        DMA address of first DDCB in the queue
+ * @ddcb_vaddr:        Kernel virtual address of first DDCB in the queue
+ * @ddcb_req:          Associated requests (one per DDCB)
+ * @ddcb_waitqs:       Associated wait queues (one per DDCB)
+ * @ddcb_lock:         Lock to protect queuing operations
+ * @ddcb_waitq:        Wait on next DDCB finishing
+ */
+
+struct ddcb_queue {
+	int ddcb_max;			/* amount of DDCBs  */
+	int ddcb_next;			/* next available DDCB num */
+	int ddcb_act;			/* DDCB to be processed */
+	u16 ddcb_seq;			/* slc seq num */
+	unsigned int ddcbs_in_flight;	/* number of ddcbs in processing */
+	unsigned int ddcbs_completed;
+	unsigned int ddcbs_max_in_flight;
+	unsigned int return_on_busy;    /* how many times -EBUSY? */
+	unsigned int wait_on_busy;
+
+	dma_addr_t ddcb_daddr;		/* DMA address */
+	struct ddcb *ddcb_vaddr;	/* kernel virtual addr for DDCBs */
+	struct ddcb_requ **ddcb_req;	/* ddcb processing parameter */
+	wait_queue_head_t *ddcb_waitqs; /* waitqueue per ddcb */
+
+	spinlock_t ddcb_lock;		/* exclusive access to queue */
+	wait_queue_head_t busy_waitq;   /* wait for ddcb processing */
+
+	/* registers or the respective queue to be used */
+	u32 IO_QUEUE_CONFIG;
+	u32 IO_QUEUE_STATUS;
+	u32 IO_QUEUE_SEGMENT;
+	u32 IO_QUEUE_INITSQN;
+	u32 IO_QUEUE_WRAP;
+	u32 IO_QUEUE_OFFSET;
+	u32 IO_QUEUE_WTIME;
+	u32 IO_QUEUE_ERRCNTS;
+	u32 IO_QUEUE_LRW;
+};
+
+/*
+ * GFIR, SLU_UNITCFG, APP_UNITCFG
+ *   8 Units with FIR/FEC + 64 * 2ndary FIRS/FEC.
+ */
+#define GENWQE_FFDC_REGS	(3 + (8 * (2 + 2 * 64)))
+
+struct genwqe_ffdc {
+	unsigned int entries;
+	struct genwqe_reg *regs;
+};
+
+/**
+ * struct genwqe_dev - GenWQE device information
+ * @card_state:       Card operation state, see above
+ * @ffdc:             First Failure Data Capture buffers for each unit
+ * @card_thread:      Working thread to operate the DDCB queue
+ * @card_waitq:       Wait queue used in card_thread
+ * @queue:            DDCB queue
+ * @health_thread:    Card monitoring thread (only for PFs)
+ * @health_waitq:     Wait queue used in health_thread
+ * @pci_dev:          Associated PCI device (function)
+ * @mmio:             Base address of 64-bit register space
+ * @mmio_len:         Length of register area
+ * @file_lock:        Lock to protect access to file_list
+ * @file_list:        List of all processes with open GenWQE file descriptors
+ *
+ * This struct contains all information needed to communicate with a
+ * GenWQE card. It is initialized when a GenWQE device is found and
+ * destroyed when it goes away. It holds data to maintain the queue as
+ * well as data needed to feed the user interfaces.
+ */
+struct genwqe_dev {
+	enum genwqe_card_state card_state;
+	spinlock_t print_lock;
+
+	int card_idx;			/* card index 0..CARD_NO_MAX-1 */
+	u64 flags;			/* general flags */
+
+	/* FFDC data gathering */
+	struct genwqe_ffdc ffdc[GENWQE_DBG_UNITS];
+
+	/* DDCB workqueue */
+	struct task_struct *card_thread;
+	wait_queue_head_t queue_waitq;
+	struct ddcb_queue queue;	/* genwqe DDCB queue */
+	unsigned int irqs_processed;
+
+	/* Card health checking thread */
+	struct task_struct *health_thread;
+	wait_queue_head_t health_waitq;
+
+	int use_platform_recovery;	/* use platform recovery mechanisms */
+
+	/* char device */
+	dev_t  devnum_genwqe;		/* major/minor num card */
+	struct class *class_genwqe;	/* reference to class object */
+	struct device *dev;		/* for device creation */
+	struct cdev cdev_genwqe;	/* char device for card */
+
+	struct dentry *debugfs_root;	/* debugfs card root directory */
+	struct dentry *debugfs_genwqe;	/* debugfs driver root directory */
+
+	/* pci resources */
+	struct pci_dev *pci_dev;	/* PCI device */
+	void __iomem *mmio;		/* BAR-0 MMIO start */
+	unsigned long mmio_len;
+	int num_vfs;
+	u32 vf_jobtimeout_msec[GENWQE_MAX_VFS];
+	int is_privileged;		/* access to all regs possible */
+
+	/* config regs which we need often */
+	u64 slu_unitcfg;
+	u64 app_unitcfg;
+	u64 softreset;
+	u64 err_inject;
+	u64 last_gfir;
+	char app_name[5];
+
+	spinlock_t file_lock;		/* lock for open files */
+	struct list_head file_list;	/* list of open files */
+
+	/* debugfs parameters */
+	int ddcb_software_timeout;	/* wait until DDCB times out */
+	int skip_recovery;		/* circumvention if recovery fails */
+	int kill_timeout;		/* wait after sending SIGKILL */
+};
+
+/**
+ * enum genwqe_requ_state - State of a DDCB execution request
+ */
+enum genwqe_requ_state {
+	GENWQE_REQU_NEW      = 0,
+	GENWQE_REQU_ENQUEUED = 1,
+	GENWQE_REQU_TAPPED   = 2,
+	GENWQE_REQU_FINISHED = 3,
+	GENWQE_REQU_STATE_MAX,
+};
+
+/**
+ * struct genwqe_sgl - Scatter gather list describing user-space memory
+ * @sgl:            scatter gather list needs to be 128 byte aligned
+ * @sgl_dma_addr:   dma address of sgl
+ * @sgl_size:       size of area used for sgl
+ * @user_addr:      user-space address of memory area
+ * @user_size:      size of user-space memory area
+ * @page:           buffer for partial pages if needed
+ * @page_dma_addr:  dma address partial pages
+ * @write:          should we write it back to userspace?
+ */
+struct genwqe_sgl {
+	dma_addr_t sgl_dma_addr;
+	struct sg_entry *sgl;
+	size_t sgl_size;	/* size of sgl */
+
+	void __user *user_addr; /* user-space base-address */
+	size_t user_size;       /* size of memory area */
+
+	int write;
+
+	unsigned long nr_pages;
+	unsigned long fpage_offs;
+	size_t fpage_size;
+	size_t lpage_size;
+
+	void *fpage;
+	dma_addr_t fpage_dma_addr;
+
+	void *lpage;
+	dma_addr_t lpage_dma_addr;
+};
+
+int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
+			  void __user *user_addr, size_t user_size, int write);
+
+int genwqe_setup_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
+		     dma_addr_t *dma_list);
+
+int genwqe_free_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl);
+
+/**
+ * struct ddcb_requ - Kernel internal representation of the DDCB request
+ * @cmd:          User space representation of the DDCB execution request
+ */
+struct ddcb_requ {
+	/* kernel specific content */
+	enum genwqe_requ_state req_state; /* request status */
+	int num;			  /* ddcb_no for this request */
+	struct ddcb_queue *queue;	  /* associated queue */
+
+	struct dma_mapping  dma_mappings[DDCB_FIXUPS];
+	struct genwqe_sgl sgls[DDCB_FIXUPS];
+
+	/* kernel/user shared content */
+	struct genwqe_ddcb_cmd cmd;	/* ddcb_no for this request */
+	struct genwqe_debug_data debug_data;
+};
+
+/**
+ * struct genwqe_file - Information for open GenWQE devices
+ */
+struct genwqe_file {
+	struct genwqe_dev *cd;
+	struct genwqe_driver *client;
+	struct file *filp;
+
+	struct fasync_struct *async_queue;
+	struct pid *opener;
+	struct list_head list;		/* entry in list of open files */
+
+	spinlock_t map_lock;		/* lock for dma_mappings */
+	struct list_head map_list;	/* list of dma_mappings */
+
+	spinlock_t pin_lock;		/* lock for pinned memory */
+	struct list_head pin_list;	/* list of pinned memory */
+};
+
+int  genwqe_setup_service_layer(struct genwqe_dev *cd); /* for PF only */
+int  genwqe_finish_queue(struct genwqe_dev *cd);
+int  genwqe_release_service_layer(struct genwqe_dev *cd);
+
+/**
+ * genwqe_get_slu_id() - Read Service Layer Unit Id
+ * Return: 0x00: Development code
+ *         0x01: SLC1 (old)
+ *         0x02: SLC2 (sept2012)
+ *         0x03: SLC2 (feb2013, generic driver)
+ */
+static inline int genwqe_get_slu_id(struct genwqe_dev *cd)
+{
+	return (int)((cd->slu_unitcfg >> 32) & 0xff);
+}
+
+int  genwqe_ddcbs_in_flight(struct genwqe_dev *cd);
+
+u8   genwqe_card_type(struct genwqe_dev *cd);
+int  genwqe_card_reset(struct genwqe_dev *cd);
+int  genwqe_set_interrupt_capability(struct genwqe_dev *cd, int count);
+void genwqe_reset_interrupt_capability(struct genwqe_dev *cd);
+
+int  genwqe_device_create(struct genwqe_dev *cd);
+int  genwqe_device_remove(struct genwqe_dev *cd);
+
+/* debugfs */
+int  genwqe_init_debugfs(struct genwqe_dev *cd);
+void genqwe_exit_debugfs(struct genwqe_dev *cd);
+
+int  genwqe_read_softreset(struct genwqe_dev *cd);
+
+/* Hardware Circumventions */
+int  genwqe_recovery_on_fatal_gfir_required(struct genwqe_dev *cd);
+int  genwqe_flash_readback_fails(struct genwqe_dev *cd);
+
+/**
+ * genwqe_write_vreg() - Write register in VF window
+ * @cd:    genwqe device
+ * @reg:   register address
+ * @val:   value to write
+ * @func:  0: PF, 1: VF0, ..., 15: VF14
+ */
+int genwqe_write_vreg(struct genwqe_dev *cd, u32 reg, u64 val, int func);
+
+/**
+ * genwqe_read_vreg() - Read register in VF window
+ * @cd:    genwqe device
+ * @reg:   register address
+ * @func:  0: PF, 1: VF0, ..., 15: VF14
+ *
+ * Return: content of the register
+ */
+u64 genwqe_read_vreg(struct genwqe_dev *cd, u32 reg, int func);
+
+/* FFDC Buffer Management */
+int  genwqe_ffdc_buff_size(struct genwqe_dev *cd, int unit_id);
+int  genwqe_ffdc_buff_read(struct genwqe_dev *cd, int unit_id,
+			   struct genwqe_reg *regs, unsigned int max_regs);
+int  genwqe_read_ffdc_regs(struct genwqe_dev *cd, struct genwqe_reg *regs,
+			   unsigned int max_regs, int all);
+int  genwqe_ffdc_dump_dma(struct genwqe_dev *cd,
+			  struct genwqe_reg *regs, unsigned int max_regs);
+
+int  genwqe_init_debug_data(struct genwqe_dev *cd,
+			    struct genwqe_debug_data *d);
+
+void genwqe_init_crc32(void);
+int  genwqe_read_app_id(struct genwqe_dev *cd, char *app_name, int len);
+
+/* Memory allocation/deallocation; dma address handling */
+int  genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m,
+		      void *uaddr, unsigned long size);
+
+int  genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m);
+
+static inline bool dma_mapping_used(struct dma_mapping *m)
+{
+	if (!m)
+		return false;
+	return m->size != 0;
+}
+
+/**
+ * __genwqe_execute_ddcb() - Execute DDCB request with addr translation
+ *
+ * This function will do the address translation changes to the DDCBs
+ * according to the definitions required by the ATS field. It looks up
+ * the memory allocation buffer or does vmap/vunmap for the respective
+ * user-space buffers, inclusive page pinning and scatter gather list
+ * buildup and teardown.
+ */
+int  __genwqe_execute_ddcb(struct genwqe_dev *cd,
+			   struct genwqe_ddcb_cmd *cmd, unsigned int f_flags);
+
+/**
+ * __genwqe_execute_raw_ddcb() - Execute DDCB request without addr translation
+ *
+ * This version will not do address translation or any modification of
+ * the DDCB data. It is used e.g. for the MoveFlash DDCB which is
+ * entirely prepared by the driver itself. That means the appropriate
+ * DMA addresses are already in the DDCB and do not need any
+ * modification.
+ */
+int  __genwqe_execute_raw_ddcb(struct genwqe_dev *cd,
+			       struct genwqe_ddcb_cmd *cmd,
+			       unsigned int f_flags);
+int  __genwqe_enqueue_ddcb(struct genwqe_dev *cd,
+			   struct ddcb_requ *req,
+			   unsigned int f_flags);
+
+int  __genwqe_wait_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req);
+int  __genwqe_purge_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req);
+
+/* register access */
+int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val);
+u64 __genwqe_readq(struct genwqe_dev *cd, u64 byte_offs);
+int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val);
+u32 __genwqe_readl(struct genwqe_dev *cd, u64 byte_offs);
+
+void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size,
+				 dma_addr_t *dma_handle);
+void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size,
+			      void *vaddr, dma_addr_t dma_handle);
+
+/* Base clock frequency in MHz */
+int  genwqe_base_clock_frequency(struct genwqe_dev *cd);
+
+/* Before FFDC is captured the traps should be stopped. */
+void genwqe_stop_traps(struct genwqe_dev *cd);
+void genwqe_start_traps(struct genwqe_dev *cd);
+
+/* Hardware circumvention */
+bool genwqe_need_err_masking(struct genwqe_dev *cd);
+
+/**
+ * genwqe_is_privileged() - Determine operation mode for PCI function
+ *
+ * On Intel with SRIOV support we see:
+ *   PF: is_physfn = 1 is_virtfn = 0
+ *   VF: is_physfn = 0 is_virtfn = 1
+ *
+ * On Systems with no SRIOV support _and_ virtualized systems we get:
+ *       is_physfn = 0 is_virtfn = 0
+ *
+ * Other vendors have individual pci device ids to distinguish between
+ * virtual function drivers and physical function drivers. GenWQE
+ * unfortunately has just on pci device id for both, VFs and PF.
+ *
+ * The following code is used to distinguish if the card is running in
+ * privileged mode, either as true PF or in a virtualized system with
+ * full register access e.g. currently on PowerPC.
+ *
+ * if (pci_dev->is_virtfn)
+ *          cd->is_privileged = 0;
+ *  else
+ *          cd->is_privileged = (__genwqe_readq(cd, IO_SLU_BITSTREAM)
+ *				 != IO_ILLEGAL_VALUE);
+ */
+static inline int genwqe_is_privileged(struct genwqe_dev *cd)
+{
+	return cd->is_privileged;
+}
+
+#endif	/* __CARD_BASE_H__ */
diff --git a/drivers/misc/genwqe/card_ddcb.c b/drivers/misc/genwqe/card_ddcb.c
new file mode 100644
index 000000000..656449cb4
--- /dev/null
+++ b/drivers/misc/genwqe/card_ddcb.c
@@ -0,0 +1,1410 @@
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Device Driver Control Block (DDCB) queue support. Definition of
+ * interrupt handlers for queue support as well as triggering the
+ * health monitor code in case of problems. The current hardware uses
+ * an MSI interrupt which is shared between error handling and
+ * functional code.
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/crc-itu-t.h>
+
+#include "card_base.h"
+#include "card_ddcb.h"
+
+/*
+ * N: next DDCB, this is where the next DDCB will be put.
+ * A: active DDCB, this is where the code will look for the next completion.
+ * x: DDCB is enqueued, we are waiting for its completion.
+
+ * Situation (1): Empty queue
+ *  +---+---+---+---+---+---+---+---+
+ *  | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ *  |   |   |   |   |   |   |   |   |
+ *  +---+---+---+---+---+---+---+---+
+ *           A/N
+ *  enqueued_ddcbs = A - N = 2 - 2 = 0
+ *
+ * Situation (2): Wrapped, N > A
+ *  +---+---+---+---+---+---+---+---+
+ *  | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ *  |   |   | x | x |   |   |   |   |
+ *  +---+---+---+---+---+---+---+---+
+ *            A       N
+ *  enqueued_ddcbs = N - A = 4 - 2 = 2
+ *
+ * Situation (3): Queue wrapped, A > N
+ *  +---+---+---+---+---+---+---+---+
+ *  | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ *  | x | x |   |   | x | x | x | x |
+ *  +---+---+---+---+---+---+---+---+
+ *            N       A
+ *  enqueued_ddcbs = queue_max  - (A - N) = 8 - (4 - 2) = 6
+ *
+ * Situation (4a): Queue full N > A
+ *  +---+---+---+---+---+---+---+---+
+ *  | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ *  | x | x | x | x | x | x | x |   |
+ *  +---+---+---+---+---+---+---+---+
+ *    A                           N
+ *
+ *  enqueued_ddcbs = N - A = 7 - 0 = 7
+ *
+ * Situation (4a): Queue full A > N
+ *  +---+---+---+---+---+---+---+---+
+ *  | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ *  | x | x | x |   | x | x | x | x |
+ *  +---+---+---+---+---+---+---+---+
+ *                N   A
+ *  enqueued_ddcbs = queue_max - (A - N) = 8 - (4 - 3) = 7
+ */
+
+static int queue_empty(struct ddcb_queue *queue)
+{
+	return queue->ddcb_next == queue->ddcb_act;
+}
+
+static int queue_enqueued_ddcbs(struct ddcb_queue *queue)
+{
+	if (queue->ddcb_next >= queue->ddcb_act)
+		return queue->ddcb_next - queue->ddcb_act;
+
+	return queue->ddcb_max - (queue->ddcb_act - queue->ddcb_next);
+}
+
+static int queue_free_ddcbs(struct ddcb_queue *queue)
+{
+	int free_ddcbs = queue->ddcb_max - queue_enqueued_ddcbs(queue) - 1;
+
+	if (WARN_ON_ONCE(free_ddcbs < 0)) { /* must never ever happen! */
+		return 0;
+	}
+	return free_ddcbs;
+}
+
+/*
+ * Use of the PRIV field in the DDCB for queue debugging:
+ *
+ * (1) Trying to get rid of a DDCB which saw a timeout:
+ *     pddcb->priv[6] = 0xcc;   # cleared
+ *
+ * (2) Append a DDCB via NEXT bit:
+ *     pddcb->priv[7] = 0xaa;	# appended
+ *
+ * (3) DDCB needed tapping:
+ *     pddcb->priv[7] = 0xbb;   # tapped
+ *
+ * (4) DDCB marked as correctly finished:
+ *     pddcb->priv[6] = 0xff;	# finished
+ */
+
+static inline void ddcb_mark_tapped(struct ddcb *pddcb)
+{
+	pddcb->priv[7] = 0xbb;  /* tapped */
+}
+
+static inline void ddcb_mark_appended(struct ddcb *pddcb)
+{
+	pddcb->priv[7] = 0xaa;	/* appended */
+}
+
+static inline void ddcb_mark_cleared(struct ddcb *pddcb)
+{
+	pddcb->priv[6] = 0xcc; /* cleared */
+}
+
+static inline void ddcb_mark_finished(struct ddcb *pddcb)
+{
+	pddcb->priv[6] = 0xff;	/* finished */
+}
+
+static inline void ddcb_mark_unused(struct ddcb *pddcb)
+{
+	pddcb->priv_64 = cpu_to_be64(0); /* not tapped */
+}
+
+/**
+ * genwqe_crc16() - Generate 16-bit crc as required for DDCBs
+ * @buff:       pointer to data buffer
+ * @len:        length of data for calculation
+ * @init:       initial crc (0xffff at start)
+ *
+ * Polynomial = x^16 + x^12 + x^5 + 1   (0x1021)
+ * Example: 4 bytes 0x01 0x02 0x03 0x04 with init = 0xffff
+ *          should result in a crc16 of 0x89c3
+ *
+ * Return: crc16 checksum in big endian format !
+ */
+static inline u16 genwqe_crc16(const u8 *buff, size_t len, u16 init)
+{
+	return crc_itu_t(init, buff, len);
+}
+
+static void print_ddcb_info(struct genwqe_dev *cd, struct ddcb_queue *queue)
+{
+	int i;
+	struct ddcb *pddcb;
+	unsigned long flags;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	spin_lock_irqsave(&cd->print_lock, flags);
+
+	dev_info(&pci_dev->dev,
+		 "DDCB list for card #%d (ddcb_act=%d / ddcb_next=%d):\n",
+		 cd->card_idx, queue->ddcb_act, queue->ddcb_next);
+
+	pddcb = queue->ddcb_vaddr;
+	for (i = 0; i < queue->ddcb_max; i++) {
+		dev_err(&pci_dev->dev,
+			"  %c %-3d: RETC=%03x SEQ=%04x HSI=%02X SHI=%02x PRIV=%06llx CMD=%03x\n",
+			i == queue->ddcb_act ? '>' : ' ',
+			i,
+			be16_to_cpu(pddcb->retc_16),
+			be16_to_cpu(pddcb->seqnum_16),
+			pddcb->hsi,
+			pddcb->shi,
+			be64_to_cpu(pddcb->priv_64),
+			pddcb->cmd);
+		pddcb++;
+	}
+	spin_unlock_irqrestore(&cd->print_lock, flags);
+}
+
+struct genwqe_ddcb_cmd *ddcb_requ_alloc(void)
+{
+	struct ddcb_requ *req;
+
+	req = kzalloc(sizeof(*req), GFP_KERNEL);
+	if (!req)
+		return NULL;
+
+	return &req->cmd;
+}
+
+void ddcb_requ_free(struct genwqe_ddcb_cmd *cmd)
+{
+	struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd);
+
+	kfree(req);
+}
+
+static inline enum genwqe_requ_state ddcb_requ_get_state(struct ddcb_requ *req)
+{
+	return req->req_state;
+}
+
+static inline void ddcb_requ_set_state(struct ddcb_requ *req,
+				       enum genwqe_requ_state new_state)
+{
+	req->req_state = new_state;
+}
+
+static inline int ddcb_requ_collect_debug_data(struct ddcb_requ *req)
+{
+	return req->cmd.ddata_addr != 0x0;
+}
+
+/**
+ * ddcb_requ_finished() - Returns the hardware state of the associated DDCB
+ * @cd:          pointer to genwqe device descriptor
+ * @req:         DDCB work request
+ *
+ * Status of ddcb_requ mirrors this hardware state, but is copied in
+ * the ddcb_requ on interrupt/polling function. The lowlevel code
+ * should check the hardware state directly, the higher level code
+ * should check the copy.
+ *
+ * This function will also return true if the state of the queue is
+ * not GENWQE_CARD_USED. This enables us to purge all DDCBs in the
+ * shutdown case.
+ */
+static int ddcb_requ_finished(struct genwqe_dev *cd, struct ddcb_requ *req)
+{
+	return (ddcb_requ_get_state(req) == GENWQE_REQU_FINISHED) ||
+		(cd->card_state != GENWQE_CARD_USED);
+}
+
+/**
+ * enqueue_ddcb() - Enqueue a DDCB
+ * @cd:         pointer to genwqe device descriptor
+ * @queue:	queue this operation should be done on
+ * @ddcb_no:    pointer to ddcb number being tapped
+ *
+ * Start execution of DDCB by tapping or append to queue via NEXT
+ * bit. This is done by an atomic 'compare and swap' instruction and
+ * checking SHI and HSI of the previous DDCB.
+ *
+ * This function must only be called with ddcb_lock held.
+ *
+ * Return: 1 if new DDCB is appended to previous
+ *         2 if DDCB queue is tapped via register/simulation
+ */
+#define RET_DDCB_APPENDED 1
+#define RET_DDCB_TAPPED   2
+
+static int enqueue_ddcb(struct genwqe_dev *cd, struct ddcb_queue *queue,
+			struct ddcb *pddcb, int ddcb_no)
+{
+	unsigned int try;
+	int prev_no;
+	struct ddcb *prev_ddcb;
+	__be32 old, new, icrc_hsi_shi;
+	u64 num;
+
+	/*
+	 * For performance checks a Dispatch Timestamp can be put into
+	 * DDCB It is supposed to use the SLU's free running counter,
+	 * but this requires PCIe cycles.
+	 */
+	ddcb_mark_unused(pddcb);
+
+	/* check previous DDCB if already fetched */
+	prev_no = (ddcb_no == 0) ? queue->ddcb_max - 1 : ddcb_no - 1;
+	prev_ddcb = &queue->ddcb_vaddr[prev_no];
+
+	/*
+	 * It might have happened that the HSI.FETCHED bit is
+	 * set. Retry in this case. Therefore I expect maximum 2 times
+	 * trying.
+	 */
+	ddcb_mark_appended(pddcb);
+	for (try = 0; try < 2; try++) {
+		old = prev_ddcb->icrc_hsi_shi_32; /* read SHI/HSI in BE32 */
+
+		/* try to append via NEXT bit if prev DDCB is not completed */
+		if ((old & DDCB_COMPLETED_BE32) != 0x00000000)
+			break;
+
+		new = (old | DDCB_NEXT_BE32);
+
+		wmb();		/* need to ensure write ordering */
+		icrc_hsi_shi = cmpxchg(&prev_ddcb->icrc_hsi_shi_32, old, new);
+
+		if (icrc_hsi_shi == old)
+			return RET_DDCB_APPENDED; /* appended to queue */
+	}
+
+	/* Queue must be re-started by updating QUEUE_OFFSET */
+	ddcb_mark_tapped(pddcb);
+	num = (u64)ddcb_no << 8;
+
+	wmb();			/* need to ensure write ordering */
+	__genwqe_writeq(cd, queue->IO_QUEUE_OFFSET, num); /* start queue */
+
+	return RET_DDCB_TAPPED;
+}
+
+/**
+ * copy_ddcb_results() - Copy output state from real DDCB to request
+ *
+ * Copy DDCB ASV to request struct. There is no endian
+ * conversion made, since data structure in ASV is still
+ * unknown here.
+ *
+ * This is needed by:
+ *   - genwqe_purge_ddcb()
+ *   - genwqe_check_ddcb_queue()
+ */
+static void copy_ddcb_results(struct ddcb_requ *req, int ddcb_no)
+{
+	struct ddcb_queue *queue = req->queue;
+	struct ddcb *pddcb = &queue->ddcb_vaddr[req->num];
+
+	memcpy(&req->cmd.asv[0], &pddcb->asv[0], DDCB_ASV_LENGTH);
+
+	/* copy status flags of the variant part */
+	req->cmd.vcrc     = be16_to_cpu(pddcb->vcrc_16);
+	req->cmd.deque_ts = be64_to_cpu(pddcb->deque_ts_64);
+	req->cmd.cmplt_ts = be64_to_cpu(pddcb->cmplt_ts_64);
+
+	req->cmd.attn     = be16_to_cpu(pddcb->attn_16);
+	req->cmd.progress = be32_to_cpu(pddcb->progress_32);
+	req->cmd.retc     = be16_to_cpu(pddcb->retc_16);
+
+	if (ddcb_requ_collect_debug_data(req)) {
+		int prev_no = (ddcb_no == 0) ?
+			queue->ddcb_max - 1 : ddcb_no - 1;
+		struct ddcb *prev_pddcb = &queue->ddcb_vaddr[prev_no];
+
+		memcpy(&req->debug_data.ddcb_finished, pddcb,
+		       sizeof(req->debug_data.ddcb_finished));
+		memcpy(&req->debug_data.ddcb_prev, prev_pddcb,
+		       sizeof(req->debug_data.ddcb_prev));
+	}
+}
+
+/**
+ * genwqe_check_ddcb_queue() - Checks DDCB queue for completed work equests.
+ * @cd:         pointer to genwqe device descriptor
+ *
+ * Return: Number of DDCBs which were finished
+ */
+static int genwqe_check_ddcb_queue(struct genwqe_dev *cd,
+				   struct ddcb_queue *queue)
+{
+	unsigned long flags;
+	int ddcbs_finished = 0;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	spin_lock_irqsave(&queue->ddcb_lock, flags);
+
+	/* FIXME avoid soft locking CPU */
+	while (!queue_empty(queue) && (ddcbs_finished < queue->ddcb_max)) {
+
+		struct ddcb *pddcb;
+		struct ddcb_requ *req;
+		u16 vcrc, vcrc_16, retc_16;
+
+		pddcb = &queue->ddcb_vaddr[queue->ddcb_act];
+
+		if ((pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) ==
+		    0x00000000)
+			goto go_home; /* not completed, continue waiting */
+
+		wmb();  /*  Add sync to decouple prev. read operations */
+
+		/* Note: DDCB could be purged */
+		req = queue->ddcb_req[queue->ddcb_act];
+		if (req == NULL) {
+			/* this occurs if DDCB is purged, not an error */
+			/* Move active DDCB further; Nothing to do anymore. */
+			goto pick_next_one;
+		}
+
+		/*
+		 * HSI=0x44 (fetched and completed), but RETC is
+		 * 0x101, or even worse 0x000.
+		 *
+		 * In case of seeing the queue in inconsistent state
+		 * we read the errcnts and the queue status to provide
+		 * a trigger for our PCIe analyzer stop capturing.
+		 */
+		retc_16 = be16_to_cpu(pddcb->retc_16);
+		if ((pddcb->hsi == 0x44) && (retc_16 <= 0x101)) {
+			u64 errcnts, status;
+			u64 ddcb_offs = (u64)pddcb - (u64)queue->ddcb_vaddr;
+
+			errcnts = __genwqe_readq(cd, queue->IO_QUEUE_ERRCNTS);
+			status  = __genwqe_readq(cd, queue->IO_QUEUE_STATUS);
+
+			dev_err(&pci_dev->dev,
+				"[%s] SEQN=%04x HSI=%02x RETC=%03x Q_ERRCNTS=%016llx Q_STATUS=%016llx DDCB_DMA_ADDR=%016llx\n",
+				__func__, be16_to_cpu(pddcb->seqnum_16),
+				pddcb->hsi, retc_16, errcnts, status,
+				queue->ddcb_daddr + ddcb_offs);
+		}
+
+		copy_ddcb_results(req, queue->ddcb_act);
+		queue->ddcb_req[queue->ddcb_act] = NULL; /* take from queue */
+
+		dev_dbg(&pci_dev->dev, "FINISHED DDCB#%d\n", req->num);
+		genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
+
+		ddcb_mark_finished(pddcb);
+
+		/* calculate CRC_16 to see if VCRC is correct */
+		vcrc = genwqe_crc16(pddcb->asv,
+				   VCRC_LENGTH(req->cmd.asv_length),
+				   0xffff);
+		vcrc_16 = be16_to_cpu(pddcb->vcrc_16);
+		if (vcrc != vcrc_16) {
+			printk_ratelimited(KERN_ERR
+				"%s %s: err: wrong VCRC pre=%02x vcrc_len=%d bytes vcrc_data=%04x is not vcrc_card=%04x\n",
+				GENWQE_DEVNAME, dev_name(&pci_dev->dev),
+				pddcb->pre, VCRC_LENGTH(req->cmd.asv_length),
+				vcrc, vcrc_16);
+		}
+
+		ddcb_requ_set_state(req, GENWQE_REQU_FINISHED);
+		queue->ddcbs_completed++;
+		queue->ddcbs_in_flight--;
+
+		/* wake up process waiting for this DDCB, and
+                   processes on the busy queue */
+		wake_up_interruptible(&queue->ddcb_waitqs[queue->ddcb_act]);
+		wake_up_interruptible(&queue->busy_waitq);
+
+pick_next_one:
+		queue->ddcb_act = (queue->ddcb_act + 1) % queue->ddcb_max;
+		ddcbs_finished++;
+	}
+
+ go_home:
+	spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+	return ddcbs_finished;
+}
+
+/**
+ * __genwqe_wait_ddcb(): Waits until DDCB is completed
+ * @cd:         pointer to genwqe device descriptor
+ * @req:        pointer to requsted DDCB parameters
+ *
+ * The Service Layer will update the RETC in DDCB when processing is
+ * pending or done.
+ *
+ * Return: > 0 remaining jiffies, DDCB completed
+ *           -ETIMEDOUT	when timeout
+ *           -ERESTARTSYS when ^C
+ *           -EINVAL when unknown error condition
+ *
+ * When an error is returned the called needs to ensure that
+ * purge_ddcb() is being called to get the &req removed from the
+ * queue.
+ */
+int __genwqe_wait_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req)
+{
+	int rc;
+	unsigned int ddcb_no;
+	struct ddcb_queue *queue;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (req == NULL)
+		return -EINVAL;
+
+	queue = req->queue;
+	if (queue == NULL)
+		return -EINVAL;
+
+	ddcb_no = req->num;
+	if (ddcb_no >= queue->ddcb_max)
+		return -EINVAL;
+
+	rc = wait_event_interruptible_timeout(queue->ddcb_waitqs[ddcb_no],
+				ddcb_requ_finished(cd, req),
+				GENWQE_DDCB_SOFTWARE_TIMEOUT * HZ);
+
+	/*
+	 * We need to distinguish 3 cases here:
+	 *   1. rc == 0              timeout occured
+	 *   2. rc == -ERESTARTSYS   signal received
+	 *   3. rc > 0               remaining jiffies condition is true
+	 */
+	if (rc == 0) {
+		struct ddcb_queue *queue = req->queue;
+		struct ddcb *pddcb;
+
+		/*
+		 * Timeout may be caused by long task switching time.
+		 * When timeout happens, check if the request has
+		 * meanwhile completed.
+		 */
+		genwqe_check_ddcb_queue(cd, req->queue);
+		if (ddcb_requ_finished(cd, req))
+			return rc;
+
+		dev_err(&pci_dev->dev,
+			"[%s] err: DDCB#%d timeout rc=%d state=%d req @ %p\n",
+			__func__, req->num, rc,	ddcb_requ_get_state(req),
+			req);
+		dev_err(&pci_dev->dev,
+			"[%s]      IO_QUEUE_STATUS=0x%016llx\n", __func__,
+			__genwqe_readq(cd, queue->IO_QUEUE_STATUS));
+
+		pddcb = &queue->ddcb_vaddr[req->num];
+		genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
+
+		print_ddcb_info(cd, req->queue);
+		return -ETIMEDOUT;
+
+	} else if (rc == -ERESTARTSYS) {
+		return rc;
+		/*
+		 * EINTR:       Stops the application
+		 * ERESTARTSYS: Restartable systemcall; called again
+		 */
+
+	} else if (rc < 0) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: DDCB#%d unknown result (rc=%d) %d!\n",
+			__func__, req->num, rc, ddcb_requ_get_state(req));
+		return -EINVAL;
+	}
+
+	/* Severe error occured. Driver is forced to stop operation */
+	if (cd->card_state != GENWQE_CARD_USED) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: DDCB#%d forced to stop (rc=%d)\n",
+			__func__, req->num, rc);
+		return -EIO;
+	}
+	return rc;
+}
+
+/**
+ * get_next_ddcb() - Get next available DDCB
+ * @cd:         pointer to genwqe device descriptor
+ *
+ * DDCB's content is completely cleared but presets for PRE and
+ * SEQNUM. This function must only be called when ddcb_lock is held.
+ *
+ * Return: NULL if no empty DDCB available otherwise ptr to next DDCB.
+ */
+static struct ddcb *get_next_ddcb(struct genwqe_dev *cd,
+				  struct ddcb_queue *queue,
+				  int *num)
+{
+	u64 *pu64;
+	struct ddcb *pddcb;
+
+	if (queue_free_ddcbs(queue) == 0) /* queue is  full */
+		return NULL;
+
+	/* find new ddcb */
+	pddcb = &queue->ddcb_vaddr[queue->ddcb_next];
+
+	/* if it is not completed, we are not allowed to use it */
+	/* barrier(); */
+	if ((pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) == 0x00000000)
+		return NULL;
+
+	*num = queue->ddcb_next;	/* internal DDCB number */
+	queue->ddcb_next = (queue->ddcb_next + 1) % queue->ddcb_max;
+
+	/* clear important DDCB fields */
+	pu64 = (u64 *)pddcb;
+	pu64[0] = 0ULL;		/* offs 0x00 (ICRC,HSI,SHI,...) */
+	pu64[1] = 0ULL;		/* offs 0x01 (ACFUNC,CMD...) */
+
+	/* destroy previous results in ASV */
+	pu64[0x80/8] = 0ULL;	/* offs 0x80 (ASV + 0) */
+	pu64[0x88/8] = 0ULL;	/* offs 0x88 (ASV + 0x08) */
+	pu64[0x90/8] = 0ULL;	/* offs 0x90 (ASV + 0x10) */
+	pu64[0x98/8] = 0ULL;	/* offs 0x98 (ASV + 0x18) */
+	pu64[0xd0/8] = 0ULL;	/* offs 0xd0 (RETC,ATTN...) */
+
+	pddcb->pre = DDCB_PRESET_PRE; /* 128 */
+	pddcb->seqnum_16 = cpu_to_be16(queue->ddcb_seq++);
+	return pddcb;
+}
+
+/**
+ * __genwqe_purge_ddcb() - Remove a DDCB from the workqueue
+ * @cd:         genwqe device descriptor
+ * @req:        DDCB request
+ *
+ * This will fail when the request was already FETCHED. In this case
+ * we need to wait until it is finished. Else the DDCB can be
+ * reused. This function also ensures that the request data structure
+ * is removed from ddcb_req[].
+ *
+ * Do not forget to call this function when genwqe_wait_ddcb() fails,
+ * such that the request gets really removed from ddcb_req[].
+ *
+ * Return: 0 success
+ */
+int __genwqe_purge_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req)
+{
+	struct ddcb *pddcb = NULL;
+	unsigned int t;
+	unsigned long flags;
+	struct ddcb_queue *queue = req->queue;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	u64 queue_status;
+	__be32 icrc_hsi_shi = 0x0000;
+	__be32 old, new;
+
+	/* unsigned long flags; */
+	if (GENWQE_DDCB_SOFTWARE_TIMEOUT <= 0) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: software timeout is not set!\n", __func__);
+		return -EFAULT;
+	}
+
+	pddcb = &queue->ddcb_vaddr[req->num];
+
+	for (t = 0; t < GENWQE_DDCB_SOFTWARE_TIMEOUT * 10; t++) {
+
+		spin_lock_irqsave(&queue->ddcb_lock, flags);
+
+		/* Check if req was meanwhile finished */
+		if (ddcb_requ_get_state(req) == GENWQE_REQU_FINISHED)
+			goto go_home;
+
+		/* try to set PURGE bit if FETCHED/COMPLETED are not set */
+		old = pddcb->icrc_hsi_shi_32;	/* read SHI/HSI in BE32 */
+		if ((old & DDCB_FETCHED_BE32) == 0x00000000) {
+
+			new = (old | DDCB_PURGE_BE32);
+			icrc_hsi_shi = cmpxchg(&pddcb->icrc_hsi_shi_32,
+					       old, new);
+			if (icrc_hsi_shi == old)
+				goto finish_ddcb;
+		}
+
+		/* normal finish with HSI bit */
+		barrier();
+		icrc_hsi_shi = pddcb->icrc_hsi_shi_32;
+		if (icrc_hsi_shi & DDCB_COMPLETED_BE32)
+			goto finish_ddcb;
+
+		spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+
+		/*
+		 * Here the check_ddcb() function will most likely
+		 * discover this DDCB to be finished some point in
+		 * time. It will mark the req finished and free it up
+		 * in the list.
+		 */
+
+		copy_ddcb_results(req, req->num); /* for the failing case */
+		msleep(100); /* sleep for 1/10 second and try again */
+		continue;
+
+finish_ddcb:
+		copy_ddcb_results(req, req->num);
+		ddcb_requ_set_state(req, GENWQE_REQU_FINISHED);
+		queue->ddcbs_in_flight--;
+		queue->ddcb_req[req->num] = NULL; /* delete from array */
+		ddcb_mark_cleared(pddcb);
+
+		/* Move active DDCB further; Nothing to do here anymore. */
+
+		/*
+		 * We need to ensure that there is at least one free
+		 * DDCB in the queue. To do that, we must update
+		 * ddcb_act only if the COMPLETED bit is set for the
+		 * DDCB we are working on else we treat that DDCB even
+		 * if we PURGED it as occupied (hardware is supposed
+		 * to set the COMPLETED bit yet!).
+		 */
+		icrc_hsi_shi = pddcb->icrc_hsi_shi_32;
+		if ((icrc_hsi_shi & DDCB_COMPLETED_BE32) &&
+		    (queue->ddcb_act == req->num)) {
+			queue->ddcb_act = ((queue->ddcb_act + 1) %
+					   queue->ddcb_max);
+		}
+go_home:
+		spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+		return 0;
+	}
+
+	/*
+	 * If the card is dead and the queue is forced to stop, we
+	 * might see this in the queue status register.
+	 */
+	queue_status = __genwqe_readq(cd, queue->IO_QUEUE_STATUS);
+
+	dev_dbg(&pci_dev->dev, "UN/FINISHED DDCB#%d\n", req->num);
+	genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
+
+	dev_err(&pci_dev->dev,
+		"[%s] err: DDCB#%d not purged and not completed after %d seconds QSTAT=%016llx!!\n",
+		__func__, req->num, GENWQE_DDCB_SOFTWARE_TIMEOUT,
+		queue_status);
+
+	print_ddcb_info(cd, req->queue);
+
+	return -EFAULT;
+}
+
+int genwqe_init_debug_data(struct genwqe_dev *cd, struct genwqe_debug_data *d)
+{
+	int len;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (d == NULL) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: invalid memory for debug data!\n",
+			__func__);
+		return -EFAULT;
+	}
+
+	len  = sizeof(d->driver_version);
+	snprintf(d->driver_version, len, "%s", DRV_VERSION);
+	d->slu_unitcfg = cd->slu_unitcfg;
+	d->app_unitcfg = cd->app_unitcfg;
+	return 0;
+}
+
+/**
+ * __genwqe_enqueue_ddcb() - Enqueue a DDCB
+ * @cd:         pointer to genwqe device descriptor
+ * @req:        pointer to DDCB execution request
+ * @f_flags:    file mode: blocking, non-blocking
+ *
+ * Return: 0 if enqueuing succeeded
+ *         -EIO if card is unusable/PCIe problems
+ *         -EBUSY if enqueuing failed
+ */
+int __genwqe_enqueue_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req,
+			  unsigned int f_flags)
+{
+	struct ddcb *pddcb;
+	unsigned long flags;
+	struct ddcb_queue *queue;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	u16 icrc;
+
+ retry:
+	if (cd->card_state != GENWQE_CARD_USED) {
+		printk_ratelimited(KERN_ERR
+			"%s %s: [%s] Card is unusable/PCIe problem Req#%d\n",
+			GENWQE_DEVNAME, dev_name(&pci_dev->dev),
+			__func__, req->num);
+		return -EIO;
+	}
+
+	queue = req->queue = &cd->queue;
+
+	/* FIXME circumvention to improve performance when no irq is
+	 * there.
+	 */
+	if (GENWQE_POLLING_ENABLED)
+		genwqe_check_ddcb_queue(cd, queue);
+
+	/*
+	 * It must be ensured to process all DDCBs in successive
+	 * order. Use a lock here in order to prevent nested DDCB
+	 * enqueuing.
+	 */
+	spin_lock_irqsave(&queue->ddcb_lock, flags);
+
+	pddcb = get_next_ddcb(cd, queue, &req->num);	/* get ptr and num */
+	if (pddcb == NULL) {
+		int rc;
+
+		spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+
+		if (f_flags & O_NONBLOCK) {
+			queue->return_on_busy++;
+			return -EBUSY;
+		}
+
+		queue->wait_on_busy++;
+		rc = wait_event_interruptible(queue->busy_waitq,
+					      queue_free_ddcbs(queue) != 0);
+		dev_dbg(&pci_dev->dev, "[%s] waiting for free DDCB: rc=%d\n",
+			__func__, rc);
+		if (rc == -ERESTARTSYS)
+			return rc;  /* interrupted by a signal */
+
+		goto retry;
+	}
+
+	if (queue->ddcb_req[req->num] != NULL) {
+		spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+
+		dev_err(&pci_dev->dev,
+			"[%s] picked DDCB %d with req=%p still in use!!\n",
+			__func__, req->num, req);
+		return -EFAULT;
+	}
+	ddcb_requ_set_state(req, GENWQE_REQU_ENQUEUED);
+	queue->ddcb_req[req->num] = req;
+
+	pddcb->cmdopts_16 = cpu_to_be16(req->cmd.cmdopts);
+	pddcb->cmd = req->cmd.cmd;
+	pddcb->acfunc = req->cmd.acfunc;	/* functional unit */
+
+	/*
+	 * We know that we can get retc 0x104 with CRC error, do not
+	 * stop the queue in those cases for this command. XDIR = 1
+	 * does not work for old SLU versions.
+	 *
+	 * Last bitstream with the old XDIR behavior had SLU_ID
+	 * 0x34199.
+	 */
+	if ((cd->slu_unitcfg & 0xFFFF0ull) > 0x34199ull)
+		pddcb->xdir = 0x1;
+	else
+		pddcb->xdir = 0x0;
+
+
+	pddcb->psp = (((req->cmd.asiv_length / 8) << 4) |
+		      ((req->cmd.asv_length  / 8)));
+	pddcb->disp_ts_64 = cpu_to_be64(req->cmd.disp_ts);
+
+	/*
+	 * If copying the whole DDCB_ASIV_LENGTH is impacting
+	 * performance we need to change it to
+	 * req->cmd.asiv_length. But simulation benefits from some
+	 * non-architectured bits behind the architectured content.
+	 *
+	 * How much data is copied depends on the availability of the
+	 * ATS field, which was introduced late. If the ATS field is
+	 * supported ASIV is 8 bytes shorter than it used to be. Since
+	 * the ATS field is copied too, the code should do exactly
+	 * what it did before, but I wanted to make copying of the ATS
+	 * field very explicit.
+	 */
+	if (genwqe_get_slu_id(cd) <= 0x2) {
+		memcpy(&pddcb->__asiv[0],	/* destination */
+		       &req->cmd.__asiv[0],	/* source */
+		       DDCB_ASIV_LENGTH);	/* req->cmd.asiv_length */
+	} else {
+		pddcb->n.ats_64 = cpu_to_be64(req->cmd.ats);
+		memcpy(&pddcb->n.asiv[0],	/* destination */
+			&req->cmd.asiv[0],	/* source */
+			DDCB_ASIV_LENGTH_ATS);	/* req->cmd.asiv_length */
+	}
+
+	pddcb->icrc_hsi_shi_32 = cpu_to_be32(0x00000000); /* for crc */
+
+	/*
+	 * Calculate CRC_16 for corresponding range PSP(7:4). Include
+	 * empty 4 bytes prior to the data.
+	 */
+	icrc = genwqe_crc16((const u8 *)pddcb,
+			   ICRC_LENGTH(req->cmd.asiv_length), 0xffff);
+	pddcb->icrc_hsi_shi_32 = cpu_to_be32((u32)icrc << 16);
+
+	/* enable DDCB completion irq */
+	if (!GENWQE_POLLING_ENABLED)
+		pddcb->icrc_hsi_shi_32 |= DDCB_INTR_BE32;
+
+	dev_dbg(&pci_dev->dev, "INPUT DDCB#%d\n", req->num);
+	genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
+
+	if (ddcb_requ_collect_debug_data(req)) {
+		/* use the kernel copy of debug data. copying back to
+		   user buffer happens later */
+
+		genwqe_init_debug_data(cd, &req->debug_data);
+		memcpy(&req->debug_data.ddcb_before, pddcb,
+		       sizeof(req->debug_data.ddcb_before));
+	}
+
+	enqueue_ddcb(cd, queue, pddcb, req->num);
+	queue->ddcbs_in_flight++;
+
+	if (queue->ddcbs_in_flight > queue->ddcbs_max_in_flight)
+		queue->ddcbs_max_in_flight = queue->ddcbs_in_flight;
+
+	ddcb_requ_set_state(req, GENWQE_REQU_TAPPED);
+	spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+	wake_up_interruptible(&cd->queue_waitq);
+
+	return 0;
+}
+
+/**
+ * __genwqe_execute_raw_ddcb() - Setup and execute DDCB
+ * @cd:         pointer to genwqe device descriptor
+ * @req:        user provided DDCB request
+ * @f_flags:    file mode: blocking, non-blocking
+ */
+int __genwqe_execute_raw_ddcb(struct genwqe_dev *cd,
+			      struct genwqe_ddcb_cmd *cmd,
+			      unsigned int f_flags)
+{
+	int rc = 0;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd);
+
+	if (cmd->asiv_length > DDCB_ASIV_LENGTH) {
+		dev_err(&pci_dev->dev, "[%s] err: wrong asiv_length of %d\n",
+			__func__, cmd->asiv_length);
+		return -EINVAL;
+	}
+	if (cmd->asv_length > DDCB_ASV_LENGTH) {
+		dev_err(&pci_dev->dev, "[%s] err: wrong asv_length of %d\n",
+			__func__, cmd->asiv_length);
+		return -EINVAL;
+	}
+	rc = __genwqe_enqueue_ddcb(cd, req, f_flags);
+	if (rc != 0)
+		return rc;
+
+	rc = __genwqe_wait_ddcb(cd, req);
+	if (rc < 0)		/* error or signal interrupt */
+		goto err_exit;
+
+	if (ddcb_requ_collect_debug_data(req)) {
+		if (copy_to_user((struct genwqe_debug_data __user *)
+				 (unsigned long)cmd->ddata_addr,
+				 &req->debug_data,
+				 sizeof(struct genwqe_debug_data)))
+			return -EFAULT;
+	}
+
+	/*
+	 * Higher values than 0x102 indicate completion with faults,
+	 * lower values than 0x102 indicate processing faults. Note
+	 * that DDCB might have been purged. E.g. Cntl+C.
+	 */
+	if (cmd->retc != DDCB_RETC_COMPLETE) {
+		/* This might happen e.g. flash read, and needs to be
+		   handled by the upper layer code. */
+		rc = -EBADMSG;	/* not processed/error retc */
+	}
+
+	return rc;
+
+ err_exit:
+	__genwqe_purge_ddcb(cd, req);
+
+	if (ddcb_requ_collect_debug_data(req)) {
+		if (copy_to_user((struct genwqe_debug_data __user *)
+				 (unsigned long)cmd->ddata_addr,
+				 &req->debug_data,
+				 sizeof(struct genwqe_debug_data)))
+			return -EFAULT;
+	}
+	return rc;
+}
+
+/**
+ * genwqe_next_ddcb_ready() - Figure out if the next DDCB is already finished
+ *
+ * We use this as condition for our wait-queue code.
+ */
+static int genwqe_next_ddcb_ready(struct genwqe_dev *cd)
+{
+	unsigned long flags;
+	struct ddcb *pddcb;
+	struct ddcb_queue *queue = &cd->queue;
+
+	spin_lock_irqsave(&queue->ddcb_lock, flags);
+
+	if (queue_empty(queue)) { /* emtpy queue */
+		spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+		return 0;
+	}
+
+	pddcb = &queue->ddcb_vaddr[queue->ddcb_act];
+	if (pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) { /* ddcb ready */
+		spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+		return 1;
+	}
+
+	spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+	return 0;
+}
+
+/**
+ * genwqe_ddcbs_in_flight() - Check how many DDCBs are in flight
+ *
+ * Keep track on the number of DDCBs which ware currently in the
+ * queue. This is needed for statistics as well as conditon if we want
+ * to wait or better do polling in case of no interrupts available.
+ */
+int genwqe_ddcbs_in_flight(struct genwqe_dev *cd)
+{
+	unsigned long flags;
+	int ddcbs_in_flight = 0;
+	struct ddcb_queue *queue = &cd->queue;
+
+	spin_lock_irqsave(&queue->ddcb_lock, flags);
+	ddcbs_in_flight += queue->ddcbs_in_flight;
+	spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+
+	return ddcbs_in_flight;
+}
+
+static int setup_ddcb_queue(struct genwqe_dev *cd, struct ddcb_queue *queue)
+{
+	int rc, i;
+	struct ddcb *pddcb;
+	u64 val64;
+	unsigned int queue_size;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (GENWQE_DDCB_MAX < 2)
+		return -EINVAL;
+
+	queue_size = roundup(GENWQE_DDCB_MAX * sizeof(struct ddcb), PAGE_SIZE);
+
+	queue->ddcbs_in_flight = 0;  /* statistics */
+	queue->ddcbs_max_in_flight = 0;
+	queue->ddcbs_completed = 0;
+	queue->return_on_busy = 0;
+	queue->wait_on_busy = 0;
+
+	queue->ddcb_seq	  = 0x100; /* start sequence number */
+	queue->ddcb_max	  = GENWQE_DDCB_MAX;
+	queue->ddcb_vaddr = __genwqe_alloc_consistent(cd, queue_size,
+						&queue->ddcb_daddr);
+	if (queue->ddcb_vaddr == NULL) {
+		dev_err(&pci_dev->dev,
+			"[%s] **err: could not allocate DDCB **\n", __func__);
+		return -ENOMEM;
+	}
+	queue->ddcb_req = kcalloc(queue->ddcb_max, sizeof(struct ddcb_requ *),
+				  GFP_KERNEL);
+	if (!queue->ddcb_req) {
+		rc = -ENOMEM;
+		goto free_ddcbs;
+	}
+
+	queue->ddcb_waitqs = kcalloc(queue->ddcb_max,
+				     sizeof(wait_queue_head_t),
+				     GFP_KERNEL);
+	if (!queue->ddcb_waitqs) {
+		rc = -ENOMEM;
+		goto free_requs;
+	}
+
+	for (i = 0; i < queue->ddcb_max; i++) {
+		pddcb = &queue->ddcb_vaddr[i];		     /* DDCBs */
+		pddcb->icrc_hsi_shi_32 = DDCB_COMPLETED_BE32;
+		pddcb->retc_16 = cpu_to_be16(0xfff);
+
+		queue->ddcb_req[i] = NULL;		     /* requests */
+		init_waitqueue_head(&queue->ddcb_waitqs[i]); /* waitqueues */
+	}
+
+	queue->ddcb_act  = 0;
+	queue->ddcb_next = 0;	/* queue is empty */
+
+	spin_lock_init(&queue->ddcb_lock);
+	init_waitqueue_head(&queue->busy_waitq);
+
+	val64 = ((u64)(queue->ddcb_max - 1) <<  8); /* lastptr */
+	__genwqe_writeq(cd, queue->IO_QUEUE_CONFIG,  0x07);  /* iCRC/vCRC */
+	__genwqe_writeq(cd, queue->IO_QUEUE_SEGMENT, queue->ddcb_daddr);
+	__genwqe_writeq(cd, queue->IO_QUEUE_INITSQN, queue->ddcb_seq);
+	__genwqe_writeq(cd, queue->IO_QUEUE_WRAP,    val64);
+	return 0;
+
+ free_requs:
+	kfree(queue->ddcb_req);
+	queue->ddcb_req = NULL;
+ free_ddcbs:
+	__genwqe_free_consistent(cd, queue_size, queue->ddcb_vaddr,
+				queue->ddcb_daddr);
+	queue->ddcb_vaddr = NULL;
+	queue->ddcb_daddr = 0ull;
+	return -ENODEV;
+
+}
+
+static int ddcb_queue_initialized(struct ddcb_queue *queue)
+{
+	return queue->ddcb_vaddr != NULL;
+}
+
+static void free_ddcb_queue(struct genwqe_dev *cd, struct ddcb_queue *queue)
+{
+	unsigned int queue_size;
+
+	queue_size = roundup(queue->ddcb_max * sizeof(struct ddcb), PAGE_SIZE);
+
+	kfree(queue->ddcb_req);
+	queue->ddcb_req = NULL;
+
+	if (queue->ddcb_vaddr) {
+		__genwqe_free_consistent(cd, queue_size, queue->ddcb_vaddr,
+					queue->ddcb_daddr);
+		queue->ddcb_vaddr = NULL;
+		queue->ddcb_daddr = 0ull;
+	}
+}
+
+static irqreturn_t genwqe_pf_isr(int irq, void *dev_id)
+{
+	u64 gfir;
+	struct genwqe_dev *cd = (struct genwqe_dev *)dev_id;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	/*
+	 * In case of fatal FIR error the queue is stopped, such that
+	 * we can safely check it without risking anything.
+	 */
+	cd->irqs_processed++;
+	wake_up_interruptible(&cd->queue_waitq);
+
+	/*
+	 * Checking for errors before kicking the queue might be
+	 * safer, but slower for the good-case ... See above.
+	 */
+	gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+	if (((gfir & GFIR_ERR_TRIGGER) != 0x0) &&
+	    !pci_channel_offline(pci_dev)) {
+
+		if (cd->use_platform_recovery) {
+			/*
+			 * Since we use raw accessors, EEH errors won't be
+			 * detected by the platform until we do a non-raw
+			 * MMIO or config space read
+			 */
+			readq(cd->mmio + IO_SLC_CFGREG_GFIR);
+
+			/* Don't do anything if the PCI channel is frozen */
+			if (pci_channel_offline(pci_dev))
+				goto exit;
+		}
+
+		wake_up_interruptible(&cd->health_waitq);
+
+		/*
+		 * By default GFIRs causes recovery actions. This
+		 * count is just for debug when recovery is masked.
+		 */
+		dev_err_ratelimited(&pci_dev->dev,
+				    "[%s] GFIR=%016llx\n",
+				    __func__, gfir);
+	}
+
+ exit:
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t genwqe_vf_isr(int irq, void *dev_id)
+{
+	struct genwqe_dev *cd = (struct genwqe_dev *)dev_id;
+
+	cd->irqs_processed++;
+	wake_up_interruptible(&cd->queue_waitq);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * genwqe_card_thread() - Work thread for the DDCB queue
+ *
+ * The idea is to check if there are DDCBs in processing. If there are
+ * some finished DDCBs, we process them and wakeup the
+ * requestors. Otherwise we give other processes time using
+ * cond_resched().
+ */
+static int genwqe_card_thread(void *data)
+{
+	int should_stop = 0, rc = 0;
+	struct genwqe_dev *cd = (struct genwqe_dev *)data;
+
+	while (!kthread_should_stop()) {
+
+		genwqe_check_ddcb_queue(cd, &cd->queue);
+
+		if (GENWQE_POLLING_ENABLED) {
+			rc = wait_event_interruptible_timeout(
+				cd->queue_waitq,
+				genwqe_ddcbs_in_flight(cd) ||
+				(should_stop = kthread_should_stop()), 1);
+		} else {
+			rc = wait_event_interruptible_timeout(
+				cd->queue_waitq,
+				genwqe_next_ddcb_ready(cd) ||
+				(should_stop = kthread_should_stop()), HZ);
+		}
+		if (should_stop)
+			break;
+
+		/*
+		 * Avoid soft lockups on heavy loads; we do not want
+		 * to disable our interrupts.
+		 */
+		cond_resched();
+	}
+	return 0;
+}
+
+/**
+ * genwqe_setup_service_layer() - Setup DDCB queue
+ * @cd:         pointer to genwqe device descriptor
+ *
+ * Allocate DDCBs. Configure Service Layer Controller (SLC).
+ *
+ * Return: 0 success
+ */
+int genwqe_setup_service_layer(struct genwqe_dev *cd)
+{
+	int rc;
+	struct ddcb_queue *queue;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (genwqe_is_privileged(cd)) {
+		rc = genwqe_card_reset(cd);
+		if (rc < 0) {
+			dev_err(&pci_dev->dev,
+				"[%s] err: reset failed.\n", __func__);
+			return rc;
+		}
+		genwqe_read_softreset(cd);
+	}
+
+	queue = &cd->queue;
+	queue->IO_QUEUE_CONFIG  = IO_SLC_QUEUE_CONFIG;
+	queue->IO_QUEUE_STATUS  = IO_SLC_QUEUE_STATUS;
+	queue->IO_QUEUE_SEGMENT = IO_SLC_QUEUE_SEGMENT;
+	queue->IO_QUEUE_INITSQN = IO_SLC_QUEUE_INITSQN;
+	queue->IO_QUEUE_OFFSET  = IO_SLC_QUEUE_OFFSET;
+	queue->IO_QUEUE_WRAP    = IO_SLC_QUEUE_WRAP;
+	queue->IO_QUEUE_WTIME   = IO_SLC_QUEUE_WTIME;
+	queue->IO_QUEUE_ERRCNTS = IO_SLC_QUEUE_ERRCNTS;
+	queue->IO_QUEUE_LRW     = IO_SLC_QUEUE_LRW;
+
+	rc = setup_ddcb_queue(cd, queue);
+	if (rc != 0) {
+		rc = -ENODEV;
+		goto err_out;
+	}
+
+	init_waitqueue_head(&cd->queue_waitq);
+	cd->card_thread = kthread_run(genwqe_card_thread, cd,
+				      GENWQE_DEVNAME "%d_thread",
+				      cd->card_idx);
+	if (IS_ERR(cd->card_thread)) {
+		rc = PTR_ERR(cd->card_thread);
+		cd->card_thread = NULL;
+		goto stop_free_queue;
+	}
+
+	rc = genwqe_set_interrupt_capability(cd, GENWQE_MSI_IRQS);
+	if (rc)
+		goto stop_kthread;
+
+	/*
+	 * We must have all wait-queues initialized when we enable the
+	 * interrupts. Otherwise we might crash if we get an early
+	 * irq.
+	 */
+	init_waitqueue_head(&cd->health_waitq);
+
+	if (genwqe_is_privileged(cd)) {
+		rc = request_irq(pci_dev->irq, genwqe_pf_isr, IRQF_SHARED,
+				 GENWQE_DEVNAME, cd);
+	} else {
+		rc = request_irq(pci_dev->irq, genwqe_vf_isr, IRQF_SHARED,
+				 GENWQE_DEVNAME, cd);
+	}
+	if (rc < 0) {
+		dev_err(&pci_dev->dev, "irq %d not free.\n", pci_dev->irq);
+		goto stop_irq_cap;
+	}
+
+	cd->card_state = GENWQE_CARD_USED;
+	return 0;
+
+ stop_irq_cap:
+	genwqe_reset_interrupt_capability(cd);
+ stop_kthread:
+	kthread_stop(cd->card_thread);
+	cd->card_thread = NULL;
+ stop_free_queue:
+	free_ddcb_queue(cd, queue);
+ err_out:
+	return rc;
+}
+
+/**
+ * queue_wake_up_all() - Handles fatal error case
+ *
+ * The PCI device got unusable and we have to stop all pending
+ * requests as fast as we can. The code after this must purge the
+ * DDCBs in question and ensure that all mappings are freed.
+ */
+static int queue_wake_up_all(struct genwqe_dev *cd)
+{
+	unsigned int i;
+	unsigned long flags;
+	struct ddcb_queue *queue = &cd->queue;
+
+	spin_lock_irqsave(&queue->ddcb_lock, flags);
+
+	for (i = 0; i < queue->ddcb_max; i++)
+		wake_up_interruptible(&queue->ddcb_waitqs[queue->ddcb_act]);
+
+	wake_up_interruptible(&queue->busy_waitq);
+	spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+
+	return 0;
+}
+
+/**
+ * genwqe_finish_queue() - Remove any genwqe devices and user-interfaces
+ *
+ * Relies on the pre-condition that there are no users of the card
+ * device anymore e.g. with open file-descriptors.
+ *
+ * This function must be robust enough to be called twice.
+ */
+int genwqe_finish_queue(struct genwqe_dev *cd)
+{
+	int i, rc = 0, in_flight;
+	int waitmax = GENWQE_DDCB_SOFTWARE_TIMEOUT;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	struct ddcb_queue *queue = &cd->queue;
+
+	if (!ddcb_queue_initialized(queue))
+		return 0;
+
+	/* Do not wipe out the error state. */
+	if (cd->card_state == GENWQE_CARD_USED)
+		cd->card_state = GENWQE_CARD_UNUSED;
+
+	/* Wake up all requests in the DDCB queue such that they
+	   should be removed nicely. */
+	queue_wake_up_all(cd);
+
+	/* We must wait to get rid of the DDCBs in flight */
+	for (i = 0; i < waitmax; i++) {
+		in_flight = genwqe_ddcbs_in_flight(cd);
+
+		if (in_flight == 0)
+			break;
+
+		dev_dbg(&pci_dev->dev,
+			"  DEBUG [%d/%d] waiting for queue to get empty: %d requests!\n",
+			i, waitmax, in_flight);
+
+		/*
+		 * Severe severe error situation: The card itself has
+		 * 16 DDCB queues, each queue has e.g. 32 entries,
+		 * each DDBC has a hardware timeout of currently 250
+		 * msec but the PFs have a hardware timeout of 8 sec
+		 * ... so I take something large.
+		 */
+		msleep(1000);
+	}
+	if (i == waitmax) {
+		dev_err(&pci_dev->dev, "  [%s] err: queue is not empty!!\n",
+			__func__);
+		rc = -EIO;
+	}
+	return rc;
+}
+
+/**
+ * genwqe_release_service_layer() - Shutdown DDCB queue
+ * @cd:       genwqe device descriptor
+ *
+ * This function must be robust enough to be called twice.
+ */
+int genwqe_release_service_layer(struct genwqe_dev *cd)
+{
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (!ddcb_queue_initialized(&cd->queue))
+		return 1;
+
+	free_irq(pci_dev->irq, cd);
+	genwqe_reset_interrupt_capability(cd);
+
+	if (cd->card_thread != NULL) {
+		kthread_stop(cd->card_thread);
+		cd->card_thread = NULL;
+	}
+
+	free_ddcb_queue(cd, &cd->queue);
+	return 0;
+}
diff --git a/drivers/misc/genwqe/card_ddcb.h b/drivers/misc/genwqe/card_ddcb.h
new file mode 100644
index 000000000..0361a68d7
--- /dev/null
+++ b/drivers/misc/genwqe/card_ddcb.h
@@ -0,0 +1,188 @@
+#ifndef __CARD_DDCB_H__
+#define __CARD_DDCB_H__
+
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+#include "genwqe_driver.h"
+#include "card_base.h"
+
+/**
+ * struct ddcb - Device Driver Control Block DDCB
+ * @hsi:        Hardware software interlock
+ * @shi:        Software hardware interlock. Hsi and shi are used to interlock
+ *              software and hardware activities. We are using a compare and
+ *              swap operation to ensure that there are no races when
+ *              activating new DDCBs on the queue, or when we need to
+ *              purge a DDCB from a running queue.
+ * @acfunc:     Accelerator function addresses a unit within the chip
+ * @cmd:        Command to work on
+ * @cmdopts_16: Options for the command
+ * @asiv:       Input data
+ * @asv:        Output data
+ *
+ * The DDCB data format is big endian. Multiple consequtive DDBCs form
+ * a DDCB queue.
+ */
+#define ASIV_LENGTH		104 /* Old specification without ATS field */
+#define ASIV_LENGTH_ATS		96  /* New specification with ATS field */
+#define ASV_LENGTH		64
+
+struct ddcb {
+	union {
+		__be32 icrc_hsi_shi_32;	/* iCRC, Hardware/SW interlock */
+		struct {
+			__be16	icrc_16;
+			u8	hsi;
+			u8	shi;
+		};
+	};
+	u8  pre;		/* Preamble */
+	u8  xdir;		/* Execution Directives */
+	__be16 seqnum_16;	/* Sequence Number */
+
+	u8  acfunc;		/* Accelerator Function.. */
+	u8  cmd;		/* Command. */
+	__be16 cmdopts_16;	/* Command Options */
+	u8  sur;		/* Status Update Rate */
+	u8  psp;		/* Protection Section Pointer */
+	__be16 rsvd_0e_16;	/* Reserved invariant */
+
+	__be64 fwiv_64;		/* Firmware Invariant. */
+
+	union {
+		struct {
+			__be64 ats_64;  /* Address Translation Spec */
+			u8     asiv[ASIV_LENGTH_ATS]; /* New ASIV */
+		} n;
+		u8  __asiv[ASIV_LENGTH];	/* obsolete */
+	};
+	u8     asv[ASV_LENGTH];	/* Appl Spec Variant */
+
+	__be16 rsvd_c0_16;	/* Reserved Variant */
+	__be16 vcrc_16;		/* Variant CRC */
+	__be32 rsvd_32;		/* Reserved unprotected */
+
+	__be64 deque_ts_64;	/* Deque Time Stamp. */
+
+	__be16 retc_16;		/* Return Code */
+	__be16 attn_16;		/* Attention/Extended Error Codes */
+	__be32 progress_32;	/* Progress indicator. */
+
+	__be64 cmplt_ts_64;	/* Completion Time Stamp. */
+
+	/* The following layout matches the new service layer format */
+	__be32 ibdc_32;		/* Inbound Data Count  (* 256) */
+	__be32 obdc_32;		/* Outbound Data Count (* 256) */
+
+	__be64 rsvd_SLH_64;	/* Reserved for hardware */
+	union {			/* private data for driver */
+		u8	priv[8];
+		__be64	priv_64;
+	};
+	__be64 disp_ts_64;	/* Dispatch TimeStamp */
+} __attribute__((__packed__));
+
+/* CRC polynomials for DDCB */
+#define CRC16_POLYNOMIAL	0x1021
+
+/*
+ * SHI: Software to Hardware Interlock
+ *   This 1 byte field is written by software to interlock the
+ *   movement of one queue entry to another with the hardware in the
+ *   chip.
+ */
+#define DDCB_SHI_INTR		0x04 /* Bit 2 */
+#define DDCB_SHI_PURGE		0x02 /* Bit 1 */
+#define DDCB_SHI_NEXT		0x01 /* Bit 0 */
+
+/*
+ * HSI: Hardware to Software interlock
+ * This 1 byte field is written by hardware to interlock the movement
+ * of one queue entry to another with the software in the chip.
+ */
+#define DDCB_HSI_COMPLETED	0x40 /* Bit 6 */
+#define DDCB_HSI_FETCHED	0x04 /* Bit 2 */
+
+/*
+ * Accessing HSI/SHI is done 32-bit wide
+ *   Normally 16-bit access would work too, but on some platforms the
+ *   16 compare and swap operation is not supported. Therefore
+ *   switching to 32-bit such that those platforms will work too.
+ *
+ *                                         iCRC HSI/SHI
+ */
+#define DDCB_INTR_BE32		cpu_to_be32(0x00000004)
+#define DDCB_PURGE_BE32		cpu_to_be32(0x00000002)
+#define DDCB_NEXT_BE32		cpu_to_be32(0x00000001)
+#define DDCB_COMPLETED_BE32	cpu_to_be32(0x00004000)
+#define DDCB_FETCHED_BE32	cpu_to_be32(0x00000400)
+
+/* Definitions of DDCB presets */
+#define DDCB_PRESET_PRE		0x80
+#define ICRC_LENGTH(n)		((n) + 8 + 8 + 8)  /* used ASIV + hdr fields */
+#define VCRC_LENGTH(n)		((n))		   /* used ASV */
+
+/*
+ * Genwqe Scatter Gather list
+ *   Each element has up to 8 entries.
+ *   The chaining element is element 0 cause of prefetching needs.
+ */
+
+/*
+ * 0b0110 Chained descriptor. The descriptor is describing the next
+ * descriptor list.
+ */
+#define SG_CHAINED		(0x6)
+
+/*
+ * 0b0010 First entry of a descriptor list. Start from a Buffer-Empty
+ * condition.
+ */
+#define SG_DATA			(0x2)
+
+/*
+ * 0b0000 Early terminator. This is the last entry on the list
+ * irregardless of the length indicated.
+ */
+#define SG_END_LIST		(0x0)
+
+/**
+ * struct sglist - Scatter gather list
+ * @target_addr:       Either a dma addr of memory to work on or a
+ *                     dma addr or a subsequent sglist block.
+ * @len:               Length of the data block.
+ * @flags:             See above.
+ *
+ * Depending on the command the GenWQE card can use a scatter gather
+ * list to describe the memory it works on. Always 8 sg_entry's form
+ * a block.
+ */
+struct sg_entry {
+	__be64 target_addr;
+	__be32 len;
+	__be32 flags;
+};
+
+#endif /* __CARD_DDCB_H__ */
diff --git a/drivers/misc/genwqe/card_debugfs.c b/drivers/misc/genwqe/card_debugfs.c
new file mode 100644
index 000000000..c6b82f09b
--- /dev/null
+++ b/drivers/misc/genwqe/card_debugfs.c
@@ -0,0 +1,504 @@
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Debugfs interfaces for the GenWQE card. Help to debug potential
+ * problems. Dump internal chip state for debugging and failure
+ * determination.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+
+#include "card_base.h"
+#include "card_ddcb.h"
+
+#define GENWQE_DEBUGFS_RO(_name, _showfn)				\
+	static int genwqe_debugfs_##_name##_open(struct inode *inode,	\
+						 struct file *file)	\
+	{								\
+		return single_open(file, _showfn, inode->i_private);	\
+	}								\
+	static const struct file_operations genwqe_##_name##_fops = {	\
+		.open = genwqe_debugfs_##_name##_open,			\
+		.read = seq_read,					\
+		.llseek = seq_lseek,					\
+		.release = single_release,				\
+	}
+
+static void dbg_uidn_show(struct seq_file *s, struct genwqe_reg *regs,
+			  int entries)
+{
+	unsigned int i;
+	u32 v_hi, v_lo;
+
+	for (i = 0; i < entries; i++) {
+		v_hi = (regs[i].val >> 32) & 0xffffffff;
+		v_lo = (regs[i].val)       & 0xffffffff;
+
+		seq_printf(s, "  0x%08x 0x%08x 0x%08x 0x%08x EXT_ERR_REC\n",
+			   regs[i].addr, regs[i].idx, v_hi, v_lo);
+	}
+}
+
+static int curr_dbg_uidn_show(struct seq_file *s, void *unused, int uid)
+{
+	struct genwqe_dev *cd = s->private;
+	int entries;
+	struct genwqe_reg *regs;
+
+	entries = genwqe_ffdc_buff_size(cd, uid);
+	if (entries < 0)
+		return -EINVAL;
+
+	if (entries == 0)
+		return 0;
+
+	regs = kcalloc(entries, sizeof(*regs), GFP_KERNEL);
+	if (regs == NULL)
+		return -ENOMEM;
+
+	genwqe_stop_traps(cd); /* halt the traps while dumping data */
+	genwqe_ffdc_buff_read(cd, uid, regs, entries);
+	genwqe_start_traps(cd);
+
+	dbg_uidn_show(s, regs, entries);
+	kfree(regs);
+	return 0;
+}
+
+static int genwqe_curr_dbg_uid0_show(struct seq_file *s, void *unused)
+{
+	return curr_dbg_uidn_show(s, unused, 0);
+}
+
+GENWQE_DEBUGFS_RO(curr_dbg_uid0, genwqe_curr_dbg_uid0_show);
+
+static int genwqe_curr_dbg_uid1_show(struct seq_file *s, void *unused)
+{
+	return curr_dbg_uidn_show(s, unused, 1);
+}
+
+GENWQE_DEBUGFS_RO(curr_dbg_uid1, genwqe_curr_dbg_uid1_show);
+
+static int genwqe_curr_dbg_uid2_show(struct seq_file *s, void *unused)
+{
+	return curr_dbg_uidn_show(s, unused, 2);
+}
+
+GENWQE_DEBUGFS_RO(curr_dbg_uid2, genwqe_curr_dbg_uid2_show);
+
+static int prev_dbg_uidn_show(struct seq_file *s, void *unused, int uid)
+{
+	struct genwqe_dev *cd = s->private;
+
+	dbg_uidn_show(s, cd->ffdc[uid].regs,  cd->ffdc[uid].entries);
+	return 0;
+}
+
+static int genwqe_prev_dbg_uid0_show(struct seq_file *s, void *unused)
+{
+	return prev_dbg_uidn_show(s, unused, 0);
+}
+
+GENWQE_DEBUGFS_RO(prev_dbg_uid0, genwqe_prev_dbg_uid0_show);
+
+static int genwqe_prev_dbg_uid1_show(struct seq_file *s, void *unused)
+{
+	return prev_dbg_uidn_show(s, unused, 1);
+}
+
+GENWQE_DEBUGFS_RO(prev_dbg_uid1, genwqe_prev_dbg_uid1_show);
+
+static int genwqe_prev_dbg_uid2_show(struct seq_file *s, void *unused)
+{
+	return prev_dbg_uidn_show(s, unused, 2);
+}
+
+GENWQE_DEBUGFS_RO(prev_dbg_uid2, genwqe_prev_dbg_uid2_show);
+
+static int genwqe_curr_regs_show(struct seq_file *s, void *unused)
+{
+	struct genwqe_dev *cd = s->private;
+	unsigned int i;
+	struct genwqe_reg *regs;
+
+	regs = kcalloc(GENWQE_FFDC_REGS, sizeof(*regs), GFP_KERNEL);
+	if (regs == NULL)
+		return -ENOMEM;
+
+	genwqe_stop_traps(cd);
+	genwqe_read_ffdc_regs(cd, regs, GENWQE_FFDC_REGS, 1);
+	genwqe_start_traps(cd);
+
+	for (i = 0; i < GENWQE_FFDC_REGS; i++) {
+		if (regs[i].addr == 0xffffffff)
+			break;  /* invalid entries */
+
+		if (regs[i].val == 0x0ull)
+			continue;  /* do not print 0x0 FIRs */
+
+		seq_printf(s, "  0x%08x 0x%016llx\n",
+			   regs[i].addr, regs[i].val);
+	}
+	return 0;
+}
+
+GENWQE_DEBUGFS_RO(curr_regs, genwqe_curr_regs_show);
+
+static int genwqe_prev_regs_show(struct seq_file *s, void *unused)
+{
+	struct genwqe_dev *cd = s->private;
+	unsigned int i;
+	struct genwqe_reg *regs = cd->ffdc[GENWQE_DBG_REGS].regs;
+
+	if (regs == NULL)
+		return -EINVAL;
+
+	for (i = 0; i < GENWQE_FFDC_REGS; i++) {
+		if (regs[i].addr == 0xffffffff)
+			break;  /* invalid entries */
+
+		if (regs[i].val == 0x0ull)
+			continue;  /* do not print 0x0 FIRs */
+
+		seq_printf(s, "  0x%08x 0x%016llx\n",
+			   regs[i].addr, regs[i].val);
+	}
+	return 0;
+}
+
+GENWQE_DEBUGFS_RO(prev_regs, genwqe_prev_regs_show);
+
+static int genwqe_jtimer_show(struct seq_file *s, void *unused)
+{
+	struct genwqe_dev *cd = s->private;
+	unsigned int vf_num;
+	u64 jtimer;
+
+	jtimer = genwqe_read_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT, 0);
+	seq_printf(s, "  PF   0x%016llx %d msec\n", jtimer,
+		   GENWQE_PF_JOBTIMEOUT_MSEC);
+
+	for (vf_num = 0; vf_num < cd->num_vfs; vf_num++) {
+		jtimer = genwqe_read_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT,
+					  vf_num + 1);
+		seq_printf(s, "  VF%-2d 0x%016llx %d msec\n", vf_num, jtimer,
+			   cd->vf_jobtimeout_msec[vf_num]);
+	}
+	return 0;
+}
+
+GENWQE_DEBUGFS_RO(jtimer, genwqe_jtimer_show);
+
+static int genwqe_queue_working_time_show(struct seq_file *s, void *unused)
+{
+	struct genwqe_dev *cd = s->private;
+	unsigned int vf_num;
+	u64 t;
+
+	t = genwqe_read_vreg(cd, IO_SLC_VF_QUEUE_WTIME, 0);
+	seq_printf(s, "  PF   0x%016llx\n", t);
+
+	for (vf_num = 0; vf_num < cd->num_vfs; vf_num++) {
+		t = genwqe_read_vreg(cd, IO_SLC_VF_QUEUE_WTIME, vf_num + 1);
+		seq_printf(s, "  VF%-2d 0x%016llx\n", vf_num, t);
+	}
+	return 0;
+}
+
+GENWQE_DEBUGFS_RO(queue_working_time, genwqe_queue_working_time_show);
+
+static int genwqe_ddcb_info_show(struct seq_file *s, void *unused)
+{
+	struct genwqe_dev *cd = s->private;
+	unsigned int i;
+	struct ddcb_queue *queue;
+	struct ddcb *pddcb;
+
+	queue = &cd->queue;
+	seq_puts(s, "DDCB QUEUE:\n");
+	seq_printf(s, "  ddcb_max:            %d\n"
+		   "  ddcb_daddr:          %016llx - %016llx\n"
+		   "  ddcb_vaddr:          %016llx\n"
+		   "  ddcbs_in_flight:     %u\n"
+		   "  ddcbs_max_in_flight: %u\n"
+		   "  ddcbs_completed:     %u\n"
+		   "  return_on_busy:      %u\n"
+		   "  wait_on_busy:        %u\n"
+		   "  irqs_processed:      %u\n",
+		   queue->ddcb_max, (long long)queue->ddcb_daddr,
+		   (long long)queue->ddcb_daddr +
+		   (queue->ddcb_max * DDCB_LENGTH),
+		   (long long)queue->ddcb_vaddr, queue->ddcbs_in_flight,
+		   queue->ddcbs_max_in_flight, queue->ddcbs_completed,
+		   queue->return_on_busy, queue->wait_on_busy,
+		   cd->irqs_processed);
+
+	/* Hardware State */
+	seq_printf(s, "  0x%08x 0x%016llx IO_QUEUE_CONFIG\n"
+		   "  0x%08x 0x%016llx IO_QUEUE_STATUS\n"
+		   "  0x%08x 0x%016llx IO_QUEUE_SEGMENT\n"
+		   "  0x%08x 0x%016llx IO_QUEUE_INITSQN\n"
+		   "  0x%08x 0x%016llx IO_QUEUE_WRAP\n"
+		   "  0x%08x 0x%016llx IO_QUEUE_OFFSET\n"
+		   "  0x%08x 0x%016llx IO_QUEUE_WTIME\n"
+		   "  0x%08x 0x%016llx IO_QUEUE_ERRCNTS\n"
+		   "  0x%08x 0x%016llx IO_QUEUE_LRW\n",
+		   queue->IO_QUEUE_CONFIG,
+		   __genwqe_readq(cd, queue->IO_QUEUE_CONFIG),
+		   queue->IO_QUEUE_STATUS,
+		   __genwqe_readq(cd, queue->IO_QUEUE_STATUS),
+		   queue->IO_QUEUE_SEGMENT,
+		   __genwqe_readq(cd, queue->IO_QUEUE_SEGMENT),
+		   queue->IO_QUEUE_INITSQN,
+		   __genwqe_readq(cd, queue->IO_QUEUE_INITSQN),
+		   queue->IO_QUEUE_WRAP,
+		   __genwqe_readq(cd, queue->IO_QUEUE_WRAP),
+		   queue->IO_QUEUE_OFFSET,
+		   __genwqe_readq(cd, queue->IO_QUEUE_OFFSET),
+		   queue->IO_QUEUE_WTIME,
+		   __genwqe_readq(cd, queue->IO_QUEUE_WTIME),
+		   queue->IO_QUEUE_ERRCNTS,
+		   __genwqe_readq(cd, queue->IO_QUEUE_ERRCNTS),
+		   queue->IO_QUEUE_LRW,
+		   __genwqe_readq(cd, queue->IO_QUEUE_LRW));
+
+	seq_printf(s, "DDCB list (ddcb_act=%d/ddcb_next=%d):\n",
+		   queue->ddcb_act, queue->ddcb_next);
+
+	pddcb = queue->ddcb_vaddr;
+	for (i = 0; i < queue->ddcb_max; i++) {
+		seq_printf(s, "  %-3d: RETC=%03x SEQ=%04x HSI/SHI=%02x/%02x ",
+			   i, be16_to_cpu(pddcb->retc_16),
+			   be16_to_cpu(pddcb->seqnum_16),
+			   pddcb->hsi, pddcb->shi);
+		seq_printf(s, "PRIV=%06llx CMD=%02x\n",
+			   be64_to_cpu(pddcb->priv_64), pddcb->cmd);
+		pddcb++;
+	}
+	return 0;
+}
+
+GENWQE_DEBUGFS_RO(ddcb_info, genwqe_ddcb_info_show);
+
+static int genwqe_info_show(struct seq_file *s, void *unused)
+{
+	struct genwqe_dev *cd = s->private;
+	u64 app_id, slu_id, bitstream = -1;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	slu_id = __genwqe_readq(cd, IO_SLU_UNITCFG);
+	app_id = __genwqe_readq(cd, IO_APP_UNITCFG);
+
+	if (genwqe_is_privileged(cd))
+		bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM);
+
+	seq_printf(s, "%s driver version: %s\n"
+		   "    Device Name/Type: %s %s CardIdx: %d\n"
+		   "    SLU/APP Config  : 0x%016llx/0x%016llx\n"
+		   "    Build Date      : %u/%x/%u\n"
+		   "    Base Clock      : %u MHz\n"
+		   "    Arch/SVN Release: %u/%llx\n"
+		   "    Bitstream       : %llx\n",
+		   GENWQE_DEVNAME, DRV_VERSION, dev_name(&pci_dev->dev),
+		   genwqe_is_privileged(cd) ?
+		   "Physical" : "Virtual or no SR-IOV",
+		   cd->card_idx, slu_id, app_id,
+		   (u16)((slu_id >> 12) & 0x0fLLU),	   /* month */
+		   (u16)((slu_id >>  4) & 0xffLLU),	   /* day */
+		   (u16)((slu_id >> 16) & 0x0fLLU) + 2010, /* year */
+		   genwqe_base_clock_frequency(cd),
+		   (u16)((slu_id >> 32) & 0xffLLU), slu_id >> 40,
+		   bitstream);
+
+	return 0;
+}
+
+GENWQE_DEBUGFS_RO(info, genwqe_info_show);
+
+int genwqe_init_debugfs(struct genwqe_dev *cd)
+{
+	struct dentry *root;
+	struct dentry *file;
+	int ret;
+	char card_name[64];
+	char name[64];
+	unsigned int i;
+
+	sprintf(card_name, "%s%d_card", GENWQE_DEVNAME, cd->card_idx);
+
+	root = debugfs_create_dir(card_name, cd->debugfs_genwqe);
+	if (!root) {
+		ret = -ENOMEM;
+		goto err0;
+	}
+
+	/* non privileged interfaces are done here */
+	file = debugfs_create_file("ddcb_info", S_IRUGO, root, cd,
+				   &genwqe_ddcb_info_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("info", S_IRUGO, root, cd,
+				   &genwqe_info_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_x64("err_inject", 0666, root, &cd->err_inject);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_u32("ddcb_software_timeout", 0666, root,
+				  &cd->ddcb_software_timeout);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_u32("kill_timeout", 0666, root,
+				  &cd->kill_timeout);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	/* privileged interfaces follow here */
+	if (!genwqe_is_privileged(cd)) {
+		cd->debugfs_root = root;
+		return 0;
+	}
+
+	file = debugfs_create_file("curr_regs", S_IRUGO, root, cd,
+				   &genwqe_curr_regs_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("curr_dbg_uid0", S_IRUGO, root, cd,
+				   &genwqe_curr_dbg_uid0_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("curr_dbg_uid1", S_IRUGO, root, cd,
+				   &genwqe_curr_dbg_uid1_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("curr_dbg_uid2", S_IRUGO, root, cd,
+				   &genwqe_curr_dbg_uid2_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("prev_regs", S_IRUGO, root, cd,
+				   &genwqe_prev_regs_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("prev_dbg_uid0", S_IRUGO, root, cd,
+				   &genwqe_prev_dbg_uid0_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("prev_dbg_uid1", S_IRUGO, root, cd,
+				   &genwqe_prev_dbg_uid1_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("prev_dbg_uid2", S_IRUGO, root, cd,
+				   &genwqe_prev_dbg_uid2_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	for (i = 0; i <  GENWQE_MAX_VFS; i++) {
+		sprintf(name, "vf%u_jobtimeout_msec", i);
+
+		file = debugfs_create_u32(name, 0666, root,
+					  &cd->vf_jobtimeout_msec[i]);
+		if (!file) {
+			ret = -ENOMEM;
+			goto err1;
+		}
+	}
+
+	file = debugfs_create_file("jobtimer", S_IRUGO, root, cd,
+				   &genwqe_jtimer_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("queue_working_time", S_IRUGO, root, cd,
+				   &genwqe_queue_working_time_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_u32("skip_recovery", 0666, root,
+				  &cd->skip_recovery);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_u32("use_platform_recovery", 0666, root,
+				  &cd->use_platform_recovery);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	cd->debugfs_root = root;
+	return 0;
+err1:
+	debugfs_remove_recursive(root);
+err0:
+	return ret;
+}
+
+void genqwe_exit_debugfs(struct genwqe_dev *cd)
+{
+	debugfs_remove_recursive(cd->debugfs_root);
+}
diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c
new file mode 100644
index 000000000..d2098b4d2
--- /dev/null
+++ b/drivers/misc/genwqe/card_dev.c
@@ -0,0 +1,1412 @@
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Character device representation of the GenWQE device. This allows
+ * user-space applications to communicate with the card.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/sched/signal.h>
+#include <linux/wait.h>
+#include <linux/delay.h>
+#include <linux/atomic.h>
+
+#include "card_base.h"
+#include "card_ddcb.h"
+
+static int genwqe_open_files(struct genwqe_dev *cd)
+{
+	int rc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cd->file_lock, flags);
+	rc = list_empty(&cd->file_list);
+	spin_unlock_irqrestore(&cd->file_lock, flags);
+	return !rc;
+}
+
+static void genwqe_add_file(struct genwqe_dev *cd, struct genwqe_file *cfile)
+{
+	unsigned long flags;
+
+	cfile->opener = get_pid(task_tgid(current));
+	spin_lock_irqsave(&cd->file_lock, flags);
+	list_add(&cfile->list, &cd->file_list);
+	spin_unlock_irqrestore(&cd->file_lock, flags);
+}
+
+static int genwqe_del_file(struct genwqe_dev *cd, struct genwqe_file *cfile)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cd->file_lock, flags);
+	list_del(&cfile->list);
+	spin_unlock_irqrestore(&cd->file_lock, flags);
+	put_pid(cfile->opener);
+
+	return 0;
+}
+
+static void genwqe_add_pin(struct genwqe_file *cfile, struct dma_mapping *m)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cfile->pin_lock, flags);
+	list_add(&m->pin_list, &cfile->pin_list);
+	spin_unlock_irqrestore(&cfile->pin_lock, flags);
+}
+
+static int genwqe_del_pin(struct genwqe_file *cfile, struct dma_mapping *m)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cfile->pin_lock, flags);
+	list_del(&m->pin_list);
+	spin_unlock_irqrestore(&cfile->pin_lock, flags);
+
+	return 0;
+}
+
+/**
+ * genwqe_search_pin() - Search for the mapping for a userspace address
+ * @cfile:	Descriptor of opened file
+ * @u_addr:	User virtual address
+ * @size:	Size of buffer
+ * @dma_addr:	DMA address to be updated
+ *
+ * Return: Pointer to the corresponding mapping	NULL if not found
+ */
+static struct dma_mapping *genwqe_search_pin(struct genwqe_file *cfile,
+					    unsigned long u_addr,
+					    unsigned int size,
+					    void **virt_addr)
+{
+	unsigned long flags;
+	struct dma_mapping *m;
+
+	spin_lock_irqsave(&cfile->pin_lock, flags);
+
+	list_for_each_entry(m, &cfile->pin_list, pin_list) {
+		if ((((u64)m->u_vaddr) <= (u_addr)) &&
+		    (((u64)m->u_vaddr + m->size) >= (u_addr + size))) {
+
+			if (virt_addr)
+				*virt_addr = m->k_vaddr +
+					(u_addr - (u64)m->u_vaddr);
+
+			spin_unlock_irqrestore(&cfile->pin_lock, flags);
+			return m;
+		}
+	}
+	spin_unlock_irqrestore(&cfile->pin_lock, flags);
+	return NULL;
+}
+
+static void __genwqe_add_mapping(struct genwqe_file *cfile,
+			      struct dma_mapping *dma_map)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cfile->map_lock, flags);
+	list_add(&dma_map->card_list, &cfile->map_list);
+	spin_unlock_irqrestore(&cfile->map_lock, flags);
+}
+
+static void __genwqe_del_mapping(struct genwqe_file *cfile,
+			      struct dma_mapping *dma_map)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cfile->map_lock, flags);
+	list_del(&dma_map->card_list);
+	spin_unlock_irqrestore(&cfile->map_lock, flags);
+}
+
+
+/**
+ * __genwqe_search_mapping() - Search for the mapping for a userspace address
+ * @cfile:	descriptor of opened file
+ * @u_addr:	user virtual address
+ * @size:	size of buffer
+ * @dma_addr:	DMA address to be updated
+ * Return: Pointer to the corresponding mapping	NULL if not found
+ */
+static struct dma_mapping *__genwqe_search_mapping(struct genwqe_file *cfile,
+						   unsigned long u_addr,
+						   unsigned int size,
+						   dma_addr_t *dma_addr,
+						   void **virt_addr)
+{
+	unsigned long flags;
+	struct dma_mapping *m;
+	struct pci_dev *pci_dev = cfile->cd->pci_dev;
+
+	spin_lock_irqsave(&cfile->map_lock, flags);
+	list_for_each_entry(m, &cfile->map_list, card_list) {
+
+		if ((((u64)m->u_vaddr) <= (u_addr)) &&
+		    (((u64)m->u_vaddr + m->size) >= (u_addr + size))) {
+
+			/* match found: current is as expected and
+			   addr is in range */
+			if (dma_addr)
+				*dma_addr = m->dma_addr +
+					(u_addr - (u64)m->u_vaddr);
+
+			if (virt_addr)
+				*virt_addr = m->k_vaddr +
+					(u_addr - (u64)m->u_vaddr);
+
+			spin_unlock_irqrestore(&cfile->map_lock, flags);
+			return m;
+		}
+	}
+	spin_unlock_irqrestore(&cfile->map_lock, flags);
+
+	dev_err(&pci_dev->dev,
+		"[%s] Entry not found: u_addr=%lx, size=%x\n",
+		__func__, u_addr, size);
+
+	return NULL;
+}
+
+static void genwqe_remove_mappings(struct genwqe_file *cfile)
+{
+	int i = 0;
+	struct list_head *node, *next;
+	struct dma_mapping *dma_map;
+	struct genwqe_dev *cd = cfile->cd;
+	struct pci_dev *pci_dev = cfile->cd->pci_dev;
+
+	list_for_each_safe(node, next, &cfile->map_list) {
+		dma_map = list_entry(node, struct dma_mapping, card_list);
+
+		list_del_init(&dma_map->card_list);
+
+		/*
+		 * This is really a bug, because those things should
+		 * have been already tidied up.
+		 *
+		 * GENWQE_MAPPING_RAW should have been removed via mmunmap().
+		 * GENWQE_MAPPING_SGL_TEMP should be removed by tidy up code.
+		 */
+		dev_err(&pci_dev->dev,
+			"[%s] %d. cleanup mapping: u_vaddr=%p u_kaddr=%016lx dma_addr=%lx\n",
+			__func__, i++, dma_map->u_vaddr,
+			(unsigned long)dma_map->k_vaddr,
+			(unsigned long)dma_map->dma_addr);
+
+		if (dma_map->type == GENWQE_MAPPING_RAW) {
+			/* we allocated this dynamically */
+			__genwqe_free_consistent(cd, dma_map->size,
+						dma_map->k_vaddr,
+						dma_map->dma_addr);
+			kfree(dma_map);
+		} else if (dma_map->type == GENWQE_MAPPING_SGL_TEMP) {
+			/* we use dma_map statically from the request */
+			genwqe_user_vunmap(cd, dma_map);
+		}
+	}
+}
+
+static void genwqe_remove_pinnings(struct genwqe_file *cfile)
+{
+	struct list_head *node, *next;
+	struct dma_mapping *dma_map;
+	struct genwqe_dev *cd = cfile->cd;
+
+	list_for_each_safe(node, next, &cfile->pin_list) {
+		dma_map = list_entry(node, struct dma_mapping, pin_list);
+
+		/*
+		 * This is not a bug, because a killed processed might
+		 * not call the unpin ioctl, which is supposed to free
+		 * the resources.
+		 *
+		 * Pinnings are dymically allocated and need to be
+		 * deleted.
+		 */
+		list_del_init(&dma_map->pin_list);
+		genwqe_user_vunmap(cd, dma_map);
+		kfree(dma_map);
+	}
+}
+
+/**
+ * genwqe_kill_fasync() - Send signal to all processes with open GenWQE files
+ *
+ * E.g. genwqe_send_signal(cd, SIGIO);
+ */
+static int genwqe_kill_fasync(struct genwqe_dev *cd, int sig)
+{
+	unsigned int files = 0;
+	unsigned long flags;
+	struct genwqe_file *cfile;
+
+	spin_lock_irqsave(&cd->file_lock, flags);
+	list_for_each_entry(cfile, &cd->file_list, list) {
+		if (cfile->async_queue)
+			kill_fasync(&cfile->async_queue, sig, POLL_HUP);
+		files++;
+	}
+	spin_unlock_irqrestore(&cd->file_lock, flags);
+	return files;
+}
+
+static int genwqe_terminate(struct genwqe_dev *cd)
+{
+	unsigned int files = 0;
+	unsigned long flags;
+	struct genwqe_file *cfile;
+
+	spin_lock_irqsave(&cd->file_lock, flags);
+	list_for_each_entry(cfile, &cd->file_list, list) {
+		kill_pid(cfile->opener, SIGKILL, 1);
+		files++;
+	}
+	spin_unlock_irqrestore(&cd->file_lock, flags);
+	return files;
+}
+
+/**
+ * genwqe_open() - file open
+ * @inode:      file system information
+ * @filp:	file handle
+ *
+ * This function is executed whenever an application calls
+ * open("/dev/genwqe",..).
+ *
+ * Return: 0 if successful or <0 if errors
+ */
+static int genwqe_open(struct inode *inode, struct file *filp)
+{
+	struct genwqe_dev *cd;
+	struct genwqe_file *cfile;
+
+	cfile = kzalloc(sizeof(*cfile), GFP_KERNEL);
+	if (cfile == NULL)
+		return -ENOMEM;
+
+	cd = container_of(inode->i_cdev, struct genwqe_dev, cdev_genwqe);
+	cfile->cd = cd;
+	cfile->filp = filp;
+	cfile->client = NULL;
+
+	spin_lock_init(&cfile->map_lock);  /* list of raw memory allocations */
+	INIT_LIST_HEAD(&cfile->map_list);
+
+	spin_lock_init(&cfile->pin_lock);  /* list of user pinned memory */
+	INIT_LIST_HEAD(&cfile->pin_list);
+
+	filp->private_data = cfile;
+
+	genwqe_add_file(cd, cfile);
+	return 0;
+}
+
+/**
+ * genwqe_fasync() - Setup process to receive SIGIO.
+ * @fd:        file descriptor
+ * @filp:      file handle
+ * @mode:      file mode
+ *
+ * Sending a signal is working as following:
+ *
+ * if (cdev->async_queue)
+ *         kill_fasync(&cdev->async_queue, SIGIO, POLL_IN);
+ *
+ * Some devices also implement asynchronous notification to indicate
+ * when the device can be written; in this case, of course,
+ * kill_fasync must be called with a mode of POLL_OUT.
+ */
+static int genwqe_fasync(int fd, struct file *filp, int mode)
+{
+	struct genwqe_file *cdev = (struct genwqe_file *)filp->private_data;
+
+	return fasync_helper(fd, filp, mode, &cdev->async_queue);
+}
+
+
+/**
+ * genwqe_release() - file close
+ * @inode:      file system information
+ * @filp:       file handle
+ *
+ * This function is executed whenever an application calls 'close(fd_genwqe)'
+ *
+ * Return: always 0
+ */
+static int genwqe_release(struct inode *inode, struct file *filp)
+{
+	struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data;
+	struct genwqe_dev *cd = cfile->cd;
+
+	/* there must be no entries in these lists! */
+	genwqe_remove_mappings(cfile);
+	genwqe_remove_pinnings(cfile);
+
+	/* remove this filp from the asynchronously notified filp's */
+	genwqe_fasync(-1, filp, 0);
+
+	/*
+	 * For this to work we must not release cd when this cfile is
+	 * not yet released, otherwise the list entry is invalid,
+	 * because the list itself gets reinstantiated!
+	 */
+	genwqe_del_file(cd, cfile);
+	kfree(cfile);
+	return 0;
+}
+
+static void genwqe_vma_open(struct vm_area_struct *vma)
+{
+	/* nothing ... */
+}
+
+/**
+ * genwqe_vma_close() - Called each time when vma is unmapped
+ *
+ * Free memory which got allocated by GenWQE mmap().
+ */
+static void genwqe_vma_close(struct vm_area_struct *vma)
+{
+	unsigned long vsize = vma->vm_end - vma->vm_start;
+	struct inode *inode = file_inode(vma->vm_file);
+	struct dma_mapping *dma_map;
+	struct genwqe_dev *cd = container_of(inode->i_cdev, struct genwqe_dev,
+					    cdev_genwqe);
+	struct pci_dev *pci_dev = cd->pci_dev;
+	dma_addr_t d_addr = 0;
+	struct genwqe_file *cfile = vma->vm_private_data;
+
+	dma_map = __genwqe_search_mapping(cfile, vma->vm_start, vsize,
+					 &d_addr, NULL);
+	if (dma_map == NULL) {
+		dev_err(&pci_dev->dev,
+			"  [%s] err: mapping not found: v=%lx, p=%lx s=%lx\n",
+			__func__, vma->vm_start, vma->vm_pgoff << PAGE_SHIFT,
+			vsize);
+		return;
+	}
+	__genwqe_del_mapping(cfile, dma_map);
+	__genwqe_free_consistent(cd, dma_map->size, dma_map->k_vaddr,
+				 dma_map->dma_addr);
+	kfree(dma_map);
+}
+
+static const struct vm_operations_struct genwqe_vma_ops = {
+	.open   = genwqe_vma_open,
+	.close  = genwqe_vma_close,
+};
+
+/**
+ * genwqe_mmap() - Provide contignous buffers to userspace
+ *
+ * We use mmap() to allocate contignous buffers used for DMA
+ * transfers. After the buffer is allocated we remap it to user-space
+ * and remember a reference to our dma_mapping data structure, where
+ * we store the associated DMA address and allocated size.
+ *
+ * When we receive a DDCB execution request with the ATS bits set to
+ * plain buffer, we lookup our dma_mapping list to find the
+ * corresponding DMA address for the associated user-space address.
+ */
+static int genwqe_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	int rc;
+	unsigned long pfn, vsize = vma->vm_end - vma->vm_start;
+	struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data;
+	struct genwqe_dev *cd = cfile->cd;
+	struct dma_mapping *dma_map;
+
+	if (vsize == 0)
+		return -EINVAL;
+
+	if (get_order(vsize) > MAX_ORDER)
+		return -ENOMEM;
+
+	dma_map = kzalloc(sizeof(struct dma_mapping), GFP_KERNEL);
+	if (dma_map == NULL)
+		return -ENOMEM;
+
+	genwqe_mapping_init(dma_map, GENWQE_MAPPING_RAW);
+	dma_map->u_vaddr = (void *)vma->vm_start;
+	dma_map->size = vsize;
+	dma_map->nr_pages = DIV_ROUND_UP(vsize, PAGE_SIZE);
+	dma_map->k_vaddr = __genwqe_alloc_consistent(cd, vsize,
+						     &dma_map->dma_addr);
+	if (dma_map->k_vaddr == NULL) {
+		rc = -ENOMEM;
+		goto free_dma_map;
+	}
+
+	if (capable(CAP_SYS_ADMIN) && (vsize > sizeof(dma_addr_t)))
+		*(dma_addr_t *)dma_map->k_vaddr = dma_map->dma_addr;
+
+	pfn = virt_to_phys(dma_map->k_vaddr) >> PAGE_SHIFT;
+	rc = remap_pfn_range(vma,
+			     vma->vm_start,
+			     pfn,
+			     vsize,
+			     vma->vm_page_prot);
+	if (rc != 0) {
+		rc = -EFAULT;
+		goto free_dma_mem;
+	}
+
+	vma->vm_private_data = cfile;
+	vma->vm_ops = &genwqe_vma_ops;
+	__genwqe_add_mapping(cfile, dma_map);
+
+	return 0;
+
+ free_dma_mem:
+	__genwqe_free_consistent(cd, dma_map->size,
+				dma_map->k_vaddr,
+				dma_map->dma_addr);
+ free_dma_map:
+	kfree(dma_map);
+	return rc;
+}
+
+/**
+ * do_flash_update() - Excute flash update (write image or CVPD)
+ * @cd:        genwqe device
+ * @load:      details about image load
+ *
+ * Return: 0 if successful
+ */
+
+#define	FLASH_BLOCK	0x40000	/* we use 256k blocks */
+
+static int do_flash_update(struct genwqe_file *cfile,
+			   struct genwqe_bitstream *load)
+{
+	int rc = 0;
+	int blocks_to_flash;
+	dma_addr_t dma_addr;
+	u64 flash = 0;
+	size_t tocopy = 0;
+	u8 __user *buf;
+	u8 *xbuf;
+	u32 crc;
+	u8 cmdopts;
+	struct genwqe_dev *cd = cfile->cd;
+	struct file *filp = cfile->filp;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if ((load->size & 0x3) != 0)
+		return -EINVAL;
+
+	if (((unsigned long)(load->data_addr) & ~PAGE_MASK) != 0)
+		return -EINVAL;
+
+	/* FIXME Bits have changed for new service layer! */
+	switch ((char)load->partition) {
+	case '0':
+		cmdopts = 0x14;
+		break;		/* download/erase_first/part_0 */
+	case '1':
+		cmdopts = 0x1C;
+		break;		/* download/erase_first/part_1 */
+	case 'v':
+		cmdopts = 0x0C;
+		break;		/* download/erase_first/vpd */
+	default:
+		return -EINVAL;
+	}
+
+	buf = (u8 __user *)load->data_addr;
+	xbuf = __genwqe_alloc_consistent(cd, FLASH_BLOCK, &dma_addr);
+	if (xbuf == NULL)
+		return -ENOMEM;
+
+	blocks_to_flash = load->size / FLASH_BLOCK;
+	while (load->size) {
+		struct genwqe_ddcb_cmd *req;
+
+		/*
+		 * We must be 4 byte aligned. Buffer must be 0 appened
+		 * to have defined values when calculating CRC.
+		 */
+		tocopy = min_t(size_t, load->size, FLASH_BLOCK);
+
+		rc = copy_from_user(xbuf, buf, tocopy);
+		if (rc) {
+			rc = -EFAULT;
+			goto free_buffer;
+		}
+		crc = genwqe_crc32(xbuf, tocopy, 0xffffffff);
+
+		dev_dbg(&pci_dev->dev,
+			"[%s] DMA: %lx CRC: %08x SZ: %ld %d\n",
+			__func__, (unsigned long)dma_addr, crc, tocopy,
+			blocks_to_flash);
+
+		/* prepare DDCB for SLU process */
+		req = ddcb_requ_alloc();
+		if (req == NULL) {
+			rc = -ENOMEM;
+			goto free_buffer;
+		}
+
+		req->cmd = SLCMD_MOVE_FLASH;
+		req->cmdopts = cmdopts;
+
+		/* prepare invariant values */
+		if (genwqe_get_slu_id(cd) <= 0x2) {
+			*(__be64 *)&req->__asiv[0]  = cpu_to_be64(dma_addr);
+			*(__be64 *)&req->__asiv[8]  = cpu_to_be64(tocopy);
+			*(__be64 *)&req->__asiv[16] = cpu_to_be64(flash);
+			*(__be32 *)&req->__asiv[24] = cpu_to_be32(0);
+			req->__asiv[24]	       = load->uid;
+			*(__be32 *)&req->__asiv[28] = cpu_to_be32(crc);
+
+			/* for simulation only */
+			*(__be64 *)&req->__asiv[88] = cpu_to_be64(load->slu_id);
+			*(__be64 *)&req->__asiv[96] = cpu_to_be64(load->app_id);
+			req->asiv_length = 32; /* bytes included in crc calc */
+		} else {	/* setup DDCB for ATS architecture */
+			*(__be64 *)&req->asiv[0]  = cpu_to_be64(dma_addr);
+			*(__be32 *)&req->asiv[8]  = cpu_to_be32(tocopy);
+			*(__be32 *)&req->asiv[12] = cpu_to_be32(0); /* resvd */
+			*(__be64 *)&req->asiv[16] = cpu_to_be64(flash);
+			*(__be32 *)&req->asiv[24] = cpu_to_be32(load->uid<<24);
+			*(__be32 *)&req->asiv[28] = cpu_to_be32(crc);
+
+			/* for simulation only */
+			*(__be64 *)&req->asiv[80] = cpu_to_be64(load->slu_id);
+			*(__be64 *)&req->asiv[88] = cpu_to_be64(load->app_id);
+
+			/* Rd only */
+			req->ats = 0x4ULL << 44;
+			req->asiv_length = 40; /* bytes included in crc calc */
+		}
+		req->asv_length  = 8;
+
+		/* For Genwqe5 we get back the calculated CRC */
+		*(u64 *)&req->asv[0] = 0ULL;			/* 0x80 */
+
+		rc = __genwqe_execute_raw_ddcb(cd, req, filp->f_flags);
+
+		load->retc = req->retc;
+		load->attn = req->attn;
+		load->progress = req->progress;
+
+		if (rc < 0) {
+			ddcb_requ_free(req);
+			goto free_buffer;
+		}
+
+		if (req->retc != DDCB_RETC_COMPLETE) {
+			rc = -EIO;
+			ddcb_requ_free(req);
+			goto free_buffer;
+		}
+
+		load->size  -= tocopy;
+		flash += tocopy;
+		buf += tocopy;
+		blocks_to_flash--;
+		ddcb_requ_free(req);
+	}
+
+ free_buffer:
+	__genwqe_free_consistent(cd, FLASH_BLOCK, xbuf, dma_addr);
+	return rc;
+}
+
+static int do_flash_read(struct genwqe_file *cfile,
+			 struct genwqe_bitstream *load)
+{
+	int rc, blocks_to_flash;
+	dma_addr_t dma_addr;
+	u64 flash = 0;
+	size_t tocopy = 0;
+	u8 __user *buf;
+	u8 *xbuf;
+	u8 cmdopts;
+	struct genwqe_dev *cd = cfile->cd;
+	struct file *filp = cfile->filp;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	struct genwqe_ddcb_cmd *cmd;
+
+	if ((load->size & 0x3) != 0)
+		return -EINVAL;
+
+	if (((unsigned long)(load->data_addr) & ~PAGE_MASK) != 0)
+		return -EINVAL;
+
+	/* FIXME Bits have changed for new service layer! */
+	switch ((char)load->partition) {
+	case '0':
+		cmdopts = 0x12;
+		break;		/* upload/part_0 */
+	case '1':
+		cmdopts = 0x1A;
+		break;		/* upload/part_1 */
+	case 'v':
+		cmdopts = 0x0A;
+		break;		/* upload/vpd */
+	default:
+		return -EINVAL;
+	}
+
+	buf = (u8 __user *)load->data_addr;
+	xbuf = __genwqe_alloc_consistent(cd, FLASH_BLOCK, &dma_addr);
+	if (xbuf == NULL)
+		return -ENOMEM;
+
+	blocks_to_flash = load->size / FLASH_BLOCK;
+	while (load->size) {
+		/*
+		 * We must be 4 byte aligned. Buffer must be 0 appened
+		 * to have defined values when calculating CRC.
+		 */
+		tocopy = min_t(size_t, load->size, FLASH_BLOCK);
+
+		dev_dbg(&pci_dev->dev,
+			"[%s] DMA: %lx SZ: %ld %d\n",
+			__func__, (unsigned long)dma_addr, tocopy,
+			blocks_to_flash);
+
+		/* prepare DDCB for SLU process */
+		cmd = ddcb_requ_alloc();
+		if (cmd == NULL) {
+			rc = -ENOMEM;
+			goto free_buffer;
+		}
+		cmd->cmd = SLCMD_MOVE_FLASH;
+		cmd->cmdopts = cmdopts;
+
+		/* prepare invariant values */
+		if (genwqe_get_slu_id(cd) <= 0x2) {
+			*(__be64 *)&cmd->__asiv[0]  = cpu_to_be64(dma_addr);
+			*(__be64 *)&cmd->__asiv[8]  = cpu_to_be64(tocopy);
+			*(__be64 *)&cmd->__asiv[16] = cpu_to_be64(flash);
+			*(__be32 *)&cmd->__asiv[24] = cpu_to_be32(0);
+			cmd->__asiv[24] = load->uid;
+			*(__be32 *)&cmd->__asiv[28] = cpu_to_be32(0) /* CRC */;
+			cmd->asiv_length = 32; /* bytes included in crc calc */
+		} else {	/* setup DDCB for ATS architecture */
+			*(__be64 *)&cmd->asiv[0]  = cpu_to_be64(dma_addr);
+			*(__be32 *)&cmd->asiv[8]  = cpu_to_be32(tocopy);
+			*(__be32 *)&cmd->asiv[12] = cpu_to_be32(0); /* resvd */
+			*(__be64 *)&cmd->asiv[16] = cpu_to_be64(flash);
+			*(__be32 *)&cmd->asiv[24] = cpu_to_be32(load->uid<<24);
+			*(__be32 *)&cmd->asiv[28] = cpu_to_be32(0); /* CRC */
+
+			/* rd/wr */
+			cmd->ats = 0x5ULL << 44;
+			cmd->asiv_length = 40; /* bytes included in crc calc */
+		}
+		cmd->asv_length  = 8;
+
+		/* we only get back the calculated CRC */
+		*(u64 *)&cmd->asv[0] = 0ULL;	/* 0x80 */
+
+		rc = __genwqe_execute_raw_ddcb(cd, cmd, filp->f_flags);
+
+		load->retc = cmd->retc;
+		load->attn = cmd->attn;
+		load->progress = cmd->progress;
+
+		if ((rc < 0) && (rc != -EBADMSG)) {
+			ddcb_requ_free(cmd);
+			goto free_buffer;
+		}
+
+		rc = copy_to_user(buf, xbuf, tocopy);
+		if (rc) {
+			rc = -EFAULT;
+			ddcb_requ_free(cmd);
+			goto free_buffer;
+		}
+
+		/* We know that we can get retc 0x104 with CRC err */
+		if (((cmd->retc == DDCB_RETC_FAULT) &&
+		     (cmd->attn != 0x02)) ||  /* Normally ignore CRC error */
+		    ((cmd->retc == DDCB_RETC_COMPLETE) &&
+		     (cmd->attn != 0x00))) {  /* Everything was fine */
+			rc = -EIO;
+			ddcb_requ_free(cmd);
+			goto free_buffer;
+		}
+
+		load->size  -= tocopy;
+		flash += tocopy;
+		buf += tocopy;
+		blocks_to_flash--;
+		ddcb_requ_free(cmd);
+	}
+	rc = 0;
+
+ free_buffer:
+	__genwqe_free_consistent(cd, FLASH_BLOCK, xbuf, dma_addr);
+	return rc;
+}
+
+static int genwqe_pin_mem(struct genwqe_file *cfile, struct genwqe_mem *m)
+{
+	int rc;
+	struct genwqe_dev *cd = cfile->cd;
+	struct pci_dev *pci_dev = cfile->cd->pci_dev;
+	struct dma_mapping *dma_map;
+	unsigned long map_addr;
+	unsigned long map_size;
+
+	if ((m->addr == 0x0) || (m->size == 0))
+		return -EINVAL;
+	if (m->size > ULONG_MAX - PAGE_SIZE - (m->addr & ~PAGE_MASK))
+		return -EINVAL;
+
+	map_addr = (m->addr & PAGE_MASK);
+	map_size = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE);
+
+	dma_map = kzalloc(sizeof(struct dma_mapping), GFP_KERNEL);
+	if (dma_map == NULL)
+		return -ENOMEM;
+
+	genwqe_mapping_init(dma_map, GENWQE_MAPPING_SGL_PINNED);
+	rc = genwqe_user_vmap(cd, dma_map, (void *)map_addr, map_size);
+	if (rc != 0) {
+		dev_err(&pci_dev->dev,
+			"[%s] genwqe_user_vmap rc=%d\n", __func__, rc);
+		kfree(dma_map);
+		return rc;
+	}
+
+	genwqe_add_pin(cfile, dma_map);
+	return 0;
+}
+
+static int genwqe_unpin_mem(struct genwqe_file *cfile, struct genwqe_mem *m)
+{
+	struct genwqe_dev *cd = cfile->cd;
+	struct dma_mapping *dma_map;
+	unsigned long map_addr;
+	unsigned long map_size;
+
+	if (m->addr == 0x0)
+		return -EINVAL;
+
+	map_addr = (m->addr & PAGE_MASK);
+	map_size = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE);
+
+	dma_map = genwqe_search_pin(cfile, map_addr, map_size, NULL);
+	if (dma_map == NULL)
+		return -ENOENT;
+
+	genwqe_del_pin(cfile, dma_map);
+	genwqe_user_vunmap(cd, dma_map);
+	kfree(dma_map);
+	return 0;
+}
+
+/**
+ * ddcb_cmd_cleanup() - Remove dynamically created fixup entries
+ *
+ * Only if there are any. Pinnings are not removed.
+ */
+static int ddcb_cmd_cleanup(struct genwqe_file *cfile, struct ddcb_requ *req)
+{
+	unsigned int i;
+	struct dma_mapping *dma_map;
+	struct genwqe_dev *cd = cfile->cd;
+
+	for (i = 0; i < DDCB_FIXUPS; i++) {
+		dma_map = &req->dma_mappings[i];
+
+		if (dma_mapping_used(dma_map)) {
+			__genwqe_del_mapping(cfile, dma_map);
+			genwqe_user_vunmap(cd, dma_map);
+		}
+		if (req->sgls[i].sgl != NULL)
+			genwqe_free_sync_sgl(cd, &req->sgls[i]);
+	}
+	return 0;
+}
+
+/**
+ * ddcb_cmd_fixups() - Establish DMA fixups/sglists for user memory references
+ *
+ * Before the DDCB gets executed we need to handle the fixups. We
+ * replace the user-space addresses with DMA addresses or do
+ * additional setup work e.g. generating a scatter-gather list which
+ * is used to describe the memory referred to in the fixup.
+ */
+static int ddcb_cmd_fixups(struct genwqe_file *cfile, struct ddcb_requ *req)
+{
+	int rc;
+	unsigned int asiv_offs, i;
+	struct genwqe_dev *cd = cfile->cd;
+	struct genwqe_ddcb_cmd *cmd = &req->cmd;
+	struct dma_mapping *m;
+
+	for (i = 0, asiv_offs = 0x00; asiv_offs <= 0x58;
+	     i++, asiv_offs += 0x08) {
+
+		u64 u_addr;
+		dma_addr_t d_addr;
+		u32 u_size = 0;
+		u64 ats_flags;
+
+		ats_flags = ATS_GET_FLAGS(cmd->ats, asiv_offs);
+
+		switch (ats_flags) {
+
+		case ATS_TYPE_DATA:
+			break;	/* nothing to do here */
+
+		case ATS_TYPE_FLAT_RDWR:
+		case ATS_TYPE_FLAT_RD: {
+			u_addr = be64_to_cpu(*((__be64 *)&cmd->
+					       asiv[asiv_offs]));
+			u_size = be32_to_cpu(*((__be32 *)&cmd->
+					       asiv[asiv_offs + 0x08]));
+
+			/*
+			 * No data available. Ignore u_addr in this
+			 * case and set addr to 0. Hardware must not
+			 * fetch the buffer.
+			 */
+			if (u_size == 0x0) {
+				*((__be64 *)&cmd->asiv[asiv_offs]) =
+					cpu_to_be64(0x0);
+				break;
+			}
+
+			m = __genwqe_search_mapping(cfile, u_addr, u_size,
+						   &d_addr, NULL);
+			if (m == NULL) {
+				rc = -EFAULT;
+				goto err_out;
+			}
+
+			*((__be64 *)&cmd->asiv[asiv_offs]) =
+				cpu_to_be64(d_addr);
+			break;
+		}
+
+		case ATS_TYPE_SGL_RDWR:
+		case ATS_TYPE_SGL_RD: {
+			int page_offs;
+
+			u_addr = be64_to_cpu(*((__be64 *)
+					       &cmd->asiv[asiv_offs]));
+			u_size = be32_to_cpu(*((__be32 *)
+					       &cmd->asiv[asiv_offs + 0x08]));
+
+			/*
+			 * No data available. Ignore u_addr in this
+			 * case and set addr to 0. Hardware must not
+			 * fetch the empty sgl.
+			 */
+			if (u_size == 0x0) {
+				*((__be64 *)&cmd->asiv[asiv_offs]) =
+					cpu_to_be64(0x0);
+				break;
+			}
+
+			m = genwqe_search_pin(cfile, u_addr, u_size, NULL);
+			if (m != NULL) {
+				page_offs = (u_addr -
+					     (u64)m->u_vaddr)/PAGE_SIZE;
+			} else {
+				m = &req->dma_mappings[i];
+
+				genwqe_mapping_init(m,
+						    GENWQE_MAPPING_SGL_TEMP);
+
+				if (ats_flags == ATS_TYPE_SGL_RD)
+					m->write = 0;
+
+				rc = genwqe_user_vmap(cd, m, (void *)u_addr,
+						      u_size);
+				if (rc != 0)
+					goto err_out;
+
+				__genwqe_add_mapping(cfile, m);
+				page_offs = 0;
+			}
+
+			/* create genwqe style scatter gather list */
+			rc = genwqe_alloc_sync_sgl(cd, &req->sgls[i],
+						   (void __user *)u_addr,
+						   u_size, m->write);
+			if (rc != 0)
+				goto err_out;
+
+			genwqe_setup_sgl(cd, &req->sgls[i],
+					 &m->dma_list[page_offs]);
+
+			*((__be64 *)&cmd->asiv[asiv_offs]) =
+				cpu_to_be64(req->sgls[i].sgl_dma_addr);
+
+			break;
+		}
+		default:
+			rc = -EINVAL;
+			goto err_out;
+		}
+	}
+	return 0;
+
+ err_out:
+	ddcb_cmd_cleanup(cfile, req);
+	return rc;
+}
+
+/**
+ * genwqe_execute_ddcb() - Execute DDCB using userspace address fixups
+ *
+ * The code will build up the translation tables or lookup the
+ * contignous memory allocation table to find the right translations
+ * and DMA addresses.
+ */
+static int genwqe_execute_ddcb(struct genwqe_file *cfile,
+			       struct genwqe_ddcb_cmd *cmd)
+{
+	int rc;
+	struct genwqe_dev *cd = cfile->cd;
+	struct file *filp = cfile->filp;
+	struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd);
+
+	rc = ddcb_cmd_fixups(cfile, req);
+	if (rc != 0)
+		return rc;
+
+	rc = __genwqe_execute_raw_ddcb(cd, cmd, filp->f_flags);
+	ddcb_cmd_cleanup(cfile, req);
+	return rc;
+}
+
+static int do_execute_ddcb(struct genwqe_file *cfile,
+			   unsigned long arg, int raw)
+{
+	int rc;
+	struct genwqe_ddcb_cmd *cmd;
+	struct genwqe_dev *cd = cfile->cd;
+	struct file *filp = cfile->filp;
+
+	cmd = ddcb_requ_alloc();
+	if (cmd == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(cmd, (void __user *)arg, sizeof(*cmd))) {
+		ddcb_requ_free(cmd);
+		return -EFAULT;
+	}
+
+	if (!raw)
+		rc = genwqe_execute_ddcb(cfile, cmd);
+	else
+		rc = __genwqe_execute_raw_ddcb(cd, cmd, filp->f_flags);
+
+	/* Copy back only the modifed fields. Do not copy ASIV
+	   back since the copy got modified by the driver. */
+	if (copy_to_user((void __user *)arg, cmd,
+			 sizeof(*cmd) - DDCB_ASIV_LENGTH)) {
+		ddcb_requ_free(cmd);
+		return -EFAULT;
+	}
+
+	ddcb_requ_free(cmd);
+	return rc;
+}
+
+/**
+ * genwqe_ioctl() - IO control
+ * @filp:       file handle
+ * @cmd:        command identifier (passed from user)
+ * @arg:        argument (passed from user)
+ *
+ * Return: 0 success
+ */
+static long genwqe_ioctl(struct file *filp, unsigned int cmd,
+			 unsigned long arg)
+{
+	int rc = 0;
+	struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data;
+	struct genwqe_dev *cd = cfile->cd;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	struct genwqe_reg_io __user *io;
+	u64 val;
+	u32 reg_offs;
+
+	/* Return -EIO if card hit EEH */
+	if (pci_channel_offline(pci_dev))
+		return -EIO;
+
+	if (_IOC_TYPE(cmd) != GENWQE_IOC_CODE)
+		return -EINVAL;
+
+	switch (cmd) {
+
+	case GENWQE_GET_CARD_STATE:
+		put_user(cd->card_state, (enum genwqe_card_state __user *)arg);
+		return 0;
+
+		/* Register access */
+	case GENWQE_READ_REG64: {
+		io = (struct genwqe_reg_io __user *)arg;
+
+		if (get_user(reg_offs, &io->num))
+			return -EFAULT;
+
+		if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x7))
+			return -EINVAL;
+
+		val = __genwqe_readq(cd, reg_offs);
+		put_user(val, &io->val64);
+		return 0;
+	}
+
+	case GENWQE_WRITE_REG64: {
+		io = (struct genwqe_reg_io __user *)arg;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if ((filp->f_flags & O_ACCMODE) == O_RDONLY)
+			return -EPERM;
+
+		if (get_user(reg_offs, &io->num))
+			return -EFAULT;
+
+		if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x7))
+			return -EINVAL;
+
+		if (get_user(val, &io->val64))
+			return -EFAULT;
+
+		__genwqe_writeq(cd, reg_offs, val);
+		return 0;
+	}
+
+	case GENWQE_READ_REG32: {
+		io = (struct genwqe_reg_io __user *)arg;
+
+		if (get_user(reg_offs, &io->num))
+			return -EFAULT;
+
+		if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x3))
+			return -EINVAL;
+
+		val = __genwqe_readl(cd, reg_offs);
+		put_user(val, &io->val64);
+		return 0;
+	}
+
+	case GENWQE_WRITE_REG32: {
+		io = (struct genwqe_reg_io __user *)arg;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if ((filp->f_flags & O_ACCMODE) == O_RDONLY)
+			return -EPERM;
+
+		if (get_user(reg_offs, &io->num))
+			return -EFAULT;
+
+		if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x3))
+			return -EINVAL;
+
+		if (get_user(val, &io->val64))
+			return -EFAULT;
+
+		__genwqe_writel(cd, reg_offs, val);
+		return 0;
+	}
+
+		/* Flash update/reading */
+	case GENWQE_SLU_UPDATE: {
+		struct genwqe_bitstream load;
+
+		if (!genwqe_is_privileged(cd))
+			return -EPERM;
+
+		if ((filp->f_flags & O_ACCMODE) == O_RDONLY)
+			return -EPERM;
+
+		if (copy_from_user(&load, (void __user *)arg,
+				   sizeof(load)))
+			return -EFAULT;
+
+		rc = do_flash_update(cfile, &load);
+
+		if (copy_to_user((void __user *)arg, &load, sizeof(load)))
+			return -EFAULT;
+
+		return rc;
+	}
+
+	case GENWQE_SLU_READ: {
+		struct genwqe_bitstream load;
+
+		if (!genwqe_is_privileged(cd))
+			return -EPERM;
+
+		if (genwqe_flash_readback_fails(cd))
+			return -ENOSPC;	 /* known to fail for old versions */
+
+		if (copy_from_user(&load, (void __user *)arg, sizeof(load)))
+			return -EFAULT;
+
+		rc = do_flash_read(cfile, &load);
+
+		if (copy_to_user((void __user *)arg, &load, sizeof(load)))
+			return -EFAULT;
+
+		return rc;
+	}
+
+		/* memory pinning and unpinning */
+	case GENWQE_PIN_MEM: {
+		struct genwqe_mem m;
+
+		if (copy_from_user(&m, (void __user *)arg, sizeof(m)))
+			return -EFAULT;
+
+		return genwqe_pin_mem(cfile, &m);
+	}
+
+	case GENWQE_UNPIN_MEM: {
+		struct genwqe_mem m;
+
+		if (copy_from_user(&m, (void __user *)arg, sizeof(m)))
+			return -EFAULT;
+
+		return genwqe_unpin_mem(cfile, &m);
+	}
+
+		/* launch an DDCB and wait for completion */
+	case GENWQE_EXECUTE_DDCB:
+		return do_execute_ddcb(cfile, arg, 0);
+
+	case GENWQE_EXECUTE_RAW_DDCB: {
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		return do_execute_ddcb(cfile, arg, 1);
+	}
+
+	default:
+		return -EINVAL;
+	}
+
+	return rc;
+}
+
+#if defined(CONFIG_COMPAT)
+/**
+ * genwqe_compat_ioctl() - Compatibility ioctl
+ *
+ * Called whenever a 32-bit process running under a 64-bit kernel
+ * performs an ioctl on /dev/genwqe<n>_card.
+ *
+ * @filp:        file pointer.
+ * @cmd:         command.
+ * @arg:         user argument.
+ * Return:       zero on success or negative number on failure.
+ */
+static long genwqe_compat_ioctl(struct file *filp, unsigned int cmd,
+				unsigned long arg)
+{
+	return genwqe_ioctl(filp, cmd, arg);
+}
+#endif /* defined(CONFIG_COMPAT) */
+
+static const struct file_operations genwqe_fops = {
+	.owner		= THIS_MODULE,
+	.open		= genwqe_open,
+	.fasync		= genwqe_fasync,
+	.mmap		= genwqe_mmap,
+	.unlocked_ioctl	= genwqe_ioctl,
+#if defined(CONFIG_COMPAT)
+	.compat_ioctl   = genwqe_compat_ioctl,
+#endif
+	.release	= genwqe_release,
+};
+
+static int genwqe_device_initialized(struct genwqe_dev *cd)
+{
+	return cd->dev != NULL;
+}
+
+/**
+ * genwqe_device_create() - Create and configure genwqe char device
+ * @cd:      genwqe device descriptor
+ *
+ * This function must be called before we create any more genwqe
+ * character devices, because it is allocating the major and minor
+ * number which are supposed to be used by the client drivers.
+ */
+int genwqe_device_create(struct genwqe_dev *cd)
+{
+	int rc;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	/*
+	 * Here starts the individual setup per client. It must
+	 * initialize its own cdev data structure with its own fops.
+	 * The appropriate devnum needs to be created. The ranges must
+	 * not overlap.
+	 */
+	rc = alloc_chrdev_region(&cd->devnum_genwqe, 0,
+				 GENWQE_MAX_MINOR, GENWQE_DEVNAME);
+	if (rc < 0) {
+		dev_err(&pci_dev->dev, "err: alloc_chrdev_region failed\n");
+		goto err_dev;
+	}
+
+	cdev_init(&cd->cdev_genwqe, &genwqe_fops);
+	cd->cdev_genwqe.owner = THIS_MODULE;
+
+	rc = cdev_add(&cd->cdev_genwqe, cd->devnum_genwqe, 1);
+	if (rc < 0) {
+		dev_err(&pci_dev->dev, "err: cdev_add failed\n");
+		goto err_add;
+	}
+
+	/*
+	 * Finally the device in /dev/... must be created. The rule is
+	 * to use card%d_clientname for each created device.
+	 */
+	cd->dev = device_create_with_groups(cd->class_genwqe,
+					    &cd->pci_dev->dev,
+					    cd->devnum_genwqe, cd,
+					    genwqe_attribute_groups,
+					    GENWQE_DEVNAME "%u_card",
+					    cd->card_idx);
+	if (IS_ERR(cd->dev)) {
+		rc = PTR_ERR(cd->dev);
+		goto err_cdev;
+	}
+
+	rc = genwqe_init_debugfs(cd);
+	if (rc != 0)
+		goto err_debugfs;
+
+	return 0;
+
+ err_debugfs:
+	device_destroy(cd->class_genwqe, cd->devnum_genwqe);
+ err_cdev:
+	cdev_del(&cd->cdev_genwqe);
+ err_add:
+	unregister_chrdev_region(cd->devnum_genwqe, GENWQE_MAX_MINOR);
+ err_dev:
+	cd->dev = NULL;
+	return rc;
+}
+
+static int genwqe_inform_and_stop_processes(struct genwqe_dev *cd)
+{
+	int rc;
+	unsigned int i;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (!genwqe_open_files(cd))
+		return 0;
+
+	dev_warn(&pci_dev->dev, "[%s] send SIGIO and wait ...\n", __func__);
+
+	rc = genwqe_kill_fasync(cd, SIGIO);
+	if (rc > 0) {
+		/* give kill_timeout seconds to close file descriptors ... */
+		for (i = 0; (i < GENWQE_KILL_TIMEOUT) &&
+			     genwqe_open_files(cd); i++) {
+			dev_info(&pci_dev->dev, "  %d sec ...", i);
+
+			cond_resched();
+			msleep(1000);
+		}
+
+		/* if no open files we can safely continue, else ... */
+		if (!genwqe_open_files(cd))
+			return 0;
+
+		dev_warn(&pci_dev->dev,
+			 "[%s] send SIGKILL and wait ...\n", __func__);
+
+		rc = genwqe_terminate(cd);
+		if (rc) {
+			/* Give kill_timout more seconds to end processes */
+			for (i = 0; (i < GENWQE_KILL_TIMEOUT) &&
+				     genwqe_open_files(cd); i++) {
+				dev_warn(&pci_dev->dev, "  %d sec ...", i);
+
+				cond_resched();
+				msleep(1000);
+			}
+		}
+	}
+	return 0;
+}
+
+/**
+ * genwqe_device_remove() - Remove genwqe's char device
+ *
+ * This function must be called after the client devices are removed
+ * because it will free the major/minor number range for the genwqe
+ * drivers.
+ *
+ * This function must be robust enough to be called twice.
+ */
+int genwqe_device_remove(struct genwqe_dev *cd)
+{
+	int rc;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (!genwqe_device_initialized(cd))
+		return 1;
+
+	genwqe_inform_and_stop_processes(cd);
+
+	/*
+	 * We currently do wait until all filedescriptors are
+	 * closed. This leads to a problem when we abort the
+	 * application which will decrease this reference from
+	 * 1/unused to 0/illegal and not from 2/used 1/empty.
+	 */
+	rc = kref_read(&cd->cdev_genwqe.kobj.kref);
+	if (rc != 1) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: cdev_genwqe...refcount=%d\n", __func__, rc);
+		panic("Fatal err: cannot free resources with pending references!");
+	}
+
+	genqwe_exit_debugfs(cd);
+	device_destroy(cd->class_genwqe, cd->devnum_genwqe);
+	cdev_del(&cd->cdev_genwqe);
+	unregister_chrdev_region(cd->devnum_genwqe, GENWQE_MAX_MINOR);
+	cd->dev = NULL;
+
+	return 0;
+}
diff --git a/drivers/misc/genwqe/card_sysfs.c b/drivers/misc/genwqe/card_sysfs.c
new file mode 100644
index 000000000..c24c9b7c1
--- /dev/null
+++ b/drivers/misc/genwqe/card_sysfs.c
@@ -0,0 +1,303 @@
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Sysfs interfaces for the GenWQE card. There are attributes to query
+ * the version of the bitstream as well as some for the driver. For
+ * debugging, please also see the debugfs interfaces of this driver.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/sysfs.h>
+#include <linux/ctype.h>
+#include <linux/device.h>
+
+#include "card_base.h"
+#include "card_ddcb.h"
+
+static const char * const genwqe_types[] = {
+	[GENWQE_TYPE_ALTERA_230] = "GenWQE4-230",
+	[GENWQE_TYPE_ALTERA_530] = "GenWQE4-530",
+	[GENWQE_TYPE_ALTERA_A4]  = "GenWQE5-A4",
+	[GENWQE_TYPE_ALTERA_A7]  = "GenWQE5-A7",
+};
+
+static ssize_t status_show(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+	const char *cs[GENWQE_CARD_STATE_MAX] = { "unused", "used", "error" };
+
+	return sprintf(buf, "%s\n", cs[cd->card_state]);
+}
+static DEVICE_ATTR_RO(status);
+
+static ssize_t appid_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	char app_name[5];
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+	genwqe_read_app_id(cd, app_name, sizeof(app_name));
+	return sprintf(buf, "%s\n", app_name);
+}
+static DEVICE_ATTR_RO(appid);
+
+static ssize_t version_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	u64 slu_id, app_id;
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+	slu_id = __genwqe_readq(cd, IO_SLU_UNITCFG);
+	app_id = __genwqe_readq(cd, IO_APP_UNITCFG);
+
+	return sprintf(buf, "%016llx.%016llx\n", slu_id, app_id);
+}
+static DEVICE_ATTR_RO(version);
+
+static ssize_t type_show(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	u8 card_type;
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+	card_type = genwqe_card_type(cd);
+	return sprintf(buf, "%s\n", (card_type >= ARRAY_SIZE(genwqe_types)) ?
+		       "invalid" : genwqe_types[card_type]);
+}
+static DEVICE_ATTR_RO(type);
+
+static ssize_t tempsens_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	u64 tempsens;
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+	tempsens = __genwqe_readq(cd, IO_SLU_TEMPERATURE_SENSOR);
+	return sprintf(buf, "%016llx\n", tempsens);
+}
+static DEVICE_ATTR_RO(tempsens);
+
+static ssize_t freerunning_timer_show(struct device *dev,
+				      struct device_attribute *attr,
+				      char *buf)
+{
+	u64 t;
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+	t = __genwqe_readq(cd, IO_SLC_FREE_RUNNING_TIMER);
+	return sprintf(buf, "%016llx\n", t);
+}
+static DEVICE_ATTR_RO(freerunning_timer);
+
+static ssize_t queue_working_time_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	u64 t;
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+	t = __genwqe_readq(cd, IO_SLC_QUEUE_WTIME);
+	return sprintf(buf, "%016llx\n", t);
+}
+static DEVICE_ATTR_RO(queue_working_time);
+
+static ssize_t base_clock_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buf)
+{
+	u64 base_clock;
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+	base_clock = genwqe_base_clock_frequency(cd);
+	return sprintf(buf, "%lld\n", base_clock);
+}
+static DEVICE_ATTR_RO(base_clock);
+
+/**
+ * curr_bitstream_show() - Show the current bitstream id
+ *
+ * There is a bug in some old versions of the CPLD which selects the
+ * bitstream, which causes the IO_SLU_BITSTREAM register to report
+ * unreliable data in very rare cases. This makes this sysfs
+ * unreliable up to the point were a new CPLD version is being used.
+ *
+ * Unfortunately there is no automatic way yet to query the CPLD
+ * version, such that you need to manually ensure via programming
+ * tools that you have a recent version of the CPLD software.
+ *
+ * The proposed circumvention is to use a special recovery bitstream
+ * on the backup partition (0) to identify problems while loading the
+ * image.
+ */
+static ssize_t curr_bitstream_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	int curr_bitstream;
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+	curr_bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM) & 0x1;
+	return sprintf(buf, "%d\n", curr_bitstream);
+}
+static DEVICE_ATTR_RO(curr_bitstream);
+
+/**
+ * next_bitstream_show() - Show the next activated bitstream
+ *
+ * IO_SLC_CFGREG_SOFTRESET: This register can only be accessed by the PF.
+ */
+static ssize_t next_bitstream_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	int next_bitstream;
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+	switch ((cd->softreset & 0xc) >> 2) {
+	case 0x2:
+		next_bitstream =  0;
+		break;
+	case 0x3:
+		next_bitstream =  1;
+		break;
+	default:
+		next_bitstream = -1;
+		break;		/* error */
+	}
+	return sprintf(buf, "%d\n", next_bitstream);
+}
+
+static ssize_t next_bitstream_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	int partition;
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+	if (kstrtoint(buf, 0, &partition) < 0)
+		return -EINVAL;
+
+	switch (partition) {
+	case 0x0:
+		cd->softreset = 0x78;
+		break;
+	case 0x1:
+		cd->softreset = 0x7c;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, cd->softreset);
+	return count;
+}
+static DEVICE_ATTR_RW(next_bitstream);
+
+static ssize_t reload_bitstream_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	int reload;
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+	if (kstrtoint(buf, 0, &reload) < 0)
+		return -EINVAL;
+
+	if (reload == 0x1) {
+		if (cd->card_state == GENWQE_CARD_UNUSED ||
+		    cd->card_state == GENWQE_CARD_USED)
+			cd->card_state = GENWQE_CARD_RELOAD_BITSTREAM;
+		else
+			return -EIO;
+	} else {
+		return -EINVAL;
+	}
+
+	return count;
+}
+static DEVICE_ATTR_WO(reload_bitstream);
+
+/*
+ * Create device_attribute structures / params: name, mode, show, store
+ * additional flag if valid in VF
+ */
+static struct attribute *genwqe_attributes[] = {
+	&dev_attr_tempsens.attr,
+	&dev_attr_next_bitstream.attr,
+	&dev_attr_curr_bitstream.attr,
+	&dev_attr_base_clock.attr,
+	&dev_attr_type.attr,
+	&dev_attr_version.attr,
+	&dev_attr_appid.attr,
+	&dev_attr_status.attr,
+	&dev_attr_freerunning_timer.attr,
+	&dev_attr_queue_working_time.attr,
+	&dev_attr_reload_bitstream.attr,
+	NULL,
+};
+
+static struct attribute *genwqe_normal_attributes[] = {
+	&dev_attr_type.attr,
+	&dev_attr_version.attr,
+	&dev_attr_appid.attr,
+	&dev_attr_status.attr,
+	&dev_attr_freerunning_timer.attr,
+	&dev_attr_queue_working_time.attr,
+	NULL,
+};
+
+/**
+ * genwqe_is_visible() - Determine if sysfs attribute should be visible or not
+ *
+ * VFs have restricted mmio capabilities, so not all sysfs entries
+ * are allowed in VFs.
+ */
+static umode_t genwqe_is_visible(struct kobject *kobj,
+				 struct attribute *attr, int n)
+{
+	unsigned int j;
+	struct device *dev = kobj_to_dev(kobj);
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+	umode_t mode = attr->mode;
+
+	if (genwqe_is_privileged(cd))
+		return mode;
+
+	for (j = 0; genwqe_normal_attributes[j] != NULL;  j++)
+		if (genwqe_normal_attributes[j] == attr)
+			return mode;
+
+	return 0;
+}
+
+static struct attribute_group genwqe_attribute_group = {
+	.is_visible = genwqe_is_visible,
+	.attrs      = genwqe_attributes,
+};
+
+const struct attribute_group *genwqe_attribute_groups[] = {
+	&genwqe_attribute_group,
+	NULL,
+};
diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c
new file mode 100644
index 000000000..22301bba8
--- /dev/null
+++ b/drivers/misc/genwqe/card_utils.c
@@ -0,0 +1,1061 @@
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Miscelanous functionality used in the other GenWQE driver parts.
+ */
+
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include <linux/sched.h>
+#include <linux/vmalloc.h>
+#include <linux/page-flags.h>
+#include <linux/scatterlist.h>
+#include <linux/hugetlb.h>
+#include <linux/iommu.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/ctype.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <asm/pgtable.h>
+
+#include "genwqe_driver.h"
+#include "card_base.h"
+#include "card_ddcb.h"
+
+/**
+ * __genwqe_writeq() - Write 64-bit register
+ * @cd:	        genwqe device descriptor
+ * @byte_offs:  byte offset within BAR
+ * @val:        64-bit value
+ *
+ * Return: 0 if success; < 0 if error
+ */
+int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val)
+{
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
+		return -EIO;
+
+	if (cd->mmio == NULL)
+		return -EIO;
+
+	if (pci_channel_offline(pci_dev))
+		return -EIO;
+
+	__raw_writeq((__force u64)cpu_to_be64(val), cd->mmio + byte_offs);
+	return 0;
+}
+
+/**
+ * __genwqe_readq() - Read 64-bit register
+ * @cd:         genwqe device descriptor
+ * @byte_offs:  offset within BAR
+ *
+ * Return: value from register
+ */
+u64 __genwqe_readq(struct genwqe_dev *cd, u64 byte_offs)
+{
+	if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
+		return 0xffffffffffffffffull;
+
+	if ((cd->err_inject & GENWQE_INJECT_GFIR_FATAL) &&
+	    (byte_offs == IO_SLC_CFGREG_GFIR))
+		return 0x000000000000ffffull;
+
+	if ((cd->err_inject & GENWQE_INJECT_GFIR_INFO) &&
+	    (byte_offs == IO_SLC_CFGREG_GFIR))
+		return 0x00000000ffff0000ull;
+
+	if (cd->mmio == NULL)
+		return 0xffffffffffffffffull;
+
+	return be64_to_cpu((__force __be64)__raw_readq(cd->mmio + byte_offs));
+}
+
+/**
+ * __genwqe_writel() - Write 32-bit register
+ * @cd:	        genwqe device descriptor
+ * @byte_offs:  byte offset within BAR
+ * @val:        32-bit value
+ *
+ * Return: 0 if success; < 0 if error
+ */
+int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val)
+{
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
+		return -EIO;
+
+	if (cd->mmio == NULL)
+		return -EIO;
+
+	if (pci_channel_offline(pci_dev))
+		return -EIO;
+
+	__raw_writel((__force u32)cpu_to_be32(val), cd->mmio + byte_offs);
+	return 0;
+}
+
+/**
+ * __genwqe_readl() - Read 32-bit register
+ * @cd:         genwqe device descriptor
+ * @byte_offs:  offset within BAR
+ *
+ * Return: Value from register
+ */
+u32 __genwqe_readl(struct genwqe_dev *cd, u64 byte_offs)
+{
+	if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
+		return 0xffffffff;
+
+	if (cd->mmio == NULL)
+		return 0xffffffff;
+
+	return be32_to_cpu((__force __be32)__raw_readl(cd->mmio + byte_offs));
+}
+
+/**
+ * genwqe_read_app_id() - Extract app_id
+ *
+ * app_unitcfg need to be filled with valid data first
+ */
+int genwqe_read_app_id(struct genwqe_dev *cd, char *app_name, int len)
+{
+	int i, j;
+	u32 app_id = (u32)cd->app_unitcfg;
+
+	memset(app_name, 0, len);
+	for (i = 0, j = 0; j < min(len, 4); j++) {
+		char ch = (char)((app_id >> (24 - j*8)) & 0xff);
+
+		if (ch == ' ')
+			continue;
+		app_name[i++] = isprint(ch) ? ch : 'X';
+	}
+	return i;
+}
+
+/**
+ * genwqe_init_crc32() - Prepare a lookup table for fast crc32 calculations
+ *
+ * Existing kernel functions seem to use a different polynom,
+ * therefore we could not use them here.
+ *
+ * Genwqe's Polynomial = 0x20044009
+ */
+#define CRC32_POLYNOMIAL	0x20044009
+static u32 crc32_tab[256];	/* crc32 lookup table */
+
+void genwqe_init_crc32(void)
+{
+	int i, j;
+	u32 crc;
+
+	for (i = 0;  i < 256;  i++) {
+		crc = i << 24;
+		for (j = 0;  j < 8;  j++) {
+			if (crc & 0x80000000)
+				crc = (crc << 1) ^ CRC32_POLYNOMIAL;
+			else
+				crc = (crc << 1);
+		}
+		crc32_tab[i] = crc;
+	}
+}
+
+/**
+ * genwqe_crc32() - Generate 32-bit crc as required for DDCBs
+ * @buff:       pointer to data buffer
+ * @len:        length of data for calculation
+ * @init:       initial crc (0xffffffff at start)
+ *
+ * polynomial = x^32 * + x^29 + x^18 + x^14 + x^3 + 1 (0x20044009)
+
+ * Example: 4 bytes 0x01 0x02 0x03 0x04 with init=0xffffffff should
+ * result in a crc32 of 0xf33cb7d3.
+ *
+ * The existing kernel crc functions did not cover this polynom yet.
+ *
+ * Return: crc32 checksum.
+ */
+u32 genwqe_crc32(u8 *buff, size_t len, u32 init)
+{
+	int i;
+	u32 crc;
+
+	crc = init;
+	while (len--) {
+		i = ((crc >> 24) ^ *buff++) & 0xFF;
+		crc = (crc << 8) ^ crc32_tab[i];
+	}
+	return crc;
+}
+
+void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size,
+			       dma_addr_t *dma_handle)
+{
+	if (get_order(size) >= MAX_ORDER)
+		return NULL;
+
+	return dma_zalloc_coherent(&cd->pci_dev->dev, size, dma_handle,
+				   GFP_KERNEL);
+}
+
+void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size,
+			     void *vaddr, dma_addr_t dma_handle)
+{
+	if (vaddr == NULL)
+		return;
+
+	dma_free_coherent(&cd->pci_dev->dev, size, vaddr, dma_handle);
+}
+
+static void genwqe_unmap_pages(struct genwqe_dev *cd, dma_addr_t *dma_list,
+			      int num_pages)
+{
+	int i;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	for (i = 0; (i < num_pages) && (dma_list[i] != 0x0); i++) {
+		pci_unmap_page(pci_dev, dma_list[i],
+			       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+		dma_list[i] = 0x0;
+	}
+}
+
+static int genwqe_map_pages(struct genwqe_dev *cd,
+			   struct page **page_list, int num_pages,
+			   dma_addr_t *dma_list)
+{
+	int i;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	/* establish DMA mapping for requested pages */
+	for (i = 0; i < num_pages; i++) {
+		dma_addr_t daddr;
+
+		dma_list[i] = 0x0;
+		daddr = pci_map_page(pci_dev, page_list[i],
+				     0,	 /* map_offs */
+				     PAGE_SIZE,
+				     PCI_DMA_BIDIRECTIONAL);  /* FIXME rd/rw */
+
+		if (pci_dma_mapping_error(pci_dev, daddr)) {
+			dev_err(&pci_dev->dev,
+				"[%s] err: no dma addr daddr=%016llx!\n",
+				__func__, (long long)daddr);
+			goto err;
+		}
+
+		dma_list[i] = daddr;
+	}
+	return 0;
+
+ err:
+	genwqe_unmap_pages(cd, dma_list, num_pages);
+	return -EIO;
+}
+
+static int genwqe_sgl_size(int num_pages)
+{
+	int len, num_tlb = num_pages / 7;
+
+	len = sizeof(struct sg_entry) * (num_pages+num_tlb + 1);
+	return roundup(len, PAGE_SIZE);
+}
+
+/**
+ * genwqe_alloc_sync_sgl() - Allocate memory for sgl and overlapping pages
+ *
+ * Allocates memory for sgl and overlapping pages. Pages which might
+ * overlap other user-space memory blocks are being cached for DMAs,
+ * such that we do not run into syncronization issues. Data is copied
+ * from user-space into the cached pages.
+ */
+int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
+			  void __user *user_addr, size_t user_size, int write)
+{
+	int ret = -ENOMEM;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	sgl->fpage_offs = offset_in_page((unsigned long)user_addr);
+	sgl->fpage_size = min_t(size_t, PAGE_SIZE-sgl->fpage_offs, user_size);
+	sgl->nr_pages = DIV_ROUND_UP(sgl->fpage_offs + user_size, PAGE_SIZE);
+	sgl->lpage_size = (user_size - sgl->fpage_size) % PAGE_SIZE;
+
+	dev_dbg(&pci_dev->dev, "[%s] uaddr=%p usize=%8ld nr_pages=%ld fpage_offs=%lx fpage_size=%ld lpage_size=%ld\n",
+		__func__, user_addr, user_size, sgl->nr_pages,
+		sgl->fpage_offs, sgl->fpage_size, sgl->lpage_size);
+
+	sgl->user_addr = user_addr;
+	sgl->user_size = user_size;
+	sgl->write = write;
+	sgl->sgl_size = genwqe_sgl_size(sgl->nr_pages);
+
+	if (get_order(sgl->sgl_size) > MAX_ORDER) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: too much memory requested!\n", __func__);
+		return ret;
+	}
+
+	sgl->sgl = __genwqe_alloc_consistent(cd, sgl->sgl_size,
+					     &sgl->sgl_dma_addr);
+	if (sgl->sgl == NULL) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: no memory available!\n", __func__);
+		return ret;
+	}
+
+	/* Only use buffering on incomplete pages */
+	if ((sgl->fpage_size != 0) && (sgl->fpage_size != PAGE_SIZE)) {
+		sgl->fpage = __genwqe_alloc_consistent(cd, PAGE_SIZE,
+						       &sgl->fpage_dma_addr);
+		if (sgl->fpage == NULL)
+			goto err_out;
+
+		/* Sync with user memory */
+		if (copy_from_user(sgl->fpage + sgl->fpage_offs,
+				   user_addr, sgl->fpage_size)) {
+			ret = -EFAULT;
+			goto err_out;
+		}
+	}
+	if (sgl->lpage_size != 0) {
+		sgl->lpage = __genwqe_alloc_consistent(cd, PAGE_SIZE,
+						       &sgl->lpage_dma_addr);
+		if (sgl->lpage == NULL)
+			goto err_out1;
+
+		/* Sync with user memory */
+		if (copy_from_user(sgl->lpage, user_addr + user_size -
+				   sgl->lpage_size, sgl->lpage_size)) {
+			ret = -EFAULT;
+			goto err_out2;
+		}
+	}
+	return 0;
+
+ err_out2:
+	__genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage,
+				 sgl->lpage_dma_addr);
+	sgl->lpage = NULL;
+	sgl->lpage_dma_addr = 0;
+ err_out1:
+	__genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage,
+				 sgl->fpage_dma_addr);
+	sgl->fpage = NULL;
+	sgl->fpage_dma_addr = 0;
+ err_out:
+	__genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl,
+				 sgl->sgl_dma_addr);
+	sgl->sgl = NULL;
+	sgl->sgl_dma_addr = 0;
+	sgl->sgl_size = 0;
+
+	return ret;
+}
+
+int genwqe_setup_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
+		     dma_addr_t *dma_list)
+{
+	int i = 0, j = 0, p;
+	unsigned long dma_offs, map_offs;
+	dma_addr_t prev_daddr = 0;
+	struct sg_entry *s, *last_s = NULL;
+	size_t size = sgl->user_size;
+
+	dma_offs = 128;		/* next block if needed/dma_offset */
+	map_offs = sgl->fpage_offs; /* offset in first page */
+
+	s = &sgl->sgl[0];	/* first set of 8 entries */
+	p = 0;			/* page */
+	while (p < sgl->nr_pages) {
+		dma_addr_t daddr;
+		unsigned int size_to_map;
+
+		/* always write the chaining entry, cleanup is done later */
+		j = 0;
+		s[j].target_addr = cpu_to_be64(sgl->sgl_dma_addr + dma_offs);
+		s[j].len	 = cpu_to_be32(128);
+		s[j].flags	 = cpu_to_be32(SG_CHAINED);
+		j++;
+
+		while (j < 8) {
+			/* DMA mapping for requested page, offs, size */
+			size_to_map = min(size, PAGE_SIZE - map_offs);
+
+			if ((p == 0) && (sgl->fpage != NULL)) {
+				daddr = sgl->fpage_dma_addr + map_offs;
+
+			} else if ((p == sgl->nr_pages - 1) &&
+				   (sgl->lpage != NULL)) {
+				daddr = sgl->lpage_dma_addr;
+			} else {
+				daddr = dma_list[p] + map_offs;
+			}
+
+			size -= size_to_map;
+			map_offs = 0;
+
+			if (prev_daddr == daddr) {
+				u32 prev_len = be32_to_cpu(last_s->len);
+
+				/* pr_info("daddr combining: "
+					"%016llx/%08x -> %016llx\n",
+					prev_daddr, prev_len, daddr); */
+
+				last_s->len = cpu_to_be32(prev_len +
+							  size_to_map);
+
+				p++; /* process next page */
+				if (p == sgl->nr_pages)
+					goto fixup;  /* nothing to do */
+
+				prev_daddr = daddr + size_to_map;
+				continue;
+			}
+
+			/* start new entry */
+			s[j].target_addr = cpu_to_be64(daddr);
+			s[j].len	 = cpu_to_be32(size_to_map);
+			s[j].flags	 = cpu_to_be32(SG_DATA);
+			prev_daddr = daddr + size_to_map;
+			last_s = &s[j];
+			j++;
+
+			p++;	/* process next page */
+			if (p == sgl->nr_pages)
+				goto fixup;  /* nothing to do */
+		}
+		dma_offs += 128;
+		s += 8;		/* continue 8 elements further */
+	}
+ fixup:
+	if (j == 1) {		/* combining happened on last entry! */
+		s -= 8;		/* full shift needed on previous sgl block */
+		j =  7;		/* shift all elements */
+	}
+
+	for (i = 0; i < j; i++)	/* move elements 1 up */
+		s[i] = s[i + 1];
+
+	s[i].target_addr = cpu_to_be64(0);
+	s[i].len	 = cpu_to_be32(0);
+	s[i].flags	 = cpu_to_be32(SG_END_LIST);
+	return 0;
+}
+
+/**
+ * genwqe_free_sync_sgl() - Free memory for sgl and overlapping pages
+ *
+ * After the DMA transfer has been completed we free the memory for
+ * the sgl and the cached pages. Data is being transferred from cached
+ * pages into user-space buffers.
+ */
+int genwqe_free_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl)
+{
+	int rc = 0;
+	size_t offset;
+	unsigned long res;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (sgl->fpage) {
+		if (sgl->write) {
+			res = copy_to_user(sgl->user_addr,
+				sgl->fpage + sgl->fpage_offs, sgl->fpage_size);
+			if (res) {
+				dev_err(&pci_dev->dev,
+					"[%s] err: copying fpage! (res=%lu)\n",
+					__func__, res);
+				rc = -EFAULT;
+			}
+		}
+		__genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage,
+					 sgl->fpage_dma_addr);
+		sgl->fpage = NULL;
+		sgl->fpage_dma_addr = 0;
+	}
+	if (sgl->lpage) {
+		if (sgl->write) {
+			offset = sgl->user_size - sgl->lpage_size;
+			res = copy_to_user(sgl->user_addr + offset, sgl->lpage,
+					   sgl->lpage_size);
+			if (res) {
+				dev_err(&pci_dev->dev,
+					"[%s] err: copying lpage! (res=%lu)\n",
+					__func__, res);
+				rc = -EFAULT;
+			}
+		}
+		__genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage,
+					 sgl->lpage_dma_addr);
+		sgl->lpage = NULL;
+		sgl->lpage_dma_addr = 0;
+	}
+	__genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl,
+				 sgl->sgl_dma_addr);
+
+	sgl->sgl = NULL;
+	sgl->sgl_dma_addr = 0x0;
+	sgl->sgl_size = 0;
+	return rc;
+}
+
+/**
+ * genwqe_free_user_pages() - Give pinned pages back
+ *
+ * Documentation of get_user_pages is in mm/gup.c:
+ *
+ * If the page is written to, set_page_dirty (or set_page_dirty_lock,
+ * as appropriate) must be called after the page is finished with, and
+ * before put_page is called.
+ */
+static int genwqe_free_user_pages(struct page **page_list,
+			unsigned int nr_pages, int dirty)
+{
+	unsigned int i;
+
+	for (i = 0; i < nr_pages; i++) {
+		if (page_list[i] != NULL) {
+			if (dirty)
+				set_page_dirty_lock(page_list[i]);
+			put_page(page_list[i]);
+		}
+	}
+	return 0;
+}
+
+/**
+ * genwqe_user_vmap() - Map user-space memory to virtual kernel memory
+ * @cd:         pointer to genwqe device
+ * @m:          mapping params
+ * @uaddr:      user virtual address
+ * @size:       size of memory to be mapped
+ *
+ * We need to think about how we could speed this up. Of course it is
+ * not a good idea to do this over and over again, like we are
+ * currently doing it. Nevertheless, I am curious where on the path
+ * the performance is spend. Most probably within the memory
+ * allocation functions, but maybe also in the DMA mapping code.
+ *
+ * Restrictions: The maximum size of the possible mapping currently depends
+ *               on the amount of memory we can get using kzalloc() for the
+ *               page_list and pci_alloc_consistent for the sg_list.
+ *               The sg_list is currently itself not scattered, which could
+ *               be fixed with some effort. The page_list must be split into
+ *               PAGE_SIZE chunks too. All that will make the complicated
+ *               code more complicated.
+ *
+ * Return: 0 if success
+ */
+int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m, void *uaddr,
+		     unsigned long size)
+{
+	int rc = -EINVAL;
+	unsigned long data, offs;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if ((uaddr == NULL) || (size == 0)) {
+		m->size = 0;	/* mark unused and not added */
+		return -EINVAL;
+	}
+	m->u_vaddr = uaddr;
+	m->size    = size;
+
+	/* determine space needed for page_list. */
+	data = (unsigned long)uaddr;
+	offs = offset_in_page(data);
+	if (size > ULONG_MAX - PAGE_SIZE - offs) {
+		m->size = 0;	/* mark unused and not added */
+		return -EINVAL;
+	}
+	m->nr_pages = DIV_ROUND_UP(offs + size, PAGE_SIZE);
+
+	m->page_list = kcalloc(m->nr_pages,
+			       sizeof(struct page *) + sizeof(dma_addr_t),
+			       GFP_KERNEL);
+	if (!m->page_list) {
+		dev_err(&pci_dev->dev, "err: alloc page_list failed\n");
+		m->nr_pages = 0;
+		m->u_vaddr = NULL;
+		m->size = 0;	/* mark unused and not added */
+		return -ENOMEM;
+	}
+	m->dma_list = (dma_addr_t *)(m->page_list + m->nr_pages);
+
+	/* pin user pages in memory */
+	rc = get_user_pages_fast(data & PAGE_MASK, /* page aligned addr */
+				 m->nr_pages,
+				 m->write,		/* readable/writable */
+				 m->page_list);	/* ptrs to pages */
+	if (rc < 0)
+		goto fail_get_user_pages;
+
+	/* assumption: get_user_pages can be killed by signals. */
+	if (rc < m->nr_pages) {
+		genwqe_free_user_pages(m->page_list, rc, m->write);
+		rc = -EFAULT;
+		goto fail_get_user_pages;
+	}
+
+	rc = genwqe_map_pages(cd, m->page_list, m->nr_pages, m->dma_list);
+	if (rc != 0)
+		goto fail_free_user_pages;
+
+	return 0;
+
+ fail_free_user_pages:
+	genwqe_free_user_pages(m->page_list, m->nr_pages, m->write);
+
+ fail_get_user_pages:
+	kfree(m->page_list);
+	m->page_list = NULL;
+	m->dma_list = NULL;
+	m->nr_pages = 0;
+	m->u_vaddr = NULL;
+	m->size = 0;		/* mark unused and not added */
+	return rc;
+}
+
+/**
+ * genwqe_user_vunmap() - Undo mapping of user-space mem to virtual kernel
+ *                        memory
+ * @cd:         pointer to genwqe device
+ * @m:          mapping params
+ */
+int genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m)
+{
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (!dma_mapping_used(m)) {
+		dev_err(&pci_dev->dev, "[%s] err: mapping %p not used!\n",
+			__func__, m);
+		return -EINVAL;
+	}
+
+	if (m->dma_list)
+		genwqe_unmap_pages(cd, m->dma_list, m->nr_pages);
+
+	if (m->page_list) {
+		genwqe_free_user_pages(m->page_list, m->nr_pages, m->write);
+
+		kfree(m->page_list);
+		m->page_list = NULL;
+		m->dma_list = NULL;
+		m->nr_pages = 0;
+	}
+
+	m->u_vaddr = NULL;
+	m->size = 0;		/* mark as unused and not added */
+	return 0;
+}
+
+/**
+ * genwqe_card_type() - Get chip type SLU Configuration Register
+ * @cd:         pointer to the genwqe device descriptor
+ * Return: 0: Altera Stratix-IV 230
+ *         1: Altera Stratix-IV 530
+ *         2: Altera Stratix-V A4
+ *         3: Altera Stratix-V A7
+ */
+u8 genwqe_card_type(struct genwqe_dev *cd)
+{
+	u64 card_type = cd->slu_unitcfg;
+
+	return (u8)((card_type & IO_SLU_UNITCFG_TYPE_MASK) >> 20);
+}
+
+/**
+ * genwqe_card_reset() - Reset the card
+ * @cd:         pointer to the genwqe device descriptor
+ */
+int genwqe_card_reset(struct genwqe_dev *cd)
+{
+	u64 softrst;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (!genwqe_is_privileged(cd))
+		return -ENODEV;
+
+	/* new SL */
+	__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, 0x1ull);
+	msleep(1000);
+	__genwqe_readq(cd, IO_HSU_FIR_CLR);
+	__genwqe_readq(cd, IO_APP_FIR_CLR);
+	__genwqe_readq(cd, IO_SLU_FIR_CLR);
+
+	/*
+	 * Read-modify-write to preserve the stealth bits
+	 *
+	 * For SL >= 039, Stealth WE bit allows removing
+	 * the read-modify-wrote.
+	 * r-m-w may require a mask 0x3C to avoid hitting hard
+	 * reset again for error reset (should be 0, chicken).
+	 */
+	softrst = __genwqe_readq(cd, IO_SLC_CFGREG_SOFTRESET) & 0x3cull;
+	__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, softrst | 0x2ull);
+
+	/* give ERRORRESET some time to finish */
+	msleep(50);
+
+	if (genwqe_need_err_masking(cd)) {
+		dev_info(&pci_dev->dev,
+			 "[%s] masking errors for old bitstreams\n", __func__);
+		__genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull);
+	}
+	return 0;
+}
+
+int genwqe_read_softreset(struct genwqe_dev *cd)
+{
+	u64 bitstream;
+
+	if (!genwqe_is_privileged(cd))
+		return -ENODEV;
+
+	bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM) & 0x1;
+	cd->softreset = (bitstream == 0) ? 0x8ull : 0xcull;
+	return 0;
+}
+
+/**
+ * genwqe_set_interrupt_capability() - Configure MSI capability structure
+ * @cd:         pointer to the device
+ * Return: 0 if no error
+ */
+int genwqe_set_interrupt_capability(struct genwqe_dev *cd, int count)
+{
+	int rc;
+
+	rc = pci_alloc_irq_vectors(cd->pci_dev, 1, count, PCI_IRQ_MSI);
+	if (rc < 0)
+		return rc;
+	return 0;
+}
+
+/**
+ * genwqe_reset_interrupt_capability() - Undo genwqe_set_interrupt_capability()
+ * @cd:         pointer to the device
+ */
+void genwqe_reset_interrupt_capability(struct genwqe_dev *cd)
+{
+	pci_free_irq_vectors(cd->pci_dev);
+}
+
+/**
+ * set_reg_idx() - Fill array with data. Ignore illegal offsets.
+ * @cd:         card device
+ * @r:          debug register array
+ * @i:          index to desired entry
+ * @m:          maximum possible entries
+ * @addr:       addr which is read
+ * @index:      index in debug array
+ * @val:        read value
+ */
+static int set_reg_idx(struct genwqe_dev *cd, struct genwqe_reg *r,
+		       unsigned int *i, unsigned int m, u32 addr, u32 idx,
+		       u64 val)
+{
+	if (WARN_ON_ONCE(*i >= m))
+		return -EFAULT;
+
+	r[*i].addr = addr;
+	r[*i].idx = idx;
+	r[*i].val = val;
+	++*i;
+	return 0;
+}
+
+static int set_reg(struct genwqe_dev *cd, struct genwqe_reg *r,
+		   unsigned int *i, unsigned int m, u32 addr, u64 val)
+{
+	return set_reg_idx(cd, r, i, m, addr, 0, val);
+}
+
+int genwqe_read_ffdc_regs(struct genwqe_dev *cd, struct genwqe_reg *regs,
+			 unsigned int max_regs, int all)
+{
+	unsigned int i, j, idx = 0;
+	u32 ufir_addr, ufec_addr, sfir_addr, sfec_addr;
+	u64 gfir, sluid, appid, ufir, ufec, sfir, sfec;
+
+	/* Global FIR */
+	gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+	set_reg(cd, regs, &idx, max_regs, IO_SLC_CFGREG_GFIR, gfir);
+
+	/* UnitCfg for SLU */
+	sluid = __genwqe_readq(cd, IO_SLU_UNITCFG); /* 0x00000000 */
+	set_reg(cd, regs, &idx, max_regs, IO_SLU_UNITCFG, sluid);
+
+	/* UnitCfg for APP */
+	appid = __genwqe_readq(cd, IO_APP_UNITCFG); /* 0x02000000 */
+	set_reg(cd, regs, &idx, max_regs, IO_APP_UNITCFG, appid);
+
+	/* Check all chip Units */
+	for (i = 0; i < GENWQE_MAX_UNITS; i++) {
+
+		/* Unit FIR */
+		ufir_addr = (i << 24) | 0x008;
+		ufir = __genwqe_readq(cd, ufir_addr);
+		set_reg(cd, regs, &idx, max_regs, ufir_addr, ufir);
+
+		/* Unit FEC */
+		ufec_addr = (i << 24) | 0x018;
+		ufec = __genwqe_readq(cd, ufec_addr);
+		set_reg(cd, regs, &idx, max_regs, ufec_addr, ufec);
+
+		for (j = 0; j < 64; j++) {
+			/* wherever there is a primary 1, read the 2ndary */
+			if (!all && (!(ufir & (1ull << j))))
+				continue;
+
+			sfir_addr = (i << 24) | (0x100 + 8 * j);
+			sfir = __genwqe_readq(cd, sfir_addr);
+			set_reg(cd, regs, &idx, max_regs, sfir_addr, sfir);
+
+			sfec_addr = (i << 24) | (0x300 + 8 * j);
+			sfec = __genwqe_readq(cd, sfec_addr);
+			set_reg(cd, regs, &idx, max_regs, sfec_addr, sfec);
+		}
+	}
+
+	/* fill with invalid data until end */
+	for (i = idx; i < max_regs; i++) {
+		regs[i].addr = 0xffffffff;
+		regs[i].val = 0xffffffffffffffffull;
+	}
+	return idx;
+}
+
+/**
+ * genwqe_ffdc_buff_size() - Calculates the number of dump registers
+ */
+int genwqe_ffdc_buff_size(struct genwqe_dev *cd, int uid)
+{
+	int entries = 0, ring, traps, traces, trace_entries;
+	u32 eevptr_addr, l_addr, d_len, d_type;
+	u64 eevptr, val, addr;
+
+	eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER;
+	eevptr = __genwqe_readq(cd, eevptr_addr);
+
+	if ((eevptr != 0x0) && (eevptr != -1ull)) {
+		l_addr = GENWQE_UID_OFFS(uid) | eevptr;
+
+		while (1) {
+			val = __genwqe_readq(cd, l_addr);
+
+			if ((val == 0x0) || (val == -1ull))
+				break;
+
+			/* 38:24 */
+			d_len  = (val & 0x0000007fff000000ull) >> 24;
+
+			/* 39 */
+			d_type = (val & 0x0000008000000000ull) >> 36;
+
+			if (d_type) {	/* repeat */
+				entries += d_len;
+			} else {	/* size in bytes! */
+				entries += d_len >> 3;
+			}
+
+			l_addr += 8;
+		}
+	}
+
+	for (ring = 0; ring < 8; ring++) {
+		addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring);
+		val = __genwqe_readq(cd, addr);
+
+		if ((val == 0x0ull) || (val == -1ull))
+			continue;
+
+		traps = (val >> 24) & 0xff;
+		traces = (val >> 16) & 0xff;
+		trace_entries = val & 0xffff;
+
+		entries += traps + (traces * trace_entries);
+	}
+	return entries;
+}
+
+/**
+ * genwqe_ffdc_buff_read() - Implements LogoutExtendedErrorRegisters procedure
+ */
+int genwqe_ffdc_buff_read(struct genwqe_dev *cd, int uid,
+			  struct genwqe_reg *regs, unsigned int max_regs)
+{
+	int i, traps, traces, trace, trace_entries, trace_entry, ring;
+	unsigned int idx = 0;
+	u32 eevptr_addr, l_addr, d_addr, d_len, d_type;
+	u64 eevptr, e, val, addr;
+
+	eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER;
+	eevptr = __genwqe_readq(cd, eevptr_addr);
+
+	if ((eevptr != 0x0) && (eevptr != 0xffffffffffffffffull)) {
+		l_addr = GENWQE_UID_OFFS(uid) | eevptr;
+		while (1) {
+			e = __genwqe_readq(cd, l_addr);
+			if ((e == 0x0) || (e == 0xffffffffffffffffull))
+				break;
+
+			d_addr = (e & 0x0000000000ffffffull);	    /* 23:0 */
+			d_len  = (e & 0x0000007fff000000ull) >> 24; /* 38:24 */
+			d_type = (e & 0x0000008000000000ull) >> 36; /* 39 */
+			d_addr |= GENWQE_UID_OFFS(uid);
+
+			if (d_type) {
+				for (i = 0; i < (int)d_len; i++) {
+					val = __genwqe_readq(cd, d_addr);
+					set_reg_idx(cd, regs, &idx, max_regs,
+						    d_addr, i, val);
+				}
+			} else {
+				d_len >>= 3; /* Size in bytes! */
+				for (i = 0; i < (int)d_len; i++, d_addr += 8) {
+					val = __genwqe_readq(cd, d_addr);
+					set_reg_idx(cd, regs, &idx, max_regs,
+						    d_addr, 0, val);
+				}
+			}
+			l_addr += 8;
+		}
+	}
+
+	/*
+	 * To save time, there are only 6 traces poplulated on Uid=2,
+	 * Ring=1. each with iters=512.
+	 */
+	for (ring = 0; ring < 8; ring++) { /* 0 is fls, 1 is fds,
+					      2...7 are ASI rings */
+		addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring);
+		val = __genwqe_readq(cd, addr);
+
+		if ((val == 0x0ull) || (val == -1ull))
+			continue;
+
+		traps = (val >> 24) & 0xff;	/* Number of Traps	*/
+		traces = (val >> 16) & 0xff;	/* Number of Traces	*/
+		trace_entries = val & 0xffff;	/* Entries per trace	*/
+
+		/* Note: This is a combined loop that dumps both the traps */
+		/* (for the trace == 0 case) as well as the traces 1 to    */
+		/* 'traces'.						   */
+		for (trace = 0; trace <= traces; trace++) {
+			u32 diag_sel =
+				GENWQE_EXTENDED_DIAG_SELECTOR(ring, trace);
+
+			addr = (GENWQE_UID_OFFS(uid) |
+				IO_EXTENDED_DIAG_SELECTOR);
+			__genwqe_writeq(cd, addr, diag_sel);
+
+			for (trace_entry = 0;
+			     trace_entry < (trace ? trace_entries : traps);
+			     trace_entry++) {
+				addr = (GENWQE_UID_OFFS(uid) |
+					IO_EXTENDED_DIAG_READ_MBX);
+				val = __genwqe_readq(cd, addr);
+				set_reg_idx(cd, regs, &idx, max_regs, addr,
+					    (diag_sel<<16) | trace_entry, val);
+			}
+		}
+	}
+	return 0;
+}
+
+/**
+ * genwqe_write_vreg() - Write register in virtual window
+ *
+ * Note, these registers are only accessible to the PF through the
+ * VF-window. It is not intended for the VF to access.
+ */
+int genwqe_write_vreg(struct genwqe_dev *cd, u32 reg, u64 val, int func)
+{
+	__genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf);
+	__genwqe_writeq(cd, reg, val);
+	return 0;
+}
+
+/**
+ * genwqe_read_vreg() - Read register in virtual window
+ *
+ * Note, these registers are only accessible to the PF through the
+ * VF-window. It is not intended for the VF to access.
+ */
+u64 genwqe_read_vreg(struct genwqe_dev *cd, u32 reg, int func)
+{
+	__genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf);
+	return __genwqe_readq(cd, reg);
+}
+
+/**
+ * genwqe_base_clock_frequency() - Deteremine base clock frequency of the card
+ *
+ * Note: From a design perspective it turned out to be a bad idea to
+ * use codes here to specifiy the frequency/speed values. An old
+ * driver cannot understand new codes and is therefore always a
+ * problem. Better is to measure out the value or put the
+ * speed/frequency directly into a register which is always a valid
+ * value for old as well as for new software.
+ *
+ * Return: Card clock in MHz
+ */
+int genwqe_base_clock_frequency(struct genwqe_dev *cd)
+{
+	u16 speed;		/*         MHz  MHz  MHz  MHz */
+	static const int speed_grade[] = { 250, 200, 166, 175 };
+
+	speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full);
+	if (speed >= ARRAY_SIZE(speed_grade))
+		return 0;	/* illegal value */
+
+	return speed_grade[speed];
+}
+
+/**
+ * genwqe_stop_traps() - Stop traps
+ *
+ * Before reading out the analysis data, we need to stop the traps.
+ */
+void genwqe_stop_traps(struct genwqe_dev *cd)
+{
+	__genwqe_writeq(cd, IO_SLC_MISC_DEBUG_SET, 0xcull);
+}
+
+/**
+ * genwqe_start_traps() - Start traps
+ *
+ * After having read the data, we can/must enable the traps again.
+ */
+void genwqe_start_traps(struct genwqe_dev *cd)
+{
+	__genwqe_writeq(cd, IO_SLC_MISC_DEBUG_CLR, 0xcull);
+
+	if (genwqe_need_err_masking(cd))
+		__genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull);
+}
diff --git a/drivers/misc/genwqe/genwqe_driver.h b/drivers/misc/genwqe/genwqe_driver.h
new file mode 100644
index 000000000..15355350e
--- /dev/null
+++ b/drivers/misc/genwqe/genwqe_driver.h
@@ -0,0 +1,77 @@
+#ifndef __GENWQE_DRIVER_H__
+#define __GENWQE_DRIVER_H__
+
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/cdev.h>
+#include <linux/list.h>
+#include <linux/kthread.h>
+#include <linux/scatterlist.h>
+#include <linux/iommu.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+
+#include <asm/byteorder.h>
+#include <linux/genwqe/genwqe_card.h>
+
+#define DRV_VERSION		"2.0.25"
+
+/*
+ * Static minor number assignement, until we decide/implement
+ * something dynamic.
+ */
+#define GENWQE_MAX_MINOR	128 /* up to 128 possible genwqe devices */
+
+/**
+ * genwqe_requ_alloc() - Allocate a new DDCB execution request
+ *
+ * This data structure contains the user visiable fields of the DDCB
+ * to be executed.
+ *
+ * Return: ptr to genwqe_ddcb_cmd data structure
+ */
+struct genwqe_ddcb_cmd *ddcb_requ_alloc(void);
+
+/**
+ * ddcb_requ_free() - Free DDCB execution request.
+ * @req:       ptr to genwqe_ddcb_cmd data structure.
+ */
+void ddcb_requ_free(struct genwqe_ddcb_cmd *req);
+
+u32  genwqe_crc32(u8 *buff, size_t len, u32 init);
+
+static inline void genwqe_hexdump(struct pci_dev *pci_dev,
+				  const void *buff, unsigned int size)
+{
+	char prefix[32];
+
+	scnprintf(prefix, sizeof(prefix), "%s %s: ",
+		  GENWQE_DEVNAME, pci_name(pci_dev));
+
+	print_hex_dump_debug(prefix, DUMP_PREFIX_OFFSET, 16, 1, buff,
+			     size, true);
+}
+
+#endif	/* __GENWQE_DRIVER_H__ */
diff --git a/drivers/misc/hmc6352.c b/drivers/misc/hmc6352.c
new file mode 100644
index 000000000..38f90e179
--- /dev/null
+++ b/drivers/misc/hmc6352.c
@@ -0,0 +1,157 @@
+/*
+ * hmc6352.c - Honeywell Compass Driver
+ *
+ * Copyright (C) 2009 Intel Corp
+ *
+ *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/sysfs.h>
+#include <linux/nospec.h>
+
+static DEFINE_MUTEX(compass_mutex);
+
+static int compass_command(struct i2c_client *c, u8 cmd)
+{
+	int ret = i2c_master_send(c, &cmd, 1);
+	if (ret < 0)
+		dev_warn(&c->dev, "command '%c' failed.\n", cmd);
+	return ret;
+}
+
+static int compass_store(struct device *dev, const char *buf, size_t count,
+			const char *map)
+{
+	struct i2c_client *c = to_i2c_client(dev);
+	int ret;
+	unsigned long val;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+	if (val >= strlen(map))
+		return -EINVAL;
+	val = array_index_nospec(val, strlen(map));
+	mutex_lock(&compass_mutex);
+	ret = compass_command(c, map[val]);
+	mutex_unlock(&compass_mutex);
+	if (ret < 0)
+		return ret;
+	return count;
+}
+
+static ssize_t compass_calibration_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	return compass_store(dev, buf, count, "EC");
+}
+
+static ssize_t compass_power_mode_store(struct device *dev,
+		struct device_attribute *attr, const  char *buf, size_t count)
+{
+	return compass_store(dev, buf, count, "SW");
+}
+
+static ssize_t compass_heading_data_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	unsigned char i2c_data[2];
+	int ret;
+
+	mutex_lock(&compass_mutex);
+	ret = compass_command(client, 'A');
+	if (ret != 1) {
+		mutex_unlock(&compass_mutex);
+		return ret;
+	}
+	msleep(10); /* sending 'A' cmd we need to wait for 7-10 millisecs */
+	ret = i2c_master_recv(client, i2c_data, 2);
+	mutex_unlock(&compass_mutex);
+	if (ret < 0) {
+		dev_warn(dev, "i2c read data cmd failed\n");
+		return ret;
+	}
+	ret = (i2c_data[0] << 8) | i2c_data[1];
+	return sprintf(buf, "%d.%d\n", ret/10, ret%10);
+}
+
+
+static DEVICE_ATTR(heading0_input, S_IRUGO, compass_heading_data_show, NULL);
+static DEVICE_ATTR(calibration, S_IWUSR, NULL, compass_calibration_store);
+static DEVICE_ATTR(power_state, S_IWUSR, NULL, compass_power_mode_store);
+
+static struct attribute *mid_att_compass[] = {
+	&dev_attr_heading0_input.attr,
+	&dev_attr_calibration.attr,
+	&dev_attr_power_state.attr,
+	NULL
+};
+
+static const struct attribute_group m_compass_gr = {
+	.name = "hmc6352",
+	.attrs = mid_att_compass
+};
+
+static int hmc6352_probe(struct i2c_client *client,
+					const struct i2c_device_id *id)
+{
+	int res;
+
+	res = sysfs_create_group(&client->dev.kobj, &m_compass_gr);
+	if (res) {
+		dev_err(&client->dev, "device_create_file failed\n");
+		return res;
+	}
+	dev_info(&client->dev, "%s HMC6352 compass chip found\n",
+							client->name);
+	return 0;
+}
+
+static int hmc6352_remove(struct i2c_client *client)
+{
+	sysfs_remove_group(&client->dev.kobj, &m_compass_gr);
+	return 0;
+}
+
+static const struct i2c_device_id hmc6352_id[] = {
+	{ "hmc6352", 0 },
+	{ }
+};
+
+MODULE_DEVICE_TABLE(i2c, hmc6352_id);
+
+static struct i2c_driver hmc6352_driver = {
+	.driver = {
+		.name = "hmc6352",
+	},
+	.probe = hmc6352_probe,
+	.remove = hmc6352_remove,
+	.id_table = hmc6352_id,
+};
+
+module_i2c_driver(hmc6352_driver);
+
+MODULE_AUTHOR("Kalhan Trisal <kalhan.trisal@intel.com");
+MODULE_DESCRIPTION("hmc6352 Compass Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/hpilo.c b/drivers/misc/hpilo.c
new file mode 100644
index 000000000..e9c9ef52c
--- /dev/null
+++ b/drivers/misc/hpilo.c
@@ -0,0 +1,918 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for the HP iLO management processor.
+ *
+ * Copyright (C) 2008 Hewlett-Packard Development Company, L.P.
+ *	David Altobelli <david.altobelli@hpe.com>
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/device.h>
+#include <linux/file.h>
+#include <linux/cdev.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include "hpilo.h"
+
+static struct class *ilo_class;
+static unsigned int ilo_major;
+static unsigned int max_ccb = 16;
+static char ilo_hwdev[MAX_ILO_DEV];
+
+static inline int get_entry_id(int entry)
+{
+	return (entry & ENTRY_MASK_DESCRIPTOR) >> ENTRY_BITPOS_DESCRIPTOR;
+}
+
+static inline int get_entry_len(int entry)
+{
+	return ((entry & ENTRY_MASK_QWORDS) >> ENTRY_BITPOS_QWORDS) << 3;
+}
+
+static inline int mk_entry(int id, int len)
+{
+	int qlen = len & 7 ? (len >> 3) + 1 : len >> 3;
+	return id << ENTRY_BITPOS_DESCRIPTOR | qlen << ENTRY_BITPOS_QWORDS;
+}
+
+static inline int desc_mem_sz(int nr_entry)
+{
+	return nr_entry << L2_QENTRY_SZ;
+}
+
+/*
+ * FIFO queues, shared with hardware.
+ *
+ * If a queue has empty slots, an entry is added to the queue tail,
+ * and that entry is marked as occupied.
+ * Entries can be dequeued from the head of the list, when the device
+ * has marked the entry as consumed.
+ *
+ * Returns true on successful queue/dequeue, false on failure.
+ */
+static int fifo_enqueue(struct ilo_hwinfo *hw, char *fifobar, int entry)
+{
+	struct fifo *fifo_q = FIFOBARTOHANDLE(fifobar);
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&hw->fifo_lock, flags);
+	if (!(fifo_q->fifobar[(fifo_q->tail + 1) & fifo_q->imask]
+	      & ENTRY_MASK_O)) {
+		fifo_q->fifobar[fifo_q->tail & fifo_q->imask] |=
+				(entry & ENTRY_MASK_NOSTATE) | fifo_q->merge;
+		fifo_q->tail += 1;
+		ret = 1;
+	}
+	spin_unlock_irqrestore(&hw->fifo_lock, flags);
+
+	return ret;
+}
+
+static int fifo_dequeue(struct ilo_hwinfo *hw, char *fifobar, int *entry)
+{
+	struct fifo *fifo_q = FIFOBARTOHANDLE(fifobar);
+	unsigned long flags;
+	int ret = 0;
+	u64 c;
+
+	spin_lock_irqsave(&hw->fifo_lock, flags);
+	c = fifo_q->fifobar[fifo_q->head & fifo_q->imask];
+	if (c & ENTRY_MASK_C) {
+		if (entry)
+			*entry = c & ENTRY_MASK_NOSTATE;
+
+		fifo_q->fifobar[fifo_q->head & fifo_q->imask] =
+							(c | ENTRY_MASK) + 1;
+		fifo_q->head += 1;
+		ret = 1;
+	}
+	spin_unlock_irqrestore(&hw->fifo_lock, flags);
+
+	return ret;
+}
+
+static int fifo_check_recv(struct ilo_hwinfo *hw, char *fifobar)
+{
+	struct fifo *fifo_q = FIFOBARTOHANDLE(fifobar);
+	unsigned long flags;
+	int ret = 0;
+	u64 c;
+
+	spin_lock_irqsave(&hw->fifo_lock, flags);
+	c = fifo_q->fifobar[fifo_q->head & fifo_q->imask];
+	if (c & ENTRY_MASK_C)
+		ret = 1;
+	spin_unlock_irqrestore(&hw->fifo_lock, flags);
+
+	return ret;
+}
+
+static int ilo_pkt_enqueue(struct ilo_hwinfo *hw, struct ccb *ccb,
+			   int dir, int id, int len)
+{
+	char *fifobar;
+	int entry;
+
+	if (dir == SENDQ)
+		fifobar = ccb->ccb_u1.send_fifobar;
+	else
+		fifobar = ccb->ccb_u3.recv_fifobar;
+
+	entry = mk_entry(id, len);
+	return fifo_enqueue(hw, fifobar, entry);
+}
+
+static int ilo_pkt_dequeue(struct ilo_hwinfo *hw, struct ccb *ccb,
+			   int dir, int *id, int *len, void **pkt)
+{
+	char *fifobar, *desc;
+	int entry = 0, pkt_id = 0;
+	int ret;
+
+	if (dir == SENDQ) {
+		fifobar = ccb->ccb_u1.send_fifobar;
+		desc = ccb->ccb_u2.send_desc;
+	} else {
+		fifobar = ccb->ccb_u3.recv_fifobar;
+		desc = ccb->ccb_u4.recv_desc;
+	}
+
+	ret = fifo_dequeue(hw, fifobar, &entry);
+	if (ret) {
+		pkt_id = get_entry_id(entry);
+		if (id)
+			*id = pkt_id;
+		if (len)
+			*len = get_entry_len(entry);
+		if (pkt)
+			*pkt = (void *)(desc + desc_mem_sz(pkt_id));
+	}
+
+	return ret;
+}
+
+static int ilo_pkt_recv(struct ilo_hwinfo *hw, struct ccb *ccb)
+{
+	char *fifobar = ccb->ccb_u3.recv_fifobar;
+
+	return fifo_check_recv(hw, fifobar);
+}
+
+static inline void doorbell_set(struct ccb *ccb)
+{
+	iowrite8(1, ccb->ccb_u5.db_base);
+}
+
+static inline void doorbell_clr(struct ccb *ccb)
+{
+	iowrite8(2, ccb->ccb_u5.db_base);
+}
+
+static inline int ctrl_set(int l2sz, int idxmask, int desclim)
+{
+	int active = 0, go = 1;
+	return l2sz << CTRL_BITPOS_L2SZ |
+	       idxmask << CTRL_BITPOS_FIFOINDEXMASK |
+	       desclim << CTRL_BITPOS_DESCLIMIT |
+	       active << CTRL_BITPOS_A |
+	       go << CTRL_BITPOS_G;
+}
+
+static void ctrl_setup(struct ccb *ccb, int nr_desc, int l2desc_sz)
+{
+	/* for simplicity, use the same parameters for send and recv ctrls */
+	ccb->send_ctrl = ctrl_set(l2desc_sz, nr_desc-1, nr_desc-1);
+	ccb->recv_ctrl = ctrl_set(l2desc_sz, nr_desc-1, nr_desc-1);
+}
+
+static inline int fifo_sz(int nr_entry)
+{
+	/* size of a fifo is determined by the number of entries it contains */
+	return (nr_entry * sizeof(u64)) + FIFOHANDLESIZE;
+}
+
+static void fifo_setup(void *base_addr, int nr_entry)
+{
+	struct fifo *fifo_q = base_addr;
+	int i;
+
+	/* set up an empty fifo */
+	fifo_q->head = 0;
+	fifo_q->tail = 0;
+	fifo_q->reset = 0;
+	fifo_q->nrents = nr_entry;
+	fifo_q->imask = nr_entry - 1;
+	fifo_q->merge = ENTRY_MASK_O;
+
+	for (i = 0; i < nr_entry; i++)
+		fifo_q->fifobar[i] = 0;
+}
+
+static void ilo_ccb_close(struct pci_dev *pdev, struct ccb_data *data)
+{
+	struct ccb *driver_ccb = &data->driver_ccb;
+	struct ccb __iomem *device_ccb = data->mapped_ccb;
+	int retries;
+
+	/* complicated dance to tell the hw we are stopping */
+	doorbell_clr(driver_ccb);
+	iowrite32(ioread32(&device_ccb->send_ctrl) & ~(1 << CTRL_BITPOS_G),
+		  &device_ccb->send_ctrl);
+	iowrite32(ioread32(&device_ccb->recv_ctrl) & ~(1 << CTRL_BITPOS_G),
+		  &device_ccb->recv_ctrl);
+
+	/* give iLO some time to process stop request */
+	for (retries = MAX_WAIT; retries > 0; retries--) {
+		doorbell_set(driver_ccb);
+		udelay(WAIT_TIME);
+		if (!(ioread32(&device_ccb->send_ctrl) & (1 << CTRL_BITPOS_A))
+		    &&
+		    !(ioread32(&device_ccb->recv_ctrl) & (1 << CTRL_BITPOS_A)))
+			break;
+	}
+	if (retries == 0)
+		dev_err(&pdev->dev, "Closing, but controller still active\n");
+
+	/* clear the hw ccb */
+	memset_io(device_ccb, 0, sizeof(struct ccb));
+
+	/* free resources used to back send/recv queues */
+	pci_free_consistent(pdev, data->dma_size, data->dma_va, data->dma_pa);
+}
+
+static int ilo_ccb_setup(struct ilo_hwinfo *hw, struct ccb_data *data, int slot)
+{
+	char *dma_va;
+	dma_addr_t dma_pa;
+	struct ccb *driver_ccb, *ilo_ccb;
+
+	driver_ccb = &data->driver_ccb;
+	ilo_ccb = &data->ilo_ccb;
+
+	data->dma_size = 2 * fifo_sz(NR_QENTRY) +
+			 2 * desc_mem_sz(NR_QENTRY) +
+			 ILO_START_ALIGN + ILO_CACHE_SZ;
+
+	data->dma_va = pci_alloc_consistent(hw->ilo_dev, data->dma_size,
+					    &data->dma_pa);
+	if (!data->dma_va)
+		return -ENOMEM;
+
+	dma_va = (char *)data->dma_va;
+	dma_pa = data->dma_pa;
+
+	memset(dma_va, 0, data->dma_size);
+
+	dma_va = (char *)roundup((unsigned long)dma_va, ILO_START_ALIGN);
+	dma_pa = roundup(dma_pa, ILO_START_ALIGN);
+
+	/*
+	 * Create two ccb's, one with virt addrs, one with phys addrs.
+	 * Copy the phys addr ccb to device shared mem.
+	 */
+	ctrl_setup(driver_ccb, NR_QENTRY, L2_QENTRY_SZ);
+	ctrl_setup(ilo_ccb, NR_QENTRY, L2_QENTRY_SZ);
+
+	fifo_setup(dma_va, NR_QENTRY);
+	driver_ccb->ccb_u1.send_fifobar = dma_va + FIFOHANDLESIZE;
+	ilo_ccb->ccb_u1.send_fifobar_pa = dma_pa + FIFOHANDLESIZE;
+	dma_va += fifo_sz(NR_QENTRY);
+	dma_pa += fifo_sz(NR_QENTRY);
+
+	dma_va = (char *)roundup((unsigned long)dma_va, ILO_CACHE_SZ);
+	dma_pa = roundup(dma_pa, ILO_CACHE_SZ);
+
+	fifo_setup(dma_va, NR_QENTRY);
+	driver_ccb->ccb_u3.recv_fifobar = dma_va + FIFOHANDLESIZE;
+	ilo_ccb->ccb_u3.recv_fifobar_pa = dma_pa + FIFOHANDLESIZE;
+	dma_va += fifo_sz(NR_QENTRY);
+	dma_pa += fifo_sz(NR_QENTRY);
+
+	driver_ccb->ccb_u2.send_desc = dma_va;
+	ilo_ccb->ccb_u2.send_desc_pa = dma_pa;
+	dma_pa += desc_mem_sz(NR_QENTRY);
+	dma_va += desc_mem_sz(NR_QENTRY);
+
+	driver_ccb->ccb_u4.recv_desc = dma_va;
+	ilo_ccb->ccb_u4.recv_desc_pa = dma_pa;
+
+	driver_ccb->channel = slot;
+	ilo_ccb->channel = slot;
+
+	driver_ccb->ccb_u5.db_base = hw->db_vaddr + (slot << L2_DB_SIZE);
+	ilo_ccb->ccb_u5.db_base = NULL; /* hw ccb's doorbell is not used */
+
+	return 0;
+}
+
+static void ilo_ccb_open(struct ilo_hwinfo *hw, struct ccb_data *data, int slot)
+{
+	int pkt_id, pkt_sz;
+	struct ccb *driver_ccb = &data->driver_ccb;
+
+	/* copy the ccb with physical addrs to device memory */
+	data->mapped_ccb = (struct ccb __iomem *)
+				(hw->ram_vaddr + (slot * ILOHW_CCB_SZ));
+	memcpy_toio(data->mapped_ccb, &data->ilo_ccb, sizeof(struct ccb));
+
+	/* put packets on the send and receive queues */
+	pkt_sz = 0;
+	for (pkt_id = 0; pkt_id < NR_QENTRY; pkt_id++) {
+		ilo_pkt_enqueue(hw, driver_ccb, SENDQ, pkt_id, pkt_sz);
+		doorbell_set(driver_ccb);
+	}
+
+	pkt_sz = desc_mem_sz(1);
+	for (pkt_id = 0; pkt_id < NR_QENTRY; pkt_id++)
+		ilo_pkt_enqueue(hw, driver_ccb, RECVQ, pkt_id, pkt_sz);
+
+	/* the ccb is ready to use */
+	doorbell_clr(driver_ccb);
+}
+
+static int ilo_ccb_verify(struct ilo_hwinfo *hw, struct ccb_data *data)
+{
+	int pkt_id, i;
+	struct ccb *driver_ccb = &data->driver_ccb;
+
+	/* make sure iLO is really handling requests */
+	for (i = MAX_WAIT; i > 0; i--) {
+		if (ilo_pkt_dequeue(hw, driver_ccb, SENDQ, &pkt_id, NULL, NULL))
+			break;
+		udelay(WAIT_TIME);
+	}
+
+	if (i == 0) {
+		dev_err(&hw->ilo_dev->dev, "Open could not dequeue a packet\n");
+		return -EBUSY;
+	}
+
+	ilo_pkt_enqueue(hw, driver_ccb, SENDQ, pkt_id, 0);
+	doorbell_set(driver_ccb);
+	return 0;
+}
+
+static inline int is_channel_reset(struct ccb *ccb)
+{
+	/* check for this particular channel needing a reset */
+	return FIFOBARTOHANDLE(ccb->ccb_u1.send_fifobar)->reset;
+}
+
+static inline void set_channel_reset(struct ccb *ccb)
+{
+	/* set a flag indicating this channel needs a reset */
+	FIFOBARTOHANDLE(ccb->ccb_u1.send_fifobar)->reset = 1;
+}
+
+static inline int get_device_outbound(struct ilo_hwinfo *hw)
+{
+	return ioread32(&hw->mmio_vaddr[DB_OUT]);
+}
+
+static inline int is_db_reset(int db_out)
+{
+	return db_out & (1 << DB_RESET);
+}
+
+static inline int is_device_reset(struct ilo_hwinfo *hw)
+{
+	/* check for global reset condition */
+	return is_db_reset(get_device_outbound(hw));
+}
+
+static inline void clear_pending_db(struct ilo_hwinfo *hw, int clr)
+{
+	iowrite32(clr, &hw->mmio_vaddr[DB_OUT]);
+}
+
+static inline void clear_device(struct ilo_hwinfo *hw)
+{
+	/* clear the device (reset bits, pending channel entries) */
+	clear_pending_db(hw, -1);
+}
+
+static inline void ilo_enable_interrupts(struct ilo_hwinfo *hw)
+{
+	iowrite8(ioread8(&hw->mmio_vaddr[DB_IRQ]) | 1, &hw->mmio_vaddr[DB_IRQ]);
+}
+
+static inline void ilo_disable_interrupts(struct ilo_hwinfo *hw)
+{
+	iowrite8(ioread8(&hw->mmio_vaddr[DB_IRQ]) & ~1,
+		 &hw->mmio_vaddr[DB_IRQ]);
+}
+
+static void ilo_set_reset(struct ilo_hwinfo *hw)
+{
+	int slot;
+
+	/*
+	 * Mapped memory is zeroed on ilo reset, so set a per ccb flag
+	 * to indicate that this ccb needs to be closed and reopened.
+	 */
+	for (slot = 0; slot < max_ccb; slot++) {
+		if (!hw->ccb_alloc[slot])
+			continue;
+		set_channel_reset(&hw->ccb_alloc[slot]->driver_ccb);
+	}
+}
+
+static ssize_t ilo_read(struct file *fp, char __user *buf,
+			size_t len, loff_t *off)
+{
+	int err, found, cnt, pkt_id, pkt_len;
+	struct ccb_data *data = fp->private_data;
+	struct ccb *driver_ccb = &data->driver_ccb;
+	struct ilo_hwinfo *hw = data->ilo_hw;
+	void *pkt;
+
+	if (is_channel_reset(driver_ccb)) {
+		/*
+		 * If the device has been reset, applications
+		 * need to close and reopen all ccbs.
+		 */
+		return -ENODEV;
+	}
+
+	/*
+	 * This function is to be called when data is expected
+	 * in the channel, and will return an error if no packet is found
+	 * during the loop below.  The sleep/retry logic is to allow
+	 * applications to call read() immediately post write(),
+	 * and give iLO some time to process the sent packet.
+	 */
+	cnt = 20;
+	do {
+		/* look for a received packet */
+		found = ilo_pkt_dequeue(hw, driver_ccb, RECVQ, &pkt_id,
+					&pkt_len, &pkt);
+		if (found)
+			break;
+		cnt--;
+		msleep(100);
+	} while (!found && cnt);
+
+	if (!found)
+		return -EAGAIN;
+
+	/* only copy the length of the received packet */
+	if (pkt_len < len)
+		len = pkt_len;
+
+	err = copy_to_user(buf, pkt, len);
+
+	/* return the received packet to the queue */
+	ilo_pkt_enqueue(hw, driver_ccb, RECVQ, pkt_id, desc_mem_sz(1));
+
+	return err ? -EFAULT : len;
+}
+
+static ssize_t ilo_write(struct file *fp, const char __user *buf,
+			 size_t len, loff_t *off)
+{
+	int err, pkt_id, pkt_len;
+	struct ccb_data *data = fp->private_data;
+	struct ccb *driver_ccb = &data->driver_ccb;
+	struct ilo_hwinfo *hw = data->ilo_hw;
+	void *pkt;
+
+	if (is_channel_reset(driver_ccb))
+		return -ENODEV;
+
+	/* get a packet to send the user command */
+	if (!ilo_pkt_dequeue(hw, driver_ccb, SENDQ, &pkt_id, &pkt_len, &pkt))
+		return -EBUSY;
+
+	/* limit the length to the length of the packet */
+	if (pkt_len < len)
+		len = pkt_len;
+
+	/* on failure, set the len to 0 to return empty packet to the device */
+	err = copy_from_user(pkt, buf, len);
+	if (err)
+		len = 0;
+
+	/* send the packet */
+	ilo_pkt_enqueue(hw, driver_ccb, SENDQ, pkt_id, len);
+	doorbell_set(driver_ccb);
+
+	return err ? -EFAULT : len;
+}
+
+static __poll_t ilo_poll(struct file *fp, poll_table *wait)
+{
+	struct ccb_data *data = fp->private_data;
+	struct ccb *driver_ccb = &data->driver_ccb;
+
+	poll_wait(fp, &data->ccb_waitq, wait);
+
+	if (is_channel_reset(driver_ccb))
+		return EPOLLERR;
+	else if (ilo_pkt_recv(data->ilo_hw, driver_ccb))
+		return EPOLLIN | EPOLLRDNORM;
+
+	return 0;
+}
+
+static int ilo_close(struct inode *ip, struct file *fp)
+{
+	int slot;
+	struct ccb_data *data;
+	struct ilo_hwinfo *hw;
+	unsigned long flags;
+
+	slot = iminor(ip) % max_ccb;
+	hw = container_of(ip->i_cdev, struct ilo_hwinfo, cdev);
+
+	spin_lock(&hw->open_lock);
+
+	if (hw->ccb_alloc[slot]->ccb_cnt == 1) {
+
+		data = fp->private_data;
+
+		spin_lock_irqsave(&hw->alloc_lock, flags);
+		hw->ccb_alloc[slot] = NULL;
+		spin_unlock_irqrestore(&hw->alloc_lock, flags);
+
+		ilo_ccb_close(hw->ilo_dev, data);
+
+		kfree(data);
+	} else
+		hw->ccb_alloc[slot]->ccb_cnt--;
+
+	spin_unlock(&hw->open_lock);
+
+	return 0;
+}
+
+static int ilo_open(struct inode *ip, struct file *fp)
+{
+	int slot, error;
+	struct ccb_data *data;
+	struct ilo_hwinfo *hw;
+	unsigned long flags;
+
+	slot = iminor(ip) % max_ccb;
+	hw = container_of(ip->i_cdev, struct ilo_hwinfo, cdev);
+
+	/* new ccb allocation */
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	spin_lock(&hw->open_lock);
+
+	/* each fd private_data holds sw/hw view of ccb */
+	if (hw->ccb_alloc[slot] == NULL) {
+		/* create a channel control block for this minor */
+		error = ilo_ccb_setup(hw, data, slot);
+		if (error) {
+			kfree(data);
+			goto out;
+		}
+
+		data->ccb_cnt = 1;
+		data->ccb_excl = fp->f_flags & O_EXCL;
+		data->ilo_hw = hw;
+		init_waitqueue_head(&data->ccb_waitq);
+
+		/* write the ccb to hw */
+		spin_lock_irqsave(&hw->alloc_lock, flags);
+		ilo_ccb_open(hw, data, slot);
+		hw->ccb_alloc[slot] = data;
+		spin_unlock_irqrestore(&hw->alloc_lock, flags);
+
+		/* make sure the channel is functional */
+		error = ilo_ccb_verify(hw, data);
+		if (error) {
+
+			spin_lock_irqsave(&hw->alloc_lock, flags);
+			hw->ccb_alloc[slot] = NULL;
+			spin_unlock_irqrestore(&hw->alloc_lock, flags);
+
+			ilo_ccb_close(hw->ilo_dev, data);
+
+			kfree(data);
+			goto out;
+		}
+
+	} else {
+		kfree(data);
+		if (fp->f_flags & O_EXCL || hw->ccb_alloc[slot]->ccb_excl) {
+			/*
+			 * The channel exists, and either this open
+			 * or a previous open of this channel wants
+			 * exclusive access.
+			 */
+			error = -EBUSY;
+		} else {
+			hw->ccb_alloc[slot]->ccb_cnt++;
+			error = 0;
+		}
+	}
+out:
+	spin_unlock(&hw->open_lock);
+
+	if (!error)
+		fp->private_data = hw->ccb_alloc[slot];
+
+	return error;
+}
+
+static const struct file_operations ilo_fops = {
+	.owner		= THIS_MODULE,
+	.read		= ilo_read,
+	.write		= ilo_write,
+	.poll		= ilo_poll,
+	.open 		= ilo_open,
+	.release 	= ilo_close,
+	.llseek		= noop_llseek,
+};
+
+static irqreturn_t ilo_isr(int irq, void *data)
+{
+	struct ilo_hwinfo *hw = data;
+	int pending, i;
+
+	spin_lock(&hw->alloc_lock);
+
+	/* check for ccbs which have data */
+	pending = get_device_outbound(hw);
+	if (!pending) {
+		spin_unlock(&hw->alloc_lock);
+		return IRQ_NONE;
+	}
+
+	if (is_db_reset(pending)) {
+		/* wake up all ccbs if the device was reset */
+		pending = -1;
+		ilo_set_reset(hw);
+	}
+
+	for (i = 0; i < max_ccb; i++) {
+		if (!hw->ccb_alloc[i])
+			continue;
+		if (pending & (1 << i))
+			wake_up_interruptible(&hw->ccb_alloc[i]->ccb_waitq);
+	}
+
+	/* clear the device of the channels that have been handled */
+	clear_pending_db(hw, pending);
+
+	spin_unlock(&hw->alloc_lock);
+
+	return IRQ_HANDLED;
+}
+
+static void ilo_unmap_device(struct pci_dev *pdev, struct ilo_hwinfo *hw)
+{
+	pci_iounmap(pdev, hw->db_vaddr);
+	pci_iounmap(pdev, hw->ram_vaddr);
+	pci_iounmap(pdev, hw->mmio_vaddr);
+}
+
+static int ilo_map_device(struct pci_dev *pdev, struct ilo_hwinfo *hw)
+{
+	int bar;
+	unsigned long off;
+
+	/* map the memory mapped i/o registers */
+	hw->mmio_vaddr = pci_iomap(pdev, 1, 0);
+	if (hw->mmio_vaddr == NULL) {
+		dev_err(&pdev->dev, "Error mapping mmio\n");
+		goto out;
+	}
+
+	/* map the adapter shared memory region */
+	if (pdev->subsystem_device == 0x00E4) {
+		bar = 5;
+		/* Last 8k is reserved for CCBs */
+		off = pci_resource_len(pdev, bar) - 0x2000;
+	} else {
+		bar = 2;
+		off = 0;
+	}
+	hw->ram_vaddr = pci_iomap_range(pdev, bar, off, max_ccb * ILOHW_CCB_SZ);
+	if (hw->ram_vaddr == NULL) {
+		dev_err(&pdev->dev, "Error mapping shared mem\n");
+		goto mmio_free;
+	}
+
+	/* map the doorbell aperture */
+	hw->db_vaddr = pci_iomap(pdev, 3, max_ccb * ONE_DB_SIZE);
+	if (hw->db_vaddr == NULL) {
+		dev_err(&pdev->dev, "Error mapping doorbell\n");
+		goto ram_free;
+	}
+
+	return 0;
+ram_free:
+	pci_iounmap(pdev, hw->ram_vaddr);
+mmio_free:
+	pci_iounmap(pdev, hw->mmio_vaddr);
+out:
+	return -ENOMEM;
+}
+
+static void ilo_remove(struct pci_dev *pdev)
+{
+	int i, minor;
+	struct ilo_hwinfo *ilo_hw = pci_get_drvdata(pdev);
+
+	if (!ilo_hw)
+		return;
+
+	clear_device(ilo_hw);
+
+	minor = MINOR(ilo_hw->cdev.dev);
+	for (i = minor; i < minor + max_ccb; i++)
+		device_destroy(ilo_class, MKDEV(ilo_major, i));
+
+	cdev_del(&ilo_hw->cdev);
+	ilo_disable_interrupts(ilo_hw);
+	free_irq(pdev->irq, ilo_hw);
+	ilo_unmap_device(pdev, ilo_hw);
+	pci_release_regions(pdev);
+	/*
+	 * pci_disable_device(pdev) used to be here. But this PCI device has
+	 * two functions with interrupt lines connected to a single pin. The
+	 * other one is a USB host controller. So when we disable the PIN here
+	 * e.g. by rmmod hpilo, the controller stops working. It is because
+	 * the interrupt link is disabled in ACPI since it is not refcounted
+	 * yet. See acpi_pci_link_free_irq called from acpi_pci_irq_disable.
+	 */
+	kfree(ilo_hw);
+	ilo_hwdev[(minor / max_ccb)] = 0;
+}
+
+static int ilo_probe(struct pci_dev *pdev,
+			       const struct pci_device_id *ent)
+{
+	int devnum, minor, start, error = 0;
+	struct ilo_hwinfo *ilo_hw;
+
+	/* Ignore subsystem_device = 0x1979 (set by BIOS)  */
+	if (pdev->subsystem_device == 0x1979)
+		return 0;
+
+	if (max_ccb > MAX_CCB)
+		max_ccb = MAX_CCB;
+	else if (max_ccb < MIN_CCB)
+		max_ccb = MIN_CCB;
+
+	/* find a free range for device files */
+	for (devnum = 0; devnum < MAX_ILO_DEV; devnum++) {
+		if (ilo_hwdev[devnum] == 0) {
+			ilo_hwdev[devnum] = 1;
+			break;
+		}
+	}
+
+	if (devnum == MAX_ILO_DEV) {
+		dev_err(&pdev->dev, "Error finding free device\n");
+		return -ENODEV;
+	}
+
+	/* track global allocations for this device */
+	error = -ENOMEM;
+	ilo_hw = kzalloc(sizeof(*ilo_hw), GFP_KERNEL);
+	if (!ilo_hw)
+		goto out;
+
+	ilo_hw->ilo_dev = pdev;
+	spin_lock_init(&ilo_hw->alloc_lock);
+	spin_lock_init(&ilo_hw->fifo_lock);
+	spin_lock_init(&ilo_hw->open_lock);
+
+	error = pci_enable_device(pdev);
+	if (error)
+		goto free;
+
+	pci_set_master(pdev);
+
+	error = pci_request_regions(pdev, ILO_NAME);
+	if (error)
+		goto disable;
+
+	error = ilo_map_device(pdev, ilo_hw);
+	if (error)
+		goto free_regions;
+
+	pci_set_drvdata(pdev, ilo_hw);
+	clear_device(ilo_hw);
+
+	error = request_irq(pdev->irq, ilo_isr, IRQF_SHARED, "hpilo", ilo_hw);
+	if (error)
+		goto unmap;
+
+	ilo_enable_interrupts(ilo_hw);
+
+	cdev_init(&ilo_hw->cdev, &ilo_fops);
+	ilo_hw->cdev.owner = THIS_MODULE;
+	start = devnum * max_ccb;
+	error = cdev_add(&ilo_hw->cdev, MKDEV(ilo_major, start), max_ccb);
+	if (error) {
+		dev_err(&pdev->dev, "Could not add cdev\n");
+		goto remove_isr;
+	}
+
+	for (minor = 0 ; minor < max_ccb; minor++) {
+		struct device *dev;
+		dev = device_create(ilo_class, &pdev->dev,
+				    MKDEV(ilo_major, minor), NULL,
+				    "hpilo!d%dccb%d", devnum, minor);
+		if (IS_ERR(dev))
+			dev_err(&pdev->dev, "Could not create files\n");
+	}
+
+	return 0;
+remove_isr:
+	ilo_disable_interrupts(ilo_hw);
+	free_irq(pdev->irq, ilo_hw);
+unmap:
+	ilo_unmap_device(pdev, ilo_hw);
+free_regions:
+	pci_release_regions(pdev);
+disable:
+/*	pci_disable_device(pdev);  see comment in ilo_remove */
+free:
+	kfree(ilo_hw);
+out:
+	ilo_hwdev[devnum] = 0;
+	return error;
+}
+
+static const struct pci_device_id ilo_devices[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_COMPAQ, 0xB204) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_HP, 0x3307) },
+	{ }
+};
+MODULE_DEVICE_TABLE(pci, ilo_devices);
+
+static struct pci_driver ilo_driver = {
+	.name 	  = ILO_NAME,
+	.id_table = ilo_devices,
+	.probe 	  = ilo_probe,
+	.remove   = ilo_remove,
+};
+
+static int __init ilo_init(void)
+{
+	int error;
+	dev_t dev;
+
+	ilo_class = class_create(THIS_MODULE, "iLO");
+	if (IS_ERR(ilo_class)) {
+		error = PTR_ERR(ilo_class);
+		goto out;
+	}
+
+	error = alloc_chrdev_region(&dev, 0, MAX_OPEN, ILO_NAME);
+	if (error)
+		goto class_destroy;
+
+	ilo_major = MAJOR(dev);
+
+	error =	pci_register_driver(&ilo_driver);
+	if (error)
+		goto chr_remove;
+
+	return 0;
+chr_remove:
+	unregister_chrdev_region(dev, MAX_OPEN);
+class_destroy:
+	class_destroy(ilo_class);
+out:
+	return error;
+}
+
+static void __exit ilo_exit(void)
+{
+	pci_unregister_driver(&ilo_driver);
+	unregister_chrdev_region(MKDEV(ilo_major, 0), MAX_OPEN);
+	class_destroy(ilo_class);
+}
+
+MODULE_VERSION("1.5.0");
+MODULE_ALIAS(ILO_NAME);
+MODULE_DESCRIPTION(ILO_NAME);
+MODULE_AUTHOR("David Altobelli <david.altobelli@hpe.com>");
+MODULE_LICENSE("GPL v2");
+
+module_param(max_ccb, uint, 0444);
+MODULE_PARM_DESC(max_ccb, "Maximum number of HP iLO channels to attach (8-24)(default=16)");
+
+module_init(ilo_init);
+module_exit(ilo_exit);
diff --git a/drivers/misc/hpilo.h b/drivers/misc/hpilo.h
new file mode 100644
index 000000000..94dfb9e40
--- /dev/null
+++ b/drivers/misc/hpilo.h
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * linux/drivers/char/hpilo.h
+ *
+ * Copyright (C) 2008 Hewlett-Packard Development Company, L.P.
+ *	David Altobelli <david.altobelli@hp.com>
+ */
+#ifndef __HPILO_H
+#define __HPILO_H
+
+#define ILO_NAME "hpilo"
+
+/* max number of open channel control blocks per device, hw limited to 32 */
+#define MAX_CCB	       24
+/* min number of open channel control blocks per device, hw limited to 32 */
+#define MIN_CCB		8
+/* max number of supported devices */
+#define MAX_ILO_DEV	1
+/* max number of files */
+#define MAX_OPEN	(MAX_CCB * MAX_ILO_DEV)
+/* total wait time in usec */
+#define MAX_WAIT_TIME	10000
+/* per spin wait time in usec */
+#define WAIT_TIME	10
+/* spin counter for open/close delay */
+#define MAX_WAIT	(MAX_WAIT_TIME / WAIT_TIME)
+
+/*
+ * Per device, used to track global memory allocations.
+ */
+struct ilo_hwinfo {
+	/* mmio registers on device */
+	char __iomem *mmio_vaddr;
+
+	/* doorbell registers on device */
+	char __iomem *db_vaddr;
+
+	/* shared memory on device used for channel control blocks */
+	char __iomem *ram_vaddr;
+
+	/* files corresponding to this device */
+	struct ccb_data *ccb_alloc[MAX_CCB];
+
+	struct pci_dev *ilo_dev;
+
+	/*
+	 * open_lock      serializes ccb_cnt during open and close
+	 * [ irq disabled ]
+	 * -> alloc_lock  used when adding/removing/searching ccb_alloc,
+	 *                which represents all ccbs open on the device
+	 * --> fifo_lock  controls access to fifo queues shared with hw
+	 *
+	 * Locks must be taken in this order, but open_lock and alloc_lock
+	 * are optional, they do not need to be held in order to take a
+	 * lower level lock.
+	 */
+	spinlock_t open_lock;
+	spinlock_t alloc_lock;
+	spinlock_t fifo_lock;
+
+	struct cdev cdev;
+};
+
+/* offset from mmio_vaddr for enabling doorbell interrupts */
+#define DB_IRQ		0xB2
+/* offset from mmio_vaddr for outbound communications */
+#define DB_OUT		0xD4
+/* DB_OUT reset bit */
+#define DB_RESET	26
+
+/*
+ * Channel control block. Used to manage hardware queues.
+ * The format must match hw's version.  The hw ccb is 128 bytes,
+ * but the context area shouldn't be touched by the driver.
+ */
+#define ILOSW_CCB_SZ	64
+#define ILOHW_CCB_SZ 	128
+struct ccb {
+	union {
+		char *send_fifobar;
+		u64 send_fifobar_pa;
+	} ccb_u1;
+	union {
+		char *send_desc;
+		u64 send_desc_pa;
+	} ccb_u2;
+	u64 send_ctrl;
+
+	union {
+		char *recv_fifobar;
+		u64 recv_fifobar_pa;
+	} ccb_u3;
+	union {
+		char *recv_desc;
+		u64 recv_desc_pa;
+	} ccb_u4;
+	u64 recv_ctrl;
+
+	union {
+		char __iomem *db_base;
+		u64 padding5;
+	} ccb_u5;
+
+	u64 channel;
+
+	/* unused context area (64 bytes) */
+};
+
+/* ccb queue parameters */
+#define SENDQ		1
+#define RECVQ 		2
+#define NR_QENTRY    	4
+#define L2_QENTRY_SZ 	12
+
+/* ccb ctrl bitfields */
+#define CTRL_BITPOS_L2SZ             0
+#define CTRL_BITPOS_FIFOINDEXMASK    4
+#define CTRL_BITPOS_DESCLIMIT        18
+#define CTRL_BITPOS_A                30
+#define CTRL_BITPOS_G                31
+
+/* ccb doorbell macros */
+#define L2_DB_SIZE		14
+#define ONE_DB_SIZE		(1 << L2_DB_SIZE)
+
+/*
+ * Per fd structure used to track the ccb allocated to that dev file.
+ */
+struct ccb_data {
+	/* software version of ccb, using virtual addrs */
+	struct ccb  driver_ccb;
+
+	/* hardware version of ccb, using physical addrs */
+	struct ccb  ilo_ccb;
+
+	/* hardware ccb is written to this shared mapped device memory */
+	struct ccb __iomem *mapped_ccb;
+
+	/* dma'able memory used for send/recv queues */
+	void       *dma_va;
+	dma_addr_t  dma_pa;
+	size_t      dma_size;
+
+	/* pointer to hardware device info */
+	struct ilo_hwinfo *ilo_hw;
+
+	/* queue for this ccb to wait for recv data */
+	wait_queue_head_t ccb_waitq;
+
+	/* usage count, to allow for shared ccb's */
+	int	    ccb_cnt;
+
+	/* open wanted exclusive access to this ccb */
+	int	    ccb_excl;
+};
+
+/*
+ * FIFO queue structure, shared with hw.
+ */
+#define ILO_START_ALIGN	4096
+#define ILO_CACHE_SZ 	 128
+struct fifo {
+    u64 nrents;	/* user requested number of fifo entries */
+    u64 imask;  /* mask to extract valid fifo index */
+    u64 merge;	/*  O/C bits to merge in during enqueue operation */
+    u64 reset;	/* set to non-zero when the target device resets */
+    u8  pad_0[ILO_CACHE_SZ - (sizeof(u64) * 4)];
+
+    u64 head;
+    u8  pad_1[ILO_CACHE_SZ - (sizeof(u64))];
+
+    u64 tail;
+    u8  pad_2[ILO_CACHE_SZ - (sizeof(u64))];
+
+    u64 fifobar[1];
+};
+
+/* convert between struct fifo, and the fifobar, which is saved in the ccb */
+#define FIFOHANDLESIZE (sizeof(struct fifo) - sizeof(u64))
+#define FIFOBARTOHANDLE(_fifo) \
+	((struct fifo *)(((char *)(_fifo)) - FIFOHANDLESIZE))
+
+/* the number of qwords to consume from the entry descriptor */
+#define ENTRY_BITPOS_QWORDS      0
+/* descriptor index number (within a specified queue) */
+#define ENTRY_BITPOS_DESCRIPTOR  10
+/* state bit, fifo entry consumed by consumer */
+#define ENTRY_BITPOS_C           22
+/* state bit, fifo entry is occupied */
+#define ENTRY_BITPOS_O           23
+
+#define ENTRY_BITS_QWORDS        10
+#define ENTRY_BITS_DESCRIPTOR    12
+#define ENTRY_BITS_C             1
+#define ENTRY_BITS_O             1
+#define ENTRY_BITS_TOTAL	\
+	(ENTRY_BITS_C + ENTRY_BITS_O + \
+	 ENTRY_BITS_QWORDS + ENTRY_BITS_DESCRIPTOR)
+
+/* extract various entry fields */
+#define ENTRY_MASK ((1 << ENTRY_BITS_TOTAL) - 1)
+#define ENTRY_MASK_C (((1 << ENTRY_BITS_C) - 1) << ENTRY_BITPOS_C)
+#define ENTRY_MASK_O (((1 << ENTRY_BITS_O) - 1) << ENTRY_BITPOS_O)
+#define ENTRY_MASK_QWORDS \
+	(((1 << ENTRY_BITS_QWORDS) - 1) << ENTRY_BITPOS_QWORDS)
+#define ENTRY_MASK_DESCRIPTOR \
+	(((1 << ENTRY_BITS_DESCRIPTOR) - 1) << ENTRY_BITPOS_DESCRIPTOR)
+
+#define ENTRY_MASK_NOSTATE (ENTRY_MASK >> (ENTRY_BITS_C + ENTRY_BITS_O))
+
+#endif /* __HPILO_H */
diff --git a/drivers/misc/ibmasm/Makefile b/drivers/misc/ibmasm/Makefile
new file mode 100644
index 000000000..1b9dd0f44
--- /dev/null
+++ b/drivers/misc/ibmasm/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_IBM_ASM) := ibmasm.o
+
+ibmasm-y :=	module.o      \
+		ibmasmfs.o    \
+		event.o       \
+		command.o     \
+		remote.o      \
+		heartbeat.o   \
+		r_heartbeat.o \
+		dot_command.o \
+		lowlevel.o
+
+ibmasm-$(CONFIG_SERIAL_8250) += uart.o
+
diff --git a/drivers/misc/ibmasm/command.c b/drivers/misc/ibmasm/command.c
new file mode 100644
index 000000000..7d56f45de
--- /dev/null
+++ b/drivers/misc/ibmasm/command.c
@@ -0,0 +1,187 @@
+
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include "ibmasm.h"
+#include "lowlevel.h"
+
+static void exec_next_command(struct service_processor *sp);
+
+static atomic_t command_count = ATOMIC_INIT(0);
+
+struct command *ibmasm_new_command(struct service_processor *sp, size_t buffer_size)
+{
+	struct command *cmd;
+
+	if (buffer_size > IBMASM_CMD_MAX_BUFFER_SIZE)
+		return NULL;
+
+	cmd = kzalloc(sizeof(struct command), GFP_KERNEL);
+	if (cmd == NULL)
+		return NULL;
+
+
+	cmd->buffer = kzalloc(buffer_size, GFP_KERNEL);
+	if (cmd->buffer == NULL) {
+		kfree(cmd);
+		return NULL;
+	}
+	cmd->buffer_size = buffer_size;
+
+	kref_init(&cmd->kref);
+	cmd->lock = &sp->lock;
+
+	cmd->status = IBMASM_CMD_PENDING;
+	init_waitqueue_head(&cmd->wait);
+	INIT_LIST_HEAD(&cmd->queue_node);
+
+	atomic_inc(&command_count);
+	dbg("command count: %d\n", atomic_read(&command_count));
+
+	return cmd;
+}
+
+void ibmasm_free_command(struct kref *kref)
+{
+	struct command *cmd = to_command(kref);
+
+	list_del(&cmd->queue_node);
+	atomic_dec(&command_count);
+	dbg("command count: %d\n", atomic_read(&command_count));
+	kfree(cmd->buffer);
+	kfree(cmd);
+}
+
+static void enqueue_command(struct service_processor *sp, struct command *cmd)
+{
+	list_add_tail(&cmd->queue_node, &sp->command_queue);
+}
+
+static struct command *dequeue_command(struct service_processor *sp)
+{
+	struct command *cmd;
+	struct list_head *next;
+
+	if (list_empty(&sp->command_queue))
+		return NULL;
+
+	next = sp->command_queue.next;
+	list_del_init(next);
+	cmd = list_entry(next, struct command, queue_node);
+
+	return cmd;
+}
+
+static inline void do_exec_command(struct service_processor *sp)
+{
+	char tsbuf[32];
+
+	dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf));
+
+	if (ibmasm_send_i2o_message(sp)) {
+		sp->current_command->status = IBMASM_CMD_FAILED;
+		wake_up(&sp->current_command->wait);
+		command_put(sp->current_command);
+		exec_next_command(sp);
+	}
+}
+
+/**
+ * exec_command
+ * send a command to a service processor
+ * Commands are executed sequentially. One command (sp->current_command)
+ * is sent to the service processor. Once the interrupt handler gets a
+ * message of type command_response, the message is copied into
+ * the current commands buffer,
+ */
+void ibmasm_exec_command(struct service_processor *sp, struct command *cmd)
+{
+	unsigned long flags;
+	char tsbuf[32];
+
+	dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf));
+
+	spin_lock_irqsave(&sp->lock, flags);
+
+	if (!sp->current_command) {
+		sp->current_command = cmd;
+		command_get(sp->current_command);
+		spin_unlock_irqrestore(&sp->lock, flags);
+		do_exec_command(sp);
+	} else {
+		enqueue_command(sp, cmd);
+		spin_unlock_irqrestore(&sp->lock, flags);
+	}
+}
+
+static void exec_next_command(struct service_processor *sp)
+{
+	unsigned long flags;
+	char tsbuf[32];
+
+	dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf));
+
+	spin_lock_irqsave(&sp->lock, flags);
+	sp->current_command = dequeue_command(sp);
+	if (sp->current_command) {
+		command_get(sp->current_command);
+		spin_unlock_irqrestore(&sp->lock, flags);
+		do_exec_command(sp);
+	} else {
+		spin_unlock_irqrestore(&sp->lock, flags);
+	}
+}
+
+/**
+ * Sleep until a command has failed or a response has been received
+ * and the command status been updated by the interrupt handler.
+ * (see receive_response).
+ */
+void ibmasm_wait_for_response(struct command *cmd, int timeout)
+{
+	wait_event_interruptible_timeout(cmd->wait,
+				cmd->status == IBMASM_CMD_COMPLETE ||
+				cmd->status == IBMASM_CMD_FAILED,
+				timeout * HZ);
+}
+
+/**
+ * receive_command_response
+ * called by the interrupt handler when a dot command of type command_response
+ * was received.
+ */
+void ibmasm_receive_command_response(struct service_processor *sp, void *response, size_t size)
+{
+	struct command *cmd = sp->current_command;
+
+	if (!sp->current_command)
+		return;
+
+	memcpy_fromio(cmd->buffer, response, min(size, cmd->buffer_size));
+	cmd->status = IBMASM_CMD_COMPLETE;
+	wake_up(&sp->current_command->wait);
+	command_put(sp->current_command);
+	exec_next_command(sp);
+}
diff --git a/drivers/misc/ibmasm/dot_command.c b/drivers/misc/ibmasm/dot_command.c
new file mode 100644
index 000000000..d7b2ca358
--- /dev/null
+++ b/drivers/misc/ibmasm/dot_command.c
@@ -0,0 +1,152 @@
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+#include "ibmasm.h"
+#include "dot_command.h"
+
+/**
+ * Dispatch an incoming message to the specific handler for the message.
+ * Called from interrupt context.
+ */
+void ibmasm_receive_message(struct service_processor *sp, void *message, int message_size)
+{
+	u32 size;
+	struct dot_command_header *header = (struct dot_command_header *)message;
+
+	if (message_size == 0)
+		return;
+
+	size = get_dot_command_size(message);
+	if (size == 0)
+		return;
+
+	if (size > message_size)
+		size = message_size;
+
+	switch (header->type) {
+	case sp_event:
+		ibmasm_receive_event(sp, message, size);
+		break;
+	case sp_command_response:
+		ibmasm_receive_command_response(sp, message, size);
+		break;
+	case sp_heartbeat:
+		ibmasm_receive_heartbeat(sp, message, size);
+		break;
+	default:
+		dev_err(sp->dev, "Received unknown message from service processor\n");
+	}
+}
+
+
+#define INIT_BUFFER_SIZE 32
+
+
+/**
+ * send the 4.3.5.10 dot command (driver VPD) to the service processor
+ */
+int ibmasm_send_driver_vpd(struct service_processor *sp)
+{
+	struct command *command;
+	struct dot_command_header *header;
+	u8 *vpd_command;
+	u8 *vpd_data;
+	int result = 0;
+
+	command = ibmasm_new_command(sp, INIT_BUFFER_SIZE);
+	if (command == NULL)
+		return -ENOMEM;
+
+	header = (struct dot_command_header *)command->buffer;
+	header->type                = sp_write;
+	header->command_size        = 4;
+	header->data_size           = 16;
+	header->status              = 0;
+	header->reserved            = 0;
+
+	vpd_command = command->buffer + sizeof(struct dot_command_header);
+	vpd_command[0] = 0x4;
+	vpd_command[1] = 0x3;
+	vpd_command[2] = 0x5;
+	vpd_command[3] = 0xa;
+
+	vpd_data = vpd_command + header->command_size;
+	vpd_data[0] = 0;
+	strcat(vpd_data, IBMASM_DRIVER_VPD);
+	vpd_data[10] = 0;
+	vpd_data[15] = 0;
+
+	ibmasm_exec_command(sp, command);
+	ibmasm_wait_for_response(command, IBMASM_CMD_TIMEOUT_NORMAL);
+
+	if (command->status != IBMASM_CMD_COMPLETE)
+		result = -ENODEV;
+
+	command_put(command);
+
+	return result;
+}
+
+struct os_state_command {
+	struct dot_command_header	header;
+	unsigned char			command[3];
+	unsigned char			data;
+};
+
+/**
+ * send the 4.3.6 dot command (os state) to the service processor
+ * During driver init this function is called with os state "up".
+ * This causes the service processor to start sending heartbeats the
+ * driver.
+ * During driver exit the function is called with os state "down",
+ * causing the service processor to stop the heartbeats.
+ */
+int ibmasm_send_os_state(struct service_processor *sp, int os_state)
+{
+	struct command *cmd;
+	struct os_state_command *os_state_cmd;
+	int result = 0;
+
+	cmd = ibmasm_new_command(sp, sizeof(struct os_state_command));
+	if (cmd == NULL)
+		return -ENOMEM;
+
+	os_state_cmd = (struct os_state_command *)cmd->buffer;
+	os_state_cmd->header.type		= sp_write;
+	os_state_cmd->header.command_size	= 3;
+	os_state_cmd->header.data_size		= 1;
+	os_state_cmd->header.status		= 0;
+	os_state_cmd->command[0]		= 4;
+	os_state_cmd->command[1]		= 3;
+	os_state_cmd->command[2]		= 6;
+	os_state_cmd->data			= os_state;
+
+	ibmasm_exec_command(sp, cmd);
+	ibmasm_wait_for_response(cmd, IBMASM_CMD_TIMEOUT_NORMAL);
+
+	if (cmd->status != IBMASM_CMD_COMPLETE)
+		result = -ENODEV;
+
+	command_put(cmd);
+	return result;
+}
diff --git a/drivers/misc/ibmasm/dot_command.h b/drivers/misc/ibmasm/dot_command.h
new file mode 100644
index 000000000..fc9fc9d4e
--- /dev/null
+++ b/drivers/misc/ibmasm/dot_command.h
@@ -0,0 +1,78 @@
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+#ifndef __DOT_COMMAND_H__
+#define __DOT_COMMAND_H__
+
+/*
+ * dot commands are the protocol used to communicate with the service
+ * processor.
+ * They consist of header, a command of variable length and data of
+ * variable length.
+ */
+
+/* dot command types */
+#define sp_write		0
+#define sp_write_next		1
+#define sp_read			2
+#define sp_read_next		3
+#define sp_command_response	4
+#define sp_event		5
+#define sp_heartbeat		6
+
+#pragma pack(1)
+struct dot_command_header {
+	u8	type;
+	u8	command_size;
+	u16	data_size;
+	u8	status;
+	u8	reserved;
+};
+#pragma pack()
+
+static inline size_t get_dot_command_size(void *buffer)
+{
+	struct dot_command_header *cmd = (struct dot_command_header *)buffer;
+	return sizeof(struct dot_command_header) + cmd->command_size + cmd->data_size;
+}
+
+static inline unsigned int get_dot_command_timeout(void *buffer)
+{
+	struct dot_command_header *header = (struct dot_command_header *)buffer;
+	unsigned char *cmd = buffer + sizeof(struct dot_command_header);
+
+	/* dot commands 6.3.1, 7.1 and 8.x need a longer timeout */
+
+	if (header->command_size == 3) {
+		if ((cmd[0] == 6) && (cmd[1] == 3) && (cmd[2] == 1))
+			return IBMASM_CMD_TIMEOUT_EXTRA;
+	} else if (header->command_size == 2) {
+		if ((cmd[0] == 7) && (cmd[1] == 1))
+			return IBMASM_CMD_TIMEOUT_EXTRA;
+		if (cmd[0] == 8)
+			return IBMASM_CMD_TIMEOUT_EXTRA;
+	}
+	return IBMASM_CMD_TIMEOUT_NORMAL;
+}
+
+#endif /* __DOT_COMMAND_H__ */
diff --git a/drivers/misc/ibmasm/event.c b/drivers/misc/ibmasm/event.c
new file mode 100644
index 000000000..7e33025b4
--- /dev/null
+++ b/drivers/misc/ibmasm/event.c
@@ -0,0 +1,177 @@
+
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include "ibmasm.h"
+#include "lowlevel.h"
+
+/*
+ * ASM service processor event handling routines.
+ *
+ * Events are signalled to the device drivers through interrupts.
+ * They have the format of dot commands, with the type field set to
+ * sp_event.
+ * The driver does not interpret the events, it simply stores them in a
+ * circular buffer.
+ */
+
+static void wake_up_event_readers(struct service_processor *sp)
+{
+	struct event_reader *reader;
+
+	list_for_each_entry(reader, &sp->event_buffer->readers, node)
+                wake_up_interruptible(&reader->wait);
+}
+
+/**
+ * receive_event
+ * Called by the interrupt handler when a dot command of type sp_event is
+ * received.
+ * Store the event in the circular event buffer, wake up any sleeping
+ * event readers.
+ * There is no reader marker in the buffer, therefore readers are
+ * responsible for keeping up with the writer, or they will lose events.
+ */
+void ibmasm_receive_event(struct service_processor *sp, void *data, unsigned int data_size)
+{
+	struct event_buffer *buffer = sp->event_buffer;
+	struct ibmasm_event *event;
+	unsigned long flags;
+
+	data_size = min(data_size, IBMASM_EVENT_MAX_SIZE);
+
+	spin_lock_irqsave(&sp->lock, flags);
+	/* copy the event into the next slot in the circular buffer */
+	event = &buffer->events[buffer->next_index];
+	memcpy_fromio(event->data, data, data_size);
+	event->data_size = data_size;
+	event->serial_number = buffer->next_serial_number;
+
+	/* advance indices in the buffer */
+	buffer->next_index = (buffer->next_index + 1) % IBMASM_NUM_EVENTS;
+	buffer->next_serial_number++;
+	spin_unlock_irqrestore(&sp->lock, flags);
+
+	wake_up_event_readers(sp);
+}
+
+static inline int event_available(struct event_buffer *b, struct event_reader *r)
+{
+	return (r->next_serial_number < b->next_serial_number);
+}
+
+/**
+ * get_next_event
+ * Called by event readers (initiated from user space through the file
+ * system).
+ * Sleeps until a new event is available.
+ */
+int ibmasm_get_next_event(struct service_processor *sp, struct event_reader *reader)
+{
+	struct event_buffer *buffer = sp->event_buffer;
+	struct ibmasm_event *event;
+	unsigned int index;
+	unsigned long flags;
+
+	reader->cancelled = 0;
+
+	if (wait_event_interruptible(reader->wait,
+			event_available(buffer, reader) || reader->cancelled))
+		return -ERESTARTSYS;
+
+	if (!event_available(buffer, reader))
+		return 0;
+
+	spin_lock_irqsave(&sp->lock, flags);
+
+	index = buffer->next_index;
+	event = &buffer->events[index];
+	while (event->serial_number < reader->next_serial_number) {
+		index = (index + 1) % IBMASM_NUM_EVENTS;
+		event = &buffer->events[index];
+	}
+	memcpy(reader->data, event->data, event->data_size);
+	reader->data_size = event->data_size;
+	reader->next_serial_number = event->serial_number + 1;
+
+	spin_unlock_irqrestore(&sp->lock, flags);
+
+	return event->data_size;
+}
+
+void ibmasm_cancel_next_event(struct event_reader *reader)
+{
+        reader->cancelled = 1;
+        wake_up_interruptible(&reader->wait);
+}
+
+void ibmasm_event_reader_register(struct service_processor *sp, struct event_reader *reader)
+{
+	unsigned long flags;
+
+	reader->next_serial_number = sp->event_buffer->next_serial_number;
+	init_waitqueue_head(&reader->wait);
+	spin_lock_irqsave(&sp->lock, flags);
+	list_add(&reader->node, &sp->event_buffer->readers);
+	spin_unlock_irqrestore(&sp->lock, flags);
+}
+
+void ibmasm_event_reader_unregister(struct service_processor *sp, struct event_reader *reader)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&sp->lock, flags);
+	list_del(&reader->node);
+	spin_unlock_irqrestore(&sp->lock, flags);
+}
+
+int ibmasm_event_buffer_init(struct service_processor *sp)
+{
+	struct event_buffer *buffer;
+	struct ibmasm_event *event;
+	int i;
+
+	buffer = kmalloc(sizeof(struct event_buffer), GFP_KERNEL);
+	if (!buffer)
+		return -ENOMEM;
+
+	buffer->next_index = 0;
+	buffer->next_serial_number = 1;
+
+	event = buffer->events;
+	for (i=0; i<IBMASM_NUM_EVENTS; i++, event++)
+		event->serial_number = 0;
+
+	INIT_LIST_HEAD(&buffer->readers);
+
+	sp->event_buffer = buffer;
+
+	return 0;
+}
+
+void ibmasm_event_buffer_exit(struct service_processor *sp)
+{
+	kfree(sp->event_buffer);
+}
diff --git a/drivers/misc/ibmasm/heartbeat.c b/drivers/misc/ibmasm/heartbeat.c
new file mode 100644
index 000000000..90746378f
--- /dev/null
+++ b/drivers/misc/ibmasm/heartbeat.c
@@ -0,0 +1,101 @@
+
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+#include <linux/notifier.h>
+#include "ibmasm.h"
+#include "dot_command.h"
+#include "lowlevel.h"
+
+static int suspend_heartbeats = 0;
+
+/*
+ * Once the driver indicates to the service processor that it is running
+ * - see send_os_state() - the service processor sends periodic heartbeats
+ * to the driver. The driver must respond to the heartbeats or else the OS
+ * will be rebooted.
+ * In the case of a panic the interrupt handler continues to work and thus
+ * continues to respond to heartbeats, making the service processor believe
+ * the OS is still running and thus preventing a reboot.
+ * To prevent this from happening a callback is added the panic_notifier_list.
+ * Before responding to a heartbeat the driver checks if a panic has happened,
+ * if yes it suspends heartbeat, causing the service processor to reboot as
+ * expected.
+ */
+static int panic_happened(struct notifier_block *n, unsigned long val, void *v)
+{
+	suspend_heartbeats = 1;
+	return 0;
+}
+
+static struct notifier_block panic_notifier = { panic_happened, NULL, 1 };
+
+void ibmasm_register_panic_notifier(void)
+{
+	atomic_notifier_chain_register(&panic_notifier_list, &panic_notifier);
+}
+
+void ibmasm_unregister_panic_notifier(void)
+{
+	atomic_notifier_chain_unregister(&panic_notifier_list,
+			&panic_notifier);
+}
+
+
+int ibmasm_heartbeat_init(struct service_processor *sp)
+{
+	sp->heartbeat = ibmasm_new_command(sp, HEARTBEAT_BUFFER_SIZE);
+	if (sp->heartbeat == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void ibmasm_heartbeat_exit(struct service_processor *sp)
+{
+	char tsbuf[32];
+
+	dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf));
+	ibmasm_wait_for_response(sp->heartbeat, IBMASM_CMD_TIMEOUT_NORMAL);
+	dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf));
+	suspend_heartbeats = 1;
+	command_put(sp->heartbeat);
+}
+
+void ibmasm_receive_heartbeat(struct service_processor *sp,  void *message, size_t size)
+{
+	struct command *cmd = sp->heartbeat;
+	struct dot_command_header *header = (struct dot_command_header *)cmd->buffer;
+	char tsbuf[32];
+
+	dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf));
+	if (suspend_heartbeats)
+		return;
+
+	/* return the received dot command to sender */
+	cmd->status = IBMASM_CMD_PENDING;
+	size = min(size, cmd->buffer_size);
+	memcpy_fromio(cmd->buffer, message, size);
+	header->type = sp_write;
+	ibmasm_exec_command(sp, cmd);
+}
diff --git a/drivers/misc/ibmasm/i2o.h b/drivers/misc/ibmasm/i2o.h
new file mode 100644
index 000000000..2e9566dab
--- /dev/null
+++ b/drivers/misc/ibmasm/i2o.h
@@ -0,0 +1,77 @@
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+#pragma pack(1)
+struct i2o_header {
+	u8	version;
+	u8	message_flags;
+	u16	message_size;
+	u8	target;
+	u8	initiator_and_target;
+	u8	initiator;
+	u8	function;
+	u32	initiator_context;
+};
+#pragma pack()
+
+#define I2O_HEADER_TEMPLATE \
+      { .version              = 0x01, \
+	.message_flags        = 0x00, \
+	.function             = 0xFF, \
+	.initiator            = 0x00, \
+	.initiator_and_target = 0x40, \
+	.target               = 0x00, \
+	.initiator_context    = 0x0 }
+
+#define I2O_MESSAGE_SIZE	0x1000
+#define I2O_COMMAND_SIZE	(I2O_MESSAGE_SIZE - sizeof(struct i2o_header))
+
+#pragma pack(1)
+struct i2o_message {
+	struct i2o_header	header;
+	void			*data;
+};
+#pragma pack()
+
+static inline unsigned short outgoing_message_size(unsigned int data_size)
+{
+	unsigned int size;
+	unsigned short i2o_size;
+
+	if (data_size > I2O_COMMAND_SIZE)
+		data_size = I2O_COMMAND_SIZE;
+
+	size = sizeof(struct i2o_header) + data_size;
+
+	i2o_size = size / sizeof(u32);
+
+	if (size % sizeof(u32))
+	       i2o_size++;
+
+	return i2o_size;
+}
+
+static inline u32 incoming_data_size(struct i2o_message *i2o_message)
+{
+	return (sizeof(u32) * i2o_message->header.message_size);
+}
diff --git a/drivers/misc/ibmasm/ibmasm.h b/drivers/misc/ibmasm/ibmasm.h
new file mode 100644
index 000000000..9fea49d2e
--- /dev/null
+++ b/drivers/misc/ibmasm/ibmasm.h
@@ -0,0 +1,223 @@
+
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/wait.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/kref.h>
+#include <linux/device.h>
+#include <linux/input.h>
+#include <linux/time64.h>
+
+/* Driver identification */
+#define DRIVER_NAME	"ibmasm"
+#define DRIVER_VERSION  "1.0"
+#define DRIVER_AUTHOR   "Max Asbock <masbock@us.ibm.com>, Vernon Mauery <vernux@us.ibm.com>"
+#define DRIVER_DESC     "IBM ASM Service Processor Driver"
+
+#define err(msg) printk(KERN_ERR "%s: " msg "\n", DRIVER_NAME)
+#define info(msg) printk(KERN_INFO "%s: " msg "\n", DRIVER_NAME)
+
+extern int ibmasm_debug;
+#define dbg(STR, ARGS...)					\
+	do {							\
+		if (ibmasm_debug)				\
+			printk(KERN_DEBUG STR , ##ARGS);	\
+	} while (0)
+
+static inline char *get_timestamp(char *buf)
+{
+	struct timespec64 now;
+
+	ktime_get_real_ts64(&now);
+	sprintf(buf, "%llu.%.08lu", (long long)now.tv_sec,
+				now.tv_nsec / NSEC_PER_USEC);
+	return buf;
+}
+
+#define IBMASM_CMD_PENDING	0
+#define IBMASM_CMD_COMPLETE	1
+#define IBMASM_CMD_FAILED	2
+
+#define IBMASM_CMD_TIMEOUT_NORMAL	45
+#define IBMASM_CMD_TIMEOUT_EXTRA	240
+
+#define IBMASM_CMD_MAX_BUFFER_SIZE	0x8000
+
+#define REVERSE_HEARTBEAT_TIMEOUT	120
+
+#define HEARTBEAT_BUFFER_SIZE		0x400
+
+#ifdef IA64
+#define IBMASM_DRIVER_VPD "Lin64 6.08      "
+#else
+#define IBMASM_DRIVER_VPD "Lin32 6.08      "
+#endif
+
+#define SYSTEM_STATE_OS_UP      5
+#define SYSTEM_STATE_OS_DOWN    4
+
+#define IBMASM_NAME_SIZE	16
+
+#define IBMASM_NUM_EVENTS	10
+#define IBMASM_EVENT_MAX_SIZE	2048u
+
+
+struct command {
+	struct list_head	queue_node;
+	wait_queue_head_t	wait;
+	unsigned char		*buffer;
+	size_t			buffer_size;
+	int			status;
+	struct kref		kref;
+	spinlock_t		*lock;
+};
+#define to_command(c) container_of(c, struct command, kref)
+
+void ibmasm_free_command(struct kref *kref);
+static inline void command_put(struct command *cmd)
+{
+	unsigned long flags;
+	spinlock_t *lock = cmd->lock;
+
+	spin_lock_irqsave(lock, flags);
+	kref_put(&cmd->kref, ibmasm_free_command);
+	spin_unlock_irqrestore(lock, flags);
+}
+
+static inline void command_get(struct command *cmd)
+{
+	kref_get(&cmd->kref);
+}
+
+
+struct ibmasm_event {
+	unsigned int	serial_number;
+	unsigned int	data_size;
+	unsigned char	data[IBMASM_EVENT_MAX_SIZE];
+};
+
+struct event_buffer {
+	struct ibmasm_event	events[IBMASM_NUM_EVENTS];
+	unsigned int		next_serial_number;
+	unsigned int		next_index;
+	struct list_head	readers;
+};
+
+struct event_reader {
+	int			cancelled;
+	unsigned int		next_serial_number;
+	wait_queue_head_t	wait;
+	struct list_head	node;
+	unsigned int		data_size;
+	unsigned char		data[IBMASM_EVENT_MAX_SIZE];
+};
+
+struct reverse_heartbeat {
+	wait_queue_head_t	wait;
+	unsigned int		stopped;
+};
+
+struct ibmasm_remote {
+	struct input_dev *keybd_dev;
+	struct input_dev *mouse_dev;
+};
+
+struct service_processor {
+	struct list_head	node;
+	spinlock_t		lock;
+	void __iomem		*base_address;
+	unsigned int		irq;
+	struct command		*current_command;
+	struct command		*heartbeat;
+	struct list_head	command_queue;
+	struct event_buffer	*event_buffer;
+	char			dirname[IBMASM_NAME_SIZE];
+	char			devname[IBMASM_NAME_SIZE];
+	unsigned int		number;
+	struct ibmasm_remote	remote;
+	int			serial_line;
+	struct device		*dev;
+};
+
+/* command processing */
+struct command *ibmasm_new_command(struct service_processor *sp, size_t buffer_size);
+void ibmasm_exec_command(struct service_processor *sp, struct command *cmd);
+void ibmasm_wait_for_response(struct command *cmd, int timeout);
+void ibmasm_receive_command_response(struct service_processor *sp, void *response,  size_t size);
+
+/* event processing */
+int ibmasm_event_buffer_init(struct service_processor *sp);
+void ibmasm_event_buffer_exit(struct service_processor *sp);
+void ibmasm_receive_event(struct service_processor *sp, void *data,  unsigned int data_size);
+void ibmasm_event_reader_register(struct service_processor *sp, struct event_reader *reader);
+void ibmasm_event_reader_unregister(struct service_processor *sp, struct event_reader *reader);
+int ibmasm_get_next_event(struct service_processor *sp, struct event_reader *reader);
+void ibmasm_cancel_next_event(struct event_reader *reader);
+
+/* heartbeat - from SP to OS */
+void ibmasm_register_panic_notifier(void);
+void ibmasm_unregister_panic_notifier(void);
+int ibmasm_heartbeat_init(struct service_processor *sp);
+void ibmasm_heartbeat_exit(struct service_processor *sp);
+void ibmasm_receive_heartbeat(struct service_processor *sp,  void *message, size_t size);
+
+/* reverse heartbeat - from OS to SP */
+void ibmasm_init_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb);
+int ibmasm_start_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb);
+void ibmasm_stop_reverse_heartbeat(struct reverse_heartbeat *rhb);
+
+/* dot commands */
+void ibmasm_receive_message(struct service_processor *sp, void *data, int data_size);
+int ibmasm_send_driver_vpd(struct service_processor *sp);
+int ibmasm_send_os_state(struct service_processor *sp, int os_state);
+
+/* low level message processing */
+int ibmasm_send_i2o_message(struct service_processor *sp);
+irqreturn_t ibmasm_interrupt_handler(int irq, void * dev_id);
+
+/* remote console */
+void ibmasm_handle_mouse_interrupt(struct service_processor *sp);
+int ibmasm_init_remote_input_dev(struct service_processor *sp);
+void ibmasm_free_remote_input_dev(struct service_processor *sp);
+
+/* file system */
+int ibmasmfs_register(void);
+void ibmasmfs_unregister(void);
+void ibmasmfs_add_sp(struct service_processor *sp);
+
+/* uart */
+#if IS_ENABLED(CONFIG_SERIAL_8250)
+void ibmasm_register_uart(struct service_processor *sp);
+void ibmasm_unregister_uart(struct service_processor *sp);
+#else
+#define ibmasm_register_uart(sp)	do { } while(0)
+#define ibmasm_unregister_uart(sp)	do { } while(0)
+#endif
diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c
new file mode 100644
index 000000000..fa840666b
--- /dev/null
+++ b/drivers/misc/ibmasm/ibmasmfs.c
@@ -0,0 +1,608 @@
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+/*
+ * Parts of this code are based on an article by Jonathan Corbet
+ * that appeared in Linux Weekly News.
+ */
+
+
+/*
+ * The IBMASM file virtual filesystem. It creates the following hierarchy
+ * dynamically when mounted from user space:
+ *
+ *    /ibmasm
+ *    |-- 0
+ *    |   |-- command
+ *    |   |-- event
+ *    |   |-- reverse_heartbeat
+ *    |   `-- remote_video
+ *    |       |-- depth
+ *    |       |-- height
+ *    |       `-- width
+ *    .
+ *    .
+ *    .
+ *    `-- n
+ *        |-- command
+ *        |-- event
+ *        |-- reverse_heartbeat
+ *        `-- remote_video
+ *            |-- depth
+ *            |-- height
+ *            `-- width
+ *
+ * For each service processor the following files are created:
+ *
+ * command: execute dot commands
+ *	write: execute a dot command on the service processor
+ *	read: return the result of a previously executed dot command
+ *
+ * events: listen for service processor events
+ *	read: sleep (interruptible) until an event occurs
+ *      write: wakeup sleeping event listener
+ *
+ * reverse_heartbeat: send a heartbeat to the service processor
+ *	read: sleep (interruptible) until the reverse heartbeat fails
+ *      write: wakeup sleeping heartbeat listener
+ *
+ * remote_video/width
+ * remote_video/height
+ * remote_video/width: control remote display settings
+ *	write: set value
+ *	read: read value
+ */
+
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <asm/io.h>
+#include "ibmasm.h"
+#include "remote.h"
+#include "dot_command.h"
+
+#define IBMASMFS_MAGIC 0x66726f67
+
+static LIST_HEAD(service_processors);
+
+static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode);
+static void ibmasmfs_create_files (struct super_block *sb);
+static int ibmasmfs_fill_super (struct super_block *sb, void *data, int silent);
+
+
+static struct dentry *ibmasmfs_mount(struct file_system_type *fst,
+			int flags, const char *name, void *data)
+{
+	return mount_single(fst, flags, data, ibmasmfs_fill_super);
+}
+
+static const struct super_operations ibmasmfs_s_ops = {
+	.statfs		= simple_statfs,
+	.drop_inode	= generic_delete_inode,
+};
+
+static const struct file_operations *ibmasmfs_dir_ops = &simple_dir_operations;
+
+static struct file_system_type ibmasmfs_type = {
+	.owner          = THIS_MODULE,
+	.name           = "ibmasmfs",
+	.mount          = ibmasmfs_mount,
+	.kill_sb        = kill_litter_super,
+};
+MODULE_ALIAS_FS("ibmasmfs");
+
+static int ibmasmfs_fill_super (struct super_block *sb, void *data, int silent)
+{
+	struct inode *root;
+
+	sb->s_blocksize = PAGE_SIZE;
+	sb->s_blocksize_bits = PAGE_SHIFT;
+	sb->s_magic = IBMASMFS_MAGIC;
+	sb->s_op = &ibmasmfs_s_ops;
+	sb->s_time_gran = 1;
+
+	root = ibmasmfs_make_inode (sb, S_IFDIR | 0500);
+	if (!root)
+		return -ENOMEM;
+
+	root->i_op = &simple_dir_inode_operations;
+	root->i_fop = ibmasmfs_dir_ops;
+
+	sb->s_root = d_make_root(root);
+	if (!sb->s_root)
+		return -ENOMEM;
+
+	ibmasmfs_create_files(sb);
+	return 0;
+}
+
+static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode)
+{
+	struct inode *ret = new_inode(sb);
+
+	if (ret) {
+		ret->i_ino = get_next_ino();
+		ret->i_mode = mode;
+		ret->i_atime = ret->i_mtime = ret->i_ctime = current_time(ret);
+	}
+	return ret;
+}
+
+static struct dentry *ibmasmfs_create_file(struct dentry *parent,
+			const char *name,
+			const struct file_operations *fops,
+			void *data,
+			int mode)
+{
+	struct dentry *dentry;
+	struct inode *inode;
+
+	dentry = d_alloc_name(parent, name);
+	if (!dentry)
+		return NULL;
+
+	inode = ibmasmfs_make_inode(parent->d_sb, S_IFREG | mode);
+	if (!inode) {
+		dput(dentry);
+		return NULL;
+	}
+
+	inode->i_fop = fops;
+	inode->i_private = data;
+
+	d_add(dentry, inode);
+	return dentry;
+}
+
+static struct dentry *ibmasmfs_create_dir(struct dentry *parent,
+				const char *name)
+{
+	struct dentry *dentry;
+	struct inode *inode;
+
+	dentry = d_alloc_name(parent, name);
+	if (!dentry)
+		return NULL;
+
+	inode = ibmasmfs_make_inode(parent->d_sb, S_IFDIR | 0500);
+	if (!inode) {
+		dput(dentry);
+		return NULL;
+	}
+
+	inode->i_op = &simple_dir_inode_operations;
+	inode->i_fop = ibmasmfs_dir_ops;
+
+	d_add(dentry, inode);
+	return dentry;
+}
+
+int ibmasmfs_register(void)
+{
+	return register_filesystem(&ibmasmfs_type);
+}
+
+void ibmasmfs_unregister(void)
+{
+	unregister_filesystem(&ibmasmfs_type);
+}
+
+void ibmasmfs_add_sp(struct service_processor *sp)
+{
+	list_add(&sp->node, &service_processors);
+}
+
+/* struct to save state between command file operations */
+struct ibmasmfs_command_data {
+	struct service_processor	*sp;
+	struct command			*command;
+};
+
+/* struct to save state between event file operations */
+struct ibmasmfs_event_data {
+	struct service_processor	*sp;
+	struct event_reader		reader;
+	int				active;
+};
+
+/* struct to save state between reverse heartbeat file operations */
+struct ibmasmfs_heartbeat_data {
+	struct service_processor	*sp;
+	struct reverse_heartbeat	heartbeat;
+	int				active;
+};
+
+static int command_file_open(struct inode *inode, struct file *file)
+{
+	struct ibmasmfs_command_data *command_data;
+
+	if (!inode->i_private)
+		return -ENODEV;
+
+	command_data = kmalloc(sizeof(struct ibmasmfs_command_data), GFP_KERNEL);
+	if (!command_data)
+		return -ENOMEM;
+
+	command_data->command = NULL;
+	command_data->sp = inode->i_private;
+	file->private_data = command_data;
+	return 0;
+}
+
+static int command_file_close(struct inode *inode, struct file *file)
+{
+	struct ibmasmfs_command_data *command_data = file->private_data;
+
+	if (command_data->command)
+		command_put(command_data->command);
+
+	kfree(command_data);
+	return 0;
+}
+
+static ssize_t command_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+	struct ibmasmfs_command_data *command_data = file->private_data;
+	struct command *cmd;
+	int len;
+	unsigned long flags;
+
+	if (*offset < 0)
+		return -EINVAL;
+	if (count == 0 || count > IBMASM_CMD_MAX_BUFFER_SIZE)
+		return 0;
+	if (*offset != 0)
+		return 0;
+
+	spin_lock_irqsave(&command_data->sp->lock, flags);
+	cmd = command_data->command;
+	if (cmd == NULL) {
+		spin_unlock_irqrestore(&command_data->sp->lock, flags);
+		return 0;
+	}
+	command_data->command = NULL;
+	spin_unlock_irqrestore(&command_data->sp->lock, flags);
+
+	if (cmd->status != IBMASM_CMD_COMPLETE) {
+		command_put(cmd);
+		return -EIO;
+	}
+	len = min(count, cmd->buffer_size);
+	if (copy_to_user(buf, cmd->buffer, len)) {
+		command_put(cmd);
+		return -EFAULT;
+	}
+	command_put(cmd);
+
+	return len;
+}
+
+static ssize_t command_file_write(struct file *file, const char __user *ubuff, size_t count, loff_t *offset)
+{
+	struct ibmasmfs_command_data *command_data = file->private_data;
+	struct command *cmd;
+	unsigned long flags;
+
+	if (*offset < 0)
+		return -EINVAL;
+	if (count == 0 || count > IBMASM_CMD_MAX_BUFFER_SIZE)
+		return 0;
+	if (*offset != 0)
+		return 0;
+
+	/* commands are executed sequentially, only one command at a time */
+	if (command_data->command)
+		return -EAGAIN;
+
+	cmd = ibmasm_new_command(command_data->sp, count);
+	if (!cmd)
+		return -ENOMEM;
+
+	if (copy_from_user(cmd->buffer, ubuff, count)) {
+		command_put(cmd);
+		return -EFAULT;
+	}
+
+	spin_lock_irqsave(&command_data->sp->lock, flags);
+	if (command_data->command) {
+		spin_unlock_irqrestore(&command_data->sp->lock, flags);
+		command_put(cmd);
+		return -EAGAIN;
+	}
+	command_data->command = cmd;
+	spin_unlock_irqrestore(&command_data->sp->lock, flags);
+
+	ibmasm_exec_command(command_data->sp, cmd);
+	ibmasm_wait_for_response(cmd, get_dot_command_timeout(cmd->buffer));
+
+	return count;
+}
+
+static int event_file_open(struct inode *inode, struct file *file)
+{
+	struct ibmasmfs_event_data *event_data;
+	struct service_processor *sp;
+
+	if (!inode->i_private)
+		return -ENODEV;
+
+	sp = inode->i_private;
+
+	event_data = kmalloc(sizeof(struct ibmasmfs_event_data), GFP_KERNEL);
+	if (!event_data)
+		return -ENOMEM;
+
+	ibmasm_event_reader_register(sp, &event_data->reader);
+
+	event_data->sp = sp;
+	event_data->active = 0;
+	file->private_data = event_data;
+	return 0;
+}
+
+static int event_file_close(struct inode *inode, struct file *file)
+{
+	struct ibmasmfs_event_data *event_data = file->private_data;
+
+	ibmasm_event_reader_unregister(event_data->sp, &event_data->reader);
+	kfree(event_data);
+	return 0;
+}
+
+static ssize_t event_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+	struct ibmasmfs_event_data *event_data = file->private_data;
+	struct event_reader *reader = &event_data->reader;
+	struct service_processor *sp = event_data->sp;
+	int ret;
+	unsigned long flags;
+
+	if (*offset < 0)
+		return -EINVAL;
+	if (count == 0 || count > IBMASM_EVENT_MAX_SIZE)
+		return 0;
+	if (*offset != 0)
+		return 0;
+
+	spin_lock_irqsave(&sp->lock, flags);
+	if (event_data->active) {
+		spin_unlock_irqrestore(&sp->lock, flags);
+		return -EBUSY;
+	}
+	event_data->active = 1;
+	spin_unlock_irqrestore(&sp->lock, flags);
+
+	ret = ibmasm_get_next_event(sp, reader);
+	if (ret <= 0)
+		goto out;
+
+	if (count < reader->data_size) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+        if (copy_to_user(buf, reader->data, reader->data_size)) {
+		ret = -EFAULT;
+		goto out;
+	}
+	ret = reader->data_size;
+
+out:
+	event_data->active = 0;
+	return ret;
+}
+
+static ssize_t event_file_write(struct file *file, const char __user *buf, size_t count, loff_t *offset)
+{
+	struct ibmasmfs_event_data *event_data = file->private_data;
+
+	if (*offset < 0)
+		return -EINVAL;
+	if (count != 1)
+		return 0;
+	if (*offset != 0)
+		return 0;
+
+	ibmasm_cancel_next_event(&event_data->reader);
+	return 0;
+}
+
+static int r_heartbeat_file_open(struct inode *inode, struct file *file)
+{
+	struct ibmasmfs_heartbeat_data *rhbeat;
+
+	if (!inode->i_private)
+		return -ENODEV;
+
+	rhbeat = kmalloc(sizeof(struct ibmasmfs_heartbeat_data), GFP_KERNEL);
+	if (!rhbeat)
+		return -ENOMEM;
+
+	rhbeat->sp = inode->i_private;
+	rhbeat->active = 0;
+	ibmasm_init_reverse_heartbeat(rhbeat->sp, &rhbeat->heartbeat);
+	file->private_data = rhbeat;
+	return 0;
+}
+
+static int r_heartbeat_file_close(struct inode *inode, struct file *file)
+{
+	struct ibmasmfs_heartbeat_data *rhbeat = file->private_data;
+
+	kfree(rhbeat);
+	return 0;
+}
+
+static ssize_t r_heartbeat_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+	struct ibmasmfs_heartbeat_data *rhbeat = file->private_data;
+	unsigned long flags;
+	int result;
+
+	if (*offset < 0)
+		return -EINVAL;
+	if (count == 0 || count > 1024)
+		return 0;
+	if (*offset != 0)
+		return 0;
+
+	/* allow only one reverse heartbeat per process */
+	spin_lock_irqsave(&rhbeat->sp->lock, flags);
+	if (rhbeat->active) {
+		spin_unlock_irqrestore(&rhbeat->sp->lock, flags);
+		return -EBUSY;
+	}
+	rhbeat->active = 1;
+	spin_unlock_irqrestore(&rhbeat->sp->lock, flags);
+
+	result = ibmasm_start_reverse_heartbeat(rhbeat->sp, &rhbeat->heartbeat);
+	rhbeat->active = 0;
+
+	return result;
+}
+
+static ssize_t r_heartbeat_file_write(struct file *file, const char __user *buf, size_t count, loff_t *offset)
+{
+	struct ibmasmfs_heartbeat_data *rhbeat = file->private_data;
+
+	if (*offset < 0)
+		return -EINVAL;
+	if (count != 1)
+		return 0;
+	if (*offset != 0)
+		return 0;
+
+	if (rhbeat->active)
+		ibmasm_stop_reverse_heartbeat(&rhbeat->heartbeat);
+
+	return 1;
+}
+
+static int remote_settings_file_close(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+
+static ssize_t remote_settings_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
+{
+	void __iomem *address = (void __iomem *)file->private_data;
+	int len = 0;
+	unsigned int value;
+	char lbuf[20];
+
+	value = readl(address);
+	len = snprintf(lbuf, sizeof(lbuf), "%d\n", value);
+
+	return simple_read_from_buffer(buf, count, offset, lbuf, len);
+}
+
+static ssize_t remote_settings_file_write(struct file *file, const char __user *ubuff, size_t count, loff_t *offset)
+{
+	void __iomem *address = (void __iomem *)file->private_data;
+	char *buff;
+	unsigned int value;
+
+	if (*offset < 0)
+		return -EINVAL;
+	if (count == 0 || count > 1024)
+		return 0;
+	if (*offset != 0)
+		return 0;
+
+	buff = kzalloc (count + 1, GFP_KERNEL);
+	if (!buff)
+		return -ENOMEM;
+
+
+	if (copy_from_user(buff, ubuff, count)) {
+		kfree(buff);
+		return -EFAULT;
+	}
+
+	value = simple_strtoul(buff, NULL, 10);
+	writel(value, address);
+	kfree(buff);
+
+	return count;
+}
+
+static const struct file_operations command_fops = {
+	.open =		command_file_open,
+	.release =	command_file_close,
+	.read =		command_file_read,
+	.write =	command_file_write,
+	.llseek =	generic_file_llseek,
+};
+
+static const struct file_operations event_fops = {
+	.open =		event_file_open,
+	.release =	event_file_close,
+	.read =		event_file_read,
+	.write =	event_file_write,
+	.llseek =	generic_file_llseek,
+};
+
+static const struct file_operations r_heartbeat_fops = {
+	.open =		r_heartbeat_file_open,
+	.release =	r_heartbeat_file_close,
+	.read =		r_heartbeat_file_read,
+	.write =	r_heartbeat_file_write,
+	.llseek =	generic_file_llseek,
+};
+
+static const struct file_operations remote_settings_fops = {
+	.open =		simple_open,
+	.release =	remote_settings_file_close,
+	.read =		remote_settings_file_read,
+	.write =	remote_settings_file_write,
+	.llseek =	generic_file_llseek,
+};
+
+
+static void ibmasmfs_create_files (struct super_block *sb)
+{
+	struct list_head *entry;
+	struct service_processor *sp;
+
+	list_for_each(entry, &service_processors) {
+		struct dentry *dir;
+		struct dentry *remote_dir;
+		sp = list_entry(entry, struct service_processor, node);
+		dir = ibmasmfs_create_dir(sb->s_root, sp->dirname);
+		if (!dir)
+			continue;
+
+		ibmasmfs_create_file(dir, "command", &command_fops, sp, S_IRUSR|S_IWUSR);
+		ibmasmfs_create_file(dir, "event", &event_fops, sp, S_IRUSR|S_IWUSR);
+		ibmasmfs_create_file(dir, "reverse_heartbeat", &r_heartbeat_fops, sp, S_IRUSR|S_IWUSR);
+
+		remote_dir = ibmasmfs_create_dir(dir, "remote_video");
+		if (!remote_dir)
+			continue;
+
+		ibmasmfs_create_file(remote_dir, "width", &remote_settings_fops, (void *)display_width(sp), S_IRUSR|S_IWUSR);
+		ibmasmfs_create_file(remote_dir, "height", &remote_settings_fops, (void *)display_height(sp), S_IRUSR|S_IWUSR);
+		ibmasmfs_create_file(remote_dir, "depth", &remote_settings_fops, (void *)display_depth(sp), S_IRUSR|S_IWUSR);
+	}
+}
diff --git a/drivers/misc/ibmasm/lowlevel.c b/drivers/misc/ibmasm/lowlevel.c
new file mode 100644
index 000000000..5319ea261
--- /dev/null
+++ b/drivers/misc/ibmasm/lowlevel.c
@@ -0,0 +1,85 @@
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+#include "ibmasm.h"
+#include "lowlevel.h"
+#include "i2o.h"
+#include "dot_command.h"
+#include "remote.h"
+
+static struct i2o_header header = I2O_HEADER_TEMPLATE;
+
+
+int ibmasm_send_i2o_message(struct service_processor *sp)
+{
+	u32 mfa;
+	unsigned int command_size;
+	struct i2o_message *message;
+	struct command *command = sp->current_command;
+
+	mfa = get_mfa_inbound(sp->base_address);
+	if (!mfa)
+		return 1;
+
+	command_size = get_dot_command_size(command->buffer);
+	header.message_size = outgoing_message_size(command_size);
+
+	message = get_i2o_message(sp->base_address, mfa);
+
+	memcpy_toio(&message->header, &header, sizeof(struct i2o_header));
+	memcpy_toio(&message->data, command->buffer, command_size);
+
+	set_mfa_inbound(sp->base_address, mfa);
+
+	return 0;
+}
+
+irqreturn_t ibmasm_interrupt_handler(int irq, void * dev_id)
+{
+	u32	mfa;
+	struct service_processor *sp = (struct service_processor *)dev_id;
+	void __iomem *base_address = sp->base_address;
+	char tsbuf[32];
+
+	if (!sp_interrupt_pending(base_address))
+		return IRQ_NONE;
+
+	dbg("respond to interrupt at %s\n", get_timestamp(tsbuf));
+
+	if (mouse_interrupt_pending(sp)) {
+		ibmasm_handle_mouse_interrupt(sp);
+		clear_mouse_interrupt(sp);
+	}
+
+	mfa = get_mfa_outbound(base_address);
+	if (valid_mfa(mfa)) {
+		struct i2o_message *msg = get_i2o_message(base_address, mfa);
+		ibmasm_receive_message(sp, &msg->data, incoming_data_size(msg));
+	} else
+		dbg("didn't get a valid MFA\n");
+
+	set_mfa_outbound(base_address, mfa);
+	dbg("finished interrupt at   %s\n", get_timestamp(tsbuf));
+
+	return IRQ_HANDLED;
+}
diff --git a/drivers/misc/ibmasm/lowlevel.h b/drivers/misc/ibmasm/lowlevel.h
new file mode 100644
index 000000000..e97848f51
--- /dev/null
+++ b/drivers/misc/ibmasm/lowlevel.h
@@ -0,0 +1,137 @@
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+/* Condor service processor specific hardware definitions */
+
+#ifndef __IBMASM_CONDOR_H__
+#define __IBMASM_CONDOR_H__
+
+#include <asm/io.h>
+
+#define VENDORID_IBM	0x1014
+#define DEVICEID_RSA	0x010F
+
+#define GET_MFA_ADDR(x)  (x & 0xFFFFFF00)
+
+#define MAILBOX_FULL(x)  (x & 0x00000001)
+
+#define NO_MFAS_AVAILABLE     0xFFFFFFFF
+
+
+#define INBOUND_QUEUE_PORT   0x40  /* contains address of next free MFA */
+#define OUTBOUND_QUEUE_PORT  0x44  /* contains address of posted MFA    */
+
+#define SP_INTR_MASK	0x00000008
+#define UART_INTR_MASK	0x00000010
+
+#define INTR_STATUS_REGISTER   0x13A0
+#define INTR_CONTROL_REGISTER  0x13A4
+
+#define SCOUT_COM_A_BASE         0x0000
+#define SCOUT_COM_B_BASE         0x0100
+#define SCOUT_COM_C_BASE         0x0200
+#define SCOUT_COM_D_BASE         0x0300
+
+static inline int sp_interrupt_pending(void __iomem *base_address)
+{
+	return SP_INTR_MASK & readl(base_address + INTR_STATUS_REGISTER);
+}
+
+static inline int uart_interrupt_pending(void __iomem *base_address)
+{
+	return UART_INTR_MASK & readl(base_address + INTR_STATUS_REGISTER);
+}
+
+static inline void ibmasm_enable_interrupts(void __iomem *base_address, int mask)
+{
+	void __iomem *ctrl_reg = base_address + INTR_CONTROL_REGISTER;
+	writel( readl(ctrl_reg) & ~mask, ctrl_reg);
+}
+
+static inline void ibmasm_disable_interrupts(void __iomem *base_address, int mask)
+{
+	void __iomem *ctrl_reg = base_address + INTR_CONTROL_REGISTER;
+	writel( readl(ctrl_reg) | mask, ctrl_reg);
+}
+
+static inline void enable_sp_interrupts(void __iomem *base_address)
+{
+	ibmasm_enable_interrupts(base_address, SP_INTR_MASK);
+}
+
+static inline void disable_sp_interrupts(void __iomem *base_address)
+{
+	ibmasm_disable_interrupts(base_address, SP_INTR_MASK);
+}
+
+static inline void enable_uart_interrupts(void __iomem *base_address)
+{
+	ibmasm_enable_interrupts(base_address, UART_INTR_MASK);
+}
+
+static inline void disable_uart_interrupts(void __iomem *base_address)
+{
+	ibmasm_disable_interrupts(base_address, UART_INTR_MASK);
+}
+
+#define valid_mfa(mfa)	( (mfa) != NO_MFAS_AVAILABLE )
+
+static inline u32 get_mfa_outbound(void __iomem *base_address)
+{
+	int retry;
+	u32 mfa;
+
+	for (retry=0; retry<=10; retry++) {
+		mfa = readl(base_address + OUTBOUND_QUEUE_PORT);
+		if (valid_mfa(mfa))
+			break;
+	}
+	return mfa;
+}
+
+static inline void set_mfa_outbound(void __iomem *base_address, u32 mfa)
+{
+	writel(mfa, base_address + OUTBOUND_QUEUE_PORT);
+}
+
+static inline u32 get_mfa_inbound(void __iomem *base_address)
+{
+	u32 mfa = readl(base_address + INBOUND_QUEUE_PORT);
+
+	if (MAILBOX_FULL(mfa))
+		return 0;
+
+	return mfa;
+}
+
+static inline void set_mfa_inbound(void __iomem *base_address, u32 mfa)
+{
+	writel(mfa, base_address + INBOUND_QUEUE_PORT);
+}
+
+static inline struct i2o_message *get_i2o_message(void __iomem *base_address, u32 mfa)
+{
+	return (struct i2o_message *)(GET_MFA_ADDR(mfa) + base_address);
+}
+
+#endif /* __IBMASM_CONDOR_H__ */
diff --git a/drivers/misc/ibmasm/module.c b/drivers/misc/ibmasm/module.c
new file mode 100644
index 000000000..9f8344169
--- /dev/null
+++ b/drivers/misc/ibmasm/module.c
@@ -0,0 +1,238 @@
+
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ * This driver is based on code originally written by Pete Reynolds
+ * and others.
+ *
+ */
+
+/*
+ * The ASM device driver does the following things:
+ *
+ * 1) When loaded it sends a message to the service processor,
+ * indicating that an OS is * running. This causes the service processor
+ * to send periodic heartbeats to the OS.
+ *
+ * 2) Answers the periodic heartbeats sent by the service processor.
+ * Failure to do so would result in system reboot.
+ *
+ * 3) Acts as a pass through for dot commands sent from user applications.
+ * The interface for this is the ibmasmfs file system.
+ *
+ * 4) Allows user applications to register for event notification. Events
+ * are sent to the driver through interrupts. They can be read from user
+ * space through the ibmasmfs file system.
+ *
+ * 5) Allows user space applications to send heartbeats to the service
+ * processor (aka reverse heartbeats). Again this happens through ibmasmfs.
+ *
+ * 6) Handles remote mouse and keyboard event interrupts and makes them
+ * available to user applications through ibmasmfs.
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include "ibmasm.h"
+#include "lowlevel.h"
+#include "remote.h"
+
+int ibmasm_debug = 0;
+module_param(ibmasm_debug, int , S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(ibmasm_debug, " Set debug mode on or off");
+
+
+static int ibmasm_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	int result;
+	struct service_processor *sp;
+
+	if ((result = pci_enable_device(pdev))) {
+		dev_err(&pdev->dev, "Failed to enable PCI device\n");
+		return result;
+	}
+	if ((result = pci_request_regions(pdev, DRIVER_NAME))) {
+		dev_err(&pdev->dev, "Failed to allocate PCI resources\n");
+		goto error_resources;
+	}
+	/* vnc client won't work without bus-mastering */
+	pci_set_master(pdev);
+
+	sp = kzalloc(sizeof(struct service_processor), GFP_KERNEL);
+	if (sp == NULL) {
+		dev_err(&pdev->dev, "Failed to allocate memory\n");
+		result = -ENOMEM;
+		goto error_kmalloc;
+	}
+
+	spin_lock_init(&sp->lock);
+	INIT_LIST_HEAD(&sp->command_queue);
+
+	pci_set_drvdata(pdev, (void *)sp);
+	sp->dev = &pdev->dev;
+	sp->number = pdev->bus->number;
+	snprintf(sp->dirname, IBMASM_NAME_SIZE, "%d", sp->number);
+	snprintf(sp->devname, IBMASM_NAME_SIZE, "%s%d", DRIVER_NAME, sp->number);
+
+	result = ibmasm_event_buffer_init(sp);
+	if (result) {
+		dev_err(sp->dev, "Failed to allocate event buffer\n");
+		goto error_eventbuffer;
+	}
+
+	result = ibmasm_heartbeat_init(sp);
+	if (result) {
+		dev_err(sp->dev, "Failed to allocate heartbeat command\n");
+		goto error_heartbeat;
+	}
+
+	sp->irq = pdev->irq;
+	sp->base_address = pci_ioremap_bar(pdev, 0);
+	if (!sp->base_address) {
+		dev_err(sp->dev, "Failed to ioremap pci memory\n");
+		result =  -ENODEV;
+		goto error_ioremap;
+	}
+
+	result = request_irq(sp->irq, ibmasm_interrupt_handler, IRQF_SHARED, sp->devname, (void*)sp);
+	if (result) {
+		dev_err(sp->dev, "Failed to register interrupt handler\n");
+		goto error_request_irq;
+	}
+
+	enable_sp_interrupts(sp->base_address);
+
+	result = ibmasm_init_remote_input_dev(sp);
+	if (result) {
+		dev_err(sp->dev, "Failed to initialize remote queue\n");
+		goto error_init_remote;
+	}
+
+	result = ibmasm_send_driver_vpd(sp);
+	if (result) {
+		dev_err(sp->dev, "Failed to send driver VPD to service processor\n");
+		goto error_send_message;
+	}
+	result = ibmasm_send_os_state(sp, SYSTEM_STATE_OS_UP);
+	if (result) {
+		dev_err(sp->dev, "Failed to send OS state to service processor\n");
+		goto error_send_message;
+	}
+	ibmasmfs_add_sp(sp);
+
+	ibmasm_register_uart(sp);
+
+	return 0;
+
+error_send_message:
+	ibmasm_free_remote_input_dev(sp);
+error_init_remote:
+	disable_sp_interrupts(sp->base_address);
+	free_irq(sp->irq, (void *)sp);
+error_request_irq:
+	iounmap(sp->base_address);
+error_ioremap:
+	ibmasm_heartbeat_exit(sp);
+error_heartbeat:
+	ibmasm_event_buffer_exit(sp);
+error_eventbuffer:
+	kfree(sp);
+error_kmalloc:
+        pci_release_regions(pdev);
+error_resources:
+        pci_disable_device(pdev);
+
+	return result;
+}
+
+static void ibmasm_remove_one(struct pci_dev *pdev)
+{
+	struct service_processor *sp = pci_get_drvdata(pdev);
+
+	dbg("Unregistering UART\n");
+	ibmasm_unregister_uart(sp);
+	dbg("Sending OS down message\n");
+	if (ibmasm_send_os_state(sp, SYSTEM_STATE_OS_DOWN))
+		err("failed to get response to 'Send OS State' command\n");
+	dbg("Disabling heartbeats\n");
+	ibmasm_heartbeat_exit(sp);
+	dbg("Disabling interrupts\n");
+	disable_sp_interrupts(sp->base_address);
+	dbg("Freeing SP irq\n");
+	free_irq(sp->irq, (void *)sp);
+	dbg("Cleaning up\n");
+	ibmasm_free_remote_input_dev(sp);
+	iounmap(sp->base_address);
+	ibmasm_event_buffer_exit(sp);
+	kfree(sp);
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+}
+
+static struct pci_device_id ibmasm_pci_table[] =
+{
+	{ PCI_DEVICE(VENDORID_IBM, DEVICEID_RSA) },
+	{},
+};
+
+static struct pci_driver ibmasm_driver = {
+	.name		= DRIVER_NAME,
+	.id_table	= ibmasm_pci_table,
+	.probe		= ibmasm_init_one,
+	.remove		= ibmasm_remove_one,
+};
+
+static void __exit ibmasm_exit (void)
+{
+	ibmasm_unregister_panic_notifier();
+	ibmasmfs_unregister();
+	pci_unregister_driver(&ibmasm_driver);
+	info(DRIVER_DESC " version " DRIVER_VERSION " unloaded");
+}
+
+static int __init ibmasm_init(void)
+{
+	int result = pci_register_driver(&ibmasm_driver);
+	if (result)
+		return result;
+
+	result = ibmasmfs_register();
+	if (result) {
+		pci_unregister_driver(&ibmasm_driver);
+		err("Failed to register ibmasmfs file system");
+		return result;
+	}
+
+	ibmasm_register_panic_notifier();
+	info(DRIVER_DESC " version " DRIVER_VERSION " loaded");
+	return 0;
+}
+
+module_init(ibmasm_init);
+module_exit(ibmasm_exit);
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(pci, ibmasm_pci_table);
+
diff --git a/drivers/misc/ibmasm/r_heartbeat.c b/drivers/misc/ibmasm/r_heartbeat.c
new file mode 100644
index 000000000..5c7dd26db
--- /dev/null
+++ b/drivers/misc/ibmasm/r_heartbeat.c
@@ -0,0 +1,99 @@
+
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+#include <linux/sched/signal.h>
+#include "ibmasm.h"
+#include "dot_command.h"
+
+/*
+ * Reverse Heartbeat, i.e. heartbeats sent from the driver to the
+ * service processor.
+ * These heartbeats are initiated by user level programs.
+ */
+
+/* the reverse heartbeat dot command */
+#pragma pack(1)
+static struct {
+	struct dot_command_header	header;
+	unsigned char			command[3];
+} rhb_dot_cmd = {
+	.header = {
+		.type =		sp_read,
+		.command_size = 3,
+		.data_size =	0,
+		.status =	0
+	},
+	.command = { 4, 3, 6 }
+};
+#pragma pack()
+
+void ibmasm_init_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb)
+{
+	init_waitqueue_head(&rhb->wait);
+	rhb->stopped = 0;
+}
+
+/**
+ * start_reverse_heartbeat
+ * Loop forever, sending a reverse heartbeat dot command to the service
+ * processor, then sleeping. The loop comes to an end if the service
+ * processor fails to respond 3 times or we were interrupted.
+ */
+int ibmasm_start_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb)
+{
+	struct command *cmd;
+	int times_failed = 0;
+	int result = 1;
+
+	cmd = ibmasm_new_command(sp, sizeof rhb_dot_cmd);
+	if (!cmd)
+		return -ENOMEM;
+
+	while (times_failed < 3) {
+		memcpy(cmd->buffer, (void *)&rhb_dot_cmd, sizeof rhb_dot_cmd);
+		cmd->status = IBMASM_CMD_PENDING;
+		ibmasm_exec_command(sp, cmd);
+		ibmasm_wait_for_response(cmd, IBMASM_CMD_TIMEOUT_NORMAL);
+
+		if (cmd->status != IBMASM_CMD_COMPLETE)
+			times_failed++;
+
+		wait_event_interruptible_timeout(rhb->wait,
+			rhb->stopped,
+			REVERSE_HEARTBEAT_TIMEOUT * HZ);
+
+		if (signal_pending(current) || rhb->stopped) {
+			result = -EINTR;
+			break;
+		}
+	}
+	command_put(cmd);
+	rhb->stopped = 0;
+
+	return result;
+}
+
+void ibmasm_stop_reverse_heartbeat(struct reverse_heartbeat *rhb)
+{
+	rhb->stopped = 1;
+	wake_up_interruptible(&rhb->wait);
+}
diff --git a/drivers/misc/ibmasm/remote.c b/drivers/misc/ibmasm/remote.c
new file mode 100644
index 000000000..477bb43c8
--- /dev/null
+++ b/drivers/misc/ibmasm/remote.c
@@ -0,0 +1,282 @@
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Authors: Max Asböck <amax@us.ibm.com>
+ *          Vernon Mauery <vernux@us.ibm.com>
+ *
+ */
+
+/* Remote mouse and keyboard event handling functions */
+
+#include <linux/pci.h>
+#include "ibmasm.h"
+#include "remote.h"
+
+#define MOUSE_X_MAX	1600
+#define MOUSE_Y_MAX	1200
+
+static const unsigned short xlate_high[XLATE_SIZE] = {
+	[KEY_SYM_ENTER & 0xff] = KEY_ENTER,
+	[KEY_SYM_KPSLASH & 0xff] = KEY_KPSLASH,
+	[KEY_SYM_KPSTAR & 0xff] = KEY_KPASTERISK,
+	[KEY_SYM_KPMINUS & 0xff] = KEY_KPMINUS,
+	[KEY_SYM_KPDOT & 0xff] = KEY_KPDOT,
+	[KEY_SYM_KPPLUS & 0xff] = KEY_KPPLUS,
+	[KEY_SYM_KP0 & 0xff] = KEY_KP0,
+	[KEY_SYM_KP1 & 0xff] = KEY_KP1,
+	[KEY_SYM_KP2 & 0xff] = KEY_KP2, [KEY_SYM_KPDOWN & 0xff] = KEY_KP2,
+	[KEY_SYM_KP3 & 0xff] = KEY_KP3,
+	[KEY_SYM_KP4 & 0xff] = KEY_KP4, [KEY_SYM_KPLEFT & 0xff] = KEY_KP4,
+	[KEY_SYM_KP5 & 0xff] = KEY_KP5,
+	[KEY_SYM_KP6 & 0xff] = KEY_KP6, [KEY_SYM_KPRIGHT & 0xff] = KEY_KP6,
+	[KEY_SYM_KP7 & 0xff] = KEY_KP7,
+	[KEY_SYM_KP8 & 0xff] = KEY_KP8, [KEY_SYM_KPUP & 0xff] = KEY_KP8,
+	[KEY_SYM_KP9 & 0xff] = KEY_KP9,
+	[KEY_SYM_BK_SPC & 0xff] = KEY_BACKSPACE,
+	[KEY_SYM_TAB & 0xff] = KEY_TAB,
+	[KEY_SYM_CTRL & 0xff] = KEY_LEFTCTRL,
+	[KEY_SYM_ALT & 0xff] = KEY_LEFTALT,
+	[KEY_SYM_INSERT & 0xff] = KEY_INSERT,
+	[KEY_SYM_DELETE & 0xff] = KEY_DELETE,
+	[KEY_SYM_SHIFT & 0xff] = KEY_LEFTSHIFT,
+	[KEY_SYM_UARROW & 0xff] = KEY_UP,
+	[KEY_SYM_DARROW & 0xff] = KEY_DOWN,
+	[KEY_SYM_LARROW & 0xff] = KEY_LEFT,
+	[KEY_SYM_RARROW & 0xff] = KEY_RIGHT,
+	[KEY_SYM_ESCAPE & 0xff] = KEY_ESC,
+        [KEY_SYM_PAGEUP & 0xff] = KEY_PAGEUP,
+        [KEY_SYM_PAGEDOWN & 0xff] = KEY_PAGEDOWN,
+        [KEY_SYM_HOME & 0xff] = KEY_HOME,
+        [KEY_SYM_END & 0xff] = KEY_END,
+	[KEY_SYM_F1 & 0xff] = KEY_F1,
+	[KEY_SYM_F2 & 0xff] = KEY_F2,
+	[KEY_SYM_F3 & 0xff] = KEY_F3,
+	[KEY_SYM_F4 & 0xff] = KEY_F4,
+	[KEY_SYM_F5 & 0xff] = KEY_F5,
+	[KEY_SYM_F6 & 0xff] = KEY_F6,
+	[KEY_SYM_F7 & 0xff] = KEY_F7,
+	[KEY_SYM_F8 & 0xff] = KEY_F8,
+	[KEY_SYM_F9 & 0xff] = KEY_F9,
+	[KEY_SYM_F10 & 0xff] = KEY_F10,
+	[KEY_SYM_F11 & 0xff] = KEY_F11,
+	[KEY_SYM_F12 & 0xff] = KEY_F12,
+	[KEY_SYM_CAP_LOCK & 0xff] = KEY_CAPSLOCK,
+	[KEY_SYM_NUM_LOCK & 0xff] = KEY_NUMLOCK,
+	[KEY_SYM_SCR_LOCK & 0xff] = KEY_SCROLLLOCK,
+};
+
+static const unsigned short xlate[XLATE_SIZE] = {
+	[NO_KEYCODE] = KEY_RESERVED,
+	[KEY_SYM_SPACE] = KEY_SPACE,
+	[KEY_SYM_TILDE] = KEY_GRAVE,        [KEY_SYM_BKTIC] = KEY_GRAVE,
+	[KEY_SYM_ONE] = KEY_1,              [KEY_SYM_BANG] = KEY_1,
+	[KEY_SYM_TWO] = KEY_2,              [KEY_SYM_AT] = KEY_2,
+	[KEY_SYM_THREE] = KEY_3,            [KEY_SYM_POUND] = KEY_3,
+	[KEY_SYM_FOUR] = KEY_4,             [KEY_SYM_DOLLAR] = KEY_4,
+	[KEY_SYM_FIVE] = KEY_5,             [KEY_SYM_PERCENT] = KEY_5,
+	[KEY_SYM_SIX] = KEY_6,              [KEY_SYM_CARAT] = KEY_6,
+	[KEY_SYM_SEVEN] = KEY_7,            [KEY_SYM_AMPER] = KEY_7,
+	[KEY_SYM_EIGHT] = KEY_8,            [KEY_SYM_STAR] = KEY_8,
+	[KEY_SYM_NINE] = KEY_9,             [KEY_SYM_LPAREN] = KEY_9,
+	[KEY_SYM_ZERO] = KEY_0,             [KEY_SYM_RPAREN] = KEY_0,
+	[KEY_SYM_MINUS] = KEY_MINUS,        [KEY_SYM_USCORE] = KEY_MINUS,
+	[KEY_SYM_EQUAL] = KEY_EQUAL,        [KEY_SYM_PLUS] = KEY_EQUAL,
+	[KEY_SYM_LBRKT] = KEY_LEFTBRACE,    [KEY_SYM_LCURLY] = KEY_LEFTBRACE,
+	[KEY_SYM_RBRKT] = KEY_RIGHTBRACE,   [KEY_SYM_RCURLY] = KEY_RIGHTBRACE,
+	[KEY_SYM_SLASH] = KEY_BACKSLASH,    [KEY_SYM_PIPE] = KEY_BACKSLASH,
+	[KEY_SYM_TIC] = KEY_APOSTROPHE,     [KEY_SYM_QUOTE] = KEY_APOSTROPHE,
+	[KEY_SYM_SEMIC] = KEY_SEMICOLON,    [KEY_SYM_COLON] = KEY_SEMICOLON,
+	[KEY_SYM_COMMA] = KEY_COMMA,        [KEY_SYM_LT] = KEY_COMMA,
+	[KEY_SYM_PERIOD] = KEY_DOT,         [KEY_SYM_GT] = KEY_DOT,
+	[KEY_SYM_BSLASH] = KEY_SLASH,       [KEY_SYM_QMARK] = KEY_SLASH,
+	[KEY_SYM_A] = KEY_A,                [KEY_SYM_a] = KEY_A,
+	[KEY_SYM_B] = KEY_B,                [KEY_SYM_b] = KEY_B,
+	[KEY_SYM_C] = KEY_C,                [KEY_SYM_c] = KEY_C,
+	[KEY_SYM_D] = KEY_D,                [KEY_SYM_d] = KEY_D,
+	[KEY_SYM_E] = KEY_E,                [KEY_SYM_e] = KEY_E,
+	[KEY_SYM_F] = KEY_F,                [KEY_SYM_f] = KEY_F,
+	[KEY_SYM_G] = KEY_G,                [KEY_SYM_g] = KEY_G,
+	[KEY_SYM_H] = KEY_H,                [KEY_SYM_h] = KEY_H,
+	[KEY_SYM_I] = KEY_I,                [KEY_SYM_i] = KEY_I,
+	[KEY_SYM_J] = KEY_J,                [KEY_SYM_j] = KEY_J,
+	[KEY_SYM_K] = KEY_K,                [KEY_SYM_k] = KEY_K,
+	[KEY_SYM_L] = KEY_L,                [KEY_SYM_l] = KEY_L,
+	[KEY_SYM_M] = KEY_M,                [KEY_SYM_m] = KEY_M,
+	[KEY_SYM_N] = KEY_N,                [KEY_SYM_n] = KEY_N,
+	[KEY_SYM_O] = KEY_O,                [KEY_SYM_o] = KEY_O,
+	[KEY_SYM_P] = KEY_P,                [KEY_SYM_p] = KEY_P,
+	[KEY_SYM_Q] = KEY_Q,                [KEY_SYM_q] = KEY_Q,
+	[KEY_SYM_R] = KEY_R,                [KEY_SYM_r] = KEY_R,
+	[KEY_SYM_S] = KEY_S,                [KEY_SYM_s] = KEY_S,
+	[KEY_SYM_T] = KEY_T,                [KEY_SYM_t] = KEY_T,
+	[KEY_SYM_U] = KEY_U,                [KEY_SYM_u] = KEY_U,
+	[KEY_SYM_V] = KEY_V,                [KEY_SYM_v] = KEY_V,
+	[KEY_SYM_W] = KEY_W,                [KEY_SYM_w] = KEY_W,
+	[KEY_SYM_X] = KEY_X,                [KEY_SYM_x] = KEY_X,
+	[KEY_SYM_Y] = KEY_Y,                [KEY_SYM_y] = KEY_Y,
+	[KEY_SYM_Z] = KEY_Z,                [KEY_SYM_z] = KEY_Z,
+};
+
+static void print_input(struct remote_input *input)
+{
+	if (input->type == INPUT_TYPE_MOUSE) {
+		unsigned char buttons = input->mouse_buttons;
+		dbg("remote mouse movement: (x,y)=(%d,%d)%s%s%s%s\n",
+			input->data.mouse.x, input->data.mouse.y,
+			(buttons) ? " -- buttons:" : "",
+			(buttons & REMOTE_BUTTON_LEFT) ? "left " : "",
+			(buttons & REMOTE_BUTTON_MIDDLE) ? "middle " : "",
+			(buttons & REMOTE_BUTTON_RIGHT) ? "right" : ""
+		      );
+	} else {
+		dbg("remote keypress (code, flag, down):"
+			   "%d (0x%x) [0x%x] [0x%x]\n",
+				input->data.keyboard.key_code,
+				input->data.keyboard.key_code,
+				input->data.keyboard.key_flag,
+				input->data.keyboard.key_down
+		      );
+	}
+}
+
+static void send_mouse_event(struct input_dev *dev, struct remote_input *input)
+{
+	unsigned char buttons = input->mouse_buttons;
+
+	input_report_abs(dev, ABS_X, input->data.mouse.x);
+	input_report_abs(dev, ABS_Y, input->data.mouse.y);
+	input_report_key(dev, BTN_LEFT, buttons & REMOTE_BUTTON_LEFT);
+	input_report_key(dev, BTN_MIDDLE, buttons & REMOTE_BUTTON_MIDDLE);
+	input_report_key(dev, BTN_RIGHT, buttons & REMOTE_BUTTON_RIGHT);
+	input_sync(dev);
+}
+
+static void send_keyboard_event(struct input_dev *dev,
+		struct remote_input *input)
+{
+	unsigned int key;
+	unsigned short code = input->data.keyboard.key_code;
+
+	if (code & 0xff00)
+		key = xlate_high[code & 0xff];
+	else
+		key = xlate[code];
+	input_report_key(dev, key, input->data.keyboard.key_down);
+	input_sync(dev);
+}
+
+void ibmasm_handle_mouse_interrupt(struct service_processor *sp)
+{
+	unsigned long reader;
+	unsigned long writer;
+	struct remote_input input;
+
+	reader = get_queue_reader(sp);
+	writer = get_queue_writer(sp);
+
+	while (reader != writer) {
+		memcpy_fromio(&input, get_queue_entry(sp, reader),
+				sizeof(struct remote_input));
+
+		print_input(&input);
+		if (input.type == INPUT_TYPE_MOUSE) {
+			send_mouse_event(sp->remote.mouse_dev, &input);
+		} else if (input.type == INPUT_TYPE_KEYBOARD) {
+			send_keyboard_event(sp->remote.keybd_dev, &input);
+		} else
+			break;
+
+		reader = advance_queue_reader(sp, reader);
+		writer = get_queue_writer(sp);
+	}
+}
+
+int ibmasm_init_remote_input_dev(struct service_processor *sp)
+{
+	/* set up the mouse input device */
+	struct input_dev *mouse_dev, *keybd_dev;
+	struct pci_dev *pdev = to_pci_dev(sp->dev);
+	int error = -ENOMEM;
+	int i;
+
+	sp->remote.mouse_dev = mouse_dev = input_allocate_device();
+	sp->remote.keybd_dev = keybd_dev = input_allocate_device();
+
+	if (!mouse_dev || !keybd_dev)
+		goto err_free_devices;
+
+	mouse_dev->id.bustype = BUS_PCI;
+	mouse_dev->id.vendor = pdev->vendor;
+	mouse_dev->id.product = pdev->device;
+	mouse_dev->id.version = 1;
+	mouse_dev->dev.parent = sp->dev;
+	mouse_dev->evbit[0]  = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	mouse_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
+	set_bit(BTN_TOUCH, mouse_dev->keybit);
+	mouse_dev->name = "ibmasm RSA I remote mouse";
+	input_set_abs_params(mouse_dev, ABS_X, 0, MOUSE_X_MAX, 0, 0);
+	input_set_abs_params(mouse_dev, ABS_Y, 0, MOUSE_Y_MAX, 0, 0);
+
+	keybd_dev->id.bustype = BUS_PCI;
+	keybd_dev->id.vendor = pdev->vendor;
+	keybd_dev->id.product = pdev->device;
+	keybd_dev->id.version = 2;
+	keybd_dev->dev.parent = sp->dev;
+	keybd_dev->evbit[0]  = BIT_MASK(EV_KEY);
+	keybd_dev->name = "ibmasm RSA I remote keyboard";
+
+	for (i = 0; i < XLATE_SIZE; i++) {
+		if (xlate_high[i])
+			set_bit(xlate_high[i], keybd_dev->keybit);
+		if (xlate[i])
+			set_bit(xlate[i], keybd_dev->keybit);
+	}
+
+	error = input_register_device(mouse_dev);
+	if (error)
+		goto err_free_devices;
+
+	error = input_register_device(keybd_dev);
+	if (error)
+		goto err_unregister_mouse_dev;
+
+	enable_mouse_interrupts(sp);
+
+	printk(KERN_INFO "ibmasm remote responding to events on RSA card %d\n", sp->number);
+
+	return 0;
+
+ err_unregister_mouse_dev:
+	input_unregister_device(mouse_dev);
+	mouse_dev = NULL; /* so we don't try to free it again below */
+ err_free_devices:
+	input_free_device(mouse_dev);
+	input_free_device(keybd_dev);
+
+	return error;
+}
+
+void ibmasm_free_remote_input_dev(struct service_processor *sp)
+{
+	disable_mouse_interrupts(sp);
+	input_unregister_device(sp->remote.mouse_dev);
+	input_unregister_device(sp->remote.keybd_dev);
+}
+
diff --git a/drivers/misc/ibmasm/remote.h b/drivers/misc/ibmasm/remote.h
new file mode 100644
index 000000000..a7729ef76
--- /dev/null
+++ b/drivers/misc/ibmasm/remote.h
@@ -0,0 +1,270 @@
+
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ * Originally written by Pete Reynolds
+ */
+
+#ifndef _IBMASM_REMOTE_H_
+#define _IBMASM_REMOTE_H_
+
+#include <asm/io.h>
+
+/* pci offsets */
+#define CONDOR_MOUSE_DATA		0x000AC000
+#define CONDOR_MOUSE_ISR_CONTROL	0x00
+#define CONDOR_MOUSE_ISR_STATUS		0x04
+#define CONDOR_MOUSE_Q_READER		0x08
+#define CONDOR_MOUSE_Q_WRITER		0x0C
+#define CONDOR_MOUSE_Q_BEGIN		0x10
+#define CONDOR_MOUSE_MAX_X		0x14
+#define CONDOR_MOUSE_MAX_Y		0x18
+
+#define CONDOR_INPUT_DESKTOP_INFO	0x1F0
+#define CONDOR_INPUT_DISPLAY_RESX	0x1F4
+#define CONDOR_INPUT_DISPLAY_RESY	0x1F8
+#define CONDOR_INPUT_DISPLAY_BITS	0x1FC
+#define CONDOR_OUTPUT_VNC_STATUS	0x200
+
+#define CONDOR_MOUSE_INTR_STATUS_MASK	0x00000001
+
+#define INPUT_TYPE_MOUSE	0x1
+#define INPUT_TYPE_KEYBOARD	0x2
+
+
+/* mouse button states received from SP */
+#define REMOTE_DOUBLE_CLICK	0xF0
+#define REMOTE_BUTTON_LEFT	0x01
+#define REMOTE_BUTTON_MIDDLE	0x02
+#define REMOTE_BUTTON_RIGHT	0x04
+
+/* size of keysym/keycode translation matricies */
+#define XLATE_SIZE 256
+
+struct mouse_input {
+	unsigned short	y;
+	unsigned short	x;
+};
+
+
+struct keyboard_input {
+	unsigned short	key_code;
+	unsigned char	key_flag;
+	unsigned char	key_down;
+};
+
+
+
+struct remote_input {
+	union {
+		struct mouse_input	mouse;
+		struct keyboard_input	keyboard;
+	} data;
+
+	unsigned char	type;
+	unsigned char	pad1;
+	unsigned char	mouse_buttons;
+	unsigned char	pad3;
+};
+
+#define mouse_addr(sp)		(sp->base_address + CONDOR_MOUSE_DATA)
+#define display_width(sp)	(mouse_addr(sp) + CONDOR_INPUT_DISPLAY_RESX)
+#define display_height(sp)	(mouse_addr(sp) + CONDOR_INPUT_DISPLAY_RESY)
+#define display_depth(sp)	(mouse_addr(sp) + CONDOR_INPUT_DISPLAY_BITS)
+#define desktop_info(sp)	(mouse_addr(sp) + CONDOR_INPUT_DESKTOP_INFO)
+#define vnc_status(sp)		(mouse_addr(sp) + CONDOR_OUTPUT_VNC_STATUS)
+#define isr_control(sp)		(mouse_addr(sp) + CONDOR_MOUSE_ISR_CONTROL)
+
+#define mouse_interrupt_pending(sp)	readl(mouse_addr(sp) + CONDOR_MOUSE_ISR_STATUS)
+#define clear_mouse_interrupt(sp)	writel(0, mouse_addr(sp) + CONDOR_MOUSE_ISR_STATUS)
+#define enable_mouse_interrupts(sp)	writel(1, mouse_addr(sp) + CONDOR_MOUSE_ISR_CONTROL)
+#define disable_mouse_interrupts(sp)	writel(0, mouse_addr(sp) + CONDOR_MOUSE_ISR_CONTROL)
+
+/* remote input queue operations */
+#define REMOTE_QUEUE_SIZE	60
+
+#define get_queue_writer(sp)	readl(mouse_addr(sp) + CONDOR_MOUSE_Q_WRITER)
+#define get_queue_reader(sp)	readl(mouse_addr(sp) + CONDOR_MOUSE_Q_READER)
+#define set_queue_reader(sp, reader)	writel(reader, mouse_addr(sp) + CONDOR_MOUSE_Q_READER)
+
+#define queue_begin	(mouse_addr(sp) + CONDOR_MOUSE_Q_BEGIN)
+
+#define get_queue_entry(sp, read_index) \
+	((void*)(queue_begin + read_index * sizeof(struct remote_input)))
+
+static inline int advance_queue_reader(struct service_processor *sp, unsigned long reader)
+{
+	reader++;
+	if (reader == REMOTE_QUEUE_SIZE)
+		reader = 0;
+
+	set_queue_reader(sp, reader);
+	return reader;
+}
+
+#define NO_KEYCODE 0
+#define KEY_SYM_BK_SPC   0xFF08
+#define KEY_SYM_TAB      0xFF09
+#define KEY_SYM_ENTER    0xFF0D
+#define KEY_SYM_SCR_LOCK 0xFF14
+#define KEY_SYM_ESCAPE   0xFF1B
+#define KEY_SYM_HOME     0xFF50
+#define KEY_SYM_LARROW   0xFF51
+#define KEY_SYM_UARROW   0xFF52
+#define KEY_SYM_RARROW   0xFF53
+#define KEY_SYM_DARROW   0xFF54
+#define KEY_SYM_PAGEUP   0xFF55
+#define KEY_SYM_PAGEDOWN 0xFF56
+#define KEY_SYM_END      0xFF57
+#define KEY_SYM_INSERT   0xFF63
+#define KEY_SYM_NUM_LOCK 0xFF7F
+#define KEY_SYM_KPSTAR   0xFFAA
+#define KEY_SYM_KPPLUS   0xFFAB
+#define KEY_SYM_KPMINUS  0xFFAD
+#define KEY_SYM_KPDOT    0xFFAE
+#define KEY_SYM_KPSLASH  0xFFAF
+#define KEY_SYM_KPRIGHT  0xFF96
+#define KEY_SYM_KPUP     0xFF97
+#define KEY_SYM_KPLEFT   0xFF98
+#define KEY_SYM_KPDOWN   0xFF99
+#define KEY_SYM_KP0      0xFFB0
+#define KEY_SYM_KP1      0xFFB1
+#define KEY_SYM_KP2      0xFFB2
+#define KEY_SYM_KP3      0xFFB3
+#define KEY_SYM_KP4      0xFFB4
+#define KEY_SYM_KP5      0xFFB5
+#define KEY_SYM_KP6      0xFFB6
+#define KEY_SYM_KP7      0xFFB7
+#define KEY_SYM_KP8      0xFFB8
+#define KEY_SYM_KP9      0xFFB9
+#define KEY_SYM_F1       0xFFBE      // 1B 5B 5B 41
+#define KEY_SYM_F2       0xFFBF      // 1B 5B 5B 42
+#define KEY_SYM_F3       0xFFC0      // 1B 5B 5B 43
+#define KEY_SYM_F4       0xFFC1      // 1B 5B 5B 44
+#define KEY_SYM_F5       0xFFC2      // 1B 5B 5B 45
+#define KEY_SYM_F6       0xFFC3      // 1B 5B 31 37 7E
+#define KEY_SYM_F7       0xFFC4      // 1B 5B 31 38 7E
+#define KEY_SYM_F8       0xFFC5      // 1B 5B 31 39 7E
+#define KEY_SYM_F9       0xFFC6      // 1B 5B 32 30 7E
+#define KEY_SYM_F10      0xFFC7      // 1B 5B 32 31 7E
+#define KEY_SYM_F11      0xFFC8      // 1B 5B 32 33 7E
+#define KEY_SYM_F12      0xFFC9      // 1B 5B 32 34 7E
+#define KEY_SYM_SHIFT    0xFFE1
+#define KEY_SYM_CTRL     0xFFE3
+#define KEY_SYM_ALT      0xFFE9
+#define KEY_SYM_CAP_LOCK 0xFFE5
+#define KEY_SYM_DELETE   0xFFFF
+#define KEY_SYM_TILDE    0x60
+#define KEY_SYM_BKTIC    0x7E
+#define KEY_SYM_ONE      0x31
+#define KEY_SYM_BANG     0x21
+#define KEY_SYM_TWO      0x32
+#define KEY_SYM_AT       0x40
+#define KEY_SYM_THREE    0x33
+#define KEY_SYM_POUND    0x23
+#define KEY_SYM_FOUR     0x34
+#define KEY_SYM_DOLLAR   0x24
+#define KEY_SYM_FIVE     0x35
+#define KEY_SYM_PERCENT  0x25
+#define KEY_SYM_SIX      0x36
+#define KEY_SYM_CARAT    0x5E
+#define KEY_SYM_SEVEN    0x37
+#define KEY_SYM_AMPER    0x26
+#define KEY_SYM_EIGHT    0x38
+#define KEY_SYM_STAR     0x2A
+#define KEY_SYM_NINE     0x39
+#define KEY_SYM_LPAREN   0x28
+#define KEY_SYM_ZERO     0x30
+#define KEY_SYM_RPAREN   0x29
+#define KEY_SYM_MINUS    0x2D
+#define KEY_SYM_USCORE   0x5F
+#define KEY_SYM_EQUAL    0x2B
+#define KEY_SYM_PLUS     0x3D
+#define KEY_SYM_LBRKT    0x5B
+#define KEY_SYM_LCURLY   0x7B
+#define KEY_SYM_RBRKT    0x5D
+#define KEY_SYM_RCURLY   0x7D
+#define KEY_SYM_SLASH    0x5C
+#define KEY_SYM_PIPE     0x7C
+#define KEY_SYM_TIC      0x27
+#define KEY_SYM_QUOTE    0x22
+#define KEY_SYM_SEMIC    0x3B
+#define KEY_SYM_COLON    0x3A
+#define KEY_SYM_COMMA    0x2C
+#define KEY_SYM_LT       0x3C
+#define KEY_SYM_PERIOD   0x2E
+#define KEY_SYM_GT       0x3E
+#define KEY_SYM_BSLASH   0x2F
+#define KEY_SYM_QMARK    0x3F
+#define KEY_SYM_A        0x41
+#define KEY_SYM_B        0x42
+#define KEY_SYM_C        0x43
+#define KEY_SYM_D        0x44
+#define KEY_SYM_E        0x45
+#define KEY_SYM_F        0x46
+#define KEY_SYM_G        0x47
+#define KEY_SYM_H        0x48
+#define KEY_SYM_I        0x49
+#define KEY_SYM_J        0x4A
+#define KEY_SYM_K        0x4B
+#define KEY_SYM_L        0x4C
+#define KEY_SYM_M        0x4D
+#define KEY_SYM_N        0x4E
+#define KEY_SYM_O        0x4F
+#define KEY_SYM_P        0x50
+#define KEY_SYM_Q        0x51
+#define KEY_SYM_R        0x52
+#define KEY_SYM_S        0x53
+#define KEY_SYM_T        0x54
+#define KEY_SYM_U        0x55
+#define KEY_SYM_V        0x56
+#define KEY_SYM_W        0x57
+#define KEY_SYM_X        0x58
+#define KEY_SYM_Y        0x59
+#define KEY_SYM_Z        0x5A
+#define KEY_SYM_a        0x61
+#define KEY_SYM_b        0x62
+#define KEY_SYM_c        0x63
+#define KEY_SYM_d        0x64
+#define KEY_SYM_e        0x65
+#define KEY_SYM_f        0x66
+#define KEY_SYM_g        0x67
+#define KEY_SYM_h        0x68
+#define KEY_SYM_i        0x69
+#define KEY_SYM_j        0x6A
+#define KEY_SYM_k        0x6B
+#define KEY_SYM_l        0x6C
+#define KEY_SYM_m        0x6D
+#define KEY_SYM_n        0x6E
+#define KEY_SYM_o        0x6F
+#define KEY_SYM_p        0x70
+#define KEY_SYM_q        0x71
+#define KEY_SYM_r        0x72
+#define KEY_SYM_s        0x73
+#define KEY_SYM_t        0x74
+#define KEY_SYM_u        0x75
+#define KEY_SYM_v        0x76
+#define KEY_SYM_w        0x77
+#define KEY_SYM_x        0x78
+#define KEY_SYM_y        0x79
+#define KEY_SYM_z        0x7A
+#define KEY_SYM_SPACE    0x20
+#endif /* _IBMASM_REMOTE_H_ */
diff --git a/drivers/misc/ibmasm/uart.c b/drivers/misc/ibmasm/uart.c
new file mode 100644
index 000000000..01e2b0d7e
--- /dev/null
+++ b/drivers/misc/ibmasm/uart.c
@@ -0,0 +1,72 @@
+
+/*
+ * IBM ASM Service Processor Device Driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2004
+ *
+ * Author: Max Asböck <amax@us.ibm.com>
+ *
+ */
+
+#include <linux/termios.h>
+#include <linux/tty.h>
+#include <linux/serial_core.h>
+#include <linux/serial_reg.h>
+#include <linux/serial_8250.h>
+#include "ibmasm.h"
+#include "lowlevel.h"
+
+
+void ibmasm_register_uart(struct service_processor *sp)
+{
+	struct uart_8250_port uart;
+	void __iomem *iomem_base;
+
+	iomem_base = sp->base_address + SCOUT_COM_B_BASE;
+
+	/* read the uart scratch register to determine if the UART
+	 * is dedicated to the service processor or if the OS can use it
+	 */
+	if (0 == readl(iomem_base + UART_SCR)) {
+		dev_info(sp->dev, "IBM SP UART not registered, owned by service processor\n");
+		sp->serial_line = -1;
+		return;
+	}
+
+	memset(&uart, 0, sizeof(uart));
+	uart.port.irq		= sp->irq;
+	uart.port.uartclk	= 3686400;
+	uart.port.flags		= UPF_SHARE_IRQ;
+	uart.port.iotype	= UPIO_MEM;
+	uart.port.membase	= iomem_base;
+
+	sp->serial_line = serial8250_register_8250_port(&uart);
+	if (sp->serial_line < 0) {
+		dev_err(sp->dev, "Failed to register serial port\n");
+		return;
+	}
+	enable_uart_interrupts(sp->base_address);
+}
+
+void ibmasm_unregister_uart(struct service_processor *sp)
+{
+	if (sp->serial_line < 0)
+		return;
+
+	disable_uart_interrupts(sp->base_address);
+	serial8250_unregister_port(sp->serial_line);
+}
diff --git a/drivers/misc/ibmvmc.c b/drivers/misc/ibmvmc.c
new file mode 100644
index 000000000..2ed23c99f
--- /dev/null
+++ b/drivers/misc/ibmvmc.c
@@ -0,0 +1,2421 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * IBM Power Systems Virtual Management Channel Support.
+ *
+ * Copyright (c) 2004, 2018 IBM Corp.
+ *   Dave Engebretsen engebret@us.ibm.com
+ *   Steven Royer seroyer@linux.vnet.ibm.com
+ *   Adam Reznechek adreznec@linux.vnet.ibm.com
+ *   Bryant G. Ly <bryantly@linux.vnet.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/major.h>
+#include <linux/string.h>
+#include <linux/fcntl.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+#include <linux/delay.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <linux/miscdevice.h>
+#include <linux/sched/signal.h>
+
+#include <asm/byteorder.h>
+#include <asm/irq.h>
+#include <asm/vio.h>
+
+#include "ibmvmc.h"
+
+#define IBMVMC_DRIVER_VERSION "1.0"
+
+/*
+ * Static global variables
+ */
+static DECLARE_WAIT_QUEUE_HEAD(ibmvmc_read_wait);
+
+static const char ibmvmc_driver_name[] = "ibmvmc";
+
+static struct ibmvmc_struct ibmvmc;
+static struct ibmvmc_hmc hmcs[MAX_HMCS];
+static struct crq_server_adapter ibmvmc_adapter;
+
+static int ibmvmc_max_buf_pool_size = DEFAULT_BUF_POOL_SIZE;
+static int ibmvmc_max_hmcs = DEFAULT_HMCS;
+static int ibmvmc_max_mtu = DEFAULT_MTU;
+
+static inline long h_copy_rdma(s64 length, u64 sliobn, u64 slioba,
+			       u64 dliobn, u64 dlioba)
+{
+	long rc = 0;
+
+	/* Ensure all writes to source memory are visible before hcall */
+	dma_wmb();
+	pr_debug("ibmvmc: h_copy_rdma(0x%llx, 0x%llx, 0x%llx, 0x%llx, 0x%llx\n",
+		 length, sliobn, slioba, dliobn, dlioba);
+	rc = plpar_hcall_norets(H_COPY_RDMA, length, sliobn, slioba,
+				dliobn, dlioba);
+	pr_debug("ibmvmc: h_copy_rdma rc = 0x%lx\n", rc);
+
+	return rc;
+}
+
+static inline void h_free_crq(uint32_t unit_address)
+{
+	long rc = 0;
+
+	do {
+		if (H_IS_LONG_BUSY(rc))
+			msleep(get_longbusy_msecs(rc));
+
+		rc = plpar_hcall_norets(H_FREE_CRQ, unit_address);
+	} while ((rc == H_BUSY) || (H_IS_LONG_BUSY(rc)));
+}
+
+/**
+ * h_request_vmc: - request a hypervisor virtual management channel device
+ * @vmc_index: drc index of the vmc device created
+ *
+ * Requests the hypervisor create a new virtual management channel device,
+ * allowing this partition to send hypervisor virtualization control
+ * commands.
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static inline long h_request_vmc(u32 *vmc_index)
+{
+	long rc = 0;
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+	do {
+		if (H_IS_LONG_BUSY(rc))
+			msleep(get_longbusy_msecs(rc));
+
+		/* Call to request the VMC device from phyp */
+		rc = plpar_hcall(H_REQUEST_VMC, retbuf);
+		pr_debug("ibmvmc: %s rc = 0x%lx\n", __func__, rc);
+		*vmc_index = retbuf[0];
+	} while ((rc == H_BUSY) || (H_IS_LONG_BUSY(rc)));
+
+	return rc;
+}
+
+/* routines for managing a command/response queue */
+/**
+ * ibmvmc_handle_event: - Interrupt handler for crq events
+ * @irq:        number of irq to handle, not used
+ * @dev_instance: crq_server_adapter that received interrupt
+ *
+ * Disables interrupts and schedules ibmvmc_task
+ *
+ * Always returns IRQ_HANDLED
+ */
+static irqreturn_t ibmvmc_handle_event(int irq, void *dev_instance)
+{
+	struct crq_server_adapter *adapter =
+		(struct crq_server_adapter *)dev_instance;
+
+	vio_disable_interrupts(to_vio_dev(adapter->dev));
+	tasklet_schedule(&adapter->work_task);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * ibmvmc_release_crq_queue - Release CRQ Queue
+ *
+ * @adapter:	crq_server_adapter struct
+ *
+ * Return:
+ *	0 - Success
+ *	Non-Zero - Failure
+ */
+static void ibmvmc_release_crq_queue(struct crq_server_adapter *adapter)
+{
+	struct vio_dev *vdev = to_vio_dev(adapter->dev);
+	struct crq_queue *queue = &adapter->queue;
+
+	free_irq(vdev->irq, (void *)adapter);
+	tasklet_kill(&adapter->work_task);
+
+	if (adapter->reset_task)
+		kthread_stop(adapter->reset_task);
+
+	h_free_crq(vdev->unit_address);
+	dma_unmap_single(adapter->dev,
+			 queue->msg_token,
+			 queue->size * sizeof(*queue->msgs), DMA_BIDIRECTIONAL);
+	free_page((unsigned long)queue->msgs);
+}
+
+/**
+ * ibmvmc_reset_crq_queue - Reset CRQ Queue
+ *
+ * @adapter:	crq_server_adapter struct
+ *
+ * This function calls h_free_crq and then calls H_REG_CRQ and does all the
+ * bookkeeping to get us back to where we can communicate.
+ *
+ * Return:
+ *	0 - Success
+ *	Non-Zero - Failure
+ */
+static int ibmvmc_reset_crq_queue(struct crq_server_adapter *adapter)
+{
+	struct vio_dev *vdev = to_vio_dev(adapter->dev);
+	struct crq_queue *queue = &adapter->queue;
+	int rc = 0;
+
+	/* Close the CRQ */
+	h_free_crq(vdev->unit_address);
+
+	/* Clean out the queue */
+	memset(queue->msgs, 0x00, PAGE_SIZE);
+	queue->cur = 0;
+
+	/* And re-open it again */
+	rc = plpar_hcall_norets(H_REG_CRQ,
+				vdev->unit_address,
+				queue->msg_token, PAGE_SIZE);
+	if (rc == 2)
+		/* Adapter is good, but other end is not ready */
+		dev_warn(adapter->dev, "Partner adapter not ready\n");
+	else if (rc != 0)
+		dev_err(adapter->dev, "couldn't register crq--rc 0x%x\n", rc);
+
+	return rc;
+}
+
+/**
+ * crq_queue_next_crq: - Returns the next entry in message queue
+ * @queue:      crq_queue to use
+ *
+ * Returns pointer to next entry in queue, or NULL if there are no new
+ * entried in the CRQ.
+ */
+static struct ibmvmc_crq_msg *crq_queue_next_crq(struct crq_queue *queue)
+{
+	struct ibmvmc_crq_msg *crq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&queue->lock, flags);
+	crq = &queue->msgs[queue->cur];
+	if (crq->valid & 0x80) {
+		if (++queue->cur == queue->size)
+			queue->cur = 0;
+
+		/* Ensure the read of the valid bit occurs before reading any
+		 * other bits of the CRQ entry
+		 */
+		dma_rmb();
+	} else {
+		crq = NULL;
+	}
+
+	spin_unlock_irqrestore(&queue->lock, flags);
+
+	return crq;
+}
+
+/**
+ * ibmvmc_send_crq - Send CRQ
+ *
+ * @adapter:	crq_server_adapter struct
+ * @word1:	Word1 Data field
+ * @word2:	Word2 Data field
+ *
+ * Return:
+ *	0 - Success
+ *	Non-Zero - Failure
+ */
+static long ibmvmc_send_crq(struct crq_server_adapter *adapter,
+			    u64 word1, u64 word2)
+{
+	struct vio_dev *vdev = to_vio_dev(adapter->dev);
+	long rc = 0;
+
+	dev_dbg(adapter->dev, "(0x%x, 0x%016llx, 0x%016llx)\n",
+		vdev->unit_address, word1, word2);
+
+	/*
+	 * Ensure the command buffer is flushed to memory before handing it
+	 * over to the other side to prevent it from fetching any stale data.
+	 */
+	dma_wmb();
+	rc = plpar_hcall_norets(H_SEND_CRQ, vdev->unit_address, word1, word2);
+	dev_dbg(adapter->dev, "rc = 0x%lx\n", rc);
+
+	return rc;
+}
+
+/**
+ * alloc_dma_buffer - Create DMA Buffer
+ *
+ * @vdev:	vio_dev struct
+ * @size:	Size field
+ * @dma_handle:	DMA address field
+ *
+ * Allocates memory for the command queue and maps remote memory into an
+ * ioba.
+ *
+ * Returns a pointer to the buffer
+ */
+static void *alloc_dma_buffer(struct vio_dev *vdev, size_t size,
+			      dma_addr_t *dma_handle)
+{
+	/* allocate memory */
+	void *buffer = kzalloc(size, GFP_ATOMIC);
+
+	if (!buffer) {
+		*dma_handle = 0;
+		return NULL;
+	}
+
+	/* DMA map */
+	*dma_handle = dma_map_single(&vdev->dev, buffer, size,
+				     DMA_BIDIRECTIONAL);
+
+	if (dma_mapping_error(&vdev->dev, *dma_handle)) {
+		*dma_handle = 0;
+		kzfree(buffer);
+		return NULL;
+	}
+
+	return buffer;
+}
+
+/**
+ * free_dma_buffer - Free DMA Buffer
+ *
+ * @vdev:	vio_dev struct
+ * @size:	Size field
+ * @vaddr:	Address field
+ * @dma_handle:	DMA address field
+ *
+ * Releases memory for a command queue and unmaps mapped remote memory.
+ */
+static void free_dma_buffer(struct vio_dev *vdev, size_t size, void *vaddr,
+			    dma_addr_t dma_handle)
+{
+	/* DMA unmap */
+	dma_unmap_single(&vdev->dev, dma_handle, size, DMA_BIDIRECTIONAL);
+
+	/* deallocate memory */
+	kzfree(vaddr);
+}
+
+/**
+ * ibmvmc_get_valid_hmc_buffer - Retrieve Valid HMC Buffer
+ *
+ * @hmc_index:	HMC Index Field
+ *
+ * Return:
+ *	Pointer to ibmvmc_buffer
+ */
+static struct ibmvmc_buffer *ibmvmc_get_valid_hmc_buffer(u8 hmc_index)
+{
+	struct ibmvmc_buffer *buffer;
+	struct ibmvmc_buffer *ret_buf = NULL;
+	unsigned long i;
+
+	if (hmc_index > ibmvmc.max_hmc_index)
+		return NULL;
+
+	buffer = hmcs[hmc_index].buffer;
+
+	for (i = 0; i < ibmvmc_max_buf_pool_size; i++) {
+		if (buffer[i].valid && buffer[i].free &&
+		    buffer[i].owner == VMC_BUF_OWNER_ALPHA) {
+			buffer[i].free = 0;
+			ret_buf = &buffer[i];
+			break;
+		}
+	}
+
+	return ret_buf;
+}
+
+/**
+ * ibmvmc_get_free_hmc_buffer - Get Free HMC Buffer
+ *
+ * @adapter:	crq_server_adapter struct
+ * @hmc_index:	Hmc Index field
+ *
+ * Return:
+ *	Pointer to ibmvmc_buffer
+ */
+static struct ibmvmc_buffer *ibmvmc_get_free_hmc_buffer(struct crq_server_adapter *adapter,
+							u8 hmc_index)
+{
+	struct ibmvmc_buffer *buffer;
+	struct ibmvmc_buffer *ret_buf = NULL;
+	unsigned long i;
+
+	if (hmc_index > ibmvmc.max_hmc_index) {
+		dev_info(adapter->dev, "get_free_hmc_buffer: invalid hmc_index=0x%x\n",
+			 hmc_index);
+		return NULL;
+	}
+
+	buffer = hmcs[hmc_index].buffer;
+
+	for (i = 0; i < ibmvmc_max_buf_pool_size; i++) {
+		if (buffer[i].free &&
+		    buffer[i].owner == VMC_BUF_OWNER_ALPHA) {
+			buffer[i].free = 0;
+			ret_buf = &buffer[i];
+			break;
+		}
+	}
+
+	return ret_buf;
+}
+
+/**
+ * ibmvmc_free_hmc_buffer - Free an HMC Buffer
+ *
+ * @hmc:	ibmvmc_hmc struct
+ * @buffer:	ibmvmc_buffer struct
+ *
+ */
+static void ibmvmc_free_hmc_buffer(struct ibmvmc_hmc *hmc,
+				   struct ibmvmc_buffer *buffer)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&hmc->lock, flags);
+	buffer->free = 1;
+	spin_unlock_irqrestore(&hmc->lock, flags);
+}
+
+/**
+ * ibmvmc_count_hmc_buffers - Count HMC Buffers
+ *
+ * @hmc_index:	HMC Index field
+ * @valid:	Valid number of buffers field
+ * @free:	Free number of buffers field
+ *
+ */
+static void ibmvmc_count_hmc_buffers(u8 hmc_index, unsigned int *valid,
+				     unsigned int *free)
+{
+	struct ibmvmc_buffer *buffer;
+	unsigned long i;
+	unsigned long flags;
+
+	if (hmc_index > ibmvmc.max_hmc_index)
+		return;
+
+	if (!valid || !free)
+		return;
+
+	*valid = 0; *free = 0;
+
+	buffer = hmcs[hmc_index].buffer;
+	spin_lock_irqsave(&hmcs[hmc_index].lock, flags);
+
+	for (i = 0; i < ibmvmc_max_buf_pool_size; i++) {
+		if (buffer[i].valid) {
+			*valid = *valid + 1;
+			if (buffer[i].free)
+				*free = *free + 1;
+		}
+	}
+
+	spin_unlock_irqrestore(&hmcs[hmc_index].lock, flags);
+}
+
+/**
+ * ibmvmc_get_free_hmc - Get Free HMC
+ *
+ * Return:
+ *	Pointer to an available HMC Connection
+ *	Null otherwise
+ */
+static struct ibmvmc_hmc *ibmvmc_get_free_hmc(void)
+{
+	unsigned long i;
+	unsigned long flags;
+
+	/*
+	 * Find an available HMC connection.
+	 */
+	for (i = 0; i <= ibmvmc.max_hmc_index; i++) {
+		spin_lock_irqsave(&hmcs[i].lock, flags);
+		if (hmcs[i].state == ibmhmc_state_free) {
+			hmcs[i].index = i;
+			hmcs[i].state = ibmhmc_state_initial;
+			spin_unlock_irqrestore(&hmcs[i].lock, flags);
+			return &hmcs[i];
+		}
+		spin_unlock_irqrestore(&hmcs[i].lock, flags);
+	}
+
+	return NULL;
+}
+
+/**
+ * ibmvmc_return_hmc - Return an HMC Connection
+ *
+ * @hmc:		ibmvmc_hmc struct
+ * @release_readers:	Number of readers connected to session
+ *
+ * This function releases the HMC connections back into the pool.
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static int ibmvmc_return_hmc(struct ibmvmc_hmc *hmc, bool release_readers)
+{
+	struct ibmvmc_buffer *buffer;
+	struct crq_server_adapter *adapter;
+	struct vio_dev *vdev;
+	unsigned long i;
+	unsigned long flags;
+
+	if (!hmc || !hmc->adapter)
+		return -EIO;
+
+	if (release_readers) {
+		if (hmc->file_session) {
+			struct ibmvmc_file_session *session = hmc->file_session;
+
+			session->valid = 0;
+			wake_up_interruptible(&ibmvmc_read_wait);
+		}
+	}
+
+	adapter = hmc->adapter;
+	vdev = to_vio_dev(adapter->dev);
+
+	spin_lock_irqsave(&hmc->lock, flags);
+	hmc->index = 0;
+	hmc->state = ibmhmc_state_free;
+	hmc->queue_head = 0;
+	hmc->queue_tail = 0;
+	buffer = hmc->buffer;
+	for (i = 0; i < ibmvmc_max_buf_pool_size; i++) {
+		if (buffer[i].valid) {
+			free_dma_buffer(vdev,
+					ibmvmc.max_mtu,
+					buffer[i].real_addr_local,
+					buffer[i].dma_addr_local);
+			dev_dbg(adapter->dev, "Forgot buffer id 0x%lx\n", i);
+		}
+		memset(&buffer[i], 0, sizeof(struct ibmvmc_buffer));
+
+		hmc->queue_outbound_msgs[i] = VMC_INVALID_BUFFER_ID;
+	}
+
+	spin_unlock_irqrestore(&hmc->lock, flags);
+
+	return 0;
+}
+
+/**
+ * ibmvmc_send_open - Interface Open
+ * @buffer: Pointer to ibmvmc_buffer struct
+ * @hmc: Pointer to ibmvmc_hmc struct
+ *
+ * This command is sent by the management partition as the result of a
+ * management partition device request. It causes the hypervisor to
+ * prepare a set of data buffers for the management application connection
+ * indicated HMC idx. A unique HMC Idx would be used if multiple management
+ * applications running concurrently were desired. Before responding to this
+ * command, the hypervisor must provide the management partition with at
+ * least one of these new buffers via the Add Buffer. This indicates whether
+ * the messages are inbound or outbound from the hypervisor.
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static int ibmvmc_send_open(struct ibmvmc_buffer *buffer,
+			    struct ibmvmc_hmc *hmc)
+{
+	struct ibmvmc_crq_msg crq_msg;
+	struct crq_server_adapter *adapter;
+	__be64 *crq_as_u64 = (__be64 *)&crq_msg;
+	int rc = 0;
+
+	if (!hmc || !hmc->adapter)
+		return -EIO;
+
+	adapter = hmc->adapter;
+
+	dev_dbg(adapter->dev, "send_open: 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
+		(unsigned long)buffer->size, (unsigned long)adapter->liobn,
+		(unsigned long)buffer->dma_addr_local,
+		(unsigned long)adapter->riobn,
+		(unsigned long)buffer->dma_addr_remote);
+
+	rc = h_copy_rdma(buffer->size,
+			 adapter->liobn,
+			 buffer->dma_addr_local,
+			 adapter->riobn,
+			 buffer->dma_addr_remote);
+	if (rc) {
+		dev_err(adapter->dev, "Error: In send_open, h_copy_rdma rc 0x%x\n",
+			rc);
+		return -EIO;
+	}
+
+	hmc->state = ibmhmc_state_opening;
+
+	crq_msg.valid = 0x80;
+	crq_msg.type = VMC_MSG_OPEN;
+	crq_msg.status = 0;
+	crq_msg.var1.rsvd = 0;
+	crq_msg.hmc_session = hmc->session;
+	crq_msg.hmc_index = hmc->index;
+	crq_msg.var2.buffer_id = cpu_to_be16(buffer->id);
+	crq_msg.rsvd = 0;
+	crq_msg.var3.rsvd = 0;
+
+	ibmvmc_send_crq(adapter, be64_to_cpu(crq_as_u64[0]),
+			be64_to_cpu(crq_as_u64[1]));
+
+	return rc;
+}
+
+/**
+ * ibmvmc_send_close - Interface Close
+ * @hmc: Pointer to ibmvmc_hmc struct
+ *
+ * This command is sent by the management partition to terminate a
+ * management application to hypervisor connection. When this command is
+ * sent, the management partition has quiesced all I/O operations to all
+ * buffers associated with this management application connection, and
+ * has freed any storage for these buffers.
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static int ibmvmc_send_close(struct ibmvmc_hmc *hmc)
+{
+	struct ibmvmc_crq_msg crq_msg;
+	struct crq_server_adapter *adapter;
+	__be64 *crq_as_u64 = (__be64 *)&crq_msg;
+	int rc = 0;
+
+	if (!hmc || !hmc->adapter)
+		return -EIO;
+
+	adapter = hmc->adapter;
+
+	dev_info(adapter->dev, "CRQ send: close\n");
+
+	crq_msg.valid = 0x80;
+	crq_msg.type = VMC_MSG_CLOSE;
+	crq_msg.status = 0;
+	crq_msg.var1.rsvd = 0;
+	crq_msg.hmc_session = hmc->session;
+	crq_msg.hmc_index = hmc->index;
+	crq_msg.var2.rsvd = 0;
+	crq_msg.rsvd = 0;
+	crq_msg.var3.rsvd = 0;
+
+	ibmvmc_send_crq(adapter, be64_to_cpu(crq_as_u64[0]),
+			be64_to_cpu(crq_as_u64[1]));
+
+	return rc;
+}
+
+/**
+ * ibmvmc_send_capabilities - Send VMC Capabilities
+ *
+ * @adapter:	crq_server_adapter struct
+ *
+ * The capabilities message is an administrative message sent after the CRQ
+ * initialization sequence of messages and is used to exchange VMC capabilities
+ * between the management partition and the hypervisor. The management
+ * partition must send this message and the hypervisor must respond with VMC
+ * capabilities Response message before HMC interface message can begin. Any
+ * HMC interface messages received before the exchange of capabilities has
+ * complete are dropped.
+ *
+ * Return:
+ *	0 - Success
+ */
+static int ibmvmc_send_capabilities(struct crq_server_adapter *adapter)
+{
+	struct ibmvmc_admin_crq_msg crq_msg;
+	__be64 *crq_as_u64 = (__be64 *)&crq_msg;
+
+	dev_dbg(adapter->dev, "ibmvmc: CRQ send: capabilities\n");
+	crq_msg.valid = 0x80;
+	crq_msg.type = VMC_MSG_CAP;
+	crq_msg.status = 0;
+	crq_msg.rsvd[0] = 0;
+	crq_msg.rsvd[1] = 0;
+	crq_msg.max_hmc = ibmvmc_max_hmcs;
+	crq_msg.max_mtu = cpu_to_be32(ibmvmc_max_mtu);
+	crq_msg.pool_size = cpu_to_be16(ibmvmc_max_buf_pool_size);
+	crq_msg.crq_size = cpu_to_be16(adapter->queue.size);
+	crq_msg.version = cpu_to_be16(IBMVMC_PROTOCOL_VERSION);
+
+	ibmvmc_send_crq(adapter, be64_to_cpu(crq_as_u64[0]),
+			be64_to_cpu(crq_as_u64[1]));
+
+	ibmvmc.state = ibmvmc_state_capabilities;
+
+	return 0;
+}
+
+/**
+ * ibmvmc_send_add_buffer_resp - Add Buffer Response
+ *
+ * @adapter:	crq_server_adapter struct
+ * @status:	Status field
+ * @hmc_session: HMC Session field
+ * @hmc_index:	HMC Index field
+ * @buffer_id:	Buffer Id field
+ *
+ * This command is sent by the management partition to the hypervisor in
+ * response to the Add Buffer message. The Status field indicates the result of
+ * the command.
+ *
+ * Return:
+ *	0 - Success
+ */
+static int ibmvmc_send_add_buffer_resp(struct crq_server_adapter *adapter,
+				       u8 status, u8 hmc_session,
+				       u8 hmc_index, u16 buffer_id)
+{
+	struct ibmvmc_crq_msg crq_msg;
+	__be64 *crq_as_u64 = (__be64 *)&crq_msg;
+
+	dev_dbg(adapter->dev, "CRQ send: add_buffer_resp\n");
+	crq_msg.valid = 0x80;
+	crq_msg.type = VMC_MSG_ADD_BUF_RESP;
+	crq_msg.status = status;
+	crq_msg.var1.rsvd = 0;
+	crq_msg.hmc_session = hmc_session;
+	crq_msg.hmc_index = hmc_index;
+	crq_msg.var2.buffer_id = cpu_to_be16(buffer_id);
+	crq_msg.rsvd = 0;
+	crq_msg.var3.rsvd = 0;
+
+	ibmvmc_send_crq(adapter, be64_to_cpu(crq_as_u64[0]),
+			be64_to_cpu(crq_as_u64[1]));
+
+	return 0;
+}
+
+/**
+ * ibmvmc_send_rem_buffer_resp - Remove Buffer Response
+ *
+ * @adapter:	crq_server_adapter struct
+ * @status:	Status field
+ * @hmc_session: HMC Session field
+ * @hmc_index:	HMC Index field
+ * @buffer_id:	Buffer Id field
+ *
+ * This command is sent by the management partition to the hypervisor in
+ * response to the Remove Buffer message. The Buffer ID field indicates
+ * which buffer the management partition selected to remove. The Status
+ * field indicates the result of the command.
+ *
+ * Return:
+ *	0 - Success
+ */
+static int ibmvmc_send_rem_buffer_resp(struct crq_server_adapter *adapter,
+				       u8 status, u8 hmc_session,
+				       u8 hmc_index, u16 buffer_id)
+{
+	struct ibmvmc_crq_msg crq_msg;
+	__be64 *crq_as_u64 = (__be64 *)&crq_msg;
+
+	dev_dbg(adapter->dev, "CRQ send: rem_buffer_resp\n");
+	crq_msg.valid = 0x80;
+	crq_msg.type = VMC_MSG_REM_BUF_RESP;
+	crq_msg.status = status;
+	crq_msg.var1.rsvd = 0;
+	crq_msg.hmc_session = hmc_session;
+	crq_msg.hmc_index = hmc_index;
+	crq_msg.var2.buffer_id = cpu_to_be16(buffer_id);
+	crq_msg.rsvd = 0;
+	crq_msg.var3.rsvd = 0;
+
+	ibmvmc_send_crq(adapter, be64_to_cpu(crq_as_u64[0]),
+			be64_to_cpu(crq_as_u64[1]));
+
+	return 0;
+}
+
+/**
+ * ibmvmc_send_msg - Signal Message
+ *
+ * @adapter:	crq_server_adapter struct
+ * @buffer:	ibmvmc_buffer struct
+ * @hmc:	ibmvmc_hmc struct
+ * @msg_length:	message length field
+ *
+ * This command is sent between the management partition and the hypervisor
+ * in order to signal the arrival of an HMC protocol message. The command
+ * can be sent by both the management partition and the hypervisor. It is
+ * used for all traffic between the management application and the hypervisor,
+ * regardless of who initiated the communication.
+ *
+ * There is no response to this message.
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static int ibmvmc_send_msg(struct crq_server_adapter *adapter,
+			   struct ibmvmc_buffer *buffer,
+			   struct ibmvmc_hmc *hmc, int msg_len)
+{
+	struct ibmvmc_crq_msg crq_msg;
+	__be64 *crq_as_u64 = (__be64 *)&crq_msg;
+	int rc = 0;
+
+	dev_dbg(adapter->dev, "CRQ send: rdma to HV\n");
+	rc = h_copy_rdma(msg_len,
+			 adapter->liobn,
+			 buffer->dma_addr_local,
+			 adapter->riobn,
+			 buffer->dma_addr_remote);
+	if (rc) {
+		dev_err(adapter->dev, "Error in send_msg, h_copy_rdma rc 0x%x\n",
+			rc);
+		return rc;
+	}
+
+	crq_msg.valid = 0x80;
+	crq_msg.type = VMC_MSG_SIGNAL;
+	crq_msg.status = 0;
+	crq_msg.var1.rsvd = 0;
+	crq_msg.hmc_session = hmc->session;
+	crq_msg.hmc_index = hmc->index;
+	crq_msg.var2.buffer_id = cpu_to_be16(buffer->id);
+	crq_msg.var3.msg_len = cpu_to_be32(msg_len);
+	dev_dbg(adapter->dev, "CRQ send: msg to HV 0x%llx 0x%llx\n",
+		be64_to_cpu(crq_as_u64[0]), be64_to_cpu(crq_as_u64[1]));
+
+	buffer->owner = VMC_BUF_OWNER_HV;
+	ibmvmc_send_crq(adapter, be64_to_cpu(crq_as_u64[0]),
+			be64_to_cpu(crq_as_u64[1]));
+
+	return rc;
+}
+
+/**
+ * ibmvmc_open - Open Session
+ *
+ * @inode:	inode struct
+ * @file:	file struct
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static int ibmvmc_open(struct inode *inode, struct file *file)
+{
+	struct ibmvmc_file_session *session;
+
+	pr_debug("%s: inode = 0x%lx, file = 0x%lx, state = 0x%x\n", __func__,
+		 (unsigned long)inode, (unsigned long)file,
+		 ibmvmc.state);
+
+	session = kzalloc(sizeof(*session), GFP_KERNEL);
+	if (!session)
+		return -ENOMEM;
+
+	session->file = file;
+	file->private_data = session;
+
+	return 0;
+}
+
+/**
+ * ibmvmc_close - Close Session
+ *
+ * @inode:	inode struct
+ * @file:	file struct
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static int ibmvmc_close(struct inode *inode, struct file *file)
+{
+	struct ibmvmc_file_session *session;
+	struct ibmvmc_hmc *hmc;
+	int rc = 0;
+	unsigned long flags;
+
+	pr_debug("%s: file = 0x%lx, state = 0x%x\n", __func__,
+		 (unsigned long)file, ibmvmc.state);
+
+	session = file->private_data;
+	if (!session)
+		return -EIO;
+
+	hmc = session->hmc;
+	if (hmc) {
+		if (!hmc->adapter)
+			return -EIO;
+
+		if (ibmvmc.state == ibmvmc_state_failed) {
+			dev_warn(hmc->adapter->dev, "close: state_failed\n");
+			return -EIO;
+		}
+
+		spin_lock_irqsave(&hmc->lock, flags);
+		if (hmc->state >= ibmhmc_state_opening) {
+			rc = ibmvmc_send_close(hmc);
+			if (rc)
+				dev_warn(hmc->adapter->dev, "close: send_close failed.\n");
+		}
+		spin_unlock_irqrestore(&hmc->lock, flags);
+	}
+
+	kzfree(session);
+
+	return rc;
+}
+
+/**
+ * ibmvmc_read - Read
+ *
+ * @file:	file struct
+ * @buf:	Character buffer
+ * @nbytes:	Size in bytes
+ * @ppos:	Offset
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static ssize_t ibmvmc_read(struct file *file, char *buf, size_t nbytes,
+			   loff_t *ppos)
+{
+	struct ibmvmc_file_session *session;
+	struct ibmvmc_hmc *hmc;
+	struct crq_server_adapter *adapter;
+	struct ibmvmc_buffer *buffer;
+	ssize_t n;
+	ssize_t retval = 0;
+	unsigned long flags;
+	DEFINE_WAIT(wait);
+
+	pr_debug("ibmvmc: read: file = 0x%lx, buf = 0x%lx, nbytes = 0x%lx\n",
+		 (unsigned long)file, (unsigned long)buf,
+		 (unsigned long)nbytes);
+
+	if (nbytes == 0)
+		return 0;
+
+	if (nbytes > ibmvmc.max_mtu) {
+		pr_warn("ibmvmc: read: nbytes invalid 0x%x\n",
+			(unsigned int)nbytes);
+		return -EINVAL;
+	}
+
+	session = file->private_data;
+	if (!session) {
+		pr_warn("ibmvmc: read: no session\n");
+		return -EIO;
+	}
+
+	hmc = session->hmc;
+	if (!hmc) {
+		pr_warn("ibmvmc: read: no hmc\n");
+		return -EIO;
+	}
+
+	adapter = hmc->adapter;
+	if (!adapter) {
+		pr_warn("ibmvmc: read: no adapter\n");
+		return -EIO;
+	}
+
+	do {
+		prepare_to_wait(&ibmvmc_read_wait, &wait, TASK_INTERRUPTIBLE);
+
+		spin_lock_irqsave(&hmc->lock, flags);
+		if (hmc->queue_tail != hmc->queue_head)
+			/* Data is available */
+			break;
+
+		spin_unlock_irqrestore(&hmc->lock, flags);
+
+		if (!session->valid) {
+			retval = -EBADFD;
+			goto out;
+		}
+		if (file->f_flags & O_NONBLOCK) {
+			retval = -EAGAIN;
+			goto out;
+		}
+
+		schedule();
+
+		if (signal_pending(current)) {
+			retval = -ERESTARTSYS;
+			goto out;
+		}
+	} while (1);
+
+	buffer = &(hmc->buffer[hmc->queue_outbound_msgs[hmc->queue_tail]]);
+	hmc->queue_tail++;
+	if (hmc->queue_tail == ibmvmc_max_buf_pool_size)
+		hmc->queue_tail = 0;
+	spin_unlock_irqrestore(&hmc->lock, flags);
+
+	nbytes = min_t(size_t, nbytes, buffer->msg_len);
+	n = copy_to_user((void *)buf, buffer->real_addr_local, nbytes);
+	dev_dbg(adapter->dev, "read: copy to user nbytes = 0x%lx.\n", nbytes);
+	ibmvmc_free_hmc_buffer(hmc, buffer);
+	retval = nbytes;
+
+	if (n) {
+		dev_warn(adapter->dev, "read: copy to user failed.\n");
+		retval = -EFAULT;
+	}
+
+ out:
+	finish_wait(&ibmvmc_read_wait, &wait);
+	dev_dbg(adapter->dev, "read: out %ld\n", retval);
+	return retval;
+}
+
+/**
+ * ibmvmc_poll - Poll
+ *
+ * @file:	file struct
+ * @wait:	Poll Table
+ *
+ * Return:
+ *	poll.h return values
+ */
+static unsigned int ibmvmc_poll(struct file *file, poll_table *wait)
+{
+	struct ibmvmc_file_session *session;
+	struct ibmvmc_hmc *hmc;
+	unsigned int mask = 0;
+
+	session = file->private_data;
+	if (!session)
+		return 0;
+
+	hmc = session->hmc;
+	if (!hmc)
+		return 0;
+
+	poll_wait(file, &ibmvmc_read_wait, wait);
+
+	if (hmc->queue_head != hmc->queue_tail)
+		mask |= POLLIN | POLLRDNORM;
+
+	return mask;
+}
+
+/**
+ * ibmvmc_write - Write
+ *
+ * @file:	file struct
+ * @buf:	Character buffer
+ * @count:	Count field
+ * @ppos:	Offset
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static ssize_t ibmvmc_write(struct file *file, const char *buffer,
+			    size_t count, loff_t *ppos)
+{
+	struct ibmvmc_buffer *vmc_buffer;
+	struct ibmvmc_file_session *session;
+	struct crq_server_adapter *adapter;
+	struct ibmvmc_hmc *hmc;
+	unsigned char *buf;
+	unsigned long flags;
+	size_t bytes;
+	const char *p = buffer;
+	size_t c = count;
+	int ret = 0;
+
+	session = file->private_data;
+	if (!session)
+		return -EIO;
+
+	hmc = session->hmc;
+	if (!hmc)
+		return -EIO;
+
+	spin_lock_irqsave(&hmc->lock, flags);
+	if (hmc->state == ibmhmc_state_free) {
+		/* HMC connection is not valid (possibly was reset under us). */
+		ret = -EIO;
+		goto out;
+	}
+
+	adapter = hmc->adapter;
+	if (!adapter) {
+		ret = -EIO;
+		goto out;
+	}
+
+	if (count > ibmvmc.max_mtu) {
+		dev_warn(adapter->dev, "invalid buffer size 0x%lx\n",
+			 (unsigned long)count);
+		ret = -EIO;
+		goto out;
+	}
+
+	/* Waiting for the open resp message to the ioctl(1) - retry */
+	if (hmc->state == ibmhmc_state_opening) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	/* Make sure the ioctl() was called & the open msg sent, and that
+	 * the HMC connection has not failed.
+	 */
+	if (hmc->state != ibmhmc_state_ready) {
+		ret = -EIO;
+		goto out;
+	}
+
+	vmc_buffer = ibmvmc_get_valid_hmc_buffer(hmc->index);
+	if (!vmc_buffer) {
+		/* No buffer available for the msg send, or we have not yet
+		 * completed the open/open_resp sequence.  Retry until this is
+		 * complete.
+		 */
+		ret = -EBUSY;
+		goto out;
+	}
+	if (!vmc_buffer->real_addr_local) {
+		dev_err(adapter->dev, "no buffer storage assigned\n");
+		ret = -EIO;
+		goto out;
+	}
+	buf = vmc_buffer->real_addr_local;
+
+	while (c > 0) {
+		bytes = min_t(size_t, c, vmc_buffer->size);
+
+		bytes -= copy_from_user(buf, p, bytes);
+		if (!bytes) {
+			ret = -EFAULT;
+			goto out;
+		}
+		c -= bytes;
+		p += bytes;
+	}
+	if (p == buffer)
+		goto out;
+
+	file->f_path.dentry->d_inode->i_mtime = current_time(file_inode(file));
+	mark_inode_dirty(file->f_path.dentry->d_inode);
+
+	dev_dbg(adapter->dev, "write: file = 0x%lx, count = 0x%lx\n",
+		(unsigned long)file, (unsigned long)count);
+
+	ibmvmc_send_msg(adapter, vmc_buffer, hmc, count);
+	ret = p - buffer;
+ out:
+	spin_unlock_irqrestore(&hmc->lock, flags);
+	return (ssize_t)(ret);
+}
+
+/**
+ * ibmvmc_setup_hmc - Setup the HMC
+ *
+ * @session:	ibmvmc_file_session struct
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static long ibmvmc_setup_hmc(struct ibmvmc_file_session *session)
+{
+	struct ibmvmc_hmc *hmc;
+	unsigned int valid, free, index;
+
+	if (ibmvmc.state == ibmvmc_state_failed) {
+		pr_warn("ibmvmc: Reserve HMC: state_failed\n");
+		return -EIO;
+	}
+
+	if (ibmvmc.state < ibmvmc_state_ready) {
+		pr_warn("ibmvmc: Reserve HMC: not state_ready\n");
+		return -EAGAIN;
+	}
+
+	/* Device is busy until capabilities have been exchanged and we
+	 * have a generic buffer for each possible HMC connection.
+	 */
+	for (index = 0; index <= ibmvmc.max_hmc_index; index++) {
+		valid = 0;
+		ibmvmc_count_hmc_buffers(index, &valid, &free);
+		if (valid == 0) {
+			pr_warn("ibmvmc: buffers not ready for index %d\n",
+				index);
+			return -ENOBUFS;
+		}
+	}
+
+	/* Get an hmc object, and transition to ibmhmc_state_initial */
+	hmc = ibmvmc_get_free_hmc();
+	if (!hmc) {
+		pr_warn("%s: free hmc not found\n", __func__);
+		return -EBUSY;
+	}
+
+	hmc->session = hmc->session + 1;
+	if (hmc->session == 0xff)
+		hmc->session = 1;
+
+	session->hmc = hmc;
+	hmc->adapter = &ibmvmc_adapter;
+	hmc->file_session = session;
+	session->valid = 1;
+
+	return 0;
+}
+
+/**
+ * ibmvmc_ioctl_sethmcid - IOCTL Set HMC ID
+ *
+ * @session:	ibmvmc_file_session struct
+ * @new_hmc_id:	HMC id field
+ *
+ * IOCTL command to setup the hmc id
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static long ibmvmc_ioctl_sethmcid(struct ibmvmc_file_session *session,
+				  unsigned char __user *new_hmc_id)
+{
+	struct ibmvmc_hmc *hmc;
+	struct ibmvmc_buffer *buffer;
+	size_t bytes;
+	char print_buffer[HMC_ID_LEN + 1];
+	unsigned long flags;
+	long rc = 0;
+
+	/* Reserve HMC session */
+	hmc = session->hmc;
+	if (!hmc) {
+		rc = ibmvmc_setup_hmc(session);
+		if (rc)
+			return rc;
+
+		hmc = session->hmc;
+		if (!hmc) {
+			pr_err("ibmvmc: setup_hmc success but no hmc\n");
+			return -EIO;
+		}
+	}
+
+	if (hmc->state != ibmhmc_state_initial) {
+		pr_warn("ibmvmc: sethmcid: invalid state to send open 0x%x\n",
+			hmc->state);
+		return -EIO;
+	}
+
+	bytes = copy_from_user(hmc->hmc_id, new_hmc_id, HMC_ID_LEN);
+	if (bytes)
+		return -EFAULT;
+
+	/* Send Open Session command */
+	spin_lock_irqsave(&hmc->lock, flags);
+	buffer = ibmvmc_get_valid_hmc_buffer(hmc->index);
+	spin_unlock_irqrestore(&hmc->lock, flags);
+
+	if (!buffer || !buffer->real_addr_local) {
+		pr_warn("ibmvmc: sethmcid: no buffer available\n");
+		return -EIO;
+	}
+
+	/* Make sure buffer is NULL terminated before trying to print it */
+	memset(print_buffer, 0, HMC_ID_LEN + 1);
+	strncpy(print_buffer, hmc->hmc_id, HMC_ID_LEN);
+	pr_info("ibmvmc: sethmcid: Set HMC ID: \"%s\"\n", print_buffer);
+
+	memcpy(buffer->real_addr_local, hmc->hmc_id, HMC_ID_LEN);
+	/* RDMA over ID, send open msg, change state to ibmhmc_state_opening */
+	rc = ibmvmc_send_open(buffer, hmc);
+
+	return rc;
+}
+
+/**
+ * ibmvmc_ioctl_query - IOCTL Query
+ *
+ * @session:	ibmvmc_file_session struct
+ * @ret_struct:	ibmvmc_query_struct
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static long ibmvmc_ioctl_query(struct ibmvmc_file_session *session,
+			       struct ibmvmc_query_struct __user *ret_struct)
+{
+	struct ibmvmc_query_struct query_struct;
+	size_t bytes;
+
+	memset(&query_struct, 0, sizeof(query_struct));
+	query_struct.have_vmc = (ibmvmc.state > ibmvmc_state_initial);
+	query_struct.state = ibmvmc.state;
+	query_struct.vmc_drc_index = ibmvmc.vmc_drc_index;
+
+	bytes = copy_to_user(ret_struct, &query_struct,
+			     sizeof(query_struct));
+	if (bytes)
+		return -EFAULT;
+
+	return 0;
+}
+
+/**
+ * ibmvmc_ioctl_requestvmc - IOCTL Request VMC
+ *
+ * @session:	ibmvmc_file_session struct
+ * @ret_vmc_index:	VMC Index
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static long ibmvmc_ioctl_requestvmc(struct ibmvmc_file_session *session,
+				    u32 __user *ret_vmc_index)
+{
+	/* TODO: (adreznec) Add locking to control multiple process access */
+	size_t bytes;
+	long rc;
+	u32 vmc_drc_index;
+
+	/* Call to request the VMC device from phyp*/
+	rc = h_request_vmc(&vmc_drc_index);
+	pr_debug("ibmvmc: requestvmc: H_REQUEST_VMC rc = 0x%lx\n", rc);
+
+	if (rc == H_SUCCESS) {
+		rc = 0;
+	} else if (rc == H_FUNCTION) {
+		pr_err("ibmvmc: requestvmc: h_request_vmc not supported\n");
+		return -EPERM;
+	} else if (rc == H_AUTHORITY) {
+		pr_err("ibmvmc: requestvmc: hypervisor denied vmc request\n");
+		return -EPERM;
+	} else if (rc == H_HARDWARE) {
+		pr_err("ibmvmc: requestvmc: hypervisor hardware fault\n");
+		return -EIO;
+	} else if (rc == H_RESOURCE) {
+		pr_err("ibmvmc: requestvmc: vmc resource unavailable\n");
+		return -ENODEV;
+	} else if (rc == H_NOT_AVAILABLE) {
+		pr_err("ibmvmc: requestvmc: system cannot be vmc managed\n");
+		return -EPERM;
+	} else if (rc == H_PARAMETER) {
+		pr_err("ibmvmc: requestvmc: invalid parameter\n");
+		return -EINVAL;
+	}
+
+	/* Success, set the vmc index in global struct */
+	ibmvmc.vmc_drc_index = vmc_drc_index;
+
+	bytes = copy_to_user(ret_vmc_index, &vmc_drc_index,
+			     sizeof(*ret_vmc_index));
+	if (bytes) {
+		pr_warn("ibmvmc: requestvmc: copy to user failed.\n");
+		return -EFAULT;
+	}
+	return rc;
+}
+
+/**
+ * ibmvmc_ioctl - IOCTL
+ *
+ * @session:	ibmvmc_file_session struct
+ * @cmd:	cmd field
+ * @arg:	Argument field
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static long ibmvmc_ioctl(struct file *file,
+			 unsigned int cmd, unsigned long arg)
+{
+	struct ibmvmc_file_session *session = file->private_data;
+
+	pr_debug("ibmvmc: ioctl file=0x%lx, cmd=0x%x, arg=0x%lx, ses=0x%lx\n",
+		 (unsigned long)file, cmd, arg,
+		 (unsigned long)session);
+
+	if (!session) {
+		pr_warn("ibmvmc: ioctl: no session\n");
+		return -EIO;
+	}
+
+	switch (cmd) {
+	case VMC_IOCTL_SETHMCID:
+		return ibmvmc_ioctl_sethmcid(session,
+			(unsigned char __user *)arg);
+	case VMC_IOCTL_QUERY:
+		return ibmvmc_ioctl_query(session,
+			(struct ibmvmc_query_struct __user *)arg);
+	case VMC_IOCTL_REQUESTVMC:
+		return ibmvmc_ioctl_requestvmc(session,
+			(unsigned int __user *)arg);
+	default:
+		pr_warn("ibmvmc: unknown ioctl 0x%x\n", cmd);
+		return -EINVAL;
+	}
+}
+
+static const struct file_operations ibmvmc_fops = {
+	.owner		= THIS_MODULE,
+	.read		= ibmvmc_read,
+	.write		= ibmvmc_write,
+	.poll		= ibmvmc_poll,
+	.unlocked_ioctl	= ibmvmc_ioctl,
+	.open           = ibmvmc_open,
+	.release        = ibmvmc_close,
+};
+
+/**
+ * ibmvmc_add_buffer - Add Buffer
+ *
+ * @adapter: crq_server_adapter struct
+ * @crq:	ibmvmc_crq_msg struct
+ *
+ * This message transfers a buffer from hypervisor ownership to management
+ * partition ownership. The LIOBA is obtained from the virtual TCE table
+ * associated with the hypervisor side of the VMC device, and points to a
+ * buffer of size MTU (as established in the capabilities exchange).
+ *
+ * Typical flow for ading buffers:
+ * 1. A new management application connection is opened by the management
+ *	partition.
+ * 2. The hypervisor assigns new buffers for the traffic associated with
+ *	that connection.
+ * 3. The hypervisor sends VMC Add Buffer messages to the management
+ *	partition, informing it of the new buffers.
+ * 4. The hypervisor sends an HMC protocol message (to the management
+ *	application) notifying it of the new buffers. This informs the
+ *	application that it has buffers available for sending HMC
+ *	commands.
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static int ibmvmc_add_buffer(struct crq_server_adapter *adapter,
+			     struct ibmvmc_crq_msg *crq)
+{
+	struct ibmvmc_buffer *buffer;
+	u8 hmc_index;
+	u8 hmc_session;
+	u16 buffer_id;
+	unsigned long flags;
+	int rc = 0;
+
+	if (!crq)
+		return -1;
+
+	hmc_session = crq->hmc_session;
+	hmc_index = crq->hmc_index;
+	buffer_id = be16_to_cpu(crq->var2.buffer_id);
+
+	if (hmc_index > ibmvmc.max_hmc_index) {
+		dev_err(adapter->dev, "add_buffer: invalid hmc_index = 0x%x\n",
+			hmc_index);
+		ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_INVALID_HMC_INDEX,
+					    hmc_session, hmc_index, buffer_id);
+		return -1;
+	}
+
+	if (buffer_id >= ibmvmc.max_buffer_pool_size) {
+		dev_err(adapter->dev, "add_buffer: invalid buffer_id = 0x%x\n",
+			buffer_id);
+		ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_INVALID_BUFFER_ID,
+					    hmc_session, hmc_index, buffer_id);
+		return -1;
+	}
+
+	spin_lock_irqsave(&hmcs[hmc_index].lock, flags);
+	buffer = &hmcs[hmc_index].buffer[buffer_id];
+
+	if (buffer->real_addr_local || buffer->dma_addr_local) {
+		dev_warn(adapter->dev, "add_buffer: already allocated id = 0x%lx\n",
+			 (unsigned long)buffer_id);
+		spin_unlock_irqrestore(&hmcs[hmc_index].lock, flags);
+		ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_INVALID_BUFFER_ID,
+					    hmc_session, hmc_index, buffer_id);
+		return -1;
+	}
+
+	buffer->real_addr_local = alloc_dma_buffer(to_vio_dev(adapter->dev),
+						   ibmvmc.max_mtu,
+						   &buffer->dma_addr_local);
+
+	if (!buffer->real_addr_local) {
+		dev_err(adapter->dev, "add_buffer: alloc_dma_buffer failed.\n");
+		spin_unlock_irqrestore(&hmcs[hmc_index].lock, flags);
+		ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_INTERFACE_FAILURE,
+					    hmc_session, hmc_index, buffer_id);
+		return -1;
+	}
+
+	buffer->dma_addr_remote = be32_to_cpu(crq->var3.lioba);
+	buffer->size = ibmvmc.max_mtu;
+	buffer->owner = crq->var1.owner;
+	buffer->free = 1;
+	/* Must ensure valid==1 is observable only after all other fields are */
+	dma_wmb();
+	buffer->valid = 1;
+	buffer->id = buffer_id;
+
+	dev_dbg(adapter->dev, "add_buffer: successfully added a buffer:\n");
+	dev_dbg(adapter->dev, "   index: %d, session: %d, buffer: 0x%x, owner: %d\n",
+		hmc_index, hmc_session, buffer_id, buffer->owner);
+	dev_dbg(adapter->dev, "   local: 0x%x, remote: 0x%x\n",
+		(u32)buffer->dma_addr_local,
+		(u32)buffer->dma_addr_remote);
+	spin_unlock_irqrestore(&hmcs[hmc_index].lock, flags);
+
+	ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_SUCCESS, hmc_session,
+				    hmc_index, buffer_id);
+
+	return rc;
+}
+
+/**
+ * ibmvmc_rem_buffer - Remove Buffer
+ *
+ * @adapter: crq_server_adapter struct
+ * @crq:	ibmvmc_crq_msg struct
+ *
+ * This message requests an HMC buffer to be transferred from management
+ * partition ownership to hypervisor ownership. The management partition may
+ * not be able to satisfy the request at a particular point in time if all its
+ * buffers are in use. The management partition requires a depth of at least
+ * one inbound buffer to allow management application commands to flow to the
+ * hypervisor. It is, therefore, an interface error for the hypervisor to
+ * attempt to remove the management partition's last buffer.
+ *
+ * The hypervisor is expected to manage buffer usage with the management
+ * application directly and inform the management partition when buffers may be
+ * removed. The typical flow for removing buffers:
+ *
+ * 1. The management application no longer needs a communication path to a
+ *	particular hypervisor function. That function is closed.
+ * 2. The hypervisor and the management application quiesce all traffic to that
+ *	function. The hypervisor requests a reduction in buffer pool size.
+ * 3. The management application acknowledges the reduction in buffer pool size.
+ * 4. The hypervisor sends a Remove Buffer message to the management partition,
+ *	informing it of the reduction in buffers.
+ * 5. The management partition verifies it can remove the buffer. This is
+ *	possible if buffers have been quiesced.
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+/*
+ * The hypervisor requested that we pick an unused buffer, and return it.
+ * Before sending the buffer back, we free any storage associated with the
+ * buffer.
+ */
+static int ibmvmc_rem_buffer(struct crq_server_adapter *adapter,
+			     struct ibmvmc_crq_msg *crq)
+{
+	struct ibmvmc_buffer *buffer;
+	u8 hmc_index;
+	u8 hmc_session;
+	u16 buffer_id = 0;
+	unsigned long flags;
+	int rc = 0;
+
+	if (!crq)
+		return -1;
+
+	hmc_session = crq->hmc_session;
+	hmc_index = crq->hmc_index;
+
+	if (hmc_index > ibmvmc.max_hmc_index) {
+		dev_warn(adapter->dev, "rem_buffer: invalid hmc_index = 0x%x\n",
+			 hmc_index);
+		ibmvmc_send_rem_buffer_resp(adapter, VMC_MSG_INVALID_HMC_INDEX,
+					    hmc_session, hmc_index, buffer_id);
+		return -1;
+	}
+
+	spin_lock_irqsave(&hmcs[hmc_index].lock, flags);
+	buffer = ibmvmc_get_free_hmc_buffer(adapter, hmc_index);
+	if (!buffer) {
+		dev_info(adapter->dev, "rem_buffer: no buffer to remove\n");
+		spin_unlock_irqrestore(&hmcs[hmc_index].lock, flags);
+		ibmvmc_send_rem_buffer_resp(adapter, VMC_MSG_NO_BUFFER,
+					    hmc_session, hmc_index,
+					    VMC_INVALID_BUFFER_ID);
+		return -1;
+	}
+
+	buffer_id = buffer->id;
+
+	if (buffer->valid)
+		free_dma_buffer(to_vio_dev(adapter->dev),
+				ibmvmc.max_mtu,
+				buffer->real_addr_local,
+				buffer->dma_addr_local);
+
+	memset(buffer, 0, sizeof(struct ibmvmc_buffer));
+	spin_unlock_irqrestore(&hmcs[hmc_index].lock, flags);
+
+	dev_dbg(adapter->dev, "rem_buffer: removed buffer 0x%x.\n", buffer_id);
+	ibmvmc_send_rem_buffer_resp(adapter, VMC_MSG_SUCCESS, hmc_session,
+				    hmc_index, buffer_id);
+
+	return rc;
+}
+
+static int ibmvmc_recv_msg(struct crq_server_adapter *adapter,
+			   struct ibmvmc_crq_msg *crq)
+{
+	struct ibmvmc_buffer *buffer;
+	struct ibmvmc_hmc *hmc;
+	unsigned long msg_len;
+	u8 hmc_index;
+	u8 hmc_session;
+	u16 buffer_id;
+	unsigned long flags;
+	int rc = 0;
+
+	if (!crq)
+		return -1;
+
+	/* Hypervisor writes CRQs directly into our memory in big endian */
+	dev_dbg(adapter->dev, "Recv_msg: msg from HV 0x%016llx 0x%016llx\n",
+		be64_to_cpu(*((unsigned long *)crq)),
+		be64_to_cpu(*(((unsigned long *)crq) + 1)));
+
+	hmc_session = crq->hmc_session;
+	hmc_index = crq->hmc_index;
+	buffer_id = be16_to_cpu(crq->var2.buffer_id);
+	msg_len = be32_to_cpu(crq->var3.msg_len);
+
+	if (hmc_index > ibmvmc.max_hmc_index) {
+		dev_err(adapter->dev, "Recv_msg: invalid hmc_index = 0x%x\n",
+			hmc_index);
+		ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_INVALID_HMC_INDEX,
+					    hmc_session, hmc_index, buffer_id);
+		return -1;
+	}
+
+	if (buffer_id >= ibmvmc.max_buffer_pool_size) {
+		dev_err(adapter->dev, "Recv_msg: invalid buffer_id = 0x%x\n",
+			buffer_id);
+		ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_INVALID_BUFFER_ID,
+					    hmc_session, hmc_index, buffer_id);
+		return -1;
+	}
+
+	hmc = &hmcs[hmc_index];
+	spin_lock_irqsave(&hmc->lock, flags);
+
+	if (hmc->state == ibmhmc_state_free) {
+		dev_err(adapter->dev, "Recv_msg: invalid hmc state = 0x%x\n",
+			hmc->state);
+		/* HMC connection is not valid (possibly was reset under us). */
+		spin_unlock_irqrestore(&hmc->lock, flags);
+		return -1;
+	}
+
+	buffer = &hmc->buffer[buffer_id];
+
+	if (buffer->valid == 0 || buffer->owner == VMC_BUF_OWNER_ALPHA) {
+		dev_err(adapter->dev, "Recv_msg: not valid, or not HV.  0x%x 0x%x\n",
+			buffer->valid, buffer->owner);
+		spin_unlock_irqrestore(&hmc->lock, flags);
+		return -1;
+	}
+
+	/* RDMA the data into the partition. */
+	rc = h_copy_rdma(msg_len,
+			 adapter->riobn,
+			 buffer->dma_addr_remote,
+			 adapter->liobn,
+			 buffer->dma_addr_local);
+
+	dev_dbg(adapter->dev, "Recv_msg: msg_len = 0x%x, buffer_id = 0x%x, queue_head = 0x%x, hmc_idx = 0x%x\n",
+		(unsigned int)msg_len, (unsigned int)buffer_id,
+		(unsigned int)hmc->queue_head, (unsigned int)hmc_index);
+	buffer->msg_len = msg_len;
+	buffer->free = 0;
+	buffer->owner = VMC_BUF_OWNER_ALPHA;
+
+	if (rc) {
+		dev_err(adapter->dev, "Failure in recv_msg: h_copy_rdma = 0x%x\n",
+			rc);
+		spin_unlock_irqrestore(&hmc->lock, flags);
+		return -1;
+	}
+
+	/* Must be locked because read operates on the same data */
+	hmc->queue_outbound_msgs[hmc->queue_head] = buffer_id;
+	hmc->queue_head++;
+	if (hmc->queue_head == ibmvmc_max_buf_pool_size)
+		hmc->queue_head = 0;
+
+	if (hmc->queue_head == hmc->queue_tail)
+		dev_err(adapter->dev, "outbound buffer queue wrapped.\n");
+
+	spin_unlock_irqrestore(&hmc->lock, flags);
+
+	wake_up_interruptible(&ibmvmc_read_wait);
+
+	return 0;
+}
+
+/**
+ * ibmvmc_process_capabilities - Process Capabilities
+ *
+ * @adapter:	crq_server_adapter struct
+ * @crqp:	ibmvmc_crq_msg struct
+ *
+ */
+static void ibmvmc_process_capabilities(struct crq_server_adapter *adapter,
+					struct ibmvmc_crq_msg *crqp)
+{
+	struct ibmvmc_admin_crq_msg *crq = (struct ibmvmc_admin_crq_msg *)crqp;
+
+	if ((be16_to_cpu(crq->version) >> 8) !=
+			(IBMVMC_PROTOCOL_VERSION >> 8)) {
+		dev_err(adapter->dev, "init failed, incompatible versions 0x%x 0x%x\n",
+			be16_to_cpu(crq->version),
+			IBMVMC_PROTOCOL_VERSION);
+		ibmvmc.state = ibmvmc_state_failed;
+		return;
+	}
+
+	ibmvmc.max_mtu = min_t(u32, ibmvmc_max_mtu, be32_to_cpu(crq->max_mtu));
+	ibmvmc.max_buffer_pool_size = min_t(u16, ibmvmc_max_buf_pool_size,
+					    be16_to_cpu(crq->pool_size));
+	ibmvmc.max_hmc_index = min_t(u8, ibmvmc_max_hmcs, crq->max_hmc) - 1;
+	ibmvmc.state = ibmvmc_state_ready;
+
+	dev_info(adapter->dev, "Capabilities: mtu=0x%x, pool_size=0x%x, max_hmc=0x%x\n",
+		 ibmvmc.max_mtu, ibmvmc.max_buffer_pool_size,
+		 ibmvmc.max_hmc_index);
+}
+
+/**
+ * ibmvmc_validate_hmc_session - Validate HMC Session
+ *
+ * @adapter:	crq_server_adapter struct
+ * @crq:	ibmvmc_crq_msg struct
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static int ibmvmc_validate_hmc_session(struct crq_server_adapter *adapter,
+				       struct ibmvmc_crq_msg *crq)
+{
+	unsigned char hmc_index;
+
+	hmc_index = crq->hmc_index;
+
+	if (crq->hmc_session == 0)
+		return 0;
+
+	if (hmc_index > ibmvmc.max_hmc_index)
+		return -1;
+
+	if (hmcs[hmc_index].session != crq->hmc_session) {
+		dev_warn(adapter->dev, "Drop, bad session: expected 0x%x, recv 0x%x\n",
+			 hmcs[hmc_index].session, crq->hmc_session);
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * ibmvmc_reset - Reset
+ *
+ * @adapter:	crq_server_adapter struct
+ * @xport_event:	export_event field
+ *
+ * Closes all HMC sessions and conditionally schedules a CRQ reset.
+ * @xport_event: If true, the partner closed their CRQ; we don't need to reset.
+ *               If false, we need to schedule a CRQ reset.
+ */
+static void ibmvmc_reset(struct crq_server_adapter *adapter, bool xport_event)
+{
+	int i;
+
+	if (ibmvmc.state != ibmvmc_state_sched_reset) {
+		dev_info(adapter->dev, "*** Reset to initial state.\n");
+		for (i = 0; i < ibmvmc_max_hmcs; i++)
+			ibmvmc_return_hmc(&hmcs[i], xport_event);
+
+		if (xport_event) {
+			/* CRQ was closed by the partner.  We don't need to do
+			 * anything except set ourself to the correct state to
+			 * handle init msgs.
+			 */
+			ibmvmc.state = ibmvmc_state_crqinit;
+		} else {
+			/* The partner did not close their CRQ - instead, we're
+			 * closing the CRQ on our end. Need to schedule this
+			 * for process context, because CRQ reset may require a
+			 * sleep.
+			 *
+			 * Setting ibmvmc.state here immediately prevents
+			 * ibmvmc_open from completing until the reset
+			 * completes in process context.
+			 */
+			ibmvmc.state = ibmvmc_state_sched_reset;
+			dev_dbg(adapter->dev, "Device reset scheduled");
+			wake_up_interruptible(&adapter->reset_wait_queue);
+		}
+	}
+}
+
+/**
+ * ibmvmc_reset_task - Reset Task
+ *
+ * @data:	Data field
+ *
+ * Performs a CRQ reset of the VMC device in process context.
+ * NOTE: This function should not be called directly, use ibmvmc_reset.
+ */
+static int ibmvmc_reset_task(void *data)
+{
+	struct crq_server_adapter *adapter = data;
+	int rc;
+
+	set_user_nice(current, -20);
+
+	while (!kthread_should_stop()) {
+		wait_event_interruptible(adapter->reset_wait_queue,
+			(ibmvmc.state == ibmvmc_state_sched_reset) ||
+			kthread_should_stop());
+
+		if (kthread_should_stop())
+			break;
+
+		dev_dbg(adapter->dev, "CRQ resetting in process context");
+		tasklet_disable(&adapter->work_task);
+
+		rc = ibmvmc_reset_crq_queue(adapter);
+
+		if (rc != H_SUCCESS && rc != H_RESOURCE) {
+			dev_err(adapter->dev, "Error initializing CRQ.  rc = 0x%x\n",
+				rc);
+			ibmvmc.state = ibmvmc_state_failed;
+		} else {
+			ibmvmc.state = ibmvmc_state_crqinit;
+
+			if (ibmvmc_send_crq(adapter, 0xC001000000000000LL, 0)
+			    != 0 && rc != H_RESOURCE)
+				dev_warn(adapter->dev, "Failed to send initialize CRQ message\n");
+		}
+
+		vio_enable_interrupts(to_vio_dev(adapter->dev));
+		tasklet_enable(&adapter->work_task);
+	}
+
+	return 0;
+}
+
+/**
+ * ibmvmc_process_open_resp - Process Open Response
+ *
+ * @crq: ibmvmc_crq_msg struct
+ * @adapter:    crq_server_adapter struct
+ *
+ * This command is sent by the hypervisor in response to the Interface
+ * Open message. When this message is received, the indicated buffer is
+ * again available for management partition use.
+ */
+static void ibmvmc_process_open_resp(struct ibmvmc_crq_msg *crq,
+				     struct crq_server_adapter *adapter)
+{
+	unsigned char hmc_index;
+	unsigned short buffer_id;
+
+	hmc_index = crq->hmc_index;
+	if (hmc_index > ibmvmc.max_hmc_index) {
+		/* Why would PHYP give an index > max negotiated? */
+		ibmvmc_reset(adapter, false);
+		return;
+	}
+
+	if (crq->status) {
+		dev_warn(adapter->dev, "open_resp: failed - status 0x%x\n",
+			 crq->status);
+		ibmvmc_return_hmc(&hmcs[hmc_index], false);
+		return;
+	}
+
+	if (hmcs[hmc_index].state == ibmhmc_state_opening) {
+		buffer_id = be16_to_cpu(crq->var2.buffer_id);
+		if (buffer_id >= ibmvmc.max_buffer_pool_size) {
+			dev_err(adapter->dev, "open_resp: invalid buffer_id = 0x%x\n",
+				buffer_id);
+			hmcs[hmc_index].state = ibmhmc_state_failed;
+		} else {
+			ibmvmc_free_hmc_buffer(&hmcs[hmc_index],
+					       &hmcs[hmc_index].buffer[buffer_id]);
+			hmcs[hmc_index].state = ibmhmc_state_ready;
+			dev_dbg(adapter->dev, "open_resp: set hmc state = ready\n");
+		}
+	} else {
+		dev_warn(adapter->dev, "open_resp: invalid hmc state (0x%x)\n",
+			 hmcs[hmc_index].state);
+	}
+}
+
+/**
+ * ibmvmc_process_close_resp - Process Close Response
+ *
+ * @crq: ibmvmc_crq_msg struct
+ * @adapter:    crq_server_adapter struct
+ *
+ * This command is sent by the hypervisor in response to the managemant
+ * application Interface Close message.
+ *
+ * If the close fails, simply reset the entire driver as the state of the VMC
+ * must be in tough shape.
+ */
+static void ibmvmc_process_close_resp(struct ibmvmc_crq_msg *crq,
+				      struct crq_server_adapter *adapter)
+{
+	unsigned char hmc_index;
+
+	hmc_index = crq->hmc_index;
+	if (hmc_index > ibmvmc.max_hmc_index) {
+		ibmvmc_reset(adapter, false);
+		return;
+	}
+
+	if (crq->status) {
+		dev_warn(adapter->dev, "close_resp: failed - status 0x%x\n",
+			 crq->status);
+		ibmvmc_reset(adapter, false);
+		return;
+	}
+
+	ibmvmc_return_hmc(&hmcs[hmc_index], false);
+}
+
+/**
+ * ibmvmc_crq_process - Process CRQ
+ *
+ * @adapter:    crq_server_adapter struct
+ * @crq:	ibmvmc_crq_msg struct
+ *
+ * Process the CRQ message based upon the type of message received.
+ *
+ */
+static void ibmvmc_crq_process(struct crq_server_adapter *adapter,
+			       struct ibmvmc_crq_msg *crq)
+{
+	switch (crq->type) {
+	case VMC_MSG_CAP_RESP:
+		dev_dbg(adapter->dev, "CRQ recv: capabilities resp (0x%x)\n",
+			crq->type);
+		if (ibmvmc.state == ibmvmc_state_capabilities)
+			ibmvmc_process_capabilities(adapter, crq);
+		else
+			dev_warn(adapter->dev, "caps msg invalid in state 0x%x\n",
+				 ibmvmc.state);
+		break;
+	case VMC_MSG_OPEN_RESP:
+		dev_dbg(adapter->dev, "CRQ recv: open resp (0x%x)\n",
+			crq->type);
+		if (ibmvmc_validate_hmc_session(adapter, crq) == 0)
+			ibmvmc_process_open_resp(crq, adapter);
+		break;
+	case VMC_MSG_ADD_BUF:
+		dev_dbg(adapter->dev, "CRQ recv: add buf (0x%x)\n",
+			crq->type);
+		if (ibmvmc_validate_hmc_session(adapter, crq) == 0)
+			ibmvmc_add_buffer(adapter, crq);
+		break;
+	case VMC_MSG_REM_BUF:
+		dev_dbg(adapter->dev, "CRQ recv: rem buf (0x%x)\n",
+			crq->type);
+		if (ibmvmc_validate_hmc_session(adapter, crq) == 0)
+			ibmvmc_rem_buffer(adapter, crq);
+		break;
+	case VMC_MSG_SIGNAL:
+		dev_dbg(adapter->dev, "CRQ recv: signal msg (0x%x)\n",
+			crq->type);
+		if (ibmvmc_validate_hmc_session(adapter, crq) == 0)
+			ibmvmc_recv_msg(adapter, crq);
+		break;
+	case VMC_MSG_CLOSE_RESP:
+		dev_dbg(adapter->dev, "CRQ recv: close resp (0x%x)\n",
+			crq->type);
+		if (ibmvmc_validate_hmc_session(adapter, crq) == 0)
+			ibmvmc_process_close_resp(crq, adapter);
+		break;
+	case VMC_MSG_CAP:
+	case VMC_MSG_OPEN:
+	case VMC_MSG_CLOSE:
+	case VMC_MSG_ADD_BUF_RESP:
+	case VMC_MSG_REM_BUF_RESP:
+		dev_warn(adapter->dev, "CRQ recv: unexpected msg (0x%x)\n",
+			 crq->type);
+		break;
+	default:
+		dev_warn(adapter->dev, "CRQ recv: unknown msg (0x%x)\n",
+			 crq->type);
+		break;
+	}
+}
+
+/**
+ * ibmvmc_handle_crq_init - Handle CRQ Init
+ *
+ * @crq:	ibmvmc_crq_msg struct
+ * @adapter:	crq_server_adapter struct
+ *
+ * Handle the type of crq initialization based on whether
+ * it is a message or a response.
+ *
+ */
+static void ibmvmc_handle_crq_init(struct ibmvmc_crq_msg *crq,
+				   struct crq_server_adapter *adapter)
+{
+	switch (crq->type) {
+	case 0x01:	/* Initialization message */
+		dev_dbg(adapter->dev, "CRQ recv: CRQ init msg - state 0x%x\n",
+			ibmvmc.state);
+		if (ibmvmc.state == ibmvmc_state_crqinit) {
+			/* Send back a response */
+			if (ibmvmc_send_crq(adapter, 0xC002000000000000,
+					    0) == 0)
+				ibmvmc_send_capabilities(adapter);
+			else
+				dev_err(adapter->dev, " Unable to send init rsp\n");
+		} else {
+			dev_err(adapter->dev, "Invalid state 0x%x mtu = 0x%x\n",
+				ibmvmc.state, ibmvmc.max_mtu);
+		}
+
+		break;
+	case 0x02:	/* Initialization response */
+		dev_dbg(adapter->dev, "CRQ recv: initialization resp msg - state 0x%x\n",
+			ibmvmc.state);
+		if (ibmvmc.state == ibmvmc_state_crqinit)
+			ibmvmc_send_capabilities(adapter);
+		break;
+	default:
+		dev_warn(adapter->dev, "Unknown crq message type 0x%lx\n",
+			 (unsigned long)crq->type);
+	}
+}
+
+/**
+ * ibmvmc_handle_crq - Handle CRQ
+ *
+ * @crq:	ibmvmc_crq_msg struct
+ * @adapter:	crq_server_adapter struct
+ *
+ * Read the command elements from the command queue and execute the
+ * requests based upon the type of crq message.
+ *
+ */
+static void ibmvmc_handle_crq(struct ibmvmc_crq_msg *crq,
+			      struct crq_server_adapter *adapter)
+{
+	switch (crq->valid) {
+	case 0xC0:		/* initialization */
+		ibmvmc_handle_crq_init(crq, adapter);
+		break;
+	case 0xFF:	/* Hypervisor telling us the connection is closed */
+		dev_warn(adapter->dev, "CRQ recv: virtual adapter failed - resetting.\n");
+		ibmvmc_reset(adapter, true);
+		break;
+	case 0x80:	/* real payload */
+		ibmvmc_crq_process(adapter, crq);
+		break;
+	default:
+		dev_warn(adapter->dev, "CRQ recv: unknown msg 0x%02x.\n",
+			 crq->valid);
+		break;
+	}
+}
+
+static void ibmvmc_task(unsigned long data)
+{
+	struct crq_server_adapter *adapter =
+		(struct crq_server_adapter *)data;
+	struct vio_dev *vdev = to_vio_dev(adapter->dev);
+	struct ibmvmc_crq_msg *crq;
+	int done = 0;
+
+	while (!done) {
+		/* Pull all the valid messages off the CRQ */
+		while ((crq = crq_queue_next_crq(&adapter->queue)) != NULL) {
+			ibmvmc_handle_crq(crq, adapter);
+			crq->valid = 0x00;
+			/* CRQ reset was requested, stop processing CRQs.
+			 * Interrupts will be re-enabled by the reset task.
+			 */
+			if (ibmvmc.state == ibmvmc_state_sched_reset)
+				return;
+		}
+
+		vio_enable_interrupts(vdev);
+		crq = crq_queue_next_crq(&adapter->queue);
+		if (crq) {
+			vio_disable_interrupts(vdev);
+			ibmvmc_handle_crq(crq, adapter);
+			crq->valid = 0x00;
+			/* CRQ reset was requested, stop processing CRQs.
+			 * Interrupts will be re-enabled by the reset task.
+			 */
+			if (ibmvmc.state == ibmvmc_state_sched_reset)
+				return;
+		} else {
+			done = 1;
+		}
+	}
+}
+
+/**
+ * ibmvmc_init_crq_queue - Init CRQ Queue
+ *
+ * @adapter:	crq_server_adapter struct
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static int ibmvmc_init_crq_queue(struct crq_server_adapter *adapter)
+{
+	struct vio_dev *vdev = to_vio_dev(adapter->dev);
+	struct crq_queue *queue = &adapter->queue;
+	int rc = 0;
+	int retrc = 0;
+
+	queue->msgs = (struct ibmvmc_crq_msg *)get_zeroed_page(GFP_KERNEL);
+
+	if (!queue->msgs)
+		goto malloc_failed;
+
+	queue->size = PAGE_SIZE / sizeof(*queue->msgs);
+
+	queue->msg_token = dma_map_single(adapter->dev, queue->msgs,
+					  queue->size * sizeof(*queue->msgs),
+					  DMA_BIDIRECTIONAL);
+
+	if (dma_mapping_error(adapter->dev, queue->msg_token))
+		goto map_failed;
+
+	retrc = plpar_hcall_norets(H_REG_CRQ,
+				   vdev->unit_address,
+				   queue->msg_token, PAGE_SIZE);
+	rc = retrc;
+
+	if (rc == H_RESOURCE)
+		rc = ibmvmc_reset_crq_queue(adapter);
+
+	if (rc == 2) {
+		dev_warn(adapter->dev, "Partner adapter not ready\n");
+		retrc = 0;
+	} else if (rc != 0) {
+		dev_err(adapter->dev, "Error %d opening adapter\n", rc);
+		goto reg_crq_failed;
+	}
+
+	queue->cur = 0;
+	spin_lock_init(&queue->lock);
+
+	tasklet_init(&adapter->work_task, ibmvmc_task, (unsigned long)adapter);
+
+	if (request_irq(vdev->irq,
+			ibmvmc_handle_event,
+			0, "ibmvmc", (void *)adapter) != 0) {
+		dev_err(adapter->dev, "couldn't register irq 0x%x\n",
+			vdev->irq);
+		goto req_irq_failed;
+	}
+
+	rc = vio_enable_interrupts(vdev);
+	if (rc != 0) {
+		dev_err(adapter->dev, "Error %d enabling interrupts!!!\n", rc);
+		goto req_irq_failed;
+	}
+
+	return retrc;
+
+req_irq_failed:
+	/* Cannot have any work since we either never got our IRQ registered,
+	 * or never got interrupts enabled
+	 */
+	tasklet_kill(&adapter->work_task);
+	h_free_crq(vdev->unit_address);
+reg_crq_failed:
+	dma_unmap_single(adapter->dev,
+			 queue->msg_token,
+			 queue->size * sizeof(*queue->msgs), DMA_BIDIRECTIONAL);
+map_failed:
+	free_page((unsigned long)queue->msgs);
+malloc_failed:
+	return -ENOMEM;
+}
+
+/* Fill in the liobn and riobn fields on the adapter */
+static int read_dma_window(struct vio_dev *vdev,
+			   struct crq_server_adapter *adapter)
+{
+	const __be32 *dma_window;
+	const __be32 *prop;
+
+	/* TODO Using of_parse_dma_window would be better, but it doesn't give
+	 * a way to read multiple windows without already knowing the size of
+	 * a window or the number of windows
+	 */
+	dma_window =
+		(const __be32 *)vio_get_attribute(vdev, "ibm,my-dma-window",
+						NULL);
+	if (!dma_window) {
+		dev_warn(adapter->dev, "Couldn't find ibm,my-dma-window property\n");
+		return -1;
+	}
+
+	adapter->liobn = be32_to_cpu(*dma_window);
+	dma_window++;
+
+	prop = (const __be32 *)vio_get_attribute(vdev, "ibm,#dma-address-cells",
+						NULL);
+	if (!prop) {
+		dev_warn(adapter->dev, "Couldn't find ibm,#dma-address-cells property\n");
+		dma_window++;
+	} else {
+		dma_window += be32_to_cpu(*prop);
+	}
+
+	prop = (const __be32 *)vio_get_attribute(vdev, "ibm,#dma-size-cells",
+						NULL);
+	if (!prop) {
+		dev_warn(adapter->dev, "Couldn't find ibm,#dma-size-cells property\n");
+		dma_window++;
+	} else {
+		dma_window += be32_to_cpu(*prop);
+	}
+
+	/* dma_window should point to the second window now */
+	adapter->riobn = be32_to_cpu(*dma_window);
+
+	return 0;
+}
+
+static int ibmvmc_probe(struct vio_dev *vdev, const struct vio_device_id *id)
+{
+	struct crq_server_adapter *adapter = &ibmvmc_adapter;
+	int rc;
+
+	dev_set_drvdata(&vdev->dev, NULL);
+	memset(adapter, 0, sizeof(*adapter));
+	adapter->dev = &vdev->dev;
+
+	dev_info(adapter->dev, "Probe for UA 0x%x\n", vdev->unit_address);
+
+	rc = read_dma_window(vdev, adapter);
+	if (rc != 0) {
+		ibmvmc.state = ibmvmc_state_failed;
+		return -1;
+	}
+
+	dev_dbg(adapter->dev, "Probe: liobn 0x%x, riobn 0x%x\n",
+		adapter->liobn, adapter->riobn);
+
+	init_waitqueue_head(&adapter->reset_wait_queue);
+	adapter->reset_task = kthread_run(ibmvmc_reset_task, adapter, "ibmvmc");
+	if (IS_ERR(adapter->reset_task)) {
+		dev_err(adapter->dev, "Failed to start reset thread\n");
+		ibmvmc.state = ibmvmc_state_failed;
+		rc = PTR_ERR(adapter->reset_task);
+		adapter->reset_task = NULL;
+		return rc;
+	}
+
+	rc = ibmvmc_init_crq_queue(adapter);
+	if (rc != 0 && rc != H_RESOURCE) {
+		dev_err(adapter->dev, "Error initializing CRQ.  rc = 0x%x\n",
+			rc);
+		ibmvmc.state = ibmvmc_state_failed;
+		goto crq_failed;
+	}
+
+	ibmvmc.state = ibmvmc_state_crqinit;
+
+	/* Try to send an initialization message.  Note that this is allowed
+	 * to fail if the other end is not acive.  In that case we just wait
+	 * for the other side to initialize.
+	 */
+	if (ibmvmc_send_crq(adapter, 0xC001000000000000LL, 0) != 0 &&
+	    rc != H_RESOURCE)
+		dev_warn(adapter->dev, "Failed to send initialize CRQ message\n");
+
+	dev_set_drvdata(&vdev->dev, adapter);
+
+	return 0;
+
+crq_failed:
+	kthread_stop(adapter->reset_task);
+	adapter->reset_task = NULL;
+	return -EPERM;
+}
+
+static int ibmvmc_remove(struct vio_dev *vdev)
+{
+	struct crq_server_adapter *adapter = dev_get_drvdata(&vdev->dev);
+
+	dev_info(adapter->dev, "Entering remove for UA 0x%x\n",
+		 vdev->unit_address);
+	ibmvmc_release_crq_queue(adapter);
+
+	return 0;
+}
+
+static struct vio_device_id ibmvmc_device_table[] = {
+	{ "ibm,vmc", "IBM,vmc" },
+	{ "", "" }
+};
+MODULE_DEVICE_TABLE(vio, ibmvmc_device_table);
+
+static struct vio_driver ibmvmc_driver = {
+	.name        = ibmvmc_driver_name,
+	.id_table    = ibmvmc_device_table,
+	.probe       = ibmvmc_probe,
+	.remove      = ibmvmc_remove,
+};
+
+static void __init ibmvmc_scrub_module_parms(void)
+{
+	if (ibmvmc_max_mtu > MAX_MTU) {
+		pr_warn("ibmvmc: Max MTU reduced to %d\n", MAX_MTU);
+		ibmvmc_max_mtu = MAX_MTU;
+	} else if (ibmvmc_max_mtu < MIN_MTU) {
+		pr_warn("ibmvmc: Max MTU increased to %d\n", MIN_MTU);
+		ibmvmc_max_mtu = MIN_MTU;
+	}
+
+	if (ibmvmc_max_buf_pool_size > MAX_BUF_POOL_SIZE) {
+		pr_warn("ibmvmc: Max buffer pool size reduced to %d\n",
+			MAX_BUF_POOL_SIZE);
+		ibmvmc_max_buf_pool_size = MAX_BUF_POOL_SIZE;
+	} else if (ibmvmc_max_buf_pool_size < MIN_BUF_POOL_SIZE) {
+		pr_warn("ibmvmc: Max buffer pool size increased to %d\n",
+			MIN_BUF_POOL_SIZE);
+		ibmvmc_max_buf_pool_size = MIN_BUF_POOL_SIZE;
+	}
+
+	if (ibmvmc_max_hmcs > MAX_HMCS) {
+		pr_warn("ibmvmc: Max HMCs reduced to %d\n", MAX_HMCS);
+		ibmvmc_max_hmcs = MAX_HMCS;
+	} else if (ibmvmc_max_hmcs < MIN_HMCS) {
+		pr_warn("ibmvmc: Max HMCs increased to %d\n", MIN_HMCS);
+		ibmvmc_max_hmcs = MIN_HMCS;
+	}
+}
+
+static struct miscdevice ibmvmc_miscdev = {
+	.name = ibmvmc_driver_name,
+	.minor = MISC_DYNAMIC_MINOR,
+	.fops = &ibmvmc_fops,
+};
+
+static int __init ibmvmc_module_init(void)
+{
+	int rc, i, j;
+
+	ibmvmc.state = ibmvmc_state_initial;
+	pr_info("ibmvmc: version %s\n", IBMVMC_DRIVER_VERSION);
+
+	rc = misc_register(&ibmvmc_miscdev);
+	if (rc) {
+		pr_err("ibmvmc: misc registration failed\n");
+		goto misc_register_failed;
+	}
+	pr_info("ibmvmc: node %d:%d\n", MISC_MAJOR,
+		ibmvmc_miscdev.minor);
+
+	/* Initialize data structures */
+	memset(hmcs, 0, sizeof(struct ibmvmc_hmc) * MAX_HMCS);
+	for (i = 0; i < MAX_HMCS; i++) {
+		spin_lock_init(&hmcs[i].lock);
+		hmcs[i].state = ibmhmc_state_free;
+		for (j = 0; j < MAX_BUF_POOL_SIZE; j++)
+			hmcs[i].queue_outbound_msgs[j] = VMC_INVALID_BUFFER_ID;
+	}
+
+	/* Sanity check module parms */
+	ibmvmc_scrub_module_parms();
+
+	/*
+	 * Initialize some reasonable values.  Might be negotiated smaller
+	 * values during the capabilities exchange.
+	 */
+	ibmvmc.max_mtu = ibmvmc_max_mtu;
+	ibmvmc.max_buffer_pool_size = ibmvmc_max_buf_pool_size;
+	ibmvmc.max_hmc_index = ibmvmc_max_hmcs - 1;
+
+	rc = vio_register_driver(&ibmvmc_driver);
+
+	if (rc) {
+		pr_err("ibmvmc: rc %d from vio_register_driver\n", rc);
+		goto vio_reg_failed;
+	}
+
+	return 0;
+
+vio_reg_failed:
+	misc_deregister(&ibmvmc_miscdev);
+misc_register_failed:
+	return rc;
+}
+
+static void __exit ibmvmc_module_exit(void)
+{
+	pr_info("ibmvmc: module exit\n");
+	vio_unregister_driver(&ibmvmc_driver);
+	misc_deregister(&ibmvmc_miscdev);
+}
+
+module_init(ibmvmc_module_init);
+module_exit(ibmvmc_module_exit);
+
+module_param_named(buf_pool_size, ibmvmc_max_buf_pool_size,
+		   int, 0644);
+MODULE_PARM_DESC(buf_pool_size, "Buffer pool size");
+module_param_named(max_hmcs, ibmvmc_max_hmcs, int, 0644);
+MODULE_PARM_DESC(max_hmcs, "Max HMCs");
+module_param_named(max_mtu, ibmvmc_max_mtu, int, 0644);
+MODULE_PARM_DESC(max_mtu, "Max MTU");
+
+MODULE_AUTHOR("Steven Royer <seroyer@linux.vnet.ibm.com>");
+MODULE_DESCRIPTION("IBM VMC");
+MODULE_VERSION(IBMVMC_DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/ibmvmc.h b/drivers/misc/ibmvmc.h
new file mode 100644
index 000000000..e140ada8f
--- /dev/null
+++ b/drivers/misc/ibmvmc.h
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: GPL-2.0+
+ *
+ * linux/drivers/misc/ibmvmc.h
+ *
+ * IBM Power Systems Virtual Management Channel Support.
+ *
+ * Copyright (c) 2004, 2018 IBM Corp.
+ *   Dave Engebretsen engebret@us.ibm.com
+ *   Steven Royer seroyer@linux.vnet.ibm.com
+ *   Adam Reznechek adreznec@linux.vnet.ibm.com
+ *   Bryant G. Ly <bryantly@linux.vnet.ibm.com>
+ */
+#ifndef IBMVMC_H
+#define IBMVMC_H
+
+#include <linux/types.h>
+#include <linux/cdev.h>
+
+#include <asm/vio.h>
+
+#define IBMVMC_PROTOCOL_VERSION    0x0101
+
+#define MIN_BUF_POOL_SIZE 16
+#define MIN_HMCS          1
+#define MIN_MTU           4096
+#define MAX_BUF_POOL_SIZE 64
+#define MAX_HMCS          2
+#define MAX_MTU           (4 * 4096)
+#define DEFAULT_BUF_POOL_SIZE 32
+#define DEFAULT_HMCS          1
+#define DEFAULT_MTU           4096
+#define HMC_ID_LEN        32
+
+#define VMC_INVALID_BUFFER_ID 0xFFFF
+
+/* ioctl numbers */
+#define VMC_BASE	     0xCC
+#define VMC_IOCTL_SETHMCID   _IOW(VMC_BASE, 0x00, unsigned char *)
+#define VMC_IOCTL_QUERY      _IOR(VMC_BASE, 0x01, struct ibmvmc_query_struct)
+#define VMC_IOCTL_REQUESTVMC _IOR(VMC_BASE, 0x02, u32)
+
+#define VMC_MSG_CAP          0x01
+#define VMC_MSG_CAP_RESP     0x81
+#define VMC_MSG_OPEN         0x02
+#define VMC_MSG_OPEN_RESP    0x82
+#define VMC_MSG_CLOSE        0x03
+#define VMC_MSG_CLOSE_RESP   0x83
+#define VMC_MSG_ADD_BUF      0x04
+#define VMC_MSG_ADD_BUF_RESP 0x84
+#define VMC_MSG_REM_BUF      0x05
+#define VMC_MSG_REM_BUF_RESP 0x85
+#define VMC_MSG_SIGNAL       0x06
+
+#define VMC_MSG_SUCCESS 0
+#define VMC_MSG_INVALID_HMC_INDEX 1
+#define VMC_MSG_INVALID_BUFFER_ID 2
+#define VMC_MSG_CLOSED_HMC        3
+#define VMC_MSG_INTERFACE_FAILURE 4
+#define VMC_MSG_NO_BUFFER         5
+
+#define VMC_BUF_OWNER_ALPHA 0
+#define VMC_BUF_OWNER_HV    1
+
+enum ibmvmc_states {
+	ibmvmc_state_sched_reset  = -1,
+	ibmvmc_state_initial      = 0,
+	ibmvmc_state_crqinit      = 1,
+	ibmvmc_state_capabilities = 2,
+	ibmvmc_state_ready        = 3,
+	ibmvmc_state_failed       = 4,
+};
+
+enum ibmhmc_states {
+	/* HMC connection not established */
+	ibmhmc_state_free    = 0,
+
+	/* HMC connection established (open called) */
+	ibmhmc_state_initial = 1,
+
+	/* open msg sent to HV, due to ioctl(1) call */
+	ibmhmc_state_opening = 2,
+
+	/* HMC connection ready, open resp msg from HV */
+	ibmhmc_state_ready   = 3,
+
+	/* HMC connection failure */
+	ibmhmc_state_failed  = 4,
+};
+
+struct ibmvmc_buffer {
+	u8 valid;	/* 1 when DMA storage allocated to buffer          */
+	u8 free;	/* 1 when buffer available for the Alpha Partition */
+	u8 owner;
+	u16 id;
+	u32 size;
+	u32 msg_len;
+	dma_addr_t dma_addr_local;
+	dma_addr_t dma_addr_remote;
+	void *real_addr_local;
+};
+
+struct ibmvmc_admin_crq_msg {
+	u8 valid;	/* RPA Defined           */
+	u8 type;	/* ibmvmc msg type       */
+	u8 status;	/* Response msg status. Zero is success and on failure,
+			 * either 1 - General Failure, or 2 - Invalid Version is
+			 * returned.
+			 */
+	u8 rsvd[2];
+	u8 max_hmc;	/* Max # of independent HMC connections supported */
+	__be16 pool_size;	/* Maximum number of buffers supported per HMC
+				 * connection
+				 */
+	__be32 max_mtu;		/* Maximum message size supported (bytes) */
+	__be16 crq_size;	/* # of entries available in the CRQ for the
+				 * source partition. The target partition must
+				 * limit the number of outstanding messages to
+				 * one half or less.
+				 */
+	__be16 version;	/* Indicates the code level of the management partition
+			 * or the hypervisor with the high-order byte
+			 * indicating a major version and the low-order byte
+			 * indicating a minor version.
+			 */
+};
+
+struct ibmvmc_crq_msg {
+	u8 valid;     /* RPA Defined           */
+	u8 type;      /* ibmvmc msg type       */
+	u8 status;    /* Response msg status   */
+	union {
+		u8 rsvd;  /* Reserved              */
+		u8 owner;
+	} var1;
+	u8 hmc_session;	/* Session Identifier for the current VMC connection */
+	u8 hmc_index;	/* A unique HMC Idx would be used if multiple management
+			 * applications running concurrently were desired
+			 */
+	union {
+		__be16 rsvd;
+		__be16 buffer_id;
+	} var2;
+	__be32 rsvd;
+	union {
+		__be32 rsvd;
+		__be32 lioba;
+		__be32 msg_len;
+	} var3;
+};
+
+/* an RPA command/response transport queue */
+struct crq_queue {
+	struct ibmvmc_crq_msg *msgs;
+	int size, cur;
+	dma_addr_t msg_token;
+	spinlock_t lock;
+};
+
+/* VMC server adapter settings */
+struct crq_server_adapter {
+	struct device *dev;
+	struct crq_queue queue;
+	u32 liobn;
+	u32 riobn;
+	struct tasklet_struct work_task;
+	wait_queue_head_t reset_wait_queue;
+	struct task_struct *reset_task;
+};
+
+/* Driver wide settings */
+struct ibmvmc_struct {
+	u32 state;
+	u32 max_mtu;
+	u32 max_buffer_pool_size;
+	u32 max_hmc_index;
+	struct crq_server_adapter *adapter;
+	struct cdev cdev;
+	u32 vmc_drc_index;
+};
+
+struct ibmvmc_file_session;
+
+/* Connection specific settings */
+struct ibmvmc_hmc {
+	u8 session;
+	u8 index;
+	u32 state;
+	struct crq_server_adapter *adapter;
+	spinlock_t lock;
+	unsigned char hmc_id[HMC_ID_LEN];
+	struct ibmvmc_buffer buffer[MAX_BUF_POOL_SIZE];
+	unsigned short queue_outbound_msgs[MAX_BUF_POOL_SIZE];
+	int queue_head, queue_tail;
+	struct ibmvmc_file_session *file_session;
+};
+
+struct ibmvmc_file_session {
+	struct file *file;
+	struct ibmvmc_hmc *hmc;
+	bool valid;
+};
+
+struct ibmvmc_query_struct {
+	int have_vmc;
+	int state;
+	int vmc_drc_index;
+};
+
+#endif /* __IBMVMC_H */
diff --git a/drivers/misc/ics932s401.c b/drivers/misc/ics932s401.c
new file mode 100644
index 000000000..81a0541ef
--- /dev/null
+++ b/drivers/misc/ics932s401.c
@@ -0,0 +1,495 @@
+/*
+ * A driver for the Integrated Circuits ICS932S401
+ * Copyright (C) 2008 IBM
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/module.h>
+#include <linux/jiffies.h>
+#include <linux/i2c.h>
+#include <linux/err.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/log2.h>
+#include <linux/slab.h>
+
+/* Addresses to scan */
+static const unsigned short normal_i2c[] = { 0x69, I2C_CLIENT_END };
+
+/* ICS932S401 registers */
+#define ICS932S401_REG_CFG2			0x01
+#define		ICS932S401_CFG1_SPREAD		0x01
+#define ICS932S401_REG_CFG7			0x06
+#define		ICS932S401_FS_MASK		0x07
+#define	ICS932S401_REG_VENDOR_REV		0x07
+#define		ICS932S401_VENDOR		1
+#define		ICS932S401_VENDOR_MASK		0x0F
+#define		ICS932S401_REV			4
+#define		ICS932S401_REV_SHIFT		4
+#define ICS932S401_REG_DEVICE			0x09
+#define		ICS932S401_DEVICE		11
+#define	ICS932S401_REG_CTRL			0x0A
+#define		ICS932S401_MN_ENABLED		0x80
+#define		ICS932S401_CPU_ALT		0x04
+#define		ICS932S401_SRC_ALT		0x08
+#define ICS932S401_REG_CPU_M_CTRL		0x0B
+#define		ICS932S401_M_MASK		0x3F
+#define	ICS932S401_REG_CPU_N_CTRL		0x0C
+#define	ICS932S401_REG_CPU_SPREAD1		0x0D
+#define ICS932S401_REG_CPU_SPREAD2		0x0E
+#define		ICS932S401_SPREAD_MASK		0x7FFF
+#define ICS932S401_REG_SRC_M_CTRL		0x0F
+#define ICS932S401_REG_SRC_N_CTRL		0x10
+#define	ICS932S401_REG_SRC_SPREAD1		0x11
+#define ICS932S401_REG_SRC_SPREAD2		0x12
+#define ICS932S401_REG_CPU_DIVISOR		0x13
+#define		ICS932S401_CPU_DIVISOR_SHIFT	4
+#define ICS932S401_REG_PCISRC_DIVISOR		0x14
+#define		ICS932S401_SRC_DIVISOR_MASK	0x0F
+#define		ICS932S401_PCI_DIVISOR_SHIFT	4
+
+/* Base clock is 14.318MHz */
+#define BASE_CLOCK				14318
+
+#define NUM_REGS				21
+#define NUM_MIRRORED_REGS			15
+
+static int regs_to_copy[NUM_MIRRORED_REGS] = {
+	ICS932S401_REG_CFG2,
+	ICS932S401_REG_CFG7,
+	ICS932S401_REG_VENDOR_REV,
+	ICS932S401_REG_DEVICE,
+	ICS932S401_REG_CTRL,
+	ICS932S401_REG_CPU_M_CTRL,
+	ICS932S401_REG_CPU_N_CTRL,
+	ICS932S401_REG_CPU_SPREAD1,
+	ICS932S401_REG_CPU_SPREAD2,
+	ICS932S401_REG_SRC_M_CTRL,
+	ICS932S401_REG_SRC_N_CTRL,
+	ICS932S401_REG_SRC_SPREAD1,
+	ICS932S401_REG_SRC_SPREAD2,
+	ICS932S401_REG_CPU_DIVISOR,
+	ICS932S401_REG_PCISRC_DIVISOR,
+};
+
+/* How often do we reread sensors values? (In jiffies) */
+#define SENSOR_REFRESH_INTERVAL	(2 * HZ)
+
+/* How often do we reread sensor limit values? (In jiffies) */
+#define LIMIT_REFRESH_INTERVAL	(60 * HZ)
+
+struct ics932s401_data {
+	struct attribute_group	attrs;
+	struct mutex		lock;
+	char			sensors_valid;
+	unsigned long		sensors_last_updated;	/* In jiffies */
+
+	u8			regs[NUM_REGS];
+};
+
+static int ics932s401_probe(struct i2c_client *client,
+			 const struct i2c_device_id *id);
+static int ics932s401_detect(struct i2c_client *client,
+			  struct i2c_board_info *info);
+static int ics932s401_remove(struct i2c_client *client);
+
+static const struct i2c_device_id ics932s401_id[] = {
+	{ "ics932s401", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, ics932s401_id);
+
+static struct i2c_driver ics932s401_driver = {
+	.class		= I2C_CLASS_HWMON,
+	.driver = {
+		.name	= "ics932s401",
+	},
+	.probe		= ics932s401_probe,
+	.remove		= ics932s401_remove,
+	.id_table	= ics932s401_id,
+	.detect		= ics932s401_detect,
+	.address_list	= normal_i2c,
+};
+
+static struct ics932s401_data *ics932s401_update_device(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct ics932s401_data *data = i2c_get_clientdata(client);
+	unsigned long local_jiffies = jiffies;
+	int i, temp;
+
+	mutex_lock(&data->lock);
+	if (time_before(local_jiffies, data->sensors_last_updated +
+		SENSOR_REFRESH_INTERVAL)
+		&& data->sensors_valid)
+		goto out;
+
+	/*
+	 * Each register must be read as a word and then right shifted 8 bits.
+	 * Not really sure why this is; setting the "byte count programming"
+	 * register to 1 does not fix this problem.
+	 */
+	for (i = 0; i < NUM_MIRRORED_REGS; i++) {
+		temp = i2c_smbus_read_word_data(client, regs_to_copy[i]);
+		data->regs[regs_to_copy[i]] = temp >> 8;
+	}
+
+	data->sensors_last_updated = local_jiffies;
+	data->sensors_valid = 1;
+
+out:
+	mutex_unlock(&data->lock);
+	return data;
+}
+
+static ssize_t show_spread_enabled(struct device *dev,
+				   struct device_attribute *devattr,
+				   char *buf)
+{
+	struct ics932s401_data *data = ics932s401_update_device(dev);
+
+	if (data->regs[ICS932S401_REG_CFG2] & ICS932S401_CFG1_SPREAD)
+		return sprintf(buf, "1\n");
+
+	return sprintf(buf, "0\n");
+}
+
+/* bit to cpu khz map */
+static const int fs_speeds[] = {
+	266666,
+	133333,
+	200000,
+	166666,
+	333333,
+	100000,
+	400000,
+	0,
+};
+
+/* clock divisor map */
+static const int divisors[] = {2, 3, 5, 15, 4, 6, 10, 30, 8, 12, 20, 60, 16,
+			       24, 40, 120};
+
+/* Calculate CPU frequency from the M/N registers. */
+static int calculate_cpu_freq(struct ics932s401_data *data)
+{
+	int m, n, freq;
+
+	m = data->regs[ICS932S401_REG_CPU_M_CTRL] & ICS932S401_M_MASK;
+	n = data->regs[ICS932S401_REG_CPU_N_CTRL];
+
+	/* Pull in bits 8 & 9 from the M register */
+	n |= ((int)data->regs[ICS932S401_REG_CPU_M_CTRL] & 0x80) << 1;
+	n |= ((int)data->regs[ICS932S401_REG_CPU_M_CTRL] & 0x40) << 3;
+
+	freq = BASE_CLOCK * (n + 8) / (m + 2);
+	freq /= divisors[data->regs[ICS932S401_REG_CPU_DIVISOR] >>
+			 ICS932S401_CPU_DIVISOR_SHIFT];
+
+	return freq;
+}
+
+static ssize_t show_cpu_clock(struct device *dev,
+			      struct device_attribute *devattr,
+			      char *buf)
+{
+	struct ics932s401_data *data = ics932s401_update_device(dev);
+
+	return sprintf(buf, "%d\n", calculate_cpu_freq(data));
+}
+
+static ssize_t show_cpu_clock_sel(struct device *dev,
+				  struct device_attribute *devattr,
+				  char *buf)
+{
+	struct ics932s401_data *data = ics932s401_update_device(dev);
+	int freq;
+
+	if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_MN_ENABLED)
+		freq = calculate_cpu_freq(data);
+	else {
+		/* Freq is neatly wrapped up for us */
+		int fid = data->regs[ICS932S401_REG_CFG7] & ICS932S401_FS_MASK;
+
+		freq = fs_speeds[fid];
+		if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_CPU_ALT) {
+			switch (freq) {
+			case 166666:
+				freq = 160000;
+				break;
+			case 333333:
+				freq = 320000;
+				break;
+			}
+		}
+	}
+
+	return sprintf(buf, "%d\n", freq);
+}
+
+/* Calculate SRC frequency from the M/N registers. */
+static int calculate_src_freq(struct ics932s401_data *data)
+{
+	int m, n, freq;
+
+	m = data->regs[ICS932S401_REG_SRC_M_CTRL] & ICS932S401_M_MASK;
+	n = data->regs[ICS932S401_REG_SRC_N_CTRL];
+
+	/* Pull in bits 8 & 9 from the M register */
+	n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x80) << 1;
+	n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x40) << 3;
+
+	freq = BASE_CLOCK * (n + 8) / (m + 2);
+	freq /= divisors[data->regs[ICS932S401_REG_PCISRC_DIVISOR] &
+			 ICS932S401_SRC_DIVISOR_MASK];
+
+	return freq;
+}
+
+static ssize_t show_src_clock(struct device *dev,
+			      struct device_attribute *devattr,
+			      char *buf)
+{
+	struct ics932s401_data *data = ics932s401_update_device(dev);
+
+	return sprintf(buf, "%d\n", calculate_src_freq(data));
+}
+
+static ssize_t show_src_clock_sel(struct device *dev,
+				  struct device_attribute *devattr,
+				  char *buf)
+{
+	struct ics932s401_data *data = ics932s401_update_device(dev);
+	int freq;
+
+	if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_MN_ENABLED)
+		freq = calculate_src_freq(data);
+	else
+		/* Freq is neatly wrapped up for us */
+		if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_CPU_ALT &&
+		    data->regs[ICS932S401_REG_CTRL] & ICS932S401_SRC_ALT)
+			freq = 96000;
+		else
+			freq = 100000;
+
+	return sprintf(buf, "%d\n", freq);
+}
+
+/* Calculate PCI frequency from the SRC M/N registers. */
+static int calculate_pci_freq(struct ics932s401_data *data)
+{
+	int m, n, freq;
+
+	m = data->regs[ICS932S401_REG_SRC_M_CTRL] & ICS932S401_M_MASK;
+	n = data->regs[ICS932S401_REG_SRC_N_CTRL];
+
+	/* Pull in bits 8 & 9 from the M register */
+	n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x80) << 1;
+	n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x40) << 3;
+
+	freq = BASE_CLOCK * (n + 8) / (m + 2);
+	freq /= divisors[data->regs[ICS932S401_REG_PCISRC_DIVISOR] >>
+			 ICS932S401_PCI_DIVISOR_SHIFT];
+
+	return freq;
+}
+
+static ssize_t show_pci_clock(struct device *dev,
+			      struct device_attribute *devattr,
+			      char *buf)
+{
+	struct ics932s401_data *data = ics932s401_update_device(dev);
+
+	return sprintf(buf, "%d\n", calculate_pci_freq(data));
+}
+
+static ssize_t show_pci_clock_sel(struct device *dev,
+				  struct device_attribute *devattr,
+				  char *buf)
+{
+	struct ics932s401_data *data = ics932s401_update_device(dev);
+	int freq;
+
+	if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_MN_ENABLED)
+		freq = calculate_pci_freq(data);
+	else
+		freq = 33333;
+
+	return sprintf(buf, "%d\n", freq);
+}
+
+static ssize_t show_value(struct device *dev,
+			  struct device_attribute *devattr,
+			  char *buf);
+
+static ssize_t show_spread(struct device *dev,
+			   struct device_attribute *devattr,
+			   char *buf);
+
+static DEVICE_ATTR(spread_enabled, S_IRUGO, show_spread_enabled, NULL);
+static DEVICE_ATTR(cpu_clock_selection, S_IRUGO, show_cpu_clock_sel, NULL);
+static DEVICE_ATTR(cpu_clock, S_IRUGO, show_cpu_clock, NULL);
+static DEVICE_ATTR(src_clock_selection, S_IRUGO, show_src_clock_sel, NULL);
+static DEVICE_ATTR(src_clock, S_IRUGO, show_src_clock, NULL);
+static DEVICE_ATTR(pci_clock_selection, S_IRUGO, show_pci_clock_sel, NULL);
+static DEVICE_ATTR(pci_clock, S_IRUGO, show_pci_clock, NULL);
+static DEVICE_ATTR(usb_clock, S_IRUGO, show_value, NULL);
+static DEVICE_ATTR(ref_clock, S_IRUGO, show_value, NULL);
+static DEVICE_ATTR(cpu_spread, S_IRUGO, show_spread, NULL);
+static DEVICE_ATTR(src_spread, S_IRUGO, show_spread, NULL);
+
+static struct attribute *ics932s401_attr[] = {
+	&dev_attr_spread_enabled.attr,
+	&dev_attr_cpu_clock_selection.attr,
+	&dev_attr_cpu_clock.attr,
+	&dev_attr_src_clock_selection.attr,
+	&dev_attr_src_clock.attr,
+	&dev_attr_pci_clock_selection.attr,
+	&dev_attr_pci_clock.attr,
+	&dev_attr_usb_clock.attr,
+	&dev_attr_ref_clock.attr,
+	&dev_attr_cpu_spread.attr,
+	&dev_attr_src_spread.attr,
+	NULL
+};
+
+static ssize_t show_value(struct device *dev,
+			  struct device_attribute *devattr,
+			  char *buf)
+{
+	int x;
+
+	if (devattr == &dev_attr_usb_clock)
+		x = 48000;
+	else if (devattr == &dev_attr_ref_clock)
+		x = BASE_CLOCK;
+	else
+		BUG();
+
+	return sprintf(buf, "%d\n", x);
+}
+
+static ssize_t show_spread(struct device *dev,
+			   struct device_attribute *devattr,
+			   char *buf)
+{
+	struct ics932s401_data *data = ics932s401_update_device(dev);
+	int reg;
+	unsigned long val;
+
+	if (!(data->regs[ICS932S401_REG_CFG2] & ICS932S401_CFG1_SPREAD))
+		return sprintf(buf, "0%%\n");
+
+	if (devattr == &dev_attr_src_spread)
+		reg = ICS932S401_REG_SRC_SPREAD1;
+	else if (devattr == &dev_attr_cpu_spread)
+		reg = ICS932S401_REG_CPU_SPREAD1;
+	else
+		BUG();
+
+	val = data->regs[reg] | (data->regs[reg + 1] << 8);
+	val &= ICS932S401_SPREAD_MASK;
+
+	/* Scale 0..2^14 to -0.5. */
+	val = 500000 * val / 16384;
+	return sprintf(buf, "-0.%lu%%\n", val);
+}
+
+/* Return 0 if detection is successful, -ENODEV otherwise */
+static int ics932s401_detect(struct i2c_client *client,
+			  struct i2c_board_info *info)
+{
+	struct i2c_adapter *adapter = client->adapter;
+	int vendor, device, revision;
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA))
+		return -ENODEV;
+
+	vendor = i2c_smbus_read_word_data(client, ICS932S401_REG_VENDOR_REV);
+	vendor >>= 8;
+	revision = vendor >> ICS932S401_REV_SHIFT;
+	vendor &= ICS932S401_VENDOR_MASK;
+	if (vendor != ICS932S401_VENDOR)
+		return -ENODEV;
+
+	device = i2c_smbus_read_word_data(client, ICS932S401_REG_DEVICE);
+	device >>= 8;
+	if (device != ICS932S401_DEVICE)
+		return -ENODEV;
+
+	if (revision != ICS932S401_REV)
+		dev_info(&adapter->dev, "Unknown revision %d\n", revision);
+
+	strlcpy(info->type, "ics932s401", I2C_NAME_SIZE);
+
+	return 0;
+}
+
+static int ics932s401_probe(struct i2c_client *client,
+			 const struct i2c_device_id *id)
+{
+	struct ics932s401_data *data;
+	int err;
+
+	data = kzalloc(sizeof(struct ics932s401_data), GFP_KERNEL);
+	if (!data) {
+		err = -ENOMEM;
+		goto exit;
+	}
+
+	i2c_set_clientdata(client, data);
+	mutex_init(&data->lock);
+
+	dev_info(&client->dev, "%s chip found\n", client->name);
+
+	/* Register sysfs hooks */
+	data->attrs.attrs = ics932s401_attr;
+	err = sysfs_create_group(&client->dev.kobj, &data->attrs);
+	if (err)
+		goto exit_free;
+
+	return 0;
+
+exit_free:
+	kfree(data);
+exit:
+	return err;
+}
+
+static int ics932s401_remove(struct i2c_client *client)
+{
+	struct ics932s401_data *data = i2c_get_clientdata(client);
+
+	sysfs_remove_group(&client->dev.kobj, &data->attrs);
+	kfree(data);
+	return 0;
+}
+
+module_i2c_driver(ics932s401_driver);
+
+MODULE_AUTHOR("Darrick J. Wong <darrick.wong@oracle.com>");
+MODULE_DESCRIPTION("ICS932S401 driver");
+MODULE_LICENSE("GPL");
+
+/* IBM IntelliStation Z30 */
+MODULE_ALIAS("dmi:bvnIBM:*:rn9228:*");
+MODULE_ALIAS("dmi:bvnIBM:*:rn9232:*");
+
+/* IBM x3650/x3550 */
+MODULE_ALIAS("dmi:bvnIBM:*:pnIBMSystemx3650*");
+MODULE_ALIAS("dmi:bvnIBM:*:pnIBMSystemx3550*");
diff --git a/drivers/misc/ioc4.c b/drivers/misc/ioc4.c
new file mode 100644
index 000000000..ec0832278
--- /dev/null
+++ b/drivers/misc/ioc4.c
@@ -0,0 +1,500 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2005-2006 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/* This file contains the master driver module for use by SGI IOC4 subdrivers.
+ *
+ * It allocates any resources shared between multiple subdevices, and
+ * provides accessor functions (where needed) and the like for those
+ * resources.  It also provides a mechanism for the subdevice modules
+ * to support loading and unloading.
+ *
+ * Non-shared resources (e.g. external interrupt A_INT_OUT register page
+ * alias, serial port and UART registers) are handled by the subdevice
+ * modules themselves.
+ *
+ * This is all necessary because IOC4 is not implemented as a multi-function
+ * PCI device, but an amalgamation of disparate registers for several
+ * types of device (ATA, serial, external interrupts).  The normal
+ * resource management in the kernel doesn't have quite the right interfaces
+ * to handle this situation (e.g. multiple modules can't claim the same
+ * PCI ID), thus this IOC4 master module.
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/ioc4.h>
+#include <linux/ktime.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/time.h>
+#include <asm/io.h>
+
+/***************
+ * Definitions *
+ ***************/
+
+/* Tweakable values */
+
+/* PCI bus speed detection/calibration */
+#define IOC4_CALIBRATE_COUNT 63		/* Calibration cycle period */
+#define IOC4_CALIBRATE_CYCLES 256	/* Average over this many cycles */
+#define IOC4_CALIBRATE_DISCARD 2	/* Discard first few cycles */
+#define IOC4_CALIBRATE_LOW_MHZ 25	/* Lower bound on bus speed sanity */
+#define IOC4_CALIBRATE_HIGH_MHZ 75	/* Upper bound on bus speed sanity */
+#define IOC4_CALIBRATE_DEFAULT_MHZ 66	/* Assumed if sanity check fails */
+
+/************************
+ * Submodule management *
+ ************************/
+
+static DEFINE_MUTEX(ioc4_mutex);
+
+static LIST_HEAD(ioc4_devices);
+static LIST_HEAD(ioc4_submodules);
+
+/* Register an IOC4 submodule */
+int
+ioc4_register_submodule(struct ioc4_submodule *is)
+{
+	struct ioc4_driver_data *idd;
+
+	mutex_lock(&ioc4_mutex);
+	list_add(&is->is_list, &ioc4_submodules);
+
+	/* Initialize submodule for each IOC4 */
+	if (!is->is_probe)
+		goto out;
+
+	list_for_each_entry(idd, &ioc4_devices, idd_list) {
+		if (is->is_probe(idd)) {
+			printk(KERN_WARNING
+			       "%s: IOC4 submodule %s probe failed "
+			       "for pci_dev %s",
+			       __func__, module_name(is->is_owner),
+			       pci_name(idd->idd_pdev));
+		}
+	}
+ out:
+	mutex_unlock(&ioc4_mutex);
+	return 0;
+}
+
+/* Unregister an IOC4 submodule */
+void
+ioc4_unregister_submodule(struct ioc4_submodule *is)
+{
+	struct ioc4_driver_data *idd;
+
+	mutex_lock(&ioc4_mutex);
+	list_del(&is->is_list);
+
+	/* Remove submodule for each IOC4 */
+	if (!is->is_remove)
+		goto out;
+
+	list_for_each_entry(idd, &ioc4_devices, idd_list) {
+		if (is->is_remove(idd)) {
+			printk(KERN_WARNING
+			       "%s: IOC4 submodule %s remove failed "
+			       "for pci_dev %s.\n",
+			       __func__, module_name(is->is_owner),
+			       pci_name(idd->idd_pdev));
+		}
+	}
+ out:
+	mutex_unlock(&ioc4_mutex);
+}
+
+/*********************
+ * Device management *
+ *********************/
+
+#define IOC4_CALIBRATE_LOW_LIMIT \
+	(1000*IOC4_EXTINT_COUNT_DIVISOR/IOC4_CALIBRATE_LOW_MHZ)
+#define IOC4_CALIBRATE_HIGH_LIMIT \
+	(1000*IOC4_EXTINT_COUNT_DIVISOR/IOC4_CALIBRATE_HIGH_MHZ)
+#define IOC4_CALIBRATE_DEFAULT \
+	(1000*IOC4_EXTINT_COUNT_DIVISOR/IOC4_CALIBRATE_DEFAULT_MHZ)
+
+#define IOC4_CALIBRATE_END \
+	(IOC4_CALIBRATE_CYCLES + IOC4_CALIBRATE_DISCARD)
+
+#define IOC4_INT_OUT_MODE_TOGGLE 0x7	/* Toggle INT_OUT every COUNT+1 ticks */
+
+/* Determines external interrupt output clock period of the PCI bus an
+ * IOC4 is attached to.  This value can be used to determine the PCI
+ * bus speed.
+ *
+ * IOC4 has a design feature that various internal timers are derived from
+ * the PCI bus clock.  This causes IOC4 device drivers to need to take the
+ * bus speed into account when setting various register values (e.g. INT_OUT
+ * register COUNT field, UART divisors, etc).  Since this information is
+ * needed by several subdrivers, it is determined by the main IOC4 driver,
+ * even though the following code utilizes external interrupt registers
+ * to perform the speed calculation.
+ */
+static void
+ioc4_clock_calibrate(struct ioc4_driver_data *idd)
+{
+	union ioc4_int_out int_out;
+	union ioc4_gpcr gpcr;
+	unsigned int state, last_state;
+	uint64_t start, end, period;
+	unsigned int count;
+
+	/* Enable output */
+	gpcr.raw = 0;
+	gpcr.fields.dir = IOC4_GPCR_DIR_0;
+	gpcr.fields.int_out_en = 1;
+	writel(gpcr.raw, &idd->idd_misc_regs->gpcr_s.raw);
+
+	/* Reset to power-on state */
+	writel(0, &idd->idd_misc_regs->int_out.raw);
+	mmiowb();
+
+	/* Set up square wave */
+	int_out.raw = 0;
+	int_out.fields.count = IOC4_CALIBRATE_COUNT;
+	int_out.fields.mode = IOC4_INT_OUT_MODE_TOGGLE;
+	int_out.fields.diag = 0;
+	writel(int_out.raw, &idd->idd_misc_regs->int_out.raw);
+	mmiowb();
+
+	/* Check square wave period averaged over some number of cycles */
+	start = ktime_get_ns();
+	state = 1; /* make sure the first read isn't a rising edge */
+	for (count = 0; count <= IOC4_CALIBRATE_END; count++) {
+		do { /* wait for a rising edge */
+			last_state = state;
+			int_out.raw = readl(&idd->idd_misc_regs->int_out.raw);
+			state = int_out.fields.int_out;
+		} while (last_state || !state);
+
+		/* discard the first few cycles */
+		if (count == IOC4_CALIBRATE_DISCARD)
+			start = ktime_get_ns();
+	}
+	end = ktime_get_ns();
+
+	/* Calculation rearranged to preserve intermediate precision.
+	 * Logically:
+	 * 1. "end - start" gives us the measurement period over all
+	 *    the square wave cycles.
+	 * 2. Divide by number of square wave cycles to get the period
+	 *    of a square wave cycle.
+	 * 3. Divide by 2*(int_out.fields.count+1), which is the formula
+	 *    by which the IOC4 generates the square wave, to get the
+	 *    period of an IOC4 INT_OUT count.
+	 */
+	period = (end - start) /
+		(IOC4_CALIBRATE_CYCLES * 2 * (IOC4_CALIBRATE_COUNT + 1));
+
+	/* Bounds check the result. */
+	if (period > IOC4_CALIBRATE_LOW_LIMIT ||
+	    period < IOC4_CALIBRATE_HIGH_LIMIT) {
+		printk(KERN_INFO
+		       "IOC4 %s: Clock calibration failed.  Assuming"
+		       "PCI clock is %d ns.\n",
+		       pci_name(idd->idd_pdev),
+		       IOC4_CALIBRATE_DEFAULT / IOC4_EXTINT_COUNT_DIVISOR);
+		period = IOC4_CALIBRATE_DEFAULT;
+	} else {
+		u64 ns = period;
+
+		do_div(ns, IOC4_EXTINT_COUNT_DIVISOR);
+		printk(KERN_DEBUG
+		       "IOC4 %s: PCI clock is %llu ns.\n",
+		       pci_name(idd->idd_pdev), (unsigned long long)ns);
+	}
+
+	/* Remember results.  We store the extint clock period rather
+	 * than the PCI clock period so that greater precision is
+	 * retained.  Divide by IOC4_EXTINT_COUNT_DIVISOR to get
+	 * PCI clock period.
+	 */
+	idd->count_period = period;
+}
+
+/* There are three variants of IOC4 cards: IO9, IO10, and PCI-RT.
+ * Each brings out different combinations of IOC4 signals, thus.
+ * the IOC4 subdrivers need to know to which we're attached.
+ *
+ * We look for the presence of a SCSI (IO9) or SATA (IO10) controller
+ * on the same PCI bus at slot number 3 to differentiate IO9 from IO10.
+ * If neither is present, it's a PCI-RT.
+ */
+static unsigned int
+ioc4_variant(struct ioc4_driver_data *idd)
+{
+	struct pci_dev *pdev = NULL;
+	int found = 0;
+
+	/* IO9: Look for a QLogic ISP 12160 at the same bus and slot 3. */
+	do {
+		pdev = pci_get_device(PCI_VENDOR_ID_QLOGIC,
+				      PCI_DEVICE_ID_QLOGIC_ISP12160, pdev);
+		if (pdev &&
+		    idd->idd_pdev->bus->number == pdev->bus->number &&
+		    3 == PCI_SLOT(pdev->devfn))
+			found = 1;
+	} while (pdev && !found);
+	if (NULL != pdev) {
+		pci_dev_put(pdev);
+		return IOC4_VARIANT_IO9;
+	}
+
+	/* IO10: Look for a Vitesse VSC 7174 at the same bus and slot 3. */
+	pdev = NULL;
+	do {
+		pdev = pci_get_device(PCI_VENDOR_ID_VITESSE,
+				      PCI_DEVICE_ID_VITESSE_VSC7174, pdev);
+		if (pdev &&
+		    idd->idd_pdev->bus->number == pdev->bus->number &&
+		    3 == PCI_SLOT(pdev->devfn))
+			found = 1;
+	} while (pdev && !found);
+	if (NULL != pdev) {
+		pci_dev_put(pdev);
+		return IOC4_VARIANT_IO10;
+	}
+
+	/* PCI-RT: No SCSI/SATA controller will be present */
+	return IOC4_VARIANT_PCI_RT;
+}
+
+static void
+ioc4_load_modules(struct work_struct *work)
+{
+	request_module("sgiioc4");
+}
+
+static DECLARE_WORK(ioc4_load_modules_work, ioc4_load_modules);
+
+/* Adds a new instance of an IOC4 card */
+static int
+ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id)
+{
+	struct ioc4_driver_data *idd;
+	struct ioc4_submodule *is;
+	uint32_t pcmd;
+	int ret;
+
+	/* Enable IOC4 and take ownership of it */
+	if ((ret = pci_enable_device(pdev))) {
+		printk(KERN_WARNING
+		       "%s: Failed to enable IOC4 device for pci_dev %s.\n",
+		       __func__, pci_name(pdev));
+		goto out;
+	}
+	pci_set_master(pdev);
+
+	/* Set up per-IOC4 data */
+	idd = kmalloc(sizeof(struct ioc4_driver_data), GFP_KERNEL);
+	if (!idd) {
+		printk(KERN_WARNING
+		       "%s: Failed to allocate IOC4 data for pci_dev %s.\n",
+		       __func__, pci_name(pdev));
+		ret = -ENODEV;
+		goto out_idd;
+	}
+	idd->idd_pdev = pdev;
+	idd->idd_pci_id = pci_id;
+
+	/* Map IOC4 misc registers.  These are shared between subdevices
+	 * so the main IOC4 module manages them.
+	 */
+	idd->idd_bar0 = pci_resource_start(idd->idd_pdev, 0);
+	if (!idd->idd_bar0) {
+		printk(KERN_WARNING
+		       "%s: Unable to find IOC4 misc resource "
+		       "for pci_dev %s.\n",
+		       __func__, pci_name(idd->idd_pdev));
+		ret = -ENODEV;
+		goto out_pci;
+	}
+	if (!request_mem_region(idd->idd_bar0, sizeof(struct ioc4_misc_regs),
+			    "ioc4_misc")) {
+		printk(KERN_WARNING
+		       "%s: Unable to request IOC4 misc region "
+		       "for pci_dev %s.\n",
+		       __func__, pci_name(idd->idd_pdev));
+		ret = -ENODEV;
+		goto out_pci;
+	}
+	idd->idd_misc_regs = ioremap(idd->idd_bar0,
+				     sizeof(struct ioc4_misc_regs));
+	if (!idd->idd_misc_regs) {
+		printk(KERN_WARNING
+		       "%s: Unable to remap IOC4 misc region "
+		       "for pci_dev %s.\n",
+		       __func__, pci_name(idd->idd_pdev));
+		ret = -ENODEV;
+		goto out_misc_region;
+	}
+
+	/* Failsafe portion of per-IOC4 initialization */
+
+	/* Detect card variant */
+	idd->idd_variant = ioc4_variant(idd);
+	printk(KERN_INFO "IOC4 %s: %s card detected.\n", pci_name(pdev),
+	       idd->idd_variant == IOC4_VARIANT_IO9 ? "IO9" :
+	       idd->idd_variant == IOC4_VARIANT_PCI_RT ? "PCI-RT" :
+	       idd->idd_variant == IOC4_VARIANT_IO10 ? "IO10" : "unknown");
+
+	/* Initialize IOC4 */
+	pci_read_config_dword(idd->idd_pdev, PCI_COMMAND, &pcmd);
+	pci_write_config_dword(idd->idd_pdev, PCI_COMMAND,
+			       pcmd | PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
+
+	/* Determine PCI clock */
+	ioc4_clock_calibrate(idd);
+
+	/* Disable/clear all interrupts.  Need to do this here lest
+	 * one submodule request the shared IOC4 IRQ, but interrupt
+	 * is generated by a different subdevice.
+	 */
+	/* Disable */
+	writel(~0, &idd->idd_misc_regs->other_iec.raw);
+	writel(~0, &idd->idd_misc_regs->sio_iec);
+	/* Clear (i.e. acknowledge) */
+	writel(~0, &idd->idd_misc_regs->other_ir.raw);
+	writel(~0, &idd->idd_misc_regs->sio_ir);
+
+	/* Track PCI-device specific data */
+	idd->idd_serial_data = NULL;
+	pci_set_drvdata(idd->idd_pdev, idd);
+
+	mutex_lock(&ioc4_mutex);
+	list_add_tail(&idd->idd_list, &ioc4_devices);
+
+	/* Add this IOC4 to all submodules */
+	list_for_each_entry(is, &ioc4_submodules, is_list) {
+		if (is->is_probe && is->is_probe(idd)) {
+			printk(KERN_WARNING
+			       "%s: IOC4 submodule 0x%s probe failed "
+			       "for pci_dev %s.\n",
+			       __func__, module_name(is->is_owner),
+			       pci_name(idd->idd_pdev));
+		}
+	}
+	mutex_unlock(&ioc4_mutex);
+
+	/* Request sgiioc4 IDE driver on boards that bring that functionality
+	 * off of IOC4.  The root filesystem may be hosted on a drive connected
+	 * to IOC4, so we need to make sure the sgiioc4 driver is loaded as it
+	 * won't be picked up by modprobes due to the ioc4 module owning the
+	 * PCI device.
+	 */
+	if (idd->idd_variant != IOC4_VARIANT_PCI_RT) {
+		/* Request the module from a work procedure as the modprobe
+		 * goes out to a userland helper and that will hang if done
+		 * directly from ioc4_probe().
+		 */
+		printk(KERN_INFO "IOC4 loading sgiioc4 submodule\n");
+		schedule_work(&ioc4_load_modules_work);
+	}
+
+	return 0;
+
+out_misc_region:
+	release_mem_region(idd->idd_bar0, sizeof(struct ioc4_misc_regs));
+out_pci:
+	kfree(idd);
+out_idd:
+	pci_disable_device(pdev);
+out:
+	return ret;
+}
+
+/* Removes a particular instance of an IOC4 card. */
+static void
+ioc4_remove(struct pci_dev *pdev)
+{
+	struct ioc4_submodule *is;
+	struct ioc4_driver_data *idd;
+
+	idd = pci_get_drvdata(pdev);
+
+	/* Remove this IOC4 from all submodules */
+	mutex_lock(&ioc4_mutex);
+	list_for_each_entry(is, &ioc4_submodules, is_list) {
+		if (is->is_remove && is->is_remove(idd)) {
+			printk(KERN_WARNING
+			       "%s: IOC4 submodule 0x%s remove failed "
+			       "for pci_dev %s.\n",
+			       __func__, module_name(is->is_owner),
+			       pci_name(idd->idd_pdev));
+		}
+	}
+	mutex_unlock(&ioc4_mutex);
+
+	/* Release resources */
+	iounmap(idd->idd_misc_regs);
+	if (!idd->idd_bar0) {
+		printk(KERN_WARNING
+		       "%s: Unable to get IOC4 misc mapping for pci_dev %s. "
+		       "Device removal may be incomplete.\n",
+		       __func__, pci_name(idd->idd_pdev));
+	}
+	release_mem_region(idd->idd_bar0, sizeof(struct ioc4_misc_regs));
+
+	/* Disable IOC4 and relinquish */
+	pci_disable_device(pdev);
+
+	/* Remove and free driver data */
+	mutex_lock(&ioc4_mutex);
+	list_del(&idd->idd_list);
+	mutex_unlock(&ioc4_mutex);
+	kfree(idd);
+}
+
+static const struct pci_device_id ioc4_id_table[] = {
+	{PCI_VENDOR_ID_SGI, PCI_DEVICE_ID_SGI_IOC4, PCI_ANY_ID,
+	 PCI_ANY_ID, 0x0b4000, 0xFFFFFF},
+	{0}
+};
+
+static struct pci_driver ioc4_driver = {
+	.name = "IOC4",
+	.id_table = ioc4_id_table,
+	.probe = ioc4_probe,
+	.remove = ioc4_remove,
+};
+
+MODULE_DEVICE_TABLE(pci, ioc4_id_table);
+
+/*********************
+ * Module management *
+ *********************/
+
+/* Module load */
+static int __init
+ioc4_init(void)
+{
+	return pci_register_driver(&ioc4_driver);
+}
+
+/* Module unload */
+static void __exit
+ioc4_exit(void)
+{
+	/* Ensure ioc4_load_modules() has completed before exiting */
+	flush_work(&ioc4_load_modules_work);
+	pci_unregister_driver(&ioc4_driver);
+}
+
+module_init(ioc4_init);
+module_exit(ioc4_exit);
+
+MODULE_AUTHOR("Brent Casavant - Silicon Graphics, Inc. <bcasavan@sgi.com>");
+MODULE_DESCRIPTION("PCI driver master module for SGI IOC4 Base-IO Card");
+MODULE_LICENSE("GPL");
+
+EXPORT_SYMBOL(ioc4_register_submodule);
+EXPORT_SYMBOL(ioc4_unregister_submodule);
diff --git a/drivers/misc/isl29003.c b/drivers/misc/isl29003.c
new file mode 100644
index 000000000..b8032882c
--- /dev/null
+++ b/drivers/misc/isl29003.c
@@ -0,0 +1,487 @@
+/*
+ *  isl29003.c - Linux kernel module for
+ * 	Intersil ISL29003 ambient light sensor
+ *
+ *  See file:Documentation/misc-devices/isl29003
+ *
+ *  Copyright (c) 2009 Daniel Mack <daniel@caiaq.de>
+ *
+ *  Based on code written by
+ *  	Rodolfo Giometti <giometti@linux.it>
+ *  	Eurotech S.p.A. <info@eurotech.it>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+
+#define ISL29003_DRV_NAME	"isl29003"
+#define DRIVER_VERSION		"1.0"
+
+#define ISL29003_REG_COMMAND		0x00
+#define ISL29003_ADC_ENABLED		(1 << 7)
+#define ISL29003_ADC_PD			(1 << 6)
+#define ISL29003_TIMING_INT		(1 << 5)
+#define ISL29003_MODE_SHIFT		(2)
+#define ISL29003_MODE_MASK		(0x3 << ISL29003_MODE_SHIFT)
+#define ISL29003_RES_SHIFT		(0)
+#define ISL29003_RES_MASK		(0x3 << ISL29003_RES_SHIFT)
+
+#define ISL29003_REG_CONTROL		0x01
+#define ISL29003_INT_FLG		(1 << 5)
+#define ISL29003_RANGE_SHIFT		(2)
+#define ISL29003_RANGE_MASK		(0x3 << ISL29003_RANGE_SHIFT)
+#define ISL29003_INT_PERSISTS_SHIFT	(0)
+#define ISL29003_INT_PERSISTS_MASK	(0xf << ISL29003_INT_PERSISTS_SHIFT)
+
+#define ISL29003_REG_IRQ_THRESH_HI	0x02
+#define ISL29003_REG_IRQ_THRESH_LO	0x03
+#define ISL29003_REG_LSB_SENSOR		0x04
+#define ISL29003_REG_MSB_SENSOR		0x05
+#define ISL29003_REG_LSB_TIMER		0x06
+#define ISL29003_REG_MSB_TIMER		0x07
+
+#define ISL29003_NUM_CACHABLE_REGS	4
+
+struct isl29003_data {
+	struct i2c_client *client;
+	struct mutex lock;
+	u8 reg_cache[ISL29003_NUM_CACHABLE_REGS];
+	u8 power_state_before_suspend;
+};
+
+static int gain_range[] = {
+	1000, 4000, 16000, 64000
+};
+
+/*
+ * register access helpers
+ */
+
+static int __isl29003_read_reg(struct i2c_client *client,
+			       u32 reg, u8 mask, u8 shift)
+{
+	struct isl29003_data *data = i2c_get_clientdata(client);
+
+	return (data->reg_cache[reg] & mask) >> shift;
+}
+
+static int __isl29003_write_reg(struct i2c_client *client,
+				u32 reg, u8 mask, u8 shift, u8 val)
+{
+	struct isl29003_data *data = i2c_get_clientdata(client);
+	int ret = 0;
+	u8 tmp;
+
+	if (reg >= ISL29003_NUM_CACHABLE_REGS)
+		return -EINVAL;
+
+	mutex_lock(&data->lock);
+
+	tmp = data->reg_cache[reg];
+	tmp &= ~mask;
+	tmp |= val << shift;
+
+	ret = i2c_smbus_write_byte_data(client, reg, tmp);
+	if (!ret)
+		data->reg_cache[reg] = tmp;
+
+	mutex_unlock(&data->lock);
+	return ret;
+}
+
+/*
+ * internally used functions
+ */
+
+/* range */
+static int isl29003_get_range(struct i2c_client *client)
+{
+	return __isl29003_read_reg(client, ISL29003_REG_CONTROL,
+		ISL29003_RANGE_MASK, ISL29003_RANGE_SHIFT);
+}
+
+static int isl29003_set_range(struct i2c_client *client, int range)
+{
+	return __isl29003_write_reg(client, ISL29003_REG_CONTROL,
+		ISL29003_RANGE_MASK, ISL29003_RANGE_SHIFT, range);
+}
+
+/* resolution */
+static int isl29003_get_resolution(struct i2c_client *client)
+{
+	return __isl29003_read_reg(client, ISL29003_REG_COMMAND,
+		ISL29003_RES_MASK, ISL29003_RES_SHIFT);
+}
+
+static int isl29003_set_resolution(struct i2c_client *client, int res)
+{
+	return __isl29003_write_reg(client, ISL29003_REG_COMMAND,
+		ISL29003_RES_MASK, ISL29003_RES_SHIFT, res);
+}
+
+/* mode */
+static int isl29003_get_mode(struct i2c_client *client)
+{
+	return __isl29003_read_reg(client, ISL29003_REG_COMMAND,
+		ISL29003_RES_MASK, ISL29003_RES_SHIFT);
+}
+
+static int isl29003_set_mode(struct i2c_client *client, int mode)
+{
+	return __isl29003_write_reg(client, ISL29003_REG_COMMAND,
+		ISL29003_RES_MASK, ISL29003_RES_SHIFT, mode);
+}
+
+/* power_state */
+static int isl29003_set_power_state(struct i2c_client *client, int state)
+{
+	return __isl29003_write_reg(client, ISL29003_REG_COMMAND,
+				ISL29003_ADC_ENABLED | ISL29003_ADC_PD, 0,
+				state ? ISL29003_ADC_ENABLED : ISL29003_ADC_PD);
+}
+
+static int isl29003_get_power_state(struct i2c_client *client)
+{
+	struct isl29003_data *data = i2c_get_clientdata(client);
+	u8 cmdreg = data->reg_cache[ISL29003_REG_COMMAND];
+
+	return ~cmdreg & ISL29003_ADC_PD;
+}
+
+static int isl29003_get_adc_value(struct i2c_client *client)
+{
+	struct isl29003_data *data = i2c_get_clientdata(client);
+	int lsb, msb, range, bitdepth;
+
+	mutex_lock(&data->lock);
+	lsb = i2c_smbus_read_byte_data(client, ISL29003_REG_LSB_SENSOR);
+
+	if (lsb < 0) {
+		mutex_unlock(&data->lock);
+		return lsb;
+	}
+
+	msb = i2c_smbus_read_byte_data(client, ISL29003_REG_MSB_SENSOR);
+	mutex_unlock(&data->lock);
+
+	if (msb < 0)
+		return msb;
+
+	range = isl29003_get_range(client);
+	bitdepth = (4 - isl29003_get_resolution(client)) * 4;
+	return (((msb << 8) | lsb) * gain_range[range]) >> bitdepth;
+}
+
+/*
+ * sysfs layer
+ */
+
+/* range */
+static ssize_t isl29003_show_range(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+
+	return sprintf(buf, "%i\n", isl29003_get_range(client));
+}
+
+static ssize_t isl29003_store_range(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (val > 3)
+		return -EINVAL;
+
+	ret = isl29003_set_range(client, val);
+	if (ret < 0)
+		return ret;
+
+	return count;
+}
+
+static DEVICE_ATTR(range, S_IWUSR | S_IRUGO,
+		   isl29003_show_range, isl29003_store_range);
+
+
+/* resolution */
+static ssize_t isl29003_show_resolution(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+
+	return sprintf(buf, "%d\n", isl29003_get_resolution(client));
+}
+
+static ssize_t isl29003_store_resolution(struct device *dev,
+					 struct device_attribute *attr,
+					 const char *buf, size_t count)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (val > 3)
+		return -EINVAL;
+
+	ret = isl29003_set_resolution(client, val);
+	if (ret < 0)
+		return ret;
+
+	return count;
+}
+
+static DEVICE_ATTR(resolution, S_IWUSR | S_IRUGO,
+		   isl29003_show_resolution, isl29003_store_resolution);
+
+/* mode */
+static ssize_t isl29003_show_mode(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+
+	return sprintf(buf, "%d\n", isl29003_get_mode(client));
+}
+
+static ssize_t isl29003_store_mode(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (val > 2)
+		return -EINVAL;
+
+	ret = isl29003_set_mode(client, val);
+	if (ret < 0)
+		return ret;
+
+	return count;
+}
+
+static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO,
+		   isl29003_show_mode, isl29003_store_mode);
+
+
+/* power state */
+static ssize_t isl29003_show_power_state(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+
+	return sprintf(buf, "%d\n", isl29003_get_power_state(client));
+}
+
+static ssize_t isl29003_store_power_state(struct device *dev,
+					  struct device_attribute *attr,
+					  const char *buf, size_t count)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (val > 1)
+		return -EINVAL;
+
+	ret = isl29003_set_power_state(client, val);
+	return ret ? ret : count;
+}
+
+static DEVICE_ATTR(power_state, S_IWUSR | S_IRUGO,
+		   isl29003_show_power_state, isl29003_store_power_state);
+
+
+/* lux */
+static ssize_t isl29003_show_lux(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+
+	/* No LUX data if not operational */
+	if (!isl29003_get_power_state(client))
+		return -EBUSY;
+
+	return sprintf(buf, "%d\n", isl29003_get_adc_value(client));
+}
+
+static DEVICE_ATTR(lux, S_IRUGO, isl29003_show_lux, NULL);
+
+static struct attribute *isl29003_attributes[] = {
+	&dev_attr_range.attr,
+	&dev_attr_resolution.attr,
+	&dev_attr_mode.attr,
+	&dev_attr_power_state.attr,
+	&dev_attr_lux.attr,
+	NULL
+};
+
+static const struct attribute_group isl29003_attr_group = {
+	.attrs = isl29003_attributes,
+};
+
+static int isl29003_init_client(struct i2c_client *client)
+{
+	struct isl29003_data *data = i2c_get_clientdata(client);
+	int i;
+
+	/* read all the registers once to fill the cache.
+	 * if one of the reads fails, we consider the init failed */
+	for (i = 0; i < ARRAY_SIZE(data->reg_cache); i++) {
+		int v = i2c_smbus_read_byte_data(client, i);
+
+		if (v < 0)
+			return -ENODEV;
+
+		data->reg_cache[i] = v;
+	}
+
+	/* set defaults */
+	isl29003_set_range(client, 0);
+	isl29003_set_resolution(client, 0);
+	isl29003_set_mode(client, 0);
+	isl29003_set_power_state(client, 0);
+
+	return 0;
+}
+
+/*
+ * I2C layer
+ */
+
+static int isl29003_probe(struct i2c_client *client,
+				    const struct i2c_device_id *id)
+{
+	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+	struct isl29003_data *data;
+	int err = 0;
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE))
+		return -EIO;
+
+	data = kzalloc(sizeof(struct isl29003_data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->client = client;
+	i2c_set_clientdata(client, data);
+	mutex_init(&data->lock);
+
+	/* initialize the ISL29003 chip */
+	err = isl29003_init_client(client);
+	if (err)
+		goto exit_kfree;
+
+	/* register sysfs hooks */
+	err = sysfs_create_group(&client->dev.kobj, &isl29003_attr_group);
+	if (err)
+		goto exit_kfree;
+
+	dev_info(&client->dev, "driver version %s enabled\n", DRIVER_VERSION);
+	return 0;
+
+exit_kfree:
+	kfree(data);
+	return err;
+}
+
+static int isl29003_remove(struct i2c_client *client)
+{
+	sysfs_remove_group(&client->dev.kobj, &isl29003_attr_group);
+	isl29003_set_power_state(client, 0);
+	kfree(i2c_get_clientdata(client));
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int isl29003_suspend(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct isl29003_data *data = i2c_get_clientdata(client);
+
+	data->power_state_before_suspend = isl29003_get_power_state(client);
+	return isl29003_set_power_state(client, 0);
+}
+
+static int isl29003_resume(struct device *dev)
+{
+	int i;
+	struct i2c_client *client = to_i2c_client(dev);
+	struct isl29003_data *data = i2c_get_clientdata(client);
+
+	/* restore registers from cache */
+	for (i = 0; i < ARRAY_SIZE(data->reg_cache); i++)
+		if (i2c_smbus_write_byte_data(client, i, data->reg_cache[i]))
+			return -EIO;
+
+	return isl29003_set_power_state(client,
+		data->power_state_before_suspend);
+}
+
+static SIMPLE_DEV_PM_OPS(isl29003_pm_ops, isl29003_suspend, isl29003_resume);
+#define ISL29003_PM_OPS (&isl29003_pm_ops)
+
+#else
+#define ISL29003_PM_OPS NULL
+#endif /* CONFIG_PM_SLEEP */
+
+static const struct i2c_device_id isl29003_id[] = {
+	{ "isl29003", 0 },
+	{}
+};
+MODULE_DEVICE_TABLE(i2c, isl29003_id);
+
+static struct i2c_driver isl29003_driver = {
+	.driver = {
+		.name	= ISL29003_DRV_NAME,
+		.pm	= ISL29003_PM_OPS,
+	},
+	.probe	= isl29003_probe,
+	.remove	= isl29003_remove,
+	.id_table = isl29003_id,
+};
+
+module_i2c_driver(isl29003_driver);
+
+MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>");
+MODULE_DESCRIPTION("ISL29003 ambient light sensor driver");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(DRIVER_VERSION);
diff --git a/drivers/misc/isl29020.c b/drivers/misc/isl29020.c
new file mode 100644
index 000000000..e3bd3c1d2
--- /dev/null
+++ b/drivers/misc/isl29020.c
@@ -0,0 +1,238 @@
+/*
+ * isl29020.c - Intersil  ALS Driver
+ *
+ * Copyright (C) 2008 Intel Corp
+ *
+ *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * Data sheet at: http://www.intersil.com/data/fn/fn6505.pdf
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/sysfs.h>
+#include <linux/pm_runtime.h>
+
+static DEFINE_MUTEX(mutex);
+
+static ssize_t als_sensing_range_show(struct device *dev,
+			struct device_attribute *attr,  char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	int  val;
+
+	val = i2c_smbus_read_byte_data(client, 0x00);
+
+	if (val < 0)
+		return val;
+	return sprintf(buf, "%d000\n", 1 << (2 * (val & 3)));
+
+}
+
+static ssize_t als_lux_input_data_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	int ret_val, val;
+	unsigned long int lux;
+	int temp;
+
+	pm_runtime_get_sync(dev);
+	msleep(100);
+
+	mutex_lock(&mutex);
+	temp = i2c_smbus_read_byte_data(client, 0x02); /* MSB data */
+	if (temp < 0) {
+		pm_runtime_put_sync(dev);
+		mutex_unlock(&mutex);
+		return temp;
+	}
+
+	ret_val = i2c_smbus_read_byte_data(client, 0x01); /* LSB data */
+	mutex_unlock(&mutex);
+
+	if (ret_val < 0) {
+		pm_runtime_put_sync(dev);
+		return ret_val;
+	}
+
+	ret_val |= temp << 8;
+	val = i2c_smbus_read_byte_data(client, 0x00);
+	pm_runtime_put_sync(dev);
+	if (val < 0)
+		return val;
+	lux = ((((1 << (2 * (val & 3))))*1000) * ret_val) / 65536;
+	return sprintf(buf, "%ld\n", lux);
+}
+
+static ssize_t als_sensing_range_store(struct device *dev,
+		struct device_attribute *attr, const  char *buf, size_t count)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	int ret_val;
+	unsigned long val;
+
+	ret_val = kstrtoul(buf, 10, &val);
+	if (ret_val)
+		return ret_val;
+
+	if (val < 1 || val > 64000)
+		return -EINVAL;
+
+	/* Pick the smallest sensor range that will meet our requirements */
+	if (val <= 1000)
+		val = 1;
+	else if (val <= 4000)
+		val = 2;
+	else if (val <= 16000)
+		val = 3;
+	else
+		val = 4;
+
+	ret_val = i2c_smbus_read_byte_data(client, 0x00);
+	if (ret_val < 0)
+		return ret_val;
+
+	ret_val &= 0xFC; /*reset the bit before setting them */
+	ret_val |= val - 1;
+	ret_val = i2c_smbus_write_byte_data(client, 0x00, ret_val);
+
+	if (ret_val < 0)
+		return ret_val;
+	return count;
+}
+
+static void als_set_power_state(struct i2c_client *client, int enable)
+{
+	int ret_val;
+
+	ret_val = i2c_smbus_read_byte_data(client, 0x00);
+	if (ret_val < 0)
+		return;
+
+	if (enable)
+		ret_val |= 0x80;
+	else
+		ret_val &= 0x7F;
+
+	i2c_smbus_write_byte_data(client, 0x00, ret_val);
+}
+
+static DEVICE_ATTR(lux0_sensor_range, S_IRUGO | S_IWUSR,
+	als_sensing_range_show, als_sensing_range_store);
+static DEVICE_ATTR(lux0_input, S_IRUGO, als_lux_input_data_show, NULL);
+
+static struct attribute *mid_att_als[] = {
+	&dev_attr_lux0_sensor_range.attr,
+	&dev_attr_lux0_input.attr,
+	NULL
+};
+
+static const struct attribute_group m_als_gr = {
+	.name = "isl29020",
+	.attrs = mid_att_als
+};
+
+static int als_set_default_config(struct i2c_client *client)
+{
+	int retval;
+
+	retval = i2c_smbus_write_byte_data(client, 0x00, 0xc0);
+	if (retval < 0) {
+		dev_err(&client->dev, "default write failed.");
+		return retval;
+	}
+	return 0;
+}
+
+static int  isl29020_probe(struct i2c_client *client,
+					const struct i2c_device_id *id)
+{
+	int res;
+
+	res = als_set_default_config(client);
+	if (res <  0)
+		return res;
+
+	res = sysfs_create_group(&client->dev.kobj, &m_als_gr);
+	if (res) {
+		dev_err(&client->dev, "isl29020: device create file failed\n");
+		return res;
+	}
+	dev_info(&client->dev, "%s isl29020: ALS chip found\n", client->name);
+	als_set_power_state(client, 0);
+	pm_runtime_enable(&client->dev);
+	return res;
+}
+
+static int isl29020_remove(struct i2c_client *client)
+{
+	sysfs_remove_group(&client->dev.kobj, &m_als_gr);
+	return 0;
+}
+
+static const struct i2c_device_id isl29020_id[] = {
+	{ "isl29020", 0 },
+	{ }
+};
+
+MODULE_DEVICE_TABLE(i2c, isl29020_id);
+
+#ifdef CONFIG_PM
+
+static int isl29020_runtime_suspend(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	als_set_power_state(client, 0);
+	return 0;
+}
+
+static int isl29020_runtime_resume(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	als_set_power_state(client, 1);
+	return 0;
+}
+
+static const struct dev_pm_ops isl29020_pm_ops = {
+	.runtime_suspend = isl29020_runtime_suspend,
+	.runtime_resume = isl29020_runtime_resume,
+};
+
+#define ISL29020_PM_OPS (&isl29020_pm_ops)
+#else	/* CONFIG_PM */
+#define ISL29020_PM_OPS NULL
+#endif	/* CONFIG_PM */
+
+static struct i2c_driver isl29020_driver = {
+	.driver = {
+		.name = "isl29020",
+		.pm = ISL29020_PM_OPS,
+	},
+	.probe = isl29020_probe,
+	.remove = isl29020_remove,
+	.id_table = isl29020_id,
+};
+
+module_i2c_driver(isl29020_driver);
+
+MODULE_AUTHOR("Kalhan Trisal <kalhan.trisal@intel.com>");
+MODULE_DESCRIPTION("Intersil isl29020 ALS Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c
new file mode 100644
index 000000000..bc4dc92af
--- /dev/null
+++ b/drivers/misc/kgdbts.c
@@ -0,0 +1,1193 @@
+/*
+ * kgdbts is a test suite for kgdb for the sole purpose of validating
+ * that key pieces of the kgdb internals are working properly such as
+ * HW/SW breakpoints, single stepping, and NMI.
+ *
+ * Created by: Jason Wessel <jason.wessel@windriver.com>
+ *
+ * Copyright (c) 2008 Wind River Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+/* Information about the kgdb test suite.
+ * -------------------------------------
+ *
+ * The kgdb test suite is designed as a KGDB I/O module which
+ * simulates the communications that a debugger would have with kgdb.
+ * The tests are broken up in to a line by line and referenced here as
+ * a "get" which is kgdb requesting input and "put" which is kgdb
+ * sending a response.
+ *
+ * The kgdb suite can be invoked from the kernel command line
+ * arguments system or executed dynamically at run time.  The test
+ * suite uses the variable "kgdbts" to obtain the information about
+ * which tests to run and to configure the verbosity level.  The
+ * following are the various characters you can use with the kgdbts=
+ * line:
+ *
+ * When using the "kgdbts=" you only choose one of the following core
+ * test types:
+ * A = Run all the core tests silently
+ * V1 = Run all the core tests with minimal output
+ * V2 = Run all the core tests in debug mode
+ *
+ * You can also specify optional tests:
+ * N## = Go to sleep with interrupts of for ## seconds
+ *       to test the HW NMI watchdog
+ * F## = Break at do_fork for ## iterations
+ * S## = Break at sys_open for ## iterations
+ * I## = Run the single step test ## iterations
+ *
+ * NOTE: that the do_fork and sys_open tests are mutually exclusive.
+ *
+ * To invoke the kgdb test suite from boot you use a kernel start
+ * argument as follows:
+ * 	kgdbts=V1 kgdbwait
+ * Or if you wanted to perform the NMI test for 6 seconds and do_fork
+ * test for 100 forks, you could use:
+ * 	kgdbts=V1N6F100 kgdbwait
+ *
+ * The test suite can also be invoked at run time with:
+ *	echo kgdbts=V1N6F100 > /sys/module/kgdbts/parameters/kgdbts
+ * Or as another example:
+ *	echo kgdbts=V2 > /sys/module/kgdbts/parameters/kgdbts
+ *
+ * When developing a new kgdb arch specific implementation or
+ * using these tests for the purpose of regression testing,
+ * several invocations are required.
+ *
+ * 1) Boot with the test suite enabled by using the kernel arguments
+ *       "kgdbts=V1F100 kgdbwait"
+ *    ## If kgdb arch specific implementation has NMI use
+ *       "kgdbts=V1N6F100
+ *
+ * 2) After the system boot run the basic test.
+ * echo kgdbts=V1 > /sys/module/kgdbts/parameters/kgdbts
+ *
+ * 3) Run the concurrency tests.  It is best to use n+1
+ *    while loops where n is the number of cpus you have
+ *    in your system.  The example below uses only two
+ *    loops.
+ *
+ * ## This tests break points on sys_open
+ * while [ 1 ] ; do find / > /dev/null 2>&1 ; done &
+ * while [ 1 ] ; do find / > /dev/null 2>&1 ; done &
+ * echo kgdbts=V1S10000 > /sys/module/kgdbts/parameters/kgdbts
+ * fg # and hit control-c
+ * fg # and hit control-c
+ * ## This tests break points on do_fork
+ * while [ 1 ] ; do date > /dev/null ; done &
+ * while [ 1 ] ; do date > /dev/null ; done &
+ * echo kgdbts=V1F1000 > /sys/module/kgdbts/parameters/kgdbts
+ * fg # and hit control-c
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/kgdb.h>
+#include <linux/ctype.h>
+#include <linux/uaccess.h>
+#include <linux/syscalls.h>
+#include <linux/nmi.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/sched/task.h>
+
+#include <asm/sections.h>
+
+#define v1printk(a...) do {		\
+	if (verbose)			\
+		printk(KERN_INFO a);	\
+} while (0)
+#define v2printk(a...) do {		\
+	if (verbose > 1) {		\
+		printk(KERN_INFO a);	\
+	}				\
+	touch_nmi_watchdog();		\
+} while (0)
+#define eprintk(a...) do {		\
+	printk(KERN_ERR a);		\
+	WARN_ON(1);			\
+} while (0)
+#define MAX_CONFIG_LEN		40
+
+static struct kgdb_io kgdbts_io_ops;
+static char get_buf[BUFMAX];
+static int get_buf_cnt;
+static char put_buf[BUFMAX];
+static int put_buf_cnt;
+static char scratch_buf[BUFMAX];
+static int verbose;
+static int repeat_test;
+static int test_complete;
+static int send_ack;
+static int final_ack;
+static int force_hwbrks;
+static int hwbreaks_ok;
+static int hw_break_val;
+static int hw_break_val2;
+static int cont_instead_of_sstep;
+static unsigned long cont_thread_id;
+static unsigned long sstep_thread_id;
+#if defined(CONFIG_ARM) || defined(CONFIG_MIPS) || defined(CONFIG_SPARC)
+static int arch_needs_sstep_emulation = 1;
+#else
+static int arch_needs_sstep_emulation;
+#endif
+static unsigned long cont_addr;
+static unsigned long sstep_addr;
+static int restart_from_top_after_write;
+static int sstep_state;
+
+/* Storage for the registers, in GDB format. */
+static unsigned long kgdbts_gdb_regs[(NUMREGBYTES +
+					sizeof(unsigned long) - 1) /
+					sizeof(unsigned long)];
+static struct pt_regs kgdbts_regs;
+
+/* -1 = init not run yet, 0 = unconfigured, 1 = configured. */
+static int configured		= -1;
+
+#ifdef CONFIG_KGDB_TESTS_BOOT_STRING
+static char config[MAX_CONFIG_LEN] = CONFIG_KGDB_TESTS_BOOT_STRING;
+#else
+static char config[MAX_CONFIG_LEN];
+#endif
+static struct kparam_string kps = {
+	.string			= config,
+	.maxlen			= MAX_CONFIG_LEN,
+};
+
+static void fill_get_buf(char *buf);
+
+struct test_struct {
+	char *get;
+	char *put;
+	void (*get_handler)(char *);
+	int (*put_handler)(char *, char *);
+};
+
+struct test_state {
+	char *name;
+	struct test_struct *tst;
+	int idx;
+	int (*run_test) (int, int);
+	int (*validate_put) (char *);
+};
+
+static struct test_state ts;
+
+static int kgdbts_unreg_thread(void *ptr)
+{
+	/* Wait until the tests are complete and then ungresiter the I/O
+	 * driver.
+	 */
+	while (!final_ack)
+		msleep_interruptible(1500);
+	/* Pause for any other threads to exit after final ack. */
+	msleep_interruptible(1000);
+	if (configured)
+		kgdb_unregister_io_module(&kgdbts_io_ops);
+	configured = 0;
+
+	return 0;
+}
+
+/* This is noinline such that it can be used for a single location to
+ * place a breakpoint
+ */
+static noinline void kgdbts_break_test(void)
+{
+	v2printk("kgdbts: breakpoint complete\n");
+}
+
+/* Lookup symbol info in the kernel */
+static unsigned long lookup_addr(char *arg)
+{
+	unsigned long addr = 0;
+
+	if (!strcmp(arg, "kgdbts_break_test"))
+		addr = (unsigned long)kgdbts_break_test;
+	else if (!strcmp(arg, "sys_open"))
+		addr = (unsigned long)do_sys_open;
+	else if (!strcmp(arg, "do_fork"))
+		addr = (unsigned long)_do_fork;
+	else if (!strcmp(arg, "hw_break_val"))
+		addr = (unsigned long)&hw_break_val;
+	addr = (unsigned long) dereference_function_descriptor((void *)addr);
+	return addr;
+}
+
+static void break_helper(char *bp_type, char *arg, unsigned long vaddr)
+{
+	unsigned long addr;
+
+	if (arg)
+		addr = lookup_addr(arg);
+	else
+		addr = vaddr;
+
+	sprintf(scratch_buf, "%s,%lx,%i", bp_type, addr,
+		BREAK_INSTR_SIZE);
+	fill_get_buf(scratch_buf);
+}
+
+static void sw_break(char *arg)
+{
+	break_helper(force_hwbrks ? "Z1" : "Z0", arg, 0);
+}
+
+static void sw_rem_break(char *arg)
+{
+	break_helper(force_hwbrks ? "z1" : "z0", arg, 0);
+}
+
+static void hw_break(char *arg)
+{
+	break_helper("Z1", arg, 0);
+}
+
+static void hw_rem_break(char *arg)
+{
+	break_helper("z1", arg, 0);
+}
+
+static void hw_write_break(char *arg)
+{
+	break_helper("Z2", arg, 0);
+}
+
+static void hw_rem_write_break(char *arg)
+{
+	break_helper("z2", arg, 0);
+}
+
+static void hw_access_break(char *arg)
+{
+	break_helper("Z4", arg, 0);
+}
+
+static void hw_rem_access_break(char *arg)
+{
+	break_helper("z4", arg, 0);
+}
+
+static void hw_break_val_access(void)
+{
+	hw_break_val2 = hw_break_val;
+}
+
+static void hw_break_val_write(void)
+{
+	hw_break_val++;
+}
+
+static int get_thread_id_continue(char *put_str, char *arg)
+{
+	char *ptr = &put_str[11];
+
+	if (put_str[1] != 'T' || put_str[2] != '0')
+		return 1;
+	kgdb_hex2long(&ptr, &cont_thread_id);
+	return 0;
+}
+
+static int check_and_rewind_pc(char *put_str, char *arg)
+{
+	unsigned long addr = lookup_addr(arg);
+	unsigned long ip;
+	int offset = 0;
+
+	kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs,
+		 NUMREGBYTES);
+	gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs);
+	ip = instruction_pointer(&kgdbts_regs);
+	v2printk("Stopped at IP: %lx\n", ip);
+#ifdef GDB_ADJUSTS_BREAK_OFFSET
+	/* On some arches, a breakpoint stop requires it to be decremented */
+	if (addr + BREAK_INSTR_SIZE == ip)
+		offset = -BREAK_INSTR_SIZE;
+#endif
+
+	if (arch_needs_sstep_emulation && sstep_addr &&
+	    ip + offset == sstep_addr &&
+	    ((!strcmp(arg, "sys_open") || !strcmp(arg, "do_fork")))) {
+		/* This is special case for emulated single step */
+		v2printk("Emul: rewind hit single step bp\n");
+		restart_from_top_after_write = 1;
+	} else if (strcmp(arg, "silent") && ip + offset != addr) {
+		eprintk("kgdbts: BP mismatch %lx expected %lx\n",
+			   ip + offset, addr);
+		return 1;
+	}
+	/* Readjust the instruction pointer if needed */
+	ip += offset;
+	cont_addr = ip;
+#ifdef GDB_ADJUSTS_BREAK_OFFSET
+	instruction_pointer_set(&kgdbts_regs, ip);
+#endif
+	return 0;
+}
+
+static int check_single_step(char *put_str, char *arg)
+{
+	unsigned long addr = lookup_addr(arg);
+	static int matched_id;
+
+	/*
+	 * From an arch indepent point of view the instruction pointer
+	 * should be on a different instruction
+	 */
+	kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs,
+		 NUMREGBYTES);
+	gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs);
+	v2printk("Singlestep stopped at IP: %lx\n",
+		   instruction_pointer(&kgdbts_regs));
+
+	if (sstep_thread_id != cont_thread_id) {
+		/*
+		 * Ensure we stopped in the same thread id as before, else the
+		 * debugger should continue until the original thread that was
+		 * single stepped is scheduled again, emulating gdb's behavior.
+		 */
+		v2printk("ThrID does not match: %lx\n", cont_thread_id);
+		if (arch_needs_sstep_emulation) {
+			if (matched_id &&
+			    instruction_pointer(&kgdbts_regs) != addr)
+				goto continue_test;
+			matched_id++;
+			ts.idx -= 2;
+			sstep_state = 0;
+			return 0;
+		}
+		cont_instead_of_sstep = 1;
+		ts.idx -= 4;
+		return 0;
+	}
+continue_test:
+	matched_id = 0;
+	if (instruction_pointer(&kgdbts_regs) == addr) {
+		eprintk("kgdbts: SingleStep failed at %lx\n",
+			   instruction_pointer(&kgdbts_regs));
+		return 1;
+	}
+
+	return 0;
+}
+
+static void write_regs(char *arg)
+{
+	memset(scratch_buf, 0, sizeof(scratch_buf));
+	scratch_buf[0] = 'G';
+	pt_regs_to_gdb_regs(kgdbts_gdb_regs, &kgdbts_regs);
+	kgdb_mem2hex((char *)kgdbts_gdb_regs, &scratch_buf[1], NUMREGBYTES);
+	fill_get_buf(scratch_buf);
+}
+
+static void skip_back_repeat_test(char *arg)
+{
+	int go_back = simple_strtol(arg, NULL, 10);
+
+	repeat_test--;
+	if (repeat_test <= 0) {
+		ts.idx++;
+	} else {
+		if (repeat_test % 100 == 0)
+			v1printk("kgdbts:RUN ... %d remaining\n", repeat_test);
+
+		ts.idx -= go_back;
+	}
+	fill_get_buf(ts.tst[ts.idx].get);
+}
+
+static int got_break(char *put_str, char *arg)
+{
+	test_complete = 1;
+	if (!strncmp(put_str+1, arg, 2)) {
+		if (!strncmp(arg, "T0", 2))
+			test_complete = 2;
+		return 0;
+	}
+	return 1;
+}
+
+static void get_cont_catch(char *arg)
+{
+	/* Always send detach because the test is completed at this point */
+	fill_get_buf("D");
+}
+
+static int put_cont_catch(char *put_str, char *arg)
+{
+	/* This is at the end of the test and we catch any and all input */
+	v2printk("kgdbts: cleanup task: %lx\n", sstep_thread_id);
+	ts.idx--;
+	return 0;
+}
+
+static int emul_reset(char *put_str, char *arg)
+{
+	if (strncmp(put_str, "$OK", 3))
+		return 1;
+	if (restart_from_top_after_write) {
+		restart_from_top_after_write = 0;
+		ts.idx = -1;
+	}
+	return 0;
+}
+
+static void emul_sstep_get(char *arg)
+{
+	if (!arch_needs_sstep_emulation) {
+		if (cont_instead_of_sstep) {
+			cont_instead_of_sstep = 0;
+			fill_get_buf("c");
+		} else {
+			fill_get_buf(arg);
+		}
+		return;
+	}
+	switch (sstep_state) {
+	case 0:
+		v2printk("Emulate single step\n");
+		/* Start by looking at the current PC */
+		fill_get_buf("g");
+		break;
+	case 1:
+		/* set breakpoint */
+		break_helper("Z0", NULL, sstep_addr);
+		break;
+	case 2:
+		/* Continue */
+		fill_get_buf("c");
+		break;
+	case 3:
+		/* Clear breakpoint */
+		break_helper("z0", NULL, sstep_addr);
+		break;
+	default:
+		eprintk("kgdbts: ERROR failed sstep get emulation\n");
+	}
+	sstep_state++;
+}
+
+static int emul_sstep_put(char *put_str, char *arg)
+{
+	if (!arch_needs_sstep_emulation) {
+		char *ptr = &put_str[11];
+		if (put_str[1] != 'T' || put_str[2] != '0')
+			return 1;
+		kgdb_hex2long(&ptr, &sstep_thread_id);
+		return 0;
+	}
+	switch (sstep_state) {
+	case 1:
+		/* validate the "g" packet to get the IP */
+		kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs,
+			 NUMREGBYTES);
+		gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs);
+		v2printk("Stopped at IP: %lx\n",
+			 instruction_pointer(&kgdbts_regs));
+		/* Want to stop at IP + break instruction size by default */
+		sstep_addr = cont_addr + BREAK_INSTR_SIZE;
+		break;
+	case 2:
+		if (strncmp(put_str, "$OK", 3)) {
+			eprintk("kgdbts: failed sstep break set\n");
+			return 1;
+		}
+		break;
+	case 3:
+		if (strncmp(put_str, "$T0", 3)) {
+			eprintk("kgdbts: failed continue sstep\n");
+			return 1;
+		} else {
+			char *ptr = &put_str[11];
+			kgdb_hex2long(&ptr, &sstep_thread_id);
+		}
+		break;
+	case 4:
+		if (strncmp(put_str, "$OK", 3)) {
+			eprintk("kgdbts: failed sstep break unset\n");
+			return 1;
+		}
+		/* Single step is complete so continue on! */
+		sstep_state = 0;
+		return 0;
+	default:
+		eprintk("kgdbts: ERROR failed sstep put emulation\n");
+	}
+
+	/* Continue on the same test line until emulation is complete */
+	ts.idx--;
+	return 0;
+}
+
+static int final_ack_set(char *put_str, char *arg)
+{
+	if (strncmp(put_str+1, arg, 2))
+		return 1;
+	final_ack = 1;
+	return 0;
+}
+/*
+ * Test to plant a breakpoint and detach, which should clear out the
+ * breakpoint and restore the original instruction.
+ */
+static struct test_struct plant_and_detach_test[] = {
+	{ "?", "S0*" }, /* Clear break points */
+	{ "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
+	{ "D", "OK" }, /* Detach */
+	{ "", "" },
+};
+
+/*
+ * Simple test to write in a software breakpoint, check for the
+ * correct stop location and detach.
+ */
+static struct test_struct sw_breakpoint_test[] = {
+	{ "?", "S0*" }, /* Clear break points */
+	{ "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
+	{ "c", "T0*", }, /* Continue */
+	{ "g", "kgdbts_break_test", NULL, check_and_rewind_pc },
+	{ "write", "OK", write_regs },
+	{ "kgdbts_break_test", "OK", sw_rem_break }, /*remove breakpoint */
+	{ "D", "OK" }, /* Detach */
+	{ "D", "OK", NULL,  got_break }, /* On success we made it here */
+	{ "", "" },
+};
+
+/*
+ * Test a known bad memory read location to test the fault handler and
+ * read bytes 1-8 at the bad address
+ */
+static struct test_struct bad_read_test[] = {
+	{ "?", "S0*" }, /* Clear break points */
+	{ "m0,1", "E*" }, /* read 1 byte at address 1 */
+	{ "m0,2", "E*" }, /* read 1 byte at address 2 */
+	{ "m0,3", "E*" }, /* read 1 byte at address 3 */
+	{ "m0,4", "E*" }, /* read 1 byte at address 4 */
+	{ "m0,5", "E*" }, /* read 1 byte at address 5 */
+	{ "m0,6", "E*" }, /* read 1 byte at address 6 */
+	{ "m0,7", "E*" }, /* read 1 byte at address 7 */
+	{ "m0,8", "E*" }, /* read 1 byte at address 8 */
+	{ "D", "OK" }, /* Detach which removes all breakpoints and continues */
+	{ "", "" },
+};
+
+/*
+ * Test for hitting a breakpoint, remove it, single step, plant it
+ * again and detach.
+ */
+static struct test_struct singlestep_break_test[] = {
+	{ "?", "S0*" }, /* Clear break points */
+	{ "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
+	{ "c", "T0*", NULL, get_thread_id_continue }, /* Continue */
+	{ "kgdbts_break_test", "OK", sw_rem_break }, /*remove breakpoint */
+	{ "g", "kgdbts_break_test", NULL, check_and_rewind_pc },
+	{ "write", "OK", write_regs }, /* Write registers */
+	{ "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */
+	{ "g", "kgdbts_break_test", NULL, check_single_step },
+	{ "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
+	{ "c", "T0*", }, /* Continue */
+	{ "g", "kgdbts_break_test", NULL, check_and_rewind_pc },
+	{ "write", "OK", write_regs }, /* Write registers */
+	{ "D", "OK" }, /* Remove all breakpoints and continues */
+	{ "", "" },
+};
+
+/*
+ * Test for hitting a breakpoint at do_fork for what ever the number
+ * of iterations required by the variable repeat_test.
+ */
+static struct test_struct do_fork_test[] = {
+	{ "?", "S0*" }, /* Clear break points */
+	{ "do_fork", "OK", sw_break, }, /* set sw breakpoint */
+	{ "c", "T0*", NULL, get_thread_id_continue }, /* Continue */
+	{ "do_fork", "OK", sw_rem_break }, /*remove breakpoint */
+	{ "g", "do_fork", NULL, check_and_rewind_pc }, /* check location */
+	{ "write", "OK", write_regs, emul_reset }, /* Write registers */
+	{ "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */
+	{ "g", "do_fork", NULL, check_single_step },
+	{ "do_fork", "OK", sw_break, }, /* set sw breakpoint */
+	{ "7", "T0*", skip_back_repeat_test }, /* Loop based on repeat_test */
+	{ "D", "OK", NULL, final_ack_set }, /* detach and unregister I/O */
+	{ "", "", get_cont_catch, put_cont_catch },
+};
+
+/* Test for hitting a breakpoint at sys_open for what ever the number
+ * of iterations required by the variable repeat_test.
+ */
+static struct test_struct sys_open_test[] = {
+	{ "?", "S0*" }, /* Clear break points */
+	{ "sys_open", "OK", sw_break, }, /* set sw breakpoint */
+	{ "c", "T0*", NULL, get_thread_id_continue }, /* Continue */
+	{ "sys_open", "OK", sw_rem_break }, /*remove breakpoint */
+	{ "g", "sys_open", NULL, check_and_rewind_pc }, /* check location */
+	{ "write", "OK", write_regs, emul_reset }, /* Write registers */
+	{ "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */
+	{ "g", "sys_open", NULL, check_single_step },
+	{ "sys_open", "OK", sw_break, }, /* set sw breakpoint */
+	{ "7", "T0*", skip_back_repeat_test }, /* Loop based on repeat_test */
+	{ "D", "OK", NULL, final_ack_set }, /* detach and unregister I/O */
+	{ "", "", get_cont_catch, put_cont_catch },
+};
+
+/*
+ * Test for hitting a simple hw breakpoint
+ */
+static struct test_struct hw_breakpoint_test[] = {
+	{ "?", "S0*" }, /* Clear break points */
+	{ "kgdbts_break_test", "OK", hw_break, }, /* set hw breakpoint */
+	{ "c", "T0*", }, /* Continue */
+	{ "g", "kgdbts_break_test", NULL, check_and_rewind_pc },
+	{ "write", "OK", write_regs },
+	{ "kgdbts_break_test", "OK", hw_rem_break }, /*remove breakpoint */
+	{ "D", "OK" }, /* Detach */
+	{ "D", "OK", NULL,  got_break }, /* On success we made it here */
+	{ "", "" },
+};
+
+/*
+ * Test for hitting a hw write breakpoint
+ */
+static struct test_struct hw_write_break_test[] = {
+	{ "?", "S0*" }, /* Clear break points */
+	{ "hw_break_val", "OK", hw_write_break, }, /* set hw breakpoint */
+	{ "c", "T0*", NULL, got_break }, /* Continue */
+	{ "g", "silent", NULL, check_and_rewind_pc },
+	{ "write", "OK", write_regs },
+	{ "hw_break_val", "OK", hw_rem_write_break }, /*remove breakpoint */
+	{ "D", "OK" }, /* Detach */
+	{ "D", "OK", NULL,  got_break }, /* On success we made it here */
+	{ "", "" },
+};
+
+/*
+ * Test for hitting a hw access breakpoint
+ */
+static struct test_struct hw_access_break_test[] = {
+	{ "?", "S0*" }, /* Clear break points */
+	{ "hw_break_val", "OK", hw_access_break, }, /* set hw breakpoint */
+	{ "c", "T0*", NULL, got_break }, /* Continue */
+	{ "g", "silent", NULL, check_and_rewind_pc },
+	{ "write", "OK", write_regs },
+	{ "hw_break_val", "OK", hw_rem_access_break }, /*remove breakpoint */
+	{ "D", "OK" }, /* Detach */
+	{ "D", "OK", NULL,  got_break }, /* On success we made it here */
+	{ "", "" },
+};
+
+/*
+ * Test for hitting a hw access breakpoint
+ */
+static struct test_struct nmi_sleep_test[] = {
+	{ "?", "S0*" }, /* Clear break points */
+	{ "c", "T0*", NULL, got_break }, /* Continue */
+	{ "D", "OK" }, /* Detach */
+	{ "D", "OK", NULL,  got_break }, /* On success we made it here */
+	{ "", "" },
+};
+
+static void fill_get_buf(char *buf)
+{
+	unsigned char checksum = 0;
+	int count = 0;
+	char ch;
+
+	strcpy(get_buf, "$");
+	strcat(get_buf, buf);
+	while ((ch = buf[count])) {
+		checksum += ch;
+		count++;
+	}
+	strcat(get_buf, "#");
+	get_buf[count + 2] = hex_asc_hi(checksum);
+	get_buf[count + 3] = hex_asc_lo(checksum);
+	get_buf[count + 4] = '\0';
+	v2printk("get%i: %s\n", ts.idx, get_buf);
+}
+
+static int validate_simple_test(char *put_str)
+{
+	char *chk_str;
+
+	if (ts.tst[ts.idx].put_handler)
+		return ts.tst[ts.idx].put_handler(put_str,
+			ts.tst[ts.idx].put);
+
+	chk_str = ts.tst[ts.idx].put;
+	if (*put_str == '$')
+		put_str++;
+
+	while (*chk_str != '\0' && *put_str != '\0') {
+		/* If someone does a * to match the rest of the string, allow
+		 * it, or stop if the received string is complete.
+		 */
+		if (*put_str == '#' || *chk_str == '*')
+			return 0;
+		if (*put_str != *chk_str)
+			return 1;
+
+		chk_str++;
+		put_str++;
+	}
+	if (*chk_str == '\0' && (*put_str == '\0' || *put_str == '#'))
+		return 0;
+
+	return 1;
+}
+
+static int run_simple_test(int is_get_char, int chr)
+{
+	int ret = 0;
+	if (is_get_char) {
+		/* Send an ACK on the get if a prior put completed and set the
+		 * send ack variable
+		 */
+		if (send_ack) {
+			send_ack = 0;
+			return '+';
+		}
+		/* On the first get char, fill the transmit buffer and then
+		 * take from the get_string.
+		 */
+		if (get_buf_cnt == 0) {
+			if (ts.tst[ts.idx].get_handler)
+				ts.tst[ts.idx].get_handler(ts.tst[ts.idx].get);
+			else
+				fill_get_buf(ts.tst[ts.idx].get);
+		}
+
+		if (get_buf[get_buf_cnt] == '\0') {
+			eprintk("kgdbts: ERROR GET: EOB on '%s' at %i\n",
+			   ts.name, ts.idx);
+			get_buf_cnt = 0;
+			fill_get_buf("D");
+		}
+		ret = get_buf[get_buf_cnt];
+		get_buf_cnt++;
+		return ret;
+	}
+
+	/* This callback is a put char which is when kgdb sends data to
+	 * this I/O module.
+	 */
+	if (ts.tst[ts.idx].get[0] == '\0' && ts.tst[ts.idx].put[0] == '\0' &&
+	    !ts.tst[ts.idx].get_handler) {
+		eprintk("kgdbts: ERROR: beyond end of test on"
+			   " '%s' line %i\n", ts.name, ts.idx);
+		return 0;
+	}
+
+	if (put_buf_cnt >= BUFMAX) {
+		eprintk("kgdbts: ERROR: put buffer overflow on"
+			   " '%s' line %i\n", ts.name, ts.idx);
+		put_buf_cnt = 0;
+		return 0;
+	}
+	/* Ignore everything until the first valid packet start '$' */
+	if (put_buf_cnt == 0 && chr != '$')
+		return 0;
+
+	put_buf[put_buf_cnt] = chr;
+	put_buf_cnt++;
+
+	/* End of packet == #XX so look for the '#' */
+	if (put_buf_cnt > 3 && put_buf[put_buf_cnt - 3] == '#') {
+		if (put_buf_cnt >= BUFMAX) {
+			eprintk("kgdbts: ERROR: put buffer overflow on"
+				" '%s' line %i\n", ts.name, ts.idx);
+			put_buf_cnt = 0;
+			return 0;
+		}
+		put_buf[put_buf_cnt] = '\0';
+		v2printk("put%i: %s\n", ts.idx, put_buf);
+		/* Trigger check here */
+		if (ts.validate_put && ts.validate_put(put_buf)) {
+			eprintk("kgdbts: ERROR PUT: end of test "
+			   "buffer on '%s' line %i expected %s got %s\n",
+			   ts.name, ts.idx, ts.tst[ts.idx].put, put_buf);
+		}
+		ts.idx++;
+		put_buf_cnt = 0;
+		get_buf_cnt = 0;
+		send_ack = 1;
+	}
+	return 0;
+}
+
+static void init_simple_test(void)
+{
+	memset(&ts, 0, sizeof(ts));
+	ts.run_test = run_simple_test;
+	ts.validate_put = validate_simple_test;
+}
+
+static void run_plant_and_detach_test(int is_early)
+{
+	char before[BREAK_INSTR_SIZE];
+	char after[BREAK_INSTR_SIZE];
+
+	probe_kernel_read(before, (char *)kgdbts_break_test,
+	  BREAK_INSTR_SIZE);
+	init_simple_test();
+	ts.tst = plant_and_detach_test;
+	ts.name = "plant_and_detach_test";
+	/* Activate test with initial breakpoint */
+	if (!is_early)
+		kgdb_breakpoint();
+	probe_kernel_read(after, (char *)kgdbts_break_test,
+	  BREAK_INSTR_SIZE);
+	if (memcmp(before, after, BREAK_INSTR_SIZE)) {
+		printk(KERN_CRIT "kgdbts: ERROR kgdb corrupted memory\n");
+		panic("kgdb memory corruption");
+	}
+
+	/* complete the detach test */
+	if (!is_early)
+		kgdbts_break_test();
+}
+
+static void run_breakpoint_test(int is_hw_breakpoint)
+{
+	test_complete = 0;
+	init_simple_test();
+	if (is_hw_breakpoint) {
+		ts.tst = hw_breakpoint_test;
+		ts.name = "hw_breakpoint_test";
+	} else {
+		ts.tst = sw_breakpoint_test;
+		ts.name = "sw_breakpoint_test";
+	}
+	/* Activate test with initial breakpoint */
+	kgdb_breakpoint();
+	/* run code with the break point in it */
+	kgdbts_break_test();
+	kgdb_breakpoint();
+
+	if (test_complete)
+		return;
+
+	eprintk("kgdbts: ERROR %s test failed\n", ts.name);
+	if (is_hw_breakpoint)
+		hwbreaks_ok = 0;
+}
+
+static void run_hw_break_test(int is_write_test)
+{
+	test_complete = 0;
+	init_simple_test();
+	if (is_write_test) {
+		ts.tst = hw_write_break_test;
+		ts.name = "hw_write_break_test";
+	} else {
+		ts.tst = hw_access_break_test;
+		ts.name = "hw_access_break_test";
+	}
+	/* Activate test with initial breakpoint */
+	kgdb_breakpoint();
+	hw_break_val_access();
+	if (is_write_test) {
+		if (test_complete == 2) {
+			eprintk("kgdbts: ERROR %s broke on access\n",
+				ts.name);
+			hwbreaks_ok = 0;
+		}
+		hw_break_val_write();
+	}
+	kgdb_breakpoint();
+
+	if (test_complete == 1)
+		return;
+
+	eprintk("kgdbts: ERROR %s test failed\n", ts.name);
+	hwbreaks_ok = 0;
+}
+
+static void run_nmi_sleep_test(int nmi_sleep)
+{
+	unsigned long flags;
+
+	init_simple_test();
+	ts.tst = nmi_sleep_test;
+	ts.name = "nmi_sleep_test";
+	/* Activate test with initial breakpoint */
+	kgdb_breakpoint();
+	local_irq_save(flags);
+	mdelay(nmi_sleep*1000);
+	touch_nmi_watchdog();
+	local_irq_restore(flags);
+	if (test_complete != 2)
+		eprintk("kgdbts: ERROR nmi_test did not hit nmi\n");
+	kgdb_breakpoint();
+	if (test_complete == 1)
+		return;
+
+	eprintk("kgdbts: ERROR %s test failed\n", ts.name);
+}
+
+static void run_bad_read_test(void)
+{
+	init_simple_test();
+	ts.tst = bad_read_test;
+	ts.name = "bad_read_test";
+	/* Activate test with initial breakpoint */
+	kgdb_breakpoint();
+}
+
+static void run_do_fork_test(void)
+{
+	init_simple_test();
+	ts.tst = do_fork_test;
+	ts.name = "do_fork_test";
+	/* Activate test with initial breakpoint */
+	kgdb_breakpoint();
+}
+
+static void run_sys_open_test(void)
+{
+	init_simple_test();
+	ts.tst = sys_open_test;
+	ts.name = "sys_open_test";
+	/* Activate test with initial breakpoint */
+	kgdb_breakpoint();
+}
+
+static void run_singlestep_break_test(void)
+{
+	init_simple_test();
+	ts.tst = singlestep_break_test;
+	ts.name = "singlestep_breakpoint_test";
+	/* Activate test with initial breakpoint */
+	kgdb_breakpoint();
+	kgdbts_break_test();
+	kgdbts_break_test();
+}
+
+static void kgdbts_run_tests(void)
+{
+	char *ptr;
+	int fork_test = 0;
+	int do_sys_open_test = 0;
+	int sstep_test = 1000;
+	int nmi_sleep = 0;
+	int i;
+
+	verbose = 0;
+	if (strstr(config, "V1"))
+		verbose = 1;
+	if (strstr(config, "V2"))
+		verbose = 2;
+
+	ptr = strchr(config, 'F');
+	if (ptr)
+		fork_test = simple_strtol(ptr + 1, NULL, 10);
+	ptr = strchr(config, 'S');
+	if (ptr)
+		do_sys_open_test = simple_strtol(ptr + 1, NULL, 10);
+	ptr = strchr(config, 'N');
+	if (ptr)
+		nmi_sleep = simple_strtol(ptr+1, NULL, 10);
+	ptr = strchr(config, 'I');
+	if (ptr)
+		sstep_test = simple_strtol(ptr+1, NULL, 10);
+
+	/* All HW break point tests */
+	if (arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT) {
+		hwbreaks_ok = 1;
+		v1printk("kgdbts:RUN hw breakpoint test\n");
+		run_breakpoint_test(1);
+		v1printk("kgdbts:RUN hw write breakpoint test\n");
+		run_hw_break_test(1);
+		v1printk("kgdbts:RUN access write breakpoint test\n");
+		run_hw_break_test(0);
+	}
+
+	/* required internal KGDB tests */
+	v1printk("kgdbts:RUN plant and detach test\n");
+	run_plant_and_detach_test(0);
+	v1printk("kgdbts:RUN sw breakpoint test\n");
+	run_breakpoint_test(0);
+	v1printk("kgdbts:RUN bad memory access test\n");
+	run_bad_read_test();
+	v1printk("kgdbts:RUN singlestep test %i iterations\n", sstep_test);
+	for (i = 0; i < sstep_test; i++) {
+		run_singlestep_break_test();
+		if (i % 100 == 0)
+			v1printk("kgdbts:RUN singlestep [%i/%i]\n",
+				 i, sstep_test);
+	}
+
+	/* ===Optional tests=== */
+
+	if (nmi_sleep) {
+		v1printk("kgdbts:RUN NMI sleep %i seconds test\n", nmi_sleep);
+		run_nmi_sleep_test(nmi_sleep);
+	}
+
+	/* If the do_fork test is run it will be the last test that is
+	 * executed because a kernel thread will be spawned at the very
+	 * end to unregister the debug hooks.
+	 */
+	if (fork_test) {
+		repeat_test = fork_test;
+		printk(KERN_INFO "kgdbts:RUN do_fork for %i breakpoints\n",
+			repeat_test);
+		kthread_run(kgdbts_unreg_thread, NULL, "kgdbts_unreg");
+		run_do_fork_test();
+		return;
+	}
+
+	/* If the sys_open test is run it will be the last test that is
+	 * executed because a kernel thread will be spawned at the very
+	 * end to unregister the debug hooks.
+	 */
+	if (do_sys_open_test) {
+		repeat_test = do_sys_open_test;
+		printk(KERN_INFO "kgdbts:RUN sys_open for %i breakpoints\n",
+			repeat_test);
+		kthread_run(kgdbts_unreg_thread, NULL, "kgdbts_unreg");
+		run_sys_open_test();
+		return;
+	}
+	/* Shutdown and unregister */
+	kgdb_unregister_io_module(&kgdbts_io_ops);
+	configured = 0;
+}
+
+static int kgdbts_option_setup(char *opt)
+{
+	if (strlen(opt) >= MAX_CONFIG_LEN) {
+		printk(KERN_ERR "kgdbts: config string too long\n");
+		return 1;
+	}
+	strcpy(config, opt);
+	return 1;
+}
+
+__setup("kgdbts=", kgdbts_option_setup);
+
+static int configure_kgdbts(void)
+{
+	int err = 0;
+
+	if (!strlen(config) || isspace(config[0]))
+		goto noconfig;
+
+	final_ack = 0;
+	run_plant_and_detach_test(1);
+
+	err = kgdb_register_io_module(&kgdbts_io_ops);
+	if (err) {
+		configured = 0;
+		return err;
+	}
+	configured = 1;
+	kgdbts_run_tests();
+
+	return err;
+
+noconfig:
+	config[0] = 0;
+	configured = 0;
+
+	return err;
+}
+
+static int __init init_kgdbts(void)
+{
+	/* Already configured? */
+	if (configured == 1)
+		return 0;
+
+	return configure_kgdbts();
+}
+device_initcall(init_kgdbts);
+
+static int kgdbts_get_char(void)
+{
+	int val = 0;
+
+	if (ts.run_test)
+		val = ts.run_test(1, 0);
+
+	return val;
+}
+
+static void kgdbts_put_char(u8 chr)
+{
+	if (ts.run_test)
+		ts.run_test(0, chr);
+}
+
+static int param_set_kgdbts_var(const char *kmessage,
+				const struct kernel_param *kp)
+{
+	size_t len = strlen(kmessage);
+
+	if (len >= MAX_CONFIG_LEN) {
+		printk(KERN_ERR "kgdbts: config string too long\n");
+		return -ENOSPC;
+	}
+
+	/* Only copy in the string if the init function has not run yet */
+	if (configured < 0) {
+		strcpy(config, kmessage);
+		return 0;
+	}
+
+	if (configured == 1) {
+		printk(KERN_ERR "kgdbts: ERROR: Already configured and running.\n");
+		return -EBUSY;
+	}
+
+	strcpy(config, kmessage);
+	/* Chop out \n char as a result of echo */
+	if (len && config[len - 1] == '\n')
+		config[len - 1] = '\0';
+
+	/* Go and configure with the new params. */
+	return configure_kgdbts();
+}
+
+static void kgdbts_pre_exp_handler(void)
+{
+	/* Increment the module count when the debugger is active */
+	if (!kgdb_connected)
+		try_module_get(THIS_MODULE);
+}
+
+static void kgdbts_post_exp_handler(void)
+{
+	/* decrement the module count when the debugger detaches */
+	if (!kgdb_connected)
+		module_put(THIS_MODULE);
+}
+
+static struct kgdb_io kgdbts_io_ops = {
+	.name			= "kgdbts",
+	.read_char		= kgdbts_get_char,
+	.write_char		= kgdbts_put_char,
+	.pre_exception		= kgdbts_pre_exp_handler,
+	.post_exception		= kgdbts_post_exp_handler,
+};
+
+/*
+ * not really modular, but the easiest way to keep compat with existing
+ * bootargs behaviour is to continue using module_param here.
+ */
+module_param_call(kgdbts, param_set_kgdbts_var, param_get_string, &kps, 0644);
+MODULE_PARM_DESC(kgdbts, "<A|V1|V2>[F#|S#][N#]");
diff --git a/drivers/misc/lattice-ecp3-config.c b/drivers/misc/lattice-ecp3-config.c
new file mode 100644
index 000000000..645d26536
--- /dev/null
+++ b/drivers/misc/lattice-ecp3-config.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright (C) 2012 Stefan Roese <sr@denx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/device.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/spi/spi.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <asm/unaligned.h>
+
+#define FIRMWARE_NAME	"lattice-ecp3.bit"
+
+/*
+ * The JTAG ID's of the supported FPGA's. The ID is 32bit wide
+ * reversed as noted in the manual.
+ */
+#define ID_ECP3_17	0xc2088080
+#define ID_ECP3_35	0xc2048080
+
+/* FPGA commands */
+#define FPGA_CMD_READ_ID	0x07	/* plus 24 bits */
+#define FPGA_CMD_READ_STATUS	0x09	/* plus 24 bits */
+#define FPGA_CMD_CLEAR		0x70
+#define FPGA_CMD_REFRESH	0x71
+#define FPGA_CMD_WRITE_EN	0x4a	/* plus 2 bits */
+#define FPGA_CMD_WRITE_DIS	0x4f	/* plus 8 bits */
+#define FPGA_CMD_WRITE_INC	0x41	/* plus 0 bits */
+
+/*
+ * The status register is 32bit revered, DONE is bit 17 from the TN1222.pdf
+ * (LatticeECP3 Slave SPI Port User's Guide)
+ */
+#define FPGA_STATUS_DONE	0x00004000
+#define FPGA_STATUS_CLEARED	0x00010000
+
+#define FPGA_CLEAR_TIMEOUT	5000	/* max. 5000ms for FPGA clear */
+#define FPGA_CLEAR_MSLEEP	10
+#define FPGA_CLEAR_LOOP_COUNT	(FPGA_CLEAR_TIMEOUT / FPGA_CLEAR_MSLEEP)
+
+struct fpga_data {
+	struct completion fw_loaded;
+};
+
+struct ecp3_dev {
+	u32 jedec_id;
+	char *name;
+};
+
+static const struct ecp3_dev ecp3_dev[] = {
+	{
+		.jedec_id = ID_ECP3_17,
+		.name = "Lattice ECP3-17",
+	},
+	{
+		.jedec_id = ID_ECP3_35,
+		.name = "Lattice ECP3-35",
+	},
+};
+
+static void firmware_load(const struct firmware *fw, void *context)
+{
+	struct spi_device *spi = (struct spi_device *)context;
+	struct fpga_data *data = spi_get_drvdata(spi);
+	u8 *buffer;
+	int ret;
+	u8 txbuf[8];
+	u8 rxbuf[8];
+	int rx_len = 8;
+	int i;
+	u32 jedec_id;
+	u32 status;
+
+	if (fw == NULL) {
+		dev_err(&spi->dev, "Cannot load firmware, aborting\n");
+		goto out;
+	}
+
+	if (fw->size == 0) {
+		dev_err(&spi->dev, "Error: Firmware size is 0!\n");
+		goto out;
+	}
+
+	/* Fill dummy data (24 stuffing bits for commands) */
+	txbuf[1] = 0x00;
+	txbuf[2] = 0x00;
+	txbuf[3] = 0x00;
+
+	/* Trying to speak with the FPGA via SPI... */
+	txbuf[0] = FPGA_CMD_READ_ID;
+	ret = spi_write_then_read(spi, txbuf, 8, rxbuf, rx_len);
+	jedec_id = get_unaligned_be32(&rxbuf[4]);
+	dev_dbg(&spi->dev, "FPGA JTAG ID=%08x\n", jedec_id);
+
+	for (i = 0; i < ARRAY_SIZE(ecp3_dev); i++) {
+		if (jedec_id == ecp3_dev[i].jedec_id)
+			break;
+	}
+	if (i == ARRAY_SIZE(ecp3_dev)) {
+		dev_err(&spi->dev,
+			"Error: No supported FPGA detected (JEDEC_ID=%08x)!\n",
+			jedec_id);
+		goto out;
+	}
+
+	dev_info(&spi->dev, "FPGA %s detected\n", ecp3_dev[i].name);
+
+	txbuf[0] = FPGA_CMD_READ_STATUS;
+	ret = spi_write_then_read(spi, txbuf, 8, rxbuf, rx_len);
+	status = get_unaligned_be32(&rxbuf[4]);
+	dev_dbg(&spi->dev, "FPGA Status=%08x\n", status);
+
+	buffer = kzalloc(fw->size + 8, GFP_KERNEL);
+	if (!buffer) {
+		dev_err(&spi->dev, "Error: Can't allocate memory!\n");
+		goto out;
+	}
+
+	/*
+	 * Insert WRITE_INC command into stream (one SPI frame)
+	 */
+	buffer[0] = FPGA_CMD_WRITE_INC;
+	buffer[1] = 0xff;
+	buffer[2] = 0xff;
+	buffer[3] = 0xff;
+	memcpy(buffer + 4, fw->data, fw->size);
+
+	txbuf[0] = FPGA_CMD_REFRESH;
+	ret = spi_write(spi, txbuf, 4);
+
+	txbuf[0] = FPGA_CMD_WRITE_EN;
+	ret = spi_write(spi, txbuf, 4);
+
+	txbuf[0] = FPGA_CMD_CLEAR;
+	ret = spi_write(spi, txbuf, 4);
+
+	/*
+	 * Wait for FPGA memory to become cleared
+	 */
+	for (i = 0; i < FPGA_CLEAR_LOOP_COUNT; i++) {
+		txbuf[0] = FPGA_CMD_READ_STATUS;
+		ret = spi_write_then_read(spi, txbuf, 8, rxbuf, rx_len);
+		status = get_unaligned_be32(&rxbuf[4]);
+		if (status == FPGA_STATUS_CLEARED)
+			break;
+
+		msleep(FPGA_CLEAR_MSLEEP);
+	}
+
+	if (i == FPGA_CLEAR_LOOP_COUNT) {
+		dev_err(&spi->dev,
+			"Error: Timeout waiting for FPGA to clear (status=%08x)!\n",
+			status);
+		kfree(buffer);
+		goto out;
+	}
+
+	dev_info(&spi->dev, "Configuring the FPGA...\n");
+	ret = spi_write(spi, buffer, fw->size + 8);
+
+	txbuf[0] = FPGA_CMD_WRITE_DIS;
+	ret = spi_write(spi, txbuf, 4);
+
+	txbuf[0] = FPGA_CMD_READ_STATUS;
+	ret = spi_write_then_read(spi, txbuf, 8, rxbuf, rx_len);
+	status = get_unaligned_be32(&rxbuf[4]);
+	dev_dbg(&spi->dev, "FPGA Status=%08x\n", status);
+
+	/* Check result */
+	if (status & FPGA_STATUS_DONE)
+		dev_info(&spi->dev, "FPGA successfully configured!\n");
+	else
+		dev_info(&spi->dev, "FPGA not configured (DONE not set)\n");
+
+	/*
+	 * Don't forget to release the firmware again
+	 */
+	release_firmware(fw);
+
+	kfree(buffer);
+out:
+	complete(&data->fw_loaded);
+}
+
+static int lattice_ecp3_probe(struct spi_device *spi)
+{
+	struct fpga_data *data;
+	int err;
+
+	data = devm_kzalloc(&spi->dev, sizeof(*data), GFP_KERNEL);
+	if (!data) {
+		dev_err(&spi->dev, "Memory allocation for fpga_data failed\n");
+		return -ENOMEM;
+	}
+	spi_set_drvdata(spi, data);
+
+	init_completion(&data->fw_loaded);
+	err = request_firmware_nowait(THIS_MODULE, FW_ACTION_HOTPLUG,
+				      FIRMWARE_NAME, &spi->dev,
+				      GFP_KERNEL, spi, firmware_load);
+	if (err) {
+		dev_err(&spi->dev, "Firmware loading failed with %d!\n", err);
+		return err;
+	}
+
+	dev_info(&spi->dev, "FPGA bitstream configuration driver registered\n");
+
+	return 0;
+}
+
+static int lattice_ecp3_remove(struct spi_device *spi)
+{
+	struct fpga_data *data = spi_get_drvdata(spi);
+
+	wait_for_completion(&data->fw_loaded);
+
+	return 0;
+}
+
+static const struct spi_device_id lattice_ecp3_id[] = {
+	{ "ecp3-17", 0 },
+	{ "ecp3-35", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(spi, lattice_ecp3_id);
+
+static struct spi_driver lattice_ecp3_driver = {
+	.driver = {
+		.name = "lattice-ecp3",
+	},
+	.probe = lattice_ecp3_probe,
+	.remove = lattice_ecp3_remove,
+	.id_table = lattice_ecp3_id,
+};
+
+module_spi_driver(lattice_ecp3_driver);
+
+MODULE_AUTHOR("Stefan Roese <sr@denx.de>");
+MODULE_DESCRIPTION("Lattice ECP3 FPGA configuration via SPI");
+MODULE_LICENSE("GPL");
+MODULE_FIRMWARE(FIRMWARE_NAME);
diff --git a/drivers/misc/lis3lv02d/Kconfig b/drivers/misc/lis3lv02d/Kconfig
new file mode 100644
index 000000000..8f474e6fc
--- /dev/null
+++ b/drivers/misc/lis3lv02d/Kconfig
@@ -0,0 +1,37 @@
+#
+# STMicroelectonics LIS3LV02D and similar accelerometers
+#
+
+config SENSORS_LIS3_SPI
+	tristate "STMicroeletronics LIS3LV02Dx three-axis digital accelerometer (SPI)"
+	depends on !ACPI && SPI_MASTER && INPUT
+	select SENSORS_LIS3LV02D
+	default n
+	help
+	  This driver provides support for the LIS3LV02Dx accelerometer connected
+	  via SPI. The accelerometer data is readable via
+	  /sys/devices/platform/lis3lv02d.
+
+	  This driver also provides an absolute input class device, allowing
+	  the laptop to act as a pinball machine-esque joystick.
+
+	  This driver can also be built as modules.  If so, the core module
+	  will be called lis3lv02d and a specific module for the SPI transport
+	  is called lis3lv02d_spi.
+
+config SENSORS_LIS3_I2C
+	tristate "STMicroeletronics LIS3LV02Dx three-axis digital accelerometer (I2C)"
+	depends on I2C && INPUT
+	select SENSORS_LIS3LV02D
+	default n
+	help
+	  This driver provides support for the LIS3LV02Dx accelerometer connected
+	  via I2C. The accelerometer data is readable via
+	  /sys/devices/platform/lis3lv02d.
+
+	  This driver also provides an absolute input class device, allowing
+	  the device to act as a pinball machine-esque joystick.
+
+	  This driver can also be built as modules.  If so, the core module
+	  will be called lis3lv02d and a specific module for the I2C transport
+	  is called lis3lv02d_i2c.
diff --git a/drivers/misc/lis3lv02d/Makefile b/drivers/misc/lis3lv02d/Makefile
new file mode 100644
index 000000000..4bf58b16f
--- /dev/null
+++ b/drivers/misc/lis3lv02d/Makefile
@@ -0,0 +1,7 @@
+#
+# STMicroelectonics LIS3LV02D and similar accelerometers
+#
+
+obj-$(CONFIG_SENSORS_LIS3LV02D) += lis3lv02d.o
+obj-$(CONFIG_SENSORS_LIS3_SPI)	+= lis3lv02d_spi.o
+obj-$(CONFIG_SENSORS_LIS3_I2C)	+= lis3lv02d_i2c.o
diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c
new file mode 100644
index 000000000..21ac34b38
--- /dev/null
+++ b/drivers/misc/lis3lv02d/lis3lv02d.c
@@ -0,0 +1,1274 @@
+/*
+ *  lis3lv02d.c - ST LIS3LV02DL accelerometer driver
+ *
+ *  Copyright (C) 2007-2008 Yan Burman
+ *  Copyright (C) 2008 Eric Piel
+ *  Copyright (C) 2008-2009 Pavel Machek
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/sched/signal.h>
+#include <linux/dmi.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/input-polldev.h>
+#include <linux/delay.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/freezer.h>
+#include <linux/uaccess.h>
+#include <linux/miscdevice.h>
+#include <linux/pm_runtime.h>
+#include <linux/atomic.h>
+#include <linux/of_device.h>
+#include "lis3lv02d.h"
+
+#define DRIVER_NAME     "lis3lv02d"
+
+/* joystick device poll interval in milliseconds */
+#define MDPS_POLL_INTERVAL 50
+#define MDPS_POLL_MIN	   0
+#define MDPS_POLL_MAX	   2000
+
+#define LIS3_SYSFS_POWERDOWN_DELAY 5000 /* In milliseconds */
+
+#define SELFTEST_OK	       0
+#define SELFTEST_FAIL	       -1
+#define SELFTEST_IRQ	       -2
+
+#define IRQ_LINE0	       0
+#define IRQ_LINE1	       1
+
+/*
+ * The sensor can also generate interrupts (DRDY) but it's pretty pointless
+ * because they are generated even if the data do not change. So it's better
+ * to keep the interrupt for the free-fall event. The values are updated at
+ * 40Hz (at the lowest frequency), but as it can be pretty time consuming on
+ * some low processor, we poll the sensor only at 20Hz... enough for the
+ * joystick.
+ */
+
+#define LIS3_PWRON_DELAY_WAI_12B	(5000)
+#define LIS3_PWRON_DELAY_WAI_8B		(3000)
+
+/*
+ * LIS3LV02D spec says 1024 LSBs corresponds 1 G -> 1LSB is 1000/1024 mG
+ * LIS302D spec says: 18 mG / digit
+ * LIS3_ACCURACY is used to increase accuracy of the intermediate
+ * calculation results.
+ */
+#define LIS3_ACCURACY			1024
+/* Sensitivity values for -2G +2G scale */
+#define LIS3_SENSITIVITY_12B		((LIS3_ACCURACY * 1000) / 1024)
+#define LIS3_SENSITIVITY_8B		(18 * LIS3_ACCURACY)
+
+/*
+ * LIS331DLH spec says 1LSBs corresponds 4G/4096 -> 1LSB is 1000/1024 mG.
+ * Below macros defines sensitivity values for +/-2G. Dataout bits for
+ * +/-2G range is 12 bits so 4 bits adjustment must be done to get 12bit
+ * data from 16bit value. Currently this driver supports only 2G range.
+ */
+#define LIS3DLH_SENSITIVITY_2G		((LIS3_ACCURACY * 1000) / 1024)
+#define SHIFT_ADJ_2G			4
+
+#define LIS3_DEFAULT_FUZZ_12B		3
+#define LIS3_DEFAULT_FLAT_12B		3
+#define LIS3_DEFAULT_FUZZ_8B		1
+#define LIS3_DEFAULT_FLAT_8B		1
+
+struct lis3lv02d lis3_dev = {
+	.misc_wait   = __WAIT_QUEUE_HEAD_INITIALIZER(lis3_dev.misc_wait),
+};
+EXPORT_SYMBOL_GPL(lis3_dev);
+
+/* just like param_set_int() but does sanity-check so that it won't point
+ * over the axis array size
+ */
+static int param_set_axis(const char *val, const struct kernel_param *kp)
+{
+	int ret = param_set_int(val, kp);
+	if (!ret) {
+		int val = *(int *)kp->arg;
+		if (val < 0)
+			val = -val;
+		if (!val || val > 3)
+			return -EINVAL;
+	}
+	return ret;
+}
+
+static const struct kernel_param_ops param_ops_axis = {
+	.set = param_set_axis,
+	.get = param_get_int,
+};
+
+#define param_check_axis(name, p) param_check_int(name, p)
+
+module_param_array_named(axes, lis3_dev.ac.as_array, axis, NULL, 0644);
+MODULE_PARM_DESC(axes, "Axis-mapping for x,y,z directions");
+
+static s16 lis3lv02d_read_8(struct lis3lv02d *lis3, int reg)
+{
+	s8 lo;
+	if (lis3->read(lis3, reg, &lo) < 0)
+		return 0;
+
+	return lo;
+}
+
+static s16 lis3lv02d_read_12(struct lis3lv02d *lis3, int reg)
+{
+	u8 lo, hi;
+
+	lis3->read(lis3, reg - 1, &lo);
+	lis3->read(lis3, reg, &hi);
+	/* In "12 bit right justified" mode, bit 6, bit 7, bit 8 = bit 5 */
+	return (s16)((hi << 8) | lo);
+}
+
+/* 12bits for 2G range, 13 bits for 4G range and 14 bits for 8G range */
+static s16 lis331dlh_read_data(struct lis3lv02d *lis3, int reg)
+{
+	u8 lo, hi;
+	int v;
+
+	lis3->read(lis3, reg - 1, &lo);
+	lis3->read(lis3, reg, &hi);
+	v = (int) ((hi << 8) | lo);
+
+	return (s16) v >> lis3->shift_adj;
+}
+
+/**
+ * lis3lv02d_get_axis - For the given axis, give the value converted
+ * @axis:      1,2,3 - can also be negative
+ * @hw_values: raw values returned by the hardware
+ *
+ * Returns the converted value.
+ */
+static inline int lis3lv02d_get_axis(s8 axis, int hw_values[3])
+{
+	if (axis > 0)
+		return hw_values[axis - 1];
+	else
+		return -hw_values[-axis - 1];
+}
+
+/**
+ * lis3lv02d_get_xyz - Get X, Y and Z axis values from the accelerometer
+ * @lis3: pointer to the device struct
+ * @x:    where to store the X axis value
+ * @y:    where to store the Y axis value
+ * @z:    where to store the Z axis value
+ *
+ * Note that 40Hz input device can eat up about 10% CPU at 800MHZ
+ */
+static void lis3lv02d_get_xyz(struct lis3lv02d *lis3, int *x, int *y, int *z)
+{
+	int position[3];
+	int i;
+
+	if (lis3->blkread) {
+		if (lis3->whoami == WAI_12B) {
+			u16 data[3];
+			lis3->blkread(lis3, OUTX_L, 6, (u8 *)data);
+			for (i = 0; i < 3; i++)
+				position[i] = (s16)le16_to_cpu(data[i]);
+		} else {
+			u8 data[5];
+			/* Data: x, dummy, y, dummy, z */
+			lis3->blkread(lis3, OUTX, 5, data);
+			for (i = 0; i < 3; i++)
+				position[i] = (s8)data[i * 2];
+		}
+	} else {
+		position[0] = lis3->read_data(lis3, OUTX);
+		position[1] = lis3->read_data(lis3, OUTY);
+		position[2] = lis3->read_data(lis3, OUTZ);
+	}
+
+	for (i = 0; i < 3; i++)
+		position[i] = (position[i] * lis3->scale) / LIS3_ACCURACY;
+
+	*x = lis3lv02d_get_axis(lis3->ac.x, position);
+	*y = lis3lv02d_get_axis(lis3->ac.y, position);
+	*z = lis3lv02d_get_axis(lis3->ac.z, position);
+}
+
+/* conversion btw sampling rate and the register values */
+static int lis3_12_rates[4] = {40, 160, 640, 2560};
+static int lis3_8_rates[2] = {100, 400};
+static int lis3_3dc_rates[16] = {0, 1, 10, 25, 50, 100, 200, 400, 1600, 5000};
+static int lis3_3dlh_rates[4] = {50, 100, 400, 1000};
+
+/* ODR is Output Data Rate */
+static int lis3lv02d_get_odr_index(struct lis3lv02d *lis3)
+{
+	u8 ctrl;
+	int shift;
+
+	lis3->read(lis3, CTRL_REG1, &ctrl);
+	ctrl &= lis3->odr_mask;
+	shift = ffs(lis3->odr_mask) - 1;
+	return (ctrl >> shift);
+}
+
+static int lis3lv02d_get_pwron_wait(struct lis3lv02d *lis3)
+{
+	int odr_idx = lis3lv02d_get_odr_index(lis3);
+	int div = lis3->odrs[odr_idx];
+
+	if (div == 0) {
+		if (odr_idx == 0) {
+			/* Power-down mode, not sampling no need to sleep */
+			return 0;
+		}
+
+		dev_err(&lis3->pdev->dev, "Error unknown odrs-index: %d\n", odr_idx);
+		return -ENXIO;
+	}
+
+	/* LIS3 power on delay is quite long */
+	msleep(lis3->pwron_delay / div);
+	return 0;
+}
+
+static int lis3lv02d_set_odr(struct lis3lv02d *lis3, int rate)
+{
+	u8 ctrl;
+	int i, len, shift;
+
+	if (!rate)
+		return -EINVAL;
+
+	lis3->read(lis3, CTRL_REG1, &ctrl);
+	ctrl &= ~lis3->odr_mask;
+	len = 1 << hweight_long(lis3->odr_mask); /* # of possible values */
+	shift = ffs(lis3->odr_mask) - 1;
+
+	for (i = 0; i < len; i++)
+		if (lis3->odrs[i] == rate) {
+			lis3->write(lis3, CTRL_REG1,
+					ctrl | (i << shift));
+			return 0;
+		}
+	return -EINVAL;
+}
+
+static int lis3lv02d_selftest(struct lis3lv02d *lis3, s16 results[3])
+{
+	u8 ctlreg, reg;
+	s16 x, y, z;
+	u8 selftest;
+	int ret;
+	u8 ctrl_reg_data;
+	unsigned char irq_cfg;
+
+	mutex_lock(&lis3->mutex);
+
+	irq_cfg = lis3->irq_cfg;
+	if (lis3->whoami == WAI_8B) {
+		lis3->data_ready_count[IRQ_LINE0] = 0;
+		lis3->data_ready_count[IRQ_LINE1] = 0;
+
+		/* Change interrupt cfg to data ready for selftest */
+		atomic_inc(&lis3->wake_thread);
+		lis3->irq_cfg = LIS3_IRQ1_DATA_READY | LIS3_IRQ2_DATA_READY;
+		lis3->read(lis3, CTRL_REG3, &ctrl_reg_data);
+		lis3->write(lis3, CTRL_REG3, (ctrl_reg_data &
+				~(LIS3_IRQ1_MASK | LIS3_IRQ2_MASK)) |
+				(LIS3_IRQ1_DATA_READY | LIS3_IRQ2_DATA_READY));
+	}
+
+	if ((lis3->whoami == WAI_3DC) || (lis3->whoami == WAI_3DLH)) {
+		ctlreg = CTRL_REG4;
+		selftest = CTRL4_ST0;
+	} else {
+		ctlreg = CTRL_REG1;
+		if (lis3->whoami == WAI_12B)
+			selftest = CTRL1_ST;
+		else
+			selftest = CTRL1_STP;
+	}
+
+	lis3->read(lis3, ctlreg, &reg);
+	lis3->write(lis3, ctlreg, (reg | selftest));
+	ret = lis3lv02d_get_pwron_wait(lis3);
+	if (ret)
+		goto fail;
+
+	/* Read directly to avoid axis remap */
+	x = lis3->read_data(lis3, OUTX);
+	y = lis3->read_data(lis3, OUTY);
+	z = lis3->read_data(lis3, OUTZ);
+
+	/* back to normal settings */
+	lis3->write(lis3, ctlreg, reg);
+	ret = lis3lv02d_get_pwron_wait(lis3);
+	if (ret)
+		goto fail;
+
+	results[0] = x - lis3->read_data(lis3, OUTX);
+	results[1] = y - lis3->read_data(lis3, OUTY);
+	results[2] = z - lis3->read_data(lis3, OUTZ);
+
+	ret = 0;
+
+	if (lis3->whoami == WAI_8B) {
+		/* Restore original interrupt configuration */
+		atomic_dec(&lis3->wake_thread);
+		lis3->write(lis3, CTRL_REG3, ctrl_reg_data);
+		lis3->irq_cfg = irq_cfg;
+
+		if ((irq_cfg & LIS3_IRQ1_MASK) &&
+			lis3->data_ready_count[IRQ_LINE0] < 2) {
+			ret = SELFTEST_IRQ;
+			goto fail;
+		}
+
+		if ((irq_cfg & LIS3_IRQ2_MASK) &&
+			lis3->data_ready_count[IRQ_LINE1] < 2) {
+			ret = SELFTEST_IRQ;
+			goto fail;
+		}
+	}
+
+	if (lis3->pdata) {
+		int i;
+		for (i = 0; i < 3; i++) {
+			/* Check against selftest acceptance limits */
+			if ((results[i] < lis3->pdata->st_min_limits[i]) ||
+			    (results[i] > lis3->pdata->st_max_limits[i])) {
+				ret = SELFTEST_FAIL;
+				goto fail;
+			}
+		}
+	}
+
+	/* test passed */
+fail:
+	mutex_unlock(&lis3->mutex);
+	return ret;
+}
+
+/*
+ * Order of registers in the list affects to order of the restore process.
+ * Perhaps it is a good idea to set interrupt enable register as a last one
+ * after all other configurations
+ */
+static u8 lis3_wai8_regs[] = { FF_WU_CFG_1, FF_WU_THS_1, FF_WU_DURATION_1,
+			       FF_WU_CFG_2, FF_WU_THS_2, FF_WU_DURATION_2,
+			       CLICK_CFG, CLICK_SRC, CLICK_THSY_X, CLICK_THSZ,
+			       CLICK_TIMELIMIT, CLICK_LATENCY, CLICK_WINDOW,
+			       CTRL_REG1, CTRL_REG2, CTRL_REG3};
+
+static u8 lis3_wai12_regs[] = {FF_WU_CFG, FF_WU_THS_L, FF_WU_THS_H,
+			       FF_WU_DURATION, DD_CFG, DD_THSI_L, DD_THSI_H,
+			       DD_THSE_L, DD_THSE_H,
+			       CTRL_REG1, CTRL_REG3, CTRL_REG2};
+
+static inline void lis3_context_save(struct lis3lv02d *lis3)
+{
+	int i;
+	for (i = 0; i < lis3->regs_size; i++)
+		lis3->read(lis3, lis3->regs[i], &lis3->reg_cache[i]);
+	lis3->regs_stored = true;
+}
+
+static inline void lis3_context_restore(struct lis3lv02d *lis3)
+{
+	int i;
+	if (lis3->regs_stored)
+		for (i = 0; i < lis3->regs_size; i++)
+			lis3->write(lis3, lis3->regs[i], lis3->reg_cache[i]);
+}
+
+void lis3lv02d_poweroff(struct lis3lv02d *lis3)
+{
+	if (lis3->reg_ctrl)
+		lis3_context_save(lis3);
+	/* disable X,Y,Z axis and power down */
+	lis3->write(lis3, CTRL_REG1, 0x00);
+	if (lis3->reg_ctrl)
+		lis3->reg_ctrl(lis3, LIS3_REG_OFF);
+}
+EXPORT_SYMBOL_GPL(lis3lv02d_poweroff);
+
+int lis3lv02d_poweron(struct lis3lv02d *lis3)
+{
+	int err;
+	u8 reg;
+
+	lis3->init(lis3);
+
+	/*
+	 * Common configuration
+	 * BDU: (12 bits sensors only) LSB and MSB values are not updated until
+	 *      both have been read. So the value read will always be correct.
+	 * Set BOOT bit to refresh factory tuning values.
+	 */
+	if (lis3->pdata) {
+		lis3->read(lis3, CTRL_REG2, &reg);
+		if (lis3->whoami ==  WAI_12B)
+			reg |= CTRL2_BDU | CTRL2_BOOT;
+		else if (lis3->whoami ==  WAI_3DLH)
+			reg |= CTRL2_BOOT_3DLH;
+		else
+			reg |= CTRL2_BOOT_8B;
+		lis3->write(lis3, CTRL_REG2, reg);
+
+		if (lis3->whoami ==  WAI_3DLH) {
+			lis3->read(lis3, CTRL_REG4, &reg);
+			reg |= CTRL4_BDU;
+			lis3->write(lis3, CTRL_REG4, reg);
+		}
+	}
+
+	err = lis3lv02d_get_pwron_wait(lis3);
+	if (err)
+		return err;
+
+	if (lis3->reg_ctrl)
+		lis3_context_restore(lis3);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(lis3lv02d_poweron);
+
+
+static void lis3lv02d_joystick_poll(struct input_polled_dev *pidev)
+{
+	struct lis3lv02d *lis3 = pidev->private;
+	int x, y, z;
+
+	mutex_lock(&lis3->mutex);
+	lis3lv02d_get_xyz(lis3, &x, &y, &z);
+	input_report_abs(pidev->input, ABS_X, x);
+	input_report_abs(pidev->input, ABS_Y, y);
+	input_report_abs(pidev->input, ABS_Z, z);
+	input_sync(pidev->input);
+	mutex_unlock(&lis3->mutex);
+}
+
+static void lis3lv02d_joystick_open(struct input_polled_dev *pidev)
+{
+	struct lis3lv02d *lis3 = pidev->private;
+
+	if (lis3->pm_dev)
+		pm_runtime_get_sync(lis3->pm_dev);
+
+	if (lis3->pdata && lis3->whoami == WAI_8B && lis3->idev)
+		atomic_set(&lis3->wake_thread, 1);
+	/*
+	 * Update coordinates for the case where poll interval is 0 and
+	 * the chip in running purely under interrupt control
+	 */
+	lis3lv02d_joystick_poll(pidev);
+}
+
+static void lis3lv02d_joystick_close(struct input_polled_dev *pidev)
+{
+	struct lis3lv02d *lis3 = pidev->private;
+
+	atomic_set(&lis3->wake_thread, 0);
+	if (lis3->pm_dev)
+		pm_runtime_put(lis3->pm_dev);
+}
+
+static irqreturn_t lis302dl_interrupt(int irq, void *data)
+{
+	struct lis3lv02d *lis3 = data;
+
+	if (!test_bit(0, &lis3->misc_opened))
+		goto out;
+
+	/*
+	 * Be careful: on some HP laptops the bios force DD when on battery and
+	 * the lid is closed. This leads to interrupts as soon as a little move
+	 * is done.
+	 */
+	atomic_inc(&lis3->count);
+
+	wake_up_interruptible(&lis3->misc_wait);
+	kill_fasync(&lis3->async_queue, SIGIO, POLL_IN);
+out:
+	if (atomic_read(&lis3->wake_thread))
+		return IRQ_WAKE_THREAD;
+	return IRQ_HANDLED;
+}
+
+static void lis302dl_interrupt_handle_click(struct lis3lv02d *lis3)
+{
+	struct input_dev *dev = lis3->idev->input;
+	u8 click_src;
+
+	mutex_lock(&lis3->mutex);
+	lis3->read(lis3, CLICK_SRC, &click_src);
+
+	if (click_src & CLICK_SINGLE_X) {
+		input_report_key(dev, lis3->mapped_btns[0], 1);
+		input_report_key(dev, lis3->mapped_btns[0], 0);
+	}
+
+	if (click_src & CLICK_SINGLE_Y) {
+		input_report_key(dev, lis3->mapped_btns[1], 1);
+		input_report_key(dev, lis3->mapped_btns[1], 0);
+	}
+
+	if (click_src & CLICK_SINGLE_Z) {
+		input_report_key(dev, lis3->mapped_btns[2], 1);
+		input_report_key(dev, lis3->mapped_btns[2], 0);
+	}
+	input_sync(dev);
+	mutex_unlock(&lis3->mutex);
+}
+
+static inline void lis302dl_data_ready(struct lis3lv02d *lis3, int index)
+{
+	int dummy;
+
+	/* Dummy read to ack interrupt */
+	lis3lv02d_get_xyz(lis3, &dummy, &dummy, &dummy);
+	lis3->data_ready_count[index]++;
+}
+
+static irqreturn_t lis302dl_interrupt_thread1_8b(int irq, void *data)
+{
+	struct lis3lv02d *lis3 = data;
+	u8 irq_cfg = lis3->irq_cfg & LIS3_IRQ1_MASK;
+
+	if (irq_cfg == LIS3_IRQ1_CLICK)
+		lis302dl_interrupt_handle_click(lis3);
+	else if (unlikely(irq_cfg == LIS3_IRQ1_DATA_READY))
+		lis302dl_data_ready(lis3, IRQ_LINE0);
+	else
+		lis3lv02d_joystick_poll(lis3->idev);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t lis302dl_interrupt_thread2_8b(int irq, void *data)
+{
+	struct lis3lv02d *lis3 = data;
+	u8 irq_cfg = lis3->irq_cfg & LIS3_IRQ2_MASK;
+
+	if (irq_cfg == LIS3_IRQ2_CLICK)
+		lis302dl_interrupt_handle_click(lis3);
+	else if (unlikely(irq_cfg == LIS3_IRQ2_DATA_READY))
+		lis302dl_data_ready(lis3, IRQ_LINE1);
+	else
+		lis3lv02d_joystick_poll(lis3->idev);
+
+	return IRQ_HANDLED;
+}
+
+static int lis3lv02d_misc_open(struct inode *inode, struct file *file)
+{
+	struct lis3lv02d *lis3 = container_of(file->private_data,
+					      struct lis3lv02d, miscdev);
+
+	if (test_and_set_bit(0, &lis3->misc_opened))
+		return -EBUSY; /* already open */
+
+	if (lis3->pm_dev)
+		pm_runtime_get_sync(lis3->pm_dev);
+
+	atomic_set(&lis3->count, 0);
+	return 0;
+}
+
+static int lis3lv02d_misc_release(struct inode *inode, struct file *file)
+{
+	struct lis3lv02d *lis3 = container_of(file->private_data,
+					      struct lis3lv02d, miscdev);
+
+	clear_bit(0, &lis3->misc_opened); /* release the device */
+	if (lis3->pm_dev)
+		pm_runtime_put(lis3->pm_dev);
+	return 0;
+}
+
+static ssize_t lis3lv02d_misc_read(struct file *file, char __user *buf,
+				size_t count, loff_t *pos)
+{
+	struct lis3lv02d *lis3 = container_of(file->private_data,
+					      struct lis3lv02d, miscdev);
+
+	DECLARE_WAITQUEUE(wait, current);
+	u32 data;
+	unsigned char byte_data;
+	ssize_t retval = 1;
+
+	if (count < 1)
+		return -EINVAL;
+
+	add_wait_queue(&lis3->misc_wait, &wait);
+	while (true) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		data = atomic_xchg(&lis3->count, 0);
+		if (data)
+			break;
+
+		if (file->f_flags & O_NONBLOCK) {
+			retval = -EAGAIN;
+			goto out;
+		}
+
+		if (signal_pending(current)) {
+			retval = -ERESTARTSYS;
+			goto out;
+		}
+
+		schedule();
+	}
+
+	if (data < 255)
+		byte_data = data;
+	else
+		byte_data = 255;
+
+	/* make sure we are not going into copy_to_user() with
+	 * TASK_INTERRUPTIBLE state */
+	set_current_state(TASK_RUNNING);
+	if (copy_to_user(buf, &byte_data, sizeof(byte_data)))
+		retval = -EFAULT;
+
+out:
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(&lis3->misc_wait, &wait);
+
+	return retval;
+}
+
+static __poll_t lis3lv02d_misc_poll(struct file *file, poll_table *wait)
+{
+	struct lis3lv02d *lis3 = container_of(file->private_data,
+					      struct lis3lv02d, miscdev);
+
+	poll_wait(file, &lis3->misc_wait, wait);
+	if (atomic_read(&lis3->count))
+		return EPOLLIN | EPOLLRDNORM;
+	return 0;
+}
+
+static int lis3lv02d_misc_fasync(int fd, struct file *file, int on)
+{
+	struct lis3lv02d *lis3 = container_of(file->private_data,
+					      struct lis3lv02d, miscdev);
+
+	return fasync_helper(fd, file, on, &lis3->async_queue);
+}
+
+static const struct file_operations lis3lv02d_misc_fops = {
+	.owner   = THIS_MODULE,
+	.llseek  = no_llseek,
+	.read    = lis3lv02d_misc_read,
+	.open    = lis3lv02d_misc_open,
+	.release = lis3lv02d_misc_release,
+	.poll    = lis3lv02d_misc_poll,
+	.fasync  = lis3lv02d_misc_fasync,
+};
+
+int lis3lv02d_joystick_enable(struct lis3lv02d *lis3)
+{
+	struct input_dev *input_dev;
+	int err;
+	int max_val, fuzz, flat;
+	int btns[] = {BTN_X, BTN_Y, BTN_Z};
+
+	if (lis3->idev)
+		return -EINVAL;
+
+	lis3->idev = input_allocate_polled_device();
+	if (!lis3->idev)
+		return -ENOMEM;
+
+	lis3->idev->poll = lis3lv02d_joystick_poll;
+	lis3->idev->open = lis3lv02d_joystick_open;
+	lis3->idev->close = lis3lv02d_joystick_close;
+	lis3->idev->poll_interval = MDPS_POLL_INTERVAL;
+	lis3->idev->poll_interval_min = MDPS_POLL_MIN;
+	lis3->idev->poll_interval_max = MDPS_POLL_MAX;
+	lis3->idev->private = lis3;
+	input_dev = lis3->idev->input;
+
+	input_dev->name       = "ST LIS3LV02DL Accelerometer";
+	input_dev->phys       = DRIVER_NAME "/input0";
+	input_dev->id.bustype = BUS_HOST;
+	input_dev->id.vendor  = 0;
+	input_dev->dev.parent = &lis3->pdev->dev;
+
+	set_bit(EV_ABS, input_dev->evbit);
+	max_val = (lis3->mdps_max_val * lis3->scale) / LIS3_ACCURACY;
+	if (lis3->whoami == WAI_12B) {
+		fuzz = LIS3_DEFAULT_FUZZ_12B;
+		flat = LIS3_DEFAULT_FLAT_12B;
+	} else {
+		fuzz = LIS3_DEFAULT_FUZZ_8B;
+		flat = LIS3_DEFAULT_FLAT_8B;
+	}
+	fuzz = (fuzz * lis3->scale) / LIS3_ACCURACY;
+	flat = (flat * lis3->scale) / LIS3_ACCURACY;
+
+	input_set_abs_params(input_dev, ABS_X, -max_val, max_val, fuzz, flat);
+	input_set_abs_params(input_dev, ABS_Y, -max_val, max_val, fuzz, flat);
+	input_set_abs_params(input_dev, ABS_Z, -max_val, max_val, fuzz, flat);
+
+	lis3->mapped_btns[0] = lis3lv02d_get_axis(abs(lis3->ac.x), btns);
+	lis3->mapped_btns[1] = lis3lv02d_get_axis(abs(lis3->ac.y), btns);
+	lis3->mapped_btns[2] = lis3lv02d_get_axis(abs(lis3->ac.z), btns);
+
+	err = input_register_polled_device(lis3->idev);
+	if (err) {
+		input_free_polled_device(lis3->idev);
+		lis3->idev = NULL;
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(lis3lv02d_joystick_enable);
+
+void lis3lv02d_joystick_disable(struct lis3lv02d *lis3)
+{
+	if (lis3->irq)
+		free_irq(lis3->irq, lis3);
+	if (lis3->pdata && lis3->pdata->irq2)
+		free_irq(lis3->pdata->irq2, lis3);
+
+	if (!lis3->idev)
+		return;
+
+	if (lis3->irq)
+		misc_deregister(&lis3->miscdev);
+	input_unregister_polled_device(lis3->idev);
+	input_free_polled_device(lis3->idev);
+	lis3->idev = NULL;
+}
+EXPORT_SYMBOL_GPL(lis3lv02d_joystick_disable);
+
+/* Sysfs stuff */
+static void lis3lv02d_sysfs_poweron(struct lis3lv02d *lis3)
+{
+	/*
+	 * SYSFS functions are fast visitors so put-call
+	 * immediately after the get-call. However, keep
+	 * chip running for a while and schedule delayed
+	 * suspend. This way periodic sysfs calls doesn't
+	 * suffer from relatively long power up time.
+	 */
+
+	if (lis3->pm_dev) {
+		pm_runtime_get_sync(lis3->pm_dev);
+		pm_runtime_put_noidle(lis3->pm_dev);
+		pm_schedule_suspend(lis3->pm_dev, LIS3_SYSFS_POWERDOWN_DELAY);
+	}
+}
+
+static ssize_t lis3lv02d_selftest_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct lis3lv02d *lis3 = dev_get_drvdata(dev);
+	s16 values[3];
+
+	static const char ok[] = "OK";
+	static const char fail[] = "FAIL";
+	static const char irq[] = "FAIL_IRQ";
+	const char *res;
+
+	lis3lv02d_sysfs_poweron(lis3);
+	switch (lis3lv02d_selftest(lis3, values)) {
+	case SELFTEST_FAIL:
+		res = fail;
+		break;
+	case SELFTEST_IRQ:
+		res = irq;
+		break;
+	case SELFTEST_OK:
+	default:
+		res = ok;
+		break;
+	}
+	return sprintf(buf, "%s %d %d %d\n", res,
+		values[0], values[1], values[2]);
+}
+
+static ssize_t lis3lv02d_position_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct lis3lv02d *lis3 = dev_get_drvdata(dev);
+	int x, y, z;
+
+	lis3lv02d_sysfs_poweron(lis3);
+	mutex_lock(&lis3->mutex);
+	lis3lv02d_get_xyz(lis3, &x, &y, &z);
+	mutex_unlock(&lis3->mutex);
+	return sprintf(buf, "(%d,%d,%d)\n", x, y, z);
+}
+
+static ssize_t lis3lv02d_rate_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct lis3lv02d *lis3 = dev_get_drvdata(dev);
+	int odr_idx;
+
+	lis3lv02d_sysfs_poweron(lis3);
+
+	odr_idx = lis3lv02d_get_odr_index(lis3);
+	return sprintf(buf, "%d\n", lis3->odrs[odr_idx]);
+}
+
+static ssize_t lis3lv02d_rate_set(struct device *dev,
+				struct device_attribute *attr, const char *buf,
+				size_t count)
+{
+	struct lis3lv02d *lis3 = dev_get_drvdata(dev);
+	unsigned long rate;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &rate);
+	if (ret)
+		return ret;
+
+	lis3lv02d_sysfs_poweron(lis3);
+	if (lis3lv02d_set_odr(lis3, rate))
+		return -EINVAL;
+
+	return count;
+}
+
+static DEVICE_ATTR(selftest, S_IRUSR, lis3lv02d_selftest_show, NULL);
+static DEVICE_ATTR(position, S_IRUGO, lis3lv02d_position_show, NULL);
+static DEVICE_ATTR(rate, S_IRUGO | S_IWUSR, lis3lv02d_rate_show,
+					    lis3lv02d_rate_set);
+
+static struct attribute *lis3lv02d_attributes[] = {
+	&dev_attr_selftest.attr,
+	&dev_attr_position.attr,
+	&dev_attr_rate.attr,
+	NULL
+};
+
+static const struct attribute_group lis3lv02d_attribute_group = {
+	.attrs = lis3lv02d_attributes
+};
+
+
+static int lis3lv02d_add_fs(struct lis3lv02d *lis3)
+{
+	lis3->pdev = platform_device_register_simple(DRIVER_NAME, -1, NULL, 0);
+	if (IS_ERR(lis3->pdev))
+		return PTR_ERR(lis3->pdev);
+
+	platform_set_drvdata(lis3->pdev, lis3);
+	return sysfs_create_group(&lis3->pdev->dev.kobj, &lis3lv02d_attribute_group);
+}
+
+int lis3lv02d_remove_fs(struct lis3lv02d *lis3)
+{
+	sysfs_remove_group(&lis3->pdev->dev.kobj, &lis3lv02d_attribute_group);
+	platform_device_unregister(lis3->pdev);
+	if (lis3->pm_dev) {
+		/* Barrier after the sysfs remove */
+		pm_runtime_barrier(lis3->pm_dev);
+
+		/* SYSFS may have left chip running. Turn off if necessary */
+		if (!pm_runtime_suspended(lis3->pm_dev))
+			lis3lv02d_poweroff(lis3);
+
+		pm_runtime_disable(lis3->pm_dev);
+		pm_runtime_set_suspended(lis3->pm_dev);
+	}
+	kfree(lis3->reg_cache);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(lis3lv02d_remove_fs);
+
+static void lis3lv02d_8b_configure(struct lis3lv02d *lis3,
+				struct lis3lv02d_platform_data *p)
+{
+	int err;
+	int ctrl2 = p->hipass_ctrl;
+
+	if (p->click_flags) {
+		lis3->write(lis3, CLICK_CFG, p->click_flags);
+		lis3->write(lis3, CLICK_TIMELIMIT, p->click_time_limit);
+		lis3->write(lis3, CLICK_LATENCY, p->click_latency);
+		lis3->write(lis3, CLICK_WINDOW, p->click_window);
+		lis3->write(lis3, CLICK_THSZ, p->click_thresh_z & 0xf);
+		lis3->write(lis3, CLICK_THSY_X,
+			(p->click_thresh_x & 0xf) |
+			(p->click_thresh_y << 4));
+
+		if (lis3->idev) {
+			struct input_dev *input_dev = lis3->idev->input;
+			input_set_capability(input_dev, EV_KEY, BTN_X);
+			input_set_capability(input_dev, EV_KEY, BTN_Y);
+			input_set_capability(input_dev, EV_KEY, BTN_Z);
+		}
+	}
+
+	if (p->wakeup_flags) {
+		lis3->write(lis3, FF_WU_CFG_1, p->wakeup_flags);
+		lis3->write(lis3, FF_WU_THS_1, p->wakeup_thresh & 0x7f);
+		/* pdata value + 1 to keep this backward compatible*/
+		lis3->write(lis3, FF_WU_DURATION_1, p->duration1 + 1);
+		ctrl2 ^= HP_FF_WU1; /* Xor to keep compatible with old pdata*/
+	}
+
+	if (p->wakeup_flags2) {
+		lis3->write(lis3, FF_WU_CFG_2, p->wakeup_flags2);
+		lis3->write(lis3, FF_WU_THS_2, p->wakeup_thresh2 & 0x7f);
+		/* pdata value + 1 to keep this backward compatible*/
+		lis3->write(lis3, FF_WU_DURATION_2, p->duration2 + 1);
+		ctrl2 ^= HP_FF_WU2; /* Xor to keep compatible with old pdata*/
+	}
+	/* Configure hipass filters */
+	lis3->write(lis3, CTRL_REG2, ctrl2);
+
+	if (p->irq2) {
+		err = request_threaded_irq(p->irq2,
+					NULL,
+					lis302dl_interrupt_thread2_8b,
+					IRQF_TRIGGER_RISING | IRQF_ONESHOT |
+					(p->irq_flags2 & IRQF_TRIGGER_MASK),
+					DRIVER_NAME, lis3);
+		if (err < 0)
+			pr_err("No second IRQ. Limited functionality\n");
+	}
+}
+
+#ifdef CONFIG_OF
+int lis3lv02d_init_dt(struct lis3lv02d *lis3)
+{
+	struct lis3lv02d_platform_data *pdata;
+	struct device_node *np = lis3->of_node;
+	u32 val;
+	s32 sval;
+
+	if (!lis3->of_node)
+		return 0;
+
+	pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return -ENOMEM;
+
+	if (of_get_property(np, "st,click-single-x", NULL))
+		pdata->click_flags |= LIS3_CLICK_SINGLE_X;
+	if (of_get_property(np, "st,click-double-x", NULL))
+		pdata->click_flags |= LIS3_CLICK_DOUBLE_X;
+
+	if (of_get_property(np, "st,click-single-y", NULL))
+		pdata->click_flags |= LIS3_CLICK_SINGLE_Y;
+	if (of_get_property(np, "st,click-double-y", NULL))
+		pdata->click_flags |= LIS3_CLICK_DOUBLE_Y;
+
+	if (of_get_property(np, "st,click-single-z", NULL))
+		pdata->click_flags |= LIS3_CLICK_SINGLE_Z;
+	if (of_get_property(np, "st,click-double-z", NULL))
+		pdata->click_flags |= LIS3_CLICK_DOUBLE_Z;
+
+	if (!of_property_read_u32(np, "st,click-threshold-x", &val))
+		pdata->click_thresh_x = val;
+	if (!of_property_read_u32(np, "st,click-threshold-y", &val))
+		pdata->click_thresh_y = val;
+	if (!of_property_read_u32(np, "st,click-threshold-z", &val))
+		pdata->click_thresh_z = val;
+
+	if (!of_property_read_u32(np, "st,click-time-limit", &val))
+		pdata->click_time_limit = val;
+	if (!of_property_read_u32(np, "st,click-latency", &val))
+		pdata->click_latency = val;
+	if (!of_property_read_u32(np, "st,click-window", &val))
+		pdata->click_window = val;
+
+	if (of_get_property(np, "st,irq1-disable", NULL))
+		pdata->irq_cfg |= LIS3_IRQ1_DISABLE;
+	if (of_get_property(np, "st,irq1-ff-wu-1", NULL))
+		pdata->irq_cfg |= LIS3_IRQ1_FF_WU_1;
+	if (of_get_property(np, "st,irq1-ff-wu-2", NULL))
+		pdata->irq_cfg |= LIS3_IRQ1_FF_WU_2;
+	if (of_get_property(np, "st,irq1-data-ready", NULL))
+		pdata->irq_cfg |= LIS3_IRQ1_DATA_READY;
+	if (of_get_property(np, "st,irq1-click", NULL))
+		pdata->irq_cfg |= LIS3_IRQ1_CLICK;
+
+	if (of_get_property(np, "st,irq2-disable", NULL))
+		pdata->irq_cfg |= LIS3_IRQ2_DISABLE;
+	if (of_get_property(np, "st,irq2-ff-wu-1", NULL))
+		pdata->irq_cfg |= LIS3_IRQ2_FF_WU_1;
+	if (of_get_property(np, "st,irq2-ff-wu-2", NULL))
+		pdata->irq_cfg |= LIS3_IRQ2_FF_WU_2;
+	if (of_get_property(np, "st,irq2-data-ready", NULL))
+		pdata->irq_cfg |= LIS3_IRQ2_DATA_READY;
+	if (of_get_property(np, "st,irq2-click", NULL))
+		pdata->irq_cfg |= LIS3_IRQ2_CLICK;
+
+	if (of_get_property(np, "st,irq-open-drain", NULL))
+		pdata->irq_cfg |= LIS3_IRQ_OPEN_DRAIN;
+	if (of_get_property(np, "st,irq-active-low", NULL))
+		pdata->irq_cfg |= LIS3_IRQ_ACTIVE_LOW;
+
+	if (!of_property_read_u32(np, "st,wu-duration-1", &val))
+		pdata->duration1 = val;
+	if (!of_property_read_u32(np, "st,wu-duration-2", &val))
+		pdata->duration2 = val;
+
+	if (of_get_property(np, "st,wakeup-x-lo", NULL))
+		pdata->wakeup_flags |= LIS3_WAKEUP_X_LO;
+	if (of_get_property(np, "st,wakeup-x-hi", NULL))
+		pdata->wakeup_flags |= LIS3_WAKEUP_X_HI;
+	if (of_get_property(np, "st,wakeup-y-lo", NULL))
+		pdata->wakeup_flags |= LIS3_WAKEUP_Y_LO;
+	if (of_get_property(np, "st,wakeup-y-hi", NULL))
+		pdata->wakeup_flags |= LIS3_WAKEUP_Y_HI;
+	if (of_get_property(np, "st,wakeup-z-lo", NULL))
+		pdata->wakeup_flags |= LIS3_WAKEUP_Z_LO;
+	if (of_get_property(np, "st,wakeup-z-hi", NULL))
+		pdata->wakeup_flags |= LIS3_WAKEUP_Z_HI;
+	if (of_get_property(np, "st,wakeup-threshold", &val))
+		pdata->wakeup_thresh = val;
+
+	if (of_get_property(np, "st,wakeup2-x-lo", NULL))
+		pdata->wakeup_flags2 |= LIS3_WAKEUP_X_LO;
+	if (of_get_property(np, "st,wakeup2-x-hi", NULL))
+		pdata->wakeup_flags2 |= LIS3_WAKEUP_X_HI;
+	if (of_get_property(np, "st,wakeup2-y-lo", NULL))
+		pdata->wakeup_flags2 |= LIS3_WAKEUP_Y_LO;
+	if (of_get_property(np, "st,wakeup2-y-hi", NULL))
+		pdata->wakeup_flags2 |= LIS3_WAKEUP_Y_HI;
+	if (of_get_property(np, "st,wakeup2-z-lo", NULL))
+		pdata->wakeup_flags2 |= LIS3_WAKEUP_Z_LO;
+	if (of_get_property(np, "st,wakeup2-z-hi", NULL))
+		pdata->wakeup_flags2 |= LIS3_WAKEUP_Z_HI;
+	if (of_get_property(np, "st,wakeup2-threshold", &val))
+		pdata->wakeup_thresh2 = val;
+
+	if (!of_property_read_u32(np, "st,highpass-cutoff-hz", &val)) {
+		switch (val) {
+		case 1:
+			pdata->hipass_ctrl = LIS3_HIPASS_CUTFF_1HZ;
+			break;
+		case 2:
+			pdata->hipass_ctrl = LIS3_HIPASS_CUTFF_2HZ;
+			break;
+		case 4:
+			pdata->hipass_ctrl = LIS3_HIPASS_CUTFF_4HZ;
+			break;
+		case 8:
+			pdata->hipass_ctrl = LIS3_HIPASS_CUTFF_8HZ;
+			break;
+		}
+	}
+
+	if (of_get_property(np, "st,hipass1-disable", NULL))
+		pdata->hipass_ctrl |= LIS3_HIPASS1_DISABLE;
+	if (of_get_property(np, "st,hipass2-disable", NULL))
+		pdata->hipass_ctrl |= LIS3_HIPASS2_DISABLE;
+
+	if (of_property_read_s32(np, "st,axis-x", &sval) == 0)
+		pdata->axis_x = sval;
+	if (of_property_read_s32(np, "st,axis-y", &sval) == 0)
+		pdata->axis_y = sval;
+	if (of_property_read_s32(np, "st,axis-z", &sval) == 0)
+		pdata->axis_z = sval;
+
+	if (of_get_property(np, "st,default-rate", NULL))
+		pdata->default_rate = val;
+
+	if (of_property_read_s32(np, "st,min-limit-x", &sval) == 0)
+		pdata->st_min_limits[0] = sval;
+	if (of_property_read_s32(np, "st,min-limit-y", &sval) == 0)
+		pdata->st_min_limits[1] = sval;
+	if (of_property_read_s32(np, "st,min-limit-z", &sval) == 0)
+		pdata->st_min_limits[2] = sval;
+
+	if (of_property_read_s32(np, "st,max-limit-x", &sval) == 0)
+		pdata->st_max_limits[0] = sval;
+	if (of_property_read_s32(np, "st,max-limit-y", &sval) == 0)
+		pdata->st_max_limits[1] = sval;
+	if (of_property_read_s32(np, "st,max-limit-z", &sval) == 0)
+		pdata->st_max_limits[2] = sval;
+
+
+	lis3->pdata = pdata;
+
+	return 0;
+}
+
+#else
+int lis3lv02d_init_dt(struct lis3lv02d *lis3)
+{
+	return 0;
+}
+#endif
+EXPORT_SYMBOL_GPL(lis3lv02d_init_dt);
+
+/*
+ * Initialise the accelerometer and the various subsystems.
+ * Should be rather independent of the bus system.
+ */
+int lis3lv02d_init_device(struct lis3lv02d *lis3)
+{
+	int err;
+	irq_handler_t thread_fn;
+	int irq_flags = 0;
+
+	lis3->whoami = lis3lv02d_read_8(lis3, WHO_AM_I);
+
+	switch (lis3->whoami) {
+	case WAI_12B:
+		pr_info("12 bits sensor found\n");
+		lis3->read_data = lis3lv02d_read_12;
+		lis3->mdps_max_val = 2048;
+		lis3->pwron_delay = LIS3_PWRON_DELAY_WAI_12B;
+		lis3->odrs = lis3_12_rates;
+		lis3->odr_mask = CTRL1_DF0 | CTRL1_DF1;
+		lis3->scale = LIS3_SENSITIVITY_12B;
+		lis3->regs = lis3_wai12_regs;
+		lis3->regs_size = ARRAY_SIZE(lis3_wai12_regs);
+		break;
+	case WAI_8B:
+		pr_info("8 bits sensor found\n");
+		lis3->read_data = lis3lv02d_read_8;
+		lis3->mdps_max_val = 128;
+		lis3->pwron_delay = LIS3_PWRON_DELAY_WAI_8B;
+		lis3->odrs = lis3_8_rates;
+		lis3->odr_mask = CTRL1_DR;
+		lis3->scale = LIS3_SENSITIVITY_8B;
+		lis3->regs = lis3_wai8_regs;
+		lis3->regs_size = ARRAY_SIZE(lis3_wai8_regs);
+		break;
+	case WAI_3DC:
+		pr_info("8 bits 3DC sensor found\n");
+		lis3->read_data = lis3lv02d_read_8;
+		lis3->mdps_max_val = 128;
+		lis3->pwron_delay = LIS3_PWRON_DELAY_WAI_8B;
+		lis3->odrs = lis3_3dc_rates;
+		lis3->odr_mask = CTRL1_ODR0|CTRL1_ODR1|CTRL1_ODR2|CTRL1_ODR3;
+		lis3->scale = LIS3_SENSITIVITY_8B;
+		break;
+	case WAI_3DLH:
+		pr_info("16 bits lis331dlh sensor found\n");
+		lis3->read_data = lis331dlh_read_data;
+		lis3->mdps_max_val = 2048; /* 12 bits for 2G */
+		lis3->shift_adj = SHIFT_ADJ_2G;
+		lis3->pwron_delay = LIS3_PWRON_DELAY_WAI_8B;
+		lis3->odrs = lis3_3dlh_rates;
+		lis3->odr_mask = CTRL1_DR0 | CTRL1_DR1;
+		lis3->scale = LIS3DLH_SENSITIVITY_2G;
+		break;
+	default:
+		pr_err("unknown sensor type 0x%X\n", lis3->whoami);
+		return -EINVAL;
+	}
+
+	lis3->reg_cache = kzalloc(max(sizeof(lis3_wai8_regs),
+				     sizeof(lis3_wai12_regs)), GFP_KERNEL);
+
+	if (lis3->reg_cache == NULL) {
+		printk(KERN_ERR DRIVER_NAME "out of memory\n");
+		return -ENOMEM;
+	}
+
+	mutex_init(&lis3->mutex);
+	atomic_set(&lis3->wake_thread, 0);
+
+	lis3lv02d_add_fs(lis3);
+	err = lis3lv02d_poweron(lis3);
+	if (err) {
+		lis3lv02d_remove_fs(lis3);
+		return err;
+	}
+
+	if (lis3->pm_dev) {
+		pm_runtime_set_active(lis3->pm_dev);
+		pm_runtime_enable(lis3->pm_dev);
+	}
+
+	if (lis3lv02d_joystick_enable(lis3))
+		pr_err("joystick initialization failed\n");
+
+	/* passing in platform specific data is purely optional and only
+	 * used by the SPI transport layer at the moment */
+	if (lis3->pdata) {
+		struct lis3lv02d_platform_data *p = lis3->pdata;
+
+		if (lis3->whoami == WAI_8B)
+			lis3lv02d_8b_configure(lis3, p);
+
+		irq_flags = p->irq_flags1 & IRQF_TRIGGER_MASK;
+
+		lis3->irq_cfg = p->irq_cfg;
+		if (p->irq_cfg)
+			lis3->write(lis3, CTRL_REG3, p->irq_cfg);
+
+		if (p->default_rate)
+			lis3lv02d_set_odr(lis3, p->default_rate);
+	}
+
+	/* bail if we did not get an IRQ from the bus layer */
+	if (!lis3->irq) {
+		pr_debug("No IRQ. Disabling /dev/freefall\n");
+		goto out;
+	}
+
+	/*
+	 * The sensor can generate interrupts for free-fall and direction
+	 * detection (distinguishable with FF_WU_SRC and DD_SRC) but to keep
+	 * the things simple and _fast_ we activate it only for free-fall, so
+	 * no need to read register (very slow with ACPI). For the same reason,
+	 * we forbid shared interrupts.
+	 *
+	 * IRQF_TRIGGER_RISING seems pointless on HP laptops because the
+	 * io-apic is not configurable (and generates a warning) but I keep it
+	 * in case of support for other hardware.
+	 */
+	if (lis3->pdata && lis3->whoami == WAI_8B)
+		thread_fn = lis302dl_interrupt_thread1_8b;
+	else
+		thread_fn = NULL;
+
+	err = request_threaded_irq(lis3->irq, lis302dl_interrupt,
+				thread_fn,
+				IRQF_TRIGGER_RISING | IRQF_ONESHOT |
+				irq_flags,
+				DRIVER_NAME, lis3);
+
+	if (err < 0) {
+		pr_err("Cannot get IRQ\n");
+		goto out;
+	}
+
+	lis3->miscdev.minor	= MISC_DYNAMIC_MINOR;
+	lis3->miscdev.name	= "freefall";
+	lis3->miscdev.fops	= &lis3lv02d_misc_fops;
+
+	if (misc_register(&lis3->miscdev))
+		pr_err("misc_register failed\n");
+out:
+	return 0;
+}
+EXPORT_SYMBOL_GPL(lis3lv02d_init_device);
+
+MODULE_DESCRIPTION("ST LIS3LV02Dx three-axis digital accelerometer driver");
+MODULE_AUTHOR("Yan Burman, Eric Piel, Pavel Machek");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/lis3lv02d/lis3lv02d.h b/drivers/misc/lis3lv02d/lis3lv02d.h
new file mode 100644
index 000000000..0ef759671
--- /dev/null
+++ b/drivers/misc/lis3lv02d/lis3lv02d.h
@@ -0,0 +1,332 @@
+/*
+ *  lis3lv02d.h - ST LIS3LV02DL accelerometer driver
+ *
+ *  Copyright (C) 2007-2008 Yan Burman
+ *  Copyright (C) 2008-2009 Eric Piel
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/platform_device.h>
+#include <linux/input-polldev.h>
+#include <linux/regulator/consumer.h>
+#include <linux/miscdevice.h>
+
+/*
+ * This driver tries to support the "digital" accelerometer chips from
+ * STMicroelectronics such as LIS3LV02DL, LIS302DL, LIS3L02DQ, LIS331DL,
+ * LIS331DLH, LIS35DE, or LIS202DL. They are very similar in terms of
+ * programming, with almost the same registers. In addition to differing
+ * on physical properties, they differ on the number of axes (2/3),
+ * precision (8/12 bits), and special features (freefall detection,
+ * click...). Unfortunately, not all the differences can be probed via
+ * a register. They can be connected either via I²C or SPI.
+ */
+
+#include <linux/lis3lv02d.h>
+
+enum lis3_reg {
+	WHO_AM_I	= 0x0F,
+	OFFSET_X	= 0x16,
+	OFFSET_Y	= 0x17,
+	OFFSET_Z	= 0x18,
+	GAIN_X		= 0x19,
+	GAIN_Y		= 0x1A,
+	GAIN_Z		= 0x1B,
+	CTRL_REG1	= 0x20,
+	CTRL_REG2	= 0x21,
+	CTRL_REG3	= 0x22,
+	CTRL_REG4	= 0x23,
+	HP_FILTER_RESET	= 0x23,
+	STATUS_REG	= 0x27,
+	OUTX_L		= 0x28,
+	OUTX_H		= 0x29,
+	OUTX		= 0x29,
+	OUTY_L		= 0x2A,
+	OUTY_H		= 0x2B,
+	OUTY		= 0x2B,
+	OUTZ_L		= 0x2C,
+	OUTZ_H		= 0x2D,
+	OUTZ		= 0x2D,
+};
+
+enum lis302d_reg {
+	FF_WU_CFG_1	= 0x30,
+	FF_WU_SRC_1	= 0x31,
+	FF_WU_THS_1	= 0x32,
+	FF_WU_DURATION_1 = 0x33,
+	FF_WU_CFG_2	= 0x34,
+	FF_WU_SRC_2	= 0x35,
+	FF_WU_THS_2	= 0x36,
+	FF_WU_DURATION_2 = 0x37,
+	CLICK_CFG	= 0x38,
+	CLICK_SRC	= 0x39,
+	CLICK_THSY_X	= 0x3B,
+	CLICK_THSZ	= 0x3C,
+	CLICK_TIMELIMIT	= 0x3D,
+	CLICK_LATENCY	= 0x3E,
+	CLICK_WINDOW	= 0x3F,
+};
+
+enum lis3lv02d_reg {
+	FF_WU_CFG	= 0x30,
+	FF_WU_SRC	= 0x31,
+	FF_WU_ACK	= 0x32,
+	FF_WU_THS_L	= 0x34,
+	FF_WU_THS_H	= 0x35,
+	FF_WU_DURATION	= 0x36,
+	DD_CFG		= 0x38,
+	DD_SRC		= 0x39,
+	DD_ACK		= 0x3A,
+	DD_THSI_L	= 0x3C,
+	DD_THSI_H	= 0x3D,
+	DD_THSE_L	= 0x3E,
+	DD_THSE_H	= 0x3F,
+};
+
+enum lis3_who_am_i {
+	WAI_3DLH	= 0x32,	/* 16 bits: LIS331DLH */
+	WAI_3DC		= 0x33,	/* 8 bits: LIS3DC, HP3DC */
+	WAI_12B		= 0x3A, /* 12 bits: LIS3LV02D[LQ]... */
+	WAI_8B		= 0x3B, /* 8 bits: LIS[23]02D[LQ]... */
+	WAI_6B		= 0x52, /* 6 bits: LIS331DLF - not supported */
+};
+
+enum lis3_type {
+	LIS3LV02D,
+	LIS3DC,
+	HP3DC,
+	LIS2302D,
+	LIS331DLF,
+	LIS331DLH,
+};
+
+enum lis3lv02d_ctrl1_12b {
+	CTRL1_Xen	= 0x01,
+	CTRL1_Yen	= 0x02,
+	CTRL1_Zen	= 0x04,
+	CTRL1_ST	= 0x08,
+	CTRL1_DF0	= 0x10,
+	CTRL1_DF1	= 0x20,
+	CTRL1_PD0	= 0x40,
+	CTRL1_PD1	= 0x80,
+};
+
+/* Delta to ctrl1_12b version */
+enum lis3lv02d_ctrl1_8b {
+	CTRL1_STM	= 0x08,
+	CTRL1_STP	= 0x10,
+	CTRL1_FS	= 0x20,
+	CTRL1_PD	= 0x40,
+	CTRL1_DR	= 0x80,
+};
+
+enum lis3lv02d_ctrl1_3dc {
+	CTRL1_ODR0	= 0x10,
+	CTRL1_ODR1	= 0x20,
+	CTRL1_ODR2	= 0x40,
+	CTRL1_ODR3	= 0x80,
+};
+
+enum lis331dlh_ctrl1 {
+	CTRL1_DR0	= 0x08,
+	CTRL1_DR1	= 0x10,
+	CTRL1_PM0	= 0x20,
+	CTRL1_PM1	= 0x40,
+	CTRL1_PM2	= 0x80,
+};
+
+enum lis331dlh_ctrl2 {
+	CTRL2_HPEN1	= 0x04,
+	CTRL2_HPEN2	= 0x08,
+	CTRL2_FDS_3DLH	= 0x10,
+	CTRL2_BOOT_3DLH	= 0x80,
+};
+
+enum lis331dlh_ctrl4 {
+	CTRL4_STSIGN	= 0x08,
+	CTRL4_BLE	= 0x40,
+	CTRL4_BDU	= 0x80,
+};
+
+enum lis3lv02d_ctrl2 {
+	CTRL2_DAS	= 0x01,
+	CTRL2_SIM	= 0x02,
+	CTRL2_DRDY	= 0x04,
+	CTRL2_IEN	= 0x08,
+	CTRL2_BOOT	= 0x10,
+	CTRL2_BLE	= 0x20,
+	CTRL2_BDU	= 0x40, /* Block Data Update */
+	CTRL2_FS	= 0x80, /* Full Scale selection */
+};
+
+enum lis3lv02d_ctrl4_3dc {
+	CTRL4_SIM	= 0x01,
+	CTRL4_ST0	= 0x02,
+	CTRL4_ST1	= 0x04,
+	CTRL4_FS0	= 0x10,
+	CTRL4_FS1	= 0x20,
+};
+
+enum lis302d_ctrl2 {
+	HP_FF_WU2	= 0x08,
+	HP_FF_WU1	= 0x04,
+	CTRL2_BOOT_8B   = 0x40,
+};
+
+enum lis3lv02d_ctrl3 {
+	CTRL3_CFS0	= 0x01,
+	CTRL3_CFS1	= 0x02,
+	CTRL3_FDS	= 0x10,
+	CTRL3_HPFF	= 0x20,
+	CTRL3_HPDD	= 0x40,
+	CTRL3_ECK	= 0x80,
+};
+
+enum lis3lv02d_status_reg {
+	STATUS_XDA	= 0x01,
+	STATUS_YDA	= 0x02,
+	STATUS_ZDA	= 0x04,
+	STATUS_XYZDA	= 0x08,
+	STATUS_XOR	= 0x10,
+	STATUS_YOR	= 0x20,
+	STATUS_ZOR	= 0x40,
+	STATUS_XYZOR	= 0x80,
+};
+
+enum lis3lv02d_ff_wu_cfg {
+	FF_WU_CFG_XLIE	= 0x01,
+	FF_WU_CFG_XHIE	= 0x02,
+	FF_WU_CFG_YLIE	= 0x04,
+	FF_WU_CFG_YHIE	= 0x08,
+	FF_WU_CFG_ZLIE	= 0x10,
+	FF_WU_CFG_ZHIE	= 0x20,
+	FF_WU_CFG_LIR	= 0x40,
+	FF_WU_CFG_AOI	= 0x80,
+};
+
+enum lis3lv02d_ff_wu_src {
+	FF_WU_SRC_XL	= 0x01,
+	FF_WU_SRC_XH	= 0x02,
+	FF_WU_SRC_YL	= 0x04,
+	FF_WU_SRC_YH	= 0x08,
+	FF_WU_SRC_ZL	= 0x10,
+	FF_WU_SRC_ZH	= 0x20,
+	FF_WU_SRC_IA	= 0x40,
+};
+
+enum lis3lv02d_dd_cfg {
+	DD_CFG_XLIE	= 0x01,
+	DD_CFG_XHIE	= 0x02,
+	DD_CFG_YLIE	= 0x04,
+	DD_CFG_YHIE	= 0x08,
+	DD_CFG_ZLIE	= 0x10,
+	DD_CFG_ZHIE	= 0x20,
+	DD_CFG_LIR	= 0x40,
+	DD_CFG_IEND	= 0x80,
+};
+
+enum lis3lv02d_dd_src {
+	DD_SRC_XL	= 0x01,
+	DD_SRC_XH	= 0x02,
+	DD_SRC_YL	= 0x04,
+	DD_SRC_YH	= 0x08,
+	DD_SRC_ZL	= 0x10,
+	DD_SRC_ZH	= 0x20,
+	DD_SRC_IA	= 0x40,
+};
+
+enum lis3lv02d_click_src_8b {
+	CLICK_SINGLE_X	= 0x01,
+	CLICK_DOUBLE_X	= 0x02,
+	CLICK_SINGLE_Y	= 0x04,
+	CLICK_DOUBLE_Y	= 0x08,
+	CLICK_SINGLE_Z	= 0x10,
+	CLICK_DOUBLE_Z	= 0x20,
+	CLICK_IA	= 0x40,
+};
+
+enum lis3lv02d_reg_state {
+	LIS3_REG_OFF	= 0x00,
+	LIS3_REG_ON	= 0x01,
+};
+
+union axis_conversion {
+	struct {
+		int x, y, z;
+	};
+	int as_array[3];
+
+};
+
+struct lis3lv02d {
+	void			*bus_priv; /* used by the bus layer only */
+	struct device		*pm_dev; /* for pm_runtime purposes */
+	int (*init) (struct lis3lv02d *lis3);
+	int (*write) (struct lis3lv02d *lis3, int reg, u8 val);
+	int (*read) (struct lis3lv02d *lis3, int reg, u8 *ret);
+	int (*blkread) (struct lis3lv02d *lis3, int reg, int len, u8 *ret);
+	int (*reg_ctrl) (struct lis3lv02d *lis3, bool state);
+
+	int                     *odrs;     /* Supported output data rates */
+	u8			*regs;	   /* Regs to store / restore */
+	int			regs_size;
+	u8                      *reg_cache;
+	bool			regs_stored;
+	bool			init_required;
+	u8                      odr_mask;  /* ODR bit mask */
+	u8			whoami;    /* indicates measurement precision */
+	s16 (*read_data) (struct lis3lv02d *lis3, int reg);
+	int			mdps_max_val;
+	int			pwron_delay;
+	int                     scale; /*
+					* relationship between 1 LBS and mG
+					* (1/1000th of earth gravity)
+					*/
+
+	struct input_polled_dev	*idev;     /* input device */
+	struct platform_device	*pdev;     /* platform device */
+	struct regulator_bulk_data regulators[2];
+	atomic_t		count;     /* interrupt count after last read */
+	union axis_conversion	ac;        /* hw -> logical axis */
+	int			mapped_btns[3];
+
+	u32			irq;       /* IRQ number */
+	struct fasync_struct	*async_queue; /* queue for the misc device */
+	wait_queue_head_t	misc_wait; /* Wait queue for the misc device */
+	unsigned long		misc_opened; /* bit0: whether the device is open */
+	struct miscdevice	miscdev;
+
+	int                     data_ready_count[2];
+	atomic_t		wake_thread;
+	unsigned char           irq_cfg;
+	unsigned int		shift_adj;
+
+	struct lis3lv02d_platform_data *pdata;	/* for passing board config */
+	struct mutex		mutex;     /* Serialize poll and selftest */
+
+#ifdef CONFIG_OF
+	struct device_node	*of_node;
+#endif
+};
+
+int lis3lv02d_init_device(struct lis3lv02d *lis3);
+int lis3lv02d_joystick_enable(struct lis3lv02d *lis3);
+void lis3lv02d_joystick_disable(struct lis3lv02d *lis3);
+void lis3lv02d_poweroff(struct lis3lv02d *lis3);
+int lis3lv02d_poweron(struct lis3lv02d *lis3);
+int lis3lv02d_remove_fs(struct lis3lv02d *lis3);
+int lis3lv02d_init_dt(struct lis3lv02d *lis3);
+
+extern struct lis3lv02d lis3_dev;
diff --git a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c
new file mode 100644
index 000000000..14b7d539f
--- /dev/null
+++ b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c
@@ -0,0 +1,289 @@
+/*
+ * drivers/hwmon/lis3lv02d_i2c.c
+ *
+ * Implements I2C interface for lis3lv02d (STMicroelectronics) accelerometer.
+ * Driver is based on corresponding SPI driver written by Daniel Mack
+ * (lis3lv02d_spi.c (C) 2009 Daniel Mack <daniel@caiaq.de> ).
+ *
+ * Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+ *
+ * Contact: Samu Onkalo <samu.p.onkalo@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/pm_runtime.h>
+#include <linux/delay.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/of_device.h>
+
+#include "lis3lv02d.h"
+
+#define DRV_NAME	"lis3lv02d_i2c"
+
+static const char reg_vdd[]    = "Vdd";
+static const char reg_vdd_io[] = "Vdd_IO";
+
+static int lis3_reg_ctrl(struct lis3lv02d *lis3, bool state)
+{
+	int ret;
+	if (state == LIS3_REG_OFF) {
+		ret = regulator_bulk_disable(ARRAY_SIZE(lis3->regulators),
+					lis3->regulators);
+	} else {
+		ret = regulator_bulk_enable(ARRAY_SIZE(lis3->regulators),
+					lis3->regulators);
+		/* Chip needs time to wakeup. Not mentioned in datasheet */
+		usleep_range(10000, 20000);
+	}
+	return ret;
+}
+
+static inline s32 lis3_i2c_write(struct lis3lv02d *lis3, int reg, u8 value)
+{
+	struct i2c_client *c = lis3->bus_priv;
+	return i2c_smbus_write_byte_data(c, reg, value);
+}
+
+static inline s32 lis3_i2c_read(struct lis3lv02d *lis3, int reg, u8 *v)
+{
+	struct i2c_client *c = lis3->bus_priv;
+	*v = i2c_smbus_read_byte_data(c, reg);
+	return 0;
+}
+
+static inline s32 lis3_i2c_blockread(struct lis3lv02d *lis3, int reg, int len,
+				u8 *v)
+{
+	struct i2c_client *c = lis3->bus_priv;
+	reg |= (1 << 7); /* 7th bit enables address auto incrementation */
+	return i2c_smbus_read_i2c_block_data(c, reg, len, v);
+}
+
+static int lis3_i2c_init(struct lis3lv02d *lis3)
+{
+	u8 reg;
+	int ret;
+
+	lis3_reg_ctrl(lis3, LIS3_REG_ON);
+
+	lis3->read(lis3, WHO_AM_I, &reg);
+	if (reg != lis3->whoami)
+		printk(KERN_ERR "lis3: power on failure\n");
+
+	/* power up the device */
+	ret = lis3->read(lis3, CTRL_REG1, &reg);
+	if (ret < 0)
+		return ret;
+
+	if (lis3->whoami == WAI_3DLH)
+		reg |= CTRL1_PM0 | CTRL1_Xen | CTRL1_Yen | CTRL1_Zen;
+	else
+		reg |= CTRL1_PD0 | CTRL1_Xen | CTRL1_Yen | CTRL1_Zen;
+
+	return lis3->write(lis3, CTRL_REG1, reg);
+}
+
+/* Default axis mapping but it can be overwritten by platform data */
+static union axis_conversion lis3lv02d_axis_map =
+	{ .as_array = { LIS3_DEV_X, LIS3_DEV_Y, LIS3_DEV_Z } };
+
+#ifdef CONFIG_OF
+static const struct of_device_id lis3lv02d_i2c_dt_ids[] = {
+	{ .compatible = "st,lis3lv02d" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, lis3lv02d_i2c_dt_ids);
+#endif
+
+static int lis3lv02d_i2c_probe(struct i2c_client *client,
+					const struct i2c_device_id *id)
+{
+	int ret = 0;
+	struct lis3lv02d_platform_data *pdata = client->dev.platform_data;
+
+#ifdef CONFIG_OF
+	if (of_match_device(lis3lv02d_i2c_dt_ids, &client->dev)) {
+		lis3_dev.of_node = client->dev.of_node;
+		ret = lis3lv02d_init_dt(&lis3_dev);
+		if (ret)
+			return ret;
+		pdata = lis3_dev.pdata;
+	}
+#endif
+
+	if (pdata) {
+		if ((pdata->driver_features & LIS3_USE_BLOCK_READ) &&
+			(i2c_check_functionality(client->adapter,
+						I2C_FUNC_SMBUS_I2C_BLOCK)))
+			lis3_dev.blkread  = lis3_i2c_blockread;
+
+		if (pdata->axis_x)
+			lis3lv02d_axis_map.x = pdata->axis_x;
+
+		if (pdata->axis_y)
+			lis3lv02d_axis_map.y = pdata->axis_y;
+
+		if (pdata->axis_z)
+			lis3lv02d_axis_map.z = pdata->axis_z;
+
+		if (pdata->setup_resources)
+			ret = pdata->setup_resources();
+
+		if (ret)
+			goto fail;
+	}
+
+	lis3_dev.regulators[0].supply = reg_vdd;
+	lis3_dev.regulators[1].supply = reg_vdd_io;
+	ret = regulator_bulk_get(&client->dev,
+				 ARRAY_SIZE(lis3_dev.regulators),
+				 lis3_dev.regulators);
+	if (ret < 0)
+		goto fail;
+
+	lis3_dev.pdata	  = pdata;
+	lis3_dev.bus_priv = client;
+	lis3_dev.init	  = lis3_i2c_init;
+	lis3_dev.read	  = lis3_i2c_read;
+	lis3_dev.write	  = lis3_i2c_write;
+	lis3_dev.irq	  = client->irq;
+	lis3_dev.ac	  = lis3lv02d_axis_map;
+	lis3_dev.pm_dev	  = &client->dev;
+
+	i2c_set_clientdata(client, &lis3_dev);
+
+	/* Provide power over the init call */
+	lis3_reg_ctrl(&lis3_dev, LIS3_REG_ON);
+
+	ret = lis3lv02d_init_device(&lis3_dev);
+
+	lis3_reg_ctrl(&lis3_dev, LIS3_REG_OFF);
+
+	if (ret)
+		goto fail2;
+	return 0;
+
+fail2:
+	regulator_bulk_free(ARRAY_SIZE(lis3_dev.regulators),
+				lis3_dev.regulators);
+fail:
+	if (pdata && pdata->release_resources)
+		pdata->release_resources();
+	return ret;
+}
+
+static int lis3lv02d_i2c_remove(struct i2c_client *client)
+{
+	struct lis3lv02d *lis3 = i2c_get_clientdata(client);
+	struct lis3lv02d_platform_data *pdata = client->dev.platform_data;
+
+	if (pdata && pdata->release_resources)
+		pdata->release_resources();
+
+	lis3lv02d_joystick_disable(lis3);
+	lis3lv02d_remove_fs(&lis3_dev);
+
+	regulator_bulk_free(ARRAY_SIZE(lis3->regulators),
+			    lis3_dev.regulators);
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int lis3lv02d_i2c_suspend(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct lis3lv02d *lis3 = i2c_get_clientdata(client);
+
+	if (!lis3->pdata || !lis3->pdata->wakeup_flags)
+		lis3lv02d_poweroff(lis3);
+	return 0;
+}
+
+static int lis3lv02d_i2c_resume(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct lis3lv02d *lis3 = i2c_get_clientdata(client);
+
+	/*
+	 * pm_runtime documentation says that devices should always
+	 * be powered on at resume. Pm_runtime turns them off after system
+	 * wide resume is complete.
+	 */
+	if (!lis3->pdata || !lis3->pdata->wakeup_flags ||
+		pm_runtime_suspended(dev))
+		lis3lv02d_poweron(lis3);
+
+	return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+#ifdef CONFIG_PM
+static int lis3_i2c_runtime_suspend(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct lis3lv02d *lis3 = i2c_get_clientdata(client);
+
+	lis3lv02d_poweroff(lis3);
+	return 0;
+}
+
+static int lis3_i2c_runtime_resume(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct lis3lv02d *lis3 = i2c_get_clientdata(client);
+
+	lis3lv02d_poweron(lis3);
+	return 0;
+}
+#endif /* CONFIG_PM */
+
+static const struct i2c_device_id lis3lv02d_id[] = {
+	{"lis3lv02d", LIS3LV02D},
+	{"lis331dlh", LIS331DLH},
+	{}
+};
+
+MODULE_DEVICE_TABLE(i2c, lis3lv02d_id);
+
+static const struct dev_pm_ops lis3_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(lis3lv02d_i2c_suspend,
+				lis3lv02d_i2c_resume)
+	SET_RUNTIME_PM_OPS(lis3_i2c_runtime_suspend,
+			   lis3_i2c_runtime_resume,
+			   NULL)
+};
+
+static struct i2c_driver lis3lv02d_i2c_driver = {
+	.driver	 = {
+		.name   = DRV_NAME,
+		.pm     = &lis3_pm_ops,
+		.of_match_table = of_match_ptr(lis3lv02d_i2c_dt_ids),
+	},
+	.probe	= lis3lv02d_i2c_probe,
+	.remove	= lis3lv02d_i2c_remove,
+	.id_table = lis3lv02d_id,
+};
+
+module_i2c_driver(lis3lv02d_i2c_driver);
+
+MODULE_AUTHOR("Nokia Corporation");
+MODULE_DESCRIPTION("lis3lv02d I2C interface");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/lis3lv02d/lis3lv02d_spi.c b/drivers/misc/lis3lv02d/lis3lv02d_spi.c
new file mode 100644
index 000000000..e57547512
--- /dev/null
+++ b/drivers/misc/lis3lv02d/lis3lv02d_spi.c
@@ -0,0 +1,153 @@
+/*
+ * lis3lv02d_spi - SPI glue layer for lis3lv02d
+ *
+ * Copyright (c) 2009 Daniel Mack <daniel@caiaq.de>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  publishhed by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/workqueue.h>
+#include <linux/spi/spi.h>
+#include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/of_device.h>
+
+#include "lis3lv02d.h"
+
+#define DRV_NAME 	"lis3lv02d_spi"
+#define LIS3_SPI_READ	0x80
+
+static int lis3_spi_read(struct lis3lv02d *lis3, int reg, u8 *v)
+{
+	struct spi_device *spi = lis3->bus_priv;
+	int ret = spi_w8r8(spi, reg | LIS3_SPI_READ);
+	if (ret < 0)
+		return -EINVAL;
+
+	*v = (u8) ret;
+	return 0;
+}
+
+static int lis3_spi_write(struct lis3lv02d *lis3, int reg, u8 val)
+{
+	u8 tmp[2] = { reg, val };
+	struct spi_device *spi = lis3->bus_priv;
+	return spi_write(spi, tmp, sizeof(tmp));
+}
+
+static int lis3_spi_init(struct lis3lv02d *lis3)
+{
+	u8 reg;
+	int ret;
+
+	/* power up the device */
+	ret = lis3->read(lis3, CTRL_REG1, &reg);
+	if (ret < 0)
+		return ret;
+
+	reg |= CTRL1_PD0 | CTRL1_Xen | CTRL1_Yen | CTRL1_Zen;
+	return lis3->write(lis3, CTRL_REG1, reg);
+}
+
+static union axis_conversion lis3lv02d_axis_normal =
+	{ .as_array = { 1, 2, 3 } };
+
+#ifdef CONFIG_OF
+static const struct of_device_id lis302dl_spi_dt_ids[] = {
+	{ .compatible = "st,lis302dl-spi" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, lis302dl_spi_dt_ids);
+#endif
+
+static int lis302dl_spi_probe(struct spi_device *spi)
+{
+	int ret;
+
+	spi->bits_per_word = 8;
+	spi->mode = SPI_MODE_0;
+	ret = spi_setup(spi);
+	if (ret < 0)
+		return ret;
+
+	lis3_dev.bus_priv	= spi;
+	lis3_dev.init		= lis3_spi_init;
+	lis3_dev.read		= lis3_spi_read;
+	lis3_dev.write		= lis3_spi_write;
+	lis3_dev.irq		= spi->irq;
+	lis3_dev.ac		= lis3lv02d_axis_normal;
+	lis3_dev.pdata		= spi->dev.platform_data;
+
+#ifdef CONFIG_OF
+	if (of_match_device(lis302dl_spi_dt_ids, &spi->dev)) {
+		lis3_dev.of_node = spi->dev.of_node;
+		ret = lis3lv02d_init_dt(&lis3_dev);
+		if (ret)
+			return ret;
+	}
+#endif
+	spi_set_drvdata(spi, &lis3_dev);
+
+	return lis3lv02d_init_device(&lis3_dev);
+}
+
+static int lis302dl_spi_remove(struct spi_device *spi)
+{
+	struct lis3lv02d *lis3 = spi_get_drvdata(spi);
+	lis3lv02d_joystick_disable(lis3);
+	lis3lv02d_poweroff(lis3);
+
+	return lis3lv02d_remove_fs(&lis3_dev);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int lis3lv02d_spi_suspend(struct device *dev)
+{
+	struct spi_device *spi = to_spi_device(dev);
+	struct lis3lv02d *lis3 = spi_get_drvdata(spi);
+
+	if (!lis3->pdata || !lis3->pdata->wakeup_flags)
+		lis3lv02d_poweroff(&lis3_dev);
+
+	return 0;
+}
+
+static int lis3lv02d_spi_resume(struct device *dev)
+{
+	struct spi_device *spi = to_spi_device(dev);
+	struct lis3lv02d *lis3 = spi_get_drvdata(spi);
+
+	if (!lis3->pdata || !lis3->pdata->wakeup_flags)
+		lis3lv02d_poweron(lis3);
+
+	return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(lis3lv02d_spi_pm, lis3lv02d_spi_suspend,
+			 lis3lv02d_spi_resume);
+
+static struct spi_driver lis302dl_spi_driver = {
+	.driver	 = {
+		.name   = DRV_NAME,
+		.pm	= &lis3lv02d_spi_pm,
+		.of_match_table = of_match_ptr(lis302dl_spi_dt_ids),
+	},
+	.probe	= lis302dl_spi_probe,
+	.remove	= lis302dl_spi_remove,
+};
+
+module_spi_driver(lis302dl_spi_driver);
+
+MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>");
+MODULE_DESCRIPTION("lis3lv02d SPI glue layer");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("spi:" DRV_NAME);
diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile
new file mode 100644
index 000000000..b7ceb5f22
--- /dev/null
+++ b/drivers/misc/lkdtm/Makefile
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_LKDTM)		+= lkdtm.o
+
+lkdtm-$(CONFIG_LKDTM)		+= core.o
+lkdtm-$(CONFIG_LKDTM)		+= bugs.o
+lkdtm-$(CONFIG_LKDTM)		+= heap.o
+lkdtm-$(CONFIG_LKDTM)		+= perms.o
+lkdtm-$(CONFIG_LKDTM)		+= refcount.o
+lkdtm-$(CONFIG_LKDTM)		+= rodata_objcopy.o
+lkdtm-$(CONFIG_LKDTM)		+= usercopy.o
+
+KCOV_INSTRUMENT_rodata.o	:= n
+
+OBJCOPYFLAGS :=
+OBJCOPYFLAGS_rodata_objcopy.o	:= \
+			--rename-section .noinstr.text=.rodata,alloc,readonly,load,contents
+targets += rodata.o rodata_objcopy.o
+$(obj)/rodata_objcopy.o: $(obj)/rodata.o FORCE
+	$(call if_changed,objcopy)
diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c
new file mode 100644
index 000000000..7eebbdfbc
--- /dev/null
+++ b/drivers/misc/lkdtm/bugs.c
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This is for all the tests related to logic bugs (e.g. bad dereferences,
+ * bad alignment, bad loops, bad locking, bad scheduling, deep stacks, and
+ * lockups) along with other things that don't fit well into existing LKDTM
+ * test source files.
+ */
+#include "lkdtm.h"
+#include <linux/list.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/task_stack.h>
+#include <linux/uaccess.h>
+
+struct lkdtm_list {
+	struct list_head node;
+};
+
+/*
+ * Make sure our attempts to over run the kernel stack doesn't trigger
+ * a compiler warning when CONFIG_FRAME_WARN is set. Then make sure we
+ * recurse past the end of THREAD_SIZE by default.
+ */
+#if defined(CONFIG_FRAME_WARN) && (CONFIG_FRAME_WARN > 0)
+#define REC_STACK_SIZE (CONFIG_FRAME_WARN / 2)
+#else
+#define REC_STACK_SIZE (THREAD_SIZE / 8)
+#endif
+#define REC_NUM_DEFAULT ((THREAD_SIZE / REC_STACK_SIZE) * 2)
+
+static int recur_count = REC_NUM_DEFAULT;
+
+static DEFINE_SPINLOCK(lock_me_up);
+
+static int recursive_loop(int remaining)
+{
+	char buf[REC_STACK_SIZE];
+
+	/* Make sure compiler does not optimize this away. */
+	memset(buf, (remaining & 0xff) | 0x1, REC_STACK_SIZE);
+	if (!remaining)
+		return 0;
+	else
+		return recursive_loop(remaining - 1);
+}
+
+/* If the depth is negative, use the default, otherwise keep parameter. */
+void __init lkdtm_bugs_init(int *recur_param)
+{
+	if (*recur_param < 0)
+		*recur_param = recur_count;
+	else
+		recur_count = *recur_param;
+}
+
+void lkdtm_PANIC(void)
+{
+	panic("dumptest");
+}
+
+void lkdtm_BUG(void)
+{
+	BUG();
+}
+
+static int warn_counter;
+
+void lkdtm_WARNING(void)
+{
+	WARN(1, "Warning message trigger count: %d\n", warn_counter++);
+}
+
+void lkdtm_EXCEPTION(void)
+{
+	*((volatile int *) 0) = 0;
+}
+
+void lkdtm_LOOP(void)
+{
+	for (;;)
+		;
+}
+
+void lkdtm_OVERFLOW(void)
+{
+	(void) recursive_loop(recur_count);
+}
+
+static noinline void __lkdtm_CORRUPT_STACK(void *stack)
+{
+	memset(stack, '\xff', 64);
+}
+
+/* This should trip the stack canary, not corrupt the return address. */
+noinline void lkdtm_CORRUPT_STACK(void)
+{
+	/* Use default char array length that triggers stack protection. */
+	char data[8] __aligned(sizeof(void *));
+
+	__lkdtm_CORRUPT_STACK(&data);
+
+	pr_info("Corrupted stack containing char array ...\n");
+}
+
+/* Same as above but will only get a canary with -fstack-protector-strong */
+noinline void lkdtm_CORRUPT_STACK_STRONG(void)
+{
+	union {
+		unsigned short shorts[4];
+		unsigned long *ptr;
+	} data __aligned(sizeof(void *));
+
+	__lkdtm_CORRUPT_STACK(&data);
+
+	pr_info("Corrupted stack containing union ...\n");
+}
+
+void lkdtm_UNALIGNED_LOAD_STORE_WRITE(void)
+{
+	static u8 data[5] __attribute__((aligned(4))) = {1, 2, 3, 4, 5};
+	u32 *p;
+	u32 val = 0x12345678;
+
+	p = (u32 *)(data + 1);
+	if (*p == 0)
+		val = 0x87654321;
+	*p = val;
+}
+
+void lkdtm_SOFTLOCKUP(void)
+{
+	preempt_disable();
+	for (;;)
+		cpu_relax();
+}
+
+void lkdtm_HARDLOCKUP(void)
+{
+	local_irq_disable();
+	for (;;)
+		cpu_relax();
+}
+
+void lkdtm_SPINLOCKUP(void)
+{
+	/* Must be called twice to trigger. */
+	spin_lock(&lock_me_up);
+	/* Let sparse know we intended to exit holding the lock. */
+	__release(&lock_me_up);
+}
+
+void lkdtm_HUNG_TASK(void)
+{
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	schedule();
+}
+
+void lkdtm_CORRUPT_LIST_ADD(void)
+{
+	/*
+	 * Initially, an empty list via LIST_HEAD:
+	 *	test_head.next = &test_head
+	 *	test_head.prev = &test_head
+	 */
+	LIST_HEAD(test_head);
+	struct lkdtm_list good, bad;
+	void *target[2] = { };
+	void *redirection = &target;
+
+	pr_info("attempting good list addition\n");
+
+	/*
+	 * Adding to the list performs these actions:
+	 *	test_head.next->prev = &good.node
+	 *	good.node.next = test_head.next
+	 *	good.node.prev = test_head
+	 *	test_head.next = good.node
+	 */
+	list_add(&good.node, &test_head);
+
+	pr_info("attempting corrupted list addition\n");
+	/*
+	 * In simulating this "write what where" primitive, the "what" is
+	 * the address of &bad.node, and the "where" is the address held
+	 * by "redirection".
+	 */
+	test_head.next = redirection;
+	list_add(&bad.node, &test_head);
+
+	if (target[0] == NULL && target[1] == NULL)
+		pr_err("Overwrite did not happen, but no BUG?!\n");
+	else
+		pr_err("list_add() corruption not detected!\n");
+}
+
+void lkdtm_CORRUPT_LIST_DEL(void)
+{
+	LIST_HEAD(test_head);
+	struct lkdtm_list item;
+	void *target[2] = { };
+	void *redirection = &target;
+
+	list_add(&item.node, &test_head);
+
+	pr_info("attempting good list removal\n");
+	list_del(&item.node);
+
+	pr_info("attempting corrupted list removal\n");
+	list_add(&item.node, &test_head);
+
+	/* As with the list_add() test above, this corrupts "next". */
+	item.node.next = redirection;
+	list_del(&item.node);
+
+	if (target[0] == NULL && target[1] == NULL)
+		pr_err("Overwrite did not happen, but no BUG?!\n");
+	else
+		pr_err("list_del() corruption not detected!\n");
+}
+
+/* Test if unbalanced set_fs(KERNEL_DS)/set_fs(USER_DS) check exists. */
+void lkdtm_CORRUPT_USER_DS(void)
+{
+	pr_info("setting bad task size limit\n");
+	set_fs(KERNEL_DS);
+
+	/* Make sure we do not keep running with a KERNEL_DS! */
+	force_sig(SIGKILL, current);
+}
+
+/* Test that VMAP_STACK is actually allocating with a leading guard page */
+void lkdtm_STACK_GUARD_PAGE_LEADING(void)
+{
+	const unsigned char *stack = task_stack_page(current);
+	const unsigned char *ptr = stack - 1;
+	volatile unsigned char byte;
+
+	pr_info("attempting bad read from page below current stack\n");
+
+	byte = *ptr;
+
+	pr_err("FAIL: accessed page before stack!\n");
+}
+
+/* Test that VMAP_STACK is actually allocating with a trailing guard page */
+void lkdtm_STACK_GUARD_PAGE_TRAILING(void)
+{
+	const unsigned char *stack = task_stack_page(current);
+	const unsigned char *ptr = stack + THREAD_SIZE;
+	volatile unsigned char byte;
+
+	pr_info("attempting bad read from page above current stack\n");
+
+	byte = *ptr;
+
+	pr_err("FAIL: accessed page after stack!\n");
+}
diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
new file mode 100644
index 000000000..07caaa2cf
--- /dev/null
+++ b/drivers/misc/lkdtm/core.c
@@ -0,0 +1,507 @@
+/*
+ * Linux Kernel Dump Test Module for testing kernel crashes conditions:
+ * induces system failures at predefined crashpoints and under predefined
+ * operational conditions in order to evaluate the reliability of kernel
+ * sanity checking and crash dumps obtained using different dumping
+ * solutions.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2006
+ *
+ * Author: Ankita Garg <ankita@in.ibm.com>
+ *
+ * It is adapted from the Linux Kernel Dump Test Tool by
+ * Fernando Luis Vazquez Cao <http://lkdtt.sourceforge.net>
+ *
+ * Debugfs support added by Simon Kagstrom <simon.kagstrom@netinsight.net>
+ *
+ * See Documentation/fault-injection/provoke-crashes.txt for instructions
+ */
+#include "lkdtm.h"
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/buffer_head.h>
+#include <linux/kprobes.h>
+#include <linux/list.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/hrtimer.h>
+#include <linux/slab.h>
+#include <scsi/scsi_cmnd.h>
+#include <linux/debugfs.h>
+
+#ifdef CONFIG_IDE
+#include <linux/ide.h>
+#endif
+
+#define DEFAULT_COUNT 10
+
+static int lkdtm_debugfs_open(struct inode *inode, struct file *file);
+static ssize_t lkdtm_debugfs_read(struct file *f, char __user *user_buf,
+		size_t count, loff_t *off);
+static ssize_t direct_entry(struct file *f, const char __user *user_buf,
+			    size_t count, loff_t *off);
+
+#ifdef CONFIG_KPROBES
+static int lkdtm_kprobe_handler(struct kprobe *kp, struct pt_regs *regs);
+static ssize_t lkdtm_debugfs_entry(struct file *f,
+				   const char __user *user_buf,
+				   size_t count, loff_t *off);
+# define CRASHPOINT_KPROBE(_symbol)				\
+		.kprobe = {					\
+			.symbol_name = (_symbol),		\
+			.pre_handler = lkdtm_kprobe_handler,	\
+		},
+# define CRASHPOINT_WRITE(_symbol)				\
+		(_symbol) ? lkdtm_debugfs_entry : direct_entry
+#else
+# define CRASHPOINT_KPROBE(_symbol)
+# define CRASHPOINT_WRITE(_symbol)		direct_entry
+#endif
+
+/* Crash points */
+struct crashpoint {
+	const char *name;
+	const struct file_operations fops;
+	struct kprobe kprobe;
+};
+
+#define CRASHPOINT(_name, _symbol)				\
+	{							\
+		.name = _name,					\
+		.fops = {					\
+			.read	= lkdtm_debugfs_read,		\
+			.llseek	= generic_file_llseek,		\
+			.open	= lkdtm_debugfs_open,		\
+			.write	= CRASHPOINT_WRITE(_symbol)	\
+		},						\
+		CRASHPOINT_KPROBE(_symbol)			\
+	}
+
+/* Define the possible places where we can trigger a crash point. */
+static struct crashpoint crashpoints[] = {
+	CRASHPOINT("DIRECT",		 NULL),
+#ifdef CONFIG_KPROBES
+	CRASHPOINT("INT_HARDWARE_ENTRY", "do_IRQ"),
+	CRASHPOINT("INT_HW_IRQ_EN",	 "handle_irq_event"),
+	CRASHPOINT("INT_TASKLET_ENTRY",	 "tasklet_action"),
+	CRASHPOINT("FS_DEVRW",		 "ll_rw_block"),
+	CRASHPOINT("MEM_SWAPOUT",	 "shrink_inactive_list"),
+	CRASHPOINT("TIMERADD",		 "hrtimer_start"),
+	CRASHPOINT("SCSI_DISPATCH_CMD",	 "scsi_dispatch_cmd"),
+# ifdef CONFIG_IDE
+	CRASHPOINT("IDE_CORE_CP",	 "generic_ide_ioctl"),
+# endif
+#endif
+};
+
+
+/* Crash types. */
+struct crashtype {
+	const char *name;
+	void (*func)(void);
+};
+
+#define CRASHTYPE(_name)			\
+	{					\
+		.name = __stringify(_name),	\
+		.func = lkdtm_ ## _name,	\
+	}
+
+/* Define the possible types of crashes that can be triggered. */
+static const struct crashtype crashtypes[] = {
+	CRASHTYPE(PANIC),
+	CRASHTYPE(BUG),
+	CRASHTYPE(WARNING),
+	CRASHTYPE(EXCEPTION),
+	CRASHTYPE(LOOP),
+	CRASHTYPE(OVERFLOW),
+	CRASHTYPE(CORRUPT_LIST_ADD),
+	CRASHTYPE(CORRUPT_LIST_DEL),
+	CRASHTYPE(CORRUPT_USER_DS),
+	CRASHTYPE(CORRUPT_STACK),
+	CRASHTYPE(CORRUPT_STACK_STRONG),
+	CRASHTYPE(STACK_GUARD_PAGE_LEADING),
+	CRASHTYPE(STACK_GUARD_PAGE_TRAILING),
+	CRASHTYPE(UNALIGNED_LOAD_STORE_WRITE),
+	CRASHTYPE(OVERWRITE_ALLOCATION),
+	CRASHTYPE(WRITE_AFTER_FREE),
+	CRASHTYPE(READ_AFTER_FREE),
+	CRASHTYPE(WRITE_BUDDY_AFTER_FREE),
+	CRASHTYPE(READ_BUDDY_AFTER_FREE),
+	CRASHTYPE(SOFTLOCKUP),
+	CRASHTYPE(HARDLOCKUP),
+	CRASHTYPE(SPINLOCKUP),
+	CRASHTYPE(HUNG_TASK),
+	CRASHTYPE(EXEC_DATA),
+	CRASHTYPE(EXEC_STACK),
+	CRASHTYPE(EXEC_KMALLOC),
+	CRASHTYPE(EXEC_VMALLOC),
+	CRASHTYPE(EXEC_RODATA),
+	CRASHTYPE(EXEC_USERSPACE),
+	CRASHTYPE(EXEC_NULL),
+	CRASHTYPE(ACCESS_USERSPACE),
+	CRASHTYPE(ACCESS_NULL),
+	CRASHTYPE(WRITE_RO),
+	CRASHTYPE(WRITE_RO_AFTER_INIT),
+	CRASHTYPE(WRITE_KERN),
+	CRASHTYPE(REFCOUNT_INC_OVERFLOW),
+	CRASHTYPE(REFCOUNT_ADD_OVERFLOW),
+	CRASHTYPE(REFCOUNT_INC_NOT_ZERO_OVERFLOW),
+	CRASHTYPE(REFCOUNT_ADD_NOT_ZERO_OVERFLOW),
+	CRASHTYPE(REFCOUNT_DEC_ZERO),
+	CRASHTYPE(REFCOUNT_DEC_NEGATIVE),
+	CRASHTYPE(REFCOUNT_DEC_AND_TEST_NEGATIVE),
+	CRASHTYPE(REFCOUNT_SUB_AND_TEST_NEGATIVE),
+	CRASHTYPE(REFCOUNT_INC_ZERO),
+	CRASHTYPE(REFCOUNT_ADD_ZERO),
+	CRASHTYPE(REFCOUNT_INC_SATURATED),
+	CRASHTYPE(REFCOUNT_DEC_SATURATED),
+	CRASHTYPE(REFCOUNT_ADD_SATURATED),
+	CRASHTYPE(REFCOUNT_INC_NOT_ZERO_SATURATED),
+	CRASHTYPE(REFCOUNT_ADD_NOT_ZERO_SATURATED),
+	CRASHTYPE(REFCOUNT_DEC_AND_TEST_SATURATED),
+	CRASHTYPE(REFCOUNT_SUB_AND_TEST_SATURATED),
+	CRASHTYPE(REFCOUNT_TIMING),
+	CRASHTYPE(ATOMIC_TIMING),
+	CRASHTYPE(USERCOPY_HEAP_SIZE_TO),
+	CRASHTYPE(USERCOPY_HEAP_SIZE_FROM),
+	CRASHTYPE(USERCOPY_HEAP_WHITELIST_TO),
+	CRASHTYPE(USERCOPY_HEAP_WHITELIST_FROM),
+	CRASHTYPE(USERCOPY_STACK_FRAME_TO),
+	CRASHTYPE(USERCOPY_STACK_FRAME_FROM),
+	CRASHTYPE(USERCOPY_STACK_BEYOND),
+	CRASHTYPE(USERCOPY_KERNEL),
+};
+
+
+/* Global kprobe entry and crashtype. */
+static struct kprobe *lkdtm_kprobe;
+static struct crashpoint *lkdtm_crashpoint;
+static const struct crashtype *lkdtm_crashtype;
+
+/* Module parameters */
+static int recur_count = -1;
+module_param(recur_count, int, 0644);
+MODULE_PARM_DESC(recur_count, " Recursion level for the stack overflow test");
+
+static char* cpoint_name;
+module_param(cpoint_name, charp, 0444);
+MODULE_PARM_DESC(cpoint_name, " Crash Point, where kernel is to be crashed");
+
+static char* cpoint_type;
+module_param(cpoint_type, charp, 0444);
+MODULE_PARM_DESC(cpoint_type, " Crash Point Type, action to be taken on "\
+				"hitting the crash point");
+
+static int cpoint_count = DEFAULT_COUNT;
+module_param(cpoint_count, int, 0644);
+MODULE_PARM_DESC(cpoint_count, " Crash Point Count, number of times the "\
+				"crash point is to be hit to trigger action");
+
+
+/* Return the crashtype number or NULL if the name is invalid */
+static const struct crashtype *find_crashtype(const char *name)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(crashtypes); i++) {
+		if (!strcmp(name, crashtypes[i].name))
+			return &crashtypes[i];
+	}
+
+	return NULL;
+}
+
+/*
+ * This is forced noinline just so it distinctly shows up in the stackdump
+ * which makes validation of expected lkdtm crashes easier.
+ */
+static noinline void lkdtm_do_action(const struct crashtype *crashtype)
+{
+	if (WARN_ON(!crashtype || !crashtype->func))
+		return;
+	crashtype->func();
+}
+
+static int lkdtm_register_cpoint(struct crashpoint *crashpoint,
+				 const struct crashtype *crashtype)
+{
+	int ret;
+
+	/* If this doesn't have a symbol, just call immediately. */
+	if (!crashpoint->kprobe.symbol_name) {
+		lkdtm_do_action(crashtype);
+		return 0;
+	}
+
+	if (lkdtm_kprobe != NULL)
+		unregister_kprobe(lkdtm_kprobe);
+
+	lkdtm_crashpoint = crashpoint;
+	lkdtm_crashtype = crashtype;
+	lkdtm_kprobe = &crashpoint->kprobe;
+	ret = register_kprobe(lkdtm_kprobe);
+	if (ret < 0) {
+		pr_info("Couldn't register kprobe %s\n",
+			crashpoint->kprobe.symbol_name);
+		lkdtm_kprobe = NULL;
+		lkdtm_crashpoint = NULL;
+		lkdtm_crashtype = NULL;
+	}
+
+	return ret;
+}
+
+#ifdef CONFIG_KPROBES
+/* Global crash counter and spinlock. */
+static int crash_count = DEFAULT_COUNT;
+static DEFINE_SPINLOCK(crash_count_lock);
+
+/* Called by kprobe entry points. */
+static int lkdtm_kprobe_handler(struct kprobe *kp, struct pt_regs *regs)
+{
+	unsigned long flags;
+	bool do_it = false;
+
+	if (WARN_ON(!lkdtm_crashpoint || !lkdtm_crashtype))
+		return 0;
+
+	spin_lock_irqsave(&crash_count_lock, flags);
+	crash_count--;
+	pr_info("Crash point %s of type %s hit, trigger in %d rounds\n",
+		lkdtm_crashpoint->name, lkdtm_crashtype->name, crash_count);
+
+	if (crash_count == 0) {
+		do_it = true;
+		crash_count = cpoint_count;
+	}
+	spin_unlock_irqrestore(&crash_count_lock, flags);
+
+	if (do_it)
+		lkdtm_do_action(lkdtm_crashtype);
+
+	return 0;
+}
+
+static ssize_t lkdtm_debugfs_entry(struct file *f,
+				   const char __user *user_buf,
+				   size_t count, loff_t *off)
+{
+	struct crashpoint *crashpoint = file_inode(f)->i_private;
+	const struct crashtype *crashtype = NULL;
+	char *buf;
+	int err;
+
+	if (count >= PAGE_SIZE)
+		return -EINVAL;
+
+	buf = (char *)__get_free_page(GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+	if (copy_from_user(buf, user_buf, count)) {
+		free_page((unsigned long) buf);
+		return -EFAULT;
+	}
+	/* NULL-terminate and remove enter */
+	buf[count] = '\0';
+	strim(buf);
+
+	crashtype = find_crashtype(buf);
+	free_page((unsigned long)buf);
+
+	if (!crashtype)
+		return -EINVAL;
+
+	err = lkdtm_register_cpoint(crashpoint, crashtype);
+	if (err < 0)
+		return err;
+
+	*off += count;
+
+	return count;
+}
+#endif
+
+/* Generic read callback that just prints out the available crash types */
+static ssize_t lkdtm_debugfs_read(struct file *f, char __user *user_buf,
+		size_t count, loff_t *off)
+{
+	char *buf;
+	int i, n, out;
+
+	buf = (char *)__get_free_page(GFP_KERNEL);
+	if (buf == NULL)
+		return -ENOMEM;
+
+	n = snprintf(buf, PAGE_SIZE, "Available crash types:\n");
+	for (i = 0; i < ARRAY_SIZE(crashtypes); i++) {
+		n += snprintf(buf + n, PAGE_SIZE - n, "%s\n",
+			      crashtypes[i].name);
+	}
+	buf[n] = '\0';
+
+	out = simple_read_from_buffer(user_buf, count, off,
+				      buf, n);
+	free_page((unsigned long) buf);
+
+	return out;
+}
+
+static int lkdtm_debugfs_open(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+
+/* Special entry to just crash directly. Available without KPROBEs */
+static ssize_t direct_entry(struct file *f, const char __user *user_buf,
+		size_t count, loff_t *off)
+{
+	const struct crashtype *crashtype;
+	char *buf;
+
+	if (count >= PAGE_SIZE)
+		return -EINVAL;
+	if (count < 1)
+		return -EINVAL;
+
+	buf = (char *)__get_free_page(GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+	if (copy_from_user(buf, user_buf, count)) {
+		free_page((unsigned long) buf);
+		return -EFAULT;
+	}
+	/* NULL-terminate and remove enter */
+	buf[count] = '\0';
+	strim(buf);
+
+	crashtype = find_crashtype(buf);
+	free_page((unsigned long) buf);
+	if (!crashtype)
+		return -EINVAL;
+
+	pr_info("Performing direct entry %s\n", crashtype->name);
+	lkdtm_do_action(crashtype);
+	*off += count;
+
+	return count;
+}
+
+static struct dentry *lkdtm_debugfs_root;
+
+static int __init lkdtm_module_init(void)
+{
+	struct crashpoint *crashpoint = NULL;
+	const struct crashtype *crashtype = NULL;
+	int ret = -EINVAL;
+	int i;
+
+	/* Neither or both of these need to be set */
+	if ((cpoint_type || cpoint_name) && !(cpoint_type && cpoint_name)) {
+		pr_err("Need both cpoint_type and cpoint_name or neither\n");
+		return -EINVAL;
+	}
+
+	if (cpoint_type) {
+		crashtype = find_crashtype(cpoint_type);
+		if (!crashtype) {
+			pr_err("Unknown crashtype '%s'\n", cpoint_type);
+			return -EINVAL;
+		}
+	}
+
+	if (cpoint_name) {
+		for (i = 0; i < ARRAY_SIZE(crashpoints); i++) {
+			if (!strcmp(cpoint_name, crashpoints[i].name))
+				crashpoint = &crashpoints[i];
+		}
+
+		/* Refuse unknown crashpoints. */
+		if (!crashpoint) {
+			pr_err("Invalid crashpoint %s\n", cpoint_name);
+			return -EINVAL;
+		}
+	}
+
+#ifdef CONFIG_KPROBES
+	/* Set crash count. */
+	crash_count = cpoint_count;
+#endif
+
+	/* Handle test-specific initialization. */
+	lkdtm_bugs_init(&recur_count);
+	lkdtm_perms_init();
+	lkdtm_usercopy_init();
+
+	/* Register debugfs interface */
+	lkdtm_debugfs_root = debugfs_create_dir("provoke-crash", NULL);
+	if (!lkdtm_debugfs_root) {
+		pr_err("creating root dir failed\n");
+		return -ENODEV;
+	}
+
+	/* Install debugfs trigger files. */
+	for (i = 0; i < ARRAY_SIZE(crashpoints); i++) {
+		struct crashpoint *cur = &crashpoints[i];
+		struct dentry *de;
+
+		de = debugfs_create_file(cur->name, 0644, lkdtm_debugfs_root,
+					 cur, &cur->fops);
+		if (de == NULL) {
+			pr_err("could not create crashpoint %s\n", cur->name);
+			goto out_err;
+		}
+	}
+
+	/* Install crashpoint if one was selected. */
+	if (crashpoint) {
+		ret = lkdtm_register_cpoint(crashpoint, crashtype);
+		if (ret < 0) {
+			pr_info("Invalid crashpoint %s\n", crashpoint->name);
+			goto out_err;
+		}
+		pr_info("Crash point %s of type %s registered\n",
+			crashpoint->name, cpoint_type);
+	} else {
+		pr_info("No crash points registered, enable through debugfs\n");
+	}
+
+	return 0;
+
+out_err:
+	debugfs_remove_recursive(lkdtm_debugfs_root);
+	return ret;
+}
+
+static void __exit lkdtm_module_exit(void)
+{
+	debugfs_remove_recursive(lkdtm_debugfs_root);
+
+	/* Handle test-specific clean-up. */
+	lkdtm_usercopy_exit();
+
+	if (lkdtm_kprobe != NULL)
+		unregister_kprobe(lkdtm_kprobe);
+
+	pr_info("Crash point unregistered\n");
+}
+
+module_init(lkdtm_module_init);
+module_exit(lkdtm_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Kernel crash testing module");
diff --git a/drivers/misc/lkdtm/heap.c b/drivers/misc/lkdtm/heap.c
new file mode 100644
index 000000000..65026d7de
--- /dev/null
+++ b/drivers/misc/lkdtm/heap.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This is for all the tests relating directly to heap memory, including
+ * page allocation and slab allocations.
+ */
+#include "lkdtm.h"
+#include <linux/slab.h>
+#include <linux/sched.h>
+
+/*
+ * This tries to stay within the next largest power-of-2 kmalloc cache
+ * to avoid actually overwriting anything important if it's not detected
+ * correctly.
+ */
+void lkdtm_OVERWRITE_ALLOCATION(void)
+{
+	size_t len = 1020;
+	u32 *data = kmalloc(len, GFP_KERNEL);
+	if (!data)
+		return;
+
+	data[1024 / sizeof(u32)] = 0x12345678;
+	kfree(data);
+}
+
+void lkdtm_WRITE_AFTER_FREE(void)
+{
+	int *base, *again;
+	size_t len = 1024;
+	/*
+	 * The slub allocator uses the first word to store the free
+	 * pointer in some configurations. Use the middle of the
+	 * allocation to avoid running into the freelist
+	 */
+	size_t offset = (len / sizeof(*base)) / 2;
+
+	base = kmalloc(len, GFP_KERNEL);
+	if (!base)
+		return;
+	pr_info("Allocated memory %p-%p\n", base, &base[offset * 2]);
+	pr_info("Attempting bad write to freed memory at %p\n",
+		&base[offset]);
+	kfree(base);
+	base[offset] = 0x0abcdef0;
+	/* Attempt to notice the overwrite. */
+	again = kmalloc(len, GFP_KERNEL);
+	kfree(again);
+	if (again != base)
+		pr_info("Hmm, didn't get the same memory range.\n");
+}
+
+void lkdtm_READ_AFTER_FREE(void)
+{
+	int *base, *val, saw;
+	size_t len = 1024;
+	/*
+	 * The slub allocator uses the first word to store the free
+	 * pointer in some configurations. Use the middle of the
+	 * allocation to avoid running into the freelist
+	 */
+	size_t offset = (len / sizeof(*base)) / 2;
+
+	base = kmalloc(len, GFP_KERNEL);
+	if (!base) {
+		pr_info("Unable to allocate base memory.\n");
+		return;
+	}
+
+	val = kmalloc(len, GFP_KERNEL);
+	if (!val) {
+		pr_info("Unable to allocate val memory.\n");
+		kfree(base);
+		return;
+	}
+
+	*val = 0x12345678;
+	base[offset] = *val;
+	pr_info("Value in memory before free: %x\n", base[offset]);
+
+	kfree(base);
+
+	pr_info("Attempting bad read from freed memory\n");
+	saw = base[offset];
+	if (saw != *val) {
+		/* Good! Poisoning happened, so declare a win. */
+		pr_info("Memory correctly poisoned (%x)\n", saw);
+		BUG();
+	}
+	pr_info("Memory was not poisoned\n");
+
+	kfree(val);
+}
+
+void lkdtm_WRITE_BUDDY_AFTER_FREE(void)
+{
+	unsigned long p = __get_free_page(GFP_KERNEL);
+	if (!p) {
+		pr_info("Unable to allocate free page\n");
+		return;
+	}
+
+	pr_info("Writing to the buddy page before free\n");
+	memset((void *)p, 0x3, PAGE_SIZE);
+	free_page(p);
+	schedule();
+	pr_info("Attempting bad write to the buddy page after free\n");
+	memset((void *)p, 0x78, PAGE_SIZE);
+	/* Attempt to notice the overwrite. */
+	p = __get_free_page(GFP_KERNEL);
+	free_page(p);
+	schedule();
+}
+
+void lkdtm_READ_BUDDY_AFTER_FREE(void)
+{
+	unsigned long p = __get_free_page(GFP_KERNEL);
+	int saw, *val;
+	int *base;
+
+	if (!p) {
+		pr_info("Unable to allocate free page\n");
+		return;
+	}
+
+	val = kmalloc(1024, GFP_KERNEL);
+	if (!val) {
+		pr_info("Unable to allocate val memory.\n");
+		free_page(p);
+		return;
+	}
+
+	base = (int *)p;
+
+	*val = 0x12345678;
+	base[0] = *val;
+	pr_info("Value in memory before free: %x\n", base[0]);
+	free_page(p);
+	pr_info("Attempting to read from freed memory\n");
+	saw = base[0];
+	if (saw != *val) {
+		/* Good! Poisoning happened, so declare a win. */
+		pr_info("Memory correctly poisoned (%x)\n", saw);
+		BUG();
+	}
+	pr_info("Buddy page was not poisoned\n");
+
+	kfree(val);
+}
diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
new file mode 100644
index 000000000..8c3f2e6af
--- /dev/null
+++ b/drivers/misc/lkdtm/lkdtm.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LKDTM_H
+#define __LKDTM_H
+
+#define pr_fmt(fmt) "lkdtm: " fmt
+
+#include <linux/kernel.h>
+
+/* lkdtm_bugs.c */
+void __init lkdtm_bugs_init(int *recur_param);
+void lkdtm_PANIC(void);
+void lkdtm_BUG(void);
+void lkdtm_WARNING(void);
+void lkdtm_EXCEPTION(void);
+void lkdtm_LOOP(void);
+void lkdtm_OVERFLOW(void);
+void lkdtm_CORRUPT_STACK(void);
+void lkdtm_CORRUPT_STACK_STRONG(void);
+void lkdtm_UNALIGNED_LOAD_STORE_WRITE(void);
+void lkdtm_SOFTLOCKUP(void);
+void lkdtm_HARDLOCKUP(void);
+void lkdtm_SPINLOCKUP(void);
+void lkdtm_HUNG_TASK(void);
+void lkdtm_CORRUPT_LIST_ADD(void);
+void lkdtm_CORRUPT_LIST_DEL(void);
+void lkdtm_CORRUPT_USER_DS(void);
+void lkdtm_STACK_GUARD_PAGE_LEADING(void);
+void lkdtm_STACK_GUARD_PAGE_TRAILING(void);
+
+/* lkdtm_heap.c */
+void lkdtm_OVERWRITE_ALLOCATION(void);
+void lkdtm_WRITE_AFTER_FREE(void);
+void lkdtm_READ_AFTER_FREE(void);
+void lkdtm_WRITE_BUDDY_AFTER_FREE(void);
+void lkdtm_READ_BUDDY_AFTER_FREE(void);
+
+/* lkdtm_perms.c */
+void __init lkdtm_perms_init(void);
+void lkdtm_WRITE_RO(void);
+void lkdtm_WRITE_RO_AFTER_INIT(void);
+void lkdtm_WRITE_KERN(void);
+void lkdtm_EXEC_DATA(void);
+void lkdtm_EXEC_STACK(void);
+void lkdtm_EXEC_KMALLOC(void);
+void lkdtm_EXEC_VMALLOC(void);
+void lkdtm_EXEC_RODATA(void);
+void lkdtm_EXEC_USERSPACE(void);
+void lkdtm_EXEC_NULL(void);
+void lkdtm_ACCESS_USERSPACE(void);
+void lkdtm_ACCESS_NULL(void);
+
+/* lkdtm_refcount.c */
+void lkdtm_REFCOUNT_INC_OVERFLOW(void);
+void lkdtm_REFCOUNT_ADD_OVERFLOW(void);
+void lkdtm_REFCOUNT_INC_NOT_ZERO_OVERFLOW(void);
+void lkdtm_REFCOUNT_ADD_NOT_ZERO_OVERFLOW(void);
+void lkdtm_REFCOUNT_DEC_ZERO(void);
+void lkdtm_REFCOUNT_DEC_NEGATIVE(void);
+void lkdtm_REFCOUNT_DEC_AND_TEST_NEGATIVE(void);
+void lkdtm_REFCOUNT_SUB_AND_TEST_NEGATIVE(void);
+void lkdtm_REFCOUNT_INC_ZERO(void);
+void lkdtm_REFCOUNT_ADD_ZERO(void);
+void lkdtm_REFCOUNT_INC_SATURATED(void);
+void lkdtm_REFCOUNT_DEC_SATURATED(void);
+void lkdtm_REFCOUNT_ADD_SATURATED(void);
+void lkdtm_REFCOUNT_INC_NOT_ZERO_SATURATED(void);
+void lkdtm_REFCOUNT_ADD_NOT_ZERO_SATURATED(void);
+void lkdtm_REFCOUNT_DEC_AND_TEST_SATURATED(void);
+void lkdtm_REFCOUNT_SUB_AND_TEST_SATURATED(void);
+void lkdtm_REFCOUNT_TIMING(void);
+void lkdtm_ATOMIC_TIMING(void);
+
+/* lkdtm_rodata.c */
+void lkdtm_rodata_do_nothing(void);
+
+/* lkdtm_usercopy.c */
+void __init lkdtm_usercopy_init(void);
+void __exit lkdtm_usercopy_exit(void);
+void lkdtm_USERCOPY_HEAP_SIZE_TO(void);
+void lkdtm_USERCOPY_HEAP_SIZE_FROM(void);
+void lkdtm_USERCOPY_HEAP_WHITELIST_TO(void);
+void lkdtm_USERCOPY_HEAP_WHITELIST_FROM(void);
+void lkdtm_USERCOPY_STACK_FRAME_TO(void);
+void lkdtm_USERCOPY_STACK_FRAME_FROM(void);
+void lkdtm_USERCOPY_STACK_BEYOND(void);
+void lkdtm_USERCOPY_KERNEL(void);
+
+#endif
diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c
new file mode 100644
index 000000000..62f76d506
--- /dev/null
+++ b/drivers/misc/lkdtm/perms.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This is for all the tests related to validating kernel memory
+ * permissions: non-executable regions, non-writable regions, and
+ * even non-readable regions.
+ */
+#include "lkdtm.h"
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/mman.h>
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+
+/* Whether or not to fill the target memory area with do_nothing(). */
+#define CODE_WRITE	true
+#define CODE_AS_IS	false
+
+/* How many bytes to copy to be sure we've copied enough of do_nothing(). */
+#define EXEC_SIZE 64
+
+/* This is non-const, so it will end up in the .data section. */
+static u8 data_area[EXEC_SIZE];
+
+/* This is cost, so it will end up in the .rodata section. */
+static const unsigned long rodata = 0xAA55AA55;
+
+/* This is marked __ro_after_init, so it should ultimately be .rodata. */
+static unsigned long ro_after_init __ro_after_init = 0x55AA5500;
+
+/*
+ * This just returns to the caller. It is designed to be copied into
+ * non-executable memory regions.
+ */
+static void do_nothing(void)
+{
+	return;
+}
+
+/* Must immediately follow do_nothing for size calculuations to work out. */
+static void do_overwritten(void)
+{
+	pr_info("do_overwritten wasn't overwritten!\n");
+	return;
+}
+
+static noinline void execute_location(void *dst, bool write)
+{
+	void (*func)(void) = dst;
+
+	pr_info("attempting ok execution at %px\n", do_nothing);
+	do_nothing();
+
+	if (write == CODE_WRITE) {
+		memcpy(dst, do_nothing, EXEC_SIZE);
+		flush_icache_range((unsigned long)dst,
+				   (unsigned long)dst + EXEC_SIZE);
+	}
+	pr_info("attempting bad execution at %px\n", func);
+	func();
+}
+
+static void execute_user_location(void *dst)
+{
+	int copied;
+
+	/* Intentionally crossing kernel/user memory boundary. */
+	void (*func)(void) = dst;
+
+	pr_info("attempting ok execution at %px\n", do_nothing);
+	do_nothing();
+
+	copied = access_process_vm(current, (unsigned long)dst, do_nothing,
+				   EXEC_SIZE, FOLL_WRITE);
+	if (copied < EXEC_SIZE)
+		return;
+	pr_info("attempting bad execution at %px\n", func);
+	func();
+}
+
+void lkdtm_WRITE_RO(void)
+{
+	/* Explicitly cast away "const" for the test. */
+	unsigned long *ptr = (unsigned long *)&rodata;
+
+	pr_info("attempting bad rodata write at %px\n", ptr);
+	*ptr ^= 0xabcd1234;
+}
+
+void lkdtm_WRITE_RO_AFTER_INIT(void)
+{
+	unsigned long *ptr = &ro_after_init;
+
+	/*
+	 * Verify we were written to during init. Since an Oops
+	 * is considered a "success", a failure is to just skip the
+	 * real test.
+	 */
+	if ((*ptr & 0xAA) != 0xAA) {
+		pr_info("%p was NOT written during init!?\n", ptr);
+		return;
+	}
+
+	pr_info("attempting bad ro_after_init write at %px\n", ptr);
+	*ptr ^= 0xabcd1234;
+}
+
+void lkdtm_WRITE_KERN(void)
+{
+	size_t size;
+	unsigned char *ptr;
+
+	size = (unsigned long)do_overwritten - (unsigned long)do_nothing;
+	ptr = (unsigned char *)do_overwritten;
+
+	pr_info("attempting bad %zu byte write at %px\n", size, ptr);
+	memcpy(ptr, (unsigned char *)do_nothing, size);
+	flush_icache_range((unsigned long)ptr, (unsigned long)(ptr + size));
+
+	do_overwritten();
+}
+
+void lkdtm_EXEC_DATA(void)
+{
+	execute_location(data_area, CODE_WRITE);
+}
+
+void lkdtm_EXEC_STACK(void)
+{
+	u8 stack_area[EXEC_SIZE];
+	execute_location(stack_area, CODE_WRITE);
+}
+
+void lkdtm_EXEC_KMALLOC(void)
+{
+	u32 *kmalloc_area = kmalloc(EXEC_SIZE, GFP_KERNEL);
+	execute_location(kmalloc_area, CODE_WRITE);
+	kfree(kmalloc_area);
+}
+
+void lkdtm_EXEC_VMALLOC(void)
+{
+	u32 *vmalloc_area = vmalloc(EXEC_SIZE);
+	execute_location(vmalloc_area, CODE_WRITE);
+	vfree(vmalloc_area);
+}
+
+void lkdtm_EXEC_RODATA(void)
+{
+	execute_location(lkdtm_rodata_do_nothing, CODE_AS_IS);
+}
+
+void lkdtm_EXEC_USERSPACE(void)
+{
+	unsigned long user_addr;
+
+	user_addr = vm_mmap(NULL, 0, PAGE_SIZE,
+			    PROT_READ | PROT_WRITE | PROT_EXEC,
+			    MAP_ANONYMOUS | MAP_PRIVATE, 0);
+	if (user_addr >= TASK_SIZE) {
+		pr_warn("Failed to allocate user memory\n");
+		return;
+	}
+	execute_user_location((void *)user_addr);
+	vm_munmap(user_addr, PAGE_SIZE);
+}
+
+void lkdtm_EXEC_NULL(void)
+{
+	execute_location(NULL, CODE_AS_IS);
+}
+
+void lkdtm_ACCESS_USERSPACE(void)
+{
+	unsigned long user_addr, tmp = 0;
+	unsigned long *ptr;
+
+	user_addr = vm_mmap(NULL, 0, PAGE_SIZE,
+			    PROT_READ | PROT_WRITE | PROT_EXEC,
+			    MAP_ANONYMOUS | MAP_PRIVATE, 0);
+	if (user_addr >= TASK_SIZE) {
+		pr_warn("Failed to allocate user memory\n");
+		return;
+	}
+
+	if (copy_to_user((void __user *)user_addr, &tmp, sizeof(tmp))) {
+		pr_warn("copy_to_user failed\n");
+		vm_munmap(user_addr, PAGE_SIZE);
+		return;
+	}
+
+	ptr = (unsigned long *)user_addr;
+
+	pr_info("attempting bad read at %px\n", ptr);
+	tmp = *ptr;
+	tmp += 0xc0dec0de;
+
+	pr_info("attempting bad write at %px\n", ptr);
+	*ptr = tmp;
+
+	vm_munmap(user_addr, PAGE_SIZE);
+}
+
+void lkdtm_ACCESS_NULL(void)
+{
+	unsigned long tmp;
+	unsigned long *ptr = (unsigned long *)NULL;
+
+	pr_info("attempting bad read at %px\n", ptr);
+	tmp = *ptr;
+	tmp += 0xc0dec0de;
+
+	pr_info("attempting bad write at %px\n", ptr);
+	*ptr = tmp;
+}
+
+void __init lkdtm_perms_init(void)
+{
+	/* Make sure we can write to __ro_after_init values during __init */
+	ro_after_init |= 0xAA;
+
+}
diff --git a/drivers/misc/lkdtm/refcount.c b/drivers/misc/lkdtm/refcount.c
new file mode 100644
index 000000000..0a146b32d
--- /dev/null
+++ b/drivers/misc/lkdtm/refcount.c
@@ -0,0 +1,401 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This is for all the tests related to refcount bugs (e.g. overflow,
+ * underflow, reaching zero untested, etc).
+ */
+#include "lkdtm.h"
+#include <linux/refcount.h>
+
+#ifdef CONFIG_REFCOUNT_FULL
+#define REFCOUNT_MAX		(UINT_MAX - 1)
+#define REFCOUNT_SATURATED	UINT_MAX
+#else
+#define REFCOUNT_MAX		INT_MAX
+#define REFCOUNT_SATURATED	(INT_MIN / 2)
+#endif
+
+static void overflow_check(refcount_t *ref)
+{
+	switch (refcount_read(ref)) {
+	case REFCOUNT_SATURATED:
+		pr_info("Overflow detected: saturated\n");
+		break;
+	case REFCOUNT_MAX:
+		pr_warn("Overflow detected: unsafely reset to max\n");
+		break;
+	default:
+		pr_err("Fail: refcount wrapped to %d\n", refcount_read(ref));
+	}
+}
+
+/*
+ * A refcount_inc() above the maximum value of the refcount implementation,
+ * should at least saturate, and at most also WARN.
+ */
+void lkdtm_REFCOUNT_INC_OVERFLOW(void)
+{
+	refcount_t over = REFCOUNT_INIT(REFCOUNT_MAX - 1);
+
+	pr_info("attempting good refcount_inc() without overflow\n");
+	refcount_dec(&over);
+	refcount_inc(&over);
+
+	pr_info("attempting bad refcount_inc() overflow\n");
+	refcount_inc(&over);
+	refcount_inc(&over);
+
+	overflow_check(&over);
+}
+
+/* refcount_add() should behave just like refcount_inc() above. */
+void lkdtm_REFCOUNT_ADD_OVERFLOW(void)
+{
+	refcount_t over = REFCOUNT_INIT(REFCOUNT_MAX - 1);
+
+	pr_info("attempting good refcount_add() without overflow\n");
+	refcount_dec(&over);
+	refcount_dec(&over);
+	refcount_dec(&over);
+	refcount_dec(&over);
+	refcount_add(4, &over);
+
+	pr_info("attempting bad refcount_add() overflow\n");
+	refcount_add(4, &over);
+
+	overflow_check(&over);
+}
+
+/* refcount_inc_not_zero() should behave just like refcount_inc() above. */
+void lkdtm_REFCOUNT_INC_NOT_ZERO_OVERFLOW(void)
+{
+	refcount_t over = REFCOUNT_INIT(REFCOUNT_MAX);
+
+	pr_info("attempting bad refcount_inc_not_zero() overflow\n");
+	if (!refcount_inc_not_zero(&over))
+		pr_warn("Weird: refcount_inc_not_zero() reported zero\n");
+
+	overflow_check(&over);
+}
+
+/* refcount_add_not_zero() should behave just like refcount_inc() above. */
+void lkdtm_REFCOUNT_ADD_NOT_ZERO_OVERFLOW(void)
+{
+	refcount_t over = REFCOUNT_INIT(REFCOUNT_MAX);
+
+	pr_info("attempting bad refcount_add_not_zero() overflow\n");
+	if (!refcount_add_not_zero(6, &over))
+		pr_warn("Weird: refcount_add_not_zero() reported zero\n");
+
+	overflow_check(&over);
+}
+
+static void check_zero(refcount_t *ref)
+{
+	switch (refcount_read(ref)) {
+	case REFCOUNT_SATURATED:
+		pr_info("Zero detected: saturated\n");
+		break;
+	case REFCOUNT_MAX:
+		pr_warn("Zero detected: unsafely reset to max\n");
+		break;
+	case 0:
+		pr_warn("Still at zero: refcount_inc/add() must not inc-from-0\n");
+		break;
+	default:
+		pr_err("Fail: refcount went crazy: %d\n", refcount_read(ref));
+	}
+}
+
+/*
+ * A refcount_dec(), as opposed to a refcount_dec_and_test(), when it hits
+ * zero it should either saturate (when inc-from-zero isn't protected)
+ * or stay at zero (when inc-from-zero is protected) and should WARN for both.
+ */
+void lkdtm_REFCOUNT_DEC_ZERO(void)
+{
+	refcount_t zero = REFCOUNT_INIT(2);
+
+	pr_info("attempting good refcount_dec()\n");
+	refcount_dec(&zero);
+
+	pr_info("attempting bad refcount_dec() to zero\n");
+	refcount_dec(&zero);
+
+	check_zero(&zero);
+}
+
+static void check_negative(refcount_t *ref, int start)
+{
+	/*
+	 * CONFIG_REFCOUNT_FULL refuses to move a refcount at all on an
+	 * over-sub, so we have to track our starting position instead of
+	 * looking only at zero-pinning.
+	 */
+	if (refcount_read(ref) == start) {
+		pr_warn("Still at %d: refcount_inc/add() must not inc-from-0\n",
+			start);
+		return;
+	}
+
+	switch (refcount_read(ref)) {
+	case REFCOUNT_SATURATED:
+		pr_info("Negative detected: saturated\n");
+		break;
+	case REFCOUNT_MAX:
+		pr_warn("Negative detected: unsafely reset to max\n");
+		break;
+	default:
+		pr_err("Fail: refcount went crazy: %d\n", refcount_read(ref));
+	}
+}
+
+/* A refcount_dec() going negative should saturate and may WARN. */
+void lkdtm_REFCOUNT_DEC_NEGATIVE(void)
+{
+	refcount_t neg = REFCOUNT_INIT(0);
+
+	pr_info("attempting bad refcount_dec() below zero\n");
+	refcount_dec(&neg);
+
+	check_negative(&neg, 0);
+}
+
+/*
+ * A refcount_dec_and_test() should act like refcount_dec() above when
+ * going negative.
+ */
+void lkdtm_REFCOUNT_DEC_AND_TEST_NEGATIVE(void)
+{
+	refcount_t neg = REFCOUNT_INIT(0);
+
+	pr_info("attempting bad refcount_dec_and_test() below zero\n");
+	if (refcount_dec_and_test(&neg))
+		pr_warn("Weird: refcount_dec_and_test() reported zero\n");
+
+	check_negative(&neg, 0);
+}
+
+/*
+ * A refcount_sub_and_test() should act like refcount_dec_and_test()
+ * above when going negative.
+ */
+void lkdtm_REFCOUNT_SUB_AND_TEST_NEGATIVE(void)
+{
+	refcount_t neg = REFCOUNT_INIT(3);
+
+	pr_info("attempting bad refcount_sub_and_test() below zero\n");
+	if (refcount_sub_and_test(5, &neg))
+		pr_warn("Weird: refcount_sub_and_test() reported zero\n");
+
+	check_negative(&neg, 3);
+}
+
+static void check_from_zero(refcount_t *ref)
+{
+	switch (refcount_read(ref)) {
+	case 0:
+		pr_info("Zero detected: stayed at zero\n");
+		break;
+	case REFCOUNT_SATURATED:
+		pr_info("Zero detected: saturated\n");
+		break;
+	case REFCOUNT_MAX:
+		pr_warn("Zero detected: unsafely reset to max\n");
+		break;
+	default:
+		pr_info("Fail: zero not detected, incremented to %d\n",
+			refcount_read(ref));
+	}
+}
+
+/*
+ * A refcount_inc() from zero should pin to zero or saturate and may WARN.
+ * Only CONFIG_REFCOUNT_FULL provides this protection currently.
+ */
+void lkdtm_REFCOUNT_INC_ZERO(void)
+{
+	refcount_t zero = REFCOUNT_INIT(0);
+
+	pr_info("attempting safe refcount_inc_not_zero() from zero\n");
+	if (!refcount_inc_not_zero(&zero)) {
+		pr_info("Good: zero detected\n");
+		if (refcount_read(&zero) == 0)
+			pr_info("Correctly stayed at zero\n");
+		else
+			pr_err("Fail: refcount went past zero!\n");
+	} else {
+		pr_err("Fail: Zero not detected!?\n");
+	}
+
+	pr_info("attempting bad refcount_inc() from zero\n");
+	refcount_inc(&zero);
+
+	check_from_zero(&zero);
+}
+
+/*
+ * A refcount_add() should act like refcount_inc() above when starting
+ * at zero.
+ */
+void lkdtm_REFCOUNT_ADD_ZERO(void)
+{
+	refcount_t zero = REFCOUNT_INIT(0);
+
+	pr_info("attempting safe refcount_add_not_zero() from zero\n");
+	if (!refcount_add_not_zero(3, &zero)) {
+		pr_info("Good: zero detected\n");
+		if (refcount_read(&zero) == 0)
+			pr_info("Correctly stayed at zero\n");
+		else
+			pr_err("Fail: refcount went past zero\n");
+	} else {
+		pr_err("Fail: Zero not detected!?\n");
+	}
+
+	pr_info("attempting bad refcount_add() from zero\n");
+	refcount_add(3, &zero);
+
+	check_from_zero(&zero);
+}
+
+static void check_saturated(refcount_t *ref)
+{
+	switch (refcount_read(ref)) {
+	case REFCOUNT_SATURATED:
+		pr_info("Saturation detected: still saturated\n");
+		break;
+	case REFCOUNT_MAX:
+		pr_warn("Saturation detected: unsafely reset to max\n");
+		break;
+	default:
+		pr_err("Fail: refcount went crazy: %d\n", refcount_read(ref));
+	}
+}
+
+/*
+ * A refcount_inc() from a saturated value should at most warn about
+ * being saturated already.
+ */
+void lkdtm_REFCOUNT_INC_SATURATED(void)
+{
+	refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED);
+
+	pr_info("attempting bad refcount_inc() from saturated\n");
+	refcount_inc(&sat);
+
+	check_saturated(&sat);
+}
+
+/* Should act like refcount_inc() above from saturated. */
+void lkdtm_REFCOUNT_DEC_SATURATED(void)
+{
+	refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED);
+
+	pr_info("attempting bad refcount_dec() from saturated\n");
+	refcount_dec(&sat);
+
+	check_saturated(&sat);
+}
+
+/* Should act like refcount_inc() above from saturated. */
+void lkdtm_REFCOUNT_ADD_SATURATED(void)
+{
+	refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED);
+
+	pr_info("attempting bad refcount_dec() from saturated\n");
+	refcount_add(8, &sat);
+
+	check_saturated(&sat);
+}
+
+/* Should act like refcount_inc() above from saturated. */
+void lkdtm_REFCOUNT_INC_NOT_ZERO_SATURATED(void)
+{
+	refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED);
+
+	pr_info("attempting bad refcount_inc_not_zero() from saturated\n");
+	if (!refcount_inc_not_zero(&sat))
+		pr_warn("Weird: refcount_inc_not_zero() reported zero\n");
+
+	check_saturated(&sat);
+}
+
+/* Should act like refcount_inc() above from saturated. */
+void lkdtm_REFCOUNT_ADD_NOT_ZERO_SATURATED(void)
+{
+	refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED);
+
+	pr_info("attempting bad refcount_add_not_zero() from saturated\n");
+	if (!refcount_add_not_zero(7, &sat))
+		pr_warn("Weird: refcount_add_not_zero() reported zero\n");
+
+	check_saturated(&sat);
+}
+
+/* Should act like refcount_inc() above from saturated. */
+void lkdtm_REFCOUNT_DEC_AND_TEST_SATURATED(void)
+{
+	refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED);
+
+	pr_info("attempting bad refcount_dec_and_test() from saturated\n");
+	if (refcount_dec_and_test(&sat))
+		pr_warn("Weird: refcount_dec_and_test() reported zero\n");
+
+	check_saturated(&sat);
+}
+
+/* Should act like refcount_inc() above from saturated. */
+void lkdtm_REFCOUNT_SUB_AND_TEST_SATURATED(void)
+{
+	refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED);
+
+	pr_info("attempting bad refcount_sub_and_test() from saturated\n");
+	if (refcount_sub_and_test(8, &sat))
+		pr_warn("Weird: refcount_sub_and_test() reported zero\n");
+
+	check_saturated(&sat);
+}
+
+/* Used to time the existing atomic_t when used for reference counting */
+void lkdtm_ATOMIC_TIMING(void)
+{
+	unsigned int i;
+	atomic_t count = ATOMIC_INIT(1);
+
+	for (i = 0; i < INT_MAX - 1; i++)
+		atomic_inc(&count);
+
+	for (i = INT_MAX; i > 0; i--)
+		if (atomic_dec_and_test(&count))
+			break;
+
+	if (i != 1)
+		pr_err("atomic timing: out of sync up/down cycle: %u\n", i - 1);
+	else
+		pr_info("atomic timing: done\n");
+}
+
+/*
+ * This can be compared to ATOMIC_TIMING when implementing fast refcount
+ * protections. Looking at the number of CPU cycles tells the real story
+ * about performance. For example:
+ *    cd /sys/kernel/debug/provoke-crash
+ *    perf stat -B -- cat <(echo REFCOUNT_TIMING) > DIRECT
+ */
+void lkdtm_REFCOUNT_TIMING(void)
+{
+	unsigned int i;
+	refcount_t count = REFCOUNT_INIT(1);
+
+	for (i = 0; i < INT_MAX - 1; i++)
+		refcount_inc(&count);
+
+	for (i = INT_MAX; i > 0; i--)
+		if (refcount_dec_and_test(&count))
+			break;
+
+	if (i != 1)
+		pr_err("refcount: out of sync up/down cycle: %u\n", i - 1);
+	else
+		pr_info("refcount timing: done\n");
+}
diff --git a/drivers/misc/lkdtm/rodata.c b/drivers/misc/lkdtm/rodata.c
new file mode 100644
index 000000000..baacb876d
--- /dev/null
+++ b/drivers/misc/lkdtm/rodata.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This includes functions that are meant to live entirely in .rodata
+ * (via objcopy tricks), to validate the non-executability of .rodata.
+ */
+#include "lkdtm.h"
+
+void noinstr lkdtm_rodata_do_nothing(void)
+{
+	/* Does nothing. We just want an architecture agnostic "return". */
+}
diff --git a/drivers/misc/lkdtm/usercopy.c b/drivers/misc/lkdtm/usercopy.c
new file mode 100644
index 000000000..b0e020372
--- /dev/null
+++ b/drivers/misc/lkdtm/usercopy.c
@@ -0,0 +1,350 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This is for all the tests related to copy_to_user() and copy_from_user()
+ * hardening.
+ */
+#include "lkdtm.h"
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/sched/task_stack.h>
+#include <linux/mman.h>
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+
+/*
+ * Many of the tests here end up using const sizes, but those would
+ * normally be ignored by hardened usercopy, so force the compiler
+ * into choosing the non-const path to make sure we trigger the
+ * hardened usercopy checks by added "unconst" to all the const copies,
+ * and making sure "cache_size" isn't optimized into a const.
+ */
+static volatile size_t unconst = 0;
+static volatile size_t cache_size = 1024;
+static struct kmem_cache *whitelist_cache;
+
+static const unsigned char test_text[] = "This is a test.\n";
+
+/*
+ * Instead of adding -Wno-return-local-addr, just pass the stack address
+ * through a function to obfuscate it from the compiler.
+ */
+static noinline unsigned char *trick_compiler(unsigned char *stack)
+{
+	return stack + unconst;
+}
+
+static noinline unsigned char *do_usercopy_stack_callee(int value)
+{
+	unsigned char buf[128];
+	int i;
+
+	/* Exercise stack to avoid everything living in registers. */
+	for (i = 0; i < sizeof(buf); i++) {
+		buf[i] = value & 0xff;
+	}
+
+	/*
+	 * Put the target buffer in the middle of stack allocation
+	 * so that we don't step on future stack users regardless
+	 * of stack growth direction.
+	 */
+	return trick_compiler(&buf[(128/2)-32]);
+}
+
+static noinline void do_usercopy_stack(bool to_user, bool bad_frame)
+{
+	unsigned long user_addr;
+	unsigned char good_stack[32];
+	unsigned char *bad_stack;
+	int i;
+
+	/* Exercise stack to avoid everything living in registers. */
+	for (i = 0; i < sizeof(good_stack); i++)
+		good_stack[i] = test_text[i % sizeof(test_text)];
+
+	/* This is a pointer to outside our current stack frame. */
+	if (bad_frame) {
+		bad_stack = do_usercopy_stack_callee((uintptr_t)&bad_stack);
+	} else {
+		/* Put start address just inside stack. */
+		bad_stack = task_stack_page(current) + THREAD_SIZE;
+		bad_stack -= sizeof(unsigned long);
+	}
+
+#ifdef ARCH_HAS_CURRENT_STACK_POINTER
+	pr_info("stack     : %px\n", (void *)current_stack_pointer);
+#endif
+	pr_info("good_stack: %px-%px\n", good_stack, good_stack + sizeof(good_stack));
+	pr_info("bad_stack : %px-%px\n", bad_stack, bad_stack + sizeof(good_stack));
+
+	user_addr = vm_mmap(NULL, 0, PAGE_SIZE,
+			    PROT_READ | PROT_WRITE | PROT_EXEC,
+			    MAP_ANONYMOUS | MAP_PRIVATE, 0);
+	if (user_addr >= TASK_SIZE) {
+		pr_warn("Failed to allocate user memory\n");
+		return;
+	}
+
+	if (to_user) {
+		pr_info("attempting good copy_to_user of local stack\n");
+		if (copy_to_user((void __user *)user_addr, good_stack,
+				 unconst + sizeof(good_stack))) {
+			pr_warn("copy_to_user failed unexpectedly?!\n");
+			goto free_user;
+		}
+
+		pr_info("attempting bad copy_to_user of distant stack\n");
+		if (copy_to_user((void __user *)user_addr, bad_stack,
+				 unconst + sizeof(good_stack))) {
+			pr_warn("copy_to_user failed, but lacked Oops\n");
+			goto free_user;
+		}
+	} else {
+		/*
+		 * There isn't a safe way to not be protected by usercopy
+		 * if we're going to write to another thread's stack.
+		 */
+		if (!bad_frame)
+			goto free_user;
+
+		pr_info("attempting good copy_from_user of local stack\n");
+		if (copy_from_user(good_stack, (void __user *)user_addr,
+				   unconst + sizeof(good_stack))) {
+			pr_warn("copy_from_user failed unexpectedly?!\n");
+			goto free_user;
+		}
+
+		pr_info("attempting bad copy_from_user of distant stack\n");
+		if (copy_from_user(bad_stack, (void __user *)user_addr,
+				   unconst + sizeof(good_stack))) {
+			pr_warn("copy_from_user failed, but lacked Oops\n");
+			goto free_user;
+		}
+	}
+
+free_user:
+	vm_munmap(user_addr, PAGE_SIZE);
+}
+
+/*
+ * This checks for whole-object size validation with hardened usercopy,
+ * with or without usercopy whitelisting.
+ */
+static void do_usercopy_heap_size(bool to_user)
+{
+	unsigned long user_addr;
+	unsigned char *one, *two;
+	void __user *test_user_addr;
+	void *test_kern_addr;
+	size_t size = unconst + 1024;
+
+	one = kmalloc(size, GFP_KERNEL);
+	two = kmalloc(size, GFP_KERNEL);
+	if (!one || !two) {
+		pr_warn("Failed to allocate kernel memory\n");
+		goto free_kernel;
+	}
+
+	user_addr = vm_mmap(NULL, 0, PAGE_SIZE,
+			    PROT_READ | PROT_WRITE | PROT_EXEC,
+			    MAP_ANONYMOUS | MAP_PRIVATE, 0);
+	if (user_addr >= TASK_SIZE) {
+		pr_warn("Failed to allocate user memory\n");
+		goto free_kernel;
+	}
+
+	memset(one, 'A', size);
+	memset(two, 'B', size);
+
+	test_user_addr = (void __user *)(user_addr + 16);
+	test_kern_addr = one + 16;
+
+	if (to_user) {
+		pr_info("attempting good copy_to_user of correct size\n");
+		if (copy_to_user(test_user_addr, test_kern_addr, size / 2)) {
+			pr_warn("copy_to_user failed unexpectedly?!\n");
+			goto free_user;
+		}
+
+		pr_info("attempting bad copy_to_user of too large size\n");
+		if (copy_to_user(test_user_addr, test_kern_addr, size)) {
+			pr_warn("copy_to_user failed, but lacked Oops\n");
+			goto free_user;
+		}
+	} else {
+		pr_info("attempting good copy_from_user of correct size\n");
+		if (copy_from_user(test_kern_addr, test_user_addr, size / 2)) {
+			pr_warn("copy_from_user failed unexpectedly?!\n");
+			goto free_user;
+		}
+
+		pr_info("attempting bad copy_from_user of too large size\n");
+		if (copy_from_user(test_kern_addr, test_user_addr, size)) {
+			pr_warn("copy_from_user failed, but lacked Oops\n");
+			goto free_user;
+		}
+	}
+
+free_user:
+	vm_munmap(user_addr, PAGE_SIZE);
+free_kernel:
+	kfree(one);
+	kfree(two);
+}
+
+/*
+ * This checks for the specific whitelist window within an object. If this
+ * test passes, then do_usercopy_heap_size() tests will pass too.
+ */
+static void do_usercopy_heap_whitelist(bool to_user)
+{
+	unsigned long user_alloc;
+	unsigned char *buf = NULL;
+	unsigned char __user *user_addr;
+	size_t offset, size;
+
+	/* Make sure cache was prepared. */
+	if (!whitelist_cache) {
+		pr_warn("Failed to allocate kernel cache\n");
+		return;
+	}
+
+	/*
+	 * Allocate a buffer with a whitelisted window in the buffer.
+	 */
+	buf = kmem_cache_alloc(whitelist_cache, GFP_KERNEL);
+	if (!buf) {
+		pr_warn("Failed to allocate buffer from whitelist cache\n");
+		goto free_alloc;
+	}
+
+	/* Allocate user memory we'll poke at. */
+	user_alloc = vm_mmap(NULL, 0, PAGE_SIZE,
+			    PROT_READ | PROT_WRITE | PROT_EXEC,
+			    MAP_ANONYMOUS | MAP_PRIVATE, 0);
+	if (user_alloc >= TASK_SIZE) {
+		pr_warn("Failed to allocate user memory\n");
+		goto free_alloc;
+	}
+	user_addr = (void __user *)user_alloc;
+
+	memset(buf, 'B', cache_size);
+
+	/* Whitelisted window in buffer, from kmem_cache_create_usercopy. */
+	offset = (cache_size / 4) + unconst;
+	size = (cache_size / 16) + unconst;
+
+	if (to_user) {
+		pr_info("attempting good copy_to_user inside whitelist\n");
+		if (copy_to_user(user_addr, buf + offset, size)) {
+			pr_warn("copy_to_user failed unexpectedly?!\n");
+			goto free_user;
+		}
+
+		pr_info("attempting bad copy_to_user outside whitelist\n");
+		if (copy_to_user(user_addr, buf + offset - 1, size)) {
+			pr_warn("copy_to_user failed, but lacked Oops\n");
+			goto free_user;
+		}
+	} else {
+		pr_info("attempting good copy_from_user inside whitelist\n");
+		if (copy_from_user(buf + offset, user_addr, size)) {
+			pr_warn("copy_from_user failed unexpectedly?!\n");
+			goto free_user;
+		}
+
+		pr_info("attempting bad copy_from_user outside whitelist\n");
+		if (copy_from_user(buf + offset - 1, user_addr, size)) {
+			pr_warn("copy_from_user failed, but lacked Oops\n");
+			goto free_user;
+		}
+	}
+
+free_user:
+	vm_munmap(user_alloc, PAGE_SIZE);
+free_alloc:
+	if (buf)
+		kmem_cache_free(whitelist_cache, buf);
+}
+
+/* Callable tests. */
+void lkdtm_USERCOPY_HEAP_SIZE_TO(void)
+{
+	do_usercopy_heap_size(true);
+}
+
+void lkdtm_USERCOPY_HEAP_SIZE_FROM(void)
+{
+	do_usercopy_heap_size(false);
+}
+
+void lkdtm_USERCOPY_HEAP_WHITELIST_TO(void)
+{
+	do_usercopy_heap_whitelist(true);
+}
+
+void lkdtm_USERCOPY_HEAP_WHITELIST_FROM(void)
+{
+	do_usercopy_heap_whitelist(false);
+}
+
+void lkdtm_USERCOPY_STACK_FRAME_TO(void)
+{
+	do_usercopy_stack(true, true);
+}
+
+void lkdtm_USERCOPY_STACK_FRAME_FROM(void)
+{
+	do_usercopy_stack(false, true);
+}
+
+void lkdtm_USERCOPY_STACK_BEYOND(void)
+{
+	do_usercopy_stack(true, false);
+}
+
+void lkdtm_USERCOPY_KERNEL(void)
+{
+	unsigned long user_addr;
+
+	user_addr = vm_mmap(NULL, 0, PAGE_SIZE,
+			    PROT_READ | PROT_WRITE | PROT_EXEC,
+			    MAP_ANONYMOUS | MAP_PRIVATE, 0);
+	if (user_addr >= TASK_SIZE) {
+		pr_warn("Failed to allocate user memory\n");
+		return;
+	}
+
+	pr_info("attempting good copy_to_user from kernel rodata\n");
+	if (copy_to_user((void __user *)user_addr, test_text,
+			 unconst + sizeof(test_text))) {
+		pr_warn("copy_to_user failed unexpectedly?!\n");
+		goto free_user;
+	}
+
+	pr_info("attempting bad copy_to_user from kernel text\n");
+	if (copy_to_user((void __user *)user_addr, vm_mmap,
+			 unconst + PAGE_SIZE)) {
+		pr_warn("copy_to_user failed, but lacked Oops\n");
+		goto free_user;
+	}
+
+free_user:
+	vm_munmap(user_addr, PAGE_SIZE);
+}
+
+void __init lkdtm_usercopy_init(void)
+{
+	/* Prepare cache that lacks SLAB_USERCOPY flag. */
+	whitelist_cache =
+		kmem_cache_create_usercopy("lkdtm-usercopy", cache_size,
+					   0, 0,
+					   cache_size / 4,
+					   cache_size / 16,
+					   NULL);
+}
+
+void __exit lkdtm_usercopy_exit(void)
+{
+	kmem_cache_destroy(whitelist_cache);
+}
diff --git a/drivers/misc/mei/Kconfig b/drivers/misc/mei/Kconfig
new file mode 100644
index 000000000..c49e1d226
--- /dev/null
+++ b/drivers/misc/mei/Kconfig
@@ -0,0 +1,45 @@
+config INTEL_MEI
+	tristate "Intel Management Engine Interface"
+	depends on X86 && PCI
+	help
+	  The Intel Management Engine (Intel ME) provides Manageability,
+	  Security and Media services for system containing Intel chipsets.
+	  if selected /dev/mei misc device will be created.
+
+	  For more information see
+	  <http://software.intel.com/en-us/manageability/>
+
+config INTEL_MEI_ME
+	tristate "ME Enabled Intel Chipsets"
+	select INTEL_MEI
+	depends on X86 && PCI
+	help
+	  MEI support for ME Enabled Intel chipsets.
+
+	  Supported Chipsets are:
+	  7 Series Chipset Family
+	  6 Series Chipset Family
+	  5 Series Chipset Family
+	  4 Series Chipset Family
+	  Mobile 4 Series Chipset Family
+	  ICH9
+	  82946GZ/GL
+	  82G35 Express
+	  82Q963/Q965
+	  82P965/G965
+	  Mobile PM965/GM965
+	  Mobile GME965/GLE960
+	  82Q35 Express
+	  82G33/G31/P35/P31 Express
+	  82Q33 Express
+	  82X38/X48 Express
+
+config INTEL_MEI_TXE
+	tristate "Intel Trusted Execution Environment with ME Interface"
+	select INTEL_MEI
+	depends on X86 && PCI
+	help
+	  MEI Support for Trusted Execution Environment device on Intel SoCs
+
+	  Supported SoCs:
+	  Intel Bay Trail
diff --git a/drivers/misc/mei/Makefile b/drivers/misc/mei/Makefile
new file mode 100644
index 000000000..cd6825afa
--- /dev/null
+++ b/drivers/misc/mei/Makefile
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile - Intel Management Engine Interface (Intel MEI) Linux driver
+# Copyright (c) 2010-2014, Intel Corporation.
+#
+obj-$(CONFIG_INTEL_MEI) += mei.o
+mei-objs := init.o
+mei-objs += hbm.o
+mei-objs += interrupt.o
+mei-objs += client.o
+mei-objs += main.o
+mei-objs += bus.o
+mei-objs += bus-fixup.o
+mei-$(CONFIG_DEBUG_FS) += debugfs.o
+
+obj-$(CONFIG_INTEL_MEI_ME) += mei-me.o
+mei-me-objs := pci-me.o
+mei-me-objs += hw-me.o
+
+obj-$(CONFIG_INTEL_MEI_TXE) += mei-txe.o
+mei-txe-objs := pci-txe.o
+mei-txe-objs += hw-txe.o
+
+mei-$(CONFIG_EVENT_TRACING) += mei-trace.o
+CFLAGS_mei-trace.o = -I$(src)
diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c
new file mode 100644
index 000000000..198e030e5
--- /dev/null
+++ b/drivers/misc/mei/bus-fixup.c
@@ -0,0 +1,505 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2018, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/uuid.h>
+
+#include <linux/mei_cl_bus.h>
+
+#include "mei_dev.h"
+#include "client.h"
+
+#define MEI_UUID_NFC_INFO UUID_LE(0xd2de1625, 0x382d, 0x417d, \
+			0x48, 0xa4, 0xef, 0xab, 0xba, 0x8a, 0x12, 0x06)
+
+static const uuid_le mei_nfc_info_guid = MEI_UUID_NFC_INFO;
+
+#define MEI_UUID_NFC_HCI UUID_LE(0x0bb17a78, 0x2a8e, 0x4c50, \
+			0x94, 0xd4, 0x50, 0x26, 0x67, 0x23, 0x77, 0x5c)
+
+#define MEI_UUID_WD UUID_LE(0x05B79A6F, 0x4628, 0x4D7F, \
+			    0x89, 0x9D, 0xA9, 0x15, 0x14, 0xCB, 0x32, 0xAB)
+
+#define MEI_UUID_MKHIF_FIX UUID_LE(0x55213584, 0x9a29, 0x4916, \
+			0xba, 0xdf, 0xf, 0xb7, 0xed, 0x68, 0x2a, 0xeb)
+
+#define MEI_UUID_ANY NULL_UUID_LE
+
+/**
+ * number_of_connections - determine whether an client be on the bus
+ *    according number of connections
+ *    We support only clients:
+ *       1. with single connection
+ *       2. and fixed clients (max_number_of_connections == 0)
+ *
+ * @cldev: me clients device
+ */
+static void number_of_connections(struct mei_cl_device *cldev)
+{
+	dev_dbg(&cldev->dev, "running hook %s\n", __func__);
+
+	if (cldev->me_cl->props.max_number_of_connections > 1)
+		cldev->do_match = 0;
+}
+
+/**
+ * blacklist - blacklist a client from the bus
+ *
+ * @cldev: me clients device
+ */
+static void blacklist(struct mei_cl_device *cldev)
+{
+	dev_dbg(&cldev->dev, "running hook %s\n", __func__);
+
+	cldev->do_match = 0;
+}
+
+#define OSTYPE_LINUX    2
+struct mei_os_ver {
+	__le16 build;
+	__le16 reserved1;
+	u8  os_type;
+	u8  major;
+	u8  minor;
+	u8  reserved2;
+} __packed;
+
+#define MKHI_FEATURE_PTT 0x10
+
+struct mkhi_rule_id {
+	__le16 rule_type;
+	u8 feature_id;
+	u8 reserved;
+} __packed;
+
+struct mkhi_fwcaps {
+	struct mkhi_rule_id id;
+	u8 len;
+	u8 data[0];
+} __packed;
+
+struct mkhi_fw_ver_block {
+	u16 minor;
+	u8 major;
+	u8 platform;
+	u16 buildno;
+	u16 hotfix;
+} __packed;
+
+struct mkhi_fw_ver {
+	struct mkhi_fw_ver_block ver[MEI_MAX_FW_VER_BLOCKS];
+} __packed;
+
+#define MKHI_FWCAPS_GROUP_ID 0x3
+#define MKHI_FWCAPS_SET_OS_VER_APP_RULE_CMD 6
+#define MKHI_GEN_GROUP_ID 0xFF
+#define MKHI_GEN_GET_FW_VERSION_CMD 0x2
+struct mkhi_msg_hdr {
+	u8  group_id;
+	u8  command;
+	u8  reserved;
+	u8  result;
+} __packed;
+
+struct mkhi_msg {
+	struct mkhi_msg_hdr hdr;
+	u8 data[0];
+} __packed;
+
+#define MKHI_OSVER_BUF_LEN (sizeof(struct mkhi_msg_hdr) + \
+			    sizeof(struct mkhi_fwcaps) + \
+			    sizeof(struct mei_os_ver))
+static int mei_osver(struct mei_cl_device *cldev)
+{
+	const size_t size = MKHI_OSVER_BUF_LEN;
+	char buf[MKHI_OSVER_BUF_LEN];
+	struct mkhi_msg *req;
+	struct mkhi_fwcaps *fwcaps;
+	struct mei_os_ver *os_ver;
+	unsigned int mode = MEI_CL_IO_TX_BLOCKING | MEI_CL_IO_TX_INTERNAL;
+
+	memset(buf, 0, size);
+
+	req = (struct mkhi_msg *)buf;
+	req->hdr.group_id = MKHI_FWCAPS_GROUP_ID;
+	req->hdr.command = MKHI_FWCAPS_SET_OS_VER_APP_RULE_CMD;
+
+	fwcaps = (struct mkhi_fwcaps *)req->data;
+
+	fwcaps->id.rule_type = 0x0;
+	fwcaps->id.feature_id = MKHI_FEATURE_PTT;
+	fwcaps->len = sizeof(*os_ver);
+	os_ver = (struct mei_os_ver *)fwcaps->data;
+	os_ver->os_type = OSTYPE_LINUX;
+
+	return __mei_cl_send(cldev->cl, buf, size, mode);
+}
+
+#define MKHI_FWVER_BUF_LEN (sizeof(struct mkhi_msg_hdr) + \
+			    sizeof(struct mkhi_fw_ver))
+#define MKHI_FWVER_LEN(__num) (sizeof(struct mkhi_msg_hdr) + \
+			       sizeof(struct mkhi_fw_ver_block) * (__num))
+#define MKHI_RCV_TIMEOUT 500 /* receive timeout in msec */
+static int mei_fwver(struct mei_cl_device *cldev)
+{
+	char buf[MKHI_FWVER_BUF_LEN];
+	struct mkhi_msg *req;
+	struct mkhi_fw_ver *fwver;
+	int bytes_recv, ret, i;
+
+	memset(buf, 0, sizeof(buf));
+
+	req = (struct mkhi_msg *)buf;
+	req->hdr.group_id = MKHI_GEN_GROUP_ID;
+	req->hdr.command = MKHI_GEN_GET_FW_VERSION_CMD;
+
+	ret = __mei_cl_send(cldev->cl, buf, sizeof(struct mkhi_msg_hdr),
+			    MEI_CL_IO_TX_BLOCKING);
+	if (ret < 0) {
+		dev_err(&cldev->dev, "Could not send ReqFWVersion cmd\n");
+		return ret;
+	}
+
+	ret = 0;
+	bytes_recv = __mei_cl_recv(cldev->cl, buf, sizeof(buf), 0,
+				   MKHI_RCV_TIMEOUT);
+	if (bytes_recv < 0 || (size_t)bytes_recv < MKHI_FWVER_LEN(1)) {
+		/*
+		 * Should be at least one version block,
+		 * error out if nothing found
+		 */
+		dev_err(&cldev->dev, "Could not read FW version\n");
+		return -EIO;
+	}
+
+	fwver = (struct mkhi_fw_ver *)req->data;
+	memset(cldev->bus->fw_ver, 0, sizeof(cldev->bus->fw_ver));
+	for (i = 0; i < MEI_MAX_FW_VER_BLOCKS; i++) {
+		if ((size_t)bytes_recv < MKHI_FWVER_LEN(i + 1))
+			break;
+		dev_dbg(&cldev->dev, "FW version%d %d:%d.%d.%d.%d\n",
+			i, fwver->ver[i].platform,
+			fwver->ver[i].major, fwver->ver[i].minor,
+			fwver->ver[i].hotfix, fwver->ver[i].buildno);
+
+		cldev->bus->fw_ver[i].platform = fwver->ver[i].platform;
+		cldev->bus->fw_ver[i].major = fwver->ver[i].major;
+		cldev->bus->fw_ver[i].minor = fwver->ver[i].minor;
+		cldev->bus->fw_ver[i].hotfix = fwver->ver[i].hotfix;
+		cldev->bus->fw_ver[i].buildno = fwver->ver[i].buildno;
+	}
+
+	return ret;
+}
+
+static void mei_mkhi_fix(struct mei_cl_device *cldev)
+{
+	int ret;
+
+	/* No need to enable the client if nothing is needed from it */
+	if (!cldev->bus->fw_f_fw_ver_supported &&
+	    !cldev->bus->hbm_f_os_supported)
+		return;
+
+	ret = mei_cldev_enable(cldev);
+	if (ret)
+		return;
+
+	if (cldev->bus->fw_f_fw_ver_supported) {
+		ret = mei_fwver(cldev);
+		if (ret < 0)
+			dev_err(&cldev->dev, "FW version command failed %d\n",
+				ret);
+	}
+
+	if (cldev->bus->hbm_f_os_supported) {
+		ret = mei_osver(cldev);
+		if (ret < 0)
+			dev_err(&cldev->dev, "OS version command failed %d\n",
+				ret);
+	}
+	mei_cldev_disable(cldev);
+}
+
+/**
+ * mei_wd - wd client on the bus, change protocol version
+ *   as the API has changed.
+ *
+ * @cldev: me clients device
+ */
+#if IS_ENABLED(CONFIG_INTEL_MEI_ME)
+#include <linux/pci.h>
+#include "hw-me-regs.h"
+static void mei_wd(struct mei_cl_device *cldev)
+{
+	struct pci_dev *pdev = to_pci_dev(cldev->dev.parent);
+
+	dev_dbg(&cldev->dev, "running hook %s\n", __func__);
+	if (pdev->device == MEI_DEV_ID_WPT_LP ||
+	    pdev->device == MEI_DEV_ID_SPT ||
+	    pdev->device == MEI_DEV_ID_SPT_H)
+		cldev->me_cl->props.protocol_version = 0x2;
+
+	cldev->do_match = 1;
+}
+#else
+static inline void mei_wd(struct mei_cl_device *cldev) {}
+#endif /* CONFIG_INTEL_MEI_ME */
+
+struct mei_nfc_cmd {
+	u8 command;
+	u8 status;
+	u16 req_id;
+	u32 reserved;
+	u16 data_size;
+	u8 sub_command;
+	u8 data[];
+} __packed;
+
+struct mei_nfc_reply {
+	u8 command;
+	u8 status;
+	u16 req_id;
+	u32 reserved;
+	u16 data_size;
+	u8 sub_command;
+	u8 reply_status;
+	u8 data[];
+} __packed;
+
+struct mei_nfc_if_version {
+	u8 radio_version_sw[3];
+	u8 reserved[3];
+	u8 radio_version_hw[3];
+	u8 i2c_addr;
+	u8 fw_ivn;
+	u8 vendor_id;
+	u8 radio_type;
+} __packed;
+
+
+#define MEI_NFC_CMD_MAINTENANCE 0x00
+#define MEI_NFC_SUBCMD_IF_VERSION 0x01
+
+/* Vendors */
+#define MEI_NFC_VENDOR_INSIDE 0x00
+#define MEI_NFC_VENDOR_NXP    0x01
+
+/* Radio types */
+#define MEI_NFC_VENDOR_INSIDE_UREAD 0x00
+#define MEI_NFC_VENDOR_NXP_PN544    0x01
+
+/**
+ * mei_nfc_if_version - get NFC interface version
+ *
+ * @cl: host client (nfc info)
+ * @ver: NFC interface version to be filled in
+ *
+ * Return: 0 on success; < 0 otherwise
+ */
+static int mei_nfc_if_version(struct mei_cl *cl,
+			      struct mei_nfc_if_version *ver)
+{
+	struct mei_device *bus;
+	struct mei_nfc_cmd cmd = {
+		.command = MEI_NFC_CMD_MAINTENANCE,
+		.data_size = 1,
+		.sub_command = MEI_NFC_SUBCMD_IF_VERSION,
+	};
+	struct mei_nfc_reply *reply = NULL;
+	size_t if_version_length;
+	int bytes_recv, ret;
+
+	bus = cl->dev;
+
+	WARN_ON(mutex_is_locked(&bus->device_lock));
+
+	ret = __mei_cl_send(cl, (u8 *)&cmd, sizeof(struct mei_nfc_cmd),
+			    MEI_CL_IO_TX_BLOCKING);
+	if (ret < 0) {
+		dev_err(bus->dev, "Could not send IF version cmd\n");
+		return ret;
+	}
+
+	/* to be sure on the stack we alloc memory */
+	if_version_length = sizeof(struct mei_nfc_reply) +
+		sizeof(struct mei_nfc_if_version);
+
+	reply = kzalloc(if_version_length, GFP_KERNEL);
+	if (!reply)
+		return -ENOMEM;
+
+	ret = 0;
+	bytes_recv = __mei_cl_recv(cl, (u8 *)reply, if_version_length, 0, 0);
+	if (bytes_recv < 0 || (size_t)bytes_recv < if_version_length) {
+		dev_err(bus->dev, "Could not read IF version\n");
+		ret = -EIO;
+		goto err;
+	}
+
+	memcpy(ver, reply->data, sizeof(struct mei_nfc_if_version));
+
+	dev_info(bus->dev, "NFC MEI VERSION: IVN 0x%x Vendor ID 0x%x Type 0x%x\n",
+		ver->fw_ivn, ver->vendor_id, ver->radio_type);
+
+err:
+	kfree(reply);
+	return ret;
+}
+
+/**
+ * mei_nfc_radio_name - derive nfc radio name from the interface version
+ *
+ * @ver: NFC radio version
+ *
+ * Return: radio name string
+ */
+static const char *mei_nfc_radio_name(struct mei_nfc_if_version *ver)
+{
+
+	if (ver->vendor_id == MEI_NFC_VENDOR_INSIDE) {
+		if (ver->radio_type == MEI_NFC_VENDOR_INSIDE_UREAD)
+			return "microread";
+	}
+
+	if (ver->vendor_id == MEI_NFC_VENDOR_NXP) {
+		if (ver->radio_type == MEI_NFC_VENDOR_NXP_PN544)
+			return "pn544";
+	}
+
+	return NULL;
+}
+
+/**
+ * mei_nfc - The nfc fixup function. The function retrieves nfc radio
+ *    name and set is as device attribute so we can load
+ *    the proper device driver for it
+ *
+ * @cldev: me client device (nfc)
+ */
+static void mei_nfc(struct mei_cl_device *cldev)
+{
+	struct mei_device *bus;
+	struct mei_cl *cl;
+	struct mei_me_client *me_cl = NULL;
+	struct mei_nfc_if_version ver;
+	const char *radio_name = NULL;
+	int ret;
+
+	bus = cldev->bus;
+
+	dev_dbg(&cldev->dev, "running hook %s\n", __func__);
+
+	mutex_lock(&bus->device_lock);
+	/* we need to connect to INFO GUID */
+	cl = mei_cl_alloc_linked(bus);
+	if (IS_ERR(cl)) {
+		ret = PTR_ERR(cl);
+		cl = NULL;
+		dev_err(bus->dev, "nfc hook alloc failed %d\n", ret);
+		goto out;
+	}
+
+	me_cl = mei_me_cl_by_uuid(bus, &mei_nfc_info_guid);
+	if (!me_cl) {
+		ret = -ENOTTY;
+		dev_err(bus->dev, "Cannot find nfc info %d\n", ret);
+		goto out;
+	}
+
+	ret = mei_cl_connect(cl, me_cl, NULL);
+	if (ret < 0) {
+		dev_err(&cldev->dev, "Can't connect to the NFC INFO ME ret = %d\n",
+			ret);
+		goto out;
+	}
+
+	mutex_unlock(&bus->device_lock);
+
+	ret = mei_nfc_if_version(cl, &ver);
+	if (ret)
+		goto disconnect;
+
+	radio_name = mei_nfc_radio_name(&ver);
+
+	if (!radio_name) {
+		ret = -ENOENT;
+		dev_err(&cldev->dev, "Can't get the NFC interface version ret = %d\n",
+			ret);
+		goto disconnect;
+	}
+
+	dev_dbg(bus->dev, "nfc radio %s\n", radio_name);
+	strlcpy(cldev->name, radio_name, sizeof(cldev->name));
+
+disconnect:
+	mutex_lock(&bus->device_lock);
+	if (mei_cl_disconnect(cl) < 0)
+		dev_err(bus->dev, "Can't disconnect the NFC INFO ME\n");
+
+	mei_cl_flush_queues(cl, NULL);
+
+out:
+	mei_cl_unlink(cl);
+	mutex_unlock(&bus->device_lock);
+	mei_me_cl_put(me_cl);
+	kfree(cl);
+
+	if (ret)
+		cldev->do_match = 0;
+
+	dev_dbg(bus->dev, "end of fixup match = %d\n", cldev->do_match);
+}
+
+#define MEI_FIXUP(_uuid, _hook) { _uuid, _hook }
+
+static struct mei_fixup {
+
+	const uuid_le uuid;
+	void (*hook)(struct mei_cl_device *cldev);
+} mei_fixups[] = {
+	MEI_FIXUP(MEI_UUID_ANY, number_of_connections),
+	MEI_FIXUP(MEI_UUID_NFC_INFO, blacklist),
+	MEI_FIXUP(MEI_UUID_NFC_HCI, mei_nfc),
+	MEI_FIXUP(MEI_UUID_WD, mei_wd),
+	MEI_FIXUP(MEI_UUID_MKHIF_FIX, mei_mkhi_fix),
+};
+
+/**
+ * mei_cldev_fixup - run fixup handlers
+ *
+ * @cldev: me client device
+ */
+void mei_cl_bus_dev_fixup(struct mei_cl_device *cldev)
+{
+	struct mei_fixup *f;
+	const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl);
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(mei_fixups); i++) {
+
+		f = &mei_fixups[i];
+		if (uuid_le_cmp(f->uuid, MEI_UUID_ANY) == 0 ||
+		    uuid_le_cmp(f->uuid, *uuid) == 0)
+			f->hook(cldev);
+	}
+}
+
diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
new file mode 100644
index 000000000..57b5868c9
--- /dev/null
+++ b/drivers/misc/mei/bus.c
@@ -0,0 +1,1153 @@
+/*
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2012-2013, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/sched/signal.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/interrupt.h>
+#include <linux/mei_cl_bus.h>
+
+#include "mei_dev.h"
+#include "client.h"
+
+#define to_mei_cl_driver(d) container_of(d, struct mei_cl_driver, driver)
+#define to_mei_cl_device(d) container_of(d, struct mei_cl_device, dev)
+
+/**
+ * __mei_cl_send - internal client send (write)
+ *
+ * @cl: host client
+ * @buf: buffer to send
+ * @length: buffer length
+ * @mode: sending mode
+ *
+ * Return: written size bytes or < 0 on error
+ */
+ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length,
+		      unsigned int mode)
+{
+	struct mei_device *bus;
+	struct mei_cl_cb *cb;
+	ssize_t rets;
+
+	if (WARN_ON(!cl || !cl->dev))
+		return -ENODEV;
+
+	bus = cl->dev;
+
+	mutex_lock(&bus->device_lock);
+	if (bus->dev_state != MEI_DEV_ENABLED) {
+		rets = -ENODEV;
+		goto out;
+	}
+
+	if (!mei_cl_is_connected(cl)) {
+		rets = -ENODEV;
+		goto out;
+	}
+
+	/* Check if we have an ME client device */
+	if (!mei_me_cl_is_active(cl->me_cl)) {
+		rets = -ENOTTY;
+		goto out;
+	}
+
+	if (length > mei_cl_mtu(cl)) {
+		rets = -EFBIG;
+		goto out;
+	}
+
+	while (cl->tx_cb_queued >= bus->tx_queue_limit) {
+		mutex_unlock(&bus->device_lock);
+		rets = wait_event_interruptible(cl->tx_wait,
+				cl->writing_state == MEI_WRITE_COMPLETE ||
+				(!mei_cl_is_connected(cl)));
+		mutex_lock(&bus->device_lock);
+		if (rets) {
+			if (signal_pending(current))
+				rets = -EINTR;
+			goto out;
+		}
+		if (!mei_cl_is_connected(cl)) {
+			rets = -ENODEV;
+			goto out;
+		}
+	}
+
+	cb = mei_cl_alloc_cb(cl, length, MEI_FOP_WRITE, NULL);
+	if (!cb) {
+		rets = -ENOMEM;
+		goto out;
+	}
+
+	cb->internal = !!(mode & MEI_CL_IO_TX_INTERNAL);
+	cb->blocking = !!(mode & MEI_CL_IO_TX_BLOCKING);
+	memcpy(cb->buf.data, buf, length);
+
+	rets = mei_cl_write(cl, cb);
+
+out:
+	mutex_unlock(&bus->device_lock);
+
+	return rets;
+}
+
+/**
+ * __mei_cl_recv - internal client receive (read)
+ *
+ * @cl: host client
+ * @buf: buffer to receive
+ * @length: buffer length
+ * @mode: io mode
+ * @timeout: recv timeout, 0 for infinite timeout
+ *
+ * Return: read size in bytes of < 0 on error
+ */
+ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length,
+		      unsigned int mode, unsigned long timeout)
+{
+	struct mei_device *bus;
+	struct mei_cl_cb *cb;
+	size_t r_length;
+	ssize_t rets;
+	bool nonblock = !!(mode & MEI_CL_IO_RX_NONBLOCK);
+
+	if (WARN_ON(!cl || !cl->dev))
+		return -ENODEV;
+
+	bus = cl->dev;
+
+	mutex_lock(&bus->device_lock);
+	if (bus->dev_state != MEI_DEV_ENABLED) {
+		rets = -ENODEV;
+		goto out;
+	}
+
+	cb = mei_cl_read_cb(cl, NULL);
+	if (cb)
+		goto copy;
+
+	rets = mei_cl_read_start(cl, length, NULL);
+	if (rets && rets != -EBUSY)
+		goto out;
+
+	if (nonblock) {
+		rets = -EAGAIN;
+		goto out;
+	}
+
+	/* wait on event only if there is no other waiter */
+	/* synchronized under device mutex */
+	if (!waitqueue_active(&cl->rx_wait)) {
+
+		mutex_unlock(&bus->device_lock);
+
+		if (timeout) {
+			rets = wait_event_interruptible_timeout
+					(cl->rx_wait,
+					(!list_empty(&cl->rd_completed)) ||
+					(!mei_cl_is_connected(cl)),
+					msecs_to_jiffies(timeout));
+			if (rets == 0)
+				return -ETIME;
+			if (rets < 0) {
+				if (signal_pending(current))
+					return -EINTR;
+				return -ERESTARTSYS;
+			}
+		} else {
+			if (wait_event_interruptible
+					(cl->rx_wait,
+					(!list_empty(&cl->rd_completed)) ||
+					(!mei_cl_is_connected(cl)))) {
+				if (signal_pending(current))
+					return -EINTR;
+				return -ERESTARTSYS;
+			}
+		}
+
+		mutex_lock(&bus->device_lock);
+
+		if (!mei_cl_is_connected(cl)) {
+			rets = -ENODEV;
+			goto out;
+		}
+	}
+
+	cb = mei_cl_read_cb(cl, NULL);
+	if (!cb) {
+		rets = 0;
+		goto out;
+	}
+
+copy:
+	if (cb->status) {
+		rets = cb->status;
+		goto free;
+	}
+
+	r_length = min_t(size_t, length, cb->buf_idx);
+	memcpy(buf, cb->buf.data, r_length);
+	rets = r_length;
+
+free:
+	mei_io_cb_free(cb);
+out:
+	mutex_unlock(&bus->device_lock);
+
+	return rets;
+}
+
+/**
+ * mei_cldev_send - me device send  (write)
+ *
+ * @cldev: me client device
+ * @buf: buffer to send
+ * @length: buffer length
+ *
+ * Return: written size in bytes or < 0 on error
+ */
+ssize_t mei_cldev_send(struct mei_cl_device *cldev, u8 *buf, size_t length)
+{
+	struct mei_cl *cl = cldev->cl;
+
+	return __mei_cl_send(cl, buf, length, MEI_CL_IO_TX_BLOCKING);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_send);
+
+/**
+ * mei_cldev_recv_nonblock - non block client receive (read)
+ *
+ * @cldev: me client device
+ * @buf: buffer to receive
+ * @length: buffer length
+ *
+ * Return: read size in bytes of < 0 on error
+ *         -EAGAIN if function will block.
+ */
+ssize_t mei_cldev_recv_nonblock(struct mei_cl_device *cldev, u8 *buf,
+				size_t length)
+{
+	struct mei_cl *cl = cldev->cl;
+
+	return __mei_cl_recv(cl, buf, length, MEI_CL_IO_RX_NONBLOCK, 0);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_recv_nonblock);
+
+/**
+ * mei_cldev_recv - client receive (read)
+ *
+ * @cldev: me client device
+ * @buf: buffer to receive
+ * @length: buffer length
+ *
+ * Return: read size in bytes of < 0 on error
+ */
+ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length)
+{
+	struct mei_cl *cl = cldev->cl;
+
+	return __mei_cl_recv(cl, buf, length, 0, 0);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_recv);
+
+/**
+ * mei_cl_bus_rx_work - dispatch rx event for a bus device
+ *
+ * @work: work
+ */
+static void mei_cl_bus_rx_work(struct work_struct *work)
+{
+	struct mei_cl_device *cldev;
+	struct mei_device *bus;
+
+	cldev = container_of(work, struct mei_cl_device, rx_work);
+
+	bus = cldev->bus;
+
+	if (cldev->rx_cb)
+		cldev->rx_cb(cldev);
+
+	mutex_lock(&bus->device_lock);
+	mei_cl_read_start(cldev->cl, mei_cl_mtu(cldev->cl), NULL);
+	mutex_unlock(&bus->device_lock);
+}
+
+/**
+ * mei_cl_bus_notif_work - dispatch FW notif event for a bus device
+ *
+ * @work: work
+ */
+static void mei_cl_bus_notif_work(struct work_struct *work)
+{
+	struct mei_cl_device *cldev;
+
+	cldev = container_of(work, struct mei_cl_device, notif_work);
+
+	if (cldev->notif_cb)
+		cldev->notif_cb(cldev);
+}
+
+/**
+ * mei_cl_bus_notify_event - schedule notify cb on bus client
+ *
+ * @cl: host client
+ *
+ * Return: true if event was scheduled
+ *         false if the client is not waiting for event
+ */
+bool mei_cl_bus_notify_event(struct mei_cl *cl)
+{
+	struct mei_cl_device *cldev = cl->cldev;
+
+	if (!cldev || !cldev->notif_cb)
+		return false;
+
+	if (!cl->notify_ev)
+		return false;
+
+	schedule_work(&cldev->notif_work);
+
+	cl->notify_ev = false;
+
+	return true;
+}
+
+/**
+ * mei_cl_bus_rx_event - schedule rx event
+ *
+ * @cl: host client
+ *
+ * Return: true if event was scheduled
+ *         false if the client is not waiting for event
+ */
+bool mei_cl_bus_rx_event(struct mei_cl *cl)
+{
+	struct mei_cl_device *cldev = cl->cldev;
+
+	if (!cldev || !cldev->rx_cb)
+		return false;
+
+	schedule_work(&cldev->rx_work);
+
+	return true;
+}
+
+/**
+ * mei_cldev_register_rx_cb - register Rx event callback
+ *
+ * @cldev: me client devices
+ * @rx_cb: callback function
+ *
+ * Return: 0 on success
+ *         -EALREADY if an callback is already registered
+ *         <0 on other errors
+ */
+int mei_cldev_register_rx_cb(struct mei_cl_device *cldev, mei_cldev_cb_t rx_cb)
+{
+	struct mei_device *bus = cldev->bus;
+	int ret;
+
+	if (!rx_cb)
+		return -EINVAL;
+	if (cldev->rx_cb)
+		return -EALREADY;
+
+	cldev->rx_cb = rx_cb;
+	INIT_WORK(&cldev->rx_work, mei_cl_bus_rx_work);
+
+	mutex_lock(&bus->device_lock);
+	ret = mei_cl_read_start(cldev->cl, mei_cl_mtu(cldev->cl), NULL);
+	mutex_unlock(&bus->device_lock);
+	if (ret && ret != -EBUSY)
+		return ret;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mei_cldev_register_rx_cb);
+
+/**
+ * mei_cldev_register_notif_cb - register FW notification event callback
+ *
+ * @cldev: me client devices
+ * @notif_cb: callback function
+ *
+ * Return: 0 on success
+ *         -EALREADY if an callback is already registered
+ *         <0 on other errors
+ */
+int mei_cldev_register_notif_cb(struct mei_cl_device *cldev,
+				mei_cldev_cb_t notif_cb)
+{
+	struct mei_device *bus = cldev->bus;
+	int ret;
+
+	if (!notif_cb)
+		return -EINVAL;
+
+	if (cldev->notif_cb)
+		return -EALREADY;
+
+	cldev->notif_cb = notif_cb;
+	INIT_WORK(&cldev->notif_work, mei_cl_bus_notif_work);
+
+	mutex_lock(&bus->device_lock);
+	ret = mei_cl_notify_request(cldev->cl, NULL, 1);
+	mutex_unlock(&bus->device_lock);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mei_cldev_register_notif_cb);
+
+/**
+ * mei_cldev_get_drvdata - driver data getter
+ *
+ * @cldev: mei client device
+ *
+ * Return: driver private data
+ */
+void *mei_cldev_get_drvdata(const struct mei_cl_device *cldev)
+{
+	return dev_get_drvdata(&cldev->dev);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_get_drvdata);
+
+/**
+ * mei_cldev_set_drvdata - driver data setter
+ *
+ * @cldev: mei client device
+ * @data: data to store
+ */
+void mei_cldev_set_drvdata(struct mei_cl_device *cldev, void *data)
+{
+	dev_set_drvdata(&cldev->dev, data);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_set_drvdata);
+
+/**
+ * mei_cldev_uuid - return uuid of the underlying me client
+ *
+ * @cldev: mei client device
+ *
+ * Return: me client uuid
+ */
+const uuid_le *mei_cldev_uuid(const struct mei_cl_device *cldev)
+{
+	return mei_me_cl_uuid(cldev->me_cl);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_uuid);
+
+/**
+ * mei_cldev_ver - return protocol version of the underlying me client
+ *
+ * @cldev: mei client device
+ *
+ * Return: me client protocol version
+ */
+u8 mei_cldev_ver(const struct mei_cl_device *cldev)
+{
+	return mei_me_cl_ver(cldev->me_cl);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_ver);
+
+/**
+ * mei_cldev_enabled - check whether the device is enabled
+ *
+ * @cldev: mei client device
+ *
+ * Return: true if me client is initialized and connected
+ */
+bool mei_cldev_enabled(struct mei_cl_device *cldev)
+{
+	return mei_cl_is_connected(cldev->cl);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_enabled);
+
+/**
+ * mei_cl_bus_module_get - acquire module of the underlying
+ *    hw driver.
+ *
+ * @cldev: mei client device
+ *
+ * Return: true on success; false if the module was removed.
+ */
+static bool mei_cl_bus_module_get(struct mei_cl_device *cldev)
+{
+	return try_module_get(cldev->bus->dev->driver->owner);
+}
+
+/**
+ * mei_cl_bus_module_put -  release the underlying hw module.
+ *
+ * @cldev: mei client device
+ */
+static void mei_cl_bus_module_put(struct mei_cl_device *cldev)
+{
+	module_put(cldev->bus->dev->driver->owner);
+}
+
+/**
+ * mei_cldev_enable - enable me client device
+ *     create connection with me client
+ *
+ * @cldev: me client device
+ *
+ * Return: 0 on success and < 0 on error
+ */
+int mei_cldev_enable(struct mei_cl_device *cldev)
+{
+	struct mei_device *bus = cldev->bus;
+	struct mei_cl *cl;
+	int ret;
+
+	cl = cldev->cl;
+
+	mutex_lock(&bus->device_lock);
+	if (cl->state == MEI_FILE_UNINITIALIZED) {
+		ret = mei_cl_link(cl);
+		if (ret)
+			goto out;
+		/* update pointers */
+		cl->cldev = cldev;
+	}
+
+	if (mei_cl_is_connected(cl)) {
+		ret = 0;
+		goto out;
+	}
+
+	if (!mei_me_cl_is_active(cldev->me_cl)) {
+		dev_err(&cldev->dev, "me client is not active\n");
+		ret = -ENOTTY;
+		goto out;
+	}
+
+	ret = mei_cl_connect(cl, cldev->me_cl, NULL);
+	if (ret < 0)
+		dev_err(&cldev->dev, "cannot connect\n");
+
+out:
+	mutex_unlock(&bus->device_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mei_cldev_enable);
+
+/**
+ * mei_cldev_unregister_callbacks - internal wrapper for unregistering
+ *  callbacks.
+ *
+ * @cldev: client device
+ */
+static void mei_cldev_unregister_callbacks(struct mei_cl_device *cldev)
+{
+	if (cldev->rx_cb) {
+		cancel_work_sync(&cldev->rx_work);
+		cldev->rx_cb = NULL;
+	}
+
+	if (cldev->notif_cb) {
+		cancel_work_sync(&cldev->notif_work);
+		cldev->notif_cb = NULL;
+	}
+}
+
+/**
+ * mei_cldev_disable - disable me client device
+ *     disconnect form the me client
+ *
+ * @cldev: me client device
+ *
+ * Return: 0 on success and < 0 on error
+ */
+int mei_cldev_disable(struct mei_cl_device *cldev)
+{
+	struct mei_device *bus;
+	struct mei_cl *cl;
+	int err;
+
+	if (!cldev)
+		return -ENODEV;
+
+	cl = cldev->cl;
+
+	bus = cldev->bus;
+
+	mei_cldev_unregister_callbacks(cldev);
+
+	mutex_lock(&bus->device_lock);
+
+	if (!mei_cl_is_connected(cl)) {
+		dev_dbg(bus->dev, "Already disconnected\n");
+		err = 0;
+		goto out;
+	}
+
+	err = mei_cl_disconnect(cl);
+	if (err < 0)
+		dev_err(bus->dev, "Could not disconnect from the ME client\n");
+
+out:
+	/* Flush queues and remove any pending read */
+	mei_cl_flush_queues(cl, NULL);
+	mei_cl_unlink(cl);
+
+	mutex_unlock(&bus->device_lock);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mei_cldev_disable);
+
+/**
+ * mei_cl_device_find - find matching entry in the driver id table
+ *
+ * @cldev: me client device
+ * @cldrv: me client driver
+ *
+ * Return: id on success; NULL if no id is matching
+ */
+static const
+struct mei_cl_device_id *mei_cl_device_find(struct mei_cl_device *cldev,
+					    struct mei_cl_driver *cldrv)
+{
+	const struct mei_cl_device_id *id;
+	const uuid_le *uuid;
+	u8 version;
+	bool match;
+
+	uuid = mei_me_cl_uuid(cldev->me_cl);
+	version = mei_me_cl_ver(cldev->me_cl);
+
+	id = cldrv->id_table;
+	while (uuid_le_cmp(NULL_UUID_LE, id->uuid)) {
+		if (!uuid_le_cmp(*uuid, id->uuid)) {
+			match = true;
+
+			if (cldev->name[0])
+				if (strncmp(cldev->name, id->name,
+					    sizeof(id->name)))
+					match = false;
+
+			if (id->version != MEI_CL_VERSION_ANY)
+				if (id->version != version)
+					match = false;
+			if (match)
+				return id;
+		}
+
+		id++;
+	}
+
+	return NULL;
+}
+
+/**
+ * mei_cl_device_match  - device match function
+ *
+ * @dev: device
+ * @drv: driver
+ *
+ * Return:  1 if matching device was found 0 otherwise
+ */
+static int mei_cl_device_match(struct device *dev, struct device_driver *drv)
+{
+	struct mei_cl_device *cldev = to_mei_cl_device(dev);
+	struct mei_cl_driver *cldrv = to_mei_cl_driver(drv);
+	const struct mei_cl_device_id *found_id;
+
+	if (!cldev)
+		return 0;
+
+	if (!cldev->do_match)
+		return 0;
+
+	if (!cldrv || !cldrv->id_table)
+		return 0;
+
+	found_id = mei_cl_device_find(cldev, cldrv);
+	if (found_id)
+		return 1;
+
+	return 0;
+}
+
+/**
+ * mei_cl_device_probe - bus probe function
+ *
+ * @dev: device
+ *
+ * Return:  0 on success; < 0 otherwise
+ */
+static int mei_cl_device_probe(struct device *dev)
+{
+	struct mei_cl_device *cldev;
+	struct mei_cl_driver *cldrv;
+	const struct mei_cl_device_id *id;
+	int ret;
+
+	cldev = to_mei_cl_device(dev);
+	cldrv = to_mei_cl_driver(dev->driver);
+
+	if (!cldev)
+		return 0;
+
+	if (!cldrv || !cldrv->probe)
+		return -ENODEV;
+
+	id = mei_cl_device_find(cldev, cldrv);
+	if (!id)
+		return -ENODEV;
+
+	if (!mei_cl_bus_module_get(cldev)) {
+		dev_err(&cldev->dev, "get hw module failed");
+		return -ENODEV;
+	}
+
+	ret = cldrv->probe(cldev, id);
+	if (ret) {
+		mei_cl_bus_module_put(cldev);
+		return ret;
+	}
+
+	__module_get(THIS_MODULE);
+	return 0;
+}
+
+/**
+ * mei_cl_device_remove - remove device from the bus
+ *
+ * @dev: device
+ *
+ * Return:  0 on success; < 0 otherwise
+ */
+static int mei_cl_device_remove(struct device *dev)
+{
+	struct mei_cl_device *cldev = to_mei_cl_device(dev);
+	struct mei_cl_driver *cldrv;
+	int ret = 0;
+
+	if (!cldev || !dev->driver)
+		return 0;
+
+	cldrv = to_mei_cl_driver(dev->driver);
+	if (cldrv->remove)
+		ret = cldrv->remove(cldev);
+
+	mei_cldev_unregister_callbacks(cldev);
+
+	mei_cl_bus_module_put(cldev);
+	module_put(THIS_MODULE);
+
+	return ret;
+}
+
+static ssize_t name_show(struct device *dev, struct device_attribute *a,
+			     char *buf)
+{
+	struct mei_cl_device *cldev = to_mei_cl_device(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%s", cldev->name);
+}
+static DEVICE_ATTR_RO(name);
+
+static ssize_t uuid_show(struct device *dev, struct device_attribute *a,
+			     char *buf)
+{
+	struct mei_cl_device *cldev = to_mei_cl_device(dev);
+	const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl);
+
+	return scnprintf(buf, PAGE_SIZE, "%pUl", uuid);
+}
+static DEVICE_ATTR_RO(uuid);
+
+static ssize_t version_show(struct device *dev, struct device_attribute *a,
+			     char *buf)
+{
+	struct mei_cl_device *cldev = to_mei_cl_device(dev);
+	u8 version = mei_me_cl_ver(cldev->me_cl);
+
+	return scnprintf(buf, PAGE_SIZE, "%02X", version);
+}
+static DEVICE_ATTR_RO(version);
+
+static ssize_t modalias_show(struct device *dev, struct device_attribute *a,
+			     char *buf)
+{
+	struct mei_cl_device *cldev = to_mei_cl_device(dev);
+	const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl);
+	u8 version = mei_me_cl_ver(cldev->me_cl);
+
+	return scnprintf(buf, PAGE_SIZE, "mei:%s:%pUl:%02X:",
+			 cldev->name, uuid, version);
+}
+static DEVICE_ATTR_RO(modalias);
+
+static struct attribute *mei_cldev_attrs[] = {
+	&dev_attr_name.attr,
+	&dev_attr_uuid.attr,
+	&dev_attr_version.attr,
+	&dev_attr_modalias.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(mei_cldev);
+
+/**
+ * mei_cl_device_uevent - me client bus uevent handler
+ *
+ * @dev: device
+ * @env: uevent kobject
+ *
+ * Return: 0 on success -ENOMEM on when add_uevent_var fails
+ */
+static int mei_cl_device_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct mei_cl_device *cldev = to_mei_cl_device(dev);
+	const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl);
+	u8 version = mei_me_cl_ver(cldev->me_cl);
+
+	if (add_uevent_var(env, "MEI_CL_VERSION=%d", version))
+		return -ENOMEM;
+
+	if (add_uevent_var(env, "MEI_CL_UUID=%pUl", uuid))
+		return -ENOMEM;
+
+	if (add_uevent_var(env, "MEI_CL_NAME=%s", cldev->name))
+		return -ENOMEM;
+
+	if (add_uevent_var(env, "MODALIAS=mei:%s:%pUl:%02X:",
+			   cldev->name, uuid, version))
+		return -ENOMEM;
+
+	return 0;
+}
+
+static struct bus_type mei_cl_bus_type = {
+	.name		= "mei",
+	.dev_groups	= mei_cldev_groups,
+	.match		= mei_cl_device_match,
+	.probe		= mei_cl_device_probe,
+	.remove		= mei_cl_device_remove,
+	.uevent		= mei_cl_device_uevent,
+};
+
+static struct mei_device *mei_dev_bus_get(struct mei_device *bus)
+{
+	if (bus)
+		get_device(bus->dev);
+
+	return bus;
+}
+
+static void mei_dev_bus_put(struct mei_device *bus)
+{
+	if (bus)
+		put_device(bus->dev);
+}
+
+static void mei_cl_bus_dev_release(struct device *dev)
+{
+	struct mei_cl_device *cldev = to_mei_cl_device(dev);
+
+	if (!cldev)
+		return;
+
+	mei_me_cl_put(cldev->me_cl);
+	mei_dev_bus_put(cldev->bus);
+	mei_cl_unlink(cldev->cl);
+	kfree(cldev->cl);
+	kfree(cldev);
+}
+
+static const struct device_type mei_cl_device_type = {
+	.release = mei_cl_bus_dev_release,
+};
+
+/**
+ * mei_cl_bus_set_name - set device name for me client device
+ *  <controller>-<client device>
+ *  Example: 0000:00:16.0-55213584-9a29-4916-badf-0fb7ed682aeb
+ *
+ * @cldev: me client device
+ */
+static inline void mei_cl_bus_set_name(struct mei_cl_device *cldev)
+{
+	dev_set_name(&cldev->dev, "%s-%pUl",
+		     dev_name(cldev->bus->dev),
+		     mei_me_cl_uuid(cldev->me_cl));
+}
+
+/**
+ * mei_cl_bus_dev_alloc - initialize and allocate mei client device
+ *
+ * @bus: mei device
+ * @me_cl: me client
+ *
+ * Return: allocated device structur or NULL on allocation failure
+ */
+static struct mei_cl_device *mei_cl_bus_dev_alloc(struct mei_device *bus,
+						  struct mei_me_client *me_cl)
+{
+	struct mei_cl_device *cldev;
+	struct mei_cl *cl;
+
+	cldev = kzalloc(sizeof(struct mei_cl_device), GFP_KERNEL);
+	if (!cldev)
+		return NULL;
+
+	cl = mei_cl_allocate(bus);
+	if (!cl) {
+		kfree(cldev);
+		return NULL;
+	}
+
+	device_initialize(&cldev->dev);
+	cldev->dev.parent = bus->dev;
+	cldev->dev.bus    = &mei_cl_bus_type;
+	cldev->dev.type   = &mei_cl_device_type;
+	cldev->bus        = mei_dev_bus_get(bus);
+	cldev->me_cl      = mei_me_cl_get(me_cl);
+	cldev->cl         = cl;
+	mei_cl_bus_set_name(cldev);
+	cldev->is_added   = 0;
+	INIT_LIST_HEAD(&cldev->bus_list);
+
+	return cldev;
+}
+
+/**
+ * mei_cl_dev_setup - setup me client device
+ *    run fix up routines and set the device name
+ *
+ * @bus: mei device
+ * @cldev: me client device
+ *
+ * Return: true if the device is eligible for enumeration
+ */
+static bool mei_cl_bus_dev_setup(struct mei_device *bus,
+				 struct mei_cl_device *cldev)
+{
+	cldev->do_match = 1;
+	mei_cl_bus_dev_fixup(cldev);
+
+	/* the device name can change during fix up */
+	if (cldev->do_match)
+		mei_cl_bus_set_name(cldev);
+
+	return cldev->do_match == 1;
+}
+
+/**
+ * mei_cl_bus_dev_add - add me client devices
+ *
+ * @cldev: me client device
+ *
+ * Return: 0 on success; < 0 on failre
+ */
+static int mei_cl_bus_dev_add(struct mei_cl_device *cldev)
+{
+	int ret;
+
+	dev_dbg(cldev->bus->dev, "adding %pUL:%02X\n",
+		mei_me_cl_uuid(cldev->me_cl),
+		mei_me_cl_ver(cldev->me_cl));
+	ret = device_add(&cldev->dev);
+	if (!ret)
+		cldev->is_added = 1;
+
+	return ret;
+}
+
+/**
+ * mei_cl_bus_dev_stop - stop the driver
+ *
+ * @cldev: me client device
+ */
+static void mei_cl_bus_dev_stop(struct mei_cl_device *cldev)
+{
+	if (cldev->is_added)
+		device_release_driver(&cldev->dev);
+}
+
+/**
+ * mei_cl_bus_dev_destroy - destroy me client devices object
+ *
+ * @cldev: me client device
+ *
+ * Locking: called under "dev->cl_bus_lock" lock
+ */
+static void mei_cl_bus_dev_destroy(struct mei_cl_device *cldev)
+{
+
+	WARN_ON(!mutex_is_locked(&cldev->bus->cl_bus_lock));
+
+	if (!cldev->is_added)
+		return;
+
+	device_del(&cldev->dev);
+
+	list_del_init(&cldev->bus_list);
+
+	cldev->is_added = 0;
+	put_device(&cldev->dev);
+}
+
+/**
+ * mei_cl_bus_remove_device - remove a devices form the bus
+ *
+ * @cldev: me client device
+ */
+static void mei_cl_bus_remove_device(struct mei_cl_device *cldev)
+{
+	mei_cl_bus_dev_stop(cldev);
+	mei_cl_bus_dev_destroy(cldev);
+}
+
+/**
+ * mei_cl_bus_remove_devices - remove all devices form the bus
+ *
+ * @bus: mei device
+ */
+void mei_cl_bus_remove_devices(struct mei_device *bus)
+{
+	struct mei_cl_device *cldev, *next;
+
+	mutex_lock(&bus->cl_bus_lock);
+	list_for_each_entry_safe(cldev, next, &bus->device_list, bus_list)
+		mei_cl_bus_remove_device(cldev);
+	mutex_unlock(&bus->cl_bus_lock);
+}
+
+
+/**
+ * mei_cl_bus_dev_init - allocate and initializes an mei client devices
+ *     based on me client
+ *
+ * @bus: mei device
+ * @me_cl: me client
+ *
+ * Locking: called under "dev->cl_bus_lock" lock
+ */
+static void mei_cl_bus_dev_init(struct mei_device *bus,
+				struct mei_me_client *me_cl)
+{
+	struct mei_cl_device *cldev;
+
+	WARN_ON(!mutex_is_locked(&bus->cl_bus_lock));
+
+	dev_dbg(bus->dev, "initializing %pUl", mei_me_cl_uuid(me_cl));
+
+	if (me_cl->bus_added)
+		return;
+
+	cldev = mei_cl_bus_dev_alloc(bus, me_cl);
+	if (!cldev)
+		return;
+
+	me_cl->bus_added = true;
+	list_add_tail(&cldev->bus_list, &bus->device_list);
+
+}
+
+/**
+ * mei_cl_bus_rescan - scan me clients list and add create
+ *    devices for eligible clients
+ *
+ * @bus: mei device
+ */
+static void mei_cl_bus_rescan(struct mei_device *bus)
+{
+	struct mei_cl_device *cldev, *n;
+	struct mei_me_client *me_cl;
+
+	mutex_lock(&bus->cl_bus_lock);
+
+	down_read(&bus->me_clients_rwsem);
+	list_for_each_entry(me_cl, &bus->me_clients, list)
+		mei_cl_bus_dev_init(bus, me_cl);
+	up_read(&bus->me_clients_rwsem);
+
+	list_for_each_entry_safe(cldev, n, &bus->device_list, bus_list) {
+
+		if (!mei_me_cl_is_active(cldev->me_cl)) {
+			mei_cl_bus_remove_device(cldev);
+			continue;
+		}
+
+		if (cldev->is_added)
+			continue;
+
+		if (mei_cl_bus_dev_setup(bus, cldev))
+			mei_cl_bus_dev_add(cldev);
+		else {
+			list_del_init(&cldev->bus_list);
+			put_device(&cldev->dev);
+		}
+	}
+	mutex_unlock(&bus->cl_bus_lock);
+
+	dev_dbg(bus->dev, "rescan end");
+}
+
+void mei_cl_bus_rescan_work(struct work_struct *work)
+{
+	struct mei_device *bus =
+		container_of(work, struct mei_device, bus_rescan_work);
+
+	mei_cl_bus_rescan(bus);
+}
+
+int __mei_cldev_driver_register(struct mei_cl_driver *cldrv,
+				struct module *owner)
+{
+	int err;
+
+	cldrv->driver.name = cldrv->name;
+	cldrv->driver.owner = owner;
+	cldrv->driver.bus = &mei_cl_bus_type;
+
+	err = driver_register(&cldrv->driver);
+	if (err)
+		return err;
+
+	pr_debug("mei: driver [%s] registered\n", cldrv->driver.name);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__mei_cldev_driver_register);
+
+void mei_cldev_driver_unregister(struct mei_cl_driver *cldrv)
+{
+	driver_unregister(&cldrv->driver);
+
+	pr_debug("mei: driver [%s] unregistered\n", cldrv->driver.name);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_driver_unregister);
+
+
+int __init mei_cl_bus_init(void)
+{
+	return bus_register(&mei_cl_bus_type);
+}
+
+void __exit mei_cl_bus_exit(void)
+{
+	bus_unregister(&mei_cl_bus_type);
+}
diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c
new file mode 100644
index 000000000..524b8c0d3
--- /dev/null
+++ b/drivers/misc/mei/client.c
@@ -0,0 +1,1844 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/sched/signal.h>
+#include <linux/wait.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/pm_runtime.h>
+
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+#include "hbm.h"
+#include "client.h"
+
+/**
+ * mei_me_cl_init - initialize me client
+ *
+ * @me_cl: me client
+ */
+void mei_me_cl_init(struct mei_me_client *me_cl)
+{
+	INIT_LIST_HEAD(&me_cl->list);
+	kref_init(&me_cl->refcnt);
+}
+
+/**
+ * mei_me_cl_get - increases me client refcount
+ *
+ * @me_cl: me client
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * Return: me client or NULL
+ */
+struct mei_me_client *mei_me_cl_get(struct mei_me_client *me_cl)
+{
+	if (me_cl && kref_get_unless_zero(&me_cl->refcnt))
+		return me_cl;
+
+	return NULL;
+}
+
+/**
+ * mei_me_cl_release - free me client
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * @ref: me_client refcount
+ */
+static void mei_me_cl_release(struct kref *ref)
+{
+	struct mei_me_client *me_cl =
+		container_of(ref, struct mei_me_client, refcnt);
+
+	kfree(me_cl);
+}
+
+/**
+ * mei_me_cl_put - decrease me client refcount and free client if necessary
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * @me_cl: me client
+ */
+void mei_me_cl_put(struct mei_me_client *me_cl)
+{
+	if (me_cl)
+		kref_put(&me_cl->refcnt, mei_me_cl_release);
+}
+
+/**
+ * __mei_me_cl_del  - delete me client from the list and decrease
+ *     reference counter
+ *
+ * @dev: mei device
+ * @me_cl: me client
+ *
+ * Locking: dev->me_clients_rwsem
+ */
+static void __mei_me_cl_del(struct mei_device *dev, struct mei_me_client *me_cl)
+{
+	if (!me_cl)
+		return;
+
+	list_del_init(&me_cl->list);
+	mei_me_cl_put(me_cl);
+}
+
+/**
+ * mei_me_cl_del - delete me client from the list and decrease
+ *     reference counter
+ *
+ * @dev: mei device
+ * @me_cl: me client
+ */
+void mei_me_cl_del(struct mei_device *dev, struct mei_me_client *me_cl)
+{
+	down_write(&dev->me_clients_rwsem);
+	__mei_me_cl_del(dev, me_cl);
+	up_write(&dev->me_clients_rwsem);
+}
+
+/**
+ * mei_me_cl_add - add me client to the list
+ *
+ * @dev: mei device
+ * @me_cl: me client
+ */
+void mei_me_cl_add(struct mei_device *dev, struct mei_me_client *me_cl)
+{
+	down_write(&dev->me_clients_rwsem);
+	list_add(&me_cl->list, &dev->me_clients);
+	up_write(&dev->me_clients_rwsem);
+}
+
+/**
+ * __mei_me_cl_by_uuid - locate me client by uuid
+ *	increases ref count
+ *
+ * @dev: mei device
+ * @uuid: me client uuid
+ *
+ * Return: me client or NULL if not found
+ *
+ * Locking: dev->me_clients_rwsem
+ */
+static struct mei_me_client *__mei_me_cl_by_uuid(struct mei_device *dev,
+					const uuid_le *uuid)
+{
+	struct mei_me_client *me_cl;
+	const uuid_le *pn;
+
+	WARN_ON(!rwsem_is_locked(&dev->me_clients_rwsem));
+
+	list_for_each_entry(me_cl, &dev->me_clients, list) {
+		pn = &me_cl->props.protocol_name;
+		if (uuid_le_cmp(*uuid, *pn) == 0)
+			return mei_me_cl_get(me_cl);
+	}
+
+	return NULL;
+}
+
+/**
+ * mei_me_cl_by_uuid - locate me client by uuid
+ *	increases ref count
+ *
+ * @dev: mei device
+ * @uuid: me client uuid
+ *
+ * Return: me client or NULL if not found
+ *
+ * Locking: dev->me_clients_rwsem
+ */
+struct mei_me_client *mei_me_cl_by_uuid(struct mei_device *dev,
+					const uuid_le *uuid)
+{
+	struct mei_me_client *me_cl;
+
+	down_read(&dev->me_clients_rwsem);
+	me_cl = __mei_me_cl_by_uuid(dev, uuid);
+	up_read(&dev->me_clients_rwsem);
+
+	return me_cl;
+}
+
+/**
+ * mei_me_cl_by_id - locate me client by client id
+ *	increases ref count
+ *
+ * @dev: the device structure
+ * @client_id: me client id
+ *
+ * Return: me client or NULL if not found
+ *
+ * Locking: dev->me_clients_rwsem
+ */
+struct mei_me_client *mei_me_cl_by_id(struct mei_device *dev, u8 client_id)
+{
+
+	struct mei_me_client *__me_cl, *me_cl = NULL;
+
+	down_read(&dev->me_clients_rwsem);
+	list_for_each_entry(__me_cl, &dev->me_clients, list) {
+		if (__me_cl->client_id == client_id) {
+			me_cl = mei_me_cl_get(__me_cl);
+			break;
+		}
+	}
+	up_read(&dev->me_clients_rwsem);
+
+	return me_cl;
+}
+
+/**
+ * __mei_me_cl_by_uuid_id - locate me client by client id and uuid
+ *	increases ref count
+ *
+ * @dev: the device structure
+ * @uuid: me client uuid
+ * @client_id: me client id
+ *
+ * Return: me client or null if not found
+ *
+ * Locking: dev->me_clients_rwsem
+ */
+static struct mei_me_client *__mei_me_cl_by_uuid_id(struct mei_device *dev,
+					   const uuid_le *uuid, u8 client_id)
+{
+	struct mei_me_client *me_cl;
+	const uuid_le *pn;
+
+	WARN_ON(!rwsem_is_locked(&dev->me_clients_rwsem));
+
+	list_for_each_entry(me_cl, &dev->me_clients, list) {
+		pn = &me_cl->props.protocol_name;
+		if (uuid_le_cmp(*uuid, *pn) == 0 &&
+		    me_cl->client_id == client_id)
+			return mei_me_cl_get(me_cl);
+	}
+
+	return NULL;
+}
+
+
+/**
+ * mei_me_cl_by_uuid_id - locate me client by client id and uuid
+ *	increases ref count
+ *
+ * @dev: the device structure
+ * @uuid: me client uuid
+ * @client_id: me client id
+ *
+ * Return: me client or null if not found
+ */
+struct mei_me_client *mei_me_cl_by_uuid_id(struct mei_device *dev,
+					   const uuid_le *uuid, u8 client_id)
+{
+	struct mei_me_client *me_cl;
+
+	down_read(&dev->me_clients_rwsem);
+	me_cl = __mei_me_cl_by_uuid_id(dev, uuid, client_id);
+	up_read(&dev->me_clients_rwsem);
+
+	return me_cl;
+}
+
+/**
+ * mei_me_cl_rm_by_uuid - remove all me clients matching uuid
+ *
+ * @dev: the device structure
+ * @uuid: me client uuid
+ *
+ * Locking: called under "dev->device_lock" lock
+ */
+void mei_me_cl_rm_by_uuid(struct mei_device *dev, const uuid_le *uuid)
+{
+	struct mei_me_client *me_cl;
+
+	dev_dbg(dev->dev, "remove %pUl\n", uuid);
+
+	down_write(&dev->me_clients_rwsem);
+	me_cl = __mei_me_cl_by_uuid(dev, uuid);
+	__mei_me_cl_del(dev, me_cl);
+	mei_me_cl_put(me_cl);
+	up_write(&dev->me_clients_rwsem);
+}
+
+/**
+ * mei_me_cl_rm_by_uuid_id - remove all me clients matching client id
+ *
+ * @dev: the device structure
+ * @uuid: me client uuid
+ * @id: me client id
+ *
+ * Locking: called under "dev->device_lock" lock
+ */
+void mei_me_cl_rm_by_uuid_id(struct mei_device *dev, const uuid_le *uuid, u8 id)
+{
+	struct mei_me_client *me_cl;
+
+	dev_dbg(dev->dev, "remove %pUl %d\n", uuid, id);
+
+	down_write(&dev->me_clients_rwsem);
+	me_cl = __mei_me_cl_by_uuid_id(dev, uuid, id);
+	__mei_me_cl_del(dev, me_cl);
+	mei_me_cl_put(me_cl);
+	up_write(&dev->me_clients_rwsem);
+}
+
+/**
+ * mei_me_cl_rm_all - remove all me clients
+ *
+ * @dev: the device structure
+ *
+ * Locking: called under "dev->device_lock" lock
+ */
+void mei_me_cl_rm_all(struct mei_device *dev)
+{
+	struct mei_me_client *me_cl, *next;
+
+	down_write(&dev->me_clients_rwsem);
+	list_for_each_entry_safe(me_cl, next, &dev->me_clients, list)
+		__mei_me_cl_del(dev, me_cl);
+	up_write(&dev->me_clients_rwsem);
+}
+
+/**
+ * mei_cl_cmp_id - tells if the clients are the same
+ *
+ * @cl1: host client 1
+ * @cl2: host client 2
+ *
+ * Return: true  - if the clients has same host and me ids
+ *         false - otherwise
+ */
+static inline bool mei_cl_cmp_id(const struct mei_cl *cl1,
+				const struct mei_cl *cl2)
+{
+	return cl1 && cl2 &&
+		(cl1->host_client_id == cl2->host_client_id) &&
+		(mei_cl_me_id(cl1) == mei_cl_me_id(cl2));
+}
+
+/**
+ * mei_io_cb_free - free mei_cb_private related memory
+ *
+ * @cb: mei callback struct
+ */
+void mei_io_cb_free(struct mei_cl_cb *cb)
+{
+	if (cb == NULL)
+		return;
+
+	list_del(&cb->list);
+	kfree(cb->buf.data);
+	kfree(cb);
+}
+
+/**
+ * mei_tx_cb_queue - queue tx callback
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * @cb: mei callback struct
+ * @head: an instance of list to queue on
+ */
+static inline void mei_tx_cb_enqueue(struct mei_cl_cb *cb,
+				     struct list_head *head)
+{
+	list_add_tail(&cb->list, head);
+	cb->cl->tx_cb_queued++;
+}
+
+/**
+ * mei_tx_cb_dequeue - dequeue tx callback
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * @cb: mei callback struct to dequeue and free
+ */
+static inline void mei_tx_cb_dequeue(struct mei_cl_cb *cb)
+{
+	if (!WARN_ON(cb->cl->tx_cb_queued == 0))
+		cb->cl->tx_cb_queued--;
+
+	mei_io_cb_free(cb);
+}
+
+/**
+ * mei_io_cb_init - allocate and initialize io callback
+ *
+ * @cl: mei client
+ * @type: operation type
+ * @fp: pointer to file structure
+ *
+ * Return: mei_cl_cb pointer or NULL;
+ */
+static struct mei_cl_cb *mei_io_cb_init(struct mei_cl *cl,
+					enum mei_cb_file_ops type,
+					const struct file *fp)
+{
+	struct mei_cl_cb *cb;
+
+	cb = kzalloc(sizeof(struct mei_cl_cb), GFP_KERNEL);
+	if (!cb)
+		return NULL;
+
+	INIT_LIST_HEAD(&cb->list);
+	cb->fp = fp;
+	cb->cl = cl;
+	cb->buf_idx = 0;
+	cb->fop_type = type;
+	return cb;
+}
+
+/**
+ * mei_io_list_flush_cl - removes cbs belonging to the cl.
+ *
+ * @head:  an instance of our list structure
+ * @cl:    host client
+ */
+static void mei_io_list_flush_cl(struct list_head *head,
+				 const struct mei_cl *cl)
+{
+	struct mei_cl_cb *cb, *next;
+
+	list_for_each_entry_safe(cb, next, head, list) {
+		if (mei_cl_cmp_id(cl, cb->cl))
+			list_del_init(&cb->list);
+	}
+}
+
+/**
+ * mei_io_tx_list_free_cl - removes cb belonging to the cl and free them
+ *
+ * @head: An instance of our list structure
+ * @cl: host client
+ */
+static void mei_io_tx_list_free_cl(struct list_head *head,
+				   const struct mei_cl *cl)
+{
+	struct mei_cl_cb *cb, *next;
+
+	list_for_each_entry_safe(cb, next, head, list) {
+		if (mei_cl_cmp_id(cl, cb->cl))
+			mei_tx_cb_dequeue(cb);
+	}
+}
+
+/**
+ * mei_io_list_free_fp - free cb from a list that matches file pointer
+ *
+ * @head: io list
+ * @fp: file pointer (matching cb file object), may be NULL
+ */
+static void mei_io_list_free_fp(struct list_head *head, const struct file *fp)
+{
+	struct mei_cl_cb *cb, *next;
+
+	list_for_each_entry_safe(cb, next, head, list)
+		if (!fp || fp == cb->fp)
+			mei_io_cb_free(cb);
+}
+
+/**
+ * mei_cl_alloc_cb - a convenient wrapper for allocating read cb
+ *
+ * @cl: host client
+ * @length: size of the buffer
+ * @fop_type: operation type
+ * @fp: associated file pointer (might be NULL)
+ *
+ * Return: cb on success and NULL on failure
+ */
+struct mei_cl_cb *mei_cl_alloc_cb(struct mei_cl *cl, size_t length,
+				  enum mei_cb_file_ops fop_type,
+				  const struct file *fp)
+{
+	struct mei_cl_cb *cb;
+
+	cb = mei_io_cb_init(cl, fop_type, fp);
+	if (!cb)
+		return NULL;
+
+	if (length == 0)
+		return cb;
+
+	cb->buf.data = kmalloc(length, GFP_KERNEL);
+	if (!cb->buf.data) {
+		mei_io_cb_free(cb);
+		return NULL;
+	}
+	cb->buf.size = length;
+
+	return cb;
+}
+
+/**
+ * mei_cl_enqueue_ctrl_wr_cb - a convenient wrapper for allocating
+ *     and enqueuing of the control commands cb
+ *
+ * @cl: host client
+ * @length: size of the buffer
+ * @fop_type: operation type
+ * @fp: associated file pointer (might be NULL)
+ *
+ * Return: cb on success and NULL on failure
+ * Locking: called under "dev->device_lock" lock
+ */
+struct mei_cl_cb *mei_cl_enqueue_ctrl_wr_cb(struct mei_cl *cl, size_t length,
+					    enum mei_cb_file_ops fop_type,
+					    const struct file *fp)
+{
+	struct mei_cl_cb *cb;
+
+	/* for RX always allocate at least client's mtu */
+	if (length)
+		length = max_t(size_t, length, mei_cl_mtu(cl));
+
+	cb = mei_cl_alloc_cb(cl, length, fop_type, fp);
+	if (!cb)
+		return NULL;
+
+	list_add_tail(&cb->list, &cl->dev->ctrl_wr_list);
+	return cb;
+}
+
+/**
+ * mei_cl_read_cb - find this cl's callback in the read list
+ *     for a specific file
+ *
+ * @cl: host client
+ * @fp: file pointer (matching cb file object), may be NULL
+ *
+ * Return: cb on success, NULL if cb is not found
+ */
+struct mei_cl_cb *mei_cl_read_cb(const struct mei_cl *cl, const struct file *fp)
+{
+	struct mei_cl_cb *cb;
+
+	list_for_each_entry(cb, &cl->rd_completed, list)
+		if (!fp || fp == cb->fp)
+			return cb;
+
+	return NULL;
+}
+
+/**
+ * mei_cl_flush_queues - flushes queue lists belonging to cl.
+ *
+ * @cl: host client
+ * @fp: file pointer (matching cb file object), may be NULL
+ *
+ * Return: 0 on success, -EINVAL if cl or cl->dev is NULL.
+ */
+int mei_cl_flush_queues(struct mei_cl *cl, const struct file *fp)
+{
+	struct mei_device *dev;
+
+	if (WARN_ON(!cl || !cl->dev))
+		return -EINVAL;
+
+	dev = cl->dev;
+
+	cl_dbg(dev, cl, "remove list entry belonging to cl\n");
+	mei_io_tx_list_free_cl(&cl->dev->write_list, cl);
+	mei_io_tx_list_free_cl(&cl->dev->write_waiting_list, cl);
+	mei_io_list_flush_cl(&cl->dev->ctrl_wr_list, cl);
+	mei_io_list_flush_cl(&cl->dev->ctrl_rd_list, cl);
+	mei_io_list_free_fp(&cl->rd_pending, fp);
+	mei_io_list_free_fp(&cl->rd_completed, fp);
+
+	return 0;
+}
+
+/**
+ * mei_cl_init - initializes cl.
+ *
+ * @cl: host client to be initialized
+ * @dev: mei device
+ */
+static void mei_cl_init(struct mei_cl *cl, struct mei_device *dev)
+{
+	memset(cl, 0, sizeof(struct mei_cl));
+	init_waitqueue_head(&cl->wait);
+	init_waitqueue_head(&cl->rx_wait);
+	init_waitqueue_head(&cl->tx_wait);
+	init_waitqueue_head(&cl->ev_wait);
+	INIT_LIST_HEAD(&cl->rd_completed);
+	INIT_LIST_HEAD(&cl->rd_pending);
+	INIT_LIST_HEAD(&cl->link);
+	cl->writing_state = MEI_IDLE;
+	cl->state = MEI_FILE_UNINITIALIZED;
+	cl->dev = dev;
+}
+
+/**
+ * mei_cl_allocate - allocates cl  structure and sets it up.
+ *
+ * @dev: mei device
+ * Return:  The allocated file or NULL on failure
+ */
+struct mei_cl *mei_cl_allocate(struct mei_device *dev)
+{
+	struct mei_cl *cl;
+
+	cl = kmalloc(sizeof(struct mei_cl), GFP_KERNEL);
+	if (!cl)
+		return NULL;
+
+	mei_cl_init(cl, dev);
+
+	return cl;
+}
+
+/**
+ * mei_cl_link - allocate host id in the host map
+ *
+ * @cl: host client
+ *
+ * Return: 0 on success
+ *	-EINVAL on incorrect values
+ *	-EMFILE if open count exceeded.
+ */
+int mei_cl_link(struct mei_cl *cl)
+{
+	struct mei_device *dev;
+	int id;
+
+	if (WARN_ON(!cl || !cl->dev))
+		return -EINVAL;
+
+	dev = cl->dev;
+
+	id = find_first_zero_bit(dev->host_clients_map, MEI_CLIENTS_MAX);
+	if (id >= MEI_CLIENTS_MAX) {
+		dev_err(dev->dev, "id exceeded %d", MEI_CLIENTS_MAX);
+		return -EMFILE;
+	}
+
+	if (dev->open_handle_count >= MEI_MAX_OPEN_HANDLE_COUNT) {
+		dev_err(dev->dev, "open_handle_count exceeded %d",
+			MEI_MAX_OPEN_HANDLE_COUNT);
+		return -EMFILE;
+	}
+
+	dev->open_handle_count++;
+
+	cl->host_client_id = id;
+	list_add_tail(&cl->link, &dev->file_list);
+
+	set_bit(id, dev->host_clients_map);
+
+	cl->state = MEI_FILE_INITIALIZING;
+
+	cl_dbg(dev, cl, "link cl\n");
+	return 0;
+}
+
+/**
+ * mei_cl_unlink - remove host client from the list
+ *
+ * @cl: host client
+ *
+ * Return: always 0
+ */
+int mei_cl_unlink(struct mei_cl *cl)
+{
+	struct mei_device *dev;
+
+	/* don't shout on error exit path */
+	if (!cl)
+		return 0;
+
+	if (WARN_ON(!cl->dev))
+		return 0;
+
+	dev = cl->dev;
+
+	cl_dbg(dev, cl, "unlink client");
+
+	if (dev->open_handle_count > 0)
+		dev->open_handle_count--;
+
+	/* never clear the 0 bit */
+	if (cl->host_client_id)
+		clear_bit(cl->host_client_id, dev->host_clients_map);
+
+	list_del_init(&cl->link);
+
+	cl->state = MEI_FILE_UNINITIALIZED;
+	cl->writing_state = MEI_IDLE;
+
+	WARN_ON(!list_empty(&cl->rd_completed) ||
+		!list_empty(&cl->rd_pending) ||
+		!list_empty(&cl->link));
+
+	return 0;
+}
+
+void mei_host_client_init(struct mei_device *dev)
+{
+	dev->dev_state = MEI_DEV_ENABLED;
+	dev->reset_count = 0;
+
+	schedule_work(&dev->bus_rescan_work);
+
+	pm_runtime_mark_last_busy(dev->dev);
+	dev_dbg(dev->dev, "rpm: autosuspend\n");
+	pm_request_autosuspend(dev->dev);
+}
+
+/**
+ * mei_hbuf_acquire - try to acquire host buffer
+ *
+ * @dev: the device structure
+ * Return: true if host buffer was acquired
+ */
+bool mei_hbuf_acquire(struct mei_device *dev)
+{
+	if (mei_pg_state(dev) == MEI_PG_ON ||
+	    mei_pg_in_transition(dev)) {
+		dev_dbg(dev->dev, "device is in pg\n");
+		return false;
+	}
+
+	if (!dev->hbuf_is_ready) {
+		dev_dbg(dev->dev, "hbuf is not ready\n");
+		return false;
+	}
+
+	dev->hbuf_is_ready = false;
+
+	return true;
+}
+
+/**
+ * mei_cl_wake_all - wake up readers, writers and event waiters so
+ *                 they can be interrupted
+ *
+ * @cl: host client
+ */
+static void mei_cl_wake_all(struct mei_cl *cl)
+{
+	struct mei_device *dev = cl->dev;
+
+	/* synchronized under device mutex */
+	if (waitqueue_active(&cl->rx_wait)) {
+		cl_dbg(dev, cl, "Waking up reading client!\n");
+		wake_up_interruptible(&cl->rx_wait);
+	}
+	/* synchronized under device mutex */
+	if (waitqueue_active(&cl->tx_wait)) {
+		cl_dbg(dev, cl, "Waking up writing client!\n");
+		wake_up_interruptible(&cl->tx_wait);
+	}
+	/* synchronized under device mutex */
+	if (waitqueue_active(&cl->ev_wait)) {
+		cl_dbg(dev, cl, "Waking up waiting for event clients!\n");
+		wake_up_interruptible(&cl->ev_wait);
+	}
+	/* synchronized under device mutex */
+	if (waitqueue_active(&cl->wait)) {
+		cl_dbg(dev, cl, "Waking up ctrl write clients!\n");
+		wake_up(&cl->wait);
+	}
+}
+
+/**
+ * mei_cl_set_disconnected - set disconnected state and clear
+ *   associated states and resources
+ *
+ * @cl: host client
+ */
+static void mei_cl_set_disconnected(struct mei_cl *cl)
+{
+	struct mei_device *dev = cl->dev;
+
+	if (cl->state == MEI_FILE_DISCONNECTED ||
+	    cl->state <= MEI_FILE_INITIALIZING)
+		return;
+
+	cl->state = MEI_FILE_DISCONNECTED;
+	mei_io_tx_list_free_cl(&dev->write_list, cl);
+	mei_io_tx_list_free_cl(&dev->write_waiting_list, cl);
+	mei_io_list_flush_cl(&dev->ctrl_rd_list, cl);
+	mei_io_list_flush_cl(&dev->ctrl_wr_list, cl);
+	mei_cl_wake_all(cl);
+	cl->rx_flow_ctrl_creds = 0;
+	cl->tx_flow_ctrl_creds = 0;
+	cl->timer_count = 0;
+
+	if (!cl->me_cl)
+		return;
+
+	if (!WARN_ON(cl->me_cl->connect_count == 0))
+		cl->me_cl->connect_count--;
+
+	if (cl->me_cl->connect_count == 0)
+		cl->me_cl->tx_flow_ctrl_creds = 0;
+
+	mei_me_cl_put(cl->me_cl);
+	cl->me_cl = NULL;
+}
+
+static int mei_cl_set_connecting(struct mei_cl *cl, struct mei_me_client *me_cl)
+{
+	if (!mei_me_cl_get(me_cl))
+		return -ENOENT;
+
+	/* only one connection is allowed for fixed address clients */
+	if (me_cl->props.fixed_address) {
+		if (me_cl->connect_count) {
+			mei_me_cl_put(me_cl);
+			return -EBUSY;
+		}
+	}
+
+	cl->me_cl = me_cl;
+	cl->state = MEI_FILE_CONNECTING;
+	cl->me_cl->connect_count++;
+
+	return 0;
+}
+
+/*
+ * mei_cl_send_disconnect - send disconnect request
+ *
+ * @cl: host client
+ * @cb: callback block
+ *
+ * Return: 0, OK; otherwise, error.
+ */
+static int mei_cl_send_disconnect(struct mei_cl *cl, struct mei_cl_cb *cb)
+{
+	struct mei_device *dev;
+	int ret;
+
+	dev = cl->dev;
+
+	ret = mei_hbm_cl_disconnect_req(dev, cl);
+	cl->status = ret;
+	if (ret) {
+		cl->state = MEI_FILE_DISCONNECT_REPLY;
+		return ret;
+	}
+
+	list_move_tail(&cb->list, &dev->ctrl_rd_list);
+	cl->timer_count = MEI_CONNECT_TIMEOUT;
+	mei_schedule_stall_timer(dev);
+
+	return 0;
+}
+
+/**
+ * mei_cl_irq_disconnect - processes close related operation from
+ *	interrupt thread context - send disconnect request
+ *
+ * @cl: client
+ * @cb: callback block.
+ * @cmpl_list: complete list.
+ *
+ * Return: 0, OK; otherwise, error.
+ */
+int mei_cl_irq_disconnect(struct mei_cl *cl, struct mei_cl_cb *cb,
+			  struct list_head *cmpl_list)
+{
+	struct mei_device *dev = cl->dev;
+	u32 msg_slots;
+	int slots;
+	int ret;
+
+	msg_slots = mei_hbm2slots(sizeof(struct hbm_client_connect_request));
+	slots = mei_hbuf_empty_slots(dev);
+	if (slots < 0)
+		return -EOVERFLOW;
+
+	if ((u32)slots < msg_slots)
+		return -EMSGSIZE;
+
+	ret = mei_cl_send_disconnect(cl, cb);
+	if (ret)
+		list_move_tail(&cb->list, cmpl_list);
+
+	return ret;
+}
+
+/**
+ * __mei_cl_disconnect - disconnect host client from the me one
+ *     internal function runtime pm has to be already acquired
+ *
+ * @cl: host client
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+static int __mei_cl_disconnect(struct mei_cl *cl)
+{
+	struct mei_device *dev;
+	struct mei_cl_cb *cb;
+	int rets;
+
+	dev = cl->dev;
+
+	cl->state = MEI_FILE_DISCONNECTING;
+
+	cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, MEI_FOP_DISCONNECT, NULL);
+	if (!cb) {
+		rets = -ENOMEM;
+		goto out;
+	}
+
+	if (mei_hbuf_acquire(dev)) {
+		rets = mei_cl_send_disconnect(cl, cb);
+		if (rets) {
+			cl_err(dev, cl, "failed to disconnect.\n");
+			goto out;
+		}
+	}
+
+	mutex_unlock(&dev->device_lock);
+	wait_event_timeout(cl->wait,
+			   cl->state == MEI_FILE_DISCONNECT_REPLY ||
+			   cl->state == MEI_FILE_DISCONNECTED,
+			   mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT));
+	mutex_lock(&dev->device_lock);
+
+	rets = cl->status;
+	if (cl->state != MEI_FILE_DISCONNECT_REPLY &&
+	    cl->state != MEI_FILE_DISCONNECTED) {
+		cl_dbg(dev, cl, "timeout on disconnect from FW client.\n");
+		rets = -ETIME;
+	}
+
+out:
+	/* we disconnect also on error */
+	mei_cl_set_disconnected(cl);
+	if (!rets)
+		cl_dbg(dev, cl, "successfully disconnected from FW client.\n");
+
+	mei_io_cb_free(cb);
+	return rets;
+}
+
+/**
+ * mei_cl_disconnect - disconnect host client from the me one
+ *
+ * @cl: host client
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+int mei_cl_disconnect(struct mei_cl *cl)
+{
+	struct mei_device *dev;
+	int rets;
+
+	if (WARN_ON(!cl || !cl->dev))
+		return -ENODEV;
+
+	dev = cl->dev;
+
+	cl_dbg(dev, cl, "disconnecting");
+
+	if (!mei_cl_is_connected(cl))
+		return 0;
+
+	if (mei_cl_is_fixed_address(cl)) {
+		mei_cl_set_disconnected(cl);
+		return 0;
+	}
+
+	if (dev->dev_state == MEI_DEV_POWER_DOWN) {
+		cl_dbg(dev, cl, "Device is powering down, don't bother with disconnection\n");
+		mei_cl_set_disconnected(cl);
+		return 0;
+	}
+
+	rets = pm_runtime_get(dev->dev);
+	if (rets < 0 && rets != -EINPROGRESS) {
+		pm_runtime_put_noidle(dev->dev);
+		cl_err(dev, cl, "rpm: get failed %d\n", rets);
+		return rets;
+	}
+
+	rets = __mei_cl_disconnect(cl);
+
+	cl_dbg(dev, cl, "rpm: autosuspend\n");
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_runtime_put_autosuspend(dev->dev);
+
+	return rets;
+}
+
+
+/**
+ * mei_cl_is_other_connecting - checks if other
+ *    client with the same me client id is connecting
+ *
+ * @cl: private data of the file object
+ *
+ * Return: true if other client is connected, false - otherwise.
+ */
+static bool mei_cl_is_other_connecting(struct mei_cl *cl)
+{
+	struct mei_device *dev;
+	struct mei_cl_cb *cb;
+
+	dev = cl->dev;
+
+	list_for_each_entry(cb, &dev->ctrl_rd_list, list) {
+		if (cb->fop_type == MEI_FOP_CONNECT &&
+		    mei_cl_me_id(cl) == mei_cl_me_id(cb->cl))
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * mei_cl_send_connect - send connect request
+ *
+ * @cl: host client
+ * @cb: callback block
+ *
+ * Return: 0, OK; otherwise, error.
+ */
+static int mei_cl_send_connect(struct mei_cl *cl, struct mei_cl_cb *cb)
+{
+	struct mei_device *dev;
+	int ret;
+
+	dev = cl->dev;
+
+	ret = mei_hbm_cl_connect_req(dev, cl);
+	cl->status = ret;
+	if (ret) {
+		cl->state = MEI_FILE_DISCONNECT_REPLY;
+		return ret;
+	}
+
+	list_move_tail(&cb->list, &dev->ctrl_rd_list);
+	cl->timer_count = MEI_CONNECT_TIMEOUT;
+	mei_schedule_stall_timer(dev);
+	return 0;
+}
+
+/**
+ * mei_cl_irq_connect - send connect request in irq_thread context
+ *
+ * @cl: host client
+ * @cb: callback block
+ * @cmpl_list: complete list
+ *
+ * Return: 0, OK; otherwise, error.
+ */
+int mei_cl_irq_connect(struct mei_cl *cl, struct mei_cl_cb *cb,
+		       struct list_head *cmpl_list)
+{
+	struct mei_device *dev = cl->dev;
+	u32 msg_slots;
+	int slots;
+	int rets;
+
+	if (mei_cl_is_other_connecting(cl))
+		return 0;
+
+	msg_slots = mei_hbm2slots(sizeof(struct hbm_client_connect_request));
+	slots = mei_hbuf_empty_slots(dev);
+	if (slots < 0)
+		return -EOVERFLOW;
+
+	if ((u32)slots < msg_slots)
+		return -EMSGSIZE;
+
+	rets = mei_cl_send_connect(cl, cb);
+	if (rets)
+		list_move_tail(&cb->list, cmpl_list);
+
+	return rets;
+}
+
+/**
+ * mei_cl_connect - connect host client to the me one
+ *
+ * @cl: host client
+ * @me_cl: me client
+ * @fp: pointer to file structure
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+int mei_cl_connect(struct mei_cl *cl, struct mei_me_client *me_cl,
+		   const struct file *fp)
+{
+	struct mei_device *dev;
+	struct mei_cl_cb *cb;
+	int rets;
+
+	if (WARN_ON(!cl || !cl->dev || !me_cl))
+		return -ENODEV;
+
+	dev = cl->dev;
+
+	rets = mei_cl_set_connecting(cl, me_cl);
+	if (rets)
+		goto nortpm;
+
+	if (mei_cl_is_fixed_address(cl)) {
+		cl->state = MEI_FILE_CONNECTED;
+		rets = 0;
+		goto nortpm;
+	}
+
+	rets = pm_runtime_get(dev->dev);
+	if (rets < 0 && rets != -EINPROGRESS) {
+		pm_runtime_put_noidle(dev->dev);
+		cl_err(dev, cl, "rpm: get failed %d\n", rets);
+		goto nortpm;
+	}
+
+	cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, MEI_FOP_CONNECT, fp);
+	if (!cb) {
+		rets = -ENOMEM;
+		goto out;
+	}
+
+	/* run hbuf acquire last so we don't have to undo */
+	if (!mei_cl_is_other_connecting(cl) && mei_hbuf_acquire(dev)) {
+		rets = mei_cl_send_connect(cl, cb);
+		if (rets)
+			goto out;
+	}
+
+	mutex_unlock(&dev->device_lock);
+	wait_event_timeout(cl->wait,
+			(cl->state == MEI_FILE_CONNECTED ||
+			 cl->state == MEI_FILE_DISCONNECTED ||
+			 cl->state == MEI_FILE_DISCONNECT_REQUIRED ||
+			 cl->state == MEI_FILE_DISCONNECT_REPLY),
+			mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT));
+	mutex_lock(&dev->device_lock);
+
+	if (!mei_cl_is_connected(cl)) {
+		if (cl->state == MEI_FILE_DISCONNECT_REQUIRED) {
+			mei_io_list_flush_cl(&dev->ctrl_rd_list, cl);
+			mei_io_list_flush_cl(&dev->ctrl_wr_list, cl);
+			 /* ignore disconnect return valuue;
+			  * in case of failure reset will be invoked
+			  */
+			__mei_cl_disconnect(cl);
+			rets = -EFAULT;
+			goto out;
+		}
+
+		/* timeout or something went really wrong */
+		if (!cl->status)
+			cl->status = -EFAULT;
+	}
+
+	rets = cl->status;
+out:
+	cl_dbg(dev, cl, "rpm: autosuspend\n");
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_runtime_put_autosuspend(dev->dev);
+
+	mei_io_cb_free(cb);
+
+nortpm:
+	if (!mei_cl_is_connected(cl))
+		mei_cl_set_disconnected(cl);
+
+	return rets;
+}
+
+/**
+ * mei_cl_alloc_linked - allocate and link host client
+ *
+ * @dev: the device structure
+ *
+ * Return: cl on success ERR_PTR on failure
+ */
+struct mei_cl *mei_cl_alloc_linked(struct mei_device *dev)
+{
+	struct mei_cl *cl;
+	int ret;
+
+	cl = mei_cl_allocate(dev);
+	if (!cl) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	ret = mei_cl_link(cl);
+	if (ret)
+		goto err;
+
+	return cl;
+err:
+	kfree(cl);
+	return ERR_PTR(ret);
+}
+
+/**
+ * mei_cl_tx_flow_ctrl_creds - checks flow_control credits for cl.
+ *
+ * @cl: host client
+ *
+ * Return: 1 if tx_flow_ctrl_creds >0, 0 - otherwise.
+ */
+static int mei_cl_tx_flow_ctrl_creds(struct mei_cl *cl)
+{
+	if (WARN_ON(!cl || !cl->me_cl))
+		return -EINVAL;
+
+	if (cl->tx_flow_ctrl_creds > 0)
+		return 1;
+
+	if (mei_cl_is_fixed_address(cl))
+		return 1;
+
+	if (mei_cl_is_single_recv_buf(cl)) {
+		if (cl->me_cl->tx_flow_ctrl_creds > 0)
+			return 1;
+	}
+	return 0;
+}
+
+/**
+ * mei_cl_tx_flow_ctrl_creds_reduce - reduces transmit flow control credits
+ *   for a client
+ *
+ * @cl: host client
+ *
+ * Return:
+ *	0 on success
+ *	-EINVAL when ctrl credits are <= 0
+ */
+static int mei_cl_tx_flow_ctrl_creds_reduce(struct mei_cl *cl)
+{
+	if (WARN_ON(!cl || !cl->me_cl))
+		return -EINVAL;
+
+	if (mei_cl_is_fixed_address(cl))
+		return 0;
+
+	if (mei_cl_is_single_recv_buf(cl)) {
+		if (WARN_ON(cl->me_cl->tx_flow_ctrl_creds <= 0))
+			return -EINVAL;
+		cl->me_cl->tx_flow_ctrl_creds--;
+	} else {
+		if (WARN_ON(cl->tx_flow_ctrl_creds <= 0))
+			return -EINVAL;
+		cl->tx_flow_ctrl_creds--;
+	}
+	return 0;
+}
+
+/**
+ *  mei_cl_notify_fop2req - convert fop to proper request
+ *
+ * @fop: client notification start response command
+ *
+ * Return:  MEI_HBM_NOTIFICATION_START/STOP
+ */
+u8 mei_cl_notify_fop2req(enum mei_cb_file_ops fop)
+{
+	if (fop == MEI_FOP_NOTIFY_START)
+		return MEI_HBM_NOTIFICATION_START;
+	else
+		return MEI_HBM_NOTIFICATION_STOP;
+}
+
+/**
+ *  mei_cl_notify_req2fop - convert notification request top file operation type
+ *
+ * @req: hbm notification request type
+ *
+ * Return:  MEI_FOP_NOTIFY_START/STOP
+ */
+enum mei_cb_file_ops mei_cl_notify_req2fop(u8 req)
+{
+	if (req == MEI_HBM_NOTIFICATION_START)
+		return MEI_FOP_NOTIFY_START;
+	else
+		return MEI_FOP_NOTIFY_STOP;
+}
+
+/**
+ * mei_cl_irq_notify - send notification request in irq_thread context
+ *
+ * @cl: client
+ * @cb: callback block.
+ * @cmpl_list: complete list.
+ *
+ * Return: 0 on such and error otherwise.
+ */
+int mei_cl_irq_notify(struct mei_cl *cl, struct mei_cl_cb *cb,
+		      struct list_head *cmpl_list)
+{
+	struct mei_device *dev = cl->dev;
+	u32 msg_slots;
+	int slots;
+	int ret;
+	bool request;
+
+	msg_slots = mei_hbm2slots(sizeof(struct hbm_client_connect_request));
+	slots = mei_hbuf_empty_slots(dev);
+	if (slots < 0)
+		return -EOVERFLOW;
+
+	if ((u32)slots < msg_slots)
+		return -EMSGSIZE;
+
+	request = mei_cl_notify_fop2req(cb->fop_type);
+	ret = mei_hbm_cl_notify_req(dev, cl, request);
+	if (ret) {
+		cl->status = ret;
+		list_move_tail(&cb->list, cmpl_list);
+		return ret;
+	}
+
+	list_move_tail(&cb->list, &dev->ctrl_rd_list);
+	return 0;
+}
+
+/**
+ * mei_cl_notify_request - send notification stop/start request
+ *
+ * @cl: host client
+ * @fp: associate request with file
+ * @request: 1 for start or 0 for stop
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * Return: 0 on such and error otherwise.
+ */
+int mei_cl_notify_request(struct mei_cl *cl,
+			  const struct file *fp, u8 request)
+{
+	struct mei_device *dev;
+	struct mei_cl_cb *cb;
+	enum mei_cb_file_ops fop_type;
+	int rets;
+
+	if (WARN_ON(!cl || !cl->dev))
+		return -ENODEV;
+
+	dev = cl->dev;
+
+	if (!dev->hbm_f_ev_supported) {
+		cl_dbg(dev, cl, "notifications not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (!mei_cl_is_connected(cl))
+		return -ENODEV;
+
+	rets = pm_runtime_get(dev->dev);
+	if (rets < 0 && rets != -EINPROGRESS) {
+		pm_runtime_put_noidle(dev->dev);
+		cl_err(dev, cl, "rpm: get failed %d\n", rets);
+		return rets;
+	}
+
+	fop_type = mei_cl_notify_req2fop(request);
+	cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, fop_type, fp);
+	if (!cb) {
+		rets = -ENOMEM;
+		goto out;
+	}
+
+	if (mei_hbuf_acquire(dev)) {
+		if (mei_hbm_cl_notify_req(dev, cl, request)) {
+			rets = -ENODEV;
+			goto out;
+		}
+		list_move_tail(&cb->list, &dev->ctrl_rd_list);
+	}
+
+	mutex_unlock(&dev->device_lock);
+	wait_event_timeout(cl->wait,
+			   cl->notify_en == request || !mei_cl_is_connected(cl),
+			   mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT));
+	mutex_lock(&dev->device_lock);
+
+	if (cl->notify_en != request && !cl->status)
+		cl->status = -EFAULT;
+
+	rets = cl->status;
+
+out:
+	cl_dbg(dev, cl, "rpm: autosuspend\n");
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_runtime_put_autosuspend(dev->dev);
+
+	mei_io_cb_free(cb);
+	return rets;
+}
+
+/**
+ * mei_cl_notify - raise notification
+ *
+ * @cl: host client
+ *
+ * Locking: called under "dev->device_lock" lock
+ */
+void mei_cl_notify(struct mei_cl *cl)
+{
+	struct mei_device *dev;
+
+	if (!cl || !cl->dev)
+		return;
+
+	dev = cl->dev;
+
+	if (!cl->notify_en)
+		return;
+
+	cl_dbg(dev, cl, "notify event");
+	cl->notify_ev = true;
+	if (!mei_cl_bus_notify_event(cl))
+		wake_up_interruptible(&cl->ev_wait);
+
+	if (cl->ev_async)
+		kill_fasync(&cl->ev_async, SIGIO, POLL_PRI);
+
+}
+
+/**
+ * mei_cl_notify_get - get or wait for notification event
+ *
+ * @cl: host client
+ * @block: this request is blocking
+ * @notify_ev: true if notification event was received
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * Return: 0 on such and error otherwise.
+ */
+int mei_cl_notify_get(struct mei_cl *cl, bool block, bool *notify_ev)
+{
+	struct mei_device *dev;
+	int rets;
+
+	*notify_ev = false;
+
+	if (WARN_ON(!cl || !cl->dev))
+		return -ENODEV;
+
+	dev = cl->dev;
+
+	if (!dev->hbm_f_ev_supported) {
+		cl_dbg(dev, cl, "notifications not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (!mei_cl_is_connected(cl))
+		return -ENODEV;
+
+	if (cl->notify_ev)
+		goto out;
+
+	if (!block)
+		return -EAGAIN;
+
+	mutex_unlock(&dev->device_lock);
+	rets = wait_event_interruptible(cl->ev_wait, cl->notify_ev);
+	mutex_lock(&dev->device_lock);
+
+	if (rets < 0)
+		return rets;
+
+out:
+	*notify_ev = cl->notify_ev;
+	cl->notify_ev = false;
+	return 0;
+}
+
+/**
+ * mei_cl_read_start - the start read client message function.
+ *
+ * @cl: host client
+ * @length: number of bytes to read
+ * @fp: pointer to file structure
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+int mei_cl_read_start(struct mei_cl *cl, size_t length, const struct file *fp)
+{
+	struct mei_device *dev;
+	struct mei_cl_cb *cb;
+	int rets;
+
+	if (WARN_ON(!cl || !cl->dev))
+		return -ENODEV;
+
+	dev = cl->dev;
+
+	if (!mei_cl_is_connected(cl))
+		return -ENODEV;
+
+	if (!mei_me_cl_is_active(cl->me_cl)) {
+		cl_err(dev, cl, "no such me client\n");
+		return  -ENOTTY;
+	}
+
+	if (mei_cl_is_fixed_address(cl))
+		return 0;
+
+	/* HW currently supports only one pending read */
+	if (cl->rx_flow_ctrl_creds)
+		return -EBUSY;
+
+	cb = mei_cl_enqueue_ctrl_wr_cb(cl, length, MEI_FOP_READ, fp);
+	if (!cb)
+		return -ENOMEM;
+
+	rets = pm_runtime_get(dev->dev);
+	if (rets < 0 && rets != -EINPROGRESS) {
+		pm_runtime_put_noidle(dev->dev);
+		cl_err(dev, cl, "rpm: get failed %d\n", rets);
+		goto nortpm;
+	}
+
+	rets = 0;
+	if (mei_hbuf_acquire(dev)) {
+		rets = mei_hbm_cl_flow_control_req(dev, cl);
+		if (rets < 0)
+			goto out;
+
+		list_move_tail(&cb->list, &cl->rd_pending);
+	}
+	cl->rx_flow_ctrl_creds++;
+
+out:
+	cl_dbg(dev, cl, "rpm: autosuspend\n");
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_runtime_put_autosuspend(dev->dev);
+nortpm:
+	if (rets)
+		mei_io_cb_free(cb);
+
+	return rets;
+}
+
+/**
+ * mei_msg_hdr_init - initialize mei message header
+ *
+ * @mei_hdr: mei message header
+ * @cb: message callback structure
+ */
+static void mei_msg_hdr_init(struct mei_msg_hdr *mei_hdr, struct mei_cl_cb *cb)
+{
+	mei_hdr->host_addr = mei_cl_host_addr(cb->cl);
+	mei_hdr->me_addr = mei_cl_me_id(cb->cl);
+	mei_hdr->length = 0;
+	mei_hdr->reserved = 0;
+	mei_hdr->msg_complete = 0;
+	mei_hdr->dma_ring = 0;
+	mei_hdr->internal = cb->internal;
+}
+
+/**
+ * mei_cl_irq_write - write a message to device
+ *	from the interrupt thread context
+ *
+ * @cl: client
+ * @cb: callback block.
+ * @cmpl_list: complete list.
+ *
+ * Return: 0, OK; otherwise error.
+ */
+int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb,
+		     struct list_head *cmpl_list)
+{
+	struct mei_device *dev;
+	struct mei_msg_data *buf;
+	struct mei_msg_hdr mei_hdr;
+	size_t hdr_len = sizeof(mei_hdr);
+	size_t len;
+	size_t hbuf_len;
+	int hbuf_slots;
+	int rets;
+	bool first_chunk;
+
+	if (WARN_ON(!cl || !cl->dev))
+		return -ENODEV;
+
+	dev = cl->dev;
+
+	buf = &cb->buf;
+
+	first_chunk = cb->buf_idx == 0;
+
+	rets = first_chunk ? mei_cl_tx_flow_ctrl_creds(cl) : 1;
+	if (rets < 0)
+		goto err;
+
+	if (rets == 0) {
+		cl_dbg(dev, cl, "No flow control credentials: not sending.\n");
+		return 0;
+	}
+
+	len = buf->size - cb->buf_idx;
+	hbuf_slots = mei_hbuf_empty_slots(dev);
+	if (hbuf_slots < 0) {
+		rets = -EOVERFLOW;
+		goto err;
+	}
+
+	hbuf_len = mei_slots2data(hbuf_slots);
+
+	mei_msg_hdr_init(&mei_hdr, cb);
+
+	/**
+	 * Split the message only if we can write the whole host buffer
+	 * otherwise wait for next time the host buffer is empty.
+	 */
+	if (len + hdr_len <= hbuf_len) {
+		mei_hdr.length = len;
+		mei_hdr.msg_complete = 1;
+	} else if ((u32)hbuf_slots == mei_hbuf_depth(dev)) {
+		mei_hdr.length = hbuf_len - hdr_len;
+	} else {
+		return 0;
+	}
+
+	cl_dbg(dev, cl, "buf: size = %zu idx = %zu\n",
+			cb->buf.size, cb->buf_idx);
+
+	rets = mei_write_message(dev, &mei_hdr, hdr_len,
+				 buf->data + cb->buf_idx, mei_hdr.length);
+	if (rets)
+		goto err;
+
+	cl->status = 0;
+	cl->writing_state = MEI_WRITING;
+	cb->buf_idx += mei_hdr.length;
+
+	if (first_chunk) {
+		if (mei_cl_tx_flow_ctrl_creds_reduce(cl)) {
+			rets = -EIO;
+			goto err;
+		}
+	}
+
+	if (mei_hdr.msg_complete)
+		list_move_tail(&cb->list, &dev->write_waiting_list);
+
+	return 0;
+
+err:
+	cl->status = rets;
+	list_move_tail(&cb->list, cmpl_list);
+	return rets;
+}
+
+/**
+ * mei_cl_write - submit a write cb to mei device
+ *	assumes device_lock is locked
+ *
+ * @cl: host client
+ * @cb: write callback with filled data
+ *
+ * Return: number of bytes sent on success, <0 on failure.
+ */
+ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb)
+{
+	struct mei_device *dev;
+	struct mei_msg_data *buf;
+	struct mei_msg_hdr mei_hdr;
+	size_t hdr_len = sizeof(mei_hdr);
+	size_t len;
+	size_t hbuf_len;
+	int hbuf_slots;
+	ssize_t rets;
+	bool blocking;
+
+	if (WARN_ON(!cl || !cl->dev))
+		return -ENODEV;
+
+	if (WARN_ON(!cb))
+		return -EINVAL;
+
+	dev = cl->dev;
+
+	buf = &cb->buf;
+	len = buf->size;
+	blocking = cb->blocking;
+
+	cl_dbg(dev, cl, "len=%zd\n", len);
+
+	rets = pm_runtime_get(dev->dev);
+	if (rets < 0 && rets != -EINPROGRESS) {
+		pm_runtime_put_noidle(dev->dev);
+		cl_err(dev, cl, "rpm: get failed %zd\n", rets);
+		goto free;
+	}
+
+	cb->buf_idx = 0;
+	cl->writing_state = MEI_IDLE;
+
+
+	rets = mei_cl_tx_flow_ctrl_creds(cl);
+	if (rets < 0)
+		goto err;
+
+	mei_msg_hdr_init(&mei_hdr, cb);
+
+	if (rets == 0) {
+		cl_dbg(dev, cl, "No flow control credentials: not sending.\n");
+		rets = len;
+		goto out;
+	}
+
+	if (!mei_hbuf_acquire(dev)) {
+		cl_dbg(dev, cl, "Cannot acquire the host buffer: not sending.\n");
+		rets = len;
+		goto out;
+	}
+
+	hbuf_slots = mei_hbuf_empty_slots(dev);
+	if (hbuf_slots < 0) {
+		rets = -EOVERFLOW;
+		goto out;
+	}
+
+	hbuf_len = mei_slots2data(hbuf_slots);
+
+	if (len + hdr_len <= hbuf_len) {
+		mei_hdr.length = len;
+		mei_hdr.msg_complete = 1;
+	} else {
+		mei_hdr.length = hbuf_len - hdr_len;
+	}
+
+	rets = mei_write_message(dev, &mei_hdr, hdr_len,
+				 buf->data, mei_hdr.length);
+	if (rets)
+		goto err;
+
+	rets = mei_cl_tx_flow_ctrl_creds_reduce(cl);
+	if (rets)
+		goto err;
+
+	cl->writing_state = MEI_WRITING;
+	cb->buf_idx = mei_hdr.length;
+
+out:
+	if (mei_hdr.msg_complete)
+		mei_tx_cb_enqueue(cb, &dev->write_waiting_list);
+	else
+		mei_tx_cb_enqueue(cb, &dev->write_list);
+
+	cb = NULL;
+	if (blocking && cl->writing_state != MEI_WRITE_COMPLETE) {
+
+		mutex_unlock(&dev->device_lock);
+		rets = wait_event_interruptible(cl->tx_wait,
+				cl->writing_state == MEI_WRITE_COMPLETE ||
+				(!mei_cl_is_connected(cl)));
+		mutex_lock(&dev->device_lock);
+		/* wait_event_interruptible returns -ERESTARTSYS */
+		if (rets) {
+			if (signal_pending(current))
+				rets = -EINTR;
+			goto err;
+		}
+		if (cl->writing_state != MEI_WRITE_COMPLETE) {
+			rets = -EFAULT;
+			goto err;
+		}
+	}
+
+	rets = len;
+err:
+	cl_dbg(dev, cl, "rpm: autosuspend\n");
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_runtime_put_autosuspend(dev->dev);
+free:
+	mei_io_cb_free(cb);
+
+	return rets;
+}
+
+
+/**
+ * mei_cl_complete - processes completed operation for a client
+ *
+ * @cl: private data of the file object.
+ * @cb: callback block.
+ */
+void mei_cl_complete(struct mei_cl *cl, struct mei_cl_cb *cb)
+{
+	struct mei_device *dev = cl->dev;
+
+	switch (cb->fop_type) {
+	case MEI_FOP_WRITE:
+		mei_tx_cb_dequeue(cb);
+		cl->writing_state = MEI_WRITE_COMPLETE;
+		if (waitqueue_active(&cl->tx_wait)) {
+			wake_up_interruptible(&cl->tx_wait);
+		} else {
+			pm_runtime_mark_last_busy(dev->dev);
+			pm_request_autosuspend(dev->dev);
+		}
+		break;
+
+	case MEI_FOP_READ:
+		list_add_tail(&cb->list, &cl->rd_completed);
+		if (!mei_cl_is_fixed_address(cl) &&
+		    !WARN_ON(!cl->rx_flow_ctrl_creds))
+			cl->rx_flow_ctrl_creds--;
+		if (!mei_cl_bus_rx_event(cl))
+			wake_up_interruptible(&cl->rx_wait);
+		break;
+
+	case MEI_FOP_CONNECT:
+	case MEI_FOP_DISCONNECT:
+	case MEI_FOP_NOTIFY_STOP:
+	case MEI_FOP_NOTIFY_START:
+		if (waitqueue_active(&cl->wait))
+			wake_up(&cl->wait);
+
+		break;
+	case MEI_FOP_DISCONNECT_RSP:
+		mei_io_cb_free(cb);
+		mei_cl_set_disconnected(cl);
+		break;
+	default:
+		BUG_ON(0);
+	}
+}
+
+
+/**
+ * mei_cl_all_disconnect - disconnect forcefully all connected clients
+ *
+ * @dev: mei device
+ */
+void mei_cl_all_disconnect(struct mei_device *dev)
+{
+	struct mei_cl *cl;
+
+	list_for_each_entry(cl, &dev->file_list, link)
+		mei_cl_set_disconnected(cl);
+}
diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h
new file mode 100644
index 000000000..0d23efcc7
--- /dev/null
+++ b/drivers/misc/mei/client.h
@@ -0,0 +1,236 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef _MEI_CLIENT_H_
+#define _MEI_CLIENT_H_
+
+#include <linux/types.h>
+#include <linux/poll.h>
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+
+/*
+ * reference counting base function
+ */
+void mei_me_cl_init(struct mei_me_client *me_cl);
+void mei_me_cl_put(struct mei_me_client *me_cl);
+struct mei_me_client *mei_me_cl_get(struct mei_me_client *me_cl);
+
+void mei_me_cl_add(struct mei_device *dev, struct mei_me_client *me_cl);
+void mei_me_cl_del(struct mei_device *dev, struct mei_me_client *me_cl);
+
+struct mei_me_client *mei_me_cl_by_uuid(struct mei_device *dev,
+					const uuid_le *uuid);
+struct mei_me_client *mei_me_cl_by_id(struct mei_device *dev, u8 client_id);
+struct mei_me_client *mei_me_cl_by_uuid_id(struct mei_device *dev,
+					   const uuid_le *uuid, u8 client_id);
+void mei_me_cl_rm_by_uuid(struct mei_device *dev, const uuid_le *uuid);
+void mei_me_cl_rm_by_uuid_id(struct mei_device *dev,
+			     const uuid_le *uuid, u8 id);
+void mei_me_cl_rm_all(struct mei_device *dev);
+
+/**
+ * mei_me_cl_is_active - check whether me client is active in the fw
+ *
+ * @me_cl: me client
+ *
+ * Return: true if the me client is active in the firmware
+ */
+static inline bool mei_me_cl_is_active(const struct mei_me_client *me_cl)
+{
+	return !list_empty_careful(&me_cl->list);
+}
+
+/**
+ * mei_me_cl_uuid - return me client protocol name (uuid)
+ *
+ * @me_cl: me client
+ *
+ * Return: me client protocol name
+ */
+static inline const uuid_le *mei_me_cl_uuid(const struct mei_me_client *me_cl)
+{
+	return &me_cl->props.protocol_name;
+}
+
+/**
+ * mei_me_cl_ver - return me client protocol version
+ *
+ * @me_cl: me client
+ *
+ * Return: me client protocol version
+ */
+static inline u8 mei_me_cl_ver(const struct mei_me_client *me_cl)
+{
+	return me_cl->props.protocol_version;
+}
+
+/*
+ * MEI IO Functions
+ */
+void mei_io_cb_free(struct mei_cl_cb *priv_cb);
+
+/*
+ * MEI Host Client Functions
+ */
+
+struct mei_cl *mei_cl_allocate(struct mei_device *dev);
+
+int mei_cl_link(struct mei_cl *cl);
+int mei_cl_unlink(struct mei_cl *cl);
+
+struct mei_cl *mei_cl_alloc_linked(struct mei_device *dev);
+
+struct mei_cl_cb *mei_cl_read_cb(const struct mei_cl *cl,
+				 const struct file *fp);
+struct mei_cl_cb *mei_cl_alloc_cb(struct mei_cl *cl, size_t length,
+				  enum mei_cb_file_ops type,
+				  const struct file *fp);
+struct mei_cl_cb *mei_cl_enqueue_ctrl_wr_cb(struct mei_cl *cl, size_t length,
+					    enum mei_cb_file_ops type,
+					    const struct file *fp);
+int mei_cl_flush_queues(struct mei_cl *cl, const struct file *fp);
+
+/*
+ *  MEI input output function prototype
+ */
+
+/**
+ * mei_cl_is_connected - host client is connected
+ *
+ * @cl: host client
+ *
+ * Return: true if the host client is connected
+ */
+static inline bool mei_cl_is_connected(struct mei_cl *cl)
+{
+	return  cl->state == MEI_FILE_CONNECTED;
+}
+
+/**
+ * mei_cl_me_id - me client id
+ *
+ * @cl: host client
+ *
+ * Return: me client id or 0 if client is not connected
+ */
+static inline u8 mei_cl_me_id(const struct mei_cl *cl)
+{
+	return cl->me_cl ? cl->me_cl->client_id : 0;
+}
+
+/**
+ * mei_cl_mtu - maximal message that client can send and receive
+ *
+ * @cl: host client
+ *
+ * Return: mtu or 0 if client is not connected
+ */
+static inline size_t mei_cl_mtu(const struct mei_cl *cl)
+{
+	return cl->me_cl ? cl->me_cl->props.max_msg_length : 0;
+}
+
+/**
+ * mei_cl_is_fixed_address - check whether the me client uses fixed address
+ *
+ * @cl: host client
+ *
+ * Return: true if the client is connected and it has fixed me address
+ */
+static inline bool mei_cl_is_fixed_address(const struct mei_cl *cl)
+{
+	return cl->me_cl && cl->me_cl->props.fixed_address;
+}
+
+/**
+ * mei_cl_is_single_recv_buf- check whether the me client
+ *       uses single receiving buffer
+ *
+ * @cl: host client
+ *
+ * Return: true if single_recv_buf == 1; 0 otherwise
+ */
+static inline bool mei_cl_is_single_recv_buf(const struct mei_cl *cl)
+{
+	return cl->me_cl->props.single_recv_buf;
+}
+
+/**
+ * mei_cl_uuid -  client's uuid
+ *
+ * @cl: host client
+ *
+ * Return: return uuid of connected me client
+ */
+static inline const uuid_le *mei_cl_uuid(const struct mei_cl *cl)
+{
+	return mei_me_cl_uuid(cl->me_cl);
+}
+
+/**
+ * mei_cl_host_addr - client's host address
+ *
+ * @cl: host client
+ *
+ * Return: 0 for fixed address client, host address for dynamic client
+ */
+static inline u8 mei_cl_host_addr(const struct mei_cl *cl)
+{
+	return  mei_cl_is_fixed_address(cl) ? 0 : cl->host_client_id;
+}
+
+int mei_cl_disconnect(struct mei_cl *cl);
+int mei_cl_irq_disconnect(struct mei_cl *cl, struct mei_cl_cb *cb,
+			  struct list_head *cmpl_list);
+int mei_cl_connect(struct mei_cl *cl, struct mei_me_client *me_cl,
+		   const struct file *file);
+int mei_cl_irq_connect(struct mei_cl *cl, struct mei_cl_cb *cb,
+		       struct list_head *cmpl_list);
+int mei_cl_read_start(struct mei_cl *cl, size_t length, const struct file *fp);
+ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb);
+int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb,
+		     struct list_head *cmpl_list);
+
+void mei_cl_complete(struct mei_cl *cl, struct mei_cl_cb *cb);
+
+void mei_host_client_init(struct mei_device *dev);
+
+u8 mei_cl_notify_fop2req(enum mei_cb_file_ops fop);
+enum mei_cb_file_ops mei_cl_notify_req2fop(u8 request);
+int mei_cl_notify_request(struct mei_cl *cl,
+			  const struct file *file, u8 request);
+int mei_cl_irq_notify(struct mei_cl *cl, struct mei_cl_cb *cb,
+		      struct list_head *cmpl_list);
+int mei_cl_notify_get(struct mei_cl *cl, bool block, bool *notify_ev);
+void mei_cl_notify(struct mei_cl *cl);
+
+void mei_cl_all_disconnect(struct mei_device *dev);
+
+#define MEI_CL_FMT "cl:host=%02d me=%02d "
+#define MEI_CL_PRM(cl) (cl)->host_client_id, mei_cl_me_id(cl)
+
+#define cl_dbg(dev, cl, format, arg...) \
+	dev_dbg((dev)->dev, MEI_CL_FMT format, MEI_CL_PRM(cl), ##arg)
+
+#define cl_warn(dev, cl, format, arg...) \
+	dev_warn((dev)->dev, MEI_CL_FMT format, MEI_CL_PRM(cl), ##arg)
+
+#define cl_err(dev, cl, format, arg...) \
+	dev_err((dev)->dev, MEI_CL_FMT format, MEI_CL_PRM(cl), ##arg)
+
+#endif /* _MEI_CLIENT_H_ */
diff --git a/drivers/misc/mei/debugfs.c b/drivers/misc/mei/debugfs.c
new file mode 100644
index 000000000..7b5df8fd6
--- /dev/null
+++ b/drivers/misc/mei/debugfs.c
@@ -0,0 +1,288 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2012-2013, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/debugfs.h>
+
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+#include "client.h"
+#include "hw.h"
+
+static ssize_t mei_dbgfs_read_meclients(struct file *fp, char __user *ubuf,
+					size_t cnt, loff_t *ppos)
+{
+	struct mei_device *dev = fp->private_data;
+	struct mei_me_client *me_cl;
+	size_t bufsz = 1;
+	char *buf;
+	int i = 0;
+	int pos = 0;
+	int ret;
+
+#define HDR \
+"  |id|fix|         UUID                       |con|msg len|sb|refc|\n"
+
+	down_read(&dev->me_clients_rwsem);
+	list_for_each_entry(me_cl, &dev->me_clients, list)
+		bufsz++;
+
+	bufsz *= sizeof(HDR) + 1;
+	buf = kzalloc(bufsz, GFP_KERNEL);
+	if (!buf) {
+		up_read(&dev->me_clients_rwsem);
+		return -ENOMEM;
+	}
+
+	pos += scnprintf(buf + pos, bufsz - pos, HDR);
+#undef HDR
+
+	/*  if the driver is not enabled the list won't be consistent */
+	if (dev->dev_state != MEI_DEV_ENABLED)
+		goto out;
+
+	list_for_each_entry(me_cl, &dev->me_clients, list) {
+
+		if (mei_me_cl_get(me_cl)) {
+			pos += scnprintf(buf + pos, bufsz - pos,
+				"%2d|%2d|%3d|%pUl|%3d|%7d|%2d|%4d|\n",
+				i++, me_cl->client_id,
+				me_cl->props.fixed_address,
+				&me_cl->props.protocol_name,
+				me_cl->props.max_number_of_connections,
+				me_cl->props.max_msg_length,
+				me_cl->props.single_recv_buf,
+				kref_read(&me_cl->refcnt));
+
+			mei_me_cl_put(me_cl);
+		}
+	}
+
+out:
+	up_read(&dev->me_clients_rwsem);
+	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, pos);
+	kfree(buf);
+	return ret;
+}
+
+static const struct file_operations mei_dbgfs_fops_meclients = {
+	.open = simple_open,
+	.read = mei_dbgfs_read_meclients,
+	.llseek = generic_file_llseek,
+};
+
+static ssize_t mei_dbgfs_read_active(struct file *fp, char __user *ubuf,
+					size_t cnt, loff_t *ppos)
+{
+	struct mei_device *dev = fp->private_data;
+	struct mei_cl *cl;
+	size_t bufsz = 1;
+	char *buf;
+	int i = 0;
+	int pos = 0;
+	int ret;
+
+#define HDR "   |me|host|state|rd|wr|wrq\n"
+
+	if (!dev)
+		return -ENODEV;
+
+	mutex_lock(&dev->device_lock);
+
+	/*
+	 * if the driver is not enabled the list won't be consistent,
+	 * we output empty table
+	 */
+	if (dev->dev_state == MEI_DEV_ENABLED)
+		list_for_each_entry(cl, &dev->file_list, link)
+			bufsz++;
+
+	bufsz *= sizeof(HDR) + 1;
+
+	buf = kzalloc(bufsz, GFP_KERNEL);
+	if  (!buf) {
+		mutex_unlock(&dev->device_lock);
+		return -ENOMEM;
+	}
+
+	pos += scnprintf(buf + pos, bufsz - pos, HDR);
+#undef HDR
+
+	/*  if the driver is not enabled the list won't be consistent */
+	if (dev->dev_state != MEI_DEV_ENABLED)
+		goto out;
+
+	list_for_each_entry(cl, &dev->file_list, link) {
+
+		pos += scnprintf(buf + pos, bufsz - pos,
+			"%3d|%2d|%4d|%5d|%2d|%2d|%3u\n",
+			i, mei_cl_me_id(cl), cl->host_client_id, cl->state,
+			!list_empty(&cl->rd_completed), cl->writing_state,
+			cl->tx_cb_queued);
+		i++;
+	}
+out:
+	mutex_unlock(&dev->device_lock);
+	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, pos);
+	kfree(buf);
+	return ret;
+}
+
+static const struct file_operations mei_dbgfs_fops_active = {
+	.open = simple_open,
+	.read = mei_dbgfs_read_active,
+	.llseek = generic_file_llseek,
+};
+
+static ssize_t mei_dbgfs_read_devstate(struct file *fp, char __user *ubuf,
+					size_t cnt, loff_t *ppos)
+{
+	struct mei_device *dev = fp->private_data;
+	const size_t bufsz = 1024;
+	char *buf = kzalloc(bufsz, GFP_KERNEL);
+	int pos = 0;
+	int ret;
+
+	if  (!buf)
+		return -ENOMEM;
+
+	pos += scnprintf(buf + pos, bufsz - pos, "dev: %s\n",
+			mei_dev_state_str(dev->dev_state));
+	pos += scnprintf(buf + pos, bufsz - pos, "hbm: %s\n",
+			mei_hbm_state_str(dev->hbm_state));
+
+	if (dev->hbm_state >= MEI_HBM_ENUM_CLIENTS &&
+	    dev->hbm_state <= MEI_HBM_STARTED) {
+		pos += scnprintf(buf + pos, bufsz - pos, "hbm features:\n");
+		pos += scnprintf(buf + pos, bufsz - pos, "\tPG: %01d\n",
+				 dev->hbm_f_pg_supported);
+		pos += scnprintf(buf + pos, bufsz - pos, "\tDC: %01d\n",
+				 dev->hbm_f_dc_supported);
+		pos += scnprintf(buf + pos, bufsz - pos, "\tIE: %01d\n",
+				 dev->hbm_f_ie_supported);
+		pos += scnprintf(buf + pos, bufsz - pos, "\tDOT: %01d\n",
+				 dev->hbm_f_dot_supported);
+		pos += scnprintf(buf + pos, bufsz - pos, "\tEV: %01d\n",
+				 dev->hbm_f_ev_supported);
+		pos += scnprintf(buf + pos, bufsz - pos, "\tFA: %01d\n",
+				 dev->hbm_f_fa_supported);
+		pos += scnprintf(buf + pos, bufsz - pos, "\tOS: %01d\n",
+				 dev->hbm_f_os_supported);
+		pos += scnprintf(buf + pos, bufsz - pos, "\tDR: %01d\n",
+				 dev->hbm_f_dr_supported);
+	}
+
+	pos += scnprintf(buf + pos, bufsz - pos, "pg:  %s, %s\n",
+			mei_pg_is_enabled(dev) ? "ENABLED" : "DISABLED",
+			mei_pg_state_str(mei_pg_state(dev)));
+	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, pos);
+	kfree(buf);
+	return ret;
+}
+static const struct file_operations mei_dbgfs_fops_devstate = {
+	.open = simple_open,
+	.read = mei_dbgfs_read_devstate,
+	.llseek = generic_file_llseek,
+};
+
+static ssize_t mei_dbgfs_write_allow_fa(struct file *file,
+					const char __user *user_buf,
+					size_t count, loff_t *ppos)
+{
+	struct mei_device *dev;
+	int ret;
+
+	dev = container_of(file->private_data,
+			   struct mei_device, allow_fixed_address);
+
+	ret = debugfs_write_file_bool(file, user_buf, count, ppos);
+	if (ret < 0)
+		return ret;
+	dev->override_fixed_address = true;
+	return ret;
+}
+
+static const struct file_operations mei_dbgfs_fops_allow_fa = {
+	.open = simple_open,
+	.read = debugfs_read_file_bool,
+	.write = mei_dbgfs_write_allow_fa,
+	.llseek = generic_file_llseek,
+};
+
+/**
+ * mei_dbgfs_deregister - Remove the debugfs files and directories
+ *
+ * @dev: the mei device structure
+ */
+void mei_dbgfs_deregister(struct mei_device *dev)
+{
+	if (!dev->dbgfs_dir)
+		return;
+	debugfs_remove_recursive(dev->dbgfs_dir);
+	dev->dbgfs_dir = NULL;
+}
+
+/**
+ * mei_dbgfs_register - Add the debugfs files
+ *
+ * @dev: the mei device structure
+ * @name: the mei device name
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+int mei_dbgfs_register(struct mei_device *dev, const char *name)
+{
+	struct dentry *dir, *f;
+
+	dir = debugfs_create_dir(name, NULL);
+	if (!dir)
+		return -ENOMEM;
+
+	dev->dbgfs_dir = dir;
+
+	f = debugfs_create_file("meclients", S_IRUSR, dir,
+				dev, &mei_dbgfs_fops_meclients);
+	if (!f) {
+		dev_err(dev->dev, "meclients: registration failed\n");
+		goto err;
+	}
+	f = debugfs_create_file("active", S_IRUSR, dir,
+				dev, &mei_dbgfs_fops_active);
+	if (!f) {
+		dev_err(dev->dev, "active: registration failed\n");
+		goto err;
+	}
+	f = debugfs_create_file("devstate", S_IRUSR, dir,
+				dev, &mei_dbgfs_fops_devstate);
+	if (!f) {
+		dev_err(dev->dev, "devstate: registration failed\n");
+		goto err;
+	}
+	f = debugfs_create_file("allow_fixed_address", S_IRUSR | S_IWUSR, dir,
+				&dev->allow_fixed_address,
+				&mei_dbgfs_fops_allow_fa);
+	if (!f) {
+		dev_err(dev->dev, "allow_fixed_address: registration failed\n");
+		goto err;
+	}
+	return 0;
+err:
+	mei_dbgfs_deregister(dev);
+	return -ENODEV;
+}
+
diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c
new file mode 100644
index 000000000..d39cc2909
--- /dev/null
+++ b/drivers/misc/mei/hbm.c
@@ -0,0 +1,1287 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+#include "hbm.h"
+#include "client.h"
+
+static const char *mei_hbm_status_str(enum mei_hbm_status status)
+{
+#define MEI_HBM_STATUS(status) case MEI_HBMS_##status: return #status
+	switch (status) {
+	MEI_HBM_STATUS(SUCCESS);
+	MEI_HBM_STATUS(CLIENT_NOT_FOUND);
+	MEI_HBM_STATUS(ALREADY_EXISTS);
+	MEI_HBM_STATUS(REJECTED);
+	MEI_HBM_STATUS(INVALID_PARAMETER);
+	MEI_HBM_STATUS(NOT_ALLOWED);
+	MEI_HBM_STATUS(ALREADY_STARTED);
+	MEI_HBM_STATUS(NOT_STARTED);
+	default: return "unknown";
+	}
+#undef MEI_HBM_STATUS
+};
+
+static const char *mei_cl_conn_status_str(enum mei_cl_connect_status status)
+{
+#define MEI_CL_CS(status) case MEI_CL_CONN_##status: return #status
+	switch (status) {
+	MEI_CL_CS(SUCCESS);
+	MEI_CL_CS(NOT_FOUND);
+	MEI_CL_CS(ALREADY_STARTED);
+	MEI_CL_CS(OUT_OF_RESOURCES);
+	MEI_CL_CS(MESSAGE_SMALL);
+	MEI_CL_CS(NOT_ALLOWED);
+	default: return "unknown";
+	}
+#undef MEI_CL_CCS
+}
+
+const char *mei_hbm_state_str(enum mei_hbm_state state)
+{
+#define MEI_HBM_STATE(state) case MEI_HBM_##state: return #state
+	switch (state) {
+	MEI_HBM_STATE(IDLE);
+	MEI_HBM_STATE(STARTING);
+	MEI_HBM_STATE(STARTED);
+	MEI_HBM_STATE(ENUM_CLIENTS);
+	MEI_HBM_STATE(CLIENT_PROPERTIES);
+	MEI_HBM_STATE(STOPPED);
+	default:
+		return "unknown";
+	}
+#undef MEI_HBM_STATE
+}
+
+/**
+ * mei_cl_conn_status_to_errno - convert client connect response
+ * status to error code
+ *
+ * @status: client connect response status
+ *
+ * Return: corresponding error code
+ */
+static int mei_cl_conn_status_to_errno(enum mei_cl_connect_status status)
+{
+	switch (status) {
+	case MEI_CL_CONN_SUCCESS:          return 0;
+	case MEI_CL_CONN_NOT_FOUND:        return -ENOTTY;
+	case MEI_CL_CONN_ALREADY_STARTED:  return -EBUSY;
+	case MEI_CL_CONN_OUT_OF_RESOURCES: return -EBUSY;
+	case MEI_CL_CONN_MESSAGE_SMALL:    return -EINVAL;
+	case MEI_CL_CONN_NOT_ALLOWED:      return -EBUSY;
+	default:                           return -EINVAL;
+	}
+}
+
+/**
+ * mei_hbm_write_message - wrapper for sending hbm messages.
+ *
+ * @dev: mei device
+ * @hdr: mei header
+ * @data: payload
+ */
+static inline int mei_hbm_write_message(struct mei_device *dev,
+					struct mei_msg_hdr *hdr,
+					const void *data)
+{
+	return mei_write_message(dev, hdr, sizeof(*hdr), data, hdr->length);
+}
+
+/**
+ * mei_hbm_idle - set hbm to idle state
+ *
+ * @dev: the device structure
+ */
+void mei_hbm_idle(struct mei_device *dev)
+{
+	dev->init_clients_timer = 0;
+	dev->hbm_state = MEI_HBM_IDLE;
+}
+
+/**
+ * mei_hbm_reset - reset hbm counters and book keeping data structurs
+ *
+ * @dev: the device structure
+ */
+void mei_hbm_reset(struct mei_device *dev)
+{
+	mei_me_cl_rm_all(dev);
+
+	mei_hbm_idle(dev);
+}
+
+/**
+ * mei_hbm_hdr - construct hbm header
+ *
+ * @hdr: hbm header
+ * @length: payload length
+ */
+
+static inline void mei_hbm_hdr(struct mei_msg_hdr *hdr, size_t length)
+{
+	hdr->host_addr = 0;
+	hdr->me_addr = 0;
+	hdr->length = length;
+	hdr->msg_complete = 1;
+	hdr->dma_ring = 0;
+	hdr->reserved = 0;
+	hdr->internal = 0;
+}
+
+/**
+ * mei_hbm_cl_hdr - construct client hbm header
+ *
+ * @cl: client
+ * @hbm_cmd: host bus message command
+ * @buf: buffer for cl header
+ * @len: buffer length
+ */
+static inline
+void mei_hbm_cl_hdr(struct mei_cl *cl, u8 hbm_cmd, void *buf, size_t len)
+{
+	struct mei_hbm_cl_cmd *cmd = buf;
+
+	memset(cmd, 0, len);
+
+	cmd->hbm_cmd = hbm_cmd;
+	cmd->host_addr = mei_cl_host_addr(cl);
+	cmd->me_addr = mei_cl_me_id(cl);
+}
+
+/**
+ * mei_hbm_cl_write - write simple hbm client message
+ *
+ * @dev: the device structure
+ * @cl: client
+ * @hbm_cmd: host bus message command
+ * @buf: message buffer
+ * @len: buffer length
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+static inline int mei_hbm_cl_write(struct mei_device *dev, struct mei_cl *cl,
+				   u8 hbm_cmd, void *buf, size_t len)
+{
+	struct mei_msg_hdr mei_hdr;
+
+	mei_hbm_hdr(&mei_hdr, len);
+	mei_hbm_cl_hdr(cl, hbm_cmd, buf, len);
+
+	return mei_hbm_write_message(dev, &mei_hdr, buf);
+}
+
+/**
+ * mei_hbm_cl_addr_equal - check if the client's and
+ *	the message address match
+ *
+ * @cl: client
+ * @cmd: hbm client message
+ *
+ * Return: true if addresses are the same
+ */
+static inline
+bool mei_hbm_cl_addr_equal(struct mei_cl *cl, struct mei_hbm_cl_cmd *cmd)
+{
+	return  mei_cl_host_addr(cl) == cmd->host_addr &&
+		mei_cl_me_id(cl) == cmd->me_addr;
+}
+
+/**
+ * mei_hbm_cl_find_by_cmd - find recipient client
+ *
+ * @dev: the device structure
+ * @buf: a buffer with hbm cl command
+ *
+ * Return: the recipient client or NULL if not found
+ */
+static inline
+struct mei_cl *mei_hbm_cl_find_by_cmd(struct mei_device *dev, void *buf)
+{
+	struct mei_hbm_cl_cmd *cmd = (struct mei_hbm_cl_cmd *)buf;
+	struct mei_cl *cl;
+
+	list_for_each_entry(cl, &dev->file_list, link)
+		if (mei_hbm_cl_addr_equal(cl, cmd))
+			return cl;
+	return NULL;
+}
+
+
+/**
+ * mei_hbm_start_wait - wait for start response message.
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success and < 0 on failure
+ */
+int mei_hbm_start_wait(struct mei_device *dev)
+{
+	int ret;
+
+	if (dev->hbm_state > MEI_HBM_STARTING)
+		return 0;
+
+	mutex_unlock(&dev->device_lock);
+	ret = wait_event_timeout(dev->wait_hbm_start,
+			dev->hbm_state != MEI_HBM_STARTING,
+			mei_secs_to_jiffies(MEI_HBM_TIMEOUT));
+	mutex_lock(&dev->device_lock);
+
+	if (ret == 0 && (dev->hbm_state <= MEI_HBM_STARTING)) {
+		dev->hbm_state = MEI_HBM_IDLE;
+		dev_err(dev->dev, "waiting for mei start failed\n");
+		return -ETIME;
+	}
+	return 0;
+}
+
+/**
+ * mei_hbm_start_req - sends start request message.
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success and < 0 on failure
+ */
+int mei_hbm_start_req(struct mei_device *dev)
+{
+	struct mei_msg_hdr mei_hdr;
+	struct hbm_host_version_request start_req;
+	const size_t len = sizeof(struct hbm_host_version_request);
+	int ret;
+
+	mei_hbm_reset(dev);
+
+	mei_hbm_hdr(&mei_hdr, len);
+
+	/* host start message */
+	memset(&start_req, 0, len);
+	start_req.hbm_cmd = HOST_START_REQ_CMD;
+	start_req.host_version.major_version = HBM_MAJOR_VERSION;
+	start_req.host_version.minor_version = HBM_MINOR_VERSION;
+
+	dev->hbm_state = MEI_HBM_IDLE;
+	ret = mei_hbm_write_message(dev, &mei_hdr, &start_req);
+	if (ret) {
+		dev_err(dev->dev, "version message write failed: ret = %d\n",
+			ret);
+		return ret;
+	}
+
+	dev->hbm_state = MEI_HBM_STARTING;
+	dev->init_clients_timer = MEI_CLIENTS_INIT_TIMEOUT;
+	mei_schedule_stall_timer(dev);
+	return 0;
+}
+
+/**
+ * mei_hbm_enum_clients_req - sends enumeration client request message.
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success and < 0 on failure
+ */
+static int mei_hbm_enum_clients_req(struct mei_device *dev)
+{
+	struct mei_msg_hdr mei_hdr;
+	struct hbm_host_enum_request enum_req;
+	const size_t len = sizeof(struct hbm_host_enum_request);
+	int ret;
+
+	/* enumerate clients */
+	mei_hbm_hdr(&mei_hdr, len);
+
+	memset(&enum_req, 0, len);
+	enum_req.hbm_cmd = HOST_ENUM_REQ_CMD;
+	enum_req.flags |= dev->hbm_f_dc_supported ?
+			  MEI_HBM_ENUM_F_ALLOW_ADD : 0;
+	enum_req.flags |= dev->hbm_f_ie_supported ?
+			  MEI_HBM_ENUM_F_IMMEDIATE_ENUM : 0;
+
+	ret = mei_hbm_write_message(dev, &mei_hdr, &enum_req);
+	if (ret) {
+		dev_err(dev->dev, "enumeration request write failed: ret = %d.\n",
+			ret);
+		return ret;
+	}
+	dev->hbm_state = MEI_HBM_ENUM_CLIENTS;
+	dev->init_clients_timer = MEI_CLIENTS_INIT_TIMEOUT;
+	mei_schedule_stall_timer(dev);
+	return 0;
+}
+
+/**
+ * mei_hbm_me_cl_add - add new me client to the list
+ *
+ * @dev: the device structure
+ * @res: hbm property response
+ *
+ * Return: 0 on success and -ENOMEM on allocation failure
+ */
+
+static int mei_hbm_me_cl_add(struct mei_device *dev,
+			     struct hbm_props_response *res)
+{
+	struct mei_me_client *me_cl;
+	const uuid_le *uuid = &res->client_properties.protocol_name;
+
+	mei_me_cl_rm_by_uuid(dev, uuid);
+
+	me_cl = kzalloc(sizeof(struct mei_me_client), GFP_KERNEL);
+	if (!me_cl)
+		return -ENOMEM;
+
+	mei_me_cl_init(me_cl);
+
+	me_cl->props = res->client_properties;
+	me_cl->client_id = res->me_addr;
+	me_cl->tx_flow_ctrl_creds = 0;
+
+	mei_me_cl_add(dev, me_cl);
+
+	return 0;
+}
+
+/**
+ * mei_hbm_add_cl_resp - send response to fw on client add request
+ *
+ * @dev: the device structure
+ * @addr: me address
+ * @status: response status
+ *
+ * Return: 0 on success and < 0 on failure
+ */
+static int mei_hbm_add_cl_resp(struct mei_device *dev, u8 addr, u8 status)
+{
+	struct mei_msg_hdr mei_hdr;
+	struct hbm_add_client_response resp;
+	const size_t len = sizeof(struct hbm_add_client_response);
+	int ret;
+
+	dev_dbg(dev->dev, "adding client response\n");
+
+	mei_hbm_hdr(&mei_hdr, len);
+
+	memset(&resp, 0, sizeof(struct hbm_add_client_response));
+	resp.hbm_cmd = MEI_HBM_ADD_CLIENT_RES_CMD;
+	resp.me_addr = addr;
+	resp.status  = status;
+
+	ret = mei_hbm_write_message(dev, &mei_hdr, &resp);
+	if (ret)
+		dev_err(dev->dev, "add client response write failed: ret = %d\n",
+			ret);
+	return ret;
+}
+
+/**
+ * mei_hbm_fw_add_cl_req - request from the fw to add a client
+ *
+ * @dev: the device structure
+ * @req: add client request
+ *
+ * Return: 0 on success and < 0 on failure
+ */
+static int mei_hbm_fw_add_cl_req(struct mei_device *dev,
+			      struct hbm_add_client_request *req)
+{
+	int ret;
+	u8 status = MEI_HBMS_SUCCESS;
+
+	BUILD_BUG_ON(sizeof(struct hbm_add_client_request) !=
+			sizeof(struct hbm_props_response));
+
+	ret = mei_hbm_me_cl_add(dev, (struct hbm_props_response *)req);
+	if (ret)
+		status = !MEI_HBMS_SUCCESS;
+
+	if (dev->dev_state == MEI_DEV_ENABLED)
+		schedule_work(&dev->bus_rescan_work);
+
+	return mei_hbm_add_cl_resp(dev, req->me_addr, status);
+}
+
+/**
+ * mei_hbm_cl_notify_req - send notification request
+ *
+ * @dev: the device structure
+ * @cl: a client to disconnect from
+ * @start: true for start false for stop
+ *
+ * Return: 0 on success and -EIO on write failure
+ */
+int mei_hbm_cl_notify_req(struct mei_device *dev,
+			  struct mei_cl *cl, u8 start)
+{
+
+	struct mei_msg_hdr mei_hdr;
+	struct hbm_notification_request req;
+	const size_t len = sizeof(struct hbm_notification_request);
+	int ret;
+
+	mei_hbm_hdr(&mei_hdr, len);
+	mei_hbm_cl_hdr(cl, MEI_HBM_NOTIFY_REQ_CMD, &req, len);
+
+	req.start = start;
+
+	ret = mei_hbm_write_message(dev, &mei_hdr, &req);
+	if (ret)
+		dev_err(dev->dev, "notify request failed: ret = %d\n", ret);
+
+	return ret;
+}
+
+/**
+ *  notify_res_to_fop - convert notification response to the proper
+ *      notification FOP
+ *
+ * @cmd: client notification start response command
+ *
+ * Return:  MEI_FOP_NOTIFY_START or MEI_FOP_NOTIFY_STOP;
+ */
+static inline enum mei_cb_file_ops notify_res_to_fop(struct mei_hbm_cl_cmd *cmd)
+{
+	struct hbm_notification_response *rs =
+		(struct hbm_notification_response *)cmd;
+
+	return mei_cl_notify_req2fop(rs->start);
+}
+
+/**
+ * mei_hbm_cl_notify_start_res - update the client state according
+ *       notify start response
+ *
+ * @dev: the device structure
+ * @cl: mei host client
+ * @cmd: client notification start response command
+ */
+static void mei_hbm_cl_notify_start_res(struct mei_device *dev,
+					struct mei_cl *cl,
+					struct mei_hbm_cl_cmd *cmd)
+{
+	struct hbm_notification_response *rs =
+		(struct hbm_notification_response *)cmd;
+
+	cl_dbg(dev, cl, "hbm: notify start response status=%d\n", rs->status);
+
+	if (rs->status == MEI_HBMS_SUCCESS ||
+	    rs->status == MEI_HBMS_ALREADY_STARTED) {
+		cl->notify_en = true;
+		cl->status = 0;
+	} else {
+		cl->status = -EINVAL;
+	}
+}
+
+/**
+ * mei_hbm_cl_notify_stop_res - update the client state according
+ *       notify stop response
+ *
+ * @dev: the device structure
+ * @cl: mei host client
+ * @cmd: client notification stop response command
+ */
+static void mei_hbm_cl_notify_stop_res(struct mei_device *dev,
+				       struct mei_cl *cl,
+				       struct mei_hbm_cl_cmd *cmd)
+{
+	struct hbm_notification_response *rs =
+		(struct hbm_notification_response *)cmd;
+
+	cl_dbg(dev, cl, "hbm: notify stop response status=%d\n", rs->status);
+
+	if (rs->status == MEI_HBMS_SUCCESS ||
+	    rs->status == MEI_HBMS_NOT_STARTED) {
+		cl->notify_en = false;
+		cl->status = 0;
+	} else {
+		/* TODO: spec is not clear yet about other possible issues */
+		cl->status = -EINVAL;
+	}
+}
+
+/**
+ * mei_hbm_cl_notify - signal notification event
+ *
+ * @dev: the device structure
+ * @cmd: notification client message
+ */
+static void mei_hbm_cl_notify(struct mei_device *dev,
+			      struct mei_hbm_cl_cmd *cmd)
+{
+	struct mei_cl *cl;
+
+	cl = mei_hbm_cl_find_by_cmd(dev, cmd);
+	if (cl)
+		mei_cl_notify(cl);
+}
+
+/**
+ * mei_hbm_prop_req - request property for a single client
+ *
+ * @dev: the device structure
+ * @start_idx: client index to start search
+ *
+ * Return: 0 on success and < 0 on failure
+ */
+static int mei_hbm_prop_req(struct mei_device *dev, unsigned long start_idx)
+{
+	struct mei_msg_hdr mei_hdr;
+	struct hbm_props_request prop_req;
+	const size_t len = sizeof(struct hbm_props_request);
+	unsigned long addr;
+	int ret;
+
+	addr = find_next_bit(dev->me_clients_map, MEI_CLIENTS_MAX, start_idx);
+
+	/* We got all client properties */
+	if (addr == MEI_CLIENTS_MAX) {
+		dev->hbm_state = MEI_HBM_STARTED;
+		mei_host_client_init(dev);
+
+		return 0;
+	}
+
+	mei_hbm_hdr(&mei_hdr, len);
+
+	memset(&prop_req, 0, sizeof(struct hbm_props_request));
+
+	prop_req.hbm_cmd = HOST_CLIENT_PROPERTIES_REQ_CMD;
+	prop_req.me_addr = addr;
+
+	ret = mei_hbm_write_message(dev, &mei_hdr, &prop_req);
+	if (ret) {
+		dev_err(dev->dev, "properties request write failed: ret = %d\n",
+			ret);
+		return ret;
+	}
+
+	dev->init_clients_timer = MEI_CLIENTS_INIT_TIMEOUT;
+	mei_schedule_stall_timer(dev);
+
+	return 0;
+}
+
+/**
+ * mei_hbm_pg - sends pg command
+ *
+ * @dev: the device structure
+ * @pg_cmd: the pg command code
+ *
+ * Return: -EIO on write failure
+ *         -EOPNOTSUPP if the operation is not supported by the protocol
+ */
+int mei_hbm_pg(struct mei_device *dev, u8 pg_cmd)
+{
+	struct mei_msg_hdr mei_hdr;
+	struct hbm_power_gate req;
+	const size_t len = sizeof(struct hbm_power_gate);
+	int ret;
+
+	if (!dev->hbm_f_pg_supported)
+		return -EOPNOTSUPP;
+
+	mei_hbm_hdr(&mei_hdr, len);
+
+	memset(&req, 0, len);
+	req.hbm_cmd = pg_cmd;
+
+	ret = mei_hbm_write_message(dev, &mei_hdr, &req);
+	if (ret)
+		dev_err(dev->dev, "power gate command write failed.\n");
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mei_hbm_pg);
+
+/**
+ * mei_hbm_stop_req - send stop request message
+ *
+ * @dev: mei device
+ *
+ * Return: -EIO on write failure
+ */
+static int mei_hbm_stop_req(struct mei_device *dev)
+{
+	struct mei_msg_hdr mei_hdr;
+	struct hbm_host_stop_request req;
+	const size_t len = sizeof(struct hbm_host_stop_request);
+
+	mei_hbm_hdr(&mei_hdr, len);
+
+	memset(&req, 0, len);
+	req.hbm_cmd = HOST_STOP_REQ_CMD;
+	req.reason = DRIVER_STOP_REQUEST;
+
+	return mei_hbm_write_message(dev, &mei_hdr, &req);
+}
+
+/**
+ * mei_hbm_cl_flow_control_req - sends flow control request.
+ *
+ * @dev: the device structure
+ * @cl: client info
+ *
+ * Return: -EIO on write failure
+ */
+int mei_hbm_cl_flow_control_req(struct mei_device *dev, struct mei_cl *cl)
+{
+	struct hbm_flow_control req;
+
+	cl_dbg(dev, cl, "sending flow control\n");
+	return mei_hbm_cl_write(dev, cl, MEI_FLOW_CONTROL_CMD,
+				&req, sizeof(req));
+}
+
+/**
+ * mei_hbm_add_single_tx_flow_ctrl_creds - adds single buffer credentials.
+ *
+ * @dev: the device structure
+ * @fctrl: flow control response bus message
+ *
+ * Return: 0 on success, < 0 otherwise
+ */
+static int mei_hbm_add_single_tx_flow_ctrl_creds(struct mei_device *dev,
+						 struct hbm_flow_control *fctrl)
+{
+	struct mei_me_client *me_cl;
+	int rets;
+
+	me_cl = mei_me_cl_by_id(dev, fctrl->me_addr);
+	if (!me_cl) {
+		dev_err(dev->dev, "no such me client %d\n", fctrl->me_addr);
+		return -ENOENT;
+	}
+
+	if (WARN_ON(me_cl->props.single_recv_buf == 0)) {
+		rets = -EINVAL;
+		goto out;
+	}
+
+	me_cl->tx_flow_ctrl_creds++;
+	dev_dbg(dev->dev, "recv flow ctrl msg ME %d (single) creds = %d.\n",
+		fctrl->me_addr, me_cl->tx_flow_ctrl_creds);
+
+	rets = 0;
+out:
+	mei_me_cl_put(me_cl);
+	return rets;
+}
+
+/**
+ * mei_hbm_cl_flow_control_res - flow control response from me
+ *
+ * @dev: the device structure
+ * @fctrl: flow control response bus message
+ */
+static void mei_hbm_cl_tx_flow_ctrl_creds_res(struct mei_device *dev,
+					       struct hbm_flow_control *fctrl)
+{
+	struct mei_cl *cl;
+
+	if (!fctrl->host_addr) {
+		/* single receive buffer */
+		mei_hbm_add_single_tx_flow_ctrl_creds(dev, fctrl);
+		return;
+	}
+
+	cl = mei_hbm_cl_find_by_cmd(dev, fctrl);
+	if (cl) {
+		cl->tx_flow_ctrl_creds++;
+		cl_dbg(dev, cl, "flow control creds = %d.\n",
+				cl->tx_flow_ctrl_creds);
+	}
+}
+
+
+/**
+ * mei_hbm_cl_disconnect_req - sends disconnect message to fw.
+ *
+ * @dev: the device structure
+ * @cl: a client to disconnect from
+ *
+ * Return: -EIO on write failure
+ */
+int mei_hbm_cl_disconnect_req(struct mei_device *dev, struct mei_cl *cl)
+{
+	struct hbm_client_connect_request req;
+
+	return mei_hbm_cl_write(dev, cl, CLIENT_DISCONNECT_REQ_CMD,
+				&req, sizeof(req));
+}
+
+/**
+ * mei_hbm_cl_disconnect_rsp - sends disconnect respose to the FW
+ *
+ * @dev: the device structure
+ * @cl: a client to disconnect from
+ *
+ * Return: -EIO on write failure
+ */
+int mei_hbm_cl_disconnect_rsp(struct mei_device *dev, struct mei_cl *cl)
+{
+	struct hbm_client_connect_response resp;
+
+	return mei_hbm_cl_write(dev, cl, CLIENT_DISCONNECT_RES_CMD,
+				&resp, sizeof(resp));
+}
+
+/**
+ * mei_hbm_cl_disconnect_res - update the client state according
+ *       disconnect response
+ *
+ * @dev: the device structure
+ * @cl: mei host client
+ * @cmd: disconnect client response host bus message
+ */
+static void mei_hbm_cl_disconnect_res(struct mei_device *dev, struct mei_cl *cl,
+				      struct mei_hbm_cl_cmd *cmd)
+{
+	struct hbm_client_connect_response *rs =
+		(struct hbm_client_connect_response *)cmd;
+
+	cl_dbg(dev, cl, "hbm: disconnect response status=%d\n", rs->status);
+
+	if (rs->status == MEI_CL_DISCONN_SUCCESS)
+		cl->state = MEI_FILE_DISCONNECT_REPLY;
+	cl->status = 0;
+}
+
+/**
+ * mei_hbm_cl_connect_req - send connection request to specific me client
+ *
+ * @dev: the device structure
+ * @cl: a client to connect to
+ *
+ * Return: -EIO on write failure
+ */
+int mei_hbm_cl_connect_req(struct mei_device *dev, struct mei_cl *cl)
+{
+	struct hbm_client_connect_request req;
+
+	return mei_hbm_cl_write(dev, cl, CLIENT_CONNECT_REQ_CMD,
+				&req, sizeof(req));
+}
+
+/**
+ * mei_hbm_cl_connect_res - update the client state according
+ *        connection response
+ *
+ * @dev: the device structure
+ * @cl: mei host client
+ * @cmd: connect client response host bus message
+ */
+static void mei_hbm_cl_connect_res(struct mei_device *dev, struct mei_cl *cl,
+				   struct mei_hbm_cl_cmd *cmd)
+{
+	struct hbm_client_connect_response *rs =
+		(struct hbm_client_connect_response *)cmd;
+
+	cl_dbg(dev, cl, "hbm: connect response status=%s\n",
+			mei_cl_conn_status_str(rs->status));
+
+	if (rs->status == MEI_CL_CONN_SUCCESS)
+		cl->state = MEI_FILE_CONNECTED;
+	else {
+		cl->state = MEI_FILE_DISCONNECT_REPLY;
+		if (rs->status == MEI_CL_CONN_NOT_FOUND) {
+			mei_me_cl_del(dev, cl->me_cl);
+			if (dev->dev_state == MEI_DEV_ENABLED)
+				schedule_work(&dev->bus_rescan_work);
+		}
+	}
+	cl->status = mei_cl_conn_status_to_errno(rs->status);
+}
+
+/**
+ * mei_hbm_cl_res - process hbm response received on behalf
+ *         an client
+ *
+ * @dev: the device structure
+ * @rs:  hbm client message
+ * @fop_type: file operation type
+ */
+static void mei_hbm_cl_res(struct mei_device *dev,
+			   struct mei_hbm_cl_cmd *rs,
+			   enum mei_cb_file_ops fop_type)
+{
+	struct mei_cl *cl;
+	struct mei_cl_cb *cb, *next;
+
+	cl = NULL;
+	list_for_each_entry_safe(cb, next, &dev->ctrl_rd_list, list) {
+
+		cl = cb->cl;
+
+		if (cb->fop_type != fop_type)
+			continue;
+
+		if (mei_hbm_cl_addr_equal(cl, rs)) {
+			list_del_init(&cb->list);
+			break;
+		}
+	}
+
+	if (!cl)
+		return;
+
+	switch (fop_type) {
+	case MEI_FOP_CONNECT:
+		mei_hbm_cl_connect_res(dev, cl, rs);
+		break;
+	case MEI_FOP_DISCONNECT:
+		mei_hbm_cl_disconnect_res(dev, cl, rs);
+		break;
+	case MEI_FOP_NOTIFY_START:
+		mei_hbm_cl_notify_start_res(dev, cl, rs);
+		break;
+	case MEI_FOP_NOTIFY_STOP:
+		mei_hbm_cl_notify_stop_res(dev, cl, rs);
+		break;
+	default:
+		return;
+	}
+
+	cl->timer_count = 0;
+	wake_up(&cl->wait);
+}
+
+
+/**
+ * mei_hbm_fw_disconnect_req - disconnect request initiated by ME firmware
+ *  host sends disconnect response
+ *
+ * @dev: the device structure.
+ * @disconnect_req: disconnect request bus message from the me
+ *
+ * Return: -ENOMEM on allocation failure
+ */
+static int mei_hbm_fw_disconnect_req(struct mei_device *dev,
+		struct hbm_client_connect_request *disconnect_req)
+{
+	struct mei_cl *cl;
+	struct mei_cl_cb *cb;
+
+	cl = mei_hbm_cl_find_by_cmd(dev, disconnect_req);
+	if (cl) {
+		cl_warn(dev, cl, "fw disconnect request received\n");
+		cl->state = MEI_FILE_DISCONNECTING;
+		cl->timer_count = 0;
+
+		cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, MEI_FOP_DISCONNECT_RSP,
+					       NULL);
+		if (!cb)
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+/**
+ * mei_hbm_pg_enter_res - PG enter response received
+ *
+ * @dev: the device structure.
+ *
+ * Return: 0 on success, -EPROTO on state mismatch
+ */
+static int mei_hbm_pg_enter_res(struct mei_device *dev)
+{
+	if (mei_pg_state(dev) != MEI_PG_OFF ||
+	    dev->pg_event != MEI_PG_EVENT_WAIT) {
+		dev_err(dev->dev, "hbm: pg entry response: state mismatch [%s, %d]\n",
+			mei_pg_state_str(mei_pg_state(dev)), dev->pg_event);
+		return -EPROTO;
+	}
+
+	dev->pg_event = MEI_PG_EVENT_RECEIVED;
+	wake_up(&dev->wait_pg);
+
+	return 0;
+}
+
+/**
+ * mei_hbm_pg_resume - process with PG resume
+ *
+ * @dev: the device structure.
+ */
+void mei_hbm_pg_resume(struct mei_device *dev)
+{
+	pm_request_resume(dev->dev);
+}
+EXPORT_SYMBOL_GPL(mei_hbm_pg_resume);
+
+/**
+ * mei_hbm_pg_exit_res - PG exit response received
+ *
+ * @dev: the device structure.
+ *
+ * Return: 0 on success, -EPROTO on state mismatch
+ */
+static int mei_hbm_pg_exit_res(struct mei_device *dev)
+{
+	if (mei_pg_state(dev) != MEI_PG_ON ||
+	    (dev->pg_event != MEI_PG_EVENT_WAIT &&
+	     dev->pg_event != MEI_PG_EVENT_IDLE)) {
+		dev_err(dev->dev, "hbm: pg exit response: state mismatch [%s, %d]\n",
+			mei_pg_state_str(mei_pg_state(dev)), dev->pg_event);
+		return -EPROTO;
+	}
+
+	switch (dev->pg_event) {
+	case MEI_PG_EVENT_WAIT:
+		dev->pg_event = MEI_PG_EVENT_RECEIVED;
+		wake_up(&dev->wait_pg);
+		break;
+	case MEI_PG_EVENT_IDLE:
+		/*
+		* If the driver is not waiting on this then
+		* this is HW initiated exit from PG.
+		* Start runtime pm resume sequence to exit from PG.
+		*/
+		dev->pg_event = MEI_PG_EVENT_RECEIVED;
+		mei_hbm_pg_resume(dev);
+		break;
+	default:
+		WARN(1, "hbm: pg exit response: unexpected pg event = %d\n",
+		     dev->pg_event);
+		return -EPROTO;
+	}
+
+	return 0;
+}
+
+/**
+ * mei_hbm_config_features - check what hbm features and commands
+ *        are supported by the fw
+ *
+ * @dev: the device structure
+ */
+static void mei_hbm_config_features(struct mei_device *dev)
+{
+	/* Power Gating Isolation Support */
+	dev->hbm_f_pg_supported = 0;
+	if (dev->version.major_version > HBM_MAJOR_VERSION_PGI)
+		dev->hbm_f_pg_supported = 1;
+
+	if (dev->version.major_version == HBM_MAJOR_VERSION_PGI &&
+	    dev->version.minor_version >= HBM_MINOR_VERSION_PGI)
+		dev->hbm_f_pg_supported = 1;
+
+	dev->hbm_f_dc_supported = 0;
+	if (dev->version.major_version >= HBM_MAJOR_VERSION_DC)
+		dev->hbm_f_dc_supported = 1;
+
+	dev->hbm_f_ie_supported = 0;
+	if (dev->version.major_version >= HBM_MAJOR_VERSION_IE)
+		dev->hbm_f_ie_supported = 1;
+
+	/* disconnect on connect timeout instead of link reset */
+	dev->hbm_f_dot_supported = 0;
+	if (dev->version.major_version >= HBM_MAJOR_VERSION_DOT)
+		dev->hbm_f_dot_supported = 1;
+
+	/* Notification Event Support */
+	dev->hbm_f_ev_supported = 0;
+	if (dev->version.major_version >= HBM_MAJOR_VERSION_EV)
+		dev->hbm_f_ev_supported = 1;
+
+	/* Fixed Address Client Support */
+	dev->hbm_f_fa_supported = 0;
+	if (dev->version.major_version >= HBM_MAJOR_VERSION_FA)
+		dev->hbm_f_fa_supported = 1;
+
+	/* OS ver message Support */
+	dev->hbm_f_os_supported = 0;
+	if (dev->version.major_version >= HBM_MAJOR_VERSION_OS)
+		dev->hbm_f_os_supported = 1;
+
+	/* DMA Ring Support */
+	dev->hbm_f_dr_supported = 0;
+	if (dev->version.major_version > HBM_MAJOR_VERSION_DR ||
+	    (dev->version.major_version == HBM_MAJOR_VERSION_DR &&
+	     dev->version.minor_version >= HBM_MINOR_VERSION_DR))
+		dev->hbm_f_dr_supported = 1;
+}
+
+/**
+ * mei_hbm_version_is_supported - checks whether the driver can
+ *     support the hbm version of the device
+ *
+ * @dev: the device structure
+ * Return: true if driver can support hbm version of the device
+ */
+bool mei_hbm_version_is_supported(struct mei_device *dev)
+{
+	return	(dev->version.major_version < HBM_MAJOR_VERSION) ||
+		(dev->version.major_version == HBM_MAJOR_VERSION &&
+		 dev->version.minor_version <= HBM_MINOR_VERSION);
+}
+
+/**
+ * mei_hbm_dispatch - bottom half read routine after ISR to
+ * handle the read bus message cmd processing.
+ *
+ * @dev: the device structure
+ * @hdr: header of bus message
+ *
+ * Return: 0 on success and < 0 on failure
+ */
+int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
+{
+	struct mei_bus_message *mei_msg;
+	struct hbm_host_version_response *version_res;
+	struct hbm_props_response *props_res;
+	struct hbm_host_enum_response *enum_res;
+	struct hbm_add_client_request *add_cl_req;
+	int ret;
+
+	struct mei_hbm_cl_cmd *cl_cmd;
+	struct hbm_client_connect_request *disconnect_req;
+	struct hbm_flow_control *fctrl;
+
+	/* read the message to our buffer */
+	BUG_ON(hdr->length >= sizeof(dev->rd_msg_buf));
+	mei_read_slots(dev, dev->rd_msg_buf, hdr->length);
+	mei_msg = (struct mei_bus_message *)dev->rd_msg_buf;
+	cl_cmd  = (struct mei_hbm_cl_cmd *)mei_msg;
+
+	/* ignore spurious message and prevent reset nesting
+	 * hbm is put to idle during system reset
+	 */
+	if (dev->hbm_state == MEI_HBM_IDLE) {
+		dev_dbg(dev->dev, "hbm: state is idle ignore spurious messages\n");
+		return 0;
+	}
+
+	switch (mei_msg->hbm_cmd) {
+	case HOST_START_RES_CMD:
+		dev_dbg(dev->dev, "hbm: start: response message received.\n");
+
+		dev->init_clients_timer = 0;
+
+		version_res = (struct hbm_host_version_response *)mei_msg;
+
+		dev_dbg(dev->dev, "HBM VERSION: DRIVER=%02d:%02d DEVICE=%02d:%02d\n",
+				HBM_MAJOR_VERSION, HBM_MINOR_VERSION,
+				version_res->me_max_version.major_version,
+				version_res->me_max_version.minor_version);
+
+		if (version_res->host_version_supported) {
+			dev->version.major_version = HBM_MAJOR_VERSION;
+			dev->version.minor_version = HBM_MINOR_VERSION;
+		} else {
+			dev->version.major_version =
+				version_res->me_max_version.major_version;
+			dev->version.minor_version =
+				version_res->me_max_version.minor_version;
+		}
+
+		if (!mei_hbm_version_is_supported(dev)) {
+			dev_warn(dev->dev, "hbm: start: version mismatch - stopping the driver.\n");
+
+			dev->hbm_state = MEI_HBM_STOPPED;
+			if (mei_hbm_stop_req(dev)) {
+				dev_err(dev->dev, "hbm: start: failed to send stop request\n");
+				return -EIO;
+			}
+			break;
+		}
+
+		mei_hbm_config_features(dev);
+
+		if (dev->dev_state != MEI_DEV_INIT_CLIENTS ||
+		    dev->hbm_state != MEI_HBM_STARTING) {
+			dev_err(dev->dev, "hbm: start: state mismatch, [%d, %d]\n",
+				dev->dev_state, dev->hbm_state);
+			return -EPROTO;
+		}
+
+		if (mei_hbm_enum_clients_req(dev)) {
+			dev_err(dev->dev, "hbm: start: failed to send enumeration request\n");
+			return -EIO;
+		}
+
+		wake_up(&dev->wait_hbm_start);
+		break;
+
+	case CLIENT_CONNECT_RES_CMD:
+		dev_dbg(dev->dev, "hbm: client connect response: message received.\n");
+		mei_hbm_cl_res(dev, cl_cmd, MEI_FOP_CONNECT);
+		break;
+
+	case CLIENT_DISCONNECT_RES_CMD:
+		dev_dbg(dev->dev, "hbm: client disconnect response: message received.\n");
+		mei_hbm_cl_res(dev, cl_cmd, MEI_FOP_DISCONNECT);
+		break;
+
+	case MEI_FLOW_CONTROL_CMD:
+		dev_dbg(dev->dev, "hbm: client flow control response: message received.\n");
+
+		fctrl = (struct hbm_flow_control *)mei_msg;
+		mei_hbm_cl_tx_flow_ctrl_creds_res(dev, fctrl);
+		break;
+
+	case MEI_PG_ISOLATION_ENTRY_RES_CMD:
+		dev_dbg(dev->dev, "hbm: power gate isolation entry response received\n");
+		ret = mei_hbm_pg_enter_res(dev);
+		if (ret)
+			return ret;
+		break;
+
+	case MEI_PG_ISOLATION_EXIT_REQ_CMD:
+		dev_dbg(dev->dev, "hbm: power gate isolation exit request received\n");
+		ret = mei_hbm_pg_exit_res(dev);
+		if (ret)
+			return ret;
+		break;
+
+	case HOST_CLIENT_PROPERTIES_RES_CMD:
+		dev_dbg(dev->dev, "hbm: properties response: message received.\n");
+
+		dev->init_clients_timer = 0;
+
+		if (dev->dev_state != MEI_DEV_INIT_CLIENTS ||
+		    dev->hbm_state != MEI_HBM_CLIENT_PROPERTIES) {
+			dev_err(dev->dev, "hbm: properties response: state mismatch, [%d, %d]\n",
+				dev->dev_state, dev->hbm_state);
+			return -EPROTO;
+		}
+
+		props_res = (struct hbm_props_response *)mei_msg;
+
+		if (props_res->status == MEI_HBMS_CLIENT_NOT_FOUND) {
+			dev_dbg(dev->dev, "hbm: properties response: %d CLIENT_NOT_FOUND\n",
+				props_res->me_addr);
+		} else if (props_res->status) {
+			dev_err(dev->dev, "hbm: properties response: wrong status = %d %s\n",
+				props_res->status,
+				mei_hbm_status_str(props_res->status));
+			return -EPROTO;
+		} else {
+			mei_hbm_me_cl_add(dev, props_res);
+		}
+
+		/* request property for the next client */
+		if (mei_hbm_prop_req(dev, props_res->me_addr + 1))
+			return -EIO;
+
+		break;
+
+	case HOST_ENUM_RES_CMD:
+		dev_dbg(dev->dev, "hbm: enumeration response: message received\n");
+
+		dev->init_clients_timer = 0;
+
+		enum_res = (struct hbm_host_enum_response *) mei_msg;
+		BUILD_BUG_ON(sizeof(dev->me_clients_map)
+				< sizeof(enum_res->valid_addresses));
+		memcpy(dev->me_clients_map, enum_res->valid_addresses,
+				sizeof(enum_res->valid_addresses));
+
+		if (dev->dev_state != MEI_DEV_INIT_CLIENTS ||
+		    dev->hbm_state != MEI_HBM_ENUM_CLIENTS) {
+			dev_err(dev->dev, "hbm: enumeration response: state mismatch, [%d, %d]\n",
+				dev->dev_state, dev->hbm_state);
+			return -EPROTO;
+		}
+
+		dev->hbm_state = MEI_HBM_CLIENT_PROPERTIES;
+
+		/* first property request */
+		if (mei_hbm_prop_req(dev, 0))
+			return -EIO;
+
+		break;
+
+	case HOST_STOP_RES_CMD:
+		dev_dbg(dev->dev, "hbm: stop response: message received\n");
+
+		dev->init_clients_timer = 0;
+
+		if (dev->hbm_state != MEI_HBM_STOPPED) {
+			dev_err(dev->dev, "hbm: stop response: state mismatch, [%d, %d]\n",
+				dev->dev_state, dev->hbm_state);
+			return -EPROTO;
+		}
+
+		dev->dev_state = MEI_DEV_POWER_DOWN;
+		dev_info(dev->dev, "hbm: stop response: resetting.\n");
+		/* force the reset */
+		return -EPROTO;
+		break;
+
+	case CLIENT_DISCONNECT_REQ_CMD:
+		dev_dbg(dev->dev, "hbm: disconnect request: message received\n");
+
+		disconnect_req = (struct hbm_client_connect_request *)mei_msg;
+		mei_hbm_fw_disconnect_req(dev, disconnect_req);
+		break;
+
+	case ME_STOP_REQ_CMD:
+		dev_dbg(dev->dev, "hbm: stop request: message received\n");
+		dev->hbm_state = MEI_HBM_STOPPED;
+		if (mei_hbm_stop_req(dev)) {
+			dev_err(dev->dev, "hbm: stop request: failed to send stop request\n");
+			return -EIO;
+		}
+		break;
+
+	case MEI_HBM_ADD_CLIENT_REQ_CMD:
+		dev_dbg(dev->dev, "hbm: add client request received\n");
+		/*
+		 * after the host receives the enum_resp
+		 * message clients may be added or removed
+		 */
+		if (dev->hbm_state <= MEI_HBM_ENUM_CLIENTS ||
+		    dev->hbm_state >= MEI_HBM_STOPPED) {
+			dev_err(dev->dev, "hbm: add client: state mismatch, [%d, %d]\n",
+				dev->dev_state, dev->hbm_state);
+			return -EPROTO;
+		}
+		add_cl_req = (struct hbm_add_client_request *)mei_msg;
+		ret = mei_hbm_fw_add_cl_req(dev, add_cl_req);
+		if (ret) {
+			dev_err(dev->dev, "hbm: add client: failed to send response %d\n",
+				ret);
+			return -EIO;
+		}
+		dev_dbg(dev->dev, "hbm: add client request processed\n");
+		break;
+
+	case MEI_HBM_NOTIFY_RES_CMD:
+		dev_dbg(dev->dev, "hbm: notify response received\n");
+		mei_hbm_cl_res(dev, cl_cmd, notify_res_to_fop(cl_cmd));
+		break;
+
+	case MEI_HBM_NOTIFICATION_CMD:
+		dev_dbg(dev->dev, "hbm: notification\n");
+		mei_hbm_cl_notify(dev, cl_cmd);
+		break;
+
+	default:
+		BUG();
+		break;
+
+	}
+	return 0;
+}
+
diff --git a/drivers/misc/mei/hbm.h b/drivers/misc/mei/hbm.h
new file mode 100644
index 000000000..a2025a508
--- /dev/null
+++ b/drivers/misc/mei/hbm.h
@@ -0,0 +1,62 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef _MEI_HBM_H_
+#define _MEI_HBM_H_
+
+struct mei_device;
+struct mei_msg_hdr;
+struct mei_cl;
+
+/**
+ * enum mei_hbm_state - host bus message protocol state
+ *
+ * @MEI_HBM_IDLE : protocol not started
+ * @MEI_HBM_STARTING : start request message was sent
+ * @MEI_HBM_ENUM_CLIENTS : enumeration request was sent
+ * @MEI_HBM_CLIENT_PROPERTIES : acquiring clients properties
+ * @MEI_HBM_STARTED : enumeration was completed
+ * @MEI_HBM_STOPPED : stopping exchange
+ */
+enum mei_hbm_state {
+	MEI_HBM_IDLE = 0,
+	MEI_HBM_STARTING,
+	MEI_HBM_ENUM_CLIENTS,
+	MEI_HBM_CLIENT_PROPERTIES,
+	MEI_HBM_STARTED,
+	MEI_HBM_STOPPED,
+};
+
+const char *mei_hbm_state_str(enum mei_hbm_state state);
+
+int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr);
+
+void mei_hbm_idle(struct mei_device *dev);
+void mei_hbm_reset(struct mei_device *dev);
+int mei_hbm_start_req(struct mei_device *dev);
+int mei_hbm_start_wait(struct mei_device *dev);
+int mei_hbm_cl_flow_control_req(struct mei_device *dev, struct mei_cl *cl);
+int mei_hbm_cl_disconnect_req(struct mei_device *dev, struct mei_cl *cl);
+int mei_hbm_cl_disconnect_rsp(struct mei_device *dev, struct mei_cl *cl);
+int mei_hbm_cl_connect_req(struct mei_device *dev, struct mei_cl *cl);
+bool mei_hbm_version_is_supported(struct mei_device *dev);
+int mei_hbm_pg(struct mei_device *dev, u8 pg_cmd);
+void mei_hbm_pg_resume(struct mei_device *dev);
+int mei_hbm_cl_notify_req(struct mei_device *dev,
+			  struct mei_cl *cl, u8 request);
+
+#endif /* _MEI_HBM_H_ */
+
diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h
new file mode 100644
index 000000000..6bbc78698
--- /dev/null
+++ b/drivers/misc/mei/hw-me-regs.h
@@ -0,0 +1,245 @@
+/******************************************************************************
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Intel MEI Interface Header
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2003 - 2012 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110,
+ * USA
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called LICENSE.GPL.
+ *
+ * Contact Information:
+ *	Intel Corporation.
+ *	linux-mei@linux.intel.com
+ *	http://www.intel.com
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2003 - 2012 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  * Neither the name Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+#ifndef _MEI_HW_MEI_REGS_H_
+#define _MEI_HW_MEI_REGS_H_
+
+/*
+ * MEI device IDs
+ */
+#define MEI_DEV_ID_82946GZ    0x2974  /* 82946GZ/GL */
+#define MEI_DEV_ID_82G35      0x2984  /* 82G35 Express */
+#define MEI_DEV_ID_82Q965     0x2994  /* 82Q963/Q965 */
+#define MEI_DEV_ID_82G965     0x29A4  /* 82P965/G965 */
+
+#define MEI_DEV_ID_82GM965    0x2A04  /* Mobile PM965/GM965 */
+#define MEI_DEV_ID_82GME965   0x2A14  /* Mobile GME965/GLE960 */
+
+#define MEI_DEV_ID_ICH9_82Q35 0x29B4  /* 82Q35 Express */
+#define MEI_DEV_ID_ICH9_82G33 0x29C4  /* 82G33/G31/P35/P31 Express */
+#define MEI_DEV_ID_ICH9_82Q33 0x29D4  /* 82Q33 Express */
+#define MEI_DEV_ID_ICH9_82X38 0x29E4  /* 82X38/X48 Express */
+#define MEI_DEV_ID_ICH9_3200  0x29F4  /* 3200/3210 Server */
+
+#define MEI_DEV_ID_ICH9_6     0x28B4  /* Bearlake */
+#define MEI_DEV_ID_ICH9_7     0x28C4  /* Bearlake */
+#define MEI_DEV_ID_ICH9_8     0x28D4  /* Bearlake */
+#define MEI_DEV_ID_ICH9_9     0x28E4  /* Bearlake */
+#define MEI_DEV_ID_ICH9_10    0x28F4  /* Bearlake */
+
+#define MEI_DEV_ID_ICH9M_1    0x2A44  /* Cantiga */
+#define MEI_DEV_ID_ICH9M_2    0x2A54  /* Cantiga */
+#define MEI_DEV_ID_ICH9M_3    0x2A64  /* Cantiga */
+#define MEI_DEV_ID_ICH9M_4    0x2A74  /* Cantiga */
+
+#define MEI_DEV_ID_ICH10_1    0x2E04  /* Eaglelake */
+#define MEI_DEV_ID_ICH10_2    0x2E14  /* Eaglelake */
+#define MEI_DEV_ID_ICH10_3    0x2E24  /* Eaglelake */
+#define MEI_DEV_ID_ICH10_4    0x2E34  /* Eaglelake */
+
+#define MEI_DEV_ID_IBXPK_1    0x3B64  /* Calpella */
+#define MEI_DEV_ID_IBXPK_2    0x3B65  /* Calpella */
+
+#define MEI_DEV_ID_CPT_1      0x1C3A  /* Couger Point */
+#define MEI_DEV_ID_PBG_1      0x1D3A  /* C600/X79 Patsburg */
+
+#define MEI_DEV_ID_PPT_1      0x1E3A  /* Panther Point */
+#define MEI_DEV_ID_PPT_2      0x1CBA  /* Panther Point */
+#define MEI_DEV_ID_PPT_3      0x1DBA  /* Panther Point */
+
+#define MEI_DEV_ID_LPT_H      0x8C3A  /* Lynx Point H */
+#define MEI_DEV_ID_LPT_W      0x8D3A  /* Lynx Point - Wellsburg */
+#define MEI_DEV_ID_LPT_LP     0x9C3A  /* Lynx Point LP */
+#define MEI_DEV_ID_LPT_HR     0x8CBA  /* Lynx Point H Refresh */
+
+#define MEI_DEV_ID_WPT_LP     0x9CBA  /* Wildcat Point LP */
+#define MEI_DEV_ID_WPT_LP_2   0x9CBB  /* Wildcat Point LP 2 */
+
+#define MEI_DEV_ID_SPT        0x9D3A  /* Sunrise Point */
+#define MEI_DEV_ID_SPT_2      0x9D3B  /* Sunrise Point 2 */
+#define MEI_DEV_ID_SPT_H      0xA13A  /* Sunrise Point H */
+#define MEI_DEV_ID_SPT_H_2    0xA13B  /* Sunrise Point H 2 */
+
+#define MEI_DEV_ID_LBG        0xA1BA  /* Lewisburg (SPT) */
+
+#define MEI_DEV_ID_BXT_M      0x1A9A  /* Broxton M */
+#define MEI_DEV_ID_APL_I      0x5A9A  /* Apollo Lake I */
+
+#define MEI_DEV_ID_DNV_IE     0x19E5  /* Denverton IE */
+
+#define MEI_DEV_ID_GLK        0x319A  /* Gemini Lake */
+
+#define MEI_DEV_ID_KBP        0xA2BA  /* Kaby Point */
+#define MEI_DEV_ID_KBP_2      0xA2BB  /* Kaby Point 2 */
+
+#define MEI_DEV_ID_CNP_LP     0x9DE0  /* Cannon Point LP */
+#define MEI_DEV_ID_CNP_LP_4   0x9DE4  /* Cannon Point LP 4 (iTouch) */
+#define MEI_DEV_ID_CNP_H      0xA360  /* Cannon Point H */
+#define MEI_DEV_ID_CNP_H_4    0xA364  /* Cannon Point H 4 (iTouch) */
+
+#define MEI_DEV_ID_CMP_LP     0x02e0  /* Comet Point LP */
+#define MEI_DEV_ID_CMP_LP_3   0x02e4  /* Comet Point LP 3 (iTouch) */
+
+#define MEI_DEV_ID_CMP_V      0xA3BA  /* Comet Point Lake V */
+
+#define MEI_DEV_ID_CMP_H      0x06e0  /* Comet Lake H */
+#define MEI_DEV_ID_CMP_H_3    0x06e4  /* Comet Lake H 3 (iTouch) */
+
+#define MEI_DEV_ID_CDF        0x18D3  /* Cedar Fork */
+
+#define MEI_DEV_ID_ICP_LP     0x34E0  /* Ice Lake Point LP */
+#define MEI_DEV_ID_ICP_N      0x38E0  /* Ice Lake Point N */
+
+#define MEI_DEV_ID_TGP_LP     0xA0E0  /* Tiger Lake Point LP */
+
+#define MEI_DEV_ID_MCC        0x4B70  /* Mule Creek Canyon (EHL) */
+#define MEI_DEV_ID_MCC_4      0x4B75  /* Mule Creek Canyon 4 (EHL) */
+
+/*
+ * MEI HW Section
+ */
+
+/* Host Firmware Status Registers in PCI Config Space */
+#define PCI_CFG_HFS_1         0x40
+#  define PCI_CFG_HFS_1_D0I3_MSK     0x80000000
+#define PCI_CFG_HFS_2         0x48
+#define PCI_CFG_HFS_3         0x60
+#define PCI_CFG_HFS_4         0x64
+#define PCI_CFG_HFS_5         0x68
+#define PCI_CFG_HFS_6         0x6C
+
+/* MEI registers */
+/* H_CB_WW - Host Circular Buffer (CB) Write Window register */
+#define H_CB_WW    0
+/* H_CSR - Host Control Status register */
+#define H_CSR      4
+/* ME_CB_RW - ME Circular Buffer Read Window register (read only) */
+#define ME_CB_RW   8
+/* ME_CSR_HA - ME Control Status Host Access register (read only) */
+#define ME_CSR_HA  0xC
+/* H_HGC_CSR - PGI register */
+#define H_HPG_CSR  0x10
+/* H_D0I3C - D0I3 Control  */
+#define H_D0I3C    0x800
+
+/* register bits of H_CSR (Host Control Status register) */
+/* Host Circular Buffer Depth - maximum number of 32-bit entries in CB */
+#define H_CBD             0xFF000000
+/* Host Circular Buffer Write Pointer */
+#define H_CBWP            0x00FF0000
+/* Host Circular Buffer Read Pointer */
+#define H_CBRP            0x0000FF00
+/* Host Reset */
+#define H_RST             0x00000010
+/* Host Ready */
+#define H_RDY             0x00000008
+/* Host Interrupt Generate */
+#define H_IG              0x00000004
+/* Host Interrupt Status */
+#define H_IS              0x00000002
+/* Host Interrupt Enable */
+#define H_IE              0x00000001
+/* Host D0I3 Interrupt Enable */
+#define H_D0I3C_IE        0x00000020
+/* Host D0I3 Interrupt Status */
+#define H_D0I3C_IS        0x00000040
+
+/* H_CSR masks */
+#define H_CSR_IE_MASK     (H_IE | H_D0I3C_IE)
+#define H_CSR_IS_MASK     (H_IS | H_D0I3C_IS)
+
+/* register bits of ME_CSR_HA (ME Control Status Host Access register) */
+/* ME CB (Circular Buffer) Depth HRA (Host Read Access) - host read only
+access to ME_CBD */
+#define ME_CBD_HRA        0xFF000000
+/* ME CB Write Pointer HRA - host read only access to ME_CBWP */
+#define ME_CBWP_HRA       0x00FF0000
+/* ME CB Read Pointer HRA - host read only access to ME_CBRP */
+#define ME_CBRP_HRA       0x0000FF00
+/* ME Power Gate Isolation Capability HRA  - host ready only access */
+#define ME_PGIC_HRA       0x00000040
+/* ME Reset HRA - host read only access to ME_RST */
+#define ME_RST_HRA        0x00000010
+/* ME Ready HRA - host read only access to ME_RDY */
+#define ME_RDY_HRA        0x00000008
+/* ME Interrupt Generate HRA - host read only access to ME_IG */
+#define ME_IG_HRA         0x00000004
+/* ME Interrupt Status HRA - host read only access to ME_IS */
+#define ME_IS_HRA         0x00000002
+/* ME Interrupt Enable HRA - host read only access to ME_IE */
+#define ME_IE_HRA         0x00000001
+
+
+/* H_HPG_CSR register bits */
+#define H_HPG_CSR_PGIHEXR 0x00000001
+#define H_HPG_CSR_PGI     0x00000002
+
+/* H_D0I3C register bits */
+#define H_D0I3C_CIP      0x00000001
+#define H_D0I3C_IR       0x00000002
+#define H_D0I3C_I3       0x00000004
+#define H_D0I3C_RR       0x00000008
+
+#endif /* _MEI_HW_MEI_REGS_H_ */
diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c
new file mode 100644
index 000000000..60c8c8418
--- /dev/null
+++ b/drivers/misc/mei/hw-me.c
@@ -0,0 +1,1500 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/pci.h>
+
+#include <linux/kthread.h>
+#include <linux/interrupt.h>
+#include <linux/pm_runtime.h>
+#include <linux/sizes.h>
+
+#include "mei_dev.h"
+#include "hbm.h"
+
+#include "hw-me.h"
+#include "hw-me-regs.h"
+
+#include "mei-trace.h"
+
+/**
+ * mei_me_reg_read - Reads 32bit data from the mei device
+ *
+ * @hw: the me hardware structure
+ * @offset: offset from which to read the data
+ *
+ * Return: register value (u32)
+ */
+static inline u32 mei_me_reg_read(const struct mei_me_hw *hw,
+			       unsigned long offset)
+{
+	return ioread32(hw->mem_addr + offset);
+}
+
+
+/**
+ * mei_me_reg_write - Writes 32bit data to the mei device
+ *
+ * @hw: the me hardware structure
+ * @offset: offset from which to write the data
+ * @value: register value to write (u32)
+ */
+static inline void mei_me_reg_write(const struct mei_me_hw *hw,
+				 unsigned long offset, u32 value)
+{
+	iowrite32(value, hw->mem_addr + offset);
+}
+
+/**
+ * mei_me_mecbrw_read - Reads 32bit data from ME circular buffer
+ *  read window register
+ *
+ * @dev: the device structure
+ *
+ * Return: ME_CB_RW register value (u32)
+ */
+static inline u32 mei_me_mecbrw_read(const struct mei_device *dev)
+{
+	return mei_me_reg_read(to_me_hw(dev), ME_CB_RW);
+}
+
+/**
+ * mei_me_hcbww_write - write 32bit data to the host circular buffer
+ *
+ * @dev: the device structure
+ * @data: 32bit data to be written to the host circular buffer
+ */
+static inline void mei_me_hcbww_write(struct mei_device *dev, u32 data)
+{
+	mei_me_reg_write(to_me_hw(dev), H_CB_WW, data);
+}
+
+/**
+ * mei_me_mecsr_read - Reads 32bit data from the ME CSR
+ *
+ * @dev: the device structure
+ *
+ * Return: ME_CSR_HA register value (u32)
+ */
+static inline u32 mei_me_mecsr_read(const struct mei_device *dev)
+{
+	u32 reg;
+
+	reg = mei_me_reg_read(to_me_hw(dev), ME_CSR_HA);
+	trace_mei_reg_read(dev->dev, "ME_CSR_HA", ME_CSR_HA, reg);
+
+	return reg;
+}
+
+/**
+ * mei_hcsr_read - Reads 32bit data from the host CSR
+ *
+ * @dev: the device structure
+ *
+ * Return: H_CSR register value (u32)
+ */
+static inline u32 mei_hcsr_read(const struct mei_device *dev)
+{
+	u32 reg;
+
+	reg = mei_me_reg_read(to_me_hw(dev), H_CSR);
+	trace_mei_reg_read(dev->dev, "H_CSR", H_CSR, reg);
+
+	return reg;
+}
+
+/**
+ * mei_hcsr_write - writes H_CSR register to the mei device
+ *
+ * @dev: the device structure
+ * @reg: new register value
+ */
+static inline void mei_hcsr_write(struct mei_device *dev, u32 reg)
+{
+	trace_mei_reg_write(dev->dev, "H_CSR", H_CSR, reg);
+	mei_me_reg_write(to_me_hw(dev), H_CSR, reg);
+}
+
+/**
+ * mei_hcsr_set - writes H_CSR register to the mei device,
+ * and ignores the H_IS bit for it is write-one-to-zero.
+ *
+ * @dev: the device structure
+ * @reg: new register value
+ */
+static inline void mei_hcsr_set(struct mei_device *dev, u32 reg)
+{
+	reg &= ~H_CSR_IS_MASK;
+	mei_hcsr_write(dev, reg);
+}
+
+/**
+ * mei_hcsr_set_hig - set host interrupt (set H_IG)
+ *
+ * @dev: the device structure
+ */
+static inline void mei_hcsr_set_hig(struct mei_device *dev)
+{
+	u32 hcsr;
+
+	hcsr = mei_hcsr_read(dev) | H_IG;
+	mei_hcsr_set(dev, hcsr);
+}
+
+/**
+ * mei_me_d0i3c_read - Reads 32bit data from the D0I3C register
+ *
+ * @dev: the device structure
+ *
+ * Return: H_D0I3C register value (u32)
+ */
+static inline u32 mei_me_d0i3c_read(const struct mei_device *dev)
+{
+	u32 reg;
+
+	reg = mei_me_reg_read(to_me_hw(dev), H_D0I3C);
+	trace_mei_reg_read(dev->dev, "H_D0I3C", H_D0I3C, reg);
+
+	return reg;
+}
+
+/**
+ * mei_me_d0i3c_write - writes H_D0I3C register to device
+ *
+ * @dev: the device structure
+ * @reg: new register value
+ */
+static inline void mei_me_d0i3c_write(struct mei_device *dev, u32 reg)
+{
+	trace_mei_reg_write(dev->dev, "H_D0I3C", H_D0I3C, reg);
+	mei_me_reg_write(to_me_hw(dev), H_D0I3C, reg);
+}
+
+/**
+ * mei_me_fw_status - read fw status register from pci config space
+ *
+ * @dev: mei device
+ * @fw_status: fw status register values
+ *
+ * Return: 0 on success, error otherwise
+ */
+static int mei_me_fw_status(struct mei_device *dev,
+			    struct mei_fw_status *fw_status)
+{
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
+	struct mei_me_hw *hw = to_me_hw(dev);
+	const struct mei_fw_status *fw_src = &hw->cfg->fw_status;
+	int ret;
+	int i;
+
+	if (!fw_status)
+		return -EINVAL;
+
+	fw_status->count = fw_src->count;
+	for (i = 0; i < fw_src->count && i < MEI_FW_STATUS_MAX; i++) {
+		ret = pci_read_config_dword(pdev, fw_src->status[i],
+					    &fw_status->status[i]);
+		trace_mei_pci_cfg_read(dev->dev, "PCI_CFG_HSF_X",
+				       fw_src->status[i],
+				       fw_status->status[i]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * mei_me_hw_config - configure hw dependent settings
+ *
+ * @dev: mei device
+ */
+static void mei_me_hw_config(struct mei_device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
+	struct mei_me_hw *hw = to_me_hw(dev);
+	u32 hcsr, reg;
+
+	/* Doesn't change in runtime */
+	hcsr = mei_hcsr_read(dev);
+	hw->hbuf_depth = (hcsr & H_CBD) >> 24;
+
+	reg = 0;
+	pci_read_config_dword(pdev, PCI_CFG_HFS_1, &reg);
+	trace_mei_pci_cfg_read(dev->dev, "PCI_CFG_HFS_1", PCI_CFG_HFS_1, reg);
+	hw->d0i3_supported =
+		((reg & PCI_CFG_HFS_1_D0I3_MSK) == PCI_CFG_HFS_1_D0I3_MSK);
+
+	hw->pg_state = MEI_PG_OFF;
+	if (hw->d0i3_supported) {
+		reg = mei_me_d0i3c_read(dev);
+		if (reg & H_D0I3C_I3)
+			hw->pg_state = MEI_PG_ON;
+	}
+}
+
+/**
+ * mei_me_pg_state  - translate internal pg state
+ *   to the mei power gating state
+ *
+ * @dev:  mei device
+ *
+ * Return: MEI_PG_OFF if aliveness is on and MEI_PG_ON otherwise
+ */
+static inline enum mei_pg_state mei_me_pg_state(struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+
+	return hw->pg_state;
+}
+
+static inline u32 me_intr_src(u32 hcsr)
+{
+	return hcsr & H_CSR_IS_MASK;
+}
+
+/**
+ * me_intr_disable - disables mei device interrupts
+ *      using supplied hcsr register value.
+ *
+ * @dev: the device structure
+ * @hcsr: supplied hcsr register value
+ */
+static inline void me_intr_disable(struct mei_device *dev, u32 hcsr)
+{
+	hcsr &= ~H_CSR_IE_MASK;
+	mei_hcsr_set(dev, hcsr);
+}
+
+/**
+ * mei_me_intr_clear - clear and stop interrupts
+ *
+ * @dev: the device structure
+ * @hcsr: supplied hcsr register value
+ */
+static inline void me_intr_clear(struct mei_device *dev, u32 hcsr)
+{
+	if (me_intr_src(hcsr))
+		mei_hcsr_write(dev, hcsr);
+}
+
+/**
+ * mei_me_intr_clear - clear and stop interrupts
+ *
+ * @dev: the device structure
+ */
+static void mei_me_intr_clear(struct mei_device *dev)
+{
+	u32 hcsr = mei_hcsr_read(dev);
+
+	me_intr_clear(dev, hcsr);
+}
+/**
+ * mei_me_intr_enable - enables mei device interrupts
+ *
+ * @dev: the device structure
+ */
+static void mei_me_intr_enable(struct mei_device *dev)
+{
+	u32 hcsr = mei_hcsr_read(dev);
+
+	hcsr |= H_CSR_IE_MASK;
+	mei_hcsr_set(dev, hcsr);
+}
+
+/**
+ * mei_me_intr_disable - disables mei device interrupts
+ *
+ * @dev: the device structure
+ */
+static void mei_me_intr_disable(struct mei_device *dev)
+{
+	u32 hcsr = mei_hcsr_read(dev);
+
+	me_intr_disable(dev, hcsr);
+}
+
+/**
+ * mei_me_synchronize_irq - wait for pending IRQ handlers
+ *
+ * @dev: the device structure
+ */
+static void mei_me_synchronize_irq(struct mei_device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+	synchronize_irq(pdev->irq);
+}
+
+/**
+ * mei_me_hw_reset_release - release device from the reset
+ *
+ * @dev: the device structure
+ */
+static void mei_me_hw_reset_release(struct mei_device *dev)
+{
+	u32 hcsr = mei_hcsr_read(dev);
+
+	hcsr |= H_IG;
+	hcsr &= ~H_RST;
+	mei_hcsr_set(dev, hcsr);
+
+	/* complete this write before we set host ready on another CPU */
+	mmiowb();
+}
+
+/**
+ * mei_me_host_set_ready - enable device
+ *
+ * @dev: mei device
+ */
+static void mei_me_host_set_ready(struct mei_device *dev)
+{
+	u32 hcsr = mei_hcsr_read(dev);
+
+	hcsr |= H_CSR_IE_MASK | H_IG | H_RDY;
+	mei_hcsr_set(dev, hcsr);
+}
+
+/**
+ * mei_me_host_is_ready - check whether the host has turned ready
+ *
+ * @dev: mei device
+ * Return: bool
+ */
+static bool mei_me_host_is_ready(struct mei_device *dev)
+{
+	u32 hcsr = mei_hcsr_read(dev);
+
+	return (hcsr & H_RDY) == H_RDY;
+}
+
+/**
+ * mei_me_hw_is_ready - check whether the me(hw) has turned ready
+ *
+ * @dev: mei device
+ * Return: bool
+ */
+static bool mei_me_hw_is_ready(struct mei_device *dev)
+{
+	u32 mecsr = mei_me_mecsr_read(dev);
+
+	return (mecsr & ME_RDY_HRA) == ME_RDY_HRA;
+}
+
+/**
+ * mei_me_hw_is_resetting - check whether the me(hw) is in reset
+ *
+ * @dev: mei device
+ * Return: bool
+ */
+static bool mei_me_hw_is_resetting(struct mei_device *dev)
+{
+	u32 mecsr = mei_me_mecsr_read(dev);
+
+	return (mecsr & ME_RST_HRA) == ME_RST_HRA;
+}
+
+/**
+ * mei_me_hw_ready_wait - wait until the me(hw) has turned ready
+ *  or timeout is reached
+ *
+ * @dev: mei device
+ * Return: 0 on success, error otherwise
+ */
+static int mei_me_hw_ready_wait(struct mei_device *dev)
+{
+	mutex_unlock(&dev->device_lock);
+	wait_event_timeout(dev->wait_hw_ready,
+			dev->recvd_hw_ready,
+			mei_secs_to_jiffies(MEI_HW_READY_TIMEOUT));
+	mutex_lock(&dev->device_lock);
+	if (!dev->recvd_hw_ready) {
+		dev_err(dev->dev, "wait hw ready failed\n");
+		return -ETIME;
+	}
+
+	mei_me_hw_reset_release(dev);
+	dev->recvd_hw_ready = false;
+	return 0;
+}
+
+/**
+ * mei_me_hw_start - hw start routine
+ *
+ * @dev: mei device
+ * Return: 0 on success, error otherwise
+ */
+static int mei_me_hw_start(struct mei_device *dev)
+{
+	int ret = mei_me_hw_ready_wait(dev);
+
+	if (ret)
+		return ret;
+	dev_dbg(dev->dev, "hw is ready\n");
+
+	mei_me_host_set_ready(dev);
+	return ret;
+}
+
+
+/**
+ * mei_hbuf_filled_slots - gets number of device filled buffer slots
+ *
+ * @dev: the device structure
+ *
+ * Return: number of filled slots
+ */
+static unsigned char mei_hbuf_filled_slots(struct mei_device *dev)
+{
+	u32 hcsr;
+	char read_ptr, write_ptr;
+
+	hcsr = mei_hcsr_read(dev);
+
+	read_ptr = (char) ((hcsr & H_CBRP) >> 8);
+	write_ptr = (char) ((hcsr & H_CBWP) >> 16);
+
+	return (unsigned char) (write_ptr - read_ptr);
+}
+
+/**
+ * mei_me_hbuf_is_empty - checks if host buffer is empty.
+ *
+ * @dev: the device structure
+ *
+ * Return: true if empty, false - otherwise.
+ */
+static bool mei_me_hbuf_is_empty(struct mei_device *dev)
+{
+	return mei_hbuf_filled_slots(dev) == 0;
+}
+
+/**
+ * mei_me_hbuf_empty_slots - counts write empty slots.
+ *
+ * @dev: the device structure
+ *
+ * Return: -EOVERFLOW if overflow, otherwise empty slots count
+ */
+static int mei_me_hbuf_empty_slots(struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+	unsigned char filled_slots, empty_slots;
+
+	filled_slots = mei_hbuf_filled_slots(dev);
+	empty_slots = hw->hbuf_depth - filled_slots;
+
+	/* check for overflow */
+	if (filled_slots > hw->hbuf_depth)
+		return -EOVERFLOW;
+
+	return empty_slots;
+}
+
+/**
+ * mei_me_hbuf_depth - returns depth of the hw buffer.
+ *
+ * @dev: the device structure
+ *
+ * Return: size of hw buffer in slots
+ */
+static u32 mei_me_hbuf_depth(const struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+
+	return hw->hbuf_depth;
+}
+
+/**
+ * mei_me_hbuf_write - writes a message to host hw buffer.
+ *
+ * @dev: the device structure
+ * @hdr: header of message
+ * @hdr_len: header length in bytes: must be multiplication of a slot (4bytes)
+ * @data: payload
+ * @data_len: payload length in bytes
+ *
+ * Return: 0 if success, < 0 - otherwise.
+ */
+static int mei_me_hbuf_write(struct mei_device *dev,
+			     const void *hdr, size_t hdr_len,
+			     const void *data, size_t data_len)
+{
+	unsigned long rem;
+	unsigned long i;
+	const u32 *reg_buf;
+	u32 dw_cnt;
+	int empty_slots;
+
+	if (WARN_ON(!hdr || !data || hdr_len & 0x3))
+		return -EINVAL;
+
+	dev_dbg(dev->dev, MEI_HDR_FMT, MEI_HDR_PRM((struct mei_msg_hdr *)hdr));
+
+	empty_slots = mei_hbuf_empty_slots(dev);
+	dev_dbg(dev->dev, "empty slots = %hu.\n", empty_slots);
+
+	if (empty_slots < 0)
+		return -EOVERFLOW;
+
+	dw_cnt = mei_data2slots(hdr_len + data_len);
+	if (dw_cnt > (u32)empty_slots)
+		return -EMSGSIZE;
+
+	reg_buf = hdr;
+	for (i = 0; i < hdr_len / MEI_SLOT_SIZE; i++)
+		mei_me_hcbww_write(dev, reg_buf[i]);
+
+	reg_buf = data;
+	for (i = 0; i < data_len / MEI_SLOT_SIZE; i++)
+		mei_me_hcbww_write(dev, reg_buf[i]);
+
+	rem = data_len & 0x3;
+	if (rem > 0) {
+		u32 reg = 0;
+
+		memcpy(&reg, (const u8 *)data + data_len - rem, rem);
+		mei_me_hcbww_write(dev, reg);
+	}
+
+	mei_hcsr_set_hig(dev);
+	if (!mei_me_hw_is_ready(dev))
+		return -EIO;
+
+	return 0;
+}
+
+/**
+ * mei_me_count_full_read_slots - counts read full slots.
+ *
+ * @dev: the device structure
+ *
+ * Return: -EOVERFLOW if overflow, otherwise filled slots count
+ */
+static int mei_me_count_full_read_slots(struct mei_device *dev)
+{
+	u32 me_csr;
+	char read_ptr, write_ptr;
+	unsigned char buffer_depth, filled_slots;
+
+	me_csr = mei_me_mecsr_read(dev);
+	buffer_depth = (unsigned char)((me_csr & ME_CBD_HRA) >> 24);
+	read_ptr = (char) ((me_csr & ME_CBRP_HRA) >> 8);
+	write_ptr = (char) ((me_csr & ME_CBWP_HRA) >> 16);
+	filled_slots = (unsigned char) (write_ptr - read_ptr);
+
+	/* check for overflow */
+	if (filled_slots > buffer_depth)
+		return -EOVERFLOW;
+
+	dev_dbg(dev->dev, "filled_slots =%08x\n", filled_slots);
+	return (int)filled_slots;
+}
+
+/**
+ * mei_me_read_slots - reads a message from mei device.
+ *
+ * @dev: the device structure
+ * @buffer: message buffer will be written
+ * @buffer_length: message size will be read
+ *
+ * Return: always 0
+ */
+static int mei_me_read_slots(struct mei_device *dev, unsigned char *buffer,
+			     unsigned long buffer_length)
+{
+	u32 *reg_buf = (u32 *)buffer;
+
+	for (; buffer_length >= MEI_SLOT_SIZE; buffer_length -= MEI_SLOT_SIZE)
+		*reg_buf++ = mei_me_mecbrw_read(dev);
+
+	if (buffer_length > 0) {
+		u32 reg = mei_me_mecbrw_read(dev);
+
+		memcpy(reg_buf, &reg, buffer_length);
+	}
+
+	mei_hcsr_set_hig(dev);
+	return 0;
+}
+
+/**
+ * mei_me_pg_set - write pg enter register
+ *
+ * @dev: the device structure
+ */
+static void mei_me_pg_set(struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+	u32 reg;
+
+	reg = mei_me_reg_read(hw, H_HPG_CSR);
+	trace_mei_reg_read(dev->dev, "H_HPG_CSR", H_HPG_CSR, reg);
+
+	reg |= H_HPG_CSR_PGI;
+
+	trace_mei_reg_write(dev->dev, "H_HPG_CSR", H_HPG_CSR, reg);
+	mei_me_reg_write(hw, H_HPG_CSR, reg);
+}
+
+/**
+ * mei_me_pg_unset - write pg exit register
+ *
+ * @dev: the device structure
+ */
+static void mei_me_pg_unset(struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+	u32 reg;
+
+	reg = mei_me_reg_read(hw, H_HPG_CSR);
+	trace_mei_reg_read(dev->dev, "H_HPG_CSR", H_HPG_CSR, reg);
+
+	WARN(!(reg & H_HPG_CSR_PGI), "PGI is not set\n");
+
+	reg |= H_HPG_CSR_PGIHEXR;
+
+	trace_mei_reg_write(dev->dev, "H_HPG_CSR", H_HPG_CSR, reg);
+	mei_me_reg_write(hw, H_HPG_CSR, reg);
+}
+
+/**
+ * mei_me_pg_legacy_enter_sync - perform legacy pg entry procedure
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success an error code otherwise
+ */
+static int mei_me_pg_legacy_enter_sync(struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+	unsigned long timeout = mei_secs_to_jiffies(MEI_PGI_TIMEOUT);
+	int ret;
+
+	dev->pg_event = MEI_PG_EVENT_WAIT;
+
+	ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_ENTRY_REQ_CMD);
+	if (ret)
+		return ret;
+
+	mutex_unlock(&dev->device_lock);
+	wait_event_timeout(dev->wait_pg,
+		dev->pg_event == MEI_PG_EVENT_RECEIVED, timeout);
+	mutex_lock(&dev->device_lock);
+
+	if (dev->pg_event == MEI_PG_EVENT_RECEIVED) {
+		mei_me_pg_set(dev);
+		ret = 0;
+	} else {
+		ret = -ETIME;
+	}
+
+	dev->pg_event = MEI_PG_EVENT_IDLE;
+	hw->pg_state = MEI_PG_ON;
+
+	return ret;
+}
+
+/**
+ * mei_me_pg_legacy_exit_sync - perform legacy pg exit procedure
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success an error code otherwise
+ */
+static int mei_me_pg_legacy_exit_sync(struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+	unsigned long timeout = mei_secs_to_jiffies(MEI_PGI_TIMEOUT);
+	int ret;
+
+	if (dev->pg_event == MEI_PG_EVENT_RECEIVED)
+		goto reply;
+
+	dev->pg_event = MEI_PG_EVENT_WAIT;
+
+	mei_me_pg_unset(dev);
+
+	mutex_unlock(&dev->device_lock);
+	wait_event_timeout(dev->wait_pg,
+		dev->pg_event == MEI_PG_EVENT_RECEIVED, timeout);
+	mutex_lock(&dev->device_lock);
+
+reply:
+	if (dev->pg_event != MEI_PG_EVENT_RECEIVED) {
+		ret = -ETIME;
+		goto out;
+	}
+
+	dev->pg_event = MEI_PG_EVENT_INTR_WAIT;
+	ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_EXIT_RES_CMD);
+	if (ret)
+		return ret;
+
+	mutex_unlock(&dev->device_lock);
+	wait_event_timeout(dev->wait_pg,
+		dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED, timeout);
+	mutex_lock(&dev->device_lock);
+
+	if (dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED)
+		ret = 0;
+	else
+		ret = -ETIME;
+
+out:
+	dev->pg_event = MEI_PG_EVENT_IDLE;
+	hw->pg_state = MEI_PG_OFF;
+
+	return ret;
+}
+
+/**
+ * mei_me_pg_in_transition - is device now in pg transition
+ *
+ * @dev: the device structure
+ *
+ * Return: true if in pg transition, false otherwise
+ */
+static bool mei_me_pg_in_transition(struct mei_device *dev)
+{
+	return dev->pg_event >= MEI_PG_EVENT_WAIT &&
+	       dev->pg_event <= MEI_PG_EVENT_INTR_WAIT;
+}
+
+/**
+ * mei_me_pg_is_enabled - detect if PG is supported by HW
+ *
+ * @dev: the device structure
+ *
+ * Return: true is pg supported, false otherwise
+ */
+static bool mei_me_pg_is_enabled(struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+	u32 reg = mei_me_mecsr_read(dev);
+
+	if (hw->d0i3_supported)
+		return true;
+
+	if ((reg & ME_PGIC_HRA) == 0)
+		goto notsupported;
+
+	if (!dev->hbm_f_pg_supported)
+		goto notsupported;
+
+	return true;
+
+notsupported:
+	dev_dbg(dev->dev, "pg: not supported: d0i3 = %d HGP = %d hbm version %d.%d ?= %d.%d\n",
+		hw->d0i3_supported,
+		!!(reg & ME_PGIC_HRA),
+		dev->version.major_version,
+		dev->version.minor_version,
+		HBM_MAJOR_VERSION_PGI,
+		HBM_MINOR_VERSION_PGI);
+
+	return false;
+}
+
+/**
+ * mei_me_d0i3_set - write d0i3 register bit on mei device.
+ *
+ * @dev: the device structure
+ * @intr: ask for interrupt
+ *
+ * Return: D0I3C register value
+ */
+static u32 mei_me_d0i3_set(struct mei_device *dev, bool intr)
+{
+	u32 reg = mei_me_d0i3c_read(dev);
+
+	reg |= H_D0I3C_I3;
+	if (intr)
+		reg |= H_D0I3C_IR;
+	else
+		reg &= ~H_D0I3C_IR;
+	mei_me_d0i3c_write(dev, reg);
+	/* read it to ensure HW consistency */
+	reg = mei_me_d0i3c_read(dev);
+	return reg;
+}
+
+/**
+ * mei_me_d0i3_unset - clean d0i3 register bit on mei device.
+ *
+ * @dev: the device structure
+ *
+ * Return: D0I3C register value
+ */
+static u32 mei_me_d0i3_unset(struct mei_device *dev)
+{
+	u32 reg = mei_me_d0i3c_read(dev);
+
+	reg &= ~H_D0I3C_I3;
+	reg |= H_D0I3C_IR;
+	mei_me_d0i3c_write(dev, reg);
+	/* read it to ensure HW consistency */
+	reg = mei_me_d0i3c_read(dev);
+	return reg;
+}
+
+/**
+ * mei_me_d0i3_enter_sync - perform d0i3 entry procedure
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success an error code otherwise
+ */
+static int mei_me_d0i3_enter_sync(struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+	unsigned long d0i3_timeout = mei_secs_to_jiffies(MEI_D0I3_TIMEOUT);
+	unsigned long pgi_timeout = mei_secs_to_jiffies(MEI_PGI_TIMEOUT);
+	int ret;
+	u32 reg;
+
+	reg = mei_me_d0i3c_read(dev);
+	if (reg & H_D0I3C_I3) {
+		/* we are in d0i3, nothing to do */
+		dev_dbg(dev->dev, "d0i3 set not needed\n");
+		ret = 0;
+		goto on;
+	}
+
+	/* PGI entry procedure */
+	dev->pg_event = MEI_PG_EVENT_WAIT;
+
+	ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_ENTRY_REQ_CMD);
+	if (ret)
+		/* FIXME: should we reset here? */
+		goto out;
+
+	mutex_unlock(&dev->device_lock);
+	wait_event_timeout(dev->wait_pg,
+		dev->pg_event == MEI_PG_EVENT_RECEIVED, pgi_timeout);
+	mutex_lock(&dev->device_lock);
+
+	if (dev->pg_event != MEI_PG_EVENT_RECEIVED) {
+		ret = -ETIME;
+		goto out;
+	}
+	/* end PGI entry procedure */
+
+	dev->pg_event = MEI_PG_EVENT_INTR_WAIT;
+
+	reg = mei_me_d0i3_set(dev, true);
+	if (!(reg & H_D0I3C_CIP)) {
+		dev_dbg(dev->dev, "d0i3 enter wait not needed\n");
+		ret = 0;
+		goto on;
+	}
+
+	mutex_unlock(&dev->device_lock);
+	wait_event_timeout(dev->wait_pg,
+		dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED, d0i3_timeout);
+	mutex_lock(&dev->device_lock);
+
+	if (dev->pg_event != MEI_PG_EVENT_INTR_RECEIVED) {
+		reg = mei_me_d0i3c_read(dev);
+		if (!(reg & H_D0I3C_I3)) {
+			ret = -ETIME;
+			goto out;
+		}
+	}
+
+	ret = 0;
+on:
+	hw->pg_state = MEI_PG_ON;
+out:
+	dev->pg_event = MEI_PG_EVENT_IDLE;
+	dev_dbg(dev->dev, "d0i3 enter ret = %d\n", ret);
+	return ret;
+}
+
+/**
+ * mei_me_d0i3_enter - perform d0i3 entry procedure
+ *   no hbm PG handshake
+ *   no waiting for confirmation; runs with interrupts
+ *   disabled
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success an error code otherwise
+ */
+static int mei_me_d0i3_enter(struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+	u32 reg;
+
+	reg = mei_me_d0i3c_read(dev);
+	if (reg & H_D0I3C_I3) {
+		/* we are in d0i3, nothing to do */
+		dev_dbg(dev->dev, "already d0i3 : set not needed\n");
+		goto on;
+	}
+
+	mei_me_d0i3_set(dev, false);
+on:
+	hw->pg_state = MEI_PG_ON;
+	dev->pg_event = MEI_PG_EVENT_IDLE;
+	dev_dbg(dev->dev, "d0i3 enter\n");
+	return 0;
+}
+
+/**
+ * mei_me_d0i3_exit_sync - perform d0i3 exit procedure
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success an error code otherwise
+ */
+static int mei_me_d0i3_exit_sync(struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+	unsigned long timeout = mei_secs_to_jiffies(MEI_D0I3_TIMEOUT);
+	int ret;
+	u32 reg;
+
+	dev->pg_event = MEI_PG_EVENT_INTR_WAIT;
+
+	reg = mei_me_d0i3c_read(dev);
+	if (!(reg & H_D0I3C_I3)) {
+		/* we are not in d0i3, nothing to do */
+		dev_dbg(dev->dev, "d0i3 exit not needed\n");
+		ret = 0;
+		goto off;
+	}
+
+	reg = mei_me_d0i3_unset(dev);
+	if (!(reg & H_D0I3C_CIP)) {
+		dev_dbg(dev->dev, "d0i3 exit wait not needed\n");
+		ret = 0;
+		goto off;
+	}
+
+	mutex_unlock(&dev->device_lock);
+	wait_event_timeout(dev->wait_pg,
+		dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED, timeout);
+	mutex_lock(&dev->device_lock);
+
+	if (dev->pg_event != MEI_PG_EVENT_INTR_RECEIVED) {
+		reg = mei_me_d0i3c_read(dev);
+		if (reg & H_D0I3C_I3) {
+			ret = -ETIME;
+			goto out;
+		}
+	}
+
+	ret = 0;
+off:
+	hw->pg_state = MEI_PG_OFF;
+out:
+	dev->pg_event = MEI_PG_EVENT_IDLE;
+
+	dev_dbg(dev->dev, "d0i3 exit ret = %d\n", ret);
+	return ret;
+}
+
+/**
+ * mei_me_pg_legacy_intr - perform legacy pg processing
+ *			   in interrupt thread handler
+ *
+ * @dev: the device structure
+ */
+static void mei_me_pg_legacy_intr(struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+
+	if (dev->pg_event != MEI_PG_EVENT_INTR_WAIT)
+		return;
+
+	dev->pg_event = MEI_PG_EVENT_INTR_RECEIVED;
+	hw->pg_state = MEI_PG_OFF;
+	if (waitqueue_active(&dev->wait_pg))
+		wake_up(&dev->wait_pg);
+}
+
+/**
+ * mei_me_d0i3_intr - perform d0i3 processing in interrupt thread handler
+ *
+ * @dev: the device structure
+ * @intr_source: interrupt source
+ */
+static void mei_me_d0i3_intr(struct mei_device *dev, u32 intr_source)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+
+	if (dev->pg_event == MEI_PG_EVENT_INTR_WAIT &&
+	    (intr_source & H_D0I3C_IS)) {
+		dev->pg_event = MEI_PG_EVENT_INTR_RECEIVED;
+		if (hw->pg_state == MEI_PG_ON) {
+			hw->pg_state = MEI_PG_OFF;
+			if (dev->hbm_state != MEI_HBM_IDLE) {
+				/*
+				 * force H_RDY because it could be
+				 * wiped off during PG
+				 */
+				dev_dbg(dev->dev, "d0i3 set host ready\n");
+				mei_me_host_set_ready(dev);
+			}
+		} else {
+			hw->pg_state = MEI_PG_ON;
+		}
+
+		wake_up(&dev->wait_pg);
+	}
+
+	if (hw->pg_state == MEI_PG_ON && (intr_source & H_IS)) {
+		/*
+		 * HW sent some data and we are in D0i3, so
+		 * we got here because of HW initiated exit from D0i3.
+		 * Start runtime pm resume sequence to exit low power state.
+		 */
+		dev_dbg(dev->dev, "d0i3 want resume\n");
+		mei_hbm_pg_resume(dev);
+	}
+}
+
+/**
+ * mei_me_pg_intr - perform pg processing in interrupt thread handler
+ *
+ * @dev: the device structure
+ * @intr_source: interrupt source
+ */
+static void mei_me_pg_intr(struct mei_device *dev, u32 intr_source)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+
+	if (hw->d0i3_supported)
+		mei_me_d0i3_intr(dev, intr_source);
+	else
+		mei_me_pg_legacy_intr(dev);
+}
+
+/**
+ * mei_me_pg_enter_sync - perform runtime pm entry procedure
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success an error code otherwise
+ */
+int mei_me_pg_enter_sync(struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+
+	if (hw->d0i3_supported)
+		return mei_me_d0i3_enter_sync(dev);
+	else
+		return mei_me_pg_legacy_enter_sync(dev);
+}
+
+/**
+ * mei_me_pg_exit_sync - perform runtime pm exit procedure
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success an error code otherwise
+ */
+int mei_me_pg_exit_sync(struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+
+	if (hw->d0i3_supported)
+		return mei_me_d0i3_exit_sync(dev);
+	else
+		return mei_me_pg_legacy_exit_sync(dev);
+}
+
+/**
+ * mei_me_hw_reset - resets fw via mei csr register.
+ *
+ * @dev: the device structure
+ * @intr_enable: if interrupt should be enabled after reset.
+ *
+ * Return: 0 on success an error code otherwise
+ */
+static int mei_me_hw_reset(struct mei_device *dev, bool intr_enable)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+	int ret;
+	u32 hcsr;
+
+	if (intr_enable) {
+		mei_me_intr_enable(dev);
+		if (hw->d0i3_supported) {
+			ret = mei_me_d0i3_exit_sync(dev);
+			if (ret)
+				return ret;
+		}
+	}
+
+	pm_runtime_set_active(dev->dev);
+
+	hcsr = mei_hcsr_read(dev);
+	/* H_RST may be found lit before reset is started,
+	 * for example if preceding reset flow hasn't completed.
+	 * In that case asserting H_RST will be ignored, therefore
+	 * we need to clean H_RST bit to start a successful reset sequence.
+	 */
+	if ((hcsr & H_RST) == H_RST) {
+		dev_warn(dev->dev, "H_RST is set = 0x%08X", hcsr);
+		hcsr &= ~H_RST;
+		mei_hcsr_set(dev, hcsr);
+		hcsr = mei_hcsr_read(dev);
+	}
+
+	hcsr |= H_RST | H_IG | H_CSR_IS_MASK;
+
+	if (!intr_enable)
+		hcsr &= ~H_CSR_IE_MASK;
+
+	dev->recvd_hw_ready = false;
+	mei_hcsr_write(dev, hcsr);
+
+	/*
+	 * Host reads the H_CSR once to ensure that the
+	 * posted write to H_CSR completes.
+	 */
+	hcsr = mei_hcsr_read(dev);
+
+	if ((hcsr & H_RST) == 0)
+		dev_warn(dev->dev, "H_RST is not set = 0x%08X", hcsr);
+
+	if ((hcsr & H_RDY) == H_RDY)
+		dev_warn(dev->dev, "H_RDY is not cleared 0x%08X", hcsr);
+
+	if (!intr_enable) {
+		mei_me_hw_reset_release(dev);
+		if (hw->d0i3_supported) {
+			ret = mei_me_d0i3_enter(dev);
+			if (ret)
+				return ret;
+		}
+	}
+	return 0;
+}
+
+/**
+ * mei_me_irq_quick_handler - The ISR of the MEI device
+ *
+ * @irq: The irq number
+ * @dev_id: pointer to the device structure
+ *
+ * Return: irqreturn_t
+ */
+irqreturn_t mei_me_irq_quick_handler(int irq, void *dev_id)
+{
+	struct mei_device *dev = (struct mei_device *)dev_id;
+	u32 hcsr;
+
+	hcsr = mei_hcsr_read(dev);
+	if (!me_intr_src(hcsr))
+		return IRQ_NONE;
+
+	dev_dbg(dev->dev, "interrupt source 0x%08X\n", me_intr_src(hcsr));
+
+	/* disable interrupts on device */
+	me_intr_disable(dev, hcsr);
+	return IRQ_WAKE_THREAD;
+}
+
+/**
+ * mei_me_irq_thread_handler - function called after ISR to handle the interrupt
+ * processing.
+ *
+ * @irq: The irq number
+ * @dev_id: pointer to the device structure
+ *
+ * Return: irqreturn_t
+ *
+ */
+irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id)
+{
+	struct mei_device *dev = (struct mei_device *) dev_id;
+	struct list_head cmpl_list;
+	s32 slots;
+	u32 hcsr;
+	int rets = 0;
+
+	dev_dbg(dev->dev, "function called after ISR to handle the interrupt processing.\n");
+	/* initialize our complete list */
+	mutex_lock(&dev->device_lock);
+
+	hcsr = mei_hcsr_read(dev);
+	me_intr_clear(dev, hcsr);
+
+	INIT_LIST_HEAD(&cmpl_list);
+
+	/* check if ME wants a reset */
+	if (!mei_hw_is_ready(dev) && dev->dev_state != MEI_DEV_RESETTING) {
+		dev_warn(dev->dev, "FW not ready: resetting.\n");
+		schedule_work(&dev->reset_work);
+		goto end;
+	}
+
+	if (mei_me_hw_is_resetting(dev))
+		mei_hcsr_set_hig(dev);
+
+	mei_me_pg_intr(dev, me_intr_src(hcsr));
+
+	/*  check if we need to start the dev */
+	if (!mei_host_is_ready(dev)) {
+		if (mei_hw_is_ready(dev)) {
+			dev_dbg(dev->dev, "we need to start the dev.\n");
+			dev->recvd_hw_ready = true;
+			wake_up(&dev->wait_hw_ready);
+		} else {
+			dev_dbg(dev->dev, "Spurious Interrupt\n");
+		}
+		goto end;
+	}
+	/* check slots available for reading */
+	slots = mei_count_full_read_slots(dev);
+	while (slots > 0) {
+		dev_dbg(dev->dev, "slots to read = %08x\n", slots);
+		rets = mei_irq_read_handler(dev, &cmpl_list, &slots);
+		/* There is a race between ME write and interrupt delivery:
+		 * Not all data is always available immediately after the
+		 * interrupt, so try to read again on the next interrupt.
+		 */
+		if (rets == -ENODATA)
+			break;
+
+		if (rets &&
+		    (dev->dev_state != MEI_DEV_RESETTING &&
+		     dev->dev_state != MEI_DEV_POWER_DOWN)) {
+			dev_err(dev->dev, "mei_irq_read_handler ret = %d.\n",
+						rets);
+			schedule_work(&dev->reset_work);
+			goto end;
+		}
+	}
+
+	dev->hbuf_is_ready = mei_hbuf_is_ready(dev);
+
+	/*
+	 * During PG handshake only allowed write is the replay to the
+	 * PG exit message, so block calling write function
+	 * if the pg event is in PG handshake
+	 */
+	if (dev->pg_event != MEI_PG_EVENT_WAIT &&
+	    dev->pg_event != MEI_PG_EVENT_RECEIVED) {
+		rets = mei_irq_write_handler(dev, &cmpl_list);
+		dev->hbuf_is_ready = mei_hbuf_is_ready(dev);
+	}
+
+	mei_irq_compl_handler(dev, &cmpl_list);
+
+end:
+	dev_dbg(dev->dev, "interrupt thread end ret = %d\n", rets);
+	mei_me_intr_enable(dev);
+	mutex_unlock(&dev->device_lock);
+	return IRQ_HANDLED;
+}
+
+static const struct mei_hw_ops mei_me_hw_ops = {
+
+	.fw_status = mei_me_fw_status,
+	.pg_state  = mei_me_pg_state,
+
+	.host_is_ready = mei_me_host_is_ready,
+
+	.hw_is_ready = mei_me_hw_is_ready,
+	.hw_reset = mei_me_hw_reset,
+	.hw_config = mei_me_hw_config,
+	.hw_start = mei_me_hw_start,
+
+	.pg_in_transition = mei_me_pg_in_transition,
+	.pg_is_enabled = mei_me_pg_is_enabled,
+
+	.intr_clear = mei_me_intr_clear,
+	.intr_enable = mei_me_intr_enable,
+	.intr_disable = mei_me_intr_disable,
+	.synchronize_irq = mei_me_synchronize_irq,
+
+	.hbuf_free_slots = mei_me_hbuf_empty_slots,
+	.hbuf_is_ready = mei_me_hbuf_is_empty,
+	.hbuf_depth = mei_me_hbuf_depth,
+
+	.write = mei_me_hbuf_write,
+
+	.rdbuf_full_slots = mei_me_count_full_read_slots,
+	.read_hdr = mei_me_mecbrw_read,
+	.read = mei_me_read_slots
+};
+
+static bool mei_me_fw_type_nm(struct pci_dev *pdev)
+{
+	u32 reg;
+
+	pci_read_config_dword(pdev, PCI_CFG_HFS_2, &reg);
+	trace_mei_pci_cfg_read(&pdev->dev, "PCI_CFG_HFS_2", PCI_CFG_HFS_2, reg);
+	/* make sure that bit 9 (NM) is up and bit 10 (DM) is down */
+	return (reg & 0x600) == 0x200;
+}
+
+#define MEI_CFG_FW_NM                           \
+	.quirk_probe = mei_me_fw_type_nm
+
+static bool mei_me_fw_type_sps(struct pci_dev *pdev)
+{
+	u32 reg;
+	unsigned int devfn;
+
+	/*
+	 * Read ME FW Status register to check for SPS Firmware
+	 * The SPS FW is only signaled in pci function 0
+	 */
+	devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0);
+	pci_bus_read_config_dword(pdev->bus, devfn, PCI_CFG_HFS_1, &reg);
+	trace_mei_pci_cfg_read(&pdev->dev, "PCI_CFG_HFS_1", PCI_CFG_HFS_1, reg);
+	/* if bits [19:16] = 15, running SPS Firmware */
+	return (reg & 0xf0000) == 0xf0000;
+}
+
+#define MEI_CFG_FW_SPS                           \
+	.quirk_probe = mei_me_fw_type_sps
+
+#define MEI_CFG_FW_VER_SUPP                     \
+	.fw_ver_supported = 1
+
+#define MEI_CFG_ICH_HFS                      \
+	.fw_status.count = 0
+
+#define MEI_CFG_ICH10_HFS                        \
+	.fw_status.count = 1,                   \
+	.fw_status.status[0] = PCI_CFG_HFS_1
+
+#define MEI_CFG_PCH_HFS                         \
+	.fw_status.count = 2,                   \
+	.fw_status.status[0] = PCI_CFG_HFS_1,   \
+	.fw_status.status[1] = PCI_CFG_HFS_2
+
+#define MEI_CFG_PCH8_HFS                        \
+	.fw_status.count = 6,                   \
+	.fw_status.status[0] = PCI_CFG_HFS_1,   \
+	.fw_status.status[1] = PCI_CFG_HFS_2,   \
+	.fw_status.status[2] = PCI_CFG_HFS_3,   \
+	.fw_status.status[3] = PCI_CFG_HFS_4,   \
+	.fw_status.status[4] = PCI_CFG_HFS_5,   \
+	.fw_status.status[5] = PCI_CFG_HFS_6
+
+#define MEI_CFG_DMA_128 \
+	.dma_size[DMA_DSCR_HOST] = SZ_128K, \
+	.dma_size[DMA_DSCR_DEVICE] = SZ_128K, \
+	.dma_size[DMA_DSCR_CTRL] = PAGE_SIZE
+
+/* ICH Legacy devices */
+static const struct mei_cfg mei_me_ich_cfg = {
+	MEI_CFG_ICH_HFS,
+};
+
+/* ICH devices */
+static const struct mei_cfg mei_me_ich10_cfg = {
+	MEI_CFG_ICH10_HFS,
+};
+
+/* PCH6 devices */
+static const struct mei_cfg mei_me_pch6_cfg = {
+	MEI_CFG_PCH_HFS,
+};
+
+/* PCH7 devices */
+static const struct mei_cfg mei_me_pch7_cfg = {
+	MEI_CFG_PCH_HFS,
+	MEI_CFG_FW_VER_SUPP,
+};
+
+/* PCH Cougar Point and Patsburg with quirk for Node Manager exclusion */
+static const struct mei_cfg mei_me_pch_cpt_pbg_cfg = {
+	MEI_CFG_PCH_HFS,
+	MEI_CFG_FW_VER_SUPP,
+	MEI_CFG_FW_NM,
+};
+
+/* PCH8 Lynx Point and newer devices */
+static const struct mei_cfg mei_me_pch8_cfg = {
+	MEI_CFG_PCH8_HFS,
+	MEI_CFG_FW_VER_SUPP,
+};
+
+/* PCH8 Lynx Point with quirk for SPS Firmware exclusion */
+static const struct mei_cfg mei_me_pch8_sps_cfg = {
+	MEI_CFG_PCH8_HFS,
+	MEI_CFG_FW_VER_SUPP,
+	MEI_CFG_FW_SPS,
+};
+
+/* Cannon Lake and newer devices */
+static const struct mei_cfg mei_me_pch12_cfg = {
+	MEI_CFG_PCH8_HFS,
+	MEI_CFG_FW_VER_SUPP,
+	MEI_CFG_DMA_128,
+};
+
+/*
+ * mei_cfg_list - A list of platform platform specific configurations.
+ * Note: has to be synchronized with  enum mei_cfg_idx.
+ */
+static const struct mei_cfg *const mei_cfg_list[] = {
+	[MEI_ME_UNDEF_CFG] = NULL,
+	[MEI_ME_ICH_CFG] = &mei_me_ich_cfg,
+	[MEI_ME_ICH10_CFG] = &mei_me_ich10_cfg,
+	[MEI_ME_PCH6_CFG] = &mei_me_pch6_cfg,
+	[MEI_ME_PCH7_CFG] = &mei_me_pch7_cfg,
+	[MEI_ME_PCH_CPT_PBG_CFG] = &mei_me_pch_cpt_pbg_cfg,
+	[MEI_ME_PCH8_CFG] = &mei_me_pch8_cfg,
+	[MEI_ME_PCH8_SPS_CFG] = &mei_me_pch8_sps_cfg,
+	[MEI_ME_PCH12_CFG] = &mei_me_pch12_cfg,
+};
+
+const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx)
+{
+	BUILD_BUG_ON(ARRAY_SIZE(mei_cfg_list) != MEI_ME_NUM_CFG);
+
+	if (idx >= MEI_ME_NUM_CFG)
+		return NULL;
+
+	return mei_cfg_list[idx];
+};
+
+/**
+ * mei_me_dev_init - allocates and initializes the mei device structure
+ *
+ * @pdev: The pci device structure
+ * @cfg: per device generation config
+ *
+ * Return: The mei_device pointer on success, NULL on failure.
+ */
+struct mei_device *mei_me_dev_init(struct pci_dev *pdev,
+				   const struct mei_cfg *cfg)
+{
+	struct mei_device *dev;
+	struct mei_me_hw *hw;
+
+	dev = devm_kzalloc(&pdev->dev, sizeof(struct mei_device) +
+			   sizeof(struct mei_me_hw), GFP_KERNEL);
+	if (!dev)
+		return NULL;
+	hw = to_me_hw(dev);
+
+	mei_device_init(dev, &pdev->dev, &mei_me_hw_ops);
+	hw->cfg = cfg;
+	dev->fw_f_fw_ver_supported = cfg->fw_ver_supported;
+
+	return dev;
+}
+
diff --git a/drivers/misc/mei/hw-me.h b/drivers/misc/mei/hw-me.h
new file mode 100644
index 000000000..775971339
--- /dev/null
+++ b/drivers/misc/mei/hw-me.h
@@ -0,0 +1,115 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+
+
+#ifndef _MEI_INTERFACE_H_
+#define _MEI_INTERFACE_H_
+
+#include <linux/irqreturn.h>
+#include <linux/pci.h>
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+#include "client.h"
+
+/*
+ * mei_cfg - mei device configuration
+ *
+ * @fw_status: FW status
+ * @quirk_probe: device exclusion quirk
+ * @dma_size: device DMA buffers size
+ * @fw_ver_supported: is fw version retrievable from FW
+ */
+struct mei_cfg {
+	const struct mei_fw_status fw_status;
+	bool (*quirk_probe)(struct pci_dev *pdev);
+	size_t dma_size[DMA_DSCR_NUM];
+	u32 fw_ver_supported:1;
+};
+
+
+#define MEI_PCI_DEVICE(dev, cfg) \
+	.vendor = PCI_VENDOR_ID_INTEL, .device = (dev), \
+	.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, \
+	.driver_data = (kernel_ulong_t)(cfg),
+
+#define MEI_ME_RPM_TIMEOUT    500 /* ms */
+
+/**
+ * struct mei_me_hw - me hw specific data
+ *
+ * @cfg: per device generation config and ops
+ * @mem_addr: io memory address
+ * @pg_state: power gating state
+ * @d0i3_supported: di03 support
+ * @hbuf_depth: depth of hardware host/write buffer in slots
+ */
+struct mei_me_hw {
+	const struct mei_cfg *cfg;
+	void __iomem *mem_addr;
+	enum mei_pg_state pg_state;
+	bool d0i3_supported;
+	u8 hbuf_depth;
+};
+
+#define to_me_hw(dev) (struct mei_me_hw *)((dev)->hw)
+
+/**
+ * enum mei_cfg_idx - indices to platform specific configurations.
+ *
+ * Note: has to be synchronized with mei_cfg_list[]
+ *
+ * @MEI_ME_UNDEF_CFG:      Lower sentinel.
+ * @MEI_ME_ICH_CFG:        I/O Controller Hub legacy devices.
+ * @MEI_ME_ICH10_CFG:      I/O Controller Hub platforms Gen10
+ * @MEI_ME_PCH6_CFG:       Platform Controller Hub platforms (Gen6).
+ * @MEI_ME_PCH7_CFG:       Platform Controller Hub platforms (Gen7).
+ * @MEI_ME_PCH_CPT_PBG_CFG:Platform Controller Hub workstations
+ *                         with quirk for Node Manager exclusion.
+ * @MEI_ME_PCH8_CFG:       Platform Controller Hub Gen8 and newer
+ *                         client platforms.
+ * @MEI_ME_PCH8_SPS_CFG:   Platform Controller Hub Gen8 and newer
+ *                         servers platforms with quirk for
+ *                         SPS firmware exclusion.
+ * @MEI_ME_PCH12_CFG:      Platform Controller Hub Gen12 and newer
+ * @MEI_ME_NUM_CFG:        Upper Sentinel.
+ */
+enum mei_cfg_idx {
+	MEI_ME_UNDEF_CFG,
+	MEI_ME_ICH_CFG,
+	MEI_ME_ICH10_CFG,
+	MEI_ME_PCH6_CFG,
+	MEI_ME_PCH7_CFG,
+	MEI_ME_PCH_CPT_PBG_CFG,
+	MEI_ME_PCH8_CFG,
+	MEI_ME_PCH8_SPS_CFG,
+	MEI_ME_PCH12_CFG,
+	MEI_ME_NUM_CFG,
+};
+
+const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx);
+
+struct mei_device *mei_me_dev_init(struct pci_dev *pdev,
+				   const struct mei_cfg *cfg);
+
+int mei_me_pg_enter_sync(struct mei_device *dev);
+int mei_me_pg_exit_sync(struct mei_device *dev);
+
+irqreturn_t mei_me_irq_quick_handler(int irq, void *dev_id);
+irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id);
+
+#endif /* _MEI_INTERFACE_H_ */
diff --git a/drivers/misc/mei/hw-txe-regs.h b/drivers/misc/mei/hw-txe-regs.h
new file mode 100644
index 000000000..f19229c4e
--- /dev/null
+++ b/drivers/misc/mei/hw-txe-regs.h
@@ -0,0 +1,294 @@
+/******************************************************************************
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Intel MEI Interface Header
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2013 - 2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING
+ *
+ * Contact Information:
+ *	Intel Corporation.
+ *	linux-mei@linux.intel.com
+ *	http://www.intel.com
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2013 - 2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  * Neither the name Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+#ifndef _MEI_HW_TXE_REGS_H_
+#define _MEI_HW_TXE_REGS_H_
+
+#include "hw.h"
+
+#define SEC_ALIVENESS_TIMER_TIMEOUT        (5 * MSEC_PER_SEC)
+#define SEC_ALIVENESS_WAIT_TIMEOUT         (1 * MSEC_PER_SEC)
+#define SEC_RESET_WAIT_TIMEOUT             (1 * MSEC_PER_SEC)
+#define SEC_READY_WAIT_TIMEOUT             (5 * MSEC_PER_SEC)
+#define START_MESSAGE_RESPONSE_WAIT_TIMEOUT (5 * MSEC_PER_SEC)
+#define RESET_CANCEL_WAIT_TIMEOUT          (1 * MSEC_PER_SEC)
+
+enum {
+	SEC_BAR,
+	BRIDGE_BAR,
+
+	NUM_OF_MEM_BARS
+};
+
+/* SeC FW Status Register
+ *
+ * FW uses this register in order to report its status to host.
+ * This register resides in PCI-E config space.
+ */
+#define PCI_CFG_TXE_FW_STS0   0x40
+#  define PCI_CFG_TXE_FW_STS0_WRK_ST_MSK    0x0000000F
+#  define PCI_CFG_TXE_FW_STS0_OP_ST_MSK     0x000001C0
+#  define PCI_CFG_TXE_FW_STS0_FW_INIT_CMPLT 0x00000200
+#  define PCI_CFG_TXE_FW_STS0_ERR_CODE_MSK  0x0000F000
+#  define PCI_CFG_TXE_FW_STS0_OP_MODE_MSK   0x000F0000
+#  define PCI_CFG_TXE_FW_STS0_RST_CNT_MSK   0x00F00000
+#define PCI_CFG_TXE_FW_STS1   0x48
+
+#define IPC_BASE_ADDR	0x80400 /* SeC IPC Base Address */
+
+/* IPC Input Doorbell Register */
+#define SEC_IPC_INPUT_DOORBELL_REG       (0x0000 + IPC_BASE_ADDR)
+
+/* IPC Input Status Register
+ * This register indicates whether or not processing of
+ * the most recent command has been completed by the SEC
+ * New commands and payloads should not be written by the Host
+ * until this indicates that the previous command has been processed.
+ */
+#define SEC_IPC_INPUT_STATUS_REG         (0x0008 + IPC_BASE_ADDR)
+#  define SEC_IPC_INPUT_STATUS_RDY    BIT(0)
+
+/* IPC Host Interrupt Status Register */
+#define SEC_IPC_HOST_INT_STATUS_REG      (0x0010 + IPC_BASE_ADDR)
+#define   SEC_IPC_HOST_INT_STATUS_OUT_DB             BIT(0)
+#define   SEC_IPC_HOST_INT_STATUS_IN_RDY             BIT(1)
+#define   SEC_IPC_HOST_INT_STATUS_HDCP_M0_RCVD       BIT(5)
+#define   SEC_IPC_HOST_INT_STATUS_ILL_MEM_ACCESS     BIT(17)
+#define   SEC_IPC_HOST_INT_STATUS_AES_HKEY_ERR       BIT(18)
+#define   SEC_IPC_HOST_INT_STATUS_DES_HKEY_ERR       BIT(19)
+#define   SEC_IPC_HOST_INT_STATUS_TMRMTB_OVERFLOW    BIT(21)
+
+/* Convenient mask for pending interrupts */
+#define   SEC_IPC_HOST_INT_STATUS_PENDING \
+		(SEC_IPC_HOST_INT_STATUS_OUT_DB| \
+		SEC_IPC_HOST_INT_STATUS_IN_RDY)
+
+/* IPC Host Interrupt Mask Register */
+#define SEC_IPC_HOST_INT_MASK_REG        (0x0014 + IPC_BASE_ADDR)
+
+#  define SEC_IPC_HOST_INT_MASK_OUT_DB	BIT(0) /* Output Doorbell Int Mask */
+#  define SEC_IPC_HOST_INT_MASK_IN_RDY	BIT(1) /* Input Ready Int Mask */
+
+/* IPC Input Payload RAM */
+#define SEC_IPC_INPUT_PAYLOAD_REG        (0x0100 + IPC_BASE_ADDR)
+/* IPC Shared Payload RAM */
+#define IPC_SHARED_PAYLOAD_REG           (0x0200 + IPC_BASE_ADDR)
+
+/* SeC Address Translation Table Entry 2 - Ctrl
+ *
+ * This register resides also in SeC's PCI-E Memory space.
+ */
+#define SATT2_CTRL_REG                   0x1040
+#  define SATT2_CTRL_VALID_MSK            BIT(0)
+#  define SATT2_CTRL_BR_BASE_ADDR_REG_SHIFT 8
+#  define SATT2_CTRL_BRIDGE_HOST_EN_MSK   BIT(12)
+
+/* SATT Table Entry 2 SAP Base Address Register */
+#define SATT2_SAP_BA_REG                 0x1044
+/* SATT Table Entry 2 SAP Size Register. */
+#define SATT2_SAP_SIZE_REG               0x1048
+ /* SATT Table Entry 2 SAP Bridge Address - LSB Register */
+#define SATT2_BRG_BA_LSB_REG             0x104C
+
+/* Host High-level Interrupt Status Register */
+#define HHISR_REG                        0x2020
+/* Host High-level Interrupt Enable Register
+ *
+ * Resides in PCI memory space. This is the top hierarchy for
+ * interrupts from SeC to host, aggregating both interrupts that
+ * arrive through HICR registers as well as interrupts
+ * that arrive via IPC.
+ */
+#define HHIER_REG                        0x2024
+#define   IPC_HHIER_SEC	BIT(0)
+#define   IPC_HHIER_BRIDGE	BIT(1)
+#define   IPC_HHIER_MSK	(IPC_HHIER_SEC | IPC_HHIER_BRIDGE)
+
+/* Host High-level Interrupt Mask Register.
+ *
+ * Resides in PCI memory space.
+ * This is the top hierarchy for masking interrupts from SeC to host.
+ */
+#define HHIMR_REG                        0x2028
+#define   IPC_HHIMR_SEC       BIT(0)
+#define   IPC_HHIMR_BRIDGE    BIT(1)
+
+/* Host High-level IRQ Status Register */
+#define HHIRQSR_REG                      0x202C
+
+/* Host Interrupt Cause Register 0 - SeC IPC Readiness
+ *
+ * This register is both an ICR to Host from PCI Memory Space
+ * and it is also exposed in the SeC memory space.
+ * This register is used by SeC's IPC driver in order
+ * to synchronize with host about IPC interface state.
+ */
+#define HICR_SEC_IPC_READINESS_REG       0x2040
+#define   HICR_SEC_IPC_READINESS_HOST_RDY  BIT(0)
+#define   HICR_SEC_IPC_READINESS_SEC_RDY   BIT(1)
+#define   HICR_SEC_IPC_READINESS_SYS_RDY     \
+	  (HICR_SEC_IPC_READINESS_HOST_RDY | \
+	   HICR_SEC_IPC_READINESS_SEC_RDY)
+#define   HICR_SEC_IPC_READINESS_RDY_CLR   BIT(2)
+
+/* Host Interrupt Cause Register 1 - Aliveness Response */
+/* This register is both an ICR to Host from PCI Memory Space
+ * and it is also exposed in the SeC memory space.
+ * The register may be used by SeC to ACK a host request for aliveness.
+ */
+#define HICR_HOST_ALIVENESS_RESP_REG     0x2044
+#define   HICR_HOST_ALIVENESS_RESP_ACK    BIT(0)
+
+/* Host Interrupt Cause Register 2 - SeC IPC Output Doorbell */
+#define HICR_SEC_IPC_OUTPUT_DOORBELL_REG 0x2048
+
+/* Host Interrupt Status Register.
+ *
+ * Resides in PCI memory space.
+ * This is the main register involved in generating interrupts
+ * from SeC to host via HICRs.
+ * The interrupt generation rules are as follows:
+ * An interrupt will be generated whenever for any i,
+ * there is a transition from a state where at least one of
+ * the following conditions did not hold, to a state where
+ * ALL the following conditions hold:
+ * A) HISR.INT[i]_STS == 1.
+ * B) HIER.INT[i]_EN == 1.
+ */
+#define HISR_REG                         0x2060
+#define   HISR_INT_0_STS      BIT(0)
+#define   HISR_INT_1_STS      BIT(1)
+#define   HISR_INT_2_STS      BIT(2)
+#define   HISR_INT_3_STS      BIT(3)
+#define   HISR_INT_4_STS      BIT(4)
+#define   HISR_INT_5_STS      BIT(5)
+#define   HISR_INT_6_STS      BIT(6)
+#define   HISR_INT_7_STS      BIT(7)
+#define   HISR_INT_STS_MSK \
+	(HISR_INT_0_STS | HISR_INT_1_STS | HISR_INT_2_STS)
+
+/* Host Interrupt Enable Register. Resides in PCI memory space. */
+#define HIER_REG                         0x2064
+#define   HIER_INT_0_EN      BIT(0)
+#define   HIER_INT_1_EN      BIT(1)
+#define   HIER_INT_2_EN      BIT(2)
+#define   HIER_INT_3_EN      BIT(3)
+#define   HIER_INT_4_EN      BIT(4)
+#define   HIER_INT_5_EN      BIT(5)
+#define   HIER_INT_6_EN      BIT(6)
+#define   HIER_INT_7_EN      BIT(7)
+
+#define   HIER_INT_EN_MSK \
+	 (HIER_INT_0_EN | HIER_INT_1_EN | HIER_INT_2_EN)
+
+
+/* SEC Memory Space IPC output payload.
+ *
+ * This register is part of the output payload which SEC provides to host.
+ */
+#define BRIDGE_IPC_OUTPUT_PAYLOAD_REG    0x20C0
+
+/* SeC Interrupt Cause Register - Host Aliveness Request
+ * This register is both an ICR to SeC and it is also exposed
+ * in the host-visible PCI memory space.
+ * The register is used by host to request SeC aliveness.
+ */
+#define SICR_HOST_ALIVENESS_REQ_REG      0x214C
+#define   SICR_HOST_ALIVENESS_REQ_REQUESTED    BIT(0)
+
+
+/* SeC Interrupt Cause Register - Host IPC Readiness
+ *
+ * This register is both an ICR to SeC and it is also exposed
+ * in the host-visible PCI memory space.
+ * This register is used by the host's SeC driver uses in order
+ * to synchronize with SeC about IPC interface state.
+ */
+#define SICR_HOST_IPC_READINESS_REQ_REG  0x2150
+
+
+#define SICR_HOST_IPC_READINESS_HOST_RDY  BIT(0)
+#define SICR_HOST_IPC_READINESS_SEC_RDY   BIT(1)
+#define SICR_HOST_IPC_READINESS_SYS_RDY     \
+	(SICR_HOST_IPC_READINESS_HOST_RDY | \
+	 SICR_HOST_IPC_READINESS_SEC_RDY)
+#define SICR_HOST_IPC_READINESS_RDY_CLR   BIT(2)
+
+/* SeC Interrupt Cause Register - SeC IPC Output Status
+ *
+ * This register indicates whether or not processing of the most recent
+ * command has been completed by the Host.
+ * New commands and payloads should not be written by SeC until this
+ * register indicates that the previous command has been processed.
+ */
+#define SICR_SEC_IPC_OUTPUT_STATUS_REG   0x2154
+#  define SEC_IPC_OUTPUT_STATUS_RDY BIT(0)
+
+
+
+/*  MEI IPC Message payload size 64 bytes */
+#define PAYLOAD_SIZE        64
+
+/* MAX size for SATT range 32MB */
+#define SATT_RANGE_MAX     (32 << 20)
+
+
+#endif /* _MEI_HW_TXE_REGS_H_ */
+
diff --git a/drivers/misc/mei/hw-txe.c b/drivers/misc/mei/hw-txe.c
new file mode 100644
index 000000000..8449fe036
--- /dev/null
+++ b/drivers/misc/mei/hw-txe.c
@@ -0,0 +1,1267 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/jiffies.h>
+#include <linux/ktime.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/interrupt.h>
+#include <linux/pm_runtime.h>
+
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+#include "hw-txe.h"
+#include "client.h"
+#include "hbm.h"
+
+#include "mei-trace.h"
+
+#define TXE_HBUF_DEPTH (PAYLOAD_SIZE / MEI_SLOT_SIZE)
+
+/**
+ * mei_txe_reg_read - Reads 32bit data from the txe device
+ *
+ * @base_addr: registers base address
+ * @offset: register offset
+ *
+ * Return: register value
+ */
+static inline u32 mei_txe_reg_read(void __iomem *base_addr,
+					unsigned long offset)
+{
+	return ioread32(base_addr + offset);
+}
+
+/**
+ * mei_txe_reg_write - Writes 32bit data to the txe device
+ *
+ * @base_addr: registers base address
+ * @offset: register offset
+ * @value: the value to write
+ */
+static inline void mei_txe_reg_write(void __iomem *base_addr,
+				unsigned long offset, u32 value)
+{
+	iowrite32(value, base_addr + offset);
+}
+
+/**
+ * mei_txe_sec_reg_read_silent - Reads 32bit data from the SeC BAR
+ *
+ * @hw: the txe hardware structure
+ * @offset: register offset
+ *
+ * Doesn't check for aliveness while Reads 32bit data from the SeC BAR
+ *
+ * Return: register value
+ */
+static inline u32 mei_txe_sec_reg_read_silent(struct mei_txe_hw *hw,
+				unsigned long offset)
+{
+	return mei_txe_reg_read(hw->mem_addr[SEC_BAR], offset);
+}
+
+/**
+ * mei_txe_sec_reg_read - Reads 32bit data from the SeC BAR
+ *
+ * @hw: the txe hardware structure
+ * @offset: register offset
+ *
+ * Reads 32bit data from the SeC BAR and shout loud if aliveness is not set
+ *
+ * Return: register value
+ */
+static inline u32 mei_txe_sec_reg_read(struct mei_txe_hw *hw,
+				unsigned long offset)
+{
+	WARN(!hw->aliveness, "sec read: aliveness not asserted\n");
+	return mei_txe_sec_reg_read_silent(hw, offset);
+}
+/**
+ * mei_txe_sec_reg_write_silent - Writes 32bit data to the SeC BAR
+ *   doesn't check for aliveness
+ *
+ * @hw: the txe hardware structure
+ * @offset: register offset
+ * @value: value to write
+ *
+ * Doesn't check for aliveness while writes 32bit data from to the SeC BAR
+ */
+static inline void mei_txe_sec_reg_write_silent(struct mei_txe_hw *hw,
+				unsigned long offset, u32 value)
+{
+	mei_txe_reg_write(hw->mem_addr[SEC_BAR], offset, value);
+}
+
+/**
+ * mei_txe_sec_reg_write - Writes 32bit data to the SeC BAR
+ *
+ * @hw: the txe hardware structure
+ * @offset: register offset
+ * @value: value to write
+ *
+ * Writes 32bit data from the SeC BAR and shout loud if aliveness is not set
+ */
+static inline void mei_txe_sec_reg_write(struct mei_txe_hw *hw,
+				unsigned long offset, u32 value)
+{
+	WARN(!hw->aliveness, "sec write: aliveness not asserted\n");
+	mei_txe_sec_reg_write_silent(hw, offset, value);
+}
+/**
+ * mei_txe_br_reg_read - Reads 32bit data from the Bridge BAR
+ *
+ * @hw: the txe hardware structure
+ * @offset: offset from which to read the data
+ *
+ * Return: the byte read.
+ */
+static inline u32 mei_txe_br_reg_read(struct mei_txe_hw *hw,
+				unsigned long offset)
+{
+	return mei_txe_reg_read(hw->mem_addr[BRIDGE_BAR], offset);
+}
+
+/**
+ * mei_txe_br_reg_write - Writes 32bit data to the Bridge BAR
+ *
+ * @hw: the txe hardware structure
+ * @offset: offset from which to write the data
+ * @value: the byte to write
+ */
+static inline void mei_txe_br_reg_write(struct mei_txe_hw *hw,
+				unsigned long offset, u32 value)
+{
+	mei_txe_reg_write(hw->mem_addr[BRIDGE_BAR], offset, value);
+}
+
+/**
+ * mei_txe_aliveness_set - request for aliveness change
+ *
+ * @dev: the device structure
+ * @req: requested aliveness value
+ *
+ * Request for aliveness change and returns true if the change is
+ *   really needed and false if aliveness is already
+ *   in the requested state
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * Return: true if request was send
+ */
+static bool mei_txe_aliveness_set(struct mei_device *dev, u32 req)
+{
+
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+	bool do_req = hw->aliveness != req;
+
+	dev_dbg(dev->dev, "Aliveness current=%d request=%d\n",
+				hw->aliveness, req);
+	if (do_req) {
+		dev->pg_event = MEI_PG_EVENT_WAIT;
+		mei_txe_br_reg_write(hw, SICR_HOST_ALIVENESS_REQ_REG, req);
+	}
+	return do_req;
+}
+
+
+/**
+ * mei_txe_aliveness_req_get - get aliveness requested register value
+ *
+ * @dev: the device structure
+ *
+ * Extract HICR_HOST_ALIVENESS_RESP_ACK bit from
+ * from HICR_HOST_ALIVENESS_REQ register value
+ *
+ * Return: SICR_HOST_ALIVENESS_REQ_REQUESTED bit value
+ */
+static u32 mei_txe_aliveness_req_get(struct mei_device *dev)
+{
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+	u32 reg;
+
+	reg = mei_txe_br_reg_read(hw, SICR_HOST_ALIVENESS_REQ_REG);
+	return reg & SICR_HOST_ALIVENESS_REQ_REQUESTED;
+}
+
+/**
+ * mei_txe_aliveness_get - get aliveness response register value
+ *
+ * @dev: the device structure
+ *
+ * Return: HICR_HOST_ALIVENESS_RESP_ACK bit from HICR_HOST_ALIVENESS_RESP
+ *         register
+ */
+static u32 mei_txe_aliveness_get(struct mei_device *dev)
+{
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+	u32 reg;
+
+	reg = mei_txe_br_reg_read(hw, HICR_HOST_ALIVENESS_RESP_REG);
+	return reg & HICR_HOST_ALIVENESS_RESP_ACK;
+}
+
+/**
+ * mei_txe_aliveness_poll - waits for aliveness to settle
+ *
+ * @dev: the device structure
+ * @expected: expected aliveness value
+ *
+ * Polls for HICR_HOST_ALIVENESS_RESP.ALIVENESS_RESP to be set
+ *
+ * Return: 0 if the expected value was received, -ETIME otherwise
+ */
+static int mei_txe_aliveness_poll(struct mei_device *dev, u32 expected)
+{
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+	ktime_t stop, start;
+
+	start = ktime_get();
+	stop = ktime_add(start, ms_to_ktime(SEC_ALIVENESS_WAIT_TIMEOUT));
+	do {
+		hw->aliveness = mei_txe_aliveness_get(dev);
+		if (hw->aliveness == expected) {
+			dev->pg_event = MEI_PG_EVENT_IDLE;
+			dev_dbg(dev->dev, "aliveness settled after %lld usecs\n",
+				ktime_to_us(ktime_sub(ktime_get(), start)));
+			return 0;
+		}
+		usleep_range(20, 50);
+	} while (ktime_before(ktime_get(), stop));
+
+	dev->pg_event = MEI_PG_EVENT_IDLE;
+	dev_err(dev->dev, "aliveness timed out\n");
+	return -ETIME;
+}
+
+/**
+ * mei_txe_aliveness_wait - waits for aliveness to settle
+ *
+ * @dev: the device structure
+ * @expected: expected aliveness value
+ *
+ * Waits for HICR_HOST_ALIVENESS_RESP.ALIVENESS_RESP to be set
+ *
+ * Return: 0 on success and < 0 otherwise
+ */
+static int mei_txe_aliveness_wait(struct mei_device *dev, u32 expected)
+{
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+	const unsigned long timeout =
+			msecs_to_jiffies(SEC_ALIVENESS_WAIT_TIMEOUT);
+	long err;
+	int ret;
+
+	hw->aliveness = mei_txe_aliveness_get(dev);
+	if (hw->aliveness == expected)
+		return 0;
+
+	mutex_unlock(&dev->device_lock);
+	err = wait_event_timeout(hw->wait_aliveness_resp,
+			dev->pg_event == MEI_PG_EVENT_RECEIVED, timeout);
+	mutex_lock(&dev->device_lock);
+
+	hw->aliveness = mei_txe_aliveness_get(dev);
+	ret = hw->aliveness == expected ? 0 : -ETIME;
+
+	if (ret)
+		dev_warn(dev->dev, "aliveness timed out = %ld aliveness = %d event = %d\n",
+			err, hw->aliveness, dev->pg_event);
+	else
+		dev_dbg(dev->dev, "aliveness settled after = %d msec aliveness = %d event = %d\n",
+			jiffies_to_msecs(timeout - err),
+			hw->aliveness, dev->pg_event);
+
+	dev->pg_event = MEI_PG_EVENT_IDLE;
+	return ret;
+}
+
+/**
+ * mei_txe_aliveness_set_sync - sets an wait for aliveness to complete
+ *
+ * @dev: the device structure
+ * @req: requested aliveness value
+ *
+ * Return: 0 on success and < 0 otherwise
+ */
+int mei_txe_aliveness_set_sync(struct mei_device *dev, u32 req)
+{
+	if (mei_txe_aliveness_set(dev, req))
+		return mei_txe_aliveness_wait(dev, req);
+	return 0;
+}
+
+/**
+ * mei_txe_pg_in_transition - is device now in pg transition
+ *
+ * @dev: the device structure
+ *
+ * Return: true if in pg transition, false otherwise
+ */
+static bool mei_txe_pg_in_transition(struct mei_device *dev)
+{
+	return dev->pg_event == MEI_PG_EVENT_WAIT;
+}
+
+/**
+ * mei_txe_pg_is_enabled - detect if PG is supported by HW
+ *
+ * @dev: the device structure
+ *
+ * Return: true is pg supported, false otherwise
+ */
+static bool mei_txe_pg_is_enabled(struct mei_device *dev)
+{
+	return true;
+}
+
+/**
+ * mei_txe_pg_state  - translate aliveness register value
+ *   to the mei power gating state
+ *
+ * @dev: the device structure
+ *
+ * Return: MEI_PG_OFF if aliveness is on and MEI_PG_ON otherwise
+ */
+static inline enum mei_pg_state mei_txe_pg_state(struct mei_device *dev)
+{
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+
+	return hw->aliveness ? MEI_PG_OFF : MEI_PG_ON;
+}
+
+/**
+ * mei_txe_input_ready_interrupt_enable - sets the Input Ready Interrupt
+ *
+ * @dev: the device structure
+ */
+static void mei_txe_input_ready_interrupt_enable(struct mei_device *dev)
+{
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+	u32 hintmsk;
+	/* Enable the SEC_IPC_HOST_INT_MASK_IN_RDY interrupt */
+	hintmsk = mei_txe_sec_reg_read(hw, SEC_IPC_HOST_INT_MASK_REG);
+	hintmsk |= SEC_IPC_HOST_INT_MASK_IN_RDY;
+	mei_txe_sec_reg_write(hw, SEC_IPC_HOST_INT_MASK_REG, hintmsk);
+}
+
+/**
+ * mei_txe_input_doorbell_set - sets bit 0 in
+ *    SEC_IPC_INPUT_DOORBELL.IPC_INPUT_DOORBELL.
+ *
+ * @hw: the txe hardware structure
+ */
+static void mei_txe_input_doorbell_set(struct mei_txe_hw *hw)
+{
+	/* Clear the interrupt cause */
+	clear_bit(TXE_INTR_IN_READY_BIT, &hw->intr_cause);
+	mei_txe_sec_reg_write(hw, SEC_IPC_INPUT_DOORBELL_REG, 1);
+}
+
+/**
+ * mei_txe_output_ready_set - Sets the SICR_SEC_IPC_OUTPUT_STATUS bit to 1
+ *
+ * @hw: the txe hardware structure
+ */
+static void mei_txe_output_ready_set(struct mei_txe_hw *hw)
+{
+	mei_txe_br_reg_write(hw,
+			SICR_SEC_IPC_OUTPUT_STATUS_REG,
+			SEC_IPC_OUTPUT_STATUS_RDY);
+}
+
+/**
+ * mei_txe_is_input_ready - check if TXE is ready for receiving data
+ *
+ * @dev: the device structure
+ *
+ * Return: true if INPUT STATUS READY bit is set
+ */
+static bool mei_txe_is_input_ready(struct mei_device *dev)
+{
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+	u32 status;
+
+	status = mei_txe_sec_reg_read(hw, SEC_IPC_INPUT_STATUS_REG);
+	return !!(SEC_IPC_INPUT_STATUS_RDY & status);
+}
+
+/**
+ * mei_txe_intr_clear - clear all interrupts
+ *
+ * @dev: the device structure
+ */
+static inline void mei_txe_intr_clear(struct mei_device *dev)
+{
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+
+	mei_txe_sec_reg_write_silent(hw, SEC_IPC_HOST_INT_STATUS_REG,
+		SEC_IPC_HOST_INT_STATUS_PENDING);
+	mei_txe_br_reg_write(hw, HISR_REG, HISR_INT_STS_MSK);
+	mei_txe_br_reg_write(hw, HHISR_REG, IPC_HHIER_MSK);
+}
+
+/**
+ * mei_txe_intr_disable - disable all interrupts
+ *
+ * @dev: the device structure
+ */
+static void mei_txe_intr_disable(struct mei_device *dev)
+{
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+
+	mei_txe_br_reg_write(hw, HHIER_REG, 0);
+	mei_txe_br_reg_write(hw, HIER_REG, 0);
+}
+/**
+ * mei_txe_intr_enable - enable all interrupts
+ *
+ * @dev: the device structure
+ */
+static void mei_txe_intr_enable(struct mei_device *dev)
+{
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+
+	mei_txe_br_reg_write(hw, HHIER_REG, IPC_HHIER_MSK);
+	mei_txe_br_reg_write(hw, HIER_REG, HIER_INT_EN_MSK);
+}
+
+/**
+ * mei_txe_synchronize_irq - wait for pending IRQ handlers
+ *
+ * @dev: the device structure
+ */
+static void mei_txe_synchronize_irq(struct mei_device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+	synchronize_irq(pdev->irq);
+}
+
+/**
+ * mei_txe_pending_interrupts - check if there are pending interrupts
+ *	only Aliveness, Input ready, and output doorbell are of relevance
+ *
+ * @dev: the device structure
+ *
+ * Checks if there are pending interrupts
+ * only Aliveness, Readiness, Input ready, and Output doorbell are relevant
+ *
+ * Return: true if there are pending interrupts
+ */
+static bool mei_txe_pending_interrupts(struct mei_device *dev)
+{
+
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+	bool ret = (hw->intr_cause & (TXE_INTR_READINESS |
+				      TXE_INTR_ALIVENESS |
+				      TXE_INTR_IN_READY  |
+				      TXE_INTR_OUT_DB));
+
+	if (ret) {
+		dev_dbg(dev->dev,
+			"Pending Interrupts InReady=%01d Readiness=%01d, Aliveness=%01d, OutDoor=%01d\n",
+			!!(hw->intr_cause & TXE_INTR_IN_READY),
+			!!(hw->intr_cause & TXE_INTR_READINESS),
+			!!(hw->intr_cause & TXE_INTR_ALIVENESS),
+			!!(hw->intr_cause & TXE_INTR_OUT_DB));
+	}
+	return ret;
+}
+
+/**
+ * mei_txe_input_payload_write - write a dword to the host buffer
+ *	at offset idx
+ *
+ * @dev: the device structure
+ * @idx: index in the host buffer
+ * @value: value
+ */
+static void mei_txe_input_payload_write(struct mei_device *dev,
+			unsigned long idx, u32 value)
+{
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+
+	mei_txe_sec_reg_write(hw, SEC_IPC_INPUT_PAYLOAD_REG +
+			(idx * sizeof(u32)), value);
+}
+
+/**
+ * mei_txe_out_data_read - read dword from the device buffer
+ *	at offset idx
+ *
+ * @dev: the device structure
+ * @idx: index in the device buffer
+ *
+ * Return: register value at index
+ */
+static u32 mei_txe_out_data_read(const struct mei_device *dev,
+					unsigned long idx)
+{
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+
+	return mei_txe_br_reg_read(hw,
+		BRIDGE_IPC_OUTPUT_PAYLOAD_REG + (idx * sizeof(u32)));
+}
+
+/* Readiness */
+
+/**
+ * mei_txe_readiness_set_host_rdy - set host readiness bit
+ *
+ * @dev: the device structure
+ */
+static void mei_txe_readiness_set_host_rdy(struct mei_device *dev)
+{
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+
+	mei_txe_br_reg_write(hw,
+		SICR_HOST_IPC_READINESS_REQ_REG,
+		SICR_HOST_IPC_READINESS_HOST_RDY);
+}
+
+/**
+ * mei_txe_readiness_clear - clear host readiness bit
+ *
+ * @dev: the device structure
+ */
+static void mei_txe_readiness_clear(struct mei_device *dev)
+{
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+
+	mei_txe_br_reg_write(hw, SICR_HOST_IPC_READINESS_REQ_REG,
+				SICR_HOST_IPC_READINESS_RDY_CLR);
+}
+/**
+ * mei_txe_readiness_get - Reads and returns
+ *	the HICR_SEC_IPC_READINESS register value
+ *
+ * @dev: the device structure
+ *
+ * Return: the HICR_SEC_IPC_READINESS register value
+ */
+static u32 mei_txe_readiness_get(struct mei_device *dev)
+{
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+
+	return mei_txe_br_reg_read(hw, HICR_SEC_IPC_READINESS_REG);
+}
+
+
+/**
+ * mei_txe_readiness_is_sec_rdy - check readiness
+ *  for HICR_SEC_IPC_READINESS_SEC_RDY
+ *
+ * @readiness: cached readiness state
+ *
+ * Return: true if readiness bit is set
+ */
+static inline bool mei_txe_readiness_is_sec_rdy(u32 readiness)
+{
+	return !!(readiness & HICR_SEC_IPC_READINESS_SEC_RDY);
+}
+
+/**
+ * mei_txe_hw_is_ready - check if the hw is ready
+ *
+ * @dev: the device structure
+ *
+ * Return: true if sec is ready
+ */
+static bool mei_txe_hw_is_ready(struct mei_device *dev)
+{
+	u32 readiness =  mei_txe_readiness_get(dev);
+
+	return mei_txe_readiness_is_sec_rdy(readiness);
+}
+
+/**
+ * mei_txe_host_is_ready - check if the host is ready
+ *
+ * @dev: the device structure
+ *
+ * Return: true if host is ready
+ */
+static inline bool mei_txe_host_is_ready(struct mei_device *dev)
+{
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+	u32 reg = mei_txe_br_reg_read(hw, HICR_SEC_IPC_READINESS_REG);
+
+	return !!(reg & HICR_SEC_IPC_READINESS_HOST_RDY);
+}
+
+/**
+ * mei_txe_readiness_wait - wait till readiness settles
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success and -ETIME on timeout
+ */
+static int mei_txe_readiness_wait(struct mei_device *dev)
+{
+	if (mei_txe_hw_is_ready(dev))
+		return 0;
+
+	mutex_unlock(&dev->device_lock);
+	wait_event_timeout(dev->wait_hw_ready, dev->recvd_hw_ready,
+			msecs_to_jiffies(SEC_RESET_WAIT_TIMEOUT));
+	mutex_lock(&dev->device_lock);
+	if (!dev->recvd_hw_ready) {
+		dev_err(dev->dev, "wait for readiness failed\n");
+		return -ETIME;
+	}
+
+	dev->recvd_hw_ready = false;
+	return 0;
+}
+
+static const struct mei_fw_status mei_txe_fw_sts = {
+	.count = 2,
+	.status[0] = PCI_CFG_TXE_FW_STS0,
+	.status[1] = PCI_CFG_TXE_FW_STS1
+};
+
+/**
+ * mei_txe_fw_status - read fw status register from pci config space
+ *
+ * @dev: mei device
+ * @fw_status: fw status register values
+ *
+ * Return: 0 on success, error otherwise
+ */
+static int mei_txe_fw_status(struct mei_device *dev,
+			     struct mei_fw_status *fw_status)
+{
+	const struct mei_fw_status *fw_src = &mei_txe_fw_sts;
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
+	int ret;
+	int i;
+
+	if (!fw_status)
+		return -EINVAL;
+
+	fw_status->count = fw_src->count;
+	for (i = 0; i < fw_src->count && i < MEI_FW_STATUS_MAX; i++) {
+		ret = pci_read_config_dword(pdev, fw_src->status[i],
+					    &fw_status->status[i]);
+		trace_mei_pci_cfg_read(dev->dev, "PCI_CFG_HSF_X",
+				       fw_src->status[i],
+				       fw_status->status[i]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ *  mei_txe_hw_config - configure hardware at the start of the devices
+ *
+ * @dev: the device structure
+ *
+ * Configure hardware at the start of the device should be done only
+ *   once at the device probe time
+ */
+static void mei_txe_hw_config(struct mei_device *dev)
+{
+
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+
+	hw->aliveness = mei_txe_aliveness_get(dev);
+	hw->readiness = mei_txe_readiness_get(dev);
+
+	dev_dbg(dev->dev, "aliveness_resp = 0x%08x, readiness = 0x%08x.\n",
+		hw->aliveness, hw->readiness);
+}
+
+/**
+ * mei_txe_write - writes a message to device.
+ *
+ * @dev: the device structure
+ * @hdr: header of message
+ * @hdr_len: header length in bytes - must multiplication of a slot (4bytes)
+ * @data: payload
+ * @data_len: paylead length in bytes
+ *
+ * Return: 0 if success, < 0 - otherwise.
+ */
+static int mei_txe_write(struct mei_device *dev,
+			 const void *hdr, size_t hdr_len,
+			 const void *data, size_t data_len)
+{
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+	unsigned long rem;
+	const u32 *reg_buf;
+	u32 slots = TXE_HBUF_DEPTH;
+	u32 dw_cnt;
+	unsigned long i, j;
+
+	if (WARN_ON(!hdr || !data || hdr_len & 0x3))
+		return -EINVAL;
+
+	dev_dbg(dev->dev, MEI_HDR_FMT, MEI_HDR_PRM((struct mei_msg_hdr *)hdr));
+
+	dw_cnt = mei_data2slots(hdr_len + data_len);
+	if (dw_cnt > slots)
+		return -EMSGSIZE;
+
+	if (WARN(!hw->aliveness, "txe write: aliveness not asserted\n"))
+		return -EAGAIN;
+
+	/* Enable Input Ready Interrupt. */
+	mei_txe_input_ready_interrupt_enable(dev);
+
+	if (!mei_txe_is_input_ready(dev)) {
+		char fw_sts_str[MEI_FW_STATUS_STR_SZ];
+
+		mei_fw_status_str(dev, fw_sts_str, MEI_FW_STATUS_STR_SZ);
+		dev_err(dev->dev, "Input is not ready %s\n", fw_sts_str);
+		return -EAGAIN;
+	}
+
+	reg_buf = hdr;
+	for (i = 0; i < hdr_len / MEI_SLOT_SIZE; i++)
+		mei_txe_input_payload_write(dev, i, reg_buf[i]);
+
+	reg_buf = data;
+	for (j = 0; j < data_len / MEI_SLOT_SIZE; j++)
+		mei_txe_input_payload_write(dev, i + j, reg_buf[j]);
+
+	rem = data_len & 0x3;
+	if (rem > 0) {
+		u32 reg = 0;
+
+		memcpy(&reg, (const u8 *)data + data_len - rem, rem);
+		mei_txe_input_payload_write(dev, i + j, reg);
+	}
+
+	/* after each write the whole buffer is consumed */
+	hw->slots = 0;
+
+	/* Set Input-Doorbell */
+	mei_txe_input_doorbell_set(hw);
+
+	return 0;
+}
+
+/**
+ * mei_txe_hbuf_depth - mimics the me hbuf circular buffer
+ *
+ * @dev: the device structure
+ *
+ * Return: the TXE_HBUF_DEPTH
+ */
+static u32 mei_txe_hbuf_depth(const struct mei_device *dev)
+{
+	return TXE_HBUF_DEPTH;
+}
+
+/**
+ * mei_txe_hbuf_empty_slots - mimics the me hbuf circular buffer
+ *
+ * @dev: the device structure
+ *
+ * Return: always TXE_HBUF_DEPTH
+ */
+static int mei_txe_hbuf_empty_slots(struct mei_device *dev)
+{
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+
+	return hw->slots;
+}
+
+/**
+ * mei_txe_count_full_read_slots - mimics the me device circular buffer
+ *
+ * @dev: the device structure
+ *
+ * Return: always buffer size in dwords count
+ */
+static int mei_txe_count_full_read_slots(struct mei_device *dev)
+{
+	/* read buffers has static size */
+	return TXE_HBUF_DEPTH;
+}
+
+/**
+ * mei_txe_read_hdr - read message header which is always in 4 first bytes
+ *
+ * @dev: the device structure
+ *
+ * Return: mei message header
+ */
+
+static u32 mei_txe_read_hdr(const struct mei_device *dev)
+{
+	return mei_txe_out_data_read(dev, 0);
+}
+/**
+ * mei_txe_read - reads a message from the txe device.
+ *
+ * @dev: the device structure
+ * @buf: message buffer will be written
+ * @len: message size will be read
+ *
+ * Return: -EINVAL on error wrong argument and 0 on success
+ */
+static int mei_txe_read(struct mei_device *dev,
+		unsigned char *buf, unsigned long len)
+{
+
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+	u32 *reg_buf, reg;
+	u32 rem;
+	u32 i;
+
+	if (WARN_ON(!buf || !len))
+		return -EINVAL;
+
+	reg_buf = (u32 *)buf;
+	rem = len & 0x3;
+
+	dev_dbg(dev->dev, "buffer-length = %lu buf[0]0x%08X\n",
+		len, mei_txe_out_data_read(dev, 0));
+
+	for (i = 0; i < len / MEI_SLOT_SIZE; i++) {
+		/* skip header: index starts from 1 */
+		reg = mei_txe_out_data_read(dev, i + 1);
+		dev_dbg(dev->dev, "buf[%d] = 0x%08X\n", i, reg);
+		*reg_buf++ = reg;
+	}
+
+	if (rem) {
+		reg = mei_txe_out_data_read(dev, i + 1);
+		memcpy(reg_buf, &reg, rem);
+	}
+
+	mei_txe_output_ready_set(hw);
+	return 0;
+}
+
+/**
+ * mei_txe_hw_reset - resets host and fw.
+ *
+ * @dev: the device structure
+ * @intr_enable: if interrupt should be enabled after reset.
+ *
+ * Return: 0 on success and < 0 in case of error
+ */
+static int mei_txe_hw_reset(struct mei_device *dev, bool intr_enable)
+{
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+
+	u32 aliveness_req;
+	/*
+	 * read input doorbell to ensure consistency between  Bridge and SeC
+	 * return value might be garbage return
+	 */
+	(void)mei_txe_sec_reg_read_silent(hw, SEC_IPC_INPUT_DOORBELL_REG);
+
+	aliveness_req = mei_txe_aliveness_req_get(dev);
+	hw->aliveness = mei_txe_aliveness_get(dev);
+
+	/* Disable interrupts in this stage we will poll */
+	mei_txe_intr_disable(dev);
+
+	/*
+	 * If Aliveness Request and Aliveness Response are not equal then
+	 * wait for them to be equal
+	 * Since we might have interrupts disabled - poll for it
+	 */
+	if (aliveness_req != hw->aliveness)
+		if (mei_txe_aliveness_poll(dev, aliveness_req) < 0) {
+			dev_err(dev->dev, "wait for aliveness settle failed ... bailing out\n");
+			return -EIO;
+		}
+
+	/*
+	 * If Aliveness Request and Aliveness Response are set then clear them
+	 */
+	if (aliveness_req) {
+		mei_txe_aliveness_set(dev, 0);
+		if (mei_txe_aliveness_poll(dev, 0) < 0) {
+			dev_err(dev->dev, "wait for aliveness failed ... bailing out\n");
+			return -EIO;
+		}
+	}
+
+	/*
+	 * Set readiness RDY_CLR bit
+	 */
+	mei_txe_readiness_clear(dev);
+
+	return 0;
+}
+
+/**
+ * mei_txe_hw_start - start the hardware after reset
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success an error code otherwise
+ */
+static int mei_txe_hw_start(struct mei_device *dev)
+{
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+	int ret;
+
+	u32 hisr;
+
+	/* bring back interrupts */
+	mei_txe_intr_enable(dev);
+
+	ret = mei_txe_readiness_wait(dev);
+	if (ret < 0) {
+		dev_err(dev->dev, "waiting for readiness failed\n");
+		return ret;
+	}
+
+	/*
+	 * If HISR.INT2_STS interrupt status bit is set then clear it.
+	 */
+	hisr = mei_txe_br_reg_read(hw, HISR_REG);
+	if (hisr & HISR_INT_2_STS)
+		mei_txe_br_reg_write(hw, HISR_REG, HISR_INT_2_STS);
+
+	/* Clear the interrupt cause of OutputDoorbell */
+	clear_bit(TXE_INTR_OUT_DB_BIT, &hw->intr_cause);
+
+	ret = mei_txe_aliveness_set_sync(dev, 1);
+	if (ret < 0) {
+		dev_err(dev->dev, "wait for aliveness failed ... bailing out\n");
+		return ret;
+	}
+
+	pm_runtime_set_active(dev->dev);
+
+	/* enable input ready interrupts:
+	 * SEC_IPC_HOST_INT_MASK.IPC_INPUT_READY_INT_MASK
+	 */
+	mei_txe_input_ready_interrupt_enable(dev);
+
+
+	/*  Set the SICR_SEC_IPC_OUTPUT_STATUS.IPC_OUTPUT_READY bit */
+	mei_txe_output_ready_set(hw);
+
+	/* Set bit SICR_HOST_IPC_READINESS.HOST_RDY
+	 */
+	mei_txe_readiness_set_host_rdy(dev);
+
+	return 0;
+}
+
+/**
+ * mei_txe_check_and_ack_intrs - translate multi BAR interrupt into
+ *  single bit mask and acknowledge the interrupts
+ *
+ * @dev: the device structure
+ * @do_ack: acknowledge interrupts
+ *
+ * Return: true if found interrupts to process.
+ */
+static bool mei_txe_check_and_ack_intrs(struct mei_device *dev, bool do_ack)
+{
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+	u32 hisr;
+	u32 hhisr;
+	u32 ipc_isr;
+	u32 aliveness;
+	bool generated;
+
+	/* read interrupt registers */
+	hhisr = mei_txe_br_reg_read(hw, HHISR_REG);
+	generated = (hhisr & IPC_HHIER_MSK);
+	if (!generated)
+		goto out;
+
+	hisr = mei_txe_br_reg_read(hw, HISR_REG);
+
+	aliveness = mei_txe_aliveness_get(dev);
+	if (hhisr & IPC_HHIER_SEC && aliveness) {
+		ipc_isr = mei_txe_sec_reg_read_silent(hw,
+				SEC_IPC_HOST_INT_STATUS_REG);
+	} else {
+		ipc_isr = 0;
+		hhisr &= ~IPC_HHIER_SEC;
+	}
+
+	generated = generated ||
+		(hisr & HISR_INT_STS_MSK) ||
+		(ipc_isr & SEC_IPC_HOST_INT_STATUS_PENDING);
+
+	if (generated && do_ack) {
+		/* Save the interrupt causes */
+		hw->intr_cause |= hisr & HISR_INT_STS_MSK;
+		if (ipc_isr & SEC_IPC_HOST_INT_STATUS_IN_RDY)
+			hw->intr_cause |= TXE_INTR_IN_READY;
+
+
+		mei_txe_intr_disable(dev);
+		/* Clear the interrupts in hierarchy:
+		 * IPC and Bridge, than the High Level */
+		mei_txe_sec_reg_write_silent(hw,
+			SEC_IPC_HOST_INT_STATUS_REG, ipc_isr);
+		mei_txe_br_reg_write(hw, HISR_REG, hisr);
+		mei_txe_br_reg_write(hw, HHISR_REG, hhisr);
+	}
+
+out:
+	return generated;
+}
+
+/**
+ * mei_txe_irq_quick_handler - The ISR of the MEI device
+ *
+ * @irq: The irq number
+ * @dev_id: pointer to the device structure
+ *
+ * Return: IRQ_WAKE_THREAD if interrupt is designed for the device
+ *         IRQ_NONE otherwise
+ */
+irqreturn_t mei_txe_irq_quick_handler(int irq, void *dev_id)
+{
+	struct mei_device *dev = dev_id;
+
+	if (mei_txe_check_and_ack_intrs(dev, true))
+		return IRQ_WAKE_THREAD;
+	return IRQ_NONE;
+}
+
+
+/**
+ * mei_txe_irq_thread_handler - txe interrupt thread
+ *
+ * @irq: The irq number
+ * @dev_id: pointer to the device structure
+ *
+ * Return: IRQ_HANDLED
+ */
+irqreturn_t mei_txe_irq_thread_handler(int irq, void *dev_id)
+{
+	struct mei_device *dev = (struct mei_device *) dev_id;
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+	struct list_head cmpl_list;
+	s32 slots;
+	int rets = 0;
+
+	dev_dbg(dev->dev, "irq thread: Interrupt Registers HHISR|HISR|SEC=%02X|%04X|%02X\n",
+		mei_txe_br_reg_read(hw, HHISR_REG),
+		mei_txe_br_reg_read(hw, HISR_REG),
+		mei_txe_sec_reg_read_silent(hw, SEC_IPC_HOST_INT_STATUS_REG));
+
+
+	/* initialize our complete list */
+	mutex_lock(&dev->device_lock);
+	INIT_LIST_HEAD(&cmpl_list);
+
+	if (pci_dev_msi_enabled(to_pci_dev(dev->dev)))
+		mei_txe_check_and_ack_intrs(dev, true);
+
+	/* show irq events */
+	mei_txe_pending_interrupts(dev);
+
+	hw->aliveness = mei_txe_aliveness_get(dev);
+	hw->readiness = mei_txe_readiness_get(dev);
+
+	/* Readiness:
+	 * Detection of TXE driver going through reset
+	 * or TXE driver resetting the HECI interface.
+	 */
+	if (test_and_clear_bit(TXE_INTR_READINESS_BIT, &hw->intr_cause)) {
+		dev_dbg(dev->dev, "Readiness Interrupt was received...\n");
+
+		/* Check if SeC is going through reset */
+		if (mei_txe_readiness_is_sec_rdy(hw->readiness)) {
+			dev_dbg(dev->dev, "we need to start the dev.\n");
+			dev->recvd_hw_ready = true;
+		} else {
+			dev->recvd_hw_ready = false;
+			if (dev->dev_state != MEI_DEV_RESETTING) {
+
+				dev_warn(dev->dev, "FW not ready: resetting.\n");
+				schedule_work(&dev->reset_work);
+				goto end;
+
+			}
+		}
+		wake_up(&dev->wait_hw_ready);
+	}
+
+	/************************************************************/
+	/* Check interrupt cause:
+	 * Aliveness: Detection of SeC acknowledge of host request that
+	 * it remain alive or host cancellation of that request.
+	 */
+
+	if (test_and_clear_bit(TXE_INTR_ALIVENESS_BIT, &hw->intr_cause)) {
+		/* Clear the interrupt cause */
+		dev_dbg(dev->dev,
+			"Aliveness Interrupt: Status: %d\n", hw->aliveness);
+		dev->pg_event = MEI_PG_EVENT_RECEIVED;
+		if (waitqueue_active(&hw->wait_aliveness_resp))
+			wake_up(&hw->wait_aliveness_resp);
+	}
+
+
+	/* Output Doorbell:
+	 * Detection of SeC having sent output to host
+	 */
+	slots = mei_count_full_read_slots(dev);
+	if (test_and_clear_bit(TXE_INTR_OUT_DB_BIT, &hw->intr_cause)) {
+		/* Read from TXE */
+		rets = mei_irq_read_handler(dev, &cmpl_list, &slots);
+		if (rets &&
+		    (dev->dev_state != MEI_DEV_RESETTING &&
+		     dev->dev_state != MEI_DEV_POWER_DOWN)) {
+			dev_err(dev->dev,
+				"mei_irq_read_handler ret = %d.\n", rets);
+
+			schedule_work(&dev->reset_work);
+			goto end;
+		}
+	}
+	/* Input Ready: Detection if host can write to SeC */
+	if (test_and_clear_bit(TXE_INTR_IN_READY_BIT, &hw->intr_cause)) {
+		dev->hbuf_is_ready = true;
+		hw->slots = TXE_HBUF_DEPTH;
+	}
+
+	if (hw->aliveness && dev->hbuf_is_ready) {
+		/* get the real register value */
+		dev->hbuf_is_ready = mei_hbuf_is_ready(dev);
+		rets = mei_irq_write_handler(dev, &cmpl_list);
+		if (rets && rets != -EMSGSIZE)
+			dev_err(dev->dev, "mei_irq_write_handler ret = %d.\n",
+				rets);
+		dev->hbuf_is_ready = mei_hbuf_is_ready(dev);
+	}
+
+	mei_irq_compl_handler(dev, &cmpl_list);
+
+end:
+	dev_dbg(dev->dev, "interrupt thread end ret = %d\n", rets);
+
+	mutex_unlock(&dev->device_lock);
+
+	mei_enable_interrupts(dev);
+	return IRQ_HANDLED;
+}
+
+static const struct mei_hw_ops mei_txe_hw_ops = {
+
+	.host_is_ready = mei_txe_host_is_ready,
+
+	.fw_status = mei_txe_fw_status,
+	.pg_state = mei_txe_pg_state,
+
+	.hw_is_ready = mei_txe_hw_is_ready,
+	.hw_reset = mei_txe_hw_reset,
+	.hw_config = mei_txe_hw_config,
+	.hw_start = mei_txe_hw_start,
+
+	.pg_in_transition = mei_txe_pg_in_transition,
+	.pg_is_enabled = mei_txe_pg_is_enabled,
+
+	.intr_clear = mei_txe_intr_clear,
+	.intr_enable = mei_txe_intr_enable,
+	.intr_disable = mei_txe_intr_disable,
+	.synchronize_irq = mei_txe_synchronize_irq,
+
+	.hbuf_free_slots = mei_txe_hbuf_empty_slots,
+	.hbuf_is_ready = mei_txe_is_input_ready,
+	.hbuf_depth = mei_txe_hbuf_depth,
+
+	.write = mei_txe_write,
+
+	.rdbuf_full_slots = mei_txe_count_full_read_slots,
+	.read_hdr = mei_txe_read_hdr,
+
+	.read = mei_txe_read,
+
+};
+
+/**
+ * mei_txe_dev_init - allocates and initializes txe hardware specific structure
+ *
+ * @pdev: pci device
+ *
+ * Return: struct mei_device * on success or NULL
+ */
+struct mei_device *mei_txe_dev_init(struct pci_dev *pdev)
+{
+	struct mei_device *dev;
+	struct mei_txe_hw *hw;
+
+	dev = devm_kzalloc(&pdev->dev, sizeof(struct mei_device) +
+			   sizeof(struct mei_txe_hw), GFP_KERNEL);
+	if (!dev)
+		return NULL;
+
+	mei_device_init(dev, &pdev->dev, &mei_txe_hw_ops);
+
+	hw = to_txe_hw(dev);
+
+	init_waitqueue_head(&hw->wait_aliveness_resp);
+
+	return dev;
+}
+
+/**
+ * mei_txe_setup_satt2 - SATT2 configuration for DMA support.
+ *
+ * @dev:   the device structure
+ * @addr:  physical address start of the range
+ * @range: physical range size
+ *
+ * Return: 0 on success an error code otherwise
+ */
+int mei_txe_setup_satt2(struct mei_device *dev, phys_addr_t addr, u32 range)
+{
+	struct mei_txe_hw *hw = to_txe_hw(dev);
+
+	u32 lo32 = lower_32_bits(addr);
+	u32 hi32 = upper_32_bits(addr);
+	u32 ctrl;
+
+	/* SATT is limited to 36 Bits */
+	if (hi32 & ~0xF)
+		return -EINVAL;
+
+	/* SATT has to be 16Byte aligned */
+	if (lo32 & 0xF)
+		return -EINVAL;
+
+	/* SATT range has to be 4Bytes aligned */
+	if (range & 0x4)
+		return -EINVAL;
+
+	/* SATT is limited to 32 MB range*/
+	if (range > SATT_RANGE_MAX)
+		return -EINVAL;
+
+	ctrl = SATT2_CTRL_VALID_MSK;
+	ctrl |= hi32  << SATT2_CTRL_BR_BASE_ADDR_REG_SHIFT;
+
+	mei_txe_br_reg_write(hw, SATT2_SAP_SIZE_REG, range);
+	mei_txe_br_reg_write(hw, SATT2_BRG_BA_LSB_REG, lo32);
+	mei_txe_br_reg_write(hw, SATT2_CTRL_REG, ctrl);
+	dev_dbg(dev->dev, "SATT2: SAP_SIZE_OFFSET=0x%08X, BRG_BA_LSB_OFFSET=0x%08X, CTRL_OFFSET=0x%08X\n",
+		range, lo32, ctrl);
+
+	return 0;
+}
diff --git a/drivers/misc/mei/hw-txe.h b/drivers/misc/mei/hw-txe.h
new file mode 100644
index 000000000..e1e8b66d7
--- /dev/null
+++ b/drivers/misc/mei/hw-txe.h
@@ -0,0 +1,75 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef _MEI_HW_TXE_H_
+#define _MEI_HW_TXE_H_
+
+#include <linux/irqreturn.h>
+
+#include "hw.h"
+#include "hw-txe-regs.h"
+
+#define MEI_TXI_RPM_TIMEOUT    500 /* ms */
+
+/* Flatten Hierarchy interrupt cause */
+#define TXE_INTR_READINESS_BIT  0 /* HISR_INT_0_STS */
+#define TXE_INTR_READINESS      HISR_INT_0_STS
+#define TXE_INTR_ALIVENESS_BIT  1 /* HISR_INT_1_STS */
+#define TXE_INTR_ALIVENESS      HISR_INT_1_STS
+#define TXE_INTR_OUT_DB_BIT     2 /* HISR_INT_2_STS */
+#define TXE_INTR_OUT_DB         HISR_INT_2_STS
+#define TXE_INTR_IN_READY_BIT   8 /* beyond HISR */
+#define TXE_INTR_IN_READY       BIT(8)
+
+/**
+ * struct mei_txe_hw - txe hardware specifics
+ *
+ * @mem_addr:            SeC and BRIDGE bars
+ * @aliveness:           aliveness (power gating) state of the hardware
+ * @readiness:           readiness state of the hardware
+ * @slots:               number of empty slots
+ * @wait_aliveness_resp: aliveness wait queue
+ * @intr_cause:          translated interrupt cause
+ */
+struct mei_txe_hw {
+	void __iomem * const *mem_addr;
+	u32 aliveness;
+	u32 readiness;
+	u32 slots;
+
+	wait_queue_head_t wait_aliveness_resp;
+
+	unsigned long intr_cause;
+};
+
+#define to_txe_hw(dev) (struct mei_txe_hw *)((dev)->hw)
+
+static inline struct mei_device *hw_txe_to_mei(struct mei_txe_hw *hw)
+{
+	return container_of((void *)hw, struct mei_device, hw);
+}
+
+struct mei_device *mei_txe_dev_init(struct pci_dev *pdev);
+
+irqreturn_t mei_txe_irq_quick_handler(int irq, void *dev_id);
+irqreturn_t mei_txe_irq_thread_handler(int irq, void *dev_id);
+
+int mei_txe_aliveness_set_sync(struct mei_device *dev, u32 req);
+
+int mei_txe_setup_satt2(struct mei_device *dev, phys_addr_t addr, u32 range);
+
+
+#endif /* _MEI_HW_TXE_H_ */
diff --git a/drivers/misc/mei/hw.h b/drivers/misc/mei/hw.h
new file mode 100644
index 000000000..656559257
--- /dev/null
+++ b/drivers/misc/mei/hw.h
@@ -0,0 +1,515 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef _MEI_HW_TYPES_H_
+#define _MEI_HW_TYPES_H_
+
+#include <linux/uuid.h>
+
+/*
+ * Timeouts in Seconds
+ */
+#define MEI_HW_READY_TIMEOUT        2  /* Timeout on ready message */
+#define MEI_CONNECT_TIMEOUT         3  /* HPS: at least 2 seconds */
+
+#define MEI_CL_CONNECT_TIMEOUT     15  /* HPS: Client Connect Timeout */
+#define MEI_CLIENTS_INIT_TIMEOUT   15  /* HPS: Clients Enumeration Timeout */
+
+#define MEI_PGI_TIMEOUT             1  /* PG Isolation time response 1 sec */
+#define MEI_D0I3_TIMEOUT            5  /* D0i3 set/unset max response time */
+#define MEI_HBM_TIMEOUT             1  /* 1 second */
+
+/*
+ * MEI Version
+ */
+#define HBM_MINOR_VERSION                   0
+#define HBM_MAJOR_VERSION                   2
+
+/*
+ * MEI version with PGI support
+ */
+#define HBM_MINOR_VERSION_PGI               1
+#define HBM_MAJOR_VERSION_PGI               1
+
+/*
+ * MEI version with Dynamic clients support
+ */
+#define HBM_MINOR_VERSION_DC               0
+#define HBM_MAJOR_VERSION_DC               2
+
+/*
+ * MEI version with immediate reply to enum request support
+ */
+#define HBM_MINOR_VERSION_IE               0
+#define HBM_MAJOR_VERSION_IE               2
+
+/*
+ * MEI version with disconnect on connection timeout support
+ */
+#define HBM_MINOR_VERSION_DOT              0
+#define HBM_MAJOR_VERSION_DOT              2
+
+/*
+ * MEI version with notification support
+ */
+#define HBM_MINOR_VERSION_EV               0
+#define HBM_MAJOR_VERSION_EV               2
+
+/*
+ * MEI version with fixed address client support
+ */
+#define HBM_MINOR_VERSION_FA               0
+#define HBM_MAJOR_VERSION_FA               2
+
+/*
+ * MEI version with OS ver message support
+ */
+#define HBM_MINOR_VERSION_OS               0
+#define HBM_MAJOR_VERSION_OS               2
+
+/*
+ * MEI version with dma ring support
+ */
+#define HBM_MINOR_VERSION_DR               1
+#define HBM_MAJOR_VERSION_DR               2
+
+/* Host bus message command opcode */
+#define MEI_HBM_CMD_OP_MSK                  0x7f
+/* Host bus message command RESPONSE */
+#define MEI_HBM_CMD_RES_MSK                 0x80
+
+/*
+ * MEI Bus Message Command IDs
+ */
+#define HOST_START_REQ_CMD                  0x01
+#define HOST_START_RES_CMD                  0x81
+
+#define HOST_STOP_REQ_CMD                   0x02
+#define HOST_STOP_RES_CMD                   0x82
+
+#define ME_STOP_REQ_CMD                     0x03
+
+#define HOST_ENUM_REQ_CMD                   0x04
+#define HOST_ENUM_RES_CMD                   0x84
+
+#define HOST_CLIENT_PROPERTIES_REQ_CMD      0x05
+#define HOST_CLIENT_PROPERTIES_RES_CMD      0x85
+
+#define CLIENT_CONNECT_REQ_CMD              0x06
+#define CLIENT_CONNECT_RES_CMD              0x86
+
+#define CLIENT_DISCONNECT_REQ_CMD           0x07
+#define CLIENT_DISCONNECT_RES_CMD           0x87
+
+#define MEI_FLOW_CONTROL_CMD                0x08
+
+#define MEI_PG_ISOLATION_ENTRY_REQ_CMD      0x0a
+#define MEI_PG_ISOLATION_ENTRY_RES_CMD      0x8a
+#define MEI_PG_ISOLATION_EXIT_REQ_CMD       0x0b
+#define MEI_PG_ISOLATION_EXIT_RES_CMD       0x8b
+
+#define MEI_HBM_ADD_CLIENT_REQ_CMD          0x0f
+#define MEI_HBM_ADD_CLIENT_RES_CMD          0x8f
+
+#define MEI_HBM_NOTIFY_REQ_CMD              0x10
+#define MEI_HBM_NOTIFY_RES_CMD              0x90
+#define MEI_HBM_NOTIFICATION_CMD            0x11
+
+#define MEI_HBM_DMA_SETUP_REQ_CMD           0x12
+#define MEI_HBM_DMA_SETUP_RES_CMD           0x92
+
+/*
+ * MEI Stop Reason
+ * used by hbm_host_stop_request.reason
+ */
+enum mei_stop_reason_types {
+	DRIVER_STOP_REQUEST = 0x00,
+	DEVICE_D1_ENTRY = 0x01,
+	DEVICE_D2_ENTRY = 0x02,
+	DEVICE_D3_ENTRY = 0x03,
+	SYSTEM_S1_ENTRY = 0x04,
+	SYSTEM_S2_ENTRY = 0x05,
+	SYSTEM_S3_ENTRY = 0x06,
+	SYSTEM_S4_ENTRY = 0x07,
+	SYSTEM_S5_ENTRY = 0x08
+};
+
+
+/**
+ * enum mei_hbm_status  - mei host bus messages return values
+ *
+ * @MEI_HBMS_SUCCESS           : status success
+ * @MEI_HBMS_CLIENT_NOT_FOUND  : client not found
+ * @MEI_HBMS_ALREADY_EXISTS    : connection already established
+ * @MEI_HBMS_REJECTED          : connection is rejected
+ * @MEI_HBMS_INVALID_PARAMETER : invalid parameter
+ * @MEI_HBMS_NOT_ALLOWED       : operation not allowed
+ * @MEI_HBMS_ALREADY_STARTED   : system is already started
+ * @MEI_HBMS_NOT_STARTED       : system not started
+ *
+ * @MEI_HBMS_MAX               : sentinel
+ */
+enum mei_hbm_status {
+	MEI_HBMS_SUCCESS           = 0,
+	MEI_HBMS_CLIENT_NOT_FOUND  = 1,
+	MEI_HBMS_ALREADY_EXISTS    = 2,
+	MEI_HBMS_REJECTED          = 3,
+	MEI_HBMS_INVALID_PARAMETER = 4,
+	MEI_HBMS_NOT_ALLOWED       = 5,
+	MEI_HBMS_ALREADY_STARTED   = 6,
+	MEI_HBMS_NOT_STARTED       = 7,
+
+	MEI_HBMS_MAX
+};
+
+
+/*
+ * Client Connect Status
+ * used by hbm_client_connect_response.status
+ */
+enum mei_cl_connect_status {
+	MEI_CL_CONN_SUCCESS          = MEI_HBMS_SUCCESS,
+	MEI_CL_CONN_NOT_FOUND        = MEI_HBMS_CLIENT_NOT_FOUND,
+	MEI_CL_CONN_ALREADY_STARTED  = MEI_HBMS_ALREADY_EXISTS,
+	MEI_CL_CONN_OUT_OF_RESOURCES = MEI_HBMS_REJECTED,
+	MEI_CL_CONN_MESSAGE_SMALL    = MEI_HBMS_INVALID_PARAMETER,
+	MEI_CL_CONN_NOT_ALLOWED      = MEI_HBMS_NOT_ALLOWED,
+};
+
+/*
+ * Client Disconnect Status
+ */
+enum  mei_cl_disconnect_status {
+	MEI_CL_DISCONN_SUCCESS = MEI_HBMS_SUCCESS
+};
+
+/**
+ * struct mei_msg_hdr - MEI BUS Interface Section
+ *
+ * @me_addr: device address
+ * @host_addr: host address
+ * @length: message length
+ * @reserved: reserved
+ * @dma_ring: message is on dma ring
+ * @internal: message is internal
+ * @msg_complete: last packet of the message
+ */
+struct mei_msg_hdr {
+	u32 me_addr:8;
+	u32 host_addr:8;
+	u32 length:9;
+	u32 reserved:4;
+	u32 dma_ring:1;
+	u32 internal:1;
+	u32 msg_complete:1;
+} __packed;
+
+struct mei_bus_message {
+	u8 hbm_cmd;
+	u8 data[0];
+} __packed;
+
+/**
+ * struct hbm_cl_cmd - client specific host bus command
+ *	CONNECT, DISCONNECT, and FlOW CONTROL
+ *
+ * @hbm_cmd: bus message command header
+ * @me_addr: address of the client in ME
+ * @host_addr: address of the client in the driver
+ * @data: generic data
+ */
+struct mei_hbm_cl_cmd {
+	u8 hbm_cmd;
+	u8 me_addr;
+	u8 host_addr;
+	u8 data;
+};
+
+struct hbm_version {
+	u8 minor_version;
+	u8 major_version;
+} __packed;
+
+struct hbm_host_version_request {
+	u8 hbm_cmd;
+	u8 reserved;
+	struct hbm_version host_version;
+} __packed;
+
+struct hbm_host_version_response {
+	u8 hbm_cmd;
+	u8 host_version_supported;
+	struct hbm_version me_max_version;
+} __packed;
+
+struct hbm_host_stop_request {
+	u8 hbm_cmd;
+	u8 reason;
+	u8 reserved[2];
+} __packed;
+
+struct hbm_host_stop_response {
+	u8 hbm_cmd;
+	u8 reserved[3];
+} __packed;
+
+struct hbm_me_stop_request {
+	u8 hbm_cmd;
+	u8 reason;
+	u8 reserved[2];
+} __packed;
+
+/**
+ * enum hbm_host_enum_flags - enumeration request flags (HBM version >= 2.0)
+ *
+ * @MEI_HBM_ENUM_F_ALLOW_ADD: allow dynamic clients add
+ * @MEI_HBM_ENUM_F_IMMEDIATE_ENUM: allow FW to send answer immediately
+ */
+enum hbm_host_enum_flags {
+	MEI_HBM_ENUM_F_ALLOW_ADD = BIT(0),
+	MEI_HBM_ENUM_F_IMMEDIATE_ENUM = BIT(1),
+};
+
+/**
+ * struct hbm_host_enum_request - enumeration request from host to fw
+ *
+ * @hbm_cmd : bus message command header
+ * @flags   : request flags
+ * @reserved: reserved
+ */
+struct hbm_host_enum_request {
+	u8 hbm_cmd;
+	u8 flags;
+	u8 reserved[2];
+} __packed;
+
+struct hbm_host_enum_response {
+	u8 hbm_cmd;
+	u8 reserved[3];
+	u8 valid_addresses[32];
+} __packed;
+
+struct mei_client_properties {
+	uuid_le protocol_name;
+	u8 protocol_version;
+	u8 max_number_of_connections;
+	u8 fixed_address;
+	u8 single_recv_buf;
+	u32 max_msg_length;
+} __packed;
+
+struct hbm_props_request {
+	u8 hbm_cmd;
+	u8 me_addr;
+	u8 reserved[2];
+} __packed;
+
+struct hbm_props_response {
+	u8 hbm_cmd;
+	u8 me_addr;
+	u8 status;
+	u8 reserved[1];
+	struct mei_client_properties client_properties;
+} __packed;
+
+/**
+ * struct hbm_add_client_request - request to add a client
+ *     might be sent by fw after enumeration has already completed
+ *
+ * @hbm_cmd: bus message command header
+ * @me_addr: address of the client in ME
+ * @reserved: reserved
+ * @client_properties: client properties
+ */
+struct hbm_add_client_request {
+	u8 hbm_cmd;
+	u8 me_addr;
+	u8 reserved[2];
+	struct mei_client_properties client_properties;
+} __packed;
+
+/**
+ * struct hbm_add_client_response - response to add a client
+ *     sent by the host to report client addition status to fw
+ *
+ * @hbm_cmd: bus message command header
+ * @me_addr: address of the client in ME
+ * @status: if HBMS_SUCCESS then the client can now accept connections.
+ * @reserved: reserved
+ */
+struct hbm_add_client_response {
+	u8 hbm_cmd;
+	u8 me_addr;
+	u8 status;
+	u8 reserved[1];
+} __packed;
+
+/**
+ * struct hbm_power_gate - power gate request/response
+ *
+ * @hbm_cmd: bus message command header
+ * @reserved: reserved
+ */
+struct hbm_power_gate {
+	u8 hbm_cmd;
+	u8 reserved[3];
+} __packed;
+
+/**
+ * struct hbm_client_connect_request - connect/disconnect request
+ *
+ * @hbm_cmd: bus message command header
+ * @me_addr: address of the client in ME
+ * @host_addr: address of the client in the driver
+ * @reserved: reserved
+ */
+struct hbm_client_connect_request {
+	u8 hbm_cmd;
+	u8 me_addr;
+	u8 host_addr;
+	u8 reserved;
+} __packed;
+
+/**
+ * struct hbm_client_connect_response - connect/disconnect response
+ *
+ * @hbm_cmd: bus message command header
+ * @me_addr: address of the client in ME
+ * @host_addr: address of the client in the driver
+ * @status: status of the request
+ */
+struct hbm_client_connect_response {
+	u8 hbm_cmd;
+	u8 me_addr;
+	u8 host_addr;
+	u8 status;
+} __packed;
+
+
+#define MEI_FC_MESSAGE_RESERVED_LENGTH           5
+
+struct hbm_flow_control {
+	u8 hbm_cmd;
+	u8 me_addr;
+	u8 host_addr;
+	u8 reserved[MEI_FC_MESSAGE_RESERVED_LENGTH];
+} __packed;
+
+#define MEI_HBM_NOTIFICATION_START 1
+#define MEI_HBM_NOTIFICATION_STOP  0
+/**
+ * struct hbm_notification_request - start/stop notification request
+ *
+ * @hbm_cmd: bus message command header
+ * @me_addr: address of the client in ME
+ * @host_addr: address of the client in the driver
+ * @start:  start = 1 or stop = 0 asynchronous notifications
+ */
+struct hbm_notification_request {
+	u8 hbm_cmd;
+	u8 me_addr;
+	u8 host_addr;
+	u8 start;
+} __packed;
+
+/**
+ * struct hbm_notification_response - start/stop notification response
+ *
+ * @hbm_cmd: bus message command header
+ * @me_addr: address of the client in ME
+ * @host_addr: - address of the client in the driver
+ * @status: (mei_hbm_status) response status for the request
+ *  - MEI_HBMS_SUCCESS: successful stop/start
+ *  - MEI_HBMS_CLIENT_NOT_FOUND: if the connection could not be found.
+ *  - MEI_HBMS_ALREADY_STARTED: for start requests for a previously
+ *                         started notification.
+ *  - MEI_HBMS_NOT_STARTED: for stop request for a connected client for whom
+ *                         asynchronous notifications are currently disabled.
+ *
+ * @start:  start = 1 or stop = 0 asynchronous notifications
+ * @reserved: reserved
+ */
+struct hbm_notification_response {
+	u8 hbm_cmd;
+	u8 me_addr;
+	u8 host_addr;
+	u8 status;
+	u8 start;
+	u8 reserved[3];
+} __packed;
+
+/**
+ * struct hbm_notification - notification event
+ *
+ * @hbm_cmd: bus message command header
+ * @me_addr:  address of the client in ME
+ * @host_addr:  address of the client in the driver
+ * @reserved: reserved for alignment
+ */
+struct hbm_notification {
+	u8 hbm_cmd;
+	u8 me_addr;
+	u8 host_addr;
+	u8 reserved[1];
+} __packed;
+
+/**
+ * struct hbm_dma_mem_dscr - dma ring
+ *
+ * @addr_hi: the high 32bits of 64 bit address
+ * @addr_lo: the low  32bits of 64 bit address
+ * @size   : size in bytes (must be power of 2)
+ */
+struct hbm_dma_mem_dscr {
+	u32 addr_hi;
+	u32 addr_lo;
+	u32 size;
+} __packed;
+
+enum {
+	DMA_DSCR_HOST = 0,
+	DMA_DSCR_DEVICE = 1,
+	DMA_DSCR_CTRL = 2,
+	DMA_DSCR_NUM,
+};
+
+/**
+ * struct hbm_dma_setup_request - dma setup request
+ *
+ * @hbm_cmd: bus message command header
+ * @reserved: reserved for alignment
+ * @dma_dscr: dma descriptor for HOST, DEVICE, and CTRL
+ */
+struct hbm_dma_setup_request {
+	u8 hbm_cmd;
+	u8 reserved[3];
+	struct hbm_dma_mem_dscr dma_dscr[DMA_DSCR_NUM];
+} __packed;
+
+/**
+ * struct hbm_dma_setup_response - dma setup response
+ *
+ * @hbm_cmd: bus message command header
+ * @status: 0 on success; otherwise DMA setup failed.
+ * @reserved: reserved for alignment
+ */
+struct hbm_dma_setup_response {
+	u8 hbm_cmd;
+	u8 status;
+	u8 reserved[2];
+} __packed;
+
+#endif
diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c
new file mode 100644
index 000000000..4888ebc07
--- /dev/null
+++ b/drivers/misc/mei/init.c
@@ -0,0 +1,406 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/delay.h>
+
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+#include "hbm.h"
+#include "client.h"
+
+const char *mei_dev_state_str(int state)
+{
+#define MEI_DEV_STATE(state) case MEI_DEV_##state: return #state
+	switch (state) {
+	MEI_DEV_STATE(INITIALIZING);
+	MEI_DEV_STATE(INIT_CLIENTS);
+	MEI_DEV_STATE(ENABLED);
+	MEI_DEV_STATE(RESETTING);
+	MEI_DEV_STATE(DISABLED);
+	MEI_DEV_STATE(POWER_DOWN);
+	MEI_DEV_STATE(POWER_UP);
+	default:
+		return "unknown";
+	}
+#undef MEI_DEV_STATE
+}
+
+const char *mei_pg_state_str(enum mei_pg_state state)
+{
+#define MEI_PG_STATE(state) case MEI_PG_##state: return #state
+	switch (state) {
+	MEI_PG_STATE(OFF);
+	MEI_PG_STATE(ON);
+	default:
+		return "unknown";
+	}
+#undef MEI_PG_STATE
+}
+
+/**
+ * mei_fw_status2str - convert fw status registers to printable string
+ *
+ * @fw_status:  firmware status
+ * @buf: string buffer at minimal size MEI_FW_STATUS_STR_SZ
+ * @len: buffer len must be >= MEI_FW_STATUS_STR_SZ
+ *
+ * Return: number of bytes written or -EINVAL if buffer is to small
+ */
+ssize_t mei_fw_status2str(struct mei_fw_status *fw_status,
+			  char *buf, size_t len)
+{
+	ssize_t cnt = 0;
+	int i;
+
+	buf[0] = '\0';
+
+	if (len < MEI_FW_STATUS_STR_SZ)
+		return -EINVAL;
+
+	for (i = 0; i < fw_status->count; i++)
+		cnt += scnprintf(buf + cnt, len - cnt, "%08X ",
+				fw_status->status[i]);
+
+	/* drop last space */
+	buf[cnt] = '\0';
+	return cnt;
+}
+EXPORT_SYMBOL_GPL(mei_fw_status2str);
+
+/**
+ * mei_cancel_work - Cancel mei background jobs
+ *
+ * @dev: the device structure
+ */
+void mei_cancel_work(struct mei_device *dev)
+{
+	cancel_work_sync(&dev->reset_work);
+	cancel_work_sync(&dev->bus_rescan_work);
+
+	cancel_delayed_work_sync(&dev->timer_work);
+}
+EXPORT_SYMBOL_GPL(mei_cancel_work);
+
+/**
+ * mei_reset - resets host and fw.
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success or < 0 if the reset hasn't succeeded
+ */
+int mei_reset(struct mei_device *dev)
+{
+	enum mei_dev_state state = dev->dev_state;
+	bool interrupts_enabled;
+	int ret;
+
+	if (state != MEI_DEV_INITIALIZING &&
+	    state != MEI_DEV_DISABLED &&
+	    state != MEI_DEV_POWER_DOWN &&
+	    state != MEI_DEV_POWER_UP) {
+		char fw_sts_str[MEI_FW_STATUS_STR_SZ];
+
+		mei_fw_status_str(dev, fw_sts_str, MEI_FW_STATUS_STR_SZ);
+		dev_warn(dev->dev, "unexpected reset: dev_state = %s fw status = %s\n",
+			 mei_dev_state_str(state), fw_sts_str);
+	}
+
+	mei_clear_interrupts(dev);
+
+	/* we're already in reset, cancel the init timer
+	 * if the reset was called due the hbm protocol error
+	 * we need to call it before hw start
+	 * so the hbm watchdog won't kick in
+	 */
+	mei_hbm_idle(dev);
+
+	/* enter reset flow */
+	interrupts_enabled = state != MEI_DEV_POWER_DOWN;
+	dev->dev_state = MEI_DEV_RESETTING;
+
+	dev->reset_count++;
+	if (dev->reset_count > MEI_MAX_CONSEC_RESET) {
+		dev_err(dev->dev, "reset: reached maximal consecutive resets: disabling the device\n");
+		dev->dev_state = MEI_DEV_DISABLED;
+		return -ENODEV;
+	}
+
+	ret = mei_hw_reset(dev, interrupts_enabled);
+	/* fall through and remove the sw state even if hw reset has failed */
+
+	/* no need to clean up software state in case of power up */
+	if (state != MEI_DEV_INITIALIZING && state != MEI_DEV_POWER_UP)
+		mei_cl_all_disconnect(dev);
+
+	mei_hbm_reset(dev);
+
+	dev->rd_msg_hdr = 0;
+
+	if (ret) {
+		dev_err(dev->dev, "hw_reset failed ret = %d\n", ret);
+		return ret;
+	}
+
+	if (state == MEI_DEV_POWER_DOWN) {
+		dev_dbg(dev->dev, "powering down: end of reset\n");
+		dev->dev_state = MEI_DEV_DISABLED;
+		return 0;
+	}
+
+	ret = mei_hw_start(dev);
+	if (ret) {
+		dev_err(dev->dev, "hw_start failed ret = %d\n", ret);
+		return ret;
+	}
+
+	dev_dbg(dev->dev, "link is established start sending messages.\n");
+
+	dev->dev_state = MEI_DEV_INIT_CLIENTS;
+	ret = mei_hbm_start_req(dev);
+	if (ret) {
+		dev_err(dev->dev, "hbm_start failed ret = %d\n", ret);
+		dev->dev_state = MEI_DEV_RESETTING;
+		return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mei_reset);
+
+/**
+ * mei_start - initializes host and fw to start work.
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+int mei_start(struct mei_device *dev)
+{
+	int ret;
+
+	mutex_lock(&dev->device_lock);
+
+	/* acknowledge interrupt and stop interrupts */
+	mei_clear_interrupts(dev);
+
+	mei_hw_config(dev);
+
+	dev_dbg(dev->dev, "reset in start the mei device.\n");
+
+	dev->reset_count = 0;
+	do {
+		dev->dev_state = MEI_DEV_INITIALIZING;
+		ret = mei_reset(dev);
+
+		if (ret == -ENODEV || dev->dev_state == MEI_DEV_DISABLED) {
+			dev_err(dev->dev, "reset failed ret = %d", ret);
+			goto err;
+		}
+	} while (ret);
+
+	if (mei_hbm_start_wait(dev)) {
+		dev_err(dev->dev, "HBM haven't started");
+		goto err;
+	}
+
+	if (!mei_host_is_ready(dev)) {
+		dev_err(dev->dev, "host is not ready.\n");
+		goto err;
+	}
+
+	if (!mei_hw_is_ready(dev)) {
+		dev_err(dev->dev, "ME is not ready.\n");
+		goto err;
+	}
+
+	if (!mei_hbm_version_is_supported(dev)) {
+		dev_dbg(dev->dev, "MEI start failed.\n");
+		goto err;
+	}
+
+	dev_dbg(dev->dev, "link layer has been established.\n");
+
+	mutex_unlock(&dev->device_lock);
+	return 0;
+err:
+	dev_err(dev->dev, "link layer initialization failed.\n");
+	dev->dev_state = MEI_DEV_DISABLED;
+	mutex_unlock(&dev->device_lock);
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(mei_start);
+
+/**
+ * mei_restart - restart device after suspend
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success or -ENODEV if the restart hasn't succeeded
+ */
+int mei_restart(struct mei_device *dev)
+{
+	int err;
+
+	mutex_lock(&dev->device_lock);
+
+	dev->dev_state = MEI_DEV_POWER_UP;
+	dev->reset_count = 0;
+
+	err = mei_reset(dev);
+
+	mutex_unlock(&dev->device_lock);
+
+	if (err == -ENODEV || dev->dev_state == MEI_DEV_DISABLED) {
+		dev_err(dev->dev, "device disabled = %d\n", err);
+		return -ENODEV;
+	}
+
+	/* try to start again */
+	if (err)
+		schedule_work(&dev->reset_work);
+
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mei_restart);
+
+static void mei_reset_work(struct work_struct *work)
+{
+	struct mei_device *dev =
+		container_of(work, struct mei_device,  reset_work);
+	int ret;
+
+	mei_clear_interrupts(dev);
+	mei_synchronize_irq(dev);
+
+	mutex_lock(&dev->device_lock);
+
+	ret = mei_reset(dev);
+
+	mutex_unlock(&dev->device_lock);
+
+	if (dev->dev_state == MEI_DEV_DISABLED) {
+		dev_err(dev->dev, "device disabled = %d\n", ret);
+		return;
+	}
+
+	/* retry reset in case of failure */
+	if (ret)
+		schedule_work(&dev->reset_work);
+}
+
+void mei_stop(struct mei_device *dev)
+{
+	dev_dbg(dev->dev, "stopping the device.\n");
+
+	mutex_lock(&dev->device_lock);
+	dev->dev_state = MEI_DEV_POWER_DOWN;
+	mutex_unlock(&dev->device_lock);
+	mei_cl_bus_remove_devices(dev);
+
+	mei_cancel_work(dev);
+
+	mei_clear_interrupts(dev);
+	mei_synchronize_irq(dev);
+
+	mutex_lock(&dev->device_lock);
+
+	mei_reset(dev);
+	/* move device to disabled state unconditionally */
+	dev->dev_state = MEI_DEV_DISABLED;
+
+	mutex_unlock(&dev->device_lock);
+}
+EXPORT_SYMBOL_GPL(mei_stop);
+
+/**
+ * mei_write_is_idle - check if the write queues are idle
+ *
+ * @dev: the device structure
+ *
+ * Return: true of there is no pending write
+ */
+bool mei_write_is_idle(struct mei_device *dev)
+{
+	bool idle = (dev->dev_state == MEI_DEV_ENABLED &&
+		list_empty(&dev->ctrl_wr_list) &&
+		list_empty(&dev->write_list)   &&
+		list_empty(&dev->write_waiting_list));
+
+	dev_dbg(dev->dev, "write pg: is idle[%d] state=%s ctrl=%01d write=%01d wwait=%01d\n",
+		idle,
+		mei_dev_state_str(dev->dev_state),
+		list_empty(&dev->ctrl_wr_list),
+		list_empty(&dev->write_list),
+		list_empty(&dev->write_waiting_list));
+
+	return idle;
+}
+EXPORT_SYMBOL_GPL(mei_write_is_idle);
+
+/**
+ * mei_device_init  -- initialize mei_device structure
+ *
+ * @dev: the mei device
+ * @device: the device structure
+ * @hw_ops: hw operations
+ */
+void mei_device_init(struct mei_device *dev,
+		     struct device *device,
+		     const struct mei_hw_ops *hw_ops)
+{
+	/* setup our list array */
+	INIT_LIST_HEAD(&dev->file_list);
+	INIT_LIST_HEAD(&dev->device_list);
+	INIT_LIST_HEAD(&dev->me_clients);
+	mutex_init(&dev->device_lock);
+	init_rwsem(&dev->me_clients_rwsem);
+	mutex_init(&dev->cl_bus_lock);
+	init_waitqueue_head(&dev->wait_hw_ready);
+	init_waitqueue_head(&dev->wait_pg);
+	init_waitqueue_head(&dev->wait_hbm_start);
+	dev->dev_state = MEI_DEV_INITIALIZING;
+	dev->reset_count = 0;
+
+	INIT_LIST_HEAD(&dev->write_list);
+	INIT_LIST_HEAD(&dev->write_waiting_list);
+	INIT_LIST_HEAD(&dev->ctrl_wr_list);
+	INIT_LIST_HEAD(&dev->ctrl_rd_list);
+	dev->tx_queue_limit = MEI_TX_QUEUE_LIMIT_DEFAULT;
+
+	INIT_DELAYED_WORK(&dev->timer_work, mei_timer);
+	INIT_WORK(&dev->reset_work, mei_reset_work);
+	INIT_WORK(&dev->bus_rescan_work, mei_cl_bus_rescan_work);
+
+	bitmap_zero(dev->host_clients_map, MEI_CLIENTS_MAX);
+	dev->open_handle_count = 0;
+
+	/*
+	 * Reserving the first client ID
+	 * 0: Reserved for MEI Bus Message communications
+	 */
+	bitmap_set(dev->host_clients_map, 0, 1);
+
+	dev->pg_event = MEI_PG_EVENT_IDLE;
+	dev->ops      = hw_ops;
+	dev->dev      = device;
+}
+EXPORT_SYMBOL_GPL(mei_device_init);
+
diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c
new file mode 100644
index 000000000..66f4d12d0
--- /dev/null
+++ b/drivers/misc/mei/interrupt.c
@@ -0,0 +1,535 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+
+#include <linux/export.h>
+#include <linux/kthread.h>
+#include <linux/interrupt.h>
+#include <linux/fs.h>
+#include <linux/jiffies.h>
+#include <linux/slab.h>
+#include <linux/pm_runtime.h>
+
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+#include "hbm.h"
+#include "client.h"
+
+
+/**
+ * mei_irq_compl_handler - dispatch complete handlers
+ *	for the completed callbacks
+ *
+ * @dev: mei device
+ * @cmpl_list: list of completed cbs
+ */
+void mei_irq_compl_handler(struct mei_device *dev, struct list_head *cmpl_list)
+{
+	struct mei_cl_cb *cb, *next;
+	struct mei_cl *cl;
+
+	list_for_each_entry_safe(cb, next, cmpl_list, list) {
+		cl = cb->cl;
+		list_del_init(&cb->list);
+
+		dev_dbg(dev->dev, "completing call back.\n");
+		mei_cl_complete(cl, cb);
+	}
+}
+EXPORT_SYMBOL_GPL(mei_irq_compl_handler);
+
+/**
+ * mei_cl_hbm_equal - check if hbm is addressed to the client
+ *
+ * @cl: host client
+ * @mei_hdr: header of mei client message
+ *
+ * Return: true if matches, false otherwise
+ */
+static inline int mei_cl_hbm_equal(struct mei_cl *cl,
+			struct mei_msg_hdr *mei_hdr)
+{
+	return  mei_cl_host_addr(cl) == mei_hdr->host_addr &&
+		mei_cl_me_id(cl) == mei_hdr->me_addr;
+}
+
+/**
+ * mei_irq_discard_msg  - discard received message
+ *
+ * @dev: mei device
+ * @hdr: message header
+ */
+static void mei_irq_discard_msg(struct mei_device *dev, struct mei_msg_hdr *hdr)
+{
+	/*
+	 * no need to check for size as it is guarantied
+	 * that length fits into rd_msg_buf
+	 */
+	mei_read_slots(dev, dev->rd_msg_buf, hdr->length);
+	dev_dbg(dev->dev, "discarding message " MEI_HDR_FMT "\n",
+		MEI_HDR_PRM(hdr));
+}
+
+/**
+ * mei_cl_irq_read_msg - process client message
+ *
+ * @cl: reading client
+ * @mei_hdr: header of mei client message
+ * @cmpl_list: completion list
+ *
+ * Return: always 0
+ */
+static int mei_cl_irq_read_msg(struct mei_cl *cl,
+			       struct mei_msg_hdr *mei_hdr,
+			       struct list_head *cmpl_list)
+{
+	struct mei_device *dev = cl->dev;
+	struct mei_cl_cb *cb;
+	size_t buf_sz;
+
+	cb = list_first_entry_or_null(&cl->rd_pending, struct mei_cl_cb, list);
+	if (!cb) {
+		if (!mei_cl_is_fixed_address(cl)) {
+			cl_err(dev, cl, "pending read cb not found\n");
+			goto discard;
+		}
+		cb = mei_cl_alloc_cb(cl, mei_cl_mtu(cl), MEI_FOP_READ, cl->fp);
+		if (!cb)
+			goto discard;
+		list_add_tail(&cb->list, &cl->rd_pending);
+	}
+
+	if (!mei_cl_is_connected(cl)) {
+		cl_dbg(dev, cl, "not connected\n");
+		cb->status = -ENODEV;
+		goto discard;
+	}
+
+	buf_sz = mei_hdr->length + cb->buf_idx;
+	/* catch for integer overflow */
+	if (buf_sz < cb->buf_idx) {
+		cl_err(dev, cl, "message is too big len %d idx %zu\n",
+		       mei_hdr->length, cb->buf_idx);
+		cb->status = -EMSGSIZE;
+		goto discard;
+	}
+
+	if (cb->buf.size < buf_sz) {
+		cl_dbg(dev, cl, "message overflow. size %zu len %d idx %zu\n",
+			cb->buf.size, mei_hdr->length, cb->buf_idx);
+		cb->status = -EMSGSIZE;
+		goto discard;
+	}
+
+	mei_read_slots(dev, cb->buf.data + cb->buf_idx, mei_hdr->length);
+
+	cb->buf_idx += mei_hdr->length;
+
+	if (mei_hdr->msg_complete) {
+		cl_dbg(dev, cl, "completed read length = %zu\n", cb->buf_idx);
+		list_move_tail(&cb->list, cmpl_list);
+	} else {
+		pm_runtime_mark_last_busy(dev->dev);
+		pm_request_autosuspend(dev->dev);
+	}
+
+	return 0;
+
+discard:
+	if (cb)
+		list_move_tail(&cb->list, cmpl_list);
+	mei_irq_discard_msg(dev, mei_hdr);
+	return 0;
+}
+
+/**
+ * mei_cl_irq_disconnect_rsp - send disconnection response message
+ *
+ * @cl: client
+ * @cb: callback block.
+ * @cmpl_list: complete list.
+ *
+ * Return: 0, OK; otherwise, error.
+ */
+static int mei_cl_irq_disconnect_rsp(struct mei_cl *cl, struct mei_cl_cb *cb,
+				     struct list_head *cmpl_list)
+{
+	struct mei_device *dev = cl->dev;
+	u32 msg_slots;
+	int slots;
+	int ret;
+
+	msg_slots = mei_hbm2slots(sizeof(struct hbm_client_connect_response));
+	slots = mei_hbuf_empty_slots(dev);
+	if (slots < 0)
+		return -EOVERFLOW;
+
+	if ((u32)slots < msg_slots)
+		return -EMSGSIZE;
+
+	ret = mei_hbm_cl_disconnect_rsp(dev, cl);
+	list_move_tail(&cb->list, cmpl_list);
+
+	return ret;
+}
+
+/**
+ * mei_cl_irq_read - processes client read related operation from the
+ *	interrupt thread context - request for flow control credits
+ *
+ * @cl: client
+ * @cb: callback block.
+ * @cmpl_list: complete list.
+ *
+ * Return: 0, OK; otherwise, error.
+ */
+static int mei_cl_irq_read(struct mei_cl *cl, struct mei_cl_cb *cb,
+			   struct list_head *cmpl_list)
+{
+	struct mei_device *dev = cl->dev;
+	u32 msg_slots;
+	int slots;
+	int ret;
+
+	if (!list_empty(&cl->rd_pending))
+		return 0;
+
+	msg_slots = mei_hbm2slots(sizeof(struct hbm_flow_control));
+	slots = mei_hbuf_empty_slots(dev);
+	if (slots < 0)
+		return -EOVERFLOW;
+
+	if ((u32)slots < msg_slots)
+		return -EMSGSIZE;
+
+	ret = mei_hbm_cl_flow_control_req(dev, cl);
+	if (ret) {
+		cl->status = ret;
+		cb->buf_idx = 0;
+		list_move_tail(&cb->list, cmpl_list);
+		return ret;
+	}
+
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_request_autosuspend(dev->dev);
+
+	list_move_tail(&cb->list, &cl->rd_pending);
+
+	return 0;
+}
+
+static inline bool hdr_is_hbm(struct mei_msg_hdr *mei_hdr)
+{
+	return mei_hdr->host_addr == 0 && mei_hdr->me_addr == 0;
+}
+
+static inline bool hdr_is_fixed(struct mei_msg_hdr *mei_hdr)
+{
+	return mei_hdr->host_addr == 0 && mei_hdr->me_addr != 0;
+}
+
+static inline int hdr_is_valid(u32 msg_hdr)
+{
+	struct mei_msg_hdr *mei_hdr;
+
+	mei_hdr = (struct mei_msg_hdr *)&msg_hdr;
+	if (!msg_hdr || mei_hdr->reserved)
+		return -EBADMSG;
+
+	return 0;
+}
+
+/**
+ * mei_irq_read_handler - bottom half read routine after ISR to
+ * handle the read processing.
+ *
+ * @dev: the device structure
+ * @cmpl_list: An instance of our list structure
+ * @slots: slots to read.
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+int mei_irq_read_handler(struct mei_device *dev,
+			 struct list_head *cmpl_list, s32 *slots)
+{
+	struct mei_msg_hdr *mei_hdr;
+	struct mei_cl *cl;
+	int ret;
+
+	if (!dev->rd_msg_hdr) {
+		dev->rd_msg_hdr = mei_read_hdr(dev);
+		(*slots)--;
+		dev_dbg(dev->dev, "slots =%08x.\n", *slots);
+
+		ret = hdr_is_valid(dev->rd_msg_hdr);
+		if (ret) {
+			dev_err(dev->dev, "corrupted message header 0x%08X\n",
+				dev->rd_msg_hdr);
+			goto end;
+		}
+	}
+
+	mei_hdr = (struct mei_msg_hdr *)&dev->rd_msg_hdr;
+	dev_dbg(dev->dev, MEI_HDR_FMT, MEI_HDR_PRM(mei_hdr));
+
+	if (mei_slots2data(*slots) < mei_hdr->length) {
+		dev_err(dev->dev, "less data available than length=%08x.\n",
+				*slots);
+		/* we can't read the message */
+		ret = -ENODATA;
+		goto end;
+	}
+
+	/*  HBM message */
+	if (hdr_is_hbm(mei_hdr)) {
+		ret = mei_hbm_dispatch(dev, mei_hdr);
+		if (ret) {
+			dev_dbg(dev->dev, "mei_hbm_dispatch failed ret = %d\n",
+					ret);
+			goto end;
+		}
+		goto reset_slots;
+	}
+
+	/* find recipient cl */
+	list_for_each_entry(cl, &dev->file_list, link) {
+		if (mei_cl_hbm_equal(cl, mei_hdr)) {
+			cl_dbg(dev, cl, "got a message\n");
+			break;
+		}
+	}
+
+	/* if no recipient cl was found we assume corrupted header */
+	if (&cl->link == &dev->file_list) {
+		/* A message for not connected fixed address clients
+		 * should be silently discarded
+		 * On power down client may be force cleaned,
+		 * silently discard such messages
+		 */
+		if (hdr_is_fixed(mei_hdr) ||
+		    dev->dev_state == MEI_DEV_POWER_DOWN) {
+			mei_irq_discard_msg(dev, mei_hdr);
+			ret = 0;
+			goto reset_slots;
+		}
+		dev_err(dev->dev, "no destination client found 0x%08X\n",
+				dev->rd_msg_hdr);
+		ret = -EBADMSG;
+		goto end;
+	}
+
+	ret = mei_cl_irq_read_msg(cl, mei_hdr, cmpl_list);
+
+
+reset_slots:
+	/* reset the number of slots and header */
+	*slots = mei_count_full_read_slots(dev);
+	dev->rd_msg_hdr = 0;
+
+	if (*slots == -EOVERFLOW) {
+		/* overflow - reset */
+		dev_err(dev->dev, "resetting due to slots overflow.\n");
+		/* set the event since message has been read */
+		ret = -ERANGE;
+		goto end;
+	}
+end:
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mei_irq_read_handler);
+
+
+/**
+ * mei_irq_write_handler -  dispatch write requests
+ *  after irq received
+ *
+ * @dev: the device structure
+ * @cmpl_list: An instance of our list structure
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+int mei_irq_write_handler(struct mei_device *dev, struct list_head *cmpl_list)
+{
+
+	struct mei_cl *cl;
+	struct mei_cl_cb *cb, *next;
+	s32 slots;
+	int ret;
+
+
+	if (!mei_hbuf_acquire(dev))
+		return 0;
+
+	slots = mei_hbuf_empty_slots(dev);
+	if (slots < 0)
+		return -EOVERFLOW;
+
+	if (slots == 0)
+		return -EMSGSIZE;
+
+	/* complete all waiting for write CB */
+	dev_dbg(dev->dev, "complete all waiting for write cb.\n");
+
+	list_for_each_entry_safe(cb, next, &dev->write_waiting_list, list) {
+		cl = cb->cl;
+
+		cl->status = 0;
+		cl_dbg(dev, cl, "MEI WRITE COMPLETE\n");
+		cl->writing_state = MEI_WRITE_COMPLETE;
+		list_move_tail(&cb->list, cmpl_list);
+	}
+
+	/* complete control write list CB */
+	dev_dbg(dev->dev, "complete control write list cb.\n");
+	list_for_each_entry_safe(cb, next, &dev->ctrl_wr_list, list) {
+		cl = cb->cl;
+		switch (cb->fop_type) {
+		case MEI_FOP_DISCONNECT:
+			/* send disconnect message */
+			ret = mei_cl_irq_disconnect(cl, cb, cmpl_list);
+			if (ret)
+				return ret;
+
+			break;
+		case MEI_FOP_READ:
+			/* send flow control message */
+			ret = mei_cl_irq_read(cl, cb, cmpl_list);
+			if (ret)
+				return ret;
+
+			break;
+		case MEI_FOP_CONNECT:
+			/* connect message */
+			ret = mei_cl_irq_connect(cl, cb, cmpl_list);
+			if (ret)
+				return ret;
+
+			break;
+		case MEI_FOP_DISCONNECT_RSP:
+			/* send disconnect resp */
+			ret = mei_cl_irq_disconnect_rsp(cl, cb, cmpl_list);
+			if (ret)
+				return ret;
+			break;
+
+		case MEI_FOP_NOTIFY_START:
+		case MEI_FOP_NOTIFY_STOP:
+			ret = mei_cl_irq_notify(cl, cb, cmpl_list);
+			if (ret)
+				return ret;
+			break;
+		default:
+			BUG();
+		}
+
+	}
+	/* complete  write list CB */
+	dev_dbg(dev->dev, "complete write list cb.\n");
+	list_for_each_entry_safe(cb, next, &dev->write_list, list) {
+		cl = cb->cl;
+		ret = mei_cl_irq_write(cl, cb, cmpl_list);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mei_irq_write_handler);
+
+
+/**
+ * mei_connect_timeout  - connect/disconnect timeouts
+ *
+ * @cl: host client
+ */
+static void mei_connect_timeout(struct mei_cl *cl)
+{
+	struct mei_device *dev = cl->dev;
+
+	if (cl->state == MEI_FILE_CONNECTING) {
+		if (dev->hbm_f_dot_supported) {
+			cl->state = MEI_FILE_DISCONNECT_REQUIRED;
+			wake_up(&cl->wait);
+			return;
+		}
+	}
+	mei_reset(dev);
+}
+
+#define MEI_STALL_TIMER_FREQ (2 * HZ)
+/**
+ * mei_schedule_stall_timer - re-arm stall_timer work
+ *
+ * Schedule stall timer
+ *
+ * @dev: the device structure
+ */
+void mei_schedule_stall_timer(struct mei_device *dev)
+{
+	schedule_delayed_work(&dev->timer_work, MEI_STALL_TIMER_FREQ);
+}
+
+/**
+ * mei_timer - timer function.
+ *
+ * @work: pointer to the work_struct structure
+ *
+ */
+void mei_timer(struct work_struct *work)
+{
+	struct mei_cl *cl;
+	struct mei_device *dev = container_of(work,
+					struct mei_device, timer_work.work);
+	bool reschedule_timer = false;
+
+	mutex_lock(&dev->device_lock);
+
+	/* Catch interrupt stalls during HBM init handshake */
+	if (dev->dev_state == MEI_DEV_INIT_CLIENTS &&
+	    dev->hbm_state != MEI_HBM_IDLE) {
+
+		if (dev->init_clients_timer) {
+			if (--dev->init_clients_timer == 0) {
+				dev_err(dev->dev, "timer: init clients timeout hbm_state = %d.\n",
+					dev->hbm_state);
+				mei_reset(dev);
+				goto out;
+			}
+			reschedule_timer = true;
+		}
+	}
+
+	if (dev->dev_state != MEI_DEV_ENABLED)
+		goto out;
+
+	/*** connect/disconnect timeouts ***/
+	list_for_each_entry(cl, &dev->file_list, link) {
+		if (cl->timer_count) {
+			if (--cl->timer_count == 0) {
+				dev_err(dev->dev, "timer: connect/disconnect timeout.\n");
+				mei_connect_timeout(cl);
+				goto out;
+			}
+			reschedule_timer = true;
+		}
+	}
+
+out:
+	if (dev->dev_state != MEI_DEV_DISABLED && reschedule_timer)
+		mei_schedule_stall_timer(dev);
+
+	mutex_unlock(&dev->device_lock);
+}
diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c
new file mode 100644
index 000000000..87281b369
--- /dev/null
+++ b/drivers/misc/mei/main.c
@@ -0,0 +1,1022 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2018, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/poll.h>
+#include <linux/init.h>
+#include <linux/ioctl.h>
+#include <linux/cdev.h>
+#include <linux/sched/signal.h>
+#include <linux/uuid.h>
+#include <linux/compat.h>
+#include <linux/jiffies.h>
+#include <linux/interrupt.h>
+
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+#include "client.h"
+
+/**
+ * mei_open - the open function
+ *
+ * @inode: pointer to inode structure
+ * @file: pointer to file structure
+ *
+ * Return: 0 on success, <0 on error
+ */
+static int mei_open(struct inode *inode, struct file *file)
+{
+	struct mei_device *dev;
+	struct mei_cl *cl;
+
+	int err;
+
+	dev = container_of(inode->i_cdev, struct mei_device, cdev);
+	if (!dev)
+		return -ENODEV;
+
+	mutex_lock(&dev->device_lock);
+
+	if (dev->dev_state != MEI_DEV_ENABLED) {
+		dev_dbg(dev->dev, "dev_state != MEI_ENABLED  dev_state = %s\n",
+		    mei_dev_state_str(dev->dev_state));
+		err = -ENODEV;
+		goto err_unlock;
+	}
+
+	cl = mei_cl_alloc_linked(dev);
+	if (IS_ERR(cl)) {
+		err = PTR_ERR(cl);
+		goto err_unlock;
+	}
+
+	cl->fp = file;
+	file->private_data = cl;
+
+	mutex_unlock(&dev->device_lock);
+
+	return nonseekable_open(inode, file);
+
+err_unlock:
+	mutex_unlock(&dev->device_lock);
+	return err;
+}
+
+/**
+ * mei_release - the release function
+ *
+ * @inode: pointer to inode structure
+ * @file: pointer to file structure
+ *
+ * Return: 0 on success, <0 on error
+ */
+static int mei_release(struct inode *inode, struct file *file)
+{
+	struct mei_cl *cl = file->private_data;
+	struct mei_device *dev;
+	int rets;
+
+	if (WARN_ON(!cl || !cl->dev))
+		return -ENODEV;
+
+	dev = cl->dev;
+
+	mutex_lock(&dev->device_lock);
+
+	rets = mei_cl_disconnect(cl);
+
+	mei_cl_flush_queues(cl, file);
+	cl_dbg(dev, cl, "removing\n");
+
+	mei_cl_unlink(cl);
+
+	file->private_data = NULL;
+
+	kfree(cl);
+
+	mutex_unlock(&dev->device_lock);
+	return rets;
+}
+
+
+/**
+ * mei_read - the read function.
+ *
+ * @file: pointer to file structure
+ * @ubuf: pointer to user buffer
+ * @length: buffer length
+ * @offset: data offset in buffer
+ *
+ * Return: >=0 data length on success , <0 on error
+ */
+static ssize_t mei_read(struct file *file, char __user *ubuf,
+			size_t length, loff_t *offset)
+{
+	struct mei_cl *cl = file->private_data;
+	struct mei_device *dev;
+	struct mei_cl_cb *cb = NULL;
+	bool nonblock = !!(file->f_flags & O_NONBLOCK);
+	ssize_t rets;
+
+	if (WARN_ON(!cl || !cl->dev))
+		return -ENODEV;
+
+	dev = cl->dev;
+
+
+	mutex_lock(&dev->device_lock);
+	if (dev->dev_state != MEI_DEV_ENABLED) {
+		rets = -ENODEV;
+		goto out;
+	}
+
+	if (length == 0) {
+		rets = 0;
+		goto out;
+	}
+
+	if (ubuf == NULL) {
+		rets = -EMSGSIZE;
+		goto out;
+	}
+
+	cb = mei_cl_read_cb(cl, file);
+	if (cb)
+		goto copy_buffer;
+
+	if (*offset > 0)
+		*offset = 0;
+
+	rets = mei_cl_read_start(cl, length, file);
+	if (rets && rets != -EBUSY) {
+		cl_dbg(dev, cl, "mei start read failure status = %zd\n", rets);
+		goto out;
+	}
+
+	if (nonblock) {
+		rets = -EAGAIN;
+		goto out;
+	}
+
+	mutex_unlock(&dev->device_lock);
+	if (wait_event_interruptible(cl->rx_wait,
+				     !list_empty(&cl->rd_completed) ||
+				     !mei_cl_is_connected(cl))) {
+		if (signal_pending(current))
+			return -EINTR;
+		return -ERESTARTSYS;
+	}
+	mutex_lock(&dev->device_lock);
+
+	if (!mei_cl_is_connected(cl)) {
+		rets = -ENODEV;
+		goto out;
+	}
+
+	cb = mei_cl_read_cb(cl, file);
+	if (!cb) {
+		rets = 0;
+		goto out;
+	}
+
+copy_buffer:
+	/* now copy the data to user space */
+	if (cb->status) {
+		rets = cb->status;
+		cl_dbg(dev, cl, "read operation failed %zd\n", rets);
+		goto free;
+	}
+
+	cl_dbg(dev, cl, "buf.size = %zu buf.idx = %zu offset = %lld\n",
+	       cb->buf.size, cb->buf_idx, *offset);
+	if (*offset >= cb->buf_idx) {
+		rets = 0;
+		goto free;
+	}
+
+	/* length is being truncated to PAGE_SIZE,
+	 * however buf_idx may point beyond that */
+	length = min_t(size_t, length, cb->buf_idx - *offset);
+
+	if (copy_to_user(ubuf, cb->buf.data + *offset, length)) {
+		dev_dbg(dev->dev, "failed to copy data to userland\n");
+		rets = -EFAULT;
+		goto free;
+	}
+
+	rets = length;
+	*offset += length;
+	/* not all data was read, keep the cb */
+	if (*offset < cb->buf_idx)
+		goto out;
+
+free:
+	mei_io_cb_free(cb);
+	*offset = 0;
+
+out:
+	cl_dbg(dev, cl, "end mei read rets = %zd\n", rets);
+	mutex_unlock(&dev->device_lock);
+	return rets;
+}
+/**
+ * mei_write - the write function.
+ *
+ * @file: pointer to file structure
+ * @ubuf: pointer to user buffer
+ * @length: buffer length
+ * @offset: data offset in buffer
+ *
+ * Return: >=0 data length on success , <0 on error
+ */
+static ssize_t mei_write(struct file *file, const char __user *ubuf,
+			 size_t length, loff_t *offset)
+{
+	struct mei_cl *cl = file->private_data;
+	struct mei_cl_cb *cb;
+	struct mei_device *dev;
+	ssize_t rets;
+
+	if (WARN_ON(!cl || !cl->dev))
+		return -ENODEV;
+
+	dev = cl->dev;
+
+	mutex_lock(&dev->device_lock);
+
+	if (dev->dev_state != MEI_DEV_ENABLED) {
+		rets = -ENODEV;
+		goto out;
+	}
+
+	if (!mei_cl_is_connected(cl)) {
+		cl_err(dev, cl, "is not connected");
+		rets = -ENODEV;
+		goto out;
+	}
+
+	if (!mei_me_cl_is_active(cl->me_cl)) {
+		rets = -ENOTTY;
+		goto out;
+	}
+
+	if (length > mei_cl_mtu(cl)) {
+		rets = -EFBIG;
+		goto out;
+	}
+
+	if (length == 0) {
+		rets = 0;
+		goto out;
+	}
+
+	while (cl->tx_cb_queued >= dev->tx_queue_limit) {
+		if (file->f_flags & O_NONBLOCK) {
+			rets = -EAGAIN;
+			goto out;
+		}
+		mutex_unlock(&dev->device_lock);
+		rets = wait_event_interruptible(cl->tx_wait,
+				cl->writing_state == MEI_WRITE_COMPLETE ||
+				(!mei_cl_is_connected(cl)));
+		mutex_lock(&dev->device_lock);
+		if (rets) {
+			if (signal_pending(current))
+				rets = -EINTR;
+			goto out;
+		}
+		if (!mei_cl_is_connected(cl)) {
+			rets = -ENODEV;
+			goto out;
+		}
+	}
+
+	cb = mei_cl_alloc_cb(cl, length, MEI_FOP_WRITE, file);
+	if (!cb) {
+		rets = -ENOMEM;
+		goto out;
+	}
+
+	rets = copy_from_user(cb->buf.data, ubuf, length);
+	if (rets) {
+		dev_dbg(dev->dev, "failed to copy data from userland\n");
+		rets = -EFAULT;
+		mei_io_cb_free(cb);
+		goto out;
+	}
+
+	rets = mei_cl_write(cl, cb);
+out:
+	mutex_unlock(&dev->device_lock);
+	return rets;
+}
+
+/**
+ * mei_ioctl_connect_client - the connect to fw client IOCTL function
+ *
+ * @file: private data of the file object
+ * @data: IOCTL connect data, input and output parameters
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+static int mei_ioctl_connect_client(struct file *file,
+			struct mei_connect_client_data *data)
+{
+	struct mei_device *dev;
+	struct mei_client *client;
+	struct mei_me_client *me_cl;
+	struct mei_cl *cl;
+	int rets;
+
+	cl = file->private_data;
+	dev = cl->dev;
+
+	if (dev->dev_state != MEI_DEV_ENABLED)
+		return -ENODEV;
+
+	if (cl->state != MEI_FILE_INITIALIZING &&
+	    cl->state != MEI_FILE_DISCONNECTED)
+		return  -EBUSY;
+
+	/* find ME client we're trying to connect to */
+	me_cl = mei_me_cl_by_uuid(dev, &data->in_client_uuid);
+	if (!me_cl) {
+		dev_dbg(dev->dev, "Cannot connect to FW Client UUID = %pUl\n",
+			&data->in_client_uuid);
+		rets = -ENOTTY;
+		goto end;
+	}
+
+	if (me_cl->props.fixed_address) {
+		bool forbidden = dev->override_fixed_address ?
+			 !dev->allow_fixed_address : !dev->hbm_f_fa_supported;
+		if (forbidden) {
+			dev_dbg(dev->dev, "Connection forbidden to FW Client UUID = %pUl\n",
+				&data->in_client_uuid);
+			rets = -ENOTTY;
+			goto end;
+		}
+	}
+
+	dev_dbg(dev->dev, "Connect to FW Client ID = %d\n",
+			me_cl->client_id);
+	dev_dbg(dev->dev, "FW Client - Protocol Version = %d\n",
+			me_cl->props.protocol_version);
+	dev_dbg(dev->dev, "FW Client - Max Msg Len = %d\n",
+			me_cl->props.max_msg_length);
+
+	/* prepare the output buffer */
+	client = &data->out_client_properties;
+	client->max_msg_length = me_cl->props.max_msg_length;
+	client->protocol_version = me_cl->props.protocol_version;
+	dev_dbg(dev->dev, "Can connect?\n");
+
+	rets = mei_cl_connect(cl, me_cl, file);
+
+end:
+	mei_me_cl_put(me_cl);
+	return rets;
+}
+
+/**
+ * mei_ioctl_client_notify_request -
+ *     propagate event notification request to client
+ *
+ * @file: pointer to file structure
+ * @request: 0 - disable, 1 - enable
+ *
+ * Return: 0 on success , <0 on error
+ */
+static int mei_ioctl_client_notify_request(const struct file *file, u32 request)
+{
+	struct mei_cl *cl = file->private_data;
+
+	if (request != MEI_HBM_NOTIFICATION_START &&
+	    request != MEI_HBM_NOTIFICATION_STOP)
+		return -EINVAL;
+
+	return mei_cl_notify_request(cl, file, (u8)request);
+}
+
+/**
+ * mei_ioctl_client_notify_get -  wait for notification request
+ *
+ * @file: pointer to file structure
+ * @notify_get: 0 - disable, 1 - enable
+ *
+ * Return: 0 on success , <0 on error
+ */
+static int mei_ioctl_client_notify_get(const struct file *file, u32 *notify_get)
+{
+	struct mei_cl *cl = file->private_data;
+	bool notify_ev;
+	bool block = (file->f_flags & O_NONBLOCK) == 0;
+	int rets;
+
+	rets = mei_cl_notify_get(cl, block, &notify_ev);
+	if (rets)
+		return rets;
+
+	*notify_get = notify_ev ? 1 : 0;
+	return 0;
+}
+
+/**
+ * mei_ioctl - the IOCTL function
+ *
+ * @file: pointer to file structure
+ * @cmd: ioctl command
+ * @data: pointer to mei message structure
+ *
+ * Return: 0 on success , <0 on error
+ */
+static long mei_ioctl(struct file *file, unsigned int cmd, unsigned long data)
+{
+	struct mei_device *dev;
+	struct mei_cl *cl = file->private_data;
+	struct mei_connect_client_data connect_data;
+	u32 notify_get, notify_req;
+	int rets;
+
+
+	if (WARN_ON(!cl || !cl->dev))
+		return -ENODEV;
+
+	dev = cl->dev;
+
+	dev_dbg(dev->dev, "IOCTL cmd = 0x%x", cmd);
+
+	mutex_lock(&dev->device_lock);
+	if (dev->dev_state != MEI_DEV_ENABLED) {
+		rets = -ENODEV;
+		goto out;
+	}
+
+	switch (cmd) {
+	case IOCTL_MEI_CONNECT_CLIENT:
+		dev_dbg(dev->dev, ": IOCTL_MEI_CONNECT_CLIENT.\n");
+		if (copy_from_user(&connect_data, (char __user *)data,
+				sizeof(struct mei_connect_client_data))) {
+			dev_dbg(dev->dev, "failed to copy data from userland\n");
+			rets = -EFAULT;
+			goto out;
+		}
+
+		rets = mei_ioctl_connect_client(file, &connect_data);
+		if (rets)
+			goto out;
+
+		/* if all is ok, copying the data back to user. */
+		if (copy_to_user((char __user *)data, &connect_data,
+				sizeof(struct mei_connect_client_data))) {
+			dev_dbg(dev->dev, "failed to copy data to userland\n");
+			rets = -EFAULT;
+			goto out;
+		}
+
+		break;
+
+	case IOCTL_MEI_NOTIFY_SET:
+		dev_dbg(dev->dev, ": IOCTL_MEI_NOTIFY_SET.\n");
+		if (copy_from_user(&notify_req,
+				   (char __user *)data, sizeof(notify_req))) {
+			dev_dbg(dev->dev, "failed to copy data from userland\n");
+			rets = -EFAULT;
+			goto out;
+		}
+		rets = mei_ioctl_client_notify_request(file, notify_req);
+		break;
+
+	case IOCTL_MEI_NOTIFY_GET:
+		dev_dbg(dev->dev, ": IOCTL_MEI_NOTIFY_GET.\n");
+		rets = mei_ioctl_client_notify_get(file, &notify_get);
+		if (rets)
+			goto out;
+
+		dev_dbg(dev->dev, "copy connect data to user\n");
+		if (copy_to_user((char __user *)data,
+				&notify_get, sizeof(notify_get))) {
+			dev_dbg(dev->dev, "failed to copy data to userland\n");
+			rets = -EFAULT;
+			goto out;
+
+		}
+		break;
+
+	default:
+		rets = -ENOIOCTLCMD;
+	}
+
+out:
+	mutex_unlock(&dev->device_lock);
+	return rets;
+}
+
+/**
+ * mei_compat_ioctl - the compat IOCTL function
+ *
+ * @file: pointer to file structure
+ * @cmd: ioctl command
+ * @data: pointer to mei message structure
+ *
+ * Return: 0 on success , <0 on error
+ */
+#ifdef CONFIG_COMPAT
+static long mei_compat_ioctl(struct file *file,
+			unsigned int cmd, unsigned long data)
+{
+	return mei_ioctl(file, cmd, (unsigned long)compat_ptr(data));
+}
+#endif
+
+
+/**
+ * mei_poll - the poll function
+ *
+ * @file: pointer to file structure
+ * @wait: pointer to poll_table structure
+ *
+ * Return: poll mask
+ */
+static __poll_t mei_poll(struct file *file, poll_table *wait)
+{
+	__poll_t req_events = poll_requested_events(wait);
+	struct mei_cl *cl = file->private_data;
+	struct mei_device *dev;
+	__poll_t mask = 0;
+	bool notify_en;
+
+	if (WARN_ON(!cl || !cl->dev))
+		return EPOLLERR;
+
+	dev = cl->dev;
+
+	mutex_lock(&dev->device_lock);
+
+	notify_en = cl->notify_en && (req_events & EPOLLPRI);
+
+	if (dev->dev_state != MEI_DEV_ENABLED ||
+	    !mei_cl_is_connected(cl)) {
+		mask = EPOLLERR;
+		goto out;
+	}
+
+	if (notify_en) {
+		poll_wait(file, &cl->ev_wait, wait);
+		if (cl->notify_ev)
+			mask |= EPOLLPRI;
+	}
+
+	if (req_events & (EPOLLIN | EPOLLRDNORM)) {
+		poll_wait(file, &cl->rx_wait, wait);
+
+		if (!list_empty(&cl->rd_completed))
+			mask |= EPOLLIN | EPOLLRDNORM;
+		else
+			mei_cl_read_start(cl, mei_cl_mtu(cl), file);
+	}
+
+	if (req_events & (EPOLLOUT | EPOLLWRNORM)) {
+		poll_wait(file, &cl->tx_wait, wait);
+		if (cl->tx_cb_queued < dev->tx_queue_limit)
+			mask |= EPOLLOUT | EPOLLWRNORM;
+	}
+
+out:
+	mutex_unlock(&dev->device_lock);
+	return mask;
+}
+
+/**
+ * mei_cl_is_write_queued - check if the client has pending writes.
+ *
+ * @cl: writing host client
+ *
+ * Return: true if client is writing, false otherwise.
+ */
+static bool mei_cl_is_write_queued(struct mei_cl *cl)
+{
+	struct mei_device *dev = cl->dev;
+	struct mei_cl_cb *cb;
+
+	list_for_each_entry(cb, &dev->write_list, list)
+		if (cb->cl == cl)
+			return true;
+	list_for_each_entry(cb, &dev->write_waiting_list, list)
+		if (cb->cl == cl)
+			return true;
+	return false;
+}
+
+/**
+ * mei_fsync - the fsync handler
+ *
+ * @fp:       pointer to file structure
+ * @start:    unused
+ * @end:      unused
+ * @datasync: unused
+ *
+ * Return: 0 on success, -ENODEV if client is not connected
+ */
+static int mei_fsync(struct file *fp, loff_t start, loff_t end, int datasync)
+{
+	struct mei_cl *cl = fp->private_data;
+	struct mei_device *dev;
+	int rets;
+
+	if (WARN_ON(!cl || !cl->dev))
+		return -ENODEV;
+
+	dev = cl->dev;
+
+	mutex_lock(&dev->device_lock);
+
+	if (dev->dev_state != MEI_DEV_ENABLED || !mei_cl_is_connected(cl)) {
+		rets = -ENODEV;
+		goto out;
+	}
+
+	while (mei_cl_is_write_queued(cl)) {
+		mutex_unlock(&dev->device_lock);
+		rets = wait_event_interruptible(cl->tx_wait,
+				cl->writing_state == MEI_WRITE_COMPLETE ||
+				!mei_cl_is_connected(cl));
+		mutex_lock(&dev->device_lock);
+		if (rets) {
+			if (signal_pending(current))
+				rets = -EINTR;
+			goto out;
+		}
+		if (!mei_cl_is_connected(cl)) {
+			rets = -ENODEV;
+			goto out;
+		}
+	}
+	rets = 0;
+out:
+	mutex_unlock(&dev->device_lock);
+	return rets;
+}
+
+/**
+ * mei_fasync - asynchronous io support
+ *
+ * @fd: file descriptor
+ * @file: pointer to file structure
+ * @band: band bitmap
+ *
+ * Return: negative on error,
+ *         0 if it did no changes,
+ *         and positive a process was added or deleted
+ */
+static int mei_fasync(int fd, struct file *file, int band)
+{
+
+	struct mei_cl *cl = file->private_data;
+
+	if (!mei_cl_is_connected(cl))
+		return -ENODEV;
+
+	return fasync_helper(fd, file, band, &cl->ev_async);
+}
+
+/**
+ * fw_status_show - mei device fw_status attribute show method
+ *
+ * @device: device pointer
+ * @attr: attribute pointer
+ * @buf:  char out buffer
+ *
+ * Return: number of the bytes printed into buf or error
+ */
+static ssize_t fw_status_show(struct device *device,
+		struct device_attribute *attr, char *buf)
+{
+	struct mei_device *dev = dev_get_drvdata(device);
+	struct mei_fw_status fw_status;
+	int err, i;
+	ssize_t cnt = 0;
+
+	mutex_lock(&dev->device_lock);
+	err = mei_fw_status(dev, &fw_status);
+	mutex_unlock(&dev->device_lock);
+	if (err) {
+		dev_err(device, "read fw_status error = %d\n", err);
+		return err;
+	}
+
+	for (i = 0; i < fw_status.count; i++)
+		cnt += scnprintf(buf + cnt, PAGE_SIZE - cnt, "%08X\n",
+				fw_status.status[i]);
+	return cnt;
+}
+static DEVICE_ATTR_RO(fw_status);
+
+/**
+ * hbm_ver_show - display HBM protocol version negotiated with FW
+ *
+ * @device: device pointer
+ * @attr: attribute pointer
+ * @buf:  char out buffer
+ *
+ * Return: number of the bytes printed into buf or error
+ */
+static ssize_t hbm_ver_show(struct device *device,
+			    struct device_attribute *attr, char *buf)
+{
+	struct mei_device *dev = dev_get_drvdata(device);
+	struct hbm_version ver;
+
+	mutex_lock(&dev->device_lock);
+	ver = dev->version;
+	mutex_unlock(&dev->device_lock);
+
+	return sprintf(buf, "%u.%u\n", ver.major_version, ver.minor_version);
+}
+static DEVICE_ATTR_RO(hbm_ver);
+
+/**
+ * hbm_ver_drv_show - display HBM protocol version advertised by driver
+ *
+ * @device: device pointer
+ * @attr: attribute pointer
+ * @buf:  char out buffer
+ *
+ * Return: number of the bytes printed into buf or error
+ */
+static ssize_t hbm_ver_drv_show(struct device *device,
+				struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u.%u\n", HBM_MAJOR_VERSION, HBM_MINOR_VERSION);
+}
+static DEVICE_ATTR_RO(hbm_ver_drv);
+
+static ssize_t tx_queue_limit_show(struct device *device,
+				   struct device_attribute *attr, char *buf)
+{
+	struct mei_device *dev = dev_get_drvdata(device);
+	u8 size = 0;
+
+	mutex_lock(&dev->device_lock);
+	size = dev->tx_queue_limit;
+	mutex_unlock(&dev->device_lock);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", size);
+}
+
+static ssize_t tx_queue_limit_store(struct device *device,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct mei_device *dev = dev_get_drvdata(device);
+	u8 limit;
+	unsigned int inp;
+	int err;
+
+	err = kstrtouint(buf, 10, &inp);
+	if (err)
+		return err;
+	if (inp > MEI_TX_QUEUE_LIMIT_MAX || inp < MEI_TX_QUEUE_LIMIT_MIN)
+		return -EINVAL;
+	limit = inp;
+
+	mutex_lock(&dev->device_lock);
+	dev->tx_queue_limit = limit;
+	mutex_unlock(&dev->device_lock);
+
+	return count;
+}
+static DEVICE_ATTR_RW(tx_queue_limit);
+
+/**
+ * fw_ver_show - display ME FW version
+ *
+ * @device: device pointer
+ * @attr: attribute pointer
+ * @buf:  char out buffer
+ *
+ * Return: number of the bytes printed into buf or error
+ */
+static ssize_t fw_ver_show(struct device *device,
+			   struct device_attribute *attr, char *buf)
+{
+	struct mei_device *dev = dev_get_drvdata(device);
+	struct mei_fw_version *ver;
+	ssize_t cnt = 0;
+	int i;
+
+	ver = dev->fw_ver;
+
+	for (i = 0; i < MEI_MAX_FW_VER_BLOCKS; i++)
+		cnt += scnprintf(buf + cnt, PAGE_SIZE - cnt, "%u:%u.%u.%u.%u\n",
+				 ver[i].platform, ver[i].major, ver[i].minor,
+				 ver[i].hotfix, ver[i].buildno);
+	return cnt;
+}
+static DEVICE_ATTR_RO(fw_ver);
+
+static struct attribute *mei_attrs[] = {
+	&dev_attr_fw_status.attr,
+	&dev_attr_hbm_ver.attr,
+	&dev_attr_hbm_ver_drv.attr,
+	&dev_attr_tx_queue_limit.attr,
+	&dev_attr_fw_ver.attr,
+	NULL
+};
+ATTRIBUTE_GROUPS(mei);
+
+/*
+ * file operations structure will be used for mei char device.
+ */
+static const struct file_operations mei_fops = {
+	.owner = THIS_MODULE,
+	.read = mei_read,
+	.unlocked_ioctl = mei_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = mei_compat_ioctl,
+#endif
+	.open = mei_open,
+	.release = mei_release,
+	.write = mei_write,
+	.poll = mei_poll,
+	.fsync = mei_fsync,
+	.fasync = mei_fasync,
+	.llseek = no_llseek
+};
+
+static struct class *mei_class;
+static dev_t mei_devt;
+#define MEI_MAX_DEVS  MINORMASK
+static DEFINE_MUTEX(mei_minor_lock);
+static DEFINE_IDR(mei_idr);
+
+/**
+ * mei_minor_get - obtain next free device minor number
+ *
+ * @dev:  device pointer
+ *
+ * Return: allocated minor, or -ENOSPC if no free minor left
+ */
+static int mei_minor_get(struct mei_device *dev)
+{
+	int ret;
+
+	mutex_lock(&mei_minor_lock);
+	ret = idr_alloc(&mei_idr, dev, 0, MEI_MAX_DEVS, GFP_KERNEL);
+	if (ret >= 0)
+		dev->minor = ret;
+	else if (ret == -ENOSPC)
+		dev_err(dev->dev, "too many mei devices\n");
+
+	mutex_unlock(&mei_minor_lock);
+	return ret;
+}
+
+/**
+ * mei_minor_free - mark device minor number as free
+ *
+ * @dev:  device pointer
+ */
+static void mei_minor_free(struct mei_device *dev)
+{
+	mutex_lock(&mei_minor_lock);
+	idr_remove(&mei_idr, dev->minor);
+	mutex_unlock(&mei_minor_lock);
+}
+
+int mei_register(struct mei_device *dev, struct device *parent)
+{
+	struct device *clsdev; /* class device */
+	int ret, devno;
+
+	ret = mei_minor_get(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Fill in the data structures */
+	devno = MKDEV(MAJOR(mei_devt), dev->minor);
+	cdev_init(&dev->cdev, &mei_fops);
+	dev->cdev.owner = parent->driver->owner;
+
+	/* Add the device */
+	ret = cdev_add(&dev->cdev, devno, 1);
+	if (ret) {
+		dev_err(parent, "unable to add device %d:%d\n",
+			MAJOR(mei_devt), dev->minor);
+		goto err_dev_add;
+	}
+
+	clsdev = device_create_with_groups(mei_class, parent, devno,
+					   dev, mei_groups,
+					   "mei%d", dev->minor);
+
+	if (IS_ERR(clsdev)) {
+		dev_err(parent, "unable to create device %d:%d\n",
+			MAJOR(mei_devt), dev->minor);
+		ret = PTR_ERR(clsdev);
+		goto err_dev_create;
+	}
+
+	ret = mei_dbgfs_register(dev, dev_name(clsdev));
+	if (ret) {
+		dev_err(clsdev, "cannot register debugfs ret = %d\n", ret);
+		goto err_dev_dbgfs;
+	}
+
+	return 0;
+
+err_dev_dbgfs:
+	device_destroy(mei_class, devno);
+err_dev_create:
+	cdev_del(&dev->cdev);
+err_dev_add:
+	mei_minor_free(dev);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mei_register);
+
+void mei_deregister(struct mei_device *dev)
+{
+	int devno;
+
+	devno = dev->cdev.dev;
+	cdev_del(&dev->cdev);
+
+	mei_dbgfs_deregister(dev);
+
+	device_destroy(mei_class, devno);
+
+	mei_minor_free(dev);
+}
+EXPORT_SYMBOL_GPL(mei_deregister);
+
+static int __init mei_init(void)
+{
+	int ret;
+
+	mei_class = class_create(THIS_MODULE, "mei");
+	if (IS_ERR(mei_class)) {
+		pr_err("couldn't create class\n");
+		ret = PTR_ERR(mei_class);
+		goto err;
+	}
+
+	ret = alloc_chrdev_region(&mei_devt, 0, MEI_MAX_DEVS, "mei");
+	if (ret < 0) {
+		pr_err("unable to allocate char dev region\n");
+		goto err_class;
+	}
+
+	ret = mei_cl_bus_init();
+	if (ret < 0) {
+		pr_err("unable to initialize bus\n");
+		goto err_chrdev;
+	}
+
+	return 0;
+
+err_chrdev:
+	unregister_chrdev_region(mei_devt, MEI_MAX_DEVS);
+err_class:
+	class_destroy(mei_class);
+err:
+	return ret;
+}
+
+static void __exit mei_exit(void)
+{
+	unregister_chrdev_region(mei_devt, MEI_MAX_DEVS);
+	class_destroy(mei_class);
+	mei_cl_bus_exit();
+}
+
+module_init(mei_init);
+module_exit(mei_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) Management Engine Interface");
+MODULE_LICENSE("GPL v2");
+
diff --git a/drivers/misc/mei/mei-trace.c b/drivers/misc/mei/mei-trace.c
new file mode 100644
index 000000000..374edde72
--- /dev/null
+++ b/drivers/misc/mei/mei-trace.c
@@ -0,0 +1,26 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+#include <linux/module.h>
+
+/* sparse doesn't like tracepoint macros */
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "mei-trace.h"
+
+EXPORT_TRACEPOINT_SYMBOL(mei_reg_read);
+EXPORT_TRACEPOINT_SYMBOL(mei_reg_write);
+EXPORT_TRACEPOINT_SYMBOL(mei_pci_cfg_read);
+#endif /* __CHECKER__ */
diff --git a/drivers/misc/mei/mei-trace.h b/drivers/misc/mei/mei-trace.h
new file mode 100644
index 000000000..b52e9b97a
--- /dev/null
+++ b/drivers/misc/mei/mei-trace.h
@@ -0,0 +1,93 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#if !defined(_MEI_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MEI_TRACE_H_
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#include <linux/device.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mei
+
+TRACE_EVENT(mei_reg_read,
+	TP_PROTO(const struct device *dev, const char *reg, u32 offs, u32 val),
+	TP_ARGS(dev, reg, offs, val),
+	TP_STRUCT__entry(
+		__string(dev, dev_name(dev))
+		__field(const char *, reg)
+		__field(u32, offs)
+		__field(u32, val)
+	),
+	TP_fast_assign(
+		__assign_str(dev, dev_name(dev))
+		__entry->reg  = reg;
+		__entry->offs = offs;
+		__entry->val = val;
+	),
+	TP_printk("[%s] read %s:[%#x] = %#x",
+		  __get_str(dev), __entry->reg, __entry->offs, __entry->val)
+);
+
+TRACE_EVENT(mei_reg_write,
+	TP_PROTO(const struct device *dev, const char *reg, u32 offs, u32 val),
+	TP_ARGS(dev, reg, offs, val),
+	TP_STRUCT__entry(
+		__string(dev, dev_name(dev))
+		__field(const char *, reg)
+		__field(u32, offs)
+		__field(u32, val)
+	),
+	TP_fast_assign(
+		__assign_str(dev, dev_name(dev))
+		__entry->reg = reg;
+		__entry->offs = offs;
+		__entry->val = val;
+	),
+	TP_printk("[%s] write %s[%#x] = %#x",
+		  __get_str(dev), __entry->reg,  __entry->offs, __entry->val)
+);
+
+TRACE_EVENT(mei_pci_cfg_read,
+	TP_PROTO(const struct device *dev, const char *reg, u32 offs, u32 val),
+	TP_ARGS(dev, reg, offs, val),
+	TP_STRUCT__entry(
+		__string(dev, dev_name(dev))
+		__field(const char *, reg)
+		__field(u32, offs)
+		__field(u32, val)
+	),
+	TP_fast_assign(
+		__assign_str(dev, dev_name(dev))
+		__entry->reg  = reg;
+		__entry->offs = offs;
+		__entry->val = val;
+	),
+	TP_printk("[%s] pci cfg read %s:[%#x] = %#x",
+		  __get_str(dev), __entry->reg, __entry->offs, __entry->val)
+);
+
+#endif /* _MEI_TRACE_H_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE mei-trace
+#include <trace/define_trace.h>
diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h
new file mode 100644
index 000000000..fc7a5e3fb
--- /dev/null
+++ b/drivers/misc/mei/mei_dev.h
@@ -0,0 +1,756 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2018, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef _MEI_DEV_H_
+#define _MEI_DEV_H_
+
+#include <linux/types.h>
+#include <linux/cdev.h>
+#include <linux/poll.h>
+#include <linux/mei.h>
+#include <linux/mei_cl_bus.h>
+
+#include "hw.h"
+#include "hbm.h"
+
+#define MEI_SLOT_SIZE             sizeof(u32)
+#define MEI_RD_MSG_BUF_SIZE       (128 * MEI_SLOT_SIZE)
+
+/*
+ * Number of Maximum MEI Clients
+ */
+#define MEI_CLIENTS_MAX 256
+
+/*
+ * maximum number of consecutive resets
+ */
+#define MEI_MAX_CONSEC_RESET  3
+
+/*
+ * Number of File descriptors/handles
+ * that can be opened to the driver.
+ *
+ * Limit to 255: 256 Total Clients
+ * minus internal client for MEI Bus Messages
+ */
+#define  MEI_MAX_OPEN_HANDLE_COUNT (MEI_CLIENTS_MAX - 1)
+
+/* File state */
+enum file_state {
+	MEI_FILE_UNINITIALIZED = 0,
+	MEI_FILE_INITIALIZING,
+	MEI_FILE_CONNECTING,
+	MEI_FILE_CONNECTED,
+	MEI_FILE_DISCONNECTING,
+	MEI_FILE_DISCONNECT_REPLY,
+	MEI_FILE_DISCONNECT_REQUIRED,
+	MEI_FILE_DISCONNECTED,
+};
+
+/* MEI device states */
+enum mei_dev_state {
+	MEI_DEV_INITIALIZING = 0,
+	MEI_DEV_INIT_CLIENTS,
+	MEI_DEV_ENABLED,
+	MEI_DEV_RESETTING,
+	MEI_DEV_DISABLED,
+	MEI_DEV_POWER_DOWN,
+	MEI_DEV_POWER_UP
+};
+
+const char *mei_dev_state_str(int state);
+
+enum mei_file_transaction_states {
+	MEI_IDLE,
+	MEI_WRITING,
+	MEI_WRITE_COMPLETE,
+};
+
+/**
+ * enum mei_cb_file_ops  - file operation associated with the callback
+ * @MEI_FOP_READ:       read
+ * @MEI_FOP_WRITE:      write
+ * @MEI_FOP_CONNECT:    connect
+ * @MEI_FOP_DISCONNECT: disconnect
+ * @MEI_FOP_DISCONNECT_RSP: disconnect response
+ * @MEI_FOP_NOTIFY_START:   start notification
+ * @MEI_FOP_NOTIFY_STOP:    stop notification
+ */
+enum mei_cb_file_ops {
+	MEI_FOP_READ = 0,
+	MEI_FOP_WRITE,
+	MEI_FOP_CONNECT,
+	MEI_FOP_DISCONNECT,
+	MEI_FOP_DISCONNECT_RSP,
+	MEI_FOP_NOTIFY_START,
+	MEI_FOP_NOTIFY_STOP,
+};
+
+/**
+ * enum mei_cl_io_mode - io mode between driver and fw
+ *
+ * @MEI_CL_IO_TX_BLOCKING: send is blocking
+ * @MEI_CL_IO_TX_INTERNAL: internal communication between driver and FW
+ *
+ * @MEI_CL_IO_RX_NONBLOCK: recv is non-blocking
+ */
+enum mei_cl_io_mode {
+	MEI_CL_IO_TX_BLOCKING = BIT(0),
+	MEI_CL_IO_TX_INTERNAL = BIT(1),
+
+	MEI_CL_IO_RX_NONBLOCK = BIT(2),
+};
+
+/*
+ * Intel MEI message data struct
+ */
+struct mei_msg_data {
+	size_t size;
+	unsigned char *data;
+};
+
+/* Maximum number of processed FW status registers */
+#define MEI_FW_STATUS_MAX 6
+/* Minimal  buffer for FW status string (8 bytes in dw + space or '\0') */
+#define MEI_FW_STATUS_STR_SZ (MEI_FW_STATUS_MAX * (8 + 1))
+
+
+/*
+ * struct mei_fw_status - storage of FW status data
+ *
+ * @count: number of actually available elements in array
+ * @status: FW status registers
+ */
+struct mei_fw_status {
+	int count;
+	u32 status[MEI_FW_STATUS_MAX];
+};
+
+/**
+ * struct mei_me_client - representation of me (fw) client
+ *
+ * @list: link in me client list
+ * @refcnt: struct reference count
+ * @props: client properties
+ * @client_id: me client id
+ * @tx_flow_ctrl_creds: flow control credits
+ * @connect_count: number connections to this client
+ * @bus_added: added to bus
+ */
+struct mei_me_client {
+	struct list_head list;
+	struct kref refcnt;
+	struct mei_client_properties props;
+	u8 client_id;
+	u8 tx_flow_ctrl_creds;
+	u8 connect_count;
+	u8 bus_added;
+};
+
+
+struct mei_cl;
+
+/**
+ * struct mei_cl_cb - file operation callback structure
+ *
+ * @list: link in callback queue
+ * @cl: file client who is running this operation
+ * @fop_type: file operation type
+ * @buf: buffer for data associated with the callback
+ * @buf_idx: last read index
+ * @fp: pointer to file structure
+ * @status: io status of the cb
+ * @internal: communication between driver and FW flag
+ * @blocking: transmission blocking mode
+ */
+struct mei_cl_cb {
+	struct list_head list;
+	struct mei_cl *cl;
+	enum mei_cb_file_ops fop_type;
+	struct mei_msg_data buf;
+	size_t buf_idx;
+	const struct file *fp;
+	int status;
+	u32 internal:1;
+	u32 blocking:1;
+};
+
+/**
+ * struct mei_cl - me client host representation
+ *    carried in file->private_data
+ *
+ * @link: link in the clients list
+ * @dev: mei parent device
+ * @state: file operation state
+ * @tx_wait: wait queue for tx completion
+ * @rx_wait: wait queue for rx completion
+ * @wait:  wait queue for management operation
+ * @ev_wait: notification wait queue
+ * @ev_async: event async notification
+ * @status: connection status
+ * @me_cl: fw client connected
+ * @fp: file associated with client
+ * @host_client_id: host id
+ * @tx_flow_ctrl_creds: transmit flow credentials
+ * @rx_flow_ctrl_creds: receive flow credentials
+ * @timer_count:  watchdog timer for operation completion
+ * @notify_en: notification - enabled/disabled
+ * @notify_ev: pending notification event
+ * @tx_cb_queued: number of tx callbacks in queue
+ * @writing_state: state of the tx
+ * @rd_pending: pending read credits
+ * @rd_completed: completed read
+ *
+ * @cldev: device on the mei client bus
+ */
+struct mei_cl {
+	struct list_head link;
+	struct mei_device *dev;
+	enum file_state state;
+	wait_queue_head_t tx_wait;
+	wait_queue_head_t rx_wait;
+	wait_queue_head_t wait;
+	wait_queue_head_t ev_wait;
+	struct fasync_struct *ev_async;
+	int status;
+	struct mei_me_client *me_cl;
+	const struct file *fp;
+	u8 host_client_id;
+	u8 tx_flow_ctrl_creds;
+	u8 rx_flow_ctrl_creds;
+	u8 timer_count;
+	u8 notify_en;
+	u8 notify_ev;
+	u8 tx_cb_queued;
+	enum mei_file_transaction_states writing_state;
+	struct list_head rd_pending;
+	struct list_head rd_completed;
+
+	struct mei_cl_device *cldev;
+};
+
+#define MEI_TX_QUEUE_LIMIT_DEFAULT 50
+#define MEI_TX_QUEUE_LIMIT_MAX 255
+#define MEI_TX_QUEUE_LIMIT_MIN 30
+
+/**
+ * struct mei_hw_ops - hw specific ops
+ *
+ * @host_is_ready    : query for host readiness
+ *
+ * @hw_is_ready      : query if hw is ready
+ * @hw_reset         : reset hw
+ * @hw_start         : start hw after reset
+ * @hw_config        : configure hw
+ *
+ * @fw_status        : get fw status registers
+ * @pg_state         : power gating state of the device
+ * @pg_in_transition : is device now in pg transition
+ * @pg_is_enabled    : is power gating enabled
+ *
+ * @intr_clear       : clear pending interrupts
+ * @intr_enable      : enable interrupts
+ * @intr_disable     : disable interrupts
+ * @synchronize_irq  : synchronize irqs
+ *
+ * @hbuf_free_slots  : query for write buffer empty slots
+ * @hbuf_is_ready    : query if write buffer is empty
+ * @hbuf_depth       : query for write buffer depth
+ *
+ * @write            : write a message to FW
+ *
+ * @rdbuf_full_slots : query how many slots are filled
+ *
+ * @read_hdr         : get first 4 bytes (header)
+ * @read             : read a buffer from the FW
+ */
+struct mei_hw_ops {
+
+	bool (*host_is_ready)(struct mei_device *dev);
+
+	bool (*hw_is_ready)(struct mei_device *dev);
+	int (*hw_reset)(struct mei_device *dev, bool enable);
+	int (*hw_start)(struct mei_device *dev);
+	void (*hw_config)(struct mei_device *dev);
+
+	int (*fw_status)(struct mei_device *dev, struct mei_fw_status *fw_sts);
+	enum mei_pg_state (*pg_state)(struct mei_device *dev);
+	bool (*pg_in_transition)(struct mei_device *dev);
+	bool (*pg_is_enabled)(struct mei_device *dev);
+
+	void (*intr_clear)(struct mei_device *dev);
+	void (*intr_enable)(struct mei_device *dev);
+	void (*intr_disable)(struct mei_device *dev);
+	void (*synchronize_irq)(struct mei_device *dev);
+
+	int (*hbuf_free_slots)(struct mei_device *dev);
+	bool (*hbuf_is_ready)(struct mei_device *dev);
+	u32 (*hbuf_depth)(const struct mei_device *dev);
+	int (*write)(struct mei_device *dev,
+		     const void *hdr, size_t hdr_len,
+		     const void *data, size_t data_len);
+
+	int (*rdbuf_full_slots)(struct mei_device *dev);
+
+	u32 (*read_hdr)(const struct mei_device *dev);
+	int (*read)(struct mei_device *dev,
+		     unsigned char *buf, unsigned long len);
+};
+
+/* MEI bus API*/
+void mei_cl_bus_rescan_work(struct work_struct *work);
+void mei_cl_bus_dev_fixup(struct mei_cl_device *dev);
+ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length,
+		      unsigned int mode);
+ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length,
+		      unsigned int mode, unsigned long timeout);
+bool mei_cl_bus_rx_event(struct mei_cl *cl);
+bool mei_cl_bus_notify_event(struct mei_cl *cl);
+void mei_cl_bus_remove_devices(struct mei_device *bus);
+int mei_cl_bus_init(void);
+void mei_cl_bus_exit(void);
+
+/**
+ * enum mei_pg_event - power gating transition events
+ *
+ * @MEI_PG_EVENT_IDLE: the driver is not in power gating transition
+ * @MEI_PG_EVENT_WAIT: the driver is waiting for a pg event to complete
+ * @MEI_PG_EVENT_RECEIVED: the driver received pg event
+ * @MEI_PG_EVENT_INTR_WAIT: the driver is waiting for a pg event interrupt
+ * @MEI_PG_EVENT_INTR_RECEIVED: the driver received pg event interrupt
+ */
+enum mei_pg_event {
+	MEI_PG_EVENT_IDLE,
+	MEI_PG_EVENT_WAIT,
+	MEI_PG_EVENT_RECEIVED,
+	MEI_PG_EVENT_INTR_WAIT,
+	MEI_PG_EVENT_INTR_RECEIVED,
+};
+
+/**
+ * enum mei_pg_state - device internal power gating state
+ *
+ * @MEI_PG_OFF: device is not power gated - it is active
+ * @MEI_PG_ON:  device is power gated - it is in lower power state
+ */
+enum mei_pg_state {
+	MEI_PG_OFF = 0,
+	MEI_PG_ON =  1,
+};
+
+const char *mei_pg_state_str(enum mei_pg_state state);
+
+/**
+ * struct mei_fw_version - MEI FW version struct
+ *
+ * @platform: platform identifier
+ * @major: major version field
+ * @minor: minor version field
+ * @buildno: build number version field
+ * @hotfix: hotfix number version field
+ */
+struct mei_fw_version {
+	u8 platform;
+	u8 major;
+	u16 minor;
+	u16 buildno;
+	u16 hotfix;
+};
+
+#define MEI_MAX_FW_VER_BLOCKS 3
+
+/**
+ * struct mei_device -  MEI private device struct
+ *
+ * @dev         : device on a bus
+ * @cdev        : character device
+ * @minor       : minor number allocated for device
+ *
+ * @write_list  : write pending list
+ * @write_waiting_list : write completion list
+ * @ctrl_wr_list : pending control write list
+ * @ctrl_rd_list : pending control read list
+ * @tx_queue_limit: tx queues per client linit
+ *
+ * @file_list   : list of opened handles
+ * @open_handle_count: number of opened handles
+ *
+ * @device_lock : big device lock
+ * @timer_work  : MEI timer delayed work (timeouts)
+ *
+ * @recvd_hw_ready : hw ready message received flag
+ *
+ * @wait_hw_ready : wait queue for receive HW ready message form FW
+ * @wait_pg     : wait queue for receive PG message from FW
+ * @wait_hbm_start : wait queue for receive HBM start message from FW
+ *
+ * @reset_count : number of consecutive resets
+ * @dev_state   : device state
+ * @hbm_state   : state of host bus message protocol
+ * @init_clients_timer : HBM init handshake timeout
+ *
+ * @pg_event    : power gating event
+ * @pg_domain   : runtime PM domain
+ *
+ * @rd_msg_buf  : control messages buffer
+ * @rd_msg_hdr  : read message header storage
+ *
+ * @hbuf_is_ready : query if the host host/write buffer is ready
+ *
+ * @version     : HBM protocol version in use
+ * @hbm_f_pg_supported  : hbm feature pgi protocol
+ * @hbm_f_dc_supported  : hbm feature dynamic clients
+ * @hbm_f_dot_supported : hbm feature disconnect on timeout
+ * @hbm_f_ev_supported  : hbm feature event notification
+ * @hbm_f_fa_supported  : hbm feature fixed address client
+ * @hbm_f_ie_supported  : hbm feature immediate reply to enum request
+ * @hbm_f_os_supported  : hbm feature support OS ver message
+ * @hbm_f_dr_supported  : hbm feature dma ring supported
+ *
+ * @fw_ver : FW versions
+ *
+ * @fw_f_fw_ver_supported : fw feature: fw version supported
+ *
+ * @me_clients_rwsem: rw lock over me_clients list
+ * @me_clients  : list of FW clients
+ * @me_clients_map : FW clients bit map
+ * @host_clients_map : host clients id pool
+ *
+ * @allow_fixed_address: allow user space to connect a fixed client
+ * @override_fixed_address: force allow fixed address behavior
+ *
+ * @reset_work  : work item for the device reset
+ * @bus_rescan_work : work item for the bus rescan
+ *
+ * @device_list : mei client bus list
+ * @cl_bus_lock : client bus list lock
+ *
+ * @dbgfs_dir   : debugfs mei root directory
+ *
+ * @ops:        : hw specific operations
+ * @hw          : hw specific data
+ */
+struct mei_device {
+	struct device *dev;
+	struct cdev cdev;
+	int minor;
+
+	struct list_head write_list;
+	struct list_head write_waiting_list;
+	struct list_head ctrl_wr_list;
+	struct list_head ctrl_rd_list;
+	u8 tx_queue_limit;
+
+	struct list_head file_list;
+	long open_handle_count;
+
+	struct mutex device_lock;
+	struct delayed_work timer_work;
+
+	bool recvd_hw_ready;
+	/*
+	 * waiting queue for receive message from FW
+	 */
+	wait_queue_head_t wait_hw_ready;
+	wait_queue_head_t wait_pg;
+	wait_queue_head_t wait_hbm_start;
+
+	/*
+	 * mei device  states
+	 */
+	unsigned long reset_count;
+	enum mei_dev_state dev_state;
+	enum mei_hbm_state hbm_state;
+	u16 init_clients_timer;
+
+	/*
+	 * Power Gating support
+	 */
+	enum mei_pg_event pg_event;
+#ifdef CONFIG_PM
+	struct dev_pm_domain pg_domain;
+#endif /* CONFIG_PM */
+
+	unsigned char rd_msg_buf[MEI_RD_MSG_BUF_SIZE];
+	u32 rd_msg_hdr;
+
+	/* write buffer */
+	bool hbuf_is_ready;
+
+	struct hbm_version version;
+	unsigned int hbm_f_pg_supported:1;
+	unsigned int hbm_f_dc_supported:1;
+	unsigned int hbm_f_dot_supported:1;
+	unsigned int hbm_f_ev_supported:1;
+	unsigned int hbm_f_fa_supported:1;
+	unsigned int hbm_f_ie_supported:1;
+	unsigned int hbm_f_os_supported:1;
+	unsigned int hbm_f_dr_supported:1;
+
+	struct mei_fw_version fw_ver[MEI_MAX_FW_VER_BLOCKS];
+
+	unsigned int fw_f_fw_ver_supported:1;
+
+	struct rw_semaphore me_clients_rwsem;
+	struct list_head me_clients;
+	DECLARE_BITMAP(me_clients_map, MEI_CLIENTS_MAX);
+	DECLARE_BITMAP(host_clients_map, MEI_CLIENTS_MAX);
+
+	bool allow_fixed_address;
+	bool override_fixed_address;
+
+	struct work_struct reset_work;
+	struct work_struct bus_rescan_work;
+
+	/* List of bus devices */
+	struct list_head device_list;
+	struct mutex cl_bus_lock;
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+	struct dentry *dbgfs_dir;
+#endif /* CONFIG_DEBUG_FS */
+
+
+	const struct mei_hw_ops *ops;
+	char hw[0] __aligned(sizeof(void *));
+};
+
+static inline unsigned long mei_secs_to_jiffies(unsigned long sec)
+{
+	return msecs_to_jiffies(sec * MSEC_PER_SEC);
+}
+
+/**
+ * mei_data2slots - get slots number from a message length
+ *
+ * @length: size of the messages in bytes
+ *
+ * Return: number of slots
+ */
+static inline u32 mei_data2slots(size_t length)
+{
+	return DIV_ROUND_UP(length, MEI_SLOT_SIZE);
+}
+
+/**
+ * mei_hbm2slots - get slots number from a hbm message length
+ *                 length + size of the mei message header
+ *
+ * @length: size of the messages in bytes
+ *
+ * Return: number of slots
+ */
+static inline u32 mei_hbm2slots(size_t length)
+{
+	return DIV_ROUND_UP(sizeof(struct mei_msg_hdr) + length, MEI_SLOT_SIZE);
+}
+
+/**
+ * mei_slots2data - get data in slots - bytes from slots
+ *
+ * @slots: number of available slots
+ *
+ * Return: number of bytes in slots
+ */
+static inline u32 mei_slots2data(int slots)
+{
+	return slots * MEI_SLOT_SIZE;
+}
+
+/*
+ * mei init function prototypes
+ */
+void mei_device_init(struct mei_device *dev,
+		     struct device *device,
+		     const struct mei_hw_ops *hw_ops);
+int mei_reset(struct mei_device *dev);
+int mei_start(struct mei_device *dev);
+int mei_restart(struct mei_device *dev);
+void mei_stop(struct mei_device *dev);
+void mei_cancel_work(struct mei_device *dev);
+
+/*
+ *  MEI interrupt functions prototype
+ */
+
+void mei_timer(struct work_struct *work);
+void mei_schedule_stall_timer(struct mei_device *dev);
+int mei_irq_read_handler(struct mei_device *dev,
+			 struct list_head *cmpl_list, s32 *slots);
+
+int mei_irq_write_handler(struct mei_device *dev, struct list_head *cmpl_list);
+void mei_irq_compl_handler(struct mei_device *dev, struct list_head *cmpl_list);
+
+/*
+ * Register Access Function
+ */
+
+
+static inline void mei_hw_config(struct mei_device *dev)
+{
+	dev->ops->hw_config(dev);
+}
+
+static inline enum mei_pg_state mei_pg_state(struct mei_device *dev)
+{
+	return dev->ops->pg_state(dev);
+}
+
+static inline bool mei_pg_in_transition(struct mei_device *dev)
+{
+	return dev->ops->pg_in_transition(dev);
+}
+
+static inline bool mei_pg_is_enabled(struct mei_device *dev)
+{
+	return dev->ops->pg_is_enabled(dev);
+}
+
+static inline int mei_hw_reset(struct mei_device *dev, bool enable)
+{
+	return dev->ops->hw_reset(dev, enable);
+}
+
+static inline int mei_hw_start(struct mei_device *dev)
+{
+	return dev->ops->hw_start(dev);
+}
+
+static inline void mei_clear_interrupts(struct mei_device *dev)
+{
+	dev->ops->intr_clear(dev);
+}
+
+static inline void mei_enable_interrupts(struct mei_device *dev)
+{
+	dev->ops->intr_enable(dev);
+}
+
+static inline void mei_disable_interrupts(struct mei_device *dev)
+{
+	dev->ops->intr_disable(dev);
+}
+
+static inline void mei_synchronize_irq(struct mei_device *dev)
+{
+	dev->ops->synchronize_irq(dev);
+}
+
+static inline bool mei_host_is_ready(struct mei_device *dev)
+{
+	return dev->ops->host_is_ready(dev);
+}
+static inline bool mei_hw_is_ready(struct mei_device *dev)
+{
+	return dev->ops->hw_is_ready(dev);
+}
+
+static inline bool mei_hbuf_is_ready(struct mei_device *dev)
+{
+	return dev->ops->hbuf_is_ready(dev);
+}
+
+static inline int mei_hbuf_empty_slots(struct mei_device *dev)
+{
+	return dev->ops->hbuf_free_slots(dev);
+}
+
+static inline u32 mei_hbuf_depth(const struct mei_device *dev)
+{
+	return dev->ops->hbuf_depth(dev);
+}
+
+static inline int mei_write_message(struct mei_device *dev,
+				    const void *hdr, size_t hdr_len,
+				    const void *data, size_t data_len)
+{
+	return dev->ops->write(dev, hdr, hdr_len, data, data_len);
+}
+
+static inline u32 mei_read_hdr(const struct mei_device *dev)
+{
+	return dev->ops->read_hdr(dev);
+}
+
+static inline void mei_read_slots(struct mei_device *dev,
+		     unsigned char *buf, unsigned long len)
+{
+	dev->ops->read(dev, buf, len);
+}
+
+static inline int mei_count_full_read_slots(struct mei_device *dev)
+{
+	return dev->ops->rdbuf_full_slots(dev);
+}
+
+static inline int mei_fw_status(struct mei_device *dev,
+				struct mei_fw_status *fw_status)
+{
+	return dev->ops->fw_status(dev, fw_status);
+}
+
+bool mei_hbuf_acquire(struct mei_device *dev);
+
+bool mei_write_is_idle(struct mei_device *dev);
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+int mei_dbgfs_register(struct mei_device *dev, const char *name);
+void mei_dbgfs_deregister(struct mei_device *dev);
+#else
+static inline int mei_dbgfs_register(struct mei_device *dev, const char *name)
+{
+	return 0;
+}
+static inline void mei_dbgfs_deregister(struct mei_device *dev) {}
+#endif /* CONFIG_DEBUG_FS */
+
+int mei_register(struct mei_device *dev, struct device *parent);
+void mei_deregister(struct mei_device *dev);
+
+#define MEI_HDR_FMT "hdr:host=%02d me=%02d len=%d dma=%1d internal=%1d comp=%1d"
+#define MEI_HDR_PRM(hdr)                  \
+	(hdr)->host_addr, (hdr)->me_addr, \
+	(hdr)->length, (hdr)->dma_ring, (hdr)->internal, (hdr)->msg_complete
+
+ssize_t mei_fw_status2str(struct mei_fw_status *fw_sts, char *buf, size_t len);
+/**
+ * mei_fw_status_str - fetch and convert fw status registers to printable string
+ *
+ * @dev: the device structure
+ * @buf: string buffer at minimal size MEI_FW_STATUS_STR_SZ
+ * @len: buffer len must be >= MEI_FW_STATUS_STR_SZ
+ *
+ * Return: number of bytes written or < 0 on failure
+ */
+static inline ssize_t mei_fw_status_str(struct mei_device *dev,
+					char *buf, size_t len)
+{
+	struct mei_fw_status fw_status;
+	int ret;
+
+	buf[0] = '\0';
+
+	ret = mei_fw_status(dev, &fw_status);
+	if (ret)
+		return ret;
+
+	ret = mei_fw_status2str(&fw_status, buf, MEI_FW_STATUS_STR_SZ);
+
+	return ret;
+}
+
+
+#endif
diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c
new file mode 100644
index 000000000..35086f67d
--- /dev/null
+++ b/drivers/misc/mei/pci-me.c
@@ -0,0 +1,534 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2012, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/pci.h>
+#include <linux/poll.h>
+#include <linux/ioctl.h>
+#include <linux/cdev.h>
+#include <linux/sched.h>
+#include <linux/uuid.h>
+#include <linux/compat.h>
+#include <linux/jiffies.h>
+#include <linux/interrupt.h>
+
+#include <linux/pm_domain.h>
+#include <linux/pm_runtime.h>
+
+#include <linux/mei.h>
+
+#include "mei_dev.h"
+#include "client.h"
+#include "hw-me-regs.h"
+#include "hw-me.h"
+
+/* mei_pci_tbl - PCI Device ID Table */
+static const struct pci_device_id mei_me_pci_tbl[] = {
+	{MEI_PCI_DEVICE(MEI_DEV_ID_82946GZ, MEI_ME_ICH_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_82G35, MEI_ME_ICH_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_82Q965, MEI_ME_ICH_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_82G965, MEI_ME_ICH_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_82GM965, MEI_ME_ICH_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_82GME965, MEI_ME_ICH_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_82Q35, MEI_ME_ICH_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_82G33, MEI_ME_ICH_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_82Q33, MEI_ME_ICH_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_82X38, MEI_ME_ICH_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_3200, MEI_ME_ICH_CFG)},
+
+	{MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_6, MEI_ME_ICH_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_7, MEI_ME_ICH_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_8, MEI_ME_ICH_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_9, MEI_ME_ICH_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_10, MEI_ME_ICH_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_ICH9M_1, MEI_ME_ICH_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_ICH9M_2, MEI_ME_ICH_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_ICH9M_3, MEI_ME_ICH_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_ICH9M_4, MEI_ME_ICH_CFG)},
+
+	{MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_1, MEI_ME_ICH10_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_2, MEI_ME_ICH10_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_3, MEI_ME_ICH10_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_4, MEI_ME_ICH10_CFG)},
+
+	{MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_1, MEI_ME_PCH6_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_2, MEI_ME_PCH6_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_CPT_1, MEI_ME_PCH_CPT_PBG_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_PBG_1, MEI_ME_PCH_CPT_PBG_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_PPT_1, MEI_ME_PCH7_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_PPT_2, MEI_ME_PCH7_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_PPT_3, MEI_ME_PCH7_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_LPT_H, MEI_ME_PCH8_SPS_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_LPT_W, MEI_ME_PCH8_SPS_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_LPT_LP, MEI_ME_PCH8_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_LPT_HR, MEI_ME_PCH8_SPS_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_WPT_LP, MEI_ME_PCH8_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_WPT_LP_2, MEI_ME_PCH8_CFG)},
+
+	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT, MEI_ME_PCH8_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT_2, MEI_ME_PCH8_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, MEI_ME_PCH8_SPS_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, MEI_ME_PCH8_SPS_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_LBG, MEI_ME_PCH12_CFG)},
+
+	{MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, MEI_ME_PCH8_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, MEI_ME_PCH8_CFG)},
+
+	{MEI_PCI_DEVICE(MEI_DEV_ID_DNV_IE, MEI_ME_PCH8_CFG)},
+
+	{MEI_PCI_DEVICE(MEI_DEV_ID_GLK, MEI_ME_PCH8_CFG)},
+
+	{MEI_PCI_DEVICE(MEI_DEV_ID_KBP, MEI_ME_PCH8_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_KBP_2, MEI_ME_PCH8_CFG)},
+
+	{MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP, MEI_ME_PCH8_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP_4, MEI_ME_PCH8_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H, MEI_ME_PCH8_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H_4, MEI_ME_PCH8_CFG)},
+
+	{MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP, MEI_ME_PCH12_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP_3, MEI_ME_PCH8_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_CMP_V, MEI_ME_PCH12_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_CMP_H, MEI_ME_PCH12_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_CMP_H_3, MEI_ME_PCH8_CFG)},
+
+	{MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_ICP_N, MEI_ME_PCH12_CFG)},
+
+	{MEI_PCI_DEVICE(MEI_DEV_ID_TGP_LP, MEI_ME_PCH12_CFG)},
+
+	{MEI_PCI_DEVICE(MEI_DEV_ID_MCC, MEI_ME_PCH12_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_MCC_4, MEI_ME_PCH8_CFG)},
+
+	{MEI_PCI_DEVICE(MEI_DEV_ID_CDF, MEI_ME_PCH8_CFG)},
+
+	/* required last entry */
+	{0, }
+};
+
+MODULE_DEVICE_TABLE(pci, mei_me_pci_tbl);
+
+#ifdef CONFIG_PM
+static inline void mei_me_set_pm_domain(struct mei_device *dev);
+static inline void mei_me_unset_pm_domain(struct mei_device *dev);
+#else
+static inline void mei_me_set_pm_domain(struct mei_device *dev) {}
+static inline void mei_me_unset_pm_domain(struct mei_device *dev) {}
+#endif /* CONFIG_PM */
+
+/**
+ * mei_me_quirk_probe - probe for devices that doesn't valid ME interface
+ *
+ * @pdev: PCI device structure
+ * @cfg: per generation config
+ *
+ * Return: true if ME Interface is valid, false otherwise
+ */
+static bool mei_me_quirk_probe(struct pci_dev *pdev,
+				const struct mei_cfg *cfg)
+{
+	if (cfg->quirk_probe && cfg->quirk_probe(pdev)) {
+		dev_info(&pdev->dev, "Device doesn't have valid ME Interface\n");
+		return false;
+	}
+
+	return true;
+}
+
+/**
+ * mei_me_probe - Device Initialization Routine
+ *
+ * @pdev: PCI device structure
+ * @ent: entry in kcs_pci_tbl
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	const struct mei_cfg *cfg;
+	struct mei_device *dev;
+	struct mei_me_hw *hw;
+	unsigned int irqflags;
+	int err;
+
+	cfg = mei_me_get_cfg(ent->driver_data);
+	if (!cfg)
+		return -ENODEV;
+
+	if (!mei_me_quirk_probe(pdev, cfg))
+		return -ENODEV;
+
+	/* enable pci dev */
+	err = pcim_enable_device(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable pci device.\n");
+		goto end;
+	}
+	/* set PCI host mastering  */
+	pci_set_master(pdev);
+	/* pci request regions and mapping IO device memory for mei driver */
+	err = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME);
+	if (err) {
+		dev_err(&pdev->dev, "failed to get pci regions.\n");
+		goto end;
+	}
+
+	if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) ||
+	    dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64))) {
+
+		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
+		if (err)
+			err = dma_set_coherent_mask(&pdev->dev,
+						    DMA_BIT_MASK(32));
+	}
+	if (err) {
+		dev_err(&pdev->dev, "No usable DMA configuration, aborting\n");
+		goto end;
+	}
+
+	/* allocates and initializes the mei dev structure */
+	dev = mei_me_dev_init(pdev, cfg);
+	if (!dev) {
+		err = -ENOMEM;
+		goto end;
+	}
+	hw = to_me_hw(dev);
+	hw->mem_addr = pcim_iomap_table(pdev)[0];
+
+	pci_enable_msi(pdev);
+
+	 /* request and enable interrupt */
+	irqflags = pci_dev_msi_enabled(pdev) ? IRQF_ONESHOT : IRQF_SHARED;
+
+	err = request_threaded_irq(pdev->irq,
+			mei_me_irq_quick_handler,
+			mei_me_irq_thread_handler,
+			irqflags, KBUILD_MODNAME, dev);
+	if (err) {
+		dev_err(&pdev->dev, "request_threaded_irq failure. irq = %d\n",
+		       pdev->irq);
+		goto end;
+	}
+
+	if (mei_start(dev)) {
+		dev_err(&pdev->dev, "init hw failure.\n");
+		err = -ENODEV;
+		goto release_irq;
+	}
+
+	pm_runtime_set_autosuspend_delay(&pdev->dev, MEI_ME_RPM_TIMEOUT);
+	pm_runtime_use_autosuspend(&pdev->dev);
+
+	err = mei_register(dev, &pdev->dev);
+	if (err)
+		goto stop;
+
+	pci_set_drvdata(pdev, dev);
+
+	/*
+	 * MEI requires to resume from runtime suspend mode
+	 * in order to perform link reset flow upon system suspend.
+	 */
+	dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NEVER_SKIP);
+
+	/*
+	 * ME maps runtime suspend/resume to D0i states,
+	 * hence we need to go around native PCI runtime service which
+	 * eventually brings the device into D3cold/hot state,
+	 * but the mei device cannot wake up from D3 unlike from D0i3.
+	 * To get around the PCI device native runtime pm,
+	 * ME uses runtime pm domain handlers which take precedence
+	 * over the driver's pm handlers.
+	 */
+	mei_me_set_pm_domain(dev);
+
+	if (mei_pg_is_enabled(dev)) {
+		pm_runtime_put_noidle(&pdev->dev);
+		if (hw->d0i3_supported)
+			pm_runtime_allow(&pdev->dev);
+	}
+
+	dev_dbg(&pdev->dev, "initialization successful.\n");
+
+	return 0;
+
+stop:
+	mei_stop(dev);
+release_irq:
+	mei_cancel_work(dev);
+	mei_disable_interrupts(dev);
+	free_irq(pdev->irq, dev);
+end:
+	dev_err(&pdev->dev, "initialization failed.\n");
+	return err;
+}
+
+/**
+ * mei_me_shutdown - Device Removal Routine
+ *
+ * @pdev: PCI device structure
+ *
+ * mei_me_shutdown is called from the reboot notifier
+ * it's a simplified version of remove so we go down
+ * faster.
+ */
+static void mei_me_shutdown(struct pci_dev *pdev)
+{
+	struct mei_device *dev;
+
+	dev = pci_get_drvdata(pdev);
+	if (!dev)
+		return;
+
+	dev_dbg(&pdev->dev, "shutdown\n");
+	mei_stop(dev);
+
+	mei_me_unset_pm_domain(dev);
+
+	mei_disable_interrupts(dev);
+	free_irq(pdev->irq, dev);
+}
+
+/**
+ * mei_me_remove - Device Removal Routine
+ *
+ * @pdev: PCI device structure
+ *
+ * mei_me_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.
+ */
+static void mei_me_remove(struct pci_dev *pdev)
+{
+	struct mei_device *dev;
+
+	dev = pci_get_drvdata(pdev);
+	if (!dev)
+		return;
+
+	if (mei_pg_is_enabled(dev))
+		pm_runtime_get_noresume(&pdev->dev);
+
+	dev_dbg(&pdev->dev, "stop\n");
+	mei_stop(dev);
+
+	mei_me_unset_pm_domain(dev);
+
+	mei_disable_interrupts(dev);
+
+	free_irq(pdev->irq, dev);
+
+	mei_deregister(dev);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int mei_me_pci_suspend(struct device *device)
+{
+	struct pci_dev *pdev = to_pci_dev(device);
+	struct mei_device *dev = pci_get_drvdata(pdev);
+
+	if (!dev)
+		return -ENODEV;
+
+	dev_dbg(&pdev->dev, "suspend\n");
+
+	mei_stop(dev);
+
+	mei_disable_interrupts(dev);
+
+	free_irq(pdev->irq, dev);
+	pci_disable_msi(pdev);
+
+	return 0;
+}
+
+static int mei_me_pci_resume(struct device *device)
+{
+	struct pci_dev *pdev = to_pci_dev(device);
+	struct mei_device *dev;
+	unsigned int irqflags;
+	int err;
+
+	dev = pci_get_drvdata(pdev);
+	if (!dev)
+		return -ENODEV;
+
+	pci_enable_msi(pdev);
+
+	irqflags = pci_dev_msi_enabled(pdev) ? IRQF_ONESHOT : IRQF_SHARED;
+
+	/* request and enable interrupt */
+	err = request_threaded_irq(pdev->irq,
+			mei_me_irq_quick_handler,
+			mei_me_irq_thread_handler,
+			irqflags, KBUILD_MODNAME, dev);
+
+	if (err) {
+		dev_err(&pdev->dev, "request_threaded_irq failed: irq = %d.\n",
+				pdev->irq);
+		return err;
+	}
+
+	err = mei_restart(dev);
+	if (err)
+		return err;
+
+	/* Start timer if stopped in suspend */
+	schedule_delayed_work(&dev->timer_work, HZ);
+
+	return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+#ifdef CONFIG_PM
+static int mei_me_pm_runtime_idle(struct device *device)
+{
+	struct pci_dev *pdev = to_pci_dev(device);
+	struct mei_device *dev;
+
+	dev_dbg(&pdev->dev, "rpm: me: runtime_idle\n");
+
+	dev = pci_get_drvdata(pdev);
+	if (!dev)
+		return -ENODEV;
+	if (mei_write_is_idle(dev))
+		pm_runtime_autosuspend(device);
+
+	return -EBUSY;
+}
+
+static int mei_me_pm_runtime_suspend(struct device *device)
+{
+	struct pci_dev *pdev = to_pci_dev(device);
+	struct mei_device *dev;
+	int ret;
+
+	dev_dbg(&pdev->dev, "rpm: me: runtime suspend\n");
+
+	dev = pci_get_drvdata(pdev);
+	if (!dev)
+		return -ENODEV;
+
+	mutex_lock(&dev->device_lock);
+
+	if (mei_write_is_idle(dev))
+		ret = mei_me_pg_enter_sync(dev);
+	else
+		ret = -EAGAIN;
+
+	mutex_unlock(&dev->device_lock);
+
+	dev_dbg(&pdev->dev, "rpm: me: runtime suspend ret=%d\n", ret);
+
+	if (ret && ret != -EAGAIN)
+		schedule_work(&dev->reset_work);
+
+	return ret;
+}
+
+static int mei_me_pm_runtime_resume(struct device *device)
+{
+	struct pci_dev *pdev = to_pci_dev(device);
+	struct mei_device *dev;
+	int ret;
+
+	dev_dbg(&pdev->dev, "rpm: me: runtime resume\n");
+
+	dev = pci_get_drvdata(pdev);
+	if (!dev)
+		return -ENODEV;
+
+	mutex_lock(&dev->device_lock);
+
+	ret = mei_me_pg_exit_sync(dev);
+
+	mutex_unlock(&dev->device_lock);
+
+	dev_dbg(&pdev->dev, "rpm: me: runtime resume ret = %d\n", ret);
+
+	if (ret)
+		schedule_work(&dev->reset_work);
+
+	return ret;
+}
+
+/**
+ * mei_me_set_pm_domain - fill and set pm domain structure for device
+ *
+ * @dev: mei_device
+ */
+static inline void mei_me_set_pm_domain(struct mei_device *dev)
+{
+	struct pci_dev *pdev  = to_pci_dev(dev->dev);
+
+	if (pdev->dev.bus && pdev->dev.bus->pm) {
+		dev->pg_domain.ops = *pdev->dev.bus->pm;
+
+		dev->pg_domain.ops.runtime_suspend = mei_me_pm_runtime_suspend;
+		dev->pg_domain.ops.runtime_resume = mei_me_pm_runtime_resume;
+		dev->pg_domain.ops.runtime_idle = mei_me_pm_runtime_idle;
+
+		dev_pm_domain_set(&pdev->dev, &dev->pg_domain);
+	}
+}
+
+/**
+ * mei_me_unset_pm_domain - clean pm domain structure for device
+ *
+ * @dev: mei_device
+ */
+static inline void mei_me_unset_pm_domain(struct mei_device *dev)
+{
+	/* stop using pm callbacks if any */
+	dev_pm_domain_set(dev->dev, NULL);
+}
+
+static const struct dev_pm_ops mei_me_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(mei_me_pci_suspend,
+				mei_me_pci_resume)
+	SET_RUNTIME_PM_OPS(
+		mei_me_pm_runtime_suspend,
+		mei_me_pm_runtime_resume,
+		mei_me_pm_runtime_idle)
+};
+
+#define MEI_ME_PM_OPS	(&mei_me_pm_ops)
+#else
+#define MEI_ME_PM_OPS	NULL
+#endif /* CONFIG_PM */
+/*
+ *  PCI driver structure
+ */
+static struct pci_driver mei_me_driver = {
+	.name = KBUILD_MODNAME,
+	.id_table = mei_me_pci_tbl,
+	.probe = mei_me_probe,
+	.remove = mei_me_remove,
+	.shutdown = mei_me_shutdown,
+	.driver.pm = MEI_ME_PM_OPS,
+	.driver.probe_type = PROBE_PREFER_ASYNCHRONOUS,
+};
+
+module_pci_driver(mei_me_driver);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) Management Engine Interface");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mei/pci-txe.c b/drivers/misc/mei/pci-txe.c
new file mode 100644
index 000000000..e1b909123
--- /dev/null
+++ b/drivers/misc/mei/pci-txe.c
@@ -0,0 +1,422 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/uuid.h>
+#include <linux/jiffies.h>
+#include <linux/interrupt.h>
+#include <linux/workqueue.h>
+#include <linux/pm_domain.h>
+#include <linux/pm_runtime.h>
+
+#include <linux/mei.h>
+
+
+#include "mei_dev.h"
+#include "hw-txe.h"
+
+static const struct pci_device_id mei_txe_pci_tbl[] = {
+	{PCI_VDEVICE(INTEL, 0x0F18)}, /* Baytrail */
+	{PCI_VDEVICE(INTEL, 0x2298)}, /* Cherrytrail */
+
+	{0, }
+};
+MODULE_DEVICE_TABLE(pci, mei_txe_pci_tbl);
+
+#ifdef CONFIG_PM
+static inline void mei_txe_set_pm_domain(struct mei_device *dev);
+static inline void mei_txe_unset_pm_domain(struct mei_device *dev);
+#else
+static inline void mei_txe_set_pm_domain(struct mei_device *dev) {}
+static inline void mei_txe_unset_pm_domain(struct mei_device *dev) {}
+#endif /* CONFIG_PM */
+
+/**
+ * mei_txe_probe - Device Initialization Routine
+ *
+ * @pdev: PCI device structure
+ * @ent: entry in mei_txe_pci_tbl
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+static int mei_txe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	struct mei_device *dev;
+	struct mei_txe_hw *hw;
+	const int mask = BIT(SEC_BAR) | BIT(BRIDGE_BAR);
+	int err;
+
+	/* enable pci dev */
+	err = pcim_enable_device(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable pci device.\n");
+		goto end;
+	}
+	/* set PCI host mastering  */
+	pci_set_master(pdev);
+	/* pci request regions and mapping IO device memory for mei driver */
+	err = pcim_iomap_regions(pdev, mask, KBUILD_MODNAME);
+	if (err) {
+		dev_err(&pdev->dev, "failed to get pci regions.\n");
+		goto end;
+	}
+
+	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(36));
+	if (err) {
+		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (err) {
+			dev_err(&pdev->dev, "No suitable DMA available.\n");
+			goto end;
+		}
+	}
+
+	/* allocates and initializes the mei dev structure */
+	dev = mei_txe_dev_init(pdev);
+	if (!dev) {
+		err = -ENOMEM;
+		goto end;
+	}
+	hw = to_txe_hw(dev);
+	hw->mem_addr = pcim_iomap_table(pdev);
+
+	pci_enable_msi(pdev);
+
+	/* clear spurious interrupts */
+	mei_clear_interrupts(dev);
+
+	/* request and enable interrupt  */
+	if (pci_dev_msi_enabled(pdev))
+		err = request_threaded_irq(pdev->irq,
+			NULL,
+			mei_txe_irq_thread_handler,
+			IRQF_ONESHOT, KBUILD_MODNAME, dev);
+	else
+		err = request_threaded_irq(pdev->irq,
+			mei_txe_irq_quick_handler,
+			mei_txe_irq_thread_handler,
+			IRQF_SHARED, KBUILD_MODNAME, dev);
+	if (err) {
+		dev_err(&pdev->dev, "mei: request_threaded_irq failure. irq = %d\n",
+			pdev->irq);
+		goto end;
+	}
+
+	if (mei_start(dev)) {
+		dev_err(&pdev->dev, "init hw failure.\n");
+		err = -ENODEV;
+		goto release_irq;
+	}
+
+	pm_runtime_set_autosuspend_delay(&pdev->dev, MEI_TXI_RPM_TIMEOUT);
+	pm_runtime_use_autosuspend(&pdev->dev);
+
+	err = mei_register(dev, &pdev->dev);
+	if (err)
+		goto stop;
+
+	pci_set_drvdata(pdev, dev);
+
+	/*
+	 * MEI requires to resume from runtime suspend mode
+	 * in order to perform link reset flow upon system suspend.
+	 */
+	dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NEVER_SKIP);
+
+	/*
+	 * TXE maps runtime suspend/resume to own power gating states,
+	 * hence we need to go around native PCI runtime service which
+	 * eventually brings the device into D3cold/hot state.
+	 * But the TXE device cannot wake up from D3 unlike from own
+	 * power gating. To get around PCI device native runtime pm,
+	 * TXE uses runtime pm domain handlers which take precedence.
+	 */
+	mei_txe_set_pm_domain(dev);
+
+	pm_runtime_put_noidle(&pdev->dev);
+
+	return 0;
+
+stop:
+	mei_stop(dev);
+release_irq:
+	mei_cancel_work(dev);
+	mei_disable_interrupts(dev);
+	free_irq(pdev->irq, dev);
+end:
+	dev_err(&pdev->dev, "initialization failed.\n");
+	return err;
+}
+
+/**
+ * mei_txe_remove - Device Shutdown Routine
+ *
+ * @pdev: PCI device structure
+ *
+ *  mei_txe_shutdown is called from the reboot notifier
+ *  it's a simplified version of remove so we go down
+ *  faster.
+ */
+static void mei_txe_shutdown(struct pci_dev *pdev)
+{
+	struct mei_device *dev;
+
+	dev = pci_get_drvdata(pdev);
+	if (!dev)
+		return;
+
+	dev_dbg(&pdev->dev, "shutdown\n");
+	mei_stop(dev);
+
+	mei_txe_unset_pm_domain(dev);
+
+	mei_disable_interrupts(dev);
+	free_irq(pdev->irq, dev);
+}
+
+/**
+ * mei_txe_remove - Device Removal Routine
+ *
+ * @pdev: PCI device structure
+ *
+ * mei_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.
+ */
+static void mei_txe_remove(struct pci_dev *pdev)
+{
+	struct mei_device *dev;
+
+	dev = pci_get_drvdata(pdev);
+	if (!dev) {
+		dev_err(&pdev->dev, "mei: dev == NULL\n");
+		return;
+	}
+
+	pm_runtime_get_noresume(&pdev->dev);
+
+	mei_stop(dev);
+
+	mei_txe_unset_pm_domain(dev);
+
+	mei_disable_interrupts(dev);
+	free_irq(pdev->irq, dev);
+
+	mei_deregister(dev);
+}
+
+
+#ifdef CONFIG_PM_SLEEP
+static int mei_txe_pci_suspend(struct device *device)
+{
+	struct pci_dev *pdev = to_pci_dev(device);
+	struct mei_device *dev = pci_get_drvdata(pdev);
+
+	if (!dev)
+		return -ENODEV;
+
+	dev_dbg(&pdev->dev, "suspend\n");
+
+	mei_stop(dev);
+
+	mei_disable_interrupts(dev);
+
+	free_irq(pdev->irq, dev);
+	pci_disable_msi(pdev);
+
+	return 0;
+}
+
+static int mei_txe_pci_resume(struct device *device)
+{
+	struct pci_dev *pdev = to_pci_dev(device);
+	struct mei_device *dev;
+	int err;
+
+	dev = pci_get_drvdata(pdev);
+	if (!dev)
+		return -ENODEV;
+
+	pci_enable_msi(pdev);
+
+	mei_clear_interrupts(dev);
+
+	/* request and enable interrupt */
+	if (pci_dev_msi_enabled(pdev))
+		err = request_threaded_irq(pdev->irq,
+			NULL,
+			mei_txe_irq_thread_handler,
+			IRQF_ONESHOT, KBUILD_MODNAME, dev);
+	else
+		err = request_threaded_irq(pdev->irq,
+			mei_txe_irq_quick_handler,
+			mei_txe_irq_thread_handler,
+			IRQF_SHARED, KBUILD_MODNAME, dev);
+	if (err) {
+		dev_err(&pdev->dev, "request_threaded_irq failed: irq = %d.\n",
+				pdev->irq);
+		return err;
+	}
+
+	err = mei_restart(dev);
+
+	return err;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+#ifdef CONFIG_PM
+static int mei_txe_pm_runtime_idle(struct device *device)
+{
+	struct pci_dev *pdev = to_pci_dev(device);
+	struct mei_device *dev;
+
+	dev_dbg(&pdev->dev, "rpm: txe: runtime_idle\n");
+
+	dev = pci_get_drvdata(pdev);
+	if (!dev)
+		return -ENODEV;
+	if (mei_write_is_idle(dev))
+		pm_runtime_autosuspend(device);
+
+	return -EBUSY;
+}
+static int mei_txe_pm_runtime_suspend(struct device *device)
+{
+	struct pci_dev *pdev = to_pci_dev(device);
+	struct mei_device *dev;
+	int ret;
+
+	dev_dbg(&pdev->dev, "rpm: txe: runtime suspend\n");
+
+	dev = pci_get_drvdata(pdev);
+	if (!dev)
+		return -ENODEV;
+
+	mutex_lock(&dev->device_lock);
+
+	if (mei_write_is_idle(dev))
+		ret = mei_txe_aliveness_set_sync(dev, 0);
+	else
+		ret = -EAGAIN;
+
+	/* keep irq on we are staying in D0 */
+
+	dev_dbg(&pdev->dev, "rpm: txe: runtime suspend ret=%d\n", ret);
+
+	mutex_unlock(&dev->device_lock);
+
+	if (ret && ret != -EAGAIN)
+		schedule_work(&dev->reset_work);
+
+	return ret;
+}
+
+static int mei_txe_pm_runtime_resume(struct device *device)
+{
+	struct pci_dev *pdev = to_pci_dev(device);
+	struct mei_device *dev;
+	int ret;
+
+	dev_dbg(&pdev->dev, "rpm: txe: runtime resume\n");
+
+	dev = pci_get_drvdata(pdev);
+	if (!dev)
+		return -ENODEV;
+
+	mutex_lock(&dev->device_lock);
+
+	mei_enable_interrupts(dev);
+
+	ret = mei_txe_aliveness_set_sync(dev, 1);
+
+	mutex_unlock(&dev->device_lock);
+
+	dev_dbg(&pdev->dev, "rpm: txe: runtime resume ret = %d\n", ret);
+
+	if (ret)
+		schedule_work(&dev->reset_work);
+
+	return ret;
+}
+
+/**
+ * mei_txe_set_pm_domain - fill and set pm domain structure for device
+ *
+ * @dev: mei_device
+ */
+static inline void mei_txe_set_pm_domain(struct mei_device *dev)
+{
+	struct pci_dev *pdev  = to_pci_dev(dev->dev);
+
+	if (pdev->dev.bus && pdev->dev.bus->pm) {
+		dev->pg_domain.ops = *pdev->dev.bus->pm;
+
+		dev->pg_domain.ops.runtime_suspend = mei_txe_pm_runtime_suspend;
+		dev->pg_domain.ops.runtime_resume = mei_txe_pm_runtime_resume;
+		dev->pg_domain.ops.runtime_idle = mei_txe_pm_runtime_idle;
+
+		dev_pm_domain_set(&pdev->dev, &dev->pg_domain);
+	}
+}
+
+/**
+ * mei_txe_unset_pm_domain - clean pm domain structure for device
+ *
+ * @dev: mei_device
+ */
+static inline void mei_txe_unset_pm_domain(struct mei_device *dev)
+{
+	/* stop using pm callbacks if any */
+	dev_pm_domain_set(dev->dev, NULL);
+}
+
+static const struct dev_pm_ops mei_txe_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(mei_txe_pci_suspend,
+				mei_txe_pci_resume)
+	SET_RUNTIME_PM_OPS(
+		mei_txe_pm_runtime_suspend,
+		mei_txe_pm_runtime_resume,
+		mei_txe_pm_runtime_idle)
+};
+
+#define MEI_TXE_PM_OPS	(&mei_txe_pm_ops)
+#else
+#define MEI_TXE_PM_OPS	NULL
+#endif /* CONFIG_PM */
+
+/*
+ *  PCI driver structure
+ */
+static struct pci_driver mei_txe_driver = {
+	.name = KBUILD_MODNAME,
+	.id_table = mei_txe_pci_tbl,
+	.probe = mei_txe_probe,
+	.remove = mei_txe_remove,
+	.shutdown = mei_txe_shutdown,
+	.driver.pm = MEI_TXE_PM_OPS,
+};
+
+module_pci_driver(mei_txe_driver);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) Trusted Execution Environment Interface");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig
new file mode 100644
index 000000000..227cc7443
--- /dev/null
+++ b/drivers/misc/mic/Kconfig
@@ -0,0 +1,156 @@
+menu "Intel MIC & related support"
+
+comment "Intel MIC Bus Driver"
+
+config INTEL_MIC_BUS
+	tristate "Intel MIC Bus Driver"
+	depends on 64BIT && PCI && X86 && X86_DEV_DMA_OPS
+	help
+	  This option is selected by any driver which registers a
+	  device or driver on the MIC Bus, such as CONFIG_INTEL_MIC_HOST,
+	  CONFIG_INTEL_MIC_CARD, CONFIG_INTEL_MIC_X100_DMA etc.
+
+	  If you are building a host/card kernel with an Intel MIC device
+	  then say M (recommended) or Y, else say N. If unsure say N.
+
+	  More information about the Intel MIC family as well as the Linux
+	  OS and tools for MIC to use with this driver are available from
+	  <http://software.intel.com/en-us/mic-developer>.
+
+comment "SCIF Bus Driver"
+
+config SCIF_BUS
+	tristate "SCIF Bus Driver"
+	depends on 64BIT && PCI && X86 && X86_DEV_DMA_OPS
+	help
+	  This option is selected by any driver which registers a
+	  device or driver on the SCIF Bus, such as CONFIG_INTEL_MIC_HOST
+	  and CONFIG_INTEL_MIC_CARD.
+
+	  If you are building a host/card kernel with an Intel MIC device
+	  then say M (recommended) or Y, else say N. If unsure say N.
+
+	  More information about the Intel MIC family as well as the Linux
+	  OS and tools for MIC to use with this driver are available from
+	  <http://software.intel.com/en-us/mic-developer>.
+
+comment "VOP Bus Driver"
+
+config VOP_BUS
+	tristate "VOP Bus Driver"
+	depends on 64BIT && PCI && X86 && X86_DEV_DMA_OPS
+	help
+	  This option is selected by any driver which registers a
+	  device or driver on the VOP Bus, such as CONFIG_INTEL_MIC_HOST
+	  and CONFIG_INTEL_MIC_CARD.
+
+	  If you are building a host/card kernel with an Intel MIC device
+	  then say M (recommended) or Y, else say N. If unsure say N.
+
+	  More information about the Intel MIC family as well as the Linux
+	  OS and tools for MIC to use with this driver are available from
+	  <http://software.intel.com/en-us/mic-developer>.
+
+comment "Intel MIC Host Driver"
+
+config INTEL_MIC_HOST
+	tristate "Intel MIC Host Driver"
+	depends on 64BIT && PCI && X86
+	depends on INTEL_MIC_BUS && SCIF_BUS && MIC_COSM && VOP_BUS
+	help
+	  This enables Host Driver support for the Intel Many Integrated
+	  Core (MIC) family of PCIe form factor coprocessor devices that
+	  run a 64 bit Linux OS. The driver manages card OS state and
+	  enables communication between host and card. Intel MIC X100
+	  devices are currently supported.
+
+	  If you are building a host kernel with an Intel MIC device then
+	  say M (recommended) or Y, else say N. If unsure say N.
+
+	  More information about the Intel MIC family as well as the Linux
+	  OS and tools for MIC to use with this driver are available from
+	  <http://software.intel.com/en-us/mic-developer>.
+
+comment "Intel MIC Card Driver"
+
+config INTEL_MIC_CARD
+	tristate "Intel MIC Card Driver"
+	depends on 64BIT && X86
+	depends on INTEL_MIC_BUS && SCIF_BUS && MIC_COSM && VOP_BUS
+	select VIRTIO
+	help
+	  This enables card driver support for the Intel Many Integrated
+	  Core (MIC) device family. The card driver communicates shutdown/
+	  crash events to the host and allows registration/configuration of
+	  virtio devices. Intel MIC X100 devices are currently supported.
+
+	  If you are building a card kernel for an Intel MIC device then
+	  say M (recommended) or Y, else say N. If unsure say N.
+
+	  For more information see
+	  <http://software.intel.com/en-us/mic-developer>.
+
+comment "SCIF Driver"
+
+config SCIF
+	tristate "SCIF Driver"
+	depends on 64BIT && PCI && X86 && SCIF_BUS && IOMMU_SUPPORT
+	select IOMMU_IOVA
+	help
+	  This enables SCIF Driver support for the Intel Many Integrated
+	  Core (MIC) family of PCIe form factor coprocessor devices that
+	  run a 64 bit Linux OS. The Symmetric Communication Interface
+	  (SCIF (pronounced as skiff)) is a low level communications API
+	  across PCIe currently implemented for MIC.
+
+	  If you are building a host kernel with an Intel MIC device then
+	  say M (recommended) or Y, else say N. If unsure say N.
+
+	  More information about the Intel MIC family as well as the Linux
+	  OS and tools for MIC to use with this driver are available from
+	  <http://software.intel.com/en-us/mic-developer>.
+
+comment "Intel MIC Coprocessor State Management (COSM) Drivers"
+
+config MIC_COSM
+	tristate "Intel MIC Coprocessor State Management (COSM) Drivers"
+	depends on 64BIT && PCI && X86 && SCIF
+	help
+	  This enables COSM driver support for the Intel Many
+	  Integrated Core (MIC) family of PCIe form factor coprocessor
+	  devices. COSM drivers implement functions such as boot,
+	  shutdown, reset and reboot of MIC devices.
+
+	  If you are building a host kernel with an Intel MIC device then
+	  say M (recommended) or Y, else say N. If unsure say N.
+
+	  More information about the Intel MIC family as well as the Linux
+	  OS and tools for MIC to use with this driver are available from
+	  <http://software.intel.com/en-us/mic-developer>.
+
+comment "VOP Driver"
+
+config VOP
+	tristate "VOP Driver"
+	depends on 64BIT && PCI && X86 && VOP_BUS
+	select VHOST_RING
+	select VIRTIO
+	help
+	  This enables VOP (Virtio over PCIe) Driver support for the Intel
+	  Many Integrated Core (MIC) family of PCIe form factor coprocessor
+	  devices. The VOP driver allows virtio drivers, e.g. net, console
+	  and block drivers, on the card connect to user space virtio
+	  devices on the host.
+
+	  If you are building a host kernel with an Intel MIC device then
+	  say M (recommended) or Y, else say N. If unsure say N.
+
+	  More information about the Intel MIC family as well as the Linux
+	  OS and tools for MIC to use with this driver are available from
+	  <http://software.intel.com/en-us/mic-developer>.
+
+if VOP
+source "drivers/vhost/Kconfig.vringh"
+endif
+
+endmenu
diff --git a/drivers/misc/mic/Makefile b/drivers/misc/mic/Makefile
new file mode 100644
index 000000000..1a43622b1
--- /dev/null
+++ b/drivers/misc/mic/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile - Intel MIC Linux driver.
+# Copyright(c) 2013, Intel Corporation.
+#
+obj-$(CONFIG_INTEL_MIC_HOST) += host/
+obj-$(CONFIG_INTEL_MIC_CARD) += card/
+obj-y += bus/
+obj-$(CONFIG_SCIF) += scif/
+obj-$(CONFIG_MIC_COSM) += cosm/
+obj-$(CONFIG_MIC_COSM) += cosm_client/
+obj-$(CONFIG_VOP) += vop/
diff --git a/drivers/misc/mic/bus/Makefile b/drivers/misc/mic/bus/Makefile
new file mode 100644
index 000000000..8758a7daa
--- /dev/null
+++ b/drivers/misc/mic/bus/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile - Intel MIC Linux driver.
+# Copyright(c) 2014, Intel Corporation.
+#
+obj-$(CONFIG_INTEL_MIC_BUS) += mic_bus.o
+obj-$(CONFIG_SCIF_BUS) += scif_bus.o
+obj-$(CONFIG_MIC_COSM) += cosm_bus.o
+obj-$(CONFIG_VOP_BUS) += vop_bus.o
diff --git a/drivers/misc/mic/bus/cosm_bus.c b/drivers/misc/mic/bus/cosm_bus.c
new file mode 100644
index 000000000..d31d6c6e6
--- /dev/null
+++ b/drivers/misc/mic/bus/cosm_bus.c
@@ -0,0 +1,141 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC COSM Bus Driver
+ */
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/idr.h>
+#include "cosm_bus.h"
+
+/* Unique numbering for cosm devices. */
+static DEFINE_IDA(cosm_index_ida);
+
+static int cosm_dev_probe(struct device *d)
+{
+	struct cosm_device *dev = dev_to_cosm(d);
+	struct cosm_driver *drv = drv_to_cosm(dev->dev.driver);
+
+	return drv->probe(dev);
+}
+
+static int cosm_dev_remove(struct device *d)
+{
+	struct cosm_device *dev = dev_to_cosm(d);
+	struct cosm_driver *drv = drv_to_cosm(dev->dev.driver);
+
+	drv->remove(dev);
+	return 0;
+}
+
+static struct bus_type cosm_bus = {
+	.name  = "cosm_bus",
+	.probe = cosm_dev_probe,
+	.remove = cosm_dev_remove,
+};
+
+int cosm_register_driver(struct cosm_driver *driver)
+{
+	driver->driver.bus = &cosm_bus;
+	return driver_register(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(cosm_register_driver);
+
+void cosm_unregister_driver(struct cosm_driver *driver)
+{
+	driver_unregister(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(cosm_unregister_driver);
+
+static inline void cosm_release_dev(struct device *d)
+{
+	struct cosm_device *cdev = dev_to_cosm(d);
+
+	kfree(cdev);
+}
+
+struct cosm_device *
+cosm_register_device(struct device *pdev, struct cosm_hw_ops *hw_ops)
+{
+	struct cosm_device *cdev;
+	int ret;
+
+	cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
+	if (!cdev)
+		return ERR_PTR(-ENOMEM);
+
+	cdev->dev.parent = pdev;
+	cdev->dev.release = cosm_release_dev;
+	cdev->hw_ops = hw_ops;
+	dev_set_drvdata(&cdev->dev, cdev);
+	cdev->dev.bus = &cosm_bus;
+
+	/* Assign a unique device index and hence name */
+	ret = ida_simple_get(&cosm_index_ida, 0, 0, GFP_KERNEL);
+	if (ret < 0)
+		goto free_cdev;
+
+	cdev->index = ret;
+	cdev->dev.id = ret;
+	dev_set_name(&cdev->dev, "cosm-dev%u", cdev->index);
+
+	ret = device_register(&cdev->dev);
+	if (ret)
+		goto ida_remove;
+	return cdev;
+ida_remove:
+	ida_simple_remove(&cosm_index_ida, cdev->index);
+free_cdev:
+	put_device(&cdev->dev);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(cosm_register_device);
+
+void cosm_unregister_device(struct cosm_device *dev)
+{
+	int index = dev->index; /* save for after device release */
+
+	device_unregister(&dev->dev);
+	ida_simple_remove(&cosm_index_ida, index);
+}
+EXPORT_SYMBOL_GPL(cosm_unregister_device);
+
+struct cosm_device *cosm_find_cdev_by_id(int id)
+{
+	struct device *dev = subsys_find_device_by_id(&cosm_bus, id, NULL);
+
+	return dev ? container_of(dev, struct cosm_device, dev) : NULL;
+}
+EXPORT_SYMBOL_GPL(cosm_find_cdev_by_id);
+
+static int __init cosm_init(void)
+{
+	return bus_register(&cosm_bus);
+}
+
+static void __exit cosm_exit(void)
+{
+	bus_unregister(&cosm_bus);
+	ida_destroy(&cosm_index_ida);
+}
+
+core_initcall(cosm_init);
+module_exit(cosm_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC card OS state management bus driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/bus/cosm_bus.h b/drivers/misc/mic/bus/cosm_bus.h
new file mode 100644
index 000000000..8b6341855
--- /dev/null
+++ b/drivers/misc/mic/bus/cosm_bus.h
@@ -0,0 +1,136 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC COSM Bus Driver
+ */
+#ifndef _COSM_BUS_H_
+#define _COSM_BUS_H_
+
+#include <linux/scif.h>
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+
+/**
+ * cosm_device - representation of a cosm device
+ *
+ * @attr_group: Pointer to list of sysfs attribute groups.
+ * @sdev: Device for sysfs entries.
+ * @state: MIC state.
+ * @prev_state: MIC state previous to MIC_RESETTING
+ * @shutdown_status: MIC status reported by card for shutdown/crashes.
+ * @shutdown_status_int: Internal shutdown status maintained by the driver
+ * @cosm_mutex: Mutex for synchronizing access to data structures.
+ * @reset_trigger_work: Work for triggering reset requests.
+ * @scif_work: Work for handling per device SCIF connections
+ * @cmdline: Kernel command line.
+ * @firmware: Firmware file name.
+ * @ramdisk: Ramdisk file name.
+ * @bootmode: Boot mode i.e. "linux" or "elf" for flash updates.
+ * @log_buf_addr: Log buffer address for MIC.
+ * @log_buf_len: Log buffer length address for MIC.
+ * @state_sysfs: Sysfs dirent for notifying ring 3 about MIC state changes.
+ * @hw_ops: the hardware bus ops for this device.
+ * @dev: underlying device.
+ * @index: unique position on the cosm bus
+ * @dbg_dir: debug fs directory
+ * @newepd: new endpoint from scif accept to be assigned to this cdev
+ * @epd: SCIF endpoint for this cdev
+ * @heartbeat_watchdog_enable: if heartbeat watchdog is enabled for this cdev
+ * @sysfs_heartbeat_enable: sysfs setting for disabling heartbeat notification
+ */
+struct cosm_device {
+	const struct attribute_group **attr_group;
+	struct device *sdev;
+	u8 state;
+	u8 prev_state;
+	u8 shutdown_status;
+	u8 shutdown_status_int;
+	struct mutex cosm_mutex;
+	struct work_struct reset_trigger_work;
+	struct work_struct scif_work;
+	char *cmdline;
+	char *firmware;
+	char *ramdisk;
+	char *bootmode;
+	void *log_buf_addr;
+	int *log_buf_len;
+	struct kernfs_node *state_sysfs;
+	struct cosm_hw_ops *hw_ops;
+	struct device dev;
+	int index;
+	struct dentry *dbg_dir;
+	scif_epd_t newepd;
+	scif_epd_t epd;
+	bool heartbeat_watchdog_enable;
+	bool sysfs_heartbeat_enable;
+};
+
+/**
+ * cosm_driver - operations for a cosm driver
+ *
+ * @driver: underlying device driver (populate name and owner).
+ * @probe: the function to call when a device is found.  Returns 0 or -errno.
+ * @remove: the function to call when a device is removed.
+ */
+struct cosm_driver {
+	struct device_driver driver;
+	int (*probe)(struct cosm_device *dev);
+	void (*remove)(struct cosm_device *dev);
+};
+
+/**
+ * cosm_hw_ops - cosm bus ops
+ *
+ * @reset: trigger MIC reset
+ * @force_reset: force MIC reset
+ * @post_reset: inform MIC reset is complete
+ * @ready: is MIC ready for OS download
+ * @start: boot MIC
+ * @stop: prepare MIC for reset
+ * @family: return MIC HW family string
+ * @stepping: return MIC HW stepping string
+ * @aper: return MIC PCIe aperture
+ */
+struct cosm_hw_ops {
+	void (*reset)(struct cosm_device *cdev);
+	void (*force_reset)(struct cosm_device *cdev);
+	void (*post_reset)(struct cosm_device *cdev, enum mic_states state);
+	bool (*ready)(struct cosm_device *cdev);
+	int (*start)(struct cosm_device *cdev, int id);
+	void (*stop)(struct cosm_device *cdev, bool force);
+	ssize_t (*family)(struct cosm_device *cdev, char *buf);
+	ssize_t (*stepping)(struct cosm_device *cdev, char *buf);
+	struct mic_mw *(*aper)(struct cosm_device *cdev);
+};
+
+struct cosm_device *
+cosm_register_device(struct device *pdev, struct cosm_hw_ops *hw_ops);
+void cosm_unregister_device(struct cosm_device *dev);
+int cosm_register_driver(struct cosm_driver *drv);
+void cosm_unregister_driver(struct cosm_driver *drv);
+struct cosm_device *cosm_find_cdev_by_id(int id);
+
+static inline struct cosm_device *dev_to_cosm(struct device *dev)
+{
+	return container_of(dev, struct cosm_device, dev);
+}
+
+static inline struct cosm_driver *drv_to_cosm(struct device_driver *drv)
+{
+	return container_of(drv, struct cosm_driver, driver);
+}
+#endif /* _COSM_BUS_H */
diff --git a/drivers/misc/mic/bus/mic_bus.c b/drivers/misc/mic/bus/mic_bus.c
new file mode 100644
index 000000000..77b16ca66
--- /dev/null
+++ b/drivers/misc/mic/bus/mic_bus.c
@@ -0,0 +1,204 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Bus driver.
+ *
+ * This implementation is very similar to the the virtio bus driver
+ * implementation @ drivers/virtio/virtio.c
+ */
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/idr.h>
+#include <linux/mic_bus.h>
+
+static ssize_t device_show(struct device *d,
+			   struct device_attribute *attr, char *buf)
+{
+	struct mbus_device *dev = dev_to_mbus(d);
+	return sprintf(buf, "0x%04x\n", dev->id.device);
+}
+static DEVICE_ATTR_RO(device);
+
+static ssize_t vendor_show(struct device *d,
+			   struct device_attribute *attr, char *buf)
+{
+	struct mbus_device *dev = dev_to_mbus(d);
+	return sprintf(buf, "0x%04x\n", dev->id.vendor);
+}
+static DEVICE_ATTR_RO(vendor);
+
+static ssize_t modalias_show(struct device *d,
+			     struct device_attribute *attr, char *buf)
+{
+	struct mbus_device *dev = dev_to_mbus(d);
+	return sprintf(buf, "mbus:d%08Xv%08X\n",
+		       dev->id.device, dev->id.vendor);
+}
+static DEVICE_ATTR_RO(modalias);
+
+static struct attribute *mbus_dev_attrs[] = {
+	&dev_attr_device.attr,
+	&dev_attr_vendor.attr,
+	&dev_attr_modalias.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(mbus_dev);
+
+static inline int mbus_id_match(const struct mbus_device *dev,
+				const struct mbus_device_id *id)
+{
+	if (id->device != dev->id.device && id->device != MBUS_DEV_ANY_ID)
+		return 0;
+
+	return id->vendor == MBUS_DEV_ANY_ID || id->vendor == dev->id.vendor;
+}
+
+/*
+ * This looks through all the IDs a driver claims to support.  If any of them
+ * match, we return 1 and the kernel will call mbus_dev_probe().
+ */
+static int mbus_dev_match(struct device *dv, struct device_driver *dr)
+{
+	unsigned int i;
+	struct mbus_device *dev = dev_to_mbus(dv);
+	const struct mbus_device_id *ids;
+
+	ids = drv_to_mbus(dr)->id_table;
+	for (i = 0; ids[i].device; i++)
+		if (mbus_id_match(dev, &ids[i]))
+			return 1;
+	return 0;
+}
+
+static int mbus_uevent(struct device *dv, struct kobj_uevent_env *env)
+{
+	struct mbus_device *dev = dev_to_mbus(dv);
+
+	return add_uevent_var(env, "MODALIAS=mbus:d%08Xv%08X",
+			      dev->id.device, dev->id.vendor);
+}
+
+static int mbus_dev_probe(struct device *d)
+{
+	int err;
+	struct mbus_device *dev = dev_to_mbus(d);
+	struct mbus_driver *drv = drv_to_mbus(dev->dev.driver);
+
+	err = drv->probe(dev);
+	if (!err)
+		if (drv->scan)
+			drv->scan(dev);
+	return err;
+}
+
+static int mbus_dev_remove(struct device *d)
+{
+	struct mbus_device *dev = dev_to_mbus(d);
+	struct mbus_driver *drv = drv_to_mbus(dev->dev.driver);
+
+	drv->remove(dev);
+	return 0;
+}
+
+static struct bus_type mic_bus = {
+	.name  = "mic_bus",
+	.match = mbus_dev_match,
+	.dev_groups = mbus_dev_groups,
+	.uevent = mbus_uevent,
+	.probe = mbus_dev_probe,
+	.remove = mbus_dev_remove,
+};
+
+int mbus_register_driver(struct mbus_driver *driver)
+{
+	driver->driver.bus = &mic_bus;
+	return driver_register(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(mbus_register_driver);
+
+void mbus_unregister_driver(struct mbus_driver *driver)
+{
+	driver_unregister(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(mbus_unregister_driver);
+
+static void mbus_release_dev(struct device *d)
+{
+	struct mbus_device *mbdev = dev_to_mbus(d);
+	kfree(mbdev);
+}
+
+struct mbus_device *
+mbus_register_device(struct device *pdev, int id, const struct dma_map_ops *dma_ops,
+		     struct mbus_hw_ops *hw_ops, int index,
+		     void __iomem *mmio_va)
+{
+	int ret;
+	struct mbus_device *mbdev;
+
+	mbdev = kzalloc(sizeof(*mbdev), GFP_KERNEL);
+	if (!mbdev)
+		return ERR_PTR(-ENOMEM);
+
+	mbdev->mmio_va = mmio_va;
+	mbdev->dev.parent = pdev;
+	mbdev->id.device = id;
+	mbdev->id.vendor = MBUS_DEV_ANY_ID;
+	mbdev->dev.dma_ops = dma_ops;
+	mbdev->dev.dma_mask = &mbdev->dev.coherent_dma_mask;
+	dma_set_mask(&mbdev->dev, DMA_BIT_MASK(64));
+	mbdev->dev.release = mbus_release_dev;
+	mbdev->hw_ops = hw_ops;
+	mbdev->dev.bus = &mic_bus;
+	mbdev->index = index;
+	dev_set_name(&mbdev->dev, "mbus-dev%u", mbdev->index);
+	/*
+	 * device_register() causes the bus infrastructure to look for a
+	 * matching driver.
+	 */
+	ret = device_register(&mbdev->dev);
+	if (ret)
+		goto free_mbdev;
+	return mbdev;
+free_mbdev:
+	put_device(&mbdev->dev);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(mbus_register_device);
+
+void mbus_unregister_device(struct mbus_device *mbdev)
+{
+	device_unregister(&mbdev->dev);
+}
+EXPORT_SYMBOL_GPL(mbus_unregister_device);
+
+static int __init mbus_init(void)
+{
+	return bus_register(&mic_bus);
+}
+
+static void __exit mbus_exit(void)
+{
+	bus_unregister(&mic_bus);
+}
+
+core_initcall(mbus_init);
+module_exit(mbus_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC Bus driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/bus/scif_bus.c b/drivers/misc/mic/bus/scif_bus.c
new file mode 100644
index 000000000..a444db5f6
--- /dev/null
+++ b/drivers/misc/mic/bus/scif_bus.c
@@ -0,0 +1,209 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel Symmetric Communications Interface Bus driver.
+ */
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/idr.h>
+#include <linux/dma-mapping.h>
+
+#include "scif_bus.h"
+
+static ssize_t device_show(struct device *d,
+			   struct device_attribute *attr, char *buf)
+{
+	struct scif_hw_dev *dev = dev_to_scif(d);
+
+	return sprintf(buf, "0x%04x\n", dev->id.device);
+}
+static DEVICE_ATTR_RO(device);
+
+static ssize_t vendor_show(struct device *d,
+			   struct device_attribute *attr, char *buf)
+{
+	struct scif_hw_dev *dev = dev_to_scif(d);
+
+	return sprintf(buf, "0x%04x\n", dev->id.vendor);
+}
+static DEVICE_ATTR_RO(vendor);
+
+static ssize_t modalias_show(struct device *d,
+			     struct device_attribute *attr, char *buf)
+{
+	struct scif_hw_dev *dev = dev_to_scif(d);
+
+	return sprintf(buf, "scif:d%08Xv%08X\n",
+		       dev->id.device, dev->id.vendor);
+}
+static DEVICE_ATTR_RO(modalias);
+
+static struct attribute *scif_dev_attrs[] = {
+	&dev_attr_device.attr,
+	&dev_attr_vendor.attr,
+	&dev_attr_modalias.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(scif_dev);
+
+static inline int scif_id_match(const struct scif_hw_dev *dev,
+				const struct scif_hw_dev_id *id)
+{
+	if (id->device != dev->id.device && id->device != SCIF_DEV_ANY_ID)
+		return 0;
+
+	return id->vendor == SCIF_DEV_ANY_ID || id->vendor == dev->id.vendor;
+}
+
+/*
+ * This looks through all the IDs a driver claims to support.  If any of them
+ * match, we return 1 and the kernel will call scif_dev_probe().
+ */
+static int scif_dev_match(struct device *dv, struct device_driver *dr)
+{
+	unsigned int i;
+	struct scif_hw_dev *dev = dev_to_scif(dv);
+	const struct scif_hw_dev_id *ids;
+
+	ids = drv_to_scif(dr)->id_table;
+	for (i = 0; ids[i].device; i++)
+		if (scif_id_match(dev, &ids[i]))
+			return 1;
+	return 0;
+}
+
+static int scif_uevent(struct device *dv, struct kobj_uevent_env *env)
+{
+	struct scif_hw_dev *dev = dev_to_scif(dv);
+
+	return add_uevent_var(env, "MODALIAS=scif:d%08Xv%08X",
+			      dev->id.device, dev->id.vendor);
+}
+
+static int scif_dev_probe(struct device *d)
+{
+	struct scif_hw_dev *dev = dev_to_scif(d);
+	struct scif_driver *drv = drv_to_scif(dev->dev.driver);
+
+	return drv->probe(dev);
+}
+
+static int scif_dev_remove(struct device *d)
+{
+	struct scif_hw_dev *dev = dev_to_scif(d);
+	struct scif_driver *drv = drv_to_scif(dev->dev.driver);
+
+	drv->remove(dev);
+	return 0;
+}
+
+static struct bus_type scif_bus = {
+	.name  = "scif_bus",
+	.match = scif_dev_match,
+	.dev_groups = scif_dev_groups,
+	.uevent = scif_uevent,
+	.probe = scif_dev_probe,
+	.remove = scif_dev_remove,
+};
+
+int scif_register_driver(struct scif_driver *driver)
+{
+	driver->driver.bus = &scif_bus;
+	return driver_register(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(scif_register_driver);
+
+void scif_unregister_driver(struct scif_driver *driver)
+{
+	driver_unregister(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(scif_unregister_driver);
+
+static void scif_release_dev(struct device *d)
+{
+	struct scif_hw_dev *sdev = dev_to_scif(d);
+
+	kfree(sdev);
+}
+
+struct scif_hw_dev *
+scif_register_device(struct device *pdev, int id, const struct dma_map_ops *dma_ops,
+		     struct scif_hw_ops *hw_ops, u8 dnode, u8 snode,
+		     struct mic_mw *mmio, struct mic_mw *aper, void *dp,
+		     void __iomem *rdp, struct dma_chan **chan, int num_chan,
+		     bool card_rel_da)
+{
+	int ret;
+	struct scif_hw_dev *sdev;
+
+	sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
+	if (!sdev)
+		return ERR_PTR(-ENOMEM);
+
+	sdev->dev.parent = pdev;
+	sdev->id.device = id;
+	sdev->id.vendor = SCIF_DEV_ANY_ID;
+	sdev->dev.dma_ops = dma_ops;
+	sdev->dev.release = scif_release_dev;
+	sdev->hw_ops = hw_ops;
+	sdev->dnode = dnode;
+	sdev->snode = snode;
+	dev_set_drvdata(&sdev->dev, sdev);
+	sdev->dev.bus = &scif_bus;
+	sdev->mmio = mmio;
+	sdev->aper = aper;
+	sdev->dp = dp;
+	sdev->rdp = rdp;
+	sdev->dev.dma_mask = &sdev->dev.coherent_dma_mask;
+	dma_set_mask(&sdev->dev, DMA_BIT_MASK(64));
+	sdev->dma_ch = chan;
+	sdev->num_dma_ch = num_chan;
+	sdev->card_rel_da = card_rel_da;
+	dev_set_name(&sdev->dev, "scif-dev%u", sdev->dnode);
+	/*
+	 * device_register() causes the bus infrastructure to look for a
+	 * matching driver.
+	 */
+	ret = device_register(&sdev->dev);
+	if (ret)
+		goto free_sdev;
+	return sdev;
+free_sdev:
+	put_device(&sdev->dev);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(scif_register_device);
+
+void scif_unregister_device(struct scif_hw_dev *sdev)
+{
+	device_unregister(&sdev->dev);
+}
+EXPORT_SYMBOL_GPL(scif_unregister_device);
+
+static int __init scif_init(void)
+{
+	return bus_register(&scif_bus);
+}
+
+static void __exit scif_exit(void)
+{
+	bus_unregister(&scif_bus);
+}
+
+core_initcall(scif_init);
+module_exit(scif_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) SCIF Bus driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/bus/scif_bus.h b/drivers/misc/mic/bus/scif_bus.h
new file mode 100644
index 000000000..ff5956821
--- /dev/null
+++ b/drivers/misc/mic/bus/scif_bus.h
@@ -0,0 +1,133 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel Symmetric Communications Interface Bus driver.
+ */
+#ifndef _SCIF_BUS_H_
+#define _SCIF_BUS_H_
+/*
+ * Everything a scif driver needs to work with any particular scif
+ * hardware abstraction layer.
+ */
+#include <linux/dma-mapping.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+
+struct scif_hw_dev_id {
+	u32 device;
+	u32 vendor;
+};
+
+#define MIC_SCIF_DEV 1
+#define SCIF_DEV_ANY_ID 0xffffffff
+
+/**
+ * scif_hw_dev - representation of a hardware device abstracted for scif
+ * @hw_ops: the hardware ops supported by this device
+ * @id: the device type identification (used to match it with a driver)
+ * @mmio: MMIO memory window
+ * @aper: Aperture memory window
+ * @dev: underlying device
+ * @dnode - The destination node which this device will communicate with.
+ * @snode - The source node for this device.
+ * @dp - Self device page
+ * @rdp - Remote device page
+ * @dma_ch - Array of DMA channels
+ * @num_dma_ch - Number of DMA channels available
+ * @card_rel_da - Set to true if DMA addresses programmed in the DMA engine
+ *		are relative to the card point of view
+ */
+struct scif_hw_dev {
+	struct scif_hw_ops *hw_ops;
+	struct scif_hw_dev_id id;
+	struct mic_mw *mmio;
+	struct mic_mw *aper;
+	struct device dev;
+	u8 dnode;
+	u8 snode;
+	void *dp;
+	void __iomem *rdp;
+	struct dma_chan **dma_ch;
+	int num_dma_ch;
+	bool card_rel_da;
+};
+
+/**
+ * scif_driver - operations for a scif I/O driver
+ * @driver: underlying device driver (populate name and owner).
+ * @id_table: the ids serviced by this driver.
+ * @probe: the function to call when a device is found.  Returns 0 or -errno.
+ * @remove: the function to call when a device is removed.
+ */
+struct scif_driver {
+	struct device_driver driver;
+	const struct scif_hw_dev_id *id_table;
+	int (*probe)(struct scif_hw_dev *dev);
+	void (*remove)(struct scif_hw_dev *dev);
+};
+
+/**
+ * scif_hw_ops - Hardware operations for accessing a SCIF device on the SCIF bus.
+ *
+ * @next_db: Obtain the next available doorbell.
+ * @request_irq: Request an interrupt on a particular doorbell.
+ * @free_irq: Free an interrupt requested previously.
+ * @ack_interrupt: acknowledge an interrupt in the ISR.
+ * @send_intr: Send an interrupt to the remote node on a specified doorbell.
+ * @send_p2p_intr: Send an interrupt to the peer node on a specified doorbell
+ * which is specifically targeted for a peer to peer node.
+ * @ioremap: Map a buffer with the specified physical address and length.
+ * @iounmap: Unmap a buffer previously mapped.
+ */
+struct scif_hw_ops {
+	int (*next_db)(struct scif_hw_dev *sdev);
+	struct mic_irq * (*request_irq)(struct scif_hw_dev *sdev,
+					irqreturn_t (*func)(int irq,
+							    void *data),
+					const char *name, void *data,
+					int db);
+	void (*free_irq)(struct scif_hw_dev *sdev,
+			 struct mic_irq *cookie, void *data);
+	void (*ack_interrupt)(struct scif_hw_dev *sdev, int num);
+	void (*send_intr)(struct scif_hw_dev *sdev, int db);
+	void (*send_p2p_intr)(struct scif_hw_dev *sdev, int db,
+			      struct mic_mw *mw);
+	void __iomem * (*ioremap)(struct scif_hw_dev *sdev,
+				  phys_addr_t pa, size_t len);
+	void (*iounmap)(struct scif_hw_dev *sdev, void __iomem *va);
+};
+
+int scif_register_driver(struct scif_driver *driver);
+void scif_unregister_driver(struct scif_driver *driver);
+struct scif_hw_dev *
+scif_register_device(struct device *pdev, int id,
+		     const struct dma_map_ops *dma_ops,
+		     struct scif_hw_ops *hw_ops, u8 dnode, u8 snode,
+		     struct mic_mw *mmio, struct mic_mw *aper,
+		     void *dp, void __iomem *rdp,
+		     struct dma_chan **chan, int num_chan,
+		     bool card_rel_da);
+void scif_unregister_device(struct scif_hw_dev *sdev);
+
+static inline struct scif_hw_dev *dev_to_scif(struct device *dev)
+{
+	return container_of(dev, struct scif_hw_dev, dev);
+}
+
+static inline struct scif_driver *drv_to_scif(struct device_driver *drv)
+{
+	return container_of(drv, struct scif_driver, driver);
+}
+#endif /* _SCIF_BUS_H */
diff --git a/drivers/misc/mic/bus/vop_bus.c b/drivers/misc/mic/bus/vop_bus.c
new file mode 100644
index 000000000..e5bb9c749
--- /dev/null
+++ b/drivers/misc/mic/bus/vop_bus.c
@@ -0,0 +1,205 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel Virtio Over PCIe (VOP) Bus driver.
+ */
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/idr.h>
+#include <linux/dma-mapping.h>
+
+#include "vop_bus.h"
+
+static ssize_t device_show(struct device *d,
+			   struct device_attribute *attr, char *buf)
+{
+	struct vop_device *dev = dev_to_vop(d);
+
+	return sprintf(buf, "0x%04x\n", dev->id.device);
+}
+static DEVICE_ATTR_RO(device);
+
+static ssize_t vendor_show(struct device *d,
+			   struct device_attribute *attr, char *buf)
+{
+	struct vop_device *dev = dev_to_vop(d);
+
+	return sprintf(buf, "0x%04x\n", dev->id.vendor);
+}
+static DEVICE_ATTR_RO(vendor);
+
+static ssize_t modalias_show(struct device *d,
+			     struct device_attribute *attr, char *buf)
+{
+	struct vop_device *dev = dev_to_vop(d);
+
+	return sprintf(buf, "vop:d%08Xv%08X\n",
+		       dev->id.device, dev->id.vendor);
+}
+static DEVICE_ATTR_RO(modalias);
+
+static struct attribute *vop_dev_attrs[] = {
+	&dev_attr_device.attr,
+	&dev_attr_vendor.attr,
+	&dev_attr_modalias.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(vop_dev);
+
+static inline int vop_id_match(const struct vop_device *dev,
+			       const struct vop_device_id *id)
+{
+	if (id->device != dev->id.device && id->device != VOP_DEV_ANY_ID)
+		return 0;
+
+	return id->vendor == VOP_DEV_ANY_ID || id->vendor == dev->id.vendor;
+}
+
+/*
+ * This looks through all the IDs a driver claims to support.  If any of them
+ * match, we return 1 and the kernel will call vop_dev_probe().
+ */
+static int vop_dev_match(struct device *dv, struct device_driver *dr)
+{
+	unsigned int i;
+	struct vop_device *dev = dev_to_vop(dv);
+	const struct vop_device_id *ids;
+
+	ids = drv_to_vop(dr)->id_table;
+	for (i = 0; ids[i].device; i++)
+		if (vop_id_match(dev, &ids[i]))
+			return 1;
+	return 0;
+}
+
+static int vop_uevent(struct device *dv, struct kobj_uevent_env *env)
+{
+	struct vop_device *dev = dev_to_vop(dv);
+
+	return add_uevent_var(env, "MODALIAS=vop:d%08Xv%08X",
+			      dev->id.device, dev->id.vendor);
+}
+
+static int vop_dev_probe(struct device *d)
+{
+	struct vop_device *dev = dev_to_vop(d);
+	struct vop_driver *drv = drv_to_vop(dev->dev.driver);
+
+	return drv->probe(dev);
+}
+
+static int vop_dev_remove(struct device *d)
+{
+	struct vop_device *dev = dev_to_vop(d);
+	struct vop_driver *drv = drv_to_vop(dev->dev.driver);
+
+	drv->remove(dev);
+	return 0;
+}
+
+static struct bus_type vop_bus = {
+	.name  = "vop_bus",
+	.match = vop_dev_match,
+	.dev_groups = vop_dev_groups,
+	.uevent = vop_uevent,
+	.probe = vop_dev_probe,
+	.remove = vop_dev_remove,
+};
+
+int vop_register_driver(struct vop_driver *driver)
+{
+	driver->driver.bus = &vop_bus;
+	return driver_register(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(vop_register_driver);
+
+void vop_unregister_driver(struct vop_driver *driver)
+{
+	driver_unregister(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(vop_unregister_driver);
+
+static void vop_release_dev(struct device *d)
+{
+	struct vop_device *dev = dev_to_vop(d);
+
+	kfree(dev);
+}
+
+struct vop_device *
+vop_register_device(struct device *pdev, int id,
+		    const struct dma_map_ops *dma_ops,
+		    struct vop_hw_ops *hw_ops, u8 dnode, struct mic_mw *aper,
+		    struct dma_chan *chan)
+{
+	int ret;
+	struct vop_device *vdev;
+
+	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
+	if (!vdev)
+		return ERR_PTR(-ENOMEM);
+
+	vdev->dev.parent = pdev;
+	vdev->id.device = id;
+	vdev->id.vendor = VOP_DEV_ANY_ID;
+	vdev->dev.dma_ops = dma_ops;
+	vdev->dev.dma_mask = &vdev->dev.coherent_dma_mask;
+	dma_set_mask(&vdev->dev, DMA_BIT_MASK(64));
+	vdev->dev.release = vop_release_dev;
+	vdev->hw_ops = hw_ops;
+	vdev->dev.bus = &vop_bus;
+	vdev->dnode = dnode;
+	vdev->aper = aper;
+	vdev->dma_ch = chan;
+	vdev->index = dnode - 1;
+	dev_set_name(&vdev->dev, "vop-dev%u", vdev->index);
+	/*
+	 * device_register() causes the bus infrastructure to look for a
+	 * matching driver.
+	 */
+	ret = device_register(&vdev->dev);
+	if (ret)
+		goto free_vdev;
+	return vdev;
+free_vdev:
+	put_device(&vdev->dev);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(vop_register_device);
+
+void vop_unregister_device(struct vop_device *dev)
+{
+	device_unregister(&dev->dev);
+}
+EXPORT_SYMBOL_GPL(vop_unregister_device);
+
+static int __init vop_init(void)
+{
+	return bus_register(&vop_bus);
+}
+
+static void __exit vop_exit(void)
+{
+	bus_unregister(&vop_bus);
+}
+
+core_initcall(vop_init);
+module_exit(vop_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) VOP Bus driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/bus/vop_bus.h b/drivers/misc/mic/bus/vop_bus.h
new file mode 100644
index 000000000..fff7a865d
--- /dev/null
+++ b/drivers/misc/mic/bus/vop_bus.h
@@ -0,0 +1,140 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel Virtio over PCIe Bus driver.
+ */
+#ifndef _VOP_BUS_H_
+#define _VOP_BUS_H_
+/*
+ * Everything a vop driver needs to work with any particular vop
+ * implementation.
+ */
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+
+#include "../common/mic_dev.h"
+
+struct vop_device_id {
+	u32 device;
+	u32 vendor;
+};
+
+#define VOP_DEV_TRNSP 1
+#define VOP_DEV_ANY_ID 0xffffffff
+/*
+ * Size of the internal buffer used during DMA's as an intermediate buffer
+ * for copy to/from user. Must be an integral number of pages.
+ */
+#define VOP_INT_DMA_BUF_SIZE PAGE_ALIGN(64 * 1024ULL)
+
+/**
+ * vop_device - representation of a device using vop
+ * @hw_ops: the hardware ops supported by this device.
+ * @id: the device type identification (used to match it with a driver).
+ * @dev: underlying device.
+ * @dnode - The destination node which this device will communicate with.
+ * @aper: Aperture memory window
+ * @dma_ch - DMA channel
+ * @index: unique position on the vop bus
+ */
+struct vop_device {
+	struct vop_hw_ops *hw_ops;
+	struct vop_device_id id;
+	struct device dev;
+	u8 dnode;
+	struct mic_mw *aper;
+	struct dma_chan *dma_ch;
+	int index;
+};
+
+/**
+ * vop_driver - operations for a vop I/O driver
+ * @driver: underlying device driver (populate name and owner).
+ * @id_table: the ids serviced by this driver.
+ * @probe: the function to call when a device is found.  Returns 0 or -errno.
+ * @remove: the function to call when a device is removed.
+ */
+struct vop_driver {
+	struct device_driver driver;
+	const struct vop_device_id *id_table;
+	int (*probe)(struct vop_device *dev);
+	void (*remove)(struct vop_device *dev);
+};
+
+/**
+ * vop_hw_ops - Hardware operations for accessing a VOP device on the VOP bus.
+ *
+ * @next_db: Obtain the next available doorbell.
+ * @request_irq: Request an interrupt on a particular doorbell.
+ * @free_irq: Free an interrupt requested previously.
+ * @ack_interrupt: acknowledge an interrupt in the ISR.
+ * @get_remote_dp: Get access to the virtio device page used by the remote
+ *                 node to add/remove/configure virtio devices.
+ * @get_dp: Get access to the virtio device page used by the self
+ *          node to add/remove/configure virtio devices.
+ * @send_intr: Send an interrupt to the peer node on a specified doorbell.
+ * @ioremap: Map a buffer with the specified DMA address and length.
+ * @iounmap: Unmap a buffer previously mapped.
+ * @dma_filter: The DMA filter function to use for obtaining access to
+ *		a DMA channel on the peer node.
+ */
+struct vop_hw_ops {
+	int (*next_db)(struct vop_device *vpdev);
+	struct mic_irq *(*request_irq)(struct vop_device *vpdev,
+				       irqreturn_t (*func)(int irq, void *data),
+				       const char *name, void *data,
+				       int intr_src);
+	void (*free_irq)(struct vop_device *vpdev,
+			 struct mic_irq *cookie, void *data);
+	void (*ack_interrupt)(struct vop_device *vpdev, int num);
+	void __iomem * (*get_remote_dp)(struct vop_device *vpdev);
+	void * (*get_dp)(struct vop_device *vpdev);
+	void (*send_intr)(struct vop_device *vpdev, int db);
+	void __iomem * (*ioremap)(struct vop_device *vpdev,
+				  dma_addr_t pa, size_t len);
+	void (*iounmap)(struct vop_device *vpdev, void __iomem *va);
+};
+
+struct vop_device *
+vop_register_device(struct device *pdev, int id,
+		    const struct dma_map_ops *dma_ops,
+		    struct vop_hw_ops *hw_ops, u8 dnode, struct mic_mw *aper,
+		    struct dma_chan *chan);
+void vop_unregister_device(struct vop_device *dev);
+int vop_register_driver(struct vop_driver *drv);
+void vop_unregister_driver(struct vop_driver *drv);
+
+/*
+ * module_vop_driver() - Helper macro for drivers that don't do
+ * anything special in module init/exit.  This eliminates a lot of
+ * boilerplate.  Each module may only use this macro once, and
+ * calling it replaces module_init() and module_exit()
+ */
+#define module_vop_driver(__vop_driver) \
+	module_driver(__vop_driver, vop_register_driver, \
+			vop_unregister_driver)
+
+static inline struct vop_device *dev_to_vop(struct device *dev)
+{
+	return container_of(dev, struct vop_device, dev);
+}
+
+static inline struct vop_driver *drv_to_vop(struct device_driver *drv)
+{
+	return container_of(drv, struct vop_driver, driver);
+}
+#endif /* _VOP_BUS_H */
diff --git a/drivers/misc/mic/card/Makefile b/drivers/misc/mic/card/Makefile
new file mode 100644
index 000000000..921a7e7e0
--- /dev/null
+++ b/drivers/misc/mic/card/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile - Intel MIC Linux driver.
+# Copyright(c) 2013, Intel Corporation.
+#
+ccflags-y += -DINTEL_MIC_CARD
+
+obj-$(CONFIG_INTEL_MIC_CARD) += mic_card.o
+mic_card-y += mic_x100.o
+mic_card-y += mic_device.o
+mic_card-y += mic_debugfs.o
diff --git a/drivers/misc/mic/card/mic_debugfs.c b/drivers/misc/mic/card/mic_debugfs.c
new file mode 100644
index 000000000..421b3d791
--- /dev/null
+++ b/drivers/misc/mic/card/mic_debugfs.c
@@ -0,0 +1,130 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Disclaimer: The codes contained in these modules may be specific to
+ * the Intel Software Development Platform codenamed: Knights Ferry, and
+ * the Intel product codenamed: Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel MIC Card driver.
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+
+/* Debugfs parent dir */
+static struct dentry *mic_dbg;
+
+/**
+ * mic_intr_test - Send interrupts to host.
+ */
+static int mic_intr_test(struct seq_file *s, void *unused)
+{
+	struct mic_driver *mdrv = s->private;
+	struct mic_device *mdev = &mdrv->mdev;
+
+	mic_send_intr(mdev, 0);
+	msleep(1000);
+	mic_send_intr(mdev, 1);
+	msleep(1000);
+	mic_send_intr(mdev, 2);
+	msleep(1000);
+	mic_send_intr(mdev, 3);
+	msleep(1000);
+
+	return 0;
+}
+
+static int mic_intr_test_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mic_intr_test, inode->i_private);
+}
+
+static int mic_intr_test_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file);
+}
+
+static const struct file_operations intr_test_ops = {
+	.owner   = THIS_MODULE,
+	.open    = mic_intr_test_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = mic_intr_test_release
+};
+
+/**
+ * mic_create_card_debug_dir - Initialize MIC debugfs entries.
+ */
+void __init mic_create_card_debug_dir(struct mic_driver *mdrv)
+{
+	struct dentry *d;
+
+	if (!mic_dbg)
+		return;
+
+	mdrv->dbg_dir = debugfs_create_dir(mdrv->name, mic_dbg);
+	if (!mdrv->dbg_dir) {
+		dev_err(mdrv->dev, "Cant create dbg_dir %s\n", mdrv->name);
+		return;
+	}
+
+	d = debugfs_create_file("intr_test", 0444, mdrv->dbg_dir,
+		mdrv, &intr_test_ops);
+
+	if (!d) {
+		dev_err(mdrv->dev,
+			"Cant create dbg intr_test %s\n", mdrv->name);
+		return;
+	}
+}
+
+/**
+ * mic_delete_card_debug_dir - Uninitialize MIC debugfs entries.
+ */
+void mic_delete_card_debug_dir(struct mic_driver *mdrv)
+{
+	if (!mdrv->dbg_dir)
+		return;
+
+	debugfs_remove_recursive(mdrv->dbg_dir);
+}
+
+/**
+ * mic_init_card_debugfs - Initialize global debugfs entry.
+ */
+void __init mic_init_card_debugfs(void)
+{
+	mic_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
+	if (!mic_dbg)
+		pr_err("can't create debugfs dir\n");
+}
+
+/**
+ * mic_exit_card_debugfs - Uninitialize global debugfs entry
+ */
+void mic_exit_card_debugfs(void)
+{
+	debugfs_remove(mic_dbg);
+}
diff --git a/drivers/misc/mic/card/mic_device.c b/drivers/misc/mic/card/mic_device.c
new file mode 100644
index 000000000..e749af48f
--- /dev/null
+++ b/drivers/misc/mic/card/mic_device.c
@@ -0,0 +1,429 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Disclaimer: The codes contained in these modules may be specific to
+ * the Intel Software Development Platform codenamed: Knights Ferry, and
+ * the Intel product codenamed: Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel MIC Card driver.
+ *
+ */
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/reboot.h>
+#include <linux/dmaengine.h>
+#include <linux/kmod.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+
+static struct mic_driver *g_drv;
+
+static int __init mic_dp_init(void)
+{
+	struct mic_driver *mdrv = g_drv;
+	struct mic_device *mdev = &mdrv->mdev;
+	struct mic_bootparam __iomem *bootparam;
+	u64 lo, hi, dp_dma_addr;
+	u32 magic;
+
+	lo = mic_read_spad(&mdrv->mdev, MIC_DPLO_SPAD);
+	hi = mic_read_spad(&mdrv->mdev, MIC_DPHI_SPAD);
+
+	dp_dma_addr = lo | (hi << 32);
+	mdrv->dp = mic_card_map(mdev, dp_dma_addr, MIC_DP_SIZE);
+	if (!mdrv->dp) {
+		dev_err(mdrv->dev, "Cannot remap Aperture BAR\n");
+		return -ENOMEM;
+	}
+	bootparam = mdrv->dp;
+	magic = ioread32(&bootparam->magic);
+	if (MIC_MAGIC != magic) {
+		dev_err(mdrv->dev, "bootparam magic mismatch 0x%x\n", magic);
+		return -EIO;
+	}
+	return 0;
+}
+
+/* Uninitialize the device page */
+static void mic_dp_uninit(void)
+{
+	mic_card_unmap(&g_drv->mdev, g_drv->dp);
+}
+
+/**
+ * mic_request_card_irq - request an irq.
+ *
+ * @handler: interrupt handler passed to request_threaded_irq.
+ * @thread_fn: thread fn. passed to request_threaded_irq.
+ * @name: The ASCII name of the callee requesting the irq.
+ * @data: private data that is returned back when calling the
+ * function handler.
+ * @index: The doorbell index of the requester.
+ *
+ * returns: The cookie that is transparent to the caller. Passed
+ * back when calling mic_free_irq. An appropriate error code
+ * is returned on failure. Caller needs to use IS_ERR(return_val)
+ * to check for failure and PTR_ERR(return_val) to obtained the
+ * error code.
+ *
+ */
+struct mic_irq *
+mic_request_card_irq(irq_handler_t handler,
+		     irq_handler_t thread_fn, const char *name,
+		     void *data, int index)
+{
+	int rc = 0;
+	unsigned long cookie;
+	struct mic_driver *mdrv = g_drv;
+
+	rc  = request_threaded_irq(mic_db_to_irq(mdrv, index), handler,
+				   thread_fn, 0, name, data);
+	if (rc) {
+		dev_err(mdrv->dev, "request_threaded_irq failed rc = %d\n", rc);
+		goto err;
+	}
+	mdrv->irq_info.irq_usage_count[index]++;
+	cookie = index;
+	return (struct mic_irq *)cookie;
+err:
+	return ERR_PTR(rc);
+}
+
+/**
+ * mic_free_card_irq - free irq.
+ *
+ * @cookie: cookie obtained during a successful call to mic_request_threaded_irq
+ * @data: private data specified by the calling function during the
+ * mic_request_threaded_irq
+ *
+ * returns: none.
+ */
+void mic_free_card_irq(struct mic_irq *cookie, void *data)
+{
+	int index;
+	struct mic_driver *mdrv = g_drv;
+
+	index = (unsigned long)cookie & 0xFFFFU;
+	free_irq(mic_db_to_irq(mdrv, index), data);
+	mdrv->irq_info.irq_usage_count[index]--;
+}
+
+/**
+ * mic_next_card_db - Get the doorbell with minimum usage count.
+ *
+ * Returns the irq index.
+ */
+int mic_next_card_db(void)
+{
+	int i;
+	int index = 0;
+	struct mic_driver *mdrv = g_drv;
+
+	for (i = 0; i < mdrv->intr_info.num_intr; i++) {
+		if (mdrv->irq_info.irq_usage_count[i] <
+			mdrv->irq_info.irq_usage_count[index])
+			index = i;
+	}
+
+	return index;
+}
+
+/**
+ * mic_init_irq - Initialize irq information.
+ *
+ * Returns 0 in success. Appropriate error code on failure.
+ */
+static int mic_init_irq(void)
+{
+	struct mic_driver *mdrv = g_drv;
+
+	mdrv->irq_info.irq_usage_count = kzalloc((sizeof(u32) *
+			mdrv->intr_info.num_intr),
+			GFP_KERNEL);
+	if (!mdrv->irq_info.irq_usage_count)
+		return -ENOMEM;
+	return 0;
+}
+
+/**
+ * mic_uninit_irq - Uninitialize irq information.
+ *
+ * None.
+ */
+static void mic_uninit_irq(void)
+{
+	struct mic_driver *mdrv = g_drv;
+
+	kfree(mdrv->irq_info.irq_usage_count);
+}
+
+static inline struct mic_driver *scdev_to_mdrv(struct scif_hw_dev *scdev)
+{
+	return dev_get_drvdata(scdev->dev.parent);
+}
+
+static struct mic_irq *
+___mic_request_irq(struct scif_hw_dev *scdev,
+		   irqreturn_t (*func)(int irq, void *data),
+				       const char *name, void *data,
+				       int db)
+{
+	return mic_request_card_irq(func, NULL, name, data, db);
+}
+
+static void
+___mic_free_irq(struct scif_hw_dev *scdev,
+		struct mic_irq *cookie, void *data)
+{
+	return mic_free_card_irq(cookie, data);
+}
+
+static void ___mic_ack_interrupt(struct scif_hw_dev *scdev, int num)
+{
+	struct mic_driver *mdrv = scdev_to_mdrv(scdev);
+
+	mic_ack_interrupt(&mdrv->mdev);
+}
+
+static int ___mic_next_db(struct scif_hw_dev *scdev)
+{
+	return mic_next_card_db();
+}
+
+static void ___mic_send_intr(struct scif_hw_dev *scdev, int db)
+{
+	struct mic_driver *mdrv = scdev_to_mdrv(scdev);
+
+	mic_send_intr(&mdrv->mdev, db);
+}
+
+static void ___mic_send_p2p_intr(struct scif_hw_dev *scdev, int db,
+				 struct mic_mw *mw)
+{
+	mic_send_p2p_intr(db, mw);
+}
+
+static void __iomem *
+___mic_ioremap(struct scif_hw_dev *scdev,
+	       phys_addr_t pa, size_t len)
+{
+	struct mic_driver *mdrv = scdev_to_mdrv(scdev);
+
+	return mic_card_map(&mdrv->mdev, pa, len);
+}
+
+static void ___mic_iounmap(struct scif_hw_dev *scdev, void __iomem *va)
+{
+	struct mic_driver *mdrv = scdev_to_mdrv(scdev);
+
+	mic_card_unmap(&mdrv->mdev, va);
+}
+
+static struct scif_hw_ops scif_hw_ops = {
+	.request_irq = ___mic_request_irq,
+	.free_irq = ___mic_free_irq,
+	.ack_interrupt = ___mic_ack_interrupt,
+	.next_db = ___mic_next_db,
+	.send_intr = ___mic_send_intr,
+	.send_p2p_intr = ___mic_send_p2p_intr,
+	.ioremap = ___mic_ioremap,
+	.iounmap = ___mic_iounmap,
+};
+
+static inline struct mic_driver *vpdev_to_mdrv(struct vop_device *vpdev)
+{
+	return dev_get_drvdata(vpdev->dev.parent);
+}
+
+static struct mic_irq *
+__mic_request_irq(struct vop_device *vpdev,
+		  irqreturn_t (*func)(int irq, void *data),
+		   const char *name, void *data, int intr_src)
+{
+	return mic_request_card_irq(func, NULL, name, data, intr_src);
+}
+
+static void __mic_free_irq(struct vop_device *vpdev,
+			   struct mic_irq *cookie, void *data)
+{
+	return mic_free_card_irq(cookie, data);
+}
+
+static void __mic_ack_interrupt(struct vop_device *vpdev, int num)
+{
+	struct mic_driver *mdrv = vpdev_to_mdrv(vpdev);
+
+	mic_ack_interrupt(&mdrv->mdev);
+}
+
+static int __mic_next_db(struct vop_device *vpdev)
+{
+	return mic_next_card_db();
+}
+
+static void __iomem *__mic_get_remote_dp(struct vop_device *vpdev)
+{
+	struct mic_driver *mdrv = vpdev_to_mdrv(vpdev);
+
+	return mdrv->dp;
+}
+
+static void __mic_send_intr(struct vop_device *vpdev, int db)
+{
+	struct mic_driver *mdrv = vpdev_to_mdrv(vpdev);
+
+	mic_send_intr(&mdrv->mdev, db);
+}
+
+static void __iomem *__mic_ioremap(struct vop_device *vpdev,
+				   dma_addr_t pa, size_t len)
+{
+	struct mic_driver *mdrv = vpdev_to_mdrv(vpdev);
+
+	return mic_card_map(&mdrv->mdev, pa, len);
+}
+
+static void __mic_iounmap(struct vop_device *vpdev, void __iomem *va)
+{
+	struct mic_driver *mdrv = vpdev_to_mdrv(vpdev);
+
+	mic_card_unmap(&mdrv->mdev, va);
+}
+
+static struct vop_hw_ops vop_hw_ops = {
+	.request_irq = __mic_request_irq,
+	.free_irq = __mic_free_irq,
+	.ack_interrupt = __mic_ack_interrupt,
+	.next_db = __mic_next_db,
+	.get_remote_dp = __mic_get_remote_dp,
+	.send_intr = __mic_send_intr,
+	.ioremap = __mic_ioremap,
+	.iounmap = __mic_iounmap,
+};
+
+static int mic_request_dma_chans(struct mic_driver *mdrv)
+{
+	dma_cap_mask_t mask;
+	struct dma_chan *chan;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_MEMCPY, mask);
+
+	do {
+		chan = dma_request_channel(mask, NULL, NULL);
+		if (chan) {
+			mdrv->dma_ch[mdrv->num_dma_ch++] = chan;
+			if (mdrv->num_dma_ch >= MIC_MAX_DMA_CHAN)
+				break;
+		}
+	} while (chan);
+	dev_info(mdrv->dev, "DMA channels # %d\n", mdrv->num_dma_ch);
+	return mdrv->num_dma_ch;
+}
+
+static void mic_free_dma_chans(struct mic_driver *mdrv)
+{
+	int i = 0;
+
+	for (i = 0; i < mdrv->num_dma_ch; i++) {
+		dma_release_channel(mdrv->dma_ch[i]);
+		mdrv->dma_ch[i] = NULL;
+	}
+	mdrv->num_dma_ch = 0;
+}
+
+/*
+ * mic_driver_init - MIC driver initialization tasks.
+ *
+ * Returns 0 in success. Appropriate error code on failure.
+ */
+int __init mic_driver_init(struct mic_driver *mdrv)
+{
+	int rc;
+	struct mic_bootparam __iomem *bootparam;
+	u8 node_id;
+
+	g_drv = mdrv;
+	/* Unloading the card module is not supported. */
+	if (!try_module_get(mdrv->dev->driver->owner)) {
+		rc = -ENODEV;
+		goto done;
+	}
+	rc = mic_dp_init();
+	if (rc)
+		goto put;
+	rc = mic_init_irq();
+	if (rc)
+		goto dp_uninit;
+	if (!mic_request_dma_chans(mdrv)) {
+		rc = -ENODEV;
+		goto irq_uninit;
+	}
+	mdrv->vpdev = vop_register_device(mdrv->dev, VOP_DEV_TRNSP,
+					  NULL, &vop_hw_ops, 0,
+					  NULL, mdrv->dma_ch[0]);
+	if (IS_ERR(mdrv->vpdev)) {
+		rc = PTR_ERR(mdrv->vpdev);
+		goto dma_free;
+	}
+	bootparam = mdrv->dp;
+	node_id = ioread8(&bootparam->node_id);
+	mdrv->scdev = scif_register_device(mdrv->dev, MIC_SCIF_DEV,
+					   NULL, &scif_hw_ops,
+					   0, node_id, &mdrv->mdev.mmio, NULL,
+					   NULL, mdrv->dp, mdrv->dma_ch,
+					   mdrv->num_dma_ch, true);
+	if (IS_ERR(mdrv->scdev)) {
+		rc = PTR_ERR(mdrv->scdev);
+		goto vop_remove;
+	}
+	mic_create_card_debug_dir(mdrv);
+done:
+	return rc;
+vop_remove:
+	vop_unregister_device(mdrv->vpdev);
+dma_free:
+	mic_free_dma_chans(mdrv);
+irq_uninit:
+	mic_uninit_irq();
+dp_uninit:
+	mic_dp_uninit();
+put:
+	module_put(mdrv->dev->driver->owner);
+	return rc;
+}
+
+/*
+ * mic_driver_uninit - MIC driver uninitialization tasks.
+ *
+ * Returns None
+ */
+void mic_driver_uninit(struct mic_driver *mdrv)
+{
+	mic_delete_card_debug_dir(mdrv);
+	scif_unregister_device(mdrv->scdev);
+	vop_unregister_device(mdrv->vpdev);
+	mic_free_dma_chans(mdrv);
+	mic_uninit_irq();
+	mic_dp_uninit();
+	module_put(mdrv->dev->driver->owner);
+}
diff --git a/drivers/misc/mic/card/mic_device.h b/drivers/misc/mic/card/mic_device.h
new file mode 100644
index 000000000..333dbed97
--- /dev/null
+++ b/drivers/misc/mic/card/mic_device.h
@@ -0,0 +1,149 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Disclaimer: The codes contained in these modules may be specific to
+ * the Intel Software Development Platform codenamed: Knights Ferry, and
+ * the Intel product codenamed: Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel MIC Card driver.
+ *
+ */
+#ifndef _MIC_CARD_DEVICE_H_
+#define _MIC_CARD_DEVICE_H_
+
+#include <linux/workqueue.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/mic_bus.h>
+#include "../bus/scif_bus.h"
+#include "../bus/vop_bus.h"
+
+/**
+ * struct mic_intr_info - Contains h/w specific interrupt sources info
+ *
+ * @num_intr: The number of irqs available
+ */
+struct mic_intr_info {
+	u32 num_intr;
+};
+
+/**
+ * struct mic_irq_info - OS specific irq information
+ *
+ * @irq_usage_count: usage count array tracking the number of sources
+ * assigned for each irq.
+ */
+struct mic_irq_info {
+	int *irq_usage_count;
+};
+
+/**
+ * struct mic_device -  MIC device information.
+ *
+ * @mmio: MMIO bar information.
+ */
+struct mic_device {
+	struct mic_mw mmio;
+};
+
+/**
+ * struct mic_driver - MIC card driver information.
+ *
+ * @name: Name for MIC driver.
+ * @dbg_dir: debugfs directory of this MIC device.
+ * @dev: The device backing this MIC.
+ * @dp: The pointer to the virtio device page.
+ * @mdev: MIC device information for the host.
+ * @hotplug_work: Hot plug work for adding/removing virtio devices.
+ * @irq_info: The OS specific irq information
+ * @intr_info: H/W specific interrupt information.
+ * @dma_mbdev: dma device on the MIC virtual bus.
+ * @dma_ch - Array of DMA channels
+ * @num_dma_ch - Number of DMA channels available
+ * @scdev: SCIF device on the SCIF virtual bus.
+ * @vpdev: Virtio over PCIe device on the VOP virtual bus.
+ */
+struct mic_driver {
+	char name[20];
+	struct dentry *dbg_dir;
+	struct device *dev;
+	void __iomem *dp;
+	struct mic_device mdev;
+	struct work_struct hotplug_work;
+	struct mic_irq_info irq_info;
+	struct mic_intr_info intr_info;
+	struct mbus_device *dma_mbdev;
+	struct dma_chan *dma_ch[MIC_MAX_DMA_CHAN];
+	int num_dma_ch;
+	struct scif_hw_dev *scdev;
+	struct vop_device *vpdev;
+};
+
+/**
+ * struct mic_irq - opaque pointer used as cookie
+ */
+struct mic_irq;
+
+/**
+ * mic_mmio_read - read from an MMIO register.
+ * @mw: MMIO register base virtual address.
+ * @offset: register offset.
+ *
+ * RETURNS: register value.
+ */
+static inline u32 mic_mmio_read(struct mic_mw *mw, u32 offset)
+{
+	return ioread32(mw->va + offset);
+}
+
+/**
+ * mic_mmio_write - write to an MMIO register.
+ * @mw: MMIO register base virtual address.
+ * @val: the data value to put into the register
+ * @offset: register offset.
+ *
+ * RETURNS: none.
+ */
+static inline void
+mic_mmio_write(struct mic_mw *mw, u32 val, u32 offset)
+{
+	iowrite32(val, mw->va + offset);
+}
+
+int mic_driver_init(struct mic_driver *mdrv);
+void mic_driver_uninit(struct mic_driver *mdrv);
+int mic_next_card_db(void);
+struct mic_irq *
+mic_request_card_irq(irq_handler_t handler, irq_handler_t thread_fn,
+		     const char *name, void *data, int db);
+void mic_free_card_irq(struct mic_irq *cookie, void *data);
+u32 mic_read_spad(struct mic_device *mdev, unsigned int idx);
+void mic_send_intr(struct mic_device *mdev, int doorbell);
+void mic_send_p2p_intr(int doorbell, struct mic_mw *mw);
+int mic_db_to_irq(struct mic_driver *mdrv, int db);
+u32 mic_ack_interrupt(struct mic_device *mdev);
+void mic_hw_intr_init(struct mic_driver *mdrv);
+void __iomem *
+mic_card_map(struct mic_device *mdev, dma_addr_t addr, size_t size);
+void mic_card_unmap(struct mic_device *mdev, void __iomem *addr);
+void __init mic_create_card_debug_dir(struct mic_driver *mdrv);
+void mic_delete_card_debug_dir(struct mic_driver *mdrv);
+void __init mic_init_card_debugfs(void);
+void mic_exit_card_debugfs(void);
+#endif
diff --git a/drivers/misc/mic/card/mic_x100.c b/drivers/misc/mic/card/mic_x100.c
new file mode 100644
index 000000000..4007adc66
--- /dev/null
+++ b/drivers/misc/mic/card/mic_x100.c
@@ -0,0 +1,359 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Disclaimer: The codes contained in these modules may be specific to
+ * the Intel Software Development Platform codenamed: Knights Ferry, and
+ * the Intel product codenamed: Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel MIC Card driver.
+ *
+ */
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+#include "mic_x100.h"
+
+static const char mic_driver_name[] = "mic";
+
+static struct mic_driver g_drv;
+
+/**
+ * mic_read_spad - read from the scratchpad register
+ * @mdev: pointer to mic_device instance
+ * @idx: index to scratchpad register, 0 based
+ *
+ * This function allows reading of the 32bit scratchpad register.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+u32 mic_read_spad(struct mic_device *mdev, unsigned int idx)
+{
+	return mic_mmio_read(&mdev->mmio,
+		MIC_X100_SBOX_BASE_ADDRESS +
+		MIC_X100_SBOX_SPAD0 + idx * 4);
+}
+
+/**
+ * __mic_send_intr - Send interrupt to Host.
+ * @mdev: pointer to mic_device instance
+ * @doorbell: Doorbell number.
+ */
+void mic_send_intr(struct mic_device *mdev, int doorbell)
+{
+	struct mic_mw *mw = &mdev->mmio;
+
+	if (doorbell > MIC_X100_MAX_DOORBELL_IDX)
+		return;
+	/* Ensure that the interrupt is ordered w.r.t previous stores. */
+	wmb();
+	mic_mmio_write(mw, MIC_X100_SBOX_SDBIC0_DBREQ_BIT,
+		       MIC_X100_SBOX_BASE_ADDRESS +
+		       (MIC_X100_SBOX_SDBIC0 + (4 * doorbell)));
+}
+
+/*
+ * mic_x100_send_sbox_intr - Send an MIC_X100_SBOX interrupt to MIC.
+ */
+static void mic_x100_send_sbox_intr(struct mic_mw *mw, int doorbell)
+{
+	u64 apic_icr_offset = MIC_X100_SBOX_APICICR0 + doorbell * 8;
+	u32 apicicr_low = mic_mmio_read(mw, MIC_X100_SBOX_BASE_ADDRESS +
+					apic_icr_offset);
+
+	/* for MIC we need to make sure we "hit" the send_icr bit (13) */
+	apicicr_low = (apicicr_low | (1 << 13));
+	/*
+	 * Ensure that the interrupt is ordered w.r.t. previous stores
+	 * to main memory. Fence instructions are not implemented in X100
+	 * since execution is in order but a compiler barrier is still
+	 * required.
+	 */
+	wmb();
+	mic_mmio_write(mw, apicicr_low,
+		       MIC_X100_SBOX_BASE_ADDRESS + apic_icr_offset);
+}
+
+static void mic_x100_send_rdmasr_intr(struct mic_mw *mw, int doorbell)
+{
+	int rdmasr_offset = MIC_X100_SBOX_RDMASR0 + (doorbell << 2);
+	/*
+	 * Ensure that the interrupt is ordered w.r.t. previous stores
+	 * to main memory. Fence instructions are not implemented in X100
+	 * since execution is in order but a compiler barrier is still
+	 * required.
+	 */
+	wmb();
+	mic_mmio_write(mw, 0, MIC_X100_SBOX_BASE_ADDRESS + rdmasr_offset);
+}
+
+/**
+ * mic_ack_interrupt - Device specific interrupt handling.
+ * @mdev: pointer to mic_device instance
+ *
+ * Returns: bitmask of doorbell events triggered.
+ */
+u32 mic_ack_interrupt(struct mic_device *mdev)
+{
+	return 0;
+}
+
+static inline int mic_get_sbox_irq(int db)
+{
+	return MIC_X100_IRQ_BASE + db;
+}
+
+static inline int mic_get_rdmasr_irq(int index)
+{
+	return  MIC_X100_RDMASR_IRQ_BASE + index;
+}
+
+void mic_send_p2p_intr(int db, struct mic_mw *mw)
+{
+	int rdmasr_index;
+
+	if (db < MIC_X100_NUM_SBOX_IRQ) {
+		mic_x100_send_sbox_intr(mw, db);
+	} else {
+		rdmasr_index = db - MIC_X100_NUM_SBOX_IRQ;
+		mic_x100_send_rdmasr_intr(mw, rdmasr_index);
+	}
+}
+
+/**
+ * mic_hw_intr_init - Initialize h/w specific interrupt
+ * information.
+ * @mdrv: pointer to mic_driver
+ */
+void mic_hw_intr_init(struct mic_driver *mdrv)
+{
+	mdrv->intr_info.num_intr = MIC_X100_NUM_SBOX_IRQ +
+				MIC_X100_NUM_RDMASR_IRQ;
+}
+
+/**
+ * mic_db_to_irq - Retrieve irq number corresponding to a doorbell.
+ * @mdrv: pointer to mic_driver
+ * @db: The doorbell obtained for which the irq is needed. Doorbell
+ * may correspond to an sbox doorbell or an rdmasr index.
+ *
+ * Returns the irq corresponding to the doorbell.
+ */
+int mic_db_to_irq(struct mic_driver *mdrv, int db)
+{
+	int rdmasr_index;
+
+	/*
+	 * The total number of doorbell interrupts on the card are 16. Indices
+	 * 0-8 falls in the SBOX category and 8-15 fall in the RDMASR category.
+	 */
+	if (db < MIC_X100_NUM_SBOX_IRQ) {
+		return mic_get_sbox_irq(db);
+	} else {
+		rdmasr_index = db - MIC_X100_NUM_SBOX_IRQ;
+		return mic_get_rdmasr_irq(rdmasr_index);
+	}
+}
+
+/*
+ * mic_card_map - Allocate virtual address for a remote memory region.
+ * @mdev: pointer to mic_device instance.
+ * @addr: Remote DMA address.
+ * @size: Size of the region.
+ *
+ * Returns: Virtual address backing the remote memory region.
+ */
+void __iomem *
+mic_card_map(struct mic_device *mdev, dma_addr_t addr, size_t size)
+{
+	return ioremap(addr, size);
+}
+
+/*
+ * mic_card_unmap - Unmap the virtual address for a remote memory region.
+ * @mdev: pointer to mic_device instance.
+ * @addr: Virtual address for remote memory region.
+ *
+ * Returns: None.
+ */
+void mic_card_unmap(struct mic_device *mdev, void __iomem *addr)
+{
+	iounmap(addr);
+}
+
+static inline struct mic_driver *mbdev_to_mdrv(struct mbus_device *mbdev)
+{
+	return dev_get_drvdata(mbdev->dev.parent);
+}
+
+static struct mic_irq *
+_mic_request_threaded_irq(struct mbus_device *mbdev,
+			  irq_handler_t handler, irq_handler_t thread_fn,
+			  const char *name, void *data, int intr_src)
+{
+	int rc = 0;
+	unsigned int irq = intr_src;
+	unsigned long cookie = irq;
+
+	rc  = request_threaded_irq(irq, handler, thread_fn, 0, name, data);
+	if (rc) {
+		dev_err(mbdev_to_mdrv(mbdev)->dev,
+			"request_threaded_irq failed rc = %d\n", rc);
+		return ERR_PTR(rc);
+	}
+	return (struct mic_irq *)cookie;
+}
+
+static void _mic_free_irq(struct mbus_device *mbdev,
+			  struct mic_irq *cookie, void *data)
+{
+	unsigned long irq = (unsigned long)cookie;
+	free_irq(irq, data);
+}
+
+static void _mic_ack_interrupt(struct mbus_device *mbdev, int num)
+{
+	mic_ack_interrupt(&mbdev_to_mdrv(mbdev)->mdev);
+}
+
+static struct mbus_hw_ops mbus_hw_ops = {
+	.request_threaded_irq = _mic_request_threaded_irq,
+	.free_irq = _mic_free_irq,
+	.ack_interrupt = _mic_ack_interrupt,
+};
+
+static int __init mic_probe(struct platform_device *pdev)
+{
+	struct mic_driver *mdrv = &g_drv;
+	struct mic_device *mdev = &mdrv->mdev;
+	int rc = 0;
+
+	mdrv->dev = &pdev->dev;
+	snprintf(mdrv->name, sizeof(mic_driver_name), mic_driver_name);
+
+	/* FIXME: use dma_set_mask_and_coherent() and check result */
+	dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+
+	mdev->mmio.pa = MIC_X100_MMIO_BASE;
+	mdev->mmio.len = MIC_X100_MMIO_LEN;
+	mdev->mmio.va = devm_ioremap(&pdev->dev, MIC_X100_MMIO_BASE,
+				     MIC_X100_MMIO_LEN);
+	if (!mdev->mmio.va) {
+		dev_err(&pdev->dev, "Cannot remap MMIO BAR\n");
+		rc = -EIO;
+		goto done;
+	}
+	mic_hw_intr_init(mdrv);
+	platform_set_drvdata(pdev, mdrv);
+	mdrv->dma_mbdev = mbus_register_device(mdrv->dev, MBUS_DEV_DMA_MIC,
+					       NULL, &mbus_hw_ops, 0,
+					       mdrv->mdev.mmio.va);
+	if (IS_ERR(mdrv->dma_mbdev)) {
+		rc = PTR_ERR(mdrv->dma_mbdev);
+		dev_err(&pdev->dev, "mbus_add_device failed rc %d\n", rc);
+		goto done;
+	}
+	rc = mic_driver_init(mdrv);
+	if (rc) {
+		dev_err(&pdev->dev, "mic_driver_init failed rc %d\n", rc);
+		goto remove_dma;
+	}
+done:
+	return rc;
+remove_dma:
+	mbus_unregister_device(mdrv->dma_mbdev);
+	return rc;
+}
+
+static int mic_remove(struct platform_device *pdev)
+{
+	struct mic_driver *mdrv = &g_drv;
+
+	mic_driver_uninit(mdrv);
+	mbus_unregister_device(mdrv->dma_mbdev);
+	return 0;
+}
+
+static void mic_platform_shutdown(struct platform_device *pdev)
+{
+	mic_remove(pdev);
+}
+
+static struct platform_driver __refdata mic_platform_driver = {
+	.probe = mic_probe,
+	.remove = mic_remove,
+	.shutdown = mic_platform_shutdown,
+	.driver         = {
+		.name   = mic_driver_name,
+	},
+};
+
+static struct platform_device *mic_platform_dev;
+
+static int __init mic_init(void)
+{
+	int ret;
+	struct cpuinfo_x86 *c = &cpu_data(0);
+
+	if (!(c->x86 == 11 && c->x86_model == 1)) {
+		ret = -ENODEV;
+		pr_err("%s not running on X100 ret %d\n", __func__, ret);
+		goto done;
+	}
+
+	request_module("mic_x100_dma");
+	mic_init_card_debugfs();
+
+	mic_platform_dev = platform_device_register_simple(mic_driver_name,
+							   0, NULL, 0);
+	ret = PTR_ERR_OR_ZERO(mic_platform_dev);
+	if (ret) {
+		pr_err("platform_device_register_full ret %d\n", ret);
+		goto cleanup_debugfs;
+	}
+	ret = platform_driver_register(&mic_platform_driver);
+	if (ret) {
+		pr_err("platform_driver_register ret %d\n", ret);
+		goto device_unregister;
+	}
+	return ret;
+
+device_unregister:
+	platform_device_unregister(mic_platform_dev);
+cleanup_debugfs:
+	mic_exit_card_debugfs();
+done:
+	return ret;
+}
+
+static void __exit mic_exit(void)
+{
+	platform_driver_unregister(&mic_platform_driver);
+	platform_device_unregister(mic_platform_dev);
+	mic_exit_card_debugfs();
+}
+
+module_init(mic_init);
+module_exit(mic_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC X100 Card driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/card/mic_x100.h b/drivers/misc/mic/card/mic_x100.h
new file mode 100644
index 000000000..7e2224934
--- /dev/null
+++ b/drivers/misc/mic/card/mic_x100.h
@@ -0,0 +1,49 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Disclaimer: The codes contained in these modules may be specific to
+ * the Intel Software Development Platform codenamed: Knights Ferry, and
+ * the Intel product codenamed: Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel MIC Card driver.
+ *
+ */
+#ifndef _MIC_X100_CARD_H_
+#define _MIC_X100_CARD_H_
+
+#define MIC_X100_MMIO_BASE 0x08007C0000ULL
+#define MIC_X100_MMIO_LEN 0x00020000ULL
+#define MIC_X100_SBOX_BASE_ADDRESS 0x00010000ULL
+
+#define MIC_X100_SBOX_SPAD0 0x0000AB20
+#define MIC_X100_SBOX_SDBIC0 0x0000CC90
+#define MIC_X100_SBOX_SDBIC0_DBREQ_BIT 0x80000000
+#define MIC_X100_SBOX_RDMASR0	0x0000B180
+#define MIC_X100_SBOX_APICICR0 0x0000A9D0
+
+#define MIC_X100_MAX_DOORBELL_IDX 8
+
+#define MIC_X100_NUM_SBOX_IRQ 8
+#define MIC_X100_NUM_RDMASR_IRQ 8
+#define MIC_X100_SBOX_IRQ_BASE 0
+#define MIC_X100_RDMASR_IRQ_BASE 17
+
+#define MIC_X100_IRQ_BASE 26
+
+#endif
diff --git a/drivers/misc/mic/common/mic_dev.h b/drivers/misc/mic/common/mic_dev.h
new file mode 100644
index 000000000..50776772e
--- /dev/null
+++ b/drivers/misc/mic/common/mic_dev.h
@@ -0,0 +1,67 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC driver.
+ *
+ */
+#ifndef __MIC_DEV_H__
+#define __MIC_DEV_H__
+
+/* The maximum number of MIC devices supported in a single host system. */
+#define MIC_MAX_NUM_DEVS 128
+
+/**
+ * enum mic_hw_family - The hardware family to which a device belongs.
+ */
+enum mic_hw_family {
+	MIC_FAMILY_X100 = 0,
+	MIC_FAMILY_X200,
+	MIC_FAMILY_UNKNOWN,
+	MIC_FAMILY_LAST
+};
+
+/**
+ * struct mic_mw - MIC memory window
+ *
+ * @pa: Base physical address.
+ * @va: Base ioremap'd virtual address.
+ * @len: Size of the memory window.
+ */
+struct mic_mw {
+	phys_addr_t pa;
+	void __iomem *va;
+	resource_size_t len;
+};
+
+/*
+ * Scratch pad register offsets used by the host to communicate
+ * device page DMA address to the card.
+ */
+#define MIC_DPLO_SPAD 14
+#define MIC_DPHI_SPAD 15
+
+/*
+ * These values are supposed to be in the config_change field of the
+ * device page when the host sends a config change interrupt to the card.
+ */
+#define MIC_VIRTIO_PARAM_DEV_REMOVE 0x1
+#define MIC_VIRTIO_PARAM_CONFIG_CHANGED 0x2
+
+/* Maximum number of DMA channels */
+#define MIC_MAX_DMA_CHAN 4
+
+#endif
diff --git a/drivers/misc/mic/cosm/Makefile b/drivers/misc/mic/cosm/Makefile
new file mode 100644
index 000000000..97d74cb12
--- /dev/null
+++ b/drivers/misc/mic/cosm/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile - Intel MIC Coprocessor State Management (COSM) Driver
+# Copyright(c) 2015, Intel Corporation.
+#
+obj-$(CONFIG_MIC_COSM) += mic_cosm.o
+
+mic_cosm-objs := cosm_main.o
+mic_cosm-objs += cosm_debugfs.o
+mic_cosm-objs += cosm_sysfs.o
+mic_cosm-objs += cosm_scif_server.o
diff --git a/drivers/misc/mic/cosm/cosm_debugfs.c b/drivers/misc/mic/cosm/cosm_debugfs.c
new file mode 100644
index 000000000..216cb3cd2
--- /dev/null
+++ b/drivers/misc/mic/cosm/cosm_debugfs.c
@@ -0,0 +1,156 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Coprocessor State Management (COSM) Driver
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include "cosm_main.h"
+
+/* Debugfs parent dir */
+static struct dentry *cosm_dbg;
+
+/**
+ * cosm_log_buf_show - Display MIC kernel log buffer
+ *
+ * log_buf addr/len is read from System.map by user space
+ * and populated in sysfs entries.
+ */
+static int cosm_log_buf_show(struct seq_file *s, void *unused)
+{
+	void __iomem *log_buf_va;
+	int __iomem *log_buf_len_va;
+	struct cosm_device *cdev = s->private;
+	void *kva;
+	int size;
+	u64 aper_offset;
+
+	if (!cdev || !cdev->log_buf_addr || !cdev->log_buf_len)
+		goto done;
+
+	mutex_lock(&cdev->cosm_mutex);
+	switch (cdev->state) {
+	case MIC_BOOTING:
+	case MIC_ONLINE:
+	case MIC_SHUTTING_DOWN:
+		break;
+	default:
+		goto unlock;
+	}
+
+	/*
+	 * Card kernel will never be relocated and any kernel text/data mapping
+	 * can be translated to phys address by subtracting __START_KERNEL_map.
+	 */
+	aper_offset = (u64)cdev->log_buf_len - __START_KERNEL_map;
+	log_buf_len_va = cdev->hw_ops->aper(cdev)->va + aper_offset;
+	aper_offset = (u64)cdev->log_buf_addr - __START_KERNEL_map;
+	log_buf_va = cdev->hw_ops->aper(cdev)->va + aper_offset;
+
+	size = ioread32(log_buf_len_va);
+	kva = kmalloc(size, GFP_KERNEL);
+	if (!kva)
+		goto unlock;
+
+	memcpy_fromio(kva, log_buf_va, size);
+	seq_write(s, kva, size);
+	kfree(kva);
+unlock:
+	mutex_unlock(&cdev->cosm_mutex);
+done:
+	return 0;
+}
+
+static int cosm_log_buf_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, cosm_log_buf_show, inode->i_private);
+}
+
+static const struct file_operations log_buf_ops = {
+	.owner   = THIS_MODULE,
+	.open    = cosm_log_buf_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release
+};
+
+/**
+ * cosm_force_reset_show - Force MIC reset
+ *
+ * Invokes the force_reset COSM bus op instead of the standard reset
+ * op in case a force reset of the MIC device is required
+ */
+static int cosm_force_reset_show(struct seq_file *s, void *pos)
+{
+	struct cosm_device *cdev = s->private;
+
+	cosm_stop(cdev, true);
+	return 0;
+}
+
+static int cosm_force_reset_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, cosm_force_reset_show, inode->i_private);
+}
+
+static const struct file_operations force_reset_ops = {
+	.owner   = THIS_MODULE,
+	.open    = cosm_force_reset_debug_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release
+};
+
+void cosm_create_debug_dir(struct cosm_device *cdev)
+{
+	char name[16];
+
+	if (!cosm_dbg)
+		return;
+
+	scnprintf(name, sizeof(name), "mic%d", cdev->index);
+	cdev->dbg_dir = debugfs_create_dir(name, cosm_dbg);
+	if (!cdev->dbg_dir)
+		return;
+
+	debugfs_create_file("log_buf", 0444, cdev->dbg_dir, cdev, &log_buf_ops);
+	debugfs_create_file("force_reset", 0444, cdev->dbg_dir, cdev,
+			    &force_reset_ops);
+}
+
+void cosm_delete_debug_dir(struct cosm_device *cdev)
+{
+	if (!cdev->dbg_dir)
+		return;
+
+	debugfs_remove_recursive(cdev->dbg_dir);
+}
+
+void cosm_init_debugfs(void)
+{
+	cosm_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
+	if (!cosm_dbg)
+		pr_err("can't create debugfs dir\n");
+}
+
+void cosm_exit_debugfs(void)
+{
+	debugfs_remove(cosm_dbg);
+}
diff --git a/drivers/misc/mic/cosm/cosm_main.c b/drivers/misc/mic/cosm/cosm_main.c
new file mode 100644
index 000000000..7005cb1e0
--- /dev/null
+++ b/drivers/misc/mic/cosm/cosm_main.c
@@ -0,0 +1,393 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Coprocessor State Management (COSM) Driver
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/idr.h>
+#include <linux/slab.h>
+#include <linux/cred.h>
+#include "cosm_main.h"
+
+static const char cosm_driver_name[] = "mic";
+
+/* COSM ID allocator */
+static struct ida g_cosm_ida;
+/* Class of MIC devices for sysfs accessibility. */
+static struct class *g_cosm_class;
+/* Number of MIC devices */
+static atomic_t g_num_dev;
+
+/**
+ * cosm_hw_reset - Issue a HW reset for the MIC device
+ * @cdev: pointer to cosm_device instance
+ */
+static void cosm_hw_reset(struct cosm_device *cdev, bool force)
+{
+	int i;
+
+#define MIC_RESET_TO (45)
+	if (force && cdev->hw_ops->force_reset)
+		cdev->hw_ops->force_reset(cdev);
+	else
+		cdev->hw_ops->reset(cdev);
+
+	for (i = 0; i < MIC_RESET_TO; i++) {
+		if (cdev->hw_ops->ready(cdev)) {
+			cosm_set_state(cdev, MIC_READY);
+			return;
+		}
+		/*
+		 * Resets typically take 10s of seconds to complete.
+		 * Since an MMIO read is required to check if the
+		 * firmware is ready or not, a 1 second delay works nicely.
+		 */
+		msleep(1000);
+	}
+	cosm_set_state(cdev, MIC_RESET_FAILED);
+}
+
+/**
+ * cosm_start - Start the MIC
+ * @cdev: pointer to cosm_device instance
+ *
+ * This function prepares an MIC for boot and initiates boot.
+ * RETURNS: An appropriate -ERRNO error value on error, or 0 for success.
+ */
+int cosm_start(struct cosm_device *cdev)
+{
+	const struct cred *orig_cred;
+	struct cred *override_cred;
+	int rc;
+
+	mutex_lock(&cdev->cosm_mutex);
+	if (!cdev->bootmode) {
+		dev_err(&cdev->dev, "%s %d bootmode not set\n",
+			__func__, __LINE__);
+		rc = -EINVAL;
+		goto unlock_ret;
+	}
+retry:
+	if (cdev->state != MIC_READY) {
+		dev_err(&cdev->dev, "%s %d MIC state not READY\n",
+			__func__, __LINE__);
+		rc = -EINVAL;
+		goto unlock_ret;
+	}
+	if (!cdev->hw_ops->ready(cdev)) {
+		cosm_hw_reset(cdev, false);
+		/*
+		 * The state will either be MIC_READY if the reset succeeded
+		 * or MIC_RESET_FAILED if the firmware reset failed.
+		 */
+		goto retry;
+	}
+
+	/*
+	 * Set credentials to root to allow non-root user to download initramsfs
+	 * with 600 permissions
+	 */
+	override_cred = prepare_creds();
+	if (!override_cred) {
+		dev_err(&cdev->dev, "%s %d prepare_creds failed\n",
+			__func__, __LINE__);
+		rc = -ENOMEM;
+		goto unlock_ret;
+	}
+	override_cred->fsuid = GLOBAL_ROOT_UID;
+	orig_cred = override_creds(override_cred);
+
+	rc = cdev->hw_ops->start(cdev, cdev->index);
+
+	revert_creds(orig_cred);
+	put_cred(override_cred);
+	if (rc)
+		goto unlock_ret;
+
+	/*
+	 * If linux is being booted, card is treated 'online' only
+	 * when the scif interface in the card is up. If anything else
+	 * is booted, we set card to 'online' immediately.
+	 */
+	if (!strcmp(cdev->bootmode, "linux"))
+		cosm_set_state(cdev, MIC_BOOTING);
+	else
+		cosm_set_state(cdev, MIC_ONLINE);
+unlock_ret:
+	mutex_unlock(&cdev->cosm_mutex);
+	if (rc)
+		dev_err(&cdev->dev, "cosm_start failed rc %d\n", rc);
+	return rc;
+}
+
+/**
+ * cosm_stop - Prepare the MIC for reset and trigger reset
+ * @cdev: pointer to cosm_device instance
+ * @force: force a MIC to reset even if it is already reset and ready.
+ *
+ * RETURNS: None
+ */
+void cosm_stop(struct cosm_device *cdev, bool force)
+{
+	mutex_lock(&cdev->cosm_mutex);
+	if (cdev->state != MIC_READY || force) {
+		/*
+		 * Don't call hw_ops if they have been called previously.
+		 * stop(..) calls device_unregister and will crash the system if
+		 * called multiple times.
+		 */
+		u8 state = cdev->state == MIC_RESETTING ?
+					cdev->prev_state : cdev->state;
+		bool call_hw_ops = state != MIC_RESET_FAILED &&
+					state != MIC_READY;
+
+		if (cdev->state != MIC_RESETTING)
+			cosm_set_state(cdev, MIC_RESETTING);
+		cdev->heartbeat_watchdog_enable = false;
+		if (call_hw_ops)
+			cdev->hw_ops->stop(cdev, force);
+		cosm_hw_reset(cdev, force);
+		cosm_set_shutdown_status(cdev, MIC_NOP);
+		if (call_hw_ops && cdev->hw_ops->post_reset)
+			cdev->hw_ops->post_reset(cdev, cdev->state);
+	}
+	mutex_unlock(&cdev->cosm_mutex);
+	flush_work(&cdev->scif_work);
+}
+
+/**
+ * cosm_reset_trigger_work - Trigger MIC reset
+ * @work: The work structure
+ *
+ * This work is scheduled whenever the host wants to reset the MIC.
+ */
+static void cosm_reset_trigger_work(struct work_struct *work)
+{
+	struct cosm_device *cdev = container_of(work, struct cosm_device,
+						reset_trigger_work);
+	cosm_stop(cdev, false);
+}
+
+/**
+ * cosm_reset - Schedule MIC reset
+ * @cdev: pointer to cosm_device instance
+ *
+ * RETURNS: An -EINVAL if the card is already READY or 0 for success.
+ */
+int cosm_reset(struct cosm_device *cdev)
+{
+	int rc = 0;
+
+	mutex_lock(&cdev->cosm_mutex);
+	if (cdev->state != MIC_READY) {
+		if (cdev->state != MIC_RESETTING) {
+			cdev->prev_state = cdev->state;
+			cosm_set_state(cdev, MIC_RESETTING);
+			schedule_work(&cdev->reset_trigger_work);
+		}
+	} else {
+		dev_err(&cdev->dev, "%s %d MIC is READY\n", __func__, __LINE__);
+		rc = -EINVAL;
+	}
+	mutex_unlock(&cdev->cosm_mutex);
+	return rc;
+}
+
+/**
+ * cosm_shutdown - Initiate MIC shutdown.
+ * @cdev: pointer to cosm_device instance
+ *
+ * RETURNS: None
+ */
+int cosm_shutdown(struct cosm_device *cdev)
+{
+	struct cosm_msg msg = { .id = COSM_MSG_SHUTDOWN };
+	int rc = 0;
+
+	mutex_lock(&cdev->cosm_mutex);
+	if (cdev->state != MIC_ONLINE) {
+		rc = -EINVAL;
+		dev_err(&cdev->dev, "%s %d skipping shutdown in state: %s\n",
+			__func__, __LINE__, cosm_state_string[cdev->state]);
+		goto err;
+	}
+
+	if (!cdev->epd) {
+		rc = -ENOTCONN;
+		dev_err(&cdev->dev, "%s %d scif endpoint not connected rc %d\n",
+			__func__, __LINE__, rc);
+		goto err;
+	}
+
+	rc = scif_send(cdev->epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
+	if (rc < 0) {
+		dev_err(&cdev->dev, "%s %d scif_send failed rc %d\n",
+			__func__, __LINE__, rc);
+		goto err;
+	}
+	cdev->heartbeat_watchdog_enable = false;
+	cosm_set_state(cdev, MIC_SHUTTING_DOWN);
+	rc = 0;
+err:
+	mutex_unlock(&cdev->cosm_mutex);
+	return rc;
+}
+
+static int cosm_driver_probe(struct cosm_device *cdev)
+{
+	int rc;
+
+	/* Initialize SCIF server at first probe */
+	if (atomic_add_return(1, &g_num_dev) == 1) {
+		rc = cosm_scif_init();
+		if (rc)
+			goto scif_exit;
+	}
+	mutex_init(&cdev->cosm_mutex);
+	INIT_WORK(&cdev->reset_trigger_work, cosm_reset_trigger_work);
+	INIT_WORK(&cdev->scif_work, cosm_scif_work);
+	cdev->sysfs_heartbeat_enable = true;
+	cosm_sysfs_init(cdev);
+	cdev->sdev = device_create_with_groups(g_cosm_class, cdev->dev.parent,
+			       MKDEV(0, cdev->index), cdev, cdev->attr_group,
+			       "mic%d", cdev->index);
+	if (IS_ERR(cdev->sdev)) {
+		rc = PTR_ERR(cdev->sdev);
+		dev_err(&cdev->dev, "device_create_with_groups failed rc %d\n",
+			rc);
+		goto scif_exit;
+	}
+
+	cdev->state_sysfs = sysfs_get_dirent(cdev->sdev->kobj.sd,
+		"state");
+	if (!cdev->state_sysfs) {
+		rc = -ENODEV;
+		dev_err(&cdev->dev, "sysfs_get_dirent failed rc %d\n", rc);
+		goto destroy_device;
+	}
+	cosm_create_debug_dir(cdev);
+	return 0;
+destroy_device:
+	device_destroy(g_cosm_class, MKDEV(0, cdev->index));
+scif_exit:
+	if (atomic_dec_and_test(&g_num_dev))
+		cosm_scif_exit();
+	return rc;
+}
+
+static void cosm_driver_remove(struct cosm_device *cdev)
+{
+	cosm_delete_debug_dir(cdev);
+	sysfs_put(cdev->state_sysfs);
+	device_destroy(g_cosm_class, MKDEV(0, cdev->index));
+	flush_work(&cdev->reset_trigger_work);
+	cosm_stop(cdev, false);
+	if (atomic_dec_and_test(&g_num_dev))
+		cosm_scif_exit();
+
+	/* These sysfs entries might have allocated */
+	kfree(cdev->cmdline);
+	kfree(cdev->firmware);
+	kfree(cdev->ramdisk);
+	kfree(cdev->bootmode);
+}
+
+static int cosm_suspend(struct device *dev)
+{
+	struct cosm_device *cdev = dev_to_cosm(dev);
+
+	mutex_lock(&cdev->cosm_mutex);
+	switch (cdev->state) {
+	/**
+	 * Suspend/freeze hooks in userspace have already shutdown the card.
+	 * Card should be 'ready' in most cases. It is however possible that
+	 * some userspace application initiated a boot. In those cases, we
+	 * simply reset the card.
+	 */
+	case MIC_ONLINE:
+	case MIC_BOOTING:
+	case MIC_SHUTTING_DOWN:
+		mutex_unlock(&cdev->cosm_mutex);
+		cosm_stop(cdev, false);
+		break;
+	default:
+		mutex_unlock(&cdev->cosm_mutex);
+		break;
+	}
+	return 0;
+}
+
+static const struct dev_pm_ops cosm_pm_ops = {
+	.suspend = cosm_suspend,
+	.freeze = cosm_suspend
+};
+
+static struct cosm_driver cosm_driver = {
+	.driver = {
+		.name =  KBUILD_MODNAME,
+		.owner = THIS_MODULE,
+		.pm = &cosm_pm_ops,
+	},
+	.probe = cosm_driver_probe,
+	.remove = cosm_driver_remove
+};
+
+static int __init cosm_init(void)
+{
+	int ret;
+
+	cosm_init_debugfs();
+
+	g_cosm_class = class_create(THIS_MODULE, cosm_driver_name);
+	if (IS_ERR(g_cosm_class)) {
+		ret = PTR_ERR(g_cosm_class);
+		pr_err("class_create failed ret %d\n", ret);
+		goto cleanup_debugfs;
+	}
+
+	ida_init(&g_cosm_ida);
+	ret = cosm_register_driver(&cosm_driver);
+	if (ret) {
+		pr_err("cosm_register_driver failed ret %d\n", ret);
+		goto ida_destroy;
+	}
+	return 0;
+ida_destroy:
+	ida_destroy(&g_cosm_ida);
+	class_destroy(g_cosm_class);
+cleanup_debugfs:
+	cosm_exit_debugfs();
+	return ret;
+}
+
+static void __exit cosm_exit(void)
+{
+	cosm_unregister_driver(&cosm_driver);
+	ida_destroy(&g_cosm_ida);
+	class_destroy(g_cosm_class);
+	cosm_exit_debugfs();
+}
+
+module_init(cosm_init);
+module_exit(cosm_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC Coprocessor State Management (COSM) Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/cosm/cosm_main.h b/drivers/misc/mic/cosm/cosm_main.h
new file mode 100644
index 000000000..aa78cdf25
--- /dev/null
+++ b/drivers/misc/mic/cosm/cosm_main.h
@@ -0,0 +1,73 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Coprocessor State Management (COSM) Driver
+ *
+ */
+#ifndef _COSM_COSM_H_
+#define _COSM_COSM_H_
+
+#include <linux/scif.h>
+#include "../bus/cosm_bus.h"
+
+#define COSM_HEARTBEAT_SEND_SEC 30
+#define SCIF_COSM_LISTEN_PORT  201
+
+/**
+ * enum COSM msg id's
+ * @COSM_MSG_SHUTDOWN: host->card trigger shutdown
+ * @COSM_MSG_SYNC_TIME: host->card send host time to card to sync time
+ * @COSM_MSG_HEARTBEAT: card->host heartbeat
+ * @COSM_MSG_SHUTDOWN_STATUS: card->host with shutdown status as payload
+ */
+enum cosm_msg_id {
+	COSM_MSG_SHUTDOWN,
+	COSM_MSG_SYNC_TIME,
+	COSM_MSG_HEARTBEAT,
+	COSM_MSG_SHUTDOWN_STATUS,
+};
+
+struct cosm_msg {
+	u64 id;
+	union {
+		u64 shutdown_status;
+		struct {
+			u64 tv_sec;
+			u64 tv_nsec;
+		} timespec;
+	};
+};
+
+extern const char * const cosm_state_string[];
+extern const char * const cosm_shutdown_status_string[];
+
+void cosm_sysfs_init(struct cosm_device *cdev);
+int cosm_start(struct cosm_device *cdev);
+void cosm_stop(struct cosm_device *cdev, bool force);
+int cosm_reset(struct cosm_device *cdev);
+int cosm_shutdown(struct cosm_device *cdev);
+void cosm_set_state(struct cosm_device *cdev, u8 state);
+void cosm_set_shutdown_status(struct cosm_device *cdev, u8 status);
+void cosm_init_debugfs(void);
+void cosm_exit_debugfs(void);
+void cosm_create_debug_dir(struct cosm_device *cdev);
+void cosm_delete_debug_dir(struct cosm_device *cdev);
+int cosm_scif_init(void);
+void cosm_scif_exit(void);
+void cosm_scif_work(struct work_struct *work);
+
+#endif
diff --git a/drivers/misc/mic/cosm/cosm_scif_server.c b/drivers/misc/mic/cosm/cosm_scif_server.c
new file mode 100644
index 000000000..e94b7eac4
--- /dev/null
+++ b/drivers/misc/mic/cosm/cosm_scif_server.c
@@ -0,0 +1,411 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Coprocessor State Management (COSM) Driver
+ *
+ */
+#include <linux/kthread.h>
+#include <linux/sched/signal.h>
+
+#include "cosm_main.h"
+
+/*
+ * The COSM driver uses SCIF to communicate between the management node and the
+ * MIC cards. SCIF is used to (a) Send a shutdown command to the card (b)
+ * receive a shutdown status back from the card upon completion of shutdown and
+ * (c) receive periodic heartbeat messages from the card used to deduce if the
+ * card has crashed.
+ *
+ * A COSM server consisting of a SCIF listening endpoint waits for incoming
+ * connections from the card. Upon acceptance of the connection, a separate
+ * work-item is scheduled to handle SCIF message processing for that card. The
+ * life-time of this work-item is therefore the time from which the connection
+ * from a card is accepted to the time at which the connection is closed. A new
+ * work-item starts each time the card boots and is alive till the card (a)
+ * shuts down (b) is reset (c) crashes (d) cosm_client driver on the card is
+ * unloaded.
+ *
+ * From the point of view of COSM interactions with SCIF during card
+ * shutdown, reset and crash are as follows:
+ *
+ * Card shutdown
+ * -------------
+ * 1. COSM client on the card invokes orderly_poweroff() in response to SHUTDOWN
+ *    message from the host.
+ * 2. Card driver shutdown callback invokes scif_unregister_device(..) resulting
+ *    in scif_remove(..) getting called on the card
+ * 3. scif_remove -> scif_stop -> scif_handle_remove_node ->
+ *    scif_peer_unregister_device -> device_unregister for the host peer device
+ * 4. During device_unregister remove(..) method of cosm_client is invoked which
+ *    closes the COSM SCIF endpoint on the card. This results in a SCIF_DISCNCT
+ *    message being sent to host SCIF. SCIF_DISCNCT message processing on the
+ *    host SCIF sets the host COSM SCIF endpoint state to DISCONNECTED and wakes
+ *    up the host COSM thread blocked in scif_poll(..) resulting in
+ *    scif_poll(..)  returning EPOLLHUP.
+ * 5. On the card, scif_peer_release_dev is next called which results in an
+ *    SCIF_EXIT message being sent to the host and after receiving the
+ *    SCIF_EXIT_ACK from the host the peer device teardown on the card is
+ *    complete.
+ * 6. As part of the SCIF_EXIT message processing on the host, host sends a
+ *    SCIF_REMOVE_NODE to itself corresponding to the card being removed. This
+ *    starts a similar SCIF peer device teardown sequence on the host
+ *    corresponding to the card being shut down.
+ *
+ * Card reset
+ * ----------
+ * The case of interest here is when the card has not been previously shut down
+ * since most of the steps below are skipped in that case:
+
+ * 1. cosm_stop(..) invokes hw_ops->stop(..) method of the base PCIe driver
+ *    which unregisters the SCIF HW device resulting in scif_remove(..) being
+ *    called on the host.
+ * 2. scif_remove(..) calls scif_disconnect_node(..) which results in a
+ *    SCIF_EXIT message being sent to the card.
+ * 3. The card executes scif_stop() as part of SCIF_EXIT message
+ *    processing. This results in the COSM endpoint on the card being closed and
+ *    the SCIF host peer device on the card getting unregistered similar to
+ *    steps 3, 4 and 5 for the card shutdown case above. scif_poll(..) on the
+ *    host returns EPOLLHUP as a result.
+ * 4. On the host, card peer device unregister and SCIF HW remove(..) also
+ *    subsequently complete.
+ *
+ * Card crash
+ * ----------
+ * If a reset is issued after the card has crashed, there is no SCIF_DISCNT
+ * message from the card which would result in scif_poll(..) returning
+ * EPOLLHUP. In this case when the host SCIF driver sends a SCIF_REMOVE_NODE
+ * message to itself resulting in the card SCIF peer device being unregistered,
+ * this results in a scif_peer_release_dev -> scif_cleanup_scifdev->
+ * scif_invalidate_ep call sequence which sets the endpoint state to
+ * DISCONNECTED and results in scif_poll(..) returning EPOLLHUP.
+ */
+
+#define COSM_SCIF_BACKLOG 16
+#define COSM_HEARTBEAT_CHECK_DELTA_SEC 10
+#define COSM_HEARTBEAT_TIMEOUT_SEC \
+		(COSM_HEARTBEAT_SEND_SEC + COSM_HEARTBEAT_CHECK_DELTA_SEC)
+#define COSM_HEARTBEAT_TIMEOUT_MSEC (COSM_HEARTBEAT_TIMEOUT_SEC * MSEC_PER_SEC)
+
+static struct task_struct *server_thread;
+static scif_epd_t listen_epd;
+
+/* Publish MIC card's shutdown status to user space MIC daemon */
+static void cosm_update_mic_status(struct cosm_device *cdev)
+{
+	if (cdev->shutdown_status_int != MIC_NOP) {
+		cosm_set_shutdown_status(cdev, cdev->shutdown_status_int);
+		cdev->shutdown_status_int = MIC_NOP;
+	}
+}
+
+/* Store MIC card's shutdown status internally when it is received */
+static void cosm_shutdown_status_int(struct cosm_device *cdev,
+				     enum mic_status shutdown_status)
+{
+	switch (shutdown_status) {
+	case MIC_HALTED:
+	case MIC_POWER_OFF:
+	case MIC_RESTART:
+	case MIC_CRASHED:
+		break;
+	default:
+		dev_err(&cdev->dev, "%s %d Unexpected shutdown_status %d\n",
+			__func__, __LINE__, shutdown_status);
+		return;
+	};
+	cdev->shutdown_status_int = shutdown_status;
+	cdev->heartbeat_watchdog_enable = false;
+
+	if (cdev->state != MIC_SHUTTING_DOWN)
+		cosm_set_state(cdev, MIC_SHUTTING_DOWN);
+}
+
+/* Non-blocking recv. Read and process all available messages */
+static void cosm_scif_recv(struct cosm_device *cdev)
+{
+	struct cosm_msg msg;
+	int rc;
+
+	while (1) {
+		rc = scif_recv(cdev->epd, &msg, sizeof(msg), 0);
+		if (!rc) {
+			break;
+		} else if (rc < 0) {
+			dev_dbg(&cdev->dev, "%s: %d rc %d\n",
+				__func__, __LINE__, rc);
+			break;
+		}
+		dev_dbg(&cdev->dev, "%s: %d rc %d id 0x%llx\n",
+			__func__, __LINE__, rc, msg.id);
+
+		switch (msg.id) {
+		case COSM_MSG_SHUTDOWN_STATUS:
+			cosm_shutdown_status_int(cdev, msg.shutdown_status);
+			break;
+		case COSM_MSG_HEARTBEAT:
+			/* Nothing to do, heartbeat only unblocks scif_poll */
+			break;
+		default:
+			dev_err(&cdev->dev, "%s: %d unknown msg.id %lld\n",
+				__func__, __LINE__, msg.id);
+			break;
+		}
+	}
+}
+
+/* Publish crashed status for this MIC card */
+static void cosm_set_crashed(struct cosm_device *cdev)
+{
+	dev_err(&cdev->dev, "node alive timeout\n");
+	cosm_shutdown_status_int(cdev, MIC_CRASHED);
+	cosm_update_mic_status(cdev);
+}
+
+/* Send host time to the MIC card to sync system time between host and MIC */
+static void cosm_send_time(struct cosm_device *cdev)
+{
+	struct cosm_msg msg = { .id = COSM_MSG_SYNC_TIME };
+	struct timespec64 ts;
+	int rc;
+
+	ktime_get_real_ts64(&ts);
+	msg.timespec.tv_sec = ts.tv_sec;
+	msg.timespec.tv_nsec = ts.tv_nsec;
+
+	rc = scif_send(cdev->epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
+	if (rc < 0)
+		dev_err(&cdev->dev, "%s %d scif_send failed rc %d\n",
+			__func__, __LINE__, rc);
+}
+
+/*
+ * Close this cosm_device's endpoint after its peer endpoint on the card has
+ * been closed. In all cases except MIC card crash EPOLLHUP on the host is
+ * triggered by the client's endpoint being closed.
+ */
+static void cosm_scif_close(struct cosm_device *cdev)
+{
+	/*
+	 * Because SHUTDOWN_STATUS message is sent by the MIC cards in the
+	 * reboot notifier when shutdown is still not complete, we notify mpssd
+	 * to reset the card when SCIF endpoint is closed.
+	 */
+	cosm_update_mic_status(cdev);
+	scif_close(cdev->epd);
+	cdev->epd = NULL;
+	dev_dbg(&cdev->dev, "%s %d\n", __func__, __LINE__);
+}
+
+/*
+ * Set card state to ONLINE when a new SCIF connection from a MIC card is
+ * received. Normally the state is BOOTING when the connection comes in, but can
+ * be ONLINE if cosm_client driver on the card was unloaded and then reloaded.
+ */
+static int cosm_set_online(struct cosm_device *cdev)
+{
+	int rc = 0;
+
+	if (MIC_BOOTING == cdev->state || MIC_ONLINE == cdev->state) {
+		cdev->heartbeat_watchdog_enable = cdev->sysfs_heartbeat_enable;
+		cdev->epd = cdev->newepd;
+		if (cdev->state == MIC_BOOTING)
+			cosm_set_state(cdev, MIC_ONLINE);
+		cosm_send_time(cdev);
+		dev_dbg(&cdev->dev, "%s %d\n", __func__, __LINE__);
+	} else {
+		dev_warn(&cdev->dev, "%s %d not going online in state: %s\n",
+			 __func__, __LINE__, cosm_state_string[cdev->state]);
+		rc = -EINVAL;
+	}
+	/* Drop reference acquired by bus_find_device in the server thread */
+	put_device(&cdev->dev);
+	return rc;
+}
+
+/*
+ * Work function for handling work for a SCIF connection from a particular MIC
+ * card. It first sets the card state to ONLINE and then calls scif_poll to
+ * block on activity such as incoming messages on the SCIF endpoint. When the
+ * endpoint is closed, the work function exits, completing its life cycle, from
+ * MIC card boot to card shutdown/reset/crash.
+ */
+void cosm_scif_work(struct work_struct *work)
+{
+	struct cosm_device *cdev = container_of(work, struct cosm_device,
+						scif_work);
+	struct scif_pollepd pollepd;
+	int rc;
+
+	mutex_lock(&cdev->cosm_mutex);
+	if (cosm_set_online(cdev))
+		goto exit;
+
+	while (1) {
+		pollepd.epd = cdev->epd;
+		pollepd.events = EPOLLIN;
+
+		/* Drop the mutex before blocking in scif_poll(..) */
+		mutex_unlock(&cdev->cosm_mutex);
+		/* poll(..) with timeout on our endpoint */
+		rc = scif_poll(&pollepd, 1, COSM_HEARTBEAT_TIMEOUT_MSEC);
+		mutex_lock(&cdev->cosm_mutex);
+		if (rc < 0) {
+			dev_err(&cdev->dev, "%s %d scif_poll rc %d\n",
+				__func__, __LINE__, rc);
+			continue;
+		}
+
+		/* There is a message from the card */
+		if (pollepd.revents & EPOLLIN)
+			cosm_scif_recv(cdev);
+
+		/* The peer endpoint is closed or this endpoint disconnected */
+		if (pollepd.revents & EPOLLHUP) {
+			cosm_scif_close(cdev);
+			break;
+		}
+
+		/* Did we timeout from poll? */
+		if (!rc && cdev->heartbeat_watchdog_enable)
+			cosm_set_crashed(cdev);
+	}
+exit:
+	dev_dbg(&cdev->dev, "%s %d exiting\n", __func__, __LINE__);
+	mutex_unlock(&cdev->cosm_mutex);
+}
+
+/*
+ * COSM SCIF server thread function. Accepts incoming SCIF connections from MIC
+ * cards, finds the correct cosm_device to associate that connection with and
+ * schedules individual work items for each MIC card.
+ */
+static int cosm_scif_server(void *unused)
+{
+	struct cosm_device *cdev;
+	scif_epd_t newepd;
+	struct scif_port_id port_id;
+	int rc;
+
+	allow_signal(SIGKILL);
+
+	while (!kthread_should_stop()) {
+		rc = scif_accept(listen_epd, &port_id, &newepd,
+				 SCIF_ACCEPT_SYNC);
+		if (rc < 0) {
+			if (-ERESTARTSYS != rc)
+				pr_err("%s %d rc %d\n", __func__, __LINE__, rc);
+			continue;
+		}
+
+		/*
+		 * Associate the incoming connection with a particular
+		 * cosm_device, COSM device ID == SCIF node ID - 1
+		 */
+		cdev = cosm_find_cdev_by_id(port_id.node - 1);
+		if (!cdev)
+			continue;
+		cdev->newepd = newepd;
+		schedule_work(&cdev->scif_work);
+	}
+
+	pr_debug("%s %d Server thread stopped\n", __func__, __LINE__);
+	return 0;
+}
+
+static int cosm_scif_listen(void)
+{
+	int rc;
+
+	listen_epd = scif_open();
+	if (!listen_epd) {
+		pr_err("%s %d scif_open failed\n", __func__, __LINE__);
+		return -ENOMEM;
+	}
+
+	rc = scif_bind(listen_epd, SCIF_COSM_LISTEN_PORT);
+	if (rc < 0) {
+		pr_err("%s %d scif_bind failed rc %d\n",
+		       __func__, __LINE__, rc);
+		goto err;
+	}
+
+	rc = scif_listen(listen_epd, COSM_SCIF_BACKLOG);
+	if (rc < 0) {
+		pr_err("%s %d scif_listen rc %d\n", __func__, __LINE__, rc);
+		goto err;
+	}
+	pr_debug("%s %d listen_epd set up\n", __func__, __LINE__);
+	return 0;
+err:
+	scif_close(listen_epd);
+	listen_epd = NULL;
+	return rc;
+}
+
+static void cosm_scif_listen_exit(void)
+{
+	pr_debug("%s %d closing listen_epd\n", __func__, __LINE__);
+	if (listen_epd) {
+		scif_close(listen_epd);
+		listen_epd = NULL;
+	}
+}
+
+/*
+ * Create a listening SCIF endpoint and a server kthread which accepts incoming
+ * SCIF connections from MIC cards
+ */
+int cosm_scif_init(void)
+{
+	int rc = cosm_scif_listen();
+
+	if (rc) {
+		pr_err("%s %d cosm_scif_listen rc %d\n",
+		       __func__, __LINE__, rc);
+		goto err;
+	}
+
+	server_thread = kthread_run(cosm_scif_server, NULL, "cosm_server");
+	if (IS_ERR(server_thread)) {
+		rc = PTR_ERR(server_thread);
+		pr_err("%s %d kthread_run rc %d\n", __func__, __LINE__, rc);
+		goto listen_exit;
+	}
+	return 0;
+listen_exit:
+	cosm_scif_listen_exit();
+err:
+	return rc;
+}
+
+/* Stop the running server thread and close the listening SCIF endpoint */
+void cosm_scif_exit(void)
+{
+	int rc;
+
+	if (!IS_ERR_OR_NULL(server_thread)) {
+		rc = send_sig(SIGKILL, server_thread, 0);
+		if (rc) {
+			pr_err("%s %d send_sig rc %d\n",
+			       __func__, __LINE__, rc);
+			return;
+		}
+		kthread_stop(server_thread);
+	}
+
+	cosm_scif_listen_exit();
+}
diff --git a/drivers/misc/mic/cosm/cosm_sysfs.c b/drivers/misc/mic/cosm/cosm_sysfs.c
new file mode 100644
index 000000000..29d6863b6
--- /dev/null
+++ b/drivers/misc/mic/cosm/cosm_sysfs.c
@@ -0,0 +1,461 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Coprocessor State Management (COSM) Driver
+ *
+ */
+#include <linux/slab.h>
+#include "cosm_main.h"
+
+/*
+ * A state-to-string lookup table, for exposing a human readable state
+ * via sysfs. Always keep in sync with enum cosm_states
+ */
+const char * const cosm_state_string[] = {
+	[MIC_READY] = "ready",
+	[MIC_BOOTING] = "booting",
+	[MIC_ONLINE] = "online",
+	[MIC_SHUTTING_DOWN] = "shutting_down",
+	[MIC_RESETTING] = "resetting",
+	[MIC_RESET_FAILED] = "reset_failed",
+};
+
+/*
+ * A shutdown-status-to-string lookup table, for exposing a human
+ * readable state via sysfs. Always keep in sync with enum cosm_shutdown_status
+ */
+const char * const cosm_shutdown_status_string[] = {
+	[MIC_NOP] = "nop",
+	[MIC_CRASHED] = "crashed",
+	[MIC_HALTED] = "halted",
+	[MIC_POWER_OFF] = "poweroff",
+	[MIC_RESTART] = "restart",
+};
+
+void cosm_set_shutdown_status(struct cosm_device *cdev, u8 shutdown_status)
+{
+	dev_dbg(&cdev->dev, "Shutdown Status %s -> %s\n",
+		cosm_shutdown_status_string[cdev->shutdown_status],
+		cosm_shutdown_status_string[shutdown_status]);
+	cdev->shutdown_status = shutdown_status;
+}
+
+void cosm_set_state(struct cosm_device *cdev, u8 state)
+{
+	dev_dbg(&cdev->dev, "State %s -> %s\n",
+		cosm_state_string[cdev->state],
+		cosm_state_string[state]);
+	cdev->state = state;
+	sysfs_notify_dirent(cdev->state_sysfs);
+}
+
+static ssize_t
+family_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev)
+		return -EINVAL;
+
+	return cdev->hw_ops->family(cdev, buf);
+}
+static DEVICE_ATTR_RO(family);
+
+static ssize_t
+stepping_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev)
+		return -EINVAL;
+
+	return cdev->hw_ops->stepping(cdev, buf);
+}
+static DEVICE_ATTR_RO(stepping);
+
+static ssize_t
+state_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev || cdev->state >= MIC_LAST)
+		return -EINVAL;
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n",
+		cosm_state_string[cdev->state]);
+}
+
+static ssize_t
+state_store(struct device *dev, struct device_attribute *attr,
+	    const char *buf, size_t count)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+	int rc;
+
+	if (!cdev)
+		return -EINVAL;
+
+	if (sysfs_streq(buf, "boot")) {
+		rc = cosm_start(cdev);
+		goto done;
+	}
+	if (sysfs_streq(buf, "reset")) {
+		rc = cosm_reset(cdev);
+		goto done;
+	}
+
+	if (sysfs_streq(buf, "shutdown")) {
+		rc = cosm_shutdown(cdev);
+		goto done;
+	}
+	rc = -EINVAL;
+done:
+	if (rc)
+		count = rc;
+	return count;
+}
+static DEVICE_ATTR_RW(state);
+
+static ssize_t shutdown_status_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev || cdev->shutdown_status >= MIC_STATUS_LAST)
+		return -EINVAL;
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n",
+		cosm_shutdown_status_string[cdev->shutdown_status]);
+}
+static DEVICE_ATTR_RO(shutdown_status);
+
+static ssize_t
+heartbeat_enable_show(struct device *dev,
+		      struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev)
+		return -EINVAL;
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", cdev->sysfs_heartbeat_enable);
+}
+
+static ssize_t
+heartbeat_enable_store(struct device *dev,
+		       struct device_attribute *attr,
+		       const char *buf, size_t count)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+	int enable;
+	int ret;
+
+	if (!cdev)
+		return -EINVAL;
+
+	mutex_lock(&cdev->cosm_mutex);
+	ret = kstrtoint(buf, 10, &enable);
+	if (ret)
+		goto unlock;
+
+	cdev->sysfs_heartbeat_enable = enable;
+	/* if state is not online, cdev->heartbeat_watchdog_enable is 0 */
+	if (cdev->state == MIC_ONLINE)
+		cdev->heartbeat_watchdog_enable = enable;
+	ret = count;
+unlock:
+	mutex_unlock(&cdev->cosm_mutex);
+	return ret;
+}
+static DEVICE_ATTR_RW(heartbeat_enable);
+
+static ssize_t
+cmdline_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+	char *cmdline;
+
+	if (!cdev)
+		return -EINVAL;
+
+	cmdline = cdev->cmdline;
+
+	if (cmdline)
+		return scnprintf(buf, PAGE_SIZE, "%s\n", cmdline);
+	return 0;
+}
+
+static ssize_t
+cmdline_store(struct device *dev, struct device_attribute *attr,
+	      const char *buf, size_t count)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev)
+		return -EINVAL;
+
+	mutex_lock(&cdev->cosm_mutex);
+	kfree(cdev->cmdline);
+
+	cdev->cmdline = kmalloc(count + 1, GFP_KERNEL);
+	if (!cdev->cmdline) {
+		count = -ENOMEM;
+		goto unlock;
+	}
+
+	strncpy(cdev->cmdline, buf, count);
+
+	if (cdev->cmdline[count - 1] == '\n')
+		cdev->cmdline[count - 1] = '\0';
+	else
+		cdev->cmdline[count] = '\0';
+unlock:
+	mutex_unlock(&cdev->cosm_mutex);
+	return count;
+}
+static DEVICE_ATTR_RW(cmdline);
+
+static ssize_t
+firmware_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+	char *firmware;
+
+	if (!cdev)
+		return -EINVAL;
+
+	firmware = cdev->firmware;
+
+	if (firmware)
+		return scnprintf(buf, PAGE_SIZE, "%s\n", firmware);
+	return 0;
+}
+
+static ssize_t
+firmware_store(struct device *dev, struct device_attribute *attr,
+	       const char *buf, size_t count)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev)
+		return -EINVAL;
+
+	mutex_lock(&cdev->cosm_mutex);
+	kfree(cdev->firmware);
+
+	cdev->firmware = kmalloc(count + 1, GFP_KERNEL);
+	if (!cdev->firmware) {
+		count = -ENOMEM;
+		goto unlock;
+	}
+	strncpy(cdev->firmware, buf, count);
+
+	if (cdev->firmware[count - 1] == '\n')
+		cdev->firmware[count - 1] = '\0';
+	else
+		cdev->firmware[count] = '\0';
+unlock:
+	mutex_unlock(&cdev->cosm_mutex);
+	return count;
+}
+static DEVICE_ATTR_RW(firmware);
+
+static ssize_t
+ramdisk_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+	char *ramdisk;
+
+	if (!cdev)
+		return -EINVAL;
+
+	ramdisk = cdev->ramdisk;
+
+	if (ramdisk)
+		return scnprintf(buf, PAGE_SIZE, "%s\n", ramdisk);
+	return 0;
+}
+
+static ssize_t
+ramdisk_store(struct device *dev, struct device_attribute *attr,
+	      const char *buf, size_t count)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev)
+		return -EINVAL;
+
+	mutex_lock(&cdev->cosm_mutex);
+	kfree(cdev->ramdisk);
+
+	cdev->ramdisk = kmalloc(count + 1, GFP_KERNEL);
+	if (!cdev->ramdisk) {
+		count = -ENOMEM;
+		goto unlock;
+	}
+
+	strncpy(cdev->ramdisk, buf, count);
+
+	if (cdev->ramdisk[count - 1] == '\n')
+		cdev->ramdisk[count - 1] = '\0';
+	else
+		cdev->ramdisk[count] = '\0';
+unlock:
+	mutex_unlock(&cdev->cosm_mutex);
+	return count;
+}
+static DEVICE_ATTR_RW(ramdisk);
+
+static ssize_t
+bootmode_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+	char *bootmode;
+
+	if (!cdev)
+		return -EINVAL;
+
+	bootmode = cdev->bootmode;
+
+	if (bootmode)
+		return scnprintf(buf, PAGE_SIZE, "%s\n", bootmode);
+	return 0;
+}
+
+static ssize_t
+bootmode_store(struct device *dev, struct device_attribute *attr,
+	       const char *buf, size_t count)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev)
+		return -EINVAL;
+
+	if (!sysfs_streq(buf, "linux") && !sysfs_streq(buf, "flash"))
+		return -EINVAL;
+
+	mutex_lock(&cdev->cosm_mutex);
+	kfree(cdev->bootmode);
+
+	cdev->bootmode = kmalloc(count + 1, GFP_KERNEL);
+	if (!cdev->bootmode) {
+		count = -ENOMEM;
+		goto unlock;
+	}
+
+	strncpy(cdev->bootmode, buf, count);
+
+	if (cdev->bootmode[count - 1] == '\n')
+		cdev->bootmode[count - 1] = '\0';
+	else
+		cdev->bootmode[count] = '\0';
+unlock:
+	mutex_unlock(&cdev->cosm_mutex);
+	return count;
+}
+static DEVICE_ATTR_RW(bootmode);
+
+static ssize_t
+log_buf_addr_show(struct device *dev, struct device_attribute *attr,
+		  char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev)
+		return -EINVAL;
+
+	return scnprintf(buf, PAGE_SIZE, "%p\n", cdev->log_buf_addr);
+}
+
+static ssize_t
+log_buf_addr_store(struct device *dev, struct device_attribute *attr,
+		   const char *buf, size_t count)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+	int ret;
+	unsigned long addr;
+
+	if (!cdev)
+		return -EINVAL;
+
+	ret = kstrtoul(buf, 16, &addr);
+	if (ret)
+		goto exit;
+
+	cdev->log_buf_addr = (void *)addr;
+	ret = count;
+exit:
+	return ret;
+}
+static DEVICE_ATTR_RW(log_buf_addr);
+
+static ssize_t
+log_buf_len_show(struct device *dev, struct device_attribute *attr,
+		 char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev)
+		return -EINVAL;
+
+	return scnprintf(buf, PAGE_SIZE, "%p\n", cdev->log_buf_len);
+}
+
+static ssize_t
+log_buf_len_store(struct device *dev, struct device_attribute *attr,
+		  const char *buf, size_t count)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+	int ret;
+	unsigned long addr;
+
+	if (!cdev)
+		return -EINVAL;
+
+	ret = kstrtoul(buf, 16, &addr);
+	if (ret)
+		goto exit;
+
+	cdev->log_buf_len = (int *)addr;
+	ret = count;
+exit:
+	return ret;
+}
+static DEVICE_ATTR_RW(log_buf_len);
+
+static struct attribute *cosm_default_attrs[] = {
+	&dev_attr_family.attr,
+	&dev_attr_stepping.attr,
+	&dev_attr_state.attr,
+	&dev_attr_shutdown_status.attr,
+	&dev_attr_heartbeat_enable.attr,
+	&dev_attr_cmdline.attr,
+	&dev_attr_firmware.attr,
+	&dev_attr_ramdisk.attr,
+	&dev_attr_bootmode.attr,
+	&dev_attr_log_buf_addr.attr,
+	&dev_attr_log_buf_len.attr,
+
+	NULL
+};
+
+ATTRIBUTE_GROUPS(cosm_default);
+
+void cosm_sysfs_init(struct cosm_device *cdev)
+{
+	cdev->attr_group = cosm_default_groups;
+}
diff --git a/drivers/misc/mic/cosm_client/Makefile b/drivers/misc/mic/cosm_client/Makefile
new file mode 100644
index 000000000..6f751a519
--- /dev/null
+++ b/drivers/misc/mic/cosm_client/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile - Intel MIC COSM Client Driver
+# Copyright(c) 2015, Intel Corporation.
+#
+obj-$(CONFIG_MIC_COSM) += cosm_client.o
+
+cosm_client-objs += cosm_scif_client.o
diff --git a/drivers/misc/mic/cosm_client/cosm_scif_client.c b/drivers/misc/mic/cosm_client/cosm_scif_client.c
new file mode 100644
index 000000000..225078cb5
--- /dev/null
+++ b/drivers/misc/mic/cosm_client/cosm_scif_client.c
@@ -0,0 +1,281 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC COSM Client Driver
+ *
+ */
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/kthread.h>
+#include <linux/sched/signal.h>
+
+#include "../cosm/cosm_main.h"
+
+#define COSM_SCIF_MAX_RETRIES 10
+#define COSM_HEARTBEAT_SEND_MSEC (COSM_HEARTBEAT_SEND_SEC * MSEC_PER_SEC)
+
+static struct task_struct *client_thread;
+static scif_epd_t client_epd;
+static struct scif_peer_dev *client_spdev;
+
+/*
+ * Reboot notifier: receives shutdown status from the OS and communicates it
+ * back to the COSM process on the host
+ */
+static int cosm_reboot_event(struct notifier_block *this, unsigned long event,
+			     void *ptr)
+{
+	struct cosm_msg msg = { .id = COSM_MSG_SHUTDOWN_STATUS };
+	int rc;
+
+	event = (event == SYS_RESTART) ? SYSTEM_RESTART : event;
+	dev_info(&client_spdev->dev, "%s %d received event %ld\n",
+		 __func__, __LINE__, event);
+
+	msg.shutdown_status = event;
+	rc = scif_send(client_epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
+	if (rc < 0)
+		dev_err(&client_spdev->dev, "%s %d scif_send rc %d\n",
+			__func__, __LINE__, rc);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block cosm_reboot = {
+	.notifier_call  = cosm_reboot_event,
+};
+
+/* Set system time from timespec value received from the host */
+static void cosm_set_time(struct cosm_msg *msg)
+{
+	struct timespec64 ts = {
+		.tv_sec = msg->timespec.tv_sec,
+		.tv_nsec = msg->timespec.tv_nsec,
+	};
+	int rc = do_settimeofday64(&ts);
+
+	if (rc)
+		dev_err(&client_spdev->dev, "%s: %d settimeofday rc %d\n",
+			__func__, __LINE__, rc);
+}
+
+/* COSM client receive message processing */
+static void cosm_client_recv(void)
+{
+	struct cosm_msg msg;
+	int rc;
+
+	while (1) {
+		rc = scif_recv(client_epd, &msg, sizeof(msg), 0);
+		if (!rc) {
+			return;
+		} else if (rc < 0) {
+			dev_err(&client_spdev->dev, "%s: %d rc %d\n",
+				__func__, __LINE__, rc);
+			return;
+		}
+
+		dev_dbg(&client_spdev->dev, "%s: %d rc %d id 0x%llx\n",
+			__func__, __LINE__, rc, msg.id);
+
+		switch (msg.id) {
+		case COSM_MSG_SYNC_TIME:
+			cosm_set_time(&msg);
+			break;
+		case COSM_MSG_SHUTDOWN:
+			orderly_poweroff(true);
+			break;
+		default:
+			dev_err(&client_spdev->dev, "%s: %d unknown id %lld\n",
+				__func__, __LINE__, msg.id);
+			break;
+		}
+	}
+}
+
+/* Initiate connection to the COSM server on the host */
+static int cosm_scif_connect(void)
+{
+	struct scif_port_id port_id;
+	int i, rc;
+
+	client_epd = scif_open();
+	if (!client_epd) {
+		dev_err(&client_spdev->dev, "%s %d scif_open failed\n",
+			__func__, __LINE__);
+		return -ENOMEM;
+	}
+
+	port_id.node = 0;
+	port_id.port = SCIF_COSM_LISTEN_PORT;
+
+	for (i = 0; i < COSM_SCIF_MAX_RETRIES; i++) {
+		rc = scif_connect(client_epd, &port_id);
+		if (rc < 0)
+			msleep(1000);
+		else
+			break;
+	}
+
+	if (rc < 0) {
+		dev_err(&client_spdev->dev, "%s %d scif_connect rc %d\n",
+			__func__, __LINE__, rc);
+		scif_close(client_epd);
+		client_epd = NULL;
+	}
+	return rc < 0 ? rc : 0;
+}
+
+/* Close host SCIF connection */
+static void cosm_scif_connect_exit(void)
+{
+	if (client_epd) {
+		scif_close(client_epd);
+		client_epd = NULL;
+	}
+}
+
+/*
+ * COSM SCIF client thread function: waits for messages from the host and sends
+ * a heartbeat to the host
+ */
+static int cosm_scif_client(void *unused)
+{
+	struct cosm_msg msg = { .id = COSM_MSG_HEARTBEAT };
+	struct scif_pollepd pollepd;
+	int rc;
+
+	allow_signal(SIGKILL);
+
+	while (!kthread_should_stop()) {
+		pollepd.epd = client_epd;
+		pollepd.events = EPOLLIN;
+
+		rc = scif_poll(&pollepd, 1, COSM_HEARTBEAT_SEND_MSEC);
+		if (rc < 0) {
+			if (-EINTR != rc)
+				dev_err(&client_spdev->dev,
+					"%s %d scif_poll rc %d\n",
+					__func__, __LINE__, rc);
+			continue;
+		}
+
+		if (pollepd.revents & EPOLLIN)
+			cosm_client_recv();
+
+		msg.id = COSM_MSG_HEARTBEAT;
+		rc = scif_send(client_epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
+		if (rc < 0)
+			dev_err(&client_spdev->dev, "%s %d scif_send rc %d\n",
+				__func__, __LINE__, rc);
+	}
+
+	dev_dbg(&client_spdev->dev, "%s %d Client thread stopped\n",
+		__func__, __LINE__);
+	return 0;
+}
+
+static void cosm_scif_probe(struct scif_peer_dev *spdev)
+{
+	int rc;
+
+	dev_dbg(&spdev->dev, "%s %d: dnode %d\n",
+		__func__, __LINE__, spdev->dnode);
+
+	/* We are only interested in the host with spdev->dnode == 0 */
+	if (spdev->dnode)
+		return;
+
+	client_spdev = spdev;
+	rc = cosm_scif_connect();
+	if (rc)
+		goto exit;
+
+	rc = register_reboot_notifier(&cosm_reboot);
+	if (rc) {
+		dev_err(&spdev->dev,
+			"reboot notifier registration failed rc %d\n", rc);
+		goto connect_exit;
+	}
+
+	client_thread = kthread_run(cosm_scif_client, NULL, "cosm_client");
+	if (IS_ERR(client_thread)) {
+		rc = PTR_ERR(client_thread);
+		dev_err(&spdev->dev, "%s %d kthread_run rc %d\n",
+			__func__, __LINE__, rc);
+		goto unreg_reboot;
+	}
+	return;
+unreg_reboot:
+	unregister_reboot_notifier(&cosm_reboot);
+connect_exit:
+	cosm_scif_connect_exit();
+exit:
+	client_spdev = NULL;
+}
+
+static void cosm_scif_remove(struct scif_peer_dev *spdev)
+{
+	int rc;
+
+	dev_dbg(&spdev->dev, "%s %d: dnode %d\n",
+		__func__, __LINE__, spdev->dnode);
+
+	if (spdev->dnode)
+		return;
+
+	if (!IS_ERR_OR_NULL(client_thread)) {
+		rc = send_sig(SIGKILL, client_thread, 0);
+		if (rc) {
+			pr_err("%s %d send_sig rc %d\n",
+			       __func__, __LINE__, rc);
+			return;
+		}
+		kthread_stop(client_thread);
+	}
+	unregister_reboot_notifier(&cosm_reboot);
+	cosm_scif_connect_exit();
+	client_spdev = NULL;
+}
+
+static struct scif_client scif_client_cosm = {
+	.name = KBUILD_MODNAME,
+	.probe = cosm_scif_probe,
+	.remove = cosm_scif_remove,
+};
+
+static int __init cosm_client_init(void)
+{
+	int rc = scif_client_register(&scif_client_cosm);
+
+	if (rc)
+		pr_err("scif_client_register failed rc %d\n", rc);
+	return rc;
+}
+
+static void __exit cosm_client_exit(void)
+{
+	scif_client_unregister(&scif_client_cosm);
+}
+
+module_init(cosm_client_init);
+module_exit(cosm_client_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC card OS state management client driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/host/Makefile b/drivers/misc/mic/host/Makefile
new file mode 100644
index 000000000..25f153367
--- /dev/null
+++ b/drivers/misc/mic/host/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile - Intel MIC Linux driver.
+# Copyright(c) 2013, Intel Corporation.
+#
+obj-$(CONFIG_INTEL_MIC_HOST) += mic_host.o
+mic_host-objs := mic_main.o
+mic_host-objs += mic_x100.o
+mic_host-objs += mic_smpt.o
+mic_host-objs += mic_intr.o
+mic_host-objs += mic_boot.o
+mic_host-objs += mic_debugfs.o
diff --git a/drivers/misc/mic/host/mic_boot.c b/drivers/misc/mic/host/mic_boot.c
new file mode 100644
index 000000000..c327985c9
--- /dev/null
+++ b/drivers/misc/mic/host/mic_boot.c
@@ -0,0 +1,599 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/pci.h>
+#include <linux/kmod.h>
+#include <linux/mic_common.h>
+#include <linux/mic_bus.h>
+#include "../bus/scif_bus.h"
+#include "../bus/vop_bus.h"
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+#include "mic_smpt.h"
+
+static inline struct mic_device *vpdev_to_mdev(struct device *dev)
+{
+	return dev_get_drvdata(dev->parent);
+}
+
+static dma_addr_t
+_mic_dma_map_page(struct device *dev, struct page *page,
+		  unsigned long offset, size_t size,
+		  enum dma_data_direction dir, unsigned long attrs)
+{
+	void *va = phys_to_virt(page_to_phys(page)) + offset;
+	struct mic_device *mdev = vpdev_to_mdev(dev);
+
+	return mic_map_single(mdev, va, size);
+}
+
+static void _mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
+				size_t size, enum dma_data_direction dir,
+				unsigned long attrs)
+{
+	struct mic_device *mdev = vpdev_to_mdev(dev);
+
+	mic_unmap_single(mdev, dma_addr, size);
+}
+
+static const struct dma_map_ops _mic_dma_ops = {
+	.map_page = _mic_dma_map_page,
+	.unmap_page = _mic_dma_unmap_page,
+};
+
+static struct mic_irq *
+__mic_request_irq(struct vop_device *vpdev,
+		  irqreturn_t (*func)(int irq, void *data),
+		  const char *name, void *data, int intr_src)
+{
+	struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev);
+
+	return mic_request_threaded_irq(mdev, func, NULL, name, data,
+					intr_src, MIC_INTR_DB);
+}
+
+static void __mic_free_irq(struct vop_device *vpdev,
+			   struct mic_irq *cookie, void *data)
+{
+	struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev);
+
+	mic_free_irq(mdev, cookie, data);
+}
+
+static void __mic_ack_interrupt(struct vop_device *vpdev, int num)
+{
+	struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev);
+
+	mdev->ops->intr_workarounds(mdev);
+}
+
+static int __mic_next_db(struct vop_device *vpdev)
+{
+	struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev);
+
+	return mic_next_db(mdev);
+}
+
+static void *__mic_get_dp(struct vop_device *vpdev)
+{
+	struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev);
+
+	return mdev->dp;
+}
+
+static void __iomem *__mic_get_remote_dp(struct vop_device *vpdev)
+{
+	return NULL;
+}
+
+static void __mic_send_intr(struct vop_device *vpdev, int db)
+{
+	struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev);
+
+	mdev->ops->send_intr(mdev, db);
+}
+
+static void __iomem *__mic_ioremap(struct vop_device *vpdev,
+				   dma_addr_t pa, size_t len)
+{
+	struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev);
+
+	return mdev->aper.va + pa;
+}
+
+static void __mic_iounmap(struct vop_device *vpdev, void __iomem *va)
+{
+	/* nothing to do */
+}
+
+static struct vop_hw_ops vop_hw_ops = {
+	.request_irq = __mic_request_irq,
+	.free_irq = __mic_free_irq,
+	.ack_interrupt = __mic_ack_interrupt,
+	.next_db = __mic_next_db,
+	.get_dp = __mic_get_dp,
+	.get_remote_dp = __mic_get_remote_dp,
+	.send_intr = __mic_send_intr,
+	.ioremap = __mic_ioremap,
+	.iounmap = __mic_iounmap,
+};
+
+static inline struct mic_device *scdev_to_mdev(struct scif_hw_dev *scdev)
+{
+	return dev_get_drvdata(scdev->dev.parent);
+}
+
+static void *__mic_dma_alloc(struct device *dev, size_t size,
+			     dma_addr_t *dma_handle, gfp_t gfp,
+			     unsigned long attrs)
+{
+	struct scif_hw_dev *scdev = dev_get_drvdata(dev);
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+	dma_addr_t tmp;
+	void *va = kmalloc(size, gfp);
+
+	if (va) {
+		tmp = mic_map_single(mdev, va, size);
+		if (dma_mapping_error(dev, tmp)) {
+			kfree(va);
+			va = NULL;
+		} else {
+			*dma_handle = tmp;
+		}
+	}
+	return va;
+}
+
+static void __mic_dma_free(struct device *dev, size_t size, void *vaddr,
+			   dma_addr_t dma_handle, unsigned long attrs)
+{
+	struct scif_hw_dev *scdev = dev_get_drvdata(dev);
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	mic_unmap_single(mdev, dma_handle, size);
+	kfree(vaddr);
+}
+
+static dma_addr_t
+__mic_dma_map_page(struct device *dev, struct page *page, unsigned long offset,
+		   size_t size, enum dma_data_direction dir,
+		   unsigned long attrs)
+{
+	void *va = phys_to_virt(page_to_phys(page)) + offset;
+	struct scif_hw_dev *scdev = dev_get_drvdata(dev);
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	return mic_map_single(mdev, va, size);
+}
+
+static void
+__mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
+		     size_t size, enum dma_data_direction dir,
+		     unsigned long attrs)
+{
+	struct scif_hw_dev *scdev = dev_get_drvdata(dev);
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	mic_unmap_single(mdev, dma_addr, size);
+}
+
+static int __mic_dma_map_sg(struct device *dev, struct scatterlist *sg,
+			    int nents, enum dma_data_direction dir,
+			    unsigned long attrs)
+{
+	struct scif_hw_dev *scdev = dev_get_drvdata(dev);
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+	struct scatterlist *s;
+	int i, j, ret;
+	dma_addr_t da;
+
+	ret = dma_map_sg(&mdev->pdev->dev, sg, nents, dir);
+	if (ret <= 0)
+		return 0;
+
+	for_each_sg(sg, s, nents, i) {
+		da = mic_map(mdev, sg_dma_address(s) + s->offset, s->length);
+		if (!da)
+			goto err;
+		sg_dma_address(s) = da;
+	}
+	return nents;
+err:
+	for_each_sg(sg, s, i, j) {
+		mic_unmap(mdev, sg_dma_address(s), s->length);
+		sg_dma_address(s) = mic_to_dma_addr(mdev, sg_dma_address(s));
+	}
+	dma_unmap_sg(&mdev->pdev->dev, sg, nents, dir);
+	return 0;
+}
+
+static void __mic_dma_unmap_sg(struct device *dev,
+			       struct scatterlist *sg, int nents,
+			       enum dma_data_direction dir,
+			       unsigned long attrs)
+{
+	struct scif_hw_dev *scdev = dev_get_drvdata(dev);
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+	struct scatterlist *s;
+	dma_addr_t da;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		da = mic_to_dma_addr(mdev, sg_dma_address(s));
+		mic_unmap(mdev, sg_dma_address(s), s->length);
+		sg_dma_address(s) = da;
+	}
+	dma_unmap_sg(&mdev->pdev->dev, sg, nents, dir);
+}
+
+static const struct dma_map_ops __mic_dma_ops = {
+	.alloc = __mic_dma_alloc,
+	.free = __mic_dma_free,
+	.map_page = __mic_dma_map_page,
+	.unmap_page = __mic_dma_unmap_page,
+	.map_sg = __mic_dma_map_sg,
+	.unmap_sg = __mic_dma_unmap_sg,
+};
+
+static struct mic_irq *
+___mic_request_irq(struct scif_hw_dev *scdev,
+		   irqreturn_t (*func)(int irq, void *data),
+				       const char *name,
+				       void *data, int db)
+{
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	return mic_request_threaded_irq(mdev, func, NULL, name, data,
+					db, MIC_INTR_DB);
+}
+
+static void
+___mic_free_irq(struct scif_hw_dev *scdev,
+		struct mic_irq *cookie, void *data)
+{
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	mic_free_irq(mdev, cookie, data);
+}
+
+static void ___mic_ack_interrupt(struct scif_hw_dev *scdev, int num)
+{
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	mdev->ops->intr_workarounds(mdev);
+}
+
+static int ___mic_next_db(struct scif_hw_dev *scdev)
+{
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	return mic_next_db(mdev);
+}
+
+static void ___mic_send_intr(struct scif_hw_dev *scdev, int db)
+{
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	mdev->ops->send_intr(mdev, db);
+}
+
+static void __iomem *___mic_ioremap(struct scif_hw_dev *scdev,
+				    phys_addr_t pa, size_t len)
+{
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	return mdev->aper.va + pa;
+}
+
+static void ___mic_iounmap(struct scif_hw_dev *scdev, void __iomem *va)
+{
+	/* nothing to do */
+}
+
+static struct scif_hw_ops scif_hw_ops = {
+	.request_irq = ___mic_request_irq,
+	.free_irq = ___mic_free_irq,
+	.ack_interrupt = ___mic_ack_interrupt,
+	.next_db = ___mic_next_db,
+	.send_intr = ___mic_send_intr,
+	.ioremap = ___mic_ioremap,
+	.iounmap = ___mic_iounmap,
+};
+
+static inline struct mic_device *mbdev_to_mdev(struct mbus_device *mbdev)
+{
+	return dev_get_drvdata(mbdev->dev.parent);
+}
+
+static dma_addr_t
+mic_dma_map_page(struct device *dev, struct page *page,
+		 unsigned long offset, size_t size, enum dma_data_direction dir,
+		 unsigned long attrs)
+{
+	void *va = phys_to_virt(page_to_phys(page)) + offset;
+	struct mic_device *mdev = dev_get_drvdata(dev->parent);
+
+	return mic_map_single(mdev, va, size);
+}
+
+static void
+mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
+		   size_t size, enum dma_data_direction dir,
+		   unsigned long attrs)
+{
+	struct mic_device *mdev = dev_get_drvdata(dev->parent);
+	mic_unmap_single(mdev, dma_addr, size);
+}
+
+static const struct dma_map_ops mic_dma_ops = {
+	.map_page = mic_dma_map_page,
+	.unmap_page = mic_dma_unmap_page,
+};
+
+static struct mic_irq *
+_mic_request_threaded_irq(struct mbus_device *mbdev,
+			  irq_handler_t handler, irq_handler_t thread_fn,
+			  const char *name, void *data, int intr_src)
+{
+	return mic_request_threaded_irq(mbdev_to_mdev(mbdev), handler,
+					thread_fn, name, data,
+					intr_src, MIC_INTR_DMA);
+}
+
+static void _mic_free_irq(struct mbus_device *mbdev,
+			  struct mic_irq *cookie, void *data)
+{
+	mic_free_irq(mbdev_to_mdev(mbdev), cookie, data);
+}
+
+static void _mic_ack_interrupt(struct mbus_device *mbdev, int num)
+{
+	struct mic_device *mdev = mbdev_to_mdev(mbdev);
+	mdev->ops->intr_workarounds(mdev);
+}
+
+static struct mbus_hw_ops mbus_hw_ops = {
+	.request_threaded_irq = _mic_request_threaded_irq,
+	.free_irq = _mic_free_irq,
+	.ack_interrupt = _mic_ack_interrupt,
+};
+
+/* Initialize the MIC bootparams */
+void mic_bootparam_init(struct mic_device *mdev)
+{
+	struct mic_bootparam *bootparam = mdev->dp;
+
+	bootparam->magic = cpu_to_le32(MIC_MAGIC);
+	bootparam->h2c_config_db = -1;
+	bootparam->node_id = mdev->id + 1;
+	bootparam->scif_host_dma_addr = 0x0;
+	bootparam->scif_card_dma_addr = 0x0;
+	bootparam->c2h_scif_db = -1;
+	bootparam->h2c_scif_db = -1;
+}
+
+static inline struct mic_device *cosmdev_to_mdev(struct cosm_device *cdev)
+{
+	return dev_get_drvdata(cdev->dev.parent);
+}
+
+static void _mic_reset(struct cosm_device *cdev)
+{
+	struct mic_device *mdev = cosmdev_to_mdev(cdev);
+
+	mdev->ops->reset_fw_ready(mdev);
+	mdev->ops->reset(mdev);
+}
+
+static bool _mic_ready(struct cosm_device *cdev)
+{
+	struct mic_device *mdev = cosmdev_to_mdev(cdev);
+
+	return mdev->ops->is_fw_ready(mdev);
+}
+
+/**
+ * mic_request_dma_chans - Request DMA channels
+ * @mdev: pointer to mic_device instance
+ *
+ * returns number of DMA channels acquired
+ */
+static int mic_request_dma_chans(struct mic_device *mdev)
+{
+	dma_cap_mask_t mask;
+	struct dma_chan *chan;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_MEMCPY, mask);
+
+	do {
+		chan = dma_request_channel(mask, mdev->ops->dma_filter,
+					   &mdev->pdev->dev);
+		if (chan) {
+			mdev->dma_ch[mdev->num_dma_ch++] = chan;
+			if (mdev->num_dma_ch >= MIC_MAX_DMA_CHAN)
+				break;
+		}
+	} while (chan);
+	dev_info(&mdev->pdev->dev, "DMA channels # %d\n", mdev->num_dma_ch);
+	return mdev->num_dma_ch;
+}
+
+/**
+ * mic_free_dma_chans - release DMA channels
+ * @mdev: pointer to mic_device instance
+ *
+ * returns none
+ */
+static void mic_free_dma_chans(struct mic_device *mdev)
+{
+	int i = 0;
+
+	for (i = 0; i < mdev->num_dma_ch; i++) {
+		dma_release_channel(mdev->dma_ch[i]);
+		mdev->dma_ch[i] = NULL;
+	}
+	mdev->num_dma_ch = 0;
+}
+
+/**
+ * _mic_start - Start the MIC.
+ * @cdev: pointer to cosm_device instance
+ * @id: MIC device id/index provided by COSM used in other drivers like SCIF
+ *
+ * This function prepares an MIC for boot and initiates boot.
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ *
+ * For all cosm_hw_ops the caller holds a mutex to ensure serialization.
+ */
+static int _mic_start(struct cosm_device *cdev, int id)
+{
+	struct mic_device *mdev = cosmdev_to_mdev(cdev);
+	int rc;
+
+	mic_bootparam_init(mdev);
+	mdev->dma_mbdev = mbus_register_device(&mdev->pdev->dev,
+					       MBUS_DEV_DMA_HOST, &mic_dma_ops,
+					       &mbus_hw_ops, id, mdev->mmio.va);
+	if (IS_ERR(mdev->dma_mbdev)) {
+		rc = PTR_ERR(mdev->dma_mbdev);
+		goto unlock_ret;
+	}
+	if (!mic_request_dma_chans(mdev)) {
+		rc = -ENODEV;
+		goto dma_remove;
+	}
+	mdev->scdev = scif_register_device(&mdev->pdev->dev, MIC_SCIF_DEV,
+					   &__mic_dma_ops, &scif_hw_ops,
+					   id + 1, 0, &mdev->mmio,
+					   &mdev->aper, mdev->dp, NULL,
+					   mdev->dma_ch, mdev->num_dma_ch,
+					   true);
+	if (IS_ERR(mdev->scdev)) {
+		rc = PTR_ERR(mdev->scdev);
+		goto dma_free;
+	}
+
+	mdev->vpdev = vop_register_device(&mdev->pdev->dev,
+					  VOP_DEV_TRNSP, &_mic_dma_ops,
+					  &vop_hw_ops, id + 1, &mdev->aper,
+					  mdev->dma_ch[0]);
+	if (IS_ERR(mdev->vpdev)) {
+		rc = PTR_ERR(mdev->vpdev);
+		goto scif_remove;
+	}
+
+	rc = mdev->ops->load_mic_fw(mdev, NULL);
+	if (rc)
+		goto vop_remove;
+	mic_smpt_restore(mdev);
+	mic_intr_restore(mdev);
+	mdev->intr_ops->enable_interrupts(mdev);
+	mdev->ops->write_spad(mdev, MIC_DPLO_SPAD, mdev->dp_dma_addr);
+	mdev->ops->write_spad(mdev, MIC_DPHI_SPAD, mdev->dp_dma_addr >> 32);
+	mdev->ops->send_firmware_intr(mdev);
+	goto unlock_ret;
+vop_remove:
+	vop_unregister_device(mdev->vpdev);
+scif_remove:
+	scif_unregister_device(mdev->scdev);
+dma_free:
+	mic_free_dma_chans(mdev);
+dma_remove:
+	mbus_unregister_device(mdev->dma_mbdev);
+unlock_ret:
+	return rc;
+}
+
+/**
+ * _mic_stop - Prepare the MIC for reset and trigger reset.
+ * @cdev: pointer to cosm_device instance
+ * @force: force a MIC to reset even if it is already offline.
+ *
+ * RETURNS: None.
+ */
+static void _mic_stop(struct cosm_device *cdev, bool force)
+{
+	struct mic_device *mdev = cosmdev_to_mdev(cdev);
+
+	/*
+	 * Since SCIF handles card shutdown and reset (using COSM), it will
+	 * will be the first to be registered and the last to be
+	 * unregistered.
+	 */
+	vop_unregister_device(mdev->vpdev);
+	scif_unregister_device(mdev->scdev);
+	mic_free_dma_chans(mdev);
+	mbus_unregister_device(mdev->dma_mbdev);
+	mic_bootparam_init(mdev);
+}
+
+static ssize_t _mic_family(struct cosm_device *cdev, char *buf)
+{
+	struct mic_device *mdev = cosmdev_to_mdev(cdev);
+	static const char *family[MIC_FAMILY_LAST] = { "x100", "Unknown" };
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", family[mdev->family]);
+}
+
+static ssize_t _mic_stepping(struct cosm_device *cdev, char *buf)
+{
+	struct mic_device *mdev = cosmdev_to_mdev(cdev);
+	const char *string = "??";
+
+	switch (mdev->stepping) {
+	case MIC_A0_STEP:
+		string = "A0";
+		break;
+	case MIC_B0_STEP:
+		string = "B0";
+		break;
+	case MIC_B1_STEP:
+		string = "B1";
+		break;
+	case MIC_C0_STEP:
+		string = "C0";
+		break;
+	default:
+		break;
+	}
+	return scnprintf(buf, PAGE_SIZE, "%s\n", string);
+}
+
+static struct mic_mw *_mic_aper(struct cosm_device *cdev)
+{
+	struct mic_device *mdev = cosmdev_to_mdev(cdev);
+
+	return &mdev->aper;
+}
+
+struct cosm_hw_ops cosm_hw_ops = {
+	.reset = _mic_reset,
+	.force_reset = _mic_reset,
+	.post_reset = NULL,
+	.ready = _mic_ready,
+	.start = _mic_start,
+	.stop = _mic_stop,
+	.family = _mic_family,
+	.stepping = _mic_stepping,
+	.aper = _mic_aper,
+};
diff --git a/drivers/misc/mic/host/mic_debugfs.c b/drivers/misc/mic/host/mic_debugfs.c
new file mode 100644
index 000000000..0a9daba8b
--- /dev/null
+++ b/drivers/misc/mic/host/mic_debugfs.c
@@ -0,0 +1,216 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/pci.h>
+#include <linux/seq_file.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+#include "mic_smpt.h"
+
+/* Debugfs parent dir */
+static struct dentry *mic_dbg;
+
+static int mic_smpt_show(struct seq_file *s, void *pos)
+{
+	int i;
+	struct mic_device *mdev = s->private;
+	unsigned long flags;
+
+	seq_printf(s, "MIC %-2d |%-10s| %-14s %-10s\n",
+		   mdev->id, "SMPT entry", "SW DMA addr", "RefCount");
+	seq_puts(s, "====================================================\n");
+
+	if (mdev->smpt) {
+		struct mic_smpt_info *smpt_info = mdev->smpt;
+		spin_lock_irqsave(&smpt_info->smpt_lock, flags);
+		for (i = 0; i < smpt_info->info.num_reg; i++) {
+			seq_printf(s, "%9s|%-10d| %-#14llx %-10lld\n",
+				   " ",  i, smpt_info->entry[i].dma_addr,
+				   smpt_info->entry[i].ref_count);
+		}
+		spin_unlock_irqrestore(&smpt_info->smpt_lock, flags);
+	}
+	seq_puts(s, "====================================================\n");
+	return 0;
+}
+
+static int mic_smpt_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mic_smpt_show, inode->i_private);
+}
+
+static int mic_smpt_debug_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file);
+}
+
+static const struct file_operations smpt_file_ops = {
+	.owner   = THIS_MODULE,
+	.open    = mic_smpt_debug_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = mic_smpt_debug_release
+};
+
+static int mic_post_code_show(struct seq_file *s, void *pos)
+{
+	struct mic_device *mdev = s->private;
+	u32 reg = mdev->ops->get_postcode(mdev);
+
+	seq_printf(s, "%c%c", reg & 0xff, (reg >> 8) & 0xff);
+	return 0;
+}
+
+static int mic_post_code_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mic_post_code_show, inode->i_private);
+}
+
+static int mic_post_code_debug_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file);
+}
+
+static const struct file_operations post_code_ops = {
+	.owner   = THIS_MODULE,
+	.open    = mic_post_code_debug_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = mic_post_code_debug_release
+};
+
+static int mic_msi_irq_info_show(struct seq_file *s, void *pos)
+{
+	struct mic_device *mdev  = s->private;
+	int reg;
+	int i, j;
+	u16 entry;
+	u16 vector;
+	struct pci_dev *pdev = mdev->pdev;
+
+	if (pci_dev_msi_enabled(pdev)) {
+		for (i = 0; i < mdev->irq_info.num_vectors; i++) {
+			if (pdev->msix_enabled) {
+				entry = mdev->irq_info.msix_entries[i].entry;
+				vector = mdev->irq_info.msix_entries[i].vector;
+			} else {
+				entry = 0;
+				vector = pdev->irq;
+			}
+
+			reg = mdev->intr_ops->read_msi_to_src_map(mdev, entry);
+
+			seq_printf(s, "%s %-10d %s %-10d MXAR[%d]: %08X\n",
+				   "IRQ:", vector, "Entry:", entry, i, reg);
+
+			seq_printf(s, "%-10s", "offset:");
+			for (j = (MIC_NUM_OFFSETS - 1); j >= 0; j--)
+				seq_printf(s, "%4d ", j);
+			seq_puts(s, "\n");
+
+
+			seq_printf(s, "%-10s", "count:");
+			for (j = (MIC_NUM_OFFSETS - 1); j >= 0; j--)
+				seq_printf(s, "%4d ",
+					   (mdev->irq_info.mic_msi_map[i] &
+					   BIT(j)) ? 1 : 0);
+			seq_puts(s, "\n\n");
+		}
+	} else {
+		seq_puts(s, "MSI/MSIx interrupts not enabled\n");
+	}
+
+	return 0;
+}
+
+static int mic_msi_irq_info_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mic_msi_irq_info_show, inode->i_private);
+}
+
+static int
+mic_msi_irq_info_debug_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file);
+}
+
+static const struct file_operations msi_irq_info_ops = {
+	.owner   = THIS_MODULE,
+	.open    = mic_msi_irq_info_debug_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = mic_msi_irq_info_debug_release
+};
+
+/**
+ * mic_create_debug_dir - Initialize MIC debugfs entries.
+ */
+void mic_create_debug_dir(struct mic_device *mdev)
+{
+	char name[16];
+
+	if (!mic_dbg)
+		return;
+
+	scnprintf(name, sizeof(name), "mic%d", mdev->id);
+	mdev->dbg_dir = debugfs_create_dir(name, mic_dbg);
+	if (!mdev->dbg_dir)
+		return;
+
+	debugfs_create_file("smpt", 0444, mdev->dbg_dir, mdev, &smpt_file_ops);
+
+	debugfs_create_file("post_code", 0444, mdev->dbg_dir, mdev,
+			    &post_code_ops);
+
+	debugfs_create_file("msi_irq_info", 0444, mdev->dbg_dir, mdev,
+			    &msi_irq_info_ops);
+}
+
+/**
+ * mic_delete_debug_dir - Uninitialize MIC debugfs entries.
+ */
+void mic_delete_debug_dir(struct mic_device *mdev)
+{
+	if (!mdev->dbg_dir)
+		return;
+
+	debugfs_remove_recursive(mdev->dbg_dir);
+}
+
+/**
+ * mic_init_debugfs - Initialize global debugfs entry.
+ */
+void __init mic_init_debugfs(void)
+{
+	mic_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
+	if (!mic_dbg)
+		pr_err("can't create debugfs dir\n");
+}
+
+/**
+ * mic_exit_debugfs - Uninitialize global debugfs entry
+ */
+void mic_exit_debugfs(void)
+{
+	debugfs_remove(mic_dbg);
+}
diff --git a/drivers/misc/mic/host/mic_device.h b/drivers/misc/mic/host/mic_device.h
new file mode 100644
index 000000000..52b12b22f
--- /dev/null
+++ b/drivers/misc/mic/host/mic_device.h
@@ -0,0 +1,169 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#ifndef _MIC_DEVICE_H_
+#define _MIC_DEVICE_H_
+
+#include <linux/cdev.h>
+#include <linux/idr.h>
+#include <linux/notifier.h>
+#include <linux/irqreturn.h>
+#include <linux/dmaengine.h>
+#include <linux/miscdevice.h>
+#include <linux/mic_bus.h>
+#include "../bus/scif_bus.h"
+#include "../bus/vop_bus.h"
+#include "../bus/cosm_bus.h"
+#include "mic_intr.h"
+
+/**
+ * enum mic_stepping - MIC stepping ids.
+ */
+enum mic_stepping {
+	MIC_A0_STEP = 0x0,
+	MIC_B0_STEP = 0x10,
+	MIC_B1_STEP = 0x11,
+	MIC_C0_STEP = 0x20,
+};
+
+extern struct cosm_hw_ops cosm_hw_ops;
+
+/**
+ * struct mic_device -  MIC device information for each card.
+ *
+ * @mmio: MMIO bar information.
+ * @aper: Aperture bar information.
+ * @family: The MIC family to which this device belongs.
+ * @ops: MIC HW specific operations.
+ * @id: The unique device id for this MIC device.
+ * @stepping: Stepping ID.
+ * @pdev: Underlying PCI device.
+ * @mic_mutex: Mutex for synchronizing access to mic_device.
+ * @intr_ops: HW specific interrupt operations.
+ * @smpt_ops: Hardware specific SMPT operations.
+ * @smpt: MIC SMPT information.
+ * @intr_info: H/W specific interrupt information.
+ * @irq_info: The OS specific irq information
+ * @dbg_dir: debugfs directory of this MIC device.
+ * @bootaddr: MIC boot address.
+ * @dp: virtio device page
+ * @dp_dma_addr: virtio device page DMA address.
+ * @dma_mbdev: MIC BUS DMA device.
+ * @dma_ch - Array of DMA channels
+ * @num_dma_ch - Number of DMA channels available
+ * @scdev: SCIF device on the SCIF virtual bus.
+ * @vpdev: Virtio over PCIe device on the VOP virtual bus.
+ * @cosm_dev: COSM device
+ */
+struct mic_device {
+	struct mic_mw mmio;
+	struct mic_mw aper;
+	enum mic_hw_family family;
+	struct mic_hw_ops *ops;
+	int id;
+	enum mic_stepping stepping;
+	struct pci_dev *pdev;
+	struct mutex mic_mutex;
+	struct mic_hw_intr_ops *intr_ops;
+	struct mic_smpt_ops *smpt_ops;
+	struct mic_smpt_info *smpt;
+	struct mic_intr_info *intr_info;
+	struct mic_irq_info irq_info;
+	struct dentry *dbg_dir;
+	u32 bootaddr;
+	void *dp;
+	dma_addr_t dp_dma_addr;
+	struct mbus_device *dma_mbdev;
+	struct dma_chan *dma_ch[MIC_MAX_DMA_CHAN];
+	int num_dma_ch;
+	struct scif_hw_dev *scdev;
+	struct vop_device *vpdev;
+	struct cosm_device *cosm_dev;
+};
+
+/**
+ * struct mic_hw_ops - MIC HW specific operations.
+ * @aper_bar: Aperture bar resource number.
+ * @mmio_bar: MMIO bar resource number.
+ * @read_spad: Read from scratch pad register.
+ * @write_spad: Write to scratch pad register.
+ * @send_intr: Send an interrupt for a particular doorbell on the card.
+ * @ack_interrupt: Hardware specific operations to ack the h/w on
+ * receipt of an interrupt.
+ * @intr_workarounds: Hardware specific workarounds needed after
+ * handling an interrupt.
+ * @reset: Reset the remote processor.
+ * @reset_fw_ready: Reset firmware ready field.
+ * @is_fw_ready: Check if firmware is ready for OS download.
+ * @send_firmware_intr: Send an interrupt to the card firmware.
+ * @load_mic_fw: Load firmware segments required to boot the card
+ * into card memory. This includes the kernel, command line, ramdisk etc.
+ * @get_postcode: Get post code status from firmware.
+ * @dma_filter: DMA filter function to be used.
+ */
+struct mic_hw_ops {
+	u8 aper_bar;
+	u8 mmio_bar;
+	u32 (*read_spad)(struct mic_device *mdev, unsigned int idx);
+	void (*write_spad)(struct mic_device *mdev, unsigned int idx, u32 val);
+	void (*send_intr)(struct mic_device *mdev, int doorbell);
+	u32 (*ack_interrupt)(struct mic_device *mdev);
+	void (*intr_workarounds)(struct mic_device *mdev);
+	void (*reset)(struct mic_device *mdev);
+	void (*reset_fw_ready)(struct mic_device *mdev);
+	bool (*is_fw_ready)(struct mic_device *mdev);
+	void (*send_firmware_intr)(struct mic_device *mdev);
+	int (*load_mic_fw)(struct mic_device *mdev, const char *buf);
+	u32 (*get_postcode)(struct mic_device *mdev);
+	bool (*dma_filter)(struct dma_chan *chan, void *param);
+};
+
+/**
+ * mic_mmio_read - read from an MMIO register.
+ * @mw: MMIO register base virtual address.
+ * @offset: register offset.
+ *
+ * RETURNS: register value.
+ */
+static inline u32 mic_mmio_read(struct mic_mw *mw, u32 offset)
+{
+	return ioread32(mw->va + offset);
+}
+
+/**
+ * mic_mmio_write - write to an MMIO register.
+ * @mw: MMIO register base virtual address.
+ * @val: the data value to put into the register
+ * @offset: register offset.
+ *
+ * RETURNS: none.
+ */
+static inline void
+mic_mmio_write(struct mic_mw *mw, u32 val, u32 offset)
+{
+	iowrite32(val, mw->va + offset);
+}
+
+void mic_bootparam_init(struct mic_device *mdev);
+void mic_create_debug_dir(struct mic_device *dev);
+void mic_delete_debug_dir(struct mic_device *dev);
+void __init mic_init_debugfs(void);
+void mic_exit_debugfs(void);
+#endif
diff --git a/drivers/misc/mic/host/mic_intr.c b/drivers/misc/mic/host/mic_intr.c
new file mode 100644
index 000000000..08ca3e372
--- /dev/null
+++ b/drivers/misc/mic/host/mic_intr.c
@@ -0,0 +1,645 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+
+static irqreturn_t mic_thread_fn(int irq, void *dev)
+{
+	struct mic_device *mdev = dev;
+	struct mic_intr_info *intr_info = mdev->intr_info;
+	struct mic_irq_info *irq_info = &mdev->irq_info;
+	struct mic_intr_cb *intr_cb;
+	struct pci_dev *pdev = mdev->pdev;
+	int i;
+
+	spin_lock(&irq_info->mic_thread_lock);
+	for (i = intr_info->intr_start_idx[MIC_INTR_DB];
+			i < intr_info->intr_len[MIC_INTR_DB]; i++)
+		if (test_and_clear_bit(i, &irq_info->mask)) {
+			list_for_each_entry(intr_cb, &irq_info->cb_list[i],
+					    list)
+				if (intr_cb->thread_fn)
+					intr_cb->thread_fn(pdev->irq,
+							 intr_cb->data);
+		}
+	spin_unlock(&irq_info->mic_thread_lock);
+	return IRQ_HANDLED;
+}
+/**
+ * mic_interrupt - Generic interrupt handler for
+ * MSI and INTx based interrupts.
+ */
+static irqreturn_t mic_interrupt(int irq, void *dev)
+{
+	struct mic_device *mdev = dev;
+	struct mic_intr_info *intr_info = mdev->intr_info;
+	struct mic_irq_info *irq_info = &mdev->irq_info;
+	struct mic_intr_cb *intr_cb;
+	struct pci_dev *pdev = mdev->pdev;
+	u32 mask;
+	int i;
+
+	mask = mdev->ops->ack_interrupt(mdev);
+	if (!mask)
+		return IRQ_NONE;
+
+	spin_lock(&irq_info->mic_intr_lock);
+	for (i = intr_info->intr_start_idx[MIC_INTR_DB];
+			i < intr_info->intr_len[MIC_INTR_DB]; i++)
+		if (mask & BIT(i)) {
+			list_for_each_entry(intr_cb, &irq_info->cb_list[i],
+					    list)
+				if (intr_cb->handler)
+					intr_cb->handler(pdev->irq,
+							 intr_cb->data);
+			set_bit(i, &irq_info->mask);
+		}
+	spin_unlock(&irq_info->mic_intr_lock);
+	return IRQ_WAKE_THREAD;
+}
+
+/* Return the interrupt offset from the index. Index is 0 based. */
+static u16 mic_map_src_to_offset(struct mic_device *mdev,
+				 int intr_src, enum mic_intr_type type)
+{
+	if (type >= MIC_NUM_INTR_TYPES)
+		return MIC_NUM_OFFSETS;
+	if (intr_src >= mdev->intr_info->intr_len[type])
+		return MIC_NUM_OFFSETS;
+
+	return mdev->intr_info->intr_start_idx[type] + intr_src;
+}
+
+/* Return next available msix_entry. */
+static struct msix_entry *mic_get_available_vector(struct mic_device *mdev)
+{
+	int i;
+	struct mic_irq_info *info = &mdev->irq_info;
+
+	for (i = 0; i < info->num_vectors; i++)
+		if (!info->mic_msi_map[i])
+			return &info->msix_entries[i];
+	return NULL;
+}
+
+/**
+ * mic_register_intr_callback - Register a callback handler for the
+ * given source id.
+ *
+ * @mdev: pointer to the mic_device instance
+ * @idx: The source id to be registered.
+ * @handler: The function to be called when the source id receives
+ * the interrupt.
+ * @thread_fn: thread fn. corresponding to the handler
+ * @data: Private data of the requester.
+ * Return the callback structure that was registered or an
+ * appropriate error on failure.
+ */
+static struct mic_intr_cb *mic_register_intr_callback(struct mic_device *mdev,
+			u8 idx, irq_handler_t handler, irq_handler_t thread_fn,
+			void *data)
+{
+	struct mic_intr_cb *intr_cb;
+	unsigned long flags;
+	int rc;
+	intr_cb = kmalloc(sizeof(*intr_cb), GFP_KERNEL);
+
+	if (!intr_cb)
+		return ERR_PTR(-ENOMEM);
+
+	intr_cb->handler = handler;
+	intr_cb->thread_fn = thread_fn;
+	intr_cb->data = data;
+	intr_cb->cb_id = ida_simple_get(&mdev->irq_info.cb_ida,
+		0, 0, GFP_KERNEL);
+	if (intr_cb->cb_id < 0) {
+		rc = intr_cb->cb_id;
+		goto ida_fail;
+	}
+
+	spin_lock(&mdev->irq_info.mic_thread_lock);
+	spin_lock_irqsave(&mdev->irq_info.mic_intr_lock, flags);
+	list_add_tail(&intr_cb->list, &mdev->irq_info.cb_list[idx]);
+	spin_unlock_irqrestore(&mdev->irq_info.mic_intr_lock, flags);
+	spin_unlock(&mdev->irq_info.mic_thread_lock);
+
+	return intr_cb;
+ida_fail:
+	kfree(intr_cb);
+	return ERR_PTR(rc);
+}
+
+/**
+ * mic_unregister_intr_callback - Unregister the callback handler
+ * identified by its callback id.
+ *
+ * @mdev: pointer to the mic_device instance
+ * @idx: The callback structure id to be unregistered.
+ * Return the source id that was unregistered or MIC_NUM_OFFSETS if no
+ * such callback handler was found.
+ */
+static u8 mic_unregister_intr_callback(struct mic_device *mdev, u32 idx)
+{
+	struct list_head *pos, *tmp;
+	struct mic_intr_cb *intr_cb;
+	unsigned long flags;
+	int i;
+
+	spin_lock(&mdev->irq_info.mic_thread_lock);
+	spin_lock_irqsave(&mdev->irq_info.mic_intr_lock, flags);
+	for (i = 0;  i < MIC_NUM_OFFSETS; i++) {
+		list_for_each_safe(pos, tmp, &mdev->irq_info.cb_list[i]) {
+			intr_cb = list_entry(pos, struct mic_intr_cb, list);
+			if (intr_cb->cb_id == idx) {
+				list_del(pos);
+				ida_simple_remove(&mdev->irq_info.cb_ida,
+						  intr_cb->cb_id);
+				kfree(intr_cb);
+				spin_unlock_irqrestore(
+					&mdev->irq_info.mic_intr_lock, flags);
+				spin_unlock(&mdev->irq_info.mic_thread_lock);
+				return i;
+			}
+		}
+	}
+	spin_unlock_irqrestore(&mdev->irq_info.mic_intr_lock, flags);
+	spin_unlock(&mdev->irq_info.mic_thread_lock);
+	return MIC_NUM_OFFSETS;
+}
+
+/**
+ * mic_setup_msix - Initializes MSIx interrupts.
+ *
+ * @mdev: pointer to mic_device instance
+ *
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static int mic_setup_msix(struct mic_device *mdev, struct pci_dev *pdev)
+{
+	int rc, i;
+	int entry_size = sizeof(*mdev->irq_info.msix_entries);
+
+	mdev->irq_info.msix_entries = kmalloc_array(MIC_MIN_MSIX,
+						    entry_size, GFP_KERNEL);
+	if (!mdev->irq_info.msix_entries) {
+		rc = -ENOMEM;
+		goto err_nomem1;
+	}
+
+	for (i = 0; i < MIC_MIN_MSIX; i++)
+		mdev->irq_info.msix_entries[i].entry = i;
+
+	rc = pci_enable_msix_exact(pdev, mdev->irq_info.msix_entries,
+				   MIC_MIN_MSIX);
+	if (rc) {
+		dev_dbg(&pdev->dev, "Error enabling MSIx. rc = %d\n", rc);
+		goto err_enable_msix;
+	}
+
+	mdev->irq_info.num_vectors = MIC_MIN_MSIX;
+	mdev->irq_info.mic_msi_map = kzalloc((sizeof(u32) *
+		mdev->irq_info.num_vectors), GFP_KERNEL);
+
+	if (!mdev->irq_info.mic_msi_map) {
+		rc = -ENOMEM;
+		goto err_nomem2;
+	}
+
+	dev_dbg(&mdev->pdev->dev,
+		"%d MSIx irqs setup\n", mdev->irq_info.num_vectors);
+	return 0;
+err_nomem2:
+	pci_disable_msix(pdev);
+err_enable_msix:
+	kfree(mdev->irq_info.msix_entries);
+err_nomem1:
+	mdev->irq_info.num_vectors = 0;
+	return rc;
+}
+
+/**
+ * mic_setup_callbacks - Initialize data structures needed
+ * to handle callbacks.
+ *
+ * @mdev: pointer to mic_device instance
+ */
+static int mic_setup_callbacks(struct mic_device *mdev)
+{
+	int i;
+
+	mdev->irq_info.cb_list = kmalloc_array(MIC_NUM_OFFSETS,
+					       sizeof(*mdev->irq_info.cb_list),
+					       GFP_KERNEL);
+	if (!mdev->irq_info.cb_list)
+		return -ENOMEM;
+
+	for (i = 0; i < MIC_NUM_OFFSETS; i++)
+		INIT_LIST_HEAD(&mdev->irq_info.cb_list[i]);
+	ida_init(&mdev->irq_info.cb_ida);
+	spin_lock_init(&mdev->irq_info.mic_intr_lock);
+	spin_lock_init(&mdev->irq_info.mic_thread_lock);
+	return 0;
+}
+
+/**
+ * mic_release_callbacks - Uninitialize data structures needed
+ * to handle callbacks.
+ *
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_release_callbacks(struct mic_device *mdev)
+{
+	unsigned long flags;
+	struct list_head *pos, *tmp;
+	struct mic_intr_cb *intr_cb;
+	int i;
+
+	spin_lock(&mdev->irq_info.mic_thread_lock);
+	spin_lock_irqsave(&mdev->irq_info.mic_intr_lock, flags);
+	for (i = 0; i < MIC_NUM_OFFSETS; i++) {
+		if (list_empty(&mdev->irq_info.cb_list[i]))
+			break;
+
+		list_for_each_safe(pos, tmp, &mdev->irq_info.cb_list[i]) {
+			intr_cb = list_entry(pos, struct mic_intr_cb, list);
+			list_del(pos);
+			ida_simple_remove(&mdev->irq_info.cb_ida,
+					  intr_cb->cb_id);
+			kfree(intr_cb);
+		}
+	}
+	spin_unlock_irqrestore(&mdev->irq_info.mic_intr_lock, flags);
+	spin_unlock(&mdev->irq_info.mic_thread_lock);
+	ida_destroy(&mdev->irq_info.cb_ida);
+	kfree(mdev->irq_info.cb_list);
+}
+
+/**
+ * mic_setup_msi - Initializes MSI interrupts.
+ *
+ * @mdev: pointer to mic_device instance
+ * @pdev: PCI device structure
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static int mic_setup_msi(struct mic_device *mdev, struct pci_dev *pdev)
+{
+	int rc;
+
+	rc = pci_enable_msi(pdev);
+	if (rc) {
+		dev_dbg(&pdev->dev, "Error enabling MSI. rc = %d\n", rc);
+		return rc;
+	}
+
+	mdev->irq_info.num_vectors = 1;
+	mdev->irq_info.mic_msi_map = kzalloc((sizeof(u32) *
+		mdev->irq_info.num_vectors), GFP_KERNEL);
+
+	if (!mdev->irq_info.mic_msi_map) {
+		rc = -ENOMEM;
+		goto err_nomem1;
+	}
+
+	rc = mic_setup_callbacks(mdev);
+	if (rc) {
+		dev_err(&pdev->dev, "Error setting up callbacks\n");
+		goto err_nomem2;
+	}
+
+	rc = request_threaded_irq(pdev->irq, mic_interrupt, mic_thread_fn,
+				  0, "mic-msi", mdev);
+	if (rc) {
+		dev_err(&pdev->dev, "Error allocating MSI interrupt\n");
+		goto err_irq_req_fail;
+	}
+
+	dev_dbg(&pdev->dev, "%d MSI irqs setup\n", mdev->irq_info.num_vectors);
+	return 0;
+err_irq_req_fail:
+	mic_release_callbacks(mdev);
+err_nomem2:
+	kfree(mdev->irq_info.mic_msi_map);
+err_nomem1:
+	pci_disable_msi(pdev);
+	mdev->irq_info.num_vectors = 0;
+	return rc;
+}
+
+/**
+ * mic_setup_intx - Initializes legacy interrupts.
+ *
+ * @mdev: pointer to mic_device instance
+ * @pdev: PCI device structure
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static int mic_setup_intx(struct mic_device *mdev, struct pci_dev *pdev)
+{
+	int rc;
+
+	/* Enable intx */
+	pci_intx(pdev, 1);
+	rc = mic_setup_callbacks(mdev);
+	if (rc) {
+		dev_err(&pdev->dev, "Error setting up callbacks\n");
+		goto err_nomem;
+	}
+
+	rc = request_threaded_irq(pdev->irq, mic_interrupt, mic_thread_fn,
+				  IRQF_SHARED, "mic-intx", mdev);
+	if (rc)
+		goto err;
+
+	dev_dbg(&pdev->dev, "intx irq setup\n");
+	return 0;
+err:
+	mic_release_callbacks(mdev);
+err_nomem:
+	return rc;
+}
+
+/**
+ * mic_next_db - Retrieve the next doorbell interrupt source id.
+ * The id is picked sequentially from the available pool of
+ * doorlbell ids.
+ *
+ * @mdev: pointer to the mic_device instance.
+ *
+ * Returns the next doorbell interrupt source.
+ */
+int mic_next_db(struct mic_device *mdev)
+{
+	int next_db;
+
+	next_db = mdev->irq_info.next_avail_src %
+		mdev->intr_info->intr_len[MIC_INTR_DB];
+	mdev->irq_info.next_avail_src++;
+	return next_db;
+}
+
+#define COOKIE_ID_SHIFT 16
+#define GET_ENTRY(cookie) ((cookie) & 0xFFFF)
+#define GET_OFFSET(cookie) ((cookie) >> COOKIE_ID_SHIFT)
+#define MK_COOKIE(x, y) ((x) | (y) << COOKIE_ID_SHIFT)
+
+/**
+ * mic_request_threaded_irq - request an irq. mic_mutex needs
+ * to be held before calling this function.
+ *
+ * @mdev: pointer to mic_device instance
+ * @handler: The callback function that handles the interrupt.
+ * The function needs to call ack_interrupts
+ * (mdev->ops->ack_interrupt(mdev)) when handling the interrupts.
+ * @thread_fn: thread fn required by request_threaded_irq.
+ * @name: The ASCII name of the callee requesting the irq.
+ * @data: private data that is returned back when calling the
+ * function handler.
+ * @intr_src: The source id of the requester. Its the doorbell id
+ * for Doorbell interrupts and DMA channel id for DMA interrupts.
+ * @type: The type of interrupt. Values defined in mic_intr_type
+ *
+ * returns: The cookie that is transparent to the caller. Passed
+ * back when calling mic_free_irq. An appropriate error code
+ * is returned on failure. Caller needs to use IS_ERR(return_val)
+ * to check for failure and PTR_ERR(return_val) to obtained the
+ * error code.
+ *
+ */
+struct mic_irq *
+mic_request_threaded_irq(struct mic_device *mdev,
+			 irq_handler_t handler, irq_handler_t thread_fn,
+			 const char *name, void *data, int intr_src,
+			 enum mic_intr_type type)
+{
+	u16 offset;
+	int rc = 0;
+	struct msix_entry *msix = NULL;
+	unsigned long cookie = 0;
+	u16 entry;
+	struct mic_intr_cb *intr_cb;
+	struct pci_dev *pdev = mdev->pdev;
+
+	offset = mic_map_src_to_offset(mdev, intr_src, type);
+	if (offset >= MIC_NUM_OFFSETS) {
+		dev_err(&mdev->pdev->dev,
+			"Error mapping index %d to a valid source id.\n",
+			intr_src);
+		rc = -EINVAL;
+		goto err;
+	}
+
+	if (mdev->irq_info.num_vectors > 1) {
+		msix = mic_get_available_vector(mdev);
+		if (!msix) {
+			dev_err(&mdev->pdev->dev,
+				"No MSIx vectors available for use.\n");
+			rc = -ENOSPC;
+			goto err;
+		}
+
+		rc = request_threaded_irq(msix->vector, handler, thread_fn,
+					  0, name, data);
+		if (rc) {
+			dev_dbg(&mdev->pdev->dev,
+				"request irq failed rc = %d\n", rc);
+			goto err;
+		}
+		entry = msix->entry;
+		mdev->irq_info.mic_msi_map[entry] |= BIT(offset);
+		mdev->intr_ops->program_msi_to_src_map(mdev,
+				entry, offset, true);
+		cookie = MK_COOKIE(entry, offset);
+		dev_dbg(&mdev->pdev->dev, "irq: %d assigned for src: %d\n",
+			msix->vector, intr_src);
+	} else {
+		intr_cb = mic_register_intr_callback(mdev, offset, handler,
+						     thread_fn, data);
+		if (IS_ERR(intr_cb)) {
+			dev_err(&mdev->pdev->dev,
+				"No available callback entries for use\n");
+			rc = PTR_ERR(intr_cb);
+			goto err;
+		}
+
+		entry = 0;
+		if (pci_dev_msi_enabled(pdev)) {
+			mdev->irq_info.mic_msi_map[entry] |= (1 << offset);
+			mdev->intr_ops->program_msi_to_src_map(mdev,
+				entry, offset, true);
+		}
+		cookie = MK_COOKIE(entry, intr_cb->cb_id);
+		dev_dbg(&mdev->pdev->dev, "callback %d registered for src: %d\n",
+			intr_cb->cb_id, intr_src);
+	}
+	return (struct mic_irq *)cookie;
+err:
+	return ERR_PTR(rc);
+}
+
+/**
+ * mic_free_irq - free irq. mic_mutex
+ *  needs to be held before calling this function.
+ *
+ * @mdev: pointer to mic_device instance
+ * @cookie: cookie obtained during a successful call to mic_request_threaded_irq
+ * @data: private data specified by the calling function during the
+ * mic_request_threaded_irq
+ *
+ * returns: none.
+ */
+void mic_free_irq(struct mic_device *mdev,
+		  struct mic_irq *cookie, void *data)
+{
+	u32 offset;
+	u32 entry;
+	u8 src_id;
+	unsigned int irq;
+	struct pci_dev *pdev = mdev->pdev;
+
+	entry = GET_ENTRY((unsigned long)cookie);
+	offset = GET_OFFSET((unsigned long)cookie);
+	if (mdev->irq_info.num_vectors > 1) {
+		if (entry >= mdev->irq_info.num_vectors) {
+			dev_warn(&mdev->pdev->dev,
+				 "entry %d should be < num_irq %d\n",
+				entry, mdev->irq_info.num_vectors);
+			return;
+		}
+		irq = mdev->irq_info.msix_entries[entry].vector;
+		free_irq(irq, data);
+		mdev->irq_info.mic_msi_map[entry] &= ~(BIT(offset));
+		mdev->intr_ops->program_msi_to_src_map(mdev,
+			entry, offset, false);
+
+		dev_dbg(&mdev->pdev->dev, "irq: %d freed\n", irq);
+	} else {
+		irq = pdev->irq;
+		src_id = mic_unregister_intr_callback(mdev, offset);
+		if (src_id >= MIC_NUM_OFFSETS) {
+			dev_warn(&mdev->pdev->dev, "Error unregistering callback\n");
+			return;
+		}
+		if (pci_dev_msi_enabled(pdev)) {
+			mdev->irq_info.mic_msi_map[entry] &= ~(BIT(src_id));
+			mdev->intr_ops->program_msi_to_src_map(mdev,
+				entry, src_id, false);
+		}
+		dev_dbg(&mdev->pdev->dev, "callback %d unregistered for src: %d\n",
+			offset, src_id);
+	}
+}
+
+/**
+ * mic_setup_interrupts - Initializes interrupts.
+ *
+ * @mdev: pointer to mic_device instance
+ * @pdev: PCI device structure
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+int mic_setup_interrupts(struct mic_device *mdev, struct pci_dev *pdev)
+{
+	int rc;
+
+	rc = mic_setup_msix(mdev, pdev);
+	if (!rc)
+		goto done;
+
+	rc = mic_setup_msi(mdev, pdev);
+	if (!rc)
+		goto done;
+
+	rc = mic_setup_intx(mdev, pdev);
+	if (rc) {
+		dev_err(&mdev->pdev->dev, "no usable interrupts\n");
+		return rc;
+	}
+done:
+	mdev->intr_ops->enable_interrupts(mdev);
+	return 0;
+}
+
+/**
+ * mic_free_interrupts - Frees interrupts setup by mic_setup_interrupts
+ *
+ * @mdev: pointer to mic_device instance
+ * @pdev: PCI device structure
+ *
+ * returns none.
+ */
+void mic_free_interrupts(struct mic_device *mdev, struct pci_dev *pdev)
+{
+	int i;
+
+	mdev->intr_ops->disable_interrupts(mdev);
+	if (mdev->irq_info.num_vectors > 1) {
+		for (i = 0; i < mdev->irq_info.num_vectors; i++) {
+			if (mdev->irq_info.mic_msi_map[i])
+				dev_warn(&pdev->dev, "irq %d may still be in use.\n",
+					 mdev->irq_info.msix_entries[i].vector);
+		}
+		kfree(mdev->irq_info.mic_msi_map);
+		kfree(mdev->irq_info.msix_entries);
+		pci_disable_msix(pdev);
+	} else {
+		if (pci_dev_msi_enabled(pdev)) {
+			free_irq(pdev->irq, mdev);
+			kfree(mdev->irq_info.mic_msi_map);
+			pci_disable_msi(pdev);
+		} else {
+			free_irq(pdev->irq, mdev);
+		}
+		mic_release_callbacks(mdev);
+	}
+}
+
+/**
+ * mic_intr_restore - Restore MIC interrupt registers.
+ *
+ * @mdev: pointer to mic_device instance.
+ *
+ * Restore the interrupt registers to values previously
+ * stored in the SW data structures. mic_mutex needs to
+ * be held before calling this function.
+ *
+ * returns None.
+ */
+void mic_intr_restore(struct mic_device *mdev)
+{
+	int entry, offset;
+	struct pci_dev *pdev = mdev->pdev;
+
+	if (!pci_dev_msi_enabled(pdev))
+		return;
+
+	for (entry = 0; entry < mdev->irq_info.num_vectors; entry++) {
+		for (offset = 0; offset < MIC_NUM_OFFSETS; offset++) {
+			if (mdev->irq_info.mic_msi_map[entry] & BIT(offset))
+				mdev->intr_ops->program_msi_to_src_map(mdev,
+					entry, offset, true);
+		}
+	}
+}
diff --git a/drivers/misc/mic/host/mic_intr.h b/drivers/misc/mic/host/mic_intr.h
new file mode 100644
index 000000000..cce28824d
--- /dev/null
+++ b/drivers/misc/mic/host/mic_intr.h
@@ -0,0 +1,149 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#ifndef _MIC_INTR_H_
+#define _MIC_INTR_H_
+
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+/*
+ * The minimum number of msix vectors required for normal operation.
+ * 3 for virtio network, console and block devices.
+ * 1 for card shutdown notifications.
+ * 4 for host owned DMA channels.
+ * 1 for SCIF
+ */
+#define MIC_MIN_MSIX 9
+#define MIC_NUM_OFFSETS 32
+
+/**
+ * mic_intr_source - The type of source that will generate
+ * the interrupt.The number of types needs to be in sync with
+ * MIC_NUM_INTR_TYPES
+ *
+ * MIC_INTR_DB: The source is a doorbell
+ * MIC_INTR_DMA: The source is a DMA channel
+ * MIC_INTR_ERR: The source is an error interrupt e.g. SBOX ERR
+ * MIC_NUM_INTR_TYPES: Total number of interrupt sources.
+ */
+enum mic_intr_type {
+	MIC_INTR_DB = 0,
+	MIC_INTR_DMA,
+	MIC_INTR_ERR,
+	MIC_NUM_INTR_TYPES
+};
+
+/**
+ * struct mic_intr_info - Contains h/w specific interrupt sources
+ * information.
+ *
+ * @intr_start_idx: Contains the starting indexes of the
+ * interrupt types.
+ * @intr_len: Contains the length of the interrupt types.
+ */
+struct mic_intr_info {
+	u16 intr_start_idx[MIC_NUM_INTR_TYPES];
+	u16 intr_len[MIC_NUM_INTR_TYPES];
+};
+
+/**
+ * struct mic_irq_info - OS specific irq information
+ *
+ * @next_avail_src: next available doorbell that can be assigned.
+ * @msix_entries: msix entries allocated while setting up MSI-x
+ * @mic_msi_map: The MSI/MSI-x mapping information.
+ * @num_vectors: The number of MSI/MSI-x vectors that have been allocated.
+ * @cb_ida: callback ID allocator to track the callbacks registered.
+ * @mic_intr_lock: spinlock to protect the interrupt callback list.
+ * @mic_thread_lock: spinlock to protect the thread callback list.
+ *		   This lock is used to protect against thread_fn while
+ *		   mic_intr_lock is used to protect against interrupt handler.
+ * @cb_list: Array of callback lists one for each source.
+ * @mask: Mask used by the main thread fn to call the underlying thread fns.
+ */
+struct mic_irq_info {
+	int next_avail_src;
+	struct msix_entry *msix_entries;
+	u32 *mic_msi_map;
+	u16 num_vectors;
+	struct ida cb_ida;
+	spinlock_t mic_intr_lock;
+	spinlock_t mic_thread_lock;
+	struct list_head *cb_list;
+	unsigned long mask;
+};
+
+/**
+ * struct mic_intr_cb - Interrupt callback structure.
+ *
+ * @handler: The callback function
+ * @thread_fn: The thread_fn.
+ * @data: Private data of the requester.
+ * @cb_id: The callback id. Identifies this callback.
+ * @list: list head pointing to the next callback structure.
+ */
+struct mic_intr_cb {
+	irq_handler_t handler;
+	irq_handler_t thread_fn;
+	void *data;
+	int cb_id;
+	struct list_head list;
+};
+
+/**
+ * struct mic_irq - opaque pointer used as cookie
+ */
+struct mic_irq;
+
+/* Forward declaration */
+struct mic_device;
+
+/**
+ * struct mic_hw_intr_ops: MIC HW specific interrupt operations
+ * @intr_init: Initialize H/W specific interrupt information.
+ * @enable_interrupts: Enable interrupts from the hardware.
+ * @disable_interrupts: Disable interrupts from the hardware.
+ * @program_msi_to_src_map: Update MSI mapping registers with
+ * irq information.
+ * @read_msi_to_src_map: Read MSI mapping registers containing
+ * irq information.
+ */
+struct mic_hw_intr_ops {
+	void (*intr_init)(struct mic_device *mdev);
+	void (*enable_interrupts)(struct mic_device *mdev);
+	void (*disable_interrupts)(struct mic_device *mdev);
+	void (*program_msi_to_src_map) (struct mic_device *mdev,
+			int idx, int intr_src, bool set);
+	u32 (*read_msi_to_src_map) (struct mic_device *mdev,
+			int idx);
+};
+
+int mic_next_db(struct mic_device *mdev);
+struct mic_irq *
+mic_request_threaded_irq(struct mic_device *mdev,
+			 irq_handler_t handler, irq_handler_t thread_fn,
+			 const char *name, void *data, int intr_src,
+			 enum mic_intr_type type);
+void mic_free_irq(struct mic_device *mdev,
+		struct mic_irq *cookie, void *data);
+int mic_setup_interrupts(struct mic_device *mdev, struct pci_dev *pdev);
+void mic_free_interrupts(struct mic_device *mdev, struct pci_dev *pdev);
+void mic_intr_restore(struct mic_device *mdev);
+#endif
diff --git a/drivers/misc/mic/host/mic_main.c b/drivers/misc/mic/host/mic_main.c
new file mode 100644
index 000000000..035be3e9c
--- /dev/null
+++ b/drivers/misc/mic/host/mic_main.c
@@ -0,0 +1,347 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ */
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/poll.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+#include "mic_x100.h"
+#include "mic_smpt.h"
+
+static const char mic_driver_name[] = "mic";
+
+static const struct pci_device_id mic_pci_tbl[] = {
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2250)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2251)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2252)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2253)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2254)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2255)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2256)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2257)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2258)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2259)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225a)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225b)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225c)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225d)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225e)},
+
+	/* required last entry */
+	{ 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, mic_pci_tbl);
+
+/* ID allocator for MIC devices */
+static struct ida g_mic_ida;
+
+/* Initialize the device page */
+static int mic_dp_init(struct mic_device *mdev)
+{
+	mdev->dp = kzalloc(MIC_DP_SIZE, GFP_KERNEL);
+	if (!mdev->dp)
+		return -ENOMEM;
+
+	mdev->dp_dma_addr = mic_map_single(mdev,
+		mdev->dp, MIC_DP_SIZE);
+	if (mic_map_error(mdev->dp_dma_addr)) {
+		kfree(mdev->dp);
+		dev_err(&mdev->pdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, -ENOMEM);
+		return -ENOMEM;
+	}
+	mdev->ops->write_spad(mdev, MIC_DPLO_SPAD, mdev->dp_dma_addr);
+	mdev->ops->write_spad(mdev, MIC_DPHI_SPAD, mdev->dp_dma_addr >> 32);
+	return 0;
+}
+
+/* Uninitialize the device page */
+static void mic_dp_uninit(struct mic_device *mdev)
+{
+	mic_unmap_single(mdev, mdev->dp_dma_addr, MIC_DP_SIZE);
+	kfree(mdev->dp);
+}
+
+/**
+ * mic_ops_init: Initialize HW specific operation tables.
+ *
+ * @mdev: pointer to mic_device instance
+ *
+ * returns none.
+ */
+static void mic_ops_init(struct mic_device *mdev)
+{
+	switch (mdev->family) {
+	case MIC_FAMILY_X100:
+		mdev->ops = &mic_x100_ops;
+		mdev->intr_ops = &mic_x100_intr_ops;
+		mdev->smpt_ops = &mic_x100_smpt_ops;
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * mic_get_family - Determine hardware family to which this MIC belongs.
+ *
+ * @pdev: The pci device structure
+ *
+ * returns family.
+ */
+static enum mic_hw_family mic_get_family(struct pci_dev *pdev)
+{
+	enum mic_hw_family family;
+
+	switch (pdev->device) {
+	case MIC_X100_PCI_DEVICE_2250:
+	case MIC_X100_PCI_DEVICE_2251:
+	case MIC_X100_PCI_DEVICE_2252:
+	case MIC_X100_PCI_DEVICE_2253:
+	case MIC_X100_PCI_DEVICE_2254:
+	case MIC_X100_PCI_DEVICE_2255:
+	case MIC_X100_PCI_DEVICE_2256:
+	case MIC_X100_PCI_DEVICE_2257:
+	case MIC_X100_PCI_DEVICE_2258:
+	case MIC_X100_PCI_DEVICE_2259:
+	case MIC_X100_PCI_DEVICE_225a:
+	case MIC_X100_PCI_DEVICE_225b:
+	case MIC_X100_PCI_DEVICE_225c:
+	case MIC_X100_PCI_DEVICE_225d:
+	case MIC_X100_PCI_DEVICE_225e:
+		family = MIC_FAMILY_X100;
+		break;
+	default:
+		family = MIC_FAMILY_UNKNOWN;
+		break;
+	}
+	return family;
+}
+
+/**
+ * mic_device_init - Allocates and initializes the MIC device structure
+ *
+ * @mdev: pointer to mic_device instance
+ * @pdev: The pci device structure
+ *
+ * returns none.
+ */
+static void
+mic_device_init(struct mic_device *mdev, struct pci_dev *pdev)
+{
+	mdev->pdev = pdev;
+	mdev->family = mic_get_family(pdev);
+	mdev->stepping = pdev->revision;
+	mic_ops_init(mdev);
+	mutex_init(&mdev->mic_mutex);
+	mdev->irq_info.next_avail_src = 0;
+}
+
+/**
+ * mic_probe - Device Initialization Routine
+ *
+ * @pdev: PCI device structure
+ * @ent: entry in mic_pci_tbl
+ *
+ * returns 0 on success, < 0 on failure.
+ */
+static int mic_probe(struct pci_dev *pdev,
+		     const struct pci_device_id *ent)
+{
+	int rc;
+	struct mic_device *mdev;
+
+	mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
+	if (!mdev) {
+		rc = -ENOMEM;
+		dev_err(&pdev->dev, "mdev kmalloc failed rc %d\n", rc);
+		goto mdev_alloc_fail;
+	}
+	mdev->id = ida_simple_get(&g_mic_ida, 0, MIC_MAX_NUM_DEVS, GFP_KERNEL);
+	if (mdev->id < 0) {
+		rc = mdev->id;
+		dev_err(&pdev->dev, "ida_simple_get failed rc %d\n", rc);
+		goto ida_fail;
+	}
+
+	mic_device_init(mdev, pdev);
+
+	rc = pci_enable_device(pdev);
+	if (rc) {
+		dev_err(&pdev->dev, "failed to enable pci device.\n");
+		goto ida_remove;
+	}
+
+	pci_set_master(pdev);
+
+	rc = pci_request_regions(pdev, mic_driver_name);
+	if (rc) {
+		dev_err(&pdev->dev, "failed to get pci regions.\n");
+		goto disable_device;
+	}
+
+	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (rc) {
+		dev_err(&pdev->dev, "Cannot set DMA mask\n");
+		goto release_regions;
+	}
+
+	mdev->mmio.pa = pci_resource_start(pdev, mdev->ops->mmio_bar);
+	mdev->mmio.len = pci_resource_len(pdev, mdev->ops->mmio_bar);
+	mdev->mmio.va = pci_ioremap_bar(pdev, mdev->ops->mmio_bar);
+	if (!mdev->mmio.va) {
+		dev_err(&pdev->dev, "Cannot remap MMIO BAR\n");
+		rc = -EIO;
+		goto release_regions;
+	}
+
+	mdev->aper.pa = pci_resource_start(pdev, mdev->ops->aper_bar);
+	mdev->aper.len = pci_resource_len(pdev, mdev->ops->aper_bar);
+	mdev->aper.va = ioremap_wc(mdev->aper.pa, mdev->aper.len);
+	if (!mdev->aper.va) {
+		dev_err(&pdev->dev, "Cannot remap Aperture BAR\n");
+		rc = -EIO;
+		goto unmap_mmio;
+	}
+
+	mdev->intr_ops->intr_init(mdev);
+	rc = mic_setup_interrupts(mdev, pdev);
+	if (rc) {
+		dev_err(&pdev->dev, "mic_setup_interrupts failed %d\n", rc);
+		goto unmap_aper;
+	}
+	rc = mic_smpt_init(mdev);
+	if (rc) {
+		dev_err(&pdev->dev, "smpt_init failed %d\n", rc);
+		goto free_interrupts;
+	}
+
+	pci_set_drvdata(pdev, mdev);
+
+	rc = mic_dp_init(mdev);
+	if (rc) {
+		dev_err(&pdev->dev, "mic_dp_init failed rc %d\n", rc);
+		goto smpt_uninit;
+	}
+	mic_bootparam_init(mdev);
+	mic_create_debug_dir(mdev);
+
+	mdev->cosm_dev = cosm_register_device(&mdev->pdev->dev, &cosm_hw_ops);
+	if (IS_ERR(mdev->cosm_dev)) {
+		rc = PTR_ERR(mdev->cosm_dev);
+		dev_err(&pdev->dev, "cosm_add_device failed rc %d\n", rc);
+		goto cleanup_debug_dir;
+	}
+	return 0;
+cleanup_debug_dir:
+	mic_delete_debug_dir(mdev);
+	mic_dp_uninit(mdev);
+smpt_uninit:
+	mic_smpt_uninit(mdev);
+free_interrupts:
+	mic_free_interrupts(mdev, pdev);
+unmap_aper:
+	iounmap(mdev->aper.va);
+unmap_mmio:
+	iounmap(mdev->mmio.va);
+release_regions:
+	pci_release_regions(pdev);
+disable_device:
+	pci_disable_device(pdev);
+ida_remove:
+	ida_simple_remove(&g_mic_ida, mdev->id);
+ida_fail:
+	kfree(mdev);
+mdev_alloc_fail:
+	dev_err(&pdev->dev, "Probe failed rc %d\n", rc);
+	return rc;
+}
+
+/**
+ * mic_remove - Device Removal Routine
+ * mic_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.
+ *
+ * @pdev: PCI device structure
+ */
+static void mic_remove(struct pci_dev *pdev)
+{
+	struct mic_device *mdev;
+
+	mdev = pci_get_drvdata(pdev);
+	if (!mdev)
+		return;
+
+	cosm_unregister_device(mdev->cosm_dev);
+	mic_delete_debug_dir(mdev);
+	mic_dp_uninit(mdev);
+	mic_smpt_uninit(mdev);
+	mic_free_interrupts(mdev, pdev);
+	iounmap(mdev->aper.va);
+	iounmap(mdev->mmio.va);
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+	ida_simple_remove(&g_mic_ida, mdev->id);
+	kfree(mdev);
+}
+
+static struct pci_driver mic_driver = {
+	.name = mic_driver_name,
+	.id_table = mic_pci_tbl,
+	.probe = mic_probe,
+	.remove = mic_remove
+};
+
+static int __init mic_init(void)
+{
+	int ret;
+
+	request_module("mic_x100_dma");
+	mic_init_debugfs();
+	ida_init(&g_mic_ida);
+	ret = pci_register_driver(&mic_driver);
+	if (ret) {
+		pr_err("pci_register_driver failed ret %d\n", ret);
+		goto cleanup_debugfs;
+	}
+	return 0;
+cleanup_debugfs:
+	ida_destroy(&g_mic_ida);
+	mic_exit_debugfs();
+	return ret;
+}
+
+static void __exit mic_exit(void)
+{
+	pci_unregister_driver(&mic_driver);
+	ida_destroy(&g_mic_ida);
+	mic_exit_debugfs();
+}
+
+module_init(mic_init);
+module_exit(mic_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC X100 Host driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/host/mic_smpt.c b/drivers/misc/mic/host/mic_smpt.c
new file mode 100644
index 000000000..c3f958580
--- /dev/null
+++ b/drivers/misc/mic/host/mic_smpt.c
@@ -0,0 +1,439 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#include <linux/pci.h>
+
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+#include "mic_smpt.h"
+
+static inline u64 mic_system_page_mask(struct mic_device *mdev)
+{
+	return (1ULL << mdev->smpt->info.page_shift) - 1ULL;
+}
+
+static inline u8 mic_sys_addr_to_smpt(struct mic_device *mdev, dma_addr_t pa)
+{
+	return (pa - mdev->smpt->info.base) >> mdev->smpt->info.page_shift;
+}
+
+static inline u64 mic_smpt_to_pa(struct mic_device *mdev, u8 index)
+{
+	return mdev->smpt->info.base + (index * mdev->smpt->info.page_size);
+}
+
+static inline u64 mic_smpt_offset(struct mic_device *mdev, dma_addr_t pa)
+{
+	return pa & mic_system_page_mask(mdev);
+}
+
+static inline u64 mic_smpt_align_low(struct mic_device *mdev, dma_addr_t pa)
+{
+	return ALIGN(pa - mic_system_page_mask(mdev),
+		mdev->smpt->info.page_size);
+}
+
+static inline u64 mic_smpt_align_high(struct mic_device *mdev, dma_addr_t pa)
+{
+	return ALIGN(pa, mdev->smpt->info.page_size);
+}
+
+/* Total Cumulative system memory accessible by MIC across all SMPT entries */
+static inline u64 mic_max_system_memory(struct mic_device *mdev)
+{
+	return mdev->smpt->info.num_reg * mdev->smpt->info.page_size;
+}
+
+/* Maximum system memory address accessible by MIC */
+static inline u64 mic_max_system_addr(struct mic_device *mdev)
+{
+	return mdev->smpt->info.base + mic_max_system_memory(mdev) - 1ULL;
+}
+
+/* Check if the DMA address is a MIC system memory address */
+static inline bool
+mic_is_system_addr(struct mic_device *mdev, dma_addr_t pa)
+{
+	return pa >= mdev->smpt->info.base && pa <= mic_max_system_addr(mdev);
+}
+
+/* Populate an SMPT entry and update the reference counts. */
+static void mic_add_smpt_entry(int spt, s64 *ref, u64 addr,
+			       int entries, struct mic_device *mdev)
+{
+	struct mic_smpt_info *smpt_info = mdev->smpt;
+	int i;
+
+	for (i = spt; i < spt + entries; i++,
+		addr += smpt_info->info.page_size) {
+		if (!smpt_info->entry[i].ref_count &&
+		    (smpt_info->entry[i].dma_addr != addr)) {
+			mdev->smpt_ops->set(mdev, addr, i);
+			smpt_info->entry[i].dma_addr = addr;
+		}
+		smpt_info->entry[i].ref_count += ref[i - spt];
+	}
+}
+
+/*
+ * Find an available MIC address in MIC SMPT address space
+ * for a given DMA address and size.
+ */
+static dma_addr_t mic_smpt_op(struct mic_device *mdev, u64 dma_addr,
+			      int entries, s64 *ref, size_t size)
+{
+	int spt;
+	int ae = 0;
+	int i;
+	unsigned long flags;
+	dma_addr_t mic_addr = 0;
+	dma_addr_t addr = dma_addr;
+	struct mic_smpt_info *smpt_info = mdev->smpt;
+
+	spin_lock_irqsave(&smpt_info->smpt_lock, flags);
+
+	/* find existing entries */
+	for (i = 0; i < smpt_info->info.num_reg; i++) {
+		if (smpt_info->entry[i].dma_addr == addr) {
+			ae++;
+			addr += smpt_info->info.page_size;
+		} else if (ae) /* cannot find contiguous entries */
+			goto not_found;
+
+		if (ae == entries)
+			goto found;
+	}
+
+	/* find free entry */
+	for (ae = 0, i = 0; i < smpt_info->info.num_reg; i++) {
+		ae = (smpt_info->entry[i].ref_count == 0) ? ae + 1 : 0;
+		if (ae == entries)
+			goto found;
+	}
+
+not_found:
+	spin_unlock_irqrestore(&smpt_info->smpt_lock, flags);
+	return mic_addr;
+
+found:
+	spt = i - entries + 1;
+	mic_addr = mic_smpt_to_pa(mdev, spt);
+	mic_add_smpt_entry(spt, ref, dma_addr, entries, mdev);
+	smpt_info->map_count++;
+	smpt_info->ref_count += (s64)size;
+	spin_unlock_irqrestore(&smpt_info->smpt_lock, flags);
+	return mic_addr;
+}
+
+/*
+ * Returns number of smpt entries needed for dma_addr to dma_addr + size
+ * also returns the reference count array for each of those entries
+ * and the starting smpt address
+ */
+static int mic_get_smpt_ref_count(struct mic_device *mdev, dma_addr_t dma_addr,
+				  size_t size, s64 *ref,  u64 *smpt_start)
+{
+	u64 start =  dma_addr;
+	u64 end = dma_addr + size;
+	int i = 0;
+
+	while (start < end) {
+		ref[i++] = min(mic_smpt_align_high(mdev, start + 1),
+			end) - start;
+		start = mic_smpt_align_high(mdev, start + 1);
+	}
+
+	if (smpt_start)
+		*smpt_start = mic_smpt_align_low(mdev, dma_addr);
+
+	return i;
+}
+
+/*
+ * mic_to_dma_addr - Converts a MIC address to a DMA address.
+ *
+ * @mdev: pointer to mic_device instance.
+ * @mic_addr: MIC address.
+ *
+ * returns a DMA address.
+ */
+dma_addr_t mic_to_dma_addr(struct mic_device *mdev, dma_addr_t mic_addr)
+{
+	struct mic_smpt_info *smpt_info = mdev->smpt;
+	int spt;
+	dma_addr_t dma_addr;
+
+	if (!mic_is_system_addr(mdev, mic_addr)) {
+		dev_err(&mdev->pdev->dev,
+			"mic_addr is invalid. mic_addr = 0x%llx\n", mic_addr);
+		return -EINVAL;
+	}
+	spt = mic_sys_addr_to_smpt(mdev, mic_addr);
+	dma_addr = smpt_info->entry[spt].dma_addr +
+		mic_smpt_offset(mdev, mic_addr);
+	return dma_addr;
+}
+
+/**
+ * mic_map - Maps a DMA address to a MIC physical address.
+ *
+ * @mdev: pointer to mic_device instance.
+ * @dma_addr: DMA address.
+ * @size: Size of the region to be mapped.
+ *
+ * This API converts the DMA address provided to a DMA address understood
+ * by MIC. Caller should check for errors by calling mic_map_error(..).
+ *
+ * returns DMA address as required by MIC.
+ */
+dma_addr_t mic_map(struct mic_device *mdev, dma_addr_t dma_addr, size_t size)
+{
+	dma_addr_t mic_addr = 0;
+	int num_entries;
+	s64 *ref;
+	u64 smpt_start;
+
+	if (!size || size > mic_max_system_memory(mdev))
+		return mic_addr;
+
+	ref = kmalloc_array(mdev->smpt->info.num_reg, sizeof(s64), GFP_ATOMIC);
+	if (!ref)
+		return mic_addr;
+
+	num_entries = mic_get_smpt_ref_count(mdev, dma_addr, size,
+					     ref, &smpt_start);
+
+	/* Set the smpt table appropriately and get 16G aligned mic address */
+	mic_addr = mic_smpt_op(mdev, smpt_start, num_entries, ref, size);
+
+	kfree(ref);
+
+	/*
+	 * If mic_addr is zero then its an error case
+	 * since mic_addr can never be zero.
+	 * else generate mic_addr by adding the 16G offset in dma_addr
+	 */
+	if (!mic_addr && MIC_FAMILY_X100 == mdev->family) {
+		dev_err(&mdev->pdev->dev,
+			"mic_map failed dma_addr 0x%llx size 0x%lx\n",
+			dma_addr, size);
+		return mic_addr;
+	} else {
+		return mic_addr + mic_smpt_offset(mdev, dma_addr);
+	}
+}
+
+/**
+ * mic_unmap - Unmaps a MIC physical address.
+ *
+ * @mdev: pointer to mic_device instance.
+ * @mic_addr: MIC physical address.
+ * @size: Size of the region to be unmapped.
+ *
+ * This API unmaps the mappings created by mic_map(..).
+ *
+ * returns None.
+ */
+void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size)
+{
+	struct mic_smpt_info *smpt_info = mdev->smpt;
+	s64 *ref;
+	int num_smpt;
+	int spt;
+	int i;
+	unsigned long flags;
+
+	if (!size)
+		return;
+
+	if (!mic_is_system_addr(mdev, mic_addr)) {
+		dev_err(&mdev->pdev->dev,
+			"invalid address: 0x%llx\n", mic_addr);
+		return;
+	}
+
+	spt = mic_sys_addr_to_smpt(mdev, mic_addr);
+	ref = kmalloc_array(mdev->smpt->info.num_reg, sizeof(s64), GFP_ATOMIC);
+	if (!ref)
+		return;
+
+	/* Get number of smpt entries to be mapped, ref count array */
+	num_smpt = mic_get_smpt_ref_count(mdev, mic_addr, size, ref, NULL);
+
+	spin_lock_irqsave(&smpt_info->smpt_lock, flags);
+	smpt_info->unmap_count++;
+	smpt_info->ref_count -= (s64)size;
+
+	for (i = spt; i < spt + num_smpt; i++) {
+		smpt_info->entry[i].ref_count -= ref[i - spt];
+		if (smpt_info->entry[i].ref_count < 0)
+			dev_warn(&mdev->pdev->dev,
+				 "ref count for entry %d is negative\n", i);
+	}
+	spin_unlock_irqrestore(&smpt_info->smpt_lock, flags);
+	kfree(ref);
+}
+
+/**
+ * mic_map_single - Maps a virtual address to a MIC physical address.
+ *
+ * @mdev: pointer to mic_device instance.
+ * @va: Kernel direct mapped virtual address.
+ * @size: Size of the region to be mapped.
+ *
+ * This API calls pci_map_single(..) for the direct mapped virtual address
+ * and then converts the DMA address provided to a DMA address understood
+ * by MIC. Caller should check for errors by calling mic_map_error(..).
+ *
+ * returns DMA address as required by MIC.
+ */
+dma_addr_t mic_map_single(struct mic_device *mdev, void *va, size_t size)
+{
+	dma_addr_t mic_addr = 0;
+	struct pci_dev *pdev = mdev->pdev;
+	dma_addr_t dma_addr =
+		pci_map_single(pdev, va, size, PCI_DMA_BIDIRECTIONAL);
+
+	if (!pci_dma_mapping_error(pdev, dma_addr)) {
+		mic_addr = mic_map(mdev, dma_addr, size);
+		if (!mic_addr) {
+			dev_err(&mdev->pdev->dev,
+				"mic_map failed dma_addr 0x%llx size 0x%lx\n",
+				dma_addr, size);
+			pci_unmap_single(pdev, dma_addr,
+					 size, PCI_DMA_BIDIRECTIONAL);
+		}
+	}
+	return mic_addr;
+}
+
+/**
+ * mic_unmap_single - Unmaps a MIC physical address.
+ *
+ * @mdev: pointer to mic_device instance.
+ * @mic_addr: MIC physical address.
+ * @size: Size of the region to be unmapped.
+ *
+ * This API unmaps the mappings created by mic_map_single(..).
+ *
+ * returns None.
+ */
+void
+mic_unmap_single(struct mic_device *mdev, dma_addr_t mic_addr, size_t size)
+{
+	struct pci_dev *pdev = mdev->pdev;
+	dma_addr_t dma_addr = mic_to_dma_addr(mdev, mic_addr);
+	mic_unmap(mdev, mic_addr, size);
+	pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL);
+}
+
+/**
+ * mic_smpt_init - Initialize MIC System Memory Page Tables.
+ *
+ * @mdev: pointer to mic_device instance.
+ *
+ * returns 0 for success and -errno for error.
+ */
+int mic_smpt_init(struct mic_device *mdev)
+{
+	int i, err = 0;
+	dma_addr_t dma_addr;
+	struct mic_smpt_info *smpt_info;
+
+	mdev->smpt = kmalloc(sizeof(*mdev->smpt), GFP_KERNEL);
+	if (!mdev->smpt)
+		return -ENOMEM;
+
+	smpt_info = mdev->smpt;
+	mdev->smpt_ops->init(mdev);
+	smpt_info->entry = kmalloc_array(smpt_info->info.num_reg,
+					 sizeof(*smpt_info->entry), GFP_KERNEL);
+	if (!smpt_info->entry) {
+		err = -ENOMEM;
+		goto free_smpt;
+	}
+	spin_lock_init(&smpt_info->smpt_lock);
+	for (i = 0; i < smpt_info->info.num_reg; i++) {
+		dma_addr = i * smpt_info->info.page_size;
+		smpt_info->entry[i].dma_addr = dma_addr;
+		smpt_info->entry[i].ref_count = 0;
+		mdev->smpt_ops->set(mdev, dma_addr, i);
+	}
+	smpt_info->ref_count = 0;
+	smpt_info->map_count = 0;
+	smpt_info->unmap_count = 0;
+	return 0;
+free_smpt:
+	kfree(smpt_info);
+	return err;
+}
+
+/**
+ * mic_smpt_uninit - UnInitialize MIC System Memory Page Tables.
+ *
+ * @mdev: pointer to mic_device instance.
+ *
+ * returns None.
+ */
+void mic_smpt_uninit(struct mic_device *mdev)
+{
+	struct mic_smpt_info *smpt_info = mdev->smpt;
+	int i;
+
+	dev_dbg(&mdev->pdev->dev,
+		"nodeid %d SMPT ref count %lld map %lld unmap %lld\n",
+		mdev->id, smpt_info->ref_count,
+		smpt_info->map_count, smpt_info->unmap_count);
+
+	for (i = 0; i < smpt_info->info.num_reg; i++) {
+		dev_dbg(&mdev->pdev->dev,
+			"SMPT entry[%d] dma_addr = 0x%llx ref_count = %lld\n",
+			i, smpt_info->entry[i].dma_addr,
+			smpt_info->entry[i].ref_count);
+		if (smpt_info->entry[i].ref_count)
+			dev_warn(&mdev->pdev->dev,
+				 "ref count for entry %d is not zero\n", i);
+	}
+	kfree(smpt_info->entry);
+	kfree(smpt_info);
+}
+
+/**
+ * mic_smpt_restore - Restore MIC System Memory Page Tables.
+ *
+ * @mdev: pointer to mic_device instance.
+ *
+ * Restore the SMPT registers to values previously stored in the
+ * SW data structures. Some MIC steppings lose register state
+ * across resets and this API should be called for performing
+ * a restore operation if required.
+ *
+ * returns None.
+ */
+void mic_smpt_restore(struct mic_device *mdev)
+{
+	int i;
+	dma_addr_t dma_addr;
+
+	for (i = 0; i < mdev->smpt->info.num_reg; i++) {
+		dma_addr = mdev->smpt->entry[i].dma_addr;
+		mdev->smpt_ops->set(mdev, dma_addr, i);
+	}
+}
diff --git a/drivers/misc/mic/host/mic_smpt.h b/drivers/misc/mic/host/mic_smpt.h
new file mode 100644
index 000000000..68721c6e7
--- /dev/null
+++ b/drivers/misc/mic/host/mic_smpt.h
@@ -0,0 +1,99 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#ifndef MIC_SMPT_H
+#define MIC_SMPT_H
+/**
+ * struct mic_smpt_ops - MIC HW specific SMPT operations.
+ * @init: Initialize hardware specific SMPT information in mic_smpt_hw_info.
+ * @set: Set the value for a particular SMPT entry.
+ */
+struct mic_smpt_ops {
+	void (*init)(struct mic_device *mdev);
+	void (*set)(struct mic_device *mdev, dma_addr_t dma_addr, u8 index);
+};
+
+/**
+ * struct mic_smpt - MIC SMPT entry information.
+ * @dma_addr: Base DMA address for this SMPT entry.
+ * @ref_count: Number of active mappings for this SMPT entry in bytes.
+ */
+struct mic_smpt {
+	dma_addr_t dma_addr;
+	s64 ref_count;
+};
+
+/**
+ * struct mic_smpt_hw_info - MIC SMPT hardware specific information.
+ * @num_reg: Number of SMPT registers.
+ * @page_shift: System memory page shift.
+ * @page_size: System memory page size.
+ * @base: System address base.
+ */
+struct mic_smpt_hw_info {
+	u8 num_reg;
+	u8 page_shift;
+	u64 page_size;
+	u64 base;
+};
+
+/**
+ * struct mic_smpt_info - MIC SMPT information.
+ * @entry: Array of SMPT entries.
+ * @smpt_lock: Spin lock protecting access to SMPT data structures.
+ * @info: Hardware specific SMPT information.
+ * @ref_count: Number of active SMPT mappings (for debug).
+ * @map_count: Number of SMPT mappings created (for debug).
+ * @unmap_count: Number of SMPT mappings destroyed (for debug).
+ */
+struct mic_smpt_info {
+	struct mic_smpt *entry;
+	spinlock_t smpt_lock;
+	struct mic_smpt_hw_info info;
+	s64 ref_count;
+	s64 map_count;
+	s64 unmap_count;
+};
+
+dma_addr_t mic_map_single(struct mic_device *mdev, void *va, size_t size);
+void mic_unmap_single(struct mic_device *mdev,
+	dma_addr_t mic_addr, size_t size);
+dma_addr_t mic_map(struct mic_device *mdev,
+	dma_addr_t dma_addr, size_t size);
+void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size);
+dma_addr_t mic_to_dma_addr(struct mic_device *mdev, dma_addr_t mic_addr);
+
+/**
+ * mic_map_error - Check a MIC address for errors.
+ *
+ * @mdev: pointer to mic_device instance.
+ *
+ * returns Whether there was an error during mic_map..(..) APIs.
+ */
+static inline bool mic_map_error(dma_addr_t mic_addr)
+{
+	return !mic_addr;
+}
+
+int mic_smpt_init(struct mic_device *mdev);
+void mic_smpt_uninit(struct mic_device *mdev);
+void mic_smpt_restore(struct mic_device *mdev);
+
+#endif
diff --git a/drivers/misc/mic/host/mic_x100.c b/drivers/misc/mic/host/mic_x100.c
new file mode 100644
index 000000000..82a973c85
--- /dev/null
+++ b/drivers/misc/mic/host/mic_x100.c
@@ -0,0 +1,584 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#include <linux/fs.h>
+#include <linux/pci.h>
+#include <linux/sched.h>
+#include <linux/firmware.h>
+#include <linux/delay.h>
+
+#include "../common/mic_dev.h"
+#include "mic_device.h"
+#include "mic_x100.h"
+#include "mic_smpt.h"
+
+/**
+ * mic_x100_write_spad - write to the scratchpad register
+ * @mdev: pointer to mic_device instance
+ * @idx: index to the scratchpad register, 0 based
+ * @val: the data value to put into the register
+ *
+ * This function allows writing of a 32bit value to the indexed scratchpad
+ * register.
+ *
+ * RETURNS: none.
+ */
+static void
+mic_x100_write_spad(struct mic_device *mdev, unsigned int idx, u32 val)
+{
+	dev_dbg(&mdev->pdev->dev, "Writing 0x%x to scratch pad index %d\n",
+		val, idx);
+	mic_mmio_write(&mdev->mmio, val,
+		       MIC_X100_SBOX_BASE_ADDRESS +
+		       MIC_X100_SBOX_SPAD0 + idx * 4);
+}
+
+/**
+ * mic_x100_read_spad - read from the scratchpad register
+ * @mdev: pointer to mic_device instance
+ * @idx: index to scratchpad register, 0 based
+ *
+ * This function allows reading of the 32bit scratchpad register.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static u32
+mic_x100_read_spad(struct mic_device *mdev, unsigned int idx)
+{
+	u32 val = mic_mmio_read(&mdev->mmio,
+		MIC_X100_SBOX_BASE_ADDRESS +
+		MIC_X100_SBOX_SPAD0 + idx * 4);
+
+	dev_dbg(&mdev->pdev->dev,
+		"Reading 0x%x from scratch pad index %d\n", val, idx);
+	return val;
+}
+
+/**
+ * mic_x100_enable_interrupts - Enable interrupts.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_enable_interrupts(struct mic_device *mdev)
+{
+	u32 reg;
+	struct mic_mw *mw = &mdev->mmio;
+	u32 sice0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SICE0;
+	u32 siac0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SIAC0;
+
+	reg = mic_mmio_read(mw, sice0);
+	reg |= MIC_X100_SBOX_DBR_BITS(0xf) | MIC_X100_SBOX_DMA_BITS(0xff);
+	mic_mmio_write(mw, reg, sice0);
+
+	/*
+	 * Enable auto-clear when enabling interrupts. Applicable only for
+	 * MSI-x. Legacy and MSI mode cannot have auto-clear enabled.
+	 */
+	if (mdev->irq_info.num_vectors > 1) {
+		reg = mic_mmio_read(mw, siac0);
+		reg |= MIC_X100_SBOX_DBR_BITS(0xf) |
+			MIC_X100_SBOX_DMA_BITS(0xff);
+		mic_mmio_write(mw, reg, siac0);
+	}
+}
+
+/**
+ * mic_x100_disable_interrupts - Disable interrupts.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_disable_interrupts(struct mic_device *mdev)
+{
+	u32 reg;
+	struct mic_mw *mw = &mdev->mmio;
+	u32 sice0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SICE0;
+	u32 siac0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SIAC0;
+	u32 sicc0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SICC0;
+
+	reg = mic_mmio_read(mw, sice0);
+	mic_mmio_write(mw, reg, sicc0);
+
+	if (mdev->irq_info.num_vectors > 1) {
+		reg = mic_mmio_read(mw, siac0);
+		reg &= ~(MIC_X100_SBOX_DBR_BITS(0xf) |
+			MIC_X100_SBOX_DMA_BITS(0xff));
+		mic_mmio_write(mw, reg, siac0);
+	}
+}
+
+/**
+ * mic_x100_send_sbox_intr - Send an MIC_X100_SBOX interrupt to MIC.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_send_sbox_intr(struct mic_device *mdev,
+				    int doorbell)
+{
+	struct mic_mw *mw = &mdev->mmio;
+	u64 apic_icr_offset = MIC_X100_SBOX_APICICR0 + doorbell * 8;
+	u32 apicicr_low = mic_mmio_read(mw, MIC_X100_SBOX_BASE_ADDRESS +
+					apic_icr_offset);
+
+	/* for MIC we need to make sure we "hit" the send_icr bit (13) */
+	apicicr_low = (apicicr_low | (1 << 13));
+
+	/* Ensure that the interrupt is ordered w.r.t. previous stores. */
+	wmb();
+	mic_mmio_write(mw, apicicr_low,
+		       MIC_X100_SBOX_BASE_ADDRESS + apic_icr_offset);
+}
+
+/**
+ * mic_x100_send_rdmasr_intr - Send an RDMASR interrupt to MIC.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_send_rdmasr_intr(struct mic_device *mdev,
+				      int doorbell)
+{
+	int rdmasr_offset = MIC_X100_SBOX_RDMASR0 + (doorbell << 2);
+	/* Ensure that the interrupt is ordered w.r.t. previous stores. */
+	wmb();
+	mic_mmio_write(&mdev->mmio, 0,
+		       MIC_X100_SBOX_BASE_ADDRESS + rdmasr_offset);
+}
+
+/**
+ * __mic_x100_send_intr - Send interrupt to MIC.
+ * @mdev: pointer to mic_device instance
+ * @doorbell: doorbell number.
+ */
+static void mic_x100_send_intr(struct mic_device *mdev, int doorbell)
+{
+	int rdmasr_db;
+	if (doorbell < MIC_X100_NUM_SBOX_IRQ) {
+		mic_x100_send_sbox_intr(mdev, doorbell);
+	} else {
+		rdmasr_db = doorbell - MIC_X100_NUM_SBOX_IRQ;
+		mic_x100_send_rdmasr_intr(mdev, rdmasr_db);
+	}
+}
+
+/**
+ * mic_x100_ack_interrupt - Read the interrupt sources register and
+ * clear it. This function will be called in the MSI/INTx case.
+ * @mdev: Pointer to mic_device instance.
+ *
+ * Returns: bitmask of interrupt sources triggered.
+ */
+static u32 mic_x100_ack_interrupt(struct mic_device *mdev)
+{
+	u32 sicr0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SICR0;
+	u32 reg = mic_mmio_read(&mdev->mmio, sicr0);
+	mic_mmio_write(&mdev->mmio, reg, sicr0);
+	return reg;
+}
+
+/**
+ * mic_x100_intr_workarounds - These hardware specific workarounds are
+ * to be invoked everytime an interrupt is handled.
+ * @mdev: Pointer to mic_device instance.
+ *
+ * Returns: none
+ */
+static void mic_x100_intr_workarounds(struct mic_device *mdev)
+{
+	struct mic_mw *mw = &mdev->mmio;
+
+	/* Clear pending bit array. */
+	if (MIC_A0_STEP == mdev->stepping)
+		mic_mmio_write(mw, 1, MIC_X100_SBOX_BASE_ADDRESS +
+			MIC_X100_SBOX_MSIXPBACR);
+
+	if (mdev->stepping >= MIC_B0_STEP)
+		mdev->intr_ops->enable_interrupts(mdev);
+}
+
+/**
+ * mic_x100_hw_intr_init - Initialize h/w specific interrupt
+ * information.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_hw_intr_init(struct mic_device *mdev)
+{
+	mdev->intr_info = (struct mic_intr_info *)mic_x100_intr_init;
+}
+
+/**
+ * mic_x100_read_msi_to_src_map - read from the MSI mapping registers
+ * @mdev: pointer to mic_device instance
+ * @idx: index to the mapping register, 0 based
+ *
+ * This function allows reading of the 32bit MSI mapping register.
+ *
+ * RETURNS: The value in the register.
+ */
+static u32
+mic_x100_read_msi_to_src_map(struct mic_device *mdev, int idx)
+{
+	return mic_mmio_read(&mdev->mmio,
+		MIC_X100_SBOX_BASE_ADDRESS +
+		MIC_X100_SBOX_MXAR0 + idx * 4);
+}
+
+/**
+ * mic_x100_program_msi_to_src_map - program the MSI mapping registers
+ * @mdev: pointer to mic_device instance
+ * @idx: index to the mapping register, 0 based
+ * @offset: The bit offset in the register that needs to be updated.
+ * @set: boolean specifying if the bit in the specified offset needs
+ * to be set or cleared.
+ *
+ * RETURNS: None.
+ */
+static void
+mic_x100_program_msi_to_src_map(struct mic_device *mdev,
+				int idx, int offset, bool set)
+{
+	unsigned long reg;
+	struct mic_mw *mw = &mdev->mmio;
+	u32 mxar = MIC_X100_SBOX_BASE_ADDRESS +
+		MIC_X100_SBOX_MXAR0 + idx * 4;
+
+	reg = mic_mmio_read(mw, mxar);
+	if (set)
+		__set_bit(offset, &reg);
+	else
+		__clear_bit(offset, &reg);
+	mic_mmio_write(mw, reg, mxar);
+}
+
+/*
+ * mic_x100_reset_fw_ready - Reset Firmware ready status field.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_reset_fw_ready(struct mic_device *mdev)
+{
+	mdev->ops->write_spad(mdev, MIC_X100_DOWNLOAD_INFO, 0);
+}
+
+/*
+ * mic_x100_is_fw_ready - Check if firmware is ready.
+ * @mdev: pointer to mic_device instance
+ */
+static bool mic_x100_is_fw_ready(struct mic_device *mdev)
+{
+	u32 scratch2 = mdev->ops->read_spad(mdev, MIC_X100_DOWNLOAD_INFO);
+	return MIC_X100_SPAD2_DOWNLOAD_STATUS(scratch2) ? true : false;
+}
+
+/**
+ * mic_x100_get_apic_id - Get bootstrap APIC ID.
+ * @mdev: pointer to mic_device instance
+ */
+static u32 mic_x100_get_apic_id(struct mic_device *mdev)
+{
+	u32 scratch2 = 0;
+
+	scratch2 = mdev->ops->read_spad(mdev, MIC_X100_DOWNLOAD_INFO);
+	return MIC_X100_SPAD2_APIC_ID(scratch2);
+}
+
+/**
+ * mic_x100_send_firmware_intr - Send an interrupt to the firmware on MIC.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_send_firmware_intr(struct mic_device *mdev)
+{
+	u32 apicicr_low;
+	u64 apic_icr_offset = MIC_X100_SBOX_APICICR7;
+	int vector = MIC_X100_BSP_INTERRUPT_VECTOR;
+	struct mic_mw *mw = &mdev->mmio;
+
+	/*
+	 * For MIC we need to make sure we "hit"
+	 * the send_icr bit (13).
+	 */
+	apicicr_low = (vector | (1 << 13));
+
+	mic_mmio_write(mw, mic_x100_get_apic_id(mdev),
+		       MIC_X100_SBOX_BASE_ADDRESS + apic_icr_offset + 4);
+
+	/* Ensure that the interrupt is ordered w.r.t. previous stores. */
+	wmb();
+	mic_mmio_write(mw, apicicr_low,
+		       MIC_X100_SBOX_BASE_ADDRESS + apic_icr_offset);
+}
+
+/**
+ * mic_x100_hw_reset - Reset the MIC device.
+ * @mdev: pointer to mic_device instance
+ */
+static void mic_x100_hw_reset(struct mic_device *mdev)
+{
+	u32 reset_reg;
+	u32 rgcr = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_RGCR;
+	struct mic_mw *mw = &mdev->mmio;
+
+	/* Ensure that the reset is ordered w.r.t. previous loads and stores */
+	mb();
+	/* Trigger reset */
+	reset_reg = mic_mmio_read(mw, rgcr);
+	reset_reg |= 0x1;
+	mic_mmio_write(mw, reset_reg, rgcr);
+	/*
+	 * It seems we really want to delay at least 1 second
+	 * after touching reset to prevent a lot of problems.
+	 */
+	msleep(1000);
+}
+
+/**
+ * mic_x100_load_command_line - Load command line to MIC.
+ * @mdev: pointer to mic_device instance
+ * @fw: the firmware image
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static int
+mic_x100_load_command_line(struct mic_device *mdev, const struct firmware *fw)
+{
+	u32 len = 0;
+	u32 boot_mem;
+	char *buf;
+	void __iomem *cmd_line_va = mdev->aper.va + mdev->bootaddr + fw->size;
+#define CMDLINE_SIZE 2048
+
+	boot_mem = mdev->aper.len >> 20;
+	buf = kzalloc(CMDLINE_SIZE, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	len += snprintf(buf, CMDLINE_SIZE - len,
+		" mem=%dM", boot_mem);
+	if (mdev->cosm_dev->cmdline)
+		snprintf(buf + len, CMDLINE_SIZE - len, " %s",
+			 mdev->cosm_dev->cmdline);
+	memcpy_toio(cmd_line_va, buf, strlen(buf) + 1);
+	kfree(buf);
+	return 0;
+}
+
+/**
+ * mic_x100_load_ramdisk - Load ramdisk to MIC.
+ * @mdev: pointer to mic_device instance
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static int
+mic_x100_load_ramdisk(struct mic_device *mdev)
+{
+	const struct firmware *fw;
+	int rc;
+	struct boot_params __iomem *bp = mdev->aper.va + mdev->bootaddr;
+
+	rc = request_firmware(&fw, mdev->cosm_dev->ramdisk, &mdev->pdev->dev);
+	if (rc < 0) {
+		dev_err(&mdev->pdev->dev,
+			"ramdisk request_firmware failed: %d %s\n",
+			rc, mdev->cosm_dev->ramdisk);
+		goto error;
+	}
+	/*
+	 * Typically the bootaddr for card OS is 64M
+	 * so copy over the ramdisk @ 128M.
+	 */
+	memcpy_toio(mdev->aper.va + (mdev->bootaddr << 1), fw->data, fw->size);
+	iowrite32(mdev->bootaddr << 1, &bp->hdr.ramdisk_image);
+	iowrite32(fw->size, &bp->hdr.ramdisk_size);
+	release_firmware(fw);
+error:
+	return rc;
+}
+
+/**
+ * mic_x100_get_boot_addr - Get MIC boot address.
+ * @mdev: pointer to mic_device instance
+ *
+ * This function is called during firmware load to determine
+ * the address at which the OS should be downloaded in card
+ * memory i.e. GDDR.
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static int
+mic_x100_get_boot_addr(struct mic_device *mdev)
+{
+	u32 scratch2, boot_addr;
+	int rc = 0;
+
+	scratch2 = mdev->ops->read_spad(mdev, MIC_X100_DOWNLOAD_INFO);
+	boot_addr = MIC_X100_SPAD2_DOWNLOAD_ADDR(scratch2);
+	dev_dbg(&mdev->pdev->dev, "%s %d boot_addr 0x%x\n",
+		__func__, __LINE__, boot_addr);
+	if (boot_addr > (1 << 31)) {
+		dev_err(&mdev->pdev->dev,
+			"incorrect bootaddr 0x%x\n",
+			boot_addr);
+		rc = -EINVAL;
+		goto error;
+	}
+	mdev->bootaddr = boot_addr;
+error:
+	return rc;
+}
+
+/**
+ * mic_x100_load_firmware - Load firmware to MIC.
+ * @mdev: pointer to mic_device instance
+ * @buf: buffer containing boot string including firmware/ramdisk path.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+static int
+mic_x100_load_firmware(struct mic_device *mdev, const char *buf)
+{
+	int rc;
+	const struct firmware *fw;
+
+	rc = mic_x100_get_boot_addr(mdev);
+	if (rc)
+		return rc;
+	/* load OS */
+	rc = request_firmware(&fw, mdev->cosm_dev->firmware, &mdev->pdev->dev);
+	if (rc < 0) {
+		dev_err(&mdev->pdev->dev,
+			"ramdisk request_firmware failed: %d %s\n",
+			rc, mdev->cosm_dev->firmware);
+		return rc;
+	}
+	if (mdev->bootaddr > mdev->aper.len - fw->size) {
+		rc = -EINVAL;
+		dev_err(&mdev->pdev->dev, "%s %d rc %d bootaddr 0x%x\n",
+			__func__, __LINE__, rc, mdev->bootaddr);
+		goto error;
+	}
+	memcpy_toio(mdev->aper.va + mdev->bootaddr, fw->data, fw->size);
+	mdev->ops->write_spad(mdev, MIC_X100_FW_SIZE, fw->size);
+	if (!strcmp(mdev->cosm_dev->bootmode, "flash")) {
+		rc = -EINVAL;
+		dev_err(&mdev->pdev->dev, "%s %d rc %d\n",
+			__func__, __LINE__, rc);
+		goto error;
+	}
+	/* load command line */
+	rc = mic_x100_load_command_line(mdev, fw);
+	if (rc) {
+		dev_err(&mdev->pdev->dev, "%s %d rc %d\n",
+			__func__, __LINE__, rc);
+		goto error;
+	}
+	release_firmware(fw);
+	/* load ramdisk */
+	if (mdev->cosm_dev->ramdisk)
+		rc = mic_x100_load_ramdisk(mdev);
+
+	return rc;
+
+error:
+	release_firmware(fw);
+	return rc;
+}
+
+/**
+ * mic_x100_get_postcode - Get postcode status from firmware.
+ * @mdev: pointer to mic_device instance
+ *
+ * RETURNS: postcode.
+ */
+static u32 mic_x100_get_postcode(struct mic_device *mdev)
+{
+	return mic_mmio_read(&mdev->mmio, MIC_X100_POSTCODE);
+}
+
+/**
+ * mic_x100_smpt_set - Update an SMPT entry with a DMA address.
+ * @mdev: pointer to mic_device instance
+ *
+ * RETURNS: none.
+ */
+static void
+mic_x100_smpt_set(struct mic_device *mdev, dma_addr_t dma_addr, u8 index)
+{
+#define SNOOP_ON	(0 << 0)
+#define SNOOP_OFF	(1 << 0)
+/*
+ * Sbox Smpt Reg Bits:
+ * Bits	31:2	Host address
+ * Bits	1	RSVD
+ * Bits	0	No snoop
+ */
+#define BUILD_SMPT(NO_SNOOP, HOST_ADDR)  \
+	(u32)(((HOST_ADDR) << 2) | ((NO_SNOOP) & 0x01))
+
+	uint32_t smpt_reg_val = BUILD_SMPT(SNOOP_ON,
+			dma_addr >> mdev->smpt->info.page_shift);
+	mic_mmio_write(&mdev->mmio, smpt_reg_val,
+		       MIC_X100_SBOX_BASE_ADDRESS +
+		       MIC_X100_SBOX_SMPT00 + (4 * index));
+}
+
+/**
+ * mic_x100_smpt_hw_init - Initialize SMPT X100 specific fields.
+ * @mdev: pointer to mic_device instance
+ *
+ * RETURNS: none.
+ */
+static void mic_x100_smpt_hw_init(struct mic_device *mdev)
+{
+	struct mic_smpt_hw_info *info = &mdev->smpt->info;
+
+	info->num_reg = 32;
+	info->page_shift = 34;
+	info->page_size = (1ULL << info->page_shift);
+	info->base = 0x8000000000ULL;
+}
+
+struct mic_smpt_ops mic_x100_smpt_ops = {
+	.init = mic_x100_smpt_hw_init,
+	.set = mic_x100_smpt_set,
+};
+
+static bool mic_x100_dma_filter(struct dma_chan *chan, void *param)
+{
+	if (chan->device->dev->parent == (struct device *)param)
+		return true;
+	return false;
+}
+
+struct mic_hw_ops mic_x100_ops = {
+	.aper_bar = MIC_X100_APER_BAR,
+	.mmio_bar = MIC_X100_MMIO_BAR,
+	.read_spad = mic_x100_read_spad,
+	.write_spad = mic_x100_write_spad,
+	.send_intr = mic_x100_send_intr,
+	.ack_interrupt = mic_x100_ack_interrupt,
+	.intr_workarounds = mic_x100_intr_workarounds,
+	.reset = mic_x100_hw_reset,
+	.reset_fw_ready = mic_x100_reset_fw_ready,
+	.is_fw_ready = mic_x100_is_fw_ready,
+	.send_firmware_intr = mic_x100_send_firmware_intr,
+	.load_mic_fw = mic_x100_load_firmware,
+	.get_postcode = mic_x100_get_postcode,
+	.dma_filter = mic_x100_dma_filter,
+};
+
+struct mic_hw_intr_ops mic_x100_intr_ops = {
+	.intr_init = mic_x100_hw_intr_init,
+	.enable_interrupts = mic_x100_enable_interrupts,
+	.disable_interrupts = mic_x100_disable_interrupts,
+	.program_msi_to_src_map = mic_x100_program_msi_to_src_map,
+	.read_msi_to_src_map = mic_x100_read_msi_to_src_map,
+};
diff --git a/drivers/misc/mic/host/mic_x100.h b/drivers/misc/mic/host/mic_x100.h
new file mode 100644
index 000000000..8b7daa182
--- /dev/null
+++ b/drivers/misc/mic/host/mic_x100.h
@@ -0,0 +1,98 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Host driver.
+ *
+ */
+#ifndef _MIC_X100_HW_H_
+#define _MIC_X100_HW_H_
+
+#define MIC_X100_PCI_DEVICE_2250 0x2250
+#define MIC_X100_PCI_DEVICE_2251 0x2251
+#define MIC_X100_PCI_DEVICE_2252 0x2252
+#define MIC_X100_PCI_DEVICE_2253 0x2253
+#define MIC_X100_PCI_DEVICE_2254 0x2254
+#define MIC_X100_PCI_DEVICE_2255 0x2255
+#define MIC_X100_PCI_DEVICE_2256 0x2256
+#define MIC_X100_PCI_DEVICE_2257 0x2257
+#define MIC_X100_PCI_DEVICE_2258 0x2258
+#define MIC_X100_PCI_DEVICE_2259 0x2259
+#define MIC_X100_PCI_DEVICE_225a 0x225a
+#define MIC_X100_PCI_DEVICE_225b 0x225b
+#define MIC_X100_PCI_DEVICE_225c 0x225c
+#define MIC_X100_PCI_DEVICE_225d 0x225d
+#define MIC_X100_PCI_DEVICE_225e 0x225e
+
+#define MIC_X100_APER_BAR 0
+#define MIC_X100_MMIO_BAR 4
+
+#define MIC_X100_SBOX_BASE_ADDRESS 0x00010000
+#define MIC_X100_SBOX_SPAD0 0x0000AB20
+#define MIC_X100_SBOX_SICR0_DBR(x) ((x) & 0xf)
+#define MIC_X100_SBOX_SICR0_DMA(x) (((x) >> 8) & 0xff)
+#define MIC_X100_SBOX_SICE0_DBR(x) ((x) & 0xf)
+#define MIC_X100_SBOX_DBR_BITS(x) ((x) & 0xf)
+#define MIC_X100_SBOX_SICE0_DMA(x) (((x) >> 8) & 0xff)
+#define MIC_X100_SBOX_DMA_BITS(x) (((x) & 0xff) << 8)
+
+#define MIC_X100_SBOX_APICICR0 0x0000A9D0
+#define MIC_X100_SBOX_SICR0 0x00009004
+#define MIC_X100_SBOX_SICE0 0x0000900C
+#define MIC_X100_SBOX_SICC0 0x00009010
+#define MIC_X100_SBOX_SIAC0 0x00009014
+#define MIC_X100_SBOX_MSIXPBACR 0x00009084
+#define MIC_X100_SBOX_MXAR0 0x00009044
+#define MIC_X100_SBOX_SMPT00 0x00003100
+#define MIC_X100_SBOX_RDMASR0 0x0000B180
+
+#define MIC_X100_DOORBELL_IDX_START 0
+#define MIC_X100_NUM_DOORBELL 4
+#define MIC_X100_DMA_IDX_START 8
+#define MIC_X100_NUM_DMA 8
+#define MIC_X100_ERR_IDX_START 30
+#define MIC_X100_NUM_ERR 1
+
+#define MIC_X100_NUM_SBOX_IRQ 8
+#define MIC_X100_NUM_RDMASR_IRQ 8
+#define MIC_X100_RDMASR_IRQ_BASE 17
+#define MIC_X100_SPAD2_DOWNLOAD_STATUS(x) ((x) & 0x1)
+#define MIC_X100_SPAD2_APIC_ID(x)	(((x) >> 1) & 0x1ff)
+#define MIC_X100_SPAD2_DOWNLOAD_ADDR(x) ((x) & 0xfffff000)
+#define MIC_X100_SBOX_APICICR7 0x0000AA08
+#define MIC_X100_SBOX_RGCR 0x00004010
+#define MIC_X100_SBOX_SDBIC0 0x0000CC90
+#define MIC_X100_DOWNLOAD_INFO 2
+#define MIC_X100_FW_SIZE 5
+#define MIC_X100_POSTCODE 0x242c
+
+static const u16 mic_x100_intr_init[] = {
+		MIC_X100_DOORBELL_IDX_START,
+		MIC_X100_DMA_IDX_START,
+		MIC_X100_ERR_IDX_START,
+		MIC_X100_NUM_DOORBELL,
+		MIC_X100_NUM_DMA,
+		MIC_X100_NUM_ERR,
+};
+
+/* Host->Card(bootstrap) Interrupt Vector */
+#define MIC_X100_BSP_INTERRUPT_VECTOR 229
+
+extern struct mic_hw_ops mic_x100_ops;
+extern struct mic_smpt_ops mic_x100_smpt_ops;
+extern struct mic_hw_intr_ops mic_x100_intr_ops;
+
+#endif
diff --git a/drivers/misc/mic/scif/Makefile b/drivers/misc/mic/scif/Makefile
new file mode 100644
index 000000000..ff372555d
--- /dev/null
+++ b/drivers/misc/mic/scif/Makefile
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile - SCIF driver.
+# Copyright(c) 2014, Intel Corporation.
+#
+obj-$(CONFIG_SCIF) += scif.o
+scif-objs := scif_main.o
+scif-objs += scif_peer_bus.o
+scif-objs += scif_ports.o
+scif-objs += scif_debugfs.o
+scif-objs += scif_fd.o
+scif-objs += scif_api.o
+scif-objs += scif_epd.o
+scif-objs += scif_rb.o
+scif-objs += scif_nodeqp.o
+scif-objs += scif_nm.o
+scif-objs += scif_dma.o
+scif-objs += scif_fence.o
+scif-objs += scif_mmap.o
+scif-objs += scif_rma.o
+scif-objs += scif_rma_list.o
diff --git a/drivers/misc/mic/scif/scif_api.c b/drivers/misc/mic/scif/scif_api.c
new file mode 100644
index 000000000..8dd0ccede
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_api.c
@@ -0,0 +1,1496 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/scif.h>
+#include "scif_main.h"
+#include "scif_map.h"
+
+static const char * const scif_ep_states[] = {
+	"Unbound",
+	"Bound",
+	"Listening",
+	"Connected",
+	"Connecting",
+	"Mapping",
+	"Closing",
+	"Close Listening",
+	"Disconnected",
+	"Zombie"};
+
+enum conn_async_state {
+	ASYNC_CONN_IDLE = 1,	/* ep setup for async connect */
+	ASYNC_CONN_INPROGRESS,	/* async connect in progress */
+	ASYNC_CONN_FLUSH_WORK	/* async work flush in progress  */
+};
+
+/*
+ * File operations for anonymous inode file associated with a SCIF endpoint,
+ * used in kernel mode SCIF poll. Kernel mode SCIF poll calls portions of the
+ * poll API in the kernel and these take in a struct file *. Since a struct
+ * file is not available to kernel mode SCIF, it uses an anonymous file for
+ * this purpose.
+ */
+const struct file_operations scif_anon_fops = {
+	.owner = THIS_MODULE,
+};
+
+scif_epd_t scif_open(void)
+{
+	struct scif_endpt *ep;
+	int err;
+
+	might_sleep();
+	ep = kzalloc(sizeof(*ep), GFP_KERNEL);
+	if (!ep)
+		goto err_ep_alloc;
+
+	ep->qp_info.qp = kzalloc(sizeof(*ep->qp_info.qp), GFP_KERNEL);
+	if (!ep->qp_info.qp)
+		goto err_qp_alloc;
+
+	err = scif_anon_inode_getfile(ep);
+	if (err)
+		goto err_anon_inode;
+
+	spin_lock_init(&ep->lock);
+	mutex_init(&ep->sendlock);
+	mutex_init(&ep->recvlock);
+
+	scif_rma_ep_init(ep);
+	ep->state = SCIFEP_UNBOUND;
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI open: ep %p success\n", ep);
+	return ep;
+
+err_anon_inode:
+	kfree(ep->qp_info.qp);
+err_qp_alloc:
+	kfree(ep);
+err_ep_alloc:
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(scif_open);
+
+/*
+ * scif_disconnect_ep - Disconnects the endpoint if found
+ * @epd: The end point returned from scif_open()
+ */
+static struct scif_endpt *scif_disconnect_ep(struct scif_endpt *ep)
+{
+	struct scifmsg msg;
+	struct scif_endpt *fep = NULL;
+	struct scif_endpt *tmpep;
+	struct list_head *pos, *tmpq;
+	int err;
+
+	/*
+	 * Wake up any threads blocked in send()/recv() before closing
+	 * out the connection. Grabbing and releasing the send/recv lock
+	 * will ensure that any blocked senders/receivers have exited for
+	 * Ring 0 endpoints. It is a Ring 0 bug to call send/recv after
+	 * close. Ring 3 endpoints are not affected since close will not
+	 * be called while there are IOCTLs executing.
+	 */
+	wake_up_interruptible(&ep->sendwq);
+	wake_up_interruptible(&ep->recvwq);
+	mutex_lock(&ep->sendlock);
+	mutex_unlock(&ep->sendlock);
+	mutex_lock(&ep->recvlock);
+	mutex_unlock(&ep->recvlock);
+
+	/* Remove from the connected list */
+	mutex_lock(&scif_info.connlock);
+	list_for_each_safe(pos, tmpq, &scif_info.connected) {
+		tmpep = list_entry(pos, struct scif_endpt, list);
+		if (tmpep == ep) {
+			list_del(pos);
+			fep = tmpep;
+			spin_lock(&ep->lock);
+			break;
+		}
+	}
+
+	if (!fep) {
+		/*
+		 * The other side has completed the disconnect before
+		 * the end point can be removed from the list. Therefore
+		 * the ep lock is not locked, traverse the disconnected
+		 * list to find the endpoint and release the conn lock.
+		 */
+		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
+			tmpep = list_entry(pos, struct scif_endpt, list);
+			if (tmpep == ep) {
+				list_del(pos);
+				break;
+			}
+		}
+		mutex_unlock(&scif_info.connlock);
+		return NULL;
+	}
+
+	init_completion(&ep->discon);
+	msg.uop = SCIF_DISCNCT;
+	msg.src = ep->port;
+	msg.dst = ep->peer;
+	msg.payload[0] = (u64)ep;
+	msg.payload[1] = ep->remote_ep;
+
+	err = scif_nodeqp_send(ep->remote_dev, &msg);
+	spin_unlock(&ep->lock);
+	mutex_unlock(&scif_info.connlock);
+
+	if (!err)
+		/* Wait for the remote node to respond with SCIF_DISCNT_ACK */
+		wait_for_completion_timeout(&ep->discon,
+					    SCIF_NODE_ALIVE_TIMEOUT);
+	return ep;
+}
+
+int scif_close(scif_epd_t epd)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scif_endpt *tmpep;
+	struct list_head *pos, *tmpq;
+	enum scif_epd_state oldstate;
+	bool flush_conn;
+
+	dev_dbg(scif_info.mdev.this_device, "SCIFAPI close: ep %p %s\n",
+		ep, scif_ep_states[ep->state]);
+	might_sleep();
+	spin_lock(&ep->lock);
+	flush_conn = (ep->conn_async_state == ASYNC_CONN_INPROGRESS);
+	spin_unlock(&ep->lock);
+
+	if (flush_conn)
+		flush_work(&scif_info.conn_work);
+
+	spin_lock(&ep->lock);
+	oldstate = ep->state;
+
+	ep->state = SCIFEP_CLOSING;
+
+	switch (oldstate) {
+	case SCIFEP_ZOMBIE:
+		dev_err(scif_info.mdev.this_device,
+			"SCIFAPI close: zombie state unexpected\n");
+		/* fall through */
+	case SCIFEP_DISCONNECTED:
+		spin_unlock(&ep->lock);
+		scif_unregister_all_windows(epd);
+		/* Remove from the disconnected list */
+		mutex_lock(&scif_info.connlock);
+		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
+			tmpep = list_entry(pos, struct scif_endpt, list);
+			if (tmpep == ep) {
+				list_del(pos);
+				break;
+			}
+		}
+		mutex_unlock(&scif_info.connlock);
+		break;
+	case SCIFEP_UNBOUND:
+	case SCIFEP_BOUND:
+	case SCIFEP_CONNECTING:
+		spin_unlock(&ep->lock);
+		break;
+	case SCIFEP_MAPPING:
+	case SCIFEP_CONNECTED:
+	case SCIFEP_CLOSING:
+	{
+		spin_unlock(&ep->lock);
+		scif_unregister_all_windows(epd);
+		scif_disconnect_ep(ep);
+		break;
+	}
+	case SCIFEP_LISTENING:
+	case SCIFEP_CLLISTEN:
+	{
+		struct scif_conreq *conreq;
+		struct scifmsg msg;
+		struct scif_endpt *aep;
+
+		spin_unlock(&ep->lock);
+		mutex_lock(&scif_info.eplock);
+
+		/* remove from listen list */
+		list_for_each_safe(pos, tmpq, &scif_info.listen) {
+			tmpep = list_entry(pos, struct scif_endpt, list);
+			if (tmpep == ep)
+				list_del(pos);
+		}
+		/* Remove any dangling accepts */
+		while (ep->acceptcnt) {
+			aep = list_first_entry(&ep->li_accept,
+					       struct scif_endpt, liacceptlist);
+			list_del(&aep->liacceptlist);
+			scif_put_port(aep->port.port);
+			list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
+				tmpep = list_entry(pos, struct scif_endpt,
+						   miacceptlist);
+				if (tmpep == aep) {
+					list_del(pos);
+					break;
+				}
+			}
+			mutex_unlock(&scif_info.eplock);
+			mutex_lock(&scif_info.connlock);
+			list_for_each_safe(pos, tmpq, &scif_info.connected) {
+				tmpep = list_entry(pos,
+						   struct scif_endpt, list);
+				if (tmpep == aep) {
+					list_del(pos);
+					break;
+				}
+			}
+			list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
+				tmpep = list_entry(pos,
+						   struct scif_endpt, list);
+				if (tmpep == aep) {
+					list_del(pos);
+					break;
+				}
+			}
+			mutex_unlock(&scif_info.connlock);
+			scif_teardown_ep(aep);
+			mutex_lock(&scif_info.eplock);
+			scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD);
+			ep->acceptcnt--;
+		}
+
+		spin_lock(&ep->lock);
+		mutex_unlock(&scif_info.eplock);
+
+		/* Remove and reject any pending connection requests. */
+		while (ep->conreqcnt) {
+			conreq = list_first_entry(&ep->conlist,
+						  struct scif_conreq, list);
+			list_del(&conreq->list);
+
+			msg.uop = SCIF_CNCT_REJ;
+			msg.dst.node = conreq->msg.src.node;
+			msg.dst.port = conreq->msg.src.port;
+			msg.payload[0] = conreq->msg.payload[0];
+			msg.payload[1] = conreq->msg.payload[1];
+			/*
+			 * No Error Handling on purpose for scif_nodeqp_send().
+			 * If the remote node is lost we still want free the
+			 * connection requests on the self node.
+			 */
+			scif_nodeqp_send(&scif_dev[conreq->msg.src.node],
+					 &msg);
+			ep->conreqcnt--;
+			kfree(conreq);
+		}
+
+		spin_unlock(&ep->lock);
+		/* If a kSCIF accept is waiting wake it up */
+		wake_up_interruptible(&ep->conwq);
+		break;
+	}
+	}
+	scif_put_port(ep->port.port);
+	scif_anon_inode_fput(ep);
+	scif_teardown_ep(ep);
+	scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(scif_close);
+
+/**
+ * scif_flush() - Wakes up any blocking accepts. The endpoint will no longer
+ *			accept new connections.
+ * @epd: The end point returned from scif_open()
+ */
+int __scif_flush(scif_epd_t epd)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+
+	switch (ep->state) {
+	case SCIFEP_LISTENING:
+	{
+		ep->state = SCIFEP_CLLISTEN;
+
+		/* If an accept is waiting wake it up */
+		wake_up_interruptible(&ep->conwq);
+		break;
+	}
+	default:
+		break;
+	}
+	return 0;
+}
+
+int scif_bind(scif_epd_t epd, u16 pn)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int ret = 0;
+	int tmp;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI bind: ep %p %s requested port number %d\n",
+		ep, scif_ep_states[ep->state], pn);
+	if (pn) {
+		/*
+		 * Similar to IETF RFC 1700, SCIF ports below
+		 * SCIF_ADMIN_PORT_END can only be bound by system (or root)
+		 * processes or by processes executed by privileged users.
+		 */
+		if (pn < SCIF_ADMIN_PORT_END && !capable(CAP_SYS_ADMIN)) {
+			ret = -EACCES;
+			goto scif_bind_admin_exit;
+		}
+	}
+
+	spin_lock(&ep->lock);
+	if (ep->state == SCIFEP_BOUND) {
+		ret = -EINVAL;
+		goto scif_bind_exit;
+	} else if (ep->state != SCIFEP_UNBOUND) {
+		ret = -EISCONN;
+		goto scif_bind_exit;
+	}
+
+	if (pn) {
+		tmp = scif_rsrv_port(pn);
+		if (tmp != pn) {
+			ret = -EINVAL;
+			goto scif_bind_exit;
+		}
+	} else {
+		ret = scif_get_new_port();
+		if (ret < 0)
+			goto scif_bind_exit;
+		pn = ret;
+	}
+
+	ep->state = SCIFEP_BOUND;
+	ep->port.node = scif_info.nodeid;
+	ep->port.port = pn;
+	ep->conn_async_state = ASYNC_CONN_IDLE;
+	ret = pn;
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI bind: bound to port number %d\n", pn);
+scif_bind_exit:
+	spin_unlock(&ep->lock);
+scif_bind_admin_exit:
+	return ret;
+}
+EXPORT_SYMBOL_GPL(scif_bind);
+
+int scif_listen(scif_epd_t epd, int backlog)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI listen: ep %p %s\n", ep, scif_ep_states[ep->state]);
+	spin_lock(&ep->lock);
+	switch (ep->state) {
+	case SCIFEP_ZOMBIE:
+	case SCIFEP_CLOSING:
+	case SCIFEP_CLLISTEN:
+	case SCIFEP_UNBOUND:
+	case SCIFEP_DISCONNECTED:
+		spin_unlock(&ep->lock);
+		return -EINVAL;
+	case SCIFEP_LISTENING:
+	case SCIFEP_CONNECTED:
+	case SCIFEP_CONNECTING:
+	case SCIFEP_MAPPING:
+		spin_unlock(&ep->lock);
+		return -EISCONN;
+	case SCIFEP_BOUND:
+		break;
+	}
+
+	ep->state = SCIFEP_LISTENING;
+	ep->backlog = backlog;
+
+	ep->conreqcnt = 0;
+	ep->acceptcnt = 0;
+	INIT_LIST_HEAD(&ep->conlist);
+	init_waitqueue_head(&ep->conwq);
+	INIT_LIST_HEAD(&ep->li_accept);
+	spin_unlock(&ep->lock);
+
+	/*
+	 * Listen status is complete so delete the qp information not needed
+	 * on a listen before placing on the list of listening ep's
+	 */
+	scif_teardown_ep(ep);
+	ep->qp_info.qp = NULL;
+
+	mutex_lock(&scif_info.eplock);
+	list_add_tail(&ep->list, &scif_info.listen);
+	mutex_unlock(&scif_info.eplock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(scif_listen);
+
+/*
+ ************************************************************************
+ * SCIF connection flow:
+ *
+ * 1) A SCIF listening endpoint can call scif_accept(..) to wait for SCIF
+ *	connections via a SCIF_CNCT_REQ message
+ * 2) A SCIF endpoint can initiate a SCIF connection by calling
+ *	scif_connect(..) which calls scif_setup_qp_connect(..) which
+ *	allocates the local qp for the endpoint ring buffer and then sends
+ *	a SCIF_CNCT_REQ to the remote node and waits for a SCIF_CNCT_GNT or
+ *	a SCIF_CNCT_REJ message
+ * 3) The peer node handles a SCIF_CNCT_REQ via scif_cnctreq_resp(..) which
+ *	wakes up any threads blocked in step 1 or sends a SCIF_CNCT_REJ
+ *	message otherwise
+ * 4) A thread blocked waiting for incoming connections allocates its local
+ *	endpoint QP and ring buffer following which it sends a SCIF_CNCT_GNT
+ *	and waits for a SCIF_CNCT_GNT(N)ACK. If the allocation fails then
+ *	the node sends a SCIF_CNCT_REJ message
+ * 5) Upon receipt of a SCIF_CNCT_GNT or a SCIF_CNCT_REJ message the
+ *	connecting endpoint is woken up as part of handling
+ *	scif_cnctgnt_resp(..) following which it maps the remote endpoints'
+ *	QP, updates its outbound QP and sends a SCIF_CNCT_GNTACK message on
+ *	success or a SCIF_CNCT_GNTNACK message on failure and completes
+ *	the scif_connect(..) API
+ * 6) Upon receipt of a SCIF_CNCT_GNT(N)ACK the accepting endpoint blocked
+ *	in step 4 is woken up and completes the scif_accept(..) API
+ * 7) The SCIF connection is now established between the two SCIF endpoints.
+ */
+static int scif_conn_func(struct scif_endpt *ep)
+{
+	int err = 0;
+	struct scifmsg msg;
+	struct device *spdev;
+
+	err = scif_reserve_dma_chan(ep);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		ep->state = SCIFEP_BOUND;
+		goto connect_error_simple;
+	}
+	/* Initiate the first part of the endpoint QP setup */
+	err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset,
+				    SCIF_ENDPT_QP_SIZE, ep->remote_dev);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s err %d qp_offset 0x%llx\n",
+			__func__, err, ep->qp_info.qp_offset);
+		ep->state = SCIFEP_BOUND;
+		goto connect_error_simple;
+	}
+
+	spdev = scif_get_peer_dev(ep->remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		goto cleanup_qp;
+	}
+	/* Format connect message and send it */
+	msg.src = ep->port;
+	msg.dst = ep->conn_port;
+	msg.uop = SCIF_CNCT_REQ;
+	msg.payload[0] = (u64)ep;
+	msg.payload[1] = ep->qp_info.qp_offset;
+	err = _scif_nodeqp_send(ep->remote_dev, &msg);
+	if (err)
+		goto connect_error_dec;
+	scif_put_peer_dev(spdev);
+	/*
+	 * Wait for the remote node to respond with SCIF_CNCT_GNT or
+	 * SCIF_CNCT_REJ message.
+	 */
+	err = wait_event_timeout(ep->conwq, ep->state != SCIFEP_CONNECTING,
+				 SCIF_NODE_ALIVE_TIMEOUT);
+	if (!err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d timeout\n", __func__, __LINE__);
+		ep->state = SCIFEP_BOUND;
+	}
+	spdev = scif_get_peer_dev(ep->remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		goto cleanup_qp;
+	}
+	if (ep->state == SCIFEP_MAPPING) {
+		err = scif_setup_qp_connect_response(ep->remote_dev,
+						     ep->qp_info.qp,
+						     ep->qp_info.gnt_pld);
+		/*
+		 * If the resource to map the queue are not available then
+		 * we need to tell the other side to terminate the accept
+		 */
+		if (err) {
+			dev_err(&ep->remote_dev->sdev->dev,
+				"%s %d err %d\n", __func__, __LINE__, err);
+			msg.uop = SCIF_CNCT_GNTNACK;
+			msg.payload[0] = ep->remote_ep;
+			_scif_nodeqp_send(ep->remote_dev, &msg);
+			ep->state = SCIFEP_BOUND;
+			goto connect_error_dec;
+		}
+
+		msg.uop = SCIF_CNCT_GNTACK;
+		msg.payload[0] = ep->remote_ep;
+		err = _scif_nodeqp_send(ep->remote_dev, &msg);
+		if (err) {
+			ep->state = SCIFEP_BOUND;
+			goto connect_error_dec;
+		}
+		ep->state = SCIFEP_CONNECTED;
+		mutex_lock(&scif_info.connlock);
+		list_add_tail(&ep->list, &scif_info.connected);
+		mutex_unlock(&scif_info.connlock);
+		dev_dbg(&ep->remote_dev->sdev->dev,
+			"SCIFAPI connect: ep %p connected\n", ep);
+	} else if (ep->state == SCIFEP_BOUND) {
+		dev_dbg(&ep->remote_dev->sdev->dev,
+			"SCIFAPI connect: ep %p connection refused\n", ep);
+		err = -ECONNREFUSED;
+		goto connect_error_dec;
+	}
+	scif_put_peer_dev(spdev);
+	return err;
+connect_error_dec:
+	scif_put_peer_dev(spdev);
+cleanup_qp:
+	scif_cleanup_ep_qp(ep);
+connect_error_simple:
+	return err;
+}
+
+/*
+ * scif_conn_handler:
+ *
+ * Workqueue handler for servicing non-blocking SCIF connect
+ *
+ */
+void scif_conn_handler(struct work_struct *work)
+{
+	struct scif_endpt *ep;
+
+	do {
+		ep = NULL;
+		spin_lock(&scif_info.nb_connect_lock);
+		if (!list_empty(&scif_info.nb_connect_list)) {
+			ep = list_first_entry(&scif_info.nb_connect_list,
+					      struct scif_endpt, conn_list);
+			list_del(&ep->conn_list);
+		}
+		spin_unlock(&scif_info.nb_connect_lock);
+		if (ep) {
+			ep->conn_err = scif_conn_func(ep);
+			wake_up_interruptible(&ep->conn_pend_wq);
+		}
+	} while (ep);
+}
+
+int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int err = 0;
+	struct scif_dev *remote_dev;
+	struct device *spdev;
+
+	dev_dbg(scif_info.mdev.this_device, "SCIFAPI connect: ep %p %s\n", ep,
+		scif_ep_states[ep->state]);
+
+	if (!scif_dev || dst->node > scif_info.maxid)
+		return -ENODEV;
+
+	might_sleep();
+
+	remote_dev = &scif_dev[dst->node];
+	spdev = scif_get_peer_dev(remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		return err;
+	}
+
+	spin_lock(&ep->lock);
+	switch (ep->state) {
+	case SCIFEP_ZOMBIE:
+	case SCIFEP_CLOSING:
+		err = -EINVAL;
+		break;
+	case SCIFEP_DISCONNECTED:
+		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
+			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
+		else
+			err = -EINVAL;
+		break;
+	case SCIFEP_LISTENING:
+	case SCIFEP_CLLISTEN:
+		err = -EOPNOTSUPP;
+		break;
+	case SCIFEP_CONNECTING:
+	case SCIFEP_MAPPING:
+		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
+			err = -EINPROGRESS;
+		else
+			err = -EISCONN;
+		break;
+	case SCIFEP_CONNECTED:
+		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
+			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
+		else
+			err = -EISCONN;
+		break;
+	case SCIFEP_UNBOUND:
+		err = scif_get_new_port();
+		if (err < 0)
+			break;
+		ep->port.port = err;
+		ep->port.node = scif_info.nodeid;
+		ep->conn_async_state = ASYNC_CONN_IDLE;
+		/* Fall through */
+	case SCIFEP_BOUND:
+		/*
+		 * If a non-blocking connect has been already initiated
+		 * (conn_async_state is either ASYNC_CONN_INPROGRESS or
+		 * ASYNC_CONN_FLUSH_WORK), the end point could end up in
+		 * SCIF_BOUND due an error in the connection process
+		 * (e.g., connection refused) If conn_async_state is
+		 * ASYNC_CONN_INPROGRESS - transition to ASYNC_CONN_FLUSH_WORK
+		 * so that the error status can be collected. If the state is
+		 * already ASYNC_CONN_FLUSH_WORK - then set the error to
+		 * EINPROGRESS since some other thread is waiting to collect
+		 * error status.
+		 */
+		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
+		} else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
+			err = -EINPROGRESS;
+		} else {
+			ep->conn_port = *dst;
+			init_waitqueue_head(&ep->sendwq);
+			init_waitqueue_head(&ep->recvwq);
+			init_waitqueue_head(&ep->conwq);
+			ep->conn_async_state = 0;
+
+			if (unlikely(non_block))
+				ep->conn_async_state = ASYNC_CONN_INPROGRESS;
+		}
+		break;
+	}
+
+	if (err || ep->conn_async_state == ASYNC_CONN_FLUSH_WORK)
+			goto connect_simple_unlock1;
+
+	ep->state = SCIFEP_CONNECTING;
+	ep->remote_dev = &scif_dev[dst->node];
+	ep->qp_info.qp->magic = SCIFEP_MAGIC;
+	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+		init_waitqueue_head(&ep->conn_pend_wq);
+		spin_lock(&scif_info.nb_connect_lock);
+		list_add_tail(&ep->conn_list, &scif_info.nb_connect_list);
+		spin_unlock(&scif_info.nb_connect_lock);
+		err = -EINPROGRESS;
+		schedule_work(&scif_info.conn_work);
+	}
+connect_simple_unlock1:
+	spin_unlock(&ep->lock);
+	scif_put_peer_dev(spdev);
+	if (err) {
+		return err;
+	} else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
+		flush_work(&scif_info.conn_work);
+		err = ep->conn_err;
+		spin_lock(&ep->lock);
+		ep->conn_async_state = ASYNC_CONN_IDLE;
+		spin_unlock(&ep->lock);
+	} else {
+		err = scif_conn_func(ep);
+	}
+	return err;
+}
+
+int scif_connect(scif_epd_t epd, struct scif_port_id *dst)
+{
+	return __scif_connect(epd, dst, false);
+}
+EXPORT_SYMBOL_GPL(scif_connect);
+
+/**
+ * scif_accept() - Accept a connection request from the remote node
+ *
+ * The function accepts a connection request from the remote node.  Successful
+ * complete is indicate by a new end point being created and passed back
+ * to the caller for future reference.
+ *
+ * Upon successful complete a zero will be returned and the peer information
+ * will be filled in.
+ *
+ * If the end point is not in the listening state -EINVAL will be returned.
+ *
+ * If during the connection sequence resource allocation fails the -ENOMEM
+ * will be returned.
+ *
+ * If the function is called with the ASYNC flag set and no connection requests
+ * are pending it will return -EAGAIN.
+ *
+ * If the remote side is not sending any connection requests the caller may
+ * terminate this function with a signal.  If so a -EINTR will be returned.
+ */
+int scif_accept(scif_epd_t epd, struct scif_port_id *peer,
+		scif_epd_t *newepd, int flags)
+{
+	struct scif_endpt *lep = (struct scif_endpt *)epd;
+	struct scif_endpt *cep;
+	struct scif_conreq *conreq;
+	struct scifmsg msg;
+	int err;
+	struct device *spdev;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI accept: ep %p %s\n", lep, scif_ep_states[lep->state]);
+
+	if (flags & ~SCIF_ACCEPT_SYNC)
+		return -EINVAL;
+
+	if (!peer || !newepd)
+		return -EINVAL;
+
+	might_sleep();
+	spin_lock(&lep->lock);
+	if (lep->state != SCIFEP_LISTENING) {
+		spin_unlock(&lep->lock);
+		return -EINVAL;
+	}
+
+	if (!lep->conreqcnt && !(flags & SCIF_ACCEPT_SYNC)) {
+		/* No connection request present and we do not want to wait */
+		spin_unlock(&lep->lock);
+		return -EAGAIN;
+	}
+
+	lep->files = current->files;
+retry_connection:
+	spin_unlock(&lep->lock);
+	/* Wait for the remote node to send us a SCIF_CNCT_REQ */
+	err = wait_event_interruptible(lep->conwq,
+				       (lep->conreqcnt ||
+				       (lep->state != SCIFEP_LISTENING)));
+	if (err)
+		return err;
+
+	if (lep->state != SCIFEP_LISTENING)
+		return -EINTR;
+
+	spin_lock(&lep->lock);
+
+	if (!lep->conreqcnt)
+		goto retry_connection;
+
+	/* Get the first connect request off the list */
+	conreq = list_first_entry(&lep->conlist, struct scif_conreq, list);
+	list_del(&conreq->list);
+	lep->conreqcnt--;
+	spin_unlock(&lep->lock);
+
+	/* Fill in the peer information */
+	peer->node = conreq->msg.src.node;
+	peer->port = conreq->msg.src.port;
+
+	cep = kzalloc(sizeof(*cep), GFP_KERNEL);
+	if (!cep) {
+		err = -ENOMEM;
+		goto scif_accept_error_epalloc;
+	}
+	spin_lock_init(&cep->lock);
+	mutex_init(&cep->sendlock);
+	mutex_init(&cep->recvlock);
+	cep->state = SCIFEP_CONNECTING;
+	cep->remote_dev = &scif_dev[peer->node];
+	cep->remote_ep = conreq->msg.payload[0];
+
+	scif_rma_ep_init(cep);
+
+	err = scif_reserve_dma_chan(cep);
+	if (err) {
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto scif_accept_error_qpalloc;
+	}
+
+	cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL);
+	if (!cep->qp_info.qp) {
+		err = -ENOMEM;
+		goto scif_accept_error_qpalloc;
+	}
+
+	err = scif_anon_inode_getfile(cep);
+	if (err)
+		goto scif_accept_error_anon_inode;
+
+	cep->qp_info.qp->magic = SCIFEP_MAGIC;
+	spdev = scif_get_peer_dev(cep->remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		goto scif_accept_error_map;
+	}
+	err = scif_setup_qp_accept(cep->qp_info.qp, &cep->qp_info.qp_offset,
+				   conreq->msg.payload[1], SCIF_ENDPT_QP_SIZE,
+				   cep->remote_dev);
+	if (err) {
+		dev_dbg(&cep->remote_dev->sdev->dev,
+			"SCIFAPI accept: ep %p new %p scif_setup_qp_accept %d qp_offset 0x%llx\n",
+			lep, cep, err, cep->qp_info.qp_offset);
+		scif_put_peer_dev(spdev);
+		goto scif_accept_error_map;
+	}
+
+	cep->port.node = lep->port.node;
+	cep->port.port = lep->port.port;
+	cep->peer.node = peer->node;
+	cep->peer.port = peer->port;
+	init_waitqueue_head(&cep->sendwq);
+	init_waitqueue_head(&cep->recvwq);
+	init_waitqueue_head(&cep->conwq);
+
+	msg.uop = SCIF_CNCT_GNT;
+	msg.src = cep->port;
+	msg.payload[0] = cep->remote_ep;
+	msg.payload[1] = cep->qp_info.qp_offset;
+	msg.payload[2] = (u64)cep;
+
+	err = _scif_nodeqp_send(cep->remote_dev, &msg);
+	scif_put_peer_dev(spdev);
+	if (err)
+		goto scif_accept_error_map;
+retry:
+	/* Wait for the remote node to respond with SCIF_CNCT_GNT(N)ACK */
+	err = wait_event_timeout(cep->conwq, cep->state != SCIFEP_CONNECTING,
+				 SCIF_NODE_ACCEPT_TIMEOUT);
+	if (!err && scifdev_alive(cep))
+		goto retry;
+	err = !err ? -ENODEV : 0;
+	if (err)
+		goto scif_accept_error_map;
+	kfree(conreq);
+
+	spin_lock(&cep->lock);
+
+	if (cep->state == SCIFEP_CLOSING) {
+		/*
+		 * Remote failed to allocate resources and NAKed the grant.
+		 * There is at this point nothing referencing the new end point.
+		 */
+		spin_unlock(&cep->lock);
+		scif_teardown_ep(cep);
+		kfree(cep);
+
+		/* If call with sync flag then go back and wait. */
+		if (flags & SCIF_ACCEPT_SYNC) {
+			spin_lock(&lep->lock);
+			goto retry_connection;
+		}
+		return -EAGAIN;
+	}
+
+	scif_get_port(cep->port.port);
+	*newepd = (scif_epd_t)cep;
+	spin_unlock(&cep->lock);
+	return 0;
+scif_accept_error_map:
+	scif_anon_inode_fput(cep);
+scif_accept_error_anon_inode:
+	scif_teardown_ep(cep);
+scif_accept_error_qpalloc:
+	kfree(cep);
+scif_accept_error_epalloc:
+	msg.uop = SCIF_CNCT_REJ;
+	msg.dst.node = conreq->msg.src.node;
+	msg.dst.port = conreq->msg.src.port;
+	msg.payload[0] = conreq->msg.payload[0];
+	msg.payload[1] = conreq->msg.payload[1];
+	scif_nodeqp_send(&scif_dev[conreq->msg.src.node], &msg);
+	kfree(conreq);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_accept);
+
+/*
+ * scif_msg_param_check:
+ * @epd: The end point returned from scif_open()
+ * @len: Length to receive
+ * @flags: blocking or non blocking
+ *
+ * Validate parameters for messaging APIs scif_send(..)/scif_recv(..).
+ */
+static inline int scif_msg_param_check(scif_epd_t epd, int len, int flags)
+{
+	int ret = -EINVAL;
+
+	if (len < 0)
+		goto err_ret;
+	if (flags && (!(flags & SCIF_RECV_BLOCK)))
+		goto err_ret;
+	ret = 0;
+err_ret:
+	return ret;
+}
+
+static int _scif_send(scif_epd_t epd, void *msg, int len, int flags)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scifmsg notif_msg;
+	int curr_xfer_len = 0, sent_len = 0, write_count;
+	int ret = 0;
+	struct scif_qp *qp = ep->qp_info.qp;
+
+	if (flags & SCIF_SEND_BLOCK)
+		might_sleep();
+
+	spin_lock(&ep->lock);
+	while (sent_len != len && SCIFEP_CONNECTED == ep->state) {
+		write_count = scif_rb_space(&qp->outbound_q);
+		if (write_count) {
+			/* Best effort to send as much data as possible */
+			curr_xfer_len = min(len - sent_len, write_count);
+			ret = scif_rb_write(&qp->outbound_q, msg,
+					    curr_xfer_len);
+			if (ret < 0)
+				break;
+			/* Success. Update write pointer */
+			scif_rb_commit(&qp->outbound_q);
+			/*
+			 * Send a notification to the peer about the
+			 * produced data message.
+			 */
+			notif_msg.src = ep->port;
+			notif_msg.uop = SCIF_CLIENT_SENT;
+			notif_msg.payload[0] = ep->remote_ep;
+			ret = _scif_nodeqp_send(ep->remote_dev, &notif_msg);
+			if (ret)
+				break;
+			sent_len += curr_xfer_len;
+			msg = msg + curr_xfer_len;
+			continue;
+		}
+		curr_xfer_len = min(len - sent_len, SCIF_ENDPT_QP_SIZE - 1);
+		/* Not enough RB space. return for the Non Blocking case */
+		if (!(flags & SCIF_SEND_BLOCK))
+			break;
+
+		spin_unlock(&ep->lock);
+		/* Wait for a SCIF_CLIENT_RCVD message in the Blocking case */
+		ret =
+		wait_event_interruptible(ep->sendwq,
+					 (SCIFEP_CONNECTED != ep->state) ||
+					 (scif_rb_space(&qp->outbound_q) >=
+					 curr_xfer_len));
+		spin_lock(&ep->lock);
+		if (ret)
+			break;
+	}
+	if (sent_len)
+		ret = sent_len;
+	else if (!ret && SCIFEP_CONNECTED != ep->state)
+		ret = SCIFEP_DISCONNECTED == ep->state ?
+			-ECONNRESET : -ENOTCONN;
+	spin_unlock(&ep->lock);
+	return ret;
+}
+
+static int _scif_recv(scif_epd_t epd, void *msg, int len, int flags)
+{
+	int read_size;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scifmsg notif_msg;
+	int curr_recv_len = 0, remaining_len = len, read_count;
+	int ret = 0;
+	struct scif_qp *qp = ep->qp_info.qp;
+
+	if (flags & SCIF_RECV_BLOCK)
+		might_sleep();
+	spin_lock(&ep->lock);
+	while (remaining_len && (SCIFEP_CONNECTED == ep->state ||
+				 SCIFEP_DISCONNECTED == ep->state)) {
+		read_count = scif_rb_count(&qp->inbound_q, remaining_len);
+		if (read_count) {
+			/*
+			 * Best effort to recv as much data as there
+			 * are bytes to read in the RB particularly
+			 * important for the Non Blocking case.
+			 */
+			curr_recv_len = min(remaining_len, read_count);
+			read_size = scif_rb_get_next(&qp->inbound_q,
+						     msg, curr_recv_len);
+			if (ep->state == SCIFEP_CONNECTED) {
+				/*
+				 * Update the read pointer only if the endpoint
+				 * is still connected else the read pointer
+				 * might no longer exist since the peer has
+				 * freed resources!
+				 */
+				scif_rb_update_read_ptr(&qp->inbound_q);
+				/*
+				 * Send a notification to the peer about the
+				 * consumed data message only if the EP is in
+				 * SCIFEP_CONNECTED state.
+				 */
+				notif_msg.src = ep->port;
+				notif_msg.uop = SCIF_CLIENT_RCVD;
+				notif_msg.payload[0] = ep->remote_ep;
+				ret = _scif_nodeqp_send(ep->remote_dev,
+							&notif_msg);
+				if (ret)
+					break;
+			}
+			remaining_len -= curr_recv_len;
+			msg = msg + curr_recv_len;
+			continue;
+		}
+		/*
+		 * Bail out now if the EP is in SCIFEP_DISCONNECTED state else
+		 * we will keep looping forever.
+		 */
+		if (ep->state == SCIFEP_DISCONNECTED)
+			break;
+		/*
+		 * Return in the Non Blocking case if there is no data
+		 * to read in this iteration.
+		 */
+		if (!(flags & SCIF_RECV_BLOCK))
+			break;
+		curr_recv_len = min(remaining_len, SCIF_ENDPT_QP_SIZE - 1);
+		spin_unlock(&ep->lock);
+		/*
+		 * Wait for a SCIF_CLIENT_SEND message in the blocking case
+		 * or until other side disconnects.
+		 */
+		ret =
+		wait_event_interruptible(ep->recvwq,
+					 SCIFEP_CONNECTED != ep->state ||
+					 scif_rb_count(&qp->inbound_q,
+						       curr_recv_len)
+					 >= curr_recv_len);
+		spin_lock(&ep->lock);
+		if (ret)
+			break;
+	}
+	if (len - remaining_len)
+		ret = len - remaining_len;
+	else if (!ret && ep->state != SCIFEP_CONNECTED)
+		ret = ep->state == SCIFEP_DISCONNECTED ?
+			-ECONNRESET : -ENOTCONN;
+	spin_unlock(&ep->lock);
+	return ret;
+}
+
+/**
+ * scif_user_send() - Send data to connection queue
+ * @epd: The end point returned from scif_open()
+ * @msg: Address to place data
+ * @len: Length to receive
+ * @flags: blocking or non blocking
+ *
+ * This function is called from the driver IOCTL entry point
+ * only and is a wrapper for _scif_send().
+ */
+int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int err = 0;
+	int sent_len = 0;
+	char *tmp;
+	int loop_len;
+	int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI send (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
+	if (!len)
+		return 0;
+
+	err = scif_msg_param_check(epd, len, flags);
+	if (err)
+		goto send_err;
+
+	tmp = kmalloc(chunk_len, GFP_KERNEL);
+	if (!tmp) {
+		err = -ENOMEM;
+		goto send_err;
+	}
+	/*
+	 * Grabbing the lock before breaking up the transfer in
+	 * multiple chunks is required to ensure that messages do
+	 * not get fragmented and reordered.
+	 */
+	mutex_lock(&ep->sendlock);
+	while (sent_len != len) {
+		loop_len = len - sent_len;
+		loop_len = min(chunk_len, loop_len);
+		if (copy_from_user(tmp, msg, loop_len)) {
+			err = -EFAULT;
+			goto send_free_err;
+		}
+		err = _scif_send(epd, tmp, loop_len, flags);
+		if (err < 0)
+			goto send_free_err;
+		sent_len += err;
+		msg += err;
+		if (err != loop_len)
+			goto send_free_err;
+	}
+send_free_err:
+	mutex_unlock(&ep->sendlock);
+	kfree(tmp);
+send_err:
+	return err < 0 ? err : sent_len;
+}
+
+/**
+ * scif_user_recv() - Receive data from connection queue
+ * @epd: The end point returned from scif_open()
+ * @msg: Address to place data
+ * @len: Length to receive
+ * @flags: blocking or non blocking
+ *
+ * This function is called from the driver IOCTL entry point
+ * only and is a wrapper for _scif_recv().
+ */
+int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int err = 0;
+	int recv_len = 0;
+	char *tmp;
+	int loop_len;
+	int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI recv (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
+	if (!len)
+		return 0;
+
+	err = scif_msg_param_check(epd, len, flags);
+	if (err)
+		goto recv_err;
+
+	tmp = kmalloc(chunk_len, GFP_KERNEL);
+	if (!tmp) {
+		err = -ENOMEM;
+		goto recv_err;
+	}
+	/*
+	 * Grabbing the lock before breaking up the transfer in
+	 * multiple chunks is required to ensure that messages do
+	 * not get fragmented and reordered.
+	 */
+	mutex_lock(&ep->recvlock);
+	while (recv_len != len) {
+		loop_len = len - recv_len;
+		loop_len = min(chunk_len, loop_len);
+		err = _scif_recv(epd, tmp, loop_len, flags);
+		if (err < 0)
+			goto recv_free_err;
+		if (copy_to_user(msg, tmp, err)) {
+			err = -EFAULT;
+			goto recv_free_err;
+		}
+		recv_len += err;
+		msg += err;
+		if (err != loop_len)
+			goto recv_free_err;
+	}
+recv_free_err:
+	mutex_unlock(&ep->recvlock);
+	kfree(tmp);
+recv_err:
+	return err < 0 ? err : recv_len;
+}
+
+/**
+ * scif_send() - Send data to connection queue
+ * @epd: The end point returned from scif_open()
+ * @msg: Address to place data
+ * @len: Length to receive
+ * @flags: blocking or non blocking
+ *
+ * This function is called from the kernel mode only and is
+ * a wrapper for _scif_send().
+ */
+int scif_send(scif_epd_t epd, void *msg, int len, int flags)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int ret;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI send (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
+	if (!len)
+		return 0;
+
+	ret = scif_msg_param_check(epd, len, flags);
+	if (ret)
+		return ret;
+	if (!ep->remote_dev)
+		return -ENOTCONN;
+	/*
+	 * Grab the mutex lock in the blocking case only
+	 * to ensure messages do not get fragmented/reordered.
+	 * The non blocking mode is protected using spin locks
+	 * in _scif_send().
+	 */
+	if (flags & SCIF_SEND_BLOCK)
+		mutex_lock(&ep->sendlock);
+
+	ret = _scif_send(epd, msg, len, flags);
+
+	if (flags & SCIF_SEND_BLOCK)
+		mutex_unlock(&ep->sendlock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(scif_send);
+
+/**
+ * scif_recv() - Receive data from connection queue
+ * @epd: The end point returned from scif_open()
+ * @msg: Address to place data
+ * @len: Length to receive
+ * @flags: blocking or non blocking
+ *
+ * This function is called from the kernel mode only and is
+ * a wrapper for _scif_recv().
+ */
+int scif_recv(scif_epd_t epd, void *msg, int len, int flags)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int ret;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI recv (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
+	if (!len)
+		return 0;
+
+	ret = scif_msg_param_check(epd, len, flags);
+	if (ret)
+		return ret;
+	/*
+	 * Grab the mutex lock in the blocking case only
+	 * to ensure messages do not get fragmented/reordered.
+	 * The non blocking mode is protected using spin locks
+	 * in _scif_send().
+	 */
+	if (flags & SCIF_RECV_BLOCK)
+		mutex_lock(&ep->recvlock);
+
+	ret = _scif_recv(epd, msg, len, flags);
+
+	if (flags & SCIF_RECV_BLOCK)
+		mutex_unlock(&ep->recvlock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(scif_recv);
+
+static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq,
+				   poll_table *p, struct scif_endpt *ep)
+{
+	/*
+	 * Because poll_wait makes a GFP_KERNEL allocation, give up the lock
+	 * and regrab it afterwards. Because the endpoint state might have
+	 * changed while the lock was given up, the state must be checked
+	 * again after re-acquiring the lock. The code in __scif_pollfd(..)
+	 * does this.
+	 */
+	spin_unlock(&ep->lock);
+	poll_wait(f, wq, p);
+	spin_lock(&ep->lock);
+}
+
+__poll_t
+__scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep)
+{
+	__poll_t mask = 0;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]);
+
+	spin_lock(&ep->lock);
+
+	/* Endpoint is waiting for a non-blocking connect to complete */
+	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+		_scif_poll_wait(f, &ep->conn_pend_wq, wait, ep);
+		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+			if (ep->state == SCIFEP_CONNECTED ||
+			    ep->state == SCIFEP_DISCONNECTED ||
+			    ep->conn_err)
+				mask |= EPOLLOUT;
+			goto exit;
+		}
+	}
+
+	/* Endpoint is listening for incoming connection requests */
+	if (ep->state == SCIFEP_LISTENING) {
+		_scif_poll_wait(f, &ep->conwq, wait, ep);
+		if (ep->state == SCIFEP_LISTENING) {
+			if (ep->conreqcnt)
+				mask |= EPOLLIN;
+			goto exit;
+		}
+	}
+
+	/* Endpoint is connected or disconnected */
+	if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) {
+		if (poll_requested_events(wait) & EPOLLIN)
+			_scif_poll_wait(f, &ep->recvwq, wait, ep);
+		if (poll_requested_events(wait) & EPOLLOUT)
+			_scif_poll_wait(f, &ep->sendwq, wait, ep);
+		if (ep->state == SCIFEP_CONNECTED ||
+		    ep->state == SCIFEP_DISCONNECTED) {
+			/* Data can be read without blocking */
+			if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1))
+				mask |= EPOLLIN;
+			/* Data can be written without blocking */
+			if (scif_rb_space(&ep->qp_info.qp->outbound_q))
+				mask |= EPOLLOUT;
+			/* Return EPOLLHUP if endpoint is disconnected */
+			if (ep->state == SCIFEP_DISCONNECTED)
+				mask |= EPOLLHUP;
+			goto exit;
+		}
+	}
+
+	/* Return EPOLLERR if the endpoint is in none of the above states */
+	mask |= EPOLLERR;
+exit:
+	spin_unlock(&ep->lock);
+	return mask;
+}
+
+/**
+ * scif_poll() - Kernel mode SCIF poll
+ * @ufds: Array of scif_pollepd structures containing the end points
+ *	  and events to poll on
+ * @nfds: Size of the ufds array
+ * @timeout_msecs: Timeout in msecs, -ve implies infinite timeout
+ *
+ * The code flow in this function is based on do_poll(..) in select.c
+ *
+ * Returns the number of endpoints which have pending events or 0 in
+ * the event of a timeout. If a signal is used for wake up, -EINTR is
+ * returned.
+ */
+int
+scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs)
+{
+	struct poll_wqueues table;
+	poll_table *pt;
+	int i, count = 0, timed_out = timeout_msecs == 0;
+	__poll_t mask;
+	u64 timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT
+		: msecs_to_jiffies(timeout_msecs);
+
+	poll_initwait(&table);
+	pt = &table.pt;
+	while (1) {
+		for (i = 0; i < nfds; i++) {
+			pt->_key = ufds[i].events | EPOLLERR | EPOLLHUP;
+			mask = __scif_pollfd(ufds[i].epd->anon,
+					     pt, ufds[i].epd);
+			mask &= ufds[i].events | EPOLLERR | EPOLLHUP;
+			if (mask) {
+				count++;
+				pt->_qproc = NULL;
+			}
+			ufds[i].revents = mask;
+		}
+		pt->_qproc = NULL;
+		if (!count) {
+			count = table.error;
+			if (signal_pending(current))
+				count = -EINTR;
+		}
+		if (count || timed_out)
+			break;
+
+		if (!schedule_timeout_interruptible(timeout))
+			timed_out = 1;
+	}
+	poll_freewait(&table);
+	return count;
+}
+EXPORT_SYMBOL_GPL(scif_poll);
+
+int scif_get_node_ids(u16 *nodes, int len, u16 *self)
+{
+	int online = 0;
+	int offset = 0;
+	int node;
+
+	if (!scif_is_mgmt_node())
+		scif_get_node_info();
+
+	*self = scif_info.nodeid;
+	mutex_lock(&scif_info.conflock);
+	len = min_t(int, len, scif_info.total);
+	for (node = 0; node <= scif_info.maxid; node++) {
+		if (_scifdev_alive(&scif_dev[node])) {
+			online++;
+			if (offset < len)
+				nodes[offset++] = node;
+		}
+	}
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI get_node_ids total %d online %d filled in %d nodes\n",
+		scif_info.total, online, offset);
+	mutex_unlock(&scif_info.conflock);
+
+	return online;
+}
+EXPORT_SYMBOL_GPL(scif_get_node_ids);
+
+static int scif_add_client_dev(struct device *dev, struct subsys_interface *si)
+{
+	struct scif_client *client =
+		container_of(si, struct scif_client, si);
+	struct scif_peer_dev *spdev =
+		container_of(dev, struct scif_peer_dev, dev);
+
+	if (client->probe)
+		client->probe(spdev);
+	return 0;
+}
+
+static void scif_remove_client_dev(struct device *dev,
+				   struct subsys_interface *si)
+{
+	struct scif_client *client =
+		container_of(si, struct scif_client, si);
+	struct scif_peer_dev *spdev =
+		container_of(dev, struct scif_peer_dev, dev);
+
+	if (client->remove)
+		client->remove(spdev);
+}
+
+void scif_client_unregister(struct scif_client *client)
+{
+	subsys_interface_unregister(&client->si);
+}
+EXPORT_SYMBOL_GPL(scif_client_unregister);
+
+int scif_client_register(struct scif_client *client)
+{
+	struct subsys_interface *si = &client->si;
+
+	si->name = client->name;
+	si->subsys = &scif_peer_bus;
+	si->add_dev = scif_add_client_dev;
+	si->remove_dev = scif_remove_client_dev;
+
+	return subsys_interface_register(&client->si);
+}
+EXPORT_SYMBOL_GPL(scif_client_register);
diff --git a/drivers/misc/mic/scif/scif_debugfs.c b/drivers/misc/mic/scif/scif_debugfs.c
new file mode 100644
index 000000000..6884dad97
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_debugfs.c
@@ -0,0 +1,162 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "../common/mic_dev.h"
+#include "scif_main.h"
+
+/* Debugfs parent dir */
+static struct dentry *scif_dbg;
+
+static int scif_dev_test(struct seq_file *s, void *unused)
+{
+	int node;
+
+	seq_printf(s, "Total Nodes %d Self Node Id %d Maxid %d\n",
+		   scif_info.total, scif_info.nodeid,
+		   scif_info.maxid);
+
+	if (!scif_dev)
+		return 0;
+
+	seq_printf(s, "%-16s\t%-16s\n", "node_id", "state");
+
+	for (node = 0; node <= scif_info.maxid; node++)
+		seq_printf(s, "%-16d\t%-16s\n", scif_dev[node].node,
+			   _scifdev_alive(&scif_dev[node]) ?
+			   "Running" : "Offline");
+	return 0;
+}
+
+static int scif_dev_test_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, scif_dev_test, inode->i_private);
+}
+
+static int scif_dev_test_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file);
+}
+
+static const struct file_operations scif_dev_ops = {
+	.owner   = THIS_MODULE,
+	.open    = scif_dev_test_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = scif_dev_test_release
+};
+
+static void scif_display_window(struct scif_window *window, struct seq_file *s)
+{
+	int j;
+	struct scatterlist *sg;
+	scif_pinned_pages_t pin = window->pinned_pages;
+
+	seq_printf(s, "window %p type %d temp %d offset 0x%llx ",
+		   window, window->type, window->temp, window->offset);
+	seq_printf(s, "nr_pages 0x%llx nr_contig_chunks 0x%x prot %d ",
+		   window->nr_pages, window->nr_contig_chunks, window->prot);
+	seq_printf(s, "ref_count %d magic 0x%llx peer_window 0x%llx ",
+		   window->ref_count, window->magic, window->peer_window);
+	seq_printf(s, "unreg_state 0x%x va_for_temp 0x%lx\n",
+		   window->unreg_state, window->va_for_temp);
+
+	for (j = 0; j < window->nr_contig_chunks; j++)
+		seq_printf(s, "page[%d] dma_addr 0x%llx num_pages 0x%llx\n", j,
+			   window->dma_addr[j], window->num_pages[j]);
+
+	if (window->type == SCIF_WINDOW_SELF && pin)
+		for (j = 0; j < window->nr_pages; j++)
+			seq_printf(s, "page[%d] = pinned_pages %p address %p\n",
+				   j, pin->pages[j],
+				   page_address(pin->pages[j]));
+
+	if (window->st)
+		for_each_sg(window->st->sgl, sg, window->st->nents, j)
+			seq_printf(s, "sg[%d] dma addr 0x%llx length 0x%x\n",
+				   j, sg_dma_address(sg), sg_dma_len(sg));
+}
+
+static void scif_display_all_windows(struct list_head *head, struct seq_file *s)
+{
+	struct list_head *item;
+	struct scif_window *window;
+
+	list_for_each(item, head) {
+		window = list_entry(item, struct scif_window, list);
+		scif_display_window(window, s);
+	}
+}
+
+static int scif_rma_test(struct seq_file *s, void *unused)
+{
+	struct scif_endpt *ep;
+	struct list_head *pos;
+
+	mutex_lock(&scif_info.connlock);
+	list_for_each(pos, &scif_info.connected) {
+		ep = list_entry(pos, struct scif_endpt, list);
+		seq_printf(s, "ep %p self windows\n", ep);
+		mutex_lock(&ep->rma_info.rma_lock);
+		scif_display_all_windows(&ep->rma_info.reg_list, s);
+		seq_printf(s, "ep %p remote windows\n", ep);
+		scif_display_all_windows(&ep->rma_info.remote_reg_list, s);
+		mutex_unlock(&ep->rma_info.rma_lock);
+	}
+	mutex_unlock(&scif_info.connlock);
+	return 0;
+}
+
+static int scif_rma_test_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, scif_rma_test, inode->i_private);
+}
+
+static int scif_rma_test_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file);
+}
+
+static const struct file_operations scif_rma_ops = {
+	.owner   = THIS_MODULE,
+	.open    = scif_rma_test_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = scif_rma_test_release
+};
+
+void __init scif_init_debugfs(void)
+{
+	scif_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
+	if (!scif_dbg) {
+		dev_err(scif_info.mdev.this_device,
+			"can't create debugfs dir scif\n");
+		return;
+	}
+
+	debugfs_create_file("scif_dev", 0444, scif_dbg, NULL, &scif_dev_ops);
+	debugfs_create_file("scif_rma", 0444, scif_dbg, NULL, &scif_rma_ops);
+	debugfs_create_u8("en_msg_log", 0666, scif_dbg, &scif_info.en_msg_log);
+	debugfs_create_u8("p2p_enable", 0666, scif_dbg, &scif_info.p2p_enable);
+}
+
+void scif_exit_debugfs(void)
+{
+	debugfs_remove_recursive(scif_dbg);
+}
diff --git a/drivers/misc/mic/scif/scif_dma.c b/drivers/misc/mic/scif/scif_dma.c
new file mode 100644
index 000000000..6369aeaa7
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_dma.c
@@ -0,0 +1,1960 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+#include "scif_map.h"
+
+/*
+ * struct scif_dma_comp_cb - SCIF DMA completion callback
+ *
+ * @dma_completion_func: DMA completion callback
+ * @cb_cookie: DMA completion callback cookie
+ * @temp_buf: Temporary buffer
+ * @temp_buf_to_free: Temporary buffer to be freed
+ * @is_cache: Is a kmem_cache allocated buffer
+ * @dst_offset: Destination registration offset
+ * @dst_window: Destination registration window
+ * @len: Length of the temp buffer
+ * @temp_phys: DMA address of the temp buffer
+ * @sdev: The SCIF device
+ * @header_padding: padding for cache line alignment
+ */
+struct scif_dma_comp_cb {
+	void (*dma_completion_func)(void *cookie);
+	void *cb_cookie;
+	u8 *temp_buf;
+	u8 *temp_buf_to_free;
+	bool is_cache;
+	s64 dst_offset;
+	struct scif_window *dst_window;
+	size_t len;
+	dma_addr_t temp_phys;
+	struct scif_dev *sdev;
+	int header_padding;
+};
+
+/**
+ * struct scif_copy_work - Work for DMA copy
+ *
+ * @src_offset: Starting source offset
+ * @dst_offset: Starting destination offset
+ * @src_window: Starting src registered window
+ * @dst_window: Starting dst registered window
+ * @loopback: true if this is a loopback DMA transfer
+ * @len: Length of the transfer
+ * @comp_cb: DMA copy completion callback
+ * @remote_dev: The remote SCIF peer device
+ * @fence_type: polling or interrupt based
+ * @ordered: is this a tail byte ordered DMA transfer
+ */
+struct scif_copy_work {
+	s64 src_offset;
+	s64 dst_offset;
+	struct scif_window *src_window;
+	struct scif_window *dst_window;
+	int loopback;
+	size_t len;
+	struct scif_dma_comp_cb   *comp_cb;
+	struct scif_dev	*remote_dev;
+	int fence_type;
+	bool ordered;
+};
+
+/**
+ * scif_reserve_dma_chan:
+ * @ep: Endpoint Descriptor.
+ *
+ * This routine reserves a DMA channel for a particular
+ * endpoint. All DMA transfers for an endpoint are always
+ * programmed on the same DMA channel.
+ */
+int scif_reserve_dma_chan(struct scif_endpt *ep)
+{
+	int err = 0;
+	struct scif_dev *scifdev;
+	struct scif_hw_dev *sdev;
+	struct dma_chan *chan;
+
+	/* Loopback DMAs are not supported on the management node */
+	if (!scif_info.nodeid && scifdev_self(ep->remote_dev))
+		return 0;
+	if (scif_info.nodeid)
+		scifdev = &scif_dev[0];
+	else
+		scifdev = ep->remote_dev;
+	sdev = scifdev->sdev;
+	if (!sdev->num_dma_ch)
+		return -ENODEV;
+	chan = sdev->dma_ch[scifdev->dma_ch_idx];
+	scifdev->dma_ch_idx = (scifdev->dma_ch_idx + 1) % sdev->num_dma_ch;
+	mutex_lock(&ep->rma_info.rma_lock);
+	ep->rma_info.dma_chan = chan;
+	mutex_unlock(&ep->rma_info.rma_lock);
+	return err;
+}
+
+#ifdef CONFIG_MMU_NOTIFIER
+/**
+ * scif_rma_destroy_tcw:
+ *
+ * This routine destroys temporary cached windows
+ */
+static
+void __scif_rma_destroy_tcw(struct scif_mmu_notif *mmn,
+			    u64 start, u64 len)
+{
+	struct list_head *item, *tmp;
+	struct scif_window *window;
+	u64 start_va, end_va;
+	u64 end = start + len;
+
+	if (end <= start)
+		return;
+
+	list_for_each_safe(item, tmp, &mmn->tc_reg_list) {
+		window = list_entry(item, struct scif_window, list);
+		if (!len)
+			break;
+		start_va = window->va_for_temp;
+		end_va = start_va + (window->nr_pages << PAGE_SHIFT);
+		if (start < start_va && end <= start_va)
+			break;
+		if (start >= end_va)
+			continue;
+		__scif_rma_destroy_tcw_helper(window);
+	}
+}
+
+static void scif_rma_destroy_tcw(struct scif_mmu_notif *mmn, u64 start, u64 len)
+{
+	struct scif_endpt *ep = mmn->ep;
+
+	spin_lock(&ep->rma_info.tc_lock);
+	__scif_rma_destroy_tcw(mmn, start, len);
+	spin_unlock(&ep->rma_info.tc_lock);
+}
+
+static void scif_rma_destroy_tcw_ep(struct scif_endpt *ep)
+{
+	struct list_head *item, *tmp;
+	struct scif_mmu_notif *mmn;
+
+	list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) {
+		mmn = list_entry(item, struct scif_mmu_notif, list);
+		scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
+	}
+}
+
+static void __scif_rma_destroy_tcw_ep(struct scif_endpt *ep)
+{
+	struct list_head *item, *tmp;
+	struct scif_mmu_notif *mmn;
+
+	spin_lock(&ep->rma_info.tc_lock);
+	list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) {
+		mmn = list_entry(item, struct scif_mmu_notif, list);
+		__scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
+	}
+	spin_unlock(&ep->rma_info.tc_lock);
+}
+
+static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes)
+{
+	if ((cur_bytes >> PAGE_SHIFT) > scif_info.rma_tc_limit)
+		return false;
+	if ((atomic_read(&ep->rma_info.tcw_total_pages)
+			+ (cur_bytes >> PAGE_SHIFT)) >
+			scif_info.rma_tc_limit) {
+		dev_info(scif_info.mdev.this_device,
+			 "%s %d total=%d, current=%zu reached max\n",
+			 __func__, __LINE__,
+			 atomic_read(&ep->rma_info.tcw_total_pages),
+			 (1 + (cur_bytes >> PAGE_SHIFT)));
+		scif_rma_destroy_tcw_invalid();
+		__scif_rma_destroy_tcw_ep(ep);
+	}
+	return true;
+}
+
+static void scif_mmu_notifier_release(struct mmu_notifier *mn,
+				      struct mm_struct *mm)
+{
+	struct scif_mmu_notif	*mmn;
+
+	mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
+	scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
+	schedule_work(&scif_info.misc_work);
+}
+
+static int scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
+						     struct mm_struct *mm,
+						     unsigned long start,
+						     unsigned long end,
+						     bool blockable)
+{
+	struct scif_mmu_notif	*mmn;
+
+	mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
+	scif_rma_destroy_tcw(mmn, start, end - start);
+
+	return 0;
+}
+
+static void scif_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
+						   struct mm_struct *mm,
+						   unsigned long start,
+						   unsigned long end)
+{
+	/*
+	 * Nothing to do here, everything needed was done in
+	 * invalidate_range_start.
+	 */
+}
+
+static const struct mmu_notifier_ops scif_mmu_notifier_ops = {
+	.release = scif_mmu_notifier_release,
+	.clear_flush_young = NULL,
+	.invalidate_range_start = scif_mmu_notifier_invalidate_range_start,
+	.invalidate_range_end = scif_mmu_notifier_invalidate_range_end};
+
+static void scif_ep_unregister_mmu_notifier(struct scif_endpt *ep)
+{
+	struct scif_endpt_rma_info *rma = &ep->rma_info;
+	struct scif_mmu_notif *mmn = NULL;
+	struct list_head *item, *tmp;
+
+	mutex_lock(&ep->rma_info.mmn_lock);
+	list_for_each_safe(item, tmp, &rma->mmn_list) {
+		mmn = list_entry(item, struct scif_mmu_notif, list);
+		mmu_notifier_unregister(&mmn->ep_mmu_notifier, mmn->mm);
+		list_del(item);
+		kfree(mmn);
+	}
+	mutex_unlock(&ep->rma_info.mmn_lock);
+}
+
+static void scif_init_mmu_notifier(struct scif_mmu_notif *mmn,
+				   struct mm_struct *mm, struct scif_endpt *ep)
+{
+	mmn->ep = ep;
+	mmn->mm = mm;
+	mmn->ep_mmu_notifier.ops = &scif_mmu_notifier_ops;
+	INIT_LIST_HEAD(&mmn->list);
+	INIT_LIST_HEAD(&mmn->tc_reg_list);
+}
+
+static struct scif_mmu_notif *
+scif_find_mmu_notifier(struct mm_struct *mm, struct scif_endpt_rma_info *rma)
+{
+	struct scif_mmu_notif *mmn;
+
+	list_for_each_entry(mmn, &rma->mmn_list, list)
+		if (mmn->mm == mm)
+			return mmn;
+	return NULL;
+}
+
+static struct scif_mmu_notif *
+scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep)
+{
+	struct scif_mmu_notif *mmn
+		 = kzalloc(sizeof(*mmn), GFP_KERNEL);
+
+	if (!mmn)
+		return ERR_PTR(-ENOMEM);
+
+	scif_init_mmu_notifier(mmn, current->mm, ep);
+	if (mmu_notifier_register(&mmn->ep_mmu_notifier, current->mm)) {
+		kfree(mmn);
+		return ERR_PTR(-EBUSY);
+	}
+	list_add(&mmn->list, &ep->rma_info.mmn_list);
+	return mmn;
+}
+
+/*
+ * Called from the misc thread to destroy temporary cached windows and
+ * unregister the MMU notifier for the SCIF endpoint.
+ */
+void scif_mmu_notif_handler(struct work_struct *work)
+{
+	struct list_head *pos, *tmpq;
+	struct scif_endpt *ep;
+restart:
+	scif_rma_destroy_tcw_invalid();
+	spin_lock(&scif_info.rmalock);
+	list_for_each_safe(pos, tmpq, &scif_info.mmu_notif_cleanup) {
+		ep = list_entry(pos, struct scif_endpt, mmu_list);
+		list_del(&ep->mmu_list);
+		spin_unlock(&scif_info.rmalock);
+		scif_rma_destroy_tcw_ep(ep);
+		scif_ep_unregister_mmu_notifier(ep);
+		goto restart;
+	}
+	spin_unlock(&scif_info.rmalock);
+}
+
+static bool scif_is_set_reg_cache(int flags)
+{
+	return !!(flags & SCIF_RMA_USECACHE);
+}
+#else
+static struct scif_mmu_notif *
+scif_find_mmu_notifier(struct mm_struct *mm,
+		       struct scif_endpt_rma_info *rma)
+{
+	return NULL;
+}
+
+static struct scif_mmu_notif *
+scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep)
+{
+	return NULL;
+}
+
+void scif_mmu_notif_handler(struct work_struct *work)
+{
+}
+
+static bool scif_is_set_reg_cache(int flags)
+{
+	return false;
+}
+
+static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes)
+{
+	return false;
+}
+#endif
+
+/**
+ * scif_register_temp:
+ * @epd: End Point Descriptor.
+ * @addr: virtual address to/from which to copy
+ * @len: length of range to copy
+ * @out_offset: computed offset returned by reference.
+ * @out_window: allocated registered window returned by reference.
+ *
+ * Create a temporary registered window. The peer will not know about this
+ * window. This API is used for scif_vreadfrom()/scif_vwriteto() API's.
+ */
+static int
+scif_register_temp(scif_epd_t epd, unsigned long addr, size_t len, int prot,
+		   off_t *out_offset, struct scif_window **out_window)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int err;
+	scif_pinned_pages_t pinned_pages;
+	size_t aligned_len;
+
+	aligned_len = ALIGN(len, PAGE_SIZE);
+
+	err = __scif_pin_pages((void *)(addr & PAGE_MASK),
+			       aligned_len, &prot, 0, &pinned_pages);
+	if (err)
+		return err;
+
+	pinned_pages->prot = prot;
+
+	/* Compute the offset for this registration */
+	err = scif_get_window_offset(ep, 0, 0,
+				     aligned_len >> PAGE_SHIFT,
+				     (s64 *)out_offset);
+	if (err)
+		goto error_unpin;
+
+	/* Allocate and prepare self registration window */
+	*out_window = scif_create_window(ep, aligned_len >> PAGE_SHIFT,
+					*out_offset, true);
+	if (!*out_window) {
+		scif_free_window_offset(ep, NULL, *out_offset);
+		err = -ENOMEM;
+		goto error_unpin;
+	}
+
+	(*out_window)->pinned_pages = pinned_pages;
+	(*out_window)->nr_pages = pinned_pages->nr_pages;
+	(*out_window)->prot = pinned_pages->prot;
+
+	(*out_window)->va_for_temp = addr & PAGE_MASK;
+	err = scif_map_window(ep->remote_dev, *out_window);
+	if (err) {
+		/* Something went wrong! Rollback */
+		scif_destroy_window(ep, *out_window);
+		*out_window = NULL;
+	} else {
+		*out_offset |= (addr - (*out_window)->va_for_temp);
+	}
+	return err;
+error_unpin:
+	if (err)
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	scif_unpin_pages(pinned_pages);
+	return err;
+}
+
+#define SCIF_DMA_TO (3 * HZ)
+
+/*
+ * scif_sync_dma - Program a DMA without an interrupt descriptor
+ *
+ * @dev - The address of the pointer to the device instance used
+ * for DMA registration.
+ * @chan - DMA channel to be used.
+ * @sync_wait: Wait for DMA to complete?
+ *
+ * Return 0 on success and -errno on error.
+ */
+static int scif_sync_dma(struct scif_hw_dev *sdev, struct dma_chan *chan,
+			 bool sync_wait)
+{
+	int err = 0;
+	struct dma_async_tx_descriptor *tx = NULL;
+	enum dma_ctrl_flags flags = DMA_PREP_FENCE;
+	dma_cookie_t cookie;
+	struct dma_device *ddev;
+
+	if (!chan) {
+		err = -EIO;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+	ddev = chan->device;
+
+	tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags);
+	if (!tx) {
+		err = -ENOMEM;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto release;
+	}
+	cookie = tx->tx_submit(tx);
+
+	if (dma_submit_error(cookie)) {
+		err = -ENOMEM;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto release;
+	}
+	if (!sync_wait) {
+		dma_async_issue_pending(chan);
+	} else {
+		if (dma_sync_wait(chan, cookie) == DMA_COMPLETE) {
+			err = 0;
+		} else {
+			err = -EIO;
+			dev_err(&sdev->dev, "%s %d err %d\n",
+				__func__, __LINE__, err);
+		}
+	}
+release:
+	return err;
+}
+
+static void scif_dma_callback(void *arg)
+{
+	struct completion *done = (struct completion *)arg;
+
+	complete(done);
+}
+
+#define SCIF_DMA_SYNC_WAIT true
+#define SCIF_DMA_POLL BIT(0)
+#define SCIF_DMA_INTR BIT(1)
+
+/*
+ * scif_async_dma - Program a DMA with an interrupt descriptor
+ *
+ * @dev - The address of the pointer to the device instance used
+ * for DMA registration.
+ * @chan - DMA channel to be used.
+ * Return 0 on success and -errno on error.
+ */
+static int scif_async_dma(struct scif_hw_dev *sdev, struct dma_chan *chan)
+{
+	int err = 0;
+	struct dma_device *ddev;
+	struct dma_async_tx_descriptor *tx = NULL;
+	enum dma_ctrl_flags flags = DMA_PREP_INTERRUPT | DMA_PREP_FENCE;
+	DECLARE_COMPLETION_ONSTACK(done_wait);
+	dma_cookie_t cookie;
+	enum dma_status status;
+
+	if (!chan) {
+		err = -EIO;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+	ddev = chan->device;
+
+	tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags);
+	if (!tx) {
+		err = -ENOMEM;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto release;
+	}
+	reinit_completion(&done_wait);
+	tx->callback = scif_dma_callback;
+	tx->callback_param = &done_wait;
+	cookie = tx->tx_submit(tx);
+
+	if (dma_submit_error(cookie)) {
+		err = -ENOMEM;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto release;
+	}
+	dma_async_issue_pending(chan);
+
+	err = wait_for_completion_timeout(&done_wait, SCIF_DMA_TO);
+	if (!err) {
+		err = -EIO;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto release;
+	}
+	err = 0;
+	status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
+	if (status != DMA_COMPLETE) {
+		err = -EIO;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto release;
+	}
+release:
+	return err;
+}
+
+/*
+ * scif_drain_dma_poll - Drain all outstanding DMA operations for a particular
+ * DMA channel via polling.
+ *
+ * @sdev - The SCIF device
+ * @chan - DMA channel
+ * Return 0 on success and -errno on error.
+ */
+static int scif_drain_dma_poll(struct scif_hw_dev *sdev, struct dma_chan *chan)
+{
+	if (!chan)
+		return -EINVAL;
+	return scif_sync_dma(sdev, chan, SCIF_DMA_SYNC_WAIT);
+}
+
+/*
+ * scif_drain_dma_intr - Drain all outstanding DMA operations for a particular
+ * DMA channel via interrupt based blocking wait.
+ *
+ * @sdev - The SCIF device
+ * @chan - DMA channel
+ * Return 0 on success and -errno on error.
+ */
+int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan)
+{
+	if (!chan)
+		return -EINVAL;
+	return scif_async_dma(sdev, chan);
+}
+
+/**
+ * scif_rma_destroy_windows:
+ *
+ * This routine destroys all windows queued for cleanup
+ */
+void scif_rma_destroy_windows(void)
+{
+	struct list_head *item, *tmp;
+	struct scif_window *window;
+	struct scif_endpt *ep;
+	struct dma_chan *chan;
+
+	might_sleep();
+restart:
+	spin_lock(&scif_info.rmalock);
+	list_for_each_safe(item, tmp, &scif_info.rma) {
+		window = list_entry(item, struct scif_window,
+				    list);
+		ep = (struct scif_endpt *)window->ep;
+		chan = ep->rma_info.dma_chan;
+
+		list_del_init(&window->list);
+		spin_unlock(&scif_info.rmalock);
+		if (!chan || !scifdev_alive(ep) ||
+		    !scif_drain_dma_intr(ep->remote_dev->sdev,
+					 ep->rma_info.dma_chan))
+			/* Remove window from global list */
+			window->unreg_state = OP_COMPLETED;
+		else
+			dev_warn(&ep->remote_dev->sdev->dev,
+				 "DMA engine hung?\n");
+		if (window->unreg_state == OP_COMPLETED) {
+			if (window->type == SCIF_WINDOW_SELF)
+				scif_destroy_window(ep, window);
+			else
+				scif_destroy_remote_window(window);
+			atomic_dec(&ep->rma_info.tw_refcount);
+		}
+		goto restart;
+	}
+	spin_unlock(&scif_info.rmalock);
+}
+
+/**
+ * scif_rma_destroy_tcw:
+ *
+ * This routine destroys temporary cached registered windows
+ * which have been queued for cleanup.
+ */
+void scif_rma_destroy_tcw_invalid(void)
+{
+	struct list_head *item, *tmp;
+	struct scif_window *window;
+	struct scif_endpt *ep;
+	struct dma_chan *chan;
+
+	might_sleep();
+restart:
+	spin_lock(&scif_info.rmalock);
+	list_for_each_safe(item, tmp, &scif_info.rma_tc) {
+		window = list_entry(item, struct scif_window, list);
+		ep = (struct scif_endpt *)window->ep;
+		chan = ep->rma_info.dma_chan;
+		list_del_init(&window->list);
+		spin_unlock(&scif_info.rmalock);
+		mutex_lock(&ep->rma_info.rma_lock);
+		if (!chan || !scifdev_alive(ep) ||
+		    !scif_drain_dma_intr(ep->remote_dev->sdev,
+					 ep->rma_info.dma_chan)) {
+			atomic_sub(window->nr_pages,
+				   &ep->rma_info.tcw_total_pages);
+			scif_destroy_window(ep, window);
+			atomic_dec(&ep->rma_info.tcw_refcount);
+		} else {
+			dev_warn(&ep->remote_dev->sdev->dev,
+				 "DMA engine hung?\n");
+		}
+		mutex_unlock(&ep->rma_info.rma_lock);
+		goto restart;
+	}
+	spin_unlock(&scif_info.rmalock);
+}
+
+static inline
+void *_get_local_va(off_t off, struct scif_window *window, size_t len)
+{
+	int page_nr = (off - window->offset) >> PAGE_SHIFT;
+	off_t page_off = off & ~PAGE_MASK;
+	void *va = NULL;
+
+	if (window->type == SCIF_WINDOW_SELF) {
+		struct page **pages = window->pinned_pages->pages;
+
+		va = page_address(pages[page_nr]) + page_off;
+	}
+	return va;
+}
+
+static inline
+void *ioremap_remote(off_t off, struct scif_window *window,
+		     size_t len, struct scif_dev *dev,
+		     struct scif_window_iter *iter)
+{
+	dma_addr_t phys = scif_off_to_dma_addr(window, off, NULL, iter);
+
+	/*
+	 * If the DMA address is not card relative then we need the DMA
+	 * addresses to be an offset into the bar. The aperture base was already
+	 * added so subtract it here since scif_ioremap is going to add it again
+	 */
+	if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER &&
+	    dev->sdev->aper && !dev->sdev->card_rel_da)
+		phys = phys - dev->sdev->aper->pa;
+	return scif_ioremap(phys, len, dev);
+}
+
+static inline void
+iounmap_remote(void *virt, size_t size, struct scif_copy_work *work)
+{
+	scif_iounmap(virt, size, work->remote_dev);
+}
+
+/*
+ * Takes care of ordering issue caused by
+ * 1. Hardware:  Only in the case of cpu copy from mgmt node to card
+ * because of WC memory.
+ * 2. Software: If memcpy reorders copy instructions for optimization.
+ * This could happen at both mgmt node and card.
+ */
+static inline void
+scif_ordered_memcpy_toio(char *dst, const char *src, size_t count)
+{
+	if (!count)
+		return;
+
+	memcpy_toio((void __iomem __force *)dst, src, --count);
+	/* Order the last byte with the previous stores */
+	wmb();
+	*(dst + count) = *(src + count);
+}
+
+static inline void scif_unaligned_cpy_toio(char *dst, const char *src,
+					   size_t count, bool ordered)
+{
+	if (ordered)
+		scif_ordered_memcpy_toio(dst, src, count);
+	else
+		memcpy_toio((void __iomem __force *)dst, src, count);
+}
+
+static inline
+void scif_ordered_memcpy_fromio(char *dst, const char *src, size_t count)
+{
+	if (!count)
+		return;
+
+	memcpy_fromio(dst, (void __iomem __force *)src, --count);
+	/* Order the last byte with the previous loads */
+	rmb();
+	*(dst + count) = *(src + count);
+}
+
+static inline void scif_unaligned_cpy_fromio(char *dst, const char *src,
+					     size_t count, bool ordered)
+{
+	if (ordered)
+		scif_ordered_memcpy_fromio(dst, src, count);
+	else
+		memcpy_fromio(dst, (void __iomem __force *)src, count);
+}
+
+#define SCIF_RMA_ERROR_CODE (~(dma_addr_t)0x0)
+
+/*
+ * scif_off_to_dma_addr:
+ * Obtain the dma_addr given the window and the offset.
+ * @window: Registered window.
+ * @off: Window offset.
+ * @nr_bytes: Return the number of contiguous bytes till next DMA addr index.
+ * @index: Return the index of the dma_addr array found.
+ * @start_off: start offset of index of the dma addr array found.
+ * The nr_bytes provides the callee an estimate of the maximum possible
+ * DMA xfer possible while the index/start_off provide faster lookups
+ * for the next iteration.
+ */
+dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off,
+				size_t *nr_bytes, struct scif_window_iter *iter)
+{
+	int i, page_nr;
+	s64 start, end;
+	off_t page_off;
+
+	if (window->nr_pages == window->nr_contig_chunks) {
+		page_nr = (off - window->offset) >> PAGE_SHIFT;
+		page_off = off & ~PAGE_MASK;
+
+		if (nr_bytes)
+			*nr_bytes = PAGE_SIZE - page_off;
+		return window->dma_addr[page_nr] | page_off;
+	}
+	if (iter) {
+		i = iter->index;
+		start = iter->offset;
+	} else {
+		i =  0;
+		start =  window->offset;
+	}
+	for (; i < window->nr_contig_chunks; i++) {
+		end = start + (window->num_pages[i] << PAGE_SHIFT);
+		if (off >= start && off < end) {
+			if (iter) {
+				iter->index = i;
+				iter->offset = start;
+			}
+			if (nr_bytes)
+				*nr_bytes = end - off;
+			return (window->dma_addr[i] + (off - start));
+		}
+		start += (window->num_pages[i] << PAGE_SHIFT);
+	}
+	dev_err(scif_info.mdev.this_device,
+		"%s %d BUG. Addr not found? window %p off 0x%llx\n",
+		__func__, __LINE__, window, off);
+	return SCIF_RMA_ERROR_CODE;
+}
+
+/*
+ * Copy between rma window and temporary buffer
+ */
+static void scif_rma_local_cpu_copy(s64 offset, struct scif_window *window,
+				    u8 *temp, size_t rem_len, bool to_temp)
+{
+	void *window_virt;
+	size_t loop_len;
+	int offset_in_page;
+	s64 end_offset;
+
+	offset_in_page = offset & ~PAGE_MASK;
+	loop_len = PAGE_SIZE - offset_in_page;
+
+	if (rem_len < loop_len)
+		loop_len = rem_len;
+
+	window_virt = _get_local_va(offset, window, loop_len);
+	if (!window_virt)
+		return;
+	if (to_temp)
+		memcpy(temp, window_virt, loop_len);
+	else
+		memcpy(window_virt, temp, loop_len);
+
+	offset += loop_len;
+	temp += loop_len;
+	rem_len -= loop_len;
+
+	end_offset = window->offset +
+		(window->nr_pages << PAGE_SHIFT);
+	while (rem_len) {
+		if (offset == end_offset) {
+			window = list_next_entry(window, list);
+			end_offset = window->offset +
+				(window->nr_pages << PAGE_SHIFT);
+		}
+		loop_len = min(PAGE_SIZE, rem_len);
+		window_virt = _get_local_va(offset, window, loop_len);
+		if (!window_virt)
+			return;
+		if (to_temp)
+			memcpy(temp, window_virt, loop_len);
+		else
+			memcpy(window_virt, temp, loop_len);
+		offset	+= loop_len;
+		temp	+= loop_len;
+		rem_len	-= loop_len;
+	}
+}
+
+/**
+ * scif_rma_completion_cb:
+ * @data: RMA cookie
+ *
+ * RMA interrupt completion callback.
+ */
+static void scif_rma_completion_cb(void *data)
+{
+	struct scif_dma_comp_cb *comp_cb = data;
+
+	/* Free DMA Completion CB. */
+	if (comp_cb->dst_window)
+		scif_rma_local_cpu_copy(comp_cb->dst_offset,
+					comp_cb->dst_window,
+					comp_cb->temp_buf +
+					comp_cb->header_padding,
+					comp_cb->len, false);
+	scif_unmap_single(comp_cb->temp_phys, comp_cb->sdev,
+			  SCIF_KMEM_UNALIGNED_BUF_SIZE);
+	if (comp_cb->is_cache)
+		kmem_cache_free(unaligned_cache,
+				comp_cb->temp_buf_to_free);
+	else
+		kfree(comp_cb->temp_buf_to_free);
+}
+
+/* Copies between temporary buffer and offsets provided in work */
+static int
+scif_rma_list_dma_copy_unaligned(struct scif_copy_work *work,
+				 u8 *temp, struct dma_chan *chan,
+				 bool src_local)
+{
+	struct scif_dma_comp_cb *comp_cb = work->comp_cb;
+	dma_addr_t window_dma_addr, temp_dma_addr;
+	dma_addr_t temp_phys = comp_cb->temp_phys;
+	size_t loop_len, nr_contig_bytes = 0, remaining_len = work->len;
+	int offset_in_ca, ret = 0;
+	s64 end_offset, offset;
+	struct scif_window *window;
+	void *window_virt_addr;
+	size_t tail_len;
+	struct dma_async_tx_descriptor *tx;
+	struct dma_device *dev = chan->device;
+	dma_cookie_t cookie;
+
+	if (src_local) {
+		offset = work->dst_offset;
+		window = work->dst_window;
+	} else {
+		offset = work->src_offset;
+		window = work->src_window;
+	}
+
+	offset_in_ca = offset & (L1_CACHE_BYTES - 1);
+	if (offset_in_ca) {
+		loop_len = L1_CACHE_BYTES - offset_in_ca;
+		loop_len = min(loop_len, remaining_len);
+		window_virt_addr = ioremap_remote(offset, window,
+						  loop_len,
+						  work->remote_dev,
+						  NULL);
+		if (!window_virt_addr)
+			return -ENOMEM;
+		if (src_local)
+			scif_unaligned_cpy_toio(window_virt_addr, temp,
+						loop_len,
+						work->ordered &&
+						!(remaining_len - loop_len));
+		else
+			scif_unaligned_cpy_fromio(temp, window_virt_addr,
+						  loop_len, work->ordered &&
+						  !(remaining_len - loop_len));
+		iounmap_remote(window_virt_addr, loop_len, work);
+
+		offset += loop_len;
+		temp += loop_len;
+		temp_phys += loop_len;
+		remaining_len -= loop_len;
+	}
+
+	offset_in_ca = offset & ~PAGE_MASK;
+	end_offset = window->offset +
+		(window->nr_pages << PAGE_SHIFT);
+
+	tail_len = remaining_len & (L1_CACHE_BYTES - 1);
+	remaining_len -= tail_len;
+	while (remaining_len) {
+		if (offset == end_offset) {
+			window = list_next_entry(window, list);
+			end_offset = window->offset +
+				(window->nr_pages << PAGE_SHIFT);
+		}
+		if (scif_is_mgmt_node())
+			temp_dma_addr = temp_phys;
+		else
+			/* Fix if we ever enable IOMMU on the card */
+			temp_dma_addr = (dma_addr_t)virt_to_phys(temp);
+		window_dma_addr = scif_off_to_dma_addr(window, offset,
+						       &nr_contig_bytes,
+						       NULL);
+		loop_len = min(nr_contig_bytes, remaining_len);
+		if (src_local) {
+			if (work->ordered && !tail_len &&
+			    !(remaining_len - loop_len) &&
+			    loop_len != L1_CACHE_BYTES) {
+				/*
+				 * Break up the last chunk of the transfer into
+				 * two steps. if there is no tail to guarantee
+				 * DMA ordering. SCIF_DMA_POLLING inserts
+				 * a status update descriptor in step 1 which
+				 * acts as a double sided synchronization fence
+				 * for the DMA engine to ensure that the last
+				 * cache line in step 2 is updated last.
+				 */
+				/* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
+				tx =
+				dev->device_prep_dma_memcpy(chan,
+							    window_dma_addr,
+							    temp_dma_addr,
+							    loop_len -
+							    L1_CACHE_BYTES,
+							    DMA_PREP_FENCE);
+				if (!tx) {
+					ret = -ENOMEM;
+					goto err;
+				}
+				cookie = tx->tx_submit(tx);
+				if (dma_submit_error(cookie)) {
+					ret = -ENOMEM;
+					goto err;
+				}
+				dma_async_issue_pending(chan);
+				offset += (loop_len - L1_CACHE_BYTES);
+				temp_dma_addr += (loop_len - L1_CACHE_BYTES);
+				window_dma_addr += (loop_len - L1_CACHE_BYTES);
+				remaining_len -= (loop_len - L1_CACHE_BYTES);
+				loop_len = remaining_len;
+
+				/* Step 2) DMA: L1_CACHE_BYTES */
+				tx =
+				dev->device_prep_dma_memcpy(chan,
+							    window_dma_addr,
+							    temp_dma_addr,
+							    loop_len, 0);
+				if (!tx) {
+					ret = -ENOMEM;
+					goto err;
+				}
+				cookie = tx->tx_submit(tx);
+				if (dma_submit_error(cookie)) {
+					ret = -ENOMEM;
+					goto err;
+				}
+				dma_async_issue_pending(chan);
+			} else {
+				tx =
+				dev->device_prep_dma_memcpy(chan,
+							    window_dma_addr,
+							    temp_dma_addr,
+							    loop_len, 0);
+				if (!tx) {
+					ret = -ENOMEM;
+					goto err;
+				}
+				cookie = tx->tx_submit(tx);
+				if (dma_submit_error(cookie)) {
+					ret = -ENOMEM;
+					goto err;
+				}
+				dma_async_issue_pending(chan);
+			}
+		} else {
+			tx = dev->device_prep_dma_memcpy(chan, temp_dma_addr,
+					window_dma_addr, loop_len, 0);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			dma_async_issue_pending(chan);
+		}
+		if (ret < 0)
+			goto err;
+		offset += loop_len;
+		temp += loop_len;
+		temp_phys += loop_len;
+		remaining_len -= loop_len;
+		offset_in_ca = 0;
+	}
+	if (tail_len) {
+		if (offset == end_offset) {
+			window = list_next_entry(window, list);
+			end_offset = window->offset +
+				(window->nr_pages << PAGE_SHIFT);
+		}
+		window_virt_addr = ioremap_remote(offset, window, tail_len,
+						  work->remote_dev,
+						  NULL);
+		if (!window_virt_addr)
+			return -ENOMEM;
+		/*
+		 * The CPU copy for the tail bytes must be initiated only once
+		 * previous DMA transfers for this endpoint have completed
+		 * to guarantee ordering.
+		 */
+		if (work->ordered) {
+			struct scif_dev *rdev = work->remote_dev;
+
+			ret = scif_drain_dma_intr(rdev->sdev, chan);
+			if (ret)
+				return ret;
+		}
+		if (src_local)
+			scif_unaligned_cpy_toio(window_virt_addr, temp,
+						tail_len, work->ordered);
+		else
+			scif_unaligned_cpy_fromio(temp, window_virt_addr,
+						  tail_len, work->ordered);
+		iounmap_remote(window_virt_addr, tail_len, work);
+	}
+	tx = dev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_INTERRUPT);
+	if (!tx) {
+		ret = -ENOMEM;
+		return ret;
+	}
+	tx->callback = &scif_rma_completion_cb;
+	tx->callback_param = comp_cb;
+	cookie = tx->tx_submit(tx);
+
+	if (dma_submit_error(cookie)) {
+		ret = -ENOMEM;
+		return ret;
+	}
+	dma_async_issue_pending(chan);
+	return 0;
+err:
+	dev_err(scif_info.mdev.this_device,
+		"%s %d Desc Prog Failed ret %d\n",
+		__func__, __LINE__, ret);
+	return ret;
+}
+
+/*
+ * _scif_rma_list_dma_copy_aligned:
+ *
+ * Traverse all the windows and perform DMA copy.
+ */
+static int _scif_rma_list_dma_copy_aligned(struct scif_copy_work *work,
+					   struct dma_chan *chan)
+{
+	dma_addr_t src_dma_addr, dst_dma_addr;
+	size_t loop_len, remaining_len, src_contig_bytes = 0;
+	size_t dst_contig_bytes = 0;
+	struct scif_window_iter src_win_iter;
+	struct scif_window_iter dst_win_iter;
+	s64 end_src_offset, end_dst_offset;
+	struct scif_window *src_window = work->src_window;
+	struct scif_window *dst_window = work->dst_window;
+	s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
+	int ret = 0;
+	struct dma_async_tx_descriptor *tx;
+	struct dma_device *dev = chan->device;
+	dma_cookie_t cookie;
+
+	remaining_len = work->len;
+
+	scif_init_window_iter(src_window, &src_win_iter);
+	scif_init_window_iter(dst_window, &dst_win_iter);
+	end_src_offset = src_window->offset +
+		(src_window->nr_pages << PAGE_SHIFT);
+	end_dst_offset = dst_window->offset +
+		(dst_window->nr_pages << PAGE_SHIFT);
+	while (remaining_len) {
+		if (src_offset == end_src_offset) {
+			src_window = list_next_entry(src_window, list);
+			end_src_offset = src_window->offset +
+				(src_window->nr_pages << PAGE_SHIFT);
+			scif_init_window_iter(src_window, &src_win_iter);
+		}
+		if (dst_offset == end_dst_offset) {
+			dst_window = list_next_entry(dst_window, list);
+			end_dst_offset = dst_window->offset +
+				(dst_window->nr_pages << PAGE_SHIFT);
+			scif_init_window_iter(dst_window, &dst_win_iter);
+		}
+
+		/* compute dma addresses for transfer */
+		src_dma_addr = scif_off_to_dma_addr(src_window, src_offset,
+						    &src_contig_bytes,
+						    &src_win_iter);
+		dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset,
+						    &dst_contig_bytes,
+						    &dst_win_iter);
+		loop_len = min(src_contig_bytes, dst_contig_bytes);
+		loop_len = min(loop_len, remaining_len);
+		if (work->ordered && !(remaining_len - loop_len)) {
+			/*
+			 * Break up the last chunk of the transfer into two
+			 * steps to ensure that the last byte in step 2 is
+			 * updated last.
+			 */
+			/* Step 1) DMA: Body Length - 1 */
+			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+							 src_dma_addr,
+							 loop_len - 1,
+							 DMA_PREP_FENCE);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			src_offset += (loop_len - 1);
+			dst_offset += (loop_len - 1);
+			src_dma_addr += (loop_len - 1);
+			dst_dma_addr += (loop_len - 1);
+			remaining_len -= (loop_len - 1);
+			loop_len = remaining_len;
+
+			/* Step 2) DMA: 1 BYTES */
+			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+					src_dma_addr, loop_len, 0);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			dma_async_issue_pending(chan);
+		} else {
+			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+					src_dma_addr, loop_len, 0);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+		}
+		src_offset += loop_len;
+		dst_offset += loop_len;
+		remaining_len -= loop_len;
+	}
+	return ret;
+err:
+	dev_err(scif_info.mdev.this_device,
+		"%s %d Desc Prog Failed ret %d\n",
+		__func__, __LINE__, ret);
+	return ret;
+}
+
+/*
+ * scif_rma_list_dma_copy_aligned:
+ *
+ * Traverse all the windows and perform DMA copy.
+ */
+static int scif_rma_list_dma_copy_aligned(struct scif_copy_work *work,
+					  struct dma_chan *chan)
+{
+	dma_addr_t src_dma_addr, dst_dma_addr;
+	size_t loop_len, remaining_len, tail_len, src_contig_bytes = 0;
+	size_t dst_contig_bytes = 0;
+	int src_cache_off;
+	s64 end_src_offset, end_dst_offset;
+	struct scif_window_iter src_win_iter;
+	struct scif_window_iter dst_win_iter;
+	void *src_virt, *dst_virt;
+	struct scif_window *src_window = work->src_window;
+	struct scif_window *dst_window = work->dst_window;
+	s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
+	int ret = 0;
+	struct dma_async_tx_descriptor *tx;
+	struct dma_device *dev = chan->device;
+	dma_cookie_t cookie;
+
+	remaining_len = work->len;
+	scif_init_window_iter(src_window, &src_win_iter);
+	scif_init_window_iter(dst_window, &dst_win_iter);
+
+	src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
+	if (src_cache_off != 0) {
+		/* Head */
+		loop_len = L1_CACHE_BYTES - src_cache_off;
+		loop_len = min(loop_len, remaining_len);
+		src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset);
+		dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset);
+		if (src_window->type == SCIF_WINDOW_SELF)
+			src_virt = _get_local_va(src_offset, src_window,
+						 loop_len);
+		else
+			src_virt = ioremap_remote(src_offset, src_window,
+						  loop_len,
+						  work->remote_dev, NULL);
+		if (!src_virt)
+			return -ENOMEM;
+		if (dst_window->type == SCIF_WINDOW_SELF)
+			dst_virt = _get_local_va(dst_offset, dst_window,
+						 loop_len);
+		else
+			dst_virt = ioremap_remote(dst_offset, dst_window,
+						  loop_len,
+						  work->remote_dev, NULL);
+		if (!dst_virt) {
+			if (src_window->type != SCIF_WINDOW_SELF)
+				iounmap_remote(src_virt, loop_len, work);
+			return -ENOMEM;
+		}
+		if (src_window->type == SCIF_WINDOW_SELF)
+			scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len,
+						remaining_len == loop_len ?
+						work->ordered : false);
+		else
+			scif_unaligned_cpy_fromio(dst_virt, src_virt, loop_len,
+						  remaining_len == loop_len ?
+						  work->ordered : false);
+		if (src_window->type != SCIF_WINDOW_SELF)
+			iounmap_remote(src_virt, loop_len, work);
+		if (dst_window->type != SCIF_WINDOW_SELF)
+			iounmap_remote(dst_virt, loop_len, work);
+		src_offset += loop_len;
+		dst_offset += loop_len;
+		remaining_len -= loop_len;
+	}
+
+	end_src_offset = src_window->offset +
+		(src_window->nr_pages << PAGE_SHIFT);
+	end_dst_offset = dst_window->offset +
+		(dst_window->nr_pages << PAGE_SHIFT);
+	tail_len = remaining_len & (L1_CACHE_BYTES - 1);
+	remaining_len -= tail_len;
+	while (remaining_len) {
+		if (src_offset == end_src_offset) {
+			src_window = list_next_entry(src_window, list);
+			end_src_offset = src_window->offset +
+				(src_window->nr_pages << PAGE_SHIFT);
+			scif_init_window_iter(src_window, &src_win_iter);
+		}
+		if (dst_offset == end_dst_offset) {
+			dst_window = list_next_entry(dst_window, list);
+			end_dst_offset = dst_window->offset +
+				(dst_window->nr_pages << PAGE_SHIFT);
+			scif_init_window_iter(dst_window, &dst_win_iter);
+		}
+
+		/* compute dma addresses for transfer */
+		src_dma_addr = scif_off_to_dma_addr(src_window, src_offset,
+						    &src_contig_bytes,
+						    &src_win_iter);
+		dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset,
+						    &dst_contig_bytes,
+						    &dst_win_iter);
+		loop_len = min(src_contig_bytes, dst_contig_bytes);
+		loop_len = min(loop_len, remaining_len);
+		if (work->ordered && !tail_len &&
+		    !(remaining_len - loop_len)) {
+			/*
+			 * Break up the last chunk of the transfer into two
+			 * steps. if there is no tail to gurantee DMA ordering.
+			 * Passing SCIF_DMA_POLLING inserts a status update
+			 * descriptor in step 1 which acts as a double sided
+			 * synchronization fence for the DMA engine to ensure
+			 * that the last cache line in step 2 is updated last.
+			 */
+			/* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
+			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+							 src_dma_addr,
+							 loop_len -
+							 L1_CACHE_BYTES,
+							 DMA_PREP_FENCE);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			dma_async_issue_pending(chan);
+			src_offset += (loop_len - L1_CACHE_BYTES);
+			dst_offset += (loop_len - L1_CACHE_BYTES);
+			src_dma_addr += (loop_len - L1_CACHE_BYTES);
+			dst_dma_addr += (loop_len - L1_CACHE_BYTES);
+			remaining_len -= (loop_len - L1_CACHE_BYTES);
+			loop_len = remaining_len;
+
+			/* Step 2) DMA: L1_CACHE_BYTES */
+			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+							 src_dma_addr,
+							 loop_len, 0);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			dma_async_issue_pending(chan);
+		} else {
+			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+							 src_dma_addr,
+							 loop_len, 0);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			dma_async_issue_pending(chan);
+		}
+		src_offset += loop_len;
+		dst_offset += loop_len;
+		remaining_len -= loop_len;
+	}
+	remaining_len = tail_len;
+	if (remaining_len) {
+		loop_len = remaining_len;
+		if (src_offset == end_src_offset)
+			src_window = list_next_entry(src_window, list);
+		if (dst_offset == end_dst_offset)
+			dst_window = list_next_entry(dst_window, list);
+
+		src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset);
+		dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset);
+		/*
+		 * The CPU copy for the tail bytes must be initiated only once
+		 * previous DMA transfers for this endpoint have completed to
+		 * guarantee ordering.
+		 */
+		if (work->ordered) {
+			struct scif_dev *rdev = work->remote_dev;
+
+			ret = scif_drain_dma_poll(rdev->sdev, chan);
+			if (ret)
+				return ret;
+		}
+		if (src_window->type == SCIF_WINDOW_SELF)
+			src_virt = _get_local_va(src_offset, src_window,
+						 loop_len);
+		else
+			src_virt = ioremap_remote(src_offset, src_window,
+						  loop_len,
+						  work->remote_dev, NULL);
+		if (!src_virt)
+			return -ENOMEM;
+
+		if (dst_window->type == SCIF_WINDOW_SELF)
+			dst_virt = _get_local_va(dst_offset, dst_window,
+						 loop_len);
+		else
+			dst_virt = ioremap_remote(dst_offset, dst_window,
+						  loop_len,
+						  work->remote_dev, NULL);
+		if (!dst_virt) {
+			if (src_window->type != SCIF_WINDOW_SELF)
+				iounmap_remote(src_virt, loop_len, work);
+			return -ENOMEM;
+		}
+
+		if (src_window->type == SCIF_WINDOW_SELF)
+			scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len,
+						work->ordered);
+		else
+			scif_unaligned_cpy_fromio(dst_virt, src_virt,
+						  loop_len, work->ordered);
+		if (src_window->type != SCIF_WINDOW_SELF)
+			iounmap_remote(src_virt, loop_len, work);
+
+		if (dst_window->type != SCIF_WINDOW_SELF)
+			iounmap_remote(dst_virt, loop_len, work);
+		remaining_len -= loop_len;
+	}
+	return ret;
+err:
+	dev_err(scif_info.mdev.this_device,
+		"%s %d Desc Prog Failed ret %d\n",
+		__func__, __LINE__, ret);
+	return ret;
+}
+
+/*
+ * scif_rma_list_cpu_copy:
+ *
+ * Traverse all the windows and perform CPU copy.
+ */
+static int scif_rma_list_cpu_copy(struct scif_copy_work *work)
+{
+	void *src_virt, *dst_virt;
+	size_t loop_len, remaining_len;
+	int src_page_off, dst_page_off;
+	s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
+	struct scif_window *src_window = work->src_window;
+	struct scif_window *dst_window = work->dst_window;
+	s64 end_src_offset, end_dst_offset;
+	int ret = 0;
+	struct scif_window_iter src_win_iter;
+	struct scif_window_iter dst_win_iter;
+
+	remaining_len = work->len;
+
+	scif_init_window_iter(src_window, &src_win_iter);
+	scif_init_window_iter(dst_window, &dst_win_iter);
+	while (remaining_len) {
+		src_page_off = src_offset & ~PAGE_MASK;
+		dst_page_off = dst_offset & ~PAGE_MASK;
+		loop_len = min(PAGE_SIZE -
+			       max(src_page_off, dst_page_off),
+			       remaining_len);
+
+		if (src_window->type == SCIF_WINDOW_SELF)
+			src_virt = _get_local_va(src_offset, src_window,
+						 loop_len);
+		else
+			src_virt = ioremap_remote(src_offset, src_window,
+						  loop_len,
+						  work->remote_dev,
+						  &src_win_iter);
+		if (!src_virt) {
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		if (dst_window->type == SCIF_WINDOW_SELF)
+			dst_virt = _get_local_va(dst_offset, dst_window,
+						 loop_len);
+		else
+			dst_virt = ioremap_remote(dst_offset, dst_window,
+						  loop_len,
+						  work->remote_dev,
+						  &dst_win_iter);
+		if (!dst_virt) {
+			if (src_window->type == SCIF_WINDOW_PEER)
+				iounmap_remote(src_virt, loop_len, work);
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		if (work->loopback) {
+			memcpy(dst_virt, src_virt, loop_len);
+		} else {
+			if (src_window->type == SCIF_WINDOW_SELF)
+				memcpy_toio((void __iomem __force *)dst_virt,
+					    src_virt, loop_len);
+			else
+				memcpy_fromio(dst_virt,
+					      (void __iomem __force *)src_virt,
+					      loop_len);
+		}
+		if (src_window->type == SCIF_WINDOW_PEER)
+			iounmap_remote(src_virt, loop_len, work);
+
+		if (dst_window->type == SCIF_WINDOW_PEER)
+			iounmap_remote(dst_virt, loop_len, work);
+
+		src_offset += loop_len;
+		dst_offset += loop_len;
+		remaining_len -= loop_len;
+		if (remaining_len) {
+			end_src_offset = src_window->offset +
+				(src_window->nr_pages << PAGE_SHIFT);
+			end_dst_offset = dst_window->offset +
+				(dst_window->nr_pages << PAGE_SHIFT);
+			if (src_offset == end_src_offset) {
+				src_window = list_next_entry(src_window, list);
+				scif_init_window_iter(src_window,
+						      &src_win_iter);
+			}
+			if (dst_offset == end_dst_offset) {
+				dst_window = list_next_entry(dst_window, list);
+				scif_init_window_iter(dst_window,
+						      &dst_win_iter);
+			}
+		}
+	}
+error:
+	return ret;
+}
+
+static int scif_rma_list_dma_copy_wrapper(struct scif_endpt *epd,
+					  struct scif_copy_work *work,
+					  struct dma_chan *chan, off_t loffset)
+{
+	int src_cache_off, dst_cache_off;
+	s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
+	u8 *temp = NULL;
+	bool src_local = true, dst_local = false;
+	struct scif_dma_comp_cb *comp_cb;
+	dma_addr_t src_dma_addr, dst_dma_addr;
+	int err;
+
+	if (is_dma_copy_aligned(chan->device, 1, 1, 1))
+		return _scif_rma_list_dma_copy_aligned(work, chan);
+
+	src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
+	dst_cache_off = dst_offset & (L1_CACHE_BYTES - 1);
+
+	if (dst_cache_off == src_cache_off)
+		return scif_rma_list_dma_copy_aligned(work, chan);
+
+	if (work->loopback)
+		return scif_rma_list_cpu_copy(work);
+	src_dma_addr = __scif_off_to_dma_addr(work->src_window, src_offset);
+	dst_dma_addr = __scif_off_to_dma_addr(work->dst_window, dst_offset);
+	src_local = work->src_window->type == SCIF_WINDOW_SELF;
+	dst_local = work->dst_window->type == SCIF_WINDOW_SELF;
+
+	dst_local = dst_local;
+	/* Allocate dma_completion cb */
+	comp_cb = kzalloc(sizeof(*comp_cb), GFP_KERNEL);
+	if (!comp_cb)
+		goto error;
+
+	work->comp_cb = comp_cb;
+	comp_cb->cb_cookie = comp_cb;
+	comp_cb->dma_completion_func = &scif_rma_completion_cb;
+
+	if (work->len + (L1_CACHE_BYTES << 1) < SCIF_KMEM_UNALIGNED_BUF_SIZE) {
+		comp_cb->is_cache = false;
+		/* Allocate padding bytes to align to a cache line */
+		temp = kmalloc(work->len + (L1_CACHE_BYTES << 1),
+			       GFP_KERNEL);
+		if (!temp)
+			goto free_comp_cb;
+		comp_cb->temp_buf_to_free = temp;
+		/* kmalloc(..) does not guarantee cache line alignment */
+		if (!IS_ALIGNED((u64)temp, L1_CACHE_BYTES))
+			temp = PTR_ALIGN(temp, L1_CACHE_BYTES);
+	} else {
+		comp_cb->is_cache = true;
+		temp = kmem_cache_alloc(unaligned_cache, GFP_KERNEL);
+		if (!temp)
+			goto free_comp_cb;
+		comp_cb->temp_buf_to_free = temp;
+	}
+
+	if (src_local) {
+		temp += dst_cache_off;
+		scif_rma_local_cpu_copy(work->src_offset, work->src_window,
+					temp, work->len, true);
+	} else {
+		comp_cb->dst_window = work->dst_window;
+		comp_cb->dst_offset = work->dst_offset;
+		work->src_offset = work->src_offset - src_cache_off;
+		comp_cb->len = work->len;
+		work->len = ALIGN(work->len + src_cache_off, L1_CACHE_BYTES);
+		comp_cb->header_padding = src_cache_off;
+	}
+	comp_cb->temp_buf = temp;
+
+	err = scif_map_single(&comp_cb->temp_phys, temp,
+			      work->remote_dev, SCIF_KMEM_UNALIGNED_BUF_SIZE);
+	if (err)
+		goto free_temp_buf;
+	comp_cb->sdev = work->remote_dev;
+	if (scif_rma_list_dma_copy_unaligned(work, temp, chan, src_local) < 0)
+		goto free_temp_buf;
+	if (!src_local)
+		work->fence_type = SCIF_DMA_INTR;
+	return 0;
+free_temp_buf:
+	if (comp_cb->is_cache)
+		kmem_cache_free(unaligned_cache, comp_cb->temp_buf_to_free);
+	else
+		kfree(comp_cb->temp_buf_to_free);
+free_comp_cb:
+	kfree(comp_cb);
+error:
+	return -ENOMEM;
+}
+
+/**
+ * scif_rma_copy:
+ * @epd: end point descriptor.
+ * @loffset: offset in local registered address space to/from which to copy
+ * @addr: user virtual address to/from which to copy
+ * @len: length of range to copy
+ * @roffset: offset in remote registered address space to/from which to copy
+ * @flags: flags
+ * @dir: LOCAL->REMOTE or vice versa.
+ * @last_chunk: true if this is the last chunk of a larger transfer
+ *
+ * Validate parameters, check if src/dst registered ranges requested for copy
+ * are valid and initiate either CPU or DMA copy.
+ */
+static int scif_rma_copy(scif_epd_t epd, off_t loffset, unsigned long addr,
+			 size_t len, off_t roffset, int flags,
+			 enum scif_rma_dir dir, bool last_chunk)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scif_rma_req remote_req;
+	struct scif_rma_req req;
+	struct scif_window *local_window = NULL;
+	struct scif_window *remote_window = NULL;
+	struct scif_copy_work copy_work;
+	bool loopback;
+	int err = 0;
+	struct dma_chan *chan;
+	struct scif_mmu_notif *mmn = NULL;
+	bool cache = false;
+	struct device *spdev;
+
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	if (flags && !(flags & (SCIF_RMA_USECPU | SCIF_RMA_USECACHE |
+				SCIF_RMA_SYNC | SCIF_RMA_ORDERED)))
+		return -EINVAL;
+
+	loopback = scifdev_self(ep->remote_dev) ? true : false;
+	copy_work.fence_type = ((flags & SCIF_RMA_SYNC) && last_chunk) ?
+				SCIF_DMA_POLL : 0;
+	copy_work.ordered = !!((flags & SCIF_RMA_ORDERED) && last_chunk);
+
+	/* Use CPU for Mgmt node <-> Mgmt node copies */
+	if (loopback && scif_is_mgmt_node()) {
+		flags |= SCIF_RMA_USECPU;
+		copy_work.fence_type = 0x0;
+	}
+
+	cache = scif_is_set_reg_cache(flags);
+
+	remote_req.out_window = &remote_window;
+	remote_req.offset = roffset;
+	remote_req.nr_bytes = len;
+	/*
+	 * If transfer is from local to remote then the remote window
+	 * must be writeable and vice versa.
+	 */
+	remote_req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_WRITE : VM_READ;
+	remote_req.type = SCIF_WINDOW_PARTIAL;
+	remote_req.head = &ep->rma_info.remote_reg_list;
+
+	spdev = scif_get_peer_dev(ep->remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		return err;
+	}
+
+	if (addr && cache) {
+		mutex_lock(&ep->rma_info.mmn_lock);
+		mmn = scif_find_mmu_notifier(current->mm, &ep->rma_info);
+		if (!mmn)
+			mmn = scif_add_mmu_notifier(current->mm, ep);
+		mutex_unlock(&ep->rma_info.mmn_lock);
+		if (IS_ERR(mmn)) {
+			scif_put_peer_dev(spdev);
+			return PTR_ERR(mmn);
+		}
+		cache = cache && !scif_rma_tc_can_cache(ep, len);
+	}
+	mutex_lock(&ep->rma_info.rma_lock);
+	if (addr) {
+		req.out_window = &local_window;
+		req.nr_bytes = ALIGN(len + (addr & ~PAGE_MASK),
+				     PAGE_SIZE);
+		req.va_for_temp = addr & PAGE_MASK;
+		req.prot = (dir == SCIF_LOCAL_TO_REMOTE ?
+			    VM_READ : VM_WRITE | VM_READ);
+		/* Does a valid local window exist? */
+		if (mmn) {
+			spin_lock(&ep->rma_info.tc_lock);
+			req.head = &mmn->tc_reg_list;
+			err = scif_query_tcw(ep, &req);
+			spin_unlock(&ep->rma_info.tc_lock);
+		}
+		if (!mmn || err) {
+			err = scif_register_temp(epd, req.va_for_temp,
+						 req.nr_bytes, req.prot,
+						 &loffset, &local_window);
+			if (err) {
+				mutex_unlock(&ep->rma_info.rma_lock);
+				goto error;
+			}
+			if (!cache)
+				goto skip_cache;
+			atomic_inc(&ep->rma_info.tcw_refcount);
+			atomic_add_return(local_window->nr_pages,
+					  &ep->rma_info.tcw_total_pages);
+			if (mmn) {
+				spin_lock(&ep->rma_info.tc_lock);
+				scif_insert_tcw(local_window,
+						&mmn->tc_reg_list);
+				spin_unlock(&ep->rma_info.tc_lock);
+			}
+		}
+skip_cache:
+		loffset = local_window->offset +
+				(addr - local_window->va_for_temp);
+	} else {
+		req.out_window = &local_window;
+		req.offset = loffset;
+		/*
+		 * If transfer is from local to remote then the self window
+		 * must be readable and vice versa.
+		 */
+		req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_READ : VM_WRITE;
+		req.nr_bytes = len;
+		req.type = SCIF_WINDOW_PARTIAL;
+		req.head = &ep->rma_info.reg_list;
+		/* Does a valid local window exist? */
+		err = scif_query_window(&req);
+		if (err) {
+			mutex_unlock(&ep->rma_info.rma_lock);
+			goto error;
+		}
+	}
+
+	/* Does a valid remote window exist? */
+	err = scif_query_window(&remote_req);
+	if (err) {
+		mutex_unlock(&ep->rma_info.rma_lock);
+		goto error;
+	}
+
+	/*
+	 * Prepare copy_work for submitting work to the DMA kernel thread
+	 * or CPU copy routine.
+	 */
+	copy_work.len = len;
+	copy_work.loopback = loopback;
+	copy_work.remote_dev = ep->remote_dev;
+	if (dir == SCIF_LOCAL_TO_REMOTE) {
+		copy_work.src_offset = loffset;
+		copy_work.src_window = local_window;
+		copy_work.dst_offset = roffset;
+		copy_work.dst_window = remote_window;
+	} else {
+		copy_work.src_offset = roffset;
+		copy_work.src_window = remote_window;
+		copy_work.dst_offset = loffset;
+		copy_work.dst_window = local_window;
+	}
+
+	if (flags & SCIF_RMA_USECPU) {
+		scif_rma_list_cpu_copy(&copy_work);
+	} else {
+		chan = ep->rma_info.dma_chan;
+		err = scif_rma_list_dma_copy_wrapper(epd, &copy_work,
+						     chan, loffset);
+	}
+	if (addr && !cache)
+		atomic_inc(&ep->rma_info.tw_refcount);
+
+	mutex_unlock(&ep->rma_info.rma_lock);
+
+	if (last_chunk) {
+		struct scif_dev *rdev = ep->remote_dev;
+
+		if (copy_work.fence_type == SCIF_DMA_POLL)
+			err = scif_drain_dma_poll(rdev->sdev,
+						  ep->rma_info.dma_chan);
+		else if (copy_work.fence_type == SCIF_DMA_INTR)
+			err = scif_drain_dma_intr(rdev->sdev,
+						  ep->rma_info.dma_chan);
+	}
+
+	if (addr && !cache)
+		scif_queue_for_cleanup(local_window, &scif_info.rma);
+	scif_put_peer_dev(spdev);
+	return err;
+error:
+	if (err) {
+		if (addr && local_window && !cache)
+			scif_destroy_window(ep, local_window);
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d len 0x%lx\n",
+			__func__, __LINE__, err, len);
+	}
+	scif_put_peer_dev(spdev);
+	return err;
+}
+
+int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len,
+		  off_t roffset, int flags)
+{
+	int err;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI readfrom: ep %p loffset 0x%lx len 0x%lx offset 0x%lx flags 0x%x\n",
+		epd, loffset, len, roffset, flags);
+	if (scif_unaligned(loffset, roffset)) {
+		while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
+			err = scif_rma_copy(epd, loffset, 0x0,
+					    SCIF_MAX_UNALIGNED_BUF_SIZE,
+					    roffset, flags,
+					    SCIF_REMOTE_TO_LOCAL, false);
+			if (err)
+				goto readfrom_err;
+			loffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
+		}
+	}
+	err = scif_rma_copy(epd, loffset, 0x0, len,
+			    roffset, flags, SCIF_REMOTE_TO_LOCAL, true);
+readfrom_err:
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_readfrom);
+
+int scif_writeto(scif_epd_t epd, off_t loffset, size_t len,
+		 off_t roffset, int flags)
+{
+	int err;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI writeto: ep %p loffset 0x%lx len 0x%lx roffset 0x%lx flags 0x%x\n",
+		epd, loffset, len, roffset, flags);
+	if (scif_unaligned(loffset, roffset)) {
+		while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
+			err = scif_rma_copy(epd, loffset, 0x0,
+					    SCIF_MAX_UNALIGNED_BUF_SIZE,
+					    roffset, flags,
+					    SCIF_LOCAL_TO_REMOTE, false);
+			if (err)
+				goto writeto_err;
+			loffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
+		}
+	}
+	err = scif_rma_copy(epd, loffset, 0x0, len,
+			    roffset, flags, SCIF_LOCAL_TO_REMOTE, true);
+writeto_err:
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_writeto);
+
+int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len,
+		   off_t roffset, int flags)
+{
+	int err;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI vreadfrom: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n",
+		epd, addr, len, roffset, flags);
+	if (scif_unaligned((off_t __force)addr, roffset)) {
+		if (len > SCIF_MAX_UNALIGNED_BUF_SIZE)
+			flags &= ~SCIF_RMA_USECACHE;
+
+		while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
+			err = scif_rma_copy(epd, 0, (u64)addr,
+					    SCIF_MAX_UNALIGNED_BUF_SIZE,
+					    roffset, flags,
+					    SCIF_REMOTE_TO_LOCAL, false);
+			if (err)
+				goto vreadfrom_err;
+			addr += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
+		}
+	}
+	err = scif_rma_copy(epd, 0, (u64)addr, len,
+			    roffset, flags, SCIF_REMOTE_TO_LOCAL, true);
+vreadfrom_err:
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_vreadfrom);
+
+int scif_vwriteto(scif_epd_t epd, void *addr, size_t len,
+		  off_t roffset, int flags)
+{
+	int err;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI vwriteto: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n",
+		epd, addr, len, roffset, flags);
+	if (scif_unaligned((off_t __force)addr, roffset)) {
+		if (len > SCIF_MAX_UNALIGNED_BUF_SIZE)
+			flags &= ~SCIF_RMA_USECACHE;
+
+		while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
+			err = scif_rma_copy(epd, 0, (u64)addr,
+					    SCIF_MAX_UNALIGNED_BUF_SIZE,
+					    roffset, flags,
+					    SCIF_LOCAL_TO_REMOTE, false);
+			if (err)
+				goto vwriteto_err;
+			addr += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
+		}
+	}
+	err = scif_rma_copy(epd, 0, (u64)addr, len,
+			    roffset, flags, SCIF_LOCAL_TO_REMOTE, true);
+vwriteto_err:
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_vwriteto);
diff --git a/drivers/misc/mic/scif/scif_epd.c b/drivers/misc/mic/scif/scif_epd.c
new file mode 100644
index 000000000..00e5d6d66
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_epd.c
@@ -0,0 +1,357 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+#include "scif_map.h"
+
+void scif_cleanup_ep_qp(struct scif_endpt *ep)
+{
+	struct scif_qp *qp = ep->qp_info.qp;
+
+	if (qp->outbound_q.rb_base) {
+		scif_iounmap((void *)qp->outbound_q.rb_base,
+			     qp->outbound_q.size, ep->remote_dev);
+		qp->outbound_q.rb_base = NULL;
+	}
+	if (qp->remote_qp) {
+		scif_iounmap((void *)qp->remote_qp,
+			     sizeof(struct scif_qp), ep->remote_dev);
+		qp->remote_qp = NULL;
+	}
+	if (qp->local_qp) {
+		scif_unmap_single(qp->local_qp, ep->remote_dev,
+				  sizeof(struct scif_qp));
+		qp->local_qp = 0x0;
+	}
+	if (qp->local_buf) {
+		scif_unmap_single(qp->local_buf, ep->remote_dev,
+				  SCIF_ENDPT_QP_SIZE);
+		qp->local_buf = 0;
+	}
+}
+
+void scif_teardown_ep(void *endpt)
+{
+	struct scif_endpt *ep = endpt;
+	struct scif_qp *qp = ep->qp_info.qp;
+
+	if (qp) {
+		spin_lock(&ep->lock);
+		scif_cleanup_ep_qp(ep);
+		spin_unlock(&ep->lock);
+		kfree(qp->inbound_q.rb_base);
+		kfree(qp);
+	}
+}
+
+/*
+ * Enqueue the endpoint to the zombie list for cleanup.
+ * The endpoint should not be accessed once this API returns.
+ */
+void scif_add_epd_to_zombie_list(struct scif_endpt *ep, bool eplock_held)
+{
+	if (!eplock_held)
+		mutex_lock(&scif_info.eplock);
+	spin_lock(&ep->lock);
+	ep->state = SCIFEP_ZOMBIE;
+	spin_unlock(&ep->lock);
+	list_add_tail(&ep->list, &scif_info.zombie);
+	scif_info.nr_zombies++;
+	if (!eplock_held)
+		mutex_unlock(&scif_info.eplock);
+	schedule_work(&scif_info.misc_work);
+}
+
+static struct scif_endpt *scif_find_listen_ep(u16 port)
+{
+	struct scif_endpt *ep = NULL;
+	struct list_head *pos, *tmpq;
+
+	mutex_lock(&scif_info.eplock);
+	list_for_each_safe(pos, tmpq, &scif_info.listen) {
+		ep = list_entry(pos, struct scif_endpt, list);
+		if (ep->port.port == port) {
+			mutex_unlock(&scif_info.eplock);
+			return ep;
+		}
+	}
+	mutex_unlock(&scif_info.eplock);
+	return NULL;
+}
+
+void scif_cleanup_zombie_epd(void)
+{
+	struct list_head *pos, *tmpq;
+	struct scif_endpt *ep;
+
+	mutex_lock(&scif_info.eplock);
+	list_for_each_safe(pos, tmpq, &scif_info.zombie) {
+		ep = list_entry(pos, struct scif_endpt, list);
+		if (scif_rma_ep_can_uninit(ep)) {
+			list_del(pos);
+			scif_info.nr_zombies--;
+			put_iova_domain(&ep->rma_info.iovad);
+			kfree(ep);
+		}
+	}
+	mutex_unlock(&scif_info.eplock);
+}
+
+/**
+ * scif_cnctreq() - Respond to SCIF_CNCT_REQ interrupt message
+ * @msg:        Interrupt message
+ *
+ * This message is initiated by the remote node to request a connection
+ * to the local node.  This function looks for an end point in the
+ * listen state on the requested port id.
+ *
+ * If it finds a listening port it places the connect request on the
+ * listening end points queue and wakes up any pending accept calls.
+ *
+ * If it does not find a listening end point it sends a connection
+ * reject message to the remote node.
+ */
+void scif_cnctreq(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = NULL;
+	struct scif_conreq *conreq;
+
+	conreq = kmalloc(sizeof(*conreq), GFP_KERNEL);
+	if (!conreq)
+		/* Lack of resources so reject the request. */
+		goto conreq_sendrej;
+
+	ep = scif_find_listen_ep(msg->dst.port);
+	if (!ep)
+		/*  Send reject due to no listening ports */
+		goto conreq_sendrej_free;
+	else
+		spin_lock(&ep->lock);
+
+	if (ep->backlog <= ep->conreqcnt) {
+		/*  Send reject due to too many pending requests */
+		spin_unlock(&ep->lock);
+		goto conreq_sendrej_free;
+	}
+
+	conreq->msg = *msg;
+	list_add_tail(&conreq->list, &ep->conlist);
+	ep->conreqcnt++;
+	wake_up_interruptible(&ep->conwq);
+	spin_unlock(&ep->lock);
+	return;
+
+conreq_sendrej_free:
+	kfree(conreq);
+conreq_sendrej:
+	msg->uop = SCIF_CNCT_REJ;
+	scif_nodeqp_send(&scif_dev[msg->src.node], msg);
+}
+
+/**
+ * scif_cnctgnt() - Respond to SCIF_CNCT_GNT interrupt message
+ * @msg:        Interrupt message
+ *
+ * An accept() on the remote node has occurred and sent this message
+ * to indicate success.  Place the end point in the MAPPING state and
+ * save the remote nodes memory information.  Then wake up the connect
+ * request so it can finish.
+ */
+void scif_cnctgnt(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+	spin_lock(&ep->lock);
+	if (SCIFEP_CONNECTING == ep->state) {
+		ep->peer.node = msg->src.node;
+		ep->peer.port = msg->src.port;
+		ep->qp_info.gnt_pld = msg->payload[1];
+		ep->remote_ep = msg->payload[2];
+		ep->state = SCIFEP_MAPPING;
+
+		wake_up(&ep->conwq);
+	}
+	spin_unlock(&ep->lock);
+}
+
+/**
+ * scif_cnctgnt_ack() - Respond to SCIF_CNCT_GNTACK interrupt message
+ * @msg:        Interrupt message
+ *
+ * The remote connection request has finished mapping the local memory.
+ * Place the connection in the connected state and wake up the pending
+ * accept() call.
+ */
+void scif_cnctgnt_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+	mutex_lock(&scif_info.connlock);
+	spin_lock(&ep->lock);
+	/* New ep is now connected with all resources set. */
+	ep->state = SCIFEP_CONNECTED;
+	list_add_tail(&ep->list, &scif_info.connected);
+	wake_up(&ep->conwq);
+	spin_unlock(&ep->lock);
+	mutex_unlock(&scif_info.connlock);
+}
+
+/**
+ * scif_cnctgnt_nack() - Respond to SCIF_CNCT_GNTNACK interrupt message
+ * @msg:        Interrupt message
+ *
+ * The remote connection request failed to map the local memory it was sent.
+ * Place the end point in the CLOSING state to indicate it and wake up
+ * the pending accept();
+ */
+void scif_cnctgnt_nack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+	spin_lock(&ep->lock);
+	ep->state = SCIFEP_CLOSING;
+	wake_up(&ep->conwq);
+	spin_unlock(&ep->lock);
+}
+
+/**
+ * scif_cnctrej() - Respond to SCIF_CNCT_REJ interrupt message
+ * @msg:        Interrupt message
+ *
+ * The remote end has rejected the connection request.  Set the end
+ * point back to the bound state and wake up the pending connect().
+ */
+void scif_cnctrej(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+	spin_lock(&ep->lock);
+	if (SCIFEP_CONNECTING == ep->state) {
+		ep->state = SCIFEP_BOUND;
+		wake_up(&ep->conwq);
+	}
+	spin_unlock(&ep->lock);
+}
+
+/**
+ * scif_discnct() - Respond to SCIF_DISCNCT interrupt message
+ * @msg:        Interrupt message
+ *
+ * The remote node has indicated close() has been called on its end
+ * point.  Remove the local end point from the connected list, set its
+ * state to disconnected and ensure accesses to the remote node are
+ * shutdown.
+ *
+ * When all accesses to the remote end have completed then send a
+ * DISCNT_ACK to indicate it can remove its resources and complete
+ * the close routine.
+ */
+void scif_discnct(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = NULL;
+	struct scif_endpt *tmpep;
+	struct list_head *pos, *tmpq;
+
+	mutex_lock(&scif_info.connlock);
+	list_for_each_safe(pos, tmpq, &scif_info.connected) {
+		tmpep = list_entry(pos, struct scif_endpt, list);
+		/*
+		 * The local ep may have sent a disconnect and and been closed
+		 * due to a message response time out. It may have been
+		 * allocated again and formed a new connection so we want to
+		 * check if the remote ep matches
+		 */
+		if (((u64)tmpep == msg->payload[1]) &&
+		    ((u64)tmpep->remote_ep == msg->payload[0])) {
+			list_del(pos);
+			ep = tmpep;
+			spin_lock(&ep->lock);
+			break;
+		}
+	}
+
+	/*
+	 * If the terminated end is not found then this side started closing
+	 * before the other side sent the disconnect.  If so the ep will no
+	 * longer be on the connected list.  Regardless the other side
+	 * needs to be acked to let it know close is complete.
+	 */
+	if (!ep) {
+		mutex_unlock(&scif_info.connlock);
+		goto discnct_ack;
+	}
+
+	ep->state = SCIFEP_DISCONNECTED;
+	list_add_tail(&ep->list, &scif_info.disconnected);
+
+	wake_up_interruptible(&ep->sendwq);
+	wake_up_interruptible(&ep->recvwq);
+	spin_unlock(&ep->lock);
+	mutex_unlock(&scif_info.connlock);
+
+discnct_ack:
+	msg->uop = SCIF_DISCNT_ACK;
+	scif_nodeqp_send(&scif_dev[msg->src.node], msg);
+}
+
+/**
+ * scif_discnct_ack() - Respond to SCIF_DISCNT_ACK interrupt message
+ * @msg:        Interrupt message
+ *
+ * Remote side has indicated it has not more references to local resources
+ */
+void scif_discnt_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+	spin_lock(&ep->lock);
+	ep->state = SCIFEP_DISCONNECTED;
+	spin_unlock(&ep->lock);
+	complete(&ep->discon);
+}
+
+/**
+ * scif_clientsend() - Respond to SCIF_CLIENT_SEND interrupt message
+ * @msg:        Interrupt message
+ *
+ * Remote side is confirming send or receive interrupt handling is complete.
+ */
+void scif_clientsend(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+	spin_lock(&ep->lock);
+	if (SCIFEP_CONNECTED == ep->state)
+		wake_up_interruptible(&ep->recvwq);
+	spin_unlock(&ep->lock);
+}
+
+/**
+ * scif_clientrcvd() - Respond to SCIF_CLIENT_RCVD interrupt message
+ * @msg:        Interrupt message
+ *
+ * Remote side is confirming send or receive interrupt handling is complete.
+ */
+void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+	spin_lock(&ep->lock);
+	if (SCIFEP_CONNECTED == ep->state)
+		wake_up_interruptible(&ep->sendwq);
+	spin_unlock(&ep->lock);
+}
diff --git a/drivers/misc/mic/scif/scif_epd.h b/drivers/misc/mic/scif/scif_epd.h
new file mode 100644
index 000000000..f39b663da
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_epd.h
@@ -0,0 +1,210 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_EPD_H
+#define SCIF_EPD_H
+
+#include <linux/delay.h>
+#include <linux/scif.h>
+#include <linux/scif_ioctl.h>
+
+#define SCIF_EPLOCK_HELD true
+
+enum scif_epd_state {
+	SCIFEP_UNBOUND,
+	SCIFEP_BOUND,
+	SCIFEP_LISTENING,
+	SCIFEP_CONNECTED,
+	SCIFEP_CONNECTING,
+	SCIFEP_MAPPING,
+	SCIFEP_CLOSING,
+	SCIFEP_CLLISTEN,
+	SCIFEP_DISCONNECTED,
+	SCIFEP_ZOMBIE
+};
+
+/*
+ * struct scif_conreq - Data structure added to the connection list.
+ *
+ * @msg: connection request message received
+ * @list: link to list of connection requests
+ */
+struct scif_conreq {
+	struct scifmsg msg;
+	struct list_head list;
+};
+
+/* Size of the RB for the Endpoint QP */
+#define SCIF_ENDPT_QP_SIZE 0x1000
+
+/*
+ * scif_endpt_qp_info - SCIF endpoint queue pair
+ *
+ * @qp - Qpair for this endpoint
+ * @qp_offset - DMA address of the QP
+ * @gnt_pld - Payload in a SCIF_CNCT_GNT message containing the
+ * physical address of the remote_qp.
+ */
+struct scif_endpt_qp_info {
+	struct scif_qp *qp;
+	dma_addr_t qp_offset;
+	dma_addr_t gnt_pld;
+};
+
+/*
+ * struct scif_endpt - The SCIF endpoint data structure
+ *
+ * @state: end point state
+ * @lock: lock synchronizing access to endpoint fields like state etc
+ * @port: self port information
+ * @peer: peer port information
+ * @backlog: maximum pending connection requests
+ * @qp_info: Endpoint QP information for SCIF messaging
+ * @remote_dev: scifdev used by this endpt to communicate with remote node.
+ * @remote_ep: remote endpoint
+ * @conreqcnt: Keep track of number of connection requests.
+ * @files: Open file information used to match the id passed in with
+ *         the flush routine.
+ * @conlist: list of connection requests
+ * @conwq: waitqueue for connection processing
+ * @discon: completion used during disconnection
+ * @sendwq: waitqueue used during sending messages
+ * @recvwq: waitqueue used during message receipt
+ * @sendlock: Synchronize ordering of messages sent
+ * @recvlock: Synchronize ordering of messages received
+ * @list: link to list of various endpoints like connected, listening etc
+ * @li_accept: pending ACCEPTREG
+ * @acceptcnt: pending ACCEPTREG cnt
+ * @liacceptlist: link to listen accept
+ * @miacceptlist: link to uaccept
+ * @listenep: associated listen ep
+ * @conn_work: Non blocking connect work
+ * @conn_port: Connection port
+ * @conn_err: Errors during connection
+ * @conn_async_state: Async connection
+ * @conn_pend_wq: Used by poll while waiting for incoming connections
+ * @conn_list: List of async connection requests
+ * @rma_info: Information for triggering SCIF RMA and DMA operations
+ * @mmu_list: link to list of MMU notifier cleanup work
+ * @anon: anonymous file for use in kernel mode scif poll
+ */
+struct scif_endpt {
+	enum scif_epd_state state;
+	spinlock_t lock;
+	struct scif_port_id port;
+	struct scif_port_id peer;
+	int backlog;
+	struct scif_endpt_qp_info qp_info;
+	struct scif_dev *remote_dev;
+	u64 remote_ep;
+	int conreqcnt;
+	struct files_struct *files;
+	struct list_head conlist;
+	wait_queue_head_t conwq;
+	struct completion discon;
+	wait_queue_head_t sendwq;
+	wait_queue_head_t recvwq;
+	struct mutex sendlock;
+	struct mutex recvlock;
+	struct list_head list;
+	struct list_head li_accept;
+	int acceptcnt;
+	struct list_head liacceptlist;
+	struct list_head miacceptlist;
+	struct scif_endpt *listenep;
+	struct scif_port_id conn_port;
+	int conn_err;
+	int conn_async_state;
+	wait_queue_head_t conn_pend_wq;
+	struct list_head conn_list;
+	struct scif_endpt_rma_info rma_info;
+	struct list_head mmu_list;
+	struct file *anon;
+};
+
+static inline int scifdev_alive(struct scif_endpt *ep)
+{
+	return _scifdev_alive(ep->remote_dev);
+}
+
+/*
+ * scif_verify_epd:
+ * ep: SCIF endpoint
+ *
+ * Checks several generic error conditions and returns the
+ * appropriate error.
+ */
+static inline int scif_verify_epd(struct scif_endpt *ep)
+{
+	if (ep->state == SCIFEP_DISCONNECTED)
+		return -ECONNRESET;
+
+	if (ep->state != SCIFEP_CONNECTED)
+		return -ENOTCONN;
+
+	if (!scifdev_alive(ep))
+		return -ENODEV;
+
+	return 0;
+}
+
+static inline int scif_anon_inode_getfile(scif_epd_t epd)
+{
+	epd->anon = anon_inode_getfile("scif", &scif_anon_fops, NULL, 0);
+	if (IS_ERR(epd->anon))
+		return PTR_ERR(epd->anon);
+	return 0;
+}
+
+static inline void scif_anon_inode_fput(scif_epd_t epd)
+{
+	if (epd->anon) {
+		fput(epd->anon);
+		epd->anon = NULL;
+	}
+}
+
+void scif_cleanup_zombie_epd(void);
+void scif_teardown_ep(void *endpt);
+void scif_cleanup_ep_qp(struct scif_endpt *ep);
+void scif_add_epd_to_zombie_list(struct scif_endpt *ep, bool eplock_held);
+void scif_get_node_info(void);
+void scif_send_acks(struct scif_dev *dev);
+void scif_conn_handler(struct work_struct *work);
+int scif_rsrv_port(u16 port);
+void scif_get_port(u16 port);
+int scif_get_new_port(void);
+void scif_put_port(u16 port);
+int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags);
+int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags);
+void scif_cnctreq(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_cnctgnt(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_cnctgnt_ack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_cnctgnt_nack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_cnctrej(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_discnct(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_discnt_ack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_clientsend(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg);
+int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block);
+int __scif_flush(scif_epd_t epd);
+int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd);
+__poll_t __scif_pollfd(struct file *f, poll_table *wait,
+			   struct scif_endpt *ep);
+int __scif_pin_pages(void *addr, size_t len, int *out_prot,
+		     int map_flags, scif_pinned_pages_t *pages);
+#endif /* SCIF_EPD_H */
diff --git a/drivers/misc/mic/scif/scif_fd.c b/drivers/misc/mic/scif/scif_fd.c
new file mode 100644
index 000000000..5c2a57ae4
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_fd.c
@@ -0,0 +1,471 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+
+static int scif_fdopen(struct inode *inode, struct file *f)
+{
+	struct scif_endpt *priv = scif_open();
+
+	if (!priv)
+		return -ENOMEM;
+	f->private_data = priv;
+	return 0;
+}
+
+static int scif_fdclose(struct inode *inode, struct file *f)
+{
+	struct scif_endpt *priv = f->private_data;
+
+	return scif_close(priv);
+}
+
+static int scif_fdmmap(struct file *f, struct vm_area_struct *vma)
+{
+	struct scif_endpt *priv = f->private_data;
+
+	return scif_mmap(vma, priv);
+}
+
+static __poll_t scif_fdpoll(struct file *f, poll_table *wait)
+{
+	struct scif_endpt *priv = f->private_data;
+
+	return __scif_pollfd(f, wait, priv);
+}
+
+static int scif_fdflush(struct file *f, fl_owner_t id)
+{
+	struct scif_endpt *ep = f->private_data;
+
+	spin_lock(&ep->lock);
+	/*
+	 * The listening endpoint stashes the open file information before
+	 * waiting for incoming connections. The release callback would never be
+	 * called if the application closed the endpoint, while waiting for
+	 * incoming connections from a separate thread since the file descriptor
+	 * reference count is bumped up in the accept IOCTL. Call the flush
+	 * routine if the id matches the endpoint open file information so that
+	 * the listening endpoint can be woken up and the fd released.
+	 */
+	if (ep->files == id)
+		__scif_flush(ep);
+	spin_unlock(&ep->lock);
+	return 0;
+}
+
+static __always_inline void scif_err_debug(int err, const char *str)
+{
+	/*
+	 * ENOTCONN is a common uninteresting error which is
+	 * flooding debug messages to the console unnecessarily.
+	 */
+	if (err < 0 && err != -ENOTCONN)
+		dev_dbg(scif_info.mdev.this_device, "%s err %d\n", str, err);
+}
+
+static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg)
+{
+	struct scif_endpt *priv = f->private_data;
+	void __user *argp = (void __user *)arg;
+	int err = 0;
+	struct scifioctl_msg request;
+	bool non_block = false;
+
+	non_block = !!(f->f_flags & O_NONBLOCK);
+
+	switch (cmd) {
+	case SCIF_BIND:
+	{
+		int pn;
+
+		if (copy_from_user(&pn, argp, sizeof(pn)))
+			return -EFAULT;
+
+		pn = scif_bind(priv, pn);
+		if (pn < 0)
+			return pn;
+
+		if (copy_to_user(argp, &pn, sizeof(pn)))
+			return -EFAULT;
+
+		return 0;
+	}
+	case SCIF_LISTEN:
+		return scif_listen(priv, arg);
+	case SCIF_CONNECT:
+	{
+		struct scifioctl_connect req;
+		struct scif_endpt *ep = (struct scif_endpt *)priv;
+
+		if (copy_from_user(&req, argp, sizeof(req)))
+			return -EFAULT;
+
+		err = __scif_connect(priv, &req.peer, non_block);
+		if (err < 0)
+			return err;
+
+		req.self.node = ep->port.node;
+		req.self.port = ep->port.port;
+
+		if (copy_to_user(argp, &req, sizeof(req)))
+			return -EFAULT;
+
+		return 0;
+	}
+	/*
+	 * Accept is done in two halves.  The request ioctl does the basic
+	 * functionality of accepting the request and returning the information
+	 * about it including the internal ID of the end point.  The register
+	 * is done with the internal ID on a new file descriptor opened by the
+	 * requesting process.
+	 */
+	case SCIF_ACCEPTREQ:
+	{
+		struct scifioctl_accept request;
+		scif_epd_t *ep = (scif_epd_t *)&request.endpt;
+
+		if (copy_from_user(&request, argp, sizeof(request)))
+			return -EFAULT;
+
+		err = scif_accept(priv, &request.peer, ep, request.flags);
+		if (err < 0)
+			return err;
+
+		if (copy_to_user(argp, &request, sizeof(request))) {
+			scif_close(*ep);
+			return -EFAULT;
+		}
+		/*
+		 * Add to the list of user mode eps where the second half
+		 * of the accept is not yet completed.
+		 */
+		mutex_lock(&scif_info.eplock);
+		list_add_tail(&((*ep)->miacceptlist), &scif_info.uaccept);
+		list_add_tail(&((*ep)->liacceptlist), &priv->li_accept);
+		(*ep)->listenep = priv;
+		priv->acceptcnt++;
+		mutex_unlock(&scif_info.eplock);
+
+		return 0;
+	}
+	case SCIF_ACCEPTREG:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scif_endpt *newep;
+		struct scif_endpt *lisep;
+		struct scif_endpt *fep = NULL;
+		struct scif_endpt *tmpep;
+		struct list_head *pos, *tmpq;
+
+		/* Finally replace the pointer to the accepted endpoint */
+		if (copy_from_user(&newep, argp, sizeof(void *)))
+			return -EFAULT;
+
+		/* Remove form the user accept queue */
+		mutex_lock(&scif_info.eplock);
+		list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
+			tmpep = list_entry(pos,
+					   struct scif_endpt, miacceptlist);
+			if (tmpep == newep) {
+				list_del(pos);
+				fep = tmpep;
+				break;
+			}
+		}
+
+		if (!fep) {
+			mutex_unlock(&scif_info.eplock);
+			return -ENOENT;
+		}
+
+		lisep = newep->listenep;
+		list_for_each_safe(pos, tmpq, &lisep->li_accept) {
+			tmpep = list_entry(pos,
+					   struct scif_endpt, liacceptlist);
+			if (tmpep == newep) {
+				list_del(pos);
+				lisep->acceptcnt--;
+				break;
+			}
+		}
+
+		mutex_unlock(&scif_info.eplock);
+
+		/* Free the resources automatically created from the open. */
+		scif_anon_inode_fput(priv);
+		scif_teardown_ep(priv);
+		scif_add_epd_to_zombie_list(priv, !SCIF_EPLOCK_HELD);
+		f->private_data = newep;
+		return 0;
+	}
+	case SCIF_SEND:
+	{
+		struct scif_endpt *priv = f->private_data;
+
+		if (copy_from_user(&request, argp,
+				   sizeof(struct scifioctl_msg))) {
+			err = -EFAULT;
+			goto send_err;
+		}
+		err = scif_user_send(priv, (void __user *)request.msg,
+				     request.len, request.flags);
+		if (err < 0)
+			goto send_err;
+		if (copy_to_user(&
+				 ((struct scifioctl_msg __user *)argp)->out_len,
+				 &err, sizeof(err))) {
+			err = -EFAULT;
+			goto send_err;
+		}
+		err = 0;
+send_err:
+		scif_err_debug(err, "scif_send");
+		return err;
+	}
+	case SCIF_RECV:
+	{
+		struct scif_endpt *priv = f->private_data;
+
+		if (copy_from_user(&request, argp,
+				   sizeof(struct scifioctl_msg))) {
+			err = -EFAULT;
+			goto recv_err;
+		}
+
+		err = scif_user_recv(priv, (void __user *)request.msg,
+				     request.len, request.flags);
+		if (err < 0)
+			goto recv_err;
+
+		if (copy_to_user(&
+				 ((struct scifioctl_msg __user *)argp)->out_len,
+			&err, sizeof(err))) {
+			err = -EFAULT;
+			goto recv_err;
+		}
+		err = 0;
+recv_err:
+		scif_err_debug(err, "scif_recv");
+		return err;
+	}
+	case SCIF_GET_NODEIDS:
+	{
+		struct scifioctl_node_ids node_ids;
+		int entries;
+		u16 *nodes;
+		void __user *unodes, *uself;
+		u16 self;
+
+		if (copy_from_user(&node_ids, argp, sizeof(node_ids))) {
+			err = -EFAULT;
+			goto getnodes_err2;
+		}
+
+		entries = min_t(int, scif_info.maxid, node_ids.len);
+		nodes = kmalloc_array(entries, sizeof(u16), GFP_KERNEL);
+		if (entries && !nodes) {
+			err = -ENOMEM;
+			goto getnodes_err2;
+		}
+		node_ids.len = scif_get_node_ids(nodes, entries, &self);
+
+		unodes = (void __user *)node_ids.nodes;
+		if (copy_to_user(unodes, nodes, sizeof(u16) * entries)) {
+			err = -EFAULT;
+			goto getnodes_err1;
+		}
+
+		uself = (void __user *)node_ids.self;
+		if (copy_to_user(uself, &self, sizeof(u16))) {
+			err = -EFAULT;
+			goto getnodes_err1;
+		}
+
+		if (copy_to_user(argp, &node_ids, sizeof(node_ids))) {
+			err = -EFAULT;
+			goto getnodes_err1;
+		}
+getnodes_err1:
+		kfree(nodes);
+getnodes_err2:
+		return err;
+	}
+	case SCIF_REG:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_reg reg;
+		off_t ret;
+
+		if (copy_from_user(&reg, argp, sizeof(reg))) {
+			err = -EFAULT;
+			goto reg_err;
+		}
+		if (reg.flags & SCIF_MAP_KERNEL) {
+			err = -EINVAL;
+			goto reg_err;
+		}
+		ret = scif_register(priv, (void *)reg.addr, reg.len,
+				    reg.offset, reg.prot, reg.flags);
+		if (ret < 0) {
+			err = (int)ret;
+			goto reg_err;
+		}
+
+		if (copy_to_user(&((struct scifioctl_reg __user *)argp)
+				 ->out_offset, &ret, sizeof(reg.out_offset))) {
+			err = -EFAULT;
+			goto reg_err;
+		}
+		err = 0;
+reg_err:
+		scif_err_debug(err, "scif_register");
+		return err;
+	}
+	case SCIF_UNREG:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_unreg unreg;
+
+		if (copy_from_user(&unreg, argp, sizeof(unreg))) {
+			err = -EFAULT;
+			goto unreg_err;
+		}
+		err = scif_unregister(priv, unreg.offset, unreg.len);
+unreg_err:
+		scif_err_debug(err, "scif_unregister");
+		return err;
+	}
+	case SCIF_READFROM:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_copy copy;
+
+		if (copy_from_user(&copy, argp, sizeof(copy))) {
+			err = -EFAULT;
+			goto readfrom_err;
+		}
+		err = scif_readfrom(priv, copy.loffset, copy.len, copy.roffset,
+				    copy.flags);
+readfrom_err:
+		scif_err_debug(err, "scif_readfrom");
+		return err;
+	}
+	case SCIF_WRITETO:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_copy copy;
+
+		if (copy_from_user(&copy, argp, sizeof(copy))) {
+			err = -EFAULT;
+			goto writeto_err;
+		}
+		err = scif_writeto(priv, copy.loffset, copy.len, copy.roffset,
+				   copy.flags);
+writeto_err:
+		scif_err_debug(err, "scif_writeto");
+		return err;
+	}
+	case SCIF_VREADFROM:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_copy copy;
+
+		if (copy_from_user(&copy, argp, sizeof(copy))) {
+			err = -EFAULT;
+			goto vreadfrom_err;
+		}
+		err = scif_vreadfrom(priv, (void __force *)copy.addr, copy.len,
+				     copy.roffset, copy.flags);
+vreadfrom_err:
+		scif_err_debug(err, "scif_vreadfrom");
+		return err;
+	}
+	case SCIF_VWRITETO:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_copy copy;
+
+		if (copy_from_user(&copy, argp, sizeof(copy))) {
+			err = -EFAULT;
+			goto vwriteto_err;
+		}
+		err = scif_vwriteto(priv, (void __force *)copy.addr, copy.len,
+				    copy.roffset, copy.flags);
+vwriteto_err:
+		scif_err_debug(err, "scif_vwriteto");
+		return err;
+	}
+	case SCIF_FENCE_MARK:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_fence_mark mark;
+		int tmp_mark = 0;
+
+		if (copy_from_user(&mark, argp, sizeof(mark))) {
+			err = -EFAULT;
+			goto fence_mark_err;
+		}
+		err = scif_fence_mark(priv, mark.flags, &tmp_mark);
+		if (err)
+			goto fence_mark_err;
+		if (copy_to_user((void __user *)mark.mark, &tmp_mark,
+				 sizeof(tmp_mark))) {
+			err = -EFAULT;
+			goto fence_mark_err;
+		}
+fence_mark_err:
+		scif_err_debug(err, "scif_fence_mark");
+		return err;
+	}
+	case SCIF_FENCE_WAIT:
+	{
+		struct scif_endpt *priv = f->private_data;
+
+		err = scif_fence_wait(priv, arg);
+		scif_err_debug(err, "scif_fence_wait");
+		return err;
+	}
+	case SCIF_FENCE_SIGNAL:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_fence_signal signal;
+
+		if (copy_from_user(&signal, argp, sizeof(signal))) {
+			err = -EFAULT;
+			goto fence_signal_err;
+		}
+
+		err = scif_fence_signal(priv, signal.loff, signal.lval,
+					signal.roff, signal.rval, signal.flags);
+fence_signal_err:
+		scif_err_debug(err, "scif_fence_signal");
+		return err;
+	}
+	}
+	return -EINVAL;
+}
+
+const struct file_operations scif_fops = {
+	.open = scif_fdopen,
+	.release = scif_fdclose,
+	.unlocked_ioctl = scif_fdioctl,
+	.mmap = scif_fdmmap,
+	.poll = scif_fdpoll,
+	.flush = scif_fdflush,
+	.owner = THIS_MODULE,
+};
diff --git a/drivers/misc/mic/scif/scif_fence.c b/drivers/misc/mic/scif/scif_fence.c
new file mode 100644
index 000000000..7bb929f05
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_fence.c
@@ -0,0 +1,772 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+
+#include "scif_main.h"
+
+/**
+ * scif_recv_mark: Handle SCIF_MARK request
+ * @msg:	Interrupt message
+ *
+ * The peer has requested a mark.
+ */
+void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	int mark = 0;
+	int err;
+
+	err = _scif_fence_mark(ep, &mark);
+	if (err)
+		msg->uop = SCIF_MARK_NACK;
+	else
+		msg->uop = SCIF_MARK_ACK;
+	msg->payload[0] = ep->remote_ep;
+	msg->payload[2] = mark;
+	scif_nodeqp_send(ep->remote_dev, msg);
+}
+
+/**
+ * scif_recv_mark_resp: Handle SCIF_MARK_(N)ACK messages.
+ * @msg:	Interrupt message
+ *
+ * The peer has responded to a SCIF_MARK message.
+ */
+void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	struct scif_fence_info *fence_req =
+		(struct scif_fence_info *)msg->payload[1];
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	if (msg->uop == SCIF_MARK_ACK) {
+		fence_req->state = OP_COMPLETED;
+		fence_req->dma_mark = (int)msg->payload[2];
+	} else {
+		fence_req->state = OP_FAILED;
+	}
+	mutex_unlock(&ep->rma_info.rma_lock);
+	complete(&fence_req->comp);
+}
+
+/**
+ * scif_recv_wait: Handle SCIF_WAIT request
+ * @msg:	Interrupt message
+ *
+ * The peer has requested waiting on a fence.
+ */
+void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	struct scif_remote_fence_info *fence;
+
+	/*
+	 * Allocate structure for remote fence information and
+	 * send a NACK if the allocation failed. The peer will
+	 * return ENOMEM upon receiving a NACK.
+	 */
+	fence = kmalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence) {
+		msg->payload[0] = ep->remote_ep;
+		msg->uop = SCIF_WAIT_NACK;
+		scif_nodeqp_send(ep->remote_dev, msg);
+		return;
+	}
+
+	/* Prepare the fence request */
+	memcpy(&fence->msg, msg, sizeof(struct scifmsg));
+	INIT_LIST_HEAD(&fence->list);
+
+	/* Insert to the global remote fence request list */
+	mutex_lock(&scif_info.fencelock);
+	atomic_inc(&ep->rma_info.fence_refcount);
+	list_add_tail(&fence->list, &scif_info.fence);
+	mutex_unlock(&scif_info.fencelock);
+
+	schedule_work(&scif_info.misc_work);
+}
+
+/**
+ * scif_recv_wait_resp: Handle SCIF_WAIT_(N)ACK messages.
+ * @msg:	Interrupt message
+ *
+ * The peer has responded to a SCIF_WAIT message.
+ */
+void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	struct scif_fence_info *fence_req =
+		(struct scif_fence_info *)msg->payload[1];
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	if (msg->uop == SCIF_WAIT_ACK)
+		fence_req->state = OP_COMPLETED;
+	else
+		fence_req->state = OP_FAILED;
+	mutex_unlock(&ep->rma_info.rma_lock);
+	complete(&fence_req->comp);
+}
+
+/**
+ * scif_recv_sig_local: Handle SCIF_SIG_LOCAL request
+ * @msg:	Interrupt message
+ *
+ * The peer has requested a signal on a local offset.
+ */
+void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	int err;
+
+	err = scif_prog_signal(ep, msg->payload[1], msg->payload[2],
+			       SCIF_WINDOW_SELF);
+	if (err)
+		msg->uop = SCIF_SIG_NACK;
+	else
+		msg->uop = SCIF_SIG_ACK;
+	msg->payload[0] = ep->remote_ep;
+	scif_nodeqp_send(ep->remote_dev, msg);
+}
+
+/**
+ * scif_recv_sig_remote: Handle SCIF_SIGNAL_REMOTE request
+ * @msg:	Interrupt message
+ *
+ * The peer has requested a signal on a remote offset.
+ */
+void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	int err;
+
+	err = scif_prog_signal(ep, msg->payload[1], msg->payload[2],
+			       SCIF_WINDOW_PEER);
+	if (err)
+		msg->uop = SCIF_SIG_NACK;
+	else
+		msg->uop = SCIF_SIG_ACK;
+	msg->payload[0] = ep->remote_ep;
+	scif_nodeqp_send(ep->remote_dev, msg);
+}
+
+/**
+ * scif_recv_sig_resp: Handle SCIF_SIG_(N)ACK messages.
+ * @msg:	Interrupt message
+ *
+ * The peer has responded to a signal request.
+ */
+void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	struct scif_fence_info *fence_req =
+		(struct scif_fence_info *)msg->payload[3];
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	if (msg->uop == SCIF_SIG_ACK)
+		fence_req->state = OP_COMPLETED;
+	else
+		fence_req->state = OP_FAILED;
+	mutex_unlock(&ep->rma_info.rma_lock);
+	complete(&fence_req->comp);
+}
+
+static inline void *scif_get_local_va(off_t off, struct scif_window *window)
+{
+	struct page **pages = window->pinned_pages->pages;
+	int page_nr = (off - window->offset) >> PAGE_SHIFT;
+	off_t page_off = off & ~PAGE_MASK;
+
+	return page_address(pages[page_nr]) + page_off;
+}
+
+static void scif_prog_signal_cb(void *arg)
+{
+	struct scif_status *status = arg;
+
+	dma_pool_free(status->ep->remote_dev->signal_pool, status,
+		      status->src_dma_addr);
+}
+
+static int _scif_prog_signal(scif_epd_t epd, dma_addr_t dst, u64 val)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct dma_chan *chan = ep->rma_info.dma_chan;
+	struct dma_device *ddev = chan->device;
+	bool x100 = !is_dma_copy_aligned(chan->device, 1, 1, 1);
+	struct dma_async_tx_descriptor *tx;
+	struct scif_status *status = NULL;
+	dma_addr_t src;
+	dma_cookie_t cookie;
+	int err;
+
+	tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE);
+	if (!tx) {
+		err = -ENOMEM;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto alloc_fail;
+	}
+	cookie = tx->tx_submit(tx);
+	if (dma_submit_error(cookie)) {
+		err = (int)cookie;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto alloc_fail;
+	}
+	dma_async_issue_pending(chan);
+	if (x100) {
+		/*
+		 * For X100 use the status descriptor to write the value to
+		 * the destination.
+		 */
+		tx = ddev->device_prep_dma_imm_data(chan, dst, val, 0);
+	} else {
+		status = dma_pool_alloc(ep->remote_dev->signal_pool, GFP_KERNEL,
+					&src);
+		if (!status) {
+			err = -ENOMEM;
+			dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+				__func__, __LINE__, err);
+			goto alloc_fail;
+		}
+		status->val = val;
+		status->src_dma_addr = src;
+		status->ep = ep;
+		src += offsetof(struct scif_status, val);
+		tx = ddev->device_prep_dma_memcpy(chan, dst, src, sizeof(val),
+						  DMA_PREP_INTERRUPT);
+	}
+	if (!tx) {
+		err = -ENOMEM;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto dma_fail;
+	}
+	if (!x100) {
+		tx->callback = scif_prog_signal_cb;
+		tx->callback_param = status;
+	}
+	cookie = tx->tx_submit(tx);
+	if (dma_submit_error(cookie)) {
+		err = -EIO;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto dma_fail;
+	}
+	dma_async_issue_pending(chan);
+	return 0;
+dma_fail:
+	if (!x100)
+		dma_pool_free(ep->remote_dev->signal_pool, status,
+			      src - offsetof(struct scif_status, val));
+alloc_fail:
+	return err;
+}
+
+/*
+ * scif_prog_signal:
+ * @epd - Endpoint Descriptor
+ * @offset - registered address to write @val to
+ * @val - Value to be written at @offset
+ * @type - Type of the window.
+ *
+ * Arrange to write a value to the registered offset after ensuring that the
+ * offset provided is indeed valid.
+ */
+int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val,
+		     enum scif_window_type type)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scif_window *window = NULL;
+	struct scif_rma_req req;
+	dma_addr_t dst_dma_addr;
+	int err;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	req.out_window = &window;
+	req.offset = offset;
+	req.nr_bytes = sizeof(u64);
+	req.prot = SCIF_PROT_WRITE;
+	req.type = SCIF_WINDOW_SINGLE;
+	if (type == SCIF_WINDOW_SELF)
+		req.head = &ep->rma_info.reg_list;
+	else
+		req.head = &ep->rma_info.remote_reg_list;
+	/* Does a valid window exist? */
+	err = scif_query_window(&req);
+	if (err) {
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto unlock_ret;
+	}
+
+	if (scif_is_mgmt_node() && scifdev_self(ep->remote_dev)) {
+		u64 *dst_virt;
+
+		if (type == SCIF_WINDOW_SELF)
+			dst_virt = scif_get_local_va(offset, window);
+		else
+			dst_virt =
+			scif_get_local_va(offset, (struct scif_window *)
+					  window->peer_window);
+		*dst_virt = val;
+	} else {
+		dst_dma_addr = __scif_off_to_dma_addr(window, offset);
+		err = _scif_prog_signal(epd, dst_dma_addr, val);
+	}
+unlock_ret:
+	mutex_unlock(&ep->rma_info.rma_lock);
+	return err;
+}
+
+static int _scif_fence_wait(scif_epd_t epd, int mark)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	dma_cookie_t cookie = mark & ~SCIF_REMOTE_FENCE;
+	int err;
+
+	/* Wait for DMA callback in scif_fence_mark_cb(..) */
+	err = wait_event_interruptible_timeout(ep->rma_info.markwq,
+					       dma_async_is_tx_complete(
+					       ep->rma_info.dma_chan,
+					       cookie, NULL, NULL) ==
+					       DMA_COMPLETE,
+					       SCIF_NODE_ALIVE_TIMEOUT);
+	if (!err)
+		err = -ETIMEDOUT;
+	else if (err > 0)
+		err = 0;
+	return err;
+}
+
+/**
+ * scif_rma_handle_remote_fences:
+ *
+ * This routine services remote fence requests.
+ */
+void scif_rma_handle_remote_fences(void)
+{
+	struct list_head *item, *tmp;
+	struct scif_remote_fence_info *fence;
+	struct scif_endpt *ep;
+	int mark, err;
+
+	might_sleep();
+	mutex_lock(&scif_info.fencelock);
+	list_for_each_safe(item, tmp, &scif_info.fence) {
+		fence = list_entry(item, struct scif_remote_fence_info,
+				   list);
+		/* Remove fence from global list */
+		list_del(&fence->list);
+
+		/* Initiate the fence operation */
+		ep = (struct scif_endpt *)fence->msg.payload[0];
+		mark = fence->msg.payload[2];
+		err = _scif_fence_wait(ep, mark);
+		if (err)
+			fence->msg.uop = SCIF_WAIT_NACK;
+		else
+			fence->msg.uop = SCIF_WAIT_ACK;
+		fence->msg.payload[0] = ep->remote_ep;
+		scif_nodeqp_send(ep->remote_dev, &fence->msg);
+		kfree(fence);
+		if (!atomic_sub_return(1, &ep->rma_info.fence_refcount))
+			schedule_work(&scif_info.misc_work);
+	}
+	mutex_unlock(&scif_info.fencelock);
+}
+
+static int _scif_send_fence(scif_epd_t epd, int uop, int mark, int *out_mark)
+{
+	int err;
+	struct scifmsg msg;
+	struct scif_fence_info *fence_req;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+
+	fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL);
+	if (!fence_req) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	fence_req->state = OP_IN_PROGRESS;
+	init_completion(&fence_req->comp);
+
+	msg.src = ep->port;
+	msg.uop = uop;
+	msg.payload[0] = ep->remote_ep;
+	msg.payload[1] = (u64)fence_req;
+	if (uop == SCIF_WAIT)
+		msg.payload[2] = mark;
+	spin_lock(&ep->lock);
+	if (ep->state == SCIFEP_CONNECTED)
+		err = scif_nodeqp_send(ep->remote_dev, &msg);
+	else
+		err = -ENOTCONN;
+	spin_unlock(&ep->lock);
+	if (err)
+		goto error_free;
+retry:
+	/* Wait for a SCIF_WAIT_(N)ACK message */
+	err = wait_for_completion_timeout(&fence_req->comp,
+					  SCIF_NODE_ALIVE_TIMEOUT);
+	if (!err && scifdev_alive(ep))
+		goto retry;
+	if (!err)
+		err = -ENODEV;
+	if (err > 0)
+		err = 0;
+	mutex_lock(&ep->rma_info.rma_lock);
+	if (err < 0) {
+		if (fence_req->state == OP_IN_PROGRESS)
+			fence_req->state = OP_FAILED;
+	}
+	if (fence_req->state == OP_FAILED && !err)
+		err = -ENOMEM;
+	if (uop == SCIF_MARK && fence_req->state == OP_COMPLETED)
+		*out_mark = SCIF_REMOTE_FENCE | fence_req->dma_mark;
+	mutex_unlock(&ep->rma_info.rma_lock);
+error_free:
+	kfree(fence_req);
+error:
+	return err;
+}
+
+/**
+ * scif_send_fence_mark:
+ * @epd: end point descriptor.
+ * @out_mark: Output DMA mark reported by peer.
+ *
+ * Send a remote fence mark request.
+ */
+static int scif_send_fence_mark(scif_epd_t epd, int *out_mark)
+{
+	return _scif_send_fence(epd, SCIF_MARK, 0, out_mark);
+}
+
+/**
+ * scif_send_fence_wait:
+ * @epd: end point descriptor.
+ * @mark: DMA mark to wait for.
+ *
+ * Send a remote fence wait request.
+ */
+static int scif_send_fence_wait(scif_epd_t epd, int mark)
+{
+	return _scif_send_fence(epd, SCIF_WAIT, mark, NULL);
+}
+
+static int _scif_send_fence_signal_wait(struct scif_endpt *ep,
+					struct scif_fence_info *fence_req)
+{
+	int err;
+
+retry:
+	/* Wait for a SCIF_SIG_(N)ACK message */
+	err = wait_for_completion_timeout(&fence_req->comp,
+					  SCIF_NODE_ALIVE_TIMEOUT);
+	if (!err && scifdev_alive(ep))
+		goto retry;
+	if (!err)
+		err = -ENODEV;
+	if (err > 0)
+		err = 0;
+	if (err < 0) {
+		mutex_lock(&ep->rma_info.rma_lock);
+		if (fence_req->state == OP_IN_PROGRESS)
+			fence_req->state = OP_FAILED;
+		mutex_unlock(&ep->rma_info.rma_lock);
+	}
+	if (fence_req->state == OP_FAILED && !err)
+		err = -ENXIO;
+	return err;
+}
+
+/**
+ * scif_send_fence_signal:
+ * @epd - endpoint descriptor
+ * @loff - local offset
+ * @lval - local value to write to loffset
+ * @roff - remote offset
+ * @rval - remote value to write to roffset
+ * @flags - flags
+ *
+ * Sends a remote fence signal request
+ */
+static int scif_send_fence_signal(scif_epd_t epd, off_t roff, u64 rval,
+				  off_t loff, u64 lval, int flags)
+{
+	int err = 0;
+	struct scifmsg msg;
+	struct scif_fence_info *fence_req;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+
+	fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL);
+	if (!fence_req) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	fence_req->state = OP_IN_PROGRESS;
+	init_completion(&fence_req->comp);
+	msg.src = ep->port;
+	if (flags & SCIF_SIGNAL_LOCAL) {
+		msg.uop = SCIF_SIG_LOCAL;
+		msg.payload[0] = ep->remote_ep;
+		msg.payload[1] = roff;
+		msg.payload[2] = rval;
+		msg.payload[3] = (u64)fence_req;
+		spin_lock(&ep->lock);
+		if (ep->state == SCIFEP_CONNECTED)
+			err = scif_nodeqp_send(ep->remote_dev, &msg);
+		else
+			err = -ENOTCONN;
+		spin_unlock(&ep->lock);
+		if (err)
+			goto error_free;
+		err = _scif_send_fence_signal_wait(ep, fence_req);
+		if (err)
+			goto error_free;
+	}
+	fence_req->state = OP_IN_PROGRESS;
+
+	if (flags & SCIF_SIGNAL_REMOTE) {
+		msg.uop = SCIF_SIG_REMOTE;
+		msg.payload[0] = ep->remote_ep;
+		msg.payload[1] = loff;
+		msg.payload[2] = lval;
+		msg.payload[3] = (u64)fence_req;
+		spin_lock(&ep->lock);
+		if (ep->state == SCIFEP_CONNECTED)
+			err = scif_nodeqp_send(ep->remote_dev, &msg);
+		else
+			err = -ENOTCONN;
+		spin_unlock(&ep->lock);
+		if (err)
+			goto error_free;
+		err = _scif_send_fence_signal_wait(ep, fence_req);
+	}
+error_free:
+	kfree(fence_req);
+error:
+	return err;
+}
+
+static void scif_fence_mark_cb(void *arg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)arg;
+
+	wake_up_interruptible(&ep->rma_info.markwq);
+	atomic_dec(&ep->rma_info.fence_refcount);
+}
+
+/*
+ * _scif_fence_mark:
+ *
+ * @epd - endpoint descriptor
+ * Set up a mark for this endpoint and return the value of the mark.
+ */
+int _scif_fence_mark(scif_epd_t epd, int *mark)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct dma_chan *chan = ep->rma_info.dma_chan;
+	struct dma_device *ddev = chan->device;
+	struct dma_async_tx_descriptor *tx;
+	dma_cookie_t cookie;
+	int err;
+
+	tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE);
+	if (!tx) {
+		err = -ENOMEM;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+	cookie = tx->tx_submit(tx);
+	if (dma_submit_error(cookie)) {
+		err = (int)cookie;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+	dma_async_issue_pending(chan);
+	tx = ddev->device_prep_dma_interrupt(chan, DMA_PREP_INTERRUPT);
+	if (!tx) {
+		err = -ENOMEM;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+	tx->callback = scif_fence_mark_cb;
+	tx->callback_param = ep;
+	*mark = cookie = tx->tx_submit(tx);
+	if (dma_submit_error(cookie)) {
+		err = (int)cookie;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+	atomic_inc(&ep->rma_info.fence_refcount);
+	dma_async_issue_pending(chan);
+	return 0;
+}
+
+#define SCIF_LOOPB_MAGIC_MARK 0xdead
+
+int scif_fence_mark(scif_epd_t epd, int flags, int *mark)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int err = 0;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI fence_mark: ep %p flags 0x%x mark 0x%x\n",
+		ep, flags, *mark);
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	/* Invalid flags? */
+	if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER))
+		return -EINVAL;
+
+	/* At least one of init self or peer RMA should be set */
+	if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER)))
+		return -EINVAL;
+
+	/* Exactly one of init self or peer RMA should be set but not both */
+	if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER))
+		return -EINVAL;
+
+	/*
+	 * Management node loopback does not need to use DMA.
+	 * Return a valid mark to be symmetric.
+	 */
+	if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) {
+		*mark = SCIF_LOOPB_MAGIC_MARK;
+		return 0;
+	}
+
+	if (flags & SCIF_FENCE_INIT_SELF)
+		err = _scif_fence_mark(epd, mark);
+	else
+		err = scif_send_fence_mark(ep, mark);
+
+	if (err)
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI fence_mark: ep %p flags 0x%x mark 0x%x err %d\n",
+		ep, flags, *mark, err);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_fence_mark);
+
+int scif_fence_wait(scif_epd_t epd, int mark)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int err = 0;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI fence_wait: ep %p mark 0x%x\n",
+		ep, mark);
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+	/*
+	 * Management node loopback does not need to use DMA.
+	 * The only valid mark provided is 0 so simply
+	 * return success if the mark is valid.
+	 */
+	if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) {
+		if (mark == SCIF_LOOPB_MAGIC_MARK)
+			return 0;
+		else
+			return -EINVAL;
+	}
+	if (mark & SCIF_REMOTE_FENCE)
+		err = scif_send_fence_wait(epd, mark);
+	else
+		err = _scif_fence_wait(epd, mark);
+	if (err < 0)
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_fence_wait);
+
+int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval,
+		      off_t roff, u64 rval, int flags)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int err = 0;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI fence_signal: ep %p loff 0x%lx lval 0x%llx roff 0x%lx rval 0x%llx flags 0x%x\n",
+		ep, loff, lval, roff, rval, flags);
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	/* Invalid flags? */
+	if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER |
+			SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE))
+		return -EINVAL;
+
+	/* At least one of init self or peer RMA should be set */
+	if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER)))
+		return -EINVAL;
+
+	/* Exactly one of init self or peer RMA should be set but not both */
+	if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER))
+		return -EINVAL;
+
+	/* At least one of SCIF_SIGNAL_LOCAL or SCIF_SIGNAL_REMOTE required */
+	if (!(flags & (SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE)))
+		return -EINVAL;
+
+	/* Only Dword offsets allowed */
+	if ((flags & SCIF_SIGNAL_LOCAL) && (loff & (sizeof(u32) - 1)))
+		return -EINVAL;
+
+	/* Only Dword aligned offsets allowed */
+	if ((flags & SCIF_SIGNAL_REMOTE) && (roff & (sizeof(u32) - 1)))
+		return -EINVAL;
+
+	if (flags & SCIF_FENCE_INIT_PEER) {
+		err = scif_send_fence_signal(epd, roff, rval, loff,
+					     lval, flags);
+	} else {
+		/* Local Signal in Local RAS */
+		if (flags & SCIF_SIGNAL_LOCAL) {
+			err = scif_prog_signal(epd, loff, lval,
+					       SCIF_WINDOW_SELF);
+			if (err)
+				goto error_ret;
+		}
+
+		/* Signal in Remote RAS */
+		if (flags & SCIF_SIGNAL_REMOTE)
+			err = scif_prog_signal(epd, roff,
+					       rval, SCIF_WINDOW_PEER);
+	}
+error_ret:
+	if (err)
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_fence_signal);
diff --git a/drivers/misc/mic/scif/scif_main.c b/drivers/misc/mic/scif/scif_main.c
new file mode 100644
index 000000000..36d847af1
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_main.c
@@ -0,0 +1,359 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/module.h>
+#include <linux/idr.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+#include "../bus/scif_bus.h"
+#include "scif_peer_bus.h"
+#include "scif_main.h"
+#include "scif_map.h"
+
+struct scif_info scif_info = {
+	.mdev = {
+		.minor = MISC_DYNAMIC_MINOR,
+		.name = "scif",
+		.fops = &scif_fops,
+	}
+};
+
+struct scif_dev *scif_dev;
+struct kmem_cache *unaligned_cache;
+static atomic_t g_loopb_cnt;
+
+/* Runs in the context of intr_wq */
+static void scif_intr_bh_handler(struct work_struct *work)
+{
+	struct scif_dev *scifdev =
+			container_of(work, struct scif_dev, intr_bh);
+
+	if (scifdev_self(scifdev))
+		scif_loopb_msg_handler(scifdev, scifdev->qpairs);
+	else
+		scif_nodeqp_intrhandler(scifdev, scifdev->qpairs);
+}
+
+int scif_setup_intr_wq(struct scif_dev *scifdev)
+{
+	if (!scifdev->intr_wq) {
+		snprintf(scifdev->intr_wqname, sizeof(scifdev->intr_wqname),
+			 "SCIF INTR %d", scifdev->node);
+		scifdev->intr_wq =
+			alloc_ordered_workqueue(scifdev->intr_wqname, 0);
+		if (!scifdev->intr_wq)
+			return -ENOMEM;
+		INIT_WORK(&scifdev->intr_bh, scif_intr_bh_handler);
+	}
+	return 0;
+}
+
+void scif_destroy_intr_wq(struct scif_dev *scifdev)
+{
+	if (scifdev->intr_wq) {
+		destroy_workqueue(scifdev->intr_wq);
+		scifdev->intr_wq = NULL;
+	}
+}
+
+irqreturn_t scif_intr_handler(int irq, void *data)
+{
+	struct scif_dev *scifdev = data;
+	struct scif_hw_dev *sdev = scifdev->sdev;
+
+	sdev->hw_ops->ack_interrupt(sdev, scifdev->db);
+	queue_work(scifdev->intr_wq, &scifdev->intr_bh);
+	return IRQ_HANDLED;
+}
+
+static void scif_qp_setup_handler(struct work_struct *work)
+{
+	struct scif_dev *scifdev = container_of(work, struct scif_dev,
+						qp_dwork.work);
+	struct scif_hw_dev *sdev = scifdev->sdev;
+	dma_addr_t da = 0;
+	int err;
+
+	if (scif_is_mgmt_node()) {
+		struct mic_bootparam *bp = sdev->dp;
+
+		da = bp->scif_card_dma_addr;
+		scifdev->rdb = bp->h2c_scif_db;
+	} else {
+		struct mic_bootparam __iomem *bp = sdev->rdp;
+
+		da = readq(&bp->scif_host_dma_addr);
+		scifdev->rdb = ioread8(&bp->c2h_scif_db);
+	}
+	if (da) {
+		err = scif_qp_response(da, scifdev);
+		if (err)
+			dev_err(&scifdev->sdev->dev,
+				"scif_qp_response err %d\n", err);
+	} else {
+		schedule_delayed_work(&scifdev->qp_dwork,
+				      msecs_to_jiffies(1000));
+	}
+}
+
+static int scif_setup_scifdev(void)
+{
+	/* We support a maximum of 129 SCIF nodes including the mgmt node */
+#define MAX_SCIF_NODES 129
+	int i;
+	u8 num_nodes = MAX_SCIF_NODES;
+
+	scif_dev = kcalloc(num_nodes, sizeof(*scif_dev), GFP_KERNEL);
+	if (!scif_dev)
+		return -ENOMEM;
+	for (i = 0; i < num_nodes; i++) {
+		struct scif_dev *scifdev = &scif_dev[i];
+
+		scifdev->node = i;
+		scifdev->exit = OP_IDLE;
+		init_waitqueue_head(&scifdev->disconn_wq);
+		mutex_init(&scifdev->lock);
+		INIT_WORK(&scifdev->peer_add_work, scif_add_peer_device);
+		INIT_DELAYED_WORK(&scifdev->p2p_dwork,
+				  scif_poll_qp_state);
+		INIT_DELAYED_WORK(&scifdev->qp_dwork,
+				  scif_qp_setup_handler);
+		INIT_LIST_HEAD(&scifdev->p2p);
+		RCU_INIT_POINTER(scifdev->spdev, NULL);
+	}
+	return 0;
+}
+
+static void scif_destroy_scifdev(void)
+{
+	kfree(scif_dev);
+}
+
+static int scif_probe(struct scif_hw_dev *sdev)
+{
+	struct scif_dev *scifdev = &scif_dev[sdev->dnode];
+	int rc;
+
+	dev_set_drvdata(&sdev->dev, sdev);
+	scifdev->sdev = sdev;
+
+	if (1 == atomic_add_return(1, &g_loopb_cnt)) {
+		struct scif_dev *loopb_dev = &scif_dev[sdev->snode];
+
+		loopb_dev->sdev = sdev;
+		rc = scif_setup_loopback_qp(loopb_dev);
+		if (rc)
+			goto exit;
+	}
+
+	rc = scif_setup_intr_wq(scifdev);
+	if (rc)
+		goto destroy_loopb;
+	rc = scif_setup_qp(scifdev);
+	if (rc)
+		goto destroy_intr;
+	scifdev->db = sdev->hw_ops->next_db(sdev);
+	scifdev->cookie = sdev->hw_ops->request_irq(sdev, scif_intr_handler,
+						    "SCIF_INTR", scifdev,
+						    scifdev->db);
+	if (IS_ERR(scifdev->cookie)) {
+		rc = PTR_ERR(scifdev->cookie);
+		goto free_qp;
+	}
+	if (scif_is_mgmt_node()) {
+		struct mic_bootparam *bp = sdev->dp;
+
+		bp->c2h_scif_db = scifdev->db;
+		bp->scif_host_dma_addr = scifdev->qp_dma_addr;
+	} else {
+		struct mic_bootparam __iomem *bp = sdev->rdp;
+
+		iowrite8(scifdev->db, &bp->h2c_scif_db);
+		writeq(scifdev->qp_dma_addr, &bp->scif_card_dma_addr);
+	}
+	schedule_delayed_work(&scifdev->qp_dwork,
+			      msecs_to_jiffies(1000));
+	return rc;
+free_qp:
+	scif_free_qp(scifdev);
+destroy_intr:
+	scif_destroy_intr_wq(scifdev);
+destroy_loopb:
+	if (atomic_dec_and_test(&g_loopb_cnt))
+		scif_destroy_loopback_qp(&scif_dev[sdev->snode]);
+exit:
+	return rc;
+}
+
+void scif_stop(struct scif_dev *scifdev)
+{
+	struct scif_dev *dev;
+	int i;
+
+	for (i = scif_info.maxid; i >= 0; i--) {
+		dev = &scif_dev[i];
+		if (scifdev_self(dev))
+			continue;
+		scif_handle_remove_node(i);
+	}
+}
+
+static void scif_remove(struct scif_hw_dev *sdev)
+{
+	struct scif_dev *scifdev = &scif_dev[sdev->dnode];
+
+	if (scif_is_mgmt_node()) {
+		struct mic_bootparam *bp = sdev->dp;
+
+		bp->c2h_scif_db = -1;
+		bp->scif_host_dma_addr = 0x0;
+	} else {
+		struct mic_bootparam __iomem *bp = sdev->rdp;
+
+		iowrite8(-1, &bp->h2c_scif_db);
+		writeq(0x0, &bp->scif_card_dma_addr);
+	}
+	if (scif_is_mgmt_node()) {
+		scif_disconnect_node(scifdev->node, true);
+	} else {
+		scif_info.card_initiated_exit = true;
+		scif_stop(scifdev);
+	}
+	if (atomic_dec_and_test(&g_loopb_cnt))
+		scif_destroy_loopback_qp(&scif_dev[sdev->snode]);
+	if (scifdev->cookie) {
+		sdev->hw_ops->free_irq(sdev, scifdev->cookie, scifdev);
+		scifdev->cookie = NULL;
+	}
+	scif_destroy_intr_wq(scifdev);
+	cancel_delayed_work(&scifdev->qp_dwork);
+	scif_free_qp(scifdev);
+	scifdev->rdb = -1;
+	scifdev->sdev = NULL;
+}
+
+static struct scif_hw_dev_id id_table[] = {
+	{ MIC_SCIF_DEV, SCIF_DEV_ANY_ID },
+	{ 0 },
+};
+
+static struct scif_driver scif_driver = {
+	.driver.name =	KBUILD_MODNAME,
+	.driver.owner =	THIS_MODULE,
+	.id_table = id_table,
+	.probe = scif_probe,
+	.remove = scif_remove,
+};
+
+static int _scif_init(void)
+{
+	int rc;
+
+	mutex_init(&scif_info.eplock);
+	spin_lock_init(&scif_info.rmalock);
+	spin_lock_init(&scif_info.nb_connect_lock);
+	spin_lock_init(&scif_info.port_lock);
+	mutex_init(&scif_info.conflock);
+	mutex_init(&scif_info.connlock);
+	mutex_init(&scif_info.fencelock);
+	INIT_LIST_HEAD(&scif_info.uaccept);
+	INIT_LIST_HEAD(&scif_info.listen);
+	INIT_LIST_HEAD(&scif_info.zombie);
+	INIT_LIST_HEAD(&scif_info.connected);
+	INIT_LIST_HEAD(&scif_info.disconnected);
+	INIT_LIST_HEAD(&scif_info.rma);
+	INIT_LIST_HEAD(&scif_info.rma_tc);
+	INIT_LIST_HEAD(&scif_info.mmu_notif_cleanup);
+	INIT_LIST_HEAD(&scif_info.fence);
+	INIT_LIST_HEAD(&scif_info.nb_connect_list);
+	init_waitqueue_head(&scif_info.exitwq);
+	scif_info.rma_tc_limit = SCIF_RMA_TEMP_CACHE_LIMIT;
+	scif_info.en_msg_log = 0;
+	scif_info.p2p_enable = 1;
+	rc = scif_setup_scifdev();
+	if (rc)
+		goto error;
+	unaligned_cache = kmem_cache_create("Unaligned_DMA",
+					    SCIF_KMEM_UNALIGNED_BUF_SIZE,
+					    0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!unaligned_cache) {
+		rc = -ENOMEM;
+		goto free_sdev;
+	}
+	INIT_WORK(&scif_info.misc_work, scif_misc_handler);
+	INIT_WORK(&scif_info.mmu_notif_work, scif_mmu_notif_handler);
+	INIT_WORK(&scif_info.conn_work, scif_conn_handler);
+	idr_init(&scif_ports);
+	return 0;
+free_sdev:
+	scif_destroy_scifdev();
+error:
+	return rc;
+}
+
+static void _scif_exit(void)
+{
+	idr_destroy(&scif_ports);
+	kmem_cache_destroy(unaligned_cache);
+	scif_destroy_scifdev();
+}
+
+static int __init scif_init(void)
+{
+	struct miscdevice *mdev = &scif_info.mdev;
+	int rc;
+
+	_scif_init();
+	iova_cache_get();
+	rc = scif_peer_bus_init();
+	if (rc)
+		goto exit;
+	rc = scif_register_driver(&scif_driver);
+	if (rc)
+		goto peer_bus_exit;
+	rc = misc_register(mdev);
+	if (rc)
+		goto unreg_scif;
+	scif_init_debugfs();
+	return 0;
+unreg_scif:
+	scif_unregister_driver(&scif_driver);
+peer_bus_exit:
+	scif_peer_bus_exit();
+exit:
+	_scif_exit();
+	return rc;
+}
+
+static void __exit scif_exit(void)
+{
+	scif_exit_debugfs();
+	misc_deregister(&scif_info.mdev);
+	scif_unregister_driver(&scif_driver);
+	scif_peer_bus_exit();
+	iova_cache_put();
+	_scif_exit();
+}
+
+module_init(scif_init);
+module_exit(scif_exit);
+
+MODULE_DEVICE_TABLE(scif, id_table);
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) SCIF driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/scif/scif_main.h b/drivers/misc/mic/scif/scif_main.h
new file mode 100644
index 000000000..0e5eff9ad
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_main.h
@@ -0,0 +1,283 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_MAIN_H
+#define SCIF_MAIN_H
+
+#include <linux/sched/signal.h>
+#include <linux/pci.h>
+#include <linux/miscdevice.h>
+#include <linux/dmaengine.h>
+#include <linux/iova.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <linux/vmalloc.h>
+#include <linux/scif.h>
+#include "../common/mic_dev.h"
+
+#define SCIF_MGMT_NODE 0
+#define SCIF_DEFAULT_WATCHDOG_TO 30
+#define SCIF_NODE_ACCEPT_TIMEOUT (3 * HZ)
+#define SCIF_NODE_ALIVE_TIMEOUT (SCIF_DEFAULT_WATCHDOG_TO * HZ)
+#define SCIF_RMA_TEMP_CACHE_LIMIT 0x20000
+
+/*
+ * Generic state used for certain node QP message exchanges
+ * like Unregister, Alloc etc.
+ */
+enum scif_msg_state {
+	OP_IDLE = 1,
+	OP_IN_PROGRESS,
+	OP_COMPLETED,
+	OP_FAILED
+};
+
+/*
+ * struct scif_info - Global SCIF information
+ *
+ * @nodeid: Node ID this node is to others
+ * @maxid: Max known node ID
+ * @total: Total number of SCIF nodes
+ * @nr_zombies: number of zombie endpoints
+ * @eplock: Lock to synchronize listening, zombie endpoint lists
+ * @connlock: Lock to synchronize connected and disconnected lists
+ * @nb_connect_lock: Synchronize non blocking connect operations
+ * @port_lock: Synchronize access to SCIF ports
+ * @uaccept: List of user acceptreq waiting for acceptreg
+ * @listen: List of listening end points
+ * @zombie: List of zombie end points with pending RMA's
+ * @connected: List of end points in connected state
+ * @disconnected: List of end points in disconnected state
+ * @nb_connect_list: List for non blocking connections
+ * @misc_work: miscellaneous SCIF tasks
+ * @conflock: Lock to synchronize SCIF node configuration changes
+ * @en_msg_log: Enable debug message logging
+ * @p2p_enable: Enable P2P SCIF network
+ * @mdev: The MISC device
+ * @conn_work: Work for workqueue handling all connections
+ * @exitwq: Wait queue for waiting for an EXIT node QP message response
+ * @loopb_dev: Dummy SCIF device used for loopback
+ * @loopb_wq: Workqueue used for handling loopback messages
+ * @loopb_wqname[16]: Name of loopback workqueue
+ * @loopb_work: Used for submitting work to loopb_wq
+ * @loopb_recv_q: List of messages received on the loopb_wq
+ * @card_initiated_exit: set when the card has initiated the exit
+ * @rmalock: Synchronize access to RMA operations
+ * @fencelock: Synchronize access to list of remote fences requested.
+ * @rma: List of temporary registered windows to be destroyed.
+ * @rma_tc: List of temporary registered & cached Windows to be destroyed
+ * @fence: List of remote fence requests
+ * @mmu_notif_work: Work for registration caching MMU notifier workqueue
+ * @mmu_notif_cleanup: List of temporary cached windows for reg cache
+ * @rma_tc_limit: RMA temporary cache limit
+ */
+struct scif_info {
+	u8 nodeid;
+	u8 maxid;
+	u8 total;
+	u32 nr_zombies;
+	struct mutex eplock;
+	struct mutex connlock;
+	spinlock_t nb_connect_lock;
+	spinlock_t port_lock;
+	struct list_head uaccept;
+	struct list_head listen;
+	struct list_head zombie;
+	struct list_head connected;
+	struct list_head disconnected;
+	struct list_head nb_connect_list;
+	struct work_struct misc_work;
+	struct mutex conflock;
+	u8 en_msg_log;
+	u8 p2p_enable;
+	struct miscdevice mdev;
+	struct work_struct conn_work;
+	wait_queue_head_t exitwq;
+	struct scif_dev *loopb_dev;
+	struct workqueue_struct *loopb_wq;
+	char loopb_wqname[16];
+	struct work_struct loopb_work;
+	struct list_head loopb_recv_q;
+	bool card_initiated_exit;
+	spinlock_t rmalock;
+	struct mutex fencelock;
+	struct list_head rma;
+	struct list_head rma_tc;
+	struct list_head fence;
+	struct work_struct mmu_notif_work;
+	struct list_head mmu_notif_cleanup;
+	unsigned long rma_tc_limit;
+};
+
+/*
+ * struct scif_p2p_info - SCIF mapping information used for P2P
+ *
+ * @ppi_peer_id - SCIF peer node id
+ * @ppi_sg - Scatter list for bar information (One for mmio and one for aper)
+ * @sg_nentries - Number of entries in the scatterlist
+ * @ppi_da: DMA address for MMIO and APER bars
+ * @ppi_len: Length of MMIO and APER bars
+ * @ppi_list: Link in list of mapping information
+ */
+struct scif_p2p_info {
+	u8 ppi_peer_id;
+	struct scatterlist *ppi_sg[2];
+	u64 sg_nentries[2];
+	dma_addr_t ppi_da[2];
+	u64 ppi_len[2];
+#define SCIF_PPI_MMIO 0
+#define SCIF_PPI_APER 1
+	struct list_head ppi_list;
+};
+
+/*
+ * struct scif_dev - SCIF remote device specific fields
+ *
+ * @node: Node id
+ * @p2p: List of P2P mapping information
+ * @qpairs: The node queue pair for exchanging control messages
+ * @intr_wq: Workqueue for handling Node QP messages
+ * @intr_wqname: Name of node QP workqueue for handling interrupts
+ * @intr_bh: Used for submitting work to intr_wq
+ * @lock: Lock used for synchronizing access to the scif device
+ * @sdev: SCIF hardware device on the SCIF hardware bus
+ * @db: doorbell the peer will trigger to generate an interrupt on self
+ * @rdb: Doorbell to trigger on the peer to generate an interrupt on the peer
+ * @cookie: Cookie received while registering the interrupt handler
+ * @peer_add_work: Work for handling device_add for peer devices
+ * @p2p_dwork: Delayed work to enable polling for P2P state
+ * @qp_dwork: Delayed work for enabling polling for remote QP information
+ * @p2p_retry: Number of times to retry polling of P2P state
+ * @base_addr: P2P aperture bar base address
+ * @mic_mw mmio: The peer MMIO information used for P2P
+ * @spdev: SCIF peer device on the SCIF peer bus
+ * @node_remove_ack_pending: True if a node_remove_ack is pending
+ * @exit_ack_pending: true if an exit_ack is pending
+ * @disconn_wq: Used while waiting for a node remove response
+ * @disconn_rescnt: Keeps track of number of node remove requests sent
+ * @exit: Status of exit message
+ * @qp_dma_addr: Queue pair DMA address passed to the peer
+ * @dma_ch_idx: Round robin index for DMA channels
+ * @signal_pool: DMA pool used for scheduling scif_fence_signal DMA's
+*/
+struct scif_dev {
+	u8 node;
+	struct list_head p2p;
+	struct scif_qp *qpairs;
+	struct workqueue_struct *intr_wq;
+	char intr_wqname[16];
+	struct work_struct intr_bh;
+	struct mutex lock;
+	struct scif_hw_dev *sdev;
+	int db;
+	int rdb;
+	struct mic_irq *cookie;
+	struct work_struct peer_add_work;
+	struct delayed_work p2p_dwork;
+	struct delayed_work qp_dwork;
+	int p2p_retry;
+	dma_addr_t base_addr;
+	struct mic_mw mmio;
+	struct scif_peer_dev __rcu *spdev;
+	bool node_remove_ack_pending;
+	bool exit_ack_pending;
+	wait_queue_head_t disconn_wq;
+	atomic_t disconn_rescnt;
+	enum scif_msg_state exit;
+	dma_addr_t qp_dma_addr;
+	int dma_ch_idx;
+	struct dma_pool *signal_pool;
+};
+
+extern bool scif_reg_cache_enable;
+extern bool scif_ulimit_check;
+extern struct scif_info scif_info;
+extern struct idr scif_ports;
+extern struct bus_type scif_peer_bus;
+extern struct scif_dev *scif_dev;
+extern const struct file_operations scif_fops;
+extern const struct file_operations scif_anon_fops;
+
+/* Size of the RB for the Node QP */
+#define SCIF_NODE_QP_SIZE 0x10000
+
+#include "scif_nodeqp.h"
+#include "scif_rma.h"
+#include "scif_rma_list.h"
+
+/*
+ * scifdev_self:
+ * @dev: The remote SCIF Device
+ *
+ * Returns true if the SCIF Device passed is the self aka Loopback SCIF device.
+ */
+static inline int scifdev_self(struct scif_dev *dev)
+{
+	return dev->node == scif_info.nodeid;
+}
+
+static inline bool scif_is_mgmt_node(void)
+{
+	return !scif_info.nodeid;
+}
+
+/*
+ * scifdev_is_p2p:
+ * @dev: The remote SCIF Device
+ *
+ * Returns true if the SCIF Device is a MIC Peer to Peer SCIF device.
+ */
+static inline bool scifdev_is_p2p(struct scif_dev *dev)
+{
+	if (scif_is_mgmt_node())
+		return false;
+	else
+		return dev != &scif_dev[SCIF_MGMT_NODE] &&
+			!scifdev_self(dev);
+}
+
+/*
+ * scifdev_alive:
+ * @scifdev: The remote SCIF Device
+ *
+ * Returns true if the remote SCIF Device is running or sleeping for
+ * this endpoint.
+ */
+static inline int _scifdev_alive(struct scif_dev *scifdev)
+{
+	struct scif_peer_dev *spdev;
+
+	rcu_read_lock();
+	spdev = rcu_dereference(scifdev->spdev);
+	rcu_read_unlock();
+	return !!spdev;
+}
+
+#include "scif_epd.h"
+
+void __init scif_init_debugfs(void);
+void scif_exit_debugfs(void);
+int scif_setup_intr_wq(struct scif_dev *scifdev);
+void scif_destroy_intr_wq(struct scif_dev *scifdev);
+void scif_cleanup_scifdev(struct scif_dev *dev);
+void scif_handle_remove_node(int node);
+void scif_disconnect_node(u32 node_id, bool mgmt_initiated);
+void scif_free_qp(struct scif_dev *dev);
+void scif_misc_handler(struct work_struct *work);
+void scif_stop(struct scif_dev *scifdev);
+irqreturn_t scif_intr_handler(int irq, void *data);
+#endif /* SCIF_MAIN_H */
diff --git a/drivers/misc/mic/scif/scif_map.h b/drivers/misc/mic/scif/scif_map.h
new file mode 100644
index 000000000..3e86360ba
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_map.h
@@ -0,0 +1,136 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_MAP_H
+#define SCIF_MAP_H
+
+#include "../bus/scif_bus.h"
+
+static __always_inline void *
+scif_alloc_coherent(dma_addr_t *dma_handle,
+		    struct scif_dev *scifdev, size_t size,
+		    gfp_t gfp)
+{
+	void *va;
+
+	if (scifdev_self(scifdev)) {
+		va = kmalloc(size, gfp);
+		if (va)
+			*dma_handle = virt_to_phys(va);
+	} else {
+		va = dma_alloc_coherent(&scifdev->sdev->dev,
+					size, dma_handle, gfp);
+		if (va && scifdev_is_p2p(scifdev))
+			*dma_handle = *dma_handle + scifdev->base_addr;
+	}
+	return va;
+}
+
+static __always_inline void
+scif_free_coherent(void *va, dma_addr_t local,
+		   struct scif_dev *scifdev, size_t size)
+{
+	if (scifdev_self(scifdev)) {
+		kfree(va);
+	} else {
+		if (scifdev_is_p2p(scifdev) && local > scifdev->base_addr)
+			local = local - scifdev->base_addr;
+		dma_free_coherent(&scifdev->sdev->dev,
+				  size, va, local);
+	}
+}
+
+static __always_inline int
+scif_map_single(dma_addr_t *dma_handle,
+		void *local, struct scif_dev *scifdev, size_t size)
+{
+	int err = 0;
+
+	if (scifdev_self(scifdev)) {
+		*dma_handle = virt_to_phys((local));
+	} else {
+		*dma_handle = dma_map_single(&scifdev->sdev->dev,
+					     local, size, DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(&scifdev->sdev->dev, *dma_handle))
+			err = -ENOMEM;
+		else if (scifdev_is_p2p(scifdev))
+			*dma_handle = *dma_handle + scifdev->base_addr;
+	}
+	if (err)
+		*dma_handle = 0;
+	return err;
+}
+
+static __always_inline void
+scif_unmap_single(dma_addr_t local, struct scif_dev *scifdev,
+		  size_t size)
+{
+	if (!scifdev_self(scifdev)) {
+		if (scifdev_is_p2p(scifdev))
+			local = local - scifdev->base_addr;
+		dma_unmap_single(&scifdev->sdev->dev, local,
+				 size, DMA_BIDIRECTIONAL);
+	}
+}
+
+static __always_inline void *
+scif_ioremap(dma_addr_t phys, size_t size, struct scif_dev *scifdev)
+{
+	void *out_virt;
+	struct scif_hw_dev *sdev = scifdev->sdev;
+
+	if (scifdev_self(scifdev))
+		out_virt = phys_to_virt(phys);
+	else
+		out_virt = (void __force *)
+			   sdev->hw_ops->ioremap(sdev, phys, size);
+	return out_virt;
+}
+
+static __always_inline void
+scif_iounmap(void *virt, size_t len, struct scif_dev *scifdev)
+{
+	if (!scifdev_self(scifdev)) {
+		struct scif_hw_dev *sdev = scifdev->sdev;
+
+		sdev->hw_ops->iounmap(sdev, (void __force __iomem *)virt);
+	}
+}
+
+static __always_inline int
+scif_map_page(dma_addr_t *dma_handle, struct page *page,
+	      struct scif_dev *scifdev)
+{
+	int err = 0;
+
+	if (scifdev_self(scifdev)) {
+		*dma_handle = page_to_phys(page);
+	} else {
+		struct scif_hw_dev *sdev = scifdev->sdev;
+		*dma_handle = dma_map_page(&sdev->dev,
+					   page, 0x0, PAGE_SIZE,
+					   DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(&sdev->dev, *dma_handle))
+			err = -ENOMEM;
+		else if (scifdev_is_p2p(scifdev))
+			*dma_handle = *dma_handle + scifdev->base_addr;
+	}
+	if (err)
+		*dma_handle = 0;
+	return err;
+}
+#endif  /* SCIF_MAP_H */
diff --git a/drivers/misc/mic/scif/scif_mmap.c b/drivers/misc/mic/scif/scif_mmap.c
new file mode 100644
index 000000000..928211677
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_mmap.c
@@ -0,0 +1,699 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+
+/*
+ * struct scif_vma_info - Information about a remote memory mapping
+ *			  created via scif_mmap(..)
+ * @vma: VM area struct
+ * @list: link to list of active vmas
+ */
+struct scif_vma_info {
+	struct vm_area_struct *vma;
+	struct list_head list;
+};
+
+void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_rma_req req;
+	struct scif_window *window = NULL;
+	struct scif_window *recv_window =
+		(struct scif_window *)msg->payload[0];
+	struct scif_endpt *ep;
+
+	ep = (struct scif_endpt *)recv_window->ep;
+	req.out_window = &window;
+	req.offset = recv_window->offset;
+	req.prot = recv_window->prot;
+	req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
+	req.type = SCIF_WINDOW_FULL;
+	req.head = &ep->rma_info.reg_list;
+	msg->payload[0] = ep->remote_ep;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Does a valid window exist? */
+	if (scif_query_window(&req)) {
+		dev_err(&scifdev->sdev->dev,
+			"%s %d -ENXIO\n", __func__, __LINE__);
+		msg->uop = SCIF_UNREGISTER_ACK;
+		goto error;
+	}
+
+	scif_put_window(window, window->nr_pages);
+
+	if (!window->ref_count) {
+		atomic_inc(&ep->rma_info.tw_refcount);
+		ep->rma_info.async_list_del = 1;
+		list_del_init(&window->list);
+		scif_free_window_offset(ep, window, window->offset);
+	}
+error:
+	mutex_unlock(&ep->rma_info.rma_lock);
+	if (window && !window->ref_count)
+		scif_queue_for_cleanup(window, &scif_info.rma);
+}
+
+/*
+ * Remove valid remote memory mappings created via scif_mmap(..) from the
+ * process address space since the remote node is lost
+ */
+static void __scif_zap_mmaps(struct scif_endpt *ep)
+{
+	struct list_head *item;
+	struct scif_vma_info *info;
+	struct vm_area_struct *vma;
+	unsigned long size;
+
+	spin_lock(&ep->lock);
+	list_for_each(item, &ep->rma_info.vma_list) {
+		info = list_entry(item, struct scif_vma_info, list);
+		vma = info->vma;
+		size = vma->vm_end - vma->vm_start;
+		zap_vma_ptes(vma, vma->vm_start, size);
+		dev_dbg(scif_info.mdev.this_device,
+			"%s ep %p zap vma %p size 0x%lx\n",
+			__func__, ep, info->vma, size);
+	}
+	spin_unlock(&ep->lock);
+}
+
+/*
+ * Traverse the list of endpoints for a particular remote node and
+ * zap valid remote memory mappings since the remote node is lost
+ */
+static void _scif_zap_mmaps(int node, struct list_head *head)
+{
+	struct scif_endpt *ep;
+	struct list_head *item;
+
+	mutex_lock(&scif_info.connlock);
+	list_for_each(item, head) {
+		ep = list_entry(item, struct scif_endpt, list);
+		if (ep->remote_dev->node == node)
+			__scif_zap_mmaps(ep);
+	}
+	mutex_unlock(&scif_info.connlock);
+}
+
+/*
+ * Wrapper for removing remote memory mappings for a particular node. This API
+ * is called by peer nodes as part of handling a lost node.
+ */
+void scif_zap_mmaps(int node)
+{
+	_scif_zap_mmaps(node, &scif_info.connected);
+	_scif_zap_mmaps(node, &scif_info.disconnected);
+}
+
+/*
+ * This API is only called while handling a lost node:
+ * a) Remote node is dead.
+ * b) Remote memory mappings have been zapped
+ * So we can traverse the remote_reg_list without any locks. Since
+ * the window has not yet been unregistered we can drop the ref count
+ * and queue it to the cleanup thread.
+ */
+static void __scif_cleanup_rma_for_zombies(struct scif_endpt *ep)
+{
+	struct list_head *pos, *tmp;
+	struct scif_window *window;
+
+	list_for_each_safe(pos, tmp, &ep->rma_info.remote_reg_list) {
+		window = list_entry(pos, struct scif_window, list);
+		if (window->ref_count)
+			scif_put_window(window, window->nr_pages);
+		else
+			dev_err(scif_info.mdev.this_device,
+				"%s %d unexpected\n",
+				__func__, __LINE__);
+		if (!window->ref_count) {
+			atomic_inc(&ep->rma_info.tw_refcount);
+			list_del_init(&window->list);
+			scif_queue_for_cleanup(window, &scif_info.rma);
+		}
+	}
+}
+
+/* Cleanup remote registration lists for zombie endpoints */
+void scif_cleanup_rma_for_zombies(int node)
+{
+	struct scif_endpt *ep;
+	struct list_head *item;
+
+	mutex_lock(&scif_info.eplock);
+	list_for_each(item, &scif_info.zombie) {
+		ep = list_entry(item, struct scif_endpt, list);
+		if (ep->remote_dev && ep->remote_dev->node == node)
+			__scif_cleanup_rma_for_zombies(ep);
+	}
+	mutex_unlock(&scif_info.eplock);
+	flush_work(&scif_info.misc_work);
+}
+
+/* Insert the VMA into the per endpoint VMA list */
+static int scif_insert_vma(struct scif_endpt *ep, struct vm_area_struct *vma)
+{
+	struct scif_vma_info *info;
+	int err = 0;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		err = -ENOMEM;
+		goto done;
+	}
+	info->vma = vma;
+	spin_lock(&ep->lock);
+	list_add_tail(&info->list, &ep->rma_info.vma_list);
+	spin_unlock(&ep->lock);
+done:
+	return err;
+}
+
+/* Delete the VMA from the per endpoint VMA list */
+static void scif_delete_vma(struct scif_endpt *ep, struct vm_area_struct *vma)
+{
+	struct list_head *item;
+	struct scif_vma_info *info;
+
+	spin_lock(&ep->lock);
+	list_for_each(item, &ep->rma_info.vma_list) {
+		info = list_entry(item, struct scif_vma_info, list);
+		if (info->vma == vma) {
+			list_del(&info->list);
+			kfree(info);
+			break;
+		}
+	}
+	spin_unlock(&ep->lock);
+}
+
+static phys_addr_t scif_get_phys(phys_addr_t phys, struct scif_endpt *ep)
+{
+	struct scif_dev *scifdev = (struct scif_dev *)ep->remote_dev;
+	struct scif_hw_dev *sdev = scifdev->sdev;
+	phys_addr_t out_phys, apt_base = 0;
+
+	/*
+	 * If the DMA address is card relative then we need to add the
+	 * aperture base for mmap to work correctly
+	 */
+	if (!scifdev_self(scifdev) && sdev->aper && sdev->card_rel_da)
+		apt_base = sdev->aper->pa;
+	out_phys = apt_base + phys;
+	return out_phys;
+}
+
+int scif_get_pages(scif_epd_t epd, off_t offset, size_t len,
+		   struct scif_range **pages)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scif_rma_req req;
+	struct scif_window *window = NULL;
+	int nr_pages, err, i;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI get_pinned_pages: ep %p offset 0x%lx len 0x%lx\n",
+		ep, offset, len);
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	if (!len || (offset < 0) ||
+	    (offset + len < offset) ||
+	    (ALIGN(offset, PAGE_SIZE) != offset) ||
+	    (ALIGN(len, PAGE_SIZE) != len))
+		return -EINVAL;
+
+	nr_pages = len >> PAGE_SHIFT;
+
+	req.out_window = &window;
+	req.offset = offset;
+	req.prot = 0;
+	req.nr_bytes = len;
+	req.type = SCIF_WINDOW_SINGLE;
+	req.head = &ep->rma_info.remote_reg_list;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Does a valid window exist? */
+	err = scif_query_window(&req);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error;
+	}
+
+	/* Allocate scif_range */
+	*pages = kzalloc(sizeof(**pages), GFP_KERNEL);
+	if (!*pages) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	/* Allocate phys addr array */
+	(*pages)->phys_addr = scif_zalloc(nr_pages * sizeof(dma_addr_t));
+	if (!((*pages)->phys_addr)) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev)) {
+		/* Allocate virtual address array */
+		((*pages)->va = scif_zalloc(nr_pages * sizeof(void *)));
+		if (!(*pages)->va) {
+			err = -ENOMEM;
+			goto error;
+		}
+	}
+	/* Populate the values */
+	(*pages)->cookie = window;
+	(*pages)->nr_pages = nr_pages;
+	(*pages)->prot_flags = window->prot;
+
+	for (i = 0; i < nr_pages; i++) {
+		(*pages)->phys_addr[i] =
+			__scif_off_to_dma_addr(window, offset +
+					       (i * PAGE_SIZE));
+		(*pages)->phys_addr[i] = scif_get_phys((*pages)->phys_addr[i],
+							ep);
+		if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev))
+			(*pages)->va[i] =
+				ep->remote_dev->sdev->aper->va +
+				(*pages)->phys_addr[i] -
+				ep->remote_dev->sdev->aper->pa;
+	}
+
+	scif_get_window(window, nr_pages);
+error:
+	mutex_unlock(&ep->rma_info.rma_lock);
+	if (err) {
+		if (*pages) {
+			scif_free((*pages)->phys_addr,
+				  nr_pages * sizeof(dma_addr_t));
+			scif_free((*pages)->va,
+				  nr_pages * sizeof(void *));
+			kfree(*pages);
+			*pages = NULL;
+		}
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	}
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_get_pages);
+
+int scif_put_pages(struct scif_range *pages)
+{
+	struct scif_endpt *ep;
+	struct scif_window *window;
+	struct scifmsg msg;
+
+	if (!pages || !pages->cookie)
+		return -EINVAL;
+
+	window = pages->cookie;
+
+	if (!window || window->magic != SCIFEP_MAGIC)
+		return -EINVAL;
+
+	ep = (struct scif_endpt *)window->ep;
+	/*
+	 * If the state is SCIFEP_CONNECTED or SCIFEP_DISCONNECTED then the
+	 * callee should be allowed to release references to the pages,
+	 * else the endpoint was not connected in the first place,
+	 * hence the ENOTCONN.
+	 */
+	if (ep->state != SCIFEP_CONNECTED && ep->state != SCIFEP_DISCONNECTED)
+		return -ENOTCONN;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+
+	scif_put_window(window, pages->nr_pages);
+
+	/* Initiate window destruction if ref count is zero */
+	if (!window->ref_count) {
+		list_del(&window->list);
+		mutex_unlock(&ep->rma_info.rma_lock);
+		scif_drain_dma_intr(ep->remote_dev->sdev,
+				    ep->rma_info.dma_chan);
+		/* Inform the peer about this window being destroyed. */
+		msg.uop = SCIF_MUNMAP;
+		msg.src = ep->port;
+		msg.payload[0] = window->peer_window;
+		/* No error handling for notification messages */
+		scif_nodeqp_send(ep->remote_dev, &msg);
+		/* Destroy this window from the peer's registered AS */
+		scif_destroy_remote_window(window);
+	} else {
+		mutex_unlock(&ep->rma_info.rma_lock);
+	}
+
+	scif_free(pages->phys_addr, pages->nr_pages * sizeof(dma_addr_t));
+	scif_free(pages->va, pages->nr_pages * sizeof(void *));
+	kfree(pages);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(scif_put_pages);
+
+/*
+ * scif_rma_list_mmap:
+ *
+ * Traverse the remote registration list starting from start_window:
+ * 1) Create VtoP mappings via remap_pfn_range(..)
+ * 2) Once step 1) and 2) complete successfully then traverse the range of
+ *    windows again and bump the reference count.
+ * RMA lock must be held.
+ */
+static int scif_rma_list_mmap(struct scif_window *start_window, s64 offset,
+			      int nr_pages, struct vm_area_struct *vma)
+{
+	s64 end_offset, loop_offset = offset;
+	struct scif_window *window = start_window;
+	int loop_nr_pages, nr_pages_left = nr_pages;
+	struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
+	struct list_head *head = &ep->rma_info.remote_reg_list;
+	int i, err = 0;
+	dma_addr_t phys_addr;
+	struct scif_window_iter src_win_iter;
+	size_t contig_bytes = 0;
+
+	might_sleep();
+	list_for_each_entry_from(window, head, list) {
+		end_offset = window->offset +
+			(window->nr_pages << PAGE_SHIFT);
+		loop_nr_pages = min_t(int,
+				      (end_offset - loop_offset) >> PAGE_SHIFT,
+				      nr_pages_left);
+		scif_init_window_iter(window, &src_win_iter);
+		for (i = 0; i < loop_nr_pages; i++) {
+			phys_addr = scif_off_to_dma_addr(window, loop_offset,
+							 &contig_bytes,
+							 &src_win_iter);
+			phys_addr = scif_get_phys(phys_addr, ep);
+			err = remap_pfn_range(vma,
+					      vma->vm_start +
+					      loop_offset - offset,
+					      phys_addr >> PAGE_SHIFT,
+					      PAGE_SIZE,
+					      vma->vm_page_prot);
+			if (err)
+				goto error;
+			loop_offset += PAGE_SIZE;
+		}
+		nr_pages_left -= loop_nr_pages;
+		if (!nr_pages_left)
+			break;
+	}
+	/*
+	 * No more failures expected. Bump up the ref count for all
+	 * the windows. Another traversal from start_window required
+	 * for handling errors encountered across windows during
+	 * remap_pfn_range(..).
+	 */
+	loop_offset = offset;
+	nr_pages_left = nr_pages;
+	window = start_window;
+	head = &ep->rma_info.remote_reg_list;
+	list_for_each_entry_from(window, head, list) {
+		end_offset = window->offset +
+			(window->nr_pages << PAGE_SHIFT);
+		loop_nr_pages = min_t(int,
+				      (end_offset - loop_offset) >> PAGE_SHIFT,
+				      nr_pages_left);
+		scif_get_window(window, loop_nr_pages);
+		nr_pages_left -= loop_nr_pages;
+		loop_offset += (loop_nr_pages << PAGE_SHIFT);
+		if (!nr_pages_left)
+			break;
+	}
+error:
+	if (err)
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	return err;
+}
+
+/*
+ * scif_rma_list_munmap:
+ *
+ * Traverse the remote registration list starting from window:
+ * 1) Decrement ref count.
+ * 2) If the ref count drops to zero then send a SCIF_MUNMAP message to peer.
+ * RMA lock must be held.
+ */
+static void scif_rma_list_munmap(struct scif_window *start_window,
+				 s64 offset, int nr_pages)
+{
+	struct scifmsg msg;
+	s64 loop_offset = offset, end_offset;
+	int loop_nr_pages, nr_pages_left = nr_pages;
+	struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
+	struct list_head *head = &ep->rma_info.remote_reg_list;
+	struct scif_window *window = start_window, *_window;
+
+	msg.uop = SCIF_MUNMAP;
+	msg.src = ep->port;
+	loop_offset = offset;
+	nr_pages_left = nr_pages;
+	list_for_each_entry_safe_from(window, _window, head, list) {
+		end_offset = window->offset +
+			(window->nr_pages << PAGE_SHIFT);
+		loop_nr_pages = min_t(int,
+				      (end_offset - loop_offset) >> PAGE_SHIFT,
+				      nr_pages_left);
+		scif_put_window(window, loop_nr_pages);
+		if (!window->ref_count) {
+			struct scif_dev *rdev = ep->remote_dev;
+
+			scif_drain_dma_intr(rdev->sdev,
+					    ep->rma_info.dma_chan);
+			/* Inform the peer about this munmap */
+			msg.payload[0] = window->peer_window;
+			/* No error handling for Notification messages. */
+			scif_nodeqp_send(ep->remote_dev, &msg);
+			list_del(&window->list);
+			/* Destroy this window from the peer's registered AS */
+			scif_destroy_remote_window(window);
+		}
+		nr_pages_left -= loop_nr_pages;
+		loop_offset += (loop_nr_pages << PAGE_SHIFT);
+		if (!nr_pages_left)
+			break;
+	}
+}
+
+/*
+ * The private data field of each VMA used to mmap a remote window
+ * points to an instance of struct vma_pvt
+ */
+struct vma_pvt {
+	struct scif_endpt *ep;	/* End point for remote window */
+	s64 offset;		/* offset within remote window */
+	bool valid_offset;	/* offset is valid only if the original
+				 * mmap request was for a single page
+				 * else the offset within the vma is
+				 * the correct offset
+				 */
+	struct kref ref;
+};
+
+static void vma_pvt_release(struct kref *ref)
+{
+	struct vma_pvt *vmapvt = container_of(ref, struct vma_pvt, ref);
+
+	kfree(vmapvt);
+}
+
+/**
+ * scif_vma_open - VMA open driver callback
+ * @vma: VMM memory area.
+ * The open method is called by the kernel to allow the subsystem implementing
+ * the VMA to initialize the area. This method is invoked any time a new
+ * reference to the VMA is made (when a process forks, for example).
+ * The one exception happens when the VMA is first created by mmap;
+ * in this case, the driver's mmap method is called instead.
+ * This function is also invoked when an existing VMA is split by the kernel
+ * due to a call to munmap on a subset of the VMA resulting in two VMAs.
+ * The kernel invokes this function only on one of the two VMAs.
+ */
+static void scif_vma_open(struct vm_area_struct *vma)
+{
+	struct vma_pvt *vmapvt = vma->vm_private_data;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI vma open: vma_start 0x%lx vma_end 0x%lx\n",
+		vma->vm_start, vma->vm_end);
+	scif_insert_vma(vmapvt->ep, vma);
+	kref_get(&vmapvt->ref);
+}
+
+/**
+ * scif_munmap - VMA close driver callback.
+ * @vma: VMM memory area.
+ * When an area is destroyed, the kernel calls its close operation.
+ * Note that there's no usage count associated with VMA's; the area
+ * is opened and closed exactly once by each process that uses it.
+ */
+static void scif_munmap(struct vm_area_struct *vma)
+{
+	struct scif_endpt *ep;
+	struct vma_pvt *vmapvt = vma->vm_private_data;
+	int nr_pages = vma_pages(vma);
+	s64 offset;
+	struct scif_rma_req req;
+	struct scif_window *window = NULL;
+	int err;
+
+	might_sleep();
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI munmap: vma_start 0x%lx vma_end 0x%lx\n",
+		vma->vm_start, vma->vm_end);
+	ep = vmapvt->ep;
+	offset = vmapvt->valid_offset ? vmapvt->offset :
+		(vma->vm_pgoff) << PAGE_SHIFT;
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI munmap: ep %p nr_pages 0x%x offset 0x%llx\n",
+		ep, nr_pages, offset);
+	req.out_window = &window;
+	req.offset = offset;
+	req.nr_bytes = vma->vm_end - vma->vm_start;
+	req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
+	req.type = SCIF_WINDOW_PARTIAL;
+	req.head = &ep->rma_info.remote_reg_list;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+
+	err = scif_query_window(&req);
+	if (err)
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	else
+		scif_rma_list_munmap(window, offset, nr_pages);
+
+	mutex_unlock(&ep->rma_info.rma_lock);
+	/*
+	 * The kernel probably zeroes these out but we still want
+	 * to clean up our own mess just in case.
+	 */
+	vma->vm_ops = NULL;
+	vma->vm_private_data = NULL;
+	kref_put(&vmapvt->ref, vma_pvt_release);
+	scif_delete_vma(ep, vma);
+}
+
+static const struct vm_operations_struct scif_vm_ops = {
+	.open = scif_vma_open,
+	.close = scif_munmap,
+};
+
+/**
+ * scif_mmap - Map pages in virtual address space to a remote window.
+ * @vma: VMM memory area.
+ * @epd: endpoint descriptor
+ *
+ * Return: Upon successful completion, scif_mmap() returns zero
+ * else an apt error is returned as documented in scif.h
+ */
+int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd)
+{
+	struct scif_rma_req req;
+	struct scif_window *window = NULL;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	s64 start_offset = vma->vm_pgoff << PAGE_SHIFT;
+	int nr_pages = vma_pages(vma);
+	int err;
+	struct vma_pvt *vmapvt;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI mmap: ep %p start_offset 0x%llx nr_pages 0x%x\n",
+		ep, start_offset, nr_pages);
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	might_sleep();
+
+	err = scif_insert_vma(ep, vma);
+	if (err)
+		return err;
+
+	vmapvt = kzalloc(sizeof(*vmapvt), GFP_KERNEL);
+	if (!vmapvt) {
+		scif_delete_vma(ep, vma);
+		return -ENOMEM;
+	}
+
+	vmapvt->ep = ep;
+	kref_init(&vmapvt->ref);
+
+	req.out_window = &window;
+	req.offset = start_offset;
+	req.nr_bytes = vma->vm_end - vma->vm_start;
+	req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
+	req.type = SCIF_WINDOW_PARTIAL;
+	req.head = &ep->rma_info.remote_reg_list;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Does a valid window exist? */
+	err = scif_query_window(&req);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error_unlock;
+	}
+
+	/* Default prot for loopback */
+	if (!scifdev_self(ep->remote_dev))
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+
+	/*
+	 * VM_DONTCOPY - Do not copy this vma on fork
+	 * VM_DONTEXPAND - Cannot expand with mremap()
+	 * VM_RESERVED - Count as reserved_vm like IO
+	 * VM_PFNMAP - Page-ranges managed without "struct page"
+	 * VM_IO - Memory mapped I/O or similar
+	 *
+	 * We do not want to copy this VMA automatically on a fork(),
+	 * expand this VMA due to mremap() or swap out these pages since
+	 * the VMA is actually backed by physical pages in the remote
+	 * node's physical memory and not via a struct page.
+	 */
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
+
+	if (!scifdev_self(ep->remote_dev))
+		vma->vm_flags |= VM_IO | VM_PFNMAP;
+
+	/* Map this range of windows */
+	err = scif_rma_list_mmap(window, start_offset, nr_pages, vma);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error_unlock;
+	}
+	/* Set up the driver call back */
+	vma->vm_ops = &scif_vm_ops;
+	vma->vm_private_data = vmapvt;
+error_unlock:
+	mutex_unlock(&ep->rma_info.rma_lock);
+	if (err) {
+		kfree(vmapvt);
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		scif_delete_vma(ep, vma);
+	}
+	return err;
+}
diff --git a/drivers/misc/mic/scif/scif_nm.c b/drivers/misc/mic/scif/scif_nm.c
new file mode 100644
index 000000000..79f26a02a
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_nm.c
@@ -0,0 +1,237 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_peer_bus.h"
+
+#include "scif_main.h"
+#include "scif_map.h"
+
+/**
+ * scif_invalidate_ep() - Set state for all connected endpoints
+ * to disconnected and wake up all send/recv waitqueues
+ */
+static void scif_invalidate_ep(int node)
+{
+	struct scif_endpt *ep;
+	struct list_head *pos, *tmpq;
+
+	flush_work(&scif_info.conn_work);
+	mutex_lock(&scif_info.connlock);
+	list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
+		ep = list_entry(pos, struct scif_endpt, list);
+		if (ep->remote_dev->node == node) {
+			scif_unmap_all_windows(ep);
+			spin_lock(&ep->lock);
+			scif_cleanup_ep_qp(ep);
+			spin_unlock(&ep->lock);
+		}
+	}
+	list_for_each_safe(pos, tmpq, &scif_info.connected) {
+		ep = list_entry(pos, struct scif_endpt, list);
+		if (ep->remote_dev->node == node) {
+			list_del(pos);
+			spin_lock(&ep->lock);
+			ep->state = SCIFEP_DISCONNECTED;
+			list_add_tail(&ep->list, &scif_info.disconnected);
+			scif_cleanup_ep_qp(ep);
+			wake_up_interruptible(&ep->sendwq);
+			wake_up_interruptible(&ep->recvwq);
+			spin_unlock(&ep->lock);
+			scif_unmap_all_windows(ep);
+		}
+	}
+	mutex_unlock(&scif_info.connlock);
+}
+
+void scif_free_qp(struct scif_dev *scifdev)
+{
+	struct scif_qp *qp = scifdev->qpairs;
+
+	if (!qp)
+		return;
+	scif_unmap_single(qp->local_buf, scifdev, qp->inbound_q.size);
+	kfree(qp->inbound_q.rb_base);
+	scif_unmap_single(qp->local_qp, scifdev, sizeof(struct scif_qp));
+	kfree(scifdev->qpairs);
+	scifdev->qpairs = NULL;
+}
+
+static void scif_cleanup_qp(struct scif_dev *dev)
+{
+	struct scif_qp *qp = &dev->qpairs[0];
+
+	if (!qp)
+		return;
+	scif_iounmap((void *)qp->remote_qp, sizeof(struct scif_qp), dev);
+	scif_iounmap((void *)qp->outbound_q.rb_base,
+		     sizeof(struct scif_qp), dev);
+	qp->remote_qp = NULL;
+	qp->local_write = 0;
+	qp->inbound_q.current_write_offset = 0;
+	qp->inbound_q.current_read_offset = 0;
+	if (scifdev_is_p2p(dev))
+		scif_free_qp(dev);
+}
+
+void scif_send_acks(struct scif_dev *dev)
+{
+	struct scifmsg msg;
+
+	if (dev->node_remove_ack_pending) {
+		msg.uop = SCIF_NODE_REMOVE_ACK;
+		msg.src.node = scif_info.nodeid;
+		msg.dst.node = SCIF_MGMT_NODE;
+		msg.payload[0] = dev->node;
+		scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], &msg);
+		dev->node_remove_ack_pending = false;
+	}
+	if (dev->exit_ack_pending) {
+		msg.uop = SCIF_EXIT_ACK;
+		msg.src.node = scif_info.nodeid;
+		msg.dst.node = dev->node;
+		scif_nodeqp_send(dev, &msg);
+		dev->exit_ack_pending = false;
+	}
+}
+
+/*
+ * scif_cleanup_scifdev
+ *
+ * @dev: Remote SCIF device.
+ * Uninitialize SCIF data structures for remote SCIF device.
+ */
+void scif_cleanup_scifdev(struct scif_dev *dev)
+{
+	struct scif_hw_dev *sdev = dev->sdev;
+
+	if (!dev->sdev)
+		return;
+	if (scifdev_is_p2p(dev)) {
+		if (dev->cookie) {
+			sdev->hw_ops->free_irq(sdev, dev->cookie, dev);
+			dev->cookie = NULL;
+		}
+		scif_destroy_intr_wq(dev);
+	}
+	flush_work(&scif_info.misc_work);
+	scif_destroy_p2p(dev);
+	scif_invalidate_ep(dev->node);
+	scif_zap_mmaps(dev->node);
+	scif_cleanup_rma_for_zombies(dev->node);
+	flush_work(&scif_info.misc_work);
+	scif_send_acks(dev);
+	if (!dev->node && scif_info.card_initiated_exit) {
+		/*
+		 * Send an SCIF_EXIT message which is the last message from MIC
+		 * to the Host and wait for a SCIF_EXIT_ACK
+		 */
+		scif_send_exit(dev);
+		scif_info.card_initiated_exit = false;
+	}
+	scif_cleanup_qp(dev);
+}
+
+/*
+ * scif_remove_node:
+ *
+ * @node: Node to remove
+ */
+void scif_handle_remove_node(int node)
+{
+	struct scif_dev *scifdev = &scif_dev[node];
+
+	if (scif_peer_unregister_device(scifdev))
+		scif_send_acks(scifdev);
+}
+
+static int scif_send_rmnode_msg(int node, int remove_node)
+{
+	struct scifmsg notif_msg;
+	struct scif_dev *dev = &scif_dev[node];
+
+	notif_msg.uop = SCIF_NODE_REMOVE;
+	notif_msg.src.node = scif_info.nodeid;
+	notif_msg.dst.node = node;
+	notif_msg.payload[0] = remove_node;
+	return scif_nodeqp_send(dev, &notif_msg);
+}
+
+/**
+ * scif_node_disconnect:
+ *
+ * @node_id[in]: source node id.
+ * @mgmt_initiated: Disconnection initiated from the mgmt node
+ *
+ * Disconnect a node from the scif network.
+ */
+void scif_disconnect_node(u32 node_id, bool mgmt_initiated)
+{
+	int ret;
+	int msg_cnt = 0;
+	u32 i = 0;
+	struct scif_dev *scifdev = &scif_dev[node_id];
+
+	if (!node_id)
+		return;
+
+	atomic_set(&scifdev->disconn_rescnt, 0);
+
+	/* Destroy p2p network */
+	for (i = 1; i <= scif_info.maxid; i++) {
+		if (i == node_id)
+			continue;
+		ret = scif_send_rmnode_msg(i, node_id);
+		if (!ret)
+			msg_cnt++;
+	}
+	/* Wait for the remote nodes to respond with SCIF_NODE_REMOVE_ACK */
+	ret = wait_event_timeout(scifdev->disconn_wq,
+				 (atomic_read(&scifdev->disconn_rescnt)
+				 == msg_cnt), SCIF_NODE_ALIVE_TIMEOUT);
+	/* Tell the card to clean up */
+	if (mgmt_initiated && _scifdev_alive(scifdev))
+		/*
+		 * Send an SCIF_EXIT message which is the last message from Host
+		 * to the MIC and wait for a SCIF_EXIT_ACK
+		 */
+		scif_send_exit(scifdev);
+	atomic_set(&scifdev->disconn_rescnt, 0);
+	/* Tell the mgmt node to clean up */
+	ret = scif_send_rmnode_msg(SCIF_MGMT_NODE, node_id);
+	if (!ret)
+		/* Wait for mgmt node to respond with SCIF_NODE_REMOVE_ACK */
+		wait_event_timeout(scifdev->disconn_wq,
+				   (atomic_read(&scifdev->disconn_rescnt) == 1),
+				   SCIF_NODE_ALIVE_TIMEOUT);
+}
+
+void scif_get_node_info(void)
+{
+	struct scifmsg msg;
+	DECLARE_COMPLETION_ONSTACK(node_info);
+
+	msg.uop = SCIF_GET_NODE_INFO;
+	msg.src.node = scif_info.nodeid;
+	msg.dst.node = SCIF_MGMT_NODE;
+	msg.payload[3] = (u64)&node_info;
+
+	if ((scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], &msg)))
+		return;
+
+	/* Wait for a response with SCIF_GET_NODE_INFO */
+	wait_for_completion(&node_info);
+}
diff --git a/drivers/misc/mic/scif/scif_nodeqp.c b/drivers/misc/mic/scif/scif_nodeqp.c
new file mode 100644
index 000000000..c66ca1a58
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_nodeqp.c
@@ -0,0 +1,1354 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "../bus/scif_bus.h"
+#include "scif_peer_bus.h"
+#include "scif_main.h"
+#include "scif_nodeqp.h"
+#include "scif_map.h"
+
+/*
+ ************************************************************************
+ * SCIF node Queue Pair (QP) setup flow:
+ *
+ * 1) SCIF driver gets probed with a scif_hw_dev via the scif_hw_bus
+ * 2) scif_setup_qp(..) allocates the local qp and calls
+ *	scif_setup_qp_connect(..) which allocates and maps the local
+ *	buffer for the inbound QP
+ * 3) The local node updates the device page with the DMA address of the QP
+ * 4) A delayed work is scheduled (qp_dwork) which periodically reads if
+ *	the peer node has updated its QP DMA address
+ * 5) Once a valid non zero address is found in the QP DMA address field
+ *	in the device page, the local node maps the remote node's QP,
+ *	updates its outbound QP and sends a SCIF_INIT message to the peer
+ * 6) The SCIF_INIT message is received by the peer node QP interrupt bottom
+ *	half handler by calling scif_init(..)
+ * 7) scif_init(..) registers a new SCIF peer node by calling
+ *	scif_peer_register_device(..) which signifies the addition of a new
+ *	SCIF node
+ * 8) On the mgmt node, P2P network setup/teardown is initiated if all the
+ *	remote nodes are online via scif_p2p_setup(..)
+ * 9) For P2P setup, the host maps the remote nodes' aperture and memory
+ *	bars and sends a SCIF_NODE_ADD message to both nodes
+ * 10) As part of scif_nodeadd, both nodes set up their local inbound
+ *	QPs and send a SCIF_NODE_ADD_ACK to the mgmt node
+ * 11) As part of scif_node_add_ack(..) the mgmt node forwards the
+ *	SCIF_NODE_ADD_ACK to the remote nodes
+ * 12) As part of scif_node_add_ack(..) the remote nodes update their
+ *	outbound QPs, make sure they can access memory on the remote node
+ *	and then add a new SCIF peer node by calling
+ *	scif_peer_register_device(..) which signifies the addition of a new
+ *	SCIF node.
+ * 13) The SCIF network is now established across all nodes.
+ *
+ ************************************************************************
+ * SCIF node QP teardown flow (initiated by non mgmt node):
+ *
+ * 1) SCIF driver gets a remove callback with a scif_hw_dev via the scif_hw_bus
+ * 2) The device page QP DMA address field is updated with 0x0
+ * 3) A non mgmt node now cleans up all local data structures and sends a
+ *	SCIF_EXIT message to the peer and waits for a SCIF_EXIT_ACK
+ * 4) As part of scif_exit(..) handling scif_disconnect_node(..) is called
+ * 5) scif_disconnect_node(..) sends a SCIF_NODE_REMOVE message to all the
+ *	peers and waits for a SCIF_NODE_REMOVE_ACK
+ * 6) As part of scif_node_remove(..) a remote node unregisters the peer
+ *	node from the SCIF network and sends a SCIF_NODE_REMOVE_ACK
+ * 7) When the mgmt node has received all the SCIF_NODE_REMOVE_ACKs
+ *	it sends itself a node remove message whose handling cleans up local
+ *	data structures and unregisters the peer node from the SCIF network
+ * 8) The mgmt node sends a SCIF_EXIT_ACK
+ * 9) Upon receipt of the SCIF_EXIT_ACK the node initiating the teardown
+ *	completes the SCIF remove routine
+ * 10) The SCIF network is now torn down for the node initiating the
+ *	teardown sequence
+ *
+ ************************************************************************
+ * SCIF node QP teardown flow (initiated by mgmt node):
+ *
+ * 1) SCIF driver gets a remove callback with a scif_hw_dev via the scif_hw_bus
+ * 2) The device page QP DMA address field is updated with 0x0
+ * 3) The mgmt node calls scif_disconnect_node(..)
+ * 4) scif_disconnect_node(..) sends a SCIF_NODE_REMOVE message to all the peers
+ *	and waits for a SCIF_NODE_REMOVE_ACK
+ * 5) As part of scif_node_remove(..) a remote node unregisters the peer
+ *	node from the SCIF network and sends a SCIF_NODE_REMOVE_ACK
+ * 6) When the mgmt node has received all the SCIF_NODE_REMOVE_ACKs
+ *	it unregisters the peer node from the SCIF network
+ * 7) The mgmt node sends a SCIF_EXIT message and waits for a SCIF_EXIT_ACK.
+ * 8) A non mgmt node upon receipt of a SCIF_EXIT message calls scif_stop(..)
+ *	which would clean up local data structures for all SCIF nodes and
+ *	then send a SCIF_EXIT_ACK back to the mgmt node
+ * 9) Upon receipt of the SCIF_EXIT_ACK the the mgmt node sends itself a node
+ *	remove message whose handling cleans up local data structures and
+ *	destroys any P2P mappings.
+ * 10) The SCIF hardware device for which a remove callback was received is now
+ *	disconnected from the SCIF network.
+ */
+/*
+ * Initializes "local" data structures for the QP. Allocates the QP
+ * ring buffer (rb) and initializes the "in bound" queue.
+ */
+int scif_setup_qp_connect(struct scif_qp *qp, dma_addr_t *qp_offset,
+			  int local_size, struct scif_dev *scifdev)
+{
+	void *local_q = qp->inbound_q.rb_base;
+	int err = 0;
+	u32 tmp_rd = 0;
+
+	spin_lock_init(&qp->send_lock);
+	spin_lock_init(&qp->recv_lock);
+
+	/* Allocate rb only if not already allocated */
+	if (!local_q) {
+		local_q = kzalloc(local_size, GFP_KERNEL);
+		if (!local_q) {
+			err = -ENOMEM;
+			return err;
+		}
+	}
+
+	err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size);
+	if (err)
+		goto kfree;
+	/*
+	 * To setup the inbound_q, the buffer lives locally, the read pointer
+	 * is remote and the write pointer is local.
+	 */
+	scif_rb_init(&qp->inbound_q,
+		     &tmp_rd,
+		     &qp->local_write,
+		     local_q, get_count_order(local_size));
+	/*
+	 * The read pointer is NULL initially and it is unsafe to use the ring
+	 * buffer til this changes!
+	 */
+	qp->inbound_q.read_ptr = NULL;
+	err = scif_map_single(qp_offset, qp,
+			      scifdev, sizeof(struct scif_qp));
+	if (err)
+		goto unmap;
+	qp->local_qp = *qp_offset;
+	return err;
+unmap:
+	scif_unmap_single(qp->local_buf, scifdev, local_size);
+	qp->local_buf = 0;
+kfree:
+	kfree(local_q);
+	return err;
+}
+
+/* When the other side has already done it's allocation, this is called */
+int scif_setup_qp_accept(struct scif_qp *qp, dma_addr_t *qp_offset,
+			 dma_addr_t phys, int local_size,
+			 struct scif_dev *scifdev)
+{
+	void *local_q;
+	void *remote_q;
+	struct scif_qp *remote_qp;
+	int remote_size;
+	int err = 0;
+
+	spin_lock_init(&qp->send_lock);
+	spin_lock_init(&qp->recv_lock);
+	/* Start by figuring out where we need to point */
+	remote_qp = scif_ioremap(phys, sizeof(struct scif_qp), scifdev);
+	if (!remote_qp)
+		return -EIO;
+	qp->remote_qp = remote_qp;
+	if (qp->remote_qp->magic != SCIFEP_MAGIC) {
+		err = -EIO;
+		goto iounmap;
+	}
+	qp->remote_buf = remote_qp->local_buf;
+	remote_size = qp->remote_qp->inbound_q.size;
+	remote_q = scif_ioremap(qp->remote_buf, remote_size, scifdev);
+	if (!remote_q) {
+		err = -EIO;
+		goto iounmap;
+	}
+	qp->remote_qp->local_write = 0;
+	/*
+	 * To setup the outbound_q, the buffer lives in remote memory,
+	 * the read pointer is local, the write pointer is remote
+	 */
+	scif_rb_init(&qp->outbound_q,
+		     &qp->local_read,
+		     &qp->remote_qp->local_write,
+		     remote_q,
+		     get_count_order(remote_size));
+	local_q = kzalloc(local_size, GFP_KERNEL);
+	if (!local_q) {
+		err = -ENOMEM;
+		goto iounmap_1;
+	}
+	err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size);
+	if (err)
+		goto kfree;
+	qp->remote_qp->local_read = 0;
+	/*
+	 * To setup the inbound_q, the buffer lives locally, the read pointer
+	 * is remote and the write pointer is local
+	 */
+	scif_rb_init(&qp->inbound_q,
+		     &qp->remote_qp->local_read,
+		     &qp->local_write,
+		     local_q, get_count_order(local_size));
+	err = scif_map_single(qp_offset, qp, scifdev,
+			      sizeof(struct scif_qp));
+	if (err)
+		goto unmap;
+	qp->local_qp = *qp_offset;
+	return err;
+unmap:
+	scif_unmap_single(qp->local_buf, scifdev, local_size);
+	qp->local_buf = 0;
+kfree:
+	kfree(local_q);
+iounmap_1:
+	scif_iounmap(remote_q, remote_size, scifdev);
+	qp->outbound_q.rb_base = NULL;
+iounmap:
+	scif_iounmap(qp->remote_qp, sizeof(struct scif_qp), scifdev);
+	qp->remote_qp = NULL;
+	return err;
+}
+
+int scif_setup_qp_connect_response(struct scif_dev *scifdev,
+				   struct scif_qp *qp, u64 payload)
+{
+	int err = 0;
+	void *r_buf;
+	int remote_size;
+	phys_addr_t tmp_phys;
+
+	qp->remote_qp = scif_ioremap(payload, sizeof(struct scif_qp), scifdev);
+
+	if (!qp->remote_qp) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	if (qp->remote_qp->magic != SCIFEP_MAGIC) {
+		dev_err(&scifdev->sdev->dev,
+			"SCIFEP_MAGIC mismatch between self %d remote %d\n",
+			scif_dev[scif_info.nodeid].node, scifdev->node);
+		err = -ENODEV;
+		goto error;
+	}
+
+	tmp_phys = qp->remote_qp->local_buf;
+	remote_size = qp->remote_qp->inbound_q.size;
+	r_buf = scif_ioremap(tmp_phys, remote_size, scifdev);
+
+	if (!r_buf)
+		return -EIO;
+
+	qp->local_read = 0;
+	scif_rb_init(&qp->outbound_q,
+		     &qp->local_read,
+		     &qp->remote_qp->local_write,
+		     r_buf,
+		     get_count_order(remote_size));
+	/*
+	 * Because the node QP may already be processing an INIT message, set
+	 * the read pointer so the cached read offset isn't lost
+	 */
+	qp->remote_qp->local_read = qp->inbound_q.current_read_offset;
+	/*
+	 * resetup the inbound_q now that we know where the
+	 * inbound_read really is.
+	 */
+	scif_rb_init(&qp->inbound_q,
+		     &qp->remote_qp->local_read,
+		     &qp->local_write,
+		     qp->inbound_q.rb_base,
+		     get_count_order(qp->inbound_q.size));
+error:
+	return err;
+}
+
+static __always_inline void
+scif_send_msg_intr(struct scif_dev *scifdev)
+{
+	struct scif_hw_dev *sdev = scifdev->sdev;
+
+	if (scifdev_is_p2p(scifdev))
+		sdev->hw_ops->send_p2p_intr(sdev, scifdev->rdb, &scifdev->mmio);
+	else
+		sdev->hw_ops->send_intr(sdev, scifdev->rdb);
+}
+
+int scif_qp_response(phys_addr_t phys, struct scif_dev *scifdev)
+{
+	int err = 0;
+	struct scifmsg msg;
+
+	err = scif_setup_qp_connect_response(scifdev, scifdev->qpairs, phys);
+	if (!err) {
+		/*
+		 * Now that everything is setup and mapped, we're ready
+		 * to tell the peer about our queue's location
+		 */
+		msg.uop = SCIF_INIT;
+		msg.dst.node = scifdev->node;
+		err = scif_nodeqp_send(scifdev, &msg);
+	}
+	return err;
+}
+
+void scif_send_exit(struct scif_dev *scifdev)
+{
+	struct scifmsg msg;
+	int ret;
+
+	scifdev->exit = OP_IN_PROGRESS;
+	msg.uop = SCIF_EXIT;
+	msg.src.node = scif_info.nodeid;
+	msg.dst.node = scifdev->node;
+	ret = scif_nodeqp_send(scifdev, &msg);
+	if (ret)
+		goto done;
+	/* Wait for a SCIF_EXIT_ACK message */
+	wait_event_timeout(scif_info.exitwq, scifdev->exit == OP_COMPLETED,
+			   SCIF_NODE_ALIVE_TIMEOUT);
+done:
+	scifdev->exit = OP_IDLE;
+}
+
+int scif_setup_qp(struct scif_dev *scifdev)
+{
+	int err = 0;
+	int local_size;
+	struct scif_qp *qp;
+
+	local_size = SCIF_NODE_QP_SIZE;
+
+	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+	if (!qp) {
+		err = -ENOMEM;
+		return err;
+	}
+	qp->magic = SCIFEP_MAGIC;
+	scifdev->qpairs = qp;
+	err = scif_setup_qp_connect(qp, &scifdev->qp_dma_addr,
+				    local_size, scifdev);
+	if (err)
+		goto free_qp;
+	/*
+	 * We're as setup as we can be. The inbound_q is setup, w/o a usable
+	 * outbound q.  When we get a message, the read_ptr will be updated,
+	 * and we will pull the message.
+	 */
+	return err;
+free_qp:
+	kfree(scifdev->qpairs);
+	scifdev->qpairs = NULL;
+	return err;
+}
+
+static void scif_p2p_freesg(struct scatterlist *sg)
+{
+	kfree(sg);
+}
+
+static struct scatterlist *
+scif_p2p_setsg(phys_addr_t pa, int page_size, int page_cnt)
+{
+	struct scatterlist *sg;
+	struct page *page;
+	int i;
+
+	sg = kcalloc(page_cnt, sizeof(struct scatterlist), GFP_KERNEL);
+	if (!sg)
+		return NULL;
+	sg_init_table(sg, page_cnt);
+	for (i = 0; i < page_cnt; i++) {
+		page = pfn_to_page(pa >> PAGE_SHIFT);
+		sg_set_page(&sg[i], page, page_size, 0);
+		pa += page_size;
+	}
+	return sg;
+}
+
+/* Init p2p mappings required to access peerdev from scifdev */
+static struct scif_p2p_info *
+scif_init_p2p_info(struct scif_dev *scifdev, struct scif_dev *peerdev)
+{
+	struct scif_p2p_info *p2p;
+	int num_mmio_pages, num_aper_pages, sg_page_shift, err, num_aper_chunks;
+	struct scif_hw_dev *psdev = peerdev->sdev;
+	struct scif_hw_dev *sdev = scifdev->sdev;
+
+	num_mmio_pages = psdev->mmio->len >> PAGE_SHIFT;
+	num_aper_pages = psdev->aper->len >> PAGE_SHIFT;
+
+	p2p = kzalloc(sizeof(*p2p), GFP_KERNEL);
+	if (!p2p)
+		return NULL;
+	p2p->ppi_sg[SCIF_PPI_MMIO] = scif_p2p_setsg(psdev->mmio->pa,
+						    PAGE_SIZE, num_mmio_pages);
+	if (!p2p->ppi_sg[SCIF_PPI_MMIO])
+		goto free_p2p;
+	p2p->sg_nentries[SCIF_PPI_MMIO] = num_mmio_pages;
+	sg_page_shift = get_order(min(psdev->aper->len, (u64)(1 << 30)));
+	num_aper_chunks = num_aper_pages >> (sg_page_shift - PAGE_SHIFT);
+	p2p->ppi_sg[SCIF_PPI_APER] = scif_p2p_setsg(psdev->aper->pa,
+						    1 << sg_page_shift,
+						    num_aper_chunks);
+	p2p->sg_nentries[SCIF_PPI_APER] = num_aper_chunks;
+	err = dma_map_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+			 num_mmio_pages, PCI_DMA_BIDIRECTIONAL);
+	if (err != num_mmio_pages)
+		goto scif_p2p_free;
+	err = dma_map_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_APER],
+			 num_aper_chunks, PCI_DMA_BIDIRECTIONAL);
+	if (err != num_aper_chunks)
+		goto dma_unmap;
+	p2p->ppi_da[SCIF_PPI_MMIO] = sg_dma_address(p2p->ppi_sg[SCIF_PPI_MMIO]);
+	p2p->ppi_da[SCIF_PPI_APER] = sg_dma_address(p2p->ppi_sg[SCIF_PPI_APER]);
+	p2p->ppi_len[SCIF_PPI_MMIO] = num_mmio_pages;
+	p2p->ppi_len[SCIF_PPI_APER] = num_aper_pages;
+	p2p->ppi_peer_id = peerdev->node;
+	return p2p;
+dma_unmap:
+	dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+		     p2p->sg_nentries[SCIF_PPI_MMIO], DMA_BIDIRECTIONAL);
+scif_p2p_free:
+	scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
+	scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
+free_p2p:
+	kfree(p2p);
+	return NULL;
+}
+
+/* Uninitialize and release resources from a p2p mapping */
+static void scif_deinit_p2p_info(struct scif_dev *scifdev,
+				 struct scif_p2p_info *p2p)
+{
+	struct scif_hw_dev *sdev = scifdev->sdev;
+
+	dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+		     p2p->sg_nentries[SCIF_PPI_MMIO], DMA_BIDIRECTIONAL);
+	dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_APER],
+		     p2p->sg_nentries[SCIF_PPI_APER], DMA_BIDIRECTIONAL);
+	scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
+	scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
+	kfree(p2p);
+}
+
+/**
+ * scif_node_connect: Respond to SCIF_NODE_CONNECT interrupt message
+ * @dst: Destination node
+ *
+ * Connect the src and dst node by setting up the p2p connection
+ * between them. Management node here acts like a proxy.
+ */
+static void scif_node_connect(struct scif_dev *scifdev, int dst)
+{
+	struct scif_dev *dev_j = scifdev;
+	struct scif_dev *dev_i = NULL;
+	struct scif_p2p_info *p2p_ij = NULL;    /* bus addr for j from i */
+	struct scif_p2p_info *p2p_ji = NULL;    /* bus addr for i from j */
+	struct scif_p2p_info *p2p;
+	struct list_head *pos, *tmp;
+	struct scifmsg msg;
+	int err;
+	u64 tmppayload;
+
+	if (dst < 1 || dst > scif_info.maxid)
+		return;
+
+	dev_i = &scif_dev[dst];
+
+	if (!_scifdev_alive(dev_i))
+		return;
+	/*
+	 * If the p2p connection is already setup or in the process of setting
+	 * up then just ignore this request. The requested node will get
+	 * informed by SCIF_NODE_ADD_ACK or SCIF_NODE_ADD_NACK
+	 */
+	if (!list_empty(&dev_i->p2p)) {
+		list_for_each_safe(pos, tmp, &dev_i->p2p) {
+			p2p = list_entry(pos, struct scif_p2p_info, ppi_list);
+			if (p2p->ppi_peer_id == dev_j->node)
+				return;
+		}
+	}
+	p2p_ij = scif_init_p2p_info(dev_i, dev_j);
+	if (!p2p_ij)
+		return;
+	p2p_ji = scif_init_p2p_info(dev_j, dev_i);
+	if (!p2p_ji) {
+		scif_deinit_p2p_info(dev_i, p2p_ij);
+		return;
+	}
+	list_add_tail(&p2p_ij->ppi_list, &dev_i->p2p);
+	list_add_tail(&p2p_ji->ppi_list, &dev_j->p2p);
+
+	/*
+	 * Send a SCIF_NODE_ADD to dev_i, pass it its bus address
+	 * as seen from dev_j
+	 */
+	msg.uop = SCIF_NODE_ADD;
+	msg.src.node = dev_j->node;
+	msg.dst.node = dev_i->node;
+
+	msg.payload[0] = p2p_ji->ppi_da[SCIF_PPI_APER];
+	msg.payload[1] = p2p_ij->ppi_da[SCIF_PPI_MMIO];
+	msg.payload[2] = p2p_ij->ppi_da[SCIF_PPI_APER];
+	msg.payload[3] = p2p_ij->ppi_len[SCIF_PPI_APER] << PAGE_SHIFT;
+
+	err = scif_nodeqp_send(dev_i,  &msg);
+	if (err) {
+		dev_err(&scifdev->sdev->dev,
+			"%s %d error %d\n", __func__, __LINE__, err);
+		return;
+	}
+
+	/* Same as above but to dev_j */
+	msg.uop = SCIF_NODE_ADD;
+	msg.src.node = dev_i->node;
+	msg.dst.node = dev_j->node;
+
+	tmppayload = msg.payload[0];
+	msg.payload[0] = msg.payload[2];
+	msg.payload[2] = tmppayload;
+	msg.payload[1] = p2p_ji->ppi_da[SCIF_PPI_MMIO];
+	msg.payload[3] = p2p_ji->ppi_len[SCIF_PPI_APER] << PAGE_SHIFT;
+
+	scif_nodeqp_send(dev_j, &msg);
+}
+
+static void scif_p2p_setup(void)
+{
+	int i, j;
+
+	if (!scif_info.p2p_enable)
+		return;
+
+	for (i = 1; i <= scif_info.maxid; i++)
+		if (!_scifdev_alive(&scif_dev[i]))
+			return;
+
+	for (i = 1; i <= scif_info.maxid; i++) {
+		for (j = 1; j <= scif_info.maxid; j++) {
+			struct scif_dev *scifdev = &scif_dev[i];
+
+			if (i == j)
+				continue;
+			scif_node_connect(scifdev, j);
+		}
+	}
+}
+
+static char *message_types[] = {"BAD",
+				"INIT",
+				"EXIT",
+				"SCIF_EXIT_ACK",
+				"SCIF_NODE_ADD",
+				"SCIF_NODE_ADD_ACK",
+				"SCIF_NODE_ADD_NACK",
+				"REMOVE_NODE",
+				"REMOVE_NODE_ACK",
+				"CNCT_REQ",
+				"CNCT_GNT",
+				"CNCT_GNTACK",
+				"CNCT_GNTNACK",
+				"CNCT_REJ",
+				"DISCNCT",
+				"DISCNT_ACK",
+				"CLIENT_SENT",
+				"CLIENT_RCVD",
+				"SCIF_GET_NODE_INFO",
+				"REGISTER",
+				"REGISTER_ACK",
+				"REGISTER_NACK",
+				"UNREGISTER",
+				"UNREGISTER_ACK",
+				"UNREGISTER_NACK",
+				"ALLOC_REQ",
+				"ALLOC_GNT",
+				"ALLOC_REJ",
+				"FREE_PHYS",
+				"FREE_VIRT",
+				"MUNMAP",
+				"MARK",
+				"MARK_ACK",
+				"MARK_NACK",
+				"WAIT",
+				"WAIT_ACK",
+				"WAIT_NACK",
+				"SIGNAL_LOCAL",
+				"SIGNAL_REMOTE",
+				"SIG_ACK",
+				"SIG_NACK"};
+
+static void
+scif_display_message(struct scif_dev *scifdev, struct scifmsg *msg,
+		     const char *label)
+{
+	if (!scif_info.en_msg_log)
+		return;
+	if (msg->uop > SCIF_MAX_MSG) {
+		dev_err(&scifdev->sdev->dev,
+			"%s: unknown msg type %d\n", label, msg->uop);
+		return;
+	}
+	dev_info(&scifdev->sdev->dev,
+		 "%s: msg type %s, src %d:%d, dest %d:%d payload 0x%llx:0x%llx:0x%llx:0x%llx\n",
+		 label, message_types[msg->uop], msg->src.node, msg->src.port,
+		 msg->dst.node, msg->dst.port, msg->payload[0], msg->payload[1],
+		 msg->payload[2], msg->payload[3]);
+}
+
+int _scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_qp *qp = scifdev->qpairs;
+	int err = -ENOMEM, loop_cnt = 0;
+
+	scif_display_message(scifdev, msg, "Sent");
+	if (!qp) {
+		err = -EINVAL;
+		goto error;
+	}
+	spin_lock(&qp->send_lock);
+
+	while ((err = scif_rb_write(&qp->outbound_q,
+				    msg, sizeof(struct scifmsg)))) {
+		mdelay(1);
+#define SCIF_NODEQP_SEND_TO_MSEC (3 * 1000)
+		if (loop_cnt++ > (SCIF_NODEQP_SEND_TO_MSEC)) {
+			err = -ENODEV;
+			break;
+		}
+	}
+	if (!err)
+		scif_rb_commit(&qp->outbound_q);
+	spin_unlock(&qp->send_lock);
+	if (!err) {
+		if (scifdev_self(scifdev))
+			/*
+			 * For loopback we need to emulate an interrupt by
+			 * queuing work for the queue handling real node
+			 * Qp interrupts.
+			 */
+			queue_work(scifdev->intr_wq, &scifdev->intr_bh);
+		else
+			scif_send_msg_intr(scifdev);
+	}
+error:
+	if (err)
+		dev_dbg(&scifdev->sdev->dev,
+			"%s %d error %d uop %d\n",
+			 __func__, __LINE__, err, msg->uop);
+	return err;
+}
+
+/**
+ * scif_nodeqp_send - Send a message on the node queue pair
+ * @scifdev: Scif Device.
+ * @msg: The message to be sent.
+ */
+int scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	int err;
+	struct device *spdev = NULL;
+
+	if (msg->uop > SCIF_EXIT_ACK) {
+		/* Dont send messages once the exit flow has begun */
+		if (OP_IDLE != scifdev->exit)
+			return -ENODEV;
+		spdev = scif_get_peer_dev(scifdev);
+		if (IS_ERR(spdev)) {
+			err = PTR_ERR(spdev);
+			return err;
+		}
+	}
+	err = _scif_nodeqp_send(scifdev, msg);
+	if (msg->uop > SCIF_EXIT_ACK)
+		scif_put_peer_dev(spdev);
+	return err;
+}
+
+/*
+ * scif_misc_handler:
+ *
+ * Work queue handler for servicing miscellaneous SCIF tasks.
+ * Examples include:
+ * 1) Remote fence requests.
+ * 2) Destruction of temporary registered windows
+ *    created during scif_vreadfrom()/scif_vwriteto().
+ * 3) Cleanup of zombie endpoints.
+ */
+void scif_misc_handler(struct work_struct *work)
+{
+	scif_rma_handle_remote_fences();
+	scif_rma_destroy_windows();
+	scif_rma_destroy_tcw_invalid();
+	scif_cleanup_zombie_epd();
+}
+
+/**
+ * scif_init() - Respond to SCIF_INIT interrupt message
+ * @scifdev:    Remote SCIF device node
+ * @msg:        Interrupt message
+ */
+static __always_inline void
+scif_init(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	/*
+	 * Allow the thread waiting for device page updates for the peer QP DMA
+	 * address to complete initializing the inbound_q.
+	 */
+	flush_delayed_work(&scifdev->qp_dwork);
+
+	scif_peer_register_device(scifdev);
+
+	if (scif_is_mgmt_node()) {
+		mutex_lock(&scif_info.conflock);
+		scif_p2p_setup();
+		mutex_unlock(&scif_info.conflock);
+	}
+}
+
+/**
+ * scif_exit() - Respond to SCIF_EXIT interrupt message
+ * @scifdev:    Remote SCIF device node
+ * @msg:        Interrupt message
+ *
+ * This function stops the SCIF interface for the node which sent
+ * the SCIF_EXIT message and starts waiting for that node to
+ * resetup the queue pair again.
+ */
+static __always_inline void
+scif_exit(struct scif_dev *scifdev, struct scifmsg *unused)
+{
+	scifdev->exit_ack_pending = true;
+	if (scif_is_mgmt_node())
+		scif_disconnect_node(scifdev->node, false);
+	else
+		scif_stop(scifdev);
+	schedule_delayed_work(&scifdev->qp_dwork,
+			      msecs_to_jiffies(1000));
+}
+
+/**
+ * scif_exitack() - Respond to SCIF_EXIT_ACK interrupt message
+ * @scifdev:    Remote SCIF device node
+ * @msg:        Interrupt message
+ *
+ */
+static __always_inline void
+scif_exit_ack(struct scif_dev *scifdev, struct scifmsg *unused)
+{
+	scifdev->exit = OP_COMPLETED;
+	wake_up(&scif_info.exitwq);
+}
+
+/**
+ * scif_node_add() - Respond to SCIF_NODE_ADD interrupt message
+ * @scifdev:    Remote SCIF device node
+ * @msg:        Interrupt message
+ *
+ * When the mgmt node driver has finished initializing a MIC node queue pair it
+ * marks the node as online. It then looks for all currently online MIC cards
+ * and send a SCIF_NODE_ADD message to identify the ID of the new card for
+ * peer to peer initialization
+ *
+ * The local node allocates its incoming queue and sends its address in the
+ * SCIF_NODE_ADD_ACK message back to the mgmt node, the mgmt node "reflects"
+ * this message to the new node
+ */
+static __always_inline void
+scif_node_add(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_dev *newdev;
+	dma_addr_t qp_offset;
+	int qp_connect;
+	struct scif_hw_dev *sdev;
+
+	dev_dbg(&scifdev->sdev->dev,
+		"Scifdev %d:%d received NODE_ADD msg for node %d\n",
+		scifdev->node, msg->dst.node, msg->src.node);
+	dev_dbg(&scifdev->sdev->dev,
+		"Remote address for this node's aperture %llx\n",
+		msg->payload[0]);
+	newdev = &scif_dev[msg->src.node];
+	newdev->node = msg->src.node;
+	newdev->sdev = scif_dev[SCIF_MGMT_NODE].sdev;
+	sdev = newdev->sdev;
+
+	if (scif_setup_intr_wq(newdev)) {
+		dev_err(&scifdev->sdev->dev,
+			"failed to setup interrupts for %d\n", msg->src.node);
+		goto interrupt_setup_error;
+	}
+	newdev->mmio.va = ioremap_nocache(msg->payload[1], sdev->mmio->len);
+	if (!newdev->mmio.va) {
+		dev_err(&scifdev->sdev->dev,
+			"failed to map mmio for %d\n", msg->src.node);
+		goto mmio_map_error;
+	}
+	newdev->qpairs = kzalloc(sizeof(*newdev->qpairs), GFP_KERNEL);
+	if (!newdev->qpairs)
+		goto qp_alloc_error;
+	/*
+	 * Set the base address of the remote node's memory since it gets
+	 * added to qp_offset
+	 */
+	newdev->base_addr = msg->payload[0];
+
+	qp_connect = scif_setup_qp_connect(newdev->qpairs, &qp_offset,
+					   SCIF_NODE_QP_SIZE, newdev);
+	if (qp_connect) {
+		dev_err(&scifdev->sdev->dev,
+			"failed to setup qp_connect %d\n", qp_connect);
+		goto qp_connect_error;
+	}
+
+	newdev->db = sdev->hw_ops->next_db(sdev);
+	newdev->cookie = sdev->hw_ops->request_irq(sdev, scif_intr_handler,
+						   "SCIF_INTR", newdev,
+						   newdev->db);
+	if (IS_ERR(newdev->cookie))
+		goto qp_connect_error;
+	newdev->qpairs->magic = SCIFEP_MAGIC;
+	newdev->qpairs->qp_state = SCIF_QP_OFFLINE;
+
+	msg->uop = SCIF_NODE_ADD_ACK;
+	msg->dst.node = msg->src.node;
+	msg->src.node = scif_info.nodeid;
+	msg->payload[0] = qp_offset;
+	msg->payload[2] = newdev->db;
+	scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], msg);
+	return;
+qp_connect_error:
+	kfree(newdev->qpairs);
+	newdev->qpairs = NULL;
+qp_alloc_error:
+	iounmap(newdev->mmio.va);
+	newdev->mmio.va = NULL;
+mmio_map_error:
+interrupt_setup_error:
+	dev_err(&scifdev->sdev->dev,
+		"node add failed for node %d\n", msg->src.node);
+	msg->uop = SCIF_NODE_ADD_NACK;
+	msg->dst.node = msg->src.node;
+	msg->src.node = scif_info.nodeid;
+	scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], msg);
+}
+
+void scif_poll_qp_state(struct work_struct *work)
+{
+#define SCIF_NODE_QP_RETRY 100
+#define SCIF_NODE_QP_TIMEOUT 100
+	struct scif_dev *peerdev = container_of(work, struct scif_dev,
+							p2p_dwork.work);
+	struct scif_qp *qp = &peerdev->qpairs[0];
+
+	if (qp->qp_state != SCIF_QP_ONLINE ||
+	    qp->remote_qp->qp_state != SCIF_QP_ONLINE) {
+		if (peerdev->p2p_retry++ == SCIF_NODE_QP_RETRY) {
+			dev_err(&peerdev->sdev->dev,
+				"Warning: QP check timeout with state %d\n",
+				qp->qp_state);
+			goto timeout;
+		}
+		schedule_delayed_work(&peerdev->p2p_dwork,
+				      msecs_to_jiffies(SCIF_NODE_QP_TIMEOUT));
+		return;
+	}
+	return;
+timeout:
+	dev_err(&peerdev->sdev->dev,
+		"%s %d remote node %d offline,  state = 0x%x\n",
+		__func__, __LINE__, peerdev->node, qp->qp_state);
+	qp->remote_qp->qp_state = SCIF_QP_OFFLINE;
+	scif_peer_unregister_device(peerdev);
+	scif_cleanup_scifdev(peerdev);
+}
+
+/**
+ * scif_node_add_ack() - Respond to SCIF_NODE_ADD_ACK interrupt message
+ * @scifdev:    Remote SCIF device node
+ * @msg:        Interrupt message
+ *
+ * After a MIC node receives the SCIF_NODE_ADD_ACK message it send this
+ * message to the mgmt node to confirm the sequence is finished.
+ *
+ */
+static __always_inline void
+scif_node_add_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_dev *peerdev;
+	struct scif_qp *qp;
+	struct scif_dev *dst_dev = &scif_dev[msg->dst.node];
+
+	dev_dbg(&scifdev->sdev->dev,
+		"Scifdev %d received SCIF_NODE_ADD_ACK msg src %d dst %d\n",
+		scifdev->node, msg->src.node, msg->dst.node);
+	dev_dbg(&scifdev->sdev->dev,
+		"payload %llx %llx %llx %llx\n", msg->payload[0],
+		msg->payload[1], msg->payload[2], msg->payload[3]);
+	if (scif_is_mgmt_node()) {
+		/*
+		 * the lock serializes with scif_qp_response_ack. The mgmt node
+		 * is forwarding the NODE_ADD_ACK message from src to dst we
+		 * need to make sure that the dst has already received a
+		 * NODE_ADD for src and setup its end of the qp to dst
+		 */
+		mutex_lock(&scif_info.conflock);
+		msg->payload[1] = scif_info.maxid;
+		scif_nodeqp_send(dst_dev, msg);
+		mutex_unlock(&scif_info.conflock);
+		return;
+	}
+	peerdev = &scif_dev[msg->src.node];
+	peerdev->sdev = scif_dev[SCIF_MGMT_NODE].sdev;
+	peerdev->node = msg->src.node;
+
+	qp = &peerdev->qpairs[0];
+
+	if ((scif_setup_qp_connect_response(peerdev, &peerdev->qpairs[0],
+					    msg->payload[0])))
+		goto local_error;
+	peerdev->rdb = msg->payload[2];
+	qp->remote_qp->qp_state = SCIF_QP_ONLINE;
+
+	scif_peer_register_device(peerdev);
+
+	schedule_delayed_work(&peerdev->p2p_dwork, 0);
+	return;
+local_error:
+	scif_cleanup_scifdev(peerdev);
+}
+
+/**
+ * scif_node_add_nack: Respond to SCIF_NODE_ADD_NACK interrupt message
+ * @msg:        Interrupt message
+ *
+ * SCIF_NODE_ADD failed, so inform the waiting wq.
+ */
+static __always_inline void
+scif_node_add_nack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	if (scif_is_mgmt_node()) {
+		struct scif_dev *dst_dev = &scif_dev[msg->dst.node];
+
+		dev_dbg(&scifdev->sdev->dev,
+			"SCIF_NODE_ADD_NACK received from %d\n", scifdev->node);
+		scif_nodeqp_send(dst_dev, msg);
+	}
+}
+
+/*
+ * scif_node_remove: Handle SCIF_NODE_REMOVE message
+ * @msg: Interrupt message
+ *
+ * Handle node removal.
+ */
+static __always_inline void
+scif_node_remove(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	int node = msg->payload[0];
+	struct scif_dev *scdev = &scif_dev[node];
+
+	scdev->node_remove_ack_pending = true;
+	scif_handle_remove_node(node);
+}
+
+/*
+ * scif_node_remove_ack: Handle SCIF_NODE_REMOVE_ACK message
+ * @msg: Interrupt message
+ *
+ * The peer has acked a SCIF_NODE_REMOVE message.
+ */
+static __always_inline void
+scif_node_remove_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_dev *sdev = &scif_dev[msg->payload[0]];
+
+	atomic_inc(&sdev->disconn_rescnt);
+	wake_up(&sdev->disconn_wq);
+}
+
+/**
+ * scif_get_node_info: Respond to SCIF_GET_NODE_INFO interrupt message
+ * @msg:        Interrupt message
+ *
+ * Retrieve node info i.e maxid and total from the mgmt node.
+ */
+static __always_inline void
+scif_get_node_info_resp(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	if (scif_is_mgmt_node()) {
+		swap(msg->dst.node, msg->src.node);
+		mutex_lock(&scif_info.conflock);
+		msg->payload[1] = scif_info.maxid;
+		msg->payload[2] = scif_info.total;
+		mutex_unlock(&scif_info.conflock);
+		scif_nodeqp_send(scifdev, msg);
+	} else {
+		struct completion *node_info =
+			(struct completion *)msg->payload[3];
+
+		mutex_lock(&scif_info.conflock);
+		scif_info.maxid = msg->payload[1];
+		scif_info.total = msg->payload[2];
+		complete_all(node_info);
+		mutex_unlock(&scif_info.conflock);
+	}
+}
+
+static void
+scif_msg_unknown(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	/* Bogus Node Qp Message? */
+	dev_err(&scifdev->sdev->dev,
+		"Unknown message 0x%xn scifdev->node 0x%x\n",
+		msg->uop, scifdev->node);
+}
+
+static void (*scif_intr_func[SCIF_MAX_MSG + 1])
+	    (struct scif_dev *, struct scifmsg *msg) = {
+	scif_msg_unknown,	/* Error */
+	scif_init,		/* SCIF_INIT */
+	scif_exit,		/* SCIF_EXIT */
+	scif_exit_ack,		/* SCIF_EXIT_ACK */
+	scif_node_add,		/* SCIF_NODE_ADD */
+	scif_node_add_ack,	/* SCIF_NODE_ADD_ACK */
+	scif_node_add_nack,	/* SCIF_NODE_ADD_NACK */
+	scif_node_remove,	/* SCIF_NODE_REMOVE */
+	scif_node_remove_ack,	/* SCIF_NODE_REMOVE_ACK */
+	scif_cnctreq,		/* SCIF_CNCT_REQ */
+	scif_cnctgnt,		/* SCIF_CNCT_GNT */
+	scif_cnctgnt_ack,	/* SCIF_CNCT_GNTACK */
+	scif_cnctgnt_nack,	/* SCIF_CNCT_GNTNACK */
+	scif_cnctrej,		/* SCIF_CNCT_REJ */
+	scif_discnct,		/* SCIF_DISCNCT */
+	scif_discnt_ack,	/* SCIF_DISCNT_ACK */
+	scif_clientsend,	/* SCIF_CLIENT_SENT */
+	scif_clientrcvd,	/* SCIF_CLIENT_RCVD */
+	scif_get_node_info_resp,/* SCIF_GET_NODE_INFO */
+	scif_recv_reg,		/* SCIF_REGISTER */
+	scif_recv_reg_ack,	/* SCIF_REGISTER_ACK */
+	scif_recv_reg_nack,	/* SCIF_REGISTER_NACK */
+	scif_recv_unreg,	/* SCIF_UNREGISTER */
+	scif_recv_unreg_ack,	/* SCIF_UNREGISTER_ACK */
+	scif_recv_unreg_nack,	/* SCIF_UNREGISTER_NACK */
+	scif_alloc_req,		/* SCIF_ALLOC_REQ */
+	scif_alloc_gnt_rej,	/* SCIF_ALLOC_GNT */
+	scif_alloc_gnt_rej,	/* SCIF_ALLOC_REJ */
+	scif_free_virt,		/* SCIF_FREE_VIRT */
+	scif_recv_munmap,	/* SCIF_MUNMAP */
+	scif_recv_mark,		/* SCIF_MARK */
+	scif_recv_mark_resp,	/* SCIF_MARK_ACK */
+	scif_recv_mark_resp,	/* SCIF_MARK_NACK */
+	scif_recv_wait,		/* SCIF_WAIT */
+	scif_recv_wait_resp,	/* SCIF_WAIT_ACK */
+	scif_recv_wait_resp,	/* SCIF_WAIT_NACK */
+	scif_recv_sig_local,	/* SCIF_SIG_LOCAL */
+	scif_recv_sig_remote,	/* SCIF_SIG_REMOTE */
+	scif_recv_sig_resp,	/* SCIF_SIG_ACK */
+	scif_recv_sig_resp,	/* SCIF_SIG_NACK */
+};
+
+/**
+ * scif_nodeqp_msg_handler() - Common handler for node messages
+ * @scifdev: Remote device to respond to
+ * @qp: Remote memory pointer
+ * @msg: The message to be handled.
+ *
+ * This routine calls the appropriate routine to handle a Node Qp
+ * message receipt
+ */
+static int scif_max_msg_id = SCIF_MAX_MSG;
+
+static void
+scif_nodeqp_msg_handler(struct scif_dev *scifdev,
+			struct scif_qp *qp, struct scifmsg *msg)
+{
+	scif_display_message(scifdev, msg, "Rcvd");
+
+	if (msg->uop > (u32)scif_max_msg_id) {
+		/* Bogus Node Qp Message? */
+		dev_err(&scifdev->sdev->dev,
+			"Unknown message 0x%xn scifdev->node 0x%x\n",
+			msg->uop, scifdev->node);
+		return;
+	}
+
+	scif_intr_func[msg->uop](scifdev, msg);
+}
+
+/**
+ * scif_nodeqp_intrhandler() - Interrupt handler for node messages
+ * @scifdev:    Remote device to respond to
+ * @qp:         Remote memory pointer
+ *
+ * This routine is triggered by the interrupt mechanism.  It reads
+ * messages from the node queue RB and calls the Node QP Message handling
+ * routine.
+ */
+void scif_nodeqp_intrhandler(struct scif_dev *scifdev, struct scif_qp *qp)
+{
+	struct scifmsg msg;
+	int read_size;
+
+	do {
+		read_size = scif_rb_get_next(&qp->inbound_q, &msg, sizeof(msg));
+		if (!read_size)
+			break;
+		scif_nodeqp_msg_handler(scifdev, qp, &msg);
+		/*
+		 * The node queue pair is unmapped so skip the read pointer
+		 * update after receipt of a SCIF_EXIT_ACK
+		 */
+		if (SCIF_EXIT_ACK == msg.uop)
+			break;
+		scif_rb_update_read_ptr(&qp->inbound_q);
+	} while (1);
+}
+
+/**
+ * scif_loopb_wq_handler - Loopback Workqueue Handler.
+ * @work: loop back work
+ *
+ * This work queue routine is invoked by the loopback work queue handler.
+ * It grabs the recv lock, dequeues any available messages from the head
+ * of the loopback message list, calls the node QP message handler,
+ * waits for it to return, then frees up this message and dequeues more
+ * elements of the list if available.
+ */
+static void scif_loopb_wq_handler(struct work_struct *unused)
+{
+	struct scif_dev *scifdev = scif_info.loopb_dev;
+	struct scif_qp *qp = scifdev->qpairs;
+	struct scif_loopb_msg *msg;
+
+	do {
+		msg = NULL;
+		spin_lock(&qp->recv_lock);
+		if (!list_empty(&scif_info.loopb_recv_q)) {
+			msg = list_first_entry(&scif_info.loopb_recv_q,
+					       struct scif_loopb_msg,
+					       list);
+			list_del(&msg->list);
+		}
+		spin_unlock(&qp->recv_lock);
+
+		if (msg) {
+			scif_nodeqp_msg_handler(scifdev, qp, &msg->msg);
+			kfree(msg);
+		}
+	} while (msg);
+}
+
+/**
+ * scif_loopb_msg_handler() - Workqueue handler for loopback messages.
+ * @scifdev: SCIF device
+ * @qp: Queue pair.
+ *
+ * This work queue routine is triggered when a loopback message is received.
+ *
+ * We need special handling for receiving Node Qp messages on a loopback SCIF
+ * device via two workqueues for receiving messages.
+ *
+ * The reason we need the extra workqueue which is not required with *normal*
+ * non-loopback SCIF devices is the potential classic deadlock described below:
+ *
+ * Thread A tries to send a message on a loopback SCIF device and blocks since
+ * there is no space in the RB while it has the send_lock held or another
+ * lock called lock X for example.
+ *
+ * Thread B: The Loopback Node QP message receive workqueue receives the message
+ * and tries to send a message (eg an ACK) to the loopback SCIF device. It tries
+ * to grab the send lock again or lock X and deadlocks with Thread A. The RB
+ * cannot be drained any further due to this classic deadlock.
+ *
+ * In order to avoid deadlocks as mentioned above we have an extra level of
+ * indirection achieved by having two workqueues.
+ * 1) The first workqueue whose handler is scif_loopb_msg_handler reads
+ * messages from the Node QP RB, adds them to a list and queues work for the
+ * second workqueue.
+ *
+ * 2) The second workqueue whose handler is scif_loopb_wq_handler dequeues
+ * messages from the list, handles them, frees up the memory and dequeues
+ * more elements from the list if possible.
+ */
+int
+scif_loopb_msg_handler(struct scif_dev *scifdev, struct scif_qp *qp)
+{
+	int read_size;
+	struct scif_loopb_msg *msg;
+
+	do {
+		msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+		if (!msg)
+			return -ENOMEM;
+		read_size = scif_rb_get_next(&qp->inbound_q, &msg->msg,
+					     sizeof(struct scifmsg));
+		if (read_size != sizeof(struct scifmsg)) {
+			kfree(msg);
+			scif_rb_update_read_ptr(&qp->inbound_q);
+			break;
+		}
+		spin_lock(&qp->recv_lock);
+		list_add_tail(&msg->list, &scif_info.loopb_recv_q);
+		spin_unlock(&qp->recv_lock);
+		queue_work(scif_info.loopb_wq, &scif_info.loopb_work);
+		scif_rb_update_read_ptr(&qp->inbound_q);
+	} while (read_size == sizeof(struct scifmsg));
+	return read_size;
+}
+
+/**
+ * scif_setup_loopback_qp - One time setup work for Loopback Node Qp.
+ * @scifdev: SCIF device
+ *
+ * Sets up the required loopback workqueues, queue pairs and ring buffers
+ */
+int scif_setup_loopback_qp(struct scif_dev *scifdev)
+{
+	int err = 0;
+	void *local_q;
+	struct scif_qp *qp;
+
+	err = scif_setup_intr_wq(scifdev);
+	if (err)
+		goto exit;
+	INIT_LIST_HEAD(&scif_info.loopb_recv_q);
+	snprintf(scif_info.loopb_wqname, sizeof(scif_info.loopb_wqname),
+		 "SCIF LOOPB %d", scifdev->node);
+	scif_info.loopb_wq =
+		alloc_ordered_workqueue(scif_info.loopb_wqname, 0);
+	if (!scif_info.loopb_wq) {
+		err = -ENOMEM;
+		goto destroy_intr;
+	}
+	INIT_WORK(&scif_info.loopb_work, scif_loopb_wq_handler);
+	/* Allocate Self Qpair */
+	scifdev->qpairs = kzalloc(sizeof(*scifdev->qpairs), GFP_KERNEL);
+	if (!scifdev->qpairs) {
+		err = -ENOMEM;
+		goto destroy_loopb_wq;
+	}
+
+	qp = scifdev->qpairs;
+	qp->magic = SCIFEP_MAGIC;
+	spin_lock_init(&qp->send_lock);
+	spin_lock_init(&qp->recv_lock);
+
+	local_q = kzalloc(SCIF_NODE_QP_SIZE, GFP_KERNEL);
+	if (!local_q) {
+		err = -ENOMEM;
+		goto free_qpairs;
+	}
+	/*
+	 * For loopback the inbound_q and outbound_q are essentially the same
+	 * since the Node sends a message on the loopback interface to the
+	 * outbound_q which is then received on the inbound_q.
+	 */
+	scif_rb_init(&qp->outbound_q,
+		     &qp->local_read,
+		     &qp->local_write,
+		     local_q, get_count_order(SCIF_NODE_QP_SIZE));
+
+	scif_rb_init(&qp->inbound_q,
+		     &qp->local_read,
+		     &qp->local_write,
+		     local_q, get_count_order(SCIF_NODE_QP_SIZE));
+	scif_info.nodeid = scifdev->node;
+
+	scif_peer_register_device(scifdev);
+
+	scif_info.loopb_dev = scifdev;
+	return err;
+free_qpairs:
+	kfree(scifdev->qpairs);
+destroy_loopb_wq:
+	destroy_workqueue(scif_info.loopb_wq);
+destroy_intr:
+	scif_destroy_intr_wq(scifdev);
+exit:
+	return err;
+}
+
+/**
+ * scif_destroy_loopback_qp - One time uninit work for Loopback Node Qp
+ * @scifdev: SCIF device
+ *
+ * Destroys the workqueues and frees up the Ring Buffer and Queue Pair memory.
+ */
+int scif_destroy_loopback_qp(struct scif_dev *scifdev)
+{
+	scif_peer_unregister_device(scifdev);
+	destroy_workqueue(scif_info.loopb_wq);
+	scif_destroy_intr_wq(scifdev);
+	kfree(scifdev->qpairs->outbound_q.rb_base);
+	kfree(scifdev->qpairs);
+	scifdev->sdev = NULL;
+	scif_info.loopb_dev = NULL;
+	return 0;
+}
+
+void scif_destroy_p2p(struct scif_dev *scifdev)
+{
+	struct scif_dev *peer_dev;
+	struct scif_p2p_info *p2p;
+	struct list_head *pos, *tmp;
+	int bd;
+
+	mutex_lock(&scif_info.conflock);
+	/* Free P2P mappings in the given node for all its peer nodes */
+	list_for_each_safe(pos, tmp, &scifdev->p2p) {
+		p2p = list_entry(pos, struct scif_p2p_info, ppi_list);
+		dma_unmap_sg(&scifdev->sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+			     p2p->sg_nentries[SCIF_PPI_MMIO],
+			     DMA_BIDIRECTIONAL);
+		dma_unmap_sg(&scifdev->sdev->dev, p2p->ppi_sg[SCIF_PPI_APER],
+			     p2p->sg_nentries[SCIF_PPI_APER],
+			     DMA_BIDIRECTIONAL);
+		scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
+		scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
+		list_del(pos);
+		kfree(p2p);
+	}
+
+	/* Free P2P mapping created in the peer nodes for the given node */
+	for (bd = SCIF_MGMT_NODE + 1; bd <= scif_info.maxid; bd++) {
+		peer_dev = &scif_dev[bd];
+		list_for_each_safe(pos, tmp, &peer_dev->p2p) {
+			p2p = list_entry(pos, struct scif_p2p_info, ppi_list);
+			if (p2p->ppi_peer_id == scifdev->node) {
+				dma_unmap_sg(&peer_dev->sdev->dev,
+					     p2p->ppi_sg[SCIF_PPI_MMIO],
+					     p2p->sg_nentries[SCIF_PPI_MMIO],
+					     DMA_BIDIRECTIONAL);
+				dma_unmap_sg(&peer_dev->sdev->dev,
+					     p2p->ppi_sg[SCIF_PPI_APER],
+					     p2p->sg_nentries[SCIF_PPI_APER],
+					     DMA_BIDIRECTIONAL);
+				scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
+				scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
+				list_del(pos);
+				kfree(p2p);
+			}
+		}
+	}
+	mutex_unlock(&scif_info.conflock);
+}
diff --git a/drivers/misc/mic/scif/scif_nodeqp.h b/drivers/misc/mic/scif/scif_nodeqp.h
new file mode 100644
index 000000000..958962731
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_nodeqp.h
@@ -0,0 +1,221 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_NODEQP
+#define SCIF_NODEQP
+
+#include "scif_rb.h"
+#include "scif_peer_bus.h"
+
+#define SCIF_INIT 1  /* First message sent to the peer node for discovery */
+#define SCIF_EXIT 2  /* Last message from the peer informing intent to exit */
+#define SCIF_EXIT_ACK 3 /* Response to SCIF_EXIT message */
+#define SCIF_NODE_ADD 4  /* Tell Online nodes a new node exits */
+#define SCIF_NODE_ADD_ACK 5  /* Confirm to mgmt node sequence is finished */
+#define SCIF_NODE_ADD_NACK 6 /* SCIF_NODE_ADD failed */
+#define SCIF_NODE_REMOVE 7 /* Request to deactivate a SCIF node */
+#define SCIF_NODE_REMOVE_ACK 8 /* Response to a SCIF_NODE_REMOVE message */
+#define SCIF_CNCT_REQ 9  /* Phys addr of Request connection to a port */
+#define SCIF_CNCT_GNT 10  /* Phys addr of new Grant connection request */
+#define SCIF_CNCT_GNTACK 11  /* Error type Reject a connection request */
+#define SCIF_CNCT_GNTNACK 12  /* Error type Reject a connection request */
+#define SCIF_CNCT_REJ 13  /* Error type Reject a connection request */
+#define SCIF_DISCNCT 14 /* Notify peer that connection is being terminated */
+#define SCIF_DISCNT_ACK 15 /* Notify peer that connection is being terminated */
+#define SCIF_CLIENT_SENT 16 /* Notify the peer that data has been written */
+#define SCIF_CLIENT_RCVD 17 /* Notify the peer that data has been read */
+#define SCIF_GET_NODE_INFO 18 /* Get current node mask from the mgmt node*/
+#define SCIF_REGISTER 19 /* Tell peer about a new registered window */
+#define SCIF_REGISTER_ACK 20 /* Notify peer about unregistration success */
+#define SCIF_REGISTER_NACK 21 /* Notify peer about registration success */
+#define SCIF_UNREGISTER 22 /* Tell peer about unregistering a window */
+#define SCIF_UNREGISTER_ACK 23 /* Notify peer about registration failure */
+#define SCIF_UNREGISTER_NACK 24 /* Notify peer about unregistration failure */
+#define SCIF_ALLOC_REQ 25 /* Request a mapped buffer */
+#define SCIF_ALLOC_GNT 26 /* Notify peer about allocation success */
+#define SCIF_ALLOC_REJ 27 /* Notify peer about allocation failure */
+#define SCIF_FREE_VIRT 28 /* Free previously allocated virtual memory */
+#define SCIF_MUNMAP 29 /* Acknowledgment for a SCIF_MMAP request */
+#define SCIF_MARK 30 /* SCIF Remote Fence Mark Request */
+#define SCIF_MARK_ACK 31 /* SCIF Remote Fence Mark Success */
+#define SCIF_MARK_NACK 32 /* SCIF Remote Fence Mark Failure */
+#define SCIF_WAIT 33 /* SCIF Remote Fence Wait Request */
+#define SCIF_WAIT_ACK 34 /* SCIF Remote Fence Wait Success */
+#define SCIF_WAIT_NACK 35 /* SCIF Remote Fence Wait Failure */
+#define SCIF_SIG_LOCAL 36 /* SCIF Remote Fence Local Signal Request */
+#define SCIF_SIG_REMOTE 37 /* SCIF Remote Fence Remote Signal Request */
+#define SCIF_SIG_ACK 38 /* SCIF Remote Fence Remote Signal Success */
+#define SCIF_SIG_NACK 39 /* SCIF Remote Fence Remote Signal Failure */
+#define SCIF_MAX_MSG SCIF_SIG_NACK
+
+/*
+ * struct scifmsg - Node QP message format
+ *
+ * @src: Source information
+ * @dst: Destination information
+ * @uop: The message opcode
+ * @payload: Unique payload format for each message
+ */
+struct scifmsg {
+	struct scif_port_id src;
+	struct scif_port_id dst;
+	u32 uop;
+	u64 payload[4];
+} __packed;
+
+/*
+ * struct scif_allocmsg - Used with SCIF_ALLOC_REQ to request
+ * the remote note to allocate memory
+ *
+ * phys_addr: Physical address of the buffer
+ * vaddr: Virtual address of the buffer
+ * size: Size of the buffer
+ * state: Current state
+ * allocwq: wait queue for status
+ */
+struct scif_allocmsg {
+	dma_addr_t phys_addr;
+	unsigned long vaddr;
+	size_t size;
+	enum scif_msg_state state;
+	wait_queue_head_t allocwq;
+};
+
+/*
+ * struct scif_qp - Node Queue Pair
+ *
+ * Interesting structure -- a little difficult because we can only
+ * write across the PCIe, so any r/w pointer we need to read is
+ * local. We only need to read the read pointer on the inbound_q
+ * and read the write pointer in the outbound_q
+ *
+ * @magic: Magic value to ensure the peer sees the QP correctly
+ * @outbound_q: The outbound ring buffer for sending messages
+ * @inbound_q: The inbound ring buffer for receiving messages
+ * @local_write: Local write index
+ * @local_read: Local read index
+ * @remote_qp: The remote queue pair
+ * @local_buf: DMA address of local ring buffer
+ * @local_qp: DMA address of the local queue pair data structure
+ * @remote_buf: DMA address of remote ring buffer
+ * @qp_state: QP state i.e. online or offline used for P2P
+ * @send_lock: synchronize access to outbound queue
+ * @recv_lock: Synchronize access to inbound queue
+ */
+struct scif_qp {
+	u64 magic;
+#define SCIFEP_MAGIC 0x5c1f000000005c1fULL
+	struct scif_rb outbound_q;
+	struct scif_rb inbound_q;
+
+	u32 local_write __aligned(64);
+	u32 local_read __aligned(64);
+	struct scif_qp *remote_qp;
+	dma_addr_t local_buf;
+	dma_addr_t local_qp;
+	dma_addr_t remote_buf;
+	u32 qp_state;
+#define SCIF_QP_OFFLINE 0xdead
+#define SCIF_QP_ONLINE 0xc0de
+	spinlock_t send_lock;
+	spinlock_t recv_lock;
+};
+
+/*
+ * struct scif_loopb_msg - An element in the loopback Node QP message list.
+ *
+ * @msg - The SCIF node QP message
+ * @list - link in the list of messages
+ */
+struct scif_loopb_msg {
+	struct scifmsg msg;
+	struct list_head list;
+};
+
+int scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg);
+int _scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_nodeqp_intrhandler(struct scif_dev *scifdev, struct scif_qp *qp);
+int scif_loopb_msg_handler(struct scif_dev *scifdev, struct scif_qp *qp);
+int scif_setup_qp(struct scif_dev *scifdev);
+int scif_qp_response(phys_addr_t phys, struct scif_dev *dev);
+int scif_setup_qp_connect(struct scif_qp *qp, dma_addr_t *qp_offset,
+			  int local_size, struct scif_dev *scifdev);
+int scif_setup_qp_accept(struct scif_qp *qp, dma_addr_t *qp_offset,
+			 dma_addr_t phys, int local_size,
+			 struct scif_dev *scifdev);
+int scif_setup_qp_connect_response(struct scif_dev *scifdev,
+				   struct scif_qp *qp, u64 payload);
+int scif_setup_loopback_qp(struct scif_dev *scifdev);
+int scif_destroy_loopback_qp(struct scif_dev *scifdev);
+void scif_poll_qp_state(struct work_struct *work);
+void scif_destroy_p2p(struct scif_dev *scifdev);
+void scif_send_exit(struct scif_dev *scifdev);
+static inline struct device *scif_get_peer_dev(struct scif_dev *scifdev)
+{
+	struct scif_peer_dev *spdev;
+	struct device *spdev_ret;
+
+	rcu_read_lock();
+	spdev = rcu_dereference(scifdev->spdev);
+	if (spdev)
+		spdev_ret = get_device(&spdev->dev);
+	else
+		spdev_ret = ERR_PTR(-ENODEV);
+	rcu_read_unlock();
+	return spdev_ret;
+}
+
+static inline void scif_put_peer_dev(struct device *dev)
+{
+	put_device(dev);
+}
+#endif  /* SCIF_NODEQP */
diff --git a/drivers/misc/mic/scif/scif_peer_bus.c b/drivers/misc/mic/scif/scif_peer_bus.c
new file mode 100644
index 000000000..6ffa3bdbd
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_peer_bus.c
@@ -0,0 +1,183 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ */
+#include "scif_main.h"
+#include "../bus/scif_bus.h"
+#include "scif_peer_bus.h"
+
+static inline struct scif_peer_dev *
+dev_to_scif_peer(struct device *dev)
+{
+	return container_of(dev, struct scif_peer_dev, dev);
+}
+
+struct bus_type scif_peer_bus = {
+	.name  = "scif_peer_bus",
+};
+
+static void scif_peer_release_dev(struct device *d)
+{
+	struct scif_peer_dev *sdev = dev_to_scif_peer(d);
+	struct scif_dev *scifdev = &scif_dev[sdev->dnode];
+
+	scif_cleanup_scifdev(scifdev);
+	kfree(sdev);
+}
+
+static int scif_peer_initialize_device(struct scif_dev *scifdev)
+{
+	struct scif_peer_dev *spdev;
+	int ret;
+
+	spdev = kzalloc(sizeof(*spdev), GFP_KERNEL);
+	if (!spdev) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	spdev->dev.parent = scifdev->sdev->dev.parent;
+	spdev->dev.release = scif_peer_release_dev;
+	spdev->dnode = scifdev->node;
+	spdev->dev.bus = &scif_peer_bus;
+	dev_set_name(&spdev->dev, "scif_peer-dev%u", spdev->dnode);
+
+	device_initialize(&spdev->dev);
+	get_device(&spdev->dev);
+	rcu_assign_pointer(scifdev->spdev, spdev);
+
+	mutex_lock(&scif_info.conflock);
+	scif_info.total++;
+	scif_info.maxid = max_t(u32, spdev->dnode, scif_info.maxid);
+	mutex_unlock(&scif_info.conflock);
+	return 0;
+err:
+	dev_err(&scifdev->sdev->dev,
+		"dnode %d: initialize_device rc %d\n", scifdev->node, ret);
+	return ret;
+}
+
+static int scif_peer_add_device(struct scif_dev *scifdev)
+{
+	struct scif_peer_dev *spdev = rcu_dereference(scifdev->spdev);
+	char pool_name[16];
+	int ret;
+
+	ret = device_add(&spdev->dev);
+	put_device(&spdev->dev);
+	if (ret) {
+		dev_err(&scifdev->sdev->dev,
+			"dnode %d: peer device_add failed\n", scifdev->node);
+		goto put_spdev;
+	}
+
+	scnprintf(pool_name, sizeof(pool_name), "scif-%d", spdev->dnode);
+	scifdev->signal_pool = dmam_pool_create(pool_name, &scifdev->sdev->dev,
+						sizeof(struct scif_status), 1,
+						0);
+	if (!scifdev->signal_pool) {
+		dev_err(&scifdev->sdev->dev,
+			"dnode %d: dmam_pool_create failed\n", scifdev->node);
+		ret = -ENOMEM;
+		goto del_spdev;
+	}
+	dev_dbg(&spdev->dev, "Added peer dnode %d\n", spdev->dnode);
+	return 0;
+del_spdev:
+	device_del(&spdev->dev);
+put_spdev:
+	RCU_INIT_POINTER(scifdev->spdev, NULL);
+	synchronize_rcu();
+	put_device(&spdev->dev);
+
+	mutex_lock(&scif_info.conflock);
+	scif_info.total--;
+	mutex_unlock(&scif_info.conflock);
+	return ret;
+}
+
+void scif_add_peer_device(struct work_struct *work)
+{
+	struct scif_dev *scifdev = container_of(work, struct scif_dev,
+						peer_add_work);
+
+	scif_peer_add_device(scifdev);
+}
+
+/*
+ * Peer device registration is split into a device_initialize and a device_add.
+ * The reason for doing this is as follows: First, peer device registration
+ * itself cannot be done in the message processing thread and must be delegated
+ * to another workqueue, otherwise if SCIF client probe, called during peer
+ * device registration, calls scif_connect(..), it will block the message
+ * processing thread causing a deadlock. Next, device_initialize is done in the
+ * "top-half" message processing thread and device_add in the "bottom-half"
+ * workqueue. If this is not done, SCIF_CNCT_REQ message processing executing
+ * concurrently with SCIF_INIT message processing is unable to get a reference
+ * on the peer device, thereby failing the connect request.
+ */
+void scif_peer_register_device(struct scif_dev *scifdev)
+{
+	int ret;
+
+	mutex_lock(&scifdev->lock);
+	ret = scif_peer_initialize_device(scifdev);
+	if (ret)
+		goto exit;
+	schedule_work(&scifdev->peer_add_work);
+exit:
+	mutex_unlock(&scifdev->lock);
+}
+
+int scif_peer_unregister_device(struct scif_dev *scifdev)
+{
+	struct scif_peer_dev *spdev;
+
+	mutex_lock(&scifdev->lock);
+	/* Flush work to ensure device register is complete */
+	flush_work(&scifdev->peer_add_work);
+
+	/*
+	 * Continue holding scifdev->lock since theoretically unregister_device
+	 * can be called simultaneously from multiple threads
+	 */
+	spdev = rcu_dereference(scifdev->spdev);
+	if (!spdev) {
+		mutex_unlock(&scifdev->lock);
+		return -ENODEV;
+	}
+
+	RCU_INIT_POINTER(scifdev->spdev, NULL);
+	synchronize_rcu();
+	mutex_unlock(&scifdev->lock);
+
+	dev_dbg(&spdev->dev, "Removing peer dnode %d\n", spdev->dnode);
+	device_unregister(&spdev->dev);
+
+	mutex_lock(&scif_info.conflock);
+	scif_info.total--;
+	mutex_unlock(&scif_info.conflock);
+	return 0;
+}
+
+int scif_peer_bus_init(void)
+{
+	return bus_register(&scif_peer_bus);
+}
+
+void scif_peer_bus_exit(void)
+{
+	bus_unregister(&scif_peer_bus);
+}
diff --git a/drivers/misc/mic/scif/scif_peer_bus.h b/drivers/misc/mic/scif/scif_peer_bus.h
new file mode 100644
index 000000000..a3b8dd2ed
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_peer_bus.h
@@ -0,0 +1,31 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ */
+#ifndef _SCIF_PEER_BUS_H_
+#define _SCIF_PEER_BUS_H_
+
+#include <linux/device.h>
+#include <linux/mic_common.h>
+#include <linux/scif.h>
+
+struct scif_dev;
+
+void scif_add_peer_device(struct work_struct *work);
+void scif_peer_register_device(struct scif_dev *sdev);
+int scif_peer_unregister_device(struct scif_dev *scifdev);
+int scif_peer_bus_init(void);
+void scif_peer_bus_exit(void);
+#endif /* _SCIF_PEER_BUS_H */
diff --git a/drivers/misc/mic/scif/scif_ports.c b/drivers/misc/mic/scif/scif_ports.c
new file mode 100644
index 000000000..594e18d27
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_ports.c
@@ -0,0 +1,124 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/idr.h>
+
+#include "scif_main.h"
+
+#define SCIF_PORT_COUNT	0x10000	/* Ports available */
+
+struct idr scif_ports;
+
+/*
+ * struct scif_port - SCIF port information
+ *
+ * @ref_cnt - Reference count since there can be multiple endpoints
+ *		created via scif_accept(..) simultaneously using a port.
+ */
+struct scif_port {
+	int ref_cnt;
+};
+
+/**
+ * __scif_get_port - Reserve a specified port # for SCIF and add it
+ * to the global list.
+ * @port : port # to be reserved.
+ *
+ * @return : Allocated SCIF port #, or -ENOSPC if port unavailable.
+ *		On memory allocation failure, returns -ENOMEM.
+ */
+static int __scif_get_port(int start, int end)
+{
+	int id;
+	struct scif_port *port = kzalloc(sizeof(*port), GFP_ATOMIC);
+
+	if (!port)
+		return -ENOMEM;
+	spin_lock(&scif_info.port_lock);
+	id = idr_alloc(&scif_ports, port, start, end, GFP_ATOMIC);
+	if (id >= 0)
+		port->ref_cnt++;
+	spin_unlock(&scif_info.port_lock);
+	return id;
+}
+
+/**
+ * scif_rsrv_port - Reserve a specified port # for SCIF.
+ * @port : port # to be reserved.
+ *
+ * @return : Allocated SCIF port #, or -ENOSPC if port unavailable.
+ *		On memory allocation failure, returns -ENOMEM.
+ */
+int scif_rsrv_port(u16 port)
+{
+	return __scif_get_port(port, port + 1);
+}
+
+/**
+ * scif_get_new_port - Get and reserve any port # for SCIF in the range
+ *			SCIF_PORT_RSVD + 1 to SCIF_PORT_COUNT - 1.
+ *
+ * @return : Allocated SCIF port #, or -ENOSPC if no ports available.
+ *		On memory allocation failure, returns -ENOMEM.
+ */
+int scif_get_new_port(void)
+{
+	return __scif_get_port(SCIF_PORT_RSVD + 1, SCIF_PORT_COUNT);
+}
+
+/**
+ * scif_get_port - Increment the reference count for a SCIF port
+ * @id : SCIF port
+ *
+ * @return : None
+ */
+void scif_get_port(u16 id)
+{
+	struct scif_port *port;
+
+	if (!id)
+		return;
+	spin_lock(&scif_info.port_lock);
+	port = idr_find(&scif_ports, id);
+	if (port)
+		port->ref_cnt++;
+	spin_unlock(&scif_info.port_lock);
+}
+
+/**
+ * scif_put_port - Release a reserved SCIF port
+ * @id : SCIF port to be released.
+ *
+ * @return : None
+ */
+void scif_put_port(u16 id)
+{
+	struct scif_port *port;
+
+	if (!id)
+		return;
+	spin_lock(&scif_info.port_lock);
+	port = idr_find(&scif_ports, id);
+	if (port) {
+		port->ref_cnt--;
+		if (!port->ref_cnt) {
+			idr_remove(&scif_ports, id);
+			kfree(port);
+		}
+	}
+	spin_unlock(&scif_info.port_lock);
+}
diff --git a/drivers/misc/mic/scif/scif_rb.c b/drivers/misc/mic/scif/scif_rb.c
new file mode 100644
index 000000000..b665757ca
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rb.c
@@ -0,0 +1,249 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/circ_buf.h>
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/errno.h>
+
+#include "scif_rb.h"
+
+#define scif_rb_ring_cnt(head, tail, size) CIRC_CNT(head, tail, size)
+#define scif_rb_ring_space(head, tail, size) CIRC_SPACE(head, tail, size)
+
+/**
+ * scif_rb_init - Initializes the ring buffer
+ * @rb: ring buffer
+ * @read_ptr: A pointer to the read offset
+ * @write_ptr: A pointer to the write offset
+ * @rb_base: A pointer to the base of the ring buffer
+ * @size: The size of the ring buffer in powers of two
+ */
+void scif_rb_init(struct scif_rb *rb, u32 *read_ptr, u32 *write_ptr,
+		  void *rb_base, u8 size)
+{
+	rb->rb_base = rb_base;
+	rb->size = (1 << size);
+	rb->read_ptr = read_ptr;
+	rb->write_ptr = write_ptr;
+	rb->current_read_offset = *read_ptr;
+	rb->current_write_offset = *write_ptr;
+}
+
+/* Copies a message to the ring buffer -- handles the wrap around case */
+static void memcpy_torb(struct scif_rb *rb, void *header,
+			void *msg, u32 size)
+{
+	u32 size1, size2;
+
+	if (header + size >= rb->rb_base + rb->size) {
+		/* Need to call two copies if it wraps around */
+		size1 = (u32)(rb->rb_base + rb->size - header);
+		size2 = size - size1;
+		memcpy_toio((void __iomem __force *)header, msg, size1);
+		memcpy_toio((void __iomem __force *)rb->rb_base,
+			    msg + size1, size2);
+	} else {
+		memcpy_toio((void __iomem __force *)header, msg, size);
+	}
+}
+
+/* Copies a message from the ring buffer -- handles the wrap around case */
+static void memcpy_fromrb(struct scif_rb *rb, void *header,
+			  void *msg, u32 size)
+{
+	u32 size1, size2;
+
+	if (header + size >= rb->rb_base + rb->size) {
+		/* Need to call two copies if it wraps around */
+		size1 = (u32)(rb->rb_base + rb->size - header);
+		size2 = size - size1;
+		memcpy_fromio(msg, (void __iomem __force *)header, size1);
+		memcpy_fromio(msg + size1,
+			      (void __iomem __force *)rb->rb_base, size2);
+	} else {
+		memcpy_fromio(msg, (void __iomem __force *)header, size);
+	}
+}
+
+/**
+ * scif_rb_space - Query space available for writing to the RB
+ * @rb: ring buffer
+ *
+ * Return: size available for writing to RB in bytes.
+ */
+u32 scif_rb_space(struct scif_rb *rb)
+{
+	rb->current_read_offset = *rb->read_ptr;
+	/*
+	 * Update from the HW read pointer only once the peer has exposed the
+	 * new empty slot. This barrier is paired with the memory barrier
+	 * scif_rb_update_read_ptr()
+	 */
+	mb();
+	return scif_rb_ring_space(rb->current_write_offset,
+				  rb->current_read_offset, rb->size);
+}
+
+/**
+ * scif_rb_write - Write a message to the RB
+ * @rb: ring buffer
+ * @msg: buffer to send the message.  Must be at least size bytes long
+ * @size: the size (in bytes) to be copied to the RB
+ *
+ * This API does not block if there isn't enough space in the RB.
+ * Returns: 0 on success or -ENOMEM on failure
+ */
+int scif_rb_write(struct scif_rb *rb, void *msg, u32 size)
+{
+	void *header;
+
+	if (scif_rb_space(rb) < size)
+		return -ENOMEM;
+	header = rb->rb_base + rb->current_write_offset;
+	memcpy_torb(rb, header, msg, size);
+	/*
+	 * Wait until scif_rb_commit(). Update the local ring
+	 * buffer data, not the shared data until commit.
+	 */
+	rb->current_write_offset =
+		(rb->current_write_offset + size) & (rb->size - 1);
+	return 0;
+}
+
+/**
+ * scif_rb_commit - To submit the message to let the peer fetch it
+ * @rb: ring buffer
+ */
+void scif_rb_commit(struct scif_rb *rb)
+{
+	/*
+	 * We must ensure ordering between the all the data committed
+	 * previously before we expose the new message to the peer by
+	 * updating the write_ptr. This write barrier is paired with
+	 * the read barrier in scif_rb_count(..)
+	 */
+	wmb();
+	WRITE_ONCE(*rb->write_ptr, rb->current_write_offset);
+#ifdef CONFIG_INTEL_MIC_CARD
+	/*
+	 * X100 Si bug: For the case where a Core is performing an EXT_WR
+	 * followed by a Doorbell Write, the Core must perform two EXT_WR to the
+	 * same address with the same data before it does the Doorbell Write.
+	 * This way, if ordering is violated for the Interrupt Message, it will
+	 * fall just behind the first Posted associated with the first EXT_WR.
+	 */
+	WRITE_ONCE(*rb->write_ptr, rb->current_write_offset);
+#endif
+}
+
+/**
+ * scif_rb_get - To get next message from the ring buffer
+ * @rb: ring buffer
+ * @size: Number of bytes to be read
+ *
+ * Return: NULL if no bytes to be read from the ring buffer, otherwise the
+ *	pointer to the next byte
+ */
+static void *scif_rb_get(struct scif_rb *rb, u32 size)
+{
+	void *header = NULL;
+
+	if (scif_rb_count(rb, size) >= size)
+		header = rb->rb_base + rb->current_read_offset;
+	return header;
+}
+
+/*
+ * scif_rb_get_next - Read from ring buffer.
+ * @rb: ring buffer
+ * @msg: buffer to hold the message.  Must be at least size bytes long
+ * @size: Number of bytes to be read
+ *
+ * Return: number of bytes read if available bytes are >= size, otherwise
+ * returns zero.
+ */
+u32 scif_rb_get_next(struct scif_rb *rb, void *msg, u32 size)
+{
+	void *header = NULL;
+	int read_size = 0;
+
+	header = scif_rb_get(rb, size);
+	if (header) {
+		u32 next_cmd_offset =
+			(rb->current_read_offset + size) & (rb->size - 1);
+
+		read_size = size;
+		rb->current_read_offset = next_cmd_offset;
+		memcpy_fromrb(rb, header, msg, size);
+	}
+	return read_size;
+}
+
+/**
+ * scif_rb_update_read_ptr
+ * @rb: ring buffer
+ */
+void scif_rb_update_read_ptr(struct scif_rb *rb)
+{
+	u32 new_offset;
+
+	new_offset = rb->current_read_offset;
+	/*
+	 * We must ensure ordering between the all the data committed or read
+	 * previously before we expose the empty slot to the peer by updating
+	 * the read_ptr. This barrier is paired with the memory barrier in
+	 * scif_rb_space(..)
+	 */
+	mb();
+	WRITE_ONCE(*rb->read_ptr, new_offset);
+#ifdef CONFIG_INTEL_MIC_CARD
+	/*
+	 * X100 Si Bug: For the case where a Core is performing an EXT_WR
+	 * followed by a Doorbell Write, the Core must perform two EXT_WR to the
+	 * same address with the same data before it does the Doorbell Write.
+	 * This way, if ordering is violated for the Interrupt Message, it will
+	 * fall just behind the first Posted associated with the first EXT_WR.
+	 */
+	WRITE_ONCE(*rb->read_ptr, new_offset);
+#endif
+}
+
+/**
+ * scif_rb_count
+ * @rb: ring buffer
+ * @size: Number of bytes expected to be read
+ *
+ * Return: number of bytes that can be read from the RB
+ */
+u32 scif_rb_count(struct scif_rb *rb, u32 size)
+{
+	if (scif_rb_ring_cnt(rb->current_write_offset,
+			     rb->current_read_offset,
+			     rb->size) < size) {
+		rb->current_write_offset = *rb->write_ptr;
+		/*
+		 * Update from the HW write pointer if empty only once the peer
+		 * has exposed the new message. This read barrier is paired
+		 * with the write barrier in scif_rb_commit(..)
+		 */
+		smp_rmb();
+	}
+	return scif_rb_ring_cnt(rb->current_write_offset,
+				rb->current_read_offset,
+				rb->size);
+}
diff --git a/drivers/misc/mic/scif/scif_rb.h b/drivers/misc/mic/scif/scif_rb.h
new file mode 100644
index 000000000..166dffe30
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rb.h
@@ -0,0 +1,100 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel SCIF driver.
+ */
+#ifndef SCIF_RB_H
+#define SCIF_RB_H
+/*
+ * This file describes a general purpose, byte based ring buffer. Writers to the
+ * ring buffer need to synchronize using a lock. The same is true for readers,
+ * although in practice, the ring buffer has a single reader. It is lockless
+ * between producer and consumer so it can handle being used across the PCIe
+ * bus. The ring buffer ensures that there are no reads across the PCIe bus for
+ * performance reasons. Two of these are used to form a single bidirectional
+ * queue-pair across PCIe.
+ */
+/*
+ * struct scif_rb - SCIF Ring Buffer
+ *
+ * @rb_base: The base of the memory used for storing RB messages
+ * @read_ptr: Pointer to the read offset
+ * @write_ptr: Pointer to the write offset
+ * @size: Size of the memory in rb_base
+ * @current_read_offset: Cached read offset for performance
+ * @current_write_offset: Cached write offset for performance
+ */
+struct scif_rb {
+	void *rb_base;
+	u32 *read_ptr;
+	u32 *write_ptr;
+	u32 size;
+	u32 current_read_offset;
+	u32 current_write_offset;
+};
+
+/* methods used by both */
+void scif_rb_init(struct scif_rb *rb, u32 *read_ptr, u32 *write_ptr,
+		  void *rb_base, u8 size);
+/* writer only methods */
+/* write a new command, then scif_rb_commit() */
+int scif_rb_write(struct scif_rb *rb, void *msg, u32 size);
+/* after write(), then scif_rb_commit() */
+void scif_rb_commit(struct scif_rb *rb);
+/* query space available for writing to a RB. */
+u32 scif_rb_space(struct scif_rb *rb);
+
+/* reader only methods */
+/* read a new message from the ring buffer of size bytes */
+u32 scif_rb_get_next(struct scif_rb *rb, void *msg, u32 size);
+/* update the read pointer so that the space can be reused */
+void scif_rb_update_read_ptr(struct scif_rb *rb);
+/* count the number of bytes that can be read */
+u32 scif_rb_count(struct scif_rb *rb, u32 size);
+#endif
diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c
new file mode 100644
index 000000000..e1f59b177
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rma.c
@@ -0,0 +1,1775 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/dma_remapping.h>
+#include <linux/pagemap.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/signal.h>
+
+#include "scif_main.h"
+#include "scif_map.h"
+
+/* Used to skip ulimit checks for registrations with SCIF_MAP_KERNEL flag */
+#define SCIF_MAP_ULIMIT 0x40
+
+bool scif_ulimit_check = 1;
+
+/**
+ * scif_rma_ep_init:
+ * @ep: end point
+ *
+ * Initialize RMA per EP data structures.
+ */
+void scif_rma_ep_init(struct scif_endpt *ep)
+{
+	struct scif_endpt_rma_info *rma = &ep->rma_info;
+
+	mutex_init(&rma->rma_lock);
+	init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN);
+	spin_lock_init(&rma->tc_lock);
+	mutex_init(&rma->mmn_lock);
+	INIT_LIST_HEAD(&rma->reg_list);
+	INIT_LIST_HEAD(&rma->remote_reg_list);
+	atomic_set(&rma->tw_refcount, 0);
+	atomic_set(&rma->tcw_refcount, 0);
+	atomic_set(&rma->tcw_total_pages, 0);
+	atomic_set(&rma->fence_refcount, 0);
+
+	rma->async_list_del = 0;
+	rma->dma_chan = NULL;
+	INIT_LIST_HEAD(&rma->mmn_list);
+	INIT_LIST_HEAD(&rma->vma_list);
+	init_waitqueue_head(&rma->markwq);
+}
+
+/**
+ * scif_rma_ep_can_uninit:
+ * @ep: end point
+ *
+ * Returns 1 if an endpoint can be uninitialized and 0 otherwise.
+ */
+int scif_rma_ep_can_uninit(struct scif_endpt *ep)
+{
+	int ret = 0;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Destroy RMA Info only if both lists are empty */
+	if (list_empty(&ep->rma_info.reg_list) &&
+	    list_empty(&ep->rma_info.remote_reg_list) &&
+	    list_empty(&ep->rma_info.mmn_list) &&
+	    !atomic_read(&ep->rma_info.tw_refcount) &&
+	    !atomic_read(&ep->rma_info.tcw_refcount) &&
+	    !atomic_read(&ep->rma_info.fence_refcount))
+		ret = 1;
+	mutex_unlock(&ep->rma_info.rma_lock);
+	return ret;
+}
+
+/**
+ * scif_create_pinned_pages:
+ * @nr_pages: number of pages in window
+ * @prot: read/write protection
+ *
+ * Allocate and prepare a set of pinned pages.
+ */
+static struct scif_pinned_pages *
+scif_create_pinned_pages(int nr_pages, int prot)
+{
+	struct scif_pinned_pages *pin;
+
+	might_sleep();
+	pin = scif_zalloc(sizeof(*pin));
+	if (!pin)
+		goto error;
+
+	pin->pages = scif_zalloc(nr_pages * sizeof(*pin->pages));
+	if (!pin->pages)
+		goto error_free_pinned_pages;
+
+	pin->prot = prot;
+	pin->magic = SCIFEP_MAGIC;
+	return pin;
+
+error_free_pinned_pages:
+	scif_free(pin, sizeof(*pin));
+error:
+	return NULL;
+}
+
+/**
+ * scif_destroy_pinned_pages:
+ * @pin: A set of pinned pages.
+ *
+ * Deallocate resources for pinned pages.
+ */
+static int scif_destroy_pinned_pages(struct scif_pinned_pages *pin)
+{
+	int j;
+	int writeable = pin->prot & SCIF_PROT_WRITE;
+	int kernel = SCIF_MAP_KERNEL & pin->map_flags;
+
+	for (j = 0; j < pin->nr_pages; j++) {
+		if (pin->pages[j] && !kernel) {
+			if (writeable)
+				SetPageDirty(pin->pages[j]);
+			put_page(pin->pages[j]);
+		}
+	}
+
+	scif_free(pin->pages,
+		  pin->nr_pages * sizeof(*pin->pages));
+	scif_free(pin, sizeof(*pin));
+	return 0;
+}
+
+/*
+ * scif_create_window:
+ * @ep: end point
+ * @nr_pages: number of pages
+ * @offset: registration offset
+ * @temp: true if a temporary window is being created
+ *
+ * Allocate and prepare a self registration window.
+ */
+struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
+				       s64 offset, bool temp)
+{
+	struct scif_window *window;
+
+	might_sleep();
+	window = scif_zalloc(sizeof(*window));
+	if (!window)
+		goto error;
+
+	window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
+	if (!window->dma_addr)
+		goto error_free_window;
+
+	window->num_pages = scif_zalloc(nr_pages * sizeof(*window->num_pages));
+	if (!window->num_pages)
+		goto error_free_window;
+
+	window->offset = offset;
+	window->ep = (u64)ep;
+	window->magic = SCIFEP_MAGIC;
+	window->reg_state = OP_IDLE;
+	init_waitqueue_head(&window->regwq);
+	window->unreg_state = OP_IDLE;
+	init_waitqueue_head(&window->unregwq);
+	INIT_LIST_HEAD(&window->list);
+	window->type = SCIF_WINDOW_SELF;
+	window->temp = temp;
+	return window;
+
+error_free_window:
+	scif_free(window->dma_addr,
+		  nr_pages * sizeof(*window->dma_addr));
+	scif_free(window, sizeof(*window));
+error:
+	return NULL;
+}
+
+/**
+ * scif_destroy_incomplete_window:
+ * @ep: end point
+ * @window: registration window
+ *
+ * Deallocate resources for self window.
+ */
+static void scif_destroy_incomplete_window(struct scif_endpt *ep,
+					   struct scif_window *window)
+{
+	int err;
+	int nr_pages = window->nr_pages;
+	struct scif_allocmsg *alloc = &window->alloc_handle;
+	struct scifmsg msg;
+
+retry:
+	/* Wait for a SCIF_ALLOC_GNT/REJ message */
+	err = wait_event_timeout(alloc->allocwq,
+				 alloc->state != OP_IN_PROGRESS,
+				 SCIF_NODE_ALIVE_TIMEOUT);
+	if (!err && scifdev_alive(ep))
+		goto retry;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	if (alloc->state == OP_COMPLETED) {
+		msg.uop = SCIF_FREE_VIRT;
+		msg.src = ep->port;
+		msg.payload[0] = ep->remote_ep;
+		msg.payload[1] = window->alloc_handle.vaddr;
+		msg.payload[2] = (u64)window;
+		msg.payload[3] = SCIF_REGISTER;
+		_scif_nodeqp_send(ep->remote_dev, &msg);
+	}
+	mutex_unlock(&ep->rma_info.rma_lock);
+
+	scif_free_window_offset(ep, window, window->offset);
+	scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
+	scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
+	scif_free(window, sizeof(*window));
+}
+
+/**
+ * scif_unmap_window:
+ * @remote_dev: SCIF remote device
+ * @window: registration window
+ *
+ * Delete any DMA mappings created for a registered self window
+ */
+void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window)
+{
+	int j;
+
+	if (scif_is_iommu_enabled() && !scifdev_self(remote_dev)) {
+		if (window->st) {
+			dma_unmap_sg(&remote_dev->sdev->dev,
+				     window->st->sgl, window->st->nents,
+				     DMA_BIDIRECTIONAL);
+			sg_free_table(window->st);
+			kfree(window->st);
+			window->st = NULL;
+		}
+	} else {
+		for (j = 0; j < window->nr_contig_chunks; j++) {
+			if (window->dma_addr[j]) {
+				scif_unmap_single(window->dma_addr[j],
+						  remote_dev,
+						  window->num_pages[j] <<
+						  PAGE_SHIFT);
+				window->dma_addr[j] = 0x0;
+			}
+		}
+	}
+}
+
+static inline struct mm_struct *__scif_acquire_mm(void)
+{
+	if (scif_ulimit_check)
+		return get_task_mm(current);
+	return NULL;
+}
+
+static inline void __scif_release_mm(struct mm_struct *mm)
+{
+	if (mm)
+		mmput(mm);
+}
+
+static inline int
+__scif_dec_pinned_vm_lock(struct mm_struct *mm,
+			  int nr_pages, bool try_lock)
+{
+	if (!mm || !nr_pages || !scif_ulimit_check)
+		return 0;
+	if (try_lock) {
+		if (!down_write_trylock(&mm->mmap_sem)) {
+			dev_err(scif_info.mdev.this_device,
+				"%s %d err\n", __func__, __LINE__);
+			return -1;
+		}
+	} else {
+		down_write(&mm->mmap_sem);
+	}
+	mm->pinned_vm -= nr_pages;
+	up_write(&mm->mmap_sem);
+	return 0;
+}
+
+static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
+					     int nr_pages)
+{
+	unsigned long locked, lock_limit;
+
+	if (!mm || !nr_pages || !scif_ulimit_check)
+		return 0;
+
+	locked = nr_pages;
+	locked += mm->pinned_vm;
+	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+		dev_err(scif_info.mdev.this_device,
+			"locked(%lu) > lock_limit(%lu)\n",
+			locked, lock_limit);
+		return -ENOMEM;
+	}
+	mm->pinned_vm = locked;
+	return 0;
+}
+
+/**
+ * scif_destroy_window:
+ * @ep: end point
+ * @window: registration window
+ *
+ * Deallocate resources for self window.
+ */
+int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window)
+{
+	int j;
+	struct scif_pinned_pages *pinned_pages = window->pinned_pages;
+	int nr_pages = window->nr_pages;
+
+	might_sleep();
+	if (!window->temp && window->mm) {
+		__scif_dec_pinned_vm_lock(window->mm, window->nr_pages, 0);
+		__scif_release_mm(window->mm);
+		window->mm = NULL;
+	}
+
+	scif_free_window_offset(ep, window, window->offset);
+	scif_unmap_window(ep->remote_dev, window);
+	/*
+	 * Decrement references for this set of pinned pages from
+	 * this window.
+	 */
+	j = atomic_sub_return(1, &pinned_pages->ref_count);
+	if (j < 0)
+		dev_err(scif_info.mdev.this_device,
+			"%s %d incorrect ref count %d\n",
+			__func__, __LINE__, j);
+	/*
+	 * If the ref count for pinned_pages is zero then someone
+	 * has already called scif_unpin_pages() for it and we should
+	 * destroy the page cache.
+	 */
+	if (!j)
+		scif_destroy_pinned_pages(window->pinned_pages);
+	scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
+	scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
+	window->magic = 0;
+	scif_free(window, sizeof(*window));
+	return 0;
+}
+
+/**
+ * scif_create_remote_lookup:
+ * @remote_dev: SCIF remote device
+ * @window: remote window
+ *
+ * Allocate and prepare lookup entries for the remote
+ * end to copy over the physical addresses.
+ * Returns 0 on success and appropriate errno on failure.
+ */
+static int scif_create_remote_lookup(struct scif_dev *remote_dev,
+				     struct scif_window *window)
+{
+	int i, j, err = 0;
+	int nr_pages = window->nr_pages;
+	bool vmalloc_dma_phys, vmalloc_num_pages;
+
+	might_sleep();
+	/* Map window */
+	err = scif_map_single(&window->mapped_offset,
+			      window, remote_dev, sizeof(*window));
+	if (err)
+		goto error_window;
+
+	/* Compute the number of lookup entries. 21 == 2MB Shift */
+	window->nr_lookup = ALIGN(nr_pages * PAGE_SIZE,
+					((2) * 1024 * 1024)) >> 21;
+
+	window->dma_addr_lookup.lookup =
+		scif_alloc_coherent(&window->dma_addr_lookup.offset,
+				    remote_dev, window->nr_lookup *
+				    sizeof(*window->dma_addr_lookup.lookup),
+				    GFP_KERNEL | __GFP_ZERO);
+	if (!window->dma_addr_lookup.lookup) {
+		err = -ENOMEM;
+		goto error_window;
+	}
+
+	window->num_pages_lookup.lookup =
+		scif_alloc_coherent(&window->num_pages_lookup.offset,
+				    remote_dev, window->nr_lookup *
+				    sizeof(*window->num_pages_lookup.lookup),
+				    GFP_KERNEL | __GFP_ZERO);
+	if (!window->num_pages_lookup.lookup) {
+		err = -ENOMEM;
+		goto error_window;
+	}
+
+	vmalloc_dma_phys = is_vmalloc_addr(&window->dma_addr[0]);
+	vmalloc_num_pages = is_vmalloc_addr(&window->num_pages[0]);
+
+	/* Now map each of the pages containing physical addresses */
+	for (i = 0, j = 0; i < nr_pages; i += SCIF_NR_ADDR_IN_PAGE, j++) {
+		err = scif_map_page(&window->dma_addr_lookup.lookup[j],
+				    vmalloc_dma_phys ?
+				    vmalloc_to_page(&window->dma_addr[i]) :
+				    virt_to_page(&window->dma_addr[i]),
+				    remote_dev);
+		if (err)
+			goto error_window;
+		err = scif_map_page(&window->num_pages_lookup.lookup[j],
+				    vmalloc_num_pages ?
+				    vmalloc_to_page(&window->num_pages[i]) :
+				    virt_to_page(&window->num_pages[i]),
+				    remote_dev);
+		if (err)
+			goto error_window;
+	}
+	return 0;
+error_window:
+	return err;
+}
+
+/**
+ * scif_destroy_remote_lookup:
+ * @remote_dev: SCIF remote device
+ * @window: remote window
+ *
+ * Destroy lookup entries used for the remote
+ * end to copy over the physical addresses.
+ */
+static void scif_destroy_remote_lookup(struct scif_dev *remote_dev,
+				       struct scif_window *window)
+{
+	int i, j;
+
+	if (window->nr_lookup) {
+		struct scif_rma_lookup *lup = &window->dma_addr_lookup;
+		struct scif_rma_lookup *npup = &window->num_pages_lookup;
+
+		for (i = 0, j = 0; i < window->nr_pages;
+			i += SCIF_NR_ADDR_IN_PAGE, j++) {
+			if (lup->lookup && lup->lookup[j])
+				scif_unmap_single(lup->lookup[j],
+						  remote_dev,
+						  PAGE_SIZE);
+			if (npup->lookup && npup->lookup[j])
+				scif_unmap_single(npup->lookup[j],
+						  remote_dev,
+						  PAGE_SIZE);
+		}
+		if (lup->lookup)
+			scif_free_coherent(lup->lookup, lup->offset,
+					   remote_dev, window->nr_lookup *
+					   sizeof(*lup->lookup));
+		if (npup->lookup)
+			scif_free_coherent(npup->lookup, npup->offset,
+					   remote_dev, window->nr_lookup *
+					   sizeof(*npup->lookup));
+		if (window->mapped_offset)
+			scif_unmap_single(window->mapped_offset,
+					  remote_dev, sizeof(*window));
+		window->nr_lookup = 0;
+	}
+}
+
+/**
+ * scif_create_remote_window:
+ * @ep: end point
+ * @nr_pages: number of pages in window
+ *
+ * Allocate and prepare a remote registration window.
+ */
+static struct scif_window *
+scif_create_remote_window(struct scif_dev *scifdev, int nr_pages)
+{
+	struct scif_window *window;
+
+	might_sleep();
+	window = scif_zalloc(sizeof(*window));
+	if (!window)
+		goto error_ret;
+
+	window->magic = SCIFEP_MAGIC;
+	window->nr_pages = nr_pages;
+
+	window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
+	if (!window->dma_addr)
+		goto error_window;
+
+	window->num_pages = scif_zalloc(nr_pages *
+					sizeof(*window->num_pages));
+	if (!window->num_pages)
+		goto error_window;
+
+	if (scif_create_remote_lookup(scifdev, window))
+		goto error_window;
+
+	window->type = SCIF_WINDOW_PEER;
+	window->unreg_state = OP_IDLE;
+	INIT_LIST_HEAD(&window->list);
+	return window;
+error_window:
+	scif_destroy_remote_window(window);
+error_ret:
+	return NULL;
+}
+
+/**
+ * scif_destroy_remote_window:
+ * @ep: end point
+ * @window: remote registration window
+ *
+ * Deallocate resources for remote window.
+ */
+void
+scif_destroy_remote_window(struct scif_window *window)
+{
+	scif_free(window->dma_addr, window->nr_pages *
+		  sizeof(*window->dma_addr));
+	scif_free(window->num_pages, window->nr_pages *
+		  sizeof(*window->num_pages));
+	window->magic = 0;
+	scif_free(window, sizeof(*window));
+}
+
+/**
+ * scif_iommu_map: create DMA mappings if the IOMMU is enabled
+ * @remote_dev: SCIF remote device
+ * @window: remote registration window
+ *
+ * Map the physical pages using dma_map_sg(..) and then detect the number
+ * of contiguous DMA mappings allocated
+ */
+static int scif_iommu_map(struct scif_dev *remote_dev,
+			  struct scif_window *window)
+{
+	struct scatterlist *sg;
+	int i, err;
+	scif_pinned_pages_t pin = window->pinned_pages;
+
+	window->st = kzalloc(sizeof(*window->st), GFP_KERNEL);
+	if (!window->st)
+		return -ENOMEM;
+
+	err = sg_alloc_table(window->st, window->nr_pages, GFP_KERNEL);
+	if (err)
+		return err;
+
+	for_each_sg(window->st->sgl, sg, window->st->nents, i)
+		sg_set_page(sg, pin->pages[i], PAGE_SIZE, 0x0);
+
+	err = dma_map_sg(&remote_dev->sdev->dev, window->st->sgl,
+			 window->st->nents, DMA_BIDIRECTIONAL);
+	if (!err)
+		return -ENOMEM;
+	/* Detect contiguous ranges of DMA mappings */
+	sg = window->st->sgl;
+	for (i = 0; sg; i++) {
+		dma_addr_t last_da;
+
+		window->dma_addr[i] = sg_dma_address(sg);
+		window->num_pages[i] = sg_dma_len(sg) >> PAGE_SHIFT;
+		last_da = sg_dma_address(sg) + sg_dma_len(sg);
+		while ((sg = sg_next(sg)) && sg_dma_address(sg) == last_da) {
+			window->num_pages[i] +=
+				(sg_dma_len(sg) >> PAGE_SHIFT);
+			last_da = window->dma_addr[i] +
+				sg_dma_len(sg);
+		}
+		window->nr_contig_chunks++;
+	}
+	return 0;
+}
+
+/**
+ * scif_map_window:
+ * @remote_dev: SCIF remote device
+ * @window: self registration window
+ *
+ * Map pages of a window into the aperture/PCI.
+ * Also determine addresses required for DMA.
+ */
+int
+scif_map_window(struct scif_dev *remote_dev, struct scif_window *window)
+{
+	int i, j, k, err = 0, nr_contig_pages;
+	scif_pinned_pages_t pin;
+	phys_addr_t phys_prev, phys_curr;
+
+	might_sleep();
+
+	pin = window->pinned_pages;
+
+	if (intel_iommu_enabled && !scifdev_self(remote_dev))
+		return scif_iommu_map(remote_dev, window);
+
+	for (i = 0, j = 0; i < window->nr_pages; i += nr_contig_pages, j++) {
+		phys_prev = page_to_phys(pin->pages[i]);
+		nr_contig_pages = 1;
+
+		/* Detect physically contiguous chunks */
+		for (k = i + 1; k < window->nr_pages; k++) {
+			phys_curr = page_to_phys(pin->pages[k]);
+			if (phys_curr != (phys_prev + PAGE_SIZE))
+				break;
+			phys_prev = phys_curr;
+			nr_contig_pages++;
+		}
+		window->num_pages[j] = nr_contig_pages;
+		window->nr_contig_chunks++;
+		if (scif_is_mgmt_node()) {
+			/*
+			 * Management node has to deal with SMPT on X100 and
+			 * hence the DMA mapping is required
+			 */
+			err = scif_map_single(&window->dma_addr[j],
+					      phys_to_virt(page_to_phys(
+							   pin->pages[i])),
+					      remote_dev,
+					      nr_contig_pages << PAGE_SHIFT);
+			if (err)
+				return err;
+		} else {
+			window->dma_addr[j] = page_to_phys(pin->pages[i]);
+		}
+	}
+	return err;
+}
+
+/**
+ * scif_send_scif_unregister:
+ * @ep: end point
+ * @window: self registration window
+ *
+ * Send a SCIF_UNREGISTER message.
+ */
+static int scif_send_scif_unregister(struct scif_endpt *ep,
+				     struct scif_window *window)
+{
+	struct scifmsg msg;
+
+	msg.uop = SCIF_UNREGISTER;
+	msg.src = ep->port;
+	msg.payload[0] = window->alloc_handle.vaddr;
+	msg.payload[1] = (u64)window;
+	return scif_nodeqp_send(ep->remote_dev, &msg);
+}
+
+/**
+ * scif_unregister_window:
+ * @window: self registration window
+ *
+ * Send an unregistration request and wait for a response.
+ */
+int scif_unregister_window(struct scif_window *window)
+{
+	int err = 0;
+	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+	bool send_msg = false;
+
+	might_sleep();
+	switch (window->unreg_state) {
+	case OP_IDLE:
+	{
+		window->unreg_state = OP_IN_PROGRESS;
+		send_msg = true;
+		/* fall through */
+	}
+	case OP_IN_PROGRESS:
+	{
+		scif_get_window(window, 1);
+		mutex_unlock(&ep->rma_info.rma_lock);
+		if (send_msg) {
+			err = scif_send_scif_unregister(ep, window);
+			if (err) {
+				window->unreg_state = OP_COMPLETED;
+				goto done;
+			}
+		} else {
+			/* Return ENXIO since unregistration is in progress */
+			mutex_lock(&ep->rma_info.rma_lock);
+			return -ENXIO;
+		}
+retry:
+		/* Wait for a SCIF_UNREGISTER_(N)ACK message */
+		err = wait_event_timeout(window->unregwq,
+					 window->unreg_state != OP_IN_PROGRESS,
+					 SCIF_NODE_ALIVE_TIMEOUT);
+		if (!err && scifdev_alive(ep))
+			goto retry;
+		if (!err) {
+			err = -ENODEV;
+			window->unreg_state = OP_COMPLETED;
+			dev_err(scif_info.mdev.this_device,
+				"%s %d err %d\n", __func__, __LINE__, err);
+		}
+		if (err > 0)
+			err = 0;
+done:
+		mutex_lock(&ep->rma_info.rma_lock);
+		scif_put_window(window, 1);
+		break;
+	}
+	case OP_FAILED:
+	{
+		if (!scifdev_alive(ep)) {
+			err = -ENODEV;
+			window->unreg_state = OP_COMPLETED;
+		}
+		break;
+	}
+	case OP_COMPLETED:
+		break;
+	default:
+		err = -ENODEV;
+	}
+
+	if (window->unreg_state == OP_COMPLETED && window->ref_count)
+		scif_put_window(window, window->nr_pages);
+
+	if (!window->ref_count) {
+		atomic_inc(&ep->rma_info.tw_refcount);
+		list_del_init(&window->list);
+		scif_free_window_offset(ep, window, window->offset);
+		mutex_unlock(&ep->rma_info.rma_lock);
+		if ((!!(window->pinned_pages->map_flags & SCIF_MAP_KERNEL)) &&
+		    scifdev_alive(ep)) {
+			scif_drain_dma_intr(ep->remote_dev->sdev,
+					    ep->rma_info.dma_chan);
+		} else {
+			if (!__scif_dec_pinned_vm_lock(window->mm,
+						       window->nr_pages, 1)) {
+				__scif_release_mm(window->mm);
+				window->mm = NULL;
+			}
+		}
+		scif_queue_for_cleanup(window, &scif_info.rma);
+		mutex_lock(&ep->rma_info.rma_lock);
+	}
+	return err;
+}
+
+/**
+ * scif_send_alloc_request:
+ * @ep: end point
+ * @window: self registration window
+ *
+ * Send a remote window allocation request
+ */
+static int scif_send_alloc_request(struct scif_endpt *ep,
+				   struct scif_window *window)
+{
+	struct scifmsg msg;
+	struct scif_allocmsg *alloc = &window->alloc_handle;
+
+	/* Set up the Alloc Handle */
+	alloc->state = OP_IN_PROGRESS;
+	init_waitqueue_head(&alloc->allocwq);
+
+	/* Send out an allocation request */
+	msg.uop = SCIF_ALLOC_REQ;
+	msg.payload[1] = window->nr_pages;
+	msg.payload[2] = (u64)&window->alloc_handle;
+	return _scif_nodeqp_send(ep->remote_dev, &msg);
+}
+
+/**
+ * scif_prep_remote_window:
+ * @ep: end point
+ * @window: self registration window
+ *
+ * Send a remote window allocation request, wait for an allocation response,
+ * and prepares the remote window by copying over the page lists
+ */
+static int scif_prep_remote_window(struct scif_endpt *ep,
+				   struct scif_window *window)
+{
+	struct scifmsg msg;
+	struct scif_window *remote_window;
+	struct scif_allocmsg *alloc = &window->alloc_handle;
+	dma_addr_t *dma_phys_lookup, *tmp, *num_pages_lookup, *tmp1;
+	int i = 0, j = 0;
+	int nr_contig_chunks, loop_nr_contig_chunks;
+	int remaining_nr_contig_chunks, nr_lookup;
+	int err, map_err;
+
+	map_err = scif_map_window(ep->remote_dev, window);
+	if (map_err)
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d map_err %d\n", __func__, __LINE__, map_err);
+	remaining_nr_contig_chunks = window->nr_contig_chunks;
+	nr_contig_chunks = window->nr_contig_chunks;
+retry:
+	/* Wait for a SCIF_ALLOC_GNT/REJ message */
+	err = wait_event_timeout(alloc->allocwq,
+				 alloc->state != OP_IN_PROGRESS,
+				 SCIF_NODE_ALIVE_TIMEOUT);
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Synchronize with the thread waking up allocwq */
+	mutex_unlock(&ep->rma_info.rma_lock);
+	if (!err && scifdev_alive(ep))
+		goto retry;
+
+	if (!err)
+		err = -ENODEV;
+
+	if (err > 0)
+		err = 0;
+	else
+		return err;
+
+	/* Bail out. The remote end rejected this request */
+	if (alloc->state == OP_FAILED)
+		return -ENOMEM;
+
+	if (map_err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, map_err);
+		msg.uop = SCIF_FREE_VIRT;
+		msg.src = ep->port;
+		msg.payload[0] = ep->remote_ep;
+		msg.payload[1] = window->alloc_handle.vaddr;
+		msg.payload[2] = (u64)window;
+		msg.payload[3] = SCIF_REGISTER;
+		spin_lock(&ep->lock);
+		if (ep->state == SCIFEP_CONNECTED)
+			err = _scif_nodeqp_send(ep->remote_dev, &msg);
+		else
+			err = -ENOTCONN;
+		spin_unlock(&ep->lock);
+		return err;
+	}
+
+	remote_window = scif_ioremap(alloc->phys_addr, sizeof(*window),
+				     ep->remote_dev);
+
+	/* Compute the number of lookup entries. 21 == 2MB Shift */
+	nr_lookup = ALIGN(nr_contig_chunks, SCIF_NR_ADDR_IN_PAGE)
+			  >> ilog2(SCIF_NR_ADDR_IN_PAGE);
+
+	dma_phys_lookup =
+		scif_ioremap(remote_window->dma_addr_lookup.offset,
+			     nr_lookup *
+			     sizeof(*remote_window->dma_addr_lookup.lookup),
+			     ep->remote_dev);
+	num_pages_lookup =
+		scif_ioremap(remote_window->num_pages_lookup.offset,
+			     nr_lookup *
+			     sizeof(*remote_window->num_pages_lookup.lookup),
+			     ep->remote_dev);
+
+	while (remaining_nr_contig_chunks) {
+		loop_nr_contig_chunks = min_t(int, remaining_nr_contig_chunks,
+					      (int)SCIF_NR_ADDR_IN_PAGE);
+		/* #1/2 - Copy  physical addresses over to the remote side */
+
+		/* #2/2 - Copy DMA addresses (addresses that are fed into the
+		 * DMA engine) We transfer bus addresses which are then
+		 * converted into a MIC physical address on the remote
+		 * side if it is a MIC, if the remote node is a mgmt node we
+		 * transfer the MIC physical address
+		 */
+		tmp = scif_ioremap(dma_phys_lookup[j],
+				   loop_nr_contig_chunks *
+				   sizeof(*window->dma_addr),
+				   ep->remote_dev);
+		tmp1 = scif_ioremap(num_pages_lookup[j],
+				    loop_nr_contig_chunks *
+				    sizeof(*window->num_pages),
+				    ep->remote_dev);
+		if (scif_is_mgmt_node()) {
+			memcpy_toio((void __force __iomem *)tmp,
+				    &window->dma_addr[i], loop_nr_contig_chunks
+				    * sizeof(*window->dma_addr));
+			memcpy_toio((void __force __iomem *)tmp1,
+				    &window->num_pages[i], loop_nr_contig_chunks
+				    * sizeof(*window->num_pages));
+		} else {
+			if (scifdev_is_p2p(ep->remote_dev)) {
+				/*
+				 * add remote node's base address for this node
+				 * to convert it into a MIC address
+				 */
+				int m;
+				dma_addr_t dma_addr;
+
+				for (m = 0; m < loop_nr_contig_chunks; m++) {
+					dma_addr = window->dma_addr[i + m] +
+						ep->remote_dev->base_addr;
+					writeq(dma_addr,
+					       (void __force __iomem *)&tmp[m]);
+				}
+				memcpy_toio((void __force __iomem *)tmp1,
+					    &window->num_pages[i],
+					    loop_nr_contig_chunks
+					    * sizeof(*window->num_pages));
+			} else {
+				/* Mgmt node or loopback - transfer DMA
+				 * addresses as is, this is the same as a
+				 * MIC physical address (we use the dma_addr
+				 * and not the phys_addr array since the
+				 * phys_addr is only setup if there is a mmap()
+				 * request from the mgmt node)
+				 */
+				memcpy_toio((void __force __iomem *)tmp,
+					    &window->dma_addr[i],
+					    loop_nr_contig_chunks *
+					    sizeof(*window->dma_addr));
+				memcpy_toio((void __force __iomem *)tmp1,
+					    &window->num_pages[i],
+					    loop_nr_contig_chunks *
+					    sizeof(*window->num_pages));
+			}
+		}
+		remaining_nr_contig_chunks -= loop_nr_contig_chunks;
+		i += loop_nr_contig_chunks;
+		j++;
+		scif_iounmap(tmp, loop_nr_contig_chunks *
+			     sizeof(*window->dma_addr), ep->remote_dev);
+		scif_iounmap(tmp1, loop_nr_contig_chunks *
+			     sizeof(*window->num_pages), ep->remote_dev);
+	}
+
+	/* Prepare the remote window for the peer */
+	remote_window->peer_window = (u64)window;
+	remote_window->offset = window->offset;
+	remote_window->prot = window->prot;
+	remote_window->nr_contig_chunks = nr_contig_chunks;
+	remote_window->ep = ep->remote_ep;
+	scif_iounmap(num_pages_lookup,
+		     nr_lookup *
+		     sizeof(*remote_window->num_pages_lookup.lookup),
+		     ep->remote_dev);
+	scif_iounmap(dma_phys_lookup,
+		     nr_lookup *
+		     sizeof(*remote_window->dma_addr_lookup.lookup),
+		     ep->remote_dev);
+	scif_iounmap(remote_window, sizeof(*remote_window), ep->remote_dev);
+	window->peer_window = alloc->vaddr;
+	return err;
+}
+
+/**
+ * scif_send_scif_register:
+ * @ep: end point
+ * @window: self registration window
+ *
+ * Send a SCIF_REGISTER message if EP is connected and wait for a
+ * SCIF_REGISTER_(N)ACK message else send a SCIF_FREE_VIRT
+ * message so that the peer can free its remote window allocated earlier.
+ */
+static int scif_send_scif_register(struct scif_endpt *ep,
+				   struct scif_window *window)
+{
+	int err = 0;
+	struct scifmsg msg;
+
+	msg.src = ep->port;
+	msg.payload[0] = ep->remote_ep;
+	msg.payload[1] = window->alloc_handle.vaddr;
+	msg.payload[2] = (u64)window;
+	spin_lock(&ep->lock);
+	if (ep->state == SCIFEP_CONNECTED) {
+		msg.uop = SCIF_REGISTER;
+		window->reg_state = OP_IN_PROGRESS;
+		err = _scif_nodeqp_send(ep->remote_dev, &msg);
+		spin_unlock(&ep->lock);
+		if (!err) {
+retry:
+			/* Wait for a SCIF_REGISTER_(N)ACK message */
+			err = wait_event_timeout(window->regwq,
+						 window->reg_state !=
+						 OP_IN_PROGRESS,
+						 SCIF_NODE_ALIVE_TIMEOUT);
+			if (!err && scifdev_alive(ep))
+				goto retry;
+			err = !err ? -ENODEV : 0;
+			if (window->reg_state == OP_FAILED)
+				err = -ENOTCONN;
+		}
+	} else {
+		msg.uop = SCIF_FREE_VIRT;
+		msg.payload[3] = SCIF_REGISTER;
+		err = _scif_nodeqp_send(ep->remote_dev, &msg);
+		spin_unlock(&ep->lock);
+		if (!err)
+			err = -ENOTCONN;
+	}
+	return err;
+}
+
+/**
+ * scif_get_window_offset:
+ * @ep: end point descriptor
+ * @flags: flags
+ * @offset: offset hint
+ * @num_pages: number of pages
+ * @out_offset: computed offset returned by reference.
+ *
+ * Compute/Claim a new offset for this EP.
+ */
+int scif_get_window_offset(struct scif_endpt *ep, int flags, s64 offset,
+			   int num_pages, s64 *out_offset)
+{
+	s64 page_index;
+	struct iova *iova_ptr;
+	int err = 0;
+
+	if (flags & SCIF_MAP_FIXED) {
+		page_index = SCIF_IOVA_PFN(offset);
+		iova_ptr = reserve_iova(&ep->rma_info.iovad, page_index,
+					page_index + num_pages - 1);
+		if (!iova_ptr)
+			err = -EADDRINUSE;
+	} else {
+		iova_ptr = alloc_iova(&ep->rma_info.iovad, num_pages,
+				      SCIF_DMA_63BIT_PFN - 1, 0);
+		if (!iova_ptr)
+			err = -ENOMEM;
+	}
+	if (!err)
+		*out_offset = (iova_ptr->pfn_lo) << PAGE_SHIFT;
+	return err;
+}
+
+/**
+ * scif_free_window_offset:
+ * @ep: end point descriptor
+ * @window: registration window
+ * @offset: Offset to be freed
+ *
+ * Free offset for this EP. The callee is supposed to grab
+ * the RMA mutex before calling this API.
+ */
+void scif_free_window_offset(struct scif_endpt *ep,
+			     struct scif_window *window, s64 offset)
+{
+	if ((window && !window->offset_freed) || !window) {
+		free_iova(&ep->rma_info.iovad, offset >> PAGE_SHIFT);
+		if (window)
+			window->offset_freed = true;
+	}
+}
+
+/**
+ * scif_alloc_req: Respond to SCIF_ALLOC_REQ interrupt message
+ * @msg:        Interrupt message
+ *
+ * Remote side is requesting a memory allocation.
+ */
+void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	int err;
+	struct scif_window *window = NULL;
+	int nr_pages = msg->payload[1];
+
+	window = scif_create_remote_window(scifdev, nr_pages);
+	if (!window) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	/* The peer's allocation request is granted */
+	msg->uop = SCIF_ALLOC_GNT;
+	msg->payload[0] = (u64)window;
+	msg->payload[1] = window->mapped_offset;
+	err = scif_nodeqp_send(scifdev, msg);
+	if (err)
+		scif_destroy_remote_window(window);
+	return;
+error:
+	/* The peer's allocation request is rejected */
+	dev_err(&scifdev->sdev->dev,
+		"%s %d error %d alloc_ptr %p nr_pages 0x%x\n",
+		__func__, __LINE__, err, window, nr_pages);
+	msg->uop = SCIF_ALLOC_REJ;
+	scif_nodeqp_send(scifdev, msg);
+}
+
+/**
+ * scif_alloc_gnt_rej: Respond to SCIF_ALLOC_GNT/REJ interrupt message
+ * @msg:        Interrupt message
+ *
+ * Remote side responded to a memory allocation.
+ */
+void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_allocmsg *handle = (struct scif_allocmsg *)msg->payload[2];
+	struct scif_window *window = container_of(handle, struct scif_window,
+						  alloc_handle);
+	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	handle->vaddr = msg->payload[0];
+	handle->phys_addr = msg->payload[1];
+	if (msg->uop == SCIF_ALLOC_GNT)
+		handle->state = OP_COMPLETED;
+	else
+		handle->state = OP_FAILED;
+	wake_up(&handle->allocwq);
+	mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/**
+ * scif_free_virt: Respond to SCIF_FREE_VIRT interrupt message
+ * @msg:        Interrupt message
+ *
+ * Free up memory kmalloc'd earlier.
+ */
+void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_window *window = (struct scif_window *)msg->payload[1];
+
+	scif_destroy_remote_window(window);
+}
+
+static void
+scif_fixup_aper_base(struct scif_dev *dev, struct scif_window *window)
+{
+	int j;
+	struct scif_hw_dev *sdev = dev->sdev;
+	phys_addr_t apt_base = 0;
+
+	/*
+	 * Add the aperture base if the DMA address is not card relative
+	 * since the DMA addresses need to be an offset into the bar
+	 */
+	if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER &&
+	    sdev->aper && !sdev->card_rel_da)
+		apt_base = sdev->aper->pa;
+	else
+		return;
+
+	for (j = 0; j < window->nr_contig_chunks; j++) {
+		if (window->num_pages[j])
+			window->dma_addr[j] += apt_base;
+		else
+			break;
+	}
+}
+
+/**
+ * scif_recv_reg: Respond to SCIF_REGISTER interrupt message
+ * @msg:        Interrupt message
+ *
+ * Update remote window list with a new registered window.
+ */
+void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	struct scif_window *window =
+		(struct scif_window *)msg->payload[1];
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	spin_lock(&ep->lock);
+	if (ep->state == SCIFEP_CONNECTED) {
+		msg->uop = SCIF_REGISTER_ACK;
+		scif_nodeqp_send(ep->remote_dev, msg);
+		scif_fixup_aper_base(ep->remote_dev, window);
+		/* No further failures expected. Insert new window */
+		scif_insert_window(window, &ep->rma_info.remote_reg_list);
+	} else {
+		msg->uop = SCIF_REGISTER_NACK;
+		scif_nodeqp_send(ep->remote_dev, msg);
+	}
+	spin_unlock(&ep->lock);
+	mutex_unlock(&ep->rma_info.rma_lock);
+	/* free up any lookup resources now that page lists are transferred */
+	scif_destroy_remote_lookup(ep->remote_dev, window);
+	/*
+	 * We could not insert the window but we need to
+	 * destroy the window.
+	 */
+	if (msg->uop == SCIF_REGISTER_NACK)
+		scif_destroy_remote_window(window);
+}
+
+/**
+ * scif_recv_unreg: Respond to SCIF_UNREGISTER interrupt message
+ * @msg:        Interrupt message
+ *
+ * Remove window from remote registration list;
+ */
+void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_rma_req req;
+	struct scif_window *window = NULL;
+	struct scif_window *recv_window =
+		(struct scif_window *)msg->payload[0];
+	struct scif_endpt *ep;
+	int del_window = 0;
+
+	ep = (struct scif_endpt *)recv_window->ep;
+	req.out_window = &window;
+	req.offset = recv_window->offset;
+	req.prot = 0;
+	req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
+	req.type = SCIF_WINDOW_FULL;
+	req.head = &ep->rma_info.remote_reg_list;
+	msg->payload[0] = ep->remote_ep;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Does a valid window exist? */
+	if (scif_query_window(&req)) {
+		dev_err(&scifdev->sdev->dev,
+			"%s %d -ENXIO\n", __func__, __LINE__);
+		msg->uop = SCIF_UNREGISTER_ACK;
+		goto error;
+	}
+	if (window) {
+		if (window->ref_count)
+			scif_put_window(window, window->nr_pages);
+		else
+			dev_err(&scifdev->sdev->dev,
+				"%s %d ref count should be +ve\n",
+				__func__, __LINE__);
+		window->unreg_state = OP_COMPLETED;
+		if (!window->ref_count) {
+			msg->uop = SCIF_UNREGISTER_ACK;
+			atomic_inc(&ep->rma_info.tw_refcount);
+			ep->rma_info.async_list_del = 1;
+			list_del_init(&window->list);
+			del_window = 1;
+		} else {
+			/* NACK! There are valid references to this window */
+			msg->uop = SCIF_UNREGISTER_NACK;
+		}
+	} else {
+		/* The window did not make its way to the list at all. ACK */
+		msg->uop = SCIF_UNREGISTER_ACK;
+		scif_destroy_remote_window(recv_window);
+	}
+error:
+	mutex_unlock(&ep->rma_info.rma_lock);
+	if (del_window)
+		scif_drain_dma_intr(ep->remote_dev->sdev,
+				    ep->rma_info.dma_chan);
+	scif_nodeqp_send(ep->remote_dev, msg);
+	if (del_window)
+		scif_queue_for_cleanup(window, &scif_info.rma);
+}
+
+/**
+ * scif_recv_reg_ack: Respond to SCIF_REGISTER_ACK interrupt message
+ * @msg:        Interrupt message
+ *
+ * Wake up the window waiting to complete registration.
+ */
+void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_window *window =
+		(struct scif_window *)msg->payload[2];
+	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	window->reg_state = OP_COMPLETED;
+	wake_up(&window->regwq);
+	mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/**
+ * scif_recv_reg_nack: Respond to SCIF_REGISTER_NACK interrupt message
+ * @msg:        Interrupt message
+ *
+ * Wake up the window waiting to inform it that registration
+ * cannot be completed.
+ */
+void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_window *window =
+		(struct scif_window *)msg->payload[2];
+	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	window->reg_state = OP_FAILED;
+	wake_up(&window->regwq);
+	mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/**
+ * scif_recv_unreg_ack: Respond to SCIF_UNREGISTER_ACK interrupt message
+ * @msg:        Interrupt message
+ *
+ * Wake up the window waiting to complete unregistration.
+ */
+void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_window *window =
+		(struct scif_window *)msg->payload[1];
+	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	window->unreg_state = OP_COMPLETED;
+	wake_up(&window->unregwq);
+	mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/**
+ * scif_recv_unreg_nack: Respond to SCIF_UNREGISTER_NACK interrupt message
+ * @msg:        Interrupt message
+ *
+ * Wake up the window waiting to inform it that unregistration
+ * cannot be completed immediately.
+ */
+void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_window *window =
+		(struct scif_window *)msg->payload[1];
+	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	window->unreg_state = OP_FAILED;
+	wake_up(&window->unregwq);
+	mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+int __scif_pin_pages(void *addr, size_t len, int *out_prot,
+		     int map_flags, scif_pinned_pages_t *pages)
+{
+	struct scif_pinned_pages *pinned_pages;
+	int nr_pages, err = 0, i;
+	bool vmalloc_addr = false;
+	bool try_upgrade = false;
+	int prot = *out_prot;
+	int ulimit = 0;
+	struct mm_struct *mm = NULL;
+
+	/* Unsupported flags */
+	if (map_flags & ~(SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT))
+		return -EINVAL;
+	ulimit = !!(map_flags & SCIF_MAP_ULIMIT);
+
+	/* Unsupported protection requested */
+	if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
+		return -EINVAL;
+
+	/* addr/len must be page aligned. len should be non zero */
+	if (!len ||
+	    (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
+	    (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
+		return -EINVAL;
+
+	might_sleep();
+
+	nr_pages = len >> PAGE_SHIFT;
+
+	/* Allocate a set of pinned pages */
+	pinned_pages = scif_create_pinned_pages(nr_pages, prot);
+	if (!pinned_pages)
+		return -ENOMEM;
+
+	if (map_flags & SCIF_MAP_KERNEL) {
+		if (is_vmalloc_addr(addr))
+			vmalloc_addr = true;
+
+		for (i = 0; i < nr_pages; i++) {
+			if (vmalloc_addr)
+				pinned_pages->pages[i] =
+					vmalloc_to_page(addr + (i * PAGE_SIZE));
+			else
+				pinned_pages->pages[i] =
+					virt_to_page(addr + (i * PAGE_SIZE));
+		}
+		pinned_pages->nr_pages = nr_pages;
+		pinned_pages->map_flags = SCIF_MAP_KERNEL;
+	} else {
+		/*
+		 * SCIF supports registration caching. If a registration has
+		 * been requested with read only permissions, then we try
+		 * to pin the pages with RW permissions so that a subsequent
+		 * transfer with RW permission can hit the cache instead of
+		 * invalidating it. If the upgrade fails with RW then we
+		 * revert back to R permission and retry
+		 */
+		if (prot == SCIF_PROT_READ)
+			try_upgrade = true;
+		prot |= SCIF_PROT_WRITE;
+retry:
+		mm = current->mm;
+		down_write(&mm->mmap_sem);
+		if (ulimit) {
+			err = __scif_check_inc_pinned_vm(mm, nr_pages);
+			if (err) {
+				up_write(&mm->mmap_sem);
+				pinned_pages->nr_pages = 0;
+				goto error_unmap;
+			}
+		}
+
+		pinned_pages->nr_pages = get_user_pages(
+				(u64)addr,
+				nr_pages,
+				(prot & SCIF_PROT_WRITE) ? FOLL_WRITE : 0,
+				pinned_pages->pages,
+				NULL);
+		up_write(&mm->mmap_sem);
+		if (nr_pages != pinned_pages->nr_pages) {
+			if (pinned_pages->nr_pages < 0)
+				pinned_pages->nr_pages = 0;
+			if (try_upgrade) {
+				if (ulimit)
+					__scif_dec_pinned_vm_lock(mm,
+								  nr_pages, 0);
+				/* Roll back any pinned pages */
+				for (i = 0; i < pinned_pages->nr_pages; i++) {
+					if (pinned_pages->pages[i])
+						put_page(
+						pinned_pages->pages[i]);
+				}
+				prot &= ~SCIF_PROT_WRITE;
+				try_upgrade = false;
+				goto retry;
+			}
+		}
+		pinned_pages->map_flags = 0;
+	}
+
+	if (pinned_pages->nr_pages < nr_pages) {
+		err = -EFAULT;
+		goto dec_pinned;
+	}
+
+	*out_prot = prot;
+	atomic_set(&pinned_pages->ref_count, 1);
+	*pages = pinned_pages;
+	return err;
+dec_pinned:
+	if (ulimit)
+		__scif_dec_pinned_vm_lock(mm, nr_pages, 0);
+	/* Something went wrong! Rollback */
+error_unmap:
+	scif_destroy_pinned_pages(pinned_pages);
+	*pages = NULL;
+	dev_dbg(scif_info.mdev.this_device,
+		"%s %d err %d len 0x%lx\n", __func__, __LINE__, err, len);
+	return err;
+}
+
+int scif_pin_pages(void *addr, size_t len, int prot,
+		   int map_flags, scif_pinned_pages_t *pages)
+{
+	return __scif_pin_pages(addr, len, &prot, map_flags, pages);
+}
+EXPORT_SYMBOL_GPL(scif_pin_pages);
+
+int scif_unpin_pages(scif_pinned_pages_t pinned_pages)
+{
+	int err = 0, ret;
+
+	if (!pinned_pages || SCIFEP_MAGIC != pinned_pages->magic)
+		return -EINVAL;
+
+	ret = atomic_sub_return(1, &pinned_pages->ref_count);
+	if (ret < 0) {
+		dev_err(scif_info.mdev.this_device,
+			"%s %d scif_unpin_pages called without pinning? rc %d\n",
+			__func__, __LINE__, ret);
+		return -EINVAL;
+	}
+	/*
+	 * Destroy the window if the ref count for this set of pinned
+	 * pages has dropped to zero. If it is positive then there is
+	 * a valid registered window which is backed by these pages and
+	 * it will be destroyed once all such windows are unregistered.
+	 */
+	if (!ret)
+		err = scif_destroy_pinned_pages(pinned_pages);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_unpin_pages);
+
+static inline void
+scif_insert_local_window(struct scif_window *window, struct scif_endpt *ep)
+{
+	mutex_lock(&ep->rma_info.rma_lock);
+	scif_insert_window(window, &ep->rma_info.reg_list);
+	mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+off_t scif_register_pinned_pages(scif_epd_t epd,
+				 scif_pinned_pages_t pinned_pages,
+				 off_t offset, int map_flags)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	s64 computed_offset;
+	struct scif_window *window;
+	int err;
+	size_t len;
+	struct device *spdev;
+
+	/* Unsupported flags */
+	if (map_flags & ~SCIF_MAP_FIXED)
+		return -EINVAL;
+
+	len = pinned_pages->nr_pages << PAGE_SHIFT;
+
+	/*
+	 * Offset is not page aligned/negative or offset+len
+	 * wraps around with SCIF_MAP_FIXED.
+	 */
+	if ((map_flags & SCIF_MAP_FIXED) &&
+	    ((ALIGN(offset, PAGE_SIZE) != offset) ||
+	    (offset < 0) ||
+	    (len > LONG_MAX - offset)))
+		return -EINVAL;
+
+	might_sleep();
+
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+	/*
+	 * It is an error to pass pinned_pages to scif_register_pinned_pages()
+	 * after calling scif_unpin_pages().
+	 */
+	if (!atomic_add_unless(&pinned_pages->ref_count, 1, 0))
+		return -EINVAL;
+
+	/* Compute the offset for this registration */
+	err = scif_get_window_offset(ep, map_flags, offset,
+				     len, &computed_offset);
+	if (err) {
+		atomic_sub(1, &pinned_pages->ref_count);
+		return err;
+	}
+
+	/* Allocate and prepare self registration window */
+	window = scif_create_window(ep, pinned_pages->nr_pages,
+				    computed_offset, false);
+	if (!window) {
+		atomic_sub(1, &pinned_pages->ref_count);
+		scif_free_window_offset(ep, NULL, computed_offset);
+		return -ENOMEM;
+	}
+
+	window->pinned_pages = pinned_pages;
+	window->nr_pages = pinned_pages->nr_pages;
+	window->prot = pinned_pages->prot;
+
+	spdev = scif_get_peer_dev(ep->remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		scif_destroy_window(ep, window);
+		return err;
+	}
+	err = scif_send_alloc_request(ep, window);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error_unmap;
+	}
+
+	/* Prepare the remote registration window */
+	err = scif_prep_remote_window(ep, window);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error_unmap;
+	}
+
+	/* Tell the peer about the new window */
+	err = scif_send_scif_register(ep, window);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error_unmap;
+	}
+
+	scif_put_peer_dev(spdev);
+	/* No further failures expected. Insert new window */
+	scif_insert_local_window(window, ep);
+	return computed_offset;
+error_unmap:
+	scif_destroy_window(ep, window);
+	scif_put_peer_dev(spdev);
+	dev_err(&ep->remote_dev->sdev->dev,
+		"%s %d err %d\n", __func__, __LINE__, err);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_register_pinned_pages);
+
+off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
+		    int prot, int map_flags)
+{
+	scif_pinned_pages_t pinned_pages;
+	off_t err;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	s64 computed_offset;
+	struct scif_window *window;
+	struct mm_struct *mm = NULL;
+	struct device *spdev;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI register: ep %p addr %p len 0x%lx offset 0x%lx prot 0x%x map_flags 0x%x\n",
+		epd, addr, len, offset, prot, map_flags);
+	/* Unsupported flags */
+	if (map_flags & ~(SCIF_MAP_FIXED | SCIF_MAP_KERNEL))
+		return -EINVAL;
+
+	/*
+	 * Offset is not page aligned/negative or offset+len
+	 * wraps around with SCIF_MAP_FIXED.
+	 */
+	if ((map_flags & SCIF_MAP_FIXED) &&
+	    ((ALIGN(offset, PAGE_SIZE) != offset) ||
+	    (offset < 0) ||
+	    (len > LONG_MAX - offset)))
+		return -EINVAL;
+
+	/* Unsupported protection requested */
+	if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
+		return -EINVAL;
+
+	/* addr/len must be page aligned. len should be non zero */
+	if (!len || (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
+	    (ALIGN(len, PAGE_SIZE) != len))
+		return -EINVAL;
+
+	might_sleep();
+
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	/* Compute the offset for this registration */
+	err = scif_get_window_offset(ep, map_flags, offset,
+				     len >> PAGE_SHIFT, &computed_offset);
+	if (err)
+		return err;
+
+	spdev = scif_get_peer_dev(ep->remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		scif_free_window_offset(ep, NULL, computed_offset);
+		return err;
+	}
+	/* Allocate and prepare self registration window */
+	window = scif_create_window(ep, len >> PAGE_SHIFT,
+				    computed_offset, false);
+	if (!window) {
+		scif_free_window_offset(ep, NULL, computed_offset);
+		scif_put_peer_dev(spdev);
+		return -ENOMEM;
+	}
+
+	window->nr_pages = len >> PAGE_SHIFT;
+
+	err = scif_send_alloc_request(ep, window);
+	if (err) {
+		scif_destroy_incomplete_window(ep, window);
+		scif_put_peer_dev(spdev);
+		return err;
+	}
+
+	if (!(map_flags & SCIF_MAP_KERNEL)) {
+		mm = __scif_acquire_mm();
+		map_flags |= SCIF_MAP_ULIMIT;
+	}
+	/* Pin down the pages */
+	err = __scif_pin_pages(addr, len, &prot,
+			       map_flags & (SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT),
+			       &pinned_pages);
+	if (err) {
+		scif_destroy_incomplete_window(ep, window);
+		__scif_release_mm(mm);
+		goto error;
+	}
+
+	window->pinned_pages = pinned_pages;
+	window->prot = pinned_pages->prot;
+	window->mm = mm;
+
+	/* Prepare the remote registration window */
+	err = scif_prep_remote_window(ep, window);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %ld\n", __func__, __LINE__, err);
+		goto error_unmap;
+	}
+
+	/* Tell the peer about the new window */
+	err = scif_send_scif_register(ep, window);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %ld\n", __func__, __LINE__, err);
+		goto error_unmap;
+	}
+
+	scif_put_peer_dev(spdev);
+	/* No further failures expected. Insert new window */
+	scif_insert_local_window(window, ep);
+	dev_dbg(&ep->remote_dev->sdev->dev,
+		"SCIFAPI register: ep %p addr %p len 0x%lx computed_offset 0x%llx\n",
+		epd, addr, len, computed_offset);
+	return computed_offset;
+error_unmap:
+	scif_destroy_window(ep, window);
+error:
+	scif_put_peer_dev(spdev);
+	dev_err(&ep->remote_dev->sdev->dev,
+		"%s %d err %ld\n", __func__, __LINE__, err);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_register);
+
+int
+scif_unregister(scif_epd_t epd, off_t offset, size_t len)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scif_window *window = NULL;
+	struct scif_rma_req req;
+	int nr_pages, err;
+	struct device *spdev;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI unregister: ep %p offset 0x%lx len 0x%lx\n",
+		ep, offset, len);
+	/* len must be page aligned. len should be non zero */
+	if (!len ||
+	    (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
+		return -EINVAL;
+
+	/* Offset is not page aligned or offset+len wraps around */
+	if ((ALIGN(offset, PAGE_SIZE) != offset) ||
+	    (offset < 0) ||
+	    (len > LONG_MAX - offset))
+		return -EINVAL;
+
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	might_sleep();
+	nr_pages = len >> PAGE_SHIFT;
+
+	req.out_window = &window;
+	req.offset = offset;
+	req.prot = 0;
+	req.nr_bytes = len;
+	req.type = SCIF_WINDOW_FULL;
+	req.head = &ep->rma_info.reg_list;
+
+	spdev = scif_get_peer_dev(ep->remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		return err;
+	}
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Does a valid window exist? */
+	err = scif_query_window(&req);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error;
+	}
+	/* Unregister all the windows in this range */
+	err = scif_rma_list_unregister(window, offset, nr_pages);
+	if (err)
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+error:
+	mutex_unlock(&ep->rma_info.rma_lock);
+	scif_put_peer_dev(spdev);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_unregister);
diff --git a/drivers/misc/mic/scif/scif_rma.h b/drivers/misc/mic/scif/scif_rma.h
new file mode 100644
index 000000000..fa6722279
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rma.h
@@ -0,0 +1,464 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_RMA_H
+#define SCIF_RMA_H
+
+#include <linux/dma_remapping.h>
+#include <linux/mmu_notifier.h>
+
+#include "../bus/scif_bus.h"
+
+/* If this bit is set then the mark is a remote fence mark */
+#define SCIF_REMOTE_FENCE_BIT          31
+/* Magic value used to indicate a remote fence request */
+#define SCIF_REMOTE_FENCE BIT_ULL(SCIF_REMOTE_FENCE_BIT)
+
+#define SCIF_MAX_UNALIGNED_BUF_SIZE (1024 * 1024ULL)
+#define SCIF_KMEM_UNALIGNED_BUF_SIZE (SCIF_MAX_UNALIGNED_BUF_SIZE + \
+				      (L1_CACHE_BYTES << 1))
+
+#define SCIF_IOVA_START_PFN		(1)
+#define SCIF_IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
+#define SCIF_DMA_64BIT_PFN SCIF_IOVA_PFN(DMA_BIT_MASK(64))
+#define SCIF_DMA_63BIT_PFN SCIF_IOVA_PFN(DMA_BIT_MASK(63))
+
+/*
+ * struct scif_endpt_rma_info - Per Endpoint Remote Memory Access Information
+ *
+ * @reg_list: List of registration windows for self
+ * @remote_reg_list: List of registration windows for peer
+ * @iovad: Offset generator
+ * @rma_lock: Synchronizes access to self/remote list and also protects the
+ *	      window from being destroyed while RMAs are in progress.
+ * @tc_lock: Synchronizes access to temporary cached windows list
+ *	     for SCIF Registration Caching.
+ * @mmn_lock: Synchronizes access to the list of MMU notifiers registered
+ * @tw_refcount: Keeps track of number of outstanding temporary registered
+ *		 windows created by scif_vreadfrom/scif_vwriteto which have
+ *		 not been destroyed.
+ * @tcw_refcount: Same as tw_refcount but for temporary cached windows
+ * @tcw_total_pages: Same as tcw_refcount but in terms of pages pinned
+ * @mmn_list: MMU notifier so that we can destroy the windows when required
+ * @fence_refcount: Keeps track of number of outstanding remote fence
+ *		    requests which have been received by the peer.
+ * @dma_chan: DMA channel used for all DMA transfers for this endpoint.
+ * @async_list_del: Detect asynchronous list entry deletion
+ * @vma_list: List of vmas with remote memory mappings
+ * @markwq: Wait queue used for scif_fence_mark/scif_fence_wait
+*/
+struct scif_endpt_rma_info {
+	struct list_head reg_list;
+	struct list_head remote_reg_list;
+	struct iova_domain iovad;
+	struct mutex rma_lock;
+	spinlock_t tc_lock;
+	struct mutex mmn_lock;
+	atomic_t tw_refcount;
+	atomic_t tcw_refcount;
+	atomic_t tcw_total_pages;
+	struct list_head mmn_list;
+	atomic_t fence_refcount;
+	struct dma_chan	*dma_chan;
+	int async_list_del;
+	struct list_head vma_list;
+	wait_queue_head_t markwq;
+};
+
+/*
+ * struct scif_fence_info - used for tracking fence requests
+ *
+ * @state: State of this transfer
+ * @wq: Fences wait on this queue
+ * @dma_mark: Used for storing the DMA mark
+ */
+struct scif_fence_info {
+	enum scif_msg_state state;
+	struct completion comp;
+	int dma_mark;
+};
+
+/*
+ * struct scif_remote_fence_info - used for tracking remote fence requests
+ *
+ * @msg: List of SCIF node QP fence messages
+ * @list: Link to list of remote fence requests
+ */
+struct scif_remote_fence_info {
+	struct scifmsg msg;
+	struct list_head list;
+};
+
+/*
+ * Specifies whether an RMA operation can span across partial windows, a single
+ * window or multiple contiguous windows. Mmaps can span across partial windows.
+ * Unregistration can span across complete windows. scif_get_pages() can span a
+ * single window. A window can also be of type self or peer.
+ */
+enum scif_window_type {
+	SCIF_WINDOW_PARTIAL,
+	SCIF_WINDOW_SINGLE,
+	SCIF_WINDOW_FULL,
+	SCIF_WINDOW_SELF,
+	SCIF_WINDOW_PEER
+};
+
+/* The number of physical addresses that can be stored in a PAGE. */
+#define SCIF_NR_ADDR_IN_PAGE   (0x1000 >> 3)
+
+/*
+ * struct scif_rma_lookup - RMA lookup data structure for page list transfers
+ *
+ * Store an array of lookup offsets. Each offset in this array maps
+ * one 4K page containing 512 physical addresses i.e. 2MB. 512 such
+ * offsets in a 4K page will correspond to 1GB of registered address space.
+
+ * @lookup: Array of offsets
+ * @offset: DMA offset of lookup array
+ */
+struct scif_rma_lookup {
+	dma_addr_t *lookup;
+	dma_addr_t offset;
+};
+
+/*
+ * struct scif_pinned_pages - A set of pinned pages obtained with
+ * scif_pin_pages() which could be part of multiple registered
+ * windows across different end points.
+ *
+ * @nr_pages: Number of pages which is defined as a s64 instead of an int
+ * to avoid sign extension with buffers >= 2GB
+ * @prot: read/write protections
+ * @map_flags: Flags specified during the pin operation
+ * @ref_count: Reference count bumped in terms of number of pages
+ * @magic: A magic value
+ * @pages: Array of pointers to struct pages populated with get_user_pages(..)
+ */
+struct scif_pinned_pages {
+	s64 nr_pages;
+	int prot;
+	int map_flags;
+	atomic_t ref_count;
+	u64 magic;
+	struct page **pages;
+};
+
+/*
+ * struct scif_status - Stores DMA status update information
+ *
+ * @src_dma_addr: Source buffer DMA address
+ * @val: src location for value to be written to the destination
+ * @ep: SCIF endpoint
+ */
+struct scif_status {
+	dma_addr_t src_dma_addr;
+	u64 val;
+	struct scif_endpt *ep;
+};
+
+/*
+ * struct scif_window - Registration Window for Self and Remote
+ *
+ * @nr_pages: Number of pages which is defined as a s64 instead of an int
+ * to avoid sign extension with buffers >= 2GB
+ * @nr_contig_chunks: Number of contiguous physical chunks
+ * @prot: read/write protections
+ * @ref_count: reference count in terms of number of pages
+ * @magic: Cookie to detect corruption
+ * @offset: registered offset
+ * @va_for_temp: va address that this window represents
+ * @dma_mark: Used to determine if all DMAs against the window are done
+ * @ep: Pointer to EP. Useful for passing EP around with messages to
+	avoid expensive list traversals.
+ * @list: link to list of windows for the endpoint
+ * @type: self or peer window
+ * @peer_window: Pointer to peer window. Useful for sending messages to peer
+ *		 without requiring an extra list traversal
+ * @unreg_state: unregistration state
+ * @offset_freed: True if the offset has been freed
+ * @temp: True for temporary windows created via scif_vreadfrom/scif_vwriteto
+ * @mm: memory descriptor for the task_struct which initiated the RMA
+ * @st: scatter gather table for DMA mappings with IOMMU enabled
+ * @pinned_pages: The set of pinned_pages backing this window
+ * @alloc_handle: Handle for sending ALLOC_REQ
+ * @regwq: Wait Queue for an registration (N)ACK
+ * @reg_state: Registration state
+ * @unregwq: Wait Queue for an unregistration (N)ACK
+ * @dma_addr_lookup: Lookup for physical addresses used for DMA
+ * @nr_lookup: Number of entries in lookup
+ * @mapped_offset: Offset used to map the window by the peer
+ * @dma_addr: Array of physical addresses used for Mgmt node & MIC initiated DMA
+ * @num_pages: Array specifying number of pages for each physical address
+ */
+struct scif_window {
+	s64 nr_pages;
+	int nr_contig_chunks;
+	int prot;
+	int ref_count;
+	u64 magic;
+	s64 offset;
+	unsigned long va_for_temp;
+	int dma_mark;
+	u64 ep;
+	struct list_head list;
+	enum scif_window_type type;
+	u64 peer_window;
+	enum scif_msg_state unreg_state;
+	bool offset_freed;
+	bool temp;
+	struct mm_struct *mm;
+	struct sg_table *st;
+	union {
+		struct {
+			struct scif_pinned_pages *pinned_pages;
+			struct scif_allocmsg alloc_handle;
+			wait_queue_head_t regwq;
+			enum scif_msg_state reg_state;
+			wait_queue_head_t unregwq;
+		};
+		struct {
+			struct scif_rma_lookup dma_addr_lookup;
+			struct scif_rma_lookup num_pages_lookup;
+			int nr_lookup;
+			dma_addr_t mapped_offset;
+		};
+	};
+	dma_addr_t *dma_addr;
+	u64 *num_pages;
+} __packed;
+
+/*
+ * scif_mmu_notif - SCIF mmu notifier information
+ *
+ * @mmu_notifier ep_mmu_notifier: MMU notifier operations
+ * @tc_reg_list: List of temp registration windows for self
+ * @mm: memory descriptor for the task_struct which initiated the RMA
+ * @ep: SCIF endpoint
+ * @list: link to list of MMU notifier information
+ */
+struct scif_mmu_notif {
+#ifdef CONFIG_MMU_NOTIFIER
+	struct mmu_notifier ep_mmu_notifier;
+#endif
+	struct list_head tc_reg_list;
+	struct mm_struct *mm;
+	struct scif_endpt *ep;
+	struct list_head list;
+};
+
+enum scif_rma_dir {
+	SCIF_LOCAL_TO_REMOTE,
+	SCIF_REMOTE_TO_LOCAL
+};
+
+extern struct kmem_cache *unaligned_cache;
+/* Initialize RMA for this EP */
+void scif_rma_ep_init(struct scif_endpt *ep);
+/* Check if epd can be uninitialized */
+int scif_rma_ep_can_uninit(struct scif_endpt *ep);
+/* Obtain a new offset. Callee must grab RMA lock */
+int scif_get_window_offset(struct scif_endpt *ep, int flags,
+			   s64 offset, int nr_pages, s64 *out_offset);
+/* Free offset. Callee must grab RMA lock */
+void scif_free_window_offset(struct scif_endpt *ep,
+			     struct scif_window *window, s64 offset);
+/* Create self registration window */
+struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
+				       s64 offset, bool temp);
+/* Destroy self registration window.*/
+int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window);
+void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window);
+/* Map pages of self window to Aperture/PCI */
+int scif_map_window(struct scif_dev *remote_dev,
+		    struct scif_window *window);
+/* Unregister a self window */
+int scif_unregister_window(struct scif_window *window);
+/* Destroy remote registration window */
+void
+scif_destroy_remote_window(struct scif_window *window);
+/* remove valid remote memory mappings from process address space */
+void scif_zap_mmaps(int node);
+/* Query if any applications have remote memory mappings */
+bool scif_rma_do_apps_have_mmaps(int node);
+/* Cleanup remote registration lists for zombie endpoints */
+void scif_cleanup_rma_for_zombies(int node);
+/* Reserve a DMA channel for a particular endpoint */
+int scif_reserve_dma_chan(struct scif_endpt *ep);
+/* Setup a DMA mark for an endpoint */
+int _scif_fence_mark(scif_epd_t epd, int *mark);
+int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val,
+		     enum scif_window_type type);
+void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_mmu_notif_handler(struct work_struct *work);
+void scif_rma_handle_remote_fences(void);
+void scif_rma_destroy_windows(void);
+void scif_rma_destroy_tcw_invalid(void);
+int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan);
+
+struct scif_window_iter {
+	s64 offset;
+	int index;
+};
+
+static inline void
+scif_init_window_iter(struct scif_window *window, struct scif_window_iter *iter)
+{
+	iter->offset = window->offset;
+	iter->index = 0;
+}
+
+dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off,
+				size_t *nr_bytes,
+				struct scif_window_iter *iter);
+static inline
+dma_addr_t __scif_off_to_dma_addr(struct scif_window *window, s64 off)
+{
+	return scif_off_to_dma_addr(window, off, NULL, NULL);
+}
+
+static inline bool scif_unaligned(off_t src_offset, off_t dst_offset)
+{
+	src_offset = src_offset & (L1_CACHE_BYTES - 1);
+	dst_offset = dst_offset & (L1_CACHE_BYTES - 1);
+	return !(src_offset == dst_offset);
+}
+
+/*
+ * scif_zalloc:
+ * @size: Size of the allocation request.
+ *
+ * Helper API which attempts to allocate zeroed pages via
+ * __get_free_pages(..) first and then falls back on
+ * vzalloc(..) if that fails.
+ */
+static inline void *scif_zalloc(size_t size)
+{
+	void *ret = NULL;
+	size_t align = ALIGN(size, PAGE_SIZE);
+
+	if (align && get_order(align) < MAX_ORDER)
+		ret = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+					       get_order(align));
+	return ret ? ret : vzalloc(align);
+}
+
+/*
+ * scif_free:
+ * @addr: Address to be freed.
+ * @size: Size of the allocation.
+ * Helper API which frees memory allocated via scif_zalloc().
+ */
+static inline void scif_free(void *addr, size_t size)
+{
+	size_t align = ALIGN(size, PAGE_SIZE);
+
+	if (is_vmalloc_addr(addr))
+		vfree(addr);
+	else
+		free_pages((unsigned long)addr, get_order(align));
+}
+
+static inline void scif_get_window(struct scif_window *window, int nr_pages)
+{
+	window->ref_count += nr_pages;
+}
+
+static inline void scif_put_window(struct scif_window *window, int nr_pages)
+{
+	window->ref_count -= nr_pages;
+}
+
+static inline void scif_set_window_ref(struct scif_window *window, int nr_pages)
+{
+	window->ref_count = nr_pages;
+}
+
+static inline void
+scif_queue_for_cleanup(struct scif_window *window, struct list_head *list)
+{
+	spin_lock(&scif_info.rmalock);
+	list_add_tail(&window->list, list);
+	spin_unlock(&scif_info.rmalock);
+	schedule_work(&scif_info.misc_work);
+}
+
+static inline void __scif_rma_destroy_tcw_helper(struct scif_window *window)
+{
+	list_del_init(&window->list);
+	scif_queue_for_cleanup(window, &scif_info.rma_tc);
+}
+
+static inline bool scif_is_iommu_enabled(void)
+{
+#ifdef CONFIG_INTEL_IOMMU
+	return intel_iommu_enabled;
+#else
+	return false;
+#endif
+}
+#endif /* SCIF_RMA_H */
diff --git a/drivers/misc/mic/scif/scif_rma_list.c b/drivers/misc/mic/scif/scif_rma_list.c
new file mode 100644
index 000000000..a036dbb41
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rma_list.c
@@ -0,0 +1,291 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+#include <linux/mmu_notifier.h>
+#include <linux/highmem.h>
+
+/*
+ * scif_insert_tcw:
+ *
+ * Insert a temp window to the temp registration list sorted by va_for_temp.
+ * RMA lock must be held.
+ */
+void scif_insert_tcw(struct scif_window *window, struct list_head *head)
+{
+	struct scif_window *curr = NULL;
+	struct scif_window *prev = list_entry(head, struct scif_window, list);
+	struct list_head *item;
+
+	INIT_LIST_HEAD(&window->list);
+	/* Compare with tail and if the entry is new tail add it to the end */
+	if (!list_empty(head)) {
+		curr = list_entry(head->prev, struct scif_window, list);
+		if (curr->va_for_temp < window->va_for_temp) {
+			list_add_tail(&window->list, head);
+			return;
+		}
+	}
+	list_for_each(item, head) {
+		curr = list_entry(item, struct scif_window, list);
+		if (curr->va_for_temp > window->va_for_temp)
+			break;
+		prev = curr;
+	}
+	list_add(&window->list, &prev->list);
+}
+
+/*
+ * scif_insert_window:
+ *
+ * Insert a window to the self registration list sorted by offset.
+ * RMA lock must be held.
+ */
+void scif_insert_window(struct scif_window *window, struct list_head *head)
+{
+	struct scif_window *curr = NULL, *prev = NULL;
+	struct list_head *item;
+
+	INIT_LIST_HEAD(&window->list);
+	list_for_each(item, head) {
+		curr = list_entry(item, struct scif_window, list);
+		if (curr->offset > window->offset)
+			break;
+		prev = curr;
+	}
+	if (!prev)
+		list_add(&window->list, head);
+	else
+		list_add(&window->list, &prev->list);
+	scif_set_window_ref(window, window->nr_pages);
+}
+
+/*
+ * scif_query_tcw:
+ *
+ * Query the temp cached registration list of ep for an overlapping window
+ * in case of permission mismatch, destroy the previous window. if permissions
+ * match and overlap is partial, destroy the window but return the new range
+ * RMA lock must be held.
+ */
+int scif_query_tcw(struct scif_endpt *ep, struct scif_rma_req *req)
+{
+	struct list_head *item, *temp, *head = req->head;
+	struct scif_window *window;
+	u64 start_va_window, start_va_req = req->va_for_temp;
+	u64 end_va_window, end_va_req = start_va_req + req->nr_bytes;
+
+	if (!req->nr_bytes)
+		return -EINVAL;
+	/*
+	 * Avoid traversing the entire list to find out that there
+	 * is no entry that matches
+	 */
+	if (!list_empty(head)) {
+		window = list_last_entry(head, struct scif_window, list);
+		end_va_window = window->va_for_temp +
+			(window->nr_pages << PAGE_SHIFT);
+		if (start_va_req > end_va_window)
+			return -ENXIO;
+	}
+	list_for_each_safe(item, temp, head) {
+		window = list_entry(item, struct scif_window, list);
+		start_va_window = window->va_for_temp;
+		end_va_window = window->va_for_temp +
+			(window->nr_pages << PAGE_SHIFT);
+		if (start_va_req < start_va_window &&
+		    end_va_req < start_va_window)
+			break;
+		if (start_va_req >= end_va_window)
+			continue;
+		if ((window->prot & req->prot) == req->prot) {
+			if (start_va_req >= start_va_window &&
+			    end_va_req <= end_va_window) {
+				*req->out_window = window;
+				return 0;
+			}
+			/* expand window */
+			if (start_va_req < start_va_window) {
+				req->nr_bytes +=
+					start_va_window - start_va_req;
+				req->va_for_temp = start_va_window;
+			}
+			if (end_va_req >= end_va_window)
+				req->nr_bytes += end_va_window - end_va_req;
+		}
+		/* Destroy the old window to create a new one */
+		__scif_rma_destroy_tcw_helper(window);
+		break;
+	}
+	return -ENXIO;
+}
+
+/*
+ * scif_query_window:
+ *
+ * Query the registration list and check if a valid contiguous
+ * range of windows exist.
+ * RMA lock must be held.
+ */
+int scif_query_window(struct scif_rma_req *req)
+{
+	struct list_head *item;
+	struct scif_window *window;
+	s64 end_offset, offset = req->offset;
+	u64 tmp_min, nr_bytes_left = req->nr_bytes;
+
+	if (!req->nr_bytes)
+		return -EINVAL;
+
+	list_for_each(item, req->head) {
+		window = list_entry(item, struct scif_window, list);
+		end_offset = window->offset +
+			(window->nr_pages << PAGE_SHIFT);
+		if (offset < window->offset)
+			/* Offset not found! */
+			return -ENXIO;
+		if (offset >= end_offset)
+			continue;
+		/* Check read/write protections. */
+		if ((window->prot & req->prot) != req->prot)
+			return -EPERM;
+		if (nr_bytes_left == req->nr_bytes)
+			/* Store the first window */
+			*req->out_window = window;
+		tmp_min = min((u64)end_offset - offset, nr_bytes_left);
+		nr_bytes_left -= tmp_min;
+		offset += tmp_min;
+		/*
+		 * Range requested encompasses
+		 * multiple windows contiguously.
+		 */
+		if (!nr_bytes_left) {
+			/* Done for partial window */
+			if (req->type == SCIF_WINDOW_PARTIAL ||
+			    req->type == SCIF_WINDOW_SINGLE)
+				return 0;
+			/* Extra logic for full windows */
+			if (offset == end_offset)
+				/* Spanning multiple whole windows */
+				return 0;
+				/* Not spanning multiple whole windows */
+			return -ENXIO;
+		}
+		if (req->type == SCIF_WINDOW_SINGLE)
+			break;
+	}
+	dev_err(scif_info.mdev.this_device,
+		"%s %d ENXIO\n", __func__, __LINE__);
+	return -ENXIO;
+}
+
+/*
+ * scif_rma_list_unregister:
+ *
+ * Traverse the self registration list starting from window:
+ * 1) Call scif_unregister_window(..)
+ * RMA lock must be held.
+ */
+int scif_rma_list_unregister(struct scif_window *window,
+			     s64 offset, int nr_pages)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+	struct list_head *head = &ep->rma_info.reg_list;
+	s64 end_offset;
+	int err = 0;
+	int loop_nr_pages;
+	struct scif_window *_window;
+
+	list_for_each_entry_safe_from(window, _window, head, list) {
+		end_offset = window->offset + (window->nr_pages << PAGE_SHIFT);
+		loop_nr_pages = min((int)((end_offset - offset) >> PAGE_SHIFT),
+				    nr_pages);
+		err = scif_unregister_window(window);
+		if (err)
+			return err;
+		nr_pages -= loop_nr_pages;
+		offset += (loop_nr_pages << PAGE_SHIFT);
+		if (!nr_pages)
+			break;
+	}
+	return 0;
+}
+
+/*
+ * scif_unmap_all_window:
+ *
+ * Traverse all the windows in the self registration list and:
+ * 1) Delete any DMA mappings created
+ */
+void scif_unmap_all_windows(scif_epd_t epd)
+{
+	struct list_head *item, *tmp;
+	struct scif_window *window;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct list_head *head = &ep->rma_info.reg_list;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	list_for_each_safe(item, tmp, head) {
+		window = list_entry(item, struct scif_window, list);
+		scif_unmap_window(ep->remote_dev, window);
+	}
+	mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/*
+ * scif_unregister_all_window:
+ *
+ * Traverse all the windows in the self registration list and:
+ * 1) Call scif_unregister_window(..)
+ * RMA lock must be held.
+ */
+int scif_unregister_all_windows(scif_epd_t epd)
+{
+	struct list_head *item, *tmp;
+	struct scif_window *window;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct list_head *head = &ep->rma_info.reg_list;
+	int err = 0;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+retry:
+	item = NULL;
+	tmp = NULL;
+	list_for_each_safe(item, tmp, head) {
+		window = list_entry(item, struct scif_window, list);
+		ep->rma_info.async_list_del = 0;
+		err = scif_unregister_window(window);
+		if (err)
+			dev_err(scif_info.mdev.this_device,
+				"%s %d err %d\n",
+				__func__, __LINE__, err);
+		/*
+		 * Need to restart list traversal if there has been
+		 * an asynchronous list entry deletion.
+		 */
+		if (READ_ONCE(ep->rma_info.async_list_del))
+			goto retry;
+	}
+	mutex_unlock(&ep->rma_info.rma_lock);
+	if (!list_empty(&ep->rma_info.mmn_list)) {
+		spin_lock(&scif_info.rmalock);
+		list_add_tail(&ep->mmu_list, &scif_info.mmu_notif_cleanup);
+		spin_unlock(&scif_info.rmalock);
+		schedule_work(&scif_info.mmu_notif_work);
+	}
+	return err;
+}
diff --git a/drivers/misc/mic/scif/scif_rma_list.h b/drivers/misc/mic/scif/scif_rma_list.h
new file mode 100644
index 000000000..7d58d1d55
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rma_list.h
@@ -0,0 +1,57 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_RMA_LIST_H
+#define SCIF_RMA_LIST_H
+
+/*
+ * struct scif_rma_req - Self Registration list RMA Request query
+ *
+ * @out_window - Returns the window if found
+ * @offset: Starting offset
+ * @nr_bytes: number of bytes
+ * @prot: protection requested i.e. read or write or both
+ * @type: Specify single, partial or multiple windows
+ * @head: Head of list on which to search
+ * @va_for_temp: VA for searching temporary cached windows
+ */
+struct scif_rma_req {
+	struct scif_window **out_window;
+	union {
+		s64 offset;
+		unsigned long va_for_temp;
+	};
+	size_t nr_bytes;
+	int prot;
+	enum scif_window_type type;
+	struct list_head *head;
+};
+
+/* Insert */
+void scif_insert_window(struct scif_window *window, struct list_head *head);
+void scif_insert_tcw(struct scif_window *window,
+		     struct list_head *head);
+/* Query */
+int scif_query_window(struct scif_rma_req *request);
+int scif_query_tcw(struct scif_endpt *ep, struct scif_rma_req *request);
+/* Called from close to unregister all self windows */
+int scif_unregister_all_windows(scif_epd_t epd);
+void scif_unmap_all_windows(scif_epd_t epd);
+/* Traverse list and unregister */
+int scif_rma_list_unregister(struct scif_window *window, s64 offset,
+			     int nr_pages);
+#endif /* SCIF_RMA_LIST_H */
diff --git a/drivers/misc/mic/vop/Makefile b/drivers/misc/mic/vop/Makefile
new file mode 100644
index 000000000..78819c899
--- /dev/null
+++ b/drivers/misc/mic/vop/Makefile
@@ -0,0 +1,9 @@
+#
+# Makefile - Intel MIC Linux driver.
+# Copyright(c) 2016, Intel Corporation.
+#
+obj-m := vop.o
+
+vop-objs += vop_main.o
+vop-objs += vop_debugfs.o
+vop-objs += vop_vringh.o
diff --git a/drivers/misc/mic/vop/vop_debugfs.c b/drivers/misc/mic/vop/vop_debugfs.c
new file mode 100644
index 000000000..ab43884e5
--- /dev/null
+++ b/drivers/misc/mic/vop/vop_debugfs.c
@@ -0,0 +1,232 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel Virtio Over PCIe (VOP) driver.
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "vop_main.h"
+
+static int vop_dp_show(struct seq_file *s, void *pos)
+{
+	struct mic_device_desc *d;
+	struct mic_device_ctrl *dc;
+	struct mic_vqconfig *vqconfig;
+	__u32 *features;
+	__u8 *config;
+	struct vop_info *vi = s->private;
+	struct vop_device *vpdev = vi->vpdev;
+	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
+	int j, k;
+
+	seq_printf(s, "Bootparam: magic 0x%x\n",
+		   bootparam->magic);
+	seq_printf(s, "Bootparam: h2c_config_db %d\n",
+		   bootparam->h2c_config_db);
+	seq_printf(s, "Bootparam: node_id %d\n",
+		   bootparam->node_id);
+	seq_printf(s, "Bootparam: c2h_scif_db %d\n",
+		   bootparam->c2h_scif_db);
+	seq_printf(s, "Bootparam: h2c_scif_db %d\n",
+		   bootparam->h2c_scif_db);
+	seq_printf(s, "Bootparam: scif_host_dma_addr 0x%llx\n",
+		   bootparam->scif_host_dma_addr);
+	seq_printf(s, "Bootparam: scif_card_dma_addr 0x%llx\n",
+		   bootparam->scif_card_dma_addr);
+
+	for (j = sizeof(*bootparam);
+		j < MIC_DP_SIZE; j += mic_total_desc_size(d)) {
+		d = (void *)bootparam + j;
+		dc = (void *)d + mic_aligned_desc_size(d);
+
+		/* end of list */
+		if (d->type == 0)
+			break;
+
+		if (d->type == -1)
+			continue;
+
+		seq_printf(s, "Type %d ", d->type);
+		seq_printf(s, "Num VQ %d ", d->num_vq);
+		seq_printf(s, "Feature Len %d\n", d->feature_len);
+		seq_printf(s, "Config Len %d ", d->config_len);
+		seq_printf(s, "Shutdown Status %d\n", d->status);
+
+		for (k = 0; k < d->num_vq; k++) {
+			vqconfig = mic_vq_config(d) + k;
+			seq_printf(s, "vqconfig[%d]: ", k);
+			seq_printf(s, "address 0x%llx ",
+				   vqconfig->address);
+			seq_printf(s, "num %d ", vqconfig->num);
+			seq_printf(s, "used address 0x%llx\n",
+				   vqconfig->used_address);
+		}
+
+		features = (__u32 *)mic_vq_features(d);
+		seq_printf(s, "Features: Host 0x%x ", features[0]);
+		seq_printf(s, "Guest 0x%x\n", features[1]);
+
+		config = mic_vq_configspace(d);
+		for (k = 0; k < d->config_len; k++)
+			seq_printf(s, "config[%d]=%d\n", k, config[k]);
+
+		seq_puts(s, "Device control:\n");
+		seq_printf(s, "Config Change %d ", dc->config_change);
+		seq_printf(s, "Vdev reset %d\n", dc->vdev_reset);
+		seq_printf(s, "Guest Ack %d ", dc->guest_ack);
+		seq_printf(s, "Host ack %d\n", dc->host_ack);
+		seq_printf(s, "Used address updated %d ",
+			   dc->used_address_updated);
+		seq_printf(s, "Vdev 0x%llx\n", dc->vdev);
+		seq_printf(s, "c2h doorbell %d ", dc->c2h_vdev_db);
+		seq_printf(s, "h2c doorbell %d\n", dc->h2c_vdev_db);
+	}
+	schedule_work(&vi->hotplug_work);
+	return 0;
+}
+
+static int vop_dp_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, vop_dp_show, inode->i_private);
+}
+
+static int vop_dp_debug_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file);
+}
+
+static const struct file_operations dp_ops = {
+	.owner   = THIS_MODULE,
+	.open    = vop_dp_debug_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = vop_dp_debug_release
+};
+
+static int vop_vdev_info_show(struct seq_file *s, void *unused)
+{
+	struct vop_info *vi = s->private;
+	struct list_head *pos, *tmp;
+	struct vop_vdev *vdev;
+	int i, j;
+
+	mutex_lock(&vi->vop_mutex);
+	list_for_each_safe(pos, tmp, &vi->vdev_list) {
+		vdev = list_entry(pos, struct vop_vdev, list);
+		seq_printf(s, "VDEV type %d state %s in %ld out %ld in_dma %ld out_dma %ld\n",
+			   vdev->virtio_id,
+			   vop_vdevup(vdev) ? "UP" : "DOWN",
+			   vdev->in_bytes,
+			   vdev->out_bytes,
+			   vdev->in_bytes_dma,
+			   vdev->out_bytes_dma);
+		for (i = 0; i < MIC_MAX_VRINGS; i++) {
+			struct vring_desc *desc;
+			struct vring_avail *avail;
+			struct vring_used *used;
+			struct vop_vringh *vvr = &vdev->vvr[i];
+			struct vringh *vrh = &vvr->vrh;
+			int num = vrh->vring.num;
+
+			if (!num)
+				continue;
+			desc = vrh->vring.desc;
+			seq_printf(s, "vring i %d avail_idx %d",
+				   i, vvr->vring.info->avail_idx & (num - 1));
+			seq_printf(s, " vring i %d avail_idx %d\n",
+				   i, vvr->vring.info->avail_idx);
+			seq_printf(s, "vrh i %d weak_barriers %d",
+				   i, vrh->weak_barriers);
+			seq_printf(s, " last_avail_idx %d last_used_idx %d",
+				   vrh->last_avail_idx, vrh->last_used_idx);
+			seq_printf(s, " completed %d\n", vrh->completed);
+			for (j = 0; j < num; j++) {
+				seq_printf(s, "desc[%d] addr 0x%llx len %d",
+					   j, desc->addr, desc->len);
+				seq_printf(s, " flags 0x%x next %d\n",
+					   desc->flags, desc->next);
+				desc++;
+			}
+			avail = vrh->vring.avail;
+			seq_printf(s, "avail flags 0x%x idx %d\n",
+				   vringh16_to_cpu(vrh, avail->flags),
+				   vringh16_to_cpu(vrh,
+						   avail->idx) & (num - 1));
+			seq_printf(s, "avail flags 0x%x idx %d\n",
+				   vringh16_to_cpu(vrh, avail->flags),
+				   vringh16_to_cpu(vrh, avail->idx));
+			for (j = 0; j < num; j++)
+				seq_printf(s, "avail ring[%d] %d\n",
+					   j, avail->ring[j]);
+			used = vrh->vring.used;
+			seq_printf(s, "used flags 0x%x idx %d\n",
+				   vringh16_to_cpu(vrh, used->flags),
+				   vringh16_to_cpu(vrh, used->idx) & (num - 1));
+			seq_printf(s, "used flags 0x%x idx %d\n",
+				   vringh16_to_cpu(vrh, used->flags),
+				   vringh16_to_cpu(vrh, used->idx));
+			for (j = 0; j < num; j++)
+				seq_printf(s, "used ring[%d] id %d len %d\n",
+					   j, vringh32_to_cpu(vrh,
+							      used->ring[j].id),
+					   vringh32_to_cpu(vrh,
+							   used->ring[j].len));
+		}
+	}
+	mutex_unlock(&vi->vop_mutex);
+
+	return 0;
+}
+
+static int vop_vdev_info_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, vop_vdev_info_show, inode->i_private);
+}
+
+static int vop_vdev_info_debug_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file);
+}
+
+static const struct file_operations vdev_info_ops = {
+	.owner   = THIS_MODULE,
+	.open    = vop_vdev_info_debug_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = vop_vdev_info_debug_release
+};
+
+void vop_init_debugfs(struct vop_info *vi)
+{
+	char name[16];
+
+	snprintf(name, sizeof(name), "%s%d", KBUILD_MODNAME, vi->vpdev->dnode);
+	vi->dbg = debugfs_create_dir(name, NULL);
+	if (!vi->dbg) {
+		pr_err("can't create debugfs dir vop\n");
+		return;
+	}
+	debugfs_create_file("dp", 0444, vi->dbg, vi, &dp_ops);
+	debugfs_create_file("vdev_info", 0444, vi->dbg, vi, &vdev_info_ops);
+}
+
+void vop_exit_debugfs(struct vop_info *vi)
+{
+	debugfs_remove_recursive(vi->dbg);
+}
diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c
new file mode 100644
index 000000000..f4332a97c
--- /dev/null
+++ b/drivers/misc/mic/vop/vop_main.c
@@ -0,0 +1,766 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Adapted from:
+ *
+ * virtio for kvm on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ *
+ * Intel Virtio Over PCIe (VOP) driver.
+ *
+ */
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/dma-mapping.h>
+
+#include "vop_main.h"
+
+#define VOP_MAX_VRINGS 4
+
+/*
+ * _vop_vdev - Allocated per virtio device instance injected by the peer.
+ *
+ * @vdev: Virtio device
+ * @desc: Virtio device page descriptor
+ * @dc: Virtio device control
+ * @vpdev: VOP device which is the parent for this virtio device
+ * @vr: Buffer for accessing the VRING
+ * @used: Buffer for used
+ * @used_size: Size of the used buffer
+ * @reset_done: Track whether VOP reset is complete
+ * @virtio_cookie: Cookie returned upon requesting a interrupt
+ * @c2h_vdev_db: The doorbell used by the guest to interrupt the host
+ * @h2c_vdev_db: The doorbell used by the host to interrupt the guest
+ * @dnode: The destination node
+ */
+struct _vop_vdev {
+	struct virtio_device vdev;
+	struct mic_device_desc __iomem *desc;
+	struct mic_device_ctrl __iomem *dc;
+	struct vop_device *vpdev;
+	void __iomem *vr[VOP_MAX_VRINGS];
+	dma_addr_t used[VOP_MAX_VRINGS];
+	int used_size[VOP_MAX_VRINGS];
+	struct completion reset_done;
+	struct mic_irq *virtio_cookie;
+	int c2h_vdev_db;
+	int h2c_vdev_db;
+	int dnode;
+};
+
+#define to_vopvdev(vd) container_of(vd, struct _vop_vdev, vdev)
+
+#define _vop_aligned_desc_size(d) __mic_align(_vop_desc_size(d), 8)
+
+/* Helper API to obtain the parent of the virtio device */
+static inline struct device *_vop_dev(struct _vop_vdev *vdev)
+{
+	return vdev->vdev.dev.parent;
+}
+
+static inline unsigned _vop_desc_size(struct mic_device_desc __iomem *desc)
+{
+	return sizeof(*desc)
+		+ ioread8(&desc->num_vq) * sizeof(struct mic_vqconfig)
+		+ ioread8(&desc->feature_len) * 2
+		+ ioread8(&desc->config_len);
+}
+
+static inline struct mic_vqconfig __iomem *
+_vop_vq_config(struct mic_device_desc __iomem *desc)
+{
+	return (struct mic_vqconfig __iomem *)(desc + 1);
+}
+
+static inline u8 __iomem *
+_vop_vq_features(struct mic_device_desc __iomem *desc)
+{
+	return (u8 __iomem *)(_vop_vq_config(desc) + ioread8(&desc->num_vq));
+}
+
+static inline u8 __iomem *
+_vop_vq_configspace(struct mic_device_desc __iomem *desc)
+{
+	return _vop_vq_features(desc) + ioread8(&desc->feature_len) * 2;
+}
+
+static inline unsigned
+_vop_total_desc_size(struct mic_device_desc __iomem *desc)
+{
+	return _vop_aligned_desc_size(desc) + sizeof(struct mic_device_ctrl);
+}
+
+/* This gets the device's feature bits. */
+static u64 vop_get_features(struct virtio_device *vdev)
+{
+	unsigned int i, bits;
+	u32 features = 0;
+	struct mic_device_desc __iomem *desc = to_vopvdev(vdev)->desc;
+	u8 __iomem *in_features = _vop_vq_features(desc);
+	int feature_len = ioread8(&desc->feature_len);
+
+	bits = min_t(unsigned, feature_len, sizeof(vdev->features)) * 8;
+	for (i = 0; i < bits; i++)
+		if (ioread8(&in_features[i / 8]) & (BIT(i % 8)))
+			features |= BIT(i);
+
+	return features;
+}
+
+static int vop_finalize_features(struct virtio_device *vdev)
+{
+	unsigned int i, bits;
+	struct mic_device_desc __iomem *desc = to_vopvdev(vdev)->desc;
+	u8 feature_len = ioread8(&desc->feature_len);
+	/* Second half of bitmap is features we accept. */
+	u8 __iomem *out_features =
+		_vop_vq_features(desc) + feature_len;
+
+	/* Give virtio_ring a chance to accept features. */
+	vring_transport_features(vdev);
+
+	memset_io(out_features, 0, feature_len);
+	bits = min_t(unsigned, feature_len,
+		     sizeof(vdev->features)) * 8;
+	for (i = 0; i < bits; i++) {
+		if (__virtio_test_bit(vdev, i))
+			iowrite8(ioread8(&out_features[i / 8]) | (1 << (i % 8)),
+				 &out_features[i / 8]);
+	}
+	return 0;
+}
+
+/*
+ * Reading and writing elements in config space
+ */
+static void vop_get(struct virtio_device *vdev, unsigned int offset,
+		    void *buf, unsigned len)
+{
+	struct mic_device_desc __iomem *desc = to_vopvdev(vdev)->desc;
+
+	if (offset + len > ioread8(&desc->config_len))
+		return;
+	memcpy_fromio(buf, _vop_vq_configspace(desc) + offset, len);
+}
+
+static void vop_set(struct virtio_device *vdev, unsigned int offset,
+		    const void *buf, unsigned len)
+{
+	struct mic_device_desc __iomem *desc = to_vopvdev(vdev)->desc;
+
+	if (offset + len > ioread8(&desc->config_len))
+		return;
+	memcpy_toio(_vop_vq_configspace(desc) + offset, buf, len);
+}
+
+/*
+ * The operations to get and set the status word just access the status
+ * field of the device descriptor. set_status also interrupts the host
+ * to tell about status changes.
+ */
+static u8 vop_get_status(struct virtio_device *vdev)
+{
+	return ioread8(&to_vopvdev(vdev)->desc->status);
+}
+
+static void vop_set_status(struct virtio_device *dev, u8 status)
+{
+	struct _vop_vdev *vdev = to_vopvdev(dev);
+	struct vop_device *vpdev = vdev->vpdev;
+
+	if (!status)
+		return;
+	iowrite8(status, &vdev->desc->status);
+	vpdev->hw_ops->send_intr(vpdev, vdev->c2h_vdev_db);
+}
+
+/* Inform host on a virtio device reset and wait for ack from host */
+static void vop_reset_inform_host(struct virtio_device *dev)
+{
+	struct _vop_vdev *vdev = to_vopvdev(dev);
+	struct mic_device_ctrl __iomem *dc = vdev->dc;
+	struct vop_device *vpdev = vdev->vpdev;
+	int retry;
+
+	iowrite8(0, &dc->host_ack);
+	iowrite8(1, &dc->vdev_reset);
+	vpdev->hw_ops->send_intr(vpdev, vdev->c2h_vdev_db);
+
+	/* Wait till host completes all card accesses and acks the reset */
+	for (retry = 100; retry--;) {
+		if (ioread8(&dc->host_ack))
+			break;
+		msleep(100);
+	};
+
+	dev_dbg(_vop_dev(vdev), "%s: retry: %d\n", __func__, retry);
+
+	/* Reset status to 0 in case we timed out */
+	iowrite8(0, &vdev->desc->status);
+}
+
+static void vop_reset(struct virtio_device *dev)
+{
+	struct _vop_vdev *vdev = to_vopvdev(dev);
+
+	dev_dbg(_vop_dev(vdev), "%s: virtio id %d\n",
+		__func__, dev->id.device);
+
+	vop_reset_inform_host(dev);
+	complete_all(&vdev->reset_done);
+}
+
+/*
+ * The virtio_ring code calls this API when it wants to notify the Host.
+ */
+static bool vop_notify(struct virtqueue *vq)
+{
+	struct _vop_vdev *vdev = vq->priv;
+	struct vop_device *vpdev = vdev->vpdev;
+
+	vpdev->hw_ops->send_intr(vpdev, vdev->c2h_vdev_db);
+	return true;
+}
+
+static void vop_del_vq(struct virtqueue *vq, int n)
+{
+	struct _vop_vdev *vdev = to_vopvdev(vq->vdev);
+	struct vring *vr = (struct vring *)(vq + 1);
+	struct vop_device *vpdev = vdev->vpdev;
+
+	dma_unmap_single(&vpdev->dev, vdev->used[n],
+			 vdev->used_size[n], DMA_BIDIRECTIONAL);
+	free_pages((unsigned long)vr->used, get_order(vdev->used_size[n]));
+	vring_del_virtqueue(vq);
+	vpdev->hw_ops->iounmap(vpdev, vdev->vr[n]);
+	vdev->vr[n] = NULL;
+}
+
+static void vop_del_vqs(struct virtio_device *dev)
+{
+	struct _vop_vdev *vdev = to_vopvdev(dev);
+	struct virtqueue *vq, *n;
+	int idx = 0;
+
+	dev_dbg(_vop_dev(vdev), "%s\n", __func__);
+
+	list_for_each_entry_safe(vq, n, &dev->vqs, list)
+		vop_del_vq(vq, idx++);
+}
+
+/*
+ * This routine will assign vring's allocated in host/io memory. Code in
+ * virtio_ring.c however continues to access this io memory as if it were local
+ * memory without io accessors.
+ */
+static struct virtqueue *vop_find_vq(struct virtio_device *dev,
+				     unsigned index,
+				     void (*callback)(struct virtqueue *vq),
+				     const char *name, bool ctx)
+{
+	struct _vop_vdev *vdev = to_vopvdev(dev);
+	struct vop_device *vpdev = vdev->vpdev;
+	struct mic_vqconfig __iomem *vqconfig;
+	struct mic_vqconfig config;
+	struct virtqueue *vq;
+	void __iomem *va;
+	struct _mic_vring_info __iomem *info;
+	void *used;
+	int vr_size, _vr_size, err, magic;
+	struct vring *vr;
+	u8 type = ioread8(&vdev->desc->type);
+
+	if (index >= ioread8(&vdev->desc->num_vq))
+		return ERR_PTR(-ENOENT);
+
+	if (!name)
+		return ERR_PTR(-ENOENT);
+
+	/* First assign the vring's allocated in host memory */
+	vqconfig = _vop_vq_config(vdev->desc) + index;
+	memcpy_fromio(&config, vqconfig, sizeof(config));
+	_vr_size = round_up(vring_size(le16_to_cpu(config.num), MIC_VIRTIO_RING_ALIGN), 4);
+	vr_size = PAGE_ALIGN(_vr_size + sizeof(struct _mic_vring_info));
+	va = vpdev->hw_ops->ioremap(vpdev, le64_to_cpu(config.address),
+			vr_size);
+	if (!va)
+		return ERR_PTR(-ENOMEM);
+	vdev->vr[index] = va;
+	memset_io(va, 0x0, _vr_size);
+	vq = vring_new_virtqueue(
+				index,
+				le16_to_cpu(config.num), MIC_VIRTIO_RING_ALIGN,
+				dev,
+				false,
+				ctx,
+				(void __force *)va, vop_notify, callback, name);
+	if (!vq) {
+		err = -ENOMEM;
+		goto unmap;
+	}
+	info = va + _vr_size;
+	magic = ioread32(&info->magic);
+
+	if (WARN(magic != MIC_MAGIC + type + index, "magic mismatch")) {
+		err = -EIO;
+		goto unmap;
+	}
+
+	/* Allocate and reassign used ring now */
+	vdev->used_size[index] = PAGE_ALIGN(sizeof(__u16) * 3 +
+					     sizeof(struct vring_used_elem) *
+					     le16_to_cpu(config.num));
+	used = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+					get_order(vdev->used_size[index]));
+	if (!used) {
+		err = -ENOMEM;
+		dev_err(_vop_dev(vdev), "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto del_vq;
+	}
+	vdev->used[index] = dma_map_single(&vpdev->dev, used,
+					    vdev->used_size[index],
+					    DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(&vpdev->dev, vdev->used[index])) {
+		err = -ENOMEM;
+		dev_err(_vop_dev(vdev), "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto free_used;
+	}
+	writeq(vdev->used[index], &vqconfig->used_address);
+	/*
+	 * To reassign the used ring here we are directly accessing
+	 * struct vring_virtqueue which is a private data structure
+	 * in virtio_ring.c. At the minimum, a BUILD_BUG_ON() in
+	 * vring_new_virtqueue() would ensure that
+	 *  (&vq->vring == (struct vring *) (&vq->vq + 1));
+	 */
+	vr = (struct vring *)(vq + 1);
+	vr->used = used;
+
+	vq->priv = vdev;
+	return vq;
+free_used:
+	free_pages((unsigned long)used,
+		   get_order(vdev->used_size[index]));
+del_vq:
+	vring_del_virtqueue(vq);
+unmap:
+	vpdev->hw_ops->iounmap(vpdev, vdev->vr[index]);
+	return ERR_PTR(err);
+}
+
+static int vop_find_vqs(struct virtio_device *dev, unsigned nvqs,
+			struct virtqueue *vqs[],
+			vq_callback_t *callbacks[],
+			const char * const names[], const bool *ctx,
+			struct irq_affinity *desc)
+{
+	struct _vop_vdev *vdev = to_vopvdev(dev);
+	struct vop_device *vpdev = vdev->vpdev;
+	struct mic_device_ctrl __iomem *dc = vdev->dc;
+	int i, err, retry;
+
+	/* We must have this many virtqueues. */
+	if (nvqs > ioread8(&vdev->desc->num_vq))
+		return -ENOENT;
+
+	for (i = 0; i < nvqs; ++i) {
+		dev_dbg(_vop_dev(vdev), "%s: %d: %s\n",
+			__func__, i, names[i]);
+		vqs[i] = vop_find_vq(dev, i, callbacks[i], names[i],
+				     ctx ? ctx[i] : false);
+		if (IS_ERR(vqs[i])) {
+			err = PTR_ERR(vqs[i]);
+			goto error;
+		}
+	}
+
+	iowrite8(1, &dc->used_address_updated);
+	/*
+	 * Send an interrupt to the host to inform it that used
+	 * rings have been re-assigned.
+	 */
+	vpdev->hw_ops->send_intr(vpdev, vdev->c2h_vdev_db);
+	for (retry = 100; --retry;) {
+		if (!ioread8(&dc->used_address_updated))
+			break;
+		msleep(100);
+	};
+
+	dev_dbg(_vop_dev(vdev), "%s: retry: %d\n", __func__, retry);
+	if (!retry) {
+		err = -ENODEV;
+		goto error;
+	}
+
+	return 0;
+error:
+	vop_del_vqs(dev);
+	return err;
+}
+
+/*
+ * The config ops structure as defined by virtio config
+ */
+static struct virtio_config_ops vop_vq_config_ops = {
+	.get_features = vop_get_features,
+	.finalize_features = vop_finalize_features,
+	.get = vop_get,
+	.set = vop_set,
+	.get_status = vop_get_status,
+	.set_status = vop_set_status,
+	.reset = vop_reset,
+	.find_vqs = vop_find_vqs,
+	.del_vqs = vop_del_vqs,
+};
+
+static irqreturn_t vop_virtio_intr_handler(int irq, void *data)
+{
+	struct _vop_vdev *vdev = data;
+	struct vop_device *vpdev = vdev->vpdev;
+	struct virtqueue *vq;
+
+	vpdev->hw_ops->ack_interrupt(vpdev, vdev->h2c_vdev_db);
+	list_for_each_entry(vq, &vdev->vdev.vqs, list)
+		vring_interrupt(0, vq);
+
+	return IRQ_HANDLED;
+}
+
+static void vop_virtio_release_dev(struct device *_d)
+{
+	struct virtio_device *vdev =
+			container_of(_d, struct virtio_device, dev);
+	struct _vop_vdev *vop_vdev =
+			container_of(vdev, struct _vop_vdev, vdev);
+
+	kfree(vop_vdev);
+}
+
+/*
+ * adds a new device and register it with virtio
+ * appropriate drivers are loaded by the device model
+ */
+static int _vop_add_device(struct mic_device_desc __iomem *d,
+			   unsigned int offset, struct vop_device *vpdev,
+			   int dnode)
+{
+	struct _vop_vdev *vdev, *reg_dev = NULL;
+	int ret;
+	u8 type = ioread8(&d->type);
+
+	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
+	if (!vdev)
+		return -ENOMEM;
+
+	vdev->vpdev = vpdev;
+	vdev->vdev.dev.parent = &vpdev->dev;
+	vdev->vdev.dev.release = vop_virtio_release_dev;
+	vdev->vdev.id.device = type;
+	vdev->vdev.config = &vop_vq_config_ops;
+	vdev->desc = d;
+	vdev->dc = (void __iomem *)d + _vop_aligned_desc_size(d);
+	vdev->dnode = dnode;
+	vdev->vdev.priv = (void *)(u64)dnode;
+	init_completion(&vdev->reset_done);
+
+	vdev->h2c_vdev_db = vpdev->hw_ops->next_db(vpdev);
+	vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev,
+			vop_virtio_intr_handler, "virtio intr",
+			vdev, vdev->h2c_vdev_db);
+	if (IS_ERR(vdev->virtio_cookie)) {
+		ret = PTR_ERR(vdev->virtio_cookie);
+		goto kfree;
+	}
+	iowrite8((u8)vdev->h2c_vdev_db, &vdev->dc->h2c_vdev_db);
+	vdev->c2h_vdev_db = ioread8(&vdev->dc->c2h_vdev_db);
+
+	ret = register_virtio_device(&vdev->vdev);
+	reg_dev = vdev;
+	if (ret) {
+		dev_err(_vop_dev(vdev),
+			"Failed to register vop device %u type %u\n",
+			offset, type);
+		goto free_irq;
+	}
+	writeq((u64)vdev, &vdev->dc->vdev);
+	dev_dbg(_vop_dev(vdev), "%s: registered vop device %u type %u vdev %p\n",
+		__func__, offset, type, vdev);
+
+	return 0;
+
+free_irq:
+	vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
+kfree:
+	if (reg_dev)
+		put_device(&vdev->vdev.dev);
+	else
+		kfree(vdev);
+	return ret;
+}
+
+/*
+ * match for a vop device with a specific desc pointer
+ */
+static int vop_match_desc(struct device *dev, void *data)
+{
+	struct virtio_device *_dev = dev_to_virtio(dev);
+	struct _vop_vdev *vdev = to_vopvdev(_dev);
+
+	return vdev->desc == (void __iomem *)data;
+}
+
+static void _vop_handle_config_change(struct mic_device_desc __iomem *d,
+				      unsigned int offset,
+				      struct vop_device *vpdev)
+{
+	struct mic_device_ctrl __iomem *dc
+		= (void __iomem *)d + _vop_aligned_desc_size(d);
+	struct _vop_vdev *vdev = (struct _vop_vdev *)readq(&dc->vdev);
+
+	if (ioread8(&dc->config_change) != MIC_VIRTIO_PARAM_CONFIG_CHANGED)
+		return;
+
+	dev_dbg(&vpdev->dev, "%s %d\n", __func__, __LINE__);
+	virtio_config_changed(&vdev->vdev);
+	iowrite8(1, &dc->guest_ack);
+}
+
+/*
+ * removes a virtio device if a hot remove event has been
+ * requested by the host.
+ */
+static int _vop_remove_device(struct mic_device_desc __iomem *d,
+			      unsigned int offset, struct vop_device *vpdev)
+{
+	struct mic_device_ctrl __iomem *dc
+		= (void __iomem *)d + _vop_aligned_desc_size(d);
+	struct _vop_vdev *vdev = (struct _vop_vdev *)readq(&dc->vdev);
+	u8 status;
+	int ret = -1;
+
+	if (ioread8(&dc->config_change) == MIC_VIRTIO_PARAM_DEV_REMOVE) {
+		struct device *dev = get_device(&vdev->vdev.dev);
+
+		dev_dbg(&vpdev->dev,
+			"%s %d config_change %d type %d vdev %p\n",
+			__func__, __LINE__,
+			ioread8(&dc->config_change), ioread8(&d->type), vdev);
+		status = ioread8(&d->status);
+		reinit_completion(&vdev->reset_done);
+		unregister_virtio_device(&vdev->vdev);
+		vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
+		iowrite8(-1, &dc->h2c_vdev_db);
+		if (status & VIRTIO_CONFIG_S_DRIVER_OK)
+			wait_for_completion(&vdev->reset_done);
+		put_device(dev);
+		iowrite8(1, &dc->guest_ack);
+		dev_dbg(&vpdev->dev, "%s %d guest_ack %d\n",
+			__func__, __LINE__, ioread8(&dc->guest_ack));
+		iowrite8(-1, &d->type);
+		ret = 0;
+	}
+	return ret;
+}
+
+#define REMOVE_DEVICES true
+
+static void _vop_scan_devices(void __iomem *dp, struct vop_device *vpdev,
+			      bool remove, int dnode)
+{
+	s8 type;
+	unsigned int i;
+	struct mic_device_desc __iomem *d;
+	struct mic_device_ctrl __iomem *dc;
+	struct device *dev;
+	int ret;
+
+	for (i = sizeof(struct mic_bootparam);
+			i < MIC_DP_SIZE; i += _vop_total_desc_size(d)) {
+		d = dp + i;
+		dc = (void __iomem *)d + _vop_aligned_desc_size(d);
+		/*
+		 * This read barrier is paired with the corresponding write
+		 * barrier on the host which is inserted before adding or
+		 * removing a virtio device descriptor, by updating the type.
+		 */
+		rmb();
+		type = ioread8(&d->type);
+
+		/* end of list */
+		if (type == 0)
+			break;
+
+		if (type == -1)
+			continue;
+
+		/* device already exists */
+		dev = device_find_child(&vpdev->dev, (void __force *)d,
+					vop_match_desc);
+		if (dev) {
+			if (remove)
+				iowrite8(MIC_VIRTIO_PARAM_DEV_REMOVE,
+					 &dc->config_change);
+			put_device(dev);
+			_vop_handle_config_change(d, i, vpdev);
+			ret = _vop_remove_device(d, i, vpdev);
+			if (remove) {
+				iowrite8(0, &dc->config_change);
+				iowrite8(0, &dc->guest_ack);
+			}
+			continue;
+		}
+
+		/* new device */
+		dev_dbg(&vpdev->dev, "%s %d Adding new virtio device %p\n",
+			__func__, __LINE__, d);
+		if (!remove)
+			_vop_add_device(d, i, vpdev, dnode);
+	}
+}
+
+static void vop_scan_devices(struct vop_info *vi,
+			     struct vop_device *vpdev, bool remove)
+{
+	void __iomem *dp = vpdev->hw_ops->get_remote_dp(vpdev);
+
+	if (!dp)
+		return;
+	mutex_lock(&vi->vop_mutex);
+	_vop_scan_devices(dp, vpdev, remove, vpdev->dnode);
+	mutex_unlock(&vi->vop_mutex);
+}
+
+/*
+ * vop_hotplug_device tries to find changes in the device page.
+ */
+static void vop_hotplug_devices(struct work_struct *work)
+{
+	struct vop_info *vi = container_of(work, struct vop_info,
+					     hotplug_work);
+
+	vop_scan_devices(vi, vi->vpdev, !REMOVE_DEVICES);
+}
+
+/*
+ * Interrupt handler for hot plug/config changes etc.
+ */
+static irqreturn_t vop_extint_handler(int irq, void *data)
+{
+	struct vop_info *vi = data;
+	struct mic_bootparam __iomem *bp;
+	struct vop_device *vpdev = vi->vpdev;
+
+	bp = vpdev->hw_ops->get_remote_dp(vpdev);
+	dev_dbg(&vpdev->dev, "%s %d hotplug work\n",
+		__func__, __LINE__);
+	vpdev->hw_ops->ack_interrupt(vpdev, ioread8(&bp->h2c_config_db));
+	schedule_work(&vi->hotplug_work);
+	return IRQ_HANDLED;
+}
+
+static int vop_driver_probe(struct vop_device *vpdev)
+{
+	struct vop_info *vi;
+	int rc;
+
+	vi = kzalloc(sizeof(*vi), GFP_KERNEL);
+	if (!vi) {
+		rc = -ENOMEM;
+		goto exit;
+	}
+	dev_set_drvdata(&vpdev->dev, vi);
+	vi->vpdev = vpdev;
+
+	mutex_init(&vi->vop_mutex);
+	INIT_WORK(&vi->hotplug_work, vop_hotplug_devices);
+	if (vpdev->dnode) {
+		rc = vop_host_init(vi);
+		if (rc < 0)
+			goto free;
+	} else {
+		struct mic_bootparam __iomem *bootparam;
+
+		vop_scan_devices(vi, vpdev, !REMOVE_DEVICES);
+
+		vi->h2c_config_db = vpdev->hw_ops->next_db(vpdev);
+		vi->cookie = vpdev->hw_ops->request_irq(vpdev,
+							vop_extint_handler,
+							"virtio_config_intr",
+							vi, vi->h2c_config_db);
+		if (IS_ERR(vi->cookie)) {
+			rc = PTR_ERR(vi->cookie);
+			goto free;
+		}
+		bootparam = vpdev->hw_ops->get_remote_dp(vpdev);
+		iowrite8(vi->h2c_config_db, &bootparam->h2c_config_db);
+	}
+	vop_init_debugfs(vi);
+	return 0;
+free:
+	kfree(vi);
+exit:
+	return rc;
+}
+
+static void vop_driver_remove(struct vop_device *vpdev)
+{
+	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
+
+	if (vpdev->dnode) {
+		vop_host_uninit(vi);
+	} else {
+		struct mic_bootparam __iomem *bootparam =
+			vpdev->hw_ops->get_remote_dp(vpdev);
+		if (bootparam)
+			iowrite8(-1, &bootparam->h2c_config_db);
+		vpdev->hw_ops->free_irq(vpdev, vi->cookie, vi);
+		flush_work(&vi->hotplug_work);
+		vop_scan_devices(vi, vpdev, REMOVE_DEVICES);
+	}
+	vop_exit_debugfs(vi);
+	kfree(vi);
+}
+
+static struct vop_device_id id_table[] = {
+	{ VOP_DEV_TRNSP, VOP_DEV_ANY_ID },
+	{ 0 },
+};
+
+static struct vop_driver vop_driver = {
+	.driver.name =	KBUILD_MODNAME,
+	.driver.owner =	THIS_MODULE,
+	.id_table = id_table,
+	.probe = vop_driver_probe,
+	.remove = vop_driver_remove,
+};
+
+module_vop_driver(vop_driver);
+
+MODULE_DEVICE_TABLE(mbus, id_table);
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) Virtio Over PCIe (VOP) driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/vop/vop_main.h b/drivers/misc/mic/vop/vop_main.h
new file mode 100644
index 000000000..ba47ec7a6
--- /dev/null
+++ b/drivers/misc/mic/vop/vop_main.h
@@ -0,0 +1,170 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel Virtio Over PCIe (VOP) driver.
+ *
+ */
+#ifndef _VOP_MAIN_H_
+#define _VOP_MAIN_H_
+
+#include <linux/vringh.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio.h>
+#include <linux/miscdevice.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+
+#include "../bus/vop_bus.h"
+
+/*
+ * Note on endianness.
+ * 1. Host can be both BE or LE
+ * 2. Guest/card is LE. Host uses le_to_cpu to access desc/avail
+ *    rings and ioreadXX/iowriteXX to access used ring.
+ * 3. Device page exposed by host to guest contains LE values. Guest
+ *    accesses these using ioreadXX/iowriteXX etc. This way in general we
+ *    obey the virtio spec according to which guest works with native
+ *    endianness and host is aware of guest endianness and does all
+ *    required endianness conversion.
+ * 4. Data provided from user space to guest (in ADD_DEVICE and
+ *    CONFIG_CHANGE ioctl's) is not interpreted by the driver and should be
+ *    in guest endianness.
+ */
+
+/*
+ * vop_info - Allocated per invocation of VOP probe
+ *
+ * @vpdev: VOP device
+ * @hotplug_work: Handle virtio device creation, deletion and configuration
+ * @cookie: Cookie received upon requesting a virtio configuration interrupt
+ * @h2c_config_db: The doorbell used by the peer to indicate a config change
+ * @vdev_list: List of "active" virtio devices injected in the peer node
+ * @vop_mutex: Synchronize access to the device page as well as serialize
+ *             creation/deletion of virtio devices on the peer node
+ * @dp: Peer device page information
+ * @dbg: Debugfs entry
+ * @dma_ch: The DMA channel used by this transport for data transfers.
+ * @name: Name for this transport used in misc device creation.
+ * @miscdev: The misc device registered.
+ */
+struct vop_info {
+	struct vop_device *vpdev;
+	struct work_struct hotplug_work;
+	struct mic_irq *cookie;
+	int h2c_config_db;
+	struct list_head vdev_list;
+	struct mutex vop_mutex;
+	void __iomem *dp;
+	struct dentry *dbg;
+	struct dma_chan *dma_ch;
+	char name[16];
+	struct miscdevice miscdev;
+};
+
+/**
+ * struct vop_vringh - Virtio ring host information.
+ *
+ * @vring: The VOP vring used for setting up user space mappings.
+ * @vrh: The host VRINGH used for accessing the card vrings.
+ * @riov: The VRINGH read kernel IOV.
+ * @wiov: The VRINGH write kernel IOV.
+ * @head: The VRINGH head index address passed to vringh_getdesc_kern(..).
+ * @vr_mutex: Mutex for synchronizing access to the VRING.
+ * @buf: Temporary kernel buffer used to copy in/out data
+ * from/to the card via DMA.
+ * @buf_da: dma address of buf.
+ * @vdev: Back pointer to VOP virtio device for vringh_notify(..).
+ */
+struct vop_vringh {
+	struct mic_vring vring;
+	struct vringh vrh;
+	struct vringh_kiov riov;
+	struct vringh_kiov wiov;
+	u16 head;
+	struct mutex vr_mutex;
+	void *buf;
+	dma_addr_t buf_da;
+	struct vop_vdev *vdev;
+};
+
+/**
+ * struct vop_vdev - Host information for a card Virtio device.
+ *
+ * @virtio_id - Virtio device id.
+ * @waitq - Waitqueue to allow ring3 apps to poll.
+ * @vpdev - pointer to VOP bus device.
+ * @poll_wake - Used for waking up threads blocked in poll.
+ * @out_bytes - Debug stats for number of bytes copied from host to card.
+ * @in_bytes - Debug stats for number of bytes copied from card to host.
+ * @out_bytes_dma - Debug stats for number of bytes copied from host to card
+ * using DMA.
+ * @in_bytes_dma - Debug stats for number of bytes copied from card to host
+ * using DMA.
+ * @tx_len_unaligned - Debug stats for number of bytes copied to the card where
+ * the transfer length did not have the required DMA alignment.
+ * @tx_dst_unaligned - Debug stats for number of bytes copied where the
+ * destination address on the card did not have the required DMA alignment.
+ * @vvr - Store per VRING data structures.
+ * @virtio_bh_work - Work struct used to schedule virtio bottom half handling.
+ * @dd - Virtio device descriptor.
+ * @dc - Virtio device control fields.
+ * @list - List of Virtio devices.
+ * @virtio_db - The doorbell used by the card to interrupt the host.
+ * @virtio_cookie - The cookie returned while requesting interrupts.
+ * @vi: Transport information.
+ * @vdev_mutex: Mutex synchronizing virtio device injection,
+ *              removal and data transfers.
+ * @destroy: Track if a virtio device is being destroyed.
+ * @deleted: The virtio device has been deleted.
+ */
+struct vop_vdev {
+	int virtio_id;
+	wait_queue_head_t waitq;
+	struct vop_device *vpdev;
+	int poll_wake;
+	unsigned long out_bytes;
+	unsigned long in_bytes;
+	unsigned long out_bytes_dma;
+	unsigned long in_bytes_dma;
+	unsigned long tx_len_unaligned;
+	unsigned long tx_dst_unaligned;
+	unsigned long rx_dst_unaligned;
+	struct vop_vringh vvr[MIC_MAX_VRINGS];
+	struct work_struct virtio_bh_work;
+	struct mic_device_desc *dd;
+	struct mic_device_ctrl *dc;
+	struct list_head list;
+	int virtio_db;
+	struct mic_irq *virtio_cookie;
+	struct vop_info *vi;
+	struct mutex vdev_mutex;
+	struct completion destroy;
+	bool deleted;
+};
+
+/* Helper API to check if a virtio device is running */
+static inline bool vop_vdevup(struct vop_vdev *vdev)
+{
+	return !!vdev->dd->status;
+}
+
+void vop_init_debugfs(struct vop_info *vi);
+void vop_exit_debugfs(struct vop_info *vi);
+int vop_host_init(struct vop_info *vi);
+void vop_host_uninit(struct vop_info *vi);
+#endif
diff --git a/drivers/misc/mic/vop/vop_vringh.c b/drivers/misc/mic/vop/vop_vringh.c
new file mode 100644
index 000000000..a252c2199
--- /dev/null
+++ b/drivers/misc/mic/vop/vop_vringh.c
@@ -0,0 +1,1169 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel Virtio Over PCIe (VOP) driver.
+ *
+ */
+#include <linux/sched.h>
+#include <linux/poll.h>
+#include <linux/dma-mapping.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+
+#include <linux/mic_ioctl.h>
+#include "vop_main.h"
+
+/* Helper API to obtain the VOP PCIe device */
+static inline struct device *vop_dev(struct vop_vdev *vdev)
+{
+	return vdev->vpdev->dev.parent;
+}
+
+/* Helper API to check if a virtio device is initialized */
+static inline int vop_vdev_inited(struct vop_vdev *vdev)
+{
+	if (!vdev)
+		return -EINVAL;
+	/* Device has not been created yet */
+	if (!vdev->dd || !vdev->dd->type) {
+		dev_err(vop_dev(vdev), "%s %d err %d\n",
+			__func__, __LINE__, -EINVAL);
+		return -EINVAL;
+	}
+	/* Device has been removed/deleted */
+	if (vdev->dd->type == -1) {
+		dev_dbg(vop_dev(vdev), "%s %d err %d\n",
+			__func__, __LINE__, -ENODEV);
+		return -ENODEV;
+	}
+	return 0;
+}
+
+static void _vop_notify(struct vringh *vrh)
+{
+	struct vop_vringh *vvrh = container_of(vrh, struct vop_vringh, vrh);
+	struct vop_vdev *vdev = vvrh->vdev;
+	struct vop_device *vpdev = vdev->vpdev;
+	s8 db = vdev->dc->h2c_vdev_db;
+
+	if (db != -1)
+		vpdev->hw_ops->send_intr(vpdev, db);
+}
+
+static void vop_virtio_init_post(struct vop_vdev *vdev)
+{
+	struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd);
+	struct vop_device *vpdev = vdev->vpdev;
+	int i, used_size;
+
+	for (i = 0; i < vdev->dd->num_vq; i++) {
+		used_size = PAGE_ALIGN(sizeof(u16) * 3 +
+				sizeof(struct vring_used_elem) *
+				le16_to_cpu(vqconfig->num));
+		if (!le64_to_cpu(vqconfig[i].used_address)) {
+			dev_warn(vop_dev(vdev), "used_address zero??\n");
+			continue;
+		}
+		vdev->vvr[i].vrh.vring.used =
+			(void __force *)vpdev->hw_ops->ioremap(
+			vpdev,
+			le64_to_cpu(vqconfig[i].used_address),
+			used_size);
+	}
+
+	vdev->dc->used_address_updated = 0;
+
+	dev_info(vop_dev(vdev), "%s: device type %d LINKUP\n",
+		 __func__, vdev->virtio_id);
+}
+
+static inline void vop_virtio_device_reset(struct vop_vdev *vdev)
+{
+	int i;
+
+	dev_dbg(vop_dev(vdev), "%s: status %d device type %d RESET\n",
+		__func__, vdev->dd->status, vdev->virtio_id);
+
+	for (i = 0; i < vdev->dd->num_vq; i++)
+		/*
+		 * Avoid lockdep false positive. The + 1 is for the vop
+		 * mutex which is held in the reset devices code path.
+		 */
+		mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);
+
+	/* 0 status means "reset" */
+	vdev->dd->status = 0;
+	vdev->dc->vdev_reset = 0;
+	vdev->dc->host_ack = 1;
+
+	for (i = 0; i < vdev->dd->num_vq; i++) {
+		struct vringh *vrh = &vdev->vvr[i].vrh;
+
+		vdev->vvr[i].vring.info->avail_idx = 0;
+		vrh->completed = 0;
+		vrh->last_avail_idx = 0;
+		vrh->last_used_idx = 0;
+	}
+
+	for (i = 0; i < vdev->dd->num_vq; i++)
+		mutex_unlock(&vdev->vvr[i].vr_mutex);
+}
+
+static void vop_virtio_reset_devices(struct vop_info *vi)
+{
+	struct list_head *pos, *tmp;
+	struct vop_vdev *vdev;
+
+	list_for_each_safe(pos, tmp, &vi->vdev_list) {
+		vdev = list_entry(pos, struct vop_vdev, list);
+		vop_virtio_device_reset(vdev);
+		vdev->poll_wake = 1;
+		wake_up(&vdev->waitq);
+	}
+}
+
+static void vop_bh_handler(struct work_struct *work)
+{
+	struct vop_vdev *vdev = container_of(work, struct vop_vdev,
+			virtio_bh_work);
+
+	if (vdev->dc->used_address_updated)
+		vop_virtio_init_post(vdev);
+
+	if (vdev->dc->vdev_reset)
+		vop_virtio_device_reset(vdev);
+
+	vdev->poll_wake = 1;
+	wake_up(&vdev->waitq);
+}
+
+static irqreturn_t _vop_virtio_intr_handler(int irq, void *data)
+{
+	struct vop_vdev *vdev = data;
+	struct vop_device *vpdev = vdev->vpdev;
+
+	vpdev->hw_ops->ack_interrupt(vpdev, vdev->virtio_db);
+	schedule_work(&vdev->virtio_bh_work);
+	return IRQ_HANDLED;
+}
+
+static int vop_virtio_config_change(struct vop_vdev *vdev, void *argp)
+{
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+	int ret = 0, retry, i;
+	struct vop_device *vpdev = vdev->vpdev;
+	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
+	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
+	s8 db = bootparam->h2c_config_db;
+
+	mutex_lock(&vi->vop_mutex);
+	for (i = 0; i < vdev->dd->num_vq; i++)
+		mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);
+
+	if (db == -1 || vdev->dd->type == -1) {
+		ret = -EIO;
+		goto exit;
+	}
+
+	memcpy(mic_vq_configspace(vdev->dd), argp, vdev->dd->config_len);
+	vdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED;
+	vpdev->hw_ops->send_intr(vpdev, db);
+
+	for (retry = 100; retry--;) {
+		ret = wait_event_timeout(wake, vdev->dc->guest_ack,
+					 msecs_to_jiffies(100));
+		if (ret)
+			break;
+	}
+
+	dev_dbg(vop_dev(vdev),
+		"%s %d retry: %d\n", __func__, __LINE__, retry);
+	vdev->dc->config_change = 0;
+	vdev->dc->guest_ack = 0;
+exit:
+	for (i = 0; i < vdev->dd->num_vq; i++)
+		mutex_unlock(&vdev->vvr[i].vr_mutex);
+	mutex_unlock(&vi->vop_mutex);
+	return ret;
+}
+
+static int vop_copy_dp_entry(struct vop_vdev *vdev,
+			     struct mic_device_desc *argp, __u8 *type,
+			     struct mic_device_desc **devpage)
+{
+	struct vop_device *vpdev = vdev->vpdev;
+	struct mic_device_desc *devp;
+	struct mic_vqconfig *vqconfig;
+	int ret = 0, i;
+	bool slot_found = false;
+
+	vqconfig = mic_vq_config(argp);
+	for (i = 0; i < argp->num_vq; i++) {
+		if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) {
+			ret =  -EINVAL;
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, ret);
+			goto exit;
+		}
+	}
+
+	/* Find the first free device page entry */
+	for (i = sizeof(struct mic_bootparam);
+		i < MIC_DP_SIZE - mic_total_desc_size(argp);
+		i += mic_total_desc_size(devp)) {
+		devp = vpdev->hw_ops->get_dp(vpdev) + i;
+		if (devp->type == 0 || devp->type == -1) {
+			slot_found = true;
+			break;
+		}
+	}
+	if (!slot_found) {
+		ret =  -EINVAL;
+		dev_err(vop_dev(vdev), "%s %d err %d\n",
+			__func__, __LINE__, ret);
+		goto exit;
+	}
+	/*
+	 * Save off the type before doing the memcpy. Type will be set in the
+	 * end after completing all initialization for the new device.
+	 */
+	*type = argp->type;
+	argp->type = 0;
+	memcpy(devp, argp, mic_desc_size(argp));
+
+	*devpage = devp;
+exit:
+	return ret;
+}
+
+static void vop_init_device_ctrl(struct vop_vdev *vdev,
+				 struct mic_device_desc *devpage)
+{
+	struct mic_device_ctrl *dc;
+
+	dc = (void *)devpage + mic_aligned_desc_size(devpage);
+
+	dc->config_change = 0;
+	dc->guest_ack = 0;
+	dc->vdev_reset = 0;
+	dc->host_ack = 0;
+	dc->used_address_updated = 0;
+	dc->c2h_vdev_db = -1;
+	dc->h2c_vdev_db = -1;
+	vdev->dc = dc;
+}
+
+static int vop_virtio_add_device(struct vop_vdev *vdev,
+				 struct mic_device_desc *argp)
+{
+	struct vop_info *vi = vdev->vi;
+	struct vop_device *vpdev = vi->vpdev;
+	struct mic_device_desc *dd = NULL;
+	struct mic_vqconfig *vqconfig;
+	int vr_size, i, j, ret;
+	u8 type = 0;
+	s8 db = -1;
+	char irqname[16];
+	struct mic_bootparam *bootparam;
+	u16 num;
+	dma_addr_t vr_addr;
+
+	bootparam = vpdev->hw_ops->get_dp(vpdev);
+	init_waitqueue_head(&vdev->waitq);
+	INIT_LIST_HEAD(&vdev->list);
+	vdev->vpdev = vpdev;
+
+	ret = vop_copy_dp_entry(vdev, argp, &type, &dd);
+	if (ret) {
+		dev_err(vop_dev(vdev), "%s %d err %d\n",
+			__func__, __LINE__, ret);
+		return ret;
+	}
+
+	vop_init_device_ctrl(vdev, dd);
+
+	vdev->dd = dd;
+	vdev->virtio_id = type;
+	vqconfig = mic_vq_config(dd);
+	INIT_WORK(&vdev->virtio_bh_work, vop_bh_handler);
+
+	for (i = 0; i < dd->num_vq; i++) {
+		struct vop_vringh *vvr = &vdev->vvr[i];
+		struct mic_vring *vr = &vdev->vvr[i].vring;
+
+		num = le16_to_cpu(vqconfig[i].num);
+		mutex_init(&vvr->vr_mutex);
+		vr_size = PAGE_ALIGN(round_up(vring_size(num, MIC_VIRTIO_RING_ALIGN), 4) +
+			sizeof(struct _mic_vring_info));
+		vr->va = (void *)
+			__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+					 get_order(vr_size));
+		if (!vr->va) {
+			ret = -ENOMEM;
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, ret);
+			goto err;
+		}
+		vr->len = vr_size;
+		vr->info = vr->va + round_up(vring_size(num, MIC_VIRTIO_RING_ALIGN), 4);
+		vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id + i);
+		vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size,
+					 DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(&vpdev->dev, vr_addr)) {
+			free_pages((unsigned long)vr->va, get_order(vr_size));
+			ret = -ENOMEM;
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, ret);
+			goto err;
+		}
+		vqconfig[i].address = cpu_to_le64(vr_addr);
+
+		vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
+		ret = vringh_init_kern(&vvr->vrh,
+				       *(u32 *)mic_vq_features(vdev->dd),
+				       num, false, vr->vr.desc, vr->vr.avail,
+				       vr->vr.used);
+		if (ret) {
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, ret);
+			goto err;
+		}
+		vringh_kiov_init(&vvr->riov, NULL, 0);
+		vringh_kiov_init(&vvr->wiov, NULL, 0);
+		vvr->head = USHRT_MAX;
+		vvr->vdev = vdev;
+		vvr->vrh.notify = _vop_notify;
+		dev_dbg(&vpdev->dev,
+			"%s %d index %d va %p info %p vr_size 0x%x\n",
+			__func__, __LINE__, i, vr->va, vr->info, vr_size);
+		vvr->buf = (void *)__get_free_pages(GFP_KERNEL,
+					get_order(VOP_INT_DMA_BUF_SIZE));
+		vvr->buf_da = dma_map_single(&vpdev->dev,
+					  vvr->buf, VOP_INT_DMA_BUF_SIZE,
+					  DMA_BIDIRECTIONAL);
+	}
+
+	snprintf(irqname, sizeof(irqname), "vop%dvirtio%d", vpdev->index,
+		 vdev->virtio_id);
+	vdev->virtio_db = vpdev->hw_ops->next_db(vpdev);
+	vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev,
+			_vop_virtio_intr_handler, irqname, vdev,
+			vdev->virtio_db);
+	if (IS_ERR(vdev->virtio_cookie)) {
+		ret = PTR_ERR(vdev->virtio_cookie);
+		dev_dbg(&vpdev->dev, "request irq failed\n");
+		goto err;
+	}
+
+	vdev->dc->c2h_vdev_db = vdev->virtio_db;
+
+	/*
+	 * Order the type update with previous stores. This write barrier
+	 * is paired with the corresponding read barrier before the uncached
+	 * system memory read of the type, on the card while scanning the
+	 * device page.
+	 */
+	smp_wmb();
+	dd->type = type;
+	argp->type = type;
+
+	if (bootparam) {
+		db = bootparam->h2c_config_db;
+		if (db != -1)
+			vpdev->hw_ops->send_intr(vpdev, db);
+	}
+	dev_dbg(&vpdev->dev, "Added virtio id %d db %d\n", dd->type, db);
+	return 0;
+err:
+	vqconfig = mic_vq_config(dd);
+	for (j = 0; j < i; j++) {
+		struct vop_vringh *vvr = &vdev->vvr[j];
+
+		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[j].address),
+				 vvr->vring.len, DMA_BIDIRECTIONAL);
+		free_pages((unsigned long)vvr->vring.va,
+			   get_order(vvr->vring.len));
+	}
+	return ret;
+}
+
+static void vop_dev_remove(struct vop_info *pvi, struct mic_device_ctrl *devp,
+			   struct vop_device *vpdev)
+{
+	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
+	s8 db;
+	int ret, retry;
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+
+	devp->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
+	db = bootparam->h2c_config_db;
+	if (db != -1)
+		vpdev->hw_ops->send_intr(vpdev, db);
+	else
+		goto done;
+	for (retry = 15; retry--;) {
+		ret = wait_event_timeout(wake, devp->guest_ack,
+					 msecs_to_jiffies(1000));
+		if (ret)
+			break;
+	}
+done:
+	devp->config_change = 0;
+	devp->guest_ack = 0;
+}
+
+static void vop_virtio_del_device(struct vop_vdev *vdev)
+{
+	struct vop_info *vi = vdev->vi;
+	struct vop_device *vpdev = vdev->vpdev;
+	int i;
+	struct mic_vqconfig *vqconfig;
+	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
+
+	if (!bootparam)
+		goto skip_hot_remove;
+	vop_dev_remove(vi, vdev->dc, vpdev);
+skip_hot_remove:
+	vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
+	flush_work(&vdev->virtio_bh_work);
+	vqconfig = mic_vq_config(vdev->dd);
+	for (i = 0; i < vdev->dd->num_vq; i++) {
+		struct vop_vringh *vvr = &vdev->vvr[i];
+
+		dma_unmap_single(&vpdev->dev,
+				 vvr->buf_da, VOP_INT_DMA_BUF_SIZE,
+				 DMA_BIDIRECTIONAL);
+		free_pages((unsigned long)vvr->buf,
+			   get_order(VOP_INT_DMA_BUF_SIZE));
+		vringh_kiov_cleanup(&vvr->riov);
+		vringh_kiov_cleanup(&vvr->wiov);
+		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[i].address),
+				 vvr->vring.len, DMA_BIDIRECTIONAL);
+		free_pages((unsigned long)vvr->vring.va,
+			   get_order(vvr->vring.len));
+	}
+	/*
+	 * Order the type update with previous stores. This write barrier
+	 * is paired with the corresponding read barrier before the uncached
+	 * system memory read of the type, on the card while scanning the
+	 * device page.
+	 */
+	smp_wmb();
+	vdev->dd->type = -1;
+}
+
+/*
+ * vop_sync_dma - Wrapper for synchronous DMAs.
+ *
+ * @dev - The address of the pointer to the device instance used
+ * for DMA registration.
+ * @dst - destination DMA address.
+ * @src - source DMA address.
+ * @len - size of the transfer.
+ *
+ * Return DMA_SUCCESS on success
+ */
+static int vop_sync_dma(struct vop_vdev *vdev, dma_addr_t dst, dma_addr_t src,
+			size_t len)
+{
+	int err = 0;
+	struct dma_device *ddev;
+	struct dma_async_tx_descriptor *tx;
+	struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
+	struct dma_chan *vop_ch = vi->dma_ch;
+
+	if (!vop_ch) {
+		err = -EBUSY;
+		goto error;
+	}
+	ddev = vop_ch->device;
+	tx = ddev->device_prep_dma_memcpy(vop_ch, dst, src, len,
+		DMA_PREP_FENCE);
+	if (!tx) {
+		err = -ENOMEM;
+		goto error;
+	} else {
+		dma_cookie_t cookie;
+
+		cookie = tx->tx_submit(tx);
+		if (dma_submit_error(cookie)) {
+			err = -ENOMEM;
+			goto error;
+		}
+		dma_async_issue_pending(vop_ch);
+		err = dma_sync_wait(vop_ch, cookie);
+	}
+error:
+	if (err)
+		dev_err(&vi->vpdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+	return err;
+}
+
+#define VOP_USE_DMA true
+
+/*
+ * Initiates the copies across the PCIe bus from card memory to a user
+ * space buffer. When transfers are done using DMA, source/destination
+ * addresses and transfer length must follow the alignment requirements of
+ * the MIC DMA engine.
+ */
+static int vop_virtio_copy_to_user(struct vop_vdev *vdev, void __user *ubuf,
+				   size_t len, u64 daddr, size_t dlen,
+				   int vr_idx)
+{
+	struct vop_device *vpdev = vdev->vpdev;
+	void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
+	struct vop_vringh *vvr = &vdev->vvr[vr_idx];
+	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
+	size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
+	bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
+	size_t dma_offset, partlen;
+	int err;
+
+	if (!VOP_USE_DMA) {
+		if (copy_to_user(ubuf, (void __force *)dbuf, len)) {
+			err = -EFAULT;
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, err);
+			goto err;
+		}
+		vdev->in_bytes += len;
+		err = 0;
+		goto err;
+	}
+
+	dma_offset = daddr - round_down(daddr, dma_alignment);
+	daddr -= dma_offset;
+	len += dma_offset;
+	/*
+	 * X100 uses DMA addresses as seen by the card so adding
+	 * the aperture base is not required for DMA. However x200
+	 * requires DMA addresses to be an offset into the bar so
+	 * add the aperture base for x200.
+	 */
+	if (x200)
+		daddr += vpdev->aper->pa;
+	while (len) {
+		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
+		err = vop_sync_dma(vdev, vvr->buf_da, daddr,
+				   ALIGN(partlen, dma_alignment));
+		if (err) {
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, err);
+			goto err;
+		}
+		if (copy_to_user(ubuf, vvr->buf + dma_offset,
+				 partlen - dma_offset)) {
+			err = -EFAULT;
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, err);
+			goto err;
+		}
+		daddr += partlen;
+		ubuf += partlen;
+		dbuf += partlen;
+		vdev->in_bytes_dma += partlen;
+		vdev->in_bytes += partlen;
+		len -= partlen;
+		dma_offset = 0;
+	}
+	err = 0;
+err:
+	vpdev->hw_ops->iounmap(vpdev, dbuf);
+	dev_dbg(vop_dev(vdev),
+		"%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
+		__func__, ubuf, dbuf, len, vr_idx);
+	return err;
+}
+
+/*
+ * Initiates copies across the PCIe bus from a user space buffer to card
+ * memory. When transfers are done using DMA, source/destination addresses
+ * and transfer length must follow the alignment requirements of the MIC
+ * DMA engine.
+ */
+static int vop_virtio_copy_from_user(struct vop_vdev *vdev, void __user *ubuf,
+				     size_t len, u64 daddr, size_t dlen,
+				     int vr_idx)
+{
+	struct vop_device *vpdev = vdev->vpdev;
+	void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
+	struct vop_vringh *vvr = &vdev->vvr[vr_idx];
+	struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
+	size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
+	bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
+	size_t partlen;
+	bool dma = VOP_USE_DMA;
+	int err = 0;
+	size_t offset = 0;
+
+	if (daddr & (dma_alignment - 1)) {
+		vdev->tx_dst_unaligned += len;
+		dma = false;
+	} else if (ALIGN(len, dma_alignment) > dlen) {
+		vdev->tx_len_unaligned += len;
+		dma = false;
+	}
+
+	if (!dma)
+		goto memcpy;
+
+	/*
+	 * X100 uses DMA addresses as seen by the card so adding
+	 * the aperture base is not required for DMA. However x200
+	 * requires DMA addresses to be an offset into the bar so
+	 * add the aperture base for x200.
+	 */
+	if (x200)
+		daddr += vpdev->aper->pa;
+	while (len) {
+		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
+
+		if (copy_from_user(vvr->buf, ubuf, partlen)) {
+			err = -EFAULT;
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, err);
+			goto err;
+		}
+		err = vop_sync_dma(vdev, daddr, vvr->buf_da,
+				   ALIGN(partlen, dma_alignment));
+		if (err) {
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, err);
+			goto err;
+		}
+		daddr += partlen;
+		ubuf += partlen;
+		dbuf += partlen;
+		vdev->out_bytes_dma += partlen;
+		vdev->out_bytes += partlen;
+		len -= partlen;
+	}
+memcpy:
+	/*
+	 * We are copying to IO below and should ideally use something
+	 * like copy_from_user_toio(..) if it existed.
+	 */
+	while (len) {
+		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
+
+		if (copy_from_user(vvr->buf, ubuf + offset, partlen)) {
+			err = -EFAULT;
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, err);
+			goto err;
+		}
+		memcpy_toio(dbuf + offset, vvr->buf, partlen);
+		offset += partlen;
+		vdev->out_bytes += partlen;
+		len -= partlen;
+	}
+	err = 0;
+err:
+	vpdev->hw_ops->iounmap(vpdev, dbuf);
+	dev_dbg(vop_dev(vdev),
+		"%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
+		__func__, ubuf, dbuf, len, vr_idx);
+	return err;
+}
+
+#define MIC_VRINGH_READ true
+
+/* Determine the total number of bytes consumed in a VRINGH KIOV */
+static inline u32 vop_vringh_iov_consumed(struct vringh_kiov *iov)
+{
+	int i;
+	u32 total = iov->consumed;
+
+	for (i = 0; i < iov->i; i++)
+		total += iov->iov[i].iov_len;
+	return total;
+}
+
+/*
+ * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
+ * This API is heavily based on the vringh_iov_xfer(..) implementation
+ * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
+ * and vringh_iov_push_kern(..) directly is because there is no
+ * way to override the VRINGH xfer(..) routines as of v3.10.
+ */
+static int vop_vringh_copy(struct vop_vdev *vdev, struct vringh_kiov *iov,
+			   void __user *ubuf, size_t len, bool read, int vr_idx,
+			   size_t *out_len)
+{
+	int ret = 0;
+	size_t partlen, tot_len = 0;
+
+	while (len && iov->i < iov->used) {
+		struct kvec *kiov = &iov->iov[iov->i];
+
+		partlen = min(kiov->iov_len, len);
+		if (read)
+			ret = vop_virtio_copy_to_user(vdev, ubuf, partlen,
+						      (u64)kiov->iov_base,
+						      kiov->iov_len,
+						      vr_idx);
+		else
+			ret = vop_virtio_copy_from_user(vdev, ubuf, partlen,
+							(u64)kiov->iov_base,
+							kiov->iov_len,
+							vr_idx);
+		if (ret) {
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, ret);
+			break;
+		}
+		len -= partlen;
+		ubuf += partlen;
+		tot_len += partlen;
+		iov->consumed += partlen;
+		kiov->iov_len -= partlen;
+		kiov->iov_base += partlen;
+		if (!kiov->iov_len) {
+			/* Fix up old iov element then increment. */
+			kiov->iov_len = iov->consumed;
+			kiov->iov_base -= iov->consumed;
+
+			iov->consumed = 0;
+			iov->i++;
+		}
+	}
+	*out_len = tot_len;
+	return ret;
+}
+
+/*
+ * Use the standard VRINGH infrastructure in the kernel to fetch new
+ * descriptors, initiate the copies and update the used ring.
+ */
+static int _vop_virtio_copy(struct vop_vdev *vdev, struct mic_copy_desc *copy)
+{
+	int ret = 0;
+	u32 iovcnt = copy->iovcnt;
+	struct iovec iov;
+	struct iovec __user *u_iov = copy->iov;
+	void __user *ubuf = NULL;
+	struct vop_vringh *vvr = &vdev->vvr[copy->vr_idx];
+	struct vringh_kiov *riov = &vvr->riov;
+	struct vringh_kiov *wiov = &vvr->wiov;
+	struct vringh *vrh = &vvr->vrh;
+	u16 *head = &vvr->head;
+	struct mic_vring *vr = &vvr->vring;
+	size_t len = 0, out_len;
+
+	copy->out_len = 0;
+	/* Fetch a new IOVEC if all previous elements have been processed */
+	if (riov->i == riov->used && wiov->i == wiov->used) {
+		ret = vringh_getdesc_kern(vrh, riov, wiov,
+					  head, GFP_KERNEL);
+		/* Check if there are available descriptors */
+		if (ret <= 0)
+			return ret;
+	}
+	while (iovcnt) {
+		if (!len) {
+			/* Copy over a new iovec from user space. */
+			ret = copy_from_user(&iov, u_iov, sizeof(*u_iov));
+			if (ret) {
+				ret = -EINVAL;
+				dev_err(vop_dev(vdev), "%s %d err %d\n",
+					__func__, __LINE__, ret);
+				break;
+			}
+			len = iov.iov_len;
+			ubuf = iov.iov_base;
+		}
+		/* Issue all the read descriptors first */
+		ret = vop_vringh_copy(vdev, riov, ubuf, len,
+				      MIC_VRINGH_READ, copy->vr_idx, &out_len);
+		if (ret) {
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, ret);
+			break;
+		}
+		len -= out_len;
+		ubuf += out_len;
+		copy->out_len += out_len;
+		/* Issue the write descriptors next */
+		ret = vop_vringh_copy(vdev, wiov, ubuf, len,
+				      !MIC_VRINGH_READ, copy->vr_idx, &out_len);
+		if (ret) {
+			dev_err(vop_dev(vdev), "%s %d err %d\n",
+				__func__, __LINE__, ret);
+			break;
+		}
+		len -= out_len;
+		ubuf += out_len;
+		copy->out_len += out_len;
+		if (!len) {
+			/* One user space iovec is now completed */
+			iovcnt--;
+			u_iov++;
+		}
+		/* Exit loop if all elements in KIOVs have been processed. */
+		if (riov->i == riov->used && wiov->i == wiov->used)
+			break;
+	}
+	/*
+	 * Update the used ring if a descriptor was available and some data was
+	 * copied in/out and the user asked for a used ring update.
+	 */
+	if (*head != USHRT_MAX && copy->out_len && copy->update_used) {
+		u32 total = 0;
+
+		/* Determine the total data consumed */
+		total += vop_vringh_iov_consumed(riov);
+		total += vop_vringh_iov_consumed(wiov);
+		vringh_complete_kern(vrh, *head, total);
+		*head = USHRT_MAX;
+		if (vringh_need_notify_kern(vrh) > 0)
+			vringh_notify(vrh);
+		vringh_kiov_cleanup(riov);
+		vringh_kiov_cleanup(wiov);
+		/* Update avail idx for user space */
+		vr->info->avail_idx = vrh->last_avail_idx;
+	}
+	return ret;
+}
+
+static inline int vop_verify_copy_args(struct vop_vdev *vdev,
+				       struct mic_copy_desc *copy)
+{
+	if (!vdev || copy->vr_idx >= vdev->dd->num_vq)
+		return -EINVAL;
+	return 0;
+}
+
+/* Copy a specified number of virtio descriptors in a chain */
+static int vop_virtio_copy_desc(struct vop_vdev *vdev,
+				struct mic_copy_desc *copy)
+{
+	int err;
+	struct vop_vringh *vvr;
+
+	err = vop_verify_copy_args(vdev, copy);
+	if (err)
+		return err;
+
+	vvr = &vdev->vvr[copy->vr_idx];
+	mutex_lock(&vvr->vr_mutex);
+	if (!vop_vdevup(vdev)) {
+		err = -ENODEV;
+		dev_err(vop_dev(vdev), "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto err;
+	}
+	err = _vop_virtio_copy(vdev, copy);
+	if (err) {
+		dev_err(vop_dev(vdev), "%s %d err %d\n",
+			__func__, __LINE__, err);
+	}
+err:
+	mutex_unlock(&vvr->vr_mutex);
+	return err;
+}
+
+static int vop_open(struct inode *inode, struct file *f)
+{
+	struct vop_vdev *vdev;
+	struct vop_info *vi = container_of(f->private_data,
+		struct vop_info, miscdev);
+
+	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
+	if (!vdev)
+		return -ENOMEM;
+	vdev->vi = vi;
+	mutex_init(&vdev->vdev_mutex);
+	f->private_data = vdev;
+	init_completion(&vdev->destroy);
+	complete(&vdev->destroy);
+	return 0;
+}
+
+static int vop_release(struct inode *inode, struct file *f)
+{
+	struct vop_vdev *vdev = f->private_data, *vdev_tmp;
+	struct vop_info *vi = vdev->vi;
+	struct list_head *pos, *tmp;
+	bool found = false;
+
+	mutex_lock(&vdev->vdev_mutex);
+	if (vdev->deleted)
+		goto unlock;
+	mutex_lock(&vi->vop_mutex);
+	list_for_each_safe(pos, tmp, &vi->vdev_list) {
+		vdev_tmp = list_entry(pos, struct vop_vdev, list);
+		if (vdev == vdev_tmp) {
+			vop_virtio_del_device(vdev);
+			list_del(pos);
+			found = true;
+			break;
+		}
+	}
+	mutex_unlock(&vi->vop_mutex);
+unlock:
+	mutex_unlock(&vdev->vdev_mutex);
+	if (!found)
+		wait_for_completion(&vdev->destroy);
+	f->private_data = NULL;
+	kfree(vdev);
+	return 0;
+}
+
+static long vop_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+{
+	struct vop_vdev *vdev = f->private_data;
+	struct vop_info *vi = vdev->vi;
+	void __user *argp = (void __user *)arg;
+	int ret;
+
+	switch (cmd) {
+	case MIC_VIRTIO_ADD_DEVICE:
+	{
+		struct mic_device_desc dd, *dd_config;
+
+		if (copy_from_user(&dd, argp, sizeof(dd)))
+			return -EFAULT;
+
+		if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE ||
+		    dd.num_vq > MIC_MAX_VRINGS)
+			return -EINVAL;
+
+		dd_config = memdup_user(argp, mic_desc_size(&dd));
+		if (IS_ERR(dd_config))
+			return PTR_ERR(dd_config);
+
+		/* Ensure desc has not changed between the two reads */
+		if (memcmp(&dd, dd_config, sizeof(dd))) {
+			ret = -EINVAL;
+			goto free_ret;
+		}
+		mutex_lock(&vdev->vdev_mutex);
+		mutex_lock(&vi->vop_mutex);
+		ret = vop_virtio_add_device(vdev, dd_config);
+		if (ret)
+			goto unlock_ret;
+		list_add_tail(&vdev->list, &vi->vdev_list);
+unlock_ret:
+		mutex_unlock(&vi->vop_mutex);
+		mutex_unlock(&vdev->vdev_mutex);
+free_ret:
+		kfree(dd_config);
+		return ret;
+	}
+	case MIC_VIRTIO_COPY_DESC:
+	{
+		struct mic_copy_desc copy;
+
+		mutex_lock(&vdev->vdev_mutex);
+		ret = vop_vdev_inited(vdev);
+		if (ret)
+			goto _unlock_ret;
+
+		if (copy_from_user(&copy, argp, sizeof(copy))) {
+			ret = -EFAULT;
+			goto _unlock_ret;
+		}
+
+		ret = vop_virtio_copy_desc(vdev, &copy);
+		if (ret < 0)
+			goto _unlock_ret;
+		if (copy_to_user(
+			&((struct mic_copy_desc __user *)argp)->out_len,
+			&copy.out_len, sizeof(copy.out_len)))
+			ret = -EFAULT;
+_unlock_ret:
+		mutex_unlock(&vdev->vdev_mutex);
+		return ret;
+	}
+	case MIC_VIRTIO_CONFIG_CHANGE:
+	{
+		void *buf;
+
+		mutex_lock(&vdev->vdev_mutex);
+		ret = vop_vdev_inited(vdev);
+		if (ret)
+			goto __unlock_ret;
+		buf = memdup_user(argp, vdev->dd->config_len);
+		if (IS_ERR(buf)) {
+			ret = PTR_ERR(buf);
+			goto __unlock_ret;
+		}
+		ret = vop_virtio_config_change(vdev, buf);
+		kfree(buf);
+__unlock_ret:
+		mutex_unlock(&vdev->vdev_mutex);
+		return ret;
+	}
+	default:
+		return -ENOIOCTLCMD;
+	};
+	return 0;
+}
+
+/*
+ * We return EPOLLIN | EPOLLOUT from poll when new buffers are enqueued, and
+ * not when previously enqueued buffers may be available. This means that
+ * in the card->host (TX) path, when userspace is unblocked by poll it
+ * must drain all available descriptors or it can stall.
+ */
+static __poll_t vop_poll(struct file *f, poll_table *wait)
+{
+	struct vop_vdev *vdev = f->private_data;
+	__poll_t mask = 0;
+
+	mutex_lock(&vdev->vdev_mutex);
+	if (vop_vdev_inited(vdev)) {
+		mask = EPOLLERR;
+		goto done;
+	}
+	poll_wait(f, &vdev->waitq, wait);
+	if (vop_vdev_inited(vdev)) {
+		mask = EPOLLERR;
+	} else if (vdev->poll_wake) {
+		vdev->poll_wake = 0;
+		mask = EPOLLIN | EPOLLOUT;
+	}
+done:
+	mutex_unlock(&vdev->vdev_mutex);
+	return mask;
+}
+
+static inline int
+vop_query_offset(struct vop_vdev *vdev, unsigned long offset,
+		 unsigned long *size, unsigned long *pa)
+{
+	struct vop_device *vpdev = vdev->vpdev;
+	unsigned long start = MIC_DP_SIZE;
+	int i;
+
+	/*
+	 * MMAP interface is as follows:
+	 * offset				region
+	 * 0x0					virtio device_page
+	 * 0x1000				first vring
+	 * 0x1000 + size of 1st vring		second vring
+	 * ....
+	 */
+	if (!offset) {
+		*pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
+		*size = MIC_DP_SIZE;
+		return 0;
+	}
+
+	for (i = 0; i < vdev->dd->num_vq; i++) {
+		struct vop_vringh *vvr = &vdev->vvr[i];
+
+		if (offset == start) {
+			*pa = virt_to_phys(vvr->vring.va);
+			*size = vvr->vring.len;
+			return 0;
+		}
+		start += vvr->vring.len;
+	}
+	return -1;
+}
+
+/*
+ * Maps the device page and virtio rings to user space for readonly access.
+ */
+static int vop_mmap(struct file *f, struct vm_area_struct *vma)
+{
+	struct vop_vdev *vdev = f->private_data;
+	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+	unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size;
+	int i, err;
+
+	err = vop_vdev_inited(vdev);
+	if (err)
+		goto ret;
+	if (vma->vm_flags & VM_WRITE) {
+		err = -EACCES;
+		goto ret;
+	}
+	while (size_rem) {
+		i = vop_query_offset(vdev, offset, &size, &pa);
+		if (i < 0) {
+			err = -EINVAL;
+			goto ret;
+		}
+		err = remap_pfn_range(vma, vma->vm_start + offset,
+				      pa >> PAGE_SHIFT, size,
+				      vma->vm_page_prot);
+		if (err)
+			goto ret;
+		size_rem -= size;
+		offset += size;
+	}
+ret:
+	return err;
+}
+
+static const struct file_operations vop_fops = {
+	.open = vop_open,
+	.release = vop_release,
+	.unlocked_ioctl = vop_ioctl,
+	.poll = vop_poll,
+	.mmap = vop_mmap,
+	.owner = THIS_MODULE,
+};
+
+int vop_host_init(struct vop_info *vi)
+{
+	int rc;
+	struct miscdevice *mdev;
+	struct vop_device *vpdev = vi->vpdev;
+
+	INIT_LIST_HEAD(&vi->vdev_list);
+	vi->dma_ch = vpdev->dma_ch;
+	mdev = &vi->miscdev;
+	mdev->minor = MISC_DYNAMIC_MINOR;
+	snprintf(vi->name, sizeof(vi->name), "vop_virtio%d", vpdev->index);
+	mdev->name = vi->name;
+	mdev->fops = &vop_fops;
+	mdev->parent = &vpdev->dev;
+
+	rc = misc_register(mdev);
+	if (rc)
+		dev_err(&vpdev->dev, "%s failed rc %d\n", __func__, rc);
+	return rc;
+}
+
+void vop_host_uninit(struct vop_info *vi)
+{
+	struct list_head *pos, *tmp;
+	struct vop_vdev *vdev;
+
+	mutex_lock(&vi->vop_mutex);
+	vop_virtio_reset_devices(vi);
+	list_for_each_safe(pos, tmp, &vi->vdev_list) {
+		vdev = list_entry(pos, struct vop_vdev, list);
+		list_del(pos);
+		reinit_completion(&vdev->destroy);
+		mutex_unlock(&vi->vop_mutex);
+		mutex_lock(&vdev->vdev_mutex);
+		vop_virtio_del_device(vdev);
+		vdev->deleted = true;
+		mutex_unlock(&vdev->vdev_mutex);
+		complete(&vdev->destroy);
+		mutex_lock(&vi->vop_mutex);
+	}
+	mutex_unlock(&vi->vop_mutex);
+	misc_deregister(&vi->miscdev);
+}
diff --git a/drivers/misc/ocxl/Kconfig b/drivers/misc/ocxl/Kconfig
new file mode 100644
index 000000000..4bbdb0d3c
--- /dev/null
+++ b/drivers/misc/ocxl/Kconfig
@@ -0,0 +1,31 @@
+#
+# Open Coherent Accelerator (OCXL) compatible devices
+#
+
+config OCXL_BASE
+	bool
+	default n
+	select PPC_COPRO_BASE
+
+config OCXL
+	tristate "OpenCAPI coherent accelerator support"
+	depends on PPC_POWERNV && PCI && EEH
+	select OCXL_BASE
+	default m
+	help
+	  Select this option to enable the ocxl driver for Open
+	  Coherent Accelerator Processor Interface (OpenCAPI) devices.
+
+	  OpenCAPI allows FPGA and ASIC accelerators to be coherently
+	  attached to a CPU over an OpenCAPI link.
+
+	  The ocxl driver enables userspace programs to access these
+	  accelerators through devices in /dev/ocxl/.
+
+	  For more information, see http://opencapi.org.
+
+	  This is not to be confused with the support for IBM CAPI
+	  accelerators (CONFIG_CXL), which are PCI-based instead of a
+	  dedicated OpenCAPI link, and don't follow the same protocol.
+
+	  If unsure, say N.
diff --git a/drivers/misc/ocxl/Makefile b/drivers/misc/ocxl/Makefile
new file mode 100644
index 000000000..5229dcda8
--- /dev/null
+++ b/drivers/misc/ocxl/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0+
+ccflags-$(CONFIG_PPC_WERROR)	+= -Werror
+
+ocxl-y				+= main.o pci.o config.o file.o pasid.o
+ocxl-y				+= link.o context.o afu_irq.o sysfs.o trace.o
+obj-$(CONFIG_OCXL)		+= ocxl.o
+
+# For tracepoints to include our trace.h from tracepoint infrastructure:
+CFLAGS_trace.o := -I$(src)
+
+# ccflags-y += -DDEBUG
diff --git a/drivers/misc/ocxl/afu_irq.c b/drivers/misc/ocxl/afu_irq.c
new file mode 100644
index 000000000..e70cfa245
--- /dev/null
+++ b/drivers/misc/ocxl/afu_irq.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/interrupt.h>
+#include <linux/eventfd.h>
+#include <asm/pnv-ocxl.h>
+#include "ocxl_internal.h"
+#include "trace.h"
+
+struct afu_irq {
+	int id;
+	int hw_irq;
+	unsigned int virq;
+	char *name;
+	u64 trigger_page;
+	struct eventfd_ctx *ev_ctx;
+};
+
+static int irq_offset_to_id(struct ocxl_context *ctx, u64 offset)
+{
+	return (offset - ctx->afu->irq_base_offset) >> PAGE_SHIFT;
+}
+
+static u64 irq_id_to_offset(struct ocxl_context *ctx, int id)
+{
+	return ctx->afu->irq_base_offset + (id << PAGE_SHIFT);
+}
+
+static irqreturn_t afu_irq_handler(int virq, void *data)
+{
+	struct afu_irq *irq = (struct afu_irq *) data;
+
+	trace_ocxl_afu_irq_receive(virq);
+	if (irq->ev_ctx)
+		eventfd_signal(irq->ev_ctx, 1);
+	return IRQ_HANDLED;
+}
+
+static int setup_afu_irq(struct ocxl_context *ctx, struct afu_irq *irq)
+{
+	int rc;
+
+	irq->virq = irq_create_mapping(NULL, irq->hw_irq);
+	if (!irq->virq) {
+		pr_err("irq_create_mapping failed\n");
+		return -ENOMEM;
+	}
+	pr_debug("hw_irq %d mapped to virq %u\n", irq->hw_irq, irq->virq);
+
+	irq->name = kasprintf(GFP_KERNEL, "ocxl-afu-%u", irq->virq);
+	if (!irq->name) {
+		irq_dispose_mapping(irq->virq);
+		return -ENOMEM;
+	}
+
+	rc = request_irq(irq->virq, afu_irq_handler, 0, irq->name, irq);
+	if (rc) {
+		kfree(irq->name);
+		irq->name = NULL;
+		irq_dispose_mapping(irq->virq);
+		pr_err("request_irq failed: %d\n", rc);
+		return rc;
+	}
+	return 0;
+}
+
+static void release_afu_irq(struct afu_irq *irq)
+{
+	free_irq(irq->virq, irq);
+	irq_dispose_mapping(irq->virq);
+	kfree(irq->name);
+}
+
+int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset)
+{
+	struct afu_irq *irq;
+	int rc;
+
+	irq = kzalloc(sizeof(struct afu_irq), GFP_KERNEL);
+	if (!irq)
+		return -ENOMEM;
+
+	/*
+	 * We limit the number of afu irqs per context and per link to
+	 * avoid a single process or user depleting the pool of IPIs
+	 */
+
+	mutex_lock(&ctx->irq_lock);
+
+	irq->id = idr_alloc(&ctx->irq_idr, irq, 0, MAX_IRQ_PER_CONTEXT,
+			GFP_KERNEL);
+	if (irq->id < 0) {
+		rc = -ENOSPC;
+		goto err_unlock;
+	}
+
+	rc = ocxl_link_irq_alloc(ctx->afu->fn->link, &irq->hw_irq,
+				&irq->trigger_page);
+	if (rc)
+		goto err_idr;
+
+	rc = setup_afu_irq(ctx, irq);
+	if (rc)
+		goto err_alloc;
+
+	*irq_offset = irq_id_to_offset(ctx, irq->id);
+
+	trace_ocxl_afu_irq_alloc(ctx->pasid, irq->id, irq->virq, irq->hw_irq,
+				*irq_offset);
+	mutex_unlock(&ctx->irq_lock);
+	return 0;
+
+err_alloc:
+	ocxl_link_free_irq(ctx->afu->fn->link, irq->hw_irq);
+err_idr:
+	idr_remove(&ctx->irq_idr, irq->id);
+err_unlock:
+	mutex_unlock(&ctx->irq_lock);
+	kfree(irq);
+	return rc;
+}
+
+static void afu_irq_free(struct afu_irq *irq, struct ocxl_context *ctx)
+{
+	trace_ocxl_afu_irq_free(ctx->pasid, irq->id);
+	if (ctx->mapping)
+		unmap_mapping_range(ctx->mapping,
+				irq_id_to_offset(ctx, irq->id),
+				1 << PAGE_SHIFT, 1);
+	release_afu_irq(irq);
+	if (irq->ev_ctx)
+		eventfd_ctx_put(irq->ev_ctx);
+	ocxl_link_free_irq(ctx->afu->fn->link, irq->hw_irq);
+	kfree(irq);
+}
+
+int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset)
+{
+	struct afu_irq *irq;
+	int id = irq_offset_to_id(ctx, irq_offset);
+
+	mutex_lock(&ctx->irq_lock);
+
+	irq = idr_find(&ctx->irq_idr, id);
+	if (!irq) {
+		mutex_unlock(&ctx->irq_lock);
+		return -EINVAL;
+	}
+	idr_remove(&ctx->irq_idr, irq->id);
+	afu_irq_free(irq, ctx);
+	mutex_unlock(&ctx->irq_lock);
+	return 0;
+}
+
+void ocxl_afu_irq_free_all(struct ocxl_context *ctx)
+{
+	struct afu_irq *irq;
+	int id;
+
+	mutex_lock(&ctx->irq_lock);
+	idr_for_each_entry(&ctx->irq_idr, irq, id)
+		afu_irq_free(irq, ctx);
+	mutex_unlock(&ctx->irq_lock);
+}
+
+int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset, int eventfd)
+{
+	struct afu_irq *irq;
+	struct eventfd_ctx *ev_ctx;
+	int rc = 0, id = irq_offset_to_id(ctx, irq_offset);
+
+	mutex_lock(&ctx->irq_lock);
+	irq = idr_find(&ctx->irq_idr, id);
+	if (!irq) {
+		rc = -EINVAL;
+		goto unlock;
+	}
+
+	ev_ctx = eventfd_ctx_fdget(eventfd);
+	if (IS_ERR(ev_ctx)) {
+		rc = -EINVAL;
+		goto unlock;
+	}
+
+	irq->ev_ctx = ev_ctx;
+unlock:
+	mutex_unlock(&ctx->irq_lock);
+	return rc;
+}
+
+u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset)
+{
+	struct afu_irq *irq;
+	int id = irq_offset_to_id(ctx, irq_offset);
+	u64 addr = 0;
+
+	mutex_lock(&ctx->irq_lock);
+	irq = idr_find(&ctx->irq_idr, id);
+	if (irq)
+		addr = irq->trigger_page;
+	mutex_unlock(&ctx->irq_lock);
+	return addr;
+}
diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
new file mode 100644
index 000000000..8f2c5d8bd
--- /dev/null
+++ b/drivers/misc/ocxl/config.c
@@ -0,0 +1,725 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/pci.h>
+#include <asm/pnv-ocxl.h>
+#include <misc/ocxl.h>
+#include <misc/ocxl-config.h>
+
+#define EXTRACT_BIT(val, bit) (!!(val & BIT(bit)))
+#define EXTRACT_BITS(val, s, e) ((val & GENMASK(e, s)) >> s)
+
+#define OCXL_DVSEC_AFU_IDX_MASK              GENMASK(5, 0)
+#define OCXL_DVSEC_ACTAG_MASK                GENMASK(11, 0)
+#define OCXL_DVSEC_PASID_MASK                GENMASK(19, 0)
+#define OCXL_DVSEC_PASID_LOG_MASK            GENMASK(4, 0)
+
+#define OCXL_DVSEC_TEMPL_VERSION         0x0
+#define OCXL_DVSEC_TEMPL_NAME            0x4
+#define OCXL_DVSEC_TEMPL_AFU_VERSION     0x1C
+#define OCXL_DVSEC_TEMPL_MMIO_GLOBAL     0x20
+#define OCXL_DVSEC_TEMPL_MMIO_GLOBAL_SZ  0x28
+#define OCXL_DVSEC_TEMPL_MMIO_PP         0x30
+#define OCXL_DVSEC_TEMPL_MMIO_PP_SZ      0x38
+#define OCXL_DVSEC_TEMPL_MEM_SZ          0x3C
+#define OCXL_DVSEC_TEMPL_WWID            0x40
+
+#define OCXL_MAX_AFU_PER_FUNCTION 64
+#define OCXL_TEMPL_LEN            0x58
+#define OCXL_TEMPL_NAME_LEN       24
+#define OCXL_CFG_TIMEOUT     3
+
+static int find_dvsec(struct pci_dev *dev, int dvsec_id)
+{
+	int vsec = 0;
+	u16 vendor, id;
+
+	while ((vsec = pci_find_next_ext_capability(dev, vsec,
+						    OCXL_EXT_CAP_ID_DVSEC))) {
+		pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
+				&vendor);
+		pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
+		if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
+			return vsec;
+	}
+	return 0;
+}
+
+static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
+{
+	int vsec = 0;
+	u16 vendor, id;
+	u8 idx;
+
+	while ((vsec = pci_find_next_ext_capability(dev, vsec,
+						    OCXL_EXT_CAP_ID_DVSEC))) {
+		pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
+				&vendor);
+		pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
+
+		if (vendor == PCI_VENDOR_ID_IBM &&
+			id == OCXL_DVSEC_AFU_CTRL_ID) {
+			pci_read_config_byte(dev,
+					vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX,
+					&idx);
+			if (idx == afu_idx)
+				return vsec;
+		}
+	}
+	return 0;
+}
+
+static int read_pasid(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+	u16 val;
+	int pos;
+
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_PASID);
+	if (!pos) {
+		/*
+		 * PASID capability is not mandatory, but there
+		 * shouldn't be any AFU
+		 */
+		dev_dbg(&dev->dev, "Function doesn't require any PASID\n");
+		fn->max_pasid_log = -1;
+		goto out;
+	}
+	pci_read_config_word(dev, pos + PCI_PASID_CAP, &val);
+	fn->max_pasid_log = EXTRACT_BITS(val, 8, 12);
+
+out:
+	dev_dbg(&dev->dev, "PASID capability:\n");
+	dev_dbg(&dev->dev, "  Max PASID log = %d\n", fn->max_pasid_log);
+	return 0;
+}
+
+static int read_dvsec_tl(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+	int pos;
+
+	pos = find_dvsec(dev, OCXL_DVSEC_TL_ID);
+	if (!pos && PCI_FUNC(dev->devfn) == 0) {
+		dev_err(&dev->dev, "Can't find TL DVSEC\n");
+		return -ENODEV;
+	}
+	if (pos && PCI_FUNC(dev->devfn) != 0) {
+		dev_err(&dev->dev, "TL DVSEC is only allowed on function 0\n");
+		return -ENODEV;
+	}
+	fn->dvsec_tl_pos = pos;
+	return 0;
+}
+
+static int read_dvsec_function(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+	int pos, afu_present;
+	u32 val;
+
+	pos = find_dvsec(dev, OCXL_DVSEC_FUNC_ID);
+	if (!pos) {
+		dev_err(&dev->dev, "Can't find function DVSEC\n");
+		return -ENODEV;
+	}
+	fn->dvsec_function_pos = pos;
+
+	pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val);
+	afu_present = EXTRACT_BIT(val, 31);
+	if (!afu_present) {
+		fn->max_afu_index = -1;
+		dev_dbg(&dev->dev, "Function doesn't define any AFU\n");
+		goto out;
+	}
+	fn->max_afu_index = EXTRACT_BITS(val, 24, 29);
+
+out:
+	dev_dbg(&dev->dev, "Function DVSEC:\n");
+	dev_dbg(&dev->dev, "  Max AFU index = %d\n", fn->max_afu_index);
+	return 0;
+}
+
+static int read_dvsec_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+	int pos;
+
+	if (fn->max_afu_index < 0) {
+		fn->dvsec_afu_info_pos = -1;
+		return 0;
+	}
+
+	pos = find_dvsec(dev, OCXL_DVSEC_AFU_INFO_ID);
+	if (!pos) {
+		dev_err(&dev->dev, "Can't find AFU information DVSEC\n");
+		return -ENODEV;
+	}
+	fn->dvsec_afu_info_pos = pos;
+	return 0;
+}
+
+static int read_dvsec_vendor(struct pci_dev *dev)
+{
+	int pos;
+	u32 cfg, tlx, dlx;
+
+	/*
+	 * vendor specific DVSEC is optional
+	 *
+	 * It's currently only used on function 0 to specify the
+	 * version of some logic blocks. Some older images may not
+	 * even have it so we ignore any errors
+	 */
+	if (PCI_FUNC(dev->devfn) != 0)
+		return 0;
+
+	pos = find_dvsec(dev, OCXL_DVSEC_VENDOR_ID);
+	if (!pos)
+		return 0;
+
+	pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_CFG_VERS, &cfg);
+	pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_TLX_VERS, &tlx);
+	pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_DLX_VERS, &dlx);
+
+	dev_dbg(&dev->dev, "Vendor specific DVSEC:\n");
+	dev_dbg(&dev->dev, "  CFG version = 0x%x\n", cfg);
+	dev_dbg(&dev->dev, "  TLX version = 0x%x\n", tlx);
+	dev_dbg(&dev->dev, "  DLX version = 0x%x\n", dlx);
+	return 0;
+}
+
+static int validate_function(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+	if (fn->max_pasid_log == -1 && fn->max_afu_index >= 0) {
+		dev_err(&dev->dev,
+			"AFUs are defined but no PASIDs are requested\n");
+		return -EINVAL;
+	}
+
+	if (fn->max_afu_index > OCXL_MAX_AFU_PER_FUNCTION) {
+		dev_err(&dev->dev,
+			"Max AFU index out of architectural limit (%d vs %d)\n",
+			fn->max_afu_index, OCXL_MAX_AFU_PER_FUNCTION);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int ocxl_config_read_function(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+	int rc;
+
+	rc = read_pasid(dev, fn);
+	if (rc) {
+		dev_err(&dev->dev, "Invalid PASID configuration: %d\n", rc);
+		return -ENODEV;
+	}
+
+	rc = read_dvsec_tl(dev, fn);
+	if (rc) {
+		dev_err(&dev->dev,
+			"Invalid Transaction Layer DVSEC configuration: %d\n",
+			rc);
+		return -ENODEV;
+	}
+
+	rc = read_dvsec_function(dev, fn);
+	if (rc) {
+		dev_err(&dev->dev,
+			"Invalid Function DVSEC configuration: %d\n", rc);
+		return -ENODEV;
+	}
+
+	rc = read_dvsec_afu_info(dev, fn);
+	if (rc) {
+		dev_err(&dev->dev, "Invalid AFU configuration: %d\n", rc);
+		return -ENODEV;
+	}
+
+	rc = read_dvsec_vendor(dev);
+	if (rc) {
+		dev_err(&dev->dev,
+			"Invalid vendor specific DVSEC configuration: %d\n",
+			rc);
+		return -ENODEV;
+	}
+
+	rc = validate_function(dev, fn);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(ocxl_config_read_function);
+
+static int read_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn,
+			int offset, u32 *data)
+{
+	u32 val;
+	unsigned long timeout = jiffies + (HZ * OCXL_CFG_TIMEOUT);
+	int pos = fn->dvsec_afu_info_pos;
+
+	/* Protect 'data valid' bit */
+	if (EXTRACT_BIT(offset, 31)) {
+		dev_err(&dev->dev, "Invalid offset in AFU info DVSEC\n");
+		return -EINVAL;
+	}
+
+	pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, offset);
+	pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, &val);
+	while (!EXTRACT_BIT(val, 31)) {
+		if (time_after_eq(jiffies, timeout)) {
+			dev_err(&dev->dev,
+				"Timeout while reading AFU info DVSEC (offset=%d)\n",
+				offset);
+			return -EBUSY;
+		}
+		cpu_relax();
+		pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, &val);
+	}
+	pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_DATA, data);
+	return 0;
+}
+
+int ocxl_config_check_afu_index(struct pci_dev *dev,
+				struct ocxl_fn_config *fn, int afu_idx)
+{
+	u32 val;
+	int rc, templ_major, templ_minor, len;
+
+	pci_write_config_byte(dev,
+			fn->dvsec_afu_info_pos + OCXL_DVSEC_AFU_INFO_AFU_IDX,
+			afu_idx);
+	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_VERSION, &val);
+	if (rc)
+		return rc;
+
+	/* AFU index map can have holes */
+	if (!val)
+		return 0;
+
+	templ_major = EXTRACT_BITS(val, 8, 15);
+	templ_minor = EXTRACT_BITS(val, 0, 7);
+	dev_dbg(&dev->dev, "AFU descriptor template version %d.%d\n",
+		templ_major, templ_minor);
+
+	len = EXTRACT_BITS(val, 16, 31);
+	if (len != OCXL_TEMPL_LEN) {
+		dev_warn(&dev->dev,
+			"Unexpected template length in AFU information (%#x)\n",
+			len);
+	}
+	return 1;
+}
+EXPORT_SYMBOL_GPL(ocxl_config_check_afu_index);
+
+static int read_afu_name(struct pci_dev *dev, struct ocxl_fn_config *fn,
+			struct ocxl_afu_config *afu)
+{
+	int i, rc;
+	u32 val, *ptr;
+
+	BUILD_BUG_ON(OCXL_AFU_NAME_SZ < OCXL_TEMPL_NAME_LEN);
+	for (i = 0; i < OCXL_TEMPL_NAME_LEN; i += 4) {
+		rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_NAME + i, &val);
+		if (rc)
+			return rc;
+		ptr = (u32 *) &afu->name[i];
+		*ptr = le32_to_cpu((__force __le32) val);
+	}
+	afu->name[OCXL_AFU_NAME_SZ - 1] = '\0'; /* play safe */
+	return 0;
+}
+
+static int read_afu_mmio(struct pci_dev *dev, struct ocxl_fn_config *fn,
+			struct ocxl_afu_config *afu)
+{
+	int rc;
+	u32 val;
+
+	/*
+	 * Global MMIO
+	 */
+	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL, &val);
+	if (rc)
+		return rc;
+	afu->global_mmio_bar = EXTRACT_BITS(val, 0, 2);
+	afu->global_mmio_offset = EXTRACT_BITS(val, 16, 31) << 16;
+
+	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL + 4, &val);
+	if (rc)
+		return rc;
+	afu->global_mmio_offset += (u64) val << 32;
+
+	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL_SZ, &val);
+	if (rc)
+		return rc;
+	afu->global_mmio_size = val;
+
+	/*
+	 * Per-process MMIO
+	 */
+	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP, &val);
+	if (rc)
+		return rc;
+	afu->pp_mmio_bar = EXTRACT_BITS(val, 0, 2);
+	afu->pp_mmio_offset = EXTRACT_BITS(val, 16, 31) << 16;
+
+	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP + 4, &val);
+	if (rc)
+		return rc;
+	afu->pp_mmio_offset += (u64) val << 32;
+
+	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP_SZ, &val);
+	if (rc)
+		return rc;
+	afu->pp_mmio_stride = val;
+
+	return 0;
+}
+
+static int read_afu_control(struct pci_dev *dev, struct ocxl_afu_config *afu)
+{
+	int pos;
+	u8 val8;
+	u16 val16;
+
+	pos = find_dvsec_afu_ctrl(dev, afu->idx);
+	if (!pos) {
+		dev_err(&dev->dev, "Can't find AFU control DVSEC for AFU %d\n",
+			afu->idx);
+		return -ENODEV;
+	}
+	afu->dvsec_afu_control_pos = pos;
+
+	pci_read_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_SUP, &val8);
+	afu->pasid_supported_log = EXTRACT_BITS(val8, 0, 4);
+
+	pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP, &val16);
+	afu->actag_supported = EXTRACT_BITS(val16, 0, 11);
+	return 0;
+}
+
+static bool char_allowed(int c)
+{
+	/*
+	 * Permitted Characters : Alphanumeric, hyphen, underscore, comma
+	 */
+	if ((c >= 0x30 && c <= 0x39) /* digits */ ||
+		(c >= 0x41 && c <= 0x5A) /* upper case */ ||
+		(c >= 0x61 && c <= 0x7A) /* lower case */ ||
+		c == 0 /* NULL */ ||
+		c == 0x2D /* - */ ||
+		c == 0x5F /* _ */ ||
+		c == 0x2C /* , */)
+		return true;
+	return false;
+}
+
+static int validate_afu(struct pci_dev *dev, struct ocxl_afu_config *afu)
+{
+	int i;
+
+	if (!afu->name[0]) {
+		dev_err(&dev->dev, "Empty AFU name\n");
+		return -EINVAL;
+	}
+	for (i = 0; i < OCXL_TEMPL_NAME_LEN; i++) {
+		if (!char_allowed(afu->name[i])) {
+			dev_err(&dev->dev,
+				"Invalid character in AFU name\n");
+			return -EINVAL;
+		}
+	}
+
+	if (afu->global_mmio_bar != 0 &&
+		afu->global_mmio_bar != 2 &&
+		afu->global_mmio_bar != 4) {
+		dev_err(&dev->dev, "Invalid global MMIO bar number\n");
+		return -EINVAL;
+	}
+	if (afu->pp_mmio_bar != 0 &&
+		afu->pp_mmio_bar != 2 &&
+		afu->pp_mmio_bar != 4) {
+		dev_err(&dev->dev, "Invalid per-process MMIO bar number\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int ocxl_config_read_afu(struct pci_dev *dev, struct ocxl_fn_config *fn,
+			struct ocxl_afu_config *afu, u8 afu_idx)
+{
+	int rc;
+	u32 val32;
+
+	/*
+	 * First, we need to write the AFU idx for the AFU we want to
+	 * access.
+	 */
+	WARN_ON((afu_idx & OCXL_DVSEC_AFU_IDX_MASK) != afu_idx);
+	afu->idx = afu_idx;
+	pci_write_config_byte(dev,
+			fn->dvsec_afu_info_pos + OCXL_DVSEC_AFU_INFO_AFU_IDX,
+			afu->idx);
+
+	rc = read_afu_name(dev, fn, afu);
+	if (rc)
+		return rc;
+
+	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_AFU_VERSION, &val32);
+	if (rc)
+		return rc;
+	afu->version_major = EXTRACT_BITS(val32, 24, 31);
+	afu->version_minor = EXTRACT_BITS(val32, 16, 23);
+	afu->afuc_type = EXTRACT_BITS(val32, 14, 15);
+	afu->afum_type = EXTRACT_BITS(val32, 12, 13);
+	afu->profile = EXTRACT_BITS(val32, 0, 7);
+
+	rc = read_afu_mmio(dev, fn, afu);
+	if (rc)
+		return rc;
+
+	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MEM_SZ, &val32);
+	if (rc)
+		return rc;
+	afu->log_mem_size = EXTRACT_BITS(val32, 0, 7);
+
+	rc = read_afu_control(dev, afu);
+	if (rc)
+		return rc;
+
+	dev_dbg(&dev->dev, "AFU configuration:\n");
+	dev_dbg(&dev->dev, "  name = %s\n", afu->name);
+	dev_dbg(&dev->dev, "  version = %d.%d\n", afu->version_major,
+		afu->version_minor);
+	dev_dbg(&dev->dev, "  global mmio bar = %hhu\n", afu->global_mmio_bar);
+	dev_dbg(&dev->dev, "  global mmio offset = %#llx\n",
+		afu->global_mmio_offset);
+	dev_dbg(&dev->dev, "  global mmio size = %#x\n", afu->global_mmio_size);
+	dev_dbg(&dev->dev, "  pp mmio bar = %hhu\n", afu->pp_mmio_bar);
+	dev_dbg(&dev->dev, "  pp mmio offset = %#llx\n", afu->pp_mmio_offset);
+	dev_dbg(&dev->dev, "  pp mmio stride = %#x\n", afu->pp_mmio_stride);
+	dev_dbg(&dev->dev, "  mem size (log) = %hhu\n", afu->log_mem_size);
+	dev_dbg(&dev->dev, "  pasid supported (log) = %u\n",
+		afu->pasid_supported_log);
+	dev_dbg(&dev->dev, "  actag supported = %u\n",
+		afu->actag_supported);
+
+	rc = validate_afu(dev, afu);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(ocxl_config_read_afu);
+
+int ocxl_config_get_actag_info(struct pci_dev *dev, u16 *base, u16 *enabled,
+			u16 *supported)
+{
+	int rc;
+
+	/*
+	 * This is really a simple wrapper for the kernel API, to
+	 * avoid an external driver using ocxl as a library to call
+	 * platform-dependent code
+	 */
+	rc = pnv_ocxl_get_actag(dev, base, enabled, supported);
+	if (rc) {
+		dev_err(&dev->dev, "Can't get actag for device: %d\n", rc);
+		return rc;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ocxl_config_get_actag_info);
+
+void ocxl_config_set_afu_actag(struct pci_dev *dev, int pos, int actag_base,
+			int actag_count)
+{
+	u16 val;
+
+	val = actag_count & OCXL_DVSEC_ACTAG_MASK;
+	pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_EN, val);
+
+	val = actag_base & OCXL_DVSEC_ACTAG_MASK;
+	pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_BASE, val);
+}
+EXPORT_SYMBOL_GPL(ocxl_config_set_afu_actag);
+
+int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count)
+{
+	return pnv_ocxl_get_pasid_count(dev, count);
+}
+EXPORT_SYMBOL_GPL(ocxl_config_get_pasid_info);
+
+void ocxl_config_set_afu_pasid(struct pci_dev *dev, int pos, int pasid_base,
+			u32 pasid_count_log)
+{
+	u8 val8;
+	u32 val32;
+
+	val8 = pasid_count_log & OCXL_DVSEC_PASID_LOG_MASK;
+	pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_EN, val8);
+
+	pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_BASE,
+			&val32);
+	val32 &= ~OCXL_DVSEC_PASID_MASK;
+	val32 |= pasid_base & OCXL_DVSEC_PASID_MASK;
+	pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_BASE,
+			val32);
+}
+EXPORT_SYMBOL_GPL(ocxl_config_set_afu_pasid);
+
+void ocxl_config_set_afu_state(struct pci_dev *dev, int pos, int enable)
+{
+	u8 val;
+
+	pci_read_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ENABLE, &val);
+	if (enable)
+		val |= 1;
+	else
+		val &= 0xFE;
+	pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ENABLE, val);
+}
+EXPORT_SYMBOL_GPL(ocxl_config_set_afu_state);
+
+int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec)
+{
+	u32 val;
+	__be32 *be32ptr;
+	u8 timers;
+	int i, rc;
+	long recv_cap;
+	char *recv_rate;
+
+	/*
+	 * Skip on function != 0, as the TL can only be defined on 0
+	 */
+	if (PCI_FUNC(dev->devfn) != 0)
+		return 0;
+
+	recv_rate = kzalloc(PNV_OCXL_TL_RATE_BUF_SIZE, GFP_KERNEL);
+	if (!recv_rate)
+		return -ENOMEM;
+	/*
+	 * The spec defines 64 templates for messages in the
+	 * Transaction Layer (TL).
+	 *
+	 * The host and device each support a subset, so we need to
+	 * configure the transmitters on each side to send only
+	 * templates the receiver understands, at a rate the receiver
+	 * can process.  Per the spec, template 0 must be supported by
+	 * everybody. That's the template which has been used by the
+	 * host and device so far.
+	 *
+	 * The sending rate limit must be set before the template is
+	 * enabled.
+	 */
+
+	/*
+	 * Device -> host
+	 */
+	rc = pnv_ocxl_get_tl_cap(dev, &recv_cap, recv_rate,
+				PNV_OCXL_TL_RATE_BUF_SIZE);
+	if (rc)
+		goto out;
+
+	for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
+		be32ptr = (__be32 *) &recv_rate[i];
+		pci_write_config_dword(dev,
+				tl_dvsec + OCXL_DVSEC_TL_SEND_RATE + i,
+				be32_to_cpu(*be32ptr));
+	}
+	val = recv_cap >> 32;
+	pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP, val);
+	val = recv_cap & GENMASK(31, 0);
+	pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP + 4, val);
+
+	/*
+	 * Host -> device
+	 */
+	for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
+		pci_read_config_dword(dev,
+				tl_dvsec + OCXL_DVSEC_TL_RECV_RATE + i,
+				&val);
+		be32ptr = (__be32 *) &recv_rate[i];
+		*be32ptr = cpu_to_be32(val);
+	}
+	pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP, &val);
+	recv_cap = (long) val << 32;
+	pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP + 4, &val);
+	recv_cap |= val;
+
+	rc = pnv_ocxl_set_tl_conf(dev, recv_cap, __pa(recv_rate),
+				PNV_OCXL_TL_RATE_BUF_SIZE);
+	if (rc)
+		goto out;
+
+	/*
+	 * Opencapi commands needing to be retried are classified per
+	 * the TL in 2 groups: short and long commands.
+	 *
+	 * The short back off timer it not used for now. It will be
+	 * for opencapi 4.0.
+	 *
+	 * The long back off timer is typically used when an AFU hits
+	 * a page fault but the NPU is already processing one. So the
+	 * AFU needs to wait before it can resubmit. Having a value
+	 * too low doesn't break anything, but can generate extra
+	 * traffic on the link.
+	 * We set it to 1.6 us for now. It's shorter than, but in the
+	 * same order of magnitude as the time spent to process a page
+	 * fault.
+	 */
+	timers = 0x2 << 4; /* long timer = 1.6 us */
+	pci_write_config_byte(dev, tl_dvsec + OCXL_DVSEC_TL_BACKOFF_TIMERS,
+			timers);
+
+	rc = 0;
+out:
+	kfree(recv_rate);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(ocxl_config_set_TL);
+
+int ocxl_config_terminate_pasid(struct pci_dev *dev, int afu_control, int pasid)
+{
+	u32 val;
+	unsigned long timeout;
+
+	pci_read_config_dword(dev, afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
+			&val);
+	if (EXTRACT_BIT(val, 20)) {
+		dev_err(&dev->dev,
+			"Can't terminate PASID %#x, previous termination didn't complete\n",
+			pasid);
+		return -EBUSY;
+	}
+
+	val &= ~OCXL_DVSEC_PASID_MASK;
+	val |= pasid & OCXL_DVSEC_PASID_MASK;
+	val |= BIT(20);
+	pci_write_config_dword(dev,
+			afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
+			val);
+
+	timeout = jiffies + (HZ * OCXL_CFG_TIMEOUT);
+	pci_read_config_dword(dev, afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
+			&val);
+	while (EXTRACT_BIT(val, 20)) {
+		if (time_after_eq(jiffies, timeout)) {
+			dev_err(&dev->dev,
+				"Timeout while waiting for AFU to terminate PASID %#x\n",
+				pasid);
+			return -EBUSY;
+		}
+		cpu_relax();
+		pci_read_config_dword(dev,
+				afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
+				&val);
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ocxl_config_terminate_pasid);
+
+void ocxl_config_set_actag(struct pci_dev *dev, int func_dvsec, u32 tag_first,
+			u32 tag_count)
+{
+	u32 val;
+
+	val = (tag_first & OCXL_DVSEC_ACTAG_MASK) << 16;
+	val |= tag_count & OCXL_DVSEC_ACTAG_MASK;
+	pci_write_config_dword(dev, func_dvsec + OCXL_DVSEC_FUNC_OFF_ACTAG,
+			val);
+}
+EXPORT_SYMBOL_GPL(ocxl_config_set_actag);
diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c
new file mode 100644
index 000000000..c10a940e3
--- /dev/null
+++ b/drivers/misc/ocxl/context.c
@@ -0,0 +1,282 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/sched/mm.h>
+#include "trace.h"
+#include "ocxl_internal.h"
+
+struct ocxl_context *ocxl_context_alloc(void)
+{
+	return kzalloc(sizeof(struct ocxl_context), GFP_KERNEL);
+}
+
+int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu,
+		struct address_space *mapping)
+{
+	int pasid;
+
+	ctx->afu = afu;
+	mutex_lock(&afu->contexts_lock);
+	pasid = idr_alloc(&afu->contexts_idr, ctx, afu->pasid_base,
+			afu->pasid_base + afu->pasid_max, GFP_KERNEL);
+	if (pasid < 0) {
+		mutex_unlock(&afu->contexts_lock);
+		return pasid;
+	}
+	afu->pasid_count++;
+	mutex_unlock(&afu->contexts_lock);
+
+	ctx->pasid = pasid;
+	ctx->status = OPENED;
+	mutex_init(&ctx->status_mutex);
+	ctx->mapping = mapping;
+	mutex_init(&ctx->mapping_lock);
+	init_waitqueue_head(&ctx->events_wq);
+	mutex_init(&ctx->xsl_error_lock);
+	mutex_init(&ctx->irq_lock);
+	idr_init(&ctx->irq_idr);
+	ctx->tidr = 0;
+
+	/*
+	 * Keep a reference on the AFU to make sure it's valid for the
+	 * duration of the life of the context
+	 */
+	ocxl_afu_get(afu);
+	return 0;
+}
+
+/*
+ * Callback for when a translation fault triggers an error
+ * data:	a pointer to the context which triggered the fault
+ * addr:	the address that triggered the error
+ * dsisr:	the value of the PPC64 dsisr register
+ */
+static void xsl_fault_error(void *data, u64 addr, u64 dsisr)
+{
+	struct ocxl_context *ctx = (struct ocxl_context *) data;
+
+	mutex_lock(&ctx->xsl_error_lock);
+	ctx->xsl_error.addr = addr;
+	ctx->xsl_error.dsisr = dsisr;
+	ctx->xsl_error.count++;
+	mutex_unlock(&ctx->xsl_error_lock);
+
+	wake_up_all(&ctx->events_wq);
+}
+
+int ocxl_context_attach(struct ocxl_context *ctx, u64 amr)
+{
+	int rc;
+
+	// Locks both status & tidr
+	mutex_lock(&ctx->status_mutex);
+	if (ctx->status != OPENED) {
+		rc = -EIO;
+		goto out;
+	}
+
+	rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid,
+			current->mm->context.id, ctx->tidr, amr, current->mm,
+			xsl_fault_error, ctx);
+	if (rc)
+		goto out;
+
+	ctx->status = ATTACHED;
+out:
+	mutex_unlock(&ctx->status_mutex);
+	return rc;
+}
+
+static vm_fault_t map_afu_irq(struct vm_area_struct *vma, unsigned long address,
+		u64 offset, struct ocxl_context *ctx)
+{
+	u64 trigger_addr;
+
+	trigger_addr = ocxl_afu_irq_get_addr(ctx, offset);
+	if (!trigger_addr)
+		return VM_FAULT_SIGBUS;
+
+	return vmf_insert_pfn(vma, address, trigger_addr >> PAGE_SHIFT);
+}
+
+static vm_fault_t map_pp_mmio(struct vm_area_struct *vma, unsigned long address,
+		u64 offset, struct ocxl_context *ctx)
+{
+	u64 pp_mmio_addr;
+	int pasid_off;
+	vm_fault_t ret;
+
+	if (offset >= ctx->afu->config.pp_mmio_stride)
+		return VM_FAULT_SIGBUS;
+
+	mutex_lock(&ctx->status_mutex);
+	if (ctx->status != ATTACHED) {
+		mutex_unlock(&ctx->status_mutex);
+		pr_debug("%s: Context not attached, failing mmio mmap\n",
+			__func__);
+		return VM_FAULT_SIGBUS;
+	}
+
+	pasid_off = ctx->pasid - ctx->afu->pasid_base;
+	pp_mmio_addr = ctx->afu->pp_mmio_start +
+		pasid_off * ctx->afu->config.pp_mmio_stride +
+		offset;
+
+	ret = vmf_insert_pfn(vma, address, pp_mmio_addr >> PAGE_SHIFT);
+	mutex_unlock(&ctx->status_mutex);
+	return ret;
+}
+
+static vm_fault_t ocxl_mmap_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct ocxl_context *ctx = vma->vm_file->private_data;
+	u64 offset;
+	vm_fault_t ret;
+
+	offset = vmf->pgoff << PAGE_SHIFT;
+	pr_debug("%s: pasid %d address 0x%lx offset 0x%llx\n", __func__,
+		ctx->pasid, vmf->address, offset);
+
+	if (offset < ctx->afu->irq_base_offset)
+		ret = map_pp_mmio(vma, vmf->address, offset, ctx);
+	else
+		ret = map_afu_irq(vma, vmf->address, offset, ctx);
+	return ret;
+}
+
+static const struct vm_operations_struct ocxl_vmops = {
+	.fault = ocxl_mmap_fault,
+};
+
+static int check_mmap_afu_irq(struct ocxl_context *ctx,
+			struct vm_area_struct *vma)
+{
+	/* only one page */
+	if (vma_pages(vma) != 1)
+		return -EINVAL;
+
+	/* check offset validty */
+	if (!ocxl_afu_irq_get_addr(ctx, vma->vm_pgoff << PAGE_SHIFT))
+		return -EINVAL;
+
+	/*
+	 * trigger page should only be accessible in write mode.
+	 *
+	 * It's a bit theoretical, as a page mmaped with only
+	 * PROT_WRITE is currently readable, but it doesn't hurt.
+	 */
+	if ((vma->vm_flags & VM_READ) || (vma->vm_flags & VM_EXEC) ||
+		!(vma->vm_flags & VM_WRITE))
+		return -EINVAL;
+	vma->vm_flags &= ~(VM_MAYREAD | VM_MAYEXEC);
+	return 0;
+}
+
+static int check_mmap_mmio(struct ocxl_context *ctx,
+			struct vm_area_struct *vma)
+{
+	if ((vma_pages(vma) + vma->vm_pgoff) >
+		(ctx->afu->config.pp_mmio_stride >> PAGE_SHIFT))
+		return -EINVAL;
+	return 0;
+}
+
+int ocxl_context_mmap(struct ocxl_context *ctx, struct vm_area_struct *vma)
+{
+	int rc;
+
+	if ((vma->vm_pgoff << PAGE_SHIFT) < ctx->afu->irq_base_offset)
+		rc = check_mmap_mmio(ctx, vma);
+	else
+		rc = check_mmap_afu_irq(ctx, vma);
+	if (rc)
+		return rc;
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_ops = &ocxl_vmops;
+	return 0;
+}
+
+int ocxl_context_detach(struct ocxl_context *ctx)
+{
+	struct pci_dev *dev;
+	int afu_control_pos;
+	enum ocxl_context_status status;
+	int rc;
+
+	mutex_lock(&ctx->status_mutex);
+	status = ctx->status;
+	ctx->status = CLOSED;
+	mutex_unlock(&ctx->status_mutex);
+	if (status != ATTACHED)
+		return 0;
+
+	dev = to_pci_dev(ctx->afu->fn->dev.parent);
+	afu_control_pos = ctx->afu->config.dvsec_afu_control_pos;
+
+	mutex_lock(&ctx->afu->afu_control_lock);
+	rc = ocxl_config_terminate_pasid(dev, afu_control_pos, ctx->pasid);
+	mutex_unlock(&ctx->afu->afu_control_lock);
+	trace_ocxl_terminate_pasid(ctx->pasid, rc);
+	if (rc) {
+		/*
+		 * If we timeout waiting for the AFU to terminate the
+		 * pasid, then it's dangerous to clean up the Process
+		 * Element entry in the SPA, as it may be referenced
+		 * in the future by the AFU. In which case, we would
+		 * checkstop because of an invalid PE access (FIR
+		 * register 2, bit 42). So leave the PE
+		 * defined. Caller shouldn't free the context so that
+		 * PASID remains allocated.
+		 *
+		 * A link reset will be required to cleanup the AFU
+		 * and the SPA.
+		 */
+		if (rc == -EBUSY)
+			return rc;
+	}
+	rc = ocxl_link_remove_pe(ctx->afu->fn->link, ctx->pasid);
+	if (rc) {
+		dev_warn(&ctx->afu->dev,
+			"Couldn't remove PE entry cleanly: %d\n", rc);
+	}
+	return 0;
+}
+
+void ocxl_context_detach_all(struct ocxl_afu *afu)
+{
+	struct ocxl_context *ctx;
+	int tmp;
+
+	mutex_lock(&afu->contexts_lock);
+	idr_for_each_entry(&afu->contexts_idr, ctx, tmp) {
+		ocxl_context_detach(ctx);
+		/*
+		 * We are force detaching - remove any active mmio
+		 * mappings so userspace cannot interfere with the
+		 * card if it comes back.  Easiest way to exercise
+		 * this is to unbind and rebind the driver via sysfs
+		 * while it is in use.
+		 */
+		mutex_lock(&ctx->mapping_lock);
+		if (ctx->mapping)
+			unmap_mapping_range(ctx->mapping, 0, 0, 1);
+		mutex_unlock(&ctx->mapping_lock);
+	}
+	mutex_unlock(&afu->contexts_lock);
+}
+
+void ocxl_context_free(struct ocxl_context *ctx)
+{
+	mutex_lock(&ctx->afu->contexts_lock);
+	ctx->afu->pasid_count--;
+	idr_remove(&ctx->afu->contexts_idr, ctx->pasid);
+	mutex_unlock(&ctx->afu->contexts_lock);
+
+	ocxl_afu_irq_free_all(ctx);
+	idr_destroy(&ctx->irq_idr);
+	/* reference to the AFU taken in ocxl_context_init */
+	ocxl_afu_put(ctx->afu);
+	kfree(ctx);
+}
diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c
new file mode 100644
index 000000000..e6a607488
--- /dev/null
+++ b/drivers/misc/ocxl/file.c
@@ -0,0 +1,541 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/sched/signal.h>
+#include <linux/uaccess.h>
+#include <uapi/misc/ocxl.h>
+#include <asm/reg.h>
+#include <asm/switch_to.h>
+#include "ocxl_internal.h"
+
+
+#define OCXL_NUM_MINORS 256 /* Total to reserve */
+
+static dev_t ocxl_dev;
+static struct class *ocxl_class;
+static struct mutex minors_idr_lock;
+static struct idr minors_idr;
+
+static struct ocxl_afu *find_and_get_afu(dev_t devno)
+{
+	struct ocxl_afu *afu;
+	int afu_minor;
+
+	afu_minor = MINOR(devno);
+	/*
+	 * We don't declare an RCU critical section here, as our AFU
+	 * is protected by a reference counter on the device. By the time the
+	 * minor number of a device is removed from the idr, the ref count of
+	 * the device is already at 0, so no user API will access that AFU and
+	 * this function can't return it.
+	 */
+	afu = idr_find(&minors_idr, afu_minor);
+	if (afu)
+		ocxl_afu_get(afu);
+	return afu;
+}
+
+static int allocate_afu_minor(struct ocxl_afu *afu)
+{
+	int minor;
+
+	mutex_lock(&minors_idr_lock);
+	minor = idr_alloc(&minors_idr, afu, 0, OCXL_NUM_MINORS, GFP_KERNEL);
+	mutex_unlock(&minors_idr_lock);
+	return minor;
+}
+
+static void free_afu_minor(struct ocxl_afu *afu)
+{
+	mutex_lock(&minors_idr_lock);
+	idr_remove(&minors_idr, MINOR(afu->dev.devt));
+	mutex_unlock(&minors_idr_lock);
+}
+
+static int afu_open(struct inode *inode, struct file *file)
+{
+	struct ocxl_afu *afu;
+	struct ocxl_context *ctx;
+	int rc;
+
+	pr_debug("%s for device %x\n", __func__, inode->i_rdev);
+
+	afu = find_and_get_afu(inode->i_rdev);
+	if (!afu)
+		return -ENODEV;
+
+	ctx = ocxl_context_alloc();
+	if (!ctx) {
+		rc = -ENOMEM;
+		goto put_afu;
+	}
+
+	rc = ocxl_context_init(ctx, afu, inode->i_mapping);
+	if (rc)
+		goto put_afu;
+	file->private_data = ctx;
+	ocxl_afu_put(afu);
+	return 0;
+
+put_afu:
+	ocxl_afu_put(afu);
+	return rc;
+}
+
+static long afu_ioctl_attach(struct ocxl_context *ctx,
+			struct ocxl_ioctl_attach __user *uarg)
+{
+	struct ocxl_ioctl_attach arg;
+	u64 amr = 0;
+	int rc;
+
+	pr_debug("%s for context %d\n", __func__, ctx->pasid);
+
+	if (copy_from_user(&arg, uarg, sizeof(arg)))
+		return -EFAULT;
+
+	/* Make sure reserved fields are not set for forward compatibility */
+	if (arg.reserved1 || arg.reserved2 || arg.reserved3)
+		return -EINVAL;
+
+	amr = arg.amr & mfspr(SPRN_UAMOR);
+	rc = ocxl_context_attach(ctx, amr);
+	return rc;
+}
+
+static long afu_ioctl_get_metadata(struct ocxl_context *ctx,
+		struct ocxl_ioctl_metadata __user *uarg)
+{
+	struct ocxl_ioctl_metadata arg;
+
+	memset(&arg, 0, sizeof(arg));
+
+	arg.version = 0;
+
+	arg.afu_version_major = ctx->afu->config.version_major;
+	arg.afu_version_minor = ctx->afu->config.version_minor;
+	arg.pasid = ctx->pasid;
+	arg.pp_mmio_size = ctx->afu->config.pp_mmio_stride;
+	arg.global_mmio_size = ctx->afu->config.global_mmio_size;
+
+	if (copy_to_user(uarg, &arg, sizeof(arg)))
+		return -EFAULT;
+
+	return 0;
+}
+
+#ifdef CONFIG_PPC64
+static long afu_ioctl_enable_p9_wait(struct ocxl_context *ctx,
+		struct ocxl_ioctl_p9_wait __user *uarg)
+{
+	struct ocxl_ioctl_p9_wait arg;
+
+	memset(&arg, 0, sizeof(arg));
+
+	if (cpu_has_feature(CPU_FTR_P9_TIDR)) {
+		enum ocxl_context_status status;
+
+		// Locks both status & tidr
+		mutex_lock(&ctx->status_mutex);
+		if (!ctx->tidr) {
+			if (set_thread_tidr(current)) {
+				mutex_unlock(&ctx->status_mutex);
+				return -ENOENT;
+			}
+
+			ctx->tidr = current->thread.tidr;
+		}
+
+		status = ctx->status;
+		mutex_unlock(&ctx->status_mutex);
+
+		if (status == ATTACHED) {
+			int rc;
+			struct link *link = ctx->afu->fn->link;
+
+			rc = ocxl_link_update_pe(link, ctx->pasid, ctx->tidr);
+			if (rc)
+				return rc;
+		}
+
+		arg.thread_id = ctx->tidr;
+	} else
+		return -ENOENT;
+
+	if (copy_to_user(uarg, &arg, sizeof(arg)))
+		return -EFAULT;
+
+	return 0;
+}
+#endif
+
+
+static long afu_ioctl_get_features(struct ocxl_context *ctx,
+		struct ocxl_ioctl_features __user *uarg)
+{
+	struct ocxl_ioctl_features arg;
+
+	memset(&arg, 0, sizeof(arg));
+
+#ifdef CONFIG_PPC64
+	if (cpu_has_feature(CPU_FTR_P9_TIDR))
+		arg.flags[0] |= OCXL_IOCTL_FEATURES_FLAGS0_P9_WAIT;
+#endif
+
+	if (copy_to_user(uarg, &arg, sizeof(arg)))
+		return -EFAULT;
+
+	return 0;
+}
+
+#define CMD_STR(x) (x == OCXL_IOCTL_ATTACH ? "ATTACH" :			\
+			x == OCXL_IOCTL_IRQ_ALLOC ? "IRQ_ALLOC" :	\
+			x == OCXL_IOCTL_IRQ_FREE ? "IRQ_FREE" :		\
+			x == OCXL_IOCTL_IRQ_SET_FD ? "IRQ_SET_FD" :	\
+			x == OCXL_IOCTL_GET_METADATA ? "GET_METADATA" :	\
+			x == OCXL_IOCTL_ENABLE_P9_WAIT ? "ENABLE_P9_WAIT" :	\
+			x == OCXL_IOCTL_GET_FEATURES ? "GET_FEATURES" :	\
+			"UNKNOWN")
+
+static long afu_ioctl(struct file *file, unsigned int cmd,
+		unsigned long args)
+{
+	struct ocxl_context *ctx = file->private_data;
+	struct ocxl_ioctl_irq_fd irq_fd;
+	u64 irq_offset;
+	long rc;
+
+	pr_debug("%s for context %d, command %s\n", __func__, ctx->pasid,
+		CMD_STR(cmd));
+
+	if (ctx->status == CLOSED)
+		return -EIO;
+
+	switch (cmd) {
+	case OCXL_IOCTL_ATTACH:
+		rc = afu_ioctl_attach(ctx,
+				(struct ocxl_ioctl_attach __user *) args);
+		break;
+
+	case OCXL_IOCTL_IRQ_ALLOC:
+		rc = ocxl_afu_irq_alloc(ctx, &irq_offset);
+		if (!rc) {
+			rc = copy_to_user((u64 __user *) args, &irq_offset,
+					sizeof(irq_offset));
+			if (rc) {
+				ocxl_afu_irq_free(ctx, irq_offset);
+				return -EFAULT;
+			}
+		}
+		break;
+
+	case OCXL_IOCTL_IRQ_FREE:
+		rc = copy_from_user(&irq_offset, (u64 __user *) args,
+				sizeof(irq_offset));
+		if (rc)
+			return -EFAULT;
+		rc = ocxl_afu_irq_free(ctx, irq_offset);
+		break;
+
+	case OCXL_IOCTL_IRQ_SET_FD:
+		rc = copy_from_user(&irq_fd, (u64 __user *) args,
+				sizeof(irq_fd));
+		if (rc)
+			return -EFAULT;
+		if (irq_fd.reserved)
+			return -EINVAL;
+		rc = ocxl_afu_irq_set_fd(ctx, irq_fd.irq_offset,
+					irq_fd.eventfd);
+		break;
+
+	case OCXL_IOCTL_GET_METADATA:
+		rc = afu_ioctl_get_metadata(ctx,
+				(struct ocxl_ioctl_metadata __user *) args);
+		break;
+
+#ifdef CONFIG_PPC64
+	case OCXL_IOCTL_ENABLE_P9_WAIT:
+		rc = afu_ioctl_enable_p9_wait(ctx,
+				(struct ocxl_ioctl_p9_wait __user *) args);
+		break;
+#endif
+
+	case OCXL_IOCTL_GET_FEATURES:
+		rc = afu_ioctl_get_features(ctx,
+				(struct ocxl_ioctl_features __user *) args);
+		break;
+
+	default:
+		rc = -EINVAL;
+	}
+	return rc;
+}
+
+static long afu_compat_ioctl(struct file *file, unsigned int cmd,
+			unsigned long args)
+{
+	return afu_ioctl(file, cmd, args);
+}
+
+static int afu_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct ocxl_context *ctx = file->private_data;
+
+	pr_debug("%s for context %d\n", __func__, ctx->pasid);
+	return ocxl_context_mmap(ctx, vma);
+}
+
+static bool has_xsl_error(struct ocxl_context *ctx)
+{
+	bool ret;
+
+	mutex_lock(&ctx->xsl_error_lock);
+	ret = !!ctx->xsl_error.addr;
+	mutex_unlock(&ctx->xsl_error_lock);
+
+	return ret;
+}
+
+/*
+ * Are there any events pending on the AFU
+ * ctx: The AFU context
+ * Returns: true if there are events pending
+ */
+static bool afu_events_pending(struct ocxl_context *ctx)
+{
+	if (has_xsl_error(ctx))
+		return true;
+	return false;
+}
+
+static unsigned int afu_poll(struct file *file, struct poll_table_struct *wait)
+{
+	struct ocxl_context *ctx = file->private_data;
+	unsigned int mask = 0;
+	bool closed;
+
+	pr_debug("%s for context %d\n", __func__, ctx->pasid);
+
+	poll_wait(file, &ctx->events_wq, wait);
+
+	mutex_lock(&ctx->status_mutex);
+	closed = (ctx->status == CLOSED);
+	mutex_unlock(&ctx->status_mutex);
+
+	if (afu_events_pending(ctx))
+		mask = EPOLLIN | EPOLLRDNORM;
+	else if (closed)
+		mask = EPOLLERR;
+
+	return mask;
+}
+
+/*
+ * Populate the supplied buffer with a single XSL error
+ * ctx:	The AFU context to report the error from
+ * header: the event header to populate
+ * buf: The buffer to write the body into (should be at least
+ *      AFU_EVENT_BODY_XSL_ERROR_SIZE)
+ * Return: the amount of buffer that was populated
+ */
+static ssize_t append_xsl_error(struct ocxl_context *ctx,
+				struct ocxl_kernel_event_header *header,
+				char __user *buf)
+{
+	struct ocxl_kernel_event_xsl_fault_error body;
+
+	memset(&body, 0, sizeof(body));
+
+	mutex_lock(&ctx->xsl_error_lock);
+	if (!ctx->xsl_error.addr) {
+		mutex_unlock(&ctx->xsl_error_lock);
+		return 0;
+	}
+
+	body.addr = ctx->xsl_error.addr;
+	body.dsisr = ctx->xsl_error.dsisr;
+	body.count = ctx->xsl_error.count;
+
+	ctx->xsl_error.addr = 0;
+	ctx->xsl_error.dsisr = 0;
+	ctx->xsl_error.count = 0;
+
+	mutex_unlock(&ctx->xsl_error_lock);
+
+	header->type = OCXL_AFU_EVENT_XSL_FAULT_ERROR;
+
+	if (copy_to_user(buf, &body, sizeof(body)))
+		return -EFAULT;
+
+	return sizeof(body);
+}
+
+#define AFU_EVENT_BODY_MAX_SIZE sizeof(struct ocxl_kernel_event_xsl_fault_error)
+
+/*
+ * Reports events on the AFU
+ * Format:
+ *	Header (struct ocxl_kernel_event_header)
+ *	Body (struct ocxl_kernel_event_*)
+ *	Header...
+ */
+static ssize_t afu_read(struct file *file, char __user *buf, size_t count,
+			loff_t *off)
+{
+	struct ocxl_context *ctx = file->private_data;
+	struct ocxl_kernel_event_header header;
+	ssize_t rc;
+	ssize_t used = 0;
+	DEFINE_WAIT(event_wait);
+
+	memset(&header, 0, sizeof(header));
+
+	/* Require offset to be 0 */
+	if (*off != 0)
+		return -EINVAL;
+
+	if (count < (sizeof(struct ocxl_kernel_event_header) +
+			AFU_EVENT_BODY_MAX_SIZE))
+		return -EINVAL;
+
+	for (;;) {
+		prepare_to_wait(&ctx->events_wq, &event_wait,
+				TASK_INTERRUPTIBLE);
+
+		if (afu_events_pending(ctx))
+			break;
+
+		if (ctx->status == CLOSED)
+			break;
+
+		if (file->f_flags & O_NONBLOCK) {
+			finish_wait(&ctx->events_wq, &event_wait);
+			return -EAGAIN;
+		}
+
+		if (signal_pending(current)) {
+			finish_wait(&ctx->events_wq, &event_wait);
+			return -ERESTARTSYS;
+		}
+
+		schedule();
+	}
+
+	finish_wait(&ctx->events_wq, &event_wait);
+
+	if (has_xsl_error(ctx)) {
+		used = append_xsl_error(ctx, &header, buf + sizeof(header));
+		if (used < 0)
+			return used;
+	}
+
+	if (!afu_events_pending(ctx))
+		header.flags |= OCXL_KERNEL_EVENT_FLAG_LAST;
+
+	if (copy_to_user(buf, &header, sizeof(header)))
+		return -EFAULT;
+
+	used += sizeof(header);
+
+	rc = used;
+	return rc;
+}
+
+static int afu_release(struct inode *inode, struct file *file)
+{
+	struct ocxl_context *ctx = file->private_data;
+	int rc;
+
+	pr_debug("%s for device %x\n", __func__, inode->i_rdev);
+	rc = ocxl_context_detach(ctx);
+	mutex_lock(&ctx->mapping_lock);
+	ctx->mapping = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+	wake_up_all(&ctx->events_wq);
+	if (rc != -EBUSY)
+		ocxl_context_free(ctx);
+	return 0;
+}
+
+static const struct file_operations ocxl_afu_fops = {
+	.owner		= THIS_MODULE,
+	.open           = afu_open,
+	.unlocked_ioctl = afu_ioctl,
+	.compat_ioctl   = afu_compat_ioctl,
+	.mmap           = afu_mmap,
+	.poll           = afu_poll,
+	.read           = afu_read,
+	.release        = afu_release,
+};
+
+int ocxl_create_cdev(struct ocxl_afu *afu)
+{
+	int rc;
+
+	cdev_init(&afu->cdev, &ocxl_afu_fops);
+	rc = cdev_add(&afu->cdev, afu->dev.devt, 1);
+	if (rc) {
+		dev_err(&afu->dev, "Unable to add afu char device: %d\n", rc);
+		return rc;
+	}
+	return 0;
+}
+
+void ocxl_destroy_cdev(struct ocxl_afu *afu)
+{
+	cdev_del(&afu->cdev);
+}
+
+int ocxl_register_afu(struct ocxl_afu *afu)
+{
+	int minor;
+
+	minor = allocate_afu_minor(afu);
+	if (minor < 0)
+		return minor;
+	afu->dev.devt = MKDEV(MAJOR(ocxl_dev), minor);
+	afu->dev.class = ocxl_class;
+	return device_register(&afu->dev);
+}
+
+void ocxl_unregister_afu(struct ocxl_afu *afu)
+{
+	free_afu_minor(afu);
+}
+
+static char *ocxl_devnode(struct device *dev, umode_t *mode)
+{
+	return kasprintf(GFP_KERNEL, "ocxl/%s", dev_name(dev));
+}
+
+int ocxl_file_init(void)
+{
+	int rc;
+
+	mutex_init(&minors_idr_lock);
+	idr_init(&minors_idr);
+
+	rc = alloc_chrdev_region(&ocxl_dev, 0, OCXL_NUM_MINORS, "ocxl");
+	if (rc) {
+		pr_err("Unable to allocate ocxl major number: %d\n", rc);
+		return rc;
+	}
+
+	ocxl_class = class_create(THIS_MODULE, "ocxl");
+	if (IS_ERR(ocxl_class)) {
+		pr_err("Unable to create ocxl class\n");
+		unregister_chrdev_region(ocxl_dev, OCXL_NUM_MINORS);
+		return PTR_ERR(ocxl_class);
+	}
+
+	ocxl_class->devnode = ocxl_devnode;
+	return 0;
+}
+
+void ocxl_file_exit(void)
+{
+	class_destroy(ocxl_class);
+	unregister_chrdev_region(ocxl_dev, OCXL_NUM_MINORS);
+	idr_destroy(&minors_idr);
+}
diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
new file mode 100644
index 000000000..646d16450
--- /dev/null
+++ b/drivers/misc/ocxl/link.c
@@ -0,0 +1,690 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/sched/mm.h>
+#include <linux/mutex.h>
+#include <linux/mm_types.h>
+#include <linux/mmu_context.h>
+#include <asm/copro.h>
+#include <asm/pnv-ocxl.h>
+#include <misc/ocxl.h>
+#include "ocxl_internal.h"
+#include "trace.h"
+
+
+#define SPA_PASID_BITS		15
+#define SPA_PASID_MAX		((1 << SPA_PASID_BITS) - 1)
+#define SPA_PE_MASK		SPA_PASID_MAX
+#define SPA_SPA_SIZE_LOG	22 /* Each SPA is 4 Mb */
+
+#define SPA_CFG_SF		(1ull << (63-0))
+#define SPA_CFG_TA		(1ull << (63-1))
+#define SPA_CFG_HV		(1ull << (63-3))
+#define SPA_CFG_UV		(1ull << (63-4))
+#define SPA_CFG_XLAT_hpt	(0ull << (63-6)) /* Hashed page table (HPT) mode */
+#define SPA_CFG_XLAT_roh	(2ull << (63-6)) /* Radix on HPT mode */
+#define SPA_CFG_XLAT_ror	(3ull << (63-6)) /* Radix on Radix mode */
+#define SPA_CFG_PR		(1ull << (63-49))
+#define SPA_CFG_TC		(1ull << (63-54))
+#define SPA_CFG_DR		(1ull << (63-59))
+
+#define SPA_XSL_TF		(1ull << (63-3))  /* Translation fault */
+#define SPA_XSL_S		(1ull << (63-38)) /* Store operation */
+
+#define SPA_PE_VALID		0x80000000
+
+
+struct pe_data {
+	struct mm_struct *mm;
+	/* callback to trigger when a translation fault occurs */
+	void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr);
+	/* opaque pointer to be passed to the above callback */
+	void *xsl_err_data;
+	struct rcu_head rcu;
+};
+
+struct spa {
+	struct ocxl_process_element *spa_mem;
+	int spa_order;
+	struct mutex spa_lock;
+	struct radix_tree_root pe_tree; /* Maps PE handles to pe_data */
+	char *irq_name;
+	int virq;
+	void __iomem *reg_dsisr;
+	void __iomem *reg_dar;
+	void __iomem *reg_tfc;
+	void __iomem *reg_pe_handle;
+	/*
+	 * The following field are used by the memory fault
+	 * interrupt handler. We can only have one interrupt at a
+	 * time. The NPU won't raise another interrupt until the
+	 * previous one has been ack'd by writing to the TFC register
+	 */
+	struct xsl_fault {
+		struct work_struct fault_work;
+		u64 pe;
+		u64 dsisr;
+		u64 dar;
+		struct pe_data pe_data;
+	} xsl_fault;
+};
+
+/*
+ * A opencapi link can be used be by several PCI functions. We have
+ * one link per device slot.
+ *
+ * A linked list of opencapi links should suffice, as there's a
+ * limited number of opencapi slots on a system and lookup is only
+ * done when the device is probed
+ */
+struct link {
+	struct list_head list;
+	struct kref ref;
+	int domain;
+	int bus;
+	int dev;
+	atomic_t irq_available;
+	struct spa *spa;
+	void *platform_data;
+};
+static struct list_head links_list = LIST_HEAD_INIT(links_list);
+static DEFINE_MUTEX(links_list_lock);
+
+enum xsl_response {
+	CONTINUE,
+	ADDRESS_ERROR,
+	RESTART,
+};
+
+
+static void read_irq(struct spa *spa, u64 *dsisr, u64 *dar, u64 *pe)
+{
+	u64 reg;
+
+	*dsisr = in_be64(spa->reg_dsisr);
+	*dar = in_be64(spa->reg_dar);
+	reg = in_be64(spa->reg_pe_handle);
+	*pe = reg & SPA_PE_MASK;
+}
+
+static void ack_irq(struct spa *spa, enum xsl_response r)
+{
+	u64 reg = 0;
+
+	/* continue is not supported */
+	if (r == RESTART)
+		reg = PPC_BIT(31);
+	else if (r == ADDRESS_ERROR)
+		reg = PPC_BIT(30);
+	else
+		WARN(1, "Invalid irq response %d\n", r);
+
+	if (reg) {
+		trace_ocxl_fault_ack(spa->spa_mem, spa->xsl_fault.pe,
+				spa->xsl_fault.dsisr, spa->xsl_fault.dar, reg);
+		out_be64(spa->reg_tfc, reg);
+	}
+}
+
+static void xsl_fault_handler_bh(struct work_struct *fault_work)
+{
+	vm_fault_t flt = 0;
+	unsigned long access, flags, inv_flags = 0;
+	enum xsl_response r;
+	struct xsl_fault *fault = container_of(fault_work, struct xsl_fault,
+					fault_work);
+	struct spa *spa = container_of(fault, struct spa, xsl_fault);
+
+	int rc;
+
+	/*
+	 * We must release a reference on mm_users whenever exiting this
+	 * function (taken in the memory fault interrupt handler)
+	 */
+	rc = copro_handle_mm_fault(fault->pe_data.mm, fault->dar, fault->dsisr,
+				&flt);
+	if (rc) {
+		pr_debug("copro_handle_mm_fault failed: %d\n", rc);
+		if (fault->pe_data.xsl_err_cb) {
+			fault->pe_data.xsl_err_cb(
+				fault->pe_data.xsl_err_data,
+				fault->dar, fault->dsisr);
+		}
+		r = ADDRESS_ERROR;
+		goto ack;
+	}
+
+	if (!radix_enabled()) {
+		/*
+		 * update_mmu_cache() will not have loaded the hash
+		 * since current->trap is not a 0x400 or 0x300, so
+		 * just call hash_page_mm() here.
+		 */
+		access = _PAGE_PRESENT | _PAGE_READ;
+		if (fault->dsisr & SPA_XSL_S)
+			access |= _PAGE_WRITE;
+
+		if (REGION_ID(fault->dar) != USER_REGION_ID)
+			access |= _PAGE_PRIVILEGED;
+
+		local_irq_save(flags);
+		hash_page_mm(fault->pe_data.mm, fault->dar, access, 0x300,
+			inv_flags);
+		local_irq_restore(flags);
+	}
+	r = RESTART;
+ack:
+	mmput(fault->pe_data.mm);
+	ack_irq(spa, r);
+}
+
+static irqreturn_t xsl_fault_handler(int irq, void *data)
+{
+	struct link *link = (struct link *) data;
+	struct spa *spa = link->spa;
+	u64 dsisr, dar, pe_handle;
+	struct pe_data *pe_data;
+	struct ocxl_process_element *pe;
+	int lpid, pid, tid;
+	bool schedule = false;
+
+	read_irq(spa, &dsisr, &dar, &pe_handle);
+	trace_ocxl_fault(spa->spa_mem, pe_handle, dsisr, dar, -1);
+
+	WARN_ON(pe_handle > SPA_PE_MASK);
+	pe = spa->spa_mem + pe_handle;
+	lpid = be32_to_cpu(pe->lpid);
+	pid = be32_to_cpu(pe->pid);
+	tid = be32_to_cpu(pe->tid);
+	/* We could be reading all null values here if the PE is being
+	 * removed while an interrupt kicks in. It's not supposed to
+	 * happen if the driver notified the AFU to terminate the
+	 * PASID, and the AFU waited for pending operations before
+	 * acknowledging. But even if it happens, we won't find a
+	 * memory context below and fail silently, so it should be ok.
+	 */
+	if (!(dsisr & SPA_XSL_TF)) {
+		WARN(1, "Invalid xsl interrupt fault register %#llx\n", dsisr);
+		ack_irq(spa, ADDRESS_ERROR);
+		return IRQ_HANDLED;
+	}
+
+	rcu_read_lock();
+	pe_data = radix_tree_lookup(&spa->pe_tree, pe_handle);
+	if (!pe_data) {
+		/*
+		 * Could only happen if the driver didn't notify the
+		 * AFU about PASID termination before removing the PE,
+		 * or the AFU didn't wait for all memory access to
+		 * have completed.
+		 *
+		 * Either way, we fail early, but we shouldn't log an
+		 * error message, as it is a valid (if unexpected)
+		 * scenario
+		 */
+		rcu_read_unlock();
+		pr_debug("Unknown mm context for xsl interrupt\n");
+		ack_irq(spa, ADDRESS_ERROR);
+		return IRQ_HANDLED;
+	}
+	WARN_ON(pe_data->mm->context.id != pid);
+
+	if (mmget_not_zero(pe_data->mm)) {
+			spa->xsl_fault.pe = pe_handle;
+			spa->xsl_fault.dar = dar;
+			spa->xsl_fault.dsisr = dsisr;
+			spa->xsl_fault.pe_data = *pe_data;
+			schedule = true;
+			/* mm_users count released by bottom half */
+	}
+	rcu_read_unlock();
+	if (schedule)
+		schedule_work(&spa->xsl_fault.fault_work);
+	else
+		ack_irq(spa, ADDRESS_ERROR);
+	return IRQ_HANDLED;
+}
+
+static void unmap_irq_registers(struct spa *spa)
+{
+	pnv_ocxl_unmap_xsl_regs(spa->reg_dsisr, spa->reg_dar, spa->reg_tfc,
+				spa->reg_pe_handle);
+}
+
+static int map_irq_registers(struct pci_dev *dev, struct spa *spa)
+{
+	return pnv_ocxl_map_xsl_regs(dev, &spa->reg_dsisr, &spa->reg_dar,
+				&spa->reg_tfc, &spa->reg_pe_handle);
+}
+
+static int setup_xsl_irq(struct pci_dev *dev, struct link *link)
+{
+	struct spa *spa = link->spa;
+	int rc;
+	int hwirq;
+
+	rc = pnv_ocxl_get_xsl_irq(dev, &hwirq);
+	if (rc)
+		return rc;
+
+	rc = map_irq_registers(dev, spa);
+	if (rc)
+		return rc;
+
+	spa->irq_name = kasprintf(GFP_KERNEL, "ocxl-xsl-%x-%x-%x",
+				link->domain, link->bus, link->dev);
+	if (!spa->irq_name) {
+		unmap_irq_registers(spa);
+		dev_err(&dev->dev, "Can't allocate name for xsl interrupt\n");
+		return -ENOMEM;
+	}
+	/*
+	 * At some point, we'll need to look into allowing a higher
+	 * number of interrupts. Could we have an IRQ domain per link?
+	 */
+	spa->virq = irq_create_mapping(NULL, hwirq);
+	if (!spa->virq) {
+		kfree(spa->irq_name);
+		unmap_irq_registers(spa);
+		dev_err(&dev->dev,
+			"irq_create_mapping failed for translation interrupt\n");
+		return -EINVAL;
+	}
+
+	dev_dbg(&dev->dev, "hwirq %d mapped to virq %d\n", hwirq, spa->virq);
+
+	rc = request_irq(spa->virq, xsl_fault_handler, 0, spa->irq_name,
+			link);
+	if (rc) {
+		irq_dispose_mapping(spa->virq);
+		kfree(spa->irq_name);
+		unmap_irq_registers(spa);
+		dev_err(&dev->dev,
+			"request_irq failed for translation interrupt: %d\n",
+			rc);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void release_xsl_irq(struct link *link)
+{
+	struct spa *spa = link->spa;
+
+	if (spa->virq) {
+		free_irq(spa->virq, link);
+		irq_dispose_mapping(spa->virq);
+	}
+	kfree(spa->irq_name);
+	unmap_irq_registers(spa);
+}
+
+static int alloc_spa(struct pci_dev *dev, struct link *link)
+{
+	struct spa *spa;
+
+	spa = kzalloc(sizeof(struct spa), GFP_KERNEL);
+	if (!spa)
+		return -ENOMEM;
+
+	mutex_init(&spa->spa_lock);
+	INIT_RADIX_TREE(&spa->pe_tree, GFP_KERNEL);
+	INIT_WORK(&spa->xsl_fault.fault_work, xsl_fault_handler_bh);
+
+	spa->spa_order = SPA_SPA_SIZE_LOG - PAGE_SHIFT;
+	spa->spa_mem = (struct ocxl_process_element *)
+		__get_free_pages(GFP_KERNEL | __GFP_ZERO, spa->spa_order);
+	if (!spa->spa_mem) {
+		dev_err(&dev->dev, "Can't allocate Shared Process Area\n");
+		kfree(spa);
+		return -ENOMEM;
+	}
+	pr_debug("Allocated SPA for %x:%x:%x at %p\n", link->domain, link->bus,
+		link->dev, spa->spa_mem);
+
+	link->spa = spa;
+	return 0;
+}
+
+static void free_spa(struct link *link)
+{
+	struct spa *spa = link->spa;
+
+	pr_debug("Freeing SPA for %x:%x:%x\n", link->domain, link->bus,
+		link->dev);
+
+	if (spa && spa->spa_mem) {
+		free_pages((unsigned long) spa->spa_mem, spa->spa_order);
+		kfree(spa);
+		link->spa = NULL;
+	}
+}
+
+static int alloc_link(struct pci_dev *dev, int PE_mask, struct link **out_link)
+{
+	struct link *link;
+	int rc;
+
+	link = kzalloc(sizeof(struct link), GFP_KERNEL);
+	if (!link)
+		return -ENOMEM;
+
+	kref_init(&link->ref);
+	link->domain = pci_domain_nr(dev->bus);
+	link->bus = dev->bus->number;
+	link->dev = PCI_SLOT(dev->devfn);
+	atomic_set(&link->irq_available, MAX_IRQ_PER_LINK);
+
+	rc = alloc_spa(dev, link);
+	if (rc)
+		goto err_free;
+
+	rc = setup_xsl_irq(dev, link);
+	if (rc)
+		goto err_spa;
+
+	/* platform specific hook */
+	rc = pnv_ocxl_spa_setup(dev, link->spa->spa_mem, PE_mask,
+				&link->platform_data);
+	if (rc)
+		goto err_xsl_irq;
+
+	*out_link = link;
+	return 0;
+
+err_xsl_irq:
+	release_xsl_irq(link);
+err_spa:
+	free_spa(link);
+err_free:
+	kfree(link);
+	return rc;
+}
+
+static void free_link(struct link *link)
+{
+	release_xsl_irq(link);
+	free_spa(link);
+	kfree(link);
+}
+
+int ocxl_link_setup(struct pci_dev *dev, int PE_mask, void **link_handle)
+{
+	int rc = 0;
+	struct link *link;
+
+	mutex_lock(&links_list_lock);
+	list_for_each_entry(link, &links_list, list) {
+		/* The functions of a device all share the same link */
+		if (link->domain == pci_domain_nr(dev->bus) &&
+			link->bus == dev->bus->number &&
+			link->dev == PCI_SLOT(dev->devfn)) {
+			kref_get(&link->ref);
+			*link_handle = link;
+			goto unlock;
+		}
+	}
+	rc = alloc_link(dev, PE_mask, &link);
+	if (rc)
+		goto unlock;
+
+	list_add(&link->list, &links_list);
+	*link_handle = link;
+unlock:
+	mutex_unlock(&links_list_lock);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(ocxl_link_setup);
+
+static void release_xsl(struct kref *ref)
+{
+	struct link *link = container_of(ref, struct link, ref);
+
+	list_del(&link->list);
+	/* call platform code before releasing data */
+	pnv_ocxl_spa_release(link->platform_data);
+	free_link(link);
+}
+
+void ocxl_link_release(struct pci_dev *dev, void *link_handle)
+{
+	struct link *link = (struct link *) link_handle;
+
+	mutex_lock(&links_list_lock);
+	kref_put(&link->ref, release_xsl);
+	mutex_unlock(&links_list_lock);
+}
+EXPORT_SYMBOL_GPL(ocxl_link_release);
+
+static u64 calculate_cfg_state(bool kernel)
+{
+	u64 state;
+
+	state = SPA_CFG_DR;
+	if (mfspr(SPRN_LPCR) & LPCR_TC)
+		state |= SPA_CFG_TC;
+	if (radix_enabled())
+		state |= SPA_CFG_XLAT_ror;
+	else
+		state |= SPA_CFG_XLAT_hpt;
+	state |= SPA_CFG_HV;
+	if (kernel) {
+		if (mfmsr() & MSR_SF)
+			state |= SPA_CFG_SF;
+	} else {
+		state |= SPA_CFG_PR;
+		if (!test_tsk_thread_flag(current, TIF_32BIT))
+			state |= SPA_CFG_SF;
+	}
+	return state;
+}
+
+int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
+		u64 amr, struct mm_struct *mm,
+		void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
+		void *xsl_err_data)
+{
+	struct link *link = (struct link *) link_handle;
+	struct spa *spa = link->spa;
+	struct ocxl_process_element *pe;
+	int pe_handle, rc = 0;
+	struct pe_data *pe_data;
+
+	BUILD_BUG_ON(sizeof(struct ocxl_process_element) != 128);
+	if (pasid > SPA_PASID_MAX)
+		return -EINVAL;
+
+	mutex_lock(&spa->spa_lock);
+	pe_handle = pasid & SPA_PE_MASK;
+	pe = spa->spa_mem + pe_handle;
+
+	if (pe->software_state) {
+		rc = -EBUSY;
+		goto unlock;
+	}
+
+	pe_data = kmalloc(sizeof(*pe_data), GFP_KERNEL);
+	if (!pe_data) {
+		rc = -ENOMEM;
+		goto unlock;
+	}
+
+	pe_data->mm = mm;
+	pe_data->xsl_err_cb = xsl_err_cb;
+	pe_data->xsl_err_data = xsl_err_data;
+
+	memset(pe, 0, sizeof(struct ocxl_process_element));
+	pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0));
+	pe->lpid = cpu_to_be32(mfspr(SPRN_LPID));
+	pe->pid = cpu_to_be32(pidr);
+	pe->tid = cpu_to_be32(tidr);
+	pe->amr = cpu_to_be64(amr);
+	pe->software_state = cpu_to_be32(SPA_PE_VALID);
+
+	mm_context_add_copro(mm);
+	/*
+	 * Barrier is to make sure PE is visible in the SPA before it
+	 * is used by the device. It also helps with the global TLBI
+	 * invalidation
+	 */
+	mb();
+	radix_tree_insert(&spa->pe_tree, pe_handle, pe_data);
+
+	/*
+	 * The mm must stay valid for as long as the device uses it. We
+	 * lower the count when the context is removed from the SPA.
+	 *
+	 * We grab mm_count (and not mm_users), as we don't want to
+	 * end up in a circular dependency if a process mmaps its
+	 * mmio, therefore incrementing the file ref count when
+	 * calling mmap(), and forgets to unmap before exiting. In
+	 * that scenario, when the kernel handles the death of the
+	 * process, the file is not cleaned because unmap was not
+	 * called, and the mm wouldn't be freed because we would still
+	 * have a reference on mm_users. Incrementing mm_count solves
+	 * the problem.
+	 */
+	mmgrab(mm);
+	trace_ocxl_context_add(current->pid, spa->spa_mem, pasid, pidr, tidr);
+unlock:
+	mutex_unlock(&spa->spa_lock);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(ocxl_link_add_pe);
+
+int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid)
+{
+	struct link *link = (struct link *) link_handle;
+	struct spa *spa = link->spa;
+	struct ocxl_process_element *pe;
+	int pe_handle, rc;
+
+	if (pasid > SPA_PASID_MAX)
+		return -EINVAL;
+
+	pe_handle = pasid & SPA_PE_MASK;
+	pe = spa->spa_mem + pe_handle;
+
+	mutex_lock(&spa->spa_lock);
+
+	pe->tid = cpu_to_be32(tid);
+
+	/*
+	 * The barrier makes sure the PE is updated
+	 * before we clear the NPU context cache below, so that the
+	 * old PE cannot be reloaded erroneously.
+	 */
+	mb();
+
+	/*
+	 * hook to platform code
+	 * On powerpc, the entry needs to be cleared from the context
+	 * cache of the NPU.
+	 */
+	rc = pnv_ocxl_spa_remove_pe_from_cache(link->platform_data, pe_handle);
+	WARN_ON(rc);
+
+	mutex_unlock(&spa->spa_lock);
+	return rc;
+}
+
+int ocxl_link_remove_pe(void *link_handle, int pasid)
+{
+	struct link *link = (struct link *) link_handle;
+	struct spa *spa = link->spa;
+	struct ocxl_process_element *pe;
+	struct pe_data *pe_data;
+	int pe_handle, rc;
+
+	if (pasid > SPA_PASID_MAX)
+		return -EINVAL;
+
+	/*
+	 * About synchronization with our memory fault handler:
+	 *
+	 * Before removing the PE, the driver is supposed to have
+	 * notified the AFU, which should have cleaned up and make
+	 * sure the PASID is no longer in use, including pending
+	 * interrupts. However, there's no way to be sure...
+	 *
+	 * We clear the PE and remove the context from our radix
+	 * tree. From that point on, any new interrupt for that
+	 * context will fail silently, which is ok. As mentioned
+	 * above, that's not expected, but it could happen if the
+	 * driver or AFU didn't do the right thing.
+	 *
+	 * There could still be a bottom half running, but we don't
+	 * need to wait/flush, as it is managing a reference count on
+	 * the mm it reads from the radix tree.
+	 */
+	pe_handle = pasid & SPA_PE_MASK;
+	pe = spa->spa_mem + pe_handle;
+
+	mutex_lock(&spa->spa_lock);
+
+	if (!(be32_to_cpu(pe->software_state) & SPA_PE_VALID)) {
+		rc = -EINVAL;
+		goto unlock;
+	}
+
+	trace_ocxl_context_remove(current->pid, spa->spa_mem, pasid,
+				be32_to_cpu(pe->pid), be32_to_cpu(pe->tid));
+
+	memset(pe, 0, sizeof(struct ocxl_process_element));
+	/*
+	 * The barrier makes sure the PE is removed from the SPA
+	 * before we clear the NPU context cache below, so that the
+	 * old PE cannot be reloaded erroneously.
+	 */
+	mb();
+
+	/*
+	 * hook to platform code
+	 * On powerpc, the entry needs to be cleared from the context
+	 * cache of the NPU.
+	 */
+	rc = pnv_ocxl_spa_remove_pe_from_cache(link->platform_data, pe_handle);
+	WARN_ON(rc);
+
+	pe_data = radix_tree_delete(&spa->pe_tree, pe_handle);
+	if (!pe_data) {
+		WARN(1, "Couldn't find pe data when removing PE\n");
+	} else {
+		mm_context_remove_copro(pe_data->mm);
+		mmdrop(pe_data->mm);
+		kfree_rcu(pe_data, rcu);
+	}
+unlock:
+	mutex_unlock(&spa->spa_lock);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(ocxl_link_remove_pe);
+
+int ocxl_link_irq_alloc(void *link_handle, int *hw_irq, u64 *trigger_addr)
+{
+	struct link *link = (struct link *) link_handle;
+	int rc, irq;
+	u64 addr;
+
+	if (atomic_dec_if_positive(&link->irq_available) < 0)
+		return -ENOSPC;
+
+	rc = pnv_ocxl_alloc_xive_irq(&irq, &addr);
+	if (rc) {
+		atomic_inc(&link->irq_available);
+		return rc;
+	}
+
+	*hw_irq = irq;
+	*trigger_addr = addr;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ocxl_link_irq_alloc);
+
+void ocxl_link_free_irq(void *link_handle, int hw_irq)
+{
+	struct link *link = (struct link *) link_handle;
+
+	pnv_ocxl_free_xive_irq(hw_irq);
+	atomic_inc(&link->irq_available);
+}
+EXPORT_SYMBOL_GPL(ocxl_link_free_irq);
diff --git a/drivers/misc/ocxl/main.c b/drivers/misc/ocxl/main.c
new file mode 100644
index 000000000..7210d9e05
--- /dev/null
+++ b/drivers/misc/ocxl/main.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "ocxl_internal.h"
+
+static int __init init_ocxl(void)
+{
+	int rc = 0;
+
+	rc = ocxl_file_init();
+	if (rc)
+		return rc;
+
+	rc = pci_register_driver(&ocxl_pci_driver);
+	if (rc) {
+		ocxl_file_exit();
+		return rc;
+	}
+	return 0;
+}
+
+static void exit_ocxl(void)
+{
+	pci_unregister_driver(&ocxl_pci_driver);
+	ocxl_file_exit();
+}
+
+module_init(init_ocxl);
+module_exit(exit_ocxl);
+
+MODULE_DESCRIPTION("Open Coherent Accelerator");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h
new file mode 100644
index 000000000..a32f21510
--- /dev/null
+++ b/drivers/misc/ocxl/ocxl_internal.h
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#ifndef _OCXL_INTERNAL_H_
+#define _OCXL_INTERNAL_H_
+
+#include <linux/pci.h>
+#include <linux/cdev.h>
+#include <linux/list.h>
+#include <misc/ocxl.h>
+
+#define MAX_IRQ_PER_LINK	2000
+#define MAX_IRQ_PER_CONTEXT	MAX_IRQ_PER_LINK
+
+#define to_ocxl_function(d) container_of(d, struct ocxl_fn, dev)
+#define to_ocxl_afu(d) container_of(d, struct ocxl_afu, dev)
+
+extern struct pci_driver ocxl_pci_driver;
+
+
+struct ocxl_fn {
+	struct device dev;
+	int bar_used[3];
+	struct ocxl_fn_config config;
+	struct list_head afu_list;
+	int pasid_base;
+	int actag_base;
+	int actag_enabled;
+	int actag_supported;
+	struct list_head pasid_list;
+	struct list_head actag_list;
+	void *link;
+};
+
+struct ocxl_afu {
+	struct ocxl_fn *fn;
+	struct list_head list;
+	struct device dev;
+	struct cdev cdev;
+	struct ocxl_afu_config config;
+	int pasid_base;
+	int pasid_count; /* opened contexts */
+	int pasid_max; /* maximum number of contexts */
+	int actag_base;
+	int actag_enabled;
+	struct mutex contexts_lock;
+	struct idr contexts_idr;
+	struct mutex afu_control_lock;
+	u64 global_mmio_start;
+	u64 irq_base_offset;
+	void __iomem *global_mmio_ptr;
+	u64 pp_mmio_start;
+	struct bin_attribute attr_global_mmio;
+};
+
+enum ocxl_context_status {
+	CLOSED,
+	OPENED,
+	ATTACHED,
+};
+
+// Contains metadata about a translation fault
+struct ocxl_xsl_error {
+	u64 addr; // The address that triggered the fault
+	u64 dsisr; // the value of the dsisr register
+	u64 count; // The number of times this fault has been triggered
+};
+
+struct ocxl_context {
+	struct ocxl_afu *afu;
+	int pasid;
+	struct mutex status_mutex;
+	enum ocxl_context_status status;
+	struct address_space *mapping;
+	struct mutex mapping_lock;
+	wait_queue_head_t events_wq;
+	struct mutex xsl_error_lock;
+	struct ocxl_xsl_error xsl_error;
+	struct mutex irq_lock;
+	struct idr irq_idr;
+	u16 tidr; // Thread ID used for P9 wait implementation
+};
+
+struct ocxl_process_element {
+	__be64 config_state;
+	__be32 reserved1[11];
+	__be32 lpid;
+	__be32 tid;
+	__be32 pid;
+	__be32 reserved2[10];
+	__be64 amr;
+	__be32 reserved3[3];
+	__be32 software_state;
+};
+
+
+extern struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu);
+extern void ocxl_afu_put(struct ocxl_afu *afu);
+
+extern int ocxl_create_cdev(struct ocxl_afu *afu);
+extern void ocxl_destroy_cdev(struct ocxl_afu *afu);
+extern int ocxl_register_afu(struct ocxl_afu *afu);
+extern void ocxl_unregister_afu(struct ocxl_afu *afu);
+
+extern int ocxl_file_init(void);
+extern void ocxl_file_exit(void);
+
+extern int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size);
+extern void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size);
+extern int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size);
+extern void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size);
+
+extern struct ocxl_context *ocxl_context_alloc(void);
+extern int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu,
+			struct address_space *mapping);
+extern int ocxl_context_attach(struct ocxl_context *ctx, u64 amr);
+extern int ocxl_context_mmap(struct ocxl_context *ctx,
+			struct vm_area_struct *vma);
+extern int ocxl_context_detach(struct ocxl_context *ctx);
+extern void ocxl_context_detach_all(struct ocxl_afu *afu);
+extern void ocxl_context_free(struct ocxl_context *ctx);
+
+extern int ocxl_sysfs_add_afu(struct ocxl_afu *afu);
+extern void ocxl_sysfs_remove_afu(struct ocxl_afu *afu);
+
+extern int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset);
+extern int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset);
+extern void ocxl_afu_irq_free_all(struct ocxl_context *ctx);
+extern int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset,
+			int eventfd);
+extern u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset);
+
+#endif /* _OCXL_INTERNAL_H_ */
diff --git a/drivers/misc/ocxl/pasid.c b/drivers/misc/ocxl/pasid.c
new file mode 100644
index 000000000..d14cb56e6
--- /dev/null
+++ b/drivers/misc/ocxl/pasid.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include "ocxl_internal.h"
+
+
+struct id_range {
+	struct list_head list;
+	u32 start;
+	u32 end;
+};
+
+#ifdef DEBUG
+static void dump_list(struct list_head *head, char *type_str)
+{
+	struct id_range *cur;
+
+	pr_debug("%s ranges allocated:\n", type_str);
+	list_for_each_entry(cur, head, list) {
+		pr_debug("Range %d->%d\n", cur->start, cur->end);
+	}
+}
+#endif
+
+static int range_alloc(struct list_head *head, u32 size, int max_id,
+		char *type_str)
+{
+	struct list_head *pos;
+	struct id_range *cur, *new;
+	int rc, last_end;
+
+	new = kmalloc(sizeof(struct id_range), GFP_KERNEL);
+	if (!new)
+		return -ENOMEM;
+
+	pos = head;
+	last_end = -1;
+	list_for_each_entry(cur, head, list) {
+		if ((cur->start - last_end) > size)
+			break;
+		last_end = cur->end;
+		pos = &cur->list;
+	}
+
+	new->start = last_end + 1;
+	new->end = new->start + size - 1;
+
+	if (new->end > max_id) {
+		kfree(new);
+		rc = -ENOSPC;
+	} else {
+		list_add(&new->list, pos);
+		rc = new->start;
+	}
+
+#ifdef DEBUG
+	dump_list(head, type_str);
+#endif
+	return rc;
+}
+
+static void range_free(struct list_head *head, u32 start, u32 size,
+		char *type_str)
+{
+	bool found = false;
+	struct id_range *cur, *tmp;
+
+	list_for_each_entry_safe(cur, tmp, head, list) {
+		if (cur->start == start && cur->end == (start + size - 1)) {
+			found = true;
+			list_del(&cur->list);
+			kfree(cur);
+			break;
+		}
+	}
+	WARN_ON(!found);
+#ifdef DEBUG
+	dump_list(head, type_str);
+#endif
+}
+
+int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size)
+{
+	int max_pasid;
+
+	if (fn->config.max_pasid_log < 0)
+		return -ENOSPC;
+	max_pasid = 1 << fn->config.max_pasid_log;
+	return range_alloc(&fn->pasid_list, size, max_pasid, "afu pasid");
+}
+
+void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size)
+{
+	return range_free(&fn->pasid_list, start, size, "afu pasid");
+}
+
+int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size)
+{
+	int max_actag;
+
+	max_actag = fn->actag_enabled;
+	return range_alloc(&fn->actag_list, size, max_actag, "afu actag");
+}
+
+void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size)
+{
+	return range_free(&fn->actag_list, start, size, "afu actag");
+}
diff --git a/drivers/misc/ocxl/pci.c b/drivers/misc/ocxl/pci.c
new file mode 100644
index 000000000..21f425472
--- /dev/null
+++ b/drivers/misc/ocxl/pci.c
@@ -0,0 +1,585 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/idr.h>
+#include <asm/pnv-ocxl.h>
+#include "ocxl_internal.h"
+
+/*
+ * Any opencapi device which wants to use this 'generic' driver should
+ * use the 0x062B device ID. Vendors should define the subsystem
+ * vendor/device ID to help differentiate devices.
+ */
+static const struct pci_device_id ocxl_pci_tbl[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x062B), },
+	{ }
+};
+MODULE_DEVICE_TABLE(pci, ocxl_pci_tbl);
+
+
+static struct ocxl_fn *ocxl_fn_get(struct ocxl_fn *fn)
+{
+	return (get_device(&fn->dev) == NULL) ? NULL : fn;
+}
+
+static void ocxl_fn_put(struct ocxl_fn *fn)
+{
+	put_device(&fn->dev);
+}
+
+struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu)
+{
+	return (get_device(&afu->dev) == NULL) ? NULL : afu;
+}
+
+void ocxl_afu_put(struct ocxl_afu *afu)
+{
+	put_device(&afu->dev);
+}
+
+static struct ocxl_afu *alloc_afu(struct ocxl_fn *fn)
+{
+	struct ocxl_afu *afu;
+
+	afu = kzalloc(sizeof(struct ocxl_afu), GFP_KERNEL);
+	if (!afu)
+		return NULL;
+
+	mutex_init(&afu->contexts_lock);
+	mutex_init(&afu->afu_control_lock);
+	idr_init(&afu->contexts_idr);
+	afu->fn = fn;
+	ocxl_fn_get(fn);
+	return afu;
+}
+
+static void free_afu(struct ocxl_afu *afu)
+{
+	idr_destroy(&afu->contexts_idr);
+	ocxl_fn_put(afu->fn);
+	kfree(afu);
+}
+
+static void free_afu_dev(struct device *dev)
+{
+	struct ocxl_afu *afu = to_ocxl_afu(dev);
+
+	ocxl_unregister_afu(afu);
+	free_afu(afu);
+}
+
+static int set_afu_device(struct ocxl_afu *afu, const char *location)
+{
+	struct ocxl_fn *fn = afu->fn;
+	int rc;
+
+	afu->dev.parent = &fn->dev;
+	afu->dev.release = free_afu_dev;
+	rc = dev_set_name(&afu->dev, "%s.%s.%hhu", afu->config.name, location,
+		afu->config.idx);
+	return rc;
+}
+
+static int assign_afu_actag(struct ocxl_afu *afu, struct pci_dev *dev)
+{
+	struct ocxl_fn *fn = afu->fn;
+	int actag_count, actag_offset;
+
+	/*
+	 * if there were not enough actags for the function, each afu
+	 * reduces its count as well
+	 */
+	actag_count = afu->config.actag_supported *
+		fn->actag_enabled / fn->actag_supported;
+	actag_offset = ocxl_actag_afu_alloc(fn, actag_count);
+	if (actag_offset < 0) {
+		dev_err(&afu->dev, "Can't allocate %d actags for AFU: %d\n",
+			actag_count, actag_offset);
+		return actag_offset;
+	}
+	afu->actag_base = fn->actag_base + actag_offset;
+	afu->actag_enabled = actag_count;
+
+	ocxl_config_set_afu_actag(dev, afu->config.dvsec_afu_control_pos,
+				afu->actag_base, afu->actag_enabled);
+	dev_dbg(&afu->dev, "actag base=%d enabled=%d\n",
+		afu->actag_base, afu->actag_enabled);
+	return 0;
+}
+
+static void reclaim_afu_actag(struct ocxl_afu *afu)
+{
+	struct ocxl_fn *fn = afu->fn;
+	int start_offset, size;
+
+	start_offset = afu->actag_base - fn->actag_base;
+	size = afu->actag_enabled;
+	ocxl_actag_afu_free(afu->fn, start_offset, size);
+}
+
+static int assign_afu_pasid(struct ocxl_afu *afu, struct pci_dev *dev)
+{
+	struct ocxl_fn *fn = afu->fn;
+	int pasid_count, pasid_offset;
+
+	/*
+	 * We only support the case where the function configuration
+	 * requested enough PASIDs to cover all AFUs.
+	 */
+	pasid_count = 1 << afu->config.pasid_supported_log;
+	pasid_offset = ocxl_pasid_afu_alloc(fn, pasid_count);
+	if (pasid_offset < 0) {
+		dev_err(&afu->dev, "Can't allocate %d PASIDs for AFU: %d\n",
+			pasid_count, pasid_offset);
+		return pasid_offset;
+	}
+	afu->pasid_base = fn->pasid_base + pasid_offset;
+	afu->pasid_count = 0;
+	afu->pasid_max = pasid_count;
+
+	ocxl_config_set_afu_pasid(dev, afu->config.dvsec_afu_control_pos,
+				afu->pasid_base,
+				afu->config.pasid_supported_log);
+	dev_dbg(&afu->dev, "PASID base=%d, enabled=%d\n",
+		afu->pasid_base, pasid_count);
+	return 0;
+}
+
+static void reclaim_afu_pasid(struct ocxl_afu *afu)
+{
+	struct ocxl_fn *fn = afu->fn;
+	int start_offset, size;
+
+	start_offset = afu->pasid_base - fn->pasid_base;
+	size = 1 << afu->config.pasid_supported_log;
+	ocxl_pasid_afu_free(afu->fn, start_offset, size);
+}
+
+static int reserve_fn_bar(struct ocxl_fn *fn, int bar)
+{
+	struct pci_dev *dev = to_pci_dev(fn->dev.parent);
+	int rc, idx;
+
+	if (bar != 0 && bar != 2 && bar != 4)
+		return -EINVAL;
+
+	idx = bar >> 1;
+	if (fn->bar_used[idx]++ == 0) {
+		rc = pci_request_region(dev, bar, "ocxl");
+		if (rc)
+			return rc;
+	}
+	return 0;
+}
+
+static void release_fn_bar(struct ocxl_fn *fn, int bar)
+{
+	struct pci_dev *dev = to_pci_dev(fn->dev.parent);
+	int idx;
+
+	if (bar != 0 && bar != 2 && bar != 4)
+		return;
+
+	idx = bar >> 1;
+	if (--fn->bar_used[idx] == 0)
+		pci_release_region(dev, bar);
+	WARN_ON(fn->bar_used[idx] < 0);
+}
+
+static int map_mmio_areas(struct ocxl_afu *afu, struct pci_dev *dev)
+{
+	int rc;
+
+	rc = reserve_fn_bar(afu->fn, afu->config.global_mmio_bar);
+	if (rc)
+		return rc;
+
+	rc = reserve_fn_bar(afu->fn, afu->config.pp_mmio_bar);
+	if (rc) {
+		release_fn_bar(afu->fn, afu->config.global_mmio_bar);
+		return rc;
+	}
+
+	afu->global_mmio_start =
+		pci_resource_start(dev, afu->config.global_mmio_bar) +
+		afu->config.global_mmio_offset;
+	afu->pp_mmio_start =
+		pci_resource_start(dev, afu->config.pp_mmio_bar) +
+		afu->config.pp_mmio_offset;
+
+	afu->global_mmio_ptr = ioremap(afu->global_mmio_start,
+				afu->config.global_mmio_size);
+	if (!afu->global_mmio_ptr) {
+		release_fn_bar(afu->fn, afu->config.pp_mmio_bar);
+		release_fn_bar(afu->fn, afu->config.global_mmio_bar);
+		dev_err(&dev->dev, "Error mapping global mmio area\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * Leave an empty page between the per-process mmio area and
+	 * the AFU interrupt mappings
+	 */
+	afu->irq_base_offset = afu->config.pp_mmio_stride + PAGE_SIZE;
+	return 0;
+}
+
+static void unmap_mmio_areas(struct ocxl_afu *afu)
+{
+	if (afu->global_mmio_ptr) {
+		iounmap(afu->global_mmio_ptr);
+		afu->global_mmio_ptr = NULL;
+	}
+	afu->global_mmio_start = 0;
+	afu->pp_mmio_start = 0;
+	release_fn_bar(afu->fn, afu->config.pp_mmio_bar);
+	release_fn_bar(afu->fn, afu->config.global_mmio_bar);
+}
+
+static int configure_afu(struct ocxl_afu *afu, u8 afu_idx, struct pci_dev *dev)
+{
+	int rc;
+
+	rc = ocxl_config_read_afu(dev, &afu->fn->config, &afu->config, afu_idx);
+	if (rc)
+		return rc;
+
+	rc = set_afu_device(afu, dev_name(&dev->dev));
+	if (rc)
+		return rc;
+
+	rc = assign_afu_actag(afu, dev);
+	if (rc)
+		return rc;
+
+	rc = assign_afu_pasid(afu, dev);
+	if (rc) {
+		reclaim_afu_actag(afu);
+		return rc;
+	}
+
+	rc = map_mmio_areas(afu, dev);
+	if (rc) {
+		reclaim_afu_pasid(afu);
+		reclaim_afu_actag(afu);
+		return rc;
+	}
+	return 0;
+}
+
+static void deconfigure_afu(struct ocxl_afu *afu)
+{
+	unmap_mmio_areas(afu);
+	reclaim_afu_pasid(afu);
+	reclaim_afu_actag(afu);
+}
+
+static int activate_afu(struct pci_dev *dev, struct ocxl_afu *afu)
+{
+	int rc;
+
+	ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 1);
+	/*
+	 * Char device creation is the last step, as processes can
+	 * call our driver immediately, so all our inits must be finished.
+	 */
+	rc = ocxl_create_cdev(afu);
+	if (rc)
+		return rc;
+	return 0;
+}
+
+static void deactivate_afu(struct ocxl_afu *afu)
+{
+	struct pci_dev *dev = to_pci_dev(afu->fn->dev.parent);
+
+	ocxl_destroy_cdev(afu);
+	ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 0);
+}
+
+static int init_afu(struct pci_dev *dev, struct ocxl_fn *fn, u8 afu_idx)
+{
+	int rc;
+	struct ocxl_afu *afu;
+
+	afu = alloc_afu(fn);
+	if (!afu)
+		return -ENOMEM;
+
+	rc = configure_afu(afu, afu_idx, dev);
+	if (rc) {
+		free_afu(afu);
+		return rc;
+	}
+
+	rc = ocxl_register_afu(afu);
+	if (rc)
+		goto err;
+
+	rc = ocxl_sysfs_add_afu(afu);
+	if (rc)
+		goto err;
+
+	rc = activate_afu(dev, afu);
+	if (rc)
+		goto err_sys;
+
+	list_add_tail(&afu->list, &fn->afu_list);
+	return 0;
+
+err_sys:
+	ocxl_sysfs_remove_afu(afu);
+err:
+	deconfigure_afu(afu);
+	device_unregister(&afu->dev);
+	return rc;
+}
+
+static void remove_afu(struct ocxl_afu *afu)
+{
+	list_del(&afu->list);
+	ocxl_context_detach_all(afu);
+	deactivate_afu(afu);
+	ocxl_sysfs_remove_afu(afu);
+	deconfigure_afu(afu);
+	device_unregister(&afu->dev);
+}
+
+static struct ocxl_fn *alloc_function(struct pci_dev *dev)
+{
+	struct ocxl_fn *fn;
+
+	fn = kzalloc(sizeof(struct ocxl_fn), GFP_KERNEL);
+	if (!fn)
+		return NULL;
+
+	INIT_LIST_HEAD(&fn->afu_list);
+	INIT_LIST_HEAD(&fn->pasid_list);
+	INIT_LIST_HEAD(&fn->actag_list);
+	return fn;
+}
+
+static void free_function(struct ocxl_fn *fn)
+{
+	WARN_ON(!list_empty(&fn->afu_list));
+	WARN_ON(!list_empty(&fn->pasid_list));
+	kfree(fn);
+}
+
+static void free_function_dev(struct device *dev)
+{
+	struct ocxl_fn *fn = to_ocxl_function(dev);
+
+	free_function(fn);
+}
+
+static int set_function_device(struct ocxl_fn *fn, struct pci_dev *dev)
+{
+	int rc;
+
+	fn->dev.parent = &dev->dev;
+	fn->dev.release = free_function_dev;
+	rc = dev_set_name(&fn->dev, "ocxlfn.%s", dev_name(&dev->dev));
+	if (rc)
+		return rc;
+	pci_set_drvdata(dev, fn);
+	return 0;
+}
+
+static int assign_function_actag(struct ocxl_fn *fn)
+{
+	struct pci_dev *dev = to_pci_dev(fn->dev.parent);
+	u16 base, enabled, supported;
+	int rc;
+
+	rc = ocxl_config_get_actag_info(dev, &base, &enabled, &supported);
+	if (rc)
+		return rc;
+
+	fn->actag_base = base;
+	fn->actag_enabled = enabled;
+	fn->actag_supported = supported;
+
+	ocxl_config_set_actag(dev, fn->config.dvsec_function_pos,
+			fn->actag_base,	fn->actag_enabled);
+	dev_dbg(&fn->dev, "actag range starting at %d, enabled %d\n",
+		fn->actag_base, fn->actag_enabled);
+	return 0;
+}
+
+static int set_function_pasid(struct ocxl_fn *fn)
+{
+	struct pci_dev *dev = to_pci_dev(fn->dev.parent);
+	int rc, desired_count, max_count;
+
+	/* A function may not require any PASID */
+	if (fn->config.max_pasid_log < 0)
+		return 0;
+
+	rc = ocxl_config_get_pasid_info(dev, &max_count);
+	if (rc)
+		return rc;
+
+	desired_count = 1 << fn->config.max_pasid_log;
+
+	if (desired_count > max_count) {
+		dev_err(&fn->dev,
+			"Function requires more PASIDs than is available (%d vs. %d)\n",
+			desired_count, max_count);
+		return -ENOSPC;
+	}
+
+	fn->pasid_base = 0;
+	return 0;
+}
+
+static int configure_function(struct ocxl_fn *fn, struct pci_dev *dev)
+{
+	int rc;
+
+	rc = pci_enable_device(dev);
+	if (rc) {
+		dev_err(&dev->dev, "pci_enable_device failed: %d\n", rc);
+		return rc;
+	}
+
+	/*
+	 * Once it has been confirmed to work on our hardware, we
+	 * should reset the function, to force the adapter to restart
+	 * from scratch.
+	 * A function reset would also reset all its AFUs.
+	 *
+	 * Some hints for implementation:
+	 *
+	 * - there's not status bit to know when the reset is done. We
+	 *   should try reading the config space to know when it's
+	 *   done.
+	 * - probably something like:
+	 *	Reset
+	 *	wait 100ms
+	 *	issue config read
+	 *	allow device up to 1 sec to return success on config
+	 *	read before declaring it broken
+	 *
+	 * Some shared logic on the card (CFG, TLX) won't be reset, so
+	 * there's no guarantee that it will be enough.
+	 */
+	rc = ocxl_config_read_function(dev, &fn->config);
+	if (rc)
+		return rc;
+
+	rc = set_function_device(fn, dev);
+	if (rc)
+		return rc;
+
+	rc = assign_function_actag(fn);
+	if (rc)
+		return rc;
+
+	rc = set_function_pasid(fn);
+	if (rc)
+		return rc;
+
+	rc = ocxl_link_setup(dev, 0, &fn->link);
+	if (rc)
+		return rc;
+
+	rc = ocxl_config_set_TL(dev, fn->config.dvsec_tl_pos);
+	if (rc) {
+		ocxl_link_release(dev, fn->link);
+		return rc;
+	}
+	return 0;
+}
+
+static void deconfigure_function(struct ocxl_fn *fn)
+{
+	struct pci_dev *dev = to_pci_dev(fn->dev.parent);
+
+	ocxl_link_release(dev, fn->link);
+	pci_disable_device(dev);
+}
+
+static struct ocxl_fn *init_function(struct pci_dev *dev)
+{
+	struct ocxl_fn *fn;
+	int rc;
+
+	fn = alloc_function(dev);
+	if (!fn)
+		return ERR_PTR(-ENOMEM);
+
+	rc = configure_function(fn, dev);
+	if (rc) {
+		free_function(fn);
+		return ERR_PTR(rc);
+	}
+
+	rc = device_register(&fn->dev);
+	if (rc) {
+		deconfigure_function(fn);
+		put_device(&fn->dev);
+		return ERR_PTR(rc);
+	}
+	return fn;
+}
+
+static void remove_function(struct ocxl_fn *fn)
+{
+	deconfigure_function(fn);
+	device_unregister(&fn->dev);
+}
+
+static int ocxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+	int rc, afu_count = 0;
+	u8 afu;
+	struct ocxl_fn *fn;
+
+	if (!radix_enabled()) {
+		dev_err(&dev->dev, "Unsupported memory model (hash)\n");
+		return -ENODEV;
+	}
+
+	fn = init_function(dev);
+	if (IS_ERR(fn)) {
+		dev_err(&dev->dev, "function init failed: %li\n",
+			PTR_ERR(fn));
+		return PTR_ERR(fn);
+	}
+
+	for (afu = 0; afu <= fn->config.max_afu_index; afu++) {
+		rc = ocxl_config_check_afu_index(dev, &fn->config, afu);
+		if (rc > 0) {
+			rc = init_afu(dev, fn, afu);
+			if (rc) {
+				dev_err(&dev->dev,
+					"Can't initialize AFU index %d\n", afu);
+				continue;
+			}
+			afu_count++;
+		}
+	}
+	dev_info(&dev->dev, "%d AFU(s) configured\n", afu_count);
+	return 0;
+}
+
+static void ocxl_remove(struct pci_dev *dev)
+{
+	struct ocxl_afu *afu, *tmp;
+	struct ocxl_fn *fn = pci_get_drvdata(dev);
+
+	list_for_each_entry_safe(afu, tmp, &fn->afu_list, list) {
+		remove_afu(afu);
+	}
+	remove_function(fn);
+}
+
+struct pci_driver ocxl_pci_driver = {
+	.name = "ocxl",
+	.id_table = ocxl_pci_tbl,
+	.probe = ocxl_probe,
+	.remove = ocxl_remove,
+	.shutdown = ocxl_remove,
+};
diff --git a/drivers/misc/ocxl/sysfs.c b/drivers/misc/ocxl/sysfs.c
new file mode 100644
index 000000000..0ab1fd1b2
--- /dev/null
+++ b/drivers/misc/ocxl/sysfs.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/sysfs.h>
+#include "ocxl_internal.h"
+
+static ssize_t global_mmio_size_show(struct device *device,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct ocxl_afu *afu = to_ocxl_afu(device);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n",
+			afu->config.global_mmio_size);
+}
+
+static ssize_t pp_mmio_size_show(struct device *device,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct ocxl_afu *afu = to_ocxl_afu(device);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n",
+			afu->config.pp_mmio_stride);
+}
+
+static ssize_t afu_version_show(struct device *device,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct ocxl_afu *afu = to_ocxl_afu(device);
+
+	return scnprintf(buf, PAGE_SIZE, "%hhu:%hhu\n",
+			afu->config.version_major,
+			afu->config.version_minor);
+}
+
+static ssize_t contexts_show(struct device *device,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct ocxl_afu *afu = to_ocxl_afu(device);
+
+	return scnprintf(buf, PAGE_SIZE, "%d/%d\n",
+			afu->pasid_count, afu->pasid_max);
+}
+
+static struct device_attribute afu_attrs[] = {
+	__ATTR_RO(global_mmio_size),
+	__ATTR_RO(pp_mmio_size),
+	__ATTR_RO(afu_version),
+	__ATTR_RO(contexts),
+};
+
+static ssize_t global_mmio_read(struct file *filp, struct kobject *kobj,
+				struct bin_attribute *bin_attr, char *buf,
+				loff_t off, size_t count)
+{
+	struct ocxl_afu *afu = to_ocxl_afu(kobj_to_dev(kobj));
+
+	if (count == 0 || off < 0 ||
+		off >= afu->config.global_mmio_size)
+		return 0;
+	memcpy_fromio(buf, afu->global_mmio_ptr + off, count);
+	return count;
+}
+
+static vm_fault_t global_mmio_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct ocxl_afu *afu = vma->vm_private_data;
+	unsigned long offset;
+
+	if (vmf->pgoff >= (afu->config.global_mmio_size >> PAGE_SHIFT))
+		return VM_FAULT_SIGBUS;
+
+	offset = vmf->pgoff;
+	offset += (afu->global_mmio_start >> PAGE_SHIFT);
+	return vmf_insert_pfn(vma, vmf->address, offset);
+}
+
+static const struct vm_operations_struct global_mmio_vmops = {
+	.fault = global_mmio_fault,
+};
+
+static int global_mmio_mmap(struct file *filp, struct kobject *kobj,
+			struct bin_attribute *bin_attr,
+			struct vm_area_struct *vma)
+{
+	struct ocxl_afu *afu = to_ocxl_afu(kobj_to_dev(kobj));
+
+	if ((vma_pages(vma) + vma->vm_pgoff) >
+		(afu->config.global_mmio_size >> PAGE_SHIFT))
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_ops = &global_mmio_vmops;
+	vma->vm_private_data = afu;
+	return 0;
+}
+
+int ocxl_sysfs_add_afu(struct ocxl_afu *afu)
+{
+	int i, rc;
+
+	for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) {
+		rc = device_create_file(&afu->dev, &afu_attrs[i]);
+		if (rc)
+			goto err;
+	}
+
+	sysfs_attr_init(&afu->attr_global_mmio.attr);
+	afu->attr_global_mmio.attr.name = "global_mmio_area";
+	afu->attr_global_mmio.attr.mode = 0600;
+	afu->attr_global_mmio.size = afu->config.global_mmio_size;
+	afu->attr_global_mmio.read = global_mmio_read;
+	afu->attr_global_mmio.mmap = global_mmio_mmap;
+	rc = device_create_bin_file(&afu->dev, &afu->attr_global_mmio);
+	if (rc) {
+		dev_err(&afu->dev,
+			"Unable to create global mmio attr for afu: %d\n",
+			rc);
+		goto err;
+	}
+
+	return 0;
+
+err:
+	for (i--; i >= 0; i--)
+		device_remove_file(&afu->dev, &afu_attrs[i]);
+	return rc;
+}
+
+void ocxl_sysfs_remove_afu(struct ocxl_afu *afu)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(afu_attrs); i++)
+		device_remove_file(&afu->dev, &afu_attrs[i]);
+	device_remove_bin_file(&afu->dev, &afu->attr_global_mmio);
+}
diff --git a/drivers/misc/ocxl/trace.c b/drivers/misc/ocxl/trace.c
new file mode 100644
index 000000000..1e6947049
--- /dev/null
+++ b/drivers/misc/ocxl/trace.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+#endif
diff --git a/drivers/misc/ocxl/trace.h b/drivers/misc/ocxl/trace.h
new file mode 100644
index 000000000..bcb7ff330
--- /dev/null
+++ b/drivers/misc/ocxl/trace.h
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ocxl
+
+#if !defined(_TRACE_OCXL_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_OCXL_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(ocxl_context,
+	TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr),
+	TP_ARGS(pid, spa, pasid, pidr, tidr),
+
+	TP_STRUCT__entry(
+		__field(pid_t, pid)
+		__field(void*, spa)
+		__field(int, pasid)
+		__field(u32, pidr)
+		__field(u32, tidr)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+		__entry->spa = spa;
+		__entry->pasid = pasid;
+		__entry->pidr = pidr;
+		__entry->tidr = tidr;
+	),
+
+	TP_printk("linux pid=%d spa=0x%p pasid=0x%x pidr=0x%x tidr=0x%x",
+		__entry->pid,
+		__entry->spa,
+		__entry->pasid,
+		__entry->pidr,
+		__entry->tidr
+	)
+);
+
+DEFINE_EVENT(ocxl_context, ocxl_context_add,
+	TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr),
+	TP_ARGS(pid, spa, pasid, pidr, tidr)
+);
+
+DEFINE_EVENT(ocxl_context, ocxl_context_remove,
+	TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr),
+	TP_ARGS(pid, spa, pasid, pidr, tidr)
+);
+
+TRACE_EVENT(ocxl_terminate_pasid,
+	TP_PROTO(int pasid, int rc),
+	TP_ARGS(pasid, rc),
+
+	TP_STRUCT__entry(
+		__field(int, pasid)
+		__field(int, rc)
+	),
+
+	TP_fast_assign(
+		__entry->pasid = pasid;
+		__entry->rc = rc;
+	),
+
+	TP_printk("pasid=0x%x rc=%d",
+		__entry->pasid,
+		__entry->rc
+	)
+);
+
+DECLARE_EVENT_CLASS(ocxl_fault_handler,
+	TP_PROTO(void *spa, u64 pe, u64 dsisr, u64 dar, u64 tfc),
+	TP_ARGS(spa, pe, dsisr, dar, tfc),
+
+	TP_STRUCT__entry(
+		__field(void *, spa)
+		__field(u64, pe)
+		__field(u64, dsisr)
+		__field(u64, dar)
+		__field(u64, tfc)
+	),
+
+	TP_fast_assign(
+		__entry->spa = spa;
+		__entry->pe = pe;
+		__entry->dsisr = dsisr;
+		__entry->dar = dar;
+		__entry->tfc = tfc;
+	),
+
+	TP_printk("spa=%p pe=0x%llx dsisr=0x%llx dar=0x%llx tfc=0x%llx",
+		__entry->spa,
+		__entry->pe,
+		__entry->dsisr,
+		__entry->dar,
+		__entry->tfc
+	)
+);
+
+DEFINE_EVENT(ocxl_fault_handler, ocxl_fault,
+	TP_PROTO(void *spa, u64 pe, u64 dsisr, u64 dar, u64 tfc),
+	TP_ARGS(spa, pe, dsisr, dar, tfc)
+);
+
+DEFINE_EVENT(ocxl_fault_handler, ocxl_fault_ack,
+	TP_PROTO(void *spa, u64 pe, u64 dsisr, u64 dar, u64 tfc),
+	TP_ARGS(spa, pe, dsisr, dar, tfc)
+);
+
+TRACE_EVENT(ocxl_afu_irq_alloc,
+	TP_PROTO(int pasid, int irq_id, unsigned int virq, int hw_irq,
+		u64 irq_offset),
+	TP_ARGS(pasid, irq_id, virq, hw_irq, irq_offset),
+
+	TP_STRUCT__entry(
+		__field(int, pasid)
+		__field(int, irq_id)
+		__field(unsigned int, virq)
+		__field(int, hw_irq)
+		__field(u64, irq_offset)
+	),
+
+	TP_fast_assign(
+		__entry->pasid = pasid;
+		__entry->irq_id = irq_id;
+		__entry->virq = virq;
+		__entry->hw_irq = hw_irq;
+		__entry->irq_offset = irq_offset;
+	),
+
+	TP_printk("pasid=0x%x irq_id=%d virq=%u hw_irq=%d irq_offset=0x%llx",
+		__entry->pasid,
+		__entry->irq_id,
+		__entry->virq,
+		__entry->hw_irq,
+		__entry->irq_offset
+	)
+);
+
+TRACE_EVENT(ocxl_afu_irq_free,
+	TP_PROTO(int pasid, int irq_id),
+	TP_ARGS(pasid, irq_id),
+
+	TP_STRUCT__entry(
+		__field(int, pasid)
+		__field(int, irq_id)
+	),
+
+	TP_fast_assign(
+		__entry->pasid = pasid;
+		__entry->irq_id = irq_id;
+	),
+
+	TP_printk("pasid=0x%x irq_id=%d",
+		__entry->pasid,
+		__entry->irq_id
+	)
+);
+
+TRACE_EVENT(ocxl_afu_irq_receive,
+	TP_PROTO(int virq),
+	TP_ARGS(virq),
+
+	TP_STRUCT__entry(
+		__field(int, virq)
+	),
+
+	TP_fast_assign(
+		__entry->virq = virq;
+	),
+
+	TP_printk("virq=%d",
+		__entry->virq
+	)
+);
+
+#endif /* _TRACE_OCXL_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>
diff --git a/drivers/misc/pch_phub.c b/drivers/misc/pch_phub.c
new file mode 100644
index 000000000..309703e9c
--- /dev/null
+++ b/drivers/misc/pch_phub.c
@@ -0,0 +1,915 @@
+/*
+ * Copyright (C) 2011 LAPIS Semiconductor Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+#include <linux/mutex.h>
+#include <linux/if_ether.h>
+#include <linux/ctype.h>
+#include <linux/dmi.h>
+#include <linux/of.h>
+
+#define PHUB_STATUS 0x00		/* Status Register offset */
+#define PHUB_CONTROL 0x04		/* Control Register offset */
+#define PHUB_TIMEOUT 0x05		/* Time out value for Status Register */
+#define PCH_PHUB_ROM_WRITE_ENABLE 0x01	/* Enabling for writing ROM */
+#define PCH_PHUB_ROM_WRITE_DISABLE 0x00	/* Disabling for writing ROM */
+#define PCH_PHUB_MAC_START_ADDR_EG20T 0x14  /* MAC data area start address
+					       offset */
+#define PCH_PHUB_MAC_START_ADDR_ML7223 0x20C  /* MAC data area start address
+						 offset */
+#define PCH_PHUB_ROM_START_ADDR_EG20T 0x80 /* ROM data area start address offset
+					      (Intel EG20T PCH)*/
+#define PCH_PHUB_ROM_START_ADDR_ML7213 0x400 /* ROM data area start address
+						offset(LAPIS Semicon ML7213)
+					      */
+#define PCH_PHUB_ROM_START_ADDR_ML7223 0x400 /* ROM data area start address
+						offset(LAPIS Semicon ML7223)
+					      */
+
+/* MAX number of INT_REDUCE_CONTROL registers */
+#define MAX_NUM_INT_REDUCE_CONTROL_REG 128
+#define PCI_DEVICE_ID_PCH1_PHUB 0x8801
+#define PCH_MINOR_NOS 1
+#define CLKCFG_CAN_50MHZ 0x12000000
+#define CLKCFG_CANCLK_MASK 0xFF000000
+#define CLKCFG_UART_MASK			0xFFFFFF
+
+/* CM-iTC */
+#define CLKCFG_UART_48MHZ			(1 << 16)
+#define CLKCFG_UART_25MHZ			(2 << 16)
+#define CLKCFG_BAUDDIV				(2 << 20)
+#define CLKCFG_PLL2VCO				(8 << 9)
+#define CLKCFG_UARTCLKSEL			(1 << 18)
+
+/* Macros for ML7213 */
+#define PCI_DEVICE_ID_ROHM_ML7213_PHUB		0x801A
+
+/* Macros for ML7223 */
+#define PCI_DEVICE_ID_ROHM_ML7223_mPHUB	0x8012 /* for Bus-m */
+#define PCI_DEVICE_ID_ROHM_ML7223_nPHUB	0x8002 /* for Bus-n */
+
+/* Macros for ML7831 */
+#define PCI_DEVICE_ID_ROHM_ML7831_PHUB 0x8801
+
+/* SROM ACCESS Macro */
+#define PCH_WORD_ADDR_MASK (~((1 << 2) - 1))
+
+/* Registers address offset */
+#define PCH_PHUB_ID_REG				0x0000
+#define PCH_PHUB_QUEUE_PRI_VAL_REG		0x0004
+#define PCH_PHUB_RC_QUEUE_MAXSIZE_REG		0x0008
+#define PCH_PHUB_BRI_QUEUE_MAXSIZE_REG		0x000C
+#define PCH_PHUB_COMP_RESP_TIMEOUT_REG		0x0010
+#define PCH_PHUB_BUS_SLAVE_CONTROL_REG		0x0014
+#define PCH_PHUB_DEADLOCK_AVOID_TYPE_REG	0x0018
+#define PCH_PHUB_INTPIN_REG_WPERMIT_REG0	0x0020
+#define PCH_PHUB_INTPIN_REG_WPERMIT_REG1	0x0024
+#define PCH_PHUB_INTPIN_REG_WPERMIT_REG2	0x0028
+#define PCH_PHUB_INTPIN_REG_WPERMIT_REG3	0x002C
+#define PCH_PHUB_INT_REDUCE_CONTROL_REG_BASE	0x0040
+#define CLKCFG_REG_OFFSET			0x500
+#define FUNCSEL_REG_OFFSET			0x508
+
+#define PCH_PHUB_OROM_SIZE 15360
+
+/**
+ * struct pch_phub_reg - PHUB register structure
+ * @phub_id_reg:			PHUB_ID register val
+ * @q_pri_val_reg:			QUEUE_PRI_VAL register val
+ * @rc_q_maxsize_reg:			RC_QUEUE_MAXSIZE register val
+ * @bri_q_maxsize_reg:			BRI_QUEUE_MAXSIZE register val
+ * @comp_resp_timeout_reg:		COMP_RESP_TIMEOUT register val
+ * @bus_slave_control_reg:		BUS_SLAVE_CONTROL_REG register val
+ * @deadlock_avoid_type_reg:		DEADLOCK_AVOID_TYPE register val
+ * @intpin_reg_wpermit_reg0:		INTPIN_REG_WPERMIT register 0 val
+ * @intpin_reg_wpermit_reg1:		INTPIN_REG_WPERMIT register 1 val
+ * @intpin_reg_wpermit_reg2:		INTPIN_REG_WPERMIT register 2 val
+ * @intpin_reg_wpermit_reg3:		INTPIN_REG_WPERMIT register 3 val
+ * @int_reduce_control_reg:		INT_REDUCE_CONTROL registers val
+ * @clkcfg_reg:				CLK CFG register val
+ * @funcsel_reg:			Function select register value
+ * @pch_phub_base_address:		Register base address
+ * @pch_phub_extrom_base_address:	external rom base address
+ * @pch_mac_start_address:		MAC address area start address
+ * @pch_opt_rom_start_address:		Option ROM start address
+ * @ioh_type:				Save IOH type
+ * @pdev:				pointer to pci device struct
+ */
+struct pch_phub_reg {
+	u32 phub_id_reg;
+	u32 q_pri_val_reg;
+	u32 rc_q_maxsize_reg;
+	u32 bri_q_maxsize_reg;
+	u32 comp_resp_timeout_reg;
+	u32 bus_slave_control_reg;
+	u32 deadlock_avoid_type_reg;
+	u32 intpin_reg_wpermit_reg0;
+	u32 intpin_reg_wpermit_reg1;
+	u32 intpin_reg_wpermit_reg2;
+	u32 intpin_reg_wpermit_reg3;
+	u32 int_reduce_control_reg[MAX_NUM_INT_REDUCE_CONTROL_REG];
+	u32 clkcfg_reg;
+	u32 funcsel_reg;
+	void __iomem *pch_phub_base_address;
+	void __iomem *pch_phub_extrom_base_address;
+	u32 pch_mac_start_address;
+	u32 pch_opt_rom_start_address;
+	int ioh_type;
+	struct pci_dev *pdev;
+};
+
+/* SROM SPEC for MAC address assignment offset */
+static const int pch_phub_mac_offset[ETH_ALEN] = {0x3, 0x2, 0x1, 0x0, 0xb, 0xa};
+
+static DEFINE_MUTEX(pch_phub_mutex);
+
+/**
+ * pch_phub_read_modify_write_reg() - Reading modifying and writing register
+ * @reg_addr_offset:	Register offset address value.
+ * @data:		Writing value.
+ * @mask:		Mask value.
+ */
+static void pch_phub_read_modify_write_reg(struct pch_phub_reg *chip,
+					   unsigned int reg_addr_offset,
+					   unsigned int data, unsigned int mask)
+{
+	void __iomem *reg_addr = chip->pch_phub_base_address + reg_addr_offset;
+	iowrite32(((ioread32(reg_addr) & ~mask)) | data, reg_addr);
+}
+
+#ifdef CONFIG_PM
+/* pch_phub_save_reg_conf - saves register configuration */
+static void pch_phub_save_reg_conf(struct pci_dev *pdev)
+{
+	unsigned int i;
+	struct pch_phub_reg *chip = pci_get_drvdata(pdev);
+
+	void __iomem *p = chip->pch_phub_base_address;
+
+	chip->phub_id_reg = ioread32(p + PCH_PHUB_ID_REG);
+	chip->q_pri_val_reg = ioread32(p + PCH_PHUB_QUEUE_PRI_VAL_REG);
+	chip->rc_q_maxsize_reg = ioread32(p + PCH_PHUB_RC_QUEUE_MAXSIZE_REG);
+	chip->bri_q_maxsize_reg = ioread32(p + PCH_PHUB_BRI_QUEUE_MAXSIZE_REG);
+	chip->comp_resp_timeout_reg =
+				ioread32(p + PCH_PHUB_COMP_RESP_TIMEOUT_REG);
+	chip->bus_slave_control_reg =
+				ioread32(p + PCH_PHUB_BUS_SLAVE_CONTROL_REG);
+	chip->deadlock_avoid_type_reg =
+				ioread32(p + PCH_PHUB_DEADLOCK_AVOID_TYPE_REG);
+	chip->intpin_reg_wpermit_reg0 =
+				ioread32(p + PCH_PHUB_INTPIN_REG_WPERMIT_REG0);
+	chip->intpin_reg_wpermit_reg1 =
+				ioread32(p + PCH_PHUB_INTPIN_REG_WPERMIT_REG1);
+	chip->intpin_reg_wpermit_reg2 =
+				ioread32(p + PCH_PHUB_INTPIN_REG_WPERMIT_REG2);
+	chip->intpin_reg_wpermit_reg3 =
+				ioread32(p + PCH_PHUB_INTPIN_REG_WPERMIT_REG3);
+	dev_dbg(&pdev->dev, "%s : "
+		"chip->phub_id_reg=%x, "
+		"chip->q_pri_val_reg=%x, "
+		"chip->rc_q_maxsize_reg=%x, "
+		"chip->bri_q_maxsize_reg=%x, "
+		"chip->comp_resp_timeout_reg=%x, "
+		"chip->bus_slave_control_reg=%x, "
+		"chip->deadlock_avoid_type_reg=%x, "
+		"chip->intpin_reg_wpermit_reg0=%x, "
+		"chip->intpin_reg_wpermit_reg1=%x, "
+		"chip->intpin_reg_wpermit_reg2=%x, "
+		"chip->intpin_reg_wpermit_reg3=%x\n", __func__,
+		chip->phub_id_reg,
+		chip->q_pri_val_reg,
+		chip->rc_q_maxsize_reg,
+		chip->bri_q_maxsize_reg,
+		chip->comp_resp_timeout_reg,
+		chip->bus_slave_control_reg,
+		chip->deadlock_avoid_type_reg,
+		chip->intpin_reg_wpermit_reg0,
+		chip->intpin_reg_wpermit_reg1,
+		chip->intpin_reg_wpermit_reg2,
+		chip->intpin_reg_wpermit_reg3);
+	for (i = 0; i < MAX_NUM_INT_REDUCE_CONTROL_REG; i++) {
+		chip->int_reduce_control_reg[i] =
+		    ioread32(p + PCH_PHUB_INT_REDUCE_CONTROL_REG_BASE + 4 * i);
+		dev_dbg(&pdev->dev, "%s : "
+			"chip->int_reduce_control_reg[%d]=%x\n",
+			__func__, i, chip->int_reduce_control_reg[i]);
+	}
+	chip->clkcfg_reg = ioread32(p + CLKCFG_REG_OFFSET);
+	if ((chip->ioh_type == 2) || (chip->ioh_type == 4))
+		chip->funcsel_reg = ioread32(p + FUNCSEL_REG_OFFSET);
+}
+
+/* pch_phub_restore_reg_conf - restore register configuration */
+static void pch_phub_restore_reg_conf(struct pci_dev *pdev)
+{
+	unsigned int i;
+	struct pch_phub_reg *chip = pci_get_drvdata(pdev);
+	void __iomem *p;
+	p = chip->pch_phub_base_address;
+
+	iowrite32(chip->phub_id_reg, p + PCH_PHUB_ID_REG);
+	iowrite32(chip->q_pri_val_reg, p + PCH_PHUB_QUEUE_PRI_VAL_REG);
+	iowrite32(chip->rc_q_maxsize_reg, p + PCH_PHUB_RC_QUEUE_MAXSIZE_REG);
+	iowrite32(chip->bri_q_maxsize_reg, p + PCH_PHUB_BRI_QUEUE_MAXSIZE_REG);
+	iowrite32(chip->comp_resp_timeout_reg,
+					p + PCH_PHUB_COMP_RESP_TIMEOUT_REG);
+	iowrite32(chip->bus_slave_control_reg,
+					p + PCH_PHUB_BUS_SLAVE_CONTROL_REG);
+	iowrite32(chip->deadlock_avoid_type_reg,
+					p + PCH_PHUB_DEADLOCK_AVOID_TYPE_REG);
+	iowrite32(chip->intpin_reg_wpermit_reg0,
+					p + PCH_PHUB_INTPIN_REG_WPERMIT_REG0);
+	iowrite32(chip->intpin_reg_wpermit_reg1,
+					p + PCH_PHUB_INTPIN_REG_WPERMIT_REG1);
+	iowrite32(chip->intpin_reg_wpermit_reg2,
+					p + PCH_PHUB_INTPIN_REG_WPERMIT_REG2);
+	iowrite32(chip->intpin_reg_wpermit_reg3,
+					p + PCH_PHUB_INTPIN_REG_WPERMIT_REG3);
+	dev_dbg(&pdev->dev, "%s : "
+		"chip->phub_id_reg=%x, "
+		"chip->q_pri_val_reg=%x, "
+		"chip->rc_q_maxsize_reg=%x, "
+		"chip->bri_q_maxsize_reg=%x, "
+		"chip->comp_resp_timeout_reg=%x, "
+		"chip->bus_slave_control_reg=%x, "
+		"chip->deadlock_avoid_type_reg=%x, "
+		"chip->intpin_reg_wpermit_reg0=%x, "
+		"chip->intpin_reg_wpermit_reg1=%x, "
+		"chip->intpin_reg_wpermit_reg2=%x, "
+		"chip->intpin_reg_wpermit_reg3=%x\n", __func__,
+		chip->phub_id_reg,
+		chip->q_pri_val_reg,
+		chip->rc_q_maxsize_reg,
+		chip->bri_q_maxsize_reg,
+		chip->comp_resp_timeout_reg,
+		chip->bus_slave_control_reg,
+		chip->deadlock_avoid_type_reg,
+		chip->intpin_reg_wpermit_reg0,
+		chip->intpin_reg_wpermit_reg1,
+		chip->intpin_reg_wpermit_reg2,
+		chip->intpin_reg_wpermit_reg3);
+	for (i = 0; i < MAX_NUM_INT_REDUCE_CONTROL_REG; i++) {
+		iowrite32(chip->int_reduce_control_reg[i],
+			p + PCH_PHUB_INT_REDUCE_CONTROL_REG_BASE + 4 * i);
+		dev_dbg(&pdev->dev, "%s : "
+			"chip->int_reduce_control_reg[%d]=%x\n",
+			__func__, i, chip->int_reduce_control_reg[i]);
+	}
+
+	iowrite32(chip->clkcfg_reg, p + CLKCFG_REG_OFFSET);
+	if ((chip->ioh_type == 2) || (chip->ioh_type == 4))
+		iowrite32(chip->funcsel_reg, p + FUNCSEL_REG_OFFSET);
+}
+#endif
+
+/**
+ * pch_phub_read_serial_rom() - Reading Serial ROM
+ * @offset_address:	Serial ROM offset address to read.
+ * @data:		Read buffer for specified Serial ROM value.
+ */
+static void pch_phub_read_serial_rom(struct pch_phub_reg *chip,
+				     unsigned int offset_address, u8 *data)
+{
+	void __iomem *mem_addr = chip->pch_phub_extrom_base_address +
+								offset_address;
+
+	*data = ioread8(mem_addr);
+}
+
+/**
+ * pch_phub_write_serial_rom() - Writing Serial ROM
+ * @offset_address:	Serial ROM offset address.
+ * @data:		Serial ROM value to write.
+ */
+static int pch_phub_write_serial_rom(struct pch_phub_reg *chip,
+				     unsigned int offset_address, u8 data)
+{
+	void __iomem *mem_addr = chip->pch_phub_extrom_base_address +
+					(offset_address & PCH_WORD_ADDR_MASK);
+	int i;
+	unsigned int word_data;
+	unsigned int pos;
+	unsigned int mask;
+	pos = (offset_address % 4) * 8;
+	mask = ~(0xFF << pos);
+
+	iowrite32(PCH_PHUB_ROM_WRITE_ENABLE,
+			chip->pch_phub_extrom_base_address + PHUB_CONTROL);
+
+	word_data = ioread32(mem_addr);
+	iowrite32((word_data & mask) | (u32)data << pos, mem_addr);
+
+	i = 0;
+	while (ioread8(chip->pch_phub_extrom_base_address +
+						PHUB_STATUS) != 0x00) {
+		msleep(1);
+		if (i == PHUB_TIMEOUT)
+			return -ETIMEDOUT;
+		i++;
+	}
+
+	iowrite32(PCH_PHUB_ROM_WRITE_DISABLE,
+			chip->pch_phub_extrom_base_address + PHUB_CONTROL);
+
+	return 0;
+}
+
+/**
+ * pch_phub_read_serial_rom_val() - Read Serial ROM value
+ * @offset_address:	Serial ROM address offset value.
+ * @data:		Serial ROM value to read.
+ */
+static void pch_phub_read_serial_rom_val(struct pch_phub_reg *chip,
+					 unsigned int offset_address, u8 *data)
+{
+	unsigned int mem_addr;
+
+	mem_addr = chip->pch_mac_start_address +
+			pch_phub_mac_offset[offset_address];
+
+	pch_phub_read_serial_rom(chip, mem_addr, data);
+}
+
+/**
+ * pch_phub_write_serial_rom_val() - writing Serial ROM value
+ * @offset_address:	Serial ROM address offset value.
+ * @data:		Serial ROM value.
+ */
+static int pch_phub_write_serial_rom_val(struct pch_phub_reg *chip,
+					 unsigned int offset_address, u8 data)
+{
+	int retval;
+	unsigned int mem_addr;
+
+	mem_addr = chip->pch_mac_start_address +
+			pch_phub_mac_offset[offset_address];
+
+	retval = pch_phub_write_serial_rom(chip, mem_addr, data);
+
+	return retval;
+}
+
+/* pch_phub_gbe_serial_rom_conf - makes Serial ROM header format configuration
+ * for Gigabit Ethernet MAC address
+ */
+static int pch_phub_gbe_serial_rom_conf(struct pch_phub_reg *chip)
+{
+	int retval;
+
+	retval = pch_phub_write_serial_rom(chip, 0x0b, 0xbc);
+	retval |= pch_phub_write_serial_rom(chip, 0x0a, 0x10);
+	retval |= pch_phub_write_serial_rom(chip, 0x09, 0x01);
+	retval |= pch_phub_write_serial_rom(chip, 0x08, 0x02);
+
+	retval |= pch_phub_write_serial_rom(chip, 0x0f, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x0e, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x0d, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x0c, 0x80);
+
+	retval |= pch_phub_write_serial_rom(chip, 0x13, 0xbc);
+	retval |= pch_phub_write_serial_rom(chip, 0x12, 0x10);
+	retval |= pch_phub_write_serial_rom(chip, 0x11, 0x01);
+	retval |= pch_phub_write_serial_rom(chip, 0x10, 0x18);
+
+	retval |= pch_phub_write_serial_rom(chip, 0x1b, 0xbc);
+	retval |= pch_phub_write_serial_rom(chip, 0x1a, 0x10);
+	retval |= pch_phub_write_serial_rom(chip, 0x19, 0x01);
+	retval |= pch_phub_write_serial_rom(chip, 0x18, 0x19);
+
+	retval |= pch_phub_write_serial_rom(chip, 0x23, 0xbc);
+	retval |= pch_phub_write_serial_rom(chip, 0x22, 0x10);
+	retval |= pch_phub_write_serial_rom(chip, 0x21, 0x01);
+	retval |= pch_phub_write_serial_rom(chip, 0x20, 0x3a);
+
+	retval |= pch_phub_write_serial_rom(chip, 0x27, 0x01);
+	retval |= pch_phub_write_serial_rom(chip, 0x26, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x25, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x24, 0x00);
+
+	return retval;
+}
+
+/* pch_phub_gbe_serial_rom_conf_mp - makes SerialROM header format configuration
+ * for Gigabit Ethernet MAC address
+ */
+static int pch_phub_gbe_serial_rom_conf_mp(struct pch_phub_reg *chip)
+{
+	int retval;
+	u32 offset_addr;
+
+	offset_addr = 0x200;
+	retval = pch_phub_write_serial_rom(chip, 0x03 + offset_addr, 0xbc);
+	retval |= pch_phub_write_serial_rom(chip, 0x02 + offset_addr, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x01 + offset_addr, 0x40);
+	retval |= pch_phub_write_serial_rom(chip, 0x00 + offset_addr, 0x02);
+
+	retval |= pch_phub_write_serial_rom(chip, 0x07 + offset_addr, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x06 + offset_addr, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x05 + offset_addr, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x04 + offset_addr, 0x80);
+
+	retval |= pch_phub_write_serial_rom(chip, 0x0b + offset_addr, 0xbc);
+	retval |= pch_phub_write_serial_rom(chip, 0x0a + offset_addr, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x09 + offset_addr, 0x40);
+	retval |= pch_phub_write_serial_rom(chip, 0x08 + offset_addr, 0x18);
+
+	retval |= pch_phub_write_serial_rom(chip, 0x13 + offset_addr, 0xbc);
+	retval |= pch_phub_write_serial_rom(chip, 0x12 + offset_addr, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x11 + offset_addr, 0x40);
+	retval |= pch_phub_write_serial_rom(chip, 0x10 + offset_addr, 0x19);
+
+	retval |= pch_phub_write_serial_rom(chip, 0x1b + offset_addr, 0xbc);
+	retval |= pch_phub_write_serial_rom(chip, 0x1a + offset_addr, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x19 + offset_addr, 0x40);
+	retval |= pch_phub_write_serial_rom(chip, 0x18 + offset_addr, 0x3a);
+
+	retval |= pch_phub_write_serial_rom(chip, 0x1f + offset_addr, 0x01);
+	retval |= pch_phub_write_serial_rom(chip, 0x1e + offset_addr, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x1d + offset_addr, 0x00);
+	retval |= pch_phub_write_serial_rom(chip, 0x1c + offset_addr, 0x00);
+
+	return retval;
+}
+
+/**
+ * pch_phub_read_gbe_mac_addr() - Read Gigabit Ethernet MAC address
+ * @offset_address:	Gigabit Ethernet MAC address offset value.
+ * @data:		Buffer of the Gigabit Ethernet MAC address value.
+ */
+static void pch_phub_read_gbe_mac_addr(struct pch_phub_reg *chip, u8 *data)
+{
+	int i;
+	for (i = 0; i < ETH_ALEN; i++)
+		pch_phub_read_serial_rom_val(chip, i, &data[i]);
+}
+
+/**
+ * pch_phub_write_gbe_mac_addr() - Write MAC address
+ * @offset_address:	Gigabit Ethernet MAC address offset value.
+ * @data:		Gigabit Ethernet MAC address value.
+ */
+static int pch_phub_write_gbe_mac_addr(struct pch_phub_reg *chip, u8 *data)
+{
+	int retval;
+	int i;
+
+	if ((chip->ioh_type == 1) || (chip->ioh_type == 5)) /* EG20T or ML7831*/
+		retval = pch_phub_gbe_serial_rom_conf(chip);
+	else	/* ML7223 */
+		retval = pch_phub_gbe_serial_rom_conf_mp(chip);
+	if (retval)
+		return retval;
+
+	for (i = 0; i < ETH_ALEN; i++) {
+		retval = pch_phub_write_serial_rom_val(chip, i, data[i]);
+		if (retval)
+			return retval;
+	}
+
+	return retval;
+}
+
+static ssize_t pch_phub_bin_read(struct file *filp, struct kobject *kobj,
+				 struct bin_attribute *attr, char *buf,
+				 loff_t off, size_t count)
+{
+	unsigned int rom_signature;
+	unsigned char rom_length;
+	unsigned int tmp;
+	unsigned int addr_offset;
+	unsigned int orom_size;
+	int ret;
+	int err;
+	ssize_t rom_size;
+
+	struct pch_phub_reg *chip = dev_get_drvdata(kobj_to_dev(kobj));
+
+	ret = mutex_lock_interruptible(&pch_phub_mutex);
+	if (ret) {
+		err = -ERESTARTSYS;
+		goto return_err_nomutex;
+	}
+
+	/* Get Rom signature */
+	chip->pch_phub_extrom_base_address = pci_map_rom(chip->pdev, &rom_size);
+	if (!chip->pch_phub_extrom_base_address) {
+		err = -ENODATA;
+		goto exrom_map_err;
+	}
+
+	pch_phub_read_serial_rom(chip, chip->pch_opt_rom_start_address,
+				(unsigned char *)&rom_signature);
+	rom_signature &= 0xff;
+	pch_phub_read_serial_rom(chip, chip->pch_opt_rom_start_address + 1,
+				(unsigned char *)&tmp);
+	rom_signature |= (tmp & 0xff) << 8;
+	if (rom_signature == 0xAA55) {
+		pch_phub_read_serial_rom(chip,
+					 chip->pch_opt_rom_start_address + 2,
+					 &rom_length);
+		orom_size = rom_length * 512;
+		if (orom_size < off) {
+			addr_offset = 0;
+			goto return_ok;
+		}
+		if (orom_size < count) {
+			addr_offset = 0;
+			goto return_ok;
+		}
+
+		for (addr_offset = 0; addr_offset < count; addr_offset++) {
+			pch_phub_read_serial_rom(chip,
+			    chip->pch_opt_rom_start_address + addr_offset + off,
+			    &buf[addr_offset]);
+		}
+	} else {
+		err = -ENODATA;
+		goto return_err;
+	}
+return_ok:
+	pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address);
+	mutex_unlock(&pch_phub_mutex);
+	return addr_offset;
+
+return_err:
+	pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address);
+exrom_map_err:
+	mutex_unlock(&pch_phub_mutex);
+return_err_nomutex:
+	return err;
+}
+
+static ssize_t pch_phub_bin_write(struct file *filp, struct kobject *kobj,
+				  struct bin_attribute *attr,
+				  char *buf, loff_t off, size_t count)
+{
+	int err;
+	unsigned int addr_offset;
+	int ret;
+	ssize_t rom_size;
+	struct pch_phub_reg *chip = dev_get_drvdata(kobj_to_dev(kobj));
+
+	ret = mutex_lock_interruptible(&pch_phub_mutex);
+	if (ret)
+		return -ERESTARTSYS;
+
+	if (off > PCH_PHUB_OROM_SIZE) {
+		addr_offset = 0;
+		goto return_ok;
+	}
+	if (count > PCH_PHUB_OROM_SIZE) {
+		addr_offset = 0;
+		goto return_ok;
+	}
+
+	chip->pch_phub_extrom_base_address = pci_map_rom(chip->pdev, &rom_size);
+	if (!chip->pch_phub_extrom_base_address) {
+		err = -ENOMEM;
+		goto exrom_map_err;
+	}
+
+	for (addr_offset = 0; addr_offset < count; addr_offset++) {
+		if (PCH_PHUB_OROM_SIZE < off + addr_offset)
+			goto return_ok;
+
+		ret = pch_phub_write_serial_rom(chip,
+			    chip->pch_opt_rom_start_address + addr_offset + off,
+			    buf[addr_offset]);
+		if (ret) {
+			err = ret;
+			goto return_err;
+		}
+	}
+
+return_ok:
+	pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address);
+	mutex_unlock(&pch_phub_mutex);
+	return addr_offset;
+
+return_err:
+	pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address);
+
+exrom_map_err:
+	mutex_unlock(&pch_phub_mutex);
+	return err;
+}
+
+static ssize_t show_pch_mac(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	u8 mac[8];
+	struct pch_phub_reg *chip = dev_get_drvdata(dev);
+	ssize_t rom_size;
+
+	chip->pch_phub_extrom_base_address = pci_map_rom(chip->pdev, &rom_size);
+	if (!chip->pch_phub_extrom_base_address)
+		return -ENOMEM;
+
+	pch_phub_read_gbe_mac_addr(chip, mac);
+	pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address);
+
+	return sprintf(buf, "%pM\n", mac);
+}
+
+static ssize_t store_pch_mac(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	u8 mac[ETH_ALEN];
+	ssize_t rom_size;
+	struct pch_phub_reg *chip = dev_get_drvdata(dev);
+	int ret;
+
+	if (!mac_pton(buf, mac))
+		return -EINVAL;
+
+	chip->pch_phub_extrom_base_address = pci_map_rom(chip->pdev, &rom_size);
+	if (!chip->pch_phub_extrom_base_address)
+		return -ENOMEM;
+
+	ret = pch_phub_write_gbe_mac_addr(chip, mac);
+	pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address);
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+static DEVICE_ATTR(pch_mac, S_IRUGO | S_IWUSR, show_pch_mac, store_pch_mac);
+
+static const struct bin_attribute pch_bin_attr = {
+	.attr = {
+		.name = "pch_firmware",
+		.mode = S_IRUGO | S_IWUSR,
+	},
+	.size = PCH_PHUB_OROM_SIZE + 1,
+	.read = pch_phub_bin_read,
+	.write = pch_phub_bin_write,
+};
+
+static int pch_phub_probe(struct pci_dev *pdev,
+				    const struct pci_device_id *id)
+{
+	int ret;
+	struct pch_phub_reg *chip;
+
+	chip = kzalloc(sizeof(struct pch_phub_reg), GFP_KERNEL);
+	if (chip == NULL)
+		return -ENOMEM;
+
+	ret = pci_enable_device(pdev);
+	if (ret) {
+		dev_err(&pdev->dev,
+		"%s : pci_enable_device FAILED(ret=%d)", __func__, ret);
+		goto err_pci_enable_dev;
+	}
+	dev_dbg(&pdev->dev, "%s : pci_enable_device returns %d\n", __func__,
+		ret);
+
+	ret = pci_request_regions(pdev, KBUILD_MODNAME);
+	if (ret) {
+		dev_err(&pdev->dev,
+		"%s : pci_request_regions FAILED(ret=%d)", __func__, ret);
+		goto err_req_regions;
+	}
+	dev_dbg(&pdev->dev, "%s : "
+		"pci_request_regions returns %d\n", __func__, ret);
+
+	chip->pch_phub_base_address = pci_iomap(pdev, 1, 0);
+
+
+	if (chip->pch_phub_base_address == NULL) {
+		dev_err(&pdev->dev, "%s : pci_iomap FAILED", __func__);
+		ret = -ENOMEM;
+		goto err_pci_iomap;
+	}
+	dev_dbg(&pdev->dev, "%s : pci_iomap SUCCESS and value "
+		"in pch_phub_base_address variable is %p\n", __func__,
+		chip->pch_phub_base_address);
+
+	chip->pdev = pdev; /* Save pci device struct */
+
+	if (id->driver_data == 1) { /* EG20T PCH */
+		const char *board_name;
+		unsigned int prefetch = 0x000affaa;
+
+		if (pdev->dev.of_node)
+			of_property_read_u32(pdev->dev.of_node,
+						  "intel,eg20t-prefetch",
+						  &prefetch);
+
+		ret = sysfs_create_file(&pdev->dev.kobj,
+					&dev_attr_pch_mac.attr);
+		if (ret)
+			goto err_sysfs_create;
+
+		ret = sysfs_create_bin_file(&pdev->dev.kobj, &pch_bin_attr);
+		if (ret)
+			goto exit_bin_attr;
+
+		pch_phub_read_modify_write_reg(chip,
+					       (unsigned int)CLKCFG_REG_OFFSET,
+					       CLKCFG_CAN_50MHZ,
+					       CLKCFG_CANCLK_MASK);
+
+		/* quirk for CM-iTC board */
+		board_name = dmi_get_system_info(DMI_BOARD_NAME);
+		if (board_name && strstr(board_name, "CM-iTC"))
+			pch_phub_read_modify_write_reg(chip,
+						(unsigned int)CLKCFG_REG_OFFSET,
+						CLKCFG_UART_48MHZ | CLKCFG_BAUDDIV |
+						CLKCFG_PLL2VCO | CLKCFG_UARTCLKSEL,
+						CLKCFG_UART_MASK);
+
+		/* set the prefech value */
+		iowrite32(prefetch, chip->pch_phub_base_address + 0x14);
+		/* set the interrupt delay value */
+		iowrite32(0x25, chip->pch_phub_base_address + 0x44);
+		chip->pch_opt_rom_start_address = PCH_PHUB_ROM_START_ADDR_EG20T;
+		chip->pch_mac_start_address = PCH_PHUB_MAC_START_ADDR_EG20T;
+
+		/* quirk for MIPS Boston platform */
+		if (pdev->dev.of_node) {
+			if (of_machine_is_compatible("img,boston")) {
+				pch_phub_read_modify_write_reg(chip,
+					(unsigned int)CLKCFG_REG_OFFSET,
+					CLKCFG_UART_25MHZ,
+					CLKCFG_UART_MASK);
+			}
+		}
+	} else if (id->driver_data == 2) { /* ML7213 IOH */
+		ret = sysfs_create_bin_file(&pdev->dev.kobj, &pch_bin_attr);
+		if (ret)
+			goto err_sysfs_create;
+		/* set the prefech value
+		 * Device2(USB OHCI #1/ USB EHCI #1/ USB Device):a
+		 * Device4(SDIO #0,1,2):f
+		 * Device6(SATA 2):f
+		 * Device8(USB OHCI #0/ USB EHCI #0):a
+		 */
+		iowrite32(0x000affa0, chip->pch_phub_base_address + 0x14);
+		chip->pch_opt_rom_start_address =\
+						 PCH_PHUB_ROM_START_ADDR_ML7213;
+	} else if (id->driver_data == 3) { /* ML7223 IOH Bus-m*/
+		/* set the prefech value
+		 * Device8(GbE)
+		 */
+		iowrite32(0x000a0000, chip->pch_phub_base_address + 0x14);
+		/* set the interrupt delay value */
+		iowrite32(0x25, chip->pch_phub_base_address + 0x140);
+		chip->pch_opt_rom_start_address =\
+						 PCH_PHUB_ROM_START_ADDR_ML7223;
+		chip->pch_mac_start_address = PCH_PHUB_MAC_START_ADDR_ML7223;
+	} else if (id->driver_data == 4) { /* ML7223 IOH Bus-n*/
+		ret = sysfs_create_file(&pdev->dev.kobj,
+					&dev_attr_pch_mac.attr);
+		if (ret)
+			goto err_sysfs_create;
+		ret = sysfs_create_bin_file(&pdev->dev.kobj, &pch_bin_attr);
+		if (ret)
+			goto exit_bin_attr;
+		/* set the prefech value
+		 * Device2(USB OHCI #0,1,2,3/ USB EHCI #0):a
+		 * Device4(SDIO #0,1):f
+		 * Device6(SATA 2):f
+		 */
+		iowrite32(0x0000ffa0, chip->pch_phub_base_address + 0x14);
+		chip->pch_opt_rom_start_address =\
+						 PCH_PHUB_ROM_START_ADDR_ML7223;
+		chip->pch_mac_start_address = PCH_PHUB_MAC_START_ADDR_ML7223;
+	} else if (id->driver_data == 5) { /* ML7831 */
+		ret = sysfs_create_file(&pdev->dev.kobj,
+					&dev_attr_pch_mac.attr);
+		if (ret)
+			goto err_sysfs_create;
+
+		ret = sysfs_create_bin_file(&pdev->dev.kobj, &pch_bin_attr);
+		if (ret)
+			goto exit_bin_attr;
+
+		/* set the prefech value */
+		iowrite32(0x000affaa, chip->pch_phub_base_address + 0x14);
+		/* set the interrupt delay value */
+		iowrite32(0x25, chip->pch_phub_base_address + 0x44);
+		chip->pch_opt_rom_start_address = PCH_PHUB_ROM_START_ADDR_EG20T;
+		chip->pch_mac_start_address = PCH_PHUB_MAC_START_ADDR_EG20T;
+	}
+
+	chip->ioh_type = id->driver_data;
+	pci_set_drvdata(pdev, chip);
+
+	return 0;
+exit_bin_attr:
+	sysfs_remove_file(&pdev->dev.kobj, &dev_attr_pch_mac.attr);
+
+err_sysfs_create:
+	pci_iounmap(pdev, chip->pch_phub_base_address);
+err_pci_iomap:
+	pci_release_regions(pdev);
+err_req_regions:
+	pci_disable_device(pdev);
+err_pci_enable_dev:
+	kfree(chip);
+	dev_err(&pdev->dev, "%s returns %d\n", __func__, ret);
+	return ret;
+}
+
+static void pch_phub_remove(struct pci_dev *pdev)
+{
+	struct pch_phub_reg *chip = pci_get_drvdata(pdev);
+
+	sysfs_remove_file(&pdev->dev.kobj, &dev_attr_pch_mac.attr);
+	sysfs_remove_bin_file(&pdev->dev.kobj, &pch_bin_attr);
+	pci_iounmap(pdev, chip->pch_phub_base_address);
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+	kfree(chip);
+}
+
+#ifdef CONFIG_PM
+
+static int pch_phub_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	int ret;
+
+	pch_phub_save_reg_conf(pdev);
+	ret = pci_save_state(pdev);
+	if (ret) {
+		dev_err(&pdev->dev,
+			" %s -pci_save_state returns %d\n", __func__, ret);
+		return ret;
+	}
+	pci_enable_wake(pdev, PCI_D3hot, 0);
+	pci_disable_device(pdev);
+	pci_set_power_state(pdev, pci_choose_state(pdev, state));
+
+	return 0;
+}
+
+static int pch_phub_resume(struct pci_dev *pdev)
+{
+	int ret;
+
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+	ret = pci_enable_device(pdev);
+	if (ret) {
+		dev_err(&pdev->dev,
+		"%s-pci_enable_device failed(ret=%d) ", __func__, ret);
+		return ret;
+	}
+
+	pci_enable_wake(pdev, PCI_D3hot, 0);
+	pch_phub_restore_reg_conf(pdev);
+
+	return 0;
+}
+#else
+#define pch_phub_suspend NULL
+#define pch_phub_resume NULL
+#endif /* CONFIG_PM */
+
+static const struct pci_device_id pch_phub_pcidev_id[] = {
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_PCH1_PHUB),       1,  },
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ROHM_ML7213_PHUB), 2,  },
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ROHM_ML7223_mPHUB), 3,  },
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ROHM_ML7223_nPHUB), 4,  },
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ROHM_ML7831_PHUB), 5,  },
+	{ }
+};
+MODULE_DEVICE_TABLE(pci, pch_phub_pcidev_id);
+
+static struct pci_driver pch_phub_driver = {
+	.name = "pch_phub",
+	.id_table = pch_phub_pcidev_id,
+	.probe = pch_phub_probe,
+	.remove = pch_phub_remove,
+	.suspend = pch_phub_suspend,
+	.resume = pch_phub_resume
+};
+
+module_pci_driver(pch_phub_driver);
+
+MODULE_DESCRIPTION("Intel EG20T PCH/LAPIS Semiconductor IOH(ML7213/ML7223) PHUB");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c
new file mode 100644
index 000000000..7d166f57f
--- /dev/null
+++ b/drivers/misc/pci_endpoint_test.c
@@ -0,0 +1,831 @@
+/**
+ * Host side test driver to test endpoint functionality
+ *
+ * Copyright (C) 2017 Texas Instruments
+ * Author: Kishon Vijay Abraham I <kishon@ti.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 of
+ * the License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/crc32.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+
+#include <linux/pci_regs.h>
+
+#include <uapi/linux/pcitest.h>
+
+#define DRV_MODULE_NAME				"pci-endpoint-test"
+
+#define IRQ_TYPE_UNDEFINED			-1
+#define IRQ_TYPE_LEGACY				0
+#define IRQ_TYPE_MSI				1
+#define IRQ_TYPE_MSIX				2
+
+#define PCI_ENDPOINT_TEST_MAGIC			0x0
+
+#define PCI_ENDPOINT_TEST_COMMAND		0x4
+#define COMMAND_RAISE_LEGACY_IRQ		BIT(0)
+#define COMMAND_RAISE_MSI_IRQ			BIT(1)
+#define COMMAND_RAISE_MSIX_IRQ			BIT(2)
+#define COMMAND_READ				BIT(3)
+#define COMMAND_WRITE				BIT(4)
+#define COMMAND_COPY				BIT(5)
+
+#define PCI_ENDPOINT_TEST_STATUS		0x8
+#define STATUS_READ_SUCCESS			BIT(0)
+#define STATUS_READ_FAIL			BIT(1)
+#define STATUS_WRITE_SUCCESS			BIT(2)
+#define STATUS_WRITE_FAIL			BIT(3)
+#define STATUS_COPY_SUCCESS			BIT(4)
+#define STATUS_COPY_FAIL			BIT(5)
+#define STATUS_IRQ_RAISED			BIT(6)
+#define STATUS_SRC_ADDR_INVALID			BIT(7)
+#define STATUS_DST_ADDR_INVALID			BIT(8)
+
+#define PCI_ENDPOINT_TEST_LOWER_SRC_ADDR	0x0c
+#define PCI_ENDPOINT_TEST_UPPER_SRC_ADDR	0x10
+
+#define PCI_ENDPOINT_TEST_LOWER_DST_ADDR	0x14
+#define PCI_ENDPOINT_TEST_UPPER_DST_ADDR	0x18
+
+#define PCI_ENDPOINT_TEST_SIZE			0x1c
+#define PCI_ENDPOINT_TEST_CHECKSUM		0x20
+
+#define PCI_ENDPOINT_TEST_IRQ_TYPE		0x24
+#define PCI_ENDPOINT_TEST_IRQ_NUMBER		0x28
+
+#define PCI_DEVICE_ID_TI_AM654			0xb00c
+
+#define is_am654_pci_dev(pdev)		\
+		((pdev)->device == PCI_DEVICE_ID_TI_AM654)
+
+static DEFINE_IDA(pci_endpoint_test_ida);
+
+#define to_endpoint_test(priv) container_of((priv), struct pci_endpoint_test, \
+					    miscdev)
+
+static bool no_msi;
+module_param(no_msi, bool, 0444);
+MODULE_PARM_DESC(no_msi, "Disable MSI interrupt in pci_endpoint_test");
+
+static int irq_type = IRQ_TYPE_MSI;
+module_param(irq_type, int, 0444);
+MODULE_PARM_DESC(irq_type, "IRQ mode selection in pci_endpoint_test (0 - Legacy, 1 - MSI, 2 - MSI-X)");
+
+enum pci_barno {
+	BAR_0,
+	BAR_1,
+	BAR_2,
+	BAR_3,
+	BAR_4,
+	BAR_5,
+};
+
+struct pci_endpoint_test {
+	struct pci_dev	*pdev;
+	void __iomem	*base;
+	void __iomem	*bar[6];
+	struct completion irq_raised;
+	int		last_irq;
+	int		num_irqs;
+	int		irq_type;
+	/* mutex to protect the ioctls */
+	struct mutex	mutex;
+	struct miscdevice miscdev;
+	enum pci_barno test_reg_bar;
+	size_t alignment;
+};
+
+struct pci_endpoint_test_data {
+	enum pci_barno test_reg_bar;
+	size_t alignment;
+	int irq_type;
+};
+
+static inline u32 pci_endpoint_test_readl(struct pci_endpoint_test *test,
+					  u32 offset)
+{
+	return readl(test->base + offset);
+}
+
+static inline void pci_endpoint_test_writel(struct pci_endpoint_test *test,
+					    u32 offset, u32 value)
+{
+	writel(value, test->base + offset);
+}
+
+static inline u32 pci_endpoint_test_bar_readl(struct pci_endpoint_test *test,
+					      int bar, int offset)
+{
+	return readl(test->bar[bar] + offset);
+}
+
+static inline void pci_endpoint_test_bar_writel(struct pci_endpoint_test *test,
+						int bar, u32 offset, u32 value)
+{
+	writel(value, test->bar[bar] + offset);
+}
+
+static irqreturn_t pci_endpoint_test_irqhandler(int irq, void *dev_id)
+{
+	struct pci_endpoint_test *test = dev_id;
+	u32 reg;
+
+	reg = pci_endpoint_test_readl(test, PCI_ENDPOINT_TEST_STATUS);
+	if (reg & STATUS_IRQ_RAISED) {
+		test->last_irq = irq;
+		complete(&test->irq_raised);
+		reg &= ~STATUS_IRQ_RAISED;
+	}
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_STATUS,
+				 reg);
+
+	return IRQ_HANDLED;
+}
+
+static void pci_endpoint_test_free_irq_vectors(struct pci_endpoint_test *test)
+{
+	struct pci_dev *pdev = test->pdev;
+
+	pci_free_irq_vectors(pdev);
+	test->irq_type = IRQ_TYPE_UNDEFINED;
+}
+
+static bool pci_endpoint_test_alloc_irq_vectors(struct pci_endpoint_test *test,
+						int type)
+{
+	int irq = -1;
+	struct pci_dev *pdev = test->pdev;
+	struct device *dev = &pdev->dev;
+	bool res = true;
+
+	switch (type) {
+	case IRQ_TYPE_LEGACY:
+		irq = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_LEGACY);
+		if (irq < 0)
+			dev_err(dev, "Failed to get Legacy interrupt\n");
+		break;
+	case IRQ_TYPE_MSI:
+		irq = pci_alloc_irq_vectors(pdev, 1, 32, PCI_IRQ_MSI);
+		if (irq < 0)
+			dev_err(dev, "Failed to get MSI interrupts\n");
+		break;
+	case IRQ_TYPE_MSIX:
+		irq = pci_alloc_irq_vectors(pdev, 1, 2048, PCI_IRQ_MSIX);
+		if (irq < 0)
+			dev_err(dev, "Failed to get MSI-X interrupts\n");
+		break;
+	default:
+		dev_err(dev, "Invalid IRQ type selected\n");
+	}
+
+	if (irq < 0) {
+		irq = 0;
+		res = false;
+	}
+
+	test->irq_type = type;
+	test->num_irqs = irq;
+
+	return res;
+}
+
+static void pci_endpoint_test_release_irq(struct pci_endpoint_test *test)
+{
+	int i;
+	struct pci_dev *pdev = test->pdev;
+	struct device *dev = &pdev->dev;
+
+	for (i = 0; i < test->num_irqs; i++)
+		devm_free_irq(dev, pci_irq_vector(pdev, i), test);
+
+	test->num_irqs = 0;
+}
+
+static bool pci_endpoint_test_request_irq(struct pci_endpoint_test *test)
+{
+	int i;
+	int err;
+	struct pci_dev *pdev = test->pdev;
+	struct device *dev = &pdev->dev;
+
+	for (i = 0; i < test->num_irqs; i++) {
+		err = devm_request_irq(dev, pci_irq_vector(pdev, i),
+				       pci_endpoint_test_irqhandler,
+				       IRQF_SHARED, DRV_MODULE_NAME, test);
+		if (err)
+			goto fail;
+	}
+
+	return true;
+
+fail:
+	switch (irq_type) {
+	case IRQ_TYPE_LEGACY:
+		dev_err(dev, "Failed to request IRQ %d for Legacy\n",
+			pci_irq_vector(pdev, i));
+		break;
+	case IRQ_TYPE_MSI:
+		dev_err(dev, "Failed to request IRQ %d for MSI %d\n",
+			pci_irq_vector(pdev, i),
+			i + 1);
+		break;
+	case IRQ_TYPE_MSIX:
+		dev_err(dev, "Failed to request IRQ %d for MSI-X %d\n",
+			pci_irq_vector(pdev, i),
+			i + 1);
+		break;
+	}
+
+	return false;
+}
+
+static bool pci_endpoint_test_bar(struct pci_endpoint_test *test,
+				  enum pci_barno barno)
+{
+	int j;
+	u32 val;
+	int size;
+	struct pci_dev *pdev = test->pdev;
+
+	if (!test->bar[barno])
+		return false;
+
+	size = pci_resource_len(pdev, barno);
+
+	if (barno == test->test_reg_bar)
+		size = 0x4;
+
+	for (j = 0; j < size; j += 4)
+		pci_endpoint_test_bar_writel(test, barno, j, 0xA0A0A0A0);
+
+	for (j = 0; j < size; j += 4) {
+		val = pci_endpoint_test_bar_readl(test, barno, j);
+		if (val != 0xA0A0A0A0)
+			return false;
+	}
+
+	return true;
+}
+
+static bool pci_endpoint_test_legacy_irq(struct pci_endpoint_test *test)
+{
+	u32 val;
+
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE,
+				 IRQ_TYPE_LEGACY);
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, 0);
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND,
+				 COMMAND_RAISE_LEGACY_IRQ);
+	val = wait_for_completion_timeout(&test->irq_raised,
+					  msecs_to_jiffies(1000));
+	if (!val)
+		return false;
+
+	return true;
+}
+
+static bool pci_endpoint_test_msi_irq(struct pci_endpoint_test *test,
+				       u16 msi_num, bool msix)
+{
+	u32 val;
+	struct pci_dev *pdev = test->pdev;
+
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE,
+				 msix == false ? IRQ_TYPE_MSI :
+				 IRQ_TYPE_MSIX);
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, msi_num);
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND,
+				 msix == false ? COMMAND_RAISE_MSI_IRQ :
+				 COMMAND_RAISE_MSIX_IRQ);
+	val = wait_for_completion_timeout(&test->irq_raised,
+					  msecs_to_jiffies(1000));
+	if (!val)
+		return false;
+
+	if (pci_irq_vector(pdev, msi_num - 1) == test->last_irq)
+		return true;
+
+	return false;
+}
+
+static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, size_t size)
+{
+	bool ret = false;
+	void *src_addr;
+	void *dst_addr;
+	dma_addr_t src_phys_addr;
+	dma_addr_t dst_phys_addr;
+	struct pci_dev *pdev = test->pdev;
+	struct device *dev = &pdev->dev;
+	void *orig_src_addr;
+	dma_addr_t orig_src_phys_addr;
+	void *orig_dst_addr;
+	dma_addr_t orig_dst_phys_addr;
+	size_t offset;
+	size_t alignment = test->alignment;
+	int irq_type = test->irq_type;
+	u32 src_crc32;
+	u32 dst_crc32;
+
+	if (size > SIZE_MAX - alignment)
+		goto err;
+
+	if (irq_type < IRQ_TYPE_LEGACY || irq_type > IRQ_TYPE_MSIX) {
+		dev_err(dev, "Invalid IRQ type option\n");
+		goto err;
+	}
+
+	orig_src_addr = dma_alloc_coherent(dev, size + alignment,
+					   &orig_src_phys_addr, GFP_KERNEL);
+	if (!orig_src_addr) {
+		dev_err(dev, "Failed to allocate source buffer\n");
+		ret = false;
+		goto err;
+	}
+
+	if (alignment && !IS_ALIGNED(orig_src_phys_addr, alignment)) {
+		src_phys_addr = PTR_ALIGN(orig_src_phys_addr, alignment);
+		offset = src_phys_addr - orig_src_phys_addr;
+		src_addr = orig_src_addr + offset;
+	} else {
+		src_phys_addr = orig_src_phys_addr;
+		src_addr = orig_src_addr;
+	}
+
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_LOWER_SRC_ADDR,
+				 lower_32_bits(src_phys_addr));
+
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_UPPER_SRC_ADDR,
+				 upper_32_bits(src_phys_addr));
+
+	get_random_bytes(src_addr, size);
+	src_crc32 = crc32_le(~0, src_addr, size);
+
+	orig_dst_addr = dma_alloc_coherent(dev, size + alignment,
+					   &orig_dst_phys_addr, GFP_KERNEL);
+	if (!orig_dst_addr) {
+		dev_err(dev, "Failed to allocate destination address\n");
+		ret = false;
+		goto err_orig_src_addr;
+	}
+
+	if (alignment && !IS_ALIGNED(orig_dst_phys_addr, alignment)) {
+		dst_phys_addr = PTR_ALIGN(orig_dst_phys_addr, alignment);
+		offset = dst_phys_addr - orig_dst_phys_addr;
+		dst_addr = orig_dst_addr + offset;
+	} else {
+		dst_phys_addr = orig_dst_phys_addr;
+		dst_addr = orig_dst_addr;
+	}
+
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_LOWER_DST_ADDR,
+				 lower_32_bits(dst_phys_addr));
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_UPPER_DST_ADDR,
+				 upper_32_bits(dst_phys_addr));
+
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_SIZE,
+				 size);
+
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, irq_type);
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, 1);
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND,
+				 COMMAND_COPY);
+
+	wait_for_completion(&test->irq_raised);
+
+	dst_crc32 = crc32_le(~0, dst_addr, size);
+	if (dst_crc32 == src_crc32)
+		ret = true;
+
+	dma_free_coherent(dev, size + alignment, orig_dst_addr,
+			  orig_dst_phys_addr);
+
+err_orig_src_addr:
+	dma_free_coherent(dev, size + alignment, orig_src_addr,
+			  orig_src_phys_addr);
+
+err:
+	return ret;
+}
+
+static bool pci_endpoint_test_write(struct pci_endpoint_test *test, size_t size)
+{
+	bool ret = false;
+	u32 reg;
+	void *addr;
+	dma_addr_t phys_addr;
+	struct pci_dev *pdev = test->pdev;
+	struct device *dev = &pdev->dev;
+	void *orig_addr;
+	dma_addr_t orig_phys_addr;
+	size_t offset;
+	size_t alignment = test->alignment;
+	int irq_type = test->irq_type;
+	u32 crc32;
+
+	if (size > SIZE_MAX - alignment)
+		goto err;
+
+	if (irq_type < IRQ_TYPE_LEGACY || irq_type > IRQ_TYPE_MSIX) {
+		dev_err(dev, "Invalid IRQ type option\n");
+		goto err;
+	}
+
+	orig_addr = dma_alloc_coherent(dev, size + alignment, &orig_phys_addr,
+				       GFP_KERNEL);
+	if (!orig_addr) {
+		dev_err(dev, "Failed to allocate address\n");
+		ret = false;
+		goto err;
+	}
+
+	if (alignment && !IS_ALIGNED(orig_phys_addr, alignment)) {
+		phys_addr =  PTR_ALIGN(orig_phys_addr, alignment);
+		offset = phys_addr - orig_phys_addr;
+		addr = orig_addr + offset;
+	} else {
+		phys_addr = orig_phys_addr;
+		addr = orig_addr;
+	}
+
+	get_random_bytes(addr, size);
+
+	crc32 = crc32_le(~0, addr, size);
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_CHECKSUM,
+				 crc32);
+
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_LOWER_SRC_ADDR,
+				 lower_32_bits(phys_addr));
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_UPPER_SRC_ADDR,
+				 upper_32_bits(phys_addr));
+
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_SIZE, size);
+
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, irq_type);
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, 1);
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND,
+				 COMMAND_READ);
+
+	wait_for_completion(&test->irq_raised);
+
+	reg = pci_endpoint_test_readl(test, PCI_ENDPOINT_TEST_STATUS);
+	if (reg & STATUS_READ_SUCCESS)
+		ret = true;
+
+	dma_free_coherent(dev, size + alignment, orig_addr, orig_phys_addr);
+
+err:
+	return ret;
+}
+
+static bool pci_endpoint_test_read(struct pci_endpoint_test *test, size_t size)
+{
+	bool ret = false;
+	void *addr;
+	dma_addr_t phys_addr;
+	struct pci_dev *pdev = test->pdev;
+	struct device *dev = &pdev->dev;
+	void *orig_addr;
+	dma_addr_t orig_phys_addr;
+	size_t offset;
+	size_t alignment = test->alignment;
+	int irq_type = test->irq_type;
+	u32 crc32;
+
+	if (size > SIZE_MAX - alignment)
+		goto err;
+
+	if (irq_type < IRQ_TYPE_LEGACY || irq_type > IRQ_TYPE_MSIX) {
+		dev_err(dev, "Invalid IRQ type option\n");
+		goto err;
+	}
+
+	orig_addr = dma_alloc_coherent(dev, size + alignment, &orig_phys_addr,
+				       GFP_KERNEL);
+	if (!orig_addr) {
+		dev_err(dev, "Failed to allocate destination address\n");
+		ret = false;
+		goto err;
+	}
+
+	if (alignment && !IS_ALIGNED(orig_phys_addr, alignment)) {
+		phys_addr = PTR_ALIGN(orig_phys_addr, alignment);
+		offset = phys_addr - orig_phys_addr;
+		addr = orig_addr + offset;
+	} else {
+		phys_addr = orig_phys_addr;
+		addr = orig_addr;
+	}
+
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_LOWER_DST_ADDR,
+				 lower_32_bits(phys_addr));
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_UPPER_DST_ADDR,
+				 upper_32_bits(phys_addr));
+
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_SIZE, size);
+
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, irq_type);
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, 1);
+	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND,
+				 COMMAND_WRITE);
+
+	wait_for_completion(&test->irq_raised);
+
+	crc32 = crc32_le(~0, addr, size);
+	if (crc32 == pci_endpoint_test_readl(test, PCI_ENDPOINT_TEST_CHECKSUM))
+		ret = true;
+
+	dma_free_coherent(dev, size + alignment, orig_addr, orig_phys_addr);
+err:
+	return ret;
+}
+
+static bool pci_endpoint_test_set_irq(struct pci_endpoint_test *test,
+				      int req_irq_type)
+{
+	struct pci_dev *pdev = test->pdev;
+	struct device *dev = &pdev->dev;
+
+	if (req_irq_type < IRQ_TYPE_LEGACY || req_irq_type > IRQ_TYPE_MSIX) {
+		dev_err(dev, "Invalid IRQ type option\n");
+		return false;
+	}
+
+	if (test->irq_type == req_irq_type)
+		return true;
+
+	pci_endpoint_test_release_irq(test);
+	pci_endpoint_test_free_irq_vectors(test);
+
+	if (!pci_endpoint_test_alloc_irq_vectors(test, req_irq_type))
+		goto err;
+
+	if (!pci_endpoint_test_request_irq(test))
+		goto err;
+
+	return true;
+
+err:
+	pci_endpoint_test_free_irq_vectors(test);
+	return false;
+}
+
+static long pci_endpoint_test_ioctl(struct file *file, unsigned int cmd,
+				    unsigned long arg)
+{
+	int ret = -EINVAL;
+	enum pci_barno bar;
+	struct pci_endpoint_test *test = to_endpoint_test(file->private_data);
+	struct pci_dev *pdev = test->pdev;
+
+	mutex_lock(&test->mutex);
+	switch (cmd) {
+	case PCITEST_BAR:
+		bar = arg;
+		if (bar < 0 || bar > 5)
+			goto ret;
+		if (is_am654_pci_dev(pdev) && bar == BAR_0)
+			goto ret;
+		ret = pci_endpoint_test_bar(test, bar);
+		break;
+	case PCITEST_LEGACY_IRQ:
+		ret = pci_endpoint_test_legacy_irq(test);
+		break;
+	case PCITEST_MSI:
+	case PCITEST_MSIX:
+		ret = pci_endpoint_test_msi_irq(test, arg, cmd == PCITEST_MSIX);
+		break;
+	case PCITEST_WRITE:
+		ret = pci_endpoint_test_write(test, arg);
+		break;
+	case PCITEST_READ:
+		ret = pci_endpoint_test_read(test, arg);
+		break;
+	case PCITEST_COPY:
+		ret = pci_endpoint_test_copy(test, arg);
+		break;
+	case PCITEST_SET_IRQTYPE:
+		ret = pci_endpoint_test_set_irq(test, arg);
+		break;
+	case PCITEST_GET_IRQTYPE:
+		ret = irq_type;
+		break;
+	}
+
+ret:
+	mutex_unlock(&test->mutex);
+	return ret;
+}
+
+static const struct file_operations pci_endpoint_test_fops = {
+	.owner = THIS_MODULE,
+	.unlocked_ioctl = pci_endpoint_test_ioctl,
+};
+
+static int pci_endpoint_test_probe(struct pci_dev *pdev,
+				   const struct pci_device_id *ent)
+{
+	int err;
+	int id;
+	char name[24];
+	enum pci_barno bar;
+	void __iomem *base;
+	struct device *dev = &pdev->dev;
+	struct pci_endpoint_test *test;
+	struct pci_endpoint_test_data *data;
+	enum pci_barno test_reg_bar = BAR_0;
+	struct miscdevice *misc_device;
+
+	if (pci_is_bridge(pdev))
+		return -ENODEV;
+
+	test = devm_kzalloc(dev, sizeof(*test), GFP_KERNEL);
+	if (!test)
+		return -ENOMEM;
+
+	test->test_reg_bar = 0;
+	test->alignment = 0;
+	test->pdev = pdev;
+	test->irq_type = IRQ_TYPE_UNDEFINED;
+
+	if (no_msi)
+		irq_type = IRQ_TYPE_LEGACY;
+
+	data = (struct pci_endpoint_test_data *)ent->driver_data;
+	if (data) {
+		test_reg_bar = data->test_reg_bar;
+		test->test_reg_bar = test_reg_bar;
+		test->alignment = data->alignment;
+		irq_type = data->irq_type;
+	}
+
+	init_completion(&test->irq_raised);
+	mutex_init(&test->mutex);
+
+	err = pci_enable_device(pdev);
+	if (err) {
+		dev_err(dev, "Cannot enable PCI device\n");
+		return err;
+	}
+
+	err = pci_request_regions(pdev, DRV_MODULE_NAME);
+	if (err) {
+		dev_err(dev, "Cannot obtain PCI resources\n");
+		goto err_disable_pdev;
+	}
+
+	pci_set_master(pdev);
+
+	if (!pci_endpoint_test_alloc_irq_vectors(test, irq_type))
+		goto err_disable_irq;
+
+	if (!pci_endpoint_test_request_irq(test))
+		goto err_disable_irq;
+
+	for (bar = BAR_0; bar <= BAR_5; bar++) {
+		if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) {
+			base = pci_ioremap_bar(pdev, bar);
+			if (!base) {
+				dev_err(dev, "Failed to read BAR%d\n", bar);
+				WARN_ON(bar == test_reg_bar);
+			}
+			test->bar[bar] = base;
+		}
+	}
+
+	test->base = test->bar[test_reg_bar];
+	if (!test->base) {
+		err = -ENOMEM;
+		dev_err(dev, "Cannot perform PCI test without BAR%d\n",
+			test_reg_bar);
+		goto err_iounmap;
+	}
+
+	pci_set_drvdata(pdev, test);
+
+	id = ida_simple_get(&pci_endpoint_test_ida, 0, 0, GFP_KERNEL);
+	if (id < 0) {
+		err = id;
+		dev_err(dev, "Unable to get id\n");
+		goto err_iounmap;
+	}
+
+	snprintf(name, sizeof(name), DRV_MODULE_NAME ".%d", id);
+	misc_device = &test->miscdev;
+	misc_device->minor = MISC_DYNAMIC_MINOR;
+	misc_device->name = kstrdup(name, GFP_KERNEL);
+	if (!misc_device->name) {
+		err = -ENOMEM;
+		goto err_ida_remove;
+	}
+	misc_device->fops = &pci_endpoint_test_fops,
+
+	err = misc_register(misc_device);
+	if (err) {
+		dev_err(dev, "Failed to register device\n");
+		goto err_kfree_name;
+	}
+
+	return 0;
+
+err_kfree_name:
+	kfree(misc_device->name);
+
+err_ida_remove:
+	ida_simple_remove(&pci_endpoint_test_ida, id);
+
+err_iounmap:
+	for (bar = BAR_0; bar <= BAR_5; bar++) {
+		if (test->bar[bar])
+			pci_iounmap(pdev, test->bar[bar]);
+	}
+	pci_endpoint_test_release_irq(test);
+
+err_disable_irq:
+	pci_endpoint_test_free_irq_vectors(test);
+	pci_release_regions(pdev);
+
+err_disable_pdev:
+	pci_disable_device(pdev);
+
+	return err;
+}
+
+static void pci_endpoint_test_remove(struct pci_dev *pdev)
+{
+	int id;
+	enum pci_barno bar;
+	struct pci_endpoint_test *test = pci_get_drvdata(pdev);
+	struct miscdevice *misc_device = &test->miscdev;
+
+	if (sscanf(misc_device->name, DRV_MODULE_NAME ".%d", &id) != 1)
+		return;
+	if (id < 0)
+		return;
+
+	misc_deregister(&test->miscdev);
+	kfree(misc_device->name);
+	ida_simple_remove(&pci_endpoint_test_ida, id);
+	for (bar = BAR_0; bar <= BAR_5; bar++) {
+		if (test->bar[bar])
+			pci_iounmap(pdev, test->bar[bar]);
+	}
+
+	pci_endpoint_test_release_irq(test);
+	pci_endpoint_test_free_irq_vectors(test);
+
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+}
+
+static const struct pci_endpoint_test_data am654_data = {
+	.test_reg_bar = BAR_2,
+	.alignment = SZ_64K,
+	.irq_type = IRQ_TYPE_MSI,
+};
+
+static const struct pci_device_id pci_endpoint_test_tbl[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_DRA74x) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_DRA72x) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, 0x81c0) },
+	{ PCI_DEVICE_DATA(SYNOPSYS, EDDA, NULL) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_AM654),
+	  .driver_data = (kernel_ulong_t)&am654_data
+	},
+	{ }
+};
+MODULE_DEVICE_TABLE(pci, pci_endpoint_test_tbl);
+
+static struct pci_driver pci_endpoint_test_driver = {
+	.name		= DRV_MODULE_NAME,
+	.id_table	= pci_endpoint_test_tbl,
+	.probe		= pci_endpoint_test_probe,
+	.remove		= pci_endpoint_test_remove,
+};
+module_pci_driver(pci_endpoint_test_driver);
+
+MODULE_DESCRIPTION("PCI ENDPOINT TEST HOST DRIVER");
+MODULE_AUTHOR("Kishon Vijay Abraham I <kishon@ti.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c
new file mode 100644
index 000000000..b084245f6
--- /dev/null
+++ b/drivers/misc/phantom.c
@@ -0,0 +1,571 @@
+/*
+ *  Copyright (C) 2005-2007 Jiri Slaby <jirislaby@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  You need a userspace library to cooperate with this driver. It (and other
+ *  info) may be obtained here:
+ *  http://www.fi.muni.cz/~xslaby/phantom.html
+ *  or alternatively, you might use OpenHaptics provided by Sensable.
+ */
+
+#include <linux/compat.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/interrupt.h>
+#include <linux/cdev.h>
+#include <linux/slab.h>
+#include <linux/phantom.h>
+#include <linux/sched.h>
+#include <linux/mutex.h>
+
+#include <linux/atomic.h>
+#include <asm/io.h>
+
+#define PHANTOM_VERSION		"n0.9.8"
+
+#define PHANTOM_MAX_MINORS	8
+
+#define PHN_IRQCTL		0x4c    /* irq control in caddr space */
+
+#define PHB_RUNNING		1
+#define PHB_NOT_OH		2
+
+static DEFINE_MUTEX(phantom_mutex);
+static struct class *phantom_class;
+static int phantom_major;
+
+struct phantom_device {
+	unsigned int opened;
+	void __iomem *caddr;
+	u32 __iomem *iaddr;
+	u32 __iomem *oaddr;
+	unsigned long status;
+	atomic_t counter;
+
+	wait_queue_head_t wait;
+	struct cdev cdev;
+
+	struct mutex open_lock;
+	spinlock_t regs_lock;
+
+	/* used in NOT_OH mode */
+	struct phm_regs oregs;
+	u32 ctl_reg;
+};
+
+static unsigned char phantom_devices[PHANTOM_MAX_MINORS];
+
+static int phantom_status(struct phantom_device *dev, unsigned long newstat)
+{
+	pr_debug("phantom_status %lx %lx\n", dev->status, newstat);
+
+	if (!(dev->status & PHB_RUNNING) && (newstat & PHB_RUNNING)) {
+		atomic_set(&dev->counter, 0);
+		iowrite32(PHN_CTL_IRQ, dev->iaddr + PHN_CONTROL);
+		iowrite32(0x43, dev->caddr + PHN_IRQCTL);
+		ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */
+	} else if ((dev->status & PHB_RUNNING) && !(newstat & PHB_RUNNING)) {
+		iowrite32(0, dev->caddr + PHN_IRQCTL);
+		ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */
+	}
+
+	dev->status = newstat;
+
+	return 0;
+}
+
+/*
+ * File ops
+ */
+
+static long phantom_ioctl(struct file *file, unsigned int cmd,
+		unsigned long arg)
+{
+	struct phantom_device *dev = file->private_data;
+	struct phm_regs rs;
+	struct phm_reg r;
+	void __user *argp = (void __user *)arg;
+	unsigned long flags;
+	unsigned int i;
+
+	switch (cmd) {
+	case PHN_SETREG:
+	case PHN_SET_REG:
+		if (copy_from_user(&r, argp, sizeof(r)))
+			return -EFAULT;
+
+		if (r.reg > 7)
+			return -EINVAL;
+
+		spin_lock_irqsave(&dev->regs_lock, flags);
+		if (r.reg == PHN_CONTROL && (r.value & PHN_CTL_IRQ) &&
+				phantom_status(dev, dev->status | PHB_RUNNING)){
+			spin_unlock_irqrestore(&dev->regs_lock, flags);
+			return -ENODEV;
+		}
+
+		pr_debug("phantom: writing %x to %u\n", r.value, r.reg);
+
+		/* preserve amp bit (don't allow to change it when in NOT_OH) */
+		if (r.reg == PHN_CONTROL && (dev->status & PHB_NOT_OH)) {
+			r.value &= ~PHN_CTL_AMP;
+			r.value |= dev->ctl_reg & PHN_CTL_AMP;
+			dev->ctl_reg = r.value;
+		}
+
+		iowrite32(r.value, dev->iaddr + r.reg);
+		ioread32(dev->iaddr); /* PCI posting */
+
+		if (r.reg == PHN_CONTROL && !(r.value & PHN_CTL_IRQ))
+			phantom_status(dev, dev->status & ~PHB_RUNNING);
+		spin_unlock_irqrestore(&dev->regs_lock, flags);
+		break;
+	case PHN_SETREGS:
+	case PHN_SET_REGS:
+		if (copy_from_user(&rs, argp, sizeof(rs)))
+			return -EFAULT;
+
+		pr_debug("phantom: SRS %u regs %x\n", rs.count, rs.mask);
+		spin_lock_irqsave(&dev->regs_lock, flags);
+		if (dev->status & PHB_NOT_OH)
+			memcpy(&dev->oregs, &rs, sizeof(rs));
+		else {
+			u32 m = min(rs.count, 8U);
+			for (i = 0; i < m; i++)
+				if (rs.mask & BIT(i))
+					iowrite32(rs.values[i], dev->oaddr + i);
+			ioread32(dev->iaddr); /* PCI posting */
+		}
+		spin_unlock_irqrestore(&dev->regs_lock, flags);
+		break;
+	case PHN_GETREG:
+	case PHN_GET_REG:
+		if (copy_from_user(&r, argp, sizeof(r)))
+			return -EFAULT;
+
+		if (r.reg > 7)
+			return -EINVAL;
+
+		r.value = ioread32(dev->iaddr + r.reg);
+
+		if (copy_to_user(argp, &r, sizeof(r)))
+			return -EFAULT;
+		break;
+	case PHN_GETREGS:
+	case PHN_GET_REGS: {
+		u32 m;
+
+		if (copy_from_user(&rs, argp, sizeof(rs)))
+			return -EFAULT;
+
+		m = min(rs.count, 8U);
+
+		pr_debug("phantom: GRS %u regs %x\n", rs.count, rs.mask);
+		spin_lock_irqsave(&dev->regs_lock, flags);
+		for (i = 0; i < m; i++)
+			if (rs.mask & BIT(i))
+				rs.values[i] = ioread32(dev->iaddr + i);
+		atomic_set(&dev->counter, 0);
+		spin_unlock_irqrestore(&dev->regs_lock, flags);
+
+		if (copy_to_user(argp, &rs, sizeof(rs)))
+			return -EFAULT;
+		break;
+	} case PHN_NOT_OH:
+		spin_lock_irqsave(&dev->regs_lock, flags);
+		if (dev->status & PHB_RUNNING) {
+			printk(KERN_ERR "phantom: you need to set NOT_OH "
+					"before you start the device!\n");
+			spin_unlock_irqrestore(&dev->regs_lock, flags);
+			return -EINVAL;
+		}
+		dev->status |= PHB_NOT_OH;
+		spin_unlock_irqrestore(&dev->regs_lock, flags);
+		break;
+	default:
+		return -ENOTTY;
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_COMPAT
+static long phantom_compat_ioctl(struct file *filp, unsigned int cmd,
+		unsigned long arg)
+{
+	if (_IOC_NR(cmd) <= 3 && _IOC_SIZE(cmd) == sizeof(compat_uptr_t)) {
+		cmd &= ~(_IOC_SIZEMASK << _IOC_SIZESHIFT);
+		cmd |= sizeof(void *) << _IOC_SIZESHIFT;
+	}
+	return phantom_ioctl(filp, cmd, (unsigned long)compat_ptr(arg));
+}
+#else
+#define phantom_compat_ioctl NULL
+#endif
+
+static int phantom_open(struct inode *inode, struct file *file)
+{
+	struct phantom_device *dev = container_of(inode->i_cdev,
+			struct phantom_device, cdev);
+
+	mutex_lock(&phantom_mutex);
+	nonseekable_open(inode, file);
+
+	if (mutex_lock_interruptible(&dev->open_lock)) {
+		mutex_unlock(&phantom_mutex);
+		return -ERESTARTSYS;
+	}
+
+	if (dev->opened) {
+		mutex_unlock(&dev->open_lock);
+		mutex_unlock(&phantom_mutex);
+		return -EINVAL;
+	}
+
+	WARN_ON(dev->status & PHB_NOT_OH);
+
+	file->private_data = dev;
+
+	atomic_set(&dev->counter, 0);
+	dev->opened++;
+	mutex_unlock(&dev->open_lock);
+	mutex_unlock(&phantom_mutex);
+	return 0;
+}
+
+static int phantom_release(struct inode *inode, struct file *file)
+{
+	struct phantom_device *dev = file->private_data;
+
+	mutex_lock(&dev->open_lock);
+
+	dev->opened = 0;
+	phantom_status(dev, dev->status & ~PHB_RUNNING);
+	dev->status &= ~PHB_NOT_OH;
+
+	mutex_unlock(&dev->open_lock);
+
+	return 0;
+}
+
+static __poll_t phantom_poll(struct file *file, poll_table *wait)
+{
+	struct phantom_device *dev = file->private_data;
+	__poll_t mask = 0;
+
+	pr_debug("phantom_poll: %d\n", atomic_read(&dev->counter));
+	poll_wait(file, &dev->wait, wait);
+
+	if (!(dev->status & PHB_RUNNING))
+		mask = EPOLLERR;
+	else if (atomic_read(&dev->counter))
+		mask = EPOLLIN | EPOLLRDNORM;
+
+	pr_debug("phantom_poll end: %x/%d\n", mask, atomic_read(&dev->counter));
+
+	return mask;
+}
+
+static const struct file_operations phantom_file_ops = {
+	.open = phantom_open,
+	.release = phantom_release,
+	.unlocked_ioctl = phantom_ioctl,
+	.compat_ioctl = phantom_compat_ioctl,
+	.poll = phantom_poll,
+	.llseek = no_llseek,
+};
+
+static irqreturn_t phantom_isr(int irq, void *data)
+{
+	struct phantom_device *dev = data;
+	unsigned int i;
+	u32 ctl;
+
+	spin_lock(&dev->regs_lock);
+	ctl = ioread32(dev->iaddr + PHN_CONTROL);
+	if (!(ctl & PHN_CTL_IRQ)) {
+		spin_unlock(&dev->regs_lock);
+		return IRQ_NONE;
+	}
+
+	iowrite32(0, dev->iaddr);
+	iowrite32(0xc0, dev->iaddr);
+
+	if (dev->status & PHB_NOT_OH) {
+		struct phm_regs *r = &dev->oregs;
+		u32 m = min(r->count, 8U);
+
+		for (i = 0; i < m; i++)
+			if (r->mask & BIT(i))
+				iowrite32(r->values[i], dev->oaddr + i);
+
+		dev->ctl_reg ^= PHN_CTL_AMP;
+		iowrite32(dev->ctl_reg, dev->iaddr + PHN_CONTROL);
+	}
+	spin_unlock(&dev->regs_lock);
+
+	ioread32(dev->iaddr); /* PCI posting */
+
+	atomic_inc(&dev->counter);
+	wake_up_interruptible(&dev->wait);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Init and deinit driver
+ */
+
+static unsigned int phantom_get_free(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < PHANTOM_MAX_MINORS; i++)
+		if (phantom_devices[i] == 0)
+			break;
+
+	return i;
+}
+
+static int phantom_probe(struct pci_dev *pdev,
+	const struct pci_device_id *pci_id)
+{
+	struct phantom_device *pht;
+	unsigned int minor;
+	int retval;
+
+	retval = pci_enable_device(pdev);
+	if (retval) {
+		dev_err(&pdev->dev, "pci_enable_device failed!\n");
+		goto err;
+	}
+
+	minor = phantom_get_free();
+	if (minor == PHANTOM_MAX_MINORS) {
+		dev_err(&pdev->dev, "too many devices found!\n");
+		retval = -EIO;
+		goto err_dis;
+	}
+
+	phantom_devices[minor] = 1;
+
+	retval = pci_request_regions(pdev, "phantom");
+	if (retval) {
+		dev_err(&pdev->dev, "pci_request_regions failed!\n");
+		goto err_null;
+	}
+
+	retval = -ENOMEM;
+	pht = kzalloc(sizeof(*pht), GFP_KERNEL);
+	if (pht == NULL) {
+		dev_err(&pdev->dev, "unable to allocate device\n");
+		goto err_reg;
+	}
+
+	pht->caddr = pci_iomap(pdev, 0, 0);
+	if (pht->caddr == NULL) {
+		dev_err(&pdev->dev, "can't remap conf space\n");
+		goto err_fr;
+	}
+	pht->iaddr = pci_iomap(pdev, 2, 0);
+	if (pht->iaddr == NULL) {
+		dev_err(&pdev->dev, "can't remap input space\n");
+		goto err_unmc;
+	}
+	pht->oaddr = pci_iomap(pdev, 3, 0);
+	if (pht->oaddr == NULL) {
+		dev_err(&pdev->dev, "can't remap output space\n");
+		goto err_unmi;
+	}
+
+	mutex_init(&pht->open_lock);
+	spin_lock_init(&pht->regs_lock);
+	init_waitqueue_head(&pht->wait);
+	cdev_init(&pht->cdev, &phantom_file_ops);
+	pht->cdev.owner = THIS_MODULE;
+
+	iowrite32(0, pht->caddr + PHN_IRQCTL);
+	ioread32(pht->caddr + PHN_IRQCTL); /* PCI posting */
+	retval = request_irq(pdev->irq, phantom_isr,
+			IRQF_SHARED, "phantom", pht);
+	if (retval) {
+		dev_err(&pdev->dev, "can't establish ISR\n");
+		goto err_unmo;
+	}
+
+	retval = cdev_add(&pht->cdev, MKDEV(phantom_major, minor), 1);
+	if (retval) {
+		dev_err(&pdev->dev, "chardev registration failed\n");
+		goto err_irq;
+	}
+
+	if (IS_ERR(device_create(phantom_class, &pdev->dev,
+				 MKDEV(phantom_major, minor), NULL,
+				 "phantom%u", minor)))
+		dev_err(&pdev->dev, "can't create device\n");
+
+	pci_set_drvdata(pdev, pht);
+
+	return 0;
+err_irq:
+	free_irq(pdev->irq, pht);
+err_unmo:
+	pci_iounmap(pdev, pht->oaddr);
+err_unmi:
+	pci_iounmap(pdev, pht->iaddr);
+err_unmc:
+	pci_iounmap(pdev, pht->caddr);
+err_fr:
+	kfree(pht);
+err_reg:
+	pci_release_regions(pdev);
+err_null:
+	phantom_devices[minor] = 0;
+err_dis:
+	pci_disable_device(pdev);
+err:
+	return retval;
+}
+
+static void phantom_remove(struct pci_dev *pdev)
+{
+	struct phantom_device *pht = pci_get_drvdata(pdev);
+	unsigned int minor = MINOR(pht->cdev.dev);
+
+	device_destroy(phantom_class, MKDEV(phantom_major, minor));
+
+	cdev_del(&pht->cdev);
+
+	iowrite32(0, pht->caddr + PHN_IRQCTL);
+	ioread32(pht->caddr + PHN_IRQCTL); /* PCI posting */
+	free_irq(pdev->irq, pht);
+
+	pci_iounmap(pdev, pht->oaddr);
+	pci_iounmap(pdev, pht->iaddr);
+	pci_iounmap(pdev, pht->caddr);
+
+	kfree(pht);
+
+	pci_release_regions(pdev);
+
+	phantom_devices[minor] = 0;
+
+	pci_disable_device(pdev);
+}
+
+#ifdef CONFIG_PM
+static int phantom_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct phantom_device *dev = pci_get_drvdata(pdev);
+
+	iowrite32(0, dev->caddr + PHN_IRQCTL);
+	ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */
+
+	synchronize_irq(pdev->irq);
+
+	return 0;
+}
+
+static int phantom_resume(struct pci_dev *pdev)
+{
+	struct phantom_device *dev = pci_get_drvdata(pdev);
+
+	iowrite32(0, dev->caddr + PHN_IRQCTL);
+
+	return 0;
+}
+#else
+#define phantom_suspend	NULL
+#define phantom_resume	NULL
+#endif
+
+static struct pci_device_id phantom_pci_tbl[] = {
+	{ .vendor = PCI_VENDOR_ID_PLX, .device = PCI_DEVICE_ID_PLX_9050,
+	  .subvendor = PCI_VENDOR_ID_PLX, .subdevice = PCI_DEVICE_ID_PLX_9050,
+	  .class = PCI_CLASS_BRIDGE_OTHER << 8, .class_mask = 0xffff00 },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, phantom_pci_tbl);
+
+static struct pci_driver phantom_pci_driver = {
+	.name = "phantom",
+	.id_table = phantom_pci_tbl,
+	.probe = phantom_probe,
+	.remove = phantom_remove,
+	.suspend = phantom_suspend,
+	.resume = phantom_resume
+};
+
+static CLASS_ATTR_STRING(version, 0444, PHANTOM_VERSION);
+
+static int __init phantom_init(void)
+{
+	int retval;
+	dev_t dev;
+
+	phantom_class = class_create(THIS_MODULE, "phantom");
+	if (IS_ERR(phantom_class)) {
+		retval = PTR_ERR(phantom_class);
+		printk(KERN_ERR "phantom: can't register phantom class\n");
+		goto err;
+	}
+	retval = class_create_file(phantom_class, &class_attr_version.attr);
+	if (retval) {
+		printk(KERN_ERR "phantom: can't create sysfs version file\n");
+		goto err_class;
+	}
+
+	retval = alloc_chrdev_region(&dev, 0, PHANTOM_MAX_MINORS, "phantom");
+	if (retval) {
+		printk(KERN_ERR "phantom: can't register character device\n");
+		goto err_attr;
+	}
+	phantom_major = MAJOR(dev);
+
+	retval = pci_register_driver(&phantom_pci_driver);
+	if (retval) {
+		printk(KERN_ERR "phantom: can't register pci driver\n");
+		goto err_unchr;
+	}
+
+	printk(KERN_INFO "Phantom Linux Driver, version " PHANTOM_VERSION ", "
+			"init OK\n");
+
+	return 0;
+err_unchr:
+	unregister_chrdev_region(dev, PHANTOM_MAX_MINORS);
+err_attr:
+	class_remove_file(phantom_class, &class_attr_version.attr);
+err_class:
+	class_destroy(phantom_class);
+err:
+	return retval;
+}
+
+static void __exit phantom_exit(void)
+{
+	pci_unregister_driver(&phantom_pci_driver);
+
+	unregister_chrdev_region(MKDEV(phantom_major, 0), PHANTOM_MAX_MINORS);
+
+	class_remove_file(phantom_class, &class_attr_version.attr);
+	class_destroy(phantom_class);
+
+	pr_debug("phantom: module successfully removed\n");
+}
+
+module_init(phantom_init);
+module_exit(phantom_exit);
+
+MODULE_AUTHOR("Jiri Slaby <jirislaby@gmail.com>");
+MODULE_DESCRIPTION("Sensable Phantom driver (PCI devices)");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(PHANTOM_VERSION);
diff --git a/drivers/misc/pti.c b/drivers/misc/pti.c
new file mode 100644
index 000000000..41f2a9f68
--- /dev/null
+++ b/drivers/misc/pti.c
@@ -0,0 +1,988 @@
+/*
+ *  pti.c - PTI driver for cJTAG data extration
+ *
+ *  Copyright (C) Intel 2010
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * The PTI (Parallel Trace Interface) driver directs trace data routed from
+ * various parts in the system out through the Intel Penwell PTI port and
+ * out of the mobile device for analysis with a debugging tool
+ * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7,
+ * compact JTAG, standard.
+ */
+
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/console.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/tty.h>
+#include <linux/tty_driver.h>
+#include <linux/pci.h>
+#include <linux/mutex.h>
+#include <linux/miscdevice.h>
+#include <linux/intel-pti.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#define DRIVERNAME		"pti"
+#define PCINAME			"pciPTI"
+#define TTYNAME			"ttyPTI"
+#define CHARNAME		"pti"
+#define PTITTY_MINOR_START	0
+#define PTITTY_MINOR_NUM	2
+#define MAX_APP_IDS		16   /* 128 channel ids / u8 bit size */
+#define MAX_OS_IDS		16   /* 128 channel ids / u8 bit size */
+#define MAX_MODEM_IDS		16   /* 128 channel ids / u8 bit size */
+#define MODEM_BASE_ID		71   /* modem master ID address    */
+#define CONTROL_ID		72   /* control master ID address  */
+#define CONSOLE_ID		73   /* console master ID address  */
+#define OS_BASE_ID		74   /* base OS master ID address  */
+#define APP_BASE_ID		80   /* base App master ID address */
+#define CONTROL_FRAME_LEN	32   /* PTI control frame maximum size */
+#define USER_COPY_SIZE		8192 /* 8Kb buffer for user space copy */
+#define APERTURE_14		0x3800000 /* offset to first OS write addr */
+#define APERTURE_LEN		0x400000  /* address length */
+
+struct pti_tty {
+	struct pti_masterchannel *mc;
+};
+
+struct pti_dev {
+	struct tty_port port[PTITTY_MINOR_NUM];
+	unsigned long pti_addr;
+	unsigned long aperture_base;
+	void __iomem *pti_ioaddr;
+	u8 ia_app[MAX_APP_IDS];
+	u8 ia_os[MAX_OS_IDS];
+	u8 ia_modem[MAX_MODEM_IDS];
+};
+
+/*
+ * This protects access to ia_app, ia_os, and ia_modem,
+ * which keeps track of channels allocated in
+ * an aperture write id.
+ */
+static DEFINE_MUTEX(alloclock);
+
+static const struct pci_device_id pci_ids[] = {
+		{PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x82B)},
+		{0}
+};
+
+static struct tty_driver *pti_tty_driver;
+static struct pti_dev *drv_data;
+
+static unsigned int pti_console_channel;
+static unsigned int pti_control_channel;
+
+/**
+ *  pti_write_to_aperture()- The private write function to PTI HW.
+ *
+ *  @mc: The 'aperture'. It's part of a write address that holds
+ *       a master and channel ID.
+ *  @buf: Data being written to the HW that will ultimately be seen
+ *        in a debugging tool (Fido, Lauterbach).
+ *  @len: Size of buffer.
+ *
+ *  Since each aperture is specified by a unique
+ *  master/channel ID, no two processes will be writing
+ *  to the same aperture at the same time so no lock is required. The
+ *  PTI-Output agent will send these out in the order that they arrived, and
+ *  thus, it will intermix these messages. The debug tool can then later
+ *  regroup the appropriate message segments together reconstituting each
+ *  message.
+ */
+static void pti_write_to_aperture(struct pti_masterchannel *mc,
+				  u8 *buf,
+				  int len)
+{
+	int dwordcnt;
+	int final;
+	int i;
+	u32 ptiword;
+	u32 __iomem *aperture;
+	u8 *p = buf;
+
+	/*
+	 * calculate the aperture offset from the base using the master and
+	 * channel id's.
+	 */
+	aperture = drv_data->pti_ioaddr + (mc->master << 15)
+		+ (mc->channel << 8);
+
+	dwordcnt = len >> 2;
+	final = len - (dwordcnt << 2);	    /* final = trailing bytes    */
+	if (final == 0 && dwordcnt != 0) {  /* always need a final dword */
+		final += 4;
+		dwordcnt--;
+	}
+
+	for (i = 0; i < dwordcnt; i++) {
+		ptiword = be32_to_cpu(*(u32 *)p);
+		p += 4;
+		iowrite32(ptiword, aperture);
+	}
+
+	aperture += PTI_LASTDWORD_DTS;	/* adding DTS signals that is EOM */
+
+	ptiword = 0;
+	for (i = 0; i < final; i++)
+		ptiword |= *p++ << (24-(8*i));
+
+	iowrite32(ptiword, aperture);
+	return;
+}
+
+/**
+ *  pti_control_frame_built_and_sent()- control frame build and send function.
+ *
+ *  @mc:          The master / channel structure on which the function
+ *                built a control frame.
+ *  @thread_name: The thread name associated with the master / channel or
+ *                'NULL' if using the 'current' global variable.
+ *
+ *  To be able to post process the PTI contents on host side, a control frame
+ *  is added before sending any PTI content. So the host side knows on
+ *  each PTI frame the name of the thread using a dedicated master / channel.
+ *  The thread name is retrieved from 'current' global variable if 'thread_name'
+ *  is 'NULL', else it is retrieved from 'thread_name' parameter.
+ *  This function builds this frame and sends it to a master ID CONTROL_ID.
+ *  The overhead is only 32 bytes since the driver only writes to HW
+ *  in 32 byte chunks.
+ */
+static void pti_control_frame_built_and_sent(struct pti_masterchannel *mc,
+					     const char *thread_name)
+{
+	/*
+	 * Since we access the comm member in current's task_struct, we only
+	 * need to be as large as what 'comm' in that structure is.
+	 */
+	char comm[TASK_COMM_LEN];
+	struct pti_masterchannel mccontrol = {.master = CONTROL_ID,
+					      .channel = 0};
+	const char *thread_name_p;
+	const char *control_format = "%3d %3d %s";
+	u8 control_frame[CONTROL_FRAME_LEN];
+
+	if (!thread_name) {
+		if (!in_interrupt())
+			get_task_comm(comm, current);
+		else
+			strncpy(comm, "Interrupt", TASK_COMM_LEN);
+
+		/* Absolutely ensure our buffer is zero terminated. */
+		comm[TASK_COMM_LEN-1] = 0;
+		thread_name_p = comm;
+	} else {
+		thread_name_p = thread_name;
+	}
+
+	mccontrol.channel = pti_control_channel;
+	pti_control_channel = (pti_control_channel + 1) & 0x7f;
+
+	snprintf(control_frame, CONTROL_FRAME_LEN, control_format, mc->master,
+		mc->channel, thread_name_p);
+	pti_write_to_aperture(&mccontrol, control_frame, strlen(control_frame));
+}
+
+/**
+ *  pti_write_full_frame_to_aperture()- high level function to
+ *					write to PTI.
+ *
+ *  @mc:  The 'aperture'. It's part of a write address that holds
+ *        a master and channel ID.
+ *  @buf: Data being written to the HW that will ultimately be seen
+ *        in a debugging tool (Fido, Lauterbach).
+ *  @len: Size of buffer.
+ *
+ *  All threads sending data (either console, user space application, ...)
+ *  are calling the high level function to write to PTI meaning that it is
+ *  possible to add a control frame before sending the content.
+ */
+static void pti_write_full_frame_to_aperture(struct pti_masterchannel *mc,
+						const unsigned char *buf,
+						int len)
+{
+	pti_control_frame_built_and_sent(mc, NULL);
+	pti_write_to_aperture(mc, (u8 *)buf, len);
+}
+
+/**
+ * get_id()- Allocate a master and channel ID.
+ *
+ * @id_array:    an array of bits representing what channel
+ *               id's are allocated for writing.
+ * @max_ids:     The max amount of available write IDs to use.
+ * @base_id:     The starting SW channel ID, based on the Intel
+ *               PTI arch.
+ * @thread_name: The thread name associated with the master / channel or
+ *               'NULL' if using the 'current' global variable.
+ *
+ * Returns:
+ *	pti_masterchannel struct with master, channel ID address
+ *	0 for error
+ *
+ * Each bit in the arrays ia_app and ia_os correspond to a master and
+ * channel id. The bit is one if the id is taken and 0 if free. For
+ * every master there are 128 channel id's.
+ */
+static struct pti_masterchannel *get_id(u8 *id_array,
+					int max_ids,
+					int base_id,
+					const char *thread_name)
+{
+	struct pti_masterchannel *mc;
+	int i, j, mask;
+
+	mc = kmalloc(sizeof(struct pti_masterchannel), GFP_KERNEL);
+	if (mc == NULL)
+		return NULL;
+
+	/* look for a byte with a free bit */
+	for (i = 0; i < max_ids; i++)
+		if (id_array[i] != 0xff)
+			break;
+	if (i == max_ids) {
+		kfree(mc);
+		return NULL;
+	}
+	/* find the bit in the 128 possible channel opportunities */
+	mask = 0x80;
+	for (j = 0; j < 8; j++) {
+		if ((id_array[i] & mask) == 0)
+			break;
+		mask >>= 1;
+	}
+
+	/* grab it */
+	id_array[i] |= mask;
+	mc->master  = base_id;
+	mc->channel = ((i & 0xf)<<3) + j;
+	/* write new master Id / channel Id allocation to channel control */
+	pti_control_frame_built_and_sent(mc, thread_name);
+	return mc;
+}
+
+/*
+ * The following three functions:
+ * pti_request_mastercahannel(), mipi_release_masterchannel()
+ * and pti_writedata() are an API for other kernel drivers to
+ * access PTI.
+ */
+
+/**
+ * pti_request_masterchannel()- Kernel API function used to allocate
+ *				a master, channel ID address
+ *				to write to PTI HW.
+ *
+ * @type:        0- request Application  master, channel aperture ID
+ *                  write address.
+ *               1- request OS master, channel aperture ID write
+ *                  address.
+ *               2- request Modem master, channel aperture ID
+ *                  write address.
+ *               Other values, error.
+ * @thread_name: The thread name associated with the master / channel or
+ *               'NULL' if using the 'current' global variable.
+ *
+ * Returns:
+ *	pti_masterchannel struct
+ *	0 for error
+ */
+struct pti_masterchannel *pti_request_masterchannel(u8 type,
+						    const char *thread_name)
+{
+	struct pti_masterchannel *mc;
+
+	mutex_lock(&alloclock);
+
+	switch (type) {
+
+	case 0:
+		mc = get_id(drv_data->ia_app, MAX_APP_IDS,
+			    APP_BASE_ID, thread_name);
+		break;
+
+	case 1:
+		mc = get_id(drv_data->ia_os, MAX_OS_IDS,
+			    OS_BASE_ID, thread_name);
+		break;
+
+	case 2:
+		mc = get_id(drv_data->ia_modem, MAX_MODEM_IDS,
+			    MODEM_BASE_ID, thread_name);
+		break;
+	default:
+		mc = NULL;
+	}
+
+	mutex_unlock(&alloclock);
+	return mc;
+}
+EXPORT_SYMBOL_GPL(pti_request_masterchannel);
+
+/**
+ * pti_release_masterchannel()- Kernel API function used to release
+ *				a master, channel ID address
+ *				used to write to PTI HW.
+ *
+ * @mc: master, channel apeture ID address to be released.  This
+ *      will de-allocate the structure via kfree().
+ */
+void pti_release_masterchannel(struct pti_masterchannel *mc)
+{
+	u8 master, channel, i;
+
+	mutex_lock(&alloclock);
+
+	if (mc) {
+		master = mc->master;
+		channel = mc->channel;
+
+		if (master == APP_BASE_ID) {
+			i = channel >> 3;
+			drv_data->ia_app[i] &=  ~(0x80>>(channel & 0x7));
+		} else if (master == OS_BASE_ID) {
+			i = channel >> 3;
+			drv_data->ia_os[i] &= ~(0x80>>(channel & 0x7));
+		} else {
+			i = channel >> 3;
+			drv_data->ia_modem[i] &= ~(0x80>>(channel & 0x7));
+		}
+
+		kfree(mc);
+	}
+
+	mutex_unlock(&alloclock);
+}
+EXPORT_SYMBOL_GPL(pti_release_masterchannel);
+
+/**
+ * pti_writedata()- Kernel API function used to write trace
+ *                  debugging data to PTI HW.
+ *
+ * @mc:    Master, channel aperture ID address to write to.
+ *         Null value will return with no write occurring.
+ * @buf:   Trace debuging data to write to the PTI HW.
+ *         Null value will return with no write occurring.
+ * @count: Size of buf. Value of 0 or a negative number will
+ *         return with no write occuring.
+ */
+void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count)
+{
+	/*
+	 * since this function is exported, this is treated like an
+	 * API function, thus, all parameters should
+	 * be checked for validity.
+	 */
+	if ((mc != NULL) && (buf != NULL) && (count > 0))
+		pti_write_to_aperture(mc, buf, count);
+	return;
+}
+EXPORT_SYMBOL_GPL(pti_writedata);
+
+/*
+ * for the tty_driver_*() basic function descriptions, see tty_driver.h.
+ * Specific header comments made for PTI-related specifics.
+ */
+
+/**
+ * pti_tty_driver_open()- Open an Application master, channel aperture
+ * ID to the PTI device via tty device.
+ *
+ * @tty: tty interface.
+ * @filp: filp interface pased to tty_port_open() call.
+ *
+ * Returns:
+ *	int, 0 for success
+ *	otherwise, fail value
+ *
+ * The main purpose of using the tty device interface is for
+ * each tty port to have a unique PTI write aperture.  In an
+ * example use case, ttyPTI0 gets syslogd and an APP aperture
+ * ID and ttyPTI1 is where the n_tracesink ldisc hooks to route
+ * modem messages into PTI.  Modem trace data does not have to
+ * go to ttyPTI1, but ttyPTI0 and ttyPTI1 do need to be distinct
+ * master IDs.  These messages go through the PTI HW and out of
+ * the handheld platform and to the Fido/Lauterbach device.
+ */
+static int pti_tty_driver_open(struct tty_struct *tty, struct file *filp)
+{
+	/*
+	 * we actually want to allocate a new channel per open, per
+	 * system arch.  HW gives more than plenty channels for a single
+	 * system task to have its own channel to write trace data. This
+	 * also removes a locking requirement for the actual write
+	 * procedure.
+	 */
+	return tty_port_open(tty->port, tty, filp);
+}
+
+/**
+ * pti_tty_driver_close()- close tty device and release Application
+ * master, channel aperture ID to the PTI device via tty device.
+ *
+ * @tty: tty interface.
+ * @filp: filp interface pased to tty_port_close() call.
+ *
+ * The main purpose of using the tty device interface is to route
+ * syslog daemon messages to the PTI HW and out of the handheld platform
+ * and to the Fido/Lauterbach device.
+ */
+static void pti_tty_driver_close(struct tty_struct *tty, struct file *filp)
+{
+	tty_port_close(tty->port, tty, filp);
+}
+
+/**
+ * pti_tty_install()- Used to set up specific master-channels
+ *		      to tty ports for organizational purposes when
+ *		      tracing viewed from debuging tools.
+ *
+ * @driver: tty driver information.
+ * @tty: tty struct containing pti information.
+ *
+ * Returns:
+ *	0 for success
+ *	otherwise, error
+ */
+static int pti_tty_install(struct tty_driver *driver, struct tty_struct *tty)
+{
+	int idx = tty->index;
+	struct pti_tty *pti_tty_data;
+	int ret = tty_standard_install(driver, tty);
+
+	if (ret == 0) {
+		pti_tty_data = kmalloc(sizeof(struct pti_tty), GFP_KERNEL);
+		if (pti_tty_data == NULL)
+			return -ENOMEM;
+
+		if (idx == PTITTY_MINOR_START)
+			pti_tty_data->mc = pti_request_masterchannel(0, NULL);
+		else
+			pti_tty_data->mc = pti_request_masterchannel(2, NULL);
+
+		if (pti_tty_data->mc == NULL) {
+			kfree(pti_tty_data);
+			return -ENXIO;
+		}
+		tty->driver_data = pti_tty_data;
+	}
+
+	return ret;
+}
+
+/**
+ * pti_tty_cleanup()- Used to de-allocate master-channel resources
+ *		      tied to tty's of this driver.
+ *
+ * @tty: tty struct containing pti information.
+ */
+static void pti_tty_cleanup(struct tty_struct *tty)
+{
+	struct pti_tty *pti_tty_data = tty->driver_data;
+	if (pti_tty_data == NULL)
+		return;
+	pti_release_masterchannel(pti_tty_data->mc);
+	kfree(pti_tty_data);
+	tty->driver_data = NULL;
+}
+
+/**
+ * pti_tty_driver_write()-  Write trace debugging data through the char
+ * interface to the PTI HW.  Part of the misc device implementation.
+ *
+ * @filp: Contains private data which is used to obtain
+ *        master, channel write ID.
+ * @data: trace data to be written.
+ * @len:  # of byte to write.
+ *
+ * Returns:
+ *	int, # of bytes written
+ *	otherwise, error
+ */
+static int pti_tty_driver_write(struct tty_struct *tty,
+	const unsigned char *buf, int len)
+{
+	struct pti_tty *pti_tty_data = tty->driver_data;
+	if ((pti_tty_data != NULL) && (pti_tty_data->mc != NULL)) {
+		pti_write_to_aperture(pti_tty_data->mc, (u8 *)buf, len);
+		return len;
+	}
+	/*
+	 * we can't write to the pti hardware if the private driver_data
+	 * and the mc address is not there.
+	 */
+	else
+		return -EFAULT;
+}
+
+/**
+ * pti_tty_write_room()- Always returns 2048.
+ *
+ * @tty: contains tty info of the pti driver.
+ */
+static int pti_tty_write_room(struct tty_struct *tty)
+{
+	return 2048;
+}
+
+/**
+ * pti_char_open()- Open an Application master, channel aperture
+ * ID to the PTI device. Part of the misc device implementation.
+ *
+ * @inode: not used.
+ * @filp:  Output- will have a masterchannel struct set containing
+ *                 the allocated application PTI aperture write address.
+ *
+ * Returns:
+ *	int, 0 for success
+ *	otherwise, a fail value
+ */
+static int pti_char_open(struct inode *inode, struct file *filp)
+{
+	struct pti_masterchannel *mc;
+
+	/*
+	 * We really do want to fail immediately if
+	 * pti_request_masterchannel() fails,
+	 * before assigning the value to filp->private_data.
+	 * Slightly easier to debug if this driver needs debugging.
+	 */
+	mc = pti_request_masterchannel(0, NULL);
+	if (mc == NULL)
+		return -ENOMEM;
+	filp->private_data = mc;
+	return 0;
+}
+
+/**
+ * pti_char_release()-  Close a char channel to the PTI device. Part
+ * of the misc device implementation.
+ *
+ * @inode: Not used in this implementaiton.
+ * @filp:  Contains private_data that contains the master, channel
+ *         ID to be released by the PTI device.
+ *
+ * Returns:
+ *	always 0
+ */
+static int pti_char_release(struct inode *inode, struct file *filp)
+{
+	pti_release_masterchannel(filp->private_data);
+	filp->private_data = NULL;
+	return 0;
+}
+
+/**
+ * pti_char_write()-  Write trace debugging data through the char
+ * interface to the PTI HW.  Part of the misc device implementation.
+ *
+ * @filp:  Contains private data which is used to obtain
+ *         master, channel write ID.
+ * @data:  trace data to be written.
+ * @len:   # of byte to write.
+ * @ppose: Not used in this function implementation.
+ *
+ * Returns:
+ *	int, # of bytes written
+ *	otherwise, error value
+ *
+ * Notes: From side discussions with Alan Cox and experimenting
+ * with PTI debug HW like Nokia's Fido box and Lauterbach
+ * devices, 8192 byte write buffer used by USER_COPY_SIZE was
+ * deemed an appropriate size for this type of usage with
+ * debugging HW.
+ */
+static ssize_t pti_char_write(struct file *filp, const char __user *data,
+			      size_t len, loff_t *ppose)
+{
+	struct pti_masterchannel *mc;
+	void *kbuf;
+	const char __user *tmp;
+	size_t size = USER_COPY_SIZE;
+	size_t n = 0;
+
+	tmp = data;
+	mc = filp->private_data;
+
+	kbuf = kmalloc(size, GFP_KERNEL);
+	if (kbuf == NULL)  {
+		pr_err("%s(%d): buf allocation failed\n",
+			__func__, __LINE__);
+		return -ENOMEM;
+	}
+
+	do {
+		if (len - n > USER_COPY_SIZE)
+			size = USER_COPY_SIZE;
+		else
+			size = len - n;
+
+		if (copy_from_user(kbuf, tmp, size)) {
+			kfree(kbuf);
+			return n ? n : -EFAULT;
+		}
+
+		pti_write_to_aperture(mc, kbuf, size);
+		n  += size;
+		tmp += size;
+
+	} while (len > n);
+
+	kfree(kbuf);
+	return len;
+}
+
+static const struct tty_operations pti_tty_driver_ops = {
+	.open		= pti_tty_driver_open,
+	.close		= pti_tty_driver_close,
+	.write		= pti_tty_driver_write,
+	.write_room	= pti_tty_write_room,
+	.install	= pti_tty_install,
+	.cleanup	= pti_tty_cleanup
+};
+
+static const struct file_operations pti_char_driver_ops = {
+	.owner		= THIS_MODULE,
+	.write		= pti_char_write,
+	.open		= pti_char_open,
+	.release	= pti_char_release,
+};
+
+static struct miscdevice pti_char_driver = {
+	.minor		= MISC_DYNAMIC_MINOR,
+	.name		= CHARNAME,
+	.fops		= &pti_char_driver_ops
+};
+
+/**
+ * pti_console_write()-  Write to the console that has been acquired.
+ *
+ * @c:   Not used in this implementaiton.
+ * @buf: Data to be written.
+ * @len: Length of buf.
+ */
+static void pti_console_write(struct console *c, const char *buf, unsigned len)
+{
+	static struct pti_masterchannel mc = {.master  = CONSOLE_ID,
+					      .channel = 0};
+
+	mc.channel = pti_console_channel;
+	pti_console_channel = (pti_console_channel + 1) & 0x7f;
+
+	pti_write_full_frame_to_aperture(&mc, buf, len);
+}
+
+/**
+ * pti_console_device()-  Return the driver tty structure and set the
+ *			  associated index implementation.
+ *
+ * @c:     Console device of the driver.
+ * @index: index associated with c.
+ *
+ * Returns:
+ *	always value of pti_tty_driver structure when this function
+ *	is called.
+ */
+static struct tty_driver *pti_console_device(struct console *c, int *index)
+{
+	*index = c->index;
+	return pti_tty_driver;
+}
+
+/**
+ * pti_console_setup()-  Initialize console variables used by the driver.
+ *
+ * @c:     Not used.
+ * @opts:  Not used.
+ *
+ * Returns:
+ *	always 0.
+ */
+static int pti_console_setup(struct console *c, char *opts)
+{
+	pti_console_channel = 0;
+	pti_control_channel = 0;
+	return 0;
+}
+
+/*
+ * pti_console struct, used to capture OS printk()'s and shift
+ * out to the PTI device for debugging.  This cannot be
+ * enabled upon boot because of the possibility of eating
+ * any serial console printk's (race condition discovered).
+ * The console should be enabled upon when the tty port is
+ * used for the first time.  Since the primary purpose for
+ * the tty port is to hook up syslog to it, the tty port
+ * will be open for a really long time.
+ */
+static struct console pti_console = {
+	.name		= TTYNAME,
+	.write		= pti_console_write,
+	.device		= pti_console_device,
+	.setup		= pti_console_setup,
+	.flags		= CON_PRINTBUFFER,
+	.index		= 0,
+};
+
+/**
+ * pti_port_activate()- Used to start/initialize any items upon
+ * first opening of tty_port().
+ *
+ * @port- The tty port number of the PTI device.
+ * @tty-  The tty struct associated with this device.
+ *
+ * Returns:
+ *	always returns 0
+ *
+ * Notes: The primary purpose of the PTI tty port 0 is to hook
+ * the syslog daemon to it; thus this port will be open for a
+ * very long time.
+ */
+static int pti_port_activate(struct tty_port *port, struct tty_struct *tty)
+{
+	if (port->tty->index == PTITTY_MINOR_START)
+		console_start(&pti_console);
+	return 0;
+}
+
+/**
+ * pti_port_shutdown()- Used to stop/shutdown any items upon the
+ * last tty port close.
+ *
+ * @port- The tty port number of the PTI device.
+ *
+ * Notes: The primary purpose of the PTI tty port 0 is to hook
+ * the syslog daemon to it; thus this port will be open for a
+ * very long time.
+ */
+static void pti_port_shutdown(struct tty_port *port)
+{
+	if (port->tty->index == PTITTY_MINOR_START)
+		console_stop(&pti_console);
+}
+
+static const struct tty_port_operations tty_port_ops = {
+	.activate = pti_port_activate,
+	.shutdown = pti_port_shutdown,
+};
+
+/*
+ * Note the _probe() call sets everything up and ties the char and tty
+ * to successfully detecting the PTI device on the pci bus.
+ */
+
+/**
+ * pti_pci_probe()- Used to detect pti on the pci bus and set
+ *		    things up in the driver.
+ *
+ * @pdev- pci_dev struct values for pti.
+ * @ent-  pci_device_id struct for pti driver.
+ *
+ * Returns:
+ *	0 for success
+ *	otherwise, error
+ */
+static int pti_pci_probe(struct pci_dev *pdev,
+		const struct pci_device_id *ent)
+{
+	unsigned int a;
+	int retval = -EINVAL;
+	int pci_bar = 1;
+
+	dev_dbg(&pdev->dev, "%s %s(%d): PTI PCI ID %04x:%04x\n", __FILE__,
+			__func__, __LINE__, pdev->vendor, pdev->device);
+
+	retval = misc_register(&pti_char_driver);
+	if (retval) {
+		pr_err("%s(%d): CHAR registration failed of pti driver\n",
+			__func__, __LINE__);
+		pr_err("%s(%d): Error value returned: %d\n",
+			__func__, __LINE__, retval);
+		goto err;
+	}
+
+	retval = pci_enable_device(pdev);
+	if (retval != 0) {
+		dev_err(&pdev->dev,
+			"%s: pci_enable_device() returned error %d\n",
+			__func__, retval);
+		goto err_unreg_misc;
+	}
+
+	drv_data = kzalloc(sizeof(*drv_data), GFP_KERNEL);
+	if (drv_data == NULL) {
+		retval = -ENOMEM;
+		dev_err(&pdev->dev,
+			"%s(%d): kmalloc() returned NULL memory.\n",
+			__func__, __LINE__);
+		goto err_disable_pci;
+	}
+	drv_data->pti_addr = pci_resource_start(pdev, pci_bar);
+
+	retval = pci_request_region(pdev, pci_bar, dev_name(&pdev->dev));
+	if (retval != 0) {
+		dev_err(&pdev->dev,
+			"%s(%d): pci_request_region() returned error %d\n",
+			__func__, __LINE__, retval);
+		goto err_free_dd;
+	}
+	drv_data->aperture_base = drv_data->pti_addr+APERTURE_14;
+	drv_data->pti_ioaddr =
+		ioremap_nocache((u32)drv_data->aperture_base,
+		APERTURE_LEN);
+	if (!drv_data->pti_ioaddr) {
+		retval = -ENOMEM;
+		goto err_rel_reg;
+	}
+
+	pci_set_drvdata(pdev, drv_data);
+
+	for (a = 0; a < PTITTY_MINOR_NUM; a++) {
+		struct tty_port *port = &drv_data->port[a];
+		tty_port_init(port);
+		port->ops = &tty_port_ops;
+
+		tty_port_register_device(port, pti_tty_driver, a, &pdev->dev);
+	}
+
+	register_console(&pti_console);
+
+	return 0;
+err_rel_reg:
+	pci_release_region(pdev, pci_bar);
+err_free_dd:
+	kfree(drv_data);
+err_disable_pci:
+	pci_disable_device(pdev);
+err_unreg_misc:
+	misc_deregister(&pti_char_driver);
+err:
+	return retval;
+}
+
+/**
+ * pti_pci_remove()- Driver exit method to remove PTI from
+ *		   PCI bus.
+ * @pdev: variable containing pci info of PTI.
+ */
+static void pti_pci_remove(struct pci_dev *pdev)
+{
+	struct pti_dev *drv_data = pci_get_drvdata(pdev);
+	unsigned int a;
+
+	unregister_console(&pti_console);
+
+	for (a = 0; a < PTITTY_MINOR_NUM; a++) {
+		tty_unregister_device(pti_tty_driver, a);
+		tty_port_destroy(&drv_data->port[a]);
+	}
+
+	iounmap(drv_data->pti_ioaddr);
+	kfree(drv_data);
+	pci_release_region(pdev, 1);
+	pci_disable_device(pdev);
+
+	misc_deregister(&pti_char_driver);
+}
+
+static struct pci_driver pti_pci_driver = {
+	.name		= PCINAME,
+	.id_table	= pci_ids,
+	.probe		= pti_pci_probe,
+	.remove		= pti_pci_remove,
+};
+
+/**
+ *
+ * pti_init()- Overall entry/init call to the pti driver.
+ *             It starts the registration process with the kernel.
+ *
+ * Returns:
+ *	int __init, 0 for success
+ *	otherwise value is an error
+ *
+ */
+static int __init pti_init(void)
+{
+	int retval = -EINVAL;
+
+	/* First register module as tty device */
+
+	pti_tty_driver = alloc_tty_driver(PTITTY_MINOR_NUM);
+	if (pti_tty_driver == NULL) {
+		pr_err("%s(%d): Memory allocation failed for ptiTTY driver\n",
+			__func__, __LINE__);
+		return -ENOMEM;
+	}
+
+	pti_tty_driver->driver_name		= DRIVERNAME;
+	pti_tty_driver->name			= TTYNAME;
+	pti_tty_driver->major			= 0;
+	pti_tty_driver->minor_start		= PTITTY_MINOR_START;
+	pti_tty_driver->type			= TTY_DRIVER_TYPE_SYSTEM;
+	pti_tty_driver->subtype			= SYSTEM_TYPE_SYSCONS;
+	pti_tty_driver->flags			= TTY_DRIVER_REAL_RAW |
+						  TTY_DRIVER_DYNAMIC_DEV;
+	pti_tty_driver->init_termios		= tty_std_termios;
+
+	tty_set_operations(pti_tty_driver, &pti_tty_driver_ops);
+
+	retval = tty_register_driver(pti_tty_driver);
+	if (retval) {
+		pr_err("%s(%d): TTY registration failed of pti driver\n",
+			__func__, __LINE__);
+		pr_err("%s(%d): Error value returned: %d\n",
+			__func__, __LINE__, retval);
+
+		goto put_tty;
+	}
+
+	retval = pci_register_driver(&pti_pci_driver);
+	if (retval) {
+		pr_err("%s(%d): PCI registration failed of pti driver\n",
+			__func__, __LINE__);
+		pr_err("%s(%d): Error value returned: %d\n",
+			__func__, __LINE__, retval);
+		goto unreg_tty;
+	}
+
+	return 0;
+unreg_tty:
+	tty_unregister_driver(pti_tty_driver);
+put_tty:
+	put_tty_driver(pti_tty_driver);
+	pti_tty_driver = NULL;
+	return retval;
+}
+
+/**
+ * pti_exit()- Unregisters this module as a tty and pci driver.
+ */
+static void __exit pti_exit(void)
+{
+	tty_unregister_driver(pti_tty_driver);
+	pci_unregister_driver(&pti_pci_driver);
+	put_tty_driver(pti_tty_driver);
+}
+
+module_init(pti_init);
+module_exit(pti_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ken Mills, Jay Freyensee");
+MODULE_DESCRIPTION("PTI Driver");
+
diff --git a/drivers/misc/qcom-coincell.c b/drivers/misc/qcom-coincell.c
new file mode 100644
index 000000000..829a61dbd
--- /dev/null
+++ b/drivers/misc/qcom-coincell.c
@@ -0,0 +1,153 @@
+/* Copyright (c) 2013, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2015, Sony Mobile Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/regmap.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+struct qcom_coincell {
+	struct device	*dev;
+	struct regmap	*regmap;
+	u32		base_addr;
+};
+
+#define QCOM_COINCELL_REG_RSET		0x44
+#define QCOM_COINCELL_REG_VSET		0x45
+#define QCOM_COINCELL_REG_ENABLE	0x46
+
+#define QCOM_COINCELL_ENABLE		BIT(7)
+
+static const int qcom_rset_map[] = { 2100, 1700, 1200, 800 };
+static const int qcom_vset_map[] = { 2500, 3200, 3100, 3000 };
+/* NOTE: for pm8921 and others, voltage of 2500 is 16 (10000b), not 0 */
+
+/* if enable==0, rset and vset are ignored */
+static int qcom_coincell_chgr_config(struct qcom_coincell *chgr, int rset,
+				     int vset, bool enable)
+{
+	int i, j, rc;
+
+	/* if disabling, just do that and skip other operations */
+	if (!enable)
+		return regmap_write(chgr->regmap,
+			  chgr->base_addr + QCOM_COINCELL_REG_ENABLE, 0);
+
+	/* find index for current-limiting resistor */
+	for (i = 0; i < ARRAY_SIZE(qcom_rset_map); i++)
+		if (rset == qcom_rset_map[i])
+			break;
+
+	if (i >= ARRAY_SIZE(qcom_rset_map)) {
+		dev_err(chgr->dev, "invalid rset-ohms value %d\n", rset);
+		return -EINVAL;
+	}
+
+	/* find index for charge voltage */
+	for (j = 0; j < ARRAY_SIZE(qcom_vset_map); j++)
+		if (vset == qcom_vset_map[j])
+			break;
+
+	if (j >= ARRAY_SIZE(qcom_vset_map)) {
+		dev_err(chgr->dev, "invalid vset-millivolts value %d\n", vset);
+		return -EINVAL;
+	}
+
+	rc = regmap_write(chgr->regmap,
+			  chgr->base_addr + QCOM_COINCELL_REG_RSET, i);
+	if (rc) {
+		/*
+		 * This is mainly to flag a bad base_addr (reg) from dts.
+		 * Other failures writing to the registers should be
+		 * extremely rare, or indicative of problems that
+		 * should be reported elsewhere (eg. spmi failure).
+		 */
+		dev_err(chgr->dev, "could not write to RSET register\n");
+		return rc;
+	}
+
+	rc = regmap_write(chgr->regmap,
+		chgr->base_addr + QCOM_COINCELL_REG_VSET, j);
+	if (rc)
+		return rc;
+
+	/* set 'enable' register */
+	return regmap_write(chgr->regmap,
+			    chgr->base_addr + QCOM_COINCELL_REG_ENABLE,
+			    QCOM_COINCELL_ENABLE);
+}
+
+static int qcom_coincell_probe(struct platform_device *pdev)
+{
+	struct device_node *node = pdev->dev.of_node;
+	struct qcom_coincell chgr;
+	u32 rset = 0;
+	u32 vset = 0;
+	bool enable;
+	int rc;
+
+	chgr.dev = &pdev->dev;
+
+	chgr.regmap = dev_get_regmap(pdev->dev.parent, NULL);
+	if (!chgr.regmap) {
+		dev_err(chgr.dev, "Unable to get regmap\n");
+		return -EINVAL;
+	}
+
+	rc = of_property_read_u32(node, "reg", &chgr.base_addr);
+	if (rc)
+		return rc;
+
+	enable = !of_property_read_bool(node, "qcom,charger-disable");
+
+	if (enable) {
+		rc = of_property_read_u32(node, "qcom,rset-ohms", &rset);
+		if (rc) {
+			dev_err(chgr.dev,
+				"can't find 'qcom,rset-ohms' in DT block");
+			return rc;
+		}
+
+		rc = of_property_read_u32(node, "qcom,vset-millivolts", &vset);
+		if (rc) {
+			dev_err(chgr.dev,
+			    "can't find 'qcom,vset-millivolts' in DT block");
+			return rc;
+		}
+	}
+
+	return qcom_coincell_chgr_config(&chgr, rset, vset, enable);
+}
+
+static const struct of_device_id qcom_coincell_match_table[] = {
+	{ .compatible = "qcom,pm8941-coincell", },
+	{}
+};
+
+MODULE_DEVICE_TABLE(of, qcom_coincell_match_table);
+
+static struct platform_driver qcom_coincell_driver = {
+	.driver	= {
+		.name		= "qcom-spmi-coincell",
+		.of_match_table	= qcom_coincell_match_table,
+	},
+	.probe		= qcom_coincell_probe,
+};
+
+module_platform_driver(qcom_coincell_driver);
+
+MODULE_DESCRIPTION("Qualcomm PMIC coincell charger driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/sgi-gru/Makefile b/drivers/misc/sgi-gru/Makefile
new file mode 100644
index 000000000..0003a1d56
--- /dev/null
+++ b/drivers/misc/sgi-gru/Makefile
@@ -0,0 +1,5 @@
+ccflags-$(CONFIG_SGI_GRU_DEBUG)	:= -DDEBUG
+
+obj-$(CONFIG_SGI_GRU) := gru.o
+gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o gruhandles.o grukdump.o
+
diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h
new file mode 100644
index 000000000..3ad76cd18
--- /dev/null
+++ b/drivers/misc/sgi-gru/gru.h
@@ -0,0 +1,78 @@
+/*
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation; either version 2.1 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __GRU_H__
+#define __GRU_H__
+
+/*
+ * GRU architectural definitions
+ */
+#define GRU_CACHE_LINE_BYTES		64
+#define GRU_HANDLE_STRIDE		256
+#define GRU_CB_BASE			0
+#define GRU_DS_BASE			0x20000
+
+/*
+ * Size used to map GRU GSeg
+ */
+#if defined(CONFIG_IA64)
+#define GRU_GSEG_PAGESIZE	(256 * 1024UL)
+#elif defined(CONFIG_X86_64)
+#define GRU_GSEG_PAGESIZE	(256 * 1024UL)		/* ZZZ 2MB ??? */
+#else
+#error "Unsupported architecture"
+#endif
+
+/*
+ * Structure for obtaining GRU resource information
+ */
+struct gru_chiplet_info {
+	int	node;
+	int	chiplet;
+	int	blade;
+	int	total_dsr_bytes;
+	int	total_cbr;
+	int	total_user_dsr_bytes;
+	int	total_user_cbr;
+	int	free_user_dsr_bytes;
+	int	free_user_cbr;
+};
+
+/*
+ * Statictics kept for each context.
+ */
+struct gru_gseg_statistics {
+	unsigned long	fmm_tlbmiss;
+	unsigned long	upm_tlbmiss;
+	unsigned long	tlbdropin;
+	unsigned long	context_stolen;
+	unsigned long	reserved[10];
+};
+
+/* Flags for GRU options on the gru_create_context() call */
+/* Select one of the follow 4 options to specify how TLB misses are handled */
+#define GRU_OPT_MISS_DEFAULT	0x0000	/* Use default mode */
+#define GRU_OPT_MISS_USER_POLL	0x0001	/* User will poll CB for faults */
+#define GRU_OPT_MISS_FMM_INTR	0x0002	/* Send interrupt to cpu to
+					   handle fault */
+#define GRU_OPT_MISS_FMM_POLL	0x0003	/* Use system polling thread */
+#define GRU_OPT_MISS_MASK	0x0003	/* Mask for TLB MISS option */
+
+
+
+#endif		/* __GRU_H__ */
diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h
new file mode 100644
index 000000000..04d5170ac
--- /dev/null
+++ b/drivers/misc/sgi-gru/gru_instructions.h
@@ -0,0 +1,736 @@
+/*
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation; either version 2.1 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __GRU_INSTRUCTIONS_H__
+#define __GRU_INSTRUCTIONS_H__
+
+extern int gru_check_status_proc(void *cb);
+extern int gru_wait_proc(void *cb);
+extern void gru_wait_abort_proc(void *cb);
+
+
+
+/*
+ * Architecture dependent functions
+ */
+
+#if defined(CONFIG_IA64)
+#include <linux/compiler.h>
+#include <asm/intrinsics.h>
+#define __flush_cache(p)		ia64_fc((unsigned long)p)
+/* Use volatile on IA64 to ensure ordering via st4.rel */
+#define gru_ordered_store_ulong(p, v)					\
+		do {							\
+			barrier();					\
+			*((volatile unsigned long *)(p)) = v; /* force st.rel */	\
+		} while (0)
+#elif defined(CONFIG_X86_64)
+#include <asm/cacheflush.h>
+#define __flush_cache(p)		clflush(p)
+#define gru_ordered_store_ulong(p, v)					\
+		do {							\
+			barrier();					\
+			*(unsigned long *)p = v;			\
+		} while (0)
+#else
+#error "Unsupported architecture"
+#endif
+
+/*
+ * Control block status and exception codes
+ */
+#define CBS_IDLE			0
+#define CBS_EXCEPTION			1
+#define CBS_ACTIVE			2
+#define CBS_CALL_OS			3
+
+/* CB substatus bitmasks */
+#define CBSS_MSG_QUEUE_MASK		7
+#define CBSS_IMPLICIT_ABORT_ACTIVE_MASK	8
+
+/* CB substatus message queue values (low 3 bits of substatus) */
+#define CBSS_NO_ERROR			0
+#define CBSS_LB_OVERFLOWED		1
+#define CBSS_QLIMIT_REACHED		2
+#define CBSS_PAGE_OVERFLOW		3
+#define CBSS_AMO_NACKED			4
+#define CBSS_PUT_NACKED			5
+
+/*
+ * Structure used to fetch exception detail for CBs that terminate with
+ * CBS_EXCEPTION
+ */
+struct control_block_extended_exc_detail {
+	unsigned long	cb;
+	int		opc;
+	int		ecause;
+	int		exopc;
+	long		exceptdet0;
+	int		exceptdet1;
+	int		cbrstate;
+	int		cbrexecstatus;
+};
+
+/*
+ * Instruction formats
+ */
+
+/*
+ * Generic instruction format.
+ * This definition has precise bit field definitions.
+ */
+struct gru_instruction_bits {
+    /* DW 0  - low */
+    unsigned int		icmd:      1;
+    unsigned char		ima:	   3;	/* CB_DelRep, unmapped mode */
+    unsigned char		reserved0: 4;
+    unsigned int		xtype:     3;
+    unsigned int		iaa0:      2;
+    unsigned int		iaa1:      2;
+    unsigned char		reserved1: 1;
+    unsigned char		opc:       8;	/* opcode */
+    unsigned char		exopc:     8;	/* extended opcode */
+    /* DW 0  - high */
+    unsigned int		idef2:    22;	/* TRi0 */
+    unsigned char		reserved2: 2;
+    unsigned char		istatus:   2;
+    unsigned char		isubstatus:4;
+    unsigned char		reserved3: 1;
+    unsigned char		tlb_fault_color: 1;
+    /* DW 1 */
+    unsigned long		idef4;		/* 42 bits: TRi1, BufSize */
+    /* DW 2-6 */
+    unsigned long		idef1;		/* BAddr0 */
+    unsigned long		idef5;		/* Nelem */
+    unsigned long		idef6;		/* Stride, Operand1 */
+    unsigned long		idef3;		/* BAddr1, Value, Operand2 */
+    unsigned long		reserved4;
+    /* DW 7 */
+    unsigned long		avalue;		 /* AValue */
+};
+
+/*
+ * Generic instruction with friendlier names. This format is used
+ * for inline instructions.
+ */
+struct gru_instruction {
+    /* DW 0 */
+    union {
+    	unsigned long		op64;    /* icmd,xtype,iaa0,ima,opc,tri0 */
+	struct {
+		unsigned int	op32;
+		unsigned int	tri0;
+	};
+    };
+    unsigned long		tri1_bufsize;		/* DW 1 */
+    unsigned long		baddr0;			/* DW 2 */
+    unsigned long		nelem;			/* DW 3 */
+    unsigned long		op1_stride;		/* DW 4 */
+    unsigned long		op2_value_baddr1;	/* DW 5 */
+    unsigned long		reserved0;		/* DW 6 */
+    unsigned long		avalue;			/* DW 7 */
+};
+
+/* Some shifts and masks for the low 64 bits of a GRU command */
+#define GRU_CB_ICMD_SHFT	0
+#define GRU_CB_ICMD_MASK	0x1
+#define GRU_CB_XTYPE_SHFT	8
+#define GRU_CB_XTYPE_MASK	0x7
+#define GRU_CB_IAA0_SHFT	11
+#define GRU_CB_IAA0_MASK	0x3
+#define GRU_CB_IAA1_SHFT	13
+#define GRU_CB_IAA1_MASK	0x3
+#define GRU_CB_IMA_SHFT		1
+#define GRU_CB_IMA_MASK		0x3
+#define GRU_CB_OPC_SHFT		16
+#define GRU_CB_OPC_MASK		0xff
+#define GRU_CB_EXOPC_SHFT	24
+#define GRU_CB_EXOPC_MASK	0xff
+#define GRU_IDEF2_SHFT		32
+#define GRU_IDEF2_MASK		0x3ffff
+#define GRU_ISTATUS_SHFT	56
+#define GRU_ISTATUS_MASK	0x3
+
+/* GRU instruction opcodes (opc field) */
+#define OP_NOP		0x00
+#define OP_BCOPY	0x01
+#define OP_VLOAD	0x02
+#define OP_IVLOAD	0x03
+#define OP_VSTORE	0x04
+#define OP_IVSTORE	0x05
+#define OP_VSET		0x06
+#define OP_IVSET	0x07
+#define OP_MESQ		0x08
+#define OP_GAMXR	0x09
+#define OP_GAMIR	0x0a
+#define OP_GAMIRR	0x0b
+#define OP_GAMER	0x0c
+#define OP_GAMERR	0x0d
+#define OP_BSTORE	0x0e
+#define OP_VFLUSH	0x0f
+
+
+/* Extended opcodes values (exopc field) */
+
+/* GAMIR - AMOs with implicit operands */
+#define EOP_IR_FETCH	0x01 /* Plain fetch of memory */
+#define EOP_IR_CLR	0x02 /* Fetch and clear */
+#define EOP_IR_INC	0x05 /* Fetch and increment */
+#define EOP_IR_DEC	0x07 /* Fetch and decrement */
+#define EOP_IR_QCHK1	0x0d /* Queue check, 64 byte msg */
+#define EOP_IR_QCHK2	0x0e /* Queue check, 128 byte msg */
+
+/* GAMIRR - Registered AMOs with implicit operands */
+#define EOP_IRR_FETCH	0x01 /* Registered fetch of memory */
+#define EOP_IRR_CLR	0x02 /* Registered fetch and clear */
+#define EOP_IRR_INC	0x05 /* Registered fetch and increment */
+#define EOP_IRR_DEC	0x07 /* Registered fetch and decrement */
+#define EOP_IRR_DECZ	0x0f /* Registered fetch and decrement, update on zero*/
+
+/* GAMER - AMOs with explicit operands */
+#define EOP_ER_SWAP	0x00 /* Exchange argument and memory */
+#define EOP_ER_OR	0x01 /* Logical OR with memory */
+#define EOP_ER_AND	0x02 /* Logical AND with memory */
+#define EOP_ER_XOR	0x03 /* Logical XOR with memory */
+#define EOP_ER_ADD	0x04 /* Add value to memory */
+#define EOP_ER_CSWAP	0x08 /* Compare with operand2, write operand1 if match*/
+#define EOP_ER_CADD	0x0c /* Queue check, operand1*64 byte msg */
+
+/* GAMERR - Registered AMOs with explicit operands */
+#define EOP_ERR_SWAP	0x00 /* Exchange argument and memory */
+#define EOP_ERR_OR	0x01 /* Logical OR with memory */
+#define EOP_ERR_AND	0x02 /* Logical AND with memory */
+#define EOP_ERR_XOR	0x03 /* Logical XOR with memory */
+#define EOP_ERR_ADD	0x04 /* Add value to memory */
+#define EOP_ERR_CSWAP	0x08 /* Compare with operand2, write operand1 if match*/
+#define EOP_ERR_EPOLL	0x09 /* Poll for equality */
+#define EOP_ERR_NPOLL	0x0a /* Poll for inequality */
+
+/* GAMXR - SGI Arithmetic unit */
+#define EOP_XR_CSWAP	0x0b /* Masked compare exchange */
+
+
+/* Transfer types (xtype field) */
+#define XTYPE_B		0x0	/* byte */
+#define XTYPE_S		0x1	/* short (2-byte) */
+#define XTYPE_W		0x2	/* word (4-byte) */
+#define XTYPE_DW	0x3	/* doubleword (8-byte) */
+#define XTYPE_CL	0x6	/* cacheline (64-byte) */
+
+
+/* Instruction access attributes (iaa0, iaa1 fields) */
+#define IAA_RAM		0x0	/* normal cached RAM access */
+#define IAA_NCRAM	0x2	/* noncoherent RAM access */
+#define IAA_MMIO	0x1	/* noncoherent memory-mapped I/O space */
+#define IAA_REGISTER	0x3	/* memory-mapped registers, etc. */
+
+
+/* Instruction mode attributes (ima field) */
+#define IMA_MAPPED	0x0	/* Virtual mode  */
+#define IMA_CB_DELAY	0x1	/* hold read responses until status changes */
+#define IMA_UNMAPPED	0x2	/* bypass the TLBs (OS only) */
+#define IMA_INTERRUPT	0x4	/* Interrupt when instruction completes */
+
+/* CBE ecause bits */
+#define CBE_CAUSE_RI				(1 << 0)
+#define CBE_CAUSE_INVALID_INSTRUCTION		(1 << 1)
+#define CBE_CAUSE_UNMAPPED_MODE_FORBIDDEN	(1 << 2)
+#define CBE_CAUSE_PE_CHECK_DATA_ERROR		(1 << 3)
+#define CBE_CAUSE_IAA_GAA_MISMATCH		(1 << 4)
+#define CBE_CAUSE_DATA_SEGMENT_LIMIT_EXCEPTION	(1 << 5)
+#define CBE_CAUSE_OS_FATAL_TLB_FAULT		(1 << 6)
+#define CBE_CAUSE_EXECUTION_HW_ERROR		(1 << 7)
+#define CBE_CAUSE_TLBHW_ERROR			(1 << 8)
+#define CBE_CAUSE_RA_REQUEST_TIMEOUT		(1 << 9)
+#define CBE_CAUSE_HA_REQUEST_TIMEOUT		(1 << 10)
+#define CBE_CAUSE_RA_RESPONSE_FATAL		(1 << 11)
+#define CBE_CAUSE_RA_RESPONSE_NON_FATAL		(1 << 12)
+#define CBE_CAUSE_HA_RESPONSE_FATAL		(1 << 13)
+#define CBE_CAUSE_HA_RESPONSE_NON_FATAL		(1 << 14)
+#define CBE_CAUSE_ADDRESS_SPACE_DECODE_ERROR	(1 << 15)
+#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR	(1 << 16)
+#define CBE_CAUSE_RA_RESPONSE_DATA_ERROR	(1 << 17)
+#define CBE_CAUSE_HA_RESPONSE_DATA_ERROR	(1 << 18)
+#define CBE_CAUSE_FORCED_ERROR			(1 << 19)
+
+/* CBE cbrexecstatus bits */
+#define CBR_EXS_ABORT_OCC_BIT			0
+#define CBR_EXS_INT_OCC_BIT			1
+#define CBR_EXS_PENDING_BIT			2
+#define CBR_EXS_QUEUED_BIT			3
+#define CBR_EXS_TLB_INVAL_BIT			4
+#define CBR_EXS_EXCEPTION_BIT			5
+#define CBR_EXS_CB_INT_PENDING_BIT		6
+
+#define CBR_EXS_ABORT_OCC			(1 << CBR_EXS_ABORT_OCC_BIT)
+#define CBR_EXS_INT_OCC				(1 << CBR_EXS_INT_OCC_BIT)
+#define CBR_EXS_PENDING				(1 << CBR_EXS_PENDING_BIT)
+#define CBR_EXS_QUEUED				(1 << CBR_EXS_QUEUED_BIT)
+#define CBR_EXS_TLB_INVAL			(1 << CBR_EXS_TLB_INVAL_BIT)
+#define CBR_EXS_EXCEPTION			(1 << CBR_EXS_EXCEPTION_BIT)
+#define CBR_EXS_CB_INT_PENDING			(1 << CBR_EXS_CB_INT_PENDING_BIT)
+
+/*
+ * Exceptions are retried for the following cases. If any OTHER bits are set
+ * in ecause, the exception is not retryable.
+ */
+#define EXCEPTION_RETRY_BITS (CBE_CAUSE_EXECUTION_HW_ERROR |		\
+			      CBE_CAUSE_TLBHW_ERROR |			\
+			      CBE_CAUSE_RA_REQUEST_TIMEOUT |		\
+			      CBE_CAUSE_RA_RESPONSE_NON_FATAL |		\
+			      CBE_CAUSE_HA_RESPONSE_NON_FATAL |		\
+			      CBE_CAUSE_RA_RESPONSE_DATA_ERROR |	\
+			      CBE_CAUSE_HA_RESPONSE_DATA_ERROR		\
+			      )
+
+/* Message queue head structure */
+union gru_mesqhead {
+	unsigned long	val;
+	struct {
+		unsigned int	head;
+		unsigned int	limit;
+	};
+};
+
+
+/* Generate the low word of a GRU instruction */
+static inline unsigned long
+__opdword(unsigned char opcode, unsigned char exopc, unsigned char xtype,
+       unsigned char iaa0, unsigned char iaa1,
+       unsigned long idef2, unsigned char ima)
+{
+    return (1 << GRU_CB_ICMD_SHFT) |
+	   ((unsigned long)CBS_ACTIVE << GRU_ISTATUS_SHFT) |
+	   (idef2<< GRU_IDEF2_SHFT) |
+	   (iaa0 << GRU_CB_IAA0_SHFT) |
+	   (iaa1 << GRU_CB_IAA1_SHFT) |
+	   (ima << GRU_CB_IMA_SHFT) |
+	   (xtype << GRU_CB_XTYPE_SHFT) |
+	   (opcode << GRU_CB_OPC_SHFT) |
+	   (exopc << GRU_CB_EXOPC_SHFT);
+}
+
+/*
+ * Architecture specific intrinsics
+ */
+static inline void gru_flush_cache(void *p)
+{
+	__flush_cache(p);
+}
+
+/*
+ * Store the lower 64 bits of the command including the "start" bit. Then
+ * start the instruction executing.
+ */
+static inline void gru_start_instruction(struct gru_instruction *ins, unsigned long op64)
+{
+	gru_ordered_store_ulong(ins, op64);
+	mb();
+	gru_flush_cache(ins);
+}
+
+
+/* Convert "hints" to IMA */
+#define CB_IMA(h)		((h) | IMA_UNMAPPED)
+
+/* Convert data segment cache line index into TRI0 / TRI1 value */
+#define GRU_DINDEX(i)		((i) * GRU_CACHE_LINE_BYTES)
+
+/* Inline functions for GRU instructions.
+ *     Note:
+ *     	- nelem and stride are in elements
+ *     	- tri0/tri1 is in bytes for the beginning of the data segment.
+ */
+static inline void gru_vload_phys(void *cb, unsigned long gpa,
+		unsigned int tri0, int iaa, unsigned long hints)
+{
+	struct gru_instruction *ins = (struct gru_instruction *)cb;
+
+	ins->baddr0 = (long)gpa | ((unsigned long)iaa << 62);
+	ins->nelem = 1;
+	ins->op1_stride = 1;
+	gru_start_instruction(ins, __opdword(OP_VLOAD, 0, XTYPE_DW, iaa, 0,
+					(unsigned long)tri0, CB_IMA(hints)));
+}
+
+static inline void gru_vstore_phys(void *cb, unsigned long gpa,
+		unsigned int tri0, int iaa, unsigned long hints)
+{
+	struct gru_instruction *ins = (struct gru_instruction *)cb;
+
+	ins->baddr0 = (long)gpa | ((unsigned long)iaa << 62);
+	ins->nelem = 1;
+	ins->op1_stride = 1;
+	gru_start_instruction(ins, __opdword(OP_VSTORE, 0, XTYPE_DW, iaa, 0,
+					(unsigned long)tri0, CB_IMA(hints)));
+}
+
+static inline void gru_vload(void *cb, unsigned long mem_addr,
+		unsigned int tri0, unsigned char xtype, unsigned long nelem,
+		unsigned long stride, unsigned long hints)
+{
+	struct gru_instruction *ins = (struct gru_instruction *)cb;
+
+	ins->baddr0 = (long)mem_addr;
+	ins->nelem = nelem;
+	ins->op1_stride = stride;
+	gru_start_instruction(ins, __opdword(OP_VLOAD, 0, xtype, IAA_RAM, 0,
+					(unsigned long)tri0, CB_IMA(hints)));
+}
+
+static inline void gru_vstore(void *cb, unsigned long mem_addr,
+		unsigned int tri0, unsigned char xtype, unsigned long nelem,
+		unsigned long stride, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)mem_addr;
+	ins->nelem = nelem;
+	ins->op1_stride = stride;
+	gru_start_instruction(ins, __opdword(OP_VSTORE, 0, xtype, IAA_RAM, 0,
+					tri0, CB_IMA(hints)));
+}
+
+static inline void gru_ivload(void *cb, unsigned long mem_addr,
+		unsigned int tri0, unsigned int tri1, unsigned char xtype,
+		unsigned long nelem, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)mem_addr;
+	ins->nelem = nelem;
+	ins->tri1_bufsize = tri1;
+	gru_start_instruction(ins, __opdword(OP_IVLOAD, 0, xtype, IAA_RAM, 0,
+					tri0, CB_IMA(hints)));
+}
+
+static inline void gru_ivstore(void *cb, unsigned long mem_addr,
+		unsigned int tri0, unsigned int tri1,
+		unsigned char xtype, unsigned long nelem, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)mem_addr;
+	ins->nelem = nelem;
+	ins->tri1_bufsize = tri1;
+	gru_start_instruction(ins, __opdword(OP_IVSTORE, 0, xtype, IAA_RAM, 0,
+					tri0, CB_IMA(hints)));
+}
+
+static inline void gru_vset(void *cb, unsigned long mem_addr,
+		unsigned long value, unsigned char xtype, unsigned long nelem,
+		unsigned long stride, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)mem_addr;
+	ins->op2_value_baddr1 = value;
+	ins->nelem = nelem;
+	ins->op1_stride = stride;
+	gru_start_instruction(ins, __opdword(OP_VSET, 0, xtype, IAA_RAM, 0,
+					 0, CB_IMA(hints)));
+}
+
+static inline void gru_ivset(void *cb, unsigned long mem_addr,
+		unsigned int tri1, unsigned long value, unsigned char xtype,
+		unsigned long nelem, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)mem_addr;
+	ins->op2_value_baddr1 = value;
+	ins->nelem = nelem;
+	ins->tri1_bufsize = tri1;
+	gru_start_instruction(ins, __opdword(OP_IVSET, 0, xtype, IAA_RAM, 0,
+					0, CB_IMA(hints)));
+}
+
+static inline void gru_vflush(void *cb, unsigned long mem_addr,
+		unsigned long nelem, unsigned char xtype, unsigned long stride,
+		unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)mem_addr;
+	ins->op1_stride = stride;
+	ins->nelem = nelem;
+	gru_start_instruction(ins, __opdword(OP_VFLUSH, 0, xtype, IAA_RAM, 0,
+					0, CB_IMA(hints)));
+}
+
+static inline void gru_nop(void *cb, int hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	gru_start_instruction(ins, __opdword(OP_NOP, 0, 0, 0, 0, 0, CB_IMA(hints)));
+}
+
+
+static inline void gru_bcopy(void *cb, const unsigned long src,
+		unsigned long dest,
+		unsigned int tri0, unsigned int xtype, unsigned long nelem,
+		unsigned int bufsize, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)src;
+	ins->op2_value_baddr1 = (long)dest;
+	ins->nelem = nelem;
+	ins->tri1_bufsize = bufsize;
+	gru_start_instruction(ins, __opdword(OP_BCOPY, 0, xtype, IAA_RAM,
+					IAA_RAM, tri0, CB_IMA(hints)));
+}
+
+static inline void gru_bstore(void *cb, const unsigned long src,
+		unsigned long dest, unsigned int tri0, unsigned int xtype,
+		unsigned long nelem, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)src;
+	ins->op2_value_baddr1 = (long)dest;
+	ins->nelem = nelem;
+	gru_start_instruction(ins, __opdword(OP_BSTORE, 0, xtype, 0, IAA_RAM,
+					tri0, CB_IMA(hints)));
+}
+
+static inline void gru_gamir(void *cb, int exopc, unsigned long src,
+		unsigned int xtype, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)src;
+	gru_start_instruction(ins, __opdword(OP_GAMIR, exopc, xtype, IAA_RAM, 0,
+					0, CB_IMA(hints)));
+}
+
+static inline void gru_gamirr(void *cb, int exopc, unsigned long src,
+		unsigned int xtype, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)src;
+	gru_start_instruction(ins, __opdword(OP_GAMIRR, exopc, xtype, IAA_RAM, 0,
+					0, CB_IMA(hints)));
+}
+
+static inline void gru_gamer(void *cb, int exopc, unsigned long src,
+		unsigned int xtype,
+		unsigned long operand1, unsigned long operand2,
+		unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)src;
+	ins->op1_stride = operand1;
+	ins->op2_value_baddr1 = operand2;
+	gru_start_instruction(ins, __opdword(OP_GAMER, exopc, xtype, IAA_RAM, 0,
+					0, CB_IMA(hints)));
+}
+
+static inline void gru_gamerr(void *cb, int exopc, unsigned long src,
+		unsigned int xtype, unsigned long operand1,
+		unsigned long operand2, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)src;
+	ins->op1_stride = operand1;
+	ins->op2_value_baddr1 = operand2;
+	gru_start_instruction(ins, __opdword(OP_GAMERR, exopc, xtype, IAA_RAM, 0,
+					0, CB_IMA(hints)));
+}
+
+static inline void gru_gamxr(void *cb, unsigned long src,
+		unsigned int tri0, unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)src;
+	ins->nelem = 4;
+	gru_start_instruction(ins, __opdword(OP_GAMXR, EOP_XR_CSWAP, XTYPE_DW,
+				 IAA_RAM, 0, 0, CB_IMA(hints)));
+}
+
+static inline void gru_mesq(void *cb, unsigned long queue,
+		unsigned long tri0, unsigned long nelem,
+		unsigned long hints)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	ins->baddr0 = (long)queue;
+	ins->nelem = nelem;
+	gru_start_instruction(ins, __opdword(OP_MESQ, 0, XTYPE_CL, IAA_RAM, 0,
+					tri0, CB_IMA(hints)));
+}
+
+static inline unsigned long gru_get_amo_value(void *cb)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	return ins->avalue;
+}
+
+static inline int gru_get_amo_value_head(void *cb)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	return ins->avalue & 0xffffffff;
+}
+
+static inline int gru_get_amo_value_limit(void *cb)
+{
+	struct gru_instruction *ins = (void *)cb;
+
+	return ins->avalue >> 32;
+}
+
+static inline union gru_mesqhead  gru_mesq_head(int head, int limit)
+{
+	union gru_mesqhead mqh;
+
+	mqh.head = head;
+	mqh.limit = limit;
+	return mqh;
+}
+
+/*
+ * Get struct control_block_extended_exc_detail for CB.
+ */
+extern int gru_get_cb_exception_detail(void *cb,
+		       struct control_block_extended_exc_detail *excdet);
+
+#define GRU_EXC_STR_SIZE		256
+
+
+/*
+ * Control block definition for checking status
+ */
+struct gru_control_block_status {
+	unsigned int	icmd		:1;
+	unsigned int	ima		:3;
+	unsigned int	reserved0	:4;
+	unsigned int	unused1		:24;
+	unsigned int	unused2		:24;
+	unsigned int	istatus		:2;
+	unsigned int	isubstatus	:4;
+	unsigned int	unused3		:2;
+};
+
+/* Get CB status */
+static inline int gru_get_cb_status(void *cb)
+{
+	struct gru_control_block_status *cbs = (void *)cb;
+
+	return cbs->istatus;
+}
+
+/* Get CB message queue substatus */
+static inline int gru_get_cb_message_queue_substatus(void *cb)
+{
+	struct gru_control_block_status *cbs = (void *)cb;
+
+	return cbs->isubstatus & CBSS_MSG_QUEUE_MASK;
+}
+
+/* Get CB substatus */
+static inline int gru_get_cb_substatus(void *cb)
+{
+	struct gru_control_block_status *cbs = (void *)cb;
+
+	return cbs->isubstatus;
+}
+
+/*
+ * User interface to check an instruction status. UPM and exceptions
+ * are handled automatically. However, this function does NOT wait
+ * for an active instruction to complete.
+ *
+ */
+static inline int gru_check_status(void *cb)
+{
+	struct gru_control_block_status *cbs = (void *)cb;
+	int ret;
+
+	ret = cbs->istatus;
+	if (ret != CBS_ACTIVE)
+		ret = gru_check_status_proc(cb);
+	return ret;
+}
+
+/*
+ * User interface (via inline function) to wait for an instruction
+ * to complete. Completion status (IDLE or EXCEPTION is returned
+ * to the user. Exception due to hardware errors are automatically
+ * retried before returning an exception.
+ *
+ */
+static inline int gru_wait(void *cb)
+{
+	return gru_wait_proc(cb);
+}
+
+/*
+ * Wait for CB to complete. Aborts program if error. (Note: error does NOT
+ * mean TLB mis - only fatal errors such as memory parity error or user
+ * bugs will cause termination.
+ */
+static inline void gru_wait_abort(void *cb)
+{
+	gru_wait_abort_proc(cb);
+}
+
+/*
+ * Get a pointer to the start of a gseg
+ * 	p	- Any valid pointer within the gseg
+ */
+static inline void *gru_get_gseg_pointer (void *p)
+{
+	return (void *)((unsigned long)p & ~(GRU_GSEG_PAGESIZE - 1));
+}
+
+/*
+ * Get a pointer to a control block
+ * 	gseg	- GSeg address returned from gru_get_thread_gru_segment()
+ * 	index	- index of desired CB
+ */
+static inline void *gru_get_cb_pointer(void *gseg,
+						      int index)
+{
+	return gseg + GRU_CB_BASE + index * GRU_HANDLE_STRIDE;
+}
+
+/*
+ * Get a pointer to a cacheline in the data segment portion of a GSeg
+ * 	gseg	- GSeg address returned from gru_get_thread_gru_segment()
+ * 	index	- index of desired cache line
+ */
+static inline void *gru_get_data_pointer(void *gseg, int index)
+{
+	return gseg + GRU_DS_BASE + index * GRU_CACHE_LINE_BYTES;
+}
+
+/*
+ * Convert a vaddr into the tri index within the GSEG
+ * 	vaddr		- virtual address of within gseg
+ */
+static inline int gru_get_tri(void *vaddr)
+{
+	return ((unsigned long)vaddr & (GRU_GSEG_PAGESIZE - 1)) - GRU_DS_BASE;
+}
+#endif		/* __GRU_INSTRUCTIONS_H__ */
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
new file mode 100644
index 000000000..93be82fc3
--- /dev/null
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -0,0 +1,907 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *              FAULT HANDLER FOR GRU DETECTED TLB MISSES
+ *
+ * This file contains code that handles TLB misses within the GRU.
+ * These misses are reported either via interrupts or user polling of
+ * the user CB.
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+#include <linux/security.h>
+#include <linux/prefetch.h>
+#include <asm/pgtable.h>
+#include "gru.h"
+#include "grutables.h"
+#include "grulib.h"
+#include "gru_instructions.h"
+#include <asm/uv/uv_hub.h>
+
+/* Return codes for vtop functions */
+#define VTOP_SUCCESS               0
+#define VTOP_INVALID               -1
+#define VTOP_RETRY                 -2
+
+
+/*
+ * Test if a physical address is a valid GRU GSEG address
+ */
+static inline int is_gru_paddr(unsigned long paddr)
+{
+	return paddr >= gru_start_paddr && paddr < gru_end_paddr;
+}
+
+/*
+ * Find the vma of a GRU segment. Caller must hold mmap_sem.
+ */
+struct vm_area_struct *gru_find_vma(unsigned long vaddr)
+{
+	struct vm_area_struct *vma;
+
+	vma = find_vma(current->mm, vaddr);
+	if (vma && vma->vm_start <= vaddr && vma->vm_ops == &gru_vm_ops)
+		return vma;
+	return NULL;
+}
+
+/*
+ * Find and lock the gts that contains the specified user vaddr.
+ *
+ * Returns:
+ * 	- *gts with the mmap_sem locked for read and the GTS locked.
+ *	- NULL if vaddr invalid OR is not a valid GSEG vaddr.
+ */
+
+static struct gru_thread_state *gru_find_lock_gts(unsigned long vaddr)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	struct gru_thread_state *gts = NULL;
+
+	down_read(&mm->mmap_sem);
+	vma = gru_find_vma(vaddr);
+	if (vma)
+		gts = gru_find_thread_state(vma, TSID(vaddr, vma));
+	if (gts)
+		mutex_lock(&gts->ts_ctxlock);
+	else
+		up_read(&mm->mmap_sem);
+	return gts;
+}
+
+static struct gru_thread_state *gru_alloc_locked_gts(unsigned long vaddr)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	struct gru_thread_state *gts = ERR_PTR(-EINVAL);
+
+	down_write(&mm->mmap_sem);
+	vma = gru_find_vma(vaddr);
+	if (!vma)
+		goto err;
+
+	gts = gru_alloc_thread_state(vma, TSID(vaddr, vma));
+	if (IS_ERR(gts))
+		goto err;
+	mutex_lock(&gts->ts_ctxlock);
+	downgrade_write(&mm->mmap_sem);
+	return gts;
+
+err:
+	up_write(&mm->mmap_sem);
+	return gts;
+}
+
+/*
+ * Unlock a GTS that was previously locked with gru_find_lock_gts().
+ */
+static void gru_unlock_gts(struct gru_thread_state *gts)
+{
+	mutex_unlock(&gts->ts_ctxlock);
+	up_read(&current->mm->mmap_sem);
+}
+
+/*
+ * Set a CB.istatus to active using a user virtual address. This must be done
+ * just prior to a TFH RESTART. The new cb.istatus is an in-cache status ONLY.
+ * If the line is evicted, the status may be lost. The in-cache update
+ * is necessary to prevent the user from seeing a stale cb.istatus that will
+ * change as soon as the TFH restart is complete. Races may cause an
+ * occasional failure to clear the cb.istatus, but that is ok.
+ */
+static void gru_cb_set_istatus_active(struct gru_instruction_bits *cbk)
+{
+	if (cbk) {
+		cbk->istatus = CBS_ACTIVE;
+	}
+}
+
+/*
+ * Read & clear a TFM
+ *
+ * The GRU has an array of fault maps. A map is private to a cpu
+ * Only one cpu will be accessing a cpu's fault map.
+ *
+ * This function scans the cpu-private fault map & clears all bits that
+ * are set. The function returns a bitmap that indicates the bits that
+ * were cleared. Note that sense the maps may be updated asynchronously by
+ * the GRU, atomic operations must be used to clear bits.
+ */
+static void get_clear_fault_map(struct gru_state *gru,
+				struct gru_tlb_fault_map *imap,
+				struct gru_tlb_fault_map *dmap)
+{
+	unsigned long i, k;
+	struct gru_tlb_fault_map *tfm;
+
+	tfm = get_tfm_for_cpu(gru, gru_cpu_fault_map_id());
+	prefetchw(tfm);		/* Helps on hardware, required for emulator */
+	for (i = 0; i < BITS_TO_LONGS(GRU_NUM_CBE); i++) {
+		k = tfm->fault_bits[i];
+		if (k)
+			k = xchg(&tfm->fault_bits[i], 0UL);
+		imap->fault_bits[i] = k;
+		k = tfm->done_bits[i];
+		if (k)
+			k = xchg(&tfm->done_bits[i], 0UL);
+		dmap->fault_bits[i] = k;
+	}
+
+	/*
+	 * Not functionally required but helps performance. (Required
+	 * on emulator)
+	 */
+	gru_flush_cache(tfm);
+}
+
+/*
+ * Atomic (interrupt context) & non-atomic (user context) functions to
+ * convert a vaddr into a physical address. The size of the page
+ * is returned in pageshift.
+ * 	returns:
+ * 		  0 - successful
+ * 		< 0 - error code
+ * 		  1 - (atomic only) try again in non-atomic context
+ */
+static int non_atomic_pte_lookup(struct vm_area_struct *vma,
+				 unsigned long vaddr, int write,
+				 unsigned long *paddr, int *pageshift)
+{
+	struct page *page;
+
+#ifdef CONFIG_HUGETLB_PAGE
+	*pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
+#else
+	*pageshift = PAGE_SHIFT;
+#endif
+	if (get_user_pages(vaddr, 1, write ? FOLL_WRITE : 0, &page, NULL) <= 0)
+		return -EFAULT;
+	*paddr = page_to_phys(page);
+	put_page(page);
+	return 0;
+}
+
+/*
+ * atomic_pte_lookup
+ *
+ * Convert a user virtual address to a physical address
+ * Only supports Intel large pages (2MB only) on x86_64.
+ *	ZZZ - hugepage support is incomplete
+ *
+ * NOTE: mmap_sem is already held on entry to this function. This
+ * guarantees existence of the page tables.
+ */
+static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
+	int write, unsigned long *paddr, int *pageshift)
+{
+	pgd_t *pgdp;
+	p4d_t *p4dp;
+	pud_t *pudp;
+	pmd_t *pmdp;
+	pte_t pte;
+
+	pgdp = pgd_offset(vma->vm_mm, vaddr);
+	if (unlikely(pgd_none(*pgdp)))
+		goto err;
+
+	p4dp = p4d_offset(pgdp, vaddr);
+	if (unlikely(p4d_none(*p4dp)))
+		goto err;
+
+	pudp = pud_offset(p4dp, vaddr);
+	if (unlikely(pud_none(*pudp)))
+		goto err;
+
+	pmdp = pmd_offset(pudp, vaddr);
+	if (unlikely(pmd_none(*pmdp)))
+		goto err;
+#ifdef CONFIG_X86_64
+	if (unlikely(pmd_large(*pmdp)))
+		pte = *(pte_t *) pmdp;
+	else
+#endif
+		pte = *pte_offset_kernel(pmdp, vaddr);
+
+	if (unlikely(!pte_present(pte) ||
+		     (write && (!pte_write(pte) || !pte_dirty(pte)))))
+		return 1;
+
+	*paddr = pte_pfn(pte) << PAGE_SHIFT;
+#ifdef CONFIG_HUGETLB_PAGE
+	*pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
+#else
+	*pageshift = PAGE_SHIFT;
+#endif
+	return 0;
+
+err:
+	return 1;
+}
+
+static int gru_vtop(struct gru_thread_state *gts, unsigned long vaddr,
+		    int write, int atomic, unsigned long *gpa, int *pageshift)
+{
+	struct mm_struct *mm = gts->ts_mm;
+	struct vm_area_struct *vma;
+	unsigned long paddr;
+	int ret, ps;
+
+	vma = find_vma(mm, vaddr);
+	if (!vma)
+		goto inval;
+
+	/*
+	 * Atomic lookup is faster & usually works even if called in non-atomic
+	 * context.
+	 */
+	rmb();	/* Must/check ms_range_active before loading PTEs */
+	ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &ps);
+	if (ret) {
+		if (atomic)
+			goto upm;
+		if (non_atomic_pte_lookup(vma, vaddr, write, &paddr, &ps))
+			goto inval;
+	}
+	if (is_gru_paddr(paddr))
+		goto inval;
+	paddr = paddr & ~((1UL << ps) - 1);
+	*gpa = uv_soc_phys_ram_to_gpa(paddr);
+	*pageshift = ps;
+	return VTOP_SUCCESS;
+
+inval:
+	return VTOP_INVALID;
+upm:
+	return VTOP_RETRY;
+}
+
+
+/*
+ * Flush a CBE from cache. The CBE is clean in the cache. Dirty the
+ * CBE cacheline so that the line will be written back to home agent.
+ * Otherwise the line may be silently dropped. This has no impact
+ * except on performance.
+ */
+static void gru_flush_cache_cbe(struct gru_control_block_extended *cbe)
+{
+	if (unlikely(cbe)) {
+		cbe->cbrexecstatus = 0;         /* make CL dirty */
+		gru_flush_cache(cbe);
+	}
+}
+
+/*
+ * Preload the TLB with entries that may be required. Currently, preloading
+ * is implemented only for BCOPY. Preload  <tlb_preload_count> pages OR to
+ * the end of the bcopy tranfer, whichever is smaller.
+ */
+static void gru_preload_tlb(struct gru_state *gru,
+			struct gru_thread_state *gts, int atomic,
+			unsigned long fault_vaddr, int asid, int write,
+			unsigned char tlb_preload_count,
+			struct gru_tlb_fault_handle *tfh,
+			struct gru_control_block_extended *cbe)
+{
+	unsigned long vaddr = 0, gpa;
+	int ret, pageshift;
+
+	if (cbe->opccpy != OP_BCOPY)
+		return;
+
+	if (fault_vaddr == cbe->cbe_baddr0)
+		vaddr = fault_vaddr + GRU_CACHE_LINE_BYTES * cbe->cbe_src_cl - 1;
+	else if (fault_vaddr == cbe->cbe_baddr1)
+		vaddr = fault_vaddr + (1 << cbe->xtypecpy) * cbe->cbe_nelemcur - 1;
+
+	fault_vaddr &= PAGE_MASK;
+	vaddr &= PAGE_MASK;
+	vaddr = min(vaddr, fault_vaddr + tlb_preload_count * PAGE_SIZE);
+
+	while (vaddr > fault_vaddr) {
+		ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift);
+		if (ret || tfh_write_only(tfh, gpa, GAA_RAM, vaddr, asid, write,
+					  GRU_PAGESIZE(pageshift)))
+			return;
+		gru_dbg(grudev,
+			"%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, rw %d, ps %d, gpa 0x%lx\n",
+			atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh,
+			vaddr, asid, write, pageshift, gpa);
+		vaddr -= PAGE_SIZE;
+		STAT(tlb_preload_page);
+	}
+}
+
+/*
+ * Drop a TLB entry into the GRU. The fault is described by info in an TFH.
+ *	Input:
+ *		cb    Address of user CBR. Null if not running in user context
+ * 	Return:
+ * 		  0 = dropin, exception, or switch to UPM successful
+ * 		  1 = range invalidate active
+ * 		< 0 = error code
+ *
+ */
+static int gru_try_dropin(struct gru_state *gru,
+			  struct gru_thread_state *gts,
+			  struct gru_tlb_fault_handle *tfh,
+			  struct gru_instruction_bits *cbk)
+{
+	struct gru_control_block_extended *cbe = NULL;
+	unsigned char tlb_preload_count = gts->ts_tlb_preload_count;
+	int pageshift = 0, asid, write, ret, atomic = !cbk, indexway;
+	unsigned long gpa = 0, vaddr = 0;
+
+	/*
+	 * NOTE: The GRU contains magic hardware that eliminates races between
+	 * TLB invalidates and TLB dropins. If an invalidate occurs
+	 * in the window between reading the TFH and the subsequent TLB dropin,
+	 * the dropin is ignored. This eliminates the need for additional locks.
+	 */
+
+	/*
+	 * Prefetch the CBE if doing TLB preloading
+	 */
+	if (unlikely(tlb_preload_count)) {
+		cbe = gru_tfh_to_cbe(tfh);
+		prefetchw(cbe);
+	}
+
+	/*
+	 * Error if TFH state is IDLE or FMM mode & the user issuing a UPM call.
+	 * Might be a hardware race OR a stupid user. Ignore FMM because FMM
+	 * is a transient state.
+	 */
+	if (tfh->status != TFHSTATUS_EXCEPTION) {
+		gru_flush_cache(tfh);
+		sync_core();
+		if (tfh->status != TFHSTATUS_EXCEPTION)
+			goto failnoexception;
+		STAT(tfh_stale_on_fault);
+	}
+	if (tfh->state == TFHSTATE_IDLE)
+		goto failidle;
+	if (tfh->state == TFHSTATE_MISS_FMM && cbk)
+		goto failfmm;
+
+	write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0;
+	vaddr = tfh->missvaddr;
+	asid = tfh->missasid;
+	indexway = tfh->indexway;
+	if (asid == 0)
+		goto failnoasid;
+
+	rmb();	/* TFH must be cache resident before reading ms_range_active */
+
+	/*
+	 * TFH is cache resident - at least briefly. Fail the dropin
+	 * if a range invalidate is active.
+	 */
+	if (atomic_read(&gts->ts_gms->ms_range_active))
+		goto failactive;
+
+	ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift);
+	if (ret == VTOP_INVALID)
+		goto failinval;
+	if (ret == VTOP_RETRY)
+		goto failupm;
+
+	if (!(gts->ts_sizeavail & GRU_SIZEAVAIL(pageshift))) {
+		gts->ts_sizeavail |= GRU_SIZEAVAIL(pageshift);
+		if (atomic || !gru_update_cch(gts)) {
+			gts->ts_force_cch_reload = 1;
+			goto failupm;
+		}
+	}
+
+	if (unlikely(cbe) && pageshift == PAGE_SHIFT) {
+		gru_preload_tlb(gru, gts, atomic, vaddr, asid, write, tlb_preload_count, tfh, cbe);
+		gru_flush_cache_cbe(cbe);
+	}
+
+	gru_cb_set_istatus_active(cbk);
+	gts->ustats.tlbdropin++;
+	tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write,
+			  GRU_PAGESIZE(pageshift));
+	gru_dbg(grudev,
+		"%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, indexway 0x%x,"
+		" rw %d, ps %d, gpa 0x%lx\n",
+		atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh, vaddr, asid,
+		indexway, write, pageshift, gpa);
+	STAT(tlb_dropin);
+	return 0;
+
+failnoasid:
+	/* No asid (delayed unload). */
+	STAT(tlb_dropin_fail_no_asid);
+	gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
+	if (!cbk)
+		tfh_user_polling_mode(tfh);
+	else
+		gru_flush_cache(tfh);
+	gru_flush_cache_cbe(cbe);
+	return -EAGAIN;
+
+failupm:
+	/* Atomic failure switch CBR to UPM */
+	tfh_user_polling_mode(tfh);
+	gru_flush_cache_cbe(cbe);
+	STAT(tlb_dropin_fail_upm);
+	gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
+	return 1;
+
+failfmm:
+	/* FMM state on UPM call */
+	gru_flush_cache(tfh);
+	gru_flush_cache_cbe(cbe);
+	STAT(tlb_dropin_fail_fmm);
+	gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state);
+	return 0;
+
+failnoexception:
+	/* TFH status did not show exception pending */
+	gru_flush_cache(tfh);
+	gru_flush_cache_cbe(cbe);
+	if (cbk)
+		gru_flush_cache(cbk);
+	STAT(tlb_dropin_fail_no_exception);
+	gru_dbg(grudev, "FAILED non-exception tfh: 0x%p, status %d, state %d\n",
+		tfh, tfh->status, tfh->state);
+	return 0;
+
+failidle:
+	/* TFH state was idle  - no miss pending */
+	gru_flush_cache(tfh);
+	gru_flush_cache_cbe(cbe);
+	if (cbk)
+		gru_flush_cache(cbk);
+	STAT(tlb_dropin_fail_idle);
+	gru_dbg(grudev, "FAILED idle tfh: 0x%p, state %d\n", tfh, tfh->state);
+	return 0;
+
+failinval:
+	/* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */
+	tfh_exception(tfh);
+	gru_flush_cache_cbe(cbe);
+	STAT(tlb_dropin_fail_invalid);
+	gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
+	return -EFAULT;
+
+failactive:
+	/* Range invalidate active. Switch to UPM iff atomic */
+	if (!cbk)
+		tfh_user_polling_mode(tfh);
+	else
+		gru_flush_cache(tfh);
+	gru_flush_cache_cbe(cbe);
+	STAT(tlb_dropin_fail_range_active);
+	gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n",
+		tfh, vaddr);
+	return 1;
+}
+
+/*
+ * Process an external interrupt from the GRU. This interrupt is
+ * caused by a TLB miss.
+ * Note that this is the interrupt handler that is registered with linux
+ * interrupt handlers.
+ */
+static irqreturn_t gru_intr(int chiplet, int blade)
+{
+	struct gru_state *gru;
+	struct gru_tlb_fault_map imap, dmap;
+	struct gru_thread_state *gts;
+	struct gru_tlb_fault_handle *tfh = NULL;
+	struct completion *cmp;
+	int cbrnum, ctxnum;
+
+	STAT(intr);
+
+	gru = &gru_base[blade]->bs_grus[chiplet];
+	if (!gru) {
+		dev_err(grudev, "GRU: invalid interrupt: cpu %d, chiplet %d\n",
+			raw_smp_processor_id(), chiplet);
+		return IRQ_NONE;
+	}
+	get_clear_fault_map(gru, &imap, &dmap);
+	gru_dbg(grudev,
+		"cpu %d, chiplet %d, gid %d, imap %016lx %016lx, dmap %016lx %016lx\n",
+		smp_processor_id(), chiplet, gru->gs_gid,
+		imap.fault_bits[0], imap.fault_bits[1],
+		dmap.fault_bits[0], dmap.fault_bits[1]);
+
+	for_each_cbr_in_tfm(cbrnum, dmap.fault_bits) {
+		STAT(intr_cbr);
+		cmp = gru->gs_blade->bs_async_wq;
+		if (cmp)
+			complete(cmp);
+		gru_dbg(grudev, "gid %d, cbr_done %d, done %d\n",
+			gru->gs_gid, cbrnum, cmp ? cmp->done : -1);
+	}
+
+	for_each_cbr_in_tfm(cbrnum, imap.fault_bits) {
+		STAT(intr_tfh);
+		tfh = get_tfh_by_index(gru, cbrnum);
+		prefetchw(tfh);	/* Helps on hdw, required for emulator */
+
+		/*
+		 * When hardware sets a bit in the faultmap, it implicitly
+		 * locks the GRU context so that it cannot be unloaded.
+		 * The gts cannot change until a TFH start/writestart command
+		 * is issued.
+		 */
+		ctxnum = tfh->ctxnum;
+		gts = gru->gs_gts[ctxnum];
+
+		/* Spurious interrupts can cause this. Ignore. */
+		if (!gts) {
+			STAT(intr_spurious);
+			continue;
+		}
+
+		/*
+		 * This is running in interrupt context. Trylock the mmap_sem.
+		 * If it fails, retry the fault in user context.
+		 */
+		gts->ustats.fmm_tlbmiss++;
+		if (!gts->ts_force_cch_reload &&
+					down_read_trylock(&gts->ts_mm->mmap_sem)) {
+			gru_try_dropin(gru, gts, tfh, NULL);
+			up_read(&gts->ts_mm->mmap_sem);
+		} else {
+			tfh_user_polling_mode(tfh);
+			STAT(intr_mm_lock_failed);
+		}
+	}
+	return IRQ_HANDLED;
+}
+
+irqreturn_t gru0_intr(int irq, void *dev_id)
+{
+	return gru_intr(0, uv_numa_blade_id());
+}
+
+irqreturn_t gru1_intr(int irq, void *dev_id)
+{
+	return gru_intr(1, uv_numa_blade_id());
+}
+
+irqreturn_t gru_intr_mblade(int irq, void *dev_id)
+{
+	int blade;
+
+	for_each_possible_blade(blade) {
+		if (uv_blade_nr_possible_cpus(blade))
+			continue;
+		 gru_intr(0, blade);
+		 gru_intr(1, blade);
+	}
+	return IRQ_HANDLED;
+}
+
+
+static int gru_user_dropin(struct gru_thread_state *gts,
+			   struct gru_tlb_fault_handle *tfh,
+			   void *cb)
+{
+	struct gru_mm_struct *gms = gts->ts_gms;
+	int ret;
+
+	gts->ustats.upm_tlbmiss++;
+	while (1) {
+		wait_event(gms->ms_wait_queue,
+			   atomic_read(&gms->ms_range_active) == 0);
+		prefetchw(tfh);	/* Helps on hdw, required for emulator */
+		ret = gru_try_dropin(gts->ts_gru, gts, tfh, cb);
+		if (ret <= 0)
+			return ret;
+		STAT(call_os_wait_queue);
+	}
+}
+
+/*
+ * This interface is called as a result of a user detecting a "call OS" bit
+ * in a user CB. Normally means that a TLB fault has occurred.
+ * 	cb - user virtual address of the CB
+ */
+int gru_handle_user_call_os(unsigned long cb)
+{
+	struct gru_tlb_fault_handle *tfh;
+	struct gru_thread_state *gts;
+	void *cbk;
+	int ucbnum, cbrnum, ret = -EINVAL;
+
+	STAT(call_os);
+
+	/* sanity check the cb pointer */
+	ucbnum = get_cb_number((void *)cb);
+	if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB)
+		return -EINVAL;
+
+	gts = gru_find_lock_gts(cb);
+	if (!gts)
+		return -EINVAL;
+	gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts);
+
+	if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE)
+		goto exit;
+
+	gru_check_context_placement(gts);
+
+	/*
+	 * CCH may contain stale data if ts_force_cch_reload is set.
+	 */
+	if (gts->ts_gru && gts->ts_force_cch_reload) {
+		gts->ts_force_cch_reload = 0;
+		gru_update_cch(gts);
+	}
+
+	ret = -EAGAIN;
+	cbrnum = thread_cbr_number(gts, ucbnum);
+	if (gts->ts_gru) {
+		tfh = get_tfh_by_index(gts->ts_gru, cbrnum);
+		cbk = get_gseg_base_address_cb(gts->ts_gru->gs_gru_base_vaddr,
+				gts->ts_ctxnum, ucbnum);
+		ret = gru_user_dropin(gts, tfh, cbk);
+	}
+exit:
+	gru_unlock_gts(gts);
+	return ret;
+}
+
+/*
+ * Fetch the exception detail information for a CB that terminated with
+ * an exception.
+ */
+int gru_get_exception_detail(unsigned long arg)
+{
+	struct control_block_extended_exc_detail excdet;
+	struct gru_control_block_extended *cbe;
+	struct gru_thread_state *gts;
+	int ucbnum, cbrnum, ret;
+
+	STAT(user_exception);
+	if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet)))
+		return -EFAULT;
+
+	gts = gru_find_lock_gts(excdet.cb);
+	if (!gts)
+		return -EINVAL;
+
+	gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", excdet.cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts);
+	ucbnum = get_cb_number((void *)excdet.cb);
+	if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) {
+		ret = -EINVAL;
+	} else if (gts->ts_gru) {
+		cbrnum = thread_cbr_number(gts, ucbnum);
+		cbe = get_cbe_by_index(gts->ts_gru, cbrnum);
+		gru_flush_cache(cbe);	/* CBE not coherent */
+		sync_core();		/* make sure we are have current data */
+		excdet.opc = cbe->opccpy;
+		excdet.exopc = cbe->exopccpy;
+		excdet.ecause = cbe->ecause;
+		excdet.exceptdet0 = cbe->idef1upd;
+		excdet.exceptdet1 = cbe->idef3upd;
+		excdet.cbrstate = cbe->cbrstate;
+		excdet.cbrexecstatus = cbe->cbrexecstatus;
+		gru_flush_cache_cbe(cbe);
+		ret = 0;
+	} else {
+		ret = -EAGAIN;
+	}
+	gru_unlock_gts(gts);
+
+	gru_dbg(grudev,
+		"cb 0x%lx, op %d, exopc %d, cbrstate %d, cbrexecstatus 0x%x, ecause 0x%x, "
+		"exdet0 0x%lx, exdet1 0x%x\n",
+		excdet.cb, excdet.opc, excdet.exopc, excdet.cbrstate, excdet.cbrexecstatus,
+		excdet.ecause, excdet.exceptdet0, excdet.exceptdet1);
+	if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet)))
+		ret = -EFAULT;
+	return ret;
+}
+
+/*
+ * User request to unload a context. Content is saved for possible reload.
+ */
+static int gru_unload_all_contexts(void)
+{
+	struct gru_thread_state *gts;
+	struct gru_state *gru;
+	int gid, ctxnum;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	foreach_gid(gid) {
+		gru = GID_TO_GRU(gid);
+		spin_lock(&gru->gs_lock);
+		for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) {
+			gts = gru->gs_gts[ctxnum];
+			if (gts && mutex_trylock(&gts->ts_ctxlock)) {
+				spin_unlock(&gru->gs_lock);
+				gru_unload_context(gts, 1);
+				mutex_unlock(&gts->ts_ctxlock);
+				spin_lock(&gru->gs_lock);
+			}
+		}
+		spin_unlock(&gru->gs_lock);
+	}
+	return 0;
+}
+
+int gru_user_unload_context(unsigned long arg)
+{
+	struct gru_thread_state *gts;
+	struct gru_unload_context_req req;
+
+	STAT(user_unload_context);
+	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+		return -EFAULT;
+
+	gru_dbg(grudev, "gseg 0x%lx\n", req.gseg);
+
+	if (!req.gseg)
+		return gru_unload_all_contexts();
+
+	gts = gru_find_lock_gts(req.gseg);
+	if (!gts)
+		return -EINVAL;
+
+	if (gts->ts_gru)
+		gru_unload_context(gts, 1);
+	gru_unlock_gts(gts);
+
+	return 0;
+}
+
+/*
+ * User request to flush a range of virtual addresses from the GRU TLB
+ * (Mainly for testing).
+ */
+int gru_user_flush_tlb(unsigned long arg)
+{
+	struct gru_thread_state *gts;
+	struct gru_flush_tlb_req req;
+	struct gru_mm_struct *gms;
+
+	STAT(user_flush_tlb);
+	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+		return -EFAULT;
+
+	gru_dbg(grudev, "gseg 0x%lx, vaddr 0x%lx, len 0x%lx\n", req.gseg,
+		req.vaddr, req.len);
+
+	gts = gru_find_lock_gts(req.gseg);
+	if (!gts)
+		return -EINVAL;
+
+	gms = gts->ts_gms;
+	gru_unlock_gts(gts);
+	gru_flush_tlb_range(gms, req.vaddr, req.len);
+
+	return 0;
+}
+
+/*
+ * Fetch GSEG statisticss
+ */
+long gru_get_gseg_statistics(unsigned long arg)
+{
+	struct gru_thread_state *gts;
+	struct gru_get_gseg_statistics_req req;
+
+	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+		return -EFAULT;
+
+	/*
+	 * The library creates arrays of contexts for threaded programs.
+	 * If no gts exists in the array, the context has never been used & all
+	 * statistics are implicitly 0.
+	 */
+	gts = gru_find_lock_gts(req.gseg);
+	if (gts) {
+		memcpy(&req.stats, &gts->ustats, sizeof(gts->ustats));
+		gru_unlock_gts(gts);
+	} else {
+		memset(&req.stats, 0, sizeof(gts->ustats));
+	}
+
+	if (copy_to_user((void __user *)arg, &req, sizeof(req)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Register the current task as the user of the GSEG slice.
+ * Needed for TLB fault interrupt targeting.
+ */
+int gru_set_context_option(unsigned long arg)
+{
+	struct gru_thread_state *gts;
+	struct gru_set_context_option_req req;
+	int ret = 0;
+
+	STAT(set_context_option);
+	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+		return -EFAULT;
+	gru_dbg(grudev, "op %d, gseg 0x%lx, value1 0x%lx\n", req.op, req.gseg, req.val1);
+
+	gts = gru_find_lock_gts(req.gseg);
+	if (!gts) {
+		gts = gru_alloc_locked_gts(req.gseg);
+		if (IS_ERR(gts))
+			return PTR_ERR(gts);
+	}
+
+	switch (req.op) {
+	case sco_blade_chiplet:
+		/* Select blade/chiplet for GRU context */
+		if (req.val0 < -1 || req.val0 >= GRU_CHIPLETS_PER_HUB ||
+		    req.val1 < -1 || req.val1 >= GRU_MAX_BLADES ||
+		    (req.val1 >= 0 && !gru_base[req.val1])) {
+			ret = -EINVAL;
+		} else {
+			gts->ts_user_blade_id = req.val1;
+			gts->ts_user_chiplet_id = req.val0;
+			gru_check_context_placement(gts);
+		}
+		break;
+	case sco_gseg_owner:
+ 		/* Register the current task as the GSEG owner */
+		gts->ts_tgid_owner = current->tgid;
+		break;
+	case sco_cch_req_slice:
+ 		/* Set the CCH slice option */
+		gts->ts_cch_req_slice = req.val1 & 3;
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	gru_unlock_gts(gts);
+
+	return ret;
+}
diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
new file mode 100644
index 000000000..104a05f6b
--- /dev/null
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -0,0 +1,623 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *              FILE OPERATIONS & DRIVER INITIALIZATION
+ *
+ * This file supports the user system call for file open, close, mmap, etc.
+ * This also incudes the driver initialization code.
+ *
+ *  Copyright (c) 2008-2014 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+#include <linux/interrupt.h>
+#include <linux/proc_fs.h>
+#include <linux/uaccess.h>
+#ifdef CONFIG_X86_64
+#include <asm/uv/uv_irq.h>
+#endif
+#include <asm/uv/uv.h>
+#include "gru.h"
+#include "grulib.h"
+#include "grutables.h"
+
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_mmrs.h>
+
+struct gru_blade_state *gru_base[GRU_MAX_BLADES] __read_mostly;
+unsigned long gru_start_paddr __read_mostly;
+void *gru_start_vaddr __read_mostly;
+unsigned long gru_end_paddr __read_mostly;
+unsigned int gru_max_gids __read_mostly;
+struct gru_stats_s gru_stats;
+
+/* Guaranteed user available resources on each node */
+static int max_user_cbrs, max_user_dsr_bytes;
+
+static struct miscdevice gru_miscdev;
+
+static int gru_supported(void)
+{
+	return is_uv_system() &&
+		(uv_hub_info->hub_revision < UV3_HUB_REVISION_BASE);
+}
+
+/*
+ * gru_vma_close
+ *
+ * Called when unmapping a device mapping. Frees all gru resources
+ * and tables belonging to the vma.
+ */
+static void gru_vma_close(struct vm_area_struct *vma)
+{
+	struct gru_vma_data *vdata;
+	struct gru_thread_state *gts;
+	struct list_head *entry, *next;
+
+	if (!vma->vm_private_data)
+		return;
+
+	vdata = vma->vm_private_data;
+	vma->vm_private_data = NULL;
+	gru_dbg(grudev, "vma %p, file %p, vdata %p\n", vma, vma->vm_file,
+				vdata);
+	list_for_each_safe(entry, next, &vdata->vd_head) {
+		gts =
+		    list_entry(entry, struct gru_thread_state, ts_next);
+		list_del(&gts->ts_next);
+		mutex_lock(&gts->ts_ctxlock);
+		if (gts->ts_gru)
+			gru_unload_context(gts, 0);
+		mutex_unlock(&gts->ts_ctxlock);
+		gts_drop(gts);
+	}
+	kfree(vdata);
+	STAT(vdata_free);
+}
+
+/*
+ * gru_file_mmap
+ *
+ * Called when mmapping the device.  Initializes the vma with a fault handler
+ * and private data structure necessary to allocate, track, and free the
+ * underlying pages.
+ */
+static int gru_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) != (VM_SHARED | VM_WRITE))
+		return -EPERM;
+
+	if (vma->vm_start & (GRU_GSEG_PAGESIZE - 1) ||
+				vma->vm_end & (GRU_GSEG_PAGESIZE - 1))
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_LOCKED |
+			 VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
+	vma->vm_page_prot = PAGE_SHARED;
+	vma->vm_ops = &gru_vm_ops;
+
+	vma->vm_private_data = gru_alloc_vma_data(vma, 0);
+	if (!vma->vm_private_data)
+		return -ENOMEM;
+
+	gru_dbg(grudev, "file %p, vaddr 0x%lx, vma %p, vdata %p\n",
+		file, vma->vm_start, vma, vma->vm_private_data);
+	return 0;
+}
+
+/*
+ * Create a new GRU context
+ */
+static int gru_create_new_context(unsigned long arg)
+{
+	struct gru_create_context_req req;
+	struct vm_area_struct *vma;
+	struct gru_vma_data *vdata;
+	int ret = -EINVAL;
+
+	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+		return -EFAULT;
+
+	if (req.data_segment_bytes > max_user_dsr_bytes)
+		return -EINVAL;
+	if (req.control_blocks > max_user_cbrs || !req.maximum_thread_count)
+		return -EINVAL;
+
+	if (!(req.options & GRU_OPT_MISS_MASK))
+		req.options |= GRU_OPT_MISS_FMM_INTR;
+
+	down_write(&current->mm->mmap_sem);
+	vma = gru_find_vma(req.gseg);
+	if (vma) {
+		vdata = vma->vm_private_data;
+		vdata->vd_user_options = req.options;
+		vdata->vd_dsr_au_count =
+		    GRU_DS_BYTES_TO_AU(req.data_segment_bytes);
+		vdata->vd_cbr_au_count = GRU_CB_COUNT_TO_AU(req.control_blocks);
+		vdata->vd_tlb_preload_count = req.tlb_preload_count;
+		ret = 0;
+	}
+	up_write(&current->mm->mmap_sem);
+
+	return ret;
+}
+
+/*
+ * Get GRU configuration info (temp - for emulator testing)
+ */
+static long gru_get_config_info(unsigned long arg)
+{
+	struct gru_config_info info;
+	int nodesperblade;
+
+	if (num_online_nodes() > 1 &&
+			(uv_node_to_blade_id(1) == uv_node_to_blade_id(0)))
+		nodesperblade = 2;
+	else
+		nodesperblade = 1;
+	memset(&info, 0, sizeof(info));
+	info.cpus = num_online_cpus();
+	info.nodes = num_online_nodes();
+	info.blades = info.nodes / nodesperblade;
+	info.chiplets = GRU_CHIPLETS_PER_BLADE * info.blades;
+
+	if (copy_to_user((void __user *)arg, &info, sizeof(info)))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * gru_file_unlocked_ioctl
+ *
+ * Called to update file attributes via IOCTL calls.
+ */
+static long gru_file_unlocked_ioctl(struct file *file, unsigned int req,
+				    unsigned long arg)
+{
+	int err = -EBADRQC;
+
+	gru_dbg(grudev, "file %p, req 0x%x, 0x%lx\n", file, req, arg);
+
+	switch (req) {
+	case GRU_CREATE_CONTEXT:
+		err = gru_create_new_context(arg);
+		break;
+	case GRU_SET_CONTEXT_OPTION:
+		err = gru_set_context_option(arg);
+		break;
+	case GRU_USER_GET_EXCEPTION_DETAIL:
+		err = gru_get_exception_detail(arg);
+		break;
+	case GRU_USER_UNLOAD_CONTEXT:
+		err = gru_user_unload_context(arg);
+		break;
+	case GRU_USER_FLUSH_TLB:
+		err = gru_user_flush_tlb(arg);
+		break;
+	case GRU_USER_CALL_OS:
+		err = gru_handle_user_call_os(arg);
+		break;
+	case GRU_GET_GSEG_STATISTICS:
+		err = gru_get_gseg_statistics(arg);
+		break;
+	case GRU_KTEST:
+		err = gru_ktest(arg);
+		break;
+	case GRU_GET_CONFIG_INFO:
+		err = gru_get_config_info(arg);
+		break;
+	case GRU_DUMP_CHIPLET_STATE:
+		err = gru_dump_chiplet_request(arg);
+		break;
+	}
+	return err;
+}
+
+/*
+ * Called at init time to build tables for all GRUs that are present in the
+ * system.
+ */
+static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr,
+			     void *vaddr, int blade_id, int chiplet_id)
+{
+	spin_lock_init(&gru->gs_lock);
+	spin_lock_init(&gru->gs_asid_lock);
+	gru->gs_gru_base_paddr = paddr;
+	gru->gs_gru_base_vaddr = vaddr;
+	gru->gs_gid = blade_id * GRU_CHIPLETS_PER_BLADE + chiplet_id;
+	gru->gs_blade = gru_base[blade_id];
+	gru->gs_blade_id = blade_id;
+	gru->gs_chiplet_id = chiplet_id;
+	gru->gs_cbr_map = (GRU_CBR_AU == 64) ? ~0 : (1UL << GRU_CBR_AU) - 1;
+	gru->gs_dsr_map = (1UL << GRU_DSR_AU) - 1;
+	gru->gs_asid_limit = MAX_ASID;
+	gru_tgh_flush_init(gru);
+	if (gru->gs_gid >= gru_max_gids)
+		gru_max_gids = gru->gs_gid + 1;
+	gru_dbg(grudev, "bid %d, gid %d, vaddr %p (0x%lx)\n",
+		blade_id, gru->gs_gid, gru->gs_gru_base_vaddr,
+		gru->gs_gru_base_paddr);
+}
+
+static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr)
+{
+	int pnode, nid, bid, chip;
+	int cbrs, dsrbytes, n;
+	int order = get_order(sizeof(struct gru_blade_state));
+	struct page *page;
+	struct gru_state *gru;
+	unsigned long paddr;
+	void *vaddr;
+
+	max_user_cbrs = GRU_NUM_CB;
+	max_user_dsr_bytes = GRU_NUM_DSR_BYTES;
+	for_each_possible_blade(bid) {
+		pnode = uv_blade_to_pnode(bid);
+		nid = uv_blade_to_memory_nid(bid);/* -1 if no memory on blade */
+		page = alloc_pages_node(nid, GFP_KERNEL, order);
+		if (!page)
+			goto fail;
+		gru_base[bid] = page_address(page);
+		memset(gru_base[bid], 0, sizeof(struct gru_blade_state));
+		gru_base[bid]->bs_lru_gru = &gru_base[bid]->bs_grus[0];
+		spin_lock_init(&gru_base[bid]->bs_lock);
+		init_rwsem(&gru_base[bid]->bs_kgts_sema);
+
+		dsrbytes = 0;
+		cbrs = 0;
+		for (gru = gru_base[bid]->bs_grus, chip = 0;
+				chip < GRU_CHIPLETS_PER_BLADE;
+				chip++, gru++) {
+			paddr = gru_chiplet_paddr(gru_base_paddr, pnode, chip);
+			vaddr = gru_chiplet_vaddr(gru_base_vaddr, pnode, chip);
+			gru_init_chiplet(gru, paddr, vaddr, bid, chip);
+			n = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
+			cbrs = max(cbrs, n);
+			n = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES;
+			dsrbytes = max(dsrbytes, n);
+		}
+		max_user_cbrs = min(max_user_cbrs, cbrs);
+		max_user_dsr_bytes = min(max_user_dsr_bytes, dsrbytes);
+	}
+
+	return 0;
+
+fail:
+	for (bid--; bid >= 0; bid--)
+		free_pages((unsigned long)gru_base[bid], order);
+	return -ENOMEM;
+}
+
+static void gru_free_tables(void)
+{
+	int bid;
+	int order = get_order(sizeof(struct gru_state) *
+			      GRU_CHIPLETS_PER_BLADE);
+
+	for (bid = 0; bid < GRU_MAX_BLADES; bid++)
+		free_pages((unsigned long)gru_base[bid], order);
+}
+
+static unsigned long gru_chiplet_cpu_to_mmr(int chiplet, int cpu, int *corep)
+{
+	unsigned long mmr = 0;
+	int core;
+
+	/*
+	 * We target the cores of a blade and not the hyperthreads themselves.
+	 * There is a max of 8 cores per socket and 2 sockets per blade,
+	 * making for a max total of 16 cores (i.e., 16 CPUs without
+	 * hyperthreading and 32 CPUs with hyperthreading).
+	 */
+	core = uv_cpu_core_number(cpu) + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu);
+	if (core >= GRU_NUM_TFM || uv_cpu_ht_number(cpu))
+		return 0;
+
+	if (chiplet == 0) {
+		mmr = UVH_GR0_TLB_INT0_CONFIG +
+		    core * (UVH_GR0_TLB_INT1_CONFIG - UVH_GR0_TLB_INT0_CONFIG);
+	} else if (chiplet == 1) {
+		mmr = UVH_GR1_TLB_INT0_CONFIG +
+		    core * (UVH_GR1_TLB_INT1_CONFIG - UVH_GR1_TLB_INT0_CONFIG);
+	} else {
+		BUG();
+	}
+
+	*corep = core;
+	return mmr;
+}
+
+#ifdef CONFIG_IA64
+
+static int gru_irq_count[GRU_CHIPLETS_PER_BLADE];
+
+static void gru_noop(struct irq_data *d)
+{
+}
+
+static struct irq_chip gru_chip[GRU_CHIPLETS_PER_BLADE] = {
+	[0 ... GRU_CHIPLETS_PER_BLADE - 1] {
+		.irq_mask	= gru_noop,
+		.irq_unmask	= gru_noop,
+		.irq_ack	= gru_noop
+	}
+};
+
+static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name,
+			irq_handler_t irq_handler, int cpu, int blade)
+{
+	unsigned long mmr;
+	int irq = IRQ_GRU + chiplet;
+	int ret, core;
+
+	mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core);
+	if (mmr == 0)
+		return 0;
+
+	if (gru_irq_count[chiplet] == 0) {
+		gru_chip[chiplet].name = irq_name;
+		ret = irq_set_chip(irq, &gru_chip[chiplet]);
+		if (ret) {
+			printk(KERN_ERR "%s: set_irq_chip failed, errno=%d\n",
+			       GRU_DRIVER_ID_STR, -ret);
+			return ret;
+		}
+
+		ret = request_irq(irq, irq_handler, 0, irq_name, NULL);
+		if (ret) {
+			printk(KERN_ERR "%s: request_irq failed, errno=%d\n",
+			       GRU_DRIVER_ID_STR, -ret);
+			return ret;
+		}
+	}
+	gru_irq_count[chiplet]++;
+
+	return 0;
+}
+
+static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade)
+{
+	unsigned long mmr;
+	int core, irq = IRQ_GRU + chiplet;
+
+	if (gru_irq_count[chiplet] == 0)
+		return;
+
+	mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core);
+	if (mmr == 0)
+		return;
+
+	if (--gru_irq_count[chiplet] == 0)
+		free_irq(irq, NULL);
+}
+
+#elif defined CONFIG_X86_64
+
+static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name,
+			irq_handler_t irq_handler, int cpu, int blade)
+{
+	unsigned long mmr;
+	int irq, core;
+	int ret;
+
+	mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core);
+	if (mmr == 0)
+		return 0;
+
+	irq = uv_setup_irq(irq_name, cpu, blade, mmr, UV_AFFINITY_CPU);
+	if (irq < 0) {
+		printk(KERN_ERR "%s: uv_setup_irq failed, errno=%d\n",
+		       GRU_DRIVER_ID_STR, -irq);
+		return irq;
+	}
+
+	ret = request_irq(irq, irq_handler, 0, irq_name, NULL);
+	if (ret) {
+		uv_teardown_irq(irq);
+		printk(KERN_ERR "%s: request_irq failed, errno=%d\n",
+		       GRU_DRIVER_ID_STR, -ret);
+		return ret;
+	}
+	gru_base[blade]->bs_grus[chiplet].gs_irq[core] = irq;
+	return 0;
+}
+
+static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade)
+{
+	int irq, core;
+	unsigned long mmr;
+
+	mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core);
+	if (mmr) {
+		irq = gru_base[blade]->bs_grus[chiplet].gs_irq[core];
+		if (irq) {
+			free_irq(irq, NULL);
+			uv_teardown_irq(irq);
+		}
+	}
+}
+
+#endif
+
+static void gru_teardown_tlb_irqs(void)
+{
+	int blade;
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		blade = uv_cpu_to_blade_id(cpu);
+		gru_chiplet_teardown_tlb_irq(0, cpu, blade);
+		gru_chiplet_teardown_tlb_irq(1, cpu, blade);
+	}
+	for_each_possible_blade(blade) {
+		if (uv_blade_nr_possible_cpus(blade))
+			continue;
+		gru_chiplet_teardown_tlb_irq(0, 0, blade);
+		gru_chiplet_teardown_tlb_irq(1, 0, blade);
+	}
+}
+
+static int gru_setup_tlb_irqs(void)
+{
+	int blade;
+	int cpu;
+	int ret;
+
+	for_each_online_cpu(cpu) {
+		blade = uv_cpu_to_blade_id(cpu);
+		ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru0_intr, cpu, blade);
+		if (ret != 0)
+			goto exit1;
+
+		ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru1_intr, cpu, blade);
+		if (ret != 0)
+			goto exit1;
+	}
+	for_each_possible_blade(blade) {
+		if (uv_blade_nr_possible_cpus(blade))
+			continue;
+		ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru_intr_mblade, 0, blade);
+		if (ret != 0)
+			goto exit1;
+
+		ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru_intr_mblade, 0, blade);
+		if (ret != 0)
+			goto exit1;
+	}
+
+	return 0;
+
+exit1:
+	gru_teardown_tlb_irqs();
+	return ret;
+}
+
+/*
+ * gru_init
+ *
+ * Called at boot or module load time to initialize the GRUs.
+ */
+static int __init gru_init(void)
+{
+	int ret;
+
+	if (!gru_supported())
+		return 0;
+
+#if defined CONFIG_IA64
+	gru_start_paddr = 0xd000000000UL; /* ZZZZZZZZZZZZZZZZZZZ fixme */
+#else
+	gru_start_paddr = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR) &
+				0x7fffffffffffUL;
+#endif
+	gru_start_vaddr = __va(gru_start_paddr);
+	gru_end_paddr = gru_start_paddr + GRU_MAX_BLADES * GRU_SIZE;
+	printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n",
+	       gru_start_paddr, gru_end_paddr);
+	ret = misc_register(&gru_miscdev);
+	if (ret) {
+		printk(KERN_ERR "%s: misc_register failed\n",
+		       GRU_DRIVER_ID_STR);
+		goto exit0;
+	}
+
+	ret = gru_proc_init();
+	if (ret) {
+		printk(KERN_ERR "%s: proc init failed\n", GRU_DRIVER_ID_STR);
+		goto exit1;
+	}
+
+	ret = gru_init_tables(gru_start_paddr, gru_start_vaddr);
+	if (ret) {
+		printk(KERN_ERR "%s: init tables failed\n", GRU_DRIVER_ID_STR);
+		goto exit2;
+	}
+
+	ret = gru_setup_tlb_irqs();
+	if (ret != 0)
+		goto exit3;
+
+	gru_kservices_init();
+
+	printk(KERN_INFO "%s: v%s\n", GRU_DRIVER_ID_STR,
+	       GRU_DRIVER_VERSION_STR);
+	return 0;
+
+exit3:
+	gru_free_tables();
+exit2:
+	gru_proc_exit();
+exit1:
+	misc_deregister(&gru_miscdev);
+exit0:
+	return ret;
+
+}
+
+static void __exit gru_exit(void)
+{
+	if (!gru_supported())
+		return;
+
+	gru_teardown_tlb_irqs();
+	gru_kservices_exit();
+	gru_free_tables();
+	misc_deregister(&gru_miscdev);
+	gru_proc_exit();
+}
+
+static const struct file_operations gru_fops = {
+	.owner		= THIS_MODULE,
+	.unlocked_ioctl	= gru_file_unlocked_ioctl,
+	.mmap		= gru_file_mmap,
+	.llseek		= noop_llseek,
+};
+
+static struct miscdevice gru_miscdev = {
+	.minor		= MISC_DYNAMIC_MINOR,
+	.name		= "gru",
+	.fops		= &gru_fops,
+};
+
+const struct vm_operations_struct gru_vm_ops = {
+	.close		= gru_vma_close,
+	.fault		= gru_fault,
+};
+
+#ifndef MODULE
+fs_initcall(gru_init);
+#else
+module_init(gru_init);
+#endif
+module_exit(gru_exit);
+
+module_param(gru_options, ulong, 0644);
+MODULE_PARM_DESC(gru_options, "Various debug options");
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION(GRU_DRIVER_ID_STR GRU_DRIVER_VERSION_STR);
+MODULE_VERSION(GRU_DRIVER_VERSION_STR);
+
diff --git a/drivers/misc/sgi-gru/gruhandles.c b/drivers/misc/sgi-gru/gruhandles.c
new file mode 100644
index 000000000..1ee8e82ba
--- /dev/null
+++ b/drivers/misc/sgi-gru/gruhandles.c
@@ -0,0 +1,210 @@
+/*
+ *              GRU KERNEL MCS INSTRUCTIONS
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include "gru.h"
+#include "grulib.h"
+#include "grutables.h"
+
+/* 10 sec */
+#ifdef CONFIG_IA64
+#include <asm/processor.h>
+#define GRU_OPERATION_TIMEOUT	(((cycles_t) local_cpu_data->itc_freq)*10)
+#define CLKS2NSEC(c)		((c) *1000000000 / local_cpu_data->itc_freq)
+#else
+#include <asm/tsc.h>
+#define GRU_OPERATION_TIMEOUT	((cycles_t) tsc_khz*10*1000)
+#define CLKS2NSEC(c)		((c) * 1000000 / tsc_khz)
+#endif
+
+/* Extract the status field from a kernel handle */
+#define GET_MSEG_HANDLE_STATUS(h)	(((*(unsigned long *)(h)) >> 16) & 3)
+
+struct mcs_op_statistic mcs_op_statistics[mcsop_last];
+
+static void update_mcs_stats(enum mcs_op op, unsigned long clks)
+{
+	unsigned long nsec;
+
+	nsec = CLKS2NSEC(clks);
+	atomic_long_inc(&mcs_op_statistics[op].count);
+	atomic_long_add(nsec, &mcs_op_statistics[op].total);
+	if (mcs_op_statistics[op].max < nsec)
+		mcs_op_statistics[op].max = nsec;
+}
+
+static void start_instruction(void *h)
+{
+	unsigned long *w0 = h;
+
+	wmb();		/* setting CMD/STATUS bits must be last */
+	*w0 = *w0 | 0x20001;
+	gru_flush_cache(h);
+}
+
+static void report_instruction_timeout(void *h)
+{
+	unsigned long goff = GSEGPOFF((unsigned long)h);
+	char *id = "???";
+
+	if (TYPE_IS(CCH, goff))
+		id = "CCH";
+	else if (TYPE_IS(TGH, goff))
+		id = "TGH";
+	else if (TYPE_IS(TFH, goff))
+		id = "TFH";
+
+	panic(KERN_ALERT "GRU %p (%s) is malfunctioning\n", h, id);
+}
+
+static int wait_instruction_complete(void *h, enum mcs_op opc)
+{
+	int status;
+	unsigned long start_time = get_cycles();
+
+	while (1) {
+		cpu_relax();
+		status = GET_MSEG_HANDLE_STATUS(h);
+		if (status != CCHSTATUS_ACTIVE)
+			break;
+		if (GRU_OPERATION_TIMEOUT < (get_cycles() - start_time)) {
+			report_instruction_timeout(h);
+			start_time = get_cycles();
+		}
+	}
+	if (gru_options & OPT_STATS)
+		update_mcs_stats(opc, get_cycles() - start_time);
+	return status;
+}
+
+int cch_allocate(struct gru_context_configuration_handle *cch)
+{
+	int ret;
+
+	cch->opc = CCHOP_ALLOCATE;
+	start_instruction(cch);
+	ret = wait_instruction_complete(cch, cchop_allocate);
+
+	/*
+	 * Stop speculation into the GSEG being mapped by the previous ALLOCATE.
+	 * The GSEG memory does not exist until the ALLOCATE completes.
+	 */
+	sync_core();
+	return ret;
+}
+
+int cch_start(struct gru_context_configuration_handle *cch)
+{
+	cch->opc = CCHOP_START;
+	start_instruction(cch);
+	return wait_instruction_complete(cch, cchop_start);
+}
+
+int cch_interrupt(struct gru_context_configuration_handle *cch)
+{
+	cch->opc = CCHOP_INTERRUPT;
+	start_instruction(cch);
+	return wait_instruction_complete(cch, cchop_interrupt);
+}
+
+int cch_deallocate(struct gru_context_configuration_handle *cch)
+{
+	int ret;
+
+	cch->opc = CCHOP_DEALLOCATE;
+	start_instruction(cch);
+	ret = wait_instruction_complete(cch, cchop_deallocate);
+
+	/*
+	 * Stop speculation into the GSEG being unmapped by the previous
+	 * DEALLOCATE.
+	 */
+	sync_core();
+	return ret;
+}
+
+int cch_interrupt_sync(struct gru_context_configuration_handle
+				     *cch)
+{
+	cch->opc = CCHOP_INTERRUPT_SYNC;
+	start_instruction(cch);
+	return wait_instruction_complete(cch, cchop_interrupt_sync);
+}
+
+int tgh_invalidate(struct gru_tlb_global_handle *tgh,
+				 unsigned long vaddr, unsigned long vaddrmask,
+				 int asid, int pagesize, int global, int n,
+				 unsigned short ctxbitmap)
+{
+	tgh->vaddr = vaddr;
+	tgh->asid = asid;
+	tgh->pagesize = pagesize;
+	tgh->n = n;
+	tgh->global = global;
+	tgh->vaddrmask = vaddrmask;
+	tgh->ctxbitmap = ctxbitmap;
+	tgh->opc = TGHOP_TLBINV;
+	start_instruction(tgh);
+	return wait_instruction_complete(tgh, tghop_invalidate);
+}
+
+int tfh_write_only(struct gru_tlb_fault_handle *tfh,
+				  unsigned long paddr, int gaa,
+				  unsigned long vaddr, int asid, int dirty,
+				  int pagesize)
+{
+	tfh->fillasid = asid;
+	tfh->fillvaddr = vaddr;
+	tfh->pfn = paddr >> GRU_PADDR_SHIFT;
+	tfh->gaa = gaa;
+	tfh->dirty = dirty;
+	tfh->pagesize = pagesize;
+	tfh->opc = TFHOP_WRITE_ONLY;
+	start_instruction(tfh);
+	return wait_instruction_complete(tfh, tfhop_write_only);
+}
+
+void tfh_write_restart(struct gru_tlb_fault_handle *tfh,
+				     unsigned long paddr, int gaa,
+				     unsigned long vaddr, int asid, int dirty,
+				     int pagesize)
+{
+	tfh->fillasid = asid;
+	tfh->fillvaddr = vaddr;
+	tfh->pfn = paddr >> GRU_PADDR_SHIFT;
+	tfh->gaa = gaa;
+	tfh->dirty = dirty;
+	tfh->pagesize = pagesize;
+	tfh->opc = TFHOP_WRITE_RESTART;
+	start_instruction(tfh);
+}
+
+void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh)
+{
+	tfh->opc = TFHOP_USER_POLLING_MODE;
+	start_instruction(tfh);
+}
+
+void tfh_exception(struct gru_tlb_fault_handle *tfh)
+{
+	tfh->opc = TFHOP_EXCEPTION;
+	start_instruction(tfh);
+}
+
diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h
new file mode 100644
index 000000000..3d7bd36a1
--- /dev/null
+++ b/drivers/misc/sgi-gru/gruhandles.h
@@ -0,0 +1,530 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *              GRU HANDLE DEFINITION
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __GRUHANDLES_H__
+#define __GRUHANDLES_H__
+#include "gru_instructions.h"
+
+/*
+ * Manifest constants for GRU Memory Map
+ */
+#define GRU_GSEG0_BASE		0
+#define GRU_MCS_BASE		(64 * 1024 * 1024)
+#define GRU_SIZE		(128UL * 1024 * 1024)
+
+/* Handle & resource counts */
+#define GRU_NUM_CB		128
+#define GRU_NUM_DSR_BYTES	(32 * 1024)
+#define GRU_NUM_TFM		16
+#define GRU_NUM_TGH		24
+#define GRU_NUM_CBE		128
+#define GRU_NUM_TFH		128
+#define GRU_NUM_CCH		16
+
+/* Maximum resource counts that can be reserved by user programs */
+#define GRU_NUM_USER_CBR	GRU_NUM_CBE
+#define GRU_NUM_USER_DSR_BYTES	GRU_NUM_DSR_BYTES
+
+/* Bytes per handle & handle stride. Code assumes all cb, tfh, cbe handles
+ * are the same */
+#define GRU_HANDLE_BYTES	64
+#define GRU_HANDLE_STRIDE	256
+
+/* Base addresses of handles */
+#define GRU_TFM_BASE		(GRU_MCS_BASE + 0x00000)
+#define GRU_TGH_BASE		(GRU_MCS_BASE + 0x08000)
+#define GRU_CBE_BASE		(GRU_MCS_BASE + 0x10000)
+#define GRU_TFH_BASE		(GRU_MCS_BASE + 0x18000)
+#define GRU_CCH_BASE		(GRU_MCS_BASE + 0x20000)
+
+/* User gseg constants */
+#define GRU_GSEG_STRIDE		(4 * 1024 * 1024)
+#define GSEG_BASE(a)		((a) & ~(GRU_GSEG_PAGESIZE - 1))
+
+/* Data segment constants */
+#define GRU_DSR_AU_BYTES	1024
+#define GRU_DSR_CL		(GRU_NUM_DSR_BYTES / GRU_CACHE_LINE_BYTES)
+#define GRU_DSR_AU_CL		(GRU_DSR_AU_BYTES / GRU_CACHE_LINE_BYTES)
+#define GRU_DSR_AU		(GRU_NUM_DSR_BYTES / GRU_DSR_AU_BYTES)
+
+/* Control block constants */
+#define GRU_CBR_AU_SIZE		2
+#define GRU_CBR_AU		(GRU_NUM_CBE / GRU_CBR_AU_SIZE)
+
+/* Convert resource counts to the number of AU */
+#define GRU_DS_BYTES_TO_AU(n)	DIV_ROUND_UP(n, GRU_DSR_AU_BYTES)
+#define GRU_CB_COUNT_TO_AU(n)	DIV_ROUND_UP(n, GRU_CBR_AU_SIZE)
+
+/* UV limits */
+#define GRU_CHIPLETS_PER_HUB	2
+#define GRU_HUBS_PER_BLADE	1
+#define GRU_CHIPLETS_PER_BLADE	(GRU_HUBS_PER_BLADE * GRU_CHIPLETS_PER_HUB)
+
+/* User GRU Gseg offsets */
+#define GRU_CB_BASE		0
+#define GRU_CB_LIMIT		(GRU_CB_BASE + GRU_HANDLE_STRIDE * GRU_NUM_CBE)
+#define GRU_DS_BASE		0x20000
+#define GRU_DS_LIMIT		(GRU_DS_BASE + GRU_NUM_DSR_BYTES)
+
+/* Convert a GRU physical address to the chiplet offset */
+#define GSEGPOFF(h) 		((h) & (GRU_SIZE - 1))
+
+/* Convert an arbitrary handle address to the beginning of the GRU segment */
+#define GRUBASE(h)		((void *)((unsigned long)(h) & ~(GRU_SIZE - 1)))
+
+/* Test a valid handle address to determine the type */
+#define TYPE_IS(hn, h)		((h) >= GRU_##hn##_BASE && (h) <	\
+		GRU_##hn##_BASE + GRU_NUM_##hn * GRU_HANDLE_STRIDE &&   \
+		(((h) & (GRU_HANDLE_STRIDE - 1)) == 0))
+
+
+/* General addressing macros. */
+static inline void *get_gseg_base_address(void *base, int ctxnum)
+{
+	return (void *)(base + GRU_GSEG0_BASE + GRU_GSEG_STRIDE * ctxnum);
+}
+
+static inline void *get_gseg_base_address_cb(void *base, int ctxnum, int line)
+{
+	return (void *)(get_gseg_base_address(base, ctxnum) +
+			GRU_CB_BASE + GRU_HANDLE_STRIDE * line);
+}
+
+static inline void *get_gseg_base_address_ds(void *base, int ctxnum, int line)
+{
+	return (void *)(get_gseg_base_address(base, ctxnum) + GRU_DS_BASE +
+			GRU_CACHE_LINE_BYTES * line);
+}
+
+static inline struct gru_tlb_fault_map *get_tfm(void *base, int ctxnum)
+{
+	return (struct gru_tlb_fault_map *)(base + GRU_TFM_BASE +
+					ctxnum * GRU_HANDLE_STRIDE);
+}
+
+static inline struct gru_tlb_global_handle *get_tgh(void *base, int ctxnum)
+{
+	return (struct gru_tlb_global_handle *)(base + GRU_TGH_BASE +
+					ctxnum * GRU_HANDLE_STRIDE);
+}
+
+static inline struct gru_control_block_extended *get_cbe(void *base, int ctxnum)
+{
+	return (struct gru_control_block_extended *)(base + GRU_CBE_BASE +
+					ctxnum * GRU_HANDLE_STRIDE);
+}
+
+static inline struct gru_tlb_fault_handle *get_tfh(void *base, int ctxnum)
+{
+	return (struct gru_tlb_fault_handle *)(base + GRU_TFH_BASE +
+					ctxnum * GRU_HANDLE_STRIDE);
+}
+
+static inline struct gru_context_configuration_handle *get_cch(void *base,
+					int ctxnum)
+{
+	return (struct gru_context_configuration_handle *)(base +
+				GRU_CCH_BASE + ctxnum * GRU_HANDLE_STRIDE);
+}
+
+static inline unsigned long get_cb_number(void *cb)
+{
+	return (((unsigned long)cb - GRU_CB_BASE) % GRU_GSEG_PAGESIZE) /
+					GRU_HANDLE_STRIDE;
+}
+
+/* byte offset to a specific GRU chiplet. (p=pnode, c=chiplet (0 or 1)*/
+static inline unsigned long gru_chiplet_paddr(unsigned long paddr, int pnode,
+							int chiplet)
+{
+	return paddr + GRU_SIZE * (2 * pnode  + chiplet);
+}
+
+static inline void *gru_chiplet_vaddr(void *vaddr, int pnode, int chiplet)
+{
+	return vaddr + GRU_SIZE * (2 * pnode  + chiplet);
+}
+
+static inline struct gru_control_block_extended *gru_tfh_to_cbe(
+					struct gru_tlb_fault_handle *tfh)
+{
+	unsigned long cbe;
+
+	cbe = (unsigned long)tfh - GRU_TFH_BASE + GRU_CBE_BASE;
+	return (struct gru_control_block_extended*)cbe;
+}
+
+
+
+
+/*
+ * Global TLB Fault Map
+ * 	Bitmap of outstanding TLB misses needing interrupt/polling service.
+ *
+ */
+struct gru_tlb_fault_map {
+	unsigned long fault_bits[BITS_TO_LONGS(GRU_NUM_CBE)];
+	unsigned long fill0[2];
+	unsigned long done_bits[BITS_TO_LONGS(GRU_NUM_CBE)];
+	unsigned long fill1[2];
+};
+
+/*
+ * TGH - TLB Global Handle
+ * 	Used for TLB flushing.
+ *
+ */
+struct gru_tlb_global_handle {
+	unsigned int cmd:1;		/* DW 0 */
+	unsigned int delresp:1;
+	unsigned int opc:1;
+	unsigned int fill1:5;
+
+	unsigned int fill2:8;
+
+	unsigned int status:2;
+	unsigned long fill3:2;
+	unsigned int state:3;
+	unsigned long fill4:1;
+
+	unsigned int cause:3;
+	unsigned long fill5:37;
+
+	unsigned long vaddr:64;		/* DW 1 */
+
+	unsigned int asid:24;		/* DW 2 */
+	unsigned int fill6:8;
+
+	unsigned int pagesize:5;
+	unsigned int fill7:11;
+
+	unsigned int global:1;
+	unsigned int fill8:15;
+
+	unsigned long vaddrmask:39;	/* DW 3 */
+	unsigned int fill9:9;
+	unsigned int n:10;
+	unsigned int fill10:6;
+
+	unsigned int ctxbitmap:16;	/* DW4 */
+	unsigned long fill11[3];
+};
+
+enum gru_tgh_cmd {
+	TGHCMD_START
+};
+
+enum gru_tgh_opc {
+	TGHOP_TLBNOP,
+	TGHOP_TLBINV
+};
+
+enum gru_tgh_status {
+	TGHSTATUS_IDLE,
+	TGHSTATUS_EXCEPTION,
+	TGHSTATUS_ACTIVE
+};
+
+enum gru_tgh_state {
+	TGHSTATE_IDLE,
+	TGHSTATE_PE_INVAL,
+	TGHSTATE_INTERRUPT_INVAL,
+	TGHSTATE_WAITDONE,
+	TGHSTATE_RESTART_CTX,
+};
+
+enum gru_tgh_cause {
+	TGHCAUSE_RR_ECC,
+	TGHCAUSE_TLB_ECC,
+	TGHCAUSE_LRU_ECC,
+	TGHCAUSE_PS_ECC,
+	TGHCAUSE_MUL_ERR,
+	TGHCAUSE_DATA_ERR,
+	TGHCAUSE_SW_FORCE
+};
+
+
+/*
+ * TFH - TLB Global Handle
+ * 	Used for TLB dropins into the GRU TLB.
+ *
+ */
+struct gru_tlb_fault_handle {
+	unsigned int cmd:1;		/* DW 0 - low 32*/
+	unsigned int delresp:1;
+	unsigned int fill0:2;
+	unsigned int opc:3;
+	unsigned int fill1:9;
+
+	unsigned int status:2;
+	unsigned int fill2:2;
+	unsigned int state:3;
+	unsigned int fill3:1;
+
+	unsigned int cause:6;
+	unsigned int cb_int:1;
+	unsigned int fill4:1;
+
+	unsigned int indexway:12;	/* DW 0 - high 32 */
+	unsigned int fill5:4;
+
+	unsigned int ctxnum:4;
+	unsigned int fill6:12;
+
+	unsigned long missvaddr:64;	/* DW 1 */
+
+	unsigned int missasid:24;	/* DW 2 */
+	unsigned int fill7:8;
+	unsigned int fillasid:24;
+	unsigned int dirty:1;
+	unsigned int gaa:2;
+	unsigned long fill8:5;
+
+	unsigned long pfn:41;		/* DW 3 */
+	unsigned int fill9:7;
+	unsigned int pagesize:5;
+	unsigned int fill10:11;
+
+	unsigned long fillvaddr:64;	/* DW 4 */
+
+	unsigned long fill11[3];
+};
+
+enum gru_tfh_opc {
+	TFHOP_NOOP,
+	TFHOP_RESTART,
+	TFHOP_WRITE_ONLY,
+	TFHOP_WRITE_RESTART,
+	TFHOP_EXCEPTION,
+	TFHOP_USER_POLLING_MODE = 7,
+};
+
+enum tfh_status {
+	TFHSTATUS_IDLE,
+	TFHSTATUS_EXCEPTION,
+	TFHSTATUS_ACTIVE,
+};
+
+enum tfh_state {
+	TFHSTATE_INACTIVE,
+	TFHSTATE_IDLE,
+	TFHSTATE_MISS_UPM,
+	TFHSTATE_MISS_FMM,
+	TFHSTATE_HW_ERR,
+	TFHSTATE_WRITE_TLB,
+	TFHSTATE_RESTART_CBR,
+};
+
+/* TFH cause bits */
+enum tfh_cause {
+	TFHCAUSE_NONE,
+	TFHCAUSE_TLB_MISS,
+	TFHCAUSE_TLB_MOD,
+	TFHCAUSE_HW_ERROR_RR,
+	TFHCAUSE_HW_ERROR_MAIN_ARRAY,
+	TFHCAUSE_HW_ERROR_VALID,
+	TFHCAUSE_HW_ERROR_PAGESIZE,
+	TFHCAUSE_INSTRUCTION_EXCEPTION,
+	TFHCAUSE_UNCORRECTIBLE_ERROR,
+};
+
+/* GAA values */
+#define GAA_RAM				0x0
+#define GAA_NCRAM			0x2
+#define GAA_MMIO			0x1
+#define GAA_REGISTER			0x3
+
+/* GRU paddr shift for pfn. (NOTE: shift is NOT by actual pagesize) */
+#define GRU_PADDR_SHIFT			12
+
+/*
+ * Context Configuration handle
+ * 	Used to allocate resources to a GSEG context.
+ *
+ */
+struct gru_context_configuration_handle {
+	unsigned int cmd:1;			/* DW0 */
+	unsigned int delresp:1;
+	unsigned int opc:3;
+	unsigned int unmap_enable:1;
+	unsigned int req_slice_set_enable:1;
+	unsigned int req_slice:2;
+	unsigned int cb_int_enable:1;
+	unsigned int tlb_int_enable:1;
+	unsigned int tfm_fault_bit_enable:1;
+	unsigned int tlb_int_select:4;
+
+	unsigned int status:2;
+	unsigned int state:2;
+	unsigned int reserved2:4;
+
+	unsigned int cause:4;
+	unsigned int tfm_done_bit_enable:1;
+	unsigned int unused:3;
+
+	unsigned int dsr_allocation_map;
+
+	unsigned long cbr_allocation_map;	/* DW1 */
+
+	unsigned int asid[8];			/* DW 2 - 5 */
+	unsigned short sizeavail[8];		/* DW 6 - 7 */
+} __attribute__ ((packed));
+
+enum gru_cch_opc {
+	CCHOP_START = 1,
+	CCHOP_ALLOCATE,
+	CCHOP_INTERRUPT,
+	CCHOP_DEALLOCATE,
+	CCHOP_INTERRUPT_SYNC,
+};
+
+enum gru_cch_status {
+	CCHSTATUS_IDLE,
+	CCHSTATUS_EXCEPTION,
+	CCHSTATUS_ACTIVE,
+};
+
+enum gru_cch_state {
+	CCHSTATE_INACTIVE,
+	CCHSTATE_MAPPED,
+	CCHSTATE_ACTIVE,
+	CCHSTATE_INTERRUPTED,
+};
+
+/* CCH Exception cause */
+enum gru_cch_cause {
+	CCHCAUSE_REGION_REGISTER_WRITE_ERROR = 1,
+	CCHCAUSE_ILLEGAL_OPCODE = 2,
+	CCHCAUSE_INVALID_START_REQUEST = 3,
+	CCHCAUSE_INVALID_ALLOCATION_REQUEST = 4,
+	CCHCAUSE_INVALID_DEALLOCATION_REQUEST = 5,
+	CCHCAUSE_INVALID_INTERRUPT_REQUEST = 6,
+	CCHCAUSE_CCH_BUSY = 7,
+	CCHCAUSE_NO_CBRS_TO_ALLOCATE = 8,
+	CCHCAUSE_BAD_TFM_CONFIG = 9,
+	CCHCAUSE_CBR_RESOURCES_OVERSUBSCRIPED = 10,
+	CCHCAUSE_DSR_RESOURCES_OVERSUBSCRIPED = 11,
+	CCHCAUSE_CBR_DEALLOCATION_ERROR = 12,
+};
+/*
+ * CBE - Control Block Extended
+ * 	Maintains internal GRU state for active CBs.
+ *
+ */
+struct gru_control_block_extended {
+	unsigned int reserved0:1;	/* DW 0  - low */
+	unsigned int imacpy:3;
+	unsigned int reserved1:4;
+	unsigned int xtypecpy:3;
+	unsigned int iaa0cpy:2;
+	unsigned int iaa1cpy:2;
+	unsigned int reserved2:1;
+	unsigned int opccpy:8;
+	unsigned int exopccpy:8;
+
+	unsigned int idef2cpy:22;	/* DW 0  - high */
+	unsigned int reserved3:10;
+
+	unsigned int idef4cpy:22;	/* DW 1 */
+	unsigned int reserved4:10;
+	unsigned int idef4upd:22;
+	unsigned int reserved5:10;
+
+	unsigned long idef1upd:64;	/* DW 2 */
+
+	unsigned long idef5cpy:64;	/* DW 3 */
+
+	unsigned long idef6cpy:64;	/* DW 4 */
+
+	unsigned long idef3upd:64;	/* DW 5 */
+
+	unsigned long idef5upd:64;	/* DW 6 */
+
+	unsigned int idef2upd:22;	/* DW 7 */
+	unsigned int reserved6:10;
+
+	unsigned int ecause:20;
+	unsigned int cbrstate:4;
+	unsigned int cbrexecstatus:8;
+};
+
+/* CBE fields for active BCOPY instructions */
+#define cbe_baddr0	idef1upd
+#define cbe_baddr1	idef3upd
+#define cbe_src_cl	idef6cpy
+#define cbe_nelemcur	idef5upd
+
+enum gru_cbr_state {
+	CBRSTATE_INACTIVE,
+	CBRSTATE_IDLE,
+	CBRSTATE_PE_CHECK,
+	CBRSTATE_QUEUED,
+	CBRSTATE_WAIT_RESPONSE,
+	CBRSTATE_INTERRUPTED,
+	CBRSTATE_INTERRUPTED_MISS_FMM,
+	CBRSTATE_BUSY_INTERRUPT_MISS_FMM,
+	CBRSTATE_INTERRUPTED_MISS_UPM,
+	CBRSTATE_BUSY_INTERRUPTED_MISS_UPM,
+	CBRSTATE_REQUEST_ISSUE,
+	CBRSTATE_BUSY_INTERRUPT,
+};
+
+/* CBE cbrexecstatus bits  - defined in gru_instructions.h*/
+/* CBE ecause bits  - defined in gru_instructions.h */
+
+/*
+ * Convert a processor pagesize into the strange encoded pagesize used by the
+ * GRU. Processor pagesize is encoded as log of bytes per page. (or PAGE_SHIFT)
+ * 	pagesize	log pagesize	grupagesize
+ * 	  4k			12	0
+ * 	 16k 			14	1
+ * 	 64k			16	2
+ * 	256k			18	3
+ * 	  1m			20	4
+ * 	  2m			21	5
+ * 	  4m			22	6
+ * 	 16m			24	7
+ * 	 64m			26	8
+ * 	...
+ */
+#define GRU_PAGESIZE(sh)	((((sh) > 20 ? (sh) + 2 : (sh)) >> 1) - 6)
+#define GRU_SIZEAVAIL(sh)	(1UL << GRU_PAGESIZE(sh))
+
+/* minimum TLB purge count to ensure a full purge */
+#define GRUMAXINVAL		1024UL
+
+int cch_allocate(struct gru_context_configuration_handle *cch);
+int cch_start(struct gru_context_configuration_handle *cch);
+int cch_interrupt(struct gru_context_configuration_handle *cch);
+int cch_deallocate(struct gru_context_configuration_handle *cch);
+int cch_interrupt_sync(struct gru_context_configuration_handle *cch);
+int tgh_invalidate(struct gru_tlb_global_handle *tgh, unsigned long vaddr,
+	unsigned long vaddrmask, int asid, int pagesize, int global, int n,
+	unsigned short ctxbitmap);
+int tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long paddr,
+	int gaa, unsigned long vaddr, int asid, int dirty, int pagesize);
+void tfh_write_restart(struct gru_tlb_fault_handle *tfh, unsigned long paddr,
+	int gaa, unsigned long vaddr, int asid, int dirty, int pagesize);
+void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh);
+void tfh_exception(struct gru_tlb_fault_handle *tfh);
+
+#endif /* __GRUHANDLES_H__ */
diff --git a/drivers/misc/sgi-gru/grukdump.c b/drivers/misc/sgi-gru/grukdump.c
new file mode 100644
index 000000000..1540a7785
--- /dev/null
+++ b/drivers/misc/sgi-gru/grukdump.c
@@ -0,0 +1,236 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *            Dump GRU State
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/delay.h>
+#include <linux/bitops.h>
+#include <asm/uv/uv_hub.h>
+
+#include <linux/nospec.h>
+
+#include "gru.h"
+#include "grutables.h"
+#include "gruhandles.h"
+#include "grulib.h"
+
+#define CCH_LOCK_ATTEMPTS	10
+
+static int gru_user_copy_handle(void __user **dp, void *s)
+{
+	if (copy_to_user(*dp, s, GRU_HANDLE_BYTES))
+		return -1;
+	*dp += GRU_HANDLE_BYTES;
+	return 0;
+}
+
+static int gru_dump_context_data(void *grubase,
+			struct gru_context_configuration_handle *cch,
+			void __user *ubuf, int ctxnum, int dsrcnt,
+			int flush_cbrs)
+{
+	void *cb, *cbe, *tfh, *gseg;
+	int i, scr;
+
+	gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
+	cb = gseg + GRU_CB_BASE;
+	cbe = grubase + GRU_CBE_BASE;
+	tfh = grubase + GRU_TFH_BASE;
+
+	for_each_cbr_in_allocation_map(i, &cch->cbr_allocation_map, scr) {
+		if (flush_cbrs)
+			gru_flush_cache(cb);
+		if (gru_user_copy_handle(&ubuf, cb))
+			goto fail;
+		if (gru_user_copy_handle(&ubuf, tfh + i * GRU_HANDLE_STRIDE))
+			goto fail;
+		if (gru_user_copy_handle(&ubuf, cbe + i * GRU_HANDLE_STRIDE))
+			goto fail;
+		cb += GRU_HANDLE_STRIDE;
+	}
+	if (dsrcnt)
+		memcpy(ubuf, gseg + GRU_DS_BASE, dsrcnt * GRU_HANDLE_STRIDE);
+	return 0;
+
+fail:
+	return -EFAULT;
+}
+
+static int gru_dump_tfm(struct gru_state *gru,
+		void __user *ubuf, void __user *ubufend)
+{
+	struct gru_tlb_fault_map *tfm;
+	int i;
+
+	if (GRU_NUM_TFM * GRU_CACHE_LINE_BYTES > ubufend - ubuf)
+		return -EFBIG;
+
+	for (i = 0; i < GRU_NUM_TFM; i++) {
+		tfm = get_tfm(gru->gs_gru_base_vaddr, i);
+		if (gru_user_copy_handle(&ubuf, tfm))
+			goto fail;
+	}
+	return GRU_NUM_TFM * GRU_CACHE_LINE_BYTES;
+
+fail:
+	return -EFAULT;
+}
+
+static int gru_dump_tgh(struct gru_state *gru,
+		void __user *ubuf, void __user *ubufend)
+{
+	struct gru_tlb_global_handle *tgh;
+	int i;
+
+	if (GRU_NUM_TGH * GRU_CACHE_LINE_BYTES > ubufend - ubuf)
+		return -EFBIG;
+
+	for (i = 0; i < GRU_NUM_TGH; i++) {
+		tgh = get_tgh(gru->gs_gru_base_vaddr, i);
+		if (gru_user_copy_handle(&ubuf, tgh))
+			goto fail;
+	}
+	return GRU_NUM_TGH * GRU_CACHE_LINE_BYTES;
+
+fail:
+	return -EFAULT;
+}
+
+static int gru_dump_context(struct gru_state *gru, int ctxnum,
+		void __user *ubuf, void __user *ubufend, char data_opt,
+		char lock_cch, char flush_cbrs)
+{
+	struct gru_dump_context_header hdr;
+	struct gru_dump_context_header __user *uhdr = ubuf;
+	struct gru_context_configuration_handle *cch, *ubufcch;
+	struct gru_thread_state *gts;
+	int try, cch_locked, cbrcnt = 0, dsrcnt = 0, bytes = 0, ret = 0;
+	void *grubase;
+
+	memset(&hdr, 0, sizeof(hdr));
+	grubase = gru->gs_gru_base_vaddr;
+	cch = get_cch(grubase, ctxnum);
+	for (try = 0; try < CCH_LOCK_ATTEMPTS; try++) {
+		cch_locked =  trylock_cch_handle(cch);
+		if (cch_locked)
+			break;
+		msleep(1);
+	}
+
+	ubuf += sizeof(hdr);
+	ubufcch = ubuf;
+	if (gru_user_copy_handle(&ubuf, cch)) {
+		if (cch_locked)
+			unlock_cch_handle(cch);
+		return -EFAULT;
+	}
+	if (cch_locked)
+		ubufcch->delresp = 0;
+	bytes = sizeof(hdr) + GRU_CACHE_LINE_BYTES;
+
+	if (cch_locked || !lock_cch) {
+		gts = gru->gs_gts[ctxnum];
+		if (gts && gts->ts_vma) {
+			hdr.pid = gts->ts_tgid_owner;
+			hdr.vaddr = gts->ts_vma->vm_start;
+		}
+		if (cch->state != CCHSTATE_INACTIVE) {
+			cbrcnt = hweight64(cch->cbr_allocation_map) *
+						GRU_CBR_AU_SIZE;
+			dsrcnt = data_opt ? hweight32(cch->dsr_allocation_map) *
+						GRU_DSR_AU_CL : 0;
+		}
+		bytes += (3 * cbrcnt + dsrcnt) * GRU_CACHE_LINE_BYTES;
+		if (bytes > ubufend - ubuf)
+			ret = -EFBIG;
+		else
+			ret = gru_dump_context_data(grubase, cch, ubuf, ctxnum,
+							dsrcnt, flush_cbrs);
+	}
+	if (cch_locked)
+		unlock_cch_handle(cch);
+	if (ret)
+		return ret;
+
+	hdr.magic = GRU_DUMP_MAGIC;
+	hdr.gid = gru->gs_gid;
+	hdr.ctxnum = ctxnum;
+	hdr.cbrcnt = cbrcnt;
+	hdr.dsrcnt = dsrcnt;
+	hdr.cch_locked = cch_locked;
+	if (copy_to_user(uhdr, &hdr, sizeof(hdr)))
+		return -EFAULT;
+
+	return bytes;
+}
+
+int gru_dump_chiplet_request(unsigned long arg)
+{
+	struct gru_state *gru;
+	struct gru_dump_chiplet_state_req req;
+	void __user *ubuf;
+	void __user *ubufend;
+	int ctxnum, ret, cnt = 0;
+
+	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+		return -EFAULT;
+
+	/* Currently, only dump by gid is implemented */
+	if (req.gid >= gru_max_gids)
+		return -EINVAL;
+	req.gid = array_index_nospec(req.gid, gru_max_gids);
+
+	gru = GID_TO_GRU(req.gid);
+	ubuf = req.buf;
+	ubufend = req.buf + req.buflen;
+
+	ret = gru_dump_tfm(gru, ubuf, ubufend);
+	if (ret < 0)
+		goto fail;
+	ubuf += ret;
+
+	ret = gru_dump_tgh(gru, ubuf, ubufend);
+	if (ret < 0)
+		goto fail;
+	ubuf += ret;
+
+	for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) {
+		if (req.ctxnum == ctxnum || req.ctxnum < 0) {
+			ret = gru_dump_context(gru, ctxnum, ubuf, ubufend,
+						req.data_opt, req.lock_cch,
+						req.flush_cbrs);
+			if (ret < 0)
+				goto fail;
+			ubuf += ret;
+			cnt++;
+		}
+	}
+
+	if (copy_to_user((void __user *)arg, &req, sizeof(req)))
+		return -EFAULT;
+	return cnt;
+
+fail:
+	return ret;
+}
diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c
new file mode 100644
index 000000000..030769018
--- /dev/null
+++ b/drivers/misc/sgi-gru/grukservices.c
@@ -0,0 +1,1171 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *              KERNEL SERVICES THAT USE THE GRU
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/interrupt.h>
+#include <linux/uaccess.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <asm/io_apic.h>
+#include "gru.h"
+#include "grulib.h"
+#include "grutables.h"
+#include "grukservices.h"
+#include "gru_instructions.h"
+#include <asm/uv/uv_hub.h>
+
+/*
+ * Kernel GRU Usage
+ *
+ * The following is an interim algorithm for management of kernel GRU
+ * resources. This will likely be replaced when we better understand the
+ * kernel/user requirements.
+ *
+ * Blade percpu resources reserved for kernel use. These resources are
+ * reserved whenever the the kernel context for the blade is loaded. Note
+ * that the kernel context is not guaranteed to be always available. It is
+ * loaded on demand & can be stolen by a user if the user demand exceeds the
+ * kernel demand. The kernel can always reload the kernel context but
+ * a SLEEP may be required!!!.
+ *
+ * Async Overview:
+ *
+ * 	Each blade has one "kernel context" that owns GRU kernel resources
+ * 	located on the blade. Kernel drivers use GRU resources in this context
+ * 	for sending messages, zeroing memory, etc.
+ *
+ * 	The kernel context is dynamically loaded on demand. If it is not in
+ * 	use by the kernel, the kernel context can be unloaded & given to a user.
+ * 	The kernel context will be reloaded when needed. This may require that
+ * 	a context be stolen from a user.
+ * 		NOTE: frequent unloading/reloading of the kernel context is
+ * 		expensive. We are depending on batch schedulers, cpusets, sane
+ * 		drivers or some other mechanism to prevent the need for frequent
+ *	 	stealing/reloading.
+ *
+ * 	The kernel context consists of two parts:
+ * 		- 1 CB & a few DSRs that are reserved for each cpu on the blade.
+ * 		  Each cpu has it's own private resources & does not share them
+ * 		  with other cpus. These resources are used serially, ie,
+ * 		  locked, used & unlocked  on each call to a function in
+ * 		  grukservices.
+ * 		  	(Now that we have dynamic loading of kernel contexts, I
+ * 		  	 may rethink this & allow sharing between cpus....)
+ *
+ *		- Additional resources can be reserved long term & used directly
+ *		  by UV drivers located in the kernel. Drivers using these GRU
+ *		  resources can use asynchronous GRU instructions that send
+ *		  interrupts on completion.
+ *		  	- these resources must be explicitly locked/unlocked
+ *		  	- locked resources prevent (obviously) the kernel
+ *		  	  context from being unloaded.
+ *			- drivers using these resource directly issue their own
+ *			  GRU instruction and must wait/check completion.
+ *
+ * 		  When these resources are reserved, the caller can optionally
+ * 		  associate a wait_queue with the resources and use asynchronous
+ * 		  GRU instructions. When an async GRU instruction completes, the
+ * 		  driver will do a wakeup on the event.
+ *
+ */
+
+
+#define ASYNC_HAN_TO_BID(h)	((h) - 1)
+#define ASYNC_BID_TO_HAN(b)	((b) + 1)
+#define ASYNC_HAN_TO_BS(h)	gru_base[ASYNC_HAN_TO_BID(h)]
+
+#define GRU_NUM_KERNEL_CBR	1
+#define GRU_NUM_KERNEL_DSR_BYTES 256
+#define GRU_NUM_KERNEL_DSR_CL	(GRU_NUM_KERNEL_DSR_BYTES /		\
+					GRU_CACHE_LINE_BYTES)
+
+/* GRU instruction attributes for all instructions */
+#define IMA			IMA_CB_DELAY
+
+/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */
+#define __gru_cacheline_aligned__                               \
+	__attribute__((__aligned__(GRU_CACHE_LINE_BYTES)))
+
+#define MAGIC	0x1234567887654321UL
+
+/* Default retry count for GRU errors on kernel instructions */
+#define EXCEPTION_RETRY_LIMIT	3
+
+/* Status of message queue sections */
+#define MQS_EMPTY		0
+#define MQS_FULL		1
+#define MQS_NOOP		2
+
+/*----------------- RESOURCE MANAGEMENT -------------------------------------*/
+/* optimized for x86_64 */
+struct message_queue {
+	union gru_mesqhead	head __gru_cacheline_aligned__;	/* CL 0 */
+	int			qlines;				/* DW 1 */
+	long 			hstatus[2];
+	void 			*next __gru_cacheline_aligned__;/* CL 1 */
+	void 			*limit;
+	void 			*start;
+	void 			*start2;
+	char			data ____cacheline_aligned;	/* CL 2 */
+};
+
+/* First word in every message - used by mesq interface */
+struct message_header {
+	char	present;
+	char	present2;
+	char 	lines;
+	char	fill;
+};
+
+#define HSTATUS(mq, h)	((mq) + offsetof(struct message_queue, hstatus[h]))
+
+/*
+ * Reload the blade's kernel context into a GRU chiplet. Called holding
+ * the bs_kgts_sema for READ. Will steal user contexts if necessary.
+ */
+static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id)
+{
+	struct gru_state *gru;
+	struct gru_thread_state *kgts;
+	void *vaddr;
+	int ctxnum, ncpus;
+
+	up_read(&bs->bs_kgts_sema);
+	down_write(&bs->bs_kgts_sema);
+
+	if (!bs->bs_kgts) {
+		do {
+			bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0);
+			if (!IS_ERR(bs->bs_kgts))
+				break;
+			msleep(1);
+		} while (true);
+		bs->bs_kgts->ts_user_blade_id = blade_id;
+	}
+	kgts = bs->bs_kgts;
+
+	if (!kgts->ts_gru) {
+		STAT(load_kernel_context);
+		ncpus = uv_blade_nr_possible_cpus(blade_id);
+		kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU(
+			GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs);
+		kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU(
+			GRU_NUM_KERNEL_DSR_BYTES * ncpus +
+				bs->bs_async_dsr_bytes);
+		while (!gru_assign_gru_context(kgts)) {
+			msleep(1);
+			gru_steal_context(kgts);
+		}
+		gru_load_context(kgts);
+		gru = bs->bs_kgts->ts_gru;
+		vaddr = gru->gs_gru_base_vaddr;
+		ctxnum = kgts->ts_ctxnum;
+		bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0);
+		bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0);
+	}
+	downgrade_write(&bs->bs_kgts_sema);
+}
+
+/*
+ * Free all kernel contexts that are not currently in use.
+ *   Returns 0 if all freed, else number of inuse context.
+ */
+static int gru_free_kernel_contexts(void)
+{
+	struct gru_blade_state *bs;
+	struct gru_thread_state *kgts;
+	int bid, ret = 0;
+
+	for (bid = 0; bid < GRU_MAX_BLADES; bid++) {
+		bs = gru_base[bid];
+		if (!bs)
+			continue;
+
+		/* Ignore busy contexts. Don't want to block here.  */
+		if (down_write_trylock(&bs->bs_kgts_sema)) {
+			kgts = bs->bs_kgts;
+			if (kgts && kgts->ts_gru)
+				gru_unload_context(kgts, 0);
+			bs->bs_kgts = NULL;
+			up_write(&bs->bs_kgts_sema);
+			kfree(kgts);
+		} else {
+			ret++;
+		}
+	}
+	return ret;
+}
+
+/*
+ * Lock & load the kernel context for the specified blade.
+ */
+static struct gru_blade_state *gru_lock_kernel_context(int blade_id)
+{
+	struct gru_blade_state *bs;
+	int bid;
+
+	STAT(lock_kernel_context);
+again:
+	bid = blade_id < 0 ? uv_numa_blade_id() : blade_id;
+	bs = gru_base[bid];
+
+	/* Handle the case where migration occurred while waiting for the sema */
+	down_read(&bs->bs_kgts_sema);
+	if (blade_id < 0 && bid != uv_numa_blade_id()) {
+		up_read(&bs->bs_kgts_sema);
+		goto again;
+	}
+	if (!bs->bs_kgts || !bs->bs_kgts->ts_gru)
+		gru_load_kernel_context(bs, bid);
+	return bs;
+
+}
+
+/*
+ * Unlock the kernel context for the specified blade. Context is not
+ * unloaded but may be stolen before next use.
+ */
+static void gru_unlock_kernel_context(int blade_id)
+{
+	struct gru_blade_state *bs;
+
+	bs = gru_base[blade_id];
+	up_read(&bs->bs_kgts_sema);
+	STAT(unlock_kernel_context);
+}
+
+/*
+ * Reserve & get pointers to the DSR/CBRs reserved for the current cpu.
+ * 	- returns with preemption disabled
+ */
+static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
+{
+	struct gru_blade_state *bs;
+	int lcpu;
+
+	BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES);
+	preempt_disable();
+	bs = gru_lock_kernel_context(-1);
+	lcpu = uv_blade_processor_id();
+	*cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE;
+	*dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES;
+	return 0;
+}
+
+/*
+ * Free the current cpus reserved DSR/CBR resources.
+ */
+static void gru_free_cpu_resources(void *cb, void *dsr)
+{
+	gru_unlock_kernel_context(uv_numa_blade_id());
+	preempt_enable();
+}
+
+/*
+ * Reserve GRU resources to be used asynchronously.
+ *   Note: currently supports only 1 reservation per blade.
+ *
+ * 	input:
+ * 		blade_id  - blade on which resources should be reserved
+ * 		cbrs	  - number of CBRs
+ * 		dsr_bytes - number of DSR bytes needed
+ *	output:
+ *		handle to identify resource
+ *		(0 = async resources already reserved)
+ */
+unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,
+			struct completion *cmp)
+{
+	struct gru_blade_state *bs;
+	struct gru_thread_state *kgts;
+	int ret = 0;
+
+	bs = gru_base[blade_id];
+
+	down_write(&bs->bs_kgts_sema);
+
+	/* Verify no resources already reserved */
+	if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs)
+		goto done;
+	bs->bs_async_dsr_bytes = dsr_bytes;
+	bs->bs_async_cbrs = cbrs;
+	bs->bs_async_wq = cmp;
+	kgts = bs->bs_kgts;
+
+	/* Resources changed. Unload context if already loaded */
+	if (kgts && kgts->ts_gru)
+		gru_unload_context(kgts, 0);
+	ret = ASYNC_BID_TO_HAN(blade_id);
+
+done:
+	up_write(&bs->bs_kgts_sema);
+	return ret;
+}
+
+/*
+ * Release async resources previously reserved.
+ *
+ *	input:
+ *		han - handle to identify resources
+ */
+void gru_release_async_resources(unsigned long han)
+{
+	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
+
+	down_write(&bs->bs_kgts_sema);
+	bs->bs_async_dsr_bytes = 0;
+	bs->bs_async_cbrs = 0;
+	bs->bs_async_wq = NULL;
+	up_write(&bs->bs_kgts_sema);
+}
+
+/*
+ * Wait for async GRU instructions to complete.
+ *
+ *	input:
+ *		han - handle to identify resources
+ */
+void gru_wait_async_cbr(unsigned long han)
+{
+	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
+
+	wait_for_completion(bs->bs_async_wq);
+	mb();
+}
+
+/*
+ * Lock previous reserved async GRU resources
+ *
+ *	input:
+ *		han - handle to identify resources
+ *	output:
+ *		cb  - pointer to first CBR
+ *		dsr - pointer to first DSR
+ */
+void gru_lock_async_resource(unsigned long han,  void **cb, void **dsr)
+{
+	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
+	int blade_id = ASYNC_HAN_TO_BID(han);
+	int ncpus;
+
+	gru_lock_kernel_context(blade_id);
+	ncpus = uv_blade_nr_possible_cpus(blade_id);
+	if (cb)
+		*cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE;
+	if (dsr)
+		*dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES;
+}
+
+/*
+ * Unlock previous reserved async GRU resources
+ *
+ *	input:
+ *		han - handle to identify resources
+ */
+void gru_unlock_async_resource(unsigned long han)
+{
+	int blade_id = ASYNC_HAN_TO_BID(han);
+
+	gru_unlock_kernel_context(blade_id);
+}
+
+/*----------------------------------------------------------------------*/
+int gru_get_cb_exception_detail(void *cb,
+		struct control_block_extended_exc_detail *excdet)
+{
+	struct gru_control_block_extended *cbe;
+	struct gru_thread_state *kgts = NULL;
+	unsigned long off;
+	int cbrnum, bid;
+
+	/*
+	 * Locate kgts for cb. This algorithm is SLOW but
+	 * this function is rarely called (ie., almost never).
+	 * Performance does not matter.
+	 */
+	for_each_possible_blade(bid) {
+		if (!gru_base[bid])
+			break;
+		kgts = gru_base[bid]->bs_kgts;
+		if (!kgts || !kgts->ts_gru)
+			continue;
+		off = cb - kgts->ts_gru->gs_gru_base_vaddr;
+		if (off < GRU_SIZE)
+			break;
+		kgts = NULL;
+	}
+	BUG_ON(!kgts);
+	cbrnum = thread_cbr_number(kgts, get_cb_number(cb));
+	cbe = get_cbe(GRUBASE(cb), cbrnum);
+	gru_flush_cache(cbe);	/* CBE not coherent */
+	sync_core();
+	excdet->opc = cbe->opccpy;
+	excdet->exopc = cbe->exopccpy;
+	excdet->ecause = cbe->ecause;
+	excdet->exceptdet0 = cbe->idef1upd;
+	excdet->exceptdet1 = cbe->idef3upd;
+	gru_flush_cache(cbe);
+	return 0;
+}
+
+static char *gru_get_cb_exception_detail_str(int ret, void *cb,
+					     char *buf, int size)
+{
+	struct gru_control_block_status *gen = (void *)cb;
+	struct control_block_extended_exc_detail excdet;
+
+	if (ret > 0 && gen->istatus == CBS_EXCEPTION) {
+		gru_get_cb_exception_detail(cb, &excdet);
+		snprintf(buf, size,
+			"GRU:%d exception: cb %p, opc %d, exopc %d, ecause 0x%x,"
+			"excdet0 0x%lx, excdet1 0x%x", smp_processor_id(),
+			gen, excdet.opc, excdet.exopc, excdet.ecause,
+			excdet.exceptdet0, excdet.exceptdet1);
+	} else {
+		snprintf(buf, size, "No exception");
+	}
+	return buf;
+}
+
+static int gru_wait_idle_or_exception(struct gru_control_block_status *gen)
+{
+	while (gen->istatus >= CBS_ACTIVE) {
+		cpu_relax();
+		barrier();
+	}
+	return gen->istatus;
+}
+
+static int gru_retry_exception(void *cb)
+{
+	struct gru_control_block_status *gen = (void *)cb;
+	struct control_block_extended_exc_detail excdet;
+	int retry = EXCEPTION_RETRY_LIMIT;
+
+	while (1)  {
+		if (gru_wait_idle_or_exception(gen) == CBS_IDLE)
+			return CBS_IDLE;
+		if (gru_get_cb_message_queue_substatus(cb))
+			return CBS_EXCEPTION;
+		gru_get_cb_exception_detail(cb, &excdet);
+		if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) ||
+				(excdet.cbrexecstatus & CBR_EXS_ABORT_OCC))
+			break;
+		if (retry-- == 0)
+			break;
+		gen->icmd = 1;
+		gru_flush_cache(gen);
+	}
+	return CBS_EXCEPTION;
+}
+
+int gru_check_status_proc(void *cb)
+{
+	struct gru_control_block_status *gen = (void *)cb;
+	int ret;
+
+	ret = gen->istatus;
+	if (ret == CBS_EXCEPTION)
+		ret = gru_retry_exception(cb);
+	rmb();
+	return ret;
+
+}
+
+int gru_wait_proc(void *cb)
+{
+	struct gru_control_block_status *gen = (void *)cb;
+	int ret;
+
+	ret = gru_wait_idle_or_exception(gen);
+	if (ret == CBS_EXCEPTION)
+		ret = gru_retry_exception(cb);
+	rmb();
+	return ret;
+}
+
+static void gru_abort(int ret, void *cb, char *str)
+{
+	char buf[GRU_EXC_STR_SIZE];
+
+	panic("GRU FATAL ERROR: %s - %s\n", str,
+	      gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf)));
+}
+
+void gru_wait_abort_proc(void *cb)
+{
+	int ret;
+
+	ret = gru_wait_proc(cb);
+	if (ret)
+		gru_abort(ret, cb, "gru_wait_abort");
+}
+
+
+/*------------------------------ MESSAGE QUEUES -----------------------------*/
+
+/* Internal status . These are NOT returned to the user. */
+#define MQIE_AGAIN		-1	/* try again */
+
+
+/*
+ * Save/restore the "present" flag that is in the second line of 2-line
+ * messages
+ */
+static inline int get_present2(void *p)
+{
+	struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
+	return mhdr->present;
+}
+
+static inline void restore_present2(void *p, int val)
+{
+	struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
+	mhdr->present = val;
+}
+
+/*
+ * Create a message queue.
+ * 	qlines - message queue size in cache lines. Includes 2-line header.
+ */
+int gru_create_message_queue(struct gru_message_queue_desc *mqd,
+		void *p, unsigned int bytes, int nasid, int vector, int apicid)
+{
+	struct message_queue *mq = p;
+	unsigned int qlines;
+
+	qlines = bytes / GRU_CACHE_LINE_BYTES - 2;
+	memset(mq, 0, bytes);
+	mq->start = &mq->data;
+	mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES;
+	mq->next = &mq->data;
+	mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES;
+	mq->qlines = qlines;
+	mq->hstatus[0] = 0;
+	mq->hstatus[1] = 1;
+	mq->head = gru_mesq_head(2, qlines / 2 + 1);
+	mqd->mq = mq;
+	mqd->mq_gpa = uv_gpa(mq);
+	mqd->qlines = qlines;
+	mqd->interrupt_pnode = nasid >> 1;
+	mqd->interrupt_vector = vector;
+	mqd->interrupt_apicid = apicid;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gru_create_message_queue);
+
+/*
+ * Send a NOOP message to a message queue
+ * 	Returns:
+ * 		 0 - if queue is full after the send. This is the normal case
+ * 		     but various races can change this.
+ *		-1 - if mesq sent successfully but queue not full
+ *		>0 - unexpected error. MQE_xxx returned
+ */
+static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd,
+				void *mesg)
+{
+	const struct message_header noop_header = {
+					.present = MQS_NOOP, .lines = 1};
+	unsigned long m;
+	int substatus, ret;
+	struct message_header save_mhdr, *mhdr = mesg;
+
+	STAT(mesq_noop);
+	save_mhdr = *mhdr;
+	*mhdr = noop_header;
+	gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), 1, IMA);
+	ret = gru_wait(cb);
+
+	if (ret) {
+		substatus = gru_get_cb_message_queue_substatus(cb);
+		switch (substatus) {
+		case CBSS_NO_ERROR:
+			STAT(mesq_noop_unexpected_error);
+			ret = MQE_UNEXPECTED_CB_ERR;
+			break;
+		case CBSS_LB_OVERFLOWED:
+			STAT(mesq_noop_lb_overflow);
+			ret = MQE_CONGESTION;
+			break;
+		case CBSS_QLIMIT_REACHED:
+			STAT(mesq_noop_qlimit_reached);
+			ret = 0;
+			break;
+		case CBSS_AMO_NACKED:
+			STAT(mesq_noop_amo_nacked);
+			ret = MQE_CONGESTION;
+			break;
+		case CBSS_PUT_NACKED:
+			STAT(mesq_noop_put_nacked);
+			m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
+			gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1,
+						IMA);
+			if (gru_wait(cb) == CBS_IDLE)
+				ret = MQIE_AGAIN;
+			else
+				ret = MQE_UNEXPECTED_CB_ERR;
+			break;
+		case CBSS_PAGE_OVERFLOW:
+			STAT(mesq_noop_page_overflow);
+			/* fallthru */
+		default:
+			BUG();
+		}
+	}
+	*mhdr = save_mhdr;
+	return ret;
+}
+
+/*
+ * Handle a gru_mesq full.
+ */
+static int send_message_queue_full(void *cb, struct gru_message_queue_desc *mqd,
+				void *mesg, int lines)
+{
+	union gru_mesqhead mqh;
+	unsigned int limit, head;
+	unsigned long avalue;
+	int half, qlines;
+
+	/* Determine if switching to first/second half of q */
+	avalue = gru_get_amo_value(cb);
+	head = gru_get_amo_value_head(cb);
+	limit = gru_get_amo_value_limit(cb);
+
+	qlines = mqd->qlines;
+	half = (limit != qlines);
+
+	if (half)
+		mqh = gru_mesq_head(qlines / 2 + 1, qlines);
+	else
+		mqh = gru_mesq_head(2, qlines / 2 + 1);
+
+	/* Try to get lock for switching head pointer */
+	gru_gamir(cb, EOP_IR_CLR, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, IMA);
+	if (gru_wait(cb) != CBS_IDLE)
+		goto cberr;
+	if (!gru_get_amo_value(cb)) {
+		STAT(mesq_qf_locked);
+		return MQE_QUEUE_FULL;
+	}
+
+	/* Got the lock. Send optional NOP if queue not full, */
+	if (head != limit) {
+		if (send_noop_message(cb, mqd, mesg)) {
+			gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half),
+					XTYPE_DW, IMA);
+			if (gru_wait(cb) != CBS_IDLE)
+				goto cberr;
+			STAT(mesq_qf_noop_not_full);
+			return MQIE_AGAIN;
+		}
+		avalue++;
+	}
+
+	/* Then flip queuehead to other half of queue. */
+	gru_gamer(cb, EOP_ERR_CSWAP, mqd->mq_gpa, XTYPE_DW, mqh.val, avalue,
+							IMA);
+	if (gru_wait(cb) != CBS_IDLE)
+		goto cberr;
+
+	/* If not successfully in swapping queue head, clear the hstatus lock */
+	if (gru_get_amo_value(cb) != avalue) {
+		STAT(mesq_qf_switch_head_failed);
+		gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), XTYPE_DW,
+							IMA);
+		if (gru_wait(cb) != CBS_IDLE)
+			goto cberr;
+	}
+	return MQIE_AGAIN;
+cberr:
+	STAT(mesq_qf_unexpected_error);
+	return MQE_UNEXPECTED_CB_ERR;
+}
+
+/*
+ * Handle a PUT failure. Note: if message was a 2-line message, one of the
+ * lines might have successfully have been written. Before sending the
+ * message, "present" must be cleared in BOTH lines to prevent the receiver
+ * from prematurely seeing the full message.
+ */
+static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd,
+			void *mesg, int lines)
+{
+	unsigned long m;
+	int ret, loops = 200;	/* experimentally determined */
+
+	m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
+	if (lines == 2) {
+		gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA);
+		if (gru_wait(cb) != CBS_IDLE)
+			return MQE_UNEXPECTED_CB_ERR;
+	}
+	gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
+	if (gru_wait(cb) != CBS_IDLE)
+		return MQE_UNEXPECTED_CB_ERR;
+
+	if (!mqd->interrupt_vector)
+		return MQE_OK;
+
+	/*
+	 * Send a noop message in order to deliver a cross-partition interrupt
+	 * to the SSI that contains the target message queue. Normally, the
+	 * interrupt is automatically delivered by hardware following mesq
+	 * operations, but some error conditions require explicit delivery.
+	 * The noop message will trigger delivery. Otherwise partition failures
+	 * could cause unrecovered errors.
+	 */
+	do {
+		ret = send_noop_message(cb, mqd, mesg);
+	} while ((ret == MQIE_AGAIN || ret == MQE_CONGESTION) && (loops-- > 0));
+
+	if (ret == MQIE_AGAIN || ret == MQE_CONGESTION) {
+		/*
+		 * Don't indicate to the app to resend the message, as it's
+		 * already been successfully sent.  We simply send an OK
+		 * (rather than fail the send with MQE_UNEXPECTED_CB_ERR),
+		 * assuming that the other side is receiving enough
+		 * interrupts to get this message processed anyway.
+		 */
+		ret = MQE_OK;
+	}
+	return ret;
+}
+
+/*
+ * Handle a gru_mesq failure. Some of these failures are software recoverable
+ * or retryable.
+ */
+static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd,
+				void *mesg, int lines)
+{
+	int substatus, ret = 0;
+
+	substatus = gru_get_cb_message_queue_substatus(cb);
+	switch (substatus) {
+	case CBSS_NO_ERROR:
+		STAT(mesq_send_unexpected_error);
+		ret = MQE_UNEXPECTED_CB_ERR;
+		break;
+	case CBSS_LB_OVERFLOWED:
+		STAT(mesq_send_lb_overflow);
+		ret = MQE_CONGESTION;
+		break;
+	case CBSS_QLIMIT_REACHED:
+		STAT(mesq_send_qlimit_reached);
+		ret = send_message_queue_full(cb, mqd, mesg, lines);
+		break;
+	case CBSS_AMO_NACKED:
+		STAT(mesq_send_amo_nacked);
+		ret = MQE_CONGESTION;
+		break;
+	case CBSS_PUT_NACKED:
+		STAT(mesq_send_put_nacked);
+		ret = send_message_put_nacked(cb, mqd, mesg, lines);
+		break;
+	case CBSS_PAGE_OVERFLOW:
+		STAT(mesq_page_overflow);
+		/* fallthru */
+	default:
+		BUG();
+	}
+	return ret;
+}
+
+/*
+ * Send a message to a message queue
+ * 	mqd	message queue descriptor
+ * 	mesg	message. ust be vaddr within a GSEG
+ * 	bytes	message size (<= 2 CL)
+ */
+int gru_send_message_gpa(struct gru_message_queue_desc *mqd, void *mesg,
+				unsigned int bytes)
+{
+	struct message_header *mhdr;
+	void *cb;
+	void *dsr;
+	int istatus, clines, ret;
+
+	STAT(mesq_send);
+	BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES);
+
+	clines = DIV_ROUND_UP(bytes, GRU_CACHE_LINE_BYTES);
+	if (gru_get_cpu_resources(bytes, &cb, &dsr))
+		return MQE_BUG_NO_RESOURCES;
+	memcpy(dsr, mesg, bytes);
+	mhdr = dsr;
+	mhdr->present = MQS_FULL;
+	mhdr->lines = clines;
+	if (clines == 2) {
+		mhdr->present2 = get_present2(mhdr);
+		restore_present2(mhdr, MQS_FULL);
+	}
+
+	do {
+		ret = MQE_OK;
+		gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), clines, IMA);
+		istatus = gru_wait(cb);
+		if (istatus != CBS_IDLE)
+			ret = send_message_failure(cb, mqd, dsr, clines);
+	} while (ret == MQIE_AGAIN);
+	gru_free_cpu_resources(cb, dsr);
+
+	if (ret)
+		STAT(mesq_send_failed);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(gru_send_message_gpa);
+
+/*
+ * Advance the receive pointer for the queue to the next message.
+ */
+void gru_free_message(struct gru_message_queue_desc *mqd, void *mesg)
+{
+	struct message_queue *mq = mqd->mq;
+	struct message_header *mhdr = mq->next;
+	void *next, *pnext;
+	int half = -1;
+	int lines = mhdr->lines;
+
+	if (lines == 2)
+		restore_present2(mhdr, MQS_EMPTY);
+	mhdr->present = MQS_EMPTY;
+
+	pnext = mq->next;
+	next = pnext + GRU_CACHE_LINE_BYTES * lines;
+	if (next == mq->limit) {
+		next = mq->start;
+		half = 1;
+	} else if (pnext < mq->start2 && next >= mq->start2) {
+		half = 0;
+	}
+
+	if (half >= 0)
+		mq->hstatus[half] = 1;
+	mq->next = next;
+}
+EXPORT_SYMBOL_GPL(gru_free_message);
+
+/*
+ * Get next message from message queue. Return NULL if no message
+ * present. User must call next_message() to move to next message.
+ * 	rmq	message queue
+ */
+void *gru_get_next_message(struct gru_message_queue_desc *mqd)
+{
+	struct message_queue *mq = mqd->mq;
+	struct message_header *mhdr = mq->next;
+	int present = mhdr->present;
+
+	/* skip NOOP messages */
+	while (present == MQS_NOOP) {
+		gru_free_message(mqd, mhdr);
+		mhdr = mq->next;
+		present = mhdr->present;
+	}
+
+	/* Wait for both halves of 2 line messages */
+	if (present == MQS_FULL && mhdr->lines == 2 &&
+				get_present2(mhdr) == MQS_EMPTY)
+		present = MQS_EMPTY;
+
+	if (!present) {
+		STAT(mesq_receive_none);
+		return NULL;
+	}
+
+	if (mhdr->lines == 2)
+		restore_present2(mhdr, mhdr->present2);
+
+	STAT(mesq_receive);
+	return mhdr;
+}
+EXPORT_SYMBOL_GPL(gru_get_next_message);
+
+/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/
+
+/*
+ * Load a DW from a global GPA. The GPA can be a memory or MMR address.
+ */
+int gru_read_gpa(unsigned long *value, unsigned long gpa)
+{
+	void *cb;
+	void *dsr;
+	int ret, iaa;
+
+	STAT(read_gpa);
+	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
+		return MQE_BUG_NO_RESOURCES;
+	iaa = gpa >> 62;
+	gru_vload_phys(cb, gpa, gru_get_tri(dsr), iaa, IMA);
+	ret = gru_wait(cb);
+	if (ret == CBS_IDLE)
+		*value = *(unsigned long *)dsr;
+	gru_free_cpu_resources(cb, dsr);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(gru_read_gpa);
+
+
+/*
+ * Copy a block of data using the GRU resources
+ */
+int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
+				unsigned int bytes)
+{
+	void *cb;
+	void *dsr;
+	int ret;
+
+	STAT(copy_gpa);
+	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
+		return MQE_BUG_NO_RESOURCES;
+	gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr),
+		  XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_CL, IMA);
+	ret = gru_wait(cb);
+	gru_free_cpu_resources(cb, dsr);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(gru_copy_gpa);
+
+/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/
+/* 	Temp - will delete after we gain confidence in the GRU		*/
+
+static int quicktest0(unsigned long arg)
+{
+	unsigned long word0;
+	unsigned long word1;
+	void *cb;
+	void *dsr;
+	unsigned long *p;
+	int ret = -EIO;
+
+	if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr))
+		return MQE_BUG_NO_RESOURCES;
+	p = dsr;
+	word0 = MAGIC;
+	word1 = 0;
+
+	gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
+	if (gru_wait(cb) != CBS_IDLE) {
+		printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 1\n", smp_processor_id());
+		goto done;
+	}
+
+	if (*p != MAGIC) {
+		printk(KERN_DEBUG "GRU:%d quicktest0 bad magic 0x%lx\n", smp_processor_id(), *p);
+		goto done;
+	}
+	gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
+	if (gru_wait(cb) != CBS_IDLE) {
+		printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 2\n", smp_processor_id());
+		goto done;
+	}
+
+	if (word0 != word1 || word1 != MAGIC) {
+		printk(KERN_DEBUG
+		       "GRU:%d quicktest0 err: found 0x%lx, expected 0x%lx\n",
+		     smp_processor_id(), word1, MAGIC);
+		goto done;
+	}
+	ret = 0;
+
+done:
+	gru_free_cpu_resources(cb, dsr);
+	return ret;
+}
+
+#define ALIGNUP(p, q)	((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1)))
+
+static int quicktest1(unsigned long arg)
+{
+	struct gru_message_queue_desc mqd;
+	void *p, *mq;
+	int i, ret = -EIO;
+	char mes[GRU_CACHE_LINE_BYTES], *m;
+
+	/* Need  1K cacheline aligned that does not cross page boundary */
+	p = kmalloc(4096, 0);
+	if (p == NULL)
+		return -ENOMEM;
+	mq = ALIGNUP(p, 1024);
+	memset(mes, 0xee, sizeof(mes));
+
+	gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0);
+	for (i = 0; i < 6; i++) {
+		mes[8] = i;
+		do {
+			ret = gru_send_message_gpa(&mqd, mes, sizeof(mes));
+		} while (ret == MQE_CONGESTION);
+		if (ret)
+			break;
+	}
+	if (ret != MQE_QUEUE_FULL || i != 4) {
+		printk(KERN_DEBUG "GRU:%d quicktest1: unexpect status %d, i %d\n",
+		       smp_processor_id(), ret, i);
+		goto done;
+	}
+
+	for (i = 0; i < 6; i++) {
+		m = gru_get_next_message(&mqd);
+		if (!m || m[8] != i)
+			break;
+		gru_free_message(&mqd, m);
+	}
+	if (i != 4) {
+		printk(KERN_DEBUG "GRU:%d quicktest2: bad message, i %d, m %p, m8 %d\n",
+			smp_processor_id(), i, m, m ? m[8] : -1);
+		goto done;
+	}
+	ret = 0;
+
+done:
+	kfree(p);
+	return ret;
+}
+
+static int quicktest2(unsigned long arg)
+{
+	static DECLARE_COMPLETION(cmp);
+	unsigned long han;
+	int blade_id = 0;
+	int numcb = 4;
+	int ret = 0;
+	unsigned long *buf;
+	void *cb0, *cb;
+	struct gru_control_block_status *gen;
+	int i, k, istatus, bytes;
+
+	bytes = numcb * 4 * 8;
+	buf = kmalloc(bytes, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = -EBUSY;
+	han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp);
+	if (!han)
+		goto done;
+
+	gru_lock_async_resource(han, &cb0, NULL);
+	memset(buf, 0xee, bytes);
+	for (i = 0; i < numcb; i++)
+		gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0,
+				XTYPE_DW, 4, 1, IMA_INTERRUPT);
+
+	ret = 0;
+	k = numcb;
+	do {
+		gru_wait_async_cbr(han);
+		for (i = 0; i < numcb; i++) {
+			cb = cb0 + i * GRU_HANDLE_STRIDE;
+			istatus = gru_check_status(cb);
+			if (istatus != CBS_ACTIVE && istatus != CBS_CALL_OS)
+				break;
+		}
+		if (i == numcb)
+			continue;
+		if (istatus != CBS_IDLE) {
+			printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, exception\n", smp_processor_id(), i);
+			ret = -EFAULT;
+		} else if (buf[4 * i] || buf[4 * i + 1] || buf[4 * i + 2] ||
+				buf[4 * i + 3]) {
+			printk(KERN_DEBUG "GRU:%d quicktest2:cb %d,  buf 0x%lx, 0x%lx, 0x%lx, 0x%lx\n",
+			       smp_processor_id(), i, buf[4 * i], buf[4 * i + 1], buf[4 * i + 2], buf[4 * i + 3]);
+			ret = -EIO;
+		}
+		k--;
+		gen = cb;
+		gen->istatus = CBS_CALL_OS; /* don't handle this CBR again */
+	} while (k);
+	BUG_ON(cmp.done);
+
+	gru_unlock_async_resource(han);
+	gru_release_async_resources(han);
+done:
+	kfree(buf);
+	return ret;
+}
+
+#define BUFSIZE 200
+static int quicktest3(unsigned long arg)
+{
+	char buf1[BUFSIZE], buf2[BUFSIZE];
+	int ret = 0;
+
+	memset(buf2, 0, sizeof(buf2));
+	memset(buf1, get_cycles() & 255, sizeof(buf1));
+	gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE);
+	if (memcmp(buf1, buf2, BUFSIZE)) {
+		printk(KERN_DEBUG "GRU:%d quicktest3 error\n", smp_processor_id());
+		ret = -EIO;
+	}
+	return ret;
+}
+
+/*
+ * Debugging only. User hook for various kernel tests
+ * of driver & gru.
+ */
+int gru_ktest(unsigned long arg)
+{
+	int ret = -EINVAL;
+
+	switch (arg & 0xff) {
+	case 0:
+		ret = quicktest0(arg);
+		break;
+	case 1:
+		ret = quicktest1(arg);
+		break;
+	case 2:
+		ret = quicktest2(arg);
+		break;
+	case 3:
+		ret = quicktest3(arg);
+		break;
+	case 99:
+		ret = gru_free_kernel_contexts();
+		break;
+	}
+	return ret;
+
+}
+
+int gru_kservices_init(void)
+{
+	return 0;
+}
+
+void gru_kservices_exit(void)
+{
+	if (gru_free_kernel_contexts())
+		BUG();
+}
+
diff --git a/drivers/misc/sgi-gru/grukservices.h b/drivers/misc/sgi-gru/grukservices.h
new file mode 100644
index 000000000..02aa94d84
--- /dev/null
+++ b/drivers/misc/sgi-gru/grukservices.h
@@ -0,0 +1,214 @@
+
+/*
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#ifndef __GRU_KSERVICES_H_
+#define __GRU_KSERVICES_H_
+
+
+/*
+ * Message queues using the GRU to send/receive messages.
+ *
+ * These function allow the user to create a message queue for
+ * sending/receiving 1 or 2 cacheline messages using the GRU.
+ *
+ * Processes SENDING messages will use a kernel CBR/DSR to send
+ * the message. This is transparent to the caller.
+ *
+ * The receiver does not use any GRU resources.
+ *
+ * The functions support:
+ * 	- single receiver
+ * 	- multiple senders
+ *	- cross partition message
+ *
+ * Missing features ZZZ:
+ * 	- user options for dealing with timeouts, queue full, etc.
+ * 	- gru_create_message_queue() needs interrupt vector info
+ */
+
+struct gru_message_queue_desc {
+	void		*mq;			/* message queue vaddress */
+	unsigned long	mq_gpa;			/* global address of mq */
+	int		qlines;			/* queue size in CL */
+	int		interrupt_vector;	/* interrupt vector */
+	int		interrupt_pnode;	/* pnode for interrupt */
+	int		interrupt_apicid;	/* lapicid for interrupt */
+};
+
+/*
+ * Initialize a user allocated chunk of memory to be used as
+ * a message queue. The caller must ensure that the queue is
+ * in contiguous physical memory and is cacheline aligned.
+ *
+ * Message queue size is the total number of bytes allocated
+ * to the queue including a 2 cacheline header that is used
+ * to manage the queue.
+ *
+ *  Input:
+ * 	mqd	pointer to message queue descriptor
+ * 	p	pointer to user allocated mesq memory.
+ * 	bytes	size of message queue in bytes
+ *      vector	interrupt vector (zero if no interrupts)
+ *      nasid	nasid of blade where interrupt is delivered
+ *      apicid	apicid of cpu for interrupt
+ *
+ *  Errors:
+ *  	0	OK
+ *  	>0	error
+ */
+extern int gru_create_message_queue(struct gru_message_queue_desc *mqd,
+		void *p, unsigned int bytes, int nasid, int vector, int apicid);
+
+/*
+ * Send a message to a message queue.
+ *
+ * Note: The message queue transport mechanism uses the first 32
+ * bits of the message. Users should avoid using these bits.
+ *
+ *
+ *   Input:
+ * 	mqd	pointer to message queue descriptor
+ * 	mesg	pointer to message. Must be 64-bit aligned
+ * 	bytes	size of message in bytes
+ *
+ *   Output:
+ *      0	message sent
+ *     >0	Send failure - see error codes below
+ *
+ */
+extern int gru_send_message_gpa(struct gru_message_queue_desc *mqd,
+			void *mesg, unsigned int bytes);
+
+/* Status values for gru_send_message() */
+#define MQE_OK			0	/* message sent successfully */
+#define MQE_CONGESTION		1	/* temporary congestion, try again */
+#define MQE_QUEUE_FULL		2	/* queue is full */
+#define MQE_UNEXPECTED_CB_ERR	3	/* unexpected CB error */
+#define MQE_PAGE_OVERFLOW	10	/* BUG - queue overflowed a page */
+#define MQE_BUG_NO_RESOURCES	11	/* BUG - could not alloc GRU cb/dsr */
+
+/*
+ * Advance the receive pointer for the message queue to the next message.
+ * Note: current API requires messages to be gotten & freed in order. Future
+ * API extensions may allow for out-of-order freeing.
+ *
+ *   Input
+ * 	mqd	pointer to message queue descriptor
+ * 	mesq	message being freed
+ */
+extern void gru_free_message(struct gru_message_queue_desc *mqd,
+			     void *mesq);
+
+/*
+ * Get next message from message queue. Returns pointer to
+ * message OR NULL if no message present.
+ * User must call gru_free_message() after message is processed
+ * in order to move the queue pointers to next message.
+ *
+ *   Input
+ * 	mqd	pointer to message queue descriptor
+ *
+ *   Output:
+ *	p	pointer to message
+ *	NULL	no message available
+ */
+extern void *gru_get_next_message(struct gru_message_queue_desc *mqd);
+
+
+/*
+ * Read a GRU global GPA. Source can be located in a remote partition.
+ *
+ *    Input:
+ *    	value		memory address where MMR value is returned
+ *    	gpa		source numalink physical address of GPA
+ *
+ *    Output:
+ *	0		OK
+ *	>0		error
+ */
+int gru_read_gpa(unsigned long *value, unsigned long gpa);
+
+
+/*
+ * Copy data using the GRU. Source or destination can be located in a remote
+ * partition.
+ *
+ *    Input:
+ *    	dest_gpa	destination global physical address
+ *    	src_gpa		source global physical address
+ *    	bytes		number of bytes to copy
+ *
+ *    Output:
+ *	0		OK
+ *	>0		error
+ */
+extern int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
+							unsigned int bytes);
+
+/*
+ * Reserve GRU resources to be used asynchronously.
+ *
+ * 	input:
+ * 		blade_id  - blade on which resources should be reserved
+ * 		cbrs	  - number of CBRs
+ * 		dsr_bytes - number of DSR bytes needed
+ * 		cmp	  - completion structure for waiting for
+ * 			    async completions
+ *	output:
+ *		handle to identify resource
+ *		(0 = no resources)
+ */
+extern unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,
+				struct completion *cmp);
+
+/*
+ * Release async resources previously reserved.
+ *
+ *	input:
+ *		han - handle to identify resources
+ */
+extern void gru_release_async_resources(unsigned long han);
+
+/*
+ * Wait for async GRU instructions to complete.
+ *
+ *	input:
+ *		han - handle to identify resources
+ */
+extern void gru_wait_async_cbr(unsigned long han);
+
+/*
+ * Lock previous reserved async GRU resources
+ *
+ *	input:
+ *		han - handle to identify resources
+ *	output:
+ *		cb  - pointer to first CBR
+ *		dsr - pointer to first DSR
+ */
+extern void gru_lock_async_resource(unsigned long han,  void **cb, void **dsr);
+
+/*
+ * Unlock previous reserved async GRU resources
+ *
+ *	input:
+ *		han - handle to identify resources
+ */
+extern void gru_unlock_async_resource(unsigned long han);
+
+#endif 		/* __GRU_KSERVICES_H_ */
diff --git a/drivers/misc/sgi-gru/grulib.h b/drivers/misc/sgi-gru/grulib.h
new file mode 100644
index 000000000..e77d1b1f9
--- /dev/null
+++ b/drivers/misc/sgi-gru/grulib.h
@@ -0,0 +1,153 @@
+/*
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation; either version 2.1 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __GRULIB_H__
+#define __GRULIB_H__
+
+#define GRU_BASENAME		"gru"
+#define GRU_FULLNAME		"/dev/gru"
+#define GRU_IOCTL_NUM 		 'G'
+
+/*
+ * Maximum number of GRU segments that a user can have open
+ * ZZZ temp - set high for testing. Revisit.
+ */
+#define GRU_MAX_OPEN_CONTEXTS		32
+
+/* Set Number of Request Blocks */
+#define GRU_CREATE_CONTEXT		_IOWR(GRU_IOCTL_NUM, 1, void *)
+
+/*  Set Context Options */
+#define GRU_SET_CONTEXT_OPTION		_IOWR(GRU_IOCTL_NUM, 4, void *)
+
+/* Fetch exception detail */
+#define GRU_USER_GET_EXCEPTION_DETAIL	_IOWR(GRU_IOCTL_NUM, 6, void *)
+
+/* For user call_os handling - normally a TLB fault */
+#define GRU_USER_CALL_OS		_IOWR(GRU_IOCTL_NUM, 8, void *)
+
+/* For user unload context */
+#define GRU_USER_UNLOAD_CONTEXT		_IOWR(GRU_IOCTL_NUM, 9, void *)
+
+/* For dumpping GRU chiplet state */
+#define GRU_DUMP_CHIPLET_STATE		_IOWR(GRU_IOCTL_NUM, 11, void *)
+
+/* For getting gseg statistics */
+#define GRU_GET_GSEG_STATISTICS		_IOWR(GRU_IOCTL_NUM, 12, void *)
+
+/* For user TLB flushing (primarily for tests) */
+#define GRU_USER_FLUSH_TLB		_IOWR(GRU_IOCTL_NUM, 50, void *)
+
+/* Get some config options (primarily for tests & emulator) */
+#define GRU_GET_CONFIG_INFO		_IOWR(GRU_IOCTL_NUM, 51, void *)
+
+/* Various kernel self-tests */
+#define GRU_KTEST			_IOWR(GRU_IOCTL_NUM, 52, void *)
+
+#define CONTEXT_WINDOW_BYTES(th)        (GRU_GSEG_PAGESIZE * (th))
+#define THREAD_POINTER(p, th)		(p + GRU_GSEG_PAGESIZE * (th))
+#define GSEG_START(cb)			((void *)((unsigned long)(cb) & ~(GRU_GSEG_PAGESIZE - 1)))
+
+struct gru_get_gseg_statistics_req {
+	unsigned long			gseg;
+	struct gru_gseg_statistics	stats;
+};
+
+/*
+ * Structure used to pass TLB flush parameters to the driver
+ */
+struct gru_create_context_req {
+	unsigned long		gseg;
+	unsigned int		data_segment_bytes;
+	unsigned int		control_blocks;
+	unsigned int		maximum_thread_count;
+	unsigned int		options;
+	unsigned char		tlb_preload_count;
+};
+
+/*
+ * Structure used to pass unload context parameters to the driver
+ */
+struct gru_unload_context_req {
+	unsigned long	gseg;
+};
+
+/*
+ * Structure used to set context options
+ */
+enum {sco_gseg_owner, sco_cch_req_slice, sco_blade_chiplet};
+struct gru_set_context_option_req {
+	unsigned long	gseg;
+	int		op;
+	int		val0;
+	long		val1;
+};
+
+/*
+ * Structure used to pass TLB flush parameters to the driver
+ */
+struct gru_flush_tlb_req {
+	unsigned long	gseg;
+	unsigned long	vaddr;
+	size_t		len;
+};
+
+/*
+ * Structure used to pass TLB flush parameters to the driver
+ */
+enum {dcs_pid, dcs_gid};
+struct gru_dump_chiplet_state_req {
+	unsigned int	op;
+	unsigned int	gid;
+	int		ctxnum;
+	char		data_opt;
+	char		lock_cch;
+	char		flush_cbrs;
+	char		fill[10];
+	pid_t		pid;
+	void		*buf;
+	size_t		buflen;
+	/* ---- output --- */
+	unsigned int	num_contexts;
+};
+
+#define GRU_DUMP_MAGIC	0x3474ab6c
+struct gru_dump_context_header {
+	unsigned int	magic;
+	unsigned int	gid;
+	unsigned char	ctxnum;
+	unsigned char	cbrcnt;
+	unsigned char	dsrcnt;
+	pid_t		pid;
+	unsigned long	vaddr;
+	int		cch_locked;
+	unsigned long	data[0];
+};
+
+/*
+ * GRU configuration info (temp - for testing)
+ */
+struct gru_config_info {
+	int		cpus;
+	int		blades;
+	int		nodes;
+	int		chiplets;
+	int		fill[16];
+};
+
+#endif /* __GRULIB_H__ */
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
new file mode 100644
index 000000000..ab174f28e
--- /dev/null
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -0,0 +1,976 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *            DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/device.h>
+#include <linux/list.h>
+#include <linux/err.h>
+#include <linux/prefetch.h>
+#include <asm/uv/uv_hub.h>
+#include "gru.h"
+#include "grutables.h"
+#include "gruhandles.h"
+
+unsigned long gru_options __read_mostly;
+
+static struct device_driver gru_driver = {
+	.name = "gru"
+};
+
+static struct device gru_device = {
+	.init_name = "",
+	.driver = &gru_driver,
+};
+
+struct device *grudev = &gru_device;
+
+/*
+ * Select a gru fault map to be used by the current cpu. Note that
+ * multiple cpus may be using the same map.
+ *	ZZZ should be inline but did not work on emulator
+ */
+int gru_cpu_fault_map_id(void)
+{
+#ifdef CONFIG_IA64
+	return uv_blade_processor_id() % GRU_NUM_TFM;
+#else
+	int cpu = smp_processor_id();
+	int id, core;
+
+	core = uv_cpu_core_number(cpu);
+	id = core + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu);
+	return id;
+#endif
+}
+
+/*--------- ASID Management -------------------------------------------
+ *
+ *  Initially, assign asids sequentially from MIN_ASID .. MAX_ASID.
+ *  Once MAX is reached, flush the TLB & start over. However,
+ *  some asids may still be in use. There won't be many (percentage wise) still
+ *  in use. Search active contexts & determine the value of the first
+ *  asid in use ("x"s below). Set "limit" to this value.
+ *  This defines a block of assignable asids.
+ *
+ *  When "limit" is reached, search forward from limit+1 and determine the
+ *  next block of assignable asids.
+ *
+ *  Repeat until MAX_ASID is reached, then start over again.
+ *
+ *  Each time MAX_ASID is reached, increment the asid generation. Since
+ *  the search for in-use asids only checks contexts with GRUs currently
+ *  assigned, asids in some contexts will be missed. Prior to loading
+ *  a context, the asid generation of the GTS asid is rechecked. If it
+ *  doesn't match the current generation, a new asid will be assigned.
+ *
+ *   	0---------------x------------x---------------------x----|
+ *	  ^-next	^-limit	   				^-MAX_ASID
+ *
+ * All asid manipulation & context loading/unloading is protected by the
+ * gs_lock.
+ */
+
+/* Hit the asid limit. Start over */
+static int gru_wrap_asid(struct gru_state *gru)
+{
+	gru_dbg(grudev, "gid %d\n", gru->gs_gid);
+	STAT(asid_wrap);
+	gru->gs_asid_gen++;
+	return MIN_ASID;
+}
+
+/* Find the next chunk of unused asids */
+static int gru_reset_asid_limit(struct gru_state *gru, int asid)
+{
+	int i, gid, inuse_asid, limit;
+
+	gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid);
+	STAT(asid_next);
+	limit = MAX_ASID;
+	if (asid >= limit)
+		asid = gru_wrap_asid(gru);
+	gru_flush_all_tlb(gru);
+	gid = gru->gs_gid;
+again:
+	for (i = 0; i < GRU_NUM_CCH; i++) {
+		if (!gru->gs_gts[i] || is_kernel_context(gru->gs_gts[i]))
+			continue;
+		inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid;
+		gru_dbg(grudev, "gid %d, gts %p, gms %p, inuse 0x%x, cxt %d\n",
+			gru->gs_gid, gru->gs_gts[i], gru->gs_gts[i]->ts_gms,
+			inuse_asid, i);
+		if (inuse_asid == asid) {
+			asid += ASID_INC;
+			if (asid >= limit) {
+				/*
+				 * empty range: reset the range limit and
+				 * start over
+				 */
+				limit = MAX_ASID;
+				if (asid >= MAX_ASID)
+					asid = gru_wrap_asid(gru);
+				goto again;
+			}
+		}
+
+		if ((inuse_asid > asid) && (inuse_asid < limit))
+			limit = inuse_asid;
+	}
+	gru->gs_asid_limit = limit;
+	gru->gs_asid = asid;
+	gru_dbg(grudev, "gid %d, new asid 0x%x, new_limit 0x%x\n", gru->gs_gid,
+					asid, limit);
+	return asid;
+}
+
+/* Assign a new ASID to a thread context.  */
+static int gru_assign_asid(struct gru_state *gru)
+{
+	int asid;
+
+	gru->gs_asid += ASID_INC;
+	asid = gru->gs_asid;
+	if (asid >= gru->gs_asid_limit)
+		asid = gru_reset_asid_limit(gru, asid);
+
+	gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid);
+	return asid;
+}
+
+/*
+ * Clear n bits in a word. Return a word indicating the bits that were cleared.
+ * Optionally, build an array of chars that contain the bit numbers allocated.
+ */
+static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
+				       char *idx)
+{
+	unsigned long bits = 0;
+	int i;
+
+	while (n--) {
+		i = find_first_bit(p, mmax);
+		if (i == mmax)
+			BUG();
+		__clear_bit(i, p);
+		__set_bit(i, &bits);
+		if (idx)
+			*idx++ = i;
+	}
+	return bits;
+}
+
+unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count,
+				       char *cbmap)
+{
+	return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU,
+				 cbmap);
+}
+
+unsigned long gru_reserve_ds_resources(struct gru_state *gru, int dsr_au_count,
+				       char *dsmap)
+{
+	return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU,
+				 dsmap);
+}
+
+static void reserve_gru_resources(struct gru_state *gru,
+				  struct gru_thread_state *gts)
+{
+	gru->gs_active_contexts++;
+	gts->ts_cbr_map =
+	    gru_reserve_cb_resources(gru, gts->ts_cbr_au_count,
+				     gts->ts_cbr_idx);
+	gts->ts_dsr_map =
+	    gru_reserve_ds_resources(gru, gts->ts_dsr_au_count, NULL);
+}
+
+static void free_gru_resources(struct gru_state *gru,
+			       struct gru_thread_state *gts)
+{
+	gru->gs_active_contexts--;
+	gru->gs_cbr_map |= gts->ts_cbr_map;
+	gru->gs_dsr_map |= gts->ts_dsr_map;
+}
+
+/*
+ * Check if a GRU has sufficient free resources to satisfy an allocation
+ * request. Note: GRU locks may or may not be held when this is called. If
+ * not held, recheck after acquiring the appropriate locks.
+ *
+ * Returns 1 if sufficient resources, 0 if not
+ */
+static int check_gru_resources(struct gru_state *gru, int cbr_au_count,
+			       int dsr_au_count, int max_active_contexts)
+{
+	return hweight64(gru->gs_cbr_map) >= cbr_au_count
+		&& hweight64(gru->gs_dsr_map) >= dsr_au_count
+		&& gru->gs_active_contexts < max_active_contexts;
+}
+
+/*
+ * TLB manangment requires tracking all GRU chiplets that have loaded a GSEG
+ * context.
+ */
+static int gru_load_mm_tracker(struct gru_state *gru,
+					struct gru_thread_state *gts)
+{
+	struct gru_mm_struct *gms = gts->ts_gms;
+	struct gru_mm_tracker *asids = &gms->ms_asids[gru->gs_gid];
+	unsigned short ctxbitmap = (1 << gts->ts_ctxnum);
+	int asid;
+
+	spin_lock(&gms->ms_asid_lock);
+	asid = asids->mt_asid;
+
+	spin_lock(&gru->gs_asid_lock);
+	if (asid == 0 || (asids->mt_ctxbitmap == 0 && asids->mt_asid_gen !=
+			  gru->gs_asid_gen)) {
+		asid = gru_assign_asid(gru);
+		asids->mt_asid = asid;
+		asids->mt_asid_gen = gru->gs_asid_gen;
+		STAT(asid_new);
+	} else {
+		STAT(asid_reuse);
+	}
+	spin_unlock(&gru->gs_asid_lock);
+
+	BUG_ON(asids->mt_ctxbitmap & ctxbitmap);
+	asids->mt_ctxbitmap |= ctxbitmap;
+	if (!test_bit(gru->gs_gid, gms->ms_asidmap))
+		__set_bit(gru->gs_gid, gms->ms_asidmap);
+	spin_unlock(&gms->ms_asid_lock);
+
+	gru_dbg(grudev,
+		"gid %d, gts %p, gms %p, ctxnum %d, asid 0x%x, asidmap 0x%lx\n",
+		gru->gs_gid, gts, gms, gts->ts_ctxnum, asid,
+		gms->ms_asidmap[0]);
+	return asid;
+}
+
+static void gru_unload_mm_tracker(struct gru_state *gru,
+					struct gru_thread_state *gts)
+{
+	struct gru_mm_struct *gms = gts->ts_gms;
+	struct gru_mm_tracker *asids;
+	unsigned short ctxbitmap;
+
+	asids = &gms->ms_asids[gru->gs_gid];
+	ctxbitmap = (1 << gts->ts_ctxnum);
+	spin_lock(&gms->ms_asid_lock);
+	spin_lock(&gru->gs_asid_lock);
+	BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap);
+	asids->mt_ctxbitmap ^= ctxbitmap;
+	gru_dbg(grudev, "gid %d, gts %p, gms %p, ctxnum %d, asidmap 0x%lx\n",
+		gru->gs_gid, gts, gms, gts->ts_ctxnum, gms->ms_asidmap[0]);
+	spin_unlock(&gru->gs_asid_lock);
+	spin_unlock(&gms->ms_asid_lock);
+}
+
+/*
+ * Decrement the reference count on a GTS structure. Free the structure
+ * if the reference count goes to zero.
+ */
+void gts_drop(struct gru_thread_state *gts)
+{
+	if (gts && atomic_dec_return(&gts->ts_refcnt) == 0) {
+		if (gts->ts_gms)
+			gru_drop_mmu_notifier(gts->ts_gms);
+		kfree(gts);
+		STAT(gts_free);
+	}
+}
+
+/*
+ * Locate the GTS structure for the current thread.
+ */
+static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data
+			    *vdata, int tsid)
+{
+	struct gru_thread_state *gts;
+
+	list_for_each_entry(gts, &vdata->vd_head, ts_next)
+	    if (gts->ts_tsid == tsid)
+		return gts;
+	return NULL;
+}
+
+/*
+ * Allocate a thread state structure.
+ */
+struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
+		int cbr_au_count, int dsr_au_count,
+		unsigned char tlb_preload_count, int options, int tsid)
+{
+	struct gru_thread_state *gts;
+	struct gru_mm_struct *gms;
+	int bytes;
+
+	bytes = DSR_BYTES(dsr_au_count) + CBR_BYTES(cbr_au_count);
+	bytes += sizeof(struct gru_thread_state);
+	gts = kmalloc(bytes, GFP_KERNEL);
+	if (!gts)
+		return ERR_PTR(-ENOMEM);
+
+	STAT(gts_alloc);
+	memset(gts, 0, sizeof(struct gru_thread_state)); /* zero out header */
+	atomic_set(&gts->ts_refcnt, 1);
+	mutex_init(&gts->ts_ctxlock);
+	gts->ts_cbr_au_count = cbr_au_count;
+	gts->ts_dsr_au_count = dsr_au_count;
+	gts->ts_tlb_preload_count = tlb_preload_count;
+	gts->ts_user_options = options;
+	gts->ts_user_blade_id = -1;
+	gts->ts_user_chiplet_id = -1;
+	gts->ts_tsid = tsid;
+	gts->ts_ctxnum = NULLCTX;
+	gts->ts_tlb_int_select = -1;
+	gts->ts_cch_req_slice = -1;
+	gts->ts_sizeavail = GRU_SIZEAVAIL(PAGE_SHIFT);
+	if (vma) {
+		gts->ts_mm = current->mm;
+		gts->ts_vma = vma;
+		gms = gru_register_mmu_notifier();
+		if (IS_ERR(gms))
+			goto err;
+		gts->ts_gms = gms;
+	}
+
+	gru_dbg(grudev, "alloc gts %p\n", gts);
+	return gts;
+
+err:
+	gts_drop(gts);
+	return ERR_CAST(gms);
+}
+
+/*
+ * Allocate a vma private data structure.
+ */
+struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid)
+{
+	struct gru_vma_data *vdata = NULL;
+
+	vdata = kmalloc(sizeof(*vdata), GFP_KERNEL);
+	if (!vdata)
+		return NULL;
+
+	STAT(vdata_alloc);
+	INIT_LIST_HEAD(&vdata->vd_head);
+	spin_lock_init(&vdata->vd_lock);
+	gru_dbg(grudev, "alloc vdata %p\n", vdata);
+	return vdata;
+}
+
+/*
+ * Find the thread state structure for the current thread.
+ */
+struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma,
+					int tsid)
+{
+	struct gru_vma_data *vdata = vma->vm_private_data;
+	struct gru_thread_state *gts;
+
+	spin_lock(&vdata->vd_lock);
+	gts = gru_find_current_gts_nolock(vdata, tsid);
+	spin_unlock(&vdata->vd_lock);
+	gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
+	return gts;
+}
+
+/*
+ * Allocate a new thread state for a GSEG. Note that races may allow
+ * another thread to race to create a gts.
+ */
+struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma,
+					int tsid)
+{
+	struct gru_vma_data *vdata = vma->vm_private_data;
+	struct gru_thread_state *gts, *ngts;
+
+	gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count,
+			    vdata->vd_dsr_au_count,
+			    vdata->vd_tlb_preload_count,
+			    vdata->vd_user_options, tsid);
+	if (IS_ERR(gts))
+		return gts;
+
+	spin_lock(&vdata->vd_lock);
+	ngts = gru_find_current_gts_nolock(vdata, tsid);
+	if (ngts) {
+		gts_drop(gts);
+		gts = ngts;
+		STAT(gts_double_allocate);
+	} else {
+		list_add(&gts->ts_next, &vdata->vd_head);
+	}
+	spin_unlock(&vdata->vd_lock);
+	gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
+	return gts;
+}
+
+/*
+ * Free the GRU context assigned to the thread state.
+ */
+static void gru_free_gru_context(struct gru_thread_state *gts)
+{
+	struct gru_state *gru;
+
+	gru = gts->ts_gru;
+	gru_dbg(grudev, "gts %p, gid %d\n", gts, gru->gs_gid);
+
+	spin_lock(&gru->gs_lock);
+	gru->gs_gts[gts->ts_ctxnum] = NULL;
+	free_gru_resources(gru, gts);
+	BUG_ON(test_bit(gts->ts_ctxnum, &gru->gs_context_map) == 0);
+	__clear_bit(gts->ts_ctxnum, &gru->gs_context_map);
+	gts->ts_ctxnum = NULLCTX;
+	gts->ts_gru = NULL;
+	gts->ts_blade = -1;
+	spin_unlock(&gru->gs_lock);
+
+	gts_drop(gts);
+	STAT(free_context);
+}
+
+/*
+ * Prefetching cachelines help hardware performance.
+ * (Strictly a performance enhancement. Not functionally required).
+ */
+static void prefetch_data(void *p, int num, int stride)
+{
+	while (num-- > 0) {
+		prefetchw(p);
+		p += stride;
+	}
+}
+
+static inline long gru_copy_handle(void *d, void *s)
+{
+	memcpy(d, s, GRU_HANDLE_BYTES);
+	return GRU_HANDLE_BYTES;
+}
+
+static void gru_prefetch_context(void *gseg, void *cb, void *cbe,
+				unsigned long cbrmap, unsigned long length)
+{
+	int i, scr;
+
+	prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES,
+		      GRU_CACHE_LINE_BYTES);
+
+	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
+		prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES);
+		prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1,
+			      GRU_CACHE_LINE_BYTES);
+		cb += GRU_HANDLE_STRIDE;
+	}
+}
+
+static void gru_load_context_data(void *save, void *grubase, int ctxnum,
+				  unsigned long cbrmap, unsigned long dsrmap,
+				  int data_valid)
+{
+	void *gseg, *cb, *cbe;
+	unsigned long length;
+	int i, scr;
+
+	gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
+	cb = gseg + GRU_CB_BASE;
+	cbe = grubase + GRU_CBE_BASE;
+	length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
+	gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
+
+	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
+		if (data_valid) {
+			save += gru_copy_handle(cb, save);
+			save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE,
+						save);
+		} else {
+			memset(cb, 0, GRU_CACHE_LINE_BYTES);
+			memset(cbe + i * GRU_HANDLE_STRIDE, 0,
+						GRU_CACHE_LINE_BYTES);
+		}
+		/* Flush CBE to hide race in context restart */
+		mb();
+		gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE);
+		cb += GRU_HANDLE_STRIDE;
+	}
+
+	if (data_valid)
+		memcpy(gseg + GRU_DS_BASE, save, length);
+	else
+		memset(gseg + GRU_DS_BASE, 0, length);
+}
+
+static void gru_unload_context_data(void *save, void *grubase, int ctxnum,
+				    unsigned long cbrmap, unsigned long dsrmap)
+{
+	void *gseg, *cb, *cbe;
+	unsigned long length;
+	int i, scr;
+
+	gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
+	cb = gseg + GRU_CB_BASE;
+	cbe = grubase + GRU_CBE_BASE;
+	length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
+
+	/* CBEs may not be coherent. Flush them from cache */
+	for_each_cbr_in_allocation_map(i, &cbrmap, scr)
+		gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE);
+	mb();		/* Let the CL flush complete */
+
+	gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
+
+	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
+		save += gru_copy_handle(save, cb);
+		save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE);
+		cb += GRU_HANDLE_STRIDE;
+	}
+	memcpy(save, gseg + GRU_DS_BASE, length);
+}
+
+void gru_unload_context(struct gru_thread_state *gts, int savestate)
+{
+	struct gru_state *gru = gts->ts_gru;
+	struct gru_context_configuration_handle *cch;
+	int ctxnum = gts->ts_ctxnum;
+
+	if (!is_kernel_context(gts))
+		zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE);
+	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
+
+	gru_dbg(grudev, "gts %p, cbrmap 0x%lx, dsrmap 0x%lx\n",
+		gts, gts->ts_cbr_map, gts->ts_dsr_map);
+	lock_cch_handle(cch);
+	if (cch_interrupt_sync(cch))
+		BUG();
+
+	if (!is_kernel_context(gts))
+		gru_unload_mm_tracker(gru, gts);
+	if (savestate) {
+		gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr,
+					ctxnum, gts->ts_cbr_map,
+					gts->ts_dsr_map);
+		gts->ts_data_valid = 1;
+	}
+
+	if (cch_deallocate(cch))
+		BUG();
+	unlock_cch_handle(cch);
+
+	gru_free_gru_context(gts);
+}
+
+/*
+ * Load a GRU context by copying it from the thread data structure in memory
+ * to the GRU.
+ */
+void gru_load_context(struct gru_thread_state *gts)
+{
+	struct gru_state *gru = gts->ts_gru;
+	struct gru_context_configuration_handle *cch;
+	int i, err, asid, ctxnum = gts->ts_ctxnum;
+
+	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
+	lock_cch_handle(cch);
+	cch->tfm_fault_bit_enable =
+	    (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
+	     || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
+	cch->tlb_int_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
+	if (cch->tlb_int_enable) {
+		gts->ts_tlb_int_select = gru_cpu_fault_map_id();
+		cch->tlb_int_select = gts->ts_tlb_int_select;
+	}
+	if (gts->ts_cch_req_slice >= 0) {
+		cch->req_slice_set_enable = 1;
+		cch->req_slice = gts->ts_cch_req_slice;
+	} else {
+		cch->req_slice_set_enable =0;
+	}
+	cch->tfm_done_bit_enable = 0;
+	cch->dsr_allocation_map = gts->ts_dsr_map;
+	cch->cbr_allocation_map = gts->ts_cbr_map;
+
+	if (is_kernel_context(gts)) {
+		cch->unmap_enable = 1;
+		cch->tfm_done_bit_enable = 1;
+		cch->cb_int_enable = 1;
+		cch->tlb_int_select = 0;	/* For now, ints go to cpu 0 */
+	} else {
+		cch->unmap_enable = 0;
+		cch->tfm_done_bit_enable = 0;
+		cch->cb_int_enable = 0;
+		asid = gru_load_mm_tracker(gru, gts);
+		for (i = 0; i < 8; i++) {
+			cch->asid[i] = asid + i;
+			cch->sizeavail[i] = gts->ts_sizeavail;
+		}
+	}
+
+	err = cch_allocate(cch);
+	if (err) {
+		gru_dbg(grudev,
+			"err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n",
+			err, cch, gts, gts->ts_cbr_map, gts->ts_dsr_map);
+		BUG();
+	}
+
+	gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum,
+			gts->ts_cbr_map, gts->ts_dsr_map, gts->ts_data_valid);
+
+	if (cch_start(cch))
+		BUG();
+	unlock_cch_handle(cch);
+
+	gru_dbg(grudev, "gid %d, gts %p, cbrmap 0x%lx, dsrmap 0x%lx, tie %d, tis %d\n",
+		gts->ts_gru->gs_gid, gts, gts->ts_cbr_map, gts->ts_dsr_map,
+		(gts->ts_user_options == GRU_OPT_MISS_FMM_INTR), gts->ts_tlb_int_select);
+}
+
+/*
+ * Update fields in an active CCH:
+ * 	- retarget interrupts on local blade
+ * 	- update sizeavail mask
+ */
+int gru_update_cch(struct gru_thread_state *gts)
+{
+	struct gru_context_configuration_handle *cch;
+	struct gru_state *gru = gts->ts_gru;
+	int i, ctxnum = gts->ts_ctxnum, ret = 0;
+
+	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
+
+	lock_cch_handle(cch);
+	if (cch->state == CCHSTATE_ACTIVE) {
+		if (gru->gs_gts[gts->ts_ctxnum] != gts)
+			goto exit;
+		if (cch_interrupt(cch))
+			BUG();
+		for (i = 0; i < 8; i++)
+			cch->sizeavail[i] = gts->ts_sizeavail;
+		gts->ts_tlb_int_select = gru_cpu_fault_map_id();
+		cch->tlb_int_select = gru_cpu_fault_map_id();
+		cch->tfm_fault_bit_enable =
+		  (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
+		    || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
+		if (cch_start(cch))
+			BUG();
+		ret = 1;
+	}
+exit:
+	unlock_cch_handle(cch);
+	return ret;
+}
+
+/*
+ * Update CCH tlb interrupt select. Required when all the following is true:
+ * 	- task's GRU context is loaded into a GRU
+ * 	- task is using interrupt notification for TLB faults
+ * 	- task has migrated to a different cpu on the same blade where
+ * 	  it was previously running.
+ */
+static int gru_retarget_intr(struct gru_thread_state *gts)
+{
+	if (gts->ts_tlb_int_select < 0
+	    || gts->ts_tlb_int_select == gru_cpu_fault_map_id())
+		return 0;
+
+	gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select,
+		gru_cpu_fault_map_id());
+	return gru_update_cch(gts);
+}
+
+/*
+ * Check if a GRU context is allowed to use a specific chiplet. By default
+ * a context is assigned to any blade-local chiplet. However, users can
+ * override this.
+ * 	Returns 1 if assignment allowed, 0 otherwise
+ */
+static int gru_check_chiplet_assignment(struct gru_state *gru,
+					struct gru_thread_state *gts)
+{
+	int blade_id;
+	int chiplet_id;
+
+	blade_id = gts->ts_user_blade_id;
+	if (blade_id < 0)
+		blade_id = uv_numa_blade_id();
+
+	chiplet_id = gts->ts_user_chiplet_id;
+	return gru->gs_blade_id == blade_id &&
+		(chiplet_id < 0 || chiplet_id == gru->gs_chiplet_id);
+}
+
+/*
+ * Unload the gru context if it is not assigned to the correct blade or
+ * chiplet. Misassignment can occur if the process migrates to a different
+ * blade or if the user changes the selected blade/chiplet.
+ */
+void gru_check_context_placement(struct gru_thread_state *gts)
+{
+	struct gru_state *gru;
+
+	/*
+	 * If the current task is the context owner, verify that the
+	 * context is correctly placed. This test is skipped for non-owner
+	 * references. Pthread apps use non-owner references to the CBRs.
+	 */
+	gru = gts->ts_gru;
+	if (!gru || gts->ts_tgid_owner != current->tgid)
+		return;
+
+	if (!gru_check_chiplet_assignment(gru, gts)) {
+		STAT(check_context_unload);
+		gru_unload_context(gts, 1);
+	} else if (gru_retarget_intr(gts)) {
+		STAT(check_context_retarget_intr);
+	}
+}
+
+
+/*
+ * Insufficient GRU resources available on the local blade. Steal a context from
+ * a process. This is a hack until a _real_ resource scheduler is written....
+ */
+#define next_ctxnum(n)	((n) <  GRU_NUM_CCH - 2 ? (n) + 1 : 0)
+#define next_gru(b, g)	(((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ?  \
+				 ((g)+1) : &(b)->bs_grus[0])
+
+static int is_gts_stealable(struct gru_thread_state *gts,
+		struct gru_blade_state *bs)
+{
+	if (is_kernel_context(gts))
+		return down_write_trylock(&bs->bs_kgts_sema);
+	else
+		return mutex_trylock(&gts->ts_ctxlock);
+}
+
+static void gts_stolen(struct gru_thread_state *gts,
+		struct gru_blade_state *bs)
+{
+	if (is_kernel_context(gts)) {
+		up_write(&bs->bs_kgts_sema);
+		STAT(steal_kernel_context);
+	} else {
+		mutex_unlock(&gts->ts_ctxlock);
+		STAT(steal_user_context);
+	}
+}
+
+void gru_steal_context(struct gru_thread_state *gts)
+{
+	struct gru_blade_state *blade;
+	struct gru_state *gru, *gru0;
+	struct gru_thread_state *ngts = NULL;
+	int ctxnum, ctxnum0, flag = 0, cbr, dsr;
+	int blade_id;
+
+	blade_id = gts->ts_user_blade_id;
+	if (blade_id < 0)
+		blade_id = uv_numa_blade_id();
+	cbr = gts->ts_cbr_au_count;
+	dsr = gts->ts_dsr_au_count;
+
+	blade = gru_base[blade_id];
+	spin_lock(&blade->bs_lock);
+
+	ctxnum = next_ctxnum(blade->bs_lru_ctxnum);
+	gru = blade->bs_lru_gru;
+	if (ctxnum == 0)
+		gru = next_gru(blade, gru);
+	blade->bs_lru_gru = gru;
+	blade->bs_lru_ctxnum = ctxnum;
+	ctxnum0 = ctxnum;
+	gru0 = gru;
+	while (1) {
+		if (gru_check_chiplet_assignment(gru, gts)) {
+			if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH))
+				break;
+			spin_lock(&gru->gs_lock);
+			for (; ctxnum < GRU_NUM_CCH; ctxnum++) {
+				if (flag && gru == gru0 && ctxnum == ctxnum0)
+					break;
+				ngts = gru->gs_gts[ctxnum];
+				/*
+			 	* We are grabbing locks out of order, so trylock is
+			 	* needed. GTSs are usually not locked, so the odds of
+			 	* success are high. If trylock fails, try to steal a
+			 	* different GSEG.
+			 	*/
+				if (ngts && is_gts_stealable(ngts, blade))
+					break;
+				ngts = NULL;
+			}
+			spin_unlock(&gru->gs_lock);
+			if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0))
+				break;
+		}
+		if (flag && gru == gru0)
+			break;
+		flag = 1;
+		ctxnum = 0;
+		gru = next_gru(blade, gru);
+	}
+	spin_unlock(&blade->bs_lock);
+
+	if (ngts) {
+		gts->ustats.context_stolen++;
+		ngts->ts_steal_jiffies = jiffies;
+		gru_unload_context(ngts, is_kernel_context(ngts) ? 0 : 1);
+		gts_stolen(ngts, blade);
+	} else {
+		STAT(steal_context_failed);
+	}
+	gru_dbg(grudev,
+		"stole gid %d, ctxnum %d from gts %p. Need cb %d, ds %d;"
+		" avail cb %ld, ds %ld\n",
+		gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map),
+		hweight64(gru->gs_dsr_map));
+}
+
+/*
+ * Assign a gru context.
+ */
+static int gru_assign_context_number(struct gru_state *gru)
+{
+	int ctxnum;
+
+	ctxnum = find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH);
+	__set_bit(ctxnum, &gru->gs_context_map);
+	return ctxnum;
+}
+
+/*
+ * Scan the GRUs on the local blade & assign a GRU context.
+ */
+struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts)
+{
+	struct gru_state *gru, *grux;
+	int i, max_active_contexts;
+	int blade_id = gts->ts_user_blade_id;
+
+	if (blade_id < 0)
+		blade_id = uv_numa_blade_id();
+again:
+	gru = NULL;
+	max_active_contexts = GRU_NUM_CCH;
+	for_each_gru_on_blade(grux, blade_id, i) {
+		if (!gru_check_chiplet_assignment(grux, gts))
+			continue;
+		if (check_gru_resources(grux, gts->ts_cbr_au_count,
+					gts->ts_dsr_au_count,
+					max_active_contexts)) {
+			gru = grux;
+			max_active_contexts = grux->gs_active_contexts;
+			if (max_active_contexts == 0)
+				break;
+		}
+	}
+
+	if (gru) {
+		spin_lock(&gru->gs_lock);
+		if (!check_gru_resources(gru, gts->ts_cbr_au_count,
+					 gts->ts_dsr_au_count, GRU_NUM_CCH)) {
+			spin_unlock(&gru->gs_lock);
+			goto again;
+		}
+		reserve_gru_resources(gru, gts);
+		gts->ts_gru = gru;
+		gts->ts_blade = gru->gs_blade_id;
+		gts->ts_ctxnum = gru_assign_context_number(gru);
+		atomic_inc(&gts->ts_refcnt);
+		gru->gs_gts[gts->ts_ctxnum] = gts;
+		spin_unlock(&gru->gs_lock);
+
+		STAT(assign_context);
+		gru_dbg(grudev,
+			"gseg %p, gts %p, gid %d, ctx %d, cbr %d, dsr %d\n",
+			gseg_virtual_address(gts->ts_gru, gts->ts_ctxnum), gts,
+			gts->ts_gru->gs_gid, gts->ts_ctxnum,
+			gts->ts_cbr_au_count, gts->ts_dsr_au_count);
+	} else {
+		gru_dbg(grudev, "failed to allocate a GTS %s\n", "");
+		STAT(assign_context_failed);
+	}
+
+	return gru;
+}
+
+/*
+ * gru_nopage
+ *
+ * Map the user's GRU segment
+ *
+ * 	Note: gru segments alway mmaped on GRU_GSEG_PAGESIZE boundaries.
+ */
+vm_fault_t gru_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct gru_thread_state *gts;
+	unsigned long paddr, vaddr;
+	unsigned long expires;
+
+	vaddr = vmf->address;
+	gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
+		vma, vaddr, GSEG_BASE(vaddr));
+	STAT(nopfn);
+
+	/* The following check ensures vaddr is a valid address in the VMA */
+	gts = gru_find_thread_state(vma, TSID(vaddr, vma));
+	if (!gts)
+		return VM_FAULT_SIGBUS;
+
+again:
+	mutex_lock(&gts->ts_ctxlock);
+	preempt_disable();
+
+	gru_check_context_placement(gts);
+
+	if (!gts->ts_gru) {
+		STAT(load_user_context);
+		if (!gru_assign_gru_context(gts)) {
+			preempt_enable();
+			mutex_unlock(&gts->ts_ctxlock);
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule_timeout(GRU_ASSIGN_DELAY);  /* true hack ZZZ */
+			expires = gts->ts_steal_jiffies + GRU_STEAL_DELAY;
+			if (time_before(expires, jiffies))
+				gru_steal_context(gts);
+			goto again;
+		}
+		gru_load_context(gts);
+		paddr = gseg_physical_address(gts->ts_gru, gts->ts_ctxnum);
+		remap_pfn_range(vma, vaddr & ~(GRU_GSEG_PAGESIZE - 1),
+				paddr >> PAGE_SHIFT, GRU_GSEG_PAGESIZE,
+				vma->vm_page_prot);
+	}
+
+	preempt_enable();
+	mutex_unlock(&gts->ts_ctxlock);
+
+	return VM_FAULT_NOPAGE;
+}
+
diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c
new file mode 100644
index 000000000..42ea2ecca
--- /dev/null
+++ b/drivers/misc/sgi-gru/gruprocfs.c
@@ -0,0 +1,324 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *              PROC INTERFACES
+ *
+ * This file supports the /proc interfaces for the GRU driver
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/proc_fs.h>
+#include <linux/device.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+#include "gru.h"
+#include "grulib.h"
+#include "grutables.h"
+
+#define printstat(s, f)		printstat_val(s, &gru_stats.f, #f)
+
+static void printstat_val(struct seq_file *s, atomic_long_t *v, char *id)
+{
+	unsigned long val = atomic_long_read(v);
+
+	seq_printf(s, "%16lu %s\n", val, id);
+}
+
+static int statistics_show(struct seq_file *s, void *p)
+{
+	printstat(s, vdata_alloc);
+	printstat(s, vdata_free);
+	printstat(s, gts_alloc);
+	printstat(s, gts_free);
+	printstat(s, gms_alloc);
+	printstat(s, gms_free);
+	printstat(s, gts_double_allocate);
+	printstat(s, assign_context);
+	printstat(s, assign_context_failed);
+	printstat(s, free_context);
+	printstat(s, load_user_context);
+	printstat(s, load_kernel_context);
+	printstat(s, lock_kernel_context);
+	printstat(s, unlock_kernel_context);
+	printstat(s, steal_user_context);
+	printstat(s, steal_kernel_context);
+	printstat(s, steal_context_failed);
+	printstat(s, nopfn);
+	printstat(s, asid_new);
+	printstat(s, asid_next);
+	printstat(s, asid_wrap);
+	printstat(s, asid_reuse);
+	printstat(s, intr);
+	printstat(s, intr_cbr);
+	printstat(s, intr_tfh);
+	printstat(s, intr_spurious);
+	printstat(s, intr_mm_lock_failed);
+	printstat(s, call_os);
+	printstat(s, call_os_wait_queue);
+	printstat(s, user_flush_tlb);
+	printstat(s, user_unload_context);
+	printstat(s, user_exception);
+	printstat(s, set_context_option);
+	printstat(s, check_context_retarget_intr);
+	printstat(s, check_context_unload);
+	printstat(s, tlb_dropin);
+	printstat(s, tlb_preload_page);
+	printstat(s, tlb_dropin_fail_no_asid);
+	printstat(s, tlb_dropin_fail_upm);
+	printstat(s, tlb_dropin_fail_invalid);
+	printstat(s, tlb_dropin_fail_range_active);
+	printstat(s, tlb_dropin_fail_idle);
+	printstat(s, tlb_dropin_fail_fmm);
+	printstat(s, tlb_dropin_fail_no_exception);
+	printstat(s, tfh_stale_on_fault);
+	printstat(s, mmu_invalidate_range);
+	printstat(s, mmu_invalidate_page);
+	printstat(s, flush_tlb);
+	printstat(s, flush_tlb_gru);
+	printstat(s, flush_tlb_gru_tgh);
+	printstat(s, flush_tlb_gru_zero_asid);
+	printstat(s, copy_gpa);
+	printstat(s, read_gpa);
+	printstat(s, mesq_receive);
+	printstat(s, mesq_receive_none);
+	printstat(s, mesq_send);
+	printstat(s, mesq_send_failed);
+	printstat(s, mesq_noop);
+	printstat(s, mesq_send_unexpected_error);
+	printstat(s, mesq_send_lb_overflow);
+	printstat(s, mesq_send_qlimit_reached);
+	printstat(s, mesq_send_amo_nacked);
+	printstat(s, mesq_send_put_nacked);
+	printstat(s, mesq_qf_locked);
+	printstat(s, mesq_qf_noop_not_full);
+	printstat(s, mesq_qf_switch_head_failed);
+	printstat(s, mesq_qf_unexpected_error);
+	printstat(s, mesq_noop_unexpected_error);
+	printstat(s, mesq_noop_lb_overflow);
+	printstat(s, mesq_noop_qlimit_reached);
+	printstat(s, mesq_noop_amo_nacked);
+	printstat(s, mesq_noop_put_nacked);
+	printstat(s, mesq_noop_page_overflow);
+	return 0;
+}
+
+static ssize_t statistics_write(struct file *file, const char __user *userbuf,
+				size_t count, loff_t *data)
+{
+	memset(&gru_stats, 0, sizeof(gru_stats));
+	return count;
+}
+
+static int mcs_statistics_show(struct seq_file *s, void *p)
+{
+	int op;
+	unsigned long total, count, max;
+	static char *id[] = {"cch_allocate", "cch_start", "cch_interrupt",
+		"cch_interrupt_sync", "cch_deallocate", "tfh_write_only",
+		"tfh_write_restart", "tgh_invalidate"};
+
+	seq_printf(s, "%-20s%12s%12s%12s\n", "#id", "count", "aver-clks", "max-clks");
+	for (op = 0; op < mcsop_last; op++) {
+		count = atomic_long_read(&mcs_op_statistics[op].count);
+		total = atomic_long_read(&mcs_op_statistics[op].total);
+		max = mcs_op_statistics[op].max;
+		seq_printf(s, "%-20s%12ld%12ld%12ld\n", id[op], count,
+			   count ? total / count : 0, max);
+	}
+	return 0;
+}
+
+static ssize_t mcs_statistics_write(struct file *file,
+			const char __user *userbuf, size_t count, loff_t *data)
+{
+	memset(mcs_op_statistics, 0, sizeof(mcs_op_statistics));
+	return count;
+}
+
+static int options_show(struct seq_file *s, void *p)
+{
+	seq_printf(s, "#bitmask: 1=trace, 2=statistics\n");
+	seq_printf(s, "0x%lx\n", gru_options);
+	return 0;
+}
+
+static ssize_t options_write(struct file *file, const char __user *userbuf,
+			     size_t count, loff_t *data)
+{
+	int ret;
+
+	ret = kstrtoul_from_user(userbuf, count, 0, &gru_options);
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+static int cch_seq_show(struct seq_file *file, void *data)
+{
+	long gid = *(long *)data;
+	int i;
+	struct gru_state *gru = GID_TO_GRU(gid);
+	struct gru_thread_state *ts;
+	const char *mode[] = { "??", "UPM", "INTR", "OS_POLL" };
+
+	if (gid == 0)
+		seq_printf(file, "#%5s%5s%6s%7s%9s%6s%8s%8s\n", "gid", "bid",
+			   "ctx#", "asid", "pid", "cbrs", "dsbytes", "mode");
+	if (gru)
+		for (i = 0; i < GRU_NUM_CCH; i++) {
+			ts = gru->gs_gts[i];
+			if (!ts)
+				continue;
+			seq_printf(file, " %5d%5d%6d%7d%9d%6d%8d%8s\n",
+				   gru->gs_gid, gru->gs_blade_id, i,
+				   is_kernel_context(ts) ? 0 : ts->ts_gms->ms_asids[gid].mt_asid,
+				   is_kernel_context(ts) ? 0 : ts->ts_tgid_owner,
+				   ts->ts_cbr_au_count * GRU_CBR_AU_SIZE,
+				   ts->ts_cbr_au_count * GRU_DSR_AU_BYTES,
+				   mode[ts->ts_user_options &
+					GRU_OPT_MISS_MASK]);
+		}
+
+	return 0;
+}
+
+static int gru_seq_show(struct seq_file *file, void *data)
+{
+	long gid = *(long *)data, ctxfree, cbrfree, dsrfree;
+	struct gru_state *gru = GID_TO_GRU(gid);
+
+	if (gid == 0) {
+		seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "gid", "nid",
+			   "ctx", "cbr", "dsr", "ctx", "cbr", "dsr");
+		seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "", "", "busy",
+			   "busy", "busy", "free", "free", "free");
+	}
+	if (gru) {
+		ctxfree = GRU_NUM_CCH - gru->gs_active_contexts;
+		cbrfree = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
+		dsrfree = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES;
+		seq_printf(file, " %5d%5d%7ld%6ld%6ld%8ld%6ld%6ld\n",
+			   gru->gs_gid, gru->gs_blade_id, GRU_NUM_CCH - ctxfree,
+			   GRU_NUM_CBE - cbrfree, GRU_NUM_DSR_BYTES - dsrfree,
+			   ctxfree, cbrfree, dsrfree);
+	}
+
+	return 0;
+}
+
+static void seq_stop(struct seq_file *file, void *data)
+{
+}
+
+static void *seq_start(struct seq_file *file, loff_t *gid)
+{
+	if (*gid < gru_max_gids)
+		return gid;
+	return NULL;
+}
+
+static void *seq_next(struct seq_file *file, void *data, loff_t *gid)
+{
+	(*gid)++;
+	if (*gid < gru_max_gids)
+		return gid;
+	return NULL;
+}
+
+static const struct seq_operations cch_seq_ops = {
+	.start	= seq_start,
+	.next	= seq_next,
+	.stop	= seq_stop,
+	.show	= cch_seq_show
+};
+
+static const struct seq_operations gru_seq_ops = {
+	.start	= seq_start,
+	.next	= seq_next,
+	.stop	= seq_stop,
+	.show	= gru_seq_show
+};
+
+static int statistics_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, statistics_show, NULL);
+}
+
+static int mcs_statistics_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mcs_statistics_show, NULL);
+}
+
+static int options_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, options_show, NULL);
+}
+
+/* *INDENT-OFF* */
+static const struct file_operations statistics_fops = {
+	.open 		= statistics_open,
+	.read 		= seq_read,
+	.write 		= statistics_write,
+	.llseek 	= seq_lseek,
+	.release 	= single_release,
+};
+
+static const struct file_operations mcs_statistics_fops = {
+	.open 		= mcs_statistics_open,
+	.read 		= seq_read,
+	.write 		= mcs_statistics_write,
+	.llseek 	= seq_lseek,
+	.release 	= single_release,
+};
+
+static const struct file_operations options_fops = {
+	.open 		= options_open,
+	.read 		= seq_read,
+	.write 		= options_write,
+	.llseek 	= seq_lseek,
+	.release 	= single_release,
+};
+
+static struct proc_dir_entry *proc_gru __read_mostly;
+
+int gru_proc_init(void)
+{
+	proc_gru = proc_mkdir("sgi_uv/gru", NULL);
+	if (!proc_gru)
+		return -1;
+	if (!proc_create("statistics", 0644, proc_gru, &statistics_fops))
+		goto err;
+	if (!proc_create("mcs_statistics", 0644, proc_gru, &mcs_statistics_fops))
+		goto err;
+	if (!proc_create("debug_options", 0644, proc_gru, &options_fops))
+		goto err;
+	if (!proc_create_seq("cch_status", 0444, proc_gru, &cch_seq_ops))
+		goto err;
+	if (!proc_create_seq("gru_status", 0444, proc_gru, &gru_seq_ops))
+		goto err;
+	return 0;
+err:
+	remove_proc_subtree("sgi_uv/gru", NULL);
+	return -1;
+}
+
+void gru_proc_exit(void)
+{
+	remove_proc_subtree("sgi_uv/gru", NULL);
+}
diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h
new file mode 100644
index 000000000..3e041b6f7
--- /dev/null
+++ b/drivers/misc/sgi-gru/grutables.h
@@ -0,0 +1,679 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *            GRU DRIVER TABLES, MACROS, externs, etc
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __GRUTABLES_H__
+#define __GRUTABLES_H__
+
+/*
+ * GRU Chiplet:
+ *   The GRU is a user addressible memory accelerator. It provides
+ *   several forms of load, store, memset, bcopy instructions. In addition, it
+ *   contains special instructions for AMOs, sending messages to message
+ *   queues, etc.
+ *
+ *   The GRU is an integral part of the node controller. It connects
+ *   directly to the cpu socket. In its current implementation, there are 2
+ *   GRU chiplets in the node controller on each blade (~node).
+ *
+ *   The entire GRU memory space is fully coherent and cacheable by the cpus.
+ *
+ *   Each GRU chiplet has a physical memory map that looks like the following:
+ *
+ *   	+-----------------+
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	|/////////////////|
+ *   	+-----------------+
+ *   	|  system control |
+ *   	+-----------------+        _______ +-------------+
+ *   	|/////////////////|       /        |             |
+ *   	|/////////////////|      /         |             |
+ *   	|/////////////////|     /          | instructions|
+ *   	|/////////////////|    /           |             |
+ *   	|/////////////////|   /            |             |
+ *   	|/////////////////|  /             |-------------|
+ *   	|/////////////////| /              |             |
+ *   	+-----------------+                |             |
+ *   	|   context 15    |                |  data       |
+ *   	+-----------------+                |             |
+ *   	|    ......       | \              |             |
+ *   	+-----------------+  \____________ +-------------+
+ *   	|   context 1     |
+ *   	+-----------------+
+ *   	|   context 0     |
+ *   	+-----------------+
+ *
+ *   Each of the "contexts" is a chunk of memory that can be mmaped into user
+ *   space. The context consists of 2 parts:
+ *
+ *  	- an instruction space that can be directly accessed by the user
+ *  	  to issue GRU instructions and to check instruction status.
+ *
+ *  	- a data area that acts as normal RAM.
+ *
+ *   User instructions contain virtual addresses of data to be accessed by the
+ *   GRU. The GRU contains a TLB that is used to convert these user virtual
+ *   addresses to physical addresses.
+ *
+ *   The "system control" area of the GRU chiplet is used by the kernel driver
+ *   to manage user contexts and to perform functions such as TLB dropin and
+ *   purging.
+ *
+ *   One context may be reserved for the kernel and used for cross-partition
+ *   communication. The GRU will also be used to asynchronously zero out
+ *   large blocks of memory (not currently implemented).
+ *
+ *
+ * Tables:
+ *
+ * 	VDATA-VMA Data		- Holds a few parameters. Head of linked list of
+ * 				  GTS tables for threads using the GSEG
+ * 	GTS - Gru Thread State  - contains info for managing a GSEG context. A
+ * 				  GTS is allocated for each thread accessing a
+ * 				  GSEG.
+ *     	GTD - GRU Thread Data   - contains shadow copy of GRU data when GSEG is
+ *     				  not loaded into a GRU
+ *	GMS - GRU Memory Struct - Used to manage TLB shootdowns. Tracks GRUs
+ *				  where a GSEG has been loaded. Similar to
+ *				  an mm_struct but for GRU.
+ *
+ *	GS  - GRU State 	- Used to manage the state of a GRU chiplet
+ *	BS  - Blade State	- Used to manage state of all GRU chiplets
+ *				  on a blade
+ *
+ *
+ *  Normal task tables for task using GRU.
+ *  		- 2 threads in process
+ *  		- 2 GSEGs open in process
+ *  		- GSEG1 is being used by both threads
+ *  		- GSEG2 is used only by thread 2
+ *
+ *       task -->|
+ *       task ---+---> mm ->------ (notifier) -------+-> gms
+ *                     |                             |
+ *                     |--> vma -> vdata ---> gts--->|		GSEG1 (thread1)
+ *                     |                  |          |
+ *                     |                  +-> gts--->|		GSEG1 (thread2)
+ *                     |                             |
+ *                     |--> vma -> vdata ---> gts--->|		GSEG2 (thread2)
+ *                     .
+ *                     .
+ *
+ *  GSEGs are marked DONTCOPY on fork
+ *
+ * At open
+ * 	file.private_data -> NULL
+ *
+ * At mmap,
+ * 	vma -> vdata
+ *
+ * After gseg reference
+ * 	vma -> vdata ->gts
+ *
+ * After fork
+ *   parent
+ * 	vma -> vdata -> gts
+ *   child
+ * 	(vma is not copied)
+ *
+ */
+
+#include <linux/rmap.h>
+#include <linux/interrupt.h>
+#include <linux/mutex.h>
+#include <linux/wait.h>
+#include <linux/mmu_notifier.h>
+#include <linux/mm_types.h>
+#include "gru.h"
+#include "grulib.h"
+#include "gruhandles.h"
+
+extern struct gru_stats_s gru_stats;
+extern struct gru_blade_state *gru_base[];
+extern unsigned long gru_start_paddr, gru_end_paddr;
+extern void *gru_start_vaddr;
+extern unsigned int gru_max_gids;
+
+#define GRU_MAX_BLADES		MAX_NUMNODES
+#define GRU_MAX_GRUS		(GRU_MAX_BLADES * GRU_CHIPLETS_PER_BLADE)
+
+#define GRU_DRIVER_ID_STR	"SGI GRU Device Driver"
+#define GRU_DRIVER_VERSION_STR	"0.85"
+
+/*
+ * GRU statistics.
+ */
+struct gru_stats_s {
+	atomic_long_t vdata_alloc;
+	atomic_long_t vdata_free;
+	atomic_long_t gts_alloc;
+	atomic_long_t gts_free;
+	atomic_long_t gms_alloc;
+	atomic_long_t gms_free;
+	atomic_long_t gts_double_allocate;
+	atomic_long_t assign_context;
+	atomic_long_t assign_context_failed;
+	atomic_long_t free_context;
+	atomic_long_t load_user_context;
+	atomic_long_t load_kernel_context;
+	atomic_long_t lock_kernel_context;
+	atomic_long_t unlock_kernel_context;
+	atomic_long_t steal_user_context;
+	atomic_long_t steal_kernel_context;
+	atomic_long_t steal_context_failed;
+	atomic_long_t nopfn;
+	atomic_long_t asid_new;
+	atomic_long_t asid_next;
+	atomic_long_t asid_wrap;
+	atomic_long_t asid_reuse;
+	atomic_long_t intr;
+	atomic_long_t intr_cbr;
+	atomic_long_t intr_tfh;
+	atomic_long_t intr_spurious;
+	atomic_long_t intr_mm_lock_failed;
+	atomic_long_t call_os;
+	atomic_long_t call_os_wait_queue;
+	atomic_long_t user_flush_tlb;
+	atomic_long_t user_unload_context;
+	atomic_long_t user_exception;
+	atomic_long_t set_context_option;
+	atomic_long_t check_context_retarget_intr;
+	atomic_long_t check_context_unload;
+	atomic_long_t tlb_dropin;
+	atomic_long_t tlb_preload_page;
+	atomic_long_t tlb_dropin_fail_no_asid;
+	atomic_long_t tlb_dropin_fail_upm;
+	atomic_long_t tlb_dropin_fail_invalid;
+	atomic_long_t tlb_dropin_fail_range_active;
+	atomic_long_t tlb_dropin_fail_idle;
+	atomic_long_t tlb_dropin_fail_fmm;
+	atomic_long_t tlb_dropin_fail_no_exception;
+	atomic_long_t tfh_stale_on_fault;
+	atomic_long_t mmu_invalidate_range;
+	atomic_long_t mmu_invalidate_page;
+	atomic_long_t flush_tlb;
+	atomic_long_t flush_tlb_gru;
+	atomic_long_t flush_tlb_gru_tgh;
+	atomic_long_t flush_tlb_gru_zero_asid;
+
+	atomic_long_t copy_gpa;
+	atomic_long_t read_gpa;
+
+	atomic_long_t mesq_receive;
+	atomic_long_t mesq_receive_none;
+	atomic_long_t mesq_send;
+	atomic_long_t mesq_send_failed;
+	atomic_long_t mesq_noop;
+	atomic_long_t mesq_send_unexpected_error;
+	atomic_long_t mesq_send_lb_overflow;
+	atomic_long_t mesq_send_qlimit_reached;
+	atomic_long_t mesq_send_amo_nacked;
+	atomic_long_t mesq_send_put_nacked;
+	atomic_long_t mesq_page_overflow;
+	atomic_long_t mesq_qf_locked;
+	atomic_long_t mesq_qf_noop_not_full;
+	atomic_long_t mesq_qf_switch_head_failed;
+	atomic_long_t mesq_qf_unexpected_error;
+	atomic_long_t mesq_noop_unexpected_error;
+	atomic_long_t mesq_noop_lb_overflow;
+	atomic_long_t mesq_noop_qlimit_reached;
+	atomic_long_t mesq_noop_amo_nacked;
+	atomic_long_t mesq_noop_put_nacked;
+	atomic_long_t mesq_noop_page_overflow;
+
+};
+
+enum mcs_op {cchop_allocate, cchop_start, cchop_interrupt, cchop_interrupt_sync,
+	cchop_deallocate, tfhop_write_only, tfhop_write_restart,
+	tghop_invalidate, mcsop_last};
+
+struct mcs_op_statistic {
+	atomic_long_t	count;
+	atomic_long_t	total;
+	unsigned long	max;
+};
+
+extern struct mcs_op_statistic mcs_op_statistics[mcsop_last];
+
+#define OPT_DPRINT		1
+#define OPT_STATS		2
+
+
+#define IRQ_GRU			110	/* Starting IRQ number for interrupts */
+
+/* Delay in jiffies between attempts to assign a GRU context */
+#define GRU_ASSIGN_DELAY	((HZ * 20) / 1000)
+
+/*
+ * If a process has it's context stolen, min delay in jiffies before trying to
+ * steal a context from another process.
+ */
+#define GRU_STEAL_DELAY		((HZ * 200) / 1000)
+
+#define STAT(id)	do {						\
+				if (gru_options & OPT_STATS)		\
+					atomic_long_inc(&gru_stats.id);	\
+			} while (0)
+
+#ifdef CONFIG_SGI_GRU_DEBUG
+#define gru_dbg(dev, fmt, x...)						\
+	do {								\
+		if (gru_options & OPT_DPRINT)				\
+			printk(KERN_DEBUG "GRU:%d %s: " fmt, smp_processor_id(), __func__, x);\
+	} while (0)
+#else
+#define gru_dbg(x...)
+#endif
+
+/*-----------------------------------------------------------------------------
+ * ASID management
+ */
+#define MAX_ASID	0xfffff0
+#define MIN_ASID	8
+#define ASID_INC	8	/* number of regions */
+
+/* Generate a GRU asid value from a GRU base asid & a virtual address. */
+#define VADDR_HI_BIT		64
+#define GRUREGION(addr)		((addr) >> (VADDR_HI_BIT - 3) & 3)
+#define GRUASID(asid, addr)	((asid) + GRUREGION(addr))
+
+/*------------------------------------------------------------------------------
+ *  File & VMS Tables
+ */
+
+struct gru_state;
+
+/*
+ * This structure is pointed to from the mmstruct via the notifier pointer.
+ * There is one of these per address space.
+ */
+struct gru_mm_tracker {				/* pack to reduce size */
+	unsigned int		mt_asid_gen:24;	/* ASID wrap count */
+	unsigned int		mt_asid:24;	/* current base ASID for gru */
+	unsigned short		mt_ctxbitmap:16;/* bitmap of contexts using
+						   asid */
+} __attribute__ ((packed));
+
+struct gru_mm_struct {
+	struct mmu_notifier	ms_notifier;
+	atomic_t		ms_refcnt;
+	spinlock_t		ms_asid_lock;	/* protects ASID assignment */
+	atomic_t		ms_range_active;/* num range_invals active */
+	char			ms_released;
+	wait_queue_head_t	ms_wait_queue;
+	DECLARE_BITMAP(ms_asidmap, GRU_MAX_GRUS);
+	struct gru_mm_tracker	ms_asids[GRU_MAX_GRUS];
+};
+
+/*
+ * One of these structures is allocated when a GSEG is mmaped. The
+ * structure is pointed to by the vma->vm_private_data field in the vma struct.
+ */
+struct gru_vma_data {
+	spinlock_t		vd_lock;	/* Serialize access to vma */
+	struct list_head	vd_head;	/* head of linked list of gts */
+	long			vd_user_options;/* misc user option flags */
+	int			vd_cbr_au_count;
+	int			vd_dsr_au_count;
+	unsigned char		vd_tlb_preload_count;
+};
+
+/*
+ * One of these is allocated for each thread accessing a mmaped GRU. A linked
+ * list of these structure is hung off the struct gru_vma_data in the mm_struct.
+ */
+struct gru_thread_state {
+	struct list_head	ts_next;	/* list - head at vma-private */
+	struct mutex		ts_ctxlock;	/* load/unload CTX lock */
+	struct mm_struct	*ts_mm;		/* mm currently mapped to
+						   context */
+	struct vm_area_struct	*ts_vma;	/* vma of GRU context */
+	struct gru_state	*ts_gru;	/* GRU where the context is
+						   loaded */
+	struct gru_mm_struct	*ts_gms;	/* asid & ioproc struct */
+	unsigned char		ts_tlb_preload_count; /* TLB preload pages */
+	unsigned long		ts_cbr_map;	/* map of allocated CBRs */
+	unsigned long		ts_dsr_map;	/* map of allocated DATA
+						   resources */
+	unsigned long		ts_steal_jiffies;/* jiffies when context last
+						    stolen */
+	long			ts_user_options;/* misc user option flags */
+	pid_t			ts_tgid_owner;	/* task that is using the
+						   context - for migration */
+	short			ts_user_blade_id;/* user selected blade */
+	char			ts_user_chiplet_id;/* user selected chiplet */
+	unsigned short		ts_sizeavail;	/* Pagesizes in use */
+	int			ts_tsid;	/* thread that owns the
+						   structure */
+	int			ts_tlb_int_select;/* target cpu if interrupts
+						     enabled */
+	int			ts_ctxnum;	/* context number where the
+						   context is loaded */
+	atomic_t		ts_refcnt;	/* reference count GTS */
+	unsigned char		ts_dsr_au_count;/* Number of DSR resources
+						   required for contest */
+	unsigned char		ts_cbr_au_count;/* Number of CBR resources
+						   required for contest */
+	char			ts_cch_req_slice;/* CCH packet slice */
+	char			ts_blade;	/* If >= 0, migrate context if
+						   ref from different blade */
+	char			ts_force_cch_reload;
+	char			ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each
+							  allocated CB */
+	int			ts_data_valid;	/* Indicates if ts_gdata has
+						   valid data */
+	struct gru_gseg_statistics ustats;	/* User statistics */
+	unsigned long		ts_gdata[0];	/* save area for GRU data (CB,
+						   DS, CBE) */
+};
+
+/*
+ * Threaded programs actually allocate an array of GSEGs when a context is
+ * created. Each thread uses a separate GSEG. TSID is the index into the GSEG
+ * array.
+ */
+#define TSID(a, v)		(((a) - (v)->vm_start) / GRU_GSEG_PAGESIZE)
+#define UGRUADDR(gts)		((gts)->ts_vma->vm_start +		\
+					(gts)->ts_tsid * GRU_GSEG_PAGESIZE)
+
+#define NULLCTX			(-1)	/* if context not loaded into GRU */
+
+/*-----------------------------------------------------------------------------
+ *  GRU State Tables
+ */
+
+/*
+ * One of these exists for each GRU chiplet.
+ */
+struct gru_state {
+	struct gru_blade_state	*gs_blade;		/* GRU state for entire
+							   blade */
+	unsigned long		gs_gru_base_paddr;	/* Physical address of
+							   gru segments (64) */
+	void			*gs_gru_base_vaddr;	/* Virtual address of
+							   gru segments (64) */
+	unsigned short		gs_gid;			/* unique GRU number */
+	unsigned short		gs_blade_id;		/* blade of GRU */
+	unsigned char		gs_chiplet_id;		/* blade chiplet of GRU */
+	unsigned char		gs_tgh_local_shift;	/* used to pick TGH for
+							   local flush */
+	unsigned char		gs_tgh_first_remote;	/* starting TGH# for
+							   remote flush */
+	spinlock_t		gs_asid_lock;		/* lock used for
+							   assigning asids */
+	spinlock_t		gs_lock;		/* lock used for
+							   assigning contexts */
+
+	/* -- the following are protected by the gs_asid_lock spinlock ---- */
+	unsigned int		gs_asid;		/* Next availe ASID */
+	unsigned int		gs_asid_limit;		/* Limit of available
+							   ASIDs */
+	unsigned int		gs_asid_gen;		/* asid generation.
+							   Inc on wrap */
+
+	/* --- the following fields are protected by the gs_lock spinlock --- */
+	unsigned long		gs_context_map;		/* bitmap to manage
+							   contexts in use */
+	unsigned long		gs_cbr_map;		/* bitmap to manage CB
+							   resources */
+	unsigned long		gs_dsr_map;		/* bitmap used to manage
+							   DATA resources */
+	unsigned int		gs_reserved_cbrs;	/* Number of kernel-
+							   reserved cbrs */
+	unsigned int		gs_reserved_dsr_bytes;	/* Bytes of kernel-
+							   reserved dsrs */
+	unsigned short		gs_active_contexts;	/* number of contexts
+							   in use */
+	struct gru_thread_state	*gs_gts[GRU_NUM_CCH];	/* GTS currently using
+							   the context */
+	int			gs_irq[GRU_NUM_TFM];	/* Interrupt irqs */
+};
+
+/*
+ * This structure contains the GRU state for all the GRUs on a blade.
+ */
+struct gru_blade_state {
+	void			*kernel_cb;		/* First kernel
+							   reserved cb */
+	void			*kernel_dsr;		/* First kernel
+							   reserved DSR */
+	struct rw_semaphore	bs_kgts_sema;		/* lock for kgts */
+	struct gru_thread_state *bs_kgts;		/* GTS for kernel use */
+
+	/* ---- the following are used for managing kernel async GRU CBRs --- */
+	int			bs_async_dsr_bytes;	/* DSRs for async */
+	int			bs_async_cbrs;		/* CBRs AU for async */
+	struct completion	*bs_async_wq;
+
+	/* ---- the following are protected by the bs_lock spinlock ---- */
+	spinlock_t		bs_lock;		/* lock used for
+							   stealing contexts */
+	int			bs_lru_ctxnum;		/* STEAL - last context
+							   stolen */
+	struct gru_state	*bs_lru_gru;		/* STEAL - last gru
+							   stolen */
+
+	struct gru_state	bs_grus[GRU_CHIPLETS_PER_BLADE];
+};
+
+/*-----------------------------------------------------------------------------
+ * Address Primitives
+ */
+#define get_tfm_for_cpu(g, c)						\
+	((struct gru_tlb_fault_map *)get_tfm((g)->gs_gru_base_vaddr, (c)))
+#define get_tfh_by_index(g, i)						\
+	((struct gru_tlb_fault_handle *)get_tfh((g)->gs_gru_base_vaddr, (i)))
+#define get_tgh_by_index(g, i)						\
+	((struct gru_tlb_global_handle *)get_tgh((g)->gs_gru_base_vaddr, (i)))
+#define get_cbe_by_index(g, i)						\
+	((struct gru_control_block_extended *)get_cbe((g)->gs_gru_base_vaddr,\
+			(i)))
+
+/*-----------------------------------------------------------------------------
+ * Useful Macros
+ */
+
+/* Given a blade# & chiplet#, get a pointer to the GRU */
+#define get_gru(b, c)		(&gru_base[b]->bs_grus[c])
+
+/* Number of bytes to save/restore when unloading/loading GRU contexts */
+#define DSR_BYTES(dsr)		((dsr) * GRU_DSR_AU_BYTES)
+#define CBR_BYTES(cbr)		((cbr) * GRU_HANDLE_BYTES * GRU_CBR_AU_SIZE * 2)
+
+/* Convert a user CB number to the actual CBRNUM */
+#define thread_cbr_number(gts, n) ((gts)->ts_cbr_idx[(n) / GRU_CBR_AU_SIZE] \
+				  * GRU_CBR_AU_SIZE + (n) % GRU_CBR_AU_SIZE)
+
+/* Convert a gid to a pointer to the GRU */
+#define GID_TO_GRU(gid)							\
+	(gru_base[(gid) / GRU_CHIPLETS_PER_BLADE] ?			\
+		(&gru_base[(gid) / GRU_CHIPLETS_PER_BLADE]->		\
+			bs_grus[(gid) % GRU_CHIPLETS_PER_BLADE]) :	\
+	 NULL)
+
+/* Scan all active GRUs in a GRU bitmap */
+#define for_each_gru_in_bitmap(gid, map)				\
+	for_each_set_bit((gid), (map), GRU_MAX_GRUS)
+
+/* Scan all active GRUs on a specific blade */
+#define for_each_gru_on_blade(gru, nid, i)				\
+	for ((gru) = gru_base[nid]->bs_grus, (i) = 0;			\
+			(i) < GRU_CHIPLETS_PER_BLADE;			\
+			(i)++, (gru)++)
+
+/* Scan all GRUs */
+#define foreach_gid(gid)						\
+	for ((gid) = 0; (gid) < gru_max_gids; (gid)++)
+
+/* Scan all active GTSs on a gru. Note: must hold ss_lock to use this macro. */
+#define for_each_gts_on_gru(gts, gru, ctxnum)				\
+	for ((ctxnum) = 0; (ctxnum) < GRU_NUM_CCH; (ctxnum)++)		\
+		if (((gts) = (gru)->gs_gts[ctxnum]))
+
+/* Scan each CBR whose bit is set in a TFM (or copy of) */
+#define for_each_cbr_in_tfm(i, map)					\
+	for_each_set_bit((i), (map), GRU_NUM_CBE)
+
+/* Scan each CBR in a CBR bitmap. Note: multiple CBRs in an allocation unit */
+#define for_each_cbr_in_allocation_map(i, map, k)			\
+	for_each_set_bit((k), (map), GRU_CBR_AU)			\
+		for ((i) = (k)*GRU_CBR_AU_SIZE;				\
+				(i) < ((k) + 1) * GRU_CBR_AU_SIZE; (i)++)
+
+/* Scan each DSR in a DSR bitmap. Note: multiple DSRs in an allocation unit */
+#define for_each_dsr_in_allocation_map(i, map, k)			\
+	for_each_set_bit((k), (const unsigned long *)(map), GRU_DSR_AU)	\
+		for ((i) = (k) * GRU_DSR_AU_CL;				\
+				(i) < ((k) + 1) * GRU_DSR_AU_CL; (i)++)
+
+#define gseg_physical_address(gru, ctxnum)				\
+		((gru)->gs_gru_base_paddr + ctxnum * GRU_GSEG_STRIDE)
+#define gseg_virtual_address(gru, ctxnum)				\
+		((gru)->gs_gru_base_vaddr + ctxnum * GRU_GSEG_STRIDE)
+
+/*-----------------------------------------------------------------------------
+ * Lock / Unlock GRU handles
+ * 	Use the "delresp" bit in the handle as a "lock" bit.
+ */
+
+/* Lock hierarchy checking enabled only in emulator */
+
+/* 0 = lock failed, 1 = locked */
+static inline int __trylock_handle(void *h)
+{
+	return !test_and_set_bit(1, h);
+}
+
+static inline void __lock_handle(void *h)
+{
+	while (test_and_set_bit(1, h))
+		cpu_relax();
+}
+
+static inline void __unlock_handle(void *h)
+{
+	clear_bit(1, h);
+}
+
+static inline int trylock_cch_handle(struct gru_context_configuration_handle *cch)
+{
+	return __trylock_handle(cch);
+}
+
+static inline void lock_cch_handle(struct gru_context_configuration_handle *cch)
+{
+	__lock_handle(cch);
+}
+
+static inline void unlock_cch_handle(struct gru_context_configuration_handle
+				     *cch)
+{
+	__unlock_handle(cch);
+}
+
+static inline void lock_tgh_handle(struct gru_tlb_global_handle *tgh)
+{
+	__lock_handle(tgh);
+}
+
+static inline void unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
+{
+	__unlock_handle(tgh);
+}
+
+static inline int is_kernel_context(struct gru_thread_state *gts)
+{
+	return !gts->ts_mm;
+}
+
+/*
+ * The following are for Nehelem-EX. A more general scheme is needed for
+ * future processors.
+ */
+#define UV_MAX_INT_CORES		8
+#define uv_cpu_socket_number(p)		((cpu_physical_id(p) >> 5) & 1)
+#define uv_cpu_ht_number(p)		(cpu_physical_id(p) & 1)
+#define uv_cpu_core_number(p)		(((cpu_physical_id(p) >> 2) & 4) |	\
+					((cpu_physical_id(p) >> 1) & 3))
+/*-----------------------------------------------------------------------------
+ * Function prototypes & externs
+ */
+struct gru_unload_context_req;
+
+extern const struct vm_operations_struct gru_vm_ops;
+extern struct device *grudev;
+
+extern struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma,
+				int tsid);
+extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct
+				*vma, int tsid);
+extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct
+				*vma, int tsid);
+extern struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts);
+extern void gru_load_context(struct gru_thread_state *gts);
+extern void gru_steal_context(struct gru_thread_state *gts);
+extern void gru_unload_context(struct gru_thread_state *gts, int savestate);
+extern int gru_update_cch(struct gru_thread_state *gts);
+extern void gts_drop(struct gru_thread_state *gts);
+extern void gru_tgh_flush_init(struct gru_state *gru);
+extern int gru_kservices_init(void);
+extern void gru_kservices_exit(void);
+extern irqreturn_t gru0_intr(int irq, void *dev_id);
+extern irqreturn_t gru1_intr(int irq, void *dev_id);
+extern irqreturn_t gru_intr_mblade(int irq, void *dev_id);
+extern int gru_dump_chiplet_request(unsigned long arg);
+extern long gru_get_gseg_statistics(unsigned long arg);
+extern int gru_handle_user_call_os(unsigned long address);
+extern int gru_user_flush_tlb(unsigned long arg);
+extern int gru_user_unload_context(unsigned long arg);
+extern int gru_get_exception_detail(unsigned long arg);
+extern int gru_set_context_option(unsigned long address);
+extern void gru_check_context_placement(struct gru_thread_state *gts);
+extern int gru_cpu_fault_map_id(void);
+extern struct vm_area_struct *gru_find_vma(unsigned long vaddr);
+extern void gru_flush_all_tlb(struct gru_state *gru);
+extern int gru_proc_init(void);
+extern void gru_proc_exit(void);
+
+extern struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
+		int cbr_au_count, int dsr_au_count,
+		unsigned char tlb_preload_count, int options, int tsid);
+extern unsigned long gru_reserve_cb_resources(struct gru_state *gru,
+		int cbr_au_count, char *cbmap);
+extern unsigned long gru_reserve_ds_resources(struct gru_state *gru,
+		int dsr_au_count, char *dsmap);
+extern vm_fault_t gru_fault(struct vm_fault *vmf);
+extern struct gru_mm_struct *gru_register_mmu_notifier(void);
+extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms);
+
+extern int gru_ktest(unsigned long arg);
+extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
+					unsigned long len);
+
+extern unsigned long gru_options;
+
+#endif /* __GRUTABLES_H__ */
diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c
new file mode 100644
index 000000000..be28f05bf
--- /dev/null
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -0,0 +1,370 @@
+/*
+ * SN Platform GRU Driver
+ *
+ * 		MMUOPS callbacks  + TLB flushing
+ *
+ * This file handles emu notifier callbacks from the core kernel. The callbacks
+ * are used to update the TLB in the GRU as a result of changes in the
+ * state of a process address space. This file also handles TLB invalidates
+ * from the GRU driver.
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/hugetlb.h>
+#include <linux/delay.h>
+#include <linux/timex.h>
+#include <linux/srcu.h>
+#include <asm/processor.h>
+#include "gru.h"
+#include "grutables.h"
+#include <asm/uv/uv_hub.h>
+
+#define gru_random()	get_cycles()
+
+/* ---------------------------------- TLB Invalidation functions --------
+ * get_tgh_handle
+ *
+ * Find a TGH to use for issuing a TLB invalidate. For GRUs that are on the
+ * local blade, use a fixed TGH that is a function of the blade-local cpu
+ * number. Normally, this TGH is private to the cpu & no contention occurs for
+ * the TGH. For offblade GRUs, select a random TGH in the range above the
+ * private TGHs. A spinlock is required to access this TGH & the lock must be
+ * released when the invalidate is completes. This sucks, but it is the best we
+ * can do.
+ *
+ * Note that the spinlock is IN the TGH handle so locking does not involve
+ * additional cache lines.
+ *
+ */
+static inline int get_off_blade_tgh(struct gru_state *gru)
+{
+	int n;
+
+	n = GRU_NUM_TGH - gru->gs_tgh_first_remote;
+	n = gru_random() % n;
+	n += gru->gs_tgh_first_remote;
+	return n;
+}
+
+static inline int get_on_blade_tgh(struct gru_state *gru)
+{
+	return uv_blade_processor_id() >> gru->gs_tgh_local_shift;
+}
+
+static struct gru_tlb_global_handle *get_lock_tgh_handle(struct gru_state
+							 *gru)
+{
+	struct gru_tlb_global_handle *tgh;
+	int n;
+
+	preempt_disable();
+	if (uv_numa_blade_id() == gru->gs_blade_id)
+		n = get_on_blade_tgh(gru);
+	else
+		n = get_off_blade_tgh(gru);
+	tgh = get_tgh_by_index(gru, n);
+	lock_tgh_handle(tgh);
+
+	return tgh;
+}
+
+static void get_unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
+{
+	unlock_tgh_handle(tgh);
+	preempt_enable();
+}
+
+/*
+ * gru_flush_tlb_range
+ *
+ * General purpose TLB invalidation function. This function scans every GRU in
+ * the ENTIRE system (partition) looking for GRUs where the specified MM has
+ * been accessed by the GRU. For each GRU found, the TLB must be invalidated OR
+ * the ASID invalidated. Invalidating an ASID causes a new ASID to be assigned
+ * on the next fault. This effectively flushes the ENTIRE TLB for the MM at the
+ * cost of (possibly) a large number of future TLBmisses.
+ *
+ * The current algorithm is optimized based on the following (somewhat true)
+ * assumptions:
+ * 	- GRU contexts are not loaded into a GRU unless a reference is made to
+ * 	  the data segment or control block (this is true, not an assumption).
+ * 	  If a DS/CB is referenced, the user will also issue instructions that
+ * 	  cause TLBmisses. It is not necessary to optimize for the case where
+ * 	  contexts are loaded but no instructions cause TLB misses. (I know
+ * 	  this will happen but I'm not optimizing for it).
+ * 	- GRU instructions to invalidate TLB entries are SLOOOOWWW - normally
+ * 	  a few usec but in unusual cases, it could be longer. Avoid if
+ * 	  possible.
+ * 	- intrablade process migration between cpus is not frequent but is
+ * 	  common.
+ * 	- a GRU context is not typically migrated to a different GRU on the
+ * 	  blade because of intrablade migration
+ *	- interblade migration is rare. Processes migrate their GRU context to
+ *	  the new blade.
+ *	- if interblade migration occurs, migration back to the original blade
+ *	  is very very rare (ie., no optimization for this case)
+ *	- most GRU instruction operate on a subset of the user REGIONS. Code
+ *	  & shared library regions are not likely targets of GRU instructions.
+ *
+ * To help improve the efficiency of TLB invalidation, the GMS data
+ * structure is maintained for EACH address space (MM struct). The GMS is
+ * also the structure that contains the pointer to the mmu callout
+ * functions. This structure is linked to the mm_struct for the address space
+ * using the mmu "register" function. The mmu interfaces are used to
+ * provide the callbacks for TLB invalidation. The GMS contains:
+ *
+ * 	- asid[maxgrus] array. ASIDs are assigned to a GRU when a context is
+ * 	  loaded into the GRU.
+ * 	- asidmap[maxgrus]. bitmap to make it easier to find non-zero asids in
+ * 	  the above array
+ *	- ctxbitmap[maxgrus]. Indicates the contexts that are currently active
+ *	  in the GRU for the address space. This bitmap must be passed to the
+ *	  GRU to do an invalidate.
+ *
+ * The current algorithm for invalidating TLBs is:
+ * 	- scan the asidmap for GRUs where the context has been loaded, ie,
+ * 	  asid is non-zero.
+ * 	- for each gru found:
+ * 		- if the ctxtmap is non-zero, there are active contexts in the
+ * 		  GRU. TLB invalidate instructions must be issued to the GRU.
+ *		- if the ctxtmap is zero, no context is active. Set the ASID to
+ *		  zero to force a full TLB invalidation. This is fast but will
+ *		  cause a lot of TLB misses if the context is reloaded onto the
+ *		  GRU
+ *
+ */
+
+void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
+			 unsigned long len)
+{
+	struct gru_state *gru;
+	struct gru_mm_tracker *asids;
+	struct gru_tlb_global_handle *tgh;
+	unsigned long num;
+	int grupagesize, pagesize, pageshift, gid, asid;
+
+	/* ZZZ TODO - handle huge pages */
+	pageshift = PAGE_SHIFT;
+	pagesize = (1UL << pageshift);
+	grupagesize = GRU_PAGESIZE(pageshift);
+	num = min(((len + pagesize - 1) >> pageshift), GRUMAXINVAL);
+
+	STAT(flush_tlb);
+	gru_dbg(grudev, "gms %p, start 0x%lx, len 0x%lx, asidmap 0x%lx\n", gms,
+		start, len, gms->ms_asidmap[0]);
+
+	spin_lock(&gms->ms_asid_lock);
+	for_each_gru_in_bitmap(gid, gms->ms_asidmap) {
+		STAT(flush_tlb_gru);
+		gru = GID_TO_GRU(gid);
+		asids = gms->ms_asids + gid;
+		asid = asids->mt_asid;
+		if (asids->mt_ctxbitmap && asid) {
+			STAT(flush_tlb_gru_tgh);
+			asid = GRUASID(asid, start);
+			gru_dbg(grudev,
+	"  FLUSH gruid %d, asid 0x%x, vaddr 0x%lx, vamask 0x%x, num %ld, cbmap 0x%x\n",
+			      gid, asid, start, grupagesize, num, asids->mt_ctxbitmap);
+			tgh = get_lock_tgh_handle(gru);
+			tgh_invalidate(tgh, start, ~0, asid, grupagesize, 0,
+				       num - 1, asids->mt_ctxbitmap);
+			get_unlock_tgh_handle(tgh);
+		} else {
+			STAT(flush_tlb_gru_zero_asid);
+			asids->mt_asid = 0;
+			__clear_bit(gru->gs_gid, gms->ms_asidmap);
+			gru_dbg(grudev,
+	"  CLEARASID gruid %d, asid 0x%x, cbtmap 0x%x, asidmap 0x%lx\n",
+				gid, asid, asids->mt_ctxbitmap,
+				gms->ms_asidmap[0]);
+		}
+	}
+	spin_unlock(&gms->ms_asid_lock);
+}
+
+/*
+ * Flush the entire TLB on a chiplet.
+ */
+void gru_flush_all_tlb(struct gru_state *gru)
+{
+	struct gru_tlb_global_handle *tgh;
+
+	gru_dbg(grudev, "gid %d\n", gru->gs_gid);
+	tgh = get_lock_tgh_handle(gru);
+	tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0xffff);
+	get_unlock_tgh_handle(tgh);
+}
+
+/*
+ * MMUOPS notifier callout functions
+ */
+static int gru_invalidate_range_start(struct mmu_notifier *mn,
+				       struct mm_struct *mm,
+				       unsigned long start, unsigned long end,
+				       bool blockable)
+{
+	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
+						 ms_notifier);
+
+	STAT(mmu_invalidate_range);
+	atomic_inc(&gms->ms_range_active);
+	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms,
+		start, end, atomic_read(&gms->ms_range_active));
+	gru_flush_tlb_range(gms, start, end - start);
+
+	return 0;
+}
+
+static void gru_invalidate_range_end(struct mmu_notifier *mn,
+				     struct mm_struct *mm, unsigned long start,
+				     unsigned long end)
+{
+	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
+						 ms_notifier);
+
+	/* ..._and_test() provides needed barrier */
+	(void)atomic_dec_and_test(&gms->ms_range_active);
+
+	wake_up_all(&gms->ms_wait_queue);
+	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end);
+}
+
+static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm)
+{
+	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
+						 ms_notifier);
+
+	gms->ms_released = 1;
+	gru_dbg(grudev, "gms %p\n", gms);
+}
+
+
+static const struct mmu_notifier_ops gru_mmuops = {
+	.flags			= MMU_INVALIDATE_DOES_NOT_BLOCK,
+	.invalidate_range_start	= gru_invalidate_range_start,
+	.invalidate_range_end	= gru_invalidate_range_end,
+	.release		= gru_release,
+};
+
+/* Move this to the basic mmu_notifier file. But for now... */
+static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm,
+			const struct mmu_notifier_ops *ops)
+{
+	struct mmu_notifier *mn, *gru_mn = NULL;
+
+	if (mm->mmu_notifier_mm) {
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list,
+					 hlist)
+		    if (mn->ops == ops) {
+			gru_mn = mn;
+			break;
+		}
+		rcu_read_unlock();
+	}
+	return gru_mn;
+}
+
+struct gru_mm_struct *gru_register_mmu_notifier(void)
+{
+	struct gru_mm_struct *gms;
+	struct mmu_notifier *mn;
+	int err;
+
+	mn = mmu_find_ops(current->mm, &gru_mmuops);
+	if (mn) {
+		gms = container_of(mn, struct gru_mm_struct, ms_notifier);
+		atomic_inc(&gms->ms_refcnt);
+	} else {
+		gms = kzalloc(sizeof(*gms), GFP_KERNEL);
+		if (!gms)
+			return ERR_PTR(-ENOMEM);
+		STAT(gms_alloc);
+		spin_lock_init(&gms->ms_asid_lock);
+		gms->ms_notifier.ops = &gru_mmuops;
+		atomic_set(&gms->ms_refcnt, 1);
+		init_waitqueue_head(&gms->ms_wait_queue);
+		err = __mmu_notifier_register(&gms->ms_notifier, current->mm);
+		if (err)
+			goto error;
+	}
+	if (gms)
+		gru_dbg(grudev, "gms %p, refcnt %d\n", gms,
+			atomic_read(&gms->ms_refcnt));
+	return gms;
+error:
+	kfree(gms);
+	return ERR_PTR(err);
+}
+
+void gru_drop_mmu_notifier(struct gru_mm_struct *gms)
+{
+	gru_dbg(grudev, "gms %p, refcnt %d, released %d\n", gms,
+		atomic_read(&gms->ms_refcnt), gms->ms_released);
+	if (atomic_dec_return(&gms->ms_refcnt) == 0) {
+		if (!gms->ms_released)
+			mmu_notifier_unregister(&gms->ms_notifier, current->mm);
+		kfree(gms);
+		STAT(gms_free);
+	}
+}
+
+/*
+ * Setup TGH parameters. There are:
+ * 	- 24 TGH handles per GRU chiplet
+ * 	- a portion (MAX_LOCAL_TGH) of the handles are reserved for
+ * 	  use by blade-local cpus
+ * 	- the rest are used by off-blade cpus. This usage is
+ * 	  less frequent than blade-local usage.
+ *
+ * For now, use 16 handles for local flushes, 8 for remote flushes. If the blade
+ * has less tan or equal to 16 cpus, each cpu has a unique handle that it can
+ * use.
+ */
+#define MAX_LOCAL_TGH	16
+
+void gru_tgh_flush_init(struct gru_state *gru)
+{
+	int cpus, shift = 0, n;
+
+	cpus = uv_blade_nr_possible_cpus(gru->gs_blade_id);
+
+	/* n = cpus rounded up to next power of 2 */
+	if (cpus) {
+		n = 1 << fls(cpus - 1);
+
+		/*
+		 * shift count for converting local cpu# to TGH index
+		 *      0 if cpus <= MAX_LOCAL_TGH,
+		 *      1 if cpus <= 2*MAX_LOCAL_TGH,
+		 *      etc
+		 */
+		shift = max(0, fls(n - 1) - fls(MAX_LOCAL_TGH - 1));
+	}
+	gru->gs_tgh_local_shift = shift;
+
+	/* first starting TGH index to use for remote purges */
+	gru->gs_tgh_first_remote = (cpus + (1 << shift) - 1) >> shift;
+
+}
diff --git a/drivers/misc/sgi-xp/Makefile b/drivers/misc/sgi-xp/Makefile
new file mode 100644
index 000000000..bbb622c19
--- /dev/null
+++ b/drivers/misc/sgi-xp/Makefile
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for SGI's XP devices.
+#
+
+obj-$(CONFIG_SGI_XP)		+= xp.o
+xp-y				:= xp_main.o
+xp-$(CONFIG_IA64_SGI_SN2)	+= xp_sn2.o xp_nofault.o
+xp-$(CONFIG_IA64_GENERIC)	+= xp_sn2.o xp_nofault.o
+xp-$(CONFIG_IA64_SGI_UV)	+= xp_uv.o
+xp-$(CONFIG_X86_64)		+= xp_uv.o
+
+obj-$(CONFIG_SGI_XP)		+= xpc.o
+xpc-y				:= xpc_main.o xpc_channel.o xpc_partition.o
+xpc-$(CONFIG_IA64_SGI_SN2)	+= xpc_sn2.o
+xpc-$(CONFIG_IA64_GENERIC)	+= xpc_sn2.o
+xpc-$(CONFIG_IA64_SGI_UV) 	+= xpc_uv.o
+xpc-$(CONFIG_X86_64)		+= xpc_uv.o
+
+obj-$(CONFIG_SGI_XP)		+= xpnet.o
diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
new file mode 100644
index 000000000..b8069eec1
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp.h
@@ -0,0 +1,368 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004-2008 Silicon Graphics, Inc. All rights reserved.
+ */
+
+/*
+ * External Cross Partition (XP) structures and defines.
+ */
+
+#ifndef _DRIVERS_MISC_SGIXP_XP_H
+#define _DRIVERS_MISC_SGIXP_XP_H
+
+#include <linux/mutex.h>
+
+#if defined CONFIG_X86_UV || defined CONFIG_IA64_SGI_UV
+#include <asm/uv/uv.h>
+#define is_uv()		is_uv_system()
+#endif
+
+#ifndef is_uv
+#define is_uv()		0
+#endif
+
+#if defined CONFIG_IA64
+#include <asm/sn/arch.h>	/* defines is_shub1() and is_shub2() */
+#define is_shub()	ia64_platform_is("sn2")
+#endif
+
+#ifndef is_shub1
+#define is_shub1()	0
+#endif
+
+#ifndef is_shub2
+#define is_shub2()	0
+#endif
+
+#ifndef is_shub
+#define is_shub()	0
+#endif
+
+#ifdef USE_DBUG_ON
+#define DBUG_ON(condition)	BUG_ON(condition)
+#else
+#define DBUG_ON(condition)
+#endif
+
+/*
+ * Define the maximum number of partitions the system can possibly support.
+ * It is based on the maximum number of hardware partitionable regions. The
+ * term 'region' in this context refers to the minimum number of nodes that
+ * can comprise an access protection grouping. The access protection is in
+ * regards to memory, IPI and IOI.
+ *
+ * The maximum number of hardware partitionable regions is equal to the
+ * maximum number of nodes in the entire system divided by the minimum number
+ * of nodes that comprise an access protection grouping.
+ */
+#define XP_MAX_NPARTITIONS_SN2	64
+#define XP_MAX_NPARTITIONS_UV	256
+
+/*
+ * XPC establishes channel connections between the local partition and any
+ * other partition that is currently up. Over these channels, kernel-level
+ * `users' can communicate with their counterparts on the other partitions.
+ *
+ * If the need for additional channels arises, one can simply increase
+ * XPC_MAX_NCHANNELS accordingly. If the day should come where that number
+ * exceeds the absolute MAXIMUM number of channels possible (eight), then one
+ * will need to make changes to the XPC code to accommodate for this.
+ *
+ * The absolute maximum number of channels possible is limited to eight for
+ * performance reasons on sn2 hardware. The internal cross partition structures
+ * require sixteen bytes per channel, and eight allows all of this
+ * interface-shared info to fit in one 128-byte cacheline.
+ */
+#define XPC_MEM_CHANNEL		0	/* memory channel number */
+#define	XPC_NET_CHANNEL		1	/* network channel number */
+
+#define XPC_MAX_NCHANNELS	2	/* max #of channels allowed */
+
+#if XPC_MAX_NCHANNELS > 8
+#error	XPC_MAX_NCHANNELS exceeds absolute MAXIMUM possible.
+#endif
+
+/*
+ * Define macro, XPC_MSG_SIZE(), is provided for the user
+ * that wants to fit as many msg entries as possible in a given memory size
+ * (e.g. a memory page).
+ */
+#define XPC_MSG_MAX_SIZE	128
+#define XPC_MSG_HDR_MAX_SIZE	16
+#define XPC_MSG_PAYLOAD_MAX_SIZE (XPC_MSG_MAX_SIZE - XPC_MSG_HDR_MAX_SIZE)
+
+#define XPC_MSG_SIZE(_payload_size) \
+				ALIGN(XPC_MSG_HDR_MAX_SIZE + (_payload_size), \
+				      is_uv() ? 64 : 128)
+
+
+/*
+ * Define the return values and values passed to user's callout functions.
+ * (It is important to add new value codes at the end just preceding
+ * xpUnknownReason, which must have the highest numerical value.)
+ */
+enum xp_retval {
+	xpSuccess = 0,
+
+	xpNotConnected,		/*  1: channel is not connected */
+	xpConnected,		/*  2: channel connected (opened) */
+	xpRETIRED1,		/*  3: (formerly xpDisconnected) */
+
+	xpMsgReceived,		/*  4: message received */
+	xpMsgDelivered,		/*  5: message delivered and acknowledged */
+
+	xpRETIRED2,		/*  6: (formerly xpTransferFailed) */
+
+	xpNoWait,		/*  7: operation would require wait */
+	xpRetry,		/*  8: retry operation */
+	xpTimeout,		/*  9: timeout in xpc_allocate_msg_wait() */
+	xpInterrupted,		/* 10: interrupted wait */
+
+	xpUnequalMsgSizes,	/* 11: message size disparity between sides */
+	xpInvalidAddress,	/* 12: invalid address */
+
+	xpNoMemory,		/* 13: no memory available for XPC structures */
+	xpLackOfResources,	/* 14: insufficient resources for operation */
+	xpUnregistered,		/* 15: channel is not registered */
+	xpAlreadyRegistered,	/* 16: channel is already registered */
+
+	xpPartitionDown,	/* 17: remote partition is down */
+	xpNotLoaded,		/* 18: XPC module is not loaded */
+	xpUnloading,		/* 19: this side is unloading XPC module */
+
+	xpBadMagic,		/* 20: XPC MAGIC string not found */
+
+	xpReactivating,		/* 21: remote partition was reactivated */
+
+	xpUnregistering,	/* 22: this side is unregistering channel */
+	xpOtherUnregistering,	/* 23: other side is unregistering channel */
+
+	xpCloneKThread,		/* 24: cloning kernel thread */
+	xpCloneKThreadFailed,	/* 25: cloning kernel thread failed */
+
+	xpNoHeartbeat,		/* 26: remote partition has no heartbeat */
+
+	xpPioReadError,		/* 27: PIO read error */
+	xpPhysAddrRegFailed,	/* 28: registration of phys addr range failed */
+
+	xpRETIRED3,		/* 29: (formerly xpBteDirectoryError) */
+	xpRETIRED4,		/* 30: (formerly xpBtePoisonError) */
+	xpRETIRED5,		/* 31: (formerly xpBteWriteError) */
+	xpRETIRED6,		/* 32: (formerly xpBteAccessError) */
+	xpRETIRED7,		/* 33: (formerly xpBtePWriteError) */
+	xpRETIRED8,		/* 34: (formerly xpBtePReadError) */
+	xpRETIRED9,		/* 35: (formerly xpBteTimeOutError) */
+	xpRETIRED10,		/* 36: (formerly xpBteXtalkError) */
+	xpRETIRED11,		/* 37: (formerly xpBteNotAvailable) */
+	xpRETIRED12,		/* 38: (formerly xpBteUnmappedError) */
+
+	xpBadVersion,		/* 39: bad version number */
+	xpVarsNotSet,		/* 40: the XPC variables are not set up */
+	xpNoRsvdPageAddr,	/* 41: unable to get rsvd page's phys addr */
+	xpInvalidPartid,	/* 42: invalid partition ID */
+	xpLocalPartid,		/* 43: local partition ID */
+
+	xpOtherGoingDown,	/* 44: other side going down, reason unknown */
+	xpSystemGoingDown,	/* 45: system is going down, reason unknown */
+	xpSystemHalt,		/* 46: system is being halted */
+	xpSystemReboot,		/* 47: system is being rebooted */
+	xpSystemPoweroff,	/* 48: system is being powered off */
+
+	xpDisconnecting,	/* 49: channel disconnecting (closing) */
+
+	xpOpenCloseError,	/* 50: channel open/close protocol error */
+
+	xpDisconnected,		/* 51: channel disconnected (closed) */
+
+	xpBteCopyError,		/* 52: bte_copy() returned error */
+	xpSalError,		/* 53: sn SAL error */
+	xpRsvdPageNotSet,	/* 54: the reserved page is not set up */
+	xpPayloadTooBig,	/* 55: payload too large for message slot */
+
+	xpUnsupported,		/* 56: unsupported functionality or resource */
+	xpNeedMoreInfo,		/* 57: more info is needed by SAL */
+
+	xpGruCopyError,		/* 58: gru_copy_gru() returned error */
+	xpGruSendMqError,	/* 59: gru send message queue related error */
+
+	xpBadChannelNumber,	/* 60: invalid channel number */
+	xpBadMsgType,		/* 61: invalid message type */
+	xpBiosError,		/* 62: BIOS error */
+
+	xpUnknownReason		/* 63: unknown reason - must be last in enum */
+};
+
+/*
+ * Define the callout function type used by XPC to update the user on
+ * connection activity and state changes via the user function registered
+ * by xpc_connect().
+ *
+ * Arguments:
+ *
+ *	reason - reason code.
+ *	partid - partition ID associated with condition.
+ *	ch_number - channel # associated with condition.
+ *	data - pointer to optional data.
+ *	key - pointer to optional user-defined value provided as the "key"
+ *	      argument to xpc_connect().
+ *
+ * A reason code of xpConnected indicates that a connection has been
+ * established to the specified partition on the specified channel. The data
+ * argument indicates the max number of entries allowed in the message queue.
+ *
+ * A reason code of xpMsgReceived indicates that a XPC message arrived from
+ * the specified partition on the specified channel. The data argument
+ * specifies the address of the message's payload. The user must call
+ * xpc_received() when finished with the payload.
+ *
+ * All other reason codes indicate failure. The data argmument is NULL.
+ * When a failure reason code is received, one can assume that the channel
+ * is not connected.
+ */
+typedef void (*xpc_channel_func) (enum xp_retval reason, short partid,
+				  int ch_number, void *data, void *key);
+
+/*
+ * Define the callout function type used by XPC to notify the user of
+ * messages received and delivered via the user function registered by
+ * xpc_send_notify().
+ *
+ * Arguments:
+ *
+ *	reason - reason code.
+ *	partid - partition ID associated with condition.
+ *	ch_number - channel # associated with condition.
+ *	key - pointer to optional user-defined value provided as the "key"
+ *	      argument to xpc_send_notify().
+ *
+ * A reason code of xpMsgDelivered indicates that the message was delivered
+ * to the intended recipient and that they have acknowledged its receipt by
+ * calling xpc_received().
+ *
+ * All other reason codes indicate failure.
+ *
+ * NOTE: The user defined function must be callable by an interrupt handler
+ *       and thus cannot block.
+ */
+typedef void (*xpc_notify_func) (enum xp_retval reason, short partid,
+				 int ch_number, void *key);
+
+/*
+ * The following is a registration entry. There is a global array of these,
+ * one per channel. It is used to record the connection registration made
+ * by the users of XPC. As long as a registration entry exists, for any
+ * partition that comes up, XPC will attempt to establish a connection on
+ * that channel. Notification that a connection has been made will occur via
+ * the xpc_channel_func function.
+ *
+ * The 'func' field points to the function to call when aynchronous
+ * notification is required for such events as: a connection established/lost,
+ * or an incoming message received, or an error condition encountered. A
+ * non-NULL 'func' field indicates that there is an active registration for
+ * the channel.
+ */
+struct xpc_registration {
+	struct mutex mutex;
+	xpc_channel_func func;	/* function to call */
+	void *key;		/* pointer to user's key */
+	u16 nentries;		/* #of msg entries in local msg queue */
+	u16 entry_size;		/* message queue's message entry size */
+	u32 assigned_limit;	/* limit on #of assigned kthreads */
+	u32 idle_limit;		/* limit on #of idle kthreads */
+} ____cacheline_aligned;
+
+#define XPC_CHANNEL_REGISTERED(_c)	(xpc_registrations[_c].func != NULL)
+
+/* the following are valid xpc_send() or xpc_send_notify() flags */
+#define XPC_WAIT	0	/* wait flag */
+#define XPC_NOWAIT	1	/* no wait flag */
+
+struct xpc_interface {
+	void (*connect) (int);
+	void (*disconnect) (int);
+	enum xp_retval (*send) (short, int, u32, void *, u16);
+	enum xp_retval (*send_notify) (short, int, u32, void *, u16,
+					xpc_notify_func, void *);
+	void (*received) (short, int, void *);
+	enum xp_retval (*partid_to_nasids) (short, void *);
+};
+
+extern struct xpc_interface xpc_interface;
+
+extern void xpc_set_interface(void (*)(int),
+			      void (*)(int),
+			      enum xp_retval (*)(short, int, u32, void *, u16),
+			      enum xp_retval (*)(short, int, u32, void *, u16,
+						 xpc_notify_func, void *),
+			      void (*)(short, int, void *),
+			      enum xp_retval (*)(short, void *));
+extern void xpc_clear_interface(void);
+
+extern enum xp_retval xpc_connect(int, xpc_channel_func, void *, u16,
+				   u16, u32, u32);
+extern void xpc_disconnect(int);
+
+static inline enum xp_retval
+xpc_send(short partid, int ch_number, u32 flags, void *payload,
+	 u16 payload_size)
+{
+	if (!xpc_interface.send)
+		return xpNotLoaded;
+
+	return xpc_interface.send(partid, ch_number, flags, payload,
+				  payload_size);
+}
+
+static inline enum xp_retval
+xpc_send_notify(short partid, int ch_number, u32 flags, void *payload,
+		u16 payload_size, xpc_notify_func func, void *key)
+{
+	if (!xpc_interface.send_notify)
+		return xpNotLoaded;
+
+	return xpc_interface.send_notify(partid, ch_number, flags, payload,
+					 payload_size, func, key);
+}
+
+static inline void
+xpc_received(short partid, int ch_number, void *payload)
+{
+	if (xpc_interface.received)
+		xpc_interface.received(partid, ch_number, payload);
+}
+
+static inline enum xp_retval
+xpc_partid_to_nasids(short partid, void *nasids)
+{
+	if (!xpc_interface.partid_to_nasids)
+		return xpNotLoaded;
+
+	return xpc_interface.partid_to_nasids(partid, nasids);
+}
+
+extern short xp_max_npartitions;
+extern short xp_partition_id;
+extern u8 xp_region_size;
+
+extern unsigned long (*xp_pa) (void *);
+extern unsigned long (*xp_socket_pa) (unsigned long);
+extern enum xp_retval (*xp_remote_memcpy) (unsigned long, const unsigned long,
+		       size_t);
+extern int (*xp_cpu_to_nasid) (int);
+extern enum xp_retval (*xp_expand_memprotect) (unsigned long, unsigned long);
+extern enum xp_retval (*xp_restrict_memprotect) (unsigned long, unsigned long);
+
+extern u64 xp_nofault_PIOR_target;
+extern int xp_nofault_PIOR(void *);
+extern int xp_error_PIOR(void);
+
+extern struct device *xp;
+extern enum xp_retval xp_init_sn2(void);
+extern enum xp_retval xp_init_uv(void);
+extern void xp_exit_sn2(void);
+extern void xp_exit_uv(void);
+
+#endif /* _DRIVERS_MISC_SGIXP_XP_H */
diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c
new file mode 100644
index 000000000..6d7f557fd
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp_main.c
@@ -0,0 +1,264 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2008 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * Cross Partition (XP) base.
+ *
+ *	XP provides a base from which its users can interact
+ *	with XPC, yet not be dependent on XPC.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include "xp.h"
+
+/* define the XP debug device structures to be used with dev_dbg() et al */
+
+struct device_driver xp_dbg_name = {
+	.name = "xp"
+};
+
+struct device xp_dbg_subname = {
+	.init_name = "",		/* set to "" */
+	.driver = &xp_dbg_name
+};
+
+struct device *xp = &xp_dbg_subname;
+
+/* max #of partitions possible */
+short xp_max_npartitions;
+EXPORT_SYMBOL_GPL(xp_max_npartitions);
+
+short xp_partition_id;
+EXPORT_SYMBOL_GPL(xp_partition_id);
+
+u8 xp_region_size;
+EXPORT_SYMBOL_GPL(xp_region_size);
+
+unsigned long (*xp_pa) (void *addr);
+EXPORT_SYMBOL_GPL(xp_pa);
+
+unsigned long (*xp_socket_pa) (unsigned long gpa);
+EXPORT_SYMBOL_GPL(xp_socket_pa);
+
+enum xp_retval (*xp_remote_memcpy) (unsigned long dst_gpa,
+				    const unsigned long src_gpa, size_t len);
+EXPORT_SYMBOL_GPL(xp_remote_memcpy);
+
+int (*xp_cpu_to_nasid) (int cpuid);
+EXPORT_SYMBOL_GPL(xp_cpu_to_nasid);
+
+enum xp_retval (*xp_expand_memprotect) (unsigned long phys_addr,
+					unsigned long size);
+EXPORT_SYMBOL_GPL(xp_expand_memprotect);
+enum xp_retval (*xp_restrict_memprotect) (unsigned long phys_addr,
+					  unsigned long size);
+EXPORT_SYMBOL_GPL(xp_restrict_memprotect);
+
+/*
+ * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level
+ * users of XPC.
+ */
+struct xpc_registration xpc_registrations[XPC_MAX_NCHANNELS];
+EXPORT_SYMBOL_GPL(xpc_registrations);
+
+/*
+ * Initialize the XPC interface to NULL to indicate that XPC isn't loaded.
+ */
+struct xpc_interface xpc_interface = { };
+EXPORT_SYMBOL_GPL(xpc_interface);
+
+/*
+ * XPC calls this when it (the XPC module) has been loaded.
+ */
+void
+xpc_set_interface(void (*connect) (int),
+		  void (*disconnect) (int),
+		  enum xp_retval (*send) (short, int, u32, void *, u16),
+		  enum xp_retval (*send_notify) (short, int, u32, void *, u16,
+						  xpc_notify_func, void *),
+		  void (*received) (short, int, void *),
+		  enum xp_retval (*partid_to_nasids) (short, void *))
+{
+	xpc_interface.connect = connect;
+	xpc_interface.disconnect = disconnect;
+	xpc_interface.send = send;
+	xpc_interface.send_notify = send_notify;
+	xpc_interface.received = received;
+	xpc_interface.partid_to_nasids = partid_to_nasids;
+}
+EXPORT_SYMBOL_GPL(xpc_set_interface);
+
+/*
+ * XPC calls this when it (the XPC module) is being unloaded.
+ */
+void
+xpc_clear_interface(void)
+{
+	memset(&xpc_interface, 0, sizeof(xpc_interface));
+}
+EXPORT_SYMBOL_GPL(xpc_clear_interface);
+
+/*
+ * Register for automatic establishment of a channel connection whenever
+ * a partition comes up.
+ *
+ * Arguments:
+ *
+ *	ch_number - channel # to register for connection.
+ *	func - function to call for asynchronous notification of channel
+ *	       state changes (i.e., connection, disconnection, error) and
+ *	       the arrival of incoming messages.
+ *      key - pointer to optional user-defined value that gets passed back
+ *	      to the user on any callouts made to func.
+ *	payload_size - size in bytes of the XPC message's payload area which
+ *		       contains a user-defined message. The user should make
+ *		       this large enough to hold their largest message.
+ *	nentries - max #of XPC message entries a message queue can contain.
+ *		   The actual number, which is determined when a connection
+ * 		   is established and may be less then requested, will be
+ *		   passed to the user via the xpConnected callout.
+ *	assigned_limit - max number of kthreads allowed to be processing
+ * 			 messages (per connection) at any given instant.
+ *	idle_limit - max number of kthreads allowed to be idle at any given
+ * 		     instant.
+ */
+enum xp_retval
+xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size,
+	    u16 nentries, u32 assigned_limit, u32 idle_limit)
+{
+	struct xpc_registration *registration;
+
+	DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
+	DBUG_ON(payload_size == 0 || nentries == 0);
+	DBUG_ON(func == NULL);
+	DBUG_ON(assigned_limit == 0 || idle_limit > assigned_limit);
+
+	if (XPC_MSG_SIZE(payload_size) > XPC_MSG_MAX_SIZE)
+		return xpPayloadTooBig;
+
+	registration = &xpc_registrations[ch_number];
+
+	if (mutex_lock_interruptible(&registration->mutex) != 0)
+		return xpInterrupted;
+
+	/* if XPC_CHANNEL_REGISTERED(ch_number) */
+	if (registration->func != NULL) {
+		mutex_unlock(&registration->mutex);
+		return xpAlreadyRegistered;
+	}
+
+	/* register the channel for connection */
+	registration->entry_size = XPC_MSG_SIZE(payload_size);
+	registration->nentries = nentries;
+	registration->assigned_limit = assigned_limit;
+	registration->idle_limit = idle_limit;
+	registration->key = key;
+	registration->func = func;
+
+	mutex_unlock(&registration->mutex);
+
+	if (xpc_interface.connect)
+		xpc_interface.connect(ch_number);
+
+	return xpSuccess;
+}
+EXPORT_SYMBOL_GPL(xpc_connect);
+
+/*
+ * Remove the registration for automatic connection of the specified channel
+ * when a partition comes up.
+ *
+ * Before returning this xpc_disconnect() will wait for all connections on the
+ * specified channel have been closed/torndown. So the caller can be assured
+ * that they will not be receiving any more callouts from XPC to their
+ * function registered via xpc_connect().
+ *
+ * Arguments:
+ *
+ *	ch_number - channel # to unregister.
+ */
+void
+xpc_disconnect(int ch_number)
+{
+	struct xpc_registration *registration;
+
+	DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
+
+	registration = &xpc_registrations[ch_number];
+
+	/*
+	 * We've decided not to make this a down_interruptible(), since we
+	 * figured XPC's users will just turn around and call xpc_disconnect()
+	 * again anyways, so we might as well wait, if need be.
+	 */
+	mutex_lock(&registration->mutex);
+
+	/* if !XPC_CHANNEL_REGISTERED(ch_number) */
+	if (registration->func == NULL) {
+		mutex_unlock(&registration->mutex);
+		return;
+	}
+
+	/* remove the connection registration for the specified channel */
+	registration->func = NULL;
+	registration->key = NULL;
+	registration->nentries = 0;
+	registration->entry_size = 0;
+	registration->assigned_limit = 0;
+	registration->idle_limit = 0;
+
+	if (xpc_interface.disconnect)
+		xpc_interface.disconnect(ch_number);
+
+	mutex_unlock(&registration->mutex);
+
+	return;
+}
+EXPORT_SYMBOL_GPL(xpc_disconnect);
+
+int __init
+xp_init(void)
+{
+	enum xp_retval ret;
+	int ch_number;
+
+	/* initialize the connection registration mutex */
+	for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++)
+		mutex_init(&xpc_registrations[ch_number].mutex);
+
+	if (is_shub())
+		ret = xp_init_sn2();
+	else if (is_uv())
+		ret = xp_init_uv();
+	else
+		ret = 0;
+
+	if (ret != xpSuccess)
+		return ret;
+
+	return 0;
+}
+
+module_init(xp_init);
+
+void __exit
+xp_exit(void)
+{
+	if (is_shub())
+		xp_exit_sn2();
+	else if (is_uv())
+		xp_exit_uv();
+}
+
+module_exit(xp_exit);
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION("Cross Partition (XP) base");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/sgi-xp/xp_nofault.S b/drivers/misc/sgi-xp/xp_nofault.S
new file mode 100644
index 000000000..e38d43319
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp_nofault.S
@@ -0,0 +1,35 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2008 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * The xp_nofault_PIOR function takes a pointer to a remote PIO register
+ * and attempts to load and consume a value from it.  This function
+ * will be registered as a nofault code block.  In the event that the
+ * PIO read fails, the MCA handler will force the error to look
+ * corrected and vector to the xp_error_PIOR which will return an error.
+ *
+ * The definition of "consumption" and the time it takes for an MCA
+ * to surface is processor implementation specific.  This code
+ * is sufficient on Itanium through the Montvale processor family.
+ * It may need to be adjusted for future processor implementations.
+ *
+ *	extern int xp_nofault_PIOR(void *remote_register);
+ */
+
+	.global xp_nofault_PIOR
+xp_nofault_PIOR:
+	mov	r8=r0			// Stage a success return value
+	ld8.acq	r9=[r32];;		// PIO Read the specified register
+	adds	r9=1,r9;;		// Add to force consumption
+	srlz.i;;			// Allow time for MCA to surface
+	br.ret.sptk.many b0;;		// Return success
+
+	.global xp_error_PIOR
+xp_error_PIOR:
+	mov	r8=1			// Return value of 1
+	br.ret.sptk.many b0;;		// Return failure
diff --git a/drivers/misc/sgi-xp/xp_sn2.c b/drivers/misc/sgi-xp/xp_sn2.c
new file mode 100644
index 000000000..d8e463f87
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp_sn2.c
@@ -0,0 +1,190 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * Cross Partition (XP) sn2-based functions.
+ *
+ *      Architecture specific implementation of common functions.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <asm/sn/bte.h>
+#include <asm/sn/sn_sal.h>
+#include "xp.h"
+
+/*
+ * The export of xp_nofault_PIOR needs to happen here since it is defined
+ * in drivers/misc/sgi-xp/xp_nofault.S. The target of the nofault read is
+ * defined here.
+ */
+EXPORT_SYMBOL_GPL(xp_nofault_PIOR);
+
+u64 xp_nofault_PIOR_target;
+EXPORT_SYMBOL_GPL(xp_nofault_PIOR_target);
+
+/*
+ * Register a nofault code region which performs a cross-partition PIO read.
+ * If the PIO read times out, the MCA handler will consume the error and
+ * return to a kernel-provided instruction to indicate an error. This PIO read
+ * exists because it is guaranteed to timeout if the destination is down
+ * (amo operations do not timeout on at least some CPUs on Shubs <= v1.2,
+ * which unfortunately we have to work around).
+ */
+static enum xp_retval
+xp_register_nofault_code_sn2(void)
+{
+	int ret;
+	u64 func_addr;
+	u64 err_func_addr;
+
+	func_addr = *(u64 *)xp_nofault_PIOR;
+	err_func_addr = *(u64 *)xp_error_PIOR;
+	ret = sn_register_nofault_code(func_addr, err_func_addr, err_func_addr,
+				       1, 1);
+	if (ret != 0) {
+		dev_err(xp, "can't register nofault code, error=%d\n", ret);
+		return xpSalError;
+	}
+	/*
+	 * Setup the nofault PIO read target. (There is no special reason why
+	 * SH_IPI_ACCESS was selected.)
+	 */
+	if (is_shub1())
+		xp_nofault_PIOR_target = SH1_IPI_ACCESS;
+	else if (is_shub2())
+		xp_nofault_PIOR_target = SH2_IPI_ACCESS0;
+
+	return xpSuccess;
+}
+
+static void
+xp_unregister_nofault_code_sn2(void)
+{
+	u64 func_addr = *(u64 *)xp_nofault_PIOR;
+	u64 err_func_addr = *(u64 *)xp_error_PIOR;
+
+	/* unregister the PIO read nofault code region */
+	(void)sn_register_nofault_code(func_addr, err_func_addr,
+				       err_func_addr, 1, 0);
+}
+
+/*
+ * Convert a virtual memory address to a physical memory address.
+ */
+static unsigned long
+xp_pa_sn2(void *addr)
+{
+	return __pa(addr);
+}
+
+/*
+ * Convert a global physical to a socket physical address.
+ */
+static unsigned long
+xp_socket_pa_sn2(unsigned long gpa)
+{
+	return gpa;
+}
+
+/*
+ * Wrapper for bte_copy().
+ *
+ *	dst_pa - physical address of the destination of the transfer.
+ *	src_pa - physical address of the source of the transfer.
+ *	len - number of bytes to transfer from source to destination.
+ *
+ * Note: xp_remote_memcpy_sn2() should never be called while holding a spinlock.
+ */
+static enum xp_retval
+xp_remote_memcpy_sn2(unsigned long dst_pa, const unsigned long src_pa,
+		     size_t len)
+{
+	bte_result_t ret;
+
+	ret = bte_copy(src_pa, dst_pa, len, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
+	if (ret == BTE_SUCCESS)
+		return xpSuccess;
+
+	if (is_shub2()) {
+		dev_err(xp, "bte_copy() on shub2 failed, error=0x%x dst_pa="
+			"0x%016lx src_pa=0x%016lx len=%ld\\n", ret, dst_pa,
+			src_pa, len);
+	} else {
+		dev_err(xp, "bte_copy() failed, error=%d dst_pa=0x%016lx "
+			"src_pa=0x%016lx len=%ld\\n", ret, dst_pa, src_pa, len);
+	}
+
+	return xpBteCopyError;
+}
+
+static int
+xp_cpu_to_nasid_sn2(int cpuid)
+{
+	return cpuid_to_nasid(cpuid);
+}
+
+static enum xp_retval
+xp_expand_memprotect_sn2(unsigned long phys_addr, unsigned long size)
+{
+	u64 nasid_array = 0;
+	int ret;
+
+	ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_1,
+				   &nasid_array);
+	if (ret != 0) {
+		dev_err(xp, "sn_change_memprotect(,, "
+			"SN_MEMPROT_ACCESS_CLASS_1,) failed ret=%d\n", ret);
+		return xpSalError;
+	}
+	return xpSuccess;
+}
+
+static enum xp_retval
+xp_restrict_memprotect_sn2(unsigned long phys_addr, unsigned long size)
+{
+	u64 nasid_array = 0;
+	int ret;
+
+	ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_0,
+				   &nasid_array);
+	if (ret != 0) {
+		dev_err(xp, "sn_change_memprotect(,, "
+			"SN_MEMPROT_ACCESS_CLASS_0,) failed ret=%d\n", ret);
+		return xpSalError;
+	}
+	return xpSuccess;
+}
+
+enum xp_retval
+xp_init_sn2(void)
+{
+	BUG_ON(!is_shub());
+
+	xp_max_npartitions = XP_MAX_NPARTITIONS_SN2;
+	xp_partition_id = sn_partition_id;
+	xp_region_size = sn_region_size;
+
+	xp_pa = xp_pa_sn2;
+	xp_socket_pa = xp_socket_pa_sn2;
+	xp_remote_memcpy = xp_remote_memcpy_sn2;
+	xp_cpu_to_nasid = xp_cpu_to_nasid_sn2;
+	xp_expand_memprotect = xp_expand_memprotect_sn2;
+	xp_restrict_memprotect = xp_restrict_memprotect_sn2;
+
+	return xp_register_nofault_code_sn2();
+}
+
+void
+xp_exit_sn2(void)
+{
+	BUG_ON(!is_shub());
+
+	xp_unregister_nofault_code_sn2();
+}
+
diff --git a/drivers/misc/sgi-xp/xp_uv.c b/drivers/misc/sgi-xp/xp_uv.c
new file mode 100644
index 000000000..a0d093274
--- /dev/null
+++ b/drivers/misc/sgi-xp/xp_uv.c
@@ -0,0 +1,171 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * Cross Partition (XP) uv-based functions.
+ *
+ *      Architecture specific implementation of common functions.
+ *
+ */
+
+#include <linux/device.h>
+#include <asm/uv/uv_hub.h>
+#if defined CONFIG_X86_64
+#include <asm/uv/bios.h>
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+#include <asm/sn/sn_sal.h>
+#endif
+#include "../sgi-gru/grukservices.h"
+#include "xp.h"
+
+/*
+ * Convert a virtual memory address to a physical memory address.
+ */
+static unsigned long
+xp_pa_uv(void *addr)
+{
+	return uv_gpa(addr);
+}
+
+/*
+ * Convert a global physical to socket physical address.
+ */
+static unsigned long
+xp_socket_pa_uv(unsigned long gpa)
+{
+	return uv_gpa_to_soc_phys_ram(gpa);
+}
+
+static enum xp_retval
+xp_remote_mmr_read(unsigned long dst_gpa, const unsigned long src_gpa,
+		   size_t len)
+{
+	int ret;
+	unsigned long *dst_va = __va(uv_gpa_to_soc_phys_ram(dst_gpa));
+
+	BUG_ON(!uv_gpa_in_mmr_space(src_gpa));
+	BUG_ON(len != 8);
+
+	ret = gru_read_gpa(dst_va, src_gpa);
+	if (ret == 0)
+		return xpSuccess;
+
+	dev_err(xp, "gru_read_gpa() failed, dst_gpa=0x%016lx src_gpa=0x%016lx "
+		"len=%ld\n", dst_gpa, src_gpa, len);
+	return xpGruCopyError;
+}
+
+
+static enum xp_retval
+xp_remote_memcpy_uv(unsigned long dst_gpa, const unsigned long src_gpa,
+		    size_t len)
+{
+	int ret;
+
+	if (uv_gpa_in_mmr_space(src_gpa))
+		return xp_remote_mmr_read(dst_gpa, src_gpa, len);
+
+	ret = gru_copy_gpa(dst_gpa, src_gpa, len);
+	if (ret == 0)
+		return xpSuccess;
+
+	dev_err(xp, "gru_copy_gpa() failed, dst_gpa=0x%016lx src_gpa=0x%016lx "
+		"len=%ld\n", dst_gpa, src_gpa, len);
+	return xpGruCopyError;
+}
+
+static int
+xp_cpu_to_nasid_uv(int cpuid)
+{
+	/* ??? Is this same as sn2 nasid in mach/part bitmaps set up by SAL? */
+	return UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpuid));
+}
+
+static enum xp_retval
+xp_expand_memprotect_uv(unsigned long phys_addr, unsigned long size)
+{
+	int ret;
+
+#if defined CONFIG_X86_64
+	ret = uv_bios_change_memprotect(phys_addr, size, UV_MEMPROT_ALLOW_RW);
+	if (ret != BIOS_STATUS_SUCCESS) {
+		dev_err(xp, "uv_bios_change_memprotect(,, "
+			"UV_MEMPROT_ALLOW_RW) failed, ret=%d\n", ret);
+		return xpBiosError;
+	}
+
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+	u64 nasid_array;
+
+	ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_1,
+				   &nasid_array);
+	if (ret != 0) {
+		dev_err(xp, "sn_change_memprotect(,, "
+			"SN_MEMPROT_ACCESS_CLASS_1,) failed ret=%d\n", ret);
+		return xpSalError;
+	}
+#else
+	#error not a supported configuration
+#endif
+	return xpSuccess;
+}
+
+static enum xp_retval
+xp_restrict_memprotect_uv(unsigned long phys_addr, unsigned long size)
+{
+	int ret;
+
+#if defined CONFIG_X86_64
+	ret = uv_bios_change_memprotect(phys_addr, size,
+					UV_MEMPROT_RESTRICT_ACCESS);
+	if (ret != BIOS_STATUS_SUCCESS) {
+		dev_err(xp, "uv_bios_change_memprotect(,, "
+			"UV_MEMPROT_RESTRICT_ACCESS) failed, ret=%d\n", ret);
+		return xpBiosError;
+	}
+
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+	u64 nasid_array;
+
+	ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_0,
+				   &nasid_array);
+	if (ret != 0) {
+		dev_err(xp, "sn_change_memprotect(,, "
+			"SN_MEMPROT_ACCESS_CLASS_0,) failed ret=%d\n", ret);
+		return xpSalError;
+	}
+#else
+	#error not a supported configuration
+#endif
+	return xpSuccess;
+}
+
+enum xp_retval
+xp_init_uv(void)
+{
+	BUG_ON(!is_uv());
+
+	xp_max_npartitions = XP_MAX_NPARTITIONS_UV;
+	xp_partition_id = sn_partition_id;
+	xp_region_size = sn_region_size;
+
+	xp_pa = xp_pa_uv;
+	xp_socket_pa = xp_socket_pa_uv;
+	xp_remote_memcpy = xp_remote_memcpy_uv;
+	xp_cpu_to_nasid = xp_cpu_to_nasid_uv;
+	xp_expand_memprotect = xp_expand_memprotect_uv;
+	xp_restrict_memprotect = xp_restrict_memprotect_uv;
+
+	return xpSuccess;
+}
+
+void
+xp_exit_uv(void)
+{
+	BUG_ON(!is_uv());
+}
diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
new file mode 100644
index 000000000..b94d5f767
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -0,0 +1,1004 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2009 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) structures and macros.
+ */
+
+#ifndef _DRIVERS_MISC_SGIXP_XPC_H
+#define _DRIVERS_MISC_SGIXP_XPC_H
+
+#include <linux/wait.h>
+#include <linux/completion.h>
+#include <linux/timer.h>
+#include <linux/sched.h>
+#include "xp.h"
+
+/*
+ * XPC Version numbers consist of a major and minor number. XPC can always
+ * talk to versions with same major #, and never talk to versions with a
+ * different major #.
+ */
+#define _XPC_VERSION(_maj, _min)	(((_maj) << 4) | ((_min) & 0xf))
+#define XPC_VERSION_MAJOR(_v)		((_v) >> 4)
+#define XPC_VERSION_MINOR(_v)		((_v) & 0xf)
+
+/* define frequency of the heartbeat and frequency how often it's checked */
+#define XPC_HB_DEFAULT_INTERVAL		5	/* incr HB every x secs */
+#define XPC_HB_CHECK_DEFAULT_INTERVAL	20	/* check HB every x secs */
+
+/* define the process name of HB checker and the CPU it is pinned to */
+#define XPC_HB_CHECK_THREAD_NAME	"xpc_hb"
+#define XPC_HB_CHECK_CPU		0
+
+/* define the process name of the discovery thread */
+#define XPC_DISCOVERY_THREAD_NAME	"xpc_discovery"
+
+/*
+ * the reserved page
+ *
+ *   SAL reserves one page of memory per partition for XPC. Though a full page
+ *   in length (16384 bytes), its starting address is not page aligned, but it
+ *   is cacheline aligned. The reserved page consists of the following:
+ *
+ *   reserved page header
+ *
+ *     The first two 64-byte cachelines of the reserved page contain the
+ *     header (struct xpc_rsvd_page). Before SAL initialization has completed,
+ *     SAL has set up the following fields of the reserved page header:
+ *     SAL_signature, SAL_version, SAL_partid, and SAL_nasids_size. The
+ *     other fields are set up by XPC. (xpc_rsvd_page points to the local
+ *     partition's reserved page.)
+ *
+ *   part_nasids mask
+ *   mach_nasids mask
+ *
+ *     SAL also sets up two bitmaps (or masks), one that reflects the actual
+ *     nasids in this partition (part_nasids), and the other that reflects
+ *     the actual nasids in the entire machine (mach_nasids). We're only
+ *     interested in the even numbered nasids (which contain the processors
+ *     and/or memory), so we only need half as many bits to represent the
+ *     nasids. When mapping nasid to bit in a mask (or bit to nasid) be sure
+ *     to either divide or multiply by 2. The part_nasids mask is located
+ *     starting at the first cacheline following the reserved page header. The
+ *     mach_nasids mask follows right after the part_nasids mask. The size in
+ *     bytes of each mask is reflected by the reserved page header field
+ *     'SAL_nasids_size'. (Local partition's mask pointers are xpc_part_nasids
+ *     and xpc_mach_nasids.)
+ *
+ *   vars	(ia64-sn2 only)
+ *   vars part	(ia64-sn2 only)
+ *
+ *     Immediately following the mach_nasids mask are the XPC variables
+ *     required by other partitions. First are those that are generic to all
+ *     partitions (vars), followed on the next available cacheline by those
+ *     which are partition specific (vars part). These are setup by XPC.
+ *     (Local partition's vars pointers are xpc_vars and xpc_vars_part.)
+ *
+ * Note: Until 'ts_jiffies' is set non-zero, the partition XPC code has not been
+ *       initialized.
+ */
+struct xpc_rsvd_page {
+	u64 SAL_signature;	/* SAL: unique signature */
+	u64 SAL_version;	/* SAL: version */
+	short SAL_partid;	/* SAL: partition ID */
+	short max_npartitions;	/* value of XPC_MAX_PARTITIONS */
+	u8 version;
+	u8 pad1[3];		/* align to next u64 in 1st 64-byte cacheline */
+	unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */
+	union {
+		struct {
+			unsigned long vars_pa;	/* phys addr */
+		} sn2;
+		struct {
+			unsigned long heartbeat_gpa; /* phys addr */
+			unsigned long activate_gru_mq_desc_gpa; /* phys addr */
+		} uv;
+	} sn;
+	u64 pad2[9];		/* align to last u64 in 2nd 64-byte cacheline */
+	u64 SAL_nasids_size;	/* SAL: size of each nasid mask in bytes */
+};
+
+#define XPC_RP_VERSION _XPC_VERSION(3, 0) /* version 3.0 of the reserved page */
+
+/*
+ * Define the structures by which XPC variables can be exported to other
+ * partitions. (There are two: struct xpc_vars and struct xpc_vars_part)
+ */
+
+/*
+ * The following structure describes the partition generic variables
+ * needed by other partitions in order to properly initialize.
+ *
+ * struct xpc_vars version number also applies to struct xpc_vars_part.
+ * Changes to either structure and/or related functionality should be
+ * reflected by incrementing either the major or minor version numbers
+ * of struct xpc_vars.
+ */
+struct xpc_vars_sn2 {
+	u8 version;
+	u64 heartbeat;
+	DECLARE_BITMAP(heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2);
+	u64 heartbeat_offline;	/* if 0, heartbeat should be changing */
+	int activate_IRQ_nasid;
+	int activate_IRQ_phys_cpuid;
+	unsigned long vars_part_pa;
+	unsigned long amos_page_pa;/* paddr of page of amos from MSPEC driver */
+	struct amo *amos_page;	/* vaddr of page of amos from MSPEC driver */
+};
+
+#define XPC_V_VERSION _XPC_VERSION(3, 1)    /* version 3.1 of the cross vars */
+
+/*
+ * The following structure describes the per partition specific variables.
+ *
+ * An array of these structures, one per partition, will be defined. As a
+ * partition becomes active XPC will copy the array entry corresponding to
+ * itself from that partition. It is desirable that the size of this structure
+ * evenly divides into a 128-byte cacheline, such that none of the entries in
+ * this array crosses a 128-byte cacheline boundary. As it is now, each entry
+ * occupies 64-bytes.
+ */
+struct xpc_vars_part_sn2 {
+	u64 magic;
+
+	unsigned long openclose_args_pa; /* phys addr of open and close args */
+	unsigned long GPs_pa;	/* physical address of Get/Put values */
+
+	unsigned long chctl_amo_pa; /* physical address of chctl flags' amo */
+
+	int notify_IRQ_nasid;	/* nasid of where to send notify IRQs */
+	int notify_IRQ_phys_cpuid;	/* CPUID of where to send notify IRQs */
+
+	u8 nchannels;		/* #of defined channels supported */
+
+	u8 reserved[23];	/* pad to a full 64 bytes */
+};
+
+/*
+ * The vars_part MAGIC numbers play a part in the first contact protocol.
+ *
+ * MAGIC1 indicates that the per partition specific variables for a remote
+ * partition have been initialized by this partition.
+ *
+ * MAGIC2 indicates that this partition has pulled the remote partititions
+ * per partition variables that pertain to this partition.
+ */
+#define XPC_VP_MAGIC1_SN2 0x0053524156435058L /* 'XPCVARS\0'L (little endian) */
+#define XPC_VP_MAGIC2_SN2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */
+
+/* the reserved page sizes and offsets */
+
+#define XPC_RP_HEADER_SIZE	L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page))
+#define XPC_RP_VARS_SIZE	L1_CACHE_ALIGN(sizeof(struct xpc_vars_sn2))
+
+#define XPC_RP_PART_NASIDS(_rp) ((unsigned long *)((u8 *)(_rp) + \
+				 XPC_RP_HEADER_SIZE))
+#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + \
+				 xpc_nasid_mask_nlongs)
+#define XPC_RP_VARS(_rp)	((struct xpc_vars_sn2 *) \
+				 (XPC_RP_MACH_NASIDS(_rp) + \
+				  xpc_nasid_mask_nlongs))
+
+
+/*
+ * The following structure describes the partition's heartbeat info which
+ * will be periodically read by other partitions to determine whether this
+ * XPC is still 'alive'.
+ */
+struct xpc_heartbeat_uv {
+	unsigned long value;
+	unsigned long offline;	/* if 0, heartbeat should be changing */
+};
+
+/*
+ * Info pertinent to a GRU message queue using a watch list for irq generation.
+ */
+struct xpc_gru_mq_uv {
+	void *address;		/* address of GRU message queue */
+	unsigned int order;	/* size of GRU message queue as a power of 2 */
+	int irq;		/* irq raised when message is received in mq */
+	int mmr_blade;		/* blade where watchlist was allocated from */
+	unsigned long mmr_offset; /* offset of irq mmr located on mmr_blade */
+	unsigned long mmr_value; /* value of irq mmr located on mmr_blade */
+	int watchlist_num;	/* number of watchlist allocatd by BIOS */
+	void *gru_mq_desc;	/* opaque structure used by the GRU driver */
+};
+
+/*
+ * The activate_mq is used to send/receive GRU messages that affect XPC's
+ * partition active state and channel state. This is uv only.
+ */
+struct xpc_activate_mq_msghdr_uv {
+	unsigned int gru_msg_hdr; /* FOR GRU INTERNAL USE ONLY */
+	short partid;		/* sender's partid */
+	u8 act_state;		/* sender's act_state at time msg sent */
+	u8 type;		/* message's type */
+	unsigned long rp_ts_jiffies; /* timestamp of sender's rp setup by XPC */
+};
+
+/* activate_mq defined message types */
+#define XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV		0
+
+#define XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV		1
+#define XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV		2
+
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV	3
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV		4
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV	5
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV		6
+#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV	7
+
+#define XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV		8
+#define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV		9
+
+struct xpc_activate_mq_msg_uv {
+	struct xpc_activate_mq_msghdr_uv hdr;
+};
+
+struct xpc_activate_mq_msg_activate_req_uv {
+	struct xpc_activate_mq_msghdr_uv hdr;
+	unsigned long rp_gpa;
+	unsigned long heartbeat_gpa;
+	unsigned long activate_gru_mq_desc_gpa;
+};
+
+struct xpc_activate_mq_msg_deactivate_req_uv {
+	struct xpc_activate_mq_msghdr_uv hdr;
+	enum xp_retval reason;
+};
+
+struct xpc_activate_mq_msg_chctl_closerequest_uv {
+	struct xpc_activate_mq_msghdr_uv hdr;
+	short ch_number;
+	enum xp_retval reason;
+};
+
+struct xpc_activate_mq_msg_chctl_closereply_uv {
+	struct xpc_activate_mq_msghdr_uv hdr;
+	short ch_number;
+};
+
+struct xpc_activate_mq_msg_chctl_openrequest_uv {
+	struct xpc_activate_mq_msghdr_uv hdr;
+	short ch_number;
+	short entry_size;	/* size of notify_mq's GRU messages */
+	short local_nentries;	/* ??? Is this needed? What is? */
+};
+
+struct xpc_activate_mq_msg_chctl_openreply_uv {
+	struct xpc_activate_mq_msghdr_uv hdr;
+	short ch_number;
+	short remote_nentries;	/* ??? Is this needed? What is? */
+	short local_nentries;	/* ??? Is this needed? What is? */
+	unsigned long notify_gru_mq_desc_gpa;
+};
+
+struct xpc_activate_mq_msg_chctl_opencomplete_uv {
+	struct xpc_activate_mq_msghdr_uv hdr;
+	short ch_number;
+};
+
+/*
+ * Functions registered by add_timer() or called by kernel_thread() only
+ * allow for a single 64-bit argument. The following macros can be used to
+ * pack and unpack two (32-bit, 16-bit or 8-bit) arguments into or out from
+ * the passed argument.
+ */
+#define XPC_PACK_ARGS(_arg1, _arg2) \
+			((((u64)_arg1) & 0xffffffff) | \
+			((((u64)_arg2) & 0xffffffff) << 32))
+
+#define XPC_UNPACK_ARG1(_args)	(((u64)_args) & 0xffffffff)
+#define XPC_UNPACK_ARG2(_args)	((((u64)_args) >> 32) & 0xffffffff)
+
+/*
+ * Define a Get/Put value pair (pointers) used with a message queue.
+ */
+struct xpc_gp_sn2 {
+	s64 get;		/* Get value */
+	s64 put;		/* Put value */
+};
+
+#define XPC_GP_SIZE \
+		L1_CACHE_ALIGN(sizeof(struct xpc_gp_sn2) * XPC_MAX_NCHANNELS)
+
+/*
+ * Define a structure that contains arguments associated with opening and
+ * closing a channel.
+ */
+struct xpc_openclose_args {
+	u16 reason;		/* reason why channel is closing */
+	u16 entry_size;		/* sizeof each message entry */
+	u16 remote_nentries;	/* #of message entries in remote msg queue */
+	u16 local_nentries;	/* #of message entries in local msg queue */
+	unsigned long local_msgqueue_pa; /* phys addr of local message queue */
+};
+
+#define XPC_OPENCLOSE_ARGS_SIZE \
+	      L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * \
+	      XPC_MAX_NCHANNELS)
+
+
+/*
+ * Structures to define a fifo singly-linked list.
+ */
+
+struct xpc_fifo_entry_uv {
+	struct xpc_fifo_entry_uv *next;
+};
+
+struct xpc_fifo_head_uv {
+	struct xpc_fifo_entry_uv *first;
+	struct xpc_fifo_entry_uv *last;
+	spinlock_t lock;
+	int n_entries;
+};
+
+/*
+ * Define a sn2 styled message.
+ *
+ * A user-defined message resides in the payload area. The max size of the
+ * payload is defined by the user via xpc_connect().
+ *
+ * The size of a message entry (within a message queue) must be a 128-byte
+ * cacheline sized multiple in order to facilitate the BTE transfer of messages
+ * from one message queue to another.
+ */
+struct xpc_msg_sn2 {
+	u8 flags;		/* FOR XPC INTERNAL USE ONLY */
+	u8 reserved[7];		/* FOR XPC INTERNAL USE ONLY */
+	s64 number;		/* FOR XPC INTERNAL USE ONLY */
+
+	u64 payload;		/* user defined portion of message */
+};
+
+/* struct xpc_msg_sn2 flags */
+
+#define	XPC_M_SN2_DONE		0x01	/* msg has been received/consumed */
+#define	XPC_M_SN2_READY		0x02	/* msg is ready to be sent */
+#define	XPC_M_SN2_INTERRUPT	0x04	/* send interrupt when msg consumed */
+
+/*
+ * The format of a uv XPC notify_mq GRU message is as follows:
+ *
+ * A user-defined message resides in the payload area. The max size of the
+ * payload is defined by the user via xpc_connect().
+ *
+ * The size of a message (payload and header) sent via the GRU must be either 1
+ * or 2 GRU_CACHE_LINE_BYTES in length.
+ */
+
+struct xpc_notify_mq_msghdr_uv {
+	union {
+		unsigned int gru_msg_hdr;	/* FOR GRU INTERNAL USE ONLY */
+		struct xpc_fifo_entry_uv next;	/* FOR XPC INTERNAL USE ONLY */
+	} u;
+	short partid;		/* FOR XPC INTERNAL USE ONLY */
+	u8 ch_number;		/* FOR XPC INTERNAL USE ONLY */
+	u8 size;		/* FOR XPC INTERNAL USE ONLY */
+	unsigned int msg_slot_number;	/* FOR XPC INTERNAL USE ONLY */
+};
+
+struct xpc_notify_mq_msg_uv {
+	struct xpc_notify_mq_msghdr_uv hdr;
+	unsigned long payload;
+};
+
+/*
+ * Define sn2's notify entry.
+ *
+ * This is used to notify a message's sender that their message was received
+ * and consumed by the intended recipient.
+ */
+struct xpc_notify_sn2 {
+	u8 type;		/* type of notification */
+
+	/* the following two fields are only used if type == XPC_N_CALL */
+	xpc_notify_func func;	/* user's notify function */
+	void *key;		/* pointer to user's key */
+};
+
+/* struct xpc_notify_sn2 type of notification */
+
+#define	XPC_N_CALL	0x01	/* notify function provided by user */
+
+/*
+ * Define uv's version of the notify entry. It additionally is used to allocate
+ * a msg slot on the remote partition into which is copied a sent message.
+ */
+struct xpc_send_msg_slot_uv {
+	struct xpc_fifo_entry_uv next;
+	unsigned int msg_slot_number;
+	xpc_notify_func func;	/* user's notify function */
+	void *key;		/* pointer to user's key */
+};
+
+/*
+ * Define the structure that manages all the stuff required by a channel. In
+ * particular, they are used to manage the messages sent across the channel.
+ *
+ * This structure is private to a partition, and is NOT shared across the
+ * partition boundary.
+ *
+ * There is an array of these structures for each remote partition. It is
+ * allocated at the time a partition becomes active. The array contains one
+ * of these structures for each potential channel connection to that partition.
+ */
+
+/*
+ * The following is sn2 only.
+ *
+ * Each channel structure manages two message queues (circular buffers).
+ * They are allocated at the time a channel connection is made. One of
+ * these message queues (local_msgqueue) holds the locally created messages
+ * that are destined for the remote partition. The other of these message
+ * queues (remote_msgqueue) is a locally cached copy of the remote partition's
+ * own local_msgqueue.
+ *
+ * The following is a description of the Get/Put pointers used to manage these
+ * two message queues. Consider the local_msgqueue to be on one partition
+ * and the remote_msgqueue to be its cached copy on another partition. A
+ * description of what each of the lettered areas contains is included.
+ *
+ *
+ *                     local_msgqueue      remote_msgqueue
+ *
+ *                        |/////////|      |/////////|
+ *    w_remote_GP.get --> +---------+      |/////////|
+ *                        |    F    |      |/////////|
+ *     remote_GP.get  --> +---------+      +---------+ <-- local_GP->get
+ *                        |         |      |         |
+ *                        |         |      |    E    |
+ *                        |         |      |         |
+ *                        |         |      +---------+ <-- w_local_GP.get
+ *                        |    B    |      |/////////|
+ *                        |         |      |////D////|
+ *                        |         |      |/////////|
+ *                        |         |      +---------+ <-- w_remote_GP.put
+ *                        |         |      |////C////|
+ *      local_GP->put --> +---------+      +---------+ <-- remote_GP.put
+ *                        |         |      |/////////|
+ *                        |    A    |      |/////////|
+ *                        |         |      |/////////|
+ *     w_local_GP.put --> +---------+      |/////////|
+ *                        |/////////|      |/////////|
+ *
+ *
+ *	    ( remote_GP.[get|put] are cached copies of the remote
+ *	      partition's local_GP->[get|put], and thus their values can
+ *	      lag behind their counterparts on the remote partition. )
+ *
+ *
+ *  A - Messages that have been allocated, but have not yet been sent to the
+ *	remote partition.
+ *
+ *  B - Messages that have been sent, but have not yet been acknowledged by the
+ *      remote partition as having been received.
+ *
+ *  C - Area that needs to be prepared for the copying of sent messages, by
+ *	the clearing of the message flags of any previously received messages.
+ *
+ *  D - Area into which sent messages are to be copied from the remote
+ *	partition's local_msgqueue and then delivered to their intended
+ *	recipients. [ To allow for a multi-message copy, another pointer
+ *	(next_msg_to_pull) has been added to keep track of the next message
+ *	number needing to be copied (pulled). It chases after w_remote_GP.put.
+ *	Any messages lying between w_local_GP.get and next_msg_to_pull have
+ *	been copied and are ready to be delivered. ]
+ *
+ *  E - Messages that have been copied and delivered, but have not yet been
+ *	acknowledged by the recipient as having been received.
+ *
+ *  F - Messages that have been acknowledged, but XPC has not yet notified the
+ *	sender that the message was received by its intended recipient.
+ *	This is also an area that needs to be prepared for the allocating of
+ *	new messages, by the clearing of the message flags of the acknowledged
+ *	messages.
+ */
+
+struct xpc_channel_sn2 {
+	struct xpc_openclose_args *local_openclose_args; /* args passed on */
+					     /* opening or closing of channel */
+
+	void *local_msgqueue_base;	/* base address of kmalloc'd space */
+	struct xpc_msg_sn2 *local_msgqueue;	/* local message queue */
+	void *remote_msgqueue_base;	/* base address of kmalloc'd space */
+	struct xpc_msg_sn2 *remote_msgqueue; /* cached copy of remote */
+					   /* partition's local message queue */
+	unsigned long remote_msgqueue_pa; /* phys addr of remote partition's */
+					  /* local message queue */
+
+	struct xpc_notify_sn2 *notify_queue;/* notify queue for messages sent */
+
+	/* various flavors of local and remote Get/Put values */
+
+	struct xpc_gp_sn2 *local_GP;	/* local Get/Put values */
+	struct xpc_gp_sn2 remote_GP;	/* remote Get/Put values */
+	struct xpc_gp_sn2 w_local_GP;	/* working local Get/Put values */
+	struct xpc_gp_sn2 w_remote_GP;	/* working remote Get/Put values */
+	s64 next_msg_to_pull;	/* Put value of next msg to pull */
+
+	struct mutex msg_to_pull_mutex;	/* next msg to pull serialization */
+};
+
+struct xpc_channel_uv {
+	void *cached_notify_gru_mq_desc; /* remote partition's notify mq's */
+					 /* gru mq descriptor */
+
+	struct xpc_send_msg_slot_uv *send_msg_slots;
+	void *recv_msg_slots;	/* each slot will hold a xpc_notify_mq_msg_uv */
+				/* structure plus the user's payload */
+
+	struct xpc_fifo_head_uv msg_slot_free_list;
+	struct xpc_fifo_head_uv recv_msg_list;	/* deliverable payloads */
+};
+
+struct xpc_channel {
+	short partid;		/* ID of remote partition connected */
+	spinlock_t lock;	/* lock for updating this structure */
+	unsigned int flags;	/* general flags */
+
+	enum xp_retval reason;	/* reason why channel is disconnect'g */
+	int reason_line;	/* line# disconnect initiated from */
+
+	u16 number;		/* channel # */
+
+	u16 entry_size;		/* sizeof each msg entry */
+	u16 local_nentries;	/* #of msg entries in local msg queue */
+	u16 remote_nentries;	/* #of msg entries in remote msg queue */
+
+	atomic_t references;	/* #of external references to queues */
+
+	atomic_t n_on_msg_allocate_wq;	/* #on msg allocation wait queue */
+	wait_queue_head_t msg_allocate_wq;	/* msg allocation wait queue */
+
+	u8 delayed_chctl_flags;	/* chctl flags received, but delayed */
+				/* action until channel disconnected */
+
+	atomic_t n_to_notify;	/* #of msg senders to notify */
+
+	xpc_channel_func func;	/* user's channel function */
+	void *key;		/* pointer to user's key */
+
+	struct completion wdisconnect_wait;    /* wait for channel disconnect */
+
+	/* kthread management related fields */
+
+	atomic_t kthreads_assigned;	/* #of kthreads assigned to channel */
+	u32 kthreads_assigned_limit;	/* limit on #of kthreads assigned */
+	atomic_t kthreads_idle;	/* #of kthreads idle waiting for work */
+	u32 kthreads_idle_limit;	/* limit on #of kthreads idle */
+	atomic_t kthreads_active;	/* #of kthreads actively working */
+
+	wait_queue_head_t idle_wq;	/* idle kthread wait queue */
+
+	union {
+		struct xpc_channel_sn2 sn2;
+		struct xpc_channel_uv uv;
+	} sn;
+
+} ____cacheline_aligned;
+
+/* struct xpc_channel flags */
+
+#define	XPC_C_WASCONNECTED	0x00000001	/* channel was connected */
+
+#define XPC_C_ROPENCOMPLETE	0x00000002    /* remote open channel complete */
+#define XPC_C_OPENCOMPLETE	0x00000004     /* local open channel complete */
+#define	XPC_C_ROPENREPLY	0x00000008	/* remote open channel reply */
+#define	XPC_C_OPENREPLY		0x00000010	/* local open channel reply */
+#define	XPC_C_ROPENREQUEST	0x00000020     /* remote open channel request */
+#define	XPC_C_OPENREQUEST	0x00000040	/* local open channel request */
+
+#define	XPC_C_SETUP		0x00000080 /* channel's msgqueues are alloc'd */
+#define	XPC_C_CONNECTEDCALLOUT	0x00000100     /* connected callout initiated */
+#define	XPC_C_CONNECTEDCALLOUT_MADE \
+				0x00000200     /* connected callout completed */
+#define	XPC_C_CONNECTED		0x00000400	/* local channel is connected */
+#define	XPC_C_CONNECTING	0x00000800	/* channel is being connected */
+
+#define	XPC_C_RCLOSEREPLY	0x00001000	/* remote close channel reply */
+#define	XPC_C_CLOSEREPLY	0x00002000	/* local close channel reply */
+#define	XPC_C_RCLOSEREQUEST	0x00004000    /* remote close channel request */
+#define	XPC_C_CLOSEREQUEST	0x00008000     /* local close channel request */
+
+#define	XPC_C_DISCONNECTED	0x00010000	/* channel is disconnected */
+#define	XPC_C_DISCONNECTING	0x00020000   /* channel is being disconnected */
+#define	XPC_C_DISCONNECTINGCALLOUT \
+				0x00040000 /* disconnecting callout initiated */
+#define	XPC_C_DISCONNECTINGCALLOUT_MADE \
+				0x00080000 /* disconnecting callout completed */
+#define	XPC_C_WDISCONNECT	0x00100000  /* waiting for channel disconnect */
+
+/*
+ * The channel control flags (chctl) union consists of a 64-bit variable which
+ * is divided up into eight bytes, ordered from right to left. Byte zero
+ * pertains to channel 0, byte one to channel 1, and so on. Each channel's byte
+ * can have one or more of the chctl flags set in it.
+ */
+
+union xpc_channel_ctl_flags {
+	u64 all_flags;
+	u8 flags[XPC_MAX_NCHANNELS];
+};
+
+/* chctl flags */
+#define	XPC_CHCTL_CLOSEREQUEST	0x01
+#define	XPC_CHCTL_CLOSEREPLY	0x02
+#define	XPC_CHCTL_OPENREQUEST	0x04
+#define	XPC_CHCTL_OPENREPLY	0x08
+#define XPC_CHCTL_OPENCOMPLETE	0x10
+#define	XPC_CHCTL_MSGREQUEST	0x20
+
+#define XPC_OPENCLOSE_CHCTL_FLAGS \
+			(XPC_CHCTL_CLOSEREQUEST | XPC_CHCTL_CLOSEREPLY | \
+			 XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY | \
+			 XPC_CHCTL_OPENCOMPLETE)
+#define XPC_MSG_CHCTL_FLAGS	XPC_CHCTL_MSGREQUEST
+
+static inline int
+xpc_any_openclose_chctl_flags_set(union xpc_channel_ctl_flags *chctl)
+{
+	int ch_number;
+
+	for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) {
+		if (chctl->flags[ch_number] & XPC_OPENCLOSE_CHCTL_FLAGS)
+			return 1;
+	}
+	return 0;
+}
+
+static inline int
+xpc_any_msg_chctl_flags_set(union xpc_channel_ctl_flags *chctl)
+{
+	int ch_number;
+
+	for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) {
+		if (chctl->flags[ch_number] & XPC_MSG_CHCTL_FLAGS)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Manage channels on a partition basis. There is one of these structures
+ * for each partition (a partition will never utilize the structure that
+ * represents itself).
+ */
+
+struct xpc_partition_sn2 {
+	unsigned long remote_amos_page_pa; /* paddr of partition's amos page */
+	int activate_IRQ_nasid;	/* active partition's act/deact nasid */
+	int activate_IRQ_phys_cpuid;	/* active part's act/deact phys cpuid */
+
+	unsigned long remote_vars_pa;	/* phys addr of partition's vars */
+	unsigned long remote_vars_part_pa; /* paddr of partition's vars part */
+	u8 remote_vars_version;	/* version# of partition's vars */
+
+	void *local_GPs_base;	/* base address of kmalloc'd space */
+	struct xpc_gp_sn2 *local_GPs;	/* local Get/Put values */
+	void *remote_GPs_base;	/* base address of kmalloc'd space */
+	struct xpc_gp_sn2 *remote_GPs;	/* copy of remote partition's local */
+					/* Get/Put values */
+	unsigned long remote_GPs_pa; /* phys addr of remote partition's local */
+				     /* Get/Put values */
+
+	void *local_openclose_args_base;   /* base address of kmalloc'd space */
+	struct xpc_openclose_args *local_openclose_args;      /* local's args */
+	unsigned long remote_openclose_args_pa;	/* phys addr of remote's args */
+
+	int notify_IRQ_nasid;	/* nasid of where to send notify IRQs */
+	int notify_IRQ_phys_cpuid;	/* CPUID of where to send notify IRQs */
+	char notify_IRQ_owner[8];	/* notify IRQ's owner's name */
+
+	struct amo *remote_chctl_amo_va; /* addr of remote chctl flags' amo */
+	struct amo *local_chctl_amo_va;	/* address of chctl flags' amo */
+
+	struct timer_list dropped_notify_IRQ_timer;	/* dropped IRQ timer */
+};
+
+struct xpc_partition_uv {
+	unsigned long heartbeat_gpa; /* phys addr of partition's heartbeat */
+	struct xpc_heartbeat_uv cached_heartbeat; /* cached copy of */
+						  /* partition's heartbeat */
+	unsigned long activate_gru_mq_desc_gpa;	/* phys addr of parititon's */
+						/* activate mq's gru mq */
+						/* descriptor */
+	void *cached_activate_gru_mq_desc; /* cached copy of partition's */
+					   /* activate mq's gru mq descriptor */
+	struct mutex cached_activate_gru_mq_desc_mutex;
+	spinlock_t flags_lock;	/* protect updating of flags */
+	unsigned int flags;	/* general flags */
+	u8 remote_act_state;	/* remote partition's act_state */
+	u8 act_state_req;	/* act_state request from remote partition */
+	enum xp_retval reason;	/* reason for deactivate act_state request */
+};
+
+/* struct xpc_partition_uv flags */
+
+#define XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV	0x00000001
+#define XPC_P_ENGAGED_UV			0x00000002
+
+/* struct xpc_partition_uv act_state change requests */
+
+#define XPC_P_ASR_ACTIVATE_UV		0x01
+#define XPC_P_ASR_REACTIVATE_UV		0x02
+#define XPC_P_ASR_DEACTIVATE_UV		0x03
+
+struct xpc_partition {
+
+	/* XPC HB infrastructure */
+
+	u8 remote_rp_version;	/* version# of partition's rsvd pg */
+	unsigned long remote_rp_ts_jiffies; /* timestamp when rsvd pg setup */
+	unsigned long remote_rp_pa;	/* phys addr of partition's rsvd pg */
+	u64 last_heartbeat;	/* HB at last read */
+	u32 activate_IRQ_rcvd;	/* IRQs since activation */
+	spinlock_t act_lock;	/* protect updating of act_state */
+	u8 act_state;		/* from XPC HB viewpoint */
+	enum xp_retval reason;	/* reason partition is deactivating */
+	int reason_line;	/* line# deactivation initiated from */
+
+	unsigned long disengage_timeout;	/* timeout in jiffies */
+	struct timer_list disengage_timer;
+
+	/* XPC infrastructure referencing and teardown control */
+
+	u8 setup_state;		/* infrastructure setup state */
+	wait_queue_head_t teardown_wq;	/* kthread waiting to teardown infra */
+	atomic_t references;	/* #of references to infrastructure */
+
+	u8 nchannels;		/* #of defined channels supported */
+	atomic_t nchannels_active;  /* #of channels that are not DISCONNECTED */
+	atomic_t nchannels_engaged;  /* #of channels engaged with remote part */
+	struct xpc_channel *channels;	/* array of channel structures */
+
+	/* fields used for managing channel avialability and activity */
+
+	union xpc_channel_ctl_flags chctl; /* chctl flags yet to be processed */
+	spinlock_t chctl_lock;	/* chctl flags lock */
+
+	void *remote_openclose_args_base;  /* base address of kmalloc'd space */
+	struct xpc_openclose_args *remote_openclose_args; /* copy of remote's */
+							  /* args */
+
+	/* channel manager related fields */
+
+	atomic_t channel_mgr_requests;	/* #of requests to activate chan mgr */
+	wait_queue_head_t channel_mgr_wq;	/* channel mgr's wait queue */
+
+	union {
+		struct xpc_partition_sn2 sn2;
+		struct xpc_partition_uv uv;
+	} sn;
+
+} ____cacheline_aligned;
+
+struct xpc_arch_operations {
+	int (*setup_partitions) (void);
+	void (*teardown_partitions) (void);
+	void (*process_activate_IRQ_rcvd) (void);
+	enum xp_retval (*get_partition_rsvd_page_pa)
+		(void *, u64 *, unsigned long *, size_t *);
+	int (*setup_rsvd_page) (struct xpc_rsvd_page *);
+
+	void (*allow_hb) (short);
+	void (*disallow_hb) (short);
+	void (*disallow_all_hbs) (void);
+	void (*increment_heartbeat) (void);
+	void (*offline_heartbeat) (void);
+	void (*online_heartbeat) (void);
+	void (*heartbeat_init) (void);
+	void (*heartbeat_exit) (void);
+	enum xp_retval (*get_remote_heartbeat) (struct xpc_partition *);
+
+	void (*request_partition_activation) (struct xpc_rsvd_page *,
+						 unsigned long, int);
+	void (*request_partition_reactivation) (struct xpc_partition *);
+	void (*request_partition_deactivation) (struct xpc_partition *);
+	void (*cancel_partition_deactivation_request) (struct xpc_partition *);
+	enum xp_retval (*setup_ch_structures) (struct xpc_partition *);
+	void (*teardown_ch_structures) (struct xpc_partition *);
+
+	enum xp_retval (*make_first_contact) (struct xpc_partition *);
+
+	u64 (*get_chctl_all_flags) (struct xpc_partition *);
+	void (*send_chctl_closerequest) (struct xpc_channel *, unsigned long *);
+	void (*send_chctl_closereply) (struct xpc_channel *, unsigned long *);
+	void (*send_chctl_openrequest) (struct xpc_channel *, unsigned long *);
+	void (*send_chctl_openreply) (struct xpc_channel *, unsigned long *);
+	void (*send_chctl_opencomplete) (struct xpc_channel *, unsigned long *);
+	void (*process_msg_chctl_flags) (struct xpc_partition *, int);
+
+	enum xp_retval (*save_remote_msgqueue_pa) (struct xpc_channel *,
+						      unsigned long);
+
+	enum xp_retval (*setup_msg_structures) (struct xpc_channel *);
+	void (*teardown_msg_structures) (struct xpc_channel *);
+
+	void (*indicate_partition_engaged) (struct xpc_partition *);
+	void (*indicate_partition_disengaged) (struct xpc_partition *);
+	void (*assume_partition_disengaged) (short);
+	int (*partition_engaged) (short);
+	int (*any_partition_engaged) (void);
+
+	int (*n_of_deliverable_payloads) (struct xpc_channel *);
+	enum xp_retval (*send_payload) (struct xpc_channel *, u32, void *,
+					   u16, u8, xpc_notify_func, void *);
+	void *(*get_deliverable_payload) (struct xpc_channel *);
+	void (*received_payload) (struct xpc_channel *, void *);
+	void (*notify_senders_of_disconnect) (struct xpc_channel *);
+};
+
+/* struct xpc_partition act_state values (for XPC HB) */
+
+#define	XPC_P_AS_INACTIVE	0x00	/* partition is not active */
+#define XPC_P_AS_ACTIVATION_REQ	0x01	/* created thread to activate */
+#define XPC_P_AS_ACTIVATING	0x02	/* activation thread started */
+#define XPC_P_AS_ACTIVE		0x03	/* xpc_partition_up() was called */
+#define XPC_P_AS_DEACTIVATING	0x04	/* partition deactivation initiated */
+
+#define XPC_DEACTIVATE_PARTITION(_p, _reason) \
+			xpc_deactivate_partition(__LINE__, (_p), (_reason))
+
+/* struct xpc_partition setup_state values */
+
+#define XPC_P_SS_UNSET		0x00	/* infrastructure was never setup */
+#define XPC_P_SS_SETUP		0x01	/* infrastructure is setup */
+#define XPC_P_SS_WTEARDOWN	0x02	/* waiting to teardown infrastructure */
+#define XPC_P_SS_TORNDOWN	0x03	/* infrastructure is torndown */
+
+/*
+ * struct xpc_partition_sn2's dropped notify IRQ timer is set to wait the
+ * following interval #of seconds before checking for dropped notify IRQs.
+ * These can occur whenever an IRQ's associated amo write doesn't complete
+ * until after the IRQ was received.
+ */
+#define XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL	(0.25 * HZ)
+
+/* number of seconds to wait for other partitions to disengage */
+#define XPC_DISENGAGE_DEFAULT_TIMELIMIT		90
+
+/* interval in seconds to print 'waiting deactivation' messages */
+#define XPC_DEACTIVATE_PRINTMSG_INTERVAL	10
+
+#define XPC_PARTID(_p)	((short)((_p) - &xpc_partitions[0]))
+
+/* found in xp_main.c */
+extern struct xpc_registration xpc_registrations[];
+
+/* found in xpc_main.c */
+extern struct device *xpc_part;
+extern struct device *xpc_chan;
+extern struct xpc_arch_operations xpc_arch_ops;
+extern int xpc_disengage_timelimit;
+extern int xpc_disengage_timedout;
+extern int xpc_activate_IRQ_rcvd;
+extern spinlock_t xpc_activate_IRQ_rcvd_lock;
+extern wait_queue_head_t xpc_activate_IRQ_wq;
+extern void *xpc_kzalloc_cacheline_aligned(size_t, gfp_t, void **);
+extern void xpc_activate_partition(struct xpc_partition *);
+extern void xpc_activate_kthreads(struct xpc_channel *, int);
+extern void xpc_create_kthreads(struct xpc_channel *, int, int);
+extern void xpc_disconnect_wait(int);
+
+/* found in xpc_sn2.c */
+extern int xpc_init_sn2(void);
+extern void xpc_exit_sn2(void);
+
+/* found in xpc_uv.c */
+extern int xpc_init_uv(void);
+extern void xpc_exit_uv(void);
+
+/* found in xpc_partition.c */
+extern int xpc_exiting;
+extern int xpc_nasid_mask_nlongs;
+extern struct xpc_rsvd_page *xpc_rsvd_page;
+extern unsigned long *xpc_mach_nasids;
+extern struct xpc_partition *xpc_partitions;
+extern void *xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **);
+extern int xpc_setup_rsvd_page(void);
+extern void xpc_teardown_rsvd_page(void);
+extern int xpc_identify_activate_IRQ_sender(void);
+extern int xpc_partition_disengaged(struct xpc_partition *);
+extern enum xp_retval xpc_mark_partition_active(struct xpc_partition *);
+extern void xpc_mark_partition_inactive(struct xpc_partition *);
+extern void xpc_discovery(void);
+extern enum xp_retval xpc_get_remote_rp(int, unsigned long *,
+					struct xpc_rsvd_page *,
+					unsigned long *);
+extern void xpc_deactivate_partition(const int, struct xpc_partition *,
+				     enum xp_retval);
+extern enum xp_retval xpc_initiate_partid_to_nasids(short, void *);
+
+/* found in xpc_channel.c */
+extern void xpc_initiate_connect(int);
+extern void xpc_initiate_disconnect(int);
+extern enum xp_retval xpc_allocate_msg_wait(struct xpc_channel *);
+extern enum xp_retval xpc_initiate_send(short, int, u32, void *, u16);
+extern enum xp_retval xpc_initiate_send_notify(short, int, u32, void *, u16,
+					       xpc_notify_func, void *);
+extern void xpc_initiate_received(short, int, void *);
+extern void xpc_process_sent_chctl_flags(struct xpc_partition *);
+extern void xpc_connected_callout(struct xpc_channel *);
+extern void xpc_deliver_payload(struct xpc_channel *);
+extern void xpc_disconnect_channel(const int, struct xpc_channel *,
+				   enum xp_retval, unsigned long *);
+extern void xpc_disconnect_callout(struct xpc_channel *, enum xp_retval);
+extern void xpc_partition_going_down(struct xpc_partition *, enum xp_retval);
+
+static inline void
+xpc_wakeup_channel_mgr(struct xpc_partition *part)
+{
+	if (atomic_inc_return(&part->channel_mgr_requests) == 1)
+		wake_up(&part->channel_mgr_wq);
+}
+
+/*
+ * These next two inlines are used to keep us from tearing down a channel's
+ * msg queues while a thread may be referencing them.
+ */
+static inline void
+xpc_msgqueue_ref(struct xpc_channel *ch)
+{
+	atomic_inc(&ch->references);
+}
+
+static inline void
+xpc_msgqueue_deref(struct xpc_channel *ch)
+{
+	s32 refs = atomic_dec_return(&ch->references);
+
+	DBUG_ON(refs < 0);
+	if (refs == 0)
+		xpc_wakeup_channel_mgr(&xpc_partitions[ch->partid]);
+}
+
+#define XPC_DISCONNECT_CHANNEL(_ch, _reason, _irqflgs) \
+		xpc_disconnect_channel(__LINE__, _ch, _reason, _irqflgs)
+
+/*
+ * These two inlines are used to keep us from tearing down a partition's
+ * setup infrastructure while a thread may be referencing it.
+ */
+static inline void
+xpc_part_deref(struct xpc_partition *part)
+{
+	s32 refs = atomic_dec_return(&part->references);
+
+	DBUG_ON(refs < 0);
+	if (refs == 0 && part->setup_state == XPC_P_SS_WTEARDOWN)
+		wake_up(&part->teardown_wq);
+}
+
+static inline int
+xpc_part_ref(struct xpc_partition *part)
+{
+	int setup;
+
+	atomic_inc(&part->references);
+	setup = (part->setup_state == XPC_P_SS_SETUP);
+	if (!setup)
+		xpc_part_deref(part);
+
+	return setup;
+}
+
+/*
+ * The following macro is to be used for the setting of the reason and
+ * reason_line fields in both the struct xpc_channel and struct xpc_partition
+ * structures.
+ */
+#define XPC_SET_REASON(_p, _reason, _line) \
+	{ \
+		(_p)->reason = _reason; \
+		(_p)->reason_line = _line; \
+	}
+
+#endif /* _DRIVERS_MISC_SGIXP_XPC_H */
diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c
new file mode 100644
index 000000000..05a890ce2
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_channel.c
@@ -0,0 +1,1011 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2009 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) channel support.
+ *
+ *	This is the part of XPC that manages the channels and
+ *	sends/receives messages across them to/from other partitions.
+ *
+ */
+
+#include <linux/device.h>
+#include "xpc.h"
+
+/*
+ * Process a connect message from a remote partition.
+ *
+ * Note: xpc_process_connect() is expecting to be called with the
+ * spin_lock_irqsave held and will leave it locked upon return.
+ */
+static void
+xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	enum xp_retval ret;
+
+	DBUG_ON(!spin_is_locked(&ch->lock));
+
+	if (!(ch->flags & XPC_C_OPENREQUEST) ||
+	    !(ch->flags & XPC_C_ROPENREQUEST)) {
+		/* nothing more to do for now */
+		return;
+	}
+	DBUG_ON(!(ch->flags & XPC_C_CONNECTING));
+
+	if (!(ch->flags & XPC_C_SETUP)) {
+		spin_unlock_irqrestore(&ch->lock, *irq_flags);
+		ret = xpc_arch_ops.setup_msg_structures(ch);
+		spin_lock_irqsave(&ch->lock, *irq_flags);
+
+		if (ret != xpSuccess)
+			XPC_DISCONNECT_CHANNEL(ch, ret, irq_flags);
+		else
+			ch->flags |= XPC_C_SETUP;
+
+		if (ch->flags & XPC_C_DISCONNECTING)
+			return;
+	}
+
+	if (!(ch->flags & XPC_C_OPENREPLY)) {
+		ch->flags |= XPC_C_OPENREPLY;
+		xpc_arch_ops.send_chctl_openreply(ch, irq_flags);
+	}
+
+	if (!(ch->flags & XPC_C_ROPENREPLY))
+		return;
+
+	if (!(ch->flags & XPC_C_OPENCOMPLETE)) {
+		ch->flags |= (XPC_C_OPENCOMPLETE | XPC_C_CONNECTED);
+		xpc_arch_ops.send_chctl_opencomplete(ch, irq_flags);
+	}
+
+	if (!(ch->flags & XPC_C_ROPENCOMPLETE))
+		return;
+
+	dev_info(xpc_chan, "channel %d to partition %d connected\n",
+		 ch->number, ch->partid);
+
+	ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP);	/* clear all else */
+}
+
+/*
+ * spin_lock_irqsave() is expected to be held on entry.
+ */
+static void
+xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	struct xpc_partition *part = &xpc_partitions[ch->partid];
+	u32 channel_was_connected = (ch->flags & XPC_C_WASCONNECTED);
+
+	DBUG_ON(!spin_is_locked(&ch->lock));
+
+	if (!(ch->flags & XPC_C_DISCONNECTING))
+		return;
+
+	DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
+
+	/* make sure all activity has settled down first */
+
+	if (atomic_read(&ch->kthreads_assigned) > 0 ||
+	    atomic_read(&ch->references) > 0) {
+		return;
+	}
+	DBUG_ON((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+		!(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE));
+
+	if (part->act_state == XPC_P_AS_DEACTIVATING) {
+		/* can't proceed until the other side disengages from us */
+		if (xpc_arch_ops.partition_engaged(ch->partid))
+			return;
+
+	} else {
+
+		/* as long as the other side is up do the full protocol */
+
+		if (!(ch->flags & XPC_C_RCLOSEREQUEST))
+			return;
+
+		if (!(ch->flags & XPC_C_CLOSEREPLY)) {
+			ch->flags |= XPC_C_CLOSEREPLY;
+			xpc_arch_ops.send_chctl_closereply(ch, irq_flags);
+		}
+
+		if (!(ch->flags & XPC_C_RCLOSEREPLY))
+			return;
+	}
+
+	/* wake those waiting for notify completion */
+	if (atomic_read(&ch->n_to_notify) > 0) {
+		/* we do callout while holding ch->lock, callout can't block */
+		xpc_arch_ops.notify_senders_of_disconnect(ch);
+	}
+
+	/* both sides are disconnected now */
+
+	if (ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE) {
+		spin_unlock_irqrestore(&ch->lock, *irq_flags);
+		xpc_disconnect_callout(ch, xpDisconnected);
+		spin_lock_irqsave(&ch->lock, *irq_flags);
+	}
+
+	DBUG_ON(atomic_read(&ch->n_to_notify) != 0);
+
+	/* it's now safe to free the channel's message queues */
+	xpc_arch_ops.teardown_msg_structures(ch);
+
+	ch->func = NULL;
+	ch->key = NULL;
+	ch->entry_size = 0;
+	ch->local_nentries = 0;
+	ch->remote_nentries = 0;
+	ch->kthreads_assigned_limit = 0;
+	ch->kthreads_idle_limit = 0;
+
+	/*
+	 * Mark the channel disconnected and clear all other flags, including
+	 * XPC_C_SETUP (because of call to
+	 * xpc_arch_ops.teardown_msg_structures()) but not including
+	 * XPC_C_WDISCONNECT (if it was set).
+	 */
+	ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT));
+
+	atomic_dec(&part->nchannels_active);
+
+	if (channel_was_connected) {
+		dev_info(xpc_chan, "channel %d to partition %d disconnected, "
+			 "reason=%d\n", ch->number, ch->partid, ch->reason);
+	}
+
+	if (ch->flags & XPC_C_WDISCONNECT) {
+		/* we won't lose the CPU since we're holding ch->lock */
+		complete(&ch->wdisconnect_wait);
+	} else if (ch->delayed_chctl_flags) {
+		if (part->act_state != XPC_P_AS_DEACTIVATING) {
+			/* time to take action on any delayed chctl flags */
+			spin_lock(&part->chctl_lock);
+			part->chctl.flags[ch->number] |=
+			    ch->delayed_chctl_flags;
+			spin_unlock(&part->chctl_lock);
+		}
+		ch->delayed_chctl_flags = 0;
+	}
+}
+
+/*
+ * Process a change in the channel's remote connection state.
+ */
+static void
+xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number,
+				  u8 chctl_flags)
+{
+	unsigned long irq_flags;
+	struct xpc_openclose_args *args =
+	    &part->remote_openclose_args[ch_number];
+	struct xpc_channel *ch = &part->channels[ch_number];
+	enum xp_retval reason;
+	enum xp_retval ret;
+	int create_kthread = 0;
+
+	spin_lock_irqsave(&ch->lock, irq_flags);
+
+again:
+
+	if ((ch->flags & XPC_C_DISCONNECTED) &&
+	    (ch->flags & XPC_C_WDISCONNECT)) {
+		/*
+		 * Delay processing chctl flags until thread waiting disconnect
+		 * has had a chance to see that the channel is disconnected.
+		 */
+		ch->delayed_chctl_flags |= chctl_flags;
+		goto out;
+	}
+
+	if (chctl_flags & XPC_CHCTL_CLOSEREQUEST) {
+
+		dev_dbg(xpc_chan, "XPC_CHCTL_CLOSEREQUEST (reason=%d) received "
+			"from partid=%d, channel=%d\n", args->reason,
+			ch->partid, ch->number);
+
+		/*
+		 * If RCLOSEREQUEST is set, we're probably waiting for
+		 * RCLOSEREPLY. We should find it and a ROPENREQUEST packed
+		 * with this RCLOSEREQUEST in the chctl_flags.
+		 */
+
+		if (ch->flags & XPC_C_RCLOSEREQUEST) {
+			DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING));
+			DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
+			DBUG_ON(!(ch->flags & XPC_C_CLOSEREPLY));
+			DBUG_ON(ch->flags & XPC_C_RCLOSEREPLY);
+
+			DBUG_ON(!(chctl_flags & XPC_CHCTL_CLOSEREPLY));
+			chctl_flags &= ~XPC_CHCTL_CLOSEREPLY;
+			ch->flags |= XPC_C_RCLOSEREPLY;
+
+			/* both sides have finished disconnecting */
+			xpc_process_disconnect(ch, &irq_flags);
+			DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
+			goto again;
+		}
+
+		if (ch->flags & XPC_C_DISCONNECTED) {
+			if (!(chctl_flags & XPC_CHCTL_OPENREQUEST)) {
+				if (part->chctl.flags[ch_number] &
+				    XPC_CHCTL_OPENREQUEST) {
+
+					DBUG_ON(ch->delayed_chctl_flags != 0);
+					spin_lock(&part->chctl_lock);
+					part->chctl.flags[ch_number] |=
+					    XPC_CHCTL_CLOSEREQUEST;
+					spin_unlock(&part->chctl_lock);
+				}
+				goto out;
+			}
+
+			XPC_SET_REASON(ch, 0, 0);
+			ch->flags &= ~XPC_C_DISCONNECTED;
+
+			atomic_inc(&part->nchannels_active);
+			ch->flags |= (XPC_C_CONNECTING | XPC_C_ROPENREQUEST);
+		}
+
+		chctl_flags &= ~(XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY |
+		    XPC_CHCTL_OPENCOMPLETE);
+
+		/*
+		 * The meaningful CLOSEREQUEST connection state fields are:
+		 *      reason = reason connection is to be closed
+		 */
+
+		ch->flags |= XPC_C_RCLOSEREQUEST;
+
+		if (!(ch->flags & XPC_C_DISCONNECTING)) {
+			reason = args->reason;
+			if (reason <= xpSuccess || reason > xpUnknownReason)
+				reason = xpUnknownReason;
+			else if (reason == xpUnregistering)
+				reason = xpOtherUnregistering;
+
+			XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags);
+
+			DBUG_ON(chctl_flags & XPC_CHCTL_CLOSEREPLY);
+			goto out;
+		}
+
+		xpc_process_disconnect(ch, &irq_flags);
+	}
+
+	if (chctl_flags & XPC_CHCTL_CLOSEREPLY) {
+
+		dev_dbg(xpc_chan, "XPC_CHCTL_CLOSEREPLY received from partid="
+			"%d, channel=%d\n", ch->partid, ch->number);
+
+		if (ch->flags & XPC_C_DISCONNECTED) {
+			DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING);
+			goto out;
+		}
+
+		DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
+
+		if (!(ch->flags & XPC_C_RCLOSEREQUEST)) {
+			if (part->chctl.flags[ch_number] &
+			    XPC_CHCTL_CLOSEREQUEST) {
+
+				DBUG_ON(ch->delayed_chctl_flags != 0);
+				spin_lock(&part->chctl_lock);
+				part->chctl.flags[ch_number] |=
+				    XPC_CHCTL_CLOSEREPLY;
+				spin_unlock(&part->chctl_lock);
+			}
+			goto out;
+		}
+
+		ch->flags |= XPC_C_RCLOSEREPLY;
+
+		if (ch->flags & XPC_C_CLOSEREPLY) {
+			/* both sides have finished disconnecting */
+			xpc_process_disconnect(ch, &irq_flags);
+		}
+	}
+
+	if (chctl_flags & XPC_CHCTL_OPENREQUEST) {
+
+		dev_dbg(xpc_chan, "XPC_CHCTL_OPENREQUEST (entry_size=%d, "
+			"local_nentries=%d) received from partid=%d, "
+			"channel=%d\n", args->entry_size, args->local_nentries,
+			ch->partid, ch->number);
+
+		if (part->act_state == XPC_P_AS_DEACTIVATING ||
+		    (ch->flags & XPC_C_ROPENREQUEST)) {
+			goto out;
+		}
+
+		if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) {
+			ch->delayed_chctl_flags |= XPC_CHCTL_OPENREQUEST;
+			goto out;
+		}
+		DBUG_ON(!(ch->flags & (XPC_C_DISCONNECTED |
+				       XPC_C_OPENREQUEST)));
+		DBUG_ON(ch->flags & (XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
+				     XPC_C_OPENREPLY | XPC_C_CONNECTED));
+
+		/*
+		 * The meaningful OPENREQUEST connection state fields are:
+		 *      entry_size = size of channel's messages in bytes
+		 *      local_nentries = remote partition's local_nentries
+		 */
+		if (args->entry_size == 0 || args->local_nentries == 0) {
+			/* assume OPENREQUEST was delayed by mistake */
+			goto out;
+		}
+
+		ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING);
+		ch->remote_nentries = args->local_nentries;
+
+		if (ch->flags & XPC_C_OPENREQUEST) {
+			if (args->entry_size != ch->entry_size) {
+				XPC_DISCONNECT_CHANNEL(ch, xpUnequalMsgSizes,
+						       &irq_flags);
+				goto out;
+			}
+		} else {
+			ch->entry_size = args->entry_size;
+
+			XPC_SET_REASON(ch, 0, 0);
+			ch->flags &= ~XPC_C_DISCONNECTED;
+
+			atomic_inc(&part->nchannels_active);
+		}
+
+		xpc_process_connect(ch, &irq_flags);
+	}
+
+	if (chctl_flags & XPC_CHCTL_OPENREPLY) {
+
+		dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY (local_msgqueue_pa="
+			"0x%lx, local_nentries=%d, remote_nentries=%d) "
+			"received from partid=%d, channel=%d\n",
+			args->local_msgqueue_pa, args->local_nentries,
+			args->remote_nentries, ch->partid, ch->number);
+
+		if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED))
+			goto out;
+
+		if (!(ch->flags & XPC_C_OPENREQUEST)) {
+			XPC_DISCONNECT_CHANNEL(ch, xpOpenCloseError,
+					       &irq_flags);
+			goto out;
+		}
+
+		DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST));
+		DBUG_ON(ch->flags & XPC_C_CONNECTED);
+
+		/*
+		 * The meaningful OPENREPLY connection state fields are:
+		 *      local_msgqueue_pa = physical address of remote
+		 *                          partition's local_msgqueue
+		 *      local_nentries = remote partition's local_nentries
+		 *      remote_nentries = remote partition's remote_nentries
+		 */
+		DBUG_ON(args->local_msgqueue_pa == 0);
+		DBUG_ON(args->local_nentries == 0);
+		DBUG_ON(args->remote_nentries == 0);
+
+		ret = xpc_arch_ops.save_remote_msgqueue_pa(ch,
+						      args->local_msgqueue_pa);
+		if (ret != xpSuccess) {
+			XPC_DISCONNECT_CHANNEL(ch, ret, &irq_flags);
+			goto out;
+		}
+		ch->flags |= XPC_C_ROPENREPLY;
+
+		if (args->local_nentries < ch->remote_nentries) {
+			dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY: new "
+				"remote_nentries=%d, old remote_nentries=%d, "
+				"partid=%d, channel=%d\n",
+				args->local_nentries, ch->remote_nentries,
+				ch->partid, ch->number);
+
+			ch->remote_nentries = args->local_nentries;
+		}
+		if (args->remote_nentries < ch->local_nentries) {
+			dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY: new "
+				"local_nentries=%d, old local_nentries=%d, "
+				"partid=%d, channel=%d\n",
+				args->remote_nentries, ch->local_nentries,
+				ch->partid, ch->number);
+
+			ch->local_nentries = args->remote_nentries;
+		}
+
+		xpc_process_connect(ch, &irq_flags);
+	}
+
+	if (chctl_flags & XPC_CHCTL_OPENCOMPLETE) {
+
+		dev_dbg(xpc_chan, "XPC_CHCTL_OPENCOMPLETE received from "
+			"partid=%d, channel=%d\n", ch->partid, ch->number);
+
+		if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED))
+			goto out;
+
+		if (!(ch->flags & XPC_C_OPENREQUEST) ||
+		    !(ch->flags & XPC_C_OPENREPLY)) {
+			XPC_DISCONNECT_CHANNEL(ch, xpOpenCloseError,
+					       &irq_flags);
+			goto out;
+		}
+
+		DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST));
+		DBUG_ON(!(ch->flags & XPC_C_ROPENREPLY));
+		DBUG_ON(!(ch->flags & XPC_C_CONNECTED));
+
+		ch->flags |= XPC_C_ROPENCOMPLETE;
+
+		xpc_process_connect(ch, &irq_flags);
+		create_kthread = 1;
+	}
+
+out:
+	spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+	if (create_kthread)
+		xpc_create_kthreads(ch, 1, 0);
+}
+
+/*
+ * Attempt to establish a channel connection to a remote partition.
+ */
+static enum xp_retval
+xpc_connect_channel(struct xpc_channel *ch)
+{
+	unsigned long irq_flags;
+	struct xpc_registration *registration = &xpc_registrations[ch->number];
+
+	if (mutex_trylock(&registration->mutex) == 0)
+		return xpRetry;
+
+	if (!XPC_CHANNEL_REGISTERED(ch->number)) {
+		mutex_unlock(&registration->mutex);
+		return xpUnregistered;
+	}
+
+	spin_lock_irqsave(&ch->lock, irq_flags);
+
+	DBUG_ON(ch->flags & XPC_C_CONNECTED);
+	DBUG_ON(ch->flags & XPC_C_OPENREQUEST);
+
+	if (ch->flags & XPC_C_DISCONNECTING) {
+		spin_unlock_irqrestore(&ch->lock, irq_flags);
+		mutex_unlock(&registration->mutex);
+		return ch->reason;
+	}
+
+	/* add info from the channel connect registration to the channel */
+
+	ch->kthreads_assigned_limit = registration->assigned_limit;
+	ch->kthreads_idle_limit = registration->idle_limit;
+	DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0);
+	DBUG_ON(atomic_read(&ch->kthreads_idle) != 0);
+	DBUG_ON(atomic_read(&ch->kthreads_active) != 0);
+
+	ch->func = registration->func;
+	DBUG_ON(registration->func == NULL);
+	ch->key = registration->key;
+
+	ch->local_nentries = registration->nentries;
+
+	if (ch->flags & XPC_C_ROPENREQUEST) {
+		if (registration->entry_size != ch->entry_size) {
+			/* the local and remote sides aren't the same */
+
+			/*
+			 * Because XPC_DISCONNECT_CHANNEL() can block we're
+			 * forced to up the registration sema before we unlock
+			 * the channel lock. But that's okay here because we're
+			 * done with the part that required the registration
+			 * sema. XPC_DISCONNECT_CHANNEL() requires that the
+			 * channel lock be locked and will unlock and relock
+			 * the channel lock as needed.
+			 */
+			mutex_unlock(&registration->mutex);
+			XPC_DISCONNECT_CHANNEL(ch, xpUnequalMsgSizes,
+					       &irq_flags);
+			spin_unlock_irqrestore(&ch->lock, irq_flags);
+			return xpUnequalMsgSizes;
+		}
+	} else {
+		ch->entry_size = registration->entry_size;
+
+		XPC_SET_REASON(ch, 0, 0);
+		ch->flags &= ~XPC_C_DISCONNECTED;
+
+		atomic_inc(&xpc_partitions[ch->partid].nchannels_active);
+	}
+
+	mutex_unlock(&registration->mutex);
+
+	/* initiate the connection */
+
+	ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING);
+	xpc_arch_ops.send_chctl_openrequest(ch, &irq_flags);
+
+	xpc_process_connect(ch, &irq_flags);
+
+	spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+	return xpSuccess;
+}
+
+void
+xpc_process_sent_chctl_flags(struct xpc_partition *part)
+{
+	unsigned long irq_flags;
+	union xpc_channel_ctl_flags chctl;
+	struct xpc_channel *ch;
+	int ch_number;
+	u32 ch_flags;
+
+	chctl.all_flags = xpc_arch_ops.get_chctl_all_flags(part);
+
+	/*
+	 * Initiate channel connections for registered channels.
+	 *
+	 * For each connected channel that has pending messages activate idle
+	 * kthreads and/or create new kthreads as needed.
+	 */
+
+	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
+		ch = &part->channels[ch_number];
+
+		/*
+		 * Process any open or close related chctl flags, and then deal
+		 * with connecting or disconnecting the channel as required.
+		 */
+
+		if (chctl.flags[ch_number] & XPC_OPENCLOSE_CHCTL_FLAGS) {
+			xpc_process_openclose_chctl_flags(part, ch_number,
+							chctl.flags[ch_number]);
+		}
+
+		ch_flags = ch->flags;	/* need an atomic snapshot of flags */
+
+		if (ch_flags & XPC_C_DISCONNECTING) {
+			spin_lock_irqsave(&ch->lock, irq_flags);
+			xpc_process_disconnect(ch, &irq_flags);
+			spin_unlock_irqrestore(&ch->lock, irq_flags);
+			continue;
+		}
+
+		if (part->act_state == XPC_P_AS_DEACTIVATING)
+			continue;
+
+		if (!(ch_flags & XPC_C_CONNECTED)) {
+			if (!(ch_flags & XPC_C_OPENREQUEST)) {
+				DBUG_ON(ch_flags & XPC_C_SETUP);
+				(void)xpc_connect_channel(ch);
+			}
+			continue;
+		}
+
+		/*
+		 * Process any message related chctl flags, this may involve
+		 * the activation of kthreads to deliver any pending messages
+		 * sent from the other partition.
+		 */
+
+		if (chctl.flags[ch_number] & XPC_MSG_CHCTL_FLAGS)
+			xpc_arch_ops.process_msg_chctl_flags(part, ch_number);
+	}
+}
+
+/*
+ * XPC's heartbeat code calls this function to inform XPC that a partition is
+ * going down.  XPC responds by tearing down the XPartition Communication
+ * infrastructure used for the just downed partition.
+ *
+ * XPC's heartbeat code will never call this function and xpc_partition_up()
+ * at the same time. Nor will it ever make multiple calls to either function
+ * at the same time.
+ */
+void
+xpc_partition_going_down(struct xpc_partition *part, enum xp_retval reason)
+{
+	unsigned long irq_flags;
+	int ch_number;
+	struct xpc_channel *ch;
+
+	dev_dbg(xpc_chan, "deactivating partition %d, reason=%d\n",
+		XPC_PARTID(part), reason);
+
+	if (!xpc_part_ref(part)) {
+		/* infrastructure for this partition isn't currently set up */
+		return;
+	}
+
+	/* disconnect channels associated with the partition going down */
+
+	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
+		ch = &part->channels[ch_number];
+
+		xpc_msgqueue_ref(ch);
+		spin_lock_irqsave(&ch->lock, irq_flags);
+
+		XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags);
+
+		spin_unlock_irqrestore(&ch->lock, irq_flags);
+		xpc_msgqueue_deref(ch);
+	}
+
+	xpc_wakeup_channel_mgr(part);
+
+	xpc_part_deref(part);
+}
+
+/*
+ * Called by XP at the time of channel connection registration to cause
+ * XPC to establish connections to all currently active partitions.
+ */
+void
+xpc_initiate_connect(int ch_number)
+{
+	short partid;
+	struct xpc_partition *part;
+
+	DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
+
+	for (partid = 0; partid < xp_max_npartitions; partid++) {
+		part = &xpc_partitions[partid];
+
+		if (xpc_part_ref(part)) {
+			/*
+			 * Initiate the establishment of a connection on the
+			 * newly registered channel to the remote partition.
+			 */
+			xpc_wakeup_channel_mgr(part);
+			xpc_part_deref(part);
+		}
+	}
+}
+
+void
+xpc_connected_callout(struct xpc_channel *ch)
+{
+	/* let the registerer know that a connection has been established */
+
+	if (ch->func != NULL) {
+		dev_dbg(xpc_chan, "ch->func() called, reason=xpConnected, "
+			"partid=%d, channel=%d\n", ch->partid, ch->number);
+
+		ch->func(xpConnected, ch->partid, ch->number,
+			 (void *)(u64)ch->local_nentries, ch->key);
+
+		dev_dbg(xpc_chan, "ch->func() returned, reason=xpConnected, "
+			"partid=%d, channel=%d\n", ch->partid, ch->number);
+	}
+}
+
+/*
+ * Called by XP at the time of channel connection unregistration to cause
+ * XPC to teardown all current connections for the specified channel.
+ *
+ * Before returning xpc_initiate_disconnect() will wait until all connections
+ * on the specified channel have been closed/torndown. So the caller can be
+ * assured that they will not be receiving any more callouts from XPC to the
+ * function they registered via xpc_connect().
+ *
+ * Arguments:
+ *
+ *	ch_number - channel # to unregister.
+ */
+void
+xpc_initiate_disconnect(int ch_number)
+{
+	unsigned long irq_flags;
+	short partid;
+	struct xpc_partition *part;
+	struct xpc_channel *ch;
+
+	DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
+
+	/* initiate the channel disconnect for every active partition */
+	for (partid = 0; partid < xp_max_npartitions; partid++) {
+		part = &xpc_partitions[partid];
+
+		if (xpc_part_ref(part)) {
+			ch = &part->channels[ch_number];
+			xpc_msgqueue_ref(ch);
+
+			spin_lock_irqsave(&ch->lock, irq_flags);
+
+			if (!(ch->flags & XPC_C_DISCONNECTED)) {
+				ch->flags |= XPC_C_WDISCONNECT;
+
+				XPC_DISCONNECT_CHANNEL(ch, xpUnregistering,
+						       &irq_flags);
+			}
+
+			spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+			xpc_msgqueue_deref(ch);
+			xpc_part_deref(part);
+		}
+	}
+
+	xpc_disconnect_wait(ch_number);
+}
+
+/*
+ * To disconnect a channel, and reflect it back to all who may be waiting.
+ *
+ * An OPEN is not allowed until XPC_C_DISCONNECTING is cleared by
+ * xpc_process_disconnect(), and if set, XPC_C_WDISCONNECT is cleared by
+ * xpc_disconnect_wait().
+ *
+ * THE CHANNEL IS TO BE LOCKED BY THE CALLER AND WILL REMAIN LOCKED UPON RETURN.
+ */
+void
+xpc_disconnect_channel(const int line, struct xpc_channel *ch,
+		       enum xp_retval reason, unsigned long *irq_flags)
+{
+	u32 channel_was_connected = (ch->flags & XPC_C_CONNECTED);
+
+	DBUG_ON(!spin_is_locked(&ch->lock));
+
+	if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED))
+		return;
+
+	DBUG_ON(!(ch->flags & (XPC_C_CONNECTING | XPC_C_CONNECTED)));
+
+	dev_dbg(xpc_chan, "reason=%d, line=%d, partid=%d, channel=%d\n",
+		reason, line, ch->partid, ch->number);
+
+	XPC_SET_REASON(ch, reason, line);
+
+	ch->flags |= (XPC_C_CLOSEREQUEST | XPC_C_DISCONNECTING);
+	/* some of these may not have been set */
+	ch->flags &= ~(XPC_C_OPENREQUEST | XPC_C_OPENREPLY |
+		       XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
+		       XPC_C_CONNECTING | XPC_C_CONNECTED);
+
+	xpc_arch_ops.send_chctl_closerequest(ch, irq_flags);
+
+	if (channel_was_connected)
+		ch->flags |= XPC_C_WASCONNECTED;
+
+	spin_unlock_irqrestore(&ch->lock, *irq_flags);
+
+	/* wake all idle kthreads so they can exit */
+	if (atomic_read(&ch->kthreads_idle) > 0) {
+		wake_up_all(&ch->idle_wq);
+
+	} else if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+		   !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
+		/* start a kthread that will do the xpDisconnecting callout */
+		xpc_create_kthreads(ch, 1, 1);
+	}
+
+	/* wake those waiting to allocate an entry from the local msg queue */
+	if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
+		wake_up(&ch->msg_allocate_wq);
+
+	spin_lock_irqsave(&ch->lock, *irq_flags);
+}
+
+void
+xpc_disconnect_callout(struct xpc_channel *ch, enum xp_retval reason)
+{
+	/*
+	 * Let the channel's registerer know that the channel is being
+	 * disconnected. We don't want to do this if the registerer was never
+	 * informed of a connection being made.
+	 */
+
+	if (ch->func != NULL) {
+		dev_dbg(xpc_chan, "ch->func() called, reason=%d, partid=%d, "
+			"channel=%d\n", reason, ch->partid, ch->number);
+
+		ch->func(reason, ch->partid, ch->number, NULL, ch->key);
+
+		dev_dbg(xpc_chan, "ch->func() returned, reason=%d, partid=%d, "
+			"channel=%d\n", reason, ch->partid, ch->number);
+	}
+}
+
+/*
+ * Wait for a message entry to become available for the specified channel,
+ * but don't wait any longer than 1 jiffy.
+ */
+enum xp_retval
+xpc_allocate_msg_wait(struct xpc_channel *ch)
+{
+	enum xp_retval ret;
+	DEFINE_WAIT(wait);
+
+	if (ch->flags & XPC_C_DISCONNECTING) {
+		DBUG_ON(ch->reason == xpInterrupted);
+		return ch->reason;
+	}
+
+	atomic_inc(&ch->n_on_msg_allocate_wq);
+	prepare_to_wait(&ch->msg_allocate_wq, &wait, TASK_INTERRUPTIBLE);
+	ret = schedule_timeout(1);
+	finish_wait(&ch->msg_allocate_wq, &wait);
+	atomic_dec(&ch->n_on_msg_allocate_wq);
+
+	if (ch->flags & XPC_C_DISCONNECTING) {
+		ret = ch->reason;
+		DBUG_ON(ch->reason == xpInterrupted);
+	} else if (ret == 0) {
+		ret = xpTimeout;
+	} else {
+		ret = xpInterrupted;
+	}
+
+	return ret;
+}
+
+/*
+ * Send a message that contains the user's payload on the specified channel
+ * connected to the specified partition.
+ *
+ * NOTE that this routine can sleep waiting for a message entry to become
+ * available. To not sleep, pass in the XPC_NOWAIT flag.
+ *
+ * Once sent, this routine will not wait for the message to be received, nor
+ * will notification be given when it does happen.
+ *
+ * Arguments:
+ *
+ *	partid - ID of partition to which the channel is connected.
+ *	ch_number - channel # to send message on.
+ *	flags - see xp.h for valid flags.
+ *	payload - pointer to the payload which is to be sent.
+ *	payload_size - size of the payload in bytes.
+ */
+enum xp_retval
+xpc_initiate_send(short partid, int ch_number, u32 flags, void *payload,
+		  u16 payload_size)
+{
+	struct xpc_partition *part = &xpc_partitions[partid];
+	enum xp_retval ret = xpUnknownReason;
+
+	dev_dbg(xpc_chan, "payload=0x%p, partid=%d, channel=%d\n", payload,
+		partid, ch_number);
+
+	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
+	DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
+	DBUG_ON(payload == NULL);
+
+	if (xpc_part_ref(part)) {
+		ret = xpc_arch_ops.send_payload(&part->channels[ch_number],
+				  flags, payload, payload_size, 0, NULL, NULL);
+		xpc_part_deref(part);
+	}
+
+	return ret;
+}
+
+/*
+ * Send a message that contains the user's payload on the specified channel
+ * connected to the specified partition.
+ *
+ * NOTE that this routine can sleep waiting for a message entry to become
+ * available. To not sleep, pass in the XPC_NOWAIT flag.
+ *
+ * This routine will not wait for the message to be sent or received.
+ *
+ * Once the remote end of the channel has received the message, the function
+ * passed as an argument to xpc_initiate_send_notify() will be called. This
+ * allows the sender to free up or re-use any buffers referenced by the
+ * message, but does NOT mean the message has been processed at the remote
+ * end by a receiver.
+ *
+ * If this routine returns an error, the caller's function will NOT be called.
+ *
+ * Arguments:
+ *
+ *	partid - ID of partition to which the channel is connected.
+ *	ch_number - channel # to send message on.
+ *	flags - see xp.h for valid flags.
+ *	payload - pointer to the payload which is to be sent.
+ *	payload_size - size of the payload in bytes.
+ *	func - function to call with asynchronous notification of message
+ *		  receipt. THIS FUNCTION MUST BE NON-BLOCKING.
+ *	key - user-defined key to be passed to the function when it's called.
+ */
+enum xp_retval
+xpc_initiate_send_notify(short partid, int ch_number, u32 flags, void *payload,
+			 u16 payload_size, xpc_notify_func func, void *key)
+{
+	struct xpc_partition *part = &xpc_partitions[partid];
+	enum xp_retval ret = xpUnknownReason;
+
+	dev_dbg(xpc_chan, "payload=0x%p, partid=%d, channel=%d\n", payload,
+		partid, ch_number);
+
+	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
+	DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
+	DBUG_ON(payload == NULL);
+	DBUG_ON(func == NULL);
+
+	if (xpc_part_ref(part)) {
+		ret = xpc_arch_ops.send_payload(&part->channels[ch_number],
+			  flags, payload, payload_size, XPC_N_CALL, func, key);
+		xpc_part_deref(part);
+	}
+	return ret;
+}
+
+/*
+ * Deliver a message's payload to its intended recipient.
+ */
+void
+xpc_deliver_payload(struct xpc_channel *ch)
+{
+	void *payload;
+
+	payload = xpc_arch_ops.get_deliverable_payload(ch);
+	if (payload != NULL) {
+
+		/*
+		 * This ref is taken to protect the payload itself from being
+		 * freed before the user is finished with it, which the user
+		 * indicates by calling xpc_initiate_received().
+		 */
+		xpc_msgqueue_ref(ch);
+
+		atomic_inc(&ch->kthreads_active);
+
+		if (ch->func != NULL) {
+			dev_dbg(xpc_chan, "ch->func() called, payload=0x%p "
+				"partid=%d channel=%d\n", payload, ch->partid,
+				ch->number);
+
+			/* deliver the message to its intended recipient */
+			ch->func(xpMsgReceived, ch->partid, ch->number, payload,
+				 ch->key);
+
+			dev_dbg(xpc_chan, "ch->func() returned, payload=0x%p "
+				"partid=%d channel=%d\n", payload, ch->partid,
+				ch->number);
+		}
+
+		atomic_dec(&ch->kthreads_active);
+	}
+}
+
+/*
+ * Acknowledge receipt of a delivered message's payload.
+ *
+ * This function, although called by users, does not call xpc_part_ref() to
+ * ensure that the partition infrastructure is in place. It relies on the
+ * fact that we called xpc_msgqueue_ref() in xpc_deliver_payload().
+ *
+ * Arguments:
+ *
+ *	partid - ID of partition to which the channel is connected.
+ *	ch_number - channel # message received on.
+ *	payload - pointer to the payload area allocated via
+ *			xpc_initiate_send() or xpc_initiate_send_notify().
+ */
+void
+xpc_initiate_received(short partid, int ch_number, void *payload)
+{
+	struct xpc_partition *part = &xpc_partitions[partid];
+	struct xpc_channel *ch;
+
+	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
+	DBUG_ON(ch_number < 0 || ch_number >= part->nchannels);
+
+	ch = &part->channels[ch_number];
+	xpc_arch_ops.received_payload(ch, payload);
+
+	/* the call to xpc_msgqueue_ref() was done by xpc_deliver_payload()  */
+	xpc_msgqueue_deref(ch);
+}
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
new file mode 100644
index 000000000..83fc748a9
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -0,0 +1,1373 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2009 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) support - standard version.
+ *
+ *	XPC provides a message passing capability that crosses partition
+ *	boundaries. This module is made up of two parts:
+ *
+ *	    partition	This part detects the presence/absence of other
+ *			partitions. It provides a heartbeat and monitors
+ *			the heartbeats of other partitions.
+ *
+ *	    channel	This part manages the channels and sends/receives
+ *			messages across them to/from other partitions.
+ *
+ *	There are a couple of additional functions residing in XP, which
+ *	provide an interface to XPC for its users.
+ *
+ *
+ *	Caveats:
+ *
+ *	  . Currently on sn2, we have no way to determine which nasid an IRQ
+ *	    came from. Thus, xpc_send_IRQ_sn2() does a remote amo write
+ *	    followed by an IPI. The amo indicates where data is to be pulled
+ *	    from, so after the IPI arrives, the remote partition checks the amo
+ *	    word. The IPI can actually arrive before the amo however, so other
+ *	    code must periodically check for this case. Also, remote amo
+ *	    operations do not reliably time out. Thus we do a remote PIO read
+ *	    solely to know whether the remote partition is down and whether we
+ *	    should stop sending IPIs to it. This remote PIO read operation is
+ *	    set up in a special nofault region so SAL knows to ignore (and
+ *	    cleanup) any errors due to the remote amo write, PIO read, and/or
+ *	    PIO write operations.
+ *
+ *	    If/when new hardware solves this IPI problem, we should abandon
+ *	    the current approach.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/sysctl.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/kdebug.h>
+#include <linux/kthread.h>
+#include "xpc.h"
+
+#ifdef CONFIG_X86_64
+#include <asm/traps.h>
+#endif
+
+/* define two XPC debug device structures to be used with dev_dbg() et al */
+
+struct device_driver xpc_dbg_name = {
+	.name = "xpc"
+};
+
+struct device xpc_part_dbg_subname = {
+	.init_name = "",	/* set to "part" at xpc_init() time */
+	.driver = &xpc_dbg_name
+};
+
+struct device xpc_chan_dbg_subname = {
+	.init_name = "",	/* set to "chan" at xpc_init() time */
+	.driver = &xpc_dbg_name
+};
+
+struct device *xpc_part = &xpc_part_dbg_subname;
+struct device *xpc_chan = &xpc_chan_dbg_subname;
+
+static int xpc_kdebug_ignore;
+
+/* systune related variables for /proc/sys directories */
+
+static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
+static int xpc_hb_min_interval = 1;
+static int xpc_hb_max_interval = 10;
+
+static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL;
+static int xpc_hb_check_min_interval = 10;
+static int xpc_hb_check_max_interval = 120;
+
+int xpc_disengage_timelimit = XPC_DISENGAGE_DEFAULT_TIMELIMIT;
+static int xpc_disengage_min_timelimit;	/* = 0 */
+static int xpc_disengage_max_timelimit = 120;
+
+static struct ctl_table xpc_sys_xpc_hb_dir[] = {
+	{
+	 .procname = "hb_interval",
+	 .data = &xpc_hb_interval,
+	 .maxlen = sizeof(int),
+	 .mode = 0644,
+	 .proc_handler = proc_dointvec_minmax,
+	 .extra1 = &xpc_hb_min_interval,
+	 .extra2 = &xpc_hb_max_interval},
+	{
+	 .procname = "hb_check_interval",
+	 .data = &xpc_hb_check_interval,
+	 .maxlen = sizeof(int),
+	 .mode = 0644,
+	 .proc_handler = proc_dointvec_minmax,
+	 .extra1 = &xpc_hb_check_min_interval,
+	 .extra2 = &xpc_hb_check_max_interval},
+	{}
+};
+static struct ctl_table xpc_sys_xpc_dir[] = {
+	{
+	 .procname = "hb",
+	 .mode = 0555,
+	 .child = xpc_sys_xpc_hb_dir},
+	{
+	 .procname = "disengage_timelimit",
+	 .data = &xpc_disengage_timelimit,
+	 .maxlen = sizeof(int),
+	 .mode = 0644,
+	 .proc_handler = proc_dointvec_minmax,
+	 .extra1 = &xpc_disengage_min_timelimit,
+	 .extra2 = &xpc_disengage_max_timelimit},
+	{}
+};
+static struct ctl_table xpc_sys_dir[] = {
+	{
+	 .procname = "xpc",
+	 .mode = 0555,
+	 .child = xpc_sys_xpc_dir},
+	{}
+};
+static struct ctl_table_header *xpc_sysctl;
+
+/* non-zero if any remote partition disengage was timed out */
+int xpc_disengage_timedout;
+
+/* #of activate IRQs received and not yet processed */
+int xpc_activate_IRQ_rcvd;
+DEFINE_SPINLOCK(xpc_activate_IRQ_rcvd_lock);
+
+/* IRQ handler notifies this wait queue on receipt of an IRQ */
+DECLARE_WAIT_QUEUE_HEAD(xpc_activate_IRQ_wq);
+
+static unsigned long xpc_hb_check_timeout;
+static struct timer_list xpc_hb_timer;
+
+/* notification that the xpc_hb_checker thread has exited */
+static DECLARE_COMPLETION(xpc_hb_checker_exited);
+
+/* notification that the xpc_discovery thread has exited */
+static DECLARE_COMPLETION(xpc_discovery_exited);
+
+static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *);
+
+static int xpc_system_reboot(struct notifier_block *, unsigned long, void *);
+static struct notifier_block xpc_reboot_notifier = {
+	.notifier_call = xpc_system_reboot,
+};
+
+static int xpc_system_die(struct notifier_block *, unsigned long, void *);
+static struct notifier_block xpc_die_notifier = {
+	.notifier_call = xpc_system_die,
+};
+
+struct xpc_arch_operations xpc_arch_ops;
+
+/*
+ * Timer function to enforce the timelimit on the partition disengage.
+ */
+static void
+xpc_timeout_partition_disengage(struct timer_list *t)
+{
+	struct xpc_partition *part = from_timer(part, t, disengage_timer);
+
+	DBUG_ON(time_is_after_jiffies(part->disengage_timeout));
+
+	(void)xpc_partition_disengaged(part);
+
+	DBUG_ON(part->disengage_timeout != 0);
+	DBUG_ON(xpc_arch_ops.partition_engaged(XPC_PARTID(part)));
+}
+
+/*
+ * Timer to produce the heartbeat.  The timer structures function is
+ * already set when this is initially called.  A tunable is used to
+ * specify when the next timeout should occur.
+ */
+static void
+xpc_hb_beater(struct timer_list *unused)
+{
+	xpc_arch_ops.increment_heartbeat();
+
+	if (time_is_before_eq_jiffies(xpc_hb_check_timeout))
+		wake_up_interruptible(&xpc_activate_IRQ_wq);
+
+	xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ);
+	add_timer(&xpc_hb_timer);
+}
+
+static void
+xpc_start_hb_beater(void)
+{
+	xpc_arch_ops.heartbeat_init();
+	timer_setup(&xpc_hb_timer, xpc_hb_beater, 0);
+	xpc_hb_beater(0);
+}
+
+static void
+xpc_stop_hb_beater(void)
+{
+	del_timer_sync(&xpc_hb_timer);
+	xpc_arch_ops.heartbeat_exit();
+}
+
+/*
+ * At periodic intervals, scan through all active partitions and ensure
+ * their heartbeat is still active.  If not, the partition is deactivated.
+ */
+static void
+xpc_check_remote_hb(void)
+{
+	struct xpc_partition *part;
+	short partid;
+	enum xp_retval ret;
+
+	for (partid = 0; partid < xp_max_npartitions; partid++) {
+
+		if (xpc_exiting)
+			break;
+
+		if (partid == xp_partition_id)
+			continue;
+
+		part = &xpc_partitions[partid];
+
+		if (part->act_state == XPC_P_AS_INACTIVE ||
+		    part->act_state == XPC_P_AS_DEACTIVATING) {
+			continue;
+		}
+
+		ret = xpc_arch_ops.get_remote_heartbeat(part);
+		if (ret != xpSuccess)
+			XPC_DEACTIVATE_PARTITION(part, ret);
+	}
+}
+
+/*
+ * This thread is responsible for nearly all of the partition
+ * activation/deactivation.
+ */
+static int
+xpc_hb_checker(void *ignore)
+{
+	int force_IRQ = 0;
+
+	/* this thread was marked active by xpc_hb_init() */
+
+	set_cpus_allowed_ptr(current, cpumask_of(XPC_HB_CHECK_CPU));
+
+	/* set our heartbeating to other partitions into motion */
+	xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ);
+	xpc_start_hb_beater();
+
+	while (!xpc_exiting) {
+
+		dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
+			"been received\n",
+			(int)(xpc_hb_check_timeout - jiffies),
+			xpc_activate_IRQ_rcvd);
+
+		/* checking of remote heartbeats is skewed by IRQ handling */
+		if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) {
+			xpc_hb_check_timeout = jiffies +
+			    (xpc_hb_check_interval * HZ);
+
+			dev_dbg(xpc_part, "checking remote heartbeats\n");
+			xpc_check_remote_hb();
+
+			/*
+			 * On sn2 we need to periodically recheck to ensure no
+			 * IRQ/amo pairs have been missed.
+			 */
+			if (is_shub())
+				force_IRQ = 1;
+		}
+
+		/* check for outstanding IRQs */
+		if (xpc_activate_IRQ_rcvd > 0 || force_IRQ != 0) {
+			force_IRQ = 0;
+			dev_dbg(xpc_part, "processing activate IRQs "
+				"received\n");
+			xpc_arch_ops.process_activate_IRQ_rcvd();
+		}
+
+		/* wait for IRQ or timeout */
+		(void)wait_event_interruptible(xpc_activate_IRQ_wq,
+					       (time_is_before_eq_jiffies(
+						xpc_hb_check_timeout) ||
+						xpc_activate_IRQ_rcvd > 0 ||
+						xpc_exiting));
+	}
+
+	xpc_stop_hb_beater();
+
+	dev_dbg(xpc_part, "heartbeat checker is exiting\n");
+
+	/* mark this thread as having exited */
+	complete(&xpc_hb_checker_exited);
+	return 0;
+}
+
+/*
+ * This thread will attempt to discover other partitions to activate
+ * based on info provided by SAL. This new thread is short lived and
+ * will exit once discovery is complete.
+ */
+static int
+xpc_initiate_discovery(void *ignore)
+{
+	xpc_discovery();
+
+	dev_dbg(xpc_part, "discovery thread is exiting\n");
+
+	/* mark this thread as having exited */
+	complete(&xpc_discovery_exited);
+	return 0;
+}
+
+/*
+ * The first kthread assigned to a newly activated partition is the one
+ * created by XPC HB with which it calls xpc_activating(). XPC hangs on to
+ * that kthread until the partition is brought down, at which time that kthread
+ * returns back to XPC HB. (The return of that kthread will signify to XPC HB
+ * that XPC has dismantled all communication infrastructure for the associated
+ * partition.) This kthread becomes the channel manager for that partition.
+ *
+ * Each active partition has a channel manager, who, besides connecting and
+ * disconnecting channels, will ensure that each of the partition's connected
+ * channels has the required number of assigned kthreads to get the work done.
+ */
+static void
+xpc_channel_mgr(struct xpc_partition *part)
+{
+	while (part->act_state != XPC_P_AS_DEACTIVATING ||
+	       atomic_read(&part->nchannels_active) > 0 ||
+	       !xpc_partition_disengaged(part)) {
+
+		xpc_process_sent_chctl_flags(part);
+
+		/*
+		 * Wait until we've been requested to activate kthreads or
+		 * all of the channel's message queues have been torn down or
+		 * a signal is pending.
+		 *
+		 * The channel_mgr_requests is set to 1 after being awakened,
+		 * This is done to prevent the channel mgr from making one pass
+		 * through the loop for each request, since he will
+		 * be servicing all the requests in one pass. The reason it's
+		 * set to 1 instead of 0 is so that other kthreads will know
+		 * that the channel mgr is running and won't bother trying to
+		 * wake him up.
+		 */
+		atomic_dec(&part->channel_mgr_requests);
+		(void)wait_event_interruptible(part->channel_mgr_wq,
+				(atomic_read(&part->channel_mgr_requests) > 0 ||
+				 part->chctl.all_flags != 0 ||
+				 (part->act_state == XPC_P_AS_DEACTIVATING &&
+				 atomic_read(&part->nchannels_active) == 0 &&
+				 xpc_partition_disengaged(part))));
+		atomic_set(&part->channel_mgr_requests, 1);
+	}
+}
+
+/*
+ * Guarantee that the kzalloc'd memory is cacheline aligned.
+ */
+void *
+xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
+{
+	/* see if kzalloc will give us cachline aligned memory by default */
+	*base = kzalloc(size, flags);
+	if (*base == NULL)
+		return NULL;
+
+	if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
+		return *base;
+
+	kfree(*base);
+
+	/* nope, we'll have to do it ourselves */
+	*base = kzalloc(size + L1_CACHE_BYTES, flags);
+	if (*base == NULL)
+		return NULL;
+
+	return (void *)L1_CACHE_ALIGN((u64)*base);
+}
+
+/*
+ * Setup the channel structures necessary to support XPartition Communication
+ * between the specified remote partition and the local one.
+ */
+static enum xp_retval
+xpc_setup_ch_structures(struct xpc_partition *part)
+{
+	enum xp_retval ret;
+	int ch_number;
+	struct xpc_channel *ch;
+	short partid = XPC_PARTID(part);
+
+	/*
+	 * Allocate all of the channel structures as a contiguous chunk of
+	 * memory.
+	 */
+	DBUG_ON(part->channels != NULL);
+	part->channels = kcalloc(XPC_MAX_NCHANNELS,
+				 sizeof(struct xpc_channel),
+				 GFP_KERNEL);
+	if (part->channels == NULL) {
+		dev_err(xpc_chan, "can't get memory for channels\n");
+		return xpNoMemory;
+	}
+
+	/* allocate the remote open and close args */
+
+	part->remote_openclose_args =
+	    xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE,
+					  GFP_KERNEL, &part->
+					  remote_openclose_args_base);
+	if (part->remote_openclose_args == NULL) {
+		dev_err(xpc_chan, "can't get memory for remote connect args\n");
+		ret = xpNoMemory;
+		goto out_1;
+	}
+
+	part->chctl.all_flags = 0;
+	spin_lock_init(&part->chctl_lock);
+
+	atomic_set(&part->channel_mgr_requests, 1);
+	init_waitqueue_head(&part->channel_mgr_wq);
+
+	part->nchannels = XPC_MAX_NCHANNELS;
+
+	atomic_set(&part->nchannels_active, 0);
+	atomic_set(&part->nchannels_engaged, 0);
+
+	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
+		ch = &part->channels[ch_number];
+
+		ch->partid = partid;
+		ch->number = ch_number;
+		ch->flags = XPC_C_DISCONNECTED;
+
+		atomic_set(&ch->kthreads_assigned, 0);
+		atomic_set(&ch->kthreads_idle, 0);
+		atomic_set(&ch->kthreads_active, 0);
+
+		atomic_set(&ch->references, 0);
+		atomic_set(&ch->n_to_notify, 0);
+
+		spin_lock_init(&ch->lock);
+		init_completion(&ch->wdisconnect_wait);
+
+		atomic_set(&ch->n_on_msg_allocate_wq, 0);
+		init_waitqueue_head(&ch->msg_allocate_wq);
+		init_waitqueue_head(&ch->idle_wq);
+	}
+
+	ret = xpc_arch_ops.setup_ch_structures(part);
+	if (ret != xpSuccess)
+		goto out_2;
+
+	/*
+	 * With the setting of the partition setup_state to XPC_P_SS_SETUP,
+	 * we're declaring that this partition is ready to go.
+	 */
+	part->setup_state = XPC_P_SS_SETUP;
+
+	return xpSuccess;
+
+	/* setup of ch structures failed */
+out_2:
+	kfree(part->remote_openclose_args_base);
+	part->remote_openclose_args = NULL;
+out_1:
+	kfree(part->channels);
+	part->channels = NULL;
+	return ret;
+}
+
+/*
+ * Teardown the channel structures necessary to support XPartition Communication
+ * between the specified remote partition and the local one.
+ */
+static void
+xpc_teardown_ch_structures(struct xpc_partition *part)
+{
+	DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
+	DBUG_ON(atomic_read(&part->nchannels_active) != 0);
+
+	/*
+	 * Make this partition inaccessible to local processes by marking it
+	 * as no longer setup. Then wait before proceeding with the teardown
+	 * until all existing references cease.
+	 */
+	DBUG_ON(part->setup_state != XPC_P_SS_SETUP);
+	part->setup_state = XPC_P_SS_WTEARDOWN;
+
+	wait_event(part->teardown_wq, (atomic_read(&part->references) == 0));
+
+	/* now we can begin tearing down the infrastructure */
+
+	xpc_arch_ops.teardown_ch_structures(part);
+
+	kfree(part->remote_openclose_args_base);
+	part->remote_openclose_args = NULL;
+	kfree(part->channels);
+	part->channels = NULL;
+
+	part->setup_state = XPC_P_SS_TORNDOWN;
+}
+
+/*
+ * When XPC HB determines that a partition has come up, it will create a new
+ * kthread and that kthread will call this function to attempt to set up the
+ * basic infrastructure used for Cross Partition Communication with the newly
+ * upped partition.
+ *
+ * The kthread that was created by XPC HB and which setup the XPC
+ * infrastructure will remain assigned to the partition becoming the channel
+ * manager for that partition until the partition is deactivating, at which
+ * time the kthread will teardown the XPC infrastructure and then exit.
+ */
+static int
+xpc_activating(void *__partid)
+{
+	short partid = (u64)__partid;
+	struct xpc_partition *part = &xpc_partitions[partid];
+	unsigned long irq_flags;
+
+	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
+
+	spin_lock_irqsave(&part->act_lock, irq_flags);
+
+	if (part->act_state == XPC_P_AS_DEACTIVATING) {
+		part->act_state = XPC_P_AS_INACTIVE;
+		spin_unlock_irqrestore(&part->act_lock, irq_flags);
+		part->remote_rp_pa = 0;
+		return 0;
+	}
+
+	/* indicate the thread is activating */
+	DBUG_ON(part->act_state != XPC_P_AS_ACTIVATION_REQ);
+	part->act_state = XPC_P_AS_ACTIVATING;
+
+	XPC_SET_REASON(part, 0, 0);
+	spin_unlock_irqrestore(&part->act_lock, irq_flags);
+
+	dev_dbg(xpc_part, "activating partition %d\n", partid);
+
+	xpc_arch_ops.allow_hb(partid);
+
+	if (xpc_setup_ch_structures(part) == xpSuccess) {
+		(void)xpc_part_ref(part);	/* this will always succeed */
+
+		if (xpc_arch_ops.make_first_contact(part) == xpSuccess) {
+			xpc_mark_partition_active(part);
+			xpc_channel_mgr(part);
+			/* won't return until partition is deactivating */
+		}
+
+		xpc_part_deref(part);
+		xpc_teardown_ch_structures(part);
+	}
+
+	xpc_arch_ops.disallow_hb(partid);
+	xpc_mark_partition_inactive(part);
+
+	if (part->reason == xpReactivating) {
+		/* interrupting ourselves results in activating partition */
+		xpc_arch_ops.request_partition_reactivation(part);
+	}
+
+	return 0;
+}
+
+void
+xpc_activate_partition(struct xpc_partition *part)
+{
+	short partid = XPC_PARTID(part);
+	unsigned long irq_flags;
+	struct task_struct *kthread;
+
+	spin_lock_irqsave(&part->act_lock, irq_flags);
+
+	DBUG_ON(part->act_state != XPC_P_AS_INACTIVE);
+
+	part->act_state = XPC_P_AS_ACTIVATION_REQ;
+	XPC_SET_REASON(part, xpCloneKThread, __LINE__);
+
+	spin_unlock_irqrestore(&part->act_lock, irq_flags);
+
+	kthread = kthread_run(xpc_activating, (void *)((u64)partid), "xpc%02d",
+			      partid);
+	if (IS_ERR(kthread)) {
+		spin_lock_irqsave(&part->act_lock, irq_flags);
+		part->act_state = XPC_P_AS_INACTIVE;
+		XPC_SET_REASON(part, xpCloneKThreadFailed, __LINE__);
+		spin_unlock_irqrestore(&part->act_lock, irq_flags);
+	}
+}
+
+void
+xpc_activate_kthreads(struct xpc_channel *ch, int needed)
+{
+	int idle = atomic_read(&ch->kthreads_idle);
+	int assigned = atomic_read(&ch->kthreads_assigned);
+	int wakeup;
+
+	DBUG_ON(needed <= 0);
+
+	if (idle > 0) {
+		wakeup = (needed > idle) ? idle : needed;
+		needed -= wakeup;
+
+		dev_dbg(xpc_chan, "wakeup %d idle kthreads, partid=%d, "
+			"channel=%d\n", wakeup, ch->partid, ch->number);
+
+		/* only wakeup the requested number of kthreads */
+		wake_up_nr(&ch->idle_wq, wakeup);
+	}
+
+	if (needed <= 0)
+		return;
+
+	if (needed + assigned > ch->kthreads_assigned_limit) {
+		needed = ch->kthreads_assigned_limit - assigned;
+		if (needed <= 0)
+			return;
+	}
+
+	dev_dbg(xpc_chan, "create %d new kthreads, partid=%d, channel=%d\n",
+		needed, ch->partid, ch->number);
+
+	xpc_create_kthreads(ch, needed, 0);
+}
+
+/*
+ * This function is where XPC's kthreads wait for messages to deliver.
+ */
+static void
+xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
+{
+	int (*n_of_deliverable_payloads) (struct xpc_channel *) =
+		xpc_arch_ops.n_of_deliverable_payloads;
+
+	do {
+		/* deliver messages to their intended recipients */
+
+		while (n_of_deliverable_payloads(ch) > 0 &&
+		       !(ch->flags & XPC_C_DISCONNECTING)) {
+			xpc_deliver_payload(ch);
+		}
+
+		if (atomic_inc_return(&ch->kthreads_idle) >
+		    ch->kthreads_idle_limit) {
+			/* too many idle kthreads on this channel */
+			atomic_dec(&ch->kthreads_idle);
+			break;
+		}
+
+		dev_dbg(xpc_chan, "idle kthread calling "
+			"wait_event_interruptible_exclusive()\n");
+
+		(void)wait_event_interruptible_exclusive(ch->idle_wq,
+				(n_of_deliverable_payloads(ch) > 0 ||
+				 (ch->flags & XPC_C_DISCONNECTING)));
+
+		atomic_dec(&ch->kthreads_idle);
+
+	} while (!(ch->flags & XPC_C_DISCONNECTING));
+}
+
+static int
+xpc_kthread_start(void *args)
+{
+	short partid = XPC_UNPACK_ARG1(args);
+	u16 ch_number = XPC_UNPACK_ARG2(args);
+	struct xpc_partition *part = &xpc_partitions[partid];
+	struct xpc_channel *ch;
+	int n_needed;
+	unsigned long irq_flags;
+	int (*n_of_deliverable_payloads) (struct xpc_channel *) =
+		xpc_arch_ops.n_of_deliverable_payloads;
+
+	dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n",
+		partid, ch_number);
+
+	ch = &part->channels[ch_number];
+
+	if (!(ch->flags & XPC_C_DISCONNECTING)) {
+
+		/* let registerer know that connection has been established */
+
+		spin_lock_irqsave(&ch->lock, irq_flags);
+		if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) {
+			ch->flags |= XPC_C_CONNECTEDCALLOUT;
+			spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+			xpc_connected_callout(ch);
+
+			spin_lock_irqsave(&ch->lock, irq_flags);
+			ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE;
+			spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+			/*
+			 * It is possible that while the callout was being
+			 * made that the remote partition sent some messages.
+			 * If that is the case, we may need to activate
+			 * additional kthreads to help deliver them. We only
+			 * need one less than total #of messages to deliver.
+			 */
+			n_needed = n_of_deliverable_payloads(ch) - 1;
+			if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING))
+				xpc_activate_kthreads(ch, n_needed);
+
+		} else {
+			spin_unlock_irqrestore(&ch->lock, irq_flags);
+		}
+
+		xpc_kthread_waitmsgs(part, ch);
+	}
+
+	/* let registerer know that connection is disconnecting */
+
+	spin_lock_irqsave(&ch->lock, irq_flags);
+	if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+	    !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
+		ch->flags |= XPC_C_DISCONNECTINGCALLOUT;
+		spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+		xpc_disconnect_callout(ch, xpDisconnecting);
+
+		spin_lock_irqsave(&ch->lock, irq_flags);
+		ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE;
+	}
+	spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+	if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
+	    atomic_dec_return(&part->nchannels_engaged) == 0) {
+		xpc_arch_ops.indicate_partition_disengaged(part);
+	}
+
+	xpc_msgqueue_deref(ch);
+
+	dev_dbg(xpc_chan, "kthread exiting, partid=%d, channel=%d\n",
+		partid, ch_number);
+
+	xpc_part_deref(part);
+	return 0;
+}
+
+/*
+ * For each partition that XPC has established communications with, there is
+ * a minimum of one kernel thread assigned to perform any operation that
+ * may potentially sleep or block (basically the callouts to the asynchronous
+ * functions registered via xpc_connect()).
+ *
+ * Additional kthreads are created and destroyed by XPC as the workload
+ * demands.
+ *
+ * A kthread is assigned to one of the active channels that exists for a given
+ * partition.
+ */
+void
+xpc_create_kthreads(struct xpc_channel *ch, int needed,
+		    int ignore_disconnecting)
+{
+	unsigned long irq_flags;
+	u64 args = XPC_PACK_ARGS(ch->partid, ch->number);
+	struct xpc_partition *part = &xpc_partitions[ch->partid];
+	struct task_struct *kthread;
+	void (*indicate_partition_disengaged) (struct xpc_partition *) =
+		xpc_arch_ops.indicate_partition_disengaged;
+
+	while (needed-- > 0) {
+
+		/*
+		 * The following is done on behalf of the newly created
+		 * kthread. That kthread is responsible for doing the
+		 * counterpart to the following before it exits.
+		 */
+		if (ignore_disconnecting) {
+			if (!atomic_inc_not_zero(&ch->kthreads_assigned)) {
+				/* kthreads assigned had gone to zero */
+				BUG_ON(!(ch->flags &
+					 XPC_C_DISCONNECTINGCALLOUT_MADE));
+				break;
+			}
+
+		} else if (ch->flags & XPC_C_DISCONNECTING) {
+			break;
+
+		} else if (atomic_inc_return(&ch->kthreads_assigned) == 1 &&
+			   atomic_inc_return(&part->nchannels_engaged) == 1) {
+			xpc_arch_ops.indicate_partition_engaged(part);
+		}
+		(void)xpc_part_ref(part);
+		xpc_msgqueue_ref(ch);
+
+		kthread = kthread_run(xpc_kthread_start, (void *)args,
+				      "xpc%02dc%d", ch->partid, ch->number);
+		if (IS_ERR(kthread)) {
+			/* the fork failed */
+
+			/*
+			 * NOTE: if (ignore_disconnecting &&
+			 * !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) is true,
+			 * then we'll deadlock if all other kthreads assigned
+			 * to this channel are blocked in the channel's
+			 * registerer, because the only thing that will unblock
+			 * them is the xpDisconnecting callout that this
+			 * failed kthread_run() would have made.
+			 */
+
+			if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
+			    atomic_dec_return(&part->nchannels_engaged) == 0) {
+				indicate_partition_disengaged(part);
+			}
+			xpc_msgqueue_deref(ch);
+			xpc_part_deref(part);
+
+			if (atomic_read(&ch->kthreads_assigned) <
+			    ch->kthreads_idle_limit) {
+				/*
+				 * Flag this as an error only if we have an
+				 * insufficient #of kthreads for the channel
+				 * to function.
+				 */
+				spin_lock_irqsave(&ch->lock, irq_flags);
+				XPC_DISCONNECT_CHANNEL(ch, xpLackOfResources,
+						       &irq_flags);
+				spin_unlock_irqrestore(&ch->lock, irq_flags);
+			}
+			break;
+		}
+	}
+}
+
+void
+xpc_disconnect_wait(int ch_number)
+{
+	unsigned long irq_flags;
+	short partid;
+	struct xpc_partition *part;
+	struct xpc_channel *ch;
+	int wakeup_channel_mgr;
+
+	/* now wait for all callouts to the caller's function to cease */
+	for (partid = 0; partid < xp_max_npartitions; partid++) {
+		part = &xpc_partitions[partid];
+
+		if (!xpc_part_ref(part))
+			continue;
+
+		ch = &part->channels[ch_number];
+
+		if (!(ch->flags & XPC_C_WDISCONNECT)) {
+			xpc_part_deref(part);
+			continue;
+		}
+
+		wait_for_completion(&ch->wdisconnect_wait);
+
+		spin_lock_irqsave(&ch->lock, irq_flags);
+		DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
+		wakeup_channel_mgr = 0;
+
+		if (ch->delayed_chctl_flags) {
+			if (part->act_state != XPC_P_AS_DEACTIVATING) {
+				spin_lock(&part->chctl_lock);
+				part->chctl.flags[ch->number] |=
+				    ch->delayed_chctl_flags;
+				spin_unlock(&part->chctl_lock);
+				wakeup_channel_mgr = 1;
+			}
+			ch->delayed_chctl_flags = 0;
+		}
+
+		ch->flags &= ~XPC_C_WDISCONNECT;
+		spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+		if (wakeup_channel_mgr)
+			xpc_wakeup_channel_mgr(part);
+
+		xpc_part_deref(part);
+	}
+}
+
+static int
+xpc_setup_partitions(void)
+{
+	short partid;
+	struct xpc_partition *part;
+
+	xpc_partitions = kcalloc(xp_max_npartitions,
+				 sizeof(struct xpc_partition),
+				 GFP_KERNEL);
+	if (xpc_partitions == NULL) {
+		dev_err(xpc_part, "can't get memory for partition structure\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * The first few fields of each entry of xpc_partitions[] need to
+	 * be initialized now so that calls to xpc_connect() and
+	 * xpc_disconnect() can be made prior to the activation of any remote
+	 * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE
+	 * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING
+	 * PARTITION HAS BEEN ACTIVATED.
+	 */
+	for (partid = 0; partid < xp_max_npartitions; partid++) {
+		part = &xpc_partitions[partid];
+
+		DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part));
+
+		part->activate_IRQ_rcvd = 0;
+		spin_lock_init(&part->act_lock);
+		part->act_state = XPC_P_AS_INACTIVE;
+		XPC_SET_REASON(part, 0, 0);
+
+		timer_setup(&part->disengage_timer,
+			    xpc_timeout_partition_disengage, 0);
+
+		part->setup_state = XPC_P_SS_UNSET;
+		init_waitqueue_head(&part->teardown_wq);
+		atomic_set(&part->references, 0);
+	}
+
+	return xpc_arch_ops.setup_partitions();
+}
+
+static void
+xpc_teardown_partitions(void)
+{
+	xpc_arch_ops.teardown_partitions();
+	kfree(xpc_partitions);
+}
+
+static void
+xpc_do_exit(enum xp_retval reason)
+{
+	short partid;
+	int active_part_count, printed_waiting_msg = 0;
+	struct xpc_partition *part;
+	unsigned long printmsg_time, disengage_timeout = 0;
+
+	/* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
+	DBUG_ON(xpc_exiting == 1);
+
+	/*
+	 * Let the heartbeat checker thread and the discovery thread
+	 * (if one is running) know that they should exit. Also wake up
+	 * the heartbeat checker thread in case it's sleeping.
+	 */
+	xpc_exiting = 1;
+	wake_up_interruptible(&xpc_activate_IRQ_wq);
+
+	/* wait for the discovery thread to exit */
+	wait_for_completion(&xpc_discovery_exited);
+
+	/* wait for the heartbeat checker thread to exit */
+	wait_for_completion(&xpc_hb_checker_exited);
+
+	/* sleep for a 1/3 of a second or so */
+	(void)msleep_interruptible(300);
+
+	/* wait for all partitions to become inactive */
+
+	printmsg_time = jiffies + (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ);
+	xpc_disengage_timedout = 0;
+
+	do {
+		active_part_count = 0;
+
+		for (partid = 0; partid < xp_max_npartitions; partid++) {
+			part = &xpc_partitions[partid];
+
+			if (xpc_partition_disengaged(part) &&
+			    part->act_state == XPC_P_AS_INACTIVE) {
+				continue;
+			}
+
+			active_part_count++;
+
+			XPC_DEACTIVATE_PARTITION(part, reason);
+
+			if (part->disengage_timeout > disengage_timeout)
+				disengage_timeout = part->disengage_timeout;
+		}
+
+		if (xpc_arch_ops.any_partition_engaged()) {
+			if (time_is_before_jiffies(printmsg_time)) {
+				dev_info(xpc_part, "waiting for remote "
+					 "partitions to deactivate, timeout in "
+					 "%ld seconds\n", (disengage_timeout -
+					 jiffies) / HZ);
+				printmsg_time = jiffies +
+				    (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ);
+				printed_waiting_msg = 1;
+			}
+
+		} else if (active_part_count > 0) {
+			if (printed_waiting_msg) {
+				dev_info(xpc_part, "waiting for local partition"
+					 " to deactivate\n");
+				printed_waiting_msg = 0;
+			}
+
+		} else {
+			if (!xpc_disengage_timedout) {
+				dev_info(xpc_part, "all partitions have "
+					 "deactivated\n");
+			}
+			break;
+		}
+
+		/* sleep for a 1/3 of a second or so */
+		(void)msleep_interruptible(300);
+
+	} while (1);
+
+	DBUG_ON(xpc_arch_ops.any_partition_engaged());
+
+	xpc_teardown_rsvd_page();
+
+	if (reason == xpUnloading) {
+		(void)unregister_die_notifier(&xpc_die_notifier);
+		(void)unregister_reboot_notifier(&xpc_reboot_notifier);
+	}
+
+	/* clear the interface to XPC's functions */
+	xpc_clear_interface();
+
+	if (xpc_sysctl)
+		unregister_sysctl_table(xpc_sysctl);
+
+	xpc_teardown_partitions();
+
+	if (is_shub())
+		xpc_exit_sn2();
+	else if (is_uv())
+		xpc_exit_uv();
+}
+
+/*
+ * This function is called when the system is being rebooted.
+ */
+static int
+xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
+{
+	enum xp_retval reason;
+
+	switch (event) {
+	case SYS_RESTART:
+		reason = xpSystemReboot;
+		break;
+	case SYS_HALT:
+		reason = xpSystemHalt;
+		break;
+	case SYS_POWER_OFF:
+		reason = xpSystemPoweroff;
+		break;
+	default:
+		reason = xpSystemGoingDown;
+	}
+
+	xpc_do_exit(reason);
+	return NOTIFY_DONE;
+}
+
+/* Used to only allow one cpu to complete disconnect */
+static unsigned int xpc_die_disconnecting;
+
+/*
+ * Notify other partitions to deactivate from us by first disengaging from all
+ * references to our memory.
+ */
+static void
+xpc_die_deactivate(void)
+{
+	struct xpc_partition *part;
+	short partid;
+	int any_engaged;
+	long keep_waiting;
+	long wait_to_print;
+
+	if (cmpxchg(&xpc_die_disconnecting, 0, 1))
+		return;
+
+	/* keep xpc_hb_checker thread from doing anything (just in case) */
+	xpc_exiting = 1;
+
+	xpc_arch_ops.disallow_all_hbs();   /*indicate we're deactivated */
+
+	for (partid = 0; partid < xp_max_npartitions; partid++) {
+		part = &xpc_partitions[partid];
+
+		if (xpc_arch_ops.partition_engaged(partid) ||
+		    part->act_state != XPC_P_AS_INACTIVE) {
+			xpc_arch_ops.request_partition_deactivation(part);
+			xpc_arch_ops.indicate_partition_disengaged(part);
+		}
+	}
+
+	/*
+	 * Though we requested that all other partitions deactivate from us,
+	 * we only wait until they've all disengaged or we've reached the
+	 * defined timelimit.
+	 *
+	 * Given that one iteration through the following while-loop takes
+	 * approximately 200 microseconds, calculate the #of loops to take
+	 * before bailing and the #of loops before printing a waiting message.
+	 */
+	keep_waiting = xpc_disengage_timelimit * 1000 * 5;
+	wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * 1000 * 5;
+
+	while (1) {
+		any_engaged = xpc_arch_ops.any_partition_engaged();
+		if (!any_engaged) {
+			dev_info(xpc_part, "all partitions have deactivated\n");
+			break;
+		}
+
+		if (!keep_waiting--) {
+			for (partid = 0; partid < xp_max_npartitions;
+			     partid++) {
+				if (xpc_arch_ops.partition_engaged(partid)) {
+					dev_info(xpc_part, "deactivate from "
+						 "remote partition %d timed "
+						 "out\n", partid);
+				}
+			}
+			break;
+		}
+
+		if (!wait_to_print--) {
+			dev_info(xpc_part, "waiting for remote partitions to "
+				 "deactivate, timeout in %ld seconds\n",
+				 keep_waiting / (1000 * 5));
+			wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL *
+			    1000 * 5;
+		}
+
+		udelay(200);
+	}
+}
+
+/*
+ * This function is called when the system is being restarted or halted due
+ * to some sort of system failure. If this is the case we need to notify the
+ * other partitions to disengage from all references to our memory.
+ * This function can also be called when our heartbeater could be offlined
+ * for a time. In this case we need to notify other partitions to not worry
+ * about the lack of a heartbeat.
+ */
+static int
+xpc_system_die(struct notifier_block *nb, unsigned long event, void *_die_args)
+{
+#ifdef CONFIG_IA64		/* !!! temporary kludge */
+	switch (event) {
+	case DIE_MACHINE_RESTART:
+	case DIE_MACHINE_HALT:
+		xpc_die_deactivate();
+		break;
+
+	case DIE_KDEBUG_ENTER:
+		/* Should lack of heartbeat be ignored by other partitions? */
+		if (!xpc_kdebug_ignore)
+			break;
+
+		/* fall through */
+	case DIE_MCA_MONARCH_ENTER:
+	case DIE_INIT_MONARCH_ENTER:
+		xpc_arch_ops.offline_heartbeat();
+		break;
+
+	case DIE_KDEBUG_LEAVE:
+		/* Is lack of heartbeat being ignored by other partitions? */
+		if (!xpc_kdebug_ignore)
+			break;
+
+		/* fall through */
+	case DIE_MCA_MONARCH_LEAVE:
+	case DIE_INIT_MONARCH_LEAVE:
+		xpc_arch_ops.online_heartbeat();
+		break;
+	}
+#else
+	struct die_args *die_args = _die_args;
+
+	switch (event) {
+	case DIE_TRAP:
+		if (die_args->trapnr == X86_TRAP_DF)
+			xpc_die_deactivate();
+
+		if (((die_args->trapnr == X86_TRAP_MF) ||
+		     (die_args->trapnr == X86_TRAP_XF)) &&
+		    !user_mode(die_args->regs))
+			xpc_die_deactivate();
+
+		break;
+	case DIE_INT3:
+	case DIE_DEBUG:
+		break;
+	case DIE_OOPS:
+	case DIE_GPF:
+	default:
+		xpc_die_deactivate();
+	}
+#endif
+
+	return NOTIFY_DONE;
+}
+
+int __init
+xpc_init(void)
+{
+	int ret;
+	struct task_struct *kthread;
+
+	dev_set_name(xpc_part, "part");
+	dev_set_name(xpc_chan, "chan");
+
+	if (is_shub()) {
+		/*
+		 * The ia64-sn2 architecture supports at most 64 partitions.
+		 * And the inability to unregister remote amos restricts us
+		 * further to only support exactly 64 partitions on this
+		 * architecture, no less.
+		 */
+		if (xp_max_npartitions != 64) {
+			dev_err(xpc_part, "max #of partitions not set to 64\n");
+			ret = -EINVAL;
+		} else {
+			ret = xpc_init_sn2();
+		}
+
+	} else if (is_uv()) {
+		ret = xpc_init_uv();
+
+	} else {
+		ret = -ENODEV;
+	}
+
+	if (ret != 0)
+		return ret;
+
+	ret = xpc_setup_partitions();
+	if (ret != 0) {
+		dev_err(xpc_part, "can't get memory for partition structure\n");
+		goto out_1;
+	}
+
+	xpc_sysctl = register_sysctl_table(xpc_sys_dir);
+
+	/*
+	 * Fill the partition reserved page with the information needed by
+	 * other partitions to discover we are alive and establish initial
+	 * communications.
+	 */
+	ret = xpc_setup_rsvd_page();
+	if (ret != 0) {
+		dev_err(xpc_part, "can't setup our reserved page\n");
+		goto out_2;
+	}
+
+	/* add ourselves to the reboot_notifier_list */
+	ret = register_reboot_notifier(&xpc_reboot_notifier);
+	if (ret != 0)
+		dev_warn(xpc_part, "can't register reboot notifier\n");
+
+	/* add ourselves to the die_notifier list */
+	ret = register_die_notifier(&xpc_die_notifier);
+	if (ret != 0)
+		dev_warn(xpc_part, "can't register die notifier\n");
+
+	/*
+	 * The real work-horse behind xpc.  This processes incoming
+	 * interrupts and monitors remote heartbeats.
+	 */
+	kthread = kthread_run(xpc_hb_checker, NULL, XPC_HB_CHECK_THREAD_NAME);
+	if (IS_ERR(kthread)) {
+		dev_err(xpc_part, "failed while forking hb check thread\n");
+		ret = -EBUSY;
+		goto out_3;
+	}
+
+	/*
+	 * Startup a thread that will attempt to discover other partitions to
+	 * activate based on info provided by SAL. This new thread is short
+	 * lived and will exit once discovery is complete.
+	 */
+	kthread = kthread_run(xpc_initiate_discovery, NULL,
+			      XPC_DISCOVERY_THREAD_NAME);
+	if (IS_ERR(kthread)) {
+		dev_err(xpc_part, "failed while forking discovery thread\n");
+
+		/* mark this new thread as a non-starter */
+		complete(&xpc_discovery_exited);
+
+		xpc_do_exit(xpUnloading);
+		return -EBUSY;
+	}
+
+	/* set the interface to point at XPC's functions */
+	xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect,
+			  xpc_initiate_send, xpc_initiate_send_notify,
+			  xpc_initiate_received, xpc_initiate_partid_to_nasids);
+
+	return 0;
+
+	/* initialization was not successful */
+out_3:
+	xpc_teardown_rsvd_page();
+
+	(void)unregister_die_notifier(&xpc_die_notifier);
+	(void)unregister_reboot_notifier(&xpc_reboot_notifier);
+out_2:
+	if (xpc_sysctl)
+		unregister_sysctl_table(xpc_sysctl);
+
+	xpc_teardown_partitions();
+out_1:
+	if (is_shub())
+		xpc_exit_sn2();
+	else if (is_uv())
+		xpc_exit_uv();
+	return ret;
+}
+
+module_init(xpc_init);
+
+void __exit
+xpc_exit(void)
+{
+	xpc_do_exit(xpUnloading);
+}
+
+module_exit(xpc_exit);
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION("Cross Partition Communication (XPC) support");
+MODULE_LICENSE("GPL");
+
+module_param(xpc_hb_interval, int, 0);
+MODULE_PARM_DESC(xpc_hb_interval, "Number of seconds between "
+		 "heartbeat increments.");
+
+module_param(xpc_hb_check_interval, int, 0);
+MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between "
+		 "heartbeat checks.");
+
+module_param(xpc_disengage_timelimit, int, 0);
+MODULE_PARM_DESC(xpc_disengage_timelimit, "Number of seconds to wait "
+		 "for disengage to complete.");
+
+module_param(xpc_kdebug_ignore, int, 0);
+MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by "
+		 "other partitions when dropping into kdebug.");
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
new file mode 100644
index 000000000..519826ba1
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -0,0 +1,540 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2008 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) partition support.
+ *
+ *	This is the part of XPC that detects the presence/absence of
+ *	other partitions. It provides a heartbeat and monitors the
+ *	heartbeats of other partitions.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/hardirq.h>
+#include <linux/slab.h>
+#include "xpc.h"
+#include <asm/uv/uv_hub.h>
+
+/* XPC is exiting flag */
+int xpc_exiting;
+
+/* this partition's reserved page pointers */
+struct xpc_rsvd_page *xpc_rsvd_page;
+static unsigned long *xpc_part_nasids;
+unsigned long *xpc_mach_nasids;
+
+static int xpc_nasid_mask_nbytes;	/* #of bytes in nasid mask */
+int xpc_nasid_mask_nlongs;	/* #of longs in nasid mask */
+
+struct xpc_partition *xpc_partitions;
+
+/*
+ * Guarantee that the kmalloc'd memory is cacheline aligned.
+ */
+void *
+xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
+{
+	/* see if kmalloc will give us cachline aligned memory by default */
+	*base = kmalloc(size, flags);
+	if (*base == NULL)
+		return NULL;
+
+	if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
+		return *base;
+
+	kfree(*base);
+
+	/* nope, we'll have to do it ourselves */
+	*base = kmalloc(size + L1_CACHE_BYTES, flags);
+	if (*base == NULL)
+		return NULL;
+
+	return (void *)L1_CACHE_ALIGN((u64)*base);
+}
+
+/*
+ * Given a nasid, get the physical address of the  partition's reserved page
+ * for that nasid. This function returns 0 on any error.
+ */
+static unsigned long
+xpc_get_rsvd_page_pa(int nasid)
+{
+	enum xp_retval ret;
+	u64 cookie = 0;
+	unsigned long rp_pa = nasid;	/* seed with nasid */
+	size_t len = 0;
+	size_t buf_len = 0;
+	void *buf = NULL;
+	void *buf_base = NULL;
+	enum xp_retval (*get_partition_rsvd_page_pa)
+		(void *, u64 *, unsigned long *, size_t *) =
+		xpc_arch_ops.get_partition_rsvd_page_pa;
+
+	while (1) {
+
+		/* !!! rp_pa will need to be _gpa on UV.
+		 * ??? So do we save it into the architecture specific parts
+		 * ??? of the xpc_partition structure? Do we rename this
+		 * ??? function or have two versions? Rename rp_pa for UV to
+		 * ??? rp_gpa?
+		 */
+		ret = get_partition_rsvd_page_pa(buf, &cookie, &rp_pa, &len);
+
+		dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, "
+			"address=0x%016lx, len=0x%016lx\n", ret,
+			(unsigned long)cookie, rp_pa, len);
+
+		if (ret != xpNeedMoreInfo)
+			break;
+
+		/* !!! L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */
+		if (is_shub())
+			len = L1_CACHE_ALIGN(len);
+
+		if (len > buf_len) {
+			if (buf_base != NULL)
+				kfree(buf_base);
+			buf_len = L1_CACHE_ALIGN(len);
+			buf = xpc_kmalloc_cacheline_aligned(buf_len, GFP_KERNEL,
+							    &buf_base);
+			if (buf_base == NULL) {
+				dev_err(xpc_part, "unable to kmalloc "
+					"len=0x%016lx\n", buf_len);
+				ret = xpNoMemory;
+				break;
+			}
+		}
+
+		ret = xp_remote_memcpy(xp_pa(buf), rp_pa, len);
+		if (ret != xpSuccess) {
+			dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret);
+			break;
+		}
+	}
+
+	kfree(buf_base);
+
+	if (ret != xpSuccess)
+		rp_pa = 0;
+
+	dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
+	return rp_pa;
+}
+
+/*
+ * Fill the partition reserved page with the information needed by
+ * other partitions to discover we are alive and establish initial
+ * communications.
+ */
+int
+xpc_setup_rsvd_page(void)
+{
+	int ret;
+	struct xpc_rsvd_page *rp;
+	unsigned long rp_pa;
+	unsigned long new_ts_jiffies;
+
+	/* get the local reserved page's address */
+
+	preempt_disable();
+	rp_pa = xpc_get_rsvd_page_pa(xp_cpu_to_nasid(smp_processor_id()));
+	preempt_enable();
+	if (rp_pa == 0) {
+		dev_err(xpc_part, "SAL failed to locate the reserved page\n");
+		return -ESRCH;
+	}
+	rp = (struct xpc_rsvd_page *)__va(xp_socket_pa(rp_pa));
+
+	if (rp->SAL_version < 3) {
+		/* SAL_versions < 3 had a SAL_partid defined as a u8 */
+		rp->SAL_partid &= 0xff;
+	}
+	BUG_ON(rp->SAL_partid != xp_partition_id);
+
+	if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) {
+		dev_err(xpc_part, "the reserved page's partid of %d is outside "
+			"supported range (< 0 || >= %d)\n", rp->SAL_partid,
+			xp_max_npartitions);
+		return -EINVAL;
+	}
+
+	rp->version = XPC_RP_VERSION;
+	rp->max_npartitions = xp_max_npartitions;
+
+	/* establish the actual sizes of the nasid masks */
+	if (rp->SAL_version == 1) {
+		/* SAL_version 1 didn't set the nasids_size field */
+		rp->SAL_nasids_size = 128;
+	}
+	xpc_nasid_mask_nbytes = rp->SAL_nasids_size;
+	xpc_nasid_mask_nlongs = BITS_TO_LONGS(rp->SAL_nasids_size *
+					      BITS_PER_BYTE);
+
+	/* setup the pointers to the various items in the reserved page */
+	xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
+	xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
+
+	ret = xpc_arch_ops.setup_rsvd_page(rp);
+	if (ret != 0)
+		return ret;
+
+	/*
+	 * Set timestamp of when reserved page was setup by XPC.
+	 * This signifies to the remote partition that our reserved
+	 * page is initialized.
+	 */
+	new_ts_jiffies = jiffies;
+	if (new_ts_jiffies == 0 || new_ts_jiffies == rp->ts_jiffies)
+		new_ts_jiffies++;
+	rp->ts_jiffies = new_ts_jiffies;
+
+	xpc_rsvd_page = rp;
+	return 0;
+}
+
+void
+xpc_teardown_rsvd_page(void)
+{
+	/* a zero timestamp indicates our rsvd page is not initialized */
+	xpc_rsvd_page->ts_jiffies = 0;
+}
+
+/*
+ * Get a copy of a portion of the remote partition's rsvd page.
+ *
+ * remote_rp points to a buffer that is cacheline aligned for BTE copies and
+ * is large enough to contain a copy of their reserved page header and
+ * part_nasids mask.
+ */
+enum xp_retval
+xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids,
+		  struct xpc_rsvd_page *remote_rp, unsigned long *remote_rp_pa)
+{
+	int l;
+	enum xp_retval ret;
+
+	/* get the reserved page's physical address */
+
+	*remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
+	if (*remote_rp_pa == 0)
+		return xpNoRsvdPageAddr;
+
+	/* pull over the reserved page header and part_nasids mask */
+	ret = xp_remote_memcpy(xp_pa(remote_rp), *remote_rp_pa,
+			       XPC_RP_HEADER_SIZE + xpc_nasid_mask_nbytes);
+	if (ret != xpSuccess)
+		return ret;
+
+	if (discovered_nasids != NULL) {
+		unsigned long *remote_part_nasids =
+		    XPC_RP_PART_NASIDS(remote_rp);
+
+		for (l = 0; l < xpc_nasid_mask_nlongs; l++)
+			discovered_nasids[l] |= remote_part_nasids[l];
+	}
+
+	/* zero timestamp indicates the reserved page has not been setup */
+	if (remote_rp->ts_jiffies == 0)
+		return xpRsvdPageNotSet;
+
+	if (XPC_VERSION_MAJOR(remote_rp->version) !=
+	    XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
+		return xpBadVersion;
+	}
+
+	/* check that both remote and local partids are valid for each side */
+	if (remote_rp->SAL_partid < 0 ||
+	    remote_rp->SAL_partid >= xp_max_npartitions ||
+	    remote_rp->max_npartitions <= xp_partition_id) {
+		return xpInvalidPartid;
+	}
+
+	if (remote_rp->SAL_partid == xp_partition_id)
+		return xpLocalPartid;
+
+	return xpSuccess;
+}
+
+/*
+ * See if the other side has responded to a partition deactivate request
+ * from us. Though we requested the remote partition to deactivate with regard
+ * to us, we really only need to wait for the other side to disengage from us.
+ */
+int
+xpc_partition_disengaged(struct xpc_partition *part)
+{
+	short partid = XPC_PARTID(part);
+	int disengaged;
+
+	disengaged = !xpc_arch_ops.partition_engaged(partid);
+	if (part->disengage_timeout) {
+		if (!disengaged) {
+			if (time_is_after_jiffies(part->disengage_timeout)) {
+				/* timelimit hasn't been reached yet */
+				return 0;
+			}
+
+			/*
+			 * Other side hasn't responded to our deactivate
+			 * request in a timely fashion, so assume it's dead.
+			 */
+
+			dev_info(xpc_part, "deactivate request to remote "
+				 "partition %d timed out\n", partid);
+			xpc_disengage_timedout = 1;
+			xpc_arch_ops.assume_partition_disengaged(partid);
+			disengaged = 1;
+		}
+		part->disengage_timeout = 0;
+
+		/* cancel the timer function, provided it's not us */
+		if (!in_interrupt())
+			del_singleshot_timer_sync(&part->disengage_timer);
+
+		DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING &&
+			part->act_state != XPC_P_AS_INACTIVE);
+		if (part->act_state != XPC_P_AS_INACTIVE)
+			xpc_wakeup_channel_mgr(part);
+
+		xpc_arch_ops.cancel_partition_deactivation_request(part);
+	}
+	return disengaged;
+}
+
+/*
+ * Mark specified partition as active.
+ */
+enum xp_retval
+xpc_mark_partition_active(struct xpc_partition *part)
+{
+	unsigned long irq_flags;
+	enum xp_retval ret;
+
+	dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
+
+	spin_lock_irqsave(&part->act_lock, irq_flags);
+	if (part->act_state == XPC_P_AS_ACTIVATING) {
+		part->act_state = XPC_P_AS_ACTIVE;
+		ret = xpSuccess;
+	} else {
+		DBUG_ON(part->reason == xpSuccess);
+		ret = part->reason;
+	}
+	spin_unlock_irqrestore(&part->act_lock, irq_flags);
+
+	return ret;
+}
+
+/*
+ * Start the process of deactivating the specified partition.
+ */
+void
+xpc_deactivate_partition(const int line, struct xpc_partition *part,
+			 enum xp_retval reason)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&part->act_lock, irq_flags);
+
+	if (part->act_state == XPC_P_AS_INACTIVE) {
+		XPC_SET_REASON(part, reason, line);
+		spin_unlock_irqrestore(&part->act_lock, irq_flags);
+		if (reason == xpReactivating) {
+			/* we interrupt ourselves to reactivate partition */
+			xpc_arch_ops.request_partition_reactivation(part);
+		}
+		return;
+	}
+	if (part->act_state == XPC_P_AS_DEACTIVATING) {
+		if ((part->reason == xpUnloading && reason != xpUnloading) ||
+		    reason == xpReactivating) {
+			XPC_SET_REASON(part, reason, line);
+		}
+		spin_unlock_irqrestore(&part->act_lock, irq_flags);
+		return;
+	}
+
+	part->act_state = XPC_P_AS_DEACTIVATING;
+	XPC_SET_REASON(part, reason, line);
+
+	spin_unlock_irqrestore(&part->act_lock, irq_flags);
+
+	/* ask remote partition to deactivate with regard to us */
+	xpc_arch_ops.request_partition_deactivation(part);
+
+	/* set a timelimit on the disengage phase of the deactivation request */
+	part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ);
+	part->disengage_timer.expires = part->disengage_timeout;
+	add_timer(&part->disengage_timer);
+
+	dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
+		XPC_PARTID(part), reason);
+
+	xpc_partition_going_down(part, reason);
+}
+
+/*
+ * Mark specified partition as inactive.
+ */
+void
+xpc_mark_partition_inactive(struct xpc_partition *part)
+{
+	unsigned long irq_flags;
+
+	dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
+		XPC_PARTID(part));
+
+	spin_lock_irqsave(&part->act_lock, irq_flags);
+	part->act_state = XPC_P_AS_INACTIVE;
+	spin_unlock_irqrestore(&part->act_lock, irq_flags);
+	part->remote_rp_pa = 0;
+}
+
+/*
+ * SAL has provided a partition and machine mask.  The partition mask
+ * contains a bit for each even nasid in our partition.  The machine
+ * mask contains a bit for each even nasid in the entire machine.
+ *
+ * Using those two bit arrays, we can determine which nasids are
+ * known in the machine.  Each should also have a reserved page
+ * initialized if they are available for partitioning.
+ */
+void
+xpc_discovery(void)
+{
+	void *remote_rp_base;
+	struct xpc_rsvd_page *remote_rp;
+	unsigned long remote_rp_pa;
+	int region;
+	int region_size;
+	int max_regions;
+	int nasid;
+	unsigned long *discovered_nasids;
+	enum xp_retval ret;
+
+	remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
+						  xpc_nasid_mask_nbytes,
+						  GFP_KERNEL, &remote_rp_base);
+	if (remote_rp == NULL)
+		return;
+
+	discovered_nasids = kcalloc(xpc_nasid_mask_nlongs, sizeof(long),
+				    GFP_KERNEL);
+	if (discovered_nasids == NULL) {
+		kfree(remote_rp_base);
+		return;
+	}
+
+	/*
+	 * The term 'region' in this context refers to the minimum number of
+	 * nodes that can comprise an access protection grouping. The access
+	 * protection is in regards to memory, IOI and IPI.
+	 */
+	region_size = xp_region_size;
+
+	if (is_uv())
+		max_regions = 256;
+	else {
+		max_regions = 64;
+
+		switch (region_size) {
+		case 128:
+			max_regions *= 2;
+			/* fall through */
+		case 64:
+			max_regions *= 2;
+			/* fall through */
+		case 32:
+			max_regions *= 2;
+			region_size = 16;
+			DBUG_ON(!is_shub2());
+		}
+	}
+
+	for (region = 0; region < max_regions; region++) {
+
+		if (xpc_exiting)
+			break;
+
+		dev_dbg(xpc_part, "searching region %d\n", region);
+
+		for (nasid = (region * region_size * 2);
+		     nasid < ((region + 1) * region_size * 2); nasid += 2) {
+
+			if (xpc_exiting)
+				break;
+
+			dev_dbg(xpc_part, "checking nasid %d\n", nasid);
+
+			if (test_bit(nasid / 2, xpc_part_nasids)) {
+				dev_dbg(xpc_part, "PROM indicates Nasid %d is "
+					"part of the local partition; skipping "
+					"region\n", nasid);
+				break;
+			}
+
+			if (!(test_bit(nasid / 2, xpc_mach_nasids))) {
+				dev_dbg(xpc_part, "PROM indicates Nasid %d was "
+					"not on Numa-Link network at reset\n",
+					nasid);
+				continue;
+			}
+
+			if (test_bit(nasid / 2, discovered_nasids)) {
+				dev_dbg(xpc_part, "Nasid %d is part of a "
+					"partition which was previously "
+					"discovered\n", nasid);
+				continue;
+			}
+
+			/* pull over the rsvd page header & part_nasids mask */
+
+			ret = xpc_get_remote_rp(nasid, discovered_nasids,
+						remote_rp, &remote_rp_pa);
+			if (ret != xpSuccess) {
+				dev_dbg(xpc_part, "unable to get reserved page "
+					"from nasid %d, reason=%d\n", nasid,
+					ret);
+
+				if (ret == xpLocalPartid)
+					break;
+
+				continue;
+			}
+
+			xpc_arch_ops.request_partition_activation(remote_rp,
+							 remote_rp_pa, nasid);
+		}
+	}
+
+	kfree(discovered_nasids);
+	kfree(remote_rp_base);
+}
+
+/*
+ * Given a partid, get the nasids owned by that partition from the
+ * remote partition's reserved page.
+ */
+enum xp_retval
+xpc_initiate_partid_to_nasids(short partid, void *nasid_mask)
+{
+	struct xpc_partition *part;
+	unsigned long part_nasid_pa;
+
+	part = &xpc_partitions[partid];
+	if (part->remote_rp_pa == 0)
+		return xpPartitionDown;
+
+	memset(nasid_mask, 0, xpc_nasid_mask_nbytes);
+
+	part_nasid_pa = (unsigned long)XPC_RP_PART_NASIDS(part->remote_rp_pa);
+
+	return xp_remote_memcpy(xp_pa(nasid_mask), part_nasid_pa,
+				xpc_nasid_mask_nbytes);
+}
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
new file mode 100644
index 000000000..5a12d2a54
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -0,0 +1,2459 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2008-2009 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) sn2-based functions.
+ *
+ *     Architecture specific implementation of common functions.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <asm/uncached.h>
+#include <asm/sn/mspec.h>
+#include <asm/sn/sn_sal.h>
+#include "xpc.h"
+
+/*
+ * Define the number of u64s required to represent all the C-brick nasids
+ * as a bitmap.  The cross-partition kernel modules deal only with
+ * C-brick nasids, thus the need for bitmaps which don't account for
+ * odd-numbered (non C-brick) nasids.
+ */
+#define XPC_MAX_PHYSNODES_SN2	(MAX_NUMALINK_NODES / 2)
+#define XP_NASID_MASK_BYTES_SN2	((XPC_MAX_PHYSNODES_SN2 + 7) / 8)
+#define XP_NASID_MASK_WORDS_SN2	((XPC_MAX_PHYSNODES_SN2 + 63) / 64)
+
+/*
+ * Memory for XPC's amo variables is allocated by the MSPEC driver. These
+ * pages are located in the lowest granule. The lowest granule uses 4k pages
+ * for cached references and an alternate TLB handler to never provide a
+ * cacheable mapping for the entire region. This will prevent speculative
+ * reading of cached copies of our lines from being issued which will cause
+ * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64
+ * amo variables (based on XP_MAX_NPARTITIONS_SN2) to identify the senders of
+ * NOTIFY IRQs, 128 amo variables (based on XP_NASID_MASK_WORDS_SN2) to identify
+ * the senders of ACTIVATE IRQs, 1 amo variable to identify which remote
+ * partitions (i.e., XPCs) consider themselves currently engaged with the
+ * local XPC and 1 amo variable to request partition deactivation.
+ */
+#define XPC_NOTIFY_IRQ_AMOS_SN2		0
+#define XPC_ACTIVATE_IRQ_AMOS_SN2	(XPC_NOTIFY_IRQ_AMOS_SN2 + \
+					 XP_MAX_NPARTITIONS_SN2)
+#define XPC_ENGAGED_PARTITIONS_AMO_SN2	(XPC_ACTIVATE_IRQ_AMOS_SN2 + \
+					 XP_NASID_MASK_WORDS_SN2)
+#define XPC_DEACTIVATE_REQUEST_AMO_SN2	(XPC_ENGAGED_PARTITIONS_AMO_SN2 + 1)
+
+/*
+ * Buffer used to store a local copy of portions of a remote partition's
+ * reserved page (either its header and part_nasids mask, or its vars).
+ */
+static void *xpc_remote_copy_buffer_base_sn2;
+static char *xpc_remote_copy_buffer_sn2;
+
+static struct xpc_vars_sn2 *xpc_vars_sn2;
+static struct xpc_vars_part_sn2 *xpc_vars_part_sn2;
+
+static int
+xpc_setup_partitions_sn2(void)
+{
+	/* nothing needs to be done */
+	return 0;
+}
+
+static void
+xpc_teardown_partitions_sn2(void)
+{
+	/* nothing needs to be done */
+}
+
+/* SH_IPI_ACCESS shub register value on startup */
+static u64 xpc_sh1_IPI_access_sn2;
+static u64 xpc_sh2_IPI_access0_sn2;
+static u64 xpc_sh2_IPI_access1_sn2;
+static u64 xpc_sh2_IPI_access2_sn2;
+static u64 xpc_sh2_IPI_access3_sn2;
+
+/*
+ * Change protections to allow IPI operations.
+ */
+static void
+xpc_allow_IPI_ops_sn2(void)
+{
+	int node;
+	int nasid;
+
+	/* !!! The following should get moved into SAL. */
+	if (is_shub2()) {
+		xpc_sh2_IPI_access0_sn2 =
+		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
+		xpc_sh2_IPI_access1_sn2 =
+		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
+		xpc_sh2_IPI_access2_sn2 =
+		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
+		xpc_sh2_IPI_access3_sn2 =
+		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
+
+		for_each_online_node(node) {
+			nasid = cnodeid_to_nasid(node);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
+			      -1UL);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
+			      -1UL);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
+			      -1UL);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
+			      -1UL);
+		}
+	} else {
+		xpc_sh1_IPI_access_sn2 =
+		    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
+
+		for_each_online_node(node) {
+			nasid = cnodeid_to_nasid(node);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
+			      -1UL);
+		}
+	}
+}
+
+/*
+ * Restrict protections to disallow IPI operations.
+ */
+static void
+xpc_disallow_IPI_ops_sn2(void)
+{
+	int node;
+	int nasid;
+
+	/* !!! The following should get moved into SAL. */
+	if (is_shub2()) {
+		for_each_online_node(node) {
+			nasid = cnodeid_to_nasid(node);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
+			      xpc_sh2_IPI_access0_sn2);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
+			      xpc_sh2_IPI_access1_sn2);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
+			      xpc_sh2_IPI_access2_sn2);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
+			      xpc_sh2_IPI_access3_sn2);
+		}
+	} else {
+		for_each_online_node(node) {
+			nasid = cnodeid_to_nasid(node);
+			HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
+			      xpc_sh1_IPI_access_sn2);
+		}
+	}
+}
+
+/*
+ * The following set of functions are used for the sending and receiving of
+ * IRQs (also known as IPIs). There are two flavors of IRQs, one that is
+ * associated with partition activity (SGI_XPC_ACTIVATE) and the other that
+ * is associated with channel activity (SGI_XPC_NOTIFY).
+ */
+
+static u64
+xpc_receive_IRQ_amo_sn2(struct amo *amo)
+{
+	return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_CLEAR);
+}
+
+static enum xp_retval
+xpc_send_IRQ_sn2(struct amo *amo, u64 flag, int nasid, int phys_cpuid,
+		 int vector)
+{
+	int ret = 0;
+	unsigned long irq_flags;
+
+	local_irq_save(irq_flags);
+
+	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, flag);
+	sn_send_IPI_phys(nasid, phys_cpuid, vector, 0);
+
+	/*
+	 * We must always use the nofault function regardless of whether we
+	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+	 * didn't, we'd never know that the other partition is down and would
+	 * keep sending IRQs and amos to it until the heartbeat times out.
+	 */
+	ret = xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->variable),
+						     xp_nofault_PIOR_target));
+
+	local_irq_restore(irq_flags);
+
+	return (ret == 0) ? xpSuccess : xpPioReadError;
+}
+
+static struct amo *
+xpc_init_IRQ_amo_sn2(int index)
+{
+	struct amo *amo = xpc_vars_sn2->amos_page + index;
+
+	(void)xpc_receive_IRQ_amo_sn2(amo);	/* clear amo variable */
+	return amo;
+}
+
+/*
+ * Functions associated with SGI_XPC_ACTIVATE IRQ.
+ */
+
+/*
+ * Notify the heartbeat check thread that an activate IRQ has been received.
+ */
+static irqreturn_t
+xpc_handle_activate_IRQ_sn2(int irq, void *dev_id)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+	xpc_activate_IRQ_rcvd++;
+	spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+	wake_up_interruptible(&xpc_activate_IRQ_wq);
+	return IRQ_HANDLED;
+}
+
+/*
+ * Flag the appropriate amo variable and send an IRQ to the specified node.
+ */
+static void
+xpc_send_activate_IRQ_sn2(unsigned long amos_page_pa, int from_nasid,
+			  int to_nasid, int to_phys_cpuid)
+{
+	struct amo *amos = (struct amo *)__va(amos_page_pa +
+					      (XPC_ACTIVATE_IRQ_AMOS_SN2 *
+					      sizeof(struct amo)));
+
+	(void)xpc_send_IRQ_sn2(&amos[BIT_WORD(from_nasid / 2)],
+			       BIT_MASK(from_nasid / 2), to_nasid,
+			       to_phys_cpuid, SGI_XPC_ACTIVATE);
+}
+
+static void
+xpc_send_local_activate_IRQ_sn2(int from_nasid)
+{
+	unsigned long irq_flags;
+	struct amo *amos = (struct amo *)__va(xpc_vars_sn2->amos_page_pa +
+					      (XPC_ACTIVATE_IRQ_AMOS_SN2 *
+					      sizeof(struct amo)));
+
+	/* fake the sending and receipt of an activate IRQ from remote nasid */
+	FETCHOP_STORE_OP(TO_AMO((u64)&amos[BIT_WORD(from_nasid / 2)].variable),
+			 FETCHOP_OR, BIT_MASK(from_nasid / 2));
+
+	spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+	xpc_activate_IRQ_rcvd++;
+	spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+	wake_up_interruptible(&xpc_activate_IRQ_wq);
+}
+
+/*
+ * Functions associated with SGI_XPC_NOTIFY IRQ.
+ */
+
+/*
+ * Check to see if any chctl flags were sent from the specified partition.
+ */
+static void
+xpc_check_for_sent_chctl_flags_sn2(struct xpc_partition *part)
+{
+	union xpc_channel_ctl_flags chctl;
+	unsigned long irq_flags;
+
+	chctl.all_flags = xpc_receive_IRQ_amo_sn2(part->sn.sn2.
+						  local_chctl_amo_va);
+	if (chctl.all_flags == 0)
+		return;
+
+	spin_lock_irqsave(&part->chctl_lock, irq_flags);
+	part->chctl.all_flags |= chctl.all_flags;
+	spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+	dev_dbg(xpc_chan, "received notify IRQ from partid=%d, chctl.all_flags="
+		"0x%llx\n", XPC_PARTID(part), chctl.all_flags);
+
+	xpc_wakeup_channel_mgr(part);
+}
+
+/*
+ * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified
+ * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more
+ * than one partition, we use an amo structure per partition to indicate
+ * whether a partition has sent an IRQ or not.  If it has, then wake up the
+ * associated kthread to handle it.
+ *
+ * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IRQs sent by XPC
+ * running on other partitions.
+ *
+ * Noteworthy Arguments:
+ *
+ *	irq - Interrupt ReQuest number. NOT USED.
+ *
+ *	dev_id - partid of IRQ's potential sender.
+ */
+static irqreturn_t
+xpc_handle_notify_IRQ_sn2(int irq, void *dev_id)
+{
+	short partid = (short)(u64)dev_id;
+	struct xpc_partition *part = &xpc_partitions[partid];
+
+	DBUG_ON(partid < 0 || partid >= XP_MAX_NPARTITIONS_SN2);
+
+	if (xpc_part_ref(part)) {
+		xpc_check_for_sent_chctl_flags_sn2(part);
+
+		xpc_part_deref(part);
+	}
+	return IRQ_HANDLED;
+}
+
+/*
+ * Check to see if xpc_handle_notify_IRQ_sn2() dropped any IRQs on the floor
+ * because the write to their associated amo variable completed after the IRQ
+ * was received.
+ */
+static void
+xpc_check_for_dropped_notify_IRQ_sn2(struct timer_list *t)
+{
+	struct xpc_partition *part =
+		from_timer(part, t, sn.sn2.dropped_notify_IRQ_timer);
+
+	if (xpc_part_ref(part)) {
+		xpc_check_for_sent_chctl_flags_sn2(part);
+
+		t->expires = jiffies + XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL;
+		add_timer(t);
+		xpc_part_deref(part);
+	}
+}
+
+/*
+ * Send a notify IRQ to the remote partition that is associated with the
+ * specified channel.
+ */
+static void
+xpc_send_notify_IRQ_sn2(struct xpc_channel *ch, u8 chctl_flag,
+			char *chctl_flag_string, unsigned long *irq_flags)
+{
+	struct xpc_partition *part = &xpc_partitions[ch->partid];
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+	union xpc_channel_ctl_flags chctl = { 0 };
+	enum xp_retval ret;
+
+	if (likely(part->act_state != XPC_P_AS_DEACTIVATING)) {
+		chctl.flags[ch->number] = chctl_flag;
+		ret = xpc_send_IRQ_sn2(part_sn2->remote_chctl_amo_va,
+				       chctl.all_flags,
+				       part_sn2->notify_IRQ_nasid,
+				       part_sn2->notify_IRQ_phys_cpuid,
+				       SGI_XPC_NOTIFY);
+		dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n",
+			chctl_flag_string, ch->partid, ch->number, ret);
+		if (unlikely(ret != xpSuccess)) {
+			if (irq_flags != NULL)
+				spin_unlock_irqrestore(&ch->lock, *irq_flags);
+			XPC_DEACTIVATE_PARTITION(part, ret);
+			if (irq_flags != NULL)
+				spin_lock_irqsave(&ch->lock, *irq_flags);
+		}
+	}
+}
+
+#define XPC_SEND_NOTIFY_IRQ_SN2(_ch, _ipi_f, _irq_f) \
+		xpc_send_notify_IRQ_sn2(_ch, _ipi_f, #_ipi_f, _irq_f)
+
+/*
+ * Make it look like the remote partition, which is associated with the
+ * specified channel, sent us a notify IRQ. This faked IRQ will be handled
+ * by xpc_check_for_dropped_notify_IRQ_sn2().
+ */
+static void
+xpc_send_local_notify_IRQ_sn2(struct xpc_channel *ch, u8 chctl_flag,
+			      char *chctl_flag_string)
+{
+	struct xpc_partition *part = &xpc_partitions[ch->partid];
+	union xpc_channel_ctl_flags chctl = { 0 };
+
+	chctl.flags[ch->number] = chctl_flag;
+	FETCHOP_STORE_OP(TO_AMO((u64)&part->sn.sn2.local_chctl_amo_va->
+				variable), FETCHOP_OR, chctl.all_flags);
+	dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n",
+		chctl_flag_string, ch->partid, ch->number);
+}
+
+#define XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(_ch, _ipi_f) \
+		xpc_send_local_notify_IRQ_sn2(_ch, _ipi_f, #_ipi_f)
+
+static void
+xpc_send_chctl_closerequest_sn2(struct xpc_channel *ch,
+				unsigned long *irq_flags)
+{
+	struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args;
+
+	args->reason = ch->reason;
+	XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_CLOSEREQUEST, irq_flags);
+}
+
+static void
+xpc_send_chctl_closereply_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_CLOSEREPLY, irq_flags);
+}
+
+static void
+xpc_send_chctl_openrequest_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args;
+
+	args->entry_size = ch->entry_size;
+	args->local_nentries = ch->local_nentries;
+	XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREQUEST, irq_flags);
+}
+
+static void
+xpc_send_chctl_openreply_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args;
+
+	args->remote_nentries = ch->remote_nentries;
+	args->local_nentries = ch->local_nentries;
+	args->local_msgqueue_pa = xp_pa(ch->sn.sn2.local_msgqueue);
+	XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREPLY, irq_flags);
+}
+
+static void
+xpc_send_chctl_opencomplete_sn2(struct xpc_channel *ch,
+				unsigned long *irq_flags)
+{
+	XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENCOMPLETE, irq_flags);
+}
+
+static void
+xpc_send_chctl_msgrequest_sn2(struct xpc_channel *ch)
+{
+	XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST, NULL);
+}
+
+static void
+xpc_send_chctl_local_msgrequest_sn2(struct xpc_channel *ch)
+{
+	XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST);
+}
+
+static enum xp_retval
+xpc_save_remote_msgqueue_pa_sn2(struct xpc_channel *ch,
+				unsigned long msgqueue_pa)
+{
+	ch->sn.sn2.remote_msgqueue_pa = msgqueue_pa;
+	return xpSuccess;
+}
+
+/*
+ * This next set of functions are used to keep track of when a partition is
+ * potentially engaged in accessing memory belonging to another partition.
+ */
+
+static void
+xpc_indicate_partition_engaged_sn2(struct xpc_partition *part)
+{
+	unsigned long irq_flags;
+	struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa +
+					     (XPC_ENGAGED_PARTITIONS_AMO_SN2 *
+					     sizeof(struct amo)));
+
+	local_irq_save(irq_flags);
+
+	/* set bit corresponding to our partid in remote partition's amo */
+	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
+			 BIT(sn_partition_id));
+
+	/*
+	 * We must always use the nofault function regardless of whether we
+	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+	 * didn't, we'd never know that the other partition is down and would
+	 * keep sending IRQs and amos to it until the heartbeat times out.
+	 */
+	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+							       variable),
+						     xp_nofault_PIOR_target));
+
+	local_irq_restore(irq_flags);
+}
+
+static void
+xpc_indicate_partition_disengaged_sn2(struct xpc_partition *part)
+{
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+	unsigned long irq_flags;
+	struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa +
+					     (XPC_ENGAGED_PARTITIONS_AMO_SN2 *
+					     sizeof(struct amo)));
+
+	local_irq_save(irq_flags);
+
+	/* clear bit corresponding to our partid in remote partition's amo */
+	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
+			 ~BIT(sn_partition_id));
+
+	/*
+	 * We must always use the nofault function regardless of whether we
+	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+	 * didn't, we'd never know that the other partition is down and would
+	 * keep sending IRQs and amos to it until the heartbeat times out.
+	 */
+	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+							       variable),
+						     xp_nofault_PIOR_target));
+
+	local_irq_restore(irq_flags);
+
+	/*
+	 * Send activate IRQ to get other side to see that we've cleared our
+	 * bit in their engaged partitions amo.
+	 */
+	xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa,
+				  cnodeid_to_nasid(0),
+				  part_sn2->activate_IRQ_nasid,
+				  part_sn2->activate_IRQ_phys_cpuid);
+}
+
+static void
+xpc_assume_partition_disengaged_sn2(short partid)
+{
+	struct amo *amo = xpc_vars_sn2->amos_page +
+			  XPC_ENGAGED_PARTITIONS_AMO_SN2;
+
+	/* clear bit(s) based on partid mask in our partition's amo */
+	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
+			 ~BIT(partid));
+}
+
+static int
+xpc_partition_engaged_sn2(short partid)
+{
+	struct amo *amo = xpc_vars_sn2->amos_page +
+			  XPC_ENGAGED_PARTITIONS_AMO_SN2;
+
+	/* our partition's amo variable ANDed with partid mask */
+	return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
+		BIT(partid)) != 0;
+}
+
+static int
+xpc_any_partition_engaged_sn2(void)
+{
+	struct amo *amo = xpc_vars_sn2->amos_page +
+			  XPC_ENGAGED_PARTITIONS_AMO_SN2;
+
+	/* our partition's amo variable */
+	return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) != 0;
+}
+
+/* original protection values for each node */
+static u64 xpc_prot_vec_sn2[MAX_NUMNODES];
+
+/*
+ * Change protections to allow amo operations on non-Shub 1.1 systems.
+ */
+static enum xp_retval
+xpc_allow_amo_ops_sn2(struct amo *amos_page)
+{
+	enum xp_retval ret = xpSuccess;
+
+	/*
+	 * On SHUB 1.1, we cannot call sn_change_memprotect() since the BIST
+	 * collides with memory operations. On those systems we call
+	 * xpc_allow_amo_ops_shub_wars_1_1_sn2() instead.
+	 */
+	if (!enable_shub_wars_1_1())
+		ret = xp_expand_memprotect(ia64_tpa((u64)amos_page), PAGE_SIZE);
+
+	return ret;
+}
+
+/*
+ * Change protections to allow amo operations on Shub 1.1 systems.
+ */
+static void
+xpc_allow_amo_ops_shub_wars_1_1_sn2(void)
+{
+	int node;
+	int nasid;
+
+	if (!enable_shub_wars_1_1())
+		return;
+
+	for_each_online_node(node) {
+		nasid = cnodeid_to_nasid(node);
+		/* save current protection values */
+		xpc_prot_vec_sn2[node] =
+		    (u64)HUB_L((u64 *)GLOBAL_MMR_ADDR(nasid,
+						  SH1_MD_DQLP_MMR_DIR_PRIVEC0));
+		/* open up everything */
+		HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
+					     SH1_MD_DQLP_MMR_DIR_PRIVEC0),
+		      -1UL);
+		HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
+					     SH1_MD_DQRP_MMR_DIR_PRIVEC0),
+		      -1UL);
+	}
+}
+
+static enum xp_retval
+xpc_get_partition_rsvd_page_pa_sn2(void *buf, u64 *cookie, unsigned long *rp_pa,
+				   size_t *len)
+{
+	s64 status;
+	enum xp_retval ret;
+
+	status = sn_partition_reserved_page_pa((u64)buf, cookie,
+			(u64 *)rp_pa, (u64 *)len);
+	if (status == SALRET_OK)
+		ret = xpSuccess;
+	else if (status == SALRET_MORE_PASSES)
+		ret = xpNeedMoreInfo;
+	else
+		ret = xpSalError;
+
+	return ret;
+}
+
+
+static int
+xpc_setup_rsvd_page_sn2(struct xpc_rsvd_page *rp)
+{
+	struct amo *amos_page;
+	int i;
+	int ret;
+
+	xpc_vars_sn2 = XPC_RP_VARS(rp);
+
+	rp->sn.sn2.vars_pa = xp_pa(xpc_vars_sn2);
+
+	/* vars_part array follows immediately after vars */
+	xpc_vars_part_sn2 = (struct xpc_vars_part_sn2 *)((u8 *)XPC_RP_VARS(rp) +
+							 XPC_RP_VARS_SIZE);
+
+	/*
+	 * Before clearing xpc_vars_sn2, see if a page of amos had been
+	 * previously allocated. If not we'll need to allocate one and set
+	 * permissions so that cross-partition amos are allowed.
+	 *
+	 * The allocated amo page needs MCA reporting to remain disabled after
+	 * XPC has unloaded.  To make this work, we keep a copy of the pointer
+	 * to this page (i.e., amos_page) in the struct xpc_vars_sn2 structure,
+	 * which is pointed to by the reserved page, and re-use that saved copy
+	 * on subsequent loads of XPC. This amo page is never freed, and its
+	 * memory protections are never restricted.
+	 */
+	amos_page = xpc_vars_sn2->amos_page;
+	if (amos_page == NULL) {
+		amos_page = (struct amo *)TO_AMO(uncached_alloc_page(0, 1));
+		if (amos_page == NULL) {
+			dev_err(xpc_part, "can't allocate page of amos\n");
+			return -ENOMEM;
+		}
+
+		/*
+		 * Open up amo-R/W to cpu.  This is done on Shub 1.1 systems
+		 * when xpc_allow_amo_ops_shub_wars_1_1_sn2() is called.
+		 */
+		ret = xpc_allow_amo_ops_sn2(amos_page);
+		if (ret != xpSuccess) {
+			dev_err(xpc_part, "can't allow amo operations\n");
+			uncached_free_page(__IA64_UNCACHED_OFFSET |
+					   TO_PHYS((u64)amos_page), 1);
+			return -EPERM;
+		}
+	}
+
+	/* clear xpc_vars_sn2 */
+	memset(xpc_vars_sn2, 0, sizeof(struct xpc_vars_sn2));
+
+	xpc_vars_sn2->version = XPC_V_VERSION;
+	xpc_vars_sn2->activate_IRQ_nasid = cpuid_to_nasid(0);
+	xpc_vars_sn2->activate_IRQ_phys_cpuid = cpu_physical_id(0);
+	xpc_vars_sn2->vars_part_pa = xp_pa(xpc_vars_part_sn2);
+	xpc_vars_sn2->amos_page_pa = ia64_tpa((u64)amos_page);
+	xpc_vars_sn2->amos_page = amos_page;	/* save for next load of XPC */
+
+	/* clear xpc_vars_part_sn2 */
+	memset((u64 *)xpc_vars_part_sn2, 0, sizeof(struct xpc_vars_part_sn2) *
+	       XP_MAX_NPARTITIONS_SN2);
+
+	/* initialize the activate IRQ related amo variables */
+	for (i = 0; i < xpc_nasid_mask_nlongs; i++)
+		(void)xpc_init_IRQ_amo_sn2(XPC_ACTIVATE_IRQ_AMOS_SN2 + i);
+
+	/* initialize the engaged remote partitions related amo variables */
+	(void)xpc_init_IRQ_amo_sn2(XPC_ENGAGED_PARTITIONS_AMO_SN2);
+	(void)xpc_init_IRQ_amo_sn2(XPC_DEACTIVATE_REQUEST_AMO_SN2);
+
+	return 0;
+}
+
+static int
+xpc_hb_allowed_sn2(short partid, void *heartbeating_to_mask)
+{
+	return test_bit(partid, heartbeating_to_mask);
+}
+
+static void
+xpc_allow_hb_sn2(short partid)
+{
+	DBUG_ON(xpc_vars_sn2 == NULL);
+	set_bit(partid, xpc_vars_sn2->heartbeating_to_mask);
+}
+
+static void
+xpc_disallow_hb_sn2(short partid)
+{
+	DBUG_ON(xpc_vars_sn2 == NULL);
+	clear_bit(partid, xpc_vars_sn2->heartbeating_to_mask);
+}
+
+static void
+xpc_disallow_all_hbs_sn2(void)
+{
+	DBUG_ON(xpc_vars_sn2 == NULL);
+	bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, xp_max_npartitions);
+}
+
+static void
+xpc_increment_heartbeat_sn2(void)
+{
+	xpc_vars_sn2->heartbeat++;
+}
+
+static void
+xpc_offline_heartbeat_sn2(void)
+{
+	xpc_increment_heartbeat_sn2();
+	xpc_vars_sn2->heartbeat_offline = 1;
+}
+
+static void
+xpc_online_heartbeat_sn2(void)
+{
+	xpc_increment_heartbeat_sn2();
+	xpc_vars_sn2->heartbeat_offline = 0;
+}
+
+static void
+xpc_heartbeat_init_sn2(void)
+{
+	DBUG_ON(xpc_vars_sn2 == NULL);
+
+	bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2);
+	xpc_online_heartbeat_sn2();
+}
+
+static void
+xpc_heartbeat_exit_sn2(void)
+{
+	xpc_offline_heartbeat_sn2();
+}
+
+static enum xp_retval
+xpc_get_remote_heartbeat_sn2(struct xpc_partition *part)
+{
+	struct xpc_vars_sn2 *remote_vars;
+	enum xp_retval ret;
+
+	remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer_sn2;
+
+	/* pull the remote vars structure that contains the heartbeat */
+	ret = xp_remote_memcpy(xp_pa(remote_vars),
+			       part->sn.sn2.remote_vars_pa,
+			       XPC_RP_VARS_SIZE);
+	if (ret != xpSuccess)
+		return ret;
+
+	dev_dbg(xpc_part, "partid=%d, heartbeat=%lld, last_heartbeat=%lld, "
+		"heartbeat_offline=%lld, HB_mask[0]=0x%lx\n", XPC_PARTID(part),
+		remote_vars->heartbeat, part->last_heartbeat,
+		remote_vars->heartbeat_offline,
+		remote_vars->heartbeating_to_mask[0]);
+
+	if ((remote_vars->heartbeat == part->last_heartbeat &&
+	    !remote_vars->heartbeat_offline) ||
+	    !xpc_hb_allowed_sn2(sn_partition_id,
+				remote_vars->heartbeating_to_mask)) {
+		ret = xpNoHeartbeat;
+	} else {
+		part->last_heartbeat = remote_vars->heartbeat;
+	}
+
+	return ret;
+}
+
+/*
+ * Get a copy of the remote partition's XPC variables from the reserved page.
+ *
+ * remote_vars points to a buffer that is cacheline aligned for BTE copies and
+ * assumed to be of size XPC_RP_VARS_SIZE.
+ */
+static enum xp_retval
+xpc_get_remote_vars_sn2(unsigned long remote_vars_pa,
+			struct xpc_vars_sn2 *remote_vars)
+{
+	enum xp_retval ret;
+
+	if (remote_vars_pa == 0)
+		return xpVarsNotSet;
+
+	/* pull over the cross partition variables */
+	ret = xp_remote_memcpy(xp_pa(remote_vars), remote_vars_pa,
+			       XPC_RP_VARS_SIZE);
+	if (ret != xpSuccess)
+		return ret;
+
+	if (XPC_VERSION_MAJOR(remote_vars->version) !=
+	    XPC_VERSION_MAJOR(XPC_V_VERSION)) {
+		return xpBadVersion;
+	}
+
+	return xpSuccess;
+}
+
+static void
+xpc_request_partition_activation_sn2(struct xpc_rsvd_page *remote_rp,
+				     unsigned long remote_rp_pa, int nasid)
+{
+	xpc_send_local_activate_IRQ_sn2(nasid);
+}
+
+static void
+xpc_request_partition_reactivation_sn2(struct xpc_partition *part)
+{
+	xpc_send_local_activate_IRQ_sn2(part->sn.sn2.activate_IRQ_nasid);
+}
+
+static void
+xpc_request_partition_deactivation_sn2(struct xpc_partition *part)
+{
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+	unsigned long irq_flags;
+	struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa +
+					     (XPC_DEACTIVATE_REQUEST_AMO_SN2 *
+					     sizeof(struct amo)));
+
+	local_irq_save(irq_flags);
+
+	/* set bit corresponding to our partid in remote partition's amo */
+	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
+			 BIT(sn_partition_id));
+
+	/*
+	 * We must always use the nofault function regardless of whether we
+	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+	 * didn't, we'd never know that the other partition is down and would
+	 * keep sending IRQs and amos to it until the heartbeat times out.
+	 */
+	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+							       variable),
+						     xp_nofault_PIOR_target));
+
+	local_irq_restore(irq_flags);
+
+	/*
+	 * Send activate IRQ to get other side to see that we've set our
+	 * bit in their deactivate request amo.
+	 */
+	xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa,
+				  cnodeid_to_nasid(0),
+				  part_sn2->activate_IRQ_nasid,
+				  part_sn2->activate_IRQ_phys_cpuid);
+}
+
+static void
+xpc_cancel_partition_deactivation_request_sn2(struct xpc_partition *part)
+{
+	unsigned long irq_flags;
+	struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa +
+					     (XPC_DEACTIVATE_REQUEST_AMO_SN2 *
+					     sizeof(struct amo)));
+
+	local_irq_save(irq_flags);
+
+	/* clear bit corresponding to our partid in remote partition's amo */
+	FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
+			 ~BIT(sn_partition_id));
+
+	/*
+	 * We must always use the nofault function regardless of whether we
+	 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+	 * didn't, we'd never know that the other partition is down and would
+	 * keep sending IRQs and amos to it until the heartbeat times out.
+	 */
+	(void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
+							       variable),
+						     xp_nofault_PIOR_target));
+
+	local_irq_restore(irq_flags);
+}
+
+static int
+xpc_partition_deactivation_requested_sn2(short partid)
+{
+	struct amo *amo = xpc_vars_sn2->amos_page +
+			  XPC_DEACTIVATE_REQUEST_AMO_SN2;
+
+	/* our partition's amo variable ANDed with partid mask */
+	return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
+		BIT(partid)) != 0;
+}
+
+/*
+ * Update the remote partition's info.
+ */
+static void
+xpc_update_partition_info_sn2(struct xpc_partition *part, u8 remote_rp_version,
+			      unsigned long *remote_rp_ts_jiffies,
+			      unsigned long remote_rp_pa,
+			      unsigned long remote_vars_pa,
+			      struct xpc_vars_sn2 *remote_vars)
+{
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+
+	part->remote_rp_version = remote_rp_version;
+	dev_dbg(xpc_part, "  remote_rp_version = 0x%016x\n",
+		part->remote_rp_version);
+
+	part->remote_rp_ts_jiffies = *remote_rp_ts_jiffies;
+	dev_dbg(xpc_part, "  remote_rp_ts_jiffies = 0x%016lx\n",
+		part->remote_rp_ts_jiffies);
+
+	part->remote_rp_pa = remote_rp_pa;
+	dev_dbg(xpc_part, "  remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
+
+	part_sn2->remote_vars_pa = remote_vars_pa;
+	dev_dbg(xpc_part, "  remote_vars_pa = 0x%016lx\n",
+		part_sn2->remote_vars_pa);
+
+	part->last_heartbeat = remote_vars->heartbeat - 1;
+	dev_dbg(xpc_part, "  last_heartbeat = 0x%016llx\n",
+		part->last_heartbeat);
+
+	part_sn2->remote_vars_part_pa = remote_vars->vars_part_pa;
+	dev_dbg(xpc_part, "  remote_vars_part_pa = 0x%016lx\n",
+		part_sn2->remote_vars_part_pa);
+
+	part_sn2->activate_IRQ_nasid = remote_vars->activate_IRQ_nasid;
+	dev_dbg(xpc_part, "  activate_IRQ_nasid = 0x%x\n",
+		part_sn2->activate_IRQ_nasid);
+
+	part_sn2->activate_IRQ_phys_cpuid =
+	    remote_vars->activate_IRQ_phys_cpuid;
+	dev_dbg(xpc_part, "  activate_IRQ_phys_cpuid = 0x%x\n",
+		part_sn2->activate_IRQ_phys_cpuid);
+
+	part_sn2->remote_amos_page_pa = remote_vars->amos_page_pa;
+	dev_dbg(xpc_part, "  remote_amos_page_pa = 0x%lx\n",
+		part_sn2->remote_amos_page_pa);
+
+	part_sn2->remote_vars_version = remote_vars->version;
+	dev_dbg(xpc_part, "  remote_vars_version = 0x%x\n",
+		part_sn2->remote_vars_version);
+}
+
+/*
+ * Prior code has determined the nasid which generated a activate IRQ.
+ * Inspect that nasid to determine if its partition needs to be activated
+ * or deactivated.
+ *
+ * A partition is considered "awaiting activation" if our partition
+ * flags indicate it is not active and it has a heartbeat.  A
+ * partition is considered "awaiting deactivation" if our partition
+ * flags indicate it is active but it has no heartbeat or it is not
+ * sending its heartbeat to us.
+ *
+ * To determine the heartbeat, the remote nasid must have a properly
+ * initialized reserved page.
+ */
+static void
+xpc_identify_activate_IRQ_req_sn2(int nasid)
+{
+	struct xpc_rsvd_page *remote_rp;
+	struct xpc_vars_sn2 *remote_vars;
+	unsigned long remote_rp_pa;
+	unsigned long remote_vars_pa;
+	int remote_rp_version;
+	int reactivate = 0;
+	unsigned long remote_rp_ts_jiffies = 0;
+	short partid;
+	struct xpc_partition *part;
+	struct xpc_partition_sn2 *part_sn2;
+	enum xp_retval ret;
+
+	/* pull over the reserved page structure */
+
+	remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer_sn2;
+
+	ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
+	if (ret != xpSuccess) {
+		dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
+			 "which sent interrupt, reason=%d\n", nasid, ret);
+		return;
+	}
+
+	remote_vars_pa = remote_rp->sn.sn2.vars_pa;
+	remote_rp_version = remote_rp->version;
+	remote_rp_ts_jiffies = remote_rp->ts_jiffies;
+
+	partid = remote_rp->SAL_partid;
+	part = &xpc_partitions[partid];
+	part_sn2 = &part->sn.sn2;
+
+	/* pull over the cross partition variables */
+
+	remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer_sn2;
+
+	ret = xpc_get_remote_vars_sn2(remote_vars_pa, remote_vars);
+	if (ret != xpSuccess) {
+		dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
+			 "which sent interrupt, reason=%d\n", nasid, ret);
+
+		XPC_DEACTIVATE_PARTITION(part, ret);
+		return;
+	}
+
+	part->activate_IRQ_rcvd++;
+
+	dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
+		"%lld:0x%lx\n", (int)nasid, (int)partid,
+		part->activate_IRQ_rcvd,
+		remote_vars->heartbeat, remote_vars->heartbeating_to_mask[0]);
+
+	if (xpc_partition_disengaged(part) &&
+	    part->act_state == XPC_P_AS_INACTIVE) {
+
+		xpc_update_partition_info_sn2(part, remote_rp_version,
+					      &remote_rp_ts_jiffies,
+					      remote_rp_pa, remote_vars_pa,
+					      remote_vars);
+
+		if (xpc_partition_deactivation_requested_sn2(partid)) {
+			/*
+			 * Other side is waiting on us to deactivate even though
+			 * we already have.
+			 */
+			return;
+		}
+
+		xpc_activate_partition(part);
+		return;
+	}
+
+	DBUG_ON(part->remote_rp_version == 0);
+	DBUG_ON(part_sn2->remote_vars_version == 0);
+
+	if (remote_rp_ts_jiffies != part->remote_rp_ts_jiffies) {
+
+		/* the other side rebooted */
+
+		DBUG_ON(xpc_partition_engaged_sn2(partid));
+		DBUG_ON(xpc_partition_deactivation_requested_sn2(partid));
+
+		xpc_update_partition_info_sn2(part, remote_rp_version,
+					      &remote_rp_ts_jiffies,
+					      remote_rp_pa, remote_vars_pa,
+					      remote_vars);
+		reactivate = 1;
+	}
+
+	if (part->disengage_timeout > 0 && !xpc_partition_disengaged(part)) {
+		/* still waiting on other side to disengage from us */
+		return;
+	}
+
+	if (reactivate)
+		XPC_DEACTIVATE_PARTITION(part, xpReactivating);
+	else if (xpc_partition_deactivation_requested_sn2(partid))
+		XPC_DEACTIVATE_PARTITION(part, xpOtherGoingDown);
+}
+
+/*
+ * Loop through the activation amo variables and process any bits
+ * which are set.  Each bit indicates a nasid sending a partition
+ * activation or deactivation request.
+ *
+ * Return #of IRQs detected.
+ */
+int
+xpc_identify_activate_IRQ_sender_sn2(void)
+{
+	int l;
+	int b;
+	unsigned long nasid_mask_long;
+	u64 nasid;		/* remote nasid */
+	int n_IRQs_detected = 0;
+	struct amo *act_amos;
+
+	act_amos = xpc_vars_sn2->amos_page + XPC_ACTIVATE_IRQ_AMOS_SN2;
+
+	/* scan through activate amo variables looking for non-zero entries */
+	for (l = 0; l < xpc_nasid_mask_nlongs; l++) {
+
+		if (xpc_exiting)
+			break;
+
+		nasid_mask_long = xpc_receive_IRQ_amo_sn2(&act_amos[l]);
+
+		b = find_first_bit(&nasid_mask_long, BITS_PER_LONG);
+		if (b >= BITS_PER_LONG) {
+			/* no IRQs from nasids in this amo variable */
+			continue;
+		}
+
+		dev_dbg(xpc_part, "amo[%d] gave back 0x%lx\n", l,
+			nasid_mask_long);
+
+		/*
+		 * If this nasid has been added to the machine since
+		 * our partition was reset, this will retain the
+		 * remote nasid in our reserved pages machine mask.
+		 * This is used in the event of module reload.
+		 */
+		xpc_mach_nasids[l] |= nasid_mask_long;
+
+		/* locate the nasid(s) which sent interrupts */
+
+		do {
+			n_IRQs_detected++;
+			nasid = (l * BITS_PER_LONG + b) * 2;
+			dev_dbg(xpc_part, "interrupt from nasid %lld\n", nasid);
+			xpc_identify_activate_IRQ_req_sn2(nasid);
+
+			b = find_next_bit(&nasid_mask_long, BITS_PER_LONG,
+					  b + 1);
+		} while (b < BITS_PER_LONG);
+	}
+	return n_IRQs_detected;
+}
+
+static void
+xpc_process_activate_IRQ_rcvd_sn2(void)
+{
+	unsigned long irq_flags;
+	int n_IRQs_expected;
+	int n_IRQs_detected;
+
+	spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+	n_IRQs_expected = xpc_activate_IRQ_rcvd;
+	xpc_activate_IRQ_rcvd = 0;
+	spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+	n_IRQs_detected = xpc_identify_activate_IRQ_sender_sn2();
+	if (n_IRQs_detected < n_IRQs_expected) {
+		/* retry once to help avoid missing amo */
+		(void)xpc_identify_activate_IRQ_sender_sn2();
+	}
+}
+
+/*
+ * Setup the channel structures that are sn2 specific.
+ */
+static enum xp_retval
+xpc_setup_ch_structures_sn2(struct xpc_partition *part)
+{
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+	struct xpc_channel_sn2 *ch_sn2;
+	enum xp_retval retval;
+	int ret;
+	int cpuid;
+	int ch_number;
+	struct timer_list *timer;
+	short partid = XPC_PARTID(part);
+
+	/* allocate all the required GET/PUT values */
+
+	part_sn2->local_GPs =
+	    xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, GFP_KERNEL,
+					  &part_sn2->local_GPs_base);
+	if (part_sn2->local_GPs == NULL) {
+		dev_err(xpc_chan, "can't get memory for local get/put "
+			"values\n");
+		return xpNoMemory;
+	}
+
+	part_sn2->remote_GPs =
+	    xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, GFP_KERNEL,
+					  &part_sn2->remote_GPs_base);
+	if (part_sn2->remote_GPs == NULL) {
+		dev_err(xpc_chan, "can't get memory for remote get/put "
+			"values\n");
+		retval = xpNoMemory;
+		goto out_1;
+	}
+
+	part_sn2->remote_GPs_pa = 0;
+
+	/* allocate all the required open and close args */
+
+	part_sn2->local_openclose_args =
+	    xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE,
+					  GFP_KERNEL, &part_sn2->
+					  local_openclose_args_base);
+	if (part_sn2->local_openclose_args == NULL) {
+		dev_err(xpc_chan, "can't get memory for local connect args\n");
+		retval = xpNoMemory;
+		goto out_2;
+	}
+
+	part_sn2->remote_openclose_args_pa = 0;
+
+	part_sn2->local_chctl_amo_va = xpc_init_IRQ_amo_sn2(partid);
+
+	part_sn2->notify_IRQ_nasid = 0;
+	part_sn2->notify_IRQ_phys_cpuid = 0;
+	part_sn2->remote_chctl_amo_va = NULL;
+
+	sprintf(part_sn2->notify_IRQ_owner, "xpc%02d", partid);
+	ret = request_irq(SGI_XPC_NOTIFY, xpc_handle_notify_IRQ_sn2,
+			  IRQF_SHARED, part_sn2->notify_IRQ_owner,
+			  (void *)(u64)partid);
+	if (ret != 0) {
+		dev_err(xpc_chan, "can't register NOTIFY IRQ handler, "
+			"errno=%d\n", -ret);
+		retval = xpLackOfResources;
+		goto out_3;
+	}
+
+	/* Setup a timer to check for dropped notify IRQs */
+	timer = &part_sn2->dropped_notify_IRQ_timer;
+	timer_setup(timer, xpc_check_for_dropped_notify_IRQ_sn2, 0);
+	timer->expires = jiffies + XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL;
+	add_timer(timer);
+
+	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
+		ch_sn2 = &part->channels[ch_number].sn.sn2;
+
+		ch_sn2->local_GP = &part_sn2->local_GPs[ch_number];
+		ch_sn2->local_openclose_args =
+		    &part_sn2->local_openclose_args[ch_number];
+
+		mutex_init(&ch_sn2->msg_to_pull_mutex);
+	}
+
+	/*
+	 * Setup the per partition specific variables required by the
+	 * remote partition to establish channel connections with us.
+	 *
+	 * The setting of the magic # indicates that these per partition
+	 * specific variables are ready to be used.
+	 */
+	xpc_vars_part_sn2[partid].GPs_pa = xp_pa(part_sn2->local_GPs);
+	xpc_vars_part_sn2[partid].openclose_args_pa =
+	    xp_pa(part_sn2->local_openclose_args);
+	xpc_vars_part_sn2[partid].chctl_amo_pa =
+	    xp_pa(part_sn2->local_chctl_amo_va);
+	cpuid = raw_smp_processor_id();	/* any CPU in this partition will do */
+	xpc_vars_part_sn2[partid].notify_IRQ_nasid = cpuid_to_nasid(cpuid);
+	xpc_vars_part_sn2[partid].notify_IRQ_phys_cpuid =
+	    cpu_physical_id(cpuid);
+	xpc_vars_part_sn2[partid].nchannels = part->nchannels;
+	xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC1_SN2;
+
+	return xpSuccess;
+
+	/* setup of ch structures failed */
+out_3:
+	kfree(part_sn2->local_openclose_args_base);
+	part_sn2->local_openclose_args = NULL;
+out_2:
+	kfree(part_sn2->remote_GPs_base);
+	part_sn2->remote_GPs = NULL;
+out_1:
+	kfree(part_sn2->local_GPs_base);
+	part_sn2->local_GPs = NULL;
+	return retval;
+}
+
+/*
+ * Teardown the channel structures that are sn2 specific.
+ */
+static void
+xpc_teardown_ch_structures_sn2(struct xpc_partition *part)
+{
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+	short partid = XPC_PARTID(part);
+
+	/*
+	 * Indicate that the variables specific to the remote partition are no
+	 * longer available for its use.
+	 */
+	xpc_vars_part_sn2[partid].magic = 0;
+
+	/* in case we've still got outstanding timers registered... */
+	del_timer_sync(&part_sn2->dropped_notify_IRQ_timer);
+	free_irq(SGI_XPC_NOTIFY, (void *)(u64)partid);
+
+	kfree(part_sn2->local_openclose_args_base);
+	part_sn2->local_openclose_args = NULL;
+	kfree(part_sn2->remote_GPs_base);
+	part_sn2->remote_GPs = NULL;
+	kfree(part_sn2->local_GPs_base);
+	part_sn2->local_GPs = NULL;
+	part_sn2->local_chctl_amo_va = NULL;
+}
+
+/*
+ * Create a wrapper that hides the underlying mechanism for pulling a cacheline
+ * (or multiple cachelines) from a remote partition.
+ *
+ * src_pa must be a cacheline aligned physical address on the remote partition.
+ * dst must be a cacheline aligned virtual address on this partition.
+ * cnt must be cacheline sized
+ */
+/* ??? Replace this function by call to xp_remote_memcpy() or bte_copy()? */
+static enum xp_retval
+xpc_pull_remote_cachelines_sn2(struct xpc_partition *part, void *dst,
+			       const unsigned long src_pa, size_t cnt)
+{
+	enum xp_retval ret;
+
+	DBUG_ON(src_pa != L1_CACHE_ALIGN(src_pa));
+	DBUG_ON((unsigned long)dst != L1_CACHE_ALIGN((unsigned long)dst));
+	DBUG_ON(cnt != L1_CACHE_ALIGN(cnt));
+
+	if (part->act_state == XPC_P_AS_DEACTIVATING)
+		return part->reason;
+
+	ret = xp_remote_memcpy(xp_pa(dst), src_pa, cnt);
+	if (ret != xpSuccess) {
+		dev_dbg(xpc_chan, "xp_remote_memcpy() from partition %d failed,"
+			" ret=%d\n", XPC_PARTID(part), ret);
+	}
+	return ret;
+}
+
+/*
+ * Pull the remote per partition specific variables from the specified
+ * partition.
+ */
+static enum xp_retval
+xpc_pull_remote_vars_part_sn2(struct xpc_partition *part)
+{
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+	u8 buffer[L1_CACHE_BYTES * 2];
+	struct xpc_vars_part_sn2 *pulled_entry_cacheline =
+	    (struct xpc_vars_part_sn2 *)L1_CACHE_ALIGN((u64)buffer);
+	struct xpc_vars_part_sn2 *pulled_entry;
+	unsigned long remote_entry_cacheline_pa;
+	unsigned long remote_entry_pa;
+	short partid = XPC_PARTID(part);
+	enum xp_retval ret;
+
+	/* pull the cacheline that contains the variables we're interested in */
+
+	DBUG_ON(part_sn2->remote_vars_part_pa !=
+		L1_CACHE_ALIGN(part_sn2->remote_vars_part_pa));
+	DBUG_ON(sizeof(struct xpc_vars_part_sn2) != L1_CACHE_BYTES / 2);
+
+	remote_entry_pa = part_sn2->remote_vars_part_pa +
+	    sn_partition_id * sizeof(struct xpc_vars_part_sn2);
+
+	remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1));
+
+	pulled_entry = (struct xpc_vars_part_sn2 *)((u64)pulled_entry_cacheline
+						    + (remote_entry_pa &
+						    (L1_CACHE_BYTES - 1)));
+
+	ret = xpc_pull_remote_cachelines_sn2(part, pulled_entry_cacheline,
+					     remote_entry_cacheline_pa,
+					     L1_CACHE_BYTES);
+	if (ret != xpSuccess) {
+		dev_dbg(xpc_chan, "failed to pull XPC vars_part from "
+			"partition %d, ret=%d\n", partid, ret);
+		return ret;
+	}
+
+	/* see if they've been set up yet */
+
+	if (pulled_entry->magic != XPC_VP_MAGIC1_SN2 &&
+	    pulled_entry->magic != XPC_VP_MAGIC2_SN2) {
+
+		if (pulled_entry->magic != 0) {
+			dev_dbg(xpc_chan, "partition %d's XPC vars_part for "
+				"partition %d has bad magic value (=0x%llx)\n",
+				partid, sn_partition_id, pulled_entry->magic);
+			return xpBadMagic;
+		}
+
+		/* they've not been initialized yet */
+		return xpRetry;
+	}
+
+	if (xpc_vars_part_sn2[partid].magic == XPC_VP_MAGIC1_SN2) {
+
+		/* validate the variables */
+
+		if (pulled_entry->GPs_pa == 0 ||
+		    pulled_entry->openclose_args_pa == 0 ||
+		    pulled_entry->chctl_amo_pa == 0) {
+
+			dev_err(xpc_chan, "partition %d's XPC vars_part for "
+				"partition %d are not valid\n", partid,
+				sn_partition_id);
+			return xpInvalidAddress;
+		}
+
+		/* the variables we imported look to be valid */
+
+		part_sn2->remote_GPs_pa = pulled_entry->GPs_pa;
+		part_sn2->remote_openclose_args_pa =
+		    pulled_entry->openclose_args_pa;
+		part_sn2->remote_chctl_amo_va =
+		    (struct amo *)__va(pulled_entry->chctl_amo_pa);
+		part_sn2->notify_IRQ_nasid = pulled_entry->notify_IRQ_nasid;
+		part_sn2->notify_IRQ_phys_cpuid =
+		    pulled_entry->notify_IRQ_phys_cpuid;
+
+		if (part->nchannels > pulled_entry->nchannels)
+			part->nchannels = pulled_entry->nchannels;
+
+		/* let the other side know that we've pulled their variables */
+
+		xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC2_SN2;
+	}
+
+	if (pulled_entry->magic == XPC_VP_MAGIC1_SN2)
+		return xpRetry;
+
+	return xpSuccess;
+}
+
+/*
+ * Establish first contact with the remote partititon. This involves pulling
+ * the XPC per partition variables from the remote partition and waiting for
+ * the remote partition to pull ours.
+ */
+static enum xp_retval
+xpc_make_first_contact_sn2(struct xpc_partition *part)
+{
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+	enum xp_retval ret;
+
+	/*
+	 * Register the remote partition's amos with SAL so it can handle
+	 * and cleanup errors within that address range should the remote
+	 * partition go down. We don't unregister this range because it is
+	 * difficult to tell when outstanding writes to the remote partition
+	 * are finished and thus when it is safe to unregister. This should
+	 * not result in wasted space in the SAL xp_addr_region table because
+	 * we should get the same page for remote_amos_page_pa after module
+	 * reloads and system reboots.
+	 */
+	if (sn_register_xp_addr_region(part_sn2->remote_amos_page_pa,
+				       PAGE_SIZE, 1) < 0) {
+		dev_warn(xpc_part, "xpc_activating(%d) failed to register "
+			 "xp_addr region\n", XPC_PARTID(part));
+
+		ret = xpPhysAddrRegFailed;
+		XPC_DEACTIVATE_PARTITION(part, ret);
+		return ret;
+	}
+
+	/*
+	 * Send activate IRQ to get other side to activate if they've not
+	 * already begun to do so.
+	 */
+	xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa,
+				  cnodeid_to_nasid(0),
+				  part_sn2->activate_IRQ_nasid,
+				  part_sn2->activate_IRQ_phys_cpuid);
+
+	while ((ret = xpc_pull_remote_vars_part_sn2(part)) != xpSuccess) {
+		if (ret != xpRetry) {
+			XPC_DEACTIVATE_PARTITION(part, ret);
+			return ret;
+		}
+
+		dev_dbg(xpc_part, "waiting to make first contact with "
+			"partition %d\n", XPC_PARTID(part));
+
+		/* wait a 1/4 of a second or so */
+		(void)msleep_interruptible(250);
+
+		if (part->act_state == XPC_P_AS_DEACTIVATING)
+			return part->reason;
+	}
+
+	return xpSuccess;
+}
+
+/*
+ * Get the chctl flags and pull the openclose args and/or remote GPs as needed.
+ */
+static u64
+xpc_get_chctl_all_flags_sn2(struct xpc_partition *part)
+{
+	struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
+	unsigned long irq_flags;
+	union xpc_channel_ctl_flags chctl;
+	enum xp_retval ret;
+
+	/*
+	 * See if there are any chctl flags to be handled.
+	 */
+
+	spin_lock_irqsave(&part->chctl_lock, irq_flags);
+	chctl = part->chctl;
+	if (chctl.all_flags != 0)
+		part->chctl.all_flags = 0;
+
+	spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+	if (xpc_any_openclose_chctl_flags_set(&chctl)) {
+		ret = xpc_pull_remote_cachelines_sn2(part, part->
+						     remote_openclose_args,
+						     part_sn2->
+						     remote_openclose_args_pa,
+						     XPC_OPENCLOSE_ARGS_SIZE);
+		if (ret != xpSuccess) {
+			XPC_DEACTIVATE_PARTITION(part, ret);
+
+			dev_dbg(xpc_chan, "failed to pull openclose args from "
+				"partition %d, ret=%d\n", XPC_PARTID(part),
+				ret);
+
+			/* don't bother processing chctl flags anymore */
+			chctl.all_flags = 0;
+		}
+	}
+
+	if (xpc_any_msg_chctl_flags_set(&chctl)) {
+		ret = xpc_pull_remote_cachelines_sn2(part, part_sn2->remote_GPs,
+						     part_sn2->remote_GPs_pa,
+						     XPC_GP_SIZE);
+		if (ret != xpSuccess) {
+			XPC_DEACTIVATE_PARTITION(part, ret);
+
+			dev_dbg(xpc_chan, "failed to pull GPs from partition "
+				"%d, ret=%d\n", XPC_PARTID(part), ret);
+
+			/* don't bother processing chctl flags anymore */
+			chctl.all_flags = 0;
+		}
+	}
+
+	return chctl.all_flags;
+}
+
+/*
+ * Allocate the local message queue and the notify queue.
+ */
+static enum xp_retval
+xpc_allocate_local_msgqueue_sn2(struct xpc_channel *ch)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	unsigned long irq_flags;
+	int nentries;
+	size_t nbytes;
+
+	for (nentries = ch->local_nentries; nentries > 0; nentries--) {
+
+		nbytes = nentries * ch->entry_size;
+		ch_sn2->local_msgqueue =
+		    xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL,
+						  &ch_sn2->local_msgqueue_base);
+		if (ch_sn2->local_msgqueue == NULL)
+			continue;
+
+		nbytes = nentries * sizeof(struct xpc_notify_sn2);
+		ch_sn2->notify_queue = kzalloc(nbytes, GFP_KERNEL);
+		if (ch_sn2->notify_queue == NULL) {
+			kfree(ch_sn2->local_msgqueue_base);
+			ch_sn2->local_msgqueue = NULL;
+			continue;
+		}
+
+		spin_lock_irqsave(&ch->lock, irq_flags);
+		if (nentries < ch->local_nentries) {
+			dev_dbg(xpc_chan, "nentries=%d local_nentries=%d, "
+				"partid=%d, channel=%d\n", nentries,
+				ch->local_nentries, ch->partid, ch->number);
+
+			ch->local_nentries = nentries;
+		}
+		spin_unlock_irqrestore(&ch->lock, irq_flags);
+		return xpSuccess;
+	}
+
+	dev_dbg(xpc_chan, "can't get memory for local message queue and notify "
+		"queue, partid=%d, channel=%d\n", ch->partid, ch->number);
+	return xpNoMemory;
+}
+
+/*
+ * Allocate the cached remote message queue.
+ */
+static enum xp_retval
+xpc_allocate_remote_msgqueue_sn2(struct xpc_channel *ch)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	unsigned long irq_flags;
+	int nentries;
+	size_t nbytes;
+
+	DBUG_ON(ch->remote_nentries <= 0);
+
+	for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
+
+		nbytes = nentries * ch->entry_size;
+		ch_sn2->remote_msgqueue =
+		    xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL, &ch_sn2->
+						  remote_msgqueue_base);
+		if (ch_sn2->remote_msgqueue == NULL)
+			continue;
+
+		spin_lock_irqsave(&ch->lock, irq_flags);
+		if (nentries < ch->remote_nentries) {
+			dev_dbg(xpc_chan, "nentries=%d remote_nentries=%d, "
+				"partid=%d, channel=%d\n", nentries,
+				ch->remote_nentries, ch->partid, ch->number);
+
+			ch->remote_nentries = nentries;
+		}
+		spin_unlock_irqrestore(&ch->lock, irq_flags);
+		return xpSuccess;
+	}
+
+	dev_dbg(xpc_chan, "can't get memory for cached remote message queue, "
+		"partid=%d, channel=%d\n", ch->partid, ch->number);
+	return xpNoMemory;
+}
+
+/*
+ * Allocate message queues and other stuff associated with a channel.
+ *
+ * Note: Assumes all of the channel sizes are filled in.
+ */
+static enum xp_retval
+xpc_setup_msg_structures_sn2(struct xpc_channel *ch)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	enum xp_retval ret;
+
+	DBUG_ON(ch->flags & XPC_C_SETUP);
+
+	ret = xpc_allocate_local_msgqueue_sn2(ch);
+	if (ret == xpSuccess) {
+
+		ret = xpc_allocate_remote_msgqueue_sn2(ch);
+		if (ret != xpSuccess) {
+			kfree(ch_sn2->local_msgqueue_base);
+			ch_sn2->local_msgqueue = NULL;
+			kfree(ch_sn2->notify_queue);
+			ch_sn2->notify_queue = NULL;
+		}
+	}
+	return ret;
+}
+
+/*
+ * Free up message queues and other stuff that were allocated for the specified
+ * channel.
+ */
+static void
+xpc_teardown_msg_structures_sn2(struct xpc_channel *ch)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+
+	DBUG_ON(!spin_is_locked(&ch->lock));
+
+	ch_sn2->remote_msgqueue_pa = 0;
+
+	ch_sn2->local_GP->get = 0;
+	ch_sn2->local_GP->put = 0;
+	ch_sn2->remote_GP.get = 0;
+	ch_sn2->remote_GP.put = 0;
+	ch_sn2->w_local_GP.get = 0;
+	ch_sn2->w_local_GP.put = 0;
+	ch_sn2->w_remote_GP.get = 0;
+	ch_sn2->w_remote_GP.put = 0;
+	ch_sn2->next_msg_to_pull = 0;
+
+	if (ch->flags & XPC_C_SETUP) {
+		dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n",
+			ch->flags, ch->partid, ch->number);
+
+		kfree(ch_sn2->local_msgqueue_base);
+		ch_sn2->local_msgqueue = NULL;
+		kfree(ch_sn2->remote_msgqueue_base);
+		ch_sn2->remote_msgqueue = NULL;
+		kfree(ch_sn2->notify_queue);
+		ch_sn2->notify_queue = NULL;
+	}
+}
+
+/*
+ * Notify those who wanted to be notified upon delivery of their message.
+ */
+static void
+xpc_notify_senders_sn2(struct xpc_channel *ch, enum xp_retval reason, s64 put)
+{
+	struct xpc_notify_sn2 *notify;
+	u8 notify_type;
+	s64 get = ch->sn.sn2.w_remote_GP.get - 1;
+
+	while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
+
+		notify = &ch->sn.sn2.notify_queue[get % ch->local_nentries];
+
+		/*
+		 * See if the notify entry indicates it was associated with
+		 * a message who's sender wants to be notified. It is possible
+		 * that it is, but someone else is doing or has done the
+		 * notification.
+		 */
+		notify_type = notify->type;
+		if (notify_type == 0 ||
+		    cmpxchg(&notify->type, notify_type, 0) != notify_type) {
+			continue;
+		}
+
+		DBUG_ON(notify_type != XPC_N_CALL);
+
+		atomic_dec(&ch->n_to_notify);
+
+		if (notify->func != NULL) {
+			dev_dbg(xpc_chan, "notify->func() called, notify=0x%p "
+				"msg_number=%lld partid=%d channel=%d\n",
+				(void *)notify, get, ch->partid, ch->number);
+
+			notify->func(reason, ch->partid, ch->number,
+				     notify->key);
+
+			dev_dbg(xpc_chan, "notify->func() returned, notify=0x%p"
+				" msg_number=%lld partid=%d channel=%d\n",
+				(void *)notify, get, ch->partid, ch->number);
+		}
+	}
+}
+
+static void
+xpc_notify_senders_of_disconnect_sn2(struct xpc_channel *ch)
+{
+	xpc_notify_senders_sn2(ch, ch->reason, ch->sn.sn2.w_local_GP.put);
+}
+
+/*
+ * Clear some of the msg flags in the local message queue.
+ */
+static inline void
+xpc_clear_local_msgqueue_flags_sn2(struct xpc_channel *ch)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	struct xpc_msg_sn2 *msg;
+	s64 get;
+
+	get = ch_sn2->w_remote_GP.get;
+	do {
+		msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->local_msgqueue +
+					     (get % ch->local_nentries) *
+					     ch->entry_size);
+		DBUG_ON(!(msg->flags & XPC_M_SN2_READY));
+		msg->flags = 0;
+	} while (++get < ch_sn2->remote_GP.get);
+}
+
+/*
+ * Clear some of the msg flags in the remote message queue.
+ */
+static inline void
+xpc_clear_remote_msgqueue_flags_sn2(struct xpc_channel *ch)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	struct xpc_msg_sn2 *msg;
+	s64 put, remote_nentries = ch->remote_nentries;
+
+	/* flags are zeroed when the buffer is allocated */
+	if (ch_sn2->remote_GP.put < remote_nentries)
+		return;
+
+	put = max(ch_sn2->w_remote_GP.put, remote_nentries);
+	do {
+		msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue +
+					     (put % remote_nentries) *
+					     ch->entry_size);
+		DBUG_ON(!(msg->flags & XPC_M_SN2_READY));
+		DBUG_ON(!(msg->flags & XPC_M_SN2_DONE));
+		DBUG_ON(msg->number != put - remote_nentries);
+		msg->flags = 0;
+	} while (++put < ch_sn2->remote_GP.put);
+}
+
+static int
+xpc_n_of_deliverable_payloads_sn2(struct xpc_channel *ch)
+{
+	return ch->sn.sn2.w_remote_GP.put - ch->sn.sn2.w_local_GP.get;
+}
+
+static void
+xpc_process_msg_chctl_flags_sn2(struct xpc_partition *part, int ch_number)
+{
+	struct xpc_channel *ch = &part->channels[ch_number];
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	int npayloads_sent;
+
+	ch_sn2->remote_GP = part->sn.sn2.remote_GPs[ch_number];
+
+	/* See what, if anything, has changed for each connected channel */
+
+	xpc_msgqueue_ref(ch);
+
+	if (ch_sn2->w_remote_GP.get == ch_sn2->remote_GP.get &&
+	    ch_sn2->w_remote_GP.put == ch_sn2->remote_GP.put) {
+		/* nothing changed since GPs were last pulled */
+		xpc_msgqueue_deref(ch);
+		return;
+	}
+
+	if (!(ch->flags & XPC_C_CONNECTED)) {
+		xpc_msgqueue_deref(ch);
+		return;
+	}
+
+	/*
+	 * First check to see if messages recently sent by us have been
+	 * received by the other side. (The remote GET value will have
+	 * changed since we last looked at it.)
+	 */
+
+	if (ch_sn2->w_remote_GP.get != ch_sn2->remote_GP.get) {
+
+		/*
+		 * We need to notify any senders that want to be notified
+		 * that their sent messages have been received by their
+		 * intended recipients. We need to do this before updating
+		 * w_remote_GP.get so that we don't allocate the same message
+		 * queue entries prematurely (see xpc_allocate_msg()).
+		 */
+		if (atomic_read(&ch->n_to_notify) > 0) {
+			/*
+			 * Notify senders that messages sent have been
+			 * received and delivered by the other side.
+			 */
+			xpc_notify_senders_sn2(ch, xpMsgDelivered,
+					       ch_sn2->remote_GP.get);
+		}
+
+		/*
+		 * Clear msg->flags in previously sent messages, so that
+		 * they're ready for xpc_allocate_msg().
+		 */
+		xpc_clear_local_msgqueue_flags_sn2(ch);
+
+		ch_sn2->w_remote_GP.get = ch_sn2->remote_GP.get;
+
+		dev_dbg(xpc_chan, "w_remote_GP.get changed to %lld, partid=%d, "
+			"channel=%d\n", ch_sn2->w_remote_GP.get, ch->partid,
+			ch->number);
+
+		/*
+		 * If anyone was waiting for message queue entries to become
+		 * available, wake them up.
+		 */
+		if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
+			wake_up(&ch->msg_allocate_wq);
+	}
+
+	/*
+	 * Now check for newly sent messages by the other side. (The remote
+	 * PUT value will have changed since we last looked at it.)
+	 */
+
+	if (ch_sn2->w_remote_GP.put != ch_sn2->remote_GP.put) {
+		/*
+		 * Clear msg->flags in previously received messages, so that
+		 * they're ready for xpc_get_deliverable_payload_sn2().
+		 */
+		xpc_clear_remote_msgqueue_flags_sn2(ch);
+
+		smp_wmb(); /* ensure flags have been cleared before bte_copy */
+		ch_sn2->w_remote_GP.put = ch_sn2->remote_GP.put;
+
+		dev_dbg(xpc_chan, "w_remote_GP.put changed to %lld, partid=%d, "
+			"channel=%d\n", ch_sn2->w_remote_GP.put, ch->partid,
+			ch->number);
+
+		npayloads_sent = xpc_n_of_deliverable_payloads_sn2(ch);
+		if (npayloads_sent > 0) {
+			dev_dbg(xpc_chan, "msgs waiting to be copied and "
+				"delivered=%d, partid=%d, channel=%d\n",
+				npayloads_sent, ch->partid, ch->number);
+
+			if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)
+				xpc_activate_kthreads(ch, npayloads_sent);
+		}
+	}
+
+	xpc_msgqueue_deref(ch);
+}
+
+static struct xpc_msg_sn2 *
+xpc_pull_remote_msg_sn2(struct xpc_channel *ch, s64 get)
+{
+	struct xpc_partition *part = &xpc_partitions[ch->partid];
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	unsigned long remote_msg_pa;
+	struct xpc_msg_sn2 *msg;
+	u32 msg_index;
+	u32 nmsgs;
+	u64 msg_offset;
+	enum xp_retval ret;
+
+	if (mutex_lock_interruptible(&ch_sn2->msg_to_pull_mutex) != 0) {
+		/* we were interrupted by a signal */
+		return NULL;
+	}
+
+	while (get >= ch_sn2->next_msg_to_pull) {
+
+		/* pull as many messages as are ready and able to be pulled */
+
+		msg_index = ch_sn2->next_msg_to_pull % ch->remote_nentries;
+
+		DBUG_ON(ch_sn2->next_msg_to_pull >= ch_sn2->w_remote_GP.put);
+		nmsgs = ch_sn2->w_remote_GP.put - ch_sn2->next_msg_to_pull;
+		if (msg_index + nmsgs > ch->remote_nentries) {
+			/* ignore the ones that wrap the msg queue for now */
+			nmsgs = ch->remote_nentries - msg_index;
+		}
+
+		msg_offset = msg_index * ch->entry_size;
+		msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue +
+		    msg_offset);
+		remote_msg_pa = ch_sn2->remote_msgqueue_pa + msg_offset;
+
+		ret = xpc_pull_remote_cachelines_sn2(part, msg, remote_msg_pa,
+						     nmsgs * ch->entry_size);
+		if (ret != xpSuccess) {
+
+			dev_dbg(xpc_chan, "failed to pull %d msgs starting with"
+				" msg %lld from partition %d, channel=%d, "
+				"ret=%d\n", nmsgs, ch_sn2->next_msg_to_pull,
+				ch->partid, ch->number, ret);
+
+			XPC_DEACTIVATE_PARTITION(part, ret);
+
+			mutex_unlock(&ch_sn2->msg_to_pull_mutex);
+			return NULL;
+		}
+
+		ch_sn2->next_msg_to_pull += nmsgs;
+	}
+
+	mutex_unlock(&ch_sn2->msg_to_pull_mutex);
+
+	/* return the message we were looking for */
+	msg_offset = (get % ch->remote_nentries) * ch->entry_size;
+	msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue + msg_offset);
+
+	return msg;
+}
+
+/*
+ * Get the next deliverable message's payload.
+ */
+static void *
+xpc_get_deliverable_payload_sn2(struct xpc_channel *ch)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	struct xpc_msg_sn2 *msg;
+	void *payload = NULL;
+	s64 get;
+
+	do {
+		if (ch->flags & XPC_C_DISCONNECTING)
+			break;
+
+		get = ch_sn2->w_local_GP.get;
+		smp_rmb();	/* guarantee that .get loads before .put */
+		if (get == ch_sn2->w_remote_GP.put)
+			break;
+
+		/* There are messages waiting to be pulled and delivered.
+		 * We need to try to secure one for ourselves. We'll do this
+		 * by trying to increment w_local_GP.get and hope that no one
+		 * else beats us to it. If they do, we'll we'll simply have
+		 * to try again for the next one.
+		 */
+
+		if (cmpxchg(&ch_sn2->w_local_GP.get, get, get + 1) == get) {
+			/* we got the entry referenced by get */
+
+			dev_dbg(xpc_chan, "w_local_GP.get changed to %lld, "
+				"partid=%d, channel=%d\n", get + 1,
+				ch->partid, ch->number);
+
+			/* pull the message from the remote partition */
+
+			msg = xpc_pull_remote_msg_sn2(ch, get);
+
+			if (msg != NULL) {
+				DBUG_ON(msg->number != get);
+				DBUG_ON(msg->flags & XPC_M_SN2_DONE);
+				DBUG_ON(!(msg->flags & XPC_M_SN2_READY));
+
+				payload = &msg->payload;
+			}
+			break;
+		}
+
+	} while (1);
+
+	return payload;
+}
+
+/*
+ * Now we actually send the messages that are ready to be sent by advancing
+ * the local message queue's Put value and then send a chctl msgrequest to the
+ * recipient partition.
+ */
+static void
+xpc_send_msgs_sn2(struct xpc_channel *ch, s64 initial_put)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	struct xpc_msg_sn2 *msg;
+	s64 put = initial_put + 1;
+	int send_msgrequest = 0;
+
+	while (1) {
+
+		while (1) {
+			if (put == ch_sn2->w_local_GP.put)
+				break;
+
+			msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->
+						     local_msgqueue + (put %
+						     ch->local_nentries) *
+						     ch->entry_size);
+
+			if (!(msg->flags & XPC_M_SN2_READY))
+				break;
+
+			put++;
+		}
+
+		if (put == initial_put) {
+			/* nothing's changed */
+			break;
+		}
+
+		if (cmpxchg_rel(&ch_sn2->local_GP->put, initial_put, put) !=
+		    initial_put) {
+			/* someone else beat us to it */
+			DBUG_ON(ch_sn2->local_GP->put < initial_put);
+			break;
+		}
+
+		/* we just set the new value of local_GP->put */
+
+		dev_dbg(xpc_chan, "local_GP->put changed to %lld, partid=%d, "
+			"channel=%d\n", put, ch->partid, ch->number);
+
+		send_msgrequest = 1;
+
+		/*
+		 * We need to ensure that the message referenced by
+		 * local_GP->put is not XPC_M_SN2_READY or that local_GP->put
+		 * equals w_local_GP.put, so we'll go have a look.
+		 */
+		initial_put = put;
+	}
+
+	if (send_msgrequest)
+		xpc_send_chctl_msgrequest_sn2(ch);
+}
+
+/*
+ * Allocate an entry for a message from the message queue associated with the
+ * specified channel.
+ */
+static enum xp_retval
+xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags,
+		     struct xpc_msg_sn2 **address_of_msg)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	struct xpc_msg_sn2 *msg;
+	enum xp_retval ret;
+	s64 put;
+
+	/*
+	 * Get the next available message entry from the local message queue.
+	 * If none are available, we'll make sure that we grab the latest
+	 * GP values.
+	 */
+	ret = xpTimeout;
+
+	while (1) {
+
+		put = ch_sn2->w_local_GP.put;
+		smp_rmb();	/* guarantee that .put loads before .get */
+		if (put - ch_sn2->w_remote_GP.get < ch->local_nentries) {
+
+			/* There are available message entries. We need to try
+			 * to secure one for ourselves. We'll do this by trying
+			 * to increment w_local_GP.put as long as someone else
+			 * doesn't beat us to it. If they do, we'll have to
+			 * try again.
+			 */
+			if (cmpxchg(&ch_sn2->w_local_GP.put, put, put + 1) ==
+			    put) {
+				/* we got the entry referenced by put */
+				break;
+			}
+			continue;	/* try again */
+		}
+
+		/*
+		 * There aren't any available msg entries at this time.
+		 *
+		 * In waiting for a message entry to become available,
+		 * we set a timeout in case the other side is not sending
+		 * completion interrupts. This lets us fake a notify IRQ
+		 * that will cause the notify IRQ handler to fetch the latest
+		 * GP values as if an interrupt was sent by the other side.
+		 */
+		if (ret == xpTimeout)
+			xpc_send_chctl_local_msgrequest_sn2(ch);
+
+		if (flags & XPC_NOWAIT)
+			return xpNoWait;
+
+		ret = xpc_allocate_msg_wait(ch);
+		if (ret != xpInterrupted && ret != xpTimeout)
+			return ret;
+	}
+
+	/* get the message's address and initialize it */
+	msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->local_msgqueue +
+				     (put % ch->local_nentries) *
+				     ch->entry_size);
+
+	DBUG_ON(msg->flags != 0);
+	msg->number = put;
+
+	dev_dbg(xpc_chan, "w_local_GP.put changed to %lld; msg=0x%p, "
+		"msg_number=%lld, partid=%d, channel=%d\n", put + 1,
+		(void *)msg, msg->number, ch->partid, ch->number);
+
+	*address_of_msg = msg;
+	return xpSuccess;
+}
+
+/*
+ * Common code that does the actual sending of the message by advancing the
+ * local message queue's Put value and sends a chctl msgrequest to the
+ * partition the message is being sent to.
+ */
+static enum xp_retval
+xpc_send_payload_sn2(struct xpc_channel *ch, u32 flags, void *payload,
+		     u16 payload_size, u8 notify_type, xpc_notify_func func,
+		     void *key)
+{
+	enum xp_retval ret = xpSuccess;
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	struct xpc_msg_sn2 *msg = msg;
+	struct xpc_notify_sn2 *notify = notify;
+	s64 msg_number;
+	s64 put;
+
+	DBUG_ON(notify_type == XPC_N_CALL && func == NULL);
+
+	if (XPC_MSG_SIZE(payload_size) > ch->entry_size)
+		return xpPayloadTooBig;
+
+	xpc_msgqueue_ref(ch);
+
+	if (ch->flags & XPC_C_DISCONNECTING) {
+		ret = ch->reason;
+		goto out_1;
+	}
+	if (!(ch->flags & XPC_C_CONNECTED)) {
+		ret = xpNotConnected;
+		goto out_1;
+	}
+
+	ret = xpc_allocate_msg_sn2(ch, flags, &msg);
+	if (ret != xpSuccess)
+		goto out_1;
+
+	msg_number = msg->number;
+
+	if (notify_type != 0) {
+		/*
+		 * Tell the remote side to send an ACK interrupt when the
+		 * message has been delivered.
+		 */
+		msg->flags |= XPC_M_SN2_INTERRUPT;
+
+		atomic_inc(&ch->n_to_notify);
+
+		notify = &ch_sn2->notify_queue[msg_number % ch->local_nentries];
+		notify->func = func;
+		notify->key = key;
+		notify->type = notify_type;
+
+		/* ??? Is a mb() needed here? */
+
+		if (ch->flags & XPC_C_DISCONNECTING) {
+			/*
+			 * An error occurred between our last error check and
+			 * this one. We will try to clear the type field from
+			 * the notify entry. If we succeed then
+			 * xpc_disconnect_channel() didn't already process
+			 * the notify entry.
+			 */
+			if (cmpxchg(&notify->type, notify_type, 0) ==
+			    notify_type) {
+				atomic_dec(&ch->n_to_notify);
+				ret = ch->reason;
+			}
+			goto out_1;
+		}
+	}
+
+	memcpy(&msg->payload, payload, payload_size);
+
+	msg->flags |= XPC_M_SN2_READY;
+
+	/*
+	 * The preceding store of msg->flags must occur before the following
+	 * load of local_GP->put.
+	 */
+	smp_mb();
+
+	/* see if the message is next in line to be sent, if so send it */
+
+	put = ch_sn2->local_GP->put;
+	if (put == msg_number)
+		xpc_send_msgs_sn2(ch, put);
+
+out_1:
+	xpc_msgqueue_deref(ch);
+	return ret;
+}
+
+/*
+ * Now we actually acknowledge the messages that have been delivered and ack'd
+ * by advancing the cached remote message queue's Get value and if requested
+ * send a chctl msgrequest to the message sender's partition.
+ *
+ * If a message has XPC_M_SN2_INTERRUPT set, send an interrupt to the partition
+ * that sent the message.
+ */
+static void
+xpc_acknowledge_msgs_sn2(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
+{
+	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
+	struct xpc_msg_sn2 *msg;
+	s64 get = initial_get + 1;
+	int send_msgrequest = 0;
+
+	while (1) {
+
+		while (1) {
+			if (get == ch_sn2->w_local_GP.get)
+				break;
+
+			msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->
+						     remote_msgqueue + (get %
+						     ch->remote_nentries) *
+						     ch->entry_size);
+
+			if (!(msg->flags & XPC_M_SN2_DONE))
+				break;
+
+			msg_flags |= msg->flags;
+			get++;
+		}
+
+		if (get == initial_get) {
+			/* nothing's changed */
+			break;
+		}
+
+		if (cmpxchg_rel(&ch_sn2->local_GP->get, initial_get, get) !=
+		    initial_get) {
+			/* someone else beat us to it */
+			DBUG_ON(ch_sn2->local_GP->get <= initial_get);
+			break;
+		}
+
+		/* we just set the new value of local_GP->get */
+
+		dev_dbg(xpc_chan, "local_GP->get changed to %lld, partid=%d, "
+			"channel=%d\n", get, ch->partid, ch->number);
+
+		send_msgrequest = (msg_flags & XPC_M_SN2_INTERRUPT);
+
+		/*
+		 * We need to ensure that the message referenced by
+		 * local_GP->get is not XPC_M_SN2_DONE or that local_GP->get
+		 * equals w_local_GP.get, so we'll go have a look.
+		 */
+		initial_get = get;
+	}
+
+	if (send_msgrequest)
+		xpc_send_chctl_msgrequest_sn2(ch);
+}
+
+static void
+xpc_received_payload_sn2(struct xpc_channel *ch, void *payload)
+{
+	struct xpc_msg_sn2 *msg;
+	s64 msg_number;
+	s64 get;
+
+	msg = container_of(payload, struct xpc_msg_sn2, payload);
+	msg_number = msg->number;
+
+	dev_dbg(xpc_chan, "msg=0x%p, msg_number=%lld, partid=%d, channel=%d\n",
+		(void *)msg, msg_number, ch->partid, ch->number);
+
+	DBUG_ON((((u64)msg - (u64)ch->sn.sn2.remote_msgqueue) / ch->entry_size) !=
+		msg_number % ch->remote_nentries);
+	DBUG_ON(!(msg->flags & XPC_M_SN2_READY));
+	DBUG_ON(msg->flags & XPC_M_SN2_DONE);
+
+	msg->flags |= XPC_M_SN2_DONE;
+
+	/*
+	 * The preceding store of msg->flags must occur before the following
+	 * load of local_GP->get.
+	 */
+	smp_mb();
+
+	/*
+	 * See if this message is next in line to be acknowledged as having
+	 * been delivered.
+	 */
+	get = ch->sn.sn2.local_GP->get;
+	if (get == msg_number)
+		xpc_acknowledge_msgs_sn2(ch, get, msg->flags);
+}
+
+static struct xpc_arch_operations xpc_arch_ops_sn2 = {
+	.setup_partitions = xpc_setup_partitions_sn2,
+	.teardown_partitions = xpc_teardown_partitions_sn2,
+	.process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_sn2,
+	.get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_sn2,
+	.setup_rsvd_page = xpc_setup_rsvd_page_sn2,
+
+	.allow_hb = xpc_allow_hb_sn2,
+	.disallow_hb = xpc_disallow_hb_sn2,
+	.disallow_all_hbs = xpc_disallow_all_hbs_sn2,
+	.increment_heartbeat = xpc_increment_heartbeat_sn2,
+	.offline_heartbeat = xpc_offline_heartbeat_sn2,
+	.online_heartbeat = xpc_online_heartbeat_sn2,
+	.heartbeat_init = xpc_heartbeat_init_sn2,
+	.heartbeat_exit = xpc_heartbeat_exit_sn2,
+	.get_remote_heartbeat = xpc_get_remote_heartbeat_sn2,
+
+	.request_partition_activation =
+		xpc_request_partition_activation_sn2,
+	.request_partition_reactivation =
+		xpc_request_partition_reactivation_sn2,
+	.request_partition_deactivation =
+		xpc_request_partition_deactivation_sn2,
+	.cancel_partition_deactivation_request =
+		xpc_cancel_partition_deactivation_request_sn2,
+
+	.setup_ch_structures = xpc_setup_ch_structures_sn2,
+	.teardown_ch_structures = xpc_teardown_ch_structures_sn2,
+
+	.make_first_contact = xpc_make_first_contact_sn2,
+
+	.get_chctl_all_flags = xpc_get_chctl_all_flags_sn2,
+	.send_chctl_closerequest = xpc_send_chctl_closerequest_sn2,
+	.send_chctl_closereply = xpc_send_chctl_closereply_sn2,
+	.send_chctl_openrequest = xpc_send_chctl_openrequest_sn2,
+	.send_chctl_openreply = xpc_send_chctl_openreply_sn2,
+	.send_chctl_opencomplete = xpc_send_chctl_opencomplete_sn2,
+	.process_msg_chctl_flags = xpc_process_msg_chctl_flags_sn2,
+
+	.save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_sn2,
+
+	.setup_msg_structures = xpc_setup_msg_structures_sn2,
+	.teardown_msg_structures = xpc_teardown_msg_structures_sn2,
+
+	.indicate_partition_engaged = xpc_indicate_partition_engaged_sn2,
+	.indicate_partition_disengaged = xpc_indicate_partition_disengaged_sn2,
+	.partition_engaged = xpc_partition_engaged_sn2,
+	.any_partition_engaged = xpc_any_partition_engaged_sn2,
+	.assume_partition_disengaged = xpc_assume_partition_disengaged_sn2,
+
+	.n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_sn2,
+	.send_payload = xpc_send_payload_sn2,
+	.get_deliverable_payload = xpc_get_deliverable_payload_sn2,
+	.received_payload = xpc_received_payload_sn2,
+	.notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_sn2,
+};
+
+int
+xpc_init_sn2(void)
+{
+	int ret;
+	size_t buf_size;
+
+	xpc_arch_ops = xpc_arch_ops_sn2;
+
+	if (offsetof(struct xpc_msg_sn2, payload) > XPC_MSG_HDR_MAX_SIZE) {
+		dev_err(xpc_part, "header portion of struct xpc_msg_sn2 is "
+			"larger than %d\n", XPC_MSG_HDR_MAX_SIZE);
+		return -E2BIG;
+	}
+
+	buf_size = max(XPC_RP_VARS_SIZE,
+		       XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES_SN2);
+	xpc_remote_copy_buffer_sn2 = xpc_kmalloc_cacheline_aligned(buf_size,
+								   GFP_KERNEL,
+					      &xpc_remote_copy_buffer_base_sn2);
+	if (xpc_remote_copy_buffer_sn2 == NULL) {
+		dev_err(xpc_part, "can't get memory for remote copy buffer\n");
+		return -ENOMEM;
+	}
+
+	/* open up protections for IPI and [potentially] amo operations */
+	xpc_allow_IPI_ops_sn2();
+	xpc_allow_amo_ops_shub_wars_1_1_sn2();
+
+	/*
+	 * This is safe to do before the xpc_hb_checker thread has started
+	 * because the handler releases a wait queue.  If an interrupt is
+	 * received before the thread is waiting, it will not go to sleep,
+	 * but rather immediately process the interrupt.
+	 */
+	ret = request_irq(SGI_XPC_ACTIVATE, xpc_handle_activate_IRQ_sn2, 0,
+			  "xpc hb", NULL);
+	if (ret != 0) {
+		dev_err(xpc_part, "can't register ACTIVATE IRQ handler, "
+			"errno=%d\n", -ret);
+		xpc_disallow_IPI_ops_sn2();
+		kfree(xpc_remote_copy_buffer_base_sn2);
+	}
+	return ret;
+}
+
+void
+xpc_exit_sn2(void)
+{
+	free_irq(SGI_XPC_ACTIVATE, NULL);
+	xpc_disallow_IPI_ops_sn2();
+	kfree(xpc_remote_copy_buffer_base_sn2);
+}
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
new file mode 100644
index 000000000..340b44d9e
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -0,0 +1,1813 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2008-2009 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+/*
+ * Cross Partition Communication (XPC) uv-based functions.
+ *
+ *     Architecture specific implementation of common functions.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <asm/uv/uv_hub.h>
+#if defined CONFIG_X86_64
+#include <asm/uv/bios.h>
+#include <asm/uv/uv_irq.h>
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+#include <asm/sn/intr.h>
+#include <asm/sn/sn_sal.h>
+#endif
+#include "../sgi-gru/gru.h"
+#include "../sgi-gru/grukservices.h"
+#include "xpc.h"
+
+#if defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+struct uv_IO_APIC_route_entry {
+	__u64	vector		:  8,
+		delivery_mode	:  3,
+		dest_mode	:  1,
+		delivery_status	:  1,
+		polarity	:  1,
+		__reserved_1	:  1,
+		trigger		:  1,
+		mask		:  1,
+		__reserved_2	: 15,
+		dest		: 32;
+};
+#endif
+
+static struct xpc_heartbeat_uv *xpc_heartbeat_uv;
+
+#define XPC_ACTIVATE_MSG_SIZE_UV	(1 * GRU_CACHE_LINE_BYTES)
+#define XPC_ACTIVATE_MQ_SIZE_UV		(4 * XP_MAX_NPARTITIONS_UV * \
+					 XPC_ACTIVATE_MSG_SIZE_UV)
+#define XPC_ACTIVATE_IRQ_NAME		"xpc_activate"
+
+#define XPC_NOTIFY_MSG_SIZE_UV		(2 * GRU_CACHE_LINE_BYTES)
+#define XPC_NOTIFY_MQ_SIZE_UV		(4 * XP_MAX_NPARTITIONS_UV * \
+					 XPC_NOTIFY_MSG_SIZE_UV)
+#define XPC_NOTIFY_IRQ_NAME		"xpc_notify"
+
+static int xpc_mq_node = -1;
+
+static struct xpc_gru_mq_uv *xpc_activate_mq_uv;
+static struct xpc_gru_mq_uv *xpc_notify_mq_uv;
+
+static int
+xpc_setup_partitions_uv(void)
+{
+	short partid;
+	struct xpc_partition_uv *part_uv;
+
+	for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
+		part_uv = &xpc_partitions[partid].sn.uv;
+
+		mutex_init(&part_uv->cached_activate_gru_mq_desc_mutex);
+		spin_lock_init(&part_uv->flags_lock);
+		part_uv->remote_act_state = XPC_P_AS_INACTIVE;
+	}
+	return 0;
+}
+
+static void
+xpc_teardown_partitions_uv(void)
+{
+	short partid;
+	struct xpc_partition_uv *part_uv;
+	unsigned long irq_flags;
+
+	for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
+		part_uv = &xpc_partitions[partid].sn.uv;
+
+		if (part_uv->cached_activate_gru_mq_desc != NULL) {
+			mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex);
+			spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+			part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
+			spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+			kfree(part_uv->cached_activate_gru_mq_desc);
+			part_uv->cached_activate_gru_mq_desc = NULL;
+			mutex_unlock(&part_uv->
+				     cached_activate_gru_mq_desc_mutex);
+		}
+	}
+}
+
+static int
+xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name)
+{
+	int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
+
+#if defined CONFIG_X86_64
+	mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset,
+			UV_AFFINITY_CPU);
+	if (mq->irq < 0)
+		return mq->irq;
+
+	mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset);
+
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+	if (strcmp(irq_name, XPC_ACTIVATE_IRQ_NAME) == 0)
+		mq->irq = SGI_XPC_ACTIVATE;
+	else if (strcmp(irq_name, XPC_NOTIFY_IRQ_NAME) == 0)
+		mq->irq = SGI_XPC_NOTIFY;
+	else
+		return -EINVAL;
+
+	mq->mmr_value = (unsigned long)cpu_physical_id(cpu) << 32 | mq->irq;
+	uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mq->mmr_value);
+#else
+	#error not a supported configuration
+#endif
+
+	return 0;
+}
+
+static void
+xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq)
+{
+#if defined CONFIG_X86_64
+	uv_teardown_irq(mq->irq);
+
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+	int mmr_pnode;
+	unsigned long mmr_value;
+
+	mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
+	mmr_value = 1UL << 16;
+
+	uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mmr_value);
+#else
+	#error not a supported configuration
+#endif
+}
+
+static int
+xpc_gru_mq_watchlist_alloc_uv(struct xpc_gru_mq_uv *mq)
+{
+	int ret;
+
+#if defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+	int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
+
+	ret = sn_mq_watchlist_alloc(mmr_pnode, (void *)uv_gpa(mq->address),
+				    mq->order, &mq->mmr_offset);
+	if (ret < 0) {
+		dev_err(xpc_part, "sn_mq_watchlist_alloc() failed, ret=%d\n",
+			ret);
+		return -EBUSY;
+	}
+#elif defined CONFIG_X86_64
+	ret = uv_bios_mq_watchlist_alloc(uv_gpa(mq->address),
+					 mq->order, &mq->mmr_offset);
+	if (ret < 0) {
+		dev_err(xpc_part, "uv_bios_mq_watchlist_alloc() failed, "
+			"ret=%d\n", ret);
+		return ret;
+	}
+#else
+	#error not a supported configuration
+#endif
+
+	mq->watchlist_num = ret;
+	return 0;
+}
+
+static void
+xpc_gru_mq_watchlist_free_uv(struct xpc_gru_mq_uv *mq)
+{
+	int ret;
+	int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
+
+#if defined CONFIG_X86_64
+	ret = uv_bios_mq_watchlist_free(mmr_pnode, mq->watchlist_num);
+	BUG_ON(ret != BIOS_STATUS_SUCCESS);
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+	ret = sn_mq_watchlist_free(mmr_pnode, mq->watchlist_num);
+	BUG_ON(ret != SALRET_OK);
+#else
+	#error not a supported configuration
+#endif
+}
+
+static struct xpc_gru_mq_uv *
+xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name,
+		     irq_handler_t irq_handler)
+{
+	enum xp_retval xp_ret;
+	int ret;
+	int nid;
+	int nasid;
+	int pg_order;
+	struct page *page;
+	struct xpc_gru_mq_uv *mq;
+	struct uv_IO_APIC_route_entry *mmr_value;
+
+	mq = kmalloc(sizeof(struct xpc_gru_mq_uv), GFP_KERNEL);
+	if (mq == NULL) {
+		dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() "
+			"a xpc_gru_mq_uv structure\n");
+		ret = -ENOMEM;
+		goto out_0;
+	}
+
+	mq->gru_mq_desc = kzalloc(sizeof(struct gru_message_queue_desc),
+				  GFP_KERNEL);
+	if (mq->gru_mq_desc == NULL) {
+		dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() "
+			"a gru_message_queue_desc structure\n");
+		ret = -ENOMEM;
+		goto out_1;
+	}
+
+	pg_order = get_order(mq_size);
+	mq->order = pg_order + PAGE_SHIFT;
+	mq_size = 1UL << mq->order;
+
+	mq->mmr_blade = uv_cpu_to_blade_id(cpu);
+
+	nid = cpu_to_node(cpu);
+	page = __alloc_pages_node(nid,
+				      GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
+				      pg_order);
+	if (page == NULL) {
+		dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d "
+			"bytes of memory on nid=%d for GRU mq\n", mq_size, nid);
+		ret = -ENOMEM;
+		goto out_2;
+	}
+	mq->address = page_address(page);
+
+	/* enable generation of irq when GRU mq operation occurs to this mq */
+	ret = xpc_gru_mq_watchlist_alloc_uv(mq);
+	if (ret != 0)
+		goto out_3;
+
+	ret = xpc_get_gru_mq_irq_uv(mq, cpu, irq_name);
+	if (ret != 0)
+		goto out_4;
+
+	ret = request_irq(mq->irq, irq_handler, 0, irq_name, NULL);
+	if (ret != 0) {
+		dev_err(xpc_part, "request_irq(irq=%d) returned error=%d\n",
+			mq->irq, -ret);
+		goto out_5;
+	}
+
+	nasid = UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpu));
+
+	mmr_value = (struct uv_IO_APIC_route_entry *)&mq->mmr_value;
+	ret = gru_create_message_queue(mq->gru_mq_desc, mq->address, mq_size,
+				     nasid, mmr_value->vector, mmr_value->dest);
+	if (ret != 0) {
+		dev_err(xpc_part, "gru_create_message_queue() returned "
+			"error=%d\n", ret);
+		ret = -EINVAL;
+		goto out_6;
+	}
+
+	/* allow other partitions to access this GRU mq */
+	xp_ret = xp_expand_memprotect(xp_pa(mq->address), mq_size);
+	if (xp_ret != xpSuccess) {
+		ret = -EACCES;
+		goto out_6;
+	}
+
+	return mq;
+
+	/* something went wrong */
+out_6:
+	free_irq(mq->irq, NULL);
+out_5:
+	xpc_release_gru_mq_irq_uv(mq);
+out_4:
+	xpc_gru_mq_watchlist_free_uv(mq);
+out_3:
+	free_pages((unsigned long)mq->address, pg_order);
+out_2:
+	kfree(mq->gru_mq_desc);
+out_1:
+	kfree(mq);
+out_0:
+	return ERR_PTR(ret);
+}
+
+static void
+xpc_destroy_gru_mq_uv(struct xpc_gru_mq_uv *mq)
+{
+	unsigned int mq_size;
+	int pg_order;
+	int ret;
+
+	/* disallow other partitions to access GRU mq */
+	mq_size = 1UL << mq->order;
+	ret = xp_restrict_memprotect(xp_pa(mq->address), mq_size);
+	BUG_ON(ret != xpSuccess);
+
+	/* unregister irq handler and release mq irq/vector mapping */
+	free_irq(mq->irq, NULL);
+	xpc_release_gru_mq_irq_uv(mq);
+
+	/* disable generation of irq when GRU mq op occurs to this mq */
+	xpc_gru_mq_watchlist_free_uv(mq);
+
+	pg_order = mq->order - PAGE_SHIFT;
+	free_pages((unsigned long)mq->address, pg_order);
+
+	kfree(mq);
+}
+
+static enum xp_retval
+xpc_send_gru_msg(struct gru_message_queue_desc *gru_mq_desc, void *msg,
+		 size_t msg_size)
+{
+	enum xp_retval xp_ret;
+	int ret;
+
+	while (1) {
+		ret = gru_send_message_gpa(gru_mq_desc, msg, msg_size);
+		if (ret == MQE_OK) {
+			xp_ret = xpSuccess;
+			break;
+		}
+
+		if (ret == MQE_QUEUE_FULL) {
+			dev_dbg(xpc_chan, "gru_send_message_gpa() returned "
+				"error=MQE_QUEUE_FULL\n");
+			/* !!! handle QLimit reached; delay & try again */
+			/* ??? Do we add a limit to the number of retries? */
+			(void)msleep_interruptible(10);
+		} else if (ret == MQE_CONGESTION) {
+			dev_dbg(xpc_chan, "gru_send_message_gpa() returned "
+				"error=MQE_CONGESTION\n");
+			/* !!! handle LB Overflow; simply try again */
+			/* ??? Do we add a limit to the number of retries? */
+		} else {
+			/* !!! Currently this is MQE_UNEXPECTED_CB_ERR */
+			dev_err(xpc_chan, "gru_send_message_gpa() returned "
+				"error=%d\n", ret);
+			xp_ret = xpGruSendMqError;
+			break;
+		}
+	}
+	return xp_ret;
+}
+
+static void
+xpc_process_activate_IRQ_rcvd_uv(void)
+{
+	unsigned long irq_flags;
+	short partid;
+	struct xpc_partition *part;
+	u8 act_state_req;
+
+	DBUG_ON(xpc_activate_IRQ_rcvd == 0);
+
+	spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+	for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
+		part = &xpc_partitions[partid];
+
+		if (part->sn.uv.act_state_req == 0)
+			continue;
+
+		xpc_activate_IRQ_rcvd--;
+		BUG_ON(xpc_activate_IRQ_rcvd < 0);
+
+		act_state_req = part->sn.uv.act_state_req;
+		part->sn.uv.act_state_req = 0;
+		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+		if (act_state_req == XPC_P_ASR_ACTIVATE_UV) {
+			if (part->act_state == XPC_P_AS_INACTIVE)
+				xpc_activate_partition(part);
+			else if (part->act_state == XPC_P_AS_DEACTIVATING)
+				XPC_DEACTIVATE_PARTITION(part, xpReactivating);
+
+		} else if (act_state_req == XPC_P_ASR_REACTIVATE_UV) {
+			if (part->act_state == XPC_P_AS_INACTIVE)
+				xpc_activate_partition(part);
+			else
+				XPC_DEACTIVATE_PARTITION(part, xpReactivating);
+
+		} else if (act_state_req == XPC_P_ASR_DEACTIVATE_UV) {
+			XPC_DEACTIVATE_PARTITION(part, part->sn.uv.reason);
+
+		} else {
+			BUG();
+		}
+
+		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+		if (xpc_activate_IRQ_rcvd == 0)
+			break;
+	}
+	spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+}
+
+static void
+xpc_handle_activate_mq_msg_uv(struct xpc_partition *part,
+			      struct xpc_activate_mq_msghdr_uv *msg_hdr,
+			      int part_setup,
+			      int *wakeup_hb_checker)
+{
+	unsigned long irq_flags;
+	struct xpc_partition_uv *part_uv = &part->sn.uv;
+	struct xpc_openclose_args *args;
+
+	part_uv->remote_act_state = msg_hdr->act_state;
+
+	switch (msg_hdr->type) {
+	case XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV:
+		/* syncing of remote_act_state was just done above */
+		break;
+
+	case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: {
+		struct xpc_activate_mq_msg_activate_req_uv *msg;
+
+		/*
+		 * ??? Do we deal here with ts_jiffies being different
+		 * ??? if act_state != XPC_P_AS_INACTIVE instead of
+		 * ??? below?
+		 */
+		msg = container_of(msg_hdr, struct
+				   xpc_activate_mq_msg_activate_req_uv, hdr);
+
+		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+		if (part_uv->act_state_req == 0)
+			xpc_activate_IRQ_rcvd++;
+		part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV;
+		part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */
+		part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies;
+		part_uv->heartbeat_gpa = msg->heartbeat_gpa;
+
+		if (msg->activate_gru_mq_desc_gpa !=
+		    part_uv->activate_gru_mq_desc_gpa) {
+			spin_lock(&part_uv->flags_lock);
+			part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
+			spin_unlock(&part_uv->flags_lock);
+			part_uv->activate_gru_mq_desc_gpa =
+			    msg->activate_gru_mq_desc_gpa;
+		}
+		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+		(*wakeup_hb_checker)++;
+		break;
+	}
+	case XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV: {
+		struct xpc_activate_mq_msg_deactivate_req_uv *msg;
+
+		msg = container_of(msg_hdr, struct
+				   xpc_activate_mq_msg_deactivate_req_uv, hdr);
+
+		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+		if (part_uv->act_state_req == 0)
+			xpc_activate_IRQ_rcvd++;
+		part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
+		part_uv->reason = msg->reason;
+		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+		(*wakeup_hb_checker)++;
+		return;
+	}
+	case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV: {
+		struct xpc_activate_mq_msg_chctl_closerequest_uv *msg;
+
+		if (!part_setup)
+			break;
+
+		msg = container_of(msg_hdr, struct
+				   xpc_activate_mq_msg_chctl_closerequest_uv,
+				   hdr);
+		args = &part->remote_openclose_args[msg->ch_number];
+		args->reason = msg->reason;
+
+		spin_lock_irqsave(&part->chctl_lock, irq_flags);
+		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREQUEST;
+		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+		xpc_wakeup_channel_mgr(part);
+		break;
+	}
+	case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV: {
+		struct xpc_activate_mq_msg_chctl_closereply_uv *msg;
+
+		if (!part_setup)
+			break;
+
+		msg = container_of(msg_hdr, struct
+				   xpc_activate_mq_msg_chctl_closereply_uv,
+				   hdr);
+
+		spin_lock_irqsave(&part->chctl_lock, irq_flags);
+		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREPLY;
+		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+		xpc_wakeup_channel_mgr(part);
+		break;
+	}
+	case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV: {
+		struct xpc_activate_mq_msg_chctl_openrequest_uv *msg;
+
+		if (!part_setup)
+			break;
+
+		msg = container_of(msg_hdr, struct
+				   xpc_activate_mq_msg_chctl_openrequest_uv,
+				   hdr);
+		args = &part->remote_openclose_args[msg->ch_number];
+		args->entry_size = msg->entry_size;
+		args->local_nentries = msg->local_nentries;
+
+		spin_lock_irqsave(&part->chctl_lock, irq_flags);
+		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREQUEST;
+		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+		xpc_wakeup_channel_mgr(part);
+		break;
+	}
+	case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV: {
+		struct xpc_activate_mq_msg_chctl_openreply_uv *msg;
+
+		if (!part_setup)
+			break;
+
+		msg = container_of(msg_hdr, struct
+				   xpc_activate_mq_msg_chctl_openreply_uv, hdr);
+		args = &part->remote_openclose_args[msg->ch_number];
+		args->remote_nentries = msg->remote_nentries;
+		args->local_nentries = msg->local_nentries;
+		args->local_msgqueue_pa = msg->notify_gru_mq_desc_gpa;
+
+		spin_lock_irqsave(&part->chctl_lock, irq_flags);
+		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREPLY;
+		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+		xpc_wakeup_channel_mgr(part);
+		break;
+	}
+	case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV: {
+		struct xpc_activate_mq_msg_chctl_opencomplete_uv *msg;
+
+		if (!part_setup)
+			break;
+
+		msg = container_of(msg_hdr, struct
+				xpc_activate_mq_msg_chctl_opencomplete_uv, hdr);
+		spin_lock_irqsave(&part->chctl_lock, irq_flags);
+		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENCOMPLETE;
+		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+		xpc_wakeup_channel_mgr(part);
+	}
+	case XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV:
+		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+		part_uv->flags |= XPC_P_ENGAGED_UV;
+		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+		break;
+
+	case XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV:
+		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+		part_uv->flags &= ~XPC_P_ENGAGED_UV;
+		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+		break;
+
+	default:
+		dev_err(xpc_part, "received unknown activate_mq msg type=%d "
+			"from partition=%d\n", msg_hdr->type, XPC_PARTID(part));
+
+		/* get hb checker to deactivate from the remote partition */
+		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+		if (part_uv->act_state_req == 0)
+			xpc_activate_IRQ_rcvd++;
+		part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
+		part_uv->reason = xpBadMsgType;
+		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+		(*wakeup_hb_checker)++;
+		return;
+	}
+
+	if (msg_hdr->rp_ts_jiffies != part->remote_rp_ts_jiffies &&
+	    part->remote_rp_ts_jiffies != 0) {
+		/*
+		 * ??? Does what we do here need to be sensitive to
+		 * ??? act_state or remote_act_state?
+		 */
+		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+		if (part_uv->act_state_req == 0)
+			xpc_activate_IRQ_rcvd++;
+		part_uv->act_state_req = XPC_P_ASR_REACTIVATE_UV;
+		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+		(*wakeup_hb_checker)++;
+	}
+}
+
+static irqreturn_t
+xpc_handle_activate_IRQ_uv(int irq, void *dev_id)
+{
+	struct xpc_activate_mq_msghdr_uv *msg_hdr;
+	short partid;
+	struct xpc_partition *part;
+	int wakeup_hb_checker = 0;
+	int part_referenced;
+
+	while (1) {
+		msg_hdr = gru_get_next_message(xpc_activate_mq_uv->gru_mq_desc);
+		if (msg_hdr == NULL)
+			break;
+
+		partid = msg_hdr->partid;
+		if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
+			dev_err(xpc_part, "xpc_handle_activate_IRQ_uv() "
+				"received invalid partid=0x%x in message\n",
+				partid);
+		} else {
+			part = &xpc_partitions[partid];
+
+			part_referenced = xpc_part_ref(part);
+			xpc_handle_activate_mq_msg_uv(part, msg_hdr,
+						      part_referenced,
+						      &wakeup_hb_checker);
+			if (part_referenced)
+				xpc_part_deref(part);
+		}
+
+		gru_free_message(xpc_activate_mq_uv->gru_mq_desc, msg_hdr);
+	}
+
+	if (wakeup_hb_checker)
+		wake_up_interruptible(&xpc_activate_IRQ_wq);
+
+	return IRQ_HANDLED;
+}
+
+static enum xp_retval
+xpc_cache_remote_gru_mq_desc_uv(struct gru_message_queue_desc *gru_mq_desc,
+				unsigned long gru_mq_desc_gpa)
+{
+	enum xp_retval ret;
+
+	ret = xp_remote_memcpy(uv_gpa(gru_mq_desc), gru_mq_desc_gpa,
+			       sizeof(struct gru_message_queue_desc));
+	if (ret == xpSuccess)
+		gru_mq_desc->mq = NULL;
+
+	return ret;
+}
+
+static enum xp_retval
+xpc_send_activate_IRQ_uv(struct xpc_partition *part, void *msg, size_t msg_size,
+			 int msg_type)
+{
+	struct xpc_activate_mq_msghdr_uv *msg_hdr = msg;
+	struct xpc_partition_uv *part_uv = &part->sn.uv;
+	struct gru_message_queue_desc *gru_mq_desc;
+	unsigned long irq_flags;
+	enum xp_retval ret;
+
+	DBUG_ON(msg_size > XPC_ACTIVATE_MSG_SIZE_UV);
+
+	msg_hdr->type = msg_type;
+	msg_hdr->partid = xp_partition_id;
+	msg_hdr->act_state = part->act_state;
+	msg_hdr->rp_ts_jiffies = xpc_rsvd_page->ts_jiffies;
+
+	mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex);
+again:
+	if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV)) {
+		gru_mq_desc = part_uv->cached_activate_gru_mq_desc;
+		if (gru_mq_desc == NULL) {
+			gru_mq_desc = kmalloc(sizeof(struct
+					      gru_message_queue_desc),
+					      GFP_KERNEL);
+			if (gru_mq_desc == NULL) {
+				ret = xpNoMemory;
+				goto done;
+			}
+			part_uv->cached_activate_gru_mq_desc = gru_mq_desc;
+		}
+
+		ret = xpc_cache_remote_gru_mq_desc_uv(gru_mq_desc,
+						      part_uv->
+						      activate_gru_mq_desc_gpa);
+		if (ret != xpSuccess)
+			goto done;
+
+		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+		part_uv->flags |= XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
+		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+	}
+
+	/* ??? Is holding a spin_lock (ch->lock) during this call a bad idea? */
+	ret = xpc_send_gru_msg(part_uv->cached_activate_gru_mq_desc, msg,
+			       msg_size);
+	if (ret != xpSuccess) {
+		smp_rmb();	/* ensure a fresh copy of part_uv->flags */
+		if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV))
+			goto again;
+	}
+done:
+	mutex_unlock(&part_uv->cached_activate_gru_mq_desc_mutex);
+	return ret;
+}
+
+static void
+xpc_send_activate_IRQ_part_uv(struct xpc_partition *part, void *msg,
+			      size_t msg_size, int msg_type)
+{
+	enum xp_retval ret;
+
+	ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type);
+	if (unlikely(ret != xpSuccess))
+		XPC_DEACTIVATE_PARTITION(part, ret);
+}
+
+static void
+xpc_send_activate_IRQ_ch_uv(struct xpc_channel *ch, unsigned long *irq_flags,
+			 void *msg, size_t msg_size, int msg_type)
+{
+	struct xpc_partition *part = &xpc_partitions[ch->partid];
+	enum xp_retval ret;
+
+	ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type);
+	if (unlikely(ret != xpSuccess)) {
+		if (irq_flags != NULL)
+			spin_unlock_irqrestore(&ch->lock, *irq_flags);
+
+		XPC_DEACTIVATE_PARTITION(part, ret);
+
+		if (irq_flags != NULL)
+			spin_lock_irqsave(&ch->lock, *irq_flags);
+	}
+}
+
+static void
+xpc_send_local_activate_IRQ_uv(struct xpc_partition *part, int act_state_req)
+{
+	unsigned long irq_flags;
+	struct xpc_partition_uv *part_uv = &part->sn.uv;
+
+	/*
+	 * !!! Make our side think that the remote partition sent an activate
+	 * !!! mq message our way by doing what the activate IRQ handler would
+	 * !!! do had one really been sent.
+	 */
+
+	spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+	if (part_uv->act_state_req == 0)
+		xpc_activate_IRQ_rcvd++;
+	part_uv->act_state_req = act_state_req;
+	spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+	wake_up_interruptible(&xpc_activate_IRQ_wq);
+}
+
+static enum xp_retval
+xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa,
+				  size_t *len)
+{
+	s64 status;
+	enum xp_retval ret;
+
+#if defined CONFIG_X86_64
+	status = uv_bios_reserved_page_pa((u64)buf, cookie, (u64 *)rp_pa,
+					  (u64 *)len);
+	if (status == BIOS_STATUS_SUCCESS)
+		ret = xpSuccess;
+	else if (status == BIOS_STATUS_MORE_PASSES)
+		ret = xpNeedMoreInfo;
+	else
+		ret = xpBiosError;
+
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+	status = sn_partition_reserved_page_pa((u64)buf, cookie, rp_pa, len);
+	if (status == SALRET_OK)
+		ret = xpSuccess;
+	else if (status == SALRET_MORE_PASSES)
+		ret = xpNeedMoreInfo;
+	else
+		ret = xpSalError;
+
+#else
+	#error not a supported configuration
+#endif
+
+	return ret;
+}
+
+static int
+xpc_setup_rsvd_page_uv(struct xpc_rsvd_page *rp)
+{
+	xpc_heartbeat_uv =
+	    &xpc_partitions[sn_partition_id].sn.uv.cached_heartbeat;
+	rp->sn.uv.heartbeat_gpa = uv_gpa(xpc_heartbeat_uv);
+	rp->sn.uv.activate_gru_mq_desc_gpa =
+	    uv_gpa(xpc_activate_mq_uv->gru_mq_desc);
+	return 0;
+}
+
+static void
+xpc_allow_hb_uv(short partid)
+{
+}
+
+static void
+xpc_disallow_hb_uv(short partid)
+{
+}
+
+static void
+xpc_disallow_all_hbs_uv(void)
+{
+}
+
+static void
+xpc_increment_heartbeat_uv(void)
+{
+	xpc_heartbeat_uv->value++;
+}
+
+static void
+xpc_offline_heartbeat_uv(void)
+{
+	xpc_increment_heartbeat_uv();
+	xpc_heartbeat_uv->offline = 1;
+}
+
+static void
+xpc_online_heartbeat_uv(void)
+{
+	xpc_increment_heartbeat_uv();
+	xpc_heartbeat_uv->offline = 0;
+}
+
+static void
+xpc_heartbeat_init_uv(void)
+{
+	xpc_heartbeat_uv->value = 1;
+	xpc_heartbeat_uv->offline = 0;
+}
+
+static void
+xpc_heartbeat_exit_uv(void)
+{
+	xpc_offline_heartbeat_uv();
+}
+
+static enum xp_retval
+xpc_get_remote_heartbeat_uv(struct xpc_partition *part)
+{
+	struct xpc_partition_uv *part_uv = &part->sn.uv;
+	enum xp_retval ret;
+
+	ret = xp_remote_memcpy(uv_gpa(&part_uv->cached_heartbeat),
+			       part_uv->heartbeat_gpa,
+			       sizeof(struct xpc_heartbeat_uv));
+	if (ret != xpSuccess)
+		return ret;
+
+	if (part_uv->cached_heartbeat.value == part->last_heartbeat &&
+	    !part_uv->cached_heartbeat.offline) {
+
+		ret = xpNoHeartbeat;
+	} else {
+		part->last_heartbeat = part_uv->cached_heartbeat.value;
+	}
+	return ret;
+}
+
+static void
+xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
+				    unsigned long remote_rp_gpa, int nasid)
+{
+	short partid = remote_rp->SAL_partid;
+	struct xpc_partition *part = &xpc_partitions[partid];
+	struct xpc_activate_mq_msg_activate_req_uv msg;
+
+	part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */
+	part->remote_rp_ts_jiffies = remote_rp->ts_jiffies;
+	part->sn.uv.heartbeat_gpa = remote_rp->sn.uv.heartbeat_gpa;
+	part->sn.uv.activate_gru_mq_desc_gpa =
+	    remote_rp->sn.uv.activate_gru_mq_desc_gpa;
+
+	/*
+	 * ??? Is it a good idea to make this conditional on what is
+	 * ??? potentially stale state information?
+	 */
+	if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) {
+		msg.rp_gpa = uv_gpa(xpc_rsvd_page);
+		msg.heartbeat_gpa = xpc_rsvd_page->sn.uv.heartbeat_gpa;
+		msg.activate_gru_mq_desc_gpa =
+		    xpc_rsvd_page->sn.uv.activate_gru_mq_desc_gpa;
+		xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+					   XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV);
+	}
+
+	if (part->act_state == XPC_P_AS_INACTIVE)
+		xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV);
+}
+
+static void
+xpc_request_partition_reactivation_uv(struct xpc_partition *part)
+{
+	xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV);
+}
+
+static void
+xpc_request_partition_deactivation_uv(struct xpc_partition *part)
+{
+	struct xpc_activate_mq_msg_deactivate_req_uv msg;
+
+	/*
+	 * ??? Is it a good idea to make this conditional on what is
+	 * ??? potentially stale state information?
+	 */
+	if (part->sn.uv.remote_act_state != XPC_P_AS_DEACTIVATING &&
+	    part->sn.uv.remote_act_state != XPC_P_AS_INACTIVE) {
+
+		msg.reason = part->reason;
+		xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+					 XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV);
+	}
+}
+
+static void
+xpc_cancel_partition_deactivation_request_uv(struct xpc_partition *part)
+{
+	/* nothing needs to be done */
+	return;
+}
+
+static void
+xpc_init_fifo_uv(struct xpc_fifo_head_uv *head)
+{
+	head->first = NULL;
+	head->last = NULL;
+	spin_lock_init(&head->lock);
+	head->n_entries = 0;
+}
+
+static void *
+xpc_get_fifo_entry_uv(struct xpc_fifo_head_uv *head)
+{
+	unsigned long irq_flags;
+	struct xpc_fifo_entry_uv *first;
+
+	spin_lock_irqsave(&head->lock, irq_flags);
+	first = head->first;
+	if (head->first != NULL) {
+		head->first = first->next;
+		if (head->first == NULL)
+			head->last = NULL;
+
+		head->n_entries--;
+		BUG_ON(head->n_entries < 0);
+
+		first->next = NULL;
+	}
+	spin_unlock_irqrestore(&head->lock, irq_flags);
+	return first;
+}
+
+static void
+xpc_put_fifo_entry_uv(struct xpc_fifo_head_uv *head,
+		      struct xpc_fifo_entry_uv *last)
+{
+	unsigned long irq_flags;
+
+	last->next = NULL;
+	spin_lock_irqsave(&head->lock, irq_flags);
+	if (head->last != NULL)
+		head->last->next = last;
+	else
+		head->first = last;
+	head->last = last;
+	head->n_entries++;
+	spin_unlock_irqrestore(&head->lock, irq_flags);
+}
+
+static int
+xpc_n_of_fifo_entries_uv(struct xpc_fifo_head_uv *head)
+{
+	return head->n_entries;
+}
+
+/*
+ * Setup the channel structures that are uv specific.
+ */
+static enum xp_retval
+xpc_setup_ch_structures_uv(struct xpc_partition *part)
+{
+	struct xpc_channel_uv *ch_uv;
+	int ch_number;
+
+	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
+		ch_uv = &part->channels[ch_number].sn.uv;
+
+		xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
+		xpc_init_fifo_uv(&ch_uv->recv_msg_list);
+	}
+
+	return xpSuccess;
+}
+
+/*
+ * Teardown the channel structures that are uv specific.
+ */
+static void
+xpc_teardown_ch_structures_uv(struct xpc_partition *part)
+{
+	/* nothing needs to be done */
+	return;
+}
+
+static enum xp_retval
+xpc_make_first_contact_uv(struct xpc_partition *part)
+{
+	struct xpc_activate_mq_msg_uv msg;
+
+	/*
+	 * We send a sync msg to get the remote partition's remote_act_state
+	 * updated to our current act_state which at this point should
+	 * be XPC_P_AS_ACTIVATING.
+	 */
+	xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+				      XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV);
+
+	while (!((part->sn.uv.remote_act_state == XPC_P_AS_ACTIVATING) ||
+		 (part->sn.uv.remote_act_state == XPC_P_AS_ACTIVE))) {
+
+		dev_dbg(xpc_part, "waiting to make first contact with "
+			"partition %d\n", XPC_PARTID(part));
+
+		/* wait a 1/4 of a second or so */
+		(void)msleep_interruptible(250);
+
+		if (part->act_state == XPC_P_AS_DEACTIVATING)
+			return part->reason;
+	}
+
+	return xpSuccess;
+}
+
+static u64
+xpc_get_chctl_all_flags_uv(struct xpc_partition *part)
+{
+	unsigned long irq_flags;
+	union xpc_channel_ctl_flags chctl;
+
+	spin_lock_irqsave(&part->chctl_lock, irq_flags);
+	chctl = part->chctl;
+	if (chctl.all_flags != 0)
+		part->chctl.all_flags = 0;
+
+	spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+	return chctl.all_flags;
+}
+
+static enum xp_retval
+xpc_allocate_send_msg_slot_uv(struct xpc_channel *ch)
+{
+	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
+	struct xpc_send_msg_slot_uv *msg_slot;
+	unsigned long irq_flags;
+	int nentries;
+	int entry;
+	size_t nbytes;
+
+	for (nentries = ch->local_nentries; nentries > 0; nentries--) {
+		nbytes = nentries * sizeof(struct xpc_send_msg_slot_uv);
+		ch_uv->send_msg_slots = kzalloc(nbytes, GFP_KERNEL);
+		if (ch_uv->send_msg_slots == NULL)
+			continue;
+
+		for (entry = 0; entry < nentries; entry++) {
+			msg_slot = &ch_uv->send_msg_slots[entry];
+
+			msg_slot->msg_slot_number = entry;
+			xpc_put_fifo_entry_uv(&ch_uv->msg_slot_free_list,
+					      &msg_slot->next);
+		}
+
+		spin_lock_irqsave(&ch->lock, irq_flags);
+		if (nentries < ch->local_nentries)
+			ch->local_nentries = nentries;
+		spin_unlock_irqrestore(&ch->lock, irq_flags);
+		return xpSuccess;
+	}
+
+	return xpNoMemory;
+}
+
+static enum xp_retval
+xpc_allocate_recv_msg_slot_uv(struct xpc_channel *ch)
+{
+	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
+	struct xpc_notify_mq_msg_uv *msg_slot;
+	unsigned long irq_flags;
+	int nentries;
+	int entry;
+	size_t nbytes;
+
+	for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
+		nbytes = nentries * ch->entry_size;
+		ch_uv->recv_msg_slots = kzalloc(nbytes, GFP_KERNEL);
+		if (ch_uv->recv_msg_slots == NULL)
+			continue;
+
+		for (entry = 0; entry < nentries; entry++) {
+			msg_slot = ch_uv->recv_msg_slots +
+			    entry * ch->entry_size;
+
+			msg_slot->hdr.msg_slot_number = entry;
+		}
+
+		spin_lock_irqsave(&ch->lock, irq_flags);
+		if (nentries < ch->remote_nentries)
+			ch->remote_nentries = nentries;
+		spin_unlock_irqrestore(&ch->lock, irq_flags);
+		return xpSuccess;
+	}
+
+	return xpNoMemory;
+}
+
+/*
+ * Allocate msg_slots associated with the channel.
+ */
+static enum xp_retval
+xpc_setup_msg_structures_uv(struct xpc_channel *ch)
+{
+	static enum xp_retval ret;
+	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
+
+	DBUG_ON(ch->flags & XPC_C_SETUP);
+
+	ch_uv->cached_notify_gru_mq_desc = kmalloc(sizeof(struct
+						   gru_message_queue_desc),
+						   GFP_KERNEL);
+	if (ch_uv->cached_notify_gru_mq_desc == NULL)
+		return xpNoMemory;
+
+	ret = xpc_allocate_send_msg_slot_uv(ch);
+	if (ret == xpSuccess) {
+
+		ret = xpc_allocate_recv_msg_slot_uv(ch);
+		if (ret != xpSuccess) {
+			kfree(ch_uv->send_msg_slots);
+			xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
+		}
+	}
+	return ret;
+}
+
+/*
+ * Free up msg_slots and clear other stuff that were setup for the specified
+ * channel.
+ */
+static void
+xpc_teardown_msg_structures_uv(struct xpc_channel *ch)
+{
+	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
+
+	DBUG_ON(!spin_is_locked(&ch->lock));
+
+	kfree(ch_uv->cached_notify_gru_mq_desc);
+	ch_uv->cached_notify_gru_mq_desc = NULL;
+
+	if (ch->flags & XPC_C_SETUP) {
+		xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
+		kfree(ch_uv->send_msg_slots);
+		xpc_init_fifo_uv(&ch_uv->recv_msg_list);
+		kfree(ch_uv->recv_msg_slots);
+	}
+}
+
+static void
+xpc_send_chctl_closerequest_uv(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	struct xpc_activate_mq_msg_chctl_closerequest_uv msg;
+
+	msg.ch_number = ch->number;
+	msg.reason = ch->reason;
+	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
+				    XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV);
+}
+
+static void
+xpc_send_chctl_closereply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	struct xpc_activate_mq_msg_chctl_closereply_uv msg;
+
+	msg.ch_number = ch->number;
+	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
+				    XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV);
+}
+
+static void
+xpc_send_chctl_openrequest_uv(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	struct xpc_activate_mq_msg_chctl_openrequest_uv msg;
+
+	msg.ch_number = ch->number;
+	msg.entry_size = ch->entry_size;
+	msg.local_nentries = ch->local_nentries;
+	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
+				    XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV);
+}
+
+static void
+xpc_send_chctl_openreply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	struct xpc_activate_mq_msg_chctl_openreply_uv msg;
+
+	msg.ch_number = ch->number;
+	msg.local_nentries = ch->local_nentries;
+	msg.remote_nentries = ch->remote_nentries;
+	msg.notify_gru_mq_desc_gpa = uv_gpa(xpc_notify_mq_uv->gru_mq_desc);
+	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
+				    XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV);
+}
+
+static void
+xpc_send_chctl_opencomplete_uv(struct xpc_channel *ch, unsigned long *irq_flags)
+{
+	struct xpc_activate_mq_msg_chctl_opencomplete_uv msg;
+
+	msg.ch_number = ch->number;
+	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
+				    XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV);
+}
+
+static void
+xpc_send_chctl_local_msgrequest_uv(struct xpc_partition *part, int ch_number)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&part->chctl_lock, irq_flags);
+	part->chctl.flags[ch_number] |= XPC_CHCTL_MSGREQUEST;
+	spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
+
+	xpc_wakeup_channel_mgr(part);
+}
+
+static enum xp_retval
+xpc_save_remote_msgqueue_pa_uv(struct xpc_channel *ch,
+			       unsigned long gru_mq_desc_gpa)
+{
+	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
+
+	DBUG_ON(ch_uv->cached_notify_gru_mq_desc == NULL);
+	return xpc_cache_remote_gru_mq_desc_uv(ch_uv->cached_notify_gru_mq_desc,
+					       gru_mq_desc_gpa);
+}
+
+static void
+xpc_indicate_partition_engaged_uv(struct xpc_partition *part)
+{
+	struct xpc_activate_mq_msg_uv msg;
+
+	xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+				      XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV);
+}
+
+static void
+xpc_indicate_partition_disengaged_uv(struct xpc_partition *part)
+{
+	struct xpc_activate_mq_msg_uv msg;
+
+	xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
+				      XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV);
+}
+
+static void
+xpc_assume_partition_disengaged_uv(short partid)
+{
+	struct xpc_partition_uv *part_uv = &xpc_partitions[partid].sn.uv;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+	part_uv->flags &= ~XPC_P_ENGAGED_UV;
+	spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+}
+
+static int
+xpc_partition_engaged_uv(short partid)
+{
+	return (xpc_partitions[partid].sn.uv.flags & XPC_P_ENGAGED_UV) != 0;
+}
+
+static int
+xpc_any_partition_engaged_uv(void)
+{
+	struct xpc_partition_uv *part_uv;
+	short partid;
+
+	for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
+		part_uv = &xpc_partitions[partid].sn.uv;
+		if ((part_uv->flags & XPC_P_ENGAGED_UV) != 0)
+			return 1;
+	}
+	return 0;
+}
+
+static enum xp_retval
+xpc_allocate_msg_slot_uv(struct xpc_channel *ch, u32 flags,
+			 struct xpc_send_msg_slot_uv **address_of_msg_slot)
+{
+	enum xp_retval ret;
+	struct xpc_send_msg_slot_uv *msg_slot;
+	struct xpc_fifo_entry_uv *entry;
+
+	while (1) {
+		entry = xpc_get_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list);
+		if (entry != NULL)
+			break;
+
+		if (flags & XPC_NOWAIT)
+			return xpNoWait;
+
+		ret = xpc_allocate_msg_wait(ch);
+		if (ret != xpInterrupted && ret != xpTimeout)
+			return ret;
+	}
+
+	msg_slot = container_of(entry, struct xpc_send_msg_slot_uv, next);
+	*address_of_msg_slot = msg_slot;
+	return xpSuccess;
+}
+
+static void
+xpc_free_msg_slot_uv(struct xpc_channel *ch,
+		     struct xpc_send_msg_slot_uv *msg_slot)
+{
+	xpc_put_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list, &msg_slot->next);
+
+	/* wakeup anyone waiting for a free msg slot */
+	if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
+		wake_up(&ch->msg_allocate_wq);
+}
+
+static void
+xpc_notify_sender_uv(struct xpc_channel *ch,
+		     struct xpc_send_msg_slot_uv *msg_slot,
+		     enum xp_retval reason)
+{
+	xpc_notify_func func = msg_slot->func;
+
+	if (func != NULL && cmpxchg(&msg_slot->func, func, NULL) == func) {
+
+		atomic_dec(&ch->n_to_notify);
+
+		dev_dbg(xpc_chan, "msg_slot->func() called, msg_slot=0x%p "
+			"msg_slot_number=%d partid=%d channel=%d\n", msg_slot,
+			msg_slot->msg_slot_number, ch->partid, ch->number);
+
+		func(reason, ch->partid, ch->number, msg_slot->key);
+
+		dev_dbg(xpc_chan, "msg_slot->func() returned, msg_slot=0x%p "
+			"msg_slot_number=%d partid=%d channel=%d\n", msg_slot,
+			msg_slot->msg_slot_number, ch->partid, ch->number);
+	}
+}
+
+static void
+xpc_handle_notify_mq_ack_uv(struct xpc_channel *ch,
+			    struct xpc_notify_mq_msg_uv *msg)
+{
+	struct xpc_send_msg_slot_uv *msg_slot;
+	int entry = msg->hdr.msg_slot_number % ch->local_nentries;
+
+	msg_slot = &ch->sn.uv.send_msg_slots[entry];
+
+	BUG_ON(msg_slot->msg_slot_number != msg->hdr.msg_slot_number);
+	msg_slot->msg_slot_number += ch->local_nentries;
+
+	if (msg_slot->func != NULL)
+		xpc_notify_sender_uv(ch, msg_slot, xpMsgDelivered);
+
+	xpc_free_msg_slot_uv(ch, msg_slot);
+}
+
+static void
+xpc_handle_notify_mq_msg_uv(struct xpc_partition *part,
+			    struct xpc_notify_mq_msg_uv *msg)
+{
+	struct xpc_partition_uv *part_uv = &part->sn.uv;
+	struct xpc_channel *ch;
+	struct xpc_channel_uv *ch_uv;
+	struct xpc_notify_mq_msg_uv *msg_slot;
+	unsigned long irq_flags;
+	int ch_number = msg->hdr.ch_number;
+
+	if (unlikely(ch_number >= part->nchannels)) {
+		dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received invalid "
+			"channel number=0x%x in message from partid=%d\n",
+			ch_number, XPC_PARTID(part));
+
+		/* get hb checker to deactivate from the remote partition */
+		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+		if (part_uv->act_state_req == 0)
+			xpc_activate_IRQ_rcvd++;
+		part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
+		part_uv->reason = xpBadChannelNumber;
+		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
+
+		wake_up_interruptible(&xpc_activate_IRQ_wq);
+		return;
+	}
+
+	ch = &part->channels[ch_number];
+	xpc_msgqueue_ref(ch);
+
+	if (!(ch->flags & XPC_C_CONNECTED)) {
+		xpc_msgqueue_deref(ch);
+		return;
+	}
+
+	/* see if we're really dealing with an ACK for a previously sent msg */
+	if (msg->hdr.size == 0) {
+		xpc_handle_notify_mq_ack_uv(ch, msg);
+		xpc_msgqueue_deref(ch);
+		return;
+	}
+
+	/* we're dealing with a normal message sent via the notify_mq */
+	ch_uv = &ch->sn.uv;
+
+	msg_slot = ch_uv->recv_msg_slots +
+	    (msg->hdr.msg_slot_number % ch->remote_nentries) * ch->entry_size;
+
+	BUG_ON(msg_slot->hdr.size != 0);
+
+	memcpy(msg_slot, msg, msg->hdr.size);
+
+	xpc_put_fifo_entry_uv(&ch_uv->recv_msg_list, &msg_slot->hdr.u.next);
+
+	if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) {
+		/*
+		 * If there is an existing idle kthread get it to deliver
+		 * the payload, otherwise we'll have to get the channel mgr
+		 * for this partition to create a kthread to do the delivery.
+		 */
+		if (atomic_read(&ch->kthreads_idle) > 0)
+			wake_up_nr(&ch->idle_wq, 1);
+		else
+			xpc_send_chctl_local_msgrequest_uv(part, ch->number);
+	}
+	xpc_msgqueue_deref(ch);
+}
+
+static irqreturn_t
+xpc_handle_notify_IRQ_uv(int irq, void *dev_id)
+{
+	struct xpc_notify_mq_msg_uv *msg;
+	short partid;
+	struct xpc_partition *part;
+
+	while ((msg = gru_get_next_message(xpc_notify_mq_uv->gru_mq_desc)) !=
+	       NULL) {
+
+		partid = msg->hdr.partid;
+		if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
+			dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received "
+				"invalid partid=0x%x in message\n", partid);
+		} else {
+			part = &xpc_partitions[partid];
+
+			if (xpc_part_ref(part)) {
+				xpc_handle_notify_mq_msg_uv(part, msg);
+				xpc_part_deref(part);
+			}
+		}
+
+		gru_free_message(xpc_notify_mq_uv->gru_mq_desc, msg);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int
+xpc_n_of_deliverable_payloads_uv(struct xpc_channel *ch)
+{
+	return xpc_n_of_fifo_entries_uv(&ch->sn.uv.recv_msg_list);
+}
+
+static void
+xpc_process_msg_chctl_flags_uv(struct xpc_partition *part, int ch_number)
+{
+	struct xpc_channel *ch = &part->channels[ch_number];
+	int ndeliverable_payloads;
+
+	xpc_msgqueue_ref(ch);
+
+	ndeliverable_payloads = xpc_n_of_deliverable_payloads_uv(ch);
+
+	if (ndeliverable_payloads > 0 &&
+	    (ch->flags & XPC_C_CONNECTED) &&
+	    (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)) {
+
+		xpc_activate_kthreads(ch, ndeliverable_payloads);
+	}
+
+	xpc_msgqueue_deref(ch);
+}
+
+static enum xp_retval
+xpc_send_payload_uv(struct xpc_channel *ch, u32 flags, void *payload,
+		    u16 payload_size, u8 notify_type, xpc_notify_func func,
+		    void *key)
+{
+	enum xp_retval ret = xpSuccess;
+	struct xpc_send_msg_slot_uv *msg_slot = NULL;
+	struct xpc_notify_mq_msg_uv *msg;
+	u8 msg_buffer[XPC_NOTIFY_MSG_SIZE_UV];
+	size_t msg_size;
+
+	DBUG_ON(notify_type != XPC_N_CALL);
+
+	msg_size = sizeof(struct xpc_notify_mq_msghdr_uv) + payload_size;
+	if (msg_size > ch->entry_size)
+		return xpPayloadTooBig;
+
+	xpc_msgqueue_ref(ch);
+
+	if (ch->flags & XPC_C_DISCONNECTING) {
+		ret = ch->reason;
+		goto out_1;
+	}
+	if (!(ch->flags & XPC_C_CONNECTED)) {
+		ret = xpNotConnected;
+		goto out_1;
+	}
+
+	ret = xpc_allocate_msg_slot_uv(ch, flags, &msg_slot);
+	if (ret != xpSuccess)
+		goto out_1;
+
+	if (func != NULL) {
+		atomic_inc(&ch->n_to_notify);
+
+		msg_slot->key = key;
+		smp_wmb(); /* a non-NULL func must hit memory after the key */
+		msg_slot->func = func;
+
+		if (ch->flags & XPC_C_DISCONNECTING) {
+			ret = ch->reason;
+			goto out_2;
+		}
+	}
+
+	msg = (struct xpc_notify_mq_msg_uv *)&msg_buffer;
+	msg->hdr.partid = xp_partition_id;
+	msg->hdr.ch_number = ch->number;
+	msg->hdr.size = msg_size;
+	msg->hdr.msg_slot_number = msg_slot->msg_slot_number;
+	memcpy(&msg->payload, payload, payload_size);
+
+	ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg,
+			       msg_size);
+	if (ret == xpSuccess)
+		goto out_1;
+
+	XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
+out_2:
+	if (func != NULL) {
+		/*
+		 * Try to NULL the msg_slot's func field. If we fail, then
+		 * xpc_notify_senders_of_disconnect_uv() beat us to it, in which
+		 * case we need to pretend we succeeded to send the message
+		 * since the user will get a callout for the disconnect error
+		 * by xpc_notify_senders_of_disconnect_uv(), and to also get an
+		 * error returned here will confuse them. Additionally, since
+		 * in this case the channel is being disconnected we don't need
+		 * to put the the msg_slot back on the free list.
+		 */
+		if (cmpxchg(&msg_slot->func, func, NULL) != func) {
+			ret = xpSuccess;
+			goto out_1;
+		}
+
+		msg_slot->key = NULL;
+		atomic_dec(&ch->n_to_notify);
+	}
+	xpc_free_msg_slot_uv(ch, msg_slot);
+out_1:
+	xpc_msgqueue_deref(ch);
+	return ret;
+}
+
+/*
+ * Tell the callers of xpc_send_notify() that the status of their payloads
+ * is unknown because the channel is now disconnecting.
+ *
+ * We don't worry about putting these msg_slots on the free list since the
+ * msg_slots themselves are about to be kfree'd.
+ */
+static void
+xpc_notify_senders_of_disconnect_uv(struct xpc_channel *ch)
+{
+	struct xpc_send_msg_slot_uv *msg_slot;
+	int entry;
+
+	DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING));
+
+	for (entry = 0; entry < ch->local_nentries; entry++) {
+
+		if (atomic_read(&ch->n_to_notify) == 0)
+			break;
+
+		msg_slot = &ch->sn.uv.send_msg_slots[entry];
+		if (msg_slot->func != NULL)
+			xpc_notify_sender_uv(ch, msg_slot, ch->reason);
+	}
+}
+
+/*
+ * Get the next deliverable message's payload.
+ */
+static void *
+xpc_get_deliverable_payload_uv(struct xpc_channel *ch)
+{
+	struct xpc_fifo_entry_uv *entry;
+	struct xpc_notify_mq_msg_uv *msg;
+	void *payload = NULL;
+
+	if (!(ch->flags & XPC_C_DISCONNECTING)) {
+		entry = xpc_get_fifo_entry_uv(&ch->sn.uv.recv_msg_list);
+		if (entry != NULL) {
+			msg = container_of(entry, struct xpc_notify_mq_msg_uv,
+					   hdr.u.next);
+			payload = &msg->payload;
+		}
+	}
+	return payload;
+}
+
+static void
+xpc_received_payload_uv(struct xpc_channel *ch, void *payload)
+{
+	struct xpc_notify_mq_msg_uv *msg;
+	enum xp_retval ret;
+
+	msg = container_of(payload, struct xpc_notify_mq_msg_uv, payload);
+
+	/* return an ACK to the sender of this message */
+
+	msg->hdr.partid = xp_partition_id;
+	msg->hdr.size = 0;	/* size of zero indicates this is an ACK */
+
+	ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg,
+			       sizeof(struct xpc_notify_mq_msghdr_uv));
+	if (ret != xpSuccess)
+		XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
+}
+
+static struct xpc_arch_operations xpc_arch_ops_uv = {
+	.setup_partitions = xpc_setup_partitions_uv,
+	.teardown_partitions = xpc_teardown_partitions_uv,
+	.process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv,
+	.get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv,
+	.setup_rsvd_page = xpc_setup_rsvd_page_uv,
+
+	.allow_hb = xpc_allow_hb_uv,
+	.disallow_hb = xpc_disallow_hb_uv,
+	.disallow_all_hbs = xpc_disallow_all_hbs_uv,
+	.increment_heartbeat = xpc_increment_heartbeat_uv,
+	.offline_heartbeat = xpc_offline_heartbeat_uv,
+	.online_heartbeat = xpc_online_heartbeat_uv,
+	.heartbeat_init = xpc_heartbeat_init_uv,
+	.heartbeat_exit = xpc_heartbeat_exit_uv,
+	.get_remote_heartbeat = xpc_get_remote_heartbeat_uv,
+
+	.request_partition_activation =
+		xpc_request_partition_activation_uv,
+	.request_partition_reactivation =
+		xpc_request_partition_reactivation_uv,
+	.request_partition_deactivation =
+		xpc_request_partition_deactivation_uv,
+	.cancel_partition_deactivation_request =
+		xpc_cancel_partition_deactivation_request_uv,
+
+	.setup_ch_structures = xpc_setup_ch_structures_uv,
+	.teardown_ch_structures = xpc_teardown_ch_structures_uv,
+
+	.make_first_contact = xpc_make_first_contact_uv,
+
+	.get_chctl_all_flags = xpc_get_chctl_all_flags_uv,
+	.send_chctl_closerequest = xpc_send_chctl_closerequest_uv,
+	.send_chctl_closereply = xpc_send_chctl_closereply_uv,
+	.send_chctl_openrequest = xpc_send_chctl_openrequest_uv,
+	.send_chctl_openreply = xpc_send_chctl_openreply_uv,
+	.send_chctl_opencomplete = xpc_send_chctl_opencomplete_uv,
+	.process_msg_chctl_flags = xpc_process_msg_chctl_flags_uv,
+
+	.save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_uv,
+
+	.setup_msg_structures = xpc_setup_msg_structures_uv,
+	.teardown_msg_structures = xpc_teardown_msg_structures_uv,
+
+	.indicate_partition_engaged = xpc_indicate_partition_engaged_uv,
+	.indicate_partition_disengaged = xpc_indicate_partition_disengaged_uv,
+	.assume_partition_disengaged = xpc_assume_partition_disengaged_uv,
+	.partition_engaged = xpc_partition_engaged_uv,
+	.any_partition_engaged = xpc_any_partition_engaged_uv,
+
+	.n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_uv,
+	.send_payload = xpc_send_payload_uv,
+	.get_deliverable_payload = xpc_get_deliverable_payload_uv,
+	.received_payload = xpc_received_payload_uv,
+	.notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv,
+};
+
+static int
+xpc_init_mq_node(int nid)
+{
+	int cpu;
+
+	get_online_cpus();
+
+	for_each_cpu(cpu, cpumask_of_node(nid)) {
+		xpc_activate_mq_uv =
+			xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, nid,
+					     XPC_ACTIVATE_IRQ_NAME,
+					     xpc_handle_activate_IRQ_uv);
+		if (!IS_ERR(xpc_activate_mq_uv))
+			break;
+	}
+	if (IS_ERR(xpc_activate_mq_uv)) {
+		put_online_cpus();
+		return PTR_ERR(xpc_activate_mq_uv);
+	}
+
+	for_each_cpu(cpu, cpumask_of_node(nid)) {
+		xpc_notify_mq_uv =
+			xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, nid,
+					     XPC_NOTIFY_IRQ_NAME,
+					     xpc_handle_notify_IRQ_uv);
+		if (!IS_ERR(xpc_notify_mq_uv))
+			break;
+	}
+	if (IS_ERR(xpc_notify_mq_uv)) {
+		xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
+		put_online_cpus();
+		return PTR_ERR(xpc_notify_mq_uv);
+	}
+
+	put_online_cpus();
+	return 0;
+}
+
+int
+xpc_init_uv(void)
+{
+	int nid;
+	int ret = 0;
+
+	xpc_arch_ops = xpc_arch_ops_uv;
+
+	if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) {
+		dev_err(xpc_part, "xpc_notify_mq_msghdr_uv is larger than %d\n",
+			XPC_MSG_HDR_MAX_SIZE);
+		return -E2BIG;
+	}
+
+	if (xpc_mq_node < 0)
+		for_each_online_node(nid) {
+			ret = xpc_init_mq_node(nid);
+
+			if (!ret)
+				break;
+		}
+	else
+		ret = xpc_init_mq_node(xpc_mq_node);
+
+	if (ret < 0)
+		dev_err(xpc_part, "xpc_init_mq_node() returned error=%d\n",
+			-ret);
+
+	return ret;
+}
+
+void
+xpc_exit_uv(void)
+{
+	xpc_destroy_gru_mq_uv(xpc_notify_mq_uv);
+	xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
+}
+
+module_param(xpc_mq_node, int, 0);
+MODULE_PARM_DESC(xpc_mq_node, "Node number on which to allocate message queues.");
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
new file mode 100644
index 000000000..44d750d98
--- /dev/null
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -0,0 +1,596 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999-2009 Silicon Graphics, Inc. All rights reserved.
+ */
+
+/*
+ * Cross Partition Network Interface (XPNET) support
+ *
+ *	XPNET provides a virtual network layered on top of the Cross
+ *	Partition communication layer.
+ *
+ *	XPNET provides direct point-to-point and broadcast-like support
+ *	for an ethernet-like device.  The ethernet broadcast medium is
+ *	replaced with a point-to-point message structure which passes
+ *	pointers to a DMA-capable block that a remote partition should
+ *	retrieve and pass to the upper level networking layer.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include "xp.h"
+
+/*
+ * The message payload transferred by XPC.
+ *
+ * buf_pa is the physical address where the DMA should pull from.
+ *
+ * NOTE: for performance reasons, buf_pa should _ALWAYS_ begin on a
+ * cacheline boundary.  To accomplish this, we record the number of
+ * bytes from the beginning of the first cacheline to the first useful
+ * byte of the skb (leadin_ignore) and the number of bytes from the
+ * last useful byte of the skb to the end of the last cacheline
+ * (tailout_ignore).
+ *
+ * size is the number of bytes to transfer which includes the skb->len
+ * (useful bytes of the senders skb) plus the leadin and tailout
+ */
+struct xpnet_message {
+	u16 version;		/* Version for this message */
+	u16 embedded_bytes;	/* #of bytes embedded in XPC message */
+	u32 magic;		/* Special number indicating this is xpnet */
+	unsigned long buf_pa;	/* phys address of buffer to retrieve */
+	u32 size;		/* #of bytes in buffer */
+	u8 leadin_ignore;	/* #of bytes to ignore at the beginning */
+	u8 tailout_ignore;	/* #of bytes to ignore at the end */
+	unsigned char data;	/* body of small packets */
+};
+
+/*
+ * Determine the size of our message, the cacheline aligned size,
+ * and then the number of message will request from XPC.
+ *
+ * XPC expects each message to exist in an individual cacheline.
+ */
+#define XPNET_MSG_SIZE		XPC_MSG_PAYLOAD_MAX_SIZE
+#define XPNET_MSG_DATA_MAX	\
+		(XPNET_MSG_SIZE - offsetof(struct xpnet_message, data))
+#define XPNET_MSG_NENTRIES	(PAGE_SIZE / XPC_MSG_MAX_SIZE)
+
+#define XPNET_MAX_KTHREADS	(XPNET_MSG_NENTRIES + 1)
+#define XPNET_MAX_IDLE_KTHREADS	(XPNET_MSG_NENTRIES + 1)
+
+/*
+ * Version number of XPNET implementation. XPNET can always talk to versions
+ * with same major #, and never talk to versions with a different version.
+ */
+#define _XPNET_VERSION(_major, _minor)	(((_major) << 4) | (_minor))
+#define XPNET_VERSION_MAJOR(_v)		((_v) >> 4)
+#define XPNET_VERSION_MINOR(_v)		((_v) & 0xf)
+
+#define	XPNET_VERSION _XPNET_VERSION(1, 0)	/* version 1.0 */
+#define	XPNET_VERSION_EMBED _XPNET_VERSION(1, 1)	/* version 1.1 */
+#define XPNET_MAGIC	0x88786984	/* "XNET" */
+
+#define XPNET_VALID_MSG(_m)						     \
+   ((XPNET_VERSION_MAJOR(_m->version) == XPNET_VERSION_MAJOR(XPNET_VERSION)) \
+    && (msg->magic == XPNET_MAGIC))
+
+#define XPNET_DEVICE_NAME		"xp0"
+
+/*
+ * When messages are queued with xpc_send_notify, a kmalloc'd buffer
+ * of the following type is passed as a notification cookie.  When the
+ * notification function is called, we use the cookie to decide
+ * whether all outstanding message sends have completed.  The skb can
+ * then be released.
+ */
+struct xpnet_pending_msg {
+	struct sk_buff *skb;
+	atomic_t use_count;
+};
+
+struct net_device *xpnet_device;
+
+/*
+ * When we are notified of other partitions activating, we add them to
+ * our bitmask of partitions to which we broadcast.
+ */
+static unsigned long *xpnet_broadcast_partitions;
+/* protect above */
+static DEFINE_SPINLOCK(xpnet_broadcast_lock);
+
+/*
+ * Since the Block Transfer Engine (BTE) is being used for the transfer
+ * and it relies upon cache-line size transfers, we need to reserve at
+ * least one cache-line for head and tail alignment.  The BTE is
+ * limited to 8MB transfers.
+ *
+ * Testing has shown that changing MTU to greater than 64KB has no effect
+ * on TCP as the two sides negotiate a Max Segment Size that is limited
+ * to 64K.  Other protocols May use packets greater than this, but for
+ * now, the default is 64KB.
+ */
+#define XPNET_MAX_MTU (0x800000UL - L1_CACHE_BYTES)
+/* 68 comes from min TCP+IP+MAC header */
+#define XPNET_MIN_MTU 68
+/* 32KB has been determined to be the ideal */
+#define XPNET_DEF_MTU (0x8000UL)
+
+/*
+ * The partid is encapsulated in the MAC address beginning in the following
+ * octet and it consists of two octets.
+ */
+#define XPNET_PARTID_OCTET	2
+
+/* Define the XPNET debug device structures to be used with dev_dbg() et al */
+
+struct device_driver xpnet_dbg_name = {
+	.name = "xpnet"
+};
+
+struct device xpnet_dbg_subname = {
+	.init_name = "",	/* set to "" */
+	.driver = &xpnet_dbg_name
+};
+
+struct device *xpnet = &xpnet_dbg_subname;
+
+/*
+ * Packet was recevied by XPC and forwarded to us.
+ */
+static void
+xpnet_receive(short partid, int channel, struct xpnet_message *msg)
+{
+	struct sk_buff *skb;
+	void *dst;
+	enum xp_retval ret;
+
+	if (!XPNET_VALID_MSG(msg)) {
+		/*
+		 * Packet with a different XPC version.  Ignore.
+		 */
+		xpc_received(partid, channel, (void *)msg);
+
+		xpnet_device->stats.rx_errors++;
+
+		return;
+	}
+	dev_dbg(xpnet, "received 0x%lx, %d, %d, %d\n", msg->buf_pa, msg->size,
+		msg->leadin_ignore, msg->tailout_ignore);
+
+	/* reserve an extra cache line */
+	skb = dev_alloc_skb(msg->size + L1_CACHE_BYTES);
+	if (!skb) {
+		dev_err(xpnet, "failed on dev_alloc_skb(%d)\n",
+			msg->size + L1_CACHE_BYTES);
+
+		xpc_received(partid, channel, (void *)msg);
+
+		xpnet_device->stats.rx_errors++;
+
+		return;
+	}
+
+	/*
+	 * The allocated skb has some reserved space.
+	 * In order to use xp_remote_memcpy(), we need to get the
+	 * skb->data pointer moved forward.
+	 */
+	skb_reserve(skb, (L1_CACHE_BYTES - ((u64)skb->data &
+					    (L1_CACHE_BYTES - 1)) +
+			  msg->leadin_ignore));
+
+	/*
+	 * Update the tail pointer to indicate data actually
+	 * transferred.
+	 */
+	skb_put(skb, (msg->size - msg->leadin_ignore - msg->tailout_ignore));
+
+	/*
+	 * Move the data over from the other side.
+	 */
+	if ((XPNET_VERSION_MINOR(msg->version) == 1) &&
+	    (msg->embedded_bytes != 0)) {
+		dev_dbg(xpnet, "copying embedded message. memcpy(0x%p, 0x%p, "
+			"%lu)\n", skb->data, &msg->data,
+			(size_t)msg->embedded_bytes);
+
+		skb_copy_to_linear_data(skb, &msg->data,
+					(size_t)msg->embedded_bytes);
+	} else {
+		dst = (void *)((u64)skb->data & ~(L1_CACHE_BYTES - 1));
+		dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t"
+			"xp_remote_memcpy(0x%p, 0x%p, %hu)\n", dst,
+					  (void *)msg->buf_pa, msg->size);
+
+		ret = xp_remote_memcpy(xp_pa(dst), msg->buf_pa, msg->size);
+		if (ret != xpSuccess) {
+			/*
+			 * !!! Need better way of cleaning skb.  Currently skb
+			 * !!! appears in_use and we can't just call
+			 * !!! dev_kfree_skb.
+			 */
+			dev_err(xpnet, "xp_remote_memcpy(0x%p, 0x%p, 0x%hx) "
+				"returned error=0x%x\n", dst,
+				(void *)msg->buf_pa, msg->size, ret);
+
+			xpc_received(partid, channel, (void *)msg);
+
+			xpnet_device->stats.rx_errors++;
+
+			return;
+		}
+	}
+
+	dev_dbg(xpnet, "<skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
+		"skb->end=0x%p skb->len=%d\n", (void *)skb->head,
+		(void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb),
+		skb->len);
+
+	skb->protocol = eth_type_trans(skb, xpnet_device);
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	dev_dbg(xpnet, "passing skb to network layer\n"
+		"\tskb->head=0x%p skb->data=0x%p skb->tail=0x%p "
+		"skb->end=0x%p skb->len=%d\n",
+		(void *)skb->head, (void *)skb->data, skb_tail_pointer(skb),
+		skb_end_pointer(skb), skb->len);
+
+	xpnet_device->stats.rx_packets++;
+	xpnet_device->stats.rx_bytes += skb->len + ETH_HLEN;
+
+	netif_rx_ni(skb);
+	xpc_received(partid, channel, (void *)msg);
+}
+
+/*
+ * This is the handler which XPC calls during any sort of change in
+ * state or message reception on a connection.
+ */
+static void
+xpnet_connection_activity(enum xp_retval reason, short partid, int channel,
+			  void *data, void *key)
+{
+	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
+	DBUG_ON(channel != XPC_NET_CHANNEL);
+
+	switch (reason) {
+	case xpMsgReceived:	/* message received */
+		DBUG_ON(data == NULL);
+
+		xpnet_receive(partid, channel, (struct xpnet_message *)data);
+		break;
+
+	case xpConnected:	/* connection completed to a partition */
+		spin_lock_bh(&xpnet_broadcast_lock);
+		__set_bit(partid, xpnet_broadcast_partitions);
+		spin_unlock_bh(&xpnet_broadcast_lock);
+
+		netif_carrier_on(xpnet_device);
+
+		dev_dbg(xpnet, "%s connected to partition %d\n",
+			xpnet_device->name, partid);
+		break;
+
+	default:
+		spin_lock_bh(&xpnet_broadcast_lock);
+		__clear_bit(partid, xpnet_broadcast_partitions);
+		spin_unlock_bh(&xpnet_broadcast_lock);
+
+		if (bitmap_empty((unsigned long *)xpnet_broadcast_partitions,
+				 xp_max_npartitions)) {
+			netif_carrier_off(xpnet_device);
+		}
+
+		dev_dbg(xpnet, "%s disconnected from partition %d\n",
+			xpnet_device->name, partid);
+		break;
+	}
+}
+
+static int
+xpnet_dev_open(struct net_device *dev)
+{
+	enum xp_retval ret;
+
+	dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %ld, "
+		"%ld)\n", XPC_NET_CHANNEL, xpnet_connection_activity,
+		(unsigned long)XPNET_MSG_SIZE,
+		(unsigned long)XPNET_MSG_NENTRIES,
+		(unsigned long)XPNET_MAX_KTHREADS,
+		(unsigned long)XPNET_MAX_IDLE_KTHREADS);
+
+	ret = xpc_connect(XPC_NET_CHANNEL, xpnet_connection_activity, NULL,
+			  XPNET_MSG_SIZE, XPNET_MSG_NENTRIES,
+			  XPNET_MAX_KTHREADS, XPNET_MAX_IDLE_KTHREADS);
+	if (ret != xpSuccess) {
+		dev_err(xpnet, "ifconfig up of %s failed on XPC connect, "
+			"ret=%d\n", dev->name, ret);
+
+		return -ENOMEM;
+	}
+
+	dev_dbg(xpnet, "ifconfig up of %s; XPC connected\n", dev->name);
+
+	return 0;
+}
+
+static int
+xpnet_dev_stop(struct net_device *dev)
+{
+	xpc_disconnect(XPC_NET_CHANNEL);
+
+	dev_dbg(xpnet, "ifconfig down of %s; XPC disconnected\n", dev->name);
+
+	return 0;
+}
+
+/*
+ * Notification that the other end has received the message and
+ * DMA'd the skb information.  At this point, they are done with
+ * our side.  When all recipients are done processing, we
+ * release the skb and then release our pending message structure.
+ */
+static void
+xpnet_send_completed(enum xp_retval reason, short partid, int channel,
+		     void *__qm)
+{
+	struct xpnet_pending_msg *queued_msg = (struct xpnet_pending_msg *)__qm;
+
+	DBUG_ON(queued_msg == NULL);
+
+	dev_dbg(xpnet, "message to %d notified with reason %d\n",
+		partid, reason);
+
+	if (atomic_dec_return(&queued_msg->use_count) == 0) {
+		dev_dbg(xpnet, "all acks for skb->head=-x%p\n",
+			(void *)queued_msg->skb->head);
+
+		dev_kfree_skb_any(queued_msg->skb);
+		kfree(queued_msg);
+	}
+}
+
+static void
+xpnet_send(struct sk_buff *skb, struct xpnet_pending_msg *queued_msg,
+	   u64 start_addr, u64 end_addr, u16 embedded_bytes, int dest_partid)
+{
+	u8 msg_buffer[XPNET_MSG_SIZE];
+	struct xpnet_message *msg = (struct xpnet_message *)&msg_buffer;
+	u16 msg_size = sizeof(struct xpnet_message);
+	enum xp_retval ret;
+
+	msg->embedded_bytes = embedded_bytes;
+	if (unlikely(embedded_bytes != 0)) {
+		msg->version = XPNET_VERSION_EMBED;
+		dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n",
+			&msg->data, skb->data, (size_t)embedded_bytes);
+		skb_copy_from_linear_data(skb, &msg->data,
+					  (size_t)embedded_bytes);
+		msg_size += embedded_bytes - 1;
+	} else {
+		msg->version = XPNET_VERSION;
+	}
+	msg->magic = XPNET_MAGIC;
+	msg->size = end_addr - start_addr;
+	msg->leadin_ignore = (u64)skb->data - start_addr;
+	msg->tailout_ignore = end_addr - (u64)skb_tail_pointer(skb);
+	msg->buf_pa = xp_pa((void *)start_addr);
+
+	dev_dbg(xpnet, "sending XPC message to %d:%d\n"
+		"msg->buf_pa=0x%lx, msg->size=%u, "
+		"msg->leadin_ignore=%u, msg->tailout_ignore=%u\n",
+		dest_partid, XPC_NET_CHANNEL, msg->buf_pa, msg->size,
+		msg->leadin_ignore, msg->tailout_ignore);
+
+	atomic_inc(&queued_msg->use_count);
+
+	ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, XPC_NOWAIT, msg,
+			      msg_size, xpnet_send_completed, queued_msg);
+	if (unlikely(ret != xpSuccess))
+		atomic_dec(&queued_msg->use_count);
+}
+
+/*
+ * Network layer has formatted a packet (skb) and is ready to place it
+ * "on the wire".  Prepare and send an xpnet_message to all partitions
+ * which have connected with us and are targets of this packet.
+ *
+ * MAC-NOTE:  For the XPNET driver, the MAC address contains the
+ * destination partid.  If the destination partid octets are 0xffff,
+ * this packet is to be broadcast to all connected partitions.
+ */
+static netdev_tx_t
+xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct xpnet_pending_msg *queued_msg;
+	u64 start_addr, end_addr;
+	short dest_partid;
+	u16 embedded_bytes = 0;
+
+	dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
+		"skb->end=0x%p skb->len=%d\n", (void *)skb->head,
+		(void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb),
+		skb->len);
+
+	if (skb->data[0] == 0x33) {
+		dev_kfree_skb(skb);
+		return NETDEV_TX_OK;	/* nothing needed to be done */
+	}
+
+	/*
+	 * The xpnet_pending_msg tracks how many outstanding
+	 * xpc_send_notifies are relying on this skb.  When none
+	 * remain, release the skb.
+	 */
+	queued_msg = kmalloc(sizeof(struct xpnet_pending_msg), GFP_ATOMIC);
+	if (queued_msg == NULL) {
+		dev_warn(xpnet, "failed to kmalloc %ld bytes; dropping "
+			 "packet\n", sizeof(struct xpnet_pending_msg));
+
+		dev->stats.tx_errors++;
+		dev_kfree_skb(skb);
+		return NETDEV_TX_OK;
+	}
+
+	/* get the beginning of the first cacheline and end of last */
+	start_addr = ((u64)skb->data & ~(L1_CACHE_BYTES - 1));
+	end_addr = L1_CACHE_ALIGN((u64)skb_tail_pointer(skb));
+
+	/* calculate how many bytes to embed in the XPC message */
+	if (unlikely(skb->len <= XPNET_MSG_DATA_MAX)) {
+		/* skb->data does fit so embed */
+		embedded_bytes = skb->len;
+	}
+
+	/*
+	 * Since the send occurs asynchronously, we set the count to one
+	 * and begin sending.  Any sends that happen to complete before
+	 * we are done sending will not free the skb.  We will be left
+	 * with that task during exit.  This also handles the case of
+	 * a packet destined for a partition which is no longer up.
+	 */
+	atomic_set(&queued_msg->use_count, 1);
+	queued_msg->skb = skb;
+
+	if (skb->data[0] == 0xff) {
+		/* we are being asked to broadcast to all partitions */
+		for_each_set_bit(dest_partid, xpnet_broadcast_partitions,
+			     xp_max_npartitions) {
+
+			xpnet_send(skb, queued_msg, start_addr, end_addr,
+				   embedded_bytes, dest_partid);
+		}
+	} else {
+		dest_partid = (short)skb->data[XPNET_PARTID_OCTET + 1];
+		dest_partid |= (short)skb->data[XPNET_PARTID_OCTET + 0] << 8;
+
+		if (dest_partid >= 0 &&
+		    dest_partid < xp_max_npartitions &&
+		    test_bit(dest_partid, xpnet_broadcast_partitions) != 0) {
+
+			xpnet_send(skb, queued_msg, start_addr, end_addr,
+				   embedded_bytes, dest_partid);
+		}
+	}
+
+	dev->stats.tx_packets++;
+	dev->stats.tx_bytes += skb->len;
+
+	if (atomic_dec_return(&queued_msg->use_count) == 0) {
+		dev_kfree_skb(skb);
+		kfree(queued_msg);
+	}
+
+	return NETDEV_TX_OK;
+}
+
+/*
+ * Deal with transmit timeouts coming from the network layer.
+ */
+static void
+xpnet_dev_tx_timeout(struct net_device *dev)
+{
+	dev->stats.tx_errors++;
+}
+
+static const struct net_device_ops xpnet_netdev_ops = {
+	.ndo_open		= xpnet_dev_open,
+	.ndo_stop		= xpnet_dev_stop,
+	.ndo_start_xmit		= xpnet_dev_hard_start_xmit,
+	.ndo_tx_timeout		= xpnet_dev_tx_timeout,
+	.ndo_set_mac_address 	= eth_mac_addr,
+	.ndo_validate_addr	= eth_validate_addr,
+};
+
+static int __init
+xpnet_init(void)
+{
+	int result;
+
+	if (!is_shub() && !is_uv())
+		return -ENODEV;
+
+	dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME);
+
+	xpnet_broadcast_partitions = kcalloc(BITS_TO_LONGS(xp_max_npartitions),
+					     sizeof(long),
+					     GFP_KERNEL);
+	if (xpnet_broadcast_partitions == NULL)
+		return -ENOMEM;
+
+	/*
+	 * use ether_setup() to init the majority of our device
+	 * structure and then override the necessary pieces.
+	 */
+	xpnet_device = alloc_netdev(0, XPNET_DEVICE_NAME, NET_NAME_UNKNOWN,
+				    ether_setup);
+	if (xpnet_device == NULL) {
+		kfree(xpnet_broadcast_partitions);
+		return -ENOMEM;
+	}
+
+	netif_carrier_off(xpnet_device);
+
+	xpnet_device->netdev_ops = &xpnet_netdev_ops;
+	xpnet_device->mtu = XPNET_DEF_MTU;
+	xpnet_device->min_mtu = XPNET_MIN_MTU;
+	xpnet_device->max_mtu = XPNET_MAX_MTU;
+
+	/*
+	 * Multicast assumes the LSB of the first octet is set for multicast
+	 * MAC addresses.  We chose the first octet of the MAC to be unlikely
+	 * to collide with any vendor's officially issued MAC.
+	 */
+	xpnet_device->dev_addr[0] = 0x02;     /* locally administered, no OUI */
+
+	xpnet_device->dev_addr[XPNET_PARTID_OCTET + 1] = xp_partition_id;
+	xpnet_device->dev_addr[XPNET_PARTID_OCTET + 0] = (xp_partition_id >> 8);
+
+	/*
+	 * ether_setup() sets this to a multicast device.  We are
+	 * really not supporting multicast at this time.
+	 */
+	xpnet_device->flags &= ~IFF_MULTICAST;
+
+	/*
+	 * No need to checksum as it is a DMA transfer.  The BTE will
+	 * report an error if the data is not retrievable and the
+	 * packet will be dropped.
+	 */
+	xpnet_device->features = NETIF_F_HW_CSUM;
+
+	result = register_netdev(xpnet_device);
+	if (result != 0) {
+		free_netdev(xpnet_device);
+		kfree(xpnet_broadcast_partitions);
+	}
+
+	return result;
+}
+
+module_init(xpnet_init);
+
+static void __exit
+xpnet_exit(void)
+{
+	dev_info(xpnet, "unregistering network device %s\n",
+		 xpnet_device[0].name);
+
+	unregister_netdev(xpnet_device);
+	free_netdev(xpnet_device);
+	kfree(xpnet_broadcast_partitions);
+}
+
+module_exit(xpnet_exit);
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION("Cross Partition Network adapter (XPNET)");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/spear13xx_pcie_gadget.c b/drivers/misc/spear13xx_pcie_gadget.c
new file mode 100644
index 000000000..ee120dcbb
--- /dev/null
+++ b/drivers/misc/spear13xx_pcie_gadget.c
@@ -0,0 +1,797 @@
+/*
+ * drivers/misc/spear13xx_pcie_gadget.c
+ *
+ * Copyright (C) 2010 ST Microelectronics
+ * Pratyush Anand<pratyush.anand@gmail.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/device.h>
+#include <linux/clk.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pci_regs.h>
+#include <linux/configfs.h>
+#include <mach/pcie.h>
+#include <mach/misc_regs.h>
+
+#define IN0_MEM_SIZE	(200 * 1024 * 1024 - 1)
+/* In current implementation address translation is done using IN0 only.
+ * So IN1 start address and IN0 end address has been kept same
+*/
+#define IN1_MEM_SIZE	(0 * 1024 * 1024 - 1)
+#define IN_IO_SIZE	(20 * 1024 * 1024 - 1)
+#define IN_CFG0_SIZE	(12 * 1024 * 1024 - 1)
+#define IN_CFG1_SIZE	(12 * 1024 * 1024 - 1)
+#define IN_MSG_SIZE	(12 * 1024 * 1024 - 1)
+/* Keep default BAR size as 4K*/
+/* AORAM would be mapped by default*/
+#define INBOUND_ADDR_MASK	(SPEAR13XX_SYSRAM1_SIZE - 1)
+
+#define INT_TYPE_NO_INT	0
+#define INT_TYPE_INTX	1
+#define INT_TYPE_MSI	2
+struct spear_pcie_gadget_config {
+	void __iomem *base;
+	void __iomem *va_app_base;
+	void __iomem *va_dbi_base;
+	char int_type[10];
+	ulong requested_msi;
+	ulong configured_msi;
+	ulong bar0_size;
+	ulong bar0_rw_offset;
+	void __iomem *va_bar0_address;
+};
+
+struct pcie_gadget_target {
+	struct configfs_subsystem subsys;
+	struct spear_pcie_gadget_config config;
+};
+
+struct pcie_gadget_target_attr {
+	struct configfs_attribute	attr;
+	ssize_t		(*show)(struct spear_pcie_gadget_config *config,
+						char *buf);
+	ssize_t		(*store)(struct spear_pcie_gadget_config *config,
+						 const char *buf,
+						 size_t count);
+};
+
+static void enable_dbi_access(struct pcie_app_reg __iomem *app_reg)
+{
+	/* Enable DBI access */
+	writel(readl(&app_reg->slv_armisc) | (1 << AXI_OP_DBI_ACCESS_ID),
+			&app_reg->slv_armisc);
+	writel(readl(&app_reg->slv_awmisc) | (1 << AXI_OP_DBI_ACCESS_ID),
+			&app_reg->slv_awmisc);
+
+}
+
+static void disable_dbi_access(struct pcie_app_reg __iomem *app_reg)
+{
+	/* disable DBI access */
+	writel(readl(&app_reg->slv_armisc) & ~(1 << AXI_OP_DBI_ACCESS_ID),
+			&app_reg->slv_armisc);
+	writel(readl(&app_reg->slv_awmisc) & ~(1 << AXI_OP_DBI_ACCESS_ID),
+			&app_reg->slv_awmisc);
+
+}
+
+static void spear_dbi_read_reg(struct spear_pcie_gadget_config *config,
+		int where, int size, u32 *val)
+{
+	struct pcie_app_reg __iomem *app_reg = config->va_app_base;
+	ulong va_address;
+
+	/* Enable DBI access */
+	enable_dbi_access(app_reg);
+
+	va_address = (ulong)config->va_dbi_base + (where & ~0x3);
+
+	*val = readl(va_address);
+
+	if (size == 1)
+		*val = (*val >> (8 * (where & 3))) & 0xff;
+	else if (size == 2)
+		*val = (*val >> (8 * (where & 3))) & 0xffff;
+
+	/* Disable DBI access */
+	disable_dbi_access(app_reg);
+}
+
+static void spear_dbi_write_reg(struct spear_pcie_gadget_config *config,
+		int where, int size, u32 val)
+{
+	struct pcie_app_reg __iomem *app_reg = config->va_app_base;
+	ulong va_address;
+
+	/* Enable DBI access */
+	enable_dbi_access(app_reg);
+
+	va_address = (ulong)config->va_dbi_base + (where & ~0x3);
+
+	if (size == 4)
+		writel(val, va_address);
+	else if (size == 2)
+		writew(val, va_address + (where & 2));
+	else if (size == 1)
+		writeb(val, va_address + (where & 3));
+
+	/* Disable DBI access */
+	disable_dbi_access(app_reg);
+}
+
+#define PCI_FIND_CAP_TTL	48
+
+static int pci_find_own_next_cap_ttl(struct spear_pcie_gadget_config *config,
+		u32 pos, int cap, int *ttl)
+{
+	u32 id;
+
+	while ((*ttl)--) {
+		spear_dbi_read_reg(config, pos, 1, &pos);
+		if (pos < 0x40)
+			break;
+		pos &= ~3;
+		spear_dbi_read_reg(config, pos + PCI_CAP_LIST_ID, 1, &id);
+		if (id == 0xff)
+			break;
+		if (id == cap)
+			return pos;
+		pos += PCI_CAP_LIST_NEXT;
+	}
+	return 0;
+}
+
+static int pci_find_own_next_cap(struct spear_pcie_gadget_config *config,
+			u32 pos, int cap)
+{
+	int ttl = PCI_FIND_CAP_TTL;
+
+	return pci_find_own_next_cap_ttl(config, pos, cap, &ttl);
+}
+
+static int pci_find_own_cap_start(struct spear_pcie_gadget_config *config,
+				u8 hdr_type)
+{
+	u32 status;
+
+	spear_dbi_read_reg(config, PCI_STATUS, 2, &status);
+	if (!(status & PCI_STATUS_CAP_LIST))
+		return 0;
+
+	switch (hdr_type) {
+	case PCI_HEADER_TYPE_NORMAL:
+	case PCI_HEADER_TYPE_BRIDGE:
+		return PCI_CAPABILITY_LIST;
+	case PCI_HEADER_TYPE_CARDBUS:
+		return PCI_CB_CAPABILITY_LIST;
+	default:
+		return 0;
+	}
+
+	return 0;
+}
+
+/*
+ * Tell if a device supports a given PCI capability.
+ * Returns the address of the requested capability structure within the
+ * device's PCI configuration space or 0 in case the device does not
+ * support it. Possible values for @cap:
+ *
+ * %PCI_CAP_ID_PM	Power Management
+ * %PCI_CAP_ID_AGP	Accelerated Graphics Port
+ * %PCI_CAP_ID_VPD	Vital Product Data
+ * %PCI_CAP_ID_SLOTID	Slot Identification
+ * %PCI_CAP_ID_MSI	Message Signalled Interrupts
+ * %PCI_CAP_ID_CHSWP	CompactPCI HotSwap
+ * %PCI_CAP_ID_PCIX	PCI-X
+ * %PCI_CAP_ID_EXP	PCI Express
+ */
+static int pci_find_own_capability(struct spear_pcie_gadget_config *config,
+		int cap)
+{
+	u32 pos;
+	u32 hdr_type;
+
+	spear_dbi_read_reg(config, PCI_HEADER_TYPE, 1, &hdr_type);
+
+	pos = pci_find_own_cap_start(config, hdr_type);
+	if (pos)
+		pos = pci_find_own_next_cap(config, pos, cap);
+
+	return pos;
+}
+
+static irqreturn_t spear_pcie_gadget_irq(int irq, void *dev_id)
+{
+	return 0;
+}
+
+/*
+ * configfs interfaces show/store functions
+ */
+
+static struct pcie_gadget_target *to_target(struct config_item *item)
+{
+	return item ?
+		container_of(to_configfs_subsystem(to_config_group(item)),
+				struct pcie_gadget_target, subsys) : NULL;
+}
+
+static ssize_t pcie_gadget_link_show(struct config_item *item, char *buf)
+{
+	struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base;
+
+	if (readl(&app_reg->app_status_1) & ((u32)1 << XMLH_LINK_UP_ID))
+		return sprintf(buf, "UP");
+	else
+		return sprintf(buf, "DOWN");
+}
+
+static ssize_t pcie_gadget_link_store(struct config_item *item,
+		const char *buf, size_t count)
+{
+	struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base;
+
+	if (sysfs_streq(buf, "UP"))
+		writel(readl(&app_reg->app_ctrl_0) | (1 << APP_LTSSM_ENABLE_ID),
+			&app_reg->app_ctrl_0);
+	else if (sysfs_streq(buf, "DOWN"))
+		writel(readl(&app_reg->app_ctrl_0)
+				& ~(1 << APP_LTSSM_ENABLE_ID),
+				&app_reg->app_ctrl_0);
+	else
+		return -EINVAL;
+	return count;
+}
+
+static ssize_t pcie_gadget_int_type_show(struct config_item *item, char *buf)
+{
+	return sprintf(buf, "%s", to_target(item)->int_type);
+}
+
+static ssize_t pcie_gadget_int_type_store(struct config_item *item,
+		const char *buf, size_t count)
+{
+	struct spear_pcie_gadget_config *config = to_target(item)
+	u32 cap, vec, flags;
+	ulong vector;
+
+	if (sysfs_streq(buf, "INTA"))
+		spear_dbi_write_reg(config, PCI_INTERRUPT_LINE, 1, 1);
+
+	else if (sysfs_streq(buf, "MSI")) {
+		vector = config->requested_msi;
+		vec = 0;
+		while (vector > 1) {
+			vector /= 2;
+			vec++;
+		}
+		spear_dbi_write_reg(config, PCI_INTERRUPT_LINE, 1, 0);
+		cap = pci_find_own_capability(config, PCI_CAP_ID_MSI);
+		spear_dbi_read_reg(config, cap + PCI_MSI_FLAGS, 1, &flags);
+		flags &= ~PCI_MSI_FLAGS_QMASK;
+		flags |= vec << 1;
+		spear_dbi_write_reg(config, cap + PCI_MSI_FLAGS, 1, flags);
+	} else
+		return -EINVAL;
+
+	strcpy(config->int_type, buf);
+
+	return count;
+}
+
+static ssize_t pcie_gadget_no_of_msi_show(struct config_item *item, char *buf)
+{
+	struct spear_pcie_gadget_config *config = to_target(item)
+	struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base;
+	u32 cap, vec, flags;
+	ulong vector;
+
+	if ((readl(&app_reg->msg_status) & (1 << CFG_MSI_EN_ID))
+			!= (1 << CFG_MSI_EN_ID))
+		vector = 0;
+	else {
+		cap = pci_find_own_capability(config, PCI_CAP_ID_MSI);
+		spear_dbi_read_reg(config, cap + PCI_MSI_FLAGS, 1, &flags);
+		flags &= ~PCI_MSI_FLAGS_QSIZE;
+		vec = flags >> 4;
+		vector = 1;
+		while (vec--)
+			vector *= 2;
+	}
+	config->configured_msi = vector;
+
+	return sprintf(buf, "%lu", vector);
+}
+
+static ssize_t pcie_gadget_no_of_msi_store(struct config_item *item,
+		const char *buf, size_t count)
+{
+	int ret;
+
+	ret = kstrtoul(buf, 0, &to_target(item)->requested_msi);
+	if (ret)
+		return ret;
+
+	if (config->requested_msi > 32)
+		config->requested_msi = 32;
+
+	return count;
+}
+
+static ssize_t pcie_gadget_inta_store(struct config_item *item,
+		const char *buf, size_t count)
+{
+	struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base;
+	ulong en;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &en);
+	if (ret)
+		return ret;
+
+	if (en)
+		writel(readl(&app_reg->app_ctrl_0) | (1 << SYS_INT_ID),
+				&app_reg->app_ctrl_0);
+	else
+		writel(readl(&app_reg->app_ctrl_0) & ~(1 << SYS_INT_ID),
+				&app_reg->app_ctrl_0);
+
+	return count;
+}
+
+static ssize_t pcie_gadget_send_msi_store(struct config_item *item,
+		const char *buf, size_t count)
+{
+	struct spear_pcie_gadget_config *config = to_target(item)
+	struct pcie_app_reg __iomem *app_reg = config->va_app_base;
+	ulong vector;
+	u32 ven_msi;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &vector);
+	if (ret)
+		return ret;
+
+	if (!config->configured_msi)
+		return -EINVAL;
+
+	if (vector >= config->configured_msi)
+		return -EINVAL;
+
+	ven_msi = readl(&app_reg->ven_msi_1);
+	ven_msi &= ~VEN_MSI_FUN_NUM_MASK;
+	ven_msi |= 0 << VEN_MSI_FUN_NUM_ID;
+	ven_msi &= ~VEN_MSI_TC_MASK;
+	ven_msi |= 0 << VEN_MSI_TC_ID;
+	ven_msi &= ~VEN_MSI_VECTOR_MASK;
+	ven_msi |= vector << VEN_MSI_VECTOR_ID;
+
+	/* generating interrupt for msi vector */
+	ven_msi |= VEN_MSI_REQ_EN;
+	writel(ven_msi, &app_reg->ven_msi_1);
+	udelay(1);
+	ven_msi &= ~VEN_MSI_REQ_EN;
+	writel(ven_msi, &app_reg->ven_msi_1);
+
+	return count;
+}
+
+static ssize_t pcie_gadget_vendor_id_show(struct config_item *item, char *buf)
+{
+	u32 id;
+
+	spear_dbi_read_reg(to_target(item), PCI_VENDOR_ID, 2, &id);
+
+	return sprintf(buf, "%x", id);
+}
+
+static ssize_t pcie_gadget_vendor_id_store(struct config_item *item,
+		const char *buf, size_t count)
+{
+	ulong id;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &id);
+	if (ret)
+		return ret;
+
+	spear_dbi_write_reg(to_target(item), PCI_VENDOR_ID, 2, id);
+
+	return count;
+}
+
+static ssize_t pcie_gadget_device_id_show(struct config_item *item, char *buf)
+{
+	u32 id;
+
+	spear_dbi_read_reg(to_target(item), PCI_DEVICE_ID, 2, &id);
+
+	return sprintf(buf, "%x", id);
+}
+
+static ssize_t pcie_gadget_device_id_store(struct config_item *item,
+		const char *buf, size_t count)
+{
+	ulong id;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &id);
+	if (ret)
+		return ret;
+
+	spear_dbi_write_reg(to_target(item), PCI_DEVICE_ID, 2, id);
+
+	return count;
+}
+
+static ssize_t pcie_gadget_bar0_size_show(struct config_item *item, char *buf)
+{
+	return sprintf(buf, "%lx", to_target(item)->bar0_size);
+}
+
+static ssize_t pcie_gadget_bar0_size_store(struct config_item *item,
+		const char *buf, size_t count)
+{
+	struct spear_pcie_gadget_config *config = to_target(item)
+	ulong size;
+	u32 pos, pos1;
+	u32 no_of_bit = 0;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &size);
+	if (ret)
+		return ret;
+
+	/* min bar size is 256 */
+	if (size <= 0x100)
+		size = 0x100;
+	/* max bar size is 1MB*/
+	else if (size >= 0x100000)
+		size = 0x100000;
+	else {
+		pos = 0;
+		pos1 = 0;
+		while (pos < 21) {
+			pos = find_next_bit((ulong *)&size, 21, pos);
+			if (pos != 21)
+				pos1 = pos + 1;
+			pos++;
+			no_of_bit++;
+		}
+		if (no_of_bit == 2)
+			pos1--;
+
+		size = 1 << pos1;
+	}
+	config->bar0_size = size;
+	spear_dbi_write_reg(config, PCIE_BAR0_MASK_REG, 4, size - 1);
+
+	return count;
+}
+
+static ssize_t pcie_gadget_bar0_address_show(struct config_item *item,
+		char *buf)
+{
+	struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base;
+
+	u32 address = readl(&app_reg->pim0_mem_addr_start);
+
+	return sprintf(buf, "%x", address);
+}
+
+static ssize_t pcie_gadget_bar0_address_store(struct config_item *item,
+		const char *buf, size_t count)
+{
+	struct spear_pcie_gadget_config *config = to_target(item)
+	struct pcie_app_reg __iomem *app_reg = config->va_app_base;
+	ulong address;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &address);
+	if (ret)
+		return ret;
+
+	address &= ~(config->bar0_size - 1);
+	if (config->va_bar0_address)
+		iounmap(config->va_bar0_address);
+	config->va_bar0_address = ioremap(address, config->bar0_size);
+	if (!config->va_bar0_address)
+		return -ENOMEM;
+
+	writel(address, &app_reg->pim0_mem_addr_start);
+
+	return count;
+}
+
+static ssize_t pcie_gadget_bar0_rw_offset_show(struct config_item *item,
+		char *buf)
+{
+	return sprintf(buf, "%lx", to_target(item)->bar0_rw_offset);
+}
+
+static ssize_t pcie_gadget_bar0_rw_offset_store(struct config_item *item,
+		const char *buf, size_t count)
+{
+	ulong offset;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &offset);
+	if (ret)
+		return ret;
+
+	if (offset % 4)
+		return -EINVAL;
+
+	to_target(item)->bar0_rw_offset = offset;
+
+	return count;
+}
+
+static ssize_t pcie_gadget_bar0_data_show(struct config_item *item, char *buf)
+{
+	struct spear_pcie_gadget_config *config = to_target(item)
+	ulong data;
+
+	if (!config->va_bar0_address)
+		return -ENOMEM;
+
+	data = readl((ulong)config->va_bar0_address + config->bar0_rw_offset);
+
+	return sprintf(buf, "%lx", data);
+}
+
+static ssize_t pcie_gadget_bar0_data_store(struct config_item *item,
+		const char *buf, size_t count)
+{
+	struct spear_pcie_gadget_config *config = to_target(item)
+	ulong data;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &data);
+	if (ret)
+		return ret;
+
+	if (!config->va_bar0_address)
+		return -ENOMEM;
+
+	writel(data, (ulong)config->va_bar0_address + config->bar0_rw_offset);
+
+	return count;
+}
+
+CONFIGFS_ATTR(pcie_gadget_, link);
+CONFIGFS_ATTR(pcie_gadget_, int_type);
+CONFIGFS_ATTR(pcie_gadget_, no_of_msi);
+CONFIGFS_ATTR_WO(pcie_gadget_, inta);
+CONFIGFS_ATTR_WO(pcie_gadget_, send_msi);
+CONFIGFS_ATTR(pcie_gadget_, vendor_id);
+CONFIGFS_ATTR(pcie_gadget_, device_id);
+CONFIGFS_ATTR(pcie_gadget_, bar0_size);
+CONFIGFS_ATTR(pcie_gadget_, bar0_address);
+CONFIGFS_ATTR(pcie_gadget_, bar0_rw_offset);
+CONFIGFS_ATTR(pcie_gadget_, bar0_data);
+
+static struct configfs_attribute *pcie_gadget_target_attrs[] = {
+	&pcie_gadget_attr_link,
+	&pcie_gadget_attr_int_type,
+	&pcie_gadget_attr_no_of_msi,
+	&pcie_gadget_attr_inta,
+	&pcie_gadget_attr_send_msi,
+	&pcie_gadget_attr_vendor_id,
+	&pcie_gadget_attr_device_id,
+	&pcie_gadget_attr_bar0_size,
+	&pcie_gadget_attr_bar0_address,
+	&pcie_gadget_attr_bar0_rw_offset,
+	&pcie_gadget_attr_bar0_data,
+	NULL,
+};
+
+static struct config_item_type pcie_gadget_target_type = {
+	.ct_attrs		= pcie_gadget_target_attrs,
+	.ct_owner		= THIS_MODULE,
+};
+
+static void spear13xx_pcie_device_init(struct spear_pcie_gadget_config *config)
+{
+	struct pcie_app_reg __iomem *app_reg = config->va_app_base;
+
+	/*setup registers for outbound translation */
+
+	writel(config->base, &app_reg->in0_mem_addr_start);
+	writel(app_reg->in0_mem_addr_start + IN0_MEM_SIZE,
+			&app_reg->in0_mem_addr_limit);
+	writel(app_reg->in0_mem_addr_limit + 1, &app_reg->in1_mem_addr_start);
+	writel(app_reg->in1_mem_addr_start + IN1_MEM_SIZE,
+			&app_reg->in1_mem_addr_limit);
+	writel(app_reg->in1_mem_addr_limit + 1, &app_reg->in_io_addr_start);
+	writel(app_reg->in_io_addr_start + IN_IO_SIZE,
+			&app_reg->in_io_addr_limit);
+	writel(app_reg->in_io_addr_limit + 1, &app_reg->in_cfg0_addr_start);
+	writel(app_reg->in_cfg0_addr_start + IN_CFG0_SIZE,
+			&app_reg->in_cfg0_addr_limit);
+	writel(app_reg->in_cfg0_addr_limit + 1, &app_reg->in_cfg1_addr_start);
+	writel(app_reg->in_cfg1_addr_start + IN_CFG1_SIZE,
+			&app_reg->in_cfg1_addr_limit);
+	writel(app_reg->in_cfg1_addr_limit + 1, &app_reg->in_msg_addr_start);
+	writel(app_reg->in_msg_addr_start + IN_MSG_SIZE,
+			&app_reg->in_msg_addr_limit);
+
+	writel(app_reg->in0_mem_addr_start, &app_reg->pom0_mem_addr_start);
+	writel(app_reg->in1_mem_addr_start, &app_reg->pom1_mem_addr_start);
+	writel(app_reg->in_io_addr_start, &app_reg->pom_io_addr_start);
+
+	/*setup registers for inbound translation */
+
+	/* Keep AORAM mapped at BAR0 as default */
+	config->bar0_size = INBOUND_ADDR_MASK + 1;
+	spear_dbi_write_reg(config, PCIE_BAR0_MASK_REG, 4, INBOUND_ADDR_MASK);
+	spear_dbi_write_reg(config, PCI_BASE_ADDRESS_0, 4, 0xC);
+	config->va_bar0_address = ioremap(SPEAR13XX_SYSRAM1_BASE,
+			config->bar0_size);
+
+	writel(SPEAR13XX_SYSRAM1_BASE, &app_reg->pim0_mem_addr_start);
+	writel(0, &app_reg->pim1_mem_addr_start);
+	writel(INBOUND_ADDR_MASK + 1, &app_reg->mem0_addr_offset_limit);
+
+	writel(0x0, &app_reg->pim_io_addr_start);
+	writel(0x0, &app_reg->pim_io_addr_start);
+	writel(0x0, &app_reg->pim_rom_addr_start);
+
+	writel(DEVICE_TYPE_EP | (1 << MISCTRL_EN_ID)
+			| ((u32)1 << REG_TRANSLATION_ENABLE),
+			&app_reg->app_ctrl_0);
+	/* disable all rx interrupts */
+	writel(0, &app_reg->int_mask);
+
+	/* Select INTA as default*/
+	spear_dbi_write_reg(config, PCI_INTERRUPT_LINE, 1, 1);
+}
+
+static int spear_pcie_gadget_probe(struct platform_device *pdev)
+{
+	struct resource *res0, *res1;
+	unsigned int status = 0;
+	int irq;
+	struct clk *clk;
+	static struct pcie_gadget_target *target;
+	struct spear_pcie_gadget_config *config;
+	struct config_item		*cg_item;
+	struct configfs_subsystem *subsys;
+
+	target = devm_kzalloc(&pdev->dev, sizeof(*target), GFP_KERNEL);
+	if (!target) {
+		dev_err(&pdev->dev, "out of memory\n");
+		return -ENOMEM;
+	}
+
+	cg_item = &target->subsys.su_group.cg_item;
+	sprintf(cg_item->ci_namebuf, "pcie_gadget.%d", pdev->id);
+	cg_item->ci_type	= &pcie_gadget_target_type;
+	config = &target->config;
+
+	/* get resource for application registers*/
+	res0 = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	config->va_app_base = devm_ioremap_resource(&pdev->dev, res0);
+	if (IS_ERR(config->va_app_base)) {
+		dev_err(&pdev->dev, "ioremap fail\n");
+		return PTR_ERR(config->va_app_base);
+	}
+
+	/* get resource for dbi registers*/
+	res1 = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	config->base = (void __iomem *)res1->start;
+
+	config->va_dbi_base = devm_ioremap_resource(&pdev->dev, res1);
+	if (IS_ERR(config->va_dbi_base)) {
+		dev_err(&pdev->dev, "ioremap fail\n");
+		return PTR_ERR(config->va_dbi_base);
+	}
+
+	platform_set_drvdata(pdev, target);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "no update irq?\n");
+		return irq;
+	}
+
+	status = devm_request_irq(&pdev->dev, irq, spear_pcie_gadget_irq,
+				  0, pdev->name, NULL);
+	if (status) {
+		dev_err(&pdev->dev,
+			"pcie gadget interrupt IRQ%d already claimed\n", irq);
+		return status;
+	}
+
+	/* Register configfs hooks */
+	subsys = &target->subsys;
+	config_group_init(&subsys->su_group);
+	mutex_init(&subsys->su_mutex);
+	status = configfs_register_subsystem(subsys);
+	if (status)
+		return status;
+
+	/*
+	 * init basic pcie application registers
+	 * do not enable clock if it is PCIE0.Ideally , all controller should
+	 * have been independent from others with respect to clock. But PCIE1
+	 * and 2 depends on PCIE0.So PCIE0 clk is provided during board init.
+	 */
+	if (pdev->id == 1) {
+		/*
+		 * Ideally CFG Clock should have been also enabled here. But
+		 * it is done currently during board init routne
+		 */
+		clk = clk_get_sys("pcie1", NULL);
+		if (IS_ERR(clk)) {
+			pr_err("%s:couldn't get clk for pcie1\n", __func__);
+			return PTR_ERR(clk);
+		}
+		status = clk_enable(clk);
+		if (status) {
+			pr_err("%s:couldn't enable clk for pcie1\n", __func__);
+			return status;
+		}
+	} else if (pdev->id == 2) {
+		/*
+		 * Ideally CFG Clock should have been also enabled here. But
+		 * it is done currently during board init routne
+		 */
+		clk = clk_get_sys("pcie2", NULL);
+		if (IS_ERR(clk)) {
+			pr_err("%s:couldn't get clk for pcie2\n", __func__);
+			return PTR_ERR(clk);
+		}
+		status = clk_enable(clk);
+		if (status) {
+			pr_err("%s:couldn't enable clk for pcie2\n", __func__);
+			return status;
+		}
+	}
+	spear13xx_pcie_device_init(config);
+
+	return 0;
+}
+
+static int spear_pcie_gadget_remove(struct platform_device *pdev)
+{
+	static struct pcie_gadget_target *target;
+
+	target = platform_get_drvdata(pdev);
+
+	configfs_unregister_subsystem(&target->subsys);
+
+	return 0;
+}
+
+static void spear_pcie_gadget_shutdown(struct platform_device *pdev)
+{
+}
+
+static struct platform_driver spear_pcie_gadget_driver = {
+	.probe = spear_pcie_gadget_probe,
+	.remove = spear_pcie_gadget_remove,
+	.shutdown = spear_pcie_gadget_shutdown,
+	.driver = {
+		.name = "pcie-gadget-spear",
+		.bus = &platform_bus_type
+	},
+};
+
+module_platform_driver(spear_pcie_gadget_driver);
+
+MODULE_ALIAS("platform:pcie-gadget-spear");
+MODULE_AUTHOR("Pratyush Anand");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/sram-exec.c b/drivers/misc/sram-exec.c
new file mode 100644
index 000000000..426ad912b
--- /dev/null
+++ b/drivers/misc/sram-exec.c
@@ -0,0 +1,119 @@
+/*
+ * SRAM protect-exec region helper functions
+ *
+ * Copyright (C) 2017 Texas Instruments Incorporated - http://www.ti.com/
+ *	Dave Gerlach
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/device.h>
+#include <linux/genalloc.h>
+#include <linux/mm.h>
+#include <linux/sram.h>
+
+#include <asm/fncpy.h>
+#include <asm/set_memory.h>
+
+#include "sram.h"
+
+static DEFINE_MUTEX(exec_pool_list_mutex);
+static LIST_HEAD(exec_pool_list);
+
+int sram_check_protect_exec(struct sram_dev *sram, struct sram_reserve *block,
+			    struct sram_partition *part)
+{
+	unsigned long base = (unsigned long)part->base;
+	unsigned long end = base + block->size;
+
+	if (!PAGE_ALIGNED(base) || !PAGE_ALIGNED(end)) {
+		dev_err(sram->dev,
+			"SRAM pool marked with 'protect-exec' is not page aligned and will not be created.\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+int sram_add_protect_exec(struct sram_partition *part)
+{
+	mutex_lock(&exec_pool_list_mutex);
+	list_add_tail(&part->list, &exec_pool_list);
+	mutex_unlock(&exec_pool_list_mutex);
+
+	return 0;
+}
+
+/**
+ * sram_exec_copy - copy data to a protected executable region of sram
+ *
+ * @pool: struct gen_pool retrieved that is part of this sram
+ * @dst: Destination address for the copy, that must be inside pool
+ * @src: Source address for the data to copy
+ * @size: Size of copy to perform, which starting from dst, must reside in pool
+ *
+ * Return: Address for copied data that can safely be called through function
+ *	   pointer, or NULL if problem.
+ *
+ * This helper function allows sram driver to act as central control location
+ * of 'protect-exec' pools which are normal sram pools but are always set
+ * read-only and executable except when copying data to them, at which point
+ * they are set to read-write non-executable, to make sure no memory is
+ * writeable and executable at the same time. This region must be page-aligned
+ * and is checked during probe, otherwise page attribute manipulation would
+ * not be possible. Care must be taken to only call the returned address as
+ * dst address is not guaranteed to be safely callable.
+ *
+ * NOTE: This function uses the fncpy macro to move code to the executable
+ * region. Some architectures have strict requirements for relocating
+ * executable code, so fncpy is a macro that must be defined by any arch
+ * making use of this functionality that guarantees a safe copy of exec
+ * data and returns a safe address that can be called as a C function
+ * pointer.
+ */
+void *sram_exec_copy(struct gen_pool *pool, void *dst, void *src,
+		     size_t size)
+{
+	struct sram_partition *part = NULL, *p;
+	unsigned long base;
+	int pages;
+	void *dst_cpy;
+
+	mutex_lock(&exec_pool_list_mutex);
+	list_for_each_entry(p, &exec_pool_list, list) {
+		if (p->pool == pool)
+			part = p;
+	}
+	mutex_unlock(&exec_pool_list_mutex);
+
+	if (!part)
+		return NULL;
+
+	if (!addr_in_gen_pool(pool, (unsigned long)dst, size))
+		return NULL;
+
+	base = (unsigned long)part->base;
+	pages = PAGE_ALIGN(size) / PAGE_SIZE;
+
+	mutex_lock(&part->lock);
+
+	set_memory_nx((unsigned long)base, pages);
+	set_memory_rw((unsigned long)base, pages);
+
+	dst_cpy = fncpy(dst, src, size);
+
+	set_memory_ro((unsigned long)base, pages);
+	set_memory_x((unsigned long)base, pages);
+
+	mutex_unlock(&part->lock);
+
+	return dst_cpy;
+}
+EXPORT_SYMBOL_GPL(sram_exec_copy);
diff --git a/drivers/misc/sram.c b/drivers/misc/sram.c
new file mode 100644
index 000000000..74b183baf
--- /dev/null
+++ b/drivers/misc/sram.c
@@ -0,0 +1,456 @@
+/*
+ * Generic on-chip SRAM allocation driver
+ *
+ * Copyright (C) 2012 Philipp Zabel, Pengutronix
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/genalloc.h>
+#include <linux/io.h>
+#include <linux/list_sort.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/mfd/syscon.h>
+#include <soc/at91/atmel-secumod.h>
+
+#include "sram.h"
+
+#define SRAM_GRANULARITY	32
+
+static ssize_t sram_read(struct file *filp, struct kobject *kobj,
+			 struct bin_attribute *attr,
+			 char *buf, loff_t pos, size_t count)
+{
+	struct sram_partition *part;
+
+	part = container_of(attr, struct sram_partition, battr);
+
+	mutex_lock(&part->lock);
+	memcpy_fromio(buf, part->base + pos, count);
+	mutex_unlock(&part->lock);
+
+	return count;
+}
+
+static ssize_t sram_write(struct file *filp, struct kobject *kobj,
+			  struct bin_attribute *attr,
+			  char *buf, loff_t pos, size_t count)
+{
+	struct sram_partition *part;
+
+	part = container_of(attr, struct sram_partition, battr);
+
+	mutex_lock(&part->lock);
+	memcpy_toio(part->base + pos, buf, count);
+	mutex_unlock(&part->lock);
+
+	return count;
+}
+
+static int sram_add_pool(struct sram_dev *sram, struct sram_reserve *block,
+			 phys_addr_t start, struct sram_partition *part)
+{
+	int ret;
+
+	part->pool = devm_gen_pool_create(sram->dev, ilog2(SRAM_GRANULARITY),
+					  NUMA_NO_NODE, block->label);
+	if (IS_ERR(part->pool))
+		return PTR_ERR(part->pool);
+
+	ret = gen_pool_add_virt(part->pool, (unsigned long)part->base, start,
+				block->size, NUMA_NO_NODE);
+	if (ret < 0) {
+		dev_err(sram->dev, "failed to register subpool: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int sram_add_export(struct sram_dev *sram, struct sram_reserve *block,
+			   phys_addr_t start, struct sram_partition *part)
+{
+	sysfs_bin_attr_init(&part->battr);
+	part->battr.attr.name = devm_kasprintf(sram->dev, GFP_KERNEL,
+					       "%llx.sram",
+					       (unsigned long long)start);
+	if (!part->battr.attr.name)
+		return -ENOMEM;
+
+	part->battr.attr.mode = S_IRUSR | S_IWUSR;
+	part->battr.read = sram_read;
+	part->battr.write = sram_write;
+	part->battr.size = block->size;
+
+	return device_create_bin_file(sram->dev, &part->battr);
+}
+
+static int sram_add_partition(struct sram_dev *sram, struct sram_reserve *block,
+			      phys_addr_t start)
+{
+	int ret;
+	struct sram_partition *part = &sram->partition[sram->partitions];
+
+	mutex_init(&part->lock);
+	part->base = sram->virt_base + block->start;
+
+	if (block->pool) {
+		ret = sram_add_pool(sram, block, start, part);
+		if (ret)
+			return ret;
+	}
+	if (block->export) {
+		ret = sram_add_export(sram, block, start, part);
+		if (ret)
+			return ret;
+	}
+	if (block->protect_exec) {
+		ret = sram_check_protect_exec(sram, block, part);
+		if (ret)
+			return ret;
+
+		ret = sram_add_pool(sram, block, start, part);
+		if (ret)
+			return ret;
+
+		sram_add_protect_exec(part);
+	}
+
+	sram->partitions++;
+
+	return 0;
+}
+
+static void sram_free_partitions(struct sram_dev *sram)
+{
+	struct sram_partition *part;
+
+	if (!sram->partitions)
+		return;
+
+	part = &sram->partition[sram->partitions - 1];
+	for (; sram->partitions; sram->partitions--, part--) {
+		if (part->battr.size)
+			device_remove_bin_file(sram->dev, &part->battr);
+
+		if (part->pool &&
+		    gen_pool_avail(part->pool) < gen_pool_size(part->pool))
+			dev_err(sram->dev, "removed pool while SRAM allocated\n");
+	}
+}
+
+static int sram_reserve_cmp(void *priv, struct list_head *a,
+					struct list_head *b)
+{
+	struct sram_reserve *ra = list_entry(a, struct sram_reserve, list);
+	struct sram_reserve *rb = list_entry(b, struct sram_reserve, list);
+
+	return ra->start - rb->start;
+}
+
+static int sram_reserve_regions(struct sram_dev *sram, struct resource *res)
+{
+	struct device_node *np = sram->dev->of_node, *child;
+	unsigned long size, cur_start, cur_size;
+	struct sram_reserve *rblocks, *block;
+	struct list_head reserve_list;
+	unsigned int nblocks, exports = 0;
+	const char *label;
+	int ret = 0;
+
+	INIT_LIST_HEAD(&reserve_list);
+
+	size = resource_size(res);
+
+	/*
+	 * We need an additional block to mark the end of the memory region
+	 * after the reserved blocks from the dt are processed.
+	 */
+	nblocks = (np) ? of_get_available_child_count(np) + 1 : 1;
+	rblocks = kcalloc(nblocks, sizeof(*rblocks), GFP_KERNEL);
+	if (!rblocks)
+		return -ENOMEM;
+
+	block = &rblocks[0];
+	for_each_available_child_of_node(np, child) {
+		struct resource child_res;
+
+		ret = of_address_to_resource(child, 0, &child_res);
+		if (ret < 0) {
+			dev_err(sram->dev,
+				"could not get address for node %pOF\n",
+				child);
+			goto err_chunks;
+		}
+
+		if (child_res.start < res->start || child_res.end > res->end) {
+			dev_err(sram->dev,
+				"reserved block %pOF outside the sram area\n",
+				child);
+			ret = -EINVAL;
+			goto err_chunks;
+		}
+
+		block->start = child_res.start - res->start;
+		block->size = resource_size(&child_res);
+		list_add_tail(&block->list, &reserve_list);
+
+		if (of_find_property(child, "export", NULL))
+			block->export = true;
+
+		if (of_find_property(child, "pool", NULL))
+			block->pool = true;
+
+		if (of_find_property(child, "protect-exec", NULL))
+			block->protect_exec = true;
+
+		if ((block->export || block->pool || block->protect_exec) &&
+		    block->size) {
+			exports++;
+
+			label = NULL;
+			ret = of_property_read_string(child, "label", &label);
+			if (ret && ret != -EINVAL) {
+				dev_err(sram->dev,
+					"%pOF has invalid label name\n",
+					child);
+				goto err_chunks;
+			}
+			if (!label)
+				label = child->name;
+
+			block->label = devm_kstrdup(sram->dev,
+						    label, GFP_KERNEL);
+			if (!block->label) {
+				ret = -ENOMEM;
+				goto err_chunks;
+			}
+
+			dev_dbg(sram->dev, "found %sblock '%s' 0x%x-0x%x\n",
+				block->export ? "exported " : "", block->label,
+				block->start, block->start + block->size);
+		} else {
+			dev_dbg(sram->dev, "found reserved block 0x%x-0x%x\n",
+				block->start, block->start + block->size);
+		}
+
+		block++;
+	}
+	child = NULL;
+
+	/* the last chunk marks the end of the region */
+	rblocks[nblocks - 1].start = size;
+	rblocks[nblocks - 1].size = 0;
+	list_add_tail(&rblocks[nblocks - 1].list, &reserve_list);
+
+	list_sort(NULL, &reserve_list, sram_reserve_cmp);
+
+	if (exports) {
+		sram->partition = devm_kcalloc(sram->dev,
+				       exports, sizeof(*sram->partition),
+				       GFP_KERNEL);
+		if (!sram->partition) {
+			ret = -ENOMEM;
+			goto err_chunks;
+		}
+	}
+
+	cur_start = 0;
+	list_for_each_entry(block, &reserve_list, list) {
+		/* can only happen if sections overlap */
+		if (block->start < cur_start) {
+			dev_err(sram->dev,
+				"block at 0x%x starts after current offset 0x%lx\n",
+				block->start, cur_start);
+			ret = -EINVAL;
+			sram_free_partitions(sram);
+			goto err_chunks;
+		}
+
+		if ((block->export || block->pool || block->protect_exec) &&
+		    block->size) {
+			ret = sram_add_partition(sram, block,
+						 res->start + block->start);
+			if (ret) {
+				sram_free_partitions(sram);
+				goto err_chunks;
+			}
+		}
+
+		/* current start is in a reserved block, so continue after it */
+		if (block->start == cur_start) {
+			cur_start = block->start + block->size;
+			continue;
+		}
+
+		/*
+		 * allocate the space between the current starting
+		 * address and the following reserved block, or the
+		 * end of the region.
+		 */
+		cur_size = block->start - cur_start;
+
+		dev_dbg(sram->dev, "adding chunk 0x%lx-0x%lx\n",
+			cur_start, cur_start + cur_size);
+
+		ret = gen_pool_add_virt(sram->pool,
+				(unsigned long)sram->virt_base + cur_start,
+				res->start + cur_start, cur_size, -1);
+		if (ret < 0) {
+			sram_free_partitions(sram);
+			goto err_chunks;
+		}
+
+		/* next allocation after this reserved block */
+		cur_start = block->start + block->size;
+	}
+
+ err_chunks:
+	if (child)
+		of_node_put(child);
+
+	kfree(rblocks);
+
+	return ret;
+}
+
+static int atmel_securam_wait(void)
+{
+	struct regmap *regmap;
+	u32 val;
+
+	regmap = syscon_regmap_lookup_by_compatible("atmel,sama5d2-secumod");
+	if (IS_ERR(regmap))
+		return -ENODEV;
+
+	return regmap_read_poll_timeout(regmap, AT91_SECUMOD_RAMRDY, val,
+					val & AT91_SECUMOD_RAMRDY_READY,
+					10000, 500000);
+}
+
+static const struct of_device_id sram_dt_ids[] = {
+	{ .compatible = "mmio-sram" },
+	{ .compatible = "atmel,sama5d2-securam", .data = atmel_securam_wait },
+	{}
+};
+
+static int sram_probe(struct platform_device *pdev)
+{
+	struct sram_dev *sram;
+	struct resource *res;
+	size_t size;
+	int ret;
+	int (*init_func)(void);
+
+	sram = devm_kzalloc(&pdev->dev, sizeof(*sram), GFP_KERNEL);
+	if (!sram)
+		return -ENOMEM;
+
+	sram->dev = &pdev->dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(sram->dev, "found no memory resource\n");
+		return -EINVAL;
+	}
+
+	size = resource_size(res);
+
+	if (!devm_request_mem_region(sram->dev, res->start, size, pdev->name)) {
+		dev_err(sram->dev, "could not request region for resource\n");
+		return -EBUSY;
+	}
+
+	if (of_property_read_bool(pdev->dev.of_node, "no-memory-wc"))
+		sram->virt_base = devm_ioremap(sram->dev, res->start, size);
+	else
+		sram->virt_base = devm_ioremap_wc(sram->dev, res->start, size);
+	if (!sram->virt_base)
+		return -ENOMEM;
+
+	sram->pool = devm_gen_pool_create(sram->dev, ilog2(SRAM_GRANULARITY),
+					  NUMA_NO_NODE, NULL);
+	if (IS_ERR(sram->pool))
+		return PTR_ERR(sram->pool);
+
+	sram->clk = devm_clk_get(sram->dev, NULL);
+	if (IS_ERR(sram->clk))
+		sram->clk = NULL;
+	else
+		clk_prepare_enable(sram->clk);
+
+	ret = sram_reserve_regions(sram, res);
+	if (ret)
+		goto err_disable_clk;
+
+	platform_set_drvdata(pdev, sram);
+
+	init_func = of_device_get_match_data(&pdev->dev);
+	if (init_func) {
+		ret = init_func();
+		if (ret)
+			goto err_free_partitions;
+	}
+
+	dev_dbg(sram->dev, "SRAM pool: %zu KiB @ 0x%p\n",
+		gen_pool_size(sram->pool) / 1024, sram->virt_base);
+
+	return 0;
+
+err_free_partitions:
+	sram_free_partitions(sram);
+err_disable_clk:
+	if (sram->clk)
+		clk_disable_unprepare(sram->clk);
+
+	return ret;
+}
+
+static int sram_remove(struct platform_device *pdev)
+{
+	struct sram_dev *sram = platform_get_drvdata(pdev);
+
+	sram_free_partitions(sram);
+
+	if (gen_pool_avail(sram->pool) < gen_pool_size(sram->pool))
+		dev_err(sram->dev, "removed while SRAM allocated\n");
+
+	if (sram->clk)
+		clk_disable_unprepare(sram->clk);
+
+	return 0;
+}
+
+static struct platform_driver sram_driver = {
+	.driver = {
+		.name = "sram",
+		.of_match_table = sram_dt_ids,
+	},
+	.probe = sram_probe,
+	.remove = sram_remove,
+};
+
+static int __init sram_init(void)
+{
+	return platform_driver_register(&sram_driver);
+}
+
+postcore_initcall(sram_init);
diff --git a/drivers/misc/sram.h b/drivers/misc/sram.h
new file mode 100644
index 000000000..c181ce4c8
--- /dev/null
+++ b/drivers/misc/sram.h
@@ -0,0 +1,58 @@
+/*
+ * Defines for the SRAM driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __SRAM_H
+#define __SRAM_H
+
+struct sram_partition {
+	void __iomem *base;
+
+	struct gen_pool *pool;
+	struct bin_attribute battr;
+	struct mutex lock;
+	struct list_head list;
+};
+
+struct sram_dev {
+	struct device *dev;
+	void __iomem *virt_base;
+
+	struct gen_pool *pool;
+	struct clk *clk;
+
+	struct sram_partition *partition;
+	u32 partitions;
+};
+
+struct sram_reserve {
+	struct list_head list;
+	u32 start;
+	u32 size;
+	bool export;
+	bool pool;
+	bool protect_exec;
+	const char *label;
+};
+
+#ifdef CONFIG_SRAM_EXEC
+int sram_check_protect_exec(struct sram_dev *sram, struct sram_reserve *block,
+			    struct sram_partition *part);
+int sram_add_protect_exec(struct sram_partition *part);
+#else
+static inline int sram_check_protect_exec(struct sram_dev *sram,
+					  struct sram_reserve *block,
+					  struct sram_partition *part)
+{
+	return -ENODEV;
+}
+
+static inline int sram_add_protect_exec(struct sram_partition *part)
+{
+	return -ENODEV;
+}
+#endif /* CONFIG_SRAM_EXEC */
+#endif /* __SRAM_H */
diff --git a/drivers/misc/ti-st/Kconfig b/drivers/misc/ti-st/Kconfig
new file mode 100644
index 000000000..5bb92698b
--- /dev/null
+++ b/drivers/misc/ti-st/Kconfig
@@ -0,0 +1,18 @@
+#
+# TI's shared transport line discipline and the protocol
+# drivers (BT, FM and GPS)
+#
+menu "Texas Instruments shared transport line discipline"
+config TI_ST
+	tristate "Shared transport core driver"
+	depends on NET && TTY
+	depends on GPIOLIB || COMPILE_TEST
+	select FW_LOADER
+	help
+	  This enables the shared transport core driver for TI
+	  BT / FM and GPS combo chips. This enables protocol drivers
+	  to register themselves with core and send data, the responses
+	  are returned to relevant protocol drivers based on their
+	  packet types.
+
+endmenu
diff --git a/drivers/misc/ti-st/Makefile b/drivers/misc/ti-st/Makefile
new file mode 100644
index 000000000..78d7ebb14
--- /dev/null
+++ b/drivers/misc/ti-st/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for TI's shared transport line discipline
+# and its protocol drivers (BT, FM, GPS)
+#
+obj-$(CONFIG_TI_ST) 		+= st_drv.o
+st_drv-objs			:= st_core.o st_kim.o st_ll.o
diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c
new file mode 100644
index 000000000..eda8d407b
--- /dev/null
+++ b/drivers/misc/ti-st/st_core.c
@@ -0,0 +1,922 @@
+/*
+ *  Shared Transport Line discipline driver Core
+ *	This hooks up ST KIM driver and ST LL driver
+ *  Copyright (C) 2009-2010 Texas Instruments
+ *  Author: Pavan Savoy <pavan_savoy@ti.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#define pr_fmt(fmt)	"(stc): " fmt
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/tty.h>
+
+#include <linux/seq_file.h>
+#include <linux/skbuff.h>
+
+#include <linux/ti_wilink_st.h>
+
+extern void st_kim_recv(void *, const unsigned char *, long);
+void st_int_recv(void *, const unsigned char *, long);
+/* function pointer pointing to either,
+ * st_kim_recv during registration to receive fw download responses
+ * st_int_recv after registration to receive proto stack responses
+ */
+static void (*st_recv) (void *, const unsigned char *, long);
+
+/********************************************************************/
+static void add_channel_to_table(struct st_data_s *st_gdata,
+		struct st_proto_s *new_proto)
+{
+	pr_info("%s: id %d\n", __func__, new_proto->chnl_id);
+	/* list now has the channel id as index itself */
+	st_gdata->list[new_proto->chnl_id] = new_proto;
+	st_gdata->is_registered[new_proto->chnl_id] = true;
+}
+
+static void remove_channel_from_table(struct st_data_s *st_gdata,
+		struct st_proto_s *proto)
+{
+	pr_info("%s: id %d\n", __func__, proto->chnl_id);
+/*	st_gdata->list[proto->chnl_id] = NULL; */
+	st_gdata->is_registered[proto->chnl_id] = false;
+}
+
+/*
+ * called from KIM during firmware download.
+ *
+ * This is a wrapper function to tty->ops->write_room.
+ * It returns number of free space available in
+ * uart tx buffer.
+ */
+int st_get_uart_wr_room(struct st_data_s *st_gdata)
+{
+	struct tty_struct *tty;
+	if (unlikely(st_gdata == NULL || st_gdata->tty == NULL)) {
+		pr_err("tty unavailable to perform write");
+		return -1;
+	}
+	tty = st_gdata->tty;
+	return tty->ops->write_room(tty);
+}
+
+/* can be called in from
+ * -- KIM (during fw download)
+ * -- ST Core (during st_write)
+ *
+ *  This is the internal write function - a wrapper
+ *  to tty->ops->write
+ */
+int st_int_write(struct st_data_s *st_gdata,
+	const unsigned char *data, int count)
+{
+	struct tty_struct *tty;
+	if (unlikely(st_gdata == NULL || st_gdata->tty == NULL)) {
+		pr_err("tty unavailable to perform write");
+		return -EINVAL;
+	}
+	tty = st_gdata->tty;
+#ifdef VERBOSE
+	print_hex_dump(KERN_DEBUG, "<out<", DUMP_PREFIX_NONE,
+		16, 1, data, count, 0);
+#endif
+	return tty->ops->write(tty, data, count);
+
+}
+
+/*
+ * push the skb received to relevant
+ * protocol stacks
+ */
+static void st_send_frame(unsigned char chnl_id, struct st_data_s *st_gdata)
+{
+	pr_debug(" %s(prot:%d) ", __func__, chnl_id);
+
+	if (unlikely
+	    (st_gdata == NULL || st_gdata->rx_skb == NULL
+	     || st_gdata->is_registered[chnl_id] == false)) {
+		pr_err("chnl_id %d not registered, no data to send?",
+			   chnl_id);
+		kfree_skb(st_gdata->rx_skb);
+		return;
+	}
+	/* this cannot fail
+	 * this shouldn't take long
+	 * - should be just skb_queue_tail for the
+	 *   protocol stack driver
+	 */
+	if (likely(st_gdata->list[chnl_id]->recv != NULL)) {
+		if (unlikely
+			(st_gdata->list[chnl_id]->recv
+			(st_gdata->list[chnl_id]->priv_data, st_gdata->rx_skb)
+			     != 0)) {
+			pr_err(" proto stack %d's ->recv failed", chnl_id);
+			kfree_skb(st_gdata->rx_skb);
+			return;
+		}
+	} else {
+		pr_err(" proto stack %d's ->recv null", chnl_id);
+		kfree_skb(st_gdata->rx_skb);
+	}
+	return;
+}
+
+/**
+ * st_reg_complete -
+ * to call registration complete callbacks
+ * of all protocol stack drivers
+ * This function is being called with spin lock held, protocol drivers are
+ * only expected to complete their waits and do nothing more than that.
+ */
+static void st_reg_complete(struct st_data_s *st_gdata, int err)
+{
+	unsigned char i = 0;
+	pr_info(" %s ", __func__);
+	for (i = 0; i < ST_MAX_CHANNELS; i++) {
+		if (likely(st_gdata != NULL &&
+			st_gdata->is_registered[i] == true &&
+				st_gdata->list[i]->reg_complete_cb != NULL)) {
+			st_gdata->list[i]->reg_complete_cb
+				(st_gdata->list[i]->priv_data, err);
+			pr_info("protocol %d's cb sent %d\n", i, err);
+			if (err) { /* cleanup registered protocol */
+				st_gdata->is_registered[i] = false;
+				if (st_gdata->protos_registered)
+					st_gdata->protos_registered--;
+			}
+		}
+	}
+}
+
+static inline int st_check_data_len(struct st_data_s *st_gdata,
+	unsigned char chnl_id, int len)
+{
+	int room = skb_tailroom(st_gdata->rx_skb);
+
+	pr_debug("len %d room %d", len, room);
+
+	if (!len) {
+		/* Received packet has only packet header and
+		 * has zero length payload. So, ask ST CORE to
+		 * forward the packet to protocol driver (BT/FM/GPS)
+		 */
+		st_send_frame(chnl_id, st_gdata);
+
+	} else if (len > room) {
+		/* Received packet's payload length is larger.
+		 * We can't accommodate it in created skb.
+		 */
+		pr_err("Data length is too large len %d room %d", len,
+			   room);
+		kfree_skb(st_gdata->rx_skb);
+	} else {
+		/* Packet header has non-zero payload length and
+		 * we have enough space in created skb. Lets read
+		 * payload data */
+		st_gdata->rx_state = ST_W4_DATA;
+		st_gdata->rx_count = len;
+		return len;
+	}
+
+	/* Change ST state to continue to process next
+	 * packet */
+	st_gdata->rx_state = ST_W4_PACKET_TYPE;
+	st_gdata->rx_skb = NULL;
+	st_gdata->rx_count = 0;
+	st_gdata->rx_chnl = 0;
+
+	return 0;
+}
+
+/**
+ * st_wakeup_ack - internal function for action when wake-up ack
+ *	received
+ */
+static inline void st_wakeup_ack(struct st_data_s *st_gdata,
+	unsigned char cmd)
+{
+	struct sk_buff *waiting_skb;
+	unsigned long flags = 0;
+
+	spin_lock_irqsave(&st_gdata->lock, flags);
+	/* de-Q from waitQ and Q in txQ now that the
+	 * chip is awake
+	 */
+	while ((waiting_skb = skb_dequeue(&st_gdata->tx_waitq)))
+		skb_queue_tail(&st_gdata->txq, waiting_skb);
+
+	/* state forwarded to ST LL */
+	st_ll_sleep_state(st_gdata, (unsigned long)cmd);
+	spin_unlock_irqrestore(&st_gdata->lock, flags);
+
+	/* wake up to send the recently copied skbs from waitQ */
+	st_tx_wakeup(st_gdata);
+}
+
+/**
+ * st_int_recv - ST's internal receive function.
+ *	Decodes received RAW data and forwards to corresponding
+ *	client drivers (Bluetooth,FM,GPS..etc).
+ *	This can receive various types of packets,
+ *	HCI-Events, ACL, SCO, 4 types of HCI-LL PM packets
+ *	CH-8 packets from FM, CH-9 packets from GPS cores.
+ */
+void st_int_recv(void *disc_data,
+	const unsigned char *data, long count)
+{
+	char *ptr;
+	struct st_proto_s *proto;
+	unsigned short payload_len = 0;
+	int len = 0;
+	unsigned char type = 0;
+	unsigned char *plen;
+	struct st_data_s *st_gdata = (struct st_data_s *)disc_data;
+	unsigned long flags;
+
+	ptr = (char *)data;
+	/* tty_receive sent null ? */
+	if (unlikely(ptr == NULL) || (st_gdata == NULL)) {
+		pr_err(" received null from TTY ");
+		return;
+	}
+
+	pr_debug("count %ld rx_state %ld"
+		   "rx_count %ld", count, st_gdata->rx_state,
+		   st_gdata->rx_count);
+
+	spin_lock_irqsave(&st_gdata->lock, flags);
+	/* Decode received bytes here */
+	while (count) {
+		if (st_gdata->rx_count) {
+			len = min_t(unsigned int, st_gdata->rx_count, count);
+			skb_put_data(st_gdata->rx_skb, ptr, len);
+			st_gdata->rx_count -= len;
+			count -= len;
+			ptr += len;
+
+			if (st_gdata->rx_count)
+				continue;
+
+			/* Check ST RX state machine , where are we? */
+			switch (st_gdata->rx_state) {
+			/* Waiting for complete packet ? */
+			case ST_W4_DATA:
+				pr_debug("Complete pkt received");
+				/* Ask ST CORE to forward
+				 * the packet to protocol driver */
+				st_send_frame(st_gdata->rx_chnl, st_gdata);
+
+				st_gdata->rx_state = ST_W4_PACKET_TYPE;
+				st_gdata->rx_skb = NULL;
+				continue;
+			/* parse the header to know details */
+			case ST_W4_HEADER:
+				proto = st_gdata->list[st_gdata->rx_chnl];
+				plen =
+				&st_gdata->rx_skb->data
+				[proto->offset_len_in_hdr];
+				pr_debug("plen pointing to %x\n", *plen);
+				if (proto->len_size == 1)/* 1 byte len field */
+					payload_len = *(unsigned char *)plen;
+				else if (proto->len_size == 2)
+					payload_len =
+					__le16_to_cpu(*(unsigned short *)plen);
+				else
+					pr_info("%s: invalid length "
+					"for id %d\n",
+					__func__, proto->chnl_id);
+				st_check_data_len(st_gdata, proto->chnl_id,
+						payload_len);
+				pr_debug("off %d, pay len %d\n",
+					proto->offset_len_in_hdr, payload_len);
+				continue;
+			}	/* end of switch rx_state */
+		}
+
+		/* end of if rx_count */
+		/* Check first byte of packet and identify module
+		 * owner (BT/FM/GPS) */
+		switch (*ptr) {
+		case LL_SLEEP_IND:
+		case LL_SLEEP_ACK:
+		case LL_WAKE_UP_IND:
+			pr_debug("PM packet");
+			/* this takes appropriate action based on
+			 * sleep state received --
+			 */
+			st_ll_sleep_state(st_gdata, *ptr);
+			/* if WAKEUP_IND collides copy from waitq to txq
+			 * and assume chip awake
+			 */
+			spin_unlock_irqrestore(&st_gdata->lock, flags);
+			if (st_ll_getstate(st_gdata) == ST_LL_AWAKE)
+				st_wakeup_ack(st_gdata, LL_WAKE_UP_ACK);
+			spin_lock_irqsave(&st_gdata->lock, flags);
+
+			ptr++;
+			count--;
+			continue;
+		case LL_WAKE_UP_ACK:
+			pr_debug("PM packet");
+
+			spin_unlock_irqrestore(&st_gdata->lock, flags);
+			/* wake up ack received */
+			st_wakeup_ack(st_gdata, *ptr);
+			spin_lock_irqsave(&st_gdata->lock, flags);
+
+			ptr++;
+			count--;
+			continue;
+			/* Unknow packet? */
+		default:
+			type = *ptr;
+
+			/* Default case means non-HCILL packets,
+			 * possibilities are packets for:
+			 * (a) valid protocol -  Supported Protocols within
+			 *     the ST_MAX_CHANNELS.
+			 * (b) registered protocol - Checked by
+			 *     "st_gdata->list[type] == NULL)" are supported
+			 *     protocols only.
+			 *  Rules out any invalid protocol and
+			 *  unregistered protocols with channel ID < 16.
+			 */
+
+			if ((type >= ST_MAX_CHANNELS) ||
+					(st_gdata->list[type] == NULL)) {
+				pr_err("chip/interface misbehavior: "
+						"dropping frame starting "
+						"with 0x%02x\n", type);
+				goto done;
+			}
+
+			st_gdata->rx_skb = alloc_skb(
+					st_gdata->list[type]->max_frame_size,
+					GFP_ATOMIC);
+			if (st_gdata->rx_skb == NULL) {
+				pr_err("out of memory: dropping\n");
+				goto done;
+			}
+
+			skb_reserve(st_gdata->rx_skb,
+					st_gdata->list[type]->reserve);
+			/* next 2 required for BT only */
+			st_gdata->rx_skb->cb[0] = type; /*pkt_type*/
+			st_gdata->rx_skb->cb[1] = 0; /*incoming*/
+			st_gdata->rx_chnl = *ptr;
+			st_gdata->rx_state = ST_W4_HEADER;
+			st_gdata->rx_count = st_gdata->list[type]->hdr_len;
+			pr_debug("rx_count %ld\n", st_gdata->rx_count);
+		};
+		ptr++;
+		count--;
+	}
+done:
+	spin_unlock_irqrestore(&st_gdata->lock, flags);
+	pr_debug("done %s", __func__);
+	return;
+}
+
+/**
+ * st_int_dequeue - internal de-Q function.
+ *	If the previous data set was not written
+ *	completely, return that skb which has the pending data.
+ *	In normal cases, return top of txq.
+ */
+static struct sk_buff *st_int_dequeue(struct st_data_s *st_gdata)
+{
+	struct sk_buff *returning_skb;
+
+	pr_debug("%s", __func__);
+	if (st_gdata->tx_skb != NULL) {
+		returning_skb = st_gdata->tx_skb;
+		st_gdata->tx_skb = NULL;
+		return returning_skb;
+	}
+	return skb_dequeue(&st_gdata->txq);
+}
+
+/**
+ * st_int_enqueue - internal Q-ing function.
+ *	Will either Q the skb to txq or the tx_waitq
+ *	depending on the ST LL state.
+ *	If the chip is asleep, then Q it onto waitq and
+ *	wakeup the chip.
+ *	txq and waitq needs protection since the other contexts
+ *	may be sending data, waking up chip.
+ */
+static void st_int_enqueue(struct st_data_s *st_gdata, struct sk_buff *skb)
+{
+	unsigned long flags = 0;
+
+	pr_debug("%s", __func__);
+	spin_lock_irqsave(&st_gdata->lock, flags);
+
+	switch (st_ll_getstate(st_gdata)) {
+	case ST_LL_AWAKE:
+		pr_debug("ST LL is AWAKE, sending normally");
+		skb_queue_tail(&st_gdata->txq, skb);
+		break;
+	case ST_LL_ASLEEP_TO_AWAKE:
+		skb_queue_tail(&st_gdata->tx_waitq, skb);
+		break;
+	case ST_LL_AWAKE_TO_ASLEEP:
+		pr_err("ST LL is illegal state(%ld),"
+			   "purging received skb.", st_ll_getstate(st_gdata));
+		kfree_skb(skb);
+		break;
+	case ST_LL_ASLEEP:
+		skb_queue_tail(&st_gdata->tx_waitq, skb);
+		st_ll_wakeup(st_gdata);
+		break;
+	default:
+		pr_err("ST LL is illegal state(%ld),"
+			   "purging received skb.", st_ll_getstate(st_gdata));
+		kfree_skb(skb);
+		break;
+	}
+
+	spin_unlock_irqrestore(&st_gdata->lock, flags);
+	pr_debug("done %s", __func__);
+	return;
+}
+
+/*
+ * internal wakeup function
+ * called from either
+ * - TTY layer when write's finished
+ * - st_write (in context of the protocol stack)
+ */
+static void work_fn_write_wakeup(struct work_struct *work)
+{
+	struct st_data_s *st_gdata = container_of(work, struct st_data_s,
+			work_write_wakeup);
+
+	st_tx_wakeup((void *)st_gdata);
+}
+void st_tx_wakeup(struct st_data_s *st_data)
+{
+	struct sk_buff *skb;
+	unsigned long flags;	/* for irq save flags */
+	pr_debug("%s", __func__);
+	/* check for sending & set flag sending here */
+	if (test_and_set_bit(ST_TX_SENDING, &st_data->tx_state)) {
+		pr_debug("ST already sending");
+		/* keep sending */
+		set_bit(ST_TX_WAKEUP, &st_data->tx_state);
+		return;
+		/* TX_WAKEUP will be checked in another
+		 * context
+		 */
+	}
+	do {			/* come back if st_tx_wakeup is set */
+		/* woke-up to write */
+		clear_bit(ST_TX_WAKEUP, &st_data->tx_state);
+		while ((skb = st_int_dequeue(st_data))) {
+			int len;
+			spin_lock_irqsave(&st_data->lock, flags);
+			/* enable wake-up from TTY */
+			set_bit(TTY_DO_WRITE_WAKEUP, &st_data->tty->flags);
+			len = st_int_write(st_data, skb->data, skb->len);
+			skb_pull(skb, len);
+			/* if skb->len = len as expected, skb->len=0 */
+			if (skb->len) {
+				/* would be the next skb to be sent */
+				st_data->tx_skb = skb;
+				spin_unlock_irqrestore(&st_data->lock, flags);
+				break;
+			}
+			kfree_skb(skb);
+			spin_unlock_irqrestore(&st_data->lock, flags);
+		}
+		/* if wake-up is set in another context- restart sending */
+	} while (test_bit(ST_TX_WAKEUP, &st_data->tx_state));
+
+	/* clear flag sending */
+	clear_bit(ST_TX_SENDING, &st_data->tx_state);
+}
+
+/********************************************************************/
+/* functions called from ST KIM
+*/
+void kim_st_list_protocols(struct st_data_s *st_gdata, void *buf)
+{
+	seq_printf(buf, "[%d]\nBT=%c\nFM=%c\nGPS=%c\n",
+			st_gdata->protos_registered,
+			st_gdata->is_registered[0x04] == true ? 'R' : 'U',
+			st_gdata->is_registered[0x08] == true ? 'R' : 'U',
+			st_gdata->is_registered[0x09] == true ? 'R' : 'U');
+}
+
+/********************************************************************/
+/*
+ * functions called from protocol stack drivers
+ * to be EXPORT-ed
+ */
+long st_register(struct st_proto_s *new_proto)
+{
+	struct st_data_s	*st_gdata;
+	long err = 0;
+	unsigned long flags = 0;
+
+	st_kim_ref(&st_gdata, 0);
+	if (st_gdata == NULL || new_proto == NULL || new_proto->recv == NULL
+	    || new_proto->reg_complete_cb == NULL) {
+		pr_err("gdata/new_proto/recv or reg_complete_cb not ready");
+		return -EINVAL;
+	}
+
+	if (new_proto->chnl_id >= ST_MAX_CHANNELS) {
+		pr_err("chnl_id %d not supported", new_proto->chnl_id);
+		return -EPROTONOSUPPORT;
+	}
+
+	if (st_gdata->is_registered[new_proto->chnl_id] == true) {
+		pr_err("chnl_id %d already registered", new_proto->chnl_id);
+		return -EALREADY;
+	}
+
+	/* can be from process context only */
+	spin_lock_irqsave(&st_gdata->lock, flags);
+
+	if (test_bit(ST_REG_IN_PROGRESS, &st_gdata->st_state)) {
+		pr_info(" ST_REG_IN_PROGRESS:%d ", new_proto->chnl_id);
+		/* fw download in progress */
+
+		add_channel_to_table(st_gdata, new_proto);
+		st_gdata->protos_registered++;
+		new_proto->write = st_write;
+
+		set_bit(ST_REG_PENDING, &st_gdata->st_state);
+		spin_unlock_irqrestore(&st_gdata->lock, flags);
+		return -EINPROGRESS;
+	} else if (st_gdata->protos_registered == ST_EMPTY) {
+		pr_info(" chnl_id list empty :%d ", new_proto->chnl_id);
+		set_bit(ST_REG_IN_PROGRESS, &st_gdata->st_state);
+		st_recv = st_kim_recv;
+
+		/* enable the ST LL - to set default chip state */
+		st_ll_enable(st_gdata);
+
+		/* release lock previously held - re-locked below */
+		spin_unlock_irqrestore(&st_gdata->lock, flags);
+
+		/* this may take a while to complete
+		 * since it involves BT fw download
+		 */
+		err = st_kim_start(st_gdata->kim_data);
+		if (err != 0) {
+			clear_bit(ST_REG_IN_PROGRESS, &st_gdata->st_state);
+			if ((st_gdata->protos_registered != ST_EMPTY) &&
+			    (test_bit(ST_REG_PENDING, &st_gdata->st_state))) {
+				pr_err(" KIM failure complete callback ");
+				spin_lock_irqsave(&st_gdata->lock, flags);
+				st_reg_complete(st_gdata, err);
+				spin_unlock_irqrestore(&st_gdata->lock, flags);
+				clear_bit(ST_REG_PENDING, &st_gdata->st_state);
+			}
+			return -EINVAL;
+		}
+
+		spin_lock_irqsave(&st_gdata->lock, flags);
+
+		clear_bit(ST_REG_IN_PROGRESS, &st_gdata->st_state);
+		st_recv = st_int_recv;
+
+		/* this is where all pending registration
+		 * are signalled to be complete by calling callback functions
+		 */
+		if ((st_gdata->protos_registered != ST_EMPTY) &&
+		    (test_bit(ST_REG_PENDING, &st_gdata->st_state))) {
+			pr_debug(" call reg complete callback ");
+			st_reg_complete(st_gdata, 0);
+		}
+		clear_bit(ST_REG_PENDING, &st_gdata->st_state);
+
+		/* check for already registered once more,
+		 * since the above check is old
+		 */
+		if (st_gdata->is_registered[new_proto->chnl_id] == true) {
+			pr_err(" proto %d already registered ",
+				   new_proto->chnl_id);
+			spin_unlock_irqrestore(&st_gdata->lock, flags);
+			return -EALREADY;
+		}
+
+		add_channel_to_table(st_gdata, new_proto);
+		st_gdata->protos_registered++;
+		new_proto->write = st_write;
+		spin_unlock_irqrestore(&st_gdata->lock, flags);
+		return err;
+	}
+	/* if fw is already downloaded & new stack registers protocol */
+	else {
+		add_channel_to_table(st_gdata, new_proto);
+		st_gdata->protos_registered++;
+		new_proto->write = st_write;
+
+		/* lock already held before entering else */
+		spin_unlock_irqrestore(&st_gdata->lock, flags);
+		return err;
+	}
+}
+EXPORT_SYMBOL_GPL(st_register);
+
+/* to unregister a protocol -
+ * to be called from protocol stack driver
+ */
+long st_unregister(struct st_proto_s *proto)
+{
+	long err = 0;
+	unsigned long flags = 0;
+	struct st_data_s	*st_gdata;
+
+	pr_debug("%s: %d ", __func__, proto->chnl_id);
+
+	st_kim_ref(&st_gdata, 0);
+	if (!st_gdata || proto->chnl_id >= ST_MAX_CHANNELS) {
+		pr_err(" chnl_id %d not supported", proto->chnl_id);
+		return -EPROTONOSUPPORT;
+	}
+
+	spin_lock_irqsave(&st_gdata->lock, flags);
+
+	if (st_gdata->is_registered[proto->chnl_id] == false) {
+		pr_err(" chnl_id %d not registered", proto->chnl_id);
+		spin_unlock_irqrestore(&st_gdata->lock, flags);
+		return -EPROTONOSUPPORT;
+	}
+
+	if (st_gdata->protos_registered)
+		st_gdata->protos_registered--;
+
+	remove_channel_from_table(st_gdata, proto);
+	spin_unlock_irqrestore(&st_gdata->lock, flags);
+
+	if ((st_gdata->protos_registered == ST_EMPTY) &&
+	    (!test_bit(ST_REG_PENDING, &st_gdata->st_state))) {
+		pr_info(" all chnl_ids unregistered ");
+
+		/* stop traffic on tty */
+		if (st_gdata->tty) {
+			tty_ldisc_flush(st_gdata->tty);
+			stop_tty(st_gdata->tty);
+		}
+
+		/* all chnl_ids now unregistered */
+		st_kim_stop(st_gdata->kim_data);
+		/* disable ST LL */
+		st_ll_disable(st_gdata);
+	}
+	return err;
+}
+
+/*
+ * called in protocol stack drivers
+ * via the write function pointer
+ */
+long st_write(struct sk_buff *skb)
+{
+	struct st_data_s *st_gdata;
+	long len;
+
+	st_kim_ref(&st_gdata, 0);
+	if (unlikely(skb == NULL || st_gdata == NULL
+		|| st_gdata->tty == NULL)) {
+		pr_err("data/tty unavailable to perform write");
+		return -EINVAL;
+	}
+
+	pr_debug("%d to be written", skb->len);
+	len = skb->len;
+
+	/* st_ll to decide where to enqueue the skb */
+	st_int_enqueue(st_gdata, skb);
+	/* wake up */
+	st_tx_wakeup(st_gdata);
+
+	/* return number of bytes written */
+	return len;
+}
+
+/* for protocols making use of shared transport */
+EXPORT_SYMBOL_GPL(st_unregister);
+
+/********************************************************************/
+/*
+ * functions called from TTY layer
+ */
+static int st_tty_open(struct tty_struct *tty)
+{
+	int err = 0;
+	struct st_data_s *st_gdata;
+	pr_info("%s ", __func__);
+
+	st_kim_ref(&st_gdata, 0);
+	st_gdata->tty = tty;
+	tty->disc_data = st_gdata;
+
+	/* don't do an wakeup for now */
+	clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
+
+	/* mem already allocated
+	 */
+	tty->receive_room = 65536;
+	/* Flush any pending characters in the driver and discipline. */
+	tty_ldisc_flush(tty);
+	tty_driver_flush_buffer(tty);
+	/*
+	 * signal to UIM via KIM that -
+	 * installation of N_TI_WL ldisc is complete
+	 */
+	st_kim_complete(st_gdata->kim_data);
+	pr_debug("done %s", __func__);
+	return err;
+}
+
+static void st_tty_close(struct tty_struct *tty)
+{
+	unsigned char i = ST_MAX_CHANNELS;
+	unsigned long flags = 0;
+	struct	st_data_s *st_gdata = tty->disc_data;
+
+	pr_info("%s ", __func__);
+
+	/* TODO:
+	 * if a protocol has been registered & line discipline
+	 * un-installed for some reason - what should be done ?
+	 */
+	spin_lock_irqsave(&st_gdata->lock, flags);
+	for (i = ST_BT; i < ST_MAX_CHANNELS; i++) {
+		if (st_gdata->is_registered[i] == true)
+			pr_err("%d not un-registered", i);
+		st_gdata->list[i] = NULL;
+		st_gdata->is_registered[i] = false;
+	}
+	st_gdata->protos_registered = 0;
+	spin_unlock_irqrestore(&st_gdata->lock, flags);
+	/*
+	 * signal to UIM via KIM that -
+	 * N_TI_WL ldisc is un-installed
+	 */
+	st_kim_complete(st_gdata->kim_data);
+	st_gdata->tty = NULL;
+	/* Flush any pending characters in the driver and discipline. */
+	tty_ldisc_flush(tty);
+	tty_driver_flush_buffer(tty);
+
+	spin_lock_irqsave(&st_gdata->lock, flags);
+	/* empty out txq and tx_waitq */
+	skb_queue_purge(&st_gdata->txq);
+	skb_queue_purge(&st_gdata->tx_waitq);
+	/* reset the TTY Rx states of ST */
+	st_gdata->rx_count = 0;
+	st_gdata->rx_state = ST_W4_PACKET_TYPE;
+	kfree_skb(st_gdata->rx_skb);
+	st_gdata->rx_skb = NULL;
+	spin_unlock_irqrestore(&st_gdata->lock, flags);
+
+	pr_debug("%s: done ", __func__);
+}
+
+static void st_tty_receive(struct tty_struct *tty, const unsigned char *data,
+			   char *tty_flags, int count)
+{
+#ifdef VERBOSE
+	print_hex_dump(KERN_DEBUG, ">in>", DUMP_PREFIX_NONE,
+		16, 1, data, count, 0);
+#endif
+
+	/*
+	 * if fw download is in progress then route incoming data
+	 * to KIM for validation
+	 */
+	st_recv(tty->disc_data, data, count);
+	pr_debug("done %s", __func__);
+}
+
+/* wake-up function called in from the TTY layer
+ * inside the internal wakeup function will be called
+ */
+static void st_tty_wakeup(struct tty_struct *tty)
+{
+	struct	st_data_s *st_gdata = tty->disc_data;
+	pr_debug("%s ", __func__);
+	/* don't do an wakeup for now */
+	clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
+
+	/*
+	 * schedule the internal wakeup instead of calling directly to
+	 * avoid lockup (port->lock needed in tty->ops->write is
+	 * already taken here
+	 */
+	schedule_work(&st_gdata->work_write_wakeup);
+}
+
+static void st_tty_flush_buffer(struct tty_struct *tty)
+{
+	struct	st_data_s *st_gdata = tty->disc_data;
+	pr_debug("%s ", __func__);
+
+	kfree_skb(st_gdata->tx_skb);
+	st_gdata->tx_skb = NULL;
+
+	tty_driver_flush_buffer(tty);
+	return;
+}
+
+static struct tty_ldisc_ops st_ldisc_ops = {
+	.magic = TTY_LDISC_MAGIC,
+	.name = "n_st",
+	.open = st_tty_open,
+	.close = st_tty_close,
+	.receive_buf = st_tty_receive,
+	.write_wakeup = st_tty_wakeup,
+	.flush_buffer = st_tty_flush_buffer,
+	.owner = THIS_MODULE
+};
+
+/********************************************************************/
+int st_core_init(struct st_data_s **core_data)
+{
+	struct st_data_s *st_gdata;
+	long err;
+
+	err = tty_register_ldisc(N_TI_WL, &st_ldisc_ops);
+	if (err) {
+		pr_err("error registering %d line discipline %ld",
+			   N_TI_WL, err);
+		return err;
+	}
+	pr_debug("registered n_shared line discipline");
+
+	st_gdata = kzalloc(sizeof(struct st_data_s), GFP_KERNEL);
+	if (!st_gdata) {
+		pr_err("memory allocation failed");
+		err = tty_unregister_ldisc(N_TI_WL);
+		if (err)
+			pr_err("unable to un-register ldisc %ld", err);
+		err = -ENOMEM;
+		return err;
+	}
+
+	/* Initialize ST TxQ and Tx waitQ queue head. All BT/FM/GPS module skb's
+	 * will be pushed in this queue for actual transmission.
+	 */
+	skb_queue_head_init(&st_gdata->txq);
+	skb_queue_head_init(&st_gdata->tx_waitq);
+
+	/* Locking used in st_int_enqueue() to avoid multiple execution */
+	spin_lock_init(&st_gdata->lock);
+
+	err = st_ll_init(st_gdata);
+	if (err) {
+		pr_err("error during st_ll initialization(%ld)", err);
+		kfree(st_gdata);
+		err = tty_unregister_ldisc(N_TI_WL);
+		if (err)
+			pr_err("unable to un-register ldisc");
+		return err;
+	}
+
+	INIT_WORK(&st_gdata->work_write_wakeup, work_fn_write_wakeup);
+
+	*core_data = st_gdata;
+	return 0;
+}
+
+void st_core_exit(struct st_data_s *st_gdata)
+{
+	long err;
+	/* internal module cleanup */
+	err = st_ll_deinit(st_gdata);
+	if (err)
+		pr_err("error during deinit of ST LL %ld", err);
+
+	if (st_gdata != NULL) {
+		/* Free ST Tx Qs and skbs */
+		skb_queue_purge(&st_gdata->txq);
+		skb_queue_purge(&st_gdata->tx_waitq);
+		kfree_skb(st_gdata->rx_skb);
+		kfree_skb(st_gdata->tx_skb);
+		/* TTY ldisc cleanup */
+		err = tty_unregister_ldisc(N_TI_WL);
+		if (err)
+			pr_err("unable to un-register ldisc %ld", err);
+		/* free the global data pointer */
+		kfree(st_gdata);
+	}
+}
diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c
new file mode 100644
index 000000000..1874ac922
--- /dev/null
+++ b/drivers/misc/ti-st/st_kim.c
@@ -0,0 +1,868 @@
+/*
+ *  Shared Transport Line discipline driver Core
+ *	Init Manager module responsible for GPIO control
+ *	and firmware download
+ *  Copyright (C) 2009-2010 Texas Instruments
+ *  Author: Pavan Savoy <pavan_savoy@ti.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#define pr_fmt(fmt) "(stk) :" fmt
+#include <linux/platform_device.h>
+#include <linux/jiffies.h>
+#include <linux/firmware.h>
+#include <linux/delay.h>
+#include <linux/wait.h>
+#include <linux/gpio.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/sched.h>
+#include <linux/sysfs.h>
+#include <linux/tty.h>
+
+#include <linux/skbuff.h>
+#include <linux/ti_wilink_st.h>
+#include <linux/module.h>
+
+#define MAX_ST_DEVICES	3	/* Imagine 1 on each UART for now */
+static struct platform_device *st_kim_devices[MAX_ST_DEVICES];
+
+/**********************************************************************/
+/* internal functions */
+
+/**
+ * st_get_plat_device -
+ *	function which returns the reference to the platform device
+ *	requested by id. As of now only 1 such device exists (id=0)
+ *	the context requesting for reference can get the id to be
+ *	requested by a. The protocol driver which is registering or
+ *	b. the tty device which is opened.
+ */
+static struct platform_device *st_get_plat_device(int id)
+{
+	return st_kim_devices[id];
+}
+
+/**
+ * validate_firmware_response -
+ *	function to return whether the firmware response was proper
+ *	in case of error don't complete so that waiting for proper
+ *	response times out
+ */
+static void validate_firmware_response(struct kim_data_s *kim_gdata)
+{
+	struct sk_buff *skb = kim_gdata->rx_skb;
+	if (!skb)
+		return;
+
+	/* these magic numbers are the position in the response buffer which
+	 * allows us to distinguish whether the response is for the read
+	 * version info. command
+	 */
+	if (skb->data[2] == 0x01 && skb->data[3] == 0x01 &&
+			skb->data[4] == 0x10 && skb->data[5] == 0x00) {
+		/* fw version response */
+		memcpy(kim_gdata->resp_buffer,
+				kim_gdata->rx_skb->data,
+				kim_gdata->rx_skb->len);
+		kim_gdata->rx_state = ST_W4_PACKET_TYPE;
+		kim_gdata->rx_skb = NULL;
+		kim_gdata->rx_count = 0;
+	} else if (unlikely(skb->data[5] != 0)) {
+		pr_err("no proper response during fw download");
+		pr_err("data6 %x", skb->data[5]);
+		kfree_skb(skb);
+		return;		/* keep waiting for the proper response */
+	}
+	/* becos of all the script being downloaded */
+	complete_all(&kim_gdata->kim_rcvd);
+	kfree_skb(skb);
+}
+
+/* check for data len received inside kim_int_recv
+ * most often hit the last case to update state to waiting for data
+ */
+static inline int kim_check_data_len(struct kim_data_s *kim_gdata, int len)
+{
+	register int room = skb_tailroom(kim_gdata->rx_skb);
+
+	pr_debug("len %d room %d", len, room);
+
+	if (!len) {
+		validate_firmware_response(kim_gdata);
+	} else if (len > room) {
+		/* Received packet's payload length is larger.
+		 * We can't accommodate it in created skb.
+		 */
+		pr_err("Data length is too large len %d room %d", len,
+			   room);
+		kfree_skb(kim_gdata->rx_skb);
+	} else {
+		/* Packet header has non-zero payload length and
+		 * we have enough space in created skb. Lets read
+		 * payload data */
+		kim_gdata->rx_state = ST_W4_DATA;
+		kim_gdata->rx_count = len;
+		return len;
+	}
+
+	/* Change ST LL state to continue to process next
+	 * packet */
+	kim_gdata->rx_state = ST_W4_PACKET_TYPE;
+	kim_gdata->rx_skb = NULL;
+	kim_gdata->rx_count = 0;
+
+	return 0;
+}
+
+/**
+ * kim_int_recv - receive function called during firmware download
+ *	firmware download responses on different UART drivers
+ *	have been observed to come in bursts of different
+ *	tty_receive and hence the logic
+ */
+static void kim_int_recv(struct kim_data_s *kim_gdata,
+	const unsigned char *data, long count)
+{
+	const unsigned char *ptr;
+	int len = 0;
+	unsigned char *plen;
+
+	pr_debug("%s", __func__);
+	/* Decode received bytes here */
+	ptr = data;
+	if (unlikely(ptr == NULL)) {
+		pr_err(" received null from TTY ");
+		return;
+	}
+
+	while (count) {
+		if (kim_gdata->rx_count) {
+			len = min_t(unsigned int, kim_gdata->rx_count, count);
+			skb_put_data(kim_gdata->rx_skb, ptr, len);
+			kim_gdata->rx_count -= len;
+			count -= len;
+			ptr += len;
+
+			if (kim_gdata->rx_count)
+				continue;
+
+			/* Check ST RX state machine , where are we? */
+			switch (kim_gdata->rx_state) {
+				/* Waiting for complete packet ? */
+			case ST_W4_DATA:
+				pr_debug("Complete pkt received");
+				validate_firmware_response(kim_gdata);
+				kim_gdata->rx_state = ST_W4_PACKET_TYPE;
+				kim_gdata->rx_skb = NULL;
+				continue;
+				/* Waiting for Bluetooth event header ? */
+			case ST_W4_HEADER:
+				plen =
+				(unsigned char *)&kim_gdata->rx_skb->data[1];
+				pr_debug("event hdr: plen 0x%02x\n", *plen);
+				kim_check_data_len(kim_gdata, *plen);
+				continue;
+			}	/* end of switch */
+		}		/* end of if rx_state */
+		switch (*ptr) {
+			/* Bluetooth event packet? */
+		case 0x04:
+			kim_gdata->rx_state = ST_W4_HEADER;
+			kim_gdata->rx_count = 2;
+			break;
+		default:
+			pr_info("unknown packet");
+			ptr++;
+			count--;
+			continue;
+		}
+		ptr++;
+		count--;
+		kim_gdata->rx_skb =
+			alloc_skb(1024+8, GFP_ATOMIC);
+		if (!kim_gdata->rx_skb) {
+			pr_err("can't allocate mem for new packet");
+			kim_gdata->rx_state = ST_W4_PACKET_TYPE;
+			kim_gdata->rx_count = 0;
+			return;
+		}
+		skb_reserve(kim_gdata->rx_skb, 8);
+		kim_gdata->rx_skb->cb[0] = 4;
+		kim_gdata->rx_skb->cb[1] = 0;
+
+	}
+	return;
+}
+
+static long read_local_version(struct kim_data_s *kim_gdata, char *bts_scr_name)
+{
+	unsigned short version = 0, chip = 0, min_ver = 0, maj_ver = 0;
+	const char read_ver_cmd[] = { 0x01, 0x01, 0x10, 0x00 };
+	long timeout;
+
+	pr_debug("%s", __func__);
+
+	reinit_completion(&kim_gdata->kim_rcvd);
+	if (4 != st_int_write(kim_gdata->core_data, read_ver_cmd, 4)) {
+		pr_err("kim: couldn't write 4 bytes");
+		return -EIO;
+	}
+
+	timeout = wait_for_completion_interruptible_timeout(
+		&kim_gdata->kim_rcvd, msecs_to_jiffies(CMD_RESP_TIME));
+	if (timeout <= 0) {
+		pr_err(" waiting for ver info- timed out or received signal");
+		return timeout ? -ERESTARTSYS : -ETIMEDOUT;
+	}
+	reinit_completion(&kim_gdata->kim_rcvd);
+	/* the positions 12 & 13 in the response buffer provide with the
+	 * chip, major & minor numbers
+	 */
+
+	version =
+		MAKEWORD(kim_gdata->resp_buffer[12],
+				kim_gdata->resp_buffer[13]);
+	chip = (version & 0x7C00) >> 10;
+	min_ver = (version & 0x007F);
+	maj_ver = (version & 0x0380) >> 7;
+
+	if (version & 0x8000)
+		maj_ver |= 0x0008;
+
+	sprintf(bts_scr_name, "ti-connectivity/TIInit_%d.%d.%d.bts",
+		chip, maj_ver, min_ver);
+
+	/* to be accessed later via sysfs entry */
+	kim_gdata->version.full = version;
+	kim_gdata->version.chip = chip;
+	kim_gdata->version.maj_ver = maj_ver;
+	kim_gdata->version.min_ver = min_ver;
+
+	pr_info("%s", bts_scr_name);
+	return 0;
+}
+
+static void skip_change_remote_baud(unsigned char **ptr, long *len)
+{
+	unsigned char *nxt_action, *cur_action;
+	cur_action = *ptr;
+
+	nxt_action = cur_action + sizeof(struct bts_action) +
+		((struct bts_action *) cur_action)->size;
+
+	if (((struct bts_action *) nxt_action)->type != ACTION_WAIT_EVENT) {
+		pr_err("invalid action after change remote baud command");
+	} else {
+		*ptr = *ptr + sizeof(struct bts_action) +
+			((struct bts_action *)cur_action)->size;
+		*len = *len - (sizeof(struct bts_action) +
+				((struct bts_action *)cur_action)->size);
+		/* warn user on not commenting these in firmware */
+		pr_warn("skipping the wait event of change remote baud");
+	}
+}
+
+/**
+ * download_firmware -
+ *	internal function which parses through the .bts firmware
+ *	script file intreprets SEND, DELAY actions only as of now
+ */
+static long download_firmware(struct kim_data_s *kim_gdata)
+{
+	long err = 0;
+	long len = 0;
+	unsigned char *ptr = NULL;
+	unsigned char *action_ptr = NULL;
+	unsigned char bts_scr_name[40] = { 0 };	/* 40 char long bts scr name? */
+	int wr_room_space;
+	int cmd_size;
+	unsigned long timeout;
+
+	err = read_local_version(kim_gdata, bts_scr_name);
+	if (err != 0) {
+		pr_err("kim: failed to read local ver");
+		return err;
+	}
+	err =
+	    request_firmware(&kim_gdata->fw_entry, bts_scr_name,
+			     &kim_gdata->kim_pdev->dev);
+	if (unlikely((err != 0) || (kim_gdata->fw_entry->data == NULL) ||
+		     (kim_gdata->fw_entry->size == 0))) {
+		pr_err(" request_firmware failed(errno %ld) for %s", err,
+			   bts_scr_name);
+		return -EINVAL;
+	}
+	ptr = (void *)kim_gdata->fw_entry->data;
+	len = kim_gdata->fw_entry->size;
+	/* bts_header to remove out magic number and
+	 * version
+	 */
+	ptr += sizeof(struct bts_header);
+	len -= sizeof(struct bts_header);
+
+	while (len > 0 && ptr) {
+		pr_debug(" action size %d, type %d ",
+			   ((struct bts_action *)ptr)->size,
+			   ((struct bts_action *)ptr)->type);
+
+		switch (((struct bts_action *)ptr)->type) {
+		case ACTION_SEND_COMMAND:	/* action send */
+			pr_debug("S");
+			action_ptr = &(((struct bts_action *)ptr)->data[0]);
+			if (unlikely
+			    (((struct hci_command *)action_ptr)->opcode ==
+			     0xFF36)) {
+				/* ignore remote change
+				 * baud rate HCI VS command */
+				pr_warn("change remote baud"
+				    " rate command in firmware");
+				skip_change_remote_baud(&ptr, &len);
+				break;
+			}
+			/*
+			 * Make sure we have enough free space in uart
+			 * tx buffer to write current firmware command
+			 */
+			cmd_size = ((struct bts_action *)ptr)->size;
+			timeout = jiffies + msecs_to_jiffies(CMD_WR_TIME);
+			do {
+				wr_room_space =
+					st_get_uart_wr_room(kim_gdata->core_data);
+				if (wr_room_space < 0) {
+					pr_err("Unable to get free "
+							"space info from uart tx buffer");
+					release_firmware(kim_gdata->fw_entry);
+					return wr_room_space;
+				}
+				mdelay(1); /* wait 1ms before checking room */
+			} while ((wr_room_space < cmd_size) &&
+					time_before(jiffies, timeout));
+
+			/* Timeout happened ? */
+			if (time_after_eq(jiffies, timeout)) {
+				pr_err("Timeout while waiting for free "
+						"free space in uart tx buffer");
+				release_firmware(kim_gdata->fw_entry);
+				return -ETIMEDOUT;
+			}
+			/* reinit completion before sending for the
+			 * relevant wait
+			 */
+			reinit_completion(&kim_gdata->kim_rcvd);
+
+			/*
+			 * Free space found in uart buffer, call st_int_write
+			 * to send current firmware command to the uart tx
+			 * buffer.
+			 */
+			err = st_int_write(kim_gdata->core_data,
+			((struct bts_action_send *)action_ptr)->data,
+					   ((struct bts_action *)ptr)->size);
+			if (unlikely(err < 0)) {
+				release_firmware(kim_gdata->fw_entry);
+				return err;
+			}
+			/*
+			 * Check number of bytes written to the uart tx buffer
+			 * and requested command write size
+			 */
+			if (err != cmd_size) {
+				pr_err("Number of bytes written to uart "
+						"tx buffer are not matching with "
+						"requested cmd write size");
+				release_firmware(kim_gdata->fw_entry);
+				return -EIO;
+			}
+			break;
+		case ACTION_WAIT_EVENT:  /* wait */
+			pr_debug("W");
+			err = wait_for_completion_interruptible_timeout(
+					&kim_gdata->kim_rcvd,
+					msecs_to_jiffies(CMD_RESP_TIME));
+			if (err <= 0) {
+				pr_err("response timeout/signaled during fw download ");
+				/* timed out */
+				release_firmware(kim_gdata->fw_entry);
+				return err ? -ERESTARTSYS : -ETIMEDOUT;
+			}
+			reinit_completion(&kim_gdata->kim_rcvd);
+			break;
+		case ACTION_DELAY:	/* sleep */
+			pr_info("sleep command in scr");
+			action_ptr = &(((struct bts_action *)ptr)->data[0]);
+			mdelay(((struct bts_action_delay *)action_ptr)->msec);
+			break;
+		}
+		len =
+		    len - (sizeof(struct bts_action) +
+			   ((struct bts_action *)ptr)->size);
+		ptr =
+		    ptr + sizeof(struct bts_action) +
+		    ((struct bts_action *)ptr)->size;
+	}
+	/* fw download complete */
+	release_firmware(kim_gdata->fw_entry);
+	return 0;
+}
+
+/**********************************************************************/
+/* functions called from ST core */
+/* called from ST Core, when REG_IN_PROGRESS (registration in progress)
+ * can be because of
+ * 1. response to read local version
+ * 2. during send/recv's of firmware download
+ */
+void st_kim_recv(void *disc_data, const unsigned char *data, long count)
+{
+	struct st_data_s	*st_gdata = (struct st_data_s *)disc_data;
+	struct kim_data_s	*kim_gdata = st_gdata->kim_data;
+
+	/* proceed to gather all data and distinguish read fw version response
+	 * from other fw responses when data gathering is complete
+	 */
+	kim_int_recv(kim_gdata, data, count);
+	return;
+}
+
+/* to signal completion of line discipline installation
+ * called from ST Core, upon tty_open
+ */
+void st_kim_complete(void *kim_data)
+{
+	struct kim_data_s	*kim_gdata = (struct kim_data_s *)kim_data;
+	complete(&kim_gdata->ldisc_installed);
+}
+
+/**
+ * st_kim_start - called from ST Core upon 1st registration
+ *	This involves toggling the chip enable gpio, reading
+ *	the firmware version from chip, forming the fw file name
+ *	based on the chip version, requesting the fw, parsing it
+ *	and perform download(send/recv).
+ */
+long st_kim_start(void *kim_data)
+{
+	long err = 0;
+	long retry = POR_RETRY_COUNT;
+	struct ti_st_plat_data	*pdata;
+	struct kim_data_s	*kim_gdata = (struct kim_data_s *)kim_data;
+
+	pr_info(" %s", __func__);
+	pdata = kim_gdata->kim_pdev->dev.platform_data;
+
+	do {
+		/* platform specific enabling code here */
+		if (pdata->chip_enable)
+			pdata->chip_enable(kim_gdata);
+
+		/* Configure BT nShutdown to HIGH state */
+		gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_LOW);
+		mdelay(5);	/* FIXME: a proper toggle */
+		gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_HIGH);
+		mdelay(100);
+		/* re-initialize the completion */
+		reinit_completion(&kim_gdata->ldisc_installed);
+		/* send notification to UIM */
+		kim_gdata->ldisc_install = 1;
+		pr_info("ldisc_install = 1");
+		sysfs_notify(&kim_gdata->kim_pdev->dev.kobj,
+				NULL, "install");
+		/* wait for ldisc to be installed */
+		err = wait_for_completion_interruptible_timeout(
+			&kim_gdata->ldisc_installed, msecs_to_jiffies(LDISC_TIME));
+		if (!err) {
+			/* ldisc installation timeout,
+			 * flush uart, power cycle BT_EN */
+			pr_err("ldisc installation timeout");
+			err = st_kim_stop(kim_gdata);
+			continue;
+		} else {
+			/* ldisc installed now */
+			pr_info("line discipline installed");
+			err = download_firmware(kim_gdata);
+			if (err != 0) {
+				/* ldisc installed but fw download failed,
+				 * flush uart & power cycle BT_EN */
+				pr_err("download firmware failed");
+				err = st_kim_stop(kim_gdata);
+				continue;
+			} else {	/* on success don't retry */
+				break;
+			}
+		}
+	} while (retry--);
+	return err;
+}
+
+/**
+ * st_kim_stop - stop communication with chip.
+ *	This can be called from ST Core/KIM, on the-
+ *	(a) last un-register when chip need not be powered there-after,
+ *	(b) upon failure to either install ldisc or download firmware.
+ *	The function is responsible to (a) notify UIM about un-installation,
+ *	(b) flush UART if the ldisc was installed.
+ *	(c) reset BT_EN - pull down nshutdown at the end.
+ *	(d) invoke platform's chip disabling routine.
+ */
+long st_kim_stop(void *kim_data)
+{
+	long err = 0;
+	struct kim_data_s	*kim_gdata = (struct kim_data_s *)kim_data;
+	struct ti_st_plat_data	*pdata =
+		kim_gdata->kim_pdev->dev.platform_data;
+	struct tty_struct	*tty = kim_gdata->core_data->tty;
+
+	reinit_completion(&kim_gdata->ldisc_installed);
+
+	if (tty) {	/* can be called before ldisc is installed */
+		/* Flush any pending characters in the driver and discipline. */
+		tty_ldisc_flush(tty);
+		tty_driver_flush_buffer(tty);
+	}
+
+	/* send uninstall notification to UIM */
+	pr_info("ldisc_install = 0");
+	kim_gdata->ldisc_install = 0;
+	sysfs_notify(&kim_gdata->kim_pdev->dev.kobj, NULL, "install");
+
+	/* wait for ldisc to be un-installed */
+	err = wait_for_completion_interruptible_timeout(
+		&kim_gdata->ldisc_installed, msecs_to_jiffies(LDISC_TIME));
+	if (!err) {		/* timeout */
+		pr_err(" timed out waiting for ldisc to be un-installed");
+		err = -ETIMEDOUT;
+	}
+
+	/* By default configure BT nShutdown to LOW state */
+	gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_LOW);
+	mdelay(1);
+	gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_HIGH);
+	mdelay(1);
+	gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_LOW);
+
+	/* platform specific disable */
+	if (pdata->chip_disable)
+		pdata->chip_disable(kim_gdata);
+	return err;
+}
+
+/**********************************************************************/
+/* functions called from subsystems */
+/* called when debugfs entry is read from */
+
+static int show_version(struct seq_file *s, void *unused)
+{
+	struct kim_data_s *kim_gdata = (struct kim_data_s *)s->private;
+	seq_printf(s, "%04X %d.%d.%d\n", kim_gdata->version.full,
+			kim_gdata->version.chip, kim_gdata->version.maj_ver,
+			kim_gdata->version.min_ver);
+	return 0;
+}
+
+static int show_list(struct seq_file *s, void *unused)
+{
+	struct kim_data_s *kim_gdata = (struct kim_data_s *)s->private;
+	kim_st_list_protocols(kim_gdata->core_data, s);
+	return 0;
+}
+
+static ssize_t show_install(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct kim_data_s *kim_data = dev_get_drvdata(dev);
+	return sprintf(buf, "%d\n", kim_data->ldisc_install);
+}
+
+#ifdef DEBUG
+static ssize_t store_dev_name(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kim_data_s *kim_data = dev_get_drvdata(dev);
+	pr_debug("storing dev name >%s<", buf);
+	strncpy(kim_data->dev_name, buf, count);
+	pr_debug("stored dev name >%s<", kim_data->dev_name);
+	return count;
+}
+
+static ssize_t store_baud_rate(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kim_data_s *kim_data = dev_get_drvdata(dev);
+	pr_debug("storing baud rate >%s<", buf);
+	sscanf(buf, "%ld", &kim_data->baud_rate);
+	pr_debug("stored baud rate >%ld<", kim_data->baud_rate);
+	return count;
+}
+#endif	/* if DEBUG */
+
+static ssize_t show_dev_name(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct kim_data_s *kim_data = dev_get_drvdata(dev);
+	return sprintf(buf, "%s\n", kim_data->dev_name);
+}
+
+static ssize_t show_baud_rate(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct kim_data_s *kim_data = dev_get_drvdata(dev);
+	return sprintf(buf, "%d\n", kim_data->baud_rate);
+}
+
+static ssize_t show_flow_cntrl(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct kim_data_s *kim_data = dev_get_drvdata(dev);
+	return sprintf(buf, "%d\n", kim_data->flow_cntrl);
+}
+
+/* structures specific for sysfs entries */
+static struct kobj_attribute ldisc_install =
+__ATTR(install, 0444, (void *)show_install, NULL);
+
+static struct kobj_attribute uart_dev_name =
+#ifdef DEBUG	/* TODO: move this to debug-fs if possible */
+__ATTR(dev_name, 0644, (void *)show_dev_name, (void *)store_dev_name);
+#else
+__ATTR(dev_name, 0444, (void *)show_dev_name, NULL);
+#endif
+
+static struct kobj_attribute uart_baud_rate =
+#ifdef DEBUG	/* TODO: move to debugfs */
+__ATTR(baud_rate, 0644, (void *)show_baud_rate, (void *)store_baud_rate);
+#else
+__ATTR(baud_rate, 0444, (void *)show_baud_rate, NULL);
+#endif
+
+static struct kobj_attribute uart_flow_cntrl =
+__ATTR(flow_cntrl, 0444, (void *)show_flow_cntrl, NULL);
+
+static struct attribute *uim_attrs[] = {
+	&ldisc_install.attr,
+	&uart_dev_name.attr,
+	&uart_baud_rate.attr,
+	&uart_flow_cntrl.attr,
+	NULL,
+};
+
+static const struct attribute_group uim_attr_grp = {
+	.attrs = uim_attrs,
+};
+
+/**
+ * st_kim_ref - reference the core's data
+ *	This references the per-ST platform device in the arch/xx/
+ *	board-xx.c file.
+ *	This would enable multiple such platform devices to exist
+ *	on a given platform
+ */
+void st_kim_ref(struct st_data_s **core_data, int id)
+{
+	struct platform_device	*pdev;
+	struct kim_data_s	*kim_gdata;
+	/* get kim_gdata reference from platform device */
+	pdev = st_get_plat_device(id);
+	if (!pdev)
+		goto err;
+	kim_gdata = platform_get_drvdata(pdev);
+	if (!kim_gdata)
+		goto err;
+
+	*core_data = kim_gdata->core_data;
+	return;
+err:
+	*core_data = NULL;
+}
+
+static int kim_version_open(struct inode *i, struct file *f)
+{
+	return single_open(f, show_version, i->i_private);
+}
+
+static int kim_list_open(struct inode *i, struct file *f)
+{
+	return single_open(f, show_list, i->i_private);
+}
+
+static const struct file_operations version_debugfs_fops = {
+	/* version info */
+	.open = kim_version_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+static const struct file_operations list_debugfs_fops = {
+	/* protocols info */
+	.open = kim_list_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/**********************************************************************/
+/* functions called from platform device driver subsystem
+ * need to have a relevant platform device entry in the platform's
+ * board-*.c file
+ */
+
+static struct dentry *kim_debugfs_dir;
+static int kim_probe(struct platform_device *pdev)
+{
+	struct kim_data_s	*kim_gdata;
+	struct ti_st_plat_data	*pdata = pdev->dev.platform_data;
+	int err;
+
+	if ((pdev->id != -1) && (pdev->id < MAX_ST_DEVICES)) {
+		/* multiple devices could exist */
+		st_kim_devices[pdev->id] = pdev;
+	} else {
+		/* platform's sure about existence of 1 device */
+		st_kim_devices[0] = pdev;
+	}
+
+	kim_gdata = kzalloc(sizeof(struct kim_data_s), GFP_KERNEL);
+	if (!kim_gdata) {
+		pr_err("no mem to allocate");
+		return -ENOMEM;
+	}
+	platform_set_drvdata(pdev, kim_gdata);
+
+	err = st_core_init(&kim_gdata->core_data);
+	if (err != 0) {
+		pr_err(" ST core init failed");
+		err = -EIO;
+		goto err_core_init;
+	}
+	/* refer to itself */
+	kim_gdata->core_data->kim_data = kim_gdata;
+
+	/* Claim the chip enable nShutdown gpio from the system */
+	kim_gdata->nshutdown = pdata->nshutdown_gpio;
+	err = gpio_request(kim_gdata->nshutdown, "kim");
+	if (unlikely(err)) {
+		pr_err(" gpio %d request failed ", kim_gdata->nshutdown);
+		goto err_sysfs_group;
+	}
+
+	/* Configure nShutdown GPIO as output=0 */
+	err = gpio_direction_output(kim_gdata->nshutdown, 0);
+	if (unlikely(err)) {
+		pr_err(" unable to configure gpio %d", kim_gdata->nshutdown);
+		goto err_sysfs_group;
+	}
+	/* get reference of pdev for request_firmware
+	 */
+	kim_gdata->kim_pdev = pdev;
+	init_completion(&kim_gdata->kim_rcvd);
+	init_completion(&kim_gdata->ldisc_installed);
+
+	err = sysfs_create_group(&pdev->dev.kobj, &uim_attr_grp);
+	if (err) {
+		pr_err("failed to create sysfs entries");
+		goto err_sysfs_group;
+	}
+
+	/* copying platform data */
+	strncpy(kim_gdata->dev_name, pdata->dev_name, UART_DEV_NAME_LEN);
+	kim_gdata->flow_cntrl = pdata->flow_cntrl;
+	kim_gdata->baud_rate = pdata->baud_rate;
+	pr_info("sysfs entries created\n");
+
+	kim_debugfs_dir = debugfs_create_dir("ti-st", NULL);
+	if (!kim_debugfs_dir) {
+		pr_err(" debugfs entries creation failed ");
+		return 0;
+	}
+
+	debugfs_create_file("version", S_IRUGO, kim_debugfs_dir,
+				kim_gdata, &version_debugfs_fops);
+	debugfs_create_file("protocols", S_IRUGO, kim_debugfs_dir,
+				kim_gdata, &list_debugfs_fops);
+	return 0;
+
+err_sysfs_group:
+	st_core_exit(kim_gdata->core_data);
+
+err_core_init:
+	kfree(kim_gdata);
+
+	return err;
+}
+
+static int kim_remove(struct platform_device *pdev)
+{
+	/* free the GPIOs requested */
+	struct ti_st_plat_data	*pdata = pdev->dev.platform_data;
+	struct kim_data_s	*kim_gdata;
+
+	kim_gdata = platform_get_drvdata(pdev);
+
+	/* Free the Bluetooth/FM/GPIO
+	 * nShutdown gpio from the system
+	 */
+	gpio_free(pdata->nshutdown_gpio);
+	pr_info("nshutdown GPIO Freed");
+
+	debugfs_remove_recursive(kim_debugfs_dir);
+	sysfs_remove_group(&pdev->dev.kobj, &uim_attr_grp);
+	pr_info("sysfs entries removed");
+
+	kim_gdata->kim_pdev = NULL;
+	st_core_exit(kim_gdata->core_data);
+
+	kfree(kim_gdata);
+	kim_gdata = NULL;
+	return 0;
+}
+
+static int kim_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct ti_st_plat_data	*pdata = pdev->dev.platform_data;
+
+	if (pdata->suspend)
+		return pdata->suspend(pdev, state);
+
+	return 0;
+}
+
+static int kim_resume(struct platform_device *pdev)
+{
+	struct ti_st_plat_data	*pdata = pdev->dev.platform_data;
+
+	if (pdata->resume)
+		return pdata->resume(pdev);
+
+	return 0;
+}
+
+/**********************************************************************/
+/* entry point for ST KIM module, called in from ST Core */
+static struct platform_driver kim_platform_driver = {
+	.probe = kim_probe,
+	.remove = kim_remove,
+	.suspend = kim_suspend,
+	.resume = kim_resume,
+	.driver = {
+		.name = "kim",
+	},
+};
+
+module_platform_driver(kim_platform_driver);
+
+MODULE_AUTHOR("Pavan Savoy <pavan_savoy@ti.com>");
+MODULE_DESCRIPTION("Shared Transport Driver for TI BT/FM/GPS combo chips ");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/ti-st/st_ll.c b/drivers/misc/ti-st/st_ll.c
new file mode 100644
index 000000000..93b4d67cc
--- /dev/null
+++ b/drivers/misc/ti-st/st_ll.c
@@ -0,0 +1,169 @@
+/*
+ *  Shared Transport driver
+ *	HCI-LL module responsible for TI proprietary HCI_LL protocol
+ *  Copyright (C) 2009-2010 Texas Instruments
+ *  Author: Pavan Savoy <pavan_savoy@ti.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#define pr_fmt(fmt) "(stll) :" fmt
+#include <linux/skbuff.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/ti_wilink_st.h>
+
+/**********************************************************************/
+/* internal functions */
+static void send_ll_cmd(struct st_data_s *st_data,
+	unsigned char cmd)
+{
+
+	pr_debug("%s: writing %x", __func__, cmd);
+	st_int_write(st_data, &cmd, 1);
+	return;
+}
+
+static void ll_device_want_to_sleep(struct st_data_s *st_data)
+{
+	struct kim_data_s	*kim_data;
+	struct ti_st_plat_data	*pdata;
+
+	pr_debug("%s", __func__);
+	/* sanity check */
+	if (st_data->ll_state != ST_LL_AWAKE)
+		pr_err("ERR hcill: ST_LL_GO_TO_SLEEP_IND"
+			  "in state %ld", st_data->ll_state);
+
+	send_ll_cmd(st_data, LL_SLEEP_ACK);
+	/* update state */
+	st_data->ll_state = ST_LL_ASLEEP;
+
+	/* communicate to platform about chip asleep */
+	kim_data = st_data->kim_data;
+	pdata = kim_data->kim_pdev->dev.platform_data;
+	if (pdata->chip_asleep)
+		pdata->chip_asleep(NULL);
+}
+
+static void ll_device_want_to_wakeup(struct st_data_s *st_data)
+{
+	struct kim_data_s	*kim_data;
+	struct ti_st_plat_data	*pdata;
+
+	/* diff actions in diff states */
+	switch (st_data->ll_state) {
+	case ST_LL_ASLEEP:
+		send_ll_cmd(st_data, LL_WAKE_UP_ACK);	/* send wake_ack */
+		break;
+	case ST_LL_ASLEEP_TO_AWAKE:
+		/* duplicate wake_ind */
+		pr_err("duplicate wake_ind while waiting for Wake ack");
+		break;
+	case ST_LL_AWAKE:
+		/* duplicate wake_ind */
+		pr_err("duplicate wake_ind already AWAKE");
+		break;
+	case ST_LL_AWAKE_TO_ASLEEP:
+		/* duplicate wake_ind */
+		pr_err("duplicate wake_ind");
+		break;
+	}
+	/* update state */
+	st_data->ll_state = ST_LL_AWAKE;
+
+	/* communicate to platform about chip wakeup */
+	kim_data = st_data->kim_data;
+	pdata = kim_data->kim_pdev->dev.platform_data;
+	if (pdata->chip_awake)
+		pdata->chip_awake(NULL);
+}
+
+/**********************************************************************/
+/* functions invoked by ST Core */
+
+/* called when ST Core wants to
+ * enable ST LL */
+void st_ll_enable(struct st_data_s *ll)
+{
+	ll->ll_state = ST_LL_AWAKE;
+}
+
+/* called when ST Core /local module wants to
+ * disable ST LL */
+void st_ll_disable(struct st_data_s *ll)
+{
+	ll->ll_state = ST_LL_INVALID;
+}
+
+/* called when ST Core wants to update the state */
+void st_ll_wakeup(struct st_data_s *ll)
+{
+	if (likely(ll->ll_state != ST_LL_AWAKE)) {
+		send_ll_cmd(ll, LL_WAKE_UP_IND);	/* WAKE_IND */
+		ll->ll_state = ST_LL_ASLEEP_TO_AWAKE;
+	} else {
+		/* don't send the duplicate wake_indication */
+		pr_err(" Chip already AWAKE ");
+	}
+}
+
+/* called when ST Core wants the state */
+unsigned long st_ll_getstate(struct st_data_s *ll)
+{
+	pr_debug(" returning state %ld", ll->ll_state);
+	return ll->ll_state;
+}
+
+/* called from ST Core, when a PM related packet arrives */
+unsigned long st_ll_sleep_state(struct st_data_s *st_data,
+	unsigned char cmd)
+{
+	switch (cmd) {
+	case LL_SLEEP_IND:	/* sleep ind */
+		pr_debug("sleep indication recvd");
+		ll_device_want_to_sleep(st_data);
+		break;
+	case LL_SLEEP_ACK:	/* sleep ack */
+		pr_err("sleep ack rcvd: host shouldn't");
+		break;
+	case LL_WAKE_UP_IND:	/* wake ind */
+		pr_debug("wake indication recvd");
+		ll_device_want_to_wakeup(st_data);
+		break;
+	case LL_WAKE_UP_ACK:	/* wake ack */
+		pr_debug("wake ack rcvd");
+		st_data->ll_state = ST_LL_AWAKE;
+		break;
+	default:
+		pr_err(" unknown input/state ");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* Called from ST CORE to initialize ST LL */
+long st_ll_init(struct st_data_s *ll)
+{
+	/* set state to invalid */
+	ll->ll_state = ST_LL_INVALID;
+	return 0;
+}
+
+/* Called from ST CORE to de-initialize ST LL */
+long st_ll_deinit(struct st_data_s *ll)
+{
+	return 0;
+}
diff --git a/drivers/misc/tifm_7xx1.c b/drivers/misc/tifm_7xx1.c
new file mode 100644
index 000000000..9ac95b48e
--- /dev/null
+++ b/drivers/misc/tifm_7xx1.c
@@ -0,0 +1,446 @@
+/*
+ *  tifm_7xx1.c - TI FlashMedia driver
+ *
+ *  Copyright (C) 2006 Alex Dubov <oakad@yahoo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/tifm.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
+
+#define DRIVER_NAME "tifm_7xx1"
+#define DRIVER_VERSION "0.8"
+
+#define TIFM_IRQ_ENABLE           0x80000000
+#define TIFM_IRQ_SOCKMASK(x)      (x)
+#define TIFM_IRQ_CARDMASK(x)      ((x) << 8)
+#define TIFM_IRQ_FIFOMASK(x)      ((x) << 16)
+#define TIFM_IRQ_SETALL           0xffffffff
+
+static void tifm_7xx1_dummy_eject(struct tifm_adapter *fm,
+				  struct tifm_dev *sock)
+{
+}
+
+static void tifm_7xx1_eject(struct tifm_adapter *fm, struct tifm_dev *sock)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&fm->lock, flags);
+	fm->socket_change_set |= 1 << sock->socket_id;
+	tifm_queue_work(&fm->media_switcher);
+	spin_unlock_irqrestore(&fm->lock, flags);
+}
+
+static irqreturn_t tifm_7xx1_isr(int irq, void *dev_id)
+{
+	struct tifm_adapter *fm = dev_id;
+	struct tifm_dev *sock;
+	unsigned int irq_status, cnt;
+
+	spin_lock(&fm->lock);
+	irq_status = readl(fm->addr + FM_INTERRUPT_STATUS);
+	if (irq_status == 0 || irq_status == (~0)) {
+		spin_unlock(&fm->lock);
+		return IRQ_NONE;
+	}
+
+	if (irq_status & TIFM_IRQ_ENABLE) {
+		writel(TIFM_IRQ_ENABLE, fm->addr + FM_CLEAR_INTERRUPT_ENABLE);
+
+		for (cnt = 0; cnt < fm->num_sockets; cnt++) {
+			sock = fm->sockets[cnt];
+			if (sock) {
+				if ((irq_status >> cnt) & TIFM_IRQ_FIFOMASK(1))
+					sock->data_event(sock);
+				if ((irq_status >> cnt) & TIFM_IRQ_CARDMASK(1))
+					sock->card_event(sock);
+			}
+		}
+
+		fm->socket_change_set |= irq_status
+					 & ((1 << fm->num_sockets) - 1);
+	}
+	writel(irq_status, fm->addr + FM_INTERRUPT_STATUS);
+
+	if (fm->finish_me)
+		complete_all(fm->finish_me);
+	else if (!fm->socket_change_set)
+		writel(TIFM_IRQ_ENABLE, fm->addr + FM_SET_INTERRUPT_ENABLE);
+	else
+		tifm_queue_work(&fm->media_switcher);
+
+	spin_unlock(&fm->lock);
+	return IRQ_HANDLED;
+}
+
+static unsigned char tifm_7xx1_toggle_sock_power(char __iomem *sock_addr)
+{
+	unsigned int s_state;
+	int cnt;
+
+	writel(0x0e00, sock_addr + SOCK_CONTROL);
+
+	for (cnt = 16; cnt <= 256; cnt <<= 1) {
+		if (!(TIFM_SOCK_STATE_POWERED
+		      & readl(sock_addr + SOCK_PRESENT_STATE)))
+			break;
+
+		msleep(cnt);
+	}
+
+	s_state = readl(sock_addr + SOCK_PRESENT_STATE);
+	if (!(TIFM_SOCK_STATE_OCCUPIED & s_state))
+		return 0;
+
+	writel(readl(sock_addr + SOCK_CONTROL) | TIFM_CTRL_LED,
+	       sock_addr + SOCK_CONTROL);
+
+	/* xd needs some extra time before power on */
+	if (((readl(sock_addr + SOCK_PRESENT_STATE) >> 4) & 7)
+	    == TIFM_TYPE_XD)
+		msleep(40);
+
+	writel((s_state & TIFM_CTRL_POWER_MASK) | 0x0c00,
+	       sock_addr + SOCK_CONTROL);
+	/* wait for power to stabilize */
+	msleep(20);
+	for (cnt = 16; cnt <= 256; cnt <<= 1) {
+		if ((TIFM_SOCK_STATE_POWERED
+		     & readl(sock_addr + SOCK_PRESENT_STATE)))
+			break;
+
+		msleep(cnt);
+	}
+
+	writel(readl(sock_addr + SOCK_CONTROL) & (~TIFM_CTRL_LED),
+	       sock_addr + SOCK_CONTROL);
+
+	return (readl(sock_addr + SOCK_PRESENT_STATE) >> 4) & 7;
+}
+
+inline static void tifm_7xx1_sock_power_off(char __iomem *sock_addr)
+{
+	writel((~TIFM_CTRL_POWER_MASK) & readl(sock_addr + SOCK_CONTROL),
+	       sock_addr + SOCK_CONTROL);
+}
+
+inline static char __iomem *
+tifm_7xx1_sock_addr(char __iomem *base_addr, unsigned int sock_num)
+{
+	return base_addr + ((sock_num + 1) << 10);
+}
+
+static void tifm_7xx1_switch_media(struct work_struct *work)
+{
+	struct tifm_adapter *fm = container_of(work, struct tifm_adapter,
+					       media_switcher);
+	struct tifm_dev *sock;
+	char __iomem *sock_addr;
+	unsigned long flags;
+	unsigned char media_id;
+	unsigned int socket_change_set, cnt;
+
+	spin_lock_irqsave(&fm->lock, flags);
+	socket_change_set = fm->socket_change_set;
+	fm->socket_change_set = 0;
+
+	dev_dbg(fm->dev.parent, "checking media set %x\n",
+		socket_change_set);
+
+	if (!socket_change_set) {
+		spin_unlock_irqrestore(&fm->lock, flags);
+		return;
+	}
+
+	for (cnt = 0; cnt < fm->num_sockets; cnt++) {
+		if (!(socket_change_set & (1 << cnt)))
+			continue;
+		sock = fm->sockets[cnt];
+		if (sock) {
+			printk(KERN_INFO
+			       "%s : demand removing card from socket %u:%u\n",
+			       dev_name(&fm->dev), fm->id, cnt);
+			fm->sockets[cnt] = NULL;
+			sock_addr = sock->addr;
+			spin_unlock_irqrestore(&fm->lock, flags);
+			device_unregister(&sock->dev);
+			spin_lock_irqsave(&fm->lock, flags);
+			tifm_7xx1_sock_power_off(sock_addr);
+			writel(0x0e00, sock_addr + SOCK_CONTROL);
+		}
+
+		spin_unlock_irqrestore(&fm->lock, flags);
+
+		media_id = tifm_7xx1_toggle_sock_power(
+				tifm_7xx1_sock_addr(fm->addr, cnt));
+
+		// tifm_alloc_device will check if media_id is valid
+		sock = tifm_alloc_device(fm, cnt, media_id);
+		if (sock) {
+			sock->addr = tifm_7xx1_sock_addr(fm->addr, cnt);
+
+			if (!device_register(&sock->dev)) {
+				spin_lock_irqsave(&fm->lock, flags);
+				if (!fm->sockets[cnt]) {
+					fm->sockets[cnt] = sock;
+					sock = NULL;
+				}
+				spin_unlock_irqrestore(&fm->lock, flags);
+			}
+			if (sock)
+				tifm_free_device(&sock->dev);
+		}
+		spin_lock_irqsave(&fm->lock, flags);
+	}
+
+	writel(TIFM_IRQ_FIFOMASK(socket_change_set)
+	       | TIFM_IRQ_CARDMASK(socket_change_set),
+	       fm->addr + FM_CLEAR_INTERRUPT_ENABLE);
+
+	writel(TIFM_IRQ_FIFOMASK(socket_change_set)
+	       | TIFM_IRQ_CARDMASK(socket_change_set),
+	       fm->addr + FM_SET_INTERRUPT_ENABLE);
+
+	writel(TIFM_IRQ_ENABLE, fm->addr + FM_SET_INTERRUPT_ENABLE);
+	spin_unlock_irqrestore(&fm->lock, flags);
+}
+
+#ifdef CONFIG_PM
+
+static int tifm_7xx1_suspend(struct pci_dev *dev, pm_message_t state)
+{
+	struct tifm_adapter *fm = pci_get_drvdata(dev);
+	int cnt;
+
+	dev_dbg(&dev->dev, "suspending host\n");
+
+	for (cnt = 0; cnt < fm->num_sockets; cnt++) {
+		if (fm->sockets[cnt])
+			tifm_7xx1_sock_power_off(fm->sockets[cnt]->addr);
+	}
+
+	pci_save_state(dev);
+	pci_enable_wake(dev, pci_choose_state(dev, state), 0);
+	pci_disable_device(dev);
+	pci_set_power_state(dev, pci_choose_state(dev, state));
+	return 0;
+}
+
+static int tifm_7xx1_resume(struct pci_dev *dev)
+{
+	struct tifm_adapter *fm = pci_get_drvdata(dev);
+	int rc;
+	unsigned long timeout;
+	unsigned int good_sockets = 0, bad_sockets = 0;
+	unsigned long flags;
+	/* Maximum number of entries is 4 */
+	unsigned char new_ids[4];
+	DECLARE_COMPLETION_ONSTACK(finish_resume);
+
+	if (WARN_ON(fm->num_sockets > ARRAY_SIZE(new_ids)))
+		return -ENXIO;
+
+	pci_set_power_state(dev, PCI_D0);
+	pci_restore_state(dev);
+	rc = pci_enable_device(dev);
+	if (rc)
+		return rc;
+	pci_set_master(dev);
+
+	dev_dbg(&dev->dev, "resuming host\n");
+
+	for (rc = 0; rc < fm->num_sockets; rc++)
+		new_ids[rc] = tifm_7xx1_toggle_sock_power(
+					tifm_7xx1_sock_addr(fm->addr, rc));
+	spin_lock_irqsave(&fm->lock, flags);
+	for (rc = 0; rc < fm->num_sockets; rc++) {
+		if (fm->sockets[rc]) {
+			if (fm->sockets[rc]->type == new_ids[rc])
+				good_sockets |= 1 << rc;
+			else
+				bad_sockets |= 1 << rc;
+		}
+	}
+
+	writel(TIFM_IRQ_ENABLE | TIFM_IRQ_SOCKMASK((1 << fm->num_sockets) - 1),
+	       fm->addr + FM_SET_INTERRUPT_ENABLE);
+	dev_dbg(&dev->dev, "change sets on resume: good %x, bad %x\n",
+		good_sockets, bad_sockets);
+
+	fm->socket_change_set = 0;
+	if (good_sockets) {
+		fm->finish_me = &finish_resume;
+		spin_unlock_irqrestore(&fm->lock, flags);
+		timeout = wait_for_completion_timeout(&finish_resume, HZ);
+		dev_dbg(&dev->dev, "wait returned %lu\n", timeout);
+		writel(TIFM_IRQ_FIFOMASK(good_sockets)
+		       | TIFM_IRQ_CARDMASK(good_sockets),
+		       fm->addr + FM_CLEAR_INTERRUPT_ENABLE);
+		writel(TIFM_IRQ_FIFOMASK(good_sockets)
+		       | TIFM_IRQ_CARDMASK(good_sockets),
+		       fm->addr + FM_SET_INTERRUPT_ENABLE);
+		spin_lock_irqsave(&fm->lock, flags);
+		fm->finish_me = NULL;
+		fm->socket_change_set ^= good_sockets & fm->socket_change_set;
+	}
+
+	fm->socket_change_set |= bad_sockets;
+	if (fm->socket_change_set)
+		tifm_queue_work(&fm->media_switcher);
+
+	spin_unlock_irqrestore(&fm->lock, flags);
+	writel(TIFM_IRQ_ENABLE,
+	       fm->addr + FM_SET_INTERRUPT_ENABLE);
+
+	return 0;
+}
+
+#else
+
+#define tifm_7xx1_suspend NULL
+#define tifm_7xx1_resume NULL
+
+#endif /* CONFIG_PM */
+
+static int tifm_7xx1_dummy_has_ms_pif(struct tifm_adapter *fm,
+				      struct tifm_dev *sock)
+{
+	return 0;
+}
+
+static int tifm_7xx1_has_ms_pif(struct tifm_adapter *fm, struct tifm_dev *sock)
+{
+	if (((fm->num_sockets == 4) && (sock->socket_id == 2))
+	    || ((fm->num_sockets == 2) && (sock->socket_id == 0)))
+		return 1;
+
+	return 0;
+}
+
+static int tifm_7xx1_probe(struct pci_dev *dev,
+			   const struct pci_device_id *dev_id)
+{
+	struct tifm_adapter *fm;
+	int pci_dev_busy = 0;
+	int rc;
+
+	rc = pci_set_dma_mask(dev, DMA_BIT_MASK(32));
+	if (rc)
+		return rc;
+
+	rc = pci_enable_device(dev);
+	if (rc)
+		return rc;
+
+	pci_set_master(dev);
+
+	rc = pci_request_regions(dev, DRIVER_NAME);
+	if (rc) {
+		pci_dev_busy = 1;
+		goto err_out;
+	}
+
+	pci_intx(dev, 1);
+
+	fm = tifm_alloc_adapter(dev->device == PCI_DEVICE_ID_TI_XX21_XX11_FM
+				? 4 : 2, &dev->dev);
+	if (!fm) {
+		rc = -ENOMEM;
+		goto err_out_int;
+	}
+
+	INIT_WORK(&fm->media_switcher, tifm_7xx1_switch_media);
+	fm->eject = tifm_7xx1_eject;
+	fm->has_ms_pif = tifm_7xx1_has_ms_pif;
+	pci_set_drvdata(dev, fm);
+
+	fm->addr = pci_ioremap_bar(dev, 0);
+	if (!fm->addr) {
+		rc = -ENODEV;
+		goto err_out_free;
+	}
+
+	rc = request_irq(dev->irq, tifm_7xx1_isr, IRQF_SHARED, DRIVER_NAME, fm);
+	if (rc)
+		goto err_out_unmap;
+
+	rc = tifm_add_adapter(fm);
+	if (rc)
+		goto err_out_irq;
+
+	writel(TIFM_IRQ_ENABLE | TIFM_IRQ_SOCKMASK((1 << fm->num_sockets) - 1),
+	       fm->addr + FM_CLEAR_INTERRUPT_ENABLE);
+	writel(TIFM_IRQ_ENABLE | TIFM_IRQ_SOCKMASK((1 << fm->num_sockets) - 1),
+	       fm->addr + FM_SET_INTERRUPT_ENABLE);
+	return 0;
+
+err_out_irq:
+	free_irq(dev->irq, fm);
+err_out_unmap:
+	iounmap(fm->addr);
+err_out_free:
+	tifm_free_adapter(fm);
+err_out_int:
+	pci_intx(dev, 0);
+	pci_release_regions(dev);
+err_out:
+	if (!pci_dev_busy)
+		pci_disable_device(dev);
+	return rc;
+}
+
+static void tifm_7xx1_remove(struct pci_dev *dev)
+{
+	struct tifm_adapter *fm = pci_get_drvdata(dev);
+	int cnt;
+
+	fm->eject = tifm_7xx1_dummy_eject;
+	fm->has_ms_pif = tifm_7xx1_dummy_has_ms_pif;
+	writel(TIFM_IRQ_SETALL, fm->addr + FM_CLEAR_INTERRUPT_ENABLE);
+	mmiowb();
+	free_irq(dev->irq, fm);
+
+	tifm_remove_adapter(fm);
+
+	for (cnt = 0; cnt < fm->num_sockets; cnt++)
+		tifm_7xx1_sock_power_off(tifm_7xx1_sock_addr(fm->addr, cnt));
+
+	iounmap(fm->addr);
+	pci_intx(dev, 0);
+	pci_release_regions(dev);
+
+	pci_disable_device(dev);
+	tifm_free_adapter(fm);
+}
+
+static const struct pci_device_id tifm_7xx1_pci_tbl[] = {
+	{ PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XX21_XX11_FM, PCI_ANY_ID,
+	  PCI_ANY_ID, 0, 0, 0 }, /* xx21 - the one I have */
+        { PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XX12_FM, PCI_ANY_ID,
+	  PCI_ANY_ID, 0, 0, 0 },
+	{ PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XX20_FM, PCI_ANY_ID,
+	  PCI_ANY_ID, 0, 0, 0 },
+	{ }
+};
+
+static struct pci_driver tifm_7xx1_driver = {
+	.name = DRIVER_NAME,
+	.id_table = tifm_7xx1_pci_tbl,
+	.probe = tifm_7xx1_probe,
+	.remove = tifm_7xx1_remove,
+	.suspend = tifm_7xx1_suspend,
+	.resume = tifm_7xx1_resume,
+};
+
+module_pci_driver(tifm_7xx1_driver);
+MODULE_AUTHOR("Alex Dubov");
+MODULE_DESCRIPTION("TI FlashMedia host driver");
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(pci, tifm_7xx1_pci_tbl);
+MODULE_VERSION(DRIVER_VERSION);
diff --git a/drivers/misc/tifm_core.c b/drivers/misc/tifm_core.c
new file mode 100644
index 000000000..a511b2a71
--- /dev/null
+++ b/drivers/misc/tifm_core.c
@@ -0,0 +1,371 @@
+/*
+ *  tifm_core.c - TI FlashMedia driver
+ *
+ *  Copyright (C) 2006 Alex Dubov <oakad@yahoo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/tifm.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/idr.h>
+#include <linux/module.h>
+
+#define DRIVER_NAME "tifm_core"
+#define DRIVER_VERSION "0.8"
+
+static struct workqueue_struct *workqueue;
+static DEFINE_IDR(tifm_adapter_idr);
+static DEFINE_SPINLOCK(tifm_adapter_lock);
+
+static const char *tifm_media_type_name(unsigned char type, unsigned char nt)
+{
+	const char *card_type_name[3][3] = {
+		{ "SmartMedia/xD", "MemoryStick", "MMC/SD" },
+		{ "XD", "MS", "SD"},
+		{ "xd", "ms", "sd"}
+	};
+
+	if (nt > 2 || type < 1 || type > 3)
+		return NULL;
+	return card_type_name[nt][type - 1];
+}
+
+static int tifm_dev_match(struct tifm_dev *sock, struct tifm_device_id *id)
+{
+	if (sock->type == id->type)
+		return 1;
+	return 0;
+}
+
+static int tifm_bus_match(struct device *dev, struct device_driver *drv)
+{
+	struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev);
+	struct tifm_driver *fm_drv = container_of(drv, struct tifm_driver,
+						  driver);
+	struct tifm_device_id *ids = fm_drv->id_table;
+
+	if (ids) {
+		while (ids->type) {
+			if (tifm_dev_match(sock, ids))
+				return 1;
+			++ids;
+		}
+	}
+	return 0;
+}
+
+static int tifm_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev);
+
+	if (add_uevent_var(env, "TIFM_CARD_TYPE=%s", tifm_media_type_name(sock->type, 1)))
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int tifm_device_probe(struct device *dev)
+{
+	struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev);
+	struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver,
+					       driver);
+	int rc = -ENODEV;
+
+	get_device(dev);
+	if (dev->driver && drv->probe) {
+		rc = drv->probe(sock);
+		if (!rc)
+			return 0;
+	}
+	put_device(dev);
+	return rc;
+}
+
+static void tifm_dummy_event(struct tifm_dev *sock)
+{
+	return;
+}
+
+static int tifm_device_remove(struct device *dev)
+{
+	struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev);
+	struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver,
+					       driver);
+
+	if (dev->driver && drv->remove) {
+		sock->card_event = tifm_dummy_event;
+		sock->data_event = tifm_dummy_event;
+		drv->remove(sock);
+		sock->dev.driver = NULL;
+	}
+
+	put_device(dev);
+	return 0;
+}
+
+#ifdef CONFIG_PM
+
+static int tifm_device_suspend(struct device *dev, pm_message_t state)
+{
+	struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev);
+	struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver,
+					       driver);
+
+	if (dev->driver && drv->suspend)
+		return drv->suspend(sock, state);
+	return 0;
+}
+
+static int tifm_device_resume(struct device *dev)
+{
+	struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev);
+	struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver,
+					       driver);
+
+	if (dev->driver && drv->resume)
+		return drv->resume(sock);
+	return 0;
+}
+
+#else
+
+#define tifm_device_suspend NULL
+#define tifm_device_resume NULL
+
+#endif /* CONFIG_PM */
+
+static ssize_t type_show(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev);
+	return sprintf(buf, "%x", sock->type);
+}
+static DEVICE_ATTR_RO(type);
+
+static struct attribute *tifm_dev_attrs[] = {
+	&dev_attr_type.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(tifm_dev);
+
+static struct bus_type tifm_bus_type = {
+	.name      = "tifm",
+	.dev_groups = tifm_dev_groups,
+	.match     = tifm_bus_match,
+	.uevent    = tifm_uevent,
+	.probe     = tifm_device_probe,
+	.remove    = tifm_device_remove,
+	.suspend   = tifm_device_suspend,
+	.resume    = tifm_device_resume
+};
+
+static void tifm_free(struct device *dev)
+{
+	struct tifm_adapter *fm = container_of(dev, struct tifm_adapter, dev);
+
+	kfree(fm);
+}
+
+static struct class tifm_adapter_class = {
+	.name    = "tifm_adapter",
+	.dev_release = tifm_free
+};
+
+struct tifm_adapter *tifm_alloc_adapter(unsigned int num_sockets,
+					struct device *dev)
+{
+	struct tifm_adapter *fm;
+
+	fm = kzalloc(sizeof(struct tifm_adapter)
+		     + sizeof(struct tifm_dev*) * num_sockets, GFP_KERNEL);
+	if (fm) {
+		fm->dev.class = &tifm_adapter_class;
+		fm->dev.parent = dev;
+		device_initialize(&fm->dev);
+		spin_lock_init(&fm->lock);
+		fm->num_sockets = num_sockets;
+	}
+	return fm;
+}
+EXPORT_SYMBOL(tifm_alloc_adapter);
+
+int tifm_add_adapter(struct tifm_adapter *fm)
+{
+	int rc;
+
+	idr_preload(GFP_KERNEL);
+	spin_lock(&tifm_adapter_lock);
+	rc = idr_alloc(&tifm_adapter_idr, fm, 0, 0, GFP_NOWAIT);
+	if (rc >= 0)
+		fm->id = rc;
+	spin_unlock(&tifm_adapter_lock);
+	idr_preload_end();
+	if (rc < 0)
+		return rc;
+
+	dev_set_name(&fm->dev, "tifm%u", fm->id);
+	rc = device_add(&fm->dev);
+	if (rc) {
+		spin_lock(&tifm_adapter_lock);
+		idr_remove(&tifm_adapter_idr, fm->id);
+		spin_unlock(&tifm_adapter_lock);
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL(tifm_add_adapter);
+
+void tifm_remove_adapter(struct tifm_adapter *fm)
+{
+	unsigned int cnt;
+
+	flush_workqueue(workqueue);
+	for (cnt = 0; cnt < fm->num_sockets; ++cnt) {
+		if (fm->sockets[cnt])
+			device_unregister(&fm->sockets[cnt]->dev);
+	}
+
+	spin_lock(&tifm_adapter_lock);
+	idr_remove(&tifm_adapter_idr, fm->id);
+	spin_unlock(&tifm_adapter_lock);
+	device_del(&fm->dev);
+}
+EXPORT_SYMBOL(tifm_remove_adapter);
+
+void tifm_free_adapter(struct tifm_adapter *fm)
+{
+	put_device(&fm->dev);
+}
+EXPORT_SYMBOL(tifm_free_adapter);
+
+void tifm_free_device(struct device *dev)
+{
+	struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev);
+	kfree(sock);
+}
+EXPORT_SYMBOL(tifm_free_device);
+
+struct tifm_dev *tifm_alloc_device(struct tifm_adapter *fm, unsigned int id,
+				   unsigned char type)
+{
+	struct tifm_dev *sock = NULL;
+
+	if (!tifm_media_type_name(type, 0))
+		return sock;
+
+	sock = kzalloc(sizeof(struct tifm_dev), GFP_KERNEL);
+	if (sock) {
+		spin_lock_init(&sock->lock);
+		sock->type = type;
+		sock->socket_id = id;
+		sock->card_event = tifm_dummy_event;
+		sock->data_event = tifm_dummy_event;
+
+		sock->dev.parent = fm->dev.parent;
+		sock->dev.bus = &tifm_bus_type;
+		sock->dev.dma_mask = fm->dev.parent->dma_mask;
+		sock->dev.release = tifm_free_device;
+
+		dev_set_name(&sock->dev, "tifm_%s%u:%u",
+			     tifm_media_type_name(type, 2), fm->id, id);
+		printk(KERN_INFO DRIVER_NAME
+		       ": %s card detected in socket %u:%u\n",
+		       tifm_media_type_name(type, 0), fm->id, id);
+	}
+	return sock;
+}
+EXPORT_SYMBOL(tifm_alloc_device);
+
+void tifm_eject(struct tifm_dev *sock)
+{
+	struct tifm_adapter *fm = dev_get_drvdata(sock->dev.parent);
+	fm->eject(fm, sock);
+}
+EXPORT_SYMBOL(tifm_eject);
+
+int tifm_has_ms_pif(struct tifm_dev *sock)
+{
+	struct tifm_adapter *fm = dev_get_drvdata(sock->dev.parent);
+	return fm->has_ms_pif(fm, sock);
+}
+EXPORT_SYMBOL(tifm_has_ms_pif);
+
+int tifm_map_sg(struct tifm_dev *sock, struct scatterlist *sg, int nents,
+		int direction)
+{
+	return pci_map_sg(to_pci_dev(sock->dev.parent), sg, nents, direction);
+}
+EXPORT_SYMBOL(tifm_map_sg);
+
+void tifm_unmap_sg(struct tifm_dev *sock, struct scatterlist *sg, int nents,
+		   int direction)
+{
+	pci_unmap_sg(to_pci_dev(sock->dev.parent), sg, nents, direction);
+}
+EXPORT_SYMBOL(tifm_unmap_sg);
+
+void tifm_queue_work(struct work_struct *work)
+{
+	queue_work(workqueue, work);
+}
+EXPORT_SYMBOL(tifm_queue_work);
+
+int tifm_register_driver(struct tifm_driver *drv)
+{
+	drv->driver.bus = &tifm_bus_type;
+
+	return driver_register(&drv->driver);
+}
+EXPORT_SYMBOL(tifm_register_driver);
+
+void tifm_unregister_driver(struct tifm_driver *drv)
+{
+	driver_unregister(&drv->driver);
+}
+EXPORT_SYMBOL(tifm_unregister_driver);
+
+static int __init tifm_init(void)
+{
+	int rc;
+
+	workqueue = create_freezable_workqueue("tifm");
+	if (!workqueue)
+		return -ENOMEM;
+
+	rc = bus_register(&tifm_bus_type);
+
+	if (rc)
+		goto err_out_wq;
+
+	rc = class_register(&tifm_adapter_class);
+	if (!rc)
+		return 0;
+
+	bus_unregister(&tifm_bus_type);
+
+err_out_wq:
+	destroy_workqueue(workqueue);
+
+	return rc;
+}
+
+static void __exit tifm_exit(void)
+{
+	class_unregister(&tifm_adapter_class);
+	bus_unregister(&tifm_bus_type);
+	destroy_workqueue(workqueue);
+}
+
+subsys_initcall(tifm_init);
+module_exit(tifm_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Alex Dubov");
+MODULE_DESCRIPTION("TI FlashMedia core driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRIVER_VERSION);
diff --git a/drivers/misc/tsl2550.c b/drivers/misc/tsl2550.c
new file mode 100644
index 000000000..3fce3b6a3
--- /dev/null
+++ b/drivers/misc/tsl2550.c
@@ -0,0 +1,468 @@
+/*
+ *  tsl2550.c - Linux kernel modules for ambient light sensor
+ *
+ *  Copyright (C) 2007 Rodolfo Giometti <giometti@linux.it>
+ *  Copyright (C) 2007 Eurotech S.p.A. <info@eurotech.it>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/mutex.h>
+
+#define TSL2550_DRV_NAME	"tsl2550"
+#define DRIVER_VERSION		"1.2"
+
+/*
+ * Defines
+ */
+
+#define TSL2550_POWER_DOWN		0x00
+#define TSL2550_POWER_UP		0x03
+#define TSL2550_STANDARD_RANGE		0x18
+#define TSL2550_EXTENDED_RANGE		0x1d
+#define TSL2550_READ_ADC0		0x43
+#define TSL2550_READ_ADC1		0x83
+
+/*
+ * Structs
+ */
+
+struct tsl2550_data {
+	struct i2c_client *client;
+	struct mutex update_lock;
+
+	unsigned int power_state:1;
+	unsigned int operating_mode:1;
+};
+
+/*
+ * Global data
+ */
+
+static const u8 TSL2550_MODE_RANGE[2] = {
+	TSL2550_STANDARD_RANGE, TSL2550_EXTENDED_RANGE,
+};
+
+/*
+ * Management functions
+ */
+
+static int tsl2550_set_operating_mode(struct i2c_client *client, int mode)
+{
+	struct tsl2550_data *data = i2c_get_clientdata(client);
+
+	int ret = i2c_smbus_write_byte(client, TSL2550_MODE_RANGE[mode]);
+
+	data->operating_mode = mode;
+
+	return ret;
+}
+
+static int tsl2550_set_power_state(struct i2c_client *client, int state)
+{
+	struct tsl2550_data *data = i2c_get_clientdata(client);
+	int ret;
+
+	if (state == 0)
+		ret = i2c_smbus_write_byte(client, TSL2550_POWER_DOWN);
+	else {
+		ret = i2c_smbus_write_byte(client, TSL2550_POWER_UP);
+
+		/* On power up we should reset operating mode also... */
+		tsl2550_set_operating_mode(client, data->operating_mode);
+	}
+
+	data->power_state = state;
+
+	return ret;
+}
+
+static int tsl2550_get_adc_value(struct i2c_client *client, u8 cmd)
+{
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(client, cmd);
+	if (ret < 0)
+		return ret;
+	if (!(ret & 0x80))
+		return -EAGAIN;
+	return ret & 0x7f;	/* remove the "valid" bit */
+}
+
+/*
+ * LUX calculation
+ */
+
+#define	TSL2550_MAX_LUX		1846
+
+static const u8 ratio_lut[] = {
+	100, 100, 100, 100, 100, 100, 100, 100,
+	100, 100, 100, 100, 100, 100, 99, 99,
+	99, 99, 99, 99, 99, 99, 99, 99,
+	99, 99, 99, 98, 98, 98, 98, 98,
+	98, 98, 97, 97, 97, 97, 97, 96,
+	96, 96, 96, 95, 95, 95, 94, 94,
+	93, 93, 93, 92, 92, 91, 91, 90,
+	89, 89, 88, 87, 87, 86, 85, 84,
+	83, 82, 81, 80, 79, 78, 77, 75,
+	74, 73, 71, 69, 68, 66, 64, 62,
+	60, 58, 56, 54, 52, 49, 47, 44,
+	42, 41, 40, 40, 39, 39, 38, 38,
+	37, 37, 37, 36, 36, 36, 35, 35,
+	35, 35, 34, 34, 34, 34, 33, 33,
+	33, 33, 32, 32, 32, 32, 32, 31,
+	31, 31, 31, 31, 30, 30, 30, 30,
+	30,
+};
+
+static const u16 count_lut[] = {
+	0, 1, 2, 3, 4, 5, 6, 7,
+	8, 9, 10, 11, 12, 13, 14, 15,
+	16, 18, 20, 22, 24, 26, 28, 30,
+	32, 34, 36, 38, 40, 42, 44, 46,
+	49, 53, 57, 61, 65, 69, 73, 77,
+	81, 85, 89, 93, 97, 101, 105, 109,
+	115, 123, 131, 139, 147, 155, 163, 171,
+	179, 187, 195, 203, 211, 219, 227, 235,
+	247, 263, 279, 295, 311, 327, 343, 359,
+	375, 391, 407, 423, 439, 455, 471, 487,
+	511, 543, 575, 607, 639, 671, 703, 735,
+	767, 799, 831, 863, 895, 927, 959, 991,
+	1039, 1103, 1167, 1231, 1295, 1359, 1423, 1487,
+	1551, 1615, 1679, 1743, 1807, 1871, 1935, 1999,
+	2095, 2223, 2351, 2479, 2607, 2735, 2863, 2991,
+	3119, 3247, 3375, 3503, 3631, 3759, 3887, 4015,
+};
+
+/*
+ * This function is described into Taos TSL2550 Designer's Notebook
+ * pages 2, 3.
+ */
+static int tsl2550_calculate_lux(u8 ch0, u8 ch1)
+{
+	unsigned int lux;
+
+	/* Look up count from channel values */
+	u16 c0 = count_lut[ch0];
+	u16 c1 = count_lut[ch1];
+
+	/*
+	 * Calculate ratio.
+	 * Note: the "128" is a scaling factor
+	 */
+	u8 r = 128;
+
+	/* Avoid division by 0 and count 1 cannot be greater than count 0 */
+	if (c1 <= c0)
+		if (c0) {
+			r = c1 * 128 / c0;
+
+			/* Calculate LUX */
+			lux = ((c0 - c1) * ratio_lut[r]) / 256;
+		} else
+			lux = 0;
+	else
+		return 0;
+
+	/* LUX range check */
+	return lux > TSL2550_MAX_LUX ? TSL2550_MAX_LUX : lux;
+}
+
+/*
+ * SysFS support
+ */
+
+static ssize_t tsl2550_show_power_state(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct tsl2550_data *data = i2c_get_clientdata(to_i2c_client(dev));
+
+	return sprintf(buf, "%u\n", data->power_state);
+}
+
+static ssize_t tsl2550_store_power_state(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct tsl2550_data *data = i2c_get_clientdata(client);
+	unsigned long val = simple_strtoul(buf, NULL, 10);
+	int ret;
+
+	if (val > 1)
+		return -EINVAL;
+
+	mutex_lock(&data->update_lock);
+	ret = tsl2550_set_power_state(client, val);
+	mutex_unlock(&data->update_lock);
+
+	if (ret < 0)
+		return ret;
+
+	return count;
+}
+
+static DEVICE_ATTR(power_state, S_IWUSR | S_IRUGO,
+		   tsl2550_show_power_state, tsl2550_store_power_state);
+
+static ssize_t tsl2550_show_operating_mode(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct tsl2550_data *data = i2c_get_clientdata(to_i2c_client(dev));
+
+	return sprintf(buf, "%u\n", data->operating_mode);
+}
+
+static ssize_t tsl2550_store_operating_mode(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct tsl2550_data *data = i2c_get_clientdata(client);
+	unsigned long val = simple_strtoul(buf, NULL, 10);
+	int ret;
+
+	if (val > 1)
+		return -EINVAL;
+
+	if (data->power_state == 0)
+		return -EBUSY;
+
+	mutex_lock(&data->update_lock);
+	ret = tsl2550_set_operating_mode(client, val);
+	mutex_unlock(&data->update_lock);
+
+	if (ret < 0)
+		return ret;
+
+	return count;
+}
+
+static DEVICE_ATTR(operating_mode, S_IWUSR | S_IRUGO,
+		   tsl2550_show_operating_mode, tsl2550_store_operating_mode);
+
+static ssize_t __tsl2550_show_lux(struct i2c_client *client, char *buf)
+{
+	struct tsl2550_data *data = i2c_get_clientdata(client);
+	u8 ch0, ch1;
+	int ret;
+
+	ret = tsl2550_get_adc_value(client, TSL2550_READ_ADC0);
+	if (ret < 0)
+		return ret;
+	ch0 = ret;
+
+	ret = tsl2550_get_adc_value(client, TSL2550_READ_ADC1);
+	if (ret < 0)
+		return ret;
+	ch1 = ret;
+
+	/* Do the job */
+	ret = tsl2550_calculate_lux(ch0, ch1);
+	if (ret < 0)
+		return ret;
+	if (data->operating_mode == 1)
+		ret *= 5;
+
+	return sprintf(buf, "%d\n", ret);
+}
+
+static ssize_t tsl2550_show_lux1_input(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct tsl2550_data *data = i2c_get_clientdata(client);
+	int ret;
+
+	/* No LUX data if not operational */
+	if (!data->power_state)
+		return -EBUSY;
+
+	mutex_lock(&data->update_lock);
+	ret = __tsl2550_show_lux(client, buf);
+	mutex_unlock(&data->update_lock);
+
+	return ret;
+}
+
+static DEVICE_ATTR(lux1_input, S_IRUGO,
+		   tsl2550_show_lux1_input, NULL);
+
+static struct attribute *tsl2550_attributes[] = {
+	&dev_attr_power_state.attr,
+	&dev_attr_operating_mode.attr,
+	&dev_attr_lux1_input.attr,
+	NULL
+};
+
+static const struct attribute_group tsl2550_attr_group = {
+	.attrs = tsl2550_attributes,
+};
+
+/*
+ * Initialization function
+ */
+
+static int tsl2550_init_client(struct i2c_client *client)
+{
+	struct tsl2550_data *data = i2c_get_clientdata(client);
+	int err;
+
+	/*
+	 * Probe the chip. To do so we try to power up the device and then to
+	 * read back the 0x03 code
+	 */
+	err = i2c_smbus_read_byte_data(client, TSL2550_POWER_UP);
+	if (err < 0)
+		return err;
+	if (err != TSL2550_POWER_UP)
+		return -ENODEV;
+	data->power_state = 1;
+
+	/* Set the default operating mode */
+	err = i2c_smbus_write_byte(client,
+				   TSL2550_MODE_RANGE[data->operating_mode]);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+/*
+ * I2C init/probing/exit functions
+ */
+
+static struct i2c_driver tsl2550_driver;
+static int tsl2550_probe(struct i2c_client *client,
+				   const struct i2c_device_id *id)
+{
+	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+	struct tsl2550_data *data;
+	int *opmode, err = 0;
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WRITE_BYTE
+					    | I2C_FUNC_SMBUS_READ_BYTE_DATA)) {
+		err = -EIO;
+		goto exit;
+	}
+
+	data = kzalloc(sizeof(struct tsl2550_data), GFP_KERNEL);
+	if (!data) {
+		err = -ENOMEM;
+		goto exit;
+	}
+	data->client = client;
+	i2c_set_clientdata(client, data);
+
+	/* Check platform data */
+	opmode = client->dev.platform_data;
+	if (opmode) {
+		if (*opmode < 0 || *opmode > 1) {
+			dev_err(&client->dev, "invalid operating_mode (%d)\n",
+					*opmode);
+			err = -EINVAL;
+			goto exit_kfree;
+		}
+		data->operating_mode = *opmode;
+	} else
+		data->operating_mode = 0;	/* default mode is standard */
+	dev_info(&client->dev, "%s operating mode\n",
+			data->operating_mode ? "extended" : "standard");
+
+	mutex_init(&data->update_lock);
+
+	/* Initialize the TSL2550 chip */
+	err = tsl2550_init_client(client);
+	if (err)
+		goto exit_kfree;
+
+	/* Register sysfs hooks */
+	err = sysfs_create_group(&client->dev.kobj, &tsl2550_attr_group);
+	if (err)
+		goto exit_kfree;
+
+	dev_info(&client->dev, "support ver. %s enabled\n", DRIVER_VERSION);
+
+	return 0;
+
+exit_kfree:
+	kfree(data);
+exit:
+	return err;
+}
+
+static int tsl2550_remove(struct i2c_client *client)
+{
+	sysfs_remove_group(&client->dev.kobj, &tsl2550_attr_group);
+
+	/* Power down the device */
+	tsl2550_set_power_state(client, 0);
+
+	kfree(i2c_get_clientdata(client));
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+
+static int tsl2550_suspend(struct device *dev)
+{
+	return tsl2550_set_power_state(to_i2c_client(dev), 0);
+}
+
+static int tsl2550_resume(struct device *dev)
+{
+	return tsl2550_set_power_state(to_i2c_client(dev), 1);
+}
+
+static SIMPLE_DEV_PM_OPS(tsl2550_pm_ops, tsl2550_suspend, tsl2550_resume);
+#define TSL2550_PM_OPS (&tsl2550_pm_ops)
+
+#else
+
+#define TSL2550_PM_OPS NULL
+
+#endif /* CONFIG_PM_SLEEP */
+
+static const struct i2c_device_id tsl2550_id[] = {
+	{ "tsl2550", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, tsl2550_id);
+
+static const struct of_device_id tsl2550_of_match[] = {
+	{ .compatible = "taos,tsl2550" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, tsl2550_of_match);
+
+static struct i2c_driver tsl2550_driver = {
+	.driver = {
+		.name	= TSL2550_DRV_NAME,
+		.of_match_table = tsl2550_of_match,
+		.pm	= TSL2550_PM_OPS,
+	},
+	.probe	= tsl2550_probe,
+	.remove	= tsl2550_remove,
+	.id_table = tsl2550_id,
+};
+
+module_i2c_driver(tsl2550_driver);
+
+MODULE_AUTHOR("Rodolfo Giometti <giometti@linux.it>");
+MODULE_DESCRIPTION("TSL2550 ambient light sensor driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRIVER_VERSION);
diff --git a/drivers/misc/vexpress-syscfg.c b/drivers/misc/vexpress-syscfg.c
new file mode 100644
index 000000000..a3c6c773d
--- /dev/null
+++ b/drivers/misc/vexpress-syscfg.c
@@ -0,0 +1,287 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2014 ARM Limited
+ */
+
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/sched/signal.h>
+#include <linux/slab.h>
+#include <linux/syscore_ops.h>
+#include <linux/vexpress.h>
+
+
+#define SYS_CFGDATA		0x0
+
+#define SYS_CFGCTRL		0x4
+#define SYS_CFGCTRL_START	(1 << 31)
+#define SYS_CFGCTRL_WRITE	(1 << 30)
+#define SYS_CFGCTRL_DCC(n)	(((n) & 0xf) << 26)
+#define SYS_CFGCTRL_FUNC(n)	(((n) & 0x3f) << 20)
+#define SYS_CFGCTRL_SITE(n)	(((n) & 0x3) << 16)
+#define SYS_CFGCTRL_POSITION(n)	(((n) & 0xf) << 12)
+#define SYS_CFGCTRL_DEVICE(n)	(((n) & 0xfff) << 0)
+
+#define SYS_CFGSTAT		0x8
+#define SYS_CFGSTAT_ERR		(1 << 1)
+#define SYS_CFGSTAT_COMPLETE	(1 << 0)
+
+
+struct vexpress_syscfg {
+	struct device *dev;
+	void __iomem *base;
+	struct list_head funcs;
+};
+
+struct vexpress_syscfg_func {
+	struct list_head list;
+	struct vexpress_syscfg *syscfg;
+	struct regmap *regmap;
+	int num_templates;
+	u32 template[0]; /* Keep it last! */
+};
+
+
+static int vexpress_syscfg_exec(struct vexpress_syscfg_func *func,
+		int index, bool write, u32 *data)
+{
+	struct vexpress_syscfg *syscfg = func->syscfg;
+	u32 command, status;
+	int tries;
+	long timeout;
+
+	if (WARN_ON(index >= func->num_templates))
+		return -EINVAL;
+
+	command = readl(syscfg->base + SYS_CFGCTRL);
+	if (WARN_ON(command & SYS_CFGCTRL_START))
+		return -EBUSY;
+
+	command = func->template[index];
+	command |= SYS_CFGCTRL_START;
+	command |= write ? SYS_CFGCTRL_WRITE : 0;
+
+	/* Use a canary for reads */
+	if (!write)
+		*data = 0xdeadbeef;
+
+	dev_dbg(syscfg->dev, "func %p, command %x, data %x\n",
+			func, command, *data);
+	writel(*data, syscfg->base + SYS_CFGDATA);
+	writel(0, syscfg->base + SYS_CFGSTAT);
+	writel(command, syscfg->base + SYS_CFGCTRL);
+	mb();
+
+	/* The operation can take ages... Go to sleep, 100us initially */
+	tries = 100;
+	timeout = 100;
+	do {
+		if (!irqs_disabled()) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule_timeout(usecs_to_jiffies(timeout));
+			if (signal_pending(current))
+				return -EINTR;
+		} else {
+			udelay(timeout);
+		}
+
+		status = readl(syscfg->base + SYS_CFGSTAT);
+		if (status & SYS_CFGSTAT_ERR)
+			return -EFAULT;
+
+		if (timeout > 20)
+			timeout -= 20;
+	} while (--tries && !(status & SYS_CFGSTAT_COMPLETE));
+	if (WARN_ON_ONCE(!tries))
+		return -ETIMEDOUT;
+
+	if (!write) {
+		*data = readl(syscfg->base + SYS_CFGDATA);
+		dev_dbg(syscfg->dev, "func %p, read data %x\n", func, *data);
+	}
+
+	return 0;
+}
+
+static int vexpress_syscfg_read(void *context, unsigned int index,
+		unsigned int *val)
+{
+	struct vexpress_syscfg_func *func = context;
+
+	return vexpress_syscfg_exec(func, index, false, val);
+}
+
+static int vexpress_syscfg_write(void *context, unsigned int index,
+		unsigned int val)
+{
+	struct vexpress_syscfg_func *func = context;
+
+	return vexpress_syscfg_exec(func, index, true, &val);
+}
+
+static struct regmap_config vexpress_syscfg_regmap_config = {
+	.lock = vexpress_config_lock,
+	.unlock = vexpress_config_unlock,
+	.reg_bits = 32,
+	.val_bits = 32,
+	.reg_read = vexpress_syscfg_read,
+	.reg_write = vexpress_syscfg_write,
+	.reg_format_endian = REGMAP_ENDIAN_LITTLE,
+	.val_format_endian = REGMAP_ENDIAN_LITTLE,
+};
+
+
+static struct regmap *vexpress_syscfg_regmap_init(struct device *dev,
+		void *context)
+{
+	int err;
+	struct vexpress_syscfg *syscfg = context;
+	struct vexpress_syscfg_func *func;
+	struct property *prop;
+	const __be32 *val = NULL;
+	__be32 energy_quirk[4];
+	int num;
+	u32 site, position, dcc;
+	int i;
+
+	err = vexpress_config_get_topo(dev->of_node, &site,
+				&position, &dcc);
+	if (err)
+		return ERR_PTR(err);
+
+	prop = of_find_property(dev->of_node,
+			"arm,vexpress-sysreg,func", NULL);
+	if (!prop)
+		return ERR_PTR(-EINVAL);
+
+	num = prop->length / sizeof(u32) / 2;
+	val = prop->value;
+
+	/*
+	 * "arm,vexpress-energy" function used to be described
+	 * by its first device only, now it requires both
+	 */
+	if (num == 1 && of_device_is_compatible(dev->of_node,
+			"arm,vexpress-energy")) {
+		num = 2;
+		energy_quirk[0] = *val;
+		energy_quirk[2] = *val++;
+		energy_quirk[1] = *val;
+		energy_quirk[3] = cpu_to_be32(be32_to_cpup(val) + 1);
+		val = energy_quirk;
+	}
+
+	func = kzalloc(struct_size(func, template, num), GFP_KERNEL);
+	if (!func)
+		return ERR_PTR(-ENOMEM);
+
+	func->syscfg = syscfg;
+	func->num_templates = num;
+
+	for (i = 0; i < num; i++) {
+		u32 function, device;
+
+		function = be32_to_cpup(val++);
+		device = be32_to_cpup(val++);
+
+		dev_dbg(dev, "func %p: %u/%u/%u/%u/%u\n",
+				func, site, position, dcc,
+				function, device);
+
+		func->template[i] = SYS_CFGCTRL_DCC(dcc);
+		func->template[i] |= SYS_CFGCTRL_SITE(site);
+		func->template[i] |= SYS_CFGCTRL_POSITION(position);
+		func->template[i] |= SYS_CFGCTRL_FUNC(function);
+		func->template[i] |= SYS_CFGCTRL_DEVICE(device);
+	}
+
+	vexpress_syscfg_regmap_config.max_register = num - 1;
+
+	func->regmap = regmap_init(dev, NULL, func,
+			&vexpress_syscfg_regmap_config);
+
+	if (IS_ERR(func->regmap)) {
+		void *err = func->regmap;
+
+		kfree(func);
+		return err;
+	}
+
+	list_add(&func->list, &syscfg->funcs);
+
+	return func->regmap;
+}
+
+static void vexpress_syscfg_regmap_exit(struct regmap *regmap, void *context)
+{
+	struct vexpress_syscfg *syscfg = context;
+	struct vexpress_syscfg_func *func, *tmp;
+
+	regmap_exit(regmap);
+
+	list_for_each_entry_safe(func, tmp, &syscfg->funcs, list) {
+		if (func->regmap == regmap) {
+			list_del(&syscfg->funcs);
+			kfree(func);
+			break;
+		}
+	}
+}
+
+static struct vexpress_config_bridge_ops vexpress_syscfg_bridge_ops = {
+	.regmap_init = vexpress_syscfg_regmap_init,
+	.regmap_exit = vexpress_syscfg_regmap_exit,
+};
+
+
+static int vexpress_syscfg_probe(struct platform_device *pdev)
+{
+	struct vexpress_syscfg *syscfg;
+	struct resource *res;
+	struct device *bridge;
+
+	syscfg = devm_kzalloc(&pdev->dev, sizeof(*syscfg), GFP_KERNEL);
+	if (!syscfg)
+		return -ENOMEM;
+	syscfg->dev = &pdev->dev;
+	INIT_LIST_HEAD(&syscfg->funcs);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	syscfg->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(syscfg->base))
+		return PTR_ERR(syscfg->base);
+
+	/* Must use dev.parent (MFD), as that's where DT phandle points at... */
+	bridge = vexpress_config_bridge_register(pdev->dev.parent,
+			&vexpress_syscfg_bridge_ops, syscfg);
+
+	return PTR_ERR_OR_ZERO(bridge);
+}
+
+static const struct platform_device_id vexpress_syscfg_id_table[] = {
+	{ "vexpress-syscfg", },
+	{},
+};
+
+static struct platform_driver vexpress_syscfg_driver = {
+	.driver.name = "vexpress-syscfg",
+	.id_table = vexpress_syscfg_id_table,
+	.probe = vexpress_syscfg_probe,
+};
+
+static int __init vexpress_syscfg_init(void)
+{
+	return platform_driver_register(&vexpress_syscfg_driver);
+}
+core_initcall(vexpress_syscfg_init);
diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
new file mode 100644
index 000000000..2543ef1ec
--- /dev/null
+++ b/drivers/misc/vmw_balloon.c
@@ -0,0 +1,1236 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * VMware Balloon driver.
+ *
+ * Copyright (C) 2000-2018, VMware, Inc. All Rights Reserved.
+ *
+ * This is VMware physical memory management driver for Linux. The driver
+ * acts like a "balloon" that can be inflated to reclaim physical pages by
+ * reserving them in the guest and invalidating them in the monitor,
+ * freeing up the underlying machine pages so they can be allocated to
+ * other guests.  The balloon can also be deflated to allow the guest to
+ * use more physical memory. Higher level policies can control the sizes
+ * of balloons in VMs in order to manage physical memory resources.
+ */
+
+//#define DEBUG
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/workqueue.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <asm/hypervisor.h>
+
+MODULE_AUTHOR("VMware, Inc.");
+MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
+MODULE_VERSION("1.5.0.0-k");
+MODULE_ALIAS("dmi:*:svnVMware*:*");
+MODULE_ALIAS("vmware_vmmemctl");
+MODULE_LICENSE("GPL");
+
+/*
+ * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't
+ * allow wait (__GFP_RECLAIM) for NOSLEEP page allocations. Use
+ * __GFP_NOWARN, to suppress page allocation failure warnings.
+ */
+#define VMW_PAGE_ALLOC_NOSLEEP		(__GFP_HIGHMEM|__GFP_NOWARN)
+
+/*
+ * Use GFP_HIGHUSER when executing in a separate kernel thread
+ * context and allocation can sleep.  This is less stressful to
+ * the guest memory system, since it allows the thread to block
+ * while memory is reclaimed, and won't take pages from emergency
+ * low-memory pools.
+ */
+#define VMW_PAGE_ALLOC_CANSLEEP		(GFP_HIGHUSER)
+
+/* Maximum number of refused pages we accumulate during inflation cycle */
+#define VMW_BALLOON_MAX_REFUSED		16
+
+/*
+ * Hypervisor communication port definitions.
+ */
+#define VMW_BALLOON_HV_PORT		0x5670
+#define VMW_BALLOON_HV_MAGIC		0x456c6d6f
+#define VMW_BALLOON_GUEST_ID		1	/* Linux */
+
+enum vmwballoon_capabilities {
+	/*
+	 * Bit 0 is reserved and not associated to any capability.
+	 */
+	VMW_BALLOON_BASIC_CMDS			= (1 << 1),
+	VMW_BALLOON_BATCHED_CMDS		= (1 << 2),
+	VMW_BALLOON_BATCHED_2M_CMDS		= (1 << 3),
+	VMW_BALLOON_SIGNALLED_WAKEUP_CMD	= (1 << 4),
+};
+
+#define VMW_BALLOON_CAPABILITIES	(VMW_BALLOON_BASIC_CMDS \
+					| VMW_BALLOON_BATCHED_CMDS \
+					| VMW_BALLOON_BATCHED_2M_CMDS \
+					| VMW_BALLOON_SIGNALLED_WAKEUP_CMD)
+
+#define VMW_BALLOON_2M_SHIFT		(9)
+#define VMW_BALLOON_NUM_PAGE_SIZES	(2)
+
+/*
+ * Backdoor commands availability:
+ *
+ * START, GET_TARGET and GUEST_ID are always available,
+ *
+ * VMW_BALLOON_BASIC_CMDS:
+ *	LOCK and UNLOCK commands,
+ * VMW_BALLOON_BATCHED_CMDS:
+ *	BATCHED_LOCK and BATCHED_UNLOCK commands.
+ * VMW BALLOON_BATCHED_2M_CMDS:
+ *	BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands,
+ * VMW VMW_BALLOON_SIGNALLED_WAKEUP_CMD:
+ *	VMW_BALLOON_CMD_VMCI_DOORBELL_SET command.
+ */
+#define VMW_BALLOON_CMD_START			0
+#define VMW_BALLOON_CMD_GET_TARGET		1
+#define VMW_BALLOON_CMD_LOCK			2
+#define VMW_BALLOON_CMD_UNLOCK			3
+#define VMW_BALLOON_CMD_GUEST_ID		4
+#define VMW_BALLOON_CMD_BATCHED_LOCK		6
+#define VMW_BALLOON_CMD_BATCHED_UNLOCK		7
+#define VMW_BALLOON_CMD_BATCHED_2M_LOCK		8
+#define VMW_BALLOON_CMD_BATCHED_2M_UNLOCK	9
+#define VMW_BALLOON_CMD_VMCI_DOORBELL_SET	10
+
+
+/* error codes */
+#define VMW_BALLOON_SUCCESS		        0
+#define VMW_BALLOON_FAILURE		        -1
+#define VMW_BALLOON_ERROR_CMD_INVALID	        1
+#define VMW_BALLOON_ERROR_PPN_INVALID	        2
+#define VMW_BALLOON_ERROR_PPN_LOCKED	        3
+#define VMW_BALLOON_ERROR_PPN_UNLOCKED	        4
+#define VMW_BALLOON_ERROR_PPN_PINNED	        5
+#define VMW_BALLOON_ERROR_PPN_NOTNEEDED	        6
+#define VMW_BALLOON_ERROR_RESET		        7
+#define VMW_BALLOON_ERROR_BUSY		        8
+
+#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES	(0x03000000)
+
+/* Batch page description */
+
+/*
+ * Layout of a page in the batch page:
+ *
+ * +-------------+----------+--------+
+ * |             |          |        |
+ * | Page number | Reserved | Status |
+ * |             |          |        |
+ * +-------------+----------+--------+
+ * 64  PAGE_SHIFT          6         0
+ *
+ * The reserved field should be set to 0.
+ */
+#define VMW_BALLOON_BATCH_MAX_PAGES	(PAGE_SIZE / sizeof(u64))
+#define VMW_BALLOON_BATCH_STATUS_MASK	((1UL << 5) - 1)
+#define VMW_BALLOON_BATCH_PAGE_MASK	(~((1UL << PAGE_SHIFT) - 1))
+
+struct vmballoon_batch_page {
+	u64 pages[VMW_BALLOON_BATCH_MAX_PAGES];
+};
+
+static u64 vmballoon_batch_get_pa(struct vmballoon_batch_page *batch, int idx)
+{
+	return batch->pages[idx] & VMW_BALLOON_BATCH_PAGE_MASK;
+}
+
+static int vmballoon_batch_get_status(struct vmballoon_batch_page *batch,
+				int idx)
+{
+	return (int)(batch->pages[idx] & VMW_BALLOON_BATCH_STATUS_MASK);
+}
+
+static void vmballoon_batch_set_pa(struct vmballoon_batch_page *batch, int idx,
+				u64 pa)
+{
+	batch->pages[idx] = pa;
+}
+
+
+#define VMWARE_BALLOON_CMD(cmd, arg1, arg2, result)		\
+({								\
+	unsigned long __status, __dummy1, __dummy2, __dummy3;	\
+	__asm__ __volatile__ ("inl %%dx" :			\
+		"=a"(__status),					\
+		"=c"(__dummy1),					\
+		"=d"(__dummy2),					\
+		"=b"(result),					\
+		"=S" (__dummy3) :				\
+		"0"(VMW_BALLOON_HV_MAGIC),			\
+		"1"(VMW_BALLOON_CMD_##cmd),			\
+		"2"(VMW_BALLOON_HV_PORT),			\
+		"3"(arg1),					\
+		"4" (arg2) :					\
+		"memory");					\
+	if (VMW_BALLOON_CMD_##cmd == VMW_BALLOON_CMD_START)	\
+		result = __dummy1;				\
+	result &= -1UL;						\
+	__status & -1UL;					\
+})
+
+#ifdef CONFIG_DEBUG_FS
+struct vmballoon_stats {
+	unsigned int timer;
+	unsigned int doorbell;
+
+	/* allocation statistics */
+	unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES];
+	unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES];
+	unsigned int sleep_alloc;
+	unsigned int sleep_alloc_fail;
+	unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES];
+	unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES];
+	unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES];
+
+	/* monitor operations */
+	unsigned int lock[VMW_BALLOON_NUM_PAGE_SIZES];
+	unsigned int lock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
+	unsigned int unlock[VMW_BALLOON_NUM_PAGE_SIZES];
+	unsigned int unlock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
+	unsigned int target;
+	unsigned int target_fail;
+	unsigned int start;
+	unsigned int start_fail;
+	unsigned int guest_type;
+	unsigned int guest_type_fail;
+	unsigned int doorbell_set;
+	unsigned int doorbell_unset;
+};
+
+#define STATS_INC(stat) (stat)++
+#else
+#define STATS_INC(stat)
+#endif
+
+struct vmballoon;
+
+struct vmballoon_ops {
+	void (*add_page)(struct vmballoon *b, int idx, struct page *p);
+	int (*lock)(struct vmballoon *b, unsigned int num_pages,
+			bool is_2m_pages, unsigned int *target);
+	int (*unlock)(struct vmballoon *b, unsigned int num_pages,
+			bool is_2m_pages, unsigned int *target);
+};
+
+struct vmballoon_page_size {
+	/* list of reserved physical pages */
+	struct list_head pages;
+
+	/* transient list of non-balloonable pages */
+	struct list_head refused_pages;
+	unsigned int n_refused_pages;
+};
+
+struct vmballoon {
+	struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES];
+
+	/* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */
+	unsigned supported_page_sizes;
+
+	/* balloon size in pages */
+	unsigned int size;
+	unsigned int target;
+
+	/* reset flag */
+	bool reset_required;
+
+	unsigned long capabilities;
+
+	struct vmballoon_batch_page *batch_page;
+	unsigned int batch_max_pages;
+	struct page *page;
+
+	const struct vmballoon_ops *ops;
+
+#ifdef CONFIG_DEBUG_FS
+	/* statistics */
+	struct vmballoon_stats stats;
+
+	/* debugfs file exporting statistics */
+	struct dentry *dbg_entry;
+#endif
+
+	struct sysinfo sysinfo;
+
+	struct delayed_work dwork;
+
+	struct vmci_handle vmci_doorbell;
+};
+
+static struct vmballoon balloon;
+
+/*
+ * Send "start" command to the host, communicating supported version
+ * of the protocol.
+ */
+static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
+{
+	unsigned long status, capabilities, dummy = 0;
+	bool success;
+
+	STATS_INC(b->stats.start);
+
+	status = VMWARE_BALLOON_CMD(START, req_caps, dummy, capabilities);
+
+	switch (status) {
+	case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
+		b->capabilities = capabilities;
+		success = true;
+		break;
+	case VMW_BALLOON_SUCCESS:
+		b->capabilities = VMW_BALLOON_BASIC_CMDS;
+		success = true;
+		break;
+	default:
+		success = false;
+	}
+
+	/*
+	 * 2MB pages are only supported with batching. If batching is for some
+	 * reason disabled, do not use 2MB pages, since otherwise the legacy
+	 * mechanism is used with 2MB pages, causing a failure.
+	 */
+	if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) &&
+	    (b->capabilities & VMW_BALLOON_BATCHED_CMDS))
+		b->supported_page_sizes = 2;
+	else
+		b->supported_page_sizes = 1;
+
+	if (!success) {
+		pr_debug("%s - failed, hv returns %ld\n", __func__, status);
+		STATS_INC(b->stats.start_fail);
+	}
+	return success;
+}
+
+static bool vmballoon_check_status(struct vmballoon *b, unsigned long status)
+{
+	switch (status) {
+	case VMW_BALLOON_SUCCESS:
+		return true;
+
+	case VMW_BALLOON_ERROR_RESET:
+		b->reset_required = true;
+		/* fall through */
+
+	default:
+		return false;
+	}
+}
+
+/*
+ * Communicate guest type to the host so that it can adjust ballooning
+ * algorithm to the one most appropriate for the guest. This command
+ * is normally issued after sending "start" command and is part of
+ * standard reset sequence.
+ */
+static bool vmballoon_send_guest_id(struct vmballoon *b)
+{
+	unsigned long status, dummy = 0;
+
+	status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy,
+				dummy);
+
+	STATS_INC(b->stats.guest_type);
+
+	if (vmballoon_check_status(b, status))
+		return true;
+
+	pr_debug("%s - failed, hv returns %ld\n", __func__, status);
+	STATS_INC(b->stats.guest_type_fail);
+	return false;
+}
+
+static u16 vmballoon_page_size(bool is_2m_page)
+{
+	if (is_2m_page)
+		return 1 << VMW_BALLOON_2M_SHIFT;
+
+	return 1;
+}
+
+/*
+ * Retrieve desired balloon size from the host.
+ */
+static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target)
+{
+	unsigned long status;
+	unsigned long target;
+	unsigned long limit;
+	unsigned long dummy = 0;
+	u32 limit32;
+
+	/*
+	 * si_meminfo() is cheap. Moreover, we want to provide dynamic
+	 * max balloon size later. So let us call si_meminfo() every
+	 * iteration.
+	 */
+	si_meminfo(&b->sysinfo);
+	limit = b->sysinfo.totalram;
+
+	/* Ensure limit fits in 32-bits */
+	limit32 = (u32)limit;
+	if (limit != limit32)
+		return false;
+
+	/* update stats */
+	STATS_INC(b->stats.target);
+
+	status = VMWARE_BALLOON_CMD(GET_TARGET, limit, dummy, target);
+	if (vmballoon_check_status(b, status)) {
+		*new_target = target;
+		return true;
+	}
+
+	pr_debug("%s - failed, hv returns %ld\n", __func__, status);
+	STATS_INC(b->stats.target_fail);
+	return false;
+}
+
+/*
+ * Notify the host about allocated page so that host can use it without
+ * fear that guest will need it. Host may reject some pages, we need to
+ * check the return value and maybe submit a different page.
+ */
+static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
+				unsigned int *hv_status, unsigned int *target)
+{
+	unsigned long status, dummy = 0;
+	u32 pfn32;
+
+	pfn32 = (u32)pfn;
+	if (pfn32 != pfn)
+		return -EINVAL;
+
+	STATS_INC(b->stats.lock[false]);
+
+	*hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy, *target);
+	if (vmballoon_check_status(b, status))
+		return 0;
+
+	pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
+	STATS_INC(b->stats.lock_fail[false]);
+	return -EIO;
+}
+
+static int vmballoon_send_batched_lock(struct vmballoon *b,
+		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
+{
+	unsigned long status;
+	unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page));
+
+	STATS_INC(b->stats.lock[is_2m_pages]);
+
+	if (is_2m_pages)
+		status = VMWARE_BALLOON_CMD(BATCHED_2M_LOCK, pfn, num_pages,
+				*target);
+	else
+		status = VMWARE_BALLOON_CMD(BATCHED_LOCK, pfn, num_pages,
+				*target);
+
+	if (vmballoon_check_status(b, status))
+		return 0;
+
+	pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
+	STATS_INC(b->stats.lock_fail[is_2m_pages]);
+	return 1;
+}
+
+/*
+ * Notify the host that guest intends to release given page back into
+ * the pool of available (to the guest) pages.
+ */
+static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn,
+							unsigned int *target)
+{
+	unsigned long status, dummy = 0;
+	u32 pfn32;
+
+	pfn32 = (u32)pfn;
+	if (pfn32 != pfn)
+		return false;
+
+	STATS_INC(b->stats.unlock[false]);
+
+	status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy, *target);
+	if (vmballoon_check_status(b, status))
+		return true;
+
+	pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
+	STATS_INC(b->stats.unlock_fail[false]);
+	return false;
+}
+
+static bool vmballoon_send_batched_unlock(struct vmballoon *b,
+		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
+{
+	unsigned long status;
+	unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page));
+
+	STATS_INC(b->stats.unlock[is_2m_pages]);
+
+	if (is_2m_pages)
+		status = VMWARE_BALLOON_CMD(BATCHED_2M_UNLOCK, pfn, num_pages,
+				*target);
+	else
+		status = VMWARE_BALLOON_CMD(BATCHED_UNLOCK, pfn, num_pages,
+				*target);
+
+	if (vmballoon_check_status(b, status))
+		return true;
+
+	pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
+	STATS_INC(b->stats.unlock_fail[is_2m_pages]);
+	return false;
+}
+
+static struct page *vmballoon_alloc_page(gfp_t flags, bool is_2m_page)
+{
+	if (is_2m_page)
+		return alloc_pages(flags, VMW_BALLOON_2M_SHIFT);
+
+	return alloc_page(flags);
+}
+
+static void vmballoon_free_page(struct page *page, bool is_2m_page)
+{
+	if (is_2m_page)
+		__free_pages(page, VMW_BALLOON_2M_SHIFT);
+	else
+		__free_page(page);
+}
+
+/*
+ * Quickly release all pages allocated for the balloon. This function is
+ * called when host decides to "reset" balloon for one reason or another.
+ * Unlike normal "deflate" we do not (shall not) notify host of the pages
+ * being released.
+ */
+static void vmballoon_pop(struct vmballoon *b)
+{
+	struct page *page, *next;
+	unsigned is_2m_pages;
+
+	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
+			is_2m_pages++) {
+		struct vmballoon_page_size *page_size =
+				&b->page_sizes[is_2m_pages];
+		u16 size_per_page = vmballoon_page_size(is_2m_pages);
+
+		list_for_each_entry_safe(page, next, &page_size->pages, lru) {
+			list_del(&page->lru);
+			vmballoon_free_page(page, is_2m_pages);
+			STATS_INC(b->stats.free[is_2m_pages]);
+			b->size -= size_per_page;
+			cond_resched();
+		}
+	}
+
+	/* Clearing the batch_page unconditionally has no adverse effect */
+	free_page((unsigned long)b->batch_page);
+	b->batch_page = NULL;
+}
+
+/*
+ * Notify the host of a ballooned page. If host rejects the page put it on the
+ * refuse list, those refused page are then released at the end of the
+ * inflation cycle.
+ */
+static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages,
+				bool is_2m_pages, unsigned int *target)
+{
+	int locked, hv_status;
+	struct page *page = b->page;
+	struct vmballoon_page_size *page_size = &b->page_sizes[false];
+
+	/* is_2m_pages can never happen as 2m pages support implies batching */
+
+	locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status,
+								target);
+	if (locked) {
+		STATS_INC(b->stats.refused_alloc[false]);
+
+		if (locked == -EIO &&
+		    (hv_status == VMW_BALLOON_ERROR_RESET ||
+		     hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED)) {
+			vmballoon_free_page(page, false);
+			return -EIO;
+		}
+
+		/*
+		 * Place page on the list of non-balloonable pages
+		 * and retry allocation, unless we already accumulated
+		 * too many of them, in which case take a breather.
+		 */
+		if (page_size->n_refused_pages < VMW_BALLOON_MAX_REFUSED) {
+			page_size->n_refused_pages++;
+			list_add(&page->lru, &page_size->refused_pages);
+		} else {
+			vmballoon_free_page(page, false);
+		}
+		return locked;
+	}
+
+	/* track allocated page */
+	list_add(&page->lru, &page_size->pages);
+
+	/* update balloon size */
+	b->size++;
+
+	return 0;
+}
+
+static int vmballoon_lock_batched_page(struct vmballoon *b,
+		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
+{
+	int locked, i;
+	u16 size_per_page = vmballoon_page_size(is_2m_pages);
+
+	locked = vmballoon_send_batched_lock(b, num_pages, is_2m_pages,
+			target);
+	if (locked > 0) {
+		for (i = 0; i < num_pages; i++) {
+			u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
+			struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
+
+			vmballoon_free_page(p, is_2m_pages);
+		}
+
+		return -EIO;
+	}
+
+	for (i = 0; i < num_pages; i++) {
+		u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
+		struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
+		struct vmballoon_page_size *page_size =
+				&b->page_sizes[is_2m_pages];
+
+		locked = vmballoon_batch_get_status(b->batch_page, i);
+
+		switch (locked) {
+		case VMW_BALLOON_SUCCESS:
+			list_add(&p->lru, &page_size->pages);
+			b->size += size_per_page;
+			break;
+		case VMW_BALLOON_ERROR_PPN_PINNED:
+		case VMW_BALLOON_ERROR_PPN_INVALID:
+			if (page_size->n_refused_pages
+					< VMW_BALLOON_MAX_REFUSED) {
+				list_add(&p->lru, &page_size->refused_pages);
+				page_size->n_refused_pages++;
+				break;
+			}
+			/* Fallthrough */
+		case VMW_BALLOON_ERROR_RESET:
+		case VMW_BALLOON_ERROR_PPN_NOTNEEDED:
+			vmballoon_free_page(p, is_2m_pages);
+			break;
+		default:
+			/* This should never happen */
+			WARN_ON_ONCE(true);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Release the page allocated for the balloon. Note that we first notify
+ * the host so it can make sure the page will be available for the guest
+ * to use, if needed.
+ */
+static int vmballoon_unlock_page(struct vmballoon *b, unsigned int num_pages,
+		bool is_2m_pages, unsigned int *target)
+{
+	struct page *page = b->page;
+	struct vmballoon_page_size *page_size = &b->page_sizes[false];
+
+	/* is_2m_pages can never happen as 2m pages support implies batching */
+
+	if (!vmballoon_send_unlock_page(b, page_to_pfn(page), target)) {
+		list_add(&page->lru, &page_size->pages);
+		return -EIO;
+	}
+
+	/* deallocate page */
+	vmballoon_free_page(page, false);
+	STATS_INC(b->stats.free[false]);
+
+	/* update balloon size */
+	b->size--;
+
+	return 0;
+}
+
+static int vmballoon_unlock_batched_page(struct vmballoon *b,
+				unsigned int num_pages, bool is_2m_pages,
+				unsigned int *target)
+{
+	int locked, i, ret = 0;
+	bool hv_success;
+	u16 size_per_page = vmballoon_page_size(is_2m_pages);
+
+	hv_success = vmballoon_send_batched_unlock(b, num_pages, is_2m_pages,
+			target);
+	if (!hv_success)
+		ret = -EIO;
+
+	for (i = 0; i < num_pages; i++) {
+		u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
+		struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
+		struct vmballoon_page_size *page_size =
+				&b->page_sizes[is_2m_pages];
+
+		locked = vmballoon_batch_get_status(b->batch_page, i);
+		if (!hv_success || locked != VMW_BALLOON_SUCCESS) {
+			/*
+			 * That page wasn't successfully unlocked by the
+			 * hypervisor, re-add it to the list of pages owned by
+			 * the balloon driver.
+			 */
+			list_add(&p->lru, &page_size->pages);
+		} else {
+			/* deallocate page */
+			vmballoon_free_page(p, is_2m_pages);
+			STATS_INC(b->stats.free[is_2m_pages]);
+
+			/* update balloon size */
+			b->size -= size_per_page;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Release pages that were allocated while attempting to inflate the
+ * balloon but were refused by the host for one reason or another.
+ */
+static void vmballoon_release_refused_pages(struct vmballoon *b,
+		bool is_2m_pages)
+{
+	struct page *page, *next;
+	struct vmballoon_page_size *page_size =
+			&b->page_sizes[is_2m_pages];
+
+	list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) {
+		list_del(&page->lru);
+		vmballoon_free_page(page, is_2m_pages);
+		STATS_INC(b->stats.refused_free[is_2m_pages]);
+	}
+
+	page_size->n_refused_pages = 0;
+}
+
+static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p)
+{
+	b->page = p;
+}
+
+static void vmballoon_add_batched_page(struct vmballoon *b, int idx,
+				struct page *p)
+{
+	vmballoon_batch_set_pa(b->batch_page, idx,
+			(u64)page_to_pfn(p) << PAGE_SHIFT);
+}
+
+/*
+ * Inflate the balloon towards its target size. Note that we try to limit
+ * the rate of allocation to make sure we are not choking the rest of the
+ * system.
+ */
+static void vmballoon_inflate(struct vmballoon *b)
+{
+	unsigned int num_pages = 0;
+	int error = 0;
+	gfp_t flags = VMW_PAGE_ALLOC_NOSLEEP;
+	bool is_2m_pages;
+
+	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);
+
+	/*
+	 * First try NOSLEEP page allocations to inflate balloon.
+	 *
+	 * If we do not throttle nosleep allocations, we can drain all
+	 * free pages in the guest quickly (if the balloon target is high).
+	 * As a side-effect, draining free pages helps to inform (force)
+	 * the guest to start swapping if balloon target is not met yet,
+	 * which is a desired behavior. However, balloon driver can consume
+	 * all available CPU cycles if too many pages are allocated in a
+	 * second. Therefore, we throttle nosleep allocations even when
+	 * the guest is not under memory pressure. OTOH, if we have already
+	 * predicted that the guest is under memory pressure, then we
+	 * slowdown page allocations considerably.
+	 */
+
+	/*
+	 * Start with no sleep allocation rate which may be higher
+	 * than sleeping allocation rate.
+	 */
+	is_2m_pages = b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES;
+
+	pr_debug("%s - goal: %d",  __func__, b->target - b->size);
+
+	while (!b->reset_required &&
+		b->size + num_pages * vmballoon_page_size(is_2m_pages)
+		< b->target) {
+		struct page *page;
+
+		if (flags == VMW_PAGE_ALLOC_NOSLEEP)
+			STATS_INC(b->stats.alloc[is_2m_pages]);
+		else
+			STATS_INC(b->stats.sleep_alloc);
+
+		page = vmballoon_alloc_page(flags, is_2m_pages);
+		if (!page) {
+			STATS_INC(b->stats.alloc_fail[is_2m_pages]);
+
+			if (is_2m_pages) {
+				b->ops->lock(b, num_pages, true, &b->target);
+
+				/*
+				 * ignore errors from locking as we now switch
+				 * to 4k pages and we might get different
+				 * errors.
+				 */
+
+				num_pages = 0;
+				is_2m_pages = false;
+				continue;
+			}
+
+			if (flags == VMW_PAGE_ALLOC_CANSLEEP) {
+				/*
+				 * CANSLEEP page allocation failed, so guest
+				 * is under severe memory pressure. We just log
+				 * the event, but do not stop the inflation
+				 * due to its negative impact on performance.
+				 */
+				STATS_INC(b->stats.sleep_alloc_fail);
+				break;
+			}
+
+			/*
+			 * NOSLEEP page allocation failed, so the guest is
+			 * under memory pressure. Slowing down page alloctions
+			 * seems to be reasonable, but doing so might actually
+			 * cause the hypervisor to throttle us down, resulting
+			 * in degraded performance. We will count on the
+			 * scheduler and standard memory management mechanisms
+			 * for now.
+			 */
+			flags = VMW_PAGE_ALLOC_CANSLEEP;
+			continue;
+		}
+
+		b->ops->add_page(b, num_pages++, page);
+		if (num_pages == b->batch_max_pages) {
+			error = b->ops->lock(b, num_pages, is_2m_pages,
+					&b->target);
+			num_pages = 0;
+			if (error)
+				break;
+		}
+
+		cond_resched();
+	}
+
+	if (num_pages > 0)
+		b->ops->lock(b, num_pages, is_2m_pages, &b->target);
+
+	vmballoon_release_refused_pages(b, true);
+	vmballoon_release_refused_pages(b, false);
+}
+
+/*
+ * Decrease the size of the balloon allowing guest to use more memory.
+ */
+static void vmballoon_deflate(struct vmballoon *b)
+{
+	unsigned is_2m_pages;
+
+	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);
+
+	/* free pages to reach target */
+	for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes;
+			is_2m_pages++) {
+		struct page *page, *next;
+		unsigned int num_pages = 0;
+		struct vmballoon_page_size *page_size =
+				&b->page_sizes[is_2m_pages];
+
+		list_for_each_entry_safe(page, next, &page_size->pages, lru) {
+			if (b->reset_required ||
+				(b->target > 0 &&
+					b->size - num_pages
+					* vmballoon_page_size(is_2m_pages)
+				< b->target + vmballoon_page_size(true)))
+				break;
+
+			list_del(&page->lru);
+			b->ops->add_page(b, num_pages++, page);
+
+			if (num_pages == b->batch_max_pages) {
+				int error;
+
+				error = b->ops->unlock(b, num_pages,
+						is_2m_pages, &b->target);
+				num_pages = 0;
+				if (error)
+					return;
+			}
+
+			cond_resched();
+		}
+
+		if (num_pages > 0)
+			b->ops->unlock(b, num_pages, is_2m_pages, &b->target);
+	}
+}
+
+static const struct vmballoon_ops vmballoon_basic_ops = {
+	.add_page = vmballoon_add_page,
+	.lock = vmballoon_lock_page,
+	.unlock = vmballoon_unlock_page
+};
+
+static const struct vmballoon_ops vmballoon_batched_ops = {
+	.add_page = vmballoon_add_batched_page,
+	.lock = vmballoon_lock_batched_page,
+	.unlock = vmballoon_unlock_batched_page
+};
+
+static bool vmballoon_init_batching(struct vmballoon *b)
+{
+	struct page *page;
+
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page)
+		return false;
+
+	b->batch_page = page_address(page);
+	return true;
+}
+
+/*
+ * Receive notification and resize balloon
+ */
+static void vmballoon_doorbell(void *client_data)
+{
+	struct vmballoon *b = client_data;
+
+	STATS_INC(b->stats.doorbell);
+
+	mod_delayed_work(system_freezable_wq, &b->dwork, 0);
+}
+
+/*
+ * Clean up vmci doorbell
+ */
+static void vmballoon_vmci_cleanup(struct vmballoon *b)
+{
+	int error;
+
+	VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, VMCI_INVALID_ID,
+			VMCI_INVALID_ID, error);
+	STATS_INC(b->stats.doorbell_unset);
+
+	if (!vmci_handle_is_invalid(b->vmci_doorbell)) {
+		vmci_doorbell_destroy(b->vmci_doorbell);
+		b->vmci_doorbell = VMCI_INVALID_HANDLE;
+	}
+}
+
+/*
+ * Initialize vmci doorbell, to get notified as soon as balloon changes
+ */
+static int vmballoon_vmci_init(struct vmballoon *b)
+{
+	unsigned long error, dummy;
+
+	if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0)
+		return 0;
+
+	error = vmci_doorbell_create(&b->vmci_doorbell, VMCI_FLAG_DELAYED_CB,
+				     VMCI_PRIVILEGE_FLAG_RESTRICTED,
+				     vmballoon_doorbell, b);
+
+	if (error != VMCI_SUCCESS)
+		goto fail;
+
+	error = VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, b->vmci_doorbell.context,
+				   b->vmci_doorbell.resource, dummy);
+
+	STATS_INC(b->stats.doorbell_set);
+
+	if (error != VMW_BALLOON_SUCCESS)
+		goto fail;
+
+	return 0;
+fail:
+	vmballoon_vmci_cleanup(b);
+	return -EIO;
+}
+
+/*
+ * Perform standard reset sequence by popping the balloon (in case it
+ * is not  empty) and then restarting protocol. This operation normally
+ * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
+ */
+static void vmballoon_reset(struct vmballoon *b)
+{
+	int error;
+
+	vmballoon_vmci_cleanup(b);
+
+	/* free all pages, skipping monitor unlock */
+	vmballoon_pop(b);
+
+	if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
+		return;
+
+	if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
+		b->ops = &vmballoon_batched_ops;
+		b->batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES;
+		if (!vmballoon_init_batching(b)) {
+			/*
+			 * We failed to initialize batching, inform the monitor
+			 * about it by sending a null capability.
+			 *
+			 * The guest will retry in one second.
+			 */
+			vmballoon_send_start(b, 0);
+			return;
+		}
+	} else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
+		b->ops = &vmballoon_basic_ops;
+		b->batch_max_pages = 1;
+	}
+
+	b->reset_required = false;
+
+	error = vmballoon_vmci_init(b);
+	if (error)
+		pr_err("failed to initialize vmci doorbell\n");
+
+	if (!vmballoon_send_guest_id(b))
+		pr_err("failed to send guest ID to the host\n");
+}
+
+/*
+ * Balloon work function: reset protocol, if needed, get the new size and
+ * adjust balloon as needed. Repeat in 1 sec.
+ */
+static void vmballoon_work(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);
+	unsigned int target;
+
+	STATS_INC(b->stats.timer);
+
+	if (b->reset_required)
+		vmballoon_reset(b);
+
+	if (!b->reset_required && vmballoon_send_get_target(b, &target)) {
+		/* update target, adjust size */
+		b->target = target;
+
+		if (b->size < target)
+			vmballoon_inflate(b);
+		else if (target == 0 ||
+				b->size > target + vmballoon_page_size(true))
+			vmballoon_deflate(b);
+	}
+
+	/*
+	 * We are using a freezable workqueue so that balloon operations are
+	 * stopped while the system transitions to/from sleep/hibernation.
+	 */
+	queue_delayed_work(system_freezable_wq,
+			   dwork, round_jiffies_relative(HZ));
+}
+
+/*
+ * DEBUGFS Interface
+ */
+#ifdef CONFIG_DEBUG_FS
+
+static int vmballoon_debug_show(struct seq_file *f, void *offset)
+{
+	struct vmballoon *b = f->private;
+	struct vmballoon_stats *stats = &b->stats;
+
+	/* format capabilities info */
+	seq_printf(f,
+		   "balloon capabilities:   %#4x\n"
+		   "used capabilities:      %#4lx\n"
+		   "is resetting:           %c\n",
+		   VMW_BALLOON_CAPABILITIES, b->capabilities,
+		   b->reset_required ? 'y' : 'n');
+
+	/* format size info */
+	seq_printf(f,
+		   "target:             %8d pages\n"
+		   "current:            %8d pages\n",
+		   b->target, b->size);
+
+	seq_printf(f,
+		   "\n"
+		   "timer:              %8u\n"
+		   "doorbell:           %8u\n"
+		   "start:              %8u (%4u failed)\n"
+		   "guestType:          %8u (%4u failed)\n"
+		   "2m-lock:            %8u (%4u failed)\n"
+		   "lock:               %8u (%4u failed)\n"
+		   "2m-unlock:          %8u (%4u failed)\n"
+		   "unlock:             %8u (%4u failed)\n"
+		   "target:             %8u (%4u failed)\n"
+		   "prim2mAlloc:        %8u (%4u failed)\n"
+		   "primNoSleepAlloc:   %8u (%4u failed)\n"
+		   "primCanSleepAlloc:  %8u (%4u failed)\n"
+		   "prim2mFree:         %8u\n"
+		   "primFree:           %8u\n"
+		   "err2mAlloc:         %8u\n"
+		   "errAlloc:           %8u\n"
+		   "err2mFree:          %8u\n"
+		   "errFree:            %8u\n"
+		   "doorbellSet:        %8u\n"
+		   "doorbellUnset:      %8u\n",
+		   stats->timer,
+		   stats->doorbell,
+		   stats->start, stats->start_fail,
+		   stats->guest_type, stats->guest_type_fail,
+		   stats->lock[true],  stats->lock_fail[true],
+		   stats->lock[false],  stats->lock_fail[false],
+		   stats->unlock[true], stats->unlock_fail[true],
+		   stats->unlock[false], stats->unlock_fail[false],
+		   stats->target, stats->target_fail,
+		   stats->alloc[true], stats->alloc_fail[true],
+		   stats->alloc[false], stats->alloc_fail[false],
+		   stats->sleep_alloc, stats->sleep_alloc_fail,
+		   stats->free[true],
+		   stats->free[false],
+		   stats->refused_alloc[true], stats->refused_alloc[false],
+		   stats->refused_free[true], stats->refused_free[false],
+		   stats->doorbell_set, stats->doorbell_unset);
+
+	return 0;
+}
+
+static int vmballoon_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, vmballoon_debug_show, inode->i_private);
+}
+
+static const struct file_operations vmballoon_debug_fops = {
+	.owner		= THIS_MODULE,
+	.open		= vmballoon_debug_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int __init vmballoon_debugfs_init(struct vmballoon *b)
+{
+	int error;
+
+	b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
+					   &vmballoon_debug_fops);
+	if (IS_ERR(b->dbg_entry)) {
+		error = PTR_ERR(b->dbg_entry);
+		pr_err("failed to create debugfs entry, error: %d\n", error);
+		return error;
+	}
+
+	return 0;
+}
+
+static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
+{
+	debugfs_remove(b->dbg_entry);
+}
+
+#else
+
+static inline int vmballoon_debugfs_init(struct vmballoon *b)
+{
+	return 0;
+}
+
+static inline void vmballoon_debugfs_exit(struct vmballoon *b)
+{
+}
+
+#endif	/* CONFIG_DEBUG_FS */
+
+static int __init vmballoon_init(void)
+{
+	int error;
+	unsigned is_2m_pages;
+	/*
+	 * Check if we are running on VMware's hypervisor and bail out
+	 * if we are not.
+	 */
+	if (x86_hyper_type != X86_HYPER_VMWARE)
+		return -ENODEV;
+
+	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
+			is_2m_pages++) {
+		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages);
+		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages);
+	}
+
+	INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);
+
+	error = vmballoon_debugfs_init(&balloon);
+	if (error)
+		return error;
+
+	balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
+	balloon.batch_page = NULL;
+	balloon.page = NULL;
+	balloon.reset_required = true;
+
+	queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);
+
+	return 0;
+}
+
+/*
+ * Using late_initcall() instead of module_init() allows the balloon to use the
+ * VMCI doorbell even when the balloon is built into the kernel. Otherwise the
+ * VMCI is probed only after the balloon is initialized. If the balloon is used
+ * as a module, late_initcall() is equivalent to module_init().
+ */
+late_initcall(vmballoon_init);
+
+static void __exit vmballoon_exit(void)
+{
+	vmballoon_vmci_cleanup(&balloon);
+	cancel_delayed_work_sync(&balloon.dwork);
+
+	vmballoon_debugfs_exit(&balloon);
+
+	/*
+	 * Deallocate all reserved memory, and reset connection with monitor.
+	 * Reset connection before deallocating memory to avoid potential for
+	 * additional spurious resets from guest touching deallocated pages.
+	 */
+	vmballoon_send_start(&balloon, 0);
+	vmballoon_pop(&balloon);
+}
+module_exit(vmballoon_exit);
diff --git a/drivers/misc/vmw_vmci/Kconfig b/drivers/misc/vmw_vmci/Kconfig
new file mode 100644
index 000000000..39c2ecadb
--- /dev/null
+++ b/drivers/misc/vmw_vmci/Kconfig
@@ -0,0 +1,16 @@
+#
+# VMware VMCI device
+#
+
+config VMWARE_VMCI
+	tristate "VMware VMCI Driver"
+	depends on X86 && PCI
+	help
+	  This is VMware's Virtual Machine Communication Interface.  It enables
+	  high-speed communication between host and guest in a virtual
+	  environment via the VMCI virtual device.
+
+	  If unsure, say N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called vmw_vmci.
diff --git a/drivers/misc/vmw_vmci/Makefile b/drivers/misc/vmw_vmci/Makefile
new file mode 100644
index 000000000..4da9893c3
--- /dev/null
+++ b/drivers/misc/vmw_vmci/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_VMWARE_VMCI) += vmw_vmci.o
+vmw_vmci-y += vmci_context.o vmci_datagram.o vmci_doorbell.o \
+	vmci_driver.o vmci_event.o vmci_guest.o vmci_handle_array.o \
+	vmci_host.o vmci_queue_pair.o vmci_resource.o vmci_route.o
diff --git a/drivers/misc/vmw_vmci/vmci_context.c b/drivers/misc/vmw_vmci/vmci_context.c
new file mode 100644
index 000000000..26e20b091
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_context.c
@@ -0,0 +1,1225 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/highmem.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/cred.h>
+#include <linux/slab.h>
+
+#include "vmci_queue_pair.h"
+#include "vmci_datagram.h"
+#include "vmci_doorbell.h"
+#include "vmci_context.h"
+#include "vmci_driver.h"
+#include "vmci_event.h"
+
+/* Use a wide upper bound for the maximum contexts. */
+#define VMCI_MAX_CONTEXTS 2000
+
+/*
+ * List of current VMCI contexts.  Contexts can be added by
+ * vmci_ctx_create() and removed via vmci_ctx_destroy().
+ * These, along with context lookup, are protected by the
+ * list structure's lock.
+ */
+static struct {
+	struct list_head head;
+	spinlock_t lock; /* Spinlock for context list operations */
+} ctx_list = {
+	.head = LIST_HEAD_INIT(ctx_list.head),
+	.lock = __SPIN_LOCK_UNLOCKED(ctx_list.lock),
+};
+
+/* Used by contexts that did not set up notify flag pointers */
+static bool ctx_dummy_notify;
+
+static void ctx_signal_notify(struct vmci_ctx *context)
+{
+	*context->notify = true;
+}
+
+static void ctx_clear_notify(struct vmci_ctx *context)
+{
+	*context->notify = false;
+}
+
+/*
+ * If nothing requires the attention of the guest, clears both
+ * notify flag and call.
+ */
+static void ctx_clear_notify_call(struct vmci_ctx *context)
+{
+	if (context->pending_datagrams == 0 &&
+	    vmci_handle_arr_get_size(context->pending_doorbell_array) == 0)
+		ctx_clear_notify(context);
+}
+
+/*
+ * Sets the context's notify flag iff datagrams are pending for this
+ * context.  Called from vmci_setup_notify().
+ */
+void vmci_ctx_check_signal_notify(struct vmci_ctx *context)
+{
+	spin_lock(&context->lock);
+	if (context->pending_datagrams)
+		ctx_signal_notify(context);
+	spin_unlock(&context->lock);
+}
+
+/*
+ * Allocates and initializes a VMCI context.
+ */
+struct vmci_ctx *vmci_ctx_create(u32 cid, u32 priv_flags,
+				 uintptr_t event_hnd,
+				 int user_version,
+				 const struct cred *cred)
+{
+	struct vmci_ctx *context;
+	int error;
+
+	if (cid == VMCI_INVALID_ID) {
+		pr_devel("Invalid context ID for VMCI context\n");
+		error = -EINVAL;
+		goto err_out;
+	}
+
+	if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS) {
+		pr_devel("Invalid flag (flags=0x%x) for VMCI context\n",
+			 priv_flags);
+		error = -EINVAL;
+		goto err_out;
+	}
+
+	if (user_version == 0) {
+		pr_devel("Invalid suer_version %d\n", user_version);
+		error = -EINVAL;
+		goto err_out;
+	}
+
+	context = kzalloc(sizeof(*context), GFP_KERNEL);
+	if (!context) {
+		pr_warn("Failed to allocate memory for VMCI context\n");
+		error = -EINVAL;
+		goto err_out;
+	}
+
+	kref_init(&context->kref);
+	spin_lock_init(&context->lock);
+	INIT_LIST_HEAD(&context->list_item);
+	INIT_LIST_HEAD(&context->datagram_queue);
+	INIT_LIST_HEAD(&context->notifier_list);
+
+	/* Initialize host-specific VMCI context. */
+	init_waitqueue_head(&context->host_context.wait_queue);
+
+	context->queue_pair_array =
+		vmci_handle_arr_create(0, VMCI_MAX_GUEST_QP_COUNT);
+	if (!context->queue_pair_array) {
+		error = -ENOMEM;
+		goto err_free_ctx;
+	}
+
+	context->doorbell_array =
+		vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT);
+	if (!context->doorbell_array) {
+		error = -ENOMEM;
+		goto err_free_qp_array;
+	}
+
+	context->pending_doorbell_array =
+		vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT);
+	if (!context->pending_doorbell_array) {
+		error = -ENOMEM;
+		goto err_free_db_array;
+	}
+
+	context->user_version = user_version;
+
+	context->priv_flags = priv_flags;
+
+	if (cred)
+		context->cred = get_cred(cred);
+
+	context->notify = &ctx_dummy_notify;
+	context->notify_page = NULL;
+
+	/*
+	 * If we collide with an existing context we generate a new
+	 * and use it instead. The VMX will determine if regeneration
+	 * is okay. Since there isn't 4B - 16 VMs running on a given
+	 * host, the below loop will terminate.
+	 */
+	spin_lock(&ctx_list.lock);
+
+	while (vmci_ctx_exists(cid)) {
+		/* We reserve the lowest 16 ids for fixed contexts. */
+		cid = max(cid, VMCI_RESERVED_CID_LIMIT - 1) + 1;
+		if (cid == VMCI_INVALID_ID)
+			cid = VMCI_RESERVED_CID_LIMIT;
+	}
+	context->cid = cid;
+
+	list_add_tail_rcu(&context->list_item, &ctx_list.head);
+	spin_unlock(&ctx_list.lock);
+
+	return context;
+
+ err_free_db_array:
+	vmci_handle_arr_destroy(context->doorbell_array);
+ err_free_qp_array:
+	vmci_handle_arr_destroy(context->queue_pair_array);
+ err_free_ctx:
+	kfree(context);
+ err_out:
+	return ERR_PTR(error);
+}
+
+/*
+ * Destroy VMCI context.
+ */
+void vmci_ctx_destroy(struct vmci_ctx *context)
+{
+	spin_lock(&ctx_list.lock);
+	list_del_rcu(&context->list_item);
+	spin_unlock(&ctx_list.lock);
+	synchronize_rcu();
+
+	vmci_ctx_put(context);
+}
+
+/*
+ * Fire notification for all contexts interested in given cid.
+ */
+static int ctx_fire_notification(u32 context_id, u32 priv_flags)
+{
+	u32 i, array_size;
+	struct vmci_ctx *sub_ctx;
+	struct vmci_handle_arr *subscriber_array;
+	struct vmci_handle context_handle =
+		vmci_make_handle(context_id, VMCI_EVENT_HANDLER);
+
+	/*
+	 * We create an array to hold the subscribers we find when
+	 * scanning through all contexts.
+	 */
+	subscriber_array = vmci_handle_arr_create(0, VMCI_MAX_CONTEXTS);
+	if (subscriber_array == NULL)
+		return VMCI_ERROR_NO_MEM;
+
+	/*
+	 * Scan all contexts to find who is interested in being
+	 * notified about given contextID.
+	 */
+	rcu_read_lock();
+	list_for_each_entry_rcu(sub_ctx, &ctx_list.head, list_item) {
+		struct vmci_handle_list *node;
+
+		/*
+		 * We only deliver notifications of the removal of
+		 * contexts, if the two contexts are allowed to
+		 * interact.
+		 */
+		if (vmci_deny_interaction(priv_flags, sub_ctx->priv_flags))
+			continue;
+
+		list_for_each_entry_rcu(node, &sub_ctx->notifier_list, node) {
+			if (!vmci_handle_is_equal(node->handle, context_handle))
+				continue;
+
+			vmci_handle_arr_append_entry(&subscriber_array,
+					vmci_make_handle(sub_ctx->cid,
+							 VMCI_EVENT_HANDLER));
+		}
+	}
+	rcu_read_unlock();
+
+	/* Fire event to all subscribers. */
+	array_size = vmci_handle_arr_get_size(subscriber_array);
+	for (i = 0; i < array_size; i++) {
+		int result;
+		struct vmci_event_ctx ev;
+
+		ev.msg.hdr.dst = vmci_handle_arr_get_entry(subscriber_array, i);
+		ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+						  VMCI_CONTEXT_RESOURCE_ID);
+		ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
+		ev.msg.event_data.event = VMCI_EVENT_CTX_REMOVED;
+		ev.payload.context_id = context_id;
+
+		result = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID,
+						&ev.msg.hdr, false);
+		if (result < VMCI_SUCCESS) {
+			pr_devel("Failed to enqueue event datagram (type=%d) for context (ID=0x%x)\n",
+				 ev.msg.event_data.event,
+				 ev.msg.hdr.dst.context);
+			/* We continue to enqueue on next subscriber. */
+		}
+	}
+	vmci_handle_arr_destroy(subscriber_array);
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Returns the current number of pending datagrams. The call may
+ * also serve as a synchronization point for the datagram queue,
+ * as no enqueue operations can occur concurrently.
+ */
+int vmci_ctx_pending_datagrams(u32 cid, u32 *pending)
+{
+	struct vmci_ctx *context;
+
+	context = vmci_ctx_get(cid);
+	if (context == NULL)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	spin_lock(&context->lock);
+	if (pending)
+		*pending = context->pending_datagrams;
+	spin_unlock(&context->lock);
+	vmci_ctx_put(context);
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Queues a VMCI datagram for the appropriate target VM context.
+ */
+int vmci_ctx_enqueue_datagram(u32 cid, struct vmci_datagram *dg)
+{
+	struct vmci_datagram_queue_entry *dq_entry;
+	struct vmci_ctx *context;
+	struct vmci_handle dg_src;
+	size_t vmci_dg_size;
+
+	vmci_dg_size = VMCI_DG_SIZE(dg);
+	if (vmci_dg_size > VMCI_MAX_DG_SIZE) {
+		pr_devel("Datagram too large (bytes=%zu)\n", vmci_dg_size);
+		return VMCI_ERROR_INVALID_ARGS;
+	}
+
+	/* Get the target VM's VMCI context. */
+	context = vmci_ctx_get(cid);
+	if (!context) {
+		pr_devel("Invalid context (ID=0x%x)\n", cid);
+		return VMCI_ERROR_INVALID_ARGS;
+	}
+
+	/* Allocate guest call entry and add it to the target VM's queue. */
+	dq_entry = kmalloc(sizeof(*dq_entry), GFP_KERNEL);
+	if (dq_entry == NULL) {
+		pr_warn("Failed to allocate memory for datagram\n");
+		vmci_ctx_put(context);
+		return VMCI_ERROR_NO_MEM;
+	}
+	dq_entry->dg = dg;
+	dq_entry->dg_size = vmci_dg_size;
+	dg_src = dg->src;
+	INIT_LIST_HEAD(&dq_entry->list_item);
+
+	spin_lock(&context->lock);
+
+	/*
+	 * We put a higher limit on datagrams from the hypervisor.  If
+	 * the pending datagram is not from hypervisor, then we check
+	 * if enqueueing it would exceed the
+	 * VMCI_MAX_DATAGRAM_QUEUE_SIZE limit on the destination.  If
+	 * the pending datagram is from hypervisor, we allow it to be
+	 * queued at the destination side provided we don't reach the
+	 * VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE limit.
+	 */
+	if (context->datagram_queue_size + vmci_dg_size >=
+	    VMCI_MAX_DATAGRAM_QUEUE_SIZE &&
+	    (!vmci_handle_is_equal(dg_src,
+				vmci_make_handle
+				(VMCI_HYPERVISOR_CONTEXT_ID,
+				 VMCI_CONTEXT_RESOURCE_ID)) ||
+	     context->datagram_queue_size + vmci_dg_size >=
+	     VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE)) {
+		spin_unlock(&context->lock);
+		vmci_ctx_put(context);
+		kfree(dq_entry);
+		pr_devel("Context (ID=0x%x) receive queue is full\n", cid);
+		return VMCI_ERROR_NO_RESOURCES;
+	}
+
+	list_add(&dq_entry->list_item, &context->datagram_queue);
+	context->pending_datagrams++;
+	context->datagram_queue_size += vmci_dg_size;
+	ctx_signal_notify(context);
+	wake_up(&context->host_context.wait_queue);
+	spin_unlock(&context->lock);
+	vmci_ctx_put(context);
+
+	return vmci_dg_size;
+}
+
+/*
+ * Verifies whether a context with the specified context ID exists.
+ * FIXME: utility is dubious as no decisions can be reliably made
+ * using this data as context can appear and disappear at any time.
+ */
+bool vmci_ctx_exists(u32 cid)
+{
+	struct vmci_ctx *context;
+	bool exists = false;
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(context, &ctx_list.head, list_item) {
+		if (context->cid == cid) {
+			exists = true;
+			break;
+		}
+	}
+
+	rcu_read_unlock();
+	return exists;
+}
+
+/*
+ * Retrieves VMCI context corresponding to the given cid.
+ */
+struct vmci_ctx *vmci_ctx_get(u32 cid)
+{
+	struct vmci_ctx *c, *context = NULL;
+
+	if (cid == VMCI_INVALID_ID)
+		return NULL;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(c, &ctx_list.head, list_item) {
+		if (c->cid == cid) {
+			/*
+			 * The context owner drops its own reference to the
+			 * context only after removing it from the list and
+			 * waiting for RCU grace period to expire. This
+			 * means that we are not about to increase the
+			 * reference count of something that is in the
+			 * process of being destroyed.
+			 */
+			context = c;
+			kref_get(&context->kref);
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return context;
+}
+
+/*
+ * Deallocates all parts of a context data structure. This
+ * function doesn't lock the context, because it assumes that
+ * the caller was holding the last reference to context.
+ */
+static void ctx_free_ctx(struct kref *kref)
+{
+	struct vmci_ctx *context = container_of(kref, struct vmci_ctx, kref);
+	struct vmci_datagram_queue_entry *dq_entry, *dq_entry_tmp;
+	struct vmci_handle temp_handle;
+	struct vmci_handle_list *notifier, *tmp;
+
+	/*
+	 * Fire event to all contexts interested in knowing this
+	 * context is dying.
+	 */
+	ctx_fire_notification(context->cid, context->priv_flags);
+
+	/*
+	 * Cleanup all queue pair resources attached to context.  If
+	 * the VM dies without cleaning up, this code will make sure
+	 * that no resources are leaked.
+	 */
+	temp_handle = vmci_handle_arr_get_entry(context->queue_pair_array, 0);
+	while (!vmci_handle_is_equal(temp_handle, VMCI_INVALID_HANDLE)) {
+		if (vmci_qp_broker_detach(temp_handle,
+					  context) < VMCI_SUCCESS) {
+			/*
+			 * When vmci_qp_broker_detach() succeeds it
+			 * removes the handle from the array.  If
+			 * detach fails, we must remove the handle
+			 * ourselves.
+			 */
+			vmci_handle_arr_remove_entry(context->queue_pair_array,
+						     temp_handle);
+		}
+		temp_handle =
+		    vmci_handle_arr_get_entry(context->queue_pair_array, 0);
+	}
+
+	/*
+	 * It is fine to destroy this without locking the callQueue, as
+	 * this is the only thread having a reference to the context.
+	 */
+	list_for_each_entry_safe(dq_entry, dq_entry_tmp,
+				 &context->datagram_queue, list_item) {
+		WARN_ON(dq_entry->dg_size != VMCI_DG_SIZE(dq_entry->dg));
+		list_del(&dq_entry->list_item);
+		kfree(dq_entry->dg);
+		kfree(dq_entry);
+	}
+
+	list_for_each_entry_safe(notifier, tmp,
+				 &context->notifier_list, node) {
+		list_del(&notifier->node);
+		kfree(notifier);
+	}
+
+	vmci_handle_arr_destroy(context->queue_pair_array);
+	vmci_handle_arr_destroy(context->doorbell_array);
+	vmci_handle_arr_destroy(context->pending_doorbell_array);
+	vmci_ctx_unset_notify(context);
+	if (context->cred)
+		put_cred(context->cred);
+	kfree(context);
+}
+
+/*
+ * Drops reference to VMCI context. If this is the last reference to
+ * the context it will be deallocated. A context is created with
+ * a reference count of one, and on destroy, it is removed from
+ * the context list before its reference count is decremented. Thus,
+ * if we reach zero, we are sure that nobody else are about to increment
+ * it (they need the entry in the context list for that), and so there
+ * is no need for locking.
+ */
+void vmci_ctx_put(struct vmci_ctx *context)
+{
+	kref_put(&context->kref, ctx_free_ctx);
+}
+
+/*
+ * Dequeues the next datagram and returns it to caller.
+ * The caller passes in a pointer to the max size datagram
+ * it can handle and the datagram is only unqueued if the
+ * size is less than max_size. If larger max_size is set to
+ * the size of the datagram to give the caller a chance to
+ * set up a larger buffer for the guestcall.
+ */
+int vmci_ctx_dequeue_datagram(struct vmci_ctx *context,
+			      size_t *max_size,
+			      struct vmci_datagram **dg)
+{
+	struct vmci_datagram_queue_entry *dq_entry;
+	struct list_head *list_item;
+	int rv;
+
+	/* Dequeue the next datagram entry. */
+	spin_lock(&context->lock);
+	if (context->pending_datagrams == 0) {
+		ctx_clear_notify_call(context);
+		spin_unlock(&context->lock);
+		pr_devel("No datagrams pending\n");
+		return VMCI_ERROR_NO_MORE_DATAGRAMS;
+	}
+
+	list_item = context->datagram_queue.next;
+
+	dq_entry =
+	    list_entry(list_item, struct vmci_datagram_queue_entry, list_item);
+
+	/* Check size of caller's buffer. */
+	if (*max_size < dq_entry->dg_size) {
+		*max_size = dq_entry->dg_size;
+		spin_unlock(&context->lock);
+		pr_devel("Caller's buffer should be at least (size=%u bytes)\n",
+			 (u32) *max_size);
+		return VMCI_ERROR_NO_MEM;
+	}
+
+	list_del(list_item);
+	context->pending_datagrams--;
+	context->datagram_queue_size -= dq_entry->dg_size;
+	if (context->pending_datagrams == 0) {
+		ctx_clear_notify_call(context);
+		rv = VMCI_SUCCESS;
+	} else {
+		/*
+		 * Return the size of the next datagram.
+		 */
+		struct vmci_datagram_queue_entry *next_entry;
+
+		list_item = context->datagram_queue.next;
+		next_entry =
+		    list_entry(list_item, struct vmci_datagram_queue_entry,
+			       list_item);
+
+		/*
+		 * The following size_t -> int truncation is fine as
+		 * the maximum size of a (routable) datagram is 68KB.
+		 */
+		rv = (int)next_entry->dg_size;
+	}
+	spin_unlock(&context->lock);
+
+	/* Caller must free datagram. */
+	*dg = dq_entry->dg;
+	dq_entry->dg = NULL;
+	kfree(dq_entry);
+
+	return rv;
+}
+
+/*
+ * Reverts actions set up by vmci_setup_notify().  Unmaps and unlocks the
+ * page mapped/locked by vmci_setup_notify().
+ */
+void vmci_ctx_unset_notify(struct vmci_ctx *context)
+{
+	struct page *notify_page;
+
+	spin_lock(&context->lock);
+
+	notify_page = context->notify_page;
+	context->notify = &ctx_dummy_notify;
+	context->notify_page = NULL;
+
+	spin_unlock(&context->lock);
+
+	if (notify_page) {
+		kunmap(notify_page);
+		put_page(notify_page);
+	}
+}
+
+/*
+ * Add remote_cid to list of contexts current contexts wants
+ * notifications from/about.
+ */
+int vmci_ctx_add_notification(u32 context_id, u32 remote_cid)
+{
+	struct vmci_ctx *context;
+	struct vmci_handle_list *notifier, *n;
+	int result;
+	bool exists = false;
+
+	context = vmci_ctx_get(context_id);
+	if (!context)
+		return VMCI_ERROR_NOT_FOUND;
+
+	if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(remote_cid)) {
+		pr_devel("Context removed notifications for other VMs not supported (src=0x%x, remote=0x%x)\n",
+			 context_id, remote_cid);
+		result = VMCI_ERROR_DST_UNREACHABLE;
+		goto out;
+	}
+
+	if (context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) {
+		result = VMCI_ERROR_NO_ACCESS;
+		goto out;
+	}
+
+	notifier = kmalloc(sizeof(struct vmci_handle_list), GFP_KERNEL);
+	if (!notifier) {
+		result = VMCI_ERROR_NO_MEM;
+		goto out;
+	}
+
+	INIT_LIST_HEAD(&notifier->node);
+	notifier->handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER);
+
+	spin_lock(&context->lock);
+
+	if (context->n_notifiers < VMCI_MAX_CONTEXTS) {
+		list_for_each_entry(n, &context->notifier_list, node) {
+			if (vmci_handle_is_equal(n->handle, notifier->handle)) {
+				exists = true;
+				break;
+			}
+		}
+
+		if (exists) {
+			kfree(notifier);
+			result = VMCI_ERROR_ALREADY_EXISTS;
+		} else {
+			list_add_tail_rcu(&notifier->node,
+					  &context->notifier_list);
+			context->n_notifiers++;
+			result = VMCI_SUCCESS;
+		}
+	} else {
+		kfree(notifier);
+		result = VMCI_ERROR_NO_MEM;
+	}
+
+	spin_unlock(&context->lock);
+
+ out:
+	vmci_ctx_put(context);
+	return result;
+}
+
+/*
+ * Remove remote_cid from current context's list of contexts it is
+ * interested in getting notifications from/about.
+ */
+int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid)
+{
+	struct vmci_ctx *context;
+	struct vmci_handle_list *notifier, *tmp;
+	struct vmci_handle handle;
+	bool found = false;
+
+	context = vmci_ctx_get(context_id);
+	if (!context)
+		return VMCI_ERROR_NOT_FOUND;
+
+	handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER);
+
+	spin_lock(&context->lock);
+	list_for_each_entry_safe(notifier, tmp,
+				 &context->notifier_list, node) {
+		if (vmci_handle_is_equal(notifier->handle, handle)) {
+			list_del_rcu(&notifier->node);
+			context->n_notifiers--;
+			found = true;
+			break;
+		}
+	}
+	spin_unlock(&context->lock);
+
+	if (found) {
+		synchronize_rcu();
+		kfree(notifier);
+	}
+
+	vmci_ctx_put(context);
+
+	return found ? VMCI_SUCCESS : VMCI_ERROR_NOT_FOUND;
+}
+
+static int vmci_ctx_get_chkpt_notifiers(struct vmci_ctx *context,
+					u32 *buf_size, void **pbuf)
+{
+	u32 *notifiers;
+	size_t data_size;
+	struct vmci_handle_list *entry;
+	int i = 0;
+
+	if (context->n_notifiers == 0) {
+		*buf_size = 0;
+		*pbuf = NULL;
+		return VMCI_SUCCESS;
+	}
+
+	data_size = context->n_notifiers * sizeof(*notifiers);
+	if (*buf_size < data_size) {
+		*buf_size = data_size;
+		return VMCI_ERROR_MORE_DATA;
+	}
+
+	notifiers = kmalloc(data_size, GFP_ATOMIC); /* FIXME: want GFP_KERNEL */
+	if (!notifiers)
+		return VMCI_ERROR_NO_MEM;
+
+	list_for_each_entry(entry, &context->notifier_list, node)
+		notifiers[i++] = entry->handle.context;
+
+	*buf_size = data_size;
+	*pbuf = notifiers;
+	return VMCI_SUCCESS;
+}
+
+static int vmci_ctx_get_chkpt_doorbells(struct vmci_ctx *context,
+					u32 *buf_size, void **pbuf)
+{
+	struct dbell_cpt_state *dbells;
+	u32 i, n_doorbells;
+
+	n_doorbells = vmci_handle_arr_get_size(context->doorbell_array);
+	if (n_doorbells > 0) {
+		size_t data_size = n_doorbells * sizeof(*dbells);
+		if (*buf_size < data_size) {
+			*buf_size = data_size;
+			return VMCI_ERROR_MORE_DATA;
+		}
+
+		dbells = kzalloc(data_size, GFP_ATOMIC);
+		if (!dbells)
+			return VMCI_ERROR_NO_MEM;
+
+		for (i = 0; i < n_doorbells; i++)
+			dbells[i].handle = vmci_handle_arr_get_entry(
+						context->doorbell_array, i);
+
+		*buf_size = data_size;
+		*pbuf = dbells;
+	} else {
+		*buf_size = 0;
+		*pbuf = NULL;
+	}
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Get current context's checkpoint state of given type.
+ */
+int vmci_ctx_get_chkpt_state(u32 context_id,
+			     u32 cpt_type,
+			     u32 *buf_size,
+			     void **pbuf)
+{
+	struct vmci_ctx *context;
+	int result;
+
+	context = vmci_ctx_get(context_id);
+	if (!context)
+		return VMCI_ERROR_NOT_FOUND;
+
+	spin_lock(&context->lock);
+
+	switch (cpt_type) {
+	case VMCI_NOTIFICATION_CPT_STATE:
+		result = vmci_ctx_get_chkpt_notifiers(context, buf_size, pbuf);
+		break;
+
+	case VMCI_WELLKNOWN_CPT_STATE:
+		/*
+		 * For compatibility with VMX'en with VM to VM communication, we
+		 * always return zero wellknown handles.
+		 */
+
+		*buf_size = 0;
+		*pbuf = NULL;
+		result = VMCI_SUCCESS;
+		break;
+
+	case VMCI_DOORBELL_CPT_STATE:
+		result = vmci_ctx_get_chkpt_doorbells(context, buf_size, pbuf);
+		break;
+
+	default:
+		pr_devel("Invalid cpt state (type=%d)\n", cpt_type);
+		result = VMCI_ERROR_INVALID_ARGS;
+		break;
+	}
+
+	spin_unlock(&context->lock);
+	vmci_ctx_put(context);
+
+	return result;
+}
+
+/*
+ * Set current context's checkpoint state of given type.
+ */
+int vmci_ctx_set_chkpt_state(u32 context_id,
+			     u32 cpt_type,
+			     u32 buf_size,
+			     void *cpt_buf)
+{
+	u32 i;
+	u32 current_id;
+	int result = VMCI_SUCCESS;
+	u32 num_ids = buf_size / sizeof(u32);
+
+	if (cpt_type == VMCI_WELLKNOWN_CPT_STATE && num_ids > 0) {
+		/*
+		 * We would end up here if VMX with VM to VM communication
+		 * attempts to restore a checkpoint with wellknown handles.
+		 */
+		pr_warn("Attempt to restore checkpoint with obsolete wellknown handles\n");
+		return VMCI_ERROR_OBSOLETE;
+	}
+
+	if (cpt_type != VMCI_NOTIFICATION_CPT_STATE) {
+		pr_devel("Invalid cpt state (type=%d)\n", cpt_type);
+		return VMCI_ERROR_INVALID_ARGS;
+	}
+
+	for (i = 0; i < num_ids && result == VMCI_SUCCESS; i++) {
+		current_id = ((u32 *)cpt_buf)[i];
+		result = vmci_ctx_add_notification(context_id, current_id);
+		if (result != VMCI_SUCCESS)
+			break;
+	}
+	if (result != VMCI_SUCCESS)
+		pr_devel("Failed to set cpt state (type=%d) (error=%d)\n",
+			 cpt_type, result);
+
+	return result;
+}
+
+/*
+ * Retrieves the specified context's pending notifications in the
+ * form of a handle array. The handle arrays returned are the
+ * actual data - not a copy and should not be modified by the
+ * caller. They must be released using
+ * vmci_ctx_rcv_notifications_release.
+ */
+int vmci_ctx_rcv_notifications_get(u32 context_id,
+				   struct vmci_handle_arr **db_handle_array,
+				   struct vmci_handle_arr **qp_handle_array)
+{
+	struct vmci_ctx *context;
+	int result = VMCI_SUCCESS;
+
+	context = vmci_ctx_get(context_id);
+	if (context == NULL)
+		return VMCI_ERROR_NOT_FOUND;
+
+	spin_lock(&context->lock);
+
+	*db_handle_array = context->pending_doorbell_array;
+	context->pending_doorbell_array =
+		vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT);
+	if (!context->pending_doorbell_array) {
+		context->pending_doorbell_array = *db_handle_array;
+		*db_handle_array = NULL;
+		result = VMCI_ERROR_NO_MEM;
+	}
+	*qp_handle_array = NULL;
+
+	spin_unlock(&context->lock);
+	vmci_ctx_put(context);
+
+	return result;
+}
+
+/*
+ * Releases handle arrays with pending notifications previously
+ * retrieved using vmci_ctx_rcv_notifications_get. If the
+ * notifications were not successfully handed over to the guest,
+ * success must be false.
+ */
+void vmci_ctx_rcv_notifications_release(u32 context_id,
+					struct vmci_handle_arr *db_handle_array,
+					struct vmci_handle_arr *qp_handle_array,
+					bool success)
+{
+	struct vmci_ctx *context = vmci_ctx_get(context_id);
+
+	spin_lock(&context->lock);
+	if (!success) {
+		struct vmci_handle handle;
+
+		/*
+		 * New notifications may have been added while we were not
+		 * holding the context lock, so we transfer any new pending
+		 * doorbell notifications to the old array, and reinstate the
+		 * old array.
+		 */
+
+		handle = vmci_handle_arr_remove_tail(
+					context->pending_doorbell_array);
+		while (!vmci_handle_is_invalid(handle)) {
+			if (!vmci_handle_arr_has_entry(db_handle_array,
+						       handle)) {
+				vmci_handle_arr_append_entry(
+						&db_handle_array, handle);
+			}
+			handle = vmci_handle_arr_remove_tail(
+					context->pending_doorbell_array);
+		}
+		vmci_handle_arr_destroy(context->pending_doorbell_array);
+		context->pending_doorbell_array = db_handle_array;
+		db_handle_array = NULL;
+	} else {
+		ctx_clear_notify_call(context);
+	}
+	spin_unlock(&context->lock);
+	vmci_ctx_put(context);
+
+	if (db_handle_array)
+		vmci_handle_arr_destroy(db_handle_array);
+
+	if (qp_handle_array)
+		vmci_handle_arr_destroy(qp_handle_array);
+}
+
+/*
+ * Registers that a new doorbell handle has been allocated by the
+ * context. Only doorbell handles registered can be notified.
+ */
+int vmci_ctx_dbell_create(u32 context_id, struct vmci_handle handle)
+{
+	struct vmci_ctx *context;
+	int result;
+
+	if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle))
+		return VMCI_ERROR_INVALID_ARGS;
+
+	context = vmci_ctx_get(context_id);
+	if (context == NULL)
+		return VMCI_ERROR_NOT_FOUND;
+
+	spin_lock(&context->lock);
+	if (!vmci_handle_arr_has_entry(context->doorbell_array, handle))
+		result = vmci_handle_arr_append_entry(&context->doorbell_array,
+						      handle);
+	else
+		result = VMCI_ERROR_DUPLICATE_ENTRY;
+
+	spin_unlock(&context->lock);
+	vmci_ctx_put(context);
+
+	return result;
+}
+
+/*
+ * Unregisters a doorbell handle that was previously registered
+ * with vmci_ctx_dbell_create.
+ */
+int vmci_ctx_dbell_destroy(u32 context_id, struct vmci_handle handle)
+{
+	struct vmci_ctx *context;
+	struct vmci_handle removed_handle;
+
+	if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle))
+		return VMCI_ERROR_INVALID_ARGS;
+
+	context = vmci_ctx_get(context_id);
+	if (context == NULL)
+		return VMCI_ERROR_NOT_FOUND;
+
+	spin_lock(&context->lock);
+	removed_handle =
+	    vmci_handle_arr_remove_entry(context->doorbell_array, handle);
+	vmci_handle_arr_remove_entry(context->pending_doorbell_array, handle);
+	spin_unlock(&context->lock);
+
+	vmci_ctx_put(context);
+
+	return vmci_handle_is_invalid(removed_handle) ?
+	    VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS;
+}
+
+/*
+ * Unregisters all doorbell handles that were previously
+ * registered with vmci_ctx_dbell_create.
+ */
+int vmci_ctx_dbell_destroy_all(u32 context_id)
+{
+	struct vmci_ctx *context;
+	struct vmci_handle handle;
+
+	if (context_id == VMCI_INVALID_ID)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	context = vmci_ctx_get(context_id);
+	if (context == NULL)
+		return VMCI_ERROR_NOT_FOUND;
+
+	spin_lock(&context->lock);
+	do {
+		struct vmci_handle_arr *arr = context->doorbell_array;
+		handle = vmci_handle_arr_remove_tail(arr);
+	} while (!vmci_handle_is_invalid(handle));
+	do {
+		struct vmci_handle_arr *arr = context->pending_doorbell_array;
+		handle = vmci_handle_arr_remove_tail(arr);
+	} while (!vmci_handle_is_invalid(handle));
+	spin_unlock(&context->lock);
+
+	vmci_ctx_put(context);
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Registers a notification of a doorbell handle initiated by the
+ * specified source context. The notification of doorbells are
+ * subject to the same isolation rules as datagram delivery. To
+ * allow host side senders of notifications a finer granularity
+ * of sender rights than those assigned to the sending context
+ * itself, the host context is required to specify a different
+ * set of privilege flags that will override the privileges of
+ * the source context.
+ */
+int vmci_ctx_notify_dbell(u32 src_cid,
+			  struct vmci_handle handle,
+			  u32 src_priv_flags)
+{
+	struct vmci_ctx *dst_context;
+	int result;
+
+	if (vmci_handle_is_invalid(handle))
+		return VMCI_ERROR_INVALID_ARGS;
+
+	/* Get the target VM's VMCI context. */
+	dst_context = vmci_ctx_get(handle.context);
+	if (!dst_context) {
+		pr_devel("Invalid context (ID=0x%x)\n", handle.context);
+		return VMCI_ERROR_NOT_FOUND;
+	}
+
+	if (src_cid != handle.context) {
+		u32 dst_priv_flags;
+
+		if (VMCI_CONTEXT_IS_VM(src_cid) &&
+		    VMCI_CONTEXT_IS_VM(handle.context)) {
+			pr_devel("Doorbell notification from VM to VM not supported (src=0x%x, dst=0x%x)\n",
+				 src_cid, handle.context);
+			result = VMCI_ERROR_DST_UNREACHABLE;
+			goto out;
+		}
+
+		result = vmci_dbell_get_priv_flags(handle, &dst_priv_flags);
+		if (result < VMCI_SUCCESS) {
+			pr_warn("Failed to get privilege flags for destination (handle=0x%x:0x%x)\n",
+				handle.context, handle.resource);
+			goto out;
+		}
+
+		if (src_cid != VMCI_HOST_CONTEXT_ID ||
+		    src_priv_flags == VMCI_NO_PRIVILEGE_FLAGS) {
+			src_priv_flags = vmci_context_get_priv_flags(src_cid);
+		}
+
+		if (vmci_deny_interaction(src_priv_flags, dst_priv_flags)) {
+			result = VMCI_ERROR_NO_ACCESS;
+			goto out;
+		}
+	}
+
+	if (handle.context == VMCI_HOST_CONTEXT_ID) {
+		result = vmci_dbell_host_context_notify(src_cid, handle);
+	} else {
+		spin_lock(&dst_context->lock);
+
+		if (!vmci_handle_arr_has_entry(dst_context->doorbell_array,
+					       handle)) {
+			result = VMCI_ERROR_NOT_FOUND;
+		} else {
+			if (!vmci_handle_arr_has_entry(
+					dst_context->pending_doorbell_array,
+					handle)) {
+				result = vmci_handle_arr_append_entry(
+					&dst_context->pending_doorbell_array,
+					handle);
+				if (result == VMCI_SUCCESS) {
+					ctx_signal_notify(dst_context);
+					wake_up(&dst_context->host_context.wait_queue);
+				}
+			} else {
+				result = VMCI_SUCCESS;
+			}
+		}
+		spin_unlock(&dst_context->lock);
+	}
+
+ out:
+	vmci_ctx_put(dst_context);
+
+	return result;
+}
+
+bool vmci_ctx_supports_host_qp(struct vmci_ctx *context)
+{
+	return context && context->user_version >= VMCI_VERSION_HOSTQP;
+}
+
+/*
+ * Registers that a new queue pair handle has been allocated by
+ * the context.
+ */
+int vmci_ctx_qp_create(struct vmci_ctx *context, struct vmci_handle handle)
+{
+	int result;
+
+	if (context == NULL || vmci_handle_is_invalid(handle))
+		return VMCI_ERROR_INVALID_ARGS;
+
+	if (!vmci_handle_arr_has_entry(context->queue_pair_array, handle))
+		result = vmci_handle_arr_append_entry(
+			&context->queue_pair_array, handle);
+	else
+		result = VMCI_ERROR_DUPLICATE_ENTRY;
+
+	return result;
+}
+
+/*
+ * Unregisters a queue pair handle that was previously registered
+ * with vmci_ctx_qp_create.
+ */
+int vmci_ctx_qp_destroy(struct vmci_ctx *context, struct vmci_handle handle)
+{
+	struct vmci_handle hndl;
+
+	if (context == NULL || vmci_handle_is_invalid(handle))
+		return VMCI_ERROR_INVALID_ARGS;
+
+	hndl = vmci_handle_arr_remove_entry(context->queue_pair_array, handle);
+
+	return vmci_handle_is_invalid(hndl) ?
+		VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS;
+}
+
+/*
+ * Determines whether a given queue pair handle is registered
+ * with the given context.
+ */
+bool vmci_ctx_qp_exists(struct vmci_ctx *context, struct vmci_handle handle)
+{
+	if (context == NULL || vmci_handle_is_invalid(handle))
+		return false;
+
+	return vmci_handle_arr_has_entry(context->queue_pair_array, handle);
+}
+
+/*
+ * vmci_context_get_priv_flags() - Retrieve privilege flags.
+ * @context_id: The context ID of the VMCI context.
+ *
+ * Retrieves privilege flags of the given VMCI context ID.
+ */
+u32 vmci_context_get_priv_flags(u32 context_id)
+{
+	if (vmci_host_code_active()) {
+		u32 flags;
+		struct vmci_ctx *context;
+
+		context = vmci_ctx_get(context_id);
+		if (!context)
+			return VMCI_LEAST_PRIVILEGE_FLAGS;
+
+		flags = context->priv_flags;
+		vmci_ctx_put(context);
+		return flags;
+	}
+	return VMCI_NO_PRIVILEGE_FLAGS;
+}
+EXPORT_SYMBOL_GPL(vmci_context_get_priv_flags);
+
+/*
+ * vmci_is_context_owner() - Determimnes if user is the context owner
+ * @context_id: The context ID of the VMCI context.
+ * @uid:        The host user id (real kernel value).
+ *
+ * Determines whether a given UID is the owner of given VMCI context.
+ */
+bool vmci_is_context_owner(u32 context_id, kuid_t uid)
+{
+	bool is_owner = false;
+
+	if (vmci_host_code_active()) {
+		struct vmci_ctx *context = vmci_ctx_get(context_id);
+		if (context) {
+			if (context->cred)
+				is_owner = uid_eq(context->cred->uid, uid);
+			vmci_ctx_put(context);
+		}
+	}
+
+	return is_owner;
+}
+EXPORT_SYMBOL_GPL(vmci_is_context_owner);
diff --git a/drivers/misc/vmw_vmci/vmci_context.h b/drivers/misc/vmw_vmci/vmci_context.h
new file mode 100644
index 000000000..24a88e68a
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_context.h
@@ -0,0 +1,182 @@
+/*
+ * VMware VMCI driver (vmciContext.h)
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _VMCI_CONTEXT_H_
+#define _VMCI_CONTEXT_H_
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/atomic.h>
+#include <linux/kref.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+
+#include "vmci_handle_array.h"
+#include "vmci_datagram.h"
+
+/* Used to determine what checkpoint state to get and set. */
+enum {
+	VMCI_NOTIFICATION_CPT_STATE = 1,
+	VMCI_WELLKNOWN_CPT_STATE    = 2,
+	VMCI_DG_OUT_STATE           = 3,
+	VMCI_DG_IN_STATE            = 4,
+	VMCI_DG_IN_SIZE_STATE       = 5,
+	VMCI_DOORBELL_CPT_STATE     = 6,
+};
+
+/* Host specific struct used for signalling */
+struct vmci_host {
+	wait_queue_head_t wait_queue;
+};
+
+struct vmci_handle_list {
+	struct list_head node;
+	struct vmci_handle handle;
+};
+
+struct vmci_ctx {
+	struct list_head list_item;       /* For global VMCI list. */
+	u32 cid;
+	struct kref kref;
+	struct list_head datagram_queue;  /* Head of per VM queue. */
+	u32 pending_datagrams;
+	size_t datagram_queue_size;	  /* Size of datagram queue in bytes. */
+
+	/*
+	 * Version of the code that created
+	 * this context; e.g., VMX.
+	 */
+	int user_version;
+	spinlock_t lock;  /* Locks callQueue and handle_arrays. */
+
+	/*
+	 * queue_pairs attached to.  The array of
+	 * handles for queue pairs is accessed
+	 * from the code for QP API, and there
+	 * it is protected by the QP lock.  It
+	 * is also accessed from the context
+	 * clean up path, which does not
+	 * require a lock.  VMCILock is not
+	 * used to protect the QP array field.
+	 */
+	struct vmci_handle_arr *queue_pair_array;
+
+	/* Doorbells created by context. */
+	struct vmci_handle_arr *doorbell_array;
+
+	/* Doorbells pending for context. */
+	struct vmci_handle_arr *pending_doorbell_array;
+
+	/* Contexts current context is subscribing to. */
+	struct list_head notifier_list;
+	unsigned int n_notifiers;
+
+	struct vmci_host host_context;
+	u32 priv_flags;
+
+	const struct cred *cred;
+	bool *notify;		/* Notify flag pointer - hosted only. */
+	struct page *notify_page;	/* Page backing the notify UVA. */
+};
+
+/* VMCINotifyAddRemoveInfo: Used to add/remove remote context notifications. */
+struct vmci_ctx_info {
+	u32 remote_cid;
+	int result;
+};
+
+/* VMCICptBufInfo: Used to set/get current context's checkpoint state. */
+struct vmci_ctx_chkpt_buf_info {
+	u64 cpt_buf;
+	u32 cpt_type;
+	u32 buf_size;
+	s32 result;
+	u32 _pad;
+};
+
+/*
+ * VMCINotificationReceiveInfo: Used to recieve pending notifications
+ * for doorbells and queue pairs.
+ */
+struct vmci_ctx_notify_recv_info {
+	u64 db_handle_buf_uva;
+	u64 db_handle_buf_size;
+	u64 qp_handle_buf_uva;
+	u64 qp_handle_buf_size;
+	s32 result;
+	u32 _pad;
+};
+
+/*
+ * Utilility function that checks whether two entities are allowed
+ * to interact. If one of them is restricted, the other one must
+ * be trusted.
+ */
+static inline bool vmci_deny_interaction(u32 part_one, u32 part_two)
+{
+	return ((part_one & VMCI_PRIVILEGE_FLAG_RESTRICTED) &&
+		!(part_two & VMCI_PRIVILEGE_FLAG_TRUSTED)) ||
+	       ((part_two & VMCI_PRIVILEGE_FLAG_RESTRICTED) &&
+		!(part_one & VMCI_PRIVILEGE_FLAG_TRUSTED));
+}
+
+struct vmci_ctx *vmci_ctx_create(u32 cid, u32 flags,
+				 uintptr_t event_hnd, int version,
+				 const struct cred *cred);
+void vmci_ctx_destroy(struct vmci_ctx *context);
+
+bool vmci_ctx_supports_host_qp(struct vmci_ctx *context);
+int vmci_ctx_enqueue_datagram(u32 cid, struct vmci_datagram *dg);
+int vmci_ctx_dequeue_datagram(struct vmci_ctx *context,
+			      size_t *max_size, struct vmci_datagram **dg);
+int vmci_ctx_pending_datagrams(u32 cid, u32 *pending);
+struct vmci_ctx *vmci_ctx_get(u32 cid);
+void vmci_ctx_put(struct vmci_ctx *context);
+bool vmci_ctx_exists(u32 cid);
+
+int vmci_ctx_add_notification(u32 context_id, u32 remote_cid);
+int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid);
+int vmci_ctx_get_chkpt_state(u32 context_id, u32 cpt_type,
+			     u32 *num_cids, void **cpt_buf_ptr);
+int vmci_ctx_set_chkpt_state(u32 context_id, u32 cpt_type,
+			     u32 num_cids, void *cpt_buf);
+
+int vmci_ctx_qp_create(struct vmci_ctx *context, struct vmci_handle handle);
+int vmci_ctx_qp_destroy(struct vmci_ctx *context, struct vmci_handle handle);
+bool vmci_ctx_qp_exists(struct vmci_ctx *context, struct vmci_handle handle);
+
+void vmci_ctx_check_signal_notify(struct vmci_ctx *context);
+void vmci_ctx_unset_notify(struct vmci_ctx *context);
+
+int vmci_ctx_dbell_create(u32 context_id, struct vmci_handle handle);
+int vmci_ctx_dbell_destroy(u32 context_id, struct vmci_handle handle);
+int vmci_ctx_dbell_destroy_all(u32 context_id);
+int vmci_ctx_notify_dbell(u32 cid, struct vmci_handle handle,
+			  u32 src_priv_flags);
+
+int vmci_ctx_rcv_notifications_get(u32 context_id, struct vmci_handle_arr
+				   **db_handle_array, struct vmci_handle_arr
+				   **qp_handle_array);
+void vmci_ctx_rcv_notifications_release(u32 context_id, struct vmci_handle_arr
+					*db_handle_array, struct vmci_handle_arr
+					*qp_handle_array, bool success);
+
+static inline u32 vmci_ctx_get_id(struct vmci_ctx *context)
+{
+	if (!context)
+		return VMCI_INVALID_ID;
+	return context->cid;
+}
+
+#endif /* _VMCI_CONTEXT_H_ */
diff --git a/drivers/misc/vmw_vmci/vmci_datagram.c b/drivers/misc/vmw_vmci/vmci_datagram.c
new file mode 100644
index 000000000..8a4b6bbe1
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_datagram.c
@@ -0,0 +1,502 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/bug.h>
+
+#include "vmci_datagram.h"
+#include "vmci_resource.h"
+#include "vmci_context.h"
+#include "vmci_driver.h"
+#include "vmci_event.h"
+#include "vmci_route.h"
+
+/*
+ * struct datagram_entry describes the datagram entity. It is used for datagram
+ * entities created only on the host.
+ */
+struct datagram_entry {
+	struct vmci_resource resource;
+	u32 flags;
+	bool run_delayed;
+	vmci_datagram_recv_cb recv_cb;
+	void *client_data;
+	u32 priv_flags;
+};
+
+struct delayed_datagram_info {
+	struct datagram_entry *entry;
+	struct work_struct work;
+	bool in_dg_host_queue;
+	/* msg and msg_payload must be together. */
+	struct vmci_datagram msg;
+	u8 msg_payload[];
+};
+
+/* Number of in-flight host->host datagrams */
+static atomic_t delayed_dg_host_queue_size = ATOMIC_INIT(0);
+
+/*
+ * Create a datagram entry given a handle pointer.
+ */
+static int dg_create_handle(u32 resource_id,
+			    u32 flags,
+			    u32 priv_flags,
+			    vmci_datagram_recv_cb recv_cb,
+			    void *client_data, struct vmci_handle *out_handle)
+{
+	int result;
+	u32 context_id;
+	struct vmci_handle handle;
+	struct datagram_entry *entry;
+
+	if ((flags & VMCI_FLAG_WELLKNOWN_DG_HND) != 0)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	if ((flags & VMCI_FLAG_ANYCID_DG_HND) != 0) {
+		context_id = VMCI_INVALID_ID;
+	} else {
+		context_id = vmci_get_context_id();
+		if (context_id == VMCI_INVALID_ID)
+			return VMCI_ERROR_NO_RESOURCES;
+	}
+
+	handle = vmci_make_handle(context_id, resource_id);
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry) {
+		pr_warn("Failed allocating memory for datagram entry\n");
+		return VMCI_ERROR_NO_MEM;
+	}
+
+	entry->run_delayed = (flags & VMCI_FLAG_DG_DELAYED_CB) ? true : false;
+	entry->flags = flags;
+	entry->recv_cb = recv_cb;
+	entry->client_data = client_data;
+	entry->priv_flags = priv_flags;
+
+	/* Make datagram resource live. */
+	result = vmci_resource_add(&entry->resource,
+				   VMCI_RESOURCE_TYPE_DATAGRAM,
+				   handle);
+	if (result != VMCI_SUCCESS) {
+		pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d\n",
+			handle.context, handle.resource, result);
+		kfree(entry);
+		return result;
+	}
+
+	*out_handle = vmci_resource_handle(&entry->resource);
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Internal utility function with the same purpose as
+ * vmci_datagram_get_priv_flags that also takes a context_id.
+ */
+static int vmci_datagram_get_priv_flags(u32 context_id,
+					struct vmci_handle handle,
+					u32 *priv_flags)
+{
+	if (context_id == VMCI_INVALID_ID)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	if (context_id == VMCI_HOST_CONTEXT_ID) {
+		struct datagram_entry *src_entry;
+		struct vmci_resource *resource;
+
+		resource = vmci_resource_by_handle(handle,
+						   VMCI_RESOURCE_TYPE_DATAGRAM);
+		if (!resource)
+			return VMCI_ERROR_INVALID_ARGS;
+
+		src_entry = container_of(resource, struct datagram_entry,
+					 resource);
+		*priv_flags = src_entry->priv_flags;
+		vmci_resource_put(resource);
+	} else if (context_id == VMCI_HYPERVISOR_CONTEXT_ID)
+		*priv_flags = VMCI_MAX_PRIVILEGE_FLAGS;
+	else
+		*priv_flags = vmci_context_get_priv_flags(context_id);
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Calls the specified callback in a delayed context.
+ */
+static void dg_delayed_dispatch(struct work_struct *work)
+{
+	struct delayed_datagram_info *dg_info =
+			container_of(work, struct delayed_datagram_info, work);
+
+	dg_info->entry->recv_cb(dg_info->entry->client_data, &dg_info->msg);
+
+	vmci_resource_put(&dg_info->entry->resource);
+
+	if (dg_info->in_dg_host_queue)
+		atomic_dec(&delayed_dg_host_queue_size);
+
+	kfree(dg_info);
+}
+
+/*
+ * Dispatch datagram as a host, to the host, or other vm context. This
+ * function cannot dispatch to hypervisor context handlers. This should
+ * have been handled before we get here by vmci_datagram_dispatch.
+ * Returns number of bytes sent on success, error code otherwise.
+ */
+static int dg_dispatch_as_host(u32 context_id, struct vmci_datagram *dg)
+{
+	int retval;
+	size_t dg_size;
+	u32 src_priv_flags;
+
+	dg_size = VMCI_DG_SIZE(dg);
+
+	/* Host cannot send to the hypervisor. */
+	if (dg->dst.context == VMCI_HYPERVISOR_CONTEXT_ID)
+		return VMCI_ERROR_DST_UNREACHABLE;
+
+	/* Check that source handle matches sending context. */
+	if (dg->src.context != context_id) {
+		pr_devel("Sender context (ID=0x%x) is not owner of src datagram entry (handle=0x%x:0x%x)\n",
+			 context_id, dg->src.context, dg->src.resource);
+		return VMCI_ERROR_NO_ACCESS;
+	}
+
+	/* Get hold of privileges of sending endpoint. */
+	retval = vmci_datagram_get_priv_flags(context_id, dg->src,
+					      &src_priv_flags);
+	if (retval != VMCI_SUCCESS) {
+		pr_warn("Couldn't get privileges (handle=0x%x:0x%x)\n",
+			dg->src.context, dg->src.resource);
+		return retval;
+	}
+
+	/* Determine if we should route to host or guest destination. */
+	if (dg->dst.context == VMCI_HOST_CONTEXT_ID) {
+		/* Route to host datagram entry. */
+		struct datagram_entry *dst_entry;
+		struct vmci_resource *resource;
+
+		if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
+		    dg->dst.resource == VMCI_EVENT_HANDLER) {
+			return vmci_event_dispatch(dg);
+		}
+
+		resource = vmci_resource_by_handle(dg->dst,
+						   VMCI_RESOURCE_TYPE_DATAGRAM);
+		if (!resource) {
+			pr_devel("Sending to invalid destination (handle=0x%x:0x%x)\n",
+				 dg->dst.context, dg->dst.resource);
+			return VMCI_ERROR_INVALID_RESOURCE;
+		}
+		dst_entry = container_of(resource, struct datagram_entry,
+					 resource);
+		if (vmci_deny_interaction(src_priv_flags,
+					  dst_entry->priv_flags)) {
+			vmci_resource_put(resource);
+			return VMCI_ERROR_NO_ACCESS;
+		}
+
+		/*
+		 * If a VMCI datagram destined for the host is also sent by the
+		 * host, we always run it delayed. This ensures that no locks
+		 * are held when the datagram callback runs.
+		 */
+		if (dst_entry->run_delayed ||
+		    dg->src.context == VMCI_HOST_CONTEXT_ID) {
+			struct delayed_datagram_info *dg_info;
+
+			if (atomic_add_return(1, &delayed_dg_host_queue_size)
+			    == VMCI_MAX_DELAYED_DG_HOST_QUEUE_SIZE) {
+				atomic_dec(&delayed_dg_host_queue_size);
+				vmci_resource_put(resource);
+				return VMCI_ERROR_NO_MEM;
+			}
+
+			dg_info = kmalloc(sizeof(*dg_info) +
+				    (size_t) dg->payload_size, GFP_ATOMIC);
+			if (!dg_info) {
+				atomic_dec(&delayed_dg_host_queue_size);
+				vmci_resource_put(resource);
+				return VMCI_ERROR_NO_MEM;
+			}
+
+			dg_info->in_dg_host_queue = true;
+			dg_info->entry = dst_entry;
+			memcpy(&dg_info->msg, dg, dg_size);
+
+			INIT_WORK(&dg_info->work, dg_delayed_dispatch);
+			schedule_work(&dg_info->work);
+			retval = VMCI_SUCCESS;
+
+		} else {
+			retval = dst_entry->recv_cb(dst_entry->client_data, dg);
+			vmci_resource_put(resource);
+			if (retval < VMCI_SUCCESS)
+				return retval;
+		}
+	} else {
+		/* Route to destination VM context. */
+		struct vmci_datagram *new_dg;
+
+		if (context_id != dg->dst.context) {
+			if (vmci_deny_interaction(src_priv_flags,
+						  vmci_context_get_priv_flags
+						  (dg->dst.context))) {
+				return VMCI_ERROR_NO_ACCESS;
+			} else if (VMCI_CONTEXT_IS_VM(context_id)) {
+				/*
+				 * If the sending context is a VM, it
+				 * cannot reach another VM.
+				 */
+
+				pr_devel("Datagram communication between VMs not supported (src=0x%x, dst=0x%x)\n",
+					 context_id, dg->dst.context);
+				return VMCI_ERROR_DST_UNREACHABLE;
+			}
+		}
+
+		/* We make a copy to enqueue. */
+		new_dg = kmemdup(dg, dg_size, GFP_KERNEL);
+		if (new_dg == NULL)
+			return VMCI_ERROR_NO_MEM;
+
+		retval = vmci_ctx_enqueue_datagram(dg->dst.context, new_dg);
+		if (retval < VMCI_SUCCESS) {
+			kfree(new_dg);
+			return retval;
+		}
+	}
+
+	/*
+	 * We currently truncate the size to signed 32 bits. This doesn't
+	 * matter for this handler as it only support 4Kb messages.
+	 */
+	return (int)dg_size;
+}
+
+/*
+ * Dispatch datagram as a guest, down through the VMX and potentially to
+ * the host.
+ * Returns number of bytes sent on success, error code otherwise.
+ */
+static int dg_dispatch_as_guest(struct vmci_datagram *dg)
+{
+	int retval;
+	struct vmci_resource *resource;
+
+	resource = vmci_resource_by_handle(dg->src,
+					   VMCI_RESOURCE_TYPE_DATAGRAM);
+	if (!resource)
+		return VMCI_ERROR_NO_HANDLE;
+
+	retval = vmci_send_datagram(dg);
+	vmci_resource_put(resource);
+	return retval;
+}
+
+/*
+ * Dispatch datagram.  This will determine the routing for the datagram
+ * and dispatch it accordingly.
+ * Returns number of bytes sent on success, error code otherwise.
+ */
+int vmci_datagram_dispatch(u32 context_id,
+			   struct vmci_datagram *dg, bool from_guest)
+{
+	int retval;
+	enum vmci_route route;
+
+	BUILD_BUG_ON(sizeof(struct vmci_datagram) != 24);
+
+	if (dg->payload_size > VMCI_MAX_DG_SIZE ||
+	    VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE) {
+		pr_devel("Payload (size=%llu bytes) too big to send\n",
+			 (unsigned long long)dg->payload_size);
+		return VMCI_ERROR_INVALID_ARGS;
+	}
+
+	retval = vmci_route(&dg->src, &dg->dst, from_guest, &route);
+	if (retval < VMCI_SUCCESS) {
+		pr_devel("Failed to route datagram (src=0x%x, dst=0x%x, err=%d)\n",
+			 dg->src.context, dg->dst.context, retval);
+		return retval;
+	}
+
+	if (VMCI_ROUTE_AS_HOST == route) {
+		if (VMCI_INVALID_ID == context_id)
+			context_id = VMCI_HOST_CONTEXT_ID;
+		return dg_dispatch_as_host(context_id, dg);
+	}
+
+	if (VMCI_ROUTE_AS_GUEST == route)
+		return dg_dispatch_as_guest(dg);
+
+	pr_warn("Unknown route (%d) for datagram\n", route);
+	return VMCI_ERROR_DST_UNREACHABLE;
+}
+
+/*
+ * Invoke the handler for the given datagram.  This is intended to be
+ * called only when acting as a guest and receiving a datagram from the
+ * virtual device.
+ */
+int vmci_datagram_invoke_guest_handler(struct vmci_datagram *dg)
+{
+	struct vmci_resource *resource;
+	struct datagram_entry *dst_entry;
+
+	resource = vmci_resource_by_handle(dg->dst,
+					   VMCI_RESOURCE_TYPE_DATAGRAM);
+	if (!resource) {
+		pr_devel("destination (handle=0x%x:0x%x) doesn't exist\n",
+			 dg->dst.context, dg->dst.resource);
+		return VMCI_ERROR_NO_HANDLE;
+	}
+
+	dst_entry = container_of(resource, struct datagram_entry, resource);
+	if (dst_entry->run_delayed) {
+		struct delayed_datagram_info *dg_info;
+
+		dg_info = kmalloc(sizeof(*dg_info) + (size_t)dg->payload_size,
+				  GFP_ATOMIC);
+		if (!dg_info) {
+			vmci_resource_put(resource);
+			return VMCI_ERROR_NO_MEM;
+		}
+
+		dg_info->in_dg_host_queue = false;
+		dg_info->entry = dst_entry;
+		memcpy(&dg_info->msg, dg, VMCI_DG_SIZE(dg));
+
+		INIT_WORK(&dg_info->work, dg_delayed_dispatch);
+		schedule_work(&dg_info->work);
+	} else {
+		dst_entry->recv_cb(dst_entry->client_data, dg);
+		vmci_resource_put(resource);
+	}
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * vmci_datagram_create_handle_priv() - Create host context datagram endpoint
+ * @resource_id:        The resource ID.
+ * @flags:      Datagram Flags.
+ * @priv_flags: Privilege Flags.
+ * @recv_cb:    Callback when receiving datagrams.
+ * @client_data:        Pointer for a datagram_entry struct
+ * @out_handle: vmci_handle that is populated as a result of this function.
+ *
+ * Creates a host context datagram endpoint and returns a handle to it.
+ */
+int vmci_datagram_create_handle_priv(u32 resource_id,
+				     u32 flags,
+				     u32 priv_flags,
+				     vmci_datagram_recv_cb recv_cb,
+				     void *client_data,
+				     struct vmci_handle *out_handle)
+{
+	if (out_handle == NULL)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	if (recv_cb == NULL) {
+		pr_devel("Client callback needed when creating datagram\n");
+		return VMCI_ERROR_INVALID_ARGS;
+	}
+
+	if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	return dg_create_handle(resource_id, flags, priv_flags, recv_cb,
+				client_data, out_handle);
+}
+EXPORT_SYMBOL_GPL(vmci_datagram_create_handle_priv);
+
+/*
+ * vmci_datagram_create_handle() - Create host context datagram endpoint
+ * @resource_id:        Resource ID.
+ * @flags:      Datagram Flags.
+ * @recv_cb:    Callback when receiving datagrams.
+ * @client_ata: Pointer for a datagram_entry struct
+ * @out_handle: vmci_handle that is populated as a result of this function.
+ *
+ * Creates a host context datagram endpoint and returns a handle to
+ * it.  Same as vmci_datagram_create_handle_priv without the priviledge
+ * flags argument.
+ */
+int vmci_datagram_create_handle(u32 resource_id,
+				u32 flags,
+				vmci_datagram_recv_cb recv_cb,
+				void *client_data,
+				struct vmci_handle *out_handle)
+{
+	return vmci_datagram_create_handle_priv(
+		resource_id, flags,
+		VMCI_DEFAULT_PROC_PRIVILEGE_FLAGS,
+		recv_cb, client_data,
+		out_handle);
+}
+EXPORT_SYMBOL_GPL(vmci_datagram_create_handle);
+
+/*
+ * vmci_datagram_destroy_handle() - Destroys datagram handle
+ * @handle:     vmci_handle to be destroyed and reaped.
+ *
+ * Use this function to destroy any datagram handles created by
+ * vmci_datagram_create_handle{,Priv} functions.
+ */
+int vmci_datagram_destroy_handle(struct vmci_handle handle)
+{
+	struct datagram_entry *entry;
+	struct vmci_resource *resource;
+
+	resource = vmci_resource_by_handle(handle, VMCI_RESOURCE_TYPE_DATAGRAM);
+	if (!resource) {
+		pr_devel("Failed to destroy datagram (handle=0x%x:0x%x)\n",
+			 handle.context, handle.resource);
+		return VMCI_ERROR_NOT_FOUND;
+	}
+
+	entry = container_of(resource, struct datagram_entry, resource);
+
+	vmci_resource_put(&entry->resource);
+	vmci_resource_remove(&entry->resource);
+	kfree(entry);
+
+	return VMCI_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(vmci_datagram_destroy_handle);
+
+/*
+ * vmci_datagram_send() - Send a datagram
+ * @msg:        The datagram to send.
+ *
+ * Sends the provided datagram on its merry way.
+ */
+int vmci_datagram_send(struct vmci_datagram *msg)
+{
+	if (msg == NULL)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	return vmci_datagram_dispatch(VMCI_INVALID_ID, msg, false);
+}
+EXPORT_SYMBOL_GPL(vmci_datagram_send);
diff --git a/drivers/misc/vmw_vmci/vmci_datagram.h b/drivers/misc/vmw_vmci/vmci_datagram.h
new file mode 100644
index 000000000..eb4aab7f6
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_datagram.h
@@ -0,0 +1,52 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _VMCI_DATAGRAM_H_
+#define _VMCI_DATAGRAM_H_
+
+#include <linux/types.h>
+#include <linux/list.h>
+
+#include "vmci_context.h"
+
+#define VMCI_MAX_DELAYED_DG_HOST_QUEUE_SIZE 256
+
+/*
+ * The struct vmci_datagram_queue_entry is a queue header for the in-kernel VMCI
+ * datagram queues. It is allocated in non-paged memory, as the
+ * content is accessed while holding a spinlock. The pending datagram
+ * itself may be allocated from paged memory. We shadow the size of
+ * the datagram in the non-paged queue entry as this size is used
+ * while holding the same spinlock as above.
+ */
+struct vmci_datagram_queue_entry {
+	struct list_head list_item;	/* For queuing. */
+	size_t dg_size;		/* Size of datagram. */
+	struct vmci_datagram *dg;	/* Pending datagram. */
+};
+
+/* VMCIDatagramSendRecvInfo */
+struct vmci_datagram_snd_rcv_info {
+	u64 addr;
+	u32 len;
+	s32 result;
+};
+
+/* Datagram API for non-public use. */
+int vmci_datagram_dispatch(u32 context_id, struct vmci_datagram *dg,
+			   bool from_guest);
+int vmci_datagram_invoke_guest_handler(struct vmci_datagram *dg);
+
+#endif /* _VMCI_DATAGRAM_H_ */
diff --git a/drivers/misc/vmw_vmci/vmci_doorbell.c b/drivers/misc/vmw_vmci/vmci_doorbell.c
new file mode 100644
index 000000000..458121034
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_doorbell.c
@@ -0,0 +1,609 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/completion.h>
+#include <linux/hash.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include "vmci_datagram.h"
+#include "vmci_doorbell.h"
+#include "vmci_resource.h"
+#include "vmci_driver.h"
+#include "vmci_route.h"
+
+
+#define VMCI_DOORBELL_INDEX_BITS	6
+#define VMCI_DOORBELL_INDEX_TABLE_SIZE	(1 << VMCI_DOORBELL_INDEX_BITS)
+#define VMCI_DOORBELL_HASH(_idx)	hash_32(_idx, VMCI_DOORBELL_INDEX_BITS)
+
+/*
+ * DoorbellEntry describes the a doorbell notification handle allocated by the
+ * host.
+ */
+struct dbell_entry {
+	struct vmci_resource resource;
+	struct hlist_node node;
+	struct work_struct work;
+	vmci_callback notify_cb;
+	void *client_data;
+	u32 idx;
+	u32 priv_flags;
+	bool run_delayed;
+	atomic_t active;	/* Only used by guest personality */
+};
+
+/* The VMCI index table keeps track of currently registered doorbells. */
+struct dbell_index_table {
+	spinlock_t lock;	/* Index table lock */
+	struct hlist_head entries[VMCI_DOORBELL_INDEX_TABLE_SIZE];
+};
+
+static struct dbell_index_table vmci_doorbell_it = {
+	.lock = __SPIN_LOCK_UNLOCKED(vmci_doorbell_it.lock),
+};
+
+/*
+ * The max_notify_idx is one larger than the currently known bitmap index in
+ * use, and is used to determine how much of the bitmap needs to be scanned.
+ */
+static u32 max_notify_idx;
+
+/*
+ * The notify_idx_count is used for determining whether there are free entries
+ * within the bitmap (if notify_idx_count + 1 < max_notify_idx).
+ */
+static u32 notify_idx_count;
+
+/*
+ * The last_notify_idx_reserved is used to track the last index handed out - in
+ * the case where multiple handles share a notification index, we hand out
+ * indexes round robin based on last_notify_idx_reserved.
+ */
+static u32 last_notify_idx_reserved;
+
+/* This is a one entry cache used to by the index allocation. */
+static u32 last_notify_idx_released = PAGE_SIZE;
+
+
+/*
+ * Utility function that retrieves the privilege flags associated
+ * with a given doorbell handle. For guest endpoints, the
+ * privileges are determined by the context ID, but for host
+ * endpoints privileges are associated with the complete
+ * handle. Hypervisor endpoints are not yet supported.
+ */
+int vmci_dbell_get_priv_flags(struct vmci_handle handle, u32 *priv_flags)
+{
+	if (priv_flags == NULL || handle.context == VMCI_INVALID_ID)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	if (handle.context == VMCI_HOST_CONTEXT_ID) {
+		struct dbell_entry *entry;
+		struct vmci_resource *resource;
+
+		resource = vmci_resource_by_handle(handle,
+						   VMCI_RESOURCE_TYPE_DOORBELL);
+		if (!resource)
+			return VMCI_ERROR_NOT_FOUND;
+
+		entry = container_of(resource, struct dbell_entry, resource);
+		*priv_flags = entry->priv_flags;
+		vmci_resource_put(resource);
+	} else if (handle.context == VMCI_HYPERVISOR_CONTEXT_ID) {
+		/*
+		 * Hypervisor endpoints for notifications are not
+		 * supported (yet).
+		 */
+		return VMCI_ERROR_INVALID_ARGS;
+	} else {
+		*priv_flags = vmci_context_get_priv_flags(handle.context);
+	}
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Find doorbell entry by bitmap index.
+ */
+static struct dbell_entry *dbell_index_table_find(u32 idx)
+{
+	u32 bucket = VMCI_DOORBELL_HASH(idx);
+	struct dbell_entry *dbell;
+
+	hlist_for_each_entry(dbell, &vmci_doorbell_it.entries[bucket],
+			     node) {
+		if (idx == dbell->idx)
+			return dbell;
+	}
+
+	return NULL;
+}
+
+/*
+ * Add the given entry to the index table.  This willi take a reference to the
+ * entry's resource so that the entry is not deleted before it is removed from
+ * the * table.
+ */
+static void dbell_index_table_add(struct dbell_entry *entry)
+{
+	u32 bucket;
+	u32 new_notify_idx;
+
+	vmci_resource_get(&entry->resource);
+
+	spin_lock_bh(&vmci_doorbell_it.lock);
+
+	/*
+	 * Below we try to allocate an index in the notification
+	 * bitmap with "not too much" sharing between resources. If we
+	 * use less that the full bitmap, we either add to the end if
+	 * there are no unused flags within the currently used area,
+	 * or we search for unused ones. If we use the full bitmap, we
+	 * allocate the index round robin.
+	 */
+	if (max_notify_idx < PAGE_SIZE || notify_idx_count < PAGE_SIZE) {
+		if (last_notify_idx_released < max_notify_idx &&
+		    !dbell_index_table_find(last_notify_idx_released)) {
+			new_notify_idx = last_notify_idx_released;
+			last_notify_idx_released = PAGE_SIZE;
+		} else {
+			bool reused = false;
+			new_notify_idx = last_notify_idx_reserved;
+			if (notify_idx_count + 1 < max_notify_idx) {
+				do {
+					if (!dbell_index_table_find
+					    (new_notify_idx)) {
+						reused = true;
+						break;
+					}
+					new_notify_idx = (new_notify_idx + 1) %
+					    max_notify_idx;
+				} while (new_notify_idx !=
+					 last_notify_idx_released);
+			}
+			if (!reused) {
+				new_notify_idx = max_notify_idx;
+				max_notify_idx++;
+			}
+		}
+	} else {
+		new_notify_idx = (last_notify_idx_reserved + 1) % PAGE_SIZE;
+	}
+
+	last_notify_idx_reserved = new_notify_idx;
+	notify_idx_count++;
+
+	entry->idx = new_notify_idx;
+	bucket = VMCI_DOORBELL_HASH(entry->idx);
+	hlist_add_head(&entry->node, &vmci_doorbell_it.entries[bucket]);
+
+	spin_unlock_bh(&vmci_doorbell_it.lock);
+}
+
+/*
+ * Remove the given entry from the index table.  This will release() the
+ * entry's resource.
+ */
+static void dbell_index_table_remove(struct dbell_entry *entry)
+{
+	spin_lock_bh(&vmci_doorbell_it.lock);
+
+	hlist_del_init(&entry->node);
+
+	notify_idx_count--;
+	if (entry->idx == max_notify_idx - 1) {
+		/*
+		 * If we delete an entry with the maximum known
+		 * notification index, we take the opportunity to
+		 * prune the current max. As there might be other
+		 * unused indices immediately below, we lower the
+		 * maximum until we hit an index in use.
+		 */
+		while (max_notify_idx > 0 &&
+		       !dbell_index_table_find(max_notify_idx - 1))
+			max_notify_idx--;
+	}
+
+	last_notify_idx_released = entry->idx;
+
+	spin_unlock_bh(&vmci_doorbell_it.lock);
+
+	vmci_resource_put(&entry->resource);
+}
+
+/*
+ * Creates a link between the given doorbell handle and the given
+ * index in the bitmap in the device backend. A notification state
+ * is created in hypervisor.
+ */
+static int dbell_link(struct vmci_handle handle, u32 notify_idx)
+{
+	struct vmci_doorbell_link_msg link_msg;
+
+	link_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+					    VMCI_DOORBELL_LINK);
+	link_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
+	link_msg.hdr.payload_size = sizeof(link_msg) - VMCI_DG_HEADERSIZE;
+	link_msg.handle = handle;
+	link_msg.notify_idx = notify_idx;
+
+	return vmci_send_datagram(&link_msg.hdr);
+}
+
+/*
+ * Unlinks the given doorbell handle from an index in the bitmap in
+ * the device backend. The notification state is destroyed in hypervisor.
+ */
+static int dbell_unlink(struct vmci_handle handle)
+{
+	struct vmci_doorbell_unlink_msg unlink_msg;
+
+	unlink_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+					      VMCI_DOORBELL_UNLINK);
+	unlink_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
+	unlink_msg.hdr.payload_size = sizeof(unlink_msg) - VMCI_DG_HEADERSIZE;
+	unlink_msg.handle = handle;
+
+	return vmci_send_datagram(&unlink_msg.hdr);
+}
+
+/*
+ * Notify another guest or the host.  We send a datagram down to the
+ * host via the hypervisor with the notification info.
+ */
+static int dbell_notify_as_guest(struct vmci_handle handle, u32 priv_flags)
+{
+	struct vmci_doorbell_notify_msg notify_msg;
+
+	notify_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+					      VMCI_DOORBELL_NOTIFY);
+	notify_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
+	notify_msg.hdr.payload_size = sizeof(notify_msg) - VMCI_DG_HEADERSIZE;
+	notify_msg.handle = handle;
+
+	return vmci_send_datagram(&notify_msg.hdr);
+}
+
+/*
+ * Calls the specified callback in a delayed context.
+ */
+static void dbell_delayed_dispatch(struct work_struct *work)
+{
+	struct dbell_entry *entry = container_of(work,
+						 struct dbell_entry, work);
+
+	entry->notify_cb(entry->client_data);
+	vmci_resource_put(&entry->resource);
+}
+
+/*
+ * Dispatches a doorbell notification to the host context.
+ */
+int vmci_dbell_host_context_notify(u32 src_cid, struct vmci_handle handle)
+{
+	struct dbell_entry *entry;
+	struct vmci_resource *resource;
+
+	if (vmci_handle_is_invalid(handle)) {
+		pr_devel("Notifying an invalid doorbell (handle=0x%x:0x%x)\n",
+			 handle.context, handle.resource);
+		return VMCI_ERROR_INVALID_ARGS;
+	}
+
+	resource = vmci_resource_by_handle(handle,
+					   VMCI_RESOURCE_TYPE_DOORBELL);
+	if (!resource) {
+		pr_devel("Notifying an unknown doorbell (handle=0x%x:0x%x)\n",
+			 handle.context, handle.resource);
+		return VMCI_ERROR_NOT_FOUND;
+	}
+
+	entry = container_of(resource, struct dbell_entry, resource);
+	if (entry->run_delayed) {
+		if (!schedule_work(&entry->work))
+			vmci_resource_put(resource);
+	} else {
+		entry->notify_cb(entry->client_data);
+		vmci_resource_put(resource);
+	}
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Register the notification bitmap with the host.
+ */
+bool vmci_dbell_register_notification_bitmap(u32 bitmap_ppn)
+{
+	int result;
+	struct vmci_notify_bm_set_msg bitmap_set_msg = { };
+
+	bitmap_set_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+						  VMCI_SET_NOTIFY_BITMAP);
+	bitmap_set_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
+	bitmap_set_msg.hdr.payload_size = sizeof(bitmap_set_msg) -
+	    VMCI_DG_HEADERSIZE;
+	bitmap_set_msg.bitmap_ppn = bitmap_ppn;
+
+	result = vmci_send_datagram(&bitmap_set_msg.hdr);
+	if (result != VMCI_SUCCESS) {
+		pr_devel("Failed to register (PPN=%u) as notification bitmap (error=%d)\n",
+			 bitmap_ppn, result);
+		return false;
+	}
+	return true;
+}
+
+/*
+ * Executes or schedules the handlers for a given notify index.
+ */
+static void dbell_fire_entries(u32 notify_idx)
+{
+	u32 bucket = VMCI_DOORBELL_HASH(notify_idx);
+	struct dbell_entry *dbell;
+
+	spin_lock_bh(&vmci_doorbell_it.lock);
+
+	hlist_for_each_entry(dbell, &vmci_doorbell_it.entries[bucket], node) {
+		if (dbell->idx == notify_idx &&
+		    atomic_read(&dbell->active) == 1) {
+			if (dbell->run_delayed) {
+				vmci_resource_get(&dbell->resource);
+				if (!schedule_work(&dbell->work))
+					vmci_resource_put(&dbell->resource);
+			} else {
+				dbell->notify_cb(dbell->client_data);
+			}
+		}
+	}
+
+	spin_unlock_bh(&vmci_doorbell_it.lock);
+}
+
+/*
+ * Scans the notification bitmap, collects pending notifications,
+ * resets the bitmap and invokes appropriate callbacks.
+ */
+void vmci_dbell_scan_notification_entries(u8 *bitmap)
+{
+	u32 idx;
+
+	for (idx = 0; idx < max_notify_idx; idx++) {
+		if (bitmap[idx] & 0x1) {
+			bitmap[idx] &= ~1;
+			dbell_fire_entries(idx);
+		}
+	}
+}
+
+/*
+ * vmci_doorbell_create() - Creates a doorbell
+ * @handle:     A handle used to track the resource.  Can be invalid.
+ * @flags:      Flag that determines context of callback.
+ * @priv_flags: Privileges flags.
+ * @notify_cb:  The callback to be ivoked when the doorbell fires.
+ * @client_data:        A parameter to be passed to the callback.
+ *
+ * Creates a doorbell with the given callback. If the handle is
+ * VMCI_INVALID_HANDLE, a free handle will be assigned, if
+ * possible. The callback can be run immediately (potentially with
+ * locks held - the default) or delayed (in a kernel thread) by
+ * specifying the flag VMCI_FLAG_DELAYED_CB. If delayed execution
+ * is selected, a given callback may not be run if the kernel is
+ * unable to allocate memory for the delayed execution (highly
+ * unlikely).
+ */
+int vmci_doorbell_create(struct vmci_handle *handle,
+			 u32 flags,
+			 u32 priv_flags,
+			 vmci_callback notify_cb, void *client_data)
+{
+	struct dbell_entry *entry;
+	struct vmci_handle new_handle;
+	int result;
+
+	if (!handle || !notify_cb || flags & ~VMCI_FLAG_DELAYED_CB ||
+	    priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (entry == NULL) {
+		pr_warn("Failed allocating memory for datagram entry\n");
+		return VMCI_ERROR_NO_MEM;
+	}
+
+	if (vmci_handle_is_invalid(*handle)) {
+		u32 context_id = vmci_get_context_id();
+
+		if (context_id == VMCI_INVALID_ID) {
+			pr_warn("Failed to get context ID\n");
+			result = VMCI_ERROR_NO_RESOURCES;
+			goto free_mem;
+		}
+
+		/* Let resource code allocate a free ID for us */
+		new_handle = vmci_make_handle(context_id, VMCI_INVALID_ID);
+	} else {
+		bool valid_context = false;
+
+		/*
+		 * Validate the handle.  We must do both of the checks below
+		 * because we can be acting as both a host and a guest at the
+		 * same time. We always allow the host context ID, since the
+		 * host functionality is in practice always there with the
+		 * unified driver.
+		 */
+		if (handle->context == VMCI_HOST_CONTEXT_ID ||
+		    (vmci_guest_code_active() &&
+		     vmci_get_context_id() == handle->context)) {
+			valid_context = true;
+		}
+
+		if (!valid_context || handle->resource == VMCI_INVALID_ID) {
+			pr_devel("Invalid argument (handle=0x%x:0x%x)\n",
+				 handle->context, handle->resource);
+			result = VMCI_ERROR_INVALID_ARGS;
+			goto free_mem;
+		}
+
+		new_handle = *handle;
+	}
+
+	entry->idx = 0;
+	INIT_HLIST_NODE(&entry->node);
+	entry->priv_flags = priv_flags;
+	INIT_WORK(&entry->work, dbell_delayed_dispatch);
+	entry->run_delayed = flags & VMCI_FLAG_DELAYED_CB;
+	entry->notify_cb = notify_cb;
+	entry->client_data = client_data;
+	atomic_set(&entry->active, 0);
+
+	result = vmci_resource_add(&entry->resource,
+				   VMCI_RESOURCE_TYPE_DOORBELL,
+				   new_handle);
+	if (result != VMCI_SUCCESS) {
+		pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d\n",
+			new_handle.context, new_handle.resource, result);
+		goto free_mem;
+	}
+
+	new_handle = vmci_resource_handle(&entry->resource);
+	if (vmci_guest_code_active()) {
+		dbell_index_table_add(entry);
+		result = dbell_link(new_handle, entry->idx);
+		if (VMCI_SUCCESS != result)
+			goto destroy_resource;
+
+		atomic_set(&entry->active, 1);
+	}
+
+	*handle = new_handle;
+
+	return result;
+
+ destroy_resource:
+	dbell_index_table_remove(entry);
+	vmci_resource_remove(&entry->resource);
+ free_mem:
+	kfree(entry);
+	return result;
+}
+EXPORT_SYMBOL_GPL(vmci_doorbell_create);
+
+/*
+ * vmci_doorbell_destroy() - Destroy a doorbell.
+ * @handle:     The handle tracking the resource.
+ *
+ * Destroys a doorbell previously created with vmcii_doorbell_create. This
+ * operation may block waiting for a callback to finish.
+ */
+int vmci_doorbell_destroy(struct vmci_handle handle)
+{
+	struct dbell_entry *entry;
+	struct vmci_resource *resource;
+
+	if (vmci_handle_is_invalid(handle))
+		return VMCI_ERROR_INVALID_ARGS;
+
+	resource = vmci_resource_by_handle(handle,
+					   VMCI_RESOURCE_TYPE_DOORBELL);
+	if (!resource) {
+		pr_devel("Failed to destroy doorbell (handle=0x%x:0x%x)\n",
+			 handle.context, handle.resource);
+		return VMCI_ERROR_NOT_FOUND;
+	}
+
+	entry = container_of(resource, struct dbell_entry, resource);
+
+	if (!hlist_unhashed(&entry->node)) {
+		int result;
+
+		dbell_index_table_remove(entry);
+
+		result = dbell_unlink(handle);
+		if (VMCI_SUCCESS != result) {
+
+			/*
+			 * The only reason this should fail would be
+			 * an inconsistency between guest and
+			 * hypervisor state, where the guest believes
+			 * it has an active registration whereas the
+			 * hypervisor doesn't. One case where this may
+			 * happen is if a doorbell is unregistered
+			 * following a hibernation at a time where the
+			 * doorbell state hasn't been restored on the
+			 * hypervisor side yet. Since the handle has
+			 * now been removed in the guest, we just
+			 * print a warning and return success.
+			 */
+			pr_devel("Unlink of doorbell (handle=0x%x:0x%x) unknown by hypervisor (error=%d)\n",
+				 handle.context, handle.resource, result);
+		}
+	}
+
+	/*
+	 * Now remove the resource from the table.  It might still be in use
+	 * after this, in a callback or still on the delayed work queue.
+	 */
+	vmci_resource_put(&entry->resource);
+	vmci_resource_remove(&entry->resource);
+
+	kfree(entry);
+
+	return VMCI_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(vmci_doorbell_destroy);
+
+/*
+ * vmci_doorbell_notify() - Ring the doorbell (and hide in the bushes).
+ * @dst:        The handlle identifying the doorbell resource
+ * @priv_flags: Priviledge flags.
+ *
+ * Generates a notification on the doorbell identified by the
+ * handle. For host side generation of notifications, the caller
+ * can specify what the privilege of the calling side is.
+ */
+int vmci_doorbell_notify(struct vmci_handle dst, u32 priv_flags)
+{
+	int retval;
+	enum vmci_route route;
+	struct vmci_handle src;
+
+	if (vmci_handle_is_invalid(dst) ||
+	    (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS))
+		return VMCI_ERROR_INVALID_ARGS;
+
+	src = VMCI_INVALID_HANDLE;
+	retval = vmci_route(&src, &dst, false, &route);
+	if (retval < VMCI_SUCCESS)
+		return retval;
+
+	if (VMCI_ROUTE_AS_HOST == route)
+		return vmci_ctx_notify_dbell(VMCI_HOST_CONTEXT_ID,
+					     dst, priv_flags);
+
+	if (VMCI_ROUTE_AS_GUEST == route)
+		return dbell_notify_as_guest(dst, priv_flags);
+
+	pr_warn("Unknown route (%d) for doorbell\n", route);
+	return VMCI_ERROR_DST_UNREACHABLE;
+}
+EXPORT_SYMBOL_GPL(vmci_doorbell_notify);
diff --git a/drivers/misc/vmw_vmci/vmci_doorbell.h b/drivers/misc/vmw_vmci/vmci_doorbell.h
new file mode 100644
index 000000000..e4c0b1748
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_doorbell.h
@@ -0,0 +1,51 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef VMCI_DOORBELL_H
+#define VMCI_DOORBELL_H
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/types.h>
+
+#include "vmci_driver.h"
+
+/*
+ * VMCINotifyResourceInfo: Used to create and destroy doorbells, and
+ * generate a notification for a doorbell or queue pair.
+ */
+struct vmci_dbell_notify_resource_info {
+	struct vmci_handle handle;
+	u16 resource;
+	u16 action;
+	s32 result;
+};
+
+/*
+ * Structure used for checkpointing the doorbell mappings. It is
+ * written to the checkpoint as is, so changing this structure will
+ * break checkpoint compatibility.
+ */
+struct dbell_cpt_state {
+	struct vmci_handle handle;
+	u64 bitmap_idx;
+};
+
+int vmci_dbell_host_context_notify(u32 src_cid, struct vmci_handle handle);
+int vmci_dbell_get_priv_flags(struct vmci_handle handle, u32 *priv_flags);
+
+bool vmci_dbell_register_notification_bitmap(u32 bitmap_ppn);
+void vmci_dbell_scan_notification_entries(u8 *bitmap);
+
+#endif /* VMCI_DOORBELL_H */
diff --git a/drivers/misc/vmw_vmci/vmci_driver.c b/drivers/misc/vmw_vmci/vmci_driver.c
new file mode 100644
index 000000000..003bfba40
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_driver.c
@@ -0,0 +1,117 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/atomic.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include "vmci_driver.h"
+#include "vmci_event.h"
+
+static bool vmci_disable_host;
+module_param_named(disable_host, vmci_disable_host, bool, 0);
+MODULE_PARM_DESC(disable_host,
+		 "Disable driver host personality (default=enabled)");
+
+static bool vmci_disable_guest;
+module_param_named(disable_guest, vmci_disable_guest, bool, 0);
+MODULE_PARM_DESC(disable_guest,
+		 "Disable driver guest personality (default=enabled)");
+
+static bool vmci_guest_personality_initialized;
+static bool vmci_host_personality_initialized;
+
+/*
+ * vmci_get_context_id() - Gets the current context ID.
+ *
+ * Returns the current context ID.  Note that since this is accessed only
+ * from code running in the host, this always returns the host context ID.
+ */
+u32 vmci_get_context_id(void)
+{
+	if (vmci_guest_code_active())
+		return vmci_get_vm_context_id();
+	else if (vmci_host_code_active())
+		return VMCI_HOST_CONTEXT_ID;
+
+	return VMCI_INVALID_ID;
+}
+EXPORT_SYMBOL_GPL(vmci_get_context_id);
+
+static int __init vmci_drv_init(void)
+{
+	int vmci_err;
+	int error;
+
+	vmci_err = vmci_event_init();
+	if (vmci_err < VMCI_SUCCESS) {
+		pr_err("Failed to initialize VMCIEvent (result=%d)\n",
+		       vmci_err);
+		return -EINVAL;
+	}
+
+	if (!vmci_disable_guest) {
+		error = vmci_guest_init();
+		if (error) {
+			pr_warn("Failed to initialize guest personality (err=%d)\n",
+				error);
+		} else {
+			vmci_guest_personality_initialized = true;
+			pr_info("Guest personality initialized and is %s\n",
+				vmci_guest_code_active() ?
+				"active" : "inactive");
+		}
+	}
+
+	if (!vmci_disable_host) {
+		error = vmci_host_init();
+		if (error) {
+			pr_warn("Unable to initialize host personality (err=%d)\n",
+				error);
+		} else {
+			vmci_host_personality_initialized = true;
+			pr_info("Initialized host personality\n");
+		}
+	}
+
+	if (!vmci_guest_personality_initialized &&
+	    !vmci_host_personality_initialized) {
+		vmci_event_exit();
+		return -ENODEV;
+	}
+
+	return 0;
+}
+module_init(vmci_drv_init);
+
+static void __exit vmci_drv_exit(void)
+{
+	if (vmci_guest_personality_initialized)
+		vmci_guest_exit();
+
+	if (vmci_host_personality_initialized)
+		vmci_host_exit();
+
+	vmci_event_exit();
+}
+module_exit(vmci_drv_exit);
+
+MODULE_AUTHOR("VMware, Inc.");
+MODULE_DESCRIPTION("VMware Virtual Machine Communication Interface.");
+MODULE_VERSION("1.1.6.0-k");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/vmw_vmci/vmci_driver.h b/drivers/misc/vmw_vmci/vmci_driver.h
new file mode 100644
index 000000000..cee9e977d
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_driver.h
@@ -0,0 +1,57 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _VMCI_DRIVER_H_
+#define _VMCI_DRIVER_H_
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/wait.h>
+
+#include "vmci_queue_pair.h"
+#include "vmci_context.h"
+
+enum vmci_obj_type {
+	VMCIOBJ_VMX_VM = 10,
+	VMCIOBJ_CONTEXT,
+	VMCIOBJ_SOCKET,
+	VMCIOBJ_NOT_SET,
+};
+
+/* For storing VMCI structures in file handles. */
+struct vmci_obj {
+	void *ptr;
+	enum vmci_obj_type type;
+};
+
+/*
+ * Needed by other components of this module.  It's okay to have one global
+ * instance of this because there can only ever be one VMCI device.  Our
+ * virtual hardware enforces this.
+ */
+extern struct pci_dev *vmci_pdev;
+
+u32 vmci_get_context_id(void);
+int vmci_send_datagram(struct vmci_datagram *dg);
+
+int vmci_host_init(void);
+void vmci_host_exit(void);
+bool vmci_host_code_active(void);
+
+int vmci_guest_init(void);
+void vmci_guest_exit(void);
+bool vmci_guest_code_active(void);
+u32 vmci_get_vm_context_id(void);
+
+#endif /* _VMCI_DRIVER_H_ */
diff --git a/drivers/misc/vmw_vmci/vmci_event.c b/drivers/misc/vmw_vmci/vmci_event.c
new file mode 100644
index 000000000..84258a480
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_event.c
@@ -0,0 +1,225 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/rculist.h>
+
+#include "vmci_driver.h"
+#include "vmci_event.h"
+
+#define EVENT_MAGIC 0xEABE0000
+#define VMCI_EVENT_MAX_ATTEMPTS 10
+
+struct vmci_subscription {
+	u32 id;
+	u32 event;
+	vmci_event_cb callback;
+	void *callback_data;
+	struct list_head node;	/* on one of subscriber lists */
+};
+
+static struct list_head subscriber_array[VMCI_EVENT_MAX];
+static DEFINE_MUTEX(subscriber_mutex);
+
+int __init vmci_event_init(void)
+{
+	int i;
+
+	for (i = 0; i < VMCI_EVENT_MAX; i++)
+		INIT_LIST_HEAD(&subscriber_array[i]);
+
+	return VMCI_SUCCESS;
+}
+
+void vmci_event_exit(void)
+{
+	int e;
+
+	/* We free all memory at exit. */
+	for (e = 0; e < VMCI_EVENT_MAX; e++) {
+		struct vmci_subscription *cur, *p2;
+		list_for_each_entry_safe(cur, p2, &subscriber_array[e], node) {
+
+			/*
+			 * We should never get here because all events
+			 * should have been unregistered before we try
+			 * to unload the driver module.
+			 */
+			pr_warn("Unexpected free events occurring\n");
+			list_del(&cur->node);
+			kfree(cur);
+		}
+	}
+}
+
+/*
+ * Find entry. Assumes subscriber_mutex is held.
+ */
+static struct vmci_subscription *event_find(u32 sub_id)
+{
+	int e;
+
+	for (e = 0; e < VMCI_EVENT_MAX; e++) {
+		struct vmci_subscription *cur;
+		list_for_each_entry(cur, &subscriber_array[e], node) {
+			if (cur->id == sub_id)
+				return cur;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Actually delivers the events to the subscribers.
+ * The callback function for each subscriber is invoked.
+ */
+static void event_deliver(struct vmci_event_msg *event_msg)
+{
+	struct vmci_subscription *cur;
+	struct list_head *subscriber_list;
+
+	rcu_read_lock();
+	subscriber_list = &subscriber_array[event_msg->event_data.event];
+	list_for_each_entry_rcu(cur, subscriber_list, node) {
+		cur->callback(cur->id, &event_msg->event_data,
+			      cur->callback_data);
+	}
+	rcu_read_unlock();
+}
+
+/*
+ * Dispatcher for the VMCI_EVENT_RECEIVE datagrams. Calls all
+ * subscribers for given event.
+ */
+int vmci_event_dispatch(struct vmci_datagram *msg)
+{
+	struct vmci_event_msg *event_msg = (struct vmci_event_msg *)msg;
+
+	if (msg->payload_size < sizeof(u32) ||
+	    msg->payload_size > sizeof(struct vmci_event_data_max))
+		return VMCI_ERROR_INVALID_ARGS;
+
+	if (!VMCI_EVENT_VALID(event_msg->event_data.event))
+		return VMCI_ERROR_EVENT_UNKNOWN;
+
+	event_deliver(event_msg);
+	return VMCI_SUCCESS;
+}
+
+/*
+ * vmci_event_subscribe() - Subscribe to a given event.
+ * @event:      The event to subscribe to.
+ * @callback:   The callback to invoke upon the event.
+ * @callback_data:      Data to pass to the callback.
+ * @subscription_id:    ID used to track subscription.  Used with
+ *              vmci_event_unsubscribe()
+ *
+ * Subscribes to the provided event. The callback specified will be
+ * fired from RCU critical section and therefore must not sleep.
+ */
+int vmci_event_subscribe(u32 event,
+			 vmci_event_cb callback,
+			 void *callback_data,
+			 u32 *new_subscription_id)
+{
+	struct vmci_subscription *sub;
+	int attempts;
+	int retval;
+	bool have_new_id = false;
+
+	if (!new_subscription_id) {
+		pr_devel("%s: Invalid subscription (NULL)\n", __func__);
+		return VMCI_ERROR_INVALID_ARGS;
+	}
+
+	if (!VMCI_EVENT_VALID(event) || !callback) {
+		pr_devel("%s: Failed to subscribe to event (type=%d) (callback=%p) (data=%p)\n",
+			 __func__, event, callback, callback_data);
+		return VMCI_ERROR_INVALID_ARGS;
+	}
+
+	sub = kzalloc(sizeof(*sub), GFP_KERNEL);
+	if (!sub)
+		return VMCI_ERROR_NO_MEM;
+
+	sub->id = VMCI_EVENT_MAX;
+	sub->event = event;
+	sub->callback = callback;
+	sub->callback_data = callback_data;
+	INIT_LIST_HEAD(&sub->node);
+
+	mutex_lock(&subscriber_mutex);
+
+	/* Creation of a new event is always allowed. */
+	for (attempts = 0; attempts < VMCI_EVENT_MAX_ATTEMPTS; attempts++) {
+		static u32 subscription_id;
+		/*
+		 * We try to get an id a couple of time before
+		 * claiming we are out of resources.
+		 */
+
+		/* Test for duplicate id. */
+		if (!event_find(++subscription_id)) {
+			sub->id = subscription_id;
+			have_new_id = true;
+			break;
+		}
+	}
+
+	if (have_new_id) {
+		list_add_rcu(&sub->node, &subscriber_array[event]);
+		retval = VMCI_SUCCESS;
+	} else {
+		retval = VMCI_ERROR_NO_RESOURCES;
+	}
+
+	mutex_unlock(&subscriber_mutex);
+
+	*new_subscription_id = sub->id;
+	return retval;
+}
+EXPORT_SYMBOL_GPL(vmci_event_subscribe);
+
+/*
+ * vmci_event_unsubscribe() - unsubscribe from an event.
+ * @sub_id:     A subscription ID as provided by vmci_event_subscribe()
+ *
+ * Unsubscribe from given event. Removes it from list and frees it.
+ * Will return callback_data if requested by caller.
+ */
+int vmci_event_unsubscribe(u32 sub_id)
+{
+	struct vmci_subscription *s;
+
+	mutex_lock(&subscriber_mutex);
+	s = event_find(sub_id);
+	if (s)
+		list_del_rcu(&s->node);
+	mutex_unlock(&subscriber_mutex);
+
+	if (!s)
+		return VMCI_ERROR_NOT_FOUND;
+
+	synchronize_rcu();
+	kfree(s);
+
+	return VMCI_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(vmci_event_unsubscribe);
diff --git a/drivers/misc/vmw_vmci/vmci_event.h b/drivers/misc/vmw_vmci/vmci_event.h
new file mode 100644
index 000000000..7df9b1c0a
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_event.h
@@ -0,0 +1,25 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef __VMCI_EVENT_H__
+#define __VMCI_EVENT_H__
+
+#include <linux/vmw_vmci_api.h>
+
+int vmci_event_init(void);
+void vmci_event_exit(void);
+int vmci_event_dispatch(struct vmci_datagram *msg);
+
+#endif /*__VMCI_EVENT_H__ */
diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c
new file mode 100644
index 000000000..dd20ea4ad
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -0,0 +1,736 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/moduleparam.h>
+#include <linux/interrupt.h>
+#include <linux/highmem.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/smp.h>
+#include <linux/io.h>
+#include <linux/vmalloc.h>
+
+#include "vmci_datagram.h"
+#include "vmci_doorbell.h"
+#include "vmci_context.h"
+#include "vmci_driver.h"
+#include "vmci_event.h"
+
+#define PCI_DEVICE_ID_VMWARE_VMCI	0x0740
+
+#define VMCI_UTIL_NUM_RESOURCES 1
+
+static bool vmci_disable_msi;
+module_param_named(disable_msi, vmci_disable_msi, bool, 0);
+MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");
+
+static bool vmci_disable_msix;
+module_param_named(disable_msix, vmci_disable_msix, bool, 0);
+MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");
+
+static u32 ctx_update_sub_id = VMCI_INVALID_ID;
+static u32 vm_context_id = VMCI_INVALID_ID;
+
+struct vmci_guest_device {
+	struct device *dev;	/* PCI device we are attached to */
+	void __iomem *iobase;
+
+	bool exclusive_vectors;
+
+	struct tasklet_struct datagram_tasklet;
+	struct tasklet_struct bm_tasklet;
+
+	void *data_buffer;
+	void *notification_bitmap;
+	dma_addr_t notification_base;
+};
+
+/* vmci_dev singleton device and supporting data*/
+struct pci_dev *vmci_pdev;
+static struct vmci_guest_device *vmci_dev_g;
+static DEFINE_SPINLOCK(vmci_dev_spinlock);
+
+static atomic_t vmci_num_guest_devices = ATOMIC_INIT(0);
+
+bool vmci_guest_code_active(void)
+{
+	return atomic_read(&vmci_num_guest_devices) != 0;
+}
+
+u32 vmci_get_vm_context_id(void)
+{
+	if (vm_context_id == VMCI_INVALID_ID) {
+		struct vmci_datagram get_cid_msg;
+		get_cid_msg.dst =
+		    vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+				     VMCI_GET_CONTEXT_ID);
+		get_cid_msg.src = VMCI_ANON_SRC_HANDLE;
+		get_cid_msg.payload_size = 0;
+		vm_context_id = vmci_send_datagram(&get_cid_msg);
+	}
+	return vm_context_id;
+}
+
+/*
+ * VM to hypervisor call mechanism. We use the standard VMware naming
+ * convention since shared code is calling this function as well.
+ */
+int vmci_send_datagram(struct vmci_datagram *dg)
+{
+	unsigned long flags;
+	int result;
+
+	/* Check args. */
+	if (dg == NULL)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	/*
+	 * Need to acquire spinlock on the device because the datagram
+	 * data may be spread over multiple pages and the monitor may
+	 * interleave device user rpc calls from multiple
+	 * VCPUs. Acquiring the spinlock precludes that
+	 * possibility. Disabling interrupts to avoid incoming
+	 * datagrams during a "rep out" and possibly landing up in
+	 * this function.
+	 */
+	spin_lock_irqsave(&vmci_dev_spinlock, flags);
+
+	if (vmci_dev_g) {
+		iowrite8_rep(vmci_dev_g->iobase + VMCI_DATA_OUT_ADDR,
+			     dg, VMCI_DG_SIZE(dg));
+		result = ioread32(vmci_dev_g->iobase + VMCI_RESULT_LOW_ADDR);
+	} else {
+		result = VMCI_ERROR_UNAVAILABLE;
+	}
+
+	spin_unlock_irqrestore(&vmci_dev_spinlock, flags);
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(vmci_send_datagram);
+
+/*
+ * Gets called with the new context id if updated or resumed.
+ * Context id.
+ */
+static void vmci_guest_cid_update(u32 sub_id,
+				  const struct vmci_event_data *event_data,
+				  void *client_data)
+{
+	const struct vmci_event_payld_ctx *ev_payload =
+				vmci_event_data_const_payload(event_data);
+
+	if (sub_id != ctx_update_sub_id) {
+		pr_devel("Invalid subscriber (ID=0x%x)\n", sub_id);
+		return;
+	}
+
+	if (!event_data || ev_payload->context_id == VMCI_INVALID_ID) {
+		pr_devel("Invalid event data\n");
+		return;
+	}
+
+	pr_devel("Updating context from (ID=0x%x) to (ID=0x%x) on event (type=%d)\n",
+		 vm_context_id, ev_payload->context_id, event_data->event);
+
+	vm_context_id = ev_payload->context_id;
+}
+
+/*
+ * Verify that the host supports the hypercalls we need. If it does not,
+ * try to find fallback hypercalls and use those instead.  Returns
+ * true if required hypercalls (or fallback hypercalls) are
+ * supported by the host, false otherwise.
+ */
+static int vmci_check_host_caps(struct pci_dev *pdev)
+{
+	bool result;
+	struct vmci_resource_query_msg *msg;
+	u32 msg_size = sizeof(struct vmci_resource_query_hdr) +
+				VMCI_UTIL_NUM_RESOURCES * sizeof(u32);
+	struct vmci_datagram *check_msg;
+
+	check_msg = kzalloc(msg_size, GFP_KERNEL);
+	if (!check_msg) {
+		dev_err(&pdev->dev, "%s: Insufficient memory\n", __func__);
+		return -ENOMEM;
+	}
+
+	check_msg->dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+					  VMCI_RESOURCES_QUERY);
+	check_msg->src = VMCI_ANON_SRC_HANDLE;
+	check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE;
+	msg = (struct vmci_resource_query_msg *)VMCI_DG_PAYLOAD(check_msg);
+
+	msg->num_resources = VMCI_UTIL_NUM_RESOURCES;
+	msg->resources[0] = VMCI_GET_CONTEXT_ID;
+
+	/* Checks that hyper calls are supported */
+	result = vmci_send_datagram(check_msg) == 0x01;
+	kfree(check_msg);
+
+	dev_dbg(&pdev->dev, "%s: Host capability check: %s\n",
+		__func__, result ? "PASSED" : "FAILED");
+
+	/* We need the vector. There are no fallbacks. */
+	return result ? 0 : -ENXIO;
+}
+
+/*
+ * Reads datagrams from the data in port and dispatches them. We
+ * always start reading datagrams into only the first page of the
+ * datagram buffer. If the datagrams don't fit into one page, we
+ * use the maximum datagram buffer size for the remainder of the
+ * invocation. This is a simple heuristic for not penalizing
+ * small datagrams.
+ *
+ * This function assumes that it has exclusive access to the data
+ * in port for the duration of the call.
+ */
+static void vmci_dispatch_dgs(unsigned long data)
+{
+	struct vmci_guest_device *vmci_dev = (struct vmci_guest_device *)data;
+	u8 *dg_in_buffer = vmci_dev->data_buffer;
+	struct vmci_datagram *dg;
+	size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
+	size_t current_dg_in_buffer_size = PAGE_SIZE;
+	size_t remaining_bytes;
+
+	BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);
+
+	ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
+		    vmci_dev->data_buffer, current_dg_in_buffer_size);
+	dg = (struct vmci_datagram *)dg_in_buffer;
+	remaining_bytes = current_dg_in_buffer_size;
+
+	while (dg->dst.resource != VMCI_INVALID_ID ||
+	       remaining_bytes > PAGE_SIZE) {
+		unsigned dg_in_size;
+
+		/*
+		 * When the input buffer spans multiple pages, a datagram can
+		 * start on any page boundary in the buffer.
+		 */
+		if (dg->dst.resource == VMCI_INVALID_ID) {
+			dg = (struct vmci_datagram *)roundup(
+				(uintptr_t)dg + 1, PAGE_SIZE);
+			remaining_bytes =
+				(size_t)(dg_in_buffer +
+					 current_dg_in_buffer_size -
+					 (u8 *)dg);
+			continue;
+		}
+
+		dg_in_size = VMCI_DG_SIZE_ALIGNED(dg);
+
+		if (dg_in_size <= dg_in_buffer_size) {
+			int result;
+
+			/*
+			 * If the remaining bytes in the datagram
+			 * buffer doesn't contain the complete
+			 * datagram, we first make sure we have enough
+			 * room for it and then we read the reminder
+			 * of the datagram and possibly any following
+			 * datagrams.
+			 */
+			if (dg_in_size > remaining_bytes) {
+				if (remaining_bytes !=
+				    current_dg_in_buffer_size) {
+
+					/*
+					 * We move the partial
+					 * datagram to the front and
+					 * read the reminder of the
+					 * datagram and possibly
+					 * following calls into the
+					 * following bytes.
+					 */
+					memmove(dg_in_buffer, dg_in_buffer +
+						current_dg_in_buffer_size -
+						remaining_bytes,
+						remaining_bytes);
+					dg = (struct vmci_datagram *)
+					    dg_in_buffer;
+				}
+
+				if (current_dg_in_buffer_size !=
+				    dg_in_buffer_size)
+					current_dg_in_buffer_size =
+					    dg_in_buffer_size;
+
+				ioread8_rep(vmci_dev->iobase +
+						VMCI_DATA_IN_ADDR,
+					vmci_dev->data_buffer +
+						remaining_bytes,
+					current_dg_in_buffer_size -
+						remaining_bytes);
+			}
+
+			/*
+			 * We special case event datagrams from the
+			 * hypervisor.
+			 */
+			if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
+			    dg->dst.resource == VMCI_EVENT_HANDLER) {
+				result = vmci_event_dispatch(dg);
+			} else {
+				result = vmci_datagram_invoke_guest_handler(dg);
+			}
+			if (result < VMCI_SUCCESS)
+				dev_dbg(vmci_dev->dev,
+					"Datagram with resource (ID=0x%x) failed (err=%d)\n",
+					 dg->dst.resource, result);
+
+			/* On to the next datagram. */
+			dg = (struct vmci_datagram *)((u8 *)dg +
+						      dg_in_size);
+		} else {
+			size_t bytes_to_skip;
+
+			/*
+			 * Datagram doesn't fit in datagram buffer of maximal
+			 * size. We drop it.
+			 */
+			dev_dbg(vmci_dev->dev,
+				"Failed to receive datagram (size=%u bytes)\n",
+				 dg_in_size);
+
+			bytes_to_skip = dg_in_size - remaining_bytes;
+			if (current_dg_in_buffer_size != dg_in_buffer_size)
+				current_dg_in_buffer_size = dg_in_buffer_size;
+
+			for (;;) {
+				ioread8_rep(vmci_dev->iobase +
+						VMCI_DATA_IN_ADDR,
+					vmci_dev->data_buffer,
+					current_dg_in_buffer_size);
+				if (bytes_to_skip <= current_dg_in_buffer_size)
+					break;
+
+				bytes_to_skip -= current_dg_in_buffer_size;
+			}
+			dg = (struct vmci_datagram *)(dg_in_buffer +
+						      bytes_to_skip);
+		}
+
+		remaining_bytes =
+		    (size_t) (dg_in_buffer + current_dg_in_buffer_size -
+			      (u8 *)dg);
+
+		if (remaining_bytes < VMCI_DG_HEADERSIZE) {
+			/* Get the next batch of datagrams. */
+
+			ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
+				    vmci_dev->data_buffer,
+				    current_dg_in_buffer_size);
+			dg = (struct vmci_datagram *)dg_in_buffer;
+			remaining_bytes = current_dg_in_buffer_size;
+		}
+	}
+}
+
+/*
+ * Scans the notification bitmap for raised flags, clears them
+ * and handles the notifications.
+ */
+static void vmci_process_bitmap(unsigned long data)
+{
+	struct vmci_guest_device *dev = (struct vmci_guest_device *)data;
+
+	if (!dev->notification_bitmap) {
+		dev_dbg(dev->dev, "No bitmap present in %s\n", __func__);
+		return;
+	}
+
+	vmci_dbell_scan_notification_entries(dev->notification_bitmap);
+}
+
+/*
+ * Interrupt handler for legacy or MSI interrupt, or for first MSI-X
+ * interrupt (vector VMCI_INTR_DATAGRAM).
+ */
+static irqreturn_t vmci_interrupt(int irq, void *_dev)
+{
+	struct vmci_guest_device *dev = _dev;
+
+	/*
+	 * If we are using MSI-X with exclusive vectors then we simply schedule
+	 * the datagram tasklet, since we know the interrupt was meant for us.
+	 * Otherwise we must read the ICR to determine what to do.
+	 */
+
+	if (dev->exclusive_vectors) {
+		tasklet_schedule(&dev->datagram_tasklet);
+	} else {
+		unsigned int icr;
+
+		/* Acknowledge interrupt and determine what needs doing. */
+		icr = ioread32(dev->iobase + VMCI_ICR_ADDR);
+		if (icr == 0 || icr == ~0)
+			return IRQ_NONE;
+
+		if (icr & VMCI_ICR_DATAGRAM) {
+			tasklet_schedule(&dev->datagram_tasklet);
+			icr &= ~VMCI_ICR_DATAGRAM;
+		}
+
+		if (icr & VMCI_ICR_NOTIFICATION) {
+			tasklet_schedule(&dev->bm_tasklet);
+			icr &= ~VMCI_ICR_NOTIFICATION;
+		}
+
+		if (icr != 0)
+			dev_warn(dev->dev,
+				 "Ignoring unknown interrupt cause (%d)\n",
+				 icr);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
+ * which is for the notification bitmap.  Will only get called if we are
+ * using MSI-X with exclusive vectors.
+ */
+static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
+{
+	struct vmci_guest_device *dev = _dev;
+
+	/* For MSI-X we can just assume it was meant for us. */
+	tasklet_schedule(&dev->bm_tasklet);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Most of the initialization at module load time is done here.
+ */
+static int vmci_guest_probe_device(struct pci_dev *pdev,
+				   const struct pci_device_id *id)
+{
+	struct vmci_guest_device *vmci_dev;
+	void __iomem *iobase;
+	unsigned int capabilities;
+	unsigned long cmd;
+	int vmci_err;
+	int error;
+
+	dev_dbg(&pdev->dev, "Probing for vmci/PCI guest device\n");
+
+	error = pcim_enable_device(pdev);
+	if (error) {
+		dev_err(&pdev->dev,
+			"Failed to enable VMCI device: %d\n", error);
+		return error;
+	}
+
+	error = pcim_iomap_regions(pdev, 1 << 0, KBUILD_MODNAME);
+	if (error) {
+		dev_err(&pdev->dev, "Failed to reserve/map IO regions\n");
+		return error;
+	}
+
+	iobase = pcim_iomap_table(pdev)[0];
+
+	dev_info(&pdev->dev, "Found VMCI PCI device at %#lx, irq %u\n",
+		 (unsigned long)iobase, pdev->irq);
+
+	vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL);
+	if (!vmci_dev) {
+		dev_err(&pdev->dev,
+			"Can't allocate memory for VMCI device\n");
+		return -ENOMEM;
+	}
+
+	vmci_dev->dev = &pdev->dev;
+	vmci_dev->exclusive_vectors = false;
+	vmci_dev->iobase = iobase;
+
+	tasklet_init(&vmci_dev->datagram_tasklet,
+		     vmci_dispatch_dgs, (unsigned long)vmci_dev);
+	tasklet_init(&vmci_dev->bm_tasklet,
+		     vmci_process_bitmap, (unsigned long)vmci_dev);
+
+	vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
+	if (!vmci_dev->data_buffer) {
+		dev_err(&pdev->dev,
+			"Can't allocate memory for datagram buffer\n");
+		return -ENOMEM;
+	}
+
+	pci_set_master(pdev);	/* To enable queue_pair functionality. */
+
+	/*
+	 * Verify that the VMCI Device supports the capabilities that
+	 * we need. If the device is missing capabilities that we would
+	 * like to use, check for fallback capabilities and use those
+	 * instead (so we can run a new VM on old hosts). Fail the load if
+	 * a required capability is missing and there is no fallback.
+	 *
+	 * Right now, we need datagrams. There are no fallbacks.
+	 */
+	capabilities = ioread32(vmci_dev->iobase + VMCI_CAPS_ADDR);
+	if (!(capabilities & VMCI_CAPS_DATAGRAM)) {
+		dev_err(&pdev->dev, "Device does not support datagrams\n");
+		error = -ENXIO;
+		goto err_free_data_buffer;
+	}
+
+	/*
+	 * If the hardware supports notifications, we will use that as
+	 * well.
+	 */
+	if (capabilities & VMCI_CAPS_NOTIFICATIONS) {
+		vmci_dev->notification_bitmap = dma_alloc_coherent(
+			&pdev->dev, PAGE_SIZE, &vmci_dev->notification_base,
+			GFP_KERNEL);
+		if (!vmci_dev->notification_bitmap) {
+			dev_warn(&pdev->dev,
+				 "Unable to allocate notification bitmap\n");
+		} else {
+			memset(vmci_dev->notification_bitmap, 0, PAGE_SIZE);
+			capabilities |= VMCI_CAPS_NOTIFICATIONS;
+		}
+	}
+
+	dev_info(&pdev->dev, "Using capabilities 0x%x\n", capabilities);
+
+	/* Let the host know which capabilities we intend to use. */
+	iowrite32(capabilities, vmci_dev->iobase + VMCI_CAPS_ADDR);
+
+	/* Set up global device so that we can start sending datagrams */
+	spin_lock_irq(&vmci_dev_spinlock);
+	vmci_dev_g = vmci_dev;
+	vmci_pdev = pdev;
+	spin_unlock_irq(&vmci_dev_spinlock);
+
+	/*
+	 * Register notification bitmap with device if that capability is
+	 * used.
+	 */
+	if (capabilities & VMCI_CAPS_NOTIFICATIONS) {
+		unsigned long bitmap_ppn =
+			vmci_dev->notification_base >> PAGE_SHIFT;
+		if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) {
+			dev_warn(&pdev->dev,
+				 "VMCI device unable to register notification bitmap with PPN 0x%x\n",
+				 (u32) bitmap_ppn);
+			error = -ENXIO;
+			goto err_remove_vmci_dev_g;
+		}
+	}
+
+	/* Check host capabilities. */
+	error = vmci_check_host_caps(pdev);
+	if (error)
+		goto err_remove_bitmap;
+
+	/* Enable device. */
+
+	/*
+	 * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can
+	 * update the internal context id when needed.
+	 */
+	vmci_err = vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE,
+					vmci_guest_cid_update, NULL,
+					&ctx_update_sub_id);
+	if (vmci_err < VMCI_SUCCESS)
+		dev_warn(&pdev->dev,
+			 "Failed to subscribe to event (type=%d): %d\n",
+			 VMCI_EVENT_CTX_ID_UPDATE, vmci_err);
+
+	/*
+	 * Enable interrupts.  Try MSI-X first, then MSI, and then fallback on
+	 * legacy interrupts.
+	 */
+	error = pci_alloc_irq_vectors(pdev, VMCI_MAX_INTRS, VMCI_MAX_INTRS,
+			PCI_IRQ_MSIX);
+	if (error < 0) {
+		error = pci_alloc_irq_vectors(pdev, 1, 1,
+				PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
+		if (error < 0)
+			goto err_remove_bitmap;
+	} else {
+		vmci_dev->exclusive_vectors = true;
+	}
+
+	/*
+	 * Request IRQ for legacy or MSI interrupts, or for first
+	 * MSI-X vector.
+	 */
+	error = request_irq(pci_irq_vector(pdev, 0), vmci_interrupt,
+			    IRQF_SHARED, KBUILD_MODNAME, vmci_dev);
+	if (error) {
+		dev_err(&pdev->dev, "Irq %u in use: %d\n",
+			pci_irq_vector(pdev, 0), error);
+		goto err_disable_msi;
+	}
+
+	/*
+	 * For MSI-X with exclusive vectors we need to request an
+	 * interrupt for each vector so that we get a separate
+	 * interrupt handler routine.  This allows us to distinguish
+	 * between the vectors.
+	 */
+	if (vmci_dev->exclusive_vectors) {
+		error = request_irq(pci_irq_vector(pdev, 1),
+				    vmci_interrupt_bm, 0, KBUILD_MODNAME,
+				    vmci_dev);
+		if (error) {
+			dev_err(&pdev->dev,
+				"Failed to allocate irq %u: %d\n",
+				pci_irq_vector(pdev, 1), error);
+			goto err_free_irq;
+		}
+	}
+
+	dev_dbg(&pdev->dev, "Registered device\n");
+
+	atomic_inc(&vmci_num_guest_devices);
+
+	/* Enable specific interrupt bits. */
+	cmd = VMCI_IMR_DATAGRAM;
+	if (capabilities & VMCI_CAPS_NOTIFICATIONS)
+		cmd |= VMCI_IMR_NOTIFICATION;
+	iowrite32(cmd, vmci_dev->iobase + VMCI_IMR_ADDR);
+
+	/* Enable interrupts. */
+	iowrite32(VMCI_CONTROL_INT_ENABLE,
+		  vmci_dev->iobase + VMCI_CONTROL_ADDR);
+
+	pci_set_drvdata(pdev, vmci_dev);
+	return 0;
+
+err_free_irq:
+	free_irq(pci_irq_vector(pdev, 0), vmci_dev);
+	tasklet_kill(&vmci_dev->datagram_tasklet);
+	tasklet_kill(&vmci_dev->bm_tasklet);
+
+err_disable_msi:
+	pci_free_irq_vectors(pdev);
+
+	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
+	if (vmci_err < VMCI_SUCCESS)
+		dev_warn(&pdev->dev,
+			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
+			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);
+
+err_remove_bitmap:
+	if (vmci_dev->notification_bitmap) {
+		iowrite32(VMCI_CONTROL_RESET,
+			  vmci_dev->iobase + VMCI_CONTROL_ADDR);
+		dma_free_coherent(&pdev->dev, PAGE_SIZE,
+				  vmci_dev->notification_bitmap,
+				  vmci_dev->notification_base);
+	}
+
+err_remove_vmci_dev_g:
+	spin_lock_irq(&vmci_dev_spinlock);
+	vmci_pdev = NULL;
+	vmci_dev_g = NULL;
+	spin_unlock_irq(&vmci_dev_spinlock);
+
+err_free_data_buffer:
+	vfree(vmci_dev->data_buffer);
+
+	/* The rest are managed resources and will be freed by PCI core */
+	return error;
+}
+
+static void vmci_guest_remove_device(struct pci_dev *pdev)
+{
+	struct vmci_guest_device *vmci_dev = pci_get_drvdata(pdev);
+	int vmci_err;
+
+	dev_dbg(&pdev->dev, "Removing device\n");
+
+	atomic_dec(&vmci_num_guest_devices);
+
+	vmci_qp_guest_endpoints_exit();
+
+	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
+	if (vmci_err < VMCI_SUCCESS)
+		dev_warn(&pdev->dev,
+			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
+			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);
+
+	spin_lock_irq(&vmci_dev_spinlock);
+	vmci_dev_g = NULL;
+	vmci_pdev = NULL;
+	spin_unlock_irq(&vmci_dev_spinlock);
+
+	dev_dbg(&pdev->dev, "Resetting vmci device\n");
+	iowrite32(VMCI_CONTROL_RESET, vmci_dev->iobase + VMCI_CONTROL_ADDR);
+
+	/*
+	 * Free IRQ and then disable MSI/MSI-X as appropriate.  For
+	 * MSI-X, we might have multiple vectors, each with their own
+	 * IRQ, which we must free too.
+	 */
+	if (vmci_dev->exclusive_vectors)
+		free_irq(pci_irq_vector(pdev, 1), vmci_dev);
+	free_irq(pci_irq_vector(pdev, 0), vmci_dev);
+	pci_free_irq_vectors(pdev);
+
+	tasklet_kill(&vmci_dev->datagram_tasklet);
+	tasklet_kill(&vmci_dev->bm_tasklet);
+
+	if (vmci_dev->notification_bitmap) {
+		/*
+		 * The device reset above cleared the bitmap state of the
+		 * device, so we can safely free it here.
+		 */
+
+		dma_free_coherent(&pdev->dev, PAGE_SIZE,
+				  vmci_dev->notification_bitmap,
+				  vmci_dev->notification_base);
+	}
+
+	vfree(vmci_dev->data_buffer);
+
+	/* The rest are managed resources and will be freed by PCI core */
+}
+
+static const struct pci_device_id vmci_ids[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_VMCI), },
+	{ 0 },
+};
+MODULE_DEVICE_TABLE(pci, vmci_ids);
+
+static struct pci_driver vmci_guest_driver = {
+	.name		= KBUILD_MODNAME,
+	.id_table	= vmci_ids,
+	.probe		= vmci_guest_probe_device,
+	.remove		= vmci_guest_remove_device,
+};
+
+int __init vmci_guest_init(void)
+{
+	return pci_register_driver(&vmci_guest_driver);
+}
+
+void __exit vmci_guest_exit(void)
+{
+	pci_unregister_driver(&vmci_guest_driver);
+}
diff --git a/drivers/misc/vmw_vmci/vmci_handle_array.c b/drivers/misc/vmw_vmci/vmci_handle_array.c
new file mode 100644
index 000000000..917e18a8a
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_handle_array.c
@@ -0,0 +1,154 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/slab.h>
+#include "vmci_handle_array.h"
+
+static size_t handle_arr_calc_size(u32 capacity)
+{
+	return VMCI_HANDLE_ARRAY_HEADER_SIZE +
+	    capacity * sizeof(struct vmci_handle);
+}
+
+struct vmci_handle_arr *vmci_handle_arr_create(u32 capacity, u32 max_capacity)
+{
+	struct vmci_handle_arr *array;
+
+	if (max_capacity == 0 || capacity > max_capacity)
+		return NULL;
+
+	if (capacity == 0)
+		capacity = min((u32)VMCI_HANDLE_ARRAY_DEFAULT_CAPACITY,
+			       max_capacity);
+
+	array = kmalloc(handle_arr_calc_size(capacity), GFP_ATOMIC);
+	if (!array)
+		return NULL;
+
+	array->capacity = capacity;
+	array->max_capacity = max_capacity;
+	array->size = 0;
+
+	return array;
+}
+
+void vmci_handle_arr_destroy(struct vmci_handle_arr *array)
+{
+	kfree(array);
+}
+
+int vmci_handle_arr_append_entry(struct vmci_handle_arr **array_ptr,
+				 struct vmci_handle handle)
+{
+	struct vmci_handle_arr *array = *array_ptr;
+
+	if (unlikely(array->size >= array->capacity)) {
+		/* reallocate. */
+		struct vmci_handle_arr *new_array;
+		u32 capacity_bump = min(array->max_capacity - array->capacity,
+					array->capacity);
+		size_t new_size = handle_arr_calc_size(array->capacity +
+						       capacity_bump);
+
+		if (array->size >= array->max_capacity)
+			return VMCI_ERROR_NO_MEM;
+
+		new_array = krealloc(array, new_size, GFP_ATOMIC);
+		if (!new_array)
+			return VMCI_ERROR_NO_MEM;
+
+		new_array->capacity += capacity_bump;
+		*array_ptr = array = new_array;
+	}
+
+	array->entries[array->size] = handle;
+	array->size++;
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Handle that was removed, VMCI_INVALID_HANDLE if entry not found.
+ */
+struct vmci_handle vmci_handle_arr_remove_entry(struct vmci_handle_arr *array,
+						struct vmci_handle entry_handle)
+{
+	struct vmci_handle handle = VMCI_INVALID_HANDLE;
+	u32 i;
+
+	for (i = 0; i < array->size; i++) {
+		if (vmci_handle_is_equal(array->entries[i], entry_handle)) {
+			handle = array->entries[i];
+			array->size--;
+			array->entries[i] = array->entries[array->size];
+			array->entries[array->size] = VMCI_INVALID_HANDLE;
+			break;
+		}
+	}
+
+	return handle;
+}
+
+/*
+ * Handle that was removed, VMCI_INVALID_HANDLE if array was empty.
+ */
+struct vmci_handle vmci_handle_arr_remove_tail(struct vmci_handle_arr *array)
+{
+	struct vmci_handle handle = VMCI_INVALID_HANDLE;
+
+	if (array->size) {
+		array->size--;
+		handle = array->entries[array->size];
+		array->entries[array->size] = VMCI_INVALID_HANDLE;
+	}
+
+	return handle;
+}
+
+/*
+ * Handle at given index, VMCI_INVALID_HANDLE if invalid index.
+ */
+struct vmci_handle
+vmci_handle_arr_get_entry(const struct vmci_handle_arr *array, u32 index)
+{
+	if (unlikely(index >= array->size))
+		return VMCI_INVALID_HANDLE;
+
+	return array->entries[index];
+}
+
+bool vmci_handle_arr_has_entry(const struct vmci_handle_arr *array,
+			       struct vmci_handle entry_handle)
+{
+	u32 i;
+
+	for (i = 0; i < array->size; i++)
+		if (vmci_handle_is_equal(array->entries[i], entry_handle))
+			return true;
+
+	return false;
+}
+
+/*
+ * NULL if the array is empty. Otherwise, a pointer to the array
+ * of VMCI handles in the handle array.
+ */
+struct vmci_handle *vmci_handle_arr_get_handles(struct vmci_handle_arr *array)
+{
+	if (array->size)
+		return array->entries;
+
+	return NULL;
+}
diff --git a/drivers/misc/vmw_vmci/vmci_handle_array.h b/drivers/misc/vmw_vmci/vmci_handle_array.h
new file mode 100644
index 000000000..0fc585978
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_handle_array.h
@@ -0,0 +1,61 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _VMCI_HANDLE_ARRAY_H_
+#define _VMCI_HANDLE_ARRAY_H_
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/limits.h>
+#include <linux/types.h>
+
+struct vmci_handle_arr {
+	u32 capacity;
+	u32 max_capacity;
+	u32 size;
+	u32 pad;
+	struct vmci_handle entries[];
+};
+
+#define VMCI_HANDLE_ARRAY_HEADER_SIZE				\
+	offsetof(struct vmci_handle_arr, entries)
+/* Select a default capacity that results in a 64 byte sized array */
+#define VMCI_HANDLE_ARRAY_DEFAULT_CAPACITY			6
+/* Make sure that the max array size can be expressed by a u32 */
+#define VMCI_HANDLE_ARRAY_MAX_CAPACITY				\
+	((U32_MAX - VMCI_HANDLE_ARRAY_HEADER_SIZE - 1) /	\
+	sizeof(struct vmci_handle))
+
+struct vmci_handle_arr *vmci_handle_arr_create(u32 capacity, u32 max_capacity);
+void vmci_handle_arr_destroy(struct vmci_handle_arr *array);
+int vmci_handle_arr_append_entry(struct vmci_handle_arr **array_ptr,
+				 struct vmci_handle handle);
+struct vmci_handle vmci_handle_arr_remove_entry(struct vmci_handle_arr *array,
+						struct vmci_handle
+						entry_handle);
+struct vmci_handle vmci_handle_arr_remove_tail(struct vmci_handle_arr *array);
+struct vmci_handle
+vmci_handle_arr_get_entry(const struct vmci_handle_arr *array, u32 index);
+bool vmci_handle_arr_has_entry(const struct vmci_handle_arr *array,
+			       struct vmci_handle entry_handle);
+struct vmci_handle *vmci_handle_arr_get_handles(struct vmci_handle_arr *array);
+
+static inline u32 vmci_handle_arr_get_size(
+	const struct vmci_handle_arr *array)
+{
+	return array->size;
+}
+
+
+#endif /* _VMCI_HANDLE_ARRAY_H_ */
diff --git a/drivers/misc/vmw_vmci/vmci_host.c b/drivers/misc/vmw_vmci/vmci_host.c
new file mode 100644
index 000000000..83e0c95d2
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_host.c
@@ -0,0 +1,1036 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/moduleparam.h>
+#include <linux/miscdevice.h>
+#include <linux/interrupt.h>
+#include <linux/highmem.h>
+#include <linux/atomic.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/cred.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/pci.h>
+#include <linux/smp.h>
+#include <linux/fs.h>
+#include <linux/io.h>
+
+#include "vmci_handle_array.h"
+#include "vmci_queue_pair.h"
+#include "vmci_datagram.h"
+#include "vmci_doorbell.h"
+#include "vmci_resource.h"
+#include "vmci_context.h"
+#include "vmci_driver.h"
+#include "vmci_event.h"
+
+#define VMCI_UTIL_NUM_RESOURCES 1
+
+enum {
+	VMCI_NOTIFY_RESOURCE_QUEUE_PAIR = 0,
+	VMCI_NOTIFY_RESOURCE_DOOR_BELL = 1,
+};
+
+enum {
+	VMCI_NOTIFY_RESOURCE_ACTION_NOTIFY = 0,
+	VMCI_NOTIFY_RESOURCE_ACTION_CREATE = 1,
+	VMCI_NOTIFY_RESOURCE_ACTION_DESTROY = 2,
+};
+
+/*
+ * VMCI driver initialization. This block can also be used to
+ * pass initial group membership etc.
+ */
+struct vmci_init_blk {
+	u32 cid;
+	u32 flags;
+};
+
+/* VMCIqueue_pairAllocInfo_VMToVM */
+struct vmci_qp_alloc_info_vmvm {
+	struct vmci_handle handle;
+	u32 peer;
+	u32 flags;
+	u64 produce_size;
+	u64 consume_size;
+	u64 produce_page_file;	  /* User VA. */
+	u64 consume_page_file;	  /* User VA. */
+	u64 produce_page_file_size;  /* Size of the file name array. */
+	u64 consume_page_file_size;  /* Size of the file name array. */
+	s32 result;
+	u32 _pad;
+};
+
+/* VMCISetNotifyInfo: Used to pass notify flag's address to the host driver. */
+struct vmci_set_notify_info {
+	u64 notify_uva;
+	s32 result;
+	u32 _pad;
+};
+
+/*
+ * Per-instance host state
+ */
+struct vmci_host_dev {
+	struct vmci_ctx *context;
+	int user_version;
+	enum vmci_obj_type ct_type;
+	struct mutex lock;  /* Mutex lock for vmci context access */
+};
+
+static struct vmci_ctx *host_context;
+static bool vmci_host_device_initialized;
+static atomic_t vmci_host_active_users = ATOMIC_INIT(0);
+
+/*
+ * Determines whether the VMCI host personality is
+ * available. Since the core functionality of the host driver is
+ * always present, all guests could possibly use the host
+ * personality. However, to minimize the deviation from the
+ * pre-unified driver state of affairs, we only consider the host
+ * device active if there is no active guest device or if there
+ * are VMX'en with active VMCI contexts using the host device.
+ */
+bool vmci_host_code_active(void)
+{
+	return vmci_host_device_initialized &&
+	    (!vmci_guest_code_active() ||
+	     atomic_read(&vmci_host_active_users) > 0);
+}
+
+/*
+ * Called on open of /dev/vmci.
+ */
+static int vmci_host_open(struct inode *inode, struct file *filp)
+{
+	struct vmci_host_dev *vmci_host_dev;
+
+	vmci_host_dev = kzalloc(sizeof(struct vmci_host_dev), GFP_KERNEL);
+	if (vmci_host_dev == NULL)
+		return -ENOMEM;
+
+	vmci_host_dev->ct_type = VMCIOBJ_NOT_SET;
+	mutex_init(&vmci_host_dev->lock);
+	filp->private_data = vmci_host_dev;
+
+	return 0;
+}
+
+/*
+ * Called on close of /dev/vmci, most often when the process
+ * exits.
+ */
+static int vmci_host_close(struct inode *inode, struct file *filp)
+{
+	struct vmci_host_dev *vmci_host_dev = filp->private_data;
+
+	if (vmci_host_dev->ct_type == VMCIOBJ_CONTEXT) {
+		vmci_ctx_destroy(vmci_host_dev->context);
+		vmci_host_dev->context = NULL;
+
+		/*
+		 * The number of active contexts is used to track whether any
+		 * VMX'en are using the host personality. It is incremented when
+		 * a context is created through the IOCTL_VMCI_INIT_CONTEXT
+		 * ioctl.
+		 */
+		atomic_dec(&vmci_host_active_users);
+	}
+	vmci_host_dev->ct_type = VMCIOBJ_NOT_SET;
+
+	kfree(vmci_host_dev);
+	filp->private_data = NULL;
+	return 0;
+}
+
+/*
+ * This is used to wake up the VMX when a VMCI call arrives, or
+ * to wake up select() or poll() at the next clock tick.
+ */
+static __poll_t vmci_host_poll(struct file *filp, poll_table *wait)
+{
+	struct vmci_host_dev *vmci_host_dev = filp->private_data;
+	struct vmci_ctx *context = vmci_host_dev->context;
+	__poll_t mask = 0;
+
+	if (vmci_host_dev->ct_type == VMCIOBJ_CONTEXT) {
+		/* Check for VMCI calls to this VM context. */
+		if (wait)
+			poll_wait(filp, &context->host_context.wait_queue,
+				  wait);
+
+		spin_lock(&context->lock);
+		if (context->pending_datagrams > 0 ||
+		    vmci_handle_arr_get_size(
+				context->pending_doorbell_array) > 0) {
+			mask = EPOLLIN;
+		}
+		spin_unlock(&context->lock);
+	}
+	return mask;
+}
+
+/*
+ * Copies the handles of a handle array into a user buffer, and
+ * returns the new length in userBufferSize. If the copy to the
+ * user buffer fails, the functions still returns VMCI_SUCCESS,
+ * but retval != 0.
+ */
+static int drv_cp_harray_to_user(void __user *user_buf_uva,
+				 u64 *user_buf_size,
+				 struct vmci_handle_arr *handle_array,
+				 int *retval)
+{
+	u32 array_size = 0;
+	struct vmci_handle *handles;
+
+	if (handle_array)
+		array_size = vmci_handle_arr_get_size(handle_array);
+
+	if (array_size * sizeof(*handles) > *user_buf_size)
+		return VMCI_ERROR_MORE_DATA;
+
+	*user_buf_size = array_size * sizeof(*handles);
+	if (*user_buf_size)
+		*retval = copy_to_user(user_buf_uva,
+				       vmci_handle_arr_get_handles
+				       (handle_array), *user_buf_size);
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Sets up a given context for notify to work. Maps the notify
+ * boolean in user VA into kernel space.
+ */
+static int vmci_host_setup_notify(struct vmci_ctx *context,
+				  unsigned long uva)
+{
+	int retval;
+
+	if (context->notify_page) {
+		pr_devel("%s: Notify mechanism is already set up\n", __func__);
+		return VMCI_ERROR_DUPLICATE_ENTRY;
+	}
+
+	/*
+	 * We are using 'bool' internally, but let's make sure we explicit
+	 * about the size.
+	 */
+	BUILD_BUG_ON(sizeof(bool) != sizeof(u8));
+	if (!access_ok(VERIFY_WRITE, (void __user *)uva, sizeof(u8)))
+		return VMCI_ERROR_GENERIC;
+
+	/*
+	 * Lock physical page backing a given user VA.
+	 */
+	retval = get_user_pages_fast(uva, 1, 1, &context->notify_page);
+	if (retval != 1) {
+		context->notify_page = NULL;
+		return VMCI_ERROR_GENERIC;
+	}
+
+	/*
+	 * Map the locked page and set up notify pointer.
+	 */
+	context->notify = kmap(context->notify_page) + (uva & (PAGE_SIZE - 1));
+	vmci_ctx_check_signal_notify(context);
+
+	return VMCI_SUCCESS;
+}
+
+static int vmci_host_get_version(struct vmci_host_dev *vmci_host_dev,
+				 unsigned int cmd, void __user *uptr)
+{
+	if (cmd == IOCTL_VMCI_VERSION2) {
+		int __user *vptr = uptr;
+		if (get_user(vmci_host_dev->user_version, vptr))
+			return -EFAULT;
+	}
+
+	/*
+	 * The basic logic here is:
+	 *
+	 * If the user sends in a version of 0 tell it our version.
+	 * If the user didn't send in a version, tell it our version.
+	 * If the user sent in an old version, tell it -its- version.
+	 * If the user sent in an newer version, tell it our version.
+	 *
+	 * The rationale behind telling the caller its version is that
+	 * Workstation 6.5 required that VMX and VMCI kernel module were
+	 * version sync'd.  All new VMX users will be programmed to
+	 * handle the VMCI kernel module version.
+	 */
+
+	if (vmci_host_dev->user_version > 0 &&
+	    vmci_host_dev->user_version < VMCI_VERSION_HOSTQP) {
+		return vmci_host_dev->user_version;
+	}
+
+	return VMCI_VERSION;
+}
+
+#define vmci_ioctl_err(fmt, ...)	\
+	pr_devel("%s: " fmt, ioctl_name, ##__VA_ARGS__)
+
+static int vmci_host_do_init_context(struct vmci_host_dev *vmci_host_dev,
+				     const char *ioctl_name,
+				     void __user *uptr)
+{
+	struct vmci_init_blk init_block;
+	const struct cred *cred;
+	int retval;
+
+	if (copy_from_user(&init_block, uptr, sizeof(init_block))) {
+		vmci_ioctl_err("error reading init block\n");
+		return -EFAULT;
+	}
+
+	mutex_lock(&vmci_host_dev->lock);
+
+	if (vmci_host_dev->ct_type != VMCIOBJ_NOT_SET) {
+		vmci_ioctl_err("received VMCI init on initialized handle\n");
+		retval = -EINVAL;
+		goto out;
+	}
+
+	if (init_block.flags & ~VMCI_PRIVILEGE_FLAG_RESTRICTED) {
+		vmci_ioctl_err("unsupported VMCI restriction flag\n");
+		retval = -EINVAL;
+		goto out;
+	}
+
+	cred = get_current_cred();
+	vmci_host_dev->context = vmci_ctx_create(init_block.cid,
+						 init_block.flags, 0,
+						 vmci_host_dev->user_version,
+						 cred);
+	put_cred(cred);
+	if (IS_ERR(vmci_host_dev->context)) {
+		retval = PTR_ERR(vmci_host_dev->context);
+		vmci_ioctl_err("error initializing context\n");
+		goto out;
+	}
+
+	/*
+	 * Copy cid to userlevel, we do this to allow the VMX
+	 * to enforce its policy on cid generation.
+	 */
+	init_block.cid = vmci_ctx_get_id(vmci_host_dev->context);
+	if (copy_to_user(uptr, &init_block, sizeof(init_block))) {
+		vmci_ctx_destroy(vmci_host_dev->context);
+		vmci_host_dev->context = NULL;
+		vmci_ioctl_err("error writing init block\n");
+		retval = -EFAULT;
+		goto out;
+	}
+
+	vmci_host_dev->ct_type = VMCIOBJ_CONTEXT;
+	atomic_inc(&vmci_host_active_users);
+
+	retval = 0;
+
+out:
+	mutex_unlock(&vmci_host_dev->lock);
+	return retval;
+}
+
+static int vmci_host_do_send_datagram(struct vmci_host_dev *vmci_host_dev,
+				      const char *ioctl_name,
+				      void __user *uptr)
+{
+	struct vmci_datagram_snd_rcv_info send_info;
+	struct vmci_datagram *dg = NULL;
+	u32 cid;
+
+	if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+		vmci_ioctl_err("only valid for contexts\n");
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&send_info, uptr, sizeof(send_info)))
+		return -EFAULT;
+
+	if (send_info.len > VMCI_MAX_DG_SIZE) {
+		vmci_ioctl_err("datagram is too big (size=%d)\n",
+			       send_info.len);
+		return -EINVAL;
+	}
+
+	if (send_info.len < sizeof(*dg)) {
+		vmci_ioctl_err("datagram is too small (size=%d)\n",
+			       send_info.len);
+		return -EINVAL;
+	}
+
+	dg = memdup_user((void __user *)(uintptr_t)send_info.addr,
+			 send_info.len);
+	if (IS_ERR(dg)) {
+		vmci_ioctl_err(
+			"cannot allocate memory to dispatch datagram\n");
+		return PTR_ERR(dg);
+	}
+
+	if (VMCI_DG_SIZE(dg) != send_info.len) {
+		vmci_ioctl_err("datagram size mismatch\n");
+		kfree(dg);
+		return -EINVAL;
+	}
+
+	pr_devel("Datagram dst (handle=0x%x:0x%x) src (handle=0x%x:0x%x), payload (size=%llu bytes)\n",
+		 dg->dst.context, dg->dst.resource,
+		 dg->src.context, dg->src.resource,
+		 (unsigned long long)dg->payload_size);
+
+	/* Get source context id. */
+	cid = vmci_ctx_get_id(vmci_host_dev->context);
+	send_info.result = vmci_datagram_dispatch(cid, dg, true);
+	kfree(dg);
+
+	return copy_to_user(uptr, &send_info, sizeof(send_info)) ? -EFAULT : 0;
+}
+
+static int vmci_host_do_receive_datagram(struct vmci_host_dev *vmci_host_dev,
+					 const char *ioctl_name,
+					 void __user *uptr)
+{
+	struct vmci_datagram_snd_rcv_info recv_info;
+	struct vmci_datagram *dg = NULL;
+	int retval;
+	size_t size;
+
+	if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+		vmci_ioctl_err("only valid for contexts\n");
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&recv_info, uptr, sizeof(recv_info)))
+		return -EFAULT;
+
+	size = recv_info.len;
+	recv_info.result = vmci_ctx_dequeue_datagram(vmci_host_dev->context,
+						     &size, &dg);
+
+	if (recv_info.result >= VMCI_SUCCESS) {
+		void __user *ubuf = (void __user *)(uintptr_t)recv_info.addr;
+		retval = copy_to_user(ubuf, dg, VMCI_DG_SIZE(dg));
+		kfree(dg);
+		if (retval != 0)
+			return -EFAULT;
+	}
+
+	return copy_to_user(uptr, &recv_info, sizeof(recv_info)) ? -EFAULT : 0;
+}
+
+static int vmci_host_do_alloc_queuepair(struct vmci_host_dev *vmci_host_dev,
+					const char *ioctl_name,
+					void __user *uptr)
+{
+	struct vmci_handle handle;
+	int vmci_status;
+	int __user *retptr;
+	u32 cid;
+
+	if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+		vmci_ioctl_err("only valid for contexts\n");
+		return -EINVAL;
+	}
+
+	cid = vmci_ctx_get_id(vmci_host_dev->context);
+
+	if (vmci_host_dev->user_version < VMCI_VERSION_NOVMVM) {
+		struct vmci_qp_alloc_info_vmvm alloc_info;
+		struct vmci_qp_alloc_info_vmvm __user *info = uptr;
+
+		if (copy_from_user(&alloc_info, uptr, sizeof(alloc_info)))
+			return -EFAULT;
+
+		handle = alloc_info.handle;
+		retptr = &info->result;
+
+		vmci_status = vmci_qp_broker_alloc(alloc_info.handle,
+						alloc_info.peer,
+						alloc_info.flags,
+						VMCI_NO_PRIVILEGE_FLAGS,
+						alloc_info.produce_size,
+						alloc_info.consume_size,
+						NULL,
+						vmci_host_dev->context);
+
+		if (vmci_status == VMCI_SUCCESS)
+			vmci_status = VMCI_SUCCESS_QUEUEPAIR_CREATE;
+	} else {
+		struct vmci_qp_alloc_info alloc_info;
+		struct vmci_qp_alloc_info __user *info = uptr;
+		struct vmci_qp_page_store page_store;
+
+		if (copy_from_user(&alloc_info, uptr, sizeof(alloc_info)))
+			return -EFAULT;
+
+		handle = alloc_info.handle;
+		retptr = &info->result;
+
+		page_store.pages = alloc_info.ppn_va;
+		page_store.len = alloc_info.num_ppns;
+
+		vmci_status = vmci_qp_broker_alloc(alloc_info.handle,
+						alloc_info.peer,
+						alloc_info.flags,
+						VMCI_NO_PRIVILEGE_FLAGS,
+						alloc_info.produce_size,
+						alloc_info.consume_size,
+						&page_store,
+						vmci_host_dev->context);
+	}
+
+	if (put_user(vmci_status, retptr)) {
+		if (vmci_status >= VMCI_SUCCESS) {
+			vmci_status = vmci_qp_broker_detach(handle,
+							vmci_host_dev->context);
+		}
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+static int vmci_host_do_queuepair_setva(struct vmci_host_dev *vmci_host_dev,
+					const char *ioctl_name,
+					void __user *uptr)
+{
+	struct vmci_qp_set_va_info set_va_info;
+	struct vmci_qp_set_va_info __user *info = uptr;
+	s32 result;
+
+	if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+		vmci_ioctl_err("only valid for contexts\n");
+		return -EINVAL;
+	}
+
+	if (vmci_host_dev->user_version < VMCI_VERSION_NOVMVM) {
+		vmci_ioctl_err("is not allowed\n");
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&set_va_info, uptr, sizeof(set_va_info)))
+		return -EFAULT;
+
+	if (set_va_info.va) {
+		/*
+		 * VMX is passing down a new VA for the queue
+		 * pair mapping.
+		 */
+		result = vmci_qp_broker_map(set_va_info.handle,
+					    vmci_host_dev->context,
+					    set_va_info.va);
+	} else {
+		/*
+		 * The queue pair is about to be unmapped by
+		 * the VMX.
+		 */
+		result = vmci_qp_broker_unmap(set_va_info.handle,
+					 vmci_host_dev->context, 0);
+	}
+
+	return put_user(result, &info->result) ? -EFAULT : 0;
+}
+
+static int vmci_host_do_queuepair_setpf(struct vmci_host_dev *vmci_host_dev,
+					const char *ioctl_name,
+					void __user *uptr)
+{
+	struct vmci_qp_page_file_info page_file_info;
+	struct vmci_qp_page_file_info __user *info = uptr;
+	s32 result;
+
+	if (vmci_host_dev->user_version < VMCI_VERSION_HOSTQP ||
+	    vmci_host_dev->user_version >= VMCI_VERSION_NOVMVM) {
+		vmci_ioctl_err("not supported on this VMX (version=%d)\n",
+			       vmci_host_dev->user_version);
+		return -EINVAL;
+	}
+
+	if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+		vmci_ioctl_err("only valid for contexts\n");
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&page_file_info, uptr, sizeof(*info)))
+		return -EFAULT;
+
+	/*
+	 * Communicate success pre-emptively to the caller.  Note that the
+	 * basic premise is that it is incumbent upon the caller not to look at
+	 * the info.result field until after the ioctl() returns.  And then,
+	 * only if the ioctl() result indicates no error.  We send up the
+	 * SUCCESS status before calling SetPageStore() store because failing
+	 * to copy up the result code means unwinding the SetPageStore().
+	 *
+	 * It turns out the logic to unwind a SetPageStore() opens a can of
+	 * worms.  For example, if a host had created the queue_pair and a
+	 * guest attaches and SetPageStore() is successful but writing success
+	 * fails, then ... the host has to be stopped from writing (anymore)
+	 * data into the queue_pair.  That means an additional test in the
+	 * VMCI_Enqueue() code path.  Ugh.
+	 */
+
+	if (put_user(VMCI_SUCCESS, &info->result)) {
+		/*
+		 * In this case, we can't write a result field of the
+		 * caller's info block.  So, we don't even try to
+		 * SetPageStore().
+		 */
+		return -EFAULT;
+	}
+
+	result = vmci_qp_broker_set_page_store(page_file_info.handle,
+						page_file_info.produce_va,
+						page_file_info.consume_va,
+						vmci_host_dev->context);
+	if (result < VMCI_SUCCESS) {
+		if (put_user(result, &info->result)) {
+			/*
+			 * Note that in this case the SetPageStore()
+			 * call failed but we were unable to
+			 * communicate that to the caller (because the
+			 * copy_to_user() call failed).  So, if we
+			 * simply return an error (in this case
+			 * -EFAULT) then the caller will know that the
+			 *  SetPageStore failed even though we couldn't
+			 *  put the result code in the result field and
+			 *  indicate exactly why it failed.
+			 *
+			 * That says nothing about the issue where we
+			 * were once able to write to the caller's info
+			 * memory and now can't.  Something more
+			 * serious is probably going on than the fact
+			 * that SetPageStore() didn't work.
+			 */
+			return -EFAULT;
+		}
+	}
+
+	return 0;
+}
+
+static int vmci_host_do_qp_detach(struct vmci_host_dev *vmci_host_dev,
+				  const char *ioctl_name,
+				  void __user *uptr)
+{
+	struct vmci_qp_dtch_info detach_info;
+	struct vmci_qp_dtch_info __user *info = uptr;
+	s32 result;
+
+	if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+		vmci_ioctl_err("only valid for contexts\n");
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&detach_info, uptr, sizeof(detach_info)))
+		return -EFAULT;
+
+	result = vmci_qp_broker_detach(detach_info.handle,
+				       vmci_host_dev->context);
+	if (result == VMCI_SUCCESS &&
+	    vmci_host_dev->user_version < VMCI_VERSION_NOVMVM) {
+		result = VMCI_SUCCESS_LAST_DETACH;
+	}
+
+	return put_user(result, &info->result) ? -EFAULT : 0;
+}
+
+static int vmci_host_do_ctx_add_notify(struct vmci_host_dev *vmci_host_dev,
+				       const char *ioctl_name,
+				       void __user *uptr)
+{
+	struct vmci_ctx_info ar_info;
+	struct vmci_ctx_info __user *info = uptr;
+	s32 result;
+	u32 cid;
+
+	if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+		vmci_ioctl_err("only valid for contexts\n");
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&ar_info, uptr, sizeof(ar_info)))
+		return -EFAULT;
+
+	cid = vmci_ctx_get_id(vmci_host_dev->context);
+	result = vmci_ctx_add_notification(cid, ar_info.remote_cid);
+
+	return put_user(result, &info->result) ? -EFAULT : 0;
+}
+
+static int vmci_host_do_ctx_remove_notify(struct vmci_host_dev *vmci_host_dev,
+					  const char *ioctl_name,
+					  void __user *uptr)
+{
+	struct vmci_ctx_info ar_info;
+	struct vmci_ctx_info __user *info = uptr;
+	u32 cid;
+	int result;
+
+	if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+		vmci_ioctl_err("only valid for contexts\n");
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&ar_info, uptr, sizeof(ar_info)))
+		return -EFAULT;
+
+	cid = vmci_ctx_get_id(vmci_host_dev->context);
+	result = vmci_ctx_remove_notification(cid,
+					      ar_info.remote_cid);
+
+	return put_user(result, &info->result) ? -EFAULT : 0;
+}
+
+static int vmci_host_do_ctx_get_cpt_state(struct vmci_host_dev *vmci_host_dev,
+					  const char *ioctl_name,
+					  void __user *uptr)
+{
+	struct vmci_ctx_chkpt_buf_info get_info;
+	u32 cid;
+	void *cpt_buf;
+	int retval;
+
+	if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+		vmci_ioctl_err("only valid for contexts\n");
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&get_info, uptr, sizeof(get_info)))
+		return -EFAULT;
+
+	cid = vmci_ctx_get_id(vmci_host_dev->context);
+	get_info.result = vmci_ctx_get_chkpt_state(cid, get_info.cpt_type,
+						&get_info.buf_size, &cpt_buf);
+	if (get_info.result == VMCI_SUCCESS && get_info.buf_size) {
+		void __user *ubuf = (void __user *)(uintptr_t)get_info.cpt_buf;
+		retval = copy_to_user(ubuf, cpt_buf, get_info.buf_size);
+		kfree(cpt_buf);
+
+		if (retval)
+			return -EFAULT;
+	}
+
+	return copy_to_user(uptr, &get_info, sizeof(get_info)) ? -EFAULT : 0;
+}
+
+static int vmci_host_do_ctx_set_cpt_state(struct vmci_host_dev *vmci_host_dev,
+					  const char *ioctl_name,
+					  void __user *uptr)
+{
+	struct vmci_ctx_chkpt_buf_info set_info;
+	u32 cid;
+	void *cpt_buf;
+	int retval;
+
+	if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+		vmci_ioctl_err("only valid for contexts\n");
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&set_info, uptr, sizeof(set_info)))
+		return -EFAULT;
+
+	cpt_buf = kmalloc(set_info.buf_size, GFP_KERNEL);
+	if (!cpt_buf) {
+		vmci_ioctl_err(
+			"cannot allocate memory to set cpt state (type=%d)\n",
+			set_info.cpt_type);
+		return -ENOMEM;
+	}
+
+	if (copy_from_user(cpt_buf, (void __user *)(uintptr_t)set_info.cpt_buf,
+			   set_info.buf_size)) {
+		retval = -EFAULT;
+		goto out;
+	}
+
+	cid = vmci_ctx_get_id(vmci_host_dev->context);
+	set_info.result = vmci_ctx_set_chkpt_state(cid, set_info.cpt_type,
+						   set_info.buf_size, cpt_buf);
+
+	retval = copy_to_user(uptr, &set_info, sizeof(set_info)) ? -EFAULT : 0;
+
+out:
+	kfree(cpt_buf);
+	return retval;
+}
+
+static int vmci_host_do_get_context_id(struct vmci_host_dev *vmci_host_dev,
+				       const char *ioctl_name,
+				       void __user *uptr)
+{
+	u32 __user *u32ptr = uptr;
+
+	return put_user(VMCI_HOST_CONTEXT_ID, u32ptr) ? -EFAULT : 0;
+}
+
+static int vmci_host_do_set_notify(struct vmci_host_dev *vmci_host_dev,
+				   const char *ioctl_name,
+				   void __user *uptr)
+{
+	struct vmci_set_notify_info notify_info;
+
+	if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+		vmci_ioctl_err("only valid for contexts\n");
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&notify_info, uptr, sizeof(notify_info)))
+		return -EFAULT;
+
+	if (notify_info.notify_uva) {
+		notify_info.result =
+			vmci_host_setup_notify(vmci_host_dev->context,
+					       notify_info.notify_uva);
+	} else {
+		vmci_ctx_unset_notify(vmci_host_dev->context);
+		notify_info.result = VMCI_SUCCESS;
+	}
+
+	return copy_to_user(uptr, &notify_info, sizeof(notify_info)) ?
+		-EFAULT : 0;
+}
+
+static int vmci_host_do_notify_resource(struct vmci_host_dev *vmci_host_dev,
+					const char *ioctl_name,
+					void __user *uptr)
+{
+	struct vmci_dbell_notify_resource_info info;
+	u32 cid;
+
+	if (vmci_host_dev->user_version < VMCI_VERSION_NOTIFY) {
+		vmci_ioctl_err("invalid for current VMX versions\n");
+		return -EINVAL;
+	}
+
+	if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+		vmci_ioctl_err("only valid for contexts\n");
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&info, uptr, sizeof(info)))
+		return -EFAULT;
+
+	cid = vmci_ctx_get_id(vmci_host_dev->context);
+
+	switch (info.action) {
+	case VMCI_NOTIFY_RESOURCE_ACTION_NOTIFY:
+		if (info.resource == VMCI_NOTIFY_RESOURCE_DOOR_BELL) {
+			u32 flags = VMCI_NO_PRIVILEGE_FLAGS;
+			info.result = vmci_ctx_notify_dbell(cid, info.handle,
+							    flags);
+		} else {
+			info.result = VMCI_ERROR_UNAVAILABLE;
+		}
+		break;
+
+	case VMCI_NOTIFY_RESOURCE_ACTION_CREATE:
+		info.result = vmci_ctx_dbell_create(cid, info.handle);
+		break;
+
+	case VMCI_NOTIFY_RESOURCE_ACTION_DESTROY:
+		info.result = vmci_ctx_dbell_destroy(cid, info.handle);
+		break;
+
+	default:
+		vmci_ioctl_err("got unknown action (action=%d)\n",
+			       info.action);
+		info.result = VMCI_ERROR_INVALID_ARGS;
+	}
+
+	return copy_to_user(uptr, &info, sizeof(info)) ? -EFAULT : 0;
+}
+
+static int vmci_host_do_recv_notifications(struct vmci_host_dev *vmci_host_dev,
+					   const char *ioctl_name,
+					   void __user *uptr)
+{
+	struct vmci_ctx_notify_recv_info info;
+	struct vmci_handle_arr *db_handle_array;
+	struct vmci_handle_arr *qp_handle_array;
+	void __user *ubuf;
+	u32 cid;
+	int retval = 0;
+
+	if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+		vmci_ioctl_err("only valid for contexts\n");
+		return -EINVAL;
+	}
+
+	if (vmci_host_dev->user_version < VMCI_VERSION_NOTIFY) {
+		vmci_ioctl_err("not supported for the current vmx version\n");
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&info, uptr, sizeof(info)))
+		return -EFAULT;
+
+	if ((info.db_handle_buf_size && !info.db_handle_buf_uva) ||
+	    (info.qp_handle_buf_size && !info.qp_handle_buf_uva)) {
+		return -EINVAL;
+	}
+
+	cid = vmci_ctx_get_id(vmci_host_dev->context);
+
+	info.result = vmci_ctx_rcv_notifications_get(cid,
+				&db_handle_array, &qp_handle_array);
+	if (info.result != VMCI_SUCCESS)
+		return copy_to_user(uptr, &info, sizeof(info)) ? -EFAULT : 0;
+
+	ubuf = (void __user *)(uintptr_t)info.db_handle_buf_uva;
+	info.result = drv_cp_harray_to_user(ubuf, &info.db_handle_buf_size,
+					    db_handle_array, &retval);
+	if (info.result == VMCI_SUCCESS && !retval) {
+		ubuf = (void __user *)(uintptr_t)info.qp_handle_buf_uva;
+		info.result = drv_cp_harray_to_user(ubuf,
+						    &info.qp_handle_buf_size,
+						    qp_handle_array, &retval);
+	}
+
+	if (!retval && copy_to_user(uptr, &info, sizeof(info)))
+		retval = -EFAULT;
+
+	vmci_ctx_rcv_notifications_release(cid,
+				db_handle_array, qp_handle_array,
+				info.result == VMCI_SUCCESS && !retval);
+
+	return retval;
+}
+
+static long vmci_host_unlocked_ioctl(struct file *filp,
+				     unsigned int iocmd, unsigned long ioarg)
+{
+#define VMCI_DO_IOCTL(ioctl_name, ioctl_fn) do {			\
+		char *name = __stringify(IOCTL_VMCI_ ## ioctl_name);	\
+		return vmci_host_do_ ## ioctl_fn(			\
+			vmci_host_dev, name, uptr);			\
+	} while (0)
+
+	struct vmci_host_dev *vmci_host_dev = filp->private_data;
+	void __user *uptr = (void __user *)ioarg;
+
+	switch (iocmd) {
+	case IOCTL_VMCI_INIT_CONTEXT:
+		VMCI_DO_IOCTL(INIT_CONTEXT, init_context);
+	case IOCTL_VMCI_DATAGRAM_SEND:
+		VMCI_DO_IOCTL(DATAGRAM_SEND, send_datagram);
+	case IOCTL_VMCI_DATAGRAM_RECEIVE:
+		VMCI_DO_IOCTL(DATAGRAM_RECEIVE, receive_datagram);
+	case IOCTL_VMCI_QUEUEPAIR_ALLOC:
+		VMCI_DO_IOCTL(QUEUEPAIR_ALLOC, alloc_queuepair);
+	case IOCTL_VMCI_QUEUEPAIR_SETVA:
+		VMCI_DO_IOCTL(QUEUEPAIR_SETVA, queuepair_setva);
+	case IOCTL_VMCI_QUEUEPAIR_SETPAGEFILE:
+		VMCI_DO_IOCTL(QUEUEPAIR_SETPAGEFILE, queuepair_setpf);
+	case IOCTL_VMCI_QUEUEPAIR_DETACH:
+		VMCI_DO_IOCTL(QUEUEPAIR_DETACH, qp_detach);
+	case IOCTL_VMCI_CTX_ADD_NOTIFICATION:
+		VMCI_DO_IOCTL(CTX_ADD_NOTIFICATION, ctx_add_notify);
+	case IOCTL_VMCI_CTX_REMOVE_NOTIFICATION:
+		VMCI_DO_IOCTL(CTX_REMOVE_NOTIFICATION, ctx_remove_notify);
+	case IOCTL_VMCI_CTX_GET_CPT_STATE:
+		VMCI_DO_IOCTL(CTX_GET_CPT_STATE, ctx_get_cpt_state);
+	case IOCTL_VMCI_CTX_SET_CPT_STATE:
+		VMCI_DO_IOCTL(CTX_SET_CPT_STATE, ctx_set_cpt_state);
+	case IOCTL_VMCI_GET_CONTEXT_ID:
+		VMCI_DO_IOCTL(GET_CONTEXT_ID, get_context_id);
+	case IOCTL_VMCI_SET_NOTIFY:
+		VMCI_DO_IOCTL(SET_NOTIFY, set_notify);
+	case IOCTL_VMCI_NOTIFY_RESOURCE:
+		VMCI_DO_IOCTL(NOTIFY_RESOURCE, notify_resource);
+	case IOCTL_VMCI_NOTIFICATIONS_RECEIVE:
+		VMCI_DO_IOCTL(NOTIFICATIONS_RECEIVE, recv_notifications);
+
+	case IOCTL_VMCI_VERSION:
+	case IOCTL_VMCI_VERSION2:
+		return vmci_host_get_version(vmci_host_dev, iocmd, uptr);
+
+	default:
+		pr_devel("%s: Unknown ioctl (iocmd=%d)\n", __func__, iocmd);
+		return -EINVAL;
+	}
+
+#undef VMCI_DO_IOCTL
+}
+
+static const struct file_operations vmuser_fops = {
+	.owner		= THIS_MODULE,
+	.open		= vmci_host_open,
+	.release	= vmci_host_close,
+	.poll		= vmci_host_poll,
+	.unlocked_ioctl	= vmci_host_unlocked_ioctl,
+	.compat_ioctl	= vmci_host_unlocked_ioctl,
+};
+
+static struct miscdevice vmci_host_miscdev = {
+	 .name = "vmci",
+	 .minor = MISC_DYNAMIC_MINOR,
+	 .fops = &vmuser_fops,
+};
+
+int __init vmci_host_init(void)
+{
+	int error;
+
+	host_context = vmci_ctx_create(VMCI_HOST_CONTEXT_ID,
+					VMCI_DEFAULT_PROC_PRIVILEGE_FLAGS,
+					-1, VMCI_VERSION, NULL);
+	if (IS_ERR(host_context)) {
+		error = PTR_ERR(host_context);
+		pr_warn("Failed to initialize VMCIContext (error%d)\n",
+			error);
+		return error;
+	}
+
+	error = misc_register(&vmci_host_miscdev);
+	if (error) {
+		pr_warn("Module registration error (name=%s, major=%d, minor=%d, err=%d)\n",
+			vmci_host_miscdev.name,
+			MISC_MAJOR, vmci_host_miscdev.minor,
+			error);
+		pr_warn("Unable to initialize host personality\n");
+		vmci_ctx_destroy(host_context);
+		return error;
+	}
+
+	pr_info("VMCI host device registered (name=%s, major=%d, minor=%d)\n",
+		vmci_host_miscdev.name, MISC_MAJOR, vmci_host_miscdev.minor);
+
+	vmci_host_device_initialized = true;
+	return 0;
+}
+
+void __exit vmci_host_exit(void)
+{
+	vmci_host_device_initialized = false;
+
+	misc_deregister(&vmci_host_miscdev);
+	vmci_ctx_destroy(host_context);
+	vmci_qp_broker_exit();
+
+	pr_debug("VMCI host driver module unloaded\n");
+}
diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c
new file mode 100644
index 000000000..db433d285
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -0,0 +1,3272 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/highmem.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/pagemap.h>
+#include <linux/pci.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/uio.h>
+#include <linux/wait.h>
+#include <linux/vmalloc.h>
+#include <linux/skbuff.h>
+
+#include "vmci_handle_array.h"
+#include "vmci_queue_pair.h"
+#include "vmci_datagram.h"
+#include "vmci_resource.h"
+#include "vmci_context.h"
+#include "vmci_driver.h"
+#include "vmci_event.h"
+#include "vmci_route.h"
+
+/*
+ * In the following, we will distinguish between two kinds of VMX processes -
+ * the ones with versions lower than VMCI_VERSION_NOVMVM that use specialized
+ * VMCI page files in the VMX and supporting VM to VM communication and the
+ * newer ones that use the guest memory directly. We will in the following
+ * refer to the older VMX versions as old-style VMX'en, and the newer ones as
+ * new-style VMX'en.
+ *
+ * The state transition datagram is as follows (the VMCIQPB_ prefix has been
+ * removed for readability) - see below for more details on the transtions:
+ *
+ *            --------------  NEW  -------------
+ *            |                                |
+ *           \_/                              \_/
+ *     CREATED_NO_MEM <-----------------> CREATED_MEM
+ *            |    |                           |
+ *            |    o-----------------------o   |
+ *            |                            |   |
+ *           \_/                          \_/ \_/
+ *     ATTACHED_NO_MEM <----------------> ATTACHED_MEM
+ *            |                            |   |
+ *            |     o----------------------o   |
+ *            |     |                          |
+ *           \_/   \_/                        \_/
+ *     SHUTDOWN_NO_MEM <----------------> SHUTDOWN_MEM
+ *            |                                |
+ *            |                                |
+ *            -------------> gone <-------------
+ *
+ * In more detail. When a VMCI queue pair is first created, it will be in the
+ * VMCIQPB_NEW state. It will then move into one of the following states:
+ *
+ * - VMCIQPB_CREATED_NO_MEM: this state indicates that either:
+ *
+ *     - the created was performed by a host endpoint, in which case there is
+ *       no backing memory yet.
+ *
+ *     - the create was initiated by an old-style VMX, that uses
+ *       vmci_qp_broker_set_page_store to specify the UVAs of the queue pair at
+ *       a later point in time. This state can be distinguished from the one
+ *       above by the context ID of the creator. A host side is not allowed to
+ *       attach until the page store has been set.
+ *
+ * - VMCIQPB_CREATED_MEM: this state is the result when the queue pair
+ *     is created by a VMX using the queue pair device backend that
+ *     sets the UVAs of the queue pair immediately and stores the
+ *     information for later attachers. At this point, it is ready for
+ *     the host side to attach to it.
+ *
+ * Once the queue pair is in one of the created states (with the exception of
+ * the case mentioned for older VMX'en above), it is possible to attach to the
+ * queue pair. Again we have two new states possible:
+ *
+ * - VMCIQPB_ATTACHED_MEM: this state can be reached through the following
+ *   paths:
+ *
+ *     - from VMCIQPB_CREATED_NO_MEM when a new-style VMX allocates a queue
+ *       pair, and attaches to a queue pair previously created by the host side.
+ *
+ *     - from VMCIQPB_CREATED_MEM when the host side attaches to a queue pair
+ *       already created by a guest.
+ *
+ *     - from VMCIQPB_ATTACHED_NO_MEM, when an old-style VMX calls
+ *       vmci_qp_broker_set_page_store (see below).
+ *
+ * - VMCIQPB_ATTACHED_NO_MEM: If the queue pair already was in the
+ *     VMCIQPB_CREATED_NO_MEM due to a host side create, an old-style VMX will
+ *     bring the queue pair into this state. Once vmci_qp_broker_set_page_store
+ *     is called to register the user memory, the VMCIQPB_ATTACH_MEM state
+ *     will be entered.
+ *
+ * From the attached queue pair, the queue pair can enter the shutdown states
+ * when either side of the queue pair detaches. If the guest side detaches
+ * first, the queue pair will enter the VMCIQPB_SHUTDOWN_NO_MEM state, where
+ * the content of the queue pair will no longer be available. If the host
+ * side detaches first, the queue pair will either enter the
+ * VMCIQPB_SHUTDOWN_MEM, if the guest memory is currently mapped, or
+ * VMCIQPB_SHUTDOWN_NO_MEM, if the guest memory is not mapped
+ * (e.g., the host detaches while a guest is stunned).
+ *
+ * New-style VMX'en will also unmap guest memory, if the guest is
+ * quiesced, e.g., during a snapshot operation. In that case, the guest
+ * memory will no longer be available, and the queue pair will transition from
+ * *_MEM state to a *_NO_MEM state. The VMX may later map the memory once more,
+ * in which case the queue pair will transition from the *_NO_MEM state at that
+ * point back to the *_MEM state. Note that the *_NO_MEM state may have changed,
+ * since the peer may have either attached or detached in the meantime. The
+ * values are laid out such that ++ on a state will move from a *_NO_MEM to a
+ * *_MEM state, and vice versa.
+ */
+
+/* The Kernel specific component of the struct vmci_queue structure. */
+struct vmci_queue_kern_if {
+	struct mutex __mutex;	/* Protects the queue. */
+	struct mutex *mutex;	/* Shared by producer and consumer queues. */
+	size_t num_pages;	/* Number of pages incl. header. */
+	bool host;		/* Host or guest? */
+	union {
+		struct {
+			dma_addr_t *pas;
+			void **vas;
+		} g;		/* Used by the guest. */
+		struct {
+			struct page **page;
+			struct page **header_page;
+		} h;		/* Used by the host. */
+	} u;
+};
+
+/*
+ * This structure is opaque to the clients.
+ */
+struct vmci_qp {
+	struct vmci_handle handle;
+	struct vmci_queue *produce_q;
+	struct vmci_queue *consume_q;
+	u64 produce_q_size;
+	u64 consume_q_size;
+	u32 peer;
+	u32 flags;
+	u32 priv_flags;
+	bool guest_endpoint;
+	unsigned int blocked;
+	unsigned int generation;
+	wait_queue_head_t event;
+};
+
+enum qp_broker_state {
+	VMCIQPB_NEW,
+	VMCIQPB_CREATED_NO_MEM,
+	VMCIQPB_CREATED_MEM,
+	VMCIQPB_ATTACHED_NO_MEM,
+	VMCIQPB_ATTACHED_MEM,
+	VMCIQPB_SHUTDOWN_NO_MEM,
+	VMCIQPB_SHUTDOWN_MEM,
+	VMCIQPB_GONE
+};
+
+#define QPBROKERSTATE_HAS_MEM(_qpb) (_qpb->state == VMCIQPB_CREATED_MEM || \
+				     _qpb->state == VMCIQPB_ATTACHED_MEM || \
+				     _qpb->state == VMCIQPB_SHUTDOWN_MEM)
+
+/*
+ * In the queue pair broker, we always use the guest point of view for
+ * the produce and consume queue values and references, e.g., the
+ * produce queue size stored is the guests produce queue size. The
+ * host endpoint will need to swap these around. The only exception is
+ * the local queue pairs on the host, in which case the host endpoint
+ * that creates the queue pair will have the right orientation, and
+ * the attaching host endpoint will need to swap.
+ */
+struct qp_entry {
+	struct list_head list_item;
+	struct vmci_handle handle;
+	u32 peer;
+	u32 flags;
+	u64 produce_size;
+	u64 consume_size;
+	u32 ref_count;
+};
+
+struct qp_broker_entry {
+	struct vmci_resource resource;
+	struct qp_entry qp;
+	u32 create_id;
+	u32 attach_id;
+	enum qp_broker_state state;
+	bool require_trusted_attach;
+	bool created_by_trusted;
+	bool vmci_page_files;	/* Created by VMX using VMCI page files */
+	struct vmci_queue *produce_q;
+	struct vmci_queue *consume_q;
+	struct vmci_queue_header saved_produce_q;
+	struct vmci_queue_header saved_consume_q;
+	vmci_event_release_cb wakeup_cb;
+	void *client_data;
+	void *local_mem;	/* Kernel memory for local queue pair */
+};
+
+struct qp_guest_endpoint {
+	struct vmci_resource resource;
+	struct qp_entry qp;
+	u64 num_ppns;
+	void *produce_q;
+	void *consume_q;
+	struct ppn_set ppn_set;
+};
+
+struct qp_list {
+	struct list_head head;
+	struct mutex mutex;	/* Protect queue list. */
+};
+
+static struct qp_list qp_broker_list = {
+	.head = LIST_HEAD_INIT(qp_broker_list.head),
+	.mutex = __MUTEX_INITIALIZER(qp_broker_list.mutex),
+};
+
+static struct qp_list qp_guest_endpoints = {
+	.head = LIST_HEAD_INIT(qp_guest_endpoints.head),
+	.mutex = __MUTEX_INITIALIZER(qp_guest_endpoints.mutex),
+};
+
+#define INVALID_VMCI_GUEST_MEM_ID  0
+#define QPE_NUM_PAGES(_QPE) ((u32) \
+			     (DIV_ROUND_UP(_QPE.produce_size, PAGE_SIZE) + \
+			      DIV_ROUND_UP(_QPE.consume_size, PAGE_SIZE) + 2))
+
+
+/*
+ * Frees kernel VA space for a given queue and its queue header, and
+ * frees physical data pages.
+ */
+static void qp_free_queue(void *q, u64 size)
+{
+	struct vmci_queue *queue = q;
+
+	if (queue) {
+		u64 i;
+
+		/* Given size does not include header, so add in a page here. */
+		for (i = 0; i < DIV_ROUND_UP(size, PAGE_SIZE) + 1; i++) {
+			dma_free_coherent(&vmci_pdev->dev, PAGE_SIZE,
+					  queue->kernel_if->u.g.vas[i],
+					  queue->kernel_if->u.g.pas[i]);
+		}
+
+		vfree(queue);
+	}
+}
+
+/*
+ * Allocates kernel queue pages of specified size with IOMMU mappings,
+ * plus space for the queue structure/kernel interface and the queue
+ * header.
+ */
+static void *qp_alloc_queue(u64 size, u32 flags)
+{
+	u64 i;
+	struct vmci_queue *queue;
+	size_t pas_size;
+	size_t vas_size;
+	size_t queue_size = sizeof(*queue) + sizeof(*queue->kernel_if);
+	u64 num_pages;
+
+	if (size > SIZE_MAX - PAGE_SIZE)
+		return NULL;
+	num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1;
+	if (num_pages >
+		 (SIZE_MAX - queue_size) /
+		 (sizeof(*queue->kernel_if->u.g.pas) +
+		  sizeof(*queue->kernel_if->u.g.vas)))
+		return NULL;
+
+	pas_size = num_pages * sizeof(*queue->kernel_if->u.g.pas);
+	vas_size = num_pages * sizeof(*queue->kernel_if->u.g.vas);
+	queue_size += pas_size + vas_size;
+
+	queue = vmalloc(queue_size);
+	if (!queue)
+		return NULL;
+
+	queue->q_header = NULL;
+	queue->saved_header = NULL;
+	queue->kernel_if = (struct vmci_queue_kern_if *)(queue + 1);
+	queue->kernel_if->mutex = NULL;
+	queue->kernel_if->num_pages = num_pages;
+	queue->kernel_if->u.g.pas = (dma_addr_t *)(queue->kernel_if + 1);
+	queue->kernel_if->u.g.vas =
+		(void **)((u8 *)queue->kernel_if->u.g.pas + pas_size);
+	queue->kernel_if->host = false;
+
+	for (i = 0; i < num_pages; i++) {
+		queue->kernel_if->u.g.vas[i] =
+			dma_alloc_coherent(&vmci_pdev->dev, PAGE_SIZE,
+					   &queue->kernel_if->u.g.pas[i],
+					   GFP_KERNEL);
+		if (!queue->kernel_if->u.g.vas[i]) {
+			/* Size excl. the header. */
+			qp_free_queue(queue, i * PAGE_SIZE);
+			return NULL;
+		}
+	}
+
+	/* Queue header is the first page. */
+	queue->q_header = queue->kernel_if->u.g.vas[0];
+
+	return queue;
+}
+
+/*
+ * Copies from a given buffer or iovector to a VMCI Queue.  Uses
+ * kmap()/kunmap() to dynamically map/unmap required portions of the queue
+ * by traversing the offset -> page translation structure for the queue.
+ * Assumes that offset + size does not wrap around in the queue.
+ */
+static int qp_memcpy_to_queue_iter(struct vmci_queue *queue,
+				  u64 queue_offset,
+				  struct iov_iter *from,
+				  size_t size)
+{
+	struct vmci_queue_kern_if *kernel_if = queue->kernel_if;
+	size_t bytes_copied = 0;
+
+	while (bytes_copied < size) {
+		const u64 page_index =
+			(queue_offset + bytes_copied) / PAGE_SIZE;
+		const size_t page_offset =
+		    (queue_offset + bytes_copied) & (PAGE_SIZE - 1);
+		void *va;
+		size_t to_copy;
+
+		if (kernel_if->host)
+			va = kmap(kernel_if->u.h.page[page_index]);
+		else
+			va = kernel_if->u.g.vas[page_index + 1];
+			/* Skip header. */
+
+		if (size - bytes_copied > PAGE_SIZE - page_offset)
+			/* Enough payload to fill up from this page. */
+			to_copy = PAGE_SIZE - page_offset;
+		else
+			to_copy = size - bytes_copied;
+
+		if (!copy_from_iter_full((u8 *)va + page_offset, to_copy,
+					 from)) {
+			if (kernel_if->host)
+				kunmap(kernel_if->u.h.page[page_index]);
+			return VMCI_ERROR_INVALID_ARGS;
+		}
+		bytes_copied += to_copy;
+		if (kernel_if->host)
+			kunmap(kernel_if->u.h.page[page_index]);
+	}
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Copies to a given buffer or iovector from a VMCI Queue.  Uses
+ * kmap()/kunmap() to dynamically map/unmap required portions of the queue
+ * by traversing the offset -> page translation structure for the queue.
+ * Assumes that offset + size does not wrap around in the queue.
+ */
+static int qp_memcpy_from_queue_iter(struct iov_iter *to,
+				    const struct vmci_queue *queue,
+				    u64 queue_offset, size_t size)
+{
+	struct vmci_queue_kern_if *kernel_if = queue->kernel_if;
+	size_t bytes_copied = 0;
+
+	while (bytes_copied < size) {
+		const u64 page_index =
+			(queue_offset + bytes_copied) / PAGE_SIZE;
+		const size_t page_offset =
+		    (queue_offset + bytes_copied) & (PAGE_SIZE - 1);
+		void *va;
+		size_t to_copy;
+		int err;
+
+		if (kernel_if->host)
+			va = kmap(kernel_if->u.h.page[page_index]);
+		else
+			va = kernel_if->u.g.vas[page_index + 1];
+			/* Skip header. */
+
+		if (size - bytes_copied > PAGE_SIZE - page_offset)
+			/* Enough payload to fill up this page. */
+			to_copy = PAGE_SIZE - page_offset;
+		else
+			to_copy = size - bytes_copied;
+
+		err = copy_to_iter((u8 *)va + page_offset, to_copy, to);
+		if (err != to_copy) {
+			if (kernel_if->host)
+				kunmap(kernel_if->u.h.page[page_index]);
+			return VMCI_ERROR_INVALID_ARGS;
+		}
+		bytes_copied += to_copy;
+		if (kernel_if->host)
+			kunmap(kernel_if->u.h.page[page_index]);
+	}
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Allocates two list of PPNs --- one for the pages in the produce queue,
+ * and the other for the pages in the consume queue. Intializes the list
+ * of PPNs with the page frame numbers of the KVA for the two queues (and
+ * the queue headers).
+ */
+static int qp_alloc_ppn_set(void *prod_q,
+			    u64 num_produce_pages,
+			    void *cons_q,
+			    u64 num_consume_pages, struct ppn_set *ppn_set)
+{
+	u32 *produce_ppns;
+	u32 *consume_ppns;
+	struct vmci_queue *produce_q = prod_q;
+	struct vmci_queue *consume_q = cons_q;
+	u64 i;
+
+	if (!produce_q || !num_produce_pages || !consume_q ||
+	    !num_consume_pages || !ppn_set)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	if (ppn_set->initialized)
+		return VMCI_ERROR_ALREADY_EXISTS;
+
+	produce_ppns =
+	    kmalloc_array(num_produce_pages, sizeof(*produce_ppns),
+			  GFP_KERNEL);
+	if (!produce_ppns)
+		return VMCI_ERROR_NO_MEM;
+
+	consume_ppns =
+	    kmalloc_array(num_consume_pages, sizeof(*consume_ppns),
+			  GFP_KERNEL);
+	if (!consume_ppns) {
+		kfree(produce_ppns);
+		return VMCI_ERROR_NO_MEM;
+	}
+
+	for (i = 0; i < num_produce_pages; i++) {
+		unsigned long pfn;
+
+		produce_ppns[i] =
+			produce_q->kernel_if->u.g.pas[i] >> PAGE_SHIFT;
+		pfn = produce_ppns[i];
+
+		/* Fail allocation if PFN isn't supported by hypervisor. */
+		if (sizeof(pfn) > sizeof(*produce_ppns)
+		    && pfn != produce_ppns[i])
+			goto ppn_error;
+	}
+
+	for (i = 0; i < num_consume_pages; i++) {
+		unsigned long pfn;
+
+		consume_ppns[i] =
+			consume_q->kernel_if->u.g.pas[i] >> PAGE_SHIFT;
+		pfn = consume_ppns[i];
+
+		/* Fail allocation if PFN isn't supported by hypervisor. */
+		if (sizeof(pfn) > sizeof(*consume_ppns)
+		    && pfn != consume_ppns[i])
+			goto ppn_error;
+	}
+
+	ppn_set->num_produce_pages = num_produce_pages;
+	ppn_set->num_consume_pages = num_consume_pages;
+	ppn_set->produce_ppns = produce_ppns;
+	ppn_set->consume_ppns = consume_ppns;
+	ppn_set->initialized = true;
+	return VMCI_SUCCESS;
+
+ ppn_error:
+	kfree(produce_ppns);
+	kfree(consume_ppns);
+	return VMCI_ERROR_INVALID_ARGS;
+}
+
+/*
+ * Frees the two list of PPNs for a queue pair.
+ */
+static void qp_free_ppn_set(struct ppn_set *ppn_set)
+{
+	if (ppn_set->initialized) {
+		/* Do not call these functions on NULL inputs. */
+		kfree(ppn_set->produce_ppns);
+		kfree(ppn_set->consume_ppns);
+	}
+	memset(ppn_set, 0, sizeof(*ppn_set));
+}
+
+/*
+ * Populates the list of PPNs in the hypercall structure with the PPNS
+ * of the produce queue and the consume queue.
+ */
+static int qp_populate_ppn_set(u8 *call_buf, const struct ppn_set *ppn_set)
+{
+	memcpy(call_buf, ppn_set->produce_ppns,
+	       ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns));
+	memcpy(call_buf +
+	       ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns),
+	       ppn_set->consume_ppns,
+	       ppn_set->num_consume_pages * sizeof(*ppn_set->consume_ppns));
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Allocates kernel VA space of specified size plus space for the queue
+ * and kernel interface.  This is different from the guest queue allocator,
+ * because we do not allocate our own queue header/data pages here but
+ * share those of the guest.
+ */
+static struct vmci_queue *qp_host_alloc_queue(u64 size)
+{
+	struct vmci_queue *queue;
+	size_t queue_page_size;
+	u64 num_pages;
+	const size_t queue_size = sizeof(*queue) + sizeof(*(queue->kernel_if));
+
+	if (size > SIZE_MAX - PAGE_SIZE)
+		return NULL;
+	num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1;
+	if (num_pages > (SIZE_MAX - queue_size) /
+		 sizeof(*queue->kernel_if->u.h.page))
+		return NULL;
+
+	queue_page_size = num_pages * sizeof(*queue->kernel_if->u.h.page);
+
+	if (queue_size + queue_page_size > KMALLOC_MAX_SIZE)
+		return NULL;
+
+	queue = kzalloc(queue_size + queue_page_size, GFP_KERNEL);
+	if (queue) {
+		queue->q_header = NULL;
+		queue->saved_header = NULL;
+		queue->kernel_if = (struct vmci_queue_kern_if *)(queue + 1);
+		queue->kernel_if->host = true;
+		queue->kernel_if->mutex = NULL;
+		queue->kernel_if->num_pages = num_pages;
+		queue->kernel_if->u.h.header_page =
+		    (struct page **)((u8 *)queue + queue_size);
+		queue->kernel_if->u.h.page =
+			&queue->kernel_if->u.h.header_page[1];
+	}
+
+	return queue;
+}
+
+/*
+ * Frees kernel memory for a given queue (header plus translation
+ * structure).
+ */
+static void qp_host_free_queue(struct vmci_queue *queue, u64 queue_size)
+{
+	kfree(queue);
+}
+
+/*
+ * Initialize the mutex for the pair of queues.  This mutex is used to
+ * protect the q_header and the buffer from changing out from under any
+ * users of either queue.  Of course, it's only any good if the mutexes
+ * are actually acquired.  Queue structure must lie on non-paged memory
+ * or we cannot guarantee access to the mutex.
+ */
+static void qp_init_queue_mutex(struct vmci_queue *produce_q,
+				struct vmci_queue *consume_q)
+{
+	/*
+	 * Only the host queue has shared state - the guest queues do not
+	 * need to synchronize access using a queue mutex.
+	 */
+
+	if (produce_q->kernel_if->host) {
+		produce_q->kernel_if->mutex = &produce_q->kernel_if->__mutex;
+		consume_q->kernel_if->mutex = &produce_q->kernel_if->__mutex;
+		mutex_init(produce_q->kernel_if->mutex);
+	}
+}
+
+/*
+ * Cleans up the mutex for the pair of queues.
+ */
+static void qp_cleanup_queue_mutex(struct vmci_queue *produce_q,
+				   struct vmci_queue *consume_q)
+{
+	if (produce_q->kernel_if->host) {
+		produce_q->kernel_if->mutex = NULL;
+		consume_q->kernel_if->mutex = NULL;
+	}
+}
+
+/*
+ * Acquire the mutex for the queue.  Note that the produce_q and
+ * the consume_q share a mutex.  So, only one of the two need to
+ * be passed in to this routine.  Either will work just fine.
+ */
+static void qp_acquire_queue_mutex(struct vmci_queue *queue)
+{
+	if (queue->kernel_if->host)
+		mutex_lock(queue->kernel_if->mutex);
+}
+
+/*
+ * Release the mutex for the queue.  Note that the produce_q and
+ * the consume_q share a mutex.  So, only one of the two need to
+ * be passed in to this routine.  Either will work just fine.
+ */
+static void qp_release_queue_mutex(struct vmci_queue *queue)
+{
+	if (queue->kernel_if->host)
+		mutex_unlock(queue->kernel_if->mutex);
+}
+
+/*
+ * Helper function to release pages in the PageStoreAttachInfo
+ * previously obtained using get_user_pages.
+ */
+static void qp_release_pages(struct page **pages,
+			     u64 num_pages, bool dirty)
+{
+	int i;
+
+	for (i = 0; i < num_pages; i++) {
+		if (dirty)
+			set_page_dirty_lock(pages[i]);
+
+		put_page(pages[i]);
+		pages[i] = NULL;
+	}
+}
+
+/*
+ * Lock the user pages referenced by the {produce,consume}Buffer
+ * struct into memory and populate the {produce,consume}Pages
+ * arrays in the attach structure with them.
+ */
+static int qp_host_get_user_memory(u64 produce_uva,
+				   u64 consume_uva,
+				   struct vmci_queue *produce_q,
+				   struct vmci_queue *consume_q)
+{
+	int retval;
+	int err = VMCI_SUCCESS;
+
+	retval = get_user_pages_fast((uintptr_t) produce_uva,
+				     produce_q->kernel_if->num_pages, 1,
+				     produce_q->kernel_if->u.h.header_page);
+	if (retval < (int)produce_q->kernel_if->num_pages) {
+		pr_debug("get_user_pages_fast(produce) failed (retval=%d)",
+			retval);
+		if (retval > 0)
+			qp_release_pages(produce_q->kernel_if->u.h.header_page,
+					retval, false);
+		err = VMCI_ERROR_NO_MEM;
+		goto out;
+	}
+
+	retval = get_user_pages_fast((uintptr_t) consume_uva,
+				     consume_q->kernel_if->num_pages, 1,
+				     consume_q->kernel_if->u.h.header_page);
+	if (retval < (int)consume_q->kernel_if->num_pages) {
+		pr_debug("get_user_pages_fast(consume) failed (retval=%d)",
+			retval);
+		if (retval > 0)
+			qp_release_pages(consume_q->kernel_if->u.h.header_page,
+					retval, false);
+		qp_release_pages(produce_q->kernel_if->u.h.header_page,
+				 produce_q->kernel_if->num_pages, false);
+		err = VMCI_ERROR_NO_MEM;
+	}
+
+ out:
+	return err;
+}
+
+/*
+ * Registers the specification of the user pages used for backing a queue
+ * pair. Enough information to map in pages is stored in the OS specific
+ * part of the struct vmci_queue structure.
+ */
+static int qp_host_register_user_memory(struct vmci_qp_page_store *page_store,
+					struct vmci_queue *produce_q,
+					struct vmci_queue *consume_q)
+{
+	u64 produce_uva;
+	u64 consume_uva;
+
+	/*
+	 * The new style and the old style mapping only differs in
+	 * that we either get a single or two UVAs, so we split the
+	 * single UVA range at the appropriate spot.
+	 */
+	produce_uva = page_store->pages;
+	consume_uva = page_store->pages +
+	    produce_q->kernel_if->num_pages * PAGE_SIZE;
+	return qp_host_get_user_memory(produce_uva, consume_uva, produce_q,
+				       consume_q);
+}
+
+/*
+ * Releases and removes the references to user pages stored in the attach
+ * struct.  Pages are released from the page cache and may become
+ * swappable again.
+ */
+static void qp_host_unregister_user_memory(struct vmci_queue *produce_q,
+					   struct vmci_queue *consume_q)
+{
+	qp_release_pages(produce_q->kernel_if->u.h.header_page,
+			 produce_q->kernel_if->num_pages, true);
+	memset(produce_q->kernel_if->u.h.header_page, 0,
+	       sizeof(*produce_q->kernel_if->u.h.header_page) *
+	       produce_q->kernel_if->num_pages);
+	qp_release_pages(consume_q->kernel_if->u.h.header_page,
+			 consume_q->kernel_if->num_pages, true);
+	memset(consume_q->kernel_if->u.h.header_page, 0,
+	       sizeof(*consume_q->kernel_if->u.h.header_page) *
+	       consume_q->kernel_if->num_pages);
+}
+
+/*
+ * Once qp_host_register_user_memory has been performed on a
+ * queue, the queue pair headers can be mapped into the
+ * kernel. Once mapped, they must be unmapped with
+ * qp_host_unmap_queues prior to calling
+ * qp_host_unregister_user_memory.
+ * Pages are pinned.
+ */
+static int qp_host_map_queues(struct vmci_queue *produce_q,
+			      struct vmci_queue *consume_q)
+{
+	int result;
+
+	if (!produce_q->q_header || !consume_q->q_header) {
+		struct page *headers[2];
+
+		if (produce_q->q_header != consume_q->q_header)
+			return VMCI_ERROR_QUEUEPAIR_MISMATCH;
+
+		if (produce_q->kernel_if->u.h.header_page == NULL ||
+		    *produce_q->kernel_if->u.h.header_page == NULL)
+			return VMCI_ERROR_UNAVAILABLE;
+
+		headers[0] = *produce_q->kernel_if->u.h.header_page;
+		headers[1] = *consume_q->kernel_if->u.h.header_page;
+
+		produce_q->q_header = vmap(headers, 2, VM_MAP, PAGE_KERNEL);
+		if (produce_q->q_header != NULL) {
+			consume_q->q_header =
+			    (struct vmci_queue_header *)((u8 *)
+							 produce_q->q_header +
+							 PAGE_SIZE);
+			result = VMCI_SUCCESS;
+		} else {
+			pr_warn("vmap failed\n");
+			result = VMCI_ERROR_NO_MEM;
+		}
+	} else {
+		result = VMCI_SUCCESS;
+	}
+
+	return result;
+}
+
+/*
+ * Unmaps previously mapped queue pair headers from the kernel.
+ * Pages are unpinned.
+ */
+static int qp_host_unmap_queues(u32 gid,
+				struct vmci_queue *produce_q,
+				struct vmci_queue *consume_q)
+{
+	if (produce_q->q_header) {
+		if (produce_q->q_header < consume_q->q_header)
+			vunmap(produce_q->q_header);
+		else
+			vunmap(consume_q->q_header);
+
+		produce_q->q_header = NULL;
+		consume_q->q_header = NULL;
+	}
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Finds the entry in the list corresponding to a given handle. Assumes
+ * that the list is locked.
+ */
+static struct qp_entry *qp_list_find(struct qp_list *qp_list,
+				     struct vmci_handle handle)
+{
+	struct qp_entry *entry;
+
+	if (vmci_handle_is_invalid(handle))
+		return NULL;
+
+	list_for_each_entry(entry, &qp_list->head, list_item) {
+		if (vmci_handle_is_equal(entry->handle, handle))
+			return entry;
+	}
+
+	return NULL;
+}
+
+/*
+ * Finds the entry in the list corresponding to a given handle.
+ */
+static struct qp_guest_endpoint *
+qp_guest_handle_to_entry(struct vmci_handle handle)
+{
+	struct qp_guest_endpoint *entry;
+	struct qp_entry *qp = qp_list_find(&qp_guest_endpoints, handle);
+
+	entry = qp ? container_of(
+		qp, struct qp_guest_endpoint, qp) : NULL;
+	return entry;
+}
+
+/*
+ * Finds the entry in the list corresponding to a given handle.
+ */
+static struct qp_broker_entry *
+qp_broker_handle_to_entry(struct vmci_handle handle)
+{
+	struct qp_broker_entry *entry;
+	struct qp_entry *qp = qp_list_find(&qp_broker_list, handle);
+
+	entry = qp ? container_of(
+		qp, struct qp_broker_entry, qp) : NULL;
+	return entry;
+}
+
+/*
+ * Dispatches a queue pair event message directly into the local event
+ * queue.
+ */
+static int qp_notify_peer_local(bool attach, struct vmci_handle handle)
+{
+	u32 context_id = vmci_get_context_id();
+	struct vmci_event_qp ev;
+
+	ev.msg.hdr.dst = vmci_make_handle(context_id, VMCI_EVENT_HANDLER);
+	ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+					  VMCI_CONTEXT_RESOURCE_ID);
+	ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
+	ev.msg.event_data.event =
+	    attach ? VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH;
+	ev.payload.peer_id = context_id;
+	ev.payload.handle = handle;
+
+	return vmci_event_dispatch(&ev.msg.hdr);
+}
+
+/*
+ * Allocates and initializes a qp_guest_endpoint structure.
+ * Allocates a queue_pair rid (and handle) iff the given entry has
+ * an invalid handle.  0 through VMCI_RESERVED_RESOURCE_ID_MAX
+ * are reserved handles.  Assumes that the QP list mutex is held
+ * by the caller.
+ */
+static struct qp_guest_endpoint *
+qp_guest_endpoint_create(struct vmci_handle handle,
+			 u32 peer,
+			 u32 flags,
+			 u64 produce_size,
+			 u64 consume_size,
+			 void *produce_q,
+			 void *consume_q)
+{
+	int result;
+	struct qp_guest_endpoint *entry;
+	/* One page each for the queue headers. */
+	const u64 num_ppns = DIV_ROUND_UP(produce_size, PAGE_SIZE) +
+	    DIV_ROUND_UP(consume_size, PAGE_SIZE) + 2;
+
+	if (vmci_handle_is_invalid(handle)) {
+		u32 context_id = vmci_get_context_id();
+
+		handle = vmci_make_handle(context_id, VMCI_INVALID_ID);
+	}
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (entry) {
+		entry->qp.peer = peer;
+		entry->qp.flags = flags;
+		entry->qp.produce_size = produce_size;
+		entry->qp.consume_size = consume_size;
+		entry->qp.ref_count = 0;
+		entry->num_ppns = num_ppns;
+		entry->produce_q = produce_q;
+		entry->consume_q = consume_q;
+		INIT_LIST_HEAD(&entry->qp.list_item);
+
+		/* Add resource obj */
+		result = vmci_resource_add(&entry->resource,
+					   VMCI_RESOURCE_TYPE_QPAIR_GUEST,
+					   handle);
+		entry->qp.handle = vmci_resource_handle(&entry->resource);
+		if ((result != VMCI_SUCCESS) ||
+		    qp_list_find(&qp_guest_endpoints, entry->qp.handle)) {
+			pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d",
+				handle.context, handle.resource, result);
+			kfree(entry);
+			entry = NULL;
+		}
+	}
+	return entry;
+}
+
+/*
+ * Frees a qp_guest_endpoint structure.
+ */
+static void qp_guest_endpoint_destroy(struct qp_guest_endpoint *entry)
+{
+	qp_free_ppn_set(&entry->ppn_set);
+	qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q);
+	qp_free_queue(entry->produce_q, entry->qp.produce_size);
+	qp_free_queue(entry->consume_q, entry->qp.consume_size);
+	/* Unlink from resource hash table and free callback */
+	vmci_resource_remove(&entry->resource);
+
+	kfree(entry);
+}
+
+/*
+ * Helper to make a queue_pairAlloc hypercall when the driver is
+ * supporting a guest device.
+ */
+static int qp_alloc_hypercall(const struct qp_guest_endpoint *entry)
+{
+	struct vmci_qp_alloc_msg *alloc_msg;
+	size_t msg_size;
+	int result;
+
+	if (!entry || entry->num_ppns <= 2)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	msg_size = sizeof(*alloc_msg) +
+	    (size_t) entry->num_ppns * sizeof(u32);
+	alloc_msg = kmalloc(msg_size, GFP_KERNEL);
+	if (!alloc_msg)
+		return VMCI_ERROR_NO_MEM;
+
+	alloc_msg->hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+					      VMCI_QUEUEPAIR_ALLOC);
+	alloc_msg->hdr.src = VMCI_ANON_SRC_HANDLE;
+	alloc_msg->hdr.payload_size = msg_size - VMCI_DG_HEADERSIZE;
+	alloc_msg->handle = entry->qp.handle;
+	alloc_msg->peer = entry->qp.peer;
+	alloc_msg->flags = entry->qp.flags;
+	alloc_msg->produce_size = entry->qp.produce_size;
+	alloc_msg->consume_size = entry->qp.consume_size;
+	alloc_msg->num_ppns = entry->num_ppns;
+
+	result = qp_populate_ppn_set((u8 *)alloc_msg + sizeof(*alloc_msg),
+				     &entry->ppn_set);
+	if (result == VMCI_SUCCESS)
+		result = vmci_send_datagram(&alloc_msg->hdr);
+
+	kfree(alloc_msg);
+
+	return result;
+}
+
+/*
+ * Helper to make a queue_pairDetach hypercall when the driver is
+ * supporting a guest device.
+ */
+static int qp_detatch_hypercall(struct vmci_handle handle)
+{
+	struct vmci_qp_detach_msg detach_msg;
+
+	detach_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+					      VMCI_QUEUEPAIR_DETACH);
+	detach_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
+	detach_msg.hdr.payload_size = sizeof(handle);
+	detach_msg.handle = handle;
+
+	return vmci_send_datagram(&detach_msg.hdr);
+}
+
+/*
+ * Adds the given entry to the list. Assumes that the list is locked.
+ */
+static void qp_list_add_entry(struct qp_list *qp_list, struct qp_entry *entry)
+{
+	if (entry)
+		list_add(&entry->list_item, &qp_list->head);
+}
+
+/*
+ * Removes the given entry from the list. Assumes that the list is locked.
+ */
+static void qp_list_remove_entry(struct qp_list *qp_list,
+				 struct qp_entry *entry)
+{
+	if (entry)
+		list_del(&entry->list_item);
+}
+
+/*
+ * Helper for VMCI queue_pair detach interface. Frees the physical
+ * pages for the queue pair.
+ */
+static int qp_detatch_guest_work(struct vmci_handle handle)
+{
+	int result;
+	struct qp_guest_endpoint *entry;
+	u32 ref_count = ~0;	/* To avoid compiler warning below */
+
+	mutex_lock(&qp_guest_endpoints.mutex);
+
+	entry = qp_guest_handle_to_entry(handle);
+	if (!entry) {
+		mutex_unlock(&qp_guest_endpoints.mutex);
+		return VMCI_ERROR_NOT_FOUND;
+	}
+
+	if (entry->qp.flags & VMCI_QPFLAG_LOCAL) {
+		result = VMCI_SUCCESS;
+
+		if (entry->qp.ref_count > 1) {
+			result = qp_notify_peer_local(false, handle);
+			/*
+			 * We can fail to notify a local queuepair
+			 * because we can't allocate.  We still want
+			 * to release the entry if that happens, so
+			 * don't bail out yet.
+			 */
+		}
+	} else {
+		result = qp_detatch_hypercall(handle);
+		if (result < VMCI_SUCCESS) {
+			/*
+			 * We failed to notify a non-local queuepair.
+			 * That other queuepair might still be
+			 * accessing the shared memory, so don't
+			 * release the entry yet.  It will get cleaned
+			 * up by VMCIqueue_pair_Exit() if necessary
+			 * (assuming we are going away, otherwise why
+			 * did this fail?).
+			 */
+
+			mutex_unlock(&qp_guest_endpoints.mutex);
+			return result;
+		}
+	}
+
+	/*
+	 * If we get here then we either failed to notify a local queuepair, or
+	 * we succeeded in all cases.  Release the entry if required.
+	 */
+
+	entry->qp.ref_count--;
+	if (entry->qp.ref_count == 0)
+		qp_list_remove_entry(&qp_guest_endpoints, &entry->qp);
+
+	/* If we didn't remove the entry, this could change once we unlock. */
+	if (entry)
+		ref_count = entry->qp.ref_count;
+
+	mutex_unlock(&qp_guest_endpoints.mutex);
+
+	if (ref_count == 0)
+		qp_guest_endpoint_destroy(entry);
+
+	return result;
+}
+
+/*
+ * This functions handles the actual allocation of a VMCI queue
+ * pair guest endpoint. Allocates physical pages for the queue
+ * pair. It makes OS dependent calls through generic wrappers.
+ */
+static int qp_alloc_guest_work(struct vmci_handle *handle,
+			       struct vmci_queue **produce_q,
+			       u64 produce_size,
+			       struct vmci_queue **consume_q,
+			       u64 consume_size,
+			       u32 peer,
+			       u32 flags,
+			       u32 priv_flags)
+{
+	const u64 num_produce_pages =
+	    DIV_ROUND_UP(produce_size, PAGE_SIZE) + 1;
+	const u64 num_consume_pages =
+	    DIV_ROUND_UP(consume_size, PAGE_SIZE) + 1;
+	void *my_produce_q = NULL;
+	void *my_consume_q = NULL;
+	int result;
+	struct qp_guest_endpoint *queue_pair_entry = NULL;
+
+	if (priv_flags != VMCI_NO_PRIVILEGE_FLAGS)
+		return VMCI_ERROR_NO_ACCESS;
+
+	mutex_lock(&qp_guest_endpoints.mutex);
+
+	queue_pair_entry = qp_guest_handle_to_entry(*handle);
+	if (queue_pair_entry) {
+		if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) {
+			/* Local attach case. */
+			if (queue_pair_entry->qp.ref_count > 1) {
+				pr_devel("Error attempting to attach more than once\n");
+				result = VMCI_ERROR_UNAVAILABLE;
+				goto error_keep_entry;
+			}
+
+			if (queue_pair_entry->qp.produce_size != consume_size ||
+			    queue_pair_entry->qp.consume_size !=
+			    produce_size ||
+			    queue_pair_entry->qp.flags !=
+			    (flags & ~VMCI_QPFLAG_ATTACH_ONLY)) {
+				pr_devel("Error mismatched queue pair in local attach\n");
+				result = VMCI_ERROR_QUEUEPAIR_MISMATCH;
+				goto error_keep_entry;
+			}
+
+			/*
+			 * Do a local attach.  We swap the consume and
+			 * produce queues for the attacher and deliver
+			 * an attach event.
+			 */
+			result = qp_notify_peer_local(true, *handle);
+			if (result < VMCI_SUCCESS)
+				goto error_keep_entry;
+
+			my_produce_q = queue_pair_entry->consume_q;
+			my_consume_q = queue_pair_entry->produce_q;
+			goto out;
+		}
+
+		result = VMCI_ERROR_ALREADY_EXISTS;
+		goto error_keep_entry;
+	}
+
+	my_produce_q = qp_alloc_queue(produce_size, flags);
+	if (!my_produce_q) {
+		pr_warn("Error allocating pages for produce queue\n");
+		result = VMCI_ERROR_NO_MEM;
+		goto error;
+	}
+
+	my_consume_q = qp_alloc_queue(consume_size, flags);
+	if (!my_consume_q) {
+		pr_warn("Error allocating pages for consume queue\n");
+		result = VMCI_ERROR_NO_MEM;
+		goto error;
+	}
+
+	queue_pair_entry = qp_guest_endpoint_create(*handle, peer, flags,
+						    produce_size, consume_size,
+						    my_produce_q, my_consume_q);
+	if (!queue_pair_entry) {
+		pr_warn("Error allocating memory in %s\n", __func__);
+		result = VMCI_ERROR_NO_MEM;
+		goto error;
+	}
+
+	result = qp_alloc_ppn_set(my_produce_q, num_produce_pages, my_consume_q,
+				  num_consume_pages,
+				  &queue_pair_entry->ppn_set);
+	if (result < VMCI_SUCCESS) {
+		pr_warn("qp_alloc_ppn_set failed\n");
+		goto error;
+	}
+
+	/*
+	 * It's only necessary to notify the host if this queue pair will be
+	 * attached to from another context.
+	 */
+	if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) {
+		/* Local create case. */
+		u32 context_id = vmci_get_context_id();
+
+		/*
+		 * Enforce similar checks on local queue pairs as we
+		 * do for regular ones.  The handle's context must
+		 * match the creator or attacher context id (here they
+		 * are both the current context id) and the
+		 * attach-only flag cannot exist during create.  We
+		 * also ensure specified peer is this context or an
+		 * invalid one.
+		 */
+		if (queue_pair_entry->qp.handle.context != context_id ||
+		    (queue_pair_entry->qp.peer != VMCI_INVALID_ID &&
+		     queue_pair_entry->qp.peer != context_id)) {
+			result = VMCI_ERROR_NO_ACCESS;
+			goto error;
+		}
+
+		if (queue_pair_entry->qp.flags & VMCI_QPFLAG_ATTACH_ONLY) {
+			result = VMCI_ERROR_NOT_FOUND;
+			goto error;
+		}
+	} else {
+		result = qp_alloc_hypercall(queue_pair_entry);
+		if (result < VMCI_SUCCESS) {
+			pr_warn("qp_alloc_hypercall result = %d\n", result);
+			goto error;
+		}
+	}
+
+	qp_init_queue_mutex((struct vmci_queue *)my_produce_q,
+			    (struct vmci_queue *)my_consume_q);
+
+	qp_list_add_entry(&qp_guest_endpoints, &queue_pair_entry->qp);
+
+ out:
+	queue_pair_entry->qp.ref_count++;
+	*handle = queue_pair_entry->qp.handle;
+	*produce_q = (struct vmci_queue *)my_produce_q;
+	*consume_q = (struct vmci_queue *)my_consume_q;
+
+	/*
+	 * We should initialize the queue pair header pages on a local
+	 * queue pair create.  For non-local queue pairs, the
+	 * hypervisor initializes the header pages in the create step.
+	 */
+	if ((queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) &&
+	    queue_pair_entry->qp.ref_count == 1) {
+		vmci_q_header_init((*produce_q)->q_header, *handle);
+		vmci_q_header_init((*consume_q)->q_header, *handle);
+	}
+
+	mutex_unlock(&qp_guest_endpoints.mutex);
+
+	return VMCI_SUCCESS;
+
+ error:
+	mutex_unlock(&qp_guest_endpoints.mutex);
+	if (queue_pair_entry) {
+		/* The queues will be freed inside the destroy routine. */
+		qp_guest_endpoint_destroy(queue_pair_entry);
+	} else {
+		qp_free_queue(my_produce_q, produce_size);
+		qp_free_queue(my_consume_q, consume_size);
+	}
+	return result;
+
+ error_keep_entry:
+	/* This path should only be used when an existing entry was found. */
+	mutex_unlock(&qp_guest_endpoints.mutex);
+	return result;
+}
+
+/*
+ * The first endpoint issuing a queue pair allocation will create the state
+ * of the queue pair in the queue pair broker.
+ *
+ * If the creator is a guest, it will associate a VMX virtual address range
+ * with the queue pair as specified by the page_store. For compatibility with
+ * older VMX'en, that would use a separate step to set the VMX virtual
+ * address range, the virtual address range can be registered later using
+ * vmci_qp_broker_set_page_store. In that case, a page_store of NULL should be
+ * used.
+ *
+ * If the creator is the host, a page_store of NULL should be used as well,
+ * since the host is not able to supply a page store for the queue pair.
+ *
+ * For older VMX and host callers, the queue pair will be created in the
+ * VMCIQPB_CREATED_NO_MEM state, and for current VMX callers, it will be
+ * created in VMCOQPB_CREATED_MEM state.
+ */
+static int qp_broker_create(struct vmci_handle handle,
+			    u32 peer,
+			    u32 flags,
+			    u32 priv_flags,
+			    u64 produce_size,
+			    u64 consume_size,
+			    struct vmci_qp_page_store *page_store,
+			    struct vmci_ctx *context,
+			    vmci_event_release_cb wakeup_cb,
+			    void *client_data, struct qp_broker_entry **ent)
+{
+	struct qp_broker_entry *entry = NULL;
+	const u32 context_id = vmci_ctx_get_id(context);
+	bool is_local = flags & VMCI_QPFLAG_LOCAL;
+	int result;
+	u64 guest_produce_size;
+	u64 guest_consume_size;
+
+	/* Do not create if the caller asked not to. */
+	if (flags & VMCI_QPFLAG_ATTACH_ONLY)
+		return VMCI_ERROR_NOT_FOUND;
+
+	/*
+	 * Creator's context ID should match handle's context ID or the creator
+	 * must allow the context in handle's context ID as the "peer".
+	 */
+	if (handle.context != context_id && handle.context != peer)
+		return VMCI_ERROR_NO_ACCESS;
+
+	if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(peer))
+		return VMCI_ERROR_DST_UNREACHABLE;
+
+	/*
+	 * Creator's context ID for local queue pairs should match the
+	 * peer, if a peer is specified.
+	 */
+	if (is_local && peer != VMCI_INVALID_ID && context_id != peer)
+		return VMCI_ERROR_NO_ACCESS;
+
+	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+	if (!entry)
+		return VMCI_ERROR_NO_MEM;
+
+	if (vmci_ctx_get_id(context) == VMCI_HOST_CONTEXT_ID && !is_local) {
+		/*
+		 * The queue pair broker entry stores values from the guest
+		 * point of view, so a creating host side endpoint should swap
+		 * produce and consume values -- unless it is a local queue
+		 * pair, in which case no swapping is necessary, since the local
+		 * attacher will swap queues.
+		 */
+
+		guest_produce_size = consume_size;
+		guest_consume_size = produce_size;
+	} else {
+		guest_produce_size = produce_size;
+		guest_consume_size = consume_size;
+	}
+
+	entry->qp.handle = handle;
+	entry->qp.peer = peer;
+	entry->qp.flags = flags;
+	entry->qp.produce_size = guest_produce_size;
+	entry->qp.consume_size = guest_consume_size;
+	entry->qp.ref_count = 1;
+	entry->create_id = context_id;
+	entry->attach_id = VMCI_INVALID_ID;
+	entry->state = VMCIQPB_NEW;
+	entry->require_trusted_attach =
+	    !!(context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED);
+	entry->created_by_trusted =
+	    !!(priv_flags & VMCI_PRIVILEGE_FLAG_TRUSTED);
+	entry->vmci_page_files = false;
+	entry->wakeup_cb = wakeup_cb;
+	entry->client_data = client_data;
+	entry->produce_q = qp_host_alloc_queue(guest_produce_size);
+	if (entry->produce_q == NULL) {
+		result = VMCI_ERROR_NO_MEM;
+		goto error;
+	}
+	entry->consume_q = qp_host_alloc_queue(guest_consume_size);
+	if (entry->consume_q == NULL) {
+		result = VMCI_ERROR_NO_MEM;
+		goto error;
+	}
+
+	qp_init_queue_mutex(entry->produce_q, entry->consume_q);
+
+	INIT_LIST_HEAD(&entry->qp.list_item);
+
+	if (is_local) {
+		u8 *tmp;
+
+		entry->local_mem = kcalloc(QPE_NUM_PAGES(entry->qp),
+					   PAGE_SIZE, GFP_KERNEL);
+		if (entry->local_mem == NULL) {
+			result = VMCI_ERROR_NO_MEM;
+			goto error;
+		}
+		entry->state = VMCIQPB_CREATED_MEM;
+		entry->produce_q->q_header = entry->local_mem;
+		tmp = (u8 *)entry->local_mem + PAGE_SIZE *
+		    (DIV_ROUND_UP(entry->qp.produce_size, PAGE_SIZE) + 1);
+		entry->consume_q->q_header = (struct vmci_queue_header *)tmp;
+	} else if (page_store) {
+		/*
+		 * The VMX already initialized the queue pair headers, so no
+		 * need for the kernel side to do that.
+		 */
+		result = qp_host_register_user_memory(page_store,
+						      entry->produce_q,
+						      entry->consume_q);
+		if (result < VMCI_SUCCESS)
+			goto error;
+
+		entry->state = VMCIQPB_CREATED_MEM;
+	} else {
+		/*
+		 * A create without a page_store may be either a host
+		 * side create (in which case we are waiting for the
+		 * guest side to supply the memory) or an old style
+		 * queue pair create (in which case we will expect a
+		 * set page store call as the next step).
+		 */
+		entry->state = VMCIQPB_CREATED_NO_MEM;
+	}
+
+	qp_list_add_entry(&qp_broker_list, &entry->qp);
+	if (ent != NULL)
+		*ent = entry;
+
+	/* Add to resource obj */
+	result = vmci_resource_add(&entry->resource,
+				   VMCI_RESOURCE_TYPE_QPAIR_HOST,
+				   handle);
+	if (result != VMCI_SUCCESS) {
+		pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d",
+			handle.context, handle.resource, result);
+		goto error;
+	}
+
+	entry->qp.handle = vmci_resource_handle(&entry->resource);
+	if (is_local) {
+		vmci_q_header_init(entry->produce_q->q_header,
+				   entry->qp.handle);
+		vmci_q_header_init(entry->consume_q->q_header,
+				   entry->qp.handle);
+	}
+
+	vmci_ctx_qp_create(context, entry->qp.handle);
+
+	return VMCI_SUCCESS;
+
+ error:
+	if (entry != NULL) {
+		qp_host_free_queue(entry->produce_q, guest_produce_size);
+		qp_host_free_queue(entry->consume_q, guest_consume_size);
+		kfree(entry);
+	}
+
+	return result;
+}
+
+/*
+ * Enqueues an event datagram to notify the peer VM attached to
+ * the given queue pair handle about attach/detach event by the
+ * given VM.  Returns Payload size of datagram enqueued on
+ * success, error code otherwise.
+ */
+static int qp_notify_peer(bool attach,
+			  struct vmci_handle handle,
+			  u32 my_id,
+			  u32 peer_id)
+{
+	int rv;
+	struct vmci_event_qp ev;
+
+	if (vmci_handle_is_invalid(handle) || my_id == VMCI_INVALID_ID ||
+	    peer_id == VMCI_INVALID_ID)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	/*
+	 * In vmci_ctx_enqueue_datagram() we enforce the upper limit on
+	 * number of pending events from the hypervisor to a given VM
+	 * otherwise a rogue VM could do an arbitrary number of attach
+	 * and detach operations causing memory pressure in the host
+	 * kernel.
+	 */
+
+	ev.msg.hdr.dst = vmci_make_handle(peer_id, VMCI_EVENT_HANDLER);
+	ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+					  VMCI_CONTEXT_RESOURCE_ID);
+	ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
+	ev.msg.event_data.event = attach ?
+	    VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH;
+	ev.payload.handle = handle;
+	ev.payload.peer_id = my_id;
+
+	rv = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID,
+				    &ev.msg.hdr, false);
+	if (rv < VMCI_SUCCESS)
+		pr_warn("Failed to enqueue queue_pair %s event datagram for context (ID=0x%x)\n",
+			attach ? "ATTACH" : "DETACH", peer_id);
+
+	return rv;
+}
+
+/*
+ * The second endpoint issuing a queue pair allocation will attach to
+ * the queue pair registered with the queue pair broker.
+ *
+ * If the attacher is a guest, it will associate a VMX virtual address
+ * range with the queue pair as specified by the page_store. At this
+ * point, the already attach host endpoint may start using the queue
+ * pair, and an attach event is sent to it. For compatibility with
+ * older VMX'en, that used a separate step to set the VMX virtual
+ * address range, the virtual address range can be registered later
+ * using vmci_qp_broker_set_page_store. In that case, a page_store of
+ * NULL should be used, and the attach event will be generated once
+ * the actual page store has been set.
+ *
+ * If the attacher is the host, a page_store of NULL should be used as
+ * well, since the page store information is already set by the guest.
+ *
+ * For new VMX and host callers, the queue pair will be moved to the
+ * VMCIQPB_ATTACHED_MEM state, and for older VMX callers, it will be
+ * moved to the VMCOQPB_ATTACHED_NO_MEM state.
+ */
+static int qp_broker_attach(struct qp_broker_entry *entry,
+			    u32 peer,
+			    u32 flags,
+			    u32 priv_flags,
+			    u64 produce_size,
+			    u64 consume_size,
+			    struct vmci_qp_page_store *page_store,
+			    struct vmci_ctx *context,
+			    vmci_event_release_cb wakeup_cb,
+			    void *client_data,
+			    struct qp_broker_entry **ent)
+{
+	const u32 context_id = vmci_ctx_get_id(context);
+	bool is_local = flags & VMCI_QPFLAG_LOCAL;
+	int result;
+
+	if (entry->state != VMCIQPB_CREATED_NO_MEM &&
+	    entry->state != VMCIQPB_CREATED_MEM)
+		return VMCI_ERROR_UNAVAILABLE;
+
+	if (is_local) {
+		if (!(entry->qp.flags & VMCI_QPFLAG_LOCAL) ||
+		    context_id != entry->create_id) {
+			return VMCI_ERROR_INVALID_ARGS;
+		}
+	} else if (context_id == entry->create_id ||
+		   context_id == entry->attach_id) {
+		return VMCI_ERROR_ALREADY_EXISTS;
+	}
+
+	if (VMCI_CONTEXT_IS_VM(context_id) &&
+	    VMCI_CONTEXT_IS_VM(entry->create_id))
+		return VMCI_ERROR_DST_UNREACHABLE;
+
+	/*
+	 * If we are attaching from a restricted context then the queuepair
+	 * must have been created by a trusted endpoint.
+	 */
+	if ((context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) &&
+	    !entry->created_by_trusted)
+		return VMCI_ERROR_NO_ACCESS;
+
+	/*
+	 * If we are attaching to a queuepair that was created by a restricted
+	 * context then we must be trusted.
+	 */
+	if (entry->require_trusted_attach &&
+	    (!(priv_flags & VMCI_PRIVILEGE_FLAG_TRUSTED)))
+		return VMCI_ERROR_NO_ACCESS;
+
+	/*
+	 * If the creator specifies VMCI_INVALID_ID in "peer" field, access
+	 * control check is not performed.
+	 */
+	if (entry->qp.peer != VMCI_INVALID_ID && entry->qp.peer != context_id)
+		return VMCI_ERROR_NO_ACCESS;
+
+	if (entry->create_id == VMCI_HOST_CONTEXT_ID) {
+		/*
+		 * Do not attach if the caller doesn't support Host Queue Pairs
+		 * and a host created this queue pair.
+		 */
+
+		if (!vmci_ctx_supports_host_qp(context))
+			return VMCI_ERROR_INVALID_RESOURCE;
+
+	} else if (context_id == VMCI_HOST_CONTEXT_ID) {
+		struct vmci_ctx *create_context;
+		bool supports_host_qp;
+
+		/*
+		 * Do not attach a host to a user created queue pair if that
+		 * user doesn't support host queue pair end points.
+		 */
+
+		create_context = vmci_ctx_get(entry->create_id);
+		supports_host_qp = vmci_ctx_supports_host_qp(create_context);
+		vmci_ctx_put(create_context);
+
+		if (!supports_host_qp)
+			return VMCI_ERROR_INVALID_RESOURCE;
+	}
+
+	if ((entry->qp.flags & ~VMCI_QP_ASYMM) != (flags & ~VMCI_QP_ASYMM_PEER))
+		return VMCI_ERROR_QUEUEPAIR_MISMATCH;
+
+	if (context_id != VMCI_HOST_CONTEXT_ID) {
+		/*
+		 * The queue pair broker entry stores values from the guest
+		 * point of view, so an attaching guest should match the values
+		 * stored in the entry.
+		 */
+
+		if (entry->qp.produce_size != produce_size ||
+		    entry->qp.consume_size != consume_size) {
+			return VMCI_ERROR_QUEUEPAIR_MISMATCH;
+		}
+	} else if (entry->qp.produce_size != consume_size ||
+		   entry->qp.consume_size != produce_size) {
+		return VMCI_ERROR_QUEUEPAIR_MISMATCH;
+	}
+
+	if (context_id != VMCI_HOST_CONTEXT_ID) {
+		/*
+		 * If a guest attached to a queue pair, it will supply
+		 * the backing memory.  If this is a pre NOVMVM vmx,
+		 * the backing memory will be supplied by calling
+		 * vmci_qp_broker_set_page_store() following the
+		 * return of the vmci_qp_broker_alloc() call. If it is
+		 * a vmx of version NOVMVM or later, the page store
+		 * must be supplied as part of the
+		 * vmci_qp_broker_alloc call.  Under all circumstances
+		 * must the initially created queue pair not have any
+		 * memory associated with it already.
+		 */
+
+		if (entry->state != VMCIQPB_CREATED_NO_MEM)
+			return VMCI_ERROR_INVALID_ARGS;
+
+		if (page_store != NULL) {
+			/*
+			 * Patch up host state to point to guest
+			 * supplied memory. The VMX already
+			 * initialized the queue pair headers, so no
+			 * need for the kernel side to do that.
+			 */
+
+			result = qp_host_register_user_memory(page_store,
+							      entry->produce_q,
+							      entry->consume_q);
+			if (result < VMCI_SUCCESS)
+				return result;
+
+			entry->state = VMCIQPB_ATTACHED_MEM;
+		} else {
+			entry->state = VMCIQPB_ATTACHED_NO_MEM;
+		}
+	} else if (entry->state == VMCIQPB_CREATED_NO_MEM) {
+		/*
+		 * The host side is attempting to attach to a queue
+		 * pair that doesn't have any memory associated with
+		 * it. This must be a pre NOVMVM vmx that hasn't set
+		 * the page store information yet, or a quiesced VM.
+		 */
+
+		return VMCI_ERROR_UNAVAILABLE;
+	} else {
+		/* The host side has successfully attached to a queue pair. */
+		entry->state = VMCIQPB_ATTACHED_MEM;
+	}
+
+	if (entry->state == VMCIQPB_ATTACHED_MEM) {
+		result =
+		    qp_notify_peer(true, entry->qp.handle, context_id,
+				   entry->create_id);
+		if (result < VMCI_SUCCESS)
+			pr_warn("Failed to notify peer (ID=0x%x) of attach to queue pair (handle=0x%x:0x%x)\n",
+				entry->create_id, entry->qp.handle.context,
+				entry->qp.handle.resource);
+	}
+
+	entry->attach_id = context_id;
+	entry->qp.ref_count++;
+	if (wakeup_cb) {
+		entry->wakeup_cb = wakeup_cb;
+		entry->client_data = client_data;
+	}
+
+	/*
+	 * When attaching to local queue pairs, the context already has
+	 * an entry tracking the queue pair, so don't add another one.
+	 */
+	if (!is_local)
+		vmci_ctx_qp_create(context, entry->qp.handle);
+
+	if (ent != NULL)
+		*ent = entry;
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * queue_pair_Alloc for use when setting up queue pair endpoints
+ * on the host.
+ */
+static int qp_broker_alloc(struct vmci_handle handle,
+			   u32 peer,
+			   u32 flags,
+			   u32 priv_flags,
+			   u64 produce_size,
+			   u64 consume_size,
+			   struct vmci_qp_page_store *page_store,
+			   struct vmci_ctx *context,
+			   vmci_event_release_cb wakeup_cb,
+			   void *client_data,
+			   struct qp_broker_entry **ent,
+			   bool *swap)
+{
+	const u32 context_id = vmci_ctx_get_id(context);
+	bool create;
+	struct qp_broker_entry *entry = NULL;
+	bool is_local = flags & VMCI_QPFLAG_LOCAL;
+	int result;
+
+	if (vmci_handle_is_invalid(handle) ||
+	    (flags & ~VMCI_QP_ALL_FLAGS) || is_local ||
+	    !(produce_size || consume_size) ||
+	    !context || context_id == VMCI_INVALID_ID ||
+	    handle.context == VMCI_INVALID_ID) {
+		return VMCI_ERROR_INVALID_ARGS;
+	}
+
+	if (page_store && !VMCI_QP_PAGESTORE_IS_WELLFORMED(page_store))
+		return VMCI_ERROR_INVALID_ARGS;
+
+	/*
+	 * In the initial argument check, we ensure that non-vmkernel hosts
+	 * are not allowed to create local queue pairs.
+	 */
+
+	mutex_lock(&qp_broker_list.mutex);
+
+	if (!is_local && vmci_ctx_qp_exists(context, handle)) {
+		pr_devel("Context (ID=0x%x) already attached to queue pair (handle=0x%x:0x%x)\n",
+			 context_id, handle.context, handle.resource);
+		mutex_unlock(&qp_broker_list.mutex);
+		return VMCI_ERROR_ALREADY_EXISTS;
+	}
+
+	if (handle.resource != VMCI_INVALID_ID)
+		entry = qp_broker_handle_to_entry(handle);
+
+	if (!entry) {
+		create = true;
+		result =
+		    qp_broker_create(handle, peer, flags, priv_flags,
+				     produce_size, consume_size, page_store,
+				     context, wakeup_cb, client_data, ent);
+	} else {
+		create = false;
+		result =
+		    qp_broker_attach(entry, peer, flags, priv_flags,
+				     produce_size, consume_size, page_store,
+				     context, wakeup_cb, client_data, ent);
+	}
+
+	mutex_unlock(&qp_broker_list.mutex);
+
+	if (swap)
+		*swap = (context_id == VMCI_HOST_CONTEXT_ID) &&
+		    !(create && is_local);
+
+	return result;
+}
+
+/*
+ * This function implements the kernel API for allocating a queue
+ * pair.
+ */
+static int qp_alloc_host_work(struct vmci_handle *handle,
+			      struct vmci_queue **produce_q,
+			      u64 produce_size,
+			      struct vmci_queue **consume_q,
+			      u64 consume_size,
+			      u32 peer,
+			      u32 flags,
+			      u32 priv_flags,
+			      vmci_event_release_cb wakeup_cb,
+			      void *client_data)
+{
+	struct vmci_handle new_handle;
+	struct vmci_ctx *context;
+	struct qp_broker_entry *entry;
+	int result;
+	bool swap;
+
+	if (vmci_handle_is_invalid(*handle)) {
+		new_handle = vmci_make_handle(
+			VMCI_HOST_CONTEXT_ID, VMCI_INVALID_ID);
+	} else
+		new_handle = *handle;
+
+	context = vmci_ctx_get(VMCI_HOST_CONTEXT_ID);
+	entry = NULL;
+	result =
+	    qp_broker_alloc(new_handle, peer, flags, priv_flags,
+			    produce_size, consume_size, NULL, context,
+			    wakeup_cb, client_data, &entry, &swap);
+	if (result == VMCI_SUCCESS) {
+		if (swap) {
+			/*
+			 * If this is a local queue pair, the attacher
+			 * will swap around produce and consume
+			 * queues.
+			 */
+
+			*produce_q = entry->consume_q;
+			*consume_q = entry->produce_q;
+		} else {
+			*produce_q = entry->produce_q;
+			*consume_q = entry->consume_q;
+		}
+
+		*handle = vmci_resource_handle(&entry->resource);
+	} else {
+		*handle = VMCI_INVALID_HANDLE;
+		pr_devel("queue pair broker failed to alloc (result=%d)\n",
+			 result);
+	}
+	vmci_ctx_put(context);
+	return result;
+}
+
+/*
+ * Allocates a VMCI queue_pair. Only checks validity of input
+ * arguments. The real work is done in the host or guest
+ * specific function.
+ */
+int vmci_qp_alloc(struct vmci_handle *handle,
+		  struct vmci_queue **produce_q,
+		  u64 produce_size,
+		  struct vmci_queue **consume_q,
+		  u64 consume_size,
+		  u32 peer,
+		  u32 flags,
+		  u32 priv_flags,
+		  bool guest_endpoint,
+		  vmci_event_release_cb wakeup_cb,
+		  void *client_data)
+{
+	if (!handle || !produce_q || !consume_q ||
+	    (!produce_size && !consume_size) || (flags & ~VMCI_QP_ALL_FLAGS))
+		return VMCI_ERROR_INVALID_ARGS;
+
+	if (guest_endpoint) {
+		return qp_alloc_guest_work(handle, produce_q,
+					   produce_size, consume_q,
+					   consume_size, peer,
+					   flags, priv_flags);
+	} else {
+		return qp_alloc_host_work(handle, produce_q,
+					  produce_size, consume_q,
+					  consume_size, peer, flags,
+					  priv_flags, wakeup_cb, client_data);
+	}
+}
+
+/*
+ * This function implements the host kernel API for detaching from
+ * a queue pair.
+ */
+static int qp_detatch_host_work(struct vmci_handle handle)
+{
+	int result;
+	struct vmci_ctx *context;
+
+	context = vmci_ctx_get(VMCI_HOST_CONTEXT_ID);
+
+	result = vmci_qp_broker_detach(handle, context);
+
+	vmci_ctx_put(context);
+	return result;
+}
+
+/*
+ * Detaches from a VMCI queue_pair. Only checks validity of input argument.
+ * Real work is done in the host or guest specific function.
+ */
+static int qp_detatch(struct vmci_handle handle, bool guest_endpoint)
+{
+	if (vmci_handle_is_invalid(handle))
+		return VMCI_ERROR_INVALID_ARGS;
+
+	if (guest_endpoint)
+		return qp_detatch_guest_work(handle);
+	else
+		return qp_detatch_host_work(handle);
+}
+
+/*
+ * Returns the entry from the head of the list. Assumes that the list is
+ * locked.
+ */
+static struct qp_entry *qp_list_get_head(struct qp_list *qp_list)
+{
+	if (!list_empty(&qp_list->head)) {
+		struct qp_entry *entry =
+		    list_first_entry(&qp_list->head, struct qp_entry,
+				     list_item);
+		return entry;
+	}
+
+	return NULL;
+}
+
+void vmci_qp_broker_exit(void)
+{
+	struct qp_entry *entry;
+	struct qp_broker_entry *be;
+
+	mutex_lock(&qp_broker_list.mutex);
+
+	while ((entry = qp_list_get_head(&qp_broker_list))) {
+		be = (struct qp_broker_entry *)entry;
+
+		qp_list_remove_entry(&qp_broker_list, entry);
+		kfree(be);
+	}
+
+	mutex_unlock(&qp_broker_list.mutex);
+}
+
+/*
+ * Requests that a queue pair be allocated with the VMCI queue
+ * pair broker. Allocates a queue pair entry if one does not
+ * exist. Attaches to one if it exists, and retrieves the page
+ * files backing that queue_pair.  Assumes that the queue pair
+ * broker lock is held.
+ */
+int vmci_qp_broker_alloc(struct vmci_handle handle,
+			 u32 peer,
+			 u32 flags,
+			 u32 priv_flags,
+			 u64 produce_size,
+			 u64 consume_size,
+			 struct vmci_qp_page_store *page_store,
+			 struct vmci_ctx *context)
+{
+	return qp_broker_alloc(handle, peer, flags, priv_flags,
+			       produce_size, consume_size,
+			       page_store, context, NULL, NULL, NULL, NULL);
+}
+
+/*
+ * VMX'en with versions lower than VMCI_VERSION_NOVMVM use a separate
+ * step to add the UVAs of the VMX mapping of the queue pair. This function
+ * provides backwards compatibility with such VMX'en, and takes care of
+ * registering the page store for a queue pair previously allocated by the
+ * VMX during create or attach. This function will move the queue pair state
+ * to either from VMCIQBP_CREATED_NO_MEM to VMCIQBP_CREATED_MEM or
+ * VMCIQBP_ATTACHED_NO_MEM to VMCIQBP_ATTACHED_MEM. If moving to the
+ * attached state with memory, the queue pair is ready to be used by the
+ * host peer, and an attached event will be generated.
+ *
+ * Assumes that the queue pair broker lock is held.
+ *
+ * This function is only used by the hosted platform, since there is no
+ * issue with backwards compatibility for vmkernel.
+ */
+int vmci_qp_broker_set_page_store(struct vmci_handle handle,
+				  u64 produce_uva,
+				  u64 consume_uva,
+				  struct vmci_ctx *context)
+{
+	struct qp_broker_entry *entry;
+	int result;
+	const u32 context_id = vmci_ctx_get_id(context);
+
+	if (vmci_handle_is_invalid(handle) || !context ||
+	    context_id == VMCI_INVALID_ID)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	/*
+	 * We only support guest to host queue pairs, so the VMX must
+	 * supply UVAs for the mapped page files.
+	 */
+
+	if (produce_uva == 0 || consume_uva == 0)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	mutex_lock(&qp_broker_list.mutex);
+
+	if (!vmci_ctx_qp_exists(context, handle)) {
+		pr_warn("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
+			context_id, handle.context, handle.resource);
+		result = VMCI_ERROR_NOT_FOUND;
+		goto out;
+	}
+
+	entry = qp_broker_handle_to_entry(handle);
+	if (!entry) {
+		result = VMCI_ERROR_NOT_FOUND;
+		goto out;
+	}
+
+	/*
+	 * If I'm the owner then I can set the page store.
+	 *
+	 * Or, if a host created the queue_pair and I'm the attached peer
+	 * then I can set the page store.
+	 */
+	if (entry->create_id != context_id &&
+	    (entry->create_id != VMCI_HOST_CONTEXT_ID ||
+	     entry->attach_id != context_id)) {
+		result = VMCI_ERROR_QUEUEPAIR_NOTOWNER;
+		goto out;
+	}
+
+	if (entry->state != VMCIQPB_CREATED_NO_MEM &&
+	    entry->state != VMCIQPB_ATTACHED_NO_MEM) {
+		result = VMCI_ERROR_UNAVAILABLE;
+		goto out;
+	}
+
+	result = qp_host_get_user_memory(produce_uva, consume_uva,
+					 entry->produce_q, entry->consume_q);
+	if (result < VMCI_SUCCESS)
+		goto out;
+
+	result = qp_host_map_queues(entry->produce_q, entry->consume_q);
+	if (result < VMCI_SUCCESS) {
+		qp_host_unregister_user_memory(entry->produce_q,
+					       entry->consume_q);
+		goto out;
+	}
+
+	if (entry->state == VMCIQPB_CREATED_NO_MEM)
+		entry->state = VMCIQPB_CREATED_MEM;
+	else
+		entry->state = VMCIQPB_ATTACHED_MEM;
+
+	entry->vmci_page_files = true;
+
+	if (entry->state == VMCIQPB_ATTACHED_MEM) {
+		result =
+		    qp_notify_peer(true, handle, context_id, entry->create_id);
+		if (result < VMCI_SUCCESS) {
+			pr_warn("Failed to notify peer (ID=0x%x) of attach to queue pair (handle=0x%x:0x%x)\n",
+				entry->create_id, entry->qp.handle.context,
+				entry->qp.handle.resource);
+		}
+	}
+
+	result = VMCI_SUCCESS;
+ out:
+	mutex_unlock(&qp_broker_list.mutex);
+	return result;
+}
+
+/*
+ * Resets saved queue headers for the given QP broker
+ * entry. Should be used when guest memory becomes available
+ * again, or the guest detaches.
+ */
+static void qp_reset_saved_headers(struct qp_broker_entry *entry)
+{
+	entry->produce_q->saved_header = NULL;
+	entry->consume_q->saved_header = NULL;
+}
+
+/*
+ * The main entry point for detaching from a queue pair registered with the
+ * queue pair broker. If more than one endpoint is attached to the queue
+ * pair, the first endpoint will mainly decrement a reference count and
+ * generate a notification to its peer. The last endpoint will clean up
+ * the queue pair state registered with the broker.
+ *
+ * When a guest endpoint detaches, it will unmap and unregister the guest
+ * memory backing the queue pair. If the host is still attached, it will
+ * no longer be able to access the queue pair content.
+ *
+ * If the queue pair is already in a state where there is no memory
+ * registered for the queue pair (any *_NO_MEM state), it will transition to
+ * the VMCIQPB_SHUTDOWN_NO_MEM state. This will also happen, if a guest
+ * endpoint is the first of two endpoints to detach. If the host endpoint is
+ * the first out of two to detach, the queue pair will move to the
+ * VMCIQPB_SHUTDOWN_MEM state.
+ */
+int vmci_qp_broker_detach(struct vmci_handle handle, struct vmci_ctx *context)
+{
+	struct qp_broker_entry *entry;
+	const u32 context_id = vmci_ctx_get_id(context);
+	u32 peer_id;
+	bool is_local = false;
+	int result;
+
+	if (vmci_handle_is_invalid(handle) || !context ||
+	    context_id == VMCI_INVALID_ID) {
+		return VMCI_ERROR_INVALID_ARGS;
+	}
+
+	mutex_lock(&qp_broker_list.mutex);
+
+	if (!vmci_ctx_qp_exists(context, handle)) {
+		pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
+			 context_id, handle.context, handle.resource);
+		result = VMCI_ERROR_NOT_FOUND;
+		goto out;
+	}
+
+	entry = qp_broker_handle_to_entry(handle);
+	if (!entry) {
+		pr_devel("Context (ID=0x%x) reports being attached to queue pair(handle=0x%x:0x%x) that isn't present in broker\n",
+			 context_id, handle.context, handle.resource);
+		result = VMCI_ERROR_NOT_FOUND;
+		goto out;
+	}
+
+	if (context_id != entry->create_id && context_id != entry->attach_id) {
+		result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
+		goto out;
+	}
+
+	if (context_id == entry->create_id) {
+		peer_id = entry->attach_id;
+		entry->create_id = VMCI_INVALID_ID;
+	} else {
+		peer_id = entry->create_id;
+		entry->attach_id = VMCI_INVALID_ID;
+	}
+	entry->qp.ref_count--;
+
+	is_local = entry->qp.flags & VMCI_QPFLAG_LOCAL;
+
+	if (context_id != VMCI_HOST_CONTEXT_ID) {
+		bool headers_mapped;
+
+		/*
+		 * Pre NOVMVM vmx'en may detach from a queue pair
+		 * before setting the page store, and in that case
+		 * there is no user memory to detach from. Also, more
+		 * recent VMX'en may detach from a queue pair in the
+		 * quiesced state.
+		 */
+
+		qp_acquire_queue_mutex(entry->produce_q);
+		headers_mapped = entry->produce_q->q_header ||
+		    entry->consume_q->q_header;
+		if (QPBROKERSTATE_HAS_MEM(entry)) {
+			result =
+			    qp_host_unmap_queues(INVALID_VMCI_GUEST_MEM_ID,
+						 entry->produce_q,
+						 entry->consume_q);
+			if (result < VMCI_SUCCESS)
+				pr_warn("Failed to unmap queue headers for queue pair (handle=0x%x:0x%x,result=%d)\n",
+					handle.context, handle.resource,
+					result);
+
+			qp_host_unregister_user_memory(entry->produce_q,
+						       entry->consume_q);
+
+		}
+
+		if (!headers_mapped)
+			qp_reset_saved_headers(entry);
+
+		qp_release_queue_mutex(entry->produce_q);
+
+		if (!headers_mapped && entry->wakeup_cb)
+			entry->wakeup_cb(entry->client_data);
+
+	} else {
+		if (entry->wakeup_cb) {
+			entry->wakeup_cb = NULL;
+			entry->client_data = NULL;
+		}
+	}
+
+	if (entry->qp.ref_count == 0) {
+		qp_list_remove_entry(&qp_broker_list, &entry->qp);
+
+		if (is_local)
+			kfree(entry->local_mem);
+
+		qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q);
+		qp_host_free_queue(entry->produce_q, entry->qp.produce_size);
+		qp_host_free_queue(entry->consume_q, entry->qp.consume_size);
+		/* Unlink from resource hash table and free callback */
+		vmci_resource_remove(&entry->resource);
+
+		kfree(entry);
+
+		vmci_ctx_qp_destroy(context, handle);
+	} else {
+		qp_notify_peer(false, handle, context_id, peer_id);
+		if (context_id == VMCI_HOST_CONTEXT_ID &&
+		    QPBROKERSTATE_HAS_MEM(entry)) {
+			entry->state = VMCIQPB_SHUTDOWN_MEM;
+		} else {
+			entry->state = VMCIQPB_SHUTDOWN_NO_MEM;
+		}
+
+		if (!is_local)
+			vmci_ctx_qp_destroy(context, handle);
+
+	}
+	result = VMCI_SUCCESS;
+ out:
+	mutex_unlock(&qp_broker_list.mutex);
+	return result;
+}
+
+/*
+ * Establishes the necessary mappings for a queue pair given a
+ * reference to the queue pair guest memory. This is usually
+ * called when a guest is unquiesced and the VMX is allowed to
+ * map guest memory once again.
+ */
+int vmci_qp_broker_map(struct vmci_handle handle,
+		       struct vmci_ctx *context,
+		       u64 guest_mem)
+{
+	struct qp_broker_entry *entry;
+	const u32 context_id = vmci_ctx_get_id(context);
+	int result;
+
+	if (vmci_handle_is_invalid(handle) || !context ||
+	    context_id == VMCI_INVALID_ID)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	mutex_lock(&qp_broker_list.mutex);
+
+	if (!vmci_ctx_qp_exists(context, handle)) {
+		pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
+			 context_id, handle.context, handle.resource);
+		result = VMCI_ERROR_NOT_FOUND;
+		goto out;
+	}
+
+	entry = qp_broker_handle_to_entry(handle);
+	if (!entry) {
+		pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n",
+			 context_id, handle.context, handle.resource);
+		result = VMCI_ERROR_NOT_FOUND;
+		goto out;
+	}
+
+	if (context_id != entry->create_id && context_id != entry->attach_id) {
+		result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
+		goto out;
+	}
+
+	result = VMCI_SUCCESS;
+
+	if (context_id != VMCI_HOST_CONTEXT_ID &&
+	    !QPBROKERSTATE_HAS_MEM(entry)) {
+		struct vmci_qp_page_store page_store;
+
+		page_store.pages = guest_mem;
+		page_store.len = QPE_NUM_PAGES(entry->qp);
+
+		qp_acquire_queue_mutex(entry->produce_q);
+		qp_reset_saved_headers(entry);
+		result =
+		    qp_host_register_user_memory(&page_store,
+						 entry->produce_q,
+						 entry->consume_q);
+		qp_release_queue_mutex(entry->produce_q);
+		if (result == VMCI_SUCCESS) {
+			/* Move state from *_NO_MEM to *_MEM */
+
+			entry->state++;
+
+			if (entry->wakeup_cb)
+				entry->wakeup_cb(entry->client_data);
+		}
+	}
+
+ out:
+	mutex_unlock(&qp_broker_list.mutex);
+	return result;
+}
+
+/*
+ * Saves a snapshot of the queue headers for the given QP broker
+ * entry. Should be used when guest memory is unmapped.
+ * Results:
+ * VMCI_SUCCESS on success, appropriate error code if guest memory
+ * can't be accessed..
+ */
+static int qp_save_headers(struct qp_broker_entry *entry)
+{
+	int result;
+
+	if (entry->produce_q->saved_header != NULL &&
+	    entry->consume_q->saved_header != NULL) {
+		/*
+		 *  If the headers have already been saved, we don't need to do
+		 *  it again, and we don't want to map in the headers
+		 *  unnecessarily.
+		 */
+
+		return VMCI_SUCCESS;
+	}
+
+	if (NULL == entry->produce_q->q_header ||
+	    NULL == entry->consume_q->q_header) {
+		result = qp_host_map_queues(entry->produce_q, entry->consume_q);
+		if (result < VMCI_SUCCESS)
+			return result;
+	}
+
+	memcpy(&entry->saved_produce_q, entry->produce_q->q_header,
+	       sizeof(entry->saved_produce_q));
+	entry->produce_q->saved_header = &entry->saved_produce_q;
+	memcpy(&entry->saved_consume_q, entry->consume_q->q_header,
+	       sizeof(entry->saved_consume_q));
+	entry->consume_q->saved_header = &entry->saved_consume_q;
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Removes all references to the guest memory of a given queue pair, and
+ * will move the queue pair from state *_MEM to *_NO_MEM. It is usually
+ * called when a VM is being quiesced where access to guest memory should
+ * avoided.
+ */
+int vmci_qp_broker_unmap(struct vmci_handle handle,
+			 struct vmci_ctx *context,
+			 u32 gid)
+{
+	struct qp_broker_entry *entry;
+	const u32 context_id = vmci_ctx_get_id(context);
+	int result;
+
+	if (vmci_handle_is_invalid(handle) || !context ||
+	    context_id == VMCI_INVALID_ID)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	mutex_lock(&qp_broker_list.mutex);
+
+	if (!vmci_ctx_qp_exists(context, handle)) {
+		pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
+			 context_id, handle.context, handle.resource);
+		result = VMCI_ERROR_NOT_FOUND;
+		goto out;
+	}
+
+	entry = qp_broker_handle_to_entry(handle);
+	if (!entry) {
+		pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n",
+			 context_id, handle.context, handle.resource);
+		result = VMCI_ERROR_NOT_FOUND;
+		goto out;
+	}
+
+	if (context_id != entry->create_id && context_id != entry->attach_id) {
+		result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
+		goto out;
+	}
+
+	if (context_id != VMCI_HOST_CONTEXT_ID &&
+	    QPBROKERSTATE_HAS_MEM(entry)) {
+		qp_acquire_queue_mutex(entry->produce_q);
+		result = qp_save_headers(entry);
+		if (result < VMCI_SUCCESS)
+			pr_warn("Failed to save queue headers for queue pair (handle=0x%x:0x%x,result=%d)\n",
+				handle.context, handle.resource, result);
+
+		qp_host_unmap_queues(gid, entry->produce_q, entry->consume_q);
+
+		/*
+		 * On hosted, when we unmap queue pairs, the VMX will also
+		 * unmap the guest memory, so we invalidate the previously
+		 * registered memory. If the queue pair is mapped again at a
+		 * later point in time, we will need to reregister the user
+		 * memory with a possibly new user VA.
+		 */
+		qp_host_unregister_user_memory(entry->produce_q,
+					       entry->consume_q);
+
+		/*
+		 * Move state from *_MEM to *_NO_MEM.
+		 */
+		entry->state--;
+
+		qp_release_queue_mutex(entry->produce_q);
+	}
+
+	result = VMCI_SUCCESS;
+
+ out:
+	mutex_unlock(&qp_broker_list.mutex);
+	return result;
+}
+
+/*
+ * Destroys all guest queue pair endpoints. If active guest queue
+ * pairs still exist, hypercalls to attempt detach from these
+ * queue pairs will be made. Any failure to detach is silently
+ * ignored.
+ */
+void vmci_qp_guest_endpoints_exit(void)
+{
+	struct qp_entry *entry;
+	struct qp_guest_endpoint *ep;
+
+	mutex_lock(&qp_guest_endpoints.mutex);
+
+	while ((entry = qp_list_get_head(&qp_guest_endpoints))) {
+		ep = (struct qp_guest_endpoint *)entry;
+
+		/* Don't make a hypercall for local queue_pairs. */
+		if (!(entry->flags & VMCI_QPFLAG_LOCAL))
+			qp_detatch_hypercall(entry->handle);
+
+		/* We cannot fail the exit, so let's reset ref_count. */
+		entry->ref_count = 0;
+		qp_list_remove_entry(&qp_guest_endpoints, entry);
+
+		qp_guest_endpoint_destroy(ep);
+	}
+
+	mutex_unlock(&qp_guest_endpoints.mutex);
+}
+
+/*
+ * Helper routine that will lock the queue pair before subsequent
+ * operations.
+ * Note: Non-blocking on the host side is currently only implemented in ESX.
+ * Since non-blocking isn't yet implemented on the host personality we
+ * have no reason to acquire a spin lock.  So to avoid the use of an
+ * unnecessary lock only acquire the mutex if we can block.
+ */
+static void qp_lock(const struct vmci_qp *qpair)
+{
+	qp_acquire_queue_mutex(qpair->produce_q);
+}
+
+/*
+ * Helper routine that unlocks the queue pair after calling
+ * qp_lock.
+ */
+static void qp_unlock(const struct vmci_qp *qpair)
+{
+	qp_release_queue_mutex(qpair->produce_q);
+}
+
+/*
+ * The queue headers may not be mapped at all times. If a queue is
+ * currently not mapped, it will be attempted to do so.
+ */
+static int qp_map_queue_headers(struct vmci_queue *produce_q,
+				struct vmci_queue *consume_q)
+{
+	int result;
+
+	if (NULL == produce_q->q_header || NULL == consume_q->q_header) {
+		result = qp_host_map_queues(produce_q, consume_q);
+		if (result < VMCI_SUCCESS)
+			return (produce_q->saved_header &&
+				consume_q->saved_header) ?
+			    VMCI_ERROR_QUEUEPAIR_NOT_READY :
+			    VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
+	}
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Helper routine that will retrieve the produce and consume
+ * headers of a given queue pair. If the guest memory of the
+ * queue pair is currently not available, the saved queue headers
+ * will be returned, if these are available.
+ */
+static int qp_get_queue_headers(const struct vmci_qp *qpair,
+				struct vmci_queue_header **produce_q_header,
+				struct vmci_queue_header **consume_q_header)
+{
+	int result;
+
+	result = qp_map_queue_headers(qpair->produce_q, qpair->consume_q);
+	if (result == VMCI_SUCCESS) {
+		*produce_q_header = qpair->produce_q->q_header;
+		*consume_q_header = qpair->consume_q->q_header;
+	} else if (qpair->produce_q->saved_header &&
+		   qpair->consume_q->saved_header) {
+		*produce_q_header = qpair->produce_q->saved_header;
+		*consume_q_header = qpair->consume_q->saved_header;
+		result = VMCI_SUCCESS;
+	}
+
+	return result;
+}
+
+/*
+ * Callback from VMCI queue pair broker indicating that a queue
+ * pair that was previously not ready, now either is ready or
+ * gone forever.
+ */
+static int qp_wakeup_cb(void *client_data)
+{
+	struct vmci_qp *qpair = (struct vmci_qp *)client_data;
+
+	qp_lock(qpair);
+	while (qpair->blocked > 0) {
+		qpair->blocked--;
+		qpair->generation++;
+		wake_up(&qpair->event);
+	}
+	qp_unlock(qpair);
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Makes the calling thread wait for the queue pair to become
+ * ready for host side access.  Returns true when thread is
+ * woken up after queue pair state change, false otherwise.
+ */
+static bool qp_wait_for_ready_queue(struct vmci_qp *qpair)
+{
+	unsigned int generation;
+
+	qpair->blocked++;
+	generation = qpair->generation;
+	qp_unlock(qpair);
+	wait_event(qpair->event, generation != qpair->generation);
+	qp_lock(qpair);
+
+	return true;
+}
+
+/*
+ * Enqueues a given buffer to the produce queue using the provided
+ * function. As many bytes as possible (space available in the queue)
+ * are enqueued.  Assumes the queue->mutex has been acquired.  Returns
+ * VMCI_ERROR_QUEUEPAIR_NOSPACE if no space was available to enqueue
+ * data, VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the
+ * queue (as defined by the queue size), VMCI_ERROR_INVALID_ARGS, if
+ * an error occured when accessing the buffer,
+ * VMCI_ERROR_QUEUEPAIR_NOTATTACHED, if the queue pair pages aren't
+ * available.  Otherwise, the number of bytes written to the queue is
+ * returned.  Updates the tail pointer of the produce queue.
+ */
+static ssize_t qp_enqueue_locked(struct vmci_queue *produce_q,
+				 struct vmci_queue *consume_q,
+				 const u64 produce_q_size,
+				 struct iov_iter *from)
+{
+	s64 free_space;
+	u64 tail;
+	size_t buf_size = iov_iter_count(from);
+	size_t written;
+	ssize_t result;
+
+	result = qp_map_queue_headers(produce_q, consume_q);
+	if (unlikely(result != VMCI_SUCCESS))
+		return result;
+
+	free_space = vmci_q_header_free_space(produce_q->q_header,
+					      consume_q->q_header,
+					      produce_q_size);
+	if (free_space == 0)
+		return VMCI_ERROR_QUEUEPAIR_NOSPACE;
+
+	if (free_space < VMCI_SUCCESS)
+		return (ssize_t) free_space;
+
+	written = (size_t) (free_space > buf_size ? buf_size : free_space);
+	tail = vmci_q_header_producer_tail(produce_q->q_header);
+	if (likely(tail + written < produce_q_size)) {
+		result = qp_memcpy_to_queue_iter(produce_q, tail, from, written);
+	} else {
+		/* Tail pointer wraps around. */
+
+		const size_t tmp = (size_t) (produce_q_size - tail);
+
+		result = qp_memcpy_to_queue_iter(produce_q, tail, from, tmp);
+		if (result >= VMCI_SUCCESS)
+			result = qp_memcpy_to_queue_iter(produce_q, 0, from,
+						 written - tmp);
+	}
+
+	if (result < VMCI_SUCCESS)
+		return result;
+
+	vmci_q_header_add_producer_tail(produce_q->q_header, written,
+					produce_q_size);
+	return written;
+}
+
+/*
+ * Dequeues data (if available) from the given consume queue. Writes data
+ * to the user provided buffer using the provided function.
+ * Assumes the queue->mutex has been acquired.
+ * Results:
+ * VMCI_ERROR_QUEUEPAIR_NODATA if no data was available to dequeue.
+ * VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the queue
+ * (as defined by the queue size).
+ * VMCI_ERROR_INVALID_ARGS, if an error occured when accessing the buffer.
+ * Otherwise the number of bytes dequeued is returned.
+ * Side effects:
+ * Updates the head pointer of the consume queue.
+ */
+static ssize_t qp_dequeue_locked(struct vmci_queue *produce_q,
+				 struct vmci_queue *consume_q,
+				 const u64 consume_q_size,
+				 struct iov_iter *to,
+				 bool update_consumer)
+{
+	size_t buf_size = iov_iter_count(to);
+	s64 buf_ready;
+	u64 head;
+	size_t read;
+	ssize_t result;
+
+	result = qp_map_queue_headers(produce_q, consume_q);
+	if (unlikely(result != VMCI_SUCCESS))
+		return result;
+
+	buf_ready = vmci_q_header_buf_ready(consume_q->q_header,
+					    produce_q->q_header,
+					    consume_q_size);
+	if (buf_ready == 0)
+		return VMCI_ERROR_QUEUEPAIR_NODATA;
+
+	if (buf_ready < VMCI_SUCCESS)
+		return (ssize_t) buf_ready;
+
+	read = (size_t) (buf_ready > buf_size ? buf_size : buf_ready);
+	head = vmci_q_header_consumer_head(produce_q->q_header);
+	if (likely(head + read < consume_q_size)) {
+		result = qp_memcpy_from_queue_iter(to, consume_q, head, read);
+	} else {
+		/* Head pointer wraps around. */
+
+		const size_t tmp = (size_t) (consume_q_size - head);
+
+		result = qp_memcpy_from_queue_iter(to, consume_q, head, tmp);
+		if (result >= VMCI_SUCCESS)
+			result = qp_memcpy_from_queue_iter(to, consume_q, 0,
+						   read - tmp);
+
+	}
+
+	if (result < VMCI_SUCCESS)
+		return result;
+
+	if (update_consumer)
+		vmci_q_header_add_consumer_head(produce_q->q_header,
+						read, consume_q_size);
+
+	return read;
+}
+
+/*
+ * vmci_qpair_alloc() - Allocates a queue pair.
+ * @qpair:      Pointer for the new vmci_qp struct.
+ * @handle:     Handle to track the resource.
+ * @produce_qsize:      Desired size of the producer queue.
+ * @consume_qsize:      Desired size of the consumer queue.
+ * @peer:       ContextID of the peer.
+ * @flags:      VMCI flags.
+ * @priv_flags: VMCI priviledge flags.
+ *
+ * This is the client interface for allocating the memory for a
+ * vmci_qp structure and then attaching to the underlying
+ * queue.  If an error occurs allocating the memory for the
+ * vmci_qp structure no attempt is made to attach.  If an
+ * error occurs attaching, then the structure is freed.
+ */
+int vmci_qpair_alloc(struct vmci_qp **qpair,
+		     struct vmci_handle *handle,
+		     u64 produce_qsize,
+		     u64 consume_qsize,
+		     u32 peer,
+		     u32 flags,
+		     u32 priv_flags)
+{
+	struct vmci_qp *my_qpair;
+	int retval;
+	struct vmci_handle src = VMCI_INVALID_HANDLE;
+	struct vmci_handle dst = vmci_make_handle(peer, VMCI_INVALID_ID);
+	enum vmci_route route;
+	vmci_event_release_cb wakeup_cb;
+	void *client_data;
+
+	/*
+	 * Restrict the size of a queuepair.  The device already
+	 * enforces a limit on the total amount of memory that can be
+	 * allocated to queuepairs for a guest.  However, we try to
+	 * allocate this memory before we make the queuepair
+	 * allocation hypercall.  On Linux, we allocate each page
+	 * separately, which means rather than fail, the guest will
+	 * thrash while it tries to allocate, and will become
+	 * increasingly unresponsive to the point where it appears to
+	 * be hung.  So we place a limit on the size of an individual
+	 * queuepair here, and leave the device to enforce the
+	 * restriction on total queuepair memory.  (Note that this
+	 * doesn't prevent all cases; a user with only this much
+	 * physical memory could still get into trouble.)  The error
+	 * used by the device is NO_RESOURCES, so use that here too.
+	 */
+
+	if (produce_qsize + consume_qsize < max(produce_qsize, consume_qsize) ||
+	    produce_qsize + consume_qsize > VMCI_MAX_GUEST_QP_MEMORY)
+		return VMCI_ERROR_NO_RESOURCES;
+
+	retval = vmci_route(&src, &dst, false, &route);
+	if (retval < VMCI_SUCCESS)
+		route = vmci_guest_code_active() ?
+		    VMCI_ROUTE_AS_GUEST : VMCI_ROUTE_AS_HOST;
+
+	if (flags & (VMCI_QPFLAG_NONBLOCK | VMCI_QPFLAG_PINNED)) {
+		pr_devel("NONBLOCK OR PINNED set");
+		return VMCI_ERROR_INVALID_ARGS;
+	}
+
+	my_qpair = kzalloc(sizeof(*my_qpair), GFP_KERNEL);
+	if (!my_qpair)
+		return VMCI_ERROR_NO_MEM;
+
+	my_qpair->produce_q_size = produce_qsize;
+	my_qpair->consume_q_size = consume_qsize;
+	my_qpair->peer = peer;
+	my_qpair->flags = flags;
+	my_qpair->priv_flags = priv_flags;
+
+	wakeup_cb = NULL;
+	client_data = NULL;
+
+	if (VMCI_ROUTE_AS_HOST == route) {
+		my_qpair->guest_endpoint = false;
+		if (!(flags & VMCI_QPFLAG_LOCAL)) {
+			my_qpair->blocked = 0;
+			my_qpair->generation = 0;
+			init_waitqueue_head(&my_qpair->event);
+			wakeup_cb = qp_wakeup_cb;
+			client_data = (void *)my_qpair;
+		}
+	} else {
+		my_qpair->guest_endpoint = true;
+	}
+
+	retval = vmci_qp_alloc(handle,
+			       &my_qpair->produce_q,
+			       my_qpair->produce_q_size,
+			       &my_qpair->consume_q,
+			       my_qpair->consume_q_size,
+			       my_qpair->peer,
+			       my_qpair->flags,
+			       my_qpair->priv_flags,
+			       my_qpair->guest_endpoint,
+			       wakeup_cb, client_data);
+
+	if (retval < VMCI_SUCCESS) {
+		kfree(my_qpair);
+		return retval;
+	}
+
+	*qpair = my_qpair;
+	my_qpair->handle = *handle;
+
+	return retval;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_alloc);
+
+/*
+ * vmci_qpair_detach() - Detatches the client from a queue pair.
+ * @qpair:      Reference of a pointer to the qpair struct.
+ *
+ * This is the client interface for detaching from a VMCIQPair.
+ * Note that this routine will free the memory allocated for the
+ * vmci_qp structure too.
+ */
+int vmci_qpair_detach(struct vmci_qp **qpair)
+{
+	int result;
+	struct vmci_qp *old_qpair;
+
+	if (!qpair || !(*qpair))
+		return VMCI_ERROR_INVALID_ARGS;
+
+	old_qpair = *qpair;
+	result = qp_detatch(old_qpair->handle, old_qpair->guest_endpoint);
+
+	/*
+	 * The guest can fail to detach for a number of reasons, and
+	 * if it does so, it will cleanup the entry (if there is one).
+	 * The host can fail too, but it won't cleanup the entry
+	 * immediately, it will do that later when the context is
+	 * freed.  Either way, we need to release the qpair struct
+	 * here; there isn't much the caller can do, and we don't want
+	 * to leak.
+	 */
+
+	memset(old_qpair, 0, sizeof(*old_qpair));
+	old_qpair->handle = VMCI_INVALID_HANDLE;
+	old_qpair->peer = VMCI_INVALID_ID;
+	kfree(old_qpair);
+	*qpair = NULL;
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_detach);
+
+/*
+ * vmci_qpair_get_produce_indexes() - Retrieves the indexes of the producer.
+ * @qpair:      Pointer to the queue pair struct.
+ * @producer_tail:      Reference used for storing producer tail index.
+ * @consumer_head:      Reference used for storing the consumer head index.
+ *
+ * This is the client interface for getting the current indexes of the
+ * QPair from the point of the view of the caller as the producer.
+ */
+int vmci_qpair_get_produce_indexes(const struct vmci_qp *qpair,
+				   u64 *producer_tail,
+				   u64 *consumer_head)
+{
+	struct vmci_queue_header *produce_q_header;
+	struct vmci_queue_header *consume_q_header;
+	int result;
+
+	if (!qpair)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	qp_lock(qpair);
+	result =
+	    qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
+	if (result == VMCI_SUCCESS)
+		vmci_q_header_get_pointers(produce_q_header, consume_q_header,
+					   producer_tail, consumer_head);
+	qp_unlock(qpair);
+
+	if (result == VMCI_SUCCESS &&
+	    ((producer_tail && *producer_tail >= qpair->produce_q_size) ||
+	     (consumer_head && *consumer_head >= qpair->produce_q_size)))
+		return VMCI_ERROR_INVALID_SIZE;
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_get_produce_indexes);
+
+/*
+ * vmci_qpair_get_consume_indexes() - Retrieves the indexes of the consumer.
+ * @qpair:      Pointer to the queue pair struct.
+ * @consumer_tail:      Reference used for storing consumer tail index.
+ * @producer_head:      Reference used for storing the producer head index.
+ *
+ * This is the client interface for getting the current indexes of the
+ * QPair from the point of the view of the caller as the consumer.
+ */
+int vmci_qpair_get_consume_indexes(const struct vmci_qp *qpair,
+				   u64 *consumer_tail,
+				   u64 *producer_head)
+{
+	struct vmci_queue_header *produce_q_header;
+	struct vmci_queue_header *consume_q_header;
+	int result;
+
+	if (!qpair)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	qp_lock(qpair);
+	result =
+	    qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
+	if (result == VMCI_SUCCESS)
+		vmci_q_header_get_pointers(consume_q_header, produce_q_header,
+					   consumer_tail, producer_head);
+	qp_unlock(qpair);
+
+	if (result == VMCI_SUCCESS &&
+	    ((consumer_tail && *consumer_tail >= qpair->consume_q_size) ||
+	     (producer_head && *producer_head >= qpair->consume_q_size)))
+		return VMCI_ERROR_INVALID_SIZE;
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_get_consume_indexes);
+
+/*
+ * vmci_qpair_produce_free_space() - Retrieves free space in producer queue.
+ * @qpair:      Pointer to the queue pair struct.
+ *
+ * This is the client interface for getting the amount of free
+ * space in the QPair from the point of the view of the caller as
+ * the producer which is the common case.  Returns < 0 if err, else
+ * available bytes into which data can be enqueued if > 0.
+ */
+s64 vmci_qpair_produce_free_space(const struct vmci_qp *qpair)
+{
+	struct vmci_queue_header *produce_q_header;
+	struct vmci_queue_header *consume_q_header;
+	s64 result;
+
+	if (!qpair)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	qp_lock(qpair);
+	result =
+	    qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
+	if (result == VMCI_SUCCESS)
+		result = vmci_q_header_free_space(produce_q_header,
+						  consume_q_header,
+						  qpair->produce_q_size);
+	else
+		result = 0;
+
+	qp_unlock(qpair);
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_produce_free_space);
+
+/*
+ * vmci_qpair_consume_free_space() - Retrieves free space in consumer queue.
+ * @qpair:      Pointer to the queue pair struct.
+ *
+ * This is the client interface for getting the amount of free
+ * space in the QPair from the point of the view of the caller as
+ * the consumer which is not the common case.  Returns < 0 if err, else
+ * available bytes into which data can be enqueued if > 0.
+ */
+s64 vmci_qpair_consume_free_space(const struct vmci_qp *qpair)
+{
+	struct vmci_queue_header *produce_q_header;
+	struct vmci_queue_header *consume_q_header;
+	s64 result;
+
+	if (!qpair)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	qp_lock(qpair);
+	result =
+	    qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
+	if (result == VMCI_SUCCESS)
+		result = vmci_q_header_free_space(consume_q_header,
+						  produce_q_header,
+						  qpair->consume_q_size);
+	else
+		result = 0;
+
+	qp_unlock(qpair);
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_consume_free_space);
+
+/*
+ * vmci_qpair_produce_buf_ready() - Gets bytes ready to read from
+ * producer queue.
+ * @qpair:      Pointer to the queue pair struct.
+ *
+ * This is the client interface for getting the amount of
+ * enqueued data in the QPair from the point of the view of the
+ * caller as the producer which is not the common case.  Returns < 0 if err,
+ * else available bytes that may be read.
+ */
+s64 vmci_qpair_produce_buf_ready(const struct vmci_qp *qpair)
+{
+	struct vmci_queue_header *produce_q_header;
+	struct vmci_queue_header *consume_q_header;
+	s64 result;
+
+	if (!qpair)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	qp_lock(qpair);
+	result =
+	    qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
+	if (result == VMCI_SUCCESS)
+		result = vmci_q_header_buf_ready(produce_q_header,
+						 consume_q_header,
+						 qpair->produce_q_size);
+	else
+		result = 0;
+
+	qp_unlock(qpair);
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_produce_buf_ready);
+
+/*
+ * vmci_qpair_consume_buf_ready() - Gets bytes ready to read from
+ * consumer queue.
+ * @qpair:      Pointer to the queue pair struct.
+ *
+ * This is the client interface for getting the amount of
+ * enqueued data in the QPair from the point of the view of the
+ * caller as the consumer which is the normal case.  Returns < 0 if err,
+ * else available bytes that may be read.
+ */
+s64 vmci_qpair_consume_buf_ready(const struct vmci_qp *qpair)
+{
+	struct vmci_queue_header *produce_q_header;
+	struct vmci_queue_header *consume_q_header;
+	s64 result;
+
+	if (!qpair)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	qp_lock(qpair);
+	result =
+	    qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
+	if (result == VMCI_SUCCESS)
+		result = vmci_q_header_buf_ready(consume_q_header,
+						 produce_q_header,
+						 qpair->consume_q_size);
+	else
+		result = 0;
+
+	qp_unlock(qpair);
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_consume_buf_ready);
+
+/*
+ * vmci_qpair_enqueue() - Throw data on the queue.
+ * @qpair:      Pointer to the queue pair struct.
+ * @buf:        Pointer to buffer containing data
+ * @buf_size:   Length of buffer.
+ * @buf_type:   Buffer type (Unused).
+ *
+ * This is the client interface for enqueueing data into the queue.
+ * Returns number of bytes enqueued or < 0 on error.
+ */
+ssize_t vmci_qpair_enqueue(struct vmci_qp *qpair,
+			   const void *buf,
+			   size_t buf_size,
+			   int buf_type)
+{
+	ssize_t result;
+	struct iov_iter from;
+	struct kvec v = {.iov_base = (void *)buf, .iov_len = buf_size};
+
+	if (!qpair || !buf)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	iov_iter_kvec(&from, WRITE | ITER_KVEC, &v, 1, buf_size);
+
+	qp_lock(qpair);
+
+	do {
+		result = qp_enqueue_locked(qpair->produce_q,
+					   qpair->consume_q,
+					   qpair->produce_q_size,
+					   &from);
+
+		if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
+		    !qp_wait_for_ready_queue(qpair))
+			result = VMCI_ERROR_WOULD_BLOCK;
+
+	} while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
+
+	qp_unlock(qpair);
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_enqueue);
+
+/*
+ * vmci_qpair_dequeue() - Get data from the queue.
+ * @qpair:      Pointer to the queue pair struct.
+ * @buf:        Pointer to buffer for the data
+ * @buf_size:   Length of buffer.
+ * @buf_type:   Buffer type (Unused).
+ *
+ * This is the client interface for dequeueing data from the queue.
+ * Returns number of bytes dequeued or < 0 on error.
+ */
+ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair,
+			   void *buf,
+			   size_t buf_size,
+			   int buf_type)
+{
+	ssize_t result;
+	struct iov_iter to;
+	struct kvec v = {.iov_base = buf, .iov_len = buf_size};
+
+	if (!qpair || !buf)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	iov_iter_kvec(&to, READ | ITER_KVEC, &v, 1, buf_size);
+
+	qp_lock(qpair);
+
+	do {
+		result = qp_dequeue_locked(qpair->produce_q,
+					   qpair->consume_q,
+					   qpair->consume_q_size,
+					   &to, true);
+
+		if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
+		    !qp_wait_for_ready_queue(qpair))
+			result = VMCI_ERROR_WOULD_BLOCK;
+
+	} while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
+
+	qp_unlock(qpair);
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_dequeue);
+
+/*
+ * vmci_qpair_peek() - Peek at the data in the queue.
+ * @qpair:      Pointer to the queue pair struct.
+ * @buf:        Pointer to buffer for the data
+ * @buf_size:   Length of buffer.
+ * @buf_type:   Buffer type (Unused on Linux).
+ *
+ * This is the client interface for peeking into a queue.  (I.e.,
+ * copy data from the queue without updating the head pointer.)
+ * Returns number of bytes dequeued or < 0 on error.
+ */
+ssize_t vmci_qpair_peek(struct vmci_qp *qpair,
+			void *buf,
+			size_t buf_size,
+			int buf_type)
+{
+	struct iov_iter to;
+	struct kvec v = {.iov_base = buf, .iov_len = buf_size};
+	ssize_t result;
+
+	if (!qpair || !buf)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	iov_iter_kvec(&to, READ | ITER_KVEC, &v, 1, buf_size);
+
+	qp_lock(qpair);
+
+	do {
+		result = qp_dequeue_locked(qpair->produce_q,
+					   qpair->consume_q,
+					   qpair->consume_q_size,
+					   &to, false);
+
+		if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
+		    !qp_wait_for_ready_queue(qpair))
+			result = VMCI_ERROR_WOULD_BLOCK;
+
+	} while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
+
+	qp_unlock(qpair);
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_peek);
+
+/*
+ * vmci_qpair_enquev() - Throw data on the queue using iov.
+ * @qpair:      Pointer to the queue pair struct.
+ * @iov:        Pointer to buffer containing data
+ * @iov_size:   Length of buffer.
+ * @buf_type:   Buffer type (Unused).
+ *
+ * This is the client interface for enqueueing data into the queue.
+ * This function uses IO vectors to handle the work. Returns number
+ * of bytes enqueued or < 0 on error.
+ */
+ssize_t vmci_qpair_enquev(struct vmci_qp *qpair,
+			  struct msghdr *msg,
+			  size_t iov_size,
+			  int buf_type)
+{
+	ssize_t result;
+
+	if (!qpair)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	qp_lock(qpair);
+
+	do {
+		result = qp_enqueue_locked(qpair->produce_q,
+					   qpair->consume_q,
+					   qpair->produce_q_size,
+					   &msg->msg_iter);
+
+		if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
+		    !qp_wait_for_ready_queue(qpair))
+			result = VMCI_ERROR_WOULD_BLOCK;
+
+	} while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
+
+	qp_unlock(qpair);
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_enquev);
+
+/*
+ * vmci_qpair_dequev() - Get data from the queue using iov.
+ * @qpair:      Pointer to the queue pair struct.
+ * @iov:        Pointer to buffer for the data
+ * @iov_size:   Length of buffer.
+ * @buf_type:   Buffer type (Unused).
+ *
+ * This is the client interface for dequeueing data from the queue.
+ * This function uses IO vectors to handle the work. Returns number
+ * of bytes dequeued or < 0 on error.
+ */
+ssize_t vmci_qpair_dequev(struct vmci_qp *qpair,
+			  struct msghdr *msg,
+			  size_t iov_size,
+			  int buf_type)
+{
+	ssize_t result;
+
+	if (!qpair)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	qp_lock(qpair);
+
+	do {
+		result = qp_dequeue_locked(qpair->produce_q,
+					   qpair->consume_q,
+					   qpair->consume_q_size,
+					   &msg->msg_iter, true);
+
+		if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
+		    !qp_wait_for_ready_queue(qpair))
+			result = VMCI_ERROR_WOULD_BLOCK;
+
+	} while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
+
+	qp_unlock(qpair);
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_dequev);
+
+/*
+ * vmci_qpair_peekv() - Peek at the data in the queue using iov.
+ * @qpair:      Pointer to the queue pair struct.
+ * @iov:        Pointer to buffer for the data
+ * @iov_size:   Length of buffer.
+ * @buf_type:   Buffer type (Unused on Linux).
+ *
+ * This is the client interface for peeking into a queue.  (I.e.,
+ * copy data from the queue without updating the head pointer.)
+ * This function uses IO vectors to handle the work. Returns number
+ * of bytes peeked or < 0 on error.
+ */
+ssize_t vmci_qpair_peekv(struct vmci_qp *qpair,
+			 struct msghdr *msg,
+			 size_t iov_size,
+			 int buf_type)
+{
+	ssize_t result;
+
+	if (!qpair)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	qp_lock(qpair);
+
+	do {
+		result = qp_dequeue_locked(qpair->produce_q,
+					   qpair->consume_q,
+					   qpair->consume_q_size,
+					   &msg->msg_iter, false);
+
+		if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
+		    !qp_wait_for_ready_queue(qpair))
+			result = VMCI_ERROR_WOULD_BLOCK;
+
+	} while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
+
+	qp_unlock(qpair);
+	return result;
+}
+EXPORT_SYMBOL_GPL(vmci_qpair_peekv);
diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.h b/drivers/misc/vmw_vmci/vmci_queue_pair.h
new file mode 100644
index 000000000..ed177f04e
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.h
@@ -0,0 +1,173 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _VMCI_QUEUE_PAIR_H_
+#define _VMCI_QUEUE_PAIR_H_
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/types.h>
+
+#include "vmci_context.h"
+
+/* Callback needed for correctly waiting on events. */
+typedef int (*vmci_event_release_cb) (void *client_data);
+
+/* Guest device port I/O. */
+struct ppn_set {
+	u64 num_produce_pages;
+	u64 num_consume_pages;
+	u32 *produce_ppns;
+	u32 *consume_ppns;
+	bool initialized;
+};
+
+/* VMCIqueue_pairAllocInfo */
+struct vmci_qp_alloc_info {
+	struct vmci_handle handle;
+	u32 peer;
+	u32 flags;
+	u64 produce_size;
+	u64 consume_size;
+	u64 ppn_va;	/* Start VA of queue pair PPNs. */
+	u64 num_ppns;
+	s32 result;
+	u32 version;
+};
+
+/* VMCIqueue_pairSetVAInfo */
+struct vmci_qp_set_va_info {
+	struct vmci_handle handle;
+	u64 va;		/* Start VA of queue pair PPNs. */
+	u64 num_ppns;
+	u32 version;
+	s32 result;
+};
+
+/*
+ * For backwards compatibility, here is a version of the
+ * VMCIqueue_pairPageFileInfo before host support end-points was added.
+ * Note that the current version of that structure requires VMX to
+ * pass down the VA of the mapped file.  Before host support was added
+ * there was nothing of the sort.  So, when the driver sees the ioctl
+ * with a parameter that is the sizeof
+ * VMCIqueue_pairPageFileInfo_NoHostQP then it can infer that the version
+ * of VMX running can't attach to host end points because it doesn't
+ * provide the VA of the mapped files.
+ *
+ * The Linux driver doesn't get an indication of the size of the
+ * structure passed down from user space.  So, to fix a long standing
+ * but unfiled bug, the _pad field has been renamed to version.
+ * Existing versions of VMX always initialize the PageFileInfo
+ * structure so that _pad, er, version is set to 0.
+ *
+ * A version value of 1 indicates that the size of the structure has
+ * been increased to include two UVA's: produce_uva and consume_uva.
+ * These UVA's are of the mmap()'d queue contents backing files.
+ *
+ * In addition, if when VMX is sending down the
+ * VMCIqueue_pairPageFileInfo structure it gets an error then it will
+ * try again with the _NoHostQP version of the file to see if an older
+ * VMCI kernel module is running.
+ */
+
+/* VMCIqueue_pairPageFileInfo */
+struct vmci_qp_page_file_info {
+	struct vmci_handle handle;
+	u64 produce_page_file;	  /* User VA. */
+	u64 consume_page_file;	  /* User VA. */
+	u64 produce_page_file_size;  /* Size of the file name array. */
+	u64 consume_page_file_size;  /* Size of the file name array. */
+	s32 result;
+	u32 version;	/* Was _pad. */
+	u64 produce_va;	/* User VA of the mapped file. */
+	u64 consume_va;	/* User VA of the mapped file. */
+};
+
+/* vmci queuepair detach info */
+struct vmci_qp_dtch_info {
+	struct vmci_handle handle;
+	s32 result;
+	u32 _pad;
+};
+
+/*
+ * struct vmci_qp_page_store describes how the memory of a given queue pair
+ * is backed. When the queue pair is between the host and a guest, the
+ * page store consists of references to the guest pages. On vmkernel,
+ * this is a list of PPNs, and on hosted, it is a user VA where the
+ * queue pair is mapped into the VMX address space.
+ */
+struct vmci_qp_page_store {
+	/* Reference to pages backing the queue pair. */
+	u64 pages;
+	/* Length of pageList/virtual addres range (in pages). */
+	u32 len;
+};
+
+/*
+ * This data type contains the information about a queue.
+ * There are two queues (hence, queue pairs) per transaction model between a
+ * pair of end points, A & B.  One queue is used by end point A to transmit
+ * commands and responses to B.  The other queue is used by B to transmit
+ * commands and responses.
+ *
+ * struct vmci_queue_kern_if is a per-OS defined Queue structure.  It contains
+ * either a direct pointer to the linear address of the buffer contents or a
+ * pointer to structures which help the OS locate those data pages.  See
+ * vmciKernelIf.c for each platform for its definition.
+ */
+struct vmci_queue {
+	struct vmci_queue_header *q_header;
+	struct vmci_queue_header *saved_header;
+	struct vmci_queue_kern_if *kernel_if;
+};
+
+/*
+ * Utility function that checks whether the fields of the page
+ * store contain valid values.
+ * Result:
+ * true if the page store is wellformed. false otherwise.
+ */
+static inline bool
+VMCI_QP_PAGESTORE_IS_WELLFORMED(struct vmci_qp_page_store *page_store)
+{
+	return page_store->len >= 2;
+}
+
+void vmci_qp_broker_exit(void);
+int vmci_qp_broker_alloc(struct vmci_handle handle, u32 peer,
+			 u32 flags, u32 priv_flags,
+			 u64 produce_size, u64 consume_size,
+			 struct vmci_qp_page_store *page_store,
+			 struct vmci_ctx *context);
+int vmci_qp_broker_set_page_store(struct vmci_handle handle,
+				  u64 produce_uva, u64 consume_uva,
+				  struct vmci_ctx *context);
+int vmci_qp_broker_detach(struct vmci_handle handle, struct vmci_ctx *context);
+
+void vmci_qp_guest_endpoints_exit(void);
+
+int vmci_qp_alloc(struct vmci_handle *handle,
+		  struct vmci_queue **produce_q, u64 produce_size,
+		  struct vmci_queue **consume_q, u64 consume_size,
+		  u32 peer, u32 flags, u32 priv_flags,
+		  bool guest_endpoint, vmci_event_release_cb wakeup_cb,
+		  void *client_data);
+int vmci_qp_broker_map(struct vmci_handle handle,
+		       struct vmci_ctx *context, u64 guest_mem);
+int vmci_qp_broker_unmap(struct vmci_handle handle,
+			 struct vmci_ctx *context, u32 gid);
+
+#endif /* _VMCI_QUEUE_PAIR_H_ */
diff --git a/drivers/misc/vmw_vmci/vmci_resource.c b/drivers/misc/vmw_vmci/vmci_resource.c
new file mode 100644
index 000000000..da1ee2e1b
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_resource.c
@@ -0,0 +1,229 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/hash.h>
+#include <linux/types.h>
+#include <linux/rculist.h>
+#include <linux/completion.h>
+
+#include "vmci_resource.h"
+#include "vmci_driver.h"
+
+
+#define VMCI_RESOURCE_HASH_BITS         7
+#define VMCI_RESOURCE_HASH_BUCKETS      (1 << VMCI_RESOURCE_HASH_BITS)
+
+struct vmci_hash_table {
+	spinlock_t lock;
+	struct hlist_head entries[VMCI_RESOURCE_HASH_BUCKETS];
+};
+
+static struct vmci_hash_table vmci_resource_table = {
+	.lock = __SPIN_LOCK_UNLOCKED(vmci_resource_table.lock),
+};
+
+static unsigned int vmci_resource_hash(struct vmci_handle handle)
+{
+	return hash_32(handle.resource, VMCI_RESOURCE_HASH_BITS);
+}
+
+/*
+ * Gets a resource (if one exists) matching given handle from the hash table.
+ */
+static struct vmci_resource *vmci_resource_lookup(struct vmci_handle handle,
+						  enum vmci_resource_type type)
+{
+	struct vmci_resource *r, *resource = NULL;
+	unsigned int idx = vmci_resource_hash(handle);
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(r,
+				 &vmci_resource_table.entries[idx], node) {
+		u32 cid = r->handle.context;
+		u32 rid = r->handle.resource;
+
+		if (r->type == type &&
+		    rid == handle.resource &&
+		    (cid == handle.context || cid == VMCI_INVALID_ID ||
+		     handle.context == VMCI_INVALID_ID)) {
+			resource = r;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return resource;
+}
+
+/*
+ * Find an unused resource ID and return it. The first
+ * VMCI_RESERVED_RESOURCE_ID_MAX are reserved so we start from
+ * its value + 1.
+ * Returns VMCI resource id on success, VMCI_INVALID_ID on failure.
+ */
+static u32 vmci_resource_find_id(u32 context_id,
+				 enum vmci_resource_type resource_type)
+{
+	static u32 resource_id = VMCI_RESERVED_RESOURCE_ID_MAX + 1;
+	u32 old_rid = resource_id;
+	u32 current_rid;
+
+	/*
+	 * Generate a unique resource ID.  Keep on trying until we wrap around
+	 * in the RID space.
+	 */
+	do {
+		struct vmci_handle handle;
+
+		current_rid = resource_id;
+		resource_id++;
+		if (unlikely(resource_id == VMCI_INVALID_ID)) {
+			/* Skip the reserved rids. */
+			resource_id = VMCI_RESERVED_RESOURCE_ID_MAX + 1;
+		}
+
+		handle = vmci_make_handle(context_id, current_rid);
+		if (!vmci_resource_lookup(handle, resource_type))
+			return current_rid;
+	} while (resource_id != old_rid);
+
+	return VMCI_INVALID_ID;
+}
+
+
+int vmci_resource_add(struct vmci_resource *resource,
+		      enum vmci_resource_type resource_type,
+		      struct vmci_handle handle)
+
+{
+	unsigned int idx;
+	int result;
+
+	spin_lock(&vmci_resource_table.lock);
+
+	if (handle.resource == VMCI_INVALID_ID) {
+		handle.resource = vmci_resource_find_id(handle.context,
+			resource_type);
+		if (handle.resource == VMCI_INVALID_ID) {
+			result = VMCI_ERROR_NO_HANDLE;
+			goto out;
+		}
+	} else if (vmci_resource_lookup(handle, resource_type)) {
+		result = VMCI_ERROR_ALREADY_EXISTS;
+		goto out;
+	}
+
+	resource->handle = handle;
+	resource->type = resource_type;
+	INIT_HLIST_NODE(&resource->node);
+	kref_init(&resource->kref);
+	init_completion(&resource->done);
+
+	idx = vmci_resource_hash(resource->handle);
+	hlist_add_head_rcu(&resource->node, &vmci_resource_table.entries[idx]);
+
+	result = VMCI_SUCCESS;
+
+out:
+	spin_unlock(&vmci_resource_table.lock);
+	return result;
+}
+
+void vmci_resource_remove(struct vmci_resource *resource)
+{
+	struct vmci_handle handle = resource->handle;
+	unsigned int idx = vmci_resource_hash(handle);
+	struct vmci_resource *r;
+
+	/* Remove resource from hash table. */
+	spin_lock(&vmci_resource_table.lock);
+
+	hlist_for_each_entry(r, &vmci_resource_table.entries[idx], node) {
+		if (vmci_handle_is_equal(r->handle, resource->handle)) {
+			hlist_del_init_rcu(&r->node);
+			break;
+		}
+	}
+
+	spin_unlock(&vmci_resource_table.lock);
+	synchronize_rcu();
+
+	vmci_resource_put(resource);
+	wait_for_completion(&resource->done);
+}
+
+struct vmci_resource *
+vmci_resource_by_handle(struct vmci_handle resource_handle,
+			enum vmci_resource_type resource_type)
+{
+	struct vmci_resource *r, *resource = NULL;
+
+	rcu_read_lock();
+
+	r = vmci_resource_lookup(resource_handle, resource_type);
+	if (r &&
+	    (resource_type == r->type ||
+	     resource_type == VMCI_RESOURCE_TYPE_ANY)) {
+		resource = vmci_resource_get(r);
+	}
+
+	rcu_read_unlock();
+
+	return resource;
+}
+
+/*
+ * Get a reference to given resource.
+ */
+struct vmci_resource *vmci_resource_get(struct vmci_resource *resource)
+{
+	kref_get(&resource->kref);
+
+	return resource;
+}
+
+static void vmci_release_resource(struct kref *kref)
+{
+	struct vmci_resource *resource =
+		container_of(kref, struct vmci_resource, kref);
+
+	/* Verify the resource has been unlinked from hash table */
+	WARN_ON(!hlist_unhashed(&resource->node));
+
+	/* Signal that container of this resource can now be destroyed */
+	complete(&resource->done);
+}
+
+/*
+ * Resource's release function will get called if last reference.
+ * If it is the last reference, then we are sure that nobody else
+ * can increment the count again (it's gone from the resource hash
+ * table), so there's no need for locking here.
+ */
+int vmci_resource_put(struct vmci_resource *resource)
+{
+	/*
+	 * We propagate the information back to caller in case it wants to know
+	 * whether entry was freed.
+	 */
+	return kref_put(&resource->kref, vmci_release_resource) ?
+		VMCI_SUCCESS_ENTRY_DEAD : VMCI_SUCCESS;
+}
+
+struct vmci_handle vmci_resource_handle(struct vmci_resource *resource)
+{
+	return resource->handle;
+}
diff --git a/drivers/misc/vmw_vmci/vmci_resource.h b/drivers/misc/vmw_vmci/vmci_resource.h
new file mode 100644
index 000000000..9190cd298
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_resource.h
@@ -0,0 +1,59 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _VMCI_RESOURCE_H_
+#define _VMCI_RESOURCE_H_
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/types.h>
+
+#include "vmci_context.h"
+
+
+enum vmci_resource_type {
+	VMCI_RESOURCE_TYPE_ANY,
+	VMCI_RESOURCE_TYPE_API,
+	VMCI_RESOURCE_TYPE_GROUP,
+	VMCI_RESOURCE_TYPE_DATAGRAM,
+	VMCI_RESOURCE_TYPE_DOORBELL,
+	VMCI_RESOURCE_TYPE_QPAIR_GUEST,
+	VMCI_RESOURCE_TYPE_QPAIR_HOST
+};
+
+struct vmci_resource {
+	struct vmci_handle handle;
+	enum vmci_resource_type type;
+	struct hlist_node node;
+	struct kref kref;
+	struct completion done;
+};
+
+
+int vmci_resource_add(struct vmci_resource *resource,
+		      enum vmci_resource_type resource_type,
+		      struct vmci_handle handle);
+
+void vmci_resource_remove(struct vmci_resource *resource);
+
+struct vmci_resource *
+vmci_resource_by_handle(struct vmci_handle resource_handle,
+			enum vmci_resource_type resource_type);
+
+struct vmci_resource *vmci_resource_get(struct vmci_resource *resource);
+int vmci_resource_put(struct vmci_resource *resource);
+
+struct vmci_handle vmci_resource_handle(struct vmci_resource *resource);
+
+#endif /* _VMCI_RESOURCE_H_ */
diff --git a/drivers/misc/vmw_vmci/vmci_route.c b/drivers/misc/vmw_vmci/vmci_route.c
new file mode 100644
index 000000000..91090658b
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_route.c
@@ -0,0 +1,226 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+
+#include "vmci_context.h"
+#include "vmci_driver.h"
+#include "vmci_route.h"
+
+/*
+ * Make a routing decision for the given source and destination handles.
+ * This will try to determine the route using the handles and the available
+ * devices.  Will set the source context if it is invalid.
+ */
+int vmci_route(struct vmci_handle *src,
+	       const struct vmci_handle *dst,
+	       bool from_guest,
+	       enum vmci_route *route)
+{
+	bool has_host_device = vmci_host_code_active();
+	bool has_guest_device = vmci_guest_code_active();
+
+	*route = VMCI_ROUTE_NONE;
+
+	/*
+	 * "from_guest" is only ever set to true by
+	 * IOCTL_VMCI_DATAGRAM_SEND (or by the vmkernel equivalent),
+	 * which comes from the VMX, so we know it is coming from a
+	 * guest.
+	 *
+	 * To avoid inconsistencies, test these once.  We will test
+	 * them again when we do the actual send to ensure that we do
+	 * not touch a non-existent device.
+	 */
+
+	/* Must have a valid destination context. */
+	if (VMCI_INVALID_ID == dst->context)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	/* Anywhere to hypervisor. */
+	if (VMCI_HYPERVISOR_CONTEXT_ID == dst->context) {
+
+		/*
+		 * If this message already came from a guest then we
+		 * cannot send it to the hypervisor.  It must come
+		 * from a local client.
+		 */
+		if (from_guest)
+			return VMCI_ERROR_DST_UNREACHABLE;
+
+		/*
+		 * We must be acting as a guest in order to send to
+		 * the hypervisor.
+		 */
+		if (!has_guest_device)
+			return VMCI_ERROR_DEVICE_NOT_FOUND;
+
+		/* And we cannot send if the source is the host context. */
+		if (VMCI_HOST_CONTEXT_ID == src->context)
+			return VMCI_ERROR_INVALID_ARGS;
+
+		/*
+		 * If the client passed the ANON source handle then
+		 * respect it (both context and resource are invalid).
+		 * However, if they passed only an invalid context,
+		 * then they probably mean ANY, in which case we
+		 * should set the real context here before passing it
+		 * down.
+		 */
+		if (VMCI_INVALID_ID == src->context &&
+		    VMCI_INVALID_ID != src->resource)
+			src->context = vmci_get_context_id();
+
+		/* Send from local client down to the hypervisor. */
+		*route = VMCI_ROUTE_AS_GUEST;
+		return VMCI_SUCCESS;
+	}
+
+	/* Anywhere to local client on host. */
+	if (VMCI_HOST_CONTEXT_ID == dst->context) {
+		/*
+		 * If it is not from a guest but we are acting as a
+		 * guest, then we need to send it down to the host.
+		 * Note that if we are also acting as a host then this
+		 * will prevent us from sending from local client to
+		 * local client, but we accept that restriction as a
+		 * way to remove any ambiguity from the host context.
+		 */
+		if (src->context == VMCI_HYPERVISOR_CONTEXT_ID) {
+			/*
+			 * If the hypervisor is the source, this is
+			 * host local communication. The hypervisor
+			 * may send vmci event datagrams to the host
+			 * itself, but it will never send datagrams to
+			 * an "outer host" through the guest device.
+			 */
+
+			if (has_host_device) {
+				*route = VMCI_ROUTE_AS_HOST;
+				return VMCI_SUCCESS;
+			} else {
+				return VMCI_ERROR_DEVICE_NOT_FOUND;
+			}
+		}
+
+		if (!from_guest && has_guest_device) {
+			/* If no source context then use the current. */
+			if (VMCI_INVALID_ID == src->context)
+				src->context = vmci_get_context_id();
+
+			/* Send it from local client down to the host. */
+			*route = VMCI_ROUTE_AS_GUEST;
+			return VMCI_SUCCESS;
+		}
+
+		/*
+		 * Otherwise we already received it from a guest and
+		 * it is destined for a local client on this host, or
+		 * it is from another local client on this host.  We
+		 * must be acting as a host to service it.
+		 */
+		if (!has_host_device)
+			return VMCI_ERROR_DEVICE_NOT_FOUND;
+
+		if (VMCI_INVALID_ID == src->context) {
+			/*
+			 * If it came from a guest then it must have a
+			 * valid context.  Otherwise we can use the
+			 * host context.
+			 */
+			if (from_guest)
+				return VMCI_ERROR_INVALID_ARGS;
+
+			src->context = VMCI_HOST_CONTEXT_ID;
+		}
+
+		/* Route to local client. */
+		*route = VMCI_ROUTE_AS_HOST;
+		return VMCI_SUCCESS;
+	}
+
+	/*
+	 * If we are acting as a host then this might be destined for
+	 * a guest.
+	 */
+	if (has_host_device) {
+		/* It will have a context if it is meant for a guest. */
+		if (vmci_ctx_exists(dst->context)) {
+			if (VMCI_INVALID_ID == src->context) {
+				/*
+				 * If it came from a guest then it
+				 * must have a valid context.
+				 * Otherwise we can use the host
+				 * context.
+				 */
+
+				if (from_guest)
+					return VMCI_ERROR_INVALID_ARGS;
+
+				src->context = VMCI_HOST_CONTEXT_ID;
+			} else if (VMCI_CONTEXT_IS_VM(src->context) &&
+				   src->context != dst->context) {
+				/*
+				 * VM to VM communication is not
+				 * allowed. Since we catch all
+				 * communication destined for the host
+				 * above, this must be destined for a
+				 * VM since there is a valid context.
+				 */
+
+				return VMCI_ERROR_DST_UNREACHABLE;
+			}
+
+			/* Pass it up to the guest. */
+			*route = VMCI_ROUTE_AS_HOST;
+			return VMCI_SUCCESS;
+		} else if (!has_guest_device) {
+			/*
+			 * The host is attempting to reach a CID
+			 * without an active context, and we can't
+			 * send it down, since we have no guest
+			 * device.
+			 */
+
+			return VMCI_ERROR_DST_UNREACHABLE;
+		}
+	}
+
+	/*
+	 * We must be a guest trying to send to another guest, which means
+	 * we need to send it down to the host. We do not filter out VM to
+	 * VM communication here, since we want to be able to use the guest
+	 * driver on older versions that do support VM to VM communication.
+	 */
+	if (!has_guest_device) {
+		/*
+		 * Ending up here means we have neither guest nor host
+		 * device.
+		 */
+		return VMCI_ERROR_DEVICE_NOT_FOUND;
+	}
+
+	/* If no source context then use the current context. */
+	if (VMCI_INVALID_ID == src->context)
+		src->context = vmci_get_context_id();
+
+	/*
+	 * Send it from local client down to the host, which will
+	 * route it to the other guest for us.
+	 */
+	*route = VMCI_ROUTE_AS_GUEST;
+	return VMCI_SUCCESS;
+}
diff --git a/drivers/misc/vmw_vmci/vmci_route.h b/drivers/misc/vmw_vmci/vmci_route.h
new file mode 100644
index 000000000..3b30e8241
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_route.h
@@ -0,0 +1,30 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _VMCI_ROUTE_H_
+#define _VMCI_ROUTE_H_
+
+#include <linux/vmw_vmci_defs.h>
+
+enum vmci_route {
+	VMCI_ROUTE_NONE,
+	VMCI_ROUTE_AS_HOST,
+	VMCI_ROUTE_AS_GUEST,
+};
+
+int vmci_route(struct vmci_handle *src, const struct vmci_handle *dst,
+	       bool from_guest, enum vmci_route *route);
+
+#endif /* _VMCI_ROUTE_H_ */
-- 
cgit v1.2.3