From ace9429bb58fd418f0c81d4c2835699bddf6bde6 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Thu, 11 Apr 2024 10:27:49 +0200
Subject: Adding upstream version 6.6.15.

Signed-off-by: Daniel Baumann
---
 drivers/misc/Kconfig | 582 ++++
 drivers/misc/Makefile | 69 +
 drivers/misc/ad525x_dpot-i2c.c | 118 +
 drivers/misc/ad525x_dpot-spi.c | 145 +
 drivers/misc/ad525x_dpot.c | 763 +++++
 drivers/misc/ad525x_dpot.h | 214 ++
 drivers/misc/altera-stapl/Kconfig | 9 +
 drivers/misc/altera-stapl/Makefile | 5 +
 drivers/misc/altera-stapl/altera-comp.c | 128 +
 drivers/misc/altera-stapl/altera-exprt.h | 19 +
 drivers/misc/altera-stapl/altera-jtag.c | 1007 +++++++
 drivers/misc/altera-stapl/altera-jtag.h | 99 +
 drivers/misc/altera-stapl/altera-lpt.c | 56 +
 drivers/misc/altera-stapl/altera.c | 2497 ++++++++++++++++
 drivers/misc/apds9802als.c | 308 ++
 drivers/misc/apds990x.c | 1283 ++++++++
 drivers/misc/atmel-ssc.c | 281 ++
 drivers/misc/bcm-vk/Kconfig | 29 +
 drivers/misc/bcm-vk/Makefile | 12 +
 drivers/misc/bcm-vk/bcm_vk.h | 549 ++++
 drivers/misc/bcm-vk/bcm_vk_dev.c | 1652 +++++++++++
 drivers/misc/bcm-vk/bcm_vk_msg.c | 1352 +++++++++
 drivers/misc/bcm-vk/bcm_vk_msg.h | 163 +
 drivers/misc/bcm-vk/bcm_vk_sg.c | 275 ++
 drivers/misc/bcm-vk/bcm_vk_sg.h | 61 +
 drivers/misc/bcm-vk/bcm_vk_tty.c | 338 +++
 drivers/misc/bh1770glc.c | 1390 +++++++++
 drivers/misc/c2port/Kconfig | 33 +
 drivers/misc/c2port/Makefile | 4 +
 drivers/misc/c2port/c2port-duramar2150.c | 156 +
 drivers/misc/c2port/core.c | 1000 +++++++
 drivers/misc/cardreader/Kconfig | 32 +
 drivers/misc/cardreader/Makefile | 5 +
 drivers/misc/cardreader/alcor_pci.c | 229 ++
 drivers/misc/cardreader/rtl8411.c | 499 ++++
 drivers/misc/cardreader/rts5209.c | 267 ++
 drivers/misc/cardreader/rts5227.c | 512 ++++
 drivers/misc/cardreader/rts5228.c | 723 +++++
 drivers/misc/cardreader/rts5228.h | 168 ++
 drivers/misc/cardreader/rts5229.c | 263 ++
 drivers/misc/cardreader/rts5249.c | 848 ++++++
 drivers/misc/cardreader/rts5260.c | 629 ++++
 drivers/misc/cardreader/rts5260.h | 45 +
 drivers/misc/cardreader/rts5261.c | 806 +++++
 drivers/misc/cardreader/rts5261.h | 206 ++
 drivers/misc/cardreader/rtsx_pcr.c | 1860 ++++++++++++
 drivers/misc/cardreader/rtsx_pcr.h | 132 +
 drivers/misc/cardreader/rtsx_usb.c | 800 +++++
 drivers/misc/cb710/Kconfig | 25 +
 drivers/misc/cb710/Makefile | 7 +
 drivers/misc/cb710/core.c | 329 +++
 drivers/misc/cb710/debug.c | 115 +
 drivers/misc/cb710/sgbuf2.c | 144 +
 drivers/misc/cs5535-mfgpt.c | 380 +++
 drivers/misc/cxl/Kconfig | 26 +
 drivers/misc/cxl/Makefile | 14 +
 drivers/misc/cxl/api.c | 532 ++++
 drivers/misc/cxl/base.c | 126 +
 drivers/misc/cxl/context.c | 362 +++
 drivers/misc/cxl/cxl.h | 1134 +++++++
 drivers/misc/cxl/cxllib.c | 271 ++
 drivers/misc/cxl/debugfs.c | 134 +
 drivers/misc/cxl/fault.c | 341 +++
 drivers/misc/cxl/file.c | 700 +++++
 drivers/misc/cxl/flash.c | 538 ++++
 drivers/misc/cxl/guest.c | 1208 ++++++++
 drivers/misc/cxl/hcalls.c | 643 ++++
 drivers/misc/cxl/hcalls.h | 200 ++
 drivers/misc/cxl/irq.c | 450 +++
 drivers/misc/cxl/main.c | 383 +++
 drivers/misc/cxl/native.c | 1592 ++++++++++
 drivers/misc/cxl/of.c | 506 ++++
 drivers/misc/cxl/pci.c | 2107 +++++++++++++
 drivers/misc/cxl/sysfs.c | 771 +++++
 drivers/misc/cxl/trace.c | 9 +
 drivers/misc/cxl/trace.h | 691 +++++
 drivers/misc/cxl/vphb.c | 309 ++
 drivers/misc/ds1682.c | 262 ++
 drivers/misc/dummy-irq.c | 60 +
 drivers/misc/dw-xdata-pcie.c | 420 +++
 drivers/misc/echo/Kconfig | 9 +
 drivers/misc/echo/Makefile | 2 +
 drivers/misc/echo/echo.c | 589 ++++
 drivers/misc/echo/echo.h | 175 ++
 drivers/misc/echo/fir.h | 154 +
drivers/misc/echo/oslec.h | 81 + drivers/misc/eeprom/Kconfig | 134 + drivers/misc/eeprom/Makefile | 10 + drivers/misc/eeprom/at24.c | 862 ++++++ drivers/misc/eeprom/at25.c | 532 ++++ drivers/misc/eeprom/digsy_mtc_eeprom.c | 103 + drivers/misc/eeprom/ee1004.c | 245 ++ drivers/misc/eeprom/eeprom.c | 214 ++ drivers/misc/eeprom/eeprom_93cx6.c | 372 +++ drivers/misc/eeprom/eeprom_93xx46.c | 583 ++++ drivers/misc/eeprom/idt_89hpesx.c | 1592 ++++++++++ drivers/misc/eeprom/max6875.c | 204 ++ drivers/misc/enclosure.c | 683 +++++ drivers/misc/fastrpc.c | 2482 ++++++++++++++++ drivers/misc/gehc-achc.c | 566 ++++ drivers/misc/genwqe/Kconfig | 19 + drivers/misc/genwqe/Makefile | 8 + drivers/misc/genwqe/card_base.c | 1402 +++++++++ drivers/misc/genwqe/card_base.h | 577 ++++ drivers/misc/genwqe/card_ddcb.c | 1411 +++++++++ drivers/misc/genwqe/card_ddcb.h | 179 ++ drivers/misc/genwqe/card_debugfs.c | 378 +++ drivers/misc/genwqe/card_dev.c | 1391 +++++++++ drivers/misc/genwqe/card_sysfs.c | 295 ++ drivers/misc/genwqe/card_utils.c | 1049 +++++++ drivers/misc/genwqe/genwqe_driver.h | 69 + drivers/misc/hi6421v600-irq.c | 304 ++ drivers/misc/hisi_hikey_usb.c | 277 ++ drivers/misc/hmc6352.c | 143 + drivers/misc/hpilo.c | 927 ++++++ drivers/misc/hpilo.h | 214 ++ drivers/misc/ibmasm/Makefile | 16 + drivers/misc/ibmasm/command.c | 173 ++ drivers/misc/ibmasm/dot_command.c | 138 + drivers/misc/ibmasm/dot_command.h | 64 + drivers/misc/ibmasm/event.c | 163 + drivers/misc/ibmasm/heartbeat.c | 88 + drivers/misc/ibmasm/i2o.h | 63 + drivers/misc/ibmasm/ibmasm.h | 209 ++ drivers/misc/ibmasm/ibmasmfs.c | 603 ++++ drivers/misc/ibmasm/lowlevel.c | 71 + drivers/misc/ibmasm/lowlevel.h | 123 + drivers/misc/ibmasm/module.c | 224 ++ drivers/misc/ibmasm/r_heartbeat.c | 86 + drivers/misc/ibmasm/remote.c | 268 ++ drivers/misc/ibmasm/remote.h | 257 ++ drivers/misc/ibmasm/uart.c | 58 + drivers/misc/ibmvmc.c | 2421 +++++++++++++++ drivers/misc/ibmvmc.h | 209 ++ drivers/misc/ics932s401.c | 481 +++ drivers/misc/isl29003.c | 472 +++ drivers/misc/isl29020.c | 226 ++ drivers/misc/kgdbts.c | 1191 ++++++++ drivers/misc/lattice-ecp3-config.c | 242 ++ drivers/misc/lis3lv02d/Kconfig | 36 + drivers/misc/lis3lv02d/Makefile | 8 + drivers/misc/lis3lv02d/lis3lv02d.c | 1266 ++++++++ drivers/misc/lis3lv02d/lis3lv02d.h | 318 ++ drivers/misc/lis3lv02d/lis3lv02d_i2c.c | 274 ++ drivers/misc/lis3lv02d/lis3lv02d_spi.c | 150 + drivers/misc/lkdtm/Makefile | 29 + drivers/misc/lkdtm/bugs.c | 672 +++++ drivers/misc/lkdtm/cfi.c | 193 ++ drivers/misc/lkdtm/core.c | 488 +++ drivers/misc/lkdtm/fortify.c | 217 ++ drivers/misc/lkdtm/heap.c | 342 +++ drivers/misc/lkdtm/lkdtm.h | 100 + drivers/misc/lkdtm/perms.c | 293 ++ drivers/misc/lkdtm/powerpc.c | 129 + drivers/misc/lkdtm/refcount.c | 419 +++ drivers/misc/lkdtm/rodata.c | 11 + drivers/misc/lkdtm/stackleak.c | 150 + drivers/misc/lkdtm/usercopy.c | 457 +++ drivers/misc/mchp_pci1xxxx/Kconfig | 14 + drivers/misc/mchp_pci1xxxx/Makefile | 1 + drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c | 165 ++ drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.h | 28 + drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c | 433 +++ drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_otpe2p.c | 443 +++ drivers/misc/mei/Kconfig | 65 + drivers/misc/mei/Makefile | 33 + drivers/misc/mei/bus-fixup.c | 588 ++++ drivers/misc/mei/bus.c | 1556 ++++++++++ drivers/misc/mei/client.c | 2474 ++++++++++++++++ drivers/misc/mei/client.h | 288 ++ drivers/misc/mei/debugfs.c | 195 ++ drivers/misc/mei/dma-ring.c | 269 ++ drivers/misc/mei/gsc-me.c | 316 ++ 
drivers/misc/mei/gsc_proxy/Kconfig | 14 + drivers/misc/mei/gsc_proxy/Makefile | 7 + drivers/misc/mei/gsc_proxy/mei_gsc_proxy.c | 208 ++ drivers/misc/mei/hbm.c | 1619 ++++++++++ drivers/misc/mei/hbm.h | 58 + drivers/misc/mei/hdcp/Kconfig | 12 + drivers/misc/mei/hdcp/Makefile | 7 + drivers/misc/mei/hdcp/mei_hdcp.c | 879 ++++++ drivers/misc/mei/hdcp/mei_hdcp.h | 14 + drivers/misc/mei/hw-me-regs.h | 214 ++ drivers/misc/mei/hw-me.c | 1770 +++++++++++ drivers/misc/mei/hw-me.h | 144 + drivers/misc/mei/hw-txe-regs.h | 239 ++ drivers/misc/mei/hw-txe.c | 1256 ++++++++ drivers/misc/mei/hw-txe.h | 65 + drivers/misc/mei/hw.h | 836 ++++++ drivers/misc/mei/init.c | 424 +++ drivers/misc/mei/interrupt.c | 688 +++++ drivers/misc/mei/main.c | 1318 +++++++++ drivers/misc/mei/mei-trace.c | 16 + drivers/misc/mei/mei-trace.h | 83 + drivers/misc/mei/mei_dev.h | 878 ++++++ drivers/misc/mei/mkhi.h | 55 + drivers/misc/mei/pci-me.c | 551 ++++ drivers/misc/mei/pci-txe.c | 406 +++ drivers/misc/mei/pxp/Kconfig | 13 + drivers/misc/mei/pxp/Makefile | 7 + drivers/misc/mei/pxp/mei_pxp.c | 260 ++ drivers/misc/mei/pxp/mei_pxp.h | 18 + drivers/misc/ocxl/Kconfig | 31 + drivers/misc/ocxl/Makefile | 12 + drivers/misc/ocxl/afu_irq.c | 212 ++ drivers/misc/ocxl/config.c | 937 ++++++ drivers/misc/ocxl/context.c | 296 ++ drivers/misc/ocxl/core.c | 569 ++++ drivers/misc/ocxl/file.c | 620 ++++ drivers/misc/ocxl/link.c | 779 +++++ drivers/misc/ocxl/main.c | 37 + drivers/misc/ocxl/mmio.c | 234 ++ drivers/misc/ocxl/ocxl_internal.h | 154 + drivers/misc/ocxl/pasid.c | 107 + drivers/misc/ocxl/pci.c | 66 + drivers/misc/ocxl/sysfs.c | 186 ++ drivers/misc/ocxl/trace.c | 6 + drivers/misc/ocxl/trace.h | 242 ++ drivers/misc/open-dice.c | 206 ++ drivers/misc/pch_phub.c | 878 ++++++ drivers/misc/pci_endpoint_test.c | 1025 +++++++ drivers/misc/phantom.c | 563 ++++ drivers/misc/pvpanic/Kconfig | 27 + drivers/misc/pvpanic/Makefile | 8 + drivers/misc/pvpanic/pvpanic-mmio.c | 130 + drivers/misc/pvpanic/pvpanic-pci.c | 109 + drivers/misc/pvpanic/pvpanic.c | 115 + drivers/misc/pvpanic/pvpanic.h | 20 + drivers/misc/qcom-coincell.c | 144 + drivers/misc/sgi-gru/Makefile | 6 + drivers/misc/sgi-gru/gru.h | 78 + drivers/misc/sgi-gru/gru_instructions.h | 736 +++++ drivers/misc/sgi-gru/grufault.c | 903 ++++++ drivers/misc/sgi-gru/grufile.c | 612 ++++ drivers/misc/sgi-gru/gruhandles.c | 198 ++ drivers/misc/sgi-gru/gruhandles.h | 517 ++++ drivers/misc/sgi-gru/grukdump.c | 223 ++ drivers/misc/sgi-gru/grukservices.c | 1159 ++++++++ drivers/misc/sgi-gru/grukservices.h | 201 ++ drivers/misc/sgi-gru/grulib.h | 153 + drivers/misc/sgi-gru/grumain.c | 977 ++++++ drivers/misc/sgi-gru/gruprocfs.c | 308 ++ drivers/misc/sgi-gru/grutables.h | 659 +++++ drivers/misc/sgi-gru/grutlbpurge.c | 318 ++ drivers/misc/sgi-xp/Makefile | 13 + drivers/misc/sgi-xp/xp.h | 341 +++ drivers/misc/sgi-xp/xp_main.c | 261 ++ drivers/misc/sgi-xp/xp_uv.c | 175 ++ drivers/misc/sgi-xp/xpc.h | 732 +++++ drivers/misc/sgi-xp/xpc_channel.c | 1011 +++++++ drivers/misc/sgi-xp/xpc_main.c | 1345 +++++++++ drivers/misc/sgi-xp/xpc_partition.c | 545 ++++ drivers/misc/sgi-xp/xpc_uv.c | 1817 ++++++++++++ drivers/misc/sgi-xp/xpnet.c | 599 ++++ drivers/misc/smpro-errmon.c | 610 ++++ drivers/misc/smpro-misc.c | 145 + drivers/misc/sram-exec.c | 121 + drivers/misc/sram.c | 464 +++ drivers/misc/sram.h | 63 + drivers/misc/ti-st/Kconfig | 19 + drivers/misc/ti-st/Makefile | 7 + drivers/misc/ti-st/st_core.c | 918 ++++++ drivers/misc/ti-st/st_kim.c | 839 ++++++ drivers/misc/ti-st/st_ll.c | 156 + drivers/misc/tifm_7xx1.c | 427 
+++ drivers/misc/tifm_core.c | 366 +++ drivers/misc/tps6594-esm.c | 146 + drivers/misc/tps6594-pfsm.c | 303 ++ drivers/misc/tsl2550.c | 450 +++ drivers/misc/uacce/Kconfig | 13 + drivers/misc/uacce/Makefile | 2 + drivers/misc/uacce/uacce.c | 648 ++++ drivers/misc/vcpu_stall_detector.c | 222 ++ drivers/misc/vmw_balloon.c | 1937 ++++++++++++ drivers/misc/vmw_vmci/Kconfig | 17 + drivers/misc/vmw_vmci/Makefile | 5 + drivers/misc/vmw_vmci/vmci_context.c | 1214 ++++++++ drivers/misc/vmw_vmci/vmci_context.h | 174 ++ drivers/misc/vmw_vmci/vmci_datagram.c | 494 ++++ drivers/misc/vmw_vmci/vmci_datagram.h | 44 + drivers/misc/vmw_vmci/vmci_doorbell.c | 604 ++++ drivers/misc/vmw_vmci/vmci_doorbell.h | 43 + drivers/misc/vmw_vmci/vmci_driver.c | 176 ++ drivers/misc/vmw_vmci/vmci_driver.h | 53 + drivers/misc/vmw_vmci/vmci_event.c | 216 ++ drivers/misc/vmw_vmci/vmci_event.h | 17 + drivers/misc/vmw_vmci/vmci_guest.c | 978 ++++++ drivers/misc/vmw_vmci/vmci_handle_array.c | 146 + drivers/misc/vmw_vmci/vmci_handle_array.h | 53 + drivers/misc/vmw_vmci/vmci_host.c | 1027 +++++++ drivers/misc/vmw_vmci/vmci_queue_pair.c | 3279 +++++++++++++++++++++ drivers/misc/vmw_vmci/vmci_queue_pair.h | 165 ++ drivers/misc/vmw_vmci/vmci_resource.c | 221 ++ drivers/misc/vmw_vmci/vmci_resource.h | 51 + drivers/misc/vmw_vmci/vmci_route.c | 218 ++ drivers/misc/vmw_vmci/vmci_route.h | 22 + drivers/misc/xilinx_sdfec.c | 1455 +++++++++ drivers/misc/xilinx_tmr_inject.c | 172 ++ drivers/misc/xilinx_tmr_manager.c | 220 ++ 299 files changed, 128490 insertions(+) create mode 100644 drivers/misc/Kconfig create mode 100644 drivers/misc/Makefile create mode 100644 drivers/misc/ad525x_dpot-i2c.c create mode 100644 drivers/misc/ad525x_dpot-spi.c create mode 100644 drivers/misc/ad525x_dpot.c create mode 100644 drivers/misc/ad525x_dpot.h create mode 100644 drivers/misc/altera-stapl/Kconfig create mode 100644 drivers/misc/altera-stapl/Makefile create mode 100644 drivers/misc/altera-stapl/altera-comp.c create mode 100644 drivers/misc/altera-stapl/altera-exprt.h create mode 100644 drivers/misc/altera-stapl/altera-jtag.c create mode 100644 drivers/misc/altera-stapl/altera-jtag.h create mode 100644 drivers/misc/altera-stapl/altera-lpt.c create mode 100644 drivers/misc/altera-stapl/altera.c create mode 100644 drivers/misc/apds9802als.c create mode 100644 drivers/misc/apds990x.c create mode 100644 drivers/misc/atmel-ssc.c create mode 100644 drivers/misc/bcm-vk/Kconfig create mode 100644 drivers/misc/bcm-vk/Makefile create mode 100644 drivers/misc/bcm-vk/bcm_vk.h create mode 100644 drivers/misc/bcm-vk/bcm_vk_dev.c create mode 100644 drivers/misc/bcm-vk/bcm_vk_msg.c create mode 100644 drivers/misc/bcm-vk/bcm_vk_msg.h create mode 100644 drivers/misc/bcm-vk/bcm_vk_sg.c create mode 100644 drivers/misc/bcm-vk/bcm_vk_sg.h create mode 100644 drivers/misc/bcm-vk/bcm_vk_tty.c create mode 100644 drivers/misc/bh1770glc.c create mode 100644 drivers/misc/c2port/Kconfig create mode 100644 drivers/misc/c2port/Makefile create mode 100644 drivers/misc/c2port/c2port-duramar2150.c create mode 100644 drivers/misc/c2port/core.c create mode 100644 drivers/misc/cardreader/Kconfig create mode 100644 drivers/misc/cardreader/Makefile create mode 100644 drivers/misc/cardreader/alcor_pci.c create mode 100644 drivers/misc/cardreader/rtl8411.c create mode 100644 drivers/misc/cardreader/rts5209.c create mode 100644 drivers/misc/cardreader/rts5227.c create mode 100644 drivers/misc/cardreader/rts5228.c create mode 100644 drivers/misc/cardreader/rts5228.h create mode 100644 
drivers/misc/cardreader/rts5229.c create mode 100644 drivers/misc/cardreader/rts5249.c create mode 100644 drivers/misc/cardreader/rts5260.c create mode 100644 drivers/misc/cardreader/rts5260.h create mode 100644 drivers/misc/cardreader/rts5261.c create mode 100644 drivers/misc/cardreader/rts5261.h create mode 100644 drivers/misc/cardreader/rtsx_pcr.c create mode 100644 drivers/misc/cardreader/rtsx_pcr.h create mode 100644 drivers/misc/cardreader/rtsx_usb.c create mode 100644 drivers/misc/cb710/Kconfig create mode 100644 drivers/misc/cb710/Makefile create mode 100644 drivers/misc/cb710/core.c create mode 100644 drivers/misc/cb710/debug.c create mode 100644 drivers/misc/cb710/sgbuf2.c create mode 100644 drivers/misc/cs5535-mfgpt.c create mode 100644 drivers/misc/cxl/Kconfig create mode 100644 drivers/misc/cxl/Makefile create mode 100644 drivers/misc/cxl/api.c create mode 100644 drivers/misc/cxl/base.c create mode 100644 drivers/misc/cxl/context.c create mode 100644 drivers/misc/cxl/cxl.h create mode 100644 drivers/misc/cxl/cxllib.c create mode 100644 drivers/misc/cxl/debugfs.c create mode 100644 drivers/misc/cxl/fault.c create mode 100644 drivers/misc/cxl/file.c create mode 100644 drivers/misc/cxl/flash.c create mode 100644 drivers/misc/cxl/guest.c create mode 100644 drivers/misc/cxl/hcalls.c create mode 100644 drivers/misc/cxl/hcalls.h create mode 100644 drivers/misc/cxl/irq.c create mode 100644 drivers/misc/cxl/main.c create mode 100644 drivers/misc/cxl/native.c create mode 100644 drivers/misc/cxl/of.c create mode 100644 drivers/misc/cxl/pci.c create mode 100644 drivers/misc/cxl/sysfs.c create mode 100644 drivers/misc/cxl/trace.c create mode 100644 drivers/misc/cxl/trace.h create mode 100644 drivers/misc/cxl/vphb.c create mode 100644 drivers/misc/ds1682.c create mode 100644 drivers/misc/dummy-irq.c create mode 100644 drivers/misc/dw-xdata-pcie.c create mode 100644 drivers/misc/echo/Kconfig create mode 100644 drivers/misc/echo/Makefile create mode 100644 drivers/misc/echo/echo.c create mode 100644 drivers/misc/echo/echo.h create mode 100644 drivers/misc/echo/fir.h create mode 100644 drivers/misc/echo/oslec.h create mode 100644 drivers/misc/eeprom/Kconfig create mode 100644 drivers/misc/eeprom/Makefile create mode 100644 drivers/misc/eeprom/at24.c create mode 100644 drivers/misc/eeprom/at25.c create mode 100644 drivers/misc/eeprom/digsy_mtc_eeprom.c create mode 100644 drivers/misc/eeprom/ee1004.c create mode 100644 drivers/misc/eeprom/eeprom.c create mode 100644 drivers/misc/eeprom/eeprom_93cx6.c create mode 100644 drivers/misc/eeprom/eeprom_93xx46.c create mode 100644 drivers/misc/eeprom/idt_89hpesx.c create mode 100644 drivers/misc/eeprom/max6875.c create mode 100644 drivers/misc/enclosure.c create mode 100644 drivers/misc/fastrpc.c create mode 100644 drivers/misc/gehc-achc.c create mode 100644 drivers/misc/genwqe/Kconfig create mode 100644 drivers/misc/genwqe/Makefile create mode 100644 drivers/misc/genwqe/card_base.c create mode 100644 drivers/misc/genwqe/card_base.h create mode 100644 drivers/misc/genwqe/card_ddcb.c create mode 100644 drivers/misc/genwqe/card_ddcb.h create mode 100644 drivers/misc/genwqe/card_debugfs.c create mode 100644 drivers/misc/genwqe/card_dev.c create mode 100644 drivers/misc/genwqe/card_sysfs.c create mode 100644 drivers/misc/genwqe/card_utils.c create mode 100644 drivers/misc/genwqe/genwqe_driver.h create mode 100644 drivers/misc/hi6421v600-irq.c create mode 100644 drivers/misc/hisi_hikey_usb.c create mode 100644 drivers/misc/hmc6352.c create mode 100644 
drivers/misc/hpilo.c create mode 100644 drivers/misc/hpilo.h create mode 100644 drivers/misc/ibmasm/Makefile create mode 100644 drivers/misc/ibmasm/command.c create mode 100644 drivers/misc/ibmasm/dot_command.c create mode 100644 drivers/misc/ibmasm/dot_command.h create mode 100644 drivers/misc/ibmasm/event.c create mode 100644 drivers/misc/ibmasm/heartbeat.c create mode 100644 drivers/misc/ibmasm/i2o.h create mode 100644 drivers/misc/ibmasm/ibmasm.h create mode 100644 drivers/misc/ibmasm/ibmasmfs.c create mode 100644 drivers/misc/ibmasm/lowlevel.c create mode 100644 drivers/misc/ibmasm/lowlevel.h create mode 100644 drivers/misc/ibmasm/module.c create mode 100644 drivers/misc/ibmasm/r_heartbeat.c create mode 100644 drivers/misc/ibmasm/remote.c create mode 100644 drivers/misc/ibmasm/remote.h create mode 100644 drivers/misc/ibmasm/uart.c create mode 100644 drivers/misc/ibmvmc.c create mode 100644 drivers/misc/ibmvmc.h create mode 100644 drivers/misc/ics932s401.c create mode 100644 drivers/misc/isl29003.c create mode 100644 drivers/misc/isl29020.c create mode 100644 drivers/misc/kgdbts.c create mode 100644 drivers/misc/lattice-ecp3-config.c create mode 100644 drivers/misc/lis3lv02d/Kconfig create mode 100644 drivers/misc/lis3lv02d/Makefile create mode 100644 drivers/misc/lis3lv02d/lis3lv02d.c create mode 100644 drivers/misc/lis3lv02d/lis3lv02d.h create mode 100644 drivers/misc/lis3lv02d/lis3lv02d_i2c.c create mode 100644 drivers/misc/lis3lv02d/lis3lv02d_spi.c create mode 100644 drivers/misc/lkdtm/Makefile create mode 100644 drivers/misc/lkdtm/bugs.c create mode 100644 drivers/misc/lkdtm/cfi.c create mode 100644 drivers/misc/lkdtm/core.c create mode 100644 drivers/misc/lkdtm/fortify.c create mode 100644 drivers/misc/lkdtm/heap.c create mode 100644 drivers/misc/lkdtm/lkdtm.h create mode 100644 drivers/misc/lkdtm/perms.c create mode 100644 drivers/misc/lkdtm/powerpc.c create mode 100644 drivers/misc/lkdtm/refcount.c create mode 100644 drivers/misc/lkdtm/rodata.c create mode 100644 drivers/misc/lkdtm/stackleak.c create mode 100644 drivers/misc/lkdtm/usercopy.c create mode 100644 drivers/misc/mchp_pci1xxxx/Kconfig create mode 100644 drivers/misc/mchp_pci1xxxx/Makefile create mode 100644 drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c create mode 100644 drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.h create mode 100644 drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c create mode 100644 drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_otpe2p.c create mode 100644 drivers/misc/mei/Kconfig create mode 100644 drivers/misc/mei/Makefile create mode 100644 drivers/misc/mei/bus-fixup.c create mode 100644 drivers/misc/mei/bus.c create mode 100644 drivers/misc/mei/client.c create mode 100644 drivers/misc/mei/client.h create mode 100644 drivers/misc/mei/debugfs.c create mode 100644 drivers/misc/mei/dma-ring.c create mode 100644 drivers/misc/mei/gsc-me.c create mode 100644 drivers/misc/mei/gsc_proxy/Kconfig create mode 100644 drivers/misc/mei/gsc_proxy/Makefile create mode 100644 drivers/misc/mei/gsc_proxy/mei_gsc_proxy.c create mode 100644 drivers/misc/mei/hbm.c create mode 100644 drivers/misc/mei/hbm.h create mode 100644 drivers/misc/mei/hdcp/Kconfig create mode 100644 drivers/misc/mei/hdcp/Makefile create mode 100644 drivers/misc/mei/hdcp/mei_hdcp.c create mode 100644 drivers/misc/mei/hdcp/mei_hdcp.h create mode 100644 drivers/misc/mei/hw-me-regs.h create mode 100644 drivers/misc/mei/hw-me.c create mode 100644 drivers/misc/mei/hw-me.h create mode 100644 drivers/misc/mei/hw-txe-regs.h create mode 100644 
drivers/misc/mei/hw-txe.c create mode 100644 drivers/misc/mei/hw-txe.h create mode 100644 drivers/misc/mei/hw.h create mode 100644 drivers/misc/mei/init.c create mode 100644 drivers/misc/mei/interrupt.c create mode 100644 drivers/misc/mei/main.c create mode 100644 drivers/misc/mei/mei-trace.c create mode 100644 drivers/misc/mei/mei-trace.h create mode 100644 drivers/misc/mei/mei_dev.h create mode 100644 drivers/misc/mei/mkhi.h create mode 100644 drivers/misc/mei/pci-me.c create mode 100644 drivers/misc/mei/pci-txe.c create mode 100644 drivers/misc/mei/pxp/Kconfig create mode 100644 drivers/misc/mei/pxp/Makefile create mode 100644 drivers/misc/mei/pxp/mei_pxp.c create mode 100644 drivers/misc/mei/pxp/mei_pxp.h create mode 100644 drivers/misc/ocxl/Kconfig create mode 100644 drivers/misc/ocxl/Makefile create mode 100644 drivers/misc/ocxl/afu_irq.c create mode 100644 drivers/misc/ocxl/config.c create mode 100644 drivers/misc/ocxl/context.c create mode 100644 drivers/misc/ocxl/core.c create mode 100644 drivers/misc/ocxl/file.c create mode 100644 drivers/misc/ocxl/link.c create mode 100644 drivers/misc/ocxl/main.c create mode 100644 drivers/misc/ocxl/mmio.c create mode 100644 drivers/misc/ocxl/ocxl_internal.h create mode 100644 drivers/misc/ocxl/pasid.c create mode 100644 drivers/misc/ocxl/pci.c create mode 100644 drivers/misc/ocxl/sysfs.c create mode 100644 drivers/misc/ocxl/trace.c create mode 100644 drivers/misc/ocxl/trace.h create mode 100644 drivers/misc/open-dice.c create mode 100644 drivers/misc/pch_phub.c create mode 100644 drivers/misc/pci_endpoint_test.c create mode 100644 drivers/misc/phantom.c create mode 100644 drivers/misc/pvpanic/Kconfig create mode 100644 drivers/misc/pvpanic/Makefile create mode 100644 drivers/misc/pvpanic/pvpanic-mmio.c create mode 100644 drivers/misc/pvpanic/pvpanic-pci.c create mode 100644 drivers/misc/pvpanic/pvpanic.c create mode 100644 drivers/misc/pvpanic/pvpanic.h create mode 100644 drivers/misc/qcom-coincell.c create mode 100644 drivers/misc/sgi-gru/Makefile create mode 100644 drivers/misc/sgi-gru/gru.h create mode 100644 drivers/misc/sgi-gru/gru_instructions.h create mode 100644 drivers/misc/sgi-gru/grufault.c create mode 100644 drivers/misc/sgi-gru/grufile.c create mode 100644 drivers/misc/sgi-gru/gruhandles.c create mode 100644 drivers/misc/sgi-gru/gruhandles.h create mode 100644 drivers/misc/sgi-gru/grukdump.c create mode 100644 drivers/misc/sgi-gru/grukservices.c create mode 100644 drivers/misc/sgi-gru/grukservices.h create mode 100644 drivers/misc/sgi-gru/grulib.h create mode 100644 drivers/misc/sgi-gru/grumain.c create mode 100644 drivers/misc/sgi-gru/gruprocfs.c create mode 100644 drivers/misc/sgi-gru/grutables.h create mode 100644 drivers/misc/sgi-gru/grutlbpurge.c create mode 100644 drivers/misc/sgi-xp/Makefile create mode 100644 drivers/misc/sgi-xp/xp.h create mode 100644 drivers/misc/sgi-xp/xp_main.c create mode 100644 drivers/misc/sgi-xp/xp_uv.c create mode 100644 drivers/misc/sgi-xp/xpc.h create mode 100644 drivers/misc/sgi-xp/xpc_channel.c create mode 100644 drivers/misc/sgi-xp/xpc_main.c create mode 100644 drivers/misc/sgi-xp/xpc_partition.c create mode 100644 drivers/misc/sgi-xp/xpc_uv.c create mode 100644 drivers/misc/sgi-xp/xpnet.c create mode 100644 drivers/misc/smpro-errmon.c create mode 100644 drivers/misc/smpro-misc.c create mode 100644 drivers/misc/sram-exec.c create mode 100644 drivers/misc/sram.c create mode 100644 drivers/misc/sram.h create mode 100644 drivers/misc/ti-st/Kconfig create mode 100644 drivers/misc/ti-st/Makefile 
 create mode 100644 drivers/misc/ti-st/st_core.c
 create mode 100644 drivers/misc/ti-st/st_kim.c
 create mode 100644 drivers/misc/ti-st/st_ll.c
 create mode 100644 drivers/misc/tifm_7xx1.c
 create mode 100644 drivers/misc/tifm_core.c
 create mode 100644 drivers/misc/tps6594-esm.c
 create mode 100644 drivers/misc/tps6594-pfsm.c
 create mode 100644 drivers/misc/tsl2550.c
 create mode 100644 drivers/misc/uacce/Kconfig
 create mode 100644 drivers/misc/uacce/Makefile
 create mode 100644 drivers/misc/uacce/uacce.c
 create mode 100644 drivers/misc/vcpu_stall_detector.c
 create mode 100644 drivers/misc/vmw_balloon.c
 create mode 100644 drivers/misc/vmw_vmci/Kconfig
 create mode 100644 drivers/misc/vmw_vmci/Makefile
 create mode 100644 drivers/misc/vmw_vmci/vmci_context.c
 create mode 100644 drivers/misc/vmw_vmci/vmci_context.h
 create mode 100644 drivers/misc/vmw_vmci/vmci_datagram.c
 create mode 100644 drivers/misc/vmw_vmci/vmci_datagram.h
 create mode 100644 drivers/misc/vmw_vmci/vmci_doorbell.c
 create mode 100644 drivers/misc/vmw_vmci/vmci_doorbell.h
 create mode 100644 drivers/misc/vmw_vmci/vmci_driver.c
 create mode 100644 drivers/misc/vmw_vmci/vmci_driver.h
 create mode 100644 drivers/misc/vmw_vmci/vmci_event.c
 create mode 100644 drivers/misc/vmw_vmci/vmci_event.h
 create mode 100644 drivers/misc/vmw_vmci/vmci_guest.c
 create mode 100644 drivers/misc/vmw_vmci/vmci_handle_array.c
 create mode 100644 drivers/misc/vmw_vmci/vmci_handle_array.h
 create mode 100644 drivers/misc/vmw_vmci/vmci_host.c
 create mode 100644 drivers/misc/vmw_vmci/vmci_queue_pair.c
 create mode 100644 drivers/misc/vmw_vmci/vmci_queue_pair.h
 create mode 100644 drivers/misc/vmw_vmci/vmci_resource.c
 create mode 100644 drivers/misc/vmw_vmci/vmci_resource.h
 create mode 100644 drivers/misc/vmw_vmci/vmci_route.c
 create mode 100644 drivers/misc/vmw_vmci/vmci_route.h
 create mode 100644 drivers/misc/xilinx_sdfec.c
 create mode 100644 drivers/misc/xilinx_tmr_inject.c
 create mode 100644 drivers/misc/xilinx_tmr_manager.c

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
new file mode 100644
index 0000000000..cadd4a820c
--- /dev/null
+++ b/drivers/misc/Kconfig
@@ -0,0 +1,582 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Misc strange devices
+#
+
+menu "Misc devices"
+
+config SENSORS_LIS3LV02D
+        tristate
+        depends on INPUT
+
+config AD525X_DPOT
+        tristate "Analog Devices Digital Potentiometers"
+        depends on (I2C || SPI) && SYSFS
+        help
+          If you say yes here, you get support for the Analog Devices
+          AD5258, AD5259, AD5251, AD5252, AD5253, AD5254, AD5255
+          AD5160, AD5161, AD5162, AD5165, AD5200, AD5201, AD5203,
+          AD5204, AD5206, AD5207, AD5231, AD5232, AD5233, AD5235,
+          AD5260, AD5262, AD5263, AD5290, AD5291, AD5292, AD5293,
+          AD7376, AD8400, AD8402, AD8403, ADN2850, AD5241, AD5242,
+          AD5243, AD5245, AD5246, AD5247, AD5248, AD5280, AD5282,
+          ADN2860, AD5273, AD5171, AD5170, AD5172, AD5173, AD5270,
+          AD5271, AD5272, AD5274
+          digital potentiometer chips.
+
+          See Documentation/misc-devices/ad525x_dpot.rst for the
+          userspace interface.
+
+          This driver can also be built as a module. If so, the module
+          will be called ad525x_dpot.
+
+config AD525X_DPOT_I2C
+        tristate "support I2C bus connection"
+        depends on AD525X_DPOT && I2C
+        help
+          Say Y here if you have a digital potentiometer hooked to an I2C bus.
+
+          To compile this driver as a module, choose M here: the
+          module will be called ad525x_dpot-i2c.
+ +config AD525X_DPOT_SPI + tristate "support SPI bus connection" + depends on AD525X_DPOT && SPI_MASTER + help + Say Y here if you have a digital potentiometers hooked to an SPI bus. + + If unsure, say N (but it's safe to say "Y"). + + To compile this driver as a module, choose M here: the + module will be called ad525x_dpot-spi. + +config DUMMY_IRQ + tristate "Dummy IRQ handler" + help + This module accepts a single 'irq' parameter, which it should register for. + The sole purpose of this module is to help with debugging of systems on + which spurious IRQs would happen on disabled IRQ vector. + +config IBM_ASM + tristate "Device driver for IBM RSA service processor" + depends on X86 && PCI && INPUT + depends on SERIAL_8250 || SERIAL_8250=n + help + This option enables device driver support for in-band access to the + IBM RSA (Condor) service processor in eServer xSeries systems. + The ibmasm device driver allows user space application to access + ASM (Advanced Systems Management) functions on the service + processor. The driver is meant to be used in conjunction with + a user space API. + The ibmasm driver also enables the OS to use the UART on the + service processor board as a regular serial port. To make use of + this feature serial driver support (CONFIG_SERIAL_8250) must be + enabled. + + WARNING: This software may not be supported or function + correctly on your IBM server. Please consult the IBM ServerProven + website + for information on the specific driver level and support statement + for your IBM server. + +config IBMVMC + tristate "IBM Virtual Management Channel support" + depends on PPC_PSERIES + help + This is the IBM POWER Virtual Management Channel + + This driver is to be used for the POWER Virtual + Management Channel virtual adapter on the PowerVM + platform. It provides both request/response and + async message support through the /dev/ibmvmc node. + + To compile this driver as a module, choose M here: the + module will be called ibmvmc. + +config PHANTOM + tristate "Sensable PHANToM (PCI)" + depends on PCI + help + Say Y here if you want to build a driver for Sensable PHANToM device. + + This driver is only for PCI PHANToMs. + + If you choose to build module, its name will be phantom. If unsure, + say N here. + +config TIFM_CORE + tristate "TI Flash Media interface support" + depends on PCI + help + If you want support for Texas Instruments(R) Flash Media adapters + you should select this option and then also choose an appropriate + host adapter, such as 'TI Flash Media PCI74xx/PCI76xx host adapter + support', if you have a TI PCI74xx compatible card reader, for + example. + You will also have to select some flash card format drivers. MMC/SD + cards are supported via 'MMC/SD Card support: TI Flash Media MMC/SD + Interface support (MMC_TIFM_SD)'. + + To compile this driver as a module, choose M here: the module will + be called tifm_core. + +config TIFM_7XX1 + tristate "TI Flash Media PCI74xx/PCI76xx host adapter support" + depends on PCI && TIFM_CORE + default TIFM_CORE + help + This option enables support for Texas Instruments(R) PCI74xx and + PCI76xx families of Flash Media adapters, found in many laptops. + To make actual use of the device, you will have to select some + flash card format drivers, as outlined in the TIFM_CORE Help. + + To compile this driver as a module, choose M here: the module will + be called tifm_7xx1. 
+ +config ICS932S401 + tristate "Integrated Circuits ICS932S401" + depends on I2C + help + If you say yes here you get support for the Integrated Circuits + ICS932S401 clock control chips. + + This driver can also be built as a module. If so, the module + will be called ics932s401. + +config ATMEL_SSC + tristate "Device driver for Atmel SSC peripheral" + depends on HAS_IOMEM && (ARCH_AT91 || COMPILE_TEST) + help + This option enables device driver support for Atmel Synchronized + Serial Communication peripheral (SSC). + + The SSC peripheral supports a wide variety of serial frame based + communications, i.e. I2S, SPI, etc. + + If unsure, say N. + +config ENCLOSURE_SERVICES + tristate "Enclosure Services" + help + Provides support for intelligent enclosures (bays which + contain storage devices). You also need either a host + driver (SCSI/ATA) which supports enclosures + or a SCSI enclosure device (SES) to use these services. + +config SGI_XP + tristate "Support communication between SGI SSIs" + depends on NET + depends on (IA64_SGI_UV || X86_UV) && SMP + depends on X86_64 || BROKEN + select SGI_GRU if X86_64 && SMP + help + An SGI machine can be divided into multiple Single System + Images which act independently of each other and have + hardware based memory protection from the others. Enabling + this feature will allow for direct communication between SSIs + based on a network adapter and DMA messaging. + +config SMPRO_ERRMON + tristate "Ampere Computing SMPro error monitor driver" + depends on MFD_SMPRO || COMPILE_TEST + help + Say Y here to get support for the SMpro error monitor function + provided by Ampere Computing's Altra and Altra Max SoCs. Upon + loading, the driver creates sysfs files which can be use to gather + multiple HW error data reported via read and write system calls. + + To compile this driver as a module, say M here. The driver will be + called smpro-errmon. + +config SMPRO_MISC + tristate "Ampere Computing SMPro miscellaneous driver" + depends on MFD_SMPRO || COMPILE_TEST + help + Say Y here to get support for the SMpro error miscellalenous function + provided by Ampere Computing's Altra and Altra Max SoCs. + + To compile this driver as a module, say M here. The driver will be + called smpro-misc. + +config CS5535_MFGPT + tristate "CS5535/CS5536 Geode Multi-Function General Purpose Timer (MFGPT) support" + depends on MFD_CS5535 + help + This driver provides access to MFGPT functionality for other + drivers that need timers. MFGPTs are available in the CS5535 and + CS5536 companion chips that are found in AMD Geode and several + other platforms. They have a better resolution and max interval + than the generic PIT, and are suitable for use as high-res timers. + You probably don't want to enable this manually; other drivers that + make use of it should enable it. + +config CS5535_MFGPT_DEFAULT_IRQ + int + depends on CS5535_MFGPT + default 7 + help + MFGPTs on the CS5535 require an interrupt. The selected IRQ + can be overridden as a module option as well as by driver that + use the cs5535_mfgpt_ API; however, different architectures might + want to use a different IRQ by default. This is here for + architectures to set as necessary. + +config CS5535_CLOCK_EVENT_SRC + tristate "CS5535/CS5536 high-res timer (MFGPT) events" + depends on GENERIC_CLOCKEVENTS && CS5535_MFGPT + help + This driver provides a clock event source based on the MFGPT + timer(s) in the CS5535 and CS5536 companion chips. 
+ MFGPTs have a better resolution and max interval than the + generic PIT, and are suitable for use as high-res timers. + +config GEHC_ACHC + tristate "GEHC ACHC support" + depends on SPI && SYSFS + depends on SOC_IMX53 || COMPILE_TEST + select FW_LOADER + help + Support for GE ACHC microcontroller, that is part of the GE + PPD device. + + To compile this driver as a module, choose M here: the + module will be called gehc-achc. + +config HI6421V600_IRQ + tristate "HiSilicon Hi6421v600 IRQ and powerkey" + depends on OF + depends on SPMI + depends on HAS_IOMEM + select MFD_CORE + select REGMAP_SPMI + help + This driver provides IRQ handling for Hi6421v600, used on + some Kirin chipsets, like the one at Hikey 970. + +config HP_ILO + tristate "Channel interface driver for the HP iLO processor" + depends on PCI + help + The channel interface driver allows applications to communicate + with iLO management processors present on HP ProLiant servers. + Upon loading, the driver creates /dev/hpilo/dXccbN files, which + can be used to gather data from the management processor, via + read and write system calls. + + To compile this driver as a module, choose M here: the + module will be called hpilo. + +config QCOM_COINCELL + tristate "Qualcomm coincell charger support" + depends on MFD_SPMI_PMIC || COMPILE_TEST + help + This driver supports the coincell block found inside of + Qualcomm PMICs. The coincell charger provides a means to + charge a coincell battery or backup capacitor which is used + to maintain PMIC register and RTC state in the absence of + external power. + +config QCOM_FASTRPC + tristate "Qualcomm FastRPC" + depends on ARCH_QCOM || COMPILE_TEST + depends on RPMSG + select DMA_SHARED_BUFFER + select QCOM_SCM + help + Provides a communication mechanism that allows for clients to + make remote method invocations across processor boundary to + applications DSP processor. Say M if you want to enable this + module. + +config SGI_GRU + tristate "SGI GRU driver" + depends on X86_UV && SMP + select MMU_NOTIFIER + help + The GRU is a hardware resource located in the system chipset. The GRU + contains memory that can be mmapped into the user address space. This memory is + used to communicate with the GRU to perform functions such as load/store, + scatter/gather, bcopy, AMOs, etc. The GRU is directly accessed by user + instructions using user virtual addresses. GRU instructions (ex., bcopy) use + user virtual addresses for operands. + + If you are not running on a SGI UV system, say N. + +config SGI_GRU_DEBUG + bool "SGI GRU driver debug" + depends on SGI_GRU + help + This option enables additional debugging code for the SGI GRU driver. + If you are unsure, say N. + +config APDS9802ALS + tristate "Medfield Avago APDS9802 ALS Sensor module" + depends on I2C + help + If you say yes here you get support for the ALS APDS9802 ambient + light sensor. + + This driver can also be built as a module. If so, the module + will be called apds9802als. + +config ISL29003 + tristate "Intersil ISL29003 ambient light sensor" + depends on I2C && SYSFS + help + If you say yes here you get support for the Intersil ISL29003 + ambient light sensor. + + This driver can also be built as a module. If so, the module + will be called isl29003. + +config ISL29020 + tristate "Intersil ISL29020 ambient light sensor" + depends on I2C + help + If you say yes here you get support for the Intersil ISL29020 + ambient light sensor. + + This driver can also be built as a module. If so, the module + will be called isl29020. 
+ +config SENSORS_TSL2550 + tristate "Taos TSL2550 ambient light sensor" + depends on I2C && SYSFS + help + If you say yes here you get support for the Taos TSL2550 + ambient light sensor. + + This driver can also be built as a module. If so, the module + will be called tsl2550. + +config SENSORS_BH1770 + tristate "BH1770GLC / SFH7770 combined ALS - Proximity sensor" + depends on I2C + help + Say Y here if you want to build a driver for BH1770GLC (ROHM) or + SFH7770 (Osram) combined ambient light and proximity sensor chip. + + To compile this driver as a module, choose M here: the + module will be called bh1770glc. If unsure, say N here. + +config SENSORS_APDS990X + tristate "APDS990X combined als and proximity sensors" + depends on I2C + help + Say Y here if you want to build a driver for Avago APDS990x + combined ambient light and proximity sensor chip. + + To compile this driver as a module, choose M here: the + module will be called apds990x. If unsure, say N here. + +config HMC6352 + tristate "Honeywell HMC6352 compass" + depends on I2C + help + This driver provides support for the Honeywell HMC6352 compass, + providing configuration and heading data via sysfs. + +config DS1682 + tristate "Dallas DS1682 Total Elapsed Time Recorder with Alarm" + depends on I2C + help + If you say yes here you get support for Dallas Semiconductor + DS1682 Total Elapsed Time Recorder. + + This driver can also be built as a module. If so, the module + will be called ds1682. + +config VMWARE_BALLOON + tristate "VMware Balloon Driver" + depends on VMWARE_VMCI && X86 && HYPERVISOR_GUEST + select MEMORY_BALLOON + help + This is VMware physical memory management driver which acts + like a "balloon" that can be inflated to reclaim physical pages + by reserving them in the guest and invalidating them in the + monitor, freeing up the underlying machine pages so they can + be allocated to other guests. The balloon can also be deflated + to allow the guest to use more physical memory. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called vmw_balloon. + +config PCH_PHUB + tristate "Intel EG20T PCH/LAPIS Semicon IOH(ML7213/ML7223/ML7831) PHUB" + select GENERIC_NET_UTILS + depends on PCI && (X86_32 || MIPS || COMPILE_TEST) + help + This driver is for PCH(Platform controller Hub) PHUB(Packet Hub) of + Intel Topcliff which is an IOH(Input/Output Hub) for x86 embedded + processor. The Topcliff has MAC address and Option ROM data in SROM. + This driver can access MAC address and Option ROM data in SROM. + + This driver also can be used for LAPIS Semiconductor's IOH, + ML7213/ML7223/ML7831. + ML7213 which is for IVI(In-Vehicle Infotainment) use. + ML7223 IOH is for MP(Media Phone) use. + ML7831 IOH is for general purpose use. + ML7213/ML7223/ML7831 is companion chip for Intel Atom E6xx series. + ML7213/ML7223/ML7831 is completely compatible for Intel EG20T PCH. + + To compile this driver as a module, choose M here: the module will + be called pch_phub. + +config LATTICE_ECP3_CONFIG + tristate "Lattice ECP3 FPGA bitstream configuration via SPI" + depends on SPI && SYSFS + select FW_LOADER + default n + help + This option enables support for bitstream configuration (programming + or loading) of the Lattice ECP3 FPGA family via SPI. + + If unsure, say N. + +config SRAM + bool "Generic on-chip SRAM driver" + depends on HAS_IOMEM + select GENERIC_ALLOCATOR + select SRAM_EXEC if ARM + help + This driver allows you to declare a memory region to be managed by + the genalloc API. 
It is supposed to be used for small on-chip SRAM + areas found on many SoCs. + +config SRAM_EXEC + bool + +config DW_XDATA_PCIE + depends on PCI + tristate "Synopsys DesignWare xData PCIe driver" + help + This driver allows controlling Synopsys DesignWare PCIe traffic + generator IP also known as xData, present in Synopsys DesignWare + PCIe Endpoint prototype. + + If unsure, say N. + +config PCI_ENDPOINT_TEST + depends on PCI + select CRC32 + tristate "PCI Endpoint Test driver" + help + Enable this configuration option to enable the host side test driver + for PCI Endpoint. + +config XILINX_SDFEC + tristate "Xilinx SDFEC 16" + depends on HAS_IOMEM + help + This option enables support for the Xilinx SDFEC (Soft Decision + Forward Error Correction) driver. This enables a char driver + for the SDFEC. + + You may select this driver if your design instantiates the + SDFEC(16nm) hardened block. To compile this as a module choose M. + + If unsure, say N. + +config MISC_RTSX + tristate + default MISC_RTSX_PCI || MISC_RTSX_USB + +config HISI_HIKEY_USB + tristate "USB GPIO Hub on HiSilicon Hikey 960/970 Platform" + depends on (OF && GPIOLIB) || COMPILE_TEST + depends on USB_ROLE_SWITCH + help + If you say yes here this adds support for the on-board USB GPIO hub + found on HiKey 960/970 boards, which is necessary to support + switching between the dual-role USB-C port and the USB-A host ports + using only one USB controller. + +config OPEN_DICE + tristate "Open Profile for DICE driver" + depends on OF_RESERVED_MEM + depends on HAS_IOMEM + help + This driver exposes a DICE reserved memory region to userspace via + a character device. The memory region contains Compound Device + Identifiers (CDIs) generated by firmware as an output of DICE + measured boot flow. Userspace can use CDIs for remote attestation + and sealing. + + If unsure, say N. + +config VCPU_STALL_DETECTOR + tristate "Guest vCPU stall detector" + depends on OF && HAS_IOMEM + help + When this driver is bound inside a KVM guest, it will + periodically "pet" an MMIO stall detector device from each vCPU + and allow the host to detect vCPU stalls. + + To compile this driver as a module, choose M here: the module + will be called vcpu_stall_detector. + + If you do not intend to run this kernel as a guest, say N. + +config TMR_MANAGER + tristate "Select TMR Manager" + depends on MICROBLAZE && MB_MANAGER + help + This option enables the driver developed for TMR Manager. + The Triple Modular Redundancy(TMR) manager provides support for + fault detection. + + Say N here unless you know what you are doing. + +config TMR_INJECT + tristate "Select TMR Inject" + depends on TMR_MANAGER && FAULT_INJECTION_DEBUG_FS + help + This option enables the driver developed for TMR Inject. + The Triple Modular Redundancy(TMR) Inject provides + fault injection. + + Say N here unless you know what you are doing. + +config TPS6594_ESM + tristate "TI TPS6594 Error Signal Monitor support" + depends on MFD_TPS6594 + default MFD_TPS6594 + help + Support ESM (Error Signal Monitor) on TPS6594 PMIC devices. + ESM is used typically to reboot the board in error condition. + + This driver can also be built as a module. If so, the module + will be called tps6594-esm. + +config TPS6594_PFSM + tristate "TI TPS6594 Pre-configurable Finite State Machine support" + depends on MFD_TPS6594 + default MFD_TPS6594 + help + Support PFSM (Pre-configurable Finite State Machine) on TPS6594 PMIC devices. 
+ These devices integrate a finite state machine engine, which manages the state + of the device during operating state transition. + + This driver can also be built as a module. If so, the module + will be called tps6594-pfsm. + +source "drivers/misc/c2port/Kconfig" +source "drivers/misc/eeprom/Kconfig" +source "drivers/misc/cb710/Kconfig" +source "drivers/misc/ti-st/Kconfig" +source "drivers/misc/lis3lv02d/Kconfig" +source "drivers/misc/altera-stapl/Kconfig" +source "drivers/misc/mei/Kconfig" +source "drivers/misc/vmw_vmci/Kconfig" +source "drivers/misc/genwqe/Kconfig" +source "drivers/misc/echo/Kconfig" +source "drivers/misc/cxl/Kconfig" +source "drivers/misc/ocxl/Kconfig" +source "drivers/misc/bcm-vk/Kconfig" +source "drivers/misc/cardreader/Kconfig" +source "drivers/misc/uacce/Kconfig" +source "drivers/misc/pvpanic/Kconfig" +source "drivers/misc/mchp_pci1xxxx/Kconfig" +endmenu diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile new file mode 100644 index 0000000000..f2a4d1ff65 --- /dev/null +++ b/drivers/misc/Makefile @@ -0,0 +1,69 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for misc devices that really don't fit anywhere else. +# + +obj-$(CONFIG_IBM_ASM) += ibmasm/ +obj-$(CONFIG_IBMVMC) += ibmvmc.o +obj-$(CONFIG_AD525X_DPOT) += ad525x_dpot.o +obj-$(CONFIG_AD525X_DPOT_I2C) += ad525x_dpot-i2c.o +obj-$(CONFIG_AD525X_DPOT_SPI) += ad525x_dpot-spi.o +obj-$(CONFIG_ATMEL_SSC) += atmel-ssc.o +obj-$(CONFIG_DUMMY_IRQ) += dummy-irq.o +obj-$(CONFIG_ICS932S401) += ics932s401.o +obj-$(CONFIG_LKDTM) += lkdtm/ +obj-$(CONFIG_TIFM_CORE) += tifm_core.o +obj-$(CONFIG_TIFM_7XX1) += tifm_7xx1.o +obj-$(CONFIG_PHANTOM) += phantom.o +obj-$(CONFIG_QCOM_COINCELL) += qcom-coincell.o +obj-$(CONFIG_QCOM_FASTRPC) += fastrpc.o +obj-$(CONFIG_SENSORS_BH1770) += bh1770glc.o +obj-$(CONFIG_SENSORS_APDS990X) += apds990x.o +obj-$(CONFIG_ENCLOSURE_SERVICES) += enclosure.o +obj-$(CONFIG_KGDB_TESTS) += kgdbts.o +obj-$(CONFIG_SGI_XP) += sgi-xp/ +obj-$(CONFIG_SGI_GRU) += sgi-gru/ +obj-$(CONFIG_SMPRO_ERRMON) += smpro-errmon.o +obj-$(CONFIG_SMPRO_MISC) += smpro-misc.o +obj-$(CONFIG_CS5535_MFGPT) += cs5535-mfgpt.o +obj-$(CONFIG_GEHC_ACHC) += gehc-achc.o +obj-$(CONFIG_HP_ILO) += hpilo.o +obj-$(CONFIG_APDS9802ALS) += apds9802als.o +obj-$(CONFIG_ISL29003) += isl29003.o +obj-$(CONFIG_ISL29020) += isl29020.o +obj-$(CONFIG_SENSORS_TSL2550) += tsl2550.o +obj-$(CONFIG_DS1682) += ds1682.o +obj-$(CONFIG_C2PORT) += c2port/ +obj-$(CONFIG_HMC6352) += hmc6352.o +obj-y += eeprom/ +obj-y += cb710/ +obj-$(CONFIG_VMWARE_BALLOON) += vmw_balloon.o +obj-$(CONFIG_PCH_PHUB) += pch_phub.o +obj-y += ti-st/ +obj-y += lis3lv02d/ +obj-$(CONFIG_ALTERA_STAPL) +=altera-stapl/ +obj-$(CONFIG_INTEL_MEI) += mei/ +obj-$(CONFIG_VMWARE_VMCI) += vmw_vmci/ +obj-$(CONFIG_LATTICE_ECP3_CONFIG) += lattice-ecp3-config.o +obj-$(CONFIG_SRAM) += sram.o +obj-$(CONFIG_SRAM_EXEC) += sram-exec.o +obj-$(CONFIG_GENWQE) += genwqe/ +obj-$(CONFIG_ECHO) += echo/ +obj-$(CONFIG_CXL_BASE) += cxl/ +obj-$(CONFIG_DW_XDATA_PCIE) += dw-xdata-pcie.o +obj-$(CONFIG_PCI_ENDPOINT_TEST) += pci_endpoint_test.o +obj-$(CONFIG_OCXL) += ocxl/ +obj-$(CONFIG_BCM_VK) += bcm-vk/ +obj-y += cardreader/ +obj-$(CONFIG_PVPANIC) += pvpanic/ +obj-$(CONFIG_UACCE) += uacce/ +obj-$(CONFIG_XILINX_SDFEC) += xilinx_sdfec.o +obj-$(CONFIG_HISI_HIKEY_USB) += hisi_hikey_usb.o +obj-$(CONFIG_HI6421V600_IRQ) += hi6421v600-irq.o +obj-$(CONFIG_OPEN_DICE) += open-dice.o +obj-$(CONFIG_GP_PCI1XXXX) += mchp_pci1xxxx/ +obj-$(CONFIG_VCPU_STALL_DETECTOR) += vcpu_stall_detector.o +obj-$(CONFIG_TMR_MANAGER) += 
xilinx_tmr_manager.o +obj-$(CONFIG_TMR_INJECT) += xilinx_tmr_inject.o +obj-$(CONFIG_TPS6594_ESM) += tps6594-esm.o +obj-$(CONFIG_TPS6594_PFSM) += tps6594-pfsm.o diff --git a/drivers/misc/ad525x_dpot-i2c.c b/drivers/misc/ad525x_dpot-i2c.c new file mode 100644 index 0000000000..469478f7a1 --- /dev/null +++ b/drivers/misc/ad525x_dpot-i2c.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Driver for the Analog Devices digital potentiometers (I2C bus) + * + * Copyright (C) 2010-2011 Michael Hennerich, Analog Devices Inc. + */ + +#include +#include + +#include "ad525x_dpot.h" + +/* I2C bus functions */ +static int write_d8(void *client, u8 val) +{ + return i2c_smbus_write_byte(client, val); +} + +static int write_r8d8(void *client, u8 reg, u8 val) +{ + return i2c_smbus_write_byte_data(client, reg, val); +} + +static int write_r8d16(void *client, u8 reg, u16 val) +{ + return i2c_smbus_write_word_data(client, reg, val); +} + +static int read_d8(void *client) +{ + return i2c_smbus_read_byte(client); +} + +static int read_r8d8(void *client, u8 reg) +{ + return i2c_smbus_read_byte_data(client, reg); +} + +static int read_r8d16(void *client, u8 reg) +{ + return i2c_smbus_read_word_data(client, reg); +} + +static const struct ad_dpot_bus_ops bops = { + .read_d8 = read_d8, + .read_r8d8 = read_r8d8, + .read_r8d16 = read_r8d16, + .write_d8 = write_d8, + .write_r8d8 = write_r8d8, + .write_r8d16 = write_r8d16, +}; + +static int ad_dpot_i2c_probe(struct i2c_client *client) +{ + const struct i2c_device_id *id = i2c_client_get_device_id(client); + struct ad_dpot_bus_data bdata = { + .client = client, + .bops = &bops, + }; + + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_WORD_DATA)) { + dev_err(&client->dev, "SMBUS Word Data not Supported\n"); + return -EIO; + } + + return ad_dpot_probe(&client->dev, &bdata, id->driver_data, id->name); +} + +static void ad_dpot_i2c_remove(struct i2c_client *client) +{ + ad_dpot_remove(&client->dev); +} + +static const struct i2c_device_id ad_dpot_id[] = { + {"ad5258", AD5258_ID}, + {"ad5259", AD5259_ID}, + {"ad5251", AD5251_ID}, + {"ad5252", AD5252_ID}, + {"ad5253", AD5253_ID}, + {"ad5254", AD5254_ID}, + {"ad5255", AD5255_ID}, + {"ad5241", AD5241_ID}, + {"ad5242", AD5242_ID}, + {"ad5243", AD5243_ID}, + {"ad5245", AD5245_ID}, + {"ad5246", AD5246_ID}, + {"ad5247", AD5247_ID}, + {"ad5248", AD5248_ID}, + {"ad5280", AD5280_ID}, + {"ad5282", AD5282_ID}, + {"adn2860", ADN2860_ID}, + {"ad5273", AD5273_ID}, + {"ad5161", AD5161_ID}, + {"ad5171", AD5171_ID}, + {"ad5170", AD5170_ID}, + {"ad5172", AD5172_ID}, + {"ad5173", AD5173_ID}, + {"ad5272", AD5272_ID}, + {"ad5274", AD5274_ID}, + {} +}; +MODULE_DEVICE_TABLE(i2c, ad_dpot_id); + +static struct i2c_driver ad_dpot_i2c_driver = { + .driver = { + .name = "ad_dpot", + }, + .probe = ad_dpot_i2c_probe, + .remove = ad_dpot_i2c_remove, + .id_table = ad_dpot_id, +}; + +module_i2c_driver(ad_dpot_i2c_driver); + +MODULE_AUTHOR("Michael Hennerich "); +MODULE_DESCRIPTION("digital potentiometer I2C bus driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/ad525x_dpot-spi.c b/drivers/misc/ad525x_dpot-spi.c new file mode 100644 index 0000000000..263055bda4 --- /dev/null +++ b/drivers/misc/ad525x_dpot-spi.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Driver for the Analog Devices digital potentiometers (SPI bus) + * + * Copyright (C) 2010-2011 Michael Hennerich, Analog Devices Inc. 
+ */ + +#include +#include + +#include "ad525x_dpot.h" + +/* SPI bus functions */ +static int write8(void *client, u8 val) +{ + u8 data = val; + + return spi_write(client, &data, 1); +} + +static int write16(void *client, u8 reg, u8 val) +{ + u8 data[2] = {reg, val}; + + return spi_write(client, data, 2); +} + +static int write24(void *client, u8 reg, u16 val) +{ + u8 data[3] = {reg, val >> 8, val}; + + return spi_write(client, data, 3); +} + +static int read8(void *client) +{ + int ret; + u8 data; + + ret = spi_read(client, &data, 1); + if (ret < 0) + return ret; + + return data; +} + +static int read16(void *client, u8 reg) +{ + int ret; + u8 buf_rx[2]; + + write16(client, reg, 0); + ret = spi_read(client, buf_rx, 2); + if (ret < 0) + return ret; + + return (buf_rx[0] << 8) | buf_rx[1]; +} + +static int read24(void *client, u8 reg) +{ + int ret; + u8 buf_rx[3]; + + write24(client, reg, 0); + ret = spi_read(client, buf_rx, 3); + if (ret < 0) + return ret; + + return (buf_rx[1] << 8) | buf_rx[2]; +} + +static const struct ad_dpot_bus_ops bops = { + .read_d8 = read8, + .read_r8d8 = read16, + .read_r8d16 = read24, + .write_d8 = write8, + .write_r8d8 = write16, + .write_r8d16 = write24, +}; +static int ad_dpot_spi_probe(struct spi_device *spi) +{ + struct ad_dpot_bus_data bdata = { + .client = spi, + .bops = &bops, + }; + + return ad_dpot_probe(&spi->dev, &bdata, + spi_get_device_id(spi)->driver_data, + spi_get_device_id(spi)->name); +} + +static void ad_dpot_spi_remove(struct spi_device *spi) +{ + ad_dpot_remove(&spi->dev); +} + +static const struct spi_device_id ad_dpot_spi_id[] = { + {"ad5160", AD5160_ID}, + {"ad5161", AD5161_ID}, + {"ad5162", AD5162_ID}, + {"ad5165", AD5165_ID}, + {"ad5200", AD5200_ID}, + {"ad5201", AD5201_ID}, + {"ad5203", AD5203_ID}, + {"ad5204", AD5204_ID}, + {"ad5206", AD5206_ID}, + {"ad5207", AD5207_ID}, + {"ad5231", AD5231_ID}, + {"ad5232", AD5232_ID}, + {"ad5233", AD5233_ID}, + {"ad5235", AD5235_ID}, + {"ad5260", AD5260_ID}, + {"ad5262", AD5262_ID}, + {"ad5263", AD5263_ID}, + {"ad5290", AD5290_ID}, + {"ad5291", AD5291_ID}, + {"ad5292", AD5292_ID}, + {"ad5293", AD5293_ID}, + {"ad7376", AD7376_ID}, + {"ad8400", AD8400_ID}, + {"ad8402", AD8402_ID}, + {"ad8403", AD8403_ID}, + {"adn2850", ADN2850_ID}, + {"ad5270", AD5270_ID}, + {"ad5271", AD5271_ID}, + {} +}; +MODULE_DEVICE_TABLE(spi, ad_dpot_spi_id); + +static struct spi_driver ad_dpot_spi_driver = { + .driver = { + .name = "ad_dpot", + }, + .probe = ad_dpot_spi_probe, + .remove = ad_dpot_spi_remove, + .id_table = ad_dpot_spi_id, +}; + +module_spi_driver(ad_dpot_spi_driver); + +MODULE_AUTHOR("Michael Hennerich "); +MODULE_DESCRIPTION("digital potentiometer SPI bus driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("spi:ad_dpot"); diff --git a/drivers/misc/ad525x_dpot.c b/drivers/misc/ad525x_dpot.c new file mode 100644 index 0000000000..756ef6912b --- /dev/null +++ b/drivers/misc/ad525x_dpot.c @@ -0,0 +1,763 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * ad525x_dpot: Driver for the Analog Devices digital potentiometers + * Copyright (c) 2009-2010 Analog Devices, Inc. 
+ * Author: Michael Hennerich + * + * DEVID #Wipers #Positions Resistor Options (kOhm) + * AD5258 1 64 1, 10, 50, 100 + * AD5259 1 256 5, 10, 50, 100 + * AD5251 2 64 1, 10, 50, 100 + * AD5252 2 256 1, 10, 50, 100 + * AD5255 3 512 25, 250 + * AD5253 4 64 1, 10, 50, 100 + * AD5254 4 256 1, 10, 50, 100 + * AD5160 1 256 5, 10, 50, 100 + * AD5161 1 256 5, 10, 50, 100 + * AD5162 2 256 2.5, 10, 50, 100 + * AD5165 1 256 100 + * AD5200 1 256 10, 50 + * AD5201 1 33 10, 50 + * AD5203 4 64 10, 100 + * AD5204 4 256 10, 50, 100 + * AD5206 6 256 10, 50, 100 + * AD5207 2 256 10, 50, 100 + * AD5231 1 1024 10, 50, 100 + * AD5232 2 256 10, 50, 100 + * AD5233 4 64 10, 50, 100 + * AD5235 2 1024 25, 250 + * AD5260 1 256 20, 50, 200 + * AD5262 2 256 20, 50, 200 + * AD5263 4 256 20, 50, 200 + * AD5290 1 256 10, 50, 100 + * AD5291 1 256 20, 50, 100 (20-TP) + * AD5292 1 1024 20, 50, 100 (20-TP) + * AD5293 1 1024 20, 50, 100 + * AD7376 1 128 10, 50, 100, 1M + * AD8400 1 256 1, 10, 50, 100 + * AD8402 2 256 1, 10, 50, 100 + * AD8403 4 256 1, 10, 50, 100 + * ADN2850 3 512 25, 250 + * AD5241 1 256 10, 100, 1M + * AD5246 1 128 5, 10, 50, 100 + * AD5247 1 128 5, 10, 50, 100 + * AD5245 1 256 5, 10, 50, 100 + * AD5243 2 256 2.5, 10, 50, 100 + * AD5248 2 256 2.5, 10, 50, 100 + * AD5242 2 256 20, 50, 200 + * AD5280 1 256 20, 50, 200 + * AD5282 2 256 20, 50, 200 + * ADN2860 3 512 25, 250 + * AD5273 1 64 1, 10, 50, 100 (OTP) + * AD5171 1 64 5, 10, 50, 100 (OTP) + * AD5170 1 256 2.5, 10, 50, 100 (OTP) + * AD5172 2 256 2.5, 10, 50, 100 (OTP) + * AD5173 2 256 2.5, 10, 50, 100 (OTP) + * AD5270 1 1024 20, 50, 100 (50-TP) + * AD5271 1 256 20, 50, 100 (50-TP) + * AD5272 1 1024 20, 50, 100 (50-TP) + * AD5274 1 256 20, 50, 100 (50-TP) + * + * See Documentation/misc-devices/ad525x_dpot.rst for more info. + * + * derived from ad5258.c + * Copyright (c) 2009 Cyber Switching, Inc. 
+ * Author: Chris Verges + * + * derived from ad5252.c + * Copyright (c) 2006-2011 Michael Hennerich + */ + +#include +#include +#include +#include +#include + +#include "ad525x_dpot.h" + +/* + * Client data (each client gets its own) + */ + +struct dpot_data { + struct ad_dpot_bus_data bdata; + struct mutex update_lock; + unsigned int rdac_mask; + unsigned int max_pos; + unsigned long devid; + unsigned int uid; + unsigned int feat; + unsigned int wipers; + u16 rdac_cache[MAX_RDACS]; + DECLARE_BITMAP(otp_en_mask, MAX_RDACS); +}; + +static inline int dpot_read_d8(struct dpot_data *dpot) +{ + return dpot->bdata.bops->read_d8(dpot->bdata.client); +} + +static inline int dpot_read_r8d8(struct dpot_data *dpot, u8 reg) +{ + return dpot->bdata.bops->read_r8d8(dpot->bdata.client, reg); +} + +static inline int dpot_read_r8d16(struct dpot_data *dpot, u8 reg) +{ + return dpot->bdata.bops->read_r8d16(dpot->bdata.client, reg); +} + +static inline int dpot_write_d8(struct dpot_data *dpot, u8 val) +{ + return dpot->bdata.bops->write_d8(dpot->bdata.client, val); +} + +static inline int dpot_write_r8d8(struct dpot_data *dpot, u8 reg, u16 val) +{ + return dpot->bdata.bops->write_r8d8(dpot->bdata.client, reg, val); +} + +static inline int dpot_write_r8d16(struct dpot_data *dpot, u8 reg, u16 val) +{ + return dpot->bdata.bops->write_r8d16(dpot->bdata.client, reg, val); +} + +static s32 dpot_read_spi(struct dpot_data *dpot, u8 reg) +{ + unsigned int ctrl = 0; + int value; + + if (!(reg & (DPOT_ADDR_EEPROM | DPOT_ADDR_CMD))) { + + if (dpot->feat & F_RDACS_WONLY) + return dpot->rdac_cache[reg & DPOT_RDAC_MASK]; + if (dpot->uid == DPOT_UID(AD5291_ID) || + dpot->uid == DPOT_UID(AD5292_ID) || + dpot->uid == DPOT_UID(AD5293_ID)) { + + value = dpot_read_r8d8(dpot, + DPOT_AD5291_READ_RDAC << 2); + + if (value < 0) + return value; + + if (dpot->uid == DPOT_UID(AD5291_ID)) + value = value >> 2; + + return value; + } else if (dpot->uid == DPOT_UID(AD5270_ID) || + dpot->uid == DPOT_UID(AD5271_ID)) { + + value = dpot_read_r8d8(dpot, + DPOT_AD5270_1_2_4_READ_RDAC << 2); + + if (value < 0) + return value; + + if (dpot->uid == DPOT_UID(AD5271_ID)) + value = value >> 2; + + return value; + } + + ctrl = DPOT_SPI_READ_RDAC; + } else if (reg & DPOT_ADDR_EEPROM) { + ctrl = DPOT_SPI_READ_EEPROM; + } + + if (dpot->feat & F_SPI_16BIT) + return dpot_read_r8d8(dpot, ctrl); + else if (dpot->feat & F_SPI_24BIT) + return dpot_read_r8d16(dpot, ctrl); + + return -EFAULT; +} + +static s32 dpot_read_i2c(struct dpot_data *dpot, u8 reg) +{ + int value; + unsigned int ctrl = 0; + + switch (dpot->uid) { + case DPOT_UID(AD5246_ID): + case DPOT_UID(AD5247_ID): + return dpot_read_d8(dpot); + case DPOT_UID(AD5245_ID): + case DPOT_UID(AD5241_ID): + case DPOT_UID(AD5242_ID): + case DPOT_UID(AD5243_ID): + case DPOT_UID(AD5248_ID): + case DPOT_UID(AD5280_ID): + case DPOT_UID(AD5282_ID): + ctrl = ((reg & DPOT_RDAC_MASK) == DPOT_RDAC0) ? + 0 : DPOT_AD5282_RDAC_AB; + return dpot_read_r8d8(dpot, ctrl); + case DPOT_UID(AD5170_ID): + case DPOT_UID(AD5171_ID): + case DPOT_UID(AD5273_ID): + return dpot_read_d8(dpot); + case DPOT_UID(AD5172_ID): + case DPOT_UID(AD5173_ID): + ctrl = ((reg & DPOT_RDAC_MASK) == DPOT_RDAC0) ? 
+ 0 : DPOT_AD5172_3_A0; + return dpot_read_r8d8(dpot, ctrl); + case DPOT_UID(AD5272_ID): + case DPOT_UID(AD5274_ID): + dpot_write_r8d8(dpot, + (DPOT_AD5270_1_2_4_READ_RDAC << 2), 0); + + value = dpot_read_r8d16(dpot, DPOT_AD5270_1_2_4_RDAC << 2); + if (value < 0) + return value; + /* + * AD5272/AD5274 returns high byte first, however + * underling smbus expects low byte first. + */ + value = swab16(value); + + if (dpot->uid == DPOT_UID(AD5274_ID)) + value = value >> 2; + return value; + default: + if ((reg & DPOT_REG_TOL) || (dpot->max_pos > 256)) + return dpot_read_r8d16(dpot, (reg & 0xF8) | + ((reg & 0x7) << 1)); + else + return dpot_read_r8d8(dpot, reg); + } +} + +static s32 dpot_read(struct dpot_data *dpot, u8 reg) +{ + if (dpot->feat & F_SPI) + return dpot_read_spi(dpot, reg); + else + return dpot_read_i2c(dpot, reg); +} + +static s32 dpot_write_spi(struct dpot_data *dpot, u8 reg, u16 value) +{ + unsigned int val = 0; + + if (!(reg & (DPOT_ADDR_EEPROM | DPOT_ADDR_CMD | DPOT_ADDR_OTP))) { + if (dpot->feat & F_RDACS_WONLY) + dpot->rdac_cache[reg & DPOT_RDAC_MASK] = value; + + if (dpot->feat & F_AD_APPDATA) { + if (dpot->feat & F_SPI_8BIT) { + val = ((reg & DPOT_RDAC_MASK) << + DPOT_MAX_POS(dpot->devid)) | + value; + return dpot_write_d8(dpot, val); + } else if (dpot->feat & F_SPI_16BIT) { + val = ((reg & DPOT_RDAC_MASK) << + DPOT_MAX_POS(dpot->devid)) | + value; + return dpot_write_r8d8(dpot, val >> 8, + val & 0xFF); + } else + BUG(); + } else { + if (dpot->uid == DPOT_UID(AD5291_ID) || + dpot->uid == DPOT_UID(AD5292_ID) || + dpot->uid == DPOT_UID(AD5293_ID)) { + + dpot_write_r8d8(dpot, DPOT_AD5291_CTRLREG << 2, + DPOT_AD5291_UNLOCK_CMD); + + if (dpot->uid == DPOT_UID(AD5291_ID)) + value = value << 2; + + return dpot_write_r8d8(dpot, + (DPOT_AD5291_RDAC << 2) | + (value >> 8), value & 0xFF); + } else if (dpot->uid == DPOT_UID(AD5270_ID) || + dpot->uid == DPOT_UID(AD5271_ID)) { + dpot_write_r8d8(dpot, + DPOT_AD5270_1_2_4_CTRLREG << 2, + DPOT_AD5270_1_2_4_UNLOCK_CMD); + + if (dpot->uid == DPOT_UID(AD5271_ID)) + value = value << 2; + + return dpot_write_r8d8(dpot, + (DPOT_AD5270_1_2_4_RDAC << 2) | + (value >> 8), value & 0xFF); + } + val = DPOT_SPI_RDAC | (reg & DPOT_RDAC_MASK); + } + } else if (reg & DPOT_ADDR_EEPROM) { + val = DPOT_SPI_EEPROM | (reg & DPOT_RDAC_MASK); + } else if (reg & DPOT_ADDR_CMD) { + switch (reg) { + case DPOT_DEC_ALL_6DB: + val = DPOT_SPI_DEC_ALL_6DB; + break; + case DPOT_INC_ALL_6DB: + val = DPOT_SPI_INC_ALL_6DB; + break; + case DPOT_DEC_ALL: + val = DPOT_SPI_DEC_ALL; + break; + case DPOT_INC_ALL: + val = DPOT_SPI_INC_ALL; + break; + } + } else if (reg & DPOT_ADDR_OTP) { + if (dpot->uid == DPOT_UID(AD5291_ID) || + dpot->uid == DPOT_UID(AD5292_ID)) { + return dpot_write_r8d8(dpot, + DPOT_AD5291_STORE_XTPM << 2, 0); + } else if (dpot->uid == DPOT_UID(AD5270_ID) || + dpot->uid == DPOT_UID(AD5271_ID)) { + return dpot_write_r8d8(dpot, + DPOT_AD5270_1_2_4_STORE_XTPM << 2, 0); + } + } else + BUG(); + + if (dpot->feat & F_SPI_16BIT) + return dpot_write_r8d8(dpot, val, value); + else if (dpot->feat & F_SPI_24BIT) + return dpot_write_r8d16(dpot, val, value); + + return -EFAULT; +} + +static s32 dpot_write_i2c(struct dpot_data *dpot, u8 reg, u16 value) +{ + /* Only write the instruction byte for certain commands */ + unsigned int tmp = 0, ctrl = 0; + + switch (dpot->uid) { + case DPOT_UID(AD5246_ID): + case DPOT_UID(AD5247_ID): + return dpot_write_d8(dpot, value); + + case DPOT_UID(AD5245_ID): + case DPOT_UID(AD5241_ID): + case DPOT_UID(AD5242_ID): + case 
DPOT_UID(AD5243_ID): + case DPOT_UID(AD5248_ID): + case DPOT_UID(AD5280_ID): + case DPOT_UID(AD5282_ID): + ctrl = ((reg & DPOT_RDAC_MASK) == DPOT_RDAC0) ? + 0 : DPOT_AD5282_RDAC_AB; + return dpot_write_r8d8(dpot, ctrl, value); + case DPOT_UID(AD5171_ID): + case DPOT_UID(AD5273_ID): + if (reg & DPOT_ADDR_OTP) { + tmp = dpot_read_d8(dpot); + if (tmp >> 6) /* Ready to Program? */ + return -EFAULT; + ctrl = DPOT_AD5273_FUSE; + } + return dpot_write_r8d8(dpot, ctrl, value); + case DPOT_UID(AD5172_ID): + case DPOT_UID(AD5173_ID): + ctrl = ((reg & DPOT_RDAC_MASK) == DPOT_RDAC0) ? + 0 : DPOT_AD5172_3_A0; + if (reg & DPOT_ADDR_OTP) { + tmp = dpot_read_r8d16(dpot, ctrl); + if (tmp >> 14) /* Ready to Program? */ + return -EFAULT; + ctrl |= DPOT_AD5170_2_3_FUSE; + } + return dpot_write_r8d8(dpot, ctrl, value); + case DPOT_UID(AD5170_ID): + if (reg & DPOT_ADDR_OTP) { + tmp = dpot_read_r8d16(dpot, tmp); + if (tmp >> 14) /* Ready to Program? */ + return -EFAULT; + ctrl = DPOT_AD5170_2_3_FUSE; + } + return dpot_write_r8d8(dpot, ctrl, value); + case DPOT_UID(AD5272_ID): + case DPOT_UID(AD5274_ID): + dpot_write_r8d8(dpot, DPOT_AD5270_1_2_4_CTRLREG << 2, + DPOT_AD5270_1_2_4_UNLOCK_CMD); + + if (reg & DPOT_ADDR_OTP) + return dpot_write_r8d8(dpot, + DPOT_AD5270_1_2_4_STORE_XTPM << 2, 0); + + if (dpot->uid == DPOT_UID(AD5274_ID)) + value = value << 2; + + return dpot_write_r8d8(dpot, (DPOT_AD5270_1_2_4_RDAC << 2) | + (value >> 8), value & 0xFF); + default: + if (reg & DPOT_ADDR_CMD) + return dpot_write_d8(dpot, reg); + + if (dpot->max_pos > 256) + return dpot_write_r8d16(dpot, (reg & 0xF8) | + ((reg & 0x7) << 1), value); + else + /* All other registers require instruction + data bytes */ + return dpot_write_r8d8(dpot, reg, value); + } +} + +static s32 dpot_write(struct dpot_data *dpot, u8 reg, u16 value) +{ + if (dpot->feat & F_SPI) + return dpot_write_spi(dpot, reg, value); + else + return dpot_write_i2c(dpot, reg, value); +} + +/* sysfs functions */ + +static ssize_t sysfs_show_reg(struct device *dev, + struct device_attribute *attr, + char *buf, u32 reg) +{ + struct dpot_data *data = dev_get_drvdata(dev); + s32 value; + + if (reg & DPOT_ADDR_OTP_EN) + return sprintf(buf, "%s\n", + test_bit(DPOT_RDAC_MASK & reg, data->otp_en_mask) ? + "enabled" : "disabled"); + + + mutex_lock(&data->update_lock); + value = dpot_read(data, reg); + mutex_unlock(&data->update_lock); + + if (value < 0) + return -EINVAL; + /* + * Let someone else deal with converting this ... + * the tolerance is a two-byte value where the MSB + * is a sign + integer value, and the LSB is a + * decimal value. See page 18 of the AD5258 + * datasheet (Rev. A) for more details. 
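+ * As an illustration of the encoding described above (example value
+ * only, not read from real hardware): a raw readback of 0x0280
+ * decodes as sign bit 0, integer part 0x02 and fractional part 0x80,
+ * i.e. +(2 + 128/256) % = +2.5 %.  A user-space reader could recover
+ * the percentage from the hex string printed below with:
+ *
+ *	sign  = (raw & 0x8000) ? -1 : 1;
+ *	whole = (raw >> 8) & 0x7f;
+ *	frac  = raw & 0xff;
+ *	tolerance_percent = sign * (whole + frac / 256.0);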
+ */ + + if (reg & DPOT_REG_TOL) + return sprintf(buf, "0x%04x\n", value & 0xFFFF); + else + return sprintf(buf, "%u\n", value & data->rdac_mask); +} + +static ssize_t sysfs_set_reg(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count, u32 reg) +{ + struct dpot_data *data = dev_get_drvdata(dev); + unsigned long value; + int err; + + if (reg & DPOT_ADDR_OTP_EN) { + if (sysfs_streq(buf, "enabled")) + set_bit(DPOT_RDAC_MASK & reg, data->otp_en_mask); + else + clear_bit(DPOT_RDAC_MASK & reg, data->otp_en_mask); + + return count; + } + + if ((reg & DPOT_ADDR_OTP) && + !test_bit(DPOT_RDAC_MASK & reg, data->otp_en_mask)) + return -EPERM; + + err = kstrtoul(buf, 10, &value); + if (err) + return err; + + if (value > data->rdac_mask) + value = data->rdac_mask; + + mutex_lock(&data->update_lock); + dpot_write(data, reg, value); + if (reg & DPOT_ADDR_EEPROM) + msleep(26); /* Sleep while the EEPROM updates */ + else if (reg & DPOT_ADDR_OTP) + msleep(400); /* Sleep while the OTP updates */ + mutex_unlock(&data->update_lock); + + return count; +} + +static ssize_t sysfs_do_cmd(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count, u32 reg) +{ + struct dpot_data *data = dev_get_drvdata(dev); + + mutex_lock(&data->update_lock); + dpot_write(data, reg, 0); + mutex_unlock(&data->update_lock); + + return count; +} + +/* ------------------------------------------------------------------------- */ + +#define DPOT_DEVICE_SHOW(_name, _reg) static ssize_t \ +show_##_name(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + return sysfs_show_reg(dev, attr, buf, _reg); \ +} + +#define DPOT_DEVICE_SET(_name, _reg) static ssize_t \ +set_##_name(struct device *dev, \ + struct device_attribute *attr, \ + const char *buf, size_t count) \ +{ \ + return sysfs_set_reg(dev, attr, buf, count, _reg); \ +} + +#define DPOT_DEVICE_SHOW_SET(name, reg) \ +DPOT_DEVICE_SHOW(name, reg) \ +DPOT_DEVICE_SET(name, reg) \ +static DEVICE_ATTR(name, S_IWUSR | S_IRUGO, show_##name, set_##name) + +#define DPOT_DEVICE_SHOW_ONLY(name, reg) \ +DPOT_DEVICE_SHOW(name, reg) \ +static DEVICE_ATTR(name, S_IWUSR | S_IRUGO, show_##name, NULL) + +DPOT_DEVICE_SHOW_SET(rdac0, DPOT_ADDR_RDAC | DPOT_RDAC0); +DPOT_DEVICE_SHOW_SET(eeprom0, DPOT_ADDR_EEPROM | DPOT_RDAC0); +DPOT_DEVICE_SHOW_ONLY(tolerance0, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC0); +DPOT_DEVICE_SHOW_SET(otp0, DPOT_ADDR_OTP | DPOT_RDAC0); +DPOT_DEVICE_SHOW_SET(otp0en, DPOT_ADDR_OTP_EN | DPOT_RDAC0); + +DPOT_DEVICE_SHOW_SET(rdac1, DPOT_ADDR_RDAC | DPOT_RDAC1); +DPOT_DEVICE_SHOW_SET(eeprom1, DPOT_ADDR_EEPROM | DPOT_RDAC1); +DPOT_DEVICE_SHOW_ONLY(tolerance1, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC1); +DPOT_DEVICE_SHOW_SET(otp1, DPOT_ADDR_OTP | DPOT_RDAC1); +DPOT_DEVICE_SHOW_SET(otp1en, DPOT_ADDR_OTP_EN | DPOT_RDAC1); + +DPOT_DEVICE_SHOW_SET(rdac2, DPOT_ADDR_RDAC | DPOT_RDAC2); +DPOT_DEVICE_SHOW_SET(eeprom2, DPOT_ADDR_EEPROM | DPOT_RDAC2); +DPOT_DEVICE_SHOW_ONLY(tolerance2, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC2); +DPOT_DEVICE_SHOW_SET(otp2, DPOT_ADDR_OTP | DPOT_RDAC2); +DPOT_DEVICE_SHOW_SET(otp2en, DPOT_ADDR_OTP_EN | DPOT_RDAC2); + +DPOT_DEVICE_SHOW_SET(rdac3, DPOT_ADDR_RDAC | DPOT_RDAC3); +DPOT_DEVICE_SHOW_SET(eeprom3, DPOT_ADDR_EEPROM | DPOT_RDAC3); +DPOT_DEVICE_SHOW_ONLY(tolerance3, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC3); +DPOT_DEVICE_SHOW_SET(otp3, DPOT_ADDR_OTP | DPOT_RDAC3); +DPOT_DEVICE_SHOW_SET(otp3en, DPOT_ADDR_OTP_EN | DPOT_RDAC3); + +DPOT_DEVICE_SHOW_SET(rdac4, DPOT_ADDR_RDAC | DPOT_RDAC4); +DPOT_DEVICE_SHOW_SET(eeprom4, 
DPOT_ADDR_EEPROM | DPOT_RDAC4); +DPOT_DEVICE_SHOW_ONLY(tolerance4, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC4); +DPOT_DEVICE_SHOW_SET(otp4, DPOT_ADDR_OTP | DPOT_RDAC4); +DPOT_DEVICE_SHOW_SET(otp4en, DPOT_ADDR_OTP_EN | DPOT_RDAC4); + +DPOT_DEVICE_SHOW_SET(rdac5, DPOT_ADDR_RDAC | DPOT_RDAC5); +DPOT_DEVICE_SHOW_SET(eeprom5, DPOT_ADDR_EEPROM | DPOT_RDAC5); +DPOT_DEVICE_SHOW_ONLY(tolerance5, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC5); +DPOT_DEVICE_SHOW_SET(otp5, DPOT_ADDR_OTP | DPOT_RDAC5); +DPOT_DEVICE_SHOW_SET(otp5en, DPOT_ADDR_OTP_EN | DPOT_RDAC5); + +static const struct attribute *dpot_attrib_wipers[] = { + &dev_attr_rdac0.attr, + &dev_attr_rdac1.attr, + &dev_attr_rdac2.attr, + &dev_attr_rdac3.attr, + &dev_attr_rdac4.attr, + &dev_attr_rdac5.attr, + NULL +}; + +static const struct attribute *dpot_attrib_eeprom[] = { + &dev_attr_eeprom0.attr, + &dev_attr_eeprom1.attr, + &dev_attr_eeprom2.attr, + &dev_attr_eeprom3.attr, + &dev_attr_eeprom4.attr, + &dev_attr_eeprom5.attr, + NULL +}; + +static const struct attribute *dpot_attrib_otp[] = { + &dev_attr_otp0.attr, + &dev_attr_otp1.attr, + &dev_attr_otp2.attr, + &dev_attr_otp3.attr, + &dev_attr_otp4.attr, + &dev_attr_otp5.attr, + NULL +}; + +static const struct attribute *dpot_attrib_otp_en[] = { + &dev_attr_otp0en.attr, + &dev_attr_otp1en.attr, + &dev_attr_otp2en.attr, + &dev_attr_otp3en.attr, + &dev_attr_otp4en.attr, + &dev_attr_otp5en.attr, + NULL +}; + +static const struct attribute *dpot_attrib_tolerance[] = { + &dev_attr_tolerance0.attr, + &dev_attr_tolerance1.attr, + &dev_attr_tolerance2.attr, + &dev_attr_tolerance3.attr, + &dev_attr_tolerance4.attr, + &dev_attr_tolerance5.attr, + NULL +}; + +/* ------------------------------------------------------------------------- */ + +#define DPOT_DEVICE_DO_CMD(_name, _cmd) static ssize_t \ +set_##_name(struct device *dev, \ + struct device_attribute *attr, \ + const char *buf, size_t count) \ +{ \ + return sysfs_do_cmd(dev, attr, buf, count, _cmd); \ +} \ +static DEVICE_ATTR(_name, S_IWUSR | S_IRUGO, NULL, set_##_name) + +DPOT_DEVICE_DO_CMD(inc_all, DPOT_INC_ALL); +DPOT_DEVICE_DO_CMD(dec_all, DPOT_DEC_ALL); +DPOT_DEVICE_DO_CMD(inc_all_6db, DPOT_INC_ALL_6DB); +DPOT_DEVICE_DO_CMD(dec_all_6db, DPOT_DEC_ALL_6DB); + +static struct attribute *ad525x_attributes_commands[] = { + &dev_attr_inc_all.attr, + &dev_attr_dec_all.attr, + &dev_attr_inc_all_6db.attr, + &dev_attr_dec_all_6db.attr, + NULL +}; + +static const struct attribute_group ad525x_group_commands = { + .attrs = ad525x_attributes_commands, +}; + +static int ad_dpot_add_files(struct device *dev, + unsigned int features, unsigned int rdac) +{ + int err = sysfs_create_file(&dev->kobj, + dpot_attrib_wipers[rdac]); + if (features & F_CMD_EEP) + err |= sysfs_create_file(&dev->kobj, + dpot_attrib_eeprom[rdac]); + if (features & F_CMD_TOL) + err |= sysfs_create_file(&dev->kobj, + dpot_attrib_tolerance[rdac]); + if (features & F_CMD_OTP) { + err |= sysfs_create_file(&dev->kobj, + dpot_attrib_otp_en[rdac]); + err |= sysfs_create_file(&dev->kobj, + dpot_attrib_otp[rdac]); + } + + if (err) + dev_err(dev, "failed to register sysfs hooks for RDAC%d\n", + rdac); + + return err; +} + +static inline void ad_dpot_remove_files(struct device *dev, + unsigned int features, unsigned int rdac) +{ + sysfs_remove_file(&dev->kobj, + dpot_attrib_wipers[rdac]); + if (features & F_CMD_EEP) + sysfs_remove_file(&dev->kobj, + dpot_attrib_eeprom[rdac]); + if (features & F_CMD_TOL) + sysfs_remove_file(&dev->kobj, + dpot_attrib_tolerance[rdac]); + if (features & F_CMD_OTP) { + 
sysfs_remove_file(&dev->kobj, + dpot_attrib_otp_en[rdac]); + sysfs_remove_file(&dev->kobj, + dpot_attrib_otp[rdac]); + } +} + +int ad_dpot_probe(struct device *dev, + struct ad_dpot_bus_data *bdata, unsigned long devid, + const char *name) +{ + + struct dpot_data *data; + int i, err = 0; + + data = kzalloc(sizeof(struct dpot_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + dev_set_drvdata(dev, data); + mutex_init(&data->update_lock); + + data->bdata = *bdata; + data->devid = devid; + + data->max_pos = 1 << DPOT_MAX_POS(devid); + data->rdac_mask = data->max_pos - 1; + data->feat = DPOT_FEAT(devid); + data->uid = DPOT_UID(devid); + data->wipers = DPOT_WIPERS(devid); + + for (i = DPOT_RDAC0; i < MAX_RDACS; i++) + if (data->wipers & (1 << i)) { + err = ad_dpot_add_files(dev, data->feat, i); + if (err) + goto exit_remove_files; + /* power-up midscale */ + if (data->feat & F_RDACS_WONLY) + data->rdac_cache[i] = data->max_pos / 2; + } + + if (data->feat & F_CMD_INC) + err = sysfs_create_group(&dev->kobj, &ad525x_group_commands); + + if (err) { + dev_err(dev, "failed to register sysfs hooks\n"); + goto exit_free; + } + + dev_info(dev, "%s %d-Position Digital Potentiometer registered\n", + name, data->max_pos); + + return 0; + +exit_remove_files: + for (i = DPOT_RDAC0; i < MAX_RDACS; i++) + if (data->wipers & (1 << i)) + ad_dpot_remove_files(dev, data->feat, i); + +exit_free: + kfree(data); + dev_set_drvdata(dev, NULL); +exit: + dev_err(dev, "failed to create client for %s ID 0x%lX\n", + name, devid); + return err; +} +EXPORT_SYMBOL(ad_dpot_probe); + +void ad_dpot_remove(struct device *dev) +{ + struct dpot_data *data = dev_get_drvdata(dev); + int i; + + for (i = DPOT_RDAC0; i < MAX_RDACS; i++) + if (data->wipers & (1 << i)) + ad_dpot_remove_files(dev, data->feat, i); + + kfree(data); +} +EXPORT_SYMBOL(ad_dpot_remove); + + +MODULE_AUTHOR("Chris Verges , " + "Michael Hennerich "); +MODULE_DESCRIPTION("Digital potentiometer driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/ad525x_dpot.h b/drivers/misc/ad525x_dpot.h new file mode 100644 index 0000000000..72a9d68019 --- /dev/null +++ b/drivers/misc/ad525x_dpot.h @@ -0,0 +1,214 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Driver for the Analog Devices digital potentiometers + * + * Copyright (C) 2010 Michael Hennerich, Analog Devices Inc. 
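+ *
+ * The DPOT_CONF() macro defined below packs the per-device
+ * configuration into a single word: feature flags from bit 18 up,
+ * the wiper mask in bits 17..10, the log2 of the number of wiper
+ * positions in bits 9..6 and a unique id in bits 5..0.  Worked
+ * example, taken from the device table below:
+ *
+ *	AD5258_ID = DPOT_CONF(F_RDACS_RW_TOL, BRDAC0, 6, 0)
+ *	DPOT_UID(AD5258_ID)	-> 0
+ *	DPOT_MAX_POS(AD5258_ID)	-> 6, i.e. 1 << 6 = 64 positions
+ *	DPOT_WIPERS(AD5258_ID)	-> BRDAC0 (one wiper)
+ *	DPOT_FEAT(AD5258_ID)	-> F_RDACS_RW_TOL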
+ */ + +#ifndef _AD_DPOT_H_ +#define _AD_DPOT_H_ + +#include + +#define DPOT_CONF(features, wipers, max_pos, uid) \ + (((features) << 18) | (((wipers) & 0xFF) << 10) | \ + ((max_pos & 0xF) << 6) | (uid & 0x3F)) + +#define DPOT_UID(conf) (conf & 0x3F) +#define DPOT_MAX_POS(conf) ((conf >> 6) & 0xF) +#define DPOT_WIPERS(conf) ((conf >> 10) & 0xFF) +#define DPOT_FEAT(conf) (conf >> 18) + +#define BRDAC0 (1 << 0) +#define BRDAC1 (1 << 1) +#define BRDAC2 (1 << 2) +#define BRDAC3 (1 << 3) +#define BRDAC4 (1 << 4) +#define BRDAC5 (1 << 5) +#define MAX_RDACS 6 + +#define F_CMD_INC (1 << 0) /* Features INC/DEC ALL, 6dB */ +#define F_CMD_EEP (1 << 1) /* Features EEPROM */ +#define F_CMD_OTP (1 << 2) /* Features OTP */ +#define F_CMD_TOL (1 << 3) /* RDACS feature Tolerance REG */ +#define F_RDACS_RW (1 << 4) /* RDACS are Read/Write */ +#define F_RDACS_WONLY (1 << 5) /* RDACS are Write only */ +#define F_AD_APPDATA (1 << 6) /* RDAC Address append to data */ +#define F_SPI_8BIT (1 << 7) /* All SPI XFERS are 8-bit */ +#define F_SPI_16BIT (1 << 8) /* All SPI XFERS are 16-bit */ +#define F_SPI_24BIT (1 << 9) /* All SPI XFERS are 24-bit */ + +#define F_RDACS_RW_TOL (F_RDACS_RW | F_CMD_EEP | F_CMD_TOL) +#define F_RDACS_RW_EEP (F_RDACS_RW | F_CMD_EEP) +#define F_SPI (F_SPI_8BIT | F_SPI_16BIT | F_SPI_24BIT) + +enum dpot_devid { + AD5258_ID = DPOT_CONF(F_RDACS_RW_TOL, BRDAC0, 6, 0), /* I2C */ + AD5259_ID = DPOT_CONF(F_RDACS_RW_TOL, BRDAC0, 8, 1), + AD5251_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC, + BRDAC1 | BRDAC3, 6, 2), + AD5252_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC, + BRDAC1 | BRDAC3, 8, 3), + AD5253_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC, + BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 6, 4), + AD5254_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC, + BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 8, 5), + AD5255_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC, + BRDAC0 | BRDAC1 | BRDAC2, 9, 6), + AD5160_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0, 8, 7), /* SPI */ + AD5161_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0, 8, 8), + AD5162_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0 | BRDAC1, 8, 9), + AD5165_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0, 8, 10), + AD5200_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0, 8, 11), + AD5201_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0, 5, 12), + AD5203_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 6, 13), + AD5204_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 8, 14), + AD5206_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3 | BRDAC4 | BRDAC5, + 8, 15), + AD5207_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0 | BRDAC1, 8, 16), + AD5231_ID = DPOT_CONF(F_RDACS_RW_EEP | F_CMD_INC | F_SPI_24BIT, + BRDAC0, 10, 17), + AD5232_ID = DPOT_CONF(F_RDACS_RW_EEP | F_CMD_INC | F_SPI_16BIT, + BRDAC0 | BRDAC1, 8, 18), + AD5233_ID = DPOT_CONF(F_RDACS_RW_EEP | F_CMD_INC | F_SPI_16BIT, + BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 6, 19), + AD5235_ID = DPOT_CONF(F_RDACS_RW_EEP | F_CMD_INC | F_SPI_24BIT, + BRDAC0 | BRDAC1, 10, 20), + AD5260_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0, 8, 21), + AD5262_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0 | BRDAC1, 8, 22), + AD5263_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 8, 23), + AD5290_ID = 
DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0, 8, 24), + AD5291_ID = DPOT_CONF(F_RDACS_RW | F_SPI_16BIT | F_CMD_OTP, + BRDAC0, 8, 25), + AD5292_ID = DPOT_CONF(F_RDACS_RW | F_SPI_16BIT | F_CMD_OTP, + BRDAC0, 10, 26), + AD5293_ID = DPOT_CONF(F_RDACS_RW | F_SPI_16BIT, BRDAC0, 10, 27), + AD7376_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0, 7, 28), + AD8400_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0, 8, 29), + AD8402_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0 | BRDAC1, 8, 30), + AD8403_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0 | BRDAC1 | BRDAC2, 8, 31), + ADN2850_ID = DPOT_CONF(F_RDACS_RW_EEP | F_CMD_INC | F_SPI_24BIT, + BRDAC0 | BRDAC1, 10, 32), + AD5241_ID = DPOT_CONF(F_RDACS_RW, BRDAC0, 8, 33), + AD5242_ID = DPOT_CONF(F_RDACS_RW, BRDAC0 | BRDAC1, 8, 34), + AD5243_ID = DPOT_CONF(F_RDACS_RW, BRDAC0 | BRDAC1, 8, 35), + AD5245_ID = DPOT_CONF(F_RDACS_RW, BRDAC0, 8, 36), + AD5246_ID = DPOT_CONF(F_RDACS_RW, BRDAC0, 7, 37), + AD5247_ID = DPOT_CONF(F_RDACS_RW, BRDAC0, 7, 38), + AD5248_ID = DPOT_CONF(F_RDACS_RW, BRDAC0 | BRDAC1, 8, 39), + AD5280_ID = DPOT_CONF(F_RDACS_RW, BRDAC0, 8, 40), + AD5282_ID = DPOT_CONF(F_RDACS_RW, BRDAC0 | BRDAC1, 8, 41), + ADN2860_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC, + BRDAC0 | BRDAC1 | BRDAC2, 9, 42), + AD5273_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0, 6, 43), + AD5171_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0, 6, 44), + AD5170_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0, 8, 45), + AD5172_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0 | BRDAC1, 8, 46), + AD5173_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0 | BRDAC1, 8, 47), + AD5270_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP | F_SPI_16BIT, + BRDAC0, 10, 48), + AD5271_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP | F_SPI_16BIT, + BRDAC0, 8, 49), + AD5272_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0, 10, 50), + AD5274_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0, 8, 51), +}; + +#define DPOT_RDAC0 0 +#define DPOT_RDAC1 1 +#define DPOT_RDAC2 2 +#define DPOT_RDAC3 3 +#define DPOT_RDAC4 4 +#define DPOT_RDAC5 5 + +#define DPOT_RDAC_MASK 0x1F + +#define DPOT_REG_TOL 0x18 +#define DPOT_TOL_RDAC0 (DPOT_REG_TOL | DPOT_RDAC0) +#define DPOT_TOL_RDAC1 (DPOT_REG_TOL | DPOT_RDAC1) +#define DPOT_TOL_RDAC2 (DPOT_REG_TOL | DPOT_RDAC2) +#define DPOT_TOL_RDAC3 (DPOT_REG_TOL | DPOT_RDAC3) +#define DPOT_TOL_RDAC4 (DPOT_REG_TOL | DPOT_RDAC4) +#define DPOT_TOL_RDAC5 (DPOT_REG_TOL | DPOT_RDAC5) + +/* RDAC-to-EEPROM Interface Commands */ +#define DPOT_ADDR_RDAC (0x0 << 5) +#define DPOT_ADDR_EEPROM (0x1 << 5) +#define DPOT_ADDR_OTP (0x1 << 6) +#define DPOT_ADDR_CMD (0x1 << 7) +#define DPOT_ADDR_OTP_EN (0x1 << 9) + +#define DPOT_DEC_ALL_6DB (DPOT_ADDR_CMD | (0x4 << 3)) +#define DPOT_INC_ALL_6DB (DPOT_ADDR_CMD | (0x9 << 3)) +#define DPOT_DEC_ALL (DPOT_ADDR_CMD | (0x6 << 3)) +#define DPOT_INC_ALL (DPOT_ADDR_CMD | (0xB << 3)) + +#define DPOT_SPI_RDAC 0xB0 +#define DPOT_SPI_EEPROM 0x30 +#define DPOT_SPI_READ_RDAC 0xA0 +#define DPOT_SPI_READ_EEPROM 0x90 +#define DPOT_SPI_DEC_ALL_6DB 0x50 +#define DPOT_SPI_INC_ALL_6DB 0xD0 +#define DPOT_SPI_DEC_ALL 0x70 +#define DPOT_SPI_INC_ALL 0xF0 + +/* AD5291/2/3 use special commands */ +#define DPOT_AD5291_RDAC 0x01 +#define DPOT_AD5291_READ_RDAC 0x02 +#define DPOT_AD5291_STORE_XTPM 0x03 +#define DPOT_AD5291_CTRLREG 0x06 +#define DPOT_AD5291_UNLOCK_CMD 0x03 + +/* AD5270/1/2/4 use special commands */ +#define DPOT_AD5270_1_2_4_RDAC 0x01 +#define DPOT_AD5270_1_2_4_READ_RDAC 0x02 +#define 
DPOT_AD5270_1_2_4_STORE_XTPM 0x03 +#define DPOT_AD5270_1_2_4_CTRLREG 0x07 +#define DPOT_AD5270_1_2_4_UNLOCK_CMD 0x03 + +#define DPOT_AD5282_RDAC_AB 0x80 + +#define DPOT_AD5273_FUSE 0x80 +#define DPOT_AD5170_2_3_FUSE 0x20 +#define DPOT_AD5170_2_3_OW 0x08 +#define DPOT_AD5172_3_A0 0x08 +#define DPOT_AD5170_2FUSE 0x80 + +struct dpot_data; + +struct ad_dpot_bus_ops { + int (*read_d8)(void *client); + int (*read_r8d8)(void *client, u8 reg); + int (*read_r8d16)(void *client, u8 reg); + int (*write_d8)(void *client, u8 val); + int (*write_r8d8)(void *client, u8 reg, u8 val); + int (*write_r8d16)(void *client, u8 reg, u16 val); +}; + +struct ad_dpot_bus_data { + void *client; + const struct ad_dpot_bus_ops *bops; +}; + +int ad_dpot_probe(struct device *dev, struct ad_dpot_bus_data *bdata, + unsigned long devid, const char *name); +void ad_dpot_remove(struct device *dev); + +#endif diff --git a/drivers/misc/altera-stapl/Kconfig b/drivers/misc/altera-stapl/Kconfig new file mode 100644 index 0000000000..6c4c6575ec --- /dev/null +++ b/drivers/misc/altera-stapl/Kconfig @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only +comment "Altera FPGA firmware download module (requires I2C)" + depends on !I2C + +config ALTERA_STAPL + tristate "Altera FPGA firmware download module" + depends on I2C + help + An Altera FPGA module. Say Y when you want to support this tool. diff --git a/drivers/misc/altera-stapl/Makefile b/drivers/misc/altera-stapl/Makefile new file mode 100644 index 0000000000..90f18e7bf9 --- /dev/null +++ b/drivers/misc/altera-stapl/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0-only +altera-stapl-y = altera-jtag.o altera-comp.o altera.o +altera-stapl-$(CONFIG_HAS_IOPORT) += altera-lpt.o + +obj-$(CONFIG_ALTERA_STAPL) += altera-stapl.o diff --git a/drivers/misc/altera-stapl/altera-comp.c b/drivers/misc/altera-stapl/altera-comp.c new file mode 100644 index 0000000000..4a63f51cc6 --- /dev/null +++ b/drivers/misc/altera-stapl/altera-comp.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * altera-comp.c + * + * altera FPGA driver + * + * Copyright (C) Altera Corporation 1998-2001 + * Copyright (C) 2010 NetUP Inc. + * Copyright (C) 2010 Igor M. 
Liplianin + */ + +#include +#include "altera-exprt.h" + +#define SHORT_BITS 16 +#define CHAR_BITS 8 +#define DATA_BLOB_LENGTH 3 +#define MATCH_DATA_LENGTH 8192 +#define ALTERA_REQUEST_SIZE 1024 +#define ALTERA_BUFFER_SIZE (MATCH_DATA_LENGTH + ALTERA_REQUEST_SIZE) + +static u32 altera_bits_req(u32 n) +{ + u32 result = SHORT_BITS; + + if (n == 0) + result = 1; + else { + /* Look for the highest non-zero bit position */ + while ((n & (1 << (SHORT_BITS - 1))) == 0) { + n <<= 1; + --result; + } + } + + return result; +} + +static u32 altera_read_packed(u8 *buffer, u32 bits, u32 *bits_avail, + u32 *in_index) +{ + u32 result = 0; + u32 shift = 0; + u32 databyte = 0; + + while (bits > 0) { + databyte = buffer[*in_index]; + result |= (((databyte >> (CHAR_BITS - *bits_avail)) + & (0xff >> (CHAR_BITS - *bits_avail))) << shift); + + if (bits <= *bits_avail) { + result &= (0xffff >> (SHORT_BITS - (bits + shift))); + *bits_avail -= bits; + bits = 0; + } else { + ++(*in_index); + shift += *bits_avail; + bits -= *bits_avail; + *bits_avail = CHAR_BITS; + } + } + + return result; +} + +u32 altera_shrink(u8 *in, u32 in_length, u8 *out, u32 out_length, s32 version) +{ + u32 i, j, data_length = 0L; + u32 offset, length; + u32 match_data_length = MATCH_DATA_LENGTH; + u32 bits_avail = CHAR_BITS; + u32 in_index = 0L; + + if (version > 0) + --match_data_length; + + for (i = 0; i < out_length; ++i) + out[i] = 0; + + /* Read number of bytes in data. */ + for (i = 0; i < sizeof(in_length); ++i) { + data_length = data_length | ( + altera_read_packed(in, + CHAR_BITS, + &bits_avail, + &in_index) << (i * CHAR_BITS)); + } + + if (data_length > out_length) { + data_length = 0L; + return data_length; + } + + i = 0; + while (i < data_length) { + /* A 0 bit indicates literal data. */ + if (altera_read_packed(in, 1, &bits_avail, + &in_index) == 0) { + for (j = 0; j < DATA_BLOB_LENGTH; ++j) { + if (i < data_length) { + out[i] = (u8)altera_read_packed(in, + CHAR_BITS, + &bits_avail, + &in_index); + i++; + } + } + } else { + /* A 1 bit indicates offset/length to follow. */ + offset = altera_read_packed(in, altera_bits_req((s16) + (i > match_data_length ? + match_data_length : i)), + &bits_avail, + &in_index); + length = altera_read_packed(in, CHAR_BITS, + &bits_avail, + &in_index); + for (j = 0; j < length; ++j) { + if (i < data_length) { + out[i] = out[i - offset]; + i++; + } + } + } + } + + return data_length; +} diff --git a/drivers/misc/altera-stapl/altera-exprt.h b/drivers/misc/altera-stapl/altera-exprt.h new file mode 100644 index 0000000000..6a8b696ce8 --- /dev/null +++ b/drivers/misc/altera-stapl/altera-exprt.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * altera-exprt.h + * + * altera FPGA driver + * + * Copyright (C) Altera Corporation 1998-2001 + * Copyright (C) 2010 NetUP Inc. + * Copyright (C) 2010 Igor M. Liplianin + */ + +#ifndef ALTERA_EXPRT_H +#define ALTERA_EXPRT_H + + +u32 altera_shrink(u8 *in, u32 in_length, u8 *out, u32 out_length, s32 version); +int netup_jtag_io_lpt(void *device, int tms, int tdi, int read_tdo); + +#endif /* ALTERA_EXPRT_H */ diff --git a/drivers/misc/altera-stapl/altera-jtag.c b/drivers/misc/altera-stapl/altera-jtag.c new file mode 100644 index 0000000000..27e8e0c9e8 --- /dev/null +++ b/drivers/misc/altera-stapl/altera-jtag.c @@ -0,0 +1,1007 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * altera-jtag.c + * + * altera FPGA driver + * + * Copyright (C) Altera Corporation 1998-2001 + * Copyright (C) 2010 NetUP Inc. + * Copyright (C) 2010 Igor M. 
Liplianin + */ + +#include +#include +#include +#include +#include "altera-exprt.h" +#include "altera-jtag.h" + +#define alt_jtag_io(a, b, c)\ + astate->config->jtag_io(astate->config->dev, a, b, c); + +#define alt_malloc(a) kzalloc(a, GFP_KERNEL); + +/* + * This structure shows, for each JTAG state, which state is reached after + * a single TCK clock cycle with TMS high or TMS low, respectively. This + * describes all possible state transitions in the JTAG state machine. + */ +struct altera_jtag_machine { + enum altera_jtag_state tms_high; + enum altera_jtag_state tms_low; +}; + +static const struct altera_jtag_machine altera_transitions[] = { + /* RESET */ { RESET, IDLE }, + /* IDLE */ { DRSELECT, IDLE }, + /* DRSELECT */ { IRSELECT, DRCAPTURE }, + /* DRCAPTURE */ { DREXIT1, DRSHIFT }, + /* DRSHIFT */ { DREXIT1, DRSHIFT }, + /* DREXIT1 */ { DRUPDATE, DRPAUSE }, + /* DRPAUSE */ { DREXIT2, DRPAUSE }, + /* DREXIT2 */ { DRUPDATE, DRSHIFT }, + /* DRUPDATE */ { DRSELECT, IDLE }, + /* IRSELECT */ { RESET, IRCAPTURE }, + /* IRCAPTURE */ { IREXIT1, IRSHIFT }, + /* IRSHIFT */ { IREXIT1, IRSHIFT }, + /* IREXIT1 */ { IRUPDATE, IRPAUSE }, + /* IRPAUSE */ { IREXIT2, IRPAUSE }, + /* IREXIT2 */ { IRUPDATE, IRSHIFT }, + /* IRUPDATE */ { DRSELECT, IDLE } +}; + +/* + * This table contains the TMS value to be used to take the NEXT STEP on + * the path to the desired state. The array index is the current state, + * and the bit position is the desired endstate. To find out which state + * is used as the intermediate state, look up the TMS value in the + * altera_transitions[] table. + */ +static const u16 altera_jtag_path_map[16] = { + /* RST RTI SDRS CDR SDR E1DR PDR E2DR */ + 0x0001, 0xFFFD, 0xFE01, 0xFFE7, 0xFFEF, 0xFF0F, 0xFFBF, 0xFFFF, + /* UDR SIRS CIR SIR E1IR PIR E2IR UIR */ + 0xFEFD, 0x0001, 0xF3FF, 0xF7FF, 0x87FF, 0xDFFF, 0xFFFF, 0x7FFD +}; + +/* Flag bits for alt_jtag_io() function */ +#define TMS_HIGH 1 +#define TMS_LOW 0 +#define TDI_HIGH 1 +#define TDI_LOW 0 +#define READ_TDO 1 +#define IGNORE_TDO 0 + +int altera_jinit(struct altera_state *astate) +{ + struct altera_jtag *js = &astate->js; + + /* initial JTAG state is unknown */ + js->jtag_state = ILLEGAL_JTAG_STATE; + + /* initialize to default state */ + js->drstop_state = IDLE; + js->irstop_state = IDLE; + js->dr_pre = 0; + js->dr_post = 0; + js->ir_pre = 0; + js->ir_post = 0; + js->dr_length = 0; + js->ir_length = 0; + + js->dr_pre_data = NULL; + js->dr_post_data = NULL; + js->ir_pre_data = NULL; + js->ir_post_data = NULL; + js->dr_buffer = NULL; + js->ir_buffer = NULL; + + return 0; +} + +int altera_set_drstop(struct altera_jtag *js, enum altera_jtag_state state) +{ + js->drstop_state = state; + + return 0; +} + +int altera_set_irstop(struct altera_jtag *js, enum altera_jtag_state state) +{ + js->irstop_state = state; + + return 0; +} + +int altera_set_dr_pre(struct altera_jtag *js, + u32 count, u32 start_index, + u8 *preamble_data) +{ + int status = 0; + u32 i; + u32 j; + + if (count > js->dr_pre) { + kfree(js->dr_pre_data); + js->dr_pre_data = (u8 *)alt_malloc((count + 7) >> 3); + if (js->dr_pre_data == NULL) + status = -ENOMEM; + else + js->dr_pre = count; + } else + js->dr_pre = count; + + if (status == 0) { + for (i = 0; i < count; ++i) { + j = i + start_index; + + if (preamble_data == NULL) + js->dr_pre_data[i >> 3] |= (1 << (i & 7)); + else { + if (preamble_data[j >> 3] & (1 << (j & 7))) + js->dr_pre_data[i >> 3] |= + (1 << (i & 7)); + else + js->dr_pre_data[i >> 3] &= + ~(u32)(1 << (i & 7)); + + } + } + } + + return status; +} + 
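+/*
+ * Note on the padding buffers managed by the altera_set_*_pre()/_post()
+ * helpers above and below: they are bit-packed, so bit number i lives
+ * in byte (i >> 3) at bit position (i & 7), and a NULL preamble or
+ * postamble pointer selects an all-ones pattern.  The copy they all
+ * perform is equivalent to this sketch (illustrative only):
+ *
+ *	for (i = 0, j = start_index; i < count; i++, j++) {
+ *		if (src == NULL || (src[j >> 3] & (1 << (j & 7))))
+ *			dst[i >> 3] |= 1 << (i & 7);
+ *		else
+ *			dst[i >> 3] &= ~(1 << (i & 7));
+ *	}
+ */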
+int altera_set_ir_pre(struct altera_jtag *js, u32 count, u32 start_index, + u8 *preamble_data) +{ + int status = 0; + u32 i; + u32 j; + + if (count > js->ir_pre) { + kfree(js->ir_pre_data); + js->ir_pre_data = (u8 *)alt_malloc((count + 7) >> 3); + if (js->ir_pre_data == NULL) + status = -ENOMEM; + else + js->ir_pre = count; + + } else + js->ir_pre = count; + + if (status == 0) { + for (i = 0; i < count; ++i) { + j = i + start_index; + if (preamble_data == NULL) + js->ir_pre_data[i >> 3] |= (1 << (i & 7)); + else { + if (preamble_data[j >> 3] & (1 << (j & 7))) + js->ir_pre_data[i >> 3] |= + (1 << (i & 7)); + else + js->ir_pre_data[i >> 3] &= + ~(u32)(1 << (i & 7)); + + } + } + } + + return status; +} + +int altera_set_dr_post(struct altera_jtag *js, u32 count, u32 start_index, + u8 *postamble_data) +{ + int status = 0; + u32 i; + u32 j; + + if (count > js->dr_post) { + kfree(js->dr_post_data); + js->dr_post_data = (u8 *)alt_malloc((count + 7) >> 3); + + if (js->dr_post_data == NULL) + status = -ENOMEM; + else + js->dr_post = count; + + } else + js->dr_post = count; + + if (status == 0) { + for (i = 0; i < count; ++i) { + j = i + start_index; + + if (postamble_data == NULL) + js->dr_post_data[i >> 3] |= (1 << (i & 7)); + else { + if (postamble_data[j >> 3] & (1 << (j & 7))) + js->dr_post_data[i >> 3] |= + (1 << (i & 7)); + else + js->dr_post_data[i >> 3] &= + ~(u32)(1 << (i & 7)); + + } + } + } + + return status; +} + +int altera_set_ir_post(struct altera_jtag *js, u32 count, u32 start_index, + u8 *postamble_data) +{ + int status = 0; + u32 i; + u32 j; + + if (count > js->ir_post) { + kfree(js->ir_post_data); + js->ir_post_data = (u8 *)alt_malloc((count + 7) >> 3); + if (js->ir_post_data == NULL) + status = -ENOMEM; + else + js->ir_post = count; + + } else + js->ir_post = count; + + if (status != 0) + return status; + + for (i = 0; i < count; ++i) { + j = i + start_index; + + if (postamble_data == NULL) + js->ir_post_data[i >> 3] |= (1 << (i & 7)); + else { + if (postamble_data[j >> 3] & (1 << (j & 7))) + js->ir_post_data[i >> 3] |= (1 << (i & 7)); + else + js->ir_post_data[i >> 3] &= + ~(u32)(1 << (i & 7)); + + } + } + + return status; +} + +static void altera_jreset_idle(struct altera_state *astate) +{ + struct altera_jtag *js = &astate->js; + int i; + /* Go to Test Logic Reset (no matter what the starting state may be) */ + for (i = 0; i < 5; ++i) + alt_jtag_io(TMS_HIGH, TDI_LOW, IGNORE_TDO); + + /* Now step to Run Test / Idle */ + alt_jtag_io(TMS_LOW, TDI_LOW, IGNORE_TDO); + js->jtag_state = IDLE; +} + +int altera_goto_jstate(struct altera_state *astate, + enum altera_jtag_state state) +{ + struct altera_jtag *js = &astate->js; + int tms; + int count = 0; + int status = 0; + + if (js->jtag_state == ILLEGAL_JTAG_STATE) + /* initialize JTAG chain to known state */ + altera_jreset_idle(astate); + + if (js->jtag_state == state) { + /* + * We are already in the desired state. + * If it is a stable state, loop here. + * Otherwise do nothing (no clock cycles). + */ + if ((state == IDLE) || (state == DRSHIFT) || + (state == DRPAUSE) || (state == IRSHIFT) || + (state == IRPAUSE)) { + alt_jtag_io(TMS_LOW, TDI_LOW, IGNORE_TDO); + } else if (state == RESET) + alt_jtag_io(TMS_HIGH, TDI_LOW, IGNORE_TDO); + + } else { + while ((js->jtag_state != state) && (count < 9)) { + /* Get TMS value to take a step toward desired state */ + tms = (altera_jtag_path_map[js->jtag_state] & + (1 << state)) + ? 
TMS_HIGH : TMS_LOW; + + /* Take a step */ + alt_jtag_io(tms, TDI_LOW, IGNORE_TDO); + + if (tms) + js->jtag_state = + altera_transitions[js->jtag_state].tms_high; + else + js->jtag_state = + altera_transitions[js->jtag_state].tms_low; + + ++count; + } + } + + if (js->jtag_state != state) + status = -EREMOTEIO; + + return status; +} + +int altera_wait_cycles(struct altera_state *astate, + s32 cycles, + enum altera_jtag_state wait_state) +{ + struct altera_jtag *js = &astate->js; + int tms; + s32 count; + int status = 0; + + if (js->jtag_state != wait_state) + status = altera_goto_jstate(astate, wait_state); + + if (status == 0) { + /* + * Set TMS high to loop in RESET state + * Set TMS low to loop in any other stable state + */ + tms = (wait_state == RESET) ? TMS_HIGH : TMS_LOW; + + for (count = 0L; count < cycles; count++) + alt_jtag_io(tms, TDI_LOW, IGNORE_TDO); + + } + + return status; +} + +int altera_wait_msecs(struct altera_state *astate, + s32 microseconds, enum altera_jtag_state wait_state) +/* + * Causes JTAG hardware to sit in the specified stable + * state for the specified duration of real time. If + * no JTAG operations have been performed yet, then only + * a delay is performed. This permits the WAIT USECS + * statement to be used in VECTOR programs without causing + * any JTAG operations. + * Returns 0 for success, else appropriate error code. + */ +{ + struct altera_jtag *js = &astate->js; + int status = 0; + + if ((js->jtag_state != ILLEGAL_JTAG_STATE) && + (js->jtag_state != wait_state)) + status = altera_goto_jstate(astate, wait_state); + + if (status == 0) + /* Wait for specified time interval */ + udelay(microseconds); + + return status; +} + +static void altera_concatenate_data(u8 *buffer, + u8 *preamble_data, + u32 preamble_count, + u8 *target_data, + u32 start_index, + u32 target_count, + u8 *postamble_data, + u32 postamble_count) +/* + * Copies preamble data, target data, and postamble data + * into one buffer for IR or DR scans. 
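+ * The resulting bit layout is:
+ *
+ *	buffer bits 0 .. preamble_count-1:
+ *		preamble_data bits 0 .. preamble_count-1
+ *	next target_count bits:
+ *		target_data bits start_index .. start_index+target_count-1
+ *	last postamble_count bits:
+ *		postamble_data bits 0 .. postamble_count-1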
+ */ +{ + u32 i, j, k; + + for (i = 0L; i < preamble_count; ++i) { + if (preamble_data[i >> 3L] & (1L << (i & 7L))) + buffer[i >> 3L] |= (1L << (i & 7L)); + else + buffer[i >> 3L] &= ~(u32)(1L << (i & 7L)); + + } + + j = start_index; + k = preamble_count + target_count; + for (; i < k; ++i, ++j) { + if (target_data[j >> 3L] & (1L << (j & 7L))) + buffer[i >> 3L] |= (1L << (i & 7L)); + else + buffer[i >> 3L] &= ~(u32)(1L << (i & 7L)); + + } + + j = 0L; + k = preamble_count + target_count + postamble_count; + for (; i < k; ++i, ++j) { + if (postamble_data[j >> 3L] & (1L << (j & 7L))) + buffer[i >> 3L] |= (1L << (i & 7L)); + else + buffer[i >> 3L] &= ~(u32)(1L << (i & 7L)); + + } +} + +static int alt_jtag_drscan(struct altera_state *astate, + int start_state, + int count, + u8 *tdi, + u8 *tdo) +{ + int i = 0; + int tdo_bit = 0; + int status = 1; + + /* First go to DRSHIFT state */ + switch (start_state) { + case 0: /* IDLE */ + alt_jtag_io(1, 0, 0); /* DRSELECT */ + alt_jtag_io(0, 0, 0); /* DRCAPTURE */ + alt_jtag_io(0, 0, 0); /* DRSHIFT */ + break; + + case 1: /* DRPAUSE */ + alt_jtag_io(1, 0, 0); /* DREXIT2 */ + alt_jtag_io(1, 0, 0); /* DRUPDATE */ + alt_jtag_io(1, 0, 0); /* DRSELECT */ + alt_jtag_io(0, 0, 0); /* DRCAPTURE */ + alt_jtag_io(0, 0, 0); /* DRSHIFT */ + break; + + case 2: /* IRPAUSE */ + alt_jtag_io(1, 0, 0); /* IREXIT2 */ + alt_jtag_io(1, 0, 0); /* IRUPDATE */ + alt_jtag_io(1, 0, 0); /* DRSELECT */ + alt_jtag_io(0, 0, 0); /* DRCAPTURE */ + alt_jtag_io(0, 0, 0); /* DRSHIFT */ + break; + + default: + status = 0; + } + + if (status) { + /* loop in the SHIFT-DR state */ + for (i = 0; i < count; i++) { + tdo_bit = alt_jtag_io( + (i == count - 1), + tdi[i >> 3] & (1 << (i & 7)), + (tdo != NULL)); + + if (tdo != NULL) { + if (tdo_bit) + tdo[i >> 3] |= (1 << (i & 7)); + else + tdo[i >> 3] &= ~(u32)(1 << (i & 7)); + + } + } + + alt_jtag_io(0, 0, 0); /* DRPAUSE */ + } + + return status; +} + +static int alt_jtag_irscan(struct altera_state *astate, + int start_state, + int count, + u8 *tdi, + u8 *tdo) +{ + int i = 0; + int tdo_bit = 0; + int status = 1; + + /* First go to IRSHIFT state */ + switch (start_state) { + case 0: /* IDLE */ + alt_jtag_io(1, 0, 0); /* DRSELECT */ + alt_jtag_io(1, 0, 0); /* IRSELECT */ + alt_jtag_io(0, 0, 0); /* IRCAPTURE */ + alt_jtag_io(0, 0, 0); /* IRSHIFT */ + break; + + case 1: /* DRPAUSE */ + alt_jtag_io(1, 0, 0); /* DREXIT2 */ + alt_jtag_io(1, 0, 0); /* DRUPDATE */ + alt_jtag_io(1, 0, 0); /* DRSELECT */ + alt_jtag_io(1, 0, 0); /* IRSELECT */ + alt_jtag_io(0, 0, 0); /* IRCAPTURE */ + alt_jtag_io(0, 0, 0); /* IRSHIFT */ + break; + + case 2: /* IRPAUSE */ + alt_jtag_io(1, 0, 0); /* IREXIT2 */ + alt_jtag_io(1, 0, 0); /* IRUPDATE */ + alt_jtag_io(1, 0, 0); /* DRSELECT */ + alt_jtag_io(1, 0, 0); /* IRSELECT */ + alt_jtag_io(0, 0, 0); /* IRCAPTURE */ + alt_jtag_io(0, 0, 0); /* IRSHIFT */ + break; + + default: + status = 0; + } + + if (status) { + /* loop in the SHIFT-IR state */ + for (i = 0; i < count; i++) { + tdo_bit = alt_jtag_io( + (i == count - 1), + tdi[i >> 3] & (1 << (i & 7)), + (tdo != NULL)); + if (tdo != NULL) { + if (tdo_bit) + tdo[i >> 3] |= (1 << (i & 7)); + else + tdo[i >> 3] &= ~(u32)(1 << (i & 7)); + + } + } + + alt_jtag_io(0, 0, 0); /* IRPAUSE */ + } + + return status; +} + +static void altera_extract_target_data(u8 *buffer, + u8 *target_data, + u32 start_index, + u32 preamble_count, + u32 target_count) +/* + * Copies target data from scan buffer, filtering out + * preamble and postamble data. 
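+ * That is, for n = 0 .. target_count-1, bit (preamble_count + n) of the
+ * scan buffer is copied to bit (start_index + n) of target_data; this is
+ * the inverse of the target portion of altera_concatenate_data() above.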
+ */ +{ + u32 i; + u32 j; + u32 k; + + j = preamble_count; + k = start_index + target_count; + for (i = start_index; i < k; ++i, ++j) { + if (buffer[j >> 3] & (1 << (j & 7))) + target_data[i >> 3] |= (1 << (i & 7)); + else + target_data[i >> 3] &= ~(u32)(1 << (i & 7)); + + } +} + +int altera_irscan(struct altera_state *astate, + u32 count, + u8 *tdi_data, + u32 start_index) +/* Shifts data into instruction register */ +{ + struct altera_jtag *js = &astate->js; + int start_code = 0; + u32 alloc_chars = 0; + u32 shift_count = js->ir_pre + count + js->ir_post; + int status = 0; + enum altera_jtag_state start_state = ILLEGAL_JTAG_STATE; + + switch (js->jtag_state) { + case ILLEGAL_JTAG_STATE: + case RESET: + case IDLE: + start_code = 0; + start_state = IDLE; + break; + + case DRSELECT: + case DRCAPTURE: + case DRSHIFT: + case DREXIT1: + case DRPAUSE: + case DREXIT2: + case DRUPDATE: + start_code = 1; + start_state = DRPAUSE; + break; + + case IRSELECT: + case IRCAPTURE: + case IRSHIFT: + case IREXIT1: + case IRPAUSE: + case IREXIT2: + case IRUPDATE: + start_code = 2; + start_state = IRPAUSE; + break; + + default: + status = -EREMOTEIO; + break; + } + + if (status == 0) + if (js->jtag_state != start_state) + status = altera_goto_jstate(astate, start_state); + + if (status == 0) { + if (shift_count > js->ir_length) { + alloc_chars = (shift_count + 7) >> 3; + kfree(js->ir_buffer); + js->ir_buffer = (u8 *)alt_malloc(alloc_chars); + if (js->ir_buffer == NULL) + status = -ENOMEM; + else + js->ir_length = alloc_chars * 8; + + } + } + + if (status == 0) { + /* + * Copy preamble data, IR data, + * and postamble data into a buffer + */ + altera_concatenate_data(js->ir_buffer, + js->ir_pre_data, + js->ir_pre, + tdi_data, + start_index, + count, + js->ir_post_data, + js->ir_post); + /* Do the IRSCAN */ + alt_jtag_irscan(astate, + start_code, + shift_count, + js->ir_buffer, + NULL); + + /* alt_jtag_irscan() always ends in IRPAUSE state */ + js->jtag_state = IRPAUSE; + } + + if (status == 0) + if (js->irstop_state != IRPAUSE) + status = altera_goto_jstate(astate, js->irstop_state); + + + return status; +} + +int altera_swap_ir(struct altera_state *astate, + u32 count, + u8 *in_data, + u32 in_index, + u8 *out_data, + u32 out_index) +/* Shifts data into instruction register, capturing output data */ +{ + struct altera_jtag *js = &astate->js; + int start_code = 0; + u32 alloc_chars = 0; + u32 shift_count = js->ir_pre + count + js->ir_post; + int status = 0; + enum altera_jtag_state start_state = ILLEGAL_JTAG_STATE; + + switch (js->jtag_state) { + case ILLEGAL_JTAG_STATE: + case RESET: + case IDLE: + start_code = 0; + start_state = IDLE; + break; + + case DRSELECT: + case DRCAPTURE: + case DRSHIFT: + case DREXIT1: + case DRPAUSE: + case DREXIT2: + case DRUPDATE: + start_code = 1; + start_state = DRPAUSE; + break; + + case IRSELECT: + case IRCAPTURE: + case IRSHIFT: + case IREXIT1: + case IRPAUSE: + case IREXIT2: + case IRUPDATE: + start_code = 2; + start_state = IRPAUSE; + break; + + default: + status = -EREMOTEIO; + break; + } + + if (status == 0) + if (js->jtag_state != start_state) + status = altera_goto_jstate(astate, start_state); + + if (status == 0) { + if (shift_count > js->ir_length) { + alloc_chars = (shift_count + 7) >> 3; + kfree(js->ir_buffer); + js->ir_buffer = (u8 *)alt_malloc(alloc_chars); + if (js->ir_buffer == NULL) + status = -ENOMEM; + else + js->ir_length = alloc_chars * 8; + + } + } + + if (status == 0) { + /* + * Copy preamble data, IR data, + * and postamble data into a buffer + */ + 
altera_concatenate_data(js->ir_buffer, + js->ir_pre_data, + js->ir_pre, + in_data, + in_index, + count, + js->ir_post_data, + js->ir_post); + + /* Do the IRSCAN */ + alt_jtag_irscan(astate, + start_code, + shift_count, + js->ir_buffer, + js->ir_buffer); + + /* alt_jtag_irscan() always ends in IRPAUSE state */ + js->jtag_state = IRPAUSE; + } + + if (status == 0) + if (js->irstop_state != IRPAUSE) + status = altera_goto_jstate(astate, js->irstop_state); + + + if (status == 0) + /* Now extract the returned data from the buffer */ + altera_extract_target_data(js->ir_buffer, + out_data, out_index, + js->ir_pre, count); + + return status; +} + +int altera_drscan(struct altera_state *astate, + u32 count, + u8 *tdi_data, + u32 start_index) +/* Shifts data into data register (ignoring output data) */ +{ + struct altera_jtag *js = &astate->js; + int start_code = 0; + u32 alloc_chars = 0; + u32 shift_count = js->dr_pre + count + js->dr_post; + int status = 0; + enum altera_jtag_state start_state = ILLEGAL_JTAG_STATE; + + switch (js->jtag_state) { + case ILLEGAL_JTAG_STATE: + case RESET: + case IDLE: + start_code = 0; + start_state = IDLE; + break; + + case DRSELECT: + case DRCAPTURE: + case DRSHIFT: + case DREXIT1: + case DRPAUSE: + case DREXIT2: + case DRUPDATE: + start_code = 1; + start_state = DRPAUSE; + break; + + case IRSELECT: + case IRCAPTURE: + case IRSHIFT: + case IREXIT1: + case IRPAUSE: + case IREXIT2: + case IRUPDATE: + start_code = 2; + start_state = IRPAUSE; + break; + + default: + status = -EREMOTEIO; + break; + } + + if (status == 0) + if (js->jtag_state != start_state) + status = altera_goto_jstate(astate, start_state); + + if (status == 0) { + if (shift_count > js->dr_length) { + alloc_chars = (shift_count + 7) >> 3; + kfree(js->dr_buffer); + js->dr_buffer = (u8 *)alt_malloc(alloc_chars); + if (js->dr_buffer == NULL) + status = -ENOMEM; + else + js->dr_length = alloc_chars * 8; + + } + } + + if (status == 0) { + /* + * Copy preamble data, DR data, + * and postamble data into a buffer + */ + altera_concatenate_data(js->dr_buffer, + js->dr_pre_data, + js->dr_pre, + tdi_data, + start_index, + count, + js->dr_post_data, + js->dr_post); + /* Do the DRSCAN */ + alt_jtag_drscan(astate, start_code, shift_count, + js->dr_buffer, NULL); + /* alt_jtag_drscan() always ends in DRPAUSE state */ + js->jtag_state = DRPAUSE; + } + + if (status == 0) + if (js->drstop_state != DRPAUSE) + status = altera_goto_jstate(astate, js->drstop_state); + + return status; +} + +int altera_swap_dr(struct altera_state *astate, u32 count, + u8 *in_data, u32 in_index, + u8 *out_data, u32 out_index) +/* Shifts data into data register, capturing output data */ +{ + struct altera_jtag *js = &astate->js; + int start_code = 0; + u32 alloc_chars = 0; + u32 shift_count = js->dr_pre + count + js->dr_post; + int status = 0; + enum altera_jtag_state start_state = ILLEGAL_JTAG_STATE; + + switch (js->jtag_state) { + case ILLEGAL_JTAG_STATE: + case RESET: + case IDLE: + start_code = 0; + start_state = IDLE; + break; + + case DRSELECT: + case DRCAPTURE: + case DRSHIFT: + case DREXIT1: + case DRPAUSE: + case DREXIT2: + case DRUPDATE: + start_code = 1; + start_state = DRPAUSE; + break; + + case IRSELECT: + case IRCAPTURE: + case IRSHIFT: + case IREXIT1: + case IRPAUSE: + case IREXIT2: + case IRUPDATE: + start_code = 2; + start_state = IRPAUSE; + break; + + default: + status = -EREMOTEIO; + break; + } + + if (status == 0) + if (js->jtag_state != start_state) + status = altera_goto_jstate(astate, start_state); + + if (status == 0) { + 
if (shift_count > js->dr_length) { + alloc_chars = (shift_count + 7) >> 3; + kfree(js->dr_buffer); + js->dr_buffer = (u8 *)alt_malloc(alloc_chars); + + if (js->dr_buffer == NULL) + status = -ENOMEM; + else + js->dr_length = alloc_chars * 8; + + } + } + + if (status == 0) { + /* + * Copy preamble data, DR data, + * and postamble data into a buffer + */ + altera_concatenate_data(js->dr_buffer, + js->dr_pre_data, + js->dr_pre, + in_data, + in_index, + count, + js->dr_post_data, + js->dr_post); + + /* Do the DRSCAN */ + alt_jtag_drscan(astate, + start_code, + shift_count, + js->dr_buffer, + js->dr_buffer); + + /* alt_jtag_drscan() always ends in DRPAUSE state */ + js->jtag_state = DRPAUSE; + } + + if (status == 0) + if (js->drstop_state != DRPAUSE) + status = altera_goto_jstate(astate, js->drstop_state); + + if (status == 0) + /* Now extract the returned data from the buffer */ + altera_extract_target_data(js->dr_buffer, + out_data, + out_index, + js->dr_pre, + count); + + return status; +} + +void altera_free_buffers(struct altera_state *astate) +{ + struct altera_jtag *js = &astate->js; + /* If the JTAG interface was used, reset it to TLR */ + if (js->jtag_state != ILLEGAL_JTAG_STATE) + altera_jreset_idle(astate); + + kfree(js->dr_pre_data); + js->dr_pre_data = NULL; + + kfree(js->dr_post_data); + js->dr_post_data = NULL; + + kfree(js->dr_buffer); + js->dr_buffer = NULL; + + kfree(js->ir_pre_data); + js->ir_pre_data = NULL; + + kfree(js->ir_post_data); + js->ir_post_data = NULL; + + kfree(js->ir_buffer); + js->ir_buffer = NULL; +} diff --git a/drivers/misc/altera-stapl/altera-jtag.h b/drivers/misc/altera-stapl/altera-jtag.h new file mode 100644 index 0000000000..90235b31e7 --- /dev/null +++ b/drivers/misc/altera-stapl/altera-jtag.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * altera-jtag.h + * + * altera FPGA driver + * + * Copyright (C) Altera Corporation 1998-2001 + * Copyright (C) 2010 NetUP Inc. + * Copyright (C) 2010 Igor M. 
Liplianin + */ + +#ifndef ALTERA_JTAG_H +#define ALTERA_JTAG_H + +/* Function Prototypes */ +enum altera_jtag_state { + ILLEGAL_JTAG_STATE = -1, + RESET = 0, + IDLE = 1, + DRSELECT = 2, + DRCAPTURE = 3, + DRSHIFT = 4, + DREXIT1 = 5, + DRPAUSE = 6, + DREXIT2 = 7, + DRUPDATE = 8, + IRSELECT = 9, + IRCAPTURE = 10, + IRSHIFT = 11, + IREXIT1 = 12, + IRPAUSE = 13, + IREXIT2 = 14, + IRUPDATE = 15 + +}; + +struct altera_jtag { + /* Global variable to store the current JTAG state */ + enum altera_jtag_state jtag_state; + + /* Store current stop-state for DR and IR scan commands */ + enum altera_jtag_state drstop_state; + enum altera_jtag_state irstop_state; + + /* Store current padding values */ + u32 dr_pre; + u32 dr_post; + u32 ir_pre; + u32 ir_post; + u32 dr_length; + u32 ir_length; + u8 *dr_pre_data; + u8 *dr_post_data; + u8 *ir_pre_data; + u8 *ir_post_data; + u8 *dr_buffer; + u8 *ir_buffer; +}; + +#define ALTERA_STACK_SIZE 128 +#define ALTERA_MESSAGE_LENGTH 1024 + +struct altera_state { + struct altera_config *config; + struct altera_jtag js; + char msg_buff[ALTERA_MESSAGE_LENGTH + 1]; + long stack[ALTERA_STACK_SIZE]; +}; + +int altera_jinit(struct altera_state *astate); +int altera_set_drstop(struct altera_jtag *js, enum altera_jtag_state state); +int altera_set_irstop(struct altera_jtag *js, enum altera_jtag_state state); +int altera_set_dr_pre(struct altera_jtag *js, u32 count, u32 start_index, + u8 *preamble_data); +int altera_set_ir_pre(struct altera_jtag *js, u32 count, u32 start_index, + u8 *preamble_data); +int altera_set_dr_post(struct altera_jtag *js, u32 count, u32 start_index, + u8 *postamble_data); +int altera_set_ir_post(struct altera_jtag *js, u32 count, u32 start_index, + u8 *postamble_data); +int altera_goto_jstate(struct altera_state *astate, + enum altera_jtag_state state); +int altera_wait_cycles(struct altera_state *astate, s32 cycles, + enum altera_jtag_state wait_state); +int altera_wait_msecs(struct altera_state *astate, s32 microseconds, + enum altera_jtag_state wait_state); +int altera_irscan(struct altera_state *astate, u32 count, + u8 *tdi_data, u32 start_index); +int altera_swap_ir(struct altera_state *astate, + u32 count, u8 *in_data, + u32 in_index, u8 *out_data, + u32 out_index); +int altera_drscan(struct altera_state *astate, u32 count, + u8 *tdi_data, u32 start_index); +int altera_swap_dr(struct altera_state *astate, u32 count, + u8 *in_data, u32 in_index, + u8 *out_data, u32 out_index); +void altera_free_buffers(struct altera_state *astate); +#endif /* ALTERA_JTAG_H */ diff --git a/drivers/misc/altera-stapl/altera-lpt.c b/drivers/misc/altera-stapl/altera-lpt.c new file mode 100644 index 0000000000..2b7d9cf415 --- /dev/null +++ b/drivers/misc/altera-stapl/altera-lpt.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * altera-lpt.c + * + * altera FPGA driver + * + * Copyright (C) Altera Corporation 1998-2001 + * Copyright (C) 2010 NetUP Inc. 
+ * Copyright (C) 2010 Abylay Ospan + */ + +#include +#include +#include "altera-exprt.h" + +static int lpt_hardware_initialized; + +static void byteblaster_write(int port, int data) +{ + outb((u8)data, (u16)(port + 0x378)); +}; + +static int byteblaster_read(int port) +{ + int data = 0; + data = inb((u16)(port + 0x378)); + return data & 0xff; +}; + +int netup_jtag_io_lpt(void *device, int tms, int tdi, int read_tdo) +{ + int data = 0; + int tdo = 0; + int initial_lpt_ctrl = 0; + + if (!lpt_hardware_initialized) { + initial_lpt_ctrl = byteblaster_read(2); + byteblaster_write(2, (initial_lpt_ctrl | 0x02) & 0xdf); + lpt_hardware_initialized = 1; + } + + data = ((tdi ? 0x40 : 0) | (tms ? 0x02 : 0)); + + byteblaster_write(0, data); + + if (read_tdo) { + tdo = byteblaster_read(1); + tdo = ((tdo & 0x80) ? 0 : 1); + } + + byteblaster_write(0, data | 0x01); + + byteblaster_write(0, data); + + return tdo; +} diff --git a/drivers/misc/altera-stapl/altera.c b/drivers/misc/altera-stapl/altera.c new file mode 100644 index 0000000000..587427b739 --- /dev/null +++ b/drivers/misc/altera-stapl/altera.c @@ -0,0 +1,2497 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * altera.c + * + * altera FPGA driver + * + * Copyright (C) Altera Corporation 1998-2001 + * Copyright (C) 2010,2011 NetUP Inc. + * Copyright (C) 2010,2011 Igor M. Liplianin + */ + +#include +#include +#include +#include +#include +#include +#include +#include "altera-exprt.h" +#include "altera-jtag.h" + +static int debug = 1; +module_param(debug, int, 0644); +MODULE_PARM_DESC(debug, "enable debugging information"); + +MODULE_DESCRIPTION("altera FPGA kernel module"); +MODULE_AUTHOR("Igor M. Liplianin "); +MODULE_LICENSE("GPL"); + +#define dprintk(args...) \ + if (debug) { \ + printk(KERN_DEBUG args); \ + } + +enum altera_fpga_opcode { + OP_NOP = 0, + OP_DUP, + OP_SWP, + OP_ADD, + OP_SUB, + OP_MULT, + OP_DIV, + OP_MOD, + OP_SHL, + OP_SHR, + OP_NOT, + OP_AND, + OP_OR, + OP_XOR, + OP_INV, + OP_GT, + OP_LT, + OP_RET, + OP_CMPS, + OP_PINT, + OP_PRNT, + OP_DSS, + OP_DSSC, + OP_ISS, + OP_ISSC, + OP_DPR = 0x1c, + OP_DPRL, + OP_DPO, + OP_DPOL, + OP_IPR, + OP_IPRL, + OP_IPO, + OP_IPOL, + OP_PCHR, + OP_EXIT, + OP_EQU, + OP_POPT, + OP_ABS = 0x2c, + OP_BCH0, + OP_PSH0 = 0x2f, + OP_PSHL = 0x40, + OP_PSHV, + OP_JMP, + OP_CALL, + OP_NEXT, + OP_PSTR, + OP_SINT = 0x47, + OP_ST, + OP_ISTP, + OP_DSTP, + OP_SWPN, + OP_DUPN, + OP_POPV, + OP_POPE, + OP_POPA, + OP_JMPZ, + OP_DS, + OP_IS, + OP_DPRA, + OP_DPOA, + OP_IPRA, + OP_IPOA, + OP_EXPT, + OP_PSHE, + OP_PSHA, + OP_DYNA, + OP_EXPV = 0x5c, + OP_COPY = 0x80, + OP_REVA, + OP_DSC, + OP_ISC, + OP_WAIT, + OP_VS, + OP_CMPA = 0xc0, + OP_VSC, +}; + +struct altera_procinfo { + char *name; + u8 attrs; + struct altera_procinfo *next; +}; + +/* This function checks if enough parameters are available on the stack. 
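+ * It returns 1 when at least "count" entries are available, otherwise it
+ * stores -EOVERFLOW in *status and returns 0, so a typical caller in the
+ * bytecode interpreter looks like this sketch (illustrative, not copied
+ * verbatim from the code below):
+ *
+ *	if (altera_check_stack(stack_ptr, 2, &status)) {
+ *		long a = stack[stack_ptr - 1];
+ *		long b = stack[stack_ptr - 2];
+ *		... operate on a and b ...
+ *	}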
*/ +static int altera_check_stack(int stack_ptr, int count, int *status) +{ + if (stack_ptr < count) { + *status = -EOVERFLOW; + return 0; + } + + return 1; +} + +static void altera_export_int(char *key, s32 value) +{ + dprintk("Export: key = \"%s\", value = %d\n", key, value); +} + +#define HEX_LINE_CHARS 72 +#define HEX_LINE_BITS (HEX_LINE_CHARS * 4) + +static void altera_export_bool_array(char *key, u8 *data, s32 count) +{ + char string[HEX_LINE_CHARS + 1]; + s32 i, offset; + u32 size, line, lines, linebits, value, j, k; + + if (count > HEX_LINE_BITS) { + dprintk("Export: key = \"%s\", %d bits, value = HEX\n", + key, count); + lines = (count + (HEX_LINE_BITS - 1)) / HEX_LINE_BITS; + + for (line = 0; line < lines; ++line) { + if (line < (lines - 1)) { + linebits = HEX_LINE_BITS; + size = HEX_LINE_CHARS; + offset = count - ((line + 1) * HEX_LINE_BITS); + } else { + linebits = + count - ((lines - 1) * HEX_LINE_BITS); + size = (linebits + 3) / 4; + offset = 0L; + } + + string[size] = '\0'; + j = size - 1; + value = 0; + + for (k = 0; k < linebits; ++k) { + i = k + offset; + if (data[i >> 3] & (1 << (i & 7))) + value |= (1 << (i & 3)); + if ((i & 3) == 3) { + sprintf(&string[j], "%1x", value); + value = 0; + --j; + } + } + if ((k & 3) > 0) + sprintf(&string[j], "%1x", value); + + dprintk("%s\n", string); + } + + } else { + size = (count + 3) / 4; + string[size] = '\0'; + j = size - 1; + value = 0; + + for (i = 0; i < count; ++i) { + if (data[i >> 3] & (1 << (i & 7))) + value |= (1 << (i & 3)); + if ((i & 3) == 3) { + sprintf(&string[j], "%1x", value); + value = 0; + --j; + } + } + if ((i & 3) > 0) + sprintf(&string[j], "%1x", value); + + dprintk("Export: key = \"%s\", %d bits, value = HEX %s\n", + key, count, string); + } +} + +static int altera_execute(struct altera_state *astate, + u8 *p, + s32 program_size, + s32 *error_address, + int *exit_code, + int *format_version) +{ + struct altera_config *aconf = astate->config; + char *msg_buff = astate->msg_buff; + long *stack = astate->stack; + int status = 0; + u32 first_word = 0L; + u32 action_table = 0L; + u32 proc_table = 0L; + u32 str_table = 0L; + u32 sym_table = 0L; + u32 data_sect = 0L; + u32 code_sect = 0L; + u32 debug_sect = 0L; + u32 action_count = 0L; + u32 proc_count = 0L; + u32 sym_count = 0L; + long *vars = NULL; + s32 *var_size = NULL; + char *attrs = NULL; + u8 *proc_attributes = NULL; + u32 pc; + u32 opcode_address; + u32 args[3]; + u32 opcode; + u32 name_id; + u8 charbuf[4]; + long long_tmp; + u32 variable_id; + u8 *charptr_tmp; + u8 *charptr_tmp2; + long *longptr_tmp; + int version = 0; + int delta = 0; + int stack_ptr = 0; + u32 arg_count; + int done = 0; + int bad_opcode = 0; + u32 count; + u32 index; + u32 index2; + s32 long_count; + s32 long_idx; + s32 long_idx2; + u32 i; + u32 j; + u32 uncomp_size; + u32 offset; + u32 value; + int current_proc = 0; + int reverse; + + char *name; + + dprintk("%s\n", __func__); + + /* Read header information */ + if (program_size > 52L) { + first_word = get_unaligned_be32(&p[0]); + version = (first_word & 1L); + *format_version = version + 1; + delta = version * 8; + + action_table = get_unaligned_be32(&p[4]); + proc_table = get_unaligned_be32(&p[8]); + str_table = get_unaligned_be32(&p[4 + delta]); + sym_table = get_unaligned_be32(&p[16 + delta]); + data_sect = get_unaligned_be32(&p[20 + delta]); + code_sect = get_unaligned_be32(&p[24 + delta]); + debug_sect = get_unaligned_be32(&p[28 + delta]); + action_count = get_unaligned_be32(&p[40 + delta]); + proc_count = get_unaligned_be32(&p[44 + 
delta]); + sym_count = get_unaligned_be32(&p[48 + (2 * delta)]); + } + + if ((first_word != 0x4A414D00L) && (first_word != 0x4A414D01L)) { + done = 1; + status = -EIO; + goto exit_done; + } + + if (sym_count <= 0) + goto exit_done; + + vars = kcalloc(sym_count, sizeof(long), GFP_KERNEL); + + if (vars == NULL) + status = -ENOMEM; + + if (status == 0) { + var_size = kcalloc(sym_count, sizeof(s32), GFP_KERNEL); + + if (var_size == NULL) + status = -ENOMEM; + } + + if (status == 0) { + attrs = kzalloc(sym_count, GFP_KERNEL); + + if (attrs == NULL) + status = -ENOMEM; + } + + if ((status == 0) && (version > 0)) { + proc_attributes = kzalloc(proc_count, GFP_KERNEL); + + if (proc_attributes == NULL) + status = -ENOMEM; + } + + if (status != 0) + goto exit_done; + + delta = version * 2; + + for (i = 0; i < sym_count; ++i) { + offset = (sym_table + ((11 + delta) * i)); + + value = get_unaligned_be32(&p[offset + 3 + delta]); + + attrs[i] = p[offset]; + + /* + * use bit 7 of attribute byte to indicate that + * this buffer was dynamically allocated + * and should be freed later + */ + attrs[i] &= 0x7f; + + var_size[i] = get_unaligned_be32(&p[offset + 7 + delta]); + + /* + * Attribute bits: + * bit 0: 0 = read-only, 1 = read-write + * bit 1: 0 = not compressed, 1 = compressed + * bit 2: 0 = not initialized, 1 = initialized + * bit 3: 0 = scalar, 1 = array + * bit 4: 0 = Boolean, 1 = integer + * bit 5: 0 = declared variable, + * 1 = compiler created temporary variable + */ + + if ((attrs[i] & 0x0c) == 0x04) + /* initialized scalar variable */ + vars[i] = value; + else if ((attrs[i] & 0x1e) == 0x0e) { + /* initialized compressed Boolean array */ + uncomp_size = get_unaligned_le32(&p[data_sect + value]); + + /* allocate a buffer for the uncompressed data */ + vars[i] = (long)kzalloc(uncomp_size, GFP_KERNEL); + if (vars[i] == 0L) + status = -ENOMEM; + else { + /* set flag so buffer will be freed later */ + attrs[i] |= 0x80; + + /* uncompress the data */ + if (altera_shrink(&p[data_sect + value], + var_size[i], + (u8 *)vars[i], + uncomp_size, + version) != uncomp_size) + /* decompression failed */ + status = -EIO; + else + var_size[i] = uncomp_size * 8L; + + } + } else if ((attrs[i] & 0x1e) == 0x0c) { + /* initialized Boolean array */ + vars[i] = value + data_sect + (long)p; + } else if ((attrs[i] & 0x1c) == 0x1c) { + /* initialized integer array */ + vars[i] = value + data_sect; + } else if ((attrs[i] & 0x0c) == 0x08) { + /* uninitialized array */ + + /* flag attrs so that memory is freed */ + attrs[i] |= 0x80; + + if (var_size[i] > 0) { + u32 size; + + if (attrs[i] & 0x10) + /* integer array */ + size = (var_size[i] * sizeof(s32)); + else + /* Boolean array */ + size = ((var_size[i] + 7L) / 8L); + + vars[i] = (long)kzalloc(size, GFP_KERNEL); + + if (vars[i] == 0) { + status = -ENOMEM; + } else { + /* zero out memory */ + for (j = 0; j < size; ++j) + ((u8 *)(vars[i]))[j] = 0; + + } + } else + vars[i] = 0; + + } else + vars[i] = 0; + + } + +exit_done: + if (status != 0) + done = 1; + + altera_jinit(astate); + + pc = code_sect; + msg_buff[0] = '\0'; + + /* + * For JBC version 2, we will execute the procedures corresponding to + * the selected ACTION + */ + if (version > 0) { + if (aconf->action == NULL) { + status = -EINVAL; + done = 1; + } else { + int action_found = 0; + for (i = 0; (i < action_count) && !action_found; ++i) { + name_id = get_unaligned_be32(&p[action_table + + (12 * i)]); + + name = &p[str_table + name_id]; + + if (strncasecmp(aconf->action, name, strlen(name)) == 0) { + action_found = 1; + 
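/* remember the first procedure of the matching action */ +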
current_proc = + get_unaligned_be32(&p[action_table + + (12 * i) + 8]); + } + } + + if (!action_found) { + status = -EINVAL; + done = 1; + } + } + + if (status == 0) { + int first_time = 1; + i = current_proc; + while ((i != 0) || first_time) { + first_time = 0; + /* check procedure attribute byte */ + proc_attributes[i] = + (p[proc_table + + (13 * i) + 8] & + 0x03); + + /* + * BIT0 - OPTIONAL + * BIT1 - RECOMMENDED + * BIT6 - FORCED OFF + * BIT7 - FORCED ON + */ + + i = get_unaligned_be32(&p[proc_table + + (13 * i) + 4]); + } + + /* + * Set current_proc to the first procedure + * to be executed + */ + i = current_proc; + while ((i != 0) && + ((proc_attributes[i] == 1) || + ((proc_attributes[i] & 0xc0) == 0x40))) { + i = get_unaligned_be32(&p[proc_table + + (13 * i) + 4]); + } + + if ((i != 0) || ((i == 0) && (current_proc == 0) && + ((proc_attributes[0] != 1) && + ((proc_attributes[0] & 0xc0) != 0x40)))) { + current_proc = i; + pc = code_sect + + get_unaligned_be32(&p[proc_table + + (13 * i) + 9]); + if ((pc < code_sect) || (pc >= debug_sect)) + status = -ERANGE; + } else + /* there are no procedures to execute! */ + done = 1; + + } + } + + msg_buff[0] = '\0'; + + while (!done) { + opcode = (p[pc] & 0xff); + opcode_address = pc; + ++pc; + + if (debug > 1) + printk("opcode: %02x\n", opcode); + + arg_count = (opcode >> 6) & 3; + for (i = 0; i < arg_count; ++i) { + args[i] = get_unaligned_be32(&p[pc]); + pc += 4; + } + + switch (opcode) { + case OP_NOP: + break; + case OP_DUP: + if (altera_check_stack(stack_ptr, 1, &status)) { + stack[stack_ptr] = stack[stack_ptr - 1]; + ++stack_ptr; + } + break; + case OP_SWP: + if (altera_check_stack(stack_ptr, 2, &status)) + swap(stack[stack_ptr - 2], stack[stack_ptr - 1]); + break; + case OP_ADD: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] += stack[stack_ptr]; + } + break; + case OP_SUB: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] -= stack[stack_ptr]; + } + break; + case OP_MULT: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] *= stack[stack_ptr]; + } + break; + case OP_DIV: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] /= stack[stack_ptr]; + } + break; + case OP_MOD: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] %= stack[stack_ptr]; + } + break; + case OP_SHL: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] <<= stack[stack_ptr]; + } + break; + case OP_SHR: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] >>= stack[stack_ptr]; + } + break; + case OP_NOT: + if (altera_check_stack(stack_ptr, 1, &status)) + stack[stack_ptr - 1] ^= (-1L); + + break; + case OP_AND: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] &= stack[stack_ptr]; + } + break; + case OP_OR: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] |= stack[stack_ptr]; + } + break; + case OP_XOR: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] ^= stack[stack_ptr]; + } + break; + case OP_INV: + if (!altera_check_stack(stack_ptr, 1, &status)) + break; + stack[stack_ptr - 1] = stack[stack_ptr - 1] ? 0L : 1L; + break; + case OP_GT: + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + --stack_ptr; + stack[stack_ptr - 1] = + (stack[stack_ptr - 1] > stack[stack_ptr]) ? 
+ 1L : 0L; + + break; + case OP_LT: + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + --stack_ptr; + stack[stack_ptr - 1] = + (stack[stack_ptr - 1] < stack[stack_ptr]) ? + 1L : 0L; + + break; + case OP_RET: + if ((version > 0) && (stack_ptr == 0)) { + /* + * We completed one of the main procedures + * of an ACTION. + * Find the next procedure + * to be executed and jump to it. + * If there are no more procedures, then EXIT. + */ + i = get_unaligned_be32(&p[proc_table + + (13 * current_proc) + 4]); + while ((i != 0) && + ((proc_attributes[i] == 1) || + ((proc_attributes[i] & 0xc0) == 0x40))) + i = get_unaligned_be32(&p[proc_table + + (13 * i) + 4]); + + if (i == 0) { + /* no procedures to execute! */ + done = 1; + *exit_code = 0; /* success */ + } else { + current_proc = i; + pc = code_sect + get_unaligned_be32( + &p[proc_table + + (13 * i) + 9]); + if ((pc < code_sect) || + (pc >= debug_sect)) + status = -ERANGE; + } + + } else + if (altera_check_stack(stack_ptr, 1, &status)) { + pc = stack[--stack_ptr] + code_sect; + if ((pc <= code_sect) || + (pc >= debug_sect)) + status = -ERANGE; + + } + + break; + case OP_CMPS: + /* + * Array short compare + * ...stack 0 is source 1 value + * ...stack 1 is source 2 value + * ...stack 2 is mask value + * ...stack 3 is count + */ + if (altera_check_stack(stack_ptr, 4, &status)) { + s32 a = stack[--stack_ptr]; + s32 b = stack[--stack_ptr]; + long_tmp = stack[--stack_ptr]; + count = stack[stack_ptr - 1]; + + if ((count < 1) || (count > 32)) + status = -ERANGE; + else { + long_tmp &= ((-1L) >> (32 - count)); + + stack[stack_ptr - 1] = + ((a & long_tmp) == (b & long_tmp)) + ? 1L : 0L; + } + } + break; + case OP_PINT: + /* + * PRINT add integer + * ...stack 0 is integer value + */ + if (!altera_check_stack(stack_ptr, 1, &status)) + break; + sprintf(&msg_buff[strlen(msg_buff)], + "%ld", stack[--stack_ptr]); + break; + case OP_PRNT: + /* PRINT finish */ + if (debug) + printk(msg_buff, "\n"); + + msg_buff[0] = '\0'; + break; + case OP_DSS: + /* + * DRSCAN short + * ...stack 0 is scan data + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + long_tmp = stack[--stack_ptr]; + count = stack[--stack_ptr]; + put_unaligned_le32(long_tmp, &charbuf[0]); + status = altera_drscan(astate, count, charbuf, 0); + break; + case OP_DSSC: + /* + * DRSCAN short with capture + * ...stack 0 is scan data + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + long_tmp = stack[--stack_ptr]; + count = stack[stack_ptr - 1]; + put_unaligned_le32(long_tmp, &charbuf[0]); + status = altera_swap_dr(astate, count, charbuf, + 0, charbuf, 0); + stack[stack_ptr - 1] = get_unaligned_le32(&charbuf[0]); + break; + case OP_ISS: + /* + * IRSCAN short + * ...stack 0 is scan data + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + long_tmp = stack[--stack_ptr]; + count = stack[--stack_ptr]; + put_unaligned_le32(long_tmp, &charbuf[0]); + status = altera_irscan(astate, count, charbuf, 0); + break; + case OP_ISSC: + /* + * IRSCAN short with capture + * ...stack 0 is scan data + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + long_tmp = stack[--stack_ptr]; + count = stack[stack_ptr - 1]; + put_unaligned_le32(long_tmp, &charbuf[0]); + status = altera_swap_ir(astate, count, charbuf, + 0, charbuf, 0); + stack[stack_ptr - 1] = get_unaligned_le32(&charbuf[0]); + break; + case OP_DPR: + if (!altera_check_stack(stack_ptr, 1, &status)) + break; + count = 
stack[--stack_ptr]; + status = altera_set_dr_pre(&astate->js, count, 0, NULL); + break; + case OP_DPRL: + /* + * DRPRE with literal data + * ...stack 0 is count + * ...stack 1 is literal data + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + count = stack[--stack_ptr]; + long_tmp = stack[--stack_ptr]; + put_unaligned_le32(long_tmp, &charbuf[0]); + status = altera_set_dr_pre(&astate->js, count, 0, + charbuf); + break; + case OP_DPO: + /* + * DRPOST + * ...stack 0 is count + */ + if (altera_check_stack(stack_ptr, 1, &status)) { + count = stack[--stack_ptr]; + status = altera_set_dr_post(&astate->js, count, + 0, NULL); + } + break; + case OP_DPOL: + /* + * DRPOST with literal data + * ...stack 0 is count + * ...stack 1 is literal data + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + count = stack[--stack_ptr]; + long_tmp = stack[--stack_ptr]; + put_unaligned_le32(long_tmp, &charbuf[0]); + status = altera_set_dr_post(&astate->js, count, 0, + charbuf); + break; + case OP_IPR: + if (altera_check_stack(stack_ptr, 1, &status)) { + count = stack[--stack_ptr]; + status = altera_set_ir_pre(&astate->js, count, + 0, NULL); + } + break; + case OP_IPRL: + /* + * IRPRE with literal data + * ...stack 0 is count + * ...stack 1 is literal data + */ + if (altera_check_stack(stack_ptr, 2, &status)) { + count = stack[--stack_ptr]; + long_tmp = stack[--stack_ptr]; + put_unaligned_le32(long_tmp, &charbuf[0]); + status = altera_set_ir_pre(&astate->js, count, + 0, charbuf); + } + break; + case OP_IPO: + /* + * IRPOST + * ...stack 0 is count + */ + if (altera_check_stack(stack_ptr, 1, &status)) { + count = stack[--stack_ptr]; + status = altera_set_ir_post(&astate->js, count, + 0, NULL); + } + break; + case OP_IPOL: + /* + * IRPOST with literal data + * ...stack 0 is count + * ...stack 1 is literal data + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + count = stack[--stack_ptr]; + long_tmp = stack[--stack_ptr]; + put_unaligned_le32(long_tmp, &charbuf[0]); + status = altera_set_ir_post(&astate->js, count, 0, + charbuf); + break; + case OP_PCHR: + if (altera_check_stack(stack_ptr, 1, &status)) { + u8 ch; + count = strlen(msg_buff); + ch = (char) stack[--stack_ptr]; + if ((ch < 1) || (ch > 127)) { + /* + * character code out of range + * instead of flagging an error, + * force the value to 127 + */ + ch = 127; + } + msg_buff[count] = ch; + msg_buff[count + 1] = '\0'; + } + break; + case OP_EXIT: + if (altera_check_stack(stack_ptr, 1, &status)) + *exit_code = stack[--stack_ptr]; + + done = 1; + break; + case OP_EQU: + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + --stack_ptr; + stack[stack_ptr - 1] = + (stack[stack_ptr - 1] == stack[stack_ptr]) ? 
+ 1L : 0L; + break; + case OP_POPT: + if (altera_check_stack(stack_ptr, 1, &status)) + --stack_ptr; + + break; + case OP_ABS: + if (!altera_check_stack(stack_ptr, 1, &status)) + break; + if (stack[stack_ptr - 1] < 0) + stack[stack_ptr - 1] = 0 - stack[stack_ptr - 1]; + + break; + case OP_BCH0: + /* + * Batch operation 0 + * SWP + * SWPN 7 + * SWP + * SWPN 6 + * DUPN 8 + * SWPN 2 + * SWP + * DUPN 6 + * DUPN 6 + */ + + /* SWP */ + if (altera_check_stack(stack_ptr, 2, &status)) + swap(stack[stack_ptr - 2], stack[stack_ptr - 1]); + + /* SWPN 7 */ + index = 7 + 1; + if (altera_check_stack(stack_ptr, index, &status)) + swap(stack[stack_ptr - index], stack[stack_ptr - 1]); + + /* SWP */ + if (altera_check_stack(stack_ptr, 2, &status)) + swap(stack[stack_ptr - 2], stack[stack_ptr - 1]); + + /* SWPN 6 */ + index = 6 + 1; + if (altera_check_stack(stack_ptr, index, &status)) + swap(stack[stack_ptr - index], stack[stack_ptr - 1]); + + /* DUPN 8 */ + index = 8 + 1; + if (altera_check_stack(stack_ptr, index, &status)) { + stack[stack_ptr] = stack[stack_ptr - index]; + ++stack_ptr; + } + + /* SWPN 2 */ + index = 2 + 1; + if (altera_check_stack(stack_ptr, index, &status)) + swap(stack[stack_ptr - index], stack[stack_ptr - 1]); + + /* SWP */ + if (altera_check_stack(stack_ptr, 2, &status)) + swap(stack[stack_ptr - 2], stack[stack_ptr - 1]); + + /* DUPN 6 */ + index = 6 + 1; + if (altera_check_stack(stack_ptr, index, &status)) { + stack[stack_ptr] = stack[stack_ptr - index]; + ++stack_ptr; + } + + /* DUPN 6 */ + index = 6 + 1; + if (altera_check_stack(stack_ptr, index, &status)) { + stack[stack_ptr] = stack[stack_ptr - index]; + ++stack_ptr; + } + break; + case OP_PSH0: + stack[stack_ptr++] = 0; + break; + case OP_PSHL: + stack[stack_ptr++] = (s32) args[0]; + break; + case OP_PSHV: + stack[stack_ptr++] = vars[args[0]]; + break; + case OP_JMP: + pc = args[0] + code_sect; + if ((pc < code_sect) || (pc >= debug_sect)) + status = -ERANGE; + break; + case OP_CALL: + stack[stack_ptr++] = pc; + pc = args[0] + code_sect; + if ((pc < code_sect) || (pc >= debug_sect)) + status = -ERANGE; + break; + case OP_NEXT: + /* + * Process FOR / NEXT loop + * ...argument 0 is variable ID + * ...stack 0 is step value + * ...stack 1 is end value + * ...stack 2 is top address + */ + if (altera_check_stack(stack_ptr, 3, &status)) { + s32 step = stack[stack_ptr - 1]; + s32 end = stack[stack_ptr - 2]; + s32 top = stack[stack_ptr - 3]; + s32 iterator = vars[args[0]]; + int break_out = 0; + + if (step < 0) { + if (iterator <= end) + break_out = 1; + } else if (iterator >= end) + break_out = 1; + + if (break_out) { + stack_ptr -= 3; + } else { + vars[args[0]] = iterator + step; + pc = top + code_sect; + if ((pc < code_sect) || + (pc >= debug_sect)) + status = -ERANGE; + } + } + break; + case OP_PSTR: + /* + * PRINT add string + * ...argument 0 is string ID + */ + count = strlen(msg_buff); + strscpy(&msg_buff[count], + &p[str_table + args[0]], + ALTERA_MESSAGE_LENGTH - count); + break; + case OP_SINT: + /* + * STATE intermediate state + * ...argument 0 is state code + */ + status = altera_goto_jstate(astate, args[0]); + break; + case OP_ST: + /* + * STATE final state + * ...argument 0 is state code + */ + status = altera_goto_jstate(astate, args[0]); + break; + case OP_ISTP: + /* + * IRSTOP state + * ...argument 0 is state code + */ + status = altera_set_irstop(&astate->js, args[0]); + break; + case OP_DSTP: + /* + * DRSTOP state + * ...argument 0 is state code + */ + status = altera_set_drstop(&astate->js, args[0]); + break; + + case 
OP_SWPN: + /* + * Exchange top with Nth stack value + * ...argument 0 is 0-based stack entry + * to swap with top element + */ + index = (args[0]) + 1; + if (altera_check_stack(stack_ptr, index, &status)) + swap(stack[stack_ptr - index], stack[stack_ptr - 1]); + break; + case OP_DUPN: + /* + * Duplicate Nth stack value + * ...argument 0 is 0-based stack entry to duplicate + */ + index = (args[0]) + 1; + if (altera_check_stack(stack_ptr, index, &status)) { + stack[stack_ptr] = stack[stack_ptr - index]; + ++stack_ptr; + } + break; + case OP_POPV: + /* + * Pop stack into scalar variable + * ...argument 0 is variable ID + * ...stack 0 is value + */ + if (altera_check_stack(stack_ptr, 1, &status)) + vars[args[0]] = stack[--stack_ptr]; + + break; + case OP_POPE: + /* + * Pop stack into integer array element + * ...argument 0 is variable ID + * ...stack 0 is array index + * ...stack 1 is value + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + variable_id = args[0]; + + /* + * If variable is read-only, + * convert to writable array + */ + if ((version > 0) && + ((attrs[variable_id] & 0x9c) == 0x1c)) { + /* Allocate a writable buffer for this array */ + count = var_size[variable_id]; + long_tmp = vars[variable_id]; + longptr_tmp = kcalloc(count, sizeof(long), + GFP_KERNEL); + vars[variable_id] = (long)longptr_tmp; + + if (vars[variable_id] == 0) { + status = -ENOMEM; + break; + } + + /* copy previous contents into buffer */ + for (i = 0; i < count; ++i) { + longptr_tmp[i] = + get_unaligned_be32(&p[long_tmp]); + long_tmp += sizeof(long); + } + + /* + * set bit 7 - buffer was + * dynamically allocated + */ + attrs[variable_id] |= 0x80; + + /* clear bit 2 - variable is writable */ + attrs[variable_id] &= ~0x04; + attrs[variable_id] |= 0x01; + + } + + /* check that variable is a writable integer array */ + if ((attrs[variable_id] & 0x1c) != 0x18) + status = -ERANGE; + else { + longptr_tmp = (long *)vars[variable_id]; + + /* pop the array index */ + index = stack[--stack_ptr]; + + /* pop the value and store it into the array */ + longptr_tmp[index] = stack[--stack_ptr]; + } + + break; + case OP_POPA: + /* + * Pop stack into Boolean array + * ...argument 0 is variable ID + * ...stack 0 is count + * ...stack 1 is array index + * ...stack 2 is value + */ + if (!altera_check_stack(stack_ptr, 3, &status)) + break; + variable_id = args[0]; + + /* + * If variable is read-only, + * convert to writable array + */ + if ((version > 0) && + ((attrs[variable_id] & 0x9c) == 0x0c)) { + /* Allocate a writable buffer for this array */ + long_tmp = + (var_size[variable_id] + 7L) >> 3L; + charptr_tmp2 = (u8 *)vars[variable_id]; + charptr_tmp = + kzalloc(long_tmp, GFP_KERNEL); + vars[variable_id] = (long)charptr_tmp; + + if (vars[variable_id] == 0) { + status = -ENOMEM; + break; + } + + /* zero the buffer */ + for (long_idx = 0L; + long_idx < long_tmp; + ++long_idx) { + charptr_tmp[long_idx] = 0; + } + + /* copy previous contents into buffer */ + for (long_idx = 0L; + long_idx < var_size[variable_id]; + ++long_idx) { + long_idx2 = long_idx; + + if (charptr_tmp2[long_idx2 >> 3] & + (1 << (long_idx2 & 7))) { + charptr_tmp[long_idx >> 3] |= + (1 << (long_idx & 7)); + } + } + + /* + * set bit 7 - buffer was + * dynamically allocated + */ + attrs[variable_id] |= 0x80; + + /* clear bit 2 - variable is writable */ + attrs[variable_id] &= ~0x04; + attrs[variable_id] |= 0x01; + + } + + /* + * check that variable is + * a writable Boolean array + */ + if ((attrs[variable_id] & 0x1c) != 0x08) { + status = -ERANGE; + 
break; + } + + charptr_tmp = (u8 *)vars[variable_id]; + + /* pop the count (number of bits to copy) */ + long_count = stack[--stack_ptr]; + + /* pop the array index */ + long_idx = stack[--stack_ptr]; + + reverse = 0; + + if (version > 0) { + /* + * stack 0 = array right index + * stack 1 = array left index + */ + + if (long_idx > long_count) { + reverse = 1; + long_tmp = long_count; + long_count = 1 + long_idx - + long_count; + long_idx = long_tmp; + + /* reverse POPA is not supported */ + status = -ERANGE; + break; + } else + long_count = 1 + long_count - + long_idx; + + } + + /* pop the data */ + long_tmp = stack[--stack_ptr]; + + if (long_count < 1) { + status = -ERANGE; + break; + } + + for (i = 0; i < long_count; ++i) { + if (long_tmp & (1L << (s32) i)) + charptr_tmp[long_idx >> 3L] |= + (1L << (long_idx & 7L)); + else + charptr_tmp[long_idx >> 3L] &= + ~(1L << (long_idx & 7L)); + + ++long_idx; + } + + break; + case OP_JMPZ: + /* + * Pop stack and branch if zero + * ...argument 0 is address + * ...stack 0 is condition value + */ + if (altera_check_stack(stack_ptr, 1, &status)) { + if (stack[--stack_ptr] == 0) { + pc = args[0] + code_sect; + if ((pc < code_sect) || + (pc >= debug_sect)) + status = -ERANGE; + } + } + break; + case OP_DS: + case OP_IS: + /* + * DRSCAN + * IRSCAN + * ...argument 0 is scan data variable ID + * ...stack 0 is array index + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + long_idx = stack[--stack_ptr]; + long_count = stack[--stack_ptr]; + reverse = 0; + if (version > 0) { + /* + * stack 0 = array right index + * stack 1 = array left index + * stack 2 = count + */ + long_tmp = long_count; + long_count = stack[--stack_ptr]; + + if (long_idx > long_tmp) { + reverse = 1; + long_idx = long_tmp; + } + } + + charptr_tmp = (u8 *)vars[args[0]]; + + if (reverse) { + /* + * allocate a buffer + * and reverse the data order + */ + charptr_tmp2 = charptr_tmp; + charptr_tmp = kzalloc((long_count >> 3) + 1, + GFP_KERNEL); + if (charptr_tmp == NULL) { + status = -ENOMEM; + break; + } + + long_tmp = long_idx + long_count - 1; + long_idx2 = 0; + while (long_idx2 < long_count) { + if (charptr_tmp2[long_tmp >> 3] & + (1 << (long_tmp & 7))) + charptr_tmp[long_idx2 >> 3] |= + (1 << (long_idx2 & 7)); + else + charptr_tmp[long_idx2 >> 3] &= + ~(1 << (long_idx2 & 7)); + + --long_tmp; + ++long_idx2; + } + } + + if (opcode == 0x51) /* DS */ + status = altera_drscan(astate, long_count, + charptr_tmp, long_idx); + else /* IS */ + status = altera_irscan(astate, long_count, + charptr_tmp, long_idx); + + if (reverse) + kfree(charptr_tmp); + + break; + case OP_DPRA: + /* + * DRPRE with array data + * ...argument 0 is variable ID + * ...stack 0 is array index + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + index = stack[--stack_ptr]; + count = stack[--stack_ptr]; + + if (version > 0) + /* + * stack 0 = array right index + * stack 1 = array left index + */ + count = 1 + count - index; + + charptr_tmp = (u8 *)vars[args[0]]; + status = altera_set_dr_pre(&astate->js, count, index, + charptr_tmp); + break; + case OP_DPOA: + /* + * DRPOST with array data + * ...argument 0 is variable ID + * ...stack 0 is array index + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + index = stack[--stack_ptr]; + count = stack[--stack_ptr]; + + if (version > 0) + /* + * stack 0 = array right index + * stack 1 = array left index + */ + count = 1 + count - index; + + charptr_tmp = (u8 
*)vars[args[0]]; + status = altera_set_dr_post(&astate->js, count, index, + charptr_tmp); + break; + case OP_IPRA: + /* + * IRPRE with array data + * ...argument 0 is variable ID + * ...stack 0 is array index + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + index = stack[--stack_ptr]; + count = stack[--stack_ptr]; + + if (version > 0) + /* + * stack 0 = array right index + * stack 1 = array left index + */ + count = 1 + count - index; + + charptr_tmp = (u8 *)vars[args[0]]; + status = altera_set_ir_pre(&astate->js, count, index, + charptr_tmp); + + break; + case OP_IPOA: + /* + * IRPOST with array data + * ...argument 0 is variable ID + * ...stack 0 is array index + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + index = stack[--stack_ptr]; + count = stack[--stack_ptr]; + + if (version > 0) + /* + * stack 0 = array right index + * stack 1 = array left index + */ + count = 1 + count - index; + + charptr_tmp = (u8 *)vars[args[0]]; + status = altera_set_ir_post(&astate->js, count, index, + charptr_tmp); + + break; + case OP_EXPT: + /* + * EXPORT + * ...argument 0 is string ID + * ...stack 0 is integer expression + */ + if (altera_check_stack(stack_ptr, 1, &status)) { + name = &p[str_table + args[0]]; + long_tmp = stack[--stack_ptr]; + altera_export_int(name, long_tmp); + } + break; + case OP_PSHE: + /* + * Push integer array element + * ...argument 0 is variable ID + * ...stack 0 is array index + */ + if (!altera_check_stack(stack_ptr, 1, &status)) + break; + variable_id = args[0]; + index = stack[stack_ptr - 1]; + + /* check variable type */ + if ((attrs[variable_id] & 0x1f) == 0x19) { + /* writable integer array */ + longptr_tmp = (long *)vars[variable_id]; + stack[stack_ptr - 1] = longptr_tmp[index]; + } else if ((attrs[variable_id] & 0x1f) == 0x1c) { + /* read-only integer array */ + long_tmp = vars[variable_id] + + (index * sizeof(long)); + stack[stack_ptr - 1] = + get_unaligned_be32(&p[long_tmp]); + } else + status = -ERANGE; + + break; + case OP_PSHA: + /* + * Push Boolean array + * ...argument 0 is variable ID + * ...stack 0 is count + * ...stack 1 is array index + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + variable_id = args[0]; + + /* check that variable is a Boolean array */ + if ((attrs[variable_id] & 0x18) != 0x08) { + status = -ERANGE; + break; + } + + charptr_tmp = (u8 *)vars[variable_id]; + + /* pop the count (number of bits to copy) */ + count = stack[--stack_ptr]; + + /* pop the array index */ + index = stack[stack_ptr - 1]; + + if (version > 0) + /* + * stack 0 = array right index + * stack 1 = array left index + */ + count = 1 + count - index; + + if ((count < 1) || (count > 32)) { + status = -ERANGE; + break; + } + + long_tmp = 0L; + + for (i = 0; i < count; ++i) + if (charptr_tmp[(i + index) >> 3] & + (1 << ((i + index) & 7))) + long_tmp |= (1L << i); + + stack[stack_ptr - 1] = long_tmp; + + break; + case OP_DYNA: + /* + * Dynamically change size of array + * ...argument 0 is variable ID + * ...stack 0 is new size + */ + if (!altera_check_stack(stack_ptr, 1, &status)) + break; + variable_id = args[0]; + long_tmp = stack[--stack_ptr]; + + if (long_tmp > var_size[variable_id]) { + var_size[variable_id] = long_tmp; + + if (attrs[variable_id] & 0x10) + /* allocate integer array */ + long_tmp *= sizeof(long); + else + /* allocate Boolean array */ + long_tmp = (long_tmp + 7) >> 3; + + /* + * If the buffer was previously allocated, + * free it + */ + if (attrs[variable_id] & 
0x80) { + kfree((void *)vars[variable_id]); + vars[variable_id] = 0; + } + + /* + * Allocate a new buffer + * of the requested size + */ + vars[variable_id] = (long) + kzalloc(long_tmp, GFP_KERNEL); + + if (vars[variable_id] == 0) { + status = -ENOMEM; + break; + } + + /* + * Set the attribute bit to indicate that + * this buffer was dynamically allocated and + * should be freed later + */ + attrs[variable_id] |= 0x80; + + /* zero out memory */ + count = ((var_size[variable_id] + 7L) / + 8L); + charptr_tmp = (u8 *)(vars[variable_id]); + for (index = 0; index < count; ++index) + charptr_tmp[index] = 0; + + } + + break; + case OP_EXPV: + /* + * Export Boolean array + * ...argument 0 is string ID + * ...stack 0 is variable ID + * ...stack 1 is array right index + * ...stack 2 is array left index + */ + if (!altera_check_stack(stack_ptr, 3, &status)) + break; + if (version == 0) { + /* EXPV is not supported in JBC 1.0 */ + bad_opcode = 1; + break; + } + name = &p[str_table + args[0]]; + variable_id = stack[--stack_ptr]; + long_idx = stack[--stack_ptr];/* right indx */ + long_idx2 = stack[--stack_ptr];/* left indx */ + + if (long_idx > long_idx2) { + /* reverse indices not supported */ + status = -ERANGE; + break; + } + + long_count = 1 + long_idx2 - long_idx; + + charptr_tmp = (u8 *)vars[variable_id]; + charptr_tmp2 = NULL; + + if ((long_idx & 7L) != 0) { + s32 k = long_idx; + charptr_tmp2 = + kzalloc(((long_count + 7L) / 8L), + GFP_KERNEL); + if (charptr_tmp2 == NULL) { + status = -ENOMEM; + break; + } + + for (i = 0; i < long_count; ++i) { + if (charptr_tmp[k >> 3] & + (1 << (k & 7))) + charptr_tmp2[i >> 3] |= + (1 << (i & 7)); + else + charptr_tmp2[i >> 3] &= + ~(1 << (i & 7)); + + ++k; + } + charptr_tmp = charptr_tmp2; + + } else if (long_idx != 0) + charptr_tmp = &charptr_tmp[long_idx >> 3]; + + altera_export_bool_array(name, charptr_tmp, + long_count); + + /* free allocated buffer */ + if ((long_idx & 7L) != 0) + kfree(charptr_tmp2); + + break; + case OP_COPY: { + /* + * Array copy + * ...argument 0 is dest ID + * ...argument 1 is source ID + * ...stack 0 is count + * ...stack 1 is dest index + * ...stack 2 is source index + */ + s32 copy_count; + s32 copy_index; + s32 copy_index2; + s32 destleft; + s32 src_count; + s32 dest_count; + int src_reverse = 0; + int dest_reverse = 0; + + if (!altera_check_stack(stack_ptr, 3, &status)) + break; + + copy_count = stack[--stack_ptr]; + copy_index = stack[--stack_ptr]; + copy_index2 = stack[--stack_ptr]; + reverse = 0; + + if (version > 0) { + /* + * stack 0 = source right index + * stack 1 = source left index + * stack 2 = destination right index + * stack 3 = destination left index + */ + destleft = stack[--stack_ptr]; + + if (copy_count > copy_index) { + src_reverse = 1; + reverse = 1; + src_count = 1 + copy_count - copy_index; + /* copy_index = source start index */ + } else { + src_count = 1 + copy_index - copy_count; + /* source start index */ + copy_index = copy_count; + } + + if (copy_index2 > destleft) { + dest_reverse = 1; + reverse = !reverse; + dest_count = 1 + copy_index2 - destleft; + /* destination start index */ + copy_index2 = destleft; + } else + dest_count = 1 + destleft - copy_index2; + + copy_count = (src_count < dest_count) ? + src_count : dest_count; + + if ((src_reverse || dest_reverse) && + (src_count != dest_count)) + /* + * If either the source or destination + * is reversed, we can't tolerate + * a length mismatch, because we + * "left justify" arrays when copying. + * This won't work correctly + * with reversed arrays. 
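+ * Such copies are rejected with -ERANGE instead. 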
+ */ + status = -ERANGE; + + } + + count = copy_count; + index = copy_index; + index2 = copy_index2; + + /* + * If destination is a read-only array, + * allocate a buffer and convert it to a writable array + */ + variable_id = args[1]; + if ((version > 0) && + ((attrs[variable_id] & 0x9c) == 0x0c)) { + /* Allocate a writable buffer for this array */ + long_tmp = + (var_size[variable_id] + 7L) >> 3L; + charptr_tmp2 = (u8 *)vars[variable_id]; + charptr_tmp = + kzalloc(long_tmp, GFP_KERNEL); + vars[variable_id] = (long)charptr_tmp; + + if (vars[variable_id] == 0) { + status = -ENOMEM; + break; + } + + /* zero the buffer */ + for (long_idx = 0L; long_idx < long_tmp; + ++long_idx) + charptr_tmp[long_idx] = 0; + + /* copy previous contents into buffer */ + for (long_idx = 0L; + long_idx < var_size[variable_id]; + ++long_idx) { + long_idx2 = long_idx; + + if (charptr_tmp2[long_idx2 >> 3] & + (1 << (long_idx2 & 7))) + charptr_tmp[long_idx >> 3] |= + (1 << (long_idx & 7)); + + } + + /* + set bit 7 - buffer was dynamically allocated */ + attrs[variable_id] |= 0x80; + + /* clear bit 2 - variable is writable */ + attrs[variable_id] &= ~0x04; + attrs[variable_id] |= 0x01; + } + + charptr_tmp = (u8 *)vars[args[1]]; + charptr_tmp2 = (u8 *)vars[args[0]]; + + /* check if destination is a writable Boolean array */ + if ((attrs[args[1]] & 0x1c) != 0x08) { + status = -ERANGE; + break; + } + + if (count < 1) { + status = -ERANGE; + break; + } + + if (reverse) + index2 += (count - 1); + + for (i = 0; i < count; ++i) { + if (charptr_tmp2[index >> 3] & + (1 << (index & 7))) + charptr_tmp[index2 >> 3] |= + (1 << (index2 & 7)); + else + charptr_tmp[index2 >> 3] &= + ~(1 << (index2 & 7)); + + ++index; + if (reverse) + --index2; + else + ++index2; + } + + break; + } + case OP_DSC: + case OP_ISC: { + /* + * DRSCAN with capture + * IRSCAN with capture + * ...argument 0 is scan data variable ID + * ...argument 1 is capture variable ID + * ...stack 0 is capture index + * ...stack 1 is scan data index + * ...stack 2 is count + */ + s32 scan_right, scan_left; + s32 capture_count = 0; + s32 scan_count = 0; + s32 capture_index; + s32 scan_index; + + if (!altera_check_stack(stack_ptr, 3, &status)) + break; + + capture_index = stack[--stack_ptr]; + scan_index = stack[--stack_ptr]; + + if (version > 0) { + /* + * stack 0 = capture right index + * stack 1 = capture left index + * stack 2 = scan right index + * stack 3 = scan left index + * stack 4 = count + */ + scan_right = stack[--stack_ptr]; + scan_left = stack[--stack_ptr]; + capture_count = 1 + scan_index - capture_index; + scan_count = 1 + scan_left - scan_right; + scan_index = scan_right; + } + + long_count = stack[--stack_ptr]; + /* + * If capture array is read-only, allocate a buffer + * and convert it to a writable array + */ + variable_id = args[1]; + if ((version > 0) && + ((attrs[variable_id] & 0x9c) == 0x0c)) { + /* Allocate a writable buffer for this array */ + long_tmp = + (var_size[variable_id] + 7L) >> 3L; + charptr_tmp2 = (u8 *)vars[variable_id]; + charptr_tmp = + kzalloc(long_tmp, GFP_KERNEL); + vars[variable_id] = (long)charptr_tmp; + + if (vars[variable_id] == 0) { + status = -ENOMEM; + break; + } + + /* zero the buffer */ + for (long_idx = 0L; long_idx < long_tmp; + ++long_idx) + charptr_tmp[long_idx] = 0; + + /* copy previous contents into buffer */ + for (long_idx = 0L; + long_idx < var_size[variable_id]; + ++long_idx) { + long_idx2 = long_idx; + + if (charptr_tmp2[long_idx2 >> 3] & + (1 << (long_idx2 & 7))) + charptr_tmp[long_idx >> 3] |= + (1 << 
(long_idx & 7)); + + } + + /* + * set bit 7 - buffer was + * dynamically allocated + */ + attrs[variable_id] |= 0x80; + + /* clear bit 2 - variable is writable */ + attrs[variable_id] &= ~0x04; + attrs[variable_id] |= 0x01; + + } + + charptr_tmp = (u8 *)vars[args[0]]; + charptr_tmp2 = (u8 *)vars[args[1]]; + + if ((version > 0) && + ((long_count > capture_count) || + (long_count > scan_count))) { + status = -ERANGE; + break; + } + + /* + * check that capture array + * is a writable Boolean array + */ + if ((attrs[args[1]] & 0x1c) != 0x08) { + status = -ERANGE; + break; + } + + if (status == 0) { + if (opcode == 0x82) /* DSC */ + status = altera_swap_dr(astate, + long_count, + charptr_tmp, + scan_index, + charptr_tmp2, + capture_index); + else /* ISC */ + status = altera_swap_ir(astate, + long_count, + charptr_tmp, + scan_index, + charptr_tmp2, + capture_index); + + } + + break; + } + case OP_WAIT: + /* + * WAIT + * ...argument 0 is wait state + * ...argument 1 is end state + * ...stack 0 is cycles + * ...stack 1 is microseconds + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + long_tmp = stack[--stack_ptr]; + + if (long_tmp != 0L) + status = altera_wait_cycles(astate, long_tmp, + args[0]); + + long_tmp = stack[--stack_ptr]; + + if ((status == 0) && (long_tmp != 0L)) + status = altera_wait_msecs(astate, + long_tmp, + args[0]); + + if ((status == 0) && (args[1] != args[0])) + status = altera_goto_jstate(astate, + args[1]); + + if (version > 0) { + --stack_ptr; /* throw away MAX cycles */ + --stack_ptr; /* throw away MAX microseconds */ + } + break; + case OP_CMPA: { + /* + * Array compare + * ...argument 0 is source 1 ID + * ...argument 1 is source 2 ID + * ...argument 2 is mask ID + * ...stack 0 is source 1 index + * ...stack 1 is source 2 index + * ...stack 2 is mask index + * ...stack 3 is count + */ + s32 a, b; + u8 *source1 = (u8 *)vars[args[0]]; + u8 *source2 = (u8 *)vars[args[1]]; + u8 *mask = (u8 *)vars[args[2]]; + u32 index1; + u32 index2; + u32 mask_index; + + if (!altera_check_stack(stack_ptr, 4, &status)) + break; + + index1 = stack[--stack_ptr]; + index2 = stack[--stack_ptr]; + mask_index = stack[--stack_ptr]; + long_count = stack[--stack_ptr]; + + if (version > 0) { + /* + * stack 0 = source 1 right index + * stack 1 = source 1 left index + * stack 2 = source 2 right index + * stack 3 = source 2 left index + * stack 4 = mask right index + * stack 5 = mask left index + */ + s32 mask_right = stack[--stack_ptr]; + s32 mask_left = stack[--stack_ptr]; + /* source 1 count */ + a = 1 + index2 - index1; + /* source 2 count */ + b = 1 + long_count - mask_index; + a = (a < b) ? a : b; + /* mask count */ + b = 1 + mask_left - mask_right; + a = (a < b) ? a : b; + /* source 2 start index */ + index2 = mask_index; + /* mask start index */ + mask_index = mask_right; + long_count = a; + } + + long_tmp = 1L; + + if (long_count < 1) + status = -ERANGE; + else { + count = long_count; + + for (i = 0; i < count; ++i) { + if (mask[mask_index >> 3] & + (1 << (mask_index & 7))) { + a = source1[index1 >> 3] & + (1 << (index1 & 7)) + ? 1 : 0; + b = source2[index2 >> 3] & + (1 << (index2 & 7)) + ? 1 : 0; + + if (a != b) /* failure */ + long_tmp = 0L; + } + ++index1; + ++index2; + ++mask_index; + } + } + + stack[stack_ptr++] = long_tmp; + + break; + } + default: + /* Unrecognized opcode -- ERROR! 
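+ * Flagged via bad_opcode and reported as -ENOSYS below. 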
*/ + bad_opcode = 1; + break; + } + + if (bad_opcode) + status = -ENOSYS; + + if ((stack_ptr < 0) || (stack_ptr >= ALTERA_STACK_SIZE)) + status = -EOVERFLOW; + + if (status != 0) { + done = 1; + *error_address = (s32)(opcode_address - code_sect); + } + } + + altera_free_buffers(astate); + + /* Free all dynamically allocated arrays */ + if ((attrs != NULL) && (vars != NULL)) + for (i = 0; i < sym_count; ++i) + if (attrs[i] & 0x80) + kfree((void *)vars[i]); + + kfree(vars); + kfree(var_size); + kfree(attrs); + kfree(proc_attributes); + + return status; +} + +static int altera_get_note(u8 *p, s32 program_size, s32 *offset, + char *key, char *value, int keylen, int vallen) +/* + * Gets key and value of NOTE fields in the JBC file. + * Can be called in two modes: if offset pointer is NULL, + * then the function searches for note fields which match + * the key string provided. If offset is not NULL, then + * the function finds the next note field of any key, + * starting at the offset specified by the offset pointer. + * Returns 0 for success, else appropriate error code + */ +{ + int status = -ENODATA; + u32 note_strings = 0L; + u32 note_table = 0L; + u32 note_count = 0L; + u32 first_word = 0L; + int version = 0; + int delta = 0; + char *key_ptr; + char *value_ptr; + int i; + + /* Read header information */ + if (program_size > 52L) { + first_word = get_unaligned_be32(&p[0]); + version = (first_word & 1L); + delta = version * 8; + + note_strings = get_unaligned_be32(&p[8 + delta]); + note_table = get_unaligned_be32(&p[12 + delta]); + note_count = get_unaligned_be32(&p[44 + (2 * delta)]); + } + + if ((first_word != 0x4A414D00L) && (first_word != 0x4A414D01L)) + return -EIO; + + if (note_count <= 0L) + return status; + + if (offset == NULL) { + /* + * We will search for the first note with a specific key, + * and return only the value + */ + for (i = 0; (i < note_count) && + (status != 0); ++i) { + key_ptr = &p[note_strings + + get_unaligned_be32( + &p[note_table + (8 * i)])]; + if (key && !strncasecmp(key, key_ptr, strlen(key_ptr))) { + status = 0; + + value_ptr = &p[note_strings + + get_unaligned_be32( + &p[note_table + (8 * i) + 4])]; + + if (value != NULL) + strscpy(value, value_ptr, vallen); + + } + } + } else { + /* + * We will search for the next note, regardless of the key, + * and return both the value and the key + */ + + i = *offset; + + if ((i >= 0) && (i < note_count)) { + status = 0; + + if (key != NULL) + strscpy(key, &p[note_strings + + get_unaligned_be32( + &p[note_table + (8 * i)])], + keylen); + + if (value != NULL) + strscpy(value, &p[note_strings + + get_unaligned_be32( + &p[note_table + (8 * i) + 4])], + vallen); + + *offset = i + 1; + } + } + + return status; +} + +static int altera_check_crc(u8 *p, s32 program_size) +{ + int status = 0; + u16 local_expected = 0, + local_actual = 0, + shift_reg = 0xffff; + int bit, feedback; + u8 databyte; + u32 i; + u32 crc_section = 0L; + u32 first_word = 0L; + int version = 0; + int delta = 0; + + if (program_size > 52L) { + first_word = get_unaligned_be32(&p[0]); + version = (first_word & 1L); + delta = version * 8; + + crc_section = get_unaligned_be32(&p[32 + delta]); + } + + if ((first_word != 0x4A414D00L) && (first_word != 0x4A414D01L)) + status = -EIO; + + if (crc_section >= program_size) + status = -EIO; + + if (status == 0) { + local_expected = (u16)get_unaligned_be16(&p[crc_section]); + + for (i = 0; i < crc_section; ++i) { + databyte = p[i]; + for (bit = 0; bit < 8; bit++) { + feedback = (databyte ^ shift_reg) & 0x01; + 
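/* reflected CRC-16-CCITT step: shift right, xor 0x8408 on feedback */ +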
shift_reg >>= 1; + if (feedback) + shift_reg ^= 0x8408; + + databyte >>= 1; + } + } + + local_actual = (u16)~shift_reg; + + if (local_expected != local_actual) + status = -EILSEQ; + + } + + if (debug || status) { + switch (status) { + case 0: + printk(KERN_INFO "%s: CRC matched: %04x\n", __func__, + local_actual); + break; + case -EILSEQ: + printk(KERN_ERR "%s: CRC mismatch: expected %04x, " + "actual %04x\n", __func__, local_expected, + local_actual); + break; + case -EIO: + printk(KERN_ERR "%s: error: format isn't " + "recognized.\n", __func__); + break; + default: + printk(KERN_ERR "%s: CRC function returned error " + "code %d\n", __func__, status); + break; + } + } + + return status; +} + +static int altera_get_file_info(u8 *p, + s32 program_size, + int *format_version, + int *action_count, + int *procedure_count) +{ + int status = -EIO; + u32 first_word = 0; + int version = 0; + + if (program_size <= 52L) + return status; + + first_word = get_unaligned_be32(&p[0]); + + if ((first_word == 0x4A414D00L) || (first_word == 0x4A414D01L)) { + status = 0; + + version = (first_word & 1L); + *format_version = version + 1; + + if (version > 0) { + *action_count = get_unaligned_be32(&p[48]); + *procedure_count = get_unaligned_be32(&p[52]); + } + } + + return status; +} + +static int altera_get_act_info(u8 *p, + s32 program_size, + int index, + char **name, + char **description, + struct altera_procinfo **proc_list) +{ + int status = -EIO; + struct altera_procinfo *procptr = NULL; + struct altera_procinfo *tmpptr = NULL; + u32 first_word = 0L; + u32 action_table = 0L; + u32 proc_table = 0L; + u32 str_table = 0L; + u32 note_strings = 0L; + u32 action_count = 0L; + u32 proc_count = 0L; + u32 act_name_id = 0L; + u32 act_desc_id = 0L; + u32 act_proc_id = 0L; + u32 act_proc_name = 0L; + u8 act_proc_attribute = 0; + + if (program_size <= 52L) + return status; + /* Read header information */ + first_word = get_unaligned_be32(&p[0]); + + if (first_word != 0x4A414D01L) + return status; + + action_table = get_unaligned_be32(&p[4]); + proc_table = get_unaligned_be32(&p[8]); + str_table = get_unaligned_be32(&p[12]); + note_strings = get_unaligned_be32(&p[16]); + action_count = get_unaligned_be32(&p[48]); + proc_count = get_unaligned_be32(&p[52]); + + if (index >= action_count) + return status; + + act_name_id = get_unaligned_be32(&p[action_table + (12 * index)]); + act_desc_id = get_unaligned_be32(&p[action_table + (12 * index) + 4]); + act_proc_id = get_unaligned_be32(&p[action_table + (12 * index) + 8]); + + *name = &p[str_table + act_name_id]; + + if (act_desc_id < (note_strings - str_table)) + *description = &p[str_table + act_desc_id]; + + do { + act_proc_name = get_unaligned_be32( + &p[proc_table + (13 * act_proc_id)]); + act_proc_attribute = + (p[proc_table + (13 * act_proc_id) + 8] & 0x03); + + procptr = + kzalloc(sizeof(struct altera_procinfo), + GFP_KERNEL); + + if (procptr == NULL) + status = -ENOMEM; + else { + procptr->name = &p[str_table + act_proc_name]; + procptr->attrs = act_proc_attribute; + procptr->next = NULL; + + /* add record to end of linked list */ + if (*proc_list == NULL) + *proc_list = procptr; + else { + tmpptr = *proc_list; + while (tmpptr->next != NULL) + tmpptr = tmpptr->next; + tmpptr->next = procptr; + } + } + + act_proc_id = get_unaligned_be32( + &p[proc_table + (13 * act_proc_id) + 4]); + } while ((act_proc_id != 0) && (act_proc_id < proc_count)); + + return status; +} + +int altera_init(struct altera_config *config, const struct firmware *fw) +{ + struct altera_state 
*astate = NULL; + struct altera_procinfo *proc_list = NULL; + struct altera_procinfo *procptr = NULL; + char *key = NULL; + char *value = NULL; + char *action_name = NULL; + char *description = NULL; + int exec_result = 0; + int exit_code = 0; + int format_version = 0; + int action_count = 0; + int procedure_count = 0; + int index = 0; + s32 offset = 0L; + s32 error_address = 0L; + int retval = 0; + + key = kzalloc(33, GFP_KERNEL); + if (!key) { + retval = -ENOMEM; + goto out; + } + value = kzalloc(257, GFP_KERNEL); + if (!value) { + retval = -ENOMEM; + goto free_key; + } + astate = kzalloc(sizeof(struct altera_state), GFP_KERNEL); + if (!astate) { + retval = -ENOMEM; + goto free_value; + } + + astate->config = config; + if (!astate->config->jtag_io) { + if (!IS_ENABLED(CONFIG_HAS_IOPORT)) { + retval = -ENODEV; + goto free_state; + } + dprintk("%s: using byteblaster!\n", __func__); + astate->config->jtag_io = netup_jtag_io_lpt; + } + + altera_check_crc((u8 *)fw->data, fw->size); + + if (debug) { + altera_get_file_info((u8 *)fw->data, fw->size, &format_version, + &action_count, &procedure_count); + printk(KERN_INFO "%s: File format is %s ByteCode format\n", + __func__, (format_version == 2) ? "Jam STAPL" : + "pre-standardized Jam 1.1"); + while (altera_get_note((u8 *)fw->data, fw->size, + &offset, key, value, 32, 256) == 0) + printk(KERN_INFO "%s: NOTE \"%s\" = \"%s\"\n", + __func__, key, value); + } + + if (debug && (format_version == 2) && (action_count > 0)) { + printk(KERN_INFO "%s: Actions available:\n", __func__); + for (index = 0; index < action_count; ++index) { + altera_get_act_info((u8 *)fw->data, fw->size, + index, &action_name, + &description, + &proc_list); + + if (description == NULL) + printk(KERN_INFO "%s: %s\n", + __func__, + action_name); + else + printk(KERN_INFO "%s: %s \"%s\"\n", + __func__, + action_name, + description); + + procptr = proc_list; + while (procptr != NULL) { + if (procptr->attrs != 0) + printk(KERN_INFO "%s: %s (%s)\n", + __func__, + procptr->name, + (procptr->attrs == 1) ? 
+ "optional" : "recommended"); + + proc_list = procptr->next; + kfree(procptr); + procptr = proc_list; + } + } + + printk(KERN_INFO "\n"); + } + + exec_result = altera_execute(astate, (u8 *)fw->data, fw->size, + &error_address, &exit_code, &format_version); + + if (exit_code) + exec_result = -EREMOTEIO; + + if ((format_version == 2) && (exec_result == -EINVAL)) { + if (astate->config->action == NULL) + printk(KERN_ERR "%s: error: no action specified for " + "Jam STAPL file.\nprogram terminated.\n", + __func__); + else + printk(KERN_ERR "%s: error: action \"%s\"" + " is not supported " + "for this Jam STAPL file.\n" + "Program terminated.\n", __func__, + astate->config->action); + + } else if (exec_result) + printk(KERN_ERR "%s: error %d\n", __func__, exec_result); +free_state: + kfree(astate); +free_value: + kfree(value); +free_key: + kfree(key); +out: + return retval; +} +EXPORT_SYMBOL(altera_init); diff --git a/drivers/misc/apds9802als.c b/drivers/misc/apds9802als.c new file mode 100644 index 0000000000..693f0e539f --- /dev/null +++ b/drivers/misc/apds9802als.c @@ -0,0 +1,308 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * apds9802als.c - apds9802 ALS Driver + * + * Copyright (C) 2009 Intel Corp + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define ALS_MIN_RANGE_VAL 1 +#define ALS_MAX_RANGE_VAL 2 +#define POWER_STA_ENABLE 1 +#define POWER_STA_DISABLE 0 + +#define DRIVER_NAME "apds9802als" + +struct als_data { + struct mutex mutex; +}; + +static ssize_t als_sensing_range_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + int val; + + val = i2c_smbus_read_byte_data(client, 0x81); + if (val < 0) + return val; + if (val & 1) + return sprintf(buf, "4095\n"); + else + return sprintf(buf, "65535\n"); +} + +static int als_wait_for_data_ready(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + int ret; + int retry = 10; + + do { + msleep(30); + ret = i2c_smbus_read_byte_data(client, 0x86); + } while (!(ret & 0x80) && retry--); + + if (retry < 0) { + dev_warn(dev, "timeout waiting for data ready\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static ssize_t als_lux0_input_data_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct als_data *data = i2c_get_clientdata(client); + int ret_val; + int temp; + + /* Protect against parallel reads */ + pm_runtime_get_sync(dev); + mutex_lock(&data->mutex); + + /* clear EOC interrupt status */ + i2c_smbus_write_byte(client, 0x40); + /* start measurement */ + temp = i2c_smbus_read_byte_data(client, 0x81); + i2c_smbus_write_byte_data(client, 0x81, temp | 0x08); + + ret_val = als_wait_for_data_ready(dev); + if (ret_val < 0) + goto failed; + + temp = i2c_smbus_read_byte_data(client, 0x8C); /* LSB data */ + if (temp < 0) { + ret_val = temp; + goto failed; + } + ret_val = i2c_smbus_read_byte_data(client, 0x8D); /* MSB data */ + if (ret_val < 0) + goto failed; + + mutex_unlock(&data->mutex); + pm_runtime_put_sync(dev); + + temp = (ret_val << 8) | temp; + return sprintf(buf, "%d\n", temp); +failed: + mutex_unlock(&data->mutex); + pm_runtime_put_sync(dev); + return ret_val; +} + +static ssize_t als_sensing_range_store(struct device *dev, + struct device_attribute *attr, const char 
*buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct als_data *data = i2c_get_clientdata(client); + int ret_val; + unsigned long val; + + ret_val = kstrtoul(buf, 10, &val); + if (ret_val) + return ret_val; + + if (val < 4096) + val = 1; + else if (val < 65536) + val = 2; + else + return -ERANGE; + + pm_runtime_get_sync(dev); + + /* Make sure nobody else reads/modifies/writes 0x81 while we + are active */ + mutex_lock(&data->mutex); + + ret_val = i2c_smbus_read_byte_data(client, 0x81); + if (ret_val < 0) + goto fail; + + /* Reset the bits before setting them */ + ret_val = ret_val & 0xFA; + + if (val == 1) /* Setting detection range up to 4k LUX */ + ret_val = (ret_val | 0x01); + else /* Setting detection range up to 64k LUX*/ + ret_val = (ret_val | 0x00); + + ret_val = i2c_smbus_write_byte_data(client, 0x81, ret_val); + + if (ret_val >= 0) { + /* All OK */ + mutex_unlock(&data->mutex); + pm_runtime_put_sync(dev); + return count; + } +fail: + mutex_unlock(&data->mutex); + pm_runtime_put_sync(dev); + return ret_val; +} + +static int als_set_power_state(struct i2c_client *client, bool on_off) +{ + int ret_val; + struct als_data *data = i2c_get_clientdata(client); + + mutex_lock(&data->mutex); + ret_val = i2c_smbus_read_byte_data(client, 0x80); + if (ret_val < 0) + goto fail; + if (on_off) + ret_val = ret_val | 0x01; + else + ret_val = ret_val & 0xFE; + ret_val = i2c_smbus_write_byte_data(client, 0x80, ret_val); +fail: + mutex_unlock(&data->mutex); + return ret_val; +} + +static DEVICE_ATTR(lux0_sensor_range, S_IRUGO | S_IWUSR, + als_sensing_range_show, als_sensing_range_store); +static DEVICE_ATTR(lux0_input, S_IRUGO, als_lux0_input_data_show, NULL); + +static struct attribute *mid_att_als[] = { + &dev_attr_lux0_sensor_range.attr, + &dev_attr_lux0_input.attr, + NULL +}; + +static const struct attribute_group m_als_gr = { + .name = "apds9802als", + .attrs = mid_att_als +}; + +static int als_set_default_config(struct i2c_client *client) +{ + int ret_val; + /* Write the command and then switch on */ + ret_val = i2c_smbus_write_byte_data(client, 0x80, 0x01); + if (ret_val < 0) { + dev_err(&client->dev, "failed default switch on write\n"); + return ret_val; + } + /* detection range: 1~64K Lux, maunal measurement */ + ret_val = i2c_smbus_write_byte_data(client, 0x81, 0x08); + if (ret_val < 0) + dev_err(&client->dev, "failed default LUX on write\n"); + + /* We always get 0 for the 1st measurement after system power on, + * so make sure it is finished before user asks for data. 
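+ * als_wait_for_data_ready() polls status register 0x86 for the + * data-ready bit (0x80), sleeping 30 ms between retries. 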
+ */ + als_wait_for_data_ready(&client->dev); + + return ret_val; +} + +static int apds9802als_probe(struct i2c_client *client) +{ + int res; + struct als_data *data; + + data = kzalloc(sizeof(struct als_data), GFP_KERNEL); + if (data == NULL) { + dev_err(&client->dev, "Memory allocation failed\n"); + return -ENOMEM; + } + i2c_set_clientdata(client, data); + res = sysfs_create_group(&client->dev.kobj, &m_als_gr); + if (res) { + dev_err(&client->dev, "device create file failed\n"); + goto als_error1; + } + dev_info(&client->dev, "ALS chip found\n"); + als_set_default_config(client); + mutex_init(&data->mutex); + + pm_runtime_set_active(&client->dev); + pm_runtime_enable(&client->dev); + + return res; +als_error1: + kfree(data); + return res; +} + +static void apds9802als_remove(struct i2c_client *client) +{ + struct als_data *data = i2c_get_clientdata(client); + + pm_runtime_get_sync(&client->dev); + + als_set_power_state(client, false); + sysfs_remove_group(&client->dev.kobj, &m_als_gr); + + pm_runtime_disable(&client->dev); + pm_runtime_set_suspended(&client->dev); + pm_runtime_put_noidle(&client->dev); + + kfree(data); +} + +#ifdef CONFIG_PM + +static int apds9802als_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + als_set_power_state(client, false); + return 0; +} + +static int apds9802als_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + als_set_power_state(client, true); + return 0; +} + +static UNIVERSAL_DEV_PM_OPS(apds9802als_pm_ops, apds9802als_suspend, + apds9802als_resume, NULL); + +#define APDS9802ALS_PM_OPS (&apds9802als_pm_ops) + +#else /* CONFIG_PM */ +#define APDS9802ALS_PM_OPS NULL +#endif /* CONFIG_PM */ + +static const struct i2c_device_id apds9802als_id[] = { + { DRIVER_NAME, 0 }, + { } +}; + +MODULE_DEVICE_TABLE(i2c, apds9802als_id); + +static struct i2c_driver apds9802als_driver = { + .driver = { + .name = DRIVER_NAME, + .pm = APDS9802ALS_PM_OPS, + }, + .probe = apds9802als_probe, + .remove = apds9802als_remove, + .id_table = apds9802als_id, +}; + +module_i2c_driver(apds9802als_driver); + +MODULE_AUTHOR("Anantha Narayanan + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Register map */ +#define APDS990X_ENABLE 0x00 /* Enable of states and interrupts */ +#define APDS990X_ATIME 0x01 /* ALS ADC time */ +#define APDS990X_PTIME 0x02 /* Proximity ADC time */ +#define APDS990X_WTIME 0x03 /* Wait time */ +#define APDS990X_AILTL 0x04 /* ALS interrupt low threshold low byte */ +#define APDS990X_AILTH 0x05 /* ALS interrupt low threshold hi byte */ +#define APDS990X_AIHTL 0x06 /* ALS interrupt hi threshold low byte */ +#define APDS990X_AIHTH 0x07 /* ALS interrupt hi threshold hi byte */ +#define APDS990X_PILTL 0x08 /* Proximity interrupt low threshold low byte */ +#define APDS990X_PILTH 0x09 /* Proximity interrupt low threshold hi byte */ +#define APDS990X_PIHTL 0x0a /* Proximity interrupt hi threshold low byte */ +#define APDS990X_PIHTH 0x0b /* Proximity interrupt hi threshold hi byte */ +#define APDS990X_PERS 0x0c /* Interrupt persistence filters */ +#define APDS990X_CONFIG 0x0d /* Configuration */ +#define APDS990X_PPCOUNT 0x0e /* Proximity pulse count */ +#define APDS990X_CONTROL 0x0f /* Gain control register */ +#define APDS990X_REV 0x11 /* Revision Number */ +#define APDS990X_ID 0x12 /* Device ID */ +#define APDS990X_STATUS 0x13 /* Device status */ +#define APDS990X_CDATAL 0x14 /* Clear ADC low data register */ +#define 
APDS990X_CDATAH 0x15 /* Clear ADC high data register */ +#define APDS990X_IRDATAL 0x16 /* IR ADC low data register */ +#define APDS990X_IRDATAH 0x17 /* IR ADC high data register */ +#define APDS990X_PDATAL 0x18 /* Proximity ADC low data register */ +#define APDS990X_PDATAH 0x19 /* Proximity ADC high data register */ + +/* Control */ +#define APDS990X_MAX_AGAIN 3 + +/* Enable register */ +#define APDS990X_EN_PIEN (0x1 << 5) +#define APDS990X_EN_AIEN (0x1 << 4) +#define APDS990X_EN_WEN (0x1 << 3) +#define APDS990X_EN_PEN (0x1 << 2) +#define APDS990X_EN_AEN (0x1 << 1) +#define APDS990X_EN_PON (0x1 << 0) +#define APDS990X_EN_DISABLE_ALL 0 + +/* Status register */ +#define APDS990X_ST_PINT (0x1 << 5) +#define APDS990X_ST_AINT (0x1 << 4) + +/* I2C access types */ +#define APDS990x_CMD_TYPE_MASK (0x03 << 5) +#define APDS990x_CMD_TYPE_RB (0x00 << 5) /* Repeated byte */ +#define APDS990x_CMD_TYPE_INC (0x01 << 5) /* Auto increment */ +#define APDS990x_CMD_TYPE_SPE (0x03 << 5) /* Special function */ + +#define APDS990x_ADDR_SHIFT 0 +#define APDS990x_CMD 0x80 + +/* Interrupt ack commands */ +#define APDS990X_INT_ACK_ALS 0x6 +#define APDS990X_INT_ACK_PS 0x5 +#define APDS990X_INT_ACK_BOTH 0x7 + +/* ptime */ +#define APDS990X_PTIME_DEFAULT 0xff /* Recommended conversion time 2.7ms*/ + +/* wtime */ +#define APDS990X_WTIME_DEFAULT 0xee /* ~50ms wait time */ + +#define APDS990X_TIME_TO_ADC 1024 /* One timetick as ADC count value */ + +/* Persistence */ +#define APDS990X_APERS_SHIFT 0 +#define APDS990X_PPERS_SHIFT 4 + +/* Supported ID:s */ +#define APDS990X_ID_0 0x0 +#define APDS990X_ID_4 0x4 +#define APDS990X_ID_29 0x29 + +/* pgain and pdiode settings */ +#define APDS_PGAIN_1X 0x0 +#define APDS_PDIODE_IR 0x2 + +#define APDS990X_LUX_OUTPUT_SCALE 10 + +/* Reverse chip factors for threshold calculation */ +struct reverse_factors { + u32 afactor; + int cf1; + int irf1; + int cf2; + int irf2; +}; + +struct apds990x_chip { + struct apds990x_platform_data *pdata; + struct i2c_client *client; + struct mutex mutex; /* avoid parallel access */ + struct regulator_bulk_data regs[2]; + wait_queue_head_t wait; + + int prox_en; + bool prox_continuous_mode; + bool lux_wait_fresh_res; + + /* Chip parameters */ + struct apds990x_chip_factors cf; + struct reverse_factors rcf; + u16 atime; /* als integration time */ + u16 arate; /* als reporting rate */ + u16 a_max_result; /* Max possible ADC value with current atime */ + u8 again_meas; /* Gain used in last measurement */ + u8 again_next; /* Next calculated gain */ + u8 pgain; + u8 pdiode; + u8 pdrive; + u8 lux_persistence; + u8 prox_persistence; + + u32 lux_raw; + u32 lux; + u16 lux_clear; + u16 lux_ir; + u16 lux_calib; + u32 lux_thres_hi; + u32 lux_thres_lo; + + u32 prox_thres; + u16 prox_data; + u16 prox_calib; + + char chipname[10]; + u8 revision; +}; + +#define APDS_CALIB_SCALER 8192 +#define APDS_LUX_NEUTRAL_CALIB_VALUE (1 * APDS_CALIB_SCALER) +#define APDS_PROX_NEUTRAL_CALIB_VALUE (1 * APDS_CALIB_SCALER) + +#define APDS_PROX_DEF_THRES 600 +#define APDS_PROX_HYSTERESIS 50 +#define APDS_LUX_DEF_THRES_HI 101 +#define APDS_LUX_DEF_THRES_LO 100 +#define APDS_DEFAULT_PROX_PERS 1 + +#define APDS_TIMEOUT 2000 +#define APDS_STARTUP_DELAY 25000 /* us */ +#define APDS_RANGE 65535 +#define APDS_PROX_RANGE 1023 +#define APDS_LUX_GAIN_LO_LIMIT 100 +#define APDS_LUX_GAIN_LO_LIMIT_STRICT 25 + +#define TIMESTEP 87 /* 2.7ms is about 87 / 32 */ +#define TIME_STEP_SCALER 32 + +#define APDS_LUX_AVERAGING_TIME 50 /* tolerates 50/60Hz ripple */ +#define APDS_LUX_DEFAULT_RATE 200 + +static 
const u8 again[] = {1, 8, 16, 120}; /* ALS gain steps */ + +/* Following two tables must match i.e 10Hz rate means 1 as persistence value */ +static const u16 arates_hz[] = {10, 5, 2, 1}; +static const u8 apersis[] = {1, 2, 4, 5}; + +/* Regulators */ +static const char reg_vcc[] = "Vdd"; +static const char reg_vled[] = "Vled"; + +static int apds990x_read_byte(struct apds990x_chip *chip, u8 reg, u8 *data) +{ + struct i2c_client *client = chip->client; + s32 ret; + + reg &= ~APDS990x_CMD_TYPE_MASK; + reg |= APDS990x_CMD | APDS990x_CMD_TYPE_RB; + + ret = i2c_smbus_read_byte_data(client, reg); + *data = ret; + return (int)ret; +} + +static int apds990x_read_word(struct apds990x_chip *chip, u8 reg, u16 *data) +{ + struct i2c_client *client = chip->client; + s32 ret; + + reg &= ~APDS990x_CMD_TYPE_MASK; + reg |= APDS990x_CMD | APDS990x_CMD_TYPE_INC; + + ret = i2c_smbus_read_word_data(client, reg); + *data = ret; + return (int)ret; +} + +static int apds990x_write_byte(struct apds990x_chip *chip, u8 reg, u8 data) +{ + struct i2c_client *client = chip->client; + s32 ret; + + reg &= ~APDS990x_CMD_TYPE_MASK; + reg |= APDS990x_CMD | APDS990x_CMD_TYPE_RB; + + ret = i2c_smbus_write_byte_data(client, reg, data); + return (int)ret; +} + +static int apds990x_write_word(struct apds990x_chip *chip, u8 reg, u16 data) +{ + struct i2c_client *client = chip->client; + s32 ret; + + reg &= ~APDS990x_CMD_TYPE_MASK; + reg |= APDS990x_CMD | APDS990x_CMD_TYPE_INC; + + ret = i2c_smbus_write_word_data(client, reg, data); + return (int)ret; +} + +static int apds990x_mode_on(struct apds990x_chip *chip) +{ + /* ALS is mandatory, proximity optional */ + u8 reg = APDS990X_EN_AIEN | APDS990X_EN_PON | APDS990X_EN_AEN | + APDS990X_EN_WEN; + + if (chip->prox_en) + reg |= APDS990X_EN_PIEN | APDS990X_EN_PEN; + + return apds990x_write_byte(chip, APDS990X_ENABLE, reg); +} + +static u16 apds990x_lux_to_threshold(struct apds990x_chip *chip, u32 lux) +{ + u32 thres; + u32 cpl; + u32 ir; + + if (lux == 0) + return 0; + else if (lux == APDS_RANGE) + return APDS_RANGE; + + /* + * Reported LUX value is a combination of the IR and CLEAR channel + * values. However, interrupt threshold is only for clear channel. + * This function approximates needed HW threshold value for a given + * LUX value in the current lightning type. + * IR level compared to visible light varies heavily depending on the + * source of the light + * + * Calculate threshold value for the next measurement period. + * Math: threshold = lux * cpl where + * cpl = atime * again / (glass_attenuation * device_factor) + * (count-per-lux) + * + * First remove calibration. Division by four is to avoid overflow + */ + lux = lux * (APDS_CALIB_SCALER / 4) / (chip->lux_calib / 4); + + /* Multiplication by 64 is to increase accuracy */ + cpl = ((u32)chip->atime * (u32)again[chip->again_next] * + APDS_PARAM_SCALE * 64) / (chip->cf.ga * chip->cf.df); + + thres = lux * cpl / 64; + /* + * Convert IR light from the latest result to match with + * new gain step. This helps to adapt with the current + * source of light. 
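+ *
+ * Purely illustrative example (assumed numbers, not from a datasheet):
+ * with the default uncovered-sensor factors ga = 1966 and df = 52,
+ * atime = 50, the 8x gain step and neutral calibration, the math
+ * above gives cpl = 50 * 8 * 4096 * 64 / (1966 * 52) ~= 1025, so a
+ * 100 lux threshold becomes roughly 100 * 1025 / 64 ~= 1600
+ * clear-channel counts before the IR compensation below is applied.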
+ */ + ir = (u32)chip->lux_ir * (u32)again[chip->again_next] / + (u32)again[chip->again_meas]; + + /* + * Compensate count with IR light impact + * IAC1 > IAC2 (see apds990x_get_lux for formulas) + */ + if (chip->lux_clear * APDS_PARAM_SCALE >= + chip->rcf.afactor * chip->lux_ir) + thres = (chip->rcf.cf1 * thres + chip->rcf.irf1 * ir) / + APDS_PARAM_SCALE; + else + thres = (chip->rcf.cf2 * thres + chip->rcf.irf2 * ir) / + APDS_PARAM_SCALE; + + if (thres >= chip->a_max_result) + thres = chip->a_max_result - 1; + return thres; +} + +static inline int apds990x_set_atime(struct apds990x_chip *chip, u32 time_ms) +{ + u8 reg_value; + + chip->atime = time_ms; + /* Formula is specified in the data sheet */ + reg_value = 256 - ((time_ms * TIME_STEP_SCALER) / TIMESTEP); + /* Calculate max ADC value for given integration time */ + chip->a_max_result = (u16)(256 - reg_value) * APDS990X_TIME_TO_ADC; + return apds990x_write_byte(chip, APDS990X_ATIME, reg_value); +} + +/* Called always with mutex locked */ +static int apds990x_refresh_pthres(struct apds990x_chip *chip, int data) +{ + int ret, lo, hi; + + /* If the chip is not in use, don't try to access it */ + if (pm_runtime_suspended(&chip->client->dev)) + return 0; + + if (data < chip->prox_thres) { + lo = 0; + hi = chip->prox_thres; + } else { + lo = chip->prox_thres - APDS_PROX_HYSTERESIS; + if (chip->prox_continuous_mode) + hi = chip->prox_thres; + else + hi = APDS_RANGE; + } + + ret = apds990x_write_word(chip, APDS990X_PILTL, lo); + ret |= apds990x_write_word(chip, APDS990X_PIHTL, hi); + return ret; +} + +/* Called always with mutex locked */ +static int apds990x_refresh_athres(struct apds990x_chip *chip) +{ + int ret; + /* If the chip is not in use, don't try to access it */ + if (pm_runtime_suspended(&chip->client->dev)) + return 0; + + ret = apds990x_write_word(chip, APDS990X_AILTL, + apds990x_lux_to_threshold(chip, chip->lux_thres_lo)); + ret |= apds990x_write_word(chip, APDS990X_AIHTL, + apds990x_lux_to_threshold(chip, chip->lux_thres_hi)); + + return ret; +} + +/* Called always with mutex locked */ +static void apds990x_force_a_refresh(struct apds990x_chip *chip) +{ + /* This will force ALS interrupt after the next measurement. */ + apds990x_write_word(chip, APDS990X_AILTL, APDS_LUX_DEF_THRES_LO); + apds990x_write_word(chip, APDS990X_AIHTL, APDS_LUX_DEF_THRES_HI); +} + +/* Called always with mutex locked */ +static void apds990x_force_p_refresh(struct apds990x_chip *chip) +{ + /* This will force proximity interrupt after the next measurement. */ + apds990x_write_word(chip, APDS990X_PILTL, APDS_PROX_DEF_THRES - 1); + apds990x_write_word(chip, APDS990X_PIHTL, APDS_PROX_DEF_THRES); +} + +/* Called always with mutex locked */ +static int apds990x_calc_again(struct apds990x_chip *chip) +{ + int curr_again = chip->again_meas; + int next_again = chip->again_meas; + int ret = 0; + + /* Calculate suitable als gain */ + if (chip->lux_clear == chip->a_max_result) + next_again -= 2; /* ALS saturated. Decrease gain by 2 steps */ + else if (chip->lux_clear > chip->a_max_result / 2) + next_again--; + else if (chip->lux_clear < APDS_LUX_GAIN_LO_LIMIT_STRICT) + next_again += 2; /* Too dark. 
Increase gain by 2 steps */ + else if (chip->lux_clear < APDS_LUX_GAIN_LO_LIMIT) + next_again++; + + /* Limit gain to available range */ + if (next_again < 0) + next_again = 0; + else if (next_again > APDS990X_MAX_AGAIN) + next_again = APDS990X_MAX_AGAIN; + + /* Let's check can we trust the measured result */ + if (chip->lux_clear == chip->a_max_result) + /* Result can be totally garbage due to saturation */ + ret = -ERANGE; + else if (next_again != curr_again && + chip->lux_clear < APDS_LUX_GAIN_LO_LIMIT_STRICT) + /* + * Gain is changed and measurement result is very small. + * Result can be totally garbage due to underflow + */ + ret = -ERANGE; + + chip->again_next = next_again; + apds990x_write_byte(chip, APDS990X_CONTROL, + (chip->pdrive << 6) | + (chip->pdiode << 4) | + (chip->pgain << 2) | + (chip->again_next << 0)); + + /* + * Error means bad result -> re-measurement is needed. The forced + * refresh uses fastest possible persistence setting to get result + * as soon as possible. + */ + if (ret < 0) + apds990x_force_a_refresh(chip); + else + apds990x_refresh_athres(chip); + + return ret; +} + +/* Called always with mutex locked */ +static int apds990x_get_lux(struct apds990x_chip *chip, int clear, int ir) +{ + int iac, iac1, iac2; /* IR adjusted counts */ + u32 lpc; /* Lux per count */ + + /* Formulas: + * iac1 = CF1 * CLEAR_CH - IRF1 * IR_CH + * iac2 = CF2 * CLEAR_CH - IRF2 * IR_CH + */ + iac1 = (chip->cf.cf1 * clear - chip->cf.irf1 * ir) / APDS_PARAM_SCALE; + iac2 = (chip->cf.cf2 * clear - chip->cf.irf2 * ir) / APDS_PARAM_SCALE; + + iac = max(iac1, iac2); + iac = max(iac, 0); + + lpc = APDS990X_LUX_OUTPUT_SCALE * (chip->cf.df * chip->cf.ga) / + (u32)(again[chip->again_meas] * (u32)chip->atime); + + return (iac * lpc) / APDS_PARAM_SCALE; +} + +static int apds990x_ack_int(struct apds990x_chip *chip, u8 mode) +{ + struct i2c_client *client = chip->client; + s32 ret; + u8 reg = APDS990x_CMD | APDS990x_CMD_TYPE_SPE; + + switch (mode & (APDS990X_ST_AINT | APDS990X_ST_PINT)) { + case APDS990X_ST_AINT: + reg |= APDS990X_INT_ACK_ALS; + break; + case APDS990X_ST_PINT: + reg |= APDS990X_INT_ACK_PS; + break; + default: + reg |= APDS990X_INT_ACK_BOTH; + break; + } + + ret = i2c_smbus_read_byte_data(client, reg); + return (int)ret; +} + +static irqreturn_t apds990x_irq(int irq, void *data) +{ + struct apds990x_chip *chip = data; + u8 status; + + apds990x_read_byte(chip, APDS990X_STATUS, &status); + apds990x_ack_int(chip, status); + + mutex_lock(&chip->mutex); + if (!pm_runtime_suspended(&chip->client->dev)) { + if (status & APDS990X_ST_AINT) { + apds990x_read_word(chip, APDS990X_CDATAL, + &chip->lux_clear); + apds990x_read_word(chip, APDS990X_IRDATAL, + &chip->lux_ir); + /* Store used gain for calculations */ + chip->again_meas = chip->again_next; + + chip->lux_raw = apds990x_get_lux(chip, + chip->lux_clear, + chip->lux_ir); + + if (apds990x_calc_again(chip) == 0) { + /* Result is valid */ + chip->lux = chip->lux_raw; + chip->lux_wait_fresh_res = false; + wake_up(&chip->wait); + sysfs_notify(&chip->client->dev.kobj, + NULL, "lux0_input"); + } + } + + if ((status & APDS990X_ST_PINT) && chip->prox_en) { + u16 clr_ch; + + apds990x_read_word(chip, APDS990X_CDATAL, &clr_ch); + /* + * If ALS channel is saturated at min gain, + * proximity gives false posivite values. + * Just ignore them. 
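+ *
+ * "Saturated at min gain" means the last ALS cycle ran at the
+ * lowest gain step (again_meas == 0, i.e. 1x) and the clear channel
+ * still reached a_max_result, the ADC ceiling for the current
+ * integration time ((256 - ATIME register value) * 1024 counts).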
+ */ + if (chip->again_meas == 0 && + clr_ch == chip->a_max_result) + chip->prox_data = 0; + else + apds990x_read_word(chip, + APDS990X_PDATAL, + &chip->prox_data); + + apds990x_refresh_pthres(chip, chip->prox_data); + if (chip->prox_data < chip->prox_thres) + chip->prox_data = 0; + else if (!chip->prox_continuous_mode) + chip->prox_data = APDS_PROX_RANGE; + sysfs_notify(&chip->client->dev.kobj, + NULL, "prox0_raw"); + } + } + mutex_unlock(&chip->mutex); + return IRQ_HANDLED; +} + +static int apds990x_configure(struct apds990x_chip *chip) +{ + /* It is recommended to use disabled mode during these operations */ + apds990x_write_byte(chip, APDS990X_ENABLE, APDS990X_EN_DISABLE_ALL); + + /* conversion and wait times for different state machince states */ + apds990x_write_byte(chip, APDS990X_PTIME, APDS990X_PTIME_DEFAULT); + apds990x_write_byte(chip, APDS990X_WTIME, APDS990X_WTIME_DEFAULT); + apds990x_set_atime(chip, APDS_LUX_AVERAGING_TIME); + + apds990x_write_byte(chip, APDS990X_CONFIG, 0); + + /* Persistence levels */ + apds990x_write_byte(chip, APDS990X_PERS, + (chip->lux_persistence << APDS990X_APERS_SHIFT) | + (chip->prox_persistence << APDS990X_PPERS_SHIFT)); + + apds990x_write_byte(chip, APDS990X_PPCOUNT, chip->pdata->ppcount); + + /* Start with relatively small gain */ + chip->again_meas = 1; + chip->again_next = 1; + apds990x_write_byte(chip, APDS990X_CONTROL, + (chip->pdrive << 6) | + (chip->pdiode << 4) | + (chip->pgain << 2) | + (chip->again_next << 0)); + return 0; +} + +static int apds990x_detect(struct apds990x_chip *chip) +{ + struct i2c_client *client = chip->client; + int ret; + u8 id; + + ret = apds990x_read_byte(chip, APDS990X_ID, &id); + if (ret < 0) { + dev_err(&client->dev, "ID read failed\n"); + return ret; + } + + ret = apds990x_read_byte(chip, APDS990X_REV, &chip->revision); + if (ret < 0) { + dev_err(&client->dev, "REV read failed\n"); + return ret; + } + + switch (id) { + case APDS990X_ID_0: + case APDS990X_ID_4: + case APDS990X_ID_29: + snprintf(chip->chipname, sizeof(chip->chipname), "APDS-990x"); + break; + default: + ret = -ENODEV; + break; + } + return ret; +} + +#ifdef CONFIG_PM +static int apds990x_chip_on(struct apds990x_chip *chip) +{ + int err = regulator_bulk_enable(ARRAY_SIZE(chip->regs), + chip->regs); + if (err < 0) + return err; + + usleep_range(APDS_STARTUP_DELAY, 2 * APDS_STARTUP_DELAY); + + /* Refresh all configs in case of regulators were off */ + chip->prox_data = 0; + apds990x_configure(chip); + apds990x_mode_on(chip); + return 0; +} +#endif + +static int apds990x_chip_off(struct apds990x_chip *chip) +{ + apds990x_write_byte(chip, APDS990X_ENABLE, APDS990X_EN_DISABLE_ALL); + regulator_bulk_disable(ARRAY_SIZE(chip->regs), chip->regs); + return 0; +} + +static ssize_t apds990x_lux_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + ssize_t ret; + u32 result; + long timeout; + + if (pm_runtime_suspended(dev)) + return -EIO; + + timeout = wait_event_interruptible_timeout(chip->wait, + !chip->lux_wait_fresh_res, + msecs_to_jiffies(APDS_TIMEOUT)); + if (!timeout) + return -EIO; + + mutex_lock(&chip->mutex); + result = (chip->lux * chip->lux_calib) / APDS_CALIB_SCALER; + if (result > (APDS_RANGE * APDS990X_LUX_OUTPUT_SCALE)) + result = APDS_RANGE * APDS990X_LUX_OUTPUT_SCALE; + + ret = sprintf(buf, "%d.%d\n", + result / APDS990X_LUX_OUTPUT_SCALE, + result % APDS990X_LUX_OUTPUT_SCALE); + mutex_unlock(&chip->mutex); + return ret; +} + +static DEVICE_ATTR(lux0_input, S_IRUGO, 
apds990x_lux_show, NULL); + +static ssize_t apds990x_lux_range_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", APDS_RANGE); +} + +static DEVICE_ATTR(lux0_sensor_range, S_IRUGO, apds990x_lux_range_show, NULL); + +static ssize_t apds990x_lux_calib_format_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", APDS_CALIB_SCALER); +} + +static DEVICE_ATTR(lux0_calibscale_default, S_IRUGO, + apds990x_lux_calib_format_show, NULL); + +static ssize_t apds990x_lux_calib_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%u\n", chip->lux_calib); +} + +static ssize_t apds990x_lux_calib_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + chip->lux_calib = value; + + return len; +} + +static DEVICE_ATTR(lux0_calibscale, S_IRUGO | S_IWUSR, apds990x_lux_calib_show, + apds990x_lux_calib_store); + +static ssize_t apds990x_rate_avail(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int i; + int pos = 0; + + for (i = 0; i < ARRAY_SIZE(arates_hz); i++) + pos += sprintf(buf + pos, "%d ", arates_hz[i]); + sprintf(buf + pos - 1, "\n"); + return pos; +} + +static ssize_t apds990x_rate_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", chip->arate); +} + +static int apds990x_set_arate(struct apds990x_chip *chip, int rate) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(arates_hz); i++) + if (rate >= arates_hz[i]) + break; + + if (i == ARRAY_SIZE(arates_hz)) + return -EINVAL; + + /* Pick up corresponding persistence value */ + chip->lux_persistence = apersis[i]; + chip->arate = arates_hz[i]; + + /* If the chip is not in use, don't try to access it */ + if (pm_runtime_suspended(&chip->client->dev)) + return 0; + + /* Persistence levels */ + return apds990x_write_byte(chip, APDS990X_PERS, + (chip->lux_persistence << APDS990X_APERS_SHIFT) | + (chip->prox_persistence << APDS990X_PPERS_SHIFT)); +} + +static ssize_t apds990x_rate_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + mutex_lock(&chip->mutex); + ret = apds990x_set_arate(chip, value); + mutex_unlock(&chip->mutex); + + if (ret < 0) + return ret; + return len; +} + +static DEVICE_ATTR(lux0_rate_avail, S_IRUGO, apds990x_rate_avail, NULL); + +static DEVICE_ATTR(lux0_rate, S_IRUGO | S_IWUSR, apds990x_rate_show, + apds990x_rate_store); + +static ssize_t apds990x_prox_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + ssize_t ret; + struct apds990x_chip *chip = dev_get_drvdata(dev); + + if (pm_runtime_suspended(dev) || !chip->prox_en) + return -EIO; + + mutex_lock(&chip->mutex); + ret = sprintf(buf, "%d\n", chip->prox_data); + mutex_unlock(&chip->mutex); + return ret; +} + +static DEVICE_ATTR(prox0_raw, S_IRUGO, apds990x_prox_show, NULL); + +static ssize_t apds990x_prox_range_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", APDS_PROX_RANGE); +} + +static DEVICE_ATTR(prox0_sensor_range, S_IRUGO, 
apds990x_prox_range_show, NULL); + +static ssize_t apds990x_prox_enable_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", chip->prox_en); +} + +static ssize_t apds990x_prox_enable_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + mutex_lock(&chip->mutex); + + if (!chip->prox_en) + chip->prox_data = 0; + + if (value) + chip->prox_en++; + else if (chip->prox_en > 0) + chip->prox_en--; + + if (!pm_runtime_suspended(dev)) + apds990x_mode_on(chip); + mutex_unlock(&chip->mutex); + return len; +} + +static DEVICE_ATTR(prox0_raw_en, S_IRUGO | S_IWUSR, apds990x_prox_enable_show, + apds990x_prox_enable_store); + +static const char *reporting_modes[] = {"trigger", "periodic"}; + +static ssize_t apds990x_prox_reporting_mode_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", + reporting_modes[!!chip->prox_continuous_mode]); +} + +static ssize_t apds990x_prox_reporting_mode_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + int ret; + + ret = sysfs_match_string(reporting_modes, buf); + if (ret < 0) + return ret; + + chip->prox_continuous_mode = ret; + return len; +} + +static DEVICE_ATTR(prox0_reporting_mode, S_IRUGO | S_IWUSR, + apds990x_prox_reporting_mode_show, + apds990x_prox_reporting_mode_store); + +static ssize_t apds990x_prox_reporting_avail_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%s %s\n", reporting_modes[0], reporting_modes[1]); +} + +static DEVICE_ATTR(prox0_reporting_mode_avail, S_IRUGO | S_IWUSR, + apds990x_prox_reporting_avail_show, NULL); + + +static ssize_t apds990x_lux_thresh_above_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", chip->lux_thres_hi); +} + +static ssize_t apds990x_lux_thresh_below_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", chip->lux_thres_lo); +} + +static ssize_t apds990x_set_lux_thresh(struct apds990x_chip *chip, u32 *target, + const char *buf) +{ + unsigned long thresh; + int ret; + + ret = kstrtoul(buf, 0, &thresh); + if (ret) + return ret; + + if (thresh > APDS_RANGE) + return -EINVAL; + + mutex_lock(&chip->mutex); + *target = thresh; + /* + * Don't update values in HW if we are still waiting for + * first interrupt to come after device handle open call. 
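+ *
+ * lux_wait_fresh_res is set when the chip is powered up through the
+ * power_state attribute and cleared by the interrupt handler once a
+ * valid result has arrived. Updating the HW thresholds earlier would
+ * overwrite the forced 100/101 defaults that make virtually any
+ * reading raise that first interrupt.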
+ */ + if (!chip->lux_wait_fresh_res) + apds990x_refresh_athres(chip); + mutex_unlock(&chip->mutex); + return ret; + +} + +static ssize_t apds990x_lux_thresh_above_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + int ret = apds990x_set_lux_thresh(chip, &chip->lux_thres_hi, buf); + + if (ret < 0) + return ret; + return len; +} + +static ssize_t apds990x_lux_thresh_below_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + int ret = apds990x_set_lux_thresh(chip, &chip->lux_thres_lo, buf); + + if (ret < 0) + return ret; + return len; +} + +static DEVICE_ATTR(lux0_thresh_above_value, S_IRUGO | S_IWUSR, + apds990x_lux_thresh_above_show, + apds990x_lux_thresh_above_store); + +static DEVICE_ATTR(lux0_thresh_below_value, S_IRUGO | S_IWUSR, + apds990x_lux_thresh_below_show, + apds990x_lux_thresh_below_store); + +static ssize_t apds990x_prox_threshold_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", chip->prox_thres); +} + +static ssize_t apds990x_prox_threshold_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + if ((value > APDS_RANGE) || (value == 0) || + (value < APDS_PROX_HYSTERESIS)) + return -EINVAL; + + mutex_lock(&chip->mutex); + chip->prox_thres = value; + + apds990x_force_p_refresh(chip); + mutex_unlock(&chip->mutex); + return len; +} + +static DEVICE_ATTR(prox0_thresh_above_value, S_IRUGO | S_IWUSR, + apds990x_prox_threshold_show, + apds990x_prox_threshold_store); + +static ssize_t apds990x_power_state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", !pm_runtime_suspended(dev)); + return 0; +} + +static ssize_t apds990x_power_state_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + if (value) { + pm_runtime_get_sync(dev); + mutex_lock(&chip->mutex); + chip->lux_wait_fresh_res = true; + apds990x_force_a_refresh(chip); + apds990x_force_p_refresh(chip); + mutex_unlock(&chip->mutex); + } else { + if (!pm_runtime_suspended(dev)) + pm_runtime_put(dev); + } + return len; +} + +static DEVICE_ATTR(power_state, S_IRUGO | S_IWUSR, + apds990x_power_state_show, + apds990x_power_state_store); + +static ssize_t apds990x_chip_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%s %d\n", chip->chipname, chip->revision); +} + +static DEVICE_ATTR(chip_id, S_IRUGO, apds990x_chip_id_show, NULL); + +static struct attribute *sysfs_attrs_ctrl[] = { + &dev_attr_lux0_calibscale.attr, + &dev_attr_lux0_calibscale_default.attr, + &dev_attr_lux0_input.attr, + &dev_attr_lux0_sensor_range.attr, + &dev_attr_lux0_rate.attr, + &dev_attr_lux0_rate_avail.attr, + &dev_attr_lux0_thresh_above_value.attr, + &dev_attr_lux0_thresh_below_value.attr, + &dev_attr_prox0_raw_en.attr, + &dev_attr_prox0_raw.attr, + &dev_attr_prox0_sensor_range.attr, + &dev_attr_prox0_thresh_above_value.attr, + 
&dev_attr_prox0_reporting_mode.attr, + &dev_attr_prox0_reporting_mode_avail.attr, + &dev_attr_chip_id.attr, + &dev_attr_power_state.attr, + NULL +}; + +static const struct attribute_group apds990x_attribute_group[] = { + {.attrs = sysfs_attrs_ctrl }, +}; + +static int apds990x_probe(struct i2c_client *client) +{ + struct apds990x_chip *chip; + int err; + + chip = kzalloc(sizeof *chip, GFP_KERNEL); + if (!chip) + return -ENOMEM; + + i2c_set_clientdata(client, chip); + chip->client = client; + + init_waitqueue_head(&chip->wait); + mutex_init(&chip->mutex); + chip->pdata = client->dev.platform_data; + + if (chip->pdata == NULL) { + dev_err(&client->dev, "platform data is mandatory\n"); + err = -EINVAL; + goto fail1; + } + + if (chip->pdata->cf.ga == 0) { + /* set uncovered sensor default parameters */ + chip->cf.ga = 1966; /* 0.48 * APDS_PARAM_SCALE */ + chip->cf.cf1 = 4096; /* 1.00 * APDS_PARAM_SCALE */ + chip->cf.irf1 = 9134; /* 2.23 * APDS_PARAM_SCALE */ + chip->cf.cf2 = 2867; /* 0.70 * APDS_PARAM_SCALE */ + chip->cf.irf2 = 5816; /* 1.42 * APDS_PARAM_SCALE */ + chip->cf.df = 52; + } else { + chip->cf = chip->pdata->cf; + } + + /* precalculate inverse chip factors for threshold control */ + chip->rcf.afactor = + (chip->cf.irf1 - chip->cf.irf2) * APDS_PARAM_SCALE / + (chip->cf.cf1 - chip->cf.cf2); + chip->rcf.cf1 = APDS_PARAM_SCALE * APDS_PARAM_SCALE / + chip->cf.cf1; + chip->rcf.irf1 = chip->cf.irf1 * APDS_PARAM_SCALE / + chip->cf.cf1; + chip->rcf.cf2 = APDS_PARAM_SCALE * APDS_PARAM_SCALE / + chip->cf.cf2; + chip->rcf.irf2 = chip->cf.irf2 * APDS_PARAM_SCALE / + chip->cf.cf2; + + /* Set something to start with */ + chip->lux_thres_hi = APDS_LUX_DEF_THRES_HI; + chip->lux_thres_lo = APDS_LUX_DEF_THRES_LO; + chip->lux_calib = APDS_LUX_NEUTRAL_CALIB_VALUE; + + chip->prox_thres = APDS_PROX_DEF_THRES; + chip->pdrive = chip->pdata->pdrive; + chip->pdiode = APDS_PDIODE_IR; + chip->pgain = APDS_PGAIN_1X; + chip->prox_calib = APDS_PROX_NEUTRAL_CALIB_VALUE; + chip->prox_persistence = APDS_DEFAULT_PROX_PERS; + chip->prox_continuous_mode = false; + + chip->regs[0].supply = reg_vcc; + chip->regs[1].supply = reg_vled; + + err = regulator_bulk_get(&client->dev, + ARRAY_SIZE(chip->regs), chip->regs); + if (err < 0) { + dev_err(&client->dev, "Cannot get regulators\n"); + goto fail1; + } + + err = regulator_bulk_enable(ARRAY_SIZE(chip->regs), chip->regs); + if (err < 0) { + dev_err(&client->dev, "Cannot enable regulators\n"); + goto fail2; + } + + usleep_range(APDS_STARTUP_DELAY, 2 * APDS_STARTUP_DELAY); + + err = apds990x_detect(chip); + if (err < 0) { + dev_err(&client->dev, "APDS990X not found\n"); + goto fail3; + } + + pm_runtime_set_active(&client->dev); + + apds990x_configure(chip); + apds990x_set_arate(chip, APDS_LUX_DEFAULT_RATE); + apds990x_mode_on(chip); + + pm_runtime_enable(&client->dev); + + if (chip->pdata->setup_resources) { + err = chip->pdata->setup_resources(); + if (err) { + err = -EINVAL; + goto fail3; + } + } + + err = sysfs_create_group(&chip->client->dev.kobj, + apds990x_attribute_group); + if (err < 0) { + dev_err(&chip->client->dev, "Sysfs registration failed\n"); + goto fail4; + } + + err = request_threaded_irq(client->irq, NULL, + apds990x_irq, + IRQF_TRIGGER_FALLING | IRQF_TRIGGER_LOW | + IRQF_ONESHOT, + "apds990x", chip); + if (err) { + dev_err(&client->dev, "could not get IRQ %d\n", + client->irq); + goto fail5; + } + return err; +fail5: + sysfs_remove_group(&chip->client->dev.kobj, + &apds990x_attribute_group[0]); +fail4: + if (chip->pdata && chip->pdata->release_resources) + 
chip->pdata->release_resources(); +fail3: + regulator_bulk_disable(ARRAY_SIZE(chip->regs), chip->regs); +fail2: + regulator_bulk_free(ARRAY_SIZE(chip->regs), chip->regs); +fail1: + kfree(chip); + return err; +} + +static void apds990x_remove(struct i2c_client *client) +{ + struct apds990x_chip *chip = i2c_get_clientdata(client); + + free_irq(client->irq, chip); + sysfs_remove_group(&chip->client->dev.kobj, + apds990x_attribute_group); + + if (chip->pdata && chip->pdata->release_resources) + chip->pdata->release_resources(); + + if (!pm_runtime_suspended(&client->dev)) + apds990x_chip_off(chip); + + pm_runtime_disable(&client->dev); + pm_runtime_set_suspended(&client->dev); + + regulator_bulk_free(ARRAY_SIZE(chip->regs), chip->regs); + + kfree(chip); +} + +#ifdef CONFIG_PM_SLEEP +static int apds990x_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct apds990x_chip *chip = i2c_get_clientdata(client); + + apds990x_chip_off(chip); + return 0; +} + +static int apds990x_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct apds990x_chip *chip = i2c_get_clientdata(client); + + /* + * If we were enabled at suspend time, it is expected + * everything works nice and smoothly. Chip_on is enough + */ + apds990x_chip_on(chip); + + return 0; +} +#endif + +#ifdef CONFIG_PM +static int apds990x_runtime_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct apds990x_chip *chip = i2c_get_clientdata(client); + + apds990x_chip_off(chip); + return 0; +} + +static int apds990x_runtime_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct apds990x_chip *chip = i2c_get_clientdata(client); + + apds990x_chip_on(chip); + return 0; +} + +#endif + +static const struct i2c_device_id apds990x_id[] = { + {"apds990x", 0 }, + {} +}; + +MODULE_DEVICE_TABLE(i2c, apds990x_id); + +static const struct dev_pm_ops apds990x_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(apds990x_suspend, apds990x_resume) + SET_RUNTIME_PM_OPS(apds990x_runtime_suspend, + apds990x_runtime_resume, + NULL) +}; + +static struct i2c_driver apds990x_driver = { + .driver = { + .name = "apds990x", + .pm = &apds990x_pm_ops, + }, + .probe = apds990x_probe, + .remove = apds990x_remove, + .id_table = apds990x_id, +}; + +module_i2c_driver(apds990x_driver); + +MODULE_DESCRIPTION("APDS990X combined ALS and proximity sensor"); +MODULE_AUTHOR("Samu Onkalo, Nokia Corporation"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c new file mode 100644 index 0000000000..ee590c4a15 --- /dev/null +++ b/drivers/misc/atmel-ssc.c @@ -0,0 +1,281 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Atmel SSC driver + * + * Copyright (C) 2007 Atmel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "../../sound/soc/atmel/atmel_ssc_dai.h" + +/* Serialize access to ssc_list and user count */ +static DEFINE_MUTEX(user_lock); +static LIST_HEAD(ssc_list); + +struct ssc_device *ssc_request(unsigned int ssc_num) +{ + int ssc_valid = 0; + struct ssc_device *ssc; + + mutex_lock(&user_lock); + list_for_each_entry(ssc, &ssc_list, list) { + if (ssc->pdev->dev.of_node) { + if (of_alias_get_id(ssc->pdev->dev.of_node, "ssc") + == ssc_num) { + ssc->pdev->id = ssc_num; + ssc_valid = 1; + break; + } + } else if (ssc->pdev->id == ssc_num) { + ssc_valid = 1; + break; + } + } + + if (!ssc_valid) { + mutex_unlock(&user_lock); + pr_err("ssc: ssc%d 
platform device is missing\n", ssc_num); + return ERR_PTR(-ENODEV); + } + + if (ssc->user) { + mutex_unlock(&user_lock); + dev_dbg(&ssc->pdev->dev, "module busy\n"); + return ERR_PTR(-EBUSY); + } + ssc->user++; + mutex_unlock(&user_lock); + + clk_prepare(ssc->clk); + + return ssc; +} +EXPORT_SYMBOL(ssc_request); + +void ssc_free(struct ssc_device *ssc) +{ + bool disable_clk = true; + + mutex_lock(&user_lock); + if (ssc->user) + ssc->user--; + else { + disable_clk = false; + dev_dbg(&ssc->pdev->dev, "device already free\n"); + } + mutex_unlock(&user_lock); + + if (disable_clk) + clk_unprepare(ssc->clk); +} +EXPORT_SYMBOL(ssc_free); + +static struct atmel_ssc_platform_data at91rm9200_config = { + .use_dma = 0, + .has_fslen_ext = 0, +}; + +static struct atmel_ssc_platform_data at91sam9rl_config = { + .use_dma = 0, + .has_fslen_ext = 1, +}; + +static struct atmel_ssc_platform_data at91sam9g45_config = { + .use_dma = 1, + .has_fslen_ext = 1, +}; + +static const struct platform_device_id atmel_ssc_devtypes[] = { + { + .name = "at91rm9200_ssc", + .driver_data = (unsigned long) &at91rm9200_config, + }, { + .name = "at91sam9rl_ssc", + .driver_data = (unsigned long) &at91sam9rl_config, + }, { + .name = "at91sam9g45_ssc", + .driver_data = (unsigned long) &at91sam9g45_config, + }, { + /* sentinel */ + } +}; + +#ifdef CONFIG_OF +static const struct of_device_id atmel_ssc_dt_ids[] = { + { + .compatible = "atmel,at91rm9200-ssc", + .data = &at91rm9200_config, + }, { + .compatible = "atmel,at91sam9rl-ssc", + .data = &at91sam9rl_config, + }, { + .compatible = "atmel,at91sam9g45-ssc", + .data = &at91sam9g45_config, + }, { + /* sentinel */ + } +}; +MODULE_DEVICE_TABLE(of, atmel_ssc_dt_ids); +#endif + +static inline const struct atmel_ssc_platform_data * + atmel_ssc_get_driver_data(struct platform_device *pdev) +{ + if (pdev->dev.of_node) { + const struct of_device_id *match; + match = of_match_node(atmel_ssc_dt_ids, pdev->dev.of_node); + if (match == NULL) + return NULL; + return match->data; + } + + return (struct atmel_ssc_platform_data *) + platform_get_device_id(pdev)->driver_data; +} + +#ifdef CONFIG_SND_ATMEL_SOC_SSC +static int ssc_sound_dai_probe(struct ssc_device *ssc) +{ + struct device_node *np = ssc->pdev->dev.of_node; + int ret; + int id; + + ssc->sound_dai = false; + + if (!of_property_read_bool(np, "#sound-dai-cells")) + return 0; + + id = of_alias_get_id(np, "ssc"); + if (id < 0) + return id; + + ret = atmel_ssc_set_audio(id); + ssc->sound_dai = !ret; + + return ret; +} + +static void ssc_sound_dai_remove(struct ssc_device *ssc) +{ + if (!ssc->sound_dai) + return; + + atmel_ssc_put_audio(of_alias_get_id(ssc->pdev->dev.of_node, "ssc")); +} +#else +static inline int ssc_sound_dai_probe(struct ssc_device *ssc) +{ + if (of_property_read_bool(ssc->pdev->dev.of_node, "#sound-dai-cells")) + return -ENOTSUPP; + + return 0; +} + +static inline void ssc_sound_dai_remove(struct ssc_device *ssc) +{ +} +#endif + +static int ssc_probe(struct platform_device *pdev) +{ + struct resource *regs; + struct ssc_device *ssc; + const struct atmel_ssc_platform_data *plat_dat; + + ssc = devm_kzalloc(&pdev->dev, sizeof(struct ssc_device), GFP_KERNEL); + if (!ssc) { + dev_dbg(&pdev->dev, "out of memory\n"); + return -ENOMEM; + } + + ssc->pdev = pdev; + + plat_dat = atmel_ssc_get_driver_data(pdev); + if (!plat_dat) + return -ENODEV; + ssc->pdata = (struct atmel_ssc_platform_data *)plat_dat; + + if (pdev->dev.of_node) { + struct device_node *np = pdev->dev.of_node; + ssc->clk_from_rk_pin = + of_property_read_bool(np, 
"atmel,clk-from-rk-pin"); + } + + ssc->regs = devm_platform_get_and_ioremap_resource(pdev, 0, ®s); + if (IS_ERR(ssc->regs)) + return PTR_ERR(ssc->regs); + + ssc->phybase = regs->start; + + ssc->clk = devm_clk_get(&pdev->dev, "pclk"); + if (IS_ERR(ssc->clk)) { + dev_dbg(&pdev->dev, "no pclk clock defined\n"); + return -ENXIO; + } + + /* disable all interrupts */ + clk_prepare_enable(ssc->clk); + ssc_writel(ssc->regs, IDR, -1); + ssc_readl(ssc->regs, SR); + clk_disable_unprepare(ssc->clk); + + ssc->irq = platform_get_irq(pdev, 0); + if (ssc->irq < 0) { + dev_dbg(&pdev->dev, "could not get irq\n"); + return ssc->irq; + } + + mutex_lock(&user_lock); + list_add_tail(&ssc->list, &ssc_list); + mutex_unlock(&user_lock); + + platform_set_drvdata(pdev, ssc); + + dev_info(&pdev->dev, "Atmel SSC device at 0x%p (irq %d)\n", + ssc->regs, ssc->irq); + + if (ssc_sound_dai_probe(ssc)) + dev_err(&pdev->dev, "failed to auto-setup ssc for audio\n"); + + return 0; +} + +static int ssc_remove(struct platform_device *pdev) +{ + struct ssc_device *ssc = platform_get_drvdata(pdev); + + ssc_sound_dai_remove(ssc); + + mutex_lock(&user_lock); + list_del(&ssc->list); + mutex_unlock(&user_lock); + + return 0; +} + +static struct platform_driver ssc_driver = { + .driver = { + .name = "ssc", + .of_match_table = of_match_ptr(atmel_ssc_dt_ids), + }, + .id_table = atmel_ssc_devtypes, + .probe = ssc_probe, + .remove = ssc_remove, +}; +module_platform_driver(ssc_driver); + +MODULE_AUTHOR("Hans-Christian Noren Egtvedt "); +MODULE_DESCRIPTION("SSC driver for Atmel AT91"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:ssc"); diff --git a/drivers/misc/bcm-vk/Kconfig b/drivers/misc/bcm-vk/Kconfig new file mode 100644 index 0000000000..68a972772b --- /dev/null +++ b/drivers/misc/bcm-vk/Kconfig @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Broadcom VK device +# +config BCM_VK + tristate "Support for Broadcom VK Accelerators" + depends on PCI_MSI + help + Select this option to enable support for Broadcom + VK Accelerators. VK is used for performing + multiple specific offload processing tasks in parallel. + Such offload tasks assist in such operations as video + transcoding, compression, and crypto tasks. + This driver enables userspace programs to access these + accelerators via /dev/bcm-vk.N devices. + + If unsure, say N. + +config BCM_VK_TTY + bool "Enable tty ports on a Broadcom VK Accelerator device" + depends on TTY + depends on BCM_VK + help + Select this option to enable tty support to allow console + access to Broadcom VK Accelerator cards from host. + + Device node will in the form /dev/bcm-vk.x_ttyVKy where: + x is the instance of the VK card + y is the tty device number on the VK card. diff --git a/drivers/misc/bcm-vk/Makefile b/drivers/misc/bcm-vk/Makefile new file mode 100644 index 0000000000..1df2ebe851 --- /dev/null +++ b/drivers/misc/bcm-vk/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for Broadcom VK driver +# + +obj-$(CONFIG_BCM_VK) += bcm_vk.o +bcm_vk-objs := \ + bcm_vk_dev.o \ + bcm_vk_msg.o \ + bcm_vk_sg.o + +bcm_vk-$(CONFIG_BCM_VK_TTY) += bcm_vk_tty.o diff --git a/drivers/misc/bcm-vk/bcm_vk.h b/drivers/misc/bcm-vk/bcm_vk.h new file mode 100644 index 0000000000..386884c2a2 --- /dev/null +++ b/drivers/misc/bcm-vk/bcm_vk.h @@ -0,0 +1,549 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2018-2020 Broadcom. 
+ */ + +#ifndef BCM_VK_H +#define BCM_VK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bcm_vk_msg.h" + +#define DRV_MODULE_NAME "bcm-vk" + +/* + * Load Image is completed in two stages: + * + * 1) When the VK device boot-up, M7 CPU runs and executes the BootROM. + * The Secure Boot Loader (SBL) as part of the BootROM will run + * to open up ITCM for host to push BOOT1 image. + * SBL will authenticate the image before jumping to BOOT1 image. + * + * 2) Because BOOT1 image is a secured image, we also called it the + * Secure Boot Image (SBI). At second stage, SBI will initialize DDR + * and wait for host to push BOOT2 image to DDR. + * SBI will authenticate the image before jumping to BOOT2 image. + * + */ +/* Location of registers of interest in BAR0 */ + +/* Request register for Secure Boot Loader (SBL) download */ +#define BAR_CODEPUSH_SBL 0x400 +/* Start of ITCM */ +#define CODEPUSH_BOOT1_ENTRY 0x00400000 +#define CODEPUSH_MASK 0xfffff000 +#define CODEPUSH_BOOTSTART BIT(0) + +/* Boot Status register */ +#define BAR_BOOT_STATUS 0x404 + +#define SRAM_OPEN BIT(16) +#define DDR_OPEN BIT(17) + +/* Firmware loader progress status definitions */ +#define FW_LOADER_ACK_SEND_MORE_DATA BIT(18) +#define FW_LOADER_ACK_IN_PROGRESS BIT(19) +#define FW_LOADER_ACK_RCVD_ALL_DATA BIT(20) + +/* Boot1/2 is running in standalone mode */ +#define BOOT_STDALONE_RUNNING BIT(21) + +/* definitions for boot status register */ +#define BOOT_STATE_MASK (0xffffffff & \ + ~(FW_LOADER_ACK_SEND_MORE_DATA | \ + FW_LOADER_ACK_IN_PROGRESS | \ + BOOT_STDALONE_RUNNING)) + +#define BOOT_ERR_SHIFT 4 +#define BOOT_ERR_MASK (0xf << BOOT_ERR_SHIFT) +#define BOOT_PROG_MASK 0xf + +#define BROM_STATUS_NOT_RUN 0x2 +#define BROM_NOT_RUN (SRAM_OPEN | BROM_STATUS_NOT_RUN) +#define BROM_STATUS_COMPLETE 0x6 +#define BROM_RUNNING (SRAM_OPEN | BROM_STATUS_COMPLETE) +#define BOOT1_STATUS_COMPLETE 0x6 +#define BOOT1_RUNNING (DDR_OPEN | BOOT1_STATUS_COMPLETE) +#define BOOT2_STATUS_COMPLETE 0x6 +#define BOOT2_RUNNING (FW_LOADER_ACK_RCVD_ALL_DATA | \ + BOOT2_STATUS_COMPLETE) + +/* Boot request for Secure Boot Image (SBI) */ +#define BAR_CODEPUSH_SBI 0x408 +/* 64M mapped to BAR2 */ +#define CODEPUSH_BOOT2_ENTRY 0x60000000 + +#define BAR_CARD_STATUS 0x410 +/* CARD_STATUS definitions */ +#define CARD_STATUS_TTYVK0_READY BIT(0) +#define CARD_STATUS_TTYVK1_READY BIT(1) + +#define BAR_BOOT1_STDALONE_PROGRESS 0x420 +#define BOOT1_STDALONE_SUCCESS (BIT(13) | BIT(14)) +#define BOOT1_STDALONE_PROGRESS_MASK BOOT1_STDALONE_SUCCESS + +#define BAR_METADATA_VERSION 0x440 +#define BAR_OS_UPTIME 0x444 +#define BAR_CHIP_ID 0x448 +#define MAJOR_SOC_REV(_chip_id) (((_chip_id) >> 20) & 0xf) + +#define BAR_CARD_TEMPERATURE 0x45c +/* defines for all temperature sensor */ +#define BCM_VK_TEMP_FIELD_MASK 0xff +#define BCM_VK_CPU_TEMP_SHIFT 0 +#define BCM_VK_DDR0_TEMP_SHIFT 8 +#define BCM_VK_DDR1_TEMP_SHIFT 16 + +#define BAR_CARD_VOLTAGE 0x460 +/* defines for voltage rail conversion */ +#define BCM_VK_VOLT_RAIL_MASK 0xffff +#define BCM_VK_3P3_VOLT_REG_SHIFT 16 + +#define BAR_CARD_ERR_LOG 0x464 +/* Error log register bit definition - register for error alerts */ +#define ERR_LOG_UECC BIT(0) +#define ERR_LOG_SSIM_BUSY BIT(1) +#define ERR_LOG_AFBC_BUSY BIT(2) +#define ERR_LOG_HIGH_TEMP_ERR BIT(3) +#define ERR_LOG_WDOG_TIMEOUT BIT(4) +#define ERR_LOG_SYS_FAULT BIT(5) +#define ERR_LOG_RAMDUMP BIT(6) +#define ERR_LOG_COP_WDOG_TIMEOUT BIT(7) +/* warnings */ +#define ERR_LOG_MEM_ALLOC_FAIL 
BIT(8) +#define ERR_LOG_LOW_TEMP_WARN BIT(9) +#define ERR_LOG_ECC BIT(10) +#define ERR_LOG_IPC_DWN BIT(11) + +/* Alert bit definitions detectd on host */ +#define ERR_LOG_HOST_INTF_V_FAIL BIT(13) +#define ERR_LOG_HOST_HB_FAIL BIT(14) +#define ERR_LOG_HOST_PCIE_DWN BIT(15) + +#define BAR_CARD_ERR_MEM 0x468 +/* defines for mem err, all fields have same width */ +#define BCM_VK_MEM_ERR_FIELD_MASK 0xff +#define BCM_VK_ECC_MEM_ERR_SHIFT 0 +#define BCM_VK_UECC_MEM_ERR_SHIFT 8 +/* threshold of event occurrence and logs start to come out */ +#define BCM_VK_ECC_THRESHOLD 10 +#define BCM_VK_UECC_THRESHOLD 1 + +#define BAR_CARD_PWR_AND_THRE 0x46c +/* defines for power and temp threshold, all fields have same width */ +#define BCM_VK_PWR_AND_THRE_FIELD_MASK 0xff +#define BCM_VK_LOW_TEMP_THRE_SHIFT 0 +#define BCM_VK_HIGH_TEMP_THRE_SHIFT 8 +#define BCM_VK_PWR_STATE_SHIFT 16 + +#define BAR_CARD_STATIC_INFO 0x470 + +#define BAR_INTF_VER 0x47c +#define BAR_INTF_VER_MAJOR_SHIFT 16 +#define BAR_INTF_VER_MASK 0xffff +/* + * major and minor semantic version numbers supported + * Please update as required on interface changes + */ +#define SEMANTIC_MAJOR 1 +#define SEMANTIC_MINOR 0 + +/* + * first door bell reg, ie for queue = 0. Only need the first one, as + * we will use the queue number to derive the others + */ +#define VK_BAR0_REGSEG_DB_BASE 0x484 +#define VK_BAR0_REGSEG_DB_REG_GAP 8 /* + * DB register gap, + * DB1 at 0x48c and DB2 at 0x494 + */ + +/* reset register and specific values */ +#define VK_BAR0_RESET_DB_NUM 3 +#define VK_BAR0_RESET_DB_SOFT 0xffffffff +#define VK_BAR0_RESET_DB_HARD 0xfffffffd +#define VK_BAR0_RESET_RAMPDUMP 0xa0000000 + +#define VK_BAR0_Q_DB_BASE(q_num) (VK_BAR0_REGSEG_DB_BASE + \ + ((q_num) * VK_BAR0_REGSEG_DB_REG_GAP)) +#define VK_BAR0_RESET_DB_BASE (VK_BAR0_REGSEG_DB_BASE + \ + (VK_BAR0_RESET_DB_NUM * VK_BAR0_REGSEG_DB_REG_GAP)) + +#define BAR_BOOTSRC_SELECT 0xc78 +/* BOOTSRC definitions */ +#define BOOTSRC_SOFT_ENABLE BIT(14) + +/* Card OS Firmware version size */ +#define BAR_FIRMWARE_TAG_SIZE 50 +#define FIRMWARE_STATUS_PRE_INIT_DONE 0x1f + +/* VK MSG_ID defines */ +#define VK_MSG_ID_BITMAP_SIZE 4096 +#define VK_MSG_ID_BITMAP_MASK (VK_MSG_ID_BITMAP_SIZE - 1) +#define VK_MSG_ID_OVERFLOW 0xffff + +/* + * BAR1 + */ + +/* BAR1 message q definition */ + +/* indicate if msgq ctrl in BAR1 is populated */ +#define VK_BAR1_MSGQ_DEF_RDY 0x60c0 +/* ready marker value for the above location, normal boot2 */ +#define VK_BAR1_MSGQ_RDY_MARKER 0xbeefcafe +/* ready marker value for the above location, normal boot2 */ +#define VK_BAR1_DIAG_RDY_MARKER 0xdeadcafe +/* number of msgqs in BAR1 */ +#define VK_BAR1_MSGQ_NR 0x60c4 +/* BAR1 queue control structure offset */ +#define VK_BAR1_MSGQ_CTRL_OFF 0x60c8 + +/* BAR1 ucode and boot1 version tag */ +#define VK_BAR1_UCODE_VER_TAG 0x6170 +#define VK_BAR1_BOOT1_VER_TAG 0x61b0 +#define VK_BAR1_VER_TAG_SIZE 64 + +/* Memory to hold the DMA buffer memory address allocated for boot2 download */ +#define VK_BAR1_DMA_BUF_OFF_HI 0x61e0 +#define VK_BAR1_DMA_BUF_OFF_LO (VK_BAR1_DMA_BUF_OFF_HI + 4) +#define VK_BAR1_DMA_BUF_SZ (VK_BAR1_DMA_BUF_OFF_HI + 8) + +/* Scratch memory allocated on host for VK */ +#define VK_BAR1_SCRATCH_OFF_HI 0x61f0 +#define VK_BAR1_SCRATCH_OFF_LO (VK_BAR1_SCRATCH_OFF_HI + 4) +#define VK_BAR1_SCRATCH_SZ_ADDR (VK_BAR1_SCRATCH_OFF_HI + 8) +#define VK_BAR1_SCRATCH_DEF_NR_PAGES 32 + +/* BAR1 DAUTH info */ +#define VK_BAR1_DAUTH_BASE_ADDR 0x6200 +#define VK_BAR1_DAUTH_STORE_SIZE 0x48 +#define VK_BAR1_DAUTH_VALID_SIZE 0x8 +#define 
VK_BAR1_DAUTH_MAX 4 +#define VK_BAR1_DAUTH_STORE_ADDR(x) \ + (VK_BAR1_DAUTH_BASE_ADDR + \ + (x) * (VK_BAR1_DAUTH_STORE_SIZE + VK_BAR1_DAUTH_VALID_SIZE)) +#define VK_BAR1_DAUTH_VALID_ADDR(x) \ + (VK_BAR1_DAUTH_STORE_ADDR(x) + VK_BAR1_DAUTH_STORE_SIZE) + +/* BAR1 SOTP AUTH and REVID info */ +#define VK_BAR1_SOTP_REVID_BASE_ADDR 0x6340 +#define VK_BAR1_SOTP_REVID_SIZE 0x10 +#define VK_BAR1_SOTP_REVID_MAX 2 +#define VK_BAR1_SOTP_REVID_ADDR(x) \ + (VK_BAR1_SOTP_REVID_BASE_ADDR + (x) * VK_BAR1_SOTP_REVID_SIZE) + +/* VK device supports a maximum of 3 bars */ +#define MAX_BAR 3 + +/* default number of msg blk for inband SGL */ +#define BCM_VK_DEF_IB_SGL_BLK_LEN 16 +#define BCM_VK_IB_SGL_BLK_MAX 24 + +enum pci_barno { + BAR_0 = 0, + BAR_1, + BAR_2 +}; + +#ifdef CONFIG_BCM_VK_TTY +#define BCM_VK_NUM_TTY 2 +#else +#define BCM_VK_NUM_TTY 0 +#endif + +struct bcm_vk_tty { + struct tty_port port; + u32 to_offset; /* bar offset to use */ + u32 to_size; /* to VK buffer size */ + u32 wr; /* write offset shadow */ + u32 from_offset; /* bar offset to use */ + u32 from_size; /* from VK buffer size */ + u32 rd; /* read offset shadow */ + pid_t pid; + bool irq_enabled; + bool is_opened; /* tracks tty open/close */ +}; + +/* VK device max power state, supports 3, full, reduced and low */ +#define MAX_OPP 3 +#define MAX_CARD_INFO_TAG_SIZE 64 + +struct bcm_vk_card_info { + u32 version; + char os_tag[MAX_CARD_INFO_TAG_SIZE]; + char cmpt_tag[MAX_CARD_INFO_TAG_SIZE]; + u32 cpu_freq_mhz; + u32 cpu_scale[MAX_OPP]; + u32 ddr_freq_mhz; + u32 ddr_size_MB; + u32 video_core_freq_mhz; +}; + +/* DAUTH related info */ +struct bcm_vk_dauth_key { + char store[VK_BAR1_DAUTH_STORE_SIZE]; + char valid[VK_BAR1_DAUTH_VALID_SIZE]; +}; + +struct bcm_vk_dauth_info { + struct bcm_vk_dauth_key keys[VK_BAR1_DAUTH_MAX]; +}; + +/* + * Control structure of logging messages from the card. 
This + * buffer is for logmsg that comes from vk + */ +struct bcm_vk_peer_log { + u32 rd_idx; + u32 wr_idx; + u32 buf_size; + u32 mask; + char data[]; +}; + +/* max buf size allowed */ +#define BCM_VK_PEER_LOG_BUF_MAX SZ_16K +/* max size per line of peer log */ +#define BCM_VK_PEER_LOG_LINE_MAX 256 + +/* + * single entry for processing type + utilization + */ +#define BCM_VK_PROC_TYPE_TAG_LEN 8 +struct bcm_vk_proc_mon_entry_t { + char tag[BCM_VK_PROC_TYPE_TAG_LEN]; + u32 used; + u32 max; /**< max capacity */ +}; + +/** + * Structure for run time utilization + */ +#define BCM_VK_PROC_MON_MAX 8 /* max entries supported */ +struct bcm_vk_proc_mon_info { + u32 num; /**< no of entries */ + u32 entry_size; /**< per entry size */ + struct bcm_vk_proc_mon_entry_t entries[BCM_VK_PROC_MON_MAX]; +}; + +struct bcm_vk_hb_ctrl { + struct delayed_work work; + u32 last_uptime; + u32 lost_cnt; +}; + +struct bcm_vk_alert { + u16 flags; + u16 notfs; +}; + +/* some alert counters that the driver will keep track */ +struct bcm_vk_alert_cnts { + u16 ecc; + u16 uecc; +}; + +struct bcm_vk { + struct pci_dev *pdev; + void __iomem *bar[MAX_BAR]; + int num_irqs; + + struct bcm_vk_card_info card_info; + struct bcm_vk_proc_mon_info proc_mon_info; + struct bcm_vk_dauth_info dauth_info; + + /* mutex to protect the ioctls */ + struct mutex mutex; + struct miscdevice miscdev; + int devid; /* dev id allocated */ + +#ifdef CONFIG_BCM_VK_TTY + struct tty_driver *tty_drv; + struct timer_list serial_timer; + struct bcm_vk_tty tty[BCM_VK_NUM_TTY]; + struct workqueue_struct *tty_wq_thread; + struct work_struct tty_wq_work; +#endif + + /* Reference-counting to handle file operations */ + struct kref kref; + + spinlock_t msg_id_lock; /* Spinlock for msg_id */ + u16 msg_id; + DECLARE_BITMAP(bmap, VK_MSG_ID_BITMAP_SIZE); + spinlock_t ctx_lock; /* Spinlock for component context */ + struct bcm_vk_ctx ctx[VK_CMPT_CTX_MAX]; + struct bcm_vk_ht_entry pid_ht[VK_PID_HT_SZ]; + pid_t reset_pid; /* process that issue reset */ + + atomic_t msgq_inited; /* indicate if info has been synced with vk */ + struct bcm_vk_msg_chan to_v_msg_chan; + struct bcm_vk_msg_chan to_h_msg_chan; + + struct workqueue_struct *wq_thread; + struct work_struct wq_work; /* work queue for deferred job */ + unsigned long wq_offload[1]; /* various flags on wq requested */ + void *tdma_vaddr; /* test dma segment virtual addr */ + dma_addr_t tdma_addr; /* test dma segment bus addr */ + + struct notifier_block panic_nb; + u32 ib_sgl_size; /* size allocated for inband sgl insertion */ + + /* heart beat mechanism control structure */ + struct bcm_vk_hb_ctrl hb_ctrl; + /* house-keeping variable of error logs */ + spinlock_t host_alert_lock; /* protection to access host_alert struct */ + struct bcm_vk_alert host_alert; + struct bcm_vk_alert peer_alert; /* bits set by the card */ + struct bcm_vk_alert_cnts alert_cnts; + + /* offset of the peer log control in BAR2 */ + u32 peerlog_off; + struct bcm_vk_peer_log peerlog_info; /* record of peer log info */ + /* offset of processing monitoring info in BAR2 */ + u32 proc_mon_off; +}; + +/* wq offload work items bits definitions */ +enum bcm_vk_wq_offload_flags { + BCM_VK_WQ_DWNLD_PEND = 0, + BCM_VK_WQ_DWNLD_AUTO = 1, + BCM_VK_WQ_NOTF_PEND = 2, +}; + +/* a macro to get an individual field with mask and shift */ +#define BCM_VK_EXTRACT_FIELD(_field, _reg, _mask, _shift) \ + (_field = (((_reg) >> (_shift)) & (_mask))) + +struct bcm_vk_entry { + const u32 mask; + const u32 exp_val; + const char *str; +}; + +/* alerts that could be 
generated from peer */ +#define BCM_VK_PEER_ERR_NUM 12 +extern struct bcm_vk_entry const bcm_vk_peer_err[BCM_VK_PEER_ERR_NUM]; +/* alerts detected by the host */ +#define BCM_VK_HOST_ERR_NUM 3 +extern struct bcm_vk_entry const bcm_vk_host_err[BCM_VK_HOST_ERR_NUM]; + +/* + * check if PCIe interface is down on read. Use it when it is + * certain that _val should never be all ones. + */ +#define BCM_VK_INTF_IS_DOWN(val) ((val) == 0xffffffff) + +static inline u32 vkread32(struct bcm_vk *vk, enum pci_barno bar, u64 offset) +{ + return readl(vk->bar[bar] + offset); +} + +static inline void vkwrite32(struct bcm_vk *vk, + u32 value, + enum pci_barno bar, + u64 offset) +{ + writel(value, vk->bar[bar] + offset); +} + +static inline u8 vkread8(struct bcm_vk *vk, enum pci_barno bar, u64 offset) +{ + return readb(vk->bar[bar] + offset); +} + +static inline void vkwrite8(struct bcm_vk *vk, + u8 value, + enum pci_barno bar, + u64 offset) +{ + writeb(value, vk->bar[bar] + offset); +} + +static inline bool bcm_vk_msgq_marker_valid(struct bcm_vk *vk) +{ + u32 rdy_marker = 0; + u32 fw_status; + + fw_status = vkread32(vk, BAR_0, VK_BAR_FWSTS); + + if ((fw_status & VK_FWSTS_READY) == VK_FWSTS_READY) + rdy_marker = vkread32(vk, BAR_1, VK_BAR1_MSGQ_DEF_RDY); + + return (rdy_marker == VK_BAR1_MSGQ_RDY_MARKER); +} + +int bcm_vk_open(struct inode *inode, struct file *p_file); +ssize_t bcm_vk_read(struct file *p_file, char __user *buf, size_t count, + loff_t *f_pos); +ssize_t bcm_vk_write(struct file *p_file, const char __user *buf, + size_t count, loff_t *f_pos); +__poll_t bcm_vk_poll(struct file *p_file, struct poll_table_struct *wait); +int bcm_vk_release(struct inode *inode, struct file *p_file); +void bcm_vk_release_data(struct kref *kref); +irqreturn_t bcm_vk_msgq_irqhandler(int irq, void *dev_id); +irqreturn_t bcm_vk_notf_irqhandler(int irq, void *dev_id); +irqreturn_t bcm_vk_tty_irqhandler(int irq, void *dev_id); +int bcm_vk_msg_init(struct bcm_vk *vk); +void bcm_vk_msg_remove(struct bcm_vk *vk); +void bcm_vk_drain_msg_on_reset(struct bcm_vk *vk); +int bcm_vk_sync_msgq(struct bcm_vk *vk, bool force_sync); +void bcm_vk_blk_drv_access(struct bcm_vk *vk); +s32 bcm_to_h_msg_dequeue(struct bcm_vk *vk); +int bcm_vk_send_shutdown_msg(struct bcm_vk *vk, u32 shut_type, + const pid_t pid, const u32 q_num); +void bcm_to_v_q_doorbell(struct bcm_vk *vk, u32 q_num, u32 db_val); +int bcm_vk_auto_load_all_images(struct bcm_vk *vk); +void bcm_vk_hb_init(struct bcm_vk *vk); +void bcm_vk_hb_deinit(struct bcm_vk *vk); +void bcm_vk_handle_notf(struct bcm_vk *vk); +bool bcm_vk_drv_access_ok(struct bcm_vk *vk); +void bcm_vk_set_host_alert(struct bcm_vk *vk, u32 bit_mask); + +#ifdef CONFIG_BCM_VK_TTY +int bcm_vk_tty_init(struct bcm_vk *vk, char *name); +void bcm_vk_tty_exit(struct bcm_vk *vk); +void bcm_vk_tty_terminate_tty_user(struct bcm_vk *vk); +void bcm_vk_tty_wq_exit(struct bcm_vk *vk); + +static inline void bcm_vk_tty_set_irq_enabled(struct bcm_vk *vk, int index) +{ + vk->tty[index].irq_enabled = true; +} +#else +static inline int bcm_vk_tty_init(struct bcm_vk *vk, char *name) +{ + return 0; +} + +static inline void bcm_vk_tty_exit(struct bcm_vk *vk) +{ +} + +static inline void bcm_vk_tty_terminate_tty_user(struct bcm_vk *vk) +{ +} + +static inline void bcm_vk_tty_wq_exit(struct bcm_vk *vk) +{ +} + +static inline void bcm_vk_tty_set_irq_enabled(struct bcm_vk *vk, int index) +{ +} +#endif /* CONFIG_BCM_VK_TTY */ + +#endif diff --git a/drivers/misc/bcm-vk/bcm_vk_dev.c b/drivers/misc/bcm-vk/bcm_vk_dev.c new file mode 100644 
index 0000000000..d4a9613772 --- /dev/null +++ b/drivers/misc/bcm-vk/bcm_vk_dev.c @@ -0,0 +1,1652 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2018-2020 Broadcom. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bcm_vk.h" + +#define PCI_DEVICE_ID_VALKYRIE 0x5e87 +#define PCI_DEVICE_ID_VIPER 0x5e88 + +static DEFINE_IDA(bcm_vk_ida); + +enum soc_idx { + VALKYRIE_A0 = 0, + VALKYRIE_B0, + VIPER, + VK_IDX_INVALID +}; + +enum img_idx { + IMG_PRI = 0, + IMG_SEC, + IMG_PER_TYPE_MAX +}; + +struct load_image_entry { + const u32 image_type; + const char *image_name[IMG_PER_TYPE_MAX]; +}; + +#define NUM_BOOT_STAGES 2 +/* default firmware images names */ +static const struct load_image_entry image_tab[][NUM_BOOT_STAGES] = { + [VALKYRIE_A0] = { + {VK_IMAGE_TYPE_BOOT1, {"vk_a0-boot1.bin", "vk-boot1.bin"}}, + {VK_IMAGE_TYPE_BOOT2, {"vk_a0-boot2.bin", "vk-boot2.bin"}} + }, + [VALKYRIE_B0] = { + {VK_IMAGE_TYPE_BOOT1, {"vk_b0-boot1.bin", "vk-boot1.bin"}}, + {VK_IMAGE_TYPE_BOOT2, {"vk_b0-boot2.bin", "vk-boot2.bin"}} + }, + + [VIPER] = { + {VK_IMAGE_TYPE_BOOT1, {"vp-boot1.bin", ""}}, + {VK_IMAGE_TYPE_BOOT2, {"vp-boot2.bin", ""}} + }, +}; + +/* Location of memory base addresses of interest in BAR1 */ +/* Load Boot1 to start of ITCM */ +#define BAR1_CODEPUSH_BASE_BOOT1 0x100000 + +/* Allow minimum 1s for Load Image timeout responses */ +#define LOAD_IMAGE_TIMEOUT_MS (1 * MSEC_PER_SEC) + +/* Image startup timeouts */ +#define BOOT1_STARTUP_TIMEOUT_MS (5 * MSEC_PER_SEC) +#define BOOT2_STARTUP_TIMEOUT_MS (10 * MSEC_PER_SEC) + +/* 1ms wait for checking the transfer complete status */ +#define TXFR_COMPLETE_TIMEOUT_MS 1 + +/* MSIX usages */ +#define VK_MSIX_MSGQ_MAX 3 +#define VK_MSIX_NOTF_MAX 1 +#define VK_MSIX_TTY_MAX BCM_VK_NUM_TTY +#define VK_MSIX_IRQ_MAX (VK_MSIX_MSGQ_MAX + VK_MSIX_NOTF_MAX + \ + VK_MSIX_TTY_MAX) +#define VK_MSIX_IRQ_MIN_REQ (VK_MSIX_MSGQ_MAX + VK_MSIX_NOTF_MAX) + +/* Number of bits set in DMA mask*/ +#define BCM_VK_DMA_BITS 64 + +/* Ucode boot wait time */ +#define BCM_VK_UCODE_BOOT_US (100 * USEC_PER_MSEC) +/* 50% margin */ +#define BCM_VK_UCODE_BOOT_MAX_US ((BCM_VK_UCODE_BOOT_US * 3) >> 1) + +/* deinit time for the card os after receiving doorbell */ +#define BCM_VK_DEINIT_TIME_MS (2 * MSEC_PER_SEC) + +/* + * module parameters + */ +static bool auto_load = true; +module_param(auto_load, bool, 0444); +MODULE_PARM_DESC(auto_load, + "Load images automatically at PCIe probe time.\n"); +static uint nr_scratch_pages = VK_BAR1_SCRATCH_DEF_NR_PAGES; +module_param(nr_scratch_pages, uint, 0444); +MODULE_PARM_DESC(nr_scratch_pages, + "Number of pre allocated DMAable coherent pages.\n"); +static uint nr_ib_sgl_blk = BCM_VK_DEF_IB_SGL_BLK_LEN; +module_param(nr_ib_sgl_blk, uint, 0444); +MODULE_PARM_DESC(nr_ib_sgl_blk, + "Number of in-band msg blks for short SGL.\n"); + +/* + * alerts that could be generated from peer + */ +const struct bcm_vk_entry bcm_vk_peer_err[BCM_VK_PEER_ERR_NUM] = { + {ERR_LOG_UECC, ERR_LOG_UECC, "uecc"}, + {ERR_LOG_SSIM_BUSY, ERR_LOG_SSIM_BUSY, "ssim_busy"}, + {ERR_LOG_AFBC_BUSY, ERR_LOG_AFBC_BUSY, "afbc_busy"}, + {ERR_LOG_HIGH_TEMP_ERR, ERR_LOG_HIGH_TEMP_ERR, "high_temp"}, + {ERR_LOG_WDOG_TIMEOUT, ERR_LOG_WDOG_TIMEOUT, "wdog_timeout"}, + {ERR_LOG_SYS_FAULT, ERR_LOG_SYS_FAULT, "sys_fault"}, + {ERR_LOG_RAMDUMP, ERR_LOG_RAMDUMP, "ramdump"}, + {ERR_LOG_COP_WDOG_TIMEOUT, ERR_LOG_COP_WDOG_TIMEOUT, + "cop_wdog_timeout"}, + {ERR_LOG_MEM_ALLOC_FAIL, ERR_LOG_MEM_ALLOC_FAIL, 
"malloc_fail warn"}, + {ERR_LOG_LOW_TEMP_WARN, ERR_LOG_LOW_TEMP_WARN, "low_temp warn"}, + {ERR_LOG_ECC, ERR_LOG_ECC, "ecc"}, + {ERR_LOG_IPC_DWN, ERR_LOG_IPC_DWN, "ipc_down"}, +}; + +/* alerts detected by the host */ +const struct bcm_vk_entry bcm_vk_host_err[BCM_VK_HOST_ERR_NUM] = { + {ERR_LOG_HOST_PCIE_DWN, ERR_LOG_HOST_PCIE_DWN, "PCIe_down"}, + {ERR_LOG_HOST_HB_FAIL, ERR_LOG_HOST_HB_FAIL, "hb_fail"}, + {ERR_LOG_HOST_INTF_V_FAIL, ERR_LOG_HOST_INTF_V_FAIL, "intf_ver_fail"}, +}; + +irqreturn_t bcm_vk_notf_irqhandler(int irq, void *dev_id) +{ + struct bcm_vk *vk = dev_id; + + if (!bcm_vk_drv_access_ok(vk)) { + dev_err(&vk->pdev->dev, + "Interrupt %d received when msgq not inited\n", irq); + goto skip_schedule_work; + } + + /* if notification is not pending, set bit and schedule work */ + if (test_and_set_bit(BCM_VK_WQ_NOTF_PEND, vk->wq_offload) == 0) + queue_work(vk->wq_thread, &vk->wq_work); + +skip_schedule_work: + return IRQ_HANDLED; +} + +static int bcm_vk_intf_ver_chk(struct bcm_vk *vk) +{ + struct device *dev = &vk->pdev->dev; + u32 reg; + u16 major, minor; + int ret = 0; + + /* read interface register */ + reg = vkread32(vk, BAR_0, BAR_INTF_VER); + major = (reg >> BAR_INTF_VER_MAJOR_SHIFT) & BAR_INTF_VER_MASK; + minor = reg & BAR_INTF_VER_MASK; + + /* + * if major number is 0, it is pre-release and it would be allowed + * to continue, else, check versions accordingly + */ + if (!major) { + dev_warn(dev, "Pre-release major.minor=%d.%d - drv %d.%d\n", + major, minor, SEMANTIC_MAJOR, SEMANTIC_MINOR); + } else if (major != SEMANTIC_MAJOR) { + dev_err(dev, + "Intf major.minor=%d.%d rejected - drv %d.%d\n", + major, minor, SEMANTIC_MAJOR, SEMANTIC_MINOR); + bcm_vk_set_host_alert(vk, ERR_LOG_HOST_INTF_V_FAIL); + ret = -EPFNOSUPPORT; + } else { + dev_dbg(dev, + "Intf major.minor=%d.%d passed - drv %d.%d\n", + major, minor, SEMANTIC_MAJOR, SEMANTIC_MINOR); + } + return ret; +} + +static void bcm_vk_log_notf(struct bcm_vk *vk, + struct bcm_vk_alert *alert, + struct bcm_vk_entry const *entry_tab, + const u32 table_size) +{ + u32 i; + u32 masked_val, latched_val; + struct bcm_vk_entry const *entry; + u32 reg; + u16 ecc_mem_err, uecc_mem_err; + struct device *dev = &vk->pdev->dev; + + for (i = 0; i < table_size; i++) { + entry = &entry_tab[i]; + masked_val = entry->mask & alert->notfs; + latched_val = entry->mask & alert->flags; + + if (masked_val == ERR_LOG_UECC) { + /* + * if there is difference between stored cnt and it + * is greater than threshold, log it. + */ + reg = vkread32(vk, BAR_0, BAR_CARD_ERR_MEM); + BCM_VK_EXTRACT_FIELD(uecc_mem_err, reg, + BCM_VK_MEM_ERR_FIELD_MASK, + BCM_VK_UECC_MEM_ERR_SHIFT); + if ((uecc_mem_err != vk->alert_cnts.uecc) && + (uecc_mem_err >= BCM_VK_UECC_THRESHOLD)) + dev_info(dev, + "ALERT! %s.%d uecc RAISED - ErrCnt %d\n", + DRV_MODULE_NAME, vk->devid, + uecc_mem_err); + vk->alert_cnts.uecc = uecc_mem_err; + } else if (masked_val == ERR_LOG_ECC) { + reg = vkread32(vk, BAR_0, BAR_CARD_ERR_MEM); + BCM_VK_EXTRACT_FIELD(ecc_mem_err, reg, + BCM_VK_MEM_ERR_FIELD_MASK, + BCM_VK_ECC_MEM_ERR_SHIFT); + if ((ecc_mem_err != vk->alert_cnts.ecc) && + (ecc_mem_err >= BCM_VK_ECC_THRESHOLD)) + dev_info(dev, "ALERT! %s.%d ecc RAISED - ErrCnt %d\n", + DRV_MODULE_NAME, vk->devid, + ecc_mem_err); + vk->alert_cnts.ecc = ecc_mem_err; + } else if (masked_val != latched_val) { + /* print a log as info */ + dev_info(dev, "ALERT! %s.%d %s %s\n", + DRV_MODULE_NAME, vk->devid, entry->str, + masked_val ? 
"RAISED" : "CLEARED"); + } + } +} + +static void bcm_vk_dump_peer_log(struct bcm_vk *vk) +{ + struct bcm_vk_peer_log log; + struct bcm_vk_peer_log *log_info = &vk->peerlog_info; + char loc_buf[BCM_VK_PEER_LOG_LINE_MAX]; + int cnt; + struct device *dev = &vk->pdev->dev; + unsigned int data_offset; + + memcpy_fromio(&log, vk->bar[BAR_2] + vk->peerlog_off, sizeof(log)); + + dev_dbg(dev, "Peer PANIC: Size 0x%x(0x%x), [Rd Wr] = [%d %d]\n", + log.buf_size, log.mask, log.rd_idx, log.wr_idx); + + if (!log_info->buf_size) { + dev_err(dev, "Peer log dump disabled - skipped!\n"); + return; + } + + /* perform range checking for rd/wr idx */ + if ((log.rd_idx > log_info->mask) || + (log.wr_idx > log_info->mask) || + (log.buf_size != log_info->buf_size) || + (log.mask != log_info->mask)) { + dev_err(dev, + "Corrupted Ptrs: Size 0x%x(0x%x) Mask 0x%x(0x%x) [Rd Wr] = [%d %d], skip log dump.\n", + log_info->buf_size, log.buf_size, + log_info->mask, log.mask, + log.rd_idx, log.wr_idx); + return; + } + + cnt = 0; + data_offset = vk->peerlog_off + sizeof(struct bcm_vk_peer_log); + loc_buf[BCM_VK_PEER_LOG_LINE_MAX - 1] = '\0'; + while (log.rd_idx != log.wr_idx) { + loc_buf[cnt] = vkread8(vk, BAR_2, data_offset + log.rd_idx); + + if ((loc_buf[cnt] == '\0') || + (cnt == (BCM_VK_PEER_LOG_LINE_MAX - 1))) { + dev_err(dev, "%s", loc_buf); + cnt = 0; + } else { + cnt++; + } + log.rd_idx = (log.rd_idx + 1) & log.mask; + } + /* update rd idx at the end */ + vkwrite32(vk, log.rd_idx, BAR_2, + vk->peerlog_off + offsetof(struct bcm_vk_peer_log, rd_idx)); +} + +void bcm_vk_handle_notf(struct bcm_vk *vk) +{ + u32 reg; + struct bcm_vk_alert alert; + bool intf_down; + unsigned long flags; + + /* handle peer alerts and then locally detected ones */ + reg = vkread32(vk, BAR_0, BAR_CARD_ERR_LOG); + intf_down = BCM_VK_INTF_IS_DOWN(reg); + if (!intf_down) { + vk->peer_alert.notfs = reg; + bcm_vk_log_notf(vk, &vk->peer_alert, bcm_vk_peer_err, + ARRAY_SIZE(bcm_vk_peer_err)); + vk->peer_alert.flags = vk->peer_alert.notfs; + } else { + /* turn off access */ + bcm_vk_blk_drv_access(vk); + } + + /* check and make copy of alert with lock and then free lock */ + spin_lock_irqsave(&vk->host_alert_lock, flags); + if (intf_down) + vk->host_alert.notfs |= ERR_LOG_HOST_PCIE_DWN; + + alert = vk->host_alert; + vk->host_alert.flags = vk->host_alert.notfs; + spin_unlock_irqrestore(&vk->host_alert_lock, flags); + + /* call display with copy */ + bcm_vk_log_notf(vk, &alert, bcm_vk_host_err, + ARRAY_SIZE(bcm_vk_host_err)); + + /* + * If it is a sys fault or heartbeat timeout, we would like extract + * log msg from the card so that we would know what is the last fault + */ + if (!intf_down && + ((vk->host_alert.flags & ERR_LOG_HOST_HB_FAIL) || + (vk->peer_alert.flags & ERR_LOG_SYS_FAULT))) + bcm_vk_dump_peer_log(vk); +} + +static inline int bcm_vk_wait(struct bcm_vk *vk, enum pci_barno bar, + u64 offset, u32 mask, u32 value, + unsigned long timeout_ms) +{ + struct device *dev = &vk->pdev->dev; + unsigned long start_time; + unsigned long timeout; + u32 rd_val, boot_status; + + start_time = jiffies; + timeout = start_time + msecs_to_jiffies(timeout_ms); + + do { + rd_val = vkread32(vk, bar, offset); + dev_dbg(dev, "BAR%d Offset=0x%llx: 0x%x\n", + bar, offset, rd_val); + + /* check for any boot err condition */ + boot_status = vkread32(vk, BAR_0, BAR_BOOT_STATUS); + if (boot_status & BOOT_ERR_MASK) { + dev_err(dev, "Boot Err 0x%x, progress 0x%x after %d ms\n", + (boot_status & BOOT_ERR_MASK) >> BOOT_ERR_SHIFT, + boot_status & BOOT_PROG_MASK, + 
jiffies_to_msecs(jiffies - start_time)); + return -EFAULT; + } + + if (time_after(jiffies, timeout)) + return -ETIMEDOUT; + + cpu_relax(); + cond_resched(); + } while ((rd_val & mask) != value); + + return 0; +} + +static void bcm_vk_get_card_info(struct bcm_vk *vk) +{ + struct device *dev = &vk->pdev->dev; + u32 offset; + int i; + u8 *dst; + struct bcm_vk_card_info *info = &vk->card_info; + + /* first read the offset from spare register */ + offset = vkread32(vk, BAR_0, BAR_CARD_STATIC_INFO); + offset &= (pci_resource_len(vk->pdev, BAR_2 * 2) - 1); + + /* based on the offset, read info to internal card info structure */ + dst = (u8 *)info; + for (i = 0; i < sizeof(*info); i++) + *dst++ = vkread8(vk, BAR_2, offset++); + +#define CARD_INFO_LOG_FMT "version : %x\n" \ + "os_tag : %s\n" \ + "cmpt_tag : %s\n" \ + "cpu_freq : %d MHz\n" \ + "cpu_scale : %d full, %d lowest\n" \ + "ddr_freq : %d MHz\n" \ + "ddr_size : %d MB\n" \ + "video_freq: %d MHz\n" + dev_dbg(dev, CARD_INFO_LOG_FMT, info->version, info->os_tag, + info->cmpt_tag, info->cpu_freq_mhz, info->cpu_scale[0], + info->cpu_scale[MAX_OPP - 1], info->ddr_freq_mhz, + info->ddr_size_MB, info->video_core_freq_mhz); + + /* + * get the peer log pointer, only need the offset, and get record + * of the log buffer information which would be used for checking + * before dump, in case the BAR2 memory has been corrupted. + */ + vk->peerlog_off = offset; + memcpy_fromio(&vk->peerlog_info, vk->bar[BAR_2] + vk->peerlog_off, + sizeof(vk->peerlog_info)); + + /* + * Do a range checking and if out of bound, the record will be zeroed + * which guarantees that nothing would be dumped. In other words, + * peer dump is disabled. + */ + if ((vk->peerlog_info.buf_size > BCM_VK_PEER_LOG_BUF_MAX) || + (vk->peerlog_info.mask != (vk->peerlog_info.buf_size - 1)) || + (vk->peerlog_info.rd_idx > vk->peerlog_info.mask) || + (vk->peerlog_info.wr_idx > vk->peerlog_info.mask)) { + dev_err(dev, "Peer log disabled - range error: Size 0x%x(0x%x), [Rd Wr] = [%d %d]\n", + vk->peerlog_info.buf_size, + vk->peerlog_info.mask, + vk->peerlog_info.rd_idx, + vk->peerlog_info.wr_idx); + memset(&vk->peerlog_info, 0, sizeof(vk->peerlog_info)); + } else { + dev_dbg(dev, "Peer log: Size 0x%x(0x%x), [Rd Wr] = [%d %d]\n", + vk->peerlog_info.buf_size, + vk->peerlog_info.mask, + vk->peerlog_info.rd_idx, + vk->peerlog_info.wr_idx); + } +} + +static void bcm_vk_get_proc_mon_info(struct bcm_vk *vk) +{ + struct device *dev = &vk->pdev->dev; + struct bcm_vk_proc_mon_info *mon = &vk->proc_mon_info; + u32 num, entry_size, offset, buf_size; + u8 *dst; + + /* calculate offset which is based on peerlog offset */ + buf_size = vkread32(vk, BAR_2, + vk->peerlog_off + + offsetof(struct bcm_vk_peer_log, buf_size)); + offset = vk->peerlog_off + sizeof(struct bcm_vk_peer_log) + + buf_size; + + /* first read the num and entry size */ + num = vkread32(vk, BAR_2, offset); + entry_size = vkread32(vk, BAR_2, offset + sizeof(num)); + + /* check for max allowed */ + if (num > BCM_VK_PROC_MON_MAX) { + dev_err(dev, "Processing monitoring entry %d exceeds max %d\n", + num, BCM_VK_PROC_MON_MAX); + return; + } + mon->num = num; + mon->entry_size = entry_size; + + vk->proc_mon_off = offset; + + /* read it once that will capture those static info */ + dst = (u8 *)&mon->entries[0]; + offset += sizeof(num) + sizeof(entry_size); + memcpy_fromio(dst, vk->bar[BAR_2] + offset, num * entry_size); +} + +static int bcm_vk_sync_card_info(struct bcm_vk *vk) +{ + u32 rdy_marker = vkread32(vk, BAR_1, VK_BAR1_MSGQ_DEF_RDY); + + /* check 
for marker, but allow diags mode to skip sync */ + if (!bcm_vk_msgq_marker_valid(vk)) + return (rdy_marker == VK_BAR1_DIAG_RDY_MARKER ? 0 : -EINVAL); + + /* + * Write down scratch addr which is used for DMA. For + * signed part, BAR1 is accessible only after boot2 has come + * up + */ + if (vk->tdma_addr) { + vkwrite32(vk, (u64)vk->tdma_addr >> 32, BAR_1, + VK_BAR1_SCRATCH_OFF_HI); + vkwrite32(vk, (u32)vk->tdma_addr, BAR_1, + VK_BAR1_SCRATCH_OFF_LO); + vkwrite32(vk, nr_scratch_pages * PAGE_SIZE, BAR_1, + VK_BAR1_SCRATCH_SZ_ADDR); + } + + /* get static card info, only need to read once */ + bcm_vk_get_card_info(vk); + + /* get the proc mon info once */ + bcm_vk_get_proc_mon_info(vk); + + return 0; +} + +void bcm_vk_blk_drv_access(struct bcm_vk *vk) +{ + int i; + + /* + * kill all the apps except for the process that is resetting. + * If not called during reset, reset_pid will be 0, and all will be + * killed. + */ + spin_lock(&vk->ctx_lock); + + /* set msgq_inited to 0 so that all rd/wr will be blocked */ + atomic_set(&vk->msgq_inited, 0); + + for (i = 0; i < VK_PID_HT_SZ; i++) { + struct bcm_vk_ctx *ctx; + + list_for_each_entry(ctx, &vk->pid_ht[i].head, node) { + if (ctx->pid != vk->reset_pid) { + dev_dbg(&vk->pdev->dev, + "Send kill signal to pid %d\n", + ctx->pid); + kill_pid(find_vpid(ctx->pid), SIGKILL, 1); + } + } + } + bcm_vk_tty_terminate_tty_user(vk); + spin_unlock(&vk->ctx_lock); +} + +static void bcm_vk_buf_notify(struct bcm_vk *vk, void *bufp, + dma_addr_t host_buf_addr, u32 buf_size) +{ + /* update the dma address to the card */ + vkwrite32(vk, (u64)host_buf_addr >> 32, BAR_1, + VK_BAR1_DMA_BUF_OFF_HI); + vkwrite32(vk, (u32)host_buf_addr, BAR_1, + VK_BAR1_DMA_BUF_OFF_LO); + vkwrite32(vk, buf_size, BAR_1, VK_BAR1_DMA_BUF_SZ); +} + +static int bcm_vk_load_image_by_type(struct bcm_vk *vk, u32 load_type, + const char *filename) +{ + struct device *dev = &vk->pdev->dev; + const struct firmware *fw = NULL; + void *bufp = NULL; + size_t max_buf, offset; + int ret; + u64 offset_codepush; + u32 codepush; + u32 value; + dma_addr_t boot_dma_addr; + bool is_stdalone; + + if (load_type == VK_IMAGE_TYPE_BOOT1) { + /* + * After POR, enable VK soft BOOTSRC so bootrom do not clear + * the pushed image (the TCM memories). 
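+ * The CODEPUSH_BOOTSTART write below asks the ROM to open its SRAM
+ * window; once SRAM_OPEN is seen, boot1 is copied into ITCM at
+ * BAR1_CODEPUSH_BASE_BOOT1 before the boot1 entry is signalled.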
+ */ + value = vkread32(vk, BAR_0, BAR_BOOTSRC_SELECT); + value |= BOOTSRC_SOFT_ENABLE; + vkwrite32(vk, value, BAR_0, BAR_BOOTSRC_SELECT); + + codepush = CODEPUSH_BOOTSTART + CODEPUSH_BOOT1_ENTRY; + offset_codepush = BAR_CODEPUSH_SBL; + + /* Write a 1 to request SRAM open bit */ + vkwrite32(vk, CODEPUSH_BOOTSTART, BAR_0, offset_codepush); + + /* Wait for VK to respond */ + ret = bcm_vk_wait(vk, BAR_0, BAR_BOOT_STATUS, SRAM_OPEN, + SRAM_OPEN, LOAD_IMAGE_TIMEOUT_MS); + if (ret < 0) { + dev_err(dev, "boot1 wait SRAM err - ret(%d)\n", ret); + goto err_buf_out; + } + + max_buf = SZ_256K; + bufp = dma_alloc_coherent(dev, + max_buf, + &boot_dma_addr, GFP_KERNEL); + if (!bufp) { + dev_err(dev, "Error allocating 0x%zx\n", max_buf); + ret = -ENOMEM; + goto err_buf_out; + } + } else if (load_type == VK_IMAGE_TYPE_BOOT2) { + codepush = CODEPUSH_BOOT2_ENTRY; + offset_codepush = BAR_CODEPUSH_SBI; + + /* Wait for VK to respond */ + ret = bcm_vk_wait(vk, BAR_0, BAR_BOOT_STATUS, DDR_OPEN, + DDR_OPEN, LOAD_IMAGE_TIMEOUT_MS); + if (ret < 0) { + dev_err(dev, "boot2 wait DDR open error - ret(%d)\n", + ret); + goto err_buf_out; + } + + max_buf = SZ_4M; + bufp = dma_alloc_coherent(dev, + max_buf, + &boot_dma_addr, GFP_KERNEL); + if (!bufp) { + dev_err(dev, "Error allocating 0x%zx\n", max_buf); + ret = -ENOMEM; + goto err_buf_out; + } + + bcm_vk_buf_notify(vk, bufp, boot_dma_addr, max_buf); + } else { + dev_err(dev, "Error invalid image type 0x%x\n", load_type); + ret = -EINVAL; + goto err_buf_out; + } + + offset = 0; + ret = request_partial_firmware_into_buf(&fw, filename, dev, + bufp, max_buf, offset); + if (ret) { + dev_err(dev, "Error %d requesting firmware file: %s\n", + ret, filename); + goto err_firmware_out; + } + dev_dbg(dev, "size=0x%zx\n", fw->size); + if (load_type == VK_IMAGE_TYPE_BOOT1) + memcpy_toio(vk->bar[BAR_1] + BAR1_CODEPUSH_BASE_BOOT1, + bufp, + fw->size); + + dev_dbg(dev, "Signaling 0x%x to 0x%llx\n", codepush, offset_codepush); + vkwrite32(vk, codepush, BAR_0, offset_codepush); + + if (load_type == VK_IMAGE_TYPE_BOOT1) { + u32 boot_status; + + /* wait until done */ + ret = bcm_vk_wait(vk, BAR_0, BAR_BOOT_STATUS, + BOOT1_RUNNING, + BOOT1_RUNNING, + BOOT1_STARTUP_TIMEOUT_MS); + + boot_status = vkread32(vk, BAR_0, BAR_BOOT_STATUS); + is_stdalone = !BCM_VK_INTF_IS_DOWN(boot_status) && + (boot_status & BOOT_STDALONE_RUNNING); + if (ret && !is_stdalone) { + dev_err(dev, + "Timeout %ld ms waiting for boot1 to come up - ret(%d)\n", + BOOT1_STARTUP_TIMEOUT_MS, ret); + goto err_firmware_out; + } else if (is_stdalone) { + u32 reg; + + reg = vkread32(vk, BAR_0, BAR_BOOT1_STDALONE_PROGRESS); + if ((reg & BOOT1_STDALONE_PROGRESS_MASK) == + BOOT1_STDALONE_SUCCESS) { + dev_info(dev, "Boot1 standalone success\n"); + ret = 0; + } else { + dev_err(dev, "Timeout %ld ms - Boot1 standalone failure\n", + BOOT1_STARTUP_TIMEOUT_MS); + ret = -EINVAL; + goto err_firmware_out; + } + } + } else if (load_type == VK_IMAGE_TYPE_BOOT2) { + unsigned long timeout; + + timeout = jiffies + msecs_to_jiffies(LOAD_IMAGE_TIMEOUT_MS); + + /* To send more data to VK than max_buf allowed at a time */ + do { + /* + * Check for ack from card. when Ack is received, + * it means all the data is received by card. + * Exit the loop after ack is received. 
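+ * The boot2 image is pushed in max_buf sized chunks: after the card
+ * reopens the codepush window, the next chunk is fetched at a larger
+ * offset and codepush is signalled again, until all data is acked.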
+ */ + ret = bcm_vk_wait(vk, BAR_0, BAR_BOOT_STATUS, + FW_LOADER_ACK_RCVD_ALL_DATA, + FW_LOADER_ACK_RCVD_ALL_DATA, + TXFR_COMPLETE_TIMEOUT_MS); + if (ret == 0) { + dev_dbg(dev, "Exit boot2 download\n"); + break; + } else if (ret == -EFAULT) { + dev_err(dev, "Error detected during ACK waiting"); + goto err_firmware_out; + } + + /* exit the loop, if there is no response from card */ + if (time_after(jiffies, timeout)) { + dev_err(dev, "Error. No reply from card\n"); + ret = -ETIMEDOUT; + goto err_firmware_out; + } + + /* Wait for VK to open BAR space to copy new data */ + ret = bcm_vk_wait(vk, BAR_0, offset_codepush, + codepush, 0, + TXFR_COMPLETE_TIMEOUT_MS); + if (ret == 0) { + offset += max_buf; + ret = request_partial_firmware_into_buf + (&fw, + filename, + dev, bufp, + max_buf, + offset); + if (ret) { + dev_err(dev, + "Error %d requesting firmware file: %s offset: 0x%zx\n", + ret, filename, offset); + goto err_firmware_out; + } + dev_dbg(dev, "size=0x%zx\n", fw->size); + dev_dbg(dev, "Signaling 0x%x to 0x%llx\n", + codepush, offset_codepush); + vkwrite32(vk, codepush, BAR_0, offset_codepush); + /* reload timeout after every codepush */ + timeout = jiffies + + msecs_to_jiffies(LOAD_IMAGE_TIMEOUT_MS); + } else if (ret == -EFAULT) { + dev_err(dev, "Error detected waiting for transfer\n"); + goto err_firmware_out; + } + } while (1); + + /* wait for fw status bits to indicate app ready */ + ret = bcm_vk_wait(vk, BAR_0, VK_BAR_FWSTS, + VK_FWSTS_READY, + VK_FWSTS_READY, + BOOT2_STARTUP_TIMEOUT_MS); + if (ret < 0) { + dev_err(dev, "Boot2 not ready - ret(%d)\n", ret); + goto err_firmware_out; + } + + is_stdalone = vkread32(vk, BAR_0, BAR_BOOT_STATUS) & + BOOT_STDALONE_RUNNING; + if (!is_stdalone) { + ret = bcm_vk_intf_ver_chk(vk); + if (ret) { + dev_err(dev, "failure in intf version check\n"); + goto err_firmware_out; + } + + /* + * Next, initialize Message Q if we are loading boot2. 
+ * Do a force sync + */ + ret = bcm_vk_sync_msgq(vk, true); + if (ret) { + dev_err(dev, "Boot2 Error reading comm msg Q info\n"); + ret = -EIO; + goto err_firmware_out; + } + + /* sync & channel other info */ + ret = bcm_vk_sync_card_info(vk); + if (ret) { + dev_err(dev, "Syncing Card Info failure\n"); + goto err_firmware_out; + } + } + } + +err_firmware_out: + release_firmware(fw); + +err_buf_out: + if (bufp) + dma_free_coherent(dev, max_buf, bufp, boot_dma_addr); + + return ret; +} + +static u32 bcm_vk_next_boot_image(struct bcm_vk *vk) +{ + u32 boot_status; + u32 fw_status; + u32 load_type = 0; /* default for unknown */ + + boot_status = vkread32(vk, BAR_0, BAR_BOOT_STATUS); + fw_status = vkread32(vk, BAR_0, VK_BAR_FWSTS); + + if (!BCM_VK_INTF_IS_DOWN(boot_status) && (boot_status & SRAM_OPEN)) + load_type = VK_IMAGE_TYPE_BOOT1; + else if (boot_status == BOOT1_RUNNING) + load_type = VK_IMAGE_TYPE_BOOT2; + + /* Log status so that we know different stages */ + dev_info(&vk->pdev->dev, + "boot-status value for next image: 0x%x : fw-status 0x%x\n", + boot_status, fw_status); + + return load_type; +} + +static enum soc_idx get_soc_idx(struct bcm_vk *vk) +{ + struct pci_dev *pdev = vk->pdev; + enum soc_idx idx = VK_IDX_INVALID; + u32 rev; + static enum soc_idx const vk_soc_tab[] = { VALKYRIE_A0, VALKYRIE_B0 }; + + switch (pdev->device) { + case PCI_DEVICE_ID_VALKYRIE: + /* get the chip id to decide sub-class */ + rev = MAJOR_SOC_REV(vkread32(vk, BAR_0, BAR_CHIP_ID)); + if (rev < ARRAY_SIZE(vk_soc_tab)) { + idx = vk_soc_tab[rev]; + } else { + /* Default to A0 firmware for all other chip revs */ + idx = VALKYRIE_A0; + dev_warn(&pdev->dev, + "Rev %d not in image lookup table, default to idx=%d\n", + rev, idx); + } + break; + + case PCI_DEVICE_ID_VIPER: + idx = VIPER; + break; + + default: + dev_err(&pdev->dev, "no images for 0x%x\n", pdev->device); + } + return idx; +} + +static const char *get_load_fw_name(struct bcm_vk *vk, + const struct load_image_entry *entry) +{ + const struct firmware *fw; + struct device *dev = &vk->pdev->dev; + int ret; + unsigned long dummy; + int i; + + for (i = 0; i < IMG_PER_TYPE_MAX; i++) { + fw = NULL; + ret = request_partial_firmware_into_buf(&fw, + entry->image_name[i], + dev, &dummy, + sizeof(dummy), + 0); + release_firmware(fw); + if (!ret) + return entry->image_name[i]; + } + return NULL; +} + +int bcm_vk_auto_load_all_images(struct bcm_vk *vk) +{ + int i, ret = -1; + enum soc_idx idx; + struct device *dev = &vk->pdev->dev; + u32 curr_type; + const char *curr_name; + + idx = get_soc_idx(vk); + if (idx == VK_IDX_INVALID) + goto auto_load_all_exit; + + /* log a message to know the relative loading order */ + dev_dbg(dev, "Load All for device %d\n", vk->devid); + + for (i = 0; i < NUM_BOOT_STAGES; i++) { + curr_type = image_tab[idx][i].image_type; + if (bcm_vk_next_boot_image(vk) == curr_type) { + curr_name = get_load_fw_name(vk, &image_tab[idx][i]); + if (!curr_name) { + dev_err(dev, "No suitable firmware exists for type %d", + curr_type); + ret = -ENOENT; + goto auto_load_all_exit; + } + ret = bcm_vk_load_image_by_type(vk, curr_type, + curr_name); + dev_info(dev, "Auto load %s, ret %d\n", + curr_name, ret); + + if (ret) { + dev_err(dev, "Error loading default %s\n", + curr_name); + goto auto_load_all_exit; + } + } + } + +auto_load_all_exit: + return ret; +} + +static int bcm_vk_trigger_autoload(struct bcm_vk *vk) +{ + if (test_and_set_bit(BCM_VK_WQ_DWNLD_PEND, vk->wq_offload) != 0) + return -EPERM; + + set_bit(BCM_VK_WQ_DWNLD_AUTO, vk->wq_offload); + 
queue_work(vk->wq_thread, &vk->wq_work); + + return 0; +} + +/* + * deferred work queue for draining and auto download. + */ +static void bcm_vk_wq_handler(struct work_struct *work) +{ + struct bcm_vk *vk = container_of(work, struct bcm_vk, wq_work); + struct device *dev = &vk->pdev->dev; + s32 ret; + + /* check wq offload bit map to perform various operations */ + if (test_bit(BCM_VK_WQ_NOTF_PEND, vk->wq_offload)) { + /* clear bit right the way for notification */ + clear_bit(BCM_VK_WQ_NOTF_PEND, vk->wq_offload); + bcm_vk_handle_notf(vk); + } + if (test_bit(BCM_VK_WQ_DWNLD_AUTO, vk->wq_offload)) { + bcm_vk_auto_load_all_images(vk); + + /* + * at the end of operation, clear AUTO bit and pending + * bit + */ + clear_bit(BCM_VK_WQ_DWNLD_AUTO, vk->wq_offload); + clear_bit(BCM_VK_WQ_DWNLD_PEND, vk->wq_offload); + } + + /* next, try to drain */ + ret = bcm_to_h_msg_dequeue(vk); + + if (ret == 0) + dev_dbg(dev, "Spurious trigger for workqueue\n"); + else if (ret < 0) + bcm_vk_blk_drv_access(vk); +} + +static long bcm_vk_load_image(struct bcm_vk *vk, + const struct vk_image __user *arg) +{ + struct device *dev = &vk->pdev->dev; + const char *image_name; + struct vk_image image; + u32 next_loadable; + enum soc_idx idx; + int image_idx; + int ret = -EPERM; + + if (copy_from_user(&image, arg, sizeof(image))) + return -EACCES; + + if ((image.type != VK_IMAGE_TYPE_BOOT1) && + (image.type != VK_IMAGE_TYPE_BOOT2)) { + dev_err(dev, "invalid image.type %u\n", image.type); + return ret; + } + + next_loadable = bcm_vk_next_boot_image(vk); + if (next_loadable != image.type) { + dev_err(dev, "Next expected image %u, Loading %u\n", + next_loadable, image.type); + return ret; + } + + /* + * if something is pending download already. This could only happen + * for now when the driver is being loaded, or if someone has issued + * another download command in another shell. + */ + if (test_and_set_bit(BCM_VK_WQ_DWNLD_PEND, vk->wq_offload) != 0) { + dev_err(dev, "Download operation already pending.\n"); + return ret; + } + + image_name = image.filename; + if (image_name[0] == '\0') { + /* Use default image name if NULL */ + idx = get_soc_idx(vk); + if (idx == VK_IDX_INVALID) + goto err_idx; + + /* Image idx starts with boot1 */ + image_idx = image.type - VK_IMAGE_TYPE_BOOT1; + image_name = get_load_fw_name(vk, &image_tab[idx][image_idx]); + if (!image_name) { + dev_err(dev, "No suitable image found for type %d", + image.type); + ret = -ENOENT; + goto err_idx; + } + } else { + /* Ensure filename is NULL terminated */ + image.filename[sizeof(image.filename) - 1] = '\0'; + } + ret = bcm_vk_load_image_by_type(vk, image.type, image_name); + dev_info(dev, "Load %s, ret %d\n", image_name, ret); +err_idx: + clear_bit(BCM_VK_WQ_DWNLD_PEND, vk->wq_offload); + + return ret; +} + +static int bcm_vk_reset_successful(struct bcm_vk *vk) +{ + struct device *dev = &vk->pdev->dev; + u32 fw_status, reset_reason; + int ret = -EAGAIN; + + /* + * Reset could be triggered when the card in several state: + * i) in bootROM + * ii) after boot1 + * iii) boot2 running + * + * i) & ii) - no status bits will be updated. If vkboot1 + * runs automatically after reset, it will update the reason + * to be unknown reason + * iii) - reboot reason match + deinit done. 
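+ * In code terms: a reset reason of VK_FWSTS_RESET_MBOX_DB or
+ * VK_FWSTS_RESET_UNKNOWN counts as success, unless deinit was
+ * triggered without the reset-done bit being set (checked below).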
+ */ + fw_status = vkread32(vk, BAR_0, VK_BAR_FWSTS); + /* immediate exit if interface goes down */ + if (BCM_VK_INTF_IS_DOWN(fw_status)) { + dev_err(dev, "PCIe Intf Down!\n"); + goto reset_exit; + } + + reset_reason = (fw_status & VK_FWSTS_RESET_REASON_MASK); + if ((reset_reason == VK_FWSTS_RESET_MBOX_DB) || + (reset_reason == VK_FWSTS_RESET_UNKNOWN)) + ret = 0; + + /* + * if some of the deinit bits are set, but done + * bit is not, this is a failure if triggered while boot2 is running + */ + if ((fw_status & VK_FWSTS_DEINIT_TRIGGERED) && + !(fw_status & VK_FWSTS_RESET_DONE)) + ret = -EAGAIN; + +reset_exit: + dev_dbg(dev, "FW status = 0x%x ret %d\n", fw_status, ret); + + return ret; +} + +static void bcm_to_v_reset_doorbell(struct bcm_vk *vk, u32 db_val) +{ + vkwrite32(vk, db_val, BAR_0, VK_BAR0_RESET_DB_BASE); +} + +static int bcm_vk_trigger_reset(struct bcm_vk *vk) +{ + u32 i; + u32 value, boot_status; + bool is_stdalone, is_boot2; + static const u32 bar0_reg_clr_list[] = { BAR_OS_UPTIME, + BAR_INTF_VER, + BAR_CARD_VOLTAGE, + BAR_CARD_TEMPERATURE, + BAR_CARD_PWR_AND_THRE }; + + /* clean up before pressing the door bell */ + bcm_vk_drain_msg_on_reset(vk); + vkwrite32(vk, 0, BAR_1, VK_BAR1_MSGQ_DEF_RDY); + /* make tag '\0' terminated */ + vkwrite32(vk, 0, BAR_1, VK_BAR1_BOOT1_VER_TAG); + + for (i = 0; i < VK_BAR1_DAUTH_MAX; i++) { + vkwrite32(vk, 0, BAR_1, VK_BAR1_DAUTH_STORE_ADDR(i)); + vkwrite32(vk, 0, BAR_1, VK_BAR1_DAUTH_VALID_ADDR(i)); + } + for (i = 0; i < VK_BAR1_SOTP_REVID_MAX; i++) + vkwrite32(vk, 0, BAR_1, VK_BAR1_SOTP_REVID_ADDR(i)); + + memset(&vk->card_info, 0, sizeof(vk->card_info)); + memset(&vk->peerlog_info, 0, sizeof(vk->peerlog_info)); + memset(&vk->proc_mon_info, 0, sizeof(vk->proc_mon_info)); + memset(&vk->alert_cnts, 0, sizeof(vk->alert_cnts)); + + /* + * When boot request fails, the CODE_PUSH_OFFSET stays persistent. + * Allowing us to debug the failure. When we call reset, + * we should clear CODE_PUSH_OFFSET so ROM does not execute + * boot again (and fails again) and instead waits for a new + * codepush. And, if previous boot has encountered error, need + * to clear the entry values + */ + boot_status = vkread32(vk, BAR_0, BAR_BOOT_STATUS); + if (boot_status & BOOT_ERR_MASK) { + dev_info(&vk->pdev->dev, + "Card in boot error 0x%x, clear CODEPUSH val\n", + boot_status); + value = 0; + } else { + value = vkread32(vk, BAR_0, BAR_CODEPUSH_SBL); + value &= CODEPUSH_MASK; + } + vkwrite32(vk, value, BAR_0, BAR_CODEPUSH_SBL); + + /* special reset handling */ + is_stdalone = boot_status & BOOT_STDALONE_RUNNING; + is_boot2 = (boot_status & BOOT_STATE_MASK) == BOOT2_RUNNING; + if (vk->peer_alert.flags & ERR_LOG_RAMDUMP) { + /* + * if card is in ramdump mode, it is hitting an error. 
Don't + * reset the reboot reason as it will contain valid info that + * is important - simply use special reset + */ + vkwrite32(vk, VK_BAR0_RESET_RAMPDUMP, BAR_0, VK_BAR_FWSTS); + return VK_BAR0_RESET_RAMPDUMP; + } else if (is_stdalone && !is_boot2) { + dev_info(&vk->pdev->dev, "Hard reset on Standalone mode"); + bcm_to_v_reset_doorbell(vk, VK_BAR0_RESET_DB_HARD); + return VK_BAR0_RESET_DB_HARD; + } + + /* reset fw_status with proper reason, and press db */ + vkwrite32(vk, VK_FWSTS_RESET_MBOX_DB, BAR_0, VK_BAR_FWSTS); + bcm_to_v_reset_doorbell(vk, VK_BAR0_RESET_DB_SOFT); + + /* clear other necessary registers and alert records */ + for (i = 0; i < ARRAY_SIZE(bar0_reg_clr_list); i++) + vkwrite32(vk, 0, BAR_0, bar0_reg_clr_list[i]); + memset(&vk->host_alert, 0, sizeof(vk->host_alert)); + memset(&vk->peer_alert, 0, sizeof(vk->peer_alert)); + /* clear 4096 bits of bitmap */ + bitmap_clear(vk->bmap, 0, VK_MSG_ID_BITMAP_SIZE); + + return 0; +} + +static long bcm_vk_reset(struct bcm_vk *vk, struct vk_reset __user *arg) +{ + struct device *dev = &vk->pdev->dev; + struct vk_reset reset; + int ret = 0; + u32 ramdump_reset; + int special_reset; + + if (copy_from_user(&reset, arg, sizeof(struct vk_reset))) + return -EFAULT; + + /* check if any download is in-progress, if so return error */ + if (test_and_set_bit(BCM_VK_WQ_DWNLD_PEND, vk->wq_offload) != 0) { + dev_err(dev, "Download operation pending - skip reset.\n"); + return -EPERM; + } + + ramdump_reset = vk->peer_alert.flags & ERR_LOG_RAMDUMP; + dev_info(dev, "Issue Reset %s\n", + ramdump_reset ? "in ramdump mode" : ""); + + /* + * The following is the sequence of reset: + * - send card level graceful shut down + * - wait enough time for VK to handle its business, stopping DMA etc + * - kill host apps + * - Trigger interrupt with DB + */ + bcm_vk_send_shutdown_msg(vk, VK_SHUTDOWN_GRACEFUL, 0, 0); + + spin_lock(&vk->ctx_lock); + if (!vk->reset_pid) { + vk->reset_pid = task_pid_nr(current); + } else { + dev_err(dev, "Reset already launched by process pid %d\n", + vk->reset_pid); + ret = -EACCES; + } + spin_unlock(&vk->ctx_lock); + if (ret) + goto err_exit; + + bcm_vk_blk_drv_access(vk); + special_reset = bcm_vk_trigger_reset(vk); + + /* + * Wait enough time for card os to deinit + * and populate the reset reason. 
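+ * (BCM_VK_DEINIT_TIME_MS below, i.e. 2 s as defined near the top of
+ * this file.)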
+ */ + msleep(BCM_VK_DEINIT_TIME_MS); + + if (special_reset) { + /* if it is special ramdump reset, return the type to user */ + reset.arg2 = special_reset; + if (copy_to_user(arg, &reset, sizeof(reset))) + ret = -EFAULT; + } else { + ret = bcm_vk_reset_successful(vk); + } + +err_exit: + clear_bit(BCM_VK_WQ_DWNLD_PEND, vk->wq_offload); + return ret; +} + +static int bcm_vk_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct bcm_vk_ctx *ctx = file->private_data; + struct bcm_vk *vk = container_of(ctx->miscdev, struct bcm_vk, miscdev); + unsigned long pg_size; + + /* only BAR2 is mmap possible, which is bar num 4 due to 64bit */ +#define VK_MMAPABLE_BAR 4 + + pg_size = ((pci_resource_len(vk->pdev, VK_MMAPABLE_BAR) - 1) + >> PAGE_SHIFT) + 1; + if (vma->vm_pgoff + vma_pages(vma) > pg_size) + return -EINVAL; + + vma->vm_pgoff += (pci_resource_start(vk->pdev, VK_MMAPABLE_BAR) + >> PAGE_SHIFT); + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); +} + +static long bcm_vk_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + long ret = -EINVAL; + struct bcm_vk_ctx *ctx = file->private_data; + struct bcm_vk *vk = container_of(ctx->miscdev, struct bcm_vk, miscdev); + void __user *argp = (void __user *)arg; + + dev_dbg(&vk->pdev->dev, + "ioctl, cmd=0x%02x, arg=0x%02lx\n", + cmd, arg); + + mutex_lock(&vk->mutex); + + switch (cmd) { + case VK_IOCTL_LOAD_IMAGE: + ret = bcm_vk_load_image(vk, argp); + break; + + case VK_IOCTL_RESET: + ret = bcm_vk_reset(vk, argp); + break; + + default: + break; + } + + mutex_unlock(&vk->mutex); + + return ret; +} + +static const struct file_operations bcm_vk_fops = { + .owner = THIS_MODULE, + .open = bcm_vk_open, + .read = bcm_vk_read, + .write = bcm_vk_write, + .poll = bcm_vk_poll, + .release = bcm_vk_release, + .mmap = bcm_vk_mmap, + .unlocked_ioctl = bcm_vk_ioctl, +}; + +static int bcm_vk_on_panic(struct notifier_block *nb, + unsigned long e, void *p) +{ + struct bcm_vk *vk = container_of(nb, struct bcm_vk, panic_nb); + + bcm_to_v_reset_doorbell(vk, VK_BAR0_RESET_DB_HARD); + + return 0; +} + +static int bcm_vk_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + int err; + int i; + int id; + int irq; + char name[20]; + struct bcm_vk *vk; + struct device *dev = &pdev->dev; + struct miscdevice *misc_device; + u32 boot_status; + + /* allocate vk structure which is tied to kref for freeing */ + vk = kzalloc(sizeof(*vk), GFP_KERNEL); + if (!vk) + return -ENOMEM; + + kref_init(&vk->kref); + if (nr_ib_sgl_blk > BCM_VK_IB_SGL_BLK_MAX) { + dev_warn(dev, "Inband SGL blk %d limited to max %d\n", + nr_ib_sgl_blk, BCM_VK_IB_SGL_BLK_MAX); + nr_ib_sgl_blk = BCM_VK_IB_SGL_BLK_MAX; + } + vk->ib_sgl_size = nr_ib_sgl_blk * VK_MSGQ_BLK_SIZE; + mutex_init(&vk->mutex); + + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Cannot enable PCI device\n"); + goto err_free_exit; + } + vk->pdev = pci_dev_get(pdev); + + err = pci_request_regions(pdev, DRV_MODULE_NAME); + if (err) { + dev_err(dev, "Cannot obtain PCI resources\n"); + goto err_disable_pdev; + } + + /* make sure DMA is good */ + err = dma_set_mask_and_coherent(&pdev->dev, + DMA_BIT_MASK(BCM_VK_DMA_BITS)); + if (err) { + dev_err(dev, "failed to set DMA mask\n"); + goto err_disable_pdev; + } + + /* The tdma is a scratch area for some DMA testings. 
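+ * It is nr_scratch_pages * PAGE_SIZE in size and its bus address is
+ * later handed to the card in bcm_vk_sync_card_info().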
*/ + if (nr_scratch_pages) { + vk->tdma_vaddr = dma_alloc_coherent + (dev, + nr_scratch_pages * PAGE_SIZE, + &vk->tdma_addr, GFP_KERNEL); + if (!vk->tdma_vaddr) { + err = -ENOMEM; + goto err_disable_pdev; + } + } + + pci_set_master(pdev); + pci_set_drvdata(pdev, vk); + + irq = pci_alloc_irq_vectors(pdev, + VK_MSIX_IRQ_MIN_REQ, + VK_MSIX_IRQ_MAX, + PCI_IRQ_MSI | PCI_IRQ_MSIX); + + if (irq < VK_MSIX_IRQ_MIN_REQ) { + dev_err(dev, "failed to get min %d MSIX interrupts, irq(%d)\n", + VK_MSIX_IRQ_MIN_REQ, irq); + err = (irq >= 0) ? -EINVAL : irq; + goto err_disable_pdev; + } + + if (irq != VK_MSIX_IRQ_MAX) + dev_warn(dev, "Number of IRQs %d allocated - requested(%d).\n", + irq, VK_MSIX_IRQ_MAX); + + for (i = 0; i < MAX_BAR; i++) { + /* multiple by 2 for 64 bit BAR mapping */ + vk->bar[i] = pci_ioremap_bar(pdev, i * 2); + if (!vk->bar[i]) { + dev_err(dev, "failed to remap BAR%d\n", i); + err = -ENOMEM; + goto err_iounmap; + } + } + + for (vk->num_irqs = 0; + vk->num_irqs < VK_MSIX_MSGQ_MAX; + vk->num_irqs++) { + err = devm_request_irq(dev, pci_irq_vector(pdev, vk->num_irqs), + bcm_vk_msgq_irqhandler, + IRQF_SHARED, DRV_MODULE_NAME, vk); + if (err) { + dev_err(dev, "failed to request msgq IRQ %d for MSIX %d\n", + pdev->irq + vk->num_irqs, vk->num_irqs + 1); + goto err_irq; + } + } + /* one irq for notification from VK */ + err = devm_request_irq(dev, pci_irq_vector(pdev, vk->num_irqs), + bcm_vk_notf_irqhandler, + IRQF_SHARED, DRV_MODULE_NAME, vk); + if (err) { + dev_err(dev, "failed to request notf IRQ %d for MSIX %d\n", + pdev->irq + vk->num_irqs, vk->num_irqs + 1); + goto err_irq; + } + vk->num_irqs++; + + for (i = 0; + (i < VK_MSIX_TTY_MAX) && (vk->num_irqs < irq); + i++, vk->num_irqs++) { + err = devm_request_irq(dev, pci_irq_vector(pdev, vk->num_irqs), + bcm_vk_tty_irqhandler, + IRQF_SHARED, DRV_MODULE_NAME, vk); + if (err) { + dev_err(dev, "failed request tty IRQ %d for MSIX %d\n", + pdev->irq + vk->num_irqs, vk->num_irqs + 1); + goto err_irq; + } + bcm_vk_tty_set_irq_enabled(vk, i); + } + + id = ida_alloc(&bcm_vk_ida, GFP_KERNEL); + if (id < 0) { + err = id; + dev_err(dev, "unable to get id\n"); + goto err_irq; + } + + vk->devid = id; + snprintf(name, sizeof(name), DRV_MODULE_NAME ".%d", id); + misc_device = &vk->miscdev; + misc_device->minor = MISC_DYNAMIC_MINOR; + misc_device->name = kstrdup(name, GFP_KERNEL); + if (!misc_device->name) { + err = -ENOMEM; + goto err_ida_remove; + } + misc_device->fops = &bcm_vk_fops, + + err = misc_register(misc_device); + if (err) { + dev_err(dev, "failed to register device\n"); + goto err_kfree_name; + } + + INIT_WORK(&vk->wq_work, bcm_vk_wq_handler); + + /* create dedicated workqueue */ + vk->wq_thread = create_singlethread_workqueue(name); + if (!vk->wq_thread) { + dev_err(dev, "Fail to create workqueue thread\n"); + err = -ENOMEM; + goto err_misc_deregister; + } + + err = bcm_vk_msg_init(vk); + if (err) { + dev_err(dev, "failed to init msg queue info\n"); + goto err_destroy_workqueue; + } + + /* sync other info */ + bcm_vk_sync_card_info(vk); + + /* register for panic notifier */ + vk->panic_nb.notifier_call = bcm_vk_on_panic; + err = atomic_notifier_chain_register(&panic_notifier_list, + &vk->panic_nb); + if (err) { + dev_err(dev, "Fail to register panic notifier\n"); + goto err_destroy_workqueue; + } + + snprintf(name, sizeof(name), KBUILD_MODNAME ".%d_ttyVK", id); + err = bcm_vk_tty_init(vk, name); + if (err) + goto err_unregister_panic_notifier; + + /* + * lets trigger an auto download. 
We don't want to do it serially here + * because at probing time, it is not supposed to block for a long time. + */ + boot_status = vkread32(vk, BAR_0, BAR_BOOT_STATUS); + if (auto_load) { + if ((boot_status & BOOT_STATE_MASK) == BROM_RUNNING) { + err = bcm_vk_trigger_autoload(vk); + if (err) + goto err_bcm_vk_tty_exit; + } else { + dev_err(dev, + "Auto-load skipped - BROM not in proper state (0x%x)\n", + boot_status); + } + } + + /* enable hb */ + bcm_vk_hb_init(vk); + + dev_dbg(dev, "BCM-VK:%u created\n", id); + + return 0; + +err_bcm_vk_tty_exit: + bcm_vk_tty_exit(vk); + +err_unregister_panic_notifier: + atomic_notifier_chain_unregister(&panic_notifier_list, + &vk->panic_nb); + +err_destroy_workqueue: + destroy_workqueue(vk->wq_thread); + +err_misc_deregister: + misc_deregister(misc_device); + +err_kfree_name: + kfree(misc_device->name); + misc_device->name = NULL; + +err_ida_remove: + ida_free(&bcm_vk_ida, id); + +err_irq: + for (i = 0; i < vk->num_irqs; i++) + devm_free_irq(dev, pci_irq_vector(pdev, i), vk); + + pci_disable_msix(pdev); + pci_disable_msi(pdev); + +err_iounmap: + for (i = 0; i < MAX_BAR; i++) { + if (vk->bar[i]) + pci_iounmap(pdev, vk->bar[i]); + } + pci_release_regions(pdev); + +err_disable_pdev: + if (vk->tdma_vaddr) + dma_free_coherent(&pdev->dev, nr_scratch_pages * PAGE_SIZE, + vk->tdma_vaddr, vk->tdma_addr); + + pci_free_irq_vectors(pdev); + pci_disable_device(pdev); + pci_dev_put(pdev); + +err_free_exit: + kfree(vk); + + return err; +} + +void bcm_vk_release_data(struct kref *kref) +{ + struct bcm_vk *vk = container_of(kref, struct bcm_vk, kref); + struct pci_dev *pdev = vk->pdev; + + dev_dbg(&pdev->dev, "BCM-VK:%d release data 0x%p\n", vk->devid, vk); + pci_dev_put(pdev); + kfree(vk); +} + +static void bcm_vk_remove(struct pci_dev *pdev) +{ + int i; + struct bcm_vk *vk = pci_get_drvdata(pdev); + struct miscdevice *misc_device = &vk->miscdev; + + bcm_vk_hb_deinit(vk); + + /* + * Trigger a reset to card and wait enough time for UCODE to rerun, + * which re-initialize the card into its default state. + * This ensures when driver is re-enumerated it will start from + * a completely clean state. 
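+ * The usleep_range() below allows roughly 100-150 ms
+ * (BCM_VK_UCODE_BOOT_US plus a 50% margin) for the ucode to rerun.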
+ */ + bcm_vk_trigger_reset(vk); + usleep_range(BCM_VK_UCODE_BOOT_US, BCM_VK_UCODE_BOOT_MAX_US); + + /* unregister panic notifier */ + atomic_notifier_chain_unregister(&panic_notifier_list, + &vk->panic_nb); + + bcm_vk_msg_remove(vk); + bcm_vk_tty_exit(vk); + + if (vk->tdma_vaddr) + dma_free_coherent(&pdev->dev, nr_scratch_pages * PAGE_SIZE, + vk->tdma_vaddr, vk->tdma_addr); + + /* remove if name is set which means misc dev registered */ + if (misc_device->name) { + misc_deregister(misc_device); + kfree(misc_device->name); + ida_free(&bcm_vk_ida, vk->devid); + } + for (i = 0; i < vk->num_irqs; i++) + devm_free_irq(&pdev->dev, pci_irq_vector(pdev, i), vk); + + pci_disable_msix(pdev); + pci_disable_msi(pdev); + + cancel_work_sync(&vk->wq_work); + destroy_workqueue(vk->wq_thread); + bcm_vk_tty_wq_exit(vk); + + for (i = 0; i < MAX_BAR; i++) { + if (vk->bar[i]) + pci_iounmap(pdev, vk->bar[i]); + } + + dev_dbg(&pdev->dev, "BCM-VK:%d released\n", vk->devid); + + pci_release_regions(pdev); + pci_free_irq_vectors(pdev); + pci_disable_device(pdev); + + kref_put(&vk->kref, bcm_vk_release_data); +} + +static void bcm_vk_shutdown(struct pci_dev *pdev) +{ + struct bcm_vk *vk = pci_get_drvdata(pdev); + u32 reg, boot_stat; + + reg = vkread32(vk, BAR_0, BAR_BOOT_STATUS); + boot_stat = reg & BOOT_STATE_MASK; + + if (boot_stat == BOOT1_RUNNING) { + /* simply trigger a reset interrupt to park it */ + bcm_vk_trigger_reset(vk); + } else if (boot_stat == BROM_NOT_RUN) { + int err; + u16 lnksta; + + /* + * The boot status only reflects boot condition since last reset + * As ucode will run only once to configure pcie, if multiple + * resets happen, we lost track if ucode has run or not. + * Here, read the current link speed and use that to + * sync up the bootstatus properly so that on reboot-back-up, + * it has the proper state to start with autoload + */ + err = pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnksta); + if (!err && + (lnksta & PCI_EXP_LNKSTA_CLS) != PCI_EXP_LNKSTA_CLS_2_5GB) { + reg |= BROM_STATUS_COMPLETE; + vkwrite32(vk, reg, BAR_0, BAR_BOOT_STATUS); + } + } +} + +static const struct pci_device_id bcm_vk_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_VALKYRIE), }, + { } +}; +MODULE_DEVICE_TABLE(pci, bcm_vk_ids); + +static struct pci_driver pci_driver = { + .name = DRV_MODULE_NAME, + .id_table = bcm_vk_ids, + .probe = bcm_vk_probe, + .remove = bcm_vk_remove, + .shutdown = bcm_vk_shutdown, +}; +module_pci_driver(pci_driver); + +MODULE_DESCRIPTION("Broadcom VK Host Driver"); +MODULE_AUTHOR("Scott Branden "); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION("1.0"); diff --git a/drivers/misc/bcm-vk/bcm_vk_msg.c b/drivers/misc/bcm-vk/bcm_vk_msg.c new file mode 100644 index 0000000000..e17d81231e --- /dev/null +++ b/drivers/misc/bcm-vk/bcm_vk_msg.c @@ -0,0 +1,1352 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2018-2020 Broadcom. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bcm_vk.h" +#include "bcm_vk_msg.h" +#include "bcm_vk_sg.h" + +/* functions to manipulate the transport id in msg block */ +#define BCM_VK_MSG_Q_SHIFT 4 +#define BCM_VK_MSG_Q_MASK 0xF +#define BCM_VK_MSG_ID_MASK 0xFFF + +#define BCM_VK_DMA_DRAIN_MAX_MS 2000 + +/* number x q_size will be the max number of msg processed per loop */ +#define BCM_VK_MSG_PROC_MAX_LOOP 2 + +/* module parameter */ +static bool hb_mon = true; +module_param(hb_mon, bool, 0444); +MODULE_PARM_DESC(hb_mon, "Monitoring heartbeat continuously.\n"); +static int batch_log = 1; +module_param(batch_log, int, 0444); +MODULE_PARM_DESC(batch_log, "Max num of logs per batch operation.\n"); + +static bool hb_mon_is_on(void) +{ + return hb_mon; +} + +static u32 get_q_num(const struct vk_msg_blk *msg) +{ + u32 q_num = msg->trans_id & BCM_VK_MSG_Q_MASK; + + if (q_num >= VK_MSGQ_PER_CHAN_MAX) + q_num = VK_MSGQ_NUM_DEFAULT; + return q_num; +} + +static void set_q_num(struct vk_msg_blk *msg, u32 q_num) +{ + u32 trans_q; + + if (q_num >= VK_MSGQ_PER_CHAN_MAX) + trans_q = VK_MSGQ_NUM_DEFAULT; + else + trans_q = q_num; + + msg->trans_id = (msg->trans_id & ~BCM_VK_MSG_Q_MASK) | trans_q; +} + +static u32 get_msg_id(const struct vk_msg_blk *msg) +{ + return ((msg->trans_id >> BCM_VK_MSG_Q_SHIFT) & BCM_VK_MSG_ID_MASK); +} + +static void set_msg_id(struct vk_msg_blk *msg, u32 val) +{ + msg->trans_id = (val << BCM_VK_MSG_Q_SHIFT) | get_q_num(msg); +} + +static u32 msgq_inc(const struct bcm_vk_sync_qinfo *qinfo, u32 idx, u32 inc) +{ + return ((idx + inc) & qinfo->q_mask); +} + +static +struct vk_msg_blk __iomem *msgq_blk_addr(const struct bcm_vk_sync_qinfo *qinfo, + u32 idx) +{ + return qinfo->q_start + (VK_MSGQ_BLK_SIZE * idx); +} + +static u32 msgq_occupied(const struct bcm_vk_msgq __iomem *msgq, + const struct bcm_vk_sync_qinfo *qinfo) +{ + u32 wr_idx, rd_idx; + + wr_idx = readl_relaxed(&msgq->wr_idx); + rd_idx = readl_relaxed(&msgq->rd_idx); + + return ((wr_idx - rd_idx) & qinfo->q_mask); +} + +static +u32 msgq_avail_space(const struct bcm_vk_msgq __iomem *msgq, + const struct bcm_vk_sync_qinfo *qinfo) +{ + return (qinfo->q_size - msgq_occupied(msgq, qinfo) - 1); +} + +/* number of retries when enqueue message fails before returning EAGAIN */ +#define BCM_VK_H2VK_ENQ_RETRY 10 +#define BCM_VK_H2VK_ENQ_RETRY_DELAY_MS 50 + +bool bcm_vk_drv_access_ok(struct bcm_vk *vk) +{ + return (!!atomic_read(&vk->msgq_inited)); +} + +void bcm_vk_set_host_alert(struct bcm_vk *vk, u32 bit_mask) +{ + struct bcm_vk_alert *alert = &vk->host_alert; + unsigned long flags; + + /* use irqsave version as this maybe called inside timer interrupt */ + spin_lock_irqsave(&vk->host_alert_lock, flags); + alert->notfs |= bit_mask; + spin_unlock_irqrestore(&vk->host_alert_lock, flags); + + if (test_and_set_bit(BCM_VK_WQ_NOTF_PEND, vk->wq_offload) == 0) + queue_work(vk->wq_thread, &vk->wq_work); +} + +/* + * Heartbeat related defines + * The heartbeat from host is a last resort. If stuck condition happens + * on the card, firmware is supposed to detect it. Therefore, the heartbeat + * values used will be more relaxed on the driver, which need to be bigger + * than the watchdog timeout on the card. The watchdog timeout on the card + * is 20s, with a jitter of 2s => 22s. We use a value of 27s here. 
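+ * With the 3 s poll period below this means up to 27 / 3 = 9
+ * consecutive missed polls (BCM_VK_HB_LOST_MAX) are tolerated before
+ * ERR_LOG_HOST_HB_FAIL is raised.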
+ */ +#define BCM_VK_HB_TIMER_S 3 +#define BCM_VK_HB_TIMER_VALUE (BCM_VK_HB_TIMER_S * HZ) +#define BCM_VK_HB_LOST_MAX (27 / BCM_VK_HB_TIMER_S) + +static void bcm_vk_hb_poll(struct work_struct *work) +{ + u32 uptime_s; + struct bcm_vk_hb_ctrl *hb = container_of(to_delayed_work(work), struct bcm_vk_hb_ctrl, + work); + struct bcm_vk *vk = container_of(hb, struct bcm_vk, hb_ctrl); + + if (bcm_vk_drv_access_ok(vk) && hb_mon_is_on()) { + /* read uptime from register and compare */ + uptime_s = vkread32(vk, BAR_0, BAR_OS_UPTIME); + + if (uptime_s == hb->last_uptime) + hb->lost_cnt++; + else /* reset to avoid accumulation */ + hb->lost_cnt = 0; + + dev_dbg(&vk->pdev->dev, "Last uptime %d current %d, lost %d\n", + hb->last_uptime, uptime_s, hb->lost_cnt); + + /* + * if the interface goes down without any activity, a value + * of 0xFFFFFFFF will be continuously read, and the detection + * will be happened eventually. + */ + hb->last_uptime = uptime_s; + } else { + /* reset heart beat lost cnt */ + hb->lost_cnt = 0; + } + + /* next, check if heartbeat exceeds limit */ + if (hb->lost_cnt > BCM_VK_HB_LOST_MAX) { + dev_err(&vk->pdev->dev, "Heartbeat Misses %d times, %d s!\n", + BCM_VK_HB_LOST_MAX, + BCM_VK_HB_LOST_MAX * BCM_VK_HB_TIMER_S); + + bcm_vk_blk_drv_access(vk); + bcm_vk_set_host_alert(vk, ERR_LOG_HOST_HB_FAIL); + } + /* re-arm timer */ + schedule_delayed_work(&hb->work, BCM_VK_HB_TIMER_VALUE); +} + +void bcm_vk_hb_init(struct bcm_vk *vk) +{ + struct bcm_vk_hb_ctrl *hb = &vk->hb_ctrl; + + INIT_DELAYED_WORK(&hb->work, bcm_vk_hb_poll); + schedule_delayed_work(&hb->work, BCM_VK_HB_TIMER_VALUE); +} + +void bcm_vk_hb_deinit(struct bcm_vk *vk) +{ + struct bcm_vk_hb_ctrl *hb = &vk->hb_ctrl; + + cancel_delayed_work_sync(&hb->work); +} + +static void bcm_vk_msgid_bitmap_clear(struct bcm_vk *vk, + unsigned int start, + unsigned int nbits) +{ + spin_lock(&vk->msg_id_lock); + bitmap_clear(vk->bmap, start, nbits); + spin_unlock(&vk->msg_id_lock); +} + +/* + * allocate a ctx per file struct + */ +static struct bcm_vk_ctx *bcm_vk_get_ctx(struct bcm_vk *vk, const pid_t pid) +{ + u32 i; + struct bcm_vk_ctx *ctx = NULL; + u32 hash_idx = hash_32(pid, VK_PID_HT_SHIFT_BIT); + + spin_lock(&vk->ctx_lock); + + /* check if it is in reset, if so, don't allow */ + if (vk->reset_pid) { + dev_err(&vk->pdev->dev, + "No context allowed during reset by pid %d\n", + vk->reset_pid); + + goto in_reset_exit; + } + + for (i = 0; i < ARRAY_SIZE(vk->ctx); i++) { + if (!vk->ctx[i].in_use) { + vk->ctx[i].in_use = true; + ctx = &vk->ctx[i]; + break; + } + } + + if (!ctx) { + dev_err(&vk->pdev->dev, "All context in use\n"); + + goto all_in_use_exit; + } + + /* set the pid and insert it to hash table */ + ctx->pid = pid; + ctx->hash_idx = hash_idx; + list_add_tail(&ctx->node, &vk->pid_ht[hash_idx].head); + + /* increase kref */ + kref_get(&vk->kref); + + /* clear counter */ + atomic_set(&ctx->pend_cnt, 0); + atomic_set(&ctx->dma_cnt, 0); + init_waitqueue_head(&ctx->rd_wq); + +all_in_use_exit: +in_reset_exit: + spin_unlock(&vk->ctx_lock); + + return ctx; +} + +static u16 bcm_vk_get_msg_id(struct bcm_vk *vk) +{ + u16 rc = VK_MSG_ID_OVERFLOW; + u16 test_bit_count = 0; + + spin_lock(&vk->msg_id_lock); + while (test_bit_count < (VK_MSG_ID_BITMAP_SIZE - 1)) { + /* + * first time come in this loop, msg_id will be 0 + * and the first one tested will be 1. 
We skip + * VK_SIMPLEX_MSG_ID (0) for one way host2vk + * communication + */ + vk->msg_id++; + if (vk->msg_id == VK_MSG_ID_BITMAP_SIZE) + vk->msg_id = 1; + + if (test_bit(vk->msg_id, vk->bmap)) { + test_bit_count++; + continue; + } + rc = vk->msg_id; + bitmap_set(vk->bmap, vk->msg_id, 1); + break; + } + spin_unlock(&vk->msg_id_lock); + + return rc; +} + +static int bcm_vk_free_ctx(struct bcm_vk *vk, struct bcm_vk_ctx *ctx) +{ + u32 idx; + u32 hash_idx; + pid_t pid; + struct bcm_vk_ctx *entry; + int count = 0; + + if (!ctx) { + dev_err(&vk->pdev->dev, "NULL context detected\n"); + return -EINVAL; + } + idx = ctx->idx; + pid = ctx->pid; + + spin_lock(&vk->ctx_lock); + + if (!vk->ctx[idx].in_use) { + dev_err(&vk->pdev->dev, "context[%d] not in use!\n", idx); + } else { + vk->ctx[idx].in_use = false; + vk->ctx[idx].miscdev = NULL; + + /* Remove it from hash list and see if it is the last one. */ + list_del(&ctx->node); + hash_idx = ctx->hash_idx; + list_for_each_entry(entry, &vk->pid_ht[hash_idx].head, node) { + if (entry->pid == pid) + count++; + } + } + + spin_unlock(&vk->ctx_lock); + + return count; +} + +static void bcm_vk_free_wkent(struct device *dev, struct bcm_vk_wkent *entry) +{ + int proc_cnt; + + bcm_vk_sg_free(dev, entry->dma, VK_DMA_MAX_ADDRS, &proc_cnt); + if (proc_cnt) + atomic_dec(&entry->ctx->dma_cnt); + + kfree(entry->to_h_msg); + kfree(entry); +} + +static void bcm_vk_drain_all_pend(struct device *dev, + struct bcm_vk_msg_chan *chan, + struct bcm_vk_ctx *ctx) +{ + u32 num; + struct bcm_vk_wkent *entry, *tmp; + struct bcm_vk *vk; + struct list_head del_q; + + if (ctx) + vk = container_of(ctx->miscdev, struct bcm_vk, miscdev); + + INIT_LIST_HEAD(&del_q); + spin_lock(&chan->pendq_lock); + for (num = 0; num < chan->q_nr; num++) { + list_for_each_entry_safe(entry, tmp, &chan->pendq[num], node) { + if ((!ctx) || (entry->ctx->idx == ctx->idx)) { + list_move_tail(&entry->node, &del_q); + } + } + } + spin_unlock(&chan->pendq_lock); + + /* batch clean up */ + num = 0; + list_for_each_entry_safe(entry, tmp, &del_q, node) { + list_del(&entry->node); + num++; + if (ctx) { + struct vk_msg_blk *msg; + int bit_set; + bool responded; + u32 msg_id; + + /* if it is specific ctx, log for any stuck */ + msg = entry->to_v_msg; + msg_id = get_msg_id(msg); + bit_set = test_bit(msg_id, vk->bmap); + responded = entry->to_h_msg ? true : false; + if (num <= batch_log) + dev_info(dev, + "Drained: fid %u size %u msg 0x%x(seq-%x) ctx 0x%x[fd-%d] args:[0x%x 0x%x] resp %s, bmap %d\n", + msg->function_id, msg->size, + msg_id, entry->seq_num, + msg->context_id, entry->ctx->idx, + msg->cmd, msg->arg, + responded ? 
"T" : "F", bit_set); + if (responded) + atomic_dec(&ctx->pend_cnt); + else if (bit_set) + bcm_vk_msgid_bitmap_clear(vk, msg_id, 1); + } + bcm_vk_free_wkent(dev, entry); + } + if (num && ctx) + dev_info(dev, "Total drained items %d [fd-%d]\n", + num, ctx->idx); +} + +void bcm_vk_drain_msg_on_reset(struct bcm_vk *vk) +{ + bcm_vk_drain_all_pend(&vk->pdev->dev, &vk->to_v_msg_chan, NULL); + bcm_vk_drain_all_pend(&vk->pdev->dev, &vk->to_h_msg_chan, NULL); +} + +/* + * Function to sync up the messages queue info that is provided by BAR1 + */ +int bcm_vk_sync_msgq(struct bcm_vk *vk, bool force_sync) +{ + struct bcm_vk_msgq __iomem *msgq; + struct device *dev = &vk->pdev->dev; + u32 msgq_off; + u32 num_q; + struct bcm_vk_msg_chan *chan_list[] = {&vk->to_v_msg_chan, + &vk->to_h_msg_chan}; + struct bcm_vk_msg_chan *chan; + int i, j; + int ret = 0; + + /* + * If the driver is loaded at startup where vk OS is not up yet, + * the msgq-info may not be available until a later time. In + * this case, we skip and the sync function is supposed to be + * called again. + */ + if (!bcm_vk_msgq_marker_valid(vk)) { + dev_info(dev, "BAR1 msgq marker not initialized.\n"); + return -EAGAIN; + } + + msgq_off = vkread32(vk, BAR_1, VK_BAR1_MSGQ_CTRL_OFF); + + /* each side is always half the total */ + num_q = vkread32(vk, BAR_1, VK_BAR1_MSGQ_NR) / 2; + if (!num_q || (num_q > VK_MSGQ_PER_CHAN_MAX)) { + dev_err(dev, + "Advertised msgq %d error - max %d allowed\n", + num_q, VK_MSGQ_PER_CHAN_MAX); + return -EINVAL; + } + + vk->to_v_msg_chan.q_nr = num_q; + vk->to_h_msg_chan.q_nr = num_q; + + /* first msgq location */ + msgq = vk->bar[BAR_1] + msgq_off; + + /* + * if this function is called when it is already inited, + * something is wrong + */ + if (bcm_vk_drv_access_ok(vk) && !force_sync) { + dev_err(dev, "Msgq info already in sync\n"); + return -EPERM; + } + + for (i = 0; i < ARRAY_SIZE(chan_list); i++) { + chan = chan_list[i]; + memset(chan->sync_qinfo, 0, sizeof(chan->sync_qinfo)); + + for (j = 0; j < num_q; j++) { + struct bcm_vk_sync_qinfo *qinfo; + u32 msgq_start; + u32 msgq_size; + u32 msgq_nxt; + u32 msgq_db_offset, q_db_offset; + + chan->msgq[j] = msgq; + msgq_start = readl_relaxed(&msgq->start); + msgq_size = readl_relaxed(&msgq->size); + msgq_nxt = readl_relaxed(&msgq->nxt); + msgq_db_offset = readl_relaxed(&msgq->db_offset); + q_db_offset = (msgq_db_offset & ((1 << DB_SHIFT) - 1)); + if (q_db_offset == (~msgq_db_offset >> DB_SHIFT)) + msgq_db_offset = q_db_offset; + else + /* fall back to default */ + msgq_db_offset = VK_BAR0_Q_DB_BASE(j); + + dev_info(dev, + "MsgQ[%d] type %d num %d, @ 0x%x, db_offset 0x%x rd_idx %d wr_idx %d, size %d, nxt 0x%x\n", + j, + readw_relaxed(&msgq->type), + readw_relaxed(&msgq->num), + msgq_start, + msgq_db_offset, + readl_relaxed(&msgq->rd_idx), + readl_relaxed(&msgq->wr_idx), + msgq_size, + msgq_nxt); + + qinfo = &chan->sync_qinfo[j]; + /* formulate and record static info */ + qinfo->q_start = vk->bar[BAR_1] + msgq_start; + qinfo->q_size = msgq_size; + /* set low threshold as 50% or 1/2 */ + qinfo->q_low = qinfo->q_size >> 1; + qinfo->q_mask = qinfo->q_size - 1; + qinfo->q_db_offset = msgq_db_offset; + + msgq++; + } + } + atomic_set(&vk->msgq_inited, 1); + + return ret; +} + +static int bcm_vk_msg_chan_init(struct bcm_vk_msg_chan *chan) +{ + u32 i; + + mutex_init(&chan->msgq_mutex); + spin_lock_init(&chan->pendq_lock); + for (i = 0; i < VK_MSGQ_MAX_NR; i++) + INIT_LIST_HEAD(&chan->pendq[i]); + + return 0; +} + +static void bcm_vk_append_pendq(struct bcm_vk_msg_chan *chan, u16 
q_num, + struct bcm_vk_wkent *entry) +{ + struct bcm_vk_ctx *ctx; + + spin_lock(&chan->pendq_lock); + list_add_tail(&entry->node, &chan->pendq[q_num]); + if (entry->to_h_msg) { + ctx = entry->ctx; + atomic_inc(&ctx->pend_cnt); + wake_up_interruptible(&ctx->rd_wq); + } + spin_unlock(&chan->pendq_lock); +} + +static u32 bcm_vk_append_ib_sgl(struct bcm_vk *vk, + struct bcm_vk_wkent *entry, + struct _vk_data *data, + unsigned int num_planes) +{ + unsigned int i; + unsigned int item_cnt = 0; + struct device *dev = &vk->pdev->dev; + struct bcm_vk_msg_chan *chan = &vk->to_v_msg_chan; + struct vk_msg_blk *msg = &entry->to_v_msg[0]; + struct bcm_vk_msgq __iomem *msgq; + struct bcm_vk_sync_qinfo *qinfo; + u32 ib_sgl_size = 0; + u8 *buf = (u8 *)&entry->to_v_msg[entry->to_v_blks]; + u32 avail; + u32 q_num; + + /* check if high watermark is hit, and if so, skip */ + q_num = get_q_num(msg); + msgq = chan->msgq[q_num]; + qinfo = &chan->sync_qinfo[q_num]; + avail = msgq_avail_space(msgq, qinfo); + if (avail < qinfo->q_low) { + dev_dbg(dev, "Skip inserting inband SGL, [0x%x/0x%x]\n", + avail, qinfo->q_size); + return 0; + } + + for (i = 0; i < num_planes; i++) { + if (data[i].address && + (ib_sgl_size + data[i].size) <= vk->ib_sgl_size) { + item_cnt++; + memcpy(buf, entry->dma[i].sglist, data[i].size); + ib_sgl_size += data[i].size; + buf += data[i].size; + } + } + + dev_dbg(dev, "Num %u sgl items appended, size 0x%x, room 0x%x\n", + item_cnt, ib_sgl_size, vk->ib_sgl_size); + + /* round up size */ + ib_sgl_size = (ib_sgl_size + VK_MSGQ_BLK_SIZE - 1) + >> VK_MSGQ_BLK_SZ_SHIFT; + + return ib_sgl_size; +} + +void bcm_to_v_q_doorbell(struct bcm_vk *vk, u32 q_num, u32 db_val) +{ + struct bcm_vk_msg_chan *chan = &vk->to_v_msg_chan; + struct bcm_vk_sync_qinfo *qinfo = &chan->sync_qinfo[q_num]; + + vkwrite32(vk, db_val, BAR_0, qinfo->q_db_offset); +} + +static int bcm_to_v_msg_enqueue(struct bcm_vk *vk, struct bcm_vk_wkent *entry) +{ + static u32 seq_num; + struct bcm_vk_msg_chan *chan = &vk->to_v_msg_chan; + struct device *dev = &vk->pdev->dev; + struct vk_msg_blk *src = &entry->to_v_msg[0]; + + struct vk_msg_blk __iomem *dst; + struct bcm_vk_msgq __iomem *msgq; + struct bcm_vk_sync_qinfo *qinfo; + u32 q_num = get_q_num(src); + u32 wr_idx; /* local copy */ + u32 i; + u32 avail; + u32 retry; + + if (entry->to_v_blks != src->size + 1) { + dev_err(dev, "number of blks %d not matching %d MsgId[0x%x]: func %d ctx 0x%x\n", + entry->to_v_blks, + src->size + 1, + get_msg_id(src), + src->function_id, + src->context_id); + return -EMSGSIZE; + } + + msgq = chan->msgq[q_num]; + qinfo = &chan->sync_qinfo[q_num]; + + mutex_lock(&chan->msgq_mutex); + + avail = msgq_avail_space(msgq, qinfo); + + /* if not enough space, return EAGAIN and let app handles it */ + retry = 0; + while ((avail < entry->to_v_blks) && + (retry++ < BCM_VK_H2VK_ENQ_RETRY)) { + mutex_unlock(&chan->msgq_mutex); + + msleep(BCM_VK_H2VK_ENQ_RETRY_DELAY_MS); + mutex_lock(&chan->msgq_mutex); + avail = msgq_avail_space(msgq, qinfo); + } + if (retry > BCM_VK_H2VK_ENQ_RETRY) { + mutex_unlock(&chan->msgq_mutex); + return -EAGAIN; + } + + /* at this point, mutex is taken and there is enough space */ + entry->seq_num = seq_num++; /* update debug seq number */ + wr_idx = readl_relaxed(&msgq->wr_idx); + + if (wr_idx >= qinfo->q_size) { + dev_crit(dev, "Invalid wr_idx 0x%x => max 0x%x!", + wr_idx, qinfo->q_size); + bcm_vk_blk_drv_access(vk); + bcm_vk_set_host_alert(vk, ERR_LOG_HOST_PCIE_DWN); + goto idx_err; + } + + dst = msgq_blk_addr(qinfo, wr_idx); + for (i = 0; i < 
entry->to_v_blks; i++) { + memcpy_toio(dst, src, sizeof(*dst)); + + src++; + wr_idx = msgq_inc(qinfo, wr_idx, 1); + dst = msgq_blk_addr(qinfo, wr_idx); + } + + /* flush the write pointer */ + writel(wr_idx, &msgq->wr_idx); + + /* log new info for debugging */ + dev_dbg(dev, + "MsgQ[%d] [Rd Wr] = [%d %d] blks inserted %d - Q = [u-%d a-%d]/%d\n", + readl_relaxed(&msgq->num), + readl_relaxed(&msgq->rd_idx), + wr_idx, + entry->to_v_blks, + msgq_occupied(msgq, qinfo), + msgq_avail_space(msgq, qinfo), + readl_relaxed(&msgq->size)); + /* + * press door bell based on queue number. 1 is added to the wr_idx + * to avoid the value of 0 appearing on the VK side to distinguish + * from initial value. + */ + bcm_to_v_q_doorbell(vk, q_num, wr_idx + 1); +idx_err: + mutex_unlock(&chan->msgq_mutex); + return 0; +} + +int bcm_vk_send_shutdown_msg(struct bcm_vk *vk, u32 shut_type, + const pid_t pid, const u32 q_num) +{ + int rc = 0; + struct bcm_vk_wkent *entry; + struct device *dev = &vk->pdev->dev; + + /* + * check if the marker is still good. Sometimes, the PCIe interface may + * have gone done, and if so and we ship down thing based on broken + * values, kernel may panic. + */ + if (!bcm_vk_msgq_marker_valid(vk)) { + dev_info(dev, "PCIe comm chan - invalid marker (0x%x)!\n", + vkread32(vk, BAR_1, VK_BAR1_MSGQ_DEF_RDY)); + return -EINVAL; + } + + entry = kzalloc(struct_size(entry, to_v_msg, 1), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + /* fill up necessary data */ + entry->to_v_msg[0].function_id = VK_FID_SHUTDOWN; + set_q_num(&entry->to_v_msg[0], q_num); + set_msg_id(&entry->to_v_msg[0], VK_SIMPLEX_MSG_ID); + entry->to_v_blks = 1; /* always 1 block */ + + entry->to_v_msg[0].cmd = shut_type; + entry->to_v_msg[0].arg = pid; + + rc = bcm_to_v_msg_enqueue(vk, entry); + if (rc) + dev_err(dev, + "Sending shutdown message to q %d for pid %d fails.\n", + get_q_num(&entry->to_v_msg[0]), pid); + + kfree(entry); + + return rc; +} + +static int bcm_vk_handle_last_sess(struct bcm_vk *vk, const pid_t pid, + const u32 q_num) +{ + int rc = 0; + struct device *dev = &vk->pdev->dev; + + /* + * don't send down or do anything if message queue is not initialized + * and if it is the reset session, clear it. 
+ */ + if (!bcm_vk_drv_access_ok(vk)) { + if (vk->reset_pid == pid) + vk->reset_pid = 0; + return -EPERM; + } + + dev_dbg(dev, "No more sessions, shut down pid %d\n", pid); + + /* only need to do it if it is not the reset process */ + if (vk->reset_pid != pid) + rc = bcm_vk_send_shutdown_msg(vk, VK_SHUTDOWN_PID, pid, q_num); + else + /* put reset_pid to 0 if it is exiting last session */ + vk->reset_pid = 0; + + return rc; +} + +static struct bcm_vk_wkent *bcm_vk_dequeue_pending(struct bcm_vk *vk, + struct bcm_vk_msg_chan *chan, + u16 q_num, + u16 msg_id) +{ + struct bcm_vk_wkent *entry = NULL, *iter; + + spin_lock(&chan->pendq_lock); + list_for_each_entry(iter, &chan->pendq[q_num], node) { + if (get_msg_id(&iter->to_v_msg[0]) == msg_id) { + list_del(&iter->node); + entry = iter; + bcm_vk_msgid_bitmap_clear(vk, msg_id, 1); + break; + } + } + spin_unlock(&chan->pendq_lock); + return entry; +} + +s32 bcm_to_h_msg_dequeue(struct bcm_vk *vk) +{ + struct device *dev = &vk->pdev->dev; + struct bcm_vk_msg_chan *chan = &vk->to_h_msg_chan; + struct vk_msg_blk *data; + struct vk_msg_blk __iomem *src; + struct vk_msg_blk *dst; + struct bcm_vk_msgq __iomem *msgq; + struct bcm_vk_sync_qinfo *qinfo; + struct bcm_vk_wkent *entry; + u32 rd_idx, wr_idx; + u32 q_num, msg_id, j; + u32 num_blks; + s32 total = 0; + int cnt = 0; + int msg_processed = 0; + int max_msg_to_process; + bool exit_loop; + + /* + * drain all the messages from the queues, and find its pending + * entry in the to_v queue, based on msg_id & q_num, and move the + * entry to the to_h pending queue, waiting for user space + * program to extract + */ + mutex_lock(&chan->msgq_mutex); + + for (q_num = 0; q_num < chan->q_nr; q_num++) { + msgq = chan->msgq[q_num]; + qinfo = &chan->sync_qinfo[q_num]; + max_msg_to_process = BCM_VK_MSG_PROC_MAX_LOOP * qinfo->q_size; + + rd_idx = readl_relaxed(&msgq->rd_idx); + wr_idx = readl_relaxed(&msgq->wr_idx); + msg_processed = 0; + exit_loop = false; + while ((rd_idx != wr_idx) && !exit_loop) { + u8 src_size; + + /* + * Make a local copy and get pointer to src blk + * The rd_idx is masked before getting the pointer to + * avoid out of bound access in case the interface goes + * down. It will end up pointing to the last block in + * the buffer, but subsequent src->size check would be + * able to catch this. + */ + src = msgq_blk_addr(qinfo, rd_idx & qinfo->q_mask); + src_size = readb(&src->size); + + if ((rd_idx >= qinfo->q_size) || + (src_size > (qinfo->q_size - 1))) { + dev_crit(dev, + "Invalid rd_idx 0x%x or size 0x%x => max 0x%x!", + rd_idx, src_size, qinfo->q_size); + bcm_vk_blk_drv_access(vk); + bcm_vk_set_host_alert(vk, + ERR_LOG_HOST_PCIE_DWN); + goto idx_err; + } + + num_blks = src_size + 1; + data = kzalloc(num_blks * VK_MSGQ_BLK_SIZE, GFP_KERNEL); + if (data) { + /* copy messages and linearize it */ + dst = data; + for (j = 0; j < num_blks; j++) { + memcpy_fromio(dst, src, sizeof(*dst)); + + dst++; + rd_idx = msgq_inc(qinfo, rd_idx, 1); + src = msgq_blk_addr(qinfo, rd_idx); + } + total++; + } else { + /* + * if we could not allocate memory in kernel, + * that is fatal. 
+ */ + dev_crit(dev, "Kernel mem allocation failure.\n"); + total = -ENOMEM; + goto idx_err; + } + + /* flush rd pointer after a message is dequeued */ + writel(rd_idx, &msgq->rd_idx); + + /* log new info for debugging */ + dev_dbg(dev, + "MsgQ[%d] [Rd Wr] = [%d %d] blks extracted %d - Q = [u-%d a-%d]/%d\n", + readl_relaxed(&msgq->num), + rd_idx, + wr_idx, + num_blks, + msgq_occupied(msgq, qinfo), + msgq_avail_space(msgq, qinfo), + readl_relaxed(&msgq->size)); + + /* + * No need to search if it is an autonomous one-way + * message from driver, as these messages do not bear + * a to_v pending item. Currently, only the shutdown + * message falls into this category. + */ + if (data->function_id == VK_FID_SHUTDOWN) { + kfree(data); + continue; + } + + msg_id = get_msg_id(data); + /* lookup original message in to_v direction */ + entry = bcm_vk_dequeue_pending(vk, + &vk->to_v_msg_chan, + q_num, + msg_id); + + /* + * if there is message to does not have prior send, + * this is the location to add here + */ + if (entry) { + entry->to_h_blks = num_blks; + entry->to_h_msg = data; + bcm_vk_append_pendq(&vk->to_h_msg_chan, + q_num, entry); + + } else { + if (cnt++ < batch_log) + dev_info(dev, + "Could not find MsgId[0x%x] for resp func %d bmap %d\n", + msg_id, data->function_id, + test_bit(msg_id, vk->bmap)); + kfree(data); + } + /* Fetch wr_idx to handle more back-to-back events */ + wr_idx = readl(&msgq->wr_idx); + + /* + * cap the max so that even we try to handle more back-to-back events, + * so that it won't hold CPU too long or in case rd/wr idexes are + * corrupted which triggers infinite looping. + */ + if (++msg_processed >= max_msg_to_process) { + dev_warn(dev, "Q[%d] Per loop processing exceeds %d\n", + q_num, max_msg_to_process); + exit_loop = true; + } + } + } +idx_err: + mutex_unlock(&chan->msgq_mutex); + dev_dbg(dev, "total %d drained from queues\n", total); + + return total; +} + +/* + * init routine for all required data structures + */ +static int bcm_vk_data_init(struct bcm_vk *vk) +{ + int i; + + spin_lock_init(&vk->ctx_lock); + for (i = 0; i < ARRAY_SIZE(vk->ctx); i++) { + vk->ctx[i].in_use = false; + vk->ctx[i].idx = i; /* self identity */ + vk->ctx[i].miscdev = NULL; + } + spin_lock_init(&vk->msg_id_lock); + spin_lock_init(&vk->host_alert_lock); + vk->msg_id = 0; + + /* initialize hash table */ + for (i = 0; i < VK_PID_HT_SZ; i++) + INIT_LIST_HEAD(&vk->pid_ht[i].head); + + return 0; +} + +irqreturn_t bcm_vk_msgq_irqhandler(int irq, void *dev_id) +{ + struct bcm_vk *vk = dev_id; + + if (!bcm_vk_drv_access_ok(vk)) { + dev_err(&vk->pdev->dev, + "Interrupt %d received when msgq not inited\n", irq); + goto skip_schedule_work; + } + + queue_work(vk->wq_thread, &vk->wq_work); + +skip_schedule_work: + return IRQ_HANDLED; +} + +int bcm_vk_open(struct inode *inode, struct file *p_file) +{ + struct bcm_vk_ctx *ctx; + struct miscdevice *miscdev = (struct miscdevice *)p_file->private_data; + struct bcm_vk *vk = container_of(miscdev, struct bcm_vk, miscdev); + struct device *dev = &vk->pdev->dev; + int rc = 0; + + /* get a context and set it up for file */ + ctx = bcm_vk_get_ctx(vk, task_tgid_nr(current)); + if (!ctx) { + dev_err(dev, "Error allocating context\n"); + rc = -ENOMEM; + } else { + /* + * set up context and replace private data with context for + * other methods to use. 
Reason for the context is because + * it is allowed for multiple sessions to open the sysfs, and + * for each file open, when upper layer query the response, + * only those that are tied to a specific open should be + * returned. The context->idx will be used for such binding + */ + ctx->miscdev = miscdev; + p_file->private_data = ctx; + dev_dbg(dev, "ctx_returned with idx %d, pid %d\n", + ctx->idx, ctx->pid); + } + return rc; +} + +ssize_t bcm_vk_read(struct file *p_file, + char __user *buf, + size_t count, + loff_t *f_pos) +{ + ssize_t rc = -ENOMSG; + struct bcm_vk_ctx *ctx = p_file->private_data; + struct bcm_vk *vk = container_of(ctx->miscdev, struct bcm_vk, + miscdev); + struct device *dev = &vk->pdev->dev; + struct bcm_vk_msg_chan *chan = &vk->to_h_msg_chan; + struct bcm_vk_wkent *entry = NULL, *iter; + u32 q_num; + u32 rsp_length; + + if (!bcm_vk_drv_access_ok(vk)) + return -EPERM; + + dev_dbg(dev, "Buf count %zu\n", count); + + /* + * search through the pendq on the to_h chan, and return only those + * that belongs to the same context. Search is always from the high to + * the low priority queues + */ + spin_lock(&chan->pendq_lock); + for (q_num = 0; q_num < chan->q_nr; q_num++) { + list_for_each_entry(iter, &chan->pendq[q_num], node) { + if (iter->ctx->idx == ctx->idx) { + if (count >= + (iter->to_h_blks * VK_MSGQ_BLK_SIZE)) { + list_del(&iter->node); + atomic_dec(&ctx->pend_cnt); + entry = iter; + } else { + /* buffer not big enough */ + rc = -EMSGSIZE; + } + goto read_loop_exit; + } + } + } +read_loop_exit: + spin_unlock(&chan->pendq_lock); + + if (entry) { + /* retrieve the passed down msg_id */ + set_msg_id(&entry->to_h_msg[0], entry->usr_msg_id); + rsp_length = entry->to_h_blks * VK_MSGQ_BLK_SIZE; + if (copy_to_user(buf, entry->to_h_msg, rsp_length) == 0) + rc = rsp_length; + + bcm_vk_free_wkent(dev, entry); + } else if (rc == -EMSGSIZE) { + struct vk_msg_blk tmp_msg = entry->to_h_msg[0]; + + /* + * in this case, return just the first block, so + * that app knows what size it is looking for. 
+ */ + set_msg_id(&tmp_msg, entry->usr_msg_id); + tmp_msg.size = entry->to_h_blks - 1; + if (copy_to_user(buf, &tmp_msg, VK_MSGQ_BLK_SIZE) != 0) { + dev_err(dev, "Error return 1st block in -EMSGSIZE\n"); + rc = -EFAULT; + } + } + return rc; +} + +ssize_t bcm_vk_write(struct file *p_file, + const char __user *buf, + size_t count, + loff_t *f_pos) +{ + ssize_t rc; + struct bcm_vk_ctx *ctx = p_file->private_data; + struct bcm_vk *vk = container_of(ctx->miscdev, struct bcm_vk, + miscdev); + struct bcm_vk_msgq __iomem *msgq; + struct device *dev = &vk->pdev->dev; + struct bcm_vk_wkent *entry; + u32 sgl_extra_blks; + u32 q_num; + u32 msg_size; + u32 msgq_size; + + if (!bcm_vk_drv_access_ok(vk)) + return -EPERM; + + dev_dbg(dev, "Msg count %zu\n", count); + + /* first, do sanity check where count should be multiple of basic blk */ + if (count & (VK_MSGQ_BLK_SIZE - 1)) { + dev_err(dev, "Failure with size %zu not multiple of %zu\n", + count, VK_MSGQ_BLK_SIZE); + rc = -EINVAL; + goto write_err; + } + + /* allocate the work entry + buffer for size count and inband sgl */ + entry = kzalloc(sizeof(*entry) + count + vk->ib_sgl_size, + GFP_KERNEL); + if (!entry) { + rc = -ENOMEM; + goto write_err; + } + + /* now copy msg from user space, and then formulate the work entry */ + if (copy_from_user(&entry->to_v_msg[0], buf, count)) { + rc = -EFAULT; + goto write_free_ent; + } + + entry->to_v_blks = count >> VK_MSGQ_BLK_SZ_SHIFT; + entry->ctx = ctx; + + /* do a check on the blk size which could not exceed queue space */ + q_num = get_q_num(&entry->to_v_msg[0]); + msgq = vk->to_v_msg_chan.msgq[q_num]; + msgq_size = readl_relaxed(&msgq->size); + if (entry->to_v_blks + (vk->ib_sgl_size >> VK_MSGQ_BLK_SZ_SHIFT) + > (msgq_size - 1)) { + dev_err(dev, "Blk size %d exceed max queue size allowed %d\n", + entry->to_v_blks, msgq_size - 1); + rc = -EINVAL; + goto write_free_ent; + } + + /* Use internal message id */ + entry->usr_msg_id = get_msg_id(&entry->to_v_msg[0]); + rc = bcm_vk_get_msg_id(vk); + if (rc == VK_MSG_ID_OVERFLOW) { + dev_err(dev, "msg_id overflow\n"); + rc = -EOVERFLOW; + goto write_free_ent; + } + set_msg_id(&entry->to_v_msg[0], rc); + ctx->q_num = q_num; + + dev_dbg(dev, + "[Q-%d]Message ctx id %d, usr_msg_id 0x%x sent msg_id 0x%x\n", + ctx->q_num, ctx->idx, entry->usr_msg_id, + get_msg_id(&entry->to_v_msg[0])); + + if (entry->to_v_msg[0].function_id == VK_FID_TRANS_BUF) { + /* Convert any pointers to sg list */ + unsigned int num_planes; + int dir; + struct _vk_data *data; + + /* + * check if we are in reset, if so, no buffer transfer is + * allowed and return error. 
+ */ + if (vk->reset_pid) { + dev_dbg(dev, "No Transfer allowed during reset, pid %d.\n", + ctx->pid); + rc = -EACCES; + goto write_free_msgid; + } + + num_planes = entry->to_v_msg[0].cmd & VK_CMD_PLANES_MASK; + if ((entry->to_v_msg[0].cmd & VK_CMD_MASK) == VK_CMD_DOWNLOAD) + dir = DMA_FROM_DEVICE; + else + dir = DMA_TO_DEVICE; + + /* Calculate vk_data location */ + /* Go to end of the message */ + msg_size = entry->to_v_msg[0].size; + if (msg_size > entry->to_v_blks) { + rc = -EMSGSIZE; + goto write_free_msgid; + } + + data = (struct _vk_data *)&entry->to_v_msg[msg_size + 1]; + + /* Now back up to the start of the pointers */ + data -= num_planes; + + /* Convert user addresses to DMA SG List */ + rc = bcm_vk_sg_alloc(dev, entry->dma, dir, data, num_planes); + if (rc) + goto write_free_msgid; + + atomic_inc(&ctx->dma_cnt); + /* try to embed inband sgl */ + sgl_extra_blks = bcm_vk_append_ib_sgl(vk, entry, data, + num_planes); + entry->to_v_blks += sgl_extra_blks; + entry->to_v_msg[0].size += sgl_extra_blks; + } else if (entry->to_v_msg[0].function_id == VK_FID_INIT && + entry->to_v_msg[0].context_id == VK_NEW_CTX) { + /* + * Init happens in 2 stages, only the first stage contains the + * pid that needs translating. + */ + pid_t org_pid, pid; + + /* + * translate the pid into the unique host space as user + * may run sessions inside containers or process + * namespaces. + */ +#define VK_MSG_PID_MASK 0xffffff00 +#define VK_MSG_PID_SH 8 + org_pid = (entry->to_v_msg[0].arg & VK_MSG_PID_MASK) + >> VK_MSG_PID_SH; + + pid = task_tgid_nr(current); + entry->to_v_msg[0].arg = + (entry->to_v_msg[0].arg & ~VK_MSG_PID_MASK) | + (pid << VK_MSG_PID_SH); + if (org_pid != pid) + dev_dbg(dev, "In PID 0x%x(%d), converted PID 0x%x(%d)\n", + org_pid, org_pid, pid, pid); + } + + /* + * store work entry to pending queue until a response is received. + * This needs to be done before enqueuing the message + */ + bcm_vk_append_pendq(&vk->to_v_msg_chan, q_num, entry); + + rc = bcm_to_v_msg_enqueue(vk, entry); + if (rc) { + dev_err(dev, "Fail to enqueue msg to to_v queue\n"); + + /* remove message from pending list */ + entry = bcm_vk_dequeue_pending + (vk, + &vk->to_v_msg_chan, + q_num, + get_msg_id(&entry->to_v_msg[0])); + goto write_free_ent; + } + + return count; + +write_free_msgid: + bcm_vk_msgid_bitmap_clear(vk, get_msg_id(&entry->to_v_msg[0]), 1); +write_free_ent: + kfree(entry); +write_err: + return rc; +} + +__poll_t bcm_vk_poll(struct file *p_file, struct poll_table_struct *wait) +{ + __poll_t ret = 0; + int cnt; + struct bcm_vk_ctx *ctx = p_file->private_data; + struct bcm_vk *vk = container_of(ctx->miscdev, struct bcm_vk, miscdev); + struct device *dev = &vk->pdev->dev; + + poll_wait(p_file, &ctx->rd_wq, wait); + + cnt = atomic_read(&ctx->pend_cnt); + if (cnt) { + ret = (__force __poll_t)(POLLIN | POLLRDNORM); + if (cnt < 0) { + dev_err(dev, "Error cnt %d, setting back to 0", cnt); + atomic_set(&ctx->pend_cnt, 0); + } + } + + return ret; +} + +int bcm_vk_release(struct inode *inode, struct file *p_file) +{ + int ret; + struct bcm_vk_ctx *ctx = p_file->private_data; + struct bcm_vk *vk = container_of(ctx->miscdev, struct bcm_vk, miscdev); + struct device *dev = &vk->pdev->dev; + pid_t pid = ctx->pid; + int dma_cnt; + unsigned long timeout, start_time; + + /* + * if there are outstanding DMA transactions, need to delay long enough + * to ensure that the card side would have stopped touching the host buffer + * and its SGL list. 
A race condition could happen if the host app is killed + * abruptly, eg kill -9, while some DMA transfer orders are still inflight. + * Nothing could be done except for a delay as host side is running in a + * completely async fashion. + */ + start_time = jiffies; + timeout = start_time + msecs_to_jiffies(BCM_VK_DMA_DRAIN_MAX_MS); + do { + if (time_after(jiffies, timeout)) { + dev_warn(dev, "%d dma still pending for [fd-%d] pid %d\n", + dma_cnt, ctx->idx, pid); + break; + } + dma_cnt = atomic_read(&ctx->dma_cnt); + cpu_relax(); + cond_resched(); + } while (dma_cnt); + dev_dbg(dev, "Draining for [fd-%d] pid %d - delay %d ms\n", + ctx->idx, pid, jiffies_to_msecs(jiffies - start_time)); + + bcm_vk_drain_all_pend(&vk->pdev->dev, &vk->to_v_msg_chan, ctx); + bcm_vk_drain_all_pend(&vk->pdev->dev, &vk->to_h_msg_chan, ctx); + + ret = bcm_vk_free_ctx(vk, ctx); + if (ret == 0) + ret = bcm_vk_handle_last_sess(vk, pid, ctx->q_num); + else + ret = 0; + + kref_put(&vk->kref, bcm_vk_release_data); + + return ret; +} + +int bcm_vk_msg_init(struct bcm_vk *vk) +{ + struct device *dev = &vk->pdev->dev; + int ret; + + if (bcm_vk_data_init(vk)) { + dev_err(dev, "Error initializing internal data structures\n"); + return -EINVAL; + } + + if (bcm_vk_msg_chan_init(&vk->to_v_msg_chan) || + bcm_vk_msg_chan_init(&vk->to_h_msg_chan)) { + dev_err(dev, "Error initializing communication channel\n"); + return -EIO; + } + + /* read msgq info if ready */ + ret = bcm_vk_sync_msgq(vk, false); + if (ret && (ret != -EAGAIN)) { + dev_err(dev, "Error reading comm msg Q info\n"); + return -EIO; + } + + return 0; +} + +void bcm_vk_msg_remove(struct bcm_vk *vk) +{ + bcm_vk_blk_drv_access(vk); + + /* drain all pending items */ + bcm_vk_drain_all_pend(&vk->pdev->dev, &vk->to_v_msg_chan, NULL); + bcm_vk_drain_all_pend(&vk->pdev->dev, &vk->to_h_msg_chan, NULL); +} + diff --git a/drivers/misc/bcm-vk/bcm_vk_msg.h b/drivers/misc/bcm-vk/bcm_vk_msg.h new file mode 100644 index 0000000000..56784c8896 --- /dev/null +++ b/drivers/misc/bcm-vk/bcm_vk_msg.h @@ -0,0 +1,163 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2018-2020 Broadcom. + */ + +#ifndef BCM_VK_MSG_H +#define BCM_VK_MSG_H + +#include +#include "bcm_vk_sg.h" + +/* Single message queue control structure */ +struct bcm_vk_msgq { + u16 type; /* queue type */ + u16 num; /* queue number */ + u32 start; /* offset in BAR1 where the queue memory starts */ + + u32 rd_idx; /* read idx */ + u32 wr_idx; /* write idx */ + + u32 size; /* + * size, which is in number of 16byte blocks, + * to align with the message data structure. + */ + u32 nxt; /* + * nxt offset to the next msg queue struct. + * This is to provide flexibity for alignment purposes. + */ + +/* Least significant 16 bits in below field hold doorbell register offset */ +#define DB_SHIFT 16 + + u32 db_offset; /* queue doorbell register offset in BAR0 */ + + u32 rsvd; +}; + +/* + * Structure to record static info from the msgq sync. We keep local copy + * for some of these variables for both performance + checking purpose. 
+ */ +struct bcm_vk_sync_qinfo { + void __iomem *q_start; + u32 q_size; + u32 q_mask; + u32 q_low; + u32 q_db_offset; +}; + +#define VK_MSGQ_MAX_NR 4 /* Maximum number of message queues */ + +/* + * message block - basic unit in the message where a message's size is always + * N x sizeof(basic_block) + */ +struct vk_msg_blk { + u8 function_id; +#define VK_FID_TRANS_BUF 5 +#define VK_FID_SHUTDOWN 8 +#define VK_FID_INIT 9 + u8 size; /* size of the message in number of vk_msg_blk's */ + u16 trans_id; /* transport id, queue & msg_id */ + u32 context_id; +#define VK_NEW_CTX 0 + u32 cmd; +#define VK_CMD_PLANES_MASK 0x000f /* number of planes to up/download */ +#define VK_CMD_UPLOAD 0x0400 /* memory transfer to vk */ +#define VK_CMD_DOWNLOAD 0x0500 /* memory transfer from vk */ +#define VK_CMD_MASK 0x0f00 /* command mask */ + u32 arg; +}; + +/* vk_msg_blk is 16 bytes fixed */ +#define VK_MSGQ_BLK_SIZE (sizeof(struct vk_msg_blk)) +/* shift for fast division of basic msg blk size */ +#define VK_MSGQ_BLK_SZ_SHIFT 4 + +/* use msg_id 0 for any simplex host2vk communication */ +#define VK_SIMPLEX_MSG_ID 0 + +/* context per session opening of sysfs */ +struct bcm_vk_ctx { + struct list_head node; /* use for linkage in Hash Table */ + unsigned int idx; + bool in_use; + pid_t pid; + u32 hash_idx; + u32 q_num; /* queue number used by the stream */ + struct miscdevice *miscdev; + atomic_t pend_cnt; /* number of items pending to be read from host */ + atomic_t dma_cnt; /* any dma transaction outstanding */ + wait_queue_head_t rd_wq; +}; + +/* pid hash table entry */ +struct bcm_vk_ht_entry { + struct list_head head; +}; + +#define VK_DMA_MAX_ADDRS 4 /* Max 4 DMA Addresses */ +/* structure for house keeping a single work entry */ +struct bcm_vk_wkent { + struct list_head node; /* for linking purpose */ + struct bcm_vk_ctx *ctx; + + /* Store up to 4 dma pointers */ + struct bcm_vk_dma dma[VK_DMA_MAX_ADDRS]; + + u32 to_h_blks; /* response */ + struct vk_msg_blk *to_h_msg; + + /* + * put the to_v_msg at the end so that we could simply append to_v msg + * to the end of the allocated block + */ + u32 usr_msg_id; + u32 to_v_blks; + u32 seq_num; + struct vk_msg_blk to_v_msg[]; +}; + +/* queue stats counters */ +struct bcm_vk_qs_cnts { + u32 cnt; /* general counter, used to limit output */ + u32 acc_sum; + u32 max_occ; /* max during a sampling period */ + u32 max_abs; /* the abs max since reset */ +}; + +/* control channel structure for either to_v or to_h communication */ +struct bcm_vk_msg_chan { + u32 q_nr; + /* Mutex to access msgq */ + struct mutex msgq_mutex; + /* pointing to BAR locations */ + struct bcm_vk_msgq __iomem *msgq[VK_MSGQ_MAX_NR]; + /* Spinlock to access pending queue */ + spinlock_t pendq_lock; + /* for temporary storing pending items, one for each queue */ + struct list_head pendq[VK_MSGQ_MAX_NR]; + /* static queue info from the sync */ + struct bcm_vk_sync_qinfo sync_qinfo[VK_MSGQ_MAX_NR]; +}; + +/* totol number of message q allowed by the driver */ +#define VK_MSGQ_PER_CHAN_MAX 3 +#define VK_MSGQ_NUM_DEFAULT (VK_MSGQ_PER_CHAN_MAX - 1) + +/* total number of supported ctx, 32 ctx each for 5 components */ +#define VK_CMPT_CTX_MAX (32 * 5) + +/* hash table defines to store the opened FDs */ +#define VK_PID_HT_SHIFT_BIT 7 /* 128 */ +#define VK_PID_HT_SZ BIT(VK_PID_HT_SHIFT_BIT) + +/* The following are offsets of DDR info provided by the vk card */ +#define VK_BAR0_SEG_SIZE (4 * SZ_1K) /* segment size for BAR0 */ + +/* shutdown types supported */ +#define VK_SHUTDOWN_PID 1 +#define 
VK_SHUTDOWN_GRACEFUL 2 + +#endif diff --git a/drivers/misc/bcm-vk/bcm_vk_sg.c b/drivers/misc/bcm-vk/bcm_vk_sg.c new file mode 100644 index 0000000000..2e9daaf3e4 --- /dev/null +++ b/drivers/misc/bcm-vk/bcm_vk_sg.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2018-2020 Broadcom. + */ +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include "bcm_vk.h" +#include "bcm_vk_msg.h" +#include "bcm_vk_sg.h" + +/* + * Valkyrie has a hardware limitation of 16M transfer size. + * So limit the SGL chunks to 16M. + */ +#define BCM_VK_MAX_SGL_CHUNK SZ_16M + +static int bcm_vk_dma_alloc(struct device *dev, + struct bcm_vk_dma *dma, + int dir, + struct _vk_data *vkdata); +static int bcm_vk_dma_free(struct device *dev, struct bcm_vk_dma *dma); + +/* Uncomment to dump SGLIST */ +/* #define BCM_VK_DUMP_SGLIST */ + +static int bcm_vk_dma_alloc(struct device *dev, + struct bcm_vk_dma *dma, + int direction, + struct _vk_data *vkdata) +{ + dma_addr_t addr, sg_addr; + int err; + int i; + int offset; + u32 size; + u32 remaining_size; + u32 transfer_size; + u64 data; + unsigned long first, last; + struct _vk_data *sgdata; + + /* Get 64-bit user address */ + data = get_unaligned(&vkdata->address); + + /* offset into first page */ + offset = offset_in_page(data); + + /* Calculate number of pages */ + first = (data & PAGE_MASK) >> PAGE_SHIFT; + last = ((data + vkdata->size - 1) & PAGE_MASK) >> PAGE_SHIFT; + dma->nr_pages = last - first + 1; + + /* Allocate DMA pages */ + dma->pages = kmalloc_array(dma->nr_pages, + sizeof(struct page *), + GFP_KERNEL); + if (!dma->pages) + return -ENOMEM; + + dev_dbg(dev, "Alloc DMA Pages [0x%llx+0x%x => %d pages]\n", + data, vkdata->size, dma->nr_pages); + + dma->direction = direction; + + /* Get user pages into memory */ + err = get_user_pages_fast(data & PAGE_MASK, + dma->nr_pages, + direction == DMA_FROM_DEVICE, + dma->pages); + if (err != dma->nr_pages) { + dma->nr_pages = (err >= 0) ? err : 0; + dev_err(dev, "get_user_pages_fast, err=%d [%d]\n", + err, dma->nr_pages); + return err < 0 ? 
err : -EINVAL; + } + + /* Max size of sg list is 1 per mapped page + fields at start */ + dma->sglen = (dma->nr_pages * sizeof(*sgdata)) + + (sizeof(u32) * SGLIST_VKDATA_START); + + /* Allocate sglist */ + dma->sglist = dma_alloc_coherent(dev, + dma->sglen, + &dma->handle, + GFP_KERNEL); + if (!dma->sglist) + return -ENOMEM; + + dma->sglist[SGLIST_NUM_SG] = 0; + dma->sglist[SGLIST_TOTALSIZE] = vkdata->size; + remaining_size = vkdata->size; + sgdata = (struct _vk_data *)&dma->sglist[SGLIST_VKDATA_START]; + + /* Map all pages into DMA */ + size = min_t(size_t, PAGE_SIZE - offset, remaining_size); + remaining_size -= size; + sg_addr = dma_map_page(dev, + dma->pages[0], + offset, + size, + dma->direction); + transfer_size = size; + if (unlikely(dma_mapping_error(dev, sg_addr))) { + __free_page(dma->pages[0]); + return -EIO; + } + + for (i = 1; i < dma->nr_pages; i++) { + size = min_t(size_t, PAGE_SIZE, remaining_size); + remaining_size -= size; + addr = dma_map_page(dev, + dma->pages[i], + 0, + size, + dma->direction); + if (unlikely(dma_mapping_error(dev, addr))) { + __free_page(dma->pages[i]); + return -EIO; + } + + /* + * Compress SG list entry when pages are contiguous + * and transfer size less or equal to BCM_VK_MAX_SGL_CHUNK + */ + if ((addr == (sg_addr + transfer_size)) && + ((transfer_size + size) <= BCM_VK_MAX_SGL_CHUNK)) { + /* pages are contiguous, add to same sg entry */ + transfer_size += size; + } else { + /* pages are not contiguous, write sg entry */ + sgdata->size = transfer_size; + put_unaligned(sg_addr, (u64 *)&sgdata->address); + dma->sglist[SGLIST_NUM_SG]++; + + /* start new sg entry */ + sgdata++; + sg_addr = addr; + transfer_size = size; + } + } + /* Write last sg list entry */ + sgdata->size = transfer_size; + put_unaligned(sg_addr, (u64 *)&sgdata->address); + dma->sglist[SGLIST_NUM_SG]++; + + /* Update pointers and size field to point to sglist */ + put_unaligned((u64)dma->handle, &vkdata->address); + vkdata->size = (dma->sglist[SGLIST_NUM_SG] * sizeof(*sgdata)) + + (sizeof(u32) * SGLIST_VKDATA_START); + +#ifdef BCM_VK_DUMP_SGLIST + dev_dbg(dev, + "sgl 0x%llx handle 0x%llx, sglen: 0x%x sgsize: 0x%x\n", + (u64)dma->sglist, + dma->handle, + dma->sglen, + vkdata->size); + for (i = 0; i < vkdata->size / sizeof(u32); i++) + dev_dbg(dev, "i:0x%x 0x%x\n", i, dma->sglist[i]); +#endif + + return 0; +} + +int bcm_vk_sg_alloc(struct device *dev, + struct bcm_vk_dma *dma, + int dir, + struct _vk_data *vkdata, + int num) +{ + int i; + int rc = -EINVAL; + + /* Convert user addresses to DMA SG List */ + for (i = 0; i < num; i++) { + if (vkdata[i].size && vkdata[i].address) { + /* + * If both size and address are non-zero + * then DMA alloc. + */ + rc = bcm_vk_dma_alloc(dev, + &dma[i], + dir, + &vkdata[i]); + } else if (vkdata[i].size || + vkdata[i].address) { + /* + * If one of size and address are zero + * there is a problem. + */ + dev_err(dev, + "Invalid vkdata %x 0x%x 0x%llx\n", + i, vkdata[i].size, vkdata[i].address); + rc = -EINVAL; + } else { + /* + * If size and address are both zero + * don't convert, but return success. 
+ */ + rc = 0; + } + + if (rc) + goto fail_alloc; + } + return rc; + +fail_alloc: + while (i > 0) { + i--; + if (dma[i].sglist) + bcm_vk_dma_free(dev, &dma[i]); + } + return rc; +} + +static int bcm_vk_dma_free(struct device *dev, struct bcm_vk_dma *dma) +{ + dma_addr_t addr; + int i; + int num_sg; + u32 size; + struct _vk_data *vkdata; + + dev_dbg(dev, "free sglist=%p sglen=0x%x\n", dma->sglist, dma->sglen); + + /* Unmap all pages in the sglist */ + num_sg = dma->sglist[SGLIST_NUM_SG]; + vkdata = (struct _vk_data *)&dma->sglist[SGLIST_VKDATA_START]; + for (i = 0; i < num_sg; i++) { + size = vkdata[i].size; + addr = get_unaligned(&vkdata[i].address); + + dma_unmap_page(dev, addr, size, dma->direction); + } + + /* Free allocated sglist */ + dma_free_coherent(dev, dma->sglen, dma->sglist, dma->handle); + + /* Release lock on all pages */ + for (i = 0; i < dma->nr_pages; i++) + put_page(dma->pages[i]); + + /* Free allocated dma pages */ + kfree(dma->pages); + dma->sglist = NULL; + + return 0; +} + +int bcm_vk_sg_free(struct device *dev, struct bcm_vk_dma *dma, int num, + int *proc_cnt) +{ + int i; + + *proc_cnt = 0; + /* Unmap and free all pages and sglists */ + for (i = 0; i < num; i++) { + if (dma[i].sglist) { + bcm_vk_dma_free(dev, &dma[i]); + *proc_cnt += 1; + } + } + + return 0; +} diff --git a/drivers/misc/bcm-vk/bcm_vk_sg.h b/drivers/misc/bcm-vk/bcm_vk_sg.h new file mode 100644 index 0000000000..81b3d0976d --- /dev/null +++ b/drivers/misc/bcm-vk/bcm_vk_sg.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2018-2020 Broadcom. + */ + +#ifndef BCM_VK_SG_H +#define BCM_VK_SG_H + +#include + +struct bcm_vk_dma { + /* for userland buffer */ + struct page **pages; + int nr_pages; + + /* common */ + dma_addr_t handle; + /* + * sglist is of the following LE format + * [U32] num_sg = number of sg addresses (N) + * [U32] totalsize = totalsize of data being transferred in sglist + * [U32] size[0] = size of data in address0 + * [U32] addr_l[0] = lower 32-bits of address0 + * [U32] addr_h[0] = higher 32-bits of address0 + * .. + * [U32] size[N-1] = size of data in addressN-1 + * [U32] addr_l[N-1] = lower 32-bits of addressN-1 + * [U32] addr_h[N-1] = higher 32-bits of addressN-1 + */ + u32 *sglist; +#define SGLIST_NUM_SG 0 +#define SGLIST_TOTALSIZE 1 +#define SGLIST_VKDATA_START 2 + + int sglen; /* Length (bytes) of sglist */ + int direction; +}; + +struct _vk_data { + u32 size; /* data size in bytes */ + u64 address; /* Pointer to data */ +} __packed; + +/* + * Scatter-gather DMA buffer API. + * + * These functions provide a simple way to create a page list and a + * scatter-gather list from userspace address and map the memory + * for DMA operation. + */ +int bcm_vk_sg_alloc(struct device *dev, + struct bcm_vk_dma *dma, + int dir, + struct _vk_data *vkdata, + int num); + +int bcm_vk_sg_free(struct device *dev, struct bcm_vk_dma *dma, int num, + int *proc_cnt); + +#endif + diff --git a/drivers/misc/bcm-vk/bcm_vk_tty.c b/drivers/misc/bcm-vk/bcm_vk_tty.c new file mode 100644 index 0000000000..2bce835ca4 --- /dev/null +++ b/drivers/misc/bcm-vk/bcm_vk_tty.c @@ -0,0 +1,338 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2018-2020 Broadcom. 
+ */ + +#include +#include +#include + +#include "bcm_vk.h" + +/* TTYVK base offset is 0x30000 into BAR1 */ +#define BAR1_TTYVK_BASE_OFFSET 0x300000 +/* Each TTYVK channel (TO or FROM) is 0x10000 */ +#define BAR1_TTYVK_CHAN_OFFSET 0x100000 +/* Each TTYVK channel has TO and FROM, hence the * 2 */ +#define BAR1_TTYVK_BASE(index) (BAR1_TTYVK_BASE_OFFSET + \ + ((index) * BAR1_TTYVK_CHAN_OFFSET * 2)) +/* TO TTYVK channel base comes before FROM for each index */ +#define TO_TTYK_BASE(index) BAR1_TTYVK_BASE(index) +#define FROM_TTYK_BASE(index) (BAR1_TTYVK_BASE(index) + \ + BAR1_TTYVK_CHAN_OFFSET) + +struct bcm_vk_tty_chan { + u32 reserved; + u32 size; + u32 wr; + u32 rd; + u32 *data; +}; + +#define VK_BAR_CHAN(v, DIR, e) ((v)->DIR##_offset \ + + offsetof(struct bcm_vk_tty_chan, e)) +#define VK_BAR_CHAN_SIZE(v, DIR) VK_BAR_CHAN(v, DIR, size) +#define VK_BAR_CHAN_WR(v, DIR) VK_BAR_CHAN(v, DIR, wr) +#define VK_BAR_CHAN_RD(v, DIR) VK_BAR_CHAN(v, DIR, rd) +#define VK_BAR_CHAN_DATA(v, DIR, off) (VK_BAR_CHAN(v, DIR, data) + (off)) + +#define VK_BAR0_REGSEG_TTY_DB_OFFSET 0x86c + +/* Poll every 1/10 of second - temp hack till we use MSI interrupt */ +#define SERIAL_TIMER_VALUE (HZ / 10) + +static void bcm_vk_tty_poll(struct timer_list *t) +{ + struct bcm_vk *vk = from_timer(vk, t, serial_timer); + + queue_work(vk->tty_wq_thread, &vk->tty_wq_work); + mod_timer(&vk->serial_timer, jiffies + SERIAL_TIMER_VALUE); +} + +irqreturn_t bcm_vk_tty_irqhandler(int irq, void *dev_id) +{ + struct bcm_vk *vk = dev_id; + + queue_work(vk->tty_wq_thread, &vk->tty_wq_work); + + return IRQ_HANDLED; +} + +static void bcm_vk_tty_wq_handler(struct work_struct *work) +{ + struct bcm_vk *vk = container_of(work, struct bcm_vk, tty_wq_work); + struct bcm_vk_tty *vktty; + int card_status; + int count; + unsigned char c; + int i; + int wr; + + card_status = vkread32(vk, BAR_0, BAR_CARD_STATUS); + if (BCM_VK_INTF_IS_DOWN(card_status)) + return; + + for (i = 0; i < BCM_VK_NUM_TTY; i++) { + count = 0; + /* Check the card status that the tty channel is ready */ + if ((card_status & BIT(i)) == 0) + continue; + + vktty = &vk->tty[i]; + + /* Don't increment read index if tty app is closed */ + if (!vktty->is_opened) + continue; + + /* Fetch the wr offset in buffer from VK */ + wr = vkread32(vk, BAR_1, VK_BAR_CHAN_WR(vktty, from)); + + /* safe to ignore until bar read gives proper size */ + if (vktty->from_size == 0) + continue; + + if (wr >= vktty->from_size) { + dev_err(&vk->pdev->dev, + "ERROR: wq handler ttyVK%d wr:0x%x > 0x%x\n", + i, wr, vktty->from_size); + /* Need to signal and close device in this case */ + continue; + } + + /* + * Simple read of circular buffer and + * insert into tty flip buffer + */ + while (vk->tty[i].rd != wr) { + c = vkread8(vk, BAR_1, + VK_BAR_CHAN_DATA(vktty, from, vktty->rd)); + vktty->rd++; + if (vktty->rd >= vktty->from_size) + vktty->rd = 0; + tty_insert_flip_char(&vktty->port, c, TTY_NORMAL); + count++; + } + + if (count) { + tty_flip_buffer_push(&vktty->port); + + /* Update read offset from shadow register to card */ + vkwrite32(vk, vktty->rd, BAR_1, + VK_BAR_CHAN_RD(vktty, from)); + } + } +} + +static int bcm_vk_tty_open(struct tty_struct *tty, struct file *file) +{ + int card_status; + struct bcm_vk *vk; + struct bcm_vk_tty *vktty; + int index; + + /* initialize the pointer in case something fails */ + tty->driver_data = NULL; + + vk = (struct bcm_vk *)dev_get_drvdata(tty->dev); + index = tty->index; + + if (index >= BCM_VK_NUM_TTY) + return -EINVAL; + + vktty = &vk->tty[index]; + + vktty->pid = 
task_pid_nr(current); + vktty->to_offset = TO_TTYK_BASE(index); + vktty->from_offset = FROM_TTYK_BASE(index); + + /* Do not allow tty device to be opened if tty on card not ready */ + card_status = vkread32(vk, BAR_0, BAR_CARD_STATUS); + if (BCM_VK_INTF_IS_DOWN(card_status) || ((card_status & BIT(index)) == 0)) + return -EBUSY; + + /* + * Get shadow registers of the buffer sizes and the "to" write offset + * and "from" read offset + */ + vktty->to_size = vkread32(vk, BAR_1, VK_BAR_CHAN_SIZE(vktty, to)); + vktty->wr = vkread32(vk, BAR_1, VK_BAR_CHAN_WR(vktty, to)); + vktty->from_size = vkread32(vk, BAR_1, VK_BAR_CHAN_SIZE(vktty, from)); + vktty->rd = vkread32(vk, BAR_1, VK_BAR_CHAN_RD(vktty, from)); + vktty->is_opened = true; + + if (tty->count == 1 && !vktty->irq_enabled) { + timer_setup(&vk->serial_timer, bcm_vk_tty_poll, 0); + mod_timer(&vk->serial_timer, jiffies + SERIAL_TIMER_VALUE); + } + return 0; +} + +static void bcm_vk_tty_close(struct tty_struct *tty, struct file *file) +{ + struct bcm_vk *vk = dev_get_drvdata(tty->dev); + + if (tty->index >= BCM_VK_NUM_TTY) + return; + + vk->tty[tty->index].is_opened = false; + + if (tty->count == 1) + del_timer_sync(&vk->serial_timer); +} + +static void bcm_vk_tty_doorbell(struct bcm_vk *vk, u32 db_val) +{ + vkwrite32(vk, db_val, BAR_0, + VK_BAR0_REGSEG_DB_BASE + VK_BAR0_REGSEG_TTY_DB_OFFSET); +} + +static ssize_t bcm_vk_tty_write(struct tty_struct *tty, const u8 *buffer, + size_t count) +{ + int index; + struct bcm_vk *vk; + struct bcm_vk_tty *vktty; + int i; + + index = tty->index; + vk = dev_get_drvdata(tty->dev); + vktty = &vk->tty[index]; + + /* Simple write each byte to circular buffer */ + for (i = 0; i < count; i++) { + vkwrite8(vk, buffer[i], BAR_1, + VK_BAR_CHAN_DATA(vktty, to, vktty->wr)); + vktty->wr++; + if (vktty->wr >= vktty->to_size) + vktty->wr = 0; + } + /* Update write offset from shadow register to card */ + vkwrite32(vk, vktty->wr, BAR_1, VK_BAR_CHAN_WR(vktty, to)); + bcm_vk_tty_doorbell(vk, 0); + + return count; +} + +static unsigned int bcm_vk_tty_write_room(struct tty_struct *tty) +{ + struct bcm_vk *vk = dev_get_drvdata(tty->dev); + + return vk->tty[tty->index].to_size - 1; +} + +static const struct tty_operations serial_ops = { + .open = bcm_vk_tty_open, + .close = bcm_vk_tty_close, + .write = bcm_vk_tty_write, + .write_room = bcm_vk_tty_write_room, +}; + +int bcm_vk_tty_init(struct bcm_vk *vk, char *name) +{ + int i; + int err; + struct tty_driver *tty_drv; + struct device *dev = &vk->pdev->dev; + + tty_drv = tty_alloc_driver + (BCM_VK_NUM_TTY, + TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV); + if (IS_ERR(tty_drv)) + return PTR_ERR(tty_drv); + + /* Save struct tty_driver for uninstalling the device */ + vk->tty_drv = tty_drv; + + /* initialize the tty driver */ + tty_drv->driver_name = KBUILD_MODNAME; + tty_drv->name = kstrdup(name, GFP_KERNEL); + if (!tty_drv->name) { + err = -ENOMEM; + goto err_tty_driver_kref_put; + } + tty_drv->type = TTY_DRIVER_TYPE_SERIAL; + tty_drv->subtype = SERIAL_TYPE_NORMAL; + tty_drv->init_termios = tty_std_termios; + tty_set_operations(tty_drv, &serial_ops); + + /* register the tty driver */ + err = tty_register_driver(tty_drv); + if (err) { + dev_err(dev, "tty_register_driver failed\n"); + goto err_kfree_tty_name; + } + + for (i = 0; i < BCM_VK_NUM_TTY; i++) { + struct device *tty_dev; + + tty_port_init(&vk->tty[i].port); + tty_dev = tty_port_register_device_attr(&vk->tty[i].port, + tty_drv, i, dev, vk, + NULL); + if (IS_ERR(tty_dev)) { + err = PTR_ERR(tty_dev); + goto unwind; + } + 
vk->tty[i].is_opened = false; + } + + INIT_WORK(&vk->tty_wq_work, bcm_vk_tty_wq_handler); + vk->tty_wq_thread = create_singlethread_workqueue("tty"); + if (!vk->tty_wq_thread) { + dev_err(dev, "Fail to create tty workqueue thread\n"); + err = -ENOMEM; + goto unwind; + } + return 0; + +unwind: + while (--i >= 0) + tty_port_unregister_device(&vk->tty[i].port, tty_drv, i); + tty_unregister_driver(tty_drv); + +err_kfree_tty_name: + kfree(tty_drv->name); + tty_drv->name = NULL; + +err_tty_driver_kref_put: + tty_driver_kref_put(tty_drv); + + return err; +} + +void bcm_vk_tty_exit(struct bcm_vk *vk) +{ + int i; + + del_timer_sync(&vk->serial_timer); + for (i = 0; i < BCM_VK_NUM_TTY; ++i) { + tty_port_unregister_device(&vk->tty[i].port, + vk->tty_drv, + i); + tty_port_destroy(&vk->tty[i].port); + } + tty_unregister_driver(vk->tty_drv); + + kfree(vk->tty_drv->name); + vk->tty_drv->name = NULL; + + tty_driver_kref_put(vk->tty_drv); +} + +void bcm_vk_tty_terminate_tty_user(struct bcm_vk *vk) +{ + struct bcm_vk_tty *vktty; + int i; + + for (i = 0; i < BCM_VK_NUM_TTY; ++i) { + vktty = &vk->tty[i]; + if (vktty->pid) + kill_pid(find_vpid(vktty->pid), SIGKILL, 1); + } +} + +void bcm_vk_tty_wq_exit(struct bcm_vk *vk) +{ + cancel_work_sync(&vk->tty_wq_work); + destroy_workqueue(vk->tty_wq_thread); +} diff --git a/drivers/misc/bh1770glc.c b/drivers/misc/bh1770glc.c new file mode 100644 index 0000000000..1629b62fd0 --- /dev/null +++ b/drivers/misc/bh1770glc.c @@ -0,0 +1,1390 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This file is part of the ROHM BH1770GLC / OSRAM SFH7770 sensor driver. + * Chip is combined proximity and ambient light sensor. + * + * Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies). + * + * Contact: Samu Onkalo + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define BH1770_ALS_CONTROL 0x80 /* ALS operation mode control */ +#define BH1770_PS_CONTROL 0x81 /* PS operation mode control */ +#define BH1770_I_LED 0x82 /* active LED and LED1, LED2 current */ +#define BH1770_I_LED3 0x83 /* LED3 current setting */ +#define BH1770_ALS_PS_MEAS 0x84 /* Forced mode trigger */ +#define BH1770_PS_MEAS_RATE 0x85 /* PS meas. rate at stand alone mode */ +#define BH1770_ALS_MEAS_RATE 0x86 /* ALS meas. 
rate at stand alone mode */ +#define BH1770_PART_ID 0x8a /* Part number and revision ID */ +#define BH1770_MANUFACT_ID 0x8b /* Manufacturerer ID */ +#define BH1770_ALS_DATA_0 0x8c /* ALS DATA low byte */ +#define BH1770_ALS_DATA_1 0x8d /* ALS DATA high byte */ +#define BH1770_ALS_PS_STATUS 0x8e /* Measurement data and int status */ +#define BH1770_PS_DATA_LED1 0x8f /* PS data from LED1 */ +#define BH1770_PS_DATA_LED2 0x90 /* PS data from LED2 */ +#define BH1770_PS_DATA_LED3 0x91 /* PS data from LED3 */ +#define BH1770_INTERRUPT 0x92 /* Interrupt setting */ +#define BH1770_PS_TH_LED1 0x93 /* PS interrupt threshold for LED1 */ +#define BH1770_PS_TH_LED2 0x94 /* PS interrupt threshold for LED2 */ +#define BH1770_PS_TH_LED3 0x95 /* PS interrupt threshold for LED3 */ +#define BH1770_ALS_TH_UP_0 0x96 /* ALS upper threshold low byte */ +#define BH1770_ALS_TH_UP_1 0x97 /* ALS upper threshold high byte */ +#define BH1770_ALS_TH_LOW_0 0x98 /* ALS lower threshold low byte */ +#define BH1770_ALS_TH_LOW_1 0x99 /* ALS lower threshold high byte */ + +/* MANUFACT_ID */ +#define BH1770_MANUFACT_ROHM 0x01 +#define BH1770_MANUFACT_OSRAM 0x03 + +/* PART_ID */ +#define BH1770_PART 0x90 +#define BH1770_PART_MASK 0xf0 +#define BH1770_REV_MASK 0x0f +#define BH1770_REV_SHIFT 0 +#define BH1770_REV_0 0x00 +#define BH1770_REV_1 0x01 + +/* Operating modes for both */ +#define BH1770_STANDBY 0x00 +#define BH1770_FORCED 0x02 +#define BH1770_STANDALONE 0x03 +#define BH1770_SWRESET (0x01 << 2) + +#define BH1770_PS_TRIG_MEAS (1 << 0) +#define BH1770_ALS_TRIG_MEAS (1 << 1) + +/* Interrupt control */ +#define BH1770_INT_OUTPUT_MODE (1 << 3) /* 0 = latched */ +#define BH1770_INT_POLARITY (1 << 2) /* 1 = active high */ +#define BH1770_INT_ALS_ENA (1 << 1) +#define BH1770_INT_PS_ENA (1 << 0) + +/* Interrupt status */ +#define BH1770_INT_LED1_DATA (1 << 0) +#define BH1770_INT_LED1_INT (1 << 1) +#define BH1770_INT_LED2_DATA (1 << 2) +#define BH1770_INT_LED2_INT (1 << 3) +#define BH1770_INT_LED3_DATA (1 << 4) +#define BH1770_INT_LED3_INT (1 << 5) +#define BH1770_INT_LEDS_INT ((1 << 1) | (1 << 3) | (1 << 5)) +#define BH1770_INT_ALS_DATA (1 << 6) +#define BH1770_INT_ALS_INT (1 << 7) + +/* Led channels */ +#define BH1770_LED1 0x00 + +#define BH1770_DISABLE 0 +#define BH1770_ENABLE 1 +#define BH1770_PROX_CHANNELS 1 + +#define BH1770_LUX_DEFAULT_RATE 1 /* Index to lux rate table */ +#define BH1770_PROX_DEFAULT_RATE 1 /* Direct HW value =~ 50Hz */ +#define BH1770_PROX_DEF_RATE_THRESH 6 /* Direct HW value =~ 5 Hz */ +#define BH1770_STARTUP_DELAY 50 +#define BH1770_RESET_TIME 10 +#define BH1770_TIMEOUT 2100 /* Timeout in 2.1 seconds */ + +#define BH1770_LUX_RANGE 65535 +#define BH1770_PROX_RANGE 255 +#define BH1770_COEF_SCALER 1024 +#define BH1770_CALIB_SCALER 8192 +#define BH1770_LUX_NEUTRAL_CALIB_VALUE (1 * BH1770_CALIB_SCALER) +#define BH1770_LUX_DEF_THRES 1000 +#define BH1770_PROX_DEF_THRES 70 +#define BH1770_PROX_DEF_ABS_THRES 100 +#define BH1770_DEFAULT_PERSISTENCE 10 +#define BH1770_PROX_MAX_PERSISTENCE 50 +#define BH1770_LUX_GA_SCALE 16384 +#define BH1770_LUX_CF_SCALE 2048 /* CF ChipFactor */ +#define BH1770_NEUTRAL_CF BH1770_LUX_CF_SCALE +#define BH1770_LUX_CORR_SCALE 4096 + +#define PROX_ABOVE_THRESHOLD 1 +#define PROX_BELOW_THRESHOLD 0 + +#define PROX_IGNORE_LUX_LIMIT 500 + +struct bh1770_chip { + struct bh1770_platform_data *pdata; + char chipname[10]; + u8 revision; + struct i2c_client *client; + struct regulator_bulk_data regs[2]; + struct mutex mutex; /* avoid parallel access */ + wait_queue_head_t wait; + + bool 
int_mode_prox; + bool int_mode_lux; + struct delayed_work prox_work; + u32 lux_cf; /* Chip specific factor */ + u32 lux_ga; + u32 lux_calib; + int lux_rate_index; + u32 lux_corr; + u16 lux_data_raw; + u16 lux_threshold_hi; + u16 lux_threshold_lo; + u16 lux_thres_hi_onchip; + u16 lux_thres_lo_onchip; + bool lux_wait_result; + + int prox_enable_count; + u16 prox_coef; + u16 prox_const; + int prox_rate; + int prox_rate_threshold; + u8 prox_persistence; + u8 prox_persistence_counter; + u8 prox_data; + u8 prox_threshold; + u8 prox_threshold_hw; + bool prox_force_update; + u8 prox_abs_thres; + u8 prox_led; +}; + +static const char reg_vcc[] = "Vcc"; +static const char reg_vleds[] = "Vleds"; + +/* + * Supported stand alone rates in ms from chip data sheet + * {10, 20, 30, 40, 70, 100, 200, 500, 1000, 2000}; + */ +static const s16 prox_rates_hz[] = {100, 50, 33, 25, 14, 10, 5, 2}; +static const s16 prox_rates_ms[] = {10, 20, 30, 40, 70, 100, 200, 500}; + +/* + * Supported stand alone rates in ms from chip data sheet + * {100, 200, 500, 1000, 2000}; + */ +static const s16 lux_rates_hz[] = {10, 5, 2, 1, 0}; + +/* + * interrupt control functions are called while keeping chip->mutex + * excluding module probe / remove + */ +static inline int bh1770_lux_interrupt_control(struct bh1770_chip *chip, + int lux) +{ + chip->int_mode_lux = lux; + /* Set interrupt modes, interrupt active low, latched */ + return i2c_smbus_write_byte_data(chip->client, + BH1770_INTERRUPT, + (lux << 1) | chip->int_mode_prox); +} + +static inline int bh1770_prox_interrupt_control(struct bh1770_chip *chip, + int ps) +{ + chip->int_mode_prox = ps; + return i2c_smbus_write_byte_data(chip->client, + BH1770_INTERRUPT, + (chip->int_mode_lux << 1) | (ps << 0)); +} + +/* chip->mutex is always kept here */ +static int bh1770_lux_rate(struct bh1770_chip *chip, int rate_index) +{ + /* sysfs may call this when the chip is powered off */ + if (pm_runtime_suspended(&chip->client->dev)) + return 0; + + /* Proper proximity response needs fastest lux rate (100ms) */ + if (chip->prox_enable_count) + rate_index = 0; + + return i2c_smbus_write_byte_data(chip->client, + BH1770_ALS_MEAS_RATE, + rate_index); +} + +static int bh1770_prox_rate(struct bh1770_chip *chip, int mode) +{ + int rate; + + rate = (mode == PROX_ABOVE_THRESHOLD) ? + chip->prox_rate_threshold : chip->prox_rate; + + return i2c_smbus_write_byte_data(chip->client, + BH1770_PS_MEAS_RATE, + rate); +} + +/* InfraredLED is controlled by the chip during proximity scanning */ +static inline int bh1770_led_cfg(struct bh1770_chip *chip) +{ + /* LED cfg, current for leds 1 and 2 */ + return i2c_smbus_write_byte_data(chip->client, + BH1770_I_LED, + (BH1770_LED1 << 6) | + (BH1770_LED_5mA << 3) | + chip->prox_led); +} + +/* + * Following two functions converts raw ps values from HW to normalized + * values. Purpose is to compensate differences between different sensor + * versions and variants so that result means about the same between + * versions. 
+ */ +static inline u8 bh1770_psraw_to_adjusted(struct bh1770_chip *chip, u8 psraw) +{ + u16 adjusted; + adjusted = (u16)(((u32)(psraw + chip->prox_const) * chip->prox_coef) / + BH1770_COEF_SCALER); + if (adjusted > BH1770_PROX_RANGE) + adjusted = BH1770_PROX_RANGE; + return adjusted; +} + +static inline u8 bh1770_psadjusted_to_raw(struct bh1770_chip *chip, u8 ps) +{ + u16 raw; + + raw = (((u32)ps * BH1770_COEF_SCALER) / chip->prox_coef); + if (raw > chip->prox_const) + raw = raw - chip->prox_const; + else + raw = 0; + return raw; +} + +/* + * Following two functions converts raw lux values from HW to normalized + * values. Purpose is to compensate differences between different sensor + * versions and variants so that result means about the same between + * versions. Chip->mutex is kept when this is called. + */ +static int bh1770_prox_set_threshold(struct bh1770_chip *chip) +{ + u8 tmp = 0; + + /* sysfs may call this when the chip is powered off */ + if (pm_runtime_suspended(&chip->client->dev)) + return 0; + + tmp = bh1770_psadjusted_to_raw(chip, chip->prox_threshold); + chip->prox_threshold_hw = tmp; + + return i2c_smbus_write_byte_data(chip->client, BH1770_PS_TH_LED1, + tmp); +} + +static inline u16 bh1770_lux_raw_to_adjusted(struct bh1770_chip *chip, u16 raw) +{ + u32 lux; + lux = ((u32)raw * chip->lux_corr) / BH1770_LUX_CORR_SCALE; + return min(lux, (u32)BH1770_LUX_RANGE); +} + +static inline u16 bh1770_lux_adjusted_to_raw(struct bh1770_chip *chip, + u16 adjusted) +{ + return (u32)adjusted * BH1770_LUX_CORR_SCALE / chip->lux_corr; +} + +/* chip->mutex is kept when this is called */ +static int bh1770_lux_update_thresholds(struct bh1770_chip *chip, + u16 threshold_hi, u16 threshold_lo) +{ + u8 data[4]; + int ret; + + /* sysfs may call this when the chip is powered off */ + if (pm_runtime_suspended(&chip->client->dev)) + return 0; + + /* + * Compensate threshold values with the correction factors if not + * set to minimum or maximum. + * Min & max values disables interrupts. 
+ */ + if (threshold_hi != BH1770_LUX_RANGE && threshold_hi != 0) + threshold_hi = bh1770_lux_adjusted_to_raw(chip, threshold_hi); + + if (threshold_lo != BH1770_LUX_RANGE && threshold_lo != 0) + threshold_lo = bh1770_lux_adjusted_to_raw(chip, threshold_lo); + + if (chip->lux_thres_hi_onchip == threshold_hi && + chip->lux_thres_lo_onchip == threshold_lo) + return 0; + + chip->lux_thres_hi_onchip = threshold_hi; + chip->lux_thres_lo_onchip = threshold_lo; + + data[0] = threshold_hi; + data[1] = threshold_hi >> 8; + data[2] = threshold_lo; + data[3] = threshold_lo >> 8; + + ret = i2c_smbus_write_i2c_block_data(chip->client, + BH1770_ALS_TH_UP_0, + ARRAY_SIZE(data), + data); + return ret; +} + +static int bh1770_lux_get_result(struct bh1770_chip *chip) +{ + u16 data; + int ret; + + ret = i2c_smbus_read_byte_data(chip->client, BH1770_ALS_DATA_0); + if (ret < 0) + return ret; + + data = ret & 0xff; + ret = i2c_smbus_read_byte_data(chip->client, BH1770_ALS_DATA_1); + if (ret < 0) + return ret; + + chip->lux_data_raw = data | ((ret & 0xff) << 8); + + return 0; +} + +/* Calculate correction value which contains chip and device specific parts */ +static u32 bh1770_get_corr_value(struct bh1770_chip *chip) +{ + u32 tmp; + /* Impact of glass attenuation correction */ + tmp = (BH1770_LUX_CORR_SCALE * chip->lux_ga) / BH1770_LUX_GA_SCALE; + /* Impact of chip factor correction */ + tmp = (tmp * chip->lux_cf) / BH1770_LUX_CF_SCALE; + /* Impact of Device specific calibration correction */ + tmp = (tmp * chip->lux_calib) / BH1770_CALIB_SCALER; + return tmp; +} + +static int bh1770_lux_read_result(struct bh1770_chip *chip) +{ + bh1770_lux_get_result(chip); + return bh1770_lux_raw_to_adjusted(chip, chip->lux_data_raw); +} + +/* + * Chip on / off functions are called while keeping mutex except probe + * or remove phase + */ +static int bh1770_chip_on(struct bh1770_chip *chip) +{ + int ret = regulator_bulk_enable(ARRAY_SIZE(chip->regs), + chip->regs); + if (ret < 0) + return ret; + + usleep_range(BH1770_STARTUP_DELAY, BH1770_STARTUP_DELAY * 2); + + /* Reset the chip */ + i2c_smbus_write_byte_data(chip->client, BH1770_ALS_CONTROL, + BH1770_SWRESET); + usleep_range(BH1770_RESET_TIME, BH1770_RESET_TIME * 2); + + /* + * ALS is started always since proximity needs als results + * for realibility estimation. + * Let's assume dark until the first ALS measurement is ready. 
+ */ + chip->lux_data_raw = 0; + chip->prox_data = 0; + ret = i2c_smbus_write_byte_data(chip->client, + BH1770_ALS_CONTROL, BH1770_STANDALONE); + + /* Assume reset defaults */ + chip->lux_thres_hi_onchip = BH1770_LUX_RANGE; + chip->lux_thres_lo_onchip = 0; + + return ret; +} + +static void bh1770_chip_off(struct bh1770_chip *chip) +{ + i2c_smbus_write_byte_data(chip->client, + BH1770_INTERRUPT, BH1770_DISABLE); + i2c_smbus_write_byte_data(chip->client, + BH1770_ALS_CONTROL, BH1770_STANDBY); + i2c_smbus_write_byte_data(chip->client, + BH1770_PS_CONTROL, BH1770_STANDBY); + regulator_bulk_disable(ARRAY_SIZE(chip->regs), chip->regs); +} + +/* chip->mutex is kept when this is called */ +static int bh1770_prox_mode_control(struct bh1770_chip *chip) +{ + if (chip->prox_enable_count) { + chip->prox_force_update = true; /* Force immediate update */ + + bh1770_lux_rate(chip, chip->lux_rate_index); + bh1770_prox_set_threshold(chip); + bh1770_led_cfg(chip); + bh1770_prox_rate(chip, PROX_BELOW_THRESHOLD); + bh1770_prox_interrupt_control(chip, BH1770_ENABLE); + i2c_smbus_write_byte_data(chip->client, + BH1770_PS_CONTROL, BH1770_STANDALONE); + } else { + chip->prox_data = 0; + bh1770_lux_rate(chip, chip->lux_rate_index); + bh1770_prox_interrupt_control(chip, BH1770_DISABLE); + i2c_smbus_write_byte_data(chip->client, + BH1770_PS_CONTROL, BH1770_STANDBY); + } + return 0; +} + +/* chip->mutex is kept when this is called */ +static int bh1770_prox_read_result(struct bh1770_chip *chip) +{ + int ret; + bool above; + u8 mode; + + ret = i2c_smbus_read_byte_data(chip->client, BH1770_PS_DATA_LED1); + if (ret < 0) + goto out; + + if (ret > chip->prox_threshold_hw) + above = true; + else + above = false; + + /* + * when ALS levels goes above limit, proximity result may be + * false proximity. Thus ignore the result. With real proximity + * there is a shadow causing low als levels. 
+ */ + if (chip->lux_data_raw > PROX_IGNORE_LUX_LIMIT) + ret = 0; + + chip->prox_data = bh1770_psraw_to_adjusted(chip, ret); + + /* Strong proximity level or force mode requires immediate response */ + if (chip->prox_data >= chip->prox_abs_thres || + chip->prox_force_update) + chip->prox_persistence_counter = chip->prox_persistence; + + chip->prox_force_update = false; + + /* Persistence filttering to reduce false proximity events */ + if (likely(above)) { + if (chip->prox_persistence_counter < chip->prox_persistence) { + chip->prox_persistence_counter++; + ret = -ENODATA; + } else { + mode = PROX_ABOVE_THRESHOLD; + ret = 0; + } + } else { + chip->prox_persistence_counter = 0; + mode = PROX_BELOW_THRESHOLD; + chip->prox_data = 0; + ret = 0; + } + + /* Set proximity detection rate based on above or below value */ + if (ret == 0) { + bh1770_prox_rate(chip, mode); + sysfs_notify(&chip->client->dev.kobj, NULL, "prox0_raw"); + } +out: + return ret; +} + +static int bh1770_detect(struct bh1770_chip *chip) +{ + struct i2c_client *client = chip->client; + s32 ret; + u8 manu, part; + + ret = i2c_smbus_read_byte_data(client, BH1770_MANUFACT_ID); + if (ret < 0) + goto error; + manu = (u8)ret; + + ret = i2c_smbus_read_byte_data(client, BH1770_PART_ID); + if (ret < 0) + goto error; + part = (u8)ret; + + chip->revision = (part & BH1770_REV_MASK) >> BH1770_REV_SHIFT; + chip->prox_coef = BH1770_COEF_SCALER; + chip->prox_const = 0; + chip->lux_cf = BH1770_NEUTRAL_CF; + + if ((manu == BH1770_MANUFACT_ROHM) && + ((part & BH1770_PART_MASK) == BH1770_PART)) { + snprintf(chip->chipname, sizeof(chip->chipname), "BH1770GLC"); + return 0; + } + + if ((manu == BH1770_MANUFACT_OSRAM) && + ((part & BH1770_PART_MASK) == BH1770_PART)) { + snprintf(chip->chipname, sizeof(chip->chipname), "SFH7770"); + /* Values selected by comparing different versions */ + chip->prox_coef = 819; /* 0.8 * BH1770_COEF_SCALER */ + chip->prox_const = 40; + return 0; + } + + ret = -ENODEV; +error: + dev_dbg(&client->dev, "BH1770 or SFH7770 not found\n"); + + return ret; +} + +/* + * This work is re-scheduled at every proximity interrupt. + * If this work is running, it means that there hasn't been any + * proximity interrupt in time. Situation is handled as no-proximity. + * It would be nice to have low-threshold interrupt or interrupt + * when measurement and hi-threshold are both 0. But neither of those exists. + * This is a workaroud for missing HW feature. + */ + +static void bh1770_prox_work(struct work_struct *work) +{ + struct bh1770_chip *chip = + container_of(work, struct bh1770_chip, prox_work.work); + + mutex_lock(&chip->mutex); + bh1770_prox_read_result(chip); + mutex_unlock(&chip->mutex); +} + +/* This is threaded irq handler */ +static irqreturn_t bh1770_irq(int irq, void *data) +{ + struct bh1770_chip *chip = data; + int status; + int rate = 0; + + mutex_lock(&chip->mutex); + status = i2c_smbus_read_byte_data(chip->client, BH1770_ALS_PS_STATUS); + + /* Acknowledge interrupt by reading this register */ + i2c_smbus_read_byte_data(chip->client, BH1770_INTERRUPT); + + /* + * Check if there is fresh data available for als. + * If this is the very first data, update thresholds after that. 
+ */ + if (status & BH1770_INT_ALS_DATA) { + bh1770_lux_get_result(chip); + if (unlikely(chip->lux_wait_result)) { + chip->lux_wait_result = false; + wake_up(&chip->wait); + bh1770_lux_update_thresholds(chip, + chip->lux_threshold_hi, + chip->lux_threshold_lo); + } + } + + /* Disable interrupt logic to guarantee acknowledgement */ + i2c_smbus_write_byte_data(chip->client, BH1770_INTERRUPT, + (0 << 1) | (0 << 0)); + + if ((status & BH1770_INT_ALS_INT)) + sysfs_notify(&chip->client->dev.kobj, NULL, "lux0_input"); + + if (chip->int_mode_prox && (status & BH1770_INT_LEDS_INT)) { + rate = prox_rates_ms[chip->prox_rate_threshold]; + bh1770_prox_read_result(chip); + } + + /* Re-enable interrupt logic */ + i2c_smbus_write_byte_data(chip->client, BH1770_INTERRUPT, + (chip->int_mode_lux << 1) | + (chip->int_mode_prox << 0)); + mutex_unlock(&chip->mutex); + + /* + * Can't cancel work while keeping mutex since the work uses the + * same mutex. + */ + if (rate) { + /* + * Simulate missing no-proximity interrupt 50ms after the + * next expected interrupt time. + */ + cancel_delayed_work_sync(&chip->prox_work); + schedule_delayed_work(&chip->prox_work, + msecs_to_jiffies(rate + 50)); + } + return IRQ_HANDLED; +} + +static ssize_t bh1770_power_state_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + unsigned long value; + ssize_t ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + mutex_lock(&chip->mutex); + if (value) { + pm_runtime_get_sync(dev); + + ret = bh1770_lux_rate(chip, chip->lux_rate_index); + if (ret < 0) { + pm_runtime_put(dev); + goto leave; + } + + ret = bh1770_lux_interrupt_control(chip, BH1770_ENABLE); + if (ret < 0) { + pm_runtime_put(dev); + goto leave; + } + + /* This causes interrupt after the next measurement cycle */ + bh1770_lux_update_thresholds(chip, BH1770_LUX_DEF_THRES, + BH1770_LUX_DEF_THRES); + /* Inform that we are waiting for a result from ALS */ + chip->lux_wait_result = true; + bh1770_prox_mode_control(chip); + } else if (!pm_runtime_suspended(dev)) { + pm_runtime_put(dev); + } + ret = count; +leave: + mutex_unlock(&chip->mutex); + return ret; +} + +static ssize_t bh1770_power_state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", !pm_runtime_suspended(dev)); +} + +static ssize_t bh1770_lux_result_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + ssize_t ret; + long timeout; + + if (pm_runtime_suspended(dev)) + return -EIO; /* Chip is not enabled at all */ + + timeout = wait_event_interruptible_timeout(chip->wait, + !chip->lux_wait_result, + msecs_to_jiffies(BH1770_TIMEOUT)); + if (!timeout) + return -EIO; + + mutex_lock(&chip->mutex); + ret = sprintf(buf, "%d\n", bh1770_lux_read_result(chip)); + mutex_unlock(&chip->mutex); + + return ret; +} + +static ssize_t bh1770_lux_range_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", BH1770_LUX_RANGE); +} + +static ssize_t bh1770_prox_enable_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + mutex_lock(&chip->mutex); + /* Assume no proximity. 
Sensor will tell real state soon */ + if (!chip->prox_enable_count) + chip->prox_data = 0; + + if (value) + chip->prox_enable_count++; + else if (chip->prox_enable_count > 0) + chip->prox_enable_count--; + else + goto leave; + + /* Run control only when chip is powered on */ + if (!pm_runtime_suspended(dev)) + bh1770_prox_mode_control(chip); +leave: + mutex_unlock(&chip->mutex); + return count; +} + +static ssize_t bh1770_prox_enable_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + ssize_t len; + + mutex_lock(&chip->mutex); + len = sprintf(buf, "%d\n", chip->prox_enable_count); + mutex_unlock(&chip->mutex); + return len; +} + +static ssize_t bh1770_prox_result_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + ssize_t ret; + + mutex_lock(&chip->mutex); + if (chip->prox_enable_count && !pm_runtime_suspended(dev)) + ret = sprintf(buf, "%d\n", chip->prox_data); + else + ret = -EIO; + mutex_unlock(&chip->mutex); + return ret; +} + +static ssize_t bh1770_prox_range_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", BH1770_PROX_RANGE); +} + +static ssize_t bh1770_get_prox_rate_avail(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int i; + int pos = 0; + for (i = 0; i < ARRAY_SIZE(prox_rates_hz); i++) + pos += sprintf(buf + pos, "%d ", prox_rates_hz[i]); + sprintf(buf + pos - 1, "\n"); + return pos; +} + +static ssize_t bh1770_get_prox_rate_above(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", prox_rates_hz[chip->prox_rate_threshold]); +} + +static ssize_t bh1770_get_prox_rate_below(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", prox_rates_hz[chip->prox_rate]); +} + +static int bh1770_prox_rate_validate(int rate) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(prox_rates_hz) - 1; i++) + if (rate >= prox_rates_hz[i]) + break; + return i; +} + +static ssize_t bh1770_set_prox_rate_above(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + mutex_lock(&chip->mutex); + chip->prox_rate_threshold = bh1770_prox_rate_validate(value); + mutex_unlock(&chip->mutex); + return count; +} + +static ssize_t bh1770_set_prox_rate_below(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + mutex_lock(&chip->mutex); + chip->prox_rate = bh1770_prox_rate_validate(value); + mutex_unlock(&chip->mutex); + return count; +} + +static ssize_t bh1770_get_prox_thres(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", chip->prox_threshold); +} + +static ssize_t bh1770_set_prox_thres(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + if (value > BH1770_PROX_RANGE) + return 
-EINVAL; + + mutex_lock(&chip->mutex); + chip->prox_threshold = value; + ret = bh1770_prox_set_threshold(chip); + mutex_unlock(&chip->mutex); + if (ret < 0) + return ret; + return count; +} + +static ssize_t bh1770_prox_persistence_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%u\n", chip->prox_persistence); +} + +static ssize_t bh1770_prox_persistence_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + if (value > BH1770_PROX_MAX_PERSISTENCE) + return -EINVAL; + + chip->prox_persistence = value; + + return len; +} + +static ssize_t bh1770_prox_abs_thres_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%u\n", chip->prox_abs_thres); +} + +static ssize_t bh1770_prox_abs_thres_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + if (value > BH1770_PROX_RANGE) + return -EINVAL; + + chip->prox_abs_thres = value; + + return len; +} + +static ssize_t bh1770_chip_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%s rev %d\n", chip->chipname, chip->revision); +} + +static ssize_t bh1770_lux_calib_default_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", BH1770_CALIB_SCALER); +} + +static ssize_t bh1770_lux_calib_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + ssize_t len; + + mutex_lock(&chip->mutex); + len = sprintf(buf, "%u\n", chip->lux_calib); + mutex_unlock(&chip->mutex); + return len; +} + +static ssize_t bh1770_lux_calib_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + unsigned long value; + u32 old_calib; + u32 new_corr; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + mutex_lock(&chip->mutex); + old_calib = chip->lux_calib; + chip->lux_calib = value; + new_corr = bh1770_get_corr_value(chip); + if (new_corr == 0) { + chip->lux_calib = old_calib; + mutex_unlock(&chip->mutex); + return -EINVAL; + } + chip->lux_corr = new_corr; + /* Refresh thresholds on HW after changing correction value */ + bh1770_lux_update_thresholds(chip, chip->lux_threshold_hi, + chip->lux_threshold_lo); + + mutex_unlock(&chip->mutex); + + return len; +} + +static ssize_t bh1770_get_lux_rate_avail(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int i; + int pos = 0; + for (i = 0; i < ARRAY_SIZE(lux_rates_hz); i++) + pos += sprintf(buf + pos, "%d ", lux_rates_hz[i]); + sprintf(buf + pos - 1, "\n"); + return pos; +} + +static ssize_t bh1770_get_lux_rate(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", lux_rates_hz[chip->lux_rate_index]); +} + +static ssize_t bh1770_set_lux_rate(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct bh1770_chip 
*chip = dev_get_drvdata(dev); + unsigned long rate_hz; + int ret, i; + + ret = kstrtoul(buf, 0, &rate_hz); + if (ret) + return ret; + + for (i = 0; i < ARRAY_SIZE(lux_rates_hz) - 1; i++) + if (rate_hz >= lux_rates_hz[i]) + break; + + mutex_lock(&chip->mutex); + chip->lux_rate_index = i; + ret = bh1770_lux_rate(chip, i); + mutex_unlock(&chip->mutex); + + if (ret < 0) + return ret; + + return count; +} + +static ssize_t bh1770_get_lux_thresh_above(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", chip->lux_threshold_hi); +} + +static ssize_t bh1770_get_lux_thresh_below(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", chip->lux_threshold_lo); +} + +static ssize_t bh1770_set_lux_thresh(struct bh1770_chip *chip, u16 *target, + const char *buf) +{ + unsigned long thresh; + int ret; + + ret = kstrtoul(buf, 0, &thresh); + if (ret) + return ret; + + if (thresh > BH1770_LUX_RANGE) + return -EINVAL; + + mutex_lock(&chip->mutex); + *target = thresh; + /* + * Don't update values in HW if we are still waiting for + * first interrupt to come after device handle open call. + */ + if (!chip->lux_wait_result) + ret = bh1770_lux_update_thresholds(chip, + chip->lux_threshold_hi, + chip->lux_threshold_lo); + mutex_unlock(&chip->mutex); + return ret; + +} + +static ssize_t bh1770_set_lux_thresh_above(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + int ret = bh1770_set_lux_thresh(chip, &chip->lux_threshold_hi, buf); + if (ret < 0) + return ret; + return len; +} + +static ssize_t bh1770_set_lux_thresh_below(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + int ret = bh1770_set_lux_thresh(chip, &chip->lux_threshold_lo, buf); + if (ret < 0) + return ret; + return len; +} + +static DEVICE_ATTR(prox0_raw_en, S_IRUGO | S_IWUSR, bh1770_prox_enable_show, + bh1770_prox_enable_store); +static DEVICE_ATTR(prox0_thresh_above1_value, S_IRUGO | S_IWUSR, + bh1770_prox_abs_thres_show, + bh1770_prox_abs_thres_store); +static DEVICE_ATTR(prox0_thresh_above0_value, S_IRUGO | S_IWUSR, + bh1770_get_prox_thres, + bh1770_set_prox_thres); +static DEVICE_ATTR(prox0_raw, S_IRUGO, bh1770_prox_result_show, NULL); +static DEVICE_ATTR(prox0_sensor_range, S_IRUGO, bh1770_prox_range_show, NULL); +static DEVICE_ATTR(prox0_thresh_above_count, S_IRUGO | S_IWUSR, + bh1770_prox_persistence_show, + bh1770_prox_persistence_store); +static DEVICE_ATTR(prox0_rate_above, S_IRUGO | S_IWUSR, + bh1770_get_prox_rate_above, + bh1770_set_prox_rate_above); +static DEVICE_ATTR(prox0_rate_below, S_IRUGO | S_IWUSR, + bh1770_get_prox_rate_below, + bh1770_set_prox_rate_below); +static DEVICE_ATTR(prox0_rate_avail, S_IRUGO, bh1770_get_prox_rate_avail, NULL); + +static DEVICE_ATTR(lux0_calibscale, S_IRUGO | S_IWUSR, bh1770_lux_calib_show, + bh1770_lux_calib_store); +static DEVICE_ATTR(lux0_calibscale_default, S_IRUGO, + bh1770_lux_calib_default_show, + NULL); +static DEVICE_ATTR(lux0_input, S_IRUGO, bh1770_lux_result_show, NULL); +static DEVICE_ATTR(lux0_sensor_range, S_IRUGO, bh1770_lux_range_show, NULL); +static DEVICE_ATTR(lux0_rate, S_IRUGO | S_IWUSR, bh1770_get_lux_rate, + bh1770_set_lux_rate); +static DEVICE_ATTR(lux0_rate_avail, S_IRUGO, bh1770_get_lux_rate_avail, NULL); +static 
DEVICE_ATTR(lux0_thresh_above_value, S_IRUGO | S_IWUSR, + bh1770_get_lux_thresh_above, + bh1770_set_lux_thresh_above); +static DEVICE_ATTR(lux0_thresh_below_value, S_IRUGO | S_IWUSR, + bh1770_get_lux_thresh_below, + bh1770_set_lux_thresh_below); +static DEVICE_ATTR(chip_id, S_IRUGO, bh1770_chip_id_show, NULL); +static DEVICE_ATTR(power_state, S_IRUGO | S_IWUSR, bh1770_power_state_show, + bh1770_power_state_store); + + +static struct attribute *sysfs_attrs[] = { + &dev_attr_lux0_calibscale.attr, + &dev_attr_lux0_calibscale_default.attr, + &dev_attr_lux0_input.attr, + &dev_attr_lux0_sensor_range.attr, + &dev_attr_lux0_rate.attr, + &dev_attr_lux0_rate_avail.attr, + &dev_attr_lux0_thresh_above_value.attr, + &dev_attr_lux0_thresh_below_value.attr, + &dev_attr_prox0_raw.attr, + &dev_attr_prox0_sensor_range.attr, + &dev_attr_prox0_raw_en.attr, + &dev_attr_prox0_thresh_above_count.attr, + &dev_attr_prox0_rate_above.attr, + &dev_attr_prox0_rate_below.attr, + &dev_attr_prox0_rate_avail.attr, + &dev_attr_prox0_thresh_above0_value.attr, + &dev_attr_prox0_thresh_above1_value.attr, + &dev_attr_chip_id.attr, + &dev_attr_power_state.attr, + NULL +}; + +static const struct attribute_group bh1770_attribute_group = { + .attrs = sysfs_attrs +}; + +static int bh1770_probe(struct i2c_client *client) +{ + struct bh1770_chip *chip; + int err; + + chip = devm_kzalloc(&client->dev, sizeof *chip, GFP_KERNEL); + if (!chip) + return -ENOMEM; + + i2c_set_clientdata(client, chip); + chip->client = client; + + mutex_init(&chip->mutex); + init_waitqueue_head(&chip->wait); + INIT_DELAYED_WORK(&chip->prox_work, bh1770_prox_work); + + if (client->dev.platform_data == NULL) { + dev_err(&client->dev, "platform data is mandatory\n"); + return -EINVAL; + } + + chip->pdata = client->dev.platform_data; + chip->lux_calib = BH1770_LUX_NEUTRAL_CALIB_VALUE; + chip->lux_rate_index = BH1770_LUX_DEFAULT_RATE; + chip->lux_threshold_lo = BH1770_LUX_DEF_THRES; + chip->lux_threshold_hi = BH1770_LUX_DEF_THRES; + + if (chip->pdata->glass_attenuation == 0) + chip->lux_ga = BH1770_NEUTRAL_GA; + else + chip->lux_ga = chip->pdata->glass_attenuation; + + chip->prox_threshold = BH1770_PROX_DEF_THRES; + chip->prox_led = chip->pdata->led_def_curr; + chip->prox_abs_thres = BH1770_PROX_DEF_ABS_THRES; + chip->prox_persistence = BH1770_DEFAULT_PERSISTENCE; + chip->prox_rate_threshold = BH1770_PROX_DEF_RATE_THRESH; + chip->prox_rate = BH1770_PROX_DEFAULT_RATE; + chip->prox_data = 0; + + chip->regs[0].supply = reg_vcc; + chip->regs[1].supply = reg_vleds; + + err = devm_regulator_bulk_get(&client->dev, + ARRAY_SIZE(chip->regs), chip->regs); + if (err < 0) { + dev_err(&client->dev, "Cannot get regulators\n"); + return err; + } + + err = regulator_bulk_enable(ARRAY_SIZE(chip->regs), + chip->regs); + if (err < 0) { + dev_err(&client->dev, "Cannot enable regulators\n"); + return err; + } + + usleep_range(BH1770_STARTUP_DELAY, BH1770_STARTUP_DELAY * 2); + err = bh1770_detect(chip); + if (err < 0) + goto fail0; + + /* Start chip */ + bh1770_chip_on(chip); + pm_runtime_set_active(&client->dev); + pm_runtime_enable(&client->dev); + + chip->lux_corr = bh1770_get_corr_value(chip); + if (chip->lux_corr == 0) { + dev_err(&client->dev, "Improper correction values\n"); + err = -EINVAL; + goto fail0; + } + + if (chip->pdata->setup_resources) { + err = chip->pdata->setup_resources(); + if (err) { + err = -EINVAL; + goto fail0; + } + } + + err = sysfs_create_group(&chip->client->dev.kobj, + &bh1770_attribute_group); + if (err < 0) { + dev_err(&chip->client->dev, "Sysfs 
registration failed\n"); + goto fail1; + } + + /* + * Chip needs level triggered interrupt to work. However, + * level triggering doesn't work always correctly with power + * management. Select both + */ + err = request_threaded_irq(client->irq, NULL, + bh1770_irq, + IRQF_TRIGGER_FALLING | IRQF_ONESHOT | + IRQF_TRIGGER_LOW, + "bh1770", chip); + if (err) { + dev_err(&client->dev, "could not get IRQ %d\n", + client->irq); + goto fail2; + } + regulator_bulk_disable(ARRAY_SIZE(chip->regs), chip->regs); + return err; +fail2: + sysfs_remove_group(&chip->client->dev.kobj, + &bh1770_attribute_group); +fail1: + if (chip->pdata->release_resources) + chip->pdata->release_resources(); +fail0: + regulator_bulk_disable(ARRAY_SIZE(chip->regs), chip->regs); + return err; +} + +static void bh1770_remove(struct i2c_client *client) +{ + struct bh1770_chip *chip = i2c_get_clientdata(client); + + free_irq(client->irq, chip); + + sysfs_remove_group(&chip->client->dev.kobj, + &bh1770_attribute_group); + + if (chip->pdata->release_resources) + chip->pdata->release_resources(); + + cancel_delayed_work_sync(&chip->prox_work); + + if (!pm_runtime_suspended(&client->dev)) + bh1770_chip_off(chip); + + pm_runtime_disable(&client->dev); + pm_runtime_set_suspended(&client->dev); +} + +#ifdef CONFIG_PM_SLEEP +static int bh1770_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct bh1770_chip *chip = i2c_get_clientdata(client); + + bh1770_chip_off(chip); + + return 0; +} + +static int bh1770_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct bh1770_chip *chip = i2c_get_clientdata(client); + int ret = 0; + + bh1770_chip_on(chip); + + if (!pm_runtime_suspended(dev)) { + /* + * If we were enabled at suspend time, it is expected + * everything works nice and smoothly + */ + ret = bh1770_lux_rate(chip, chip->lux_rate_index); + ret |= bh1770_lux_interrupt_control(chip, BH1770_ENABLE); + + /* This causes interrupt after the next measurement cycle */ + bh1770_lux_update_thresholds(chip, BH1770_LUX_DEF_THRES, + BH1770_LUX_DEF_THRES); + /* Inform that we are waiting for a result from ALS */ + chip->lux_wait_result = true; + bh1770_prox_mode_control(chip); + } + return ret; +} +#endif + +#ifdef CONFIG_PM +static int bh1770_runtime_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct bh1770_chip *chip = i2c_get_clientdata(client); + + bh1770_chip_off(chip); + + return 0; +} + +static int bh1770_runtime_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct bh1770_chip *chip = i2c_get_clientdata(client); + + bh1770_chip_on(chip); + + return 0; +} +#endif + +static const struct i2c_device_id bh1770_id[] = { + {"bh1770glc", 0 }, + {"sfh7770", 0 }, + {} +}; + +MODULE_DEVICE_TABLE(i2c, bh1770_id); + +static const struct dev_pm_ops bh1770_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(bh1770_suspend, bh1770_resume) + SET_RUNTIME_PM_OPS(bh1770_runtime_suspend, bh1770_runtime_resume, NULL) +}; + +static struct i2c_driver bh1770_driver = { + .driver = { + .name = "bh1770glc", + .pm = &bh1770_pm_ops, + }, + .probe = bh1770_probe, + .remove = bh1770_remove, + .id_table = bh1770_id, +}; + +module_i2c_driver(bh1770_driver); + +MODULE_DESCRIPTION("BH1770GLC / SFH7770 combined ALS and proximity sensor"); +MODULE_AUTHOR("Samu Onkalo, Nokia Corporation"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/c2port/Kconfig b/drivers/misc/c2port/Kconfig new file mode 100644 index 0000000000..e20516ffd9 --- /dev/null +++ 
b/drivers/misc/c2port/Kconfig @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# C2 port devices +# + +menuconfig C2PORT + tristate "Silicon Labs C2 port support" + help + This option enables support for Silicon Labs C2 port used to + program Silicon micro controller chips (and other 8051 compatible). + + If your board have no such micro controllers you don't need this + interface at all. + + To compile this driver as a module, choose M here: the module will + be called c2port_core. Note that you also need a client module + usually called c2port-*. + + If you are not sure, say N here. + +if C2PORT + +config C2PORT_DURAMAR_2150 + tristate "C2 port support for Eurotech's Duramar 2150" + depends on X86 + help + This option enables C2 support for the Eurotech's Duramar 2150 + on board micro controller. + + To compile this driver as a module, choose M here: the module will + be called c2port-duramar2150. + +endif # C2PORT diff --git a/drivers/misc/c2port/Makefile b/drivers/misc/c2port/Makefile new file mode 100644 index 0000000000..1dfe6ab067 --- /dev/null +++ b/drivers/misc/c2port/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_C2PORT) += core.o + +obj-$(CONFIG_C2PORT_DURAMAR_2150) += c2port-duramar2150.o diff --git a/drivers/misc/c2port/c2port-duramar2150.c b/drivers/misc/c2port/c2port-duramar2150.c new file mode 100644 index 0000000000..7e370949e0 --- /dev/null +++ b/drivers/misc/c2port/c2port-duramar2150.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Silicon Labs C2 port Linux support for Eurotech Duramar 2150 + * + * Copyright (c) 2008 Rodolfo Giometti + * Copyright (c) 2008 Eurotech S.p.A. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DATA_PORT 0x325 +#define DIR_PORT 0x326 +#define C2D (1 << 0) +#define C2CK (1 << 1) + +static DEFINE_MUTEX(update_lock); + +/* + * C2 port operations + */ + +static void duramar2150_c2port_access(struct c2port_device *dev, int status) +{ + u8 v; + + mutex_lock(&update_lock); + + v = inb(DIR_PORT); + + /* 0 = input, 1 = output */ + if (status) + outb(v | (C2D | C2CK), DIR_PORT); + else + /* When access is "off" is important that both lines are set + * as inputs or hi-impedance */ + outb(v & ~(C2D | C2CK), DIR_PORT); + + mutex_unlock(&update_lock); +} + +static void duramar2150_c2port_c2d_dir(struct c2port_device *dev, int dir) +{ + u8 v; + + mutex_lock(&update_lock); + + v = inb(DIR_PORT); + + if (dir) + outb(v & ~C2D, DIR_PORT); + else + outb(v | C2D, DIR_PORT); + + mutex_unlock(&update_lock); +} + +static int duramar2150_c2port_c2d_get(struct c2port_device *dev) +{ + return inb(DATA_PORT) & C2D; +} + +static void duramar2150_c2port_c2d_set(struct c2port_device *dev, int status) +{ + u8 v; + + mutex_lock(&update_lock); + + v = inb(DATA_PORT); + + if (status) + outb(v | C2D, DATA_PORT); + else + outb(v & ~C2D, DATA_PORT); + + mutex_unlock(&update_lock); +} + +static void duramar2150_c2port_c2ck_set(struct c2port_device *dev, int status) +{ + u8 v; + + mutex_lock(&update_lock); + + v = inb(DATA_PORT); + + if (status) + outb(v | C2CK, DATA_PORT); + else + outb(v & ~C2CK, DATA_PORT); + + mutex_unlock(&update_lock); +} + +static struct c2port_ops duramar2150_c2port_ops = { + .block_size = 512, /* bytes */ + .blocks_num = 30, /* total flash size: 15360 bytes */ + + .access = duramar2150_c2port_access, + .c2d_dir = duramar2150_c2port_c2d_dir, + .c2d_get = duramar2150_c2port_c2d_get, + .c2d_set = duramar2150_c2port_c2d_set, + .c2ck_set = 
duramar2150_c2port_c2ck_set, +}; + +static struct c2port_device *duramar2150_c2port_dev; + +/* + * Module stuff + */ + +static int __init duramar2150_c2port_init(void) +{ + struct resource *res; + int ret = 0; + + res = request_region(0x325, 2, "c2port"); + if (!res) + return -EBUSY; + + duramar2150_c2port_dev = c2port_device_register("uc", + &duramar2150_c2port_ops, NULL); + if (IS_ERR(duramar2150_c2port_dev)) { + ret = PTR_ERR(duramar2150_c2port_dev); + goto free_region; + } + + return 0; + +free_region: + release_region(0x325, 2); + return ret; +} + +static void __exit duramar2150_c2port_exit(void) +{ + /* Setup the GPIOs as input by default (access = 0) */ + duramar2150_c2port_access(duramar2150_c2port_dev, 0); + + c2port_device_unregister(duramar2150_c2port_dev); + + release_region(0x325, 2); +} + +module_init(duramar2150_c2port_init); +module_exit(duramar2150_c2port_exit); + +MODULE_AUTHOR("Rodolfo Giometti "); +MODULE_DESCRIPTION("Silicon Labs C2 port Linux support for Duramar 2150"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/c2port/core.c b/drivers/misc/c2port/core.c new file mode 100644 index 0000000000..f574c83b82 --- /dev/null +++ b/drivers/misc/c2port/core.c @@ -0,0 +1,1000 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Silicon Labs C2 port core Linux support + * + * Copyright (c) 2007 Rodolfo Giometti + * Copyright (c) 2007 Eurotech S.p.A. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define DRIVER_NAME "c2port" +#define DRIVER_VERSION "0.51.0" + +static DEFINE_SPINLOCK(c2port_idr_lock); +static DEFINE_IDR(c2port_idr); + +/* + * Local variables + */ + +static struct class *c2port_class; + +/* + * C2 registers & commands defines + */ + +/* C2 registers */ +#define C2PORT_DEVICEID 0x00 +#define C2PORT_REVID 0x01 +#define C2PORT_FPCTL 0x02 +#define C2PORT_FPDAT 0xB4 + +/* C2 interface commands */ +#define C2PORT_GET_VERSION 0x01 +#define C2PORT_DEVICE_ERASE 0x03 +#define C2PORT_BLOCK_READ 0x06 +#define C2PORT_BLOCK_WRITE 0x07 +#define C2PORT_PAGE_ERASE 0x08 + +/* C2 status return codes */ +#define C2PORT_INVALID_COMMAND 0x00 +#define C2PORT_COMMAND_FAILED 0x02 +#define C2PORT_COMMAND_OK 0x0d + +/* + * C2 port low level signal managements + */ + +static void c2port_reset(struct c2port_device *dev) +{ + struct c2port_ops *ops = dev->ops; + + /* To reset the device we have to keep clock line low for at least + * 20us. + */ + local_irq_disable(); + ops->c2ck_set(dev, 0); + udelay(25); + ops->c2ck_set(dev, 1); + local_irq_enable(); + + udelay(1); +} + +static void c2port_strobe_ck(struct c2port_device *dev) +{ + struct c2port_ops *ops = dev->ops; + + /* During hi-low-hi transition we disable local IRQs to avoid + * interructions since C2 port specification says that it must be + * shorter than 5us, otherwise the microcontroller may consider + * it as a reset signal! 
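+ * The low phase below is therefore kept to about 1 us (udelay(1))
+ * before the clock line is raised again.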
+ */ + local_irq_disable(); + ops->c2ck_set(dev, 0); + udelay(1); + ops->c2ck_set(dev, 1); + local_irq_enable(); + + udelay(1); +} + +/* + * C2 port basic functions + */ + +static void c2port_write_ar(struct c2port_device *dev, u8 addr) +{ + struct c2port_ops *ops = dev->ops; + int i; + + /* START field */ + c2port_strobe_ck(dev); + + /* INS field (11b, LSB first) */ + ops->c2d_dir(dev, 0); + ops->c2d_set(dev, 1); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 1); + c2port_strobe_ck(dev); + + /* ADDRESS field */ + for (i = 0; i < 8; i++) { + ops->c2d_set(dev, addr & 0x01); + c2port_strobe_ck(dev); + + addr >>= 1; + } + + /* STOP field */ + ops->c2d_dir(dev, 1); + c2port_strobe_ck(dev); +} + +static int c2port_read_ar(struct c2port_device *dev, u8 *addr) +{ + struct c2port_ops *ops = dev->ops; + int i; + + /* START field */ + c2port_strobe_ck(dev); + + /* INS field (10b, LSB first) */ + ops->c2d_dir(dev, 0); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 1); + c2port_strobe_ck(dev); + + /* ADDRESS field */ + ops->c2d_dir(dev, 1); + *addr = 0; + for (i = 0; i < 8; i++) { + *addr >>= 1; /* shift in 8-bit ADDRESS field LSB first */ + + c2port_strobe_ck(dev); + if (ops->c2d_get(dev)) + *addr |= 0x80; + } + + /* STOP field */ + c2port_strobe_ck(dev); + + return 0; +} + +static int c2port_write_dr(struct c2port_device *dev, u8 data) +{ + struct c2port_ops *ops = dev->ops; + int timeout, i; + + /* START field */ + c2port_strobe_ck(dev); + + /* INS field (01b, LSB first) */ + ops->c2d_dir(dev, 0); + ops->c2d_set(dev, 1); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + + /* LENGTH field (00b, LSB first -> 1 byte) */ + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + + /* DATA field */ + for (i = 0; i < 8; i++) { + ops->c2d_set(dev, data & 0x01); + c2port_strobe_ck(dev); + + data >>= 1; + } + + /* WAIT field */ + ops->c2d_dir(dev, 1); + timeout = 20; + do { + c2port_strobe_ck(dev); + if (ops->c2d_get(dev)) + break; + + udelay(1); + } while (--timeout > 0); + if (timeout == 0) + return -EIO; + + /* STOP field */ + c2port_strobe_ck(dev); + + return 0; +} + +static int c2port_read_dr(struct c2port_device *dev, u8 *data) +{ + struct c2port_ops *ops = dev->ops; + int timeout, i; + + /* START field */ + c2port_strobe_ck(dev); + + /* INS field (00b, LSB first) */ + ops->c2d_dir(dev, 0); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + + /* LENGTH field (00b, LSB first -> 1 byte) */ + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + + /* WAIT field */ + ops->c2d_dir(dev, 1); + timeout = 20; + do { + c2port_strobe_ck(dev); + if (ops->c2d_get(dev)) + break; + + udelay(1); + } while (--timeout > 0); + if (timeout == 0) + return -EIO; + + /* DATA field */ + *data = 0; + for (i = 0; i < 8; i++) { + *data >>= 1; /* shift in 8-bit DATA field LSB first */ + + c2port_strobe_ck(dev); + if (ops->c2d_get(dev)) + *data |= 0x80; + } + + /* STOP field */ + c2port_strobe_ck(dev); + + return 0; +} + +static int c2port_poll_in_busy(struct c2port_device *dev) +{ + u8 addr; + int ret, timeout = 20; + + do { + ret = (c2port_read_ar(dev, &addr)); + if (ret < 0) + return -EIO; + + if (!(addr & 0x02)) + break; + + udelay(1); + } while (--timeout > 0); + if (timeout == 0) + return -EIO; + + return 0; +} + +static int c2port_poll_out_ready(struct c2port_device *dev) +{ + u8 addr; + int ret, timeout = 10000; /* erase flash needs long time... 
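+ * (up to 10000 polls of the address register, with a 1 us delay per pass)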
*/ + + do { + ret = (c2port_read_ar(dev, &addr)); + if (ret < 0) + return -EIO; + + if (addr & 0x01) + break; + + udelay(1); + } while (--timeout > 0); + if (timeout == 0) + return -EIO; + + return 0; +} + +/* + * sysfs methods + */ + +static ssize_t c2port_show_name(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", c2dev->name); +} +static DEVICE_ATTR(name, 0444, c2port_show_name, NULL); + +static ssize_t c2port_show_flash_blocks_num(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + struct c2port_ops *ops = c2dev->ops; + + return sprintf(buf, "%d\n", ops->blocks_num); +} +static DEVICE_ATTR(flash_blocks_num, 0444, c2port_show_flash_blocks_num, NULL); + +static ssize_t c2port_show_flash_block_size(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + struct c2port_ops *ops = c2dev->ops; + + return sprintf(buf, "%d\n", ops->block_size); +} +static DEVICE_ATTR(flash_block_size, 0444, c2port_show_flash_block_size, NULL); + +static ssize_t c2port_show_flash_size(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + struct c2port_ops *ops = c2dev->ops; + + return sprintf(buf, "%d\n", ops->blocks_num * ops->block_size); +} +static DEVICE_ATTR(flash_size, 0444, c2port_show_flash_size, NULL); + +static ssize_t access_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", c2dev->access); +} + +static ssize_t access_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + struct c2port_ops *ops = c2dev->ops; + int status, ret; + + ret = sscanf(buf, "%d", &status); + if (ret != 1) + return -EINVAL; + + mutex_lock(&c2dev->mutex); + + c2dev->access = !!status; + + /* If access is "on" clock should be HIGH _before_ setting the line + * as output and data line should be set as INPUT anyway */ + if (c2dev->access) + ops->c2ck_set(c2dev, 1); + ops->access(c2dev, c2dev->access); + if (c2dev->access) + ops->c2d_dir(c2dev, 1); + + mutex_unlock(&c2dev->mutex); + + return count; +} +static DEVICE_ATTR_RW(access); + +static ssize_t c2port_store_reset(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + + /* Check the device access status */ + if (!c2dev->access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + + c2port_reset(c2dev); + c2dev->flash_access = 0; + + mutex_unlock(&c2dev->mutex); + + return count; +} +static DEVICE_ATTR(reset, 0200, NULL, c2port_store_reset); + +static ssize_t __c2port_show_dev_id(struct c2port_device *dev, char *buf) +{ + u8 data; + int ret; + + /* Select DEVICEID register for C2 data register accesses */ + c2port_write_ar(dev, C2PORT_DEVICEID); + + /* Read and return the device ID register */ + ret = c2port_read_dr(dev, &data); + if (ret < 0) + return ret; + + return sprintf(buf, "%d\n", data); +} + +static ssize_t c2port_show_dev_id(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + ssize_t ret; + + /* Check the device access status */ + if (!c2dev->access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + ret = 
__c2port_show_dev_id(c2dev, buf); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) + dev_err(dev, "cannot read from %s\n", c2dev->name); + + return ret; +} +static DEVICE_ATTR(dev_id, 0444, c2port_show_dev_id, NULL); + +static ssize_t __c2port_show_rev_id(struct c2port_device *dev, char *buf) +{ + u8 data; + int ret; + + /* Select REVID register for C2 data register accesses */ + c2port_write_ar(dev, C2PORT_REVID); + + /* Read and return the revision ID register */ + ret = c2port_read_dr(dev, &data); + if (ret < 0) + return ret; + + return sprintf(buf, "%d\n", data); +} + +static ssize_t c2port_show_rev_id(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + ssize_t ret; + + /* Check the device access status */ + if (!c2dev->access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + ret = __c2port_show_rev_id(c2dev, buf); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) + dev_err(c2dev->dev, "cannot read from %s\n", c2dev->name); + + return ret; +} +static DEVICE_ATTR(rev_id, 0444, c2port_show_rev_id, NULL); + +static ssize_t c2port_show_flash_access(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", c2dev->flash_access); +} + +static ssize_t __c2port_store_flash_access(struct c2port_device *dev, + int status) +{ + int ret; + + /* Check the device access status */ + if (!dev->access) + return -EBUSY; + + dev->flash_access = !!status; + + /* If flash_access is off we have nothing to do... */ + if (dev->flash_access == 0) + return 0; + + /* Target the C2 flash programming control register for C2 data + * register access */ + c2port_write_ar(dev, C2PORT_FPCTL); + + /* Write the first keycode to enable C2 Flash programming */ + ret = c2port_write_dr(dev, 0x02); + if (ret < 0) + return ret; + + /* Write the second keycode to enable C2 Flash programming */ + ret = c2port_write_dr(dev, 0x01); + if (ret < 0) + return ret; + + /* Delay for at least 20ms to ensure the target is ready for + * C2 flash programming */ + mdelay(25); + + return 0; +} + +static ssize_t c2port_store_flash_access(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + int status; + ssize_t ret; + + ret = sscanf(buf, "%d", &status); + if (ret != 1) + return -EINVAL; + + mutex_lock(&c2dev->mutex); + ret = __c2port_store_flash_access(c2dev, status); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) { + dev_err(c2dev->dev, "cannot enable %s flash programming\n", + c2dev->name); + return ret; + } + + return count; +} +static DEVICE_ATTR(flash_access, 0644, c2port_show_flash_access, + c2port_store_flash_access); + +static ssize_t __c2port_write_flash_erase(struct c2port_device *dev) +{ + u8 status; + int ret; + + /* Target the C2 flash programming data register for C2 data register + * access. 
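+ * The erase sequence below is: select FPDAT, issue DEVICE_ERASE, wait for
+ * the interface to report COMMAND_OK, then send the 0xde/0xad/0xa5 arming
+ * bytes and wait for completion.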
+ */ + c2port_write_ar(dev, C2PORT_FPDAT); + + /* Send device erase command */ + c2port_write_dr(dev, C2PORT_DEVICE_ERASE); + + /* Wait for input acknowledge */ + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Should check status before starting FLASH access sequence */ + + /* Wait for status information */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + /* Read flash programming interface status */ + ret = c2port_read_dr(dev, &status); + if (ret < 0) + return ret; + if (status != C2PORT_COMMAND_OK) + return -EBUSY; + + /* Send a three-byte arming sequence to enable the device erase. + * If the sequence is not received correctly, the command will be + * ignored. + * Sequence is: 0xde, 0xad, 0xa5. + */ + c2port_write_dr(dev, 0xde); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + c2port_write_dr(dev, 0xad); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + c2port_write_dr(dev, 0xa5); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + return 0; +} + +static ssize_t c2port_store_flash_erase(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + int ret; + + /* Check the device and flash access status */ + if (!c2dev->access || !c2dev->flash_access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + ret = __c2port_write_flash_erase(c2dev); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) { + dev_err(c2dev->dev, "cannot erase %s flash\n", c2dev->name); + return ret; + } + + return count; +} +static DEVICE_ATTR(flash_erase, 0200, NULL, c2port_store_flash_erase); + +static ssize_t __c2port_read_flash_data(struct c2port_device *dev, + char *buffer, loff_t offset, size_t count) +{ + struct c2port_ops *ops = dev->ops; + u8 status, nread = 128; + int i, ret; + + /* Check for flash end */ + if (offset >= ops->block_size * ops->blocks_num) + return 0; + + if (ops->block_size * ops->blocks_num - offset < nread) + nread = ops->block_size * ops->blocks_num - offset; + if (count < nread) + nread = count; + if (nread == 0) + return nread; + + /* Target the C2 flash programming data register for C2 data register + * access */ + c2port_write_ar(dev, C2PORT_FPDAT); + + /* Send flash block read command */ + c2port_write_dr(dev, C2PORT_BLOCK_READ); + + /* Wait for input acknowledge */ + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Should check status before starting FLASH access sequence */ + + /* Wait for status information */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + /* Read flash programming interface status */ + ret = c2port_read_dr(dev, &status); + if (ret < 0) + return ret; + if (status != C2PORT_COMMAND_OK) + return -EBUSY; + + /* Send address high byte */ + c2port_write_dr(dev, offset >> 8); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Send address low byte */ + c2port_write_dr(dev, offset & 0x00ff); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Send address block size */ + c2port_write_dr(dev, nread); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Should check status before reading FLASH block */ + + /* Wait for status information */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + /* Read flash programming interface status */ + ret = c2port_read_dr(dev, &status); + if (ret < 0) + return ret; + if (status != 
C2PORT_COMMAND_OK) + return -EBUSY; + + /* Read flash block */ + for (i = 0; i < nread; i++) { + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + ret = c2port_read_dr(dev, buffer+i); + if (ret < 0) + return ret; + } + + return nread; +} + +static ssize_t c2port_read_flash_data(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buffer, loff_t offset, size_t count) +{ + struct c2port_device *c2dev = dev_get_drvdata(kobj_to_dev(kobj)); + ssize_t ret; + + /* Check the device and flash access status */ + if (!c2dev->access || !c2dev->flash_access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + ret = __c2port_read_flash_data(c2dev, buffer, offset, count); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) + dev_err(c2dev->dev, "cannot read %s flash\n", c2dev->name); + + return ret; +} + +static ssize_t __c2port_write_flash_data(struct c2port_device *dev, + char *buffer, loff_t offset, size_t count) +{ + struct c2port_ops *ops = dev->ops; + u8 status, nwrite = 128; + int i, ret; + + if (nwrite > count) + nwrite = count; + if (ops->block_size * ops->blocks_num - offset < nwrite) + nwrite = ops->block_size * ops->blocks_num - offset; + + /* Check for flash end */ + if (offset >= ops->block_size * ops->blocks_num) + return -EINVAL; + + /* Target the C2 flash programming data register for C2 data register + * access */ + c2port_write_ar(dev, C2PORT_FPDAT); + + /* Send flash block write command */ + c2port_write_dr(dev, C2PORT_BLOCK_WRITE); + + /* Wait for input acknowledge */ + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Should check status before starting FLASH access sequence */ + + /* Wait for status information */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + /* Read flash programming interface status */ + ret = c2port_read_dr(dev, &status); + if (ret < 0) + return ret; + if (status != C2PORT_COMMAND_OK) + return -EBUSY; + + /* Send address high byte */ + c2port_write_dr(dev, offset >> 8); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Send address low byte */ + c2port_write_dr(dev, offset & 0x00ff); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Send address block size */ + c2port_write_dr(dev, nwrite); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Should check status before writing FLASH block */ + + /* Wait for status information */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + /* Read flash programming interface status */ + ret = c2port_read_dr(dev, &status); + if (ret < 0) + return ret; + if (status != C2PORT_COMMAND_OK) + return -EBUSY; + + /* Write flash block */ + for (i = 0; i < nwrite; i++) { + ret = c2port_write_dr(dev, *(buffer+i)); + if (ret < 0) + return ret; + + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + } + + /* Wait for last flash write to complete */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + return nwrite; +} + +static ssize_t c2port_write_flash_data(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buffer, loff_t offset, size_t count) +{ + struct c2port_device *c2dev = dev_get_drvdata(kobj_to_dev(kobj)); + int ret; + + /* Check the device access status */ + if (!c2dev->access || !c2dev->flash_access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + ret = __c2port_write_flash_data(c2dev, buffer, offset, count); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) + dev_err(c2dev->dev, "cannot write %s flash\n", 
c2dev->name); + + return ret; +} +/* size is computed at run-time */ +static BIN_ATTR(flash_data, 0644, c2port_read_flash_data, + c2port_write_flash_data, 0); + +/* + * Class attributes + */ +static struct attribute *c2port_attrs[] = { + &dev_attr_name.attr, + &dev_attr_flash_blocks_num.attr, + &dev_attr_flash_block_size.attr, + &dev_attr_flash_size.attr, + &dev_attr_access.attr, + &dev_attr_reset.attr, + &dev_attr_dev_id.attr, + &dev_attr_rev_id.attr, + &dev_attr_flash_access.attr, + &dev_attr_flash_erase.attr, + NULL, +}; + +static struct bin_attribute *c2port_bin_attrs[] = { + &bin_attr_flash_data, + NULL, +}; + +static const struct attribute_group c2port_group = { + .attrs = c2port_attrs, + .bin_attrs = c2port_bin_attrs, +}; + +static const struct attribute_group *c2port_groups[] = { + &c2port_group, + NULL, +}; + +/* + * Exported functions + */ + +struct c2port_device *c2port_device_register(char *name, + struct c2port_ops *ops, void *devdata) +{ + struct c2port_device *c2dev; + int ret; + + if (unlikely(!ops) || unlikely(!ops->access) || \ + unlikely(!ops->c2d_dir) || unlikely(!ops->c2ck_set) || \ + unlikely(!ops->c2d_get) || unlikely(!ops->c2d_set)) + return ERR_PTR(-EINVAL); + + c2dev = kzalloc(sizeof(struct c2port_device), GFP_KERNEL); + if (unlikely(!c2dev)) + return ERR_PTR(-ENOMEM); + + idr_preload(GFP_KERNEL); + spin_lock_irq(&c2port_idr_lock); + ret = idr_alloc(&c2port_idr, c2dev, 0, 0, GFP_NOWAIT); + spin_unlock_irq(&c2port_idr_lock); + idr_preload_end(); + + if (ret < 0) + goto error_idr_alloc; + c2dev->id = ret; + + bin_attr_flash_data.size = ops->blocks_num * ops->block_size; + + c2dev->dev = device_create(c2port_class, NULL, 0, c2dev, + "c2port%d", c2dev->id); + if (IS_ERR(c2dev->dev)) { + ret = PTR_ERR(c2dev->dev); + goto error_device_create; + } + dev_set_drvdata(c2dev->dev, c2dev); + + strncpy(c2dev->name, name, C2PORT_NAME_LEN - 1); + c2dev->ops = ops; + mutex_init(&c2dev->mutex); + + /* By default C2 port access is off */ + c2dev->access = c2dev->flash_access = 0; + ops->access(c2dev, 0); + + dev_info(c2dev->dev, "C2 port %s added\n", name); + dev_info(c2dev->dev, "%s flash has %d blocks x %d bytes " + "(%d bytes total)\n", + name, ops->blocks_num, ops->block_size, + ops->blocks_num * ops->block_size); + + return c2dev; + +error_device_create: + spin_lock_irq(&c2port_idr_lock); + idr_remove(&c2port_idr, c2dev->id); + spin_unlock_irq(&c2port_idr_lock); + +error_idr_alloc: + kfree(c2dev); + + return ERR_PTR(ret); +} +EXPORT_SYMBOL(c2port_device_register); + +void c2port_device_unregister(struct c2port_device *c2dev) +{ + if (!c2dev) + return; + + dev_info(c2dev->dev, "C2 port %s removed\n", c2dev->name); + + spin_lock_irq(&c2port_idr_lock); + idr_remove(&c2port_idr, c2dev->id); + spin_unlock_irq(&c2port_idr_lock); + + device_destroy(c2port_class, c2dev->id); + + kfree(c2dev); +} +EXPORT_SYMBOL(c2port_device_unregister); + +/* + * Module stuff + */ + +static int __init c2port_init(void) +{ + printk(KERN_INFO "Silicon Labs C2 port support v. " DRIVER_VERSION + " - (C) 2007 Rodolfo Giometti\n"); + + c2port_class = class_create("c2port"); + if (IS_ERR(c2port_class)) { + printk(KERN_ERR "c2port: failed to allocate class\n"); + return PTR_ERR(c2port_class); + } + c2port_class->dev_groups = c2port_groups; + + return 0; +} + +static void __exit c2port_exit(void) +{ + class_destroy(c2port_class); +} + +module_init(c2port_init); +module_exit(c2port_exit); + +MODULE_AUTHOR("Rodolfo Giometti "); +MODULE_DESCRIPTION("Silicon Labs C2 port support v. 
" DRIVER_VERSION); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/cardreader/Kconfig b/drivers/misc/cardreader/Kconfig new file mode 100644 index 0000000000..022322dfb3 --- /dev/null +++ b/drivers/misc/cardreader/Kconfig @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: GPL-2.0-only +config MISC_ALCOR_PCI + tristate "Alcor Micro/Alcor Link PCI-E card reader" + depends on PCI + select MFD_CORE + help + This supports for Alcor Micro PCI-Express card reader including au6601, + au6621. + Alcor Micro card readers support access to many types of memory cards, + such as Memory Stick, Memory Stick Pro, Secure Digital and + MultiMediaCard. + +config MISC_RTSX_PCI + tristate "Realtek PCI-E card reader" + depends on PCI + select MFD_CORE + help + This supports for Realtek PCI-Express card reader including rts5209, + rts5227, rts522A, rts5229, rts5249, rts524A, rts525A, rtl8411, rts5260. + Realtek card readers support access to many types of memory cards, + such as Memory Stick, Memory Stick Pro, Secure Digital and + MultiMediaCard. + +config MISC_RTSX_USB + tristate "Realtek USB card reader" + depends on USB + select MFD_CORE + help + Select this option to get support for Realtek USB 2.0 card readers + including RTS5129, RTS5139, RTS5179 and RTS5170. + Realtek card reader supports access to many types of memory cards, + such as Memory Stick Pro, Secure Digital and MultiMediaCard. diff --git a/drivers/misc/cardreader/Makefile b/drivers/misc/cardreader/Makefile new file mode 100644 index 0000000000..895128475d --- /dev/null +++ b/drivers/misc/cardreader/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_MISC_ALCOR_PCI) += alcor_pci.o +obj-$(CONFIG_MISC_RTSX_PCI) += rtsx_pci.o +rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o rts5249.o rts5260.o rts5261.o rts5228.o +obj-$(CONFIG_MISC_RTSX_USB) += rtsx_usb.o diff --git a/drivers/misc/cardreader/alcor_pci.c b/drivers/misc/cardreader/alcor_pci.c new file mode 100644 index 0000000000..0142c4bf4f --- /dev/null +++ b/drivers/misc/cardreader/alcor_pci.c @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2018 Oleksij Rempel + * + * Driver for Alcor Micro AU6601 and AU6621 controllers + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define DRV_NAME_ALCOR_PCI "alcor_pci" + +static DEFINE_IDA(alcor_pci_idr); + +static struct mfd_cell alcor_pci_cells[] = { + [ALCOR_SD_CARD] = { + .name = DRV_NAME_ALCOR_PCI_SDMMC, + }, + [ALCOR_MS_CARD] = { + .name = DRV_NAME_ALCOR_PCI_MS, + }, +}; + +static const struct alcor_dev_cfg alcor_cfg = { + .dma = 0, +}; + +static const struct alcor_dev_cfg au6621_cfg = { + .dma = 1, +}; + +static const struct alcor_dev_cfg au6625_cfg = { + .dma = 0, +}; + +static const struct pci_device_id pci_ids[] = { + { PCI_DEVICE(PCI_ID_ALCOR_MICRO, PCI_ID_AU6601), + .driver_data = (kernel_ulong_t)&alcor_cfg }, + { PCI_DEVICE(PCI_ID_ALCOR_MICRO, PCI_ID_AU6621), + .driver_data = (kernel_ulong_t)&au6621_cfg }, + { PCI_DEVICE(PCI_ID_ALCOR_MICRO, PCI_ID_AU6625), + .driver_data = (kernel_ulong_t)&au6625_cfg }, + {}, +}; +MODULE_DEVICE_TABLE(pci, pci_ids); + +void alcor_write8(struct alcor_pci_priv *priv, u8 val, unsigned int addr) +{ + writeb(val, priv->iobase + addr); +} +EXPORT_SYMBOL_GPL(alcor_write8); + +void alcor_write16(struct alcor_pci_priv *priv, u16 val, unsigned int addr) +{ + writew(val, priv->iobase + addr); +} +EXPORT_SYMBOL_GPL(alcor_write16); + +void alcor_write32(struct alcor_pci_priv *priv, u32 val, 
unsigned int addr) +{ + writel(val, priv->iobase + addr); +} +EXPORT_SYMBOL_GPL(alcor_write32); + +void alcor_write32be(struct alcor_pci_priv *priv, u32 val, unsigned int addr) +{ + iowrite32be(val, priv->iobase + addr); +} +EXPORT_SYMBOL_GPL(alcor_write32be); + +u8 alcor_read8(struct alcor_pci_priv *priv, unsigned int addr) +{ + return readb(priv->iobase + addr); +} +EXPORT_SYMBOL_GPL(alcor_read8); + +u32 alcor_read32(struct alcor_pci_priv *priv, unsigned int addr) +{ + return readl(priv->iobase + addr); +} +EXPORT_SYMBOL_GPL(alcor_read32); + +u32 alcor_read32be(struct alcor_pci_priv *priv, unsigned int addr) +{ + return ioread32be(priv->iobase + addr); +} +EXPORT_SYMBOL_GPL(alcor_read32be); + +static int alcor_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct alcor_dev_cfg *cfg; + struct alcor_pci_priv *priv; + int ret, i, bar = 0; + + cfg = (void *)ent->driver_data; + + ret = pcim_enable_device(pdev); + if (ret) + return ret; + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + ret = ida_alloc(&alcor_pci_idr, GFP_KERNEL); + if (ret < 0) + return ret; + priv->id = ret; + + priv->pdev = pdev; + priv->parent_pdev = pdev->bus->self; + priv->dev = &pdev->dev; + priv->cfg = cfg; + priv->irq = pdev->irq; + + ret = pci_request_regions(pdev, DRV_NAME_ALCOR_PCI); + if (ret) { + dev_err(&pdev->dev, "Cannot request region\n"); + ret = -ENOMEM; + goto error_free_ida; + } + + if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) { + dev_err(&pdev->dev, "BAR %d is not iomem. Aborting.\n", bar); + ret = -ENODEV; + goto error_release_regions; + } + + priv->iobase = pcim_iomap(pdev, bar, 0); + if (!priv->iobase) { + ret = -ENOMEM; + goto error_release_regions; + } + + /* make sure irqs are disabled */ + alcor_write32(priv, 0, AU6601_REG_INT_ENABLE); + alcor_write32(priv, 0, AU6601_MS_INT_ENABLE); + + ret = dma_set_mask_and_coherent(priv->dev, AU6601_SDMA_MASK); + if (ret) { + dev_err(priv->dev, "Failed to set DMA mask\n"); + goto error_release_regions; + } + + pci_set_master(pdev); + pci_set_drvdata(pdev, priv); + + for (i = 0; i < ARRAY_SIZE(alcor_pci_cells); i++) { + alcor_pci_cells[i].platform_data = priv; + alcor_pci_cells[i].pdata_size = sizeof(*priv); + } + ret = mfd_add_devices(&pdev->dev, priv->id, alcor_pci_cells, + ARRAY_SIZE(alcor_pci_cells), NULL, 0, NULL); + if (ret < 0) + goto error_clear_drvdata; + + pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); + + return 0; + +error_clear_drvdata: + pci_clear_master(pdev); + pci_set_drvdata(pdev, NULL); +error_release_regions: + pci_release_regions(pdev); +error_free_ida: + ida_free(&alcor_pci_idr, priv->id); + return ret; +} + +static void alcor_pci_remove(struct pci_dev *pdev) +{ + struct alcor_pci_priv *priv; + + priv = pci_get_drvdata(pdev); + + mfd_remove_devices(&pdev->dev); + + ida_free(&alcor_pci_idr, priv->id); + + pci_release_regions(pdev); + pci_clear_master(pdev); + pci_set_drvdata(pdev, NULL); +} + +#ifdef CONFIG_PM_SLEEP +static int alcor_suspend(struct device *dev) +{ + return 0; +} + +static int alcor_resume(struct device *dev) +{ + struct alcor_pci_priv *priv = dev_get_drvdata(dev); + + pci_disable_link_state(priv->pdev, + PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); + + return 0; +} +#endif /* CONFIG_PM_SLEEP */ + +static SIMPLE_DEV_PM_OPS(alcor_pci_pm_ops, alcor_suspend, alcor_resume); + +static struct pci_driver alcor_driver = { + .name = DRV_NAME_ALCOR_PCI, + .id_table = pci_ids, + .probe = alcor_pci_probe, + .remove = 
alcor_pci_remove, + .driver = { + .pm = &alcor_pci_pm_ops + }, +}; + +module_pci_driver(alcor_driver); + +MODULE_AUTHOR("Oleksij Rempel "); +MODULE_DESCRIPTION("PCI driver for Alcor Micro AU6601 Secure Digital Host Controller Interface"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/cardreader/rtl8411.c b/drivers/misc/cardreader/rtl8411.c new file mode 100644 index 0000000000..06457e875a --- /dev/null +++ b/drivers/misc/cardreader/rtl8411.c @@ -0,0 +1,499 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * Author: + * Wei WANG + * Roger Tseng + */ + +#include +#include +#include +#include + +#include "rtsx_pcr.h" + +static u8 rtl8411_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, SYS_VER, &val); + return val & 0x0F; +} + +static int rtl8411b_is_qfn48(struct rtsx_pcr *pcr) +{ + u8 val = 0; + + rtsx_pci_read_register(pcr, RTL8411B_PACKAGE_MODE, &val); + + if (val & 0x2) + return 1; + else + return 0; +} + +static void rtl8411_fetch_vendor_settings(struct rtsx_pcr *pcr) +{ + struct pci_dev *pdev = pcr->pci; + u32 reg1 = 0; + u8 reg3 = 0; + + pci_read_config_dword(pdev, PCR_SETTING_REG1, ®1); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg1); + + if (!rtsx_vendor_setting_valid(reg1)) + return; + + pcr->aspm_en = rtsx_reg_to_aspm(reg1); + pcr->sd30_drive_sel_1v8 = + map_sd_drive(rtsx_reg_to_sd30_drive_sel_1v8(reg1)); + pcr->card_drive_sel &= 0x3F; + pcr->card_drive_sel |= rtsx_reg_to_card_drive_sel(reg1); + + pci_read_config_byte(pdev, PCR_SETTING_REG3, ®3); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG3, reg3); + pcr->sd30_drive_sel_3v3 = rtl8411_reg_to_sd30_drive_sel_3v3(reg3); +} + +static void rtl8411b_fetch_vendor_settings(struct rtsx_pcr *pcr) +{ + struct pci_dev *pdev = pcr->pci; + u32 reg = 0; + + pci_read_config_dword(pdev, PCR_SETTING_REG1, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); + + if (!rtsx_vendor_setting_valid(reg)) + return; + + pcr->aspm_en = rtsx_reg_to_aspm(reg); + pcr->sd30_drive_sel_1v8 = + map_sd_drive(rtsx_reg_to_sd30_drive_sel_1v8(reg)); + pcr->sd30_drive_sel_3v3 = + map_sd_drive(rtl8411b_reg_to_sd30_drive_sel_3v3(reg)); +} + +static void rtl8411_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime) +{ + rtsx_pci_write_register(pcr, FPDCTL, 0x07, 0x07); +} + +static int rtl8411_extra_init_hw(struct rtsx_pcr *pcr) +{ + rtsx_pci_init_cmd(pcr); + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DRIVE_SEL, + 0xFF, pcr->sd30_drive_sel_3v3); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CD_PAD_CTL, + CD_DISABLE_MASK | CD_AUTO_DISABLE, CD_ENABLE); + + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rtl8411b_extra_init_hw(struct rtsx_pcr *pcr) +{ + rtsx_pci_init_cmd(pcr); + + if (rtl8411b_is_qfn48(pcr)) + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, + CARD_PULL_CTL3, 0xFF, 0xF5); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DRIVE_SEL, + 0xFF, pcr->sd30_drive_sel_3v3); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CD_PAD_CTL, + CD_DISABLE_MASK | CD_AUTO_DISABLE, CD_ENABLE); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, FUNC_FORCE_CTL, + 0x06, 0x00); + + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rtl8411_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x00); +} + +static int rtl8411_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x01); +} + +static int rtl8411_enable_auto_blink(struct rtsx_pcr 
*pcr) +{ + return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0xFF, 0x0D); +} + +static int rtl8411_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0x08, 0x00); +} + +static int rtl8411_card_power_on(struct rtsx_pcr *pcr, int card) +{ + int err; + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_5_PERCENT_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_CTL, + BPP_LDO_POWB, BPP_LDO_SUSPEND); + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + /* To avoid too large in-rush current */ + udelay(150); + + err = rtsx_pci_write_register(pcr, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_10_PERCENT_ON); + if (err < 0) + return err; + + udelay(150); + + err = rtsx_pci_write_register(pcr, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_15_PERCENT_ON); + if (err < 0) + return err; + + udelay(150); + + err = rtsx_pci_write_register(pcr, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_ON); + if (err < 0) + return err; + + return rtsx_pci_write_register(pcr, LDO_CTL, BPP_LDO_POWB, BPP_LDO_ON); +} + +static int rtl8411_card_power_off(struct rtsx_pcr *pcr, int card) +{ + int err; + + err = rtsx_pci_write_register(pcr, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_OFF); + if (err < 0) + return err; + + return rtsx_pci_write_register(pcr, LDO_CTL, + BPP_LDO_POWB, BPP_LDO_SUSPEND); +} + +static int rtl8411_do_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage, + int bpp_tuned18_shift, int bpp_asic_1v8) +{ + u8 mask, val; + int err; + + mask = (BPP_REG_TUNED18 << bpp_tuned18_shift) | BPP_PAD_MASK; + if (voltage == OUTPUT_3V3) { + err = rtsx_pci_write_register(pcr, + SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_3v3); + if (err < 0) + return err; + val = (BPP_ASIC_3V3 << bpp_tuned18_shift) | BPP_PAD_3V3; + } else if (voltage == OUTPUT_1V8) { + err = rtsx_pci_write_register(pcr, + SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_1v8); + if (err < 0) + return err; + val = (bpp_asic_1v8 << bpp_tuned18_shift) | BPP_PAD_1V8; + } else { + return -EINVAL; + } + + return rtsx_pci_write_register(pcr, LDO_CTL, mask, val); +} + +static int rtl8411_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + return rtl8411_do_switch_output_voltage(pcr, voltage, + BPP_TUNED18_SHIFT_8411, BPP_ASIC_1V8); +} + +static int rtl8402_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + return rtl8411_do_switch_output_voltage(pcr, voltage, + BPP_TUNED18_SHIFT_8402, BPP_ASIC_2V0); +} + +static unsigned int rtl8411_cd_deglitch(struct rtsx_pcr *pcr) +{ + unsigned int card_exist; + + card_exist = rtsx_pci_readl(pcr, RTSX_BIPR); + card_exist &= CARD_EXIST; + if (!card_exist) { + /* Enable card CD */ + rtsx_pci_write_register(pcr, CD_PAD_CTL, + CD_DISABLE_MASK, CD_ENABLE); + /* Enable card interrupt */ + rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x00); + return 0; + } + + if (hweight32(card_exist) > 1) { + rtsx_pci_write_register(pcr, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_5_PERCENT_ON); + msleep(100); + + card_exist = rtsx_pci_readl(pcr, RTSX_BIPR); + if (card_exist & MS_EXIST) + card_exist = MS_EXIST; + else if (card_exist & SD_EXIST) + card_exist = SD_EXIST; + else + card_exist = 0; + + rtsx_pci_write_register(pcr, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_OFF); + + pcr_dbg(pcr, "After CD deglitch, card_exist = 0x%x\n", + card_exist); + } + + if (card_exist & MS_EXIST) { + /* Disable SD interrupt */ + rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x40); + rtsx_pci_write_register(pcr, CD_PAD_CTL, + 
CD_DISABLE_MASK, MS_CD_EN_ONLY); + } else if (card_exist & SD_EXIST) { + /* Disable MS interrupt */ + rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x80); + rtsx_pci_write_register(pcr, CD_PAD_CTL, + CD_DISABLE_MASK, SD_CD_EN_ONLY); + } + + return card_exist; +} + +static int rtl8411_conv_clk_and_div_n(int input, int dir) +{ + int output; + + if (dir == CLK_TO_DIV_N) + output = input * 4 / 5 - 2; + else + output = (input + 2) * 5 / 4; + + return output; +} + +static const struct pcr_ops rtl8411_pcr_ops = { + .fetch_vendor_settings = rtl8411_fetch_vendor_settings, + .extra_init_hw = rtl8411_extra_init_hw, + .optimize_phy = NULL, + .turn_on_led = rtl8411_turn_on_led, + .turn_off_led = rtl8411_turn_off_led, + .enable_auto_blink = rtl8411_enable_auto_blink, + .disable_auto_blink = rtl8411_disable_auto_blink, + .card_power_on = rtl8411_card_power_on, + .card_power_off = rtl8411_card_power_off, + .switch_output_voltage = rtl8411_switch_output_voltage, + .cd_deglitch = rtl8411_cd_deglitch, + .conv_clk_and_div_n = rtl8411_conv_clk_and_div_n, + .force_power_down = rtl8411_force_power_down, +}; + +static const struct pcr_ops rtl8402_pcr_ops = { + .fetch_vendor_settings = rtl8411_fetch_vendor_settings, + .extra_init_hw = rtl8411_extra_init_hw, + .optimize_phy = NULL, + .turn_on_led = rtl8411_turn_on_led, + .turn_off_led = rtl8411_turn_off_led, + .enable_auto_blink = rtl8411_enable_auto_blink, + .disable_auto_blink = rtl8411_disable_auto_blink, + .card_power_on = rtl8411_card_power_on, + .card_power_off = rtl8411_card_power_off, + .switch_output_voltage = rtl8402_switch_output_voltage, + .cd_deglitch = rtl8411_cd_deglitch, + .conv_clk_and_div_n = rtl8411_conv_clk_and_div_n, + .force_power_down = rtl8411_force_power_down, +}; + +static const struct pcr_ops rtl8411b_pcr_ops = { + .fetch_vendor_settings = rtl8411b_fetch_vendor_settings, + .extra_init_hw = rtl8411b_extra_init_hw, + .optimize_phy = NULL, + .turn_on_led = rtl8411_turn_on_led, + .turn_off_led = rtl8411_turn_off_led, + .enable_auto_blink = rtl8411_enable_auto_blink, + .disable_auto_blink = rtl8411_disable_auto_blink, + .card_power_on = rtl8411_card_power_on, + .card_power_off = rtl8411_card_power_off, + .switch_output_voltage = rtl8411_switch_output_voltage, + .cd_deglitch = rtl8411_cd_deglitch, + .conv_clk_and_div_n = rtl8411_conv_clk_and_div_n, + .force_power_down = rtl8411_force_power_down, +}; + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rtl8411_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xA9), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x09), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rtl8411_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04), + 0, +}; + +/* MS Pull Control Enable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rtl8411_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 
0x95), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x05), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04), + 0, +}; + +/* MS Pull Control Disable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rtl8411_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04), + 0, +}; + +static const u32 rtl8411b_qfn64_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x09 | 0xD0), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09 | 0x50), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05 | 0x50), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), + 0, +}; + +static const u32 rtl8411b_qfn48_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x69 | 0x90), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x08 | 0x11), + 0, +}; + +static const u32 rtl8411b_qfn64_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x05 | 0xD0), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09 | 0x50), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05 | 0x50), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), + 0, +}; + +static const u32 rtl8411b_qfn48_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x65 | 0x90), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), + 0, +}; + +static const u32 rtl8411b_qfn64_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x05 | 0xD0), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x05 | 0x50), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05 | 0x50), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), + 0, +}; + +static const u32 rtl8411b_qfn48_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x65 | 0x90), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), + 0, +}; + +static const u32 rtl8411b_qfn64_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x05 | 0xD0), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09 | 0x50), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05 | 0x50), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), + 0, +}; + +static const u32 rtl8411b_qfn48_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x65 | 0x90), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), + 0, +}; + +static void rtl8411_init_common_params(struct rtsx_pcr *pcr) +{ + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; + pcr->num_slots = 2; + pcr->flags = 0; + pcr->card_drive_sel = RTL8411_CARD_DRIVE_DEFAULT; + pcr->sd30_drive_sel_1v8 = DRIVER_TYPE_B; + pcr->sd30_drive_sel_3v3 = DRIVER_TYPE_D; + pcr->aspm_en = ASPM_L1_EN; + pcr->aspm_mode = ASPM_MODE_CFG; + pcr->tx_initial_phase = SET_CLOCK_PHASE(23, 7, 14); + pcr->rx_initial_phase = SET_CLOCK_PHASE(4, 3, 10); + pcr->ic_version = rtl8411_get_ic_version(pcr); +} + +void rtl8411_init_params(struct rtsx_pcr *pcr) +{ + rtl8411_init_common_params(pcr); + pcr->ops = &rtl8411_pcr_ops; + set_pull_ctrl_tables(pcr, rtl8411); +} + +void rtl8411b_init_params(struct rtsx_pcr *pcr) +{ + rtl8411_init_common_params(pcr); + pcr->ops = &rtl8411b_pcr_ops; + if (rtl8411b_is_qfn48(pcr)) + set_pull_ctrl_tables(pcr, 
rtl8411b_qfn48); + else + set_pull_ctrl_tables(pcr, rtl8411b_qfn64); +} + +void rtl8402_init_params(struct rtsx_pcr *pcr) +{ + rtl8411_init_common_params(pcr); + pcr->ops = &rtl8402_pcr_ops; + set_pull_ctrl_tables(pcr, rtl8411); +} diff --git a/drivers/misc/cardreader/rts5209.c b/drivers/misc/cardreader/rts5209.c new file mode 100644 index 0000000000..52b0a476ba --- /dev/null +++ b/drivers/misc/cardreader/rts5209.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * Author: + * Wei WANG + */ + +#include +#include +#include + +#include "rtsx_pcr.h" + +static u8 rts5209_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + val = rtsx_pci_readb(pcr, 0x1C); + return val & 0x0F; +} + +static void rts5209_fetch_vendor_settings(struct rtsx_pcr *pcr) +{ + struct pci_dev *pdev = pcr->pci; + u32 reg; + + pci_read_config_dword(pdev, PCR_SETTING_REG1, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); + + if (rts5209_vendor_setting1_valid(reg)) { + if (rts5209_reg_check_ms_pmos(reg)) + pcr->flags |= PCR_MS_PMOS; + pcr->aspm_en = rts5209_reg_to_aspm(reg); + } + + pci_read_config_dword(pdev, PCR_SETTING_REG2, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg); + + if (rts5209_vendor_setting2_valid(reg)) { + pcr->sd30_drive_sel_1v8 = + rts5209_reg_to_sd30_drive_sel_1v8(reg); + pcr->sd30_drive_sel_3v3 = + rts5209_reg_to_sd30_drive_sel_3v3(reg); + pcr->card_drive_sel = rts5209_reg_to_card_drive_sel(reg); + } +} + +static void rts5209_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime) +{ + rtsx_pci_write_register(pcr, FPDCTL, 0x07, 0x07); +} + +static int rts5209_extra_init_hw(struct rtsx_pcr *pcr) +{ + rtsx_pci_init_cmd(pcr); + + /* Turn off LED */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_GPIO, 0xFF, 0x03); + /* Reset ASPM state to default value */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, ASPM_FORCE_CTL, 0x3F, 0); + /* Force CLKREQ# PIN to drive 0 to request clock */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x08, 0x08); + /* Configure GPIO as output */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_GPIO_DIR, 0xFF, 0x03); + /* Configure driving */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DRIVE_SEL, + 0xFF, pcr->sd30_drive_sel_3v3); + + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5209_optimize_phy(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_phy_register(pcr, 0x00, 0xB966); +} + +static int rts5209_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x00); +} + +static int rts5209_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x01); +} + +static int rts5209_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0xFF, 0x0D); +} + +static int rts5209_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0x08, 0x00); +} + +static int rts5209_card_power_on(struct rtsx_pcr *pcr, int card) +{ + int err; + u8 pwr_mask, partial_pwr_on, pwr_on; + + pwr_mask = SD_POWER_MASK; + partial_pwr_on = SD_PARTIAL_POWER_ON; + pwr_on = SD_POWER_ON; + + if ((pcr->flags & PCR_MS_PMOS) && (card == RTSX_MS_CARD)) { + pwr_mask = MS_POWER_MASK; + partial_pwr_on = MS_PARTIAL_POWER_ON; + pwr_on = MS_POWER_ON; + } + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + pwr_mask, partial_pwr_on); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, 
PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x04); + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + /* To avoid too large in-rush current */ + udelay(150); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, pwr_mask, pwr_on); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x00); + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5209_card_power_off(struct rtsx_pcr *pcr, int card) +{ + u8 pwr_mask, pwr_off; + + pwr_mask = SD_POWER_MASK; + pwr_off = SD_POWER_OFF; + + if ((pcr->flags & PCR_MS_PMOS) && (card == RTSX_MS_CARD)) { + pwr_mask = MS_POWER_MASK; + pwr_off = MS_POWER_OFF; + } + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + pwr_mask | PMOS_STRG_MASK, pwr_off | PMOS_STRG_400mA); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x06); + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5209_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + int err; + + if (voltage == OUTPUT_3V3) { + err = rtsx_pci_write_register(pcr, + SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_3v3); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4FC0 | 0x24); + if (err < 0) + return err; + } else if (voltage == OUTPUT_1V8) { + err = rtsx_pci_write_register(pcr, + SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_1v8); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4C40 | 0x24); + if (err < 0) + return err; + } else { + return -EINVAL; + } + + return 0; +} + +static const struct pcr_ops rts5209_pcr_ops = { + .fetch_vendor_settings = rts5209_fetch_vendor_settings, + .extra_init_hw = rts5209_extra_init_hw, + .optimize_phy = rts5209_optimize_phy, + .turn_on_led = rts5209_turn_on_led, + .turn_off_led = rts5209_turn_off_led, + .enable_auto_blink = rts5209_enable_auto_blink, + .disable_auto_blink = rts5209_disable_auto_blink, + .card_power_on = rts5209_card_power_on, + .card_power_off = rts5209_card_power_off, + .switch_output_voltage = rts5209_switch_output_voltage, + .cd_deglitch = NULL, + .conv_clk_and_div_n = NULL, + .force_power_down = rts5209_force_power_down, +}; + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rts5209_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rts5209_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), + 0, +}; + +/* MS Pull Control Enable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5209_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +/* MS Pull Control Disable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5209_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +void rts5209_init_params(struct rtsx_pcr *pcr) +{ + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | + EXTRA_CAPS_SD_SDR104 | EXTRA_CAPS_MMC_8BIT; + pcr->num_slots = 2; + pcr->ops 
= &rts5209_pcr_ops; + + pcr->flags = 0; + pcr->card_drive_sel = RTS5209_CARD_DRIVE_DEFAULT; + pcr->sd30_drive_sel_1v8 = DRIVER_TYPE_B; + pcr->sd30_drive_sel_3v3 = DRIVER_TYPE_D; + pcr->aspm_en = ASPM_L1_EN; + pcr->aspm_mode = ASPM_MODE_CFG; + pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 16); + pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5); + + pcr->ic_version = rts5209_get_ic_version(pcr); + pcr->sd_pull_ctl_enable_tbl = rts5209_sd_pull_ctl_enable_tbl; + pcr->sd_pull_ctl_disable_tbl = rts5209_sd_pull_ctl_disable_tbl; + pcr->ms_pull_ctl_enable_tbl = rts5209_ms_pull_ctl_enable_tbl; + pcr->ms_pull_ctl_disable_tbl = rts5209_ms_pull_ctl_disable_tbl; +} diff --git a/drivers/misc/cardreader/rts5227.c b/drivers/misc/cardreader/rts5227.c new file mode 100644 index 0000000000..cd512284bf --- /dev/null +++ b/drivers/misc/cardreader/rts5227.c @@ -0,0 +1,512 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * Author: + * Wei WANG + * Roger Tseng + */ + +#include +#include +#include + +#include "rtsx_pcr.h" + +static u8 rts5227_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val); + return val & 0x0F; +} + +static void rts5227_fill_driving(struct rtsx_pcr *pcr, u8 voltage) +{ + u8 driving_3v3[4][3] = { + {0x13, 0x13, 0x13}, + {0x96, 0x96, 0x96}, + {0x7F, 0x7F, 0x7F}, + {0x96, 0x96, 0x96}, + }; + u8 driving_1v8[4][3] = { + {0x99, 0x99, 0x99}, + {0xAA, 0xAA, 0xAA}, + {0xFE, 0xFE, 0xFE}, + {0xB3, 0xB3, 0xB3}, + }; + u8 (*driving)[3], drive_sel; + + if (voltage == OUTPUT_3V3) { + driving = driving_3v3; + drive_sel = pcr->sd30_drive_sel_3v3; + } else { + driving = driving_1v8; + drive_sel = pcr->sd30_drive_sel_1v8; + } + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CLK_DRIVE_SEL, + 0xFF, driving[drive_sel][0]); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CMD_DRIVE_SEL, + 0xFF, driving[drive_sel][1]); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DAT_DRIVE_SEL, + 0xFF, driving[drive_sel][2]); +} + +static void rts5227_fetch_vendor_settings(struct rtsx_pcr *pcr) +{ + struct pci_dev *pdev = pcr->pci; + u32 reg; + + pci_read_config_dword(pdev, PCR_SETTING_REG1, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); + + if (!rtsx_vendor_setting_valid(reg)) + return; + + pcr->aspm_en = rtsx_reg_to_aspm(reg); + pcr->sd30_drive_sel_1v8 = rtsx_reg_to_sd30_drive_sel_1v8(reg); + pcr->card_drive_sel &= 0x3F; + pcr->card_drive_sel |= rtsx_reg_to_card_drive_sel(reg); + + pci_read_config_dword(pdev, PCR_SETTING_REG2, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg); + if (CHK_PCI_PID(pcr, 0x522A)) + pcr->rtd3_en = rtsx_reg_to_rtd3(reg); + if (rtsx_check_mmc_support(reg)) + pcr->extra_caps |= EXTRA_CAPS_NO_MMC; + pcr->sd30_drive_sel_3v3 = rtsx_reg_to_sd30_drive_sel_3v3(reg); + if (rtsx_reg_check_reverse_socket(reg)) + pcr->flags |= PCR_REVERSE_SOCKET; +} + +static void rts5227_init_from_cfg(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + if (CHK_PCI_PID(pcr, 0x522A)) { + if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN + | PM_L1_1_EN | PM_L1_2_EN)) + rtsx_pci_disable_oobs_polling(pcr); + else + rtsx_pci_enable_oobs_polling(pcr); + } + + if (option->ltr_en) { + if (option->ltr_enabled) + rtsx_set_ltr_latency(pcr, option->ltr_active_latency); + } +} + +static int rts5227_extra_init_hw(struct rtsx_pcr *pcr) +{ + u16 cap; + struct rtsx_cr_option *option = &pcr->option; + + 
rts5227_init_from_cfg(pcr); + rtsx_pci_init_cmd(pcr); + + /* Configure GPIO as output */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, GPIO_CTL, 0x02, 0x02); + /* Reset ASPM state to default value */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, ASPM_FORCE_CTL, 0x3F, 0); + /* Switch LDO3318 source from DV33 to card_3v3 */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x00); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x01); + /* LED shine disabled, set initial shine cycle period */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OLT_LED_CTL, 0x0F, 0x02); + /* Configure LTR */ + pcie_capability_read_word(pcr->pci, PCI_EXP_DEVCTL2, &cap); + if (cap & PCI_EXP_DEVCTL2_LTR_EN) + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LTR_CTL, 0xFF, 0xA3); + /* Configure OBFF */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OBFF_CFG, 0x03, 0x03); + /* Configure driving */ + rts5227_fill_driving(pcr, OUTPUT_3V3); + /* Configure force_clock_req */ + if (pcr->flags & PCR_REVERSE_SOCKET) + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x30, 0x30); + else + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x30, 0x00); + + if (CHK_PCI_PID(pcr, 0x522A)) + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RTS522A_AUTOLOAD_CFG1, + CD_RESUME_EN_MASK, CD_RESUME_EN_MASK); + + if (pcr->rtd3_en) { + if (CHK_PCI_PID(pcr, 0x522A)) { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RTS522A_PM_CTRL3, 0x01, 0x01); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RTS522A_PME_FORCE_CTL, 0x30, 0x30); + } else { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PM_CTRL3, 0x01, 0x01); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PME_FORCE_CTL, 0xFF, 0x33); + } + } else { + if (CHK_PCI_PID(pcr, 0x522A)) { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RTS522A_PM_CTRL3, 0x01, 0x00); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RTS522A_PME_FORCE_CTL, 0x30, 0x20); + } else { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PME_FORCE_CTL, 0xFF, 0x30); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PM_CTRL3, 0x01, 0x00); + } + } + + if (option->force_clkreq_0) + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW); + else + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH); + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, pcr->reg_pm_ctrl3, 0x10, 0x00); + + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5227_optimize_phy(struct rtsx_pcr *pcr) +{ + int err; + + err = rtsx_pci_write_register(pcr, PM_CTRL3, D3_DELINK_MODE_EN, 0x00); + if (err < 0) + return err; + + /* Optimize RX sensitivity */ + return rtsx_pci_write_phy_register(pcr, 0x00, 0xBA42); +} + +static int rts5227_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x02); +} + +static int rts5227_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x00); +} + +static int rts5227_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x08); +} + +static int rts5227_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x00); +} + +static int rts5227_card_power_on(struct rtsx_pcr *pcr, int card) +{ + int err; + + if (pcr->option.ocp_en) + rtsx_pci_enable_ocp(pcr); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_PARTIAL_POWER_ON); + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x02); + + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + /* To avoid too large in-rush current */ + msleep(20); + rtsx_pci_init_cmd(pcr); + 
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_POWER_ON); + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x06); + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_OE, + SD_OUTPUT_EN, SD_OUTPUT_EN); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_OE, + MS_OUTPUT_EN, MS_OUTPUT_EN); + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5227_card_power_off(struct rtsx_pcr *pcr, int card) +{ + if (pcr->option.ocp_en) + rtsx_pci_disable_ocp(pcr); + + rtsx_pci_write_register(pcr, CARD_PWR_CTL, SD_POWER_MASK | + PMOS_STRG_MASK, SD_POWER_OFF | PMOS_STRG_400mA); + rtsx_pci_write_register(pcr, PWR_GATE_CTRL, LDO3318_PWR_MASK, 0X00); + + return 0; +} + +static int rts5227_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + int err; + + if (voltage == OUTPUT_3V3) { + err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4FC0 | 0x24); + if (err < 0) + return err; + } else if (voltage == OUTPUT_1V8) { + err = rtsx_pci_write_phy_register(pcr, 0x11, 0x3C02); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4C80 | 0x24); + if (err < 0) + return err; + } else { + return -EINVAL; + } + + /* set pad drive */ + rtsx_pci_init_cmd(pcr); + rts5227_fill_driving(pcr, voltage); + return rtsx_pci_send_cmd(pcr, 100); +} + +static const struct pcr_ops rts5227_pcr_ops = { + .fetch_vendor_settings = rts5227_fetch_vendor_settings, + .extra_init_hw = rts5227_extra_init_hw, + .optimize_phy = rts5227_optimize_phy, + .turn_on_led = rts5227_turn_on_led, + .turn_off_led = rts5227_turn_off_led, + .enable_auto_blink = rts5227_enable_auto_blink, + .disable_auto_blink = rts5227_disable_auto_blink, + .card_power_on = rts5227_card_power_on, + .card_power_off = rts5227_card_power_off, + .switch_output_voltage = rts5227_switch_output_voltage, + .cd_deglitch = NULL, + .conv_clk_and_div_n = NULL, +}; + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rts5227_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rts5227_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), + 0, +}; + +/* MS Pull Control Enable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5227_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +/* MS Pull Control Disable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5227_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +void rts5227_init_params(struct rtsx_pcr *pcr) +{ + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; + pcr->num_slots = 2; + pcr->ops = &rts5227_pcr_ops; + + pcr->flags = 0; + pcr->card_drive_sel = RTSX_CARD_DRIVE_DEFAULT; + pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B; + pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B; + pcr->aspm_en = ASPM_L1_EN; + pcr->aspm_mode = ASPM_MODE_CFG; + pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 15); + pcr->rx_initial_phase = SET_CLOCK_PHASE(30, 7, 7); + + pcr->ic_version = rts5227_get_ic_version(pcr); + pcr->sd_pull_ctl_enable_tbl = rts5227_sd_pull_ctl_enable_tbl; + 
pcr->sd_pull_ctl_disable_tbl = rts5227_sd_pull_ctl_disable_tbl; + pcr->ms_pull_ctl_enable_tbl = rts5227_ms_pull_ctl_enable_tbl; + pcr->ms_pull_ctl_disable_tbl = rts5227_ms_pull_ctl_disable_tbl; + + pcr->reg_pm_ctrl3 = PM_CTRL3; +} + +static int rts522a_optimize_phy(struct rtsx_pcr *pcr) +{ + int err; + + err = rtsx_pci_write_register(pcr, RTS522A_PM_CTRL3, D3_DELINK_MODE_EN, + 0x00); + if (err < 0) + return err; + + if (is_version(pcr, 0x522A, IC_VER_A)) { + err = rtsx_pci_write_phy_register(pcr, PHY_RCR2, + PHY_RCR2_INIT_27S); + if (err) + return err; + + rtsx_pci_write_phy_register(pcr, PHY_RCR1, PHY_RCR1_INIT_27S); + rtsx_pci_write_phy_register(pcr, PHY_FLD0, PHY_FLD0_INIT_27S); + rtsx_pci_write_phy_register(pcr, PHY_FLD3, PHY_FLD3_INIT_27S); + rtsx_pci_write_phy_register(pcr, PHY_FLD4, PHY_FLD4_INIT_27S); + } + + return 0; +} + +static int rts522a_extra_init_hw(struct rtsx_pcr *pcr) +{ + rts5227_extra_init_hw(pcr); + + /* Power down OCP for power consumption */ + if (!pcr->card_exist) + rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN, + OC_POWER_DOWN); + + rtsx_pci_write_register(pcr, FUNC_FORCE_CTL, FUNC_FORCE_UPME_XMT_DBG, + FUNC_FORCE_UPME_XMT_DBG); + rtsx_pci_write_register(pcr, PCLK_CTL, 0x04, 0x04); + rtsx_pci_write_register(pcr, PM_EVENT_DEBUG, PME_DEBUG_0, PME_DEBUG_0); + rtsx_pci_write_register(pcr, PM_CLK_FORCE_CTL, 0xFF, 0x11); + + return 0; +} + +static int rts522a_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + int err; + + if (voltage == OUTPUT_3V3) { + err = rtsx_pci_write_phy_register(pcr, 0x08, 0x57E4); + if (err < 0) + return err; + } else if (voltage == OUTPUT_1V8) { + err = rtsx_pci_write_phy_register(pcr, 0x11, 0x3C02); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, 0x08, 0x54A4); + if (err < 0) + return err; + } else { + return -EINVAL; + } + + /* set pad drive */ + rtsx_pci_init_cmd(pcr); + rts5227_fill_driving(pcr, voltage); + return rtsx_pci_send_cmd(pcr, 100); +} + +static void rts522a_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime) +{ + /* Set relink_time to 0 */ + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, MASK_8_BIT_DEF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3, + RELINK_TIME_MASK, 0); + + rtsx_pci_write_register(pcr, RTS522A_PM_CTRL3, + D3_DELINK_MODE_EN, D3_DELINK_MODE_EN); + + if (!runtime) { + rtsx_pci_write_register(pcr, RTS522A_AUTOLOAD_CFG1, + CD_RESUME_EN_MASK, 0); + rtsx_pci_write_register(pcr, RTS522A_PM_CTRL3, 0x01, 0x00); + rtsx_pci_write_register(pcr, RTS522A_PME_FORCE_CTL, 0x30, 0x20); + } + + rtsx_pci_write_register(pcr, FPDCTL, ALL_POWER_DOWN, ALL_POWER_DOWN); +} + + +static void rts522a_set_l1off_cfg_sub_d0(struct rtsx_pcr *pcr, int active) +{ + struct rtsx_cr_option *option = &pcr->option; + int aspm_L1_1, aspm_L1_2; + u8 val = 0; + + aspm_L1_1 = rtsx_check_dev_flag(pcr, ASPM_L1_1_EN); + aspm_L1_2 = rtsx_check_dev_flag(pcr, ASPM_L1_2_EN); + + if (active) { + /* run, latency: 60us */ + if (aspm_L1_1) + val = option->ltr_l1off_snooze_sspwrgate; + } else { + /* l1off, latency: 300us */ + if (aspm_L1_2) + val = option->ltr_l1off_sspwrgate; + } + + rtsx_set_l1off_sub(pcr, val); +} + +/* rts522a operations mainly derived from rts5227, except phy/hw init setting. 
+ */ +static const struct pcr_ops rts522a_pcr_ops = { + .fetch_vendor_settings = rts5227_fetch_vendor_settings, + .extra_init_hw = rts522a_extra_init_hw, + .optimize_phy = rts522a_optimize_phy, + .turn_on_led = rts5227_turn_on_led, + .turn_off_led = rts5227_turn_off_led, + .enable_auto_blink = rts5227_enable_auto_blink, + .disable_auto_blink = rts5227_disable_auto_blink, + .card_power_on = rts5227_card_power_on, + .card_power_off = rts5227_card_power_off, + .switch_output_voltage = rts522a_switch_output_voltage, + .force_power_down = rts522a_force_power_down, + .cd_deglitch = NULL, + .conv_clk_and_div_n = NULL, + .set_l1off_cfg_sub_d0 = rts522a_set_l1off_cfg_sub_d0, +}; + +void rts522a_init_params(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + rts5227_init_params(pcr); + pcr->ops = &rts522a_pcr_ops; + pcr->aspm_mode = ASPM_MODE_REG; + pcr->tx_initial_phase = SET_CLOCK_PHASE(20, 20, 11); + pcr->reg_pm_ctrl3 = RTS522A_PM_CTRL3; + + option->dev_flags = LTR_L1SS_PWR_GATE_EN; + option->ltr_en = true; + + /* init latency of active, idle, L1OFF to 60us, 300us, 3ms */ + option->ltr_active_latency = LTR_ACTIVE_LATENCY_DEF; + option->ltr_idle_latency = LTR_IDLE_LATENCY_DEF; + option->ltr_l1off_latency = LTR_L1OFF_LATENCY_DEF; + option->l1_snooze_delay = L1_SNOOZE_DELAY_DEF; + option->ltr_l1off_sspwrgate = 0x7F; + option->ltr_l1off_snooze_sspwrgate = 0x78; + + pcr->option.ocp_en = 1; + if (pcr->option.ocp_en) + pcr->hw_param.interrupt_en |= SD_OC_INT_EN; + pcr->hw_param.ocp_glitch = SD_OCP_GLITCH_10M; + pcr->option.sd_800mA_ocp_thd = RTS522A_OCP_THD_800; + +} diff --git a/drivers/misc/cardreader/rts5228.c b/drivers/misc/cardreader/rts5228.c new file mode 100644 index 0000000000..0c7f10bcf6 --- /dev/null +++ b/drivers/misc/cardreader/rts5228.c @@ -0,0 +1,723 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2018-2019 Realtek Semiconductor Corp. All rights reserved. 
+ * + * Author: + * Ricky WU + * Rui FENG + * Wei WANG + */ + +#include +#include +#include + +#include "rts5228.h" +#include "rtsx_pcr.h" + +static u8 rts5228_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val); + return val & IC_VERSION_MASK; +} + +static void rts5228_fill_driving(struct rtsx_pcr *pcr, u8 voltage) +{ + u8 driving_3v3[4][3] = { + {0x13, 0x13, 0x13}, + {0x96, 0x96, 0x96}, + {0x7F, 0x7F, 0x7F}, + {0x96, 0x96, 0x96}, + }; + u8 driving_1v8[4][3] = { + {0x99, 0x99, 0x99}, + {0xB5, 0xB5, 0xB5}, + {0xE6, 0x7E, 0xFE}, + {0x6B, 0x6B, 0x6B}, + }; + u8 (*driving)[3], drive_sel; + + if (voltage == OUTPUT_3V3) { + driving = driving_3v3; + drive_sel = pcr->sd30_drive_sel_3v3; + } else { + driving = driving_1v8; + drive_sel = pcr->sd30_drive_sel_1v8; + } + + rtsx_pci_write_register(pcr, SD30_CLK_DRIVE_SEL, + 0xFF, driving[drive_sel][0]); + + rtsx_pci_write_register(pcr, SD30_CMD_DRIVE_SEL, + 0xFF, driving[drive_sel][1]); + + rtsx_pci_write_register(pcr, SD30_DAT_DRIVE_SEL, + 0xFF, driving[drive_sel][2]); +} + +static void rtsx5228_fetch_vendor_settings(struct rtsx_pcr *pcr) +{ + struct pci_dev *pdev = pcr->pci; + u32 reg; + + /* 0x724~0x727 */ + pci_read_config_dword(pdev, PCR_SETTING_REG1, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); + + if (!rtsx_vendor_setting_valid(reg)) { + pcr_dbg(pcr, "skip fetch vendor setting\n"); + return; + } + pcr->sd30_drive_sel_1v8 = rtsx_reg_to_sd30_drive_sel_1v8(reg); + pcr->aspm_en = rtsx_reg_to_aspm(reg); + + /* 0x814~0x817 */ + pci_read_config_dword(pdev, PCR_SETTING_REG2, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg); + + pcr->rtd3_en = rtsx_reg_to_rtd3(reg); + if (rtsx_check_mmc_support(reg)) + pcr->extra_caps |= EXTRA_CAPS_NO_MMC; + pcr->sd30_drive_sel_3v3 = rtsx_reg_to_sd30_drive_sel_3v3(reg); + if (rtsx_reg_check_reverse_socket(reg)) + pcr->flags |= PCR_REVERSE_SOCKET; +} + +static int rts5228_optimize_phy(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_phy_register(pcr, 0x07, 0x8F40); +} + +static void rts5228_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime) +{ + /* Set relink_time to 0 */ + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, MASK_8_BIT_DEF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3, + RELINK_TIME_MASK, 0); + + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, + D3_DELINK_MODE_EN, D3_DELINK_MODE_EN); + + if (!runtime) { + rtsx_pci_write_register(pcr, RTS5228_AUTOLOAD_CFG1, + CD_RESUME_EN_MASK, 0); + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x01, 0x00); + rtsx_pci_write_register(pcr, RTS5228_REG_PME_FORCE_CTL, + FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL); + } + + rtsx_pci_write_register(pcr, FPDCTL, + SSC_POWER_DOWN, SSC_POWER_DOWN); +} + +static int rts5228_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, + LED_SHINE_MASK, LED_SHINE_EN); +} + +static int rts5228_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, + LED_SHINE_MASK, LED_SHINE_DISABLE); +} + +static int rts5228_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, + 0x02, 0x02); +} + +static int rts5228_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, + 0x02, 0x00); +} + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK 
==> pull down + */ +static const u32 rts5228_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rts5228_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), + 0, +}; + +static int rts5228_sd_set_sample_push_timing_sd30(struct rtsx_pcr *pcr) +{ + rtsx_pci_write_register(pcr, SD_CFG1, SD_MODE_SELECT_MASK + | SD_ASYNC_FIFO_NOT_RST, SD_30_MODE | SD_ASYNC_FIFO_NOT_RST); + rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, CLK_LOW_FREQ); + rtsx_pci_write_register(pcr, CARD_CLK_SOURCE, 0xFF, + CRC_VAR_CLK0 | SD30_FIX_CLK | SAMPLE_VAR_CLK1); + rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0); + + return 0; +} + +static int rts5228_card_power_on(struct rtsx_pcr *pcr, int card) +{ + struct rtsx_cr_option *option = &pcr->option; + + if (option->ocp_en) + rtsx_pci_enable_ocp(pcr); + + rtsx_pci_write_register(pcr, REG_CRC_DUMMY_0, + CFG_SD_POW_AUTO_PD, CFG_SD_POW_AUTO_PD); + + rtsx_pci_write_register(pcr, RTS5228_LDO1_CFG1, + RTS5228_LDO1_TUNE_MASK, RTS5228_LDO1_33); + + rtsx_pci_write_register(pcr, RTS5228_LDO1233318_POW_CTL, + RTS5228_LDO1_POWERON_MASK, RTS5228_LDO1_SOFTSTART); + mdelay(2); + rtsx_pci_write_register(pcr, RTS5228_LDO1233318_POW_CTL, + RTS5228_LDO1_POWERON_MASK, RTS5228_LDO1_FULLON); + + + rtsx_pci_write_register(pcr, RTS5228_LDO1233318_POW_CTL, + RTS5228_LDO3318_POWERON, RTS5228_LDO3318_POWERON); + + msleep(20); + + rtsx_pci_write_register(pcr, CARD_OE, SD_OUTPUT_EN, SD_OUTPUT_EN); + + /* Initialize SD_CFG1 register */ + rtsx_pci_write_register(pcr, SD_CFG1, 0xFF, + SD_CLK_DIVIDE_128 | SD_20_MODE | SD_BUS_WIDTH_1BIT); + + rtsx_pci_write_register(pcr, SD_SAMPLE_POINT_CTL, + 0xFF, SD20_RX_POS_EDGE); + rtsx_pci_write_register(pcr, SD_PUSH_POINT_CTL, 0xFF, 0); + rtsx_pci_write_register(pcr, CARD_STOP, SD_STOP | SD_CLR_ERR, + SD_STOP | SD_CLR_ERR); + + /* Reset SD_CFG3 register */ + rtsx_pci_write_register(pcr, SD_CFG3, SD30_CLK_END_EN, 0); + rtsx_pci_write_register(pcr, REG_SD_STOP_SDCLK_CFG, + SD30_CLK_STOP_CFG_EN | SD30_CLK_STOP_CFG1 | + SD30_CLK_STOP_CFG0, 0); + + if (pcr->extra_caps & EXTRA_CAPS_SD_SDR50 || + pcr->extra_caps & EXTRA_CAPS_SD_SDR104) + rts5228_sd_set_sample_push_timing_sd30(pcr); + + return 0; +} + +static int rts5228_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + int err; + u16 val = 0; + + rtsx_pci_write_register(pcr, RTS5228_CARD_PWR_CTL, + RTS5228_PUPDC, RTS5228_PUPDC); + + switch (voltage) { + case OUTPUT_3V3: + rtsx_pci_read_phy_register(pcr, PHY_TUNE, &val); + val |= PHY_TUNE_SDBUS_33; + err = rtsx_pci_write_phy_register(pcr, PHY_TUNE, val); + if (err < 0) + return err; + + rtsx_pci_write_register(pcr, RTS5228_DV3318_CFG, + RTS5228_DV3318_TUNE_MASK, RTS5228_DV3318_33); + rtsx_pci_write_register(pcr, SD_PAD_CTL, + SD_IO_USING_1V8, 0); + break; + case OUTPUT_1V8: + rtsx_pci_read_phy_register(pcr, PHY_TUNE, &val); + val &= ~PHY_TUNE_SDBUS_33; + err = rtsx_pci_write_phy_register(pcr, PHY_TUNE, val); + if (err < 0) + return err; + + rtsx_pci_write_register(pcr, RTS5228_DV3318_CFG, + RTS5228_DV3318_TUNE_MASK, RTS5228_DV3318_18); + rtsx_pci_write_register(pcr, SD_PAD_CTL, + SD_IO_USING_1V8, SD_IO_USING_1V8); + break; + default: + return -EINVAL; + } + + /* set pad drive */ + rts5228_fill_driving(pcr, voltage); + + return 0; +} + +static void 
rts5228_stop_cmd(struct rtsx_pcr *pcr) +{ + rtsx_pci_writel(pcr, RTSX_HCBCTLR, STOP_CMD); + rtsx_pci_writel(pcr, RTSX_HDBCTLR, STOP_DMA); + rtsx_pci_write_register(pcr, RTS5260_DMA_RST_CTL_0, + RTS5260_DMA_RST | RTS5260_ADMA3_RST, + RTS5260_DMA_RST | RTS5260_ADMA3_RST); + rtsx_pci_write_register(pcr, RBCTL, RB_FLUSH, RB_FLUSH); +} + +static void rts5228_card_before_power_off(struct rtsx_pcr *pcr) +{ + rts5228_stop_cmd(pcr); + rts5228_switch_output_voltage(pcr, OUTPUT_3V3); +} + +static void rts5228_enable_ocp(struct rtsx_pcr *pcr) +{ + u8 val = 0; + + val = SD_OCP_INT_EN | SD_DETECT_EN; + rtsx_pci_write_register(pcr, REG_OCPCTL, 0xFF, val); + rtsx_pci_write_register(pcr, RTS5228_LDO1_CFG0, + RTS5228_LDO1_OCP_EN | RTS5228_LDO1_OCP_LMT_EN, + RTS5228_LDO1_OCP_EN | RTS5228_LDO1_OCP_LMT_EN); +} + +static void rts5228_disable_ocp(struct rtsx_pcr *pcr) +{ + u8 mask = 0; + + mask = SD_OCP_INT_EN | SD_DETECT_EN; + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0); + rtsx_pci_write_register(pcr, RTS5228_LDO1_CFG0, + RTS5228_LDO1_OCP_EN | RTS5228_LDO1_OCP_LMT_EN, 0); +} + +static int rts5228_card_power_off(struct rtsx_pcr *pcr, int card) +{ + int err = 0; + + rts5228_card_before_power_off(pcr); + err = rtsx_pci_write_register(pcr, RTS5228_LDO1233318_POW_CTL, + RTS5228_LDO_POWERON_MASK, 0); + rtsx_pci_write_register(pcr, REG_CRC_DUMMY_0, CFG_SD_POW_AUTO_PD, 0); + + if (pcr->option.ocp_en) + rtsx_pci_disable_ocp(pcr); + + return err; +} + +static void rts5228_init_ocp(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + if (option->ocp_en) { + u8 mask, val; + + rtsx_pci_write_register(pcr, RTS5228_LDO1_CFG0, + RTS5228_LDO1_OCP_EN | RTS5228_LDO1_OCP_LMT_EN, + RTS5228_LDO1_OCP_EN | RTS5228_LDO1_OCP_LMT_EN); + + rtsx_pci_write_register(pcr, RTS5228_LDO1_CFG0, + RTS5228_LDO1_OCP_THD_MASK, option->sd_800mA_ocp_thd); + + rtsx_pci_write_register(pcr, RTS5228_LDO1_CFG0, + RTS5228_LDO1_OCP_LMT_THD_MASK, + RTS5228_LDO1_LMT_THD_1500); + + rtsx_pci_read_register(pcr, RTS5228_LDO1_CFG0, &val); + + mask = SD_OCP_GLITCH_MASK; + val = pcr->hw_param.ocp_glitch; + rtsx_pci_write_register(pcr, REG_OCPGLITCH, mask, val); + + rts5228_enable_ocp(pcr); + + } else { + rtsx_pci_write_register(pcr, RTS5228_LDO1_CFG0, + RTS5228_LDO1_OCP_EN | RTS5228_LDO1_OCP_LMT_EN, 0); + } +} + +static void rts5228_clear_ocpstat(struct rtsx_pcr *pcr) +{ + u8 mask = 0; + u8 val = 0; + + mask = SD_OCP_INT_CLR | SD_OC_CLR; + val = SD_OCP_INT_CLR | SD_OC_CLR; + + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, val); + + udelay(1000); + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0); + +} + +static void rts5228_process_ocp(struct rtsx_pcr *pcr) +{ + if (!pcr->option.ocp_en) + return; + + rtsx_pci_get_ocpstat(pcr, &pcr->ocp_stat); + + if (pcr->ocp_stat & (SD_OC_NOW | SD_OC_EVER)) { + rts5228_clear_ocpstat(pcr); + rts5228_card_power_off(pcr, RTSX_SD_CARD); + rtsx_pci_write_register(pcr, CARD_OE, SD_OUTPUT_EN, 0); + pcr->ocp_stat = 0; + } + +} + +static void rts5228_init_from_cfg(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN + | PM_L1_1_EN | PM_L1_2_EN)) + rtsx_pci_disable_oobs_polling(pcr); + else + rtsx_pci_enable_oobs_polling(pcr); + + rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, 0xFF, 0); + + if (option->ltr_en) { + if (option->ltr_enabled) + rtsx_set_ltr_latency(pcr, option->ltr_active_latency); + } +} + +static int rts5228_extra_init_hw(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + 
rtsx_pci_write_register(pcr, RTS5228_AUTOLOAD_CFG1, + CD_RESUME_EN_MASK, CD_RESUME_EN_MASK); + + rts5228_init_from_cfg(pcr); + + rtsx_pci_write_register(pcr, L1SUB_CONFIG1, + AUX_CLK_ACTIVE_SEL_MASK, MAC_CKSW_DONE); + rtsx_pci_write_register(pcr, L1SUB_CONFIG3, 0xFF, 0); + + rtsx_pci_write_register(pcr, FUNC_FORCE_CTL, + FUNC_FORCE_UPME_XMT_DBG, FUNC_FORCE_UPME_XMT_DBG); + + rtsx_pci_write_register(pcr, PCLK_CTL, + PCLK_MODE_SEL, PCLK_MODE_SEL); + + rtsx_pci_write_register(pcr, PM_EVENT_DEBUG, PME_DEBUG_0, PME_DEBUG_0); + rtsx_pci_write_register(pcr, PM_CLK_FORCE_CTL, CLK_PM_EN, CLK_PM_EN); + + /* LED shine disabled, set initial shine cycle period */ + rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x0F, 0x02); + + /* Configure driving */ + rts5228_fill_driving(pcr, OUTPUT_3V3); + + if (pcr->flags & PCR_REVERSE_SOCKET) + rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x30); + else + rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x00); + + /* + * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced + * to drive low, and we forcibly request clock. + */ + if (option->force_clkreq_0) + rtsx_pci_write_register(pcr, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW); + else + rtsx_pci_write_register(pcr, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH); + + rtsx_pci_write_register(pcr, PWD_SUSPEND_EN, 0xFF, 0xFB); + + if (pcr->rtd3_en) { + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x01, 0x01); + rtsx_pci_write_register(pcr, RTS5228_REG_PME_FORCE_CTL, + FORCE_PM_CONTROL | FORCE_PM_VALUE, + FORCE_PM_CONTROL | FORCE_PM_VALUE); + } else { + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x01, 0x00); + rtsx_pci_write_register(pcr, RTS5228_REG_PME_FORCE_CTL, + FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL); + } + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, D3_DELINK_MODE_EN, 0x00); + + return 0; +} + +static void rts5228_enable_aspm(struct rtsx_pcr *pcr, bool enable) +{ + u8 mask, val; + + if (pcr->aspm_enabled == enable) + return; + + mask = FORCE_ASPM_VAL_MASK | FORCE_ASPM_CTL0 | FORCE_ASPM_CTL1; + val = FORCE_ASPM_CTL0 | FORCE_ASPM_CTL1; + val |= (pcr->aspm_en & 0x02); + rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, mask, val); + pcie_capability_clear_and_set_word(pcr->pci, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPMC, pcr->aspm_en); + pcr->aspm_enabled = enable; +} + +static void rts5228_disable_aspm(struct rtsx_pcr *pcr, bool enable) +{ + u8 mask, val; + + if (pcr->aspm_enabled == enable) + return; + + pcie_capability_clear_and_set_word(pcr->pci, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPMC, 0); + mask = FORCE_ASPM_VAL_MASK | FORCE_ASPM_CTL0 | FORCE_ASPM_CTL1; + val = FORCE_ASPM_CTL0 | FORCE_ASPM_CTL1; + rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, mask, val); + rtsx_pci_write_register(pcr, SD_CFG1, SD_ASYNC_FIFO_NOT_RST, 0); + mdelay(10); + pcr->aspm_enabled = enable; +} + +static void rts5228_set_aspm(struct rtsx_pcr *pcr, bool enable) +{ + if (enable) + rts5228_enable_aspm(pcr, true); + else + rts5228_disable_aspm(pcr, false); +} + +static void rts5228_set_l1off_cfg_sub_d0(struct rtsx_pcr *pcr, int active) +{ + struct rtsx_cr_option *option = &pcr->option; + int aspm_L1_1, aspm_L1_2; + u8 val = 0; + + aspm_L1_1 = rtsx_check_dev_flag(pcr, ASPM_L1_1_EN); + aspm_L1_2 = rtsx_check_dev_flag(pcr, ASPM_L1_2_EN); + + if (active) { + /* run, latency: 60us */ + if (aspm_L1_1) + val = option->ltr_l1off_snooze_sspwrgate; + } else { + /* l1off, latency: 300us */ + if (aspm_L1_2) + val = option->ltr_l1off_sspwrgate; + } + + rtsx_set_l1off_sub(pcr, val); +} + +static const struct pcr_ops 
rts5228_pcr_ops = { + .fetch_vendor_settings = rtsx5228_fetch_vendor_settings, + .turn_on_led = rts5228_turn_on_led, + .turn_off_led = rts5228_turn_off_led, + .extra_init_hw = rts5228_extra_init_hw, + .enable_auto_blink = rts5228_enable_auto_blink, + .disable_auto_blink = rts5228_disable_auto_blink, + .card_power_on = rts5228_card_power_on, + .card_power_off = rts5228_card_power_off, + .switch_output_voltage = rts5228_switch_output_voltage, + .force_power_down = rts5228_force_power_down, + .stop_cmd = rts5228_stop_cmd, + .set_aspm = rts5228_set_aspm, + .set_l1off_cfg_sub_d0 = rts5228_set_l1off_cfg_sub_d0, + .enable_ocp = rts5228_enable_ocp, + .disable_ocp = rts5228_disable_ocp, + .init_ocp = rts5228_init_ocp, + .process_ocp = rts5228_process_ocp, + .clear_ocpstat = rts5228_clear_ocpstat, + .optimize_phy = rts5228_optimize_phy, +}; + + +static inline u8 double_ssc_depth(u8 depth) +{ + return ((depth > 1) ? (depth - 1) : depth); +} + +int rts5228_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock, + u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk) +{ + int err, clk; + u16 n; + u8 clk_divider, mcu_cnt, div; + static const u8 depth[] = { + [RTSX_SSC_DEPTH_4M] = RTS5228_SSC_DEPTH_4M, + [RTSX_SSC_DEPTH_2M] = RTS5228_SSC_DEPTH_2M, + [RTSX_SSC_DEPTH_1M] = RTS5228_SSC_DEPTH_1M, + [RTSX_SSC_DEPTH_500K] = RTS5228_SSC_DEPTH_512K, + }; + + if (initial_mode) { + /* We use 250k(around) here, in initial stage */ + clk_divider = SD_CLK_DIVIDE_128; + card_clock = 30000000; + } else { + clk_divider = SD_CLK_DIVIDE_0; + } + err = rtsx_pci_write_register(pcr, SD_CFG1, + SD_CLK_DIVIDE_MASK, clk_divider); + if (err < 0) + return err; + + card_clock /= 1000000; + pcr_dbg(pcr, "Switch card clock to %dMHz\n", card_clock); + + clk = card_clock; + if (!initial_mode && double_clk) + clk = card_clock * 2; + pcr_dbg(pcr, "Internal SSC clock: %dMHz (cur_clock = %d)\n", + clk, pcr->cur_clock); + + if (clk == pcr->cur_clock) + return 0; + + if (pcr->ops->conv_clk_and_div_n) + n = pcr->ops->conv_clk_and_div_n(clk, CLK_TO_DIV_N); + else + n = clk - 4; + if ((clk <= 4) || (n > 396)) + return -EINVAL; + + mcu_cnt = 125/clk + 3; + if (mcu_cnt > 15) + mcu_cnt = 15; + + div = CLK_DIV_1; + while ((n < MIN_DIV_N_PCR - 4) && (div < CLK_DIV_8)) { + if (pcr->ops->conv_clk_and_div_n) { + int dbl_clk = pcr->ops->conv_clk_and_div_n(n, + DIV_N_TO_CLK) * 2; + n = pcr->ops->conv_clk_and_div_n(dbl_clk, + CLK_TO_DIV_N); + } else { + n = (n + 4) * 2 - 4; + } + div++; + } + + n = (n / 2) - 1; + pcr_dbg(pcr, "n = %d, div = %d\n", n, div); + + ssc_depth = depth[ssc_depth]; + if (double_clk) + ssc_depth = double_ssc_depth(ssc_depth); + + if (ssc_depth) { + if (div == CLK_DIV_2) { + if (ssc_depth > 1) + ssc_depth -= 1; + else + ssc_depth = RTS5228_SSC_DEPTH_8M; + } else if (div == CLK_DIV_4) { + if (ssc_depth > 2) + ssc_depth -= 2; + else + ssc_depth = RTS5228_SSC_DEPTH_8M; + } else if (div == CLK_DIV_8) { + if (ssc_depth > 3) + ssc_depth -= 3; + else + ssc_depth = RTS5228_SSC_DEPTH_8M; + } + } else { + ssc_depth = 0; + } + pcr_dbg(pcr, "ssc_depth = %d\n", ssc_depth); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_CTL, + CLK_LOW_FREQ, CLK_LOW_FREQ); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_DIV, + 0xFF, (div << 4) | mcu_cnt); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, 0); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, + SSC_DEPTH_MASK, ssc_depth); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_DIV_N_0, 0xFF, n); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, SSC_RSTB); + if 
(vpclk) { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, 0); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK1_CTL, + PHASE_NOT_RESET, 0); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, PHASE_NOT_RESET); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK1_CTL, + PHASE_NOT_RESET, PHASE_NOT_RESET); + } + + err = rtsx_pci_send_cmd(pcr, 2000); + if (err < 0) + return err; + + /* Wait SSC clock stable */ + udelay(SSC_CLOCK_STABLE_WAIT); + err = rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0); + if (err < 0) + return err; + + pcr->cur_clock = clk; + return 0; + +} + +void rts5228_init_params(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + struct rtsx_hw_param *hw_param = &pcr->hw_param; + + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; + pcr->num_slots = 1; + pcr->ops = &rts5228_pcr_ops; + + pcr->flags = 0; + pcr->card_drive_sel = RTSX_CARD_DRIVE_DEFAULT; + pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B; + pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B; + pcr->aspm_en = ASPM_L1_EN; + pcr->aspm_mode = ASPM_MODE_REG; + pcr->tx_initial_phase = SET_CLOCK_PHASE(28, 27, 11); + pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5); + + pcr->ic_version = rts5228_get_ic_version(pcr); + pcr->sd_pull_ctl_enable_tbl = rts5228_sd_pull_ctl_enable_tbl; + pcr->sd_pull_ctl_disable_tbl = rts5228_sd_pull_ctl_disable_tbl; + + pcr->reg_pm_ctrl3 = RTS5228_AUTOLOAD_CFG3; + + option->dev_flags = (LTR_L1SS_PWR_GATE_CHECK_CARD_EN + | LTR_L1SS_PWR_GATE_EN); + option->ltr_en = true; + + /* init latency of active, idle, L1OFF to 60us, 300us, 3ms */ + option->ltr_active_latency = LTR_ACTIVE_LATENCY_DEF; + option->ltr_idle_latency = LTR_IDLE_LATENCY_DEF; + option->ltr_l1off_latency = LTR_L1OFF_LATENCY_DEF; + option->l1_snooze_delay = L1_SNOOZE_DELAY_DEF; + option->ltr_l1off_sspwrgate = 0x7F; + option->ltr_l1off_snooze_sspwrgate = 0x78; + + option->ocp_en = 1; + hw_param->interrupt_en |= SD_OC_INT_EN; + hw_param->ocp_glitch = SD_OCP_GLITCH_800U; + option->sd_800mA_ocp_thd = RTS5228_LDO1_OCP_THD_930; +} diff --git a/drivers/misc/cardreader/rts5228.h b/drivers/misc/cardreader/rts5228.h new file mode 100644 index 0000000000..6a872246ae --- /dev/null +++ b/drivers/misc/cardreader/rts5228.h @@ -0,0 +1,168 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2018-2019 Realtek Semiconductor Corp. All rights reserved. 
+ * + * Author: + * Ricky WU + * Rui FENG + * Wei WANG + */ +#ifndef RTS5228_H +#define RTS5228_H + + +#define RTS5228_AUTOLOAD_CFG0 0xFF7B +#define RTS5228_AUTOLOAD_CFG1 0xFF7C +#define RTS5228_AUTOLOAD_CFG2 0xFF7D +#define RTS5228_AUTOLOAD_CFG3 0xFF7E +#define RTS5228_AUTOLOAD_CFG4 0xFF7F + +#define RTS5228_REG_VREF 0xFE97 +#define RTS5228_PWD_SUSPND_EN (1 << 4) + +#define RTS5228_PAD_H3L1 0xFF79 +#define PAD_GPIO_H3L1 (1 << 3) + +/* SSC_CTL2 0xFC12 */ +#define RTS5228_SSC_DEPTH_MASK 0x07 +#define RTS5228_SSC_DEPTH_DISALBE 0x00 +#define RTS5228_SSC_DEPTH_8M 0x01 +#define RTS5228_SSC_DEPTH_4M 0x02 +#define RTS5228_SSC_DEPTH_2M 0x03 +#define RTS5228_SSC_DEPTH_1M 0x04 +#define RTS5228_SSC_DEPTH_512K 0x05 +#define RTS5228_SSC_DEPTH_256K 0x06 +#define RTS5228_SSC_DEPTH_128K 0x07 + +/* DMACTL 0xFE2C */ +#define RTS5228_DMA_PACK_SIZE_MASK 0xF0 + +#define RTS5228_REG_LDO12_CFG 0xFF6E +#define RTS5228_LDO12_VO_TUNE_MASK (0x07<<1) +#define RTS5228_LDO12_100 (0x00<<1) +#define RTS5228_LDO12_105 (0x01<<1) +#define RTS5228_LDO12_110 (0x02<<1) +#define RTS5228_LDO12_115 (0x03<<1) +#define RTS5228_LDO12_120 (0x04<<1) +#define RTS5228_LDO12_125 (0x05<<1) +#define RTS5228_LDO12_130 (0x06<<1) +#define RTS5228_LDO12_135 (0x07<<1) +#define RTS5228_REG_PWD_LDO12 (0x01<<0) + +#define RTS5228_REG_LDO12_L12 0xFF6F +#define RTS5228_LDO12_L12_MASK (0x07<<4) +#define RTS5228_LDO12_L12_120 (0x04<<4) + +/* LDO control register */ +#define RTS5228_CARD_PWR_CTL 0xFD50 +#define RTS5228_PUPDC (0x01<<5) + +#define RTS5228_LDO1233318_POW_CTL 0xFF70 +#define RTS5228_LDO3318_POWERON (0x01<<3) +#define RTS5228_LDO1_POWEROFF (0x00<<0) +#define RTS5228_LDO1_SOFTSTART (0x01<<0) +#define RTS5228_LDO1_FULLON (0x03<<0) +#define RTS5228_LDO1_POWERON_MASK (0x03<<0) +#define RTS5228_LDO_POWERON_MASK (0x0F<<0) + +#define RTS5228_DV3318_CFG 0xFF71 +#define RTS5228_DV3318_TUNE_MASK (0x07<<4) +#define RTS5228_DV3318_17 (0x00<<4) +#define RTS5228_DV3318_1V75 (0x01<<4) +#define RTS5228_DV3318_18 (0x02<<4) +#define RTS5228_DV3318_1V85 (0x03<<4) +#define RTS5228_DV3318_19 (0x04<<4) +#define RTS5228_DV3318_33 (0x07<<4) +#define RTS5228_DV3318_SR_MASK (0x03<<2) +#define RTS5228_DV3318_SR_0 (0x00<<2) +#define RTS5228_DV3318_SR_250 (0x01<<2) +#define RTS5228_DV3318_SR_500 (0x02<<2) +#define RTS5228_DV3318_SR_1000 (0x03<<2) + +#define RTS5228_LDO1_CFG0 0xFF72 +#define RTS5228_LDO1_OCP_THD_MASK (0x07<<5) +#define RTS5228_LDO1_OCP_EN (0x01<<4) +#define RTS5228_LDO1_OCP_LMT_THD_MASK (0x03<<2) +#define RTS5228_LDO1_OCP_LMT_EN (0x01<<1) + +#define RTS5228_LDO1_OCP_THD_730 (0x00<<5) +#define RTS5228_LDO1_OCP_THD_780 (0x01<<5) +#define RTS5228_LDO1_OCP_THD_860 (0x02<<5) +#define RTS5228_LDO1_OCP_THD_930 (0x03<<5) +#define RTS5228_LDO1_OCP_THD_1000 (0x04<<5) +#define RTS5228_LDO1_OCP_THD_1070 (0x05<<5) +#define RTS5228_LDO1_OCP_THD_1140 (0x06<<5) +#define RTS5228_LDO1_OCP_THD_1220 (0x07<<5) + +#define RTS5228_LDO1_LMT_THD_450 (0x00<<2) +#define RTS5228_LDO1_LMT_THD_1000 (0x01<<2) +#define RTS5228_LDO1_LMT_THD_1500 (0x02<<2) +#define RTS5228_LDO1_LMT_THD_2000 (0x03<<2) + +#define RTS5228_LDO1_CFG1 0xFF73 +#define RTS5228_LDO1_SR_TIME_MASK (0x03<<6) +#define RTS5228_LDO1_SR_0_0 (0x00<<6) +#define RTS5228_LDO1_SR_0_25 (0x01<<6) +#define RTS5228_LDO1_SR_0_5 (0x02<<6) +#define RTS5228_LDO1_SR_1_0 (0x03<<6) +#define RTS5228_LDO1_TUNE_MASK (0x07<<1) +#define RTS5228_LDO1_18 (0x05<<1) +#define RTS5228_LDO1_33 (0x07<<1) +#define RTS5228_LDO1_PWD_MASK (0x01<<0) + +#define RTS5228_AUXCLK_GAT_CTL 0xFF74 + +#define RTS5228_REG_RREF_CTL_0 0xFF75 +#define 
RTS5228_FORCE_RREF_EXTL (0x01<<7) +#define RTS5228_REG_BG33_MASK (0x07<<0) +#define RTS5228_RREF_12_1V (0x04<<0) +#define RTS5228_RREF_12_3V (0x05<<0) + +#define RTS5228_REG_RREF_CTL_1 0xFF76 + +#define RTS5228_REG_RREF_CTL_2 0xFF77 +#define RTS5228_TEST_INTL_RREF (0x01<<7) +#define RTS5228_DGLCH_TIME_MASK (0x03<<5) +#define RTS5228_DGLCH_TIME_50 (0x00<<5) +#define RTS5228_DGLCH_TIME_75 (0x01<<5) +#define RTS5228_DGLCH_TIME_100 (0x02<<5) +#define RTS5228_DGLCH_TIME_125 (0x03<<5) +#define RTS5228_REG_REXT_TUNE_MASK (0x1F<<0) + +#define RTS5228_REG_PME_FORCE_CTL 0xFF78 +#define FORCE_PM_CONTROL 0x20 +#define FORCE_PM_VALUE 0x10 + + +/* Single LUN, support SD */ +#define DEFAULT_SINGLE 0 +#define SD_LUN 1 + + +/* For Change_FPGA_SSCClock Function */ +#define MULTIPLY_BY_1 0x00 +#define MULTIPLY_BY_2 0x01 +#define MULTIPLY_BY_3 0x02 +#define MULTIPLY_BY_4 0x03 +#define MULTIPLY_BY_5 0x04 +#define MULTIPLY_BY_6 0x05 +#define MULTIPLY_BY_7 0x06 +#define MULTIPLY_BY_8 0x07 +#define MULTIPLY_BY_9 0x08 +#define MULTIPLY_BY_10 0x09 + +#define DIVIDE_BY_2 0x01 +#define DIVIDE_BY_3 0x02 +#define DIVIDE_BY_4 0x03 +#define DIVIDE_BY_5 0x04 +#define DIVIDE_BY_6 0x05 +#define DIVIDE_BY_7 0x06 +#define DIVIDE_BY_8 0x07 +#define DIVIDE_BY_9 0x08 +#define DIVIDE_BY_10 0x09 + +int rts5228_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock, + u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk); + +#endif /* RTS5228_H */ diff --git a/drivers/misc/cardreader/rts5229.c b/drivers/misc/cardreader/rts5229.c new file mode 100644 index 0000000000..b0edd8006d --- /dev/null +++ b/drivers/misc/cardreader/rts5229.c @@ -0,0 +1,263 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. 
+ * + * Author: + * Wei WANG + */ + +#include +#include +#include + +#include "rtsx_pcr.h" + +static u8 rts5229_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val); + return val & 0x0F; +} + +static void rts5229_fetch_vendor_settings(struct rtsx_pcr *pcr) +{ + struct pci_dev *pdev = pcr->pci; + u32 reg; + + pci_read_config_dword(pdev, PCR_SETTING_REG1, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); + + if (!rtsx_vendor_setting_valid(reg)) + return; + + pcr->aspm_en = rtsx_reg_to_aspm(reg); + pcr->sd30_drive_sel_1v8 = + map_sd_drive(rtsx_reg_to_sd30_drive_sel_1v8(reg)); + pcr->card_drive_sel &= 0x3F; + pcr->card_drive_sel |= rtsx_reg_to_card_drive_sel(reg); + + pci_read_config_dword(pdev, PCR_SETTING_REG2, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg); + pcr->sd30_drive_sel_3v3 = + map_sd_drive(rtsx_reg_to_sd30_drive_sel_3v3(reg)); +} + +static void rts5229_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime) +{ + rtsx_pci_write_register(pcr, FPDCTL, 0x03, 0x03); +} + +static int rts5229_extra_init_hw(struct rtsx_pcr *pcr) +{ + rtsx_pci_init_cmd(pcr); + + /* Configure GPIO as output */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, GPIO_CTL, 0x02, 0x02); + /* Reset ASPM state to default value */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, ASPM_FORCE_CTL, 0x3F, 0); + /* Force CLKREQ# PIN to drive 0 to request clock */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x08, 0x08); + /* Switch LDO3318 source from DV33 to card_3v3 */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x00); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x01); + /* LED shine disabled, set initial shine cycle period */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OLT_LED_CTL, 0x0F, 0x02); + /* Configure driving */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DRIVE_SEL, + 0xFF, pcr->sd30_drive_sel_3v3); + + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5229_optimize_phy(struct rtsx_pcr *pcr) +{ + /* Optimize RX sensitivity */ + return rtsx_pci_write_phy_register(pcr, 0x00, 0xBA42); +} + +static int rts5229_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x02); +} + +static int rts5229_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x00); +} + +static int rts5229_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x08); +} + +static int rts5229_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x00); +} + +static int rts5229_card_power_on(struct rtsx_pcr *pcr, int card) +{ + int err; + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_PARTIAL_POWER_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x02); + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + /* To avoid too large in-rush current */ + udelay(150); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_POWER_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x06); + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5229_card_power_off(struct rtsx_pcr *pcr, int card) +{ + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK | PMOS_STRG_MASK, + SD_POWER_OFF | PMOS_STRG_400mA); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 
0x00); + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5229_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + int err; + + if (voltage == OUTPUT_3V3) { + err = rtsx_pci_write_register(pcr, + SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_3v3); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4FC0 | 0x24); + if (err < 0) + return err; + } else if (voltage == OUTPUT_1V8) { + err = rtsx_pci_write_register(pcr, + SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_1v8); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4C40 | 0x24); + if (err < 0) + return err; + } else { + return -EINVAL; + } + + return 0; +} + +static const struct pcr_ops rts5229_pcr_ops = { + .fetch_vendor_settings = rts5229_fetch_vendor_settings, + .extra_init_hw = rts5229_extra_init_hw, + .optimize_phy = rts5229_optimize_phy, + .turn_on_led = rts5229_turn_on_led, + .turn_off_led = rts5229_turn_off_led, + .enable_auto_blink = rts5229_enable_auto_blink, + .disable_auto_blink = rts5229_disable_auto_blink, + .card_power_on = rts5229_card_power_on, + .card_power_off = rts5229_card_power_off, + .switch_output_voltage = rts5229_switch_output_voltage, + .cd_deglitch = NULL, + .conv_clk_and_div_n = NULL, + .force_power_down = rts5229_force_power_down, +}; + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rts5229_sd_pull_ctl_enable_tbl1[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), + 0, +}; + +/* For RTS5229 version C */ +static const u32 rts5229_sd_pull_ctl_enable_tbl2[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD9), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rts5229_sd_pull_ctl_disable_tbl1[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), + 0, +}; + +/* For RTS5229 version C */ +static const u32 rts5229_sd_pull_ctl_disable_tbl2[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE5), + 0, +}; + +/* MS Pull Control Enable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5229_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +/* MS Pull Control Disable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5229_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +void rts5229_init_params(struct rtsx_pcr *pcr) +{ + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; + pcr->num_slots = 2; + pcr->ops = &rts5229_pcr_ops; + + pcr->flags = 0; + pcr->card_drive_sel = RTSX_CARD_DRIVE_DEFAULT; + pcr->sd30_drive_sel_1v8 = DRIVER_TYPE_B; + pcr->sd30_drive_sel_3v3 = DRIVER_TYPE_D; + pcr->aspm_en = ASPM_L1_EN; + pcr->aspm_mode = ASPM_MODE_CFG; + pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 15); + pcr->rx_initial_phase = SET_CLOCK_PHASE(30, 6, 6); + + pcr->ic_version = rts5229_get_ic_version(pcr); + if (pcr->ic_version == IC_VER_C) { + pcr->sd_pull_ctl_enable_tbl = rts5229_sd_pull_ctl_enable_tbl2; + pcr->sd_pull_ctl_disable_tbl = rts5229_sd_pull_ctl_disable_tbl2; + } else { + pcr->sd_pull_ctl_enable_tbl = rts5229_sd_pull_ctl_enable_tbl1; + pcr->sd_pull_ctl_disable_tbl = 
rts5229_sd_pull_ctl_disable_tbl1; + } + pcr->ms_pull_ctl_enable_tbl = rts5229_ms_pull_ctl_enable_tbl; + pcr->ms_pull_ctl_disable_tbl = rts5229_ms_pull_ctl_disable_tbl; +} diff --git a/drivers/misc/cardreader/rts5249.c b/drivers/misc/cardreader/rts5249.c new file mode 100644 index 0000000000..6c81040e18 --- /dev/null +++ b/drivers/misc/cardreader/rts5249.c @@ -0,0 +1,848 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * Author: + * Wei WANG + */ + +#include +#include +#include + +#include "rtsx_pcr.h" + +static u8 rts5249_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val); + return val & 0x0F; +} + +static void rts5249_fill_driving(struct rtsx_pcr *pcr, u8 voltage) +{ + u8 driving_3v3[4][3] = { + {0x11, 0x11, 0x18}, + {0x55, 0x55, 0x5C}, + {0xFF, 0xFF, 0xFF}, + {0x96, 0x96, 0x96}, + }; + u8 driving_1v8[4][3] = { + {0xC4, 0xC4, 0xC4}, + {0x3C, 0x3C, 0x3C}, + {0xFE, 0xFE, 0xFE}, + {0xB3, 0xB3, 0xB3}, + }; + u8 (*driving)[3], drive_sel; + + if (voltage == OUTPUT_3V3) { + driving = driving_3v3; + drive_sel = pcr->sd30_drive_sel_3v3; + } else { + driving = driving_1v8; + drive_sel = pcr->sd30_drive_sel_1v8; + } + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CLK_DRIVE_SEL, + 0xFF, driving[drive_sel][0]); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CMD_DRIVE_SEL, + 0xFF, driving[drive_sel][1]); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DAT_DRIVE_SEL, + 0xFF, driving[drive_sel][2]); +} + +static void rtsx_base_fetch_vendor_settings(struct rtsx_pcr *pcr) +{ + struct pci_dev *pdev = pcr->pci; + u32 reg; + + pci_read_config_dword(pdev, PCR_SETTING_REG1, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); + + if (!rtsx_vendor_setting_valid(reg)) { + pcr_dbg(pcr, "skip fetch vendor setting\n"); + return; + } + + pcr->aspm_en = rtsx_reg_to_aspm(reg); + pcr->sd30_drive_sel_1v8 = rtsx_reg_to_sd30_drive_sel_1v8(reg); + pcr->card_drive_sel &= 0x3F; + pcr->card_drive_sel |= rtsx_reg_to_card_drive_sel(reg); + + pci_read_config_dword(pdev, PCR_SETTING_REG2, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg); + + if (CHK_PCI_PID(pcr, PID_524A) || CHK_PCI_PID(pcr, PID_525A)) + pcr->rtd3_en = rtsx_reg_to_rtd3_uhsii(reg); + + if (rtsx_check_mmc_support(reg)) + pcr->extra_caps |= EXTRA_CAPS_NO_MMC; + pcr->sd30_drive_sel_3v3 = rtsx_reg_to_sd30_drive_sel_3v3(reg); + if (rtsx_reg_check_reverse_socket(reg)) + pcr->flags |= PCR_REVERSE_SOCKET; +} + +static void rts5249_init_from_cfg(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &(pcr->option); + + if (CHK_PCI_PID(pcr, PID_524A) || CHK_PCI_PID(pcr, PID_525A)) { + if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN + | PM_L1_1_EN | PM_L1_2_EN)) + rtsx_pci_disable_oobs_polling(pcr); + else + rtsx_pci_enable_oobs_polling(pcr); + } + + if (option->ltr_en) { + if (option->ltr_enabled) + rtsx_set_ltr_latency(pcr, option->ltr_active_latency); + } +} + +static void rts52xa_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime) +{ + /* Set relink_time to 0 */ + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, MASK_8_BIT_DEF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3, + RELINK_TIME_MASK, 0); + + rtsx_pci_write_register(pcr, RTS524A_PM_CTRL3, + D3_DELINK_MODE_EN, D3_DELINK_MODE_EN); + + if (!runtime) { + rtsx_pci_write_register(pcr, RTS524A_AUTOLOAD_CFG1, 
+ CD_RESUME_EN_MASK, 0); + rtsx_pci_write_register(pcr, RTS524A_PM_CTRL3, 0x01, 0x00); + rtsx_pci_write_register(pcr, RTS524A_PME_FORCE_CTL, 0x30, 0x20); + } + + rtsx_pci_write_register(pcr, FPDCTL, ALL_POWER_DOWN, ALL_POWER_DOWN); +} + +static void rts52xa_save_content_from_efuse(struct rtsx_pcr *pcr) +{ + u8 cnt, sv; + u16 j = 0; + u8 tmp; + u8 val; + int i; + + rtsx_pci_write_register(pcr, RTS524A_PME_FORCE_CTL, + REG_EFUSE_BYPASS | REG_EFUSE_POR, REG_EFUSE_POR); + udelay(1); + + pcr_dbg(pcr, "Enable efuse por!"); + pcr_dbg(pcr, "save efuse to autoload"); + + rtsx_pci_write_register(pcr, RTS525A_EFUSE_ADD, REG_EFUSE_ADD_MASK, 0x00); + rtsx_pci_write_register(pcr, RTS525A_EFUSE_CTL, + REG_EFUSE_ENABLE | REG_EFUSE_MODE, REG_EFUSE_ENABLE); + /* Wait transfer end */ + for (j = 0; j < 1024; j++) { + rtsx_pci_read_register(pcr, RTS525A_EFUSE_CTL, &tmp); + if ((tmp & 0x80) == 0) + break; + } + rtsx_pci_read_register(pcr, RTS525A_EFUSE_DATA, &val); + cnt = val & 0x0F; + sv = val & 0x10; + + if (sv) { + for (i = 0; i < 4; i++) { + rtsx_pci_write_register(pcr, RTS525A_EFUSE_ADD, + REG_EFUSE_ADD_MASK, 0x04 + i); + rtsx_pci_write_register(pcr, RTS525A_EFUSE_CTL, + REG_EFUSE_ENABLE | REG_EFUSE_MODE, REG_EFUSE_ENABLE); + /* Wait transfer end */ + for (j = 0; j < 1024; j++) { + rtsx_pci_read_register(pcr, RTS525A_EFUSE_CTL, &tmp); + if ((tmp & 0x80) == 0) + break; + } + rtsx_pci_read_register(pcr, RTS525A_EFUSE_DATA, &val); + rtsx_pci_write_register(pcr, 0xFF04 + i, 0xFF, val); + } + } else { + rtsx_pci_write_register(pcr, 0xFF04, 0xFF, (u8)PCI_VID(pcr)); + rtsx_pci_write_register(pcr, 0xFF05, 0xFF, (u8)(PCI_VID(pcr) >> 8)); + rtsx_pci_write_register(pcr, 0xFF06, 0xFF, (u8)PCI_PID(pcr)); + rtsx_pci_write_register(pcr, 0xFF07, 0xFF, (u8)(PCI_PID(pcr) >> 8)); + } + + for (i = 0; i < cnt * 4; i++) { + if (sv) + rtsx_pci_write_register(pcr, RTS525A_EFUSE_ADD, + REG_EFUSE_ADD_MASK, 0x08 + i); + else + rtsx_pci_write_register(pcr, RTS525A_EFUSE_ADD, + REG_EFUSE_ADD_MASK, 0x04 + i); + rtsx_pci_write_register(pcr, RTS525A_EFUSE_CTL, + REG_EFUSE_ENABLE | REG_EFUSE_MODE, REG_EFUSE_ENABLE); + /* Wait transfer end */ + for (j = 0; j < 1024; j++) { + rtsx_pci_read_register(pcr, RTS525A_EFUSE_CTL, &tmp); + if ((tmp & 0x80) == 0) + break; + } + rtsx_pci_read_register(pcr, RTS525A_EFUSE_DATA, &val); + rtsx_pci_write_register(pcr, 0xFF08 + i, 0xFF, val); + } + rtsx_pci_write_register(pcr, 0xFF00, 0xFF, (cnt & 0x7F) | 0x80); + rtsx_pci_write_register(pcr, RTS524A_PME_FORCE_CTL, + REG_EFUSE_BYPASS | REG_EFUSE_POR, REG_EFUSE_BYPASS); + pcr_dbg(pcr, "Disable efuse por!"); +} + +static void rts52xa_save_content_to_autoload_space(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, RESET_LOAD_REG, &val); + if (val & 0x02) { + rtsx_pci_read_register(pcr, RTS525A_BIOS_CFG, &val); + if (val & RTS525A_LOAD_BIOS_FLAG) { + rtsx_pci_write_register(pcr, RTS525A_BIOS_CFG, + RTS525A_LOAD_BIOS_FLAG, RTS525A_CLEAR_BIOS_FLAG); + + rtsx_pci_write_register(pcr, RTS524A_PME_FORCE_CTL, + REG_EFUSE_POWER_MASK, REG_EFUSE_POWERON); + + pcr_dbg(pcr, "Power ON efuse!"); + mdelay(1); + rts52xa_save_content_from_efuse(pcr); + } else { + rtsx_pci_read_register(pcr, RTS524A_PME_FORCE_CTL, &val); + if (!(val & 0x08)) + rts52xa_save_content_from_efuse(pcr); + } + } else { + pcr_dbg(pcr, "Load from autoload"); + rtsx_pci_write_register(pcr, 0xFF00, 0xFF, 0x80); + rtsx_pci_write_register(pcr, 0xFF04, 0xFF, (u8)PCI_VID(pcr)); + rtsx_pci_write_register(pcr, 0xFF05, 0xFF, (u8)(PCI_VID(pcr) >> 8)); + rtsx_pci_write_register(pcr, 0xFF06, 0xFF, 
(u8)PCI_PID(pcr)); + rtsx_pci_write_register(pcr, 0xFF07, 0xFF, (u8)(PCI_PID(pcr) >> 8)); + } +} + +static int rts5249_extra_init_hw(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &(pcr->option); + + rts5249_init_from_cfg(pcr); + + rtsx_pci_init_cmd(pcr); + + if (CHK_PCI_PID(pcr, PID_524A) || CHK_PCI_PID(pcr, PID_525A)) + rts52xa_save_content_to_autoload_space(pcr); + + /* Rest L1SUB Config */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, L1SUB_CONFIG3, 0xFF, 0x00); + /* Configure GPIO as output */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, GPIO_CTL, 0x02, 0x02); + /* Reset ASPM state to default value */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, ASPM_FORCE_CTL, 0x3F, 0); + /* Switch LDO3318 source from DV33 to card_3v3 */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x00); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x01); + /* LED shine disabled, set initial shine cycle period */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OLT_LED_CTL, 0x0F, 0x02); + /* Configure driving */ + rts5249_fill_driving(pcr, OUTPUT_3V3); + if (pcr->flags & PCR_REVERSE_SOCKET) + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB0, 0xB0); + else + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB0, 0x80); + + rtsx_pci_send_cmd(pcr, CMD_TIMEOUT_DEF); + + if (CHK_PCI_PID(pcr, PID_524A) || CHK_PCI_PID(pcr, PID_525A)) { + rtsx_pci_write_register(pcr, REG_VREF, PWD_SUSPND_EN, PWD_SUSPND_EN); + rtsx_pci_write_register(pcr, RTS524A_AUTOLOAD_CFG1, + CD_RESUME_EN_MASK, CD_RESUME_EN_MASK); + } + + if (pcr->rtd3_en) { + if (CHK_PCI_PID(pcr, PID_524A) || CHK_PCI_PID(pcr, PID_525A)) { + rtsx_pci_write_register(pcr, RTS524A_PM_CTRL3, 0x01, 0x01); + rtsx_pci_write_register(pcr, RTS524A_PME_FORCE_CTL, 0x30, 0x30); + } else { + rtsx_pci_write_register(pcr, PM_CTRL3, 0x01, 0x01); + rtsx_pci_write_register(pcr, PME_FORCE_CTL, 0xFF, 0x33); + } + } else { + if (CHK_PCI_PID(pcr, PID_524A) || CHK_PCI_PID(pcr, PID_525A)) { + rtsx_pci_write_register(pcr, RTS524A_PM_CTRL3, 0x01, 0x00); + rtsx_pci_write_register(pcr, RTS524A_PME_FORCE_CTL, 0x30, 0x20); + } else { + rtsx_pci_write_register(pcr, PME_FORCE_CTL, 0xFF, 0x30); + rtsx_pci_write_register(pcr, PM_CTRL3, 0x01, 0x00); + } + } + + + /* + * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced + * to drive low, and we forcibly request clock. 
+ */ + if (option->force_clkreq_0) + rtsx_pci_write_register(pcr, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW); + else + rtsx_pci_write_register(pcr, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH); + + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x00); + if (CHK_PCI_PID(pcr, PID_524A) || CHK_PCI_PID(pcr, PID_525A)) { + rtsx_pci_write_register(pcr, RTS524A_PME_FORCE_CTL, + REG_EFUSE_POWER_MASK, REG_EFUSE_POWEROFF); + pcr_dbg(pcr, "Power OFF efuse!"); + } + + return 0; +} + +static int rts5249_optimize_phy(struct rtsx_pcr *pcr) +{ + int err; + + err = rtsx_pci_write_register(pcr, PM_CTRL3, D3_DELINK_MODE_EN, 0x00); + if (err < 0) + return err; + + err = rtsx_pci_write_phy_register(pcr, PHY_REV, + PHY_REV_RESV | PHY_REV_RXIDLE_LATCHED | + PHY_REV_P1_EN | PHY_REV_RXIDLE_EN | + PHY_REV_CLKREQ_TX_EN | PHY_REV_RX_PWST | + PHY_REV_CLKREQ_DT_1_0 | PHY_REV_STOP_CLKRD | + PHY_REV_STOP_CLKWR); + if (err < 0) + return err; + + msleep(1); + + err = rtsx_pci_write_phy_register(pcr, PHY_BPCR, + PHY_BPCR_IBRXSEL | PHY_BPCR_IBTXSEL | + PHY_BPCR_IB_FILTER | PHY_BPCR_CMIRROR_EN); + if (err < 0) + return err; + + err = rtsx_pci_write_phy_register(pcr, PHY_PCR, + PHY_PCR_FORCE_CODE | PHY_PCR_OOBS_CALI_50 | + PHY_PCR_OOBS_VCM_08 | PHY_PCR_OOBS_SEN_90 | + PHY_PCR_RSSI_EN | PHY_PCR_RX10K); + if (err < 0) + return err; + + err = rtsx_pci_write_phy_register(pcr, PHY_RCR2, + PHY_RCR2_EMPHASE_EN | PHY_RCR2_NADJR | + PHY_RCR2_CDR_SR_2 | PHY_RCR2_FREQSEL_12 | + PHY_RCR2_CDR_SC_12P | PHY_RCR2_CALIB_LATE); + if (err < 0) + return err; + + err = rtsx_pci_write_phy_register(pcr, PHY_FLD4, + PHY_FLD4_FLDEN_SEL | PHY_FLD4_REQ_REF | + PHY_FLD4_RXAMP_OFF | PHY_FLD4_REQ_ADDA | + PHY_FLD4_BER_COUNT | PHY_FLD4_BER_TIMER | + PHY_FLD4_BER_CHK_EN); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, PHY_RDR, + PHY_RDR_RXDSEL_1_9 | PHY_SSC_AUTO_PWD); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, PHY_RCR1, + PHY_RCR1_ADP_TIME_4 | PHY_RCR1_VCO_COARSE); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, PHY_FLD3, + PHY_FLD3_TIMER_4 | PHY_FLD3_TIMER_6 | + PHY_FLD3_RXDELINK); + if (err < 0) + return err; + + return rtsx_pci_write_phy_register(pcr, PHY_TUNE, + PHY_TUNE_TUNEREF_1_0 | PHY_TUNE_VBGSEL_1252 | + PHY_TUNE_SDBUS_33 | PHY_TUNE_TUNED18 | + PHY_TUNE_TUNED12 | PHY_TUNE_TUNEA12); +} + +static int rtsx_base_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x02); +} + +static int rtsx_base_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x00); +} + +static int rtsx_base_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x08); +} + +static int rtsx_base_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x00); +} + +static int rtsx_base_card_power_on(struct rtsx_pcr *pcr, int card) +{ + int err; + struct rtsx_cr_option *option = &pcr->option; + + if (option->ocp_en) + rtsx_pci_enable_ocp(pcr); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_VCC_PARTIAL_POWER_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x02); + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + msleep(5); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_VCC_POWER_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x06); 
+ return rtsx_pci_send_cmd(pcr, 100); +} + +static int rtsx_base_card_power_off(struct rtsx_pcr *pcr, int card) +{ + struct rtsx_cr_option *option = &pcr->option; + + if (option->ocp_en) + rtsx_pci_disable_ocp(pcr); + + rtsx_pci_write_register(pcr, CARD_PWR_CTL, SD_POWER_MASK, SD_POWER_OFF); + + rtsx_pci_write_register(pcr, PWR_GATE_CTRL, LDO3318_PWR_MASK, 0x00); + return 0; +} + +static int rtsx_base_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + int err; + u16 append; + + switch (voltage) { + case OUTPUT_3V3: + err = rtsx_pci_update_phy(pcr, PHY_TUNE, PHY_TUNE_VOLTAGE_MASK, + PHY_TUNE_VOLTAGE_3V3); + if (err < 0) + return err; + break; + case OUTPUT_1V8: + append = PHY_TUNE_D18_1V8; + if (CHK_PCI_PID(pcr, 0x5249)) { + err = rtsx_pci_update_phy(pcr, PHY_BACR, + PHY_BACR_BASIC_MASK, 0); + if (err < 0) + return err; + append = PHY_TUNE_D18_1V7; + } + + err = rtsx_pci_update_phy(pcr, PHY_TUNE, PHY_TUNE_VOLTAGE_MASK, + append); + if (err < 0) + return err; + break; + default: + pcr_dbg(pcr, "unknown output voltage %d\n", voltage); + return -EINVAL; + } + + /* set pad drive */ + rtsx_pci_init_cmd(pcr); + rts5249_fill_driving(pcr, voltage); + return rtsx_pci_send_cmd(pcr, 100); +} + +static const struct pcr_ops rts5249_pcr_ops = { + .fetch_vendor_settings = rtsx_base_fetch_vendor_settings, + .extra_init_hw = rts5249_extra_init_hw, + .optimize_phy = rts5249_optimize_phy, + .turn_on_led = rtsx_base_turn_on_led, + .turn_off_led = rtsx_base_turn_off_led, + .enable_auto_blink = rtsx_base_enable_auto_blink, + .disable_auto_blink = rtsx_base_disable_auto_blink, + .card_power_on = rtsx_base_card_power_on, + .card_power_off = rtsx_base_card_power_off, + .switch_output_voltage = rtsx_base_switch_output_voltage, +}; + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rts5249_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x66), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0xAA), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rts5249_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x66), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + 0, +}; + +/* MS Pull Control Enable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5249_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +/* MS Pull Control Disable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5249_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +void rts5249_init_params(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &(pcr->option); + + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; + pcr->num_slots = 2; + pcr->ops = &rts5249_pcr_ops; + + pcr->flags = 0; + pcr->card_drive_sel = RTSX_CARD_DRIVE_DEFAULT; + pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B; + pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B; + pcr->aspm_en = ASPM_L1_EN; + pcr->aspm_mode = ASPM_MODE_CFG; + pcr->tx_initial_phase = SET_CLOCK_PHASE(1, 29, 16); + 
pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5); + + pcr->ic_version = rts5249_get_ic_version(pcr); + pcr->sd_pull_ctl_enable_tbl = rts5249_sd_pull_ctl_enable_tbl; + pcr->sd_pull_ctl_disable_tbl = rts5249_sd_pull_ctl_disable_tbl; + pcr->ms_pull_ctl_enable_tbl = rts5249_ms_pull_ctl_enable_tbl; + pcr->ms_pull_ctl_disable_tbl = rts5249_ms_pull_ctl_disable_tbl; + + pcr->reg_pm_ctrl3 = PM_CTRL3; + + option->dev_flags = (LTR_L1SS_PWR_GATE_CHECK_CARD_EN + | LTR_L1SS_PWR_GATE_EN); + option->ltr_en = true; + + /* Init latency of active, idle, L1OFF to 60us, 300us, 3ms */ + option->ltr_active_latency = LTR_ACTIVE_LATENCY_DEF; + option->ltr_idle_latency = LTR_IDLE_LATENCY_DEF; + option->ltr_l1off_latency = LTR_L1OFF_LATENCY_DEF; + option->l1_snooze_delay = L1_SNOOZE_DELAY_DEF; + option->ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5249_DEF; + option->ltr_l1off_snooze_sspwrgate = + LTR_L1OFF_SNOOZE_SSPWRGATE_5249_DEF; +} + +static int rts524a_write_phy(struct rtsx_pcr *pcr, u8 addr, u16 val) +{ + addr = addr & 0x80 ? (addr & 0x7F) | 0x40 : addr; + + return __rtsx_pci_write_phy_register(pcr, addr, val); +} + +static int rts524a_read_phy(struct rtsx_pcr *pcr, u8 addr, u16 *val) +{ + addr = addr & 0x80 ? (addr & 0x7F) | 0x40 : addr; + + return __rtsx_pci_read_phy_register(pcr, addr, val); +} + +static int rts524a_optimize_phy(struct rtsx_pcr *pcr) +{ + int err; + + err = rtsx_pci_write_register(pcr, RTS524A_PM_CTRL3, + D3_DELINK_MODE_EN, 0x00); + if (err < 0) + return err; + + rtsx_pci_write_phy_register(pcr, PHY_PCR, + PHY_PCR_FORCE_CODE | PHY_PCR_OOBS_CALI_50 | + PHY_PCR_OOBS_VCM_08 | PHY_PCR_OOBS_SEN_90 | PHY_PCR_RSSI_EN); + rtsx_pci_write_phy_register(pcr, PHY_SSCCR3, + PHY_SSCCR3_STEP_IN | PHY_SSCCR3_CHECK_DELAY); + + if (is_version(pcr, 0x524A, IC_VER_A)) { + rtsx_pci_write_phy_register(pcr, PHY_SSCCR3, + PHY_SSCCR3_STEP_IN | PHY_SSCCR3_CHECK_DELAY); + rtsx_pci_write_phy_register(pcr, PHY_SSCCR2, + PHY_SSCCR2_PLL_NCODE | PHY_SSCCR2_TIME0 | + PHY_SSCCR2_TIME2_WIDTH); + rtsx_pci_write_phy_register(pcr, PHY_ANA1A, + PHY_ANA1A_TXR_LOOPBACK | PHY_ANA1A_RXT_BIST | + PHY_ANA1A_TXR_BIST | PHY_ANA1A_REV); + rtsx_pci_write_phy_register(pcr, PHY_ANA1D, + PHY_ANA1D_DEBUG_ADDR); + rtsx_pci_write_phy_register(pcr, PHY_DIG1E, + PHY_DIG1E_REV | PHY_DIG1E_D0_X_D1 | + PHY_DIG1E_RX_ON_HOST | PHY_DIG1E_RCLK_REF_HOST | + PHY_DIG1E_RCLK_TX_EN_KEEP | + PHY_DIG1E_RCLK_TX_TERM_KEEP | + PHY_DIG1E_RCLK_RX_EIDLE_ON | PHY_DIG1E_TX_TERM_KEEP | + PHY_DIG1E_RX_TERM_KEEP | PHY_DIG1E_TX_EN_KEEP | + PHY_DIG1E_RX_EN_KEEP); + } + + rtsx_pci_write_phy_register(pcr, PHY_ANA08, + PHY_ANA08_RX_EQ_DCGAIN | PHY_ANA08_SEL_RX_EN | + PHY_ANA08_RX_EQ_VAL | PHY_ANA08_SCP | PHY_ANA08_SEL_IPI); + + return 0; +} + +static int rts524a_extra_init_hw(struct rtsx_pcr *pcr) +{ + rts5249_extra_init_hw(pcr); + + rtsx_pci_write_register(pcr, FUNC_FORCE_CTL, + FORCE_ASPM_L1_EN, FORCE_ASPM_L1_EN); + rtsx_pci_write_register(pcr, PM_EVENT_DEBUG, PME_DEBUG_0, PME_DEBUG_0); + rtsx_pci_write_register(pcr, LDO_VCC_CFG1, LDO_VCC_LMT_EN, + LDO_VCC_LMT_EN); + rtsx_pci_write_register(pcr, PCLK_CTL, PCLK_MODE_SEL, PCLK_MODE_SEL); + if (is_version(pcr, 0x524A, IC_VER_A)) { + rtsx_pci_write_register(pcr, LDO_DV18_CFG, + LDO_DV18_SR_MASK, LDO_DV18_SR_DF); + rtsx_pci_write_register(pcr, LDO_VCC_CFG1, + LDO_VCC_REF_TUNE_MASK, LDO_VCC_REF_1V2); + rtsx_pci_write_register(pcr, LDO_VIO_CFG, + LDO_VIO_REF_TUNE_MASK, LDO_VIO_REF_1V2); + rtsx_pci_write_register(pcr, LDO_VIO_CFG, + LDO_VIO_SR_MASK, LDO_VIO_SR_DF); + rtsx_pci_write_register(pcr, LDO_DV12S_CFG, + LDO_REF12_TUNE_MASK, 
LDO_REF12_TUNE_DF); + rtsx_pci_write_register(pcr, SD40_LDO_CTL1, + SD40_VIO_TUNE_MASK, SD40_VIO_TUNE_1V7); + } + + return 0; +} + +static void rts5250_set_l1off_cfg_sub_d0(struct rtsx_pcr *pcr, int active) +{ + struct rtsx_cr_option *option = &(pcr->option); + + u32 interrupt = rtsx_pci_readl(pcr, RTSX_BIPR); + int card_exist = (interrupt & SD_EXIST) | (interrupt & MS_EXIST); + int aspm_L1_1, aspm_L1_2; + u8 val = 0; + + aspm_L1_1 = rtsx_check_dev_flag(pcr, ASPM_L1_1_EN); + aspm_L1_2 = rtsx_check_dev_flag(pcr, ASPM_L1_2_EN); + + if (active) { + /* Run, latency: 60us */ + if (aspm_L1_1) + val = option->ltr_l1off_snooze_sspwrgate; + } else { + /* L1off, latency: 300us */ + if (aspm_L1_2) + val = option->ltr_l1off_sspwrgate; + } + + if (aspm_L1_1 || aspm_L1_2) { + if (rtsx_check_dev_flag(pcr, + LTR_L1SS_PWR_GATE_CHECK_CARD_EN)) { + if (card_exist) + val &= ~L1OFF_MBIAS2_EN_5250; + else + val |= L1OFF_MBIAS2_EN_5250; + } + } + rtsx_set_l1off_sub(pcr, val); +} + +static const struct pcr_ops rts524a_pcr_ops = { + .write_phy = rts524a_write_phy, + .read_phy = rts524a_read_phy, + .fetch_vendor_settings = rtsx_base_fetch_vendor_settings, + .extra_init_hw = rts524a_extra_init_hw, + .optimize_phy = rts524a_optimize_phy, + .turn_on_led = rtsx_base_turn_on_led, + .turn_off_led = rtsx_base_turn_off_led, + .enable_auto_blink = rtsx_base_enable_auto_blink, + .disable_auto_blink = rtsx_base_disable_auto_blink, + .card_power_on = rtsx_base_card_power_on, + .card_power_off = rtsx_base_card_power_off, + .switch_output_voltage = rtsx_base_switch_output_voltage, + .force_power_down = rts52xa_force_power_down, + .set_l1off_cfg_sub_d0 = rts5250_set_l1off_cfg_sub_d0, +}; + +void rts524a_init_params(struct rtsx_pcr *pcr) +{ + rts5249_init_params(pcr); + pcr->aspm_mode = ASPM_MODE_REG; + pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 29, 11); + pcr->option.ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF; + pcr->option.ltr_l1off_snooze_sspwrgate = + LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF; + + pcr->reg_pm_ctrl3 = RTS524A_PM_CTRL3; + pcr->ops = &rts524a_pcr_ops; + + pcr->option.ocp_en = 1; + if (pcr->option.ocp_en) + pcr->hw_param.interrupt_en |= SD_OC_INT_EN; + pcr->hw_param.ocp_glitch = SD_OCP_GLITCH_10M; + pcr->option.sd_800mA_ocp_thd = RTS524A_OCP_THD_800; + +} + +static int rts525a_card_power_on(struct rtsx_pcr *pcr, int card) +{ + rtsx_pci_write_register(pcr, LDO_VCC_CFG1, + LDO_VCC_TUNE_MASK, LDO_VCC_3V3); + return rtsx_base_card_power_on(pcr, card); +} + +static int rts525a_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + switch (voltage) { + case OUTPUT_3V3: + rtsx_pci_write_register(pcr, LDO_CONFIG2, + LDO_D3318_MASK, LDO_D3318_33V); + rtsx_pci_write_register(pcr, SD_PAD_CTL, SD_IO_USING_1V8, 0); + break; + case OUTPUT_1V8: + rtsx_pci_write_register(pcr, LDO_CONFIG2, + LDO_D3318_MASK, LDO_D3318_18V); + rtsx_pci_write_register(pcr, SD_PAD_CTL, SD_IO_USING_1V8, + SD_IO_USING_1V8); + break; + default: + return -EINVAL; + } + + rtsx_pci_init_cmd(pcr); + rts5249_fill_driving(pcr, voltage); + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts525a_optimize_phy(struct rtsx_pcr *pcr) +{ + int err; + + err = rtsx_pci_write_register(pcr, RTS524A_PM_CTRL3, + D3_DELINK_MODE_EN, 0x00); + if (err < 0) + return err; + + rtsx_pci_write_phy_register(pcr, _PHY_FLD0, + _PHY_FLD0_CLK_REQ_20C | _PHY_FLD0_RX_IDLE_EN | + _PHY_FLD0_BIT_ERR_RSTN | _PHY_FLD0_BER_COUNT | + _PHY_FLD0_BER_TIMER | _PHY_FLD0_CHECK_EN); + + rtsx_pci_write_phy_register(pcr, _PHY_ANA03, + _PHY_ANA03_TIMER_MAX | _PHY_ANA03_OOBS_DEB_EN | + 
_PHY_CMU_DEBUG_EN); + + if (is_version(pcr, 0x525A, IC_VER_A)) + rtsx_pci_write_phy_register(pcr, _PHY_REV0, + _PHY_REV0_FILTER_OUT | _PHY_REV0_CDR_BYPASS_PFD | + _PHY_REV0_CDR_RX_IDLE_BYPASS); + + return 0; +} + +static int rts525a_extra_init_hw(struct rtsx_pcr *pcr) +{ + rts5249_extra_init_hw(pcr); + + rtsx_pci_write_register(pcr, RTS5250_CLK_CFG3, RTS525A_CFG_MEM_PD, RTS525A_CFG_MEM_PD); + + rtsx_pci_write_register(pcr, PCLK_CTL, PCLK_MODE_SEL, PCLK_MODE_SEL); + if (is_version(pcr, 0x525A, IC_VER_A)) { + rtsx_pci_write_register(pcr, L1SUB_CONFIG2, + L1SUB_AUTO_CFG, L1SUB_AUTO_CFG); + rtsx_pci_write_register(pcr, RREF_CFG, + RREF_VBGSEL_MASK, RREF_VBGSEL_1V25); + rtsx_pci_write_register(pcr, LDO_VIO_CFG, + LDO_VIO_TUNE_MASK, LDO_VIO_1V7); + rtsx_pci_write_register(pcr, LDO_DV12S_CFG, + LDO_D12_TUNE_MASK, LDO_D12_TUNE_DF); + rtsx_pci_write_register(pcr, LDO_AV12S_CFG, + LDO_AV12S_TUNE_MASK, LDO_AV12S_TUNE_DF); + rtsx_pci_write_register(pcr, LDO_VCC_CFG0, + LDO_VCC_LMTVTH_MASK, LDO_VCC_LMTVTH_2A); + rtsx_pci_write_register(pcr, OOBS_CONFIG, + OOBS_AUTOK_DIS | OOBS_VAL_MASK, 0x89); + } + + return 0; +} + +static const struct pcr_ops rts525a_pcr_ops = { + .fetch_vendor_settings = rtsx_base_fetch_vendor_settings, + .extra_init_hw = rts525a_extra_init_hw, + .optimize_phy = rts525a_optimize_phy, + .turn_on_led = rtsx_base_turn_on_led, + .turn_off_led = rtsx_base_turn_off_led, + .enable_auto_blink = rtsx_base_enable_auto_blink, + .disable_auto_blink = rtsx_base_disable_auto_blink, + .card_power_on = rts525a_card_power_on, + .card_power_off = rtsx_base_card_power_off, + .switch_output_voltage = rts525a_switch_output_voltage, + .force_power_down = rts52xa_force_power_down, + .set_l1off_cfg_sub_d0 = rts5250_set_l1off_cfg_sub_d0, +}; + +void rts525a_init_params(struct rtsx_pcr *pcr) +{ + rts5249_init_params(pcr); + pcr->aspm_mode = ASPM_MODE_REG; + pcr->tx_initial_phase = SET_CLOCK_PHASE(25, 29, 11); + pcr->option.ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF; + pcr->option.ltr_l1off_snooze_sspwrgate = + LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF; + + pcr->reg_pm_ctrl3 = RTS524A_PM_CTRL3; + pcr->ops = &rts525a_pcr_ops; + + pcr->option.ocp_en = 1; + if (pcr->option.ocp_en) + pcr->hw_param.interrupt_en |= SD_OC_INT_EN; + pcr->hw_param.ocp_glitch = SD_OCP_GLITCH_10M; + pcr->option.sd_800mA_ocp_thd = RTS525A_OCP_THD_800; +} diff --git a/drivers/misc/cardreader/rts5260.c b/drivers/misc/cardreader/rts5260.c new file mode 100644 index 0000000000..d2d3a6ccb8 --- /dev/null +++ b/drivers/misc/cardreader/rts5260.c @@ -0,0 +1,629 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2016-2017 Realtek Semiconductor Corp. All rights reserved. 
+ * + * Author: + * Steven FENG + * Rui FENG + * Wei WANG + */ + +#include +#include +#include + +#include "rts5260.h" +#include "rtsx_pcr.h" + +static u8 rts5260_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val); + return val & IC_VERSION_MASK; +} + +static void rts5260_fill_driving(struct rtsx_pcr *pcr, u8 voltage) +{ + u8 driving_3v3[4][3] = { + {0x11, 0x11, 0x11}, + {0x22, 0x22, 0x22}, + {0x55, 0x55, 0x55}, + {0x33, 0x33, 0x33}, + }; + u8 driving_1v8[4][3] = { + {0x35, 0x33, 0x33}, + {0x8A, 0x88, 0x88}, + {0xBD, 0xBB, 0xBB}, + {0x9B, 0x99, 0x99}, + }; + u8 (*driving)[3], drive_sel; + + if (voltage == OUTPUT_3V3) { + driving = driving_3v3; + drive_sel = pcr->sd30_drive_sel_3v3; + } else { + driving = driving_1v8; + drive_sel = pcr->sd30_drive_sel_1v8; + } + + rtsx_pci_write_register(pcr, SD30_CLK_DRIVE_SEL, + 0xFF, driving[drive_sel][0]); + + rtsx_pci_write_register(pcr, SD30_CMD_DRIVE_SEL, + 0xFF, driving[drive_sel][1]); + + rtsx_pci_write_register(pcr, SD30_DAT_DRIVE_SEL, + 0xFF, driving[drive_sel][2]); +} + +static void rtsx_base_fetch_vendor_settings(struct rtsx_pcr *pcr) +{ + struct pci_dev *pdev = pcr->pci; + u32 reg; + + pci_read_config_dword(pdev, PCR_SETTING_REG1, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); + + if (!rtsx_vendor_setting_valid(reg)) { + pcr_dbg(pcr, "skip fetch vendor setting\n"); + return; + } + + pcr->aspm_en = rtsx_reg_to_aspm(reg); + pcr->sd30_drive_sel_1v8 = rtsx_reg_to_sd30_drive_sel_1v8(reg); + pcr->card_drive_sel &= 0x3F; + pcr->card_drive_sel |= rtsx_reg_to_card_drive_sel(reg); + + pci_read_config_dword(pdev, PCR_SETTING_REG2, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg); + if (rtsx_check_mmc_support(reg)) + pcr->extra_caps |= EXTRA_CAPS_NO_MMC; + pcr->sd30_drive_sel_3v3 = rtsx_reg_to_sd30_drive_sel_3v3(reg); + if (rtsx_reg_check_reverse_socket(reg)) + pcr->flags |= PCR_REVERSE_SOCKET; +} + +static int rtsx_base_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, + LED_SHINE_MASK, LED_SHINE_EN); +} + +static int rtsx_base_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, + LED_SHINE_MASK, LED_SHINE_DISABLE); +} + +static int rts5260_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, RTS5260_REG_GPIO_CTL0, + RTS5260_REG_GPIO_MASK, RTS5260_REG_GPIO_ON); +} + +static int rts5260_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, RTS5260_REG_GPIO_CTL0, + RTS5260_REG_GPIO_MASK, RTS5260_REG_GPIO_OFF); +} + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rts5260_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x66), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0xAA), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rts5260_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x66), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + 0, +}; + +/* MS Pull Control Enable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5260_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + 
RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +/* MS Pull Control Disable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5260_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +static int sd_set_sample_push_timing_sd30(struct rtsx_pcr *pcr) +{ + rtsx_pci_write_register(pcr, SD_CFG1, SD_MODE_SELECT_MASK + | SD_ASYNC_FIFO_NOT_RST, SD_30_MODE | SD_ASYNC_FIFO_NOT_RST); + rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, CLK_LOW_FREQ); + rtsx_pci_write_register(pcr, CARD_CLK_SOURCE, 0xFF, + CRC_VAR_CLK0 | SD30_FIX_CLK | SAMPLE_VAR_CLK1); + rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0); + + return 0; +} + +static int rts5260_card_power_on(struct rtsx_pcr *pcr, int card) +{ + struct rtsx_cr_option *option = &pcr->option; + + if (option->ocp_en) + rtsx_pci_enable_ocp(pcr); + + + rtsx_pci_write_register(pcr, LDO_CONFIG2, DV331812_VDD1, DV331812_VDD1); + rtsx_pci_write_register(pcr, LDO_VCC_CFG0, + RTS5260_DVCC_TUNE_MASK, RTS5260_DVCC_33); + + rtsx_pci_write_register(pcr, LDO_VCC_CFG1, LDO_POW_SDVDD1_MASK, + LDO_POW_SDVDD1_ON); + + rtsx_pci_write_register(pcr, LDO_CONFIG2, + DV331812_POWERON, DV331812_POWERON); + msleep(20); + + if (pcr->extra_caps & EXTRA_CAPS_SD_SDR50 || + pcr->extra_caps & EXTRA_CAPS_SD_SDR104) + sd_set_sample_push_timing_sd30(pcr); + + /* Initialize SD_CFG1 register */ + rtsx_pci_write_register(pcr, SD_CFG1, 0xFF, + SD_CLK_DIVIDE_128 | SD_20_MODE); + + rtsx_pci_write_register(pcr, SD_SAMPLE_POINT_CTL, + 0xFF, SD20_RX_POS_EDGE); + rtsx_pci_write_register(pcr, SD_PUSH_POINT_CTL, 0xFF, 0); + rtsx_pci_write_register(pcr, CARD_STOP, SD_STOP | SD_CLR_ERR, + SD_STOP | SD_CLR_ERR); + + /* Reset SD_CFG3 register */ + rtsx_pci_write_register(pcr, SD_CFG3, SD30_CLK_END_EN, 0); + rtsx_pci_write_register(pcr, REG_SD_STOP_SDCLK_CFG, + SD30_CLK_STOP_CFG_EN | SD30_CLK_STOP_CFG1 | + SD30_CLK_STOP_CFG0, 0); + + rtsx_pci_write_register(pcr, REG_PRE_RW_MODE, EN_INFINITE_MODE, 0); + + return 0; +} + +static int rts5260_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + switch (voltage) { + case OUTPUT_3V3: + rtsx_pci_write_register(pcr, LDO_CONFIG2, + DV331812_VDD1, DV331812_VDD1); + rtsx_pci_write_register(pcr, LDO_DV18_CFG, + DV331812_MASK, DV331812_33); + rtsx_pci_write_register(pcr, SD_PAD_CTL, SD_IO_USING_1V8, 0); + break; + case OUTPUT_1V8: + rtsx_pci_write_register(pcr, LDO_CONFIG2, + DV331812_VDD1, DV331812_VDD1); + rtsx_pci_write_register(pcr, LDO_DV18_CFG, + DV331812_MASK, DV331812_17); + rtsx_pci_write_register(pcr, SD_PAD_CTL, SD_IO_USING_1V8, + SD_IO_USING_1V8); + break; + default: + return -EINVAL; + } + + /* set pad drive */ + rts5260_fill_driving(pcr, voltage); + + return 0; +} + +static void rts5260_stop_cmd(struct rtsx_pcr *pcr) +{ + rtsx_pci_writel(pcr, RTSX_HCBCTLR, STOP_CMD); + rtsx_pci_writel(pcr, RTSX_HDBCTLR, STOP_DMA); + rtsx_pci_write_register(pcr, RTS5260_DMA_RST_CTL_0, + RTS5260_DMA_RST | RTS5260_ADMA3_RST, + RTS5260_DMA_RST | RTS5260_ADMA3_RST); + rtsx_pci_write_register(pcr, RBCTL, RB_FLUSH, RB_FLUSH); +} + +static void rts5260_card_before_power_off(struct rtsx_pcr *pcr) +{ + rts5260_stop_cmd(pcr); + rts5260_switch_output_voltage(pcr, OUTPUT_3V3); + +} + +static int rts5260_card_power_off(struct rtsx_pcr *pcr, int card) +{ + int err = 0; + + rts5260_card_before_power_off(pcr); + err = rtsx_pci_write_register(pcr, LDO_VCC_CFG1, + LDO_POW_SDVDD1_MASK, LDO_POW_SDVDD1_OFF); + err 
= rtsx_pci_write_register(pcr, LDO_CONFIG2, + DV331812_POWERON, DV331812_POWEROFF); + if (pcr->option.ocp_en) + rtsx_pci_disable_ocp(pcr); + + return err; +} + +static void rts5260_init_ocp(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + if (option->ocp_en) { + u8 mask, val; + + + rtsx_pci_write_register(pcr, RTS5260_DVCC_CTRL, + RTS5260_DVCC_OCP_THD_MASK, + option->sd_800mA_ocp_thd); + + rtsx_pci_write_register(pcr, RTS5260_DV331812_CFG, + RTS5260_DV331812_OCP_THD_MASK, + RTS5260_DV331812_OCP_THD_270); + + mask = SD_OCP_GLITCH_MASK; + val = pcr->hw_param.ocp_glitch; + rtsx_pci_write_register(pcr, REG_OCPGLITCH, mask, val); + rtsx_pci_write_register(pcr, RTS5260_DVCC_CTRL, + RTS5260_DVCC_OCP_EN | + RTS5260_DVCC_OCP_CL_EN, + RTS5260_DVCC_OCP_EN | + RTS5260_DVCC_OCP_CL_EN); + + rtsx_pci_enable_ocp(pcr); + } else { + rtsx_pci_write_register(pcr, RTS5260_DVCC_CTRL, + RTS5260_DVCC_OCP_EN | + RTS5260_DVCC_OCP_CL_EN, 0); + } +} + +static void rts5260_enable_ocp(struct rtsx_pcr *pcr) +{ + u8 val = 0; + + val = SD_OCP_INT_EN | SD_DETECT_EN; + rtsx_pci_write_register(pcr, REG_OCPCTL, 0xFF, val); + +} + +static void rts5260_disable_ocp(struct rtsx_pcr *pcr) +{ + u8 mask = 0; + + mask = SD_OCP_INT_EN | SD_DETECT_EN; + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0); + +} + + +static int rts5260_get_ocpstat(struct rtsx_pcr *pcr, u8 *val) +{ + return rtsx_pci_read_register(pcr, REG_OCPSTAT, val); +} + +static int rts5260_get_ocpstat2(struct rtsx_pcr *pcr, u8 *val) +{ + return rtsx_pci_read_register(pcr, REG_DV3318_OCPSTAT, val); +} + +static void rts5260_clear_ocpstat(struct rtsx_pcr *pcr) +{ + u8 mask = 0; + u8 val = 0; + + mask = SD_OCP_INT_CLR | SD_OC_CLR; + val = SD_OCP_INT_CLR | SD_OC_CLR; + + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, val); + rtsx_pci_write_register(pcr, REG_DV3318_OCPCTL, + DV3318_OCP_INT_CLR | DV3318_OCP_CLR, + DV3318_OCP_INT_CLR | DV3318_OCP_CLR); + udelay(10); + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0); + rtsx_pci_write_register(pcr, REG_DV3318_OCPCTL, + DV3318_OCP_INT_CLR | DV3318_OCP_CLR, 0); +} + +static void rts5260_process_ocp(struct rtsx_pcr *pcr) +{ + if (!pcr->option.ocp_en) + return; + + rtsx_pci_get_ocpstat(pcr, &pcr->ocp_stat); + rts5260_get_ocpstat2(pcr, &pcr->ocp_stat2); + + if ((pcr->ocp_stat & (SD_OC_NOW | SD_OC_EVER)) || + (pcr->ocp_stat2 & (DV3318_OCP_NOW | DV3318_OCP_EVER))) { + rtsx_pci_card_power_off(pcr, RTSX_SD_CARD); + rtsx_pci_write_register(pcr, CARD_OE, SD_OUTPUT_EN, 0); + rtsx_pci_clear_ocpstat(pcr); + pcr->ocp_stat = 0; + pcr->ocp_stat2 = 0; + } + +} + +static int rts5260_init_hw(struct rtsx_pcr *pcr) +{ + int err; + + rtsx_pci_init_cmd(pcr); + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, L1SUB_CONFIG1, + AUX_CLK_ACTIVE_SEL_MASK, MAC_CKSW_DONE); + /* Rest L1SUB Config */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, L1SUB_CONFIG3, 0xFF, 0x00); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PM_CLK_FORCE_CTL, + CLK_PM_EN, CLK_PM_EN); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWD_SUSPEND_EN, 0xFF, 0xFF); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + PWR_GATE_EN, PWR_GATE_EN); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, REG_VREF, + PWD_SUSPND_EN, PWD_SUSPND_EN); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RBCTL, + U_AUTO_DMA_EN_MASK, U_AUTO_DMA_DISABLE); + + if (pcr->flags & PCR_REVERSE_SOCKET) + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB0, 0xB0); + else + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB0, 0x80); + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OBFF_CFG, + OBFF_EN_MASK, OBFF_DISABLE); + + err = rtsx_pci_send_cmd(pcr, 
CMD_TIMEOUT_DEF); + if (err < 0) + return err; + + rtsx_pci_init_ocp(pcr); + + return 0; +} + +static void rts5260_pwr_saving_setting(struct rtsx_pcr *pcr) +{ + int lss_l1_1, lss_l1_2; + + lss_l1_1 = rtsx_check_dev_flag(pcr, ASPM_L1_1_EN) + | rtsx_check_dev_flag(pcr, PM_L1_1_EN); + lss_l1_2 = rtsx_check_dev_flag(pcr, ASPM_L1_2_EN) + | rtsx_check_dev_flag(pcr, PM_L1_2_EN); + + rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, 0xFF, 0); + if (lss_l1_2) { + pcr_dbg(pcr, "Set parameters for L1.2."); + rtsx_pci_write_register(pcr, PWR_GLOBAL_CTRL, + 0xFF, PCIE_L1_2_EN); + rtsx_pci_write_register(pcr, RTS5260_DVCC_CTRL, + RTS5260_DVCC_OCP_EN | + RTS5260_DVCC_OCP_CL_EN, + RTS5260_DVCC_OCP_EN | + RTS5260_DVCC_OCP_CL_EN); + + rtsx_pci_write_register(pcr, PWR_FE_CTL, + 0xFF, PCIE_L1_2_PD_FE_EN); + } else if (lss_l1_1) { + pcr_dbg(pcr, "Set parameters for L1.1."); + rtsx_pci_write_register(pcr, PWR_GLOBAL_CTRL, + 0xFF, PCIE_L1_1_EN); + rtsx_pci_write_register(pcr, PWR_FE_CTL, + 0xFF, PCIE_L1_1_PD_FE_EN); + } else { + pcr_dbg(pcr, "Set parameters for L1."); + rtsx_pci_write_register(pcr, PWR_GLOBAL_CTRL, + 0xFF, PCIE_L1_0_EN); + rtsx_pci_write_register(pcr, PWR_FE_CTL, + 0xFF, PCIE_L1_0_PD_FE_EN); + } + + rtsx_pci_write_register(pcr, CFG_L1_0_PCIE_DPHY_RET_VALUE, + 0xFF, CFG_L1_0_RET_VALUE_DEFAULT); + rtsx_pci_write_register(pcr, CFG_L1_0_PCIE_MAC_RET_VALUE, + 0xFF, CFG_L1_0_RET_VALUE_DEFAULT); + rtsx_pci_write_register(pcr, CFG_L1_0_CRC_SD30_RET_VALUE, + 0xFF, CFG_L1_0_RET_VALUE_DEFAULT); + rtsx_pci_write_register(pcr, CFG_L1_0_CRC_SD40_RET_VALUE, + 0xFF, CFG_L1_0_RET_VALUE_DEFAULT); + rtsx_pci_write_register(pcr, CFG_L1_0_SYS_RET_VALUE, + 0xFF, CFG_L1_0_RET_VALUE_DEFAULT); + /*Option cut APHY*/ + rtsx_pci_write_register(pcr, CFG_PCIE_APHY_OFF_0, + 0xFF, CFG_PCIE_APHY_OFF_0_DEFAULT); + rtsx_pci_write_register(pcr, CFG_PCIE_APHY_OFF_1, + 0xFF, CFG_PCIE_APHY_OFF_1_DEFAULT); + rtsx_pci_write_register(pcr, CFG_PCIE_APHY_OFF_2, + 0xFF, CFG_PCIE_APHY_OFF_2_DEFAULT); + rtsx_pci_write_register(pcr, CFG_PCIE_APHY_OFF_3, + 0xFF, CFG_PCIE_APHY_OFF_3_DEFAULT); + /*CDR DEC*/ + rtsx_pci_write_register(pcr, PWC_CDR, 0xFF, PWC_CDR_DEFAULT); + /*PWMPFM*/ + rtsx_pci_write_register(pcr, CFG_LP_FPWM_VALUE, + 0xFF, CFG_LP_FPWM_VALUE_DEFAULT); + /*No Power Saving WA*/ + rtsx_pci_write_register(pcr, CFG_L1_0_CRC_MISC_RET_VALUE, + 0xFF, CFG_L1_0_CRC_MISC_RET_VALUE_DEFAULT); +} + +static void rts5260_init_from_cfg(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + rts5260_pwr_saving_setting(pcr); + + if (option->ltr_en) { + if (option->ltr_enabled) + rtsx_set_ltr_latency(pcr, option->ltr_active_latency); + } +} + +static int rts5260_extra_init_hw(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + /* Set mcu_cnt to 7 to ensure data can be sampled properly */ + rtsx_pci_write_register(pcr, 0xFC03, 0x7F, 0x07); + rtsx_pci_write_register(pcr, SSC_DIV_N_0, 0xFF, 0x5D); + + rts5260_init_from_cfg(pcr); + + /* force no MDIO*/ + rtsx_pci_write_register(pcr, RTS5260_AUTOLOAD_CFG4, + 0xFF, RTS5260_MIMO_DISABLE); + /*Modify SDVCC Tune Default Parameters!*/ + rtsx_pci_write_register(pcr, LDO_VCC_CFG0, + RTS5260_DVCC_TUNE_MASK, RTS5260_DVCC_33); + + rtsx_pci_write_register(pcr, PCLK_CTL, PCLK_MODE_SEL, PCLK_MODE_SEL); + + rts5260_init_hw(pcr); + + /* + * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced + * to drive low, and we forcibly request clock. 
+ */ + if (option->force_clkreq_0) + rtsx_pci_write_register(pcr, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW); + else + rtsx_pci_write_register(pcr, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH); + + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x00); + + return 0; +} + +static void rts5260_set_l1off_cfg_sub_d0(struct rtsx_pcr *pcr, int active) +{ + struct rtsx_cr_option *option = &pcr->option; + u32 interrupt = rtsx_pci_readl(pcr, RTSX_BIPR); + int card_exist = (interrupt & SD_EXIST) | (interrupt & MS_EXIST); + int aspm_L1_1, aspm_L1_2; + u8 val = 0; + + aspm_L1_1 = rtsx_check_dev_flag(pcr, ASPM_L1_1_EN); + aspm_L1_2 = rtsx_check_dev_flag(pcr, ASPM_L1_2_EN); + + if (active) { + /* run, latency: 60us */ + if (aspm_L1_1) + val = option->ltr_l1off_snooze_sspwrgate; + } else { + /* l1off, latency: 300us */ + if (aspm_L1_2) + val = option->ltr_l1off_sspwrgate; + } + + if (aspm_L1_1 || aspm_L1_2) { + if (rtsx_check_dev_flag(pcr, + LTR_L1SS_PWR_GATE_CHECK_CARD_EN)) { + if (card_exist) + val &= ~L1OFF_MBIAS2_EN_5250; + else + val |= L1OFF_MBIAS2_EN_5250; + } + } + rtsx_set_l1off_sub(pcr, val); +} + +static const struct pcr_ops rts5260_pcr_ops = { + .fetch_vendor_settings = rtsx_base_fetch_vendor_settings, + .turn_on_led = rts5260_turn_on_led, + .turn_off_led = rts5260_turn_off_led, + .extra_init_hw = rts5260_extra_init_hw, + .enable_auto_blink = rtsx_base_enable_auto_blink, + .disable_auto_blink = rtsx_base_disable_auto_blink, + .card_power_on = rts5260_card_power_on, + .card_power_off = rts5260_card_power_off, + .switch_output_voltage = rts5260_switch_output_voltage, + .stop_cmd = rts5260_stop_cmd, + .set_l1off_cfg_sub_d0 = rts5260_set_l1off_cfg_sub_d0, + .enable_ocp = rts5260_enable_ocp, + .disable_ocp = rts5260_disable_ocp, + .init_ocp = rts5260_init_ocp, + .process_ocp = rts5260_process_ocp, + .get_ocpstat = rts5260_get_ocpstat, + .clear_ocpstat = rts5260_clear_ocpstat, +}; + +void rts5260_init_params(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + struct rtsx_hw_param *hw_param = &pcr->hw_param; + + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; + pcr->num_slots = 2; + + pcr->flags = 0; + pcr->card_drive_sel = RTSX_CARD_DRIVE_DEFAULT; + pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B; + pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B; + pcr->aspm_en = ASPM_L1_EN; + pcr->aspm_mode = ASPM_MODE_REG; + pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 29, 11); + pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5); + + pcr->ic_version = rts5260_get_ic_version(pcr); + pcr->sd_pull_ctl_enable_tbl = rts5260_sd_pull_ctl_enable_tbl; + pcr->sd_pull_ctl_disable_tbl = rts5260_sd_pull_ctl_disable_tbl; + pcr->ms_pull_ctl_enable_tbl = rts5260_ms_pull_ctl_enable_tbl; + pcr->ms_pull_ctl_disable_tbl = rts5260_ms_pull_ctl_disable_tbl; + + pcr->reg_pm_ctrl3 = RTS524A_PM_CTRL3; + + pcr->ops = &rts5260_pcr_ops; + + option->dev_flags = (LTR_L1SS_PWR_GATE_CHECK_CARD_EN + | LTR_L1SS_PWR_GATE_EN); + option->ltr_en = true; + + /* init latency of active, idle, L1OFF to 60us, 300us, 3ms */ + option->ltr_active_latency = LTR_ACTIVE_LATENCY_DEF; + option->ltr_idle_latency = LTR_IDLE_LATENCY_DEF; + option->ltr_l1off_latency = LTR_L1OFF_LATENCY_DEF; + option->l1_snooze_delay = L1_SNOOZE_DELAY_DEF; + option->ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF; + option->ltr_l1off_snooze_sspwrgate = + LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF; + + option->ocp_en = 1; + if (option->ocp_en) + hw_param->interrupt_en |= SD_OC_INT_EN; + hw_param->ocp_glitch = SD_OCP_GLITCH_100U | 
SDVIO_OCP_GLITCH_800U; + option->sd_400mA_ocp_thd = RTS5260_DVCC_OCP_THD_550; + option->sd_800mA_ocp_thd = RTS5260_DVCC_OCP_THD_970; +} diff --git a/drivers/misc/cardreader/rts5260.h b/drivers/misc/cardreader/rts5260.h new file mode 100644 index 0000000000..53a1411c88 --- /dev/null +++ b/drivers/misc/cardreader/rts5260.h @@ -0,0 +1,45 @@ +#ifndef __RTS5260_H__ +#define __RTS5260_H__ + +#define RTS5260_DVCC_CTRL 0xFF73 +#define RTS5260_DVCC_OCP_EN (0x01 << 7) +#define RTS5260_DVCC_OCP_THD_MASK (0x07 << 4) +#define RTS5260_DVCC_POWERON (0x01 << 3) +#define RTS5260_DVCC_OCP_CL_EN (0x01 << 2) + +#define RTS5260_DVIO_CTRL 0xFF75 +#define RTS5260_DVIO_OCP_EN (0x01 << 7) +#define RTS5260_DVIO_OCP_THD_MASK (0x07 << 4) +#define RTS5260_DVIO_POWERON (0x01 << 3) +#define RTS5260_DVIO_OCP_CL_EN (0x01 << 2) + +#define RTS5260_DV331812_CFG 0xFF71 +#define RTS5260_DV331812_OCP_EN (0x01 << 7) +#define RTS5260_DV331812_OCP_THD_MASK (0x07 << 4) +#define RTS5260_DV331812_POWERON (0x01 << 3) +#define RTS5260_DV331812_SEL (0x01 << 2) +#define RTS5260_DV331812_VDD1 (0x01 << 2) +#define RTS5260_DV331812_VDD2 (0x00 << 2) + +#define RTS5260_DV331812_OCP_THD_120 (0x00 << 4) +#define RTS5260_DV331812_OCP_THD_140 (0x01 << 4) +#define RTS5260_DV331812_OCP_THD_160 (0x02 << 4) +#define RTS5260_DV331812_OCP_THD_180 (0x03 << 4) +#define RTS5260_DV331812_OCP_THD_210 (0x04 << 4) +#define RTS5260_DV331812_OCP_THD_240 (0x05 << 4) +#define RTS5260_DV331812_OCP_THD_270 (0x06 << 4) +#define RTS5260_DV331812_OCP_THD_300 (0x07 << 4) + +#define RTS5260_DVIO_OCP_THD_250 (0x00 << 4) +#define RTS5260_DVIO_OCP_THD_300 (0x01 << 4) +#define RTS5260_DVIO_OCP_THD_350 (0x02 << 4) +#define RTS5260_DVIO_OCP_THD_400 (0x03 << 4) +#define RTS5260_DVIO_OCP_THD_450 (0x04 << 4) +#define RTS5260_DVIO_OCP_THD_500 (0x05 << 4) +#define RTS5260_DVIO_OCP_THD_550 (0x06 << 4) +#define RTS5260_DVIO_OCP_THD_600 (0x07 << 4) + +#define RTS5260_DVCC_OCP_THD_550 (0x00 << 4) +#define RTS5260_DVCC_OCP_THD_970 (0x05 << 4) + +#endif diff --git a/drivers/misc/cardreader/rts5261.c b/drivers/misc/cardreader/rts5261.c new file mode 100644 index 0000000000..67252512a1 --- /dev/null +++ b/drivers/misc/cardreader/rts5261.c @@ -0,0 +1,806 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2018-2019 Realtek Semiconductor Corp. All rights reserved. 
+ * + * Author: + * Rui FENG + * Wei WANG + */ + +#include +#include +#include + +#include "rts5261.h" +#include "rtsx_pcr.h" + +static u8 rts5261_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val); + return val & IC_VERSION_MASK; +} + +static void rts5261_fill_driving(struct rtsx_pcr *pcr, u8 voltage) +{ + u8 driving_3v3[4][3] = { + {0x96, 0x96, 0x96}, + {0x96, 0x96, 0x96}, + {0x7F, 0x7F, 0x7F}, + {0x13, 0x13, 0x13}, + }; + u8 driving_1v8[4][3] = { + {0xB3, 0xB3, 0xB3}, + {0x3A, 0x3A, 0x3A}, + {0xE6, 0xE6, 0xE6}, + {0x99, 0x99, 0x99}, + }; + u8 (*driving)[3], drive_sel; + + if (voltage == OUTPUT_3V3) { + driving = driving_3v3; + drive_sel = pcr->sd30_drive_sel_3v3; + } else { + driving = driving_1v8; + drive_sel = pcr->sd30_drive_sel_1v8; + } + + rtsx_pci_write_register(pcr, SD30_CLK_DRIVE_SEL, + 0xFF, driving[drive_sel][0]); + + rtsx_pci_write_register(pcr, SD30_CMD_DRIVE_SEL, + 0xFF, driving[drive_sel][1]); + + rtsx_pci_write_register(pcr, SD30_DAT_DRIVE_SEL, + 0xFF, driving[drive_sel][2]); +} + +static void rts5261_force_power_down(struct rtsx_pcr *pcr, u8 pm_state, bool runtime) +{ + /* Set relink_time to 0 */ + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, MASK_8_BIT_DEF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3, + RELINK_TIME_MASK, 0); + + if (pm_state == HOST_ENTER_S3) + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, + D3_DELINK_MODE_EN, D3_DELINK_MODE_EN); + + if (!runtime) { + rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG1, + CD_RESUME_EN_MASK, 0); + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x01, 0x00); + rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL, + FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL); + + } else { + rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL, + FORCE_PM_CONTROL | FORCE_PM_VALUE, 0); + + rtsx_pci_write_register(pcr, RTS5261_FW_CTL, + RTS5261_INFORM_RTD3_COLD, RTS5261_INFORM_RTD3_COLD); + rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG4, + RTS5261_FORCE_PRSNT_LOW, RTS5261_FORCE_PRSNT_LOW); + + } + + rtsx_pci_write_register(pcr, RTS5261_REG_FPDCTL, + SSC_POWER_DOWN, SSC_POWER_DOWN); +} + +static int rts5261_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, + LED_SHINE_MASK, LED_SHINE_EN); +} + +static int rts5261_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, + LED_SHINE_MASK, LED_SHINE_DISABLE); +} + +static int rts5261_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, + 0x02, 0x02); +} + +static int rts5261_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, + 0x02, 0x00); +} + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rts5261_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rts5261_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), + 0, +}; + +static int rts5261_sd_set_sample_push_timing_sd30(struct rtsx_pcr *pcr) +{ + rtsx_pci_write_register(pcr, SD_CFG1, SD_MODE_SELECT_MASK + | 
SD_ASYNC_FIFO_NOT_RST, SD_30_MODE | SD_ASYNC_FIFO_NOT_RST); + rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, CLK_LOW_FREQ); + rtsx_pci_write_register(pcr, CARD_CLK_SOURCE, 0xFF, + CRC_VAR_CLK0 | SD30_FIX_CLK | SAMPLE_VAR_CLK1); + rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0); + + return 0; +} + +static int rts5261_card_power_on(struct rtsx_pcr *pcr, int card) +{ + struct rtsx_cr_option *option = &pcr->option; + + if (option->ocp_en) + rtsx_pci_enable_ocp(pcr); + + rtsx_pci_write_register(pcr, REG_CRC_DUMMY_0, + CFG_SD_POW_AUTO_PD, CFG_SD_POW_AUTO_PD); + + rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG1, + RTS5261_LDO1_TUNE_MASK, RTS5261_LDO1_33); + rtsx_pci_write_register(pcr, RTS5261_LDO1233318_POW_CTL, + RTS5261_LDO1_POWERON, RTS5261_LDO1_POWERON); + + rtsx_pci_write_register(pcr, RTS5261_LDO1233318_POW_CTL, + RTS5261_LDO3318_POWERON, RTS5261_LDO3318_POWERON); + + msleep(20); + + rtsx_pci_write_register(pcr, CARD_OE, SD_OUTPUT_EN, SD_OUTPUT_EN); + + /* Initialize SD_CFG1 register */ + rtsx_pci_write_register(pcr, SD_CFG1, 0xFF, + SD_CLK_DIVIDE_128 | SD_20_MODE | SD_BUS_WIDTH_1BIT); + + rtsx_pci_write_register(pcr, SD_SAMPLE_POINT_CTL, + 0xFF, SD20_RX_POS_EDGE); + rtsx_pci_write_register(pcr, SD_PUSH_POINT_CTL, 0xFF, 0); + rtsx_pci_write_register(pcr, CARD_STOP, SD_STOP | SD_CLR_ERR, + SD_STOP | SD_CLR_ERR); + + /* Reset SD_CFG3 register */ + rtsx_pci_write_register(pcr, SD_CFG3, SD30_CLK_END_EN, 0); + rtsx_pci_write_register(pcr, REG_SD_STOP_SDCLK_CFG, + SD30_CLK_STOP_CFG_EN | SD30_CLK_STOP_CFG1 | + SD30_CLK_STOP_CFG0, 0); + + if (pcr->extra_caps & EXTRA_CAPS_SD_SDR50 || + pcr->extra_caps & EXTRA_CAPS_SD_SDR104) + rts5261_sd_set_sample_push_timing_sd30(pcr); + + return 0; +} + +static int rts5261_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + int err; + u16 val = 0; + + rtsx_pci_write_register(pcr, RTS5261_CARD_PWR_CTL, + RTS5261_PUPDC, RTS5261_PUPDC); + + switch (voltage) { + case OUTPUT_3V3: + rtsx_pci_read_phy_register(pcr, PHY_TUNE, &val); + val |= PHY_TUNE_SDBUS_33; + err = rtsx_pci_write_phy_register(pcr, PHY_TUNE, val); + if (err < 0) + return err; + + rtsx_pci_write_register(pcr, RTS5261_DV3318_CFG, + RTS5261_DV3318_TUNE_MASK, RTS5261_DV3318_33); + rtsx_pci_write_register(pcr, SD_PAD_CTL, + SD_IO_USING_1V8, 0); + break; + case OUTPUT_1V8: + rtsx_pci_read_phy_register(pcr, PHY_TUNE, &val); + val &= ~PHY_TUNE_SDBUS_33; + err = rtsx_pci_write_phy_register(pcr, PHY_TUNE, val); + if (err < 0) + return err; + + rtsx_pci_write_register(pcr, RTS5261_DV3318_CFG, + RTS5261_DV3318_TUNE_MASK, RTS5261_DV3318_18); + rtsx_pci_write_register(pcr, SD_PAD_CTL, + SD_IO_USING_1V8, SD_IO_USING_1V8); + break; + default: + return -EINVAL; + } + + /* set pad drive */ + rts5261_fill_driving(pcr, voltage); + + return 0; +} + +static void rts5261_stop_cmd(struct rtsx_pcr *pcr) +{ + rtsx_pci_writel(pcr, RTSX_HCBCTLR, STOP_CMD); + rtsx_pci_writel(pcr, RTSX_HDBCTLR, STOP_DMA); + rtsx_pci_write_register(pcr, RTS5260_DMA_RST_CTL_0, + RTS5260_DMA_RST | RTS5260_ADMA3_RST, + RTS5260_DMA_RST | RTS5260_ADMA3_RST); + rtsx_pci_write_register(pcr, RBCTL, RB_FLUSH, RB_FLUSH); +} + +static void rts5261_card_before_power_off(struct rtsx_pcr *pcr) +{ + rts5261_stop_cmd(pcr); + rts5261_switch_output_voltage(pcr, OUTPUT_3V3); + +} + +static void rts5261_enable_ocp(struct rtsx_pcr *pcr) +{ + u8 val = 0; + + val = SD_OCP_INT_EN | SD_DETECT_EN; + rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0, + RTS5261_LDO1_OCP_EN | RTS5261_LDO1_OCP_LMT_EN, + RTS5261_LDO1_OCP_EN | RTS5261_LDO1_OCP_LMT_EN); + 
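+	/* LDO1 over-current protection and current limiting are armed above; now unmask the SD over-current interrupt and detection logic. */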
rtsx_pci_write_register(pcr, REG_OCPCTL, 0xFF, val); + +} + +static void rts5261_disable_ocp(struct rtsx_pcr *pcr) +{ + u8 mask = 0; + + mask = SD_OCP_INT_EN | SD_DETECT_EN; + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0); + rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0, + RTS5261_LDO1_OCP_EN | RTS5261_LDO1_OCP_LMT_EN, 0); + +} + +static int rts5261_card_power_off(struct rtsx_pcr *pcr, int card) +{ + int err = 0; + + rts5261_card_before_power_off(pcr); + err = rtsx_pci_write_register(pcr, RTS5261_LDO1233318_POW_CTL, + RTS5261_LDO_POWERON_MASK, 0); + + rtsx_pci_write_register(pcr, REG_CRC_DUMMY_0, + CFG_SD_POW_AUTO_PD, 0); + if (pcr->option.ocp_en) + rtsx_pci_disable_ocp(pcr); + + return err; +} + +static void rts5261_init_ocp(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + if (option->ocp_en) { + u8 mask, val; + + rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0, + RTS5261_LDO1_OCP_EN | RTS5261_LDO1_OCP_LMT_EN, + RTS5261_LDO1_OCP_EN | RTS5261_LDO1_OCP_LMT_EN); + + rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0, + RTS5261_LDO1_OCP_THD_MASK, option->sd_800mA_ocp_thd); + + rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0, + RTS5261_LDO1_OCP_LMT_THD_MASK, + RTS5261_LDO1_LMT_THD_2000); + + mask = SD_OCP_GLITCH_MASK; + val = pcr->hw_param.ocp_glitch; + rtsx_pci_write_register(pcr, REG_OCPGLITCH, mask, val); + + rts5261_enable_ocp(pcr); + } else { + rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0, + RTS5261_LDO1_OCP_EN | RTS5261_LDO1_OCP_LMT_EN, 0); + } +} + +static void rts5261_clear_ocpstat(struct rtsx_pcr *pcr) +{ + u8 mask = 0; + u8 val = 0; + + mask = SD_OCP_INT_CLR | SD_OC_CLR; + val = SD_OCP_INT_CLR | SD_OC_CLR; + + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, val); + + udelay(1000); + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0); + +} + +static void rts5261_process_ocp(struct rtsx_pcr *pcr) +{ + if (!pcr->option.ocp_en) + return; + + rtsx_pci_get_ocpstat(pcr, &pcr->ocp_stat); + + if (pcr->ocp_stat & (SD_OC_NOW | SD_OC_EVER)) { + rts5261_clear_ocpstat(pcr); + rts5261_card_power_off(pcr, RTSX_SD_CARD); + rtsx_pci_write_register(pcr, CARD_OE, SD_OUTPUT_EN, 0); + pcr->ocp_stat = 0; + } + +} + +static void rts5261_init_from_hw(struct rtsx_pcr *pcr) +{ + struct pci_dev *pdev = pcr->pci; + u32 lval1, lval2, i; + u16 setting_reg1, setting_reg2; + u8 valid, efuse_valid, tmp; + + rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL, + REG_EFUSE_POR | REG_EFUSE_POWER_MASK, + REG_EFUSE_POR | REG_EFUSE_POWERON); + udelay(1); + rtsx_pci_write_register(pcr, RTS5261_EFUSE_ADDR, + RTS5261_EFUSE_ADDR_MASK, 0x00); + rtsx_pci_write_register(pcr, RTS5261_EFUSE_CTL, + RTS5261_EFUSE_ENABLE | RTS5261_EFUSE_MODE_MASK, + RTS5261_EFUSE_ENABLE); + + /* Wait transfer end */ + for (i = 0; i < MAX_RW_REG_CNT; i++) { + rtsx_pci_read_register(pcr, RTS5261_EFUSE_CTL, &tmp); + if ((tmp & 0x80) == 0) + break; + } + rtsx_pci_read_register(pcr, RTS5261_EFUSE_READ_DATA, &tmp); + efuse_valid = ((tmp & 0x0C) >> 2); + pcr_dbg(pcr, "Load efuse valid: 0x%x\n", efuse_valid); + + pci_read_config_dword(pdev, PCR_SETTING_REG2, &lval2); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, lval2); + /* 0x816 */ + valid = (u8)((lval2 >> 16) & 0x03); + + rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL, + REG_EFUSE_POR, 0); + pcr_dbg(pcr, "Disable efuse por!\n"); + + if (efuse_valid == 2 || efuse_valid == 3) { + if (valid == 3) { + /* Bypass efuse */ + setting_reg1 = PCR_SETTING_REG1; + setting_reg2 = PCR_SETTING_REG2; + } else { + /* Use efuse data */ + setting_reg1 = PCR_SETTING_REG4; + 
setting_reg2 = PCR_SETTING_REG5; + } + } else if (efuse_valid == 0) { + // default + setting_reg1 = PCR_SETTING_REG1; + setting_reg2 = PCR_SETTING_REG2; + } else { + return; + } + + pci_read_config_dword(pdev, setting_reg2, &lval2); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", setting_reg2, lval2); + + if (!rts5261_vendor_setting_valid(lval2)) { + /* Not support MMC default */ + pcr->extra_caps |= EXTRA_CAPS_NO_MMC; + pcr_dbg(pcr, "skip fetch vendor setting\n"); + return; + } + + if (!rts5261_reg_check_mmc_support(lval2)) + pcr->extra_caps |= EXTRA_CAPS_NO_MMC; + + pcr->rtd3_en = rts5261_reg_to_rtd3(lval2); + + if (rts5261_reg_check_reverse_socket(lval2)) + pcr->flags |= PCR_REVERSE_SOCKET; + + pci_read_config_dword(pdev, setting_reg1, &lval1); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", setting_reg1, lval1); + + pcr->aspm_en = rts5261_reg_to_aspm(lval1); + pcr->sd30_drive_sel_1v8 = rts5261_reg_to_sd30_drive_sel_1v8(lval1); + pcr->sd30_drive_sel_3v3 = rts5261_reg_to_sd30_drive_sel_3v3(lval1); + + if (setting_reg1 == PCR_SETTING_REG1) { + /* store setting */ + rtsx_pci_write_register(pcr, 0xFF0C, 0xFF, (u8)(lval1 & 0xFF)); + rtsx_pci_write_register(pcr, 0xFF0D, 0xFF, (u8)((lval1 >> 8) & 0xFF)); + rtsx_pci_write_register(pcr, 0xFF0E, 0xFF, (u8)((lval1 >> 16) & 0xFF)); + rtsx_pci_write_register(pcr, 0xFF0F, 0xFF, (u8)((lval1 >> 24) & 0xFF)); + rtsx_pci_write_register(pcr, 0xFF10, 0xFF, (u8)(lval2 & 0xFF)); + rtsx_pci_write_register(pcr, 0xFF11, 0xFF, (u8)((lval2 >> 8) & 0xFF)); + rtsx_pci_write_register(pcr, 0xFF12, 0xFF, (u8)((lval2 >> 16) & 0xFF)); + + pci_write_config_dword(pdev, PCR_SETTING_REG4, lval1); + lval2 = lval2 & 0x00FFFFFF; + pci_write_config_dword(pdev, PCR_SETTING_REG5, lval2); + } +} + +static void rts5261_init_from_cfg(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + if (option->ltr_en) { + if (option->ltr_enabled) + rtsx_set_ltr_latency(pcr, option->ltr_active_latency); + } +} + +static int rts5261_extra_init_hw(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + u32 val; + + rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG1, + CD_RESUME_EN_MASK, CD_RESUME_EN_MASK); + + rts5261_init_from_cfg(pcr); + rts5261_init_from_hw(pcr); + + /* power off efuse */ + rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL, + REG_EFUSE_POWER_MASK, REG_EFUSE_POWEROFF); + rtsx_pci_write_register(pcr, L1SUB_CONFIG1, + AUX_CLK_ACTIVE_SEL_MASK, MAC_CKSW_DONE); + rtsx_pci_write_register(pcr, L1SUB_CONFIG3, 0xFF, 0); + + if (is_version_higher_than(pcr, PID_5261, IC_VER_B)) { + val = rtsx_pci_readl(pcr, RTSX_DUM_REG); + rtsx_pci_writel(pcr, RTSX_DUM_REG, val | 0x1); + } + rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG4, + RTS5261_AUX_CLK_16M_EN, 0); + + /* Release PRSNT# */ + rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG4, + RTS5261_FORCE_PRSNT_LOW, 0); + rtsx_pci_write_register(pcr, FUNC_FORCE_CTL, + FUNC_FORCE_UPME_XMT_DBG, FUNC_FORCE_UPME_XMT_DBG); + + rtsx_pci_write_register(pcr, PCLK_CTL, + PCLK_MODE_SEL, PCLK_MODE_SEL); + + rtsx_pci_write_register(pcr, PM_EVENT_DEBUG, PME_DEBUG_0, PME_DEBUG_0); + rtsx_pci_write_register(pcr, PM_CLK_FORCE_CTL, CLK_PM_EN, CLK_PM_EN); + + /* LED shine disabled, set initial shine cycle period */ + rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x0F, 0x02); + + /* Configure driving */ + rts5261_fill_driving(pcr, OUTPUT_3V3); + + if (pcr->flags & PCR_REVERSE_SOCKET) + rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x30); + else + rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x00); + + /* + * If u_force_clkreq_0 is enabled, CLKREQ# 
PIN will be forced + * to drive low, and we forcibly request clock. + */ + if (option->force_clkreq_0) + rtsx_pci_write_register(pcr, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW); + else + rtsx_pci_write_register(pcr, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH); + + rtsx_pci_write_register(pcr, PWD_SUSPEND_EN, 0xFF, 0xFB); + + if (pcr->rtd3_en) { + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x01, 0x01); + rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL, + FORCE_PM_CONTROL | FORCE_PM_VALUE, + FORCE_PM_CONTROL | FORCE_PM_VALUE); + } else { + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x01, 0x00); + rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL, + FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL); + } + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, D3_DELINK_MODE_EN, 0x00); + + /* Clear Enter RTD3_cold Information*/ + rtsx_pci_write_register(pcr, RTS5261_FW_CTL, + RTS5261_INFORM_RTD3_COLD, 0); + + return 0; +} + +static void rts5261_enable_aspm(struct rtsx_pcr *pcr, bool enable) +{ + u8 val = FORCE_ASPM_CTL0 | FORCE_ASPM_CTL1; + u8 mask = FORCE_ASPM_VAL_MASK | FORCE_ASPM_CTL0 | FORCE_ASPM_CTL1; + + if (pcr->aspm_enabled == enable) + return; + + val |= (pcr->aspm_en & 0x02); + rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, mask, val); + pcie_capability_clear_and_set_word(pcr->pci, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPMC, pcr->aspm_en); + pcr->aspm_enabled = enable; +} + +static void rts5261_disable_aspm(struct rtsx_pcr *pcr, bool enable) +{ + u8 val = FORCE_ASPM_CTL0 | FORCE_ASPM_CTL1; + u8 mask = FORCE_ASPM_VAL_MASK | FORCE_ASPM_CTL0 | FORCE_ASPM_CTL1; + + if (pcr->aspm_enabled == enable) + return; + + pcie_capability_clear_and_set_word(pcr->pci, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPMC, 0); + rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, mask, val); + rtsx_pci_write_register(pcr, SD_CFG1, SD_ASYNC_FIFO_NOT_RST, 0); + udelay(10); + pcr->aspm_enabled = enable; +} + +static void rts5261_set_aspm(struct rtsx_pcr *pcr, bool enable) +{ + if (enable) + rts5261_enable_aspm(pcr, true); + else + rts5261_disable_aspm(pcr, false); +} + +static void rts5261_set_l1off_cfg_sub_d0(struct rtsx_pcr *pcr, int active) +{ + struct rtsx_cr_option *option = &pcr->option; + int aspm_L1_1, aspm_L1_2; + u8 val = 0; + + aspm_L1_1 = rtsx_check_dev_flag(pcr, ASPM_L1_1_EN); + aspm_L1_2 = rtsx_check_dev_flag(pcr, ASPM_L1_2_EN); + + if (active) { + /* run, latency: 60us */ + if (aspm_L1_1) + val = option->ltr_l1off_snooze_sspwrgate; + } else { + /* l1off, latency: 300us */ + if (aspm_L1_2) + val = option->ltr_l1off_sspwrgate; + } + + rtsx_set_l1off_sub(pcr, val); +} + +static const struct pcr_ops rts5261_pcr_ops = { + .turn_on_led = rts5261_turn_on_led, + .turn_off_led = rts5261_turn_off_led, + .extra_init_hw = rts5261_extra_init_hw, + .enable_auto_blink = rts5261_enable_auto_blink, + .disable_auto_blink = rts5261_disable_auto_blink, + .card_power_on = rts5261_card_power_on, + .card_power_off = rts5261_card_power_off, + .switch_output_voltage = rts5261_switch_output_voltage, + .force_power_down = rts5261_force_power_down, + .stop_cmd = rts5261_stop_cmd, + .set_aspm = rts5261_set_aspm, + .set_l1off_cfg_sub_d0 = rts5261_set_l1off_cfg_sub_d0, + .enable_ocp = rts5261_enable_ocp, + .disable_ocp = rts5261_disable_ocp, + .init_ocp = rts5261_init_ocp, + .process_ocp = rts5261_process_ocp, + .clear_ocpstat = rts5261_clear_ocpstat, +}; + +static inline u8 double_ssc_depth(u8 depth) +{ + return ((depth > 1) ? 
(depth - 1) : depth); +} + +int rts5261_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock, + u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk) +{ + int err, clk; + u16 n; + u8 clk_divider, mcu_cnt, div; + static const u8 depth[] = { + [RTSX_SSC_DEPTH_4M] = RTS5261_SSC_DEPTH_4M, + [RTSX_SSC_DEPTH_2M] = RTS5261_SSC_DEPTH_2M, + [RTSX_SSC_DEPTH_1M] = RTS5261_SSC_DEPTH_1M, + [RTSX_SSC_DEPTH_500K] = RTS5261_SSC_DEPTH_512K, + }; + + if (initial_mode) { + /* We use 250k(around) here, in initial stage */ + if (is_version_higher_than(pcr, PID_5261, IC_VER_C)) { + clk_divider = SD_CLK_DIVIDE_256; + card_clock = 60000000; + } else { + clk_divider = SD_CLK_DIVIDE_128; + card_clock = 30000000; + } + } else { + clk_divider = SD_CLK_DIVIDE_0; + } + err = rtsx_pci_write_register(pcr, SD_CFG1, + SD_CLK_DIVIDE_MASK, clk_divider); + if (err < 0) + return err; + + card_clock /= 1000000; + pcr_dbg(pcr, "Switch card clock to %dMHz\n", card_clock); + + clk = card_clock; + if (!initial_mode && double_clk) + clk = card_clock * 2; + pcr_dbg(pcr, "Internal SSC clock: %dMHz (cur_clock = %d)\n", + clk, pcr->cur_clock); + + if (clk == pcr->cur_clock) + return 0; + + if (pcr->ops->conv_clk_and_div_n) + n = pcr->ops->conv_clk_and_div_n(clk, CLK_TO_DIV_N); + else + n = clk - 4; + if ((clk <= 4) || (n > 396)) + return -EINVAL; + + mcu_cnt = 125/clk + 3; + if (mcu_cnt > 15) + mcu_cnt = 15; + + div = CLK_DIV_1; + while ((n < MIN_DIV_N_PCR - 4) && (div < CLK_DIV_8)) { + if (pcr->ops->conv_clk_and_div_n) { + int dbl_clk = pcr->ops->conv_clk_and_div_n(n, + DIV_N_TO_CLK) * 2; + n = pcr->ops->conv_clk_and_div_n(dbl_clk, + CLK_TO_DIV_N); + } else { + n = (n + 4) * 2 - 4; + } + div++; + } + + n = (n / 2) - 1; + pcr_dbg(pcr, "n = %d, div = %d\n", n, div); + + ssc_depth = depth[ssc_depth]; + if (double_clk) + ssc_depth = double_ssc_depth(ssc_depth); + + if (ssc_depth) { + if (div == CLK_DIV_2) { + if (ssc_depth > 1) + ssc_depth -= 1; + else + ssc_depth = RTS5261_SSC_DEPTH_8M; + } else if (div == CLK_DIV_4) { + if (ssc_depth > 2) + ssc_depth -= 2; + else + ssc_depth = RTS5261_SSC_DEPTH_8M; + } else if (div == CLK_DIV_8) { + if (ssc_depth > 3) + ssc_depth -= 3; + else + ssc_depth = RTS5261_SSC_DEPTH_8M; + } + } else { + ssc_depth = 0; + } + pcr_dbg(pcr, "ssc_depth = %d\n", ssc_depth); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_CTL, + CLK_LOW_FREQ, CLK_LOW_FREQ); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_DIV, + 0xFF, (div << 4) | mcu_cnt); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, 0); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, + SSC_DEPTH_MASK, ssc_depth); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_DIV_N_0, 0xFF, n); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, SSC_RSTB); + if (vpclk) { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, 0); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK1_CTL, + PHASE_NOT_RESET, 0); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, PHASE_NOT_RESET); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK1_CTL, + PHASE_NOT_RESET, PHASE_NOT_RESET); + } + + err = rtsx_pci_send_cmd(pcr, 2000); + if (err < 0) + return err; + + /* Wait SSC clock stable */ + udelay(SSC_CLOCK_STABLE_WAIT); + err = rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0); + if (err < 0) + return err; + + pcr->cur_clock = clk; + return 0; + +} + +void rts5261_init_params(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + struct rtsx_hw_param *hw_param = &pcr->hw_param; + u8 val; + + 
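+	/* SDR50/SDR104 are always advertised; SD Express is added only if RTS5261_FW_STATUS reports no express-link failure. */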
pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; + rtsx_pci_read_register(pcr, RTS5261_FW_STATUS, &val); + if (!(val & RTS5261_EXPRESS_LINK_FAIL_MASK)) + pcr->extra_caps |= EXTRA_CAPS_SD_EXPRESS; + pcr->num_slots = 1; + pcr->ops = &rts5261_pcr_ops; + + pcr->flags = 0; + pcr->card_drive_sel = RTSX_CARD_DRIVE_DEFAULT; + pcr->sd30_drive_sel_1v8 = 0x00; + pcr->sd30_drive_sel_3v3 = 0x00; + pcr->aspm_en = ASPM_L1_EN; + pcr->aspm_mode = ASPM_MODE_REG; + pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 11); + pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5); + + pcr->ic_version = rts5261_get_ic_version(pcr); + pcr->sd_pull_ctl_enable_tbl = rts5261_sd_pull_ctl_enable_tbl; + pcr->sd_pull_ctl_disable_tbl = rts5261_sd_pull_ctl_disable_tbl; + + pcr->reg_pm_ctrl3 = RTS5261_AUTOLOAD_CFG3; + + option->dev_flags = (LTR_L1SS_PWR_GATE_CHECK_CARD_EN + | LTR_L1SS_PWR_GATE_EN); + option->ltr_en = true; + + /* init latency of active, idle, L1OFF to 60us, 300us, 3ms */ + option->ltr_active_latency = LTR_ACTIVE_LATENCY_DEF; + option->ltr_idle_latency = LTR_IDLE_LATENCY_DEF; + option->ltr_l1off_latency = LTR_L1OFF_LATENCY_DEF; + option->l1_snooze_delay = L1_SNOOZE_DELAY_DEF; + option->ltr_l1off_sspwrgate = 0x7F; + option->ltr_l1off_snooze_sspwrgate = 0x78; + + option->ocp_en = 1; + hw_param->interrupt_en |= SD_OC_INT_EN; + hw_param->ocp_glitch = SD_OCP_GLITCH_800U; + option->sd_800mA_ocp_thd = RTS5261_LDO1_OCP_THD_1040; +} diff --git a/drivers/misc/cardreader/rts5261.h b/drivers/misc/cardreader/rts5261.h new file mode 100644 index 0000000000..2344c560a6 --- /dev/null +++ b/drivers/misc/cardreader/rts5261.h @@ -0,0 +1,206 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2018-2019 Realtek Semiconductor Corp. All rights reserved. 
+ * + * Author: + * Rui FENG + * Wei WANG + */ +#ifndef RTS5261_H +#define RTS5261_H + +/*New add*/ +#define rts5261_vendor_setting_valid(reg) ((reg) & 0x010000) +#define rts5261_reg_to_aspm(reg) \ + (((~(reg) >> 28) & 0x02) | (((reg) >> 28) & 0x01)) +#define rts5261_reg_check_reverse_socket(reg) ((reg) & 0x04) +#define rts5261_reg_to_sd30_drive_sel_1v8(reg) (((reg) >> 22) & 0x03) +#define rts5261_reg_to_sd30_drive_sel_3v3(reg) (((reg) >> 16) & 0x03) +#define rts5261_reg_to_rtd3(reg) ((reg) & 0x08) +#define rts5261_reg_check_mmc_support(reg) ((reg) & 0x10) + +#define RTS5261_AUTOLOAD_CFG0 0xFF7B +#define RTS5261_AUTOLOAD_CFG1 0xFF7C +#define RTS5261_AUTOLOAD_CFG2 0xFF7D +#define RTS5261_AUTOLOAD_CFG3 0xFF7E +#define RTS5261_AUTOLOAD_CFG4 0xFF7F +#define RTS5261_FORCE_PRSNT_LOW (1 << 6) +#define RTS5261_AUX_CLK_16M_EN (1 << 5) + +#define RTS5261_REG_VREF 0xFE97 +#define RTS5261_PWD_SUSPND_EN (1 << 4) + +#define RTS5261_PAD_H3L1 0xFF79 +#define PAD_GPIO_H3L1 (1 << 3) + +/* SSC_CTL2 0xFC12 */ +#define RTS5261_SSC_DEPTH_MASK 0x07 +#define RTS5261_SSC_DEPTH_DISALBE 0x00 +#define RTS5261_SSC_DEPTH_8M 0x01 +#define RTS5261_SSC_DEPTH_4M 0x02 +#define RTS5261_SSC_DEPTH_2M 0x03 +#define RTS5261_SSC_DEPTH_1M 0x04 +#define RTS5261_SSC_DEPTH_512K 0x05 +#define RTS5261_SSC_DEPTH_256K 0x06 +#define RTS5261_SSC_DEPTH_128K 0x07 + +/* efuse control register*/ +#define RTS5261_EFUSE_CTL 0xFC30 +#define RTS5261_EFUSE_ENABLE 0x80 +/* EFUSE_MODE: 0=READ 1=PROGRAM */ +#define RTS5261_EFUSE_MODE_MASK 0x40 +#define RTS5261_EFUSE_PROGRAM 0x40 + +#define RTS5261_EFUSE_ADDR 0xFC31 +#define RTS5261_EFUSE_ADDR_MASK 0x3F + +#define RTS5261_EFUSE_WRITE_DATA 0xFC32 +#define RTS5261_EFUSE_READ_DATA 0xFC34 + +/* DMACTL 0xFE2C */ +#define RTS5261_DMA_PACK_SIZE_MASK 0xF0 + +/* FW status register */ +#define RTS5261_FW_STATUS 0xFF56 +#define RTS5261_EXPRESS_LINK_FAIL_MASK (0x01<<7) + +/* FW control register */ +#define RTS5261_FW_CTL 0xFF5F +#define RTS5261_INFORM_RTD3_COLD (0x01<<5) + +#define RTS5261_REG_FPDCTL 0xFF60 + +#define RTS5261_REG_LDO12_CFG 0xFF6E +#define RTS5261_LDO12_VO_TUNE_MASK (0x07<<1) +#define RTS5261_LDO12_115 (0x03<<1) +#define RTS5261_LDO12_120 (0x04<<1) +#define RTS5261_LDO12_125 (0x05<<1) +#define RTS5261_LDO12_130 (0x06<<1) +#define RTS5261_LDO12_135 (0x07<<1) + +/* LDO control register */ +#define RTS5261_CARD_PWR_CTL 0xFD50 +#define RTS5261_SD_CLK_ISO (0x01<<7) +#define RTS5261_PAD_SD_DAT_FW_CTRL (0x01<<6) +#define RTS5261_PUPDC (0x01<<5) +#define RTS5261_SD_CMD_ISO (0x01<<4) +#define RTS5261_SD_DAT_ISO_MASK (0x0F<<0) + +#define RTS5261_LDO1233318_POW_CTL 0xFF70 +#define RTS5261_LDO3318_POWERON (0x01<<3) +#define RTS5261_LDO3_POWERON (0x01<<2) +#define RTS5261_LDO2_POWERON (0x01<<1) +#define RTS5261_LDO1_POWERON (0x01<<0) +#define RTS5261_LDO_POWERON_MASK (0x0F<<0) + +#define RTS5261_DV3318_CFG 0xFF71 +#define RTS5261_DV3318_TUNE_MASK (0x07<<4) +#define RTS5261_DV3318_18 (0x02<<4) +#define RTS5261_DV3318_19 (0x04<<4) +#define RTS5261_DV3318_33 (0x07<<4) + +/* CRD6603-433 190319 request changed */ +#define RTS5261_LDO1_OCP_THD_740 (0x00<<5) +#define RTS5261_LDO1_OCP_THD_800 (0x01<<5) +#define RTS5261_LDO1_OCP_THD_860 (0x02<<5) +#define RTS5261_LDO1_OCP_THD_920 (0x03<<5) +#define RTS5261_LDO1_OCP_THD_980 (0x04<<5) +#define RTS5261_LDO1_OCP_THD_1040 (0x05<<5) +#define RTS5261_LDO1_OCP_THD_1100 (0x06<<5) +#define RTS5261_LDO1_OCP_THD_1160 (0x07<<5) + +#define RTS5261_LDO1_LMT_THD_450 (0x00<<2) +#define RTS5261_LDO1_LMT_THD_1000 (0x01<<2) +#define RTS5261_LDO1_LMT_THD_1500 (0x02<<2) +#define 
RTS5261_LDO1_LMT_THD_2000 (0x03<<2) + +#define RTS5261_LDO1_CFG1 0xFF73 +#define RTS5261_LDO1_TUNE_MASK (0x07<<1) +#define RTS5261_LDO1_18 (0x05<<1) +#define RTS5261_LDO1_33 (0x07<<1) +#define RTS5261_LDO1_PWD_MASK (0x01<<0) + +#define RTS5261_LDO2_CFG0 0xFF74 +#define RTS5261_LDO2_OCP_THD_MASK (0x07<<5) +#define RTS5261_LDO2_OCP_EN (0x01<<4) +#define RTS5261_LDO2_OCP_LMT_THD_MASK (0x03<<2) +#define RTS5261_LDO2_OCP_LMT_EN (0x01<<1) + +#define RTS5261_LDO2_OCP_THD_620 (0x00<<5) +#define RTS5261_LDO2_OCP_THD_650 (0x01<<5) +#define RTS5261_LDO2_OCP_THD_680 (0x02<<5) +#define RTS5261_LDO2_OCP_THD_720 (0x03<<5) +#define RTS5261_LDO2_OCP_THD_750 (0x04<<5) +#define RTS5261_LDO2_OCP_THD_780 (0x05<<5) +#define RTS5261_LDO2_OCP_THD_810 (0x06<<5) +#define RTS5261_LDO2_OCP_THD_840 (0x07<<5) + +#define RTS5261_LDO2_CFG1 0xFF75 +#define RTS5261_LDO2_TUNE_MASK (0x07<<1) +#define RTS5261_LDO2_18 (0x05<<1) +#define RTS5261_LDO2_33 (0x07<<1) +#define RTS5261_LDO2_PWD_MASK (0x01<<0) + +#define RTS5261_LDO3_CFG0 0xFF76 +#define RTS5261_LDO3_OCP_THD_MASK (0x07<<5) +#define RTS5261_LDO3_OCP_EN (0x01<<4) +#define RTS5261_LDO3_OCP_LMT_THD_MASK (0x03<<2) +#define RTS5261_LDO3_OCP_LMT_EN (0x01<<1) + +#define RTS5261_LDO3_OCP_THD_620 (0x00<<5) +#define RTS5261_LDO3_OCP_THD_650 (0x01<<5) +#define RTS5261_LDO3_OCP_THD_680 (0x02<<5) +#define RTS5261_LDO3_OCP_THD_720 (0x03<<5) +#define RTS5261_LDO3_OCP_THD_750 (0x04<<5) +#define RTS5261_LDO3_OCP_THD_780 (0x05<<5) +#define RTS5261_LDO3_OCP_THD_810 (0x06<<5) +#define RTS5261_LDO3_OCP_THD_840 (0x07<<5) + +#define RTS5261_LDO3_CFG1 0xFF77 +#define RTS5261_LDO3_TUNE_MASK (0x07<<1) +#define RTS5261_LDO3_18 (0x05<<1) +#define RTS5261_LDO3_33 (0x07<<1) +#define RTS5261_LDO3_PWD_MASK (0x01<<0) + +#define RTS5261_REG_PME_FORCE_CTL 0xFF78 +#define FORCE_PM_CONTROL 0x20 +#define FORCE_PM_VALUE 0x10 +#define REG_EFUSE_BYPASS 0x08 +#define REG_EFUSE_POR 0x04 +#define REG_EFUSE_POWER_MASK 0x03 +#define REG_EFUSE_POWERON 0x03 +#define REG_EFUSE_POWEROFF 0x00 + + +/* Single LUN, support SD/SD EXPRESS */ +#define DEFAULT_SINGLE 0 +#define SD_LUN 1 +#define SD_EXPRESS_LUN 2 + +/* For Change_FPGA_SSCClock Function */ +#define MULTIPLY_BY_1 0x00 +#define MULTIPLY_BY_2 0x01 +#define MULTIPLY_BY_3 0x02 +#define MULTIPLY_BY_4 0x03 +#define MULTIPLY_BY_5 0x04 +#define MULTIPLY_BY_6 0x05 +#define MULTIPLY_BY_7 0x06 +#define MULTIPLY_BY_8 0x07 +#define MULTIPLY_BY_9 0x08 +#define MULTIPLY_BY_10 0x09 + +#define DIVIDE_BY_2 0x01 +#define DIVIDE_BY_3 0x02 +#define DIVIDE_BY_4 0x03 +#define DIVIDE_BY_5 0x04 +#define DIVIDE_BY_6 0x05 +#define DIVIDE_BY_7 0x06 +#define DIVIDE_BY_8 0x07 +#define DIVIDE_BY_9 0x08 +#define DIVIDE_BY_10 0x09 + +int rts5261_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock, + u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk); + +#endif /* RTS5261_H */ diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c new file mode 100644 index 0000000000..a30751ad37 --- /dev/null +++ b/drivers/misc/cardreader/rtsx_pcr.c @@ -0,0 +1,1860 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. 
+ * + * Author: + * Wei WANG + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rtsx_pcr.h" +#include "rts5261.h" +#include "rts5228.h" + +static bool msi_en = true; +module_param(msi_en, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(msi_en, "Enable MSI"); + +static DEFINE_IDR(rtsx_pci_idr); +static DEFINE_SPINLOCK(rtsx_pci_lock); + +static struct mfd_cell rtsx_pcr_cells[] = { + [RTSX_SD_CARD] = { + .name = DRV_NAME_RTSX_PCI_SDMMC, + }, +}; + +static const struct pci_device_id rtsx_pci_ids[] = { + { PCI_DEVICE(0x10EC, 0x5209), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5229), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5289), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5227), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x522A), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5249), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5287), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5286), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x524A), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x525A), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5260), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5261), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5228), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { 0, } +}; + +MODULE_DEVICE_TABLE(pci, rtsx_pci_ids); + +static int rtsx_comm_set_ltr_latency(struct rtsx_pcr *pcr, u32 latency) +{ + rtsx_pci_write_register(pcr, MSGTXDATA0, + MASK_8_BIT_DEF, (u8) (latency & 0xFF)); + rtsx_pci_write_register(pcr, MSGTXDATA1, + MASK_8_BIT_DEF, (u8)((latency >> 8) & 0xFF)); + rtsx_pci_write_register(pcr, MSGTXDATA2, + MASK_8_BIT_DEF, (u8)((latency >> 16) & 0xFF)); + rtsx_pci_write_register(pcr, MSGTXDATA3, + MASK_8_BIT_DEF, (u8)((latency >> 24) & 0xFF)); + rtsx_pci_write_register(pcr, LTR_CTL, LTR_TX_EN_MASK | + LTR_LATENCY_MODE_MASK, LTR_TX_EN_1 | LTR_LATENCY_MODE_SW); + + return 0; +} + +int rtsx_set_ltr_latency(struct rtsx_pcr *pcr, u32 latency) +{ + return rtsx_comm_set_ltr_latency(pcr, latency); +} + +static void rtsx_comm_set_aspm(struct rtsx_pcr *pcr, bool enable) +{ + if (pcr->aspm_enabled == enable) + return; + + if (pcr->aspm_mode == ASPM_MODE_CFG) { + pcie_capability_clear_and_set_word(pcr->pci, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPMC, + enable ? pcr->aspm_en : 0); + } else if (pcr->aspm_mode == ASPM_MODE_REG) { + if (pcr->aspm_en & 0x02) + rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, FORCE_ASPM_CTL0 | + FORCE_ASPM_CTL1, enable ? 
0 : FORCE_ASPM_CTL0 | FORCE_ASPM_CTL1); + else + rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, FORCE_ASPM_CTL0 | + FORCE_ASPM_CTL1, FORCE_ASPM_CTL0 | FORCE_ASPM_CTL1); + } + + if (!enable && (pcr->aspm_en & 0x02)) + mdelay(10); + + pcr->aspm_enabled = enable; +} + +static void rtsx_disable_aspm(struct rtsx_pcr *pcr) +{ + if (pcr->ops->set_aspm) + pcr->ops->set_aspm(pcr, false); + else + rtsx_comm_set_aspm(pcr, false); +} + +int rtsx_set_l1off_sub(struct rtsx_pcr *pcr, u8 val) +{ + rtsx_pci_write_register(pcr, L1SUB_CONFIG3, 0xFF, val); + + return 0; +} + +static void rtsx_set_l1off_sub_cfg_d0(struct rtsx_pcr *pcr, int active) +{ + if (pcr->ops->set_l1off_cfg_sub_d0) + pcr->ops->set_l1off_cfg_sub_d0(pcr, active); +} + +static void rtsx_comm_pm_full_on(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + rtsx_disable_aspm(pcr); + + /* Fixes DMA transfer timeout issue after disabling ASPM on RTS5260 */ + msleep(1); + + if (option->ltr_enabled) + rtsx_set_ltr_latency(pcr, option->ltr_active_latency); + + if (rtsx_check_dev_flag(pcr, LTR_L1SS_PWR_GATE_EN)) + rtsx_set_l1off_sub_cfg_d0(pcr, 1); +} + +static void rtsx_pm_full_on(struct rtsx_pcr *pcr) +{ + rtsx_comm_pm_full_on(pcr); +} + +void rtsx_pci_start_run(struct rtsx_pcr *pcr) +{ + /* If pci device removed, don't queue idle work any more */ + if (pcr->remove_pci) + return; + + if (pcr->state != PDEV_STAT_RUN) { + pcr->state = PDEV_STAT_RUN; + if (pcr->ops->enable_auto_blink) + pcr->ops->enable_auto_blink(pcr); + rtsx_pm_full_on(pcr); + } +} +EXPORT_SYMBOL_GPL(rtsx_pci_start_run); + +int rtsx_pci_write_register(struct rtsx_pcr *pcr, u16 addr, u8 mask, u8 data) +{ + int i; + u32 val = HAIMR_WRITE_START; + + val |= (u32)(addr & 0x3FFF) << 16; + val |= (u32)mask << 8; + val |= (u32)data; + + rtsx_pci_writel(pcr, RTSX_HAIMR, val); + + for (i = 0; i < MAX_RW_REG_CNT; i++) { + val = rtsx_pci_readl(pcr, RTSX_HAIMR); + if ((val & HAIMR_TRANS_END) == 0) { + if (data != (u8)val) + return -EIO; + return 0; + } + } + + return -ETIMEDOUT; +} +EXPORT_SYMBOL_GPL(rtsx_pci_write_register); + +int rtsx_pci_read_register(struct rtsx_pcr *pcr, u16 addr, u8 *data) +{ + u32 val = HAIMR_READ_START; + int i; + + val |= (u32)(addr & 0x3FFF) << 16; + rtsx_pci_writel(pcr, RTSX_HAIMR, val); + + for (i = 0; i < MAX_RW_REG_CNT; i++) { + val = rtsx_pci_readl(pcr, RTSX_HAIMR); + if ((val & HAIMR_TRANS_END) == 0) + break; + } + + if (i >= MAX_RW_REG_CNT) + return -ETIMEDOUT; + + if (data) + *data = (u8)(val & 0xFF); + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_read_register); + +int __rtsx_pci_write_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 val) +{ + int err, i, finished = 0; + u8 tmp; + + rtsx_pci_write_register(pcr, PHYDATA0, 0xFF, (u8)val); + rtsx_pci_write_register(pcr, PHYDATA1, 0xFF, (u8)(val >> 8)); + rtsx_pci_write_register(pcr, PHYADDR, 0xFF, addr); + rtsx_pci_write_register(pcr, PHYRWCTL, 0xFF, 0x81); + + for (i = 0; i < 100000; i++) { + err = rtsx_pci_read_register(pcr, PHYRWCTL, &tmp); + if (err < 0) + return err; + + if (!(tmp & 0x80)) { + finished = 1; + break; + } + } + + if (!finished) + return -ETIMEDOUT; + + return 0; +} + +int rtsx_pci_write_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 val) +{ + if (pcr->ops->write_phy) + return pcr->ops->write_phy(pcr, addr, val); + + return __rtsx_pci_write_phy_register(pcr, addr, val); +} +EXPORT_SYMBOL_GPL(rtsx_pci_write_phy_register); + +int __rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val) +{ + int err, i, finished = 0; + u16 data; + u8 tmp, val1, val2; + + 
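+	/* Issue the PHY read (PHYRWCTL = 0x80), poll until the busy bit clears, then collect the two data bytes from PHYDATA0/1. */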
rtsx_pci_write_register(pcr, PHYADDR, 0xFF, addr); + rtsx_pci_write_register(pcr, PHYRWCTL, 0xFF, 0x80); + + for (i = 0; i < 100000; i++) { + err = rtsx_pci_read_register(pcr, PHYRWCTL, &tmp); + if (err < 0) + return err; + + if (!(tmp & 0x80)) { + finished = 1; + break; + } + } + + if (!finished) + return -ETIMEDOUT; + + rtsx_pci_read_register(pcr, PHYDATA0, &val1); + rtsx_pci_read_register(pcr, PHYDATA1, &val2); + data = val1 | (val2 << 8); + + if (val) + *val = data; + + return 0; +} + +int rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val) +{ + if (pcr->ops->read_phy) + return pcr->ops->read_phy(pcr, addr, val); + + return __rtsx_pci_read_phy_register(pcr, addr, val); +} +EXPORT_SYMBOL_GPL(rtsx_pci_read_phy_register); + +void rtsx_pci_stop_cmd(struct rtsx_pcr *pcr) +{ + if (pcr->ops->stop_cmd) + return pcr->ops->stop_cmd(pcr); + + rtsx_pci_writel(pcr, RTSX_HCBCTLR, STOP_CMD); + rtsx_pci_writel(pcr, RTSX_HDBCTLR, STOP_DMA); + + rtsx_pci_write_register(pcr, DMACTL, 0x80, 0x80); + rtsx_pci_write_register(pcr, RBCTL, 0x80, 0x80); +} +EXPORT_SYMBOL_GPL(rtsx_pci_stop_cmd); + +void rtsx_pci_add_cmd(struct rtsx_pcr *pcr, + u8 cmd_type, u16 reg_addr, u8 mask, u8 data) +{ + unsigned long flags; + u32 val = 0; + u32 *ptr = (u32 *)(pcr->host_cmds_ptr); + + val |= (u32)(cmd_type & 0x03) << 30; + val |= (u32)(reg_addr & 0x3FFF) << 16; + val |= (u32)mask << 8; + val |= (u32)data; + + spin_lock_irqsave(&pcr->lock, flags); + ptr += pcr->ci; + if (pcr->ci < (HOST_CMDS_BUF_LEN / 4)) { + put_unaligned_le32(val, ptr); + ptr++; + pcr->ci++; + } + spin_unlock_irqrestore(&pcr->lock, flags); +} +EXPORT_SYMBOL_GPL(rtsx_pci_add_cmd); + +void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr) +{ + u32 val = 1 << 31; + + rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr); + + val |= (u32)(pcr->ci * 4) & 0x00FFFFFF; + /* Hardware Auto Response */ + val |= 0x40000000; + rtsx_pci_writel(pcr, RTSX_HCBCTLR, val); +} +EXPORT_SYMBOL_GPL(rtsx_pci_send_cmd_no_wait); + +int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout) +{ + struct completion trans_done; + u32 val = 1 << 31; + long timeleft; + unsigned long flags; + int err = 0; + + spin_lock_irqsave(&pcr->lock, flags); + + /* set up data structures for the wakeup system */ + pcr->done = &trans_done; + pcr->trans_result = TRANS_NOT_READY; + init_completion(&trans_done); + + rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr); + + val |= (u32)(pcr->ci * 4) & 0x00FFFFFF; + /* Hardware Auto Response */ + val |= 0x40000000; + rtsx_pci_writel(pcr, RTSX_HCBCTLR, val); + + spin_unlock_irqrestore(&pcr->lock, flags); + + /* Wait for TRANS_OK_INT */ + timeleft = wait_for_completion_interruptible_timeout( + &trans_done, msecs_to_jiffies(timeout)); + if (timeleft <= 0) { + pcr_dbg(pcr, "Timeout (%s %d)\n", __func__, __LINE__); + err = -ETIMEDOUT; + goto finish_send_cmd; + } + + spin_lock_irqsave(&pcr->lock, flags); + if (pcr->trans_result == TRANS_RESULT_FAIL) + err = -EINVAL; + else if (pcr->trans_result == TRANS_RESULT_OK) + err = 0; + else if (pcr->trans_result == TRANS_NO_DEVICE) + err = -ENODEV; + spin_unlock_irqrestore(&pcr->lock, flags); + +finish_send_cmd: + spin_lock_irqsave(&pcr->lock, flags); + pcr->done = NULL; + spin_unlock_irqrestore(&pcr->lock, flags); + + if ((err < 0) && (err != -ENODEV)) + rtsx_pci_stop_cmd(pcr); + + if (pcr->finish_me) + complete(pcr->finish_me); + + return err; +} +EXPORT_SYMBOL_GPL(rtsx_pci_send_cmd); + +static void rtsx_pci_add_sg_tbl(struct rtsx_pcr *pcr, + dma_addr_t addr, unsigned int len, int end) +{ + u64 *ptr = (u64 
*)(pcr->host_sg_tbl_ptr) + pcr->sgi; + u64 val; + u8 option = RTSX_SG_VALID | RTSX_SG_TRANS_DATA; + + pcr_dbg(pcr, "DMA addr: 0x%x, Len: 0x%x\n", (unsigned int)addr, len); + + if (end) + option |= RTSX_SG_END; + + if ((PCI_PID(pcr) == PID_5261) || (PCI_PID(pcr) == PID_5228)) { + if (len > 0xFFFF) + val = ((u64)addr << 32) | (((u64)len & 0xFFFF) << 16) + | (((u64)len >> 16) << 6) | option; + else + val = ((u64)addr << 32) | ((u64)len << 16) | option; + } else { + val = ((u64)addr << 32) | ((u64)len << 12) | option; + } + put_unaligned_le64(val, ptr); + pcr->sgi++; +} + +int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read, int timeout) +{ + int err = 0, count; + + pcr_dbg(pcr, "--> %s: num_sg = %d\n", __func__, num_sg); + count = rtsx_pci_dma_map_sg(pcr, sglist, num_sg, read); + if (count < 1) + return -EINVAL; + pcr_dbg(pcr, "DMA mapping count: %d\n", count); + + err = rtsx_pci_dma_transfer(pcr, sglist, count, read, timeout); + + rtsx_pci_dma_unmap_sg(pcr, sglist, num_sg, read); + + return err; +} +EXPORT_SYMBOL_GPL(rtsx_pci_transfer_data); + +int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read) +{ + enum dma_data_direction dir = read ? DMA_FROM_DEVICE : DMA_TO_DEVICE; + + if (pcr->remove_pci) + return -EINVAL; + + if ((sglist == NULL) || (num_sg <= 0)) + return -EINVAL; + + return dma_map_sg(&(pcr->pci->dev), sglist, num_sg, dir); +} +EXPORT_SYMBOL_GPL(rtsx_pci_dma_map_sg); + +void rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read) +{ + enum dma_data_direction dir = read ? DMA_FROM_DEVICE : DMA_TO_DEVICE; + + dma_unmap_sg(&(pcr->pci->dev), sglist, num_sg, dir); +} +EXPORT_SYMBOL_GPL(rtsx_pci_dma_unmap_sg); + +int rtsx_pci_dma_transfer(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int count, bool read, int timeout) +{ + struct completion trans_done; + struct scatterlist *sg; + dma_addr_t addr; + long timeleft; + unsigned long flags; + unsigned int len; + int i, err = 0; + u32 val; + u8 dir = read ? 
DEVICE_TO_HOST : HOST_TO_DEVICE; + + if (pcr->remove_pci) + return -ENODEV; + + if ((sglist == NULL) || (count < 1)) + return -EINVAL; + + val = ((u32)(dir & 0x01) << 29) | TRIG_DMA | ADMA_MODE; + pcr->sgi = 0; + for_each_sg(sglist, sg, count, i) { + addr = sg_dma_address(sg); + len = sg_dma_len(sg); + rtsx_pci_add_sg_tbl(pcr, addr, len, i == count - 1); + } + + spin_lock_irqsave(&pcr->lock, flags); + + pcr->done = &trans_done; + pcr->trans_result = TRANS_NOT_READY; + init_completion(&trans_done); + rtsx_pci_writel(pcr, RTSX_HDBAR, pcr->host_sg_tbl_addr); + rtsx_pci_writel(pcr, RTSX_HDBCTLR, val); + + spin_unlock_irqrestore(&pcr->lock, flags); + + timeleft = wait_for_completion_interruptible_timeout( + &trans_done, msecs_to_jiffies(timeout)); + if (timeleft <= 0) { + pcr_dbg(pcr, "Timeout (%s %d)\n", __func__, __LINE__); + err = -ETIMEDOUT; + goto out; + } + + spin_lock_irqsave(&pcr->lock, flags); + if (pcr->trans_result == TRANS_RESULT_FAIL) { + err = -EILSEQ; + if (pcr->dma_error_count < RTS_MAX_TIMES_FREQ_REDUCTION) + pcr->dma_error_count++; + } + + else if (pcr->trans_result == TRANS_NO_DEVICE) + err = -ENODEV; + spin_unlock_irqrestore(&pcr->lock, flags); + +out: + spin_lock_irqsave(&pcr->lock, flags); + pcr->done = NULL; + spin_unlock_irqrestore(&pcr->lock, flags); + + if ((err < 0) && (err != -ENODEV)) + rtsx_pci_stop_cmd(pcr); + + if (pcr->finish_me) + complete(pcr->finish_me); + + return err; +} +EXPORT_SYMBOL_GPL(rtsx_pci_dma_transfer); + +int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len) +{ + int err; + int i, j; + u16 reg; + u8 *ptr; + + if (buf_len > 512) + buf_len = 512; + + ptr = buf; + reg = PPBUF_BASE2; + for (i = 0; i < buf_len / 256; i++) { + rtsx_pci_init_cmd(pcr); + + for (j = 0; j < 256; j++) + rtsx_pci_add_cmd(pcr, READ_REG_CMD, reg++, 0, 0); + + err = rtsx_pci_send_cmd(pcr, 250); + if (err < 0) + return err; + + memcpy(ptr, rtsx_pci_get_cmd_data(pcr), 256); + ptr += 256; + } + + if (buf_len % 256) { + rtsx_pci_init_cmd(pcr); + + for (j = 0; j < buf_len % 256; j++) + rtsx_pci_add_cmd(pcr, READ_REG_CMD, reg++, 0, 0); + + err = rtsx_pci_send_cmd(pcr, 250); + if (err < 0) + return err; + } + + memcpy(ptr, rtsx_pci_get_cmd_data(pcr), buf_len % 256); + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_read_ppbuf); + +int rtsx_pci_write_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len) +{ + int err; + int i, j; + u16 reg; + u8 *ptr; + + if (buf_len > 512) + buf_len = 512; + + ptr = buf; + reg = PPBUF_BASE2; + for (i = 0; i < buf_len / 256; i++) { + rtsx_pci_init_cmd(pcr); + + for (j = 0; j < 256; j++) { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, + reg++, 0xFF, *ptr); + ptr++; + } + + err = rtsx_pci_send_cmd(pcr, 250); + if (err < 0) + return err; + } + + if (buf_len % 256) { + rtsx_pci_init_cmd(pcr); + + for (j = 0; j < buf_len % 256; j++) { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, + reg++, 0xFF, *ptr); + ptr++; + } + + err = rtsx_pci_send_cmd(pcr, 250); + if (err < 0) + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_write_ppbuf); + +static int rtsx_pci_set_pull_ctl(struct rtsx_pcr *pcr, const u32 *tbl) +{ + rtsx_pci_init_cmd(pcr); + + while (*tbl & 0xFFFF0000) { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, + (u16)(*tbl >> 16), 0xFF, (u8)(*tbl)); + tbl++; + } + + return rtsx_pci_send_cmd(pcr, 100); +} + +int rtsx_pci_card_pull_ctl_enable(struct rtsx_pcr *pcr, int card) +{ + const u32 *tbl; + + if (card == RTSX_SD_CARD) + tbl = pcr->sd_pull_ctl_enable_tbl; + else if (card == RTSX_MS_CARD) + tbl = pcr->ms_pull_ctl_enable_tbl; + else + return -EINVAL; + + return 
rtsx_pci_set_pull_ctl(pcr, tbl); +} +EXPORT_SYMBOL_GPL(rtsx_pci_card_pull_ctl_enable); + +int rtsx_pci_card_pull_ctl_disable(struct rtsx_pcr *pcr, int card) +{ + const u32 *tbl; + + if (card == RTSX_SD_CARD) + tbl = pcr->sd_pull_ctl_disable_tbl; + else if (card == RTSX_MS_CARD) + tbl = pcr->ms_pull_ctl_disable_tbl; + else + return -EINVAL; + + return rtsx_pci_set_pull_ctl(pcr, tbl); +} +EXPORT_SYMBOL_GPL(rtsx_pci_card_pull_ctl_disable); + +static void rtsx_pci_enable_bus_int(struct rtsx_pcr *pcr) +{ + struct rtsx_hw_param *hw_param = &pcr->hw_param; + + pcr->bier = TRANS_OK_INT_EN | TRANS_FAIL_INT_EN | SD_INT_EN + | hw_param->interrupt_en; + + if (pcr->num_slots > 1) + pcr->bier |= MS_INT_EN; + + /* Enable Bus Interrupt */ + rtsx_pci_writel(pcr, RTSX_BIER, pcr->bier); + + pcr_dbg(pcr, "RTSX_BIER: 0x%08x\n", pcr->bier); +} + +static inline u8 double_ssc_depth(u8 depth) +{ + return ((depth > 1) ? (depth - 1) : depth); +} + +static u8 revise_ssc_depth(u8 ssc_depth, u8 div) +{ + if (div > CLK_DIV_1) { + if (ssc_depth > (div - 1)) + ssc_depth -= (div - 1); + else + ssc_depth = SSC_DEPTH_4M; + } + + return ssc_depth; +} + +int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock, + u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk) +{ + int err, clk; + u8 n, clk_divider, mcu_cnt, div; + static const u8 depth[] = { + [RTSX_SSC_DEPTH_4M] = SSC_DEPTH_4M, + [RTSX_SSC_DEPTH_2M] = SSC_DEPTH_2M, + [RTSX_SSC_DEPTH_1M] = SSC_DEPTH_1M, + [RTSX_SSC_DEPTH_500K] = SSC_DEPTH_500K, + [RTSX_SSC_DEPTH_250K] = SSC_DEPTH_250K, + }; + + if (PCI_PID(pcr) == PID_5261) + return rts5261_pci_switch_clock(pcr, card_clock, + ssc_depth, initial_mode, double_clk, vpclk); + if (PCI_PID(pcr) == PID_5228) + return rts5228_pci_switch_clock(pcr, card_clock, + ssc_depth, initial_mode, double_clk, vpclk); + + if (initial_mode) { + /* We use 250k(around) here, in initial stage */ + clk_divider = SD_CLK_DIVIDE_128; + card_clock = 30000000; + } else { + clk_divider = SD_CLK_DIVIDE_0; + } + err = rtsx_pci_write_register(pcr, SD_CFG1, + SD_CLK_DIVIDE_MASK, clk_divider); + if (err < 0) + return err; + + /* Reduce card clock by 20MHz each time a DMA transfer error occurs */ + if (card_clock == UHS_SDR104_MAX_DTR && + pcr->dma_error_count && + PCI_PID(pcr) == RTS5227_DEVICE_ID) + card_clock = UHS_SDR104_MAX_DTR - + (pcr->dma_error_count * 20000000); + + card_clock /= 1000000; + pcr_dbg(pcr, "Switch card clock to %dMHz\n", card_clock); + + clk = card_clock; + if (!initial_mode && double_clk) + clk = card_clock * 2; + pcr_dbg(pcr, "Internal SSC clock: %dMHz (cur_clock = %d)\n", + clk, pcr->cur_clock); + + if (clk == pcr->cur_clock) + return 0; + + if (pcr->ops->conv_clk_and_div_n) + n = (u8)pcr->ops->conv_clk_and_div_n(clk, CLK_TO_DIV_N); + else + n = (u8)(clk - 2); + if ((clk <= 2) || (n > MAX_DIV_N_PCR)) + return -EINVAL; + + mcu_cnt = (u8)(125/clk + 3); + if (mcu_cnt > 15) + mcu_cnt = 15; + + /* Make sure that the SSC clock div_n is not less than MIN_DIV_N_PCR */ + div = CLK_DIV_1; + while ((n < MIN_DIV_N_PCR) && (div < CLK_DIV_8)) { + if (pcr->ops->conv_clk_and_div_n) { + int dbl_clk = pcr->ops->conv_clk_and_div_n(n, + DIV_N_TO_CLK) * 2; + n = (u8)pcr->ops->conv_clk_and_div_n(dbl_clk, + CLK_TO_DIV_N); + } else { + n = (n + 2) * 2 - 2; + } + div++; + } + pcr_dbg(pcr, "n = %d, div = %d\n", n, div); + + ssc_depth = depth[ssc_depth]; + if (double_clk) + ssc_depth = double_ssc_depth(ssc_depth); + + ssc_depth = revise_ssc_depth(ssc_depth, div); + pcr_dbg(pcr, "ssc_depth = %d\n", ssc_depth); + + 
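+	/* Program CLK_DIV, SSC depth and div_n in one batched host command transfer, toggling SSC_RSTB around the update. */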
rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_CTL, + CLK_LOW_FREQ, CLK_LOW_FREQ); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_DIV, + 0xFF, (div << 4) | mcu_cnt); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, 0); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, + SSC_DEPTH_MASK, ssc_depth); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_DIV_N_0, 0xFF, n); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, SSC_RSTB); + if (vpclk) { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, 0); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, PHASE_NOT_RESET); + } + + err = rtsx_pci_send_cmd(pcr, 2000); + if (err < 0) + return err; + + /* Wait SSC clock stable */ + udelay(SSC_CLOCK_STABLE_WAIT); + err = rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0); + if (err < 0) + return err; + + pcr->cur_clock = clk; + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_switch_clock); + +int rtsx_pci_card_power_on(struct rtsx_pcr *pcr, int card) +{ + if (pcr->ops->card_power_on) + return pcr->ops->card_power_on(pcr, card); + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_card_power_on); + +int rtsx_pci_card_power_off(struct rtsx_pcr *pcr, int card) +{ + if (pcr->ops->card_power_off) + return pcr->ops->card_power_off(pcr, card); + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_card_power_off); + +int rtsx_pci_card_exclusive_check(struct rtsx_pcr *pcr, int card) +{ + static const unsigned int cd_mask[] = { + [RTSX_SD_CARD] = SD_EXIST, + [RTSX_MS_CARD] = MS_EXIST + }; + + if (!(pcr->flags & PCR_MS_PMOS)) { + /* When using single PMOS, accessing card is not permitted + * if the existing card is not the designated one. + */ + if (pcr->card_exist & (~cd_mask[card])) + return -EIO; + } + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_card_exclusive_check); + +int rtsx_pci_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + if (pcr->ops->switch_output_voltage) + return pcr->ops->switch_output_voltage(pcr, voltage); + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_switch_output_voltage); + +unsigned int rtsx_pci_card_exist(struct rtsx_pcr *pcr) +{ + unsigned int val; + + val = rtsx_pci_readl(pcr, RTSX_BIPR); + if (pcr->ops->cd_deglitch) + val = pcr->ops->cd_deglitch(pcr); + + return val; +} +EXPORT_SYMBOL_GPL(rtsx_pci_card_exist); + +void rtsx_pci_complete_unfinished_transfer(struct rtsx_pcr *pcr) +{ + struct completion finish; + + pcr->finish_me = &finish; + init_completion(&finish); + + if (pcr->done) + complete(pcr->done); + + if (!pcr->remove_pci) + rtsx_pci_stop_cmd(pcr); + + wait_for_completion_interruptible_timeout(&finish, + msecs_to_jiffies(2)); + pcr->finish_me = NULL; +} +EXPORT_SYMBOL_GPL(rtsx_pci_complete_unfinished_transfer); + +static void rtsx_pci_card_detect(struct work_struct *work) +{ + struct delayed_work *dwork; + struct rtsx_pcr *pcr; + unsigned long flags; + unsigned int card_detect = 0, card_inserted, card_removed; + u32 irq_status; + + dwork = to_delayed_work(work); + pcr = container_of(dwork, struct rtsx_pcr, carddet_work); + + pcr_dbg(pcr, "--> %s\n", __func__); + + mutex_lock(&pcr->pcr_mutex); + spin_lock_irqsave(&pcr->lock, flags); + + irq_status = rtsx_pci_readl(pcr, RTSX_BIPR); + pcr_dbg(pcr, "irq_status: 0x%08x\n", irq_status); + + irq_status &= CARD_EXIST; + card_inserted = pcr->card_inserted & irq_status; + card_removed = pcr->card_removed; + pcr->card_inserted = 0; + pcr->card_removed = 0; + + spin_unlock_irqrestore(&pcr->lock, flags); + + if (card_inserted || card_removed) { + pcr_dbg(pcr, "card_inserted: 0x%x, 
card_removed: 0x%x\n", + card_inserted, card_removed); + + if (pcr->ops->cd_deglitch) + card_inserted = pcr->ops->cd_deglitch(pcr); + + card_detect = card_inserted | card_removed; + + pcr->card_exist |= card_inserted; + pcr->card_exist &= ~card_removed; + } + + mutex_unlock(&pcr->pcr_mutex); + + if ((card_detect & SD_EXIST) && pcr->slots[RTSX_SD_CARD].card_event) + pcr->slots[RTSX_SD_CARD].card_event( + pcr->slots[RTSX_SD_CARD].p_dev); + if ((card_detect & MS_EXIST) && pcr->slots[RTSX_MS_CARD].card_event) + pcr->slots[RTSX_MS_CARD].card_event( + pcr->slots[RTSX_MS_CARD].p_dev); +} + +static void rtsx_pci_process_ocp(struct rtsx_pcr *pcr) +{ + if (pcr->ops->process_ocp) { + pcr->ops->process_ocp(pcr); + } else { + if (!pcr->option.ocp_en) + return; + rtsx_pci_get_ocpstat(pcr, &pcr->ocp_stat); + if (pcr->ocp_stat & (SD_OC_NOW | SD_OC_EVER)) { + rtsx_pci_card_power_off(pcr, RTSX_SD_CARD); + rtsx_pci_write_register(pcr, CARD_OE, SD_OUTPUT_EN, 0); + rtsx_pci_clear_ocpstat(pcr); + pcr->ocp_stat = 0; + } + } +} + +static int rtsx_pci_process_ocp_interrupt(struct rtsx_pcr *pcr) +{ + if (pcr->option.ocp_en) + rtsx_pci_process_ocp(pcr); + + return 0; +} + +static irqreturn_t rtsx_pci_isr(int irq, void *dev_id) +{ + struct rtsx_pcr *pcr = dev_id; + u32 int_reg; + + if (!pcr) + return IRQ_NONE; + + spin_lock(&pcr->lock); + + int_reg = rtsx_pci_readl(pcr, RTSX_BIPR); + /* Clear interrupt flag */ + rtsx_pci_writel(pcr, RTSX_BIPR, int_reg); + if ((int_reg & pcr->bier) == 0) { + spin_unlock(&pcr->lock); + return IRQ_NONE; + } + if (int_reg == 0xFFFFFFFF) { + spin_unlock(&pcr->lock); + return IRQ_HANDLED; + } + + int_reg &= (pcr->bier | 0x7FFFFF); + + if (int_reg & SD_OC_INT) + rtsx_pci_process_ocp_interrupt(pcr); + + if (int_reg & SD_INT) { + if (int_reg & SD_EXIST) { + pcr->card_inserted |= SD_EXIST; + } else { + pcr->card_removed |= SD_EXIST; + pcr->card_inserted &= ~SD_EXIST; + if (PCI_PID(pcr) == PID_5261) { + rtsx_pci_write_register(pcr, RTS5261_FW_STATUS, + RTS5261_EXPRESS_LINK_FAIL_MASK, 0); + pcr->extra_caps |= EXTRA_CAPS_SD_EXPRESS; + } + } + pcr->dma_error_count = 0; + } + + if (int_reg & MS_INT) { + if (int_reg & MS_EXIST) { + pcr->card_inserted |= MS_EXIST; + } else { + pcr->card_removed |= MS_EXIST; + pcr->card_inserted &= ~MS_EXIST; + } + } + + if (int_reg & (NEED_COMPLETE_INT | DELINK_INT)) { + if (int_reg & (TRANS_FAIL_INT | DELINK_INT)) { + pcr->trans_result = TRANS_RESULT_FAIL; + if (pcr->done) + complete(pcr->done); + } else if (int_reg & TRANS_OK_INT) { + pcr->trans_result = TRANS_RESULT_OK; + if (pcr->done) + complete(pcr->done); + } + } + + if ((pcr->card_inserted || pcr->card_removed) && !(int_reg & SD_OC_INT)) + schedule_delayed_work(&pcr->carddet_work, + msecs_to_jiffies(200)); + + spin_unlock(&pcr->lock); + return IRQ_HANDLED; +} + +static int rtsx_pci_acquire_irq(struct rtsx_pcr *pcr) +{ + pcr_dbg(pcr, "%s: pcr->msi_en = %d, pci->irq = %d\n", + __func__, pcr->msi_en, pcr->pci->irq); + + if (request_irq(pcr->pci->irq, rtsx_pci_isr, + pcr->msi_en ? 
0 : IRQF_SHARED, + DRV_NAME_RTSX_PCI, pcr)) { + dev_err(&(pcr->pci->dev), + "rtsx_sdmmc: unable to grab IRQ %d, disabling device\n", + pcr->pci->irq); + return -1; + } + + pcr->irq = pcr->pci->irq; + pci_intx(pcr->pci, !pcr->msi_en); + + return 0; +} + +static void rtsx_base_force_power_down(struct rtsx_pcr *pcr) +{ + /* Set relink_time to 0 */ + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, MASK_8_BIT_DEF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3, + RELINK_TIME_MASK, 0); + + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, + D3_DELINK_MODE_EN, D3_DELINK_MODE_EN); + + rtsx_pci_write_register(pcr, FPDCTL, ALL_POWER_DOWN, ALL_POWER_DOWN); +} + +static void __maybe_unused rtsx_pci_power_off(struct rtsx_pcr *pcr, u8 pm_state, bool runtime) +{ + if (pcr->ops->turn_off_led) + pcr->ops->turn_off_led(pcr); + + rtsx_pci_writel(pcr, RTSX_BIER, 0); + pcr->bier = 0; + + rtsx_pci_write_register(pcr, PETXCFG, 0x08, 0x08); + rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, pm_state); + + if (pcr->ops->force_power_down) + pcr->ops->force_power_down(pcr, pm_state, runtime); + else + rtsx_base_force_power_down(pcr); +} + +void rtsx_pci_enable_ocp(struct rtsx_pcr *pcr) +{ + u8 val = SD_OCP_INT_EN | SD_DETECT_EN; + + if (pcr->ops->enable_ocp) { + pcr->ops->enable_ocp(pcr); + } else { + rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN, 0); + rtsx_pci_write_register(pcr, REG_OCPCTL, 0xFF, val); + } + +} + +void rtsx_pci_disable_ocp(struct rtsx_pcr *pcr) +{ + u8 mask = SD_OCP_INT_EN | SD_DETECT_EN; + + if (pcr->ops->disable_ocp) { + pcr->ops->disable_ocp(pcr); + } else { + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0); + rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN, + OC_POWER_DOWN); + } +} + +void rtsx_pci_init_ocp(struct rtsx_pcr *pcr) +{ + if (pcr->ops->init_ocp) { + pcr->ops->init_ocp(pcr); + } else { + struct rtsx_cr_option *option = &(pcr->option); + + if (option->ocp_en) { + u8 val = option->sd_800mA_ocp_thd; + + rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN, 0); + rtsx_pci_write_register(pcr, REG_OCPPARA1, + SD_OCP_TIME_MASK, SD_OCP_TIME_800); + rtsx_pci_write_register(pcr, REG_OCPPARA2, + SD_OCP_THD_MASK, val); + rtsx_pci_write_register(pcr, REG_OCPGLITCH, + SD_OCP_GLITCH_MASK, pcr->hw_param.ocp_glitch); + rtsx_pci_enable_ocp(pcr); + } + } +} + +int rtsx_pci_get_ocpstat(struct rtsx_pcr *pcr, u8 *val) +{ + if (pcr->ops->get_ocpstat) + return pcr->ops->get_ocpstat(pcr, val); + else + return rtsx_pci_read_register(pcr, REG_OCPSTAT, val); +} + +void rtsx_pci_clear_ocpstat(struct rtsx_pcr *pcr) +{ + if (pcr->ops->clear_ocpstat) { + pcr->ops->clear_ocpstat(pcr); + } else { + u8 mask = SD_OCP_INT_CLR | SD_OC_CLR; + u8 val = SD_OCP_INT_CLR | SD_OC_CLR; + + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, val); + udelay(100); + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0); + } +} + +void rtsx_pci_enable_oobs_polling(struct rtsx_pcr *pcr) +{ + u16 val; + + if ((PCI_PID(pcr) != PID_525A) && (PCI_PID(pcr) != PID_5260)) { + rtsx_pci_read_phy_register(pcr, 0x01, &val); + val |= 1<<9; + rtsx_pci_write_phy_register(pcr, 0x01, val); + } + rtsx_pci_write_register(pcr, REG_CFG_OOBS_OFF_TIMER, 0xFF, 0x32); + rtsx_pci_write_register(pcr, REG_CFG_OOBS_ON_TIMER, 0xFF, 0x05); + rtsx_pci_write_register(pcr, REG_CFG_VCM_ON_TIMER, 0xFF, 0x83); + rtsx_pci_write_register(pcr, REG_CFG_OOBS_POLLING, 0xFF, 0xDE); + +} + +void rtsx_pci_disable_oobs_polling(struct rtsx_pcr *pcr) +{ + u16 val; + + if ((PCI_PID(pcr) != 
PID_525A) && (PCI_PID(pcr) != PID_5260)) { + rtsx_pci_read_phy_register(pcr, 0x01, &val); + val &= ~(1<<9); + rtsx_pci_write_phy_register(pcr, 0x01, val); + } + rtsx_pci_write_register(pcr, REG_CFG_VCM_ON_TIMER, 0xFF, 0x03); + rtsx_pci_write_register(pcr, REG_CFG_OOBS_POLLING, 0xFF, 0x00); + +} + +int rtsx_sd_power_off_card3v3(struct rtsx_pcr *pcr) +{ + rtsx_pci_write_register(pcr, CARD_CLK_EN, SD_CLK_EN | + MS_CLK_EN | SD40_CLK_EN, 0); + rtsx_pci_write_register(pcr, CARD_OE, SD_OUTPUT_EN, 0); + rtsx_pci_card_power_off(pcr, RTSX_SD_CARD); + + msleep(50); + + rtsx_pci_card_pull_ctl_disable(pcr, RTSX_SD_CARD); + + return 0; +} + +int rtsx_ms_power_off_card3v3(struct rtsx_pcr *pcr) +{ + rtsx_pci_write_register(pcr, CARD_CLK_EN, SD_CLK_EN | + MS_CLK_EN | SD40_CLK_EN, 0); + + rtsx_pci_card_pull_ctl_disable(pcr, RTSX_MS_CARD); + + rtsx_pci_write_register(pcr, CARD_OE, MS_OUTPUT_EN, 0); + rtsx_pci_card_power_off(pcr, RTSX_MS_CARD); + + return 0; +} + +static int rtsx_pci_init_hw(struct rtsx_pcr *pcr) +{ + struct pci_dev *pdev = pcr->pci; + int err; + + if (PCI_PID(pcr) == PID_5228) + rtsx_pci_write_register(pcr, RTS5228_LDO1_CFG1, RTS5228_LDO1_SR_TIME_MASK, + RTS5228_LDO1_SR_0_5); + + rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr); + + rtsx_pci_enable_bus_int(pcr); + + /* Power on SSC */ + if (PCI_PID(pcr) == PID_5261) { + /* Gating real mcu clock */ + err = rtsx_pci_write_register(pcr, RTS5261_FW_CFG1, + RTS5261_MCU_CLOCK_GATING, 0); + err = rtsx_pci_write_register(pcr, RTS5261_REG_FPDCTL, + SSC_POWER_DOWN, 0); + } else { + err = rtsx_pci_write_register(pcr, FPDCTL, SSC_POWER_DOWN, 0); + } + if (err < 0) + return err; + + /* Wait SSC power stable */ + udelay(200); + + rtsx_disable_aspm(pcr); + if (pcr->ops->optimize_phy) { + err = pcr->ops->optimize_phy(pcr); + if (err < 0) + return err; + } + + rtsx_pci_init_cmd(pcr); + + /* Set mcu_cnt to 7 to ensure data can be sampled properly */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_DIV, 0x07, 0x07); + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, HOST_SLEEP_STATE, 0x03, 0x00); + /* Disable card clock */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_CLK_EN, 0x1E, 0); + /* Reset delink mode */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CHANGE_LINK_STATE, 0x0A, 0); + /* Card driving select */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_DRIVE_SEL, + 0xFF, pcr->card_drive_sel); + /* Enable SSC Clock */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, + 0xFF, SSC_8X_EN | SSC_SEL_4M); + if (PCI_PID(pcr) == PID_5261) + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, 0xFF, + RTS5261_SSC_DEPTH_2M); + else if (PCI_PID(pcr) == PID_5228) + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, 0xFF, + RTS5228_SSC_DEPTH_2M); + else + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, 0xFF, 0x12); + + /* Disable cd_pwr_save */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CHANGE_LINK_STATE, 0x16, 0x10); + /* Clear Link Ready Interrupt */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, IRQSTAT0, + LINK_RDY_INT, LINK_RDY_INT); + /* Enlarge the estimation window of PERST# glitch + * to reduce the chance of invalid card interrupt + */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PERST_GLITCH_WIDTH, 0xFF, 0x80); + /* Update RC oscillator to 400k + * bit[0] F_HIGH: for RC oscillator, Rst_value is 1'b1 + * 1: 2M 0: 400k + */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RCCTL, 0x01, 0x00); + /* Set interrupt write clear + * bit 1: U_elbi_if_rd_clr_en + * 1: Enable ELBI interrupt[31:22] & [7:0] flag read clear + * 0: ELBI interrupt flag[31:22] & [7:0] only can be write clear + */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, 
NFTS_TX_CTRL, 0x02, 0); + + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + switch (PCI_PID(pcr)) { + case PID_5250: + case PID_524A: + case PID_525A: + case PID_5260: + case PID_5261: + case PID_5228: + rtsx_pci_write_register(pcr, PM_CLK_FORCE_CTL, 1, 1); + break; + default: + break; + } + + /*init ocp*/ + rtsx_pci_init_ocp(pcr); + + /* Enable clk_request_n to enable clock power management */ + pcie_capability_clear_and_set_word(pcr->pci, PCI_EXP_LNKCTL, + 0, PCI_EXP_LNKCTL_CLKREQ_EN); + /* Enter L1 when host tx idle */ + pci_write_config_byte(pdev, 0x70F, 0x5B); + + if (pcr->ops->extra_init_hw) { + err = pcr->ops->extra_init_hw(pcr); + if (err < 0) + return err; + } + + if (pcr->aspm_mode == ASPM_MODE_REG) + rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, 0x30, 0x30); + + /* No CD interrupt if probing driver with card inserted. + * So we need to initialize pcr->card_exist here. + */ + if (pcr->ops->cd_deglitch) + pcr->card_exist = pcr->ops->cd_deglitch(pcr); + else + pcr->card_exist = rtsx_pci_readl(pcr, RTSX_BIPR) & CARD_EXIST; + + return 0; +} + +static int rtsx_pci_init_chip(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &(pcr->option); + int err, l1ss; + u32 lval; + u16 cfg_val; + u8 val; + + spin_lock_init(&pcr->lock); + mutex_init(&pcr->pcr_mutex); + + switch (PCI_PID(pcr)) { + default: + case 0x5209: + rts5209_init_params(pcr); + break; + + case 0x5229: + rts5229_init_params(pcr); + break; + + case 0x5289: + rtl8411_init_params(pcr); + break; + + case 0x5227: + rts5227_init_params(pcr); + break; + + case 0x522A: + rts522a_init_params(pcr); + break; + + case 0x5249: + rts5249_init_params(pcr); + break; + + case 0x524A: + rts524a_init_params(pcr); + break; + + case 0x525A: + rts525a_init_params(pcr); + break; + + case 0x5287: + rtl8411b_init_params(pcr); + break; + + case 0x5286: + rtl8402_init_params(pcr); + break; + + case 0x5260: + rts5260_init_params(pcr); + break; + + case 0x5261: + rts5261_init_params(pcr); + break; + + case 0x5228: + rts5228_init_params(pcr); + break; + } + + pcr_dbg(pcr, "PID: 0x%04x, IC version: 0x%02x\n", + PCI_PID(pcr), pcr->ic_version); + + pcr->slots = kcalloc(pcr->num_slots, sizeof(struct rtsx_slot), + GFP_KERNEL); + if (!pcr->slots) + return -ENOMEM; + + if (pcr->aspm_mode == ASPM_MODE_CFG) { + pcie_capability_read_word(pcr->pci, PCI_EXP_LNKCTL, &cfg_val); + if (cfg_val & PCI_EXP_LNKCTL_ASPM_L1) + pcr->aspm_enabled = true; + else + pcr->aspm_enabled = false; + + } else if (pcr->aspm_mode == ASPM_MODE_REG) { + rtsx_pci_read_register(pcr, ASPM_FORCE_CTL, &val); + if (val & FORCE_ASPM_CTL0 && val & FORCE_ASPM_CTL1) + pcr->aspm_enabled = false; + else + pcr->aspm_enabled = true; + } + + l1ss = pci_find_ext_capability(pcr->pci, PCI_EXT_CAP_ID_L1SS); + if (l1ss) { + pci_read_config_dword(pcr->pci, l1ss + PCI_L1SS_CTL1, &lval); + + if (lval & PCI_L1SS_CTL1_ASPM_L1_1) + rtsx_set_dev_flag(pcr, ASPM_L1_1_EN); + else + rtsx_clear_dev_flag(pcr, ASPM_L1_1_EN); + + if (lval & PCI_L1SS_CTL1_ASPM_L1_2) + rtsx_set_dev_flag(pcr, ASPM_L1_2_EN); + else + rtsx_clear_dev_flag(pcr, ASPM_L1_2_EN); + + if (lval & PCI_L1SS_CTL1_PCIPM_L1_1) + rtsx_set_dev_flag(pcr, PM_L1_1_EN); + else + rtsx_clear_dev_flag(pcr, PM_L1_1_EN); + + if (lval & PCI_L1SS_CTL1_PCIPM_L1_2) + rtsx_set_dev_flag(pcr, PM_L1_2_EN); + else + rtsx_clear_dev_flag(pcr, PM_L1_2_EN); + + pcie_capability_read_word(pcr->pci, PCI_EXP_DEVCTL2, &cfg_val); + if (cfg_val & PCI_EXP_DEVCTL2_LTR_EN) { + option->ltr_enabled = true; + option->ltr_active = true; + } else { + option->ltr_enabled = 
false; + } + + if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN + | PM_L1_1_EN | PM_L1_2_EN)) + option->force_clkreq_0 = false; + else + option->force_clkreq_0 = true; + } else { + option->ltr_enabled = false; + option->force_clkreq_0 = true; + } + + if (pcr->ops->fetch_vendor_settings) + pcr->ops->fetch_vendor_settings(pcr); + + pcr_dbg(pcr, "pcr->aspm_en = 0x%x\n", pcr->aspm_en); + pcr_dbg(pcr, "pcr->sd30_drive_sel_1v8 = 0x%x\n", + pcr->sd30_drive_sel_1v8); + pcr_dbg(pcr, "pcr->sd30_drive_sel_3v3 = 0x%x\n", + pcr->sd30_drive_sel_3v3); + pcr_dbg(pcr, "pcr->card_drive_sel = 0x%x\n", + pcr->card_drive_sel); + pcr_dbg(pcr, "pcr->flags = 0x%x\n", pcr->flags); + + pcr->state = PDEV_STAT_IDLE; + err = rtsx_pci_init_hw(pcr); + if (err < 0) { + kfree(pcr->slots); + return err; + } + + return 0; +} + +static int rtsx_pci_probe(struct pci_dev *pcidev, + const struct pci_device_id *id) +{ + struct rtsx_pcr *pcr; + struct pcr_handle *handle; + u32 base, len; + int ret, i, bar = 0; + + dev_dbg(&(pcidev->dev), + ": Realtek PCI-E Card Reader found at %s [%04x:%04x] (rev %x)\n", + pci_name(pcidev), (int)pcidev->vendor, (int)pcidev->device, + (int)pcidev->revision); + + ret = dma_set_mask(&pcidev->dev, DMA_BIT_MASK(32)); + if (ret < 0) + return ret; + + ret = pci_enable_device(pcidev); + if (ret) + return ret; + + ret = pci_request_regions(pcidev, DRV_NAME_RTSX_PCI); + if (ret) + goto disable; + + pcr = kzalloc(sizeof(*pcr), GFP_KERNEL); + if (!pcr) { + ret = -ENOMEM; + goto release_pci; + } + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) { + ret = -ENOMEM; + goto free_pcr; + } + handle->pcr = pcr; + + idr_preload(GFP_KERNEL); + spin_lock(&rtsx_pci_lock); + ret = idr_alloc(&rtsx_pci_idr, pcr, 0, 0, GFP_NOWAIT); + if (ret >= 0) + pcr->id = ret; + spin_unlock(&rtsx_pci_lock); + idr_preload_end(); + if (ret < 0) + goto free_handle; + + pcr->pci = pcidev; + dev_set_drvdata(&pcidev->dev, handle); + + if (CHK_PCI_PID(pcr, 0x525A)) + bar = 1; + len = pci_resource_len(pcidev, bar); + base = pci_resource_start(pcidev, bar); + pcr->remap_addr = ioremap(base, len); + if (!pcr->remap_addr) { + ret = -ENOMEM; + goto free_idr; + } + + pcr->rtsx_resv_buf = dma_alloc_coherent(&(pcidev->dev), + RTSX_RESV_BUF_LEN, &(pcr->rtsx_resv_buf_addr), + GFP_KERNEL); + if (pcr->rtsx_resv_buf == NULL) { + ret = -ENXIO; + goto unmap; + } + pcr->host_cmds_ptr = pcr->rtsx_resv_buf; + pcr->host_cmds_addr = pcr->rtsx_resv_buf_addr; + pcr->host_sg_tbl_ptr = pcr->rtsx_resv_buf + HOST_CMDS_BUF_LEN; + pcr->host_sg_tbl_addr = pcr->rtsx_resv_buf_addr + HOST_CMDS_BUF_LEN; + pcr->card_inserted = 0; + pcr->card_removed = 0; + INIT_DELAYED_WORK(&pcr->carddet_work, rtsx_pci_card_detect); + + pcr->msi_en = msi_en; + if (pcr->msi_en) { + ret = pci_enable_msi(pcidev); + if (ret) + pcr->msi_en = false; + } + + ret = rtsx_pci_acquire_irq(pcr); + if (ret < 0) + goto disable_msi; + + pci_set_master(pcidev); + synchronize_irq(pcr->irq); + + ret = rtsx_pci_init_chip(pcr); + if (ret < 0) + goto disable_irq; + + for (i = 0; i < ARRAY_SIZE(rtsx_pcr_cells); i++) { + rtsx_pcr_cells[i].platform_data = handle; + rtsx_pcr_cells[i].pdata_size = sizeof(*handle); + } + + + ret = mfd_add_devices(&pcidev->dev, pcr->id, rtsx_pcr_cells, + ARRAY_SIZE(rtsx_pcr_cells), NULL, 0, NULL); + if (ret < 0) + goto free_slots; + + pm_runtime_allow(&pcidev->dev); + pm_runtime_put(&pcidev->dev); + + return 0; + +free_slots: + kfree(pcr->slots); +disable_irq: + free_irq(pcr->irq, (void *)pcr); +disable_msi: + if (pcr->msi_en) + pci_disable_msi(pcr->pci); + 
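+ /* Probe error path: free the coherent command/scatter-gather buffer allocated earlier, then the labels below unwind the MMIO mapping, IDR entry, allocations and PCI regions in reverse order of setup. */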
dma_free_coherent(&(pcr->pci->dev), RTSX_RESV_BUF_LEN, + pcr->rtsx_resv_buf, pcr->rtsx_resv_buf_addr); +unmap: + iounmap(pcr->remap_addr); +free_idr: + spin_lock(&rtsx_pci_lock); + idr_remove(&rtsx_pci_idr, pcr->id); + spin_unlock(&rtsx_pci_lock); +free_handle: + kfree(handle); +free_pcr: + kfree(pcr); +release_pci: + pci_release_regions(pcidev); +disable: + pci_disable_device(pcidev); + + return ret; +} + +static void rtsx_pci_remove(struct pci_dev *pcidev) +{ + struct pcr_handle *handle = pci_get_drvdata(pcidev); + struct rtsx_pcr *pcr = handle->pcr; + + pcr->remove_pci = true; + + pm_runtime_get_sync(&pcidev->dev); + pm_runtime_forbid(&pcidev->dev); + + /* Disable interrupts at the pcr level */ + spin_lock_irq(&pcr->lock); + rtsx_pci_writel(pcr, RTSX_BIER, 0); + pcr->bier = 0; + spin_unlock_irq(&pcr->lock); + + cancel_delayed_work_sync(&pcr->carddet_work); + + mfd_remove_devices(&pcidev->dev); + + dma_free_coherent(&(pcr->pci->dev), RTSX_RESV_BUF_LEN, + pcr->rtsx_resv_buf, pcr->rtsx_resv_buf_addr); + free_irq(pcr->irq, (void *)pcr); + if (pcr->msi_en) + pci_disable_msi(pcr->pci); + iounmap(pcr->remap_addr); + + pci_release_regions(pcidev); + pci_disable_device(pcidev); + + spin_lock(&rtsx_pci_lock); + idr_remove(&rtsx_pci_idr, pcr->id); + spin_unlock(&rtsx_pci_lock); + + kfree(pcr->slots); + kfree(pcr); + kfree(handle); + + dev_dbg(&(pcidev->dev), + ": Realtek PCI-E Card Reader at %s [%04x:%04x] has been removed\n", + pci_name(pcidev), (int)pcidev->vendor, (int)pcidev->device); +} + +static int __maybe_unused rtsx_pci_suspend(struct device *dev_d) +{ + struct pci_dev *pcidev = to_pci_dev(dev_d); + struct pcr_handle *handle = pci_get_drvdata(pcidev); + struct rtsx_pcr *pcr = handle->pcr; + + dev_dbg(&(pcidev->dev), "--> %s\n", __func__); + + cancel_delayed_work_sync(&pcr->carddet_work); + + mutex_lock(&pcr->pcr_mutex); + + rtsx_pci_power_off(pcr, HOST_ENTER_S3, false); + + mutex_unlock(&pcr->pcr_mutex); + return 0; +} + +static int __maybe_unused rtsx_pci_resume(struct device *dev_d) +{ + struct pci_dev *pcidev = to_pci_dev(dev_d); + struct pcr_handle *handle = pci_get_drvdata(pcidev); + struct rtsx_pcr *pcr = handle->pcr; + int ret = 0; + + dev_dbg(&(pcidev->dev), "--> %s\n", __func__); + + mutex_lock(&pcr->pcr_mutex); + + ret = rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, 0x00); + if (ret) + goto out; + + ret = rtsx_pci_init_hw(pcr); + if (ret) + goto out; + +out: + mutex_unlock(&pcr->pcr_mutex); + return ret; +} + +#ifdef CONFIG_PM + +static void rtsx_enable_aspm(struct rtsx_pcr *pcr) +{ + if (pcr->ops->set_aspm) + pcr->ops->set_aspm(pcr, true); + else + rtsx_comm_set_aspm(pcr, true); +} + +static void rtsx_comm_pm_power_saving(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + if (option->ltr_enabled) { + u32 latency = option->ltr_l1off_latency; + + if (rtsx_check_dev_flag(pcr, L1_SNOOZE_TEST_EN)) + mdelay(option->l1_snooze_delay); + + rtsx_set_ltr_latency(pcr, latency); + } + + if (rtsx_check_dev_flag(pcr, LTR_L1SS_PWR_GATE_EN)) + rtsx_set_l1off_sub_cfg_d0(pcr, 0); + + rtsx_enable_aspm(pcr); +} + +static void rtsx_pm_power_saving(struct rtsx_pcr *pcr) +{ + rtsx_comm_pm_power_saving(pcr); +} + +static void rtsx_pci_shutdown(struct pci_dev *pcidev) +{ + struct pcr_handle *handle = pci_get_drvdata(pcidev); + struct rtsx_pcr *pcr = handle->pcr; + + dev_dbg(&(pcidev->dev), "--> %s\n", __func__); + + rtsx_pci_power_off(pcr, HOST_ENTER_S1, false); + + pci_disable_device(pcidev); + free_irq(pcr->irq, (void *)pcr); + if (pcr->msi_en) + 
pci_disable_msi(pcr->pci); +} + +static int rtsx_pci_runtime_idle(struct device *device) +{ + struct pci_dev *pcidev = to_pci_dev(device); + struct pcr_handle *handle = pci_get_drvdata(pcidev); + struct rtsx_pcr *pcr = handle->pcr; + + dev_dbg(device, "--> %s\n", __func__); + + mutex_lock(&pcr->pcr_mutex); + + pcr->state = PDEV_STAT_IDLE; + + if (pcr->ops->disable_auto_blink) + pcr->ops->disable_auto_blink(pcr); + if (pcr->ops->turn_off_led) + pcr->ops->turn_off_led(pcr); + + rtsx_pm_power_saving(pcr); + + mutex_unlock(&pcr->pcr_mutex); + + if (pcr->rtd3_en) + pm_schedule_suspend(device, 10000); + + return -EBUSY; +} + +static int rtsx_pci_runtime_suspend(struct device *device) +{ + struct pci_dev *pcidev = to_pci_dev(device); + struct pcr_handle *handle = pci_get_drvdata(pcidev); + struct rtsx_pcr *pcr = handle->pcr; + + dev_dbg(device, "--> %s\n", __func__); + + cancel_delayed_work_sync(&pcr->carddet_work); + + mutex_lock(&pcr->pcr_mutex); + rtsx_pci_power_off(pcr, HOST_ENTER_S3, true); + + mutex_unlock(&pcr->pcr_mutex); + + return 0; +} + +static int rtsx_pci_runtime_resume(struct device *device) +{ + struct pci_dev *pcidev = to_pci_dev(device); + struct pcr_handle *handle = pci_get_drvdata(pcidev); + struct rtsx_pcr *pcr = handle->pcr; + + dev_dbg(device, "--> %s\n", __func__); + + mutex_lock(&pcr->pcr_mutex); + + rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, 0x00); + + rtsx_pci_init_hw(pcr); + + if (pcr->slots[RTSX_SD_CARD].p_dev != NULL) { + pcr->slots[RTSX_SD_CARD].card_event( + pcr->slots[RTSX_SD_CARD].p_dev); + } + + mutex_unlock(&pcr->pcr_mutex); + return 0; +} + +#else /* CONFIG_PM */ + +#define rtsx_pci_shutdown NULL +#define rtsx_pci_runtime_suspend NULL +#define rtsx_pic_runtime_resume NULL + +#endif /* CONFIG_PM */ + +static const struct dev_pm_ops rtsx_pci_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(rtsx_pci_suspend, rtsx_pci_resume) + SET_RUNTIME_PM_OPS(rtsx_pci_runtime_suspend, rtsx_pci_runtime_resume, rtsx_pci_runtime_idle) +}; + +static struct pci_driver rtsx_pci_driver = { + .name = DRV_NAME_RTSX_PCI, + .id_table = rtsx_pci_ids, + .probe = rtsx_pci_probe, + .remove = rtsx_pci_remove, + .driver.pm = &rtsx_pci_pm_ops, + .shutdown = rtsx_pci_shutdown, +}; +module_pci_driver(rtsx_pci_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Wei WANG "); +MODULE_DESCRIPTION("Realtek PCI-E Card Reader Driver"); diff --git a/drivers/misc/cardreader/rtsx_pcr.h b/drivers/misc/cardreader/rtsx_pcr.h new file mode 100644 index 0000000000..37d1f316ae --- /dev/null +++ b/drivers/misc/cardreader/rtsx_pcr.h @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. 
+ * + * Author: + * Wei WANG + */ + +#ifndef __RTSX_PCR_H +#define __RTSX_PCR_H + +#include + +#define MIN_DIV_N_PCR 80 +#define MAX_DIV_N_PCR 208 + +#define RTS522A_PME_FORCE_CTL 0xFF78 +#define RTS522A_AUTOLOAD_CFG1 0xFF7C +#define RTS522A_PM_CTRL3 0xFF7E + +#define RTS524A_PME_FORCE_CTL 0xFF78 +#define REG_EFUSE_BYPASS 0x08 +#define REG_EFUSE_POR 0x04 +#define REG_EFUSE_POWER_MASK 0x03 +#define REG_EFUSE_POWERON 0x03 +#define REG_EFUSE_POWEROFF 0x00 +#define RTS5250_CLK_CFG3 0xFF79 +#define RTS525A_CFG_MEM_PD 0xF0 +#define RTS524A_AUTOLOAD_CFG1 0xFF7C +#define RTS524A_PM_CTRL3 0xFF7E +#define RTS525A_BIOS_CFG 0xFF2D +#define RTS525A_LOAD_BIOS_FLAG 0x01 +#define RTS525A_CLEAR_BIOS_FLAG 0x00 + +#define RTS525A_EFUSE_CTL 0xFC32 +#define REG_EFUSE_ENABLE 0x80 +#define REG_EFUSE_MODE 0x40 +#define RTS525A_EFUSE_ADD 0xFC33 +#define REG_EFUSE_ADD_MASK 0x3F +#define RTS525A_EFUSE_DATA 0xFC35 + +#define LTR_ACTIVE_LATENCY_DEF 0x883C +#define LTR_IDLE_LATENCY_DEF 0x892C +#define LTR_L1OFF_LATENCY_DEF 0x9003 +#define L1_SNOOZE_DELAY_DEF 1 +#define LTR_L1OFF_SSPWRGATE_5249_DEF 0xAF +#define LTR_L1OFF_SSPWRGATE_5250_DEF 0xFF +#define LTR_L1OFF_SNOOZE_SSPWRGATE_5249_DEF 0xAC +#define LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF 0xF8 +#define CMD_TIMEOUT_DEF 100 +#define MASK_8_BIT_DEF 0xFF + +#define SSC_CLOCK_STABLE_WAIT 130 + +#define RTS524A_OCP_THD_800 0x04 +#define RTS525A_OCP_THD_800 0x05 +#define RTS522A_OCP_THD_800 0x06 + + +int __rtsx_pci_write_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 val); +int __rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val); + +void rts5209_init_params(struct rtsx_pcr *pcr); +void rts5229_init_params(struct rtsx_pcr *pcr); +void rtl8411_init_params(struct rtsx_pcr *pcr); +void rtl8402_init_params(struct rtsx_pcr *pcr); +void rts5227_init_params(struct rtsx_pcr *pcr); +void rts522a_init_params(struct rtsx_pcr *pcr); +void rts5249_init_params(struct rtsx_pcr *pcr); +void rts524a_init_params(struct rtsx_pcr *pcr); +void rts525a_init_params(struct rtsx_pcr *pcr); +void rtl8411b_init_params(struct rtsx_pcr *pcr); +void rts5260_init_params(struct rtsx_pcr *pcr); +void rts5261_init_params(struct rtsx_pcr *pcr); +void rts5228_init_params(struct rtsx_pcr *pcr); + +static inline u8 map_sd_drive(int idx) +{ + u8 sd_drive[4] = { + 0x01, /* Type D */ + 0x02, /* Type C */ + 0x05, /* Type A */ + 0x03 /* Type B */ + }; + + return sd_drive[idx]; +} + +#define rtsx_vendor_setting_valid(reg) (!((reg) & 0x1000000)) +#define rts5209_vendor_setting1_valid(reg) (!((reg) & 0x80)) +#define rts5209_vendor_setting2_valid(reg) ((reg) & 0x80) + +#define rtsx_check_mmc_support(reg) ((reg) & 0x10) +#define rtsx_reg_to_rtd3(reg) ((reg) & 0x02) +#define rtsx_reg_to_rtd3_uhsii(reg) ((reg) & 0x04) +#define rtsx_reg_to_aspm(reg) (((reg) >> 28) & 0x03) +#define rtsx_reg_to_sd30_drive_sel_1v8(reg) (((reg) >> 26) & 0x03) +#define rtsx_reg_to_sd30_drive_sel_3v3(reg) (((reg) >> 5) & 0x03) +#define rtsx_reg_to_card_drive_sel(reg) ((((reg) >> 25) & 0x01) << 6) +#define rtsx_reg_check_reverse_socket(reg) ((reg) & 0x4000) +#define rts5209_reg_to_aspm(reg) (((reg) >> 5) & 0x03) +#define rts5209_reg_check_ms_pmos(reg) (!((reg) & 0x08)) +#define rts5209_reg_to_sd30_drive_sel_1v8(reg) (((reg) >> 3) & 0x07) +#define rts5209_reg_to_sd30_drive_sel_3v3(reg) ((reg) & 0x07) +#define rts5209_reg_to_card_drive_sel(reg) ((reg) >> 8) +#define rtl8411_reg_to_sd30_drive_sel_3v3(reg) (((reg) >> 5) & 0x07) +#define rtl8411b_reg_to_sd30_drive_sel_3v3(reg) ((reg) & 0x03) + +#define set_pull_ctrl_tables(pcr, __device) 
\ +do { \ + pcr->sd_pull_ctl_enable_tbl = __device##_sd_pull_ctl_enable_tbl; \ + pcr->sd_pull_ctl_disable_tbl = __device##_sd_pull_ctl_disable_tbl; \ + pcr->ms_pull_ctl_enable_tbl = __device##_ms_pull_ctl_enable_tbl; \ + pcr->ms_pull_ctl_disable_tbl = __device##_ms_pull_ctl_disable_tbl; \ +} while (0) + +/* generic operations */ +int rtsx_gops_pm_reset(struct rtsx_pcr *pcr); +int rtsx_set_ltr_latency(struct rtsx_pcr *pcr, u32 latency); +int rtsx_set_l1off_sub(struct rtsx_pcr *pcr, u8 val); +void rtsx_pci_init_ocp(struct rtsx_pcr *pcr); +void rtsx_pci_disable_ocp(struct rtsx_pcr *pcr); +void rtsx_pci_enable_ocp(struct rtsx_pcr *pcr); +int rtsx_pci_get_ocpstat(struct rtsx_pcr *pcr, u8 *val); +void rtsx_pci_clear_ocpstat(struct rtsx_pcr *pcr); +void rtsx_pci_enable_oobs_polling(struct rtsx_pcr *pcr); +void rtsx_pci_disable_oobs_polling(struct rtsx_pcr *pcr); +int rtsx_sd_power_off_card3v3(struct rtsx_pcr *pcr); +int rtsx_ms_power_off_card3v3(struct rtsx_pcr *pcr); + +#endif diff --git a/drivers/misc/cardreader/rtsx_usb.c b/drivers/misc/cardreader/rtsx_usb.c new file mode 100644 index 0000000000..f150d8769f --- /dev/null +++ b/drivers/misc/cardreader/rtsx_usb.c @@ -0,0 +1,800 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Driver for Realtek USB card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * Author: + * Roger Tseng + */ +#include +#include +#include +#include +#include +#include +#include + +static int polling_pipe = 1; +module_param(polling_pipe, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(polling_pipe, "polling pipe (0: ctl, 1: bulk)"); + +static const struct mfd_cell rtsx_usb_cells[] = { + [RTSX_USB_SD_CARD] = { + .name = "rtsx_usb_sdmmc", + .pdata_size = 0, + }, + [RTSX_USB_MS_CARD] = { + .name = "rtsx_usb_ms", + .pdata_size = 0, + }, +}; + +static void rtsx_usb_sg_timed_out(struct timer_list *t) +{ + struct rtsx_ucr *ucr = from_timer(ucr, t, sg_timer); + + dev_dbg(&ucr->pusb_intf->dev, "%s: sg transfer timed out", __func__); + usb_sg_cancel(&ucr->current_sg); +} + +static int rtsx_usb_bulk_transfer_sglist(struct rtsx_ucr *ucr, + unsigned int pipe, struct scatterlist *sg, int num_sg, + unsigned int length, unsigned int *act_len, int timeout) +{ + int ret; + + dev_dbg(&ucr->pusb_intf->dev, "%s: xfer %u bytes, %d entries\n", + __func__, length, num_sg); + ret = usb_sg_init(&ucr->current_sg, ucr->pusb_dev, pipe, 0, + sg, num_sg, length, GFP_NOIO); + if (ret) + return ret; + + ucr->sg_timer.expires = jiffies + msecs_to_jiffies(timeout); + add_timer(&ucr->sg_timer); + usb_sg_wait(&ucr->current_sg); + if (!del_timer_sync(&ucr->sg_timer)) + ret = -ETIMEDOUT; + else + ret = ucr->current_sg.status; + + if (act_len) + *act_len = ucr->current_sg.bytes; + + return ret; +} + +int rtsx_usb_transfer_data(struct rtsx_ucr *ucr, unsigned int pipe, + void *buf, unsigned int len, int num_sg, + unsigned int *act_len, int timeout) +{ + if (timeout < 600) + timeout = 600; + + if (num_sg) + return rtsx_usb_bulk_transfer_sglist(ucr, pipe, + (struct scatterlist *)buf, num_sg, len, act_len, + timeout); + else + return usb_bulk_msg(ucr->pusb_dev, pipe, buf, len, act_len, + timeout); +} +EXPORT_SYMBOL_GPL(rtsx_usb_transfer_data); + +static inline void rtsx_usb_seq_cmd_hdr(struct rtsx_ucr *ucr, + u16 addr, u16 len, u8 seq_type) +{ + rtsx_usb_cmd_hdr_tag(ucr); + + ucr->cmd_buf[PACKET_TYPE] = seq_type; + ucr->cmd_buf[5] = (u8)(len >> 8); + ucr->cmd_buf[6] = (u8)len; + ucr->cmd_buf[8] = (u8)(addr >> 8); + ucr->cmd_buf[9] = (u8)addr; + + if (seq_type == SEQ_WRITE) + 
ucr->cmd_buf[STAGE_FLAG] = 0; + else + ucr->cmd_buf[STAGE_FLAG] = STAGE_R; +} + +static int rtsx_usb_seq_write_register(struct rtsx_ucr *ucr, + u16 addr, u16 len, u8 *data) +{ + u16 cmd_len = ALIGN(SEQ_WRITE_DATA_OFFSET + len, 4); + + if (!data) + return -EINVAL; + + if (cmd_len > IOBUF_SIZE) + return -EINVAL; + + rtsx_usb_seq_cmd_hdr(ucr, addr, len, SEQ_WRITE); + memcpy(ucr->cmd_buf + SEQ_WRITE_DATA_OFFSET, data, len); + + return rtsx_usb_transfer_data(ucr, + usb_sndbulkpipe(ucr->pusb_dev, EP_BULK_OUT), + ucr->cmd_buf, cmd_len, 0, NULL, 100); +} + +static int rtsx_usb_seq_read_register(struct rtsx_ucr *ucr, + u16 addr, u16 len, u8 *data) +{ + int i, ret; + u16 rsp_len = round_down(len, 4); + u16 res_len = len - rsp_len; + + if (!data) + return -EINVAL; + + /* 4-byte aligned part */ + if (rsp_len) { + rtsx_usb_seq_cmd_hdr(ucr, addr, len, SEQ_READ); + ret = rtsx_usb_transfer_data(ucr, + usb_sndbulkpipe(ucr->pusb_dev, EP_BULK_OUT), + ucr->cmd_buf, 12, 0, NULL, 100); + if (ret) + return ret; + + ret = rtsx_usb_transfer_data(ucr, + usb_rcvbulkpipe(ucr->pusb_dev, EP_BULK_IN), + data, rsp_len, 0, NULL, 100); + if (ret) + return ret; + } + + /* unaligned part */ + for (i = 0; i < res_len; i++) { + ret = rtsx_usb_read_register(ucr, addr + rsp_len + i, + data + rsp_len + i); + if (ret) + return ret; + } + + return 0; +} + +int rtsx_usb_read_ppbuf(struct rtsx_ucr *ucr, u8 *buf, int buf_len) +{ + return rtsx_usb_seq_read_register(ucr, PPBUF_BASE2, (u16)buf_len, buf); +} +EXPORT_SYMBOL_GPL(rtsx_usb_read_ppbuf); + +int rtsx_usb_write_ppbuf(struct rtsx_ucr *ucr, u8 *buf, int buf_len) +{ + return rtsx_usb_seq_write_register(ucr, PPBUF_BASE2, (u16)buf_len, buf); +} +EXPORT_SYMBOL_GPL(rtsx_usb_write_ppbuf); + +int rtsx_usb_ep0_write_register(struct rtsx_ucr *ucr, u16 addr, + u8 mask, u8 data) +{ + u16 value, index; + + addr |= EP0_WRITE_REG_CMD << EP0_OP_SHIFT; + value = swab16(addr); + index = mask | data << 8; + + return usb_control_msg(ucr->pusb_dev, + usb_sndctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_REG_OP, + USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + value, index, NULL, 0, 100); +} +EXPORT_SYMBOL_GPL(rtsx_usb_ep0_write_register); + +int rtsx_usb_ep0_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data) +{ + u16 value; + u8 *buf; + int ret; + + if (!data) + return -EINVAL; + + buf = kzalloc(sizeof(u8), GFP_KERNEL); + if (!buf) + return -ENOMEM; + + addr |= EP0_READ_REG_CMD << EP0_OP_SHIFT; + value = swab16(addr); + + ret = usb_control_msg(ucr->pusb_dev, + usb_rcvctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_REG_OP, + USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + value, 0, buf, 1, 100); + *data = *buf; + + kfree(buf); + return ret; +} +EXPORT_SYMBOL_GPL(rtsx_usb_ep0_read_register); + +void rtsx_usb_add_cmd(struct rtsx_ucr *ucr, u8 cmd_type, u16 reg_addr, + u8 mask, u8 data) +{ + int i; + + if (ucr->cmd_idx < (IOBUF_SIZE - CMD_OFFSET) / 4) { + i = CMD_OFFSET + ucr->cmd_idx * 4; + + ucr->cmd_buf[i++] = ((cmd_type & 0x03) << 6) | + (u8)((reg_addr >> 8) & 0x3F); + ucr->cmd_buf[i++] = (u8)reg_addr; + ucr->cmd_buf[i++] = mask; + ucr->cmd_buf[i++] = data; + + ucr->cmd_idx++; + } +} +EXPORT_SYMBOL_GPL(rtsx_usb_add_cmd); + +int rtsx_usb_send_cmd(struct rtsx_ucr *ucr, u8 flag, int timeout) +{ + int ret; + + ucr->cmd_buf[CNT_H] = (u8)(ucr->cmd_idx >> 8); + ucr->cmd_buf[CNT_L] = (u8)(ucr->cmd_idx); + ucr->cmd_buf[STAGE_FLAG] = flag; + + ret = rtsx_usb_transfer_data(ucr, + usb_sndbulkpipe(ucr->pusb_dev, EP_BULK_OUT), + ucr->cmd_buf, ucr->cmd_idx * 4 + CMD_OFFSET, + 0, NULL, timeout); + if (ret) { + 
rtsx_usb_clear_fsm_err(ucr); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_usb_send_cmd); + +int rtsx_usb_get_rsp(struct rtsx_ucr *ucr, int rsp_len, int timeout) +{ + if (rsp_len <= 0) + return -EINVAL; + + rsp_len = ALIGN(rsp_len, 4); + + return rtsx_usb_transfer_data(ucr, + usb_rcvbulkpipe(ucr->pusb_dev, EP_BULK_IN), + ucr->rsp_buf, rsp_len, 0, NULL, timeout); +} +EXPORT_SYMBOL_GPL(rtsx_usb_get_rsp); + +static int rtsx_usb_get_status_with_bulk(struct rtsx_ucr *ucr, u16 *status) +{ + int ret; + + rtsx_usb_init_cmd(ucr); + rtsx_usb_add_cmd(ucr, READ_REG_CMD, CARD_EXIST, 0x00, 0x00); + rtsx_usb_add_cmd(ucr, READ_REG_CMD, OCPSTAT, 0x00, 0x00); + ret = rtsx_usb_send_cmd(ucr, MODE_CR, 100); + if (ret) + return ret; + + ret = rtsx_usb_get_rsp(ucr, 2, 100); + if (ret) + return ret; + + *status = ((ucr->rsp_buf[0] >> 2) & 0x0f) | + ((ucr->rsp_buf[1] & 0x03) << 4); + + return 0; +} + +int rtsx_usb_get_card_status(struct rtsx_ucr *ucr, u16 *status) +{ + int ret; + u16 *buf; + + if (!status) + return -EINVAL; + + if (polling_pipe == 0) { + buf = kzalloc(sizeof(u16), GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ret = usb_control_msg(ucr->pusb_dev, + usb_rcvctrlpipe(ucr->pusb_dev, 0), + RTSX_USB_REQ_POLL, + USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + 0, 0, buf, 2, 100); + *status = *buf; + + kfree(buf); + } else { + ret = rtsx_usb_get_status_with_bulk(ucr, status); + } + + /* usb_control_msg may return positive when success */ + if (ret < 0) + return ret; + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_usb_get_card_status); + +static int rtsx_usb_write_phy_register(struct rtsx_ucr *ucr, u8 addr, u8 val) +{ + dev_dbg(&ucr->pusb_intf->dev, "Write 0x%x to phy register 0x%x\n", + val, addr); + + rtsx_usb_init_cmd(ucr); + + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VSTAIN, 0xFF, val); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VCONTROL, 0xFF, addr & 0x0F); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x01); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VCONTROL, + 0xFF, (addr >> 4) & 0x0F); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x01); + + return rtsx_usb_send_cmd(ucr, MODE_C, 100); +} + +int rtsx_usb_write_register(struct rtsx_ucr *ucr, u16 addr, u8 mask, u8 data) +{ + rtsx_usb_init_cmd(ucr); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, addr, mask, data); + return rtsx_usb_send_cmd(ucr, MODE_C, 100); +} +EXPORT_SYMBOL_GPL(rtsx_usb_write_register); + +int rtsx_usb_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data) +{ + int ret; + + if (data != NULL) + *data = 0; + + rtsx_usb_init_cmd(ucr); + rtsx_usb_add_cmd(ucr, READ_REG_CMD, addr, 0, 0); + ret = rtsx_usb_send_cmd(ucr, MODE_CR, 100); + if (ret) + return ret; + + ret = rtsx_usb_get_rsp(ucr, 1, 100); + if (ret) + return ret; + + if (data != NULL) + *data = ucr->rsp_buf[0]; + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_usb_read_register); + +static inline u8 double_ssc_depth(u8 depth) +{ + return (depth > 1) ? 
(depth - 1) : depth; +} + +static u8 revise_ssc_depth(u8 ssc_depth, u8 div) +{ + if (div > CLK_DIV_1) { + if (ssc_depth > div - 1) + ssc_depth -= (div - 1); + else + ssc_depth = SSC_DEPTH_2M; + } + + return ssc_depth; +} + +int rtsx_usb_switch_clock(struct rtsx_ucr *ucr, unsigned int card_clock, + u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk) +{ + int ret; + u8 n, clk_divider, mcu_cnt, div; + + if (!card_clock) { + ucr->cur_clk = 0; + return 0; + } + + if (initial_mode) { + /* We use 250k(around) here, in initial stage */ + clk_divider = SD_CLK_DIVIDE_128; + card_clock = 30000000; + } else { + clk_divider = SD_CLK_DIVIDE_0; + } + + ret = rtsx_usb_write_register(ucr, SD_CFG1, + SD_CLK_DIVIDE_MASK, clk_divider); + if (ret < 0) + return ret; + + card_clock /= 1000000; + dev_dbg(&ucr->pusb_intf->dev, + "Switch card clock to %dMHz\n", card_clock); + + if (!initial_mode && double_clk) + card_clock *= 2; + dev_dbg(&ucr->pusb_intf->dev, + "Internal SSC clock: %dMHz (cur_clk = %d)\n", + card_clock, ucr->cur_clk); + + if (card_clock == ucr->cur_clk) + return 0; + + /* Converting clock value into internal settings: n and div */ + n = card_clock - 2; + if ((card_clock <= 2) || (n > MAX_DIV_N)) + return -EINVAL; + + mcu_cnt = 60/card_clock + 3; + if (mcu_cnt > 15) + mcu_cnt = 15; + + /* Make sure that the SSC clock div_n is not less than MIN_DIV_N */ + + div = CLK_DIV_1; + while (n < MIN_DIV_N && div < CLK_DIV_4) { + n = (n + 2) * 2 - 2; + div++; + } + dev_dbg(&ucr->pusb_intf->dev, "n = %d, div = %d\n", n, div); + + if (double_clk) + ssc_depth = double_ssc_depth(ssc_depth); + + ssc_depth = revise_ssc_depth(ssc_depth, div); + dev_dbg(&ucr->pusb_intf->dev, "ssc_depth = %d\n", ssc_depth); + + rtsx_usb_init_cmd(ucr); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CLK_DIV, CLK_CHANGE, CLK_CHANGE); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CLK_DIV, + 0x3F, (div << 4) | mcu_cnt); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, 0); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_CTL2, + SSC_DEPTH_MASK, ssc_depth); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_DIV_N_0, 0xFF, n); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, SSC_RSTB); + if (vpclk) { + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, 0); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, PHASE_NOT_RESET); + } + + ret = rtsx_usb_send_cmd(ucr, MODE_C, 2000); + if (ret < 0) + return ret; + + ret = rtsx_usb_write_register(ucr, SSC_CTL1, 0xff, + SSC_RSTB | SSC_8X_EN | SSC_SEL_4M); + if (ret < 0) + return ret; + + /* Wait SSC clock stable */ + usleep_range(100, 1000); + + ret = rtsx_usb_write_register(ucr, CLK_DIV, CLK_CHANGE, 0); + if (ret < 0) + return ret; + + ucr->cur_clk = card_clock; + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_usb_switch_clock); + +int rtsx_usb_card_exclusive_check(struct rtsx_ucr *ucr, int card) +{ + int ret; + u16 val; + u16 cd_mask[] = { + [RTSX_USB_SD_CARD] = (CD_MASK & ~SD_CD), + [RTSX_USB_MS_CARD] = (CD_MASK & ~MS_CD) + }; + + ret = rtsx_usb_get_card_status(ucr, &val); + /* + * If get status fails, return 0 (ok) for the exclusive check + * and let the flow fail at somewhere else. 
+ */ + if (ret) + return 0; + + if (val & cd_mask[card]) + return -EIO; + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_usb_card_exclusive_check); + +static int rtsx_usb_reset_chip(struct rtsx_ucr *ucr) +{ + int ret; + u8 val; + + rtsx_usb_init_cmd(ucr); + + if (CHECK_PKG(ucr, LQFP48)) { + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PWR_CTL, + LDO3318_PWR_MASK, LDO_SUSPEND); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PWR_CTL, + FORCE_LDO_POWERB, FORCE_LDO_POWERB); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PULL_CTL1, + 0x30, 0x10); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PULL_CTL5, + 0x03, 0x01); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PULL_CTL6, + 0x0C, 0x04); + } + + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SYS_DUMMY0, NYET_MSAK, NYET_EN); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CD_DEGLITCH_WIDTH, 0xFF, 0x08); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, + CD_DEGLITCH_EN, XD_CD_DEGLITCH_EN, 0x0); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SD30_DRIVE_SEL, + SD30_DRIVE_MASK, DRIVER_TYPE_D); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, + CARD_DRIVE_SEL, SD20_DRIVE_MASK, 0x0); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, LDO_POWER_CFG, 0xE0, 0x0); + + if (ucr->is_rts5179) + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, + CARD_PULL_CTL5, 0x03, 0x01); + + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_DMA1_CTL, + EXTEND_DMA1_ASYNC_SIGNAL, EXTEND_DMA1_ASYNC_SIGNAL); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_INT_PEND, + XD_INT | MS_INT | SD_INT, + XD_INT | MS_INT | SD_INT); + + ret = rtsx_usb_send_cmd(ucr, MODE_C, 100); + if (ret) + return ret; + + /* config non-crystal mode */ + rtsx_usb_read_register(ucr, CFG_MODE, &val); + if ((val & XTAL_FREE) || ((val & CLK_MODE_MASK) == CLK_MODE_NON_XTAL)) { + ret = rtsx_usb_write_phy_register(ucr, 0xC2, 0x7C); + if (ret) + return ret; + } + + return 0; +} + +static int rtsx_usb_init_chip(struct rtsx_ucr *ucr) +{ + int ret; + u8 val; + + rtsx_usb_clear_fsm_err(ucr); + + /* power on SSC */ + ret = rtsx_usb_write_register(ucr, + FPDCTL, SSC_POWER_MASK, SSC_POWER_ON); + if (ret) + return ret; + + usleep_range(100, 1000); + ret = rtsx_usb_write_register(ucr, CLK_DIV, CLK_CHANGE, 0x00); + if (ret) + return ret; + + /* determine IC version */ + ret = rtsx_usb_read_register(ucr, HW_VERSION, &val); + if (ret) + return ret; + + ucr->ic_version = val & HW_VER_MASK; + + /* determine package */ + ret = rtsx_usb_read_register(ucr, CARD_SHARE_MODE, &val); + if (ret) + return ret; + + if (val & CARD_SHARE_LQFP_SEL) { + ucr->package = LQFP48; + dev_dbg(&ucr->pusb_intf->dev, "Package: LQFP48\n"); + } else { + ucr->package = QFN24; + dev_dbg(&ucr->pusb_intf->dev, "Package: QFN24\n"); + } + + /* determine IC variations */ + rtsx_usb_read_register(ucr, CFG_MODE_1, &val); + if (val & RTS5179) { + ucr->is_rts5179 = true; + dev_dbg(&ucr->pusb_intf->dev, "Device is rts5179\n"); + } else { + ucr->is_rts5179 = false; + } + + return rtsx_usb_reset_chip(ucr); +} + +static int rtsx_usb_probe(struct usb_interface *intf, + const struct usb_device_id *id) +{ + struct usb_device *usb_dev = interface_to_usbdev(intf); + struct rtsx_ucr *ucr; + int ret; + + dev_dbg(&intf->dev, + ": Realtek USB Card Reader found at bus %03d address %03d\n", + usb_dev->bus->busnum, usb_dev->devnum); + + ucr = devm_kzalloc(&intf->dev, sizeof(*ucr), GFP_KERNEL); + if (!ucr) + return -ENOMEM; + + ucr->pusb_dev = usb_dev; + + ucr->cmd_buf = kmalloc(IOBUF_SIZE, GFP_KERNEL); + if (!ucr->cmd_buf) + return -ENOMEM; + + ucr->rsp_buf = kmalloc(IOBUF_SIZE, GFP_KERNEL); + if (!ucr->rsp_buf) { + ret = -ENOMEM; + goto out_free_cmd_buf; + } + + 
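+ /* Both transfer bounce buffers are allocated at this point; attach the driver state to the USB interface before the chip is initialized below. */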
usb_set_intfdata(intf, ucr); + + ucr->vendor_id = id->idVendor; + ucr->product_id = id->idProduct; + + mutex_init(&ucr->dev_mutex); + + ucr->pusb_intf = intf; + + /* initialize */ + ret = rtsx_usb_init_chip(ucr); + if (ret) + goto out_init_fail; + + /* initialize USB SG transfer timer */ + timer_setup(&ucr->sg_timer, rtsx_usb_sg_timed_out, 0); + + ret = mfd_add_hotplug_devices(&intf->dev, rtsx_usb_cells, + ARRAY_SIZE(rtsx_usb_cells)); + if (ret) + goto out_init_fail; + +#ifdef CONFIG_PM + intf->needs_remote_wakeup = 1; + usb_enable_autosuspend(usb_dev); +#endif + + return 0; + +out_init_fail: + usb_set_intfdata(ucr->pusb_intf, NULL); + kfree(ucr->rsp_buf); + ucr->rsp_buf = NULL; +out_free_cmd_buf: + kfree(ucr->cmd_buf); + ucr->cmd_buf = NULL; + return ret; +} + +static void rtsx_usb_disconnect(struct usb_interface *intf) +{ + struct rtsx_ucr *ucr = (struct rtsx_ucr *)usb_get_intfdata(intf); + + dev_dbg(&intf->dev, "%s called\n", __func__); + + mfd_remove_devices(&intf->dev); + + usb_set_intfdata(ucr->pusb_intf, NULL); + + kfree(ucr->cmd_buf); + ucr->cmd_buf = NULL; + + kfree(ucr->rsp_buf); + ucr->rsp_buf = NULL; +} + +#ifdef CONFIG_PM +static int rtsx_usb_suspend(struct usb_interface *intf, pm_message_t message) +{ + struct rtsx_ucr *ucr = + (struct rtsx_ucr *)usb_get_intfdata(intf); + u16 val = 0; + + dev_dbg(&intf->dev, "%s called with pm message 0x%04x\n", + __func__, message.event); + + if (PMSG_IS_AUTO(message)) { + if (mutex_trylock(&ucr->dev_mutex)) { + rtsx_usb_get_card_status(ucr, &val); + mutex_unlock(&ucr->dev_mutex); + + /* Defer the autosuspend if card exists */ + if (val & (SD_CD | MS_CD)) + return -EAGAIN; + } else { + /* There is an ongoing operation*/ + return -EAGAIN; + } + } + + return 0; +} + +static int rtsx_usb_resume_child(struct device *dev, void *data) +{ + pm_request_resume(dev); + return 0; +} + +static int rtsx_usb_resume(struct usb_interface *intf) +{ + device_for_each_child(&intf->dev, NULL, rtsx_usb_resume_child); + return 0; +} + +static int rtsx_usb_reset_resume(struct usb_interface *intf) +{ + struct rtsx_ucr *ucr = + (struct rtsx_ucr *)usb_get_intfdata(intf); + + rtsx_usb_reset_chip(ucr); + device_for_each_child(&intf->dev, NULL, rtsx_usb_resume_child); + return 0; +} + +#else /* CONFIG_PM */ + +#define rtsx_usb_suspend NULL +#define rtsx_usb_resume NULL +#define rtsx_usb_reset_resume NULL + +#endif /* CONFIG_PM */ + + +static int rtsx_usb_pre_reset(struct usb_interface *intf) +{ + struct rtsx_ucr *ucr = (struct rtsx_ucr *)usb_get_intfdata(intf); + + mutex_lock(&ucr->dev_mutex); + return 0; +} + +static int rtsx_usb_post_reset(struct usb_interface *intf) +{ + struct rtsx_ucr *ucr = (struct rtsx_ucr *)usb_get_intfdata(intf); + + mutex_unlock(&ucr->dev_mutex); + return 0; +} + +static const struct usb_device_id rtsx_usb_usb_ids[] = { + { USB_DEVICE(0x0BDA, 0x0129) }, + { USB_DEVICE(0x0BDA, 0x0139) }, + { USB_DEVICE(0x0BDA, 0x0140) }, + { } +}; +MODULE_DEVICE_TABLE(usb, rtsx_usb_usb_ids); + +static struct usb_driver rtsx_usb_driver = { + .name = "rtsx_usb", + .probe = rtsx_usb_probe, + .disconnect = rtsx_usb_disconnect, + .suspend = rtsx_usb_suspend, + .resume = rtsx_usb_resume, + .reset_resume = rtsx_usb_reset_resume, + .pre_reset = rtsx_usb_pre_reset, + .post_reset = rtsx_usb_post_reset, + .id_table = rtsx_usb_usb_ids, + .supports_autosuspend = 1, + .soft_unbind = 1, +}; + +module_usb_driver(rtsx_usb_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Roger Tseng "); +MODULE_DESCRIPTION("Realtek USB Card Reader Driver"); diff --git 
a/drivers/misc/cb710/Kconfig b/drivers/misc/cb710/Kconfig new file mode 100644 index 0000000000..a696d75090 --- /dev/null +++ b/drivers/misc/cb710/Kconfig @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: GPL-2.0-only +config CB710_CORE + tristate "ENE CB710/720 Flash memory card reader support" + depends on PCI + help + This option enables support for PCI ENE CB710/720 Flash memory card + reader found in some laptops (ie. some versions of HP Compaq nx9500). + + You will also have to select some flash card format drivers (MMC/SD, + MemoryStick). + + This driver can also be built as a module. If so, the module + will be called cb710. + +config CB710_DEBUG + bool "Enable driver debugging" + depends on CB710_CORE != n + help + This is an option for use by developers; most people should + say N here. This adds a lot of debugging output to dmesg. + +config CB710_DEBUG_ASSUMPTIONS + bool + depends on CB710_CORE != n + default y diff --git a/drivers/misc/cb710/Makefile b/drivers/misc/cb710/Makefile new file mode 100644 index 0000000000..8a38c66eb8 --- /dev/null +++ b/drivers/misc/cb710/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +ccflags-$(CONFIG_CB710_DEBUG) := -DDEBUG + +obj-$(CONFIG_CB710_CORE) += cb710.o + +cb710-y := core.o sgbuf2.o +cb710-$(CONFIG_CB710_DEBUG) += debug.o diff --git a/drivers/misc/cb710/core.c b/drivers/misc/cb710/core.c new file mode 100644 index 0000000000..55b7ee0e8f --- /dev/null +++ b/drivers/misc/cb710/core.c @@ -0,0 +1,329 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * cb710/core.c + * + * Copyright by Michał Mirosław, 2008-2009 + */ +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_IDA(cb710_ida); + +void cb710_pci_update_config_reg(struct pci_dev *pdev, + int reg, uint32_t mask, uint32_t xor) +{ + u32 rval; + + pci_read_config_dword(pdev, reg, &rval); + rval = (rval & mask) ^ xor; + pci_write_config_dword(pdev, reg, rval); +} +EXPORT_SYMBOL_GPL(cb710_pci_update_config_reg); + +/* Some magic writes based on Windows driver init code */ +static int cb710_pci_configure(struct pci_dev *pdev) +{ + unsigned int devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0); + struct pci_dev *pdev0; + u32 val; + + cb710_pci_update_config_reg(pdev, 0x48, + ~0x000000FF, 0x0000003F); + + pci_read_config_dword(pdev, 0x48, &val); + if (val & 0x80000000) + return 0; + + pdev0 = pci_get_slot(pdev->bus, devfn); + if (!pdev0) + return -ENODEV; + + if (pdev0->vendor == PCI_VENDOR_ID_ENE + && pdev0->device == PCI_DEVICE_ID_ENE_720) { + cb710_pci_update_config_reg(pdev0, 0x8C, + ~0x00F00000, 0x00100000); + cb710_pci_update_config_reg(pdev0, 0xB0, + ~0x08000000, 0x08000000); + } + + cb710_pci_update_config_reg(pdev0, 0x8C, + ~0x00000F00, 0x00000200); + cb710_pci_update_config_reg(pdev0, 0x90, + ~0x00060000, 0x00040000); + + pci_dev_put(pdev0); + + return 0; +} + +static irqreturn_t cb710_irq_handler(int irq, void *data) +{ + struct cb710_chip *chip = data; + struct cb710_slot *slot = &chip->slot[0]; + irqreturn_t handled = IRQ_NONE; + unsigned nr; + + spin_lock(&chip->irq_lock); /* incl.
smp_rmb() */ + + for (nr = chip->slots; nr; ++slot, --nr) { + cb710_irq_handler_t handler_func = slot->irq_handler; + if (handler_func && handler_func(slot)) + handled = IRQ_HANDLED; + } + + spin_unlock(&chip->irq_lock); + + return handled; +} + +static void cb710_release_slot(struct device *dev) +{ +#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS + struct cb710_slot *slot = cb710_pdev_to_slot(to_platform_device(dev)); + struct cb710_chip *chip = cb710_slot_to_chip(slot); + + /* slot struct can be freed now */ + atomic_dec(&chip->slot_refs_count); +#endif +} + +static int cb710_register_slot(struct cb710_chip *chip, + unsigned slot_mask, unsigned io_offset, const char *name) +{ + int nr = chip->slots; + struct cb710_slot *slot = &chip->slot[nr]; + int err; + + dev_dbg(cb710_chip_dev(chip), + "register: %s.%d; slot %d; mask %d; IO offset: 0x%02X\n", + name, chip->platform_id, nr, slot_mask, io_offset); + + /* slot->irq_handler == NULL here; this needs to be + * seen before platform_device_register() */ + ++chip->slots; + smp_wmb(); + + slot->iobase = chip->iobase + io_offset; + slot->pdev.name = name; + slot->pdev.id = chip->platform_id; + slot->pdev.dev.parent = &chip->pdev->dev; + slot->pdev.dev.release = cb710_release_slot; + + err = platform_device_register(&slot->pdev); + +#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS + atomic_inc(&chip->slot_refs_count); +#endif + + if (err) { + /* device_initialize() called from platform_device_register() + * wants this on error path */ + platform_device_put(&slot->pdev); + + /* slot->irq_handler == NULL here anyway, so no lock needed */ + --chip->slots; + return err; + } + + chip->slot_mask |= slot_mask; + + return 0; +} + +static void cb710_unregister_slot(struct cb710_chip *chip, + unsigned slot_mask) +{ + int nr = chip->slots - 1; + + if (!(chip->slot_mask & slot_mask)) + return; + + platform_device_unregister(&chip->slot[nr].pdev); + + /* complementary to spin_unlock() in cb710_set_irq_handler() */ + smp_rmb(); + BUG_ON(chip->slot[nr].irq_handler != NULL); + + /* slot->irq_handler == NULL here, so no lock needed */ + --chip->slots; + chip->slot_mask &= ~slot_mask; +} + +void cb710_set_irq_handler(struct cb710_slot *slot, + cb710_irq_handler_t handler) +{ + struct cb710_chip *chip = cb710_slot_to_chip(slot); + unsigned long flags; + + spin_lock_irqsave(&chip->irq_lock, flags); + slot->irq_handler = handler; + spin_unlock_irqrestore(&chip->irq_lock, flags); +} +EXPORT_SYMBOL_GPL(cb710_set_irq_handler); + +static int __maybe_unused cb710_suspend(struct device *dev_d) +{ + struct pci_dev *pdev = to_pci_dev(dev_d); + struct cb710_chip *chip = pci_get_drvdata(pdev); + + devm_free_irq(&pdev->dev, pdev->irq, chip); + return 0; +} + +static int __maybe_unused cb710_resume(struct device *dev_d) +{ + struct pci_dev *pdev = to_pci_dev(dev_d); + struct cb710_chip *chip = pci_get_drvdata(pdev); + + return devm_request_irq(&pdev->dev, pdev->irq, + cb710_irq_handler, IRQF_SHARED, KBUILD_MODNAME, chip); +} + +static int cb710_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct cb710_chip *chip; + u32 val; + int err; + int n = 0; + + err = cb710_pci_configure(pdev); + if (err) + return err; + + /* this is actually magic... 
*/ + pci_read_config_dword(pdev, 0x48, &val); + if (!(val & 0x80000000)) { + pci_write_config_dword(pdev, 0x48, val|0x71000000); + pci_read_config_dword(pdev, 0x48, &val); + } + + dev_dbg(&pdev->dev, "PCI config[0x48] = 0x%08X\n", val); + if (!(val & 0x70000000)) + return -ENODEV; + val = (val >> 28) & 7; + if (val & CB710_SLOT_MMC) + ++n; + if (val & CB710_SLOT_MS) + ++n; + if (val & CB710_SLOT_SM) + ++n; + + chip = devm_kzalloc(&pdev->dev, struct_size(chip, slot, n), + GFP_KERNEL); + if (!chip) + return -ENOMEM; + + err = pcim_enable_device(pdev); + if (err) + return err; + + err = pcim_iomap_regions(pdev, 0x0001, KBUILD_MODNAME); + if (err) + return err; + + spin_lock_init(&chip->irq_lock); + chip->pdev = pdev; + chip->iobase = pcim_iomap_table(pdev)[0]; + + pci_set_drvdata(pdev, chip); + + err = devm_request_irq(&pdev->dev, pdev->irq, + cb710_irq_handler, IRQF_SHARED, KBUILD_MODNAME, chip); + if (err) + return err; + + err = ida_alloc(&cb710_ida, GFP_KERNEL); + if (err < 0) + return err; + chip->platform_id = err; + + dev_info(&pdev->dev, "id %d, IO 0x%p, IRQ %d\n", + chip->platform_id, chip->iobase, pdev->irq); + + if (val & CB710_SLOT_MMC) { /* MMC/SD slot */ + err = cb710_register_slot(chip, + CB710_SLOT_MMC, 0x00, "cb710-mmc"); + if (err) + return err; + } + + if (val & CB710_SLOT_MS) { /* MemoryStick slot */ + err = cb710_register_slot(chip, + CB710_SLOT_MS, 0x40, "cb710-ms"); + if (err) + goto unreg_mmc; + } + + if (val & CB710_SLOT_SM) { /* SmartMedia slot */ + err = cb710_register_slot(chip, + CB710_SLOT_SM, 0x60, "cb710-sm"); + if (err) + goto unreg_ms; + } + + return 0; +unreg_ms: + cb710_unregister_slot(chip, CB710_SLOT_MS); +unreg_mmc: + cb710_unregister_slot(chip, CB710_SLOT_MMC); + +#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS + BUG_ON(atomic_read(&chip->slot_refs_count) != 0); +#endif + return err; +} + +static void cb710_remove_one(struct pci_dev *pdev) +{ + struct cb710_chip *chip = pci_get_drvdata(pdev); + + cb710_unregister_slot(chip, CB710_SLOT_SM); + cb710_unregister_slot(chip, CB710_SLOT_MS); + cb710_unregister_slot(chip, CB710_SLOT_MMC); +#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS + BUG_ON(atomic_read(&chip->slot_refs_count) != 0); +#endif + + ida_free(&cb710_ida, chip->platform_id); +} + +static const struct pci_device_id cb710_pci_tbl[] = { + { PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_CB710_FLASH, + PCI_ANY_ID, PCI_ANY_ID, }, + { 0, } +}; + +static SIMPLE_DEV_PM_OPS(cb710_pm_ops, cb710_suspend, cb710_resume); + +static struct pci_driver cb710_driver = { + .name = KBUILD_MODNAME, + .id_table = cb710_pci_tbl, + .probe = cb710_probe, + .remove = cb710_remove_one, + .driver.pm = &cb710_pm_ops, +}; + +static int __init cb710_init_module(void) +{ + return pci_register_driver(&cb710_driver); +} + +static void __exit cb710_cleanup_module(void) +{ + pci_unregister_driver(&cb710_driver); + ida_destroy(&cb710_ida); +} + +module_init(cb710_init_module); +module_exit(cb710_cleanup_module); + +MODULE_AUTHOR("Michał Mirosław "); +MODULE_DESCRIPTION("ENE CB710 memory card reader driver"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, cb710_pci_tbl); diff --git a/drivers/misc/cb710/debug.c b/drivers/misc/cb710/debug.c new file mode 100644 index 0000000000..20d672edf7 --- /dev/null +++ b/drivers/misc/cb710/debug.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * cb710/debug.c + * + * Copyright by Michał Mirosław, 2008-2009 + */ +#include +#include +#include + +#define CB710_REG_COUNT 0x80 + +static const u16 allow[CB710_REG_COUNT/16] = { + 0xFFF0, 0xFFFF, 0xFFFF,
0xFFFF, + 0xFFF0, 0xFFFF, 0xFFFF, 0xFFFF, +}; +static const char *const prefix[ARRAY_SIZE(allow)] = { + "MMC", "MMC", "MMC", "MMC", + "MS?", "MS?", "SM?", "SM?" +}; + +static inline int allow_reg_read(unsigned block, unsigned offset, unsigned bits) +{ + unsigned mask = (1 << bits/8) - 1; + offset *= bits/8; + return ((allow[block] >> offset) & mask) == mask; +} + +#define CB710_READ_REGS_TEMPLATE(t) \ +static void cb710_read_regs_##t(void __iomem *iobase, \ + u##t *reg, unsigned select) \ +{ \ + unsigned i, j; \ + \ + for (i = 0; i < ARRAY_SIZE(allow); ++i, reg += 16/(t/8)) { \ + if (!(select & (1 << i))) \ + continue; \ + \ + for (j = 0; j < 0x10/(t/8); ++j) { \ + if (!allow_reg_read(i, j, t)) \ + continue; \ + reg[j] = ioread##t(iobase \ + + (i << 4) + (j * (t/8))); \ + } \ + } \ +} + +static const char cb710_regf_8[] = "%02X"; +static const char cb710_regf_16[] = "%04X"; +static const char cb710_regf_32[] = "%08X"; +static const char cb710_xes[] = "xxxxxxxx"; + +#define CB710_DUMP_REGS_TEMPLATE(t) \ +static void cb710_dump_regs_##t(struct device *dev, \ + const u##t *reg, unsigned select) \ +{ \ + const char *const xp = &cb710_xes[8 - t/4]; \ + const char *const format = cb710_regf_##t; \ + \ + char msg[100], *p; \ + unsigned i, j; \ + \ + for (i = 0; i < ARRAY_SIZE(allow); ++i, reg += 16/(t/8)) { \ + if (!(select & (1 << i))) \ + continue; \ + p = msg; \ + for (j = 0; j < 0x10/(t/8); ++j) { \ + *p++ = ' '; \ + if (j == 8/(t/8)) \ + *p++ = ' '; \ + if (allow_reg_read(i, j, t)) \ + p += sprintf(p, format, reg[j]); \ + else \ + p += sprintf(p, "%s", xp); \ + } \ + dev_dbg(dev, "%s 0x%02X %s\n", prefix[i], i << 4, msg); \ + } \ +} + +#define CB710_READ_AND_DUMP_REGS_TEMPLATE(t) \ +static void cb710_read_and_dump_regs_##t(struct cb710_chip *chip, \ + unsigned select) \ +{ \ + u##t regs[CB710_REG_COUNT/sizeof(u##t)]; \ + \ + memset(&regs, 0, sizeof(regs)); \ + cb710_read_regs_##t(chip->iobase, regs, select); \ + cb710_dump_regs_##t(cb710_chip_dev(chip), regs, select); \ +} + +#define CB710_REG_ACCESS_TEMPLATES(t) \ + CB710_READ_REGS_TEMPLATE(t) \ + CB710_DUMP_REGS_TEMPLATE(t) \ + CB710_READ_AND_DUMP_REGS_TEMPLATE(t) + +CB710_REG_ACCESS_TEMPLATES(8) +CB710_REG_ACCESS_TEMPLATES(16) +CB710_REG_ACCESS_TEMPLATES(32) + +void cb710_dump_regs(struct cb710_chip *chip, unsigned select) +{ + if (!(select & CB710_DUMP_REGS_MASK)) + select = CB710_DUMP_REGS_ALL; + if (!(select & CB710_DUMP_ACCESS_MASK)) + select |= CB710_DUMP_ACCESS_8; + + if (select & CB710_DUMP_ACCESS_32) + cb710_read_and_dump_regs_32(chip, select); + if (select & CB710_DUMP_ACCESS_16) + cb710_read_and_dump_regs_16(chip, select); + if (select & CB710_DUMP_ACCESS_8) + cb710_read_and_dump_regs_8(chip, select); +} +EXPORT_SYMBOL_GPL(cb710_dump_regs); + diff --git a/drivers/misc/cb710/sgbuf2.c b/drivers/misc/cb710/sgbuf2.c new file mode 100644 index 0000000000..a798fad5f0 --- /dev/null +++ b/drivers/misc/cb710/sgbuf2.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * cb710/sgbuf2.c + * + * Copyright by Michał Mirosław, 2008-2009 + */ +#include +#include +#include + +static bool sg_dwiter_next(struct sg_mapping_iter *miter) +{ + if (sg_miter_next(miter)) { + miter->consumed = 0; + return true; + } else + return false; +} + +static bool sg_dwiter_is_at_end(struct sg_mapping_iter *miter) +{ + return miter->length == miter->consumed && !sg_dwiter_next(miter); +} + +static uint32_t sg_dwiter_read_buffer(struct sg_mapping_iter *miter) +{ + size_t len, left = 4; + uint32_t data; + void *addr = &data; + + do { + len =
min(miter->length - miter->consumed, left); + memcpy(addr, miter->addr + miter->consumed, len); + miter->consumed += len; + left -= len; + if (!left) + return data; + addr += len; + } while (sg_dwiter_next(miter)); + + memset(addr, 0, left); + return data; +} + +static inline bool needs_unaligned_copy(const void *ptr) +{ +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + return false; +#else + return ((uintptr_t)ptr & 3) != 0; +#endif +} + +static bool sg_dwiter_get_next_block(struct sg_mapping_iter *miter, uint32_t **ptr) +{ + size_t len; + + if (sg_dwiter_is_at_end(miter)) + return true; + + len = miter->length - miter->consumed; + + if (likely(len >= 4 && !needs_unaligned_copy( + miter->addr + miter->consumed))) { + *ptr = miter->addr + miter->consumed; + miter->consumed += 4; + return true; + } + + return false; +} + +/** + * cb710_sg_dwiter_read_next_block() - get next 32-bit word from sg buffer + * @miter: sg mapping iterator used for reading + * + * Description: + * Returns 32-bit word starting at byte pointed to by @miter@ + * handling any alignment issues. Bytes past the buffer's end + * are not accessed (read) but are returned as zeroes. @miter@ + * is advanced by 4 bytes or to the end of buffer whichever is + * closer. + * + * Context: + * Same requirements as in sg_miter_next(). + * + * Returns: + * 32-bit word just read. + */ +uint32_t cb710_sg_dwiter_read_next_block(struct sg_mapping_iter *miter) +{ + uint32_t *ptr = NULL; + + if (likely(sg_dwiter_get_next_block(miter, &ptr))) + return ptr ? *ptr : 0; + + return sg_dwiter_read_buffer(miter); +} +EXPORT_SYMBOL_GPL(cb710_sg_dwiter_read_next_block); + +static void sg_dwiter_write_slow(struct sg_mapping_iter *miter, uint32_t data) +{ + size_t len, left = 4; + void *addr = &data; + + do { + len = min(miter->length - miter->consumed, left); + memcpy(miter->addr, addr, len); + miter->consumed += len; + left -= len; + if (!left) + return; + addr += len; + } while (sg_dwiter_next(miter)); +} + +/** + * cb710_sg_dwiter_write_next_block() - write next 32-bit word to sg buffer + * @miter: sg mapping iterator used for writing + * @data: data to write to sg buffer + * + * Description: + * Writes 32-bit word starting at byte pointed to by @miter@ + * handling any alignment issues. Bytes which would be written + * past the buffer's end are silently discarded. @miter@ is + * advanced by 4 bytes or to the end of buffer whichever is closer. + * + * Context: + * Same requirements as in sg_miter_next(). + */ +void cb710_sg_dwiter_write_next_block(struct sg_mapping_iter *miter, uint32_t data) +{ + uint32_t *ptr = NULL; + + if (likely(sg_dwiter_get_next_block(miter, &ptr))) { + if (ptr) + *ptr = data; + else + return; + } else + sg_dwiter_write_slow(miter, data); +} +EXPORT_SYMBOL_GPL(cb710_sg_dwiter_write_next_block); + diff --git a/drivers/misc/cs5535-mfgpt.c b/drivers/misc/cs5535-mfgpt.c new file mode 100644 index 0000000000..18fc1aaa5c --- /dev/null +++ b/drivers/misc/cs5535-mfgpt.c @@ -0,0 +1,380 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Driver for the CS5535/CS5536 Multi-Function General Purpose Timers (MFGPT) + * + * Copyright (C) 2006, Advanced Micro Devices, Inc. + * Copyright (C) 2007 Andres Salomon + * Copyright (C) 2009 Andres Salomon + * + * The MFGPTs are documented in AMD Geode CS5536 Companion Device Data Book. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "cs5535-mfgpt" + +static int mfgpt_reset_timers; +module_param_named(mfgptfix, mfgpt_reset_timers, int, 0644); +MODULE_PARM_DESC(mfgptfix, "Try to reset the MFGPT timers during init; " + "required by some broken BIOSes (ie, TinyBIOS < 0.99) or kexec " + "(1 = reset the MFGPT using an undocumented bit, " + "2 = perform a soft reset by unconfiguring all timers); " + "use what works best for you."); + +struct cs5535_mfgpt_timer { + struct cs5535_mfgpt_chip *chip; + int nr; +}; + +static struct cs5535_mfgpt_chip { + DECLARE_BITMAP(avail, MFGPT_MAX_TIMERS); + resource_size_t base; + + struct platform_device *pdev; + spinlock_t lock; + int initialized; +} cs5535_mfgpt_chip; + +int cs5535_mfgpt_toggle_event(struct cs5535_mfgpt_timer *timer, int cmp, + int event, int enable) +{ + uint32_t msr, mask, value, dummy; + int shift = (cmp == MFGPT_CMP1) ? 0 : 8; + + if (!timer) { + WARN_ON(1); + return -EIO; + } + + /* + * The register maps for these are described in sections 6.17.1.x of + * the AMD Geode CS5536 Companion Device Data Book. + */ + switch (event) { + case MFGPT_EVENT_RESET: + /* + * XXX: According to the docs, we cannot reset timers above + * 6; that is, resets for 7 and 8 will be ignored. Is this + * a problem? -dilinger + */ + msr = MSR_MFGPT_NR; + mask = 1 << (timer->nr + 24); + break; + + case MFGPT_EVENT_NMI: + msr = MSR_MFGPT_NR; + mask = 1 << (timer->nr + shift); + break; + + case MFGPT_EVENT_IRQ: + msr = MSR_MFGPT_IRQ; + mask = 1 << (timer->nr + shift); + break; + + default: + return -EIO; + } + + rdmsr(msr, value, dummy); + + if (enable) + value |= mask; + else + value &= ~mask; + + wrmsr(msr, value, dummy); + return 0; +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_toggle_event); + +int cs5535_mfgpt_set_irq(struct cs5535_mfgpt_timer *timer, int cmp, int *irq, + int enable) +{ + uint32_t zsel, lpc, dummy; + int shift; + + if (!timer) { + WARN_ON(1); + return -EIO; + } + + /* + * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA + * is using the same CMP of the timer's Siamese twin, the IRQ is set to + * 2, and we mustn't use nor change it. + * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the + * IRQ of the 1st. This can only happen if forcing an IRQ, calling this + * with *irq==0 is safe. Currently there _are_ no 2 drivers. + */ + rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); + shift = ((cmp == MFGPT_CMP1 ? 
0 : 4) + timer->nr % 4) * 4; + if (((zsel >> shift) & 0xF) == 2) + return -EIO; + + /* Choose IRQ: if none supplied, keep IRQ already set or use default */ + if (!*irq) + *irq = (zsel >> shift) & 0xF; + if (!*irq) + *irq = CONFIG_CS5535_MFGPT_DEFAULT_IRQ; + + /* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */ + if (*irq < 1 || *irq == 2 || *irq > 15) + return -EIO; + rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy); + if (lpc & (1 << *irq)) + return -EIO; + + /* All chosen and checked - go for it */ + if (cs5535_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable)) + return -EIO; + if (enable) { + zsel = (zsel & ~(0xF << shift)) | (*irq << shift); + wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); + } + + return 0; +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_set_irq); + +struct cs5535_mfgpt_timer *cs5535_mfgpt_alloc_timer(int timer_nr, int domain) +{ + struct cs5535_mfgpt_chip *mfgpt = &cs5535_mfgpt_chip; + struct cs5535_mfgpt_timer *timer = NULL; + unsigned long flags; + int max; + + if (!mfgpt->initialized) + goto done; + + /* only allocate timers from the working domain if requested */ + if (domain == MFGPT_DOMAIN_WORKING) + max = 6; + else + max = MFGPT_MAX_TIMERS; + + if (timer_nr >= max) { + /* programmer error. silly programmers! */ + WARN_ON(1); + goto done; + } + + spin_lock_irqsave(&mfgpt->lock, flags); + if (timer_nr < 0) { + unsigned long t; + + /* try to find any available timer */ + t = find_first_bit(mfgpt->avail, max); + /* set timer_nr to -1 if no timers available */ + timer_nr = t < max ? (int) t : -1; + } else { + /* check if the requested timer's available */ + if (!test_bit(timer_nr, mfgpt->avail)) + timer_nr = -1; + } + + if (timer_nr >= 0) + /* if timer_nr is not -1, it's an available timer */ + __clear_bit(timer_nr, mfgpt->avail); + spin_unlock_irqrestore(&mfgpt->lock, flags); + + if (timer_nr < 0) + goto done; + + timer = kmalloc(sizeof(*timer), GFP_KERNEL); + if (!timer) { + /* aw hell */ + spin_lock_irqsave(&mfgpt->lock, flags); + __set_bit(timer_nr, mfgpt->avail); + spin_unlock_irqrestore(&mfgpt->lock, flags); + goto done; + } + timer->chip = mfgpt; + timer->nr = timer_nr; + dev_info(&mfgpt->pdev->dev, "registered timer %d\n", timer_nr); + +done: + return timer; +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_alloc_timer); + +/* + * XXX: This frees the timer memory, but never resets the actual hardware + * timer. The old geode_mfgpt code did this; it would be good to figure + * out a way to actually release the hardware timer. See comments below. + */ +void cs5535_mfgpt_free_timer(struct cs5535_mfgpt_timer *timer) +{ + unsigned long flags; + uint16_t val; + + /* timer can be made available again only if never set up */ + val = cs5535_mfgpt_read(timer, MFGPT_REG_SETUP); + if (!(val & MFGPT_SETUP_SETUP)) { + spin_lock_irqsave(&timer->chip->lock, flags); + __set_bit(timer->nr, timer->chip->avail); + spin_unlock_irqrestore(&timer->chip->lock, flags); + } + + kfree(timer); +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_free_timer); + +uint16_t cs5535_mfgpt_read(struct cs5535_mfgpt_timer *timer, uint16_t reg) +{ + return inw(timer->chip->base + reg + (timer->nr * 8)); +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_read); + +void cs5535_mfgpt_write(struct cs5535_mfgpt_timer *timer, uint16_t reg, + uint16_t value) +{ + outw(value, timer->chip->base + reg + (timer->nr * 8)); +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_write); + +/* + * This is a sledgehammer that resets all MFGPT timers. This is required by + * some broken BIOSes which leave the system in an unstable state + * (TinyBIOS 0.98, for example; fixed in 0.99). 
It's uncertain as to + * whether or not this secret MSR can be used to release individual timers. + * Jordan tells me that he and Mitch once played w/ it, but it's unclear + * what the results of that were (and they experienced some instability). + */ +static void reset_all_timers(void) +{ + uint32_t val, dummy; + + /* The following undocumented bit resets the MFGPT timers */ + val = 0xFF; dummy = 0; + wrmsr(MSR_MFGPT_SETUP, val, dummy); +} + +/* + * This is another sledgehammer to reset all MFGPT timers. + * Instead of using the undocumented bit method it clears + * IRQ, NMI and RESET settings. + */ +static void soft_reset(void) +{ + int i; + struct cs5535_mfgpt_timer t; + + for (i = 0; i < MFGPT_MAX_TIMERS; i++) { + t.nr = i; + + cs5535_mfgpt_toggle_event(&t, MFGPT_CMP1, MFGPT_EVENT_RESET, 0); + cs5535_mfgpt_toggle_event(&t, MFGPT_CMP2, MFGPT_EVENT_RESET, 0); + cs5535_mfgpt_toggle_event(&t, MFGPT_CMP1, MFGPT_EVENT_NMI, 0); + cs5535_mfgpt_toggle_event(&t, MFGPT_CMP2, MFGPT_EVENT_NMI, 0); + cs5535_mfgpt_toggle_event(&t, MFGPT_CMP1, MFGPT_EVENT_IRQ, 0); + cs5535_mfgpt_toggle_event(&t, MFGPT_CMP2, MFGPT_EVENT_IRQ, 0); + } +} + +/* + * Check whether any MFGPTs are available for the kernel to use. In most + * cases, firmware that uses AMD's VSA code will claim all timers during + * bootup; we certainly don't want to take them if they're already in use. + * In other cases (such as with VSAless OpenFirmware), the system firmware + * leaves timers available for us to use. + */ +static int scan_timers(struct cs5535_mfgpt_chip *mfgpt) +{ + struct cs5535_mfgpt_timer timer = { .chip = mfgpt }; + unsigned long flags; + int timers = 0; + uint16_t val; + int i; + + /* bios workaround */ + if (mfgpt_reset_timers == 1) + reset_all_timers(); + else if (mfgpt_reset_timers == 2) + soft_reset(); + + /* just to be safe, protect this section w/ lock */ + spin_lock_irqsave(&mfgpt->lock, flags); + for (i = 0; i < MFGPT_MAX_TIMERS; i++) { + timer.nr = i; + val = cs5535_mfgpt_read(&timer, MFGPT_REG_SETUP); + if (!(val & MFGPT_SETUP_SETUP) || mfgpt_reset_timers == 2) { + __set_bit(i, mfgpt->avail); + timers++; + } + } + spin_unlock_irqrestore(&mfgpt->lock, flags); + + return timers; +} + +static int cs5535_mfgpt_probe(struct platform_device *pdev) +{ + struct resource *res; + int err = -EIO, t; + + if (mfgpt_reset_timers < 0 || mfgpt_reset_timers > 2) { + dev_err(&pdev->dev, "Bad mfgpt_reset_timers value: %i\n", + mfgpt_reset_timers); + goto done; + } + + /* There are two ways to get the MFGPT base address; one is by + * fetching it from MSR_LBAR_MFGPT, the other is by reading the + * PCI BAR info. The latter method is easier (especially across + * different architectures), so we'll stick with that for now. If + * it turns out to be unreliable in the face of crappy BIOSes, we + * can always go back to using MSRs.. 
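/*
 * Illustrative sketch, not part of the upstream hunk: how a client driver
 * might use the MFGPT helpers exported above.  The interrupt handler and the
 * counter/compare register offsets (MFGPT_REG_*, from linux/cs5535.h) are
 * assumed and not shown in this hunk.
 */
static struct cs5535_mfgpt_timer *example_timer;

static int example_mfgpt_init(void)
{
	int irq = 0;	/* 0: keep whatever IRQ is already routed, else the default */

	/* grab any free timer from the "working" domain (timers 0..5) */
	example_timer = cs5535_mfgpt_alloc_timer(-1, MFGPT_DOMAIN_WORKING);
	if (!example_timer)
		return -ENODEV;

	/* route this timer's CMP2 compare event to a PIC interrupt line */
	if (cs5535_mfgpt_set_irq(example_timer, MFGPT_CMP2, &irq, 1)) {
		cs5535_mfgpt_free_timer(example_timer);
		return -EIO;
	}

	/*
	 * A real user would now request_irq(irq, ...), program the counter
	 * and compare values with cs5535_mfgpt_write(), and finally set up
	 * MFGPT_REG_SETUP to start the timer.
	 */
	return 0;
}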
*/ + + res = platform_get_resource(pdev, IORESOURCE_IO, 0); + if (!res) { + dev_err(&pdev->dev, "can't fetch device resource info\n"); + goto done; + } + + if (!request_region(res->start, resource_size(res), pdev->name)) { + dev_err(&pdev->dev, "can't request region\n"); + goto done; + } + + /* set up the driver-specific struct */ + cs5535_mfgpt_chip.base = res->start; + cs5535_mfgpt_chip.pdev = pdev; + spin_lock_init(&cs5535_mfgpt_chip.lock); + + dev_info(&pdev->dev, "reserved resource region %pR\n", res); + + /* detect the available timers */ + t = scan_timers(&cs5535_mfgpt_chip); + dev_info(&pdev->dev, "%d MFGPT timers available\n", t); + cs5535_mfgpt_chip.initialized = 1; + return 0; + +done: + return err; +} + +static struct platform_driver cs5535_mfgpt_driver = { + .driver = { + .name = DRV_NAME, + }, + .probe = cs5535_mfgpt_probe, +}; + + +static int __init cs5535_mfgpt_init(void) +{ + return platform_driver_register(&cs5535_mfgpt_driver); +} + +module_init(cs5535_mfgpt_init); + +MODULE_AUTHOR("Andres Salomon "); +MODULE_DESCRIPTION("CS5535/CS5536 MFGPT timer driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:" DRV_NAME); diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig new file mode 100644 index 0000000000..5efc4151bf --- /dev/null +++ b/drivers/misc/cxl/Kconfig @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# IBM Coherent Accelerator (CXL) compatible devices +# + +config CXL_BASE + bool + select PPC_COPRO_BASE + select PPC_64S_HASH_MMU + +config CXL + tristate "Support for IBM Coherent Accelerators (CXL)" + depends on PPC_POWERNV && PCI_MSI && EEH + select CXL_BASE + default m + help + Select this option to enable driver support for IBM Coherent + Accelerators (CXL). CXL is otherwise known as Coherent Accelerator + Processor Interface (CAPI). CAPI allows accelerators in FPGAs to be + coherently attached to a CPU via an MMU. This driver enables + userspace programs to access these accelerators via /dev/cxl/afuM.N + devices. + + CAPI adapters are found in POWER8 based systems. + + If unsure, say N. diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile new file mode 100644 index 0000000000..5eea61b958 --- /dev/null +++ b/drivers/misc/cxl/Makefile @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0 +ccflags-y := $(call cc-disable-warning, unused-const-variable) +ccflags-$(CONFIG_PPC_WERROR) += -Werror + +cxl-y += main.o file.o irq.o fault.o native.o +cxl-y += context.o sysfs.o pci.o trace.o +cxl-y += vphb.o api.o cxllib.o +cxl-$(CONFIG_PPC_PSERIES) += flash.o guest.o of.o hcalls.o +cxl-$(CONFIG_DEBUG_FS) += debugfs.o +obj-$(CONFIG_CXL) += cxl.o +obj-$(CONFIG_CXL_BASE) += base.o + +# For tracepoints to include our trace.h from tracepoint infrastructure: +CFLAGS_trace.o := -I$(src) diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c new file mode 100644 index 0000000000..d85c565308 --- /dev/null +++ b/drivers/misc/cxl/api.c @@ -0,0 +1,532 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2014 IBM Corp. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cxl.h" + +/* + * Since we want to track memory mappings to be able to force-unmap + * when the AFU is no longer reachable, we need an inode. For devices + * opened through the cxl user API, this is not a problem, but a + * userland process can also get a cxl fd through the cxl_get_fd() + * API, which is used by the cxlflash driver. 
+ * + * Therefore we implement our own simple pseudo-filesystem and inode + * allocator. We don't use the anonymous inode, as we need the + * meta-data associated with it (address_space) and it is shared by + * other drivers/processes, so it could lead to cxl unmapping VMAs + * from random processes. + */ + +#define CXL_PSEUDO_FS_MAGIC 0x1697697f + +static int cxl_fs_cnt; +static struct vfsmount *cxl_vfs_mount; + +static int cxl_fs_init_fs_context(struct fs_context *fc) +{ + return init_pseudo(fc, CXL_PSEUDO_FS_MAGIC) ? 0 : -ENOMEM; +} + +static struct file_system_type cxl_fs_type = { + .name = "cxl", + .owner = THIS_MODULE, + .init_fs_context = cxl_fs_init_fs_context, + .kill_sb = kill_anon_super, +}; + + +void cxl_release_mapping(struct cxl_context *ctx) +{ + if (ctx->kernelapi && ctx->mapping) + simple_release_fs(&cxl_vfs_mount, &cxl_fs_cnt); +} + +static struct file *cxl_getfile(const char *name, + const struct file_operations *fops, + void *priv, int flags) +{ + struct file *file; + struct inode *inode; + int rc; + + /* strongly inspired by anon_inode_getfile() */ + + if (fops->owner && !try_module_get(fops->owner)) + return ERR_PTR(-ENOENT); + + rc = simple_pin_fs(&cxl_fs_type, &cxl_vfs_mount, &cxl_fs_cnt); + if (rc < 0) { + pr_err("Cannot mount cxl pseudo filesystem: %d\n", rc); + file = ERR_PTR(rc); + goto err_module; + } + + inode = alloc_anon_inode(cxl_vfs_mount->mnt_sb); + if (IS_ERR(inode)) { + file = ERR_CAST(inode); + goto err_fs; + } + + file = alloc_file_pseudo(inode, cxl_vfs_mount, name, + flags & (O_ACCMODE | O_NONBLOCK), fops); + if (IS_ERR(file)) + goto err_inode; + + file->private_data = priv; + + return file; + +err_inode: + iput(inode); +err_fs: + simple_release_fs(&cxl_vfs_mount, &cxl_fs_cnt); +err_module: + module_put(fops->owner); + return file; +} + +struct cxl_context *cxl_dev_context_init(struct pci_dev *dev) +{ + struct cxl_afu *afu; + struct cxl_context *ctx; + int rc; + + afu = cxl_pci_to_afu(dev); + if (IS_ERR(afu)) + return ERR_CAST(afu); + + ctx = cxl_context_alloc(); + if (!ctx) + return ERR_PTR(-ENOMEM); + + ctx->kernelapi = true; + + /* Make it a slave context. We can promote it later? 
*/ + rc = cxl_context_init(ctx, afu, false); + if (rc) + goto err_ctx; + + return ctx; + +err_ctx: + kfree(ctx); + return ERR_PTR(rc); +} +EXPORT_SYMBOL_GPL(cxl_dev_context_init); + +struct cxl_context *cxl_get_context(struct pci_dev *dev) +{ + return dev->dev.archdata.cxl_ctx; +} +EXPORT_SYMBOL_GPL(cxl_get_context); + +int cxl_release_context(struct cxl_context *ctx) +{ + if (ctx->status >= STARTED) + return -EBUSY; + + cxl_context_free(ctx); + + return 0; +} +EXPORT_SYMBOL_GPL(cxl_release_context); + +static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num) +{ + __u16 range; + int r; + + for (r = 0; r < CXL_IRQ_RANGES; r++) { + range = ctx->irqs.range[r]; + if (num < range) { + return ctx->irqs.offset[r] + num; + } + num -= range; + } + return 0; +} + + +int cxl_set_priv(struct cxl_context *ctx, void *priv) +{ + if (!ctx) + return -EINVAL; + + ctx->priv = priv; + + return 0; +} +EXPORT_SYMBOL_GPL(cxl_set_priv); + +void *cxl_get_priv(struct cxl_context *ctx) +{ + if (!ctx) + return ERR_PTR(-EINVAL); + + return ctx->priv; +} +EXPORT_SYMBOL_GPL(cxl_get_priv); + +int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num) +{ + int res; + irq_hw_number_t hwirq; + + if (num == 0) + num = ctx->afu->pp_irqs; + res = afu_allocate_irqs(ctx, num); + if (res) + return res; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) { + /* In a guest, the PSL interrupt is not multiplexed. It was + * allocated above, and we need to set its handler + */ + hwirq = cxl_find_afu_irq(ctx, 0); + if (hwirq) + cxl_map_irq(ctx->afu->adapter, hwirq, cxl_ops->psl_interrupt, ctx, "psl"); + } + + if (ctx->status == STARTED) { + if (cxl_ops->update_ivtes) + cxl_ops->update_ivtes(ctx); + else WARN(1, "BUG: cxl_allocate_afu_irqs must be called prior to starting the context on this platform\n"); + } + + return res; +} +EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs); + +void cxl_free_afu_irqs(struct cxl_context *ctx) +{ + irq_hw_number_t hwirq; + unsigned int virq; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) { + hwirq = cxl_find_afu_irq(ctx, 0); + if (hwirq) { + virq = irq_find_mapping(NULL, hwirq); + if (virq) + cxl_unmap_irq(virq, ctx); + } + } + afu_irq_name_free(ctx); + cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter); +} +EXPORT_SYMBOL_GPL(cxl_free_afu_irqs); + +int cxl_map_afu_irq(struct cxl_context *ctx, int num, + irq_handler_t handler, void *cookie, char *name) +{ + irq_hw_number_t hwirq; + + /* + * Find interrupt we are to register. + */ + hwirq = cxl_find_afu_irq(ctx, num); + if (!hwirq) + return -ENOENT; + + return cxl_map_irq(ctx->afu->adapter, hwirq, handler, cookie, name); +} +EXPORT_SYMBOL_GPL(cxl_map_afu_irq); + +void cxl_unmap_afu_irq(struct cxl_context *ctx, int num, void *cookie) +{ + irq_hw_number_t hwirq; + unsigned int virq; + + hwirq = cxl_find_afu_irq(ctx, num); + if (!hwirq) + return; + + virq = irq_find_mapping(NULL, hwirq); + if (virq) + cxl_unmap_irq(virq, cookie); +} +EXPORT_SYMBOL_GPL(cxl_unmap_afu_irq); + +/* + * Start a context + * Code here similar to afu_ioctl_start_work(). + */ +int cxl_start_context(struct cxl_context *ctx, u64 wed, + struct task_struct *task) +{ + int rc = 0; + bool kernel = true; + + pr_devel("%s: pe: %i\n", __func__, ctx->pe); + + mutex_lock(&ctx->status_mutex); + if (ctx->status == STARTED) + goto out; /* already started */ + + /* + * Increment the mapped context count for adapter. This also checks + * if adapter_context_lock is taken. 
+ */ + rc = cxl_adapter_context_get(ctx->afu->adapter); + if (rc) + goto out; + + if (task) { + ctx->pid = get_task_pid(task, PIDTYPE_PID); + kernel = false; + + /* acquire a reference to the task's mm */ + ctx->mm = get_task_mm(current); + + /* ensure this mm_struct can't be freed */ + cxl_context_mm_count_get(ctx); + + if (ctx->mm) { + /* decrement the use count from above */ + mmput(ctx->mm); + /* make TLBIs for this context global */ + mm_context_add_copro(ctx->mm); + } + } + + /* + * Increment driver use count. Enables global TLBIs for hash + * and callbacks to handle the segment table + */ + cxl_ctx_get(); + + /* See the comment in afu_ioctl_start_work() */ + smp_mb(); + + if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) { + put_pid(ctx->pid); + ctx->pid = NULL; + cxl_adapter_context_put(ctx->afu->adapter); + cxl_ctx_put(); + if (task) { + cxl_context_mm_count_put(ctx); + if (ctx->mm) + mm_context_remove_copro(ctx->mm); + } + goto out; + } + + ctx->status = STARTED; +out: + mutex_unlock(&ctx->status_mutex); + return rc; +} +EXPORT_SYMBOL_GPL(cxl_start_context); + +int cxl_process_element(struct cxl_context *ctx) +{ + return ctx->external_pe; +} +EXPORT_SYMBOL_GPL(cxl_process_element); + +/* Stop a context. Returns 0 on success, otherwise -Errno */ +int cxl_stop_context(struct cxl_context *ctx) +{ + return __detach_context(ctx); +} +EXPORT_SYMBOL_GPL(cxl_stop_context); + +void cxl_set_master(struct cxl_context *ctx) +{ + ctx->master = true; +} +EXPORT_SYMBOL_GPL(cxl_set_master); + +/* wrappers around afu_* file ops which are EXPORTED */ +int cxl_fd_open(struct inode *inode, struct file *file) +{ + return afu_open(inode, file); +} +EXPORT_SYMBOL_GPL(cxl_fd_open); +int cxl_fd_release(struct inode *inode, struct file *file) +{ + return afu_release(inode, file); +} +EXPORT_SYMBOL_GPL(cxl_fd_release); +long cxl_fd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + return afu_ioctl(file, cmd, arg); +} +EXPORT_SYMBOL_GPL(cxl_fd_ioctl); +int cxl_fd_mmap(struct file *file, struct vm_area_struct *vm) +{ + return afu_mmap(file, vm); +} +EXPORT_SYMBOL_GPL(cxl_fd_mmap); +__poll_t cxl_fd_poll(struct file *file, struct poll_table_struct *poll) +{ + return afu_poll(file, poll); +} +EXPORT_SYMBOL_GPL(cxl_fd_poll); +ssize_t cxl_fd_read(struct file *file, char __user *buf, size_t count, + loff_t *off) +{ + return afu_read(file, buf, count, off); +} +EXPORT_SYMBOL_GPL(cxl_fd_read); + +#define PATCH_FOPS(NAME) if (!fops->NAME) fops->NAME = afu_fops.NAME + +/* Get a struct file and fd for a context and attach the ops */ +struct file *cxl_get_fd(struct cxl_context *ctx, struct file_operations *fops, + int *fd) +{ + struct file *file; + int rc, flags, fdtmp; + char *name = NULL; + + /* only allow one per context */ + if (ctx->mapping) + return ERR_PTR(-EEXIST); + + flags = O_RDWR | O_CLOEXEC; + + /* This code is similar to anon_inode_getfd() */ + rc = get_unused_fd_flags(flags); + if (rc < 0) + return ERR_PTR(rc); + fdtmp = rc; + + /* + * Patch the file ops. Needs to be careful that this is rentrant safe. 
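/*
 * Illustrative sketch, not part of the upstream hunk: the in-kernel
 * (cxlflash-style) lifecycle built from the exports above.  The interrupt
 * handler, the work element descriptor value and the return convention of
 * cxl_map_afu_irq() (positive mapped IRQ on success) are assumptions here.
 */
static irqreturn_t example_afu_irq(int irq, void *cookie)
{
	/* acknowledge/handle the AFU interrupt here */
	return IRQ_HANDLED;
}

static int example_attach_afu(struct pci_dev *pdev, u64 wed)
{
	struct cxl_context *ctx;
	int rc;

	ctx = cxl_dev_context_init(pdev);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	/* 0 means "allocate the AFU's default number of per-process IRQs" */
	rc = cxl_allocate_afu_irqs(ctx, 0);
	if (rc)
		goto out_release;

	/* hook AFU interrupt source 1; source 0 may be reserved for the PSL */
	rc = cxl_map_afu_irq(ctx, 1, example_afu_irq, ctx, "example-afu");
	if (rc <= 0) {
		rc = rc ? rc : -ENOMEM;
		goto out_free_irqs;
	}

	/* NULL task: attach as a kernel context, no user mm involved */
	rc = cxl_start_context(ctx, wed, NULL);
	if (rc)
		goto out_unmap;

	return 0;

	/* teardown later is cxl_stop_context() followed by the steps below */
out_unmap:
	cxl_unmap_afu_irq(ctx, 1, ctx);
out_free_irqs:
	cxl_free_afu_irqs(ctx);
out_release:
	cxl_release_context(ctx);
	return rc;
}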
+ */ + if (fops) { + PATCH_FOPS(open); + PATCH_FOPS(poll); + PATCH_FOPS(read); + PATCH_FOPS(release); + PATCH_FOPS(unlocked_ioctl); + PATCH_FOPS(compat_ioctl); + PATCH_FOPS(mmap); + } else /* use default ops */ + fops = (struct file_operations *)&afu_fops; + + name = kasprintf(GFP_KERNEL, "cxl:%d", ctx->pe); + file = cxl_getfile(name, fops, ctx, flags); + kfree(name); + if (IS_ERR(file)) + goto err_fd; + + cxl_context_set_mapping(ctx, file->f_mapping); + *fd = fdtmp; + return file; + +err_fd: + put_unused_fd(fdtmp); + return NULL; +} +EXPORT_SYMBOL_GPL(cxl_get_fd); + +struct cxl_context *cxl_fops_get_context(struct file *file) +{ + return file->private_data; +} +EXPORT_SYMBOL_GPL(cxl_fops_get_context); + +void cxl_set_driver_ops(struct cxl_context *ctx, + struct cxl_afu_driver_ops *ops) +{ + WARN_ON(!ops->fetch_event || !ops->event_delivered); + atomic_set(&ctx->afu_driver_events, 0); + ctx->afu_driver_ops = ops; +} +EXPORT_SYMBOL_GPL(cxl_set_driver_ops); + +void cxl_context_events_pending(struct cxl_context *ctx, + unsigned int new_events) +{ + atomic_add(new_events, &ctx->afu_driver_events); + wake_up_all(&ctx->wq); +} +EXPORT_SYMBOL_GPL(cxl_context_events_pending); + +int cxl_start_work(struct cxl_context *ctx, + struct cxl_ioctl_start_work *work) +{ + int rc; + + /* code taken from afu_ioctl_start_work */ + if (!(work->flags & CXL_START_WORK_NUM_IRQS)) + work->num_interrupts = ctx->afu->pp_irqs; + else if ((work->num_interrupts < ctx->afu->pp_irqs) || + (work->num_interrupts > ctx->afu->irqs_max)) { + return -EINVAL; + } + + rc = afu_register_irqs(ctx, work->num_interrupts); + if (rc) + return rc; + + rc = cxl_start_context(ctx, work->work_element_descriptor, current); + if (rc < 0) { + afu_release_irqs(ctx, ctx); + return rc; + } + + return 0; +} +EXPORT_SYMBOL_GPL(cxl_start_work); + +void __iomem *cxl_psa_map(struct cxl_context *ctx) +{ + if (ctx->status != STARTED) + return NULL; + + pr_devel("%s: psn_phys%llx size:%llx\n", + __func__, ctx->psn_phys, ctx->psn_size); + return ioremap(ctx->psn_phys, ctx->psn_size); +} +EXPORT_SYMBOL_GPL(cxl_psa_map); + +void cxl_psa_unmap(void __iomem *addr) +{ + iounmap(addr); +} +EXPORT_SYMBOL_GPL(cxl_psa_unmap); + +int cxl_afu_reset(struct cxl_context *ctx) +{ + struct cxl_afu *afu = ctx->afu; + int rc; + + rc = cxl_ops->afu_reset(afu); + if (rc) + return rc; + + return cxl_ops->afu_check_and_enable(afu); +} +EXPORT_SYMBOL_GPL(cxl_afu_reset); + +void cxl_perst_reloads_same_image(struct cxl_afu *afu, + bool perst_reloads_same_image) +{ + afu->adapter->perst_same_image = perst_reloads_same_image; +} +EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image); + +ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count) +{ + struct cxl_afu *afu = cxl_pci_to_afu(dev); + if (IS_ERR(afu)) + return -ENODEV; + + return cxl_ops->read_adapter_vpd(afu->adapter, buf, count); +} +EXPORT_SYMBOL_GPL(cxl_read_adapter_vpd); diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c new file mode 100644 index 0000000000..b054562c04 --- /dev/null +++ b/drivers/misc/cxl/base.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2014 IBM Corp. 
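/*
 * Illustrative sketch, not part of the upstream hunk: handing a context to
 * userspace with cxl_get_fd().  The caller-supplied my_afu_fops is an
 * assumption; entries left NULL in it are filled from the default afu_fops
 * by the PATCH_FOPS() logic above, and fd_install() publishes the fd once
 * the driver is ready for userspace to see it.
 */
static int example_export_context(struct cxl_context *ctx,
				  struct file_operations *my_afu_fops)
{
	struct file *file;
	int fd;

	file = cxl_get_fd(ctx, my_afu_fops, &fd);
	if (IS_ERR_OR_NULL(file))
		return file ? PTR_ERR(file) : -ENOMEM;

	/* typically the context is started before the fd becomes visible */
	fd_install(fd, file);
	return fd;
}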
+ */ + +#include +#include +#include +#include +#include +#include +#include "cxl.h" + +/* protected by rcu */ +static struct cxl_calls *cxl_calls; + +atomic_t cxl_use_count = ATOMIC_INIT(0); +EXPORT_SYMBOL(cxl_use_count); + +#ifdef CONFIG_CXL_MODULE + +static inline struct cxl_calls *cxl_calls_get(void) +{ + struct cxl_calls *calls = NULL; + + rcu_read_lock(); + calls = rcu_dereference(cxl_calls); + if (calls && !try_module_get(calls->owner)) + calls = NULL; + rcu_read_unlock(); + + return calls; +} + +static inline void cxl_calls_put(struct cxl_calls *calls) +{ + BUG_ON(calls != cxl_calls); + + /* we don't need to rcu this, as we hold a reference to the module */ + module_put(cxl_calls->owner); +} + +#else /* !defined CONFIG_CXL_MODULE */ + +static inline struct cxl_calls *cxl_calls_get(void) +{ + return cxl_calls; +} + +static inline void cxl_calls_put(struct cxl_calls *calls) { } + +#endif /* CONFIG_CXL_MODULE */ + +/* AFU refcount management */ +struct cxl_afu *cxl_afu_get(struct cxl_afu *afu) +{ + return (get_device(&afu->dev) == NULL) ? NULL : afu; +} +EXPORT_SYMBOL_GPL(cxl_afu_get); + +void cxl_afu_put(struct cxl_afu *afu) +{ + put_device(&afu->dev); +} +EXPORT_SYMBOL_GPL(cxl_afu_put); + +void cxl_slbia(struct mm_struct *mm) +{ + struct cxl_calls *calls; + + calls = cxl_calls_get(); + if (!calls) + return; + + if (cxl_ctx_in_use()) + calls->cxl_slbia(mm); + + cxl_calls_put(calls); +} + +int register_cxl_calls(struct cxl_calls *calls) +{ + if (cxl_calls) + return -EBUSY; + + rcu_assign_pointer(cxl_calls, calls); + return 0; +} +EXPORT_SYMBOL_GPL(register_cxl_calls); + +void unregister_cxl_calls(struct cxl_calls *calls) +{ + BUG_ON(cxl_calls->owner != calls->owner); + RCU_INIT_POINTER(cxl_calls, NULL); + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(unregister_cxl_calls); + +int cxl_update_properties(struct device_node *dn, + struct property *new_prop) +{ + return of_update_property(dn, new_prop); +} +EXPORT_SYMBOL_GPL(cxl_update_properties); + +static int __init cxl_base_init(void) +{ + struct device_node *np; + struct platform_device *dev; + int count = 0; + + /* + * Scan for compatible devices in guest only + */ + if (cpu_has_feature(CPU_FTR_HVMODE)) + return 0; + + for_each_compatible_node(np, NULL, "ibm,coherent-platform-facility") { + dev = of_platform_device_create(np, NULL, NULL); + if (dev) + count++; + } + pr_devel("Found %d cxl device(s)\n", count); + return 0; +} +device_initcall(cxl_base_init); diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c new file mode 100644 index 0000000000..76b5ea66df --- /dev/null +++ b/drivers/misc/cxl/context.c @@ -0,0 +1,362 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2014 IBM Corp. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cxl.h" + +/* + * Allocates space for a CXL context. + */ +struct cxl_context *cxl_context_alloc(void) +{ + return kzalloc(sizeof(struct cxl_context), GFP_KERNEL); +} + +/* + * Initialises a CXL context. 
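/*
 * Illustrative sketch, not part of the upstream hunk: how a caller such as
 * the main cxl module might register its SLB-invalidation handler with the
 * built-in base code above via the RCU-protected cxl_calls hook.
 * my_cxl_slbia is a placeholder for that handler.
 */
static void my_cxl_slbia(struct mm_struct *mm)
{
	/* invalidate SLB entries the PSL may have cached for this mm */
}

static struct cxl_calls my_cxl_calls = {
	.cxl_slbia	= my_cxl_slbia,
	.owner		= THIS_MODULE,
};

static int __init example_register(void)
{
	return register_cxl_calls(&my_cxl_calls);	/* -EBUSY if already set */
}

static void __exit example_unregister(void)
{
	unregister_cxl_calls(&my_cxl_calls);	/* waits for RCU readers to drain */
}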
+ */ +int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master) +{ + int i; + + ctx->afu = afu; + ctx->master = master; + ctx->pid = NULL; /* Set in start work ioctl */ + mutex_init(&ctx->mapping_lock); + ctx->mapping = NULL; + ctx->tidr = 0; + ctx->assign_tidr = false; + + if (cxl_is_power8()) { + spin_lock_init(&ctx->sste_lock); + + /* + * Allocate the segment table before we put it in the IDR so that we + * can always access it when dereferenced from IDR. For the same + * reason, the segment table is only destroyed after the context is + * removed from the IDR. Access to this in the IOCTL is protected by + * Linux filesystem semantics (can't IOCTL until open is complete). + */ + i = cxl_alloc_sst(ctx); + if (i) + return i; + } + + INIT_WORK(&ctx->fault_work, cxl_handle_fault); + + init_waitqueue_head(&ctx->wq); + spin_lock_init(&ctx->lock); + + ctx->irq_bitmap = NULL; + ctx->pending_irq = false; + ctx->pending_fault = false; + ctx->pending_afu_err = false; + + INIT_LIST_HEAD(&ctx->irq_names); + + /* + * When we have to destroy all contexts in cxl_context_detach_all() we + * end up with afu_release_irqs() called from inside a + * idr_for_each_entry(). Hence we need to make sure that anything + * dereferenced from this IDR is ok before we allocate the IDR here. + * This clears out the IRQ ranges to ensure this. + */ + for (i = 0; i < CXL_IRQ_RANGES; i++) + ctx->irqs.range[i] = 0; + + mutex_init(&ctx->status_mutex); + + ctx->status = OPENED; + + /* + * Allocating IDR! We better make sure everything's setup that + * dereferences from it. + */ + mutex_lock(&afu->contexts_lock); + idr_preload(GFP_KERNEL); + i = idr_alloc(&ctx->afu->contexts_idr, ctx, 0, + ctx->afu->num_procs, GFP_NOWAIT); + idr_preload_end(); + mutex_unlock(&afu->contexts_lock); + if (i < 0) + return i; + + ctx->pe = i; + if (cpu_has_feature(CPU_FTR_HVMODE)) { + ctx->elem = &ctx->afu->native->spa[i]; + ctx->external_pe = ctx->pe; + } else { + ctx->external_pe = -1; /* assigned when attaching */ + } + ctx->pe_inserted = false; + + /* + * take a ref on the afu so that it stays alive at-least till + * this context is reclaimed inside reclaim_ctx. 
+ */ + cxl_afu_get(afu); + return 0; +} + +void cxl_context_set_mapping(struct cxl_context *ctx, + struct address_space *mapping) +{ + mutex_lock(&ctx->mapping_lock); + ctx->mapping = mapping; + mutex_unlock(&ctx->mapping_lock); +} + +static vm_fault_t cxl_mmap_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct cxl_context *ctx = vma->vm_file->private_data; + u64 area, offset; + vm_fault_t ret; + + offset = vmf->pgoff << PAGE_SHIFT; + + pr_devel("%s: pe: %i address: 0x%lx offset: 0x%llx\n", + __func__, ctx->pe, vmf->address, offset); + + if (ctx->afu->current_mode == CXL_MODE_DEDICATED) { + area = ctx->afu->psn_phys; + if (offset >= ctx->afu->adapter->ps_size) + return VM_FAULT_SIGBUS; + } else { + area = ctx->psn_phys; + if (offset >= ctx->psn_size) + return VM_FAULT_SIGBUS; + } + + mutex_lock(&ctx->status_mutex); + + if (ctx->status != STARTED) { + mutex_unlock(&ctx->status_mutex); + pr_devel("%s: Context not started, failing problem state access\n", __func__); + if (ctx->mmio_err_ff) { + if (!ctx->ff_page) { + ctx->ff_page = alloc_page(GFP_USER); + if (!ctx->ff_page) + return VM_FAULT_OOM; + memset(page_address(ctx->ff_page), 0xff, PAGE_SIZE); + } + get_page(ctx->ff_page); + vmf->page = ctx->ff_page; + vma->vm_page_prot = pgprot_cached(vma->vm_page_prot); + return 0; + } + return VM_FAULT_SIGBUS; + } + + ret = vmf_insert_pfn(vma, vmf->address, (area + offset) >> PAGE_SHIFT); + + mutex_unlock(&ctx->status_mutex); + + return ret; +} + +static const struct vm_operations_struct cxl_mmap_vmops = { + .fault = cxl_mmap_fault, +}; + +/* + * Map a per-context mmio space into the given vma. + */ +int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma) +{ + u64 start = vma->vm_pgoff << PAGE_SHIFT; + u64 len = vma->vm_end - vma->vm_start; + + if (ctx->afu->current_mode == CXL_MODE_DEDICATED) { + if (start + len > ctx->afu->adapter->ps_size) + return -EINVAL; + + if (cxl_is_power9()) { + /* + * Make sure there is a valid problem state + * area space for this AFU. + */ + if (ctx->master && !ctx->afu->psa) { + pr_devel("AFU doesn't support mmio space\n"); + return -EINVAL; + } + + /* Can't mmap until the AFU is enabled */ + if (!ctx->afu->enabled) + return -EBUSY; + } + } else { + if (start + len > ctx->psn_size) + return -EINVAL; + + /* Make sure there is a valid per process space for this AFU */ + if ((ctx->master && !ctx->afu->psa) || (!ctx->afu->pp_psa)) { + pr_devel("AFU doesn't support mmio space\n"); + return -EINVAL; + } + + /* Can't mmap until the AFU is enabled */ + if (!ctx->afu->enabled) + return -EBUSY; + } + + pr_devel("%s: mmio physical: %llx pe: %i master:%i\n", __func__, + ctx->psn_phys, ctx->pe , ctx->master); + + vm_flags_set(vma, VM_IO | VM_PFNMAP); + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_ops = &cxl_mmap_vmops; + return 0; +} + +/* + * Detach a context from the hardware. This disables interrupts and doesn't + * return until all outstanding interrupts for this context have completed. The + * hardware should no longer access *ctx after this has returned. + */ +int __detach_context(struct cxl_context *ctx) +{ + enum cxl_context_status status; + + mutex_lock(&ctx->status_mutex); + status = ctx->status; + ctx->status = CLOSED; + mutex_unlock(&ctx->status_mutex); + if (status != STARTED) + return -EBUSY; + + /* Only warn if we detached while the link was OK. + * If detach fails when hw is down, we don't care. 
+ */ + WARN_ON(cxl_ops->detach_process(ctx) && + cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)); + flush_work(&ctx->fault_work); /* Only needed for dedicated process */ + + /* + * Wait until no further interrupts are presented by the PSL + * for this context. + */ + if (cxl_ops->irq_wait) + cxl_ops->irq_wait(ctx); + + /* release the reference to the group leader and mm handling pid */ + put_pid(ctx->pid); + + cxl_ctx_put(); + + /* Decrease the attached context count on the adapter */ + cxl_adapter_context_put(ctx->afu->adapter); + + /* Decrease the mm count on the context */ + cxl_context_mm_count_put(ctx); + if (ctx->mm) + mm_context_remove_copro(ctx->mm); + ctx->mm = NULL; + + return 0; +} + +/* + * Detach the given context from the AFU. This doesn't actually + * free the context but it should stop the context running in hardware + * (ie. prevent this context from generating any further interrupts + * so that it can be freed). + */ +void cxl_context_detach(struct cxl_context *ctx) +{ + int rc; + + rc = __detach_context(ctx); + if (rc) + return; + + afu_release_irqs(ctx, ctx); + wake_up_all(&ctx->wq); +} + +/* + * Detach all contexts on the given AFU. + */ +void cxl_context_detach_all(struct cxl_afu *afu) +{ + struct cxl_context *ctx; + int tmp; + + mutex_lock(&afu->contexts_lock); + idr_for_each_entry(&afu->contexts_idr, ctx, tmp) { + /* + * Anything done in here needs to be setup before the IDR is + * created and torn down after the IDR removed + */ + cxl_context_detach(ctx); + + /* + * We are force detaching - remove any active PSA mappings so + * userspace cannot interfere with the card if it comes back. + * Easiest way to exercise this is to unbind and rebind the + * driver via sysfs while it is in use. + */ + mutex_lock(&ctx->mapping_lock); + if (ctx->mapping) + unmap_mapping_range(ctx->mapping, 0, 0, 1); + mutex_unlock(&ctx->mapping_lock); + } + mutex_unlock(&afu->contexts_lock); +} + +static void reclaim_ctx(struct rcu_head *rcu) +{ + struct cxl_context *ctx = container_of(rcu, struct cxl_context, rcu); + + if (cxl_is_power8()) + free_page((u64)ctx->sstp); + if (ctx->ff_page) + __free_page(ctx->ff_page); + ctx->sstp = NULL; + + bitmap_free(ctx->irq_bitmap); + + /* Drop ref to the afu device taken during cxl_context_init */ + cxl_afu_put(ctx->afu); + + kfree(ctx); +} + +void cxl_context_free(struct cxl_context *ctx) +{ + if (ctx->kernelapi && ctx->mapping) + cxl_release_mapping(ctx); + mutex_lock(&ctx->afu->contexts_lock); + idr_remove(&ctx->afu->contexts_idr, ctx->pe); + mutex_unlock(&ctx->afu->contexts_lock); + call_rcu(&ctx->rcu, reclaim_ctx); +} + +void cxl_context_mm_count_get(struct cxl_context *ctx) +{ + if (ctx->mm) + mmgrab(ctx->mm); +} + +void cxl_context_mm_count_put(struct cxl_context *ctx) +{ + if (ctx->mm) + mmdrop(ctx->mm); +} diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h new file mode 100644 index 0000000000..0562071cdd --- /dev/null +++ b/drivers/misc/cxl/cxl.h @@ -0,0 +1,1134 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2014 IBM Corp. + */ + +#ifndef _CXL_H_ +#define _CXL_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +extern uint cxl_verbose; + +struct property; + +#define CXL_TIMEOUT 5 + +/* + * Bump version each time a user API change is made, whether it is + * backwards compatible ot not. 
+ */ +#define CXL_API_VERSION 3 +#define CXL_API_VERSION_COMPATIBLE 1 + +/* + * Opaque types to avoid accidentally passing registers for the wrong MMIO + * + * At the end of the day, I'm not married to using typedef here, but it might + * (and has!) help avoid bugs like mixing up CXL_PSL_CtxTime and + * CXL_PSL_CtxTime_An, or calling cxl_p1n_write instead of cxl_p1_write. + * + * I'm quite happy if these are changed back to #defines before upstreaming, it + * should be little more than a regexp search+replace operation in this file. + */ +typedef struct { + const int x; +} cxl_p1_reg_t; +typedef struct { + const int x; +} cxl_p1n_reg_t; +typedef struct { + const int x; +} cxl_p2n_reg_t; +#define cxl_reg_off(reg) \ + (reg.x) + +/* Memory maps. Ref CXL Appendix A */ + +/* PSL Privilege 1 Memory Map */ +/* Configuration and Control area - CAIA 1&2 */ +static const cxl_p1_reg_t CXL_PSL_CtxTime = {0x0000}; +static const cxl_p1_reg_t CXL_PSL_ErrIVTE = {0x0008}; +static const cxl_p1_reg_t CXL_PSL_KEY1 = {0x0010}; +static const cxl_p1_reg_t CXL_PSL_KEY2 = {0x0018}; +static const cxl_p1_reg_t CXL_PSL_Control = {0x0020}; +/* Downloading */ +static const cxl_p1_reg_t CXL_PSL_DLCNTL = {0x0060}; +static const cxl_p1_reg_t CXL_PSL_DLADDR = {0x0068}; + +/* PSL Lookaside Buffer Management Area - CAIA 1 */ +static const cxl_p1_reg_t CXL_PSL_LBISEL = {0x0080}; +static const cxl_p1_reg_t CXL_PSL_SLBIE = {0x0088}; +static const cxl_p1_reg_t CXL_PSL_SLBIA = {0x0090}; +static const cxl_p1_reg_t CXL_PSL_TLBIE = {0x00A0}; +static const cxl_p1_reg_t CXL_PSL_TLBIA = {0x00A8}; +static const cxl_p1_reg_t CXL_PSL_AFUSEL = {0x00B0}; + +/* 0x00C0:7EFF Implementation dependent area */ +/* PSL registers - CAIA 1 */ +static const cxl_p1_reg_t CXL_PSL_FIR1 = {0x0100}; +static const cxl_p1_reg_t CXL_PSL_FIR2 = {0x0108}; +static const cxl_p1_reg_t CXL_PSL_Timebase = {0x0110}; +static const cxl_p1_reg_t CXL_PSL_VERSION = {0x0118}; +static const cxl_p1_reg_t CXL_PSL_RESLCKTO = {0x0128}; +static const cxl_p1_reg_t CXL_PSL_TB_CTLSTAT = {0x0140}; +static const cxl_p1_reg_t CXL_PSL_FIR_CNTL = {0x0148}; +static const cxl_p1_reg_t CXL_PSL_DSNDCTL = {0x0150}; +static const cxl_p1_reg_t CXL_PSL_SNWRALLOC = {0x0158}; +static const cxl_p1_reg_t CXL_PSL_TRACE = {0x0170}; +/* PSL registers - CAIA 2 */ +static const cxl_p1_reg_t CXL_PSL9_CONTROL = {0x0020}; +static const cxl_p1_reg_t CXL_XSL9_INV = {0x0110}; +static const cxl_p1_reg_t CXL_XSL9_DBG = {0x0130}; +static const cxl_p1_reg_t CXL_XSL9_DEF = {0x0140}; +static const cxl_p1_reg_t CXL_XSL9_DSNCTL = {0x0168}; +static const cxl_p1_reg_t CXL_PSL9_FIR1 = {0x0300}; +static const cxl_p1_reg_t CXL_PSL9_FIR_MASK = {0x0308}; +static const cxl_p1_reg_t CXL_PSL9_Timebase = {0x0310}; +static const cxl_p1_reg_t CXL_PSL9_DEBUG = {0x0320}; +static const cxl_p1_reg_t CXL_PSL9_FIR_CNTL = {0x0348}; +static const cxl_p1_reg_t CXL_PSL9_DSNDCTL = {0x0350}; +static const cxl_p1_reg_t CXL_PSL9_TB_CTLSTAT = {0x0340}; +static const cxl_p1_reg_t CXL_PSL9_TRACECFG = {0x0368}; +static const cxl_p1_reg_t CXL_PSL9_APCDEDALLOC = {0x0378}; +static const cxl_p1_reg_t CXL_PSL9_APCDEDTYPE = {0x0380}; +static const cxl_p1_reg_t CXL_PSL9_TNR_ADDR = {0x0388}; +static const cxl_p1_reg_t CXL_PSL9_CTCCFG = {0x0390}; +static const cxl_p1_reg_t CXL_PSL9_GP_CT = {0x0398}; +static const cxl_p1_reg_t CXL_XSL9_IERAT = {0x0588}; +static const cxl_p1_reg_t CXL_XSL9_ILPP = {0x0590}; + +/* 0x7F00:7FFF Reserved PCIe MSI-X Pending Bit Array area */ +/* 0x8000:FFFF Reserved PCIe MSI-X Table Area */ + +/* PSL Slice Privilege 1 Memory Map 
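/*
 * Illustrative sketch, not part of the upstream hunk: what the opaque
 * register tokens above buy.  With the cxl_p1_write()/cxl_p1n_write()
 * accessors defined further down in this header, the compiler keeps
 * adapter-wide and per-slice registers apart; adapter and afu are
 * placeholder parameters.
 */
static void example_reg_type_safety(struct cxl *adapter, struct cxl_afu *afu)
{
	cxl_p1_write(adapter, CXL_PSL_Control, 0);	/* adapter-wide P1 register */
	cxl_p1n_write(afu, CXL_PSL_CtxTime_An, 0);	/* per-slice P1 register */
	/*
	 * cxl_p1_write(adapter, CXL_PSL_CtxTime_An, 0) would fail to compile,
	 * since cxl_p1n_reg_t does not convert to cxl_p1_reg_t; with plain
	 * #defines the same mix-up would build silently.
	 */
}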
*/ +/* Configuration Area - CAIA 1&2 */ +static const cxl_p1n_reg_t CXL_PSL_SR_An = {0x00}; +static const cxl_p1n_reg_t CXL_PSL_LPID_An = {0x08}; +static const cxl_p1n_reg_t CXL_PSL_AMBAR_An = {0x10}; +static const cxl_p1n_reg_t CXL_PSL_SPOffset_An = {0x18}; +static const cxl_p1n_reg_t CXL_PSL_ID_An = {0x20}; +static const cxl_p1n_reg_t CXL_PSL_SERR_An = {0x28}; +/* Memory Management and Lookaside Buffer Management - CAIA 1*/ +static const cxl_p1n_reg_t CXL_PSL_SDR_An = {0x30}; +/* Memory Management and Lookaside Buffer Management - CAIA 1&2 */ +static const cxl_p1n_reg_t CXL_PSL_AMOR_An = {0x38}; +/* Pointer Area - CAIA 1&2 */ +static const cxl_p1n_reg_t CXL_HAURP_An = {0x80}; +static const cxl_p1n_reg_t CXL_PSL_SPAP_An = {0x88}; +static const cxl_p1n_reg_t CXL_PSL_LLCMD_An = {0x90}; +/* Control Area - CAIA 1&2 */ +static const cxl_p1n_reg_t CXL_PSL_SCNTL_An = {0xA0}; +static const cxl_p1n_reg_t CXL_PSL_CtxTime_An = {0xA8}; +static const cxl_p1n_reg_t CXL_PSL_IVTE_Offset_An = {0xB0}; +static const cxl_p1n_reg_t CXL_PSL_IVTE_Limit_An = {0xB8}; +/* 0xC0:FF Implementation Dependent Area - CAIA 1&2 */ +static const cxl_p1n_reg_t CXL_PSL_FIR_SLICE_An = {0xC0}; +static const cxl_p1n_reg_t CXL_AFU_DEBUG_An = {0xC8}; +/* 0xC0:FF Implementation Dependent Area - CAIA 1 */ +static const cxl_p1n_reg_t CXL_PSL_APCALLOC_A = {0xD0}; +static const cxl_p1n_reg_t CXL_PSL_COALLOC_A = {0xD8}; +static const cxl_p1n_reg_t CXL_PSL_RXCTL_A = {0xE0}; +static const cxl_p1n_reg_t CXL_PSL_SLICE_TRACE = {0xE8}; + +/* PSL Slice Privilege 2 Memory Map */ +/* Configuration and Control Area - CAIA 1&2 */ +static const cxl_p2n_reg_t CXL_PSL_PID_TID_An = {0x000}; +static const cxl_p2n_reg_t CXL_CSRP_An = {0x008}; +/* Configuration and Control Area - CAIA 1 */ +static const cxl_p2n_reg_t CXL_AURP0_An = {0x010}; +static const cxl_p2n_reg_t CXL_AURP1_An = {0x018}; +static const cxl_p2n_reg_t CXL_SSTP0_An = {0x020}; +static const cxl_p2n_reg_t CXL_SSTP1_An = {0x028}; +/* Configuration and Control Area - CAIA 1 */ +static const cxl_p2n_reg_t CXL_PSL_AMR_An = {0x030}; +/* Segment Lookaside Buffer Management - CAIA 1 */ +static const cxl_p2n_reg_t CXL_SLBIE_An = {0x040}; +static const cxl_p2n_reg_t CXL_SLBIA_An = {0x048}; +static const cxl_p2n_reg_t CXL_SLBI_Select_An = {0x050}; +/* Interrupt Registers - CAIA 1&2 */ +static const cxl_p2n_reg_t CXL_PSL_DSISR_An = {0x060}; +static const cxl_p2n_reg_t CXL_PSL_DAR_An = {0x068}; +static const cxl_p2n_reg_t CXL_PSL_DSR_An = {0x070}; +static const cxl_p2n_reg_t CXL_PSL_TFC_An = {0x078}; +static const cxl_p2n_reg_t CXL_PSL_PEHandle_An = {0x080}; +static const cxl_p2n_reg_t CXL_PSL_ErrStat_An = {0x088}; +/* AFU Registers - CAIA 1&2 */ +static const cxl_p2n_reg_t CXL_AFU_Cntl_An = {0x090}; +static const cxl_p2n_reg_t CXL_AFU_ERR_An = {0x098}; +/* Work Element Descriptor - CAIA 1&2 */ +static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0}; +/* 0x0C0:FFF Implementation Dependent Area */ + +#define CXL_PSL_SPAP_Addr 0x0ffffffffffff000ULL +#define CXL_PSL_SPAP_Size 0x0000000000000ff0ULL +#define CXL_PSL_SPAP_Size_Shift 4 +#define CXL_PSL_SPAP_V 0x0000000000000001ULL + +/****** CXL_PSL_Control ****************************************************/ +#define CXL_PSL_Control_tb (0x1ull << (63-63)) +#define CXL_PSL_Control_Fr (0x1ull << (63-31)) +#define CXL_PSL_Control_Fs_MASK (0x3ull << (63-29)) +#define CXL_PSL_Control_Fs_Complete (0x3ull << (63-29)) + +/****** CXL_PSL_DLCNTL *****************************************************/ +#define CXL_PSL_DLCNTL_D (0x1ull << (63-28)) +#define 
CXL_PSL_DLCNTL_C (0x1ull << (63-29)) +#define CXL_PSL_DLCNTL_E (0x1ull << (63-30)) +#define CXL_PSL_DLCNTL_S (0x1ull << (63-31)) +#define CXL_PSL_DLCNTL_CE (CXL_PSL_DLCNTL_C | CXL_PSL_DLCNTL_E) +#define CXL_PSL_DLCNTL_DCES (CXL_PSL_DLCNTL_D | CXL_PSL_DLCNTL_CE | CXL_PSL_DLCNTL_S) + +/****** CXL_PSL_SR_An ******************************************************/ +#define CXL_PSL_SR_An_SF MSR_SF /* 64bit */ +#define CXL_PSL_SR_An_TA (1ull << (63-1)) /* Tags active, GA1: 0 */ +#define CXL_PSL_SR_An_HV MSR_HV /* Hypervisor, GA1: 0 */ +#define CXL_PSL_SR_An_XLAT_hpt (0ull << (63-6))/* Hashed page table (HPT) mode */ +#define CXL_PSL_SR_An_XLAT_roh (2ull << (63-6))/* Radix on HPT mode */ +#define CXL_PSL_SR_An_XLAT_ror (3ull << (63-6))/* Radix on Radix mode */ +#define CXL_PSL_SR_An_BOT (1ull << (63-10)) /* Use the in-memory segment table */ +#define CXL_PSL_SR_An_PR MSR_PR /* Problem state, GA1: 1 */ +#define CXL_PSL_SR_An_ISL (1ull << (63-53)) /* Ignore Segment Large Page */ +#define CXL_PSL_SR_An_TC (1ull << (63-54)) /* Page Table secondary hash */ +#define CXL_PSL_SR_An_US (1ull << (63-56)) /* User state, GA1: X */ +#define CXL_PSL_SR_An_SC (1ull << (63-58)) /* Segment Table secondary hash */ +#define CXL_PSL_SR_An_R MSR_DR /* Relocate, GA1: 1 */ +#define CXL_PSL_SR_An_MP (1ull << (63-62)) /* Master Process */ +#define CXL_PSL_SR_An_LE (1ull << (63-63)) /* Little Endian */ + +/****** CXL_PSL_ID_An ****************************************************/ +#define CXL_PSL_ID_An_F (1ull << (63-31)) +#define CXL_PSL_ID_An_L (1ull << (63-30)) + +/****** CXL_PSL_SERR_An ****************************************************/ +#define CXL_PSL_SERR_An_afuto (1ull << (63-0)) +#define CXL_PSL_SERR_An_afudis (1ull << (63-1)) +#define CXL_PSL_SERR_An_afuov (1ull << (63-2)) +#define CXL_PSL_SERR_An_badsrc (1ull << (63-3)) +#define CXL_PSL_SERR_An_badctx (1ull << (63-4)) +#define CXL_PSL_SERR_An_llcmdis (1ull << (63-5)) +#define CXL_PSL_SERR_An_llcmdto (1ull << (63-6)) +#define CXL_PSL_SERR_An_afupar (1ull << (63-7)) +#define CXL_PSL_SERR_An_afudup (1ull << (63-8)) +#define CXL_PSL_SERR_An_IRQS ( \ + CXL_PSL_SERR_An_afuto | CXL_PSL_SERR_An_afudis | CXL_PSL_SERR_An_afuov | \ + CXL_PSL_SERR_An_badsrc | CXL_PSL_SERR_An_badctx | CXL_PSL_SERR_An_llcmdis | \ + CXL_PSL_SERR_An_llcmdto | CXL_PSL_SERR_An_afupar | CXL_PSL_SERR_An_afudup) +#define CXL_PSL_SERR_An_afuto_mask (1ull << (63-32)) +#define CXL_PSL_SERR_An_afudis_mask (1ull << (63-33)) +#define CXL_PSL_SERR_An_afuov_mask (1ull << (63-34)) +#define CXL_PSL_SERR_An_badsrc_mask (1ull << (63-35)) +#define CXL_PSL_SERR_An_badctx_mask (1ull << (63-36)) +#define CXL_PSL_SERR_An_llcmdis_mask (1ull << (63-37)) +#define CXL_PSL_SERR_An_llcmdto_mask (1ull << (63-38)) +#define CXL_PSL_SERR_An_afupar_mask (1ull << (63-39)) +#define CXL_PSL_SERR_An_afudup_mask (1ull << (63-40)) +#define CXL_PSL_SERR_An_IRQ_MASKS ( \ + CXL_PSL_SERR_An_afuto_mask | CXL_PSL_SERR_An_afudis_mask | CXL_PSL_SERR_An_afuov_mask | \ + CXL_PSL_SERR_An_badsrc_mask | CXL_PSL_SERR_An_badctx_mask | CXL_PSL_SERR_An_llcmdis_mask | \ + CXL_PSL_SERR_An_llcmdto_mask | CXL_PSL_SERR_An_afupar_mask | CXL_PSL_SERR_An_afudup_mask) + +#define CXL_PSL_SERR_An_AE (1ull << (63-30)) + +/****** CXL_PSL_SCNTL_An ****************************************************/ +#define CXL_PSL_SCNTL_An_CR (0x1ull << (63-15)) +/* Programming Modes: */ +#define CXL_PSL_SCNTL_An_PM_MASK (0xffffull << (63-31)) +#define CXL_PSL_SCNTL_An_PM_Shared (0x0000ull << (63-31)) +#define CXL_PSL_SCNTL_An_PM_OS (0x0001ull << (63-31)) +#define 
CXL_PSL_SCNTL_An_PM_Process (0x0002ull << (63-31)) +#define CXL_PSL_SCNTL_An_PM_AFU (0x0004ull << (63-31)) +#define CXL_PSL_SCNTL_An_PM_AFU_PBT (0x0104ull << (63-31)) +/* Purge Status (ro) */ +#define CXL_PSL_SCNTL_An_Ps_MASK (0x3ull << (63-39)) +#define CXL_PSL_SCNTL_An_Ps_Pending (0x1ull << (63-39)) +#define CXL_PSL_SCNTL_An_Ps_Complete (0x3ull << (63-39)) +/* Purge */ +#define CXL_PSL_SCNTL_An_Pc (0x1ull << (63-48)) +/* Suspend Status (ro) */ +#define CXL_PSL_SCNTL_An_Ss_MASK (0x3ull << (63-55)) +#define CXL_PSL_SCNTL_An_Ss_Pending (0x1ull << (63-55)) +#define CXL_PSL_SCNTL_An_Ss_Complete (0x3ull << (63-55)) +/* Suspend Control */ +#define CXL_PSL_SCNTL_An_Sc (0x1ull << (63-63)) + +/* AFU Slice Enable Status (ro) */ +#define CXL_AFU_Cntl_An_ES_MASK (0x7ull << (63-2)) +#define CXL_AFU_Cntl_An_ES_Disabled (0x0ull << (63-2)) +#define CXL_AFU_Cntl_An_ES_Enabled (0x4ull << (63-2)) +/* AFU Slice Enable */ +#define CXL_AFU_Cntl_An_E (0x1ull << (63-3)) +/* AFU Slice Reset status (ro) */ +#define CXL_AFU_Cntl_An_RS_MASK (0x3ull << (63-5)) +#define CXL_AFU_Cntl_An_RS_Pending (0x1ull << (63-5)) +#define CXL_AFU_Cntl_An_RS_Complete (0x2ull << (63-5)) +/* AFU Slice Reset */ +#define CXL_AFU_Cntl_An_RA (0x1ull << (63-7)) + +/****** CXL_SSTP0/1_An ******************************************************/ +/* These top bits are for the segment that CONTAINS the segment table */ +#define CXL_SSTP0_An_B_SHIFT SLB_VSID_SSIZE_SHIFT +#define CXL_SSTP0_An_KS (1ull << (63-2)) +#define CXL_SSTP0_An_KP (1ull << (63-3)) +#define CXL_SSTP0_An_N (1ull << (63-4)) +#define CXL_SSTP0_An_L (1ull << (63-5)) +#define CXL_SSTP0_An_C (1ull << (63-6)) +#define CXL_SSTP0_An_TA (1ull << (63-7)) +#define CXL_SSTP0_An_LP_SHIFT (63-9) /* 2 Bits */ +/* And finally, the virtual address & size of the segment table: */ +#define CXL_SSTP0_An_SegTableSize_SHIFT (63-31) /* 12 Bits */ +#define CXL_SSTP0_An_SegTableSize_MASK \ + (((1ull << 12) - 1) << CXL_SSTP0_An_SegTableSize_SHIFT) +#define CXL_SSTP0_An_STVA_U_MASK ((1ull << (63-49))-1) +#define CXL_SSTP1_An_STVA_L_MASK (~((1ull << (63-55))-1)) +#define CXL_SSTP1_An_V (1ull << (63-63)) + +/****** CXL_PSL_SLBIE_[An] - CAIA 1 **************************************************/ +/* write: */ +#define CXL_SLBIE_C PPC_BIT(36) /* Class */ +#define CXL_SLBIE_SS PPC_BITMASK(37, 38) /* Segment Size */ +#define CXL_SLBIE_SS_SHIFT PPC_BITLSHIFT(38) +#define CXL_SLBIE_TA PPC_BIT(38) /* Tags Active */ +/* read: */ +#define CXL_SLBIE_MAX PPC_BITMASK(24, 31) +#define CXL_SLBIE_PENDING PPC_BITMASK(56, 63) + +/****** Common to all CXL_TLBIA/SLBIA_[An] - CAIA 1 **********************************/ +#define CXL_TLB_SLB_P (1ull) /* Pending (read) */ + +/****** Common to all CXL_TLB/SLB_IA/IE_[An] registers - CAIA 1 **********************/ +#define CXL_TLB_SLB_IQ_ALL (0ull) /* Inv qualifier */ +#define CXL_TLB_SLB_IQ_LPID (1ull) /* Inv qualifier */ +#define CXL_TLB_SLB_IQ_LPIDPID (3ull) /* Inv qualifier */ + +/****** CXL_PSL_AFUSEL ******************************************************/ +#define CXL_PSL_AFUSEL_A (1ull << (63-55)) /* Adapter wide invalidates affect all AFUs */ + +/****** CXL_PSL_DSISR_An - CAIA 1 ****************************************************/ +#define CXL_PSL_DSISR_An_DS (1ull << (63-0)) /* Segment not found */ +#define CXL_PSL_DSISR_An_DM (1ull << (63-1)) /* PTE not found (See also: M) or protection fault */ +#define CXL_PSL_DSISR_An_ST (1ull << (63-2)) /* Segment Table PTE not found */ +#define CXL_PSL_DSISR_An_UR (1ull << (63-3)) /* AURP PTE not found */ +#define CXL_PSL_DSISR_TRANS 
(CXL_PSL_DSISR_An_DS | CXL_PSL_DSISR_An_DM | CXL_PSL_DSISR_An_ST | CXL_PSL_DSISR_An_UR) +#define CXL_PSL_DSISR_An_PE (1ull << (63-4)) /* PSL Error (implementation specific) */ +#define CXL_PSL_DSISR_An_AE (1ull << (63-5)) /* AFU Error */ +#define CXL_PSL_DSISR_An_OC (1ull << (63-6)) /* OS Context Warning */ +#define CXL_PSL_DSISR_PENDING (CXL_PSL_DSISR_TRANS | CXL_PSL_DSISR_An_PE | CXL_PSL_DSISR_An_AE | CXL_PSL_DSISR_An_OC) +/* NOTE: Bits 32:63 are undefined if DSISR[DS] = 1 */ +#define CXL_PSL_DSISR_An_M DSISR_NOHPTE /* PTE not found */ +#define CXL_PSL_DSISR_An_P DSISR_PROTFAULT /* Storage protection violation */ +#define CXL_PSL_DSISR_An_A (1ull << (63-37)) /* AFU lock access to write through or cache inhibited storage */ +#define CXL_PSL_DSISR_An_S DSISR_ISSTORE /* Access was afu_wr or afu_zero */ +#define CXL_PSL_DSISR_An_K DSISR_KEYFAULT /* Access not permitted by virtual page class key protection */ + +/****** CXL_PSL_DSISR_An - CAIA 2 ****************************************************/ +#define CXL_PSL9_DSISR_An_TF (1ull << (63-3)) /* Translation fault */ +#define CXL_PSL9_DSISR_An_PE (1ull << (63-4)) /* PSL Error (implementation specific) */ +#define CXL_PSL9_DSISR_An_AE (1ull << (63-5)) /* AFU Error */ +#define CXL_PSL9_DSISR_An_OC (1ull << (63-6)) /* OS Context Warning */ +#define CXL_PSL9_DSISR_An_S (1ull << (63-38)) /* TF for a write operation */ +#define CXL_PSL9_DSISR_PENDING (CXL_PSL9_DSISR_An_TF | CXL_PSL9_DSISR_An_PE | CXL_PSL9_DSISR_An_AE | CXL_PSL9_DSISR_An_OC) +/* + * NOTE: Bits 56:63 (Checkout Response Status) are valid when DSISR_An[TF] = 1 + * Status (0:7) Encoding + */ +#define CXL_PSL9_DSISR_An_CO_MASK 0x00000000000000ffULL +#define CXL_PSL9_DSISR_An_SF 0x0000000000000080ULL /* Segment Fault 0b10000000 */ +#define CXL_PSL9_DSISR_An_PF_SLR 0x0000000000000088ULL /* PTE not found (Single Level Radix) 0b10001000 */ +#define CXL_PSL9_DSISR_An_PF_RGC 0x000000000000008CULL /* PTE not found (Radix Guest (child)) 0b10001100 */ +#define CXL_PSL9_DSISR_An_PF_RGP 0x0000000000000090ULL /* PTE not found (Radix Guest (parent)) 0b10010000 */ +#define CXL_PSL9_DSISR_An_PF_HRH 0x0000000000000094ULL /* PTE not found (HPT/Radix Host) 0b10010100 */ +#define CXL_PSL9_DSISR_An_PF_STEG 0x000000000000009CULL /* PTE not found (STEG VA) 0b10011100 */ +#define CXL_PSL9_DSISR_An_URTCH 0x00000000000000B4ULL /* Unsupported Radix Tree Configuration 0b10110100 */ + +/****** CXL_PSL_TFC_An ******************************************************/ +#define CXL_PSL_TFC_An_A (1ull << (63-28)) /* Acknowledge non-translation fault */ +#define CXL_PSL_TFC_An_C (1ull << (63-29)) /* Continue (abort transaction) */ +#define CXL_PSL_TFC_An_AE (1ull << (63-30)) /* Restart PSL with address error */ +#define CXL_PSL_TFC_An_R (1ull << (63-31)) /* Restart PSL transaction */ + +/****** CXL_PSL_DEBUG *****************************************************/ +#define CXL_PSL_DEBUG_CDC (1ull << (63-27)) /* Coherent Data cache support */ + +/****** CXL_XSL9_IERAT_ERAT - CAIA 2 **********************************/ +#define CXL_XSL9_IERAT_MLPID (1ull << (63-0)) /* Match LPID */ +#define CXL_XSL9_IERAT_MPID (1ull << (63-1)) /* Match PID */ +#define CXL_XSL9_IERAT_PRS (1ull << (63-4)) /* PRS bit for Radix invalidations */ +#define CXL_XSL9_IERAT_INVR (1ull << (63-3)) /* Invalidate Radix */ +#define CXL_XSL9_IERAT_IALL (1ull << (63-8)) /* Invalidate All */ +#define CXL_XSL9_IERAT_IINPROG (1ull << (63-63)) /* Invalidate in progress */ + +/* cxl_process_element->software_status */ +#define CXL_PE_SOFTWARE_STATE_V (1ul << (31 - 
0)) /* Valid */ +#define CXL_PE_SOFTWARE_STATE_C (1ul << (31 - 29)) /* Complete */ +#define CXL_PE_SOFTWARE_STATE_S (1ul << (31 - 30)) /* Suspend */ +#define CXL_PE_SOFTWARE_STATE_T (1ul << (31 - 31)) /* Terminate */ + +/****** CXL_PSL_RXCTL_An (Implementation Specific) ************************** + * Controls AFU Hang Pulse, which sets the timeout for the AFU to respond to + * the PSL for any response (except MMIO). Timeouts will occur between 1x to 2x + * of the hang pulse frequency. + */ +#define CXL_PSL_RXCTL_AFUHP_4S 0x7000000000000000ULL + +/* SPA->sw_command_status */ +#define CXL_SPA_SW_CMD_MASK 0xffff000000000000ULL +#define CXL_SPA_SW_CMD_TERMINATE 0x0001000000000000ULL +#define CXL_SPA_SW_CMD_REMOVE 0x0002000000000000ULL +#define CXL_SPA_SW_CMD_SUSPEND 0x0003000000000000ULL +#define CXL_SPA_SW_CMD_RESUME 0x0004000000000000ULL +#define CXL_SPA_SW_CMD_ADD 0x0005000000000000ULL +#define CXL_SPA_SW_CMD_UPDATE 0x0006000000000000ULL +#define CXL_SPA_SW_STATE_MASK 0x0000ffff00000000ULL +#define CXL_SPA_SW_STATE_TERMINATED 0x0000000100000000ULL +#define CXL_SPA_SW_STATE_REMOVED 0x0000000200000000ULL +#define CXL_SPA_SW_STATE_SUSPENDED 0x0000000300000000ULL +#define CXL_SPA_SW_STATE_RESUMED 0x0000000400000000ULL +#define CXL_SPA_SW_STATE_ADDED 0x0000000500000000ULL +#define CXL_SPA_SW_STATE_UPDATED 0x0000000600000000ULL +#define CXL_SPA_SW_PSL_ID_MASK 0x00000000ffff0000ULL +#define CXL_SPA_SW_LINK_MASK 0x000000000000ffffULL + +#define CXL_MAX_SLICES 4 +#define MAX_AFU_MMIO_REGS 3 + +#define CXL_MODE_TIME_SLICED 0x4 +#define CXL_SUPPORTED_MODES (CXL_MODE_DEDICATED | CXL_MODE_DIRECTED) + +#define CXL_DEV_MINORS 13 /* 1 control + 4 AFUs * 3 (dedicated/master/shared) */ +#define CXL_CARD_MINOR(adapter) (adapter->adapter_num * CXL_DEV_MINORS) +#define CXL_DEVT_ADAPTER(dev) (MINOR(dev) / CXL_DEV_MINORS) + +#define CXL_PSL9_TRACEID_MAX 0xAU +#define CXL_PSL9_TRACESTATE_FIN 0x3U + +enum cxl_context_status { + CLOSED, + OPENED, + STARTED +}; + +enum prefault_modes { + CXL_PREFAULT_NONE, + CXL_PREFAULT_WED, + CXL_PREFAULT_ALL, +}; + +enum cxl_attrs { + CXL_ADAPTER_ATTRS, + CXL_AFU_MASTER_ATTRS, + CXL_AFU_ATTRS, +}; + +struct cxl_sste { + __be64 esid_data; + __be64 vsid_data; +}; + +#define to_cxl_adapter(d) container_of(d, struct cxl, dev) +#define to_cxl_afu(d) container_of(d, struct cxl_afu, dev) + +struct cxl_afu_native { + void __iomem *p1n_mmio; + void __iomem *afu_desc_mmio; + irq_hw_number_t psl_hwirq; + unsigned int psl_virq; + struct mutex spa_mutex; + /* + * Only the first part of the SPA is used for the process element + * linked list. The only other part that software needs to worry about + * is sw_command_status, which we store a separate pointer to. 
+ * Everything else in the SPA is only used by hardware + */ + struct cxl_process_element *spa; + __be64 *sw_command_status; + unsigned int spa_size; + int spa_order; + int spa_max_procs; + u64 pp_offset; +}; + +struct cxl_afu_guest { + struct cxl_afu *parent; + u64 handle; + phys_addr_t p2n_phys; + u64 p2n_size; + int max_ints; + bool handle_err; + struct delayed_work work_err; + int previous_state; +}; + +struct cxl_afu { + struct cxl_afu_native *native; + struct cxl_afu_guest *guest; + irq_hw_number_t serr_hwirq; + unsigned int serr_virq; + char *psl_irq_name; + char *err_irq_name; + void __iomem *p2n_mmio; + phys_addr_t psn_phys; + u64 pp_size; + + struct cxl *adapter; + struct device dev; + struct cdev afu_cdev_s, afu_cdev_m, afu_cdev_d; + struct device *chardev_s, *chardev_m, *chardev_d; + struct idr contexts_idr; + struct dentry *debugfs; + struct mutex contexts_lock; + spinlock_t afu_cntl_lock; + + /* -1: AFU deconfigured/locked, >= 0: number of readers */ + atomic_t configured_state; + + /* AFU error buffer fields and bin attribute for sysfs */ + u64 eb_len, eb_offset; + struct bin_attribute attr_eb; + + /* pointer to the vphb */ + struct pci_controller *phb; + + int pp_irqs; + int irqs_max; + int num_procs; + int max_procs_virtualised; + int slice; + int modes_supported; + int current_mode; + int crs_num; + u64 crs_len; + u64 crs_offset; + struct list_head crs; + enum prefault_modes prefault_mode; + bool psa; + bool pp_psa; + bool enabled; +}; + + +struct cxl_irq_name { + struct list_head list; + char *name; +}; + +struct irq_avail { + irq_hw_number_t offset; + irq_hw_number_t range; + unsigned long *bitmap; +}; + +/* + * This is a cxl context. If the PSL is in dedicated mode, there will be one + * of these per AFU. If in AFU directed there can be lots of these. + */ +struct cxl_context { + struct cxl_afu *afu; + + /* Problem state MMIO */ + phys_addr_t psn_phys; + u64 psn_size; + + /* Used to unmap any mmaps when force detaching */ + struct address_space *mapping; + struct mutex mapping_lock; + struct page *ff_page; + bool mmio_err_ff; + bool kernelapi; + + spinlock_t sste_lock; /* Protects segment table entries */ + struct cxl_sste *sstp; + u64 sstp0, sstp1; + unsigned int sst_size, sst_lru; + + wait_queue_head_t wq; + /* use mm context associated with this pid for ds faults */ + struct pid *pid; + spinlock_t lock; /* Protects pending_irq_mask, pending_fault and fault_addr */ + /* Only used in PR mode */ + u64 process_token; + + /* driver private data */ + void *priv; + + unsigned long *irq_bitmap; /* Accessed from IRQ context */ + struct cxl_irq_ranges irqs; + struct list_head irq_names; + u64 fault_addr; + u64 fault_dsisr; + u64 afu_err; + + /* + * This status and it's lock pretects start and detach context + * from racing. It also prevents detach from racing with + * itself + */ + enum cxl_context_status status; + struct mutex status_mutex; + + + /* XXX: Is it possible to need multiple work items at once? */ + struct work_struct fault_work; + u64 dsisr; + u64 dar; + + struct cxl_process_element *elem; + + /* + * pe is the process element handle, assigned by this driver when the + * context is initialized. + * + * external_pe is the PE shown outside of cxl. + * On bare-metal, pe=external_pe, because we decide what the handle is. + * In a guest, we only find out about the pe used by pHyp when the + * context is attached, and that's the value we want to report outside + * of cxl. 
+ */ + int pe; + int external_pe; + + u32 irq_count; + bool pe_inserted; + bool master; + bool kernel; + bool pending_irq; + bool pending_fault; + bool pending_afu_err; + + /* Used by AFU drivers for driver specific event delivery */ + struct cxl_afu_driver_ops *afu_driver_ops; + atomic_t afu_driver_events; + + struct rcu_head rcu; + + struct mm_struct *mm; + + u16 tidr; + bool assign_tidr; +}; + +struct cxl_irq_info; + +struct cxl_service_layer_ops { + int (*adapter_regs_init)(struct cxl *adapter, struct pci_dev *dev); + int (*invalidate_all)(struct cxl *adapter); + int (*afu_regs_init)(struct cxl_afu *afu); + int (*sanitise_afu_regs)(struct cxl_afu *afu); + int (*register_serr_irq)(struct cxl_afu *afu); + void (*release_serr_irq)(struct cxl_afu *afu); + irqreturn_t (*handle_interrupt)(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info); + irqreturn_t (*fail_irq)(struct cxl_afu *afu, struct cxl_irq_info *irq_info); + int (*activate_dedicated_process)(struct cxl_afu *afu); + int (*attach_afu_directed)(struct cxl_context *ctx, u64 wed, u64 amr); + int (*attach_dedicated_process)(struct cxl_context *ctx, u64 wed, u64 amr); + void (*update_dedicated_ivtes)(struct cxl_context *ctx); + void (*debugfs_add_adapter_regs)(struct cxl *adapter, struct dentry *dir); + void (*debugfs_add_afu_regs)(struct cxl_afu *afu, struct dentry *dir); + void (*psl_irq_dump_registers)(struct cxl_context *ctx); + void (*err_irq_dump_registers)(struct cxl *adapter); + void (*debugfs_stop_trace)(struct cxl *adapter); + void (*write_timebase_ctrl)(struct cxl *adapter); + u64 (*timebase_read)(struct cxl *adapter); + int capi_mode; + bool needs_reset_before_disable; +}; + +struct cxl_native { + u64 afu_desc_off; + u64 afu_desc_size; + void __iomem *p1_mmio; + void __iomem *p2_mmio; + irq_hw_number_t err_hwirq; + unsigned int err_virq; + u64 ps_off; + bool no_data_cache; /* set if no data cache on the card */ + const struct cxl_service_layer_ops *sl_ops; +}; + +struct cxl_guest { + struct platform_device *pdev; + int irq_nranges; + struct cdev cdev; + irq_hw_number_t irq_base_offset; + struct irq_avail *irq_avail; + spinlock_t irq_alloc_lock; + u64 handle; + char *status; + u16 vendor; + u16 device; + u16 subsystem_vendor; + u16 subsystem; +}; + +struct cxl { + struct cxl_native *native; + struct cxl_guest *guest; + spinlock_t afu_list_lock; + struct cxl_afu *afu[CXL_MAX_SLICES]; + struct device dev; + struct dentry *trace; + struct dentry *psl_err_chk; + struct dentry *debugfs; + char *irq_name; + struct bin_attribute cxl_attr; + int adapter_num; + int user_irqs; + u64 ps_size; + u16 psl_rev; + u16 base_image; + u8 vsec_status; + u8 caia_major; + u8 caia_minor; + u8 slices; + bool user_image_loaded; + bool perst_loads_image; + bool perst_select_user; + bool perst_same_image; + bool psl_timebase_synced; + bool tunneled_ops_supported; + + /* + * number of contexts mapped on to this card. Possible values are: + * >0: Number of contexts mapped and new one can be mapped. + * 0: No active contexts and new ones can be mapped. + * -1: No contexts mapped and new ones cannot be mapped. 
+ */ + atomic_t contexts_num; +}; + +int cxl_pci_alloc_one_irq(struct cxl *adapter); +void cxl_pci_release_one_irq(struct cxl *adapter, int hwirq); +int cxl_pci_alloc_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter, unsigned int num); +void cxl_pci_release_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter); +int cxl_pci_setup_irq(struct cxl *adapter, unsigned int hwirq, unsigned int virq); +int cxl_update_image_control(struct cxl *adapter); +int cxl_pci_reset(struct cxl *adapter); +void cxl_pci_release_afu(struct device *dev); +ssize_t cxl_pci_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len); + +/* common == phyp + powernv - CAIA 1&2 */ +struct cxl_process_element_common { + __be32 tid; + __be32 pid; + __be64 csrp; + union { + struct { + __be64 aurp0; + __be64 aurp1; + __be64 sstp0; + __be64 sstp1; + } psl8; /* CAIA 1 */ + struct { + u8 reserved2[8]; + u8 reserved3[8]; + u8 reserved4[8]; + u8 reserved5[8]; + } psl9; /* CAIA 2 */ + } u; + __be64 amr; + u8 reserved6[4]; + __be64 wed; +} __packed; + +/* just powernv - CAIA 1&2 */ +struct cxl_process_element { + __be64 sr; + __be64 SPOffset; + union { + __be64 sdr; /* CAIA 1 */ + u8 reserved1[8]; /* CAIA 2 */ + } u; + __be64 haurp; + __be32 ctxtime; + __be16 ivte_offsets[4]; + __be16 ivte_ranges[4]; + __be32 lpid; + struct cxl_process_element_common common; + __be32 software_state; +} __packed; + +static inline bool cxl_adapter_link_ok(struct cxl *cxl, struct cxl_afu *afu) +{ + struct pci_dev *pdev; + + if (cpu_has_feature(CPU_FTR_HVMODE)) { + pdev = to_pci_dev(cxl->dev.parent); + return !pci_channel_offline(pdev); + } + return true; +} + +static inline void __iomem *_cxl_p1_addr(struct cxl *cxl, cxl_p1_reg_t reg) +{ + WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE)); + return cxl->native->p1_mmio + cxl_reg_off(reg); +} + +static inline void cxl_p1_write(struct cxl *cxl, cxl_p1_reg_t reg, u64 val) +{ + if (likely(cxl_adapter_link_ok(cxl, NULL))) + out_be64(_cxl_p1_addr(cxl, reg), val); +} + +static inline u64 cxl_p1_read(struct cxl *cxl, cxl_p1_reg_t reg) +{ + if (likely(cxl_adapter_link_ok(cxl, NULL))) + return in_be64(_cxl_p1_addr(cxl, reg)); + else + return ~0ULL; +} + +static inline void __iomem *_cxl_p1n_addr(struct cxl_afu *afu, cxl_p1n_reg_t reg) +{ + WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE)); + return afu->native->p1n_mmio + cxl_reg_off(reg); +} + +static inline void cxl_p1n_write(struct cxl_afu *afu, cxl_p1n_reg_t reg, u64 val) +{ + if (likely(cxl_adapter_link_ok(afu->adapter, afu))) + out_be64(_cxl_p1n_addr(afu, reg), val); +} + +static inline u64 cxl_p1n_read(struct cxl_afu *afu, cxl_p1n_reg_t reg) +{ + if (likely(cxl_adapter_link_ok(afu->adapter, afu))) + return in_be64(_cxl_p1n_addr(afu, reg)); + else + return ~0ULL; +} + +static inline void __iomem *_cxl_p2n_addr(struct cxl_afu *afu, cxl_p2n_reg_t reg) +{ + return afu->p2n_mmio + cxl_reg_off(reg); +} + +static inline void cxl_p2n_write(struct cxl_afu *afu, cxl_p2n_reg_t reg, u64 val) +{ + if (likely(cxl_adapter_link_ok(afu->adapter, afu))) + out_be64(_cxl_p2n_addr(afu, reg), val); +} + +static inline u64 cxl_p2n_read(struct cxl_afu *afu, cxl_p2n_reg_t reg) +{ + if (likely(cxl_adapter_link_ok(afu->adapter, afu))) + return in_be64(_cxl_p2n_addr(afu, reg)); + else + return ~0ULL; +} + +static inline bool cxl_is_power8(void) +{ + if ((pvr_version_is(PVR_POWER8E)) || + (pvr_version_is(PVR_POWER8NVL)) || + (pvr_version_is(PVR_POWER8))) + return true; + return false; +} + +static inline bool cxl_is_power9(void) +{ + if (pvr_version_is(PVR_POWER9)) + return 
true; + return false; +} + +ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf, + loff_t off, size_t count); + + +struct cxl_calls { + void (*cxl_slbia)(struct mm_struct *mm); + struct module *owner; +}; +int register_cxl_calls(struct cxl_calls *calls); +void unregister_cxl_calls(struct cxl_calls *calls); +int cxl_update_properties(struct device_node *dn, struct property *new_prop); + +void cxl_remove_adapter_nr(struct cxl *adapter); + +void cxl_release_spa(struct cxl_afu *afu); + +dev_t cxl_get_dev(void); +int cxl_file_init(void); +void cxl_file_exit(void); +int cxl_register_adapter(struct cxl *adapter); +int cxl_register_afu(struct cxl_afu *afu); +int cxl_chardev_d_afu_add(struct cxl_afu *afu); +int cxl_chardev_m_afu_add(struct cxl_afu *afu); +int cxl_chardev_s_afu_add(struct cxl_afu *afu); +void cxl_chardev_afu_remove(struct cxl_afu *afu); + +void cxl_context_detach_all(struct cxl_afu *afu); +void cxl_context_free(struct cxl_context *ctx); +void cxl_context_detach(struct cxl_context *ctx); + +int cxl_sysfs_adapter_add(struct cxl *adapter); +void cxl_sysfs_adapter_remove(struct cxl *adapter); +int cxl_sysfs_afu_add(struct cxl_afu *afu); +void cxl_sysfs_afu_remove(struct cxl_afu *afu); +int cxl_sysfs_afu_m_add(struct cxl_afu *afu); +void cxl_sysfs_afu_m_remove(struct cxl_afu *afu); + +struct cxl *cxl_alloc_adapter(void); +struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice); +int cxl_afu_select_best_mode(struct cxl_afu *afu); + +int cxl_native_register_psl_irq(struct cxl_afu *afu); +void cxl_native_release_psl_irq(struct cxl_afu *afu); +int cxl_native_register_psl_err_irq(struct cxl *adapter); +void cxl_native_release_psl_err_irq(struct cxl *adapter); +int cxl_native_register_serr_irq(struct cxl_afu *afu); +void cxl_native_release_serr_irq(struct cxl_afu *afu); +int afu_register_irqs(struct cxl_context *ctx, u32 count); +void afu_release_irqs(struct cxl_context *ctx, void *cookie); +void afu_irq_name_free(struct cxl_context *ctx); + +int cxl_attach_afu_directed_psl9(struct cxl_context *ctx, u64 wed, u64 amr); +int cxl_attach_afu_directed_psl8(struct cxl_context *ctx, u64 wed, u64 amr); +int cxl_activate_dedicated_process_psl9(struct cxl_afu *afu); +int cxl_activate_dedicated_process_psl8(struct cxl_afu *afu); +int cxl_attach_dedicated_process_psl9(struct cxl_context *ctx, u64 wed, u64 amr); +int cxl_attach_dedicated_process_psl8(struct cxl_context *ctx, u64 wed, u64 amr); +void cxl_update_dedicated_ivtes_psl9(struct cxl_context *ctx); +void cxl_update_dedicated_ivtes_psl8(struct cxl_context *ctx); + +#ifdef CONFIG_DEBUG_FS + +void cxl_debugfs_init(void); +void cxl_debugfs_exit(void); +void cxl_debugfs_adapter_add(struct cxl *adapter); +void cxl_debugfs_adapter_remove(struct cxl *adapter); +void cxl_debugfs_afu_add(struct cxl_afu *afu); +void cxl_debugfs_afu_remove(struct cxl_afu *afu); +void cxl_debugfs_add_adapter_regs_psl9(struct cxl *adapter, struct dentry *dir); +void cxl_debugfs_add_adapter_regs_psl8(struct cxl *adapter, struct dentry *dir); +void cxl_debugfs_add_afu_regs_psl9(struct cxl_afu *afu, struct dentry *dir); +void cxl_debugfs_add_afu_regs_psl8(struct cxl_afu *afu, struct dentry *dir); + +#else /* CONFIG_DEBUG_FS */ + +static inline void __init cxl_debugfs_init(void) +{ +} + +static inline void cxl_debugfs_exit(void) +{ +} + +static inline void cxl_debugfs_adapter_add(struct cxl *adapter) +{ +} + +static inline void cxl_debugfs_adapter_remove(struct cxl *adapter) +{ +} + +static inline void cxl_debugfs_afu_add(struct cxl_afu *afu) +{ +} + +static 
inline void cxl_debugfs_afu_remove(struct cxl_afu *afu) +{ +} + +static inline void cxl_debugfs_add_adapter_regs_psl9(struct cxl *adapter, + struct dentry *dir) +{ +} + +static inline void cxl_debugfs_add_adapter_regs_psl8(struct cxl *adapter, + struct dentry *dir) +{ +} + +static inline void cxl_debugfs_add_afu_regs_psl9(struct cxl_afu *afu, struct dentry *dir) +{ +} + +static inline void cxl_debugfs_add_afu_regs_psl8(struct cxl_afu *afu, struct dentry *dir) +{ +} + +#endif /* CONFIG_DEBUG_FS */ + +void cxl_handle_fault(struct work_struct *work); +void cxl_prefault(struct cxl_context *ctx, u64 wed); +int cxl_handle_mm_fault(struct mm_struct *mm, u64 dsisr, u64 dar); + +struct cxl *get_cxl_adapter(int num); +int cxl_alloc_sst(struct cxl_context *ctx); +void cxl_dump_debug_buffer(void *addr, size_t size); + +void init_cxl_native(void); + +struct cxl_context *cxl_context_alloc(void); +int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master); +void cxl_context_set_mapping(struct cxl_context *ctx, + struct address_space *mapping); +void cxl_context_free(struct cxl_context *ctx); +int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma); +unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq, + irq_handler_t handler, void *cookie, const char *name); +void cxl_unmap_irq(unsigned int virq, void *cookie); +int __detach_context(struct cxl_context *ctx); + +/* + * This must match the layout of the H_COLLECT_CA_INT_INFO retbuf defined + * in PAPR. + * Field pid_tid is now 'reserved' because it's no more used on bare-metal. + * On a guest environment, PSL_PID_An is located on the upper 32 bits and + * PSL_TID_An register in the lower 32 bits. + */ +struct cxl_irq_info { + u64 dsisr; + u64 dar; + u64 dsr; + u64 reserved; + u64 afu_err; + u64 errstat; + u64 proc_handle; + u64 padding[2]; /* to match the expected retbuf size for plpar_hcall9 */ +}; + +void cxl_assign_psn_space(struct cxl_context *ctx); +int cxl_invalidate_all_psl9(struct cxl *adapter); +int cxl_invalidate_all_psl8(struct cxl *adapter); +irqreturn_t cxl_irq_psl9(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info); +irqreturn_t cxl_irq_psl8(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info); +irqreturn_t cxl_fail_irq_psl(struct cxl_afu *afu, struct cxl_irq_info *irq_info); +int cxl_register_one_irq(struct cxl *adapter, irq_handler_t handler, + void *cookie, irq_hw_number_t *dest_hwirq, + unsigned int *dest_virq, const char *name); + +int cxl_check_error(struct cxl_afu *afu); +int cxl_afu_slbia(struct cxl_afu *afu); +int cxl_data_cache_flush(struct cxl *adapter); +int cxl_afu_disable(struct cxl_afu *afu); +int cxl_psl_purge(struct cxl_afu *afu); +int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid, + u32 *phb_index, u64 *capp_unit_id); +int cxl_slot_is_switched(struct pci_dev *dev); +int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg); +u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9); + +void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx); +void cxl_native_irq_dump_regs_psl8(struct cxl_context *ctx); +void cxl_native_err_irq_dump_regs_psl8(struct cxl *adapter); +void cxl_native_err_irq_dump_regs_psl9(struct cxl *adapter); +int cxl_pci_vphb_add(struct cxl_afu *afu); +void cxl_pci_vphb_remove(struct cxl_afu *afu); +void cxl_release_mapping(struct cxl_context *ctx); + +extern struct pci_driver cxl_pci_driver; +extern struct platform_driver cxl_of_driver; +int afu_allocate_irqs(struct cxl_context *ctx, 
u32 count); + +int afu_open(struct inode *inode, struct file *file); +int afu_release(struct inode *inode, struct file *file); +long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +int afu_mmap(struct file *file, struct vm_area_struct *vm); +__poll_t afu_poll(struct file *file, struct poll_table_struct *poll); +ssize_t afu_read(struct file *file, char __user *buf, size_t count, loff_t *off); +extern const struct file_operations afu_fops; + +struct cxl *cxl_guest_init_adapter(struct device_node *np, struct platform_device *dev); +void cxl_guest_remove_adapter(struct cxl *adapter); +int cxl_of_read_adapter_handle(struct cxl *adapter, struct device_node *np); +int cxl_of_read_adapter_properties(struct cxl *adapter, struct device_node *np); +ssize_t cxl_guest_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len); +ssize_t cxl_guest_read_afu_vpd(struct cxl_afu *afu, void *buf, size_t len); +int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_np); +void cxl_guest_remove_afu(struct cxl_afu *afu); +int cxl_of_read_afu_handle(struct cxl_afu *afu, struct device_node *afu_np); +int cxl_of_read_afu_properties(struct cxl_afu *afu, struct device_node *afu_np); +int cxl_guest_add_chardev(struct cxl *adapter); +void cxl_guest_remove_chardev(struct cxl *adapter); +void cxl_guest_reload_module(struct cxl *adapter); +int cxl_of_probe(struct platform_device *pdev); + +struct cxl_backend_ops { + struct module *module; + int (*adapter_reset)(struct cxl *adapter); + int (*alloc_one_irq)(struct cxl *adapter); + void (*release_one_irq)(struct cxl *adapter, int hwirq); + int (*alloc_irq_ranges)(struct cxl_irq_ranges *irqs, + struct cxl *adapter, unsigned int num); + void (*release_irq_ranges)(struct cxl_irq_ranges *irqs, + struct cxl *adapter); + int (*setup_irq)(struct cxl *adapter, unsigned int hwirq, + unsigned int virq); + irqreturn_t (*handle_psl_slice_error)(struct cxl_context *ctx, + u64 dsisr, u64 errstat); + irqreturn_t (*psl_interrupt)(int irq, void *data); + int (*ack_irq)(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask); + void (*irq_wait)(struct cxl_context *ctx); + int (*attach_process)(struct cxl_context *ctx, bool kernel, + u64 wed, u64 amr); + int (*detach_process)(struct cxl_context *ctx); + void (*update_ivtes)(struct cxl_context *ctx); + bool (*support_attributes)(const char *attr_name, enum cxl_attrs type); + bool (*link_ok)(struct cxl *cxl, struct cxl_afu *afu); + void (*release_afu)(struct device *dev); + ssize_t (*afu_read_err_buffer)(struct cxl_afu *afu, char *buf, + loff_t off, size_t count); + int (*afu_check_and_enable)(struct cxl_afu *afu); + int (*afu_activate_mode)(struct cxl_afu *afu, int mode); + int (*afu_deactivate_mode)(struct cxl_afu *afu, int mode); + int (*afu_reset)(struct cxl_afu *afu); + int (*afu_cr_read8)(struct cxl_afu *afu, int cr_idx, u64 offset, u8 *val); + int (*afu_cr_read16)(struct cxl_afu *afu, int cr_idx, u64 offset, u16 *val); + int (*afu_cr_read32)(struct cxl_afu *afu, int cr_idx, u64 offset, u32 *val); + int (*afu_cr_read64)(struct cxl_afu *afu, int cr_idx, u64 offset, u64 *val); + int (*afu_cr_write8)(struct cxl_afu *afu, int cr_idx, u64 offset, u8 val); + int (*afu_cr_write16)(struct cxl_afu *afu, int cr_idx, u64 offset, u16 val); + int (*afu_cr_write32)(struct cxl_afu *afu, int cr_idx, u64 offset, u32 val); + ssize_t (*read_adapter_vpd)(struct cxl *adapter, void *buf, size_t count); +}; +extern const struct cxl_backend_ops cxl_native_ops; +extern const struct cxl_backend_ops cxl_guest_ops; +extern 
const struct cxl_backend_ops *cxl_ops; + +/* check if the given pci_dev is on the cxl vphb bus */ +bool cxl_pci_is_vphb_device(struct pci_dev *dev); + +/* decode AFU error bits in the PSL register PSL_SERR_An */ +void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr); + +/* + * Increments the number of attached contexts on an adapter. + * In case an adapter_context_lock is taken the return -EBUSY. + */ +int cxl_adapter_context_get(struct cxl *adapter); + +/* Decrements the number of attached contexts on an adapter */ +void cxl_adapter_context_put(struct cxl *adapter); + +/* If no active contexts then prevents contexts from being attached */ +int cxl_adapter_context_lock(struct cxl *adapter); + +/* Unlock the contexts-lock if taken. Warn and force unlock otherwise */ +void cxl_adapter_context_unlock(struct cxl *adapter); + +/* Increases the reference count to "struct mm_struct" */ +void cxl_context_mm_count_get(struct cxl_context *ctx); + +/* Decrements the reference count to "struct mm_struct" */ +void cxl_context_mm_count_put(struct cxl_context *ctx); + +#endif diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c new file mode 100644 index 0000000000..e5fe0a1714 --- /dev/null +++ b/drivers/misc/cxl/cxllib.c @@ -0,0 +1,271 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2017 IBM Corp. + */ + +#include +#include +#include +#include +#include + +#include "cxl.h" + +#define CXL_INVALID_DRA ~0ull +#define CXL_DUMMY_READ_SIZE 128 +#define CXL_DUMMY_READ_ALIGN 8 +#define CXL_CAPI_WINDOW_START 0x2000000000000ull +#define CXL_CAPI_WINDOW_LOG_SIZE 48 +#define CXL_XSL_CONFIG_CURRENT_VERSION CXL_XSL_CONFIG_VERSION1 + + +bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags) +{ + int rc; + u32 phb_index; + u64 chip_id, capp_unit_id; + + /* No flags currently supported */ + if (flags) + return false; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) + return false; + + if (!cxl_is_power9()) + return false; + + if (cxl_slot_is_switched(dev)) + return false; + + /* on p9, some pci slots are not connected to a CAPP unit */ + rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id); + if (rc) + return false; + + return true; +} +EXPORT_SYMBOL_GPL(cxllib_slot_is_supported); + +static DEFINE_MUTEX(dra_mutex); +static u64 dummy_read_addr = CXL_INVALID_DRA; + +static int allocate_dummy_read_buf(void) +{ + u64 buf, vaddr; + size_t buf_size; + + /* + * Dummy read buffer is 128-byte long, aligned on a + * 256-byte boundary and we need the physical address. 
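+ * To get that alignment, the allocation below is padded by one alignment
+ * unit (256 bytes) and the start address is rounded up to the boundary
+ * before being converted with virt_to_phys().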
+ */ + buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN); + buf = (u64) kzalloc(buf_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) & + (~0ull << CXL_DUMMY_READ_ALIGN); + + WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size), + "Dummy read buffer alignment issue"); + dummy_read_addr = virt_to_phys((void *) vaddr); + return 0; +} + +int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg) +{ + int rc; + u32 phb_index; + u64 chip_id, capp_unit_id; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) + return -EINVAL; + + mutex_lock(&dra_mutex); + if (dummy_read_addr == CXL_INVALID_DRA) { + rc = allocate_dummy_read_buf(); + if (rc) { + mutex_unlock(&dra_mutex); + return rc; + } + } + mutex_unlock(&dra_mutex); + + rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id); + if (rc) + return rc; + + rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl); + if (rc) + return rc; + + cfg->version = CXL_XSL_CONFIG_CURRENT_VERSION; + cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE; + cfg->bar_addr = CXL_CAPI_WINDOW_START; + cfg->dra = dummy_read_addr; + return 0; +} +EXPORT_SYMBOL_GPL(cxllib_get_xsl_config); + +int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode, + unsigned long flags) +{ + int rc = 0; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) + return -EINVAL; + + switch (mode) { + case CXL_MODE_PCI: + /* + * We currently don't support going back to PCI mode + * However, we'll turn the invalidations off, so that + * the firmware doesn't have to ack them and can do + * things like reset, etc.. with no worries. + * So always return EPERM (can't go back to PCI) or + * EBUSY if we couldn't even turn off snooping + */ + rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF); + if (rc) + rc = -EBUSY; + else + rc = -EPERM; + break; + case CXL_MODE_CXL: + /* DMA only supported on TVT1 for the time being */ + if (flags != CXL_MODE_DMA_TVT1) + return -EINVAL; + rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1); + if (rc) + return rc; + rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON); + break; + default: + rc = -EINVAL; + } + return rc; +} +EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode); + +/* + * When switching the PHB to capi mode, the TVT#1 entry for + * the Partitionable Endpoint is set in bypass mode, like + * in PCI mode. + * Configure the device dma to use TVT#1, which is done + * by calling dma_set_mask() with a mask large enough. 
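+ * cxllib_set_device_dma() below does exactly that with DMA_BIT_MASK(64);
+ * no flags are currently supported.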
+ */ +int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags) +{ + int rc; + + if (flags) + return -EINVAL; + + rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64)); + return rc; +} +EXPORT_SYMBOL_GPL(cxllib_set_device_dma); + +int cxllib_get_PE_attributes(struct task_struct *task, + unsigned long translation_mode, + struct cxllib_pe_attributes *attr) +{ + if (translation_mode != CXL_TRANSLATED_MODE && + translation_mode != CXL_REAL_MODE) + return -EINVAL; + + attr->sr = cxl_calculate_sr(false, + task == NULL, + translation_mode == CXL_REAL_MODE, + true); + attr->lpid = mfspr(SPRN_LPID); + if (task) { + struct mm_struct *mm = get_task_mm(task); + if (mm == NULL) + return -EINVAL; + /* + * Caller is keeping a reference on mm_users for as long + * as XSL uses the memory context + */ + attr->pid = mm->context.id; + mmput(mm); + attr->tid = task->thread.tidr; + } else { + attr->pid = 0; + attr->tid = 0; + } + return 0; +} +EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes); + +static int get_vma_info(struct mm_struct *mm, u64 addr, + u64 *vma_start, u64 *vma_end, + unsigned long *page_size) +{ + struct vm_area_struct *vma = NULL; + int rc = 0; + + mmap_read_lock(mm); + + vma = find_vma(mm, addr); + if (!vma) { + rc = -EFAULT; + goto out; + } + *page_size = vma_kernel_pagesize(vma); + *vma_start = vma->vm_start; + *vma_end = vma->vm_end; +out: + mmap_read_unlock(mm); + return rc; +} + +int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags) +{ + int rc; + u64 dar, vma_start, vma_end; + unsigned long page_size; + + if (mm == NULL) + return -EFAULT; + + /* + * The buffer we have to process can extend over several pages + * and may also cover several VMAs. + * We iterate over all the pages. The page size could vary + * between VMAs. + */ + rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size); + if (rc) + return rc; + + for (dar = (addr & ~(page_size - 1)); dar < (addr + size); + dar += page_size) { + if (dar < vma_start || dar >= vma_end) { + /* + * We don't hold mm->mmap_lock while iterating, since + * the lock is required by one of the lower-level page + * fault processing functions and it could + * create a deadlock. + * + * It means the VMAs can be altered between 2 + * loop iterations and we could theoretically + * miss a page (however unlikely). But that's + * not really a problem, as the driver will + * retry access, get another page fault on the + * missing page and call us again. + */ + rc = get_vma_info(mm, dar, &vma_start, &vma_end, + &page_size); + if (rc) + return rc; + } + + rc = cxl_handle_mm_fault(mm, flags, dar); + if (rc) + return -EFAULT; + } + return 0; +} +EXPORT_SYMBOL_GPL(cxllib_handle_fault); diff --git a/drivers/misc/cxl/debugfs.c b/drivers/misc/cxl/debugfs.c new file mode 100644 index 0000000000..7b987bf498 --- /dev/null +++ b/drivers/misc/cxl/debugfs.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2014 IBM Corp. 
+ */ + +#include +#include +#include + +#include "cxl.h" + +static struct dentry *cxl_debugfs; + +/* Helpers to export CXL mmaped IO registers via debugfs */ +static int debugfs_io_u64_get(void *data, u64 *val) +{ + *val = in_be64((u64 __iomem *)data); + return 0; +} + +static int debugfs_io_u64_set(void *data, u64 val) +{ + out_be64((u64 __iomem *)data, val); + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(fops_io_x64, debugfs_io_u64_get, debugfs_io_u64_set, + "0x%016llx\n"); + +static void debugfs_create_io_x64(const char *name, umode_t mode, + struct dentry *parent, u64 __iomem *value) +{ + debugfs_create_file_unsafe(name, mode, parent, (void __force *)value, + &fops_io_x64); +} + +void cxl_debugfs_add_adapter_regs_psl9(struct cxl *adapter, struct dentry *dir) +{ + debugfs_create_io_x64("fir1", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL9_FIR1)); + debugfs_create_io_x64("fir_mask", 0400, dir, + _cxl_p1_addr(adapter, CXL_PSL9_FIR_MASK)); + debugfs_create_io_x64("fir_cntl", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL9_FIR_CNTL)); + debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1_addr(adapter, CXL_PSL9_TRACECFG)); + debugfs_create_io_x64("debug", 0600, dir, + _cxl_p1_addr(adapter, CXL_PSL9_DEBUG)); + debugfs_create_io_x64("xsl-debug", 0600, dir, + _cxl_p1_addr(adapter, CXL_XSL9_DBG)); +} + +void cxl_debugfs_add_adapter_regs_psl8(struct cxl *adapter, struct dentry *dir) +{ + debugfs_create_io_x64("fir1", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR1)); + debugfs_create_io_x64("fir2", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR2)); + debugfs_create_io_x64("fir_cntl", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR_CNTL)); + debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_TRACE)); +} + +void cxl_debugfs_adapter_add(struct cxl *adapter) +{ + struct dentry *dir; + char buf[32]; + + if (!cxl_debugfs) + return; + + snprintf(buf, 32, "card%i", adapter->adapter_num); + dir = debugfs_create_dir(buf, cxl_debugfs); + adapter->debugfs = dir; + + debugfs_create_io_x64("err_ivte", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_ErrIVTE)); + + if (adapter->native->sl_ops->debugfs_add_adapter_regs) + adapter->native->sl_ops->debugfs_add_adapter_regs(adapter, dir); +} + +void cxl_debugfs_adapter_remove(struct cxl *adapter) +{ + debugfs_remove_recursive(adapter->debugfs); +} + +void cxl_debugfs_add_afu_regs_psl9(struct cxl_afu *afu, struct dentry *dir) +{ + debugfs_create_io_x64("serr", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SERR_An)); +} + +void cxl_debugfs_add_afu_regs_psl8(struct cxl_afu *afu, struct dentry *dir) +{ + debugfs_create_io_x64("sstp0", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_SSTP0_An)); + debugfs_create_io_x64("sstp1", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_SSTP1_An)); + + debugfs_create_io_x64("fir", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_FIR_SLICE_An)); + debugfs_create_io_x64("serr", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SERR_An)); + debugfs_create_io_x64("afu_debug", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_AFU_DEBUG_An)); + debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SLICE_TRACE)); +} + +void cxl_debugfs_afu_add(struct cxl_afu *afu) +{ + struct dentry *dir; + char buf[32]; + + if (!afu->adapter->debugfs) + return; + + snprintf(buf, 32, "psl%i.%i", afu->adapter->adapter_num, afu->slice); + dir = debugfs_create_dir(buf, afu->adapter->debugfs); + afu->debugfs = dir; + + debugfs_create_io_x64("sr", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SR_An)); + debugfs_create_io_x64("dsisr", S_IRUSR, dir, 
_cxl_p2n_addr(afu, CXL_PSL_DSISR_An)); + debugfs_create_io_x64("dar", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_PSL_DAR_An)); + + debugfs_create_io_x64("err_status", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_PSL_ErrStat_An)); + + if (afu->adapter->native->sl_ops->debugfs_add_afu_regs) + afu->adapter->native->sl_ops->debugfs_add_afu_regs(afu, dir); +} + +void cxl_debugfs_afu_remove(struct cxl_afu *afu) +{ + debugfs_remove_recursive(afu->debugfs); +} + +void __init cxl_debugfs_init(void) +{ + if (!cpu_has_feature(CPU_FTR_HVMODE)) + return; + + cxl_debugfs = debugfs_create_dir("cxl", NULL); +} + +void cxl_debugfs_exit(void) +{ + debugfs_remove_recursive(cxl_debugfs); +} diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c new file mode 100644 index 0000000000..2c64f55cf0 --- /dev/null +++ b/drivers/misc/cxl/fault.c @@ -0,0 +1,341 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2014 IBM Corp. + */ + +#include +#include +#include +#include +#include +#include + +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "cxl" "." +#include +#include +#include + +#include "cxl.h" +#include "trace.h" + +static bool sste_matches(struct cxl_sste *sste, struct copro_slb *slb) +{ + return ((sste->vsid_data == cpu_to_be64(slb->vsid)) && + (sste->esid_data == cpu_to_be64(slb->esid))); +} + +/* + * This finds a free SSTE for the given SLB, or returns NULL if it's already in + * the segment table. + */ +static struct cxl_sste *find_free_sste(struct cxl_context *ctx, + struct copro_slb *slb) +{ + struct cxl_sste *primary, *sste, *ret = NULL; + unsigned int mask = (ctx->sst_size >> 7) - 1; /* SSTP0[SegTableSize] */ + unsigned int entry; + unsigned int hash; + + if (slb->vsid & SLB_VSID_B_1T) + hash = (slb->esid >> SID_SHIFT_1T) & mask; + else /* 256M */ + hash = (slb->esid >> SID_SHIFT) & mask; + + primary = ctx->sstp + (hash << 3); + + for (entry = 0, sste = primary; entry < 8; entry++, sste++) { + if (!ret && !(be64_to_cpu(sste->esid_data) & SLB_ESID_V)) + ret = sste; + if (sste_matches(sste, slb)) + return NULL; + } + if (ret) + return ret; + + /* Nothing free, select an entry to cast out */ + ret = primary + ctx->sst_lru; + ctx->sst_lru = (ctx->sst_lru + 1) & 0x7; + + return ret; +} + +static void cxl_load_segment(struct cxl_context *ctx, struct copro_slb *slb) +{ + /* mask is the group index, we search primary and secondary here. 
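+ * find_free_sste() returns a free entry in that group of 8 SSTEs (or an
+ * LRU victim to cast out), or NULL if the SLB is already present, in
+ * which case there is nothing to update.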
*/ + struct cxl_sste *sste; + unsigned long flags; + + spin_lock_irqsave(&ctx->sste_lock, flags); + sste = find_free_sste(ctx, slb); + if (!sste) + goto out_unlock; + + pr_devel("CXL Populating SST[%li]: %#llx %#llx\n", + sste - ctx->sstp, slb->vsid, slb->esid); + trace_cxl_ste_write(ctx, sste - ctx->sstp, slb->esid, slb->vsid); + + sste->vsid_data = cpu_to_be64(slb->vsid); + sste->esid_data = cpu_to_be64(slb->esid); +out_unlock: + spin_unlock_irqrestore(&ctx->sste_lock, flags); +} + +static int cxl_fault_segment(struct cxl_context *ctx, struct mm_struct *mm, + u64 ea) +{ + struct copro_slb slb = {0,0}; + int rc; + + if (!(rc = copro_calculate_slb(mm, ea, &slb))) { + cxl_load_segment(ctx, &slb); + } + + return rc; +} + +static void cxl_ack_ae(struct cxl_context *ctx) +{ + unsigned long flags; + + cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_AE, 0); + + spin_lock_irqsave(&ctx->lock, flags); + ctx->pending_fault = true; + ctx->fault_addr = ctx->dar; + ctx->fault_dsisr = ctx->dsisr; + spin_unlock_irqrestore(&ctx->lock, flags); + + wake_up_all(&ctx->wq); +} + +static int cxl_handle_segment_miss(struct cxl_context *ctx, + struct mm_struct *mm, u64 ea) +{ + int rc; + + pr_devel("CXL interrupt: Segment fault pe: %i ea: %#llx\n", ctx->pe, ea); + trace_cxl_ste_miss(ctx, ea); + + if ((rc = cxl_fault_segment(ctx, mm, ea))) + cxl_ack_ae(ctx); + else { + + mb(); /* Order seg table write to TFC MMIO write */ + cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0); + } + + return IRQ_HANDLED; +} + +int cxl_handle_mm_fault(struct mm_struct *mm, u64 dsisr, u64 dar) +{ + vm_fault_t flt = 0; + int result; + unsigned long access, flags, inv_flags = 0; + + /* + * Add the fault handling cpu to task mm cpumask so that we + * can do a safe lockless page table walk when inserting the + * hash page table entry. This function get called with a + * valid mm for user space addresses. Hence using the if (mm) + * check is sufficient here. + */ + if (mm && !cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) { + cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); + /* + * We need to make sure we walk the table only after + * we update the cpumask. The other side of the barrier + * is explained in serialize_against_pte_lookup() + */ + smp_mb(); + } + if ((result = copro_handle_mm_fault(mm, dar, dsisr, &flt))) { + pr_devel("copro_handle_mm_fault failed: %#x\n", result); + return result; + } + + if (!radix_enabled()) { + /* + * update_mmu_cache() will not have loaded the hash since current->trap + * is not a 0x400 or 0x300, so just call hash_page_mm() here. + */ + access = _PAGE_PRESENT | _PAGE_READ; + if (dsisr & CXL_PSL_DSISR_An_S) + access |= _PAGE_WRITE; + + if (!mm && (get_region_id(dar) != USER_REGION_ID)) + access |= _PAGE_PRIVILEGED; + + if (dsisr & DSISR_NOHPTE) + inv_flags |= HPTE_NOHPTE_UPDATE; + + local_irq_save(flags); + hash_page_mm(mm, dar, access, 0x300, inv_flags); + local_irq_restore(flags); + } + return 0; +} + +static void cxl_handle_page_fault(struct cxl_context *ctx, + struct mm_struct *mm, + u64 dsisr, u64 dar) +{ + trace_cxl_pte_miss(ctx, dsisr, dar); + + if (cxl_handle_mm_fault(mm, dsisr, dar)) { + cxl_ack_ae(ctx); + } else { + pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe); + cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0); + } +} + +/* + * Returns the mm_struct corresponding to the context ctx. + * mm_users == 0, the context may be in the process of being closed. 
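+ * mmget_not_zero() below only succeeds while mm_users is non-zero, so the
+ * caller either gets a referenced mm (to be released with mmput()) or NULL.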
+ */ +static struct mm_struct *get_mem_context(struct cxl_context *ctx) +{ + if (ctx->mm == NULL) + return NULL; + + if (!mmget_not_zero(ctx->mm)) + return NULL; + + return ctx->mm; +} + +static bool cxl_is_segment_miss(struct cxl_context *ctx, u64 dsisr) +{ + if ((cxl_is_power8() && (dsisr & CXL_PSL_DSISR_An_DS))) + return true; + + return false; +} + +static bool cxl_is_page_fault(struct cxl_context *ctx, u64 dsisr) +{ + if ((cxl_is_power8()) && (dsisr & CXL_PSL_DSISR_An_DM)) + return true; + + if (cxl_is_power9()) + return true; + + return false; +} + +void cxl_handle_fault(struct work_struct *fault_work) +{ + struct cxl_context *ctx = + container_of(fault_work, struct cxl_context, fault_work); + u64 dsisr = ctx->dsisr; + u64 dar = ctx->dar; + struct mm_struct *mm = NULL; + + if (cpu_has_feature(CPU_FTR_HVMODE)) { + if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr || + cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An) != dar || + cxl_p2n_read(ctx->afu, CXL_PSL_PEHandle_An) != ctx->pe) { + /* Most likely explanation is harmless - a dedicated + * process has detached and these were cleared by the + * PSL purge, but warn about it just in case + */ + dev_notice(&ctx->afu->dev, "cxl_handle_fault: Translation fault regs changed\n"); + return; + } + } + + /* Early return if the context is being / has been detached */ + if (ctx->status == CLOSED) { + cxl_ack_ae(ctx); + return; + } + + pr_devel("CXL BOTTOM HALF handling fault for afu pe: %i. " + "DSISR: %#llx DAR: %#llx\n", ctx->pe, dsisr, dar); + + if (!ctx->kernel) { + + mm = get_mem_context(ctx); + if (mm == NULL) { + pr_devel("%s: unable to get mm for pe=%d pid=%i\n", + __func__, ctx->pe, pid_nr(ctx->pid)); + cxl_ack_ae(ctx); + return; + } else { + pr_devel("Handling page fault for pe=%d pid=%i\n", + ctx->pe, pid_nr(ctx->pid)); + } + } + + if (cxl_is_segment_miss(ctx, dsisr)) + cxl_handle_segment_miss(ctx, mm, dar); + else if (cxl_is_page_fault(ctx, dsisr)) + cxl_handle_page_fault(ctx, mm, dsisr, dar); + else + WARN(1, "cxl_handle_fault has nothing to handle\n"); + + if (mm) + mmput(mm); +} + +static u64 next_segment(u64 ea, u64 vsid) +{ + if (vsid & SLB_VSID_B_1T) + ea |= (1ULL << 40) - 1; + else + ea |= (1ULL << 28) - 1; + + return ea + 1; +} + +static void cxl_prefault_vma(struct cxl_context *ctx, struct mm_struct *mm) +{ + u64 ea, last_esid = 0; + struct copro_slb slb; + VMA_ITERATOR(vmi, mm, 0); + struct vm_area_struct *vma; + int rc; + + mmap_read_lock(mm); + for_each_vma(vmi, vma) { + for (ea = vma->vm_start; ea < vma->vm_end; + ea = next_segment(ea, slb.vsid)) { + rc = copro_calculate_slb(mm, ea, &slb); + if (rc) + continue; + + if (last_esid == slb.esid) + continue; + + cxl_load_segment(ctx, &slb); + last_esid = slb.esid; + } + } + mmap_read_unlock(mm); +} + +void cxl_prefault(struct cxl_context *ctx, u64 wed) +{ + struct mm_struct *mm = get_mem_context(ctx); + + if (mm == NULL) { + pr_devel("cxl_prefault unable to get mm %i\n", + pid_nr(ctx->pid)); + return; + } + + switch (ctx->afu->prefault_mode) { + case CXL_PREFAULT_WED: + cxl_fault_segment(ctx, mm, wed); + break; + case CXL_PREFAULT_ALL: + cxl_prefault_vma(ctx, mm); + break; + default: + break; + } + + mmput(mm); +} diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c new file mode 100644 index 0000000000..144d1f2d78 --- /dev/null +++ b/drivers/misc/cxl/file.c @@ -0,0 +1,700 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2014 IBM Corp. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cxl.h" +#include "trace.h" + +#define CXL_NUM_MINORS 256 /* Total to reserve */ + +#define CXL_AFU_MINOR_D(afu) (CXL_CARD_MINOR(afu->adapter) + 1 + (3 * afu->slice)) +#define CXL_AFU_MINOR_M(afu) (CXL_AFU_MINOR_D(afu) + 1) +#define CXL_AFU_MINOR_S(afu) (CXL_AFU_MINOR_D(afu) + 2) +#define CXL_AFU_MKDEV_D(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_D(afu)) +#define CXL_AFU_MKDEV_M(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_M(afu)) +#define CXL_AFU_MKDEV_S(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_S(afu)) + +#define CXL_DEVT_AFU(dev) ((MINOR(dev) % CXL_DEV_MINORS - 1) / 3) + +#define CXL_DEVT_IS_CARD(dev) (MINOR(dev) % CXL_DEV_MINORS == 0) + +static dev_t cxl_dev; + +static struct class *cxl_class; + +static int __afu_open(struct inode *inode, struct file *file, bool master) +{ + struct cxl *adapter; + struct cxl_afu *afu; + struct cxl_context *ctx; + int adapter_num = CXL_DEVT_ADAPTER(inode->i_rdev); + int slice = CXL_DEVT_AFU(inode->i_rdev); + int rc = -ENODEV; + + pr_devel("afu_open afu%i.%i\n", slice, adapter_num); + + if (!(adapter = get_cxl_adapter(adapter_num))) + return -ENODEV; + + if (slice > adapter->slices) + goto err_put_adapter; + + spin_lock(&adapter->afu_list_lock); + if (!(afu = adapter->afu[slice])) { + spin_unlock(&adapter->afu_list_lock); + goto err_put_adapter; + } + + /* + * taking a ref to the afu so that it doesn't go away + * for rest of the function. This ref is released before + * we return. + */ + cxl_afu_get(afu); + spin_unlock(&adapter->afu_list_lock); + + if (!afu->current_mode) + goto err_put_afu; + + if (!cxl_ops->link_ok(adapter, afu)) { + rc = -EIO; + goto err_put_afu; + } + + if (!(ctx = cxl_context_alloc())) { + rc = -ENOMEM; + goto err_put_afu; + } + + rc = cxl_context_init(ctx, afu, master); + if (rc) + goto err_put_afu; + + cxl_context_set_mapping(ctx, inode->i_mapping); + + pr_devel("afu_open pe: %i\n", ctx->pe); + file->private_data = ctx; + + /* indicate success */ + rc = 0; + +err_put_afu: + /* release the ref taken earlier */ + cxl_afu_put(afu); +err_put_adapter: + put_device(&adapter->dev); + return rc; +} + +int afu_open(struct inode *inode, struct file *file) +{ + return __afu_open(inode, file, false); +} + +static int afu_master_open(struct inode *inode, struct file *file) +{ + return __afu_open(inode, file, true); +} + +int afu_release(struct inode *inode, struct file *file) +{ + struct cxl_context *ctx = file->private_data; + + pr_devel("%s: closing cxl file descriptor. pe: %i\n", + __func__, ctx->pe); + cxl_context_detach(ctx); + + + /* + * Delete the context's mapping pointer, unless it's created by the + * kernel API, in which case leave it so it can be freed by reclaim_ctx() + */ + if (!ctx->kernelapi) { + mutex_lock(&ctx->mapping_lock); + ctx->mapping = NULL; + mutex_unlock(&ctx->mapping_lock); + } + + /* + * At this this point all bottom halfs have finished and we should be + * getting no more IRQs from the hardware for this context. Once it's + * removed from the IDR (and RCU synchronised) it's safe to free the + * sstp and context. 
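+ * cxl_context_free() below performs that removal; struct cxl_context
+ * carries an rcu_head so the final free can be RCU-deferred.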
+ */ + cxl_context_free(ctx); + + return 0; +} + +static long afu_ioctl_start_work(struct cxl_context *ctx, + struct cxl_ioctl_start_work __user *uwork) +{ + struct cxl_ioctl_start_work work; + u64 amr = 0; + int rc; + + pr_devel("%s: pe: %i\n", __func__, ctx->pe); + + /* Do this outside the status_mutex to avoid a circular dependency with + * the locking in cxl_mmap_fault() */ + if (copy_from_user(&work, uwork, sizeof(work))) + return -EFAULT; + + mutex_lock(&ctx->status_mutex); + if (ctx->status != OPENED) { + rc = -EIO; + goto out; + } + + /* + * if any of the reserved fields are set or any of the unused + * flags are set it's invalid + */ + if (work.reserved1 || work.reserved2 || work.reserved3 || + work.reserved4 || work.reserved5 || + (work.flags & ~CXL_START_WORK_ALL)) { + rc = -EINVAL; + goto out; + } + + if (!(work.flags & CXL_START_WORK_NUM_IRQS)) + work.num_interrupts = ctx->afu->pp_irqs; + else if ((work.num_interrupts < ctx->afu->pp_irqs) || + (work.num_interrupts > ctx->afu->irqs_max)) { + rc = -EINVAL; + goto out; + } + + if ((rc = afu_register_irqs(ctx, work.num_interrupts))) + goto out; + + if (work.flags & CXL_START_WORK_AMR) + amr = work.amr & mfspr(SPRN_UAMOR); + + if (work.flags & CXL_START_WORK_TID) + ctx->assign_tidr = true; + + ctx->mmio_err_ff = !!(work.flags & CXL_START_WORK_ERR_FF); + + /* + * Increment the mapped context count for adapter. This also checks + * if adapter_context_lock is taken. + */ + rc = cxl_adapter_context_get(ctx->afu->adapter); + if (rc) { + afu_release_irqs(ctx, ctx); + goto out; + } + + /* + * We grab the PID here and not in the file open to allow for the case + * where a process (master, some daemon, etc) has opened the chardev on + * behalf of another process, so the AFU's mm gets bound to the process + * that performs this ioctl and not the process that opened the file. + * Also we grab the PID of the group leader so that if the task that + * has performed the attach operation exits the mm context of the + * process is still accessible. + */ + ctx->pid = get_task_pid(current, PIDTYPE_PID); + + /* acquire a reference to the task's mm */ + ctx->mm = get_task_mm(current); + + /* ensure this mm_struct can't be freed */ + cxl_context_mm_count_get(ctx); + + if (ctx->mm) { + /* decrement the use count from above */ + mmput(ctx->mm); + /* make TLBIs for this context global */ + mm_context_add_copro(ctx->mm); + } + + /* + * Increment driver use count. Enables global TLBIs for hash + * and callbacks to handle the segment table + */ + cxl_ctx_get(); + + /* + * A barrier is needed to make sure all TLBIs are global + * before we attach and the context starts being used by the + * adapter. + * + * Needed after mm_context_add_copro() for radix and + * cxl_ctx_get() for hash/p8. + * + * The barrier should really be mb(), since it involves a + * device. However, it's only useful when we have local + * vs. global TLBIs, i.e SMP=y. So keep smp_mb(). 
+ */ + smp_mb(); + + trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr); + + if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor, + amr))) { + afu_release_irqs(ctx, ctx); + cxl_adapter_context_put(ctx->afu->adapter); + put_pid(ctx->pid); + ctx->pid = NULL; + cxl_ctx_put(); + cxl_context_mm_count_put(ctx); + if (ctx->mm) + mm_context_remove_copro(ctx->mm); + goto out; + } + + rc = 0; + if (work.flags & CXL_START_WORK_TID) { + work.tid = ctx->tidr; + if (copy_to_user(uwork, &work, sizeof(work))) + rc = -EFAULT; + } + + ctx->status = STARTED; + +out: + mutex_unlock(&ctx->status_mutex); + return rc; +} + +static long afu_ioctl_process_element(struct cxl_context *ctx, + int __user *upe) +{ + pr_devel("%s: pe: %i\n", __func__, ctx->pe); + + if (copy_to_user(upe, &ctx->external_pe, sizeof(__u32))) + return -EFAULT; + + return 0; +} + +static long afu_ioctl_get_afu_id(struct cxl_context *ctx, + struct cxl_afu_id __user *upafuid) +{ + struct cxl_afu_id afuid = { 0 }; + + afuid.card_id = ctx->afu->adapter->adapter_num; + afuid.afu_offset = ctx->afu->slice; + afuid.afu_mode = ctx->afu->current_mode; + + /* set the flag bit in case the afu is a slave */ + if (ctx->afu->current_mode == CXL_MODE_DIRECTED && !ctx->master) + afuid.flags |= CXL_AFUID_FLAG_SLAVE; + + if (copy_to_user(upafuid, &afuid, sizeof(afuid))) + return -EFAULT; + + return 0; +} + +long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct cxl_context *ctx = file->private_data; + + if (ctx->status == CLOSED) + return -EIO; + + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) + return -EIO; + + pr_devel("afu_ioctl\n"); + switch (cmd) { + case CXL_IOCTL_START_WORK: + return afu_ioctl_start_work(ctx, (struct cxl_ioctl_start_work __user *)arg); + case CXL_IOCTL_GET_PROCESS_ELEMENT: + return afu_ioctl_process_element(ctx, (__u32 __user *)arg); + case CXL_IOCTL_GET_AFU_ID: + return afu_ioctl_get_afu_id(ctx, (struct cxl_afu_id __user *) + arg); + } + return -EINVAL; +} + +static long afu_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return afu_ioctl(file, cmd, arg); +} + +int afu_mmap(struct file *file, struct vm_area_struct *vm) +{ + struct cxl_context *ctx = file->private_data; + + /* AFU must be started before we can MMIO */ + if (ctx->status != STARTED) + return -EIO; + + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) + return -EIO; + + return cxl_context_iomap(ctx, vm); +} + +static inline bool ctx_event_pending(struct cxl_context *ctx) +{ + if (ctx->pending_irq || ctx->pending_fault || ctx->pending_afu_err) + return true; + + if (ctx->afu_driver_ops && atomic_read(&ctx->afu_driver_events)) + return true; + + return false; +} + +__poll_t afu_poll(struct file *file, struct poll_table_struct *poll) +{ + struct cxl_context *ctx = file->private_data; + __poll_t mask = 0; + unsigned long flags; + + + poll_wait(file, &ctx->wq, poll); + + pr_devel("afu_poll wait done pe: %i\n", ctx->pe); + + spin_lock_irqsave(&ctx->lock, flags); + if (ctx_event_pending(ctx)) + mask |= EPOLLIN | EPOLLRDNORM; + else if (ctx->status == CLOSED) + /* Only error on closed when there are no futher events pending + */ + mask |= EPOLLERR; + spin_unlock_irqrestore(&ctx->lock, flags); + + pr_devel("afu_poll pe: %i returning %#x\n", ctx->pe, mask); + + return mask; +} + +static ssize_t afu_driver_event_copy(struct cxl_context *ctx, + char __user *buf, + struct cxl_event *event, + struct cxl_event_afu_driver_reserved *pl) +{ + /* Check event */ + if (!pl) { + 
ctx->afu_driver_ops->event_delivered(ctx, pl, -EINVAL); + return -EFAULT; + } + + /* Check event size */ + event->header.size += pl->data_size; + if (event->header.size > CXL_READ_MIN_SIZE) { + ctx->afu_driver_ops->event_delivered(ctx, pl, -EINVAL); + return -EFAULT; + } + + /* Copy event header */ + if (copy_to_user(buf, event, sizeof(struct cxl_event_header))) { + ctx->afu_driver_ops->event_delivered(ctx, pl, -EFAULT); + return -EFAULT; + } + + /* Copy event data */ + buf += sizeof(struct cxl_event_header); + if (copy_to_user(buf, &pl->data, pl->data_size)) { + ctx->afu_driver_ops->event_delivered(ctx, pl, -EFAULT); + return -EFAULT; + } + + ctx->afu_driver_ops->event_delivered(ctx, pl, 0); /* Success */ + return event->header.size; +} + +ssize_t afu_read(struct file *file, char __user *buf, size_t count, + loff_t *off) +{ + struct cxl_context *ctx = file->private_data; + struct cxl_event_afu_driver_reserved *pl = NULL; + struct cxl_event event; + unsigned long flags; + int rc; + DEFINE_WAIT(wait); + + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) + return -EIO; + + if (count < CXL_READ_MIN_SIZE) + return -EINVAL; + + spin_lock_irqsave(&ctx->lock, flags); + + for (;;) { + prepare_to_wait(&ctx->wq, &wait, TASK_INTERRUPTIBLE); + if (ctx_event_pending(ctx) || (ctx->status == CLOSED)) + break; + + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) { + rc = -EIO; + goto out; + } + + if (file->f_flags & O_NONBLOCK) { + rc = -EAGAIN; + goto out; + } + + if (signal_pending(current)) { + rc = -ERESTARTSYS; + goto out; + } + + spin_unlock_irqrestore(&ctx->lock, flags); + pr_devel("afu_read going to sleep...\n"); + schedule(); + pr_devel("afu_read woken up\n"); + spin_lock_irqsave(&ctx->lock, flags); + } + + finish_wait(&ctx->wq, &wait); + + memset(&event, 0, sizeof(event)); + event.header.process_element = ctx->pe; + event.header.size = sizeof(struct cxl_event_header); + if (ctx->afu_driver_ops && atomic_read(&ctx->afu_driver_events)) { + pr_devel("afu_read delivering AFU driver specific event\n"); + pl = ctx->afu_driver_ops->fetch_event(ctx); + atomic_dec(&ctx->afu_driver_events); + event.header.type = CXL_EVENT_AFU_DRIVER; + } else if (ctx->pending_irq) { + pr_devel("afu_read delivering AFU interrupt\n"); + event.header.size += sizeof(struct cxl_event_afu_interrupt); + event.header.type = CXL_EVENT_AFU_INTERRUPT; + event.irq.irq = find_first_bit(ctx->irq_bitmap, ctx->irq_count) + 1; + clear_bit(event.irq.irq - 1, ctx->irq_bitmap); + if (bitmap_empty(ctx->irq_bitmap, ctx->irq_count)) + ctx->pending_irq = false; + } else if (ctx->pending_fault) { + pr_devel("afu_read delivering data storage fault\n"); + event.header.size += sizeof(struct cxl_event_data_storage); + event.header.type = CXL_EVENT_DATA_STORAGE; + event.fault.addr = ctx->fault_addr; + event.fault.dsisr = ctx->fault_dsisr; + ctx->pending_fault = false; + } else if (ctx->pending_afu_err) { + pr_devel("afu_read delivering afu error\n"); + event.header.size += sizeof(struct cxl_event_afu_error); + event.header.type = CXL_EVENT_AFU_ERROR; + event.afu_error.error = ctx->afu_err; + ctx->pending_afu_err = false; + } else if (ctx->status == CLOSED) { + pr_devel("afu_read fatal error\n"); + spin_unlock_irqrestore(&ctx->lock, flags); + return -EIO; + } else + WARN(1, "afu_read must be buggy\n"); + + spin_unlock_irqrestore(&ctx->lock, flags); + + if (event.header.type == CXL_EVENT_AFU_DRIVER) + return afu_driver_event_copy(ctx, buf, &event, pl); + + if (copy_to_user(buf, &event, event.header.size)) + return -EFAULT; + return 
event.header.size; + +out: + finish_wait(&ctx->wq, &wait); + spin_unlock_irqrestore(&ctx->lock, flags); + return rc; +} + +/* + * Note: if this is updated, we need to update api.c to patch the new ones in + * too + */ +const struct file_operations afu_fops = { + .owner = THIS_MODULE, + .open = afu_open, + .poll = afu_poll, + .read = afu_read, + .release = afu_release, + .unlocked_ioctl = afu_ioctl, + .compat_ioctl = afu_compat_ioctl, + .mmap = afu_mmap, +}; + +static const struct file_operations afu_master_fops = { + .owner = THIS_MODULE, + .open = afu_master_open, + .poll = afu_poll, + .read = afu_read, + .release = afu_release, + .unlocked_ioctl = afu_ioctl, + .compat_ioctl = afu_compat_ioctl, + .mmap = afu_mmap, +}; + + +static char *cxl_devnode(const struct device *dev, umode_t *mode) +{ + if (cpu_has_feature(CPU_FTR_HVMODE) && + CXL_DEVT_IS_CARD(dev->devt)) { + /* + * These minor numbers will eventually be used to program the + * PSL and AFUs once we have dynamic reprogramming support + */ + return NULL; + } + return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev)); +} + +extern struct class *cxl_class; + +static int cxl_add_chardev(struct cxl_afu *afu, dev_t devt, struct cdev *cdev, + struct device **chardev, char *postfix, char *desc, + const struct file_operations *fops) +{ + struct device *dev; + int rc; + + cdev_init(cdev, fops); + rc = cdev_add(cdev, devt, 1); + if (rc) { + dev_err(&afu->dev, "Unable to add %s chardev: %i\n", desc, rc); + return rc; + } + + dev = device_create(cxl_class, &afu->dev, devt, afu, + "afu%i.%i%s", afu->adapter->adapter_num, afu->slice, postfix); + if (IS_ERR(dev)) { + rc = PTR_ERR(dev); + dev_err(&afu->dev, "Unable to create %s chardev in sysfs: %i\n", desc, rc); + goto err; + } + + *chardev = dev; + + return 0; +err: + cdev_del(cdev); + return rc; +} + +int cxl_chardev_d_afu_add(struct cxl_afu *afu) +{ + return cxl_add_chardev(afu, CXL_AFU_MKDEV_D(afu), &afu->afu_cdev_d, + &afu->chardev_d, "d", "dedicated", + &afu_master_fops); /* Uses master fops */ +} + +int cxl_chardev_m_afu_add(struct cxl_afu *afu) +{ + return cxl_add_chardev(afu, CXL_AFU_MKDEV_M(afu), &afu->afu_cdev_m, + &afu->chardev_m, "m", "master", + &afu_master_fops); +} + +int cxl_chardev_s_afu_add(struct cxl_afu *afu) +{ + return cxl_add_chardev(afu, CXL_AFU_MKDEV_S(afu), &afu->afu_cdev_s, + &afu->chardev_s, "s", "shared", + &afu_fops); +} + +void cxl_chardev_afu_remove(struct cxl_afu *afu) +{ + if (afu->chardev_d) { + cdev_del(&afu->afu_cdev_d); + device_unregister(afu->chardev_d); + afu->chardev_d = NULL; + } + if (afu->chardev_m) { + cdev_del(&afu->afu_cdev_m); + device_unregister(afu->chardev_m); + afu->chardev_m = NULL; + } + if (afu->chardev_s) { + cdev_del(&afu->afu_cdev_s); + device_unregister(afu->chardev_s); + afu->chardev_s = NULL; + } +} + +int cxl_register_afu(struct cxl_afu *afu) +{ + afu->dev.class = cxl_class; + + return device_register(&afu->dev); +} + +int cxl_register_adapter(struct cxl *adapter) +{ + adapter->dev.class = cxl_class; + + /* + * Future: When we support dynamically reprogramming the PSL & AFU we + * will expose the interface to do that via a chardev: + * adapter->dev.devt = CXL_CARD_MKDEV(adapter); + */ + + return device_register(&adapter->dev); +} + +dev_t cxl_get_dev(void) +{ + return cxl_dev; +} + +int __init cxl_file_init(void) +{ + int rc; + + /* + * If these change we really need to update API. Either change some + * flags or update API version number CXL_API_VERSION. 
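+ * The BUILD_BUG_ON()s below pin the API version and the sizes of the
+ * structures shared with userspace at compile time.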
+ */ + BUILD_BUG_ON(CXL_API_VERSION != 3); + BUILD_BUG_ON(sizeof(struct cxl_ioctl_start_work) != 64); + BUILD_BUG_ON(sizeof(struct cxl_event_header) != 8); + BUILD_BUG_ON(sizeof(struct cxl_event_afu_interrupt) != 8); + BUILD_BUG_ON(sizeof(struct cxl_event_data_storage) != 32); + BUILD_BUG_ON(sizeof(struct cxl_event_afu_error) != 16); + + if ((rc = alloc_chrdev_region(&cxl_dev, 0, CXL_NUM_MINORS, "cxl"))) { + pr_err("Unable to allocate CXL major number: %i\n", rc); + return rc; + } + + pr_devel("CXL device allocated, MAJOR %i\n", MAJOR(cxl_dev)); + + cxl_class = class_create("cxl"); + if (IS_ERR(cxl_class)) { + pr_err("Unable to create CXL class\n"); + rc = PTR_ERR(cxl_class); + goto err; + } + cxl_class->devnode = cxl_devnode; + + return 0; + +err: + unregister_chrdev_region(cxl_dev, CXL_NUM_MINORS); + return rc; +} + +void cxl_file_exit(void) +{ + unregister_chrdev_region(cxl_dev, CXL_NUM_MINORS); + class_destroy(cxl_class); +} diff --git a/drivers/misc/cxl/flash.c b/drivers/misc/cxl/flash.c new file mode 100644 index 0000000000..eee9decc12 --- /dev/null +++ b/drivers/misc/cxl/flash.c @@ -0,0 +1,538 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include + +#include "cxl.h" +#include "hcalls.h" + +#define DOWNLOAD_IMAGE 1 +#define VALIDATE_IMAGE 2 + +struct ai_header { + u16 version; + u8 reserved0[6]; + u16 vendor; + u16 device; + u16 subsystem_vendor; + u16 subsystem; + u64 image_offset; + u64 image_length; + u8 reserved1[96]; +}; + +static struct semaphore sem; +static unsigned long *buffer[CXL_AI_MAX_ENTRIES]; +static struct sg_list *le; +static u64 continue_token; +static unsigned int transfer; + +struct update_props_workarea { + __be32 phandle; + __be32 state; + __be64 reserved; + __be32 nprops; +} __packed; + +struct update_nodes_workarea { + __be32 state; + __be64 unit_address; + __be32 reserved; +} __packed; + +#define DEVICE_SCOPE 3 +#define NODE_ACTION_MASK 0xff000000 +#define NODE_COUNT_MASK 0x00ffffff +#define OPCODE_DELETE 0x01000000 +#define OPCODE_UPDATE 0x02000000 +#define OPCODE_ADD 0x03000000 + +static int rcall(int token, char *buf, s32 scope) +{ + int rc; + + spin_lock(&rtas_data_buf_lock); + + memcpy(rtas_data_buf, buf, RTAS_DATA_BUF_SIZE); + rc = rtas_call(token, 2, 1, NULL, rtas_data_buf, scope); + memcpy(buf, rtas_data_buf, RTAS_DATA_BUF_SIZE); + + spin_unlock(&rtas_data_buf_lock); + return rc; +} + +static int update_property(struct device_node *dn, const char *name, + u32 vd, char *value) +{ + struct property *new_prop; + u32 *val; + int rc; + + new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL); + if (!new_prop) + return -ENOMEM; + + new_prop->name = kstrdup(name, GFP_KERNEL); + if (!new_prop->name) { + kfree(new_prop); + return -ENOMEM; + } + + new_prop->length = vd; + new_prop->value = kzalloc(new_prop->length, GFP_KERNEL); + if (!new_prop->value) { + kfree(new_prop->name); + kfree(new_prop); + return -ENOMEM; + } + memcpy(new_prop->value, value, vd); + + val = (u32 *)new_prop->value; + rc = cxl_update_properties(dn, new_prop); + pr_devel("%pOFn: update property (%s, length: %i, value: %#x)\n", + dn, name, vd, be32_to_cpu(*val)); + + if (rc) { + kfree(new_prop->name); + kfree(new_prop->value); + kfree(new_prop); + } + return rc; +} + +static int update_node(__be32 phandle, s32 scope) +{ + struct update_props_workarea *upwa; + struct device_node *dn; + int i, rc, ret; + char *prop_data; + char *buf; + int token; + u32 nprops; + u32 vd; + + token = rtas_token("ibm,update-properties"); + if (token == 
RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + dn = of_find_node_by_phandle(be32_to_cpu(phandle)); + if (!dn) { + kfree(buf); + return -ENOENT; + } + + upwa = (struct update_props_workarea *)&buf[0]; + upwa->phandle = phandle; + do { + rc = rcall(token, buf, scope); + if (rc < 0) + break; + + prop_data = buf + sizeof(*upwa); + nprops = be32_to_cpu(upwa->nprops); + + if (*prop_data == 0) { + prop_data++; + vd = be32_to_cpu(*(__be32 *)prop_data); + prop_data += vd + sizeof(vd); + nprops--; + } + + for (i = 0; i < nprops; i++) { + char *prop_name; + + prop_name = prop_data; + prop_data += strlen(prop_name) + 1; + vd = be32_to_cpu(*(__be32 *)prop_data); + prop_data += sizeof(vd); + + if ((vd != 0x00000000) && (vd != 0x80000000)) { + ret = update_property(dn, prop_name, vd, + prop_data); + if (ret) + pr_err("cxl: Could not update property %s - %i\n", + prop_name, ret); + + prop_data += vd; + } + } + } while (rc == 1); + + of_node_put(dn); + kfree(buf); + return rc; +} + +static int update_devicetree(struct cxl *adapter, s32 scope) +{ + struct update_nodes_workarea *unwa; + u32 action, node_count; + int token, rc, i; + __be32 *data, phandle; + char *buf; + + token = rtas_token("ibm,update-nodes"); + if (token == RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + unwa = (struct update_nodes_workarea *)&buf[0]; + unwa->unit_address = cpu_to_be64(adapter->guest->handle); + do { + rc = rcall(token, buf, scope); + if (rc && rc != 1) + break; + + data = (__be32 *)buf + 4; + while (be32_to_cpu(*data) & NODE_ACTION_MASK) { + action = be32_to_cpu(*data) & NODE_ACTION_MASK; + node_count = be32_to_cpu(*data) & NODE_COUNT_MASK; + pr_devel("device reconfiguration - action: %#x, nodes: %#x\n", + action, node_count); + data++; + + for (i = 0; i < node_count; i++) { + phandle = *data++; + + switch (action) { + case OPCODE_DELETE: + /* nothing to do */ + break; + case OPCODE_UPDATE: + update_node(phandle, scope); + break; + case OPCODE_ADD: + /* nothing to do, just move pointer */ + data++; + break; + } + } + } + } while (rc == 1); + + kfree(buf); + return 0; +} + +static int handle_image(struct cxl *adapter, int operation, + long (*fct)(u64, u64, u64, u64 *), + struct cxl_adapter_image *ai) +{ + size_t mod, s_copy, len_chunk = 0; + struct ai_header *header = NULL; + unsigned int entries = 0, i; + void *dest, *from; + int rc = 0, need_header; + + /* base adapter image header */ + need_header = (ai->flags & CXL_AI_NEED_HEADER); + if (need_header) { + header = kzalloc(sizeof(struct ai_header), GFP_KERNEL); + if (!header) + return -ENOMEM; + header->version = cpu_to_be16(1); + header->vendor = cpu_to_be16(adapter->guest->vendor); + header->device = cpu_to_be16(adapter->guest->device); + header->subsystem_vendor = cpu_to_be16(adapter->guest->subsystem_vendor); + header->subsystem = cpu_to_be16(adapter->guest->subsystem); + header->image_offset = cpu_to_be64(CXL_AI_HEADER_SIZE); + header->image_length = cpu_to_be64(ai->len_image); + } + + /* number of entries in the list */ + len_chunk = ai->len_data; + if (need_header) + len_chunk += CXL_AI_HEADER_SIZE; + + entries = len_chunk / CXL_AI_BUFFER_SIZE; + mod = len_chunk % CXL_AI_BUFFER_SIZE; + if (mod) + entries++; + + if (entries > CXL_AI_MAX_ENTRIES) { + rc = -EINVAL; + goto err; + } + + /* < -- MAX_CHUNK_SIZE = 4096 * 256 = 1048576 bytes --> + * chunk 0 ---------------------------------------------------- + * | header 
| data | + * ---------------------------------------------------- + * chunk 1 ---------------------------------------------------- + * | data | + * ---------------------------------------------------- + * .... + * chunk n ---------------------------------------------------- + * | data | + * ---------------------------------------------------- + */ + from = (void *) ai->data; + for (i = 0; i < entries; i++) { + dest = buffer[i]; + s_copy = CXL_AI_BUFFER_SIZE; + + if ((need_header) && (i == 0)) { + /* add adapter image header */ + memcpy(buffer[i], header, sizeof(struct ai_header)); + s_copy = CXL_AI_BUFFER_SIZE - CXL_AI_HEADER_SIZE; + dest += CXL_AI_HEADER_SIZE; /* image offset */ + } + if ((i == (entries - 1)) && mod) + s_copy = mod; + + /* copy data */ + if (copy_from_user(dest, from, s_copy)) + goto err; + + /* fill in the list */ + le[i].phys_addr = cpu_to_be64(virt_to_phys(buffer[i])); + le[i].len = cpu_to_be64(CXL_AI_BUFFER_SIZE); + if ((i == (entries - 1)) && mod) + le[i].len = cpu_to_be64(mod); + from += s_copy; + } + pr_devel("%s (op: %i, need header: %i, entries: %i, token: %#llx)\n", + __func__, operation, need_header, entries, continue_token); + + /* + * download/validate the adapter image to the coherent + * platform facility + */ + rc = fct(adapter->guest->handle, virt_to_phys(le), entries, + &continue_token); + if (rc == 0) /* success of download/validation operation */ + continue_token = 0; + +err: + kfree(header); + + return rc; +} + +static int transfer_image(struct cxl *adapter, int operation, + struct cxl_adapter_image *ai) +{ + int rc = 0; + int afu; + + switch (operation) { + case DOWNLOAD_IMAGE: + rc = handle_image(adapter, operation, + &cxl_h_download_adapter_image, ai); + if (rc < 0) { + pr_devel("resetting adapter\n"); + cxl_h_reset_adapter(adapter->guest->handle); + } + return rc; + + case VALIDATE_IMAGE: + rc = handle_image(adapter, operation, + &cxl_h_validate_adapter_image, ai); + if (rc < 0) { + pr_devel("resetting adapter\n"); + cxl_h_reset_adapter(adapter->guest->handle); + return rc; + } + if (rc == 0) { + pr_devel("remove current afu\n"); + for (afu = 0; afu < adapter->slices; afu++) + cxl_guest_remove_afu(adapter->afu[afu]); + + pr_devel("resetting adapter\n"); + cxl_h_reset_adapter(adapter->guest->handle); + + /* The entire image has now been + * downloaded and the validation has + * been successfully performed. 
+ * After that, the partition should call + * ibm,update-nodes and + * ibm,update-properties to receive the + * current configuration + */ + rc = update_devicetree(adapter, DEVICE_SCOPE); + transfer = 1; + } + return rc; + } + + return -EINVAL; +} + +static long ioctl_transfer_image(struct cxl *adapter, int operation, + struct cxl_adapter_image __user *uai) +{ + struct cxl_adapter_image ai; + + pr_devel("%s\n", __func__); + + if (copy_from_user(&ai, uai, sizeof(struct cxl_adapter_image))) + return -EFAULT; + + /* + * Make sure reserved fields and bits are set to 0 + */ + if (ai.reserved1 || ai.reserved2 || ai.reserved3 || ai.reserved4 || + (ai.flags & ~CXL_AI_ALL)) + return -EINVAL; + + return transfer_image(adapter, operation, &ai); +} + +static int device_open(struct inode *inode, struct file *file) +{ + int adapter_num = CXL_DEVT_ADAPTER(inode->i_rdev); + struct cxl *adapter; + int rc = 0, i; + + pr_devel("in %s\n", __func__); + + BUG_ON(sizeof(struct ai_header) != CXL_AI_HEADER_SIZE); + + /* Allows one process to open the device by using a semaphore */ + if (down_interruptible(&sem) != 0) + return -EPERM; + + if (!(adapter = get_cxl_adapter(adapter_num))) { + rc = -ENODEV; + goto err_unlock; + } + + file->private_data = adapter; + continue_token = 0; + transfer = 0; + + for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) + buffer[i] = NULL; + + /* aligned buffer containing list entries which describes up to + * 1 megabyte of data (256 entries of 4096 bytes each) + * Logical real address of buffer 0 - Buffer 0 length in bytes + * Logical real address of buffer 1 - Buffer 1 length in bytes + * Logical real address of buffer 2 - Buffer 2 length in bytes + * .... + * .... + * Logical real address of buffer N - Buffer N length in bytes + */ + le = (struct sg_list *)get_zeroed_page(GFP_KERNEL); + if (!le) { + rc = -ENOMEM; + goto err; + } + + for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) { + buffer[i] = (unsigned long *)get_zeroed_page(GFP_KERNEL); + if (!buffer[i]) { + rc = -ENOMEM; + goto err1; + } + } + + return 0; + +err1: + for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) { + if (buffer[i]) + free_page((unsigned long) buffer[i]); + } + + if (le) + free_page((unsigned long) le); +err: + put_device(&adapter->dev); +err_unlock: + up(&sem); + + return rc; +} + +static long device_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct cxl *adapter = file->private_data; + + pr_devel("in %s\n", __func__); + + if (cmd == CXL_IOCTL_DOWNLOAD_IMAGE) + return ioctl_transfer_image(adapter, + DOWNLOAD_IMAGE, + (struct cxl_adapter_image __user *)arg); + else if (cmd == CXL_IOCTL_VALIDATE_IMAGE) + return ioctl_transfer_image(adapter, + VALIDATE_IMAGE, + (struct cxl_adapter_image __user *)arg); + else + return -EINVAL; +} + +static int device_close(struct inode *inode, struct file *file) +{ + struct cxl *adapter = file->private_data; + int i; + + pr_devel("in %s\n", __func__); + + for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) { + if (buffer[i]) + free_page((unsigned long) buffer[i]); + } + + if (le) + free_page((unsigned long) le); + + up(&sem); + put_device(&adapter->dev); + continue_token = 0; + + /* reload the module */ + if (transfer) + cxl_guest_reload_module(adapter); + else { + pr_devel("resetting adapter\n"); + cxl_h_reset_adapter(adapter->guest->handle); + } + + transfer = 0; + return 0; +} + +static const struct file_operations fops = { + .owner = THIS_MODULE, + .open = device_open, + .unlocked_ioctl = device_ioctl, + .compat_ioctl = compat_ptr_ioctl, + .release = device_close, +}; + +void 
cxl_guest_remove_chardev(struct cxl *adapter) +{ + cdev_del(&adapter->guest->cdev); +} + +int cxl_guest_add_chardev(struct cxl *adapter) +{ + dev_t devt; + int rc; + + devt = MKDEV(MAJOR(cxl_get_dev()), CXL_CARD_MINOR(adapter)); + cdev_init(&adapter->guest->cdev, &fops); + if ((rc = cdev_add(&adapter->guest->cdev, devt, 1))) { + dev_err(&adapter->dev, + "Unable to add chardev on adapter (card%i): %i\n", + adapter->adapter_num, rc); + goto err; + } + adapter->dev.devt = devt; + sema_init(&sem, 1); +err: + return rc; +} diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c new file mode 100644 index 0000000000..fb95a2d5ce --- /dev/null +++ b/drivers/misc/cxl/guest.c @@ -0,0 +1,1208 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2015 IBM Corp. + */ + +#include +#include +#include +#include +#include + +#include "cxl.h" +#include "hcalls.h" +#include "trace.h" + +#define CXL_ERROR_DETECTED_EVENT 1 +#define CXL_SLOT_RESET_EVENT 2 +#define CXL_RESUME_EVENT 3 + +static void pci_error_handlers(struct cxl_afu *afu, + int bus_error_event, + pci_channel_state_t state) +{ + struct pci_dev *afu_dev; + struct pci_driver *afu_drv; + const struct pci_error_handlers *err_handler; + + if (afu->phb == NULL) + return; + + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { + afu_drv = to_pci_driver(afu_dev->dev.driver); + if (!afu_drv) + continue; + + err_handler = afu_drv->err_handler; + switch (bus_error_event) { + case CXL_ERROR_DETECTED_EVENT: + afu_dev->error_state = state; + + if (err_handler && + err_handler->error_detected) + err_handler->error_detected(afu_dev, state); + break; + case CXL_SLOT_RESET_EVENT: + afu_dev->error_state = state; + + if (err_handler && + err_handler->slot_reset) + err_handler->slot_reset(afu_dev); + break; + case CXL_RESUME_EVENT: + if (err_handler && + err_handler->resume) + err_handler->resume(afu_dev); + break; + } + } +} + +static irqreturn_t guest_handle_psl_slice_error(struct cxl_context *ctx, u64 dsisr, + u64 errstat) +{ + pr_devel("in %s\n", __func__); + dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%.16llx\n", errstat); + + return cxl_ops->ack_irq(ctx, 0, errstat); +} + +static ssize_t guest_collect_vpd(struct cxl *adapter, struct cxl_afu *afu, + void *buf, size_t len) +{ + unsigned int entries, mod; + unsigned long **vpd_buf = NULL; + struct sg_list *le; + int rc = 0, i, tocopy; + u64 out = 0; + + if (buf == NULL) + return -EINVAL; + + /* number of entries in the list */ + entries = len / SG_BUFFER_SIZE; + mod = len % SG_BUFFER_SIZE; + if (mod) + entries++; + + if (entries > SG_MAX_ENTRIES) { + entries = SG_MAX_ENTRIES; + len = SG_MAX_ENTRIES * SG_BUFFER_SIZE; + mod = 0; + } + + vpd_buf = kcalloc(entries, sizeof(unsigned long *), GFP_KERNEL); + if (!vpd_buf) + return -ENOMEM; + + le = (struct sg_list *)get_zeroed_page(GFP_KERNEL); + if (!le) { + rc = -ENOMEM; + goto err1; + } + + for (i = 0; i < entries; i++) { + vpd_buf[i] = (unsigned long *)get_zeroed_page(GFP_KERNEL); + if (!vpd_buf[i]) { + rc = -ENOMEM; + goto err2; + } + le[i].phys_addr = cpu_to_be64(virt_to_phys(vpd_buf[i])); + le[i].len = cpu_to_be64(SG_BUFFER_SIZE); + if ((i == (entries - 1)) && mod) + le[i].len = cpu_to_be64(mod); + } + + if (adapter) + rc = cxl_h_collect_vpd_adapter(adapter->guest->handle, + virt_to_phys(le), entries, &out); + else + rc = cxl_h_collect_vpd(afu->guest->handle, 0, + virt_to_phys(le), entries, &out); + pr_devel("length of available (entries: %i), vpd: %#llx\n", + entries, out); + + if (!rc) { + /* + * hcall returns in 'out' the 
size of available VPDs. + * It fills the buffer with as much data as possible. + */ + if (out < len) + len = out; + rc = len; + if (out) { + for (i = 0; i < entries; i++) { + if (len < SG_BUFFER_SIZE) + tocopy = len; + else + tocopy = SG_BUFFER_SIZE; + memcpy(buf, vpd_buf[i], tocopy); + buf += tocopy; + len -= tocopy; + } + } + } +err2: + for (i = 0; i < entries; i++) { + if (vpd_buf[i]) + free_page((unsigned long) vpd_buf[i]); + } + free_page((unsigned long) le); +err1: + kfree(vpd_buf); + return rc; +} + +static int guest_get_irq_info(struct cxl_context *ctx, struct cxl_irq_info *info) +{ + return cxl_h_collect_int_info(ctx->afu->guest->handle, ctx->process_token, info); +} + +static irqreturn_t guest_psl_irq(int irq, void *data) +{ + struct cxl_context *ctx = data; + struct cxl_irq_info irq_info; + int rc; + + pr_devel("%d: received PSL interrupt %i\n", ctx->pe, irq); + rc = guest_get_irq_info(ctx, &irq_info); + if (rc) { + WARN(1, "Unable to get IRQ info: %i\n", rc); + return IRQ_HANDLED; + } + + rc = cxl_irq_psl8(irq, ctx, &irq_info); + return rc; +} + +static int afu_read_error_state(struct cxl_afu *afu, int *state_out) +{ + u64 state; + int rc = 0; + + if (!afu) + return -EIO; + + rc = cxl_h_read_error_state(afu->guest->handle, &state); + if (!rc) { + WARN_ON(state != H_STATE_NORMAL && + state != H_STATE_DISABLE && + state != H_STATE_TEMP_UNAVAILABLE && + state != H_STATE_PERM_UNAVAILABLE); + *state_out = state & 0xffffffff; + } + return rc; +} + +static irqreturn_t guest_slice_irq_err(int irq, void *data) +{ + struct cxl_afu *afu = data; + int rc; + u64 serr, afu_error, dsisr; + + rc = cxl_h_get_fn_error_interrupt(afu->guest->handle, &serr); + if (rc) { + dev_crit(&afu->dev, "Couldn't read PSL_SERR_An: %d\n", rc); + return IRQ_HANDLED; + } + afu_error = cxl_p2n_read(afu, CXL_AFU_ERR_An); + dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + cxl_afu_decode_psl_serr(afu, serr); + dev_crit(&afu->dev, "AFU_ERR_An: 0x%.16llx\n", afu_error); + dev_crit(&afu->dev, "PSL_DSISR_An: 0x%.16llx\n", dsisr); + + rc = cxl_h_ack_fn_error_interrupt(afu->guest->handle, serr); + if (rc) + dev_crit(&afu->dev, "Couldn't ack slice error interrupt: %d\n", + rc); + + return IRQ_HANDLED; +} + + +static int irq_alloc_range(struct cxl *adapter, int len, int *irq) +{ + int i, n; + struct irq_avail *cur; + + for (i = 0; i < adapter->guest->irq_nranges; i++) { + cur = &adapter->guest->irq_avail[i]; + n = bitmap_find_next_zero_area(cur->bitmap, cur->range, + 0, len, 0); + if (n < cur->range) { + bitmap_set(cur->bitmap, n, len); + *irq = cur->offset + n; + pr_devel("guest: allocate IRQs %#x->%#x\n", + *irq, *irq + len - 1); + + return 0; + } + } + return -ENOSPC; +} + +static int irq_free_range(struct cxl *adapter, int irq, int len) +{ + int i, n; + struct irq_avail *cur; + + if (len == 0) + return -ENOENT; + + for (i = 0; i < adapter->guest->irq_nranges; i++) { + cur = &adapter->guest->irq_avail[i]; + if (irq >= cur->offset && + (irq + len) <= (cur->offset + cur->range)) { + n = irq - cur->offset; + bitmap_clear(cur->bitmap, n, len); + pr_devel("guest: release IRQs %#x->%#x\n", + irq, irq + len - 1); + return 0; + } + } + return -ENOENT; +} + +static int guest_reset(struct cxl *adapter) +{ + struct cxl_afu *afu = NULL; + int i, rc; + + pr_devel("Adapter reset request\n"); + spin_lock(&adapter->afu_list_lock); + for (i = 0; i < adapter->slices; i++) { + if ((afu = adapter->afu[i])) { + pci_error_handlers(afu, CXL_ERROR_DETECTED_EVENT, + pci_channel_io_frozen); + cxl_context_detach_all(afu); + } + } + + rc = 
cxl_h_reset_adapter(adapter->guest->handle); + for (i = 0; i < adapter->slices; i++) { + if (!rc && (afu = adapter->afu[i])) { + pci_error_handlers(afu, CXL_SLOT_RESET_EVENT, + pci_channel_io_normal); + pci_error_handlers(afu, CXL_RESUME_EVENT, 0); + } + } + spin_unlock(&adapter->afu_list_lock); + return rc; +} + +static int guest_alloc_one_irq(struct cxl *adapter) +{ + int irq; + + spin_lock(&adapter->guest->irq_alloc_lock); + if (irq_alloc_range(adapter, 1, &irq)) + irq = -ENOSPC; + spin_unlock(&adapter->guest->irq_alloc_lock); + return irq; +} + +static void guest_release_one_irq(struct cxl *adapter, int irq) +{ + spin_lock(&adapter->guest->irq_alloc_lock); + irq_free_range(adapter, irq, 1); + spin_unlock(&adapter->guest->irq_alloc_lock); +} + +static int guest_alloc_irq_ranges(struct cxl_irq_ranges *irqs, + struct cxl *adapter, unsigned int num) +{ + int i, try, irq; + + memset(irqs, 0, sizeof(struct cxl_irq_ranges)); + + spin_lock(&adapter->guest->irq_alloc_lock); + for (i = 0; i < CXL_IRQ_RANGES && num; i++) { + try = num; + while (try) { + if (irq_alloc_range(adapter, try, &irq) == 0) + break; + try /= 2; + } + if (!try) + goto error; + irqs->offset[i] = irq; + irqs->range[i] = try; + num -= try; + } + if (num) + goto error; + spin_unlock(&adapter->guest->irq_alloc_lock); + return 0; + +error: + for (i = 0; i < CXL_IRQ_RANGES; i++) + irq_free_range(adapter, irqs->offset[i], irqs->range[i]); + spin_unlock(&adapter->guest->irq_alloc_lock); + return -ENOSPC; +} + +static void guest_release_irq_ranges(struct cxl_irq_ranges *irqs, + struct cxl *adapter) +{ + int i; + + spin_lock(&adapter->guest->irq_alloc_lock); + for (i = 0; i < CXL_IRQ_RANGES; i++) + irq_free_range(adapter, irqs->offset[i], irqs->range[i]); + spin_unlock(&adapter->guest->irq_alloc_lock); +} + +static int guest_register_serr_irq(struct cxl_afu *afu) +{ + afu->err_irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err", + dev_name(&afu->dev)); + if (!afu->err_irq_name) + return -ENOMEM; + + if (!(afu->serr_virq = cxl_map_irq(afu->adapter, afu->serr_hwirq, + guest_slice_irq_err, afu, afu->err_irq_name))) { + kfree(afu->err_irq_name); + afu->err_irq_name = NULL; + return -ENOMEM; + } + + return 0; +} + +static void guest_release_serr_irq(struct cxl_afu *afu) +{ + cxl_unmap_irq(afu->serr_virq, afu); + cxl_ops->release_one_irq(afu->adapter, afu->serr_hwirq); + kfree(afu->err_irq_name); +} + +static int guest_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask) +{ + return cxl_h_control_faults(ctx->afu->guest->handle, ctx->process_token, + tfc >> 32, (psl_reset_mask != 0)); +} + +static void disable_afu_irqs(struct cxl_context *ctx) +{ + irq_hw_number_t hwirq; + unsigned int virq; + int r, i; + + pr_devel("Disabling AFU(%d) interrupts\n", ctx->afu->slice); + for (r = 0; r < CXL_IRQ_RANGES; r++) { + hwirq = ctx->irqs.offset[r]; + for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { + virq = irq_find_mapping(NULL, hwirq); + disable_irq(virq); + } + } +} + +static void enable_afu_irqs(struct cxl_context *ctx) +{ + irq_hw_number_t hwirq; + unsigned int virq; + int r, i; + + pr_devel("Enabling AFU(%d) interrupts\n", ctx->afu->slice); + for (r = 0; r < CXL_IRQ_RANGES; r++) { + hwirq = ctx->irqs.offset[r]; + for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { + virq = irq_find_mapping(NULL, hwirq); + enable_irq(virq); + } + } +} + +static int _guest_afu_cr_readXX(int sz, struct cxl_afu *afu, int cr_idx, + u64 offset, u64 *val) +{ + unsigned long cr; + char c; + int rc = 0; + + if (afu->crs_len < sz) + return -ENOENT; + + if 
(unlikely(offset >= afu->crs_len)) + return -ERANGE; + + cr = get_zeroed_page(GFP_KERNEL); + if (!cr) + return -ENOMEM; + + rc = cxl_h_get_config(afu->guest->handle, cr_idx, offset, + virt_to_phys((void *)cr), sz); + if (rc) + goto err; + + switch (sz) { + case 1: + c = *((char *) cr); + *val = c; + break; + case 2: + *val = in_le16((u16 *)cr); + break; + case 4: + *val = in_le32((unsigned *)cr); + break; + case 8: + *val = in_le64((u64 *)cr); + break; + default: + WARN_ON(1); + } +err: + free_page(cr); + return rc; +} + +static int guest_afu_cr_read32(struct cxl_afu *afu, int cr_idx, u64 offset, + u32 *out) +{ + int rc; + u64 val; + + rc = _guest_afu_cr_readXX(4, afu, cr_idx, offset, &val); + if (!rc) + *out = (u32) val; + return rc; +} + +static int guest_afu_cr_read16(struct cxl_afu *afu, int cr_idx, u64 offset, + u16 *out) +{ + int rc; + u64 val; + + rc = _guest_afu_cr_readXX(2, afu, cr_idx, offset, &val); + if (!rc) + *out = (u16) val; + return rc; +} + +static int guest_afu_cr_read8(struct cxl_afu *afu, int cr_idx, u64 offset, + u8 *out) +{ + int rc; + u64 val; + + rc = _guest_afu_cr_readXX(1, afu, cr_idx, offset, &val); + if (!rc) + *out = (u8) val; + return rc; +} + +static int guest_afu_cr_read64(struct cxl_afu *afu, int cr_idx, u64 offset, + u64 *out) +{ + return _guest_afu_cr_readXX(8, afu, cr_idx, offset, out); +} + +static int guest_afu_cr_write32(struct cxl_afu *afu, int cr, u64 off, u32 in) +{ + /* config record is not writable from guest */ + return -EPERM; +} + +static int guest_afu_cr_write16(struct cxl_afu *afu, int cr, u64 off, u16 in) +{ + /* config record is not writable from guest */ + return -EPERM; +} + +static int guest_afu_cr_write8(struct cxl_afu *afu, int cr, u64 off, u8 in) +{ + /* config record is not writable from guest */ + return -EPERM; +} + +static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr) +{ + struct cxl_process_element_hcall *elem; + struct cxl *adapter = ctx->afu->adapter; + const struct cred *cred; + u32 pid, idx; + int rc, r, i; + u64 mmio_addr, mmio_size; + __be64 flags = 0; + + /* Must be 8 byte aligned and cannot cross a 4096 byte boundary */ + if (!(elem = (struct cxl_process_element_hcall *) + get_zeroed_page(GFP_KERNEL))) + return -ENOMEM; + + elem->version = cpu_to_be64(CXL_PROCESS_ELEMENT_VERSION); + if (ctx->kernel) { + pid = 0; + flags |= CXL_PE_TRANSLATION_ENABLED; + flags |= CXL_PE_PRIVILEGED_PROCESS; + if (mfmsr() & MSR_SF) + flags |= CXL_PE_64_BIT; + } else { + pid = current->pid; + flags |= CXL_PE_PROBLEM_STATE; + flags |= CXL_PE_TRANSLATION_ENABLED; + if (!test_tsk_thread_flag(current, TIF_32BIT)) + flags |= CXL_PE_64_BIT; + cred = get_current_cred(); + if (uid_eq(cred->euid, GLOBAL_ROOT_UID)) + flags |= CXL_PE_PRIVILEGED_PROCESS; + put_cred(cred); + } + elem->flags = cpu_to_be64(flags); + elem->common.tid = cpu_to_be32(0); /* Unused */ + elem->common.pid = cpu_to_be32(pid); + elem->common.csrp = cpu_to_be64(0); /* disable */ + elem->common.u.psl8.aurp0 = cpu_to_be64(0); /* disable */ + elem->common.u.psl8.aurp1 = cpu_to_be64(0); /* disable */ + + cxl_prefault(ctx, wed); + + elem->common.u.psl8.sstp0 = cpu_to_be64(ctx->sstp0); + elem->common.u.psl8.sstp1 = cpu_to_be64(ctx->sstp1); + + /* + * Ensure we have at least one interrupt allocated to take faults for + * kernel contexts that may not have allocated any AFU IRQs at all: + */ + if (ctx->irqs.range[0] == 0) { + rc = afu_register_irqs(ctx, 0); + if (rc) + goto out_free; + } + + for (r = 0; r < CXL_IRQ_RANGES; r++) { + for (i = 0; i < ctx->irqs.range[r]; 
i++) { + if (r == 0 && i == 0) { + elem->pslVirtualIsn = cpu_to_be32(ctx->irqs.offset[0]); + } else { + idx = ctx->irqs.offset[r] + i - adapter->guest->irq_base_offset; + elem->applicationVirtualIsnBitmap[idx / 8] |= 0x80 >> (idx % 8); + } + } + } + elem->common.amr = cpu_to_be64(amr); + elem->common.wed = cpu_to_be64(wed); + + disable_afu_irqs(ctx); + + rc = cxl_h_attach_process(ctx->afu->guest->handle, elem, + &ctx->process_token, &mmio_addr, &mmio_size); + if (rc == H_SUCCESS) { + if (ctx->master || !ctx->afu->pp_psa) { + ctx->psn_phys = ctx->afu->psn_phys; + ctx->psn_size = ctx->afu->adapter->ps_size; + } else { + ctx->psn_phys = mmio_addr; + ctx->psn_size = mmio_size; + } + if (ctx->afu->pp_psa && mmio_size && + ctx->afu->pp_size == 0) { + /* + * There's no property in the device tree to read the + * pp_size. We only find out at the 1st attach. + * Compared to bare-metal, it is too late and we + * should really lock here. However, on powerVM, + * pp_size is really only used to display in /sys. + * Being discussed with pHyp for their next release. + */ + ctx->afu->pp_size = mmio_size; + } + /* from PAPR: process element is bytes 4-7 of process token */ + ctx->external_pe = ctx->process_token & 0xFFFFFFFF; + pr_devel("CXL pe=%i is known as %i for pHyp, mmio_size=%#llx", + ctx->pe, ctx->external_pe, ctx->psn_size); + ctx->pe_inserted = true; + enable_afu_irqs(ctx); + } + +out_free: + free_page((u64)elem); + return rc; +} + +static int guest_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr) +{ + pr_devel("in %s\n", __func__); + + ctx->kernel = kernel; + if (ctx->afu->current_mode == CXL_MODE_DIRECTED) + return attach_afu_directed(ctx, wed, amr); + + /* dedicated mode not supported on FW840 */ + + return -EINVAL; +} + +static int detach_afu_directed(struct cxl_context *ctx) +{ + if (!ctx->pe_inserted) + return 0; + if (cxl_h_detach_process(ctx->afu->guest->handle, ctx->process_token)) + return -1; + return 0; +} + +static int guest_detach_process(struct cxl_context *ctx) +{ + pr_devel("in %s\n", __func__); + trace_cxl_detach(ctx); + + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) + return -EIO; + + if (ctx->afu->current_mode == CXL_MODE_DIRECTED) + return detach_afu_directed(ctx); + + return -EINVAL; +} + +static void guest_release_afu(struct device *dev) +{ + struct cxl_afu *afu = to_cxl_afu(dev); + + pr_devel("%s\n", __func__); + + idr_destroy(&afu->contexts_idr); + + kfree(afu->guest); + kfree(afu); +} + +ssize_t cxl_guest_read_afu_vpd(struct cxl_afu *afu, void *buf, size_t len) +{ + return guest_collect_vpd(NULL, afu, buf, len); +} + +#define ERR_BUFF_MAX_COPY_SIZE PAGE_SIZE +static ssize_t guest_afu_read_err_buffer(struct cxl_afu *afu, char *buf, + loff_t off, size_t count) +{ + void *tbuf = NULL; + int rc = 0; + + tbuf = (void *) get_zeroed_page(GFP_KERNEL); + if (!tbuf) + return -ENOMEM; + + rc = cxl_h_get_afu_err(afu->guest->handle, + off & 0x7, + virt_to_phys(tbuf), + count); + if (rc) + goto err; + + if (count > ERR_BUFF_MAX_COPY_SIZE) + count = ERR_BUFF_MAX_COPY_SIZE - (off & 0x7); + memcpy(buf, tbuf, count); +err: + free_page((u64)tbuf); + + return rc; +} + +static int guest_afu_check_and_enable(struct cxl_afu *afu) +{ + return 0; +} + +static bool guest_support_attributes(const char *attr_name, + enum cxl_attrs type) +{ + switch (type) { + case CXL_ADAPTER_ATTRS: + if ((strcmp(attr_name, "base_image") == 0) || + (strcmp(attr_name, "load_image_on_perst") == 0) || + (strcmp(attr_name, "perst_reloads_same_image") == 0) || + (strcmp(attr_name, 
"image_loaded") == 0)) + return false; + break; + case CXL_AFU_MASTER_ATTRS: + if ((strcmp(attr_name, "pp_mmio_off") == 0)) + return false; + break; + case CXL_AFU_ATTRS: + break; + default: + break; + } + + return true; +} + +static int activate_afu_directed(struct cxl_afu *afu) +{ + int rc; + + dev_info(&afu->dev, "Activating AFU(%d) directed mode\n", afu->slice); + + afu->current_mode = CXL_MODE_DIRECTED; + + afu->num_procs = afu->max_procs_virtualised; + + if ((rc = cxl_chardev_m_afu_add(afu))) + return rc; + + if ((rc = cxl_sysfs_afu_m_add(afu))) + goto err; + + if ((rc = cxl_chardev_s_afu_add(afu))) + goto err1; + + return 0; +err1: + cxl_sysfs_afu_m_remove(afu); +err: + cxl_chardev_afu_remove(afu); + return rc; +} + +static int guest_afu_activate_mode(struct cxl_afu *afu, int mode) +{ + if (!mode) + return 0; + if (!(mode & afu->modes_supported)) + return -EINVAL; + + if (mode == CXL_MODE_DIRECTED) + return activate_afu_directed(afu); + + if (mode == CXL_MODE_DEDICATED) + dev_err(&afu->dev, "Dedicated mode not supported\n"); + + return -EINVAL; +} + +static int deactivate_afu_directed(struct cxl_afu *afu) +{ + dev_info(&afu->dev, "Deactivating AFU(%d) directed mode\n", afu->slice); + + afu->current_mode = 0; + afu->num_procs = 0; + + cxl_sysfs_afu_m_remove(afu); + cxl_chardev_afu_remove(afu); + + cxl_ops->afu_reset(afu); + + return 0; +} + +static int guest_afu_deactivate_mode(struct cxl_afu *afu, int mode) +{ + if (!mode) + return 0; + if (!(mode & afu->modes_supported)) + return -EINVAL; + + if (mode == CXL_MODE_DIRECTED) + return deactivate_afu_directed(afu); + return 0; +} + +static int guest_afu_reset(struct cxl_afu *afu) +{ + pr_devel("AFU(%d) reset request\n", afu->slice); + return cxl_h_reset_afu(afu->guest->handle); +} + +static int guest_map_slice_regs(struct cxl_afu *afu) +{ + if (!(afu->p2n_mmio = ioremap(afu->guest->p2n_phys, afu->guest->p2n_size))) { + dev_err(&afu->dev, "Error mapping AFU(%d) MMIO regions\n", + afu->slice); + return -ENOMEM; + } + return 0; +} + +static void guest_unmap_slice_regs(struct cxl_afu *afu) +{ + if (afu->p2n_mmio) + iounmap(afu->p2n_mmio); +} + +static int afu_update_state(struct cxl_afu *afu) +{ + int rc, cur_state; + + rc = afu_read_error_state(afu, &cur_state); + if (rc) + return rc; + + if (afu->guest->previous_state == cur_state) + return 0; + + pr_devel("AFU(%d) update state to %#x\n", afu->slice, cur_state); + + switch (cur_state) { + case H_STATE_NORMAL: + afu->guest->previous_state = cur_state; + break; + + case H_STATE_DISABLE: + pci_error_handlers(afu, CXL_ERROR_DETECTED_EVENT, + pci_channel_io_frozen); + + cxl_context_detach_all(afu); + if ((rc = cxl_ops->afu_reset(afu))) + pr_devel("reset hcall failed %d\n", rc); + + rc = afu_read_error_state(afu, &cur_state); + if (!rc && cur_state == H_STATE_NORMAL) { + pci_error_handlers(afu, CXL_SLOT_RESET_EVENT, + pci_channel_io_normal); + pci_error_handlers(afu, CXL_RESUME_EVENT, 0); + } + afu->guest->previous_state = 0; + break; + + case H_STATE_TEMP_UNAVAILABLE: + afu->guest->previous_state = cur_state; + break; + + case H_STATE_PERM_UNAVAILABLE: + dev_err(&afu->dev, "AFU is in permanent error state\n"); + pci_error_handlers(afu, CXL_ERROR_DETECTED_EVENT, + pci_channel_io_perm_failure); + afu->guest->previous_state = cur_state; + break; + + default: + pr_err("Unexpected AFU(%d) error state: %#x\n", + afu->slice, cur_state); + return -EINVAL; + } + + return rc; +} + +static void afu_handle_errstate(struct work_struct *work) +{ + struct cxl_afu_guest *afu_guest = + 
container_of(to_delayed_work(work), struct cxl_afu_guest, work_err); + + if (!afu_update_state(afu_guest->parent) && + afu_guest->previous_state == H_STATE_PERM_UNAVAILABLE) + return; + + if (afu_guest->handle_err) + schedule_delayed_work(&afu_guest->work_err, + msecs_to_jiffies(3000)); +} + +static bool guest_link_ok(struct cxl *cxl, struct cxl_afu *afu) +{ + int state; + + if (afu && (!afu_read_error_state(afu, &state))) { + if (state == H_STATE_NORMAL) + return true; + } + + return false; +} + +static int afu_properties_look_ok(struct cxl_afu *afu) +{ + if (afu->pp_irqs < 0) { + dev_err(&afu->dev, "Unexpected per-process minimum interrupt value\n"); + return -EINVAL; + } + + if (afu->max_procs_virtualised < 1) { + dev_err(&afu->dev, "Unexpected max number of processes virtualised value\n"); + return -EINVAL; + } + + return 0; +} + +int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_np) +{ + struct cxl_afu *afu; + bool free = true; + int rc; + + pr_devel("in %s - AFU(%d)\n", __func__, slice); + if (!(afu = cxl_alloc_afu(adapter, slice))) + return -ENOMEM; + + if (!(afu->guest = kzalloc(sizeof(struct cxl_afu_guest), GFP_KERNEL))) { + kfree(afu); + return -ENOMEM; + } + + if ((rc = dev_set_name(&afu->dev, "afu%i.%i", + adapter->adapter_num, + slice))) + goto err1; + + adapter->slices++; + + if ((rc = cxl_of_read_afu_handle(afu, afu_np))) + goto err1; + + if ((rc = cxl_ops->afu_reset(afu))) + goto err1; + + if ((rc = cxl_of_read_afu_properties(afu, afu_np))) + goto err1; + + if ((rc = afu_properties_look_ok(afu))) + goto err1; + + if ((rc = guest_map_slice_regs(afu))) + goto err1; + + if ((rc = guest_register_serr_irq(afu))) + goto err2; + + /* + * After we call this function we must not free the afu directly, even + * if it returns an error! + */ + if ((rc = cxl_register_afu(afu))) + goto err_put_dev; + + if ((rc = cxl_sysfs_afu_add(afu))) + goto err_del_dev; + + /* + * pHyp doesn't expose the programming models supported by the + * AFU. pHyp currently only supports directed mode. If it adds + * dedicated mode later, this version of cxl has no way to + * detect it. So we'll initialize the driver, but the first + * attach will fail. + * Being discussed with pHyp to do better (likely new property) + */ + if (afu->max_procs_virtualised == 1) + afu->modes_supported = CXL_MODE_DEDICATED; + else + afu->modes_supported = CXL_MODE_DIRECTED; + + if ((rc = cxl_afu_select_best_mode(afu))) + goto err_remove_sysfs; + + adapter->afu[afu->slice] = afu; + + afu->enabled = true; + + /* + * wake up the cpu periodically to check the state + * of the AFU using "afu" stored in the guest structure. 
+ */ + afu->guest->parent = afu; + afu->guest->handle_err = true; + INIT_DELAYED_WORK(&afu->guest->work_err, afu_handle_errstate); + schedule_delayed_work(&afu->guest->work_err, msecs_to_jiffies(1000)); + + if ((rc = cxl_pci_vphb_add(afu))) + dev_info(&afu->dev, "Can't register vPHB\n"); + + return 0; + +err_remove_sysfs: + cxl_sysfs_afu_remove(afu); +err_del_dev: + device_del(&afu->dev); +err_put_dev: + put_device(&afu->dev); + free = false; + guest_release_serr_irq(afu); +err2: + guest_unmap_slice_regs(afu); +err1: + if (free) { + kfree(afu->guest); + kfree(afu); + } + return rc; +} + +void cxl_guest_remove_afu(struct cxl_afu *afu) +{ + if (!afu) + return; + + /* flush and stop pending job */ + afu->guest->handle_err = false; + flush_delayed_work(&afu->guest->work_err); + + cxl_pci_vphb_remove(afu); + cxl_sysfs_afu_remove(afu); + + spin_lock(&afu->adapter->afu_list_lock); + afu->adapter->afu[afu->slice] = NULL; + spin_unlock(&afu->adapter->afu_list_lock); + + cxl_context_detach_all(afu); + cxl_ops->afu_deactivate_mode(afu, afu->current_mode); + guest_release_serr_irq(afu); + guest_unmap_slice_regs(afu); + + device_unregister(&afu->dev); +} + +static void free_adapter(struct cxl *adapter) +{ + struct irq_avail *cur; + int i; + + if (adapter->guest) { + if (adapter->guest->irq_avail) { + for (i = 0; i < adapter->guest->irq_nranges; i++) { + cur = &adapter->guest->irq_avail[i]; + bitmap_free(cur->bitmap); + } + kfree(adapter->guest->irq_avail); + } + kfree(adapter->guest->status); + kfree(adapter->guest); + } + cxl_remove_adapter_nr(adapter); + kfree(adapter); +} + +static int properties_look_ok(struct cxl *adapter) +{ + /* The absence of this property means that the operational + * status is unknown or okay + */ + if (strlen(adapter->guest->status) && + strcmp(adapter->guest->status, "okay")) { + pr_err("ABORTING:Bad operational status of the device\n"); + return -EINVAL; + } + + return 0; +} + +ssize_t cxl_guest_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len) +{ + return guest_collect_vpd(adapter, NULL, buf, len); +} + +void cxl_guest_remove_adapter(struct cxl *adapter) +{ + pr_devel("in %s\n", __func__); + + cxl_sysfs_adapter_remove(adapter); + + cxl_guest_remove_chardev(adapter); + device_unregister(&adapter->dev); +} + +static void release_adapter(struct device *dev) +{ + free_adapter(to_cxl_adapter(dev)); +} + +struct cxl *cxl_guest_init_adapter(struct device_node *np, struct platform_device *pdev) +{ + struct cxl *adapter; + bool free = true; + int rc; + + if (!(adapter = cxl_alloc_adapter())) + return ERR_PTR(-ENOMEM); + + if (!(adapter->guest = kzalloc(sizeof(struct cxl_guest), GFP_KERNEL))) { + free_adapter(adapter); + return ERR_PTR(-ENOMEM); + } + + adapter->slices = 0; + adapter->guest->pdev = pdev; + adapter->dev.parent = &pdev->dev; + adapter->dev.release = release_adapter; + dev_set_drvdata(&pdev->dev, adapter); + + /* + * Hypervisor controls PSL timebase initialization (p1 register). + * On FW840, PSL is initialized. + */ + adapter->psl_timebase_synced = true; + + if ((rc = cxl_of_read_adapter_handle(adapter, np))) + goto err1; + + if ((rc = cxl_of_read_adapter_properties(adapter, np))) + goto err1; + + if ((rc = properties_look_ok(adapter))) + goto err1; + + if ((rc = cxl_guest_add_chardev(adapter))) + goto err1; + + /* + * After we call this function we must not free the adapter directly, + * even if it returns an error! 
+ */ + if ((rc = cxl_register_adapter(adapter))) + goto err_put_dev; + + if ((rc = cxl_sysfs_adapter_add(adapter))) + goto err_del_dev; + + /* release the context lock as the adapter is configured */ + cxl_adapter_context_unlock(adapter); + + return adapter; + +err_del_dev: + device_del(&adapter->dev); +err_put_dev: + put_device(&adapter->dev); + free = false; + cxl_guest_remove_chardev(adapter); +err1: + if (free) + free_adapter(adapter); + return ERR_PTR(rc); +} + +void cxl_guest_reload_module(struct cxl *adapter) +{ + struct platform_device *pdev; + + pdev = adapter->guest->pdev; + cxl_guest_remove_adapter(adapter); + + cxl_of_probe(pdev); +} + +const struct cxl_backend_ops cxl_guest_ops = { + .module = THIS_MODULE, + .adapter_reset = guest_reset, + .alloc_one_irq = guest_alloc_one_irq, + .release_one_irq = guest_release_one_irq, + .alloc_irq_ranges = guest_alloc_irq_ranges, + .release_irq_ranges = guest_release_irq_ranges, + .setup_irq = NULL, + .handle_psl_slice_error = guest_handle_psl_slice_error, + .psl_interrupt = guest_psl_irq, + .ack_irq = guest_ack_irq, + .attach_process = guest_attach_process, + .detach_process = guest_detach_process, + .update_ivtes = NULL, + .support_attributes = guest_support_attributes, + .link_ok = guest_link_ok, + .release_afu = guest_release_afu, + .afu_read_err_buffer = guest_afu_read_err_buffer, + .afu_check_and_enable = guest_afu_check_and_enable, + .afu_activate_mode = guest_afu_activate_mode, + .afu_deactivate_mode = guest_afu_deactivate_mode, + .afu_reset = guest_afu_reset, + .afu_cr_read8 = guest_afu_cr_read8, + .afu_cr_read16 = guest_afu_cr_read16, + .afu_cr_read32 = guest_afu_cr_read32, + .afu_cr_read64 = guest_afu_cr_read64, + .afu_cr_write8 = guest_afu_cr_write8, + .afu_cr_write16 = guest_afu_cr_write16, + .afu_cr_write32 = guest_afu_cr_write32, + .read_adapter_vpd = cxl_guest_read_adapter_vpd, +}; diff --git a/drivers/misc/cxl/hcalls.c b/drivers/misc/cxl/hcalls.c new file mode 100644 index 0000000000..aba5e20eeb --- /dev/null +++ b/drivers/misc/cxl/hcalls.c @@ -0,0 +1,643 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2015 IBM Corp. 
+ */ + + +#include +#include +#include +#include +#include "hcalls.h" +#include "trace.h" + +#define CXL_HCALL_TIMEOUT 60000 +#define CXL_HCALL_TIMEOUT_DOWNLOAD 120000 + +#define H_ATTACH_CA_PROCESS 0x344 +#define H_CONTROL_CA_FUNCTION 0x348 +#define H_DETACH_CA_PROCESS 0x34C +#define H_COLLECT_CA_INT_INFO 0x350 +#define H_CONTROL_CA_FAULTS 0x354 +#define H_DOWNLOAD_CA_FUNCTION 0x35C +#define H_DOWNLOAD_CA_FACILITY 0x364 +#define H_CONTROL_CA_FACILITY 0x368 + +#define H_CONTROL_CA_FUNCTION_RESET 1 /* perform a reset */ +#define H_CONTROL_CA_FUNCTION_SUSPEND_PROCESS 2 /* suspend a process from being executed */ +#define H_CONTROL_CA_FUNCTION_RESUME_PROCESS 3 /* resume a process to be executed */ +#define H_CONTROL_CA_FUNCTION_READ_ERR_STATE 4 /* read the error state */ +#define H_CONTROL_CA_FUNCTION_GET_AFU_ERR 5 /* collect the AFU error buffer */ +#define H_CONTROL_CA_FUNCTION_GET_CONFIG 6 /* collect configuration record */ +#define H_CONTROL_CA_FUNCTION_GET_DOWNLOAD_STATE 7 /* query to return download status */ +#define H_CONTROL_CA_FUNCTION_TERMINATE_PROCESS 8 /* terminate the process before completion */ +#define H_CONTROL_CA_FUNCTION_COLLECT_VPD 9 /* collect VPD */ +#define H_CONTROL_CA_FUNCTION_GET_FUNCTION_ERR_INT 11 /* read the function-wide error data based on an interrupt */ +#define H_CONTROL_CA_FUNCTION_ACK_FUNCTION_ERR_INT 12 /* acknowledge function-wide error data based on an interrupt */ +#define H_CONTROL_CA_FUNCTION_GET_ERROR_LOG 13 /* retrieve the Platform Log ID (PLID) of an error log */ + +#define H_CONTROL_CA_FAULTS_RESPOND_PSL 1 +#define H_CONTROL_CA_FAULTS_RESPOND_AFU 2 + +#define H_CONTROL_CA_FACILITY_RESET 1 /* perform a reset */ +#define H_CONTROL_CA_FACILITY_COLLECT_VPD 2 /* collect VPD */ + +#define H_DOWNLOAD_CA_FACILITY_DOWNLOAD 1 /* download adapter image */ +#define H_DOWNLOAD_CA_FACILITY_VALIDATE 2 /* validate adapter image */ + + +#define _CXL_LOOP_HCALL(call, rc, retbuf, fn, ...) \ + { \ + unsigned int delay, total_delay = 0; \ + u64 token = 0; \ + \ + memset(retbuf, 0, sizeof(retbuf)); \ + while (1) { \ + rc = call(fn, retbuf, __VA_ARGS__, token); \ + token = retbuf[0]; \ + if (rc != H_BUSY && !H_IS_LONG_BUSY(rc)) \ + break; \ + \ + if (rc == H_BUSY) \ + delay = 10; \ + else \ + delay = get_longbusy_msecs(rc); \ + \ + total_delay += delay; \ + if (total_delay > CXL_HCALL_TIMEOUT) { \ + WARN(1, "Warning: Giving up waiting for CXL hcall " \ + "%#x after %u msec\n", fn, total_delay); \ + rc = H_BUSY; \ + break; \ + } \ + msleep(delay); \ + } \ + } +#define CXL_H_WAIT_UNTIL_DONE(...) _CXL_LOOP_HCALL(plpar_hcall, __VA_ARGS__) +#define CXL_H9_WAIT_UNTIL_DONE(...) _CXL_LOOP_HCALL(plpar_hcall9, __VA_ARGS__) + +#define _PRINT_MSG(rc, format, ...) 
\ + { \ + if ((rc != H_SUCCESS) && (rc != H_CONTINUE)) \ + pr_err(format, __VA_ARGS__); \ + else \ + pr_devel(format, __VA_ARGS__); \ + } \ + + +static char *afu_op_names[] = { + "UNKNOWN_OP", /* 0 undefined */ + "RESET", /* 1 */ + "SUSPEND_PROCESS", /* 2 */ + "RESUME_PROCESS", /* 3 */ + "READ_ERR_STATE", /* 4 */ + "GET_AFU_ERR", /* 5 */ + "GET_CONFIG", /* 6 */ + "GET_DOWNLOAD_STATE", /* 7 */ + "TERMINATE_PROCESS", /* 8 */ + "COLLECT_VPD", /* 9 */ + "UNKNOWN_OP", /* 10 undefined */ + "GET_FUNCTION_ERR_INT", /* 11 */ + "ACK_FUNCTION_ERR_INT", /* 12 */ + "GET_ERROR_LOG", /* 13 */ +}; + +static char *control_adapter_op_names[] = { + "UNKNOWN_OP", /* 0 undefined */ + "RESET", /* 1 */ + "COLLECT_VPD", /* 2 */ +}; + +static char *download_op_names[] = { + "UNKNOWN_OP", /* 0 undefined */ + "DOWNLOAD", /* 1 */ + "VALIDATE", /* 2 */ +}; + +static char *op_str(unsigned int op, char *name_array[], int array_len) +{ + if (op >= array_len) + return "UNKNOWN_OP"; + return name_array[op]; +} + +#define OP_STR(op, name_array) op_str(op, name_array, ARRAY_SIZE(name_array)) + +#define OP_STR_AFU(op) OP_STR(op, afu_op_names) +#define OP_STR_CONTROL_ADAPTER(op) OP_STR(op, control_adapter_op_names) +#define OP_STR_DOWNLOAD_ADAPTER(op) OP_STR(op, download_op_names) + + +long cxl_h_attach_process(u64 unit_address, + struct cxl_process_element_hcall *element, + u64 *process_token, u64 *mmio_addr, u64 *mmio_size) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + CXL_H_WAIT_UNTIL_DONE(rc, retbuf, H_ATTACH_CA_PROCESS, unit_address, virt_to_phys(element)); + _PRINT_MSG(rc, "cxl_h_attach_process(%#.16llx, %#.16lx): %li\n", + unit_address, virt_to_phys(element), rc); + trace_cxl_hcall_attach(unit_address, virt_to_phys(element), retbuf[0], retbuf[1], retbuf[2], rc); + + pr_devel("token: 0x%.8lx mmio_addr: 0x%lx mmio_size: 0x%lx\nProcess Element Structure:\n", + retbuf[0], retbuf[1], retbuf[2]); + cxl_dump_debug_buffer(element, sizeof(*element)); + + switch (rc) { + case H_SUCCESS: /* The process info is attached to the coherent platform function */ + *process_token = retbuf[0]; + if (mmio_addr) + *mmio_addr = retbuf[1]; + if (mmio_size) + *mmio_size = retbuf[2]; + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied. */ + case H_FUNCTION: /* The function is not supported. */ + return -EINVAL; + case H_AUTHORITY: /* The partition does not have authority to perform this hcall */ + case H_RESOURCE: /* The coherent platform function does not have enough additional resource to attach the process */ + case H_HARDWARE: /* A hardware event prevented the attach operation */ + case H_STATE: /* The coherent platform function is not in a valid state */ + case H_BUSY: + return -EBUSY; + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/* + * cxl_h_detach_process - Detach a process element from a coherent + * platform function. + */ +long cxl_h_detach_process(u64 unit_address, u64 process_token) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + CXL_H_WAIT_UNTIL_DONE(rc, retbuf, H_DETACH_CA_PROCESS, unit_address, process_token); + _PRINT_MSG(rc, "cxl_h_detach_process(%#.16llx, 0x%.8llx): %li\n", unit_address, process_token, rc); + trace_cxl_hcall_detach(unit_address, process_token, rc); + + switch (rc) { + case H_SUCCESS: /* The process was detached from the coherent platform function */ + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied. 
*/ + return -EINVAL; + case H_AUTHORITY: /* The partition does not have authority to perform this hcall */ + case H_RESOURCE: /* The function has page table mappings for MMIO */ + case H_HARDWARE: /* A hardware event prevented the detach operation */ + case H_STATE: /* The coherent platform function is not in a valid state */ + case H_BUSY: + return -EBUSY; + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/* + * cxl_h_control_function - This H_CONTROL_CA_FUNCTION hypervisor call allows + * the partition to manipulate or query + * certain coherent platform function behaviors. + */ +static long cxl_h_control_function(u64 unit_address, u64 op, + u64 p1, u64 p2, u64 p3, u64 p4, u64 *out) +{ + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + long rc; + + CXL_H9_WAIT_UNTIL_DONE(rc, retbuf, H_CONTROL_CA_FUNCTION, unit_address, op, p1, p2, p3, p4); + _PRINT_MSG(rc, "cxl_h_control_function(%#.16llx, %s(%#llx, %#llx, %#llx, %#llx, R4: %#lx)): %li\n", + unit_address, OP_STR_AFU(op), p1, p2, p3, p4, retbuf[0], rc); + trace_cxl_hcall_control_function(unit_address, OP_STR_AFU(op), p1, p2, p3, p4, retbuf[0], rc); + + switch (rc) { + case H_SUCCESS: /* The operation is completed for the coherent platform function */ + if ((op == H_CONTROL_CA_FUNCTION_GET_FUNCTION_ERR_INT || + op == H_CONTROL_CA_FUNCTION_READ_ERR_STATE || + op == H_CONTROL_CA_FUNCTION_COLLECT_VPD)) + *out = retbuf[0]; + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied. */ + case H_FUNCTION: /* The function is not supported. */ + case H_NOT_FOUND: /* The operation supplied was not valid */ + case H_NOT_AVAILABLE: /* The operation cannot be performed because the AFU has not been downloaded */ + case H_SG_LIST: /* An block list entry was invalid */ + return -EINVAL; + case H_AUTHORITY: /* The partition does not have authority to perform this hcall */ + case H_RESOURCE: /* The function has page table mappings for MMIO */ + case H_HARDWARE: /* A hardware event prevented the attach operation */ + case H_STATE: /* The coherent platform function is not in a valid state */ + case H_BUSY: + return -EBUSY; + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/* + * cxl_h_reset_afu - Perform a reset to the coherent platform function. + */ +long cxl_h_reset_afu(u64 unit_address) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_RESET, + 0, 0, 0, 0, + NULL); +} + +/* + * cxl_h_suspend_process - Suspend a process from being executed + * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when + * process was attached. + */ +long cxl_h_suspend_process(u64 unit_address, u64 process_token) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_SUSPEND_PROCESS, + process_token, 0, 0, 0, + NULL); +} + +/* + * cxl_h_resume_process - Resume a process to be executed + * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when + * process was attached. + */ +long cxl_h_resume_process(u64 unit_address, u64 process_token) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_RESUME_PROCESS, + process_token, 0, 0, 0, + NULL); +} + +/* + * cxl_h_read_error_state - Checks the error state of the coherent + * platform function. 
+ * R4 contains the error state + */ +long cxl_h_read_error_state(u64 unit_address, u64 *state) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_READ_ERR_STATE, + 0, 0, 0, 0, + state); +} + +/* + * cxl_h_get_afu_err - collect the AFU error buffer + * Parameter1 = byte offset into error buffer to retrieve, valid values + * are between 0 and (ibm,error-buffer-size - 1) + * Parameter2 = 4K aligned real address of error buffer, to be filled in + * Parameter3 = length of error buffer, valid values are 4K or less + */ +long cxl_h_get_afu_err(u64 unit_address, u64 offset, + u64 buf_address, u64 len) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_GET_AFU_ERR, + offset, buf_address, len, 0, + NULL); +} + +/* + * cxl_h_get_config - collect configuration record for the + * coherent platform function + * Parameter1 = # of configuration record to retrieve, valid values are + * between 0 and (ibm,#config-records - 1) + * Parameter2 = byte offset into configuration record to retrieve, + * valid values are between 0 and (ibm,config-record-size - 1) + * Parameter3 = 4K aligned real address of configuration record buffer, + * to be filled in + * Parameter4 = length of configuration buffer, valid values are 4K or less + */ +long cxl_h_get_config(u64 unit_address, u64 cr_num, u64 offset, + u64 buf_address, u64 len) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_GET_CONFIG, + cr_num, offset, buf_address, len, + NULL); +} + +/* + * cxl_h_terminate_process - Terminate the process before completion + * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when + * process was attached. + */ +long cxl_h_terminate_process(u64 unit_address, u64 process_token) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_TERMINATE_PROCESS, + process_token, 0, 0, 0, + NULL); +} + +/* + * cxl_h_collect_vpd - Collect VPD for the coherent platform function. + * Parameter1 = # of VPD record to retrieve, valid values are between 0 + * and (ibm,#config-records - 1). + * Parameter2 = 4K naturally aligned real buffer containing block + * list entries + * Parameter3 = number of block list entries in the block list, valid + * values are between 0 and 256 + */ +long cxl_h_collect_vpd(u64 unit_address, u64 record, u64 list_address, + u64 num, u64 *out) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_COLLECT_VPD, + record, list_address, num, 0, + out); +} + +/* + * cxl_h_get_fn_error_interrupt - Read the function-wide error data based on an interrupt + */ +long cxl_h_get_fn_error_interrupt(u64 unit_address, u64 *reg) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_GET_FUNCTION_ERR_INT, + 0, 0, 0, 0, reg); +} + +/* + * cxl_h_ack_fn_error_interrupt - Acknowledge function-wide error data + * based on an interrupt + * Parameter1 = value to write to the function-wide error interrupt register + */ +long cxl_h_ack_fn_error_interrupt(u64 unit_address, u64 value) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_ACK_FUNCTION_ERR_INT, + value, 0, 0, 0, + NULL); +} + +/* + * cxl_h_get_error_log - Retrieve the Platform Log ID (PLID) of + * an error log + */ +long cxl_h_get_error_log(u64 unit_address, u64 value) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_GET_ERROR_LOG, + 0, 0, 0, 0, + NULL); +} + +/* + * cxl_h_collect_int_info - Collect interrupt info about a coherent + * platform function after an interrupt occurred. 
+ */ +long cxl_h_collect_int_info(u64 unit_address, u64 process_token, + struct cxl_irq_info *info) +{ + long rc; + + BUG_ON(sizeof(*info) != sizeof(unsigned long[PLPAR_HCALL9_BUFSIZE])); + + rc = plpar_hcall9(H_COLLECT_CA_INT_INFO, (unsigned long *) info, + unit_address, process_token); + _PRINT_MSG(rc, "cxl_h_collect_int_info(%#.16llx, 0x%llx): %li\n", + unit_address, process_token, rc); + trace_cxl_hcall_collect_int_info(unit_address, process_token, rc); + + switch (rc) { + case H_SUCCESS: /* The interrupt info is returned in return registers. */ + pr_devel("dsisr:%#llx, dar:%#llx, dsr:%#llx, pid_tid:%#llx, afu_err:%#llx, errstat:%#llx\n", + info->dsisr, info->dar, info->dsr, info->reserved, + info->afu_err, info->errstat); + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied. */ + return -EINVAL; + case H_AUTHORITY: /* The partition does not have authority to perform this hcall. */ + case H_HARDWARE: /* A hardware event prevented the collection of the interrupt info.*/ + case H_STATE: /* The coherent platform function is not in a valid state to collect interrupt info. */ + return -EBUSY; + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/* + * cxl_h_control_faults - Control the operation of a coherent platform + * function after a fault occurs. + * + * Parameters + * control-mask: value to control the faults + * looks like PSL_TFC_An shifted >> 32 + * reset-mask: mask to control reset of function faults + * Set reset_mask = 1 to reset PSL errors + */ +long cxl_h_control_faults(u64 unit_address, u64 process_token, + u64 control_mask, u64 reset_mask) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + memset(retbuf, 0, sizeof(retbuf)); + + rc = plpar_hcall(H_CONTROL_CA_FAULTS, retbuf, unit_address, + H_CONTROL_CA_FAULTS_RESPOND_PSL, process_token, + control_mask, reset_mask); + _PRINT_MSG(rc, "cxl_h_control_faults(%#.16llx, 0x%llx, %#llx, %#llx): %li (%#lx)\n", + unit_address, process_token, control_mask, reset_mask, + rc, retbuf[0]); + trace_cxl_hcall_control_faults(unit_address, process_token, + control_mask, reset_mask, retbuf[0], rc); + + switch (rc) { + case H_SUCCESS: /* Faults were successfully controlled for the function. */ + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied. */ + return -EINVAL; + case H_HARDWARE: /* A hardware event prevented the control of faults. */ + case H_STATE: /* The function was in an invalid state. */ + case H_AUTHORITY: /* The partition does not have authority to perform this hcall; the coherent platform facilities may need to be licensed. */ + return -EBUSY; + case H_FUNCTION: /* The function is not supported */ + case H_NOT_FOUND: /* The operation supplied was not valid */ + return -EINVAL; + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/* + * cxl_h_control_facility - This H_CONTROL_CA_FACILITY hypervisor call + * allows the partition to manipulate or query + * certain coherent platform facility behaviors. 
+ */ +static long cxl_h_control_facility(u64 unit_address, u64 op, + u64 p1, u64 p2, u64 p3, u64 p4, u64 *out) +{ + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + long rc; + + CXL_H9_WAIT_UNTIL_DONE(rc, retbuf, H_CONTROL_CA_FACILITY, unit_address, op, p1, p2, p3, p4); + _PRINT_MSG(rc, "cxl_h_control_facility(%#.16llx, %s(%#llx, %#llx, %#llx, %#llx, R4: %#lx)): %li\n", + unit_address, OP_STR_CONTROL_ADAPTER(op), p1, p2, p3, p4, retbuf[0], rc); + trace_cxl_hcall_control_facility(unit_address, OP_STR_CONTROL_ADAPTER(op), p1, p2, p3, p4, retbuf[0], rc); + + switch (rc) { + case H_SUCCESS: /* The operation is completed for the coherent platform facility */ + if (op == H_CONTROL_CA_FACILITY_COLLECT_VPD) + *out = retbuf[0]; + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied. */ + case H_FUNCTION: /* The function is not supported. */ + case H_NOT_FOUND: /* The operation supplied was not valid */ + case H_NOT_AVAILABLE: /* The operation cannot be performed because the AFU has not been downloaded */ + case H_SG_LIST: /* An block list entry was invalid */ + return -EINVAL; + case H_AUTHORITY: /* The partition does not have authority to perform this hcall */ + case H_RESOURCE: /* The function has page table mappings for MMIO */ + case H_HARDWARE: /* A hardware event prevented the attach operation */ + case H_STATE: /* The coherent platform facility is not in a valid state */ + case H_BUSY: + return -EBUSY; + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/* + * cxl_h_reset_adapter - Perform a reset to the coherent platform facility. + */ +long cxl_h_reset_adapter(u64 unit_address) +{ + return cxl_h_control_facility(unit_address, + H_CONTROL_CA_FACILITY_RESET, + 0, 0, 0, 0, + NULL); +} + +/* + * cxl_h_collect_vpd - Collect VPD for the coherent platform function. + * Parameter1 = 4K naturally aligned real buffer containing block + * list entries + * Parameter2 = number of block list entries in the block list, valid + * values are between 0 and 256 + */ +long cxl_h_collect_vpd_adapter(u64 unit_address, u64 list_address, + u64 num, u64 *out) +{ + return cxl_h_control_facility(unit_address, + H_CONTROL_CA_FACILITY_COLLECT_VPD, + list_address, num, 0, 0, + out); +} + +/* + * cxl_h_download_facility - This H_DOWNLOAD_CA_FACILITY + * hypervisor call provide platform support for + * downloading a base adapter image to the coherent + * platform facility, and for validating the entire + * image after the download. + * Parameters + * op: operation to perform to the coherent platform function + * Download: operation = 1, the base image in the coherent platform + * facility is first erased, and then + * programmed using the image supplied + * in the scatter/gather list. + * Validate: operation = 2, the base image in the coherent platform + * facility is compared with the image + * supplied in the scatter/gather list. + * list_address: 4K naturally aligned real buffer containing + * scatter/gather list entries. + * num: number of block list entries in the scatter/gather list. 
+ */ +static long cxl_h_download_facility(u64 unit_address, u64 op, + u64 list_address, u64 num, + u64 *out) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + unsigned int delay, total_delay = 0; + u64 token = 0; + long rc; + + if (*out != 0) + token = *out; + + memset(retbuf, 0, sizeof(retbuf)); + while (1) { + rc = plpar_hcall(H_DOWNLOAD_CA_FACILITY, retbuf, + unit_address, op, list_address, num, + token); + token = retbuf[0]; + if (rc != H_BUSY && !H_IS_LONG_BUSY(rc)) + break; + + if (rc != H_BUSY) { + delay = get_longbusy_msecs(rc); + total_delay += delay; + if (total_delay > CXL_HCALL_TIMEOUT_DOWNLOAD) { + WARN(1, "Warning: Giving up waiting for CXL hcall " + "%#x after %u msec\n", + H_DOWNLOAD_CA_FACILITY, total_delay); + rc = H_BUSY; + break; + } + msleep(delay); + } + } + _PRINT_MSG(rc, "cxl_h_download_facility(%#.16llx, %s(%#llx, %#llx), %#lx): %li\n", + unit_address, OP_STR_DOWNLOAD_ADAPTER(op), list_address, num, retbuf[0], rc); + trace_cxl_hcall_download_facility(unit_address, OP_STR_DOWNLOAD_ADAPTER(op), list_address, num, retbuf[0], rc); + + switch (rc) { + case H_SUCCESS: /* The operation is completed for the coherent platform facility */ + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied */ + case H_FUNCTION: /* The function is not supported. */ + case H_SG_LIST: /* An block list entry was invalid */ + case H_BAD_DATA: /* Image verification failed */ + return -EINVAL; + case H_AUTHORITY: /* The partition does not have authority to perform this hcall */ + case H_RESOURCE: /* The function has page table mappings for MMIO */ + case H_HARDWARE: /* A hardware event prevented the attach operation */ + case H_STATE: /* The coherent platform facility is not in a valid state */ + case H_BUSY: + return -EBUSY; + case H_CONTINUE: + *out = retbuf[0]; + return 1; /* More data is needed for the complete image */ + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/* + * cxl_h_download_adapter_image - Download the base image to the coherent + * platform facility. + */ +long cxl_h_download_adapter_image(u64 unit_address, + u64 list_address, u64 num, + u64 *out) +{ + return cxl_h_download_facility(unit_address, + H_DOWNLOAD_CA_FACILITY_DOWNLOAD, + list_address, num, out); +} + +/* + * cxl_h_validate_adapter_image - Validate the base image in the coherent + * platform facility. + */ +long cxl_h_validate_adapter_image(u64 unit_address, + u64 list_address, u64 num, + u64 *out) +{ + return cxl_h_download_facility(unit_address, + H_DOWNLOAD_CA_FACILITY_VALIDATE, + list_address, num, out); +} diff --git a/drivers/misc/cxl/hcalls.h b/drivers/misc/cxl/hcalls.h new file mode 100644 index 0000000000..d200465dc6 --- /dev/null +++ b/drivers/misc/cxl/hcalls.h @@ -0,0 +1,200 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2015 IBM Corp. + */ + +#ifndef _HCALLS_H +#define _HCALLS_H + +#include +#include +#include +#include "cxl.h" + +#define SG_BUFFER_SIZE 4096 +#define SG_MAX_ENTRIES 256 + +struct sg_list { + u64 phys_addr; + u64 len; +}; + +/* + * This is straight out of PAPR, but replacing some of the compound fields with + * a single field, where they were identical to the register layout. 
+ * + * The 'flags' parameter regroups the various bit-fields + */ +#define CXL_PE_CSRP_VALID (1ULL << 63) +#define CXL_PE_PROBLEM_STATE (1ULL << 62) +#define CXL_PE_SECONDARY_SEGMENT_TBL_SRCH (1ULL << 61) +#define CXL_PE_TAGS_ACTIVE (1ULL << 60) +#define CXL_PE_USER_STATE (1ULL << 59) +#define CXL_PE_TRANSLATION_ENABLED (1ULL << 58) +#define CXL_PE_64_BIT (1ULL << 57) +#define CXL_PE_PRIVILEGED_PROCESS (1ULL << 56) + +#define CXL_PROCESS_ELEMENT_VERSION 1 +struct cxl_process_element_hcall { + __be64 version; + __be64 flags; + u8 reserved0[12]; + __be32 pslVirtualIsn; + u8 applicationVirtualIsnBitmap[256]; + u8 reserved1[144]; + struct cxl_process_element_common common; + u8 reserved4[12]; +} __packed; + +#define H_STATE_NORMAL 1 +#define H_STATE_DISABLE 2 +#define H_STATE_TEMP_UNAVAILABLE 3 +#define H_STATE_PERM_UNAVAILABLE 4 + +/* NOTE: element must be a logical real address, and must be pinned */ +long cxl_h_attach_process(u64 unit_address, struct cxl_process_element_hcall *element, + u64 *process_token, u64 *mmio_addr, u64 *mmio_size); + +/** + * cxl_h_detach_process - Detach a process element from a coherent + * platform function. + */ +long cxl_h_detach_process(u64 unit_address, u64 process_token); + +/** + * cxl_h_reset_afu - Perform a reset to the coherent platform function. + */ +long cxl_h_reset_afu(u64 unit_address); + +/** + * cxl_h_suspend_process - Suspend a process from being executed + * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when + * process was attached. + */ +long cxl_h_suspend_process(u64 unit_address, u64 process_token); + +/** + * cxl_h_resume_process - Resume a process to be executed + * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when + * process was attached. + */ +long cxl_h_resume_process(u64 unit_address, u64 process_token); + +/** + * cxl_h_read_error_state - Reads the error state of the coherent + * platform function. + * R4 contains the error state + */ +long cxl_h_read_error_state(u64 unit_address, u64 *state); + +/** + * cxl_h_get_afu_err - collect the AFU error buffer + * Parameter1 = byte offset into error buffer to retrieve, valid values + * are between 0 and (ibm,error-buffer-size - 1) + * Parameter2 = 4K aligned real address of error buffer, to be filled in + * Parameter3 = length of error buffer, valid values are 4K or less + */ +long cxl_h_get_afu_err(u64 unit_address, u64 offset, u64 buf_address, u64 len); + +/** + * cxl_h_get_config - collect configuration record for the + * coherent platform function + * Parameter1 = # of configuration record to retrieve, valid values are + * between 0 and (ibm,#config-records - 1) + * Parameter2 = byte offset into configuration record to retrieve, + * valid values are between 0 and (ibm,config-record-size - 1) + * Parameter3 = 4K aligned real address of configuration record buffer, + * to be filled in + * Parameter4 = length of configuration buffer, valid values are 4K or less + */ +long cxl_h_get_config(u64 unit_address, u64 cr_num, u64 offset, + u64 buf_address, u64 len); + +/** + * cxl_h_terminate_process - Terminate the process before completion + * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when + * process was attached. + */ +long cxl_h_terminate_process(u64 unit_address, u64 process_token); + +/** + * cxl_h_collect_vpd - Collect VPD for the coherent platform function. + * Parameter1 = # of VPD record to retrieve, valid values are between 0 + * and (ibm,#config-records - 1). 
+ * Parameter2 = 4K naturally aligned real buffer containing block + * list entries + * Parameter3 = number of block list entries in the block list, valid + * values are between 0 and 256 + */ +long cxl_h_collect_vpd(u64 unit_address, u64 record, u64 list_address, + u64 num, u64 *out); + +/** + * cxl_h_get_fn_error_interrupt - Read the function-wide error data based on an interrupt + */ +long cxl_h_get_fn_error_interrupt(u64 unit_address, u64 *reg); + +/** + * cxl_h_ack_fn_error_interrupt - Acknowledge function-wide error data + * based on an interrupt + * Parameter1 = value to write to the function-wide error interrupt register + */ +long cxl_h_ack_fn_error_interrupt(u64 unit_address, u64 value); + +/** + * cxl_h_get_error_log - Retrieve the Platform Log ID (PLID) of + * an error log + */ +long cxl_h_get_error_log(u64 unit_address, u64 value); + +/** + * cxl_h_collect_int_info - Collect interrupt info about a coherent + * platform function after an interrupt occurred. + */ +long cxl_h_collect_int_info(u64 unit_address, u64 process_token, + struct cxl_irq_info *info); + +/** + * cxl_h_control_faults - Control the operation of a coherent platform + * function after a fault occurs. + * + * Parameters + * control-mask: value to control the faults + * looks like PSL_TFC_An shifted >> 32 + * reset-mask: mask to control reset of function faults + * Set reset_mask = 1 to reset PSL errors + */ +long cxl_h_control_faults(u64 unit_address, u64 process_token, + u64 control_mask, u64 reset_mask); + +/** + * cxl_h_reset_adapter - Perform a reset to the coherent platform facility. + */ +long cxl_h_reset_adapter(u64 unit_address); + +/** + * cxl_h_collect_vpd - Collect VPD for the coherent platform function. + * Parameter1 = 4K naturally aligned real buffer containing block + * list entries + * Parameter2 = number of block list entries in the block list, valid + * values are between 0 and 256 + */ +long cxl_h_collect_vpd_adapter(u64 unit_address, u64 list_address, + u64 num, u64 *out); + +/** + * cxl_h_download_adapter_image - Download the base image to the coherent + * platform facility. + */ +long cxl_h_download_adapter_image(u64 unit_address, + u64 list_address, u64 num, + u64 *out); + +/** + * cxl_h_validate_adapter_image - Validate the base image in the coherent + * platform facility. + */ +long cxl_h_validate_adapter_image(u64 unit_address, + u64 list_address, u64 num, + u64 *out); +#endif /* _HCALLS_H */ diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c new file mode 100644 index 0000000000..b730e022a4 --- /dev/null +++ b/drivers/misc/cxl/irq.c @@ -0,0 +1,450 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2014 IBM Corp. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cxl.h" +#include "trace.h" + +static int afu_irq_range_start(void) +{ + if (cpu_has_feature(CPU_FTR_HVMODE)) + return 1; + return 0; +} + +static irqreturn_t schedule_cxl_fault(struct cxl_context *ctx, u64 dsisr, u64 dar) +{ + ctx->dsisr = dsisr; + ctx->dar = dar; + schedule_work(&ctx->fault_work); + return IRQ_HANDLED; +} + +irqreturn_t cxl_irq_psl9(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info) +{ + u64 dsisr, dar; + + dsisr = irq_info->dsisr; + dar = irq_info->dar; + + trace_cxl_psl9_irq(ctx, irq, dsisr, dar); + + pr_devel("CXL interrupt %i for afu pe: %i DSISR: %#llx DAR: %#llx\n", irq, ctx->pe, dsisr, dar); + + if (dsisr & CXL_PSL9_DSISR_An_TF) { + pr_devel("CXL interrupt: Scheduling translation fault handling for later (pe: %i)\n", ctx->pe); + return schedule_cxl_fault(ctx, dsisr, dar); + } + + if (dsisr & CXL_PSL9_DSISR_An_PE) + return cxl_ops->handle_psl_slice_error(ctx, dsisr, + irq_info->errstat); + if (dsisr & CXL_PSL9_DSISR_An_AE) { + pr_devel("CXL interrupt: AFU Error 0x%016llx\n", irq_info->afu_err); + + if (ctx->pending_afu_err) { + /* + * This shouldn't happen - the PSL treats these errors + * as fatal and will have reset the AFU, so there's not + * much point buffering multiple AFU errors. + * OTOH if we DO ever see a storm of these come in it's + * probably best that we log them somewhere: + */ + dev_err_ratelimited(&ctx->afu->dev, "CXL AFU Error undelivered to pe %i: 0x%016llx\n", + ctx->pe, irq_info->afu_err); + } else { + spin_lock(&ctx->lock); + ctx->afu_err = irq_info->afu_err; + ctx->pending_afu_err = 1; + spin_unlock(&ctx->lock); + + wake_up_all(&ctx->wq); + } + + cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_A, 0); + return IRQ_HANDLED; + } + if (dsisr & CXL_PSL9_DSISR_An_OC) + pr_devel("CXL interrupt: OS Context Warning\n"); + + WARN(1, "Unhandled CXL PSL IRQ\n"); + return IRQ_HANDLED; +} + +irqreturn_t cxl_irq_psl8(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info) +{ + u64 dsisr, dar; + + dsisr = irq_info->dsisr; + dar = irq_info->dar; + + trace_cxl_psl_irq(ctx, irq, dsisr, dar); + + pr_devel("CXL interrupt %i for afu pe: %i DSISR: %#llx DAR: %#llx\n", irq, ctx->pe, dsisr, dar); + + if (dsisr & CXL_PSL_DSISR_An_DS) { + /* + * We don't inherently need to sleep to handle this, but we do + * need to get a ref to the task's mm, which we can't do from + * irq context without the potential for a deadlock since it + * takes the task_lock. An alternate option would be to keep a + * reference to the task's mm the entire time it has cxl open, + * but to do that we need to solve the issue where we hold a + * ref to the mm, but the mm can hold a ref to the fd after an + * mmap preventing anything from being cleaned up. 
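+ * Deferring the fault to fault_work (queued by schedule_cxl_fault() above) + * lets it be handled in process context, where taking those references is + * safe.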
+ */ + pr_devel("Scheduling segment miss handling for later pe: %i\n", ctx->pe); + return schedule_cxl_fault(ctx, dsisr, dar); + } + + if (dsisr & CXL_PSL_DSISR_An_M) + pr_devel("CXL interrupt: PTE not found\n"); + if (dsisr & CXL_PSL_DSISR_An_P) + pr_devel("CXL interrupt: Storage protection violation\n"); + if (dsisr & CXL_PSL_DSISR_An_A) + pr_devel("CXL interrupt: AFU lock access to write through or cache inhibited storage\n"); + if (dsisr & CXL_PSL_DSISR_An_S) + pr_devel("CXL interrupt: Access was afu_wr or afu_zero\n"); + if (dsisr & CXL_PSL_DSISR_An_K) + pr_devel("CXL interrupt: Access not permitted by virtual page class key protection\n"); + + if (dsisr & CXL_PSL_DSISR_An_DM) { + /* + * In some cases we might be able to handle the fault + * immediately if hash_page would succeed, but we still need + * the task's mm, which as above we can't get without a lock + */ + pr_devel("Scheduling page fault handling for later pe: %i\n", ctx->pe); + return schedule_cxl_fault(ctx, dsisr, dar); + } + if (dsisr & CXL_PSL_DSISR_An_ST) + WARN(1, "CXL interrupt: Segment Table PTE not found\n"); + if (dsisr & CXL_PSL_DSISR_An_UR) + pr_devel("CXL interrupt: AURP PTE not found\n"); + if (dsisr & CXL_PSL_DSISR_An_PE) + return cxl_ops->handle_psl_slice_error(ctx, dsisr, + irq_info->errstat); + if (dsisr & CXL_PSL_DSISR_An_AE) { + pr_devel("CXL interrupt: AFU Error 0x%016llx\n", irq_info->afu_err); + + if (ctx->pending_afu_err) { + /* + * This shouldn't happen - the PSL treats these errors + * as fatal and will have reset the AFU, so there's not + * much point buffering multiple AFU errors. + * OTOH if we DO ever see a storm of these come in it's + * probably best that we log them somewhere: + */ + dev_err_ratelimited(&ctx->afu->dev, "CXL AFU Error " + "undelivered to pe %i: 0x%016llx\n", + ctx->pe, irq_info->afu_err); + } else { + spin_lock(&ctx->lock); + ctx->afu_err = irq_info->afu_err; + ctx->pending_afu_err = true; + spin_unlock(&ctx->lock); + + wake_up_all(&ctx->wq); + } + + cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_A, 0); + return IRQ_HANDLED; + } + if (dsisr & CXL_PSL_DSISR_An_OC) + pr_devel("CXL interrupt: OS Context Warning\n"); + + WARN(1, "Unhandled CXL PSL IRQ\n"); + return IRQ_HANDLED; +} + +static irqreturn_t cxl_irq_afu(int irq, void *data) +{ + struct cxl_context *ctx = data; + irq_hw_number_t hwirq = irqd_to_hwirq(irq_get_irq_data(irq)); + int irq_off, afu_irq = 0; + __u16 range; + int r; + + /* + * Look for the interrupt number. + * On bare-metal, we know range 0 only contains the PSL + * interrupt so we could start counting at range 1 and initialize + * afu_irq at 1. + * In a guest, range 0 also contains AFU interrupts, so it must + * be counted for. Therefore we initialize afu_irq at 0 to take into + * account the PSL interrupt. + * + * For code-readability, it just seems easier to go over all + * the ranges on bare-metal and guest. The end result is the same. 
+ */ + for (r = 0; r < CXL_IRQ_RANGES; r++) { + irq_off = hwirq - ctx->irqs.offset[r]; + range = ctx->irqs.range[r]; + if (irq_off >= 0 && irq_off < range) { + afu_irq += irq_off; + break; + } + afu_irq += range; + } + if (unlikely(r >= CXL_IRQ_RANGES)) { + WARN(1, "Received AFU IRQ out of range for pe %i (virq %i hwirq %lx)\n", + ctx->pe, irq, hwirq); + return IRQ_HANDLED; + } + + trace_cxl_afu_irq(ctx, afu_irq, irq, hwirq); + pr_devel("Received AFU interrupt %i for pe: %i (virq %i hwirq %lx)\n", + afu_irq, ctx->pe, irq, hwirq); + + if (unlikely(!ctx->irq_bitmap)) { + WARN(1, "Received AFU IRQ for context with no IRQ bitmap\n"); + return IRQ_HANDLED; + } + spin_lock(&ctx->lock); + set_bit(afu_irq - 1, ctx->irq_bitmap); + ctx->pending_irq = true; + spin_unlock(&ctx->lock); + + wake_up_all(&ctx->wq); + + return IRQ_HANDLED; +} + +unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq, + irq_handler_t handler, void *cookie, const char *name) +{ + unsigned int virq; + int result; + + /* IRQ Domain? */ + virq = irq_create_mapping(NULL, hwirq); + if (!virq) { + dev_warn(&adapter->dev, "cxl_map_irq: irq_create_mapping failed\n"); + return 0; + } + + if (cxl_ops->setup_irq) + cxl_ops->setup_irq(adapter, hwirq, virq); + + pr_devel("hwirq %#lx mapped to virq %u\n", hwirq, virq); + + result = request_irq(virq, handler, 0, name, cookie); + if (result) { + dev_warn(&adapter->dev, "cxl_map_irq: request_irq failed: %i\n", result); + return 0; + } + + return virq; +} + +void cxl_unmap_irq(unsigned int virq, void *cookie) +{ + free_irq(virq, cookie); +} + +int cxl_register_one_irq(struct cxl *adapter, + irq_handler_t handler, + void *cookie, + irq_hw_number_t *dest_hwirq, + unsigned int *dest_virq, + const char *name) +{ + int hwirq, virq; + + if ((hwirq = cxl_ops->alloc_one_irq(adapter)) < 0) + return hwirq; + + if (!(virq = cxl_map_irq(adapter, hwirq, handler, cookie, name))) + goto err; + + *dest_hwirq = hwirq; + *dest_virq = virq; + + return 0; + +err: + cxl_ops->release_one_irq(adapter, hwirq); + return -ENOMEM; +} + +void afu_irq_name_free(struct cxl_context *ctx) +{ + struct cxl_irq_name *irq_name, *tmp; + + list_for_each_entry_safe(irq_name, tmp, &ctx->irq_names, list) { + kfree(irq_name->name); + list_del(&irq_name->list); + kfree(irq_name); + } +} + +int afu_allocate_irqs(struct cxl_context *ctx, u32 count) +{ + int rc, r, i, j = 1; + struct cxl_irq_name *irq_name; + int alloc_count; + + /* + * In native mode, range 0 is reserved for the multiplexed + * PSL interrupt. It has been allocated when the AFU was initialized. + * + * In a guest, the PSL interrupt is not mutliplexed, but per-context, + * and is the first interrupt from range 0. It still needs to be + * allocated, so bump the count by one. + */ + if (cpu_has_feature(CPU_FTR_HVMODE)) + alloc_count = count; + else + alloc_count = count + 1; + + if ((rc = cxl_ops->alloc_irq_ranges(&ctx->irqs, ctx->afu->adapter, + alloc_count))) + return rc; + + if (cpu_has_feature(CPU_FTR_HVMODE)) { + /* Multiplexed PSL Interrupt */ + ctx->irqs.offset[0] = ctx->afu->native->psl_hwirq; + ctx->irqs.range[0] = 1; + } + + ctx->irq_count = count; + ctx->irq_bitmap = bitmap_zalloc(count, GFP_KERNEL); + if (!ctx->irq_bitmap) + goto out; + + /* + * Allocate names first. If any fail, bail out before allocating + * actual hardware IRQs. 
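+ * The names built below follow the pattern "cxl-<afu dev name>-pe<pe>-<n>", + * e.g. "cxl-afu0.0-pe12-1" (illustrative only).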
+ */ + for (r = afu_irq_range_start(); r < CXL_IRQ_RANGES; r++) { + for (i = 0; i < ctx->irqs.range[r]; i++) { + irq_name = kmalloc(sizeof(struct cxl_irq_name), + GFP_KERNEL); + if (!irq_name) + goto out; + irq_name->name = kasprintf(GFP_KERNEL, "cxl-%s-pe%i-%i", + dev_name(&ctx->afu->dev), + ctx->pe, j); + if (!irq_name->name) { + kfree(irq_name); + goto out; + } + /* Add to tail so next look get the correct order */ + list_add_tail(&irq_name->list, &ctx->irq_names); + j++; + } + } + return 0; + +out: + cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter); + bitmap_free(ctx->irq_bitmap); + afu_irq_name_free(ctx); + return -ENOMEM; +} + +static void afu_register_hwirqs(struct cxl_context *ctx) +{ + irq_hw_number_t hwirq; + struct cxl_irq_name *irq_name; + int r, i; + irqreturn_t (*handler)(int irq, void *data); + + /* We've allocated all memory now, so let's do the irq allocations */ + irq_name = list_first_entry(&ctx->irq_names, struct cxl_irq_name, list); + for (r = afu_irq_range_start(); r < CXL_IRQ_RANGES; r++) { + hwirq = ctx->irqs.offset[r]; + for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { + if (r == 0 && i == 0) + /* + * The very first interrupt of range 0 is + * always the PSL interrupt, but we only + * need to connect a handler for guests, + * because there's one PSL interrupt per + * context. + * On bare-metal, the PSL interrupt is + * multiplexed and was setup when the AFU + * was configured. + */ + handler = cxl_ops->psl_interrupt; + else + handler = cxl_irq_afu; + cxl_map_irq(ctx->afu->adapter, hwirq, handler, ctx, + irq_name->name); + irq_name = list_next_entry(irq_name, list); + } + } +} + +int afu_register_irqs(struct cxl_context *ctx, u32 count) +{ + int rc; + + rc = afu_allocate_irqs(ctx, count); + if (rc) + return rc; + + afu_register_hwirqs(ctx); + return 0; +} + +void afu_release_irqs(struct cxl_context *ctx, void *cookie) +{ + irq_hw_number_t hwirq; + unsigned int virq; + int r, i; + + for (r = afu_irq_range_start(); r < CXL_IRQ_RANGES; r++) { + hwirq = ctx->irqs.offset[r]; + for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { + virq = irq_find_mapping(NULL, hwirq); + if (virq) + cxl_unmap_irq(virq, cookie); + } + } + + afu_irq_name_free(ctx); + cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter); + + ctx->irq_count = 0; +} + +void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr) +{ + dev_crit(&afu->dev, + "PSL Slice error received. 
Check AFU for root cause.\n"); + dev_crit(&afu->dev, "PSL_SERR_An: 0x%016llx\n", serr); + if (serr & CXL_PSL_SERR_An_afuto) + dev_crit(&afu->dev, "AFU MMIO Timeout\n"); + if (serr & CXL_PSL_SERR_An_afudis) + dev_crit(&afu->dev, + "MMIO targeted Accelerator that was not enabled\n"); + if (serr & CXL_PSL_SERR_An_afuov) + dev_crit(&afu->dev, "AFU CTAG Overflow\n"); + if (serr & CXL_PSL_SERR_An_badsrc) + dev_crit(&afu->dev, "Bad Interrupt Source\n"); + if (serr & CXL_PSL_SERR_An_badctx) + dev_crit(&afu->dev, "Bad Context Handle\n"); + if (serr & CXL_PSL_SERR_An_llcmdis) + dev_crit(&afu->dev, "LLCMD to Disabled AFU\n"); + if (serr & CXL_PSL_SERR_An_llcmdto) + dev_crit(&afu->dev, "LLCMD Timeout to AFU\n"); + if (serr & CXL_PSL_SERR_An_afupar) + dev_crit(&afu->dev, "AFU MMIO Parity Error\n"); + if (serr & CXL_PSL_SERR_An_afudup) + dev_crit(&afu->dev, "AFU MMIO Duplicate CTAG Error\n"); + if (serr & CXL_PSL_SERR_An_AE) + dev_crit(&afu->dev, + "AFU asserted JDONE with JERROR in AFU Directed Mode\n"); +} diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c new file mode 100644 index 0000000000..c1fbf6f588 --- /dev/null +++ b/drivers/misc/cxl/main.c @@ -0,0 +1,383 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2014 IBM Corp. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "cxl.h" +#include "trace.h" + +static DEFINE_SPINLOCK(adapter_idr_lock); +static DEFINE_IDR(cxl_adapter_idr); + +uint cxl_verbose; +module_param_named(verbose, cxl_verbose, uint, 0600); +MODULE_PARM_DESC(verbose, "Enable verbose dmesg output"); + +const struct cxl_backend_ops *cxl_ops; + +int cxl_afu_slbia(struct cxl_afu *afu) +{ + unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + + pr_devel("cxl_afu_slbia issuing SLBIA command\n"); + cxl_p2n_write(afu, CXL_SLBIA_An, CXL_TLB_SLB_IQ_ALL); + while (cxl_p2n_read(afu, CXL_SLBIA_An) & CXL_TLB_SLB_P) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&afu->dev, "WARNING: CXL AFU SLBIA timed out!\n"); + return -EBUSY; + } + /* If the adapter has gone down, we can assume that we + * will PERST it and that will invalidate everything. 
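+ * Returning -EIO here lets callers distinguish a dead link from the + * -EBUSY timeout above.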
+ */ + if (!cxl_ops->link_ok(afu->adapter, afu)) + return -EIO; + cpu_relax(); + } + return 0; +} + +static inline void _cxl_slbia(struct cxl_context *ctx, struct mm_struct *mm) +{ + unsigned long flags; + + if (ctx->mm != mm) + return; + + pr_devel("%s matched mm - card: %i afu: %i pe: %i\n", __func__, + ctx->afu->adapter->adapter_num, ctx->afu->slice, ctx->pe); + + spin_lock_irqsave(&ctx->sste_lock, flags); + trace_cxl_slbia(ctx); + memset(ctx->sstp, 0, ctx->sst_size); + spin_unlock_irqrestore(&ctx->sste_lock, flags); + mb(); + cxl_afu_slbia(ctx->afu); +} + +static inline void cxl_slbia_core(struct mm_struct *mm) +{ + struct cxl *adapter; + struct cxl_afu *afu; + struct cxl_context *ctx; + int card, slice, id; + + pr_devel("%s called\n", __func__); + + spin_lock(&adapter_idr_lock); + idr_for_each_entry(&cxl_adapter_idr, adapter, card) { + /* XXX: Make this lookup faster with link from mm to ctx */ + spin_lock(&adapter->afu_list_lock); + for (slice = 0; slice < adapter->slices; slice++) { + afu = adapter->afu[slice]; + if (!afu || !afu->enabled) + continue; + rcu_read_lock(); + idr_for_each_entry(&afu->contexts_idr, ctx, id) + _cxl_slbia(ctx, mm); + rcu_read_unlock(); + } + spin_unlock(&adapter->afu_list_lock); + } + spin_unlock(&adapter_idr_lock); +} + +static struct cxl_calls cxl_calls = { + .cxl_slbia = cxl_slbia_core, + .owner = THIS_MODULE, +}; + +int cxl_alloc_sst(struct cxl_context *ctx) +{ + unsigned long vsid; + u64 ea_mask, size, sstp0, sstp1; + + sstp0 = 0; + sstp1 = 0; + + ctx->sst_size = PAGE_SIZE; + ctx->sst_lru = 0; + ctx->sstp = (struct cxl_sste *)get_zeroed_page(GFP_KERNEL); + if (!ctx->sstp) { + pr_err("cxl_alloc_sst: Unable to allocate segment table\n"); + return -ENOMEM; + } + pr_devel("SSTP allocated at 0x%p\n", ctx->sstp); + + vsid = get_kernel_vsid((u64)ctx->sstp, mmu_kernel_ssize) << 12; + + sstp0 |= (u64)mmu_kernel_ssize << CXL_SSTP0_An_B_SHIFT; + sstp0 |= (SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp) << 50; + + size = (((u64)ctx->sst_size >> 8) - 1) << CXL_SSTP0_An_SegTableSize_SHIFT; + if (unlikely(size & ~CXL_SSTP0_An_SegTableSize_MASK)) { + WARN(1, "Impossible segment table size\n"); + return -EINVAL; + } + sstp0 |= size; + + if (mmu_kernel_ssize == MMU_SEGSIZE_256M) + ea_mask = 0xfffff00ULL; + else + ea_mask = 0xffffffff00ULL; + + sstp0 |= vsid >> (50-14); /* Top 14 bits of VSID */ + sstp1 |= (vsid << (64-(50-14))) & ~ea_mask; + sstp1 |= (u64)ctx->sstp & ea_mask; + sstp1 |= CXL_SSTP1_An_V; + + pr_devel("Looked up %#llx: slbfee. 
%#llx (ssize: %x, vsid: %#lx), copied to SSTP0: %#llx, SSTP1: %#llx\n", + (u64)ctx->sstp, (u64)ctx->sstp & ESID_MASK, mmu_kernel_ssize, vsid, sstp0, sstp1); + + /* Store calculated sstp hardware points for use later */ + ctx->sstp0 = sstp0; + ctx->sstp1 = sstp1; + + return 0; +} + +/* print buffer content as integers when debugging */ +void cxl_dump_debug_buffer(void *buf, size_t buf_len) +{ +#ifdef DEBUG + int i, *ptr; + + /* + * We want to regroup up to 4 integers per line, which means they + * need to be in the same pr_devel() statement + */ + ptr = (int *) buf; + for (i = 0; i * 4 < buf_len; i += 4) { + if ((i + 3) * 4 < buf_len) + pr_devel("%.8x %.8x %.8x %.8x\n", ptr[i], ptr[i + 1], + ptr[i + 2], ptr[i + 3]); + else if ((i + 2) * 4 < buf_len) + pr_devel("%.8x %.8x %.8x\n", ptr[i], ptr[i + 1], + ptr[i + 2]); + else if ((i + 1) * 4 < buf_len) + pr_devel("%.8x %.8x\n", ptr[i], ptr[i + 1]); + else + pr_devel("%.8x\n", ptr[i]); + } +#endif /* DEBUG */ +} + +/* Find a CXL adapter by it's number and increase it's refcount */ +struct cxl *get_cxl_adapter(int num) +{ + struct cxl *adapter; + + spin_lock(&adapter_idr_lock); + if ((adapter = idr_find(&cxl_adapter_idr, num))) + get_device(&adapter->dev); + spin_unlock(&adapter_idr_lock); + + return adapter; +} + +static int cxl_alloc_adapter_nr(struct cxl *adapter) +{ + int i; + + idr_preload(GFP_KERNEL); + spin_lock(&adapter_idr_lock); + i = idr_alloc(&cxl_adapter_idr, adapter, 0, 0, GFP_NOWAIT); + spin_unlock(&adapter_idr_lock); + idr_preload_end(); + if (i < 0) + return i; + + adapter->adapter_num = i; + + return 0; +} + +void cxl_remove_adapter_nr(struct cxl *adapter) +{ + idr_remove(&cxl_adapter_idr, adapter->adapter_num); +} + +struct cxl *cxl_alloc_adapter(void) +{ + struct cxl *adapter; + + if (!(adapter = kzalloc(sizeof(struct cxl), GFP_KERNEL))) + return NULL; + + spin_lock_init(&adapter->afu_list_lock); + + if (cxl_alloc_adapter_nr(adapter)) + goto err1; + + if (dev_set_name(&adapter->dev, "card%i", adapter->adapter_num)) + goto err2; + + /* start with context lock taken */ + atomic_set(&adapter->contexts_num, -1); + + return adapter; +err2: + cxl_remove_adapter_nr(adapter); +err1: + kfree(adapter); + return NULL; +} + +struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice) +{ + struct cxl_afu *afu; + + if (!(afu = kzalloc(sizeof(struct cxl_afu), GFP_KERNEL))) + return NULL; + + afu->adapter = adapter; + afu->dev.parent = &adapter->dev; + afu->dev.release = cxl_ops->release_afu; + afu->slice = slice; + idr_init(&afu->contexts_idr); + mutex_init(&afu->contexts_lock); + spin_lock_init(&afu->afu_cntl_lock); + atomic_set(&afu->configured_state, -1); + afu->prefault_mode = CXL_PREFAULT_NONE; + afu->irqs_max = afu->adapter->user_irqs; + + return afu; +} + +int cxl_afu_select_best_mode(struct cxl_afu *afu) +{ + if (afu->modes_supported & CXL_MODE_DIRECTED) + return cxl_ops->afu_activate_mode(afu, CXL_MODE_DIRECTED); + + if (afu->modes_supported & CXL_MODE_DEDICATED) + return cxl_ops->afu_activate_mode(afu, CXL_MODE_DEDICATED); + + dev_warn(&afu->dev, "No supported programming modes available\n"); + /* We don't fail this so the user can inspect sysfs */ + return 0; +} + +int cxl_adapter_context_get(struct cxl *adapter) +{ + int rc; + + rc = atomic_inc_unless_negative(&adapter->contexts_num); + return rc ? 
0 : -EBUSY; +} + +void cxl_adapter_context_put(struct cxl *adapter) +{ + atomic_dec_if_positive(&adapter->contexts_num); +} + +int cxl_adapter_context_lock(struct cxl *adapter) +{ + int rc; + /* no active contexts -> contexts_num == 0 */ + rc = atomic_cmpxchg(&adapter->contexts_num, 0, -1); + return rc ? -EBUSY : 0; +} + +void cxl_adapter_context_unlock(struct cxl *adapter) +{ + int val = atomic_cmpxchg(&adapter->contexts_num, -1, 0); + + /* + * contexts lock taken -> contexts_num == -1 + * If not true then show a warning and force reset the lock. + * This will happen when context_unlock was requested without + * doing a context_lock. + */ + if (val != -1) { + atomic_set(&adapter->contexts_num, 0); + WARN(1, "Adapter context unlocked with %d active contexts", + val); + } +} + +static int __init init_cxl(void) +{ + int rc = 0; + + if (!tlbie_capable) + return -EINVAL; + + if ((rc = cxl_file_init())) + return rc; + + cxl_debugfs_init(); + + /* + * we don't register the callback on P9. slb callack is only + * used for the PSL8 MMU and CX4. + */ + if (cxl_is_power8()) { + rc = register_cxl_calls(&cxl_calls); + if (rc) + goto err; + } + + if (cpu_has_feature(CPU_FTR_HVMODE)) { + cxl_ops = &cxl_native_ops; + rc = pci_register_driver(&cxl_pci_driver); + } +#ifdef CONFIG_PPC_PSERIES + else { + cxl_ops = &cxl_guest_ops; + rc = platform_driver_register(&cxl_of_driver); + } +#endif + if (rc) + goto err1; + + return 0; +err1: + if (cxl_is_power8()) + unregister_cxl_calls(&cxl_calls); +err: + cxl_debugfs_exit(); + cxl_file_exit(); + + return rc; +} + +static void exit_cxl(void) +{ + if (cpu_has_feature(CPU_FTR_HVMODE)) + pci_unregister_driver(&cxl_pci_driver); +#ifdef CONFIG_PPC_PSERIES + else + platform_driver_unregister(&cxl_of_driver); +#endif + + cxl_debugfs_exit(); + cxl_file_exit(); + if (cxl_is_power8()) + unregister_cxl_calls(&cxl_calls); + idr_destroy(&cxl_adapter_idr); +} + +module_init(init_cxl); +module_exit(exit_cxl); + +MODULE_DESCRIPTION("IBM Coherent Accelerator"); +MODULE_AUTHOR("Ian Munsie "); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c new file mode 100644 index 0000000000..fbe16a6ab7 --- /dev/null +++ b/drivers/misc/cxl/native.c @@ -0,0 +1,1592 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2014 IBM Corp. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cxl.h" +#include "trace.h" + +static int afu_control(struct cxl_afu *afu, u64 command, u64 clear, + u64 result, u64 mask, bool enabled) +{ + u64 AFU_Cntl; + unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + int rc = 0; + + spin_lock(&afu->afu_cntl_lock); + pr_devel("AFU command starting: %llx\n", command); + + trace_cxl_afu_ctrl(afu, command); + + AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); + cxl_p2n_write(afu, CXL_AFU_Cntl_An, (AFU_Cntl & ~clear) | command); + + AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); + while ((AFU_Cntl & mask) != result) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&afu->dev, "WARNING: AFU control timed out!\n"); + rc = -EBUSY; + goto out; + } + + if (!cxl_ops->link_ok(afu->adapter, afu)) { + afu->enabled = enabled; + rc = -EIO; + goto out; + } + + pr_devel_ratelimited("AFU control... 
(0x%016llx)\n", + AFU_Cntl | command); + cpu_relax(); + AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); + } + + if (AFU_Cntl & CXL_AFU_Cntl_An_RA) { + /* + * Workaround for a bug in the XSL used in the Mellanox CX4 + * that fails to clear the RA bit after an AFU reset, + * preventing subsequent AFU resets from working. + */ + cxl_p2n_write(afu, CXL_AFU_Cntl_An, AFU_Cntl & ~CXL_AFU_Cntl_An_RA); + } + + pr_devel("AFU command complete: %llx\n", command); + afu->enabled = enabled; +out: + trace_cxl_afu_ctrl_done(afu, command, rc); + spin_unlock(&afu->afu_cntl_lock); + + return rc; +} + +static int afu_enable(struct cxl_afu *afu) +{ + pr_devel("AFU enable request\n"); + + return afu_control(afu, CXL_AFU_Cntl_An_E, 0, + CXL_AFU_Cntl_An_ES_Enabled, + CXL_AFU_Cntl_An_ES_MASK, true); +} + +int cxl_afu_disable(struct cxl_afu *afu) +{ + pr_devel("AFU disable request\n"); + + return afu_control(afu, 0, CXL_AFU_Cntl_An_E, + CXL_AFU_Cntl_An_ES_Disabled, + CXL_AFU_Cntl_An_ES_MASK, false); +} + +/* This will disable as well as reset */ +static int native_afu_reset(struct cxl_afu *afu) +{ + int rc; + u64 serr; + + pr_devel("AFU reset request\n"); + + rc = afu_control(afu, CXL_AFU_Cntl_An_RA, 0, + CXL_AFU_Cntl_An_RS_Complete | CXL_AFU_Cntl_An_ES_Disabled, + CXL_AFU_Cntl_An_RS_MASK | CXL_AFU_Cntl_An_ES_MASK, + false); + + /* + * Re-enable any masked interrupts when the AFU is not + * activated to avoid side effects after attaching a process + * in dedicated mode. + */ + if (afu->current_mode == 0) { + serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); + serr &= ~CXL_PSL_SERR_An_IRQ_MASKS; + cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); + } + + return rc; +} + +static int native_afu_check_and_enable(struct cxl_afu *afu) +{ + if (!cxl_ops->link_ok(afu->adapter, afu)) { + WARN(1, "Refusing to enable afu while link down!\n"); + return -EIO; + } + if (afu->enabled) + return 0; + return afu_enable(afu); +} + +int cxl_psl_purge(struct cxl_afu *afu) +{ + u64 PSL_CNTL = cxl_p1n_read(afu, CXL_PSL_SCNTL_An); + u64 AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); + u64 dsisr, dar; + u64 start, end; + u64 trans_fault = 0x0ULL; + unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + int rc = 0; + + trace_cxl_psl_ctrl(afu, CXL_PSL_SCNTL_An_Pc); + + pr_devel("PSL purge request\n"); + + if (cxl_is_power8()) + trans_fault = CXL_PSL_DSISR_TRANS; + if (cxl_is_power9()) + trans_fault = CXL_PSL9_DSISR_An_TF; + + if (!cxl_ops->link_ok(afu->adapter, afu)) { + dev_warn(&afu->dev, "PSL Purge called with link down, ignoring\n"); + rc = -EIO; + goto out; + } + + if ((AFU_Cntl & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) { + WARN(1, "psl_purge request while AFU not disabled!\n"); + cxl_afu_disable(afu); + } + + cxl_p1n_write(afu, CXL_PSL_SCNTL_An, + PSL_CNTL | CXL_PSL_SCNTL_An_Pc); + start = local_clock(); + PSL_CNTL = cxl_p1n_read(afu, CXL_PSL_SCNTL_An); + while ((PSL_CNTL & CXL_PSL_SCNTL_An_Ps_MASK) + == CXL_PSL_SCNTL_An_Ps_Pending) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&afu->dev, "WARNING: PSL Purge timed out!\n"); + rc = -EBUSY; + goto out; + } + if (!cxl_ops->link_ok(afu->adapter, afu)) { + rc = -EIO; + goto out; + } + + dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + pr_devel_ratelimited("PSL purging... 
PSL_CNTL: 0x%016llx PSL_DSISR: 0x%016llx\n", + PSL_CNTL, dsisr); + + if (dsisr & trans_fault) { + dar = cxl_p2n_read(afu, CXL_PSL_DAR_An); + dev_notice(&afu->dev, "PSL purge terminating pending translation, DSISR: 0x%016llx, DAR: 0x%016llx\n", + dsisr, dar); + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); + } else if (dsisr) { + dev_notice(&afu->dev, "PSL purge acknowledging pending non-translation fault, DSISR: 0x%016llx\n", + dsisr); + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); + } else { + cpu_relax(); + } + PSL_CNTL = cxl_p1n_read(afu, CXL_PSL_SCNTL_An); + } + end = local_clock(); + pr_devel("PSL purged in %lld ns\n", end - start); + + cxl_p1n_write(afu, CXL_PSL_SCNTL_An, + PSL_CNTL & ~CXL_PSL_SCNTL_An_Pc); +out: + trace_cxl_psl_ctrl_done(afu, CXL_PSL_SCNTL_An_Pc, rc); + return rc; +} + +static int spa_max_procs(int spa_size) +{ + /* + * From the CAIA: + * end_of_SPA_area = SPA_Base + ((n+4) * 128) + (( ((n*8) + 127) >> 7) * 128) + 255 + * Most of that junk is really just an overly-complicated way of saying + * the last 256 bytes are __aligned(128), so it's really: + * end_of_SPA_area = end_of_PSL_queue_area + __aligned(128) 255 + * and + * end_of_PSL_queue_area = SPA_Base + ((n+4) * 128) + (n*8) - 1 + * so + * sizeof(SPA) = ((n+4) * 128) + (n*8) + __aligned(128) 256 + * Ignore the alignment (which is safe in this case as long as we are + * careful with our rounding) and solve for n: + */ + return ((spa_size / 8) - 96) / 17; +} + +static int cxl_alloc_spa(struct cxl_afu *afu, int mode) +{ + unsigned spa_size; + + /* Work out how many pages to allocate */ + afu->native->spa_order = -1; + do { + afu->native->spa_order++; + spa_size = (1 << afu->native->spa_order) * PAGE_SIZE; + + if (spa_size > 0x100000) { + dev_warn(&afu->dev, "num_of_processes too large for the SPA, limiting to %i (0x%x)\n", + afu->native->spa_max_procs, afu->native->spa_size); + if (mode != CXL_MODE_DEDICATED) + afu->num_procs = afu->native->spa_max_procs; + break; + } + + afu->native->spa_size = spa_size; + afu->native->spa_max_procs = spa_max_procs(afu->native->spa_size); + } while (afu->native->spa_max_procs < afu->num_procs); + + if (!(afu->native->spa = (struct cxl_process_element *) + __get_free_pages(GFP_KERNEL | __GFP_ZERO, afu->native->spa_order))) { + pr_err("cxl_alloc_spa: Unable to allocate scheduled process area\n"); + return -ENOMEM; + } + pr_devel("spa pages: %i afu->spa_max_procs: %i afu->num_procs: %i\n", + 1<native->spa_order, afu->native->spa_max_procs, afu->num_procs); + + return 0; +} + +static void attach_spa(struct cxl_afu *afu) +{ + u64 spap; + + afu->native->sw_command_status = (__be64 *)((char *)afu->native->spa + + ((afu->native->spa_max_procs + 3) * 128)); + + spap = virt_to_phys(afu->native->spa) & CXL_PSL_SPAP_Addr; + spap |= ((afu->native->spa_size >> (12 - CXL_PSL_SPAP_Size_Shift)) - 1) & CXL_PSL_SPAP_Size; + spap |= CXL_PSL_SPAP_V; + pr_devel("cxl: SPA allocated at 0x%p. Max processes: %i, sw_command_status: 0x%p CXL_PSL_SPAP_An=0x%016llx\n", + afu->native->spa, afu->native->spa_max_procs, + afu->native->sw_command_status, spap); + cxl_p1n_write(afu, CXL_PSL_SPAP_An, spap); +} + +void cxl_release_spa(struct cxl_afu *afu) +{ + if (afu->native->spa) { + free_pages((unsigned long) afu->native->spa, + afu->native->spa_order); + afu->native->spa = NULL; + } +} + +/* + * Invalidation of all ERAT entries is no longer required by CAIA2. Use + * only for debug. 
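+ * (cxl_invalidate_all_psl9() below sets CXL_XSL9_IERAT_IALL, adds + * CXL_XSL9_IERAT_INVR when the radix MMU is enabled, then polls the + * register until CXL_XSL9_IERAT_IINPROG clears or CXL_TIMEOUT expires.)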
+ */ +int cxl_invalidate_all_psl9(struct cxl *adapter) +{ + unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + u64 ierat; + + pr_devel("CXL adapter - invalidation of all ERAT entries\n"); + + /* Invalidates all ERAT entries for Radix or HPT */ + ierat = CXL_XSL9_IERAT_IALL; + if (radix_enabled()) + ierat |= CXL_XSL9_IERAT_INVR; + cxl_p1_write(adapter, CXL_XSL9_IERAT, ierat); + + while (cxl_p1_read(adapter, CXL_XSL9_IERAT) & CXL_XSL9_IERAT_IINPROG) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&adapter->dev, + "WARNING: CXL adapter invalidation of all ERAT entries timed out!\n"); + return -EBUSY; + } + if (!cxl_ops->link_ok(adapter, NULL)) + return -EIO; + cpu_relax(); + } + return 0; +} + +int cxl_invalidate_all_psl8(struct cxl *adapter) +{ + unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + + pr_devel("CXL adapter wide TLBIA & SLBIA\n"); + + cxl_p1_write(adapter, CXL_PSL_AFUSEL, CXL_PSL_AFUSEL_A); + + cxl_p1_write(adapter, CXL_PSL_TLBIA, CXL_TLB_SLB_IQ_ALL); + while (cxl_p1_read(adapter, CXL_PSL_TLBIA) & CXL_TLB_SLB_P) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&adapter->dev, "WARNING: CXL adapter wide TLBIA timed out!\n"); + return -EBUSY; + } + if (!cxl_ops->link_ok(adapter, NULL)) + return -EIO; + cpu_relax(); + } + + cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_ALL); + while (cxl_p1_read(adapter, CXL_PSL_SLBIA) & CXL_TLB_SLB_P) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&adapter->dev, "WARNING: CXL adapter wide SLBIA timed out!\n"); + return -EBUSY; + } + if (!cxl_ops->link_ok(adapter, NULL)) + return -EIO; + cpu_relax(); + } + return 0; +} + +int cxl_data_cache_flush(struct cxl *adapter) +{ + u64 reg; + unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + + /* + * Do a datacache flush only if datacache is available. + * In case of PSL9D datacache absent hence flush operation. + * would timeout. + */ + if (adapter->native->no_data_cache) { + pr_devel("No PSL data cache. Ignoring cache flush req.\n"); + return 0; + } + + pr_devel("Flushing data cache\n"); + reg = cxl_p1_read(adapter, CXL_PSL_Control); + reg |= CXL_PSL_Control_Fr; + cxl_p1_write(adapter, CXL_PSL_Control, reg); + + reg = cxl_p1_read(adapter, CXL_PSL_Control); + while ((reg & CXL_PSL_Control_Fs_MASK) != CXL_PSL_Control_Fs_Complete) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&adapter->dev, "WARNING: cache flush timed out!\n"); + return -EBUSY; + } + + if (!cxl_ops->link_ok(adapter, NULL)) { + dev_warn(&adapter->dev, "WARNING: link down when flushing cache\n"); + return -EIO; + } + cpu_relax(); + reg = cxl_p1_read(adapter, CXL_PSL_Control); + } + + reg &= ~CXL_PSL_Control_Fr; + cxl_p1_write(adapter, CXL_PSL_Control, reg); + return 0; +} + +static int cxl_write_sstp(struct cxl_afu *afu, u64 sstp0, u64 sstp1) +{ + int rc; + + /* 1. Disable SSTP by writing 0 to SSTP1[V] */ + cxl_p2n_write(afu, CXL_SSTP1_An, 0); + + /* 2. Invalidate all SLB entries */ + if ((rc = cxl_afu_slbia(afu))) + return rc; + + /* 3. Set SSTP0_An */ + cxl_p2n_write(afu, CXL_SSTP0_An, sstp0); + + /* 4. Set SSTP1_An */ + cxl_p2n_write(afu, CXL_SSTP1_An, sstp1); + + return 0; +} + +/* Using per slice version may improve performance here. (ie. 
SLBIA_An) */ +static void slb_invalid(struct cxl_context *ctx) +{ + struct cxl *adapter = ctx->afu->adapter; + u64 slbia; + + WARN_ON(!mutex_is_locked(&ctx->afu->native->spa_mutex)); + + cxl_p1_write(adapter, CXL_PSL_LBISEL, + ((u64)be32_to_cpu(ctx->elem->common.pid) << 32) | + be32_to_cpu(ctx->elem->lpid)); + cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_LPIDPID); + + while (1) { + if (!cxl_ops->link_ok(adapter, NULL)) + break; + slbia = cxl_p1_read(adapter, CXL_PSL_SLBIA); + if (!(slbia & CXL_TLB_SLB_P)) + break; + cpu_relax(); + } +} + +static int do_process_element_cmd(struct cxl_context *ctx, + u64 cmd, u64 pe_state) +{ + u64 state; + unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + int rc = 0; + + trace_cxl_llcmd(ctx, cmd); + + WARN_ON(!ctx->afu->enabled); + + ctx->elem->software_state = cpu_to_be32(pe_state); + smp_wmb(); + *(ctx->afu->native->sw_command_status) = cpu_to_be64(cmd | 0 | ctx->pe); + smp_mb(); + cxl_p1n_write(ctx->afu, CXL_PSL_LLCMD_An, cmd | ctx->pe); + while (1) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&ctx->afu->dev, "WARNING: Process Element Command timed out!\n"); + rc = -EBUSY; + goto out; + } + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) { + dev_warn(&ctx->afu->dev, "WARNING: Device link down, aborting Process Element Command!\n"); + rc = -EIO; + goto out; + } + state = be64_to_cpup(ctx->afu->native->sw_command_status); + if (state == ~0ULL) { + pr_err("cxl: Error adding process element to AFU\n"); + rc = -1; + goto out; + } + if ((state & (CXL_SPA_SW_CMD_MASK | CXL_SPA_SW_STATE_MASK | CXL_SPA_SW_LINK_MASK)) == + (cmd | (cmd >> 16) | ctx->pe)) + break; + /* + * The command won't finish in the PSL if there are + * outstanding DSIs. Hence we need to yield here in + * case there are outstanding DSIs that we need to + * service. Tuning possiblity: we could wait for a + * while before sched + */ + schedule(); + + } +out: + trace_cxl_llcmd_done(ctx, cmd, rc); + return rc; +} + +static int add_process_element(struct cxl_context *ctx) +{ + int rc = 0; + + mutex_lock(&ctx->afu->native->spa_mutex); + pr_devel("%s Adding pe: %i started\n", __func__, ctx->pe); + if (!(rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_ADD, CXL_PE_SOFTWARE_STATE_V))) + ctx->pe_inserted = true; + pr_devel("%s Adding pe: %i finished\n", __func__, ctx->pe); + mutex_unlock(&ctx->afu->native->spa_mutex); + return rc; +} + +static int terminate_process_element(struct cxl_context *ctx) +{ + int rc = 0; + + /* fast path terminate if it's already invalid */ + if (!(ctx->elem->software_state & cpu_to_be32(CXL_PE_SOFTWARE_STATE_V))) + return rc; + + mutex_lock(&ctx->afu->native->spa_mutex); + pr_devel("%s Terminate pe: %i started\n", __func__, ctx->pe); + /* We could be asked to terminate when the hw is down. That + * should always succeed: it's not running if the hw has gone + * away and is being reset. + */ + if (cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) + rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_TERMINATE, + CXL_PE_SOFTWARE_STATE_V | CXL_PE_SOFTWARE_STATE_T); + ctx->elem->software_state = 0; /* Remove Valid bit */ + pr_devel("%s Terminate pe: %i finished\n", __func__, ctx->pe); + mutex_unlock(&ctx->afu->native->spa_mutex); + return rc; +} + +static int remove_process_element(struct cxl_context *ctx) +{ + int rc = 0; + + mutex_lock(&ctx->afu->native->spa_mutex); + pr_devel("%s Remove pe: %i started\n", __func__, ctx->pe); + + /* We could be asked to remove when the hw is down. Again, if + * the hw is down, the PE is gone, so we succeed. 
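+ * (terminate_process_element() above relies on the same reasoning.)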
+ */ + if (cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) + rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_REMOVE, 0); + + if (!rc) + ctx->pe_inserted = false; + if (cxl_is_power8()) + slb_invalid(ctx); + pr_devel("%s Remove pe: %i finished\n", __func__, ctx->pe); + mutex_unlock(&ctx->afu->native->spa_mutex); + + return rc; +} + +void cxl_assign_psn_space(struct cxl_context *ctx) +{ + if (!ctx->afu->pp_size || ctx->master) { + ctx->psn_phys = ctx->afu->psn_phys; + ctx->psn_size = ctx->afu->adapter->ps_size; + } else { + ctx->psn_phys = ctx->afu->psn_phys + + (ctx->afu->native->pp_offset + ctx->afu->pp_size * ctx->pe); + ctx->psn_size = ctx->afu->pp_size; + } +} + +static int activate_afu_directed(struct cxl_afu *afu) +{ + int rc; + + dev_info(&afu->dev, "Activating AFU directed mode\n"); + + afu->num_procs = afu->max_procs_virtualised; + if (afu->native->spa == NULL) { + if (cxl_alloc_spa(afu, CXL_MODE_DIRECTED)) + return -ENOMEM; + } + attach_spa(afu); + + cxl_p1n_write(afu, CXL_PSL_SCNTL_An, CXL_PSL_SCNTL_An_PM_AFU); + if (cxl_is_power8()) + cxl_p1n_write(afu, CXL_PSL_AMOR_An, 0xFFFFFFFFFFFFFFFFULL); + cxl_p1n_write(afu, CXL_PSL_ID_An, CXL_PSL_ID_An_F | CXL_PSL_ID_An_L); + + afu->current_mode = CXL_MODE_DIRECTED; + + if ((rc = cxl_chardev_m_afu_add(afu))) + return rc; + + if ((rc = cxl_sysfs_afu_m_add(afu))) + goto err; + + if ((rc = cxl_chardev_s_afu_add(afu))) + goto err1; + + return 0; +err1: + cxl_sysfs_afu_m_remove(afu); +err: + cxl_chardev_afu_remove(afu); + return rc; +} + +#ifdef CONFIG_CPU_LITTLE_ENDIAN +#define set_endian(sr) ((sr) |= CXL_PSL_SR_An_LE) +#else +#define set_endian(sr) ((sr) &= ~(CXL_PSL_SR_An_LE)) +#endif + +u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9) +{ + u64 sr = 0; + + set_endian(sr); + if (master) + sr |= CXL_PSL_SR_An_MP; + if (mfspr(SPRN_LPCR) & LPCR_TC) + sr |= CXL_PSL_SR_An_TC; + + if (kernel) { + if (!real_mode) + sr |= CXL_PSL_SR_An_R; + sr |= (mfmsr() & MSR_SF) | CXL_PSL_SR_An_HV; + } else { + sr |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R; + if (radix_enabled()) + sr |= CXL_PSL_SR_An_HV; + else + sr &= ~(CXL_PSL_SR_An_HV); + if (!test_tsk_thread_flag(current, TIF_32BIT)) + sr |= CXL_PSL_SR_An_SF; + } + if (p9) { + if (radix_enabled()) + sr |= CXL_PSL_SR_An_XLAT_ror; + else + sr |= CXL_PSL_SR_An_XLAT_hpt; + } + return sr; +} + +static u64 calculate_sr(struct cxl_context *ctx) +{ + return cxl_calculate_sr(ctx->master, ctx->kernel, false, + cxl_is_power9()); +} + +static void update_ivtes_directed(struct cxl_context *ctx) +{ + bool need_update = (ctx->status == STARTED); + int r; + + if (need_update) { + WARN_ON(terminate_process_element(ctx)); + WARN_ON(remove_process_element(ctx)); + } + + for (r = 0; r < CXL_IRQ_RANGES; r++) { + ctx->elem->ivte_offsets[r] = cpu_to_be16(ctx->irqs.offset[r]); + ctx->elem->ivte_ranges[r] = cpu_to_be16(ctx->irqs.range[r]); + } + + /* + * Theoretically we could use the update llcmd, instead of a + * terminate/remove/add (or if an atomic update was required we could + * do a suspend/update/resume), however it seems there might be issues + * with the update llcmd on some cards (including those using an XSL on + * an ASIC) so for now it's safest to go with the commands that are + * known to work. In the future if we come across a situation where the + * card may be performing transactions using the same PE while we are + * doing this update we might need to revisit this. 
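+ * (All three helpers go through do_process_element_cmd() above, which issues + * the LLCMD and polls sw_command_status until the command completes.)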
+ */ + if (need_update) + WARN_ON(add_process_element(ctx)); +} + +static int process_element_entry_psl9(struct cxl_context *ctx, u64 wed, u64 amr) +{ + u32 pid; + int rc; + + cxl_assign_psn_space(ctx); + + ctx->elem->ctxtime = 0; /* disable */ + ctx->elem->lpid = cpu_to_be32(mfspr(SPRN_LPID)); + ctx->elem->haurp = 0; /* disable */ + + if (ctx->kernel) + pid = 0; + else { + if (ctx->mm == NULL) { + pr_devel("%s: unable to get mm for pe=%d pid=%i\n", + __func__, ctx->pe, pid_nr(ctx->pid)); + return -EINVAL; + } + pid = ctx->mm->context.id; + } + + /* Assign a unique TIDR (thread id) for the current thread */ + if (!(ctx->tidr) && (ctx->assign_tidr)) { + rc = set_thread_tidr(current); + if (rc) + return -ENODEV; + ctx->tidr = current->thread.tidr; + pr_devel("%s: current tidr: %d\n", __func__, ctx->tidr); + } + + ctx->elem->common.tid = cpu_to_be32(ctx->tidr); + ctx->elem->common.pid = cpu_to_be32(pid); + + ctx->elem->sr = cpu_to_be64(calculate_sr(ctx)); + + ctx->elem->common.csrp = 0; /* disable */ + + cxl_prefault(ctx, wed); + + /* + * Ensure we have the multiplexed PSL interrupt set up to take faults + * for kernel contexts that may not have allocated any AFU IRQs at all: + */ + if (ctx->irqs.range[0] == 0) { + ctx->irqs.offset[0] = ctx->afu->native->psl_hwirq; + ctx->irqs.range[0] = 1; + } + + ctx->elem->common.amr = cpu_to_be64(amr); + ctx->elem->common.wed = cpu_to_be64(wed); + + return 0; +} + +int cxl_attach_afu_directed_psl9(struct cxl_context *ctx, u64 wed, u64 amr) +{ + int result; + + /* fill the process element entry */ + result = process_element_entry_psl9(ctx, wed, amr); + if (result) + return result; + + update_ivtes_directed(ctx); + + /* first guy needs to enable */ + result = cxl_ops->afu_check_and_enable(ctx->afu); + if (result) + return result; + + return add_process_element(ctx); +} + +int cxl_attach_afu_directed_psl8(struct cxl_context *ctx, u64 wed, u64 amr) +{ + u32 pid; + int result; + + cxl_assign_psn_space(ctx); + + ctx->elem->ctxtime = 0; /* disable */ + ctx->elem->lpid = cpu_to_be32(mfspr(SPRN_LPID)); + ctx->elem->haurp = 0; /* disable */ + ctx->elem->u.sdr = cpu_to_be64(mfspr(SPRN_SDR1)); + + pid = current->pid; + if (ctx->kernel) + pid = 0; + ctx->elem->common.tid = 0; + ctx->elem->common.pid = cpu_to_be32(pid); + + ctx->elem->sr = cpu_to_be64(calculate_sr(ctx)); + + ctx->elem->common.csrp = 0; /* disable */ + ctx->elem->common.u.psl8.aurp0 = 0; /* disable */ + ctx->elem->common.u.psl8.aurp1 = 0; /* disable */ + + cxl_prefault(ctx, wed); + + ctx->elem->common.u.psl8.sstp0 = cpu_to_be64(ctx->sstp0); + ctx->elem->common.u.psl8.sstp1 = cpu_to_be64(ctx->sstp1); + + /* + * Ensure we have the multiplexed PSL interrupt set up to take faults + * for kernel contexts that may not have allocated any AFU IRQs at all: + */ + if (ctx->irqs.range[0] == 0) { + ctx->irqs.offset[0] = ctx->afu->native->psl_hwirq; + ctx->irqs.range[0] = 1; + } + + update_ivtes_directed(ctx); + + ctx->elem->common.amr = cpu_to_be64(amr); + ctx->elem->common.wed = cpu_to_be64(wed); + + /* first guy needs to enable */ + if ((result = cxl_ops->afu_check_and_enable(ctx->afu))) + return result; + + return add_process_element(ctx); +} + +static int deactivate_afu_directed(struct cxl_afu *afu) +{ + dev_info(&afu->dev, "Deactivating AFU directed mode\n"); + + afu->current_mode = 0; + afu->num_procs = 0; + + cxl_sysfs_afu_m_remove(afu); + cxl_chardev_afu_remove(afu); + + /* + * The CAIA section 2.2.1 indicates that the procedure for starting and + * stopping an AFU in AFU directed mode is AFU specific, 
which is not + * ideal since this code is generic and with one exception has no + * knowledge of the AFU. This is in contrast to the procedure for + * disabling a dedicated process AFU, which is documented to just + * require a reset. The architecture does indicate that both an AFU + * reset and an AFU disable should result in the AFU being disabled and + * we do both followed by a PSL purge for safety. + * + * Notably we used to have some issues with the disable sequence on PSL + * cards, which is why we ended up using this heavy weight procedure in + * the first place, however a bug was discovered that had rendered the + * disable operation ineffective, so it is conceivable that was the + * sole explanation for those difficulties. Careful regression testing + * is recommended if anyone attempts to remove or reorder these + * operations. + * + * The XSL on the Mellanox CX4 behaves a little differently from the + * PSL based cards and will time out an AFU reset if the AFU is still + * enabled. That card is special in that we do have a means to identify + * it from this code, so in that case we skip the reset and just use a + * disable/purge to avoid the timeout and corresponding noise in the + * kernel log. + */ + if (afu->adapter->native->sl_ops->needs_reset_before_disable) + cxl_ops->afu_reset(afu); + cxl_afu_disable(afu); + cxl_psl_purge(afu); + + return 0; +} + +int cxl_activate_dedicated_process_psl9(struct cxl_afu *afu) +{ + dev_info(&afu->dev, "Activating dedicated process mode\n"); + + /* + * If XSL is set to dedicated mode (Set in PSL_SCNTL reg), the + * XSL and AFU are programmed to work with a single context. + * The context information should be configured in the SPA area + * index 0 (so PSL_SPAP must be configured before enabling the + * AFU). 
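+ * That is why num_procs is forced to 1 and attach_spa() is called below, + * before the AFU is switched to CXL_MODE_DEDICATED.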
+ */ + afu->num_procs = 1; + if (afu->native->spa == NULL) { + if (cxl_alloc_spa(afu, CXL_MODE_DEDICATED)) + return -ENOMEM; + } + attach_spa(afu); + + cxl_p1n_write(afu, CXL_PSL_SCNTL_An, CXL_PSL_SCNTL_An_PM_Process); + cxl_p1n_write(afu, CXL_PSL_ID_An, CXL_PSL_ID_An_F | CXL_PSL_ID_An_L); + + afu->current_mode = CXL_MODE_DEDICATED; + + return cxl_chardev_d_afu_add(afu); +} + +int cxl_activate_dedicated_process_psl8(struct cxl_afu *afu) +{ + dev_info(&afu->dev, "Activating dedicated process mode\n"); + + cxl_p1n_write(afu, CXL_PSL_SCNTL_An, CXL_PSL_SCNTL_An_PM_Process); + + cxl_p1n_write(afu, CXL_PSL_CtxTime_An, 0); /* disable */ + cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0); /* disable */ + cxl_p1n_write(afu, CXL_PSL_AMOR_An, 0xFFFFFFFFFFFFFFFFULL); + cxl_p1n_write(afu, CXL_PSL_LPID_An, mfspr(SPRN_LPID)); + cxl_p1n_write(afu, CXL_HAURP_An, 0); /* disable */ + cxl_p1n_write(afu, CXL_PSL_SDR_An, mfspr(SPRN_SDR1)); + + cxl_p2n_write(afu, CXL_CSRP_An, 0); /* disable */ + cxl_p2n_write(afu, CXL_AURP0_An, 0); /* disable */ + cxl_p2n_write(afu, CXL_AURP1_An, 0); /* disable */ + + afu->current_mode = CXL_MODE_DEDICATED; + afu->num_procs = 1; + + return cxl_chardev_d_afu_add(afu); +} + +void cxl_update_dedicated_ivtes_psl9(struct cxl_context *ctx) +{ + int r; + + for (r = 0; r < CXL_IRQ_RANGES; r++) { + ctx->elem->ivte_offsets[r] = cpu_to_be16(ctx->irqs.offset[r]); + ctx->elem->ivte_ranges[r] = cpu_to_be16(ctx->irqs.range[r]); + } +} + +void cxl_update_dedicated_ivtes_psl8(struct cxl_context *ctx) +{ + struct cxl_afu *afu = ctx->afu; + + cxl_p1n_write(afu, CXL_PSL_IVTE_Offset_An, + (((u64)ctx->irqs.offset[0] & 0xffff) << 48) | + (((u64)ctx->irqs.offset[1] & 0xffff) << 32) | + (((u64)ctx->irqs.offset[2] & 0xffff) << 16) | + ((u64)ctx->irqs.offset[3] & 0xffff)); + cxl_p1n_write(afu, CXL_PSL_IVTE_Limit_An, (u64) + (((u64)ctx->irqs.range[0] & 0xffff) << 48) | + (((u64)ctx->irqs.range[1] & 0xffff) << 32) | + (((u64)ctx->irqs.range[2] & 0xffff) << 16) | + ((u64)ctx->irqs.range[3] & 0xffff)); +} + +int cxl_attach_dedicated_process_psl9(struct cxl_context *ctx, u64 wed, u64 amr) +{ + struct cxl_afu *afu = ctx->afu; + int result; + + /* fill the process element entry */ + result = process_element_entry_psl9(ctx, wed, amr); + if (result) + return result; + + if (ctx->afu->adapter->native->sl_ops->update_dedicated_ivtes) + afu->adapter->native->sl_ops->update_dedicated_ivtes(ctx); + + ctx->elem->software_state = cpu_to_be32(CXL_PE_SOFTWARE_STATE_V); + /* + * Ideally we should do a wmb() here to make sure the changes to the + * PE are visible to the card before we call afu_enable. + * On ppc64 though all mmios are preceded by a 'sync' instruction hence + * we dont dont need one here. 
+ */ + + result = cxl_ops->afu_reset(afu); + if (result) + return result; + + return afu_enable(afu); +} + +int cxl_attach_dedicated_process_psl8(struct cxl_context *ctx, u64 wed, u64 amr) +{ + struct cxl_afu *afu = ctx->afu; + u64 pid; + int rc; + + pid = (u64)current->pid << 32; + if (ctx->kernel) + pid = 0; + cxl_p2n_write(afu, CXL_PSL_PID_TID_An, pid); + + cxl_p1n_write(afu, CXL_PSL_SR_An, calculate_sr(ctx)); + + if ((rc = cxl_write_sstp(afu, ctx->sstp0, ctx->sstp1))) + return rc; + + cxl_prefault(ctx, wed); + + if (ctx->afu->adapter->native->sl_ops->update_dedicated_ivtes) + afu->adapter->native->sl_ops->update_dedicated_ivtes(ctx); + + cxl_p2n_write(afu, CXL_PSL_AMR_An, amr); + + /* master only context for dedicated */ + cxl_assign_psn_space(ctx); + + if ((rc = cxl_ops->afu_reset(afu))) + return rc; + + cxl_p2n_write(afu, CXL_PSL_WED_An, wed); + + return afu_enable(afu); +} + +static int deactivate_dedicated_process(struct cxl_afu *afu) +{ + dev_info(&afu->dev, "Deactivating dedicated process mode\n"); + + afu->current_mode = 0; + afu->num_procs = 0; + + cxl_chardev_afu_remove(afu); + + return 0; +} + +static int native_afu_deactivate_mode(struct cxl_afu *afu, int mode) +{ + if (mode == CXL_MODE_DIRECTED) + return deactivate_afu_directed(afu); + if (mode == CXL_MODE_DEDICATED) + return deactivate_dedicated_process(afu); + return 0; +} + +static int native_afu_activate_mode(struct cxl_afu *afu, int mode) +{ + if (!mode) + return 0; + if (!(mode & afu->modes_supported)) + return -EINVAL; + + if (!cxl_ops->link_ok(afu->adapter, afu)) { + WARN(1, "Device link is down, refusing to activate!\n"); + return -EIO; + } + + if (mode == CXL_MODE_DIRECTED) + return activate_afu_directed(afu); + if ((mode == CXL_MODE_DEDICATED) && + (afu->adapter->native->sl_ops->activate_dedicated_process)) + return afu->adapter->native->sl_ops->activate_dedicated_process(afu); + + return -EINVAL; +} + +static int native_attach_process(struct cxl_context *ctx, bool kernel, + u64 wed, u64 amr) +{ + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) { + WARN(1, "Device link is down, refusing to attach process!\n"); + return -EIO; + } + + ctx->kernel = kernel; + if ((ctx->afu->current_mode == CXL_MODE_DIRECTED) && + (ctx->afu->adapter->native->sl_ops->attach_afu_directed)) + return ctx->afu->adapter->native->sl_ops->attach_afu_directed(ctx, wed, amr); + + if ((ctx->afu->current_mode == CXL_MODE_DEDICATED) && + (ctx->afu->adapter->native->sl_ops->attach_dedicated_process)) + return ctx->afu->adapter->native->sl_ops->attach_dedicated_process(ctx, wed, amr); + + return -EINVAL; +} + +static inline int detach_process_native_dedicated(struct cxl_context *ctx) +{ + /* + * The CAIA section 2.1.1 indicates that we need to do an AFU reset to + * stop the AFU in dedicated mode (we therefore do not make that + * optional like we do in the afu directed path). It does not indicate + * that we need to do an explicit disable (which should occur + * implicitly as part of the reset) or purge, but we do these as well + * to be on the safe side. + * + * Notably we used to have some issues with the disable sequence + * (before the sequence was spelled out in the architecture) which is + * why we were so heavy weight in the first place, however a bug was + * discovered that had rendered the disable operation ineffective, so + * it is conceivable that was the sole explanation for those + * difficulties. Point is, we should be careful and do some regression + * testing if we ever attempt to remove any part of this procedure. 
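+ * (deactivate_afu_directed() above spells out the same caution for the + * reset/disable/purge sequence used in AFU directed mode.)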
+ */ + cxl_ops->afu_reset(ctx->afu); + cxl_afu_disable(ctx->afu); + cxl_psl_purge(ctx->afu); + return 0; +} + +static void native_update_ivtes(struct cxl_context *ctx) +{ + if (ctx->afu->current_mode == CXL_MODE_DIRECTED) + return update_ivtes_directed(ctx); + if ((ctx->afu->current_mode == CXL_MODE_DEDICATED) && + (ctx->afu->adapter->native->sl_ops->update_dedicated_ivtes)) + return ctx->afu->adapter->native->sl_ops->update_dedicated_ivtes(ctx); + WARN(1, "native_update_ivtes: Bad mode\n"); +} + +static inline int detach_process_native_afu_directed(struct cxl_context *ctx) +{ + if (!ctx->pe_inserted) + return 0; + if (terminate_process_element(ctx)) + return -1; + if (remove_process_element(ctx)) + return -1; + + return 0; +} + +static int native_detach_process(struct cxl_context *ctx) +{ + trace_cxl_detach(ctx); + + if (ctx->afu->current_mode == CXL_MODE_DEDICATED) + return detach_process_native_dedicated(ctx); + + return detach_process_native_afu_directed(ctx); +} + +static int native_get_irq_info(struct cxl_afu *afu, struct cxl_irq_info *info) +{ + /* If the adapter has gone away, we can't get any meaningful + * information. + */ + if (!cxl_ops->link_ok(afu->adapter, afu)) + return -EIO; + + info->dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + info->dar = cxl_p2n_read(afu, CXL_PSL_DAR_An); + if (cxl_is_power8()) + info->dsr = cxl_p2n_read(afu, CXL_PSL_DSR_An); + info->afu_err = cxl_p2n_read(afu, CXL_AFU_ERR_An); + info->errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); + info->proc_handle = 0; + + return 0; +} + +void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx) +{ + u64 fir1, serr; + + fir1 = cxl_p1_read(ctx->afu->adapter, CXL_PSL9_FIR1); + + dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%016llx\n", fir1); + if (ctx->afu->adapter->native->sl_ops->register_serr_irq) { + serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An); + cxl_afu_decode_psl_serr(ctx->afu, serr); + } +} + +void cxl_native_irq_dump_regs_psl8(struct cxl_context *ctx) +{ + u64 fir1, fir2, fir_slice, serr, afu_debug; + + fir1 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR1); + fir2 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR2); + fir_slice = cxl_p1n_read(ctx->afu, CXL_PSL_FIR_SLICE_An); + afu_debug = cxl_p1n_read(ctx->afu, CXL_AFU_DEBUG_An); + + dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%016llx\n", fir1); + dev_crit(&ctx->afu->dev, "PSL_FIR2: 0x%016llx\n", fir2); + if (ctx->afu->adapter->native->sl_ops->register_serr_irq) { + serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An); + cxl_afu_decode_psl_serr(ctx->afu, serr); + } + dev_crit(&ctx->afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice); + dev_crit(&ctx->afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug); +} + +static irqreturn_t native_handle_psl_slice_error(struct cxl_context *ctx, + u64 dsisr, u64 errstat) +{ + + dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%016llx\n", errstat); + + if (ctx->afu->adapter->native->sl_ops->psl_irq_dump_registers) + ctx->afu->adapter->native->sl_ops->psl_irq_dump_registers(ctx); + + if (ctx->afu->adapter->native->sl_ops->debugfs_stop_trace) { + dev_crit(&ctx->afu->dev, "STOPPING CXL TRACE\n"); + ctx->afu->adapter->native->sl_ops->debugfs_stop_trace(ctx->afu->adapter); + } + + return cxl_ops->ack_irq(ctx, 0, errstat); +} + +static bool cxl_is_translation_fault(struct cxl_afu *afu, u64 dsisr) +{ + if ((cxl_is_power8()) && (dsisr & CXL_PSL_DSISR_TRANS)) + return true; + + if ((cxl_is_power9()) && (dsisr & CXL_PSL9_DSISR_An_TF)) + return true; + + return false; +} + +irqreturn_t cxl_fail_irq_psl(struct cxl_afu *afu, struct cxl_irq_info 
*irq_info) +{ + if (cxl_is_translation_fault(afu, irq_info->dsisr)) + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); + else + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); + + return IRQ_HANDLED; +} + +static irqreturn_t native_irq_multiplexed(int irq, void *data) +{ + struct cxl_afu *afu = data; + struct cxl_context *ctx; + struct cxl_irq_info irq_info; + u64 phreg = cxl_p2n_read(afu, CXL_PSL_PEHandle_An); + int ph, ret = IRQ_HANDLED, res; + + /* check if eeh kicked in while the interrupt was in flight */ + if (unlikely(phreg == ~0ULL)) { + dev_warn(&afu->dev, + "Ignoring slice interrupt(%d) due to fenced card", + irq); + return IRQ_HANDLED; + } + /* Mask the pe-handle from register value */ + ph = phreg & 0xffff; + if ((res = native_get_irq_info(afu, &irq_info))) { + WARN(1, "Unable to get CXL IRQ Info: %i\n", res); + if (afu->adapter->native->sl_ops->fail_irq) + return afu->adapter->native->sl_ops->fail_irq(afu, &irq_info); + return ret; + } + + rcu_read_lock(); + ctx = idr_find(&afu->contexts_idr, ph); + if (ctx) { + if (afu->adapter->native->sl_ops->handle_interrupt) + ret = afu->adapter->native->sl_ops->handle_interrupt(irq, ctx, &irq_info); + rcu_read_unlock(); + return ret; + } + rcu_read_unlock(); + + WARN(1, "Unable to demultiplex CXL PSL IRQ for PE %i DSISR %016llx DAR" + " %016llx\n(Possible AFU HW issue - was a term/remove acked" + " with outstanding transactions?)\n", ph, irq_info.dsisr, + irq_info.dar); + if (afu->adapter->native->sl_ops->fail_irq) + ret = afu->adapter->native->sl_ops->fail_irq(afu, &irq_info); + return ret; +} + +static void native_irq_wait(struct cxl_context *ctx) +{ + u64 dsisr; + int timeout = 1000; + int ph; + + /* + * Wait until no further interrupts are presented by the PSL + * for this context. + */ + while (timeout--) { + ph = cxl_p2n_read(ctx->afu, CXL_PSL_PEHandle_An) & 0xffff; + if (ph != ctx->pe) + return; + dsisr = cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An); + if (cxl_is_power8() && + ((dsisr & CXL_PSL_DSISR_PENDING) == 0)) + return; + if (cxl_is_power9() && + ((dsisr & CXL_PSL9_DSISR_PENDING) == 0)) + return; + /* + * We are waiting for the workqueue to process our + * irq, so need to let that run here. 
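+ * With timeout starting at 1000 and one msleep(1) per pass, this bounded + * wait gives up on the order of a second before the warning below is + * printed.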
+ */ + msleep(1); + } + + dev_warn(&ctx->afu->dev, "WARNING: waiting on DSI for PE %i" + " DSISR %016llx!\n", ph, dsisr); + return; +} + +static irqreturn_t native_slice_irq_err(int irq, void *data) +{ + struct cxl_afu *afu = data; + u64 errstat, serr, afu_error, dsisr; + u64 fir_slice, afu_debug, irq_mask; + + /* + * slice err interrupt is only used with full PSL (no XSL) + */ + serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); + errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); + afu_error = cxl_p2n_read(afu, CXL_AFU_ERR_An); + dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + cxl_afu_decode_psl_serr(afu, serr); + + if (cxl_is_power8()) { + fir_slice = cxl_p1n_read(afu, CXL_PSL_FIR_SLICE_An); + afu_debug = cxl_p1n_read(afu, CXL_AFU_DEBUG_An); + dev_crit(&afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice); + dev_crit(&afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug); + } + dev_crit(&afu->dev, "CXL_PSL_ErrStat_An: 0x%016llx\n", errstat); + dev_crit(&afu->dev, "AFU_ERR_An: 0x%.16llx\n", afu_error); + dev_crit(&afu->dev, "PSL_DSISR_An: 0x%.16llx\n", dsisr); + + /* mask off the IRQ so it won't retrigger until the AFU is reset */ + irq_mask = (serr & CXL_PSL_SERR_An_IRQS) >> 32; + serr |= irq_mask; + cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); + dev_info(&afu->dev, "Further such interrupts will be masked until the AFU is reset\n"); + + return IRQ_HANDLED; +} + +void cxl_native_err_irq_dump_regs_psl9(struct cxl *adapter) +{ + u64 fir1; + + fir1 = cxl_p1_read(adapter, CXL_PSL9_FIR1); + dev_crit(&adapter->dev, "PSL_FIR: 0x%016llx\n", fir1); +} + +void cxl_native_err_irq_dump_regs_psl8(struct cxl *adapter) +{ + u64 fir1, fir2; + + fir1 = cxl_p1_read(adapter, CXL_PSL_FIR1); + fir2 = cxl_p1_read(adapter, CXL_PSL_FIR2); + dev_crit(&adapter->dev, + "PSL_FIR1: 0x%016llx\nPSL_FIR2: 0x%016llx\n", + fir1, fir2); +} + +static irqreturn_t native_irq_err(int irq, void *data) +{ + struct cxl *adapter = data; + u64 err_ivte; + + WARN(1, "CXL ERROR interrupt %i\n", irq); + + err_ivte = cxl_p1_read(adapter, CXL_PSL_ErrIVTE); + dev_crit(&adapter->dev, "PSL_ErrIVTE: 0x%016llx\n", err_ivte); + + if (adapter->native->sl_ops->debugfs_stop_trace) { + dev_crit(&adapter->dev, "STOPPING CXL TRACE\n"); + adapter->native->sl_ops->debugfs_stop_trace(adapter); + } + + if (adapter->native->sl_ops->err_irq_dump_registers) + adapter->native->sl_ops->err_irq_dump_registers(adapter); + + return IRQ_HANDLED; +} + +int cxl_native_register_psl_err_irq(struct cxl *adapter) +{ + int rc; + + adapter->irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err", + dev_name(&adapter->dev)); + if (!adapter->irq_name) + return -ENOMEM; + + if ((rc = cxl_register_one_irq(adapter, native_irq_err, adapter, + &adapter->native->err_hwirq, + &adapter->native->err_virq, + adapter->irq_name))) { + kfree(adapter->irq_name); + adapter->irq_name = NULL; + return rc; + } + + cxl_p1_write(adapter, CXL_PSL_ErrIVTE, adapter->native->err_hwirq & 0xffff); + + return 0; +} + +void cxl_native_release_psl_err_irq(struct cxl *adapter) +{ + if (adapter->native->err_virq == 0 || + adapter->native->err_virq != + irq_find_mapping(NULL, adapter->native->err_hwirq)) + return; + + cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000); + cxl_unmap_irq(adapter->native->err_virq, adapter); + cxl_ops->release_one_irq(adapter, adapter->native->err_hwirq); + kfree(adapter->irq_name); + adapter->native->err_virq = 0; +} + +int cxl_native_register_serr_irq(struct cxl_afu *afu) +{ + u64 serr; + int rc; + + afu->err_irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err", + dev_name(&afu->dev)); 
+ if (!afu->err_irq_name) + return -ENOMEM; + + if ((rc = cxl_register_one_irq(afu->adapter, native_slice_irq_err, afu, + &afu->serr_hwirq, + &afu->serr_virq, afu->err_irq_name))) { + kfree(afu->err_irq_name); + afu->err_irq_name = NULL; + return rc; + } + + serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); + if (cxl_is_power8()) + serr = (serr & 0x00ffffffffff0000ULL) | (afu->serr_hwirq & 0xffff); + if (cxl_is_power9()) { + /* + * By default, all errors are masked. So don't set all masks. + * Slice errors will be transfered. + */ + serr = (serr & ~0xff0000007fffffffULL) | (afu->serr_hwirq & 0xffff); + } + cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); + + return 0; +} + +void cxl_native_release_serr_irq(struct cxl_afu *afu) +{ + if (afu->serr_virq == 0 || + afu->serr_virq != irq_find_mapping(NULL, afu->serr_hwirq)) + return; + + cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x0000000000000000); + cxl_unmap_irq(afu->serr_virq, afu); + cxl_ops->release_one_irq(afu->adapter, afu->serr_hwirq); + kfree(afu->err_irq_name); + afu->serr_virq = 0; +} + +int cxl_native_register_psl_irq(struct cxl_afu *afu) +{ + int rc; + + afu->psl_irq_name = kasprintf(GFP_KERNEL, "cxl-%s", + dev_name(&afu->dev)); + if (!afu->psl_irq_name) + return -ENOMEM; + + if ((rc = cxl_register_one_irq(afu->adapter, native_irq_multiplexed, + afu, &afu->native->psl_hwirq, &afu->native->psl_virq, + afu->psl_irq_name))) { + kfree(afu->psl_irq_name); + afu->psl_irq_name = NULL; + } + return rc; +} + +void cxl_native_release_psl_irq(struct cxl_afu *afu) +{ + if (afu->native->psl_virq == 0 || + afu->native->psl_virq != + irq_find_mapping(NULL, afu->native->psl_hwirq)) + return; + + cxl_unmap_irq(afu->native->psl_virq, afu); + cxl_ops->release_one_irq(afu->adapter, afu->native->psl_hwirq); + kfree(afu->psl_irq_name); + afu->native->psl_virq = 0; +} + +static void recover_psl_err(struct cxl_afu *afu, u64 errstat) +{ + u64 dsisr; + + pr_devel("RECOVERING FROM PSL ERROR... 
(0x%016llx)\n", errstat); + + /* Clear PSL_DSISR[PE] */ + dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + cxl_p2n_write(afu, CXL_PSL_DSISR_An, dsisr & ~CXL_PSL_DSISR_An_PE); + + /* Write 1s to clear error status bits */ + cxl_p2n_write(afu, CXL_PSL_ErrStat_An, errstat); +} + +static int native_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask) +{ + trace_cxl_psl_irq_ack(ctx, tfc); + if (tfc) + cxl_p2n_write(ctx->afu, CXL_PSL_TFC_An, tfc); + if (psl_reset_mask) + recover_psl_err(ctx->afu, psl_reset_mask); + + return 0; +} + +int cxl_check_error(struct cxl_afu *afu) +{ + return (cxl_p1n_read(afu, CXL_PSL_SCNTL_An) == ~0ULL); +} + +static bool native_support_attributes(const char *attr_name, + enum cxl_attrs type) +{ + return true; +} + +static int native_afu_cr_read64(struct cxl_afu *afu, int cr, u64 off, u64 *out) +{ + if (unlikely(!cxl_ops->link_ok(afu->adapter, afu))) + return -EIO; + if (unlikely(off >= afu->crs_len)) + return -ERANGE; + *out = in_le64(afu->native->afu_desc_mmio + afu->crs_offset + + (cr * afu->crs_len) + off); + return 0; +} + +static int native_afu_cr_read32(struct cxl_afu *afu, int cr, u64 off, u32 *out) +{ + if (unlikely(!cxl_ops->link_ok(afu->adapter, afu))) + return -EIO; + if (unlikely(off >= afu->crs_len)) + return -ERANGE; + *out = in_le32(afu->native->afu_desc_mmio + afu->crs_offset + + (cr * afu->crs_len) + off); + return 0; +} + +static int native_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off, u16 *out) +{ + u64 aligned_off = off & ~0x3L; + u32 val; + int rc; + + rc = native_afu_cr_read32(afu, cr, aligned_off, &val); + if (!rc) + *out = (val >> ((off & 0x3) * 8)) & 0xffff; + return rc; +} + +static int native_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off, u8 *out) +{ + u64 aligned_off = off & ~0x3L; + u32 val; + int rc; + + rc = native_afu_cr_read32(afu, cr, aligned_off, &val); + if (!rc) + *out = (val >> ((off & 0x3) * 8)) & 0xff; + return rc; +} + +static int native_afu_cr_write32(struct cxl_afu *afu, int cr, u64 off, u32 in) +{ + if (unlikely(!cxl_ops->link_ok(afu->adapter, afu))) + return -EIO; + if (unlikely(off >= afu->crs_len)) + return -ERANGE; + out_le32(afu->native->afu_desc_mmio + afu->crs_offset + + (cr * afu->crs_len) + off, in); + return 0; +} + +static int native_afu_cr_write16(struct cxl_afu *afu, int cr, u64 off, u16 in) +{ + u64 aligned_off = off & ~0x3L; + u32 val32, mask, shift; + int rc; + + rc = native_afu_cr_read32(afu, cr, aligned_off, &val32); + if (rc) + return rc; + shift = (off & 0x3) * 8; + WARN_ON(shift == 24); + mask = 0xffff << shift; + val32 = (val32 & ~mask) | (in << shift); + + rc = native_afu_cr_write32(afu, cr, aligned_off, val32); + return rc; +} + +static int native_afu_cr_write8(struct cxl_afu *afu, int cr, u64 off, u8 in) +{ + u64 aligned_off = off & ~0x3L; + u32 val32, mask, shift; + int rc; + + rc = native_afu_cr_read32(afu, cr, aligned_off, &val32); + if (rc) + return rc; + shift = (off & 0x3) * 8; + mask = 0xff << shift; + val32 = (val32 & ~mask) | (in << shift); + + rc = native_afu_cr_write32(afu, cr, aligned_off, val32); + return rc; +} + +const struct cxl_backend_ops cxl_native_ops = { + .module = THIS_MODULE, + .adapter_reset = cxl_pci_reset, + .alloc_one_irq = cxl_pci_alloc_one_irq, + .release_one_irq = cxl_pci_release_one_irq, + .alloc_irq_ranges = cxl_pci_alloc_irq_ranges, + .release_irq_ranges = cxl_pci_release_irq_ranges, + .setup_irq = cxl_pci_setup_irq, + .handle_psl_slice_error = native_handle_psl_slice_error, + .psl_interrupt = NULL, + .ack_irq = native_ack_irq, + .irq_wait = 
native_irq_wait, + .attach_process = native_attach_process, + .detach_process = native_detach_process, + .update_ivtes = native_update_ivtes, + .support_attributes = native_support_attributes, + .link_ok = cxl_adapter_link_ok, + .release_afu = cxl_pci_release_afu, + .afu_read_err_buffer = cxl_pci_afu_read_err_buffer, + .afu_check_and_enable = native_afu_check_and_enable, + .afu_activate_mode = native_afu_activate_mode, + .afu_deactivate_mode = native_afu_deactivate_mode, + .afu_reset = native_afu_reset, + .afu_cr_read8 = native_afu_cr_read8, + .afu_cr_read16 = native_afu_cr_read16, + .afu_cr_read32 = native_afu_cr_read32, + .afu_cr_read64 = native_afu_cr_read64, + .afu_cr_write8 = native_afu_cr_write8, + .afu_cr_write16 = native_afu_cr_write16, + .afu_cr_write32 = native_afu_cr_write32, + .read_adapter_vpd = cxl_pci_read_adapter_vpd, +}; diff --git a/drivers/misc/cxl/of.c b/drivers/misc/cxl/of.c new file mode 100644 index 0000000000..25ce725035 --- /dev/null +++ b/drivers/misc/cxl/of.c @@ -0,0 +1,506 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2015 IBM Corp. + */ + +#include +#include +#include +#include +#include +#include + +#include "cxl.h" + + +static const __be32 *read_prop_string(const struct device_node *np, + const char *prop_name) +{ + const __be32 *prop; + + prop = of_get_property(np, prop_name, NULL); + if (cxl_verbose && prop) + pr_info("%s: %s\n", prop_name, (char *) prop); + return prop; +} + +static const __be32 *read_prop_dword(const struct device_node *np, + const char *prop_name, u32 *val) +{ + const __be32 *prop; + + prop = of_get_property(np, prop_name, NULL); + if (prop) + *val = be32_to_cpu(prop[0]); + if (cxl_verbose && prop) + pr_info("%s: %#x (%u)\n", prop_name, *val, *val); + return prop; +} + +static const __be64 *read_prop64_dword(const struct device_node *np, + const char *prop_name, u64 *val) +{ + const __be64 *prop; + + prop = of_get_property(np, prop_name, NULL); + if (prop) + *val = be64_to_cpu(prop[0]); + if (cxl_verbose && prop) + pr_info("%s: %#llx (%llu)\n", prop_name, *val, *val); + return prop; +} + + +static int read_handle(struct device_node *np, u64 *handle) +{ + const __be32 *prop; + u64 size; + + /* Get address and size of the node */ + prop = of_get_address(np, 0, &size, NULL); + if (size) + return -EINVAL; + + /* Helper to read a big number; size is in cells (not bytes) */ + *handle = of_read_number(prop, of_n_addr_cells(np)); + return 0; +} + +static int read_phys_addr(struct device_node *np, char *prop_name, + struct cxl_afu *afu) +{ + int i, len, entry_size, naddr, nsize, type; + u64 addr, size; + const __be32 *prop; + + naddr = of_n_addr_cells(np); + nsize = of_n_size_cells(np); + + prop = of_get_property(np, prop_name, &len); + if (prop) { + entry_size = naddr + nsize; + for (i = 0; i < (len / 4); i += entry_size, prop += entry_size) { + type = be32_to_cpu(prop[0]); + addr = of_read_number(prop, naddr); + size = of_read_number(&prop[naddr], nsize); + switch (type) { + case 0: /* unit address */ + afu->guest->handle = addr; + break; + case 1: /* p2 area */ + afu->guest->p2n_phys += addr; + afu->guest->p2n_size = size; + break; + case 2: /* problem state area */ + afu->psn_phys += addr; + afu->adapter->ps_size = size; + break; + default: + pr_err("Invalid address type %d found in %s property of AFU\n", + type, prop_name); + return -EINVAL; + } + if (cxl_verbose) + pr_info("%s: %#x %#llx (size %#llx)\n", + prop_name, type, addr, size); + } + } + return 0; +} + +static int read_vpd(struct cxl *adapter, struct cxl_afu 
*afu) +{ + char vpd[256]; + int rc; + size_t len = sizeof(vpd); + + memset(vpd, 0, len); + + if (adapter) + rc = cxl_guest_read_adapter_vpd(adapter, vpd, len); + else + rc = cxl_guest_read_afu_vpd(afu, vpd, len); + + if (rc > 0) { + cxl_dump_debug_buffer(vpd, rc); + rc = 0; + } + return rc; +} + +int cxl_of_read_afu_handle(struct cxl_afu *afu, struct device_node *afu_np) +{ + if (read_handle(afu_np, &afu->guest->handle)) + return -EINVAL; + pr_devel("AFU handle: 0x%.16llx\n", afu->guest->handle); + + return 0; +} + +int cxl_of_read_afu_properties(struct cxl_afu *afu, struct device_node *np) +{ + int i, len, rc; + char *p; + const __be32 *prop; + u16 device_id, vendor_id; + u32 val = 0, class_code; + + /* Properties are read in the same order as listed in PAPR */ + + if (cxl_verbose) { + pr_info("Dump of the 'ibm,coherent-platform-function' node properties:\n"); + + prop = of_get_property(np, "compatible", &len); + i = 0; + while (i < len) { + p = (char *) prop + i; + pr_info("compatible: %s\n", p); + i += strlen(p) + 1; + } + read_prop_string(np, "name"); + } + + rc = read_phys_addr(np, "reg", afu); + if (rc) + return rc; + + rc = read_phys_addr(np, "assigned-addresses", afu); + if (rc) + return rc; + + if (afu->psn_phys == 0) + afu->psa = false; + else + afu->psa = true; + + if (cxl_verbose) { + read_prop_string(np, "ibm,loc-code"); + read_prop_string(np, "device_type"); + } + + read_prop_dword(np, "ibm,#processes", &afu->max_procs_virtualised); + + if (cxl_verbose) { + read_prop_dword(np, "ibm,scratchpad-size", &val); + read_prop_dword(np, "ibm,programmable", &val); + read_prop_string(np, "ibm,phandle"); + read_vpd(NULL, afu); + } + + read_prop_dword(np, "ibm,max-ints-per-process", &afu->guest->max_ints); + afu->irqs_max = afu->guest->max_ints; + + prop = read_prop_dword(np, "ibm,min-ints-per-process", &afu->pp_irqs); + if (prop) { + /* One extra interrupt for the PSL interrupt is already + * included. Remove it now to keep only AFU interrupts and + * match the native case. 
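+ * For example (hypothetical value), a property reporting 5 would leave + * pp_irqs = 4 once the PSL interrupt is subtracted below.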
+ */ + afu->pp_irqs--; + } + + if (cxl_verbose) { + read_prop_dword(np, "ibm,max-ints", &val); + read_prop_dword(np, "ibm,vpd-size", &val); + } + + read_prop64_dword(np, "ibm,error-buffer-size", &afu->eb_len); + afu->eb_offset = 0; + + if (cxl_verbose) + read_prop_dword(np, "ibm,config-record-type", &val); + + read_prop64_dword(np, "ibm,config-record-size", &afu->crs_len); + afu->crs_offset = 0; + + read_prop_dword(np, "ibm,#config-records", &afu->crs_num); + + if (cxl_verbose) { + for (i = 0; i < afu->crs_num; i++) { + rc = cxl_ops->afu_cr_read16(afu, i, PCI_DEVICE_ID, + &device_id); + if (!rc) + pr_info("record %d - device-id: %#x\n", + i, device_id); + rc = cxl_ops->afu_cr_read16(afu, i, PCI_VENDOR_ID, + &vendor_id); + if (!rc) + pr_info("record %d - vendor-id: %#x\n", + i, vendor_id); + rc = cxl_ops->afu_cr_read32(afu, i, PCI_CLASS_REVISION, + &class_code); + if (!rc) { + class_code >>= 8; + pr_info("record %d - class-code: %#x\n", + i, class_code); + } + } + + read_prop_dword(np, "ibm,function-number", &val); + read_prop_dword(np, "ibm,privileged-function", &val); + read_prop_dword(np, "vendor-id", &val); + read_prop_dword(np, "device-id", &val); + read_prop_dword(np, "revision-id", &val); + read_prop_dword(np, "class-code", &val); + read_prop_dword(np, "subsystem-vendor-id", &val); + read_prop_dword(np, "subsystem-id", &val); + } + /* + * if "ibm,process-mmio" doesn't exist then per-process mmio is + * not supported + */ + val = 0; + prop = read_prop_dword(np, "ibm,process-mmio", &val); + if (prop && val == 1) + afu->pp_psa = true; + else + afu->pp_psa = false; + + if (cxl_verbose) { + read_prop_dword(np, "ibm,supports-aur", &val); + read_prop_dword(np, "ibm,supports-csrp", &val); + read_prop_dword(np, "ibm,supports-prr", &val); + } + + prop = read_prop_dword(np, "ibm,function-error-interrupt", &val); + if (prop) + afu->serr_hwirq = val; + + pr_devel("AFU handle: %#llx\n", afu->guest->handle); + pr_devel("p2n_phys: %#llx (size %#llx)\n", + afu->guest->p2n_phys, afu->guest->p2n_size); + pr_devel("psn_phys: %#llx (size %#llx)\n", + afu->psn_phys, afu->adapter->ps_size); + pr_devel("Max number of processes virtualised=%i\n", + afu->max_procs_virtualised); + pr_devel("Per-process irqs min=%i, max=%i\n", afu->pp_irqs, + afu->irqs_max); + pr_devel("Slice error interrupt=%#lx\n", afu->serr_hwirq); + + return 0; +} + +static int read_adapter_irq_config(struct cxl *adapter, struct device_node *np) +{ + const __be32 *ranges; + int len, nranges, i; + struct irq_avail *cur; + + ranges = of_get_property(np, "interrupt-ranges", &len); + if (ranges == NULL || len < (2 * sizeof(int))) + return -EINVAL; + + /* + * encoded array of two cells per entry, each cell encoded as + * with encode-int + */ + nranges = len / (2 * sizeof(int)); + if (nranges == 0 || (nranges * 2 * sizeof(int)) != len) + return -EINVAL; + + adapter->guest->irq_avail = kcalloc(nranges, sizeof(struct irq_avail), + GFP_KERNEL); + if (adapter->guest->irq_avail == NULL) + return -ENOMEM; + + adapter->guest->irq_base_offset = be32_to_cpu(ranges[0]); + for (i = 0; i < nranges; i++) { + cur = &adapter->guest->irq_avail[i]; + cur->offset = be32_to_cpu(ranges[i * 2]); + cur->range = be32_to_cpu(ranges[i * 2 + 1]); + cur->bitmap = bitmap_zalloc(cur->range, GFP_KERNEL); + if (cur->bitmap == NULL) + goto err; + if (cur->offset < adapter->guest->irq_base_offset) + adapter->guest->irq_base_offset = cur->offset; + if (cxl_verbose) + pr_info("available IRQ range: %#lx-%#lx (%lu)\n", + cur->offset, cur->offset + cur->range - 1, + cur->range); + } 
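+ /* + * For illustration (hypothetical values): a property + * interrupt-ranges = <0x400 0x80 0x500 0x40> has len = 16, so nranges = 2 + * and parses into irq_avail[0] = {offset 0x400, range 0x80} and + * irq_avail[1] = {offset 0x500, range 0x40}, leaving irq_base_offset + * at 0x400. + */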
+ adapter->guest->irq_nranges = nranges; + spin_lock_init(&adapter->guest->irq_alloc_lock); + + return 0; +err: + for (i--; i >= 0; i--) { + cur = &adapter->guest->irq_avail[i]; + bitmap_free(cur->bitmap); + } + kfree(adapter->guest->irq_avail); + adapter->guest->irq_avail = NULL; + return -ENOMEM; +} + +int cxl_of_read_adapter_handle(struct cxl *adapter, struct device_node *np) +{ + if (read_handle(np, &adapter->guest->handle)) + return -EINVAL; + pr_devel("Adapter handle: 0x%.16llx\n", adapter->guest->handle); + + return 0; +} + +int cxl_of_read_adapter_properties(struct cxl *adapter, struct device_node *np) +{ + int rc, len, naddr, i; + char *p; + const __be32 *prop; + u32 val = 0; + + /* Properties are read in the same order as listed in PAPR */ + + naddr = of_n_addr_cells(np); + + if (cxl_verbose) { + pr_info("Dump of the 'ibm,coherent-platform-facility' node properties:\n"); + + read_prop_dword(np, "#address-cells", &val); + read_prop_dword(np, "#size-cells", &val); + + prop = of_get_property(np, "compatible", &len); + i = 0; + while (i < len) { + p = (char *) prop + i; + pr_info("compatible: %s\n", p); + i += strlen(p) + 1; + } + read_prop_string(np, "name"); + read_prop_string(np, "model"); + + prop = of_get_property(np, "reg", NULL); + if (prop) { + pr_info("reg: addr:%#llx size:%#x\n", + of_read_number(prop, naddr), + be32_to_cpu(prop[naddr])); + } + + read_prop_string(np, "ibm,loc-code"); + } + + if ((rc = read_adapter_irq_config(adapter, np))) + return rc; + + if (cxl_verbose) { + read_prop_string(np, "device_type"); + read_prop_string(np, "ibm,phandle"); + } + + prop = read_prop_dword(np, "ibm,caia-version", &val); + if (prop) { + adapter->caia_major = (val & 0xFF00) >> 8; + adapter->caia_minor = val & 0xFF; + } + + prop = read_prop_dword(np, "ibm,psl-revision", &val); + if (prop) + adapter->psl_rev = val; + + prop = read_prop_string(np, "status"); + if (prop) { + adapter->guest->status = kasprintf(GFP_KERNEL, "%s", (char *) prop); + if (adapter->guest->status == NULL) + return -ENOMEM; + } + + prop = read_prop_dword(np, "vendor-id", &val); + if (prop) + adapter->guest->vendor = val; + + prop = read_prop_dword(np, "device-id", &val); + if (prop) + adapter->guest->device = val; + + if (cxl_verbose) { + read_prop_dword(np, "ibm,privileged-facility", &val); + read_prop_dword(np, "revision-id", &val); + read_prop_dword(np, "class-code", &val); + } + + prop = read_prop_dword(np, "subsystem-vendor-id", &val); + if (prop) + adapter->guest->subsystem_vendor = val; + + prop = read_prop_dword(np, "subsystem-id", &val); + if (prop) + adapter->guest->subsystem = val; + + if (cxl_verbose) + read_vpd(adapter, NULL); + + return 0; +} + +static int cxl_of_remove(struct platform_device *pdev) +{ + struct cxl *adapter; + int afu; + + adapter = dev_get_drvdata(&pdev->dev); + for (afu = 0; afu < adapter->slices; afu++) + cxl_guest_remove_afu(adapter->afu[afu]); + + cxl_guest_remove_adapter(adapter); + return 0; +} + +static void cxl_of_shutdown(struct platform_device *pdev) +{ + cxl_of_remove(pdev); +} + +int cxl_of_probe(struct platform_device *pdev) +{ + struct device_node *np = NULL; + struct device_node *afu_np = NULL; + struct cxl *adapter = NULL; + int ret; + int slice = 0, slice_ok = 0; + + pr_devel("in %s\n", __func__); + + np = pdev->dev.of_node; + if (np == NULL) + return -ENODEV; + + /* init adapter */ + adapter = cxl_guest_init_adapter(np, pdev); + if (IS_ERR(adapter)) { + dev_err(&pdev->dev, "guest_init_adapter failed: %li\n", PTR_ERR(adapter)); + return PTR_ERR(adapter); + } + + /* 
init afu */ + for_each_child_of_node(np, afu_np) { + if ((ret = cxl_guest_init_afu(adapter, slice, afu_np))) + dev_err(&pdev->dev, "AFU %i failed to initialise: %i\n", + slice, ret); + else + slice_ok++; + slice++; + } + + if (slice_ok == 0) { + dev_info(&pdev->dev, "No active AFU"); + adapter->slices = 0; + } + + return 0; +} + +static const struct of_device_id cxl_of_match[] = { + { .compatible = "ibm,coherent-platform-facility",}, + {}, +}; +MODULE_DEVICE_TABLE(of, cxl_of_match); + +struct platform_driver cxl_of_driver = { + .driver = { + .name = "cxl_of", + .of_match_table = cxl_of_match, + .owner = THIS_MODULE + }, + .probe = cxl_of_probe, + .remove = cxl_of_remove, + .shutdown = cxl_of_shutdown, +}; diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c new file mode 100644 index 0000000000..4cf9e7c42a --- /dev/null +++ b/drivers/misc/cxl/pci.c @@ -0,0 +1,2107 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2014 IBM Corp. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cxl.h" +#include + + +#define CXL_PCI_VSEC_ID 0x1280 +#define CXL_VSEC_MIN_SIZE 0x80 + +#define CXL_READ_VSEC_LENGTH(dev, vsec, dest) \ + { \ + pci_read_config_word(dev, vsec + 0x6, dest); \ + *dest >>= 4; \ + } +#define CXL_READ_VSEC_NAFUS(dev, vsec, dest) \ + pci_read_config_byte(dev, vsec + 0x8, dest) + +#define CXL_READ_VSEC_STATUS(dev, vsec, dest) \ + pci_read_config_byte(dev, vsec + 0x9, dest) +#define CXL_STATUS_SECOND_PORT 0x80 +#define CXL_STATUS_MSI_X_FULL 0x40 +#define CXL_STATUS_MSI_X_SINGLE 0x20 +#define CXL_STATUS_FLASH_RW 0x08 +#define CXL_STATUS_FLASH_RO 0x04 +#define CXL_STATUS_LOADABLE_AFU 0x02 +#define CXL_STATUS_LOADABLE_PSL 0x01 +/* If we see these features we won't try to use the card */ +#define CXL_UNSUPPORTED_FEATURES \ + (CXL_STATUS_MSI_X_FULL | CXL_STATUS_MSI_X_SINGLE) + +#define CXL_READ_VSEC_MODE_CONTROL(dev, vsec, dest) \ + pci_read_config_byte(dev, vsec + 0xa, dest) +#define CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val) \ + pci_write_config_byte(dev, vsec + 0xa, val) +#define CXL_VSEC_PROTOCOL_MASK 0xe0 +#define CXL_VSEC_PROTOCOL_1024TB 0x80 +#define CXL_VSEC_PROTOCOL_512TB 0x40 +#define CXL_VSEC_PROTOCOL_256TB 0x20 /* Power 8/9 uses this */ +#define CXL_VSEC_PROTOCOL_ENABLE 0x01 + +#define CXL_READ_VSEC_PSL_REVISION(dev, vsec, dest) \ + pci_read_config_word(dev, vsec + 0xc, dest) +#define CXL_READ_VSEC_CAIA_MINOR(dev, vsec, dest) \ + pci_read_config_byte(dev, vsec + 0xe, dest) +#define CXL_READ_VSEC_CAIA_MAJOR(dev, vsec, dest) \ + pci_read_config_byte(dev, vsec + 0xf, dest) +#define CXL_READ_VSEC_BASE_IMAGE(dev, vsec, dest) \ + pci_read_config_word(dev, vsec + 0x10, dest) + +#define CXL_READ_VSEC_IMAGE_STATE(dev, vsec, dest) \ + pci_read_config_byte(dev, vsec + 0x13, dest) +#define CXL_WRITE_VSEC_IMAGE_STATE(dev, vsec, val) \ + pci_write_config_byte(dev, vsec + 0x13, val) +#define CXL_VSEC_USER_IMAGE_LOADED 0x80 /* RO */ +#define CXL_VSEC_PERST_LOADS_IMAGE 0x20 /* RW */ +#define CXL_VSEC_PERST_SELECT_USER 0x10 /* RW */ + +#define CXL_READ_VSEC_AFU_DESC_OFF(dev, vsec, dest) \ + pci_read_config_dword(dev, vsec + 0x20, dest) +#define CXL_READ_VSEC_AFU_DESC_SIZE(dev, vsec, dest) \ + pci_read_config_dword(dev, vsec + 0x24, dest) +#define CXL_READ_VSEC_PS_OFF(dev, vsec, dest) \ + pci_read_config_dword(dev, vsec + 0x28, dest) +#define CXL_READ_VSEC_PS_SIZE(dev, vsec, dest) \ + pci_read_config_dword(dev, vsec + 0x2c, dest) + + +/* This works a little 
different than the p1/p2 register accesses to make it + * easier to pull out individual fields */ +#define AFUD_READ(afu, off) in_be64(afu->native->afu_desc_mmio + off) +#define AFUD_READ_LE(afu, off) in_le64(afu->native->afu_desc_mmio + off) +#define EXTRACT_PPC_BIT(val, bit) (!!(val & PPC_BIT(bit))) +#define EXTRACT_PPC_BITS(val, bs, be) ((val & PPC_BITMASK(bs, be)) >> PPC_BITLSHIFT(be)) + +#define AFUD_READ_INFO(afu) AFUD_READ(afu, 0x0) +#define AFUD_NUM_INTS_PER_PROC(val) EXTRACT_PPC_BITS(val, 0, 15) +#define AFUD_NUM_PROCS(val) EXTRACT_PPC_BITS(val, 16, 31) +#define AFUD_NUM_CRS(val) EXTRACT_PPC_BITS(val, 32, 47) +#define AFUD_MULTIMODE(val) EXTRACT_PPC_BIT(val, 48) +#define AFUD_PUSH_BLOCK_TRANSFER(val) EXTRACT_PPC_BIT(val, 55) +#define AFUD_DEDICATED_PROCESS(val) EXTRACT_PPC_BIT(val, 59) +#define AFUD_AFU_DIRECTED(val) EXTRACT_PPC_BIT(val, 61) +#define AFUD_TIME_SLICED(val) EXTRACT_PPC_BIT(val, 63) +#define AFUD_READ_CR(afu) AFUD_READ(afu, 0x20) +#define AFUD_CR_LEN(val) EXTRACT_PPC_BITS(val, 8, 63) +#define AFUD_READ_CR_OFF(afu) AFUD_READ(afu, 0x28) +#define AFUD_READ_PPPSA(afu) AFUD_READ(afu, 0x30) +#define AFUD_PPPSA_PP(val) EXTRACT_PPC_BIT(val, 6) +#define AFUD_PPPSA_PSA(val) EXTRACT_PPC_BIT(val, 7) +#define AFUD_PPPSA_LEN(val) EXTRACT_PPC_BITS(val, 8, 63) +#define AFUD_READ_PPPSA_OFF(afu) AFUD_READ(afu, 0x38) +#define AFUD_READ_EB(afu) AFUD_READ(afu, 0x40) +#define AFUD_EB_LEN(val) EXTRACT_PPC_BITS(val, 8, 63) +#define AFUD_READ_EB_OFF(afu) AFUD_READ(afu, 0x48) + +static const struct pci_device_id cxl_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0477), }, + { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x044b), }, + { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x04cf), }, + { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0601), }, + { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0623), }, + { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0628), }, + { } +}; +MODULE_DEVICE_TABLE(pci, cxl_pci_tbl); + + +/* + * Mostly using these wrappers to avoid confusion: + * priv 1 is BAR2, while priv 2 is BAR0 + */ +static inline resource_size_t p1_base(struct pci_dev *dev) +{ + return pci_resource_start(dev, 2); +} + +static inline resource_size_t p1_size(struct pci_dev *dev) +{ + return pci_resource_len(dev, 2); +} + +static inline resource_size_t p2_base(struct pci_dev *dev) +{ + return pci_resource_start(dev, 0); +} + +static inline resource_size_t p2_size(struct pci_dev *dev) +{ + return pci_resource_len(dev, 0); +} + +static int find_cxl_vsec(struct pci_dev *dev) +{ + return pci_find_vsec_capability(dev, PCI_VENDOR_ID_IBM, CXL_PCI_VSEC_ID); +} + +static void dump_cxl_config_space(struct pci_dev *dev) +{ + int vsec; + u32 val; + + dev_info(&dev->dev, "dump_cxl_config_space\n"); + + pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, &val); + dev_info(&dev->dev, "BAR0: %#.8x\n", val); + pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, &val); + dev_info(&dev->dev, "BAR1: %#.8x\n", val); + pci_read_config_dword(dev, PCI_BASE_ADDRESS_2, &val); + dev_info(&dev->dev, "BAR2: %#.8x\n", val); + pci_read_config_dword(dev, PCI_BASE_ADDRESS_3, &val); + dev_info(&dev->dev, "BAR3: %#.8x\n", val); + pci_read_config_dword(dev, PCI_BASE_ADDRESS_4, &val); + dev_info(&dev->dev, "BAR4: %#.8x\n", val); + pci_read_config_dword(dev, PCI_BASE_ADDRESS_5, &val); + dev_info(&dev->dev, "BAR5: %#.8x\n", val); + + dev_info(&dev->dev, "p1 regs: %#llx, len: %#llx\n", + p1_base(dev), p1_size(dev)); + dev_info(&dev->dev, "p2 regs: %#llx, len: %#llx\n", + p2_base(dev), p2_size(dev)); + dev_info(&dev->dev, "BAR 4/5: %#llx, len: %#llx\n", + pci_resource_start(dev, 4), pci_resource_len(dev, 
4)); + + if (!(vsec = find_cxl_vsec(dev))) + return; + +#define show_reg(name, what) \ + dev_info(&dev->dev, "cxl vsec: %30s: %#x\n", name, what) + + pci_read_config_dword(dev, vsec + 0x0, &val); + show_reg("Cap ID", (val >> 0) & 0xffff); + show_reg("Cap Ver", (val >> 16) & 0xf); + show_reg("Next Cap Ptr", (val >> 20) & 0xfff); + pci_read_config_dword(dev, vsec + 0x4, &val); + show_reg("VSEC ID", (val >> 0) & 0xffff); + show_reg("VSEC Rev", (val >> 16) & 0xf); + show_reg("VSEC Length", (val >> 20) & 0xfff); + pci_read_config_dword(dev, vsec + 0x8, &val); + show_reg("Num AFUs", (val >> 0) & 0xff); + show_reg("Status", (val >> 8) & 0xff); + show_reg("Mode Control", (val >> 16) & 0xff); + show_reg("Reserved", (val >> 24) & 0xff); + pci_read_config_dword(dev, vsec + 0xc, &val); + show_reg("PSL Rev", (val >> 0) & 0xffff); + show_reg("CAIA Ver", (val >> 16) & 0xffff); + pci_read_config_dword(dev, vsec + 0x10, &val); + show_reg("Base Image Rev", (val >> 0) & 0xffff); + show_reg("Reserved", (val >> 16) & 0x0fff); + show_reg("Image Control", (val >> 28) & 0x3); + show_reg("Reserved", (val >> 30) & 0x1); + show_reg("Image Loaded", (val >> 31) & 0x1); + + pci_read_config_dword(dev, vsec + 0x14, &val); + show_reg("Reserved", val); + pci_read_config_dword(dev, vsec + 0x18, &val); + show_reg("Reserved", val); + pci_read_config_dword(dev, vsec + 0x1c, &val); + show_reg("Reserved", val); + + pci_read_config_dword(dev, vsec + 0x20, &val); + show_reg("AFU Descriptor Offset", val); + pci_read_config_dword(dev, vsec + 0x24, &val); + show_reg("AFU Descriptor Size", val); + pci_read_config_dword(dev, vsec + 0x28, &val); + show_reg("Problem State Offset", val); + pci_read_config_dword(dev, vsec + 0x2c, &val); + show_reg("Problem State Size", val); + + pci_read_config_dword(dev, vsec + 0x30, &val); + show_reg("Reserved", val); + pci_read_config_dword(dev, vsec + 0x34, &val); + show_reg("Reserved", val); + pci_read_config_dword(dev, vsec + 0x38, &val); + show_reg("Reserved", val); + pci_read_config_dword(dev, vsec + 0x3c, &val); + show_reg("Reserved", val); + + pci_read_config_dword(dev, vsec + 0x40, &val); + show_reg("PSL Programming Port", val); + pci_read_config_dword(dev, vsec + 0x44, &val); + show_reg("PSL Programming Control", val); + + pci_read_config_dword(dev, vsec + 0x48, &val); + show_reg("Reserved", val); + pci_read_config_dword(dev, vsec + 0x4c, &val); + show_reg("Reserved", val); + + pci_read_config_dword(dev, vsec + 0x50, &val); + show_reg("Flash Address Register", val); + pci_read_config_dword(dev, vsec + 0x54, &val); + show_reg("Flash Size Register", val); + pci_read_config_dword(dev, vsec + 0x58, &val); + show_reg("Flash Status/Control Register", val); + pci_read_config_dword(dev, vsec + 0x58, &val); + show_reg("Flash Data Port", val); + +#undef show_reg +} + +static void dump_afu_descriptor(struct cxl_afu *afu) +{ + u64 val, afu_cr_num, afu_cr_off, afu_cr_len; + int i; + +#define show_reg(name, what) \ + dev_info(&afu->dev, "afu desc: %30s: %#llx\n", name, what) + + val = AFUD_READ_INFO(afu); + show_reg("num_ints_per_process", AFUD_NUM_INTS_PER_PROC(val)); + show_reg("num_of_processes", AFUD_NUM_PROCS(val)); + show_reg("num_of_afu_CRs", AFUD_NUM_CRS(val)); + show_reg("req_prog_mode", val & 0xffffULL); + afu_cr_num = AFUD_NUM_CRS(val); + + val = AFUD_READ(afu, 0x8); + show_reg("Reserved", val); + val = AFUD_READ(afu, 0x10); + show_reg("Reserved", val); + val = AFUD_READ(afu, 0x18); + show_reg("Reserved", val); + + val = AFUD_READ_CR(afu); + show_reg("Reserved", (val >> (63-7)) & 0xff); + 
show_reg("AFU_CR_len", AFUD_CR_LEN(val)); + afu_cr_len = AFUD_CR_LEN(val) * 256; + + val = AFUD_READ_CR_OFF(afu); + afu_cr_off = val; + show_reg("AFU_CR_offset", val); + + val = AFUD_READ_PPPSA(afu); + show_reg("PerProcessPSA_control", (val >> (63-7)) & 0xff); + show_reg("PerProcessPSA Length", AFUD_PPPSA_LEN(val)); + + val = AFUD_READ_PPPSA_OFF(afu); + show_reg("PerProcessPSA_offset", val); + + val = AFUD_READ_EB(afu); + show_reg("Reserved", (val >> (63-7)) & 0xff); + show_reg("AFU_EB_len", AFUD_EB_LEN(val)); + + val = AFUD_READ_EB_OFF(afu); + show_reg("AFU_EB_offset", val); + + for (i = 0; i < afu_cr_num; i++) { + val = AFUD_READ_LE(afu, afu_cr_off + i * afu_cr_len); + show_reg("CR Vendor", val & 0xffff); + show_reg("CR Device", (val >> 16) & 0xffff); + } +#undef show_reg +} + +#define P8_CAPP_UNIT0_ID 0xBA +#define P8_CAPP_UNIT1_ID 0XBE +#define P9_CAPP_UNIT0_ID 0xC0 +#define P9_CAPP_UNIT1_ID 0xE0 + +static int get_phb_index(struct device_node *np, u32 *phb_index) +{ + if (of_property_read_u32(np, "ibm,phb-index", phb_index)) + return -ENODEV; + return 0; +} + +static u64 get_capp_unit_id(struct device_node *np, u32 phb_index) +{ + /* + * POWER 8: + * - For chips other than POWER8NVL, we only have CAPP 0, + * irrespective of which PHB is used. + * - For POWER8NVL, assume CAPP 0 is attached to PHB0 and + * CAPP 1 is attached to PHB1. + */ + if (cxl_is_power8()) { + if (!pvr_version_is(PVR_POWER8NVL)) + return P8_CAPP_UNIT0_ID; + + if (phb_index == 0) + return P8_CAPP_UNIT0_ID; + + if (phb_index == 1) + return P8_CAPP_UNIT1_ID; + } + + /* + * POWER 9: + * PEC0 (PHB0). Capp ID = CAPP0 (0b1100_0000) + * PEC1 (PHB1 - PHB2). No capi mode + * PEC2 (PHB3 - PHB4 - PHB5): Capi mode on PHB3 only. Capp ID = CAPP1 (0b1110_0000) + */ + if (cxl_is_power9()) { + if (phb_index == 0) + return P9_CAPP_UNIT0_ID; + + if (phb_index == 3) + return P9_CAPP_UNIT1_ID; + } + + return 0; +} + +int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid, + u32 *phb_index, u64 *capp_unit_id) +{ + int rc; + struct device_node *np; + const __be32 *prop; + + if (!(np = pnv_pci_get_phb_node(dev))) + return -ENODEV; + + while (np && !(prop = of_get_property(np, "ibm,chip-id", NULL))) + np = of_get_next_parent(np); + if (!np) + return -ENODEV; + + *chipid = be32_to_cpup(prop); + + rc = get_phb_index(np, phb_index); + if (rc) { + pr_err("cxl: invalid phb index\n"); + of_node_put(np); + return rc; + } + + *capp_unit_id = get_capp_unit_id(np, *phb_index); + of_node_put(np); + if (!*capp_unit_id) { + pr_err("cxl: No capp unit found for PHB[%lld,%d]. 
Make sure the adapter is on a capi-compatible slot\n", + *chipid, *phb_index); + return -ENODEV; + } + + return 0; +} + +static DEFINE_MUTEX(indications_mutex); + +static int get_phb_indications(struct pci_dev *dev, u64 *capiind, u64 *asnind, + u64 *nbwind) +{ + static u64 nbw, asn, capi = 0; + struct device_node *np; + const __be32 *prop; + + mutex_lock(&indications_mutex); + if (!capi) { + if (!(np = pnv_pci_get_phb_node(dev))) { + mutex_unlock(&indications_mutex); + return -ENODEV; + } + + prop = of_get_property(np, "ibm,phb-indications", NULL); + if (!prop) { + nbw = 0x0300UL; /* legacy values */ + asn = 0x0400UL; + capi = 0x0200UL; + } else { + nbw = (u64)be32_to_cpu(prop[2]); + asn = (u64)be32_to_cpu(prop[1]); + capi = (u64)be32_to_cpu(prop[0]); + } + of_node_put(np); + } + *capiind = capi; + *asnind = asn; + *nbwind = nbw; + mutex_unlock(&indications_mutex); + return 0; +} + +int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg) +{ + u64 xsl_dsnctl; + u64 capiind, asnind, nbwind; + + /* + * CAPI Identifier bits [0:7] + * bit 61:60 MSI bits --> 0 + * bit 59 TVT selector --> 0 + */ + if (get_phb_indications(dev, &capiind, &asnind, &nbwind)) + return -ENODEV; + + /* + * Tell XSL where to route data to. + * The field chipid should match the PHB CAPI_CMPM register + */ + xsl_dsnctl = (capiind << (63-15)); /* Bit 57 */ + xsl_dsnctl |= (capp_unit_id << (63-15)); + + /* nMMU_ID Defaults to: b’000001001’*/ + xsl_dsnctl |= ((u64)0x09 << (63-28)); + + /* + * Used to identify CAPI packets which should be sorted into + * the Non-Blocking queues by the PHB. This field should match + * the PHB PBL_NBW_CMPM register + * nbwind=0x03, bits [57:58], must include capi indicator. + * Not supported on P9 DD1. + */ + xsl_dsnctl |= (nbwind << (63-55)); + + /* + * Upper 16b address bits of ASB_Notify messages sent to the + * system. Need to match the PHB’s ASN Compare/Mask Register. + * Not supported on P9 DD1. + */ + xsl_dsnctl |= asnind; + + *reg = xsl_dsnctl; + return 0; +} + +static int init_implementation_adapter_regs_psl9(struct cxl *adapter, + struct pci_dev *dev) +{ + u64 xsl_dsnctl, psl_fircntl; + u64 chipid; + u32 phb_index; + u64 capp_unit_id; + u64 psl_debug; + int rc; + + rc = cxl_calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id); + if (rc) + return rc; + + rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &xsl_dsnctl); + if (rc) + return rc; + + cxl_p1_write(adapter, CXL_XSL9_DSNCTL, xsl_dsnctl); + + /* Set fir_cntl to recommended value for production env */ + psl_fircntl = (0x2ULL << (63-3)); /* ce_report */ + psl_fircntl |= (0x1ULL << (63-6)); /* FIR_report */ + psl_fircntl |= 0x1ULL; /* ce_thresh */ + cxl_p1_write(adapter, CXL_PSL9_FIR_CNTL, psl_fircntl); + + /* Setup the PSL to transmit packets on the PCIe before the + * CAPP is enabled. Make sure that CAPP virtual machines are disabled + */ + cxl_p1_write(adapter, CXL_PSL9_DSNDCTL, 0x0001001000012A10ULL); + + /* + * A response to an ASB_Notify request is returned by the + * system as an MMIO write to the address defined in + * the PSL_TNR_ADDR register. 
+ * keep the Reset Value: 0x00020000E0000000 + */ + + /* Enable XSL rty limit */ + cxl_p1_write(adapter, CXL_XSL9_DEF, 0x51F8000000000005ULL); + + /* Change XSL_INV dummy read threshold */ + cxl_p1_write(adapter, CXL_XSL9_INV, 0x0000040007FFC200ULL); + + if (phb_index == 3) { + /* disable machines 31-47 and 20-27 for DMA */ + cxl_p1_write(adapter, CXL_PSL9_APCDEDTYPE, 0x40000FF3FFFF0000ULL); + } + + /* Snoop machines */ + cxl_p1_write(adapter, CXL_PSL9_APCDEDALLOC, 0x800F000200000000ULL); + + /* Enable NORST and DD2 features */ + cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0xC000000000000000ULL); + + /* + * Check if PSL has data-cache. We need to flush adapter datacache + * when as its about to be removed. + */ + psl_debug = cxl_p1_read(adapter, CXL_PSL9_DEBUG); + if (psl_debug & CXL_PSL_DEBUG_CDC) { + dev_dbg(&dev->dev, "No data-cache present\n"); + adapter->native->no_data_cache = true; + } + + return 0; +} + +static int init_implementation_adapter_regs_psl8(struct cxl *adapter, struct pci_dev *dev) +{ + u64 psl_dsnctl, psl_fircntl; + u64 chipid; + u32 phb_index; + u64 capp_unit_id; + int rc; + + rc = cxl_calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id); + if (rc) + return rc; + + psl_dsnctl = 0x0000900000000000ULL; /* pteupd ttype, scdone */ + psl_dsnctl |= (0x2ULL << (63-38)); /* MMIO hang pulse: 256 us */ + /* Tell PSL where to route data to */ + psl_dsnctl |= (chipid << (63-5)); + psl_dsnctl |= (capp_unit_id << (63-13)); + + cxl_p1_write(adapter, CXL_PSL_DSNDCTL, psl_dsnctl); + cxl_p1_write(adapter, CXL_PSL_RESLCKTO, 0x20000000200ULL); + /* snoop write mask */ + cxl_p1_write(adapter, CXL_PSL_SNWRALLOC, 0x00000000FFFFFFFFULL); + /* set fir_cntl to recommended value for production env */ + psl_fircntl = (0x2ULL << (63-3)); /* ce_report */ + psl_fircntl |= (0x1ULL << (63-6)); /* FIR_report */ + psl_fircntl |= 0x1ULL; /* ce_thresh */ + cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, psl_fircntl); + /* for debugging with trace arrays */ + cxl_p1_write(adapter, CXL_PSL_TRACE, 0x0000FF7C00000000ULL); + + return 0; +} + +/* PSL */ +#define TBSYNC_CAL(n) (((u64)n & 0x7) << (63-3)) +#define TBSYNC_CNT(n) (((u64)n & 0x7) << (63-6)) +/* For the PSL this is a multiple for 0 < n <= 7: */ +#define PSL_2048_250MHZ_CYCLES 1 + +static void write_timebase_ctrl_psl8(struct cxl *adapter) +{ + cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT, + TBSYNC_CNT(2 * PSL_2048_250MHZ_CYCLES)); +} + +static u64 timebase_read_psl9(struct cxl *adapter) +{ + return cxl_p1_read(adapter, CXL_PSL9_Timebase); +} + +static u64 timebase_read_psl8(struct cxl *adapter) +{ + return cxl_p1_read(adapter, CXL_PSL_Timebase); +} + +static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev) +{ + struct device_node *np; + + adapter->psl_timebase_synced = false; + + if (!(np = pnv_pci_get_phb_node(dev))) + return; + + /* Do not fail when CAPP timebase sync is not supported by OPAL */ + of_node_get(np); + if (! 
of_get_property(np, "ibm,capp-timebase-sync", NULL)) { + of_node_put(np); + dev_info(&dev->dev, "PSL timebase inactive: OPAL support missing\n"); + return; + } + of_node_put(np); + + /* + * Setup PSL Timebase Control and Status register + * with the recommended Timebase Sync Count value + */ + if (adapter->native->sl_ops->write_timebase_ctrl) + adapter->native->sl_ops->write_timebase_ctrl(adapter); + + /* Enable PSL Timebase */ + cxl_p1_write(adapter, CXL_PSL_Control, 0x0000000000000000); + cxl_p1_write(adapter, CXL_PSL_Control, CXL_PSL_Control_tb); + + return; +} + +static int init_implementation_afu_regs_psl9(struct cxl_afu *afu) +{ + return 0; +} + +static int init_implementation_afu_regs_psl8(struct cxl_afu *afu) +{ + /* read/write masks for this slice */ + cxl_p1n_write(afu, CXL_PSL_APCALLOC_A, 0xFFFFFFFEFEFEFEFEULL); + /* APC read/write masks for this slice */ + cxl_p1n_write(afu, CXL_PSL_COALLOC_A, 0xFF000000FEFEFEFEULL); + /* for debugging with trace arrays */ + cxl_p1n_write(afu, CXL_PSL_SLICE_TRACE, 0x0000FFFF00000000ULL); + cxl_p1n_write(afu, CXL_PSL_RXCTL_A, CXL_PSL_RXCTL_AFUHP_4S); + + return 0; +} + +int cxl_pci_setup_irq(struct cxl *adapter, unsigned int hwirq, + unsigned int virq) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + + return pnv_cxl_ioda_msi_setup(dev, hwirq, virq); +} + +int cxl_update_image_control(struct cxl *adapter) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + int rc; + int vsec; + u8 image_state; + + if (!(vsec = find_cxl_vsec(dev))) { + dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n"); + return -ENODEV; + } + + if ((rc = CXL_READ_VSEC_IMAGE_STATE(dev, vsec, &image_state))) { + dev_err(&dev->dev, "failed to read image state: %i\n", rc); + return rc; + } + + if (adapter->perst_loads_image) + image_state |= CXL_VSEC_PERST_LOADS_IMAGE; + else + image_state &= ~CXL_VSEC_PERST_LOADS_IMAGE; + + if (adapter->perst_select_user) + image_state |= CXL_VSEC_PERST_SELECT_USER; + else + image_state &= ~CXL_VSEC_PERST_SELECT_USER; + + if ((rc = CXL_WRITE_VSEC_IMAGE_STATE(dev, vsec, image_state))) { + dev_err(&dev->dev, "failed to update image control: %i\n", rc); + return rc; + } + + return 0; +} + +int cxl_pci_alloc_one_irq(struct cxl *adapter) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + + return pnv_cxl_alloc_hwirqs(dev, 1); +} + +void cxl_pci_release_one_irq(struct cxl *adapter, int hwirq) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + + return pnv_cxl_release_hwirqs(dev, hwirq, 1); +} + +int cxl_pci_alloc_irq_ranges(struct cxl_irq_ranges *irqs, + struct cxl *adapter, unsigned int num) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + + return pnv_cxl_alloc_hwirq_ranges(irqs, dev, num); +} + +void cxl_pci_release_irq_ranges(struct cxl_irq_ranges *irqs, + struct cxl *adapter) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + + pnv_cxl_release_hwirq_ranges(irqs, dev); +} + +static int setup_cxl_bars(struct pci_dev *dev) +{ + /* Safety check in case we get backported to < 3.17 without M64 */ + if ((p1_base(dev) < 0x100000000ULL) || + (p2_base(dev) < 0x100000000ULL)) { + dev_err(&dev->dev, "ABORTING: M32 BAR assignment incompatible with CXL\n"); + return -ENODEV; + } + + /* + * BAR 4/5 has a special meaning for CXL and must be programmed with a + * special value corresponding to the CXL protocol address range. 
+ * For POWER 8/9 that means bits 48:49 must be set to 10 + */ + pci_write_config_dword(dev, PCI_BASE_ADDRESS_4, 0x00000000); + pci_write_config_dword(dev, PCI_BASE_ADDRESS_5, 0x00020000); + + return 0; +} + +/* pciex node: ibm,opal-m64-window = <0x3d058 0x0 0x3d058 0x0 0x8 0x0>; */ +static int switch_card_to_cxl(struct pci_dev *dev) +{ + int vsec; + u8 val; + int rc; + + dev_info(&dev->dev, "switch card to CXL\n"); + + if (!(vsec = find_cxl_vsec(dev))) { + dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n"); + return -ENODEV; + } + + if ((rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val))) { + dev_err(&dev->dev, "failed to read current mode control: %i", rc); + return rc; + } + val &= ~CXL_VSEC_PROTOCOL_MASK; + val |= CXL_VSEC_PROTOCOL_256TB | CXL_VSEC_PROTOCOL_ENABLE; + if ((rc = CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val))) { + dev_err(&dev->dev, "failed to enable CXL protocol: %i", rc); + return rc; + } + /* + * The CAIA spec (v0.12 11.6 Bi-modal Device Support) states + * we must wait 100ms after this mode switch before touching + * PCIe config space. + */ + msleep(100); + + return 0; +} + +static int pci_map_slice_regs(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev) +{ + u64 p1n_base, p2n_base, afu_desc; + const u64 p1n_size = 0x100; + const u64 p2n_size = 0x1000; + + p1n_base = p1_base(dev) + 0x10000 + (afu->slice * p1n_size); + p2n_base = p2_base(dev) + (afu->slice * p2n_size); + afu->psn_phys = p2_base(dev) + (adapter->native->ps_off + (afu->slice * adapter->ps_size)); + afu_desc = p2_base(dev) + adapter->native->afu_desc_off + (afu->slice * adapter->native->afu_desc_size); + + if (!(afu->native->p1n_mmio = ioremap(p1n_base, p1n_size))) + goto err; + if (!(afu->p2n_mmio = ioremap(p2n_base, p2n_size))) + goto err1; + if (afu_desc) { + if (!(afu->native->afu_desc_mmio = ioremap(afu_desc, adapter->native->afu_desc_size))) + goto err2; + } + + return 0; +err2: + iounmap(afu->p2n_mmio); +err1: + iounmap(afu->native->p1n_mmio); +err: + dev_err(&afu->dev, "Error mapping AFU MMIO regions\n"); + return -ENOMEM; +} + +static void pci_unmap_slice_regs(struct cxl_afu *afu) +{ + if (afu->p2n_mmio) { + iounmap(afu->p2n_mmio); + afu->p2n_mmio = NULL; + } + if (afu->native->p1n_mmio) { + iounmap(afu->native->p1n_mmio); + afu->native->p1n_mmio = NULL; + } + if (afu->native->afu_desc_mmio) { + iounmap(afu->native->afu_desc_mmio); + afu->native->afu_desc_mmio = NULL; + } +} + +void cxl_pci_release_afu(struct device *dev) +{ + struct cxl_afu *afu = to_cxl_afu(dev); + + pr_devel("%s\n", __func__); + + idr_destroy(&afu->contexts_idr); + cxl_release_spa(afu); + + kfree(afu->native); + kfree(afu); +} + +/* Expects AFU struct to have recently been zeroed out */ +static int cxl_read_afu_descriptor(struct cxl_afu *afu) +{ + u64 val; + + val = AFUD_READ_INFO(afu); + afu->pp_irqs = AFUD_NUM_INTS_PER_PROC(val); + afu->max_procs_virtualised = AFUD_NUM_PROCS(val); + afu->crs_num = AFUD_NUM_CRS(val); + + if (AFUD_AFU_DIRECTED(val)) + afu->modes_supported |= CXL_MODE_DIRECTED; + if (AFUD_DEDICATED_PROCESS(val)) + afu->modes_supported |= CXL_MODE_DEDICATED; + if (AFUD_TIME_SLICED(val)) + afu->modes_supported |= CXL_MODE_TIME_SLICED; + + val = AFUD_READ_PPPSA(afu); + afu->pp_size = AFUD_PPPSA_LEN(val) * 4096; + afu->psa = AFUD_PPPSA_PSA(val); + if ((afu->pp_psa = AFUD_PPPSA_PP(val))) + afu->native->pp_offset = AFUD_READ_PPPSA_OFF(afu); + + val = AFUD_READ_CR(afu); + afu->crs_len = AFUD_CR_LEN(val) * 256; + afu->crs_offset = AFUD_READ_CR_OFF(afu); + + + /* eb_len is in multiple of 4K */ + afu->eb_len 
= AFUD_EB_LEN(AFUD_READ_EB(afu)) * 4096; + afu->eb_offset = AFUD_READ_EB_OFF(afu); + + /* eb_off is 4K aligned so lower 12 bits are always zero */ + if (EXTRACT_PPC_BITS(afu->eb_offset, 0, 11) != 0) { + dev_warn(&afu->dev, + "Invalid AFU error buffer offset %Lx\n", + afu->eb_offset); + dev_info(&afu->dev, + "Ignoring AFU error buffer in the descriptor\n"); + /* indicate that no afu buffer exists */ + afu->eb_len = 0; + } + + return 0; +} + +static int cxl_afu_descriptor_looks_ok(struct cxl_afu *afu) +{ + int i, rc; + u32 val; + + if (afu->psa && afu->adapter->ps_size < + (afu->native->pp_offset + afu->pp_size*afu->max_procs_virtualised)) { + dev_err(&afu->dev, "per-process PSA can't fit inside the PSA!\n"); + return -ENODEV; + } + + if (afu->pp_psa && (afu->pp_size < PAGE_SIZE)) + dev_warn(&afu->dev, "AFU uses pp_size(%#016llx) < PAGE_SIZE per-process PSA!\n", afu->pp_size); + + for (i = 0; i < afu->crs_num; i++) { + rc = cxl_ops->afu_cr_read32(afu, i, 0, &val); + if (rc || val == 0) { + dev_err(&afu->dev, "ABORTING: AFU configuration record %i is invalid\n", i); + return -EINVAL; + } + } + + if ((afu->modes_supported & ~CXL_MODE_DEDICATED) && afu->max_procs_virtualised == 0) { + /* + * We could also check this for the dedicated process model + * since the architecture indicates it should be set to 1, but + * in that case we ignore the value and I'd rather not risk + * breaking any existing dedicated process AFUs that left it as + * 0 (not that I'm aware of any). It is clearly an error for an + * AFU directed AFU to set this to 0, and would have previously + * triggered a bug resulting in the maximum not being enforced + * at all since idr_alloc treats 0 as no maximum. + */ + dev_err(&afu->dev, "AFU does not support any processes\n"); + return -EINVAL; + } + + return 0; +} + +static int sanitise_afu_regs_psl9(struct cxl_afu *afu) +{ + u64 reg; + + /* + * Clear out any regs that contain either an IVTE or address or may be + * waiting on an acknowledgment to try to be a bit safer as we bring + * it online + */ + reg = cxl_p2n_read(afu, CXL_AFU_Cntl_An); + if ((reg & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) { + dev_warn(&afu->dev, "WARNING: AFU was not disabled: %#016llx\n", reg); + if (cxl_ops->afu_reset(afu)) + return -EIO; + if (cxl_afu_disable(afu)) + return -EIO; + if (cxl_psl_purge(afu)) + return -EIO; + } + cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0x0000000000000000); + cxl_p1n_write(afu, CXL_PSL_AMBAR_An, 0x0000000000000000); + reg = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + if (reg) { + dev_warn(&afu->dev, "AFU had pending DSISR: %#016llx\n", reg); + if (reg & CXL_PSL9_DSISR_An_TF) + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); + else + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); + } + if (afu->adapter->native->sl_ops->register_serr_irq) { + reg = cxl_p1n_read(afu, CXL_PSL_SERR_An); + if (reg) { + if (reg & ~0x000000007fffffff) + dev_warn(&afu->dev, "AFU had pending SERR: %#016llx\n", reg); + cxl_p1n_write(afu, CXL_PSL_SERR_An, reg & ~0xffff); + } + } + reg = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); + if (reg) { + dev_warn(&afu->dev, "AFU had pending error status: %#016llx\n", reg); + cxl_p2n_write(afu, CXL_PSL_ErrStat_An, reg); + } + + return 0; +} + +static int sanitise_afu_regs_psl8(struct cxl_afu *afu) +{ + u64 reg; + + /* + * Clear out any regs that contain either an IVTE or address or may be + * waiting on an acknowledgement to try to be a bit safer as we bring + * it online + */ + reg = cxl_p2n_read(afu, CXL_AFU_Cntl_An); + if ((reg & 
CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) { + dev_warn(&afu->dev, "WARNING: AFU was not disabled: %#016llx\n", reg); + if (cxl_ops->afu_reset(afu)) + return -EIO; + if (cxl_afu_disable(afu)) + return -EIO; + if (cxl_psl_purge(afu)) + return -EIO; + } + cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0x0000000000000000); + cxl_p1n_write(afu, CXL_PSL_IVTE_Limit_An, 0x0000000000000000); + cxl_p1n_write(afu, CXL_PSL_IVTE_Offset_An, 0x0000000000000000); + cxl_p1n_write(afu, CXL_PSL_AMBAR_An, 0x0000000000000000); + cxl_p1n_write(afu, CXL_PSL_SPOffset_An, 0x0000000000000000); + cxl_p1n_write(afu, CXL_HAURP_An, 0x0000000000000000); + cxl_p2n_write(afu, CXL_CSRP_An, 0x0000000000000000); + cxl_p2n_write(afu, CXL_AURP1_An, 0x0000000000000000); + cxl_p2n_write(afu, CXL_AURP0_An, 0x0000000000000000); + cxl_p2n_write(afu, CXL_SSTP1_An, 0x0000000000000000); + cxl_p2n_write(afu, CXL_SSTP0_An, 0x0000000000000000); + reg = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + if (reg) { + dev_warn(&afu->dev, "AFU had pending DSISR: %#016llx\n", reg); + if (reg & CXL_PSL_DSISR_TRANS) + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); + else + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); + } + if (afu->adapter->native->sl_ops->register_serr_irq) { + reg = cxl_p1n_read(afu, CXL_PSL_SERR_An); + if (reg) { + if (reg & ~0xffff) + dev_warn(&afu->dev, "AFU had pending SERR: %#016llx\n", reg); + cxl_p1n_write(afu, CXL_PSL_SERR_An, reg & ~0xffff); + } + } + reg = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); + if (reg) { + dev_warn(&afu->dev, "AFU had pending error status: %#016llx\n", reg); + cxl_p2n_write(afu, CXL_PSL_ErrStat_An, reg); + } + + return 0; +} + +#define ERR_BUFF_MAX_COPY_SIZE PAGE_SIZE +/* + * afu_eb_read: + * Called from sysfs and reads the afu error info buffer. The h/w only supports + * 4/8 bytes aligned access. So in case the requested offset/count arent 8 byte + * aligned the function uses a bounce buffer which can be max PAGE_SIZE. 
+ */ +ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf, + loff_t off, size_t count) +{ + loff_t aligned_start, aligned_end; + size_t aligned_length; + void *tbuf; + const void __iomem *ebuf = afu->native->afu_desc_mmio + afu->eb_offset; + + if (count == 0 || off < 0 || (size_t)off >= afu->eb_len) + return 0; + + /* calculate aligned read window */ + count = min((size_t)(afu->eb_len - off), count); + aligned_start = round_down(off, 8); + aligned_end = round_up(off + count, 8); + aligned_length = aligned_end - aligned_start; + + /* max we can copy in one read is PAGE_SIZE */ + if (aligned_length > ERR_BUFF_MAX_COPY_SIZE) { + aligned_length = ERR_BUFF_MAX_COPY_SIZE; + count = ERR_BUFF_MAX_COPY_SIZE - (off & 0x7); + } + + /* use bounce buffer for copy */ + tbuf = (void *)__get_free_page(GFP_KERNEL); + if (!tbuf) + return -ENOMEM; + + /* perform aligned read from the mmio region */ + memcpy_fromio(tbuf, ebuf + aligned_start, aligned_length); + memcpy(buf, tbuf + (off & 0x7), count); + + free_page((unsigned long)tbuf); + + return count; +} + +static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev) +{ + int rc; + + if ((rc = pci_map_slice_regs(afu, adapter, dev))) + return rc; + + if (adapter->native->sl_ops->sanitise_afu_regs) { + rc = adapter->native->sl_ops->sanitise_afu_regs(afu); + if (rc) + goto err1; + } + + /* We need to reset the AFU before we can read the AFU descriptor */ + if ((rc = cxl_ops->afu_reset(afu))) + goto err1; + + if (cxl_verbose) + dump_afu_descriptor(afu); + + if ((rc = cxl_read_afu_descriptor(afu))) + goto err1; + + if ((rc = cxl_afu_descriptor_looks_ok(afu))) + goto err1; + + if (adapter->native->sl_ops->afu_regs_init) + if ((rc = adapter->native->sl_ops->afu_regs_init(afu))) + goto err1; + + if (adapter->native->sl_ops->register_serr_irq) + if ((rc = adapter->native->sl_ops->register_serr_irq(afu))) + goto err1; + + if ((rc = cxl_native_register_psl_irq(afu))) + goto err2; + + atomic_set(&afu->configured_state, 0); + return 0; + +err2: + if (adapter->native->sl_ops->release_serr_irq) + adapter->native->sl_ops->release_serr_irq(afu); +err1: + pci_unmap_slice_regs(afu); + return rc; +} + +static void pci_deconfigure_afu(struct cxl_afu *afu) +{ + /* + * It's okay to deconfigure when AFU is already locked, otherwise wait + * until there are no readers + */ + if (atomic_read(&afu->configured_state) != -1) { + while (atomic_cmpxchg(&afu->configured_state, 0, -1) != -1) + schedule(); + } + cxl_native_release_psl_irq(afu); + if (afu->adapter->native->sl_ops->release_serr_irq) + afu->adapter->native->sl_ops->release_serr_irq(afu); + pci_unmap_slice_regs(afu); +} + +static int pci_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) +{ + struct cxl_afu *afu; + int rc = -ENOMEM; + + afu = cxl_alloc_afu(adapter, slice); + if (!afu) + return -ENOMEM; + + afu->native = kzalloc(sizeof(struct cxl_afu_native), GFP_KERNEL); + if (!afu->native) + goto err_free_afu; + + mutex_init(&afu->native->spa_mutex); + + rc = dev_set_name(&afu->dev, "afu%i.%i", adapter->adapter_num, slice); + if (rc) + goto err_free_native; + + rc = pci_configure_afu(afu, adapter, dev); + if (rc) + goto err_free_native; + + /* Don't care if this fails */ + cxl_debugfs_afu_add(afu); + + /* + * After we call this function we must not free the afu directly, even + * if it returns an error! 
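+ * The error paths below therefore use put_device() rather than a direct
+ * kfree(); the device release callback (cxl_pci_release_afu() above) then
+ * frees afu->native and afu once the last reference is dropped.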
+ */ + if ((rc = cxl_register_afu(afu))) + goto err_put_dev; + + if ((rc = cxl_sysfs_afu_add(afu))) + goto err_del_dev; + + adapter->afu[afu->slice] = afu; + + if ((rc = cxl_pci_vphb_add(afu))) + dev_info(&afu->dev, "Can't register vPHB\n"); + + return 0; + +err_del_dev: + device_del(&afu->dev); +err_put_dev: + pci_deconfigure_afu(afu); + cxl_debugfs_afu_remove(afu); + put_device(&afu->dev); + return rc; + +err_free_native: + kfree(afu->native); +err_free_afu: + kfree(afu); + return rc; + +} + +static void cxl_pci_remove_afu(struct cxl_afu *afu) +{ + pr_devel("%s\n", __func__); + + if (!afu) + return; + + cxl_pci_vphb_remove(afu); + cxl_sysfs_afu_remove(afu); + cxl_debugfs_afu_remove(afu); + + spin_lock(&afu->adapter->afu_list_lock); + afu->adapter->afu[afu->slice] = NULL; + spin_unlock(&afu->adapter->afu_list_lock); + + cxl_context_detach_all(afu); + cxl_ops->afu_deactivate_mode(afu, afu->current_mode); + + pci_deconfigure_afu(afu); + device_unregister(&afu->dev); +} + +int cxl_pci_reset(struct cxl *adapter) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + int rc; + + if (adapter->perst_same_image) { + dev_warn(&dev->dev, + "cxl: refusing to reset/reflash when perst_reloads_same_image is set.\n"); + return -EINVAL; + } + + dev_info(&dev->dev, "CXL reset\n"); + + /* + * The adapter is about to be reset, so ignore errors. + */ + cxl_data_cache_flush(adapter); + + /* pcie_warm_reset requests a fundamental pci reset which includes a + * PERST assert/deassert. PERST triggers a loading of the image + * if "user" or "factory" is selected in sysfs */ + if ((rc = pci_set_pcie_reset_state(dev, pcie_warm_reset))) { + dev_err(&dev->dev, "cxl: pcie_warm_reset failed\n"); + return rc; + } + + return rc; +} + +static int cxl_map_adapter_regs(struct cxl *adapter, struct pci_dev *dev) +{ + if (pci_request_region(dev, 2, "priv 2 regs")) + goto err1; + if (pci_request_region(dev, 0, "priv 1 regs")) + goto err2; + + pr_devel("cxl_map_adapter_regs: p1: %#016llx %#llx, p2: %#016llx %#llx", + p1_base(dev), p1_size(dev), p2_base(dev), p2_size(dev)); + + if (!(adapter->native->p1_mmio = ioremap(p1_base(dev), p1_size(dev)))) + goto err3; + + if (!(adapter->native->p2_mmio = ioremap(p2_base(dev), p2_size(dev)))) + goto err4; + + return 0; + +err4: + iounmap(adapter->native->p1_mmio); + adapter->native->p1_mmio = NULL; +err3: + pci_release_region(dev, 0); +err2: + pci_release_region(dev, 2); +err1: + return -ENOMEM; +} + +static void cxl_unmap_adapter_regs(struct cxl *adapter) +{ + if (adapter->native->p1_mmio) { + iounmap(adapter->native->p1_mmio); + adapter->native->p1_mmio = NULL; + pci_release_region(to_pci_dev(adapter->dev.parent), 2); + } + if (adapter->native->p2_mmio) { + iounmap(adapter->native->p2_mmio); + adapter->native->p2_mmio = NULL; + pci_release_region(to_pci_dev(adapter->dev.parent), 0); + } +} + +static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev) +{ + int vsec; + u32 afu_desc_off, afu_desc_size; + u32 ps_off, ps_size; + u16 vseclen; + u8 image_state; + + if (!(vsec = find_cxl_vsec(dev))) { + dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n"); + return -ENODEV; + } + + CXL_READ_VSEC_LENGTH(dev, vsec, &vseclen); + if (vseclen < CXL_VSEC_MIN_SIZE) { + dev_err(&dev->dev, "ABORTING: CXL VSEC too short\n"); + return -EINVAL; + } + + CXL_READ_VSEC_STATUS(dev, vsec, &adapter->vsec_status); + CXL_READ_VSEC_PSL_REVISION(dev, vsec, &adapter->psl_rev); + CXL_READ_VSEC_CAIA_MAJOR(dev, vsec, &adapter->caia_major); + CXL_READ_VSEC_CAIA_MINOR(dev, vsec, &adapter->caia_minor); + 
CXL_READ_VSEC_BASE_IMAGE(dev, vsec, &adapter->base_image); + CXL_READ_VSEC_IMAGE_STATE(dev, vsec, &image_state); + adapter->user_image_loaded = !!(image_state & CXL_VSEC_USER_IMAGE_LOADED); + adapter->perst_select_user = !!(image_state & CXL_VSEC_USER_IMAGE_LOADED); + adapter->perst_loads_image = !!(image_state & CXL_VSEC_PERST_LOADS_IMAGE); + + CXL_READ_VSEC_NAFUS(dev, vsec, &adapter->slices); + CXL_READ_VSEC_AFU_DESC_OFF(dev, vsec, &afu_desc_off); + CXL_READ_VSEC_AFU_DESC_SIZE(dev, vsec, &afu_desc_size); + CXL_READ_VSEC_PS_OFF(dev, vsec, &ps_off); + CXL_READ_VSEC_PS_SIZE(dev, vsec, &ps_size); + + /* Convert everything to bytes, because there is NO WAY I'd look at the + * code a month later and forget what units these are in ;-) */ + adapter->native->ps_off = ps_off * 64 * 1024; + adapter->ps_size = ps_size * 64 * 1024; + adapter->native->afu_desc_off = afu_desc_off * 64 * 1024; + adapter->native->afu_desc_size = afu_desc_size * 64 * 1024; + + /* Total IRQs - 1 PSL ERROR - #AFU*(1 slice error + 1 DSI) */ + adapter->user_irqs = pnv_cxl_get_irq_count(dev) - 1 - 2*adapter->slices; + + return 0; +} + +/* + * Workaround a PCIe Host Bridge defect on some cards, that can cause + * malformed Transaction Layer Packet (TLP) errors to be erroneously + * reported. Mask this error in the Uncorrectable Error Mask Register. + * + * The upper nibble of the PSL revision is used to distinguish between + * different cards. The affected ones have it set to 0. + */ +static void cxl_fixup_malformed_tlp(struct cxl *adapter, struct pci_dev *dev) +{ + int aer; + u32 data; + + if (adapter->psl_rev & 0xf000) + return; + if (!(aer = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR))) + return; + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &data); + if (data & PCI_ERR_UNC_MALF_TLP) + if (data & PCI_ERR_UNC_INTN) + return; + data |= PCI_ERR_UNC_MALF_TLP; + data |= PCI_ERR_UNC_INTN; + pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, data); +} + +static bool cxl_compatible_caia_version(struct cxl *adapter) +{ + if (cxl_is_power8() && (adapter->caia_major == 1)) + return true; + + if (cxl_is_power9() && (adapter->caia_major == 2)) + return true; + + return false; +} + +static int cxl_vsec_looks_ok(struct cxl *adapter, struct pci_dev *dev) +{ + if (adapter->vsec_status & CXL_STATUS_SECOND_PORT) + return -EBUSY; + + if (adapter->vsec_status & CXL_UNSUPPORTED_FEATURES) { + dev_err(&dev->dev, "ABORTING: CXL requires unsupported features\n"); + return -EINVAL; + } + + if (!cxl_compatible_caia_version(adapter)) { + dev_info(&dev->dev, "Ignoring card. 
PSL type is not supported (caia version: %d)\n", + adapter->caia_major); + return -ENODEV; + } + + if (!adapter->slices) { + /* Once we support dynamic reprogramming we can use the card if + * it supports loadable AFUs */ + dev_err(&dev->dev, "ABORTING: Device has no AFUs\n"); + return -EINVAL; + } + + if (!adapter->native->afu_desc_off || !adapter->native->afu_desc_size) { + dev_err(&dev->dev, "ABORTING: VSEC shows no AFU descriptors\n"); + return -EINVAL; + } + + if (adapter->ps_size > p2_size(dev) - adapter->native->ps_off) { + dev_err(&dev->dev, "ABORTING: Problem state size larger than " + "available in BAR2: 0x%llx > 0x%llx\n", + adapter->ps_size, p2_size(dev) - adapter->native->ps_off); + return -EINVAL; + } + + return 0; +} + +ssize_t cxl_pci_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len) +{ + return pci_read_vpd(to_pci_dev(adapter->dev.parent), 0, len, buf); +} + +static void cxl_release_adapter(struct device *dev) +{ + struct cxl *adapter = to_cxl_adapter(dev); + + pr_devel("cxl_release_adapter\n"); + + cxl_remove_adapter_nr(adapter); + + kfree(adapter->native); + kfree(adapter); +} + +#define CXL_PSL_ErrIVTE_tberror (0x1ull << (63-31)) + +static int sanitise_adapter_regs(struct cxl *adapter) +{ + int rc = 0; + + /* Clear PSL tberror bit by writing 1 to it */ + cxl_p1_write(adapter, CXL_PSL_ErrIVTE, CXL_PSL_ErrIVTE_tberror); + + if (adapter->native->sl_ops->invalidate_all) { + /* do not invalidate ERAT entries when not reloading on PERST */ + if (cxl_is_power9() && (adapter->perst_loads_image)) + return 0; + rc = adapter->native->sl_ops->invalidate_all(adapter); + } + + return rc; +} + +/* This should contain *only* operations that can safely be done in + * both creation and recovery. + */ +static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev) +{ + int rc; + + adapter->dev.parent = &dev->dev; + adapter->dev.release = cxl_release_adapter; + pci_set_drvdata(dev, adapter); + + rc = pci_enable_device(dev); + if (rc) { + dev_err(&dev->dev, "pci_enable_device failed: %i\n", rc); + return rc; + } + + if ((rc = cxl_read_vsec(adapter, dev))) + return rc; + + if ((rc = cxl_vsec_looks_ok(adapter, dev))) + return rc; + + cxl_fixup_malformed_tlp(adapter, dev); + + if ((rc = setup_cxl_bars(dev))) + return rc; + + if ((rc = switch_card_to_cxl(dev))) + return rc; + + if ((rc = cxl_update_image_control(adapter))) + return rc; + + if ((rc = cxl_map_adapter_regs(adapter, dev))) + return rc; + + if ((rc = sanitise_adapter_regs(adapter))) + goto err; + + if ((rc = adapter->native->sl_ops->adapter_regs_init(adapter, dev))) + goto err; + + /* Required for devices using CAPP DMA mode, harmless for others */ + pci_set_master(dev); + + adapter->tunneled_ops_supported = false; + + if (cxl_is_power9()) { + if (pnv_pci_set_tunnel_bar(dev, 0x00020000E0000000ull, 1)) + dev_info(&dev->dev, "Tunneled operations unsupported\n"); + else + adapter->tunneled_ops_supported = true; + } + + if ((rc = pnv_phb_to_cxl_mode(dev, adapter->native->sl_ops->capi_mode))) + goto err; + + /* If recovery happened, the last step is to turn on snooping. 
+ * In the non-recovery case this has no effect */ + if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON))) + goto err; + + /* Ignore error, adapter init is not dependant on timebase sync */ + cxl_setup_psl_timebase(adapter, dev); + + if ((rc = cxl_native_register_psl_err_irq(adapter))) + goto err; + + return 0; + +err: + cxl_unmap_adapter_regs(adapter); + return rc; + +} + +static void cxl_deconfigure_adapter(struct cxl *adapter) +{ + struct pci_dev *pdev = to_pci_dev(adapter->dev.parent); + + if (cxl_is_power9()) + pnv_pci_set_tunnel_bar(pdev, 0x00020000E0000000ull, 0); + + cxl_native_release_psl_err_irq(adapter); + cxl_unmap_adapter_regs(adapter); + + pci_disable_device(pdev); +} + +static void cxl_stop_trace_psl9(struct cxl *adapter) +{ + int traceid; + u64 trace_state, trace_mask; + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + + /* read each tracearray state and issue mmio to stop them is needed */ + for (traceid = 0; traceid <= CXL_PSL9_TRACEID_MAX; ++traceid) { + trace_state = cxl_p1_read(adapter, CXL_PSL9_CTCCFG); + trace_mask = (0x3ULL << (62 - traceid * 2)); + trace_state = (trace_state & trace_mask) >> (62 - traceid * 2); + dev_dbg(&dev->dev, "cxl: Traceid-%d trace_state=0x%0llX\n", + traceid, trace_state); + + /* issue mmio if the trace array isn't in FIN state */ + if (trace_state != CXL_PSL9_TRACESTATE_FIN) + cxl_p1_write(adapter, CXL_PSL9_TRACECFG, + 0x8400000000000000ULL | traceid); + } +} + +static void cxl_stop_trace_psl8(struct cxl *adapter) +{ + int slice; + + /* Stop the trace */ + cxl_p1_write(adapter, CXL_PSL_TRACE, 0x8000000000000017LL); + + /* Stop the slice traces */ + spin_lock(&adapter->afu_list_lock); + for (slice = 0; slice < adapter->slices; slice++) { + if (adapter->afu[slice]) + cxl_p1n_write(adapter->afu[slice], CXL_PSL_SLICE_TRACE, + 0x8000000000000000LL); + } + spin_unlock(&adapter->afu_list_lock); +} + +static const struct cxl_service_layer_ops psl9_ops = { + .adapter_regs_init = init_implementation_adapter_regs_psl9, + .invalidate_all = cxl_invalidate_all_psl9, + .afu_regs_init = init_implementation_afu_regs_psl9, + .sanitise_afu_regs = sanitise_afu_regs_psl9, + .register_serr_irq = cxl_native_register_serr_irq, + .release_serr_irq = cxl_native_release_serr_irq, + .handle_interrupt = cxl_irq_psl9, + .fail_irq = cxl_fail_irq_psl, + .activate_dedicated_process = cxl_activate_dedicated_process_psl9, + .attach_afu_directed = cxl_attach_afu_directed_psl9, + .attach_dedicated_process = cxl_attach_dedicated_process_psl9, + .update_dedicated_ivtes = cxl_update_dedicated_ivtes_psl9, + .debugfs_add_adapter_regs = cxl_debugfs_add_adapter_regs_psl9, + .debugfs_add_afu_regs = cxl_debugfs_add_afu_regs_psl9, + .psl_irq_dump_registers = cxl_native_irq_dump_regs_psl9, + .err_irq_dump_registers = cxl_native_err_irq_dump_regs_psl9, + .debugfs_stop_trace = cxl_stop_trace_psl9, + .timebase_read = timebase_read_psl9, + .capi_mode = OPAL_PHB_CAPI_MODE_CAPI, + .needs_reset_before_disable = true, +}; + +static const struct cxl_service_layer_ops psl8_ops = { + .adapter_regs_init = init_implementation_adapter_regs_psl8, + .invalidate_all = cxl_invalidate_all_psl8, + .afu_regs_init = init_implementation_afu_regs_psl8, + .sanitise_afu_regs = sanitise_afu_regs_psl8, + .register_serr_irq = cxl_native_register_serr_irq, + .release_serr_irq = cxl_native_release_serr_irq, + .handle_interrupt = cxl_irq_psl8, + .fail_irq = cxl_fail_irq_psl, + .activate_dedicated_process = cxl_activate_dedicated_process_psl8, + .attach_afu_directed = cxl_attach_afu_directed_psl8, + 
.attach_dedicated_process = cxl_attach_dedicated_process_psl8, + .update_dedicated_ivtes = cxl_update_dedicated_ivtes_psl8, + .debugfs_add_adapter_regs = cxl_debugfs_add_adapter_regs_psl8, + .debugfs_add_afu_regs = cxl_debugfs_add_afu_regs_psl8, + .psl_irq_dump_registers = cxl_native_irq_dump_regs_psl8, + .err_irq_dump_registers = cxl_native_err_irq_dump_regs_psl8, + .debugfs_stop_trace = cxl_stop_trace_psl8, + .write_timebase_ctrl = write_timebase_ctrl_psl8, + .timebase_read = timebase_read_psl8, + .capi_mode = OPAL_PHB_CAPI_MODE_CAPI, + .needs_reset_before_disable = true, +}; + +static void set_sl_ops(struct cxl *adapter, struct pci_dev *dev) +{ + if (cxl_is_power8()) { + dev_info(&dev->dev, "Device uses a PSL8\n"); + adapter->native->sl_ops = &psl8_ops; + } else { + dev_info(&dev->dev, "Device uses a PSL9\n"); + adapter->native->sl_ops = &psl9_ops; + } +} + + +static struct cxl *cxl_pci_init_adapter(struct pci_dev *dev) +{ + struct cxl *adapter; + int rc; + + adapter = cxl_alloc_adapter(); + if (!adapter) + return ERR_PTR(-ENOMEM); + + adapter->native = kzalloc(sizeof(struct cxl_native), GFP_KERNEL); + if (!adapter->native) { + rc = -ENOMEM; + goto err_release; + } + + set_sl_ops(adapter, dev); + + /* Set defaults for parameters which need to persist over + * configure/reconfigure + */ + adapter->perst_loads_image = true; + adapter->perst_same_image = false; + + rc = cxl_configure_adapter(adapter, dev); + if (rc) { + pci_disable_device(dev); + goto err_release; + } + + /* Don't care if this one fails: */ + cxl_debugfs_adapter_add(adapter); + + /* + * After we call this function we must not free the adapter directly, + * even if it returns an error! + */ + if ((rc = cxl_register_adapter(adapter))) + goto err_put_dev; + + if ((rc = cxl_sysfs_adapter_add(adapter))) + goto err_del_dev; + + /* Release the context lock as adapter is configured */ + cxl_adapter_context_unlock(adapter); + + return adapter; + +err_del_dev: + device_del(&adapter->dev); +err_put_dev: + /* This should mirror cxl_remove_adapter, except without the + * sysfs parts + */ + cxl_debugfs_adapter_remove(adapter); + cxl_deconfigure_adapter(adapter); + put_device(&adapter->dev); + return ERR_PTR(rc); + +err_release: + cxl_release_adapter(&adapter->dev); + return ERR_PTR(rc); +} + +static void cxl_pci_remove_adapter(struct cxl *adapter) +{ + pr_devel("cxl_remove_adapter\n"); + + cxl_sysfs_adapter_remove(adapter); + cxl_debugfs_adapter_remove(adapter); + + /* + * Flush adapter datacache as its about to be removed. 
+ */ + cxl_data_cache_flush(adapter); + + cxl_deconfigure_adapter(adapter); + + device_unregister(&adapter->dev); +} + +#define CXL_MAX_PCIEX_PARENT 2 + +int cxl_slot_is_switched(struct pci_dev *dev) +{ + struct device_node *np; + int depth = 0; + + if (!(np = pci_device_to_OF_node(dev))) { + pr_err("cxl: np = NULL\n"); + return -ENODEV; + } + of_node_get(np); + while (np) { + np = of_get_next_parent(np); + if (!of_node_is_type(np, "pciex")) + break; + depth++; + } + of_node_put(np); + return (depth > CXL_MAX_PCIEX_PARENT); +} + +static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) +{ + struct cxl *adapter; + int slice; + int rc; + + if (cxl_pci_is_vphb_device(dev)) { + dev_dbg(&dev->dev, "cxl_init_adapter: Ignoring cxl vphb device\n"); + return -ENODEV; + } + + if (cxl_slot_is_switched(dev)) { + dev_info(&dev->dev, "Ignoring card on incompatible PCI slot\n"); + return -ENODEV; + } + + if (cxl_is_power9() && !radix_enabled()) { + dev_info(&dev->dev, "Only Radix mode supported\n"); + return -ENODEV; + } + + if (cxl_verbose) + dump_cxl_config_space(dev); + + adapter = cxl_pci_init_adapter(dev); + if (IS_ERR(adapter)) { + dev_err(&dev->dev, "cxl_init_adapter failed: %li\n", PTR_ERR(adapter)); + return PTR_ERR(adapter); + } + + for (slice = 0; slice < adapter->slices; slice++) { + if ((rc = pci_init_afu(adapter, slice, dev))) { + dev_err(&dev->dev, "AFU %i failed to initialise: %i\n", slice, rc); + continue; + } + + rc = cxl_afu_select_best_mode(adapter->afu[slice]); + if (rc) + dev_err(&dev->dev, "AFU %i failed to start: %i\n", slice, rc); + } + + return 0; +} + +static void cxl_remove(struct pci_dev *dev) +{ + struct cxl *adapter = pci_get_drvdata(dev); + struct cxl_afu *afu; + int i; + + /* + * Lock to prevent someone grabbing a ref through the adapter list as + * we are removing it + */ + for (i = 0; i < adapter->slices; i++) { + afu = adapter->afu[i]; + cxl_pci_remove_afu(afu); + } + cxl_pci_remove_adapter(adapter); +} + +static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu, + pci_channel_state_t state) +{ + struct pci_dev *afu_dev; + struct pci_driver *afu_drv; + const struct pci_error_handlers *err_handler; + pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET; + pci_ers_result_t afu_result = PCI_ERS_RESULT_NEED_RESET; + + /* There should only be one entry, but go through the list + * anyway + */ + if (afu == NULL || afu->phb == NULL) + return result; + + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { + afu_drv = to_pci_driver(afu_dev->dev.driver); + if (!afu_drv) + continue; + + afu_dev->error_state = state; + + err_handler = afu_drv->err_handler; + if (err_handler) + afu_result = err_handler->error_detected(afu_dev, + state); + /* Disconnect trumps all, NONE trumps NEED_RESET */ + if (afu_result == PCI_ERS_RESULT_DISCONNECT) + result = PCI_ERS_RESULT_DISCONNECT; + else if ((afu_result == PCI_ERS_RESULT_NONE) && + (result == PCI_ERS_RESULT_NEED_RESET)) + result = PCI_ERS_RESULT_NONE; + } + return result; +} + +static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct cxl *adapter = pci_get_drvdata(pdev); + struct cxl_afu *afu; + pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET; + pci_ers_result_t afu_result = PCI_ERS_RESULT_NEED_RESET; + int i; + + /* At this point, we could still have an interrupt pending. + * Let's try to get them out of the way before they do + * anything we don't like. + */ + schedule(); + + /* If we're permanently dead, give up. 
*/ + if (state == pci_channel_io_perm_failure) { + spin_lock(&adapter->afu_list_lock); + for (i = 0; i < adapter->slices; i++) { + afu = adapter->afu[i]; + /* + * Tell the AFU drivers; but we don't care what they + * say, we're going away. + */ + cxl_vphb_error_detected(afu, state); + } + spin_unlock(&adapter->afu_list_lock); + return PCI_ERS_RESULT_DISCONNECT; + } + + /* Are we reflashing? + * + * If we reflash, we could come back as something entirely + * different, including a non-CAPI card. As such, by default + * we don't participate in the process. We'll be unbound and + * the slot re-probed. (TODO: check EEH doesn't blindly rebind + * us!) + * + * However, this isn't the entire story: for reliability + * reasons, we usually want to reflash the FPGA on PERST in + * order to get back to a more reliable known-good state. + * + * This causes us a bit of a problem: if we reflash we can't + * trust that we'll come back the same - we could have a new + * image and been PERSTed in order to load that + * image. However, most of the time we actually *will* come + * back the same - for example a regular EEH event. + * + * Therefore, we allow the user to assert that the image is + * indeed the same and that we should continue on into EEH + * anyway. + */ + if (adapter->perst_loads_image && !adapter->perst_same_image) { + /* TODO take the PHB out of CXL mode */ + dev_info(&pdev->dev, "reflashing, so opting out of EEH!\n"); + return PCI_ERS_RESULT_NONE; + } + + /* + * At this point, we want to try to recover. We'll always + * need a complete slot reset: we don't trust any other reset. + * + * Now, we go through each AFU: + * - We send the driver, if bound, an error_detected callback. + * We expect it to clean up, but it can also tell us to give + * up and permanently detach the card. To simplify things, if + * any bound AFU driver doesn't support EEH, we give up on EEH. + * + * - We detach all contexts associated with the AFU. This + * does not free them, but puts them into a CLOSED state + * which causes any associated files to return useful + * errors to userland. It also unmaps, but does not free, + * any IRQs. + * + * - We clean up our side: releasing and unmapping resources we hold + * so we can wire them up again when the hardware comes back up. + * + * Driver authors should note: + * + * - Any contexts you create in your kernel driver (except + * those associated with anonymous file descriptors) are + * your responsibility to free and recreate. Likewise with + * any attached resources. + * + * - We will take responsibility for re-initialising the + * device context (the one set up for you in + * cxl_pci_enable_device_hook and accessed through + * cxl_get_context). If you've attached IRQs or other + * resources to it, they remain yours to free. + * + * You can call the same functions to release resources as you + * normally would: we make sure that these functions continue + * to work when the hardware is down. + * + * Two examples: + * + * 1) If you normally free all your resources at the end of + * each request, or if you use anonymous FDs, your + * error_detected callback can simply set a flag to tell + * your driver not to start any new calls. You can then + * clear the flag in the resume callback. + * + * 2) If you normally allocate your resources on startup: + * * Set a flag in error_detected as above. + * * Let CXL detach your contexts. + * * In slot_reset, free the old resources and allocate new ones. + * * In resume, clear the flag to allow things to start.
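+ *
+ * A rough sketch of (1), using made-up names (my_afu_drv and its
+ * "frozen" flag are purely illustrative, not part of this driver):
+ *
+ *	static pci_ers_result_t my_afu_error_detected(struct pci_dev *pdev,
+ *						      pci_channel_state_t state)
+ *	{
+ *		struct my_afu_drv *drv = pci_get_drvdata(pdev);
+ *
+ *		drv->frozen = true;
+ *		return PCI_ERS_RESULT_NEED_RESET;
+ *	}
+ *
+ *	static void my_afu_resume(struct pci_dev *pdev)
+ *	{
+ *		struct my_afu_drv *drv = pci_get_drvdata(pdev);
+ *
+ *		drv->frozen = false;
+ *	}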
+ */ + + /* Make sure no one else changes the afu list */ + spin_lock(&adapter->afu_list_lock); + + for (i = 0; i < adapter->slices; i++) { + afu = adapter->afu[i]; + + if (afu == NULL) + continue; + + afu_result = cxl_vphb_error_detected(afu, state); + cxl_context_detach_all(afu); + cxl_ops->afu_deactivate_mode(afu, afu->current_mode); + pci_deconfigure_afu(afu); + + /* Disconnect trumps all, NONE trumps NEED_RESET */ + if (afu_result == PCI_ERS_RESULT_DISCONNECT) + result = PCI_ERS_RESULT_DISCONNECT; + else if ((afu_result == PCI_ERS_RESULT_NONE) && + (result == PCI_ERS_RESULT_NEED_RESET)) + result = PCI_ERS_RESULT_NONE; + } + spin_unlock(&adapter->afu_list_lock); + + /* should take the context lock here */ + if (cxl_adapter_context_lock(adapter) != 0) + dev_warn(&adapter->dev, + "Couldn't take context lock with %d active-contexts\n", + atomic_read(&adapter->contexts_num)); + + cxl_deconfigure_adapter(adapter); + + return result; +} + +static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev) +{ + struct cxl *adapter = pci_get_drvdata(pdev); + struct cxl_afu *afu; + struct cxl_context *ctx; + struct pci_dev *afu_dev; + struct pci_driver *afu_drv; + const struct pci_error_handlers *err_handler; + pci_ers_result_t afu_result = PCI_ERS_RESULT_RECOVERED; + pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED; + int i; + + if (cxl_configure_adapter(adapter, pdev)) + goto err; + + /* + * Unlock context activation for the adapter. Ideally this should be + * done in cxl_pci_resume but cxlflash module tries to activate the + * master context as part of slot_reset callback. + */ + cxl_adapter_context_unlock(adapter); + + spin_lock(&adapter->afu_list_lock); + for (i = 0; i < adapter->slices; i++) { + afu = adapter->afu[i]; + + if (afu == NULL) + continue; + + if (pci_configure_afu(afu, adapter, pdev)) + goto err_unlock; + + if (cxl_afu_select_best_mode(afu)) + goto err_unlock; + + if (afu->phb == NULL) + continue; + + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { + /* Reset the device context. + * TODO: make this less disruptive + */ + ctx = cxl_get_context(afu_dev); + + if (ctx && cxl_release_context(ctx)) + goto err_unlock; + + ctx = cxl_dev_context_init(afu_dev); + if (IS_ERR(ctx)) + goto err_unlock; + + afu_dev->dev.archdata.cxl_ctx = ctx; + + if (cxl_ops->afu_check_and_enable(afu)) + goto err_unlock; + + afu_dev->error_state = pci_channel_io_normal; + + /* If there's a driver attached, allow it to + * chime in on recovery. Drivers should check + * if everything has come back OK, but + * shouldn't start new work until we call + * their resume function. + */ + afu_drv = to_pci_driver(afu_dev->dev.driver); + if (!afu_drv) + continue; + + err_handler = afu_drv->err_handler; + if (err_handler && err_handler->slot_reset) + afu_result = err_handler->slot_reset(afu_dev); + + if (afu_result == PCI_ERS_RESULT_DISCONNECT) + result = PCI_ERS_RESULT_DISCONNECT; + } + } + + spin_unlock(&adapter->afu_list_lock); + return result; + +err_unlock: + spin_unlock(&adapter->afu_list_lock); + +err: + /* All the bits that happen in both error_detected and cxl_remove + * should be idempotent, so we don't need to worry about leaving a mix + * of unconfigured and reconfigured resources. + */ + dev_err(&pdev->dev, "EEH recovery failed. 
Asking to be disconnected.\n"); + return PCI_ERS_RESULT_DISCONNECT; +} + +static void cxl_pci_resume(struct pci_dev *pdev) +{ + struct cxl *adapter = pci_get_drvdata(pdev); + struct cxl_afu *afu; + struct pci_dev *afu_dev; + struct pci_driver *afu_drv; + const struct pci_error_handlers *err_handler; + int i; + + /* Everything is back now. Drivers should restart work now. + * This is not the place to be checking if everything came back up + * properly, because there's no return value: do that in slot_reset. + */ + spin_lock(&adapter->afu_list_lock); + for (i = 0; i < adapter->slices; i++) { + afu = adapter->afu[i]; + + if (afu == NULL || afu->phb == NULL) + continue; + + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { + afu_drv = to_pci_driver(afu_dev->dev.driver); + if (!afu_drv) + continue; + + err_handler = afu_drv->err_handler; + if (err_handler && err_handler->resume) + err_handler->resume(afu_dev); + } + } + spin_unlock(&adapter->afu_list_lock); +} + +static const struct pci_error_handlers cxl_err_handler = { + .error_detected = cxl_pci_error_detected, + .slot_reset = cxl_pci_slot_reset, + .resume = cxl_pci_resume, +}; + +struct pci_driver cxl_pci_driver = { + .name = "cxl-pci", + .id_table = cxl_pci_tbl, + .probe = cxl_probe, + .remove = cxl_remove, + .shutdown = cxl_remove, + .err_handler = &cxl_err_handler, +}; diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c new file mode 100644 index 0000000000..315c43f17d --- /dev/null +++ b/drivers/misc/cxl/sysfs.c @@ -0,0 +1,771 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2014 IBM Corp. + */ + +#include +#include +#include +#include + +#include "cxl.h" + +#define to_afu_chardev_m(d) dev_get_drvdata(d) + +/********* Adapter attributes **********************************************/ + +static ssize_t caia_version_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + return scnprintf(buf, PAGE_SIZE, "%i.%i\n", adapter->caia_major, + adapter->caia_minor); +} + +static ssize_t psl_revision_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->psl_rev); +} + +static ssize_t base_image_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->base_image); +} + +static ssize_t image_loaded_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + if (adapter->user_image_loaded) + return scnprintf(buf, PAGE_SIZE, "user\n"); + return scnprintf(buf, PAGE_SIZE, "factory\n"); +} + +static ssize_t psl_timebase_synced_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + u64 psl_tb, delta; + + /* Recompute the status only in native mode */ + if (cpu_has_feature(CPU_FTR_HVMODE)) { + psl_tb = adapter->native->sl_ops->timebase_read(adapter); + delta = abs(mftb() - psl_tb); + + /* CORE TB and PSL TB difference <= 16usecs ? */ + adapter->psl_timebase_synced = (tb_to_ns(delta) < 16000) ? true : false; + pr_devel("PSL timebase %s - delta: 0x%016llx\n", + (tb_to_ns(delta) < 16000) ? 
"synchronized" : + "not synchronized", tb_to_ns(delta)); + } + return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->psl_timebase_synced); +} + +static ssize_t tunneled_ops_supported_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->tunneled_ops_supported); +} + +static ssize_t reset_adapter_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl *adapter = to_cxl_adapter(device); + int rc; + int val; + + rc = sscanf(buf, "%i", &val); + if ((rc != 1) || (val != 1 && val != -1)) + return -EINVAL; + + /* + * See if we can lock the context mapping that's only allowed + * when there are no contexts attached to the adapter. Once + * taken this will also prevent any context from getting activated. + */ + if (val == 1) { + rc = cxl_adapter_context_lock(adapter); + if (rc) + goto out; + + rc = cxl_ops->adapter_reset(adapter); + /* In case reset failed release context lock */ + if (rc) + cxl_adapter_context_unlock(adapter); + + } else if (val == -1) { + /* Perform a forced adapter reset */ + rc = cxl_ops->adapter_reset(adapter); + } + +out: + return rc ? rc : count; +} + +static ssize_t load_image_on_perst_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + if (!adapter->perst_loads_image) + return scnprintf(buf, PAGE_SIZE, "none\n"); + + if (adapter->perst_select_user) + return scnprintf(buf, PAGE_SIZE, "user\n"); + return scnprintf(buf, PAGE_SIZE, "factory\n"); +} + +static ssize_t load_image_on_perst_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl *adapter = to_cxl_adapter(device); + int rc; + + if (!strncmp(buf, "none", 4)) + adapter->perst_loads_image = false; + else if (!strncmp(buf, "user", 4)) { + adapter->perst_select_user = true; + adapter->perst_loads_image = true; + } else if (!strncmp(buf, "factory", 7)) { + adapter->perst_select_user = false; + adapter->perst_loads_image = true; + } else + return -EINVAL; + + if ((rc = cxl_update_image_control(adapter))) + return rc; + + return count; +} + +static ssize_t perst_reloads_same_image_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->perst_same_image); +} + +static ssize_t perst_reloads_same_image_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl *adapter = to_cxl_adapter(device); + int rc; + int val; + + rc = sscanf(buf, "%i", &val); + if ((rc != 1) || !(val == 1 || val == 0)) + return -EINVAL; + + adapter->perst_same_image = (val == 1); + return count; +} + +static struct device_attribute adapter_attrs[] = { + __ATTR_RO(caia_version), + __ATTR_RO(psl_revision), + __ATTR_RO(base_image), + __ATTR_RO(image_loaded), + __ATTR_RO(psl_timebase_synced), + __ATTR_RO(tunneled_ops_supported), + __ATTR_RW(load_image_on_perst), + __ATTR_RW(perst_reloads_same_image), + __ATTR(reset, S_IWUSR, NULL, reset_adapter_store), +}; + + +/********* AFU master specific attributes **********************************/ + +static ssize_t mmio_size_show_master(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_afu_chardev_m(device); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->adapter->ps_size); +} + +static 
ssize_t pp_mmio_off_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_afu_chardev_m(device); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->native->pp_offset); +} + +static ssize_t pp_mmio_len_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_afu_chardev_m(device); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->pp_size); +} + +static struct device_attribute afu_master_attrs[] = { + __ATTR(mmio_size, S_IRUGO, mmio_size_show_master, NULL), + __ATTR_RO(pp_mmio_off), + __ATTR_RO(pp_mmio_len), +}; + + +/********* AFU attributes **************************************************/ + +static ssize_t mmio_size_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_cxl_afu(device); + + if (afu->pp_size) + return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->pp_size); + return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->adapter->ps_size); +} + +static ssize_t reset_store_afu(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl_afu *afu = to_cxl_afu(device); + int rc; + + /* Not safe to reset if it is currently in use */ + mutex_lock(&afu->contexts_lock); + if (!idr_is_empty(&afu->contexts_idr)) { + rc = -EBUSY; + goto err; + } + + if ((rc = cxl_ops->afu_reset(afu))) + goto err; + + rc = count; +err: + mutex_unlock(&afu->contexts_lock); + return rc; +} + +static ssize_t irqs_min_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_cxl_afu(device); + + return scnprintf(buf, PAGE_SIZE, "%i\n", afu->pp_irqs); +} + +static ssize_t irqs_max_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_cxl_afu(device); + + return scnprintf(buf, PAGE_SIZE, "%i\n", afu->irqs_max); +} + +static ssize_t irqs_max_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl_afu *afu = to_cxl_afu(device); + ssize_t ret; + int irqs_max; + + ret = sscanf(buf, "%i", &irqs_max); + if (ret != 1) + return -EINVAL; + + if (irqs_max < afu->pp_irqs) + return -EINVAL; + + if (cpu_has_feature(CPU_FTR_HVMODE)) { + if (irqs_max > afu->adapter->user_irqs) + return -EINVAL; + } else { + /* pHyp sets a per-AFU limit */ + if (irqs_max > afu->guest->max_ints) + return -EINVAL; + } + + afu->irqs_max = irqs_max; + return count; +} + +static ssize_t modes_supported_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct cxl_afu *afu = to_cxl_afu(device); + char *p = buf, *end = buf + PAGE_SIZE; + + if (afu->modes_supported & CXL_MODE_DEDICATED) + p += scnprintf(p, end - p, "dedicated_process\n"); + if (afu->modes_supported & CXL_MODE_DIRECTED) + p += scnprintf(p, end - p, "afu_directed\n"); + return (p - buf); +} + +static ssize_t prefault_mode_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_cxl_afu(device); + + switch (afu->prefault_mode) { + case CXL_PREFAULT_WED: + return scnprintf(buf, PAGE_SIZE, "work_element_descriptor\n"); + case CXL_PREFAULT_ALL: + return scnprintf(buf, PAGE_SIZE, "all\n"); + default: + return scnprintf(buf, PAGE_SIZE, "none\n"); + } +} + +static ssize_t prefault_mode_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl_afu *afu = to_cxl_afu(device); + enum prefault_modes mode = -1; + + if (!strncmp(buf, "none", 4)) + mode 
= CXL_PREFAULT_NONE; + else { + if (!radix_enabled()) { + + /* only allowed when not in radix mode */ + if (!strncmp(buf, "work_element_descriptor", 23)) + mode = CXL_PREFAULT_WED; + if (!strncmp(buf, "all", 3)) + mode = CXL_PREFAULT_ALL; + } else { + dev_err(device, "Cannot prefault with radix enabled\n"); + } + } + + if (mode == -1) + return -EINVAL; + + afu->prefault_mode = mode; + return count; +} + +static ssize_t mode_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_cxl_afu(device); + + if (afu->current_mode == CXL_MODE_DEDICATED) + return scnprintf(buf, PAGE_SIZE, "dedicated_process\n"); + if (afu->current_mode == CXL_MODE_DIRECTED) + return scnprintf(buf, PAGE_SIZE, "afu_directed\n"); + return scnprintf(buf, PAGE_SIZE, "none\n"); +} + +static ssize_t mode_store(struct device *device, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl_afu *afu = to_cxl_afu(device); + int old_mode, mode = -1; + int rc = -EBUSY; + + /* can't change this if we have a user */ + mutex_lock(&afu->contexts_lock); + if (!idr_is_empty(&afu->contexts_idr)) + goto err; + + if (!strncmp(buf, "dedicated_process", 17)) + mode = CXL_MODE_DEDICATED; + if (!strncmp(buf, "afu_directed", 12)) + mode = CXL_MODE_DIRECTED; + if (!strncmp(buf, "none", 4)) + mode = 0; + + if (mode == -1) { + rc = -EINVAL; + goto err; + } + + /* + * afu_deactivate_mode needs to be done outside the lock, prevent + * other contexts coming in before we are ready: + */ + old_mode = afu->current_mode; + afu->current_mode = 0; + afu->num_procs = 0; + + mutex_unlock(&afu->contexts_lock); + + if ((rc = cxl_ops->afu_deactivate_mode(afu, old_mode))) + return rc; + if ((rc = cxl_ops->afu_activate_mode(afu, mode))) + return rc; + + return count; +err: + mutex_unlock(&afu->contexts_lock); + return rc; +} + +static ssize_t api_version_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%i\n", CXL_API_VERSION); +} + +static ssize_t api_version_compatible_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%i\n", CXL_API_VERSION_COMPATIBLE); +} + +static ssize_t afu_eb_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, + loff_t off, size_t count) +{ + struct cxl_afu *afu = to_cxl_afu(kobj_to_dev(kobj)); + + return cxl_ops->afu_read_err_buffer(afu, buf, off, count); +} + +static struct device_attribute afu_attrs[] = { + __ATTR_RO(mmio_size), + __ATTR_RO(irqs_min), + __ATTR_RW(irqs_max), + __ATTR_RO(modes_supported), + __ATTR_RW(mode), + __ATTR_RW(prefault_mode), + __ATTR_RO(api_version), + __ATTR_RO(api_version_compatible), + __ATTR(reset, S_IWUSR, NULL, reset_store_afu), +}; + +int cxl_sysfs_adapter_add(struct cxl *adapter) +{ + struct device_attribute *dev_attr; + int i, rc; + + for (i = 0; i < ARRAY_SIZE(adapter_attrs); i++) { + dev_attr = &adapter_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_ADAPTER_ATTRS)) { + if ((rc = device_create_file(&adapter->dev, dev_attr))) + goto err; + } + } + return 0; +err: + for (i--; i >= 0; i--) { + dev_attr = &adapter_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_ADAPTER_ATTRS)) + device_remove_file(&adapter->dev, dev_attr); + } + return rc; +} + +void cxl_sysfs_adapter_remove(struct cxl *adapter) +{ + struct device_attribute *dev_attr; + int i; + + for (i = 0; i < ARRAY_SIZE(adapter_attrs); i++) { + dev_attr = 
&adapter_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_ADAPTER_ATTRS)) + device_remove_file(&adapter->dev, dev_attr); + } +} + +struct afu_config_record { + struct kobject kobj; + struct bin_attribute config_attr; + struct list_head list; + int cr; + u16 device; + u16 vendor; + u32 class; +}; + +#define to_cr(obj) container_of(obj, struct afu_config_record, kobj) + +static ssize_t vendor_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct afu_config_record *cr = to_cr(kobj); + + return scnprintf(buf, PAGE_SIZE, "0x%.4x\n", cr->vendor); +} + +static ssize_t device_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct afu_config_record *cr = to_cr(kobj); + + return scnprintf(buf, PAGE_SIZE, "0x%.4x\n", cr->device); +} + +static ssize_t class_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct afu_config_record *cr = to_cr(kobj); + + return scnprintf(buf, PAGE_SIZE, "0x%.6x\n", cr->class); +} + +static ssize_t afu_read_config(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, + loff_t off, size_t count) +{ + struct afu_config_record *cr = to_cr(kobj); + struct cxl_afu *afu = to_cxl_afu(kobj_to_dev(kobj->parent)); + + u64 i, j, val, rc; + + for (i = 0; i < count;) { + rc = cxl_ops->afu_cr_read64(afu, cr->cr, off & ~0x7, &val); + if (rc) + val = ~0ULL; + for (j = off & 0x7; j < 8 && i < count; i++, j++, off++) + buf[i] = (val >> (j * 8)) & 0xff; + } + + return count; +} + +static struct kobj_attribute vendor_attribute = + __ATTR_RO(vendor); +static struct kobj_attribute device_attribute = + __ATTR_RO(device); +static struct kobj_attribute class_attribute = + __ATTR_RO(class); + +static struct attribute *afu_cr_attrs[] = { + &vendor_attribute.attr, + &device_attribute.attr, + &class_attribute.attr, + NULL, +}; +ATTRIBUTE_GROUPS(afu_cr); + +static void release_afu_config_record(struct kobject *kobj) +{ + struct afu_config_record *cr = to_cr(kobj); + + kfree(cr); +} + +static struct kobj_type afu_config_record_type = { + .sysfs_ops = &kobj_sysfs_ops, + .release = release_afu_config_record, + .default_groups = afu_cr_groups, +}; + +static struct afu_config_record *cxl_sysfs_afu_new_cr(struct cxl_afu *afu, int cr_idx) +{ + struct afu_config_record *cr; + int rc; + + cr = kzalloc(sizeof(struct afu_config_record), GFP_KERNEL); + if (!cr) + return ERR_PTR(-ENOMEM); + + cr->cr = cr_idx; + + rc = cxl_ops->afu_cr_read16(afu, cr_idx, PCI_DEVICE_ID, &cr->device); + if (rc) + goto err; + rc = cxl_ops->afu_cr_read16(afu, cr_idx, PCI_VENDOR_ID, &cr->vendor); + if (rc) + goto err; + rc = cxl_ops->afu_cr_read32(afu, cr_idx, PCI_CLASS_REVISION, &cr->class); + if (rc) + goto err; + cr->class >>= 8; + + /* + * Export raw AFU PCIe like config record. For now this is read only by + * root - we can expand that later to be readable by non-root and maybe + * even writable provided we have a good use-case. Once we support + * exposing AFUs through a virtual PHB they will get that for free from + * Linux' PCI infrastructure, but until then it's not clear that we + * need it for anything since the main use case is just identifying + * AFUs, which can be done via the vendor, device and class attributes. 
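+ *
+ * For reference, the record then appears in sysfs as, for example
+ * (path shown for the first record of afu0.0, purely as an
+ * illustration), /sys/class/cxl/afu0.0/cr0/config, which root can
+ * dump with hexdump(1).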
+ */ + sysfs_bin_attr_init(&cr->config_attr); + cr->config_attr.attr.name = "config"; + cr->config_attr.attr.mode = S_IRUSR; + cr->config_attr.size = afu->crs_len; + cr->config_attr.read = afu_read_config; + + rc = kobject_init_and_add(&cr->kobj, &afu_config_record_type, + &afu->dev.kobj, "cr%i", cr->cr); + if (rc) + goto err1; + + rc = sysfs_create_bin_file(&cr->kobj, &cr->config_attr); + if (rc) + goto err1; + + rc = kobject_uevent(&cr->kobj, KOBJ_ADD); + if (rc) + goto err2; + + return cr; +err2: + sysfs_remove_bin_file(&cr->kobj, &cr->config_attr); +err1: + kobject_put(&cr->kobj); + return ERR_PTR(rc); +err: + kfree(cr); + return ERR_PTR(rc); +} + +void cxl_sysfs_afu_remove(struct cxl_afu *afu) +{ + struct device_attribute *dev_attr; + struct afu_config_record *cr, *tmp; + int i; + + /* remove the err buffer bin attribute */ + if (afu->eb_len) + device_remove_bin_file(&afu->dev, &afu->attr_eb); + + for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) { + dev_attr = &afu_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_AFU_ATTRS)) + device_remove_file(&afu->dev, &afu_attrs[i]); + } + + list_for_each_entry_safe(cr, tmp, &afu->crs, list) { + sysfs_remove_bin_file(&cr->kobj, &cr->config_attr); + kobject_put(&cr->kobj); + } +} + +int cxl_sysfs_afu_add(struct cxl_afu *afu) +{ + struct device_attribute *dev_attr; + struct afu_config_record *cr; + int i, rc; + + INIT_LIST_HEAD(&afu->crs); + + for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) { + dev_attr = &afu_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_AFU_ATTRS)) { + if ((rc = device_create_file(&afu->dev, &afu_attrs[i]))) + goto err; + } + } + + /* conditionally create the add the binary file for error info buffer */ + if (afu->eb_len) { + sysfs_attr_init(&afu->attr_eb.attr); + + afu->attr_eb.attr.name = "afu_err_buff"; + afu->attr_eb.attr.mode = S_IRUGO; + afu->attr_eb.size = afu->eb_len; + afu->attr_eb.read = afu_eb_read; + + rc = device_create_bin_file(&afu->dev, &afu->attr_eb); + if (rc) { + dev_err(&afu->dev, + "Unable to create eb attr for the afu. 
Err(%d)\n", + rc); + goto err; + } + } + + for (i = 0; i < afu->crs_num; i++) { + cr = cxl_sysfs_afu_new_cr(afu, i); + if (IS_ERR(cr)) { + rc = PTR_ERR(cr); + goto err1; + } + list_add(&cr->list, &afu->crs); + } + + return 0; + +err1: + cxl_sysfs_afu_remove(afu); + return rc; +err: + /* reset the eb_len as we havent created the bin attr */ + afu->eb_len = 0; + + for (i--; i >= 0; i--) { + dev_attr = &afu_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_AFU_ATTRS)) + device_remove_file(&afu->dev, &afu_attrs[i]); + } + return rc; +} + +int cxl_sysfs_afu_m_add(struct cxl_afu *afu) +{ + struct device_attribute *dev_attr; + int i, rc; + + for (i = 0; i < ARRAY_SIZE(afu_master_attrs); i++) { + dev_attr = &afu_master_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_AFU_MASTER_ATTRS)) { + if ((rc = device_create_file(afu->chardev_m, &afu_master_attrs[i]))) + goto err; + } + } + + return 0; + +err: + for (i--; i >= 0; i--) { + dev_attr = &afu_master_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_AFU_MASTER_ATTRS)) + device_remove_file(afu->chardev_m, &afu_master_attrs[i]); + } + return rc; +} + +void cxl_sysfs_afu_m_remove(struct cxl_afu *afu) +{ + struct device_attribute *dev_attr; + int i; + + for (i = 0; i < ARRAY_SIZE(afu_master_attrs); i++) { + dev_attr = &afu_master_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_AFU_MASTER_ATTRS)) + device_remove_file(afu->chardev_m, &afu_master_attrs[i]); + } +} diff --git a/drivers/misc/cxl/trace.c b/drivers/misc/cxl/trace.c new file mode 100644 index 0000000000..86f654b99e --- /dev/null +++ b/drivers/misc/cxl/trace.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2015 IBM Corp. + */ + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "trace.h" +#endif diff --git a/drivers/misc/cxl/trace.h b/drivers/misc/cxl/trace.h new file mode 100644 index 0000000000..c474157c68 --- /dev/null +++ b/drivers/misc/cxl/trace.h @@ -0,0 +1,691 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2015 IBM Corp. 
+ */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM cxl + +#if !defined(_CXL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _CXL_TRACE_H + +#include + +#include "cxl.h" + +#define dsisr_psl9_flags(flags) \ + __print_flags(flags, "|", \ + { CXL_PSL9_DSISR_An_CO_MASK, "FR" }, \ + { CXL_PSL9_DSISR_An_TF, "TF" }, \ + { CXL_PSL9_DSISR_An_PE, "PE" }, \ + { CXL_PSL9_DSISR_An_AE, "AE" }, \ + { CXL_PSL9_DSISR_An_OC, "OC" }, \ + { CXL_PSL9_DSISR_An_S, "S" }) + +#define DSISR_FLAGS \ + { CXL_PSL_DSISR_An_DS, "DS" }, \ + { CXL_PSL_DSISR_An_DM, "DM" }, \ + { CXL_PSL_DSISR_An_ST, "ST" }, \ + { CXL_PSL_DSISR_An_UR, "UR" }, \ + { CXL_PSL_DSISR_An_PE, "PE" }, \ + { CXL_PSL_DSISR_An_AE, "AE" }, \ + { CXL_PSL_DSISR_An_OC, "OC" }, \ + { CXL_PSL_DSISR_An_M, "M" }, \ + { CXL_PSL_DSISR_An_P, "P" }, \ + { CXL_PSL_DSISR_An_A, "A" }, \ + { CXL_PSL_DSISR_An_S, "S" }, \ + { CXL_PSL_DSISR_An_K, "K" } + +#define TFC_FLAGS \ + { CXL_PSL_TFC_An_A, "A" }, \ + { CXL_PSL_TFC_An_C, "C" }, \ + { CXL_PSL_TFC_An_AE, "AE" }, \ + { CXL_PSL_TFC_An_R, "R" } + +#define LLCMD_NAMES \ + { CXL_SPA_SW_CMD_TERMINATE, "TERMINATE" }, \ + { CXL_SPA_SW_CMD_REMOVE, "REMOVE" }, \ + { CXL_SPA_SW_CMD_SUSPEND, "SUSPEND" }, \ + { CXL_SPA_SW_CMD_RESUME, "RESUME" }, \ + { CXL_SPA_SW_CMD_ADD, "ADD" }, \ + { CXL_SPA_SW_CMD_UPDATE, "UPDATE" } + +#define AFU_COMMANDS \ + { 0, "DISABLE" }, \ + { CXL_AFU_Cntl_An_E, "ENABLE" }, \ + { CXL_AFU_Cntl_An_RA, "RESET" } + +#define PSL_COMMANDS \ + { CXL_PSL_SCNTL_An_Pc, "PURGE" }, \ + { CXL_PSL_SCNTL_An_Sc, "SUSPEND" } + + +DECLARE_EVENT_CLASS(cxl_pe_class, + TP_PROTO(struct cxl_context *ctx), + + TP_ARGS(ctx), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + ), + + TP_printk("afu%i.%i pe=%i", + __entry->card, + __entry->afu, + __entry->pe + ) +); + + +TRACE_EVENT(cxl_attach, + TP_PROTO(struct cxl_context *ctx, u64 wed, s16 num_interrupts, u64 amr), + + TP_ARGS(ctx, wed, num_interrupts, amr), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(pid_t, pid) + __field(u64, wed) + __field(u64, amr) + __field(s16, num_interrupts) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->pid = pid_nr(ctx->pid); + __entry->wed = wed; + __entry->amr = amr; + __entry->num_interrupts = num_interrupts; + ), + + TP_printk("afu%i.%i pid=%i pe=%i wed=0x%016llx irqs=%i amr=0x%llx", + __entry->card, + __entry->afu, + __entry->pid, + __entry->pe, + __entry->wed, + __entry->num_interrupts, + __entry->amr + ) +); + +DEFINE_EVENT(cxl_pe_class, cxl_detach, + TP_PROTO(struct cxl_context *ctx), + TP_ARGS(ctx) +); + +TRACE_EVENT(cxl_afu_irq, + TP_PROTO(struct cxl_context *ctx, int afu_irq, int virq, irq_hw_number_t hwirq), + + TP_ARGS(ctx, afu_irq, virq, hwirq), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(u16, afu_irq) + __field(int, virq) + __field(irq_hw_number_t, hwirq) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->afu_irq = afu_irq; + __entry->virq = virq; + __entry->hwirq = hwirq; + ), + + TP_printk("afu%i.%i pe=%i afu_irq=%i virq=%i hwirq=0x%lx", + __entry->card, + __entry->afu, + __entry->pe, + __entry->afu_irq, + __entry->virq, + __entry->hwirq + ) +); + +TRACE_EVENT(cxl_psl9_irq, + 
TP_PROTO(struct cxl_context *ctx, int irq, u64 dsisr, u64 dar), + + TP_ARGS(ctx, irq, dsisr, dar), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(int, irq) + __field(u64, dsisr) + __field(u64, dar) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->irq = irq; + __entry->dsisr = dsisr; + __entry->dar = dar; + ), + + TP_printk("afu%i.%i pe=%i irq=%i dsisr=0x%016llx dsisr=%s dar=0x%016llx", + __entry->card, + __entry->afu, + __entry->pe, + __entry->irq, + __entry->dsisr, + dsisr_psl9_flags(__entry->dsisr), + __entry->dar + ) +); + +TRACE_EVENT(cxl_psl_irq, + TP_PROTO(struct cxl_context *ctx, int irq, u64 dsisr, u64 dar), + + TP_ARGS(ctx, irq, dsisr, dar), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(int, irq) + __field(u64, dsisr) + __field(u64, dar) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->irq = irq; + __entry->dsisr = dsisr; + __entry->dar = dar; + ), + + TP_printk("afu%i.%i pe=%i irq=%i dsisr=%s dar=0x%016llx", + __entry->card, + __entry->afu, + __entry->pe, + __entry->irq, + __print_flags(__entry->dsisr, "|", DSISR_FLAGS), + __entry->dar + ) +); + +TRACE_EVENT(cxl_psl_irq_ack, + TP_PROTO(struct cxl_context *ctx, u64 tfc), + + TP_ARGS(ctx, tfc), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(u64, tfc) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->tfc = tfc; + ), + + TP_printk("afu%i.%i pe=%i tfc=%s", + __entry->card, + __entry->afu, + __entry->pe, + __print_flags(__entry->tfc, "|", TFC_FLAGS) + ) +); + +TRACE_EVENT(cxl_ste_miss, + TP_PROTO(struct cxl_context *ctx, u64 dar), + + TP_ARGS(ctx, dar), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(u64, dar) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->dar = dar; + ), + + TP_printk("afu%i.%i pe=%i dar=0x%016llx", + __entry->card, + __entry->afu, + __entry->pe, + __entry->dar + ) +); + +TRACE_EVENT(cxl_ste_write, + TP_PROTO(struct cxl_context *ctx, unsigned int idx, u64 e, u64 v), + + TP_ARGS(ctx, idx, e, v), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(unsigned int, idx) + __field(u64, e) + __field(u64, v) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->idx = idx; + __entry->e = e; + __entry->v = v; + ), + + TP_printk("afu%i.%i pe=%i SSTE[%i] E=0x%016llx V=0x%016llx", + __entry->card, + __entry->afu, + __entry->pe, + __entry->idx, + __entry->e, + __entry->v + ) +); + +TRACE_EVENT(cxl_pte_miss, + TP_PROTO(struct cxl_context *ctx, u64 dsisr, u64 dar), + + TP_ARGS(ctx, dsisr, dar), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(u64, dsisr) + __field(u64, dar) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->dsisr = dsisr; + __entry->dar = dar; + ), + + TP_printk("afu%i.%i pe=%i dsisr=%s dar=0x%016llx", + __entry->card, + __entry->afu, + __entry->pe, + __print_flags(__entry->dsisr, "|", DSISR_FLAGS), + 
__entry->dar + ) +); + +TRACE_EVENT(cxl_llcmd, + TP_PROTO(struct cxl_context *ctx, u64 cmd), + + TP_ARGS(ctx, cmd), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(u64, cmd) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->cmd = cmd; + ), + + TP_printk("afu%i.%i pe=%i cmd=%s", + __entry->card, + __entry->afu, + __entry->pe, + __print_symbolic_u64(__entry->cmd, LLCMD_NAMES) + ) +); + +TRACE_EVENT(cxl_llcmd_done, + TP_PROTO(struct cxl_context *ctx, u64 cmd, int rc), + + TP_ARGS(ctx, cmd, rc), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(u64, cmd) + __field(int, rc) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->rc = rc; + __entry->cmd = cmd; + ), + + TP_printk("afu%i.%i pe=%i cmd=%s rc=%i", + __entry->card, + __entry->afu, + __entry->pe, + __print_symbolic_u64(__entry->cmd, LLCMD_NAMES), + __entry->rc + ) +); + +DECLARE_EVENT_CLASS(cxl_afu_psl_ctrl, + TP_PROTO(struct cxl_afu *afu, u64 cmd), + + TP_ARGS(afu, cmd), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u64, cmd) + ), + + TP_fast_assign( + __entry->card = afu->adapter->adapter_num; + __entry->afu = afu->slice; + __entry->cmd = cmd; + ), + + TP_printk("afu%i.%i cmd=%s", + __entry->card, + __entry->afu, + __print_symbolic_u64(__entry->cmd, AFU_COMMANDS) + ) +); + +DECLARE_EVENT_CLASS(cxl_afu_psl_ctrl_done, + TP_PROTO(struct cxl_afu *afu, u64 cmd, int rc), + + TP_ARGS(afu, cmd, rc), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u64, cmd) + __field(int, rc) + ), + + TP_fast_assign( + __entry->card = afu->adapter->adapter_num; + __entry->afu = afu->slice; + __entry->rc = rc; + __entry->cmd = cmd; + ), + + TP_printk("afu%i.%i cmd=%s rc=%i", + __entry->card, + __entry->afu, + __print_symbolic_u64(__entry->cmd, AFU_COMMANDS), + __entry->rc + ) +); + +DEFINE_EVENT(cxl_afu_psl_ctrl, cxl_afu_ctrl, + TP_PROTO(struct cxl_afu *afu, u64 cmd), + TP_ARGS(afu, cmd) +); + +DEFINE_EVENT(cxl_afu_psl_ctrl_done, cxl_afu_ctrl_done, + TP_PROTO(struct cxl_afu *afu, u64 cmd, int rc), + TP_ARGS(afu, cmd, rc) +); + +DEFINE_EVENT_PRINT(cxl_afu_psl_ctrl, cxl_psl_ctrl, + TP_PROTO(struct cxl_afu *afu, u64 cmd), + TP_ARGS(afu, cmd), + + TP_printk("psl%i.%i cmd=%s", + __entry->card, + __entry->afu, + __print_symbolic_u64(__entry->cmd, PSL_COMMANDS) + ) +); + +DEFINE_EVENT_PRINT(cxl_afu_psl_ctrl_done, cxl_psl_ctrl_done, + TP_PROTO(struct cxl_afu *afu, u64 cmd, int rc), + TP_ARGS(afu, cmd, rc), + + TP_printk("psl%i.%i cmd=%s rc=%i", + __entry->card, + __entry->afu, + __print_symbolic_u64(__entry->cmd, PSL_COMMANDS), + __entry->rc + ) +); + +DEFINE_EVENT(cxl_pe_class, cxl_slbia, + TP_PROTO(struct cxl_context *ctx), + TP_ARGS(ctx) +); + +TRACE_EVENT(cxl_hcall, + TP_PROTO(u64 unit_address, u64 process_token, long rc), + + TP_ARGS(unit_address, process_token, rc), + + TP_STRUCT__entry( + __field(u64, unit_address) + __field(u64, process_token) + __field(long, rc) + ), + + TP_fast_assign( + __entry->unit_address = unit_address; + __entry->process_token = process_token; + __entry->rc = rc; + ), + + TP_printk("unit_address=0x%016llx process_token=0x%016llx rc=%li", + __entry->unit_address, + __entry->process_token, + __entry->rc + ) +); + +TRACE_EVENT(cxl_hcall_control, + TP_PROTO(u64 unit_address, char *fct, u64 p1, u64 p2, u64 p3, + u64 p4, unsigned 
long r4, long rc), + + TP_ARGS(unit_address, fct, p1, p2, p3, p4, r4, rc), + + TP_STRUCT__entry( + __field(u64, unit_address) + __field(char *, fct) + __field(u64, p1) + __field(u64, p2) + __field(u64, p3) + __field(u64, p4) + __field(unsigned long, r4) + __field(long, rc) + ), + + TP_fast_assign( + __entry->unit_address = unit_address; + __entry->fct = fct; + __entry->p1 = p1; + __entry->p2 = p2; + __entry->p3 = p3; + __entry->p4 = p4; + __entry->r4 = r4; + __entry->rc = rc; + ), + + TP_printk("unit_address=%#.16llx %s(%#llx, %#llx, %#llx, %#llx, R4: %#lx)): %li", + __entry->unit_address, + __entry->fct, + __entry->p1, + __entry->p2, + __entry->p3, + __entry->p4, + __entry->r4, + __entry->rc + ) +); + +TRACE_EVENT(cxl_hcall_attach, + TP_PROTO(u64 unit_address, u64 phys_addr, unsigned long process_token, + unsigned long mmio_addr, unsigned long mmio_size, long rc), + + TP_ARGS(unit_address, phys_addr, process_token, + mmio_addr, mmio_size, rc), + + TP_STRUCT__entry( + __field(u64, unit_address) + __field(u64, phys_addr) + __field(unsigned long, process_token) + __field(unsigned long, mmio_addr) + __field(unsigned long, mmio_size) + __field(long, rc) + ), + + TP_fast_assign( + __entry->unit_address = unit_address; + __entry->phys_addr = phys_addr; + __entry->process_token = process_token; + __entry->mmio_addr = mmio_addr; + __entry->mmio_size = mmio_size; + __entry->rc = rc; + ), + + TP_printk("unit_address=0x%016llx phys_addr=0x%016llx " + "token=0x%.8lx mmio_addr=0x%lx mmio_size=0x%lx rc=%li", + __entry->unit_address, + __entry->phys_addr, + __entry->process_token, + __entry->mmio_addr, + __entry->mmio_size, + __entry->rc + ) +); + +DEFINE_EVENT(cxl_hcall, cxl_hcall_detach, + TP_PROTO(u64 unit_address, u64 process_token, long rc), + TP_ARGS(unit_address, process_token, rc) +); + +DEFINE_EVENT(cxl_hcall_control, cxl_hcall_control_function, + TP_PROTO(u64 unit_address, char *fct, u64 p1, u64 p2, u64 p3, + u64 p4, unsigned long r4, long rc), + TP_ARGS(unit_address, fct, p1, p2, p3, p4, r4, rc) +); + +DEFINE_EVENT(cxl_hcall, cxl_hcall_collect_int_info, + TP_PROTO(u64 unit_address, u64 process_token, long rc), + TP_ARGS(unit_address, process_token, rc) +); + +TRACE_EVENT(cxl_hcall_control_faults, + TP_PROTO(u64 unit_address, u64 process_token, + u64 control_mask, u64 reset_mask, unsigned long r4, + long rc), + + TP_ARGS(unit_address, process_token, + control_mask, reset_mask, r4, rc), + + TP_STRUCT__entry( + __field(u64, unit_address) + __field(u64, process_token) + __field(u64, control_mask) + __field(u64, reset_mask) + __field(unsigned long, r4) + __field(long, rc) + ), + + TP_fast_assign( + __entry->unit_address = unit_address; + __entry->process_token = process_token; + __entry->control_mask = control_mask; + __entry->reset_mask = reset_mask; + __entry->r4 = r4; + __entry->rc = rc; + ), + + TP_printk("unit_address=0x%016llx process_token=0x%llx " + "control_mask=%#llx reset_mask=%#llx r4=%#lx rc=%li", + __entry->unit_address, + __entry->process_token, + __entry->control_mask, + __entry->reset_mask, + __entry->r4, + __entry->rc + ) +); + +DEFINE_EVENT(cxl_hcall_control, cxl_hcall_control_facility, + TP_PROTO(u64 unit_address, char *fct, u64 p1, u64 p2, u64 p3, + u64 p4, unsigned long r4, long rc), + TP_ARGS(unit_address, fct, p1, p2, p3, p4, r4, rc) +); + +TRACE_EVENT(cxl_hcall_download_facility, + TP_PROTO(u64 unit_address, char *fct, u64 list_address, u64 num, + unsigned long r4, long rc), + + TP_ARGS(unit_address, fct, list_address, num, r4, rc), + + TP_STRUCT__entry( + __field(u64, 
unit_address) + __field(char *, fct) + __field(u64, list_address) + __field(u64, num) + __field(unsigned long, r4) + __field(long, rc) + ), + + TP_fast_assign( + __entry->unit_address = unit_address; + __entry->fct = fct; + __entry->list_address = list_address; + __entry->num = num; + __entry->r4 = r4; + __entry->rc = rc; + ), + + TP_printk("%#.16llx, %s(%#llx, %#llx), %#lx): %li", + __entry->unit_address, + __entry->fct, + __entry->list_address, + __entry->num, + __entry->r4, + __entry->rc + ) +); + +#endif /* _CXL_TRACE_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace +#include diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c new file mode 100644 index 0000000000..6332db8044 --- /dev/null +++ b/drivers/misc/cxl/vphb.c @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2014 IBM Corp. + */ + +#include +#include +#include "cxl.h" + +static int cxl_pci_probe_mode(struct pci_bus *bus) +{ + return PCI_PROBE_NORMAL; +} + +static int cxl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + return -ENODEV; +} + +static void cxl_teardown_msi_irqs(struct pci_dev *pdev) +{ + /* + * MSI should never be set but need still need to provide this call + * back. + */ +} + +static bool cxl_pci_enable_device_hook(struct pci_dev *dev) +{ + struct pci_controller *phb; + struct cxl_afu *afu; + struct cxl_context *ctx; + + phb = pci_bus_to_host(dev->bus); + afu = (struct cxl_afu *)phb->private_data; + + if (!cxl_ops->link_ok(afu->adapter, afu)) { + dev_warn(&dev->dev, "%s: Device link is down, refusing to enable AFU\n", __func__); + return false; + } + + dev->dev.archdata.dma_offset = PAGE_OFFSET; + + /* + * Allocate a context to do cxl things too. If we eventually do real + * DMA ops, we'll need a default context to attach them to + */ + ctx = cxl_dev_context_init(dev); + if (IS_ERR(ctx)) + return false; + dev->dev.archdata.cxl_ctx = ctx; + + return (cxl_ops->afu_check_and_enable(afu) == 0); +} + +static void cxl_pci_disable_device(struct pci_dev *dev) +{ + struct cxl_context *ctx = cxl_get_context(dev); + + if (ctx) { + if (ctx->status == STARTED) { + dev_err(&dev->dev, "Default context started\n"); + return; + } + dev->dev.archdata.cxl_ctx = NULL; + cxl_release_context(ctx); + } +} + +static void cxl_pci_reset_secondary_bus(struct pci_dev *dev) +{ + /* Should we do an AFU reset here ? */ +} + +static int cxl_pcie_cfg_record(u8 bus, u8 devfn) +{ + return (bus << 8) + devfn; +} + +static inline struct cxl_afu *pci_bus_to_afu(struct pci_bus *bus) +{ + struct pci_controller *phb = bus ? pci_bus_to_host(bus) : NULL; + + return phb ? phb->private_data : NULL; +} + +static void cxl_afu_configured_put(struct cxl_afu *afu) +{ + atomic_dec_if_positive(&afu->configured_state); +} + +static bool cxl_afu_configured_get(struct cxl_afu *afu) +{ + return atomic_inc_unless_negative(&afu->configured_state); +} + +static inline int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn, + struct cxl_afu *afu, int *_record) +{ + int record; + + record = cxl_pcie_cfg_record(bus->number, devfn); + if (record > afu->crs_num) + return PCIBIOS_DEVICE_NOT_FOUND; + + *_record = record; + return 0; +} + +static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + int rc, record; + struct cxl_afu *afu; + u8 val8; + u16 val16; + u32 val32; + + afu = pci_bus_to_afu(bus); + /* Grab a reader lock on afu. 
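configured_state acts as a reader count here: cxl_afu_configured_get() takes it with atomic_inc_unless_negative(), which fails once the state has gone negative (presumably set that way when the AFU is deconfigured elsewhere in the driver), so a config space access racing with teardown simply returns PCIBIOS_DEVICE_NOT_FOUND; cxl_afu_configured_put() drops the reference again.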
*/ + if (afu == NULL || !cxl_afu_configured_get(afu)) + return PCIBIOS_DEVICE_NOT_FOUND; + + rc = cxl_pcie_config_info(bus, devfn, afu, &record); + if (rc) + goto out; + + switch (len) { + case 1: + rc = cxl_ops->afu_cr_read8(afu, record, offset, &val8); + *val = val8; + break; + case 2: + rc = cxl_ops->afu_cr_read16(afu, record, offset, &val16); + *val = val16; + break; + case 4: + rc = cxl_ops->afu_cr_read32(afu, record, offset, &val32); + *val = val32; + break; + default: + WARN_ON(1); + } + +out: + cxl_afu_configured_put(afu); + return rc ? PCIBIOS_DEVICE_NOT_FOUND : 0; +} + +static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + int rc, record; + struct cxl_afu *afu; + + afu = pci_bus_to_afu(bus); + /* Grab a reader lock on afu. */ + if (afu == NULL || !cxl_afu_configured_get(afu)) + return PCIBIOS_DEVICE_NOT_FOUND; + + rc = cxl_pcie_config_info(bus, devfn, afu, &record); + if (rc) + goto out; + + switch (len) { + case 1: + rc = cxl_ops->afu_cr_write8(afu, record, offset, val & 0xff); + break; + case 2: + rc = cxl_ops->afu_cr_write16(afu, record, offset, val & 0xffff); + break; + case 4: + rc = cxl_ops->afu_cr_write32(afu, record, offset, val); + break; + default: + WARN_ON(1); + } + +out: + cxl_afu_configured_put(afu); + return rc ? PCIBIOS_SET_FAILED : 0; +} + +static struct pci_ops cxl_pcie_pci_ops = +{ + .read = cxl_pcie_read_config, + .write = cxl_pcie_write_config, +}; + + +static struct pci_controller_ops cxl_pci_controller_ops = +{ + .probe_mode = cxl_pci_probe_mode, + .enable_device_hook = cxl_pci_enable_device_hook, + .disable_device = cxl_pci_disable_device, + .release_device = cxl_pci_disable_device, + .reset_secondary_bus = cxl_pci_reset_secondary_bus, + .setup_msi_irqs = cxl_setup_msi_irqs, + .teardown_msi_irqs = cxl_teardown_msi_irqs, +}; + +int cxl_pci_vphb_add(struct cxl_afu *afu) +{ + struct pci_controller *phb; + struct device_node *vphb_dn; + struct device *parent; + + /* + * If there are no AFU configuration records we won't have anything to + * expose under the vPHB, so skip creating one, returning success since + * this is still a valid case. This will also opt us out of EEH + * handling since we won't have anything special to do if there are no + * kernel drivers attached to the vPHB, and EEH handling is not yet + * supported in the peer model. + */ + if (!afu->crs_num) + return 0; + + /* The parent device is the adapter. Reuse the device node of + * the adapter. + * We don't seem to care what device node is used for the vPHB, + * but tools such as lsvpd walk up the device parents looking + * for a valid location code, so we might as well show devices + * attached to the adapter as being located on that adapter. + */ + parent = afu->adapter->dev.parent; + vphb_dn = parent->of_node; + + /* Alloc and setup PHB data structure */ + phb = pcibios_alloc_controller(vphb_dn); + if (!phb) + return -ENODEV; + + /* Setup parent in sysfs */ + phb->parent = parent; + + /* Setup the PHB using arch provided callback */ + phb->ops = &cxl_pcie_pci_ops; + phb->cfg_addr = NULL; + phb->cfg_data = NULL; + phb->private_data = afu; + phb->controller_ops = cxl_pci_controller_ops; + + /* Scan the bus */ + pcibios_scan_phb(phb); + if (phb->bus == NULL) + return -ENXIO; + + /* Set release hook on root bus */ + pci_set_host_bridge_release(to_pci_host_bridge(phb->bus->bridge), + pcibios_free_controller_deferred, + (void *) phb); + + /* Claim resources. 
This might need some rework as well depending + * whether we are doing probe-only or not, like assigning unassigned + * resources etc... + */ + pcibios_claim_one_bus(phb->bus); + + /* Add probed PCI devices to the device model */ + pci_bus_add_devices(phb->bus); + + afu->phb = phb; + + return 0; +} + +void cxl_pci_vphb_remove(struct cxl_afu *afu) +{ + struct pci_controller *phb; + + /* If there is no configuration record we won't have one of these */ + if (!afu || !afu->phb) + return; + + phb = afu->phb; + afu->phb = NULL; + + pci_remove_root_bus(phb->bus); + /* + * We don't free phb here - that's handled by + * pcibios_free_controller_deferred() + */ +} + +bool cxl_pci_is_vphb_device(struct pci_dev *dev) +{ + struct pci_controller *phb; + + phb = pci_bus_to_host(dev->bus); + + return (phb->ops == &cxl_pcie_pci_ops); +} + +struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev) +{ + struct pci_controller *phb; + + phb = pci_bus_to_host(dev->bus); + + return (struct cxl_afu *)phb->private_data; +} +EXPORT_SYMBOL_GPL(cxl_pci_to_afu); + +unsigned int cxl_pci_to_cfg_record(struct pci_dev *dev) +{ + return cxl_pcie_cfg_record(dev->bus->number, dev->devfn); +} +EXPORT_SYMBOL_GPL(cxl_pci_to_cfg_record); diff --git a/drivers/misc/ds1682.c b/drivers/misc/ds1682.c new file mode 100644 index 0000000000..21fc5bc85c --- /dev/null +++ b/drivers/misc/ds1682.c @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Dallas Semiconductor DS1682 Elapsed Time Recorder device driver + * + * Written by: Grant Likely + * + * Copyright (C) 2007 Secret Lab Technologies Ltd. + */ + +/* + * The DS1682 elapsed timer recorder is a simple device that implements + * one elapsed time counter, one event counter, an alarm signal and 10 + * bytes of general purpose EEPROM. + * + * This driver provides access to the DS1682 counters and user data via + * the sysfs. The following attributes are added to the device node: + * elapsed_time (u32): Total elapsed event time in ms resolution + * alarm_time (u32): When elapsed time exceeds the value in alarm_time, + * then the alarm pin is asserted. + * event_count (u16): number of times the event pin has gone low. + * eeprom (u8[10]): general purpose EEPROM + * + * Counter registers and user data are both read/write unless the device + * has been write protected. This driver does not support turning off write + * protection. Once write protection is turned on, it is impossible to + * turn it off again, so I have left the feature out of this driver to avoid + * accidental enabling, but it is trivial to add write protect support. 
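+ *
+ * Note on units (taken from the code below): the chip's elapsed_time and
+ * alarm_time registers count quarter seconds. ds1682_show() multiplies the
+ * raw value by 250 and ds1682_store() divides by 250, so the values seen
+ * through sysfs are in milliseconds with 250 ms granularity.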
+ * + */ + +#include +#include +#include +#include +#include +#include +#include + +/* Device registers */ +#define DS1682_REG_CONFIG 0x00 +#define DS1682_REG_ALARM 0x01 +#define DS1682_REG_ELAPSED 0x05 +#define DS1682_REG_EVT_CNTR 0x09 +#define DS1682_REG_EEPROM 0x0b +#define DS1682_REG_RESET 0x1d +#define DS1682_REG_WRITE_DISABLE 0x1e +#define DS1682_REG_WRITE_MEM_DISABLE 0x1f + +#define DS1682_EEPROM_SIZE 10 + +/* + * Generic counter attributes + */ +static ssize_t ds1682_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + struct i2c_client *client = to_i2c_client(dev); + unsigned long long val, check; + __le32 val_le = 0; + int rc; + + dev_dbg(dev, "ds1682_show() called on %s\n", attr->attr.name); + + /* Read the register */ + rc = i2c_smbus_read_i2c_block_data(client, sattr->index, sattr->nr, + (u8 *)&val_le); + if (rc < 0) + return -EIO; + + val = le32_to_cpu(val_le); + + if (sattr->index == DS1682_REG_ELAPSED) { + int retries = 5; + + /* Detect and retry when a tick occurs mid-read */ + do { + rc = i2c_smbus_read_i2c_block_data(client, sattr->index, + sattr->nr, + (u8 *)&val_le); + if (rc < 0 || retries <= 0) + return -EIO; + + check = val; + val = le32_to_cpu(val_le); + retries--; + } while (val != check && val != (check + 1)); + } + + /* Format the output string and return # of bytes + * Special case: the 32 bit regs are time values with 1/4s + * resolution, scale them up to milliseconds + */ + return sprintf(buf, "%llu\n", (sattr->nr == 4) ? (val * 250) : val); +} + +static ssize_t ds1682_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + struct i2c_client *client = to_i2c_client(dev); + u64 val; + __le32 val_le; + int rc; + + dev_dbg(dev, "ds1682_store() called on %s\n", attr->attr.name); + + /* Decode input */ + rc = kstrtoull(buf, 0, &val); + if (rc < 0) { + dev_dbg(dev, "input string not a number\n"); + return -EINVAL; + } + + /* Special case: the 32 bit regs are time values with 1/4s + * resolution, scale input down to quarter-seconds */ + if (sattr->nr == 4) + do_div(val, 250); + + /* write out the value */ + val_le = cpu_to_le32(val); + rc = i2c_smbus_write_i2c_block_data(client, sattr->index, sattr->nr, + (u8 *) & val_le); + if (rc < 0) { + dev_err(dev, "register write failed; reg=0x%x, size=%i\n", + sattr->index, sattr->nr); + return -EIO; + } + + return count; +} + +/* + * Simple register attributes + */ +static SENSOR_DEVICE_ATTR_2(elapsed_time, S_IRUGO | S_IWUSR, ds1682_show, + ds1682_store, 4, DS1682_REG_ELAPSED); +static SENSOR_DEVICE_ATTR_2(alarm_time, S_IRUGO | S_IWUSR, ds1682_show, + ds1682_store, 4, DS1682_REG_ALARM); +static SENSOR_DEVICE_ATTR_2(event_count, S_IRUGO | S_IWUSR, ds1682_show, + ds1682_store, 2, DS1682_REG_EVT_CNTR); + +static const struct attribute_group ds1682_group = { + .attrs = (struct attribute *[]) { + &sensor_dev_attr_elapsed_time.dev_attr.attr, + &sensor_dev_attr_alarm_time.dev_attr.attr, + &sensor_dev_attr_event_count.dev_attr.attr, + NULL, + }, +}; + +/* + * User data attribute + */ +static ssize_t ds1682_eeprom_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + struct i2c_client *client = kobj_to_i2c_client(kobj); + int rc; + + dev_dbg(&client->dev, "ds1682_eeprom_read(p=%p, off=%lli, c=%zi)\n", + buf, off, count); + + rc = i2c_smbus_read_i2c_block_data(client, 
DS1682_REG_EEPROM + off, + count, buf); + if (rc < 0) + return -EIO; + + return count; +} + +static ssize_t ds1682_eeprom_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + struct i2c_client *client = kobj_to_i2c_client(kobj); + + dev_dbg(&client->dev, "ds1682_eeprom_write(p=%p, off=%lli, c=%zi)\n", + buf, off, count); + + /* Write out to the device */ + if (i2c_smbus_write_i2c_block_data(client, DS1682_REG_EEPROM + off, + count, buf) < 0) + return -EIO; + + return count; +} + +static const struct bin_attribute ds1682_eeprom_attr = { + .attr = { + .name = "eeprom", + .mode = S_IRUGO | S_IWUSR, + }, + .size = DS1682_EEPROM_SIZE, + .read = ds1682_eeprom_read, + .write = ds1682_eeprom_write, +}; + +/* + * Called when a ds1682 device is matched with this driver + */ +static int ds1682_probe(struct i2c_client *client) +{ + int rc; + + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_I2C_BLOCK)) { + dev_err(&client->dev, "i2c bus does not support the ds1682\n"); + rc = -ENODEV; + goto exit; + } + + rc = sysfs_create_group(&client->dev.kobj, &ds1682_group); + if (rc) + goto exit; + + rc = sysfs_create_bin_file(&client->dev.kobj, &ds1682_eeprom_attr); + if (rc) + goto exit_bin_attr; + + return 0; + + exit_bin_attr: + sysfs_remove_group(&client->dev.kobj, &ds1682_group); + exit: + return rc; +} + +static void ds1682_remove(struct i2c_client *client) +{ + sysfs_remove_bin_file(&client->dev.kobj, &ds1682_eeprom_attr); + sysfs_remove_group(&client->dev.kobj, &ds1682_group); +} + +static const struct i2c_device_id ds1682_id[] = { + { "ds1682", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, ds1682_id); + +static const struct of_device_id ds1682_of_match[] = { + { .compatible = "dallas,ds1682", }, + {} +}; +MODULE_DEVICE_TABLE(of, ds1682_of_match); + +static struct i2c_driver ds1682_driver = { + .driver = { + .name = "ds1682", + .of_match_table = ds1682_of_match, + }, + .probe = ds1682_probe, + .remove = ds1682_remove, + .id_table = ds1682_id, +}; + +module_i2c_driver(ds1682_driver); + +MODULE_AUTHOR("Grant Likely "); +MODULE_DESCRIPTION("DS1682 Elapsed Time Indicator driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/dummy-irq.c b/drivers/misc/dummy-irq.c new file mode 100644 index 0000000000..fe3bfcb31a --- /dev/null +++ b/drivers/misc/dummy-irq.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Dummy IRQ handler driver. + * + * This module only registers itself as a handler that is specified to it + * by the 'irq' parameter. + * + * The sole purpose of this module is to help with debugging of systems on + * which spurious IRQs would happen on disabled IRQ vector. + * + * Copyright (C) 2013 Jiri Kosina + */ + +#include +#include +#include + +static int irq = -1; + +static irqreturn_t dummy_interrupt(int irq, void *dev_id) +{ + static int count = 0; + + if (count == 0) { + printk(KERN_INFO "dummy-irq: interrupt occurred on IRQ %d\n", + irq); + count++; + } + + return IRQ_NONE; +} + +static int __init dummy_irq_init(void) +{ + if (irq < 0) { + printk(KERN_ERR "dummy-irq: no IRQ given. 
Use irq=N\n"); + return -EIO; + } + if (request_irq(irq, &dummy_interrupt, IRQF_SHARED, "dummy_irq", &irq)) { + printk(KERN_ERR "dummy-irq: cannot register IRQ %d\n", irq); + return -EIO; + } + printk(KERN_INFO "dummy-irq: registered for IRQ %d\n", irq); + return 0; +} + +static void __exit dummy_irq_exit(void) +{ + printk(KERN_INFO "dummy-irq unloaded\n"); + free_irq(irq, &irq); +} + +module_init(dummy_irq_init); +module_exit(dummy_irq_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jiri Kosina"); +module_param_hw(irq, uint, irq, 0444); +MODULE_PARM_DESC(irq, "The IRQ to register for"); +MODULE_DESCRIPTION("Dummy IRQ handler driver"); diff --git a/drivers/misc/dw-xdata-pcie.c b/drivers/misc/dw-xdata-pcie.c new file mode 100644 index 0000000000..257c25da51 --- /dev/null +++ b/drivers/misc/dw-xdata-pcie.c @@ -0,0 +1,420 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2020 Synopsys, Inc. and/or its affiliates. + * Synopsys DesignWare xData driver + * + * Author: Gustavo Pimentel + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DW_XDATA_DRIVER_NAME "dw-xdata-pcie" + +#define DW_XDATA_EP_MEM_OFFSET 0x8000000 + +static DEFINE_IDA(xdata_ida); + +#define STATUS_DONE BIT(0) + +#define CONTROL_DOORBELL BIT(0) +#define CONTROL_IS_WRITE BIT(1) +#define CONTROL_LENGTH(a) FIELD_PREP(GENMASK(13, 2), a) +#define CONTROL_PATTERN_INC BIT(16) +#define CONTROL_NO_ADDR_INC BIT(18) + +#define XPERF_CONTROL_ENABLE BIT(5) + +#define BURST_REPEAT BIT(31) +#define BURST_VALUE 0x1001 + +#define PATTERN_VALUE 0x0 + +struct dw_xdata_regs { + u32 addr_lsb; /* 0x000 */ + u32 addr_msb; /* 0x004 */ + u32 burst_cnt; /* 0x008 */ + u32 control; /* 0x00c */ + u32 pattern; /* 0x010 */ + u32 status; /* 0x014 */ + u32 RAM_addr; /* 0x018 */ + u32 RAM_port; /* 0x01c */ + u32 _reserved0[14]; /* 0x020..0x054 */ + u32 perf_control; /* 0x058 */ + u32 _reserved1[41]; /* 0x05c..0x0fc */ + u32 wr_cnt_lsb; /* 0x100 */ + u32 wr_cnt_msb; /* 0x104 */ + u32 rd_cnt_lsb; /* 0x108 */ + u32 rd_cnt_msb; /* 0x10c */ +} __packed; + +struct dw_xdata_region { + phys_addr_t paddr; /* physical address */ + void __iomem *vaddr; /* virtual address */ +}; + +struct dw_xdata { + struct dw_xdata_region rg_region; /* registers */ + size_t max_wr_len; /* max wr xfer len */ + size_t max_rd_len; /* max rd xfer len */ + struct mutex mutex; + struct pci_dev *pdev; + struct miscdevice misc_dev; +}; + +static inline struct dw_xdata_regs __iomem *__dw_regs(struct dw_xdata *dw) +{ + return dw->rg_region.vaddr; +} + +static void dw_xdata_stop(struct dw_xdata *dw) +{ + u32 burst; + + mutex_lock(&dw->mutex); + + burst = readl(&(__dw_regs(dw)->burst_cnt)); + + if (burst & BURST_REPEAT) { + burst &= ~(u32)BURST_REPEAT; + writel(burst, &(__dw_regs(dw)->burst_cnt)); + } + + mutex_unlock(&dw->mutex); +} + +static void dw_xdata_start(struct dw_xdata *dw, bool write) +{ + struct device *dev = &dw->pdev->dev; + u32 control, status; + + /* Stop first if xfer in progress */ + dw_xdata_stop(dw); + + mutex_lock(&dw->mutex); + + /* Clear status register */ + writel(0x0, &(__dw_regs(dw)->status)); + + /* Burst count register set for continuous until stopped */ + writel(BURST_REPEAT | BURST_VALUE, &(__dw_regs(dw)->burst_cnt)); + + /* Pattern register */ + writel(PATTERN_VALUE, &(__dw_regs(dw)->pattern)); + + /* Control register */ + control = CONTROL_DOORBELL | CONTROL_PATTERN_INC | CONTROL_NO_ADDR_INC; + if (write) { + control |= CONTROL_IS_WRITE; + control |= CONTROL_LENGTH(dw->max_wr_len); + } else 
{ + control |= CONTROL_LENGTH(dw->max_rd_len); + } + writel(control, &(__dw_regs(dw)->control)); + + /* + * The xData HW block needs about 100 ms to initiate the traffic + * generation according this HW block datasheet. + */ + usleep_range(100, 150); + + status = readl(&(__dw_regs(dw)->status)); + + mutex_unlock(&dw->mutex); + + if (!(status & STATUS_DONE)) + dev_dbg(dev, "xData: started %s direction\n", + write ? "write" : "read"); +} + +static void dw_xdata_perf_meas(struct dw_xdata *dw, u64 *data, bool write) +{ + if (write) { + *data = readl(&(__dw_regs(dw)->wr_cnt_msb)); + *data <<= 32; + *data |= readl(&(__dw_regs(dw)->wr_cnt_lsb)); + } else { + *data = readl(&(__dw_regs(dw)->rd_cnt_msb)); + *data <<= 32; + *data |= readl(&(__dw_regs(dw)->rd_cnt_lsb)); + } +} + +static u64 dw_xdata_perf_diff(u64 *m1, u64 *m2, u64 time) +{ + u64 rate = (*m1 - *m2); + + rate *= (1000 * 1000 * 1000); + rate >>= 20; + rate = DIV_ROUND_CLOSEST_ULL(rate, time); + + return rate; +} + +static void dw_xdata_perf(struct dw_xdata *dw, u64 *rate, bool write) +{ + struct device *dev = &dw->pdev->dev; + u64 data[2], time[2], diff; + + mutex_lock(&dw->mutex); + + /* First acquisition of current count frames */ + writel(0x0, &(__dw_regs(dw)->perf_control)); + dw_xdata_perf_meas(dw, &data[0], write); + time[0] = jiffies; + writel((u32)XPERF_CONTROL_ENABLE, &(__dw_regs(dw)->perf_control)); + + /* + * Wait 100ms between the 1st count frame acquisition and the 2nd + * count frame acquisition, in order to calculate the speed later + */ + mdelay(100); + + /* Second acquisition of current count frames */ + writel(0x0, &(__dw_regs(dw)->perf_control)); + dw_xdata_perf_meas(dw, &data[1], write); + time[1] = jiffies; + writel((u32)XPERF_CONTROL_ENABLE, &(__dw_regs(dw)->perf_control)); + + /* + * Speed calculation + * + * rate = (2nd count frames - 1st count frames) / (time elapsed) + */ + diff = jiffies_to_nsecs(time[1] - time[0]); + *rate = dw_xdata_perf_diff(&data[1], &data[0], diff); + + mutex_unlock(&dw->mutex); + + dev_dbg(dev, "xData: time=%llu us, %s=%llu MB/s\n", + diff, write ? 
"write" : "read", *rate); +} + +static struct dw_xdata *misc_dev_to_dw(struct miscdevice *misc_dev) +{ + return container_of(misc_dev, struct dw_xdata, misc_dev); +} + +static ssize_t write_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct miscdevice *misc_dev = dev_get_drvdata(dev); + struct dw_xdata *dw = misc_dev_to_dw(misc_dev); + u64 rate; + + dw_xdata_perf(dw, &rate, true); + + return sysfs_emit(buf, "%llu\n", rate); +} + +static ssize_t write_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + struct miscdevice *misc_dev = dev_get_drvdata(dev); + struct dw_xdata *dw = misc_dev_to_dw(misc_dev); + bool enabled; + int ret; + + ret = kstrtobool(buf, &enabled); + if (ret < 0) + return ret; + + if (enabled) { + dev_dbg(dev, "xData: requested write transfer\n"); + dw_xdata_start(dw, true); + } else { + dev_dbg(dev, "xData: requested stop transfer\n"); + dw_xdata_stop(dw); + } + + return size; +} + +static DEVICE_ATTR_RW(write); + +static ssize_t read_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct miscdevice *misc_dev = dev_get_drvdata(dev); + struct dw_xdata *dw = misc_dev_to_dw(misc_dev); + u64 rate; + + dw_xdata_perf(dw, &rate, false); + + return sysfs_emit(buf, "%llu\n", rate); +} + +static ssize_t read_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + struct miscdevice *misc_dev = dev_get_drvdata(dev); + struct dw_xdata *dw = misc_dev_to_dw(misc_dev); + bool enabled; + int ret; + + ret = kstrtobool(buf, &enabled); + if (ret < 0) + return ret; + + if (enabled) { + dev_dbg(dev, "xData: requested read transfer\n"); + dw_xdata_start(dw, false); + } else { + dev_dbg(dev, "xData: requested stop transfer\n"); + dw_xdata_stop(dw); + } + + return size; +} + +static DEVICE_ATTR_RW(read); + +static struct attribute *xdata_attrs[] = { + &dev_attr_write.attr, + &dev_attr_read.attr, + NULL, +}; + +ATTRIBUTE_GROUPS(xdata); + +static int dw_xdata_pcie_probe(struct pci_dev *pdev, + const struct pci_device_id *pid) +{ + struct device *dev = &pdev->dev; + struct dw_xdata *dw; + char name[24]; + u64 addr; + int err; + int id; + + /* Enable PCI device */ + err = pcim_enable_device(pdev); + if (err) { + dev_err(dev, "enabling device failed\n"); + return err; + } + + /* Mapping PCI BAR regions */ + err = pcim_iomap_regions(pdev, BIT(BAR_0), pci_name(pdev)); + if (err) { + dev_err(dev, "xData BAR I/O remapping failed\n"); + return err; + } + + pci_set_master(pdev); + + /* Allocate memory */ + dw = devm_kzalloc(dev, sizeof(*dw), GFP_KERNEL); + if (!dw) + return -ENOMEM; + + /* Data structure initialization */ + mutex_init(&dw->mutex); + + dw->rg_region.vaddr = pcim_iomap_table(pdev)[BAR_0]; + if (!dw->rg_region.vaddr) + return -ENOMEM; + + dw->rg_region.paddr = pdev->resource[BAR_0].start; + + dw->max_wr_len = pcie_get_mps(pdev); + dw->max_wr_len >>= 2; + + dw->max_rd_len = pcie_get_readrq(pdev); + dw->max_rd_len >>= 2; + + dw->pdev = pdev; + + id = ida_simple_get(&xdata_ida, 0, 0, GFP_KERNEL); + if (id < 0) { + dev_err(dev, "xData: unable to get id\n"); + return id; + } + + snprintf(name, sizeof(name), DW_XDATA_DRIVER_NAME ".%d", id); + dw->misc_dev.name = kstrdup(name, GFP_KERNEL); + if (!dw->misc_dev.name) { + err = -ENOMEM; + goto err_ida_remove; + } + + dw->misc_dev.minor = MISC_DYNAMIC_MINOR; + dw->misc_dev.parent = dev; + dw->misc_dev.groups = xdata_groups; + + writel(0x0, &(__dw_regs(dw)->RAM_addr)); + writel(0x0, &(__dw_regs(dw)->RAM_port)); + + addr = 
dw->rg_region.paddr + DW_XDATA_EP_MEM_OFFSET; + writel(lower_32_bits(addr), &(__dw_regs(dw)->addr_lsb)); + writel(upper_32_bits(addr), &(__dw_regs(dw)->addr_msb)); + dev_dbg(dev, "xData: target address = 0x%.16llx\n", addr); + + dev_dbg(dev, "xData: wr_len = %zu, rd_len = %zu\n", + dw->max_wr_len * 4, dw->max_rd_len * 4); + + /* Saving data structure reference */ + pci_set_drvdata(pdev, dw); + + /* Register misc device */ + err = misc_register(&dw->misc_dev); + if (err) { + dev_err(dev, "xData: failed to register device\n"); + goto err_kfree_name; + } + + return 0; + +err_kfree_name: + kfree(dw->misc_dev.name); + +err_ida_remove: + ida_simple_remove(&xdata_ida, id); + + return err; +} + +static void dw_xdata_pcie_remove(struct pci_dev *pdev) +{ + struct dw_xdata *dw = pci_get_drvdata(pdev); + int id; + + if (sscanf(dw->misc_dev.name, DW_XDATA_DRIVER_NAME ".%d", &id) != 1) + return; + + if (id < 0) + return; + + dw_xdata_stop(dw); + misc_deregister(&dw->misc_dev); + kfree(dw->misc_dev.name); + ida_simple_remove(&xdata_ida, id); +} + +static const struct pci_device_id dw_xdata_pcie_id_table[] = { + { PCI_DEVICE_DATA(SYNOPSYS, EDDA, NULL) }, + { } +}; +MODULE_DEVICE_TABLE(pci, dw_xdata_pcie_id_table); + +static struct pci_driver dw_xdata_pcie_driver = { + .name = DW_XDATA_DRIVER_NAME, + .id_table = dw_xdata_pcie_id_table, + .probe = dw_xdata_pcie_probe, + .remove = dw_xdata_pcie_remove, +}; + +module_pci_driver(dw_xdata_pcie_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Synopsys DesignWare xData PCIe driver"); +MODULE_AUTHOR("Gustavo Pimentel "); + diff --git a/drivers/misc/echo/Kconfig b/drivers/misc/echo/Kconfig new file mode 100644 index 0000000000..ce0a37a47f --- /dev/null +++ b/drivers/misc/echo/Kconfig @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only +config ECHO + tristate "Line Echo Canceller support" + help + This driver provides line echo cancelling support for mISDN and + Zaptel drivers. + + To compile this driver as a module, choose M here. The module + will be called echo. diff --git a/drivers/misc/echo/Makefile b/drivers/misc/echo/Makefile new file mode 100644 index 0000000000..5b97467ffb --- /dev/null +++ b/drivers/misc/echo/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_ECHO) += echo.o diff --git a/drivers/misc/echo/echo.c b/drivers/misc/echo/echo.c new file mode 100644 index 0000000000..3c4eaba865 --- /dev/null +++ b/drivers/misc/echo/echo.c @@ -0,0 +1,589 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * SpanDSP - a series of DSP components for telephony + * + * echo.c - A line echo canceller. This code is being developed + * against and partially complies with G168. + * + * Written by Steve Underwood + * and David Rowe + * + * Copyright (C) 2001, 2003 Steve Underwood, 2007 David Rowe + * + * Based on a bit from here, a bit from there, eye of toad, ear of + * bat, 15 years of failed attempts by David and a few fried brain + * cells. + * + * All rights reserved. + */ + +/*! \file */ + +/* Implementation Notes + David Rowe + April 2007 + + This code started life as Steve's NLMS algorithm with a tap + rotation algorithm to handle divergence during double talk. I + added a Geigel Double Talk Detector (DTD) [2] and performed some + G168 tests. However I had trouble meeting the G168 requirements, + especially for double talk - there were always cases where my DTD + failed, for example where near end speech was under the 6dB + threshold required for declaring double talk. 
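+
+   (As a rough illustration only - this fragment is not in the driver and
+   the names are made up - the Geigel criterion referred to above declares
+   double talk whenever the near-end signal is within 6dB of the recent
+   far-end peak:
+
+	if (abs(rx) >= (max_abs_tx_last_N >> 1))
+		hangover = DTD_HANGOVER;
+
+   The much simpler detector actually used in oslec_update() below just
+   compares the short term averages lrx and ltx.)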
+ + So I tried a two path algorithm [1], which has so far given better + results. The original tap rotation/Geigel algorithm is available + in SVN http://svn.rowetel.com/software/oslec/tags/before_16bit. + It's probably possible to make it work if some one wants to put some + serious work into it. + + At present no special treatment is provided for tones, which + generally cause NLMS algorithms to diverge. Initial runs of a + subset of the G168 tests for tones (e.g ./echo_test 6) show the + current algorithm is passing OK, which is kind of surprising. The + full set of tests needs to be performed to confirm this result. + + One other interesting change is that I have managed to get the NLMS + code to work with 16 bit coefficients, rather than the original 32 + bit coefficents. This reduces the MIPs and storage required. + I evaulated the 16 bit port using g168_tests.sh and listening tests + on 4 real-world samples. + + I also attempted the implementation of a block based NLMS update + [2] but although this passes g168_tests.sh it didn't converge well + on the real-world samples. I have no idea why, perhaps a scaling + problem. The block based code is also available in SVN + http://svn.rowetel.com/software/oslec/tags/before_16bit. If this + code can be debugged, it will lead to further reduction in MIPS, as + the block update code maps nicely onto DSP instruction sets (it's a + dot product) compared to the current sample-by-sample update. + + Steve also has some nice notes on echo cancellers in echo.h + + References: + + [1] Ochiai, Areseki, and Ogihara, "Echo Canceller with Two Echo + Path Models", IEEE Transactions on communications, COM-25, + No. 6, June + 1977. + https://www.rowetel.com/images/echo/dual_path_paper.pdf + + [2] The classic, very useful paper that tells you how to + actually build a real world echo canceller: + Messerschmitt, Hedberg, Cole, Haoui, Winship, "Digital Voice + Echo Canceller with a TMS320020, + https://www.rowetel.com/images/echo/spra129.pdf + + [3] I have written a series of blog posts on this work, here is + Part 1: http://www.rowetel.com/blog/?p=18 + + [4] The source code http://svn.rowetel.com/software/oslec/ + + [5] A nice reference on LMS filters: + https://en.wikipedia.org/wiki/Least_mean_squares_filter + + Credits: + + Thanks to Steve Underwood, Jean-Marc Valin, and Ramakrishnan + Muthukrishnan for their suggestions and email discussions. Thanks + also to those people who collected echo samples for me such as + Mark, Pawel, and Pavel. 
+*/ + +#include +#include +#include + +#include "echo.h" + +#define MIN_TX_POWER_FOR_ADAPTION 64 +#define MIN_RX_POWER_FOR_ADAPTION 64 +#define DTD_HANGOVER 600 /* 600 samples, or 75ms */ +#define DC_LOG2BETA 3 /* log2() of DC filter Beta */ + +/* adapting coeffs using the traditional stochastic descent (N)LMS algorithm */ + +static inline void lms_adapt_bg(struct oslec_state *ec, int clean, int shift) +{ + int i; + + int offset1; + int offset2; + int factor; + int exp; + + if (shift > 0) + factor = clean << shift; + else + factor = clean >> -shift; + + /* Update the FIR taps */ + + offset2 = ec->curr_pos; + offset1 = ec->taps - offset2; + + for (i = ec->taps - 1; i >= offset1; i--) { + exp = (ec->fir_state_bg.history[i - offset1] * factor); + ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15); + } + for (; i >= 0; i--) { + exp = (ec->fir_state_bg.history[i + offset2] * factor); + ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15); + } +} + +static inline int top_bit(unsigned int bits) +{ + if (bits == 0) + return -1; + else + return (int)fls((int32_t) bits) - 1; +} + +struct oslec_state *oslec_create(int len, int adaption_mode) +{ + struct oslec_state *ec; + int i; + const int16_t *history; + + ec = kzalloc(sizeof(*ec), GFP_KERNEL); + if (!ec) + return NULL; + + ec->taps = len; + ec->log2taps = top_bit(len); + ec->curr_pos = ec->taps - 1; + + ec->fir_taps16[0] = + kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL); + if (!ec->fir_taps16[0]) + goto error_oom_0; + + ec->fir_taps16[1] = + kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL); + if (!ec->fir_taps16[1]) + goto error_oom_1; + + history = fir16_create(&ec->fir_state, ec->fir_taps16[0], ec->taps); + if (!history) + goto error_state; + history = fir16_create(&ec->fir_state_bg, ec->fir_taps16[1], ec->taps); + if (!history) + goto error_state_bg; + + for (i = 0; i < 5; i++) + ec->xvtx[i] = ec->yvtx[i] = ec->xvrx[i] = ec->yvrx[i] = 0; + + ec->cng_level = 1000; + oslec_adaption_mode(ec, adaption_mode); + + ec->snapshot = kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL); + if (!ec->snapshot) + goto error_snap; + + ec->cond_met = 0; + ec->pstates = 0; + ec->ltxacc = ec->lrxacc = ec->lcleanacc = ec->lclean_bgacc = 0; + ec->ltx = ec->lrx = ec->lclean = ec->lclean_bg = 0; + ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0; + ec->lbgn = ec->lbgn_acc = 0; + ec->lbgn_upper = 200; + ec->lbgn_upper_acc = ec->lbgn_upper << 13; + + return ec; + +error_snap: + fir16_free(&ec->fir_state_bg); +error_state_bg: + fir16_free(&ec->fir_state); +error_state: + kfree(ec->fir_taps16[1]); +error_oom_1: + kfree(ec->fir_taps16[0]); +error_oom_0: + kfree(ec); + return NULL; +} +EXPORT_SYMBOL_GPL(oslec_create); + +void oslec_free(struct oslec_state *ec) +{ + int i; + + fir16_free(&ec->fir_state); + fir16_free(&ec->fir_state_bg); + for (i = 0; i < 2; i++) + kfree(ec->fir_taps16[i]); + kfree(ec->snapshot); + kfree(ec); +} +EXPORT_SYMBOL_GPL(oslec_free); + +void oslec_adaption_mode(struct oslec_state *ec, int adaption_mode) +{ + ec->adaption_mode = adaption_mode; +} +EXPORT_SYMBOL_GPL(oslec_adaption_mode); + +void oslec_flush(struct oslec_state *ec) +{ + int i; + + ec->ltxacc = ec->lrxacc = ec->lcleanacc = ec->lclean_bgacc = 0; + ec->ltx = ec->lrx = ec->lclean = ec->lclean_bg = 0; + ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0; + + ec->lbgn = ec->lbgn_acc = 0; + ec->lbgn_upper = 200; + ec->lbgn_upper_acc = ec->lbgn_upper << 13; + + ec->nonupdate_dwell = 0; + + fir16_flush(&ec->fir_state); + fir16_flush(&ec->fir_state_bg); + ec->fir_state.curr_pos = ec->taps - 1; 
+ ec->fir_state_bg.curr_pos = ec->taps - 1; + for (i = 0; i < 2; i++) + memset(ec->fir_taps16[i], 0, ec->taps * sizeof(int16_t)); + + ec->curr_pos = ec->taps - 1; + ec->pstates = 0; +} +EXPORT_SYMBOL_GPL(oslec_flush); + +void oslec_snapshot(struct oslec_state *ec) +{ + memcpy(ec->snapshot, ec->fir_taps16[0], ec->taps * sizeof(int16_t)); +} +EXPORT_SYMBOL_GPL(oslec_snapshot); + +/* Dual Path Echo Canceller */ + +int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx) +{ + int32_t echo_value; + int clean_bg; + int tmp; + int tmp1; + + /* + * Input scaling was found be required to prevent problems when tx + * starts clipping. Another possible way to handle this would be the + * filter coefficent scaling. + */ + + ec->tx = tx; + ec->rx = rx; + tx >>= 1; + rx >>= 1; + + /* + * Filter DC, 3dB point is 160Hz (I think), note 32 bit precision + * required otherwise values do not track down to 0. Zero at DC, Pole + * at (1-Beta) on real axis. Some chip sets (like Si labs) don't + * need this, but something like a $10 X100P card does. Any DC really + * slows down convergence. + * + * Note: removes some low frequency from the signal, this reduces the + * speech quality when listening to samples through headphones but may + * not be obvious through a telephone handset. + * + * Note that the 3dB frequency in radians is approx Beta, e.g. for Beta + * = 2^(-3) = 0.125, 3dB freq is 0.125 rads = 159Hz. + */ + + if (ec->adaption_mode & ECHO_CAN_USE_RX_HPF) { + tmp = rx << 15; + + /* + * Make sure the gain of the HPF is 1.0. This can still + * saturate a little under impulse conditions, and it might + * roll to 32768 and need clipping on sustained peak level + * signals. However, the scale of such clipping is small, and + * the error due to any saturation should not markedly affect + * the downstream processing. + */ + tmp -= (tmp >> 4); + + ec->rx_1 += -(ec->rx_1 >> DC_LOG2BETA) + tmp - ec->rx_2; + + /* + * hard limit filter to prevent clipping. Note that at this + * stage rx should be limited to +/- 16383 due to right shift + * above + */ + tmp1 = ec->rx_1 >> 15; + if (tmp1 > 16383) + tmp1 = 16383; + if (tmp1 < -16383) + tmp1 = -16383; + rx = tmp1; + ec->rx_2 = tmp; + } + + /* Block average of power in the filter states. Used for + adaption power calculation. */ + + { + int new, old; + + /* efficient "out with the old and in with the new" algorithm so + we don't have to recalculate over the whole block of + samples. 
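+		   In effect the update below keeps
+
+			pstates ~= sum(history[i]^2) / taps
+
+		   by adding the square of the incoming sample and subtracting
+		   the square of the sample it evicts from the FIR history,
+		   with (1 << (log2taps - 1)) added for rounding.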
*/ + new = (int)tx * (int)tx; + old = (int)ec->fir_state.history[ec->fir_state.curr_pos] * + (int)ec->fir_state.history[ec->fir_state.curr_pos]; + ec->pstates += + ((new - old) + (1 << (ec->log2taps - 1))) >> ec->log2taps; + if (ec->pstates < 0) + ec->pstates = 0; + } + + /* Calculate short term average levels using simple single pole IIRs */ + + ec->ltxacc += abs(tx) - ec->ltx; + ec->ltx = (ec->ltxacc + (1 << 4)) >> 5; + ec->lrxacc += abs(rx) - ec->lrx; + ec->lrx = (ec->lrxacc + (1 << 4)) >> 5; + + /* Foreground filter */ + + ec->fir_state.coeffs = ec->fir_taps16[0]; + echo_value = fir16(&ec->fir_state, tx); + ec->clean = rx - echo_value; + ec->lcleanacc += abs(ec->clean) - ec->lclean; + ec->lclean = (ec->lcleanacc + (1 << 4)) >> 5; + + /* Background filter */ + + echo_value = fir16(&ec->fir_state_bg, tx); + clean_bg = rx - echo_value; + ec->lclean_bgacc += abs(clean_bg) - ec->lclean_bg; + ec->lclean_bg = (ec->lclean_bgacc + (1 << 4)) >> 5; + + /* Background Filter adaption */ + + /* Almost always adap bg filter, just simple DT and energy + detection to minimise adaption in cases of strong double talk. + However this is not critical for the dual path algorithm. + */ + ec->factor = 0; + ec->shift = 0; + if (!ec->nonupdate_dwell) { + int p, logp, shift; + + /* Determine: + + f = Beta * clean_bg_rx/P ------ (1) + + where P is the total power in the filter states. + + The Boffins have shown that if we obey (1) we converge + quickly and avoid instability. + + The correct factor f must be in Q30, as this is the fixed + point format required by the lms_adapt_bg() function, + therefore the scaled version of (1) is: + + (2^30) * f = (2^30) * Beta * clean_bg_rx/P + factor = (2^30) * Beta * clean_bg_rx/P ----- (2) + + We have chosen Beta = 0.25 by experiment, so: + + factor = (2^30) * (2^-2) * clean_bg_rx/P + + (30 - 2 - log2(P)) + factor = clean_bg_rx 2 ----- (3) + + To avoid a divide we approximate log2(P) as top_bit(P), + which returns the position of the highest non-zero bit in + P. This approximation introduces an error as large as a + factor of 2, but the algorithm seems to handle it OK. + + Come to think of it a divide may not be a big deal on a + modern DSP, so its probably worth checking out the cycles + for a divide versus a top_bit() implementation. + */ + + p = MIN_TX_POWER_FOR_ADAPTION + ec->pstates; + logp = top_bit(p) + ec->log2taps; + shift = 30 - 2 - logp; + ec->shift = shift; + + lms_adapt_bg(ec, clean_bg, shift); + } + + /* very simple DTD to make sure we dont try and adapt with strong + near end speech */ + + ec->adapt = 0; + if ((ec->lrx > MIN_RX_POWER_FOR_ADAPTION) && (ec->lrx > ec->ltx)) + ec->nonupdate_dwell = DTD_HANGOVER; + if (ec->nonupdate_dwell) + ec->nonupdate_dwell--; + + /* Transfer logic */ + + /* These conditions are from the dual path paper [1], I messed with + them a bit to improve performance. 
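+	   In words: adaption must be enabled, there must have been no recent
+	   double talk, and the background filter's residual must be clearly
+	   better than both the foreground residual (below 7/8 of it) and the
+	   tx level (below 1/8 of it) for six consecutive samples before the
+	   background taps are copied over to the foreground filter.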
*/ + + if ((ec->adaption_mode & ECHO_CAN_USE_ADAPTION) && + (ec->nonupdate_dwell == 0) && + /* (ec->Lclean_bg < 0.875*ec->Lclean) */ + (8 * ec->lclean_bg < 7 * ec->lclean) && + /* (ec->Lclean_bg < 0.125*ec->Ltx) */ + (8 * ec->lclean_bg < ec->ltx)) { + if (ec->cond_met == 6) { + /* + * BG filter has had better results for 6 consecutive + * samples + */ + ec->adapt = 1; + memcpy(ec->fir_taps16[0], ec->fir_taps16[1], + ec->taps * sizeof(int16_t)); + } else + ec->cond_met++; + } else + ec->cond_met = 0; + + /* Non-Linear Processing */ + + ec->clean_nlp = ec->clean; + if (ec->adaption_mode & ECHO_CAN_USE_NLP) { + /* + * Non-linear processor - a fancy way to say "zap small + * signals, to avoid residual echo due to (uLaw/ALaw) + * non-linearity in the channel.". + */ + + if ((16 * ec->lclean < ec->ltx)) { + /* + * Our e/c has improved echo by at least 24 dB (each + * factor of 2 is 6dB, so 2*2*2*2=16 is the same as + * 6+6+6+6=24dB) + */ + if (ec->adaption_mode & ECHO_CAN_USE_CNG) { + ec->cng_level = ec->lbgn; + + /* + * Very elementary comfort noise generation. + * Just random numbers rolled off very vaguely + * Hoth-like. DR: This noise doesn't sound + * quite right to me - I suspect there are some + * overflow issues in the filtering as it's too + * "crackly". + * TODO: debug this, maybe just play noise at + * high level or look at spectrum. + */ + + ec->cng_rndnum = + 1664525U * ec->cng_rndnum + 1013904223U; + ec->cng_filter = + ((ec->cng_rndnum & 0xFFFF) - 32768 + + 5 * ec->cng_filter) >> 3; + ec->clean_nlp = + (ec->cng_filter * ec->cng_level * 8) >> 14; + + } else if (ec->adaption_mode & ECHO_CAN_USE_CLIP) { + /* This sounds much better than CNG */ + if (ec->clean_nlp > ec->lbgn) + ec->clean_nlp = ec->lbgn; + if (ec->clean_nlp < -ec->lbgn) + ec->clean_nlp = -ec->lbgn; + } else { + /* + * just mute the residual, doesn't sound very + * good, used mainly in G168 tests + */ + ec->clean_nlp = 0; + } + } else { + /* + * Background noise estimator. I tried a few + * algorithms here without much luck. This very simple + * one seems to work best, we just average the level + * using a slow (1 sec time const) filter if the + * current level is less than a (experimentally + * derived) constant. This means we dont include high + * level signals like near end speech. When combined + * with CNG or especially CLIP seems to work OK. + */ + if (ec->lclean < 40) { + ec->lbgn_acc += abs(ec->clean) - ec->lbgn; + ec->lbgn = (ec->lbgn_acc + (1 << 11)) >> 12; + } + } + } + + /* Roll around the taps buffer */ + if (ec->curr_pos <= 0) + ec->curr_pos = ec->taps; + ec->curr_pos--; + + if (ec->adaption_mode & ECHO_CAN_DISABLE) + ec->clean_nlp = rx; + + /* Output scaled back up again to match input scaling */ + + return (int16_t) ec->clean_nlp << 1; +} +EXPORT_SYMBOL_GPL(oslec_update); + +/* This function is separated from the echo canceller is it is usually called + as part of the tx process. See rx HP (DC blocking) filter above, it's + the same design. + + Some soft phones send speech signals with a lot of low frequency + energy, e.g. down to 20Hz. This can make the hybrid non-linear + which causes the echo canceller to fall over. This filter can help + by removing any low frequency before it gets to the tx port of the + hybrid. + + It can also help by removing and DC in the tx signal. DC is bad + for LMS algorithms. + + This is one of the classic DC removal filters, adjusted to provide + sufficient bass rolloff to meet the above requirement to protect hybrids + from things that upset them. 
The difference between successive samples + produces a lousy HPF, and then a suitably placed pole flattens things out. + The final result is a nicely rolled off bass end. The filtering is + implemented with extended fractional precision, which noise shapes things, + giving very clean DC removal. +*/ + +int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx) +{ + int tmp; + int tmp1; + + if (ec->adaption_mode & ECHO_CAN_USE_TX_HPF) { + tmp = tx << 15; + + /* + * Make sure the gain of the HPF is 1.0. The first can still + * saturate a little under impulse conditions, and it might + * roll to 32768 and need clipping on sustained peak level + * signals. However, the scale of such clipping is small, and + * the error due to any saturation should not markedly affect + * the downstream processing. + */ + tmp -= (tmp >> 4); + + ec->tx_1 += -(ec->tx_1 >> DC_LOG2BETA) + tmp - ec->tx_2; + tmp1 = ec->tx_1 >> 15; + if (tmp1 > 32767) + tmp1 = 32767; + if (tmp1 < -32767) + tmp1 = -32767; + tx = tmp1; + ec->tx_2 = tmp; + } + + return tx; +} +EXPORT_SYMBOL_GPL(oslec_hpf_tx); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("David Rowe"); +MODULE_DESCRIPTION("Open Source Line Echo Canceller"); +MODULE_VERSION("0.3.0"); diff --git a/drivers/misc/echo/echo.h b/drivers/misc/echo/echo.h new file mode 100644 index 0000000000..56b4b95fd0 --- /dev/null +++ b/drivers/misc/echo/echo.h @@ -0,0 +1,175 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * SpanDSP - a series of DSP components for telephony + * + * echo.c - A line echo canceller. This code is being developed + * against and partially complies with G168. + * + * Written by Steve Underwood + * and David Rowe + * + * Copyright (C) 2001 Steve Underwood and 2007 David Rowe + * + * All rights reserved. + */ + +#ifndef __ECHO_H +#define __ECHO_H + +/* +Line echo cancellation for voice + +What does it do? + +This module aims to provide G.168-2002 compliant echo cancellation, to remove +electrical echoes (e.g. from 2-4 wire hybrids) from voice calls. + +How does it work? + +The heart of the echo cancellor is FIR filter. This is adapted to match the +echo impulse response of the telephone line. It must be long enough to +adequately cover the duration of that impulse response. The signal transmitted +to the telephone line is passed through the FIR filter. Once the FIR is +properly adapted, the resulting output is an estimate of the echo signal +received from the line. This is subtracted from the received signal. The result +is an estimate of the signal which originated at the far end of the line, free +from echos of our own transmitted signal. + +The least mean squares (LMS) algorithm is attributed to Widrow and Hoff, and +was introduced in 1960. It is the commonest form of filter adaption used in +things like modem line equalisers and line echo cancellers. There it works very +well. However, it only works well for signals of constant amplitude. It works +very poorly for things like speech echo cancellation, where the signal level +varies widely. This is quite easy to fix. If the signal level is normalised - +similar to applying AGC - LMS can work as well for a signal of varying +amplitude as it does for a modem signal. This normalised least mean squares +(NLMS) algorithm is the commonest one used for speech echo cancellation. Many +other algorithms exist - e.g. RLS (essentially the same as Kalman filtering), +FAP, etc. Some perform significantly better than NLMS. However, factors such +as computational complexity and patents favour the use of NLMS. 
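+
+As a sketch (floating point, purely illustrative - echo.c implements the
+fixed point equivalent), one NLMS update step for an N tap filter w[],
+given the last N transmitted samples x[] and the received sample rx, is:
+
+	err  = rx - dot(w, x);		/* residual after the echo estimate */
+	norm = eps + dot(x, x);		/* recent transmit signal power */
+	for (i = 0; i < N; i++)
+		w[i] += beta * err * x[i] / norm;
+
+The division by the signal power is the normalisation that lets the adaption
+cope with the widely varying level of speech.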
+ +A simple refinement to NLMS can improve its performance with speech. NLMS tends +to adapt best to the strongest parts of a signal. If the signal is white noise, +the NLMS algorithm works very well. However, speech has more low frequency than +high frequency content. Pre-whitening (i.e. filtering the signal to flatten its +spectrum) the echo signal improves the adapt rate for speech, and ensures the +final residual signal is not heavily biased towards high frequencies. A very +low complexity filter is adequate for this, so pre-whitening adds little to the +compute requirements of the echo canceller. + +An FIR filter adapted using pre-whitened NLMS performs well, provided certain +conditions are met: + + - The transmitted signal has poor self-correlation. + - There is no signal being generated within the environment being + cancelled. + +The difficulty is that neither of these can be guaranteed. + +If the adaption is performed while transmitting noise (or something fairly +noise like, such as voice) the adaption works very well. If the adaption is +performed while transmitting something highly correlative (typically narrow +band energy such as signalling tones or DTMF), the adaption can go seriously +wrong. The reason is there is only one solution for the adaption on a near +random signal - the impulse response of the line. For a repetitive signal, +there are any number of solutions which converge the adaption, and nothing +guides the adaption to choose the generalised one. Allowing an untrained +canceller to converge on this kind of narrowband energy probably a good thing, +since at least it cancels the tones. Allowing a well converged canceller to +continue converging on such energy is just a way to ruin its generalised +adaption. A narrowband detector is needed, so adapation can be suspended at +appropriate times. + +The adaption process is based on trying to eliminate the received signal. When +there is any signal from within the environment being cancelled it may upset +the adaption process. Similarly, if the signal we are transmitting is small, +noise may dominate and disturb the adaption process. If we can ensure that the +adaption is only performed when we are transmitting a significant signal level, +and the environment is not, things will be OK. Clearly, it is easy to tell when +we are sending a significant signal. Telling, if the environment is generating +a significant signal, and doing it with sufficient speed that the adaption will +not have diverged too much more we stop it, is a little harder. + +The key problem in detecting when the environment is sourcing significant +energy is that we must do this very quickly. Given a reasonably long sample of +the received signal, there are a number of strategies which may be used to +assess whether that signal contains a strong far end component. However, by the +time that assessment is complete the far end signal will have already caused +major mis-convergence in the adaption process. An assessment algorithm is +needed which produces a fairly accurate result from a very short burst of far +end energy. + +How do I use it? + +The echo cancellor processes both the transmit and receive streams sample by +sample. The processing function is not declared inline. Unfortunately, +cancellation requires many operations per sample, so the call overhead is only +a minor burden. +*/ + +#include "fir.h" +#include "oslec.h" + +/* + G.168 echo canceller descriptor. This defines the working state for a line + echo canceller. 
+*/ +struct oslec_state { + int16_t tx; + int16_t rx; + int16_t clean; + int16_t clean_nlp; + + int nonupdate_dwell; + int curr_pos; + int taps; + int log2taps; + int adaption_mode; + + int cond_met; + int32_t pstates; + int16_t adapt; + int32_t factor; + int16_t shift; + + /* Average levels and averaging filter states */ + int ltxacc; + int lrxacc; + int lcleanacc; + int lclean_bgacc; + int ltx; + int lrx; + int lclean; + int lclean_bg; + int lbgn; + int lbgn_acc; + int lbgn_upper; + int lbgn_upper_acc; + + /* foreground and background filter states */ + struct fir16_state_t fir_state; + struct fir16_state_t fir_state_bg; + int16_t *fir_taps16[2]; + + /* DC blocking filter states */ + int tx_1; + int tx_2; + int rx_1; + int rx_2; + + /* optional High Pass Filter states */ + int32_t xvtx[5]; + int32_t yvtx[5]; + int32_t xvrx[5]; + int32_t yvrx[5]; + + /* Parameters for the optional Hoth noise generator */ + int cng_level; + int cng_rndnum; + int cng_filter; + + /* snapshot sample of coeffs used for development */ + int16_t *snapshot; +}; + +#endif /* __ECHO_H */ diff --git a/drivers/misc/echo/fir.h b/drivers/misc/echo/fir.h new file mode 100644 index 0000000000..4d08210252 --- /dev/null +++ b/drivers/misc/echo/fir.h @@ -0,0 +1,154 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * SpanDSP - a series of DSP components for telephony + * + * fir.h - General telephony FIR routines + * + * Written by Steve Underwood + * + * Copyright (C) 2002 Steve Underwood + * + * All rights reserved. + */ + +#if !defined(_FIR_H_) +#define _FIR_H_ + +/* + Ideas for improvement: + + 1/ Rewrite filter for dual MAC inner loop. The issue here is handling + history sample offsets that are 16 bit aligned - the dual MAC needs + 32 bit alignment. There are some good examples in libbfdsp. + + 2/ Use the hardware circular buffer facility to halve memory usage. + + 3/ Consider using internal memory. + + Using less memory might also improve speed as cache misses will be + reduced. A drop in MIPs and memory approaching 50% should be + possible. + + The foreground and background filters currently use a total of + about 10 MIPs/ch as measured with speedtest.c on a 256 TAP echo + can. +*/ + +/* + * 16 bit integer FIR descriptor. This defines the working state for a single + * instance of an FIR filter using 16 bit integer coefficients. + */ +struct fir16_state_t { + int taps; + int curr_pos; + const int16_t *coeffs; + int16_t *history; +}; + +/* + * 32 bit integer FIR descriptor. This defines the working state for a single + * instance of an FIR filter using 32 bit integer coefficients, and filtering + * 16 bit integer data. + */ +struct fir32_state_t { + int taps; + int curr_pos; + const int32_t *coeffs; + int16_t *history; +}; + +/* + * Floating point FIR descriptor. This defines the working state for a single + * instance of an FIR filter using floating point coefficients and data.
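[Editorial illustration] A minimal sketch of how the fir16 helpers defined further down in this header are typically driven; the function and variable names are invented, kernel context is assumed (as in the rest of this header), and note that fir16_create() returns the kcalloc'd history buffer, so a NULL return means the allocation failed.

/* Illustration only: filter n samples through a 16-bit integer FIR. */
static int run_fir16_example(const int16_t *coeffs, int taps,
			     const int16_t *in, int16_t *out, int n)
{
	struct fir16_state_t fir;
	int i;

	if (!fir16_create(&fir, coeffs, taps))
		return -ENOMEM;	/* history buffer allocation failed */

	for (i = 0; i < n; i++)
		out[i] = fir16(&fir, in[i]);	/* one output per input sample */

	fir16_free(&fir);
	return 0;
}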
+ */ +struct fir_float_state_t { + int taps; + int curr_pos; + const float *coeffs; + float *history; +}; + +static inline const int16_t *fir16_create(struct fir16_state_t *fir, + const int16_t *coeffs, int taps) +{ + fir->taps = taps; + fir->curr_pos = taps - 1; + fir->coeffs = coeffs; + fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL); + return fir->history; +} + +static inline void fir16_flush(struct fir16_state_t *fir) +{ + memset(fir->history, 0, fir->taps * sizeof(int16_t)); +} + +static inline void fir16_free(struct fir16_state_t *fir) +{ + kfree(fir->history); +} + +static inline int16_t fir16(struct fir16_state_t *fir, int16_t sample) +{ + int32_t y; + int i; + int offset1; + int offset2; + + fir->history[fir->curr_pos] = sample; + + offset2 = fir->curr_pos; + offset1 = fir->taps - offset2; + y = 0; + for (i = fir->taps - 1; i >= offset1; i--) + y += fir->coeffs[i] * fir->history[i - offset1]; + for (; i >= 0; i--) + y += fir->coeffs[i] * fir->history[i + offset2]; + if (fir->curr_pos <= 0) + fir->curr_pos = fir->taps; + fir->curr_pos--; + return (int16_t) (y >> 15); +} + +static inline const int16_t *fir32_create(struct fir32_state_t *fir, + const int32_t *coeffs, int taps) +{ + fir->taps = taps; + fir->curr_pos = taps - 1; + fir->coeffs = coeffs; + fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL); + return fir->history; +} + +static inline void fir32_flush(struct fir32_state_t *fir) +{ + memset(fir->history, 0, fir->taps * sizeof(int16_t)); +} + +static inline void fir32_free(struct fir32_state_t *fir) +{ + kfree(fir->history); +} + +static inline int16_t fir32(struct fir32_state_t *fir, int16_t sample) +{ + int i; + int32_t y; + int offset1; + int offset2; + + fir->history[fir->curr_pos] = sample; + offset2 = fir->curr_pos; + offset1 = fir->taps - offset2; + y = 0; + for (i = fir->taps - 1; i >= offset1; i--) + y += fir->coeffs[i] * fir->history[i - offset1]; + for (; i >= 0; i--) + y += fir->coeffs[i] * fir->history[i + offset2]; + if (fir->curr_pos <= 0) + fir->curr_pos = fir->taps; + fir->curr_pos--; + return (int16_t) (y >> 15); +} + +#endif diff --git a/drivers/misc/echo/oslec.h b/drivers/misc/echo/oslec.h new file mode 100644 index 0000000000..f1adac143b --- /dev/null +++ b/drivers/misc/echo/oslec.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * OSLEC - A line echo canceller. This code is being developed + * against and partially complies with G168. Using code from SpanDSP + * + * Written by Steve Underwood + * and David Rowe + * + * Copyright (C) 2001 Steve Underwood and 2007-2008 David Rowe + * + * All rights reserved. + */ + +#ifndef __OSLEC_H +#define __OSLEC_H + +/* Mask bits for the adaption mode */ +#define ECHO_CAN_USE_ADAPTION 0x01 +#define ECHO_CAN_USE_NLP 0x02 +#define ECHO_CAN_USE_CNG 0x04 +#define ECHO_CAN_USE_CLIP 0x08 +#define ECHO_CAN_USE_TX_HPF 0x10 +#define ECHO_CAN_USE_RX_HPF 0x20 +#define ECHO_CAN_DISABLE 0x40 + +/** + * oslec_state: G.168 echo canceller descriptor. + * + * This defines the working state for a line echo canceller. + */ +struct oslec_state; + +/** + * oslec_create - Create a voice echo canceller context. + * @len: The length of the canceller, in samples. + * @return: The new canceller context, or NULL if the canceller could not be + * created. + */ +struct oslec_state *oslec_create(int len, int adaption_mode); + +/** + * oslec_free - Free a voice echo canceller context. + * @ec: The echo canceller context. 
+ */ +void oslec_free(struct oslec_state *ec); + +/** + * oslec_flush - Flush (reinitialise) a voice echo canceller context. + * @ec: The echo canceller context. + */ +void oslec_flush(struct oslec_state *ec); + +/** + * oslec_adaption_mode - set the adaption mode of a voice echo canceller context. + * @ec The echo canceller context. + * @adaption_mode: The mode. + */ +void oslec_adaption_mode(struct oslec_state *ec, int adaption_mode); + +void oslec_snapshot(struct oslec_state *ec); + +/** + * oslec_update: Process a sample through a voice echo canceller. + * @ec: The echo canceller context. + * @tx: The transmitted audio sample. + * @rx: The received audio sample. + * + * The return value is the clean (echo cancelled) received sample. + */ +int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx); + +/** + * oslec_hpf_tx: Process to high pass filter the tx signal. + * @ec: The echo canceller context. + * @tx: The transmitted auio sample. + * + * The return value is the HP filtered transmit sample, send this to your D/A. + */ +int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx); + +#endif /* __OSLEC_H */ diff --git a/drivers/misc/eeprom/Kconfig b/drivers/misc/eeprom/Kconfig new file mode 100644 index 0000000000..2d240bfa81 --- /dev/null +++ b/drivers/misc/eeprom/Kconfig @@ -0,0 +1,134 @@ +# SPDX-License-Identifier: GPL-2.0-only +menu "EEPROM support" + +config EEPROM_AT24 + tristate "I2C EEPROMs / RAMs / ROMs from most vendors" + depends on I2C && SYSFS + select NVMEM + select NVMEM_SYSFS + select REGMAP + select REGMAP_I2C + help + Enable this driver to get read/write support to most I2C EEPROMs + and compatible devices like FRAMs, SRAMs, ROMs etc. After you + configure the driver to know about each chip on your target + board. Use these generic chip names, instead of vendor-specific + ones like at24c64, 24lc02 or fm24c04: + + 24c00, 24c01, 24c02, spd (readonly 24c02), 24c04, 24c08, + 24c16, 24c32, 24c64, 24c128, 24c256, 24c512, 24c1024, 24c2048 + + Unless you like data loss puzzles, always be sure that any chip + you configure as a 24c32 (32 kbit) or larger is NOT really a + 24c16 (16 kbit) or smaller, and vice versa. Marking the chip + as read-only won't help recover from this. Also, if your chip + has any software write-protect mechanism you may want to review the + code to make sure this driver won't turn it on by accident. + + If you use this with an SMBus adapter instead of an I2C adapter, + full functionality is not available. Only smaller devices are + supported (24c16 and below, max 4 kByte). + + This driver can also be built as a module. If so, the module + will be called at24. + +config EEPROM_AT25 + tristate "SPI EEPROMs (FRAMs) from most vendors" + depends on SPI && SYSFS + select NVMEM + select NVMEM_SYSFS + help + Enable this driver to get read/write support to most SPI EEPROMs + and Cypress FRAMs, + after you configure the board init code to know about each eeprom + on your target board. + + This driver can also be built as a module. If so, the module + will be called at25. + +config EEPROM_LEGACY + tristate "Old I2C EEPROM reader (DEPRECATED)" + depends on I2C && SYSFS + help + If you say yes here you get read-only access to the EEPROM data + available on modern memory DIMMs and Sony Vaio laptops via I2C. Such + EEPROMs could theoretically be available on other devices as well. + + This driver is deprecated and will be removed soon, please use the + better at24 driver instead. + + This driver can also be built as a module. 
If so, the module + will be called eeprom. + +config EEPROM_MAX6875 + tristate "Maxim MAX6874/5 power supply supervisor" + depends on I2C + help + If you say yes here you get read-only support for the user EEPROM of + the Maxim MAX6874/5 EEPROM-programmable, quad power-supply + sequencer/supervisor. + + All other features of this chip should be accessed via i2c-dev. + + This driver can also be built as a module. If so, the module + will be called max6875. + + +config EEPROM_93CX6 + tristate "EEPROM 93CX6 support" + help + This is a driver for the EEPROM chipsets 93c46 and 93c66. + The driver supports both read as well as write commands. + + If unsure, say N. + +config EEPROM_93XX46 + tristate "Microwire EEPROM 93XX46 support" + depends on SPI && SYSFS + select REGMAP + select NVMEM + select NVMEM_SYSFS + help + Driver for the microwire EEPROM chipsets 93xx46x. The driver + supports both read and write commands and also the command to + erase the whole EEPROM. + + This driver can also be built as a module. If so, the module + will be called eeprom_93xx46. + + If unsure, say N. + +config EEPROM_DIGSY_MTC_CFG + bool "DigsyMTC display configuration EEPROMs device" + depends on GPIO_MPC5200 && SPI_GPIO + help + This option enables access to display configuration EEPROMs + on digsy_mtc board. You have to additionally select Microwire + EEPROM 93XX46 driver. sysfs entries will be created for that + EEPROM allowing to read/write the configuration data or to + erase the whole EEPROM. + + If unsure, say N. + +config EEPROM_IDT_89HPESX + tristate "IDT 89HPESx PCIe-swtiches EEPROM / CSR support" + depends on I2C && SYSFS + help + Enable this driver to get read/write access to EEPROM / CSRs + over IDT PCIe-swtich i2c-slave interface. + + This driver can also be built as a module. If so, the module + will be called idt_89hpesx. + +config EEPROM_EE1004 + tristate "SPD EEPROMs on DDR4 memory modules" + depends on I2C && SYSFS + help + Enable this driver to get read support to SPD EEPROMs following + the JEDEC EE1004 standard. These are typically found on DDR4 + SDRAM memory modules. + + This driver can also be built as a module. If so, the module + will be called ee1004. + +endmenu diff --git a/drivers/misc/eeprom/Makefile b/drivers/misc/eeprom/Makefile new file mode 100644 index 0000000000..a9b4b6579b --- /dev/null +++ b/drivers/misc/eeprom/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_EEPROM_AT24) += at24.o +obj-$(CONFIG_EEPROM_AT25) += at25.o +obj-$(CONFIG_EEPROM_LEGACY) += eeprom.o +obj-$(CONFIG_EEPROM_MAX6875) += max6875.o +obj-$(CONFIG_EEPROM_93CX6) += eeprom_93cx6.o +obj-$(CONFIG_EEPROM_93XX46) += eeprom_93xx46.o +obj-$(CONFIG_EEPROM_DIGSY_MTC_CFG) += digsy_mtc_eeprom.o +obj-$(CONFIG_EEPROM_IDT_89HPESX) += idt_89hpesx.o +obj-$(CONFIG_EEPROM_EE1004) += ee1004.o diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c new file mode 100644 index 0000000000..dbbf7db4ff --- /dev/null +++ b/drivers/misc/eeprom/at24.c @@ -0,0 +1,862 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * at24.c - handle most I2C EEPROMs + * + * Copyright (C) 2005-2007 David Brownell + * Copyright (C) 2008 Wolfram Sang, Pengutronix + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Address pointer is 16 bit. */ +#define AT24_FLAG_ADDR16 BIT(7) +/* sysfs-entry will be read-only. 
*/ +#define AT24_FLAG_READONLY BIT(6) +/* sysfs-entry will be world-readable. */ +#define AT24_FLAG_IRUGO BIT(5) +/* Take always 8 addresses (24c00). */ +#define AT24_FLAG_TAKE8ADDR BIT(4) +/* Factory-programmed serial number. */ +#define AT24_FLAG_SERIAL BIT(3) +/* Factory-programmed mac address. */ +#define AT24_FLAG_MAC BIT(2) +/* Does not auto-rollover reads to the next slave address. */ +#define AT24_FLAG_NO_RDROL BIT(1) + +/* + * I2C EEPROMs from most vendors are inexpensive and mostly interchangeable. + * Differences between different vendor product lines (like Atmel AT24C or + * MicroChip 24LC, etc) won't much matter for typical read/write access. + * There are also I2C RAM chips, likewise interchangeable. One example + * would be the PCF8570, which acts like a 24c02 EEPROM (256 bytes). + * + * However, misconfiguration can lose data. "Set 16-bit memory address" + * to a part with 8-bit addressing will overwrite data. Writing with too + * big a page size also loses data. And it's not safe to assume that the + * conventional addresses 0x50..0x57 only hold eeproms; a PCF8563 RTC + * uses 0x51, for just one example. + * + * Accordingly, explicit board-specific configuration data should be used + * in almost all cases. (One partial exception is an SMBus used to access + * "SPD" data for DRAM sticks. Those only use 24c02 EEPROMs.) + * + * So this driver uses "new style" I2C driver binding, expecting to be + * told what devices exist. That may be in arch/X/mach-Y/board-Z.c or + * similar kernel-resident tables; or, configuration data coming from + * a bootloader. + * + * Other than binding model, current differences from "eeprom" driver are + * that this one handles write access and isn't restricted to 24c02 devices. + * It also handles larger devices (32 kbit and up) with two-byte addresses, + * which won't work on pure SMBus systems. + */ + +struct at24_data { + /* + * Lock protects against activities from other Linux tasks, + * but not from changes by other I2C masters. + */ + struct mutex lock; + + unsigned int write_max; + unsigned int num_addresses; + unsigned int offset_adj; + + u32 byte_len; + u16 page_size; + u8 flags; + + struct nvmem_device *nvmem; + struct regulator *vcc_reg; + void (*read_post)(unsigned int off, char *buf, size_t count); + + /* + * Some chips tie up multiple I2C addresses; dummy devices reserve + * them for us. + */ + u8 bank_addr_shift; + struct regmap *client_regmaps[]; +}; + +/* + * This parameter is to help this driver avoid blocking other drivers out + * of I2C for potentially troublesome amounts of time. With a 100 kHz I2C + * clock, one 256 byte read takes about 1/43 second which is excessive; + * but the 1/170 second it takes at 400 kHz may be quite reasonable; and + * at 1 MHz (Fm+) a 1/430 second delay could easily be invisible. + * + * This value is forced to be a power of two so that writes align on pages. + */ +static unsigned int at24_io_limit = 128; +module_param_named(io_limit, at24_io_limit, uint, 0); +MODULE_PARM_DESC(at24_io_limit, "Maximum bytes per I/O (default 128)"); + +/* + * Specs often allow 5 msec for a page write, sometimes 20 msec; + * it's important to recover from write timeouts. 
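[Editorial illustration] A purely illustrative example of the "new style" binding model described in the comment above: a board file can declare the EEPROM so the at24 driver is told what exists. The bus number, address and chip type below are made up for the example; on modern platforms a device tree node such as "atmel,24c64" is the usual way to do this.

#include <linux/i2c.h>
#include <linux/init.h>

/* Illustration only: declare a 24c64 at address 0x50 on I2C bus 0. */
static struct i2c_board_info at24_example_board_info[] __initdata = {
	{ I2C_BOARD_INFO("24c64", 0x50) },
};

/*
 * Registered from early board code, before the adapter probes:
 *	i2c_register_board_info(0, at24_example_board_info,
 *				ARRAY_SIZE(at24_example_board_info));
 */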
+ */ +static unsigned int at24_write_timeout = 25; +module_param_named(write_timeout, at24_write_timeout, uint, 0); +MODULE_PARM_DESC(at24_write_timeout, "Time (in ms) to try writes (default 25)"); + +struct at24_chip_data { + u32 byte_len; + u8 flags; + u8 bank_addr_shift; + void (*read_post)(unsigned int off, char *buf, size_t count); +}; + +#define AT24_CHIP_DATA(_name, _len, _flags) \ + static const struct at24_chip_data _name = { \ + .byte_len = _len, .flags = _flags, \ + } + +#define AT24_CHIP_DATA_CB(_name, _len, _flags, _read_post) \ + static const struct at24_chip_data _name = { \ + .byte_len = _len, .flags = _flags, \ + .read_post = _read_post, \ + } + +#define AT24_CHIP_DATA_BS(_name, _len, _flags, _bank_addr_shift) \ + static const struct at24_chip_data _name = { \ + .byte_len = _len, .flags = _flags, \ + .bank_addr_shift = _bank_addr_shift \ + } + +static void at24_read_post_vaio(unsigned int off, char *buf, size_t count) +{ + int i; + + if (capable(CAP_SYS_ADMIN)) + return; + + /* + * Hide VAIO private settings to regular users: + * - BIOS passwords: bytes 0x00 to 0x0f + * - UUID: bytes 0x10 to 0x1f + * - Serial number: 0xc0 to 0xdf + */ + for (i = 0; i < count; i++) { + if ((off + i <= 0x1f) || + (off + i >= 0xc0 && off + i <= 0xdf)) + buf[i] = 0; + } +} + +/* needs 8 addresses as A0-A2 are ignored */ +AT24_CHIP_DATA(at24_data_24c00, 128 / 8, AT24_FLAG_TAKE8ADDR); +/* old variants can't be handled with this generic entry! */ +AT24_CHIP_DATA(at24_data_24c01, 1024 / 8, 0); +AT24_CHIP_DATA(at24_data_24cs01, 16, + AT24_FLAG_SERIAL | AT24_FLAG_READONLY); +AT24_CHIP_DATA(at24_data_24c02, 2048 / 8, 0); +AT24_CHIP_DATA(at24_data_24cs02, 16, + AT24_FLAG_SERIAL | AT24_FLAG_READONLY); +AT24_CHIP_DATA(at24_data_24mac402, 48 / 8, + AT24_FLAG_MAC | AT24_FLAG_READONLY); +AT24_CHIP_DATA(at24_data_24mac602, 64 / 8, + AT24_FLAG_MAC | AT24_FLAG_READONLY); +/* spd is a 24c02 in memory DIMMs */ +AT24_CHIP_DATA(at24_data_spd, 2048 / 8, + AT24_FLAG_READONLY | AT24_FLAG_IRUGO); +/* 24c02_vaio is a 24c02 on some Sony laptops */ +AT24_CHIP_DATA_CB(at24_data_24c02_vaio, 2048 / 8, + AT24_FLAG_READONLY | AT24_FLAG_IRUGO, + at24_read_post_vaio); +AT24_CHIP_DATA(at24_data_24c04, 4096 / 8, 0); +AT24_CHIP_DATA(at24_data_24cs04, 16, + AT24_FLAG_SERIAL | AT24_FLAG_READONLY); +/* 24rf08 quirk is handled at i2c-core */ +AT24_CHIP_DATA(at24_data_24c08, 8192 / 8, 0); +AT24_CHIP_DATA(at24_data_24cs08, 16, + AT24_FLAG_SERIAL | AT24_FLAG_READONLY); +AT24_CHIP_DATA(at24_data_24c16, 16384 / 8, 0); +AT24_CHIP_DATA(at24_data_24cs16, 16, + AT24_FLAG_SERIAL | AT24_FLAG_READONLY); +AT24_CHIP_DATA(at24_data_24c32, 32768 / 8, AT24_FLAG_ADDR16); +AT24_CHIP_DATA(at24_data_24cs32, 16, + AT24_FLAG_ADDR16 | AT24_FLAG_SERIAL | AT24_FLAG_READONLY); +AT24_CHIP_DATA(at24_data_24c64, 65536 / 8, AT24_FLAG_ADDR16); +AT24_CHIP_DATA(at24_data_24cs64, 16, + AT24_FLAG_ADDR16 | AT24_FLAG_SERIAL | AT24_FLAG_READONLY); +AT24_CHIP_DATA(at24_data_24c128, 131072 / 8, AT24_FLAG_ADDR16); +AT24_CHIP_DATA(at24_data_24c256, 262144 / 8, AT24_FLAG_ADDR16); +AT24_CHIP_DATA(at24_data_24c512, 524288 / 8, AT24_FLAG_ADDR16); +AT24_CHIP_DATA(at24_data_24c1024, 1048576 / 8, AT24_FLAG_ADDR16); +AT24_CHIP_DATA_BS(at24_data_24c1025, 1048576 / 8, AT24_FLAG_ADDR16, 2); +AT24_CHIP_DATA(at24_data_24c2048, 2097152 / 8, AT24_FLAG_ADDR16); +/* identical to 24c08 ? 
*/ +AT24_CHIP_DATA(at24_data_INT3499, 8192 / 8, 0); + +static const struct i2c_device_id at24_ids[] = { + { "24c00", (kernel_ulong_t)&at24_data_24c00 }, + { "24c01", (kernel_ulong_t)&at24_data_24c01 }, + { "24cs01", (kernel_ulong_t)&at24_data_24cs01 }, + { "24c02", (kernel_ulong_t)&at24_data_24c02 }, + { "24cs02", (kernel_ulong_t)&at24_data_24cs02 }, + { "24mac402", (kernel_ulong_t)&at24_data_24mac402 }, + { "24mac602", (kernel_ulong_t)&at24_data_24mac602 }, + { "spd", (kernel_ulong_t)&at24_data_spd }, + { "24c02-vaio", (kernel_ulong_t)&at24_data_24c02_vaio }, + { "24c04", (kernel_ulong_t)&at24_data_24c04 }, + { "24cs04", (kernel_ulong_t)&at24_data_24cs04 }, + { "24c08", (kernel_ulong_t)&at24_data_24c08 }, + { "24cs08", (kernel_ulong_t)&at24_data_24cs08 }, + { "24c16", (kernel_ulong_t)&at24_data_24c16 }, + { "24cs16", (kernel_ulong_t)&at24_data_24cs16 }, + { "24c32", (kernel_ulong_t)&at24_data_24c32 }, + { "24cs32", (kernel_ulong_t)&at24_data_24cs32 }, + { "24c64", (kernel_ulong_t)&at24_data_24c64 }, + { "24cs64", (kernel_ulong_t)&at24_data_24cs64 }, + { "24c128", (kernel_ulong_t)&at24_data_24c128 }, + { "24c256", (kernel_ulong_t)&at24_data_24c256 }, + { "24c512", (kernel_ulong_t)&at24_data_24c512 }, + { "24c1024", (kernel_ulong_t)&at24_data_24c1024 }, + { "24c1025", (kernel_ulong_t)&at24_data_24c1025 }, + { "24c2048", (kernel_ulong_t)&at24_data_24c2048 }, + { "at24", 0 }, + { /* END OF LIST */ } +}; +MODULE_DEVICE_TABLE(i2c, at24_ids); + +static const struct of_device_id at24_of_match[] = { + { .compatible = "atmel,24c00", .data = &at24_data_24c00 }, + { .compatible = "atmel,24c01", .data = &at24_data_24c01 }, + { .compatible = "atmel,24cs01", .data = &at24_data_24cs01 }, + { .compatible = "atmel,24c02", .data = &at24_data_24c02 }, + { .compatible = "atmel,24cs02", .data = &at24_data_24cs02 }, + { .compatible = "atmel,24mac402", .data = &at24_data_24mac402 }, + { .compatible = "atmel,24mac602", .data = &at24_data_24mac602 }, + { .compatible = "atmel,spd", .data = &at24_data_spd }, + { .compatible = "atmel,24c04", .data = &at24_data_24c04 }, + { .compatible = "atmel,24cs04", .data = &at24_data_24cs04 }, + { .compatible = "atmel,24c08", .data = &at24_data_24c08 }, + { .compatible = "atmel,24cs08", .data = &at24_data_24cs08 }, + { .compatible = "atmel,24c16", .data = &at24_data_24c16 }, + { .compatible = "atmel,24cs16", .data = &at24_data_24cs16 }, + { .compatible = "atmel,24c32", .data = &at24_data_24c32 }, + { .compatible = "atmel,24cs32", .data = &at24_data_24cs32 }, + { .compatible = "atmel,24c64", .data = &at24_data_24c64 }, + { .compatible = "atmel,24cs64", .data = &at24_data_24cs64 }, + { .compatible = "atmel,24c128", .data = &at24_data_24c128 }, + { .compatible = "atmel,24c256", .data = &at24_data_24c256 }, + { .compatible = "atmel,24c512", .data = &at24_data_24c512 }, + { .compatible = "atmel,24c1024", .data = &at24_data_24c1024 }, + { .compatible = "atmel,24c1025", .data = &at24_data_24c1025 }, + { .compatible = "atmel,24c2048", .data = &at24_data_24c2048 }, + { /* END OF LIST */ }, +}; +MODULE_DEVICE_TABLE(of, at24_of_match); + +static const struct acpi_device_id __maybe_unused at24_acpi_ids[] = { + { "INT3499", (kernel_ulong_t)&at24_data_INT3499 }, + { "TPF0001", (kernel_ulong_t)&at24_data_24c1024 }, + { /* END OF LIST */ } +}; +MODULE_DEVICE_TABLE(acpi, at24_acpi_ids); + +/* + * This routine supports chips which consume multiple I2C addresses. It + * computes the addressing information to be used for a given r/w request. 
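[Editorial illustration] To make the address computation described here concrete, a small invented example for an 8-bit-address, multi-address part; the numbers are illustrative only and mirror the shift/mask performed in at24_translate_offset() below.

/* Illustration only: absolute offset 0x1ab on an 8-bit-address chip. */
unsigned int example_offset = 0x1ab;
unsigned int example_bank   = example_offset >> 8;	/* 1    -> client_regmaps[1]   */
unsigned int example_byte   = example_offset & 0xff;	/* 0xab -> byte address on it  */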
+ * Assumes that sanity checks for offset happened at sysfs-layer. + * + * Slave address and byte offset derive from the offset. Always + * set the byte address; on a multi-master board, another master + * may have changed the chip's "current" address pointer. + */ +static struct regmap *at24_translate_offset(struct at24_data *at24, + unsigned int *offset) +{ + unsigned int i; + + if (at24->flags & AT24_FLAG_ADDR16) { + i = *offset >> 16; + *offset &= 0xffff; + } else { + i = *offset >> 8; + *offset &= 0xff; + } + + return at24->client_regmaps[i]; +} + +static struct device *at24_base_client_dev(struct at24_data *at24) +{ + return regmap_get_device(at24->client_regmaps[0]); +} + +static size_t at24_adjust_read_count(struct at24_data *at24, + unsigned int offset, size_t count) +{ + unsigned int bits; + size_t remainder; + + /* + * In case of multi-address chips that don't rollover reads to + * the next slave address: truncate the count to the slave boundary, + * so that the read never straddles slaves. + */ + if (at24->flags & AT24_FLAG_NO_RDROL) { + bits = (at24->flags & AT24_FLAG_ADDR16) ? 16 : 8; + remainder = BIT(bits) - offset; + if (count > remainder) + count = remainder; + } + + if (count > at24_io_limit) + count = at24_io_limit; + + return count; +} + +static ssize_t at24_regmap_read(struct at24_data *at24, char *buf, + unsigned int offset, size_t count) +{ + unsigned long timeout, read_time; + struct regmap *regmap; + int ret; + + regmap = at24_translate_offset(at24, &offset); + count = at24_adjust_read_count(at24, offset, count); + + /* adjust offset for mac and serial read ops */ + offset += at24->offset_adj; + + timeout = jiffies + msecs_to_jiffies(at24_write_timeout); + do { + /* + * The timestamp shall be taken before the actual operation + * to avoid a premature timeout in case of high CPU load. + */ + read_time = jiffies; + + ret = regmap_bulk_read(regmap, offset, buf, count); + dev_dbg(regmap_get_device(regmap), "read %zu@%d --> %d (%ld)\n", + count, offset, ret, jiffies); + if (!ret) + return count; + + usleep_range(1000, 1500); + } while (time_before(read_time, timeout)); + + return -ETIMEDOUT; +} + +/* + * Note that if the hardware write-protect pin is pulled high, the whole + * chip is normally write protected. But there are plenty of product + * variants here, including OTP fuses and partial chip protect. + * + * We only use page mode writes; the alternative is sloooow. These routines + * write at most one page. + */ + +static size_t at24_adjust_write_count(struct at24_data *at24, + unsigned int offset, size_t count) +{ + unsigned int next_page; + + /* write_max is at most a page */ + if (count > at24->write_max) + count = at24->write_max; + + /* Never roll over backwards, to the start of this page */ + next_page = roundup(offset + 1, at24->page_size); + if (offset + count > next_page) + count = next_page - offset; + + return count; +} + +static ssize_t at24_regmap_write(struct at24_data *at24, const char *buf, + unsigned int offset, size_t count) +{ + unsigned long timeout, write_time; + struct regmap *regmap; + int ret; + + regmap = at24_translate_offset(at24, &offset); + count = at24_adjust_write_count(at24, offset, count); + timeout = jiffies + msecs_to_jiffies(at24_write_timeout); + + do { + /* + * The timestamp shall be taken before the actual operation + * to avoid a premature timeout in case of high CPU load. 
+ */ + write_time = jiffies; + + ret = regmap_bulk_write(regmap, offset, buf, count); + dev_dbg(regmap_get_device(regmap), "write %zu@%d --> %d (%ld)\n", + count, offset, ret, jiffies); + if (!ret) + return count; + + usleep_range(1000, 1500); + } while (time_before(write_time, timeout)); + + return -ETIMEDOUT; +} + +static int at24_read(void *priv, unsigned int off, void *val, size_t count) +{ + struct at24_data *at24; + struct device *dev; + char *buf = val; + int i, ret; + + at24 = priv; + dev = at24_base_client_dev(at24); + + if (unlikely(!count)) + return count; + + if (off + count > at24->byte_len) + return -EINVAL; + + ret = pm_runtime_get_sync(dev); + if (ret < 0) { + pm_runtime_put_noidle(dev); + return ret; + } + + /* + * Read data from chip, protecting against concurrent updates + * from this host, but not from other I2C masters. + */ + mutex_lock(&at24->lock); + + for (i = 0; count; i += ret, count -= ret) { + ret = at24_regmap_read(at24, buf + i, off + i, count); + if (ret < 0) { + mutex_unlock(&at24->lock); + pm_runtime_put(dev); + return ret; + } + } + + mutex_unlock(&at24->lock); + + pm_runtime_put(dev); + + if (unlikely(at24->read_post)) + at24->read_post(off, buf, i); + + return 0; +} + +static int at24_write(void *priv, unsigned int off, void *val, size_t count) +{ + struct at24_data *at24; + struct device *dev; + char *buf = val; + int ret; + + at24 = priv; + dev = at24_base_client_dev(at24); + + if (unlikely(!count)) + return -EINVAL; + + if (off + count > at24->byte_len) + return -EINVAL; + + ret = pm_runtime_get_sync(dev); + if (ret < 0) { + pm_runtime_put_noidle(dev); + return ret; + } + + /* + * Write data to chip, protecting against concurrent updates + * from this host, but not from other I2C masters. + */ + mutex_lock(&at24->lock); + + while (count) { + ret = at24_regmap_write(at24, buf, off, count); + if (ret < 0) { + mutex_unlock(&at24->lock); + pm_runtime_put(dev); + return ret; + } + buf += ret; + off += ret; + count -= ret; + } + + mutex_unlock(&at24->lock); + + pm_runtime_put(dev); + + return 0; +} + +static const struct at24_chip_data *at24_get_chip_data(struct device *dev) +{ + struct device_node *of_node = dev->of_node; + const struct at24_chip_data *cdata; + const struct i2c_device_id *id; + + id = i2c_match_id(at24_ids, to_i2c_client(dev)); + + /* + * The I2C core allows OF nodes compatibles to match against the + * I2C device ID table as a fallback, so check not only if an OF + * node is present but also if it matches an OF device ID entry. 
+ */ + if (of_node && of_match_device(at24_of_match, dev)) + cdata = of_device_get_match_data(dev); + else if (id) + cdata = (void *)id->driver_data; + else + cdata = acpi_device_get_match_data(dev); + + if (!cdata) + return ERR_PTR(-ENODEV); + + return cdata; +} + +static int at24_make_dummy_client(struct at24_data *at24, unsigned int index, + struct i2c_client *base_client, + struct regmap_config *regmap_config) +{ + struct i2c_client *dummy_client; + struct regmap *regmap; + + dummy_client = devm_i2c_new_dummy_device(&base_client->dev, + base_client->adapter, + base_client->addr + + (index << at24->bank_addr_shift)); + if (IS_ERR(dummy_client)) + return PTR_ERR(dummy_client); + + regmap = devm_regmap_init_i2c(dummy_client, regmap_config); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + at24->client_regmaps[index] = regmap; + + return 0; +} + +static unsigned int at24_get_offset_adj(u8 flags, unsigned int byte_len) +{ + if (flags & AT24_FLAG_MAC) { + /* EUI-48 starts from 0x9a, EUI-64 from 0x98 */ + return 0xa0 - byte_len; + } else if (flags & AT24_FLAG_SERIAL && flags & AT24_FLAG_ADDR16) { + /* + * For 16 bit address pointers, the word address must contain + * a '10' sequence in bits 11 and 10 regardless of the + * intended position of the address pointer. + */ + return 0x0800; + } else if (flags & AT24_FLAG_SERIAL) { + /* + * Otherwise the word address must begin with a '10' sequence, + * regardless of the intended address. + */ + return 0x0080; + } else { + return 0; + } +} + +static int at24_probe(struct i2c_client *client) +{ + struct regmap_config regmap_config = { }; + struct nvmem_config nvmem_config = { }; + u32 byte_len, page_size, flags, addrw; + const struct at24_chip_data *cdata; + struct device *dev = &client->dev; + bool i2c_fn_i2c, i2c_fn_block; + unsigned int i, num_addresses; + struct at24_data *at24; + bool full_power; + struct regmap *regmap; + bool writable; + u8 test_byte; + int err; + + i2c_fn_i2c = i2c_check_functionality(client->adapter, I2C_FUNC_I2C); + i2c_fn_block = i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_WRITE_I2C_BLOCK); + + cdata = at24_get_chip_data(dev); + if (IS_ERR(cdata)) + return PTR_ERR(cdata); + + err = device_property_read_u32(dev, "pagesize", &page_size); + if (err) + /* + * This is slow, but we can't know all eeproms, so we better + * play safe. Specifying custom eeprom-types via device tree + * or properties is recommended anyhow. 
+ */ + page_size = 1; + + flags = cdata->flags; + if (device_property_present(dev, "read-only")) + flags |= AT24_FLAG_READONLY; + if (device_property_present(dev, "no-read-rollover")) + flags |= AT24_FLAG_NO_RDROL; + + err = device_property_read_u32(dev, "address-width", &addrw); + if (!err) { + switch (addrw) { + case 8: + if (flags & AT24_FLAG_ADDR16) + dev_warn(dev, + "Override address width to be 8, while default is 16\n"); + flags &= ~AT24_FLAG_ADDR16; + break; + case 16: + flags |= AT24_FLAG_ADDR16; + break; + default: + dev_warn(dev, "Bad \"address-width\" property: %u\n", + addrw); + } + } + + err = device_property_read_u32(dev, "size", &byte_len); + if (err) + byte_len = cdata->byte_len; + + if (!i2c_fn_i2c && !i2c_fn_block) + page_size = 1; + + if (!page_size) { + dev_err(dev, "page_size must not be 0!\n"); + return -EINVAL; + } + + if (!is_power_of_2(page_size)) + dev_warn(dev, "page_size looks suspicious (no power of 2)!\n"); + + err = device_property_read_u32(dev, "num-addresses", &num_addresses); + if (err) { + if (flags & AT24_FLAG_TAKE8ADDR) + num_addresses = 8; + else + num_addresses = DIV_ROUND_UP(byte_len, + (flags & AT24_FLAG_ADDR16) ? 65536 : 256); + } + + if ((flags & AT24_FLAG_SERIAL) && (flags & AT24_FLAG_MAC)) { + dev_err(dev, + "invalid device data - cannot have both AT24_FLAG_SERIAL & AT24_FLAG_MAC."); + return -EINVAL; + } + + regmap_config.val_bits = 8; + regmap_config.reg_bits = (flags & AT24_FLAG_ADDR16) ? 16 : 8; + regmap_config.disable_locking = true; + + regmap = devm_regmap_init_i2c(client, ®map_config); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + at24 = devm_kzalloc(dev, struct_size(at24, client_regmaps, num_addresses), + GFP_KERNEL); + if (!at24) + return -ENOMEM; + + mutex_init(&at24->lock); + at24->byte_len = byte_len; + at24->page_size = page_size; + at24->flags = flags; + at24->read_post = cdata->read_post; + at24->bank_addr_shift = cdata->bank_addr_shift; + at24->num_addresses = num_addresses; + at24->offset_adj = at24_get_offset_adj(flags, byte_len); + at24->client_regmaps[0] = regmap; + + at24->vcc_reg = devm_regulator_get(dev, "vcc"); + if (IS_ERR(at24->vcc_reg)) + return PTR_ERR(at24->vcc_reg); + + writable = !(flags & AT24_FLAG_READONLY); + if (writable) { + at24->write_max = min_t(unsigned int, + page_size, at24_io_limit); + if (!i2c_fn_i2c && at24->write_max > I2C_SMBUS_BLOCK_MAX) + at24->write_max = I2C_SMBUS_BLOCK_MAX; + } + + /* use dummy devices for multiple-address chips */ + for (i = 1; i < num_addresses; i++) { + err = at24_make_dummy_client(at24, i, client, ®map_config); + if (err) + return err; + } + + /* + * We initialize nvmem_config.id to NVMEM_DEVID_AUTO even if the + * label property is set as some platform can have multiple eeproms + * with same label and we can not register each of those with same + * label. Failing to register those eeproms trigger cascade failure + * on such platform. 
+ */ + nvmem_config.id = NVMEM_DEVID_AUTO; + + if (device_property_present(dev, "label")) { + err = device_property_read_string(dev, "label", + &nvmem_config.name); + if (err) + return err; + } else { + nvmem_config.name = dev_name(dev); + } + + nvmem_config.type = NVMEM_TYPE_EEPROM; + nvmem_config.dev = dev; + nvmem_config.read_only = !writable; + nvmem_config.root_only = !(flags & AT24_FLAG_IRUGO); + nvmem_config.owner = THIS_MODULE; + nvmem_config.compat = true; + nvmem_config.base_dev = dev; + nvmem_config.reg_read = at24_read; + nvmem_config.reg_write = at24_write; + nvmem_config.priv = at24; + nvmem_config.stride = 1; + nvmem_config.word_size = 1; + nvmem_config.size = byte_len; + + i2c_set_clientdata(client, at24); + + full_power = acpi_dev_state_d0(&client->dev); + if (full_power) { + err = regulator_enable(at24->vcc_reg); + if (err) { + dev_err(dev, "Failed to enable vcc regulator\n"); + return err; + } + + pm_runtime_set_active(dev); + } + pm_runtime_enable(dev); + + at24->nvmem = devm_nvmem_register(dev, &nvmem_config); + if (IS_ERR(at24->nvmem)) { + pm_runtime_disable(dev); + if (!pm_runtime_status_suspended(dev)) + regulator_disable(at24->vcc_reg); + return dev_err_probe(dev, PTR_ERR(at24->nvmem), + "failed to register nvmem\n"); + } + + /* + * Perform a one-byte test read to verify that the chip is functional, + * unless powering on the device is to be avoided during probe (i.e. + * it's powered off right now). + */ + if (full_power) { + err = at24_read(at24, 0, &test_byte, 1); + if (err) { + pm_runtime_disable(dev); + if (!pm_runtime_status_suspended(dev)) + regulator_disable(at24->vcc_reg); + return -ENODEV; + } + } + + pm_runtime_idle(dev); + + if (writable) + dev_info(dev, "%u byte %s EEPROM, writable, %u bytes/write\n", + byte_len, client->name, at24->write_max); + else + dev_info(dev, "%u byte %s EEPROM, read-only\n", + byte_len, client->name); + + return 0; +} + +static void at24_remove(struct i2c_client *client) +{ + struct at24_data *at24 = i2c_get_clientdata(client); + + pm_runtime_disable(&client->dev); + if (acpi_dev_state_d0(&client->dev)) { + if (!pm_runtime_status_suspended(&client->dev)) + regulator_disable(at24->vcc_reg); + pm_runtime_set_suspended(&client->dev); + } +} + +static int __maybe_unused at24_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct at24_data *at24 = i2c_get_clientdata(client); + + return regulator_disable(at24->vcc_reg); +} + +static int __maybe_unused at24_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct at24_data *at24 = i2c_get_clientdata(client); + + return regulator_enable(at24->vcc_reg); +} + +static const struct dev_pm_ops at24_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) + SET_RUNTIME_PM_OPS(at24_suspend, at24_resume, NULL) +}; + +static struct i2c_driver at24_driver = { + .driver = { + .name = "at24", + .pm = &at24_pm_ops, + .of_match_table = at24_of_match, + .acpi_match_table = ACPI_PTR(at24_acpi_ids), + }, + .probe = at24_probe, + .remove = at24_remove, + .id_table = at24_ids, + .flags = I2C_DRV_ACPI_WAIVE_D0_PROBE, +}; + +static int __init at24_init(void) +{ + if (!at24_io_limit) { + pr_err("at24: at24_io_limit must not be 0!\n"); + return -EINVAL; + } + + at24_io_limit = rounddown_pow_of_two(at24_io_limit); + return i2c_add_driver(&at24_driver); +} +module_init(at24_init); + +static void __exit at24_exit(void) +{ + i2c_del_driver(&at24_driver); +} +module_exit(at24_exit); + +MODULE_DESCRIPTION("Driver 
for most I2C EEPROMs"); +MODULE_AUTHOR("David Brownell and Wolfram Sang"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c new file mode 100644 index 0000000000..65d49a6de1 --- /dev/null +++ b/drivers/misc/eeprom/at25.c @@ -0,0 +1,532 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Driver for most of the SPI EEPROMs, such as Atmel AT25 models + * and Cypress FRAMs FM25 models. + * + * Copyright (C) 2006 David Brownell + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +/* + * NOTE: this is an *EEPROM* driver. The vagaries of product naming + * mean that some AT25 products are EEPROMs, and others are FLASH. + * Handle FLASH chips with the drivers/mtd/devices/m25p80.c driver, + * not this one! + * + * EEPROMs that can be used with this driver include, for example: + * AT25M02, AT25128B + */ + +#define FM25_SN_LEN 8 /* serial number length */ +#define EE_MAXADDRLEN 3 /* 24 bit addresses, up to 2 MBytes */ + +struct at25_data { + struct spi_eeprom chip; + struct spi_device *spi; + struct mutex lock; + unsigned addrlen; + struct nvmem_config nvmem_config; + struct nvmem_device *nvmem; + u8 sernum[FM25_SN_LEN]; + u8 command[EE_MAXADDRLEN + 1]; +}; + +#define AT25_WREN 0x06 /* latch the write enable */ +#define AT25_WRDI 0x04 /* reset the write enable */ +#define AT25_RDSR 0x05 /* read status register */ +#define AT25_WRSR 0x01 /* write status register */ +#define AT25_READ 0x03 /* read byte(s) */ +#define AT25_WRITE 0x02 /* write byte(s)/sector */ +#define FM25_SLEEP 0xb9 /* enter sleep mode */ +#define FM25_RDID 0x9f /* read device ID */ +#define FM25_RDSN 0xc3 /* read S/N */ + +#define AT25_SR_nRDY 0x01 /* nRDY = write-in-progress */ +#define AT25_SR_WEN 0x02 /* write enable (latched) */ +#define AT25_SR_BP0 0x04 /* BP for software writeprotect */ +#define AT25_SR_BP1 0x08 +#define AT25_SR_WPEN 0x80 /* writeprotect enable */ + +#define AT25_INSTR_BIT3 0x08 /* additional address bit in instr */ + +#define FM25_ID_LEN 9 /* ID length */ + +/* + * Specs often allow 5ms for a page write, sometimes 20ms; + * it's important to recover from write timeouts. 
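[Editorial illustration] For reference, the command/address bytes that the read path shown below shifts out for a two-byte-address (EE_ADDR2) part; the offset 0x0123 is invented for the example.

/* Illustration only: AT25_READ header for offset 0x0123, 16-bit addressing. */
static const u8 at25_read_hdr_example[] = {
	0x03,	/* AT25_READ opcode              */
	0x01,	/* address bits 15..8, MSB first */
	0x23,	/* address bits 7..0             */
};
/* The requested data bytes are clocked in immediately after these three. */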
+ */ +#define EE_TIMEOUT 25 + +/*-------------------------------------------------------------------------*/ + +#define io_limit PAGE_SIZE /* bytes */ + +static int at25_ee_read(void *priv, unsigned int offset, + void *val, size_t count) +{ + struct at25_data *at25 = priv; + char *buf = val; + size_t max_chunk = spi_max_transfer_size(at25->spi); + unsigned int msg_offset = offset; + size_t bytes_left = count; + size_t segment; + u8 *cp; + ssize_t status; + struct spi_transfer t[2]; + struct spi_message m; + u8 instr; + + if (unlikely(offset >= at25->chip.byte_len)) + return -EINVAL; + if ((offset + count) > at25->chip.byte_len) + count = at25->chip.byte_len - offset; + if (unlikely(!count)) + return -EINVAL; + + do { + segment = min(bytes_left, max_chunk); + cp = at25->command; + + instr = AT25_READ; + if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR) + if (msg_offset >= BIT(at25->addrlen * 8)) + instr |= AT25_INSTR_BIT3; + + mutex_lock(&at25->lock); + + *cp++ = instr; + + /* 8/16/24-bit address is written MSB first */ + switch (at25->addrlen) { + default: /* case 3 */ + *cp++ = msg_offset >> 16; + fallthrough; + case 2: + *cp++ = msg_offset >> 8; + fallthrough; + case 1: + case 0: /* can't happen: for better code generation */ + *cp++ = msg_offset >> 0; + } + + spi_message_init(&m); + memset(t, 0, sizeof(t)); + + t[0].tx_buf = at25->command; + t[0].len = at25->addrlen + 1; + spi_message_add_tail(&t[0], &m); + + t[1].rx_buf = buf; + t[1].len = segment; + spi_message_add_tail(&t[1], &m); + + status = spi_sync(at25->spi, &m); + + mutex_unlock(&at25->lock); + + if (status) + return status; + + msg_offset += segment; + buf += segment; + bytes_left -= segment; + } while (bytes_left > 0); + + dev_dbg(&at25->spi->dev, "read %zu bytes at %d\n", + count, offset); + return 0; +} + +/* Read extra registers as ID or serial number */ +static int fm25_aux_read(struct at25_data *at25, u8 *buf, uint8_t command, + int len) +{ + int status; + struct spi_transfer t[2]; + struct spi_message m; + + spi_message_init(&m); + memset(t, 0, sizeof(t)); + + t[0].tx_buf = at25->command; + t[0].len = 1; + spi_message_add_tail(&t[0], &m); + + t[1].rx_buf = buf; + t[1].len = len; + spi_message_add_tail(&t[1], &m); + + mutex_lock(&at25->lock); + + at25->command[0] = command; + + status = spi_sync(at25->spi, &m); + dev_dbg(&at25->spi->dev, "read %d aux bytes --> %d\n", len, status); + + mutex_unlock(&at25->lock); + return status; +} + +static ssize_t sernum_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct at25_data *at25; + + at25 = dev_get_drvdata(dev); + return sysfs_emit(buf, "%*ph\n", (int)sizeof(at25->sernum), at25->sernum); +} +static DEVICE_ATTR_RO(sernum); + +static struct attribute *sernum_attrs[] = { + &dev_attr_sernum.attr, + NULL, +}; +ATTRIBUTE_GROUPS(sernum); + +static int at25_ee_write(void *priv, unsigned int off, void *val, size_t count) +{ + struct at25_data *at25 = priv; + size_t maxsz = spi_max_transfer_size(at25->spi); + const char *buf = val; + int status = 0; + unsigned buf_size; + u8 *bounce; + + if (unlikely(off >= at25->chip.byte_len)) + return -EFBIG; + if ((off + count) > at25->chip.byte_len) + count = at25->chip.byte_len - off; + if (unlikely(!count)) + return -EINVAL; + + /* Temp buffer starts with command and address */ + buf_size = at25->chip.page_size; + if (buf_size > io_limit) + buf_size = io_limit; + bounce = kmalloc(buf_size + at25->addrlen + 1, GFP_KERNEL); + if (!bounce) + return -ENOMEM; + + /* + * For write, rollover is within the page ... 
so we write at + * most one page, then manually roll over to the next page. + */ + mutex_lock(&at25->lock); + do { + unsigned long timeout, retries; + unsigned segment; + unsigned offset = off; + u8 *cp = bounce; + int sr; + u8 instr; + + *cp = AT25_WREN; + status = spi_write(at25->spi, cp, 1); + if (status < 0) { + dev_dbg(&at25->spi->dev, "WREN --> %d\n", status); + break; + } + + instr = AT25_WRITE; + if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR) + if (offset >= BIT(at25->addrlen * 8)) + instr |= AT25_INSTR_BIT3; + *cp++ = instr; + + /* 8/16/24-bit address is written MSB first */ + switch (at25->addrlen) { + default: /* case 3 */ + *cp++ = offset >> 16; + fallthrough; + case 2: + *cp++ = offset >> 8; + fallthrough; + case 1: + case 0: /* can't happen: for better code generation */ + *cp++ = offset >> 0; + } + + /* Write as much of a page as we can */ + segment = buf_size - (offset % buf_size); + if (segment > count) + segment = count; + if (segment > maxsz) + segment = maxsz; + memcpy(cp, buf, segment); + status = spi_write(at25->spi, bounce, + segment + at25->addrlen + 1); + dev_dbg(&at25->spi->dev, "write %u bytes at %u --> %d\n", + segment, offset, status); + if (status < 0) + break; + + /* + * REVISIT this should detect (or prevent) failed writes + * to read-only sections of the EEPROM... + */ + + /* Wait for non-busy status */ + timeout = jiffies + msecs_to_jiffies(EE_TIMEOUT); + retries = 0; + do { + + sr = spi_w8r8(at25->spi, AT25_RDSR); + if (sr < 0 || (sr & AT25_SR_nRDY)) { + dev_dbg(&at25->spi->dev, + "rdsr --> %d (%02x)\n", sr, sr); + /* at HZ=100, this is sloooow */ + msleep(1); + continue; + } + if (!(sr & AT25_SR_nRDY)) + break; + } while (retries++ < 3 || time_before_eq(jiffies, timeout)); + + if ((sr < 0) || (sr & AT25_SR_nRDY)) { + dev_err(&at25->spi->dev, + "write %u bytes offset %u, timeout after %u msecs\n", + segment, offset, + jiffies_to_msecs(jiffies - + (timeout - EE_TIMEOUT))); + status = -ETIMEDOUT; + break; + } + + off += segment; + buf += segment; + count -= segment; + + } while (count > 0); + + mutex_unlock(&at25->lock); + + kfree(bounce); + return status; +} + +/*-------------------------------------------------------------------------*/ + +static int at25_fw_to_chip(struct device *dev, struct spi_eeprom *chip) +{ + u32 val; + int err; + + strscpy(chip->name, "at25", sizeof(chip->name)); + + err = device_property_read_u32(dev, "size", &val); + if (err) + err = device_property_read_u32(dev, "at25,byte-len", &val); + if (err) { + dev_err(dev, "Error: missing \"size\" property\n"); + return err; + } + chip->byte_len = val; + + err = device_property_read_u32(dev, "pagesize", &val); + if (err) + err = device_property_read_u32(dev, "at25,page-size", &val); + if (err) { + dev_err(dev, "Error: missing \"pagesize\" property\n"); + return err; + } + chip->page_size = val; + + err = device_property_read_u32(dev, "address-width", &val); + if (err) { + err = device_property_read_u32(dev, "at25,addr-mode", &val); + if (err) { + dev_err(dev, "Error: missing \"address-width\" property\n"); + return err; + } + chip->flags = (u16)val; + } else { + switch (val) { + case 9: + chip->flags |= EE_INSTR_BIT3_IS_ADDR; + fallthrough; + case 8: + chip->flags |= EE_ADDR1; + break; + case 16: + chip->flags |= EE_ADDR2; + break; + case 24: + chip->flags |= EE_ADDR3; + break; + default: + dev_err(dev, + "Error: bad \"address-width\" property: %u\n", + val); + return -ENODEV; + } + if (device_property_present(dev, "read-only")) + chip->flags |= EE_READONLY; + } + return 0; +} + +static 
int at25_fram_to_chip(struct device *dev, struct spi_eeprom *chip) +{ + struct at25_data *at25 = container_of(chip, struct at25_data, chip); + u8 sernum[FM25_SN_LEN]; + u8 id[FM25_ID_LEN]; + int i; + + strscpy(chip->name, "fm25", sizeof(chip->name)); + + /* Get ID of chip */ + fm25_aux_read(at25, id, FM25_RDID, FM25_ID_LEN); + if (id[6] != 0xc2) { + dev_err(dev, "Error: no Cypress FRAM (id %02x)\n", id[6]); + return -ENODEV; + } + /* Set size found in ID */ + if (id[7] < 0x21 || id[7] > 0x26) { + dev_err(dev, "Error: unsupported size (id %02x)\n", id[7]); + return -ENODEV; + } + + chip->byte_len = BIT(id[7] - 0x21 + 4) * 1024; + if (chip->byte_len > 64 * 1024) + chip->flags |= EE_ADDR3; + else + chip->flags |= EE_ADDR2; + + if (id[8]) { + fm25_aux_read(at25, sernum, FM25_RDSN, FM25_SN_LEN); + /* Swap byte order */ + for (i = 0; i < FM25_SN_LEN; i++) + at25->sernum[i] = sernum[FM25_SN_LEN - 1 - i]; + } + + chip->page_size = PAGE_SIZE; + return 0; +} + +static const struct of_device_id at25_of_match[] = { + { .compatible = "atmel,at25" }, + { .compatible = "cypress,fm25" }, + { } +}; +MODULE_DEVICE_TABLE(of, at25_of_match); + +static const struct spi_device_id at25_spi_ids[] = { + { .name = "at25" }, + { .name = "fm25" }, + { } +}; +MODULE_DEVICE_TABLE(spi, at25_spi_ids); + +static int at25_probe(struct spi_device *spi) +{ + struct at25_data *at25 = NULL; + int err; + int sr; + struct spi_eeprom *pdata; + bool is_fram; + + /* + * Ping the chip ... the status register is pretty portable, + * unlike probing manufacturer IDs. We do expect that system + * firmware didn't write it in the past few milliseconds! + */ + sr = spi_w8r8(spi, AT25_RDSR); + if (sr < 0 || sr & AT25_SR_nRDY) { + dev_dbg(&spi->dev, "rdsr --> %d (%02x)\n", sr, sr); + return -ENXIO; + } + + at25 = devm_kzalloc(&spi->dev, sizeof(*at25), GFP_KERNEL); + if (!at25) + return -ENOMEM; + + mutex_init(&at25->lock); + at25->spi = spi; + spi_set_drvdata(spi, at25); + + is_fram = fwnode_device_is_compatible(dev_fwnode(&spi->dev), "cypress,fm25"); + + /* Chip description */ + pdata = dev_get_platdata(&spi->dev); + if (pdata) { + at25->chip = *pdata; + } else { + if (is_fram) + err = at25_fram_to_chip(&spi->dev, &at25->chip); + else + err = at25_fw_to_chip(&spi->dev, &at25->chip); + if (err) + return err; + } + + /* For now we only support 8/16/24 bit addressing */ + if (at25->chip.flags & EE_ADDR1) + at25->addrlen = 1; + else if (at25->chip.flags & EE_ADDR2) + at25->addrlen = 2; + else if (at25->chip.flags & EE_ADDR3) + at25->addrlen = 3; + else { + dev_dbg(&spi->dev, "unsupported address type\n"); + return -EINVAL; + } + + at25->nvmem_config.type = is_fram ? NVMEM_TYPE_FRAM : NVMEM_TYPE_EEPROM; + at25->nvmem_config.name = dev_name(&spi->dev); + at25->nvmem_config.dev = &spi->dev; + at25->nvmem_config.read_only = at25->chip.flags & EE_READONLY; + at25->nvmem_config.root_only = true; + at25->nvmem_config.owner = THIS_MODULE; + at25->nvmem_config.compat = true; + at25->nvmem_config.base_dev = &spi->dev; + at25->nvmem_config.reg_read = at25_ee_read; + at25->nvmem_config.reg_write = at25_ee_write; + at25->nvmem_config.priv = at25; + at25->nvmem_config.stride = 1; + at25->nvmem_config.word_size = 1; + at25->nvmem_config.size = at25->chip.byte_len; + + at25->nvmem = devm_nvmem_register(&spi->dev, &at25->nvmem_config); + if (IS_ERR(at25->nvmem)) + return PTR_ERR(at25->nvmem); + + dev_info(&spi->dev, "%d %s %s %s%s, pagesize %u\n", + (at25->chip.byte_len < 1024) ? 
+ at25->chip.byte_len : (at25->chip.byte_len / 1024), + (at25->chip.byte_len < 1024) ? "Byte" : "KByte", + at25->chip.name, is_fram ? "fram" : "eeprom", + (at25->chip.flags & EE_READONLY) ? " (readonly)" : "", + at25->chip.page_size); + return 0; +} + +/*-------------------------------------------------------------------------*/ + +static struct spi_driver at25_driver = { + .driver = { + .name = "at25", + .of_match_table = at25_of_match, + .dev_groups = sernum_groups, + }, + .probe = at25_probe, + .id_table = at25_spi_ids, +}; + +module_spi_driver(at25_driver); + +MODULE_DESCRIPTION("Driver for most SPI EEPROMs"); +MODULE_AUTHOR("David Brownell"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("spi:at25"); diff --git a/drivers/misc/eeprom/digsy_mtc_eeprom.c b/drivers/misc/eeprom/digsy_mtc_eeprom.c new file mode 100644 index 0000000000..f1f766b709 --- /dev/null +++ b/drivers/misc/eeprom/digsy_mtc_eeprom.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * EEPROMs access control driver for display configuration EEPROMs + * on DigsyMTC board. + * + * (C) 2011 DENX Software Engineering, Anatolij Gustschin + * + * FIXME: this driver is used on a device-tree probed platform: it + * should be defined as a bit-banged SPI device and probed from the device + * tree and not like this with static grabbing of a few numbered GPIO + * lines at random. + * + * Add proper SPI and EEPROM in arch/powerpc/boot/dts/digsy_mtc.dts + * and delete this driver. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define GPIO_EEPROM_CLK 216 +#define GPIO_EEPROM_CS 210 +#define GPIO_EEPROM_DI 217 +#define GPIO_EEPROM_DO 249 +#define GPIO_EEPROM_OE 255 +#define EE_SPI_BUS_NUM 1 + +static void digsy_mtc_op_prepare(void *p) +{ + /* enable */ + gpio_set_value(GPIO_EEPROM_OE, 0); +} + +static void digsy_mtc_op_finish(void *p) +{ + /* disable */ + gpio_set_value(GPIO_EEPROM_OE, 1); +} + +struct eeprom_93xx46_platform_data digsy_mtc_eeprom_data = { + .flags = EE_ADDR8, + .prepare = digsy_mtc_op_prepare, + .finish = digsy_mtc_op_finish, +}; + +static struct spi_gpio_platform_data eeprom_spi_gpio_data = { + .num_chipselect = 1, +}; + +static struct platform_device digsy_mtc_eeprom = { + .name = "spi_gpio", + .id = EE_SPI_BUS_NUM, + .dev = { + .platform_data = &eeprom_spi_gpio_data, + }, +}; + +static struct gpiod_lookup_table eeprom_spi_gpiod_table = { + .dev_id = "spi_gpio", + .table = { + GPIO_LOOKUP("gpio@b00", GPIO_EEPROM_CLK, + "sck", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio@b00", GPIO_EEPROM_DI, + "mosi", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio@b00", GPIO_EEPROM_DO, + "miso", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio@b00", GPIO_EEPROM_CS, + "cs", GPIO_ACTIVE_HIGH), + { }, + }, +}; + +static struct spi_board_info digsy_mtc_eeprom_info[] __initdata = { + { + .modalias = "93xx46", + .max_speed_hz = 1000000, + .bus_num = EE_SPI_BUS_NUM, + .chip_select = 0, + .mode = SPI_MODE_0, + .platform_data = &digsy_mtc_eeprom_data, + }, +}; + +static int __init digsy_mtc_eeprom_devices_init(void) +{ + int ret; + + ret = gpio_request_one(GPIO_EEPROM_OE, GPIOF_OUT_INIT_HIGH, + "93xx46 EEPROMs OE"); + if (ret) { + pr_err("can't request gpio %d\n", GPIO_EEPROM_OE); + return ret; + } + gpiod_add_lookup_table(&eeprom_spi_gpiod_table); + spi_register_board_info(digsy_mtc_eeprom_info, + ARRAY_SIZE(digsy_mtc_eeprom_info)); + return platform_device_register(&digsy_mtc_eeprom); +} +device_initcall(digsy_mtc_eeprom_devices_init); diff --git a/drivers/misc/eeprom/ee1004.c b/drivers/misc/eeprom/ee1004.c new file mode 
100644 index 0000000000..a1acd77130 --- /dev/null +++ b/drivers/misc/eeprom/ee1004.c @@ -0,0 +1,245 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * ee1004 - driver for DDR4 SPD EEPROMs + * + * Copyright (C) 2017-2019 Jean Delvare + * + * Based on the at24 driver: + * Copyright (C) 2005-2007 David Brownell + * Copyright (C) 2008 Wolfram Sang, Pengutronix + */ + +#include +#include +#include +#include +#include +#include + +/* + * DDR4 memory modules use special EEPROMs following the Jedec EE1004 + * specification. These are 512-byte EEPROMs using a single I2C address + * in the 0x50-0x57 range for data. One of two 256-byte page is selected + * by writing a command to I2C address 0x36 or 0x37 on the same I2C bus. + * + * Therefore we need to request these 2 additional addresses, and serialize + * access to all such EEPROMs with a single mutex. + * + * We assume it is safe to read up to 32 bytes at once from these EEPROMs. + * We use SMBus access even if I2C is available, these EEPROMs are small + * enough, and reading from them infrequent enough, that we favor simplicity + * over performance. + */ + +#define EE1004_ADDR_SET_PAGE 0x36 +#define EE1004_NUM_PAGES 2 +#define EE1004_PAGE_SIZE 256 +#define EE1004_PAGE_SHIFT 8 +#define EE1004_EEPROM_SIZE (EE1004_PAGE_SIZE * EE1004_NUM_PAGES) + +/* + * Mutex protects ee1004_set_page and ee1004_dev_count, and must be held + * from page selection to end of read. + */ +static DEFINE_MUTEX(ee1004_bus_lock); +static struct i2c_client *ee1004_set_page[EE1004_NUM_PAGES]; +static unsigned int ee1004_dev_count; +static int ee1004_current_page; + +static const struct i2c_device_id ee1004_ids[] = { + { "ee1004", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, ee1004_ids); + +/*-------------------------------------------------------------------------*/ + +static int ee1004_get_current_page(void) +{ + int err; + + err = i2c_smbus_read_byte(ee1004_set_page[0]); + if (err == -ENXIO) { + /* Nack means page 1 is selected */ + return 1; + } + if (err < 0) { + /* Anything else is a real error, bail out */ + return err; + } + + /* Ack means page 0 is selected, returned value meaningless */ + return 0; +} + +static int ee1004_set_current_page(struct device *dev, int page) +{ + int ret; + + if (page == ee1004_current_page) + return 0; + + /* Data is ignored */ + ret = i2c_smbus_write_byte(ee1004_set_page[page], 0x00); + /* + * Don't give up just yet. Some memory modules will select the page + * but not ack the command. Check which page is selected now. 
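[Editorial illustration] To illustrate the two-page layout described in the comment above (values invented for the example): a byte offset into the 512-byte SPD image splits into a page number and an in-page address exactly as ee1004_eeprom_read() below does with EE1004_PAGE_SHIFT.

/* Illustration only: offset 0x155 of the SPD image. */
unsigned int example_off     = 0x155;
unsigned int example_page    = example_off >> EE1004_PAGE_SHIFT;	/* 1 -> select via 0x37  */
unsigned int example_in_page = example_off & (EE1004_PAGE_SIZE - 1);	/* 0x55 within page 1    */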
+ */ + if (ret == -ENXIO && ee1004_get_current_page() == page) + ret = 0; + if (ret < 0) { + dev_err(dev, "Failed to select page %d (%d)\n", page, ret); + return ret; + } + + dev_dbg(dev, "Selected page %d\n", page); + ee1004_current_page = page; + + return 0; +} + +static ssize_t ee1004_eeprom_read(struct i2c_client *client, char *buf, + unsigned int offset, size_t count) +{ + int status, page; + + page = offset >> EE1004_PAGE_SHIFT; + offset &= (1 << EE1004_PAGE_SHIFT) - 1; + + status = ee1004_set_current_page(&client->dev, page); + if (status) + return status; + + /* Can't cross page boundaries */ + if (offset + count > EE1004_PAGE_SIZE) + count = EE1004_PAGE_SIZE - offset; + + if (count > I2C_SMBUS_BLOCK_MAX) + count = I2C_SMBUS_BLOCK_MAX; + + return i2c_smbus_read_i2c_block_data_or_emulated(client, offset, count, buf); +} + +static ssize_t eeprom_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + struct i2c_client *client = kobj_to_i2c_client(kobj); + size_t requested = count; + int ret = 0; + + /* + * Read data from chip, protecting against concurrent access to + * other EE1004 SPD EEPROMs on the same adapter. + */ + mutex_lock(&ee1004_bus_lock); + + while (count) { + ret = ee1004_eeprom_read(client, buf, off, count); + if (ret < 0) + goto out; + + buf += ret; + off += ret; + count -= ret; + } +out: + mutex_unlock(&ee1004_bus_lock); + + return ret < 0 ? ret : requested; +} + +static BIN_ATTR_RO(eeprom, EE1004_EEPROM_SIZE); + +static struct bin_attribute *ee1004_attrs[] = { + &bin_attr_eeprom, + NULL +}; + +BIN_ATTRIBUTE_GROUPS(ee1004); + +static void ee1004_cleanup(int idx) +{ + if (--ee1004_dev_count == 0) + while (--idx >= 0) { + i2c_unregister_device(ee1004_set_page[idx]); + ee1004_set_page[idx] = NULL; + } +} + +static int ee1004_probe(struct i2c_client *client) +{ + int err, cnr = 0; + + /* Make sure we can operate on this adapter */ + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_BYTE | I2C_FUNC_SMBUS_READ_I2C_BLOCK) && + !i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_BYTE | I2C_FUNC_SMBUS_READ_BYTE_DATA)) + return -EPFNOSUPPORT; + + /* Use 2 dummy devices for page select command */ + mutex_lock(&ee1004_bus_lock); + if (++ee1004_dev_count == 1) { + for (cnr = 0; cnr < EE1004_NUM_PAGES; cnr++) { + struct i2c_client *cl; + + cl = i2c_new_dummy_device(client->adapter, EE1004_ADDR_SET_PAGE + cnr); + if (IS_ERR(cl)) { + err = PTR_ERR(cl); + goto err_clients; + } + ee1004_set_page[cnr] = cl; + } + + /* Remember current page to avoid unneeded page select */ + err = ee1004_get_current_page(); + if (err < 0) + goto err_clients; + dev_dbg(&client->dev, "Currently selected page: %d\n", err); + ee1004_current_page = err; + } else if (client->adapter != ee1004_set_page[0]->adapter) { + dev_err(&client->dev, + "Driver only supports devices on a single I2C bus\n"); + err = -EOPNOTSUPP; + goto err_clients; + } + mutex_unlock(&ee1004_bus_lock); + + dev_info(&client->dev, + "%u byte EE1004-compliant SPD EEPROM, read-only\n", + EE1004_EEPROM_SIZE); + + return 0; + + err_clients: + ee1004_cleanup(cnr); + mutex_unlock(&ee1004_bus_lock); + + return err; +} + +static void ee1004_remove(struct i2c_client *client) +{ + /* Remove page select clients if this is the last device */ + mutex_lock(&ee1004_bus_lock); + ee1004_cleanup(EE1004_NUM_PAGES); + mutex_unlock(&ee1004_bus_lock); +} + +/*-------------------------------------------------------------------------*/ + +static struct i2c_driver ee1004_driver 
= { + .driver = { + .name = "ee1004", + .dev_groups = ee1004_groups, + }, + .probe = ee1004_probe, + .remove = ee1004_remove, + .id_table = ee1004_ids, +}; +module_i2c_driver(ee1004_driver); + +MODULE_DESCRIPTION("Driver for EE1004-compliant DDR4 SPD EEPROMs"); +MODULE_AUTHOR("Jean Delvare"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/eeprom/eeprom.c b/drivers/misc/eeprom/eeprom.c new file mode 100644 index 0000000000..ccb7c2f7ee --- /dev/null +++ b/drivers/misc/eeprom/eeprom.c @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 1998, 1999 Frodo Looijaard and + * Philip Edelbrock + * Copyright (C) 2003 Greg Kroah-Hartman + * Copyright (C) 2003 IBM Corp. + * Copyright (C) 2004 Jean Delvare + */ + +#include +#include +#include +#include +#include +#include +#include + +/* Addresses to scan */ +static const unsigned short normal_i2c[] = { 0x50, 0x51, 0x52, 0x53, 0x54, + 0x55, 0x56, 0x57, I2C_CLIENT_END }; + + +/* Size of EEPROM in bytes */ +#define EEPROM_SIZE 256 + +/* possible types of eeprom devices */ +enum eeprom_nature { + UNKNOWN, + VAIO, +}; + +/* Each client has this additional data */ +struct eeprom_data { + struct mutex update_lock; + u8 valid; /* bitfield, bit!=0 if slice is valid */ + unsigned long last_updated[8]; /* In jiffies, 8 slices */ + u8 data[EEPROM_SIZE]; /* Register values */ + enum eeprom_nature nature; +}; + + +static void eeprom_update_client(struct i2c_client *client, u8 slice) +{ + struct eeprom_data *data = i2c_get_clientdata(client); + int i; + + mutex_lock(&data->update_lock); + + if (!(data->valid & (1 << slice)) || + time_after(jiffies, data->last_updated[slice] + 300 * HZ)) { + dev_dbg(&client->dev, "Starting eeprom update, slice %u\n", slice); + + if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_READ_I2C_BLOCK)) { + for (i = slice << 5; i < (slice + 1) << 5; i += 32) + if (i2c_smbus_read_i2c_block_data(client, i, + 32, data->data + i) + != 32) + goto exit; + } else { + for (i = slice << 5; i < (slice + 1) << 5; i += 2) { + int word = i2c_smbus_read_word_data(client, i); + if (word < 0) + goto exit; + data->data[i] = word & 0xff; + data->data[i + 1] = word >> 8; + } + } + data->last_updated[slice] = jiffies; + data->valid |= (1 << slice); + } +exit: + mutex_unlock(&data->update_lock); +} + +static ssize_t eeprom_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + struct i2c_client *client = kobj_to_i2c_client(kobj); + struct eeprom_data *data = i2c_get_clientdata(client); + u8 slice; + + /* Only refresh slices which contain requested bytes */ + for (slice = off >> 5; slice <= (off + count - 1) >> 5; slice++) + eeprom_update_client(client, slice); + + /* Hide Vaio private settings to regular users: + - BIOS passwords: bytes 0x00 to 0x0f + - UUID: bytes 0x10 to 0x1f + - Serial number: 0xc0 to 0xdf */ + if (data->nature == VAIO && !capable(CAP_SYS_ADMIN)) { + int i; + + for (i = 0; i < count; i++) { + if ((off + i <= 0x1f) || + (off + i >= 0xc0 && off + i <= 0xdf)) + buf[i] = 0; + else + buf[i] = data->data[off + i]; + } + } else { + memcpy(buf, &data->data[off], count); + } + + return count; +} + +static const struct bin_attribute eeprom_attr = { + .attr = { + .name = "eeprom", + .mode = S_IRUGO, + }, + .size = EEPROM_SIZE, + .read = eeprom_read, +}; + +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int eeprom_detect(struct i2c_client *client, struct i2c_board_info *info) +{ + struct i2c_adapter *adapter = 
client->adapter; + + /* EDID EEPROMs are often 24C00 EEPROMs, which answer to all + addresses 0x50-0x57, but we only care about 0x50. So decline + attaching to addresses >= 0x51 on DDC buses */ + if (!(adapter->class & I2C_CLASS_SPD) && client->addr >= 0x51) + return -ENODEV; + + /* There are four ways we can read the EEPROM data: + (1) I2C block reads (faster, but unsupported by most adapters) + (2) Word reads (128% overhead) + (3) Consecutive byte reads (88% overhead, unsafe) + (4) Regular byte data reads (265% overhead) + The third and fourth methods are not implemented by this driver + because all known adapters support one of the first two. */ + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_WORD_DATA) + && !i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_I2C_BLOCK)) + return -ENODEV; + + strscpy(info->type, "eeprom", I2C_NAME_SIZE); + + return 0; +} + +static int eeprom_probe(struct i2c_client *client) +{ + struct i2c_adapter *adapter = client->adapter; + struct eeprom_data *data; + + data = devm_kzalloc(&client->dev, sizeof(struct eeprom_data), + GFP_KERNEL); + if (!data) + return -ENOMEM; + + memset(data->data, 0xff, EEPROM_SIZE); + i2c_set_clientdata(client, data); + mutex_init(&data->update_lock); + data->nature = UNKNOWN; + + /* Detect the Vaio nature of EEPROMs. + We use the "PCG-" or "VGN-" prefix as the signature. */ + if (client->addr == 0x57 + && i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_BYTE_DATA)) { + char name[4]; + + name[0] = i2c_smbus_read_byte_data(client, 0x80); + name[1] = i2c_smbus_read_byte_data(client, 0x81); + name[2] = i2c_smbus_read_byte_data(client, 0x82); + name[3] = i2c_smbus_read_byte_data(client, 0x83); + + if (!memcmp(name, "PCG-", 4) || !memcmp(name, "VGN-", 4)) { + dev_info(&client->dev, "Vaio EEPROM detected, " + "enabling privacy protection\n"); + data->nature = VAIO; + } + } + + /* Let the users know they are using deprecated driver */ + dev_notice(&client->dev, + "eeprom driver is deprecated, please use at24 instead\n"); + + /* create the sysfs eeprom file */ + return sysfs_create_bin_file(&client->dev.kobj, &eeprom_attr); +} + +static void eeprom_remove(struct i2c_client *client) +{ + sysfs_remove_bin_file(&client->dev.kobj, &eeprom_attr); +} + +static const struct i2c_device_id eeprom_id[] = { + { "eeprom", 0 }, + { } +}; + +static struct i2c_driver eeprom_driver = { + .driver = { + .name = "eeprom", + }, + .probe = eeprom_probe, + .remove = eeprom_remove, + .id_table = eeprom_id, + + .class = I2C_CLASS_DDC | I2C_CLASS_SPD, + .detect = eeprom_detect, + .address_list = normal_i2c, +}; + +module_i2c_driver(eeprom_driver); + +MODULE_AUTHOR("Frodo Looijaard and " + "Philip Edelbrock and " + "Greg Kroah-Hartman "); +MODULE_DESCRIPTION("I2C EEPROM driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/eeprom/eeprom_93cx6.c b/drivers/misc/eeprom/eeprom_93cx6.c new file mode 100644 index 0000000000..9627294fe3 --- /dev/null +++ b/drivers/misc/eeprom/eeprom_93cx6.c @@ -0,0 +1,372 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2004 - 2006 rt2x00 SourceForge Project + * + * + * Module: eeprom_93cx6 + * Abstract: EEPROM reader routines for 93cx6 chipsets. + * Supported chipsets: 93c46 & 93c66. 
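+ *
+ * A minimal consumer sketch (the my_reg_read/my_reg_write callbacks and the
+ * width value are placeholders; a real driver maps the reg_* bit fields of
+ * struct eeprom_93cx6 onto its EEPROM control register and only illustrates
+ * the calling convention of the exported helpers below):
+ *
+ *	struct eeprom_93cx6 ee = {
+ *		.register_read	= my_reg_read,
+ *		.register_write	= my_reg_write,
+ *		.width		= PCI_EEPROM_WIDTH_93C46,
+ *	};
+ *	__le16 buf[8];
+ *
+ *	eeprom_93cx6_multiread(&ee, 0, buf, ARRAY_SIZE(buf));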
+ */ + +#include +#include +#include +#include + +MODULE_AUTHOR("http://rt2x00.serialmonkey.com"); +MODULE_VERSION("1.0"); +MODULE_DESCRIPTION("EEPROM 93cx6 chip driver"); +MODULE_LICENSE("GPL"); + +static inline void eeprom_93cx6_pulse_high(struct eeprom_93cx6 *eeprom) +{ + eeprom->reg_data_clock = 1; + eeprom->register_write(eeprom); + + /* + * Add a short delay for the pulse to work. + * According to the specifications the "maximum minimum" + * time should be 450ns. + */ + ndelay(450); +} + +static inline void eeprom_93cx6_pulse_low(struct eeprom_93cx6 *eeprom) +{ + eeprom->reg_data_clock = 0; + eeprom->register_write(eeprom); + + /* + * Add a short delay for the pulse to work. + * According to the specifications the "maximum minimum" + * time should be 450ns. + */ + ndelay(450); +} + +static void eeprom_93cx6_startup(struct eeprom_93cx6 *eeprom) +{ + /* + * Clear all flags, and enable chip select. + */ + eeprom->register_read(eeprom); + eeprom->reg_data_in = 0; + eeprom->reg_data_out = 0; + eeprom->reg_data_clock = 0; + eeprom->reg_chip_select = 1; + eeprom->drive_data = 1; + eeprom->register_write(eeprom); + + /* + * kick a pulse. + */ + eeprom_93cx6_pulse_high(eeprom); + eeprom_93cx6_pulse_low(eeprom); +} + +static void eeprom_93cx6_cleanup(struct eeprom_93cx6 *eeprom) +{ + /* + * Clear chip_select and data_in flags. + */ + eeprom->register_read(eeprom); + eeprom->reg_data_in = 0; + eeprom->reg_chip_select = 0; + eeprom->register_write(eeprom); + + /* + * kick a pulse. + */ + eeprom_93cx6_pulse_high(eeprom); + eeprom_93cx6_pulse_low(eeprom); +} + +static void eeprom_93cx6_write_bits(struct eeprom_93cx6 *eeprom, + const u16 data, const u16 count) +{ + unsigned int i; + + eeprom->register_read(eeprom); + + /* + * Clear data flags. + */ + eeprom->reg_data_in = 0; + eeprom->reg_data_out = 0; + eeprom->drive_data = 1; + + /* + * Start writing all bits. + */ + for (i = count; i > 0; i--) { + /* + * Check if this bit needs to be set. + */ + eeprom->reg_data_in = !!(data & (1 << (i - 1))); + + /* + * Write the bit to the eeprom register. + */ + eeprom->register_write(eeprom); + + /* + * Kick a pulse. + */ + eeprom_93cx6_pulse_high(eeprom); + eeprom_93cx6_pulse_low(eeprom); + } + + eeprom->reg_data_in = 0; + eeprom->register_write(eeprom); +} + +static void eeprom_93cx6_read_bits(struct eeprom_93cx6 *eeprom, + u16 *data, const u16 count) +{ + unsigned int i; + u16 buf = 0; + + eeprom->register_read(eeprom); + + /* + * Clear data flags. + */ + eeprom->reg_data_in = 0; + eeprom->reg_data_out = 0; + eeprom->drive_data = 0; + + /* + * Start reading all bits. + */ + for (i = count; i > 0; i--) { + eeprom_93cx6_pulse_high(eeprom); + + eeprom->register_read(eeprom); + + /* + * Clear data_in flag. + */ + eeprom->reg_data_in = 0; + + /* + * Read if the bit has been set. + */ + if (eeprom->reg_data_out) + buf |= (1 << (i - 1)); + + eeprom_93cx6_pulse_low(eeprom); + } + + *data = buf; +} + +/** + * eeprom_93cx6_read - Read a word from eeprom + * @eeprom: Pointer to eeprom structure + * @word: Word index from where we should start reading + * @data: target pointer where the information will have to be stored + * + * This function will read the eeprom data as host-endian word + * into the given data pointer. + */ +void eeprom_93cx6_read(struct eeprom_93cx6 *eeprom, const u8 word, + u16 *data) +{ + u16 command; + + /* + * Initialize the eeprom register + */ + eeprom_93cx6_startup(eeprom); + + /* + * Select the read opcode and the word to be read. 
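+ * The opcode is shifted left by the chip's address width so that the word
+ * index occupies the low bits of the command clocked out below.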
+ */ + command = (PCI_EEPROM_READ_OPCODE << eeprom->width) | word; + eeprom_93cx6_write_bits(eeprom, command, + PCI_EEPROM_WIDTH_OPCODE + eeprom->width); + + /* + * Read the requested 16 bits. + */ + eeprom_93cx6_read_bits(eeprom, data, 16); + + /* + * Cleanup eeprom register. + */ + eeprom_93cx6_cleanup(eeprom); +} +EXPORT_SYMBOL_GPL(eeprom_93cx6_read); + +/** + * eeprom_93cx6_multiread - Read multiple words from eeprom + * @eeprom: Pointer to eeprom structure + * @word: Word index from where we should start reading + * @data: target pointer where the information will have to be stored + * @words: Number of words that should be read. + * + * This function will read all requested words from the eeprom, + * this is done by calling eeprom_93cx6_read() multiple times. + * But with the additional change that while the eeprom_93cx6_read + * will return host ordered bytes, this method will return little + * endian words. + */ +void eeprom_93cx6_multiread(struct eeprom_93cx6 *eeprom, const u8 word, + __le16 *data, const u16 words) +{ + unsigned int i; + u16 tmp; + + for (i = 0; i < words; i++) { + tmp = 0; + eeprom_93cx6_read(eeprom, word + i, &tmp); + data[i] = cpu_to_le16(tmp); + } +} +EXPORT_SYMBOL_GPL(eeprom_93cx6_multiread); + +/** + * eeprom_93cx6_readb - Read a byte from eeprom + * @eeprom: Pointer to eeprom structure + * @byte: Byte index from where we should start reading + * @data: target pointer where the information will have to be stored + * + * This function will read a byte of the eeprom data + * into the given data pointer. + */ +void eeprom_93cx6_readb(struct eeprom_93cx6 *eeprom, const u8 byte, + u8 *data) +{ + u16 command; + u16 tmp; + + /* + * Initialize the eeprom register + */ + eeprom_93cx6_startup(eeprom); + + /* + * Select the read opcode and the byte to be read. + */ + command = (PCI_EEPROM_READ_OPCODE << (eeprom->width + 1)) | byte; + eeprom_93cx6_write_bits(eeprom, command, + PCI_EEPROM_WIDTH_OPCODE + eeprom->width + 1); + + /* + * Read the requested 8 bits. + */ + eeprom_93cx6_read_bits(eeprom, &tmp, 8); + *data = tmp & 0xff; + + /* + * Cleanup eeprom register. + */ + eeprom_93cx6_cleanup(eeprom); +} +EXPORT_SYMBOL_GPL(eeprom_93cx6_readb); + +/** + * eeprom_93cx6_multireadb - Read multiple bytes from eeprom + * @eeprom: Pointer to eeprom structure + * @byte: Index from where we should start reading + * @data: target pointer where the information will have to be stored + * @bytes: Number of bytes that should be read. + * + * This function will read all requested bytes from the eeprom, + * this is done by calling eeprom_93cx6_readb() multiple times. + */ +void eeprom_93cx6_multireadb(struct eeprom_93cx6 *eeprom, const u8 byte, + u8 *data, const u16 bytes) +{ + unsigned int i; + + for (i = 0; i < bytes; i++) + eeprom_93cx6_readb(eeprom, byte + i, &data[i]); +} +EXPORT_SYMBOL_GPL(eeprom_93cx6_multireadb); + +/** + * eeprom_93cx6_wren - set the write enable state + * @eeprom: Pointer to eeprom structure + * @enable: true to enable writes, otherwise disable writes + * + * Set the EEPROM write enable state to either allow or deny + * writes depending on the @enable value. + */ +void eeprom_93cx6_wren(struct eeprom_93cx6 *eeprom, bool enable) +{ + u16 command; + + /* start the command */ + eeprom_93cx6_startup(eeprom); + + /* create command to enable/disable */ + + command = enable ? 
PCI_EEPROM_EWEN_OPCODE : PCI_EEPROM_EWDS_OPCODE; + command <<= (eeprom->width - 2); + + eeprom_93cx6_write_bits(eeprom, command, + PCI_EEPROM_WIDTH_OPCODE + eeprom->width); + + eeprom_93cx6_cleanup(eeprom); +} +EXPORT_SYMBOL_GPL(eeprom_93cx6_wren); + +/** + * eeprom_93cx6_write - write data to the EEPROM + * @eeprom: Pointer to eeprom structure + * @addr: Address to write data to. + * @data: The data to write to address @addr. + * + * Write the @data to the specified @addr in the EEPROM and + * waiting for the device to finish writing. + * + * Note, since we do not expect large number of write operations + * we delay in between parts of the operation to avoid using excessive + * amounts of CPU time busy waiting. + */ +void eeprom_93cx6_write(struct eeprom_93cx6 *eeprom, u8 addr, u16 data) +{ + int timeout = 100; + u16 command; + + /* start the command */ + eeprom_93cx6_startup(eeprom); + + command = PCI_EEPROM_WRITE_OPCODE << eeprom->width; + command |= addr; + + /* send write command */ + eeprom_93cx6_write_bits(eeprom, command, + PCI_EEPROM_WIDTH_OPCODE + eeprom->width); + + /* send data */ + eeprom_93cx6_write_bits(eeprom, data, 16); + + /* get ready to check for busy */ + eeprom->drive_data = 0; + eeprom->reg_chip_select = 1; + eeprom->register_write(eeprom); + + /* wait at-least 250ns to get DO to be the busy signal */ + usleep_range(1000, 2000); + + /* wait for DO to go high to signify finish */ + + while (true) { + eeprom->register_read(eeprom); + + if (eeprom->reg_data_out) + break; + + usleep_range(1000, 2000); + + if (--timeout <= 0) { + printk(KERN_ERR "%s: timeout\n", __func__); + break; + } + } + + eeprom_93cx6_cleanup(eeprom); +} +EXPORT_SYMBOL_GPL(eeprom_93cx6_write); diff --git a/drivers/misc/eeprom/eeprom_93xx46.c b/drivers/misc/eeprom/eeprom_93xx46.c new file mode 100644 index 0000000000..b630625b30 --- /dev/null +++ b/drivers/misc/eeprom/eeprom_93xx46.c @@ -0,0 +1,583 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Driver for 93xx46 EEPROMs + * + * (C) 2011 DENX Software Engineering, Anatolij Gustschin + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define OP_START 0x4 +#define OP_WRITE (OP_START | 0x1) +#define OP_READ (OP_START | 0x2) +#define ADDR_EWDS 0x00 +#define ADDR_ERAL 0x20 +#define ADDR_EWEN 0x30 + +struct eeprom_93xx46_devtype_data { + unsigned int quirks; + unsigned char flags; +}; + +static const struct eeprom_93xx46_devtype_data at93c46_data = { + .flags = EE_SIZE1K, +}; + +static const struct eeprom_93xx46_devtype_data at93c56_data = { + .flags = EE_SIZE2K, +}; + +static const struct eeprom_93xx46_devtype_data at93c66_data = { + .flags = EE_SIZE4K, +}; + +static const struct eeprom_93xx46_devtype_data atmel_at93c46d_data = { + .flags = EE_SIZE1K, + .quirks = EEPROM_93XX46_QUIRK_SINGLE_WORD_READ | + EEPROM_93XX46_QUIRK_INSTRUCTION_LENGTH, +}; + +static const struct eeprom_93xx46_devtype_data microchip_93lc46b_data = { + .flags = EE_SIZE1K, + .quirks = EEPROM_93XX46_QUIRK_EXTRA_READ_CYCLE, +}; + +struct eeprom_93xx46_dev { + struct spi_device *spi; + struct eeprom_93xx46_platform_data *pdata; + struct mutex lock; + struct nvmem_config nvmem_config; + struct nvmem_device *nvmem; + int addrlen; + int size; +}; + +static inline bool has_quirk_single_word_read(struct eeprom_93xx46_dev *edev) +{ + return edev->pdata->quirks & EEPROM_93XX46_QUIRK_SINGLE_WORD_READ; +} + +static inline bool has_quirk_instruction_length(struct eeprom_93xx46_dev *edev) +{ + 
return edev->pdata->quirks & EEPROM_93XX46_QUIRK_INSTRUCTION_LENGTH; +} + +static inline bool has_quirk_extra_read_cycle(struct eeprom_93xx46_dev *edev) +{ + return edev->pdata->quirks & EEPROM_93XX46_QUIRK_EXTRA_READ_CYCLE; +} + +static int eeprom_93xx46_read(void *priv, unsigned int off, + void *val, size_t count) +{ + struct eeprom_93xx46_dev *edev = priv; + char *buf = val; + int err = 0; + int bits; + + if (unlikely(off >= edev->size)) + return 0; + if ((off + count) > edev->size) + count = edev->size - off; + if (unlikely(!count)) + return count; + + mutex_lock(&edev->lock); + + if (edev->pdata->prepare) + edev->pdata->prepare(edev); + + /* The opcode in front of the address is three bits. */ + bits = edev->addrlen + 3; + + while (count) { + struct spi_message m; + struct spi_transfer t[2] = { { 0 } }; + u16 cmd_addr = OP_READ << edev->addrlen; + size_t nbytes = count; + + if (edev->pdata->flags & EE_ADDR8) { + cmd_addr |= off; + if (has_quirk_single_word_read(edev)) + nbytes = 1; + } else { + cmd_addr |= (off >> 1); + if (has_quirk_single_word_read(edev)) + nbytes = 2; + } + + dev_dbg(&edev->spi->dev, "read cmd 0x%x, %d Hz\n", + cmd_addr, edev->spi->max_speed_hz); + + if (has_quirk_extra_read_cycle(edev)) { + cmd_addr <<= 1; + bits += 1; + } + + spi_message_init(&m); + + t[0].tx_buf = (char *)&cmd_addr; + t[0].len = 2; + t[0].bits_per_word = bits; + spi_message_add_tail(&t[0], &m); + + t[1].rx_buf = buf; + t[1].len = count; + t[1].bits_per_word = 8; + spi_message_add_tail(&t[1], &m); + + err = spi_sync(edev->spi, &m); + /* have to wait at least Tcsl ns */ + ndelay(250); + + if (err) { + dev_err(&edev->spi->dev, "read %zu bytes at %d: err. %d\n", + nbytes, (int)off, err); + break; + } + + buf += nbytes; + off += nbytes; + count -= nbytes; + } + + if (edev->pdata->finish) + edev->pdata->finish(edev); + + mutex_unlock(&edev->lock); + + return err; +} + +static int eeprom_93xx46_ew(struct eeprom_93xx46_dev *edev, int is_on) +{ + struct spi_message m; + struct spi_transfer t; + int bits, ret; + u16 cmd_addr; + + /* The opcode in front of the address is three bits. */ + bits = edev->addrlen + 3; + + cmd_addr = OP_START << edev->addrlen; + if (edev->pdata->flags & EE_ADDR8) + cmd_addr |= (is_on ? ADDR_EWEN : ADDR_EWDS) << 1; + else + cmd_addr |= (is_on ? ADDR_EWEN : ADDR_EWDS); + + if (has_quirk_instruction_length(edev)) { + cmd_addr <<= 2; + bits += 2; + } + + dev_dbg(&edev->spi->dev, "ew%s cmd 0x%04x, %d bits\n", + is_on ? "en" : "ds", cmd_addr, bits); + + spi_message_init(&m); + memset(&t, 0, sizeof(t)); + + t.tx_buf = &cmd_addr; + t.len = 2; + t.bits_per_word = bits; + spi_message_add_tail(&t, &m); + + mutex_lock(&edev->lock); + + if (edev->pdata->prepare) + edev->pdata->prepare(edev); + + ret = spi_sync(edev->spi, &m); + /* have to wait at least Tcsl ns */ + ndelay(250); + if (ret) + dev_err(&edev->spi->dev, "erase/write %sable error %d\n", + is_on ? "en" : "dis", ret); + + if (edev->pdata->finish) + edev->pdata->finish(edev); + + mutex_unlock(&edev->lock); + return ret; +} + +static ssize_t +eeprom_93xx46_write_word(struct eeprom_93xx46_dev *edev, + const char *buf, unsigned off) +{ + struct spi_message m; + struct spi_transfer t[2]; + int bits, data_len, ret; + u16 cmd_addr; + + if (unlikely(off >= edev->size)) + return -EINVAL; + + /* The opcode in front of the address is three bits. 
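+ * Together with the address it forms a single (3 + addrlen)-bit command
+ * word that is clocked out MSB-first in the first SPI transfer below.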
*/ + bits = edev->addrlen + 3; + + cmd_addr = OP_WRITE << edev->addrlen; + + if (edev->pdata->flags & EE_ADDR8) { + cmd_addr |= off; + data_len = 1; + } else { + cmd_addr |= (off >> 1); + data_len = 2; + } + + dev_dbg(&edev->spi->dev, "write cmd 0x%x\n", cmd_addr); + + spi_message_init(&m); + memset(t, 0, sizeof(t)); + + t[0].tx_buf = (char *)&cmd_addr; + t[0].len = 2; + t[0].bits_per_word = bits; + spi_message_add_tail(&t[0], &m); + + t[1].tx_buf = buf; + t[1].len = data_len; + t[1].bits_per_word = 8; + spi_message_add_tail(&t[1], &m); + + ret = spi_sync(edev->spi, &m); + /* have to wait program cycle time Twc ms */ + mdelay(6); + return ret; +} + +static int eeprom_93xx46_write(void *priv, unsigned int off, + void *val, size_t count) +{ + struct eeprom_93xx46_dev *edev = priv; + char *buf = val; + int i, ret, step = 1; + + if (unlikely(off >= edev->size)) + return -EFBIG; + if ((off + count) > edev->size) + count = edev->size - off; + if (unlikely(!count)) + return count; + + /* only write even number of bytes on 16-bit devices */ + if (edev->pdata->flags & EE_ADDR16) { + step = 2; + count &= ~1; + } + + /* erase/write enable */ + ret = eeprom_93xx46_ew(edev, 1); + if (ret) + return ret; + + mutex_lock(&edev->lock); + + if (edev->pdata->prepare) + edev->pdata->prepare(edev); + + for (i = 0; i < count; i += step) { + ret = eeprom_93xx46_write_word(edev, &buf[i], off + i); + if (ret) { + dev_err(&edev->spi->dev, "write failed at %d: %d\n", + (int)off + i, ret); + break; + } + } + + if (edev->pdata->finish) + edev->pdata->finish(edev); + + mutex_unlock(&edev->lock); + + /* erase/write disable */ + eeprom_93xx46_ew(edev, 0); + return ret; +} + +static int eeprom_93xx46_eral(struct eeprom_93xx46_dev *edev) +{ + struct eeprom_93xx46_platform_data *pd = edev->pdata; + struct spi_message m; + struct spi_transfer t; + int bits, ret; + u16 cmd_addr; + + /* The opcode in front of the address is three bits. 
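+ * The "erase all" operation is selected via the address field (ADDR_ERAL);
+ * the sysfs "erase" attribute below wraps it in an erase/write enable and
+ * disable sequence via eeprom_93xx46_ew().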
*/ + bits = edev->addrlen + 3; + + cmd_addr = OP_START << edev->addrlen; + if (edev->pdata->flags & EE_ADDR8) + cmd_addr |= ADDR_ERAL << 1; + else + cmd_addr |= ADDR_ERAL; + + if (has_quirk_instruction_length(edev)) { + cmd_addr <<= 2; + bits += 2; + } + + dev_dbg(&edev->spi->dev, "eral cmd 0x%04x, %d bits\n", cmd_addr, bits); + + spi_message_init(&m); + memset(&t, 0, sizeof(t)); + + t.tx_buf = &cmd_addr; + t.len = 2; + t.bits_per_word = bits; + spi_message_add_tail(&t, &m); + + mutex_lock(&edev->lock); + + if (edev->pdata->prepare) + edev->pdata->prepare(edev); + + ret = spi_sync(edev->spi, &m); + if (ret) + dev_err(&edev->spi->dev, "erase error %d\n", ret); + /* have to wait erase cycle time Tec ms */ + mdelay(6); + + if (pd->finish) + pd->finish(edev); + + mutex_unlock(&edev->lock); + return ret; +} + +static ssize_t eeprom_93xx46_store_erase(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct eeprom_93xx46_dev *edev = dev_get_drvdata(dev); + int erase = 0, ret; + + sscanf(buf, "%d", &erase); + if (erase) { + ret = eeprom_93xx46_ew(edev, 1); + if (ret) + return ret; + ret = eeprom_93xx46_eral(edev); + if (ret) + return ret; + ret = eeprom_93xx46_ew(edev, 0); + if (ret) + return ret; + } + return count; +} +static DEVICE_ATTR(erase, S_IWUSR, NULL, eeprom_93xx46_store_erase); + +static void select_assert(void *context) +{ + struct eeprom_93xx46_dev *edev = context; + + gpiod_set_value_cansleep(edev->pdata->select, 1); +} + +static void select_deassert(void *context) +{ + struct eeprom_93xx46_dev *edev = context; + + gpiod_set_value_cansleep(edev->pdata->select, 0); +} + +static const struct of_device_id eeprom_93xx46_of_table[] = { + { .compatible = "eeprom-93xx46", .data = &at93c46_data, }, + { .compatible = "atmel,at93c46", .data = &at93c46_data, }, + { .compatible = "atmel,at93c46d", .data = &atmel_at93c46d_data, }, + { .compatible = "atmel,at93c56", .data = &at93c56_data, }, + { .compatible = "atmel,at93c66", .data = &at93c66_data, }, + { .compatible = "microchip,93lc46b", .data = µchip_93lc46b_data, }, + {} +}; +MODULE_DEVICE_TABLE(of, eeprom_93xx46_of_table); + +static const struct spi_device_id eeprom_93xx46_spi_ids[] = { + { .name = "eeprom-93xx46", + .driver_data = (kernel_ulong_t)&at93c46_data, }, + { .name = "at93c46", + .driver_data = (kernel_ulong_t)&at93c46_data, }, + { .name = "at93c46d", + .driver_data = (kernel_ulong_t)&atmel_at93c46d_data, }, + { .name = "at93c56", + .driver_data = (kernel_ulong_t)&at93c56_data, }, + { .name = "at93c66", + .driver_data = (kernel_ulong_t)&at93c66_data, }, + { .name = "93lc46b", + .driver_data = (kernel_ulong_t)µchip_93lc46b_data, }, + {} +}; +MODULE_DEVICE_TABLE(spi, eeprom_93xx46_spi_ids); + +static int eeprom_93xx46_probe_dt(struct spi_device *spi) +{ + const struct of_device_id *of_id = + of_match_device(eeprom_93xx46_of_table, &spi->dev); + struct device_node *np = spi->dev.of_node; + struct eeprom_93xx46_platform_data *pd; + u32 tmp; + int ret; + + pd = devm_kzalloc(&spi->dev, sizeof(*pd), GFP_KERNEL); + if (!pd) + return -ENOMEM; + + ret = of_property_read_u32(np, "data-size", &tmp); + if (ret < 0) { + dev_err(&spi->dev, "data-size property not found\n"); + return ret; + } + + if (tmp == 8) { + pd->flags |= EE_ADDR8; + } else if (tmp == 16) { + pd->flags |= EE_ADDR16; + } else { + dev_err(&spi->dev, "invalid data-size (%d)\n", tmp); + return -EINVAL; + } + + if (of_property_read_bool(np, "read-only")) + pd->flags |= EE_READONLY; + + pd->select = devm_gpiod_get_optional(&spi->dev, 
"select", + GPIOD_OUT_LOW); + if (IS_ERR(pd->select)) + return PTR_ERR(pd->select); + + pd->prepare = select_assert; + pd->finish = select_deassert; + gpiod_direction_output(pd->select, 0); + + if (of_id->data) { + const struct eeprom_93xx46_devtype_data *data = of_id->data; + + pd->quirks = data->quirks; + pd->flags |= data->flags; + } + + spi->dev.platform_data = pd; + + return 0; +} + +static int eeprom_93xx46_probe(struct spi_device *spi) +{ + struct eeprom_93xx46_platform_data *pd; + struct eeprom_93xx46_dev *edev; + int err; + + if (spi->dev.of_node) { + err = eeprom_93xx46_probe_dt(spi); + if (err < 0) + return err; + } + + pd = spi->dev.platform_data; + if (!pd) { + dev_err(&spi->dev, "missing platform data\n"); + return -ENODEV; + } + + edev = devm_kzalloc(&spi->dev, sizeof(*edev), GFP_KERNEL); + if (!edev) + return -ENOMEM; + + if (pd->flags & EE_SIZE1K) + edev->size = 128; + else if (pd->flags & EE_SIZE2K) + edev->size = 256; + else if (pd->flags & EE_SIZE4K) + edev->size = 512; + else { + dev_err(&spi->dev, "unspecified size\n"); + return -EINVAL; + } + + if (pd->flags & EE_ADDR8) + edev->addrlen = ilog2(edev->size); + else if (pd->flags & EE_ADDR16) + edev->addrlen = ilog2(edev->size) - 1; + else { + dev_err(&spi->dev, "unspecified address type\n"); + return -EINVAL; + } + + mutex_init(&edev->lock); + + edev->spi = spi; + edev->pdata = pd; + + edev->nvmem_config.type = NVMEM_TYPE_EEPROM; + edev->nvmem_config.name = dev_name(&spi->dev); + edev->nvmem_config.dev = &spi->dev; + edev->nvmem_config.read_only = pd->flags & EE_READONLY; + edev->nvmem_config.root_only = true; + edev->nvmem_config.owner = THIS_MODULE; + edev->nvmem_config.compat = true; + edev->nvmem_config.base_dev = &spi->dev; + edev->nvmem_config.reg_read = eeprom_93xx46_read; + edev->nvmem_config.reg_write = eeprom_93xx46_write; + edev->nvmem_config.priv = edev; + edev->nvmem_config.stride = 4; + edev->nvmem_config.word_size = 1; + edev->nvmem_config.size = edev->size; + + edev->nvmem = devm_nvmem_register(&spi->dev, &edev->nvmem_config); + if (IS_ERR(edev->nvmem)) + return PTR_ERR(edev->nvmem); + + dev_info(&spi->dev, "%d-bit eeprom containing %d bytes %s\n", + (pd->flags & EE_ADDR8) ? 8 : 16, + edev->size, + (pd->flags & EE_READONLY) ? "(readonly)" : ""); + + if (!(pd->flags & EE_READONLY)) { + if (device_create_file(&spi->dev, &dev_attr_erase)) + dev_err(&spi->dev, "can't create erase interface\n"); + } + + spi_set_drvdata(spi, edev); + return 0; +} + +static void eeprom_93xx46_remove(struct spi_device *spi) +{ + struct eeprom_93xx46_dev *edev = spi_get_drvdata(spi); + + if (!(edev->pdata->flags & EE_READONLY)) + device_remove_file(&spi->dev, &dev_attr_erase); +} + +static struct spi_driver eeprom_93xx46_driver = { + .driver = { + .name = "93xx46", + .of_match_table = of_match_ptr(eeprom_93xx46_of_table), + }, + .probe = eeprom_93xx46_probe, + .remove = eeprom_93xx46_remove, + .id_table = eeprom_93xx46_spi_ids, +}; + +module_spi_driver(eeprom_93xx46_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Driver for 93xx46 EEPROMs"); +MODULE_AUTHOR("Anatolij Gustschin "); +MODULE_ALIAS("spi:93xx46"); +MODULE_ALIAS("spi:eeprom-93xx46"); +MODULE_ALIAS("spi:93lc46b"); diff --git a/drivers/misc/eeprom/idt_89hpesx.c b/drivers/misc/eeprom/idt_89hpesx.c new file mode 100644 index 0000000000..1d1f30b5c4 --- /dev/null +++ b/drivers/misc/eeprom/idt_89hpesx.c @@ -0,0 +1,1592 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2016 T-Platforms. All Rights Reserved. 
+ * + * IDT PCIe-switch NTB Linux driver + * + * Contact Information: + * Serge Semin , + */ +/* + * NOTE of the IDT 89HPESx SMBus-slave interface driver + * This driver primarily is developed to have an access to EEPROM device of + * IDT PCIe-switches. IDT provides a simple SMBus interface to perform IO- + * operations from/to EEPROM, which is located at private (so called Master) + * SMBus of switches. Using that interface this the driver creates a simple + * binary sysfs-file in the device directory: + * /sys/bus/i2c/devices/-/eeprom + * In case if read-only flag is specified in the dts-node of device desription, + * User-space applications won't be able to write to the EEPROM sysfs-node. + * Additionally IDT 89HPESx SMBus interface has an ability to write/read + * data of device CSRs. This driver exposes debugf-file to perform simple IO + * operations using that ability for just basic debug purpose. Particularly + * next file is created in the specific debugfs-directory: + * /sys/kernel/debug/idt_csr/ + * Format of the debugfs-node is: + * $ cat /sys/kernel/debug/idt_csr/-/; + * : + * So reading the content of the file gives current CSR address and it value. + * If User-space application wishes to change current CSR address, + * it can just write a proper value to the sysfs-file: + * $ echo "" > /sys/kernel/debug/idt_csr/-/ + * If it wants to change the CSR value as well, the format of the write + * operation is: + * $ echo ":" > \ + * /sys/kernel/debug/idt_csr/-/; + * CSR address and value can be any of hexadecimal, decimal or octal format. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define IDT_NAME "89hpesx" +#define IDT_89HPESX_DESC "IDT 89HPESx SMBus-slave interface driver" +#define IDT_89HPESX_VER "1.0" + +MODULE_DESCRIPTION(IDT_89HPESX_DESC); +MODULE_VERSION(IDT_89HPESX_VER); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("T-platforms"); + +/* + * csr_dbgdir - CSR read/write operations Debugfs directory + */ +static struct dentry *csr_dbgdir; + +/* + * struct idt_89hpesx_dev - IDT 89HPESx device data structure + * @eesize: Size of EEPROM in bytes (calculated from "idt,eecompatible") + * @eero: EEPROM Read-only flag + * @eeaddr: EEPROM custom address + * + * @inieecmd: Initial cmd value for EEPROM read/write operations + * @inicsrcmd: Initial cmd value for CSR read/write operations + * @iniccode: Initialial command code value for IO-operations + * + * @csr: CSR address to perform read operation + * + * @smb_write: SMBus write method + * @smb_read: SMBus read method + * @smb_mtx: SMBus mutex + * + * @client: i2c client used to perform IO operations + * + * @ee_file: EEPROM read/write sysfs-file + */ +struct idt_smb_seq; +struct idt_89hpesx_dev { + u32 eesize; + bool eero; + u8 eeaddr; + + u8 inieecmd; + u8 inicsrcmd; + u8 iniccode; + + u16 csr; + + int (*smb_write)(struct idt_89hpesx_dev *, const struct idt_smb_seq *); + int (*smb_read)(struct idt_89hpesx_dev *, struct idt_smb_seq *); + struct mutex smb_mtx; + + struct i2c_client *client; + + struct bin_attribute *ee_file; + struct dentry *csr_dir; +}; + +/* + * struct idt_smb_seq - sequence of data to be read/written from/to IDT 89HPESx + * @ccode: SMBus command code + * @bytecnt: Byte count of operation + * @data: Data to by written + */ +struct idt_smb_seq { + u8 ccode; + u8 bytecnt; + u8 *data; +}; + +/* + * struct idt_eeprom_seq - sequence of data to be read/written from/to EEPROM + * @cmd: Transaction CMD + * @eeaddr: EEPROM 
custom address + * @memaddr: Internal memory address of EEPROM + * @data: Data to be written at the memory address + */ +struct idt_eeprom_seq { + u8 cmd; + u8 eeaddr; + u16 memaddr; + u8 data; +} __packed; + +/* + * struct idt_csr_seq - sequence of data to be read/written from/to CSR + * @cmd: Transaction CMD + * @csraddr: Internal IDT device CSR address + * @data: Data to be read/written from/to the CSR address + */ +struct idt_csr_seq { + u8 cmd; + u16 csraddr; + u32 data; +} __packed; + +/* + * SMBus command code macros + * @CCODE_END: Indicates the end of transaction + * @CCODE_START: Indicates the start of transaction + * @CCODE_CSR: CSR read/write transaction + * @CCODE_EEPROM: EEPROM read/write transaction + * @CCODE_BYTE: Supplied data has BYTE length + * @CCODE_WORD: Supplied data has WORD length + * @CCODE_BLOCK: Supplied data has variable length passed in bytecnt + * byte right following CCODE byte + */ +#define CCODE_END ((u8)0x01) +#define CCODE_START ((u8)0x02) +#define CCODE_CSR ((u8)0x00) +#define CCODE_EEPROM ((u8)0x04) +#define CCODE_BYTE ((u8)0x00) +#define CCODE_WORD ((u8)0x20) +#define CCODE_BLOCK ((u8)0x40) +#define CCODE_PEC ((u8)0x80) + +/* + * EEPROM command macros + * @EEPROM_OP_WRITE: EEPROM write operation + * @EEPROM_OP_READ: EEPROM read operation + * @EEPROM_USA: Use specified address of EEPROM + * @EEPROM_NAERR: EEPROM device is not ready to respond + * @EEPROM_LAERR: EEPROM arbitration loss error + * @EEPROM_MSS: EEPROM misplace start & stop bits error + * @EEPROM_WR_CNT: Bytes count to perform write operation + * @EEPROM_WRRD_CNT: Bytes count to write before reading + * @EEPROM_RD_CNT: Bytes count to perform read operation + * @EEPROM_DEF_SIZE: Fall back size of EEPROM + * @EEPROM_DEF_ADDR: Defatul EEPROM address + * @EEPROM_TOUT: Timeout before retry read operation if eeprom is busy + */ +#define EEPROM_OP_WRITE ((u8)0x00) +#define EEPROM_OP_READ ((u8)0x01) +#define EEPROM_USA ((u8)0x02) +#define EEPROM_NAERR ((u8)0x08) +#define EEPROM_LAERR ((u8)0x10) +#define EEPROM_MSS ((u8)0x20) +#define EEPROM_WR_CNT ((u8)5) +#define EEPROM_WRRD_CNT ((u8)4) +#define EEPROM_RD_CNT ((u8)5) +#define EEPROM_DEF_SIZE ((u16)4096) +#define EEPROM_DEF_ADDR ((u8)0x50) +#define EEPROM_TOUT (100) + +/* + * CSR command macros + * @CSR_DWE: Enable all four bytes of the operation + * @CSR_OP_WRITE: CSR write operation + * @CSR_OP_READ: CSR read operation + * @CSR_RERR: Read operation error + * @CSR_WERR: Write operation error + * @CSR_WR_CNT: Bytes count to perform write operation + * @CSR_WRRD_CNT: Bytes count to write before reading + * @CSR_RD_CNT: Bytes count to perform read operation + * @CSR_MAX: Maximum CSR address + * @CSR_DEF: Default CSR address + * @CSR_REAL_ADDR: CSR real unshifted address + */ +#define CSR_DWE ((u8)0x0F) +#define CSR_OP_WRITE ((u8)0x00) +#define CSR_OP_READ ((u8)0x10) +#define CSR_RERR ((u8)0x40) +#define CSR_WERR ((u8)0x80) +#define CSR_WR_CNT ((u8)7) +#define CSR_WRRD_CNT ((u8)3) +#define CSR_RD_CNT ((u8)7) +#define CSR_MAX ((u32)0x3FFFF) +#define CSR_DEF ((u16)0x0000) +#define CSR_REAL_ADDR(val) ((unsigned int)val << 2) + +/* + * IDT 89HPESx basic register + * @IDT_VIDDID_CSR: PCIe VID and DID of IDT 89HPESx + * @IDT_VID_MASK: Mask of VID + */ +#define IDT_VIDDID_CSR ((u32)0x0000) +#define IDT_VID_MASK ((u32)0xFFFF) + +/* + * IDT 89HPESx can send NACK when new command is sent before previous one + * fininshed execution. In this case driver retries operation + * certain times. 
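+ * For example, idt_smb_safe(read_byte, client, ccode) wraps
+ * i2c_smbus_read_byte_data(client, ccode) in a loop that retries up to
+ * RETRY_CNT times while the call keeps returning an error.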
+ * @RETRY_CNT: Number of retries before giving up and fail + * @idt_smb_safe: Generate a retry loop on corresponding SMBus method + */ +#define RETRY_CNT (128) +#define idt_smb_safe(ops, args...) ({ \ + int __retry = RETRY_CNT; \ + s32 __sts; \ + do { \ + __sts = i2c_smbus_ ## ops ## _data(args); \ + } while (__retry-- && __sts < 0); \ + __sts; \ +}) + +/*=========================================================================== + * i2c bus level IO-operations + *=========================================================================== + */ + +/* + * idt_smb_write_byte() - SMBus write method when I2C_SMBUS_BYTE_DATA operation + * is only available + * @pdev: Pointer to the driver data + * @seq: Sequence of data to be written + */ +static int idt_smb_write_byte(struct idt_89hpesx_dev *pdev, + const struct idt_smb_seq *seq) +{ + s32 sts; + u8 ccode; + int idx; + + /* Loop over the supplied data sending byte one-by-one */ + for (idx = 0; idx < seq->bytecnt; idx++) { + /* Collect the command code byte */ + ccode = seq->ccode | CCODE_BYTE; + if (idx == 0) + ccode |= CCODE_START; + if (idx == seq->bytecnt - 1) + ccode |= CCODE_END; + + /* Send data to the device */ + sts = idt_smb_safe(write_byte, pdev->client, ccode, + seq->data[idx]); + if (sts != 0) + return (int)sts; + } + + return 0; +} + +/* + * idt_smb_read_byte() - SMBus read method when I2C_SMBUS_BYTE_DATA operation + * is only available + * @pdev: Pointer to the driver data + * @seq: Buffer to read data to + */ +static int idt_smb_read_byte(struct idt_89hpesx_dev *pdev, + struct idt_smb_seq *seq) +{ + s32 sts; + u8 ccode; + int idx; + + /* Loop over the supplied buffer receiving byte one-by-one */ + for (idx = 0; idx < seq->bytecnt; idx++) { + /* Collect the command code byte */ + ccode = seq->ccode | CCODE_BYTE; + if (idx == 0) + ccode |= CCODE_START; + if (idx == seq->bytecnt - 1) + ccode |= CCODE_END; + + /* Read data from the device */ + sts = idt_smb_safe(read_byte, pdev->client, ccode); + if (sts < 0) + return (int)sts; + + seq->data[idx] = (u8)sts; + } + + return 0; +} + +/* + * idt_smb_write_word() - SMBus write method when I2C_SMBUS_BYTE_DATA and + * I2C_FUNC_SMBUS_WORD_DATA operations are available + * @pdev: Pointer to the driver data + * @seq: Sequence of data to be written + */ +static int idt_smb_write_word(struct idt_89hpesx_dev *pdev, + const struct idt_smb_seq *seq) +{ + s32 sts; + u8 ccode; + int idx, evencnt; + + /* Calculate the even count of data to send */ + evencnt = seq->bytecnt - (seq->bytecnt % 2); + + /* Loop over the supplied data sending two bytes at a time */ + for (idx = 0; idx < evencnt; idx += 2) { + /* Collect the command code byte */ + ccode = seq->ccode | CCODE_WORD; + if (idx == 0) + ccode |= CCODE_START; + if (idx == evencnt - 2) + ccode |= CCODE_END; + + /* Send word data to the device */ + sts = idt_smb_safe(write_word, pdev->client, ccode, + *(u16 *)&seq->data[idx]); + if (sts != 0) + return (int)sts; + } + + /* If there is odd number of bytes then send just one last byte */ + if (seq->bytecnt != evencnt) { + /* Collect the command code byte */ + ccode = seq->ccode | CCODE_BYTE | CCODE_END; + if (idx == 0) + ccode |= CCODE_START; + + /* Send byte data to the device */ + sts = idt_smb_safe(write_byte, pdev->client, ccode, + seq->data[idx]); + if (sts != 0) + return (int)sts; + } + + return 0; +} + +/* + * idt_smb_read_word() - SMBus read method when I2C_SMBUS_BYTE_DATA and + * I2C_FUNC_SMBUS_WORD_DATA operations are available + * @pdev: Pointer to the driver data + * @seq: Buffer to read data 
to + */ +static int idt_smb_read_word(struct idt_89hpesx_dev *pdev, + struct idt_smb_seq *seq) +{ + s32 sts; + u8 ccode; + int idx, evencnt; + + /* Calculate the even count of data to send */ + evencnt = seq->bytecnt - (seq->bytecnt % 2); + + /* Loop over the supplied data reading two bytes at a time */ + for (idx = 0; idx < evencnt; idx += 2) { + /* Collect the command code byte */ + ccode = seq->ccode | CCODE_WORD; + if (idx == 0) + ccode |= CCODE_START; + if (idx == evencnt - 2) + ccode |= CCODE_END; + + /* Read word data from the device */ + sts = idt_smb_safe(read_word, pdev->client, ccode); + if (sts < 0) + return (int)sts; + + *(u16 *)&seq->data[idx] = (u16)sts; + } + + /* If there is odd number of bytes then receive just one last byte */ + if (seq->bytecnt != evencnt) { + /* Collect the command code byte */ + ccode = seq->ccode | CCODE_BYTE | CCODE_END; + if (idx == 0) + ccode |= CCODE_START; + + /* Read last data byte from the device */ + sts = idt_smb_safe(read_byte, pdev->client, ccode); + if (sts < 0) + return (int)sts; + + seq->data[idx] = (u8)sts; + } + + return 0; +} + +/* + * idt_smb_write_block() - SMBus write method when I2C_SMBUS_BLOCK_DATA + * operation is available + * @pdev: Pointer to the driver data + * @seq: Sequence of data to be written + */ +static int idt_smb_write_block(struct idt_89hpesx_dev *pdev, + const struct idt_smb_seq *seq) +{ + u8 ccode; + + /* Return error if too much data passed to send */ + if (seq->bytecnt > I2C_SMBUS_BLOCK_MAX) + return -EINVAL; + + /* Collect the command code byte */ + ccode = seq->ccode | CCODE_BLOCK | CCODE_START | CCODE_END; + + /* Send block of data to the device */ + return idt_smb_safe(write_block, pdev->client, ccode, seq->bytecnt, + seq->data); +} + +/* + * idt_smb_read_block() - SMBus read method when I2C_SMBUS_BLOCK_DATA + * operation is available + * @pdev: Pointer to the driver data + * @seq: Buffer to read data to + */ +static int idt_smb_read_block(struct idt_89hpesx_dev *pdev, + struct idt_smb_seq *seq) +{ + s32 sts; + u8 ccode; + + /* Return error if too much data passed to send */ + if (seq->bytecnt > I2C_SMBUS_BLOCK_MAX) + return -EINVAL; + + /* Collect the command code byte */ + ccode = seq->ccode | CCODE_BLOCK | CCODE_START | CCODE_END; + + /* Read block of data from the device */ + sts = idt_smb_safe(read_block, pdev->client, ccode, seq->data); + if (sts != seq->bytecnt) + return (sts < 0 ? sts : -ENODATA); + + return 0; +} + +/* + * idt_smb_write_i2c_block() - SMBus write method when I2C_SMBUS_I2C_BLOCK_DATA + * operation is available + * @pdev: Pointer to the driver data + * @seq: Sequence of data to be written + * + * NOTE It's usual SMBus write block operation, except the actual data length is + * sent as first byte of data + */ +static int idt_smb_write_i2c_block(struct idt_89hpesx_dev *pdev, + const struct idt_smb_seq *seq) +{ + u8 ccode, buf[I2C_SMBUS_BLOCK_MAX + 1]; + + /* Return error if too much data passed to send */ + if (seq->bytecnt > I2C_SMBUS_BLOCK_MAX) + return -EINVAL; + + /* Collect the data to send. 
Length byte must be added prior the data */ + buf[0] = seq->bytecnt; + memcpy(&buf[1], seq->data, seq->bytecnt); + + /* Collect the command code byte */ + ccode = seq->ccode | CCODE_BLOCK | CCODE_START | CCODE_END; + + /* Send length and block of data to the device */ + return idt_smb_safe(write_i2c_block, pdev->client, ccode, + seq->bytecnt + 1, buf); +} + +/* + * idt_smb_read_i2c_block() - SMBus read method when I2C_SMBUS_I2C_BLOCK_DATA + * operation is available + * @pdev: Pointer to the driver data + * @seq: Buffer to read data to + * + * NOTE It's usual SMBus read block operation, except the actual data length is + * retrieved as first byte of data + */ +static int idt_smb_read_i2c_block(struct idt_89hpesx_dev *pdev, + struct idt_smb_seq *seq) +{ + u8 ccode, buf[I2C_SMBUS_BLOCK_MAX + 1]; + s32 sts; + + /* Return error if too much data passed to send */ + if (seq->bytecnt > I2C_SMBUS_BLOCK_MAX) + return -EINVAL; + + /* Collect the command code byte */ + ccode = seq->ccode | CCODE_BLOCK | CCODE_START | CCODE_END; + + /* Read length and block of data from the device */ + sts = idt_smb_safe(read_i2c_block, pdev->client, ccode, + seq->bytecnt + 1, buf); + if (sts != seq->bytecnt + 1) + return (sts < 0 ? sts : -ENODATA); + if (buf[0] != seq->bytecnt) + return -ENODATA; + + /* Copy retrieved data to the output data buffer */ + memcpy(seq->data, &buf[1], seq->bytecnt); + + return 0; +} + +/*=========================================================================== + * EEPROM IO-operations + *=========================================================================== + */ + +/* + * idt_eeprom_read_byte() - read just one byte from EEPROM + * @pdev: Pointer to the driver data + * @memaddr: Start EEPROM memory address + * @data: Data to be written to EEPROM + */ +static int idt_eeprom_read_byte(struct idt_89hpesx_dev *pdev, u16 memaddr, + u8 *data) +{ + struct device *dev = &pdev->client->dev; + struct idt_eeprom_seq eeseq; + struct idt_smb_seq smbseq; + int ret, retry; + + /* Initialize SMBus sequence fields */ + smbseq.ccode = pdev->iniccode | CCODE_EEPROM; + smbseq.data = (u8 *)&eeseq; + + /* + * Sometimes EEPROM may respond with NACK if it's busy with previous + * operation, so we need to perform a few attempts of read cycle + */ + retry = RETRY_CNT; + do { + /* Send EEPROM memory address to read data from */ + smbseq.bytecnt = EEPROM_WRRD_CNT; + eeseq.cmd = pdev->inieecmd | EEPROM_OP_READ; + eeseq.eeaddr = pdev->eeaddr; + eeseq.memaddr = cpu_to_le16(memaddr); + ret = pdev->smb_write(pdev, &smbseq); + if (ret != 0) { + dev_err(dev, "Failed to init eeprom addr 0x%02x", + memaddr); + break; + } + + /* Perform read operation */ + smbseq.bytecnt = EEPROM_RD_CNT; + ret = pdev->smb_read(pdev, &smbseq); + if (ret != 0) { + dev_err(dev, "Failed to read eeprom data 0x%02x", + memaddr); + break; + } + + /* Restart read operation if the device is busy */ + if (retry && (eeseq.cmd & EEPROM_NAERR)) { + dev_dbg(dev, "EEPROM busy, retry reading after %d ms", + EEPROM_TOUT); + msleep(EEPROM_TOUT); + continue; + } + + /* Check whether IDT successfully read data from EEPROM */ + if (eeseq.cmd & (EEPROM_NAERR | EEPROM_LAERR | EEPROM_MSS)) { + dev_err(dev, + "Communication with eeprom failed, cmd 0x%hhx", + eeseq.cmd); + ret = -EREMOTEIO; + break; + } + + /* Save retrieved data and exit the loop */ + *data = eeseq.data; + break; + } while (retry--); + + /* Return the status of operation */ + return ret; +} + +/* + * idt_eeprom_write() - EEPROM write operation + * @pdev: Pointer to the driver data + * @memaddr: 
Start EEPROM memory address + * @len: Length of data to be written + * @data: Data to be written to EEPROM + */ +static int idt_eeprom_write(struct idt_89hpesx_dev *pdev, u16 memaddr, u16 len, + const u8 *data) +{ + struct device *dev = &pdev->client->dev; + struct idt_eeprom_seq eeseq; + struct idt_smb_seq smbseq; + int ret; + u16 idx; + + /* Initialize SMBus sequence fields */ + smbseq.ccode = pdev->iniccode | CCODE_EEPROM; + smbseq.data = (u8 *)&eeseq; + + /* Send data byte-by-byte, checking if it is successfully written */ + for (idx = 0; idx < len; idx++, memaddr++) { + /* Lock IDT SMBus device */ + mutex_lock(&pdev->smb_mtx); + + /* Perform write operation */ + smbseq.bytecnt = EEPROM_WR_CNT; + eeseq.cmd = pdev->inieecmd | EEPROM_OP_WRITE; + eeseq.eeaddr = pdev->eeaddr; + eeseq.memaddr = cpu_to_le16(memaddr); + eeseq.data = data[idx]; + ret = pdev->smb_write(pdev, &smbseq); + if (ret != 0) { + dev_err(dev, + "Failed to write 0x%04hx:0x%02hhx to eeprom", + memaddr, data[idx]); + goto err_mutex_unlock; + } + + /* + * Check whether the data is successfully written by reading + * from the same EEPROM memory address. + */ + eeseq.data = ~data[idx]; + ret = idt_eeprom_read_byte(pdev, memaddr, &eeseq.data); + if (ret != 0) + goto err_mutex_unlock; + + /* Check whether the read byte is the same as written one */ + if (eeseq.data != data[idx]) { + dev_err(dev, "Values don't match 0x%02hhx != 0x%02hhx", + eeseq.data, data[idx]); + ret = -EREMOTEIO; + goto err_mutex_unlock; + } + + /* Unlock IDT SMBus device */ +err_mutex_unlock: + mutex_unlock(&pdev->smb_mtx); + if (ret != 0) + return ret; + } + + return 0; +} + +/* + * idt_eeprom_read() - EEPROM read operation + * @pdev: Pointer to the driver data + * @memaddr: Start EEPROM memory address + * @len: Length of data to read + * @buf: Buffer to read data to + */ +static int idt_eeprom_read(struct idt_89hpesx_dev *pdev, u16 memaddr, u16 len, + u8 *buf) +{ + int ret; + u16 idx; + + /* Read data byte-by-byte, retrying if it wasn't successful */ + for (idx = 0; idx < len; idx++, memaddr++) { + /* Lock IDT SMBus device */ + mutex_lock(&pdev->smb_mtx); + + /* Just read the byte to the buffer */ + ret = idt_eeprom_read_byte(pdev, memaddr, &buf[idx]); + + /* Unlock IDT SMBus device */ + mutex_unlock(&pdev->smb_mtx); + + /* Return error if read operation failed */ + if (ret != 0) + return ret; + } + + return 0; +} + +/*=========================================================================== + * CSR IO-operations + *=========================================================================== + */ + +/* + * idt_csr_write() - CSR write operation + * @pdev: Pointer to the driver data + * @csraddr: CSR address (with no two LS bits) + * @data: Data to be written to CSR + */ +static int idt_csr_write(struct idt_89hpesx_dev *pdev, u16 csraddr, + const u32 data) +{ + struct device *dev = &pdev->client->dev; + struct idt_csr_seq csrseq; + struct idt_smb_seq smbseq; + int ret; + + /* Initialize SMBus sequence fields */ + smbseq.ccode = pdev->iniccode | CCODE_CSR; + smbseq.data = (u8 *)&csrseq; + + /* Lock IDT SMBus device */ + mutex_lock(&pdev->smb_mtx); + + /* Perform write operation */ + smbseq.bytecnt = CSR_WR_CNT; + csrseq.cmd = pdev->inicsrcmd | CSR_OP_WRITE; + csrseq.csraddr = cpu_to_le16(csraddr); + csrseq.data = cpu_to_le32(data); + ret = pdev->smb_write(pdev, &smbseq); + if (ret != 0) { + dev_err(dev, "Failed to write 0x%04x: 0x%04x to csr", + CSR_REAL_ADDR(csraddr), data); + goto err_mutex_unlock; + } + + /* Send CSR address to read data from */ + 
smbseq.bytecnt = CSR_WRRD_CNT; + csrseq.cmd = pdev->inicsrcmd | CSR_OP_READ; + ret = pdev->smb_write(pdev, &smbseq); + if (ret != 0) { + dev_err(dev, "Failed to init csr address 0x%04x", + CSR_REAL_ADDR(csraddr)); + goto err_mutex_unlock; + } + + /* Perform read operation */ + smbseq.bytecnt = CSR_RD_CNT; + ret = pdev->smb_read(pdev, &smbseq); + if (ret != 0) { + dev_err(dev, "Failed to read csr 0x%04x", + CSR_REAL_ADDR(csraddr)); + goto err_mutex_unlock; + } + + /* Check whether IDT successfully retrieved CSR data */ + if (csrseq.cmd & (CSR_RERR | CSR_WERR)) { + dev_err(dev, "IDT failed to perform CSR r/w"); + ret = -EREMOTEIO; + goto err_mutex_unlock; + } + + /* Unlock IDT SMBus device */ +err_mutex_unlock: + mutex_unlock(&pdev->smb_mtx); + + return ret; +} + +/* + * idt_csr_read() - CSR read operation + * @pdev: Pointer to the driver data + * @csraddr: CSR address (with no two LS bits) + * @data: Data to be written to CSR + */ +static int idt_csr_read(struct idt_89hpesx_dev *pdev, u16 csraddr, u32 *data) +{ + struct device *dev = &pdev->client->dev; + struct idt_csr_seq csrseq; + struct idt_smb_seq smbseq; + int ret; + + /* Initialize SMBus sequence fields */ + smbseq.ccode = pdev->iniccode | CCODE_CSR; + smbseq.data = (u8 *)&csrseq; + + /* Lock IDT SMBus device */ + mutex_lock(&pdev->smb_mtx); + + /* Send CSR register address before reading it */ + smbseq.bytecnt = CSR_WRRD_CNT; + csrseq.cmd = pdev->inicsrcmd | CSR_OP_READ; + csrseq.csraddr = cpu_to_le16(csraddr); + ret = pdev->smb_write(pdev, &smbseq); + if (ret != 0) { + dev_err(dev, "Failed to init csr address 0x%04x", + CSR_REAL_ADDR(csraddr)); + goto err_mutex_unlock; + } + + /* Perform read operation */ + smbseq.bytecnt = CSR_RD_CNT; + ret = pdev->smb_read(pdev, &smbseq); + if (ret != 0) { + dev_err(dev, "Failed to read csr 0x%04x", + CSR_REAL_ADDR(csraddr)); + goto err_mutex_unlock; + } + + /* Check whether IDT successfully retrieved CSR data */ + if (csrseq.cmd & (CSR_RERR | CSR_WERR)) { + dev_err(dev, "IDT failed to perform CSR r/w"); + ret = -EREMOTEIO; + goto err_mutex_unlock; + } + + /* Save data retrieved from IDT */ + *data = le32_to_cpu(csrseq.data); + + /* Unlock IDT SMBus device */ +err_mutex_unlock: + mutex_unlock(&pdev->smb_mtx); + + return ret; +} + +/*=========================================================================== + * Sysfs/debugfs-nodes IO-operations + *=========================================================================== + */ + +/* + * eeprom_write() - EEPROM sysfs-node write callback + * @filep: Pointer to the file system node + * @kobj: Pointer to the kernel object related to the sysfs-node + * @attr: Attributes of the file + * @buf: Buffer to write data to + * @off: Offset at which data should be written to + * @count: Number of bytes to write + */ +static ssize_t eeprom_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + struct idt_89hpesx_dev *pdev; + int ret; + + /* Retrieve driver data */ + pdev = dev_get_drvdata(kobj_to_dev(kobj)); + + /* Perform EEPROM write operation */ + ret = idt_eeprom_write(pdev, (u16)off, (u16)count, (u8 *)buf); + return (ret != 0 ? 
ret : count); +} + +/* + * eeprom_read() - EEPROM sysfs-node read callback + * @filep: Pointer to the file system node + * @kobj: Pointer to the kernel object related to the sysfs-node + * @attr: Attributes of the file + * @buf: Buffer to write data to + * @off: Offset at which data should be written to + * @count: Number of bytes to write + */ +static ssize_t eeprom_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + struct idt_89hpesx_dev *pdev; + int ret; + + /* Retrieve driver data */ + pdev = dev_get_drvdata(kobj_to_dev(kobj)); + + /* Perform EEPROM read operation */ + ret = idt_eeprom_read(pdev, (u16)off, (u16)count, (u8 *)buf); + return (ret != 0 ? ret : count); +} + +/* + * idt_dbgfs_csr_write() - CSR debugfs-node write callback + * @filep: Pointer to the file system file descriptor + * @buf: Buffer to read data from + * @count: Size of the buffer + * @offp: Offset within the file + * + * It accepts either "0x:0x" for saving register address + * and writing value to specified DWORD register or "0x" for + * just saving register address in order to perform next read operation. + * + * WARNING No spaces are allowed. Incoming string must be strictly formated as: + * ":". Register address must be aligned within 4 bytes + * (one DWORD). + */ +static ssize_t idt_dbgfs_csr_write(struct file *filep, const char __user *ubuf, + size_t count, loff_t *offp) +{ + struct idt_89hpesx_dev *pdev = filep->private_data; + char *colon_ch, *csraddr_str, *csrval_str; + int ret, csraddr_len; + u32 csraddr, csrval; + char *buf; + + if (*offp) + return 0; + + /* Copy data from User-space */ + buf = memdup_user_nul(ubuf, count); + if (IS_ERR(buf)) + return PTR_ERR(buf); + + /* Find position of colon in the buffer */ + colon_ch = strnchr(buf, count, ':'); + + /* + * If there is colon passed then new CSR value should be parsed as + * well, so allocate buffer for CSR address substring. + * If no colon is found, then string must have just one number with + * no new CSR value + */ + if (colon_ch != NULL) { + csraddr_len = colon_ch - buf; + csraddr_str = + kmalloc(csraddr_len + 1, GFP_KERNEL); + if (csraddr_str == NULL) { + ret = -ENOMEM; + goto free_buf; + } + /* Copy the register address to the substring buffer */ + strncpy(csraddr_str, buf, csraddr_len); + csraddr_str[csraddr_len] = '\0'; + /* Register value must follow the colon */ + csrval_str = colon_ch + 1; + } else /* if (str_colon == NULL) */ { + csraddr_str = (char *)buf; /* Just to shut warning up */ + csraddr_len = strnlen(csraddr_str, count); + csrval_str = NULL; + } + + /* Convert CSR address to u32 value */ + ret = kstrtou32(csraddr_str, 0, &csraddr); + if (ret != 0) + goto free_csraddr_str; + + /* Check whether passed register address is valid */ + if (csraddr > CSR_MAX || !IS_ALIGNED(csraddr, SZ_4)) { + ret = -EINVAL; + goto free_csraddr_str; + } + + /* Shift register address to the right so to have u16 address */ + pdev->csr = (csraddr >> 2); + + /* Parse new CSR value and send it to IDT, if colon has been found */ + if (colon_ch != NULL) { + ret = kstrtou32(csrval_str, 0, &csrval); + if (ret != 0) + goto free_csraddr_str; + + ret = idt_csr_write(pdev, pdev->csr, csrval); + if (ret != 0) + goto free_csraddr_str; + } + + /* Free memory only if colon has been found */ +free_csraddr_str: + if (colon_ch != NULL) + kfree(csraddr_str); + + /* Free buffer allocated for data retrieved from User-space */ +free_buf: + kfree(buf); + + return (ret != 0 ? 
ret : count); +} + +/* + * idt_dbgfs_csr_read() - CSR debugfs-node read callback + * @filep: Pointer to the file system file descriptor + * @buf: Buffer to write data to + * @count: Size of the buffer + * @offp: Offset within the file + * + * It just prints the pair "0x:0x" to passed buffer. + */ +#define CSRBUF_SIZE ((size_t)32) +static ssize_t idt_dbgfs_csr_read(struct file *filep, char __user *ubuf, + size_t count, loff_t *offp) +{ + struct idt_89hpesx_dev *pdev = filep->private_data; + u32 csraddr, csrval; + char buf[CSRBUF_SIZE]; + int ret, size; + + /* Perform CSR read operation */ + ret = idt_csr_read(pdev, pdev->csr, &csrval); + if (ret != 0) + return ret; + + /* Shift register address to the left so to have real address */ + csraddr = ((u32)pdev->csr << 2); + + /* Print the "0x:0x" to buffer */ + size = snprintf(buf, CSRBUF_SIZE, "0x%05x:0x%08x\n", + (unsigned int)csraddr, (unsigned int)csrval); + + /* Copy data to User-space */ + return simple_read_from_buffer(ubuf, count, offp, buf, size); +} + +/* + * eeprom_attribute - EEPROM sysfs-node attributes + * + * NOTE Size will be changed in compliance with OF node. EEPROM attribute will + * be read-only as well if the corresponding flag is specified in OF node. + */ +static BIN_ATTR_RW(eeprom, EEPROM_DEF_SIZE); + +/* + * csr_dbgfs_ops - CSR debugfs-node read/write operations + */ +static const struct file_operations csr_dbgfs_ops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = idt_dbgfs_csr_write, + .read = idt_dbgfs_csr_read +}; + +/*=========================================================================== + * Driver init/deinit methods + *=========================================================================== + */ + +/* + * idt_set_defval() - disable EEPROM access by default + * @pdev: Pointer to the driver data + */ +static void idt_set_defval(struct idt_89hpesx_dev *pdev) +{ + /* If OF info is missing then use next values */ + pdev->eesize = 0; + pdev->eero = true; + pdev->inieecmd = 0; + pdev->eeaddr = 0; +} + +static const struct i2c_device_id ee_ids[]; + +/* + * idt_ee_match_id() - check whether the node belongs to compatible EEPROMs + */ +static const struct i2c_device_id *idt_ee_match_id(struct fwnode_handle *fwnode) +{ + const struct i2c_device_id *id = ee_ids; + const char *compatible, *p; + char devname[I2C_NAME_SIZE]; + int ret; + + ret = fwnode_property_read_string(fwnode, "compatible", &compatible); + if (ret) + return NULL; + + p = strchr(compatible, ','); + strscpy(devname, p ? 
p + 1 : compatible, sizeof(devname)); + /* Search through the device name */ + while (id->name[0]) { + if (strcmp(devname, id->name) == 0) + return id; + id++; + } + return NULL; +} + +/* + * idt_get_fw_data() - get IDT i2c-device parameters from device tree + * @pdev: Pointer to the driver data + */ +static void idt_get_fw_data(struct idt_89hpesx_dev *pdev) +{ + struct device *dev = &pdev->client->dev; + struct fwnode_handle *fwnode; + const struct i2c_device_id *ee_id = NULL; + u32 eeprom_addr; + int ret; + + device_for_each_child_node(dev, fwnode) { + ee_id = idt_ee_match_id(fwnode); + if (ee_id) + break; + + dev_warn(dev, "Skip unsupported EEPROM device %pfw\n", fwnode); + } + + /* If there is no fwnode EEPROM device, then set zero size */ + if (!ee_id) { + dev_warn(dev, "No fwnode, EEPROM access disabled"); + idt_set_defval(pdev); + return; + } + + /* Retrieve EEPROM size */ + pdev->eesize = (u32)ee_id->driver_data; + + /* Get custom EEPROM address from 'reg' attribute */ + ret = fwnode_property_read_u32(fwnode, "reg", &eeprom_addr); + if (ret || (eeprom_addr == 0)) { + dev_warn(dev, "No EEPROM reg found, use default address 0x%x", + EEPROM_DEF_ADDR); + pdev->inieecmd = 0; + pdev->eeaddr = EEPROM_DEF_ADDR << 1; + } else { + pdev->inieecmd = EEPROM_USA; + pdev->eeaddr = eeprom_addr << 1; + } + + /* Check EEPROM 'read-only' flag */ + if (fwnode_property_read_bool(fwnode, "read-only")) + pdev->eero = true; + else /* if (!fwnode_property_read_bool(node, "read-only")) */ + pdev->eero = false; + + fwnode_handle_put(fwnode); + dev_info(dev, "EEPROM of %d bytes found by 0x%x", + pdev->eesize, pdev->eeaddr); +} + +/* + * idt_create_pdev() - create and init data structure of the driver + * @client: i2c client of IDT PCIe-switch device + */ +static struct idt_89hpesx_dev *idt_create_pdev(struct i2c_client *client) +{ + struct idt_89hpesx_dev *pdev; + + /* Allocate memory for driver data */ + pdev = devm_kmalloc(&client->dev, sizeof(struct idt_89hpesx_dev), + GFP_KERNEL); + if (pdev == NULL) + return ERR_PTR(-ENOMEM); + + /* Initialize basic fields of the data */ + pdev->client = client; + i2c_set_clientdata(client, pdev); + + /* Read firmware nodes information */ + idt_get_fw_data(pdev); + + /* Initialize basic CSR CMD field - use full DWORD-sized r/w ops */ + pdev->inicsrcmd = CSR_DWE; + pdev->csr = CSR_DEF; + + /* Enable Packet Error Checking if it's supported by adapter */ + if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_PEC)) { + pdev->iniccode = CCODE_PEC; + client->flags |= I2C_CLIENT_PEC; + } else /* PEC is unsupported */ { + pdev->iniccode = 0; + } + + return pdev; +} + +/* + * idt_free_pdev() - free data structure of the driver + * @pdev: Pointer to the driver data + */ +static void idt_free_pdev(struct idt_89hpesx_dev *pdev) +{ + /* Clear driver data from device private field */ + i2c_set_clientdata(pdev->client, NULL); +} + +/* + * idt_set_smbus_ops() - set supported SMBus operations + * @pdev: Pointer to the driver data + * Return status of smbus check operations + */ +static int idt_set_smbus_ops(struct idt_89hpesx_dev *pdev) +{ + struct i2c_adapter *adapter = pdev->client->adapter; + struct device *dev = &pdev->client->dev; + + /* Check i2c adapter read functionality */ + if (i2c_check_functionality(adapter, + I2C_FUNC_SMBUS_READ_BLOCK_DATA)) { + pdev->smb_read = idt_smb_read_block; + dev_dbg(dev, "SMBus block-read op chosen"); + } else if (i2c_check_functionality(adapter, + I2C_FUNC_SMBUS_READ_I2C_BLOCK)) { + pdev->smb_read = idt_smb_read_i2c_block; + dev_dbg(dev, 
"SMBus i2c-block-read op chosen"); + } else if (i2c_check_functionality(adapter, + I2C_FUNC_SMBUS_READ_WORD_DATA) && + i2c_check_functionality(adapter, + I2C_FUNC_SMBUS_READ_BYTE_DATA)) { + pdev->smb_read = idt_smb_read_word; + dev_warn(dev, "Use slow word/byte SMBus read ops"); + } else if (i2c_check_functionality(adapter, + I2C_FUNC_SMBUS_READ_BYTE_DATA)) { + pdev->smb_read = idt_smb_read_byte; + dev_warn(dev, "Use slow byte SMBus read op"); + } else /* no supported smbus read operations */ { + dev_err(dev, "No supported SMBus read op"); + return -EPFNOSUPPORT; + } + + /* Check i2c adapter write functionality */ + if (i2c_check_functionality(adapter, + I2C_FUNC_SMBUS_WRITE_BLOCK_DATA)) { + pdev->smb_write = idt_smb_write_block; + dev_dbg(dev, "SMBus block-write op chosen"); + } else if (i2c_check_functionality(adapter, + I2C_FUNC_SMBUS_WRITE_I2C_BLOCK)) { + pdev->smb_write = idt_smb_write_i2c_block; + dev_dbg(dev, "SMBus i2c-block-write op chosen"); + } else if (i2c_check_functionality(adapter, + I2C_FUNC_SMBUS_WRITE_WORD_DATA) && + i2c_check_functionality(adapter, + I2C_FUNC_SMBUS_WRITE_BYTE_DATA)) { + pdev->smb_write = idt_smb_write_word; + dev_warn(dev, "Use slow word/byte SMBus write op"); + } else if (i2c_check_functionality(adapter, + I2C_FUNC_SMBUS_WRITE_BYTE_DATA)) { + pdev->smb_write = idt_smb_write_byte; + dev_warn(dev, "Use slow byte SMBus write op"); + } else /* no supported smbus write operations */ { + dev_err(dev, "No supported SMBus write op"); + return -EPFNOSUPPORT; + } + + /* Initialize IDT SMBus slave interface mutex */ + mutex_init(&pdev->smb_mtx); + + return 0; +} + +/* + * idt_check_dev() - check whether it's really IDT 89HPESx device + * @pdev: Pointer to the driver data + * Return status of i2c adapter check operation + */ +static int idt_check_dev(struct idt_89hpesx_dev *pdev) +{ + struct device *dev = &pdev->client->dev; + u32 viddid; + int ret; + + /* Read VID and DID directly from IDT memory space */ + ret = idt_csr_read(pdev, IDT_VIDDID_CSR, &viddid); + if (ret != 0) { + dev_err(dev, "Failed to read VID/DID"); + return ret; + } + + /* Check whether it's IDT device */ + if ((viddid & IDT_VID_MASK) != PCI_VENDOR_ID_IDT) { + dev_err(dev, "Got unsupported VID/DID: 0x%08x", viddid); + return -ENODEV; + } + + dev_info(dev, "Found IDT 89HPES device VID:0x%04x, DID:0x%04x", + (viddid & IDT_VID_MASK), (viddid >> 16)); + + return 0; +} + +/* + * idt_create_sysfs_files() - create sysfs attribute files + * @pdev: Pointer to the driver data + * Return status of operation + */ +static int idt_create_sysfs_files(struct idt_89hpesx_dev *pdev) +{ + struct device *dev = &pdev->client->dev; + int ret; + + /* Don't do anything if EEPROM isn't accessible */ + if (pdev->eesize == 0) { + dev_dbg(dev, "Skip creating sysfs-files"); + return 0; + } + + /* + * Allocate memory for attribute file and copy the declared EEPROM attr + * structure to change some of fields + */ + pdev->ee_file = devm_kmemdup(dev, &bin_attr_eeprom, + sizeof(*pdev->ee_file), GFP_KERNEL); + if (!pdev->ee_file) + return -ENOMEM; + + /* In case of read-only EEPROM get rid of write ability */ + if (pdev->eero) { + pdev->ee_file->attr.mode &= ~0200; + pdev->ee_file->write = NULL; + } + /* Create EEPROM sysfs file */ + pdev->ee_file->size = pdev->eesize; + ret = sysfs_create_bin_file(&dev->kobj, pdev->ee_file); + if (ret != 0) { + dev_err(dev, "Failed to create EEPROM sysfs-node"); + return ret; + } + + return 0; +} + +/* + * idt_remove_sysfs_files() - remove sysfs attribute files + * @pdev: Pointer to the driver 
data + */ +static void idt_remove_sysfs_files(struct idt_89hpesx_dev *pdev) +{ + struct device *dev = &pdev->client->dev; + + /* Don't do anything if EEPROM wasn't accessible */ + if (pdev->eesize == 0) + return; + + /* Remove EEPROM sysfs file */ + sysfs_remove_bin_file(&dev->kobj, pdev->ee_file); +} + +/* + * idt_create_dbgfs_files() - create debugfs files + * @pdev: Pointer to the driver data + */ +#define CSRNAME_LEN ((size_t)32) +static void idt_create_dbgfs_files(struct idt_89hpesx_dev *pdev) +{ + struct i2c_client *cli = pdev->client; + char fname[CSRNAME_LEN]; + + /* Create Debugfs directory for CSR file */ + snprintf(fname, CSRNAME_LEN, "%d-%04hx", cli->adapter->nr, cli->addr); + pdev->csr_dir = debugfs_create_dir(fname, csr_dbgdir); + + /* Create Debugfs file for CSR read/write operations */ + debugfs_create_file(cli->name, 0600, pdev->csr_dir, pdev, + &csr_dbgfs_ops); +} + +/* + * idt_remove_dbgfs_files() - remove debugfs files + * @pdev: Pointer to the driver data + */ +static void idt_remove_dbgfs_files(struct idt_89hpesx_dev *pdev) +{ + /* Remove CSR directory and it sysfs-node */ + debugfs_remove_recursive(pdev->csr_dir); +} + +/* + * idt_probe() - IDT 89HPESx driver probe() callback method + */ +static int idt_probe(struct i2c_client *client) +{ + struct idt_89hpesx_dev *pdev; + int ret; + + /* Create driver data */ + pdev = idt_create_pdev(client); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); + + /* Set SMBus operations */ + ret = idt_set_smbus_ops(pdev); + if (ret != 0) + goto err_free_pdev; + + /* Check whether it is truly IDT 89HPESx device */ + ret = idt_check_dev(pdev); + if (ret != 0) + goto err_free_pdev; + + /* Create sysfs files */ + ret = idt_create_sysfs_files(pdev); + if (ret != 0) + goto err_free_pdev; + + /* Create debugfs files */ + idt_create_dbgfs_files(pdev); + + return 0; + +err_free_pdev: + idt_free_pdev(pdev); + + return ret; +} + +/* + * idt_remove() - IDT 89HPESx driver remove() callback method + */ +static void idt_remove(struct i2c_client *client) +{ + struct idt_89hpesx_dev *pdev = i2c_get_clientdata(client); + + /* Remove debugfs files first */ + idt_remove_dbgfs_files(pdev); + + /* Remove sysfs files */ + idt_remove_sysfs_files(pdev); + + /* Discard driver data structure */ + idt_free_pdev(pdev); +} + +/* + * ee_ids - array of supported EEPROMs + */ +static const struct i2c_device_id ee_ids[] = { + { "24c32", 4096}, + { "24c64", 8192}, + { "24c128", 16384}, + { "24c256", 32768}, + { "24c512", 65536}, + {} +}; +MODULE_DEVICE_TABLE(i2c, ee_ids); + +/* + * idt_ids - supported IDT 89HPESx devices + */ +static const struct i2c_device_id idt_ids[] = { + { "89hpes8nt2", 0 }, + { "89hpes12nt3", 0 }, + + { "89hpes24nt6ag2", 0 }, + { "89hpes32nt8ag2", 0 }, + { "89hpes32nt8bg2", 0 }, + { "89hpes12nt12g2", 0 }, + { "89hpes16nt16g2", 0 }, + { "89hpes24nt24g2", 0 }, + { "89hpes32nt24ag2", 0 }, + { "89hpes32nt24bg2", 0 }, + + { "89hpes12n3", 0 }, + { "89hpes12n3a", 0 }, + { "89hpes24n3", 0 }, + { "89hpes24n3a", 0 }, + + { "89hpes32h8", 0 }, + { "89hpes32h8g2", 0 }, + { "89hpes48h12", 0 }, + { "89hpes48h12g2", 0 }, + { "89hpes48h12ag2", 0 }, + { "89hpes16h16", 0 }, + { "89hpes22h16", 0 }, + { "89hpes22h16g2", 0 }, + { "89hpes34h16", 0 }, + { "89hpes34h16g2", 0 }, + { "89hpes64h16", 0 }, + { "89hpes64h16g2", 0 }, + { "89hpes64h16ag2", 0 }, + + /* { "89hpes3t3", 0 }, // No SMBus-slave iface */ + { "89hpes12t3g2", 0 }, + { "89hpes24t3g2", 0 }, + /* { "89hpes4t4", 0 }, // No SMBus-slave iface */ + { "89hpes16t4", 0 }, + { "89hpes4t4g2", 0 }, + { "89hpes10t4g2", 0 }, 
+ { "89hpes16t4g2", 0 }, + { "89hpes16t4ag2", 0 }, + { "89hpes5t5", 0 }, + { "89hpes6t5", 0 }, + { "89hpes8t5", 0 }, + { "89hpes8t5a", 0 }, + { "89hpes24t6", 0 }, + { "89hpes6t6g2", 0 }, + { "89hpes24t6g2", 0 }, + { "89hpes16t7", 0 }, + { "89hpes32t8", 0 }, + { "89hpes32t8g2", 0 }, + { "89hpes48t12", 0 }, + { "89hpes48t12g2", 0 }, + { /* END OF LIST */ } +}; +MODULE_DEVICE_TABLE(i2c, idt_ids); + +static const struct of_device_id idt_of_match[] = { + { .compatible = "idt,89hpes8nt2", }, + { .compatible = "idt,89hpes12nt3", }, + + { .compatible = "idt,89hpes24nt6ag2", }, + { .compatible = "idt,89hpes32nt8ag2", }, + { .compatible = "idt,89hpes32nt8bg2", }, + { .compatible = "idt,89hpes12nt12g2", }, + { .compatible = "idt,89hpes16nt16g2", }, + { .compatible = "idt,89hpes24nt24g2", }, + { .compatible = "idt,89hpes32nt24ag2", }, + { .compatible = "idt,89hpes32nt24bg2", }, + + { .compatible = "idt,89hpes12n3", }, + { .compatible = "idt,89hpes12n3a", }, + { .compatible = "idt,89hpes24n3", }, + { .compatible = "idt,89hpes24n3a", }, + + { .compatible = "idt,89hpes32h8", }, + { .compatible = "idt,89hpes32h8g2", }, + { .compatible = "idt,89hpes48h12", }, + { .compatible = "idt,89hpes48h12g2", }, + { .compatible = "idt,89hpes48h12ag2", }, + { .compatible = "idt,89hpes16h16", }, + { .compatible = "idt,89hpes22h16", }, + { .compatible = "idt,89hpes22h16g2", }, + { .compatible = "idt,89hpes34h16", }, + { .compatible = "idt,89hpes34h16g2", }, + { .compatible = "idt,89hpes64h16", }, + { .compatible = "idt,89hpes64h16g2", }, + { .compatible = "idt,89hpes64h16ag2", }, + + { .compatible = "idt,89hpes12t3g2", }, + { .compatible = "idt,89hpes24t3g2", }, + + { .compatible = "idt,89hpes16t4", }, + { .compatible = "idt,89hpes4t4g2", }, + { .compatible = "idt,89hpes10t4g2", }, + { .compatible = "idt,89hpes16t4g2", }, + { .compatible = "idt,89hpes16t4ag2", }, + { .compatible = "idt,89hpes5t5", }, + { .compatible = "idt,89hpes6t5", }, + { .compatible = "idt,89hpes8t5", }, + { .compatible = "idt,89hpes8t5a", }, + { .compatible = "idt,89hpes24t6", }, + { .compatible = "idt,89hpes6t6g2", }, + { .compatible = "idt,89hpes24t6g2", }, + { .compatible = "idt,89hpes16t7", }, + { .compatible = "idt,89hpes32t8", }, + { .compatible = "idt,89hpes32t8g2", }, + { .compatible = "idt,89hpes48t12", }, + { .compatible = "idt,89hpes48t12g2", }, + { }, +}; +MODULE_DEVICE_TABLE(of, idt_of_match); + +/* + * idt_driver - IDT 89HPESx driver structure + */ +static struct i2c_driver idt_driver = { + .driver = { + .name = IDT_NAME, + .of_match_table = idt_of_match, + }, + .probe = idt_probe, + .remove = idt_remove, + .id_table = idt_ids, +}; + +/* + * idt_init() - IDT 89HPESx driver init() callback method + */ +static int __init idt_init(void) +{ + int ret; + + /* Create Debugfs directory first */ + if (debugfs_initialized()) + csr_dbgdir = debugfs_create_dir("idt_csr", NULL); + + /* Add new i2c-device driver */ + ret = i2c_add_driver(&idt_driver); + if (ret) { + debugfs_remove_recursive(csr_dbgdir); + return ret; + } + + return 0; +} +module_init(idt_init); + +/* + * idt_exit() - IDT 89HPESx driver exit() callback method + */ +static void __exit idt_exit(void) +{ + /* Discard debugfs directory and all files if any */ + debugfs_remove_recursive(csr_dbgdir); + + /* Unregister i2c-device driver */ + i2c_del_driver(&idt_driver); +} +module_exit(idt_exit); diff --git a/drivers/misc/eeprom/max6875.c b/drivers/misc/eeprom/max6875.c new file mode 100644 index 0000000000..cb6b1efeaf --- /dev/null +++ b/drivers/misc/eeprom/max6875.c @@ -0,0 +1,204 @@ +// 
SPDX-License-Identifier: GPL-2.0-only +/* + * max6875.c - driver for MAX6874/MAX6875 + * + * Copyright (C) 2005 Ben Gardner + * + * Based on eeprom.c + * + * The MAX6875 has a bank of registers and two banks of EEPROM. + * Address ranges are defined as follows: + * * 0x0000 - 0x0046 = configuration registers + * * 0x8000 - 0x8046 = configuration EEPROM + * * 0x8100 - 0x82FF = user EEPROM + * + * This driver makes the user EEPROM available for read. + * + * The registers & config EEPROM should be accessed via i2c-dev. + * + * The MAX6875 ignores the lowest address bit, so each chip responds to + * two addresses - 0x50/0x51 and 0x52/0x53. + * + * Note that the MAX6875 uses i2c_smbus_write_byte_data() to set the read + * address, so this driver is destructive if loaded for the wrong EEPROM chip. + */ + +#include +#include +#include +#include +#include + +/* The MAX6875 can only read/write 16 bytes at a time */ +#define SLICE_SIZE 16 +#define SLICE_BITS 4 + +/* USER EEPROM is at addresses 0x8100 - 0x82FF */ +#define USER_EEPROM_BASE 0x8100 +#define USER_EEPROM_SIZE 0x0200 +#define USER_EEPROM_SLICES 32 + +/* MAX6875 commands */ +#define MAX6875_CMD_BLK_READ 0x84 + +/* Each client has this additional data */ +struct max6875_data { + struct i2c_client *fake_client; + struct mutex update_lock; + + u32 valid; + u8 data[USER_EEPROM_SIZE]; + unsigned long last_updated[USER_EEPROM_SLICES]; +}; + +static void max6875_update_slice(struct i2c_client *client, int slice) +{ + struct max6875_data *data = i2c_get_clientdata(client); + int i, j, addr; + u8 *buf; + + if (slice >= USER_EEPROM_SLICES) + return; + + mutex_lock(&data->update_lock); + + buf = &data->data[slice << SLICE_BITS]; + + if (!(data->valid & (1 << slice)) || + time_after(jiffies, data->last_updated[slice])) { + + dev_dbg(&client->dev, "Starting update of slice %u\n", slice); + + data->valid &= ~(1 << slice); + + addr = USER_EEPROM_BASE + (slice << SLICE_BITS); + + /* select the eeprom address */ + if (i2c_smbus_write_byte_data(client, addr >> 8, addr & 0xFF)) { + dev_err(&client->dev, "address set failed\n"); + goto exit_up; + } + + if (i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_READ_I2C_BLOCK)) { + if (i2c_smbus_read_i2c_block_data(client, + MAX6875_CMD_BLK_READ, + SLICE_SIZE, + buf) != SLICE_SIZE) { + goto exit_up; + } + } else { + for (i = 0; i < SLICE_SIZE; i++) { + j = i2c_smbus_read_byte(client); + if (j < 0) { + goto exit_up; + } + buf[i] = j; + } + } + data->last_updated[slice] = jiffies; + data->valid |= (1 << slice); + } +exit_up: + mutex_unlock(&data->update_lock); +} + +static ssize_t max6875_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + struct i2c_client *client = kobj_to_i2c_client(kobj); + struct max6875_data *data = i2c_get_clientdata(client); + int slice, max_slice; + + /* refresh slices which contain requested bytes */ + max_slice = (off + count - 1) >> SLICE_BITS; + for (slice = (off >> SLICE_BITS); slice <= max_slice; slice++) + max6875_update_slice(client, slice); + + memcpy(buf, &data->data[off], count); + + return count; +} + +static const struct bin_attribute user_eeprom_attr = { + .attr = { + .name = "eeprom", + .mode = S_IRUGO, + }, + .size = USER_EEPROM_SIZE, + .read = max6875_read, +}; + +static int max6875_probe(struct i2c_client *client) +{ + struct i2c_adapter *adapter = client->adapter; + struct max6875_data *data; + int err; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WRITE_BYTE_DATA + | 
I2C_FUNC_SMBUS_READ_BYTE)) + return -ENODEV; + + /* Only bind to even addresses */ + if (client->addr & 1) + return -ENODEV; + + data = kzalloc(sizeof(struct max6875_data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + /* A fake client is created on the odd address */ + data->fake_client = i2c_new_dummy_device(client->adapter, client->addr + 1); + if (IS_ERR(data->fake_client)) { + err = PTR_ERR(data->fake_client); + goto exit_kfree; + } + + /* Init real i2c_client */ + i2c_set_clientdata(client, data); + mutex_init(&data->update_lock); + + err = sysfs_create_bin_file(&client->dev.kobj, &user_eeprom_attr); + if (err) + goto exit_remove_fake; + + return 0; + +exit_remove_fake: + i2c_unregister_device(data->fake_client); +exit_kfree: + kfree(data); + return err; +} + +static void max6875_remove(struct i2c_client *client) +{ + struct max6875_data *data = i2c_get_clientdata(client); + + i2c_unregister_device(data->fake_client); + + sysfs_remove_bin_file(&client->dev.kobj, &user_eeprom_attr); + kfree(data); +} + +static const struct i2c_device_id max6875_id[] = { + { "max6875", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, max6875_id); + +static struct i2c_driver max6875_driver = { + .driver = { + .name = "max6875", + }, + .probe = max6875_probe, + .remove = max6875_remove, + .id_table = max6875_id, +}; + +module_i2c_driver(max6875_driver); + +MODULE_AUTHOR("Ben Gardner "); +MODULE_DESCRIPTION("MAX6875 driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c new file mode 100644 index 0000000000..76511d279a --- /dev/null +++ b/drivers/misc/enclosure.c @@ -0,0 +1,683 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Enclosure Services + * + * Copyright (C) 2008 James Bottomley + * +**----------------------------------------------------------------------------- +** +** +**----------------------------------------------------------------------------- +*/ +#include +#include +#include +#include +#include +#include +#include +#include + +static LIST_HEAD(container_list); +static DEFINE_MUTEX(container_list_lock); +static struct class enclosure_class; + +/** + * enclosure_find - find an enclosure given a parent device + * @dev: the parent to match against + * @start: Optional enclosure device to start from (NULL if none) + * + * Looks through the list of registered enclosures to find all those + * with @dev as a parent. Returns NULL if no enclosure is + * found. @start can be used as a starting point to obtain multiple + * enclosures per parent (should begin with NULL and then be set to + * each returned enclosure device). Obtains a reference to the + * enclosure class device which must be released with put_device(). + * If @start is not NULL, a reference must be taken on it which is + * released before returning (this allows a loop through all + * enclosures to exit with only the reference on the enclosure of + * interest held). Note that the @dev may correspond to the actual + * device housing the enclosure, in which case no iteration via @start + * is required. 
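+ *
+ * A minimal iteration sketch (illustrative only; 'parent' stands for the
+ * device being matched against):
+ *
+ *	struct enclosure_device *edev = NULL;
+ *
+ *	while ((edev = enclosure_find(parent, edev)) != NULL) {
+ *		... inspect edev; breaking out of the loop keeps its
+ *		    reference, which must then be dropped with
+ *		    put_device(&edev->edev) ...
+ *	}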
+ */ +struct enclosure_device *enclosure_find(struct device *dev, + struct enclosure_device *start) +{ + struct enclosure_device *edev; + + mutex_lock(&container_list_lock); + edev = list_prepare_entry(start, &container_list, node); + if (start) + put_device(&start->edev); + + list_for_each_entry_continue(edev, &container_list, node) { + struct device *parent = edev->edev.parent; + /* parent might not be immediate, so iterate up to + * the root of the tree if necessary */ + while (parent) { + if (parent == dev) { + get_device(&edev->edev); + mutex_unlock(&container_list_lock); + return edev; + } + parent = parent->parent; + } + } + mutex_unlock(&container_list_lock); + + return NULL; +} +EXPORT_SYMBOL_GPL(enclosure_find); + +/** + * enclosure_for_each_device - calls a function for each enclosure + * @fn: the function to call + * @data: the data to pass to each call + * + * Loops over all the enclosures calling the function. + * + * Note, this function uses a mutex which will be held across calls to + * @fn, so it must have non atomic context, and @fn may (although it + * should not) sleep or otherwise cause the mutex to be held for + * indefinite periods + */ +int enclosure_for_each_device(int (*fn)(struct enclosure_device *, void *), + void *data) +{ + int error = 0; + struct enclosure_device *edev; + + mutex_lock(&container_list_lock); + list_for_each_entry(edev, &container_list, node) { + error = fn(edev, data); + if (error) + break; + } + mutex_unlock(&container_list_lock); + + return error; +} +EXPORT_SYMBOL_GPL(enclosure_for_each_device); + +/** + * enclosure_register - register device as an enclosure + * + * @dev: device containing the enclosure + * @name: chosen device name + * @components: number of components in the enclosure + * @cb: platform call-backs + * + * This sets up the device for being an enclosure. Note that @dev does + * not have to be a dedicated enclosure device. 
It may be some other type + * of device that additionally responds to enclosure services + */ +struct enclosure_device * +enclosure_register(struct device *dev, const char *name, int components, + struct enclosure_component_callbacks *cb) +{ + struct enclosure_device *edev = + kzalloc(struct_size(edev, component, components), GFP_KERNEL); + int err, i; + + BUG_ON(!cb); + + if (!edev) + return ERR_PTR(-ENOMEM); + + edev->components = components; + + edev->edev.class = &enclosure_class; + edev->edev.parent = get_device(dev); + edev->cb = cb; + dev_set_name(&edev->edev, "%s", name); + err = device_register(&edev->edev); + if (err) + goto err; + + for (i = 0; i < components; i++) { + edev->component[i].number = -1; + edev->component[i].slot = -1; + edev->component[i].power_status = -1; + } + + mutex_lock(&container_list_lock); + list_add_tail(&edev->node, &container_list); + mutex_unlock(&container_list_lock); + + return edev; + + err: + put_device(edev->edev.parent); + kfree(edev); + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(enclosure_register); + +static struct enclosure_component_callbacks enclosure_null_callbacks; + +/** + * enclosure_unregister - remove an enclosure + * + * @edev: the registered enclosure to remove; + */ +void enclosure_unregister(struct enclosure_device *edev) +{ + int i; + + mutex_lock(&container_list_lock); + list_del(&edev->node); + mutex_unlock(&container_list_lock); + + for (i = 0; i < edev->components; i++) + if (edev->component[i].number != -1) + device_unregister(&edev->component[i].cdev); + + /* prevent any callbacks into service user */ + edev->cb = &enclosure_null_callbacks; + device_unregister(&edev->edev); +} +EXPORT_SYMBOL_GPL(enclosure_unregister); + +#define ENCLOSURE_NAME_SIZE 64 +#define COMPONENT_NAME_SIZE 64 + +static void enclosure_link_name(struct enclosure_component *cdev, char *name) +{ + strcpy(name, "enclosure_device:"); + strcat(name, dev_name(&cdev->cdev)); +} + +static void enclosure_remove_links(struct enclosure_component *cdev) +{ + char name[ENCLOSURE_NAME_SIZE]; + + enclosure_link_name(cdev, name); + + /* + * In odd circumstances, like multipath devices, something else may + * already have removed the links, so check for this condition first. 
+ */ + if (cdev->dev->kobj.sd) + sysfs_remove_link(&cdev->dev->kobj, name); + + if (cdev->cdev.kobj.sd) + sysfs_remove_link(&cdev->cdev.kobj, "device"); +} + +static int enclosure_add_links(struct enclosure_component *cdev) +{ + int error; + char name[ENCLOSURE_NAME_SIZE]; + + error = sysfs_create_link(&cdev->cdev.kobj, &cdev->dev->kobj, "device"); + if (error) + return error; + + enclosure_link_name(cdev, name); + error = sysfs_create_link(&cdev->dev->kobj, &cdev->cdev.kobj, name); + if (error) + sysfs_remove_link(&cdev->cdev.kobj, "device"); + + return error; +} + +static void enclosure_release(struct device *cdev) +{ + struct enclosure_device *edev = to_enclosure_device(cdev); + + put_device(cdev->parent); + kfree(edev); +} + +static void enclosure_component_release(struct device *dev) +{ + struct enclosure_component *cdev = to_enclosure_component(dev); + + if (cdev->dev) { + enclosure_remove_links(cdev); + put_device(cdev->dev); + } + put_device(dev->parent); +} + +static struct enclosure_component * +enclosure_component_find_by_name(struct enclosure_device *edev, + const char *name) +{ + int i; + const char *cname; + struct enclosure_component *ecomp; + + if (!edev || !name || !name[0]) + return NULL; + + for (i = 0; i < edev->components; i++) { + ecomp = &edev->component[i]; + cname = dev_name(&ecomp->cdev); + if (ecomp->number != -1 && + cname && cname[0] && + !strcmp(cname, name)) + return ecomp; + } + + return NULL; +} + +static const struct attribute_group *enclosure_component_groups[]; + +/** + * enclosure_component_alloc - prepare a new enclosure component + * @edev: the enclosure to add the component + * @number: the device number + * @type: the type of component being added + * @name: an optional name to appear in sysfs (leave NULL if none) + * + * The name is optional for enclosures that give their components a unique + * name. If not, leave the field NULL and a name will be assigned. + * + * Returns a pointer to the enclosure component or an error. + */ +struct enclosure_component * +enclosure_component_alloc(struct enclosure_device *edev, + unsigned int number, + enum enclosure_component_type type, + const char *name) +{ + struct enclosure_component *ecomp; + struct device *cdev; + int i; + char newname[COMPONENT_NAME_SIZE]; + + if (number >= edev->components) + return ERR_PTR(-EINVAL); + + ecomp = &edev->component[number]; + + if (ecomp->number != -1) + return ERR_PTR(-EINVAL); + + ecomp->type = type; + ecomp->number = number; + cdev = &ecomp->cdev; + cdev->parent = get_device(&edev->edev); + + if (name && name[0]) { + /* Some hardware (e.g. enclosure in RX300 S6) has components + * with non unique names. Registering duplicates in sysfs + * will lead to warnings during bootup. So make the names + * unique by appending consecutive numbers -1, -2, ... 
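+		 * (e.g. a second component reported as "Slot 1" would be
+		 * registered as "Slot 1-1", a third as "Slot 1-2"; the base
+		 * name here is only an example).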
*/ + i = 1; + snprintf(newname, COMPONENT_NAME_SIZE, + "%s", name); + while (enclosure_component_find_by_name(edev, newname)) + snprintf(newname, COMPONENT_NAME_SIZE, + "%s-%i", name, i++); + dev_set_name(cdev, "%s", newname); + } else + dev_set_name(cdev, "%u", number); + + cdev->release = enclosure_component_release; + cdev->groups = enclosure_component_groups; + + return ecomp; +} +EXPORT_SYMBOL_GPL(enclosure_component_alloc); + +/** + * enclosure_component_register - publishes an initialized enclosure component + * @ecomp: component to add + * + * Returns 0 on successful registration, releases the component otherwise + */ +int enclosure_component_register(struct enclosure_component *ecomp) +{ + struct device *cdev; + int err; + + cdev = &ecomp->cdev; + err = device_register(cdev); + if (err) { + ecomp->number = -1; + put_device(cdev); + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(enclosure_component_register); + +/** + * enclosure_add_device - add a device as being part of an enclosure + * @edev: the enclosure device being added to. + * @component: the number of the component + * @dev: the device being added + * + * Declares a real device to reside in slot (or identifier) @num of an + * enclosure. This will cause the relevant sysfs links to appear. + * This function may also be used to change a device associated with + * an enclosure without having to call enclosure_remove_device() in + * between. + * + * Returns zero on success or an error. + */ +int enclosure_add_device(struct enclosure_device *edev, int component, + struct device *dev) +{ + struct enclosure_component *cdev; + int err; + + if (!edev || component >= edev->components) + return -EINVAL; + + cdev = &edev->component[component]; + + if (cdev->dev == dev) + return -EEXIST; + + if (cdev->dev) { + enclosure_remove_links(cdev); + put_device(cdev->dev); + } + cdev->dev = get_device(dev); + err = enclosure_add_links(cdev); + if (err) { + put_device(cdev->dev); + cdev->dev = NULL; + } + return err; +} +EXPORT_SYMBOL_GPL(enclosure_add_device); + +/** + * enclosure_remove_device - remove a device from an enclosure + * @edev: the enclosure device + * @dev: device to remove/put + * + * Returns zero on success or an error. 
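+ * On success the reference taken on @dev by enclosure_add_device() is
+ * dropped and the component's sysfs links are removed.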
+ * + */ +int enclosure_remove_device(struct enclosure_device *edev, struct device *dev) +{ + struct enclosure_component *cdev; + int i; + + if (!edev || !dev) + return -EINVAL; + + for (i = 0; i < edev->components; i++) { + cdev = &edev->component[i]; + if (cdev->dev == dev) { + enclosure_remove_links(cdev); + put_device(dev); + cdev->dev = NULL; + return 0; + } + } + return -ENODEV; +} +EXPORT_SYMBOL_GPL(enclosure_remove_device); + +/* + * sysfs pieces below + */ + +static ssize_t components_show(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct enclosure_device *edev = to_enclosure_device(cdev); + + return sysfs_emit(buf, "%d\n", edev->components); +} +static DEVICE_ATTR_RO(components); + +static ssize_t id_show(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + struct enclosure_device *edev = to_enclosure_device(cdev); + + if (edev->cb->show_id) + return edev->cb->show_id(edev, buf); + return -EINVAL; +} +static DEVICE_ATTR_RO(id); + +static struct attribute *enclosure_class_attrs[] = { + &dev_attr_components.attr, + &dev_attr_id.attr, + NULL, +}; +ATTRIBUTE_GROUPS(enclosure_class); + +static struct class enclosure_class = { + .name = "enclosure", + .dev_release = enclosure_release, + .dev_groups = enclosure_class_groups, +}; + +static const char *const enclosure_status[] = { + [ENCLOSURE_STATUS_UNSUPPORTED] = "unsupported", + [ENCLOSURE_STATUS_OK] = "OK", + [ENCLOSURE_STATUS_CRITICAL] = "critical", + [ENCLOSURE_STATUS_NON_CRITICAL] = "non-critical", + [ENCLOSURE_STATUS_UNRECOVERABLE] = "unrecoverable", + [ENCLOSURE_STATUS_NOT_INSTALLED] = "not installed", + [ENCLOSURE_STATUS_UNKNOWN] = "unknown", + [ENCLOSURE_STATUS_UNAVAILABLE] = "unavailable", + [ENCLOSURE_STATUS_MAX] = NULL, +}; + +static const char *const enclosure_type[] = { + [ENCLOSURE_COMPONENT_DEVICE] = "device", + [ENCLOSURE_COMPONENT_ARRAY_DEVICE] = "array device", +}; + +static ssize_t get_component_fault(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + + if (edev->cb->get_fault) + edev->cb->get_fault(edev, ecomp); + return sysfs_emit(buf, "%d\n", ecomp->fault); +} + +static ssize_t set_component_fault(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + int val = simple_strtoul(buf, NULL, 0); + + if (edev->cb->set_fault) + edev->cb->set_fault(edev, ecomp, val); + return count; +} + +static ssize_t get_component_status(struct device *cdev, + struct device_attribute *attr,char *buf) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + + if (edev->cb->get_status) + edev->cb->get_status(edev, ecomp); + return sysfs_emit(buf, "%s\n", enclosure_status[ecomp->status]); +} + +static ssize_t set_component_status(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + int i; + + for (i = 0; enclosure_status[i]; i++) { + if (strncmp(buf, enclosure_status[i], + strlen(enclosure_status[i])) == 0 && + (buf[strlen(enclosure_status[i])] == '\n' || + buf[strlen(enclosure_status[i])] == '\0')) + break; + } + 
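+	/*
+	 * At this point 'i' indexes either the matched status string or the
+	 * terminating NULL entry of enclosure_status[], in which case the
+	 * check below rejects the write with -EINVAL.
+	 */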
+ if (enclosure_status[i] && edev->cb->set_status) { + edev->cb->set_status(edev, ecomp, i); + return count; + } else + return -EINVAL; +} + +static ssize_t get_component_active(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + + if (edev->cb->get_active) + edev->cb->get_active(edev, ecomp); + return sysfs_emit(buf, "%d\n", ecomp->active); +} + +static ssize_t set_component_active(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + int val = simple_strtoul(buf, NULL, 0); + + if (edev->cb->set_active) + edev->cb->set_active(edev, ecomp, val); + return count; +} + +static ssize_t get_component_locate(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + + if (edev->cb->get_locate) + edev->cb->get_locate(edev, ecomp); + return sysfs_emit(buf, "%d\n", ecomp->locate); +} + +static ssize_t set_component_locate(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + int val = simple_strtoul(buf, NULL, 0); + + if (edev->cb->set_locate) + edev->cb->set_locate(edev, ecomp, val); + return count; +} + +static ssize_t get_component_power_status(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + + if (edev->cb->get_power_status) + edev->cb->get_power_status(edev, ecomp); + + /* If still uninitialized, the callback failed or does not exist. */ + if (ecomp->power_status == -1) + return (edev->cb->get_power_status) ? -EIO : -ENOTTY; + + return sysfs_emit(buf, "%s\n", ecomp->power_status ? 
"on" : "off"); +} + +static ssize_t set_component_power_status(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + int val; + + if (strncmp(buf, "on", 2) == 0 && + (buf[2] == '\n' || buf[2] == '\0')) + val = 1; + else if (strncmp(buf, "off", 3) == 0 && + (buf[3] == '\n' || buf[3] == '\0')) + val = 0; + else + return -EINVAL; + + if (edev->cb->set_power_status) + edev->cb->set_power_status(edev, ecomp, val); + return count; +} + +static ssize_t get_component_type(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct enclosure_component *ecomp = to_enclosure_component(cdev); + + return sysfs_emit(buf, "%s\n", enclosure_type[ecomp->type]); +} + +static ssize_t get_component_slot(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct enclosure_component *ecomp = to_enclosure_component(cdev); + int slot; + + /* if the enclosure does not override then use 'number' as a stand-in */ + if (ecomp->slot >= 0) + slot = ecomp->slot; + else + slot = ecomp->number; + + return sysfs_emit(buf, "%d\n", slot); +} + +static DEVICE_ATTR(fault, S_IRUGO | S_IWUSR, get_component_fault, + set_component_fault); +static DEVICE_ATTR(status, S_IRUGO | S_IWUSR, get_component_status, + set_component_status); +static DEVICE_ATTR(active, S_IRUGO | S_IWUSR, get_component_active, + set_component_active); +static DEVICE_ATTR(locate, S_IRUGO | S_IWUSR, get_component_locate, + set_component_locate); +static DEVICE_ATTR(power_status, S_IRUGO | S_IWUSR, get_component_power_status, + set_component_power_status); +static DEVICE_ATTR(type, S_IRUGO, get_component_type, NULL); +static DEVICE_ATTR(slot, S_IRUGO, get_component_slot, NULL); + +static struct attribute *enclosure_component_attrs[] = { + &dev_attr_fault.attr, + &dev_attr_status.attr, + &dev_attr_active.attr, + &dev_attr_locate.attr, + &dev_attr_power_status.attr, + &dev_attr_type.attr, + &dev_attr_slot.attr, + NULL +}; +ATTRIBUTE_GROUPS(enclosure_component); + +static int __init enclosure_init(void) +{ + return class_register(&enclosure_class); +} + +static void __exit enclosure_exit(void) +{ + class_unregister(&enclosure_class); +} + +module_init(enclosure_init); +module_exit(enclosure_exit); + +MODULE_AUTHOR("James Bottomley"); +MODULE_DESCRIPTION("Enclosure Services"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c new file mode 100644 index 0000000000..1c6c62a7f7 --- /dev/null +++ b/drivers/misc/fastrpc.c @@ -0,0 +1,2482 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2011-2018, The Linux Foundation. All rights reserved. 
+// Copyright (c) 2018, Linaro Limited + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ADSP_DOMAIN_ID (0) +#define MDSP_DOMAIN_ID (1) +#define SDSP_DOMAIN_ID (2) +#define CDSP_DOMAIN_ID (3) +#define FASTRPC_DEV_MAX 4 /* adsp, mdsp, slpi, cdsp*/ +#define FASTRPC_MAX_SESSIONS 14 +#define FASTRPC_MAX_VMIDS 16 +#define FASTRPC_ALIGN 128 +#define FASTRPC_MAX_FDLIST 16 +#define FASTRPC_MAX_CRCLIST 64 +#define FASTRPC_PHYS(p) ((p) & 0xffffffff) +#define FASTRPC_CTX_MAX (256) +#define FASTRPC_INIT_HANDLE 1 +#define FASTRPC_DSP_UTILITIES_HANDLE 2 +#define FASTRPC_CTXID_MASK (0xFF0) +#define INIT_FILELEN_MAX (2 * 1024 * 1024) +#define INIT_FILE_NAMELEN_MAX (128) +#define FASTRPC_DEVICE_NAME "fastrpc" + +/* Add memory to static PD pool, protection thru XPU */ +#define ADSP_MMAP_HEAP_ADDR 4 +/* MAP static DMA buffer on DSP User PD */ +#define ADSP_MMAP_DMA_BUFFER 6 +/* Add memory to static PD pool protection thru hypervisor */ +#define ADSP_MMAP_REMOTE_HEAP_ADDR 8 +/* Add memory to userPD pool, for user heap */ +#define ADSP_MMAP_ADD_PAGES 0x1000 +/* Add memory to userPD pool, for LLC heap */ +#define ADSP_MMAP_ADD_PAGES_LLC 0x3000, + +#define DSP_UNSUPPORTED_API (0x80000414) +/* MAX NUMBER of DSP ATTRIBUTES SUPPORTED */ +#define FASTRPC_MAX_DSP_ATTRIBUTES (256) +#define FASTRPC_MAX_DSP_ATTRIBUTES_LEN (sizeof(u32) * FASTRPC_MAX_DSP_ATTRIBUTES) + +/* Retrives number of input buffers from the scalars parameter */ +#define REMOTE_SCALARS_INBUFS(sc) (((sc) >> 16) & 0x0ff) + +/* Retrives number of output buffers from the scalars parameter */ +#define REMOTE_SCALARS_OUTBUFS(sc) (((sc) >> 8) & 0x0ff) + +/* Retrives number of input handles from the scalars parameter */ +#define REMOTE_SCALARS_INHANDLES(sc) (((sc) >> 4) & 0x0f) + +/* Retrives number of output handles from the scalars parameter */ +#define REMOTE_SCALARS_OUTHANDLES(sc) ((sc) & 0x0f) + +#define REMOTE_SCALARS_LENGTH(sc) (REMOTE_SCALARS_INBUFS(sc) + \ + REMOTE_SCALARS_OUTBUFS(sc) + \ + REMOTE_SCALARS_INHANDLES(sc)+ \ + REMOTE_SCALARS_OUTHANDLES(sc)) +#define FASTRPC_BUILD_SCALARS(attr, method, in, out, oin, oout) \ + (((attr & 0x07) << 29) | \ + ((method & 0x1f) << 24) | \ + ((in & 0xff) << 16) | \ + ((out & 0xff) << 8) | \ + ((oin & 0x0f) << 4) | \ + (oout & 0x0f)) + +#define FASTRPC_SCALARS(method, in, out) \ + FASTRPC_BUILD_SCALARS(0, method, in, out, 0, 0) + +#define FASTRPC_CREATE_PROCESS_NARGS 6 +#define FASTRPC_CREATE_STATIC_PROCESS_NARGS 3 +/* Remote Method id table */ +#define FASTRPC_RMID_INIT_ATTACH 0 +#define FASTRPC_RMID_INIT_RELEASE 1 +#define FASTRPC_RMID_INIT_MMAP 4 +#define FASTRPC_RMID_INIT_MUNMAP 5 +#define FASTRPC_RMID_INIT_CREATE 6 +#define FASTRPC_RMID_INIT_CREATE_ATTR 7 +#define FASTRPC_RMID_INIT_CREATE_STATIC 8 +#define FASTRPC_RMID_INIT_MEM_MAP 10 +#define FASTRPC_RMID_INIT_MEM_UNMAP 11 + +/* Protection Domain(PD) ids */ +#define ROOT_PD (0) +#define USER_PD (1) +#define SENSORS_PD (2) + +#define miscdev_to_fdevice(d) container_of(d, struct fastrpc_device, miscdev) + +static const char *domains[FASTRPC_DEV_MAX] = { "adsp", "mdsp", + "sdsp", "cdsp"}; +struct fastrpc_phy_page { + u64 addr; /* physical address */ + u64 size; /* size of contiguous region */ +}; + +struct fastrpc_invoke_buf { + u32 num; /* number of contiguous regions */ + u32 pgidx; /* index to start of contiguous region */ +}; + +struct fastrpc_remote_dmahandle { + s32 fd; /* dma handle fd */ + 
u32 offset; /* dma handle offset */ + u32 len; /* dma handle length */ +}; + +struct fastrpc_remote_buf { + u64 pv; /* buffer pointer */ + u64 len; /* length of buffer */ +}; + +union fastrpc_remote_arg { + struct fastrpc_remote_buf buf; + struct fastrpc_remote_dmahandle dma; +}; + +struct fastrpc_mmap_rsp_msg { + u64 vaddr; +}; + +struct fastrpc_mmap_req_msg { + s32 pgid; + u32 flags; + u64 vaddr; + s32 num; +}; + +struct fastrpc_mem_map_req_msg { + s32 pgid; + s32 fd; + s32 offset; + u32 flags; + u64 vaddrin; + s32 num; + s32 data_len; +}; + +struct fastrpc_munmap_req_msg { + s32 pgid; + u64 vaddr; + u64 size; +}; + +struct fastrpc_mem_unmap_req_msg { + s32 pgid; + s32 fd; + u64 vaddrin; + u64 len; +}; + +struct fastrpc_msg { + int pid; /* process group id */ + int tid; /* thread id */ + u64 ctx; /* invoke caller context */ + u32 handle; /* handle to invoke */ + u32 sc; /* scalars structure describing the data */ + u64 addr; /* physical address */ + u64 size; /* size of contiguous region */ +}; + +struct fastrpc_invoke_rsp { + u64 ctx; /* invoke caller context */ + int retval; /* invoke return value */ +}; + +struct fastrpc_buf_overlap { + u64 start; + u64 end; + int raix; + u64 mstart; + u64 mend; + u64 offset; +}; + +struct fastrpc_buf { + struct fastrpc_user *fl; + struct dma_buf *dmabuf; + struct device *dev; + void *virt; + u64 phys; + u64 size; + /* Lock for dma buf attachments */ + struct mutex lock; + struct list_head attachments; + /* mmap support */ + struct list_head node; /* list of user requested mmaps */ + uintptr_t raddr; +}; + +struct fastrpc_dma_buf_attachment { + struct device *dev; + struct sg_table sgt; + struct list_head node; +}; + +struct fastrpc_map { + struct list_head node; + struct fastrpc_user *fl; + int fd; + struct dma_buf *buf; + struct sg_table *table; + struct dma_buf_attachment *attach; + u64 phys; + u64 size; + void *va; + u64 len; + u64 raddr; + u32 attr; + struct kref refcount; +}; + +struct fastrpc_invoke_ctx { + int nscalars; + int nbufs; + int retval; + int pid; + int tgid; + u32 sc; + u32 *crc; + u64 ctxid; + u64 msg_sz; + struct kref refcount; + struct list_head node; /* list of ctxs */ + struct completion work; + struct work_struct put_work; + struct fastrpc_msg msg; + struct fastrpc_user *fl; + union fastrpc_remote_arg *rpra; + struct fastrpc_map **maps; + struct fastrpc_buf *buf; + struct fastrpc_invoke_args *args; + struct fastrpc_buf_overlap *olaps; + struct fastrpc_channel_ctx *cctx; +}; + +struct fastrpc_session_ctx { + struct device *dev; + int sid; + bool used; + bool valid; +}; + +struct fastrpc_channel_ctx { + int domain_id; + int sesscount; + int vmcount; + u64 perms; + struct qcom_scm_vmperm vmperms[FASTRPC_MAX_VMIDS]; + struct rpmsg_device *rpdev; + struct fastrpc_session_ctx session[FASTRPC_MAX_SESSIONS]; + spinlock_t lock; + struct idr ctx_idr; + struct list_head users; + struct kref refcount; + /* Flag if dsp attributes are cached */ + bool valid_attributes; + u32 dsp_attributes[FASTRPC_MAX_DSP_ATTRIBUTES]; + struct fastrpc_device *secure_fdevice; + struct fastrpc_device *fdevice; + struct fastrpc_buf *remote_heap; + struct list_head invoke_interrupted_mmaps; + bool secure; + bool unsigned_support; + u64 dma_mask; +}; + +struct fastrpc_device { + struct fastrpc_channel_ctx *cctx; + struct miscdevice miscdev; + bool secure; +}; + +struct fastrpc_user { + struct list_head user; + struct list_head maps; + struct list_head pending; + struct list_head mmaps; + + struct fastrpc_channel_ctx *cctx; + struct fastrpc_session_ctx *sctx; + 
struct fastrpc_buf *init_mem; + + int tgid; + int pd; + bool is_secure_dev; + /* Lock for lists */ + spinlock_t lock; + /* lock for allocations */ + struct mutex mutex; +}; + +static void fastrpc_free_map(struct kref *ref) +{ + struct fastrpc_map *map; + + map = container_of(ref, struct fastrpc_map, refcount); + + if (map->table) { + if (map->attr & FASTRPC_ATTR_SECUREMAP) { + struct qcom_scm_vmperm perm; + int vmid = map->fl->cctx->vmperms[0].vmid; + u64 src_perms = BIT(QCOM_SCM_VMID_HLOS) | BIT(vmid); + int err = 0; + + perm.vmid = QCOM_SCM_VMID_HLOS; + perm.perm = QCOM_SCM_PERM_RWX; + err = qcom_scm_assign_mem(map->phys, map->size, + &src_perms, &perm, 1); + if (err) { + dev_err(map->fl->sctx->dev, "Failed to assign memory phys 0x%llx size 0x%llx err %d", + map->phys, map->size, err); + return; + } + } + dma_buf_unmap_attachment_unlocked(map->attach, map->table, + DMA_BIDIRECTIONAL); + dma_buf_detach(map->buf, map->attach); + dma_buf_put(map->buf); + } + + if (map->fl) { + spin_lock(&map->fl->lock); + list_del(&map->node); + spin_unlock(&map->fl->lock); + map->fl = NULL; + } + + kfree(map); +} + +static void fastrpc_map_put(struct fastrpc_map *map) +{ + if (map) + kref_put(&map->refcount, fastrpc_free_map); +} + +static int fastrpc_map_get(struct fastrpc_map *map) +{ + if (!map) + return -ENOENT; + + return kref_get_unless_zero(&map->refcount) ? 0 : -ENOENT; +} + + +static int fastrpc_map_lookup(struct fastrpc_user *fl, int fd, + struct fastrpc_map **ppmap, bool take_ref) +{ + struct fastrpc_session_ctx *sess = fl->sctx; + struct fastrpc_map *map = NULL; + int ret = -ENOENT; + + spin_lock(&fl->lock); + list_for_each_entry(map, &fl->maps, node) { + if (map->fd != fd) + continue; + + if (take_ref) { + ret = fastrpc_map_get(map); + if (ret) { + dev_dbg(sess->dev, "%s: Failed to get map fd=%d ret=%d\n", + __func__, fd, ret); + break; + } + } + + *ppmap = map; + ret = 0; + break; + } + spin_unlock(&fl->lock); + + return ret; +} + +static void fastrpc_buf_free(struct fastrpc_buf *buf) +{ + dma_free_coherent(buf->dev, buf->size, buf->virt, + FASTRPC_PHYS(buf->phys)); + kfree(buf); +} + +static int __fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev, + u64 size, struct fastrpc_buf **obuf) +{ + struct fastrpc_buf *buf; + + buf = kzalloc(sizeof(*buf), GFP_KERNEL); + if (!buf) + return -ENOMEM; + + INIT_LIST_HEAD(&buf->attachments); + INIT_LIST_HEAD(&buf->node); + mutex_init(&buf->lock); + + buf->fl = fl; + buf->virt = NULL; + buf->phys = 0; + buf->size = size; + buf->dev = dev; + buf->raddr = 0; + + buf->virt = dma_alloc_coherent(dev, buf->size, (dma_addr_t *)&buf->phys, + GFP_KERNEL); + if (!buf->virt) { + mutex_destroy(&buf->lock); + kfree(buf); + return -ENOMEM; + } + + *obuf = buf; + + return 0; +} + +static int fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev, + u64 size, struct fastrpc_buf **obuf) +{ + int ret; + struct fastrpc_buf *buf; + + ret = __fastrpc_buf_alloc(fl, dev, size, obuf); + if (ret) + return ret; + + buf = *obuf; + + if (fl->sctx && fl->sctx->sid) + buf->phys += ((u64)fl->sctx->sid << 32); + + return 0; +} + +static int fastrpc_remote_heap_alloc(struct fastrpc_user *fl, struct device *dev, + u64 size, struct fastrpc_buf **obuf) +{ + struct device *rdev = &fl->cctx->rpdev->dev; + + return __fastrpc_buf_alloc(fl, rdev, size, obuf); +} + +static void fastrpc_channel_ctx_free(struct kref *ref) +{ + struct fastrpc_channel_ctx *cctx; + + cctx = container_of(ref, struct fastrpc_channel_ctx, refcount); + + kfree(cctx); +} + +static void 
fastrpc_channel_ctx_get(struct fastrpc_channel_ctx *cctx) +{ + kref_get(&cctx->refcount); +} + +static void fastrpc_channel_ctx_put(struct fastrpc_channel_ctx *cctx) +{ + kref_put(&cctx->refcount, fastrpc_channel_ctx_free); +} + +static void fastrpc_context_free(struct kref *ref) +{ + struct fastrpc_invoke_ctx *ctx; + struct fastrpc_channel_ctx *cctx; + unsigned long flags; + int i; + + ctx = container_of(ref, struct fastrpc_invoke_ctx, refcount); + cctx = ctx->cctx; + + for (i = 0; i < ctx->nbufs; i++) + fastrpc_map_put(ctx->maps[i]); + + if (ctx->buf) + fastrpc_buf_free(ctx->buf); + + spin_lock_irqsave(&cctx->lock, flags); + idr_remove(&cctx->ctx_idr, ctx->ctxid >> 4); + spin_unlock_irqrestore(&cctx->lock, flags); + + kfree(ctx->maps); + kfree(ctx->olaps); + kfree(ctx); + + fastrpc_channel_ctx_put(cctx); +} + +static void fastrpc_context_get(struct fastrpc_invoke_ctx *ctx) +{ + kref_get(&ctx->refcount); +} + +static void fastrpc_context_put(struct fastrpc_invoke_ctx *ctx) +{ + kref_put(&ctx->refcount, fastrpc_context_free); +} + +static void fastrpc_context_put_wq(struct work_struct *work) +{ + struct fastrpc_invoke_ctx *ctx = + container_of(work, struct fastrpc_invoke_ctx, put_work); + + fastrpc_context_put(ctx); +} + +#define CMP(aa, bb) ((aa) == (bb) ? 0 : (aa) < (bb) ? -1 : 1) +static int olaps_cmp(const void *a, const void *b) +{ + struct fastrpc_buf_overlap *pa = (struct fastrpc_buf_overlap *)a; + struct fastrpc_buf_overlap *pb = (struct fastrpc_buf_overlap *)b; + /* sort with lowest starting buffer first */ + int st = CMP(pa->start, pb->start); + /* sort with highest ending buffer first */ + int ed = CMP(pb->end, pa->end); + + return st == 0 ? ed : st; +} + +static void fastrpc_get_buff_overlaps(struct fastrpc_invoke_ctx *ctx) +{ + u64 max_end = 0; + int i; + + for (i = 0; i < ctx->nbufs; ++i) { + ctx->olaps[i].start = ctx->args[i].ptr; + ctx->olaps[i].end = ctx->olaps[i].start + ctx->args[i].length; + ctx->olaps[i].raix = i; + } + + sort(ctx->olaps, ctx->nbufs, sizeof(*ctx->olaps), olaps_cmp, NULL); + + for (i = 0; i < ctx->nbufs; ++i) { + /* Falling inside previous range */ + if (ctx->olaps[i].start < max_end) { + ctx->olaps[i].mstart = max_end; + ctx->olaps[i].mend = ctx->olaps[i].end; + ctx->olaps[i].offset = max_end - ctx->olaps[i].start; + + if (ctx->olaps[i].end > max_end) { + max_end = ctx->olaps[i].end; + } else { + ctx->olaps[i].mend = 0; + ctx->olaps[i].mstart = 0; + } + + } else { + ctx->olaps[i].mend = ctx->olaps[i].end; + ctx->olaps[i].mstart = ctx->olaps[i].start; + ctx->olaps[i].offset = 0; + max_end = ctx->olaps[i].end; + } + } +} + +static struct fastrpc_invoke_ctx *fastrpc_context_alloc( + struct fastrpc_user *user, u32 kernel, u32 sc, + struct fastrpc_invoke_args *args) +{ + struct fastrpc_channel_ctx *cctx = user->cctx; + struct fastrpc_invoke_ctx *ctx = NULL; + unsigned long flags; + int ret; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&ctx->node); + ctx->fl = user; + ctx->nscalars = REMOTE_SCALARS_LENGTH(sc); + ctx->nbufs = REMOTE_SCALARS_INBUFS(sc) + + REMOTE_SCALARS_OUTBUFS(sc); + + if (ctx->nscalars) { + ctx->maps = kcalloc(ctx->nscalars, + sizeof(*ctx->maps), GFP_KERNEL); + if (!ctx->maps) { + kfree(ctx); + return ERR_PTR(-ENOMEM); + } + ctx->olaps = kcalloc(ctx->nscalars, + sizeof(*ctx->olaps), GFP_KERNEL); + if (!ctx->olaps) { + kfree(ctx->maps); + kfree(ctx); + return ERR_PTR(-ENOMEM); + } + ctx->args = args; + fastrpc_get_buff_overlaps(ctx); + } + + /* Released in fastrpc_context_put() */ + 
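+	/*
+	 * Each invoke context pins the channel so its data cannot go away
+	 * while the call is in flight; the reference is dropped from
+	 * fastrpc_context_free() once the last fastrpc_context_put() runs.
+	 */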
fastrpc_channel_ctx_get(cctx); + + ctx->sc = sc; + ctx->retval = -1; + ctx->pid = current->pid; + ctx->tgid = user->tgid; + ctx->cctx = cctx; + init_completion(&ctx->work); + INIT_WORK(&ctx->put_work, fastrpc_context_put_wq); + + spin_lock(&user->lock); + list_add_tail(&ctx->node, &user->pending); + spin_unlock(&user->lock); + + spin_lock_irqsave(&cctx->lock, flags); + ret = idr_alloc_cyclic(&cctx->ctx_idr, ctx, 1, + FASTRPC_CTX_MAX, GFP_ATOMIC); + if (ret < 0) { + spin_unlock_irqrestore(&cctx->lock, flags); + goto err_idr; + } + ctx->ctxid = ret << 4; + spin_unlock_irqrestore(&cctx->lock, flags); + + kref_init(&ctx->refcount); + + return ctx; +err_idr: + spin_lock(&user->lock); + list_del(&ctx->node); + spin_unlock(&user->lock); + fastrpc_channel_ctx_put(cctx); + kfree(ctx->maps); + kfree(ctx->olaps); + kfree(ctx); + + return ERR_PTR(ret); +} + +static struct sg_table * +fastrpc_map_dma_buf(struct dma_buf_attachment *attachment, + enum dma_data_direction dir) +{ + struct fastrpc_dma_buf_attachment *a = attachment->priv; + struct sg_table *table; + int ret; + + table = &a->sgt; + + ret = dma_map_sgtable(attachment->dev, table, dir, 0); + if (ret) + table = ERR_PTR(ret); + return table; +} + +static void fastrpc_unmap_dma_buf(struct dma_buf_attachment *attach, + struct sg_table *table, + enum dma_data_direction dir) +{ + dma_unmap_sgtable(attach->dev, table, dir, 0); +} + +static void fastrpc_release(struct dma_buf *dmabuf) +{ + struct fastrpc_buf *buffer = dmabuf->priv; + + fastrpc_buf_free(buffer); +} + +static int fastrpc_dma_buf_attach(struct dma_buf *dmabuf, + struct dma_buf_attachment *attachment) +{ + struct fastrpc_dma_buf_attachment *a; + struct fastrpc_buf *buffer = dmabuf->priv; + int ret; + + a = kzalloc(sizeof(*a), GFP_KERNEL); + if (!a) + return -ENOMEM; + + ret = dma_get_sgtable(buffer->dev, &a->sgt, buffer->virt, + FASTRPC_PHYS(buffer->phys), buffer->size); + if (ret < 0) { + dev_err(buffer->dev, "failed to get scatterlist from DMA API\n"); + kfree(a); + return -EINVAL; + } + + a->dev = attachment->dev; + INIT_LIST_HEAD(&a->node); + attachment->priv = a; + + mutex_lock(&buffer->lock); + list_add(&a->node, &buffer->attachments); + mutex_unlock(&buffer->lock); + + return 0; +} + +static void fastrpc_dma_buf_detatch(struct dma_buf *dmabuf, + struct dma_buf_attachment *attachment) +{ + struct fastrpc_dma_buf_attachment *a = attachment->priv; + struct fastrpc_buf *buffer = dmabuf->priv; + + mutex_lock(&buffer->lock); + list_del(&a->node); + mutex_unlock(&buffer->lock); + sg_free_table(&a->sgt); + kfree(a); +} + +static int fastrpc_vmap(struct dma_buf *dmabuf, struct iosys_map *map) +{ + struct fastrpc_buf *buf = dmabuf->priv; + + iosys_map_set_vaddr(map, buf->virt); + + return 0; +} + +static int fastrpc_mmap(struct dma_buf *dmabuf, + struct vm_area_struct *vma) +{ + struct fastrpc_buf *buf = dmabuf->priv; + size_t size = vma->vm_end - vma->vm_start; + + dma_resv_assert_held(dmabuf->resv); + + return dma_mmap_coherent(buf->dev, vma, buf->virt, + FASTRPC_PHYS(buf->phys), size); +} + +static const struct dma_buf_ops fastrpc_dma_buf_ops = { + .attach = fastrpc_dma_buf_attach, + .detach = fastrpc_dma_buf_detatch, + .map_dma_buf = fastrpc_map_dma_buf, + .unmap_dma_buf = fastrpc_unmap_dma_buf, + .mmap = fastrpc_mmap, + .vmap = fastrpc_vmap, + .release = fastrpc_release, +}; + +static int fastrpc_map_create(struct fastrpc_user *fl, int fd, + u64 len, u32 attr, struct fastrpc_map **ppmap) +{ + struct fastrpc_session_ctx *sess = fl->sctx; + struct fastrpc_map *map = NULL; + struct 
sg_table *table; + int err = 0; + + if (!fastrpc_map_lookup(fl, fd, ppmap, true)) + return 0; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) + return -ENOMEM; + + INIT_LIST_HEAD(&map->node); + kref_init(&map->refcount); + + map->fl = fl; + map->fd = fd; + map->buf = dma_buf_get(fd); + if (IS_ERR(map->buf)) { + err = PTR_ERR(map->buf); + goto get_err; + } + + map->attach = dma_buf_attach(map->buf, sess->dev); + if (IS_ERR(map->attach)) { + dev_err(sess->dev, "Failed to attach dmabuf\n"); + err = PTR_ERR(map->attach); + goto attach_err; + } + + table = dma_buf_map_attachment_unlocked(map->attach, DMA_BIDIRECTIONAL); + if (IS_ERR(table)) { + err = PTR_ERR(table); + goto map_err; + } + map->table = table; + + if (attr & FASTRPC_ATTR_SECUREMAP) { + map->phys = sg_phys(map->table->sgl); + } else { + map->phys = sg_dma_address(map->table->sgl); + map->phys += ((u64)fl->sctx->sid << 32); + } + map->size = len; + map->va = sg_virt(map->table->sgl); + map->len = len; + + if (attr & FASTRPC_ATTR_SECUREMAP) { + /* + * If subsystem VMIDs are defined in DTSI, then do + * hyp_assign from HLOS to those VM(s) + */ + u64 src_perms = BIT(QCOM_SCM_VMID_HLOS); + struct qcom_scm_vmperm dst_perms[2] = {0}; + + dst_perms[0].vmid = QCOM_SCM_VMID_HLOS; + dst_perms[0].perm = QCOM_SCM_PERM_RW; + dst_perms[1].vmid = fl->cctx->vmperms[0].vmid; + dst_perms[1].perm = QCOM_SCM_PERM_RWX; + map->attr = attr; + err = qcom_scm_assign_mem(map->phys, (u64)map->size, &src_perms, dst_perms, 2); + if (err) { + dev_err(sess->dev, "Failed to assign memory with phys 0x%llx size 0x%llx err %d", + map->phys, map->size, err); + goto map_err; + } + } + spin_lock(&fl->lock); + list_add_tail(&map->node, &fl->maps); + spin_unlock(&fl->lock); + *ppmap = map; + + return 0; + +map_err: + dma_buf_detach(map->buf, map->attach); +attach_err: + dma_buf_put(map->buf); +get_err: + fastrpc_map_put(map); + + return err; +} + +/* + * Fastrpc payload buffer with metadata looks like: + * + * >>>>>> START of METADATA <<<<<<<<< + * +---------------------------------+ + * | Arguments | + * | type:(union fastrpc_remote_arg)| + * | (0 - N) | + * +---------------------------------+ + * | Invoke Buffer list | + * | type:(struct fastrpc_invoke_buf)| + * | (0 - N) | + * +---------------------------------+ + * | Page info list | + * | type:(struct fastrpc_phy_page) | + * | (0 - N) | + * +---------------------------------+ + * | Optional info | + * |(can be specific to SoC/Firmware)| + * +---------------------------------+ + * >>>>>>>> END of METADATA <<<<<<<<< + * +---------------------------------+ + * | Inline ARGS | + * | (0-N) | + * +---------------------------------+ + */ + +static int fastrpc_get_meta_size(struct fastrpc_invoke_ctx *ctx) +{ + int size = 0; + + size = (sizeof(struct fastrpc_remote_buf) + + sizeof(struct fastrpc_invoke_buf) + + sizeof(struct fastrpc_phy_page)) * ctx->nscalars + + sizeof(u64) * FASTRPC_MAX_FDLIST + + sizeof(u32) * FASTRPC_MAX_CRCLIST; + + return size; +} + +static u64 fastrpc_get_payload_size(struct fastrpc_invoke_ctx *ctx, int metalen) +{ + u64 size = 0; + int oix; + + size = ALIGN(metalen, FASTRPC_ALIGN); + for (oix = 0; oix < ctx->nbufs; oix++) { + int i = ctx->olaps[oix].raix; + + if (ctx->args[i].fd == 0 || ctx->args[i].fd == -1) { + + if (ctx->olaps[oix].offset == 0) + size = ALIGN(size, FASTRPC_ALIGN); + + size += (ctx->olaps[oix].mend - ctx->olaps[oix].mstart); + } + } + + return size; +} + +static int fastrpc_create_maps(struct fastrpc_invoke_ctx *ctx) +{ + struct device *dev = ctx->fl->sctx->dev; + int i, 
err; + + for (i = 0; i < ctx->nscalars; ++i) { + + if (ctx->args[i].fd == 0 || ctx->args[i].fd == -1 || + ctx->args[i].length == 0) + continue; + + err = fastrpc_map_create(ctx->fl, ctx->args[i].fd, + ctx->args[i].length, ctx->args[i].attr, &ctx->maps[i]); + if (err) { + dev_err(dev, "Error Creating map %d\n", err); + return -EINVAL; + } + + } + return 0; +} + +static struct fastrpc_invoke_buf *fastrpc_invoke_buf_start(union fastrpc_remote_arg *pra, int len) +{ + return (struct fastrpc_invoke_buf *)(&pra[len]); +} + +static struct fastrpc_phy_page *fastrpc_phy_page_start(struct fastrpc_invoke_buf *buf, int len) +{ + return (struct fastrpc_phy_page *)(&buf[len]); +} + +static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx) +{ + struct device *dev = ctx->fl->sctx->dev; + union fastrpc_remote_arg *rpra; + struct fastrpc_invoke_buf *list; + struct fastrpc_phy_page *pages; + int inbufs, i, oix, err = 0; + u64 len, rlen, pkt_size; + u64 pg_start, pg_end; + uintptr_t args; + int metalen; + + inbufs = REMOTE_SCALARS_INBUFS(ctx->sc); + metalen = fastrpc_get_meta_size(ctx); + pkt_size = fastrpc_get_payload_size(ctx, metalen); + + err = fastrpc_create_maps(ctx); + if (err) + return err; + + ctx->msg_sz = pkt_size; + + err = fastrpc_buf_alloc(ctx->fl, dev, pkt_size, &ctx->buf); + if (err) + return err; + + memset(ctx->buf->virt, 0, pkt_size); + rpra = ctx->buf->virt; + list = fastrpc_invoke_buf_start(rpra, ctx->nscalars); + pages = fastrpc_phy_page_start(list, ctx->nscalars); + args = (uintptr_t)ctx->buf->virt + metalen; + rlen = pkt_size - metalen; + ctx->rpra = rpra; + + for (oix = 0; oix < ctx->nbufs; ++oix) { + int mlen; + + i = ctx->olaps[oix].raix; + len = ctx->args[i].length; + + rpra[i].buf.pv = 0; + rpra[i].buf.len = len; + list[i].num = len ? 1 : 0; + list[i].pgidx = i; + + if (!len) + continue; + + if (ctx->maps[i]) { + struct vm_area_struct *vma = NULL; + + rpra[i].buf.pv = (u64) ctx->args[i].ptr; + pages[i].addr = ctx->maps[i]->phys; + + mmap_read_lock(current->mm); + vma = find_vma(current->mm, ctx->args[i].ptr); + if (vma) + pages[i].addr += ctx->args[i].ptr - + vma->vm_start; + mmap_read_unlock(current->mm); + + pg_start = (ctx->args[i].ptr & PAGE_MASK) >> PAGE_SHIFT; + pg_end = ((ctx->args[i].ptr + len - 1) & PAGE_MASK) >> + PAGE_SHIFT; + pages[i].size = (pg_end - pg_start + 1) * PAGE_SIZE; + + } else { + + if (ctx->olaps[oix].offset == 0) { + rlen -= ALIGN(args, FASTRPC_ALIGN) - args; + args = ALIGN(args, FASTRPC_ALIGN); + } + + mlen = ctx->olaps[oix].mend - ctx->olaps[oix].mstart; + + if (rlen < mlen) + goto bail; + + rpra[i].buf.pv = args - ctx->olaps[oix].offset; + pages[i].addr = ctx->buf->phys - + ctx->olaps[oix].offset + + (pkt_size - rlen); + pages[i].addr = pages[i].addr & PAGE_MASK; + + pg_start = (args & PAGE_MASK) >> PAGE_SHIFT; + pg_end = ((args + len - 1) & PAGE_MASK) >> PAGE_SHIFT; + pages[i].size = (pg_end - pg_start + 1) * PAGE_SIZE; + args = args + mlen; + rlen -= mlen; + } + + if (i < inbufs && !ctx->maps[i]) { + void *dst = (void *)(uintptr_t)rpra[i].buf.pv; + void *src = (void *)(uintptr_t)ctx->args[i].ptr; + + if (!kernel) { + if (copy_from_user(dst, (void __user *)src, + len)) { + err = -EFAULT; + goto bail; + } + } else { + memcpy(dst, src, len); + } + } + } + + for (i = ctx->nbufs; i < ctx->nscalars; ++i) { + list[i].num = ctx->args[i].length ? 
1 : 0; + list[i].pgidx = i; + if (ctx->maps[i]) { + pages[i].addr = ctx->maps[i]->phys; + pages[i].size = ctx->maps[i]->size; + } + rpra[i].dma.fd = ctx->args[i].fd; + rpra[i].dma.len = ctx->args[i].length; + rpra[i].dma.offset = (u64) ctx->args[i].ptr; + } + +bail: + if (err) + dev_err(dev, "Error: get invoke args failed:%d\n", err); + + return err; +} + +static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx, + u32 kernel) +{ + union fastrpc_remote_arg *rpra = ctx->rpra; + struct fastrpc_user *fl = ctx->fl; + struct fastrpc_map *mmap = NULL; + struct fastrpc_invoke_buf *list; + struct fastrpc_phy_page *pages; + u64 *fdlist; + int i, inbufs, outbufs, handles; + + inbufs = REMOTE_SCALARS_INBUFS(ctx->sc); + outbufs = REMOTE_SCALARS_OUTBUFS(ctx->sc); + handles = REMOTE_SCALARS_INHANDLES(ctx->sc) + REMOTE_SCALARS_OUTHANDLES(ctx->sc); + list = fastrpc_invoke_buf_start(rpra, ctx->nscalars); + pages = fastrpc_phy_page_start(list, ctx->nscalars); + fdlist = (uint64_t *)(pages + inbufs + outbufs + handles); + + for (i = inbufs; i < ctx->nbufs; ++i) { + if (!ctx->maps[i]) { + void *src = (void *)(uintptr_t)rpra[i].buf.pv; + void *dst = (void *)(uintptr_t)ctx->args[i].ptr; + u64 len = rpra[i].buf.len; + + if (!kernel) { + if (copy_to_user((void __user *)dst, src, len)) + return -EFAULT; + } else { + memcpy(dst, src, len); + } + } + } + + /* Clean up fdlist which is updated by DSP */ + for (i = 0; i < FASTRPC_MAX_FDLIST; i++) { + if (!fdlist[i]) + break; + if (!fastrpc_map_lookup(fl, (int)fdlist[i], &mmap, false)) + fastrpc_map_put(mmap); + } + + return 0; +} + +static int fastrpc_invoke_send(struct fastrpc_session_ctx *sctx, + struct fastrpc_invoke_ctx *ctx, + u32 kernel, uint32_t handle) +{ + struct fastrpc_channel_ctx *cctx; + struct fastrpc_user *fl = ctx->fl; + struct fastrpc_msg *msg = &ctx->msg; + int ret; + + cctx = fl->cctx; + msg->pid = fl->tgid; + msg->tid = current->pid; + + if (kernel) + msg->pid = 0; + + msg->ctx = ctx->ctxid | fl->pd; + msg->handle = handle; + msg->sc = ctx->sc; + msg->addr = ctx->buf ? 
ctx->buf->phys : 0; + msg->size = roundup(ctx->msg_sz, PAGE_SIZE); + fastrpc_context_get(ctx); + + ret = rpmsg_send(cctx->rpdev->ept, (void *)msg, sizeof(*msg)); + + if (ret) + fastrpc_context_put(ctx); + + return ret; + +} + +static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, + u32 handle, u32 sc, + struct fastrpc_invoke_args *args) +{ + struct fastrpc_invoke_ctx *ctx = NULL; + struct fastrpc_buf *buf, *b; + + int err = 0; + + if (!fl->sctx) + return -EINVAL; + + if (!fl->cctx->rpdev) + return -EPIPE; + + if (handle == FASTRPC_INIT_HANDLE && !kernel) { + dev_warn_ratelimited(fl->sctx->dev, "user app trying to send a kernel RPC message (%d)\n", handle); + return -EPERM; + } + + ctx = fastrpc_context_alloc(fl, kernel, sc, args); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + err = fastrpc_get_args(kernel, ctx); + if (err) + goto bail; + + /* make sure that all CPU memory writes are seen by DSP */ + dma_wmb(); + /* Send invoke buffer to remote dsp */ + err = fastrpc_invoke_send(fl->sctx, ctx, kernel, handle); + if (err) + goto bail; + + if (kernel) { + if (!wait_for_completion_timeout(&ctx->work, 10 * HZ)) + err = -ETIMEDOUT; + } else { + err = wait_for_completion_interruptible(&ctx->work); + } + + if (err) + goto bail; + + /* make sure that all memory writes by DSP are seen by CPU */ + dma_rmb(); + /* populate all the output buffers with results */ + err = fastrpc_put_args(ctx, kernel); + if (err) + goto bail; + + /* Check the response from remote dsp */ + err = ctx->retval; + if (err) + goto bail; + +bail: + if (err != -ERESTARTSYS && err != -ETIMEDOUT) { + /* We are done with this compute context */ + spin_lock(&fl->lock); + list_del(&ctx->node); + spin_unlock(&fl->lock); + fastrpc_context_put(ctx); + } + + if (err == -ERESTARTSYS) { + list_for_each_entry_safe(buf, b, &fl->mmaps, node) { + list_del(&buf->node); + list_add_tail(&buf->node, &fl->cctx->invoke_interrupted_mmaps); + } + } + + if (err) + dev_dbg(fl->sctx->dev, "Error: Invoke Failed %d\n", err); + + return err; +} + +static bool is_session_rejected(struct fastrpc_user *fl, bool unsigned_pd_request) +{ + /* Check if the device node is non-secure and channel is secure*/ + if (!fl->is_secure_dev && fl->cctx->secure) { + /* + * Allow untrusted applications to offload only to Unsigned PD when + * channel is configured as secure and block untrusted apps on channel + * that does not support unsigned PD offload + */ + if (!fl->cctx->unsigned_support || !unsigned_pd_request) { + dev_err(&fl->cctx->rpdev->dev, "Error: Untrusted application trying to offload to signed PD"); + return true; + } + } + + return false; +} + +static int fastrpc_init_create_static_process(struct fastrpc_user *fl, + char __user *argp) +{ + struct fastrpc_init_create_static init; + struct fastrpc_invoke_args *args; + struct fastrpc_phy_page pages[1]; + char *name; + int err; + struct { + int pgid; + u32 namelen; + u32 pageslen; + } inbuf; + u32 sc; + + args = kcalloc(FASTRPC_CREATE_STATIC_PROCESS_NARGS, sizeof(*args), GFP_KERNEL); + if (!args) + return -ENOMEM; + + if (copy_from_user(&init, argp, sizeof(init))) { + err = -EFAULT; + goto err; + } + + if (init.namelen > INIT_FILE_NAMELEN_MAX) { + err = -EINVAL; + goto err; + } + + name = kzalloc(init.namelen, GFP_KERNEL); + if (!name) { + err = -ENOMEM; + goto err; + } + + if (copy_from_user(name, (void __user *)(uintptr_t)init.name, init.namelen)) { + err = -EFAULT; + goto err_name; + } + + if (!fl->cctx->remote_heap) { + err = fastrpc_remote_heap_alloc(fl, fl->sctx->dev, init.memlen, + 
&fl->cctx->remote_heap); + if (err) + goto err_name; + + /* Map if we have any heap VMIDs associated with this ADSP Static Process. */ + if (fl->cctx->vmcount) { + err = qcom_scm_assign_mem(fl->cctx->remote_heap->phys, + (u64)fl->cctx->remote_heap->size, + &fl->cctx->perms, + fl->cctx->vmperms, fl->cctx->vmcount); + if (err) { + dev_err(fl->sctx->dev, "Failed to assign memory with phys 0x%llx size 0x%llx err %d", + fl->cctx->remote_heap->phys, fl->cctx->remote_heap->size, err); + goto err_map; + } + } + } + + inbuf.pgid = fl->tgid; + inbuf.namelen = init.namelen; + inbuf.pageslen = 0; + fl->pd = USER_PD; + + args[0].ptr = (u64)(uintptr_t)&inbuf; + args[0].length = sizeof(inbuf); + args[0].fd = -1; + + args[1].ptr = (u64)(uintptr_t)name; + args[1].length = inbuf.namelen; + args[1].fd = -1; + + pages[0].addr = fl->cctx->remote_heap->phys; + pages[0].size = fl->cctx->remote_heap->size; + + args[2].ptr = (u64)(uintptr_t) pages; + args[2].length = sizeof(*pages); + args[2].fd = -1; + + sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_CREATE_STATIC, 3, 0); + + err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, + sc, args); + if (err) + goto err_invoke; + + kfree(args); + + return 0; +err_invoke: + if (fl->cctx->vmcount) { + u64 src_perms = 0; + struct qcom_scm_vmperm dst_perms; + u32 i; + + for (i = 0; i < fl->cctx->vmcount; i++) + src_perms |= BIT(fl->cctx->vmperms[i].vmid); + + dst_perms.vmid = QCOM_SCM_VMID_HLOS; + dst_perms.perm = QCOM_SCM_PERM_RWX; + err = qcom_scm_assign_mem(fl->cctx->remote_heap->phys, + (u64)fl->cctx->remote_heap->size, + &src_perms, &dst_perms, 1); + if (err) + dev_err(fl->sctx->dev, "Failed to assign memory phys 0x%llx size 0x%llx err %d", + fl->cctx->remote_heap->phys, fl->cctx->remote_heap->size, err); + } +err_map: + fastrpc_buf_free(fl->cctx->remote_heap); +err_name: + kfree(name); +err: + kfree(args); + + return err; +} + +static int fastrpc_init_create_process(struct fastrpc_user *fl, + char __user *argp) +{ + struct fastrpc_init_create init; + struct fastrpc_invoke_args *args; + struct fastrpc_phy_page pages[1]; + struct fastrpc_map *map = NULL; + struct fastrpc_buf *imem = NULL; + int memlen; + int err; + struct { + int pgid; + u32 namelen; + u32 filelen; + u32 pageslen; + u32 attrs; + u32 siglen; + } inbuf; + u32 sc; + bool unsigned_module = false; + + args = kcalloc(FASTRPC_CREATE_PROCESS_NARGS, sizeof(*args), GFP_KERNEL); + if (!args) + return -ENOMEM; + + if (copy_from_user(&init, argp, sizeof(init))) { + err = -EFAULT; + goto err; + } + + if (init.attrs & FASTRPC_MODE_UNSIGNED_MODULE) + unsigned_module = true; + + if (is_session_rejected(fl, unsigned_module)) { + err = -ECONNREFUSED; + goto err; + } + + if (init.filelen > INIT_FILELEN_MAX) { + err = -EINVAL; + goto err; + } + + inbuf.pgid = fl->tgid; + inbuf.namelen = strlen(current->comm) + 1; + inbuf.filelen = init.filelen; + inbuf.pageslen = 1; + inbuf.attrs = init.attrs; + inbuf.siglen = init.siglen; + fl->pd = USER_PD; + + if (init.filelen && init.filefd) { + err = fastrpc_map_create(fl, init.filefd, init.filelen, 0, &map); + if (err) + goto err; + } + + memlen = ALIGN(max(INIT_FILELEN_MAX, (int)init.filelen * 4), + 1024 * 1024); + err = fastrpc_buf_alloc(fl, fl->sctx->dev, memlen, + &imem); + if (err) + goto err_alloc; + + fl->init_mem = imem; + args[0].ptr = (u64)(uintptr_t)&inbuf; + args[0].length = sizeof(inbuf); + args[0].fd = -1; + + args[1].ptr = (u64)(uintptr_t)current->comm; + args[1].length = inbuf.namelen; + args[1].fd = -1; + + args[2].ptr = (u64) init.file; + args[2].length = 
inbuf.filelen; + args[2].fd = init.filefd; + + pages[0].addr = imem->phys; + pages[0].size = imem->size; + + args[3].ptr = (u64)(uintptr_t) pages; + args[3].length = 1 * sizeof(*pages); + args[3].fd = -1; + + args[4].ptr = (u64)(uintptr_t)&inbuf.attrs; + args[4].length = sizeof(inbuf.attrs); + args[4].fd = -1; + + args[5].ptr = (u64)(uintptr_t) &inbuf.siglen; + args[5].length = sizeof(inbuf.siglen); + args[5].fd = -1; + + sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_CREATE, 4, 0); + if (init.attrs) + sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_CREATE_ATTR, 4, 0); + + err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, + sc, args); + if (err) + goto err_invoke; + + kfree(args); + + return 0; + +err_invoke: + fl->init_mem = NULL; + fastrpc_buf_free(imem); +err_alloc: + fastrpc_map_put(map); +err: + kfree(args); + + return err; +} + +static struct fastrpc_session_ctx *fastrpc_session_alloc( + struct fastrpc_channel_ctx *cctx) +{ + struct fastrpc_session_ctx *session = NULL; + unsigned long flags; + int i; + + spin_lock_irqsave(&cctx->lock, flags); + for (i = 0; i < cctx->sesscount; i++) { + if (!cctx->session[i].used && cctx->session[i].valid) { + cctx->session[i].used = true; + session = &cctx->session[i]; + break; + } + } + spin_unlock_irqrestore(&cctx->lock, flags); + + return session; +} + +static void fastrpc_session_free(struct fastrpc_channel_ctx *cctx, + struct fastrpc_session_ctx *session) +{ + unsigned long flags; + + spin_lock_irqsave(&cctx->lock, flags); + session->used = false; + spin_unlock_irqrestore(&cctx->lock, flags); +} + +static int fastrpc_release_current_dsp_process(struct fastrpc_user *fl) +{ + struct fastrpc_invoke_args args[1]; + int tgid = 0; + u32 sc; + + tgid = fl->tgid; + args[0].ptr = (u64)(uintptr_t) &tgid; + args[0].length = sizeof(tgid); + args[0].fd = -1; + sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_RELEASE, 1, 0); + + return fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, + sc, &args[0]); +} + +static int fastrpc_device_release(struct inode *inode, struct file *file) +{ + struct fastrpc_user *fl = (struct fastrpc_user *)file->private_data; + struct fastrpc_channel_ctx *cctx = fl->cctx; + struct fastrpc_invoke_ctx *ctx, *n; + struct fastrpc_map *map, *m; + struct fastrpc_buf *buf, *b; + unsigned long flags; + + fastrpc_release_current_dsp_process(fl); + + spin_lock_irqsave(&cctx->lock, flags); + list_del(&fl->user); + spin_unlock_irqrestore(&cctx->lock, flags); + + if (fl->init_mem) + fastrpc_buf_free(fl->init_mem); + + list_for_each_entry_safe(ctx, n, &fl->pending, node) { + list_del(&ctx->node); + fastrpc_context_put(ctx); + } + + list_for_each_entry_safe(map, m, &fl->maps, node) + fastrpc_map_put(map); + + list_for_each_entry_safe(buf, b, &fl->mmaps, node) { + list_del(&buf->node); + fastrpc_buf_free(buf); + } + + fastrpc_session_free(cctx, fl->sctx); + fastrpc_channel_ctx_put(cctx); + + mutex_destroy(&fl->mutex); + kfree(fl); + file->private_data = NULL; + + return 0; +} + +static int fastrpc_device_open(struct inode *inode, struct file *filp) +{ + struct fastrpc_channel_ctx *cctx; + struct fastrpc_device *fdevice; + struct fastrpc_user *fl = NULL; + unsigned long flags; + + fdevice = miscdev_to_fdevice(filp->private_data); + cctx = fdevice->cctx; + + fl = kzalloc(sizeof(*fl), GFP_KERNEL); + if (!fl) + return -ENOMEM; + + /* Released in fastrpc_device_release() */ + fastrpc_channel_ctx_get(cctx); + + filp->private_data = fl; + spin_lock_init(&fl->lock); + mutex_init(&fl->mutex); + INIT_LIST_HEAD(&fl->pending); + INIT_LIST_HEAD(&fl->maps); + 
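/* Buffers mapped onto the DSP through FASTRPC_IOCTL_MMAP are tracked on this list */ + 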
INIT_LIST_HEAD(&fl->mmaps); + INIT_LIST_HEAD(&fl->user); + fl->tgid = current->tgid; + fl->cctx = cctx; + fl->is_secure_dev = fdevice->secure; + + fl->sctx = fastrpc_session_alloc(cctx); + if (!fl->sctx) { + dev_err(&cctx->rpdev->dev, "No session available\n"); + mutex_destroy(&fl->mutex); + kfree(fl); + + return -EBUSY; + } + + spin_lock_irqsave(&cctx->lock, flags); + list_add_tail(&fl->user, &cctx->users); + spin_unlock_irqrestore(&cctx->lock, flags); + + return 0; +} + +static int fastrpc_dmabuf_alloc(struct fastrpc_user *fl, char __user *argp) +{ + struct fastrpc_alloc_dma_buf bp; + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + struct fastrpc_buf *buf = NULL; + int err; + + if (copy_from_user(&bp, argp, sizeof(bp))) + return -EFAULT; + + err = fastrpc_buf_alloc(fl, fl->sctx->dev, bp.size, &buf); + if (err) + return err; + exp_info.ops = &fastrpc_dma_buf_ops; + exp_info.size = bp.size; + exp_info.flags = O_RDWR; + exp_info.priv = buf; + buf->dmabuf = dma_buf_export(&exp_info); + if (IS_ERR(buf->dmabuf)) { + err = PTR_ERR(buf->dmabuf); + fastrpc_buf_free(buf); + return err; + } + + bp.fd = dma_buf_fd(buf->dmabuf, O_ACCMODE); + if (bp.fd < 0) { + dma_buf_put(buf->dmabuf); + return -EINVAL; + } + + if (copy_to_user(argp, &bp, sizeof(bp))) { + /* + * The usercopy failed, but we can't do much about it, as + * dma_buf_fd() already called fd_install() and made the + * file descriptor accessible for the current process. It + * might already be closed and dmabuf no longer valid when + * we reach this point. Therefore "leak" the fd and rely on + * the process exit path to do any required cleanup. + */ + return -EFAULT; + } + + return 0; +} + +static int fastrpc_init_attach(struct fastrpc_user *fl, int pd) +{ + struct fastrpc_invoke_args args[1]; + int tgid = fl->tgid; + u32 sc; + + args[0].ptr = (u64)(uintptr_t) &tgid; + args[0].length = sizeof(tgid); + args[0].fd = -1; + sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_ATTACH, 1, 0); + fl->pd = pd; + + return fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, + sc, &args[0]); +} + +static int fastrpc_invoke(struct fastrpc_user *fl, char __user *argp) +{ + struct fastrpc_invoke_args *args = NULL; + struct fastrpc_invoke inv; + u32 nscalars; + int err; + + if (copy_from_user(&inv, argp, sizeof(inv))) + return -EFAULT; + + /* nscalars is truncated here to max supported value */ + nscalars = REMOTE_SCALARS_LENGTH(inv.sc); + if (nscalars) { + args = kcalloc(nscalars, sizeof(*args), GFP_KERNEL); + if (!args) + return -ENOMEM; + + if (copy_from_user(args, (void __user *)(uintptr_t)inv.args, + nscalars * sizeof(*args))) { + kfree(args); + return -EFAULT; + } + } + + err = fastrpc_internal_invoke(fl, false, inv.handle, inv.sc, args); + kfree(args); + + return err; +} + +static int fastrpc_get_info_from_dsp(struct fastrpc_user *fl, uint32_t *dsp_attr_buf, + uint32_t dsp_attr_buf_len) +{ + struct fastrpc_invoke_args args[2] = { 0 }; + + /* Capability filled in userspace */ + dsp_attr_buf[0] = 0; + + args[0].ptr = (u64)(uintptr_t)&dsp_attr_buf_len; + args[0].length = sizeof(dsp_attr_buf_len); + args[0].fd = -1; + args[1].ptr = (u64)(uintptr_t)&dsp_attr_buf[1]; + args[1].length = dsp_attr_buf_len; + args[1].fd = -1; + fl->pd = USER_PD; + + return fastrpc_internal_invoke(fl, true, FASTRPC_DSP_UTILITIES_HANDLE, + FASTRPC_SCALARS(0, 1, 1), args); +} + +static int fastrpc_get_info_from_kernel(struct fastrpc_ioctl_capability *cap, + struct fastrpc_user *fl) +{ + struct fastrpc_channel_ctx *cctx = fl->cctx; + uint32_t attribute_id = cap->attribute_id; + uint32_t 
*dsp_attributes; + unsigned long flags; + uint32_t domain = cap->domain; + int err; + + spin_lock_irqsave(&cctx->lock, flags); + /* check if we already have queried dsp for attributes */ + if (cctx->valid_attributes) { + spin_unlock_irqrestore(&cctx->lock, flags); + goto done; + } + spin_unlock_irqrestore(&cctx->lock, flags); + + dsp_attributes = kzalloc(FASTRPC_MAX_DSP_ATTRIBUTES_LEN, GFP_KERNEL); + if (!dsp_attributes) + return -ENOMEM; + + err = fastrpc_get_info_from_dsp(fl, dsp_attributes, FASTRPC_MAX_DSP_ATTRIBUTES_LEN); + if (err == DSP_UNSUPPORTED_API) { + dev_info(&cctx->rpdev->dev, + "Warning: DSP capabilities not supported on domain: %d\n", domain); + kfree(dsp_attributes); + return -EOPNOTSUPP; + } else if (err) { + dev_err(&cctx->rpdev->dev, "Error: dsp information is incorrect err: %d\n", err); + kfree(dsp_attributes); + return err; + } + + spin_lock_irqsave(&cctx->lock, flags); + memcpy(cctx->dsp_attributes, dsp_attributes, FASTRPC_MAX_DSP_ATTRIBUTES_LEN); + cctx->valid_attributes = true; + spin_unlock_irqrestore(&cctx->lock, flags); + kfree(dsp_attributes); +done: + cap->capability = cctx->dsp_attributes[attribute_id]; + return 0; +} + +static int fastrpc_get_dsp_info(struct fastrpc_user *fl, char __user *argp) +{ + struct fastrpc_ioctl_capability cap = {0}; + int err = 0; + + if (copy_from_user(&cap, argp, sizeof(cap))) + return -EFAULT; + + cap.capability = 0; + if (cap.domain >= FASTRPC_DEV_MAX) { + dev_err(&fl->cctx->rpdev->dev, "Error: Invalid domain id:%d, err:%d\n", + cap.domain, err); + return -ECHRNG; + } + + /* Fastrpc Capablities does not support modem domain */ + if (cap.domain == MDSP_DOMAIN_ID) { + dev_err(&fl->cctx->rpdev->dev, "Error: modem not supported %d\n", err); + return -ECHRNG; + } + + if (cap.attribute_id >= FASTRPC_MAX_DSP_ATTRIBUTES) { + dev_err(&fl->cctx->rpdev->dev, "Error: invalid attribute: %d, err: %d\n", + cap.attribute_id, err); + return -EOVERFLOW; + } + + err = fastrpc_get_info_from_kernel(&cap, fl); + if (err) + return err; + + if (copy_to_user(argp, &cap.capability, sizeof(cap.capability))) + return -EFAULT; + + return 0; +} + +static int fastrpc_req_munmap_impl(struct fastrpc_user *fl, struct fastrpc_buf *buf) +{ + struct fastrpc_invoke_args args[1] = { [0] = { 0 } }; + struct fastrpc_munmap_req_msg req_msg; + struct device *dev = fl->sctx->dev; + int err; + u32 sc; + + req_msg.pgid = fl->tgid; + req_msg.size = buf->size; + req_msg.vaddr = buf->raddr; + + args[0].ptr = (u64) (uintptr_t) &req_msg; + args[0].length = sizeof(req_msg); + + sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_MUNMAP, 1, 0); + err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, sc, + &args[0]); + if (!err) { + dev_dbg(dev, "unmmap\tpt 0x%09lx OK\n", buf->raddr); + spin_lock(&fl->lock); + list_del(&buf->node); + spin_unlock(&fl->lock); + fastrpc_buf_free(buf); + } else { + dev_err(dev, "unmmap\tpt 0x%09lx ERROR\n", buf->raddr); + } + + return err; +} + +static int fastrpc_req_munmap(struct fastrpc_user *fl, char __user *argp) +{ + struct fastrpc_buf *buf = NULL, *iter, *b; + struct fastrpc_req_munmap req; + struct device *dev = fl->sctx->dev; + + if (copy_from_user(&req, argp, sizeof(req))) + return -EFAULT; + + spin_lock(&fl->lock); + list_for_each_entry_safe(iter, b, &fl->mmaps, node) { + if ((iter->raddr == req.vaddrout) && (iter->size == req.size)) { + buf = iter; + break; + } + } + spin_unlock(&fl->lock); + + if (!buf) { + dev_err(dev, "mmap\t\tpt 0x%09llx [len 0x%08llx] not in list\n", + req.vaddrout, req.size); + return -EINVAL; + } + + return 
fastrpc_req_munmap_impl(fl, buf); +} + +static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp) +{ + struct fastrpc_invoke_args args[3] = { [0 ... 2] = { 0 } }; + struct fastrpc_buf *buf = NULL; + struct fastrpc_mmap_req_msg req_msg; + struct fastrpc_mmap_rsp_msg rsp_msg; + struct fastrpc_phy_page pages; + struct fastrpc_req_mmap req; + struct device *dev = fl->sctx->dev; + int err; + u32 sc; + + if (copy_from_user(&req, argp, sizeof(req))) + return -EFAULT; + + if (req.flags != ADSP_MMAP_ADD_PAGES && req.flags != ADSP_MMAP_REMOTE_HEAP_ADDR) { + dev_err(dev, "flag not supported 0x%x\n", req.flags); + + return -EINVAL; + } + + if (req.vaddrin) { + dev_err(dev, "adding user allocated pages is not supported\n"); + return -EINVAL; + } + + if (req.flags == ADSP_MMAP_REMOTE_HEAP_ADDR) + err = fastrpc_remote_heap_alloc(fl, dev, req.size, &buf); + else + err = fastrpc_buf_alloc(fl, dev, req.size, &buf); + + if (err) { + dev_err(dev, "failed to allocate buffer\n"); + return err; + } + + req_msg.pgid = fl->tgid; + req_msg.flags = req.flags; + req_msg.vaddr = req.vaddrin; + req_msg.num = sizeof(pages); + + args[0].ptr = (u64) (uintptr_t) &req_msg; + args[0].length = sizeof(req_msg); + + pages.addr = buf->phys; + pages.size = buf->size; + + args[1].ptr = (u64) (uintptr_t) &pages; + args[1].length = sizeof(pages); + + args[2].ptr = (u64) (uintptr_t) &rsp_msg; + args[2].length = sizeof(rsp_msg); + + sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_MMAP, 2, 1); + err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, sc, + &args[0]); + if (err) { + dev_err(dev, "mmap error (len 0x%08llx)\n", buf->size); + goto err_invoke; + } + + /* update the buffer to be able to deallocate the memory on the DSP */ + buf->raddr = (uintptr_t) rsp_msg.vaddr; + + /* let the client know the address to use */ + req.vaddrout = rsp_msg.vaddr; + + /* Add memory to static PD pool, protection thru hypervisor */ + if (req.flags == ADSP_MMAP_REMOTE_HEAP_ADDR && fl->cctx->vmcount) { + err = qcom_scm_assign_mem(buf->phys, (u64)buf->size, + &fl->cctx->perms, fl->cctx->vmperms, fl->cctx->vmcount); + if (err) { + dev_err(fl->sctx->dev, "Failed to assign memory phys 0x%llx size 0x%llx err %d", + buf->phys, buf->size, err); + goto err_assign; + } + } + + spin_lock(&fl->lock); + list_add_tail(&buf->node, &fl->mmaps); + spin_unlock(&fl->lock); + + if (copy_to_user((void __user *)argp, &req, sizeof(req))) { + err = -EFAULT; + goto err_assign; + } + + dev_dbg(dev, "mmap\t\tpt 0x%09lx OK [len 0x%08llx]\n", + buf->raddr, buf->size); + + return 0; + +err_assign: + fastrpc_req_munmap_impl(fl, buf); +err_invoke: + fastrpc_buf_free(buf); + + return err; +} + +static int fastrpc_req_mem_unmap_impl(struct fastrpc_user *fl, struct fastrpc_mem_unmap *req) +{ + struct fastrpc_invoke_args args[1] = { [0] = { 0 } }; + struct fastrpc_map *map = NULL, *iter, *m; + struct fastrpc_mem_unmap_req_msg req_msg = { 0 }; + int err = 0; + u32 sc; + struct device *dev = fl->sctx->dev; + + spin_lock(&fl->lock); + list_for_each_entry_safe(iter, m, &fl->maps, node) { + if ((req->fd < 0 || iter->fd == req->fd) && (iter->raddr == req->vaddr)) { + map = iter; + break; + } + } + + spin_unlock(&fl->lock); + + if (!map) { + dev_err(dev, "map not in list\n"); + return -EINVAL; + } + + req_msg.pgid = fl->tgid; + req_msg.len = map->len; + req_msg.vaddrin = map->raddr; + req_msg.fd = map->fd; + + args[0].ptr = (u64) (uintptr_t) &req_msg; + args[0].length = sizeof(req_msg); + + sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_MEM_UNMAP, 1, 0); + err = fastrpc_internal_invoke(fl, 
true, FASTRPC_INIT_HANDLE, sc, + &args[0]); + if (err) { + dev_err(dev, "unmmap\tpt fd = %d, 0x%09llx error\n", map->fd, map->raddr); + return err; + } + fastrpc_map_put(map); + + return 0; +} + +static int fastrpc_req_mem_unmap(struct fastrpc_user *fl, char __user *argp) +{ + struct fastrpc_mem_unmap req; + + if (copy_from_user(&req, argp, sizeof(req))) + return -EFAULT; + + return fastrpc_req_mem_unmap_impl(fl, &req); +} + +static int fastrpc_req_mem_map(struct fastrpc_user *fl, char __user *argp) +{ + struct fastrpc_invoke_args args[4] = { [0 ... 3] = { 0 } }; + struct fastrpc_mem_map_req_msg req_msg = { 0 }; + struct fastrpc_mmap_rsp_msg rsp_msg = { 0 }; + struct fastrpc_mem_unmap req_unmap = { 0 }; + struct fastrpc_phy_page pages = { 0 }; + struct fastrpc_mem_map req; + struct device *dev = fl->sctx->dev; + struct fastrpc_map *map = NULL; + int err; + u32 sc; + + if (copy_from_user(&req, argp, sizeof(req))) + return -EFAULT; + + /* create SMMU mapping */ + err = fastrpc_map_create(fl, req.fd, req.length, 0, &map); + if (err) { + dev_err(dev, "failed to map buffer, fd = %d\n", req.fd); + return err; + } + + req_msg.pgid = fl->tgid; + req_msg.fd = req.fd; + req_msg.offset = req.offset; + req_msg.vaddrin = req.vaddrin; + map->va = (void *) (uintptr_t) req.vaddrin; + req_msg.flags = req.flags; + req_msg.num = sizeof(pages); + req_msg.data_len = 0; + + args[0].ptr = (u64) (uintptr_t) &req_msg; + args[0].length = sizeof(req_msg); + + pages.addr = map->phys; + pages.size = map->size; + + args[1].ptr = (u64) (uintptr_t) &pages; + args[1].length = sizeof(pages); + + args[2].ptr = (u64) (uintptr_t) &pages; + args[2].length = 0; + + args[3].ptr = (u64) (uintptr_t) &rsp_msg; + args[3].length = sizeof(rsp_msg); + + sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_MEM_MAP, 3, 1); + err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, sc, &args[0]); + if (err) { + dev_err(dev, "mem mmap error, fd %d, vaddr %llx, size %lld\n", + req.fd, req.vaddrin, map->size); + goto err_invoke; + } + + /* update the buffer to be able to deallocate the memory on the DSP */ + map->raddr = rsp_msg.vaddr; + + /* let the client know the address to use */ + req.vaddrout = rsp_msg.vaddr; + + if (copy_to_user((void __user *)argp, &req, sizeof(req))) { + /* unmap the memory and release the buffer */ + req_unmap.vaddr = (uintptr_t) rsp_msg.vaddr; + req_unmap.length = map->size; + fastrpc_req_mem_unmap_impl(fl, &req_unmap); + return -EFAULT; + } + + return 0; + +err_invoke: + fastrpc_map_put(map); + + return err; +} + +static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct fastrpc_user *fl = (struct fastrpc_user *)file->private_data; + char __user *argp = (char __user *)arg; + int err; + + switch (cmd) { + case FASTRPC_IOCTL_INVOKE: + err = fastrpc_invoke(fl, argp); + break; + case FASTRPC_IOCTL_INIT_ATTACH: + err = fastrpc_init_attach(fl, ROOT_PD); + break; + case FASTRPC_IOCTL_INIT_ATTACH_SNS: + err = fastrpc_init_attach(fl, SENSORS_PD); + break; + case FASTRPC_IOCTL_INIT_CREATE_STATIC: + err = fastrpc_init_create_static_process(fl, argp); + break; + case FASTRPC_IOCTL_INIT_CREATE: + err = fastrpc_init_create_process(fl, argp); + break; + case FASTRPC_IOCTL_ALLOC_DMA_BUFF: + err = fastrpc_dmabuf_alloc(fl, argp); + break; + case FASTRPC_IOCTL_MMAP: + err = fastrpc_req_mmap(fl, argp); + break; + case FASTRPC_IOCTL_MUNMAP: + err = fastrpc_req_munmap(fl, argp); + break; + case FASTRPC_IOCTL_MEM_MAP: + err = fastrpc_req_mem_map(fl, argp); + break; + case FASTRPC_IOCTL_MEM_UNMAP: + 
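/* Tears down a mapping set up by FASTRPC_IOCTL_MEM_MAP */ + 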
err = fastrpc_req_mem_unmap(fl, argp); + break; + case FASTRPC_IOCTL_GET_DSP_INFO: + err = fastrpc_get_dsp_info(fl, argp); + break; + default: + err = -ENOTTY; + break; + } + + return err; +} + +static const struct file_operations fastrpc_fops = { + .open = fastrpc_device_open, + .release = fastrpc_device_release, + .unlocked_ioctl = fastrpc_device_ioctl, + .compat_ioctl = fastrpc_device_ioctl, +}; + +static int fastrpc_cb_probe(struct platform_device *pdev) +{ + struct fastrpc_channel_ctx *cctx; + struct fastrpc_session_ctx *sess; + struct device *dev = &pdev->dev; + int i, sessions = 0; + unsigned long flags; + int rc; + + cctx = dev_get_drvdata(dev->parent); + if (!cctx) + return -EINVAL; + + of_property_read_u32(dev->of_node, "qcom,nsessions", &sessions); + + spin_lock_irqsave(&cctx->lock, flags); + if (cctx->sesscount >= FASTRPC_MAX_SESSIONS) { + dev_err(&pdev->dev, "too many sessions\n"); + spin_unlock_irqrestore(&cctx->lock, flags); + return -ENOSPC; + } + sess = &cctx->session[cctx->sesscount++]; + sess->used = false; + sess->valid = true; + sess->dev = dev; + dev_set_drvdata(dev, sess); + + if (of_property_read_u32(dev->of_node, "reg", &sess->sid)) + dev_info(dev, "FastRPC Session ID not specified in DT\n"); + + if (sessions > 0) { + struct fastrpc_session_ctx *dup_sess; + + for (i = 1; i < sessions; i++) { + if (cctx->sesscount >= FASTRPC_MAX_SESSIONS) + break; + dup_sess = &cctx->session[cctx->sesscount++]; + memcpy(dup_sess, sess, sizeof(*dup_sess)); + } + } + spin_unlock_irqrestore(&cctx->lock, flags); + rc = dma_set_mask(dev, DMA_BIT_MASK(32)); + if (rc) { + dev_err(dev, "32-bit DMA enable failed\n"); + return rc; + } + + return 0; +} + +static int fastrpc_cb_remove(struct platform_device *pdev) +{ + struct fastrpc_channel_ctx *cctx = dev_get_drvdata(pdev->dev.parent); + struct fastrpc_session_ctx *sess = dev_get_drvdata(&pdev->dev); + unsigned long flags; + int i; + + spin_lock_irqsave(&cctx->lock, flags); + for (i = 1; i < FASTRPC_MAX_SESSIONS; i++) { + if (cctx->session[i].sid == sess->sid) { + cctx->session[i].valid = false; + cctx->sesscount--; + } + } + spin_unlock_irqrestore(&cctx->lock, flags); + + return 0; +} + +static const struct of_device_id fastrpc_match_table[] = { + { .compatible = "qcom,fastrpc-compute-cb", }, + {} +}; + +static struct platform_driver fastrpc_cb_driver = { + .probe = fastrpc_cb_probe, + .remove = fastrpc_cb_remove, + .driver = { + .name = "qcom,fastrpc-cb", + .of_match_table = fastrpc_match_table, + .suppress_bind_attrs = true, + }, +}; + +static int fastrpc_device_register(struct device *dev, struct fastrpc_channel_ctx *cctx, + bool is_secured, const char *domain) +{ + struct fastrpc_device *fdev; + int err; + + fdev = devm_kzalloc(dev, sizeof(*fdev), GFP_KERNEL); + if (!fdev) + return -ENOMEM; + + fdev->secure = is_secured; + fdev->cctx = cctx; + fdev->miscdev.minor = MISC_DYNAMIC_MINOR; + fdev->miscdev.fops = &fastrpc_fops; + fdev->miscdev.name = devm_kasprintf(dev, GFP_KERNEL, "fastrpc-%s%s", + domain, is_secured ? 
"-secure" : ""); + if (!fdev->miscdev.name) + return -ENOMEM; + + err = misc_register(&fdev->miscdev); + if (!err) { + if (is_secured) + cctx->secure_fdevice = fdev; + else + cctx->fdevice = fdev; + } + + return err; +} + +static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev) +{ + struct device *rdev = &rpdev->dev; + struct fastrpc_channel_ctx *data; + int i, err, domain_id = -1, vmcount; + const char *domain; + bool secure_dsp; + unsigned int vmids[FASTRPC_MAX_VMIDS]; + + err = of_property_read_string(rdev->of_node, "label", &domain); + if (err) { + dev_info(rdev, "FastRPC Domain not specified in DT\n"); + return err; + } + + for (i = 0; i <= CDSP_DOMAIN_ID; i++) { + if (!strcmp(domains[i], domain)) { + domain_id = i; + break; + } + } + + if (domain_id < 0) { + dev_info(rdev, "FastRPC Invalid Domain ID %d\n", domain_id); + return -EINVAL; + } + + if (of_reserved_mem_device_init_by_idx(rdev, rdev->of_node, 0)) + dev_info(rdev, "no reserved DMA memory for FASTRPC\n"); + + vmcount = of_property_read_variable_u32_array(rdev->of_node, + "qcom,vmids", &vmids[0], 0, FASTRPC_MAX_VMIDS); + if (vmcount < 0) + vmcount = 0; + else if (!qcom_scm_is_available()) + return -EPROBE_DEFER; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + if (vmcount) { + data->vmcount = vmcount; + data->perms = BIT(QCOM_SCM_VMID_HLOS); + for (i = 0; i < data->vmcount; i++) { + data->vmperms[i].vmid = vmids[i]; + data->vmperms[i].perm = QCOM_SCM_PERM_RWX; + } + } + + secure_dsp = !(of_property_read_bool(rdev->of_node, "qcom,non-secure-domain")); + data->secure = secure_dsp; + + switch (domain_id) { + case ADSP_DOMAIN_ID: + case MDSP_DOMAIN_ID: + case SDSP_DOMAIN_ID: + /* Unsigned PD offloading is only supported on CDSP*/ + data->unsigned_support = false; + err = fastrpc_device_register(rdev, data, secure_dsp, domains[domain_id]); + if (err) + goto fdev_error; + break; + case CDSP_DOMAIN_ID: + data->unsigned_support = true; + /* Create both device nodes so that we can allow both Signed and Unsigned PD */ + err = fastrpc_device_register(rdev, data, true, domains[domain_id]); + if (err) + goto fdev_error; + + err = fastrpc_device_register(rdev, data, false, domains[domain_id]); + if (err) + goto fdev_error; + break; + default: + err = -EINVAL; + goto fdev_error; + } + + kref_init(&data->refcount); + + dev_set_drvdata(&rpdev->dev, data); + rdev->dma_mask = &data->dma_mask; + dma_set_mask_and_coherent(rdev, DMA_BIT_MASK(32)); + INIT_LIST_HEAD(&data->users); + INIT_LIST_HEAD(&data->invoke_interrupted_mmaps); + spin_lock_init(&data->lock); + idr_init(&data->ctx_idr); + data->domain_id = domain_id; + data->rpdev = rpdev; + + err = of_platform_populate(rdev->of_node, NULL, NULL, rdev); + if (err) + goto populate_error; + + return 0; + +populate_error: + if (data->fdevice) + misc_deregister(&data->fdevice->miscdev); + if (data->secure_fdevice) + misc_deregister(&data->secure_fdevice->miscdev); + +fdev_error: + kfree(data); + return err; +} + +static void fastrpc_notify_users(struct fastrpc_user *user) +{ + struct fastrpc_invoke_ctx *ctx; + + spin_lock(&user->lock); + list_for_each_entry(ctx, &user->pending, node) { + ctx->retval = -EPIPE; + complete(&ctx->work); + } + spin_unlock(&user->lock); +} + +static void fastrpc_rpmsg_remove(struct rpmsg_device *rpdev) +{ + struct fastrpc_channel_ctx *cctx = dev_get_drvdata(&rpdev->dev); + struct fastrpc_buf *buf, *b; + struct fastrpc_user *user; + unsigned long flags; + + /* No invocations past this point */ + spin_lock_irqsave(&cctx->lock, flags); + 
cctx->rpdev = NULL; + list_for_each_entry(user, &cctx->users, user) + fastrpc_notify_users(user); + spin_unlock_irqrestore(&cctx->lock, flags); + + if (cctx->fdevice) + misc_deregister(&cctx->fdevice->miscdev); + + if (cctx->secure_fdevice) + misc_deregister(&cctx->secure_fdevice->miscdev); + + list_for_each_entry_safe(buf, b, &cctx->invoke_interrupted_mmaps, node) + list_del(&buf->node); + + if (cctx->remote_heap) + fastrpc_buf_free(cctx->remote_heap); + + of_platform_depopulate(&rpdev->dev); + + fastrpc_channel_ctx_put(cctx); +} + +static int fastrpc_rpmsg_callback(struct rpmsg_device *rpdev, void *data, + int len, void *priv, u32 addr) +{ + struct fastrpc_channel_ctx *cctx = dev_get_drvdata(&rpdev->dev); + struct fastrpc_invoke_rsp *rsp = data; + struct fastrpc_invoke_ctx *ctx; + unsigned long flags; + unsigned long ctxid; + + if (len < sizeof(*rsp)) + return -EINVAL; + + ctxid = ((rsp->ctx & FASTRPC_CTXID_MASK) >> 4); + + spin_lock_irqsave(&cctx->lock, flags); + ctx = idr_find(&cctx->ctx_idr, ctxid); + spin_unlock_irqrestore(&cctx->lock, flags); + + if (!ctx) { + dev_err(&rpdev->dev, "No context ID matches response\n"); + return -ENOENT; + } + + ctx->retval = rsp->retval; + complete(&ctx->work); + + /* + * The DMA buffer associated with the context cannot be freed in + * interrupt context so schedule it through a worker thread to + * avoid a kernel BUG. + */ + schedule_work(&ctx->put_work); + + return 0; +} + +static const struct of_device_id fastrpc_rpmsg_of_match[] = { + { .compatible = "qcom,fastrpc" }, + { }, +}; +MODULE_DEVICE_TABLE(of, fastrpc_rpmsg_of_match); + +static struct rpmsg_driver fastrpc_driver = { + .probe = fastrpc_rpmsg_probe, + .remove = fastrpc_rpmsg_remove, + .callback = fastrpc_rpmsg_callback, + .drv = { + .name = "qcom,fastrpc", + .of_match_table = fastrpc_rpmsg_of_match, + }, +}; + +static int fastrpc_init(void) +{ + int ret; + + ret = platform_driver_register(&fastrpc_cb_driver); + if (ret < 0) { + pr_err("fastrpc: failed to register cb driver\n"); + return ret; + } + + ret = register_rpmsg_driver(&fastrpc_driver); + if (ret < 0) { + pr_err("fastrpc: failed to register rpmsg driver\n"); + platform_driver_unregister(&fastrpc_cb_driver); + return ret; + } + + return 0; +} +module_init(fastrpc_init); + +static void fastrpc_exit(void) +{ + platform_driver_unregister(&fastrpc_cb_driver); + unregister_rpmsg_driver(&fastrpc_driver); +} +module_exit(fastrpc_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(DMA_BUF); diff --git a/drivers/misc/gehc-achc.c b/drivers/misc/gehc-achc.c new file mode 100644 index 0000000000..4c9c5394da --- /dev/null +++ b/drivers/misc/gehc-achc.c @@ -0,0 +1,566 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * datasheet: https://www.nxp.com/docs/en/data-sheet/K20P144M120SF3.pdf + * + * Copyright (C) 2018-2021 Collabora + * Copyright (C) 2018-2021 GE Healthcare + */ + +#include +#include +#include +#include +#include +#include +#include + +#define ACHC_MAX_FREQ_HZ 300000 +#define ACHC_FAST_READ_FREQ_HZ 1000000 + +struct achc_data { + struct spi_device *main; + struct spi_device *ezport; + struct gpio_desc *reset; + + struct mutex device_lock; /* avoid concurrent device access */ +}; + +#define EZPORT_RESET_DELAY_MS 100 +#define EZPORT_STARTUP_DELAY_MS 200 +#define EZPORT_WRITE_WAIT_MS 10 +#define EZPORT_TRANSFER_SIZE 2048 + +#define EZPORT_CMD_SP 0x02 /* flash section program */ +#define EZPORT_CMD_RDSR 0x05 /* read status register */ +#define EZPORT_CMD_WREN 0x06 /* write enable */ +#define EZPORT_CMD_FAST_READ 0x0b /* flash read data 
at high speed */ +#define EZPORT_CMD_RESET 0xb9 /* reset chip */ +#define EZPORT_CMD_BE 0xc7 /* bulk erase */ +#define EZPORT_CMD_SE 0xd8 /* sector erase */ + +#define EZPORT_SECTOR_SIZE 4096 +#define EZPORT_SECTOR_MASK (EZPORT_SECTOR_SIZE - 1) + +#define EZPORT_STATUS_WIP BIT(0) /* write in progress */ +#define EZPORT_STATUS_WEN BIT(1) /* write enable */ +#define EZPORT_STATUS_BEDIS BIT(2) /* bulk erase disable */ +#define EZPORT_STATUS_FLEXRAM BIT(3) /* FlexRAM mode */ +#define EZPORT_STATUS_WEF BIT(6) /* write error flag */ +#define EZPORT_STATUS_FS BIT(7) /* flash security */ + +static void ezport_reset(struct gpio_desc *reset) +{ + gpiod_set_value(reset, 1); + msleep(EZPORT_RESET_DELAY_MS); + gpiod_set_value(reset, 0); + msleep(EZPORT_STARTUP_DELAY_MS); +} + +static int ezport_start_programming(struct spi_device *spi, struct gpio_desc *reset) +{ + struct spi_message msg; + struct spi_transfer assert_cs = { + .cs_change = 1, + }; + struct spi_transfer release_cs = { }; + int ret; + + spi_bus_lock(spi->master); + + /* assert chip select */ + spi_message_init(&msg); + spi_message_add_tail(&assert_cs, &msg); + ret = spi_sync_locked(spi, &msg); + if (ret) + goto fail; + + msleep(EZPORT_STARTUP_DELAY_MS); + + /* reset with asserted chip select to switch into programming mode */ + ezport_reset(reset); + + /* release chip select */ + spi_message_init(&msg); + spi_message_add_tail(&release_cs, &msg); + ret = spi_sync_locked(spi, &msg); + +fail: + spi_bus_unlock(spi->master); + return ret; +} + +static void ezport_stop_programming(struct spi_device *spi, struct gpio_desc *reset) +{ + /* reset without asserted chip select to return into normal mode */ + spi_bus_lock(spi->master); + ezport_reset(reset); + spi_bus_unlock(spi->master); +} + +static int ezport_get_status_register(struct spi_device *spi) +{ + int ret; + + ret = spi_w8r8(spi, EZPORT_CMD_RDSR); + if (ret < 0) + return ret; + if (ret == 0xff) { + dev_err(&spi->dev, "Invalid EzPort status, EzPort is not functional!\n"); + return -EINVAL; + } + + return ret; +} + +static int ezport_soft_reset(struct spi_device *spi) +{ + u8 cmd = EZPORT_CMD_RESET; + int ret; + + ret = spi_write(spi, &cmd, 1); + if (ret < 0) + return ret; + + msleep(EZPORT_STARTUP_DELAY_MS); + + return 0; +} + +static int ezport_send_simple(struct spi_device *spi, u8 cmd) +{ + int ret; + + ret = spi_write(spi, &cmd, 1); + if (ret < 0) + return ret; + + return ezport_get_status_register(spi); +} + +static int ezport_wait_write(struct spi_device *spi, u32 retries) +{ + int ret; + u32 i; + + for (i = 0; i < retries; i++) { + ret = ezport_get_status_register(spi); + if (ret >= 0 && !(ret & EZPORT_STATUS_WIP)) + break; + msleep(EZPORT_WRITE_WAIT_MS); + } + + return ret; +} + +static int ezport_write_enable(struct spi_device *spi) +{ + int ret = 0, retries = 3; + + for (retries = 0; retries < 3; retries++) { + ret = ezport_send_simple(spi, EZPORT_CMD_WREN); + if (ret > 0 && ret & EZPORT_STATUS_WEN) + break; + } + + if (!(ret & EZPORT_STATUS_WEN)) { + dev_err(&spi->dev, "EzPort write enable timed out\n"); + return -ETIMEDOUT; + } + return 0; +} + +static int ezport_bulk_erase(struct spi_device *spi) +{ + int ret; + static const u8 cmd = EZPORT_CMD_BE; + + dev_dbg(&spi->dev, "EzPort bulk erase...\n"); + + ret = ezport_write_enable(spi); + if (ret < 0) + return ret; + + ret = spi_write(spi, &cmd, 1); + if (ret < 0) + return ret; + + ret = ezport_wait_write(spi, 1000); + if (ret < 0) + return ret; + + return 0; +} + +static int ezport_section_erase(struct spi_device *spi, u32 
address) +{ + u8 query[] = {EZPORT_CMD_SE, (address >> 16) & 0xff, (address >> 8) & 0xff, address & 0xff}; + int ret; + + dev_dbg(&spi->dev, "Ezport section erase @ 0x%06x...\n", address); + + if (address & EZPORT_SECTOR_MASK) + return -EINVAL; + + ret = ezport_write_enable(spi); + if (ret < 0) + return ret; + + ret = spi_write(spi, query, sizeof(query)); + if (ret < 0) + return ret; + + return ezport_wait_write(spi, 200); +} + +static int ezport_flash_transfer(struct spi_device *spi, u32 address, + const u8 *payload, size_t payload_size) +{ + struct spi_transfer xfers[2] = {}; + u8 *command; + int ret; + + dev_dbg(&spi->dev, "EzPort write %zu bytes @ 0x%06x...\n", payload_size, address); + + ret = ezport_write_enable(spi); + if (ret < 0) + return ret; + + command = kmalloc(4, GFP_KERNEL | GFP_DMA); + if (!command) + return -ENOMEM; + + command[0] = EZPORT_CMD_SP; + command[1] = address >> 16; + command[2] = address >> 8; + command[3] = address >> 0; + + xfers[0].tx_buf = command; + xfers[0].len = 4; + + xfers[1].tx_buf = payload; + xfers[1].len = payload_size; + + ret = spi_sync_transfer(spi, xfers, 2); + kfree(command); + if (ret < 0) + return ret; + + return ezport_wait_write(spi, 40); +} + +static int ezport_flash_compare(struct spi_device *spi, u32 address, + const u8 *payload, size_t payload_size) +{ + struct spi_transfer xfers[2] = {}; + u8 *buffer; + int ret; + + buffer = kmalloc(payload_size + 5, GFP_KERNEL | GFP_DMA); + if (!buffer) + return -ENOMEM; + + buffer[0] = EZPORT_CMD_FAST_READ; + buffer[1] = address >> 16; + buffer[2] = address >> 8; + buffer[3] = address >> 0; + + xfers[0].tx_buf = buffer; + xfers[0].len = 4; + xfers[0].speed_hz = ACHC_FAST_READ_FREQ_HZ; + + xfers[1].rx_buf = buffer + 4; + xfers[1].len = payload_size + 1; + xfers[1].speed_hz = ACHC_FAST_READ_FREQ_HZ; + + ret = spi_sync_transfer(spi, xfers, 2); + if (ret) + goto err; + + /* FAST_READ receives one dummy byte before the real data */ + ret = memcmp(payload, buffer + 4 + 1, payload_size); + if (ret) { + ret = -EBADMSG; + dev_dbg(&spi->dev, "Verification failure @ %06x", address); + print_hex_dump_bytes("fw: ", DUMP_PREFIX_OFFSET, payload, payload_size); + print_hex_dump_bytes("dev: ", DUMP_PREFIX_OFFSET, buffer + 4, payload_size); + } + +err: + kfree(buffer); + return ret; +} + +static int ezport_firmware_compare_data(struct spi_device *spi, + const u8 *data, size_t size) +{ + int ret; + size_t address = 0; + size_t transfer_size; + + dev_dbg(&spi->dev, "EzPort compare data with %zu bytes...\n", size); + + ret = ezport_get_status_register(spi); + if (ret < 0) + return ret; + + if (ret & EZPORT_STATUS_FS) { + dev_info(&spi->dev, "Device is in secure mode (status=0x%02x)!\n", ret); + dev_info(&spi->dev, "FW verification is not possible\n"); + return -EACCES; + } + + while (size - address > 0) { + transfer_size = min((size_t) EZPORT_TRANSFER_SIZE, size - address); + + ret = ezport_flash_compare(spi, address, data+address, transfer_size); + if (ret) + return ret; + + address += transfer_size; + } + + return 0; +} + +static int ezport_firmware_flash_data(struct spi_device *spi, + const u8 *data, size_t size) +{ + int ret; + size_t address = 0; + size_t transfer_size; + + dev_dbg(&spi->dev, "EzPort flash data with %zu bytes...\n", size); + + ret = ezport_get_status_register(spi); + if (ret < 0) + return ret; + + if (ret & EZPORT_STATUS_FS) { + ret = ezport_bulk_erase(spi); + if (ret < 0) + return ret; + if (ret & EZPORT_STATUS_FS) + return -EINVAL; + } + + while (size - address > 0) { + if (!(address & 
EZPORT_SECTOR_MASK)) { + ret = ezport_section_erase(spi, address); + if (ret < 0) + return ret; + if (ret & EZPORT_STATUS_WIP || ret & EZPORT_STATUS_WEF) + return -EIO; + } + + transfer_size = min((size_t) EZPORT_TRANSFER_SIZE, size - address); + + ret = ezport_flash_transfer(spi, address, + data+address, transfer_size); + if (ret < 0) + return ret; + else if (ret & EZPORT_STATUS_WIP) + return -ETIMEDOUT; + else if (ret & EZPORT_STATUS_WEF) + return -EIO; + + address += transfer_size; + } + + dev_dbg(&spi->dev, "EzPort verify flashed data...\n"); + ret = ezport_firmware_compare_data(spi, data, size); + + /* allow missing FW verfication in secure mode */ + if (ret == -EACCES) + ret = 0; + + if (ret < 0) + dev_err(&spi->dev, "Failed to verify flashed data: %d\n", ret); + + ret = ezport_soft_reset(spi); + if (ret < 0) + dev_warn(&spi->dev, "EzPort reset failed!\n"); + + return ret; +} + +static int ezport_firmware_load(struct spi_device *spi, const char *fwname) +{ + const struct firmware *fw; + int ret; + + ret = request_firmware(&fw, fwname, &spi->dev); + if (ret) { + dev_err(&spi->dev, "Could not get firmware: %d\n", ret); + return ret; + } + + ret = ezport_firmware_flash_data(spi, fw->data, fw->size); + + release_firmware(fw); + + return ret; +} + +/** + * ezport_flash - flash device firmware + * @spi: SPI device for NXP EzPort interface + * @reset: the gpio connected to the device reset pin + * @fwname: filename of the firmware that should be flashed + * + * Context: can sleep + * + * Return: 0 on success; negative errno on failure + */ +static int ezport_flash(struct spi_device *spi, struct gpio_desc *reset, const char *fwname) +{ + int ret; + + ret = ezport_start_programming(spi, reset); + if (ret) + return ret; + + ret = ezport_firmware_load(spi, fwname); + + ezport_stop_programming(spi, reset); + + if (ret) + dev_err(&spi->dev, "Failed to flash firmware: %d\n", ret); + else + dev_dbg(&spi->dev, "Finished FW flashing!\n"); + + return ret; +} + +static ssize_t update_firmware_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct achc_data *achc = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret < 0 || value != 1) + return -EINVAL; + + mutex_lock(&achc->device_lock); + ret = ezport_flash(achc->ezport, achc->reset, "achc.bin"); + mutex_unlock(&achc->device_lock); + + if (ret < 0) + return ret; + + return count; +} +static DEVICE_ATTR_WO(update_firmware); + +static ssize_t reset_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct achc_data *achc = dev_get_drvdata(dev); + int ret; + + mutex_lock(&achc->device_lock); + ret = gpiod_get_value(achc->reset); + mutex_unlock(&achc->device_lock); + + if (ret < 0) + return ret; + + return sysfs_emit(buf, "%d\n", ret); +} + +static ssize_t reset_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct achc_data *achc = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret < 0 || value > 1) + return -EINVAL; + + mutex_lock(&achc->device_lock); + gpiod_set_value(achc->reset, value); + mutex_unlock(&achc->device_lock); + + return count; +} +static DEVICE_ATTR_RW(reset); + +static struct attribute *gehc_achc_attrs[] = { + &dev_attr_update_firmware.attr, + &dev_attr_reset.attr, + NULL, +}; +ATTRIBUTE_GROUPS(gehc_achc); + +static void unregister_ezport(void *data) +{ + struct spi_device *ezport = data; + + spi_unregister_device(ezport); 
+} + +static int gehc_achc_probe(struct spi_device *spi) +{ + struct achc_data *achc; + int ezport_reg, ret; + + spi->max_speed_hz = ACHC_MAX_FREQ_HZ; + spi->bits_per_word = 8; + spi->mode = SPI_MODE_0; + + achc = devm_kzalloc(&spi->dev, sizeof(*achc), GFP_KERNEL); + if (!achc) + return -ENOMEM; + spi_set_drvdata(spi, achc); + achc->main = spi; + + mutex_init(&achc->device_lock); + + ret = of_property_read_u32_index(spi->dev.of_node, "reg", 1, &ezport_reg); + if (ret) + return dev_err_probe(&spi->dev, ret, "missing second reg entry!\n"); + + achc->ezport = spi_new_ancillary_device(spi, ezport_reg); + if (IS_ERR(achc->ezport)) + return PTR_ERR(achc->ezport); + + ret = devm_add_action_or_reset(&spi->dev, unregister_ezport, achc->ezport); + if (ret) + return ret; + + achc->reset = devm_gpiod_get(&spi->dev, "reset", GPIOD_OUT_LOW); + if (IS_ERR(achc->reset)) + return dev_err_probe(&spi->dev, PTR_ERR(achc->reset), "Could not get reset gpio\n"); + + return 0; +} + +static const struct spi_device_id gehc_achc_id[] = { + { "ge,achc", 0 }, + { "achc", 0 }, + { } +}; +MODULE_DEVICE_TABLE(spi, gehc_achc_id); + +static const struct of_device_id gehc_achc_of_match[] = { + { .compatible = "ge,achc" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, gehc_achc_of_match); + +static struct spi_driver gehc_achc_spi_driver = { + .driver = { + .name = "gehc-achc", + .of_match_table = gehc_achc_of_match, + .dev_groups = gehc_achc_groups, + }, + .probe = gehc_achc_probe, + .id_table = gehc_achc_id, +}; +module_spi_driver(gehc_achc_spi_driver); + +MODULE_DESCRIPTION("GEHC ACHC driver"); +MODULE_AUTHOR("Sebastian Reichel "); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/genwqe/Kconfig b/drivers/misc/genwqe/Kconfig new file mode 100644 index 0000000000..97f64bcf9f --- /dev/null +++ b/drivers/misc/genwqe/Kconfig @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# IBM Accelerator Family 'GenWQE' +# + +menuconfig GENWQE + tristate "GenWQE PCIe Accelerator" + depends on PCI && 64BIT + select CRC_ITU_T + help + Enables PCIe card driver for IBM GenWQE accelerators. + The user-space interface is described in + include/linux/genwqe/genwqe_card.h. + +config GENWQE_PLATFORM_ERROR_RECOVERY + int "Use platform recovery procedures (0=off, 1=on)" + depends on GENWQE + default 1 if PPC64 + default 0 diff --git a/drivers/misc/genwqe/Makefile b/drivers/misc/genwqe/Makefile new file mode 100644 index 0000000000..d9811ecbe4 --- /dev/null +++ b/drivers/misc/genwqe/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for GenWQE driver +# + +obj-$(CONFIG_GENWQE) := genwqe_card.o +genwqe_card-objs := card_base.o card_dev.o card_ddcb.o card_sysfs.o \ + card_debugfs.o card_utils.o diff --git a/drivers/misc/genwqe/card_base.c b/drivers/misc/genwqe/card_base.c new file mode 100644 index 0000000000..224a7e97cb --- /dev/null +++ b/drivers/misc/genwqe/card_base.c @@ -0,0 +1,1402 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * IBM Accelerator Family 'GenWQE' + * + * (C) Copyright IBM Corp. 2013 + * + * Author: Frank Haverkamp + * Author: Joerg-Stephan Vogt + * Author: Michael Jung + * Author: Michael Ruettger + */ + +/* + * Module initialization and PCIe setup. Card health monitoring and + * recovery functionality. Character device creation and deletion are + * controlled from here. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "card_base.h" +#include "card_ddcb.h" + +MODULE_AUTHOR("Frank Haverkamp "); +MODULE_AUTHOR("Michael Ruettger "); +MODULE_AUTHOR("Joerg-Stephan Vogt "); +MODULE_AUTHOR("Michael Jung "); + +MODULE_DESCRIPTION("GenWQE Card"); +MODULE_VERSION(DRV_VERSION); +MODULE_LICENSE("GPL"); + +static char genwqe_driver_name[] = GENWQE_DEVNAME; + +static struct dentry *debugfs_genwqe; +static struct genwqe_dev *genwqe_devices[GENWQE_CARD_NO_MAX]; + +/* PCI structure for identifying device by PCI vendor and device ID */ +static const struct pci_device_id genwqe_device_table[] = { + { .vendor = PCI_VENDOR_ID_IBM, + .device = PCI_DEVICE_GENWQE, + .subvendor = PCI_SUBVENDOR_ID_IBM, + .subdevice = PCI_SUBSYSTEM_ID_GENWQE5, + .class = (PCI_CLASSCODE_GENWQE5 << 8), + .class_mask = ~0, + .driver_data = 0 }, + + /* Initial SR-IOV bring-up image */ + { .vendor = PCI_VENDOR_ID_IBM, + .device = PCI_DEVICE_GENWQE, + .subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV, + .subdevice = PCI_SUBSYSTEM_ID_GENWQE5_SRIOV, + .class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8), + .class_mask = ~0, + .driver_data = 0 }, + + { .vendor = PCI_VENDOR_ID_IBM, /* VF Vendor ID */ + .device = 0x0000, /* VF Device ID */ + .subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV, + .subdevice = PCI_SUBSYSTEM_ID_GENWQE5_SRIOV, + .class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8), + .class_mask = ~0, + .driver_data = 0 }, + + /* Fixed up image */ + { .vendor = PCI_VENDOR_ID_IBM, + .device = PCI_DEVICE_GENWQE, + .subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV, + .subdevice = PCI_SUBSYSTEM_ID_GENWQE5, + .class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8), + .class_mask = ~0, + .driver_data = 0 }, + + { .vendor = PCI_VENDOR_ID_IBM, /* VF Vendor ID */ + .device = 0x0000, /* VF Device ID */ + .subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV, + .subdevice = PCI_SUBSYSTEM_ID_GENWQE5, + .class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8), + .class_mask = ~0, + .driver_data = 0 }, + + /* Even one more ... */ + { .vendor = PCI_VENDOR_ID_IBM, + .device = PCI_DEVICE_GENWQE, + .subvendor = PCI_SUBVENDOR_ID_IBM, + .subdevice = PCI_SUBSYSTEM_ID_GENWQE5_NEW, + .class = (PCI_CLASSCODE_GENWQE5 << 8), + .class_mask = ~0, + .driver_data = 0 }, + + { 0, } /* 0 terminated list. */ +}; + +MODULE_DEVICE_TABLE(pci, genwqe_device_table); + +/** + * genwqe_devnode() - Set default access mode for genwqe devices. + * @dev: Pointer to device (unused) + * @mode: Carrier to pass-back given mode (permissions) + * + * Default mode should be rw for everybody. Do not change default + * device name. + */ +static char *genwqe_devnode(const struct device *dev, umode_t *mode) +{ + if (mode) + *mode = 0666; + return NULL; +} + +static const struct class class_genwqe = { + .name = GENWQE_DEVNAME, + .devnode = genwqe_devnode, +}; + +/** + * genwqe_dev_alloc() - Create and prepare a new card descriptor + * + * Return: Pointer to card descriptor, or ERR_PTR(err) on error + */ +static struct genwqe_dev *genwqe_dev_alloc(void) +{ + unsigned int i = 0, j; + struct genwqe_dev *cd; + + for (i = 0; i < GENWQE_CARD_NO_MAX; i++) { + if (genwqe_devices[i] == NULL) + break; + } + if (i >= GENWQE_CARD_NO_MAX) + return ERR_PTR(-ENODEV); + + cd = kzalloc(sizeof(struct genwqe_dev), GFP_KERNEL); + if (!cd) + return ERR_PTR(-ENOMEM); + + cd->card_idx = i; + cd->class_genwqe = &class_genwqe; + cd->debugfs_genwqe = debugfs_genwqe; + + /* + * This comes from kernel config option and can be overritten via + * debugfs. 
+ */ + cd->use_platform_recovery = CONFIG_GENWQE_PLATFORM_ERROR_RECOVERY; + + init_waitqueue_head(&cd->queue_waitq); + + spin_lock_init(&cd->file_lock); + INIT_LIST_HEAD(&cd->file_list); + + cd->card_state = GENWQE_CARD_UNUSED; + spin_lock_init(&cd->print_lock); + + cd->ddcb_software_timeout = GENWQE_DDCB_SOFTWARE_TIMEOUT; + cd->kill_timeout = GENWQE_KILL_TIMEOUT; + + for (j = 0; j < GENWQE_MAX_VFS; j++) + cd->vf_jobtimeout_msec[j] = GENWQE_VF_JOBTIMEOUT_MSEC; + + genwqe_devices[i] = cd; + return cd; +} + +static void genwqe_dev_free(struct genwqe_dev *cd) +{ + if (!cd) + return; + + genwqe_devices[cd->card_idx] = NULL; + kfree(cd); +} + +/** + * genwqe_bus_reset() - Card recovery + * @cd: GenWQE device information + * + * pci_reset_function() will recover the device and ensure that the + * registers are accessible again when it completes with success. If + * not, the card will stay dead and its registers will remain + * inaccessible. + */ +static int genwqe_bus_reset(struct genwqe_dev *cd) +{ + int rc = 0; + struct pci_dev *pci_dev = cd->pci_dev; + void __iomem *mmio; + + if (cd->err_inject & GENWQE_INJECT_BUS_RESET_FAILURE) + return -EIO; + + mmio = cd->mmio; + cd->mmio = NULL; + pci_iounmap(pci_dev, mmio); + + pci_release_mem_regions(pci_dev); + + /* + * Firmware/BIOS might change memory mapping during bus reset. + * Settings like enable bus-mastering, ... are backed up and + * restored by the pci_reset_function(). + */ + dev_dbg(&pci_dev->dev, "[%s] pci_reset function ...\n", __func__); + rc = pci_reset_function(pci_dev); + if (rc) { + dev_err(&pci_dev->dev, + "[%s] err: failed reset func (rc %d)\n", __func__, rc); + return rc; + } + dev_dbg(&pci_dev->dev, "[%s] done with rc=%d\n", __func__, rc); + + /* + * Here is the right spot to clear the register read + * failure. pci_bus_reset() does this job in real systems. + */ + cd->err_inject &= ~(GENWQE_INJECT_HARDWARE_FAILURE | + GENWQE_INJECT_GFIR_FATAL | + GENWQE_INJECT_GFIR_INFO); + + rc = pci_request_mem_regions(pci_dev, genwqe_driver_name); + if (rc) { + dev_err(&pci_dev->dev, + "[%s] err: request bars failed (%d)\n", __func__, rc); + return -EIO; + } + + cd->mmio = pci_iomap(pci_dev, 0, 0); + if (cd->mmio == NULL) { + dev_err(&pci_dev->dev, + "[%s] err: mapping BAR0 failed\n", __func__); + return -ENOMEM; + } + return 0; +} + +/* + * Hardware circumvention section. Certain bitstreams in our test-lab + * had different kinds of problems. Here is where we adjust those + * bitstreams to function well with this version of our device driver. + * + * These circumventions are applied to the physical function only. + * The magical numbers below identify development/manufacturing + * versions of the bitstream used on the card. + * + * Turn off error reporting for old/manufacturing images. 
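+ * + * Note (editorial clarification, derived from the checks below): the + * bitstream build level is extracted as (cd->slu_unitcfg & 0xFFFF0ull) + * and compared against known cut-off levels such as 0x32170ull; see + * genwqe_need_err_masking() and genwqe_recovery_on_fatal_gfir_required().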
+ */ + +bool genwqe_need_err_masking(struct genwqe_dev *cd) +{ + return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull; +} + +static void genwqe_tweak_hardware(struct genwqe_dev *cd) +{ + struct pci_dev *pci_dev = cd->pci_dev; + + /* Mask FIRs for development images */ + if (((cd->slu_unitcfg & 0xFFFF0ull) >= 0x32000ull) && + ((cd->slu_unitcfg & 0xFFFF0ull) <= 0x33250ull)) { + dev_warn(&pci_dev->dev, + "FIRs masked due to bitstream %016llx.%016llx\n", + cd->slu_unitcfg, cd->app_unitcfg); + + __genwqe_writeq(cd, IO_APP_SEC_LEM_DEBUG_OVR, + 0xFFFFFFFFFFFFFFFFull); + + __genwqe_writeq(cd, IO_APP_ERR_ACT_MASK, + 0x0000000000000000ull); + } +} + +/** + * genwqe_recovery_on_fatal_gfir_required() - Version dependent actions + * @cd: GenWQE device information + * + * Bitstreams older than 2013-02-17 have a bug where fatal GFIRs must + * be ignored. This is e.g. true for the bitstream we gave to the card + * manufacturer, but also for some old bitstreams we released to our + * test-lab. + */ +int genwqe_recovery_on_fatal_gfir_required(struct genwqe_dev *cd) +{ + return (cd->slu_unitcfg & 0xFFFF0ull) >= 0x32170ull; +} + +int genwqe_flash_readback_fails(struct genwqe_dev *cd) +{ + return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull; +} + +/** + * genwqe_T_psec() - Calculate PF/VF timeout register content + * @cd: GenWQE device information + * + * Note: From a design perspective it turned out to be a bad idea to + * use codes here to specify the frequency/speed values. An old + * driver cannot understand new codes and is therefore always a + * problem. It is better to measure out the value or put the + * speed/frequency directly into a register which is always a valid + * value for old as well as for new software. + */ +/* T = 1/f */ +static int genwqe_T_psec(struct genwqe_dev *cd) +{ + u16 speed; /* 1/f -> 250, 200, 166, 175 */ + static const int T[] = { 4000, 5000, 6000, 5714 }; + + speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full); + if (speed >= ARRAY_SIZE(T)) + return -1; /* illegal value */ + + return T[speed]; +} + +/** + * genwqe_setup_pf_jtimer() - Setup PF hardware timeouts for DDCB execution + * @cd: GenWQE device information + * + * Do this _after_ card_reset() is called. Otherwise the values will + * vanish. The settings need to be done when the queues are inactive. + * + * The max. timeout value is 2^(10+x) * T (6ns for 166MHz) * 15/16. + * The min. timeout value is 2^(10+x) * T (6ns for 166MHz) * 14/16. 
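+ * + * Illustrative arithmetic (an editorial example, not taken from the + * hardware specification): with the 166 MHz speed code, genwqe_T_psec() + * returns T = 6000 psec, so for the default GENWQE_PF_JOBTIMEOUT_MSEC of + * 8000 msec the code below computes + * x = ilog2(8000 * 16000000000 / (6000 * 15)) - 10 = 30 - 10 = 20 + * and writes 0xff00 | 20 = 0xff14 into the PF copy (func 0) of + * IO_SLC_VF_APPJOB_TIMEOUT.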
+ */ +static bool genwqe_setup_pf_jtimer(struct genwqe_dev *cd) +{ + u32 T = genwqe_T_psec(cd); + u64 x; + + if (GENWQE_PF_JOBTIMEOUT_MSEC == 0) + return false; + + /* PF: large value needed, flash update 2sec per block */ + x = ilog2(GENWQE_PF_JOBTIMEOUT_MSEC * + 16000000000uL/(T * 15)) - 10; + + genwqe_write_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT, + 0xff00 | (x & 0xff), 0); + return true; +} + +/** + * genwqe_setup_vf_jtimer() - Setup VF hardware timeouts for DDCB execution + * @cd: GenWQE device information + */ +static bool genwqe_setup_vf_jtimer(struct genwqe_dev *cd) +{ + struct pci_dev *pci_dev = cd->pci_dev; + unsigned int vf; + u32 T = genwqe_T_psec(cd); + u64 x; + int totalvfs; + + totalvfs = pci_sriov_get_totalvfs(pci_dev); + if (totalvfs <= 0) + return false; + + for (vf = 0; vf < totalvfs; vf++) { + + if (cd->vf_jobtimeout_msec[vf] == 0) + continue; + + x = ilog2(cd->vf_jobtimeout_msec[vf] * + 16000000000uL/(T * 15)) - 10; + + genwqe_write_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT, + 0xff00 | (x & 0xff), vf + 1); + } + return true; +} + +static int genwqe_ffdc_buffs_alloc(struct genwqe_dev *cd) +{ + unsigned int type, e = 0; + + for (type = 0; type < GENWQE_DBG_UNITS; type++) { + switch (type) { + case GENWQE_DBG_UNIT0: + e = genwqe_ffdc_buff_size(cd, 0); + break; + case GENWQE_DBG_UNIT1: + e = genwqe_ffdc_buff_size(cd, 1); + break; + case GENWQE_DBG_UNIT2: + e = genwqe_ffdc_buff_size(cd, 2); + break; + case GENWQE_DBG_REGS: + e = GENWQE_FFDC_REGS; + break; + } + + /* currently support only the debug units mentioned here */ + cd->ffdc[type].entries = e; + cd->ffdc[type].regs = + kmalloc_array(e, sizeof(struct genwqe_reg), + GFP_KERNEL); + /* + * regs == NULL is ok, the using code treats this as no regs, + * Printing warning is ok in this case. + */ + } + return 0; +} + +static void genwqe_ffdc_buffs_free(struct genwqe_dev *cd) +{ + unsigned int type; + + for (type = 0; type < GENWQE_DBG_UNITS; type++) { + kfree(cd->ffdc[type].regs); + cd->ffdc[type].regs = NULL; + } +} + +static int genwqe_read_ids(struct genwqe_dev *cd) +{ + int err = 0; + int slu_id; + struct pci_dev *pci_dev = cd->pci_dev; + + cd->slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG); + if (cd->slu_unitcfg == IO_ILLEGAL_VALUE) { + dev_err(&pci_dev->dev, + "err: SLUID=%016llx\n", cd->slu_unitcfg); + err = -EIO; + goto out_err; + } + + slu_id = genwqe_get_slu_id(cd); + if (slu_id < GENWQE_SLU_ARCH_REQ || slu_id == 0xff) { + dev_err(&pci_dev->dev, + "err: incompatible SLU Architecture %u\n", slu_id); + err = -ENOENT; + goto out_err; + } + + cd->app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG); + if (cd->app_unitcfg == IO_ILLEGAL_VALUE) { + dev_err(&pci_dev->dev, + "err: APPID=%016llx\n", cd->app_unitcfg); + err = -EIO; + goto out_err; + } + genwqe_read_app_id(cd, cd->app_name, sizeof(cd->app_name)); + + /* + * Is access to all registers possible? If we are a VF the + * answer is obvious. If we run fully virtualized, we need to + * check if we can access all registers. If we do not have + * full access we will cause an UR and some informational FIRs + * in the PF, but that should not harm. + */ + if (pci_dev->is_virtfn) + cd->is_privileged = 0; + else + cd->is_privileged = (__genwqe_readq(cd, IO_SLU_BITSTREAM) + != IO_ILLEGAL_VALUE); + + out_err: + return err; +} + +static int genwqe_start(struct genwqe_dev *cd) +{ + int err; + struct pci_dev *pci_dev = cd->pci_dev; + + err = genwqe_read_ids(cd); + if (err) + return err; + + if (genwqe_is_privileged(cd)) { + /* do this after the tweaks. 
alloc fail is acceptable */ + genwqe_ffdc_buffs_alloc(cd); + genwqe_stop_traps(cd); + + /* Collect registers e.g. FIRs, UNITIDs, traces ... */ + genwqe_read_ffdc_regs(cd, cd->ffdc[GENWQE_DBG_REGS].regs, + cd->ffdc[GENWQE_DBG_REGS].entries, 0); + + genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT0, + cd->ffdc[GENWQE_DBG_UNIT0].regs, + cd->ffdc[GENWQE_DBG_UNIT0].entries); + + genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT1, + cd->ffdc[GENWQE_DBG_UNIT1].regs, + cd->ffdc[GENWQE_DBG_UNIT1].entries); + + genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT2, + cd->ffdc[GENWQE_DBG_UNIT2].regs, + cd->ffdc[GENWQE_DBG_UNIT2].entries); + + genwqe_start_traps(cd); + + if (cd->card_state == GENWQE_CARD_FATAL_ERROR) { + dev_warn(&pci_dev->dev, + "[%s] chip reload/recovery!\n", __func__); + + /* + * Stealth Mode: Reload chip on either hot + * reset or PERST. + */ + cd->softreset = 0x7Cull; + __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, + cd->softreset); + + err = genwqe_bus_reset(cd); + if (err != 0) { + dev_err(&pci_dev->dev, + "[%s] err: bus reset failed!\n", + __func__); + goto out; + } + + /* + * Re-read the IDs because + * it could happen that the bitstream load + * failed! + */ + err = genwqe_read_ids(cd); + if (err) + goto out; + } + } + + err = genwqe_setup_service_layer(cd); /* does a reset to the card */ + if (err != 0) { + dev_err(&pci_dev->dev, + "[%s] err: could not setup servicelayer!\n", __func__); + err = -ENODEV; + goto out; + } + + if (genwqe_is_privileged(cd)) { /* code is running _after_ reset */ + genwqe_tweak_hardware(cd); + + genwqe_setup_pf_jtimer(cd); + genwqe_setup_vf_jtimer(cd); + } + + err = genwqe_device_create(cd); + if (err < 0) { + dev_err(&pci_dev->dev, + "err: chdev init failed! (err=%d)\n", err); + goto out_release_service_layer; + } + return 0; + + out_release_service_layer: + genwqe_release_service_layer(cd); + out: + if (genwqe_is_privileged(cd)) + genwqe_ffdc_buffs_free(cd); + return -EIO; +} + +/** + * genwqe_stop() - Stop card operation + * @cd: GenWQE device information + * + * Recovery notes: + * As long as genwqe_thread runs we might access registers during + * error data capture. Same is with the genwqe_health_thread. + * When genwqe_bus_reset() fails this function might called two times: + * first by the genwqe_health_thread() and later by genwqe_remove() to + * unbind the device. We must be able to survive that. + * + * This function must be robust enough to be called twice. + */ +static int genwqe_stop(struct genwqe_dev *cd) +{ + genwqe_finish_queue(cd); /* no register access */ + genwqe_device_remove(cd); /* device removed, procs killed */ + genwqe_release_service_layer(cd); /* here genwqe_thread is stopped */ + + if (genwqe_is_privileged(cd)) { + pci_disable_sriov(cd->pci_dev); /* access pci config space */ + genwqe_ffdc_buffs_free(cd); + } + + return 0; +} + +/** + * genwqe_recover_card() - Try to recover the card if it is possible + * @cd: GenWQE device information + * @fatal_err: Indicate whether to attempt soft reset + * + * If fatal_err is set no register access is possible anymore. It is + * likely that genwqe_start fails in that situation. Proper error + * handling is required in this case. + * + * genwqe_bus_reset() will cause the pci code to call genwqe_remove() + * and later genwqe_probe() for all virtual functions. + */ +static int genwqe_recover_card(struct genwqe_dev *cd, int fatal_err) +{ + int rc; + struct pci_dev *pci_dev = cd->pci_dev; + + genwqe_stop(cd); + + /* + * Make sure chip is not reloaded to maintain FFDC. 
Write SLU + * Reset Register, CPLDReset field to 0. + */ + if (!fatal_err) { + cd->softreset = 0x70ull; + __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, cd->softreset); + } + + rc = genwqe_bus_reset(cd); + if (rc != 0) { + dev_err(&pci_dev->dev, + "[%s] err: card recovery impossible!\n", __func__); + return rc; + } + + rc = genwqe_start(cd); + if (rc < 0) { + dev_err(&pci_dev->dev, + "[%s] err: failed to launch device!\n", __func__); + return rc; + } + return 0; +} + +static int genwqe_health_check_cond(struct genwqe_dev *cd, u64 *gfir) +{ + *gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); + return (*gfir & GFIR_ERR_TRIGGER) && + genwqe_recovery_on_fatal_gfir_required(cd); +} + +/** + * genwqe_fir_checking() - Check the fault isolation registers of the card + * @cd: GenWQE device information + * + * If this code works ok, can be tried out with help of the genwqe_poke tool: + * sudo ./tools/genwqe_poke 0x8 0xfefefefefef + * + * Now the relevant FIRs/sFIRs should be printed out and the driver should + * invoke recovery (devices are removed and readded). + */ +static u64 genwqe_fir_checking(struct genwqe_dev *cd) +{ + int j, iterations = 0; + u64 mask, fir, fec, uid, gfir, gfir_masked, sfir, sfec; + u32 fir_addr, fir_clr_addr, fec_addr, sfir_addr, sfec_addr; + struct pci_dev *pci_dev = cd->pci_dev; + + healthMonitor: + iterations++; + if (iterations > 16) { + dev_err(&pci_dev->dev, "* exit looping after %d times\n", + iterations); + goto fatal_error; + } + + gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); + if (gfir != 0x0) + dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", + IO_SLC_CFGREG_GFIR, gfir); + if (gfir == IO_ILLEGAL_VALUE) + goto fatal_error; + + /* + * Avoid printing when to GFIR bit is on prevents contignous + * printout e.g. for the following bug: + * FIR set without a 2ndary FIR/FIR cannot be cleared + * Comment out the following if to get the prints: + */ + if (gfir == 0) + return 0; + + gfir_masked = gfir & GFIR_ERR_TRIGGER; /* fatal errors */ + + for (uid = 0; uid < GENWQE_MAX_UNITS; uid++) { /* 0..2 in zEDC */ + + /* read the primary FIR (pfir) */ + fir_addr = (uid << 24) + 0x08; + fir = __genwqe_readq(cd, fir_addr); + if (fir == 0x0) + continue; /* no error in this unit */ + + dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fir_addr, fir); + if (fir == IO_ILLEGAL_VALUE) + goto fatal_error; + + /* read primary FEC */ + fec_addr = (uid << 24) + 0x18; + fec = __genwqe_readq(cd, fec_addr); + + dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fec_addr, fec); + if (fec == IO_ILLEGAL_VALUE) + goto fatal_error; + + for (j = 0, mask = 1ULL; j < 64; j++, mask <<= 1) { + + /* secondary fir empty, skip it */ + if ((fir & mask) == 0x0) + continue; + + sfir_addr = (uid << 24) + 0x100 + 0x08 * j; + sfir = __genwqe_readq(cd, sfir_addr); + + if (sfir == IO_ILLEGAL_VALUE) + goto fatal_error; + dev_err(&pci_dev->dev, + "* 0x%08x 0x%016llx\n", sfir_addr, sfir); + + sfec_addr = (uid << 24) + 0x300 + 0x08 * j; + sfec = __genwqe_readq(cd, sfec_addr); + + if (sfec == IO_ILLEGAL_VALUE) + goto fatal_error; + dev_err(&pci_dev->dev, + "* 0x%08x 0x%016llx\n", sfec_addr, sfec); + + gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); + if (gfir == IO_ILLEGAL_VALUE) + goto fatal_error; + + /* gfir turned on during routine! get out and + start over. 
*/ + if ((gfir_masked == 0x0) && + (gfir & GFIR_ERR_TRIGGER)) { + goto healthMonitor; + } + + /* do not clear if we entered with a fatal gfir */ + if (gfir_masked == 0x0) { + + /* NEW clear by mask the logged bits */ + sfir_addr = (uid << 24) + 0x100 + 0x08 * j; + __genwqe_writeq(cd, sfir_addr, sfir); + + dev_dbg(&pci_dev->dev, + "[HM] Clearing 2ndary FIR 0x%08x with 0x%016llx\n", + sfir_addr, sfir); + + /* + * note, these cannot be error-Firs + * since gfir_masked is 0 after sfir + * was read. Also, it is safe to do + * this write if sfir=0. Still need to + * clear the primary. This just means + * there is no secondary FIR. + */ + + /* clear by mask the logged bit. */ + fir_clr_addr = (uid << 24) + 0x10; + __genwqe_writeq(cd, fir_clr_addr, mask); + + dev_dbg(&pci_dev->dev, + "[HM] Clearing primary FIR 0x%08x with 0x%016llx\n", + fir_clr_addr, mask); + } + } + } + gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); + if (gfir == IO_ILLEGAL_VALUE) + goto fatal_error; + + if ((gfir_masked == 0x0) && (gfir & GFIR_ERR_TRIGGER)) { + /* + * Check once more that it didn't go on after all the + * FIRS were cleared. + */ + dev_dbg(&pci_dev->dev, "ACK! Another FIR! Recursing %d!\n", + iterations); + goto healthMonitor; + } + return gfir_masked; + + fatal_error: + return IO_ILLEGAL_VALUE; +} + +/** + * genwqe_pci_fundamental_reset() - trigger a PCIe fundamental reset on the slot + * @pci_dev: PCI device information struct + * + * Note: pci_set_pcie_reset_state() is not implemented on all archs, so this + * reset method will not work in all cases. + * + * Return: 0 on success or error code from pci_set_pcie_reset_state() + */ +static int genwqe_pci_fundamental_reset(struct pci_dev *pci_dev) +{ + int rc; + + /* + * lock pci config space access from userspace, + * save state and issue PCIe fundamental reset + */ + pci_cfg_access_lock(pci_dev); + pci_save_state(pci_dev); + rc = pci_set_pcie_reset_state(pci_dev, pcie_warm_reset); + if (!rc) { + /* keep PCIe reset asserted for 250ms */ + msleep(250); + pci_set_pcie_reset_state(pci_dev, pcie_deassert_reset); + /* Wait for 2s to reload flash and train the link */ + msleep(2000); + } + pci_restore_state(pci_dev); + pci_cfg_access_unlock(pci_dev); + return rc; +} + + +static int genwqe_platform_recovery(struct genwqe_dev *cd) +{ + struct pci_dev *pci_dev = cd->pci_dev; + int rc; + + dev_info(&pci_dev->dev, + "[%s] resetting card for error recovery\n", __func__); + + /* Clear out error injection flags */ + cd->err_inject &= ~(GENWQE_INJECT_HARDWARE_FAILURE | + GENWQE_INJECT_GFIR_FATAL | + GENWQE_INJECT_GFIR_INFO); + + genwqe_stop(cd); + + /* Try recoverying the card with fundamental reset */ + rc = genwqe_pci_fundamental_reset(pci_dev); + if (!rc) { + rc = genwqe_start(cd); + if (!rc) + dev_info(&pci_dev->dev, + "[%s] card recovered\n", __func__); + else + dev_err(&pci_dev->dev, + "[%s] err: cannot start card services! (err=%d)\n", + __func__, rc); + } else { + dev_err(&pci_dev->dev, + "[%s] card reset failed\n", __func__); + } + + return rc; +} + +/** + * genwqe_reload_bistream() - reload card bitstream + * @cd: GenWQE device information + * + * Set the appropriate register and call fundamental reset to reaload the card + * bitstream. 
+ * + * Return: 0 on success, error code otherwise + */ +static int genwqe_reload_bistream(struct genwqe_dev *cd) +{ + struct pci_dev *pci_dev = cd->pci_dev; + int rc; + + dev_info(&pci_dev->dev, + "[%s] resetting card for bitstream reload\n", + __func__); + + genwqe_stop(cd); + + /* + * Cause a CPLD reprogram with the 'next_bitstream' + * partition on PCIe hot or fundamental reset + */ + __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, + (cd->softreset & 0xcull) | 0x70ull); + + rc = genwqe_pci_fundamental_reset(pci_dev); + if (rc) { + /* + * A fundamental reset failure can be caused + * by lack of support on the arch, so we just + * log the error and try to start the card + * again. + */ + dev_err(&pci_dev->dev, + "[%s] err: failed to reset card for bitstream reload\n", + __func__); + } + + rc = genwqe_start(cd); + if (rc) { + dev_err(&pci_dev->dev, + "[%s] err: cannot start card services! (err=%d)\n", + __func__, rc); + return rc; + } + dev_info(&pci_dev->dev, + "[%s] card reloaded\n", __func__); + return 0; +} + + +/** + * genwqe_health_thread() - Health checking thread + * @data: GenWQE device information + * + * This thread is only started for the PF of the card. + * + * This thread monitors the health of the card. A critical situation + * is when we read registers which contain -1 (IO_ILLEGAL_VALUE). In + * this case we need to be recovered from outside. Writing to + * registers will very likely not work either. + * + * This thread must only exit if kthread_should_stop() becomes true. + * + * Condition for the health-thread to trigger: + * a) when a kthread_stop() request comes in or + * b) a critical GFIR occured + * + * Informational GFIRs are checked and potentially printed in + * GENWQE_HEALTH_CHECK_INTERVAL seconds. + */ +static int genwqe_health_thread(void *data) +{ + int rc, should_stop = 0; + struct genwqe_dev *cd = data; + struct pci_dev *pci_dev = cd->pci_dev; + u64 gfir, gfir_masked, slu_unitcfg, app_unitcfg; + + health_thread_begin: + while (!kthread_should_stop()) { + rc = wait_event_interruptible_timeout(cd->health_waitq, + (genwqe_health_check_cond(cd, &gfir) || + (should_stop = kthread_should_stop())), + GENWQE_HEALTH_CHECK_INTERVAL * HZ); + + if (should_stop) + break; + + if (gfir == IO_ILLEGAL_VALUE) { + dev_err(&pci_dev->dev, + "[%s] GFIR=%016llx\n", __func__, gfir); + goto fatal_error; + } + + slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG); + if (slu_unitcfg == IO_ILLEGAL_VALUE) { + dev_err(&pci_dev->dev, + "[%s] SLU_UNITCFG=%016llx\n", + __func__, slu_unitcfg); + goto fatal_error; + } + + app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG); + if (app_unitcfg == IO_ILLEGAL_VALUE) { + dev_err(&pci_dev->dev, + "[%s] APP_UNITCFG=%016llx\n", + __func__, app_unitcfg); + goto fatal_error; + } + + gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); + if (gfir == IO_ILLEGAL_VALUE) { + dev_err(&pci_dev->dev, + "[%s] %s: GFIR=%016llx\n", __func__, + (gfir & GFIR_ERR_TRIGGER) ? "err" : "info", + gfir); + goto fatal_error; + } + + gfir_masked = genwqe_fir_checking(cd); + if (gfir_masked == IO_ILLEGAL_VALUE) + goto fatal_error; + + /* + * GFIR ErrorTrigger bits set => reset the card! + * Never do this for old/manufacturing images! + */ + if ((gfir_masked) && !cd->skip_recovery && + genwqe_recovery_on_fatal_gfir_required(cd)) { + + cd->card_state = GENWQE_CARD_FATAL_ERROR; + + rc = genwqe_recover_card(cd, 0); + if (rc < 0) { + /* FIXME Card is unusable and needs unbind! 
*/ + goto fatal_error; + } + } + + if (cd->card_state == GENWQE_CARD_RELOAD_BITSTREAM) { + /* Userspace requested card bitstream reload */ + rc = genwqe_reload_bistream(cd); + if (rc) + goto fatal_error; + } + + cd->last_gfir = gfir; + cond_resched(); + } + + return 0; + + fatal_error: + if (cd->use_platform_recovery) { + /* + * Since we use raw accessors, EEH errors won't be detected + * by the platform until we do a non-raw MMIO or config space + * read + */ + readq(cd->mmio + IO_SLC_CFGREG_GFIR); + + /* We do nothing if the card is going over PCI recovery */ + if (pci_channel_offline(pci_dev)) + return -EIO; + + /* + * If it's supported by the platform, we try a fundamental reset + * to recover from a fatal error. Otherwise, we continue to wait + * for an external recovery procedure to take care of it. + */ + rc = genwqe_platform_recovery(cd); + if (!rc) + goto health_thread_begin; + } + + dev_err(&pci_dev->dev, + "[%s] card unusable. Please trigger unbind!\n", __func__); + + /* Bring down logical devices to inform user space via udev remove. */ + cd->card_state = GENWQE_CARD_FATAL_ERROR; + genwqe_stop(cd); + + /* genwqe_bus_reset failed(). Now wait for genwqe_remove(). */ + while (!kthread_should_stop()) + cond_resched(); + + return -EIO; +} + +static int genwqe_health_check_start(struct genwqe_dev *cd) +{ + int rc; + + if (GENWQE_HEALTH_CHECK_INTERVAL <= 0) + return 0; /* valid for disabling the service */ + + /* moved before request_irq() */ + /* init_waitqueue_head(&cd->health_waitq); */ + + cd->health_thread = kthread_run(genwqe_health_thread, cd, + GENWQE_DEVNAME "%d_health", + cd->card_idx); + if (IS_ERR(cd->health_thread)) { + rc = PTR_ERR(cd->health_thread); + cd->health_thread = NULL; + return rc; + } + return 0; +} + +static int genwqe_health_thread_running(struct genwqe_dev *cd) +{ + return cd->health_thread != NULL; +} + +static int genwqe_health_check_stop(struct genwqe_dev *cd) +{ + if (!genwqe_health_thread_running(cd)) + return -EIO; + + kthread_stop(cd->health_thread); + cd->health_thread = NULL; + return 0; +} + +/** + * genwqe_pci_setup() - Allocate PCIe related resources for our card + * @cd: GenWQE device information + */ +static int genwqe_pci_setup(struct genwqe_dev *cd) +{ + int err; + struct pci_dev *pci_dev = cd->pci_dev; + + err = pci_enable_device_mem(pci_dev); + if (err) { + dev_err(&pci_dev->dev, + "err: failed to enable pci memory (err=%d)\n", err); + goto err_out; + } + + /* Reserve PCI I/O and memory resources */ + err = pci_request_mem_regions(pci_dev, genwqe_driver_name); + if (err) { + dev_err(&pci_dev->dev, + "[%s] err: request bars failed (%d)\n", __func__, err); + err = -EIO; + goto err_disable_device; + } + + /* check for 64-bit DMA address supported (DAC) */ + /* check for 32-bit DMA address supported (SAC) */ + if (dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64)) && + dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32))) { + dev_err(&pci_dev->dev, + "err: neither DMA32 nor DMA64 supported\n"); + err = -EIO; + goto out_release_resources; + } + + pci_set_master(pci_dev); + + /* EEH recovery requires PCIe fundamental reset */ + pci_dev->needs_freset = 1; + + /* request complete BAR-0 space (length = 0) */ + cd->mmio_len = pci_resource_len(pci_dev, 0); + cd->mmio = pci_iomap(pci_dev, 0, 0); + if (cd->mmio == NULL) { + dev_err(&pci_dev->dev, + "[%s] err: mapping BAR0 failed\n", __func__); + err = -ENOMEM; + goto out_release_resources; + } + + cd->num_vfs = pci_sriov_get_totalvfs(pci_dev); + if (cd->num_vfs < 0) + cd->num_vfs = 0; + + 
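/* read the SLU/APP unit IDs and find out whether register access is privileged */ +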
err = genwqe_read_ids(cd); + if (err) + goto out_iounmap; + + return 0; + + out_iounmap: + pci_iounmap(pci_dev, cd->mmio); + out_release_resources: + pci_release_mem_regions(pci_dev); + err_disable_device: + pci_disable_device(pci_dev); + err_out: + return err; +} + +/** + * genwqe_pci_remove() - Free PCIe related resources for our card + * @cd: GenWQE device information + */ +static void genwqe_pci_remove(struct genwqe_dev *cd) +{ + struct pci_dev *pci_dev = cd->pci_dev; + + if (cd->mmio) + pci_iounmap(pci_dev, cd->mmio); + + pci_release_mem_regions(pci_dev); + pci_disable_device(pci_dev); +} + +/** + * genwqe_probe() - Device initialization + * @pci_dev: PCI device information struct + * @id: PCI device ID + * + * Callable for multiple cards. This function is called on bind. + * + * Return: 0 if succeeded, < 0 when failed + */ +static int genwqe_probe(struct pci_dev *pci_dev, + const struct pci_device_id *id) +{ + int err; + struct genwqe_dev *cd; + + genwqe_init_crc32(); + + cd = genwqe_dev_alloc(); + if (IS_ERR(cd)) { + dev_err(&pci_dev->dev, "err: could not alloc mem (err=%d)!\n", + (int)PTR_ERR(cd)); + return PTR_ERR(cd); + } + + dev_set_drvdata(&pci_dev->dev, cd); + cd->pci_dev = pci_dev; + + err = genwqe_pci_setup(cd); + if (err < 0) { + dev_err(&pci_dev->dev, + "err: problems with PCI setup (err=%d)\n", err); + goto out_free_dev; + } + + err = genwqe_start(cd); + if (err < 0) { + dev_err(&pci_dev->dev, + "err: cannot start card services! (err=%d)\n", err); + goto out_pci_remove; + } + + if (genwqe_is_privileged(cd)) { + err = genwqe_health_check_start(cd); + if (err < 0) { + dev_err(&pci_dev->dev, + "err: cannot start health checking! (err=%d)\n", + err); + goto out_stop_services; + } + } + return 0; + + out_stop_services: + genwqe_stop(cd); + out_pci_remove: + genwqe_pci_remove(cd); + out_free_dev: + genwqe_dev_free(cd); + return err; +} + +/** + * genwqe_remove() - Called when device is removed (hot-plugable) + * @pci_dev: PCI device information struct + * + * Or when driver is unloaded respecitively when unbind is done. + */ +static void genwqe_remove(struct pci_dev *pci_dev) +{ + struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev); + + genwqe_health_check_stop(cd); + + /* + * genwqe_stop() must survive if it is called twice + * sequentially. This happens when the health thread calls it + * and fails on genwqe_bus_reset(). + */ + genwqe_stop(cd); + genwqe_pci_remove(cd); + genwqe_dev_free(cd); +} + +/** + * genwqe_err_error_detected() - Error detection callback + * @pci_dev: PCI device information struct + * @state: PCI channel state + * + * This callback is called by the PCI subsystem whenever a PCI bus + * error is detected. + */ +static pci_ers_result_t genwqe_err_error_detected(struct pci_dev *pci_dev, + pci_channel_state_t state) +{ + struct genwqe_dev *cd; + + dev_err(&pci_dev->dev, "[%s] state=%d\n", __func__, state); + + cd = dev_get_drvdata(&pci_dev->dev); + if (cd == NULL) + return PCI_ERS_RESULT_DISCONNECT; + + /* Stop the card */ + genwqe_health_check_stop(cd); + genwqe_stop(cd); + + /* + * On permanent failure, the PCI code will call device remove + * after the return of this function. + * genwqe_stop() can be called twice. 
+ */ + if (state == pci_channel_io_perm_failure) { + return PCI_ERS_RESULT_DISCONNECT; + } else { + genwqe_pci_remove(cd); + return PCI_ERS_RESULT_NEED_RESET; + } +} + +static pci_ers_result_t genwqe_err_slot_reset(struct pci_dev *pci_dev) +{ + int rc; + struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev); + + rc = genwqe_pci_setup(cd); + if (!rc) { + return PCI_ERS_RESULT_RECOVERED; + } else { + dev_err(&pci_dev->dev, + "err: problems with PCI setup (err=%d)\n", rc); + return PCI_ERS_RESULT_DISCONNECT; + } +} + +static pci_ers_result_t genwqe_err_result_none(struct pci_dev *dev) +{ + return PCI_ERS_RESULT_NONE; +} + +static void genwqe_err_resume(struct pci_dev *pci_dev) +{ + int rc; + struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev); + + rc = genwqe_start(cd); + if (!rc) { + rc = genwqe_health_check_start(cd); + if (rc) + dev_err(&pci_dev->dev, + "err: cannot start health checking! (err=%d)\n", + rc); + } else { + dev_err(&pci_dev->dev, + "err: cannot start card services! (err=%d)\n", rc); + } +} + +static int genwqe_sriov_configure(struct pci_dev *dev, int numvfs) +{ + int rc; + struct genwqe_dev *cd = dev_get_drvdata(&dev->dev); + + if (numvfs > 0) { + genwqe_setup_vf_jtimer(cd); + rc = pci_enable_sriov(dev, numvfs); + if (rc < 0) + return rc; + return numvfs; + } + if (numvfs == 0) { + pci_disable_sriov(dev); + return 0; + } + return 0; +} + +static const struct pci_error_handlers genwqe_err_handler = { + .error_detected = genwqe_err_error_detected, + .mmio_enabled = genwqe_err_result_none, + .slot_reset = genwqe_err_slot_reset, + .resume = genwqe_err_resume, +}; + +static struct pci_driver genwqe_driver = { + .name = genwqe_driver_name, + .id_table = genwqe_device_table, + .probe = genwqe_probe, + .remove = genwqe_remove, + .sriov_configure = genwqe_sriov_configure, + .err_handler = &genwqe_err_handler, +}; + +/** + * genwqe_init_module() - Driver registration and initialization + */ +static int __init genwqe_init_module(void) +{ + int rc; + + rc = class_register(&class_genwqe); + if (rc) { + pr_err("[%s] create class failed\n", __func__); + return -ENOMEM; + } + + debugfs_genwqe = debugfs_create_dir(GENWQE_DEVNAME, NULL); + + rc = pci_register_driver(&genwqe_driver); + if (rc != 0) { + pr_err("[%s] pci_reg_driver (rc=%d)\n", __func__, rc); + goto err_out0; + } + + return rc; + + err_out0: + debugfs_remove(debugfs_genwqe); + class_unregister(&class_genwqe); + return rc; +} + +/** + * genwqe_exit_module() - Driver exit + */ +static void __exit genwqe_exit_module(void) +{ + pci_unregister_driver(&genwqe_driver); + debugfs_remove(debugfs_genwqe); + class_unregister(&class_genwqe); +} + +module_init(genwqe_init_module); +module_exit(genwqe_exit_module); diff --git a/drivers/misc/genwqe/card_base.h b/drivers/misc/genwqe/card_base.h new file mode 100644 index 0000000000..d700266f2c --- /dev/null +++ b/drivers/misc/genwqe/card_base.h @@ -0,0 +1,577 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __CARD_BASE_H__ +#define __CARD_BASE_H__ + +/** + * IBM Accelerator Family 'GenWQE' + * + * (C) Copyright IBM Corp. 2013 + * + * Author: Frank Haverkamp + * Author: Joerg-Stephan Vogt + * Author: Michael Jung + * Author: Michael Ruettger + */ + +/* + * Interfaces within the GenWQE module. Defines genwqe_card and + * ddcb_queue as well as ddcb_requ. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "genwqe_driver.h" + +#define GENWQE_MSI_IRQS 4 /* Just one supported, no MSIx */ + +#define GENWQE_MAX_VFS 15 /* maximum 15 VFs are possible */ +#define GENWQE_MAX_FUNCS 16 /* 1 PF and 15 VFs */ +#define GENWQE_CARD_NO_MAX (16 * GENWQE_MAX_FUNCS) + +/* Compile parameters, some of them appear in debugfs for later adjustment */ +#define GENWQE_DDCB_MAX 32 /* DDCBs on the work-queue */ +#define GENWQE_POLLING_ENABLED 0 /* in case of irqs not working */ +#define GENWQE_DDCB_SOFTWARE_TIMEOUT 10 /* timeout per DDCB in seconds */ +#define GENWQE_KILL_TIMEOUT 8 /* time until process gets killed */ +#define GENWQE_VF_JOBTIMEOUT_MSEC 250 /* 250 msec */ +#define GENWQE_PF_JOBTIMEOUT_MSEC 8000 /* 8 sec should be ok */ +#define GENWQE_HEALTH_CHECK_INTERVAL 4 /* <= 0: disabled */ + +/* Sysfs attribute groups used when we create the genwqe device */ +extern const struct attribute_group *genwqe_attribute_groups[]; + +/* + * Config space for Genwqe5 A7: + * 00:[14 10 4b 04]40 00 10 00[00 00 00 12]00 00 00 00 + * 10: 0c 00 00 f0 07 3c 00 00 00 00 00 00 00 00 00 00 + * 20: 00 00 00 00 00 00 00 00 00 00 00 00[14 10 4b 04] + * 30: 00 00 00 00 50 00 00 00 00 00 00 00 00 00 00 00 + */ +#define PCI_DEVICE_GENWQE 0x044b /* Genwqe DeviceID */ + +#define PCI_SUBSYSTEM_ID_GENWQE5 0x035f /* Genwqe A5 Subsystem-ID */ +#define PCI_SUBSYSTEM_ID_GENWQE5_NEW 0x044b /* Genwqe A5 Subsystem-ID */ +#define PCI_CLASSCODE_GENWQE5 0x1200 /* UNKNOWN */ + +#define PCI_SUBVENDOR_ID_IBM_SRIOV 0x0000 +#define PCI_SUBSYSTEM_ID_GENWQE5_SRIOV 0x0000 /* Genwqe A5 Subsystem-ID */ +#define PCI_CLASSCODE_GENWQE5_SRIOV 0x1200 /* UNKNOWN */ + +#define GENWQE_SLU_ARCH_REQ 2 /* Required SLU architecture level */ + +/** + * struct genwqe_reg - Genwqe data dump functionality + */ +struct genwqe_reg { + u32 addr; + u32 idx; + u64 val; +}; + +/* + * enum genwqe_dbg_type - Specify chip unit to dump/debug + */ +enum genwqe_dbg_type { + GENWQE_DBG_UNIT0 = 0, /* captured before prev errs cleared */ + GENWQE_DBG_UNIT1 = 1, + GENWQE_DBG_UNIT2 = 2, + GENWQE_DBG_UNIT3 = 3, + GENWQE_DBG_UNIT4 = 4, + GENWQE_DBG_UNIT5 = 5, + GENWQE_DBG_UNIT6 = 6, + GENWQE_DBG_UNIT7 = 7, + GENWQE_DBG_REGS = 8, + GENWQE_DBG_DMA = 9, + GENWQE_DBG_UNITS = 10, /* max number of possible debug units */ +}; + +/* Software error injection to simulate card failures */ +#define GENWQE_INJECT_HARDWARE_FAILURE 0x00000001 /* injects -1 reg reads */ +#define GENWQE_INJECT_BUS_RESET_FAILURE 0x00000002 /* pci_bus_reset fail */ +#define GENWQE_INJECT_GFIR_FATAL 0x00000004 /* GFIR = 0x0000ffff */ +#define GENWQE_INJECT_GFIR_INFO 0x00000008 /* GFIR = 0xffff0000 */ + +/* + * Genwqe card description and management data. + * + * Error-handling in case of card malfunction + * ------------------------------------------ + * + * If the card is detected to be defective the outside environment + * will cause the PCI layer to call deinit (the cleanup function for + * probe). This is the same effect like doing a unbind/bind operation + * on the card. + * + * The genwqe card driver implements a health checking thread which + * verifies the card function. If this detects a problem the cards + * device is being shutdown and restarted again, along with a reset of + * the card and queue. + * + * All functions accessing the card device return either -EIO or -ENODEV + * code to indicate the malfunction to the user. 
The user has to close + * the file descriptor and open a new one, once the card becomes + * available again. + * + * If the open file descriptor is setup to receive SIGIO, the signal is + * genereated for the application which has to provide a handler to + * react on it. If the application does not close the open + * file descriptor a SIGKILL is send to enforce freeing the cards + * resources. + * + * I did not find a different way to prevent kernel problems due to + * reference counters for the cards character devices getting out of + * sync. The character device deallocation does not block, even if + * there is still an open file descriptor pending. If this pending + * descriptor is closed, the data structures used by the character + * device is reinstantiated, which will lead to the reference counter + * dropping below the allowed values. + * + * Card recovery + * ------------- + * + * To test the internal driver recovery the following command can be used: + * sudo sh -c 'echo 0xfffff > /sys/class/genwqe/genwqe0_card/err_inject' + */ + + +/** + * struct dma_mapping_type - Mapping type definition + * + * To avoid memcpying data arround we use user memory directly. To do + * this we need to pin/swap-in the memory and request a DMA address + * for it. + */ +enum dma_mapping_type { + GENWQE_MAPPING_RAW = 0, /* contignous memory buffer */ + GENWQE_MAPPING_SGL_TEMP, /* sglist dynamically used */ + GENWQE_MAPPING_SGL_PINNED, /* sglist used with pinning */ +}; + +/** + * struct dma_mapping - Information about memory mappings done by the driver + */ +struct dma_mapping { + enum dma_mapping_type type; + + void *u_vaddr; /* user-space vaddr/non-aligned */ + void *k_vaddr; /* kernel-space vaddr/non-aligned */ + dma_addr_t dma_addr; /* physical DMA address */ + + struct page **page_list; /* list of pages used by user buff */ + dma_addr_t *dma_list; /* list of dma addresses per page */ + unsigned int nr_pages; /* number of pages */ + unsigned int size; /* size in bytes */ + + struct list_head card_list; /* list of usr_maps for card */ + struct list_head pin_list; /* list of pinned memory for dev */ + int write; /* writable map? useful in unmapping */ +}; + +static inline void genwqe_mapping_init(struct dma_mapping *m, + enum dma_mapping_type type) +{ + memset(m, 0, sizeof(*m)); + m->type = type; + m->write = 1; /* Assume the maps we create are R/W */ +} + +/** + * struct ddcb_queue - DDCB queue data + * @ddcb_max: Number of DDCBs on the queue + * @ddcb_next: Next free DDCB + * @ddcb_act: Next DDCB supposed to finish + * @ddcb_seq: Sequence number of last DDCB + * @ddcbs_in_flight: Currently enqueued DDCBs + * @ddcbs_completed: Number of already completed DDCBs + * @return_on_busy: Number of -EBUSY returns on full queue + * @wait_on_busy: Number of waits on full queue + * @ddcb_daddr: DMA address of first DDCB in the queue + * @ddcb_vaddr: Kernel virtual address of first DDCB in the queue + * @ddcb_req: Associated requests (one per DDCB) + * @ddcb_waitqs: Associated wait queues (one per DDCB) + * @ddcb_lock: Lock to protect queuing operations + * @ddcb_waitq: Wait on next DDCB finishing + */ + +struct ddcb_queue { + int ddcb_max; /* amount of DDCBs */ + int ddcb_next; /* next available DDCB num */ + int ddcb_act; /* DDCB to be processed */ + u16 ddcb_seq; /* slc seq num */ + unsigned int ddcbs_in_flight; /* number of ddcbs in processing */ + unsigned int ddcbs_completed; + unsigned int ddcbs_max_in_flight; + unsigned int return_on_busy; /* how many times -EBUSY? 
*/ + unsigned int wait_on_busy; + + dma_addr_t ddcb_daddr; /* DMA address */ + struct ddcb *ddcb_vaddr; /* kernel virtual addr for DDCBs */ + struct ddcb_requ **ddcb_req; /* ddcb processing parameter */ + wait_queue_head_t *ddcb_waitqs; /* waitqueue per ddcb */ + + spinlock_t ddcb_lock; /* exclusive access to queue */ + wait_queue_head_t busy_waitq; /* wait for ddcb processing */ + + /* registers or the respective queue to be used */ + u32 IO_QUEUE_CONFIG; + u32 IO_QUEUE_STATUS; + u32 IO_QUEUE_SEGMENT; + u32 IO_QUEUE_INITSQN; + u32 IO_QUEUE_WRAP; + u32 IO_QUEUE_OFFSET; + u32 IO_QUEUE_WTIME; + u32 IO_QUEUE_ERRCNTS; + u32 IO_QUEUE_LRW; +}; + +/* + * GFIR, SLU_UNITCFG, APP_UNITCFG + * 8 Units with FIR/FEC + 64 * 2ndary FIRS/FEC. + */ +#define GENWQE_FFDC_REGS (3 + (8 * (2 + 2 * 64))) + +struct genwqe_ffdc { + unsigned int entries; + struct genwqe_reg *regs; +}; + +/** + * struct genwqe_dev - GenWQE device information + * @card_state: Card operation state, see above + * @ffdc: First Failure Data Capture buffers for each unit + * @card_thread: Working thread to operate the DDCB queue + * @card_waitq: Wait queue used in card_thread + * @queue: DDCB queue + * @health_thread: Card monitoring thread (only for PFs) + * @health_waitq: Wait queue used in health_thread + * @pci_dev: Associated PCI device (function) + * @mmio: Base address of 64-bit register space + * @mmio_len: Length of register area + * @file_lock: Lock to protect access to file_list + * @file_list: List of all processes with open GenWQE file descriptors + * + * This struct contains all information needed to communicate with a + * GenWQE card. It is initialized when a GenWQE device is found and + * destroyed when it goes away. It holds data to maintain the queue as + * well as data needed to feed the user interfaces. 
+ */ +struct genwqe_dev { + enum genwqe_card_state card_state; + spinlock_t print_lock; + + int card_idx; /* card index 0..CARD_NO_MAX-1 */ + u64 flags; /* general flags */ + + /* FFDC data gathering */ + struct genwqe_ffdc ffdc[GENWQE_DBG_UNITS]; + + /* DDCB workqueue */ + struct task_struct *card_thread; + wait_queue_head_t queue_waitq; + struct ddcb_queue queue; /* genwqe DDCB queue */ + unsigned int irqs_processed; + + /* Card health checking thread */ + struct task_struct *health_thread; + wait_queue_head_t health_waitq; + + int use_platform_recovery; /* use platform recovery mechanisms */ + + /* char device */ + dev_t devnum_genwqe; /* major/minor num card */ + const struct class *class_genwqe; /* reference to class object */ + struct device *dev; /* for device creation */ + struct cdev cdev_genwqe; /* char device for card */ + + struct dentry *debugfs_root; /* debugfs card root directory */ + struct dentry *debugfs_genwqe; /* debugfs driver root directory */ + + /* pci resources */ + struct pci_dev *pci_dev; /* PCI device */ + void __iomem *mmio; /* BAR-0 MMIO start */ + unsigned long mmio_len; + int num_vfs; + u32 vf_jobtimeout_msec[GENWQE_MAX_VFS]; + int is_privileged; /* access to all regs possible */ + + /* config regs which we need often */ + u64 slu_unitcfg; + u64 app_unitcfg; + u64 softreset; + u64 err_inject; + u64 last_gfir; + char app_name[5]; + + spinlock_t file_lock; /* lock for open files */ + struct list_head file_list; /* list of open files */ + + /* debugfs parameters */ + int ddcb_software_timeout; /* wait until DDCB times out */ + int skip_recovery; /* circumvention if recovery fails */ + int kill_timeout; /* wait after sending SIGKILL */ +}; + +/** + * enum genwqe_requ_state - State of a DDCB execution request + */ +enum genwqe_requ_state { + GENWQE_REQU_NEW = 0, + GENWQE_REQU_ENQUEUED = 1, + GENWQE_REQU_TAPPED = 2, + GENWQE_REQU_FINISHED = 3, + GENWQE_REQU_STATE_MAX, +}; + +/** + * struct genwqe_sgl - Scatter gather list describing user-space memory + * @sgl: scatter gather list needs to be 128 byte aligned + * @sgl_dma_addr: dma address of sgl + * @sgl_size: size of area used for sgl + * @user_addr: user-space address of memory area + * @user_size: size of user-space memory area + * @page: buffer for partial pages if needed + * @page_dma_addr: dma address partial pages + * @write: should we write it back to userspace? 
+ */ +struct genwqe_sgl { + dma_addr_t sgl_dma_addr; + struct sg_entry *sgl; + size_t sgl_size; /* size of sgl */ + + void __user *user_addr; /* user-space base-address */ + size_t user_size; /* size of memory area */ + + int write; + + unsigned long nr_pages; + unsigned long fpage_offs; + size_t fpage_size; + size_t lpage_size; + + void *fpage; + dma_addr_t fpage_dma_addr; + + void *lpage; + dma_addr_t lpage_dma_addr; +}; + +int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, + void __user *user_addr, size_t user_size, int write); + +int genwqe_setup_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, + dma_addr_t *dma_list); + +int genwqe_free_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl); + +/** + * struct ddcb_requ - Kernel internal representation of the DDCB request + * @cmd: User space representation of the DDCB execution request + */ +struct ddcb_requ { + /* kernel specific content */ + enum genwqe_requ_state req_state; /* request status */ + int num; /* ddcb_no for this request */ + struct ddcb_queue *queue; /* associated queue */ + + struct dma_mapping dma_mappings[DDCB_FIXUPS]; + struct genwqe_sgl sgls[DDCB_FIXUPS]; + + /* kernel/user shared content */ + struct genwqe_ddcb_cmd cmd; /* ddcb_no for this request */ + struct genwqe_debug_data debug_data; +}; + +/** + * struct genwqe_file - Information for open GenWQE devices + */ +struct genwqe_file { + struct genwqe_dev *cd; + struct genwqe_driver *client; + struct file *filp; + + struct fasync_struct *async_queue; + struct pid *opener; + struct list_head list; /* entry in list of open files */ + + spinlock_t map_lock; /* lock for dma_mappings */ + struct list_head map_list; /* list of dma_mappings */ + + spinlock_t pin_lock; /* lock for pinned memory */ + struct list_head pin_list; /* list of pinned memory */ +}; + +int genwqe_setup_service_layer(struct genwqe_dev *cd); /* for PF only */ +int genwqe_finish_queue(struct genwqe_dev *cd); +int genwqe_release_service_layer(struct genwqe_dev *cd); + +/** + * genwqe_get_slu_id() - Read Service Layer Unit Id + * Return: 0x00: Development code + * 0x01: SLC1 (old) + * 0x02: SLC2 (sept2012) + * 0x03: SLC2 (feb2013, generic driver) + */ +static inline int genwqe_get_slu_id(struct genwqe_dev *cd) +{ + return (int)((cd->slu_unitcfg >> 32) & 0xff); +} + +int genwqe_ddcbs_in_flight(struct genwqe_dev *cd); + +u8 genwqe_card_type(struct genwqe_dev *cd); +int genwqe_card_reset(struct genwqe_dev *cd); +int genwqe_set_interrupt_capability(struct genwqe_dev *cd, int count); +void genwqe_reset_interrupt_capability(struct genwqe_dev *cd); + +int genwqe_device_create(struct genwqe_dev *cd); +int genwqe_device_remove(struct genwqe_dev *cd); + +/* debugfs */ +void genwqe_init_debugfs(struct genwqe_dev *cd); +void genqwe_exit_debugfs(struct genwqe_dev *cd); + +int genwqe_read_softreset(struct genwqe_dev *cd); + +/* Hardware Circumventions */ +int genwqe_recovery_on_fatal_gfir_required(struct genwqe_dev *cd); +int genwqe_flash_readback_fails(struct genwqe_dev *cd); + +/** + * genwqe_write_vreg() - Write register in VF window + * @cd: genwqe device + * @reg: register address + * @val: value to write + * @func: 0: PF, 1: VF0, ..., 15: VF14 + */ +int genwqe_write_vreg(struct genwqe_dev *cd, u32 reg, u64 val, int func); + +/** + * genwqe_read_vreg() - Read register in VF window + * @cd: genwqe device + * @reg: register address + * @func: 0: PF, 1: VF0, ..., 15: VF14 + * + * Return: content of the register + */ +u64 genwqe_read_vreg(struct genwqe_dev *cd, u32 reg, int func); + +/* 
FFDC Buffer Management */ +int genwqe_ffdc_buff_size(struct genwqe_dev *cd, int unit_id); +int genwqe_ffdc_buff_read(struct genwqe_dev *cd, int unit_id, + struct genwqe_reg *regs, unsigned int max_regs); +int genwqe_read_ffdc_regs(struct genwqe_dev *cd, struct genwqe_reg *regs, + unsigned int max_regs, int all); +int genwqe_ffdc_dump_dma(struct genwqe_dev *cd, + struct genwqe_reg *regs, unsigned int max_regs); + +int genwqe_init_debug_data(struct genwqe_dev *cd, + struct genwqe_debug_data *d); + +void genwqe_init_crc32(void); +int genwqe_read_app_id(struct genwqe_dev *cd, char *app_name, int len); + +/* Memory allocation/deallocation; dma address handling */ +int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m, + void *uaddr, unsigned long size); + +int genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m); + +static inline bool dma_mapping_used(struct dma_mapping *m) +{ + if (!m) + return false; + return m->size != 0; +} + +/** + * __genwqe_execute_ddcb() - Execute DDCB request with addr translation + * + * This function will do the address translation changes to the DDCBs + * according to the definitions required by the ATS field. It looks up + * the memory allocation buffer or does vmap/vunmap for the respective + * user-space buffers, inclusive page pinning and scatter gather list + * buildup and teardown. + */ +int __genwqe_execute_ddcb(struct genwqe_dev *cd, + struct genwqe_ddcb_cmd *cmd, unsigned int f_flags); + +/** + * __genwqe_execute_raw_ddcb() - Execute DDCB request without addr translation + * + * This version will not do address translation or any modification of + * the DDCB data. It is used e.g. for the MoveFlash DDCB which is + * entirely prepared by the driver itself. That means the appropriate + * DMA addresses are already in the DDCB and do not need any + * modification. + */ +int __genwqe_execute_raw_ddcb(struct genwqe_dev *cd, + struct genwqe_ddcb_cmd *cmd, + unsigned int f_flags); +int __genwqe_enqueue_ddcb(struct genwqe_dev *cd, + struct ddcb_requ *req, + unsigned int f_flags); + +int __genwqe_wait_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req); +int __genwqe_purge_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req); + +/* register access */ +int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val); +u64 __genwqe_readq(struct genwqe_dev *cd, u64 byte_offs); +int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val); +u32 __genwqe_readl(struct genwqe_dev *cd, u64 byte_offs); + +void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size, + dma_addr_t *dma_handle); +void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size, + void *vaddr, dma_addr_t dma_handle); + +/* Base clock frequency in MHz */ +int genwqe_base_clock_frequency(struct genwqe_dev *cd); + +/* Before FFDC is captured the traps should be stopped. */ +void genwqe_stop_traps(struct genwqe_dev *cd); +void genwqe_start_traps(struct genwqe_dev *cd); + +/* Hardware circumvention */ +bool genwqe_need_err_masking(struct genwqe_dev *cd); + +/** + * genwqe_is_privileged() - Determine operation mode for PCI function + * + * On Intel with SRIOV support we see: + * PF: is_physfn = 1 is_virtfn = 0 + * VF: is_physfn = 0 is_virtfn = 1 + * + * On Systems with no SRIOV support _and_ virtualized systems we get: + * is_physfn = 0 is_virtfn = 0 + * + * Other vendors have individual pci device ids to distinguish between + * virtual function drivers and physical function drivers. GenWQE + * unfortunately has just on pci device id for both, VFs and PF. 
+ * + * The following code is used to distinguish if the card is running in + * privileged mode, either as true PF or in a virtualized system with + * full register access e.g. currently on PowerPC. + * + * if (pci_dev->is_virtfn) + * cd->is_privileged = 0; + * else + * cd->is_privileged = (__genwqe_readq(cd, IO_SLU_BITSTREAM) + * != IO_ILLEGAL_VALUE); + */ +static inline int genwqe_is_privileged(struct genwqe_dev *cd) +{ + return cd->is_privileged; +} + +#endif /* __CARD_BASE_H__ */ diff --git a/drivers/misc/genwqe/card_ddcb.c b/drivers/misc/genwqe/card_ddcb.c new file mode 100644 index 0000000000..500b1feaf1 --- /dev/null +++ b/drivers/misc/genwqe/card_ddcb.c @@ -0,0 +1,1411 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * IBM Accelerator Family 'GenWQE' + * + * (C) Copyright IBM Corp. 2013 + * + * Author: Frank Haverkamp + * Author: Joerg-Stephan Vogt + * Author: Michael Jung + * Author: Michael Ruettger + */ + +/* + * Device Driver Control Block (DDCB) queue support. Definition of + * interrupt handlers for queue support as well as triggering the + * health monitor code in case of problems. The current hardware uses + * an MSI interrupt which is shared between error handling and + * functional code. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "card_base.h" +#include "card_ddcb.h" + +/* + * N: next DDCB, this is where the next DDCB will be put. + * A: active DDCB, this is where the code will look for the next completion. + * x: DDCB is enqueued, we are waiting for its completion. + + * Situation (1): Empty queue + * +---+---+---+---+---+---+---+---+ + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | + * | | | | | | | | | + * +---+---+---+---+---+---+---+---+ + * A/N + * enqueued_ddcbs = A - N = 2 - 2 = 0 + * + * Situation (2): Wrapped, N > A + * +---+---+---+---+---+---+---+---+ + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | + * | | | x | x | | | | | + * +---+---+---+---+---+---+---+---+ + * A N + * enqueued_ddcbs = N - A = 4 - 2 = 2 + * + * Situation (3): Queue wrapped, A > N + * +---+---+---+---+---+---+---+---+ + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | + * | x | x | | | x | x | x | x | + * +---+---+---+---+---+---+---+---+ + * N A + * enqueued_ddcbs = queue_max - (A - N) = 8 - (4 - 2) = 6 + * + * Situation (4a): Queue full N > A + * +---+---+---+---+---+---+---+---+ + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | + * | x | x | x | x | x | x | x | | + * +---+---+---+---+---+---+---+---+ + * A N + * + * enqueued_ddcbs = N - A = 7 - 0 = 7 + * + * Situation (4a): Queue full A > N + * +---+---+---+---+---+---+---+---+ + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | + * | x | x | x | | x | x | x | x | + * +---+---+---+---+---+---+---+---+ + * N A + * enqueued_ddcbs = queue_max - (A - N) = 8 - (4 - 3) = 7 + */ + +static int queue_empty(struct ddcb_queue *queue) +{ + return queue->ddcb_next == queue->ddcb_act; +} + +static int queue_enqueued_ddcbs(struct ddcb_queue *queue) +{ + if (queue->ddcb_next >= queue->ddcb_act) + return queue->ddcb_next - queue->ddcb_act; + + return queue->ddcb_max - (queue->ddcb_act - queue->ddcb_next); +} + +static int queue_free_ddcbs(struct ddcb_queue *queue) +{ + int free_ddcbs = queue->ddcb_max - queue_enqueued_ddcbs(queue) - 1; + + if (WARN_ON_ONCE(free_ddcbs < 0)) { /* must never ever happen! 
*/ + return 0; + } + return free_ddcbs; +} + +/* + * Use of the PRIV field in the DDCB for queue debugging: + * + * (1) Trying to get rid of a DDCB which saw a timeout: + * pddcb->priv[6] = 0xcc; # cleared + * + * (2) Append a DDCB via NEXT bit: + * pddcb->priv[7] = 0xaa; # appended + * + * (3) DDCB needed tapping: + * pddcb->priv[7] = 0xbb; # tapped + * + * (4) DDCB marked as correctly finished: + * pddcb->priv[6] = 0xff; # finished + */ + +static inline void ddcb_mark_tapped(struct ddcb *pddcb) +{ + pddcb->priv[7] = 0xbb; /* tapped */ +} + +static inline void ddcb_mark_appended(struct ddcb *pddcb) +{ + pddcb->priv[7] = 0xaa; /* appended */ +} + +static inline void ddcb_mark_cleared(struct ddcb *pddcb) +{ + pddcb->priv[6] = 0xcc; /* cleared */ +} + +static inline void ddcb_mark_finished(struct ddcb *pddcb) +{ + pddcb->priv[6] = 0xff; /* finished */ +} + +static inline void ddcb_mark_unused(struct ddcb *pddcb) +{ + pddcb->priv_64 = cpu_to_be64(0); /* not tapped */ +} + +/** + * genwqe_crc16() - Generate 16-bit crc as required for DDCBs + * @buff: pointer to data buffer + * @len: length of data for calculation + * @init: initial crc (0xffff at start) + * + * Polynomial = x^16 + x^12 + x^5 + 1 (0x1021) + * Example: 4 bytes 0x01 0x02 0x03 0x04 with init = 0xffff + * should result in a crc16 of 0x89c3 + * + * Return: crc16 checksum in big endian format ! + */ +static inline u16 genwqe_crc16(const u8 *buff, size_t len, u16 init) +{ + return crc_itu_t(init, buff, len); +} + +static void print_ddcb_info(struct genwqe_dev *cd, struct ddcb_queue *queue) +{ + int i; + struct ddcb *pddcb; + unsigned long flags; + struct pci_dev *pci_dev = cd->pci_dev; + + spin_lock_irqsave(&cd->print_lock, flags); + + dev_info(&pci_dev->dev, + "DDCB list for card #%d (ddcb_act=%d / ddcb_next=%d):\n", + cd->card_idx, queue->ddcb_act, queue->ddcb_next); + + pddcb = queue->ddcb_vaddr; + for (i = 0; i < queue->ddcb_max; i++) { + dev_err(&pci_dev->dev, + " %c %-3d: RETC=%03x SEQ=%04x HSI=%02X SHI=%02x PRIV=%06llx CMD=%03x\n", + i == queue->ddcb_act ? '>' : ' ', + i, + be16_to_cpu(pddcb->retc_16), + be16_to_cpu(pddcb->seqnum_16), + pddcb->hsi, + pddcb->shi, + be64_to_cpu(pddcb->priv_64), + pddcb->cmd); + pddcb++; + } + spin_unlock_irqrestore(&cd->print_lock, flags); +} + +struct genwqe_ddcb_cmd *ddcb_requ_alloc(void) +{ + struct ddcb_requ *req; + + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + return NULL; + + return &req->cmd; +} + +void ddcb_requ_free(struct genwqe_ddcb_cmd *cmd) +{ + struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd); + + kfree(req); +} + +static inline enum genwqe_requ_state ddcb_requ_get_state(struct ddcb_requ *req) +{ + return req->req_state; +} + +static inline void ddcb_requ_set_state(struct ddcb_requ *req, + enum genwqe_requ_state new_state) +{ + req->req_state = new_state; +} + +static inline int ddcb_requ_collect_debug_data(struct ddcb_requ *req) +{ + return req->cmd.ddata_addr != 0x0; +} + +/** + * ddcb_requ_finished() - Returns the hardware state of the associated DDCB + * @cd: pointer to genwqe device descriptor + * @req: DDCB work request + * + * Status of ddcb_requ mirrors this hardware state, but is copied in + * the ddcb_requ on interrupt/polling function. The lowlevel code + * should check the hardware state directly, the higher level code + * should check the copy. + * + * This function will also return true if the state of the queue is + * not GENWQE_CARD_USED. This enables us to purge all DDCBs in the + * shutdown case. 
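
genwqe_crc16() above delegates to the kernel's crc_itu_t(), a non-reflected CRC-16 over polynomial 0x1021 processed MSB first. A stand-alone user-space sketch of the same convention (the function name crc16_ccitt_msb and the test program are illustrative, not part of the driver) reproduces the test vector documented in the comment:

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Bit-serial CRC-16, polynomial x^16 + x^12 + x^5 + 1 (0x1021),
     * MSB first, no reflection: the convention used by crc_itu_t(). */
    static uint16_t crc16_ccitt_msb(uint16_t crc, const uint8_t *buf, size_t len)
    {
    	while (len--) {
    		crc ^= (uint16_t)(*buf++) << 8;
    		for (int i = 0; i < 8; i++)
    			crc = (crc & 0x8000) ? (uint16_t)((crc << 1) ^ 0x1021)
    					     : (uint16_t)(crc << 1);
    	}
    	return crc;
    }

    int main(void)
    {
    	const uint8_t data[4] = { 0x01, 0x02, 0x03, 0x04 };
    	uint16_t crc = crc16_ccitt_msb(0xffff, data, sizeof(data));

    	/* expected result per the genwqe_crc16() comment above */
    	printf("crc16 = 0x%04x\n", crc);
    	assert(crc == 0x89c3);
    	return 0;
    }
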
+ */ +static int ddcb_requ_finished(struct genwqe_dev *cd, struct ddcb_requ *req) +{ + return (ddcb_requ_get_state(req) == GENWQE_REQU_FINISHED) || + (cd->card_state != GENWQE_CARD_USED); +} + +#define RET_DDCB_APPENDED 1 +#define RET_DDCB_TAPPED 2 +/** + * enqueue_ddcb() - Enqueue a DDCB + * @cd: pointer to genwqe device descriptor + * @queue: queue this operation should be done on + * @pddcb: pointer to ddcb structure + * @ddcb_no: pointer to ddcb number being tapped + * + * Start execution of DDCB by tapping or append to queue via NEXT + * bit. This is done by an atomic 'compare and swap' instruction and + * checking SHI and HSI of the previous DDCB. + * + * This function must only be called with ddcb_lock held. + * + * Return: 1 if new DDCB is appended to previous + * 2 if DDCB queue is tapped via register/simulation + */ +static int enqueue_ddcb(struct genwqe_dev *cd, struct ddcb_queue *queue, + struct ddcb *pddcb, int ddcb_no) +{ + unsigned int try; + int prev_no; + struct ddcb *prev_ddcb; + __be32 old, new, icrc_hsi_shi; + u64 num; + + /* + * For performance checks a Dispatch Timestamp can be put into + * DDCB It is supposed to use the SLU's free running counter, + * but this requires PCIe cycles. + */ + ddcb_mark_unused(pddcb); + + /* check previous DDCB if already fetched */ + prev_no = (ddcb_no == 0) ? queue->ddcb_max - 1 : ddcb_no - 1; + prev_ddcb = &queue->ddcb_vaddr[prev_no]; + + /* + * It might have happened that the HSI.FETCHED bit is + * set. Retry in this case. Therefore I expect maximum 2 times + * trying. + */ + ddcb_mark_appended(pddcb); + for (try = 0; try < 2; try++) { + old = prev_ddcb->icrc_hsi_shi_32; /* read SHI/HSI in BE32 */ + + /* try to append via NEXT bit if prev DDCB is not completed */ + if ((old & DDCB_COMPLETED_BE32) != 0x00000000) + break; + + new = (old | DDCB_NEXT_BE32); + + wmb(); /* need to ensure write ordering */ + icrc_hsi_shi = cmpxchg(&prev_ddcb->icrc_hsi_shi_32, old, new); + + if (icrc_hsi_shi == old) + return RET_DDCB_APPENDED; /* appended to queue */ + } + + /* Queue must be re-started by updating QUEUE_OFFSET */ + ddcb_mark_tapped(pddcb); + num = (u64)ddcb_no << 8; + + wmb(); /* need to ensure write ordering */ + __genwqe_writeq(cd, queue->IO_QUEUE_OFFSET, num); /* start queue */ + + return RET_DDCB_TAPPED; +} + +/** + * copy_ddcb_results() - Copy output state from real DDCB to request + * @req: pointer to requested DDCB parameters + * @ddcb_no: pointer to ddcb number being tapped + * + * Copy DDCB ASV to request struct. There is no endian + * conversion made, since data structure in ASV is still + * unknown here. + * + * This is needed by: + * - genwqe_purge_ddcb() + * - genwqe_check_ddcb_queue() + */ +static void copy_ddcb_results(struct ddcb_requ *req, int ddcb_no) +{ + struct ddcb_queue *queue = req->queue; + struct ddcb *pddcb = &queue->ddcb_vaddr[req->num]; + + memcpy(&req->cmd.asv[0], &pddcb->asv[0], DDCB_ASV_LENGTH); + + /* copy status flags of the variant part */ + req->cmd.vcrc = be16_to_cpu(pddcb->vcrc_16); + req->cmd.deque_ts = be64_to_cpu(pddcb->deque_ts_64); + req->cmd.cmplt_ts = be64_to_cpu(pddcb->cmplt_ts_64); + + req->cmd.attn = be16_to_cpu(pddcb->attn_16); + req->cmd.progress = be32_to_cpu(pddcb->progress_32); + req->cmd.retc = be16_to_cpu(pddcb->retc_16); + + if (ddcb_requ_collect_debug_data(req)) { + int prev_no = (ddcb_no == 0) ? 
+ queue->ddcb_max - 1 : ddcb_no - 1; + struct ddcb *prev_pddcb = &queue->ddcb_vaddr[prev_no]; + + memcpy(&req->debug_data.ddcb_finished, pddcb, + sizeof(req->debug_data.ddcb_finished)); + memcpy(&req->debug_data.ddcb_prev, prev_pddcb, + sizeof(req->debug_data.ddcb_prev)); + } +} + +/** + * genwqe_check_ddcb_queue() - Checks DDCB queue for completed work requests. + * @cd: pointer to genwqe device descriptor + * @queue: queue to be checked + * + * Return: Number of DDCBs which were finished + */ +static int genwqe_check_ddcb_queue(struct genwqe_dev *cd, + struct ddcb_queue *queue) +{ + unsigned long flags; + int ddcbs_finished = 0; + struct pci_dev *pci_dev = cd->pci_dev; + + spin_lock_irqsave(&queue->ddcb_lock, flags); + + /* FIXME avoid soft locking CPU */ + while (!queue_empty(queue) && (ddcbs_finished < queue->ddcb_max)) { + + struct ddcb *pddcb; + struct ddcb_requ *req; + u16 vcrc, vcrc_16, retc_16; + + pddcb = &queue->ddcb_vaddr[queue->ddcb_act]; + + if ((pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) == + 0x00000000) + goto go_home; /* not completed, continue waiting */ + + wmb(); /* Add sync to decouple prev. read operations */ + + /* Note: DDCB could be purged */ + req = queue->ddcb_req[queue->ddcb_act]; + if (req == NULL) { + /* this occurs if DDCB is purged, not an error */ + /* Move active DDCB further; Nothing to do anymore. */ + goto pick_next_one; + } + + /* + * HSI=0x44 (fetched and completed), but RETC is + * 0x101, or even worse 0x000. + * + * In case of seeing the queue in inconsistent state + * we read the errcnts and the queue status to provide + * a trigger for our PCIe analyzer stop capturing. + */ + retc_16 = be16_to_cpu(pddcb->retc_16); + if ((pddcb->hsi == 0x44) && (retc_16 <= 0x101)) { + u64 errcnts, status; + u64 ddcb_offs = (u64)pddcb - (u64)queue->ddcb_vaddr; + + errcnts = __genwqe_readq(cd, queue->IO_QUEUE_ERRCNTS); + status = __genwqe_readq(cd, queue->IO_QUEUE_STATUS); + + dev_err(&pci_dev->dev, + "[%s] SEQN=%04x HSI=%02x RETC=%03x Q_ERRCNTS=%016llx Q_STATUS=%016llx DDCB_DMA_ADDR=%016llx\n", + __func__, be16_to_cpu(pddcb->seqnum_16), + pddcb->hsi, retc_16, errcnts, status, + queue->ddcb_daddr + ddcb_offs); + } + + copy_ddcb_results(req, queue->ddcb_act); + queue->ddcb_req[queue->ddcb_act] = NULL; /* take from queue */ + + dev_dbg(&pci_dev->dev, "FINISHED DDCB#%d\n", req->num); + genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb)); + + ddcb_mark_finished(pddcb); + + /* calculate CRC_16 to see if VCRC is correct */ + vcrc = genwqe_crc16(pddcb->asv, + VCRC_LENGTH(req->cmd.asv_length), + 0xffff); + vcrc_16 = be16_to_cpu(pddcb->vcrc_16); + if (vcrc != vcrc_16) { + printk_ratelimited(KERN_ERR + "%s %s: err: wrong VCRC pre=%02x vcrc_len=%d bytes vcrc_data=%04x is not vcrc_card=%04x\n", + GENWQE_DEVNAME, dev_name(&pci_dev->dev), + pddcb->pre, VCRC_LENGTH(req->cmd.asv_length), + vcrc, vcrc_16); + } + + ddcb_requ_set_state(req, GENWQE_REQU_FINISHED); + queue->ddcbs_completed++; + queue->ddcbs_in_flight--; + + /* wake up process waiting for this DDCB, and + processes on the busy queue */ + wake_up_interruptible(&queue->ddcb_waitqs[queue->ddcb_act]); + wake_up_interruptible(&queue->busy_waitq); + +pick_next_one: + queue->ddcb_act = (queue->ddcb_act + 1) % queue->ddcb_max; + ddcbs_finished++; + } + + go_home: + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + return ddcbs_finished; +} + +/** + * __genwqe_wait_ddcb(): Waits until DDCB is completed + * @cd: pointer to genwqe device descriptor + * @req: pointer to requsted DDCB parameters + * + * The Service Layer will update 
the RETC in DDCB when processing is + * pending or done. + * + * Return: > 0 remaining jiffies, DDCB completed + * -ETIMEDOUT when timeout + * -ERESTARTSYS when ^C + * -EINVAL when unknown error condition + * + * When an error is returned the called needs to ensure that + * purge_ddcb() is being called to get the &req removed from the + * queue. + */ +int __genwqe_wait_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req) +{ + int rc; + unsigned int ddcb_no; + struct ddcb_queue *queue; + struct pci_dev *pci_dev = cd->pci_dev; + + if (req == NULL) + return -EINVAL; + + queue = req->queue; + if (queue == NULL) + return -EINVAL; + + ddcb_no = req->num; + if (ddcb_no >= queue->ddcb_max) + return -EINVAL; + + rc = wait_event_interruptible_timeout(queue->ddcb_waitqs[ddcb_no], + ddcb_requ_finished(cd, req), + GENWQE_DDCB_SOFTWARE_TIMEOUT * HZ); + + /* + * We need to distinguish 3 cases here: + * 1. rc == 0 timeout occurred + * 2. rc == -ERESTARTSYS signal received + * 3. rc > 0 remaining jiffies condition is true + */ + if (rc == 0) { + struct ddcb_queue *queue = req->queue; + struct ddcb *pddcb; + + /* + * Timeout may be caused by long task switching time. + * When timeout happens, check if the request has + * meanwhile completed. + */ + genwqe_check_ddcb_queue(cd, req->queue); + if (ddcb_requ_finished(cd, req)) + return rc; + + dev_err(&pci_dev->dev, + "[%s] err: DDCB#%d timeout rc=%d state=%d req @ %p\n", + __func__, req->num, rc, ddcb_requ_get_state(req), + req); + dev_err(&pci_dev->dev, + "[%s] IO_QUEUE_STATUS=0x%016llx\n", __func__, + __genwqe_readq(cd, queue->IO_QUEUE_STATUS)); + + pddcb = &queue->ddcb_vaddr[req->num]; + genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb)); + + print_ddcb_info(cd, req->queue); + return -ETIMEDOUT; + + } else if (rc == -ERESTARTSYS) { + return rc; + /* + * EINTR: Stops the application + * ERESTARTSYS: Restartable systemcall; called again + */ + + } else if (rc < 0) { + dev_err(&pci_dev->dev, + "[%s] err: DDCB#%d unknown result (rc=%d) %d!\n", + __func__, req->num, rc, ddcb_requ_get_state(req)); + return -EINVAL; + } + + /* Severe error occured. Driver is forced to stop operation */ + if (cd->card_state != GENWQE_CARD_USED) { + dev_err(&pci_dev->dev, + "[%s] err: DDCB#%d forced to stop (rc=%d)\n", + __func__, req->num, rc); + return -EIO; + } + return rc; +} + +/** + * get_next_ddcb() - Get next available DDCB + * @cd: pointer to genwqe device descriptor + * @queue: DDCB queue + * @num: internal DDCB number + * + * DDCB's content is completely cleared but presets for PRE and + * SEQNUM. This function must only be called when ddcb_lock is held. + * + * Return: NULL if no empty DDCB available otherwise ptr to next DDCB. + */ +static struct ddcb *get_next_ddcb(struct genwqe_dev *cd, + struct ddcb_queue *queue, + int *num) +{ + u64 *pu64; + struct ddcb *pddcb; + + if (queue_free_ddcbs(queue) == 0) /* queue is full */ + return NULL; + + /* find new ddcb */ + pddcb = &queue->ddcb_vaddr[queue->ddcb_next]; + + /* if it is not completed, we are not allowed to use it */ + /* barrier(); */ + if ((pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) == 0x00000000) + return NULL; + + *num = queue->ddcb_next; /* internal DDCB number */ + queue->ddcb_next = (queue->ddcb_next + 1) % queue->ddcb_max; + + /* clear important DDCB fields */ + pu64 = (u64 *)pddcb; + pu64[0] = 0ULL; /* offs 0x00 (ICRC,HSI,SHI,...) */ + pu64[1] = 0ULL; /* offs 0x01 (ACFUNC,CMD...) 
*/ + + /* destroy previous results in ASV */ + pu64[0x80/8] = 0ULL; /* offs 0x80 (ASV + 0) */ + pu64[0x88/8] = 0ULL; /* offs 0x88 (ASV + 0x08) */ + pu64[0x90/8] = 0ULL; /* offs 0x90 (ASV + 0x10) */ + pu64[0x98/8] = 0ULL; /* offs 0x98 (ASV + 0x18) */ + pu64[0xd0/8] = 0ULL; /* offs 0xd0 (RETC,ATTN...) */ + + pddcb->pre = DDCB_PRESET_PRE; /* 128 */ + pddcb->seqnum_16 = cpu_to_be16(queue->ddcb_seq++); + return pddcb; +} + +/** + * __genwqe_purge_ddcb() - Remove a DDCB from the workqueue + * @cd: genwqe device descriptor + * @req: DDCB request + * + * This will fail when the request was already FETCHED. In this case + * we need to wait until it is finished. Else the DDCB can be + * reused. This function also ensures that the request data structure + * is removed from ddcb_req[]. + * + * Do not forget to call this function when genwqe_wait_ddcb() fails, + * such that the request gets really removed from ddcb_req[]. + * + * Return: 0 success + */ +int __genwqe_purge_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req) +{ + struct ddcb *pddcb = NULL; + unsigned int t; + unsigned long flags; + struct ddcb_queue *queue = req->queue; + struct pci_dev *pci_dev = cd->pci_dev; + u64 queue_status; + __be32 icrc_hsi_shi = 0x0000; + __be32 old, new; + + /* unsigned long flags; */ + if (GENWQE_DDCB_SOFTWARE_TIMEOUT <= 0) { + dev_err(&pci_dev->dev, + "[%s] err: software timeout is not set!\n", __func__); + return -EFAULT; + } + + pddcb = &queue->ddcb_vaddr[req->num]; + + for (t = 0; t < GENWQE_DDCB_SOFTWARE_TIMEOUT * 10; t++) { + + spin_lock_irqsave(&queue->ddcb_lock, flags); + + /* Check if req was meanwhile finished */ + if (ddcb_requ_get_state(req) == GENWQE_REQU_FINISHED) + goto go_home; + + /* try to set PURGE bit if FETCHED/COMPLETED are not set */ + old = pddcb->icrc_hsi_shi_32; /* read SHI/HSI in BE32 */ + if ((old & DDCB_FETCHED_BE32) == 0x00000000) { + + new = (old | DDCB_PURGE_BE32); + icrc_hsi_shi = cmpxchg(&pddcb->icrc_hsi_shi_32, + old, new); + if (icrc_hsi_shi == old) + goto finish_ddcb; + } + + /* normal finish with HSI bit */ + barrier(); + icrc_hsi_shi = pddcb->icrc_hsi_shi_32; + if (icrc_hsi_shi & DDCB_COMPLETED_BE32) + goto finish_ddcb; + + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + + /* + * Here the check_ddcb() function will most likely + * discover this DDCB to be finished some point in + * time. It will mark the req finished and free it up + * in the list. + */ + + copy_ddcb_results(req, req->num); /* for the failing case */ + msleep(100); /* sleep for 1/10 second and try again */ + continue; + +finish_ddcb: + copy_ddcb_results(req, req->num); + ddcb_requ_set_state(req, GENWQE_REQU_FINISHED); + queue->ddcbs_in_flight--; + queue->ddcb_req[req->num] = NULL; /* delete from array */ + ddcb_mark_cleared(pddcb); + + /* Move active DDCB further; Nothing to do here anymore. */ + + /* + * We need to ensure that there is at least one free + * DDCB in the queue. To do that, we must update + * ddcb_act only if the COMPLETED bit is set for the + * DDCB we are working on else we treat that DDCB even + * if we PURGED it as occupied (hardware is supposed + * to set the COMPLETED bit yet!). + */ + icrc_hsi_shi = pddcb->icrc_hsi_shi_32; + if ((icrc_hsi_shi & DDCB_COMPLETED_BE32) && + (queue->ddcb_act == req->num)) { + queue->ddcb_act = ((queue->ddcb_act + 1) % + queue->ddcb_max); + } +go_home: + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + return 0; + } + + /* + * If the card is dead and the queue is forced to stop, we + * might see this in the queue status register. 
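
The rc handling in __genwqe_wait_ddcb() above relies on the return-value contract of wait_event_interruptible_timeout(). A minimal sketch of those three outcomes; the helper name wait_for_done and its parameters are illustrative only, not driver code:

    #include <linux/errno.h>
    #include <linux/jiffies.h>
    #include <linux/types.h>
    #include <linux/wait.h>

    /* Sketch: distinguish the three results of the timeout wait. */
    static int wait_for_done(wait_queue_head_t *wq, bool *done,
    			 unsigned long timeout_sec)
    {
    	long rc = wait_event_interruptible_timeout(*wq, *done,
    						   timeout_sec * HZ);

    	if (rc == 0)			/* timed out, condition still false */
    		return -ETIMEDOUT;
    	if (rc == -ERESTARTSYS)		/* interrupted by a signal */
    		return -ERESTARTSYS;
    	return 0;			/* rc > 0: condition true, rc = remaining jiffies */
    }
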
+ */ + queue_status = __genwqe_readq(cd, queue->IO_QUEUE_STATUS); + + dev_dbg(&pci_dev->dev, "UN/FINISHED DDCB#%d\n", req->num); + genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb)); + + dev_err(&pci_dev->dev, + "[%s] err: DDCB#%d not purged and not completed after %d seconds QSTAT=%016llx!!\n", + __func__, req->num, GENWQE_DDCB_SOFTWARE_TIMEOUT, + queue_status); + + print_ddcb_info(cd, req->queue); + + return -EFAULT; +} + +int genwqe_init_debug_data(struct genwqe_dev *cd, struct genwqe_debug_data *d) +{ + int len; + struct pci_dev *pci_dev = cd->pci_dev; + + if (d == NULL) { + dev_err(&pci_dev->dev, + "[%s] err: invalid memory for debug data!\n", + __func__); + return -EFAULT; + } + + len = sizeof(d->driver_version); + snprintf(d->driver_version, len, "%s", DRV_VERSION); + d->slu_unitcfg = cd->slu_unitcfg; + d->app_unitcfg = cd->app_unitcfg; + return 0; +} + +/** + * __genwqe_enqueue_ddcb() - Enqueue a DDCB + * @cd: pointer to genwqe device descriptor + * @req: pointer to DDCB execution request + * @f_flags: file mode: blocking, non-blocking + * + * Return: 0 if enqueuing succeeded + * -EIO if card is unusable/PCIe problems + * -EBUSY if enqueuing failed + */ +int __genwqe_enqueue_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req, + unsigned int f_flags) +{ + struct ddcb *pddcb; + unsigned long flags; + struct ddcb_queue *queue; + struct pci_dev *pci_dev = cd->pci_dev; + u16 icrc; + + retry: + if (cd->card_state != GENWQE_CARD_USED) { + printk_ratelimited(KERN_ERR + "%s %s: [%s] Card is unusable/PCIe problem Req#%d\n", + GENWQE_DEVNAME, dev_name(&pci_dev->dev), + __func__, req->num); + return -EIO; + } + + queue = req->queue = &cd->queue; + + /* FIXME circumvention to improve performance when no irq is + * there. + */ + if (GENWQE_POLLING_ENABLED) + genwqe_check_ddcb_queue(cd, queue); + + /* + * It must be ensured to process all DDCBs in successive + * order. Use a lock here in order to prevent nested DDCB + * enqueuing. + */ + spin_lock_irqsave(&queue->ddcb_lock, flags); + + pddcb = get_next_ddcb(cd, queue, &req->num); /* get ptr and num */ + if (pddcb == NULL) { + int rc; + + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + + if (f_flags & O_NONBLOCK) { + queue->return_on_busy++; + return -EBUSY; + } + + queue->wait_on_busy++; + rc = wait_event_interruptible(queue->busy_waitq, + queue_free_ddcbs(queue) != 0); + dev_dbg(&pci_dev->dev, "[%s] waiting for free DDCB: rc=%d\n", + __func__, rc); + if (rc == -ERESTARTSYS) + return rc; /* interrupted by a signal */ + + goto retry; + } + + if (queue->ddcb_req[req->num] != NULL) { + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + + dev_err(&pci_dev->dev, + "[%s] picked DDCB %d with req=%p still in use!!\n", + __func__, req->num, req); + return -EFAULT; + } + ddcb_requ_set_state(req, GENWQE_REQU_ENQUEUED); + queue->ddcb_req[req->num] = req; + + pddcb->cmdopts_16 = cpu_to_be16(req->cmd.cmdopts); + pddcb->cmd = req->cmd.cmd; + pddcb->acfunc = req->cmd.acfunc; /* functional unit */ + + /* + * We know that we can get retc 0x104 with CRC error, do not + * stop the queue in those cases for this command. XDIR = 1 + * does not work for old SLU versions. + * + * Last bitstream with the old XDIR behavior had SLU_ID + * 0x34199. 
+ */ + if ((cd->slu_unitcfg & 0xFFFF0ull) > 0x34199ull) + pddcb->xdir = 0x1; + else + pddcb->xdir = 0x0; + + + pddcb->psp = (((req->cmd.asiv_length / 8) << 4) | + ((req->cmd.asv_length / 8))); + pddcb->disp_ts_64 = cpu_to_be64(req->cmd.disp_ts); + + /* + * If copying the whole DDCB_ASIV_LENGTH is impacting + * performance we need to change it to + * req->cmd.asiv_length. But simulation benefits from some + * non-architectured bits behind the architectured content. + * + * How much data is copied depends on the availability of the + * ATS field, which was introduced late. If the ATS field is + * supported ASIV is 8 bytes shorter than it used to be. Since + * the ATS field is copied too, the code should do exactly + * what it did before, but I wanted to make copying of the ATS + * field very explicit. + */ + if (genwqe_get_slu_id(cd) <= 0x2) { + memcpy(&pddcb->__asiv[0], /* destination */ + &req->cmd.__asiv[0], /* source */ + DDCB_ASIV_LENGTH); /* req->cmd.asiv_length */ + } else { + pddcb->n.ats_64 = cpu_to_be64(req->cmd.ats); + memcpy(&pddcb->n.asiv[0], /* destination */ + &req->cmd.asiv[0], /* source */ + DDCB_ASIV_LENGTH_ATS); /* req->cmd.asiv_length */ + } + + pddcb->icrc_hsi_shi_32 = cpu_to_be32(0x00000000); /* for crc */ + + /* + * Calculate CRC_16 for corresponding range PSP(7:4). Include + * empty 4 bytes prior to the data. + */ + icrc = genwqe_crc16((const u8 *)pddcb, + ICRC_LENGTH(req->cmd.asiv_length), 0xffff); + pddcb->icrc_hsi_shi_32 = cpu_to_be32((u32)icrc << 16); + + /* enable DDCB completion irq */ + if (!GENWQE_POLLING_ENABLED) + pddcb->icrc_hsi_shi_32 |= DDCB_INTR_BE32; + + dev_dbg(&pci_dev->dev, "INPUT DDCB#%d\n", req->num); + genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb)); + + if (ddcb_requ_collect_debug_data(req)) { + /* use the kernel copy of debug data. 
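
The ICRC computed in __genwqe_enqueue_ddcb() above runs over the fixed 24-byte DDCB header plus the used ASIV (ICRC_LENGTH(n) = (n) + 8 + 8 + 8 in card_ddcb.h below), with the CRC field zeroed beforehand, and the 16-bit result is shifted into the upper half of the icrc_hsi_shi word. A small stand-alone sketch of that arithmetic; the asiv_length of 96 and the icrc value 0x1234 are example numbers only:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors ICRC_LENGTH() from card_ddcb.h: 24-byte DDCB header + ASIV. */
    #define ICRC_LENGTH(n)	((n) + 8 + 8 + 8)

    int main(void)
    {
    	unsigned int asiv_length = 96;	/* example: ATS-format ASIV */
    	uint16_t icrc = 0x1234;		/* example value, normally genwqe_crc16() */

    	/* 96 + 24 = 120 bytes covered: DDCB offsets 0x00 .. 0x77 */
    	printf("icrc covers %u bytes\n", ICRC_LENGTH(asiv_length));
    	assert(ICRC_LENGTH(asiv_length) == 120);

    	/*
    	 * The 16-bit ICRC lands in the upper half of the 32-bit word,
    	 * i.e. in the icrc_16 field once stored big endian; the HSI/SHI
    	 * bytes in the lower half stay zero at this point.
    	 */
    	uint32_t icrc_hsi_shi = (uint32_t)icrc << 16;
    	assert((icrc_hsi_shi >> 16) == icrc && (icrc_hsi_shi & 0xffff) == 0);
    	printf("icrc_hsi_shi (cpu order, before cpu_to_be32) = 0x%08x\n",
    	       icrc_hsi_shi);
    	return 0;
    }
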
copying back to + user buffer happens later */ + + genwqe_init_debug_data(cd, &req->debug_data); + memcpy(&req->debug_data.ddcb_before, pddcb, + sizeof(req->debug_data.ddcb_before)); + } + + enqueue_ddcb(cd, queue, pddcb, req->num); + queue->ddcbs_in_flight++; + + if (queue->ddcbs_in_flight > queue->ddcbs_max_in_flight) + queue->ddcbs_max_in_flight = queue->ddcbs_in_flight; + + ddcb_requ_set_state(req, GENWQE_REQU_TAPPED); + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + wake_up_interruptible(&cd->queue_waitq); + + return 0; +} + +/** + * __genwqe_execute_raw_ddcb() - Setup and execute DDCB + * @cd: pointer to genwqe device descriptor + * @cmd: user provided DDCB command + * @f_flags: file mode: blocking, non-blocking + */ +int __genwqe_execute_raw_ddcb(struct genwqe_dev *cd, + struct genwqe_ddcb_cmd *cmd, + unsigned int f_flags) +{ + int rc = 0; + struct pci_dev *pci_dev = cd->pci_dev; + struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd); + + if (cmd->asiv_length > DDCB_ASIV_LENGTH) { + dev_err(&pci_dev->dev, "[%s] err: wrong asiv_length of %d\n", + __func__, cmd->asiv_length); + return -EINVAL; + } + if (cmd->asv_length > DDCB_ASV_LENGTH) { + dev_err(&pci_dev->dev, "[%s] err: wrong asv_length of %d\n", + __func__, cmd->asiv_length); + return -EINVAL; + } + rc = __genwqe_enqueue_ddcb(cd, req, f_flags); + if (rc != 0) + return rc; + + rc = __genwqe_wait_ddcb(cd, req); + if (rc < 0) /* error or signal interrupt */ + goto err_exit; + + if (ddcb_requ_collect_debug_data(req)) { + if (copy_to_user((struct genwqe_debug_data __user *) + (unsigned long)cmd->ddata_addr, + &req->debug_data, + sizeof(struct genwqe_debug_data))) + return -EFAULT; + } + + /* + * Higher values than 0x102 indicate completion with faults, + * lower values than 0x102 indicate processing faults. Note + * that DDCB might have been purged. E.g. Cntl+C. + */ + if (cmd->retc != DDCB_RETC_COMPLETE) { + /* This might happen e.g. flash read, and needs to be + handled by the upper layer code. */ + rc = -EBADMSG; /* not processed/error retc */ + } + + return rc; + + err_exit: + __genwqe_purge_ddcb(cd, req); + + if (ddcb_requ_collect_debug_data(req)) { + if (copy_to_user((struct genwqe_debug_data __user *) + (unsigned long)cmd->ddata_addr, + &req->debug_data, + sizeof(struct genwqe_debug_data))) + return -EFAULT; + } + return rc; +} + +/** + * genwqe_next_ddcb_ready() - Figure out if the next DDCB is already finished + * @cd: pointer to genwqe device descriptor + * + * We use this as condition for our wait-queue code. + */ +static int genwqe_next_ddcb_ready(struct genwqe_dev *cd) +{ + unsigned long flags; + struct ddcb *pddcb; + struct ddcb_queue *queue = &cd->queue; + + spin_lock_irqsave(&queue->ddcb_lock, flags); + + if (queue_empty(queue)) { /* empty queue */ + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + return 0; + } + + pddcb = &queue->ddcb_vaddr[queue->ddcb_act]; + if (pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) { /* ddcb ready */ + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + return 1; + } + + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + return 0; +} + +/** + * genwqe_ddcbs_in_flight() - Check how many DDCBs are in flight + * @cd: pointer to genwqe device descriptor + * + * Keep track on the number of DDCBs which ware currently in the + * queue. This is needed for statistics as well as condition if we want + * to wait or better do polling in case of no interrupts available. 
+ */ +int genwqe_ddcbs_in_flight(struct genwqe_dev *cd) +{ + unsigned long flags; + int ddcbs_in_flight = 0; + struct ddcb_queue *queue = &cd->queue; + + spin_lock_irqsave(&queue->ddcb_lock, flags); + ddcbs_in_flight += queue->ddcbs_in_flight; + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + + return ddcbs_in_flight; +} + +static int setup_ddcb_queue(struct genwqe_dev *cd, struct ddcb_queue *queue) +{ + int rc, i; + struct ddcb *pddcb; + u64 val64; + unsigned int queue_size; + struct pci_dev *pci_dev = cd->pci_dev; + + if (GENWQE_DDCB_MAX < 2) + return -EINVAL; + + queue_size = roundup(GENWQE_DDCB_MAX * sizeof(struct ddcb), PAGE_SIZE); + + queue->ddcbs_in_flight = 0; /* statistics */ + queue->ddcbs_max_in_flight = 0; + queue->ddcbs_completed = 0; + queue->return_on_busy = 0; + queue->wait_on_busy = 0; + + queue->ddcb_seq = 0x100; /* start sequence number */ + queue->ddcb_max = GENWQE_DDCB_MAX; + queue->ddcb_vaddr = __genwqe_alloc_consistent(cd, queue_size, + &queue->ddcb_daddr); + if (queue->ddcb_vaddr == NULL) { + dev_err(&pci_dev->dev, + "[%s] **err: could not allocate DDCB **\n", __func__); + return -ENOMEM; + } + queue->ddcb_req = kcalloc(queue->ddcb_max, sizeof(struct ddcb_requ *), + GFP_KERNEL); + if (!queue->ddcb_req) { + rc = -ENOMEM; + goto free_ddcbs; + } + + queue->ddcb_waitqs = kcalloc(queue->ddcb_max, + sizeof(wait_queue_head_t), + GFP_KERNEL); + if (!queue->ddcb_waitqs) { + rc = -ENOMEM; + goto free_requs; + } + + for (i = 0; i < queue->ddcb_max; i++) { + pddcb = &queue->ddcb_vaddr[i]; /* DDCBs */ + pddcb->icrc_hsi_shi_32 = DDCB_COMPLETED_BE32; + pddcb->retc_16 = cpu_to_be16(0xfff); + + queue->ddcb_req[i] = NULL; /* requests */ + init_waitqueue_head(&queue->ddcb_waitqs[i]); /* waitqueues */ + } + + queue->ddcb_act = 0; + queue->ddcb_next = 0; /* queue is empty */ + + spin_lock_init(&queue->ddcb_lock); + init_waitqueue_head(&queue->busy_waitq); + + val64 = ((u64)(queue->ddcb_max - 1) << 8); /* lastptr */ + __genwqe_writeq(cd, queue->IO_QUEUE_CONFIG, 0x07); /* iCRC/vCRC */ + __genwqe_writeq(cd, queue->IO_QUEUE_SEGMENT, queue->ddcb_daddr); + __genwqe_writeq(cd, queue->IO_QUEUE_INITSQN, queue->ddcb_seq); + __genwqe_writeq(cd, queue->IO_QUEUE_WRAP, val64); + return 0; + + free_requs: + kfree(queue->ddcb_req); + queue->ddcb_req = NULL; + free_ddcbs: + __genwqe_free_consistent(cd, queue_size, queue->ddcb_vaddr, + queue->ddcb_daddr); + queue->ddcb_vaddr = NULL; + queue->ddcb_daddr = 0ull; + return rc; + +} + +static int ddcb_queue_initialized(struct ddcb_queue *queue) +{ + return queue->ddcb_vaddr != NULL; +} + +static void free_ddcb_queue(struct genwqe_dev *cd, struct ddcb_queue *queue) +{ + unsigned int queue_size; + + queue_size = roundup(queue->ddcb_max * sizeof(struct ddcb), PAGE_SIZE); + + kfree(queue->ddcb_req); + queue->ddcb_req = NULL; + + if (queue->ddcb_vaddr) { + __genwqe_free_consistent(cd, queue_size, queue->ddcb_vaddr, + queue->ddcb_daddr); + queue->ddcb_vaddr = NULL; + queue->ddcb_daddr = 0ull; + } +} + +static irqreturn_t genwqe_pf_isr(int irq, void *dev_id) +{ + u64 gfir; + struct genwqe_dev *cd = (struct genwqe_dev *)dev_id; + struct pci_dev *pci_dev = cd->pci_dev; + + /* + * In case of fatal FIR error the queue is stopped, such that + * we can safely check it without risking anything. + */ + cd->irqs_processed++; + wake_up_interruptible(&cd->queue_waitq); + + /* + * Checking for errors before kicking the queue might be + * safer, but slower for the good-case ... See above. 
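
setup_ddcb_queue() above sizes the DDCB array as roundup(GENWQE_DDCB_MAX * sizeof(struct ddcb), PAGE_SIZE), where struct ddcb occupies 256 bytes (offsets 0x00 through 0xff, see card_ddcb.h further down). A quick stand-alone sketch of that sizing; GENWQE_DDCB_MAX = 32 and 4 KiB pages are assumed example values, the real constants live elsewhere in the driver:

    #include <assert.h>
    #include <stdio.h>

    #define PAGE_SIZE	4096UL	/* assumed 4 KiB pages for this example */
    #define DDCB_LENGTH	256UL	/* sizeof(struct ddcb), offsets 0x00..0xff */
    #define GENWQE_DDCB_MAX	32UL	/* illustrative value, not from this patch */

    /* Same rounding the kernel's roundup() gives for a power-of-two base. */
    static unsigned long roundup_pow2(unsigned long x, unsigned long base)
    {
    	return (x + base - 1) & ~(base - 1);
    }

    int main(void)
    {
    	unsigned long queue_size =
    		roundup_pow2(GENWQE_DDCB_MAX * DDCB_LENGTH, PAGE_SIZE);

    	/* 32 DDCBs x 256 bytes = 8 KiB, already page aligned */
    	printf("queue_size = %lu bytes (%lu pages)\n",
    	       queue_size, queue_size / PAGE_SIZE);
    	assert(queue_size == 8192);
    	return 0;
    }
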
+ */ + gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); + if (((gfir & GFIR_ERR_TRIGGER) != 0x0) && + !pci_channel_offline(pci_dev)) { + + if (cd->use_platform_recovery) { + /* + * Since we use raw accessors, EEH errors won't be + * detected by the platform until we do a non-raw + * MMIO or config space read + */ + readq(cd->mmio + IO_SLC_CFGREG_GFIR); + + /* Don't do anything if the PCI channel is frozen */ + if (pci_channel_offline(pci_dev)) + goto exit; + } + + wake_up_interruptible(&cd->health_waitq); + + /* + * By default GFIRs causes recovery actions. This + * count is just for debug when recovery is masked. + */ + dev_err_ratelimited(&pci_dev->dev, + "[%s] GFIR=%016llx\n", + __func__, gfir); + } + + exit: + return IRQ_HANDLED; +} + +static irqreturn_t genwqe_vf_isr(int irq, void *dev_id) +{ + struct genwqe_dev *cd = (struct genwqe_dev *)dev_id; + + cd->irqs_processed++; + wake_up_interruptible(&cd->queue_waitq); + + return IRQ_HANDLED; +} + +/** + * genwqe_card_thread() - Work thread for the DDCB queue + * @data: pointer to genwqe device descriptor + * + * The idea is to check if there are DDCBs in processing. If there are + * some finished DDCBs, we process them and wakeup the + * requestors. Otherwise we give other processes time using + * cond_resched(). + */ +static int genwqe_card_thread(void *data) +{ + int should_stop = 0; + struct genwqe_dev *cd = (struct genwqe_dev *)data; + + while (!kthread_should_stop()) { + + genwqe_check_ddcb_queue(cd, &cd->queue); + + if (GENWQE_POLLING_ENABLED) { + wait_event_interruptible_timeout( + cd->queue_waitq, + genwqe_ddcbs_in_flight(cd) || + (should_stop = kthread_should_stop()), 1); + } else { + wait_event_interruptible_timeout( + cd->queue_waitq, + genwqe_next_ddcb_ready(cd) || + (should_stop = kthread_should_stop()), HZ); + } + if (should_stop) + break; + + /* + * Avoid soft lockups on heavy loads; we do not want + * to disable our interrupts. + */ + cond_resched(); + } + return 0; +} + +/** + * genwqe_setup_service_layer() - Setup DDCB queue + * @cd: pointer to genwqe device descriptor + * + * Allocate DDCBs. Configure Service Layer Controller (SLC). + * + * Return: 0 success + */ +int genwqe_setup_service_layer(struct genwqe_dev *cd) +{ + int rc; + struct ddcb_queue *queue; + struct pci_dev *pci_dev = cd->pci_dev; + + if (genwqe_is_privileged(cd)) { + rc = genwqe_card_reset(cd); + if (rc < 0) { + dev_err(&pci_dev->dev, + "[%s] err: reset failed.\n", __func__); + return rc; + } + genwqe_read_softreset(cd); + } + + queue = &cd->queue; + queue->IO_QUEUE_CONFIG = IO_SLC_QUEUE_CONFIG; + queue->IO_QUEUE_STATUS = IO_SLC_QUEUE_STATUS; + queue->IO_QUEUE_SEGMENT = IO_SLC_QUEUE_SEGMENT; + queue->IO_QUEUE_INITSQN = IO_SLC_QUEUE_INITSQN; + queue->IO_QUEUE_OFFSET = IO_SLC_QUEUE_OFFSET; + queue->IO_QUEUE_WRAP = IO_SLC_QUEUE_WRAP; + queue->IO_QUEUE_WTIME = IO_SLC_QUEUE_WTIME; + queue->IO_QUEUE_ERRCNTS = IO_SLC_QUEUE_ERRCNTS; + queue->IO_QUEUE_LRW = IO_SLC_QUEUE_LRW; + + rc = setup_ddcb_queue(cd, queue); + if (rc != 0) { + rc = -ENODEV; + goto err_out; + } + + init_waitqueue_head(&cd->queue_waitq); + cd->card_thread = kthread_run(genwqe_card_thread, cd, + GENWQE_DEVNAME "%d_thread", + cd->card_idx); + if (IS_ERR(cd->card_thread)) { + rc = PTR_ERR(cd->card_thread); + cd->card_thread = NULL; + goto stop_free_queue; + } + + rc = genwqe_set_interrupt_capability(cd, GENWQE_MSI_IRQS); + if (rc) + goto stop_kthread; + + /* + * We must have all wait-queues initialized when we enable the + * interrupts. Otherwise we might crash if we get an early + * irq. 
+ */ + init_waitqueue_head(&cd->health_waitq); + + if (genwqe_is_privileged(cd)) { + rc = request_irq(pci_dev->irq, genwqe_pf_isr, IRQF_SHARED, + GENWQE_DEVNAME, cd); + } else { + rc = request_irq(pci_dev->irq, genwqe_vf_isr, IRQF_SHARED, + GENWQE_DEVNAME, cd); + } + if (rc < 0) { + dev_err(&pci_dev->dev, "irq %d not free.\n", pci_dev->irq); + goto stop_irq_cap; + } + + cd->card_state = GENWQE_CARD_USED; + return 0; + + stop_irq_cap: + genwqe_reset_interrupt_capability(cd); + stop_kthread: + kthread_stop(cd->card_thread); + cd->card_thread = NULL; + stop_free_queue: + free_ddcb_queue(cd, queue); + err_out: + return rc; +} + +/** + * queue_wake_up_all() - Handles fatal error case + * @cd: pointer to genwqe device descriptor + * + * The PCI device got unusable and we have to stop all pending + * requests as fast as we can. The code after this must purge the + * DDCBs in question and ensure that all mappings are freed. + */ +static int queue_wake_up_all(struct genwqe_dev *cd) +{ + unsigned int i; + unsigned long flags; + struct ddcb_queue *queue = &cd->queue; + + spin_lock_irqsave(&queue->ddcb_lock, flags); + + for (i = 0; i < queue->ddcb_max; i++) + wake_up_interruptible(&queue->ddcb_waitqs[queue->ddcb_act]); + + wake_up_interruptible(&queue->busy_waitq); + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + + return 0; +} + +/** + * genwqe_finish_queue() - Remove any genwqe devices and user-interfaces + * @cd: pointer to genwqe device descriptor + * + * Relies on the pre-condition that there are no users of the card + * device anymore e.g. with open file-descriptors. + * + * This function must be robust enough to be called twice. + */ +int genwqe_finish_queue(struct genwqe_dev *cd) +{ + int i, rc = 0, in_flight; + int waitmax = GENWQE_DDCB_SOFTWARE_TIMEOUT; + struct pci_dev *pci_dev = cd->pci_dev; + struct ddcb_queue *queue = &cd->queue; + + if (!ddcb_queue_initialized(queue)) + return 0; + + /* Do not wipe out the error state. */ + if (cd->card_state == GENWQE_CARD_USED) + cd->card_state = GENWQE_CARD_UNUSED; + + /* Wake up all requests in the DDCB queue such that they + should be removed nicely. */ + queue_wake_up_all(cd); + + /* We must wait to get rid of the DDCBs in flight */ + for (i = 0; i < waitmax; i++) { + in_flight = genwqe_ddcbs_in_flight(cd); + + if (in_flight == 0) + break; + + dev_dbg(&pci_dev->dev, + " DEBUG [%d/%d] waiting for queue to get empty: %d requests!\n", + i, waitmax, in_flight); + + /* + * Severe severe error situation: The card itself has + * 16 DDCB queues, each queue has e.g. 32 entries, + * each DDBC has a hardware timeout of currently 250 + * msec but the PFs have a hardware timeout of 8 sec + * ... so I take something large. + */ + msleep(1000); + } + if (i == waitmax) { + dev_err(&pci_dev->dev, " [%s] err: queue is not empty!!\n", + __func__); + rc = -EIO; + } + return rc; +} + +/** + * genwqe_release_service_layer() - Shutdown DDCB queue + * @cd: genwqe device descriptor + * + * This function must be robust enough to be called twice. 
+ */ +int genwqe_release_service_layer(struct genwqe_dev *cd) +{ + struct pci_dev *pci_dev = cd->pci_dev; + + if (!ddcb_queue_initialized(&cd->queue)) + return 1; + + free_irq(pci_dev->irq, cd); + genwqe_reset_interrupt_capability(cd); + + if (cd->card_thread != NULL) { + kthread_stop(cd->card_thread); + cd->card_thread = NULL; + } + + free_ddcb_queue(cd, &cd->queue); + return 0; +} diff --git a/drivers/misc/genwqe/card_ddcb.h b/drivers/misc/genwqe/card_ddcb.h new file mode 100644 index 0000000000..a47ff49aee --- /dev/null +++ b/drivers/misc/genwqe/card_ddcb.h @@ -0,0 +1,179 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef __CARD_DDCB_H__ +#define __CARD_DDCB_H__ + +/** + * IBM Accelerator Family 'GenWQE' + * + * (C) Copyright IBM Corp. 2013 + * + * Author: Frank Haverkamp + * Author: Joerg-Stephan Vogt + * Author: Michael Jung + * Author: Michael Ruettger + */ + +#include +#include + +#include "genwqe_driver.h" +#include "card_base.h" + +/** + * struct ddcb - Device Driver Control Block DDCB + * @hsi: Hardware software interlock + * @shi: Software hardware interlock. Hsi and shi are used to interlock + * software and hardware activities. We are using a compare and + * swap operation to ensure that there are no races when + * activating new DDCBs on the queue, or when we need to + * purge a DDCB from a running queue. + * @acfunc: Accelerator function addresses a unit within the chip + * @cmd: Command to work on + * @cmdopts_16: Options for the command + * @asiv: Input data + * @asv: Output data + * + * The DDCB data format is big endian. Multiple consequtive DDBCs form + * a DDCB queue. + */ +#define ASIV_LENGTH 104 /* Old specification without ATS field */ +#define ASIV_LENGTH_ATS 96 /* New specification with ATS field */ +#define ASV_LENGTH 64 + +struct ddcb { + union { + __be32 icrc_hsi_shi_32; /* iCRC, Hardware/SW interlock */ + struct { + __be16 icrc_16; + u8 hsi; + u8 shi; + }; + }; + u8 pre; /* Preamble */ + u8 xdir; /* Execution Directives */ + __be16 seqnum_16; /* Sequence Number */ + + u8 acfunc; /* Accelerator Function.. */ + u8 cmd; /* Command. */ + __be16 cmdopts_16; /* Command Options */ + u8 sur; /* Status Update Rate */ + u8 psp; /* Protection Section Pointer */ + __be16 rsvd_0e_16; /* Reserved invariant */ + + __be64 fwiv_64; /* Firmware Invariant. */ + + union { + struct { + __be64 ats_64; /* Address Translation Spec */ + u8 asiv[ASIV_LENGTH_ATS]; /* New ASIV */ + } n; + u8 __asiv[ASIV_LENGTH]; /* obsolete */ + }; + u8 asv[ASV_LENGTH]; /* Appl Spec Variant */ + + __be16 rsvd_c0_16; /* Reserved Variant */ + __be16 vcrc_16; /* Variant CRC */ + __be32 rsvd_32; /* Reserved unprotected */ + + __be64 deque_ts_64; /* Deque Time Stamp. */ + + __be16 retc_16; /* Return Code */ + __be16 attn_16; /* Attention/Extended Error Codes */ + __be32 progress_32; /* Progress indicator. */ + + __be64 cmplt_ts_64; /* Completion Time Stamp. */ + + /* The following layout matches the new service layer format */ + __be32 ibdc_32; /* Inbound Data Count (* 256) */ + __be32 obdc_32; /* Outbound Data Count (* 256) */ + + __be64 rsvd_SLH_64; /* Reserved for hardware */ + union { /* private data for driver */ + u8 priv[8]; + __be64 priv_64; + }; + __be64 disp_ts_64; /* Dispatch TimeStamp */ +} __attribute__((__packed__)); + +/* CRC polynomials for DDCB */ +#define CRC16_POLYNOMIAL 0x1021 + +/* + * SHI: Software to Hardware Interlock + * This 1 byte field is written by software to interlock the + * movement of one queue entry to another with the hardware in the + * chip. 
+ */ +#define DDCB_SHI_INTR 0x04 /* Bit 2 */ +#define DDCB_SHI_PURGE 0x02 /* Bit 1 */ +#define DDCB_SHI_NEXT 0x01 /* Bit 0 */ + +/* + * HSI: Hardware to Software interlock + * This 1 byte field is written by hardware to interlock the movement + * of one queue entry to another with the software in the chip. + */ +#define DDCB_HSI_COMPLETED 0x40 /* Bit 6 */ +#define DDCB_HSI_FETCHED 0x04 /* Bit 2 */ + +/* + * Accessing HSI/SHI is done 32-bit wide + * Normally 16-bit access would work too, but on some platforms the + * 16 compare and swap operation is not supported. Therefore + * switching to 32-bit such that those platforms will work too. + * + * iCRC HSI/SHI + */ +#define DDCB_INTR_BE32 cpu_to_be32(0x00000004) +#define DDCB_PURGE_BE32 cpu_to_be32(0x00000002) +#define DDCB_NEXT_BE32 cpu_to_be32(0x00000001) +#define DDCB_COMPLETED_BE32 cpu_to_be32(0x00004000) +#define DDCB_FETCHED_BE32 cpu_to_be32(0x00000400) + +/* Definitions of DDCB presets */ +#define DDCB_PRESET_PRE 0x80 +#define ICRC_LENGTH(n) ((n) + 8 + 8 + 8) /* used ASIV + hdr fields */ +#define VCRC_LENGTH(n) ((n)) /* used ASV */ + +/* + * Genwqe Scatter Gather list + * Each element has up to 8 entries. + * The chaining element is element 0 cause of prefetching needs. + */ + +/* + * 0b0110 Chained descriptor. The descriptor is describing the next + * descriptor list. + */ +#define SG_CHAINED (0x6) + +/* + * 0b0010 First entry of a descriptor list. Start from a Buffer-Empty + * condition. + */ +#define SG_DATA (0x2) + +/* + * 0b0000 Early terminator. This is the last entry on the list + * irregardless of the length indicated. + */ +#define SG_END_LIST (0x0) + +/** + * struct sglist - Scatter gather list + * @target_addr: Either a dma addr of memory to work on or a + * dma addr or a subsequent sglist block. + * @len: Length of the data block. + * @flags: See above. + * + * Depending on the command the GenWQE card can use a scatter gather + * list to describe the memory it works on. Always 8 sg_entry's form + * a block. + */ +struct sg_entry { + __be64 target_addr; + __be32 len; + __be32 flags; +}; + +#endif /* __CARD_DDCB_H__ */ diff --git a/drivers/misc/genwqe/card_debugfs.c b/drivers/misc/genwqe/card_debugfs.c new file mode 100644 index 0000000000..491fb4482d --- /dev/null +++ b/drivers/misc/genwqe/card_debugfs.c @@ -0,0 +1,378 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * IBM Accelerator Family 'GenWQE' + * + * (C) Copyright IBM Corp. 2013 + * + * Author: Frank Haverkamp + * Author: Joerg-Stephan Vogt + * Author: Michael Jung + * Author: Michael Ruettger + */ + +/* + * Debugfs interfaces for the GenWQE card. Help to debug potential + * problems. Dump internal chip state for debugging and failure + * determination. 
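
The 32-bit DDCB_*_BE32 constants above fold the one-byte HSI/SHI bits into the icrc_hsi_shi_32 word: given the big-endian layout icrc_16, hsi, shi, an HSI bit ends up in bits 15:8 and an SHI bit in bits 7:0 of the value wrapped by cpu_to_be32(). A small stand-alone sketch verifying that mapping (the helper names hsi_bit/shi_bit are illustrative):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Single-byte interlock bits, as defined in card_ddcb.h */
    #define DDCB_SHI_INTR		0x04
    #define DDCB_SHI_PURGE		0x02
    #define DDCB_SHI_NEXT		0x01
    #define DDCB_HSI_COMPLETED	0x40
    #define DDCB_HSI_FETCHED	0x04

    /*
     * First DDCB word, stored big endian: bytes 0..1 icrc_16, byte 2 hsi,
     * byte 3 shi. As a 32-bit value, HSI therefore occupies bits 15:8 and
     * SHI bits 7:0.
     */
    static uint32_t hsi_bit(uint8_t hsi) { return (uint32_t)hsi << 8; }
    static uint32_t shi_bit(uint8_t shi) { return (uint32_t)shi; }

    int main(void)
    {
    	assert(shi_bit(DDCB_SHI_INTR)	   == 0x00000004); /* DDCB_INTR_BE32 */
    	assert(shi_bit(DDCB_SHI_PURGE)	   == 0x00000002); /* DDCB_PURGE_BE32 */
    	assert(shi_bit(DDCB_SHI_NEXT)	   == 0x00000001); /* DDCB_NEXT_BE32 */
    	assert(hsi_bit(DDCB_HSI_COMPLETED) == 0x00004000); /* DDCB_COMPLETED_BE32 */
    	assert(hsi_bit(DDCB_HSI_FETCHED)   == 0x00000400); /* DDCB_FETCHED_BE32 */
    	printf("interlock constants consistent\n");
    	return 0;
    }
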
+ */ + +#include +#include +#include +#include +#include + +#include "card_base.h" +#include "card_ddcb.h" + +static void dbg_uidn_show(struct seq_file *s, struct genwqe_reg *regs, + int entries) +{ + unsigned int i; + u32 v_hi, v_lo; + + for (i = 0; i < entries; i++) { + v_hi = (regs[i].val >> 32) & 0xffffffff; + v_lo = (regs[i].val) & 0xffffffff; + + seq_printf(s, " 0x%08x 0x%08x 0x%08x 0x%08x EXT_ERR_REC\n", + regs[i].addr, regs[i].idx, v_hi, v_lo); + } +} + +static int curr_dbg_uidn_show(struct seq_file *s, void *unused, int uid) +{ + struct genwqe_dev *cd = s->private; + int entries; + struct genwqe_reg *regs; + + entries = genwqe_ffdc_buff_size(cd, uid); + if (entries < 0) + return -EINVAL; + + if (entries == 0) + return 0; + + regs = kcalloc(entries, sizeof(*regs), GFP_KERNEL); + if (regs == NULL) + return -ENOMEM; + + genwqe_stop_traps(cd); /* halt the traps while dumping data */ + genwqe_ffdc_buff_read(cd, uid, regs, entries); + genwqe_start_traps(cd); + + dbg_uidn_show(s, regs, entries); + kfree(regs); + return 0; +} + +static int curr_dbg_uid0_show(struct seq_file *s, void *unused) +{ + return curr_dbg_uidn_show(s, unused, 0); +} + +DEFINE_SHOW_ATTRIBUTE(curr_dbg_uid0); + +static int curr_dbg_uid1_show(struct seq_file *s, void *unused) +{ + return curr_dbg_uidn_show(s, unused, 1); +} + +DEFINE_SHOW_ATTRIBUTE(curr_dbg_uid1); + +static int curr_dbg_uid2_show(struct seq_file *s, void *unused) +{ + return curr_dbg_uidn_show(s, unused, 2); +} + +DEFINE_SHOW_ATTRIBUTE(curr_dbg_uid2); + +static int prev_dbg_uidn_show(struct seq_file *s, void *unused, int uid) +{ + struct genwqe_dev *cd = s->private; + + dbg_uidn_show(s, cd->ffdc[uid].regs, cd->ffdc[uid].entries); + return 0; +} + +static int prev_dbg_uid0_show(struct seq_file *s, void *unused) +{ + return prev_dbg_uidn_show(s, unused, 0); +} + +DEFINE_SHOW_ATTRIBUTE(prev_dbg_uid0); + +static int prev_dbg_uid1_show(struct seq_file *s, void *unused) +{ + return prev_dbg_uidn_show(s, unused, 1); +} + +DEFINE_SHOW_ATTRIBUTE(prev_dbg_uid1); + +static int prev_dbg_uid2_show(struct seq_file *s, void *unused) +{ + return prev_dbg_uidn_show(s, unused, 2); +} + +DEFINE_SHOW_ATTRIBUTE(prev_dbg_uid2); + +static int curr_regs_show(struct seq_file *s, void *unused) +{ + struct genwqe_dev *cd = s->private; + unsigned int i; + struct genwqe_reg *regs; + + regs = kcalloc(GENWQE_FFDC_REGS, sizeof(*regs), GFP_KERNEL); + if (regs == NULL) + return -ENOMEM; + + genwqe_stop_traps(cd); + genwqe_read_ffdc_regs(cd, regs, GENWQE_FFDC_REGS, 1); + genwqe_start_traps(cd); + + for (i = 0; i < GENWQE_FFDC_REGS; i++) { + if (regs[i].addr == 0xffffffff) + break; /* invalid entries */ + + if (regs[i].val == 0x0ull) + continue; /* do not print 0x0 FIRs */ + + seq_printf(s, " 0x%08x 0x%016llx\n", + regs[i].addr, regs[i].val); + } + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(curr_regs); + +static int prev_regs_show(struct seq_file *s, void *unused) +{ + struct genwqe_dev *cd = s->private; + unsigned int i; + struct genwqe_reg *regs = cd->ffdc[GENWQE_DBG_REGS].regs; + + if (regs == NULL) + return -EINVAL; + + for (i = 0; i < GENWQE_FFDC_REGS; i++) { + if (regs[i].addr == 0xffffffff) + break; /* invalid entries */ + + if (regs[i].val == 0x0ull) + continue; /* do not print 0x0 FIRs */ + + seq_printf(s, " 0x%08x 0x%016llx\n", + regs[i].addr, regs[i].val); + } + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(prev_regs); + +static int jtimer_show(struct seq_file *s, void *unused) +{ + struct genwqe_dev *cd = s->private; + unsigned int vf_num; + u64 jtimer; + + jtimer = genwqe_read_vreg(cd, 
IO_SLC_VF_APPJOB_TIMEOUT, 0); + seq_printf(s, " PF 0x%016llx %d msec\n", jtimer, + GENWQE_PF_JOBTIMEOUT_MSEC); + + for (vf_num = 0; vf_num < cd->num_vfs; vf_num++) { + jtimer = genwqe_read_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT, + vf_num + 1); + seq_printf(s, " VF%-2d 0x%016llx %d msec\n", vf_num, jtimer, + cd->vf_jobtimeout_msec[vf_num]); + } + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(jtimer); + +static int queue_working_time_show(struct seq_file *s, void *unused) +{ + struct genwqe_dev *cd = s->private; + unsigned int vf_num; + u64 t; + + t = genwqe_read_vreg(cd, IO_SLC_VF_QUEUE_WTIME, 0); + seq_printf(s, " PF 0x%016llx\n", t); + + for (vf_num = 0; vf_num < cd->num_vfs; vf_num++) { + t = genwqe_read_vreg(cd, IO_SLC_VF_QUEUE_WTIME, vf_num + 1); + seq_printf(s, " VF%-2d 0x%016llx\n", vf_num, t); + } + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(queue_working_time); + +static int ddcb_info_show(struct seq_file *s, void *unused) +{ + struct genwqe_dev *cd = s->private; + unsigned int i; + struct ddcb_queue *queue; + struct ddcb *pddcb; + + queue = &cd->queue; + seq_puts(s, "DDCB QUEUE:\n"); + seq_printf(s, " ddcb_max: %d\n" + " ddcb_daddr: %016llx - %016llx\n" + " ddcb_vaddr: %p\n" + " ddcbs_in_flight: %u\n" + " ddcbs_max_in_flight: %u\n" + " ddcbs_completed: %u\n" + " return_on_busy: %u\n" + " wait_on_busy: %u\n" + " irqs_processed: %u\n", + queue->ddcb_max, (long long)queue->ddcb_daddr, + (long long)queue->ddcb_daddr + + (queue->ddcb_max * DDCB_LENGTH), + queue->ddcb_vaddr, queue->ddcbs_in_flight, + queue->ddcbs_max_in_flight, queue->ddcbs_completed, + queue->return_on_busy, queue->wait_on_busy, + cd->irqs_processed); + + /* Hardware State */ + seq_printf(s, " 0x%08x 0x%016llx IO_QUEUE_CONFIG\n" + " 0x%08x 0x%016llx IO_QUEUE_STATUS\n" + " 0x%08x 0x%016llx IO_QUEUE_SEGMENT\n" + " 0x%08x 0x%016llx IO_QUEUE_INITSQN\n" + " 0x%08x 0x%016llx IO_QUEUE_WRAP\n" + " 0x%08x 0x%016llx IO_QUEUE_OFFSET\n" + " 0x%08x 0x%016llx IO_QUEUE_WTIME\n" + " 0x%08x 0x%016llx IO_QUEUE_ERRCNTS\n" + " 0x%08x 0x%016llx IO_QUEUE_LRW\n", + queue->IO_QUEUE_CONFIG, + __genwqe_readq(cd, queue->IO_QUEUE_CONFIG), + queue->IO_QUEUE_STATUS, + __genwqe_readq(cd, queue->IO_QUEUE_STATUS), + queue->IO_QUEUE_SEGMENT, + __genwqe_readq(cd, queue->IO_QUEUE_SEGMENT), + queue->IO_QUEUE_INITSQN, + __genwqe_readq(cd, queue->IO_QUEUE_INITSQN), + queue->IO_QUEUE_WRAP, + __genwqe_readq(cd, queue->IO_QUEUE_WRAP), + queue->IO_QUEUE_OFFSET, + __genwqe_readq(cd, queue->IO_QUEUE_OFFSET), + queue->IO_QUEUE_WTIME, + __genwqe_readq(cd, queue->IO_QUEUE_WTIME), + queue->IO_QUEUE_ERRCNTS, + __genwqe_readq(cd, queue->IO_QUEUE_ERRCNTS), + queue->IO_QUEUE_LRW, + __genwqe_readq(cd, queue->IO_QUEUE_LRW)); + + seq_printf(s, "DDCB list (ddcb_act=%d/ddcb_next=%d):\n", + queue->ddcb_act, queue->ddcb_next); + + pddcb = queue->ddcb_vaddr; + for (i = 0; i < queue->ddcb_max; i++) { + seq_printf(s, " %-3d: RETC=%03x SEQ=%04x HSI/SHI=%02x/%02x ", + i, be16_to_cpu(pddcb->retc_16), + be16_to_cpu(pddcb->seqnum_16), + pddcb->hsi, pddcb->shi); + seq_printf(s, "PRIV=%06llx CMD=%02x\n", + be64_to_cpu(pddcb->priv_64), pddcb->cmd); + pddcb++; + } + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(ddcb_info); + +static int info_show(struct seq_file *s, void *unused) +{ + struct genwqe_dev *cd = s->private; + u64 app_id, slu_id, bitstream = -1; + struct pci_dev *pci_dev = cd->pci_dev; + + slu_id = __genwqe_readq(cd, IO_SLU_UNITCFG); + app_id = __genwqe_readq(cd, IO_APP_UNITCFG); + + if (genwqe_is_privileged(cd)) + bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM); + + seq_printf(s, "%s driver version: 
%s\n" + " Device Name/Type: %s %s CardIdx: %d\n" + " SLU/APP Config : 0x%016llx/0x%016llx\n" + " Build Date : %u/%x/%u\n" + " Base Clock : %u MHz\n" + " Arch/SVN Release: %u/%llx\n" + " Bitstream : %llx\n", + GENWQE_DEVNAME, DRV_VERSION, dev_name(&pci_dev->dev), + genwqe_is_privileged(cd) ? + "Physical" : "Virtual or no SR-IOV", + cd->card_idx, slu_id, app_id, + (u16)((slu_id >> 12) & 0x0fLLU), /* month */ + (u16)((slu_id >> 4) & 0xffLLU), /* day */ + (u16)((slu_id >> 16) & 0x0fLLU) + 2010, /* year */ + genwqe_base_clock_frequency(cd), + (u16)((slu_id >> 32) & 0xffLLU), slu_id >> 40, + bitstream); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(info); + +void genwqe_init_debugfs(struct genwqe_dev *cd) +{ + struct dentry *root; + char card_name[64]; + char name[64]; + unsigned int i; + + sprintf(card_name, "%s%d_card", GENWQE_DEVNAME, cd->card_idx); + + root = debugfs_create_dir(card_name, cd->debugfs_genwqe); + + /* non privileged interfaces are done here */ + debugfs_create_file("ddcb_info", S_IRUGO, root, cd, &ddcb_info_fops); + debugfs_create_file("info", S_IRUGO, root, cd, &info_fops); + debugfs_create_x64("err_inject", 0666, root, &cd->err_inject); + debugfs_create_u32("ddcb_software_timeout", 0666, root, + &cd->ddcb_software_timeout); + debugfs_create_u32("kill_timeout", 0666, root, &cd->kill_timeout); + + /* privileged interfaces follow here */ + if (!genwqe_is_privileged(cd)) { + cd->debugfs_root = root; + return; + } + + debugfs_create_file("curr_regs", S_IRUGO, root, cd, &curr_regs_fops); + debugfs_create_file("curr_dbg_uid0", S_IRUGO, root, cd, + &curr_dbg_uid0_fops); + debugfs_create_file("curr_dbg_uid1", S_IRUGO, root, cd, + &curr_dbg_uid1_fops); + debugfs_create_file("curr_dbg_uid2", S_IRUGO, root, cd, + &curr_dbg_uid2_fops); + debugfs_create_file("prev_regs", S_IRUGO, root, cd, &prev_regs_fops); + debugfs_create_file("prev_dbg_uid0", S_IRUGO, root, cd, + &prev_dbg_uid0_fops); + debugfs_create_file("prev_dbg_uid1", S_IRUGO, root, cd, + &prev_dbg_uid1_fops); + debugfs_create_file("prev_dbg_uid2", S_IRUGO, root, cd, + &prev_dbg_uid2_fops); + + for (i = 0; i < GENWQE_MAX_VFS; i++) { + sprintf(name, "vf%u_jobtimeout_msec", i); + debugfs_create_u32(name, 0666, root, + &cd->vf_jobtimeout_msec[i]); + } + + debugfs_create_file("jobtimer", S_IRUGO, root, cd, &jtimer_fops); + debugfs_create_file("queue_working_time", S_IRUGO, root, cd, + &queue_working_time_fops); + debugfs_create_u32("skip_recovery", 0666, root, &cd->skip_recovery); + debugfs_create_u32("use_platform_recovery", 0666, root, + &cd->use_platform_recovery); + + cd->debugfs_root = root; +} + +void genqwe_exit_debugfs(struct genwqe_dev *cd) +{ + debugfs_remove_recursive(cd->debugfs_root); +} diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c new file mode 100644 index 0000000000..55fc5b80e6 --- /dev/null +++ b/drivers/misc/genwqe/card_dev.c @@ -0,0 +1,1391 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * IBM Accelerator Family 'GenWQE' + * + * (C) Copyright IBM Corp. 2013 + * + * Author: Frank Haverkamp + * Author: Joerg-Stephan Vogt + * Author: Michael Jung + * Author: Michael Ruettger + */ + +/* + * Character device representation of the GenWQE device. This allows + * user-space applications to communicate with the card. 
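
The *_fops structures handed to debugfs_create_file() in genwqe_init_debugfs() above come from DEFINE_SHOW_ATTRIBUTE(), which roughly expands to a single_open()-based file_operations around the corresponding _show() routine. A hand-written sketch of what the macro provides for the ddcb_info file; the driver itself obtains this from the macro, so the code below is illustration only:

    #include <linux/fs.h>
    #include <linux/module.h>
    #include <linux/seq_file.h>

    /* Roughly what DEFINE_SHOW_ATTRIBUTE(ddcb_info) generates. */
    static int ddcb_info_open(struct inode *inode, struct file *file)
    {
    	/* inode->i_private is the 'cd' pointer passed to debugfs_create_file();
    	 * single_open() hands it to ddcb_info_show() via seq_file->private. */
    	return single_open(file, ddcb_info_show, inode->i_private);
    }

    static const struct file_operations ddcb_info_fops = {
    	.owner	 = THIS_MODULE,
    	.open	 = ddcb_info_open,
    	.read	 = seq_read,
    	.llseek	 = seq_lseek,
    	.release = single_release,
    };
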
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "card_base.h" +#include "card_ddcb.h" + +static int genwqe_open_files(struct genwqe_dev *cd) +{ + int rc; + unsigned long flags; + + spin_lock_irqsave(&cd->file_lock, flags); + rc = list_empty(&cd->file_list); + spin_unlock_irqrestore(&cd->file_lock, flags); + return !rc; +} + +static void genwqe_add_file(struct genwqe_dev *cd, struct genwqe_file *cfile) +{ + unsigned long flags; + + cfile->opener = get_pid(task_tgid(current)); + spin_lock_irqsave(&cd->file_lock, flags); + list_add(&cfile->list, &cd->file_list); + spin_unlock_irqrestore(&cd->file_lock, flags); +} + +static int genwqe_del_file(struct genwqe_dev *cd, struct genwqe_file *cfile) +{ + unsigned long flags; + + spin_lock_irqsave(&cd->file_lock, flags); + list_del(&cfile->list); + spin_unlock_irqrestore(&cd->file_lock, flags); + put_pid(cfile->opener); + + return 0; +} + +static void genwqe_add_pin(struct genwqe_file *cfile, struct dma_mapping *m) +{ + unsigned long flags; + + spin_lock_irqsave(&cfile->pin_lock, flags); + list_add(&m->pin_list, &cfile->pin_list); + spin_unlock_irqrestore(&cfile->pin_lock, flags); +} + +static int genwqe_del_pin(struct genwqe_file *cfile, struct dma_mapping *m) +{ + unsigned long flags; + + spin_lock_irqsave(&cfile->pin_lock, flags); + list_del(&m->pin_list); + spin_unlock_irqrestore(&cfile->pin_lock, flags); + + return 0; +} + +/** + * genwqe_search_pin() - Search for the mapping for a userspace address + * @cfile: Descriptor of opened file + * @u_addr: User virtual address + * @size: Size of buffer + * @virt_addr: Virtual address to be updated + * + * Return: Pointer to the corresponding mapping NULL if not found + */ +static struct dma_mapping *genwqe_search_pin(struct genwqe_file *cfile, + unsigned long u_addr, + unsigned int size, + void **virt_addr) +{ + unsigned long flags; + struct dma_mapping *m; + + spin_lock_irqsave(&cfile->pin_lock, flags); + + list_for_each_entry(m, &cfile->pin_list, pin_list) { + if ((((u64)m->u_vaddr) <= (u_addr)) && + (((u64)m->u_vaddr + m->size) >= (u_addr + size))) { + + if (virt_addr) + *virt_addr = m->k_vaddr + + (u_addr - (u64)m->u_vaddr); + + spin_unlock_irqrestore(&cfile->pin_lock, flags); + return m; + } + } + spin_unlock_irqrestore(&cfile->pin_lock, flags); + return NULL; +} + +static void __genwqe_add_mapping(struct genwqe_file *cfile, + struct dma_mapping *dma_map) +{ + unsigned long flags; + + spin_lock_irqsave(&cfile->map_lock, flags); + list_add(&dma_map->card_list, &cfile->map_list); + spin_unlock_irqrestore(&cfile->map_lock, flags); +} + +static void __genwqe_del_mapping(struct genwqe_file *cfile, + struct dma_mapping *dma_map) +{ + unsigned long flags; + + spin_lock_irqsave(&cfile->map_lock, flags); + list_del(&dma_map->card_list); + spin_unlock_irqrestore(&cfile->map_lock, flags); +} + + +/** + * __genwqe_search_mapping() - Search for the mapping for a userspace address + * @cfile: descriptor of opened file + * @u_addr: user virtual address + * @size: size of buffer + * @dma_addr: DMA address to be updated + * @virt_addr: Virtual address to be updated + * Return: Pointer to the corresponding mapping NULL if not found + */ +static struct dma_mapping *__genwqe_search_mapping(struct genwqe_file *cfile, + unsigned long u_addr, + unsigned int size, + dma_addr_t *dma_addr, + void **virt_addr) +{ + unsigned long flags; + struct dma_mapping *m; + struct pci_dev *pci_dev = cfile->cd->pci_dev; + + spin_lock_irqsave(&cfile->map_lock, flags); 
+ list_for_each_entry(m, &cfile->map_list, card_list) { + + if ((((u64)m->u_vaddr) <= (u_addr)) && + (((u64)m->u_vaddr + m->size) >= (u_addr + size))) { + + /* match found: current is as expected and + addr is in range */ + if (dma_addr) + *dma_addr = m->dma_addr + + (u_addr - (u64)m->u_vaddr); + + if (virt_addr) + *virt_addr = m->k_vaddr + + (u_addr - (u64)m->u_vaddr); + + spin_unlock_irqrestore(&cfile->map_lock, flags); + return m; + } + } + spin_unlock_irqrestore(&cfile->map_lock, flags); + + dev_err(&pci_dev->dev, + "[%s] Entry not found: u_addr=%lx, size=%x\n", + __func__, u_addr, size); + + return NULL; +} + +static void genwqe_remove_mappings(struct genwqe_file *cfile) +{ + int i = 0; + struct list_head *node, *next; + struct dma_mapping *dma_map; + struct genwqe_dev *cd = cfile->cd; + struct pci_dev *pci_dev = cfile->cd->pci_dev; + + list_for_each_safe(node, next, &cfile->map_list) { + dma_map = list_entry(node, struct dma_mapping, card_list); + + list_del_init(&dma_map->card_list); + + /* + * This is really a bug, because those things should + * already have been tidied up. + * + * GENWQE_MAPPING_RAW should have been removed via munmap(). + * GENWQE_MAPPING_SGL_TEMP should be removed by tidy up code. + */ + dev_err(&pci_dev->dev, + "[%s] %d. cleanup mapping: u_vaddr=%p u_kaddr=%016lx dma_addr=%lx\n", + __func__, i++, dma_map->u_vaddr, + (unsigned long)dma_map->k_vaddr, + (unsigned long)dma_map->dma_addr); + + if (dma_map->type == GENWQE_MAPPING_RAW) { + /* we allocated this dynamically */ + __genwqe_free_consistent(cd, dma_map->size, + dma_map->k_vaddr, + dma_map->dma_addr); + kfree(dma_map); + } else if (dma_map->type == GENWQE_MAPPING_SGL_TEMP) { + /* we use dma_map statically from the request */ + genwqe_user_vunmap(cd, dma_map); + } + } +} + +static void genwqe_remove_pinnings(struct genwqe_file *cfile) +{ + struct list_head *node, *next; + struct dma_mapping *dma_map; + struct genwqe_dev *cd = cfile->cd; + + list_for_each_safe(node, next, &cfile->pin_list) { + dma_map = list_entry(node, struct dma_mapping, pin_list); + + /* + * This is not a bug, because a killed process might + * not call the unpin ioctl, which is supposed to free + * the resources. + * + * Pinnings are dynamically allocated and need to be + * deleted. + */ + list_del_init(&dma_map->pin_list); + genwqe_user_vunmap(cd, dma_map); + kfree(dma_map); + } +} + +/** + * genwqe_kill_fasync() - Send signal to all processes with open GenWQE files + * @cd: GenWQE device information + * @sig: Signal to send out + * + * E.g. genwqe_kill_fasync(cd, SIGIO); + */ +static int genwqe_kill_fasync(struct genwqe_dev *cd, int sig) +{ + unsigned int files = 0; + unsigned long flags; + struct genwqe_file *cfile; + + spin_lock_irqsave(&cd->file_lock, flags); + list_for_each_entry(cfile, &cd->file_list, list) { + if (cfile->async_queue) + kill_fasync(&cfile->async_queue, sig, POLL_HUP); + files++; + } + spin_unlock_irqrestore(&cd->file_lock, flags); + return files; +} + +static int genwqe_terminate(struct genwqe_dev *cd) +{ + unsigned int files = 0; + unsigned long flags; + struct genwqe_file *cfile; + + spin_lock_irqsave(&cd->file_lock, flags); + list_for_each_entry(cfile, &cd->file_list, list) { + kill_pid(cfile->opener, SIGKILL, 1); + files++; + } + spin_unlock_irqrestore(&cd->file_lock, flags); + return files; +} + +/** + * genwqe_open() - file open + * @inode: file system information + * @filp: file handle + * + * This function is executed whenever an application calls + * open("/dev/genwqe",..).
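+ * + * A minimal user-space sketch (the node name follows the + * GENWQE_DEVNAME "%u_card" pattern used in genwqe_device_create() below; + * the exact path may differ): + * + * int fd = open("/dev/genwqe0_card", O_RDWR); + * ... issue GENWQE_* ioctls on fd ... + * close(fd);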
+ * + * Return: 0 if successful or <0 if errors + */ +static int genwqe_open(struct inode *inode, struct file *filp) +{ + struct genwqe_dev *cd; + struct genwqe_file *cfile; + + cfile = kzalloc(sizeof(*cfile), GFP_KERNEL); + if (cfile == NULL) + return -ENOMEM; + + cd = container_of(inode->i_cdev, struct genwqe_dev, cdev_genwqe); + cfile->cd = cd; + cfile->filp = filp; + cfile->client = NULL; + + spin_lock_init(&cfile->map_lock); /* list of raw memory allocations */ + INIT_LIST_HEAD(&cfile->map_list); + + spin_lock_init(&cfile->pin_lock); /* list of user pinned memory */ + INIT_LIST_HEAD(&cfile->pin_list); + + filp->private_data = cfile; + + genwqe_add_file(cd, cfile); + return 0; +} + +/** + * genwqe_fasync() - Set up process to receive SIGIO. + * @fd: file descriptor + * @filp: file handle + * @mode: file mode + * + * Sending a signal works as follows: + * + * if (cdev->async_queue) + * kill_fasync(&cdev->async_queue, SIGIO, POLL_IN); + * + * Some devices also implement asynchronous notification to indicate + * when the device can be written; in this case, of course, + * kill_fasync must be called with a mode of POLL_OUT. + */ +static int genwqe_fasync(int fd, struct file *filp, int mode) +{ + struct genwqe_file *cdev = (struct genwqe_file *)filp->private_data; + + return fasync_helper(fd, filp, mode, &cdev->async_queue); +} + + +/** + * genwqe_release() - file close + * @inode: file system information + * @filp: file handle + * + * This function is executed whenever an application calls 'close(fd_genwqe)' + * + * Return: always 0 + */ +static int genwqe_release(struct inode *inode, struct file *filp) +{ + struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data; + struct genwqe_dev *cd = cfile->cd; + + /* there must be no entries in these lists! */ + genwqe_remove_mappings(cfile); + genwqe_remove_pinnings(cfile); + + /* remove this filp from the asynchronously notified filps */ + genwqe_fasync(-1, filp, 0); + + /* + * For this to work we must not release cd when this cfile is + * not yet released, otherwise the list entry is invalid, + * because the list itself gets reinstantiated! + */ + genwqe_del_file(cd, cfile); + kfree(cfile); + return 0; +} + +static void genwqe_vma_open(struct vm_area_struct *vma) +{ + /* nothing ... */ +} + +/** + * genwqe_vma_close() - Called each time the vma is unmapped + * @vma: VMA area to close + * + * Free memory which got allocated by GenWQE mmap().
+ */ +static void genwqe_vma_close(struct vm_area_struct *vma) +{ + unsigned long vsize = vma->vm_end - vma->vm_start; + struct inode *inode = file_inode(vma->vm_file); + struct dma_mapping *dma_map; + struct genwqe_dev *cd = container_of(inode->i_cdev, struct genwqe_dev, + cdev_genwqe); + struct pci_dev *pci_dev = cd->pci_dev; + dma_addr_t d_addr = 0; + struct genwqe_file *cfile = vma->vm_private_data; + + dma_map = __genwqe_search_mapping(cfile, vma->vm_start, vsize, + &d_addr, NULL); + if (dma_map == NULL) { + dev_err(&pci_dev->dev, + " [%s] err: mapping not found: v=%lx, p=%lx s=%lx\n", + __func__, vma->vm_start, vma->vm_pgoff << PAGE_SHIFT, + vsize); + return; + } + __genwqe_del_mapping(cfile, dma_map); + __genwqe_free_consistent(cd, dma_map->size, dma_map->k_vaddr, + dma_map->dma_addr); + kfree(dma_map); +} + +static const struct vm_operations_struct genwqe_vma_ops = { + .open = genwqe_vma_open, + .close = genwqe_vma_close, +}; + +/** + * genwqe_mmap() - Provide contiguous buffers to userspace + * @filp: File pointer (unused) + * @vma: VMA area to map + * + * We use mmap() to allocate contiguous buffers used for DMA + * transfers. After the buffer is allocated we remap it to user-space + * and remember a reference to our dma_mapping data structure, where + * we store the associated DMA address and allocated size. + * + * When we receive a DDCB execution request with the ATS bits set to + * plain buffer, we look up our dma_mapping list to find the + * corresponding DMA address for the associated user-space address. + */ +static int genwqe_mmap(struct file *filp, struct vm_area_struct *vma) +{ + int rc; + unsigned long pfn, vsize = vma->vm_end - vma->vm_start; + struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data; + struct genwqe_dev *cd = cfile->cd; + struct dma_mapping *dma_map; + + if (vsize == 0) + return -EINVAL; + + if (get_order(vsize) > MAX_ORDER) + return -ENOMEM; + + dma_map = kzalloc(sizeof(struct dma_mapping), GFP_KERNEL); + if (dma_map == NULL) + return -ENOMEM; + + genwqe_mapping_init(dma_map, GENWQE_MAPPING_RAW); + dma_map->u_vaddr = (void *)vma->vm_start; + dma_map->size = vsize; + dma_map->nr_pages = DIV_ROUND_UP(vsize, PAGE_SIZE); + dma_map->k_vaddr = __genwqe_alloc_consistent(cd, vsize, + &dma_map->dma_addr); + if (dma_map->k_vaddr == NULL) { + rc = -ENOMEM; + goto free_dma_map; + } + + if (capable(CAP_SYS_ADMIN) && (vsize > sizeof(dma_addr_t))) + *(dma_addr_t *)dma_map->k_vaddr = dma_map->dma_addr; + + pfn = virt_to_phys(dma_map->k_vaddr) >> PAGE_SHIFT; + rc = remap_pfn_range(vma, + vma->vm_start, + pfn, + vsize, + vma->vm_page_prot); + if (rc != 0) { + rc = -EFAULT; + goto free_dma_mem; + } + + vma->vm_private_data = cfile; + vma->vm_ops = &genwqe_vma_ops; + __genwqe_add_mapping(cfile, dma_map); + + return 0; + + free_dma_mem: + __genwqe_free_consistent(cd, dma_map->size, + dma_map->k_vaddr, + dma_map->dma_addr); + free_dma_map: + kfree(dma_map); + return rc; +} + +#define FLASH_BLOCK 0x40000 /* we use 256k blocks */ + +/** + * do_flash_update() - Execute flash update (write image or CVPD) + * @cfile: Descriptor of opened file + * @load: details about image load + * + * Return: 0 if successful + */ +static int do_flash_update(struct genwqe_file *cfile, + struct genwqe_bitstream *load) +{ + int rc = 0; + int blocks_to_flash; + dma_addr_t dma_addr; + u64 flash = 0; + size_t tocopy = 0; + u8 __user *buf; + u8 *xbuf; + u32 crc; + u8 cmdopts; + struct genwqe_dev *cd = cfile->cd; + struct file *filp = cfile->filp; + struct pci_dev *pci_dev = cd->pci_dev; +
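+	/* + * Sketch of the flow implemented below: the image is written in + * FLASH_BLOCK chunks; each chunk is copied from user space into a + * coherent DMA buffer, wrapped into a SLCMD_MOVE_FLASH DDCB and + * executed before advancing to the next block. + */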
+ if ((load->size & 0x3) != 0) + return -EINVAL; + + if (((unsigned long)(load->data_addr) & ~PAGE_MASK) != 0) + return -EINVAL; + + /* FIXME Bits have changed for new service layer! */ + switch ((char)load->partition) { + case '0': + cmdopts = 0x14; + break; /* download/erase_first/part_0 */ + case '1': + cmdopts = 0x1C; + break; /* download/erase_first/part_1 */ + case 'v': + cmdopts = 0x0C; + break; /* download/erase_first/vpd */ + default: + return -EINVAL; + } + + buf = (u8 __user *)load->data_addr; + xbuf = __genwqe_alloc_consistent(cd, FLASH_BLOCK, &dma_addr); + if (xbuf == NULL) + return -ENOMEM; + + blocks_to_flash = load->size / FLASH_BLOCK; + while (load->size) { + struct genwqe_ddcb_cmd *req; + + /* + * We must be 4 byte aligned. Buffer must be 0 appened + * to have defined values when calculating CRC. + */ + tocopy = min_t(size_t, load->size, FLASH_BLOCK); + + rc = copy_from_user(xbuf, buf, tocopy); + if (rc) { + rc = -EFAULT; + goto free_buffer; + } + crc = genwqe_crc32(xbuf, tocopy, 0xffffffff); + + dev_dbg(&pci_dev->dev, + "[%s] DMA: %lx CRC: %08x SZ: %ld %d\n", + __func__, (unsigned long)dma_addr, crc, tocopy, + blocks_to_flash); + + /* prepare DDCB for SLU process */ + req = ddcb_requ_alloc(); + if (req == NULL) { + rc = -ENOMEM; + goto free_buffer; + } + + req->cmd = SLCMD_MOVE_FLASH; + req->cmdopts = cmdopts; + + /* prepare invariant values */ + if (genwqe_get_slu_id(cd) <= 0x2) { + *(__be64 *)&req->__asiv[0] = cpu_to_be64(dma_addr); + *(__be64 *)&req->__asiv[8] = cpu_to_be64(tocopy); + *(__be64 *)&req->__asiv[16] = cpu_to_be64(flash); + *(__be32 *)&req->__asiv[24] = cpu_to_be32(0); + req->__asiv[24] = load->uid; + *(__be32 *)&req->__asiv[28] = cpu_to_be32(crc); + + /* for simulation only */ + *(__be64 *)&req->__asiv[88] = cpu_to_be64(load->slu_id); + *(__be64 *)&req->__asiv[96] = cpu_to_be64(load->app_id); + req->asiv_length = 32; /* bytes included in crc calc */ + } else { /* setup DDCB for ATS architecture */ + *(__be64 *)&req->asiv[0] = cpu_to_be64(dma_addr); + *(__be32 *)&req->asiv[8] = cpu_to_be32(tocopy); + *(__be32 *)&req->asiv[12] = cpu_to_be32(0); /* resvd */ + *(__be64 *)&req->asiv[16] = cpu_to_be64(flash); + *(__be32 *)&req->asiv[24] = cpu_to_be32(load->uid<<24); + *(__be32 *)&req->asiv[28] = cpu_to_be32(crc); + + /* for simulation only */ + *(__be64 *)&req->asiv[80] = cpu_to_be64(load->slu_id); + *(__be64 *)&req->asiv[88] = cpu_to_be64(load->app_id); + + /* Rd only */ + req->ats = 0x4ULL << 44; + req->asiv_length = 40; /* bytes included in crc calc */ + } + req->asv_length = 8; + + /* For Genwqe5 we get back the calculated CRC */ + *(u64 *)&req->asv[0] = 0ULL; /* 0x80 */ + + rc = __genwqe_execute_raw_ddcb(cd, req, filp->f_flags); + + load->retc = req->retc; + load->attn = req->attn; + load->progress = req->progress; + + if (rc < 0) { + ddcb_requ_free(req); + goto free_buffer; + } + + if (req->retc != DDCB_RETC_COMPLETE) { + rc = -EIO; + ddcb_requ_free(req); + goto free_buffer; + } + + load->size -= tocopy; + flash += tocopy; + buf += tocopy; + blocks_to_flash--; + ddcb_requ_free(req); + } + + free_buffer: + __genwqe_free_consistent(cd, FLASH_BLOCK, xbuf, dma_addr); + return rc; +} + +static int do_flash_read(struct genwqe_file *cfile, + struct genwqe_bitstream *load) +{ + int rc, blocks_to_flash; + dma_addr_t dma_addr; + u64 flash = 0; + size_t tocopy = 0; + u8 __user *buf; + u8 *xbuf; + u8 cmdopts; + struct genwqe_dev *cd = cfile->cd; + struct file *filp = cfile->filp; + struct pci_dev *pci_dev = cd->pci_dev; + struct genwqe_ddcb_cmd *cmd; + + if 
((load->size & 0x3) != 0) + return -EINVAL; + + if (((unsigned long)(load->data_addr) & ~PAGE_MASK) != 0) + return -EINVAL; + + /* FIXME Bits have changed for new service layer! */ + switch ((char)load->partition) { + case '0': + cmdopts = 0x12; + break; /* upload/part_0 */ + case '1': + cmdopts = 0x1A; + break; /* upload/part_1 */ + case 'v': + cmdopts = 0x0A; + break; /* upload/vpd */ + default: + return -EINVAL; + } + + buf = (u8 __user *)load->data_addr; + xbuf = __genwqe_alloc_consistent(cd, FLASH_BLOCK, &dma_addr); + if (xbuf == NULL) + return -ENOMEM; + + blocks_to_flash = load->size / FLASH_BLOCK; + while (load->size) { + /* + * We must be 4 byte aligned. Buffer must be 0 appened + * to have defined values when calculating CRC. + */ + tocopy = min_t(size_t, load->size, FLASH_BLOCK); + + dev_dbg(&pci_dev->dev, + "[%s] DMA: %lx SZ: %ld %d\n", + __func__, (unsigned long)dma_addr, tocopy, + blocks_to_flash); + + /* prepare DDCB for SLU process */ + cmd = ddcb_requ_alloc(); + if (cmd == NULL) { + rc = -ENOMEM; + goto free_buffer; + } + cmd->cmd = SLCMD_MOVE_FLASH; + cmd->cmdopts = cmdopts; + + /* prepare invariant values */ + if (genwqe_get_slu_id(cd) <= 0x2) { + *(__be64 *)&cmd->__asiv[0] = cpu_to_be64(dma_addr); + *(__be64 *)&cmd->__asiv[8] = cpu_to_be64(tocopy); + *(__be64 *)&cmd->__asiv[16] = cpu_to_be64(flash); + *(__be32 *)&cmd->__asiv[24] = cpu_to_be32(0); + cmd->__asiv[24] = load->uid; + *(__be32 *)&cmd->__asiv[28] = cpu_to_be32(0) /* CRC */; + cmd->asiv_length = 32; /* bytes included in crc calc */ + } else { /* setup DDCB for ATS architecture */ + *(__be64 *)&cmd->asiv[0] = cpu_to_be64(dma_addr); + *(__be32 *)&cmd->asiv[8] = cpu_to_be32(tocopy); + *(__be32 *)&cmd->asiv[12] = cpu_to_be32(0); /* resvd */ + *(__be64 *)&cmd->asiv[16] = cpu_to_be64(flash); + *(__be32 *)&cmd->asiv[24] = cpu_to_be32(load->uid<<24); + *(__be32 *)&cmd->asiv[28] = cpu_to_be32(0); /* CRC */ + + /* rd/wr */ + cmd->ats = 0x5ULL << 44; + cmd->asiv_length = 40; /* bytes included in crc calc */ + } + cmd->asv_length = 8; + + /* we only get back the calculated CRC */ + *(u64 *)&cmd->asv[0] = 0ULL; /* 0x80 */ + + rc = __genwqe_execute_raw_ddcb(cd, cmd, filp->f_flags); + + load->retc = cmd->retc; + load->attn = cmd->attn; + load->progress = cmd->progress; + + if ((rc < 0) && (rc != -EBADMSG)) { + ddcb_requ_free(cmd); + goto free_buffer; + } + + rc = copy_to_user(buf, xbuf, tocopy); + if (rc) { + rc = -EFAULT; + ddcb_requ_free(cmd); + goto free_buffer; + } + + /* We know that we can get retc 0x104 with CRC err */ + if (((cmd->retc == DDCB_RETC_FAULT) && + (cmd->attn != 0x02)) || /* Normally ignore CRC error */ + ((cmd->retc == DDCB_RETC_COMPLETE) && + (cmd->attn != 0x00))) { /* Everything was fine */ + rc = -EIO; + ddcb_requ_free(cmd); + goto free_buffer; + } + + load->size -= tocopy; + flash += tocopy; + buf += tocopy; + blocks_to_flash--; + ddcb_requ_free(cmd); + } + rc = 0; + + free_buffer: + __genwqe_free_consistent(cd, FLASH_BLOCK, xbuf, dma_addr); + return rc; +} + +static int genwqe_pin_mem(struct genwqe_file *cfile, struct genwqe_mem *m) +{ + int rc; + struct genwqe_dev *cd = cfile->cd; + struct pci_dev *pci_dev = cfile->cd->pci_dev; + struct dma_mapping *dma_map; + unsigned long map_addr; + unsigned long map_size; + + if ((m->addr == 0x0) || (m->size == 0)) + return -EINVAL; + if (m->size > ULONG_MAX - PAGE_SIZE - (m->addr & ~PAGE_MASK)) + return -EINVAL; + + map_addr = (m->addr & PAGE_MASK); + map_size = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE); + + dma_map = kzalloc(sizeof(struct 
dma_mapping), GFP_KERNEL); + if (dma_map == NULL) + return -ENOMEM; + + genwqe_mapping_init(dma_map, GENWQE_MAPPING_SGL_PINNED); + rc = genwqe_user_vmap(cd, dma_map, (void *)map_addr, map_size); + if (rc != 0) { + dev_err(&pci_dev->dev, + "[%s] genwqe_user_vmap rc=%d\n", __func__, rc); + kfree(dma_map); + return rc; + } + + genwqe_add_pin(cfile, dma_map); + return 0; +} + +static int genwqe_unpin_mem(struct genwqe_file *cfile, struct genwqe_mem *m) +{ + struct genwqe_dev *cd = cfile->cd; + struct dma_mapping *dma_map; + unsigned long map_addr; + unsigned long map_size; + + if (m->addr == 0x0) + return -EINVAL; + + map_addr = (m->addr & PAGE_MASK); + map_size = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE); + + dma_map = genwqe_search_pin(cfile, map_addr, map_size, NULL); + if (dma_map == NULL) + return -ENOENT; + + genwqe_del_pin(cfile, dma_map); + genwqe_user_vunmap(cd, dma_map); + kfree(dma_map); + return 0; +} + +/** + * ddcb_cmd_cleanup() - Remove dynamically created fixup entries + * @cfile: Descriptor of opened file + * @req: DDCB work request + * + * Only if there are any. Pinnings are not removed. + */ +static int ddcb_cmd_cleanup(struct genwqe_file *cfile, struct ddcb_requ *req) +{ + unsigned int i; + struct dma_mapping *dma_map; + struct genwqe_dev *cd = cfile->cd; + + for (i = 0; i < DDCB_FIXUPS; i++) { + dma_map = &req->dma_mappings[i]; + + if (dma_mapping_used(dma_map)) { + __genwqe_del_mapping(cfile, dma_map); + genwqe_user_vunmap(cd, dma_map); + } + if (req->sgls[i].sgl != NULL) + genwqe_free_sync_sgl(cd, &req->sgls[i]); + } + return 0; +} + +/** + * ddcb_cmd_fixups() - Establish DMA fixups/sglists for user memory references + * @cfile: Descriptor of opened file + * @req: DDCB work request + * + * Before the DDCB gets executed we need to handle the fixups. We + * replace the user-space addresses with DMA addresses or do + * additional setup work e.g. generating a scatter-gather list which + * is used to describe the memory referred to in the fixup. + */ +static int ddcb_cmd_fixups(struct genwqe_file *cfile, struct ddcb_requ *req) +{ + int rc; + unsigned int asiv_offs, i; + struct genwqe_dev *cd = cfile->cd; + struct genwqe_ddcb_cmd *cmd = &req->cmd; + struct dma_mapping *m; + + for (i = 0, asiv_offs = 0x00; asiv_offs <= 0x58; + i++, asiv_offs += 0x08) { + + u64 u_addr; + dma_addr_t d_addr; + u32 u_size = 0; + u64 ats_flags; + + ats_flags = ATS_GET_FLAGS(cmd->ats, asiv_offs); + + switch (ats_flags) { + + case ATS_TYPE_DATA: + break; /* nothing to do here */ + + case ATS_TYPE_FLAT_RDWR: + case ATS_TYPE_FLAT_RD: { + u_addr = be64_to_cpu(*((__be64 *)&cmd-> + asiv[asiv_offs])); + u_size = be32_to_cpu(*((__be32 *)&cmd-> + asiv[asiv_offs + 0x08])); + + /* + * No data available. Ignore u_addr in this + * case and set addr to 0. Hardware must not + * fetch the buffer. + */ + if (u_size == 0x0) { + *((__be64 *)&cmd->asiv[asiv_offs]) = + cpu_to_be64(0x0); + break; + } + + m = __genwqe_search_mapping(cfile, u_addr, u_size, + &d_addr, NULL); + if (m == NULL) { + rc = -EFAULT; + goto err_out; + } + + *((__be64 *)&cmd->asiv[asiv_offs]) = + cpu_to_be64(d_addr); + break; + } + + case ATS_TYPE_SGL_RDWR: + case ATS_TYPE_SGL_RD: { + int page_offs; + + u_addr = be64_to_cpu(*((__be64 *) + &cmd->asiv[asiv_offs])); + u_size = be32_to_cpu(*((__be32 *) + &cmd->asiv[asiv_offs + 0x08])); + + /* + * No data available. Ignore u_addr in this + * case and set addr to 0. Hardware must not + * fetch the empty sgl. 
+ */ + if (u_size == 0x0) { + *((__be64 *)&cmd->asiv[asiv_offs]) = + cpu_to_be64(0x0); + break; + } + + m = genwqe_search_pin(cfile, u_addr, u_size, NULL); + if (m != NULL) { + page_offs = (u_addr - + (u64)m->u_vaddr)/PAGE_SIZE; + } else { + m = &req->dma_mappings[i]; + + genwqe_mapping_init(m, + GENWQE_MAPPING_SGL_TEMP); + + if (ats_flags == ATS_TYPE_SGL_RD) + m->write = 0; + + rc = genwqe_user_vmap(cd, m, (void *)u_addr, + u_size); + if (rc != 0) + goto err_out; + + __genwqe_add_mapping(cfile, m); + page_offs = 0; + } + + /* create genwqe style scatter gather list */ + rc = genwqe_alloc_sync_sgl(cd, &req->sgls[i], + (void __user *)u_addr, + u_size, m->write); + if (rc != 0) + goto err_out; + + genwqe_setup_sgl(cd, &req->sgls[i], + &m->dma_list[page_offs]); + + *((__be64 *)&cmd->asiv[asiv_offs]) = + cpu_to_be64(req->sgls[i].sgl_dma_addr); + + break; + } + default: + rc = -EINVAL; + goto err_out; + } + } + return 0; + + err_out: + ddcb_cmd_cleanup(cfile, req); + return rc; +} + +/** + * genwqe_execute_ddcb() - Execute DDCB using userspace address fixups + * @cfile: Descriptor of opened file + * @cmd: Command identifier (passed from user) + * + * The code will build up the translation tables or lookup the + * contignous memory allocation table to find the right translations + * and DMA addresses. + */ +static int genwqe_execute_ddcb(struct genwqe_file *cfile, + struct genwqe_ddcb_cmd *cmd) +{ + int rc; + struct genwqe_dev *cd = cfile->cd; + struct file *filp = cfile->filp; + struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd); + + rc = ddcb_cmd_fixups(cfile, req); + if (rc != 0) + return rc; + + rc = __genwqe_execute_raw_ddcb(cd, cmd, filp->f_flags); + ddcb_cmd_cleanup(cfile, req); + return rc; +} + +static int do_execute_ddcb(struct genwqe_file *cfile, + unsigned long arg, int raw) +{ + int rc; + struct genwqe_ddcb_cmd *cmd; + struct genwqe_dev *cd = cfile->cd; + struct file *filp = cfile->filp; + + cmd = ddcb_requ_alloc(); + if (cmd == NULL) + return -ENOMEM; + + if (copy_from_user(cmd, (void __user *)arg, sizeof(*cmd))) { + ddcb_requ_free(cmd); + return -EFAULT; + } + + if (!raw) + rc = genwqe_execute_ddcb(cfile, cmd); + else + rc = __genwqe_execute_raw_ddcb(cd, cmd, filp->f_flags); + + /* Copy back only the modifed fields. Do not copy ASIV + back since the copy got modified by the driver. 
*/ + if (copy_to_user((void __user *)arg, cmd, + sizeof(*cmd) - DDCB_ASIV_LENGTH)) { + ddcb_requ_free(cmd); + return -EFAULT; + } + + ddcb_requ_free(cmd); + return rc; +} + +/** + * genwqe_ioctl() - IO control + * @filp: file handle + * @cmd: command identifier (passed from user) + * @arg: argument (passed from user) + * + * Return: 0 success + */ +static long genwqe_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + int rc = 0; + struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data; + struct genwqe_dev *cd = cfile->cd; + struct pci_dev *pci_dev = cd->pci_dev; + struct genwqe_reg_io __user *io; + u64 val; + u32 reg_offs; + + /* Return -EIO if card hit EEH */ + if (pci_channel_offline(pci_dev)) + return -EIO; + + if (_IOC_TYPE(cmd) != GENWQE_IOC_CODE) + return -EINVAL; + + switch (cmd) { + + case GENWQE_GET_CARD_STATE: + put_user(cd->card_state, (enum genwqe_card_state __user *)arg); + return 0; + + /* Register access */ + case GENWQE_READ_REG64: { + io = (struct genwqe_reg_io __user *)arg; + + if (get_user(reg_offs, &io->num)) + return -EFAULT; + + if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x7)) + return -EINVAL; + + val = __genwqe_readq(cd, reg_offs); + put_user(val, &io->val64); + return 0; + } + + case GENWQE_WRITE_REG64: { + io = (struct genwqe_reg_io __user *)arg; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if ((filp->f_flags & O_ACCMODE) == O_RDONLY) + return -EPERM; + + if (get_user(reg_offs, &io->num)) + return -EFAULT; + + if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x7)) + return -EINVAL; + + if (get_user(val, &io->val64)) + return -EFAULT; + + __genwqe_writeq(cd, reg_offs, val); + return 0; + } + + case GENWQE_READ_REG32: { + io = (struct genwqe_reg_io __user *)arg; + + if (get_user(reg_offs, &io->num)) + return -EFAULT; + + if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x3)) + return -EINVAL; + + val = __genwqe_readl(cd, reg_offs); + put_user(val, &io->val64); + return 0; + } + + case GENWQE_WRITE_REG32: { + io = (struct genwqe_reg_io __user *)arg; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if ((filp->f_flags & O_ACCMODE) == O_RDONLY) + return -EPERM; + + if (get_user(reg_offs, &io->num)) + return -EFAULT; + + if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x3)) + return -EINVAL; + + if (get_user(val, &io->val64)) + return -EFAULT; + + __genwqe_writel(cd, reg_offs, val); + return 0; + } + + /* Flash update/reading */ + case GENWQE_SLU_UPDATE: { + struct genwqe_bitstream load; + + if (!genwqe_is_privileged(cd)) + return -EPERM; + + if ((filp->f_flags & O_ACCMODE) == O_RDONLY) + return -EPERM; + + if (copy_from_user(&load, (void __user *)arg, + sizeof(load))) + return -EFAULT; + + rc = do_flash_update(cfile, &load); + + if (copy_to_user((void __user *)arg, &load, sizeof(load))) + return -EFAULT; + + return rc; + } + + case GENWQE_SLU_READ: { + struct genwqe_bitstream load; + + if (!genwqe_is_privileged(cd)) + return -EPERM; + + if (genwqe_flash_readback_fails(cd)) + return -ENOSPC; /* known to fail for old versions */ + + if (copy_from_user(&load, (void __user *)arg, sizeof(load))) + return -EFAULT; + + rc = do_flash_read(cfile, &load); + + if (copy_to_user((void __user *)arg, &load, sizeof(load))) + return -EFAULT; + + return rc; + } + + /* memory pinning and unpinning */ + case GENWQE_PIN_MEM: { + struct genwqe_mem m; + + if (copy_from_user(&m, (void __user *)arg, sizeof(m))) + return -EFAULT; + + return genwqe_pin_mem(cfile, &m); + } + + case GENWQE_UNPIN_MEM: { + struct genwqe_mem m; + + if (copy_from_user(&m, (void 
__user *)arg, sizeof(m))) + return -EFAULT; + + return genwqe_unpin_mem(cfile, &m); + } + + /* launch an DDCB and wait for completion */ + case GENWQE_EXECUTE_DDCB: + return do_execute_ddcb(cfile, arg, 0); + + case GENWQE_EXECUTE_RAW_DDCB: { + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return do_execute_ddcb(cfile, arg, 1); + } + + default: + return -EINVAL; + } + + return rc; +} + +static const struct file_operations genwqe_fops = { + .owner = THIS_MODULE, + .open = genwqe_open, + .fasync = genwqe_fasync, + .mmap = genwqe_mmap, + .unlocked_ioctl = genwqe_ioctl, + .compat_ioctl = compat_ptr_ioctl, + .release = genwqe_release, +}; + +static int genwqe_device_initialized(struct genwqe_dev *cd) +{ + return cd->dev != NULL; +} + +/** + * genwqe_device_create() - Create and configure genwqe char device + * @cd: genwqe device descriptor + * + * This function must be called before we create any more genwqe + * character devices, because it is allocating the major and minor + * number which are supposed to be used by the client drivers. + */ +int genwqe_device_create(struct genwqe_dev *cd) +{ + int rc; + struct pci_dev *pci_dev = cd->pci_dev; + + /* + * Here starts the individual setup per client. It must + * initialize its own cdev data structure with its own fops. + * The appropriate devnum needs to be created. The ranges must + * not overlap. + */ + rc = alloc_chrdev_region(&cd->devnum_genwqe, 0, + GENWQE_MAX_MINOR, GENWQE_DEVNAME); + if (rc < 0) { + dev_err(&pci_dev->dev, "err: alloc_chrdev_region failed\n"); + goto err_dev; + } + + cdev_init(&cd->cdev_genwqe, &genwqe_fops); + cd->cdev_genwqe.owner = THIS_MODULE; + + rc = cdev_add(&cd->cdev_genwqe, cd->devnum_genwqe, 1); + if (rc < 0) { + dev_err(&pci_dev->dev, "err: cdev_add failed\n"); + goto err_add; + } + + /* + * Finally the device in /dev/... must be created. The rule is + * to use card%d_clientname for each created device. + */ + cd->dev = device_create_with_groups(cd->class_genwqe, + &cd->pci_dev->dev, + cd->devnum_genwqe, cd, + genwqe_attribute_groups, + GENWQE_DEVNAME "%u_card", + cd->card_idx); + if (IS_ERR(cd->dev)) { + rc = PTR_ERR(cd->dev); + goto err_cdev; + } + + genwqe_init_debugfs(cd); + + return 0; + + err_cdev: + cdev_del(&cd->cdev_genwqe); + err_add: + unregister_chrdev_region(cd->devnum_genwqe, GENWQE_MAX_MINOR); + err_dev: + cd->dev = NULL; + return rc; +} + +static int genwqe_inform_and_stop_processes(struct genwqe_dev *cd) +{ + int rc; + unsigned int i; + struct pci_dev *pci_dev = cd->pci_dev; + + if (!genwqe_open_files(cd)) + return 0; + + dev_warn(&pci_dev->dev, "[%s] send SIGIO and wait ...\n", __func__); + + rc = genwqe_kill_fasync(cd, SIGIO); + if (rc > 0) { + /* give kill_timeout seconds to close file descriptors ... */ + for (i = 0; (i < GENWQE_KILL_TIMEOUT) && + genwqe_open_files(cd); i++) { + dev_info(&pci_dev->dev, " %d sec ...", i); + + cond_resched(); + msleep(1000); + } + + /* if no open files we can safely continue, else ... 
*/ + if (!genwqe_open_files(cd)) + return 0; + + dev_warn(&pci_dev->dev, + "[%s] send SIGKILL and wait ...\n", __func__); + + rc = genwqe_terminate(cd); + if (rc) { + /* Give kill_timout more seconds to end processes */ + for (i = 0; (i < GENWQE_KILL_TIMEOUT) && + genwqe_open_files(cd); i++) { + dev_warn(&pci_dev->dev, " %d sec ...", i); + + cond_resched(); + msleep(1000); + } + } + } + return 0; +} + +/** + * genwqe_device_remove() - Remove genwqe's char device + * @cd: GenWQE device information + * + * This function must be called after the client devices are removed + * because it will free the major/minor number range for the genwqe + * drivers. + * + * This function must be robust enough to be called twice. + */ +int genwqe_device_remove(struct genwqe_dev *cd) +{ + int rc; + struct pci_dev *pci_dev = cd->pci_dev; + + if (!genwqe_device_initialized(cd)) + return 1; + + genwqe_inform_and_stop_processes(cd); + + /* + * We currently do wait until all filedescriptors are + * closed. This leads to a problem when we abort the + * application which will decrease this reference from + * 1/unused to 0/illegal and not from 2/used 1/empty. + */ + rc = kref_read(&cd->cdev_genwqe.kobj.kref); + if (rc != 1) { + dev_err(&pci_dev->dev, + "[%s] err: cdev_genwqe...refcount=%d\n", __func__, rc); + panic("Fatal err: cannot free resources with pending references!"); + } + + genqwe_exit_debugfs(cd); + device_destroy(cd->class_genwqe, cd->devnum_genwqe); + cdev_del(&cd->cdev_genwqe); + unregister_chrdev_region(cd->devnum_genwqe, GENWQE_MAX_MINOR); + cd->dev = NULL; + + return 0; +} diff --git a/drivers/misc/genwqe/card_sysfs.c b/drivers/misc/genwqe/card_sysfs.c new file mode 100644 index 0000000000..b2f1156025 --- /dev/null +++ b/drivers/misc/genwqe/card_sysfs.c @@ -0,0 +1,295 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * IBM Accelerator Family 'GenWQE' + * + * (C) Copyright IBM Corp. 2013 + * + * Author: Frank Haverkamp + * Author: Joerg-Stephan Vogt + * Author: Michael Jung + * Author: Michael Ruettger + */ + +/* + * Sysfs interfaces for the GenWQE card. There are attributes to query + * the version of the bitstream as well as some for the driver. For + * debugging, please also see the debugfs interfaces of this driver. 
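+ * + * For example, reading the "version" attribute of a card (typically + * /sys/class/genwqe/genwqe0_card/version) returns the SLU and APP unit + * configuration words, see version_show() below.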
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "card_base.h" +#include "card_ddcb.h" + +static const char * const genwqe_types[] = { + [GENWQE_TYPE_ALTERA_230] = "GenWQE4-230", + [GENWQE_TYPE_ALTERA_530] = "GenWQE4-530", + [GENWQE_TYPE_ALTERA_A4] = "GenWQE5-A4", + [GENWQE_TYPE_ALTERA_A7] = "GenWQE5-A7", +}; + +static ssize_t status_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct genwqe_dev *cd = dev_get_drvdata(dev); + const char *cs[GENWQE_CARD_STATE_MAX] = { "unused", "used", "error" }; + + return sprintf(buf, "%s\n", cs[cd->card_state]); +} +static DEVICE_ATTR_RO(status); + +static ssize_t appid_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + char app_name[5]; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + genwqe_read_app_id(cd, app_name, sizeof(app_name)); + return sprintf(buf, "%s\n", app_name); +} +static DEVICE_ATTR_RO(appid); + +static ssize_t version_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + u64 slu_id, app_id; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + slu_id = __genwqe_readq(cd, IO_SLU_UNITCFG); + app_id = __genwqe_readq(cd, IO_APP_UNITCFG); + + return sprintf(buf, "%016llx.%016llx\n", slu_id, app_id); +} +static DEVICE_ATTR_RO(version); + +static ssize_t type_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + u8 card_type; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + card_type = genwqe_card_type(cd); + return sprintf(buf, "%s\n", (card_type >= ARRAY_SIZE(genwqe_types)) ? + "invalid" : genwqe_types[card_type]); +} +static DEVICE_ATTR_RO(type); + +static ssize_t tempsens_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + u64 tempsens; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + tempsens = __genwqe_readq(cd, IO_SLU_TEMPERATURE_SENSOR); + return sprintf(buf, "%016llx\n", tempsens); +} +static DEVICE_ATTR_RO(tempsens); + +static ssize_t freerunning_timer_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u64 t; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + t = __genwqe_readq(cd, IO_SLC_FREE_RUNNING_TIMER); + return sprintf(buf, "%016llx\n", t); +} +static DEVICE_ATTR_RO(freerunning_timer); + +static ssize_t queue_working_time_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u64 t; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + t = __genwqe_readq(cd, IO_SLC_QUEUE_WTIME); + return sprintf(buf, "%016llx\n", t); +} +static DEVICE_ATTR_RO(queue_working_time); + +static ssize_t base_clock_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u64 base_clock; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + base_clock = genwqe_base_clock_frequency(cd); + return sprintf(buf, "%lld\n", base_clock); +} +static DEVICE_ATTR_RO(base_clock); + +/* + * curr_bitstream_show() - Show the current bitstream id + * + * There is a bug in some old versions of the CPLD which selects the + * bitstream, which causes the IO_SLU_BITSTREAM register to report + * unreliable data in very rare cases. This makes this sysfs + * unreliable up to the point were a new CPLD version is being used. + * + * Unfortunately there is no automatic way yet to query the CPLD + * version, such that you need to manually ensure via programming + * tools that you have a recent version of the CPLD software. 
+ * + * The proposed circumvention is to use a special recovery bitstream + * on the backup partition (0) to identify problems while loading the + * image. + */ +static ssize_t curr_bitstream_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int curr_bitstream; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + curr_bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM) & 0x1; + return sprintf(buf, "%d\n", curr_bitstream); +} +static DEVICE_ATTR_RO(curr_bitstream); + +/* + * next_bitstream_show() - Show the next activated bitstream + * + * IO_SLC_CFGREG_SOFTRESET: This register can only be accessed by the PF. + */ +static ssize_t next_bitstream_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int next_bitstream; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + switch ((cd->softreset & 0xc) >> 2) { + case 0x2: + next_bitstream = 0; + break; + case 0x3: + next_bitstream = 1; + break; + default: + next_bitstream = -1; + break; /* error */ + } + return sprintf(buf, "%d\n", next_bitstream); +} + +static ssize_t next_bitstream_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int partition; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + if (kstrtoint(buf, 0, &partition) < 0) + return -EINVAL; + + switch (partition) { + case 0x0: + cd->softreset = 0x78; + break; + case 0x1: + cd->softreset = 0x7c; + break; + default: + return -EINVAL; + } + + __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, cd->softreset); + return count; +} +static DEVICE_ATTR_RW(next_bitstream); + +static ssize_t reload_bitstream_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int reload; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + if (kstrtoint(buf, 0, &reload) < 0) + return -EINVAL; + + if (reload == 0x1) { + if (cd->card_state == GENWQE_CARD_UNUSED || + cd->card_state == GENWQE_CARD_USED) + cd->card_state = GENWQE_CARD_RELOAD_BITSTREAM; + else + return -EIO; + } else { + return -EINVAL; + } + + return count; +} +static DEVICE_ATTR_WO(reload_bitstream); + +/* + * Create device_attribute structures / params: name, mode, show, store + * additional flag if valid in VF + */ +static struct attribute *genwqe_attributes[] = { + &dev_attr_tempsens.attr, + &dev_attr_next_bitstream.attr, + &dev_attr_curr_bitstream.attr, + &dev_attr_base_clock.attr, + &dev_attr_type.attr, + &dev_attr_version.attr, + &dev_attr_appid.attr, + &dev_attr_status.attr, + &dev_attr_freerunning_timer.attr, + &dev_attr_queue_working_time.attr, + &dev_attr_reload_bitstream.attr, + NULL, +}; + +static struct attribute *genwqe_normal_attributes[] = { + &dev_attr_type.attr, + &dev_attr_version.attr, + &dev_attr_appid.attr, + &dev_attr_status.attr, + &dev_attr_freerunning_timer.attr, + &dev_attr_queue_working_time.attr, + NULL, +}; + +/* + * genwqe_is_visible() - Determine if sysfs attribute should be visible or not + * + * VFs have restricted mmio capabilities, so not all sysfs entries + * are allowed in VFs. 
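+ * Only the attributes listed in genwqe_normal_attributes[] (type, version, + * appid, status, freerunning_timer and queue_working_time) stay visible on + * a VF; all others are restricted to the privileged PF.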
+ */ +static umode_t genwqe_is_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + unsigned int j; + struct device *dev = kobj_to_dev(kobj); + struct genwqe_dev *cd = dev_get_drvdata(dev); + umode_t mode = attr->mode; + + if (genwqe_is_privileged(cd)) + return mode; + + for (j = 0; genwqe_normal_attributes[j] != NULL; j++) + if (genwqe_normal_attributes[j] == attr) + return mode; + + return 0; +} + +static struct attribute_group genwqe_attribute_group = { + .is_visible = genwqe_is_visible, + .attrs = genwqe_attributes, +}; + +const struct attribute_group *genwqe_attribute_groups[] = { + &genwqe_attribute_group, + NULL, +}; diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c new file mode 100644 index 0000000000..1c798d6b2d --- /dev/null +++ b/drivers/misc/genwqe/card_utils.c @@ -0,0 +1,1049 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * IBM Accelerator Family 'GenWQE' + * + * (C) Copyright IBM Corp. 2013 + * + * Author: Frank Haverkamp + * Author: Joerg-Stephan Vogt + * Author: Michael Jung + * Author: Michael Ruettger + */ + +/* + * Miscelanous functionality used in the other GenWQE driver parts. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "genwqe_driver.h" +#include "card_base.h" +#include "card_ddcb.h" + +/** + * __genwqe_writeq() - Write 64-bit register + * @cd: genwqe device descriptor + * @byte_offs: byte offset within BAR + * @val: 64-bit value + * + * Return: 0 if success; < 0 if error + */ +int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val) +{ + struct pci_dev *pci_dev = cd->pci_dev; + + if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) + return -EIO; + + if (cd->mmio == NULL) + return -EIO; + + if (pci_channel_offline(pci_dev)) + return -EIO; + + __raw_writeq((__force u64)cpu_to_be64(val), cd->mmio + byte_offs); + return 0; +} + +/** + * __genwqe_readq() - Read 64-bit register + * @cd: genwqe device descriptor + * @byte_offs: offset within BAR + * + * Return: value from register + */ +u64 __genwqe_readq(struct genwqe_dev *cd, u64 byte_offs) +{ + if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) + return 0xffffffffffffffffull; + + if ((cd->err_inject & GENWQE_INJECT_GFIR_FATAL) && + (byte_offs == IO_SLC_CFGREG_GFIR)) + return 0x000000000000ffffull; + + if ((cd->err_inject & GENWQE_INJECT_GFIR_INFO) && + (byte_offs == IO_SLC_CFGREG_GFIR)) + return 0x00000000ffff0000ull; + + if (cd->mmio == NULL) + return 0xffffffffffffffffull; + + return be64_to_cpu((__force __be64)__raw_readq(cd->mmio + byte_offs)); +} + +/** + * __genwqe_writel() - Write 32-bit register + * @cd: genwqe device descriptor + * @byte_offs: byte offset within BAR + * @val: 32-bit value + * + * Return: 0 if success; < 0 if error + */ +int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val) +{ + struct pci_dev *pci_dev = cd->pci_dev; + + if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) + return -EIO; + + if (cd->mmio == NULL) + return -EIO; + + if (pci_channel_offline(pci_dev)) + return -EIO; + + __raw_writel((__force u32)cpu_to_be32(val), cd->mmio + byte_offs); + return 0; +} + +/** + * __genwqe_readl() - Read 32-bit register + * @cd: genwqe device descriptor + * @byte_offs: offset within BAR + * + * Return: Value from register + */ +u32 __genwqe_readl(struct genwqe_dev *cd, u64 byte_offs) +{ + if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) + return 0xffffffff; + + if (cd->mmio == NULL) + return 0xffffffff; + + 
return be32_to_cpu((__force __be32)__raw_readl(cd->mmio + byte_offs)); +} + +/** + * genwqe_read_app_id() - Extract app_id + * @cd: genwqe device descriptor + * @app_name: carrier used to pass-back name + * @len: length of data for name + * + * app_unitcfg needs to be filled with valid data first + */ +int genwqe_read_app_id(struct genwqe_dev *cd, char *app_name, int len) +{ + int i, j; + u32 app_id = (u32)cd->app_unitcfg; + + memset(app_name, 0, len); + for (i = 0, j = 0; j < min(len, 4); j++) { + char ch = (char)((app_id >> (24 - j*8)) & 0xff); + + if (ch == ' ') + continue; + app_name[i++] = isprint(ch) ? ch : 'X'; + } + return i; +} + +#define CRC32_POLYNOMIAL 0x20044009 +static u32 crc32_tab[256]; /* crc32 lookup table */ + +/** + * genwqe_init_crc32() - Prepare a lookup table for fast crc32 calculations + * + * Existing kernel functions seem to use a different polynomial, + * therefore we could not use them here. + * + * Genwqe's Polynomial = 0x20044009 + */ +void genwqe_init_crc32(void) +{ + int i, j; + u32 crc; + + for (i = 0; i < 256; i++) { + crc = i << 24; + for (j = 0; j < 8; j++) { + if (crc & 0x80000000) + crc = (crc << 1) ^ CRC32_POLYNOMIAL; + else + crc = (crc << 1); + } + crc32_tab[i] = crc; + } +} + +/** + * genwqe_crc32() - Generate 32-bit crc as required for DDCBs + * @buff: pointer to data buffer + * @len: length of data for calculation + * @init: initial crc (0xffffffff at start) + * + * polynomial = x^32 + x^29 + x^18 + x^14 + x^3 + 1 (0x20044009) + * + * Example: 4 bytes 0x01 0x02 0x03 0x04 with init=0xffffffff should + * result in a crc32 of 0xf33cb7d3. + * + * The existing kernel crc functions did not cover this polynomial yet. + * + * Return: crc32 checksum. + */ +u32 genwqe_crc32(u8 *buff, size_t len, u32 init) +{ + int i; + u32 crc; + + crc = init; + while (len--) { + i = ((crc >> 24) ^ *buff++) & 0xFF; + crc = (crc << 8) ^ crc32_tab[i]; + } + return crc; +} + +void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size, + dma_addr_t *dma_handle) +{ + if (get_order(size) > MAX_ORDER) + return NULL; + + return dma_alloc_coherent(&cd->pci_dev->dev, size, dma_handle, + GFP_KERNEL); +} + +void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + if (vaddr == NULL) + return; + + dma_free_coherent(&cd->pci_dev->dev, size, vaddr, dma_handle); +} + +static void genwqe_unmap_pages(struct genwqe_dev *cd, dma_addr_t *dma_list, + int num_pages) +{ + int i; + struct pci_dev *pci_dev = cd->pci_dev; + + for (i = 0; (i < num_pages) && (dma_list[i] != 0x0); i++) { + dma_unmap_page(&pci_dev->dev, dma_list[i], PAGE_SIZE, + DMA_BIDIRECTIONAL); + dma_list[i] = 0x0; + } +} + +static int genwqe_map_pages(struct genwqe_dev *cd, + struct page **page_list, int num_pages, + dma_addr_t *dma_list) +{ + int i; + struct pci_dev *pci_dev = cd->pci_dev; + + /* establish DMA mapping for requested pages */ + for (i = 0; i < num_pages; i++) { + dma_addr_t daddr; + + dma_list[i] = 0x0; + daddr = dma_map_page(&pci_dev->dev, page_list[i], + 0, /* map_offs */ + PAGE_SIZE, + DMA_BIDIRECTIONAL); /* FIXME rd/rw */ + + if (dma_mapping_error(&pci_dev->dev, daddr)) { + dev_err(&pci_dev->dev, + "[%s] err: no dma addr daddr=%016llx!\n", + __func__, (long long)daddr); + goto err; + } + + dma_list[i] = daddr; + } + return 0; + + err: + genwqe_unmap_pages(cd, dma_list, num_pages); + return -EIO; +} + +static int genwqe_sgl_size(int num_pages) +{ + int len, num_tlb = num_pages / 7; + + len = sizeof(struct sg_entry) * (num_pages+num_tlb + 1); + return
roundup(len, PAGE_SIZE); +} + +/* + * genwqe_alloc_sync_sgl() - Allocate memory for sgl and overlapping pages + * + * Allocates memory for sgl and overlapping pages. Pages which might + * overlap other user-space memory blocks are being cached for DMAs, + * such that we do not run into syncronization issues. Data is copied + * from user-space into the cached pages. + */ +int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, + void __user *user_addr, size_t user_size, int write) +{ + int ret = -ENOMEM; + struct pci_dev *pci_dev = cd->pci_dev; + + sgl->fpage_offs = offset_in_page((unsigned long)user_addr); + sgl->fpage_size = min_t(size_t, PAGE_SIZE-sgl->fpage_offs, user_size); + sgl->nr_pages = DIV_ROUND_UP(sgl->fpage_offs + user_size, PAGE_SIZE); + sgl->lpage_size = (user_size - sgl->fpage_size) % PAGE_SIZE; + + dev_dbg(&pci_dev->dev, "[%s] uaddr=%p usize=%8ld nr_pages=%ld fpage_offs=%lx fpage_size=%ld lpage_size=%ld\n", + __func__, user_addr, user_size, sgl->nr_pages, + sgl->fpage_offs, sgl->fpage_size, sgl->lpage_size); + + sgl->user_addr = user_addr; + sgl->user_size = user_size; + sgl->write = write; + sgl->sgl_size = genwqe_sgl_size(sgl->nr_pages); + + if (get_order(sgl->sgl_size) > MAX_ORDER) { + dev_err(&pci_dev->dev, + "[%s] err: too much memory requested!\n", __func__); + return ret; + } + + sgl->sgl = __genwqe_alloc_consistent(cd, sgl->sgl_size, + &sgl->sgl_dma_addr); + if (sgl->sgl == NULL) { + dev_err(&pci_dev->dev, + "[%s] err: no memory available!\n", __func__); + return ret; + } + + /* Only use buffering on incomplete pages */ + if ((sgl->fpage_size != 0) && (sgl->fpage_size != PAGE_SIZE)) { + sgl->fpage = __genwqe_alloc_consistent(cd, PAGE_SIZE, + &sgl->fpage_dma_addr); + if (sgl->fpage == NULL) + goto err_out; + + /* Sync with user memory */ + if (copy_from_user(sgl->fpage + sgl->fpage_offs, + user_addr, sgl->fpage_size)) { + ret = -EFAULT; + goto err_out; + } + } + if (sgl->lpage_size != 0) { + sgl->lpage = __genwqe_alloc_consistent(cd, PAGE_SIZE, + &sgl->lpage_dma_addr); + if (sgl->lpage == NULL) + goto err_out1; + + /* Sync with user memory */ + if (copy_from_user(sgl->lpage, user_addr + user_size - + sgl->lpage_size, sgl->lpage_size)) { + ret = -EFAULT; + goto err_out2; + } + } + return 0; + + err_out2: + __genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage, + sgl->lpage_dma_addr); + sgl->lpage = NULL; + sgl->lpage_dma_addr = 0; + err_out1: + __genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage, + sgl->fpage_dma_addr); + sgl->fpage = NULL; + sgl->fpage_dma_addr = 0; + err_out: + __genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl, + sgl->sgl_dma_addr); + sgl->sgl = NULL; + sgl->sgl_dma_addr = 0; + sgl->sgl_size = 0; + + return ret; +} + +int genwqe_setup_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, + dma_addr_t *dma_list) +{ + int i = 0, j = 0, p; + unsigned long dma_offs, map_offs; + dma_addr_t prev_daddr = 0; + struct sg_entry *s, *last_s = NULL; + size_t size = sgl->user_size; + + dma_offs = 128; /* next block if needed/dma_offset */ + map_offs = sgl->fpage_offs; /* offset in first page */ + + s = &sgl->sgl[0]; /* first set of 8 entries */ + p = 0; /* page */ + while (p < sgl->nr_pages) { + dma_addr_t daddr; + unsigned int size_to_map; + + /* always write the chaining entry, cleanup is done later */ + j = 0; + s[j].target_addr = cpu_to_be64(sgl->sgl_dma_addr + dma_offs); + s[j].len = cpu_to_be32(128); + s[j].flags = cpu_to_be32(SG_CHAINED); + j++; + + while (j < 8) { + /* DMA mapping for requested page, offs, size */ + size_to_map = min(size, 
PAGE_SIZE - map_offs); + + if ((p == 0) && (sgl->fpage != NULL)) { + daddr = sgl->fpage_dma_addr + map_offs; + + } else if ((p == sgl->nr_pages - 1) && + (sgl->lpage != NULL)) { + daddr = sgl->lpage_dma_addr; + } else { + daddr = dma_list[p] + map_offs; + } + + size -= size_to_map; + map_offs = 0; + + if (prev_daddr == daddr) { + u32 prev_len = be32_to_cpu(last_s->len); + + /* pr_info("daddr combining: " + "%016llx/%08x -> %016llx\n", + prev_daddr, prev_len, daddr); */ + + last_s->len = cpu_to_be32(prev_len + + size_to_map); + + p++; /* process next page */ + if (p == sgl->nr_pages) + goto fixup; /* nothing to do */ + + prev_daddr = daddr + size_to_map; + continue; + } + + /* start new entry */ + s[j].target_addr = cpu_to_be64(daddr); + s[j].len = cpu_to_be32(size_to_map); + s[j].flags = cpu_to_be32(SG_DATA); + prev_daddr = daddr + size_to_map; + last_s = &s[j]; + j++; + + p++; /* process next page */ + if (p == sgl->nr_pages) + goto fixup; /* nothing to do */ + } + dma_offs += 128; + s += 8; /* continue 8 elements further */ + } + fixup: + if (j == 1) { /* combining happened on last entry! */ + s -= 8; /* full shift needed on previous sgl block */ + j = 7; /* shift all elements */ + } + + for (i = 0; i < j; i++) /* move elements 1 up */ + s[i] = s[i + 1]; + + s[i].target_addr = cpu_to_be64(0); + s[i].len = cpu_to_be32(0); + s[i].flags = cpu_to_be32(SG_END_LIST); + return 0; +} + +/** + * genwqe_free_sync_sgl() - Free memory for sgl and overlapping pages + * @cd: genwqe device descriptor + * @sgl: scatter gather list describing user-space memory + * + * After the DMA transfer has been completed we free the memory for + * the sgl and the cached pages. Data is being transferred from cached + * pages into user-space buffers. + */ +int genwqe_free_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl) +{ + int rc = 0; + size_t offset; + unsigned long res; + struct pci_dev *pci_dev = cd->pci_dev; + + if (sgl->fpage) { + if (sgl->write) { + res = copy_to_user(sgl->user_addr, + sgl->fpage + sgl->fpage_offs, sgl->fpage_size); + if (res) { + dev_err(&pci_dev->dev, + "[%s] err: copying fpage! (res=%lu)\n", + __func__, res); + rc = -EFAULT; + } + } + __genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage, + sgl->fpage_dma_addr); + sgl->fpage = NULL; + sgl->fpage_dma_addr = 0; + } + if (sgl->lpage) { + if (sgl->write) { + offset = sgl->user_size - sgl->lpage_size; + res = copy_to_user(sgl->user_addr + offset, sgl->lpage, + sgl->lpage_size); + if (res) { + dev_err(&pci_dev->dev, + "[%s] err: copying lpage! (res=%lu)\n", + __func__, res); + rc = -EFAULT; + } + } + __genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage, + sgl->lpage_dma_addr); + sgl->lpage = NULL; + sgl->lpage_dma_addr = 0; + } + __genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl, + sgl->sgl_dma_addr); + + sgl->sgl = NULL; + sgl->sgl_dma_addr = 0x0; + sgl->sgl_size = 0; + return rc; +} + +/** + * genwqe_user_vmap() - Map user-space memory to virtual kernel memory + * @cd: pointer to genwqe device + * @m: mapping params + * @uaddr: user virtual address + * @size: size of memory to be mapped + * + * We need to think about how we could speed this up. Of course it is + * not a good idea to do this over and over again, like we are + * currently doing it. Nevertheless, I am curious where on the path + * the performance is spend. Most probably within the memory + * allocation functions, but maybe also in the DMA mapping code. 
+ * + * Restrictions: The maximum size of the possible mapping currently depends + * on the amount of memory we can get using kzalloc() for the + * page_list and pci_alloc_consistent for the sg_list. + * The sg_list is currently itself not scattered, which could + * be fixed with some effort. The page_list must be split into + * PAGE_SIZE chunks too. All that will make the complicated + * code more complicated. + * + * Return: 0 if success + */ +int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m, void *uaddr, + unsigned long size) +{ + int rc = -EINVAL; + unsigned long data, offs; + struct pci_dev *pci_dev = cd->pci_dev; + + if ((uaddr == NULL) || (size == 0)) { + m->size = 0; /* mark unused and not added */ + return -EINVAL; + } + m->u_vaddr = uaddr; + m->size = size; + + /* determine space needed for page_list. */ + data = (unsigned long)uaddr; + offs = offset_in_page(data); + if (size > ULONG_MAX - PAGE_SIZE - offs) { + m->size = 0; /* mark unused and not added */ + return -EINVAL; + } + m->nr_pages = DIV_ROUND_UP(offs + size, PAGE_SIZE); + + m->page_list = kcalloc(m->nr_pages, + sizeof(struct page *) + sizeof(dma_addr_t), + GFP_KERNEL); + if (!m->page_list) { + dev_err(&pci_dev->dev, "err: alloc page_list failed\n"); + m->nr_pages = 0; + m->u_vaddr = NULL; + m->size = 0; /* mark unused and not added */ + return -ENOMEM; + } + m->dma_list = (dma_addr_t *)(m->page_list + m->nr_pages); + + /* pin user pages in memory */ + rc = pin_user_pages_fast(data & PAGE_MASK, /* page aligned addr */ + m->nr_pages, + m->write ? FOLL_WRITE : 0, /* readable/writable */ + m->page_list); /* ptrs to pages */ + if (rc < 0) + goto fail_pin_user_pages; + + /* assumption: pin_user_pages can be killed by signals. */ + if (rc < m->nr_pages) { + unpin_user_pages_dirty_lock(m->page_list, rc, m->write); + rc = -EFAULT; + goto fail_pin_user_pages; + } + + rc = genwqe_map_pages(cd, m->page_list, m->nr_pages, m->dma_list); + if (rc != 0) + goto fail_free_user_pages; + + return 0; + + fail_free_user_pages: + unpin_user_pages_dirty_lock(m->page_list, m->nr_pages, m->write); + + fail_pin_user_pages: + kfree(m->page_list); + m->page_list = NULL; + m->dma_list = NULL; + m->nr_pages = 0; + m->u_vaddr = NULL; + m->size = 0; /* mark unused and not added */ + return rc; +} + +/** + * genwqe_user_vunmap() - Undo mapping of user-space mem to virtual kernel + * memory + * @cd: pointer to genwqe device + * @m: mapping params + */ +int genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m) +{ + struct pci_dev *pci_dev = cd->pci_dev; + + if (!dma_mapping_used(m)) { + dev_err(&pci_dev->dev, "[%s] err: mapping %p not used!\n", + __func__, m); + return -EINVAL; + } + + if (m->dma_list) + genwqe_unmap_pages(cd, m->dma_list, m->nr_pages); + + if (m->page_list) { + unpin_user_pages_dirty_lock(m->page_list, m->nr_pages, + m->write); + kfree(m->page_list); + m->page_list = NULL; + m->dma_list = NULL; + m->nr_pages = 0; + } + + m->u_vaddr = NULL; + m->size = 0; /* mark as unused and not added */ + return 0; +} + +/** + * genwqe_card_type() - Get chip type SLU Configuration Register + * @cd: pointer to the genwqe device descriptor + * Return: 0: Altera Stratix-IV 230 + * 1: Altera Stratix-IV 530 + * 2: Altera Stratix-V A4 + * 3: Altera Stratix-V A7 + */ +u8 genwqe_card_type(struct genwqe_dev *cd) +{ + u64 card_type = cd->slu_unitcfg; + + return (u8)((card_type & IO_SLU_UNITCFG_TYPE_MASK) >> 20); +} + +/** + * genwqe_card_reset() - Reset the card + * @cd: pointer to the genwqe device descriptor + */ +int 
genwqe_card_reset(struct genwqe_dev *cd) +{ + u64 softrst; + struct pci_dev *pci_dev = cd->pci_dev; + + if (!genwqe_is_privileged(cd)) + return -ENODEV; + + /* new SL */ + __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, 0x1ull); + msleep(1000); + __genwqe_readq(cd, IO_HSU_FIR_CLR); + __genwqe_readq(cd, IO_APP_FIR_CLR); + __genwqe_readq(cd, IO_SLU_FIR_CLR); + + /* + * Read-modify-write to preserve the stealth bits + * + * For SL >= 039, Stealth WE bit allows removing + * the read-modify-wrote. + * r-m-w may require a mask 0x3C to avoid hitting hard + * reset again for error reset (should be 0, chicken). + */ + softrst = __genwqe_readq(cd, IO_SLC_CFGREG_SOFTRESET) & 0x3cull; + __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, softrst | 0x2ull); + + /* give ERRORRESET some time to finish */ + msleep(50); + + if (genwqe_need_err_masking(cd)) { + dev_info(&pci_dev->dev, + "[%s] masking errors for old bitstreams\n", __func__); + __genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull); + } + return 0; +} + +int genwqe_read_softreset(struct genwqe_dev *cd) +{ + u64 bitstream; + + if (!genwqe_is_privileged(cd)) + return -ENODEV; + + bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM) & 0x1; + cd->softreset = (bitstream == 0) ? 0x8ull : 0xcull; + return 0; +} + +/** + * genwqe_set_interrupt_capability() - Configure MSI capability structure + * @cd: pointer to the device + * @count: number of vectors to allocate + * Return: 0 if no error + */ +int genwqe_set_interrupt_capability(struct genwqe_dev *cd, int count) +{ + int rc; + + rc = pci_alloc_irq_vectors(cd->pci_dev, 1, count, PCI_IRQ_MSI); + if (rc < 0) + return rc; + return 0; +} + +/** + * genwqe_reset_interrupt_capability() - Undo genwqe_set_interrupt_capability() + * @cd: pointer to the device + */ +void genwqe_reset_interrupt_capability(struct genwqe_dev *cd) +{ + pci_free_irq_vectors(cd->pci_dev); +} + +/** + * set_reg_idx() - Fill array with data. Ignore illegal offsets. 
+ * @cd: card device + * @r: debug register array + * @i: index to desired entry + * @m: maximum possible entries + * @addr: addr which is read + * @idx: index in debug array + * @val: read value + */ +static int set_reg_idx(struct genwqe_dev *cd, struct genwqe_reg *r, + unsigned int *i, unsigned int m, u32 addr, u32 idx, + u64 val) +{ + if (WARN_ON_ONCE(*i >= m)) + return -EFAULT; + + r[*i].addr = addr; + r[*i].idx = idx; + r[*i].val = val; + ++*i; + return 0; +} + +static int set_reg(struct genwqe_dev *cd, struct genwqe_reg *r, + unsigned int *i, unsigned int m, u32 addr, u64 val) +{ + return set_reg_idx(cd, r, i, m, addr, 0, val); +} + +int genwqe_read_ffdc_regs(struct genwqe_dev *cd, struct genwqe_reg *regs, + unsigned int max_regs, int all) +{ + unsigned int i, j, idx = 0; + u32 ufir_addr, ufec_addr, sfir_addr, sfec_addr; + u64 gfir, sluid, appid, ufir, ufec, sfir, sfec; + + /* Global FIR */ + gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); + set_reg(cd, regs, &idx, max_regs, IO_SLC_CFGREG_GFIR, gfir); + + /* UnitCfg for SLU */ + sluid = __genwqe_readq(cd, IO_SLU_UNITCFG); /* 0x00000000 */ + set_reg(cd, regs, &idx, max_regs, IO_SLU_UNITCFG, sluid); + + /* UnitCfg for APP */ + appid = __genwqe_readq(cd, IO_APP_UNITCFG); /* 0x02000000 */ + set_reg(cd, regs, &idx, max_regs, IO_APP_UNITCFG, appid); + + /* Check all chip Units */ + for (i = 0; i < GENWQE_MAX_UNITS; i++) { + + /* Unit FIR */ + ufir_addr = (i << 24) | 0x008; + ufir = __genwqe_readq(cd, ufir_addr); + set_reg(cd, regs, &idx, max_regs, ufir_addr, ufir); + + /* Unit FEC */ + ufec_addr = (i << 24) | 0x018; + ufec = __genwqe_readq(cd, ufec_addr); + set_reg(cd, regs, &idx, max_regs, ufec_addr, ufec); + + for (j = 0; j < 64; j++) { + /* wherever there is a primary 1, read the 2ndary */ + if (!all && (!(ufir & (1ull << j)))) + continue; + + sfir_addr = (i << 24) | (0x100 + 8 * j); + sfir = __genwqe_readq(cd, sfir_addr); + set_reg(cd, regs, &idx, max_regs, sfir_addr, sfir); + + sfec_addr = (i << 24) | (0x300 + 8 * j); + sfec = __genwqe_readq(cd, sfec_addr); + set_reg(cd, regs, &idx, max_regs, sfec_addr, sfec); + } + } + + /* fill with invalid data until end */ + for (i = idx; i < max_regs; i++) { + regs[i].addr = 0xffffffff; + regs[i].val = 0xffffffffffffffffull; + } + return idx; +} + +/** + * genwqe_ffdc_buff_size() - Calculates the number of dump registers + * @cd: genwqe device descriptor + * @uid: unit ID + */ +int genwqe_ffdc_buff_size(struct genwqe_dev *cd, int uid) +{ + int entries = 0, ring, traps, traces, trace_entries; + u32 eevptr_addr, l_addr, d_len, d_type; + u64 eevptr, val, addr; + + eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER; + eevptr = __genwqe_readq(cd, eevptr_addr); + + if ((eevptr != 0x0) && (eevptr != -1ull)) { + l_addr = GENWQE_UID_OFFS(uid) | eevptr; + + while (1) { + val = __genwqe_readq(cd, l_addr); + + if ((val == 0x0) || (val == -1ull)) + break; + + /* 38:24 */ + d_len = (val & 0x0000007fff000000ull) >> 24; + + /* 39 */ + d_type = (val & 0x0000008000000000ull) >> 36; + + if (d_type) { /* repeat */ + entries += d_len; + } else { /* size in bytes! 
*/ + entries += d_len >> 3; + } + + l_addr += 8; + } + } + + for (ring = 0; ring < 8; ring++) { + addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring); + val = __genwqe_readq(cd, addr); + + if ((val == 0x0ull) || (val == -1ull)) + continue; + + traps = (val >> 24) & 0xff; + traces = (val >> 16) & 0xff; + trace_entries = val & 0xffff; + + entries += traps + (traces * trace_entries); + } + return entries; +} + +/** + * genwqe_ffdc_buff_read() - Implements LogoutExtendedErrorRegisters procedure + * @cd: genwqe device descriptor + * @uid: unit ID + * @regs: register information + * @max_regs: number of register entries + */ +int genwqe_ffdc_buff_read(struct genwqe_dev *cd, int uid, + struct genwqe_reg *regs, unsigned int max_regs) +{ + int i, traps, traces, trace, trace_entries, trace_entry, ring; + unsigned int idx = 0; + u32 eevptr_addr, l_addr, d_addr, d_len, d_type; + u64 eevptr, e, val, addr; + + eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER; + eevptr = __genwqe_readq(cd, eevptr_addr); + + if ((eevptr != 0x0) && (eevptr != 0xffffffffffffffffull)) { + l_addr = GENWQE_UID_OFFS(uid) | eevptr; + while (1) { + e = __genwqe_readq(cd, l_addr); + if ((e == 0x0) || (e == 0xffffffffffffffffull)) + break; + + d_addr = (e & 0x0000000000ffffffull); /* 23:0 */ + d_len = (e & 0x0000007fff000000ull) >> 24; /* 38:24 */ + d_type = (e & 0x0000008000000000ull) >> 36; /* 39 */ + d_addr |= GENWQE_UID_OFFS(uid); + + if (d_type) { + for (i = 0; i < (int)d_len; i++) { + val = __genwqe_readq(cd, d_addr); + set_reg_idx(cd, regs, &idx, max_regs, + d_addr, i, val); + } + } else { + d_len >>= 3; /* Size in bytes! */ + for (i = 0; i < (int)d_len; i++, d_addr += 8) { + val = __genwqe_readq(cd, d_addr); + set_reg_idx(cd, regs, &idx, max_regs, + d_addr, 0, val); + } + } + l_addr += 8; + } + } + + /* + * To save time, there are only 6 traces poplulated on Uid=2, + * Ring=1. each with iters=512. + */ + for (ring = 0; ring < 8; ring++) { /* 0 is fls, 1 is fds, + 2...7 are ASI rings */ + addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring); + val = __genwqe_readq(cd, addr); + + if ((val == 0x0ull) || (val == -1ull)) + continue; + + traps = (val >> 24) & 0xff; /* Number of Traps */ + traces = (val >> 16) & 0xff; /* Number of Traces */ + trace_entries = val & 0xffff; /* Entries per trace */ + + /* Note: This is a combined loop that dumps both the traps */ + /* (for the trace == 0 case) as well as the traces 1 to */ + /* 'traces'. */ + for (trace = 0; trace <= traces; trace++) { + u32 diag_sel = + GENWQE_EXTENDED_DIAG_SELECTOR(ring, trace); + + addr = (GENWQE_UID_OFFS(uid) | + IO_EXTENDED_DIAG_SELECTOR); + __genwqe_writeq(cd, addr, diag_sel); + + for (trace_entry = 0; + trace_entry < (trace ? trace_entries : traps); + trace_entry++) { + addr = (GENWQE_UID_OFFS(uid) | + IO_EXTENDED_DIAG_READ_MBX); + val = __genwqe_readq(cd, addr); + set_reg_idx(cd, regs, &idx, max_regs, addr, + (diag_sel<<16) | trace_entry, val); + } + } + } + return 0; +} + +/** + * genwqe_write_vreg() - Write register in virtual window + * @cd: genwqe device descriptor + * @reg: register (byte) offset within BAR + * @val: value to write + * @func: PCI virtual function + * + * Note, these registers are only accessible to the PF through the + * VF-window. It is not intended for the VF to access. 
+ */ +int genwqe_write_vreg(struct genwqe_dev *cd, u32 reg, u64 val, int func) +{ + __genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf); + __genwqe_writeq(cd, reg, val); + return 0; +} + +/** + * genwqe_read_vreg() - Read register in virtual window + * @cd: genwqe device descriptor + * @reg: register (byte) offset within BAR + * @func: PCI virtual function + * + * Note, these registers are only accessible to the PF through the + * VF-window. It is not intended for the VF to access. + */ +u64 genwqe_read_vreg(struct genwqe_dev *cd, u32 reg, int func) +{ + __genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf); + return __genwqe_readq(cd, reg); +} + +/** + * genwqe_base_clock_frequency() - Deteremine base clock frequency of the card + * @cd: genwqe device descriptor + * + * Note: From a design perspective it turned out to be a bad idea to + * use codes here to specifiy the frequency/speed values. An old + * driver cannot understand new codes and is therefore always a + * problem. Better is to measure out the value or put the + * speed/frequency directly into a register which is always a valid + * value for old as well as for new software. + * + * Return: Card clock in MHz + */ +int genwqe_base_clock_frequency(struct genwqe_dev *cd) +{ + u16 speed; /* MHz MHz MHz MHz */ + static const int speed_grade[] = { 250, 200, 166, 175 }; + + speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full); + if (speed >= ARRAY_SIZE(speed_grade)) + return 0; /* illegal value */ + + return speed_grade[speed]; +} + +/** + * genwqe_stop_traps() - Stop traps + * @cd: genwqe device descriptor + * + * Before reading out the analysis data, we need to stop the traps. + */ +void genwqe_stop_traps(struct genwqe_dev *cd) +{ + __genwqe_writeq(cd, IO_SLC_MISC_DEBUG_SET, 0xcull); +} + +/** + * genwqe_start_traps() - Start traps + * @cd: genwqe device descriptor + * + * After having read the data, we can/must enable the traps again. + */ +void genwqe_start_traps(struct genwqe_dev *cd) +{ + __genwqe_writeq(cd, IO_SLC_MISC_DEBUG_CLR, 0xcull); + + if (genwqe_need_err_masking(cd)) + __genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull); +} diff --git a/drivers/misc/genwqe/genwqe_driver.h b/drivers/misc/genwqe/genwqe_driver.h new file mode 100644 index 0000000000..8c333481b3 --- /dev/null +++ b/drivers/misc/genwqe/genwqe_driver.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __GENWQE_DRIVER_H__ +#define __GENWQE_DRIVER_H__ + +/** + * IBM Accelerator Family 'GenWQE' + * + * (C) Copyright IBM Corp. 2013 + * + * Author: Frank Haverkamp + * Author: Joerg-Stephan Vogt + * Author: Michael Jung + * Author: Michael Ruettger + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define DRV_VERSION "2.0.25" + +/* + * Static minor number assignement, until we decide/implement + * something dynamic. + */ +#define GENWQE_MAX_MINOR 128 /* up to 128 possible genwqe devices */ + +/** + * genwqe_requ_alloc() - Allocate a new DDCB execution request + * + * This data structure contains the user visiable fields of the DDCB + * to be executed. + * + * Return: ptr to genwqe_ddcb_cmd data structure + */ +struct genwqe_ddcb_cmd *ddcb_requ_alloc(void); + +/** + * ddcb_requ_free() - Free DDCB execution request. + * @req: ptr to genwqe_ddcb_cmd data structure. 
+ */ +void ddcb_requ_free(struct genwqe_ddcb_cmd *req); + +u32 genwqe_crc32(u8 *buff, size_t len, u32 init); + +static inline void genwqe_hexdump(struct pci_dev *pci_dev, + const void *buff, unsigned int size) +{ + char prefix[32]; + + scnprintf(prefix, sizeof(prefix), "%s %s: ", + GENWQE_DEVNAME, pci_name(pci_dev)); + + print_hex_dump_debug(prefix, DUMP_PREFIX_OFFSET, 16, 1, buff, + size, true); +} + +#endif /* __GENWQE_DRIVER_H__ */ diff --git a/drivers/misc/hi6421v600-irq.c b/drivers/misc/hi6421v600-irq.c new file mode 100644 index 0000000000..b075d803a2 --- /dev/null +++ b/drivers/misc/hi6421v600-irq.c @@ -0,0 +1,304 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Device driver for irqs in HISI PMIC IC + * + * Copyright (c) 2013 Linaro Ltd. + * Copyright (c) 2011 Hisilicon. + * Copyright (c) 2020-2021 Huawei Technologies Co., Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct hi6421v600_irq { + struct device *dev; + struct irq_domain *domain; + int irq; + unsigned int *irqs; + struct regmap *regmap; + + /* Protect IRQ mask changes */ + spinlock_t lock; +}; + +enum hi6421v600_irq_list { + OTMP = 0, + VBUS_CONNECT, + VBUS_DISCONNECT, + ALARMON_R, + HOLD_6S, + HOLD_1S, + POWERKEY_UP, + POWERKEY_DOWN, + OCP_SCP_R, + COUL_R, + SIM0_HPD_R, + SIM0_HPD_F, + SIM1_HPD_R, + SIM1_HPD_F, + + PMIC_IRQ_LIST_MAX +}; + +#define HISI_IRQ_BANK_SIZE 2 + +/* + * IRQ number for the power key button and mask for both UP and DOWN IRQs + */ +#define HISI_POWERKEY_IRQ_NUM 0 +#define HISI_IRQ_POWERKEY_UP_DOWN (BIT(POWERKEY_DOWN) | BIT(POWERKEY_UP)) + +/* + * Registers for IRQ address and IRQ mask bits + * + * Please notice that we need to regmap a larger region, as other + * registers are used by the irqs. + * See drivers/irq/hi6421-irq.c. + */ +#define SOC_PMIC_IRQ_MASK_0_ADDR 0x0202 +#define SOC_PMIC_IRQ0_ADDR 0x0212 + +/* + * The IRQs are mapped as: + * + * ====================== ============= ============ ===== + * IRQ MASK REGISTER IRQ REGISTER BIT + * ====================== ============= ============ ===== + * OTMP 0x0202 0x212 bit 0 + * VBUS_CONNECT 0x0202 0x212 bit 1 + * VBUS_DISCONNECT 0x0202 0x212 bit 2 + * ALARMON_R 0x0202 0x212 bit 3 + * HOLD_6S 0x0202 0x212 bit 4 + * HOLD_1S 0x0202 0x212 bit 5 + * POWERKEY_UP 0x0202 0x212 bit 6 + * POWERKEY_DOWN 0x0202 0x212 bit 7 + * + * OCP_SCP_R 0x0203 0x213 bit 0 + * COUL_R 0x0203 0x213 bit 1 + * SIM0_HPD_R 0x0203 0x213 bit 2 + * SIM0_HPD_F 0x0203 0x213 bit 3 + * SIM1_HPD_R 0x0203 0x213 bit 4 + * SIM1_HPD_F 0x0203 0x213 bit 5 + * ====================== ============= ============ ===== + * + * Each mask register contains 8 bits. 
The ancillary macros below + * convert a number from 0 to 14 into a register address and a bit mask + */ +#define HISI_IRQ_MASK_REG(irq_data) (SOC_PMIC_IRQ_MASK_0_ADDR + \ + (irqd_to_hwirq(irq_data) / BITS_PER_BYTE)) +#define HISI_IRQ_MASK_BIT(irq_data) BIT(irqd_to_hwirq(irq_data) & (BITS_PER_BYTE - 1)) +#define HISI_8BITS_MASK 0xff + +static irqreturn_t hi6421v600_irq_handler(int irq, void *__priv) +{ + struct hi6421v600_irq *priv = __priv; + unsigned long pending; + unsigned int in; + int i, offset; + + for (i = 0; i < HISI_IRQ_BANK_SIZE; i++) { + regmap_read(priv->regmap, SOC_PMIC_IRQ0_ADDR + i, &in); + + /* Mark pending IRQs as handled */ + regmap_write(priv->regmap, SOC_PMIC_IRQ0_ADDR + i, in); + + pending = in & HISI_8BITS_MASK; + + if (i == HISI_POWERKEY_IRQ_NUM && + (pending & HISI_IRQ_POWERKEY_UP_DOWN) == HISI_IRQ_POWERKEY_UP_DOWN) { + /* + * If both powerkey down and up IRQs are received, + * handle them at the right order + */ + generic_handle_irq_safe(priv->irqs[POWERKEY_DOWN]); + generic_handle_irq_safe(priv->irqs[POWERKEY_UP]); + pending &= ~HISI_IRQ_POWERKEY_UP_DOWN; + } + + if (!pending) + continue; + + for_each_set_bit(offset, &pending, BITS_PER_BYTE) { + generic_handle_irq_safe(priv->irqs[offset + i * BITS_PER_BYTE]); + } + } + + return IRQ_HANDLED; +} + +static void hi6421v600_irq_mask(struct irq_data *d) +{ + struct hi6421v600_irq *priv = irq_data_get_irq_chip_data(d); + unsigned long flags; + unsigned int data; + u32 offset; + + offset = HISI_IRQ_MASK_REG(d); + + spin_lock_irqsave(&priv->lock, flags); + + regmap_read(priv->regmap, offset, &data); + data |= HISI_IRQ_MASK_BIT(d); + regmap_write(priv->regmap, offset, data); + + spin_unlock_irqrestore(&priv->lock, flags); +} + +static void hi6421v600_irq_unmask(struct irq_data *d) +{ + struct hi6421v600_irq *priv = irq_data_get_irq_chip_data(d); + u32 data, offset; + unsigned long flags; + + offset = HISI_IRQ_MASK_REG(d); + + spin_lock_irqsave(&priv->lock, flags); + + regmap_read(priv->regmap, offset, &data); + data &= ~HISI_IRQ_MASK_BIT(d); + regmap_write(priv->regmap, offset, data); + + spin_unlock_irqrestore(&priv->lock, flags); +} + +static struct irq_chip hi6421v600_pmu_irqchip = { + .name = "hi6421v600-irq", + .irq_mask = hi6421v600_irq_mask, + .irq_unmask = hi6421v600_irq_unmask, + .irq_disable = hi6421v600_irq_mask, + .irq_enable = hi6421v600_irq_unmask, +}; + +static int hi6421v600_irq_map(struct irq_domain *d, unsigned int virq, + irq_hw_number_t hw) +{ + struct hi6421v600_irq *priv = d->host_data; + + irq_set_chip_and_handler_name(virq, &hi6421v600_pmu_irqchip, + handle_simple_irq, "hi6421v600"); + irq_set_chip_data(virq, priv); + irq_set_irq_type(virq, IRQ_TYPE_NONE); + + return 0; +} + +static const struct irq_domain_ops hi6421v600_domain_ops = { + .map = hi6421v600_irq_map, + .xlate = irq_domain_xlate_twocell, +}; + +static void hi6421v600_irq_init(struct hi6421v600_irq *priv) +{ + int i; + unsigned int pending; + + /* Mask all IRQs */ + for (i = 0; i < HISI_IRQ_BANK_SIZE; i++) + regmap_write(priv->regmap, SOC_PMIC_IRQ_MASK_0_ADDR + i, + HISI_8BITS_MASK); + + /* Mark all IRQs as handled */ + for (i = 0; i < HISI_IRQ_BANK_SIZE; i++) { + regmap_read(priv->regmap, SOC_PMIC_IRQ0_ADDR + i, &pending); + regmap_write(priv->regmap, SOC_PMIC_IRQ0_ADDR + i, + HISI_8BITS_MASK); + } +} + +static int hi6421v600_irq_probe(struct platform_device *pdev) +{ + struct device *pmic_dev = pdev->dev.parent; + struct device_node *np = pmic_dev->of_node; + struct platform_device *pmic_pdev; + struct device *dev = &pdev->dev; + 
struct hi6421v600_irq *priv; + struct regmap *regmap; + unsigned int virq; + int i, ret; + + /* + * This driver is meant to be called by hi6421-spmi-core, + * which should first set drvdata. If this doesn't happen, hit + * a warn on and return. + */ + regmap = dev_get_drvdata(pmic_dev); + if (WARN_ON(!regmap)) + return -ENODEV; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->dev = dev; + priv->regmap = regmap; + + spin_lock_init(&priv->lock); + + pmic_pdev = container_of(pmic_dev, struct platform_device, dev); + + priv->irq = platform_get_irq(pmic_pdev, 0); + if (priv->irq < 0) + return priv->irq; + + platform_set_drvdata(pdev, priv); + + hi6421v600_irq_init(priv); + + priv->irqs = devm_kzalloc(dev, PMIC_IRQ_LIST_MAX * sizeof(int), GFP_KERNEL); + if (!priv->irqs) + return -ENOMEM; + + priv->domain = irq_domain_add_simple(np, PMIC_IRQ_LIST_MAX, 0, + &hi6421v600_domain_ops, priv); + if (!priv->domain) { + dev_err(dev, "Failed to create IRQ domain\n"); + return -ENODEV; + } + + for (i = 0; i < PMIC_IRQ_LIST_MAX; i++) { + virq = irq_create_mapping(priv->domain, i); + if (!virq) { + dev_err(dev, "Failed to map H/W IRQ\n"); + return -ENODEV; + } + priv->irqs[i] = virq; + } + + ret = devm_request_threaded_irq(dev, + priv->irq, hi6421v600_irq_handler, + NULL, + IRQF_TRIGGER_LOW | IRQF_SHARED | IRQF_NO_SUSPEND, + "pmic", priv); + if (ret < 0) { + dev_err(dev, "Failed to start IRQ handling thread: error %d\n", + ret); + return ret; + } + + return 0; +} + +static const struct platform_device_id hi6421v600_irq_table[] = { + { .name = "hi6421v600-irq" }, + {}, +}; +MODULE_DEVICE_TABLE(platform, hi6421v600_irq_table); + +static struct platform_driver hi6421v600_irq_driver = { + .id_table = hi6421v600_irq_table, + .driver = { + .name = "hi6421v600-irq", + }, + .probe = hi6421v600_irq_probe, +}; +module_platform_driver(hi6421v600_irq_driver); + +MODULE_DESCRIPTION("HiSilicon Hi6421v600 IRQ driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/hisi_hikey_usb.c b/drivers/misc/hisi_hikey_usb.c new file mode 100644 index 0000000000..2165ec35a3 --- /dev/null +++ b/drivers/misc/hisi_hikey_usb.c @@ -0,0 +1,277 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Support for usb functionality of Hikey series boards + * based on Hisilicon Kirin Soc. + * + * Copyright (C) 2017-2018 Hilisicon Electronics Co., Ltd. 
+ * http://www.huawei.com + * + * Authors: Yu Chen + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEVICE_DRIVER_NAME "hisi_hikey_usb" + +#define HUB_VBUS_POWER_ON 1 +#define HUB_VBUS_POWER_OFF 0 +#define USB_SWITCH_TO_HUB 1 +#define USB_SWITCH_TO_TYPEC 0 +#define TYPEC_VBUS_POWER_ON 1 +#define TYPEC_VBUS_POWER_OFF 0 + +struct hisi_hikey_usb { + struct device *dev; + struct gpio_desc *otg_switch; + struct gpio_desc *typec_vbus; + struct gpio_desc *reset; + + struct regulator *regulator; + + struct usb_role_switch *hub_role_sw; + + struct usb_role_switch *dev_role_sw; + enum usb_role role; + + struct mutex lock; + struct work_struct work; + + struct notifier_block nb; +}; + +static void hub_power_ctrl(struct hisi_hikey_usb *hisi_hikey_usb, int value) +{ + int ret, status; + + if (!hisi_hikey_usb->regulator) + return; + + status = regulator_is_enabled(hisi_hikey_usb->regulator); + if (status == !!value) + return; + + if (value) + ret = regulator_enable(hisi_hikey_usb->regulator); + else + ret = regulator_disable(hisi_hikey_usb->regulator); + + if (ret) + dev_err(hisi_hikey_usb->dev, + "Can't switch regulator state to %s\n", + value ? "enabled" : "disabled"); +} + +static void usb_switch_ctrl(struct hisi_hikey_usb *hisi_hikey_usb, + int switch_to) +{ + if (!hisi_hikey_usb->otg_switch) + return; + + gpiod_set_value_cansleep(hisi_hikey_usb->otg_switch, switch_to); +} + +static void usb_typec_power_ctrl(struct hisi_hikey_usb *hisi_hikey_usb, + int value) +{ + if (!hisi_hikey_usb->typec_vbus) + return; + + gpiod_set_value_cansleep(hisi_hikey_usb->typec_vbus, value); +} + +static void relay_set_role_switch(struct work_struct *work) +{ + struct hisi_hikey_usb *hisi_hikey_usb = container_of(work, + struct hisi_hikey_usb, + work); + struct usb_role_switch *sw; + enum usb_role role; + + if (!hisi_hikey_usb || !hisi_hikey_usb->dev_role_sw) + return; + + mutex_lock(&hisi_hikey_usb->lock); + switch (hisi_hikey_usb->role) { + case USB_ROLE_NONE: + usb_typec_power_ctrl(hisi_hikey_usb, TYPEC_VBUS_POWER_OFF); + usb_switch_ctrl(hisi_hikey_usb, USB_SWITCH_TO_HUB); + hub_power_ctrl(hisi_hikey_usb, HUB_VBUS_POWER_ON); + break; + case USB_ROLE_HOST: + hub_power_ctrl(hisi_hikey_usb, HUB_VBUS_POWER_OFF); + usb_switch_ctrl(hisi_hikey_usb, USB_SWITCH_TO_TYPEC); + usb_typec_power_ctrl(hisi_hikey_usb, TYPEC_VBUS_POWER_ON); + break; + case USB_ROLE_DEVICE: + hub_power_ctrl(hisi_hikey_usb, HUB_VBUS_POWER_OFF); + usb_typec_power_ctrl(hisi_hikey_usb, TYPEC_VBUS_POWER_OFF); + usb_switch_ctrl(hisi_hikey_usb, USB_SWITCH_TO_TYPEC); + break; + default: + break; + } + sw = hisi_hikey_usb->dev_role_sw; + role = hisi_hikey_usb->role; + mutex_unlock(&hisi_hikey_usb->lock); + + usb_role_switch_set_role(sw, role); +} + +static int hub_usb_role_switch_set(struct usb_role_switch *sw, enum usb_role role) +{ + struct hisi_hikey_usb *hisi_hikey_usb = usb_role_switch_get_drvdata(sw); + + if (!hisi_hikey_usb || !hisi_hikey_usb->dev_role_sw) + return -EINVAL; + + mutex_lock(&hisi_hikey_usb->lock); + hisi_hikey_usb->role = role; + mutex_unlock(&hisi_hikey_usb->lock); + + schedule_work(&hisi_hikey_usb->work); + + return 0; +} + +static int hisi_hikey_usb_of_role_switch(struct platform_device *pdev, + struct hisi_hikey_usb *hisi_hikey_usb) +{ + struct device *dev = &pdev->dev; + struct usb_role_switch_desc hub_role_switch = {NULL}; + + if (!device_property_read_bool(dev, "usb-role-switch")) + return 0; + + hisi_hikey_usb->otg_switch = devm_gpiod_get(dev, 
"otg-switch", + GPIOD_OUT_HIGH); + if (IS_ERR(hisi_hikey_usb->otg_switch)) { + dev_err(dev, "get otg-switch failed with error %ld\n", + PTR_ERR(hisi_hikey_usb->otg_switch)); + return PTR_ERR(hisi_hikey_usb->otg_switch); + } + + hisi_hikey_usb->typec_vbus = devm_gpiod_get(dev, "typec-vbus", + GPIOD_OUT_LOW); + if (IS_ERR(hisi_hikey_usb->typec_vbus)) { + dev_err(dev, "get typec-vbus failed with error %ld\n", + PTR_ERR(hisi_hikey_usb->typec_vbus)); + return PTR_ERR(hisi_hikey_usb->typec_vbus); + } + + hisi_hikey_usb->reset = devm_gpiod_get_optional(dev, + "hub-reset-en", + GPIOD_OUT_HIGH); + if (IS_ERR(hisi_hikey_usb->reset)) { + dev_err(dev, "get hub-reset-en failed with error %ld\n", + PTR_ERR(hisi_hikey_usb->reset)); + return PTR_ERR(hisi_hikey_usb->reset); + } + + hisi_hikey_usb->dev_role_sw = usb_role_switch_get(dev); + if (!hisi_hikey_usb->dev_role_sw) + return -EPROBE_DEFER; + if (IS_ERR(hisi_hikey_usb->dev_role_sw)) { + dev_err(dev, "get device role switch failed with error %ld\n", + PTR_ERR(hisi_hikey_usb->dev_role_sw)); + return PTR_ERR(hisi_hikey_usb->dev_role_sw); + } + + INIT_WORK(&hisi_hikey_usb->work, relay_set_role_switch); + + hub_role_switch.fwnode = dev_fwnode(dev); + hub_role_switch.set = hub_usb_role_switch_set; + hub_role_switch.driver_data = hisi_hikey_usb; + + hisi_hikey_usb->hub_role_sw = usb_role_switch_register(dev, + &hub_role_switch); + + if (IS_ERR(hisi_hikey_usb->hub_role_sw)) { + dev_err(dev, + "failed to register hub role with error %ld\n", + PTR_ERR(hisi_hikey_usb->hub_role_sw)); + usb_role_switch_put(hisi_hikey_usb->dev_role_sw); + return PTR_ERR(hisi_hikey_usb->hub_role_sw); + } + + return 0; +} + +static int hisi_hikey_usb_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct hisi_hikey_usb *hisi_hikey_usb; + int ret; + + hisi_hikey_usb = devm_kzalloc(dev, sizeof(*hisi_hikey_usb), GFP_KERNEL); + if (!hisi_hikey_usb) + return -ENOMEM; + + hisi_hikey_usb->dev = &pdev->dev; + mutex_init(&hisi_hikey_usb->lock); + + hisi_hikey_usb->regulator = devm_regulator_get(dev, "hub-vdd"); + if (IS_ERR(hisi_hikey_usb->regulator)) { + if (PTR_ERR(hisi_hikey_usb->regulator) == -EPROBE_DEFER) { + dev_info(dev, "waiting for hub-vdd-supply\n"); + return PTR_ERR(hisi_hikey_usb->regulator); + } + dev_err(dev, "get hub-vdd-supply failed with error %ld\n", + PTR_ERR(hisi_hikey_usb->regulator)); + return PTR_ERR(hisi_hikey_usb->regulator); + } + + ret = hisi_hikey_usb_of_role_switch(pdev, hisi_hikey_usb); + if (ret) + return ret; + + platform_set_drvdata(pdev, hisi_hikey_usb); + + return 0; +} + +static int hisi_hikey_usb_remove(struct platform_device *pdev) +{ + struct hisi_hikey_usb *hisi_hikey_usb = platform_get_drvdata(pdev); + + if (hisi_hikey_usb->hub_role_sw) { + usb_role_switch_unregister(hisi_hikey_usb->hub_role_sw); + + if (hisi_hikey_usb->dev_role_sw) + usb_role_switch_put(hisi_hikey_usb->dev_role_sw); + } else { + hub_power_ctrl(hisi_hikey_usb, HUB_VBUS_POWER_OFF); + } + + return 0; +} + +static const struct of_device_id id_table_hisi_hikey_usb[] = { + { .compatible = "hisilicon,usbhub" }, + {} +}; +MODULE_DEVICE_TABLE(of, id_table_hisi_hikey_usb); + +static struct platform_driver hisi_hikey_usb_driver = { + .probe = hisi_hikey_usb_probe, + .remove = hisi_hikey_usb_remove, + .driver = { + .name = DEVICE_DRIVER_NAME, + .of_match_table = id_table_hisi_hikey_usb, + }, +}; + +module_platform_driver(hisi_hikey_usb_driver); + +MODULE_AUTHOR("Yu Chen "); +MODULE_DESCRIPTION("Driver Support for USB functionality of Hikey"); +MODULE_LICENSE("GPL 
v2"); diff --git a/drivers/misc/hmc6352.c b/drivers/misc/hmc6352.c new file mode 100644 index 0000000000..759eaeb643 --- /dev/null +++ b/drivers/misc/hmc6352.c @@ -0,0 +1,143 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * hmc6352.c - Honeywell Compass Driver + * + * Copyright (C) 2009 Intel Corp + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_MUTEX(compass_mutex); + +static int compass_command(struct i2c_client *c, u8 cmd) +{ + int ret = i2c_master_send(c, &cmd, 1); + if (ret < 0) + dev_warn(&c->dev, "command '%c' failed.\n", cmd); + return ret; +} + +static int compass_store(struct device *dev, const char *buf, size_t count, + const char *map) +{ + struct i2c_client *c = to_i2c_client(dev); + int ret; + unsigned long val; + + ret = kstrtoul(buf, 10, &val); + if (ret) + return ret; + if (val >= strlen(map)) + return -EINVAL; + val = array_index_nospec(val, strlen(map)); + mutex_lock(&compass_mutex); + ret = compass_command(c, map[val]); + mutex_unlock(&compass_mutex); + if (ret < 0) + return ret; + return count; +} + +static ssize_t compass_calibration_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + return compass_store(dev, buf, count, "EC"); +} + +static ssize_t compass_power_mode_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + return compass_store(dev, buf, count, "SW"); +} + +static ssize_t compass_heading_data_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + unsigned char i2c_data[2]; + int ret; + + mutex_lock(&compass_mutex); + ret = compass_command(client, 'A'); + if (ret != 1) { + mutex_unlock(&compass_mutex); + return ret; + } + msleep(10); /* sending 'A' cmd we need to wait for 7-10 millisecs */ + ret = i2c_master_recv(client, i2c_data, 2); + mutex_unlock(&compass_mutex); + if (ret < 0) { + dev_warn(dev, "i2c read data cmd failed\n"); + return ret; + } + ret = (i2c_data[0] << 8) | i2c_data[1]; + return sprintf(buf, "%d.%d\n", ret/10, ret%10); +} + + +static DEVICE_ATTR(heading0_input, S_IRUGO, compass_heading_data_show, NULL); +static DEVICE_ATTR(calibration, S_IWUSR, NULL, compass_calibration_store); +static DEVICE_ATTR(power_state, S_IWUSR, NULL, compass_power_mode_store); + +static struct attribute *mid_att_compass[] = { + &dev_attr_heading0_input.attr, + &dev_attr_calibration.attr, + &dev_attr_power_state.attr, + NULL +}; + +static const struct attribute_group m_compass_gr = { + .name = "hmc6352", + .attrs = mid_att_compass +}; + +static int hmc6352_probe(struct i2c_client *client) +{ + int res; + + res = sysfs_create_group(&client->dev.kobj, &m_compass_gr); + if (res) { + dev_err(&client->dev, "device_create_file failed\n"); + return res; + } + dev_info(&client->dev, "%s HMC6352 compass chip found\n", + client->name); + return 0; +} + +static void hmc6352_remove(struct i2c_client *client) +{ + sysfs_remove_group(&client->dev.kobj, &m_compass_gr); +} + +static const struct i2c_device_id hmc6352_id[] = { + { "hmc6352", 0 }, + { } +}; + +MODULE_DEVICE_TABLE(i2c, hmc6352_id); + +static struct i2c_driver hmc6352_driver = { + .driver = { + .name = "hmc6352", + }, + .probe = hmc6352_probe, + .remove = hmc6352_remove, + .id_table = hmc6352_id, +}; + +module_i2c_driver(hmc6352_driver); + 
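For reference, the heading0_input attribute registered above reports the compass heading in tenths of a degree, formatted as "<degrees>.<tenths>". A minimal user-space sketch of reading it could look like the following; the i2c bus/address segment of the sysfs path ("1-0021") is only an assumed example and depends on how the chip is wired on a given board:

#include <stdio.h>

int main(void)
{
	/* "1-0021" is a placeholder for the real i2c bus/address pair */
	FILE *f = fopen("/sys/bus/i2c/devices/1-0021/hmc6352/heading0_input", "r");
	int degrees, tenths;

	if (!f)
		return 1;
	/* the driver prints the raw 16-bit heading as "<deg>.<tenth>\n" */
	if (fscanf(f, "%d.%d", &degrees, &tenths) == 2)
		printf("heading: %d.%d degrees\n", degrees, tenths);
	fclose(f);
	return 0;
}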
+MODULE_AUTHOR("Kalhan Trisal + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "hpilo.h" + +static const struct class ilo_class = { + .name = "iLO", +}; +static unsigned int ilo_major; +static unsigned int max_ccb = 16; +static char ilo_hwdev[MAX_ILO_DEV]; +static const struct pci_device_id ilo_blacklist[] = { + /* auxiliary iLO */ + {PCI_DEVICE_SUB(PCI_VENDOR_ID_HP, 0x3307, PCI_VENDOR_ID_HP, 0x1979)}, + /* CL */ + {PCI_DEVICE_SUB(PCI_VENDOR_ID_HP, 0x3307, PCI_VENDOR_ID_HP_3PAR, 0x0289)}, + {} +}; + +static inline int get_entry_id(int entry) +{ + return (entry & ENTRY_MASK_DESCRIPTOR) >> ENTRY_BITPOS_DESCRIPTOR; +} + +static inline int get_entry_len(int entry) +{ + return ((entry & ENTRY_MASK_QWORDS) >> ENTRY_BITPOS_QWORDS) << 3; +} + +static inline int mk_entry(int id, int len) +{ + int qlen = len & 7 ? (len >> 3) + 1 : len >> 3; + return id << ENTRY_BITPOS_DESCRIPTOR | qlen << ENTRY_BITPOS_QWORDS; +} + +static inline int desc_mem_sz(int nr_entry) +{ + return nr_entry << L2_QENTRY_SZ; +} + +/* + * FIFO queues, shared with hardware. + * + * If a queue has empty slots, an entry is added to the queue tail, + * and that entry is marked as occupied. + * Entries can be dequeued from the head of the list, when the device + * has marked the entry as consumed. + * + * Returns true on successful queue/dequeue, false on failure. + */ +static int fifo_enqueue(struct ilo_hwinfo *hw, char *fifobar, int entry) +{ + struct fifo *fifo_q = FIFOBARTOHANDLE(fifobar); + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&hw->fifo_lock, flags); + if (!(fifo_q->fifobar[(fifo_q->tail + 1) & fifo_q->imask] + & ENTRY_MASK_O)) { + fifo_q->fifobar[fifo_q->tail & fifo_q->imask] |= + (entry & ENTRY_MASK_NOSTATE) | fifo_q->merge; + fifo_q->tail += 1; + ret = 1; + } + spin_unlock_irqrestore(&hw->fifo_lock, flags); + + return ret; +} + +static int fifo_dequeue(struct ilo_hwinfo *hw, char *fifobar, int *entry) +{ + struct fifo *fifo_q = FIFOBARTOHANDLE(fifobar); + unsigned long flags; + int ret = 0; + u64 c; + + spin_lock_irqsave(&hw->fifo_lock, flags); + c = fifo_q->fifobar[fifo_q->head & fifo_q->imask]; + if (c & ENTRY_MASK_C) { + if (entry) + *entry = c & ENTRY_MASK_NOSTATE; + + fifo_q->fifobar[fifo_q->head & fifo_q->imask] = + (c | ENTRY_MASK) + 1; + fifo_q->head += 1; + ret = 1; + } + spin_unlock_irqrestore(&hw->fifo_lock, flags); + + return ret; +} + +static int fifo_check_recv(struct ilo_hwinfo *hw, char *fifobar) +{ + struct fifo *fifo_q = FIFOBARTOHANDLE(fifobar); + unsigned long flags; + int ret = 0; + u64 c; + + spin_lock_irqsave(&hw->fifo_lock, flags); + c = fifo_q->fifobar[fifo_q->head & fifo_q->imask]; + if (c & ENTRY_MASK_C) + ret = 1; + spin_unlock_irqrestore(&hw->fifo_lock, flags); + + return ret; +} + +static int ilo_pkt_enqueue(struct ilo_hwinfo *hw, struct ccb *ccb, + int dir, int id, int len) +{ + char *fifobar; + int entry; + + if (dir == SENDQ) + fifobar = ccb->ccb_u1.send_fifobar; + else + fifobar = ccb->ccb_u3.recv_fifobar; + + entry = mk_entry(id, len); + return fifo_enqueue(hw, fifobar, entry); +} + +static int ilo_pkt_dequeue(struct ilo_hwinfo *hw, struct ccb *ccb, + int dir, int *id, int *len, void **pkt) +{ + char *fifobar, *desc; + int entry = 0, pkt_id = 0; + int ret; + + if (dir == SENDQ) { + fifobar = ccb->ccb_u1.send_fifobar; + desc = ccb->ccb_u2.send_desc; + } else { + fifobar = ccb->ccb_u3.recv_fifobar; + desc = ccb->ccb_u4.recv_desc; 
+ } + + ret = fifo_dequeue(hw, fifobar, &entry); + if (ret) { + pkt_id = get_entry_id(entry); + if (id) + *id = pkt_id; + if (len) + *len = get_entry_len(entry); + if (pkt) + *pkt = (void *)(desc + desc_mem_sz(pkt_id)); + } + + return ret; +} + +static int ilo_pkt_recv(struct ilo_hwinfo *hw, struct ccb *ccb) +{ + char *fifobar = ccb->ccb_u3.recv_fifobar; + + return fifo_check_recv(hw, fifobar); +} + +static inline void doorbell_set(struct ccb *ccb) +{ + iowrite8(1, ccb->ccb_u5.db_base); +} + +static inline void doorbell_clr(struct ccb *ccb) +{ + iowrite8(2, ccb->ccb_u5.db_base); +} + +static inline int ctrl_set(int l2sz, int idxmask, int desclim) +{ + int active = 0, go = 1; + return l2sz << CTRL_BITPOS_L2SZ | + idxmask << CTRL_BITPOS_FIFOINDEXMASK | + desclim << CTRL_BITPOS_DESCLIMIT | + active << CTRL_BITPOS_A | + go << CTRL_BITPOS_G; +} + +static void ctrl_setup(struct ccb *ccb, int nr_desc, int l2desc_sz) +{ + /* for simplicity, use the same parameters for send and recv ctrls */ + ccb->send_ctrl = ctrl_set(l2desc_sz, nr_desc-1, nr_desc-1); + ccb->recv_ctrl = ctrl_set(l2desc_sz, nr_desc-1, nr_desc-1); +} + +static inline int fifo_sz(int nr_entry) +{ + /* size of a fifo is determined by the number of entries it contains */ + return nr_entry * sizeof(u64) + FIFOHANDLESIZE; +} + +static void fifo_setup(void *base_addr, int nr_entry) +{ + struct fifo *fifo_q = base_addr; + int i; + + /* set up an empty fifo */ + fifo_q->head = 0; + fifo_q->tail = 0; + fifo_q->reset = 0; + fifo_q->nrents = nr_entry; + fifo_q->imask = nr_entry - 1; + fifo_q->merge = ENTRY_MASK_O; + + for (i = 0; i < nr_entry; i++) + fifo_q->fifobar[i] = 0; +} + +static void ilo_ccb_close(struct pci_dev *pdev, struct ccb_data *data) +{ + struct ccb *driver_ccb = &data->driver_ccb; + struct ccb __iomem *device_ccb = data->mapped_ccb; + int retries; + + /* complicated dance to tell the hw we are stopping */ + doorbell_clr(driver_ccb); + iowrite32(ioread32(&device_ccb->send_ctrl) & ~(1 << CTRL_BITPOS_G), + &device_ccb->send_ctrl); + iowrite32(ioread32(&device_ccb->recv_ctrl) & ~(1 << CTRL_BITPOS_G), + &device_ccb->recv_ctrl); + + /* give iLO some time to process stop request */ + for (retries = MAX_WAIT; retries > 0; retries--) { + doorbell_set(driver_ccb); + udelay(WAIT_TIME); + if (!(ioread32(&device_ccb->send_ctrl) & (1 << CTRL_BITPOS_A)) + && + !(ioread32(&device_ccb->recv_ctrl) & (1 << CTRL_BITPOS_A))) + break; + } + if (retries == 0) + dev_err(&pdev->dev, "Closing, but controller still active\n"); + + /* clear the hw ccb */ + memset_io(device_ccb, 0, sizeof(struct ccb)); + + /* free resources used to back send/recv queues */ + dma_free_coherent(&pdev->dev, data->dma_size, data->dma_va, + data->dma_pa); +} + +static int ilo_ccb_setup(struct ilo_hwinfo *hw, struct ccb_data *data, int slot) +{ + char *dma_va; + dma_addr_t dma_pa; + struct ccb *driver_ccb, *ilo_ccb; + + driver_ccb = &data->driver_ccb; + ilo_ccb = &data->ilo_ccb; + + data->dma_size = 2 * fifo_sz(NR_QENTRY) + + 2 * desc_mem_sz(NR_QENTRY) + + ILO_START_ALIGN + ILO_CACHE_SZ; + + data->dma_va = dma_alloc_coherent(&hw->ilo_dev->dev, data->dma_size, + &data->dma_pa, GFP_ATOMIC); + if (!data->dma_va) + return -ENOMEM; + + dma_va = (char *)data->dma_va; + dma_pa = data->dma_pa; + + dma_va = (char *)roundup((unsigned long)dma_va, ILO_START_ALIGN); + dma_pa = roundup(dma_pa, ILO_START_ALIGN); + + /* + * Create two ccb's, one with virt addrs, one with phys addrs. + * Copy the phys addr ccb to device shared mem. 
+ */ + ctrl_setup(driver_ccb, NR_QENTRY, L2_QENTRY_SZ); + ctrl_setup(ilo_ccb, NR_QENTRY, L2_QENTRY_SZ); + + fifo_setup(dma_va, NR_QENTRY); + driver_ccb->ccb_u1.send_fifobar = dma_va + FIFOHANDLESIZE; + ilo_ccb->ccb_u1.send_fifobar_pa = dma_pa + FIFOHANDLESIZE; + dma_va += fifo_sz(NR_QENTRY); + dma_pa += fifo_sz(NR_QENTRY); + + dma_va = (char *)roundup((unsigned long)dma_va, ILO_CACHE_SZ); + dma_pa = roundup(dma_pa, ILO_CACHE_SZ); + + fifo_setup(dma_va, NR_QENTRY); + driver_ccb->ccb_u3.recv_fifobar = dma_va + FIFOHANDLESIZE; + ilo_ccb->ccb_u3.recv_fifobar_pa = dma_pa + FIFOHANDLESIZE; + dma_va += fifo_sz(NR_QENTRY); + dma_pa += fifo_sz(NR_QENTRY); + + driver_ccb->ccb_u2.send_desc = dma_va; + ilo_ccb->ccb_u2.send_desc_pa = dma_pa; + dma_pa += desc_mem_sz(NR_QENTRY); + dma_va += desc_mem_sz(NR_QENTRY); + + driver_ccb->ccb_u4.recv_desc = dma_va; + ilo_ccb->ccb_u4.recv_desc_pa = dma_pa; + + driver_ccb->channel = slot; + ilo_ccb->channel = slot; + + driver_ccb->ccb_u5.db_base = hw->db_vaddr + (slot << L2_DB_SIZE); + ilo_ccb->ccb_u5.db_base = NULL; /* hw ccb's doorbell is not used */ + + return 0; +} + +static void ilo_ccb_open(struct ilo_hwinfo *hw, struct ccb_data *data, int slot) +{ + int pkt_id, pkt_sz; + struct ccb *driver_ccb = &data->driver_ccb; + + /* copy the ccb with physical addrs to device memory */ + data->mapped_ccb = (struct ccb __iomem *) + (hw->ram_vaddr + (slot * ILOHW_CCB_SZ)); + memcpy_toio(data->mapped_ccb, &data->ilo_ccb, sizeof(struct ccb)); + + /* put packets on the send and receive queues */ + pkt_sz = 0; + for (pkt_id = 0; pkt_id < NR_QENTRY; pkt_id++) { + ilo_pkt_enqueue(hw, driver_ccb, SENDQ, pkt_id, pkt_sz); + doorbell_set(driver_ccb); + } + + pkt_sz = desc_mem_sz(1); + for (pkt_id = 0; pkt_id < NR_QENTRY; pkt_id++) + ilo_pkt_enqueue(hw, driver_ccb, RECVQ, pkt_id, pkt_sz); + + /* the ccb is ready to use */ + doorbell_clr(driver_ccb); +} + +static int ilo_ccb_verify(struct ilo_hwinfo *hw, struct ccb_data *data) +{ + int pkt_id, i; + struct ccb *driver_ccb = &data->driver_ccb; + + /* make sure iLO is really handling requests */ + for (i = MAX_WAIT; i > 0; i--) { + if (ilo_pkt_dequeue(hw, driver_ccb, SENDQ, &pkt_id, NULL, NULL)) + break; + udelay(WAIT_TIME); + } + + if (i == 0) { + dev_err(&hw->ilo_dev->dev, "Open could not dequeue a packet\n"); + return -EBUSY; + } + + ilo_pkt_enqueue(hw, driver_ccb, SENDQ, pkt_id, 0); + doorbell_set(driver_ccb); + return 0; +} + +static inline int is_channel_reset(struct ccb *ccb) +{ + /* check for this particular channel needing a reset */ + return FIFOBARTOHANDLE(ccb->ccb_u1.send_fifobar)->reset; +} + +static inline void set_channel_reset(struct ccb *ccb) +{ + /* set a flag indicating this channel needs a reset */ + FIFOBARTOHANDLE(ccb->ccb_u1.send_fifobar)->reset = 1; +} + +static inline int get_device_outbound(struct ilo_hwinfo *hw) +{ + return ioread32(&hw->mmio_vaddr[DB_OUT]); +} + +static inline int is_db_reset(int db_out) +{ + return db_out & (1 << DB_RESET); +} + +static inline void clear_pending_db(struct ilo_hwinfo *hw, int clr) +{ + iowrite32(clr, &hw->mmio_vaddr[DB_OUT]); +} + +static inline void clear_device(struct ilo_hwinfo *hw) +{ + /* clear the device (reset bits, pending channel entries) */ + clear_pending_db(hw, -1); +} + +static inline void ilo_enable_interrupts(struct ilo_hwinfo *hw) +{ + iowrite8(ioread8(&hw->mmio_vaddr[DB_IRQ]) | 1, &hw->mmio_vaddr[DB_IRQ]); +} + +static inline void ilo_disable_interrupts(struct ilo_hwinfo *hw) +{ + iowrite8(ioread8(&hw->mmio_vaddr[DB_IRQ]) & ~1, + &hw->mmio_vaddr[DB_IRQ]); +} + 
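As a cross-check on the fifo entry encoding used above (mk_entry(), get_entry_id(), get_entry_len()), the standalone sketch below mirrors the bit layout defined in hpilo.h and shows the qword rounding: a 13-byte packet is packed as two qwords and therefore reads back as 16 bytes.

#include <assert.h>
#include <stdio.h>

/* bit layout copied from hpilo.h: qword count in bits 0-9, descriptor id in bits 10-21 */
#define ENTRY_BITPOS_QWORDS		0
#define ENTRY_BITPOS_DESCRIPTOR		10
#define ENTRY_MASK_QWORDS		(((1 << 10) - 1) << ENTRY_BITPOS_QWORDS)
#define ENTRY_MASK_DESCRIPTOR		(((1 << 12) - 1) << ENTRY_BITPOS_DESCRIPTOR)

static int mk_entry(int id, int len)
{
	/* round the byte length up to whole qwords, as the driver does */
	int qlen = len & 7 ? (len >> 3) + 1 : len >> 3;

	return id << ENTRY_BITPOS_DESCRIPTOR | qlen << ENTRY_BITPOS_QWORDS;
}

int main(void)
{
	int entry = mk_entry(3, 13);
	int id = (entry & ENTRY_MASK_DESCRIPTOR) >> ENTRY_BITPOS_DESCRIPTOR;
	int len = ((entry & ENTRY_MASK_QWORDS) >> ENTRY_BITPOS_QWORDS) << 3;

	assert(id == 3);
	assert(len == 16);	/* 13 bytes rounded up to 2 qwords */
	printf("entry=0x%x id=%d len=%d\n", entry, id, len);
	return 0;
}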
+static void ilo_set_reset(struct ilo_hwinfo *hw) +{ + int slot; + + /* + * Mapped memory is zeroed on ilo reset, so set a per ccb flag + * to indicate that this ccb needs to be closed and reopened. + */ + for (slot = 0; slot < max_ccb; slot++) { + if (!hw->ccb_alloc[slot]) + continue; + set_channel_reset(&hw->ccb_alloc[slot]->driver_ccb); + } +} + +static ssize_t ilo_read(struct file *fp, char __user *buf, + size_t len, loff_t *off) +{ + int err, found, cnt, pkt_id, pkt_len; + struct ccb_data *data = fp->private_data; + struct ccb *driver_ccb = &data->driver_ccb; + struct ilo_hwinfo *hw = data->ilo_hw; + void *pkt; + + if (is_channel_reset(driver_ccb)) { + /* + * If the device has been reset, applications + * need to close and reopen all ccbs. + */ + return -ENODEV; + } + + /* + * This function is to be called when data is expected + * in the channel, and will return an error if no packet is found + * during the loop below. The sleep/retry logic is to allow + * applications to call read() immediately post write(), + * and give iLO some time to process the sent packet. + */ + cnt = 20; + do { + /* look for a received packet */ + found = ilo_pkt_dequeue(hw, driver_ccb, RECVQ, &pkt_id, + &pkt_len, &pkt); + if (found) + break; + cnt--; + msleep(100); + } while (!found && cnt); + + if (!found) + return -EAGAIN; + + /* only copy the length of the received packet */ + if (pkt_len < len) + len = pkt_len; + + err = copy_to_user(buf, pkt, len); + + /* return the received packet to the queue */ + ilo_pkt_enqueue(hw, driver_ccb, RECVQ, pkt_id, desc_mem_sz(1)); + + return err ? -EFAULT : len; +} + +static ssize_t ilo_write(struct file *fp, const char __user *buf, + size_t len, loff_t *off) +{ + int err, pkt_id, pkt_len; + struct ccb_data *data = fp->private_data; + struct ccb *driver_ccb = &data->driver_ccb; + struct ilo_hwinfo *hw = data->ilo_hw; + void *pkt; + + if (is_channel_reset(driver_ccb)) + return -ENODEV; + + /* get a packet to send the user command */ + if (!ilo_pkt_dequeue(hw, driver_ccb, SENDQ, &pkt_id, &pkt_len, &pkt)) + return -EBUSY; + + /* limit the length to the length of the packet */ + if (pkt_len < len) + len = pkt_len; + + /* on failure, set the len to 0 to return empty packet to the device */ + err = copy_from_user(pkt, buf, len); + if (err) + len = 0; + + /* send the packet */ + ilo_pkt_enqueue(hw, driver_ccb, SENDQ, pkt_id, len); + doorbell_set(driver_ccb); + + return err ? 
-EFAULT : len; +} + +static __poll_t ilo_poll(struct file *fp, poll_table *wait) +{ + struct ccb_data *data = fp->private_data; + struct ccb *driver_ccb = &data->driver_ccb; + + poll_wait(fp, &data->ccb_waitq, wait); + + if (is_channel_reset(driver_ccb)) + return EPOLLERR; + else if (ilo_pkt_recv(data->ilo_hw, driver_ccb)) + return EPOLLIN | EPOLLRDNORM; + + return 0; +} + +static int ilo_close(struct inode *ip, struct file *fp) +{ + int slot; + struct ccb_data *data; + struct ilo_hwinfo *hw; + unsigned long flags; + + slot = iminor(ip) % max_ccb; + hw = container_of(ip->i_cdev, struct ilo_hwinfo, cdev); + + spin_lock(&hw->open_lock); + + if (hw->ccb_alloc[slot]->ccb_cnt == 1) { + + data = fp->private_data; + + spin_lock_irqsave(&hw->alloc_lock, flags); + hw->ccb_alloc[slot] = NULL; + spin_unlock_irqrestore(&hw->alloc_lock, flags); + + ilo_ccb_close(hw->ilo_dev, data); + + kfree(data); + } else + hw->ccb_alloc[slot]->ccb_cnt--; + + spin_unlock(&hw->open_lock); + + return 0; +} + +static int ilo_open(struct inode *ip, struct file *fp) +{ + int slot, error; + struct ccb_data *data; + struct ilo_hwinfo *hw; + unsigned long flags; + + slot = iminor(ip) % max_ccb; + hw = container_of(ip->i_cdev, struct ilo_hwinfo, cdev); + + /* new ccb allocation */ + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + spin_lock(&hw->open_lock); + + /* each fd private_data holds sw/hw view of ccb */ + if (hw->ccb_alloc[slot] == NULL) { + /* create a channel control block for this minor */ + error = ilo_ccb_setup(hw, data, slot); + if (error) { + kfree(data); + goto out; + } + + data->ccb_cnt = 1; + data->ccb_excl = fp->f_flags & O_EXCL; + data->ilo_hw = hw; + init_waitqueue_head(&data->ccb_waitq); + + /* write the ccb to hw */ + spin_lock_irqsave(&hw->alloc_lock, flags); + ilo_ccb_open(hw, data, slot); + hw->ccb_alloc[slot] = data; + spin_unlock_irqrestore(&hw->alloc_lock, flags); + + /* make sure the channel is functional */ + error = ilo_ccb_verify(hw, data); + if (error) { + + spin_lock_irqsave(&hw->alloc_lock, flags); + hw->ccb_alloc[slot] = NULL; + spin_unlock_irqrestore(&hw->alloc_lock, flags); + + ilo_ccb_close(hw->ilo_dev, data); + + kfree(data); + goto out; + } + + } else { + kfree(data); + if (fp->f_flags & O_EXCL || hw->ccb_alloc[slot]->ccb_excl) { + /* + * The channel exists, and either this open + * or a previous open of this channel wants + * exclusive access. 
+ */ + error = -EBUSY; + } else { + hw->ccb_alloc[slot]->ccb_cnt++; + error = 0; + } + } +out: + spin_unlock(&hw->open_lock); + + if (!error) + fp->private_data = hw->ccb_alloc[slot]; + + return error; +} + +static const struct file_operations ilo_fops = { + .owner = THIS_MODULE, + .read = ilo_read, + .write = ilo_write, + .poll = ilo_poll, + .open = ilo_open, + .release = ilo_close, + .llseek = noop_llseek, +}; + +static irqreturn_t ilo_isr(int irq, void *data) +{ + struct ilo_hwinfo *hw = data; + int pending, i; + + spin_lock(&hw->alloc_lock); + + /* check for ccbs which have data */ + pending = get_device_outbound(hw); + if (!pending) { + spin_unlock(&hw->alloc_lock); + return IRQ_NONE; + } + + if (is_db_reset(pending)) { + /* wake up all ccbs if the device was reset */ + pending = -1; + ilo_set_reset(hw); + } + + for (i = 0; i < max_ccb; i++) { + if (!hw->ccb_alloc[i]) + continue; + if (pending & (1 << i)) + wake_up_interruptible(&hw->ccb_alloc[i]->ccb_waitq); + } + + /* clear the device of the channels that have been handled */ + clear_pending_db(hw, pending); + + spin_unlock(&hw->alloc_lock); + + return IRQ_HANDLED; +} + +static void ilo_unmap_device(struct pci_dev *pdev, struct ilo_hwinfo *hw) +{ + pci_iounmap(pdev, hw->db_vaddr); + pci_iounmap(pdev, hw->ram_vaddr); + pci_iounmap(pdev, hw->mmio_vaddr); +} + +static int ilo_map_device(struct pci_dev *pdev, struct ilo_hwinfo *hw) +{ + int bar; + unsigned long off; + u8 pci_rev_id; + int rc; + + /* map the memory mapped i/o registers */ + hw->mmio_vaddr = pci_iomap(pdev, 1, 0); + if (hw->mmio_vaddr == NULL) { + dev_err(&pdev->dev, "Error mapping mmio\n"); + goto out; + } + + /* map the adapter shared memory region */ + rc = pci_read_config_byte(pdev, PCI_REVISION_ID, &pci_rev_id); + if (rc != 0) { + dev_err(&pdev->dev, "Error reading PCI rev id: %d\n", rc); + goto out; + } + + if (pci_rev_id >= PCI_REV_ID_NECHES) { + bar = 5; + /* Last 8k is reserved for CCBs */ + off = pci_resource_len(pdev, bar) - 0x2000; + } else { + bar = 2; + off = 0; + } + hw->ram_vaddr = pci_iomap_range(pdev, bar, off, max_ccb * ILOHW_CCB_SZ); + if (hw->ram_vaddr == NULL) { + dev_err(&pdev->dev, "Error mapping shared mem\n"); + goto mmio_free; + } + + /* map the doorbell aperture */ + hw->db_vaddr = pci_iomap(pdev, 3, max_ccb * ONE_DB_SIZE); + if (hw->db_vaddr == NULL) { + dev_err(&pdev->dev, "Error mapping doorbell\n"); + goto ram_free; + } + + return 0; +ram_free: + pci_iounmap(pdev, hw->ram_vaddr); +mmio_free: + pci_iounmap(pdev, hw->mmio_vaddr); +out: + return -ENOMEM; +} + +static void ilo_remove(struct pci_dev *pdev) +{ + int i, minor; + struct ilo_hwinfo *ilo_hw = pci_get_drvdata(pdev); + + if (!ilo_hw) + return; + + clear_device(ilo_hw); + + minor = MINOR(ilo_hw->cdev.dev); + for (i = minor; i < minor + max_ccb; i++) + device_destroy(&ilo_class, MKDEV(ilo_major, i)); + + cdev_del(&ilo_hw->cdev); + ilo_disable_interrupts(ilo_hw); + free_irq(pdev->irq, ilo_hw); + ilo_unmap_device(pdev, ilo_hw); + pci_release_regions(pdev); + /* + * pci_disable_device(pdev) used to be here. But this PCI device has + * two functions with interrupt lines connected to a single pin. The + * other one is a USB host controller. So when we disable the PIN here + * e.g. by rmmod hpilo, the controller stops working. It is because + * the interrupt link is disabled in ACPI since it is not refcounted + * yet. See acpi_pci_link_free_irq called from acpi_pci_irq_disable. 
+ */ + kfree(ilo_hw); + ilo_hwdev[(minor / max_ccb)] = 0; +} + +static int ilo_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int devnum, minor, start, error = 0; + struct ilo_hwinfo *ilo_hw; + + if (pci_match_id(ilo_blacklist, pdev)) { + dev_dbg(&pdev->dev, "Not supported on this device\n"); + return -ENODEV; + } + + if (max_ccb > MAX_CCB) + max_ccb = MAX_CCB; + else if (max_ccb < MIN_CCB) + max_ccb = MIN_CCB; + + /* find a free range for device files */ + for (devnum = 0; devnum < MAX_ILO_DEV; devnum++) { + if (ilo_hwdev[devnum] == 0) { + ilo_hwdev[devnum] = 1; + break; + } + } + + if (devnum == MAX_ILO_DEV) { + dev_err(&pdev->dev, "Error finding free device\n"); + return -ENODEV; + } + + /* track global allocations for this device */ + error = -ENOMEM; + ilo_hw = kzalloc(sizeof(*ilo_hw), GFP_KERNEL); + if (!ilo_hw) + goto out; + + ilo_hw->ilo_dev = pdev; + spin_lock_init(&ilo_hw->alloc_lock); + spin_lock_init(&ilo_hw->fifo_lock); + spin_lock_init(&ilo_hw->open_lock); + + error = pci_enable_device(pdev); + if (error) + goto free; + + pci_set_master(pdev); + + error = pci_request_regions(pdev, ILO_NAME); + if (error) + goto disable; + + error = ilo_map_device(pdev, ilo_hw); + if (error) + goto free_regions; + + pci_set_drvdata(pdev, ilo_hw); + clear_device(ilo_hw); + + error = request_irq(pdev->irq, ilo_isr, IRQF_SHARED, "hpilo", ilo_hw); + if (error) + goto unmap; + + ilo_enable_interrupts(ilo_hw); + + cdev_init(&ilo_hw->cdev, &ilo_fops); + ilo_hw->cdev.owner = THIS_MODULE; + start = devnum * max_ccb; + error = cdev_add(&ilo_hw->cdev, MKDEV(ilo_major, start), max_ccb); + if (error) { + dev_err(&pdev->dev, "Could not add cdev\n"); + goto remove_isr; + } + + for (minor = 0 ; minor < max_ccb; minor++) { + struct device *dev; + dev = device_create(&ilo_class, &pdev->dev, + MKDEV(ilo_major, minor), NULL, + "hpilo!d%dccb%d", devnum, minor); + if (IS_ERR(dev)) + dev_err(&pdev->dev, "Could not create files\n"); + } + + return 0; +remove_isr: + ilo_disable_interrupts(ilo_hw); + free_irq(pdev->irq, ilo_hw); +unmap: + ilo_unmap_device(pdev, ilo_hw); +free_regions: + pci_release_regions(pdev); +disable: +/* pci_disable_device(pdev); see comment in ilo_remove */ +free: + kfree(ilo_hw); +out: + ilo_hwdev[devnum] = 0; + return error; +} + +static const struct pci_device_id ilo_devices[] = { + { PCI_DEVICE(PCI_VENDOR_ID_COMPAQ, 0xB204) }, + { PCI_DEVICE(PCI_VENDOR_ID_HP, 0x3307) }, + { } +}; +MODULE_DEVICE_TABLE(pci, ilo_devices); + +static struct pci_driver ilo_driver = { + .name = ILO_NAME, + .id_table = ilo_devices, + .probe = ilo_probe, + .remove = ilo_remove, +}; + +static int __init ilo_init(void) +{ + int error; + dev_t dev; + + error = class_register(&ilo_class); + if (error) + goto out; + + error = alloc_chrdev_region(&dev, 0, MAX_OPEN, ILO_NAME); + if (error) + goto class_destroy; + + ilo_major = MAJOR(dev); + + error = pci_register_driver(&ilo_driver); + if (error) + goto chr_remove; + + return 0; +chr_remove: + unregister_chrdev_region(dev, MAX_OPEN); +class_destroy: + class_unregister(&ilo_class); +out: + return error; +} + +static void __exit ilo_exit(void) +{ + pci_unregister_driver(&ilo_driver); + unregister_chrdev_region(MKDEV(ilo_major, 0), MAX_OPEN); + class_unregister(&ilo_class); +} + +MODULE_VERSION("1.5.0"); +MODULE_ALIAS(ILO_NAME); +MODULE_DESCRIPTION(ILO_NAME); +MODULE_AUTHOR("David Altobelli "); +MODULE_LICENSE("GPL v2"); + +module_param(max_ccb, uint, 0444); +MODULE_PARM_DESC(max_ccb, "Maximum number of HP iLO channels to attach (8-24)(default=16)"); + 
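To make the max_ccb bounds concrete: ilo_probe() above clamps the module parameter to the MIN_CCB..MAX_CCB range from hpilo.h, and every opened minor is mapped back to a channel slot with iminor(ip) % max_ccb. The sketch below reproduces only that arithmetic; the constants are copied from the header, and the device naming is mentioned for illustration only.

#include <stdio.h>

#define MIN_CCB	8	/* limits copied from hpilo.h */
#define MAX_CCB	24

static unsigned int clamp_ccb(unsigned int requested)
{
	/* same bounds check ilo_probe() applies before allocating minors */
	if (requested > MAX_CCB)
		return MAX_CCB;
	if (requested < MIN_CCB)
		return MIN_CCB;
	return requested;
}

int main(void)
{
	unsigned int max_ccb = clamp_ccb(16);	/* module default */
	unsigned int minor;

	/* each minor within a device maps to one channel control block */
	for (minor = 0; minor < max_ccb; minor++)
		printf("minor %u -> device %u, channel slot %u\n",
		       minor, minor / max_ccb, minor % max_ccb);
	return 0;
}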
+module_init(ilo_init); +module_exit(ilo_exit); diff --git a/drivers/misc/hpilo.h b/drivers/misc/hpilo.h new file mode 100644 index 0000000000..d57c34680b --- /dev/null +++ b/drivers/misc/hpilo.h @@ -0,0 +1,214 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * linux/drivers/char/hpilo.h + * + * Copyright (C) 2008 Hewlett-Packard Development Company, L.P. + * David Altobelli + */ +#ifndef __HPILO_H +#define __HPILO_H + +#define ILO_NAME "hpilo" + +/* iLO ASIC PCI revision id */ +#define PCI_REV_ID_NECHES 7 + +/* max number of open channel control blocks per device, hw limited to 32 */ +#define MAX_CCB 24 +/* min number of open channel control blocks per device, hw limited to 32 */ +#define MIN_CCB 8 +/* max number of supported devices */ +#define MAX_ILO_DEV 1 +/* max number of files */ +#define MAX_OPEN (MAX_CCB * MAX_ILO_DEV) +/* total wait time in usec */ +#define MAX_WAIT_TIME 10000 +/* per spin wait time in usec */ +#define WAIT_TIME 10 +/* spin counter for open/close delay */ +#define MAX_WAIT (MAX_WAIT_TIME / WAIT_TIME) + +/* + * Per device, used to track global memory allocations. + */ +struct ilo_hwinfo { + /* mmio registers on device */ + char __iomem *mmio_vaddr; + + /* doorbell registers on device */ + char __iomem *db_vaddr; + + /* shared memory on device used for channel control blocks */ + char __iomem *ram_vaddr; + + /* files corresponding to this device */ + struct ccb_data *ccb_alloc[MAX_CCB]; + + struct pci_dev *ilo_dev; + + /* + * open_lock serializes ccb_cnt during open and close + * [ irq disabled ] + * -> alloc_lock used when adding/removing/searching ccb_alloc, + * which represents all ccbs open on the device + * --> fifo_lock controls access to fifo queues shared with hw + * + * Locks must be taken in this order, but open_lock and alloc_lock + * are optional, they do not need to be held in order to take a + * lower level lock. + */ + spinlock_t open_lock; + spinlock_t alloc_lock; + spinlock_t fifo_lock; + + struct cdev cdev; +}; + +/* offset from mmio_vaddr for enabling doorbell interrupts */ +#define DB_IRQ 0xB2 +/* offset from mmio_vaddr for outbound communications */ +#define DB_OUT 0xD4 +/* DB_OUT reset bit */ +#define DB_RESET 26 + +/* + * Channel control block. Used to manage hardware queues. + * The format must match hw's version. The hw ccb is 128 bytes, + * but the context area shouldn't be touched by the driver. + */ +#define ILOSW_CCB_SZ 64 +#define ILOHW_CCB_SZ 128 +struct ccb { + union { + char *send_fifobar; + u64 send_fifobar_pa; + } ccb_u1; + union { + char *send_desc; + u64 send_desc_pa; + } ccb_u2; + u64 send_ctrl; + + union { + char *recv_fifobar; + u64 recv_fifobar_pa; + } ccb_u3; + union { + char *recv_desc; + u64 recv_desc_pa; + } ccb_u4; + u64 recv_ctrl; + + union { + char __iomem *db_base; + u64 padding5; + } ccb_u5; + + u64 channel; + + /* unused context area (64 bytes) */ +}; + +/* ccb queue parameters */ +#define SENDQ 1 +#define RECVQ 2 +#define NR_QENTRY 4 +#define L2_QENTRY_SZ 12 + +/* ccb ctrl bitfields */ +#define CTRL_BITPOS_L2SZ 0 +#define CTRL_BITPOS_FIFOINDEXMASK 4 +#define CTRL_BITPOS_DESCLIMIT 18 +#define CTRL_BITPOS_A 30 +#define CTRL_BITPOS_G 31 + +/* ccb doorbell macros */ +#define L2_DB_SIZE 14 +#define ONE_DB_SIZE (1 << L2_DB_SIZE) + +/* + * Per fd structure used to track the ccb allocated to that dev file. 
+ */ +struct ccb_data { + /* software version of ccb, using virtual addrs */ + struct ccb driver_ccb; + + /* hardware version of ccb, using physical addrs */ + struct ccb ilo_ccb; + + /* hardware ccb is written to this shared mapped device memory */ + struct ccb __iomem *mapped_ccb; + + /* dma'able memory used for send/recv queues */ + void *dma_va; + dma_addr_t dma_pa; + size_t dma_size; + + /* pointer to hardware device info */ + struct ilo_hwinfo *ilo_hw; + + /* queue for this ccb to wait for recv data */ + wait_queue_head_t ccb_waitq; + + /* usage count, to allow for shared ccb's */ + int ccb_cnt; + + /* open wanted exclusive access to this ccb */ + int ccb_excl; +}; + +/* + * FIFO queue structure, shared with hw. + */ +#define ILO_START_ALIGN 4096 +#define ILO_CACHE_SZ 128 +struct fifo { + u64 nrents; /* user requested number of fifo entries */ + u64 imask; /* mask to extract valid fifo index */ + u64 merge; /* O/C bits to merge in during enqueue operation */ + u64 reset; /* set to non-zero when the target device resets */ + u8 pad_0[ILO_CACHE_SZ - (sizeof(u64) * 4)]; + + u64 head; + u8 pad_1[ILO_CACHE_SZ - (sizeof(u64))]; + + u64 tail; + u8 pad_2[ILO_CACHE_SZ - (sizeof(u64))]; + + u64 fifobar[]; +}; + +/* convert between struct fifo, and the fifobar, which is saved in the ccb */ +#define FIFOHANDLESIZE (sizeof(struct fifo)) +#define FIFOBARTOHANDLE(_fifo) \ + ((struct fifo *)(((char *)(_fifo)) - FIFOHANDLESIZE)) + +/* the number of qwords to consume from the entry descriptor */ +#define ENTRY_BITPOS_QWORDS 0 +/* descriptor index number (within a specified queue) */ +#define ENTRY_BITPOS_DESCRIPTOR 10 +/* state bit, fifo entry consumed by consumer */ +#define ENTRY_BITPOS_C 22 +/* state bit, fifo entry is occupied */ +#define ENTRY_BITPOS_O 23 + +#define ENTRY_BITS_QWORDS 10 +#define ENTRY_BITS_DESCRIPTOR 12 +#define ENTRY_BITS_C 1 +#define ENTRY_BITS_O 1 +#define ENTRY_BITS_TOTAL \ + (ENTRY_BITS_C + ENTRY_BITS_O + \ + ENTRY_BITS_QWORDS + ENTRY_BITS_DESCRIPTOR) + +/* extract various entry fields */ +#define ENTRY_MASK ((1 << ENTRY_BITS_TOTAL) - 1) +#define ENTRY_MASK_C (((1 << ENTRY_BITS_C) - 1) << ENTRY_BITPOS_C) +#define ENTRY_MASK_O (((1 << ENTRY_BITS_O) - 1) << ENTRY_BITPOS_O) +#define ENTRY_MASK_QWORDS \ + (((1 << ENTRY_BITS_QWORDS) - 1) << ENTRY_BITPOS_QWORDS) +#define ENTRY_MASK_DESCRIPTOR \ + (((1 << ENTRY_BITS_DESCRIPTOR) - 1) << ENTRY_BITPOS_DESCRIPTOR) + +#define ENTRY_MASK_NOSTATE (ENTRY_MASK >> (ENTRY_BITS_C + ENTRY_BITS_O)) + +#endif /* __HPILO_H */ diff --git a/drivers/misc/ibmasm/Makefile b/drivers/misc/ibmasm/Makefile new file mode 100644 index 0000000000..1b9dd0f444 --- /dev/null +++ b/drivers/misc/ibmasm/Makefile @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_IBM_ASM) := ibmasm.o + +ibmasm-y := module.o \ + ibmasmfs.o \ + event.o \ + command.o \ + remote.o \ + heartbeat.o \ + r_heartbeat.o \ + dot_command.o \ + lowlevel.o + +ibmasm-$(CONFIG_SERIAL_8250) += uart.o + diff --git a/drivers/misc/ibmasm/command.c b/drivers/misc/ibmasm/command.c new file mode 100644 index 0000000000..733dd30fba --- /dev/null +++ b/drivers/misc/ibmasm/command.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +/* + * IBM ASM Service Processor Device Driver + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + */ + +#include +#include +#include "ibmasm.h" +#include "lowlevel.h" + +static void exec_next_command(struct service_processor *sp); + +static atomic_t command_count = ATOMIC_INIT(0); + +struct command *ibmasm_new_command(struct 
service_processor *sp, size_t buffer_size) +{ + struct command *cmd; + + if (buffer_size > IBMASM_CMD_MAX_BUFFER_SIZE) + return NULL; + + cmd = kzalloc(sizeof(struct command), GFP_KERNEL); + if (cmd == NULL) + return NULL; + + + cmd->buffer = kzalloc(buffer_size, GFP_KERNEL); + if (cmd->buffer == NULL) { + kfree(cmd); + return NULL; + } + cmd->buffer_size = buffer_size; + + kref_init(&cmd->kref); + cmd->lock = &sp->lock; + + cmd->status = IBMASM_CMD_PENDING; + init_waitqueue_head(&cmd->wait); + INIT_LIST_HEAD(&cmd->queue_node); + + atomic_inc(&command_count); + dbg("command count: %d\n", atomic_read(&command_count)); + + return cmd; +} + +void ibmasm_free_command(struct kref *kref) +{ + struct command *cmd = to_command(kref); + + list_del(&cmd->queue_node); + atomic_dec(&command_count); + dbg("command count: %d\n", atomic_read(&command_count)); + kfree(cmd->buffer); + kfree(cmd); +} + +static void enqueue_command(struct service_processor *sp, struct command *cmd) +{ + list_add_tail(&cmd->queue_node, &sp->command_queue); +} + +static struct command *dequeue_command(struct service_processor *sp) +{ + struct command *cmd; + struct list_head *next; + + if (list_empty(&sp->command_queue)) + return NULL; + + next = sp->command_queue.next; + list_del_init(next); + cmd = list_entry(next, struct command, queue_node); + + return cmd; +} + +static inline void do_exec_command(struct service_processor *sp) +{ + char tsbuf[32]; + + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); + + if (ibmasm_send_i2o_message(sp)) { + sp->current_command->status = IBMASM_CMD_FAILED; + wake_up(&sp->current_command->wait); + command_put(sp->current_command); + exec_next_command(sp); + } +} + +/* + * exec_command + * send a command to a service processor + * Commands are executed sequentially. One command (sp->current_command) + * is sent to the service processor. Once the interrupt handler gets a + * message of type command_response, the message is copied into + * the current commands buffer, + */ +void ibmasm_exec_command(struct service_processor *sp, struct command *cmd) +{ + unsigned long flags; + char tsbuf[32]; + + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); + + spin_lock_irqsave(&sp->lock, flags); + + if (!sp->current_command) { + sp->current_command = cmd; + command_get(sp->current_command); + spin_unlock_irqrestore(&sp->lock, flags); + do_exec_command(sp); + } else { + enqueue_command(sp, cmd); + spin_unlock_irqrestore(&sp->lock, flags); + } +} + +static void exec_next_command(struct service_processor *sp) +{ + unsigned long flags; + char tsbuf[32]; + + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); + + spin_lock_irqsave(&sp->lock, flags); + sp->current_command = dequeue_command(sp); + if (sp->current_command) { + command_get(sp->current_command); + spin_unlock_irqrestore(&sp->lock, flags); + do_exec_command(sp); + } else { + spin_unlock_irqrestore(&sp->lock, flags); + } +} + +/* + * Sleep until a command has failed or a response has been received + * and the command status been updated by the interrupt handler. + * (see receive_response). + */ +void ibmasm_wait_for_response(struct command *cmd, int timeout) +{ + wait_event_interruptible_timeout(cmd->wait, + cmd->status == IBMASM_CMD_COMPLETE || + cmd->status == IBMASM_CMD_FAILED, + timeout * HZ); +} + +/* + * receive_command_response + * called by the interrupt handler when a dot command of type command_response + * was received. 
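+ * The response pointer references the adapter's mapped device memory, which
+ * is why the handler copies the payload out with memcpy_fromio() before
+ * waking the waiter.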
+ */ +void ibmasm_receive_command_response(struct service_processor *sp, void *response, size_t size) +{ + struct command *cmd = sp->current_command; + + if (!sp->current_command) + return; + + memcpy_fromio(cmd->buffer, response, min(size, cmd->buffer_size)); + cmd->status = IBMASM_CMD_COMPLETE; + wake_up(&sp->current_command->wait); + command_put(sp->current_command); + exec_next_command(sp); +} diff --git a/drivers/misc/ibmasm/dot_command.c b/drivers/misc/ibmasm/dot_command.c new file mode 100644 index 0000000000..df389bd4c9 --- /dev/null +++ b/drivers/misc/ibmasm/dot_command.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * IBM ASM Service Processor Device Driver + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + */ + +#include "ibmasm.h" +#include "dot_command.h" + +/* + * Dispatch an incoming message to the specific handler for the message. + * Called from interrupt context. + */ +void ibmasm_receive_message(struct service_processor *sp, void *message, int message_size) +{ + u32 size; + struct dot_command_header *header = (struct dot_command_header *)message; + + if (message_size == 0) + return; + + size = get_dot_command_size(message); + if (size == 0) + return; + + if (size > message_size) + size = message_size; + + switch (header->type) { + case sp_event: + ibmasm_receive_event(sp, message, size); + break; + case sp_command_response: + ibmasm_receive_command_response(sp, message, size); + break; + case sp_heartbeat: + ibmasm_receive_heartbeat(sp, message, size); + break; + default: + dev_err(sp->dev, "Received unknown message from service processor\n"); + } +} + + +#define INIT_BUFFER_SIZE 32 + + +/* + * send the 4.3.5.10 dot command (driver VPD) to the service processor + */ +int ibmasm_send_driver_vpd(struct service_processor *sp) +{ + struct command *command; + struct dot_command_header *header; + u8 *vpd_command; + u8 *vpd_data; + int result = 0; + + command = ibmasm_new_command(sp, INIT_BUFFER_SIZE); + if (command == NULL) + return -ENOMEM; + + header = (struct dot_command_header *)command->buffer; + header->type = sp_write; + header->command_size = 4; + header->data_size = 16; + header->status = 0; + header->reserved = 0; + + vpd_command = command->buffer + sizeof(struct dot_command_header); + vpd_command[0] = 0x4; + vpd_command[1] = 0x3; + vpd_command[2] = 0x5; + vpd_command[3] = 0xa; + + vpd_data = vpd_command + header->command_size; + vpd_data[0] = 0; + strcat(vpd_data, IBMASM_DRIVER_VPD); + vpd_data[10] = 0; + vpd_data[15] = 0; + + ibmasm_exec_command(sp, command); + ibmasm_wait_for_response(command, IBMASM_CMD_TIMEOUT_NORMAL); + + if (command->status != IBMASM_CMD_COMPLETE) + result = -ENODEV; + + command_put(command); + + return result; +} + +struct os_state_command { + struct dot_command_header header; + unsigned char command[3]; + unsigned char data; +}; + +/* + * send the 4.3.6 dot command (os state) to the service processor + * During driver init this function is called with os state "up". + * This causes the service processor to start sending heartbeats the + * driver. + * During driver exit the function is called with os state "down", + * causing the service processor to stop the heartbeats. 
+ */ +int ibmasm_send_os_state(struct service_processor *sp, int os_state) +{ + struct command *cmd; + struct os_state_command *os_state_cmd; + int result = 0; + + cmd = ibmasm_new_command(sp, sizeof(struct os_state_command)); + if (cmd == NULL) + return -ENOMEM; + + os_state_cmd = (struct os_state_command *)cmd->buffer; + os_state_cmd->header.type = sp_write; + os_state_cmd->header.command_size = 3; + os_state_cmd->header.data_size = 1; + os_state_cmd->header.status = 0; + os_state_cmd->command[0] = 4; + os_state_cmd->command[1] = 3; + os_state_cmd->command[2] = 6; + os_state_cmd->data = os_state; + + ibmasm_exec_command(sp, cmd); + ibmasm_wait_for_response(cmd, IBMASM_CMD_TIMEOUT_NORMAL); + + if (cmd->status != IBMASM_CMD_COMPLETE) + result = -ENODEV; + + command_put(cmd); + return result; +} diff --git a/drivers/misc/ibmasm/dot_command.h b/drivers/misc/ibmasm/dot_command.h new file mode 100644 index 0000000000..e03399ebe2 --- /dev/null +++ b/drivers/misc/ibmasm/dot_command.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * IBM ASM Service Processor Device Driver + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + */ + +#ifndef __DOT_COMMAND_H__ +#define __DOT_COMMAND_H__ + +/* + * dot commands are the protocol used to communicate with the service + * processor. + * They consist of header, a command of variable length and data of + * variable length. + */ + +/* dot command types */ +#define sp_write 0 +#define sp_write_next 1 +#define sp_read 2 +#define sp_read_next 3 +#define sp_command_response 4 +#define sp_event 5 +#define sp_heartbeat 6 + +#pragma pack(1) +struct dot_command_header { + u8 type; + u8 command_size; + u16 data_size; + u8 status; + u8 reserved; +}; +#pragma pack() + +static inline size_t get_dot_command_size(void *buffer) +{ + struct dot_command_header *cmd = (struct dot_command_header *)buffer; + return sizeof(struct dot_command_header) + cmd->command_size + cmd->data_size; +} + +static inline unsigned int get_dot_command_timeout(void *buffer) +{ + struct dot_command_header *header = (struct dot_command_header *)buffer; + unsigned char *cmd = buffer + sizeof(struct dot_command_header); + + /* dot commands 6.3.1, 7.1 and 8.x need a longer timeout */ + + if (header->command_size == 3) { + if ((cmd[0] == 6) && (cmd[1] == 3) && (cmd[2] == 1)) + return IBMASM_CMD_TIMEOUT_EXTRA; + } else if (header->command_size == 2) { + if ((cmd[0] == 7) && (cmd[1] == 1)) + return IBMASM_CMD_TIMEOUT_EXTRA; + if (cmd[0] == 8) + return IBMASM_CMD_TIMEOUT_EXTRA; + } + return IBMASM_CMD_TIMEOUT_NORMAL; +} + +#endif /* __DOT_COMMAND_H__ */ diff --git a/drivers/misc/ibmasm/event.c b/drivers/misc/ibmasm/event.c new file mode 100644 index 0000000000..40ce75f897 --- /dev/null +++ b/drivers/misc/ibmasm/event.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +/* + * IBM ASM Service Processor Device Driver + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + */ + +#include +#include +#include "ibmasm.h" +#include "lowlevel.h" + +/* + * ASM service processor event handling routines. + * + * Events are signalled to the device drivers through interrupts. + * They have the format of dot commands, with the type field set to + * sp_event. + * The driver does not interpret the events, it simply stores them in a + * circular buffer. 
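+ * Each stored event is stamped with an increasing serial number; readers
+ * remember the next serial number they expect and are woken whenever the
+ * writer moves ahead of them.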
+ */ + +static void wake_up_event_readers(struct service_processor *sp) +{ + struct event_reader *reader; + + list_for_each_entry(reader, &sp->event_buffer->readers, node) + wake_up_interruptible(&reader->wait); +} + +/* + * receive_event + * Called by the interrupt handler when a dot command of type sp_event is + * received. + * Store the event in the circular event buffer, wake up any sleeping + * event readers. + * There is no reader marker in the buffer, therefore readers are + * responsible for keeping up with the writer, or they will lose events. + */ +void ibmasm_receive_event(struct service_processor *sp, void *data, unsigned int data_size) +{ + struct event_buffer *buffer = sp->event_buffer; + struct ibmasm_event *event; + unsigned long flags; + + data_size = min(data_size, IBMASM_EVENT_MAX_SIZE); + + spin_lock_irqsave(&sp->lock, flags); + /* copy the event into the next slot in the circular buffer */ + event = &buffer->events[buffer->next_index]; + memcpy_fromio(event->data, data, data_size); + event->data_size = data_size; + event->serial_number = buffer->next_serial_number; + + /* advance indices in the buffer */ + buffer->next_index = (buffer->next_index + 1) % IBMASM_NUM_EVENTS; + buffer->next_serial_number++; + spin_unlock_irqrestore(&sp->lock, flags); + + wake_up_event_readers(sp); +} + +static inline int event_available(struct event_buffer *b, struct event_reader *r) +{ + return (r->next_serial_number < b->next_serial_number); +} + +/* + * get_next_event + * Called by event readers (initiated from user space through the file + * system). + * Sleeps until a new event is available. + */ +int ibmasm_get_next_event(struct service_processor *sp, struct event_reader *reader) +{ + struct event_buffer *buffer = sp->event_buffer; + struct ibmasm_event *event; + unsigned int index; + unsigned long flags; + + reader->cancelled = 0; + + if (wait_event_interruptible(reader->wait, + event_available(buffer, reader) || reader->cancelled)) + return -ERESTARTSYS; + + if (!event_available(buffer, reader)) + return 0; + + spin_lock_irqsave(&sp->lock, flags); + + index = buffer->next_index; + event = &buffer->events[index]; + while (event->serial_number < reader->next_serial_number) { + index = (index + 1) % IBMASM_NUM_EVENTS; + event = &buffer->events[index]; + } + memcpy(reader->data, event->data, event->data_size); + reader->data_size = event->data_size; + reader->next_serial_number = event->serial_number + 1; + + spin_unlock_irqrestore(&sp->lock, flags); + + return event->data_size; +} + +void ibmasm_cancel_next_event(struct event_reader *reader) +{ + reader->cancelled = 1; + wake_up_interruptible(&reader->wait); +} + +void ibmasm_event_reader_register(struct service_processor *sp, struct event_reader *reader) +{ + unsigned long flags; + + reader->next_serial_number = sp->event_buffer->next_serial_number; + init_waitqueue_head(&reader->wait); + spin_lock_irqsave(&sp->lock, flags); + list_add(&reader->node, &sp->event_buffer->readers); + spin_unlock_irqrestore(&sp->lock, flags); +} + +void ibmasm_event_reader_unregister(struct service_processor *sp, struct event_reader *reader) +{ + unsigned long flags; + + spin_lock_irqsave(&sp->lock, flags); + list_del(&reader->node); + spin_unlock_irqrestore(&sp->lock, flags); +} + +int ibmasm_event_buffer_init(struct service_processor *sp) +{ + struct event_buffer *buffer; + struct ibmasm_event *event; + int i; + + buffer = kmalloc(sizeof(struct event_buffer), GFP_KERNEL); + if (!buffer) + return -ENOMEM; + + buffer->next_index = 0; + 
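+	/*
+	 * Serial numbers start at 1 while the slots below are initialised to
+	 * serial 0, so ibmasm_get_next_event() can tell never-written entries
+	 * from real ones and skip them.
+	 */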
buffer->next_serial_number = 1; + + event = buffer->events; + for (i = 0; i < IBMASM_NUM_EVENTS; i++, event++) + event->serial_number = 0; + + INIT_LIST_HEAD(&buffer->readers); + + sp->event_buffer = buffer; + + return 0; +} + +void ibmasm_event_buffer_exit(struct service_processor *sp) +{ + kfree(sp->event_buffer); +} diff --git a/drivers/misc/ibmasm/heartbeat.c b/drivers/misc/ibmasm/heartbeat.c new file mode 100644 index 0000000000..59c9a0d956 --- /dev/null +++ b/drivers/misc/ibmasm/heartbeat.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +/* + * IBM ASM Service Processor Device Driver + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + */ + +#include +#include +#include "ibmasm.h" +#include "dot_command.h" +#include "lowlevel.h" + +static int suspend_heartbeats = 0; + +/* + * Once the driver indicates to the service processor that it is running + * - see send_os_state() - the service processor sends periodic heartbeats + * to the driver. The driver must respond to the heartbeats or else the OS + * will be rebooted. + * In the case of a panic the interrupt handler continues to work and thus + * continues to respond to heartbeats, making the service processor believe + * the OS is still running and thus preventing a reboot. + * To prevent this from happening a callback is added to the panic_notifier_list. + * Before responding to a heartbeat the driver checks if a panic has happened, + * if yes it suspends heartbeats, causing the service processor to reboot as + * expected. + */ +static int panic_happened(struct notifier_block *n, unsigned long val, void *v) +{ + suspend_heartbeats = 1; + return 0; +} + +static struct notifier_block panic_notifier = { panic_happened, NULL, 1 }; + +void ibmasm_register_panic_notifier(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, &panic_notifier); +} + +void ibmasm_unregister_panic_notifier(void) +{ + atomic_notifier_chain_unregister(&panic_notifier_list, + &panic_notifier); +} + + +int ibmasm_heartbeat_init(struct service_processor *sp) +{ + sp->heartbeat = ibmasm_new_command(sp, HEARTBEAT_BUFFER_SIZE); + if (sp->heartbeat == NULL) + return -ENOMEM; + + return 0; +} + +void ibmasm_heartbeat_exit(struct service_processor *sp) +{ + char tsbuf[32]; + + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); + ibmasm_wait_for_response(sp->heartbeat, IBMASM_CMD_TIMEOUT_NORMAL); + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); + suspend_heartbeats = 1; + command_put(sp->heartbeat); +} + +void ibmasm_receive_heartbeat(struct service_processor *sp, void *message, size_t size) +{ + struct command *cmd = sp->heartbeat; + struct dot_command_header *header = (struct dot_command_header *)cmd->buffer; + char tsbuf[32]; + + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); + if (suspend_heartbeats) + return; + + /* return the received dot command to sender */ + cmd->status = IBMASM_CMD_PENDING; + size = min(size, cmd->buffer_size); + memcpy_fromio(cmd->buffer, message, size); + header->type = sp_write; + ibmasm_exec_command(sp, cmd); +} diff --git a/drivers/misc/ibmasm/i2o.h b/drivers/misc/ibmasm/i2o.h new file mode 100644 index 0000000000..468fa84a32 --- /dev/null +++ b/drivers/misc/ibmasm/i2o.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * IBM ASM Service Processor Device Driver + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + */ + +#pragma pack(1) +struct i2o_header { + u8 version; + u8 message_flags; + u16 message_size; + u8 target; + u8 initiator_and_target; + u8
initiator; + u8 function; + u32 initiator_context; +}; +#pragma pack() + +#define I2O_HEADER_TEMPLATE \ + { .version = 0x01, \ + .message_flags = 0x00, \ + .function = 0xFF, \ + .initiator = 0x00, \ + .initiator_and_target = 0x40, \ + .target = 0x00, \ + .initiator_context = 0x0 } + +#define I2O_MESSAGE_SIZE 0x1000 +#define I2O_COMMAND_SIZE (I2O_MESSAGE_SIZE - sizeof(struct i2o_header)) + +#pragma pack(1) +struct i2o_message { + struct i2o_header header; + void *data; +}; +#pragma pack() + +static inline unsigned short outgoing_message_size(unsigned int data_size) +{ + unsigned int size; + unsigned short i2o_size; + + if (data_size > I2O_COMMAND_SIZE) + data_size = I2O_COMMAND_SIZE; + + size = sizeof(struct i2o_header) + data_size; + + i2o_size = size / sizeof(u32); + + if (size % sizeof(u32)) + i2o_size++; + + return i2o_size; +} + +static inline u32 incoming_data_size(struct i2o_message *i2o_message) +{ + return (sizeof(u32) * i2o_message->header.message_size); +} diff --git a/drivers/misc/ibmasm/ibmasm.h b/drivers/misc/ibmasm/ibmasm.h new file mode 100644 index 0000000000..a5ced88ca9 --- /dev/null +++ b/drivers/misc/ibmasm/ibmasm.h @@ -0,0 +1,209 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/* + * IBM ASM Service Processor Device Driver + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Driver identification */ +#define DRIVER_NAME "ibmasm" +#define DRIVER_VERSION "1.0" +#define DRIVER_AUTHOR "Max Asbock , Vernon Mauery " +#define DRIVER_DESC "IBM ASM Service Processor Driver" + +#define err(msg) printk(KERN_ERR "%s: " msg "\n", DRIVER_NAME) +#define info(msg) printk(KERN_INFO "%s: " msg "\n", DRIVER_NAME) + +extern int ibmasm_debug; +#define dbg(STR, ARGS...) 
\ + do { \ + if (ibmasm_debug) \ + printk(KERN_DEBUG STR , ##ARGS); \ + } while (0) + +static inline char *get_timestamp(char *buf) +{ + struct timespec64 now; + + ktime_get_real_ts64(&now); + sprintf(buf, "%llu.%.08lu", (long long)now.tv_sec, + now.tv_nsec / NSEC_PER_USEC); + return buf; +} + +#define IBMASM_CMD_PENDING 0 +#define IBMASM_CMD_COMPLETE 1 +#define IBMASM_CMD_FAILED 2 + +#define IBMASM_CMD_TIMEOUT_NORMAL 45 +#define IBMASM_CMD_TIMEOUT_EXTRA 240 + +#define IBMASM_CMD_MAX_BUFFER_SIZE 0x8000 + +#define REVERSE_HEARTBEAT_TIMEOUT 120 + +#define HEARTBEAT_BUFFER_SIZE 0x400 + +#ifdef IA64 +#define IBMASM_DRIVER_VPD "Lin64 6.08 " +#else +#define IBMASM_DRIVER_VPD "Lin32 6.08 " +#endif + +#define SYSTEM_STATE_OS_UP 5 +#define SYSTEM_STATE_OS_DOWN 4 + +#define IBMASM_NAME_SIZE 16 + +#define IBMASM_NUM_EVENTS 10 +#define IBMASM_EVENT_MAX_SIZE 2048u + + +struct command { + struct list_head queue_node; + wait_queue_head_t wait; + unsigned char *buffer; + size_t buffer_size; + int status; + struct kref kref; + spinlock_t *lock; +}; +#define to_command(c) container_of(c, struct command, kref) + +void ibmasm_free_command(struct kref *kref); +static inline void command_put(struct command *cmd) +{ + unsigned long flags; + spinlock_t *lock = cmd->lock; + + spin_lock_irqsave(lock, flags); + kref_put(&cmd->kref, ibmasm_free_command); + spin_unlock_irqrestore(lock, flags); +} + +static inline void command_get(struct command *cmd) +{ + kref_get(&cmd->kref); +} + + +struct ibmasm_event { + unsigned int serial_number; + unsigned int data_size; + unsigned char data[IBMASM_EVENT_MAX_SIZE]; +}; + +struct event_buffer { + struct ibmasm_event events[IBMASM_NUM_EVENTS]; + unsigned int next_serial_number; + unsigned int next_index; + struct list_head readers; +}; + +struct event_reader { + int cancelled; + unsigned int next_serial_number; + wait_queue_head_t wait; + struct list_head node; + unsigned int data_size; + unsigned char data[IBMASM_EVENT_MAX_SIZE]; +}; + +struct reverse_heartbeat { + wait_queue_head_t wait; + unsigned int stopped; +}; + +struct ibmasm_remote { + struct input_dev *keybd_dev; + struct input_dev *mouse_dev; +}; + +struct service_processor { + struct list_head node; + spinlock_t lock; + void __iomem *base_address; + unsigned int irq; + struct command *current_command; + struct command *heartbeat; + struct list_head command_queue; + struct event_buffer *event_buffer; + char dirname[IBMASM_NAME_SIZE]; + char devname[IBMASM_NAME_SIZE]; + unsigned int number; + struct ibmasm_remote remote; + int serial_line; + struct device *dev; +}; + +/* command processing */ +struct command *ibmasm_new_command(struct service_processor *sp, size_t buffer_size); +void ibmasm_exec_command(struct service_processor *sp, struct command *cmd); +void ibmasm_wait_for_response(struct command *cmd, int timeout); +void ibmasm_receive_command_response(struct service_processor *sp, void *response, size_t size); + +/* event processing */ +int ibmasm_event_buffer_init(struct service_processor *sp); +void ibmasm_event_buffer_exit(struct service_processor *sp); +void ibmasm_receive_event(struct service_processor *sp, void *data, unsigned int data_size); +void ibmasm_event_reader_register(struct service_processor *sp, struct event_reader *reader); +void ibmasm_event_reader_unregister(struct service_processor *sp, struct event_reader *reader); +int ibmasm_get_next_event(struct service_processor *sp, struct event_reader *reader); +void ibmasm_cancel_next_event(struct event_reader *reader); + +/* heartbeat - from SP to OS 
*/ +void ibmasm_register_panic_notifier(void); +void ibmasm_unregister_panic_notifier(void); +int ibmasm_heartbeat_init(struct service_processor *sp); +void ibmasm_heartbeat_exit(struct service_processor *sp); +void ibmasm_receive_heartbeat(struct service_processor *sp, void *message, size_t size); + +/* reverse heartbeat - from OS to SP */ +void ibmasm_init_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb); +int ibmasm_start_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb); +void ibmasm_stop_reverse_heartbeat(struct reverse_heartbeat *rhb); + +/* dot commands */ +void ibmasm_receive_message(struct service_processor *sp, void *data, int data_size); +int ibmasm_send_driver_vpd(struct service_processor *sp); +int ibmasm_send_os_state(struct service_processor *sp, int os_state); + +/* low level message processing */ +int ibmasm_send_i2o_message(struct service_processor *sp); +irqreturn_t ibmasm_interrupt_handler(int irq, void * dev_id); + +/* remote console */ +void ibmasm_handle_mouse_interrupt(struct service_processor *sp); +int ibmasm_init_remote_input_dev(struct service_processor *sp); +void ibmasm_free_remote_input_dev(struct service_processor *sp); + +/* file system */ +int ibmasmfs_register(void); +void ibmasmfs_unregister(void); +void ibmasmfs_add_sp(struct service_processor *sp); + +/* uart */ +#if IS_ENABLED(CONFIG_SERIAL_8250) +void ibmasm_register_uart(struct service_processor *sp); +void ibmasm_unregister_uart(struct service_processor *sp); +#else +#define ibmasm_register_uart(sp) do { } while(0) +#define ibmasm_unregister_uart(sp) do { } while(0) +#endif diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c new file mode 100644 index 0000000000..5867af9f59 --- /dev/null +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -0,0 +1,603 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * IBM ASM Service Processor Device Driver + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + */ + +/* + * Parts of this code are based on an article by Jonathan Corbet + * that appeared in Linux Weekly News. + */ + + +/* + * The IBMASM file virtual filesystem. It creates the following hierarchy + * dynamically when mounted from user space: + * + * /ibmasm + * |-- 0 + * | |-- command + * | |-- event + * | |-- reverse_heartbeat + * | `-- remote_video + * | |-- depth + * | |-- height + * | `-- width + * . + * . + * . 
+ * `-- n + * |-- command + * |-- event + * |-- reverse_heartbeat + * `-- remote_video + * |-- depth + * |-- height + * `-- width + * + * For each service processor the following files are created: + * + * command: execute dot commands + * write: execute a dot command on the service processor + * read: return the result of a previously executed dot command + * + * events: listen for service processor events + * read: sleep (interruptible) until an event occurs + * write: wakeup sleeping event listener + * + * reverse_heartbeat: send a heartbeat to the service processor + * read: sleep (interruptible) until the reverse heartbeat fails + * write: wakeup sleeping heartbeat listener + * + * remote_video/width + * remote_video/height + * remote_video/width: control remote display settings + * write: set value + * read: read value + */ + +#include +#include +#include +#include +#include +#include +#include "ibmasm.h" +#include "remote.h" +#include "dot_command.h" + +#define IBMASMFS_MAGIC 0x66726f67 + +static LIST_HEAD(service_processors); + +static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode); +static void ibmasmfs_create_files (struct super_block *sb); +static int ibmasmfs_fill_super(struct super_block *sb, struct fs_context *fc); + +static int ibmasmfs_get_tree(struct fs_context *fc) +{ + return get_tree_single(fc, ibmasmfs_fill_super); +} + +static const struct fs_context_operations ibmasmfs_context_ops = { + .get_tree = ibmasmfs_get_tree, +}; + +static int ibmasmfs_init_fs_context(struct fs_context *fc) +{ + fc->ops = &ibmasmfs_context_ops; + return 0; +} + +static const struct super_operations ibmasmfs_s_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, +}; + +static const struct file_operations *ibmasmfs_dir_ops = &simple_dir_operations; + +static struct file_system_type ibmasmfs_type = { + .owner = THIS_MODULE, + .name = "ibmasmfs", + .init_fs_context = ibmasmfs_init_fs_context, + .kill_sb = kill_litter_super, +}; +MODULE_ALIAS_FS("ibmasmfs"); + +static int ibmasmfs_fill_super(struct super_block *sb, struct fs_context *fc) +{ + struct inode *root; + + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; + sb->s_magic = IBMASMFS_MAGIC; + sb->s_op = &ibmasmfs_s_ops; + sb->s_time_gran = 1; + + root = ibmasmfs_make_inode (sb, S_IFDIR | 0500); + if (!root) + return -ENOMEM; + + root->i_op = &simple_dir_inode_operations; + root->i_fop = ibmasmfs_dir_ops; + + sb->s_root = d_make_root(root); + if (!sb->s_root) + return -ENOMEM; + + ibmasmfs_create_files(sb); + return 0; +} + +static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode) +{ + struct inode *ret = new_inode(sb); + + if (ret) { + ret->i_ino = get_next_ino(); + ret->i_mode = mode; + ret->i_atime = ret->i_mtime = inode_set_ctime_current(ret); + } + return ret; +} + +static struct dentry *ibmasmfs_create_file(struct dentry *parent, + const char *name, + const struct file_operations *fops, + void *data, + int mode) +{ + struct dentry *dentry; + struct inode *inode; + + dentry = d_alloc_name(parent, name); + if (!dentry) + return NULL; + + inode = ibmasmfs_make_inode(parent->d_sb, S_IFREG | mode); + if (!inode) { + dput(dentry); + return NULL; + } + + inode->i_fop = fops; + inode->i_private = data; + + d_add(dentry, inode); + return dentry; +} + +static struct dentry *ibmasmfs_create_dir(struct dentry *parent, + const char *name) +{ + struct dentry *dentry; + struct inode *inode; + + dentry = d_alloc_name(parent, name); + if (!dentry) + return NULL; + + inode = 
ibmasmfs_make_inode(parent->d_sb, S_IFDIR | 0500); + if (!inode) { + dput(dentry); + return NULL; + } + + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = ibmasmfs_dir_ops; + + d_add(dentry, inode); + return dentry; +} + +int ibmasmfs_register(void) +{ + return register_filesystem(&ibmasmfs_type); +} + +void ibmasmfs_unregister(void) +{ + unregister_filesystem(&ibmasmfs_type); +} + +void ibmasmfs_add_sp(struct service_processor *sp) +{ + list_add(&sp->node, &service_processors); +} + +/* struct to save state between command file operations */ +struct ibmasmfs_command_data { + struct service_processor *sp; + struct command *command; +}; + +/* struct to save state between event file operations */ +struct ibmasmfs_event_data { + struct service_processor *sp; + struct event_reader reader; + int active; +}; + +/* struct to save state between reverse heartbeat file operations */ +struct ibmasmfs_heartbeat_data { + struct service_processor *sp; + struct reverse_heartbeat heartbeat; + int active; +}; + +static int command_file_open(struct inode *inode, struct file *file) +{ + struct ibmasmfs_command_data *command_data; + + if (!inode->i_private) + return -ENODEV; + + command_data = kmalloc(sizeof(struct ibmasmfs_command_data), GFP_KERNEL); + if (!command_data) + return -ENOMEM; + + command_data->command = NULL; + command_data->sp = inode->i_private; + file->private_data = command_data; + return 0; +} + +static int command_file_close(struct inode *inode, struct file *file) +{ + struct ibmasmfs_command_data *command_data = file->private_data; + + if (command_data->command) + command_put(command_data->command); + + kfree(command_data); + return 0; +} + +static ssize_t command_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset) +{ + struct ibmasmfs_command_data *command_data = file->private_data; + struct command *cmd; + int len; + unsigned long flags; + + if (*offset < 0) + return -EINVAL; + if (count == 0 || count > IBMASM_CMD_MAX_BUFFER_SIZE) + return 0; + if (*offset != 0) + return 0; + + spin_lock_irqsave(&command_data->sp->lock, flags); + cmd = command_data->command; + if (cmd == NULL) { + spin_unlock_irqrestore(&command_data->sp->lock, flags); + return 0; + } + command_data->command = NULL; + spin_unlock_irqrestore(&command_data->sp->lock, flags); + + if (cmd->status != IBMASM_CMD_COMPLETE) { + command_put(cmd); + return -EIO; + } + len = min(count, cmd->buffer_size); + if (copy_to_user(buf, cmd->buffer, len)) { + command_put(cmd); + return -EFAULT; + } + command_put(cmd); + + return len; +} + +static ssize_t command_file_write(struct file *file, const char __user *ubuff, size_t count, loff_t *offset) +{ + struct ibmasmfs_command_data *command_data = file->private_data; + struct command *cmd; + unsigned long flags; + + if (*offset < 0) + return -EINVAL; + if (count == 0 || count > IBMASM_CMD_MAX_BUFFER_SIZE) + return 0; + if (*offset != 0) + return 0; + + /* commands are executed sequentially, only one command at a time */ + if (command_data->command) + return -EAGAIN; + + cmd = ibmasm_new_command(command_data->sp, count); + if (!cmd) + return -ENOMEM; + + if (copy_from_user(cmd->buffer, ubuff, count)) { + command_put(cmd); + return -EFAULT; + } + + spin_lock_irqsave(&command_data->sp->lock, flags); + if (command_data->command) { + spin_unlock_irqrestore(&command_data->sp->lock, flags); + command_put(cmd); + return -EAGAIN; + } + command_data->command = cmd; + spin_unlock_irqrestore(&command_data->sp->lock, flags); + + ibmasm_exec_command(command_data->sp, 
cmd); + ibmasm_wait_for_response(cmd, get_dot_command_timeout(cmd->buffer)); + + return count; +} + +static int event_file_open(struct inode *inode, struct file *file) +{ + struct ibmasmfs_event_data *event_data; + struct service_processor *sp; + + if (!inode->i_private) + return -ENODEV; + + sp = inode->i_private; + + event_data = kmalloc(sizeof(struct ibmasmfs_event_data), GFP_KERNEL); + if (!event_data) + return -ENOMEM; + + ibmasm_event_reader_register(sp, &event_data->reader); + + event_data->sp = sp; + event_data->active = 0; + file->private_data = event_data; + return 0; +} + +static int event_file_close(struct inode *inode, struct file *file) +{ + struct ibmasmfs_event_data *event_data = file->private_data; + + ibmasm_event_reader_unregister(event_data->sp, &event_data->reader); + kfree(event_data); + return 0; +} + +static ssize_t event_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset) +{ + struct ibmasmfs_event_data *event_data = file->private_data; + struct event_reader *reader = &event_data->reader; + struct service_processor *sp = event_data->sp; + int ret; + unsigned long flags; + + if (*offset < 0) + return -EINVAL; + if (count == 0 || count > IBMASM_EVENT_MAX_SIZE) + return 0; + if (*offset != 0) + return 0; + + spin_lock_irqsave(&sp->lock, flags); + if (event_data->active) { + spin_unlock_irqrestore(&sp->lock, flags); + return -EBUSY; + } + event_data->active = 1; + spin_unlock_irqrestore(&sp->lock, flags); + + ret = ibmasm_get_next_event(sp, reader); + if (ret <= 0) + goto out; + + if (count < reader->data_size) { + ret = -EINVAL; + goto out; + } + + if (copy_to_user(buf, reader->data, reader->data_size)) { + ret = -EFAULT; + goto out; + } + ret = reader->data_size; + +out: + event_data->active = 0; + return ret; +} + +static ssize_t event_file_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) +{ + struct ibmasmfs_event_data *event_data = file->private_data; + + if (*offset < 0) + return -EINVAL; + if (count != 1) + return 0; + if (*offset != 0) + return 0; + + ibmasm_cancel_next_event(&event_data->reader); + return 0; +} + +static int r_heartbeat_file_open(struct inode *inode, struct file *file) +{ + struct ibmasmfs_heartbeat_data *rhbeat; + + if (!inode->i_private) + return -ENODEV; + + rhbeat = kmalloc(sizeof(struct ibmasmfs_heartbeat_data), GFP_KERNEL); + if (!rhbeat) + return -ENOMEM; + + rhbeat->sp = inode->i_private; + rhbeat->active = 0; + ibmasm_init_reverse_heartbeat(rhbeat->sp, &rhbeat->heartbeat); + file->private_data = rhbeat; + return 0; +} + +static int r_heartbeat_file_close(struct inode *inode, struct file *file) +{ + struct ibmasmfs_heartbeat_data *rhbeat = file->private_data; + + kfree(rhbeat); + return 0; +} + +static ssize_t r_heartbeat_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset) +{ + struct ibmasmfs_heartbeat_data *rhbeat = file->private_data; + unsigned long flags; + int result; + + if (*offset < 0) + return -EINVAL; + if (count == 0 || count > 1024) + return 0; + if (*offset != 0) + return 0; + + /* allow only one reverse heartbeat per process */ + spin_lock_irqsave(&rhbeat->sp->lock, flags); + if (rhbeat->active) { + spin_unlock_irqrestore(&rhbeat->sp->lock, flags); + return -EBUSY; + } + rhbeat->active = 1; + spin_unlock_irqrestore(&rhbeat->sp->lock, flags); + + result = ibmasm_start_reverse_heartbeat(rhbeat->sp, &rhbeat->heartbeat); + rhbeat->active = 0; + + return result; +} + +static ssize_t r_heartbeat_file_write(struct file *file, const char __user *buf, 
size_t count, loff_t *offset) +{ + struct ibmasmfs_heartbeat_data *rhbeat = file->private_data; + + if (*offset < 0) + return -EINVAL; + if (count != 1) + return 0; + if (*offset != 0) + return 0; + + if (rhbeat->active) + ibmasm_stop_reverse_heartbeat(&rhbeat->heartbeat); + + return 1; +} + +static int remote_settings_file_close(struct inode *inode, struct file *file) +{ + return 0; +} + +static ssize_t remote_settings_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset) +{ + void __iomem *address = (void __iomem *)file->private_data; + int len = 0; + unsigned int value; + char lbuf[20]; + + value = readl(address); + len = snprintf(lbuf, sizeof(lbuf), "%d\n", value); + + return simple_read_from_buffer(buf, count, offset, lbuf, len); +} + +static ssize_t remote_settings_file_write(struct file *file, const char __user *ubuff, size_t count, loff_t *offset) +{ + void __iomem *address = (void __iomem *)file->private_data; + char *buff; + unsigned int value; + + if (*offset < 0) + return -EINVAL; + if (count == 0 || count > 1024) + return 0; + if (*offset != 0) + return 0; + + buff = kzalloc (count + 1, GFP_KERNEL); + if (!buff) + return -ENOMEM; + + + if (copy_from_user(buff, ubuff, count)) { + kfree(buff); + return -EFAULT; + } + + value = simple_strtoul(buff, NULL, 10); + writel(value, address); + kfree(buff); + + return count; +} + +static const struct file_operations command_fops = { + .open = command_file_open, + .release = command_file_close, + .read = command_file_read, + .write = command_file_write, + .llseek = generic_file_llseek, +}; + +static const struct file_operations event_fops = { + .open = event_file_open, + .release = event_file_close, + .read = event_file_read, + .write = event_file_write, + .llseek = generic_file_llseek, +}; + +static const struct file_operations r_heartbeat_fops = { + .open = r_heartbeat_file_open, + .release = r_heartbeat_file_close, + .read = r_heartbeat_file_read, + .write = r_heartbeat_file_write, + .llseek = generic_file_llseek, +}; + +static const struct file_operations remote_settings_fops = { + .open = simple_open, + .release = remote_settings_file_close, + .read = remote_settings_file_read, + .write = remote_settings_file_write, + .llseek = generic_file_llseek, +}; + + +static void ibmasmfs_create_files (struct super_block *sb) +{ + struct list_head *entry; + struct service_processor *sp; + + list_for_each(entry, &service_processors) { + struct dentry *dir; + struct dentry *remote_dir; + sp = list_entry(entry, struct service_processor, node); + dir = ibmasmfs_create_dir(sb->s_root, sp->dirname); + if (!dir) + continue; + + ibmasmfs_create_file(dir, "command", &command_fops, sp, S_IRUSR|S_IWUSR); + ibmasmfs_create_file(dir, "event", &event_fops, sp, S_IRUSR|S_IWUSR); + ibmasmfs_create_file(dir, "reverse_heartbeat", &r_heartbeat_fops, sp, S_IRUSR|S_IWUSR); + + remote_dir = ibmasmfs_create_dir(dir, "remote_video"); + if (!remote_dir) + continue; + + ibmasmfs_create_file(remote_dir, "width", &remote_settings_fops, (void *)display_width(sp), S_IRUSR|S_IWUSR); + ibmasmfs_create_file(remote_dir, "height", &remote_settings_fops, (void *)display_height(sp), S_IRUSR|S_IWUSR); + ibmasmfs_create_file(remote_dir, "depth", &remote_settings_fops, (void *)display_depth(sp), S_IRUSR|S_IWUSR); + } +} diff --git a/drivers/misc/ibmasm/lowlevel.c b/drivers/misc/ibmasm/lowlevel.c new file mode 100644 index 0000000000..6922dc6c10 --- /dev/null +++ b/drivers/misc/ibmasm/lowlevel.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* 
+ * IBM ASM Service Processor Device Driver + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + */ + +#include "ibmasm.h" +#include "lowlevel.h" +#include "i2o.h" +#include "dot_command.h" +#include "remote.h" + +static struct i2o_header header = I2O_HEADER_TEMPLATE; + + +int ibmasm_send_i2o_message(struct service_processor *sp) +{ + u32 mfa; + unsigned int command_size; + struct i2o_message *message; + struct command *command = sp->current_command; + + mfa = get_mfa_inbound(sp->base_address); + if (!mfa) + return 1; + + command_size = get_dot_command_size(command->buffer); + header.message_size = outgoing_message_size(command_size); + + message = get_i2o_message(sp->base_address, mfa); + + memcpy_toio(&message->header, &header, sizeof(struct i2o_header)); + memcpy_toio(&message->data, command->buffer, command_size); + + set_mfa_inbound(sp->base_address, mfa); + + return 0; +} + +irqreturn_t ibmasm_interrupt_handler(int irq, void * dev_id) +{ + u32 mfa; + struct service_processor *sp = (struct service_processor *)dev_id; + void __iomem *base_address = sp->base_address; + char tsbuf[32]; + + if (!sp_interrupt_pending(base_address)) + return IRQ_NONE; + + dbg("respond to interrupt at %s\n", get_timestamp(tsbuf)); + + if (mouse_interrupt_pending(sp)) { + ibmasm_handle_mouse_interrupt(sp); + clear_mouse_interrupt(sp); + } + + mfa = get_mfa_outbound(base_address); + if (valid_mfa(mfa)) { + struct i2o_message *msg = get_i2o_message(base_address, mfa); + ibmasm_receive_message(sp, &msg->data, incoming_data_size(msg)); + } else + dbg("didn't get a valid MFA\n"); + + set_mfa_outbound(base_address, mfa); + dbg("finished interrupt at %s\n", get_timestamp(tsbuf)); + + return IRQ_HANDLED; +} diff --git a/drivers/misc/ibmasm/lowlevel.h b/drivers/misc/ibmasm/lowlevel.h new file mode 100644 index 0000000000..25f1ed07c3 --- /dev/null +++ b/drivers/misc/ibmasm/lowlevel.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * IBM ASM Service Processor Device Driver + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + */ + +/* Condor service processor specific hardware definitions */ + +#ifndef __IBMASM_CONDOR_H__ +#define __IBMASM_CONDOR_H__ + +#include + +#define VENDORID_IBM 0x1014 +#define DEVICEID_RSA 0x010F + +#define GET_MFA_ADDR(x) (x & 0xFFFFFF00) + +#define MAILBOX_FULL(x) (x & 0x00000001) + +#define NO_MFAS_AVAILABLE 0xFFFFFFFF + + +#define INBOUND_QUEUE_PORT 0x40 /* contains address of next free MFA */ +#define OUTBOUND_QUEUE_PORT 0x44 /* contains address of posted MFA */ + +#define SP_INTR_MASK 0x00000008 +#define UART_INTR_MASK 0x00000010 + +#define INTR_STATUS_REGISTER 0x13A0 +#define INTR_CONTROL_REGISTER 0x13A4 + +#define SCOUT_COM_A_BASE 0x0000 +#define SCOUT_COM_B_BASE 0x0100 +#define SCOUT_COM_C_BASE 0x0200 +#define SCOUT_COM_D_BASE 0x0300 + +static inline int sp_interrupt_pending(void __iomem *base_address) +{ + return SP_INTR_MASK & readl(base_address + INTR_STATUS_REGISTER); +} + +static inline int uart_interrupt_pending(void __iomem *base_address) +{ + return UART_INTR_MASK & readl(base_address + INTR_STATUS_REGISTER); +} + +static inline void ibmasm_enable_interrupts(void __iomem *base_address, int mask) +{ + void __iomem *ctrl_reg = base_address + INTR_CONTROL_REGISTER; + writel( readl(ctrl_reg) & ~mask, ctrl_reg); +} + +static inline void ibmasm_disable_interrupts(void __iomem *base_address, int mask) +{ + void __iomem *ctrl_reg = base_address + INTR_CONTROL_REGISTER; + writel( readl(ctrl_reg) | mask, ctrl_reg); +} + 
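+/*
+ * Worked sizing example for the I2O frames returned by get_i2o_message().
+ * This is an illustrative sketch only: the example_* helper is hypothetical,
+ * not part of the driver, and assumes i2o.h and dot_command.h are in scope.
+ * A 4.3.6 "os state" request carries a 6-byte dot_command_header, a 3-byte
+ * command and one data byte; adding the 12-byte packed i2o_header gives
+ * 22 bytes, which outgoing_message_size() rounds up to 6 32-bit words
+ * (24 bytes on the wire).  incoming_data_size() converts a reply back from
+ * words to bytes.
+ */
+static inline unsigned short example_os_state_frame_words(void)
+{
+	/* dot command: 6-byte header + 3 command bytes + 1 data byte */
+	unsigned int dot_size = sizeof(struct dot_command_header) + 3 + 1;
+
+	return outgoing_message_size(dot_size);	/* 22 bytes -> 6 words */
+}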
+static inline void enable_sp_interrupts(void __iomem *base_address) +{ + ibmasm_enable_interrupts(base_address, SP_INTR_MASK); +} + +static inline void disable_sp_interrupts(void __iomem *base_address) +{ + ibmasm_disable_interrupts(base_address, SP_INTR_MASK); +} + +static inline void enable_uart_interrupts(void __iomem *base_address) +{ + ibmasm_enable_interrupts(base_address, UART_INTR_MASK); +} + +static inline void disable_uart_interrupts(void __iomem *base_address) +{ + ibmasm_disable_interrupts(base_address, UART_INTR_MASK); +} + +#define valid_mfa(mfa) ( (mfa) != NO_MFAS_AVAILABLE ) + +static inline u32 get_mfa_outbound(void __iomem *base_address) +{ + int retry; + u32 mfa; + + for (retry=0; retry<=10; retry++) { + mfa = readl(base_address + OUTBOUND_QUEUE_PORT); + if (valid_mfa(mfa)) + break; + } + return mfa; +} + +static inline void set_mfa_outbound(void __iomem *base_address, u32 mfa) +{ + writel(mfa, base_address + OUTBOUND_QUEUE_PORT); +} + +static inline u32 get_mfa_inbound(void __iomem *base_address) +{ + u32 mfa = readl(base_address + INBOUND_QUEUE_PORT); + + if (MAILBOX_FULL(mfa)) + return 0; + + return mfa; +} + +static inline void set_mfa_inbound(void __iomem *base_address, u32 mfa) +{ + writel(mfa, base_address + INBOUND_QUEUE_PORT); +} + +static inline struct i2o_message *get_i2o_message(void __iomem *base_address, u32 mfa) +{ + return (struct i2o_message *)(GET_MFA_ADDR(mfa) + base_address); +} + +#endif /* __IBMASM_CONDOR_H__ */ diff --git a/drivers/misc/ibmasm/module.c b/drivers/misc/ibmasm/module.c new file mode 100644 index 0000000000..dc8a06c06c --- /dev/null +++ b/drivers/misc/ibmasm/module.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +/* + * IBM ASM Service Processor Device Driver + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + * + * This driver is based on code originally written by Pete Reynolds + * and others. + */ + +/* + * The ASM device driver does the following things: + * + * 1) When loaded it sends a message to the service processor, + * indicating that an OS is * running. This causes the service processor + * to send periodic heartbeats to the OS. + * + * 2) Answers the periodic heartbeats sent by the service processor. + * Failure to do so would result in system reboot. + * + * 3) Acts as a pass through for dot commands sent from user applications. + * The interface for this is the ibmasmfs file system. + * + * 4) Allows user applications to register for event notification. Events + * are sent to the driver through interrupts. They can be read from user + * space through the ibmasmfs file system. + * + * 5) Allows user space applications to send heartbeats to the service + * processor (aka reverse heartbeats). Again this happens through ibmasmfs. + * + * 6) Handles remote mouse and keyboard event interrupts and makes them + * available to user applications through ibmasmfs. 
+ * + */ + +#include +#include +#include +#include "ibmasm.h" +#include "lowlevel.h" +#include "remote.h" + +int ibmasm_debug = 0; +module_param(ibmasm_debug, int , S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(ibmasm_debug, " Set debug mode on or off"); + + +static int ibmasm_init_one(struct pci_dev *pdev, const struct pci_device_id *id) +{ + int result; + struct service_processor *sp; + + if ((result = pci_enable_device(pdev))) { + dev_err(&pdev->dev, "Failed to enable PCI device\n"); + return result; + } + if ((result = pci_request_regions(pdev, DRIVER_NAME))) { + dev_err(&pdev->dev, "Failed to allocate PCI resources\n"); + goto error_resources; + } + /* vnc client won't work without bus-mastering */ + pci_set_master(pdev); + + sp = kzalloc(sizeof(struct service_processor), GFP_KERNEL); + if (sp == NULL) { + dev_err(&pdev->dev, "Failed to allocate memory\n"); + result = -ENOMEM; + goto error_kmalloc; + } + + spin_lock_init(&sp->lock); + INIT_LIST_HEAD(&sp->command_queue); + + pci_set_drvdata(pdev, (void *)sp); + sp->dev = &pdev->dev; + sp->number = pdev->bus->number; + snprintf(sp->dirname, IBMASM_NAME_SIZE, "%d", sp->number); + snprintf(sp->devname, IBMASM_NAME_SIZE, "%s%d", DRIVER_NAME, sp->number); + + result = ibmasm_event_buffer_init(sp); + if (result) { + dev_err(sp->dev, "Failed to allocate event buffer\n"); + goto error_eventbuffer; + } + + result = ibmasm_heartbeat_init(sp); + if (result) { + dev_err(sp->dev, "Failed to allocate heartbeat command\n"); + goto error_heartbeat; + } + + sp->irq = pdev->irq; + sp->base_address = pci_ioremap_bar(pdev, 0); + if (!sp->base_address) { + dev_err(sp->dev, "Failed to ioremap pci memory\n"); + result = -ENODEV; + goto error_ioremap; + } + + result = request_irq(sp->irq, ibmasm_interrupt_handler, IRQF_SHARED, sp->devname, (void*)sp); + if (result) { + dev_err(sp->dev, "Failed to register interrupt handler\n"); + goto error_request_irq; + } + + enable_sp_interrupts(sp->base_address); + + result = ibmasm_init_remote_input_dev(sp); + if (result) { + dev_err(sp->dev, "Failed to initialize remote queue\n"); + goto error_init_remote; + } + + result = ibmasm_send_driver_vpd(sp); + if (result) { + dev_err(sp->dev, "Failed to send driver VPD to service processor\n"); + goto error_send_message; + } + result = ibmasm_send_os_state(sp, SYSTEM_STATE_OS_UP); + if (result) { + dev_err(sp->dev, "Failed to send OS state to service processor\n"); + goto error_send_message; + } + ibmasmfs_add_sp(sp); + + ibmasm_register_uart(sp); + + return 0; + +error_send_message: + ibmasm_free_remote_input_dev(sp); +error_init_remote: + disable_sp_interrupts(sp->base_address); + free_irq(sp->irq, (void *)sp); +error_request_irq: + iounmap(sp->base_address); +error_ioremap: + ibmasm_heartbeat_exit(sp); +error_heartbeat: + ibmasm_event_buffer_exit(sp); +error_eventbuffer: + kfree(sp); +error_kmalloc: + pci_release_regions(pdev); +error_resources: + pci_disable_device(pdev); + + return result; +} + +static void ibmasm_remove_one(struct pci_dev *pdev) +{ + struct service_processor *sp = pci_get_drvdata(pdev); + + dbg("Unregistering UART\n"); + ibmasm_unregister_uart(sp); + dbg("Sending OS down message\n"); + if (ibmasm_send_os_state(sp, SYSTEM_STATE_OS_DOWN)) + err("failed to get response to 'Send OS State' command\n"); + dbg("Disabling heartbeats\n"); + ibmasm_heartbeat_exit(sp); + dbg("Disabling interrupts\n"); + disable_sp_interrupts(sp->base_address); + dbg("Freeing SP irq\n"); + free_irq(sp->irq, (void *)sp); + dbg("Cleaning up\n"); + ibmasm_free_remote_input_dev(sp); + 
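+	/* the IRQ has been freed above, so nothing can race with the unmap */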
iounmap(sp->base_address); + ibmasm_event_buffer_exit(sp); + kfree(sp); + pci_release_regions(pdev); + pci_disable_device(pdev); +} + +static struct pci_device_id ibmasm_pci_table[] = +{ + { PCI_DEVICE(VENDORID_IBM, DEVICEID_RSA) }, + {}, +}; + +static struct pci_driver ibmasm_driver = { + .name = DRIVER_NAME, + .id_table = ibmasm_pci_table, + .probe = ibmasm_init_one, + .remove = ibmasm_remove_one, +}; + +static void __exit ibmasm_exit (void) +{ + ibmasm_unregister_panic_notifier(); + ibmasmfs_unregister(); + pci_unregister_driver(&ibmasm_driver); + info(DRIVER_DESC " version " DRIVER_VERSION " unloaded"); +} + +static int __init ibmasm_init(void) +{ + int result = pci_register_driver(&ibmasm_driver); + if (result) + return result; + + result = ibmasmfs_register(); + if (result) { + pci_unregister_driver(&ibmasm_driver); + err("Failed to register ibmasmfs file system"); + return result; + } + + ibmasm_register_panic_notifier(); + info(DRIVER_DESC " version " DRIVER_VERSION " loaded"); + return 0; +} + +module_init(ibmasm_init); +module_exit(ibmasm_exit); + +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, ibmasm_pci_table); + diff --git a/drivers/misc/ibmasm/r_heartbeat.c b/drivers/misc/ibmasm/r_heartbeat.c new file mode 100644 index 0000000000..21c9b6a6f2 --- /dev/null +++ b/drivers/misc/ibmasm/r_heartbeat.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +/* + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + */ + +#include +#include "ibmasm.h" +#include "dot_command.h" + +/* + * Reverse Heartbeat, i.e. heartbeats sent from the driver to the + * service processor. + * These heartbeats are initiated by user level programs. + */ + +/* the reverse heartbeat dot command */ +#pragma pack(1) +static struct { + struct dot_command_header header; + unsigned char command[3]; +} rhb_dot_cmd = { + .header = { + .type = sp_read, + .command_size = 3, + .data_size = 0, + .status = 0 + }, + .command = { 4, 3, 6 } +}; +#pragma pack() + +void ibmasm_init_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb) +{ + init_waitqueue_head(&rhb->wait); + rhb->stopped = 0; +} + +/* + * start_reverse_heartbeat + * Loop forever, sending a reverse heartbeat dot command to the service + * processor, then sleeping. The loop comes to an end if the service + * processor fails to respond 3 times or we were interrupted. 
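+ * A signal or ibmasm_stop_reverse_heartbeat() ends the wait early and makes
+ * the function return -EINTR; three failed heartbeat commands end the loop
+ * with a return value of 1.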
+ */ +int ibmasm_start_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb) +{ + struct command *cmd; + int times_failed = 0; + int result = 1; + + cmd = ibmasm_new_command(sp, sizeof rhb_dot_cmd); + if (!cmd) + return -ENOMEM; + + while (times_failed < 3) { + memcpy(cmd->buffer, (void *)&rhb_dot_cmd, sizeof rhb_dot_cmd); + cmd->status = IBMASM_CMD_PENDING; + ibmasm_exec_command(sp, cmd); + ibmasm_wait_for_response(cmd, IBMASM_CMD_TIMEOUT_NORMAL); + + if (cmd->status != IBMASM_CMD_COMPLETE) + times_failed++; + + wait_event_interruptible_timeout(rhb->wait, + rhb->stopped, + REVERSE_HEARTBEAT_TIMEOUT * HZ); + + if (signal_pending(current) || rhb->stopped) { + result = -EINTR; + break; + } + } + command_put(cmd); + rhb->stopped = 0; + + return result; +} + +void ibmasm_stop_reverse_heartbeat(struct reverse_heartbeat *rhb) +{ + rhb->stopped = 1; + wake_up_interruptible(&rhb->wait); +} diff --git a/drivers/misc/ibmasm/remote.c b/drivers/misc/ibmasm/remote.c new file mode 100644 index 0000000000..ec816d3b38 --- /dev/null +++ b/drivers/misc/ibmasm/remote.c @@ -0,0 +1,268 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * IBM ASM Service Processor Device Driver + * + * Copyright (C) IBM Corporation, 2004 + * + * Authors: Max Asböck + * Vernon Mauery + */ + +/* Remote mouse and keyboard event handling functions */ + +#include +#include "ibmasm.h" +#include "remote.h" + +#define MOUSE_X_MAX 1600 +#define MOUSE_Y_MAX 1200 + +static const unsigned short xlate_high[XLATE_SIZE] = { + [KEY_SYM_ENTER & 0xff] = KEY_ENTER, + [KEY_SYM_KPSLASH & 0xff] = KEY_KPSLASH, + [KEY_SYM_KPSTAR & 0xff] = KEY_KPASTERISK, + [KEY_SYM_KPMINUS & 0xff] = KEY_KPMINUS, + [KEY_SYM_KPDOT & 0xff] = KEY_KPDOT, + [KEY_SYM_KPPLUS & 0xff] = KEY_KPPLUS, + [KEY_SYM_KP0 & 0xff] = KEY_KP0, + [KEY_SYM_KP1 & 0xff] = KEY_KP1, + [KEY_SYM_KP2 & 0xff] = KEY_KP2, [KEY_SYM_KPDOWN & 0xff] = KEY_KP2, + [KEY_SYM_KP3 & 0xff] = KEY_KP3, + [KEY_SYM_KP4 & 0xff] = KEY_KP4, [KEY_SYM_KPLEFT & 0xff] = KEY_KP4, + [KEY_SYM_KP5 & 0xff] = KEY_KP5, + [KEY_SYM_KP6 & 0xff] = KEY_KP6, [KEY_SYM_KPRIGHT & 0xff] = KEY_KP6, + [KEY_SYM_KP7 & 0xff] = KEY_KP7, + [KEY_SYM_KP8 & 0xff] = KEY_KP8, [KEY_SYM_KPUP & 0xff] = KEY_KP8, + [KEY_SYM_KP9 & 0xff] = KEY_KP9, + [KEY_SYM_BK_SPC & 0xff] = KEY_BACKSPACE, + [KEY_SYM_TAB & 0xff] = KEY_TAB, + [KEY_SYM_CTRL & 0xff] = KEY_LEFTCTRL, + [KEY_SYM_ALT & 0xff] = KEY_LEFTALT, + [KEY_SYM_INSERT & 0xff] = KEY_INSERT, + [KEY_SYM_DELETE & 0xff] = KEY_DELETE, + [KEY_SYM_SHIFT & 0xff] = KEY_LEFTSHIFT, + [KEY_SYM_UARROW & 0xff] = KEY_UP, + [KEY_SYM_DARROW & 0xff] = KEY_DOWN, + [KEY_SYM_LARROW & 0xff] = KEY_LEFT, + [KEY_SYM_RARROW & 0xff] = KEY_RIGHT, + [KEY_SYM_ESCAPE & 0xff] = KEY_ESC, + [KEY_SYM_PAGEUP & 0xff] = KEY_PAGEUP, + [KEY_SYM_PAGEDOWN & 0xff] = KEY_PAGEDOWN, + [KEY_SYM_HOME & 0xff] = KEY_HOME, + [KEY_SYM_END & 0xff] = KEY_END, + [KEY_SYM_F1 & 0xff] = KEY_F1, + [KEY_SYM_F2 & 0xff] = KEY_F2, + [KEY_SYM_F3 & 0xff] = KEY_F3, + [KEY_SYM_F4 & 0xff] = KEY_F4, + [KEY_SYM_F5 & 0xff] = KEY_F5, + [KEY_SYM_F6 & 0xff] = KEY_F6, + [KEY_SYM_F7 & 0xff] = KEY_F7, + [KEY_SYM_F8 & 0xff] = KEY_F8, + [KEY_SYM_F9 & 0xff] = KEY_F9, + [KEY_SYM_F10 & 0xff] = KEY_F10, + [KEY_SYM_F11 & 0xff] = KEY_F11, + [KEY_SYM_F12 & 0xff] = KEY_F12, + [KEY_SYM_CAP_LOCK & 0xff] = KEY_CAPSLOCK, + [KEY_SYM_NUM_LOCK & 0xff] = KEY_NUMLOCK, + [KEY_SYM_SCR_LOCK & 0xff] = KEY_SCROLLLOCK, +}; + +static const unsigned short xlate[XLATE_SIZE] = { + [NO_KEYCODE] = KEY_RESERVED, + [KEY_SYM_SPACE] = KEY_SPACE, + [KEY_SYM_TILDE] = KEY_GRAVE, 
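+	/* shifted and unshifted symbols share a keycode; the shift key itself
+	 * arrives as a separate KEY_SYM_SHIFT event */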
[KEY_SYM_BKTIC] = KEY_GRAVE, + [KEY_SYM_ONE] = KEY_1, [KEY_SYM_BANG] = KEY_1, + [KEY_SYM_TWO] = KEY_2, [KEY_SYM_AT] = KEY_2, + [KEY_SYM_THREE] = KEY_3, [KEY_SYM_POUND] = KEY_3, + [KEY_SYM_FOUR] = KEY_4, [KEY_SYM_DOLLAR] = KEY_4, + [KEY_SYM_FIVE] = KEY_5, [KEY_SYM_PERCENT] = KEY_5, + [KEY_SYM_SIX] = KEY_6, [KEY_SYM_CARAT] = KEY_6, + [KEY_SYM_SEVEN] = KEY_7, [KEY_SYM_AMPER] = KEY_7, + [KEY_SYM_EIGHT] = KEY_8, [KEY_SYM_STAR] = KEY_8, + [KEY_SYM_NINE] = KEY_9, [KEY_SYM_LPAREN] = KEY_9, + [KEY_SYM_ZERO] = KEY_0, [KEY_SYM_RPAREN] = KEY_0, + [KEY_SYM_MINUS] = KEY_MINUS, [KEY_SYM_USCORE] = KEY_MINUS, + [KEY_SYM_EQUAL] = KEY_EQUAL, [KEY_SYM_PLUS] = KEY_EQUAL, + [KEY_SYM_LBRKT] = KEY_LEFTBRACE, [KEY_SYM_LCURLY] = KEY_LEFTBRACE, + [KEY_SYM_RBRKT] = KEY_RIGHTBRACE, [KEY_SYM_RCURLY] = KEY_RIGHTBRACE, + [KEY_SYM_SLASH] = KEY_BACKSLASH, [KEY_SYM_PIPE] = KEY_BACKSLASH, + [KEY_SYM_TIC] = KEY_APOSTROPHE, [KEY_SYM_QUOTE] = KEY_APOSTROPHE, + [KEY_SYM_SEMIC] = KEY_SEMICOLON, [KEY_SYM_COLON] = KEY_SEMICOLON, + [KEY_SYM_COMMA] = KEY_COMMA, [KEY_SYM_LT] = KEY_COMMA, + [KEY_SYM_PERIOD] = KEY_DOT, [KEY_SYM_GT] = KEY_DOT, + [KEY_SYM_BSLASH] = KEY_SLASH, [KEY_SYM_QMARK] = KEY_SLASH, + [KEY_SYM_A] = KEY_A, [KEY_SYM_a] = KEY_A, + [KEY_SYM_B] = KEY_B, [KEY_SYM_b] = KEY_B, + [KEY_SYM_C] = KEY_C, [KEY_SYM_c] = KEY_C, + [KEY_SYM_D] = KEY_D, [KEY_SYM_d] = KEY_D, + [KEY_SYM_E] = KEY_E, [KEY_SYM_e] = KEY_E, + [KEY_SYM_F] = KEY_F, [KEY_SYM_f] = KEY_F, + [KEY_SYM_G] = KEY_G, [KEY_SYM_g] = KEY_G, + [KEY_SYM_H] = KEY_H, [KEY_SYM_h] = KEY_H, + [KEY_SYM_I] = KEY_I, [KEY_SYM_i] = KEY_I, + [KEY_SYM_J] = KEY_J, [KEY_SYM_j] = KEY_J, + [KEY_SYM_K] = KEY_K, [KEY_SYM_k] = KEY_K, + [KEY_SYM_L] = KEY_L, [KEY_SYM_l] = KEY_L, + [KEY_SYM_M] = KEY_M, [KEY_SYM_m] = KEY_M, + [KEY_SYM_N] = KEY_N, [KEY_SYM_n] = KEY_N, + [KEY_SYM_O] = KEY_O, [KEY_SYM_o] = KEY_O, + [KEY_SYM_P] = KEY_P, [KEY_SYM_p] = KEY_P, + [KEY_SYM_Q] = KEY_Q, [KEY_SYM_q] = KEY_Q, + [KEY_SYM_R] = KEY_R, [KEY_SYM_r] = KEY_R, + [KEY_SYM_S] = KEY_S, [KEY_SYM_s] = KEY_S, + [KEY_SYM_T] = KEY_T, [KEY_SYM_t] = KEY_T, + [KEY_SYM_U] = KEY_U, [KEY_SYM_u] = KEY_U, + [KEY_SYM_V] = KEY_V, [KEY_SYM_v] = KEY_V, + [KEY_SYM_W] = KEY_W, [KEY_SYM_w] = KEY_W, + [KEY_SYM_X] = KEY_X, [KEY_SYM_x] = KEY_X, + [KEY_SYM_Y] = KEY_Y, [KEY_SYM_y] = KEY_Y, + [KEY_SYM_Z] = KEY_Z, [KEY_SYM_z] = KEY_Z, +}; + +static void print_input(struct remote_input *input) +{ + if (input->type == INPUT_TYPE_MOUSE) { + unsigned char buttons = input->mouse_buttons; + dbg("remote mouse movement: (x,y)=(%d,%d)%s%s%s%s\n", + input->data.mouse.x, input->data.mouse.y, + (buttons) ? " -- buttons:" : "", + (buttons & REMOTE_BUTTON_LEFT) ? "left " : "", + (buttons & REMOTE_BUTTON_MIDDLE) ? "middle " : "", + (buttons & REMOTE_BUTTON_RIGHT) ? 
"right" : "" + ); + } else { + dbg("remote keypress (code, flag, down):" + "%d (0x%x) [0x%x] [0x%x]\n", + input->data.keyboard.key_code, + input->data.keyboard.key_code, + input->data.keyboard.key_flag, + input->data.keyboard.key_down + ); + } +} + +static void send_mouse_event(struct input_dev *dev, struct remote_input *input) +{ + unsigned char buttons = input->mouse_buttons; + + input_report_abs(dev, ABS_X, input->data.mouse.x); + input_report_abs(dev, ABS_Y, input->data.mouse.y); + input_report_key(dev, BTN_LEFT, buttons & REMOTE_BUTTON_LEFT); + input_report_key(dev, BTN_MIDDLE, buttons & REMOTE_BUTTON_MIDDLE); + input_report_key(dev, BTN_RIGHT, buttons & REMOTE_BUTTON_RIGHT); + input_sync(dev); +} + +static void send_keyboard_event(struct input_dev *dev, + struct remote_input *input) +{ + unsigned int key; + unsigned short code = input->data.keyboard.key_code; + + if (code & 0xff00) + key = xlate_high[code & 0xff]; + else + key = xlate[code]; + input_report_key(dev, key, input->data.keyboard.key_down); + input_sync(dev); +} + +void ibmasm_handle_mouse_interrupt(struct service_processor *sp) +{ + unsigned long reader; + unsigned long writer; + struct remote_input input; + + reader = get_queue_reader(sp); + writer = get_queue_writer(sp); + + while (reader != writer) { + memcpy_fromio(&input, get_queue_entry(sp, reader), + sizeof(struct remote_input)); + + print_input(&input); + if (input.type == INPUT_TYPE_MOUSE) { + send_mouse_event(sp->remote.mouse_dev, &input); + } else if (input.type == INPUT_TYPE_KEYBOARD) { + send_keyboard_event(sp->remote.keybd_dev, &input); + } else + break; + + reader = advance_queue_reader(sp, reader); + writer = get_queue_writer(sp); + } +} + +int ibmasm_init_remote_input_dev(struct service_processor *sp) +{ + /* set up the mouse input device */ + struct input_dev *mouse_dev, *keybd_dev; + struct pci_dev *pdev = to_pci_dev(sp->dev); + int error = -ENOMEM; + int i; + + sp->remote.mouse_dev = mouse_dev = input_allocate_device(); + sp->remote.keybd_dev = keybd_dev = input_allocate_device(); + + if (!mouse_dev || !keybd_dev) + goto err_free_devices; + + mouse_dev->id.bustype = BUS_PCI; + mouse_dev->id.vendor = pdev->vendor; + mouse_dev->id.product = pdev->device; + mouse_dev->id.version = 1; + mouse_dev->dev.parent = sp->dev; + mouse_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); + mouse_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) | + BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE); + set_bit(BTN_TOUCH, mouse_dev->keybit); + mouse_dev->name = "ibmasm RSA I remote mouse"; + input_set_abs_params(mouse_dev, ABS_X, 0, MOUSE_X_MAX, 0, 0); + input_set_abs_params(mouse_dev, ABS_Y, 0, MOUSE_Y_MAX, 0, 0); + + keybd_dev->id.bustype = BUS_PCI; + keybd_dev->id.vendor = pdev->vendor; + keybd_dev->id.product = pdev->device; + keybd_dev->id.version = 2; + keybd_dev->dev.parent = sp->dev; + keybd_dev->evbit[0] = BIT_MASK(EV_KEY); + keybd_dev->name = "ibmasm RSA I remote keyboard"; + + for (i = 0; i < XLATE_SIZE; i++) { + if (xlate_high[i]) + set_bit(xlate_high[i], keybd_dev->keybit); + if (xlate[i]) + set_bit(xlate[i], keybd_dev->keybit); + } + + error = input_register_device(mouse_dev); + if (error) + goto err_free_devices; + + error = input_register_device(keybd_dev); + if (error) + goto err_unregister_mouse_dev; + + enable_mouse_interrupts(sp); + + printk(KERN_INFO "ibmasm remote responding to events on RSA card %d\n", sp->number); + + return 0; + + err_unregister_mouse_dev: + input_unregister_device(mouse_dev); + mouse_dev = NULL; /* so we don't try to free it 
again below */ + err_free_devices: + input_free_device(mouse_dev); + input_free_device(keybd_dev); + + return error; +} + +void ibmasm_free_remote_input_dev(struct service_processor *sp) +{ + disable_mouse_interrupts(sp); + input_unregister_device(sp->remote.mouse_dev); + input_unregister_device(sp->remote.keybd_dev); +} + diff --git a/drivers/misc/ibmasm/remote.h b/drivers/misc/ibmasm/remote.h new file mode 100644 index 0000000000..ec4e78ec5a --- /dev/null +++ b/drivers/misc/ibmasm/remote.h @@ -0,0 +1,257 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/* + * IBM ASM Service Processor Device Driver + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + * + * Originally written by Pete Reynolds + */ + +#ifndef _IBMASM_REMOTE_H_ +#define _IBMASM_REMOTE_H_ + +#include + +/* pci offsets */ +#define CONDOR_MOUSE_DATA 0x000AC000 +#define CONDOR_MOUSE_ISR_CONTROL 0x00 +#define CONDOR_MOUSE_ISR_STATUS 0x04 +#define CONDOR_MOUSE_Q_READER 0x08 +#define CONDOR_MOUSE_Q_WRITER 0x0C +#define CONDOR_MOUSE_Q_BEGIN 0x10 +#define CONDOR_MOUSE_MAX_X 0x14 +#define CONDOR_MOUSE_MAX_Y 0x18 + +#define CONDOR_INPUT_DESKTOP_INFO 0x1F0 +#define CONDOR_INPUT_DISPLAY_RESX 0x1F4 +#define CONDOR_INPUT_DISPLAY_RESY 0x1F8 +#define CONDOR_INPUT_DISPLAY_BITS 0x1FC +#define CONDOR_OUTPUT_VNC_STATUS 0x200 + +#define CONDOR_MOUSE_INTR_STATUS_MASK 0x00000001 + +#define INPUT_TYPE_MOUSE 0x1 +#define INPUT_TYPE_KEYBOARD 0x2 + + +/* mouse button states received from SP */ +#define REMOTE_DOUBLE_CLICK 0xF0 +#define REMOTE_BUTTON_LEFT 0x01 +#define REMOTE_BUTTON_MIDDLE 0x02 +#define REMOTE_BUTTON_RIGHT 0x04 + +/* size of keysym/keycode translation matrices */ +#define XLATE_SIZE 256 + +struct mouse_input { + unsigned short y; + unsigned short x; +}; + + +struct keyboard_input { + unsigned short key_code; + unsigned char key_flag; + unsigned char key_down; +}; + + + +struct remote_input { + union { + struct mouse_input mouse; + struct keyboard_input keyboard; + } data; + + unsigned char type; + unsigned char pad1; + unsigned char mouse_buttons; + unsigned char pad3; +}; + +#define mouse_addr(sp) (sp->base_address + CONDOR_MOUSE_DATA) +#define display_width(sp) (mouse_addr(sp) + CONDOR_INPUT_DISPLAY_RESX) +#define display_height(sp) (mouse_addr(sp) + CONDOR_INPUT_DISPLAY_RESY) +#define display_depth(sp) (mouse_addr(sp) + CONDOR_INPUT_DISPLAY_BITS) +#define desktop_info(sp) (mouse_addr(sp) + CONDOR_INPUT_DESKTOP_INFO) +#define vnc_status(sp) (mouse_addr(sp) + CONDOR_OUTPUT_VNC_STATUS) +#define isr_control(sp) (mouse_addr(sp) + CONDOR_MOUSE_ISR_CONTROL) + +#define mouse_interrupt_pending(sp) readl(mouse_addr(sp) + CONDOR_MOUSE_ISR_STATUS) +#define clear_mouse_interrupt(sp) writel(0, mouse_addr(sp) + CONDOR_MOUSE_ISR_STATUS) +#define enable_mouse_interrupts(sp) writel(1, mouse_addr(sp) + CONDOR_MOUSE_ISR_CONTROL) +#define disable_mouse_interrupts(sp) writel(0, mouse_addr(sp) + CONDOR_MOUSE_ISR_CONTROL) + +/* remote input queue operations */ +#define REMOTE_QUEUE_SIZE 60 + +#define get_queue_writer(sp) readl(mouse_addr(sp) + CONDOR_MOUSE_Q_WRITER) +#define get_queue_reader(sp) readl(mouse_addr(sp) + CONDOR_MOUSE_Q_READER) +#define set_queue_reader(sp, reader) writel(reader, mouse_addr(sp) + CONDOR_MOUSE_Q_READER) + +#define queue_begin (mouse_addr(sp) + CONDOR_MOUSE_Q_BEGIN) + +#define get_queue_entry(sp, read_index) \ + ((void*)(queue_begin + read_index * sizeof(struct remote_input))) + +static inline int advance_queue_reader(struct service_processor *sp, unsigned long reader) +{ + reader++; + if (reader == 
REMOTE_QUEUE_SIZE) + reader = 0; + + set_queue_reader(sp, reader); + return reader; +} + +#define NO_KEYCODE 0 +#define KEY_SYM_BK_SPC 0xFF08 +#define KEY_SYM_TAB 0xFF09 +#define KEY_SYM_ENTER 0xFF0D +#define KEY_SYM_SCR_LOCK 0xFF14 +#define KEY_SYM_ESCAPE 0xFF1B +#define KEY_SYM_HOME 0xFF50 +#define KEY_SYM_LARROW 0xFF51 +#define KEY_SYM_UARROW 0xFF52 +#define KEY_SYM_RARROW 0xFF53 +#define KEY_SYM_DARROW 0xFF54 +#define KEY_SYM_PAGEUP 0xFF55 +#define KEY_SYM_PAGEDOWN 0xFF56 +#define KEY_SYM_END 0xFF57 +#define KEY_SYM_INSERT 0xFF63 +#define KEY_SYM_NUM_LOCK 0xFF7F +#define KEY_SYM_KPSTAR 0xFFAA +#define KEY_SYM_KPPLUS 0xFFAB +#define KEY_SYM_KPMINUS 0xFFAD +#define KEY_SYM_KPDOT 0xFFAE +#define KEY_SYM_KPSLASH 0xFFAF +#define KEY_SYM_KPRIGHT 0xFF96 +#define KEY_SYM_KPUP 0xFF97 +#define KEY_SYM_KPLEFT 0xFF98 +#define KEY_SYM_KPDOWN 0xFF99 +#define KEY_SYM_KP0 0xFFB0 +#define KEY_SYM_KP1 0xFFB1 +#define KEY_SYM_KP2 0xFFB2 +#define KEY_SYM_KP3 0xFFB3 +#define KEY_SYM_KP4 0xFFB4 +#define KEY_SYM_KP5 0xFFB5 +#define KEY_SYM_KP6 0xFFB6 +#define KEY_SYM_KP7 0xFFB7 +#define KEY_SYM_KP8 0xFFB8 +#define KEY_SYM_KP9 0xFFB9 +#define KEY_SYM_F1 0xFFBE // 1B 5B 5B 41 +#define KEY_SYM_F2 0xFFBF // 1B 5B 5B 42 +#define KEY_SYM_F3 0xFFC0 // 1B 5B 5B 43 +#define KEY_SYM_F4 0xFFC1 // 1B 5B 5B 44 +#define KEY_SYM_F5 0xFFC2 // 1B 5B 5B 45 +#define KEY_SYM_F6 0xFFC3 // 1B 5B 31 37 7E +#define KEY_SYM_F7 0xFFC4 // 1B 5B 31 38 7E +#define KEY_SYM_F8 0xFFC5 // 1B 5B 31 39 7E +#define KEY_SYM_F9 0xFFC6 // 1B 5B 32 30 7E +#define KEY_SYM_F10 0xFFC7 // 1B 5B 32 31 7E +#define KEY_SYM_F11 0xFFC8 // 1B 5B 32 33 7E +#define KEY_SYM_F12 0xFFC9 // 1B 5B 32 34 7E +#define KEY_SYM_SHIFT 0xFFE1 +#define KEY_SYM_CTRL 0xFFE3 +#define KEY_SYM_ALT 0xFFE9 +#define KEY_SYM_CAP_LOCK 0xFFE5 +#define KEY_SYM_DELETE 0xFFFF +#define KEY_SYM_TILDE 0x60 +#define KEY_SYM_BKTIC 0x7E +#define KEY_SYM_ONE 0x31 +#define KEY_SYM_BANG 0x21 +#define KEY_SYM_TWO 0x32 +#define KEY_SYM_AT 0x40 +#define KEY_SYM_THREE 0x33 +#define KEY_SYM_POUND 0x23 +#define KEY_SYM_FOUR 0x34 +#define KEY_SYM_DOLLAR 0x24 +#define KEY_SYM_FIVE 0x35 +#define KEY_SYM_PERCENT 0x25 +#define KEY_SYM_SIX 0x36 +#define KEY_SYM_CARAT 0x5E +#define KEY_SYM_SEVEN 0x37 +#define KEY_SYM_AMPER 0x26 +#define KEY_SYM_EIGHT 0x38 +#define KEY_SYM_STAR 0x2A +#define KEY_SYM_NINE 0x39 +#define KEY_SYM_LPAREN 0x28 +#define KEY_SYM_ZERO 0x30 +#define KEY_SYM_RPAREN 0x29 +#define KEY_SYM_MINUS 0x2D +#define KEY_SYM_USCORE 0x5F +#define KEY_SYM_EQUAL 0x2B +#define KEY_SYM_PLUS 0x3D +#define KEY_SYM_LBRKT 0x5B +#define KEY_SYM_LCURLY 0x7B +#define KEY_SYM_RBRKT 0x5D +#define KEY_SYM_RCURLY 0x7D +#define KEY_SYM_SLASH 0x5C +#define KEY_SYM_PIPE 0x7C +#define KEY_SYM_TIC 0x27 +#define KEY_SYM_QUOTE 0x22 +#define KEY_SYM_SEMIC 0x3B +#define KEY_SYM_COLON 0x3A +#define KEY_SYM_COMMA 0x2C +#define KEY_SYM_LT 0x3C +#define KEY_SYM_PERIOD 0x2E +#define KEY_SYM_GT 0x3E +#define KEY_SYM_BSLASH 0x2F +#define KEY_SYM_QMARK 0x3F +#define KEY_SYM_A 0x41 +#define KEY_SYM_B 0x42 +#define KEY_SYM_C 0x43 +#define KEY_SYM_D 0x44 +#define KEY_SYM_E 0x45 +#define KEY_SYM_F 0x46 +#define KEY_SYM_G 0x47 +#define KEY_SYM_H 0x48 +#define KEY_SYM_I 0x49 +#define KEY_SYM_J 0x4A +#define KEY_SYM_K 0x4B +#define KEY_SYM_L 0x4C +#define KEY_SYM_M 0x4D +#define KEY_SYM_N 0x4E +#define KEY_SYM_O 0x4F +#define KEY_SYM_P 0x50 +#define KEY_SYM_Q 0x51 +#define KEY_SYM_R 0x52 +#define KEY_SYM_S 0x53 +#define KEY_SYM_T 0x54 +#define KEY_SYM_U 0x55 +#define KEY_SYM_V 0x56 +#define KEY_SYM_W 0x57 +#define KEY_SYM_X 0x58 
+#define KEY_SYM_Y 0x59 +#define KEY_SYM_Z 0x5A +#define KEY_SYM_a 0x61 +#define KEY_SYM_b 0x62 +#define KEY_SYM_c 0x63 +#define KEY_SYM_d 0x64 +#define KEY_SYM_e 0x65 +#define KEY_SYM_f 0x66 +#define KEY_SYM_g 0x67 +#define KEY_SYM_h 0x68 +#define KEY_SYM_i 0x69 +#define KEY_SYM_j 0x6A +#define KEY_SYM_k 0x6B +#define KEY_SYM_l 0x6C +#define KEY_SYM_m 0x6D +#define KEY_SYM_n 0x6E +#define KEY_SYM_o 0x6F +#define KEY_SYM_p 0x70 +#define KEY_SYM_q 0x71 +#define KEY_SYM_r 0x72 +#define KEY_SYM_s 0x73 +#define KEY_SYM_t 0x74 +#define KEY_SYM_u 0x75 +#define KEY_SYM_v 0x76 +#define KEY_SYM_w 0x77 +#define KEY_SYM_x 0x78 +#define KEY_SYM_y 0x79 +#define KEY_SYM_z 0x7A +#define KEY_SYM_SPACE 0x20 +#endif /* _IBMASM_REMOTE_H_ */ diff --git a/drivers/misc/ibmasm/uart.c b/drivers/misc/ibmasm/uart.c new file mode 100644 index 0000000000..a5d4c8e010 --- /dev/null +++ b/drivers/misc/ibmasm/uart.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +/* + * IBM ASM Service Processor Device Driver + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + */ + +#include +#include +#include +#include +#include +#include "ibmasm.h" +#include "lowlevel.h" + + +void ibmasm_register_uart(struct service_processor *sp) +{ + struct uart_8250_port uart; + void __iomem *iomem_base; + + iomem_base = sp->base_address + SCOUT_COM_B_BASE; + + /* read the uart scratch register to determine if the UART + * is dedicated to the service processor or if the OS can use it + */ + if (0 == readl(iomem_base + UART_SCR)) { + dev_info(sp->dev, "IBM SP UART not registered, owned by service processor\n"); + sp->serial_line = -1; + return; + } + + memset(&uart, 0, sizeof(uart)); + uart.port.irq = sp->irq; + uart.port.uartclk = 3686400; + uart.port.flags = UPF_SHARE_IRQ; + uart.port.iotype = UPIO_MEM; + uart.port.membase = iomem_base; + + sp->serial_line = serial8250_register_8250_port(&uart); + if (sp->serial_line < 0) { + dev_err(sp->dev, "Failed to register serial port\n"); + return; + } + enable_uart_interrupts(sp->base_address); +} + +void ibmasm_unregister_uart(struct service_processor *sp) +{ + if (sp->serial_line < 0) + return; + + disable_uart_interrupts(sp->base_address); + serial8250_unregister_port(sp->serial_line); +} diff --git a/drivers/misc/ibmvmc.c b/drivers/misc/ibmvmc.c new file mode 100644 index 0000000000..2101eb12bc --- /dev/null +++ b/drivers/misc/ibmvmc.c @@ -0,0 +1,2421 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * IBM Power Systems Virtual Management Channel Support. + * + * Copyright (c) 2004, 2018 IBM Corp. + * Dave Engebretsen engebret@us.ibm.com + * Steven Royer seroyer@linux.vnet.ibm.com + * Adam Reznechek adreznec@linux.vnet.ibm.com + * Bryant G. 
Ly + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "ibmvmc.h" + +#define IBMVMC_DRIVER_VERSION "1.0" + +/* + * Static global variables + */ +static DECLARE_WAIT_QUEUE_HEAD(ibmvmc_read_wait); + +static const char ibmvmc_driver_name[] = "ibmvmc"; + +static struct ibmvmc_struct ibmvmc; +static struct ibmvmc_hmc hmcs[MAX_HMCS]; +static struct crq_server_adapter ibmvmc_adapter; + +static int ibmvmc_max_buf_pool_size = DEFAULT_BUF_POOL_SIZE; +static int ibmvmc_max_hmcs = DEFAULT_HMCS; +static int ibmvmc_max_mtu = DEFAULT_MTU; + +static inline long h_copy_rdma(s64 length, u64 sliobn, u64 slioba, + u64 dliobn, u64 dlioba) +{ + long rc = 0; + + /* Ensure all writes to source memory are visible before hcall */ + dma_wmb(); + pr_debug("ibmvmc: h_copy_rdma(0x%llx, 0x%llx, 0x%llx, 0x%llx, 0x%llx\n", + length, sliobn, slioba, dliobn, dlioba); + rc = plpar_hcall_norets(H_COPY_RDMA, length, sliobn, slioba, + dliobn, dlioba); + pr_debug("ibmvmc: h_copy_rdma rc = 0x%lx\n", rc); + + return rc; +} + +static inline void h_free_crq(uint32_t unit_address) +{ + long rc = 0; + + do { + if (H_IS_LONG_BUSY(rc)) + msleep(get_longbusy_msecs(rc)); + + rc = plpar_hcall_norets(H_FREE_CRQ, unit_address); + } while ((rc == H_BUSY) || (H_IS_LONG_BUSY(rc))); +} + +/** + * h_request_vmc: - request a hypervisor virtual management channel device + * @vmc_index: drc index of the vmc device created + * + * Requests the hypervisor create a new virtual management channel device, + * allowing this partition to send hypervisor virtualization control + * commands. + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static inline long h_request_vmc(u32 *vmc_index) +{ + long rc = 0; + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + do { + if (H_IS_LONG_BUSY(rc)) + msleep(get_longbusy_msecs(rc)); + + /* Call to request the VMC device from phyp */ + rc = plpar_hcall(H_REQUEST_VMC, retbuf); + pr_debug("ibmvmc: %s rc = 0x%lx\n", __func__, rc); + *vmc_index = retbuf[0]; + } while ((rc == H_BUSY) || (H_IS_LONG_BUSY(rc))); + + return rc; +} + +/* routines for managing a command/response queue */ +/** + * ibmvmc_handle_event: - Interrupt handler for crq events + * @irq: number of irq to handle, not used + * @dev_instance: crq_server_adapter that received interrupt + * + * Disables interrupts and schedules ibmvmc_task + * + * Always returns IRQ_HANDLED + */ +static irqreturn_t ibmvmc_handle_event(int irq, void *dev_instance) +{ + struct crq_server_adapter *adapter = + (struct crq_server_adapter *)dev_instance; + + vio_disable_interrupts(to_vio_dev(adapter->dev)); + tasklet_schedule(&adapter->work_task); + + return IRQ_HANDLED; +} + +/** + * ibmvmc_release_crq_queue - Release CRQ Queue + * + * @adapter: crq_server_adapter struct + * + * Return: + * 0 - Success + * Non-Zero - Failure + */ +static void ibmvmc_release_crq_queue(struct crq_server_adapter *adapter) +{ + struct vio_dev *vdev = to_vio_dev(adapter->dev); + struct crq_queue *queue = &adapter->queue; + + free_irq(vdev->irq, (void *)adapter); + tasklet_kill(&adapter->work_task); + + if (adapter->reset_task) + kthread_stop(adapter->reset_task); + + h_free_crq(vdev->unit_address); + dma_unmap_single(adapter->dev, + queue->msg_token, + queue->size * sizeof(*queue->msgs), DMA_BIDIRECTIONAL); + free_page((unsigned long)queue->msgs); +} + +/** + * ibmvmc_reset_crq_queue - Reset CRQ Queue + * + * @adapter: 
crq_server_adapter struct + * + * This function calls h_free_crq and then calls H_REG_CRQ and does all the + * bookkeeping to get us back to where we can communicate. + * + * Return: + * 0 - Success + * Non-Zero - Failure + */ +static int ibmvmc_reset_crq_queue(struct crq_server_adapter *adapter) +{ + struct vio_dev *vdev = to_vio_dev(adapter->dev); + struct crq_queue *queue = &adapter->queue; + int rc = 0; + + /* Close the CRQ */ + h_free_crq(vdev->unit_address); + + /* Clean out the queue */ + memset(queue->msgs, 0x00, PAGE_SIZE); + queue->cur = 0; + + /* And re-open it again */ + rc = plpar_hcall_norets(H_REG_CRQ, + vdev->unit_address, + queue->msg_token, PAGE_SIZE); + if (rc == 2) + /* Adapter is good, but other end is not ready */ + dev_warn(adapter->dev, "Partner adapter not ready\n"); + else if (rc != 0) + dev_err(adapter->dev, "couldn't register crq--rc 0x%x\n", rc); + + return rc; +} + +/** + * crq_queue_next_crq: - Returns the next entry in message queue + * @queue: crq_queue to use + * + * Returns pointer to next entry in queue, or NULL if there are no new + * entried in the CRQ. + */ +static struct ibmvmc_crq_msg *crq_queue_next_crq(struct crq_queue *queue) +{ + struct ibmvmc_crq_msg *crq; + unsigned long flags; + + spin_lock_irqsave(&queue->lock, flags); + crq = &queue->msgs[queue->cur]; + if (crq->valid & 0x80) { + if (++queue->cur == queue->size) + queue->cur = 0; + + /* Ensure the read of the valid bit occurs before reading any + * other bits of the CRQ entry + */ + dma_rmb(); + } else { + crq = NULL; + } + + spin_unlock_irqrestore(&queue->lock, flags); + + return crq; +} + +/** + * ibmvmc_send_crq - Send CRQ + * + * @adapter: crq_server_adapter struct + * @word1: Word1 Data field + * @word2: Word2 Data field + * + * Return: + * 0 - Success + * Non-Zero - Failure + */ +static long ibmvmc_send_crq(struct crq_server_adapter *adapter, + u64 word1, u64 word2) +{ + struct vio_dev *vdev = to_vio_dev(adapter->dev); + long rc = 0; + + dev_dbg(adapter->dev, "(0x%x, 0x%016llx, 0x%016llx)\n", + vdev->unit_address, word1, word2); + + /* + * Ensure the command buffer is flushed to memory before handing it + * over to the other side to prevent it from fetching any stale data. + */ + dma_wmb(); + rc = plpar_hcall_norets(H_SEND_CRQ, vdev->unit_address, word1, word2); + dev_dbg(adapter->dev, "rc = 0x%lx\n", rc); + + return rc; +} + +/** + * alloc_dma_buffer - Create DMA Buffer + * + * @vdev: vio_dev struct + * @size: Size field + * @dma_handle: DMA address field + * + * Allocates memory for the command queue and maps remote memory into an + * ioba. + * + * Returns a pointer to the buffer + */ +static void *alloc_dma_buffer(struct vio_dev *vdev, size_t size, + dma_addr_t *dma_handle) +{ + /* allocate memory */ + void *buffer = kzalloc(size, GFP_ATOMIC); + + if (!buffer) { + *dma_handle = 0; + return NULL; + } + + /* DMA map */ + *dma_handle = dma_map_single(&vdev->dev, buffer, size, + DMA_BIDIRECTIONAL); + + if (dma_mapping_error(&vdev->dev, *dma_handle)) { + *dma_handle = 0; + kfree_sensitive(buffer); + return NULL; + } + + return buffer; +} + +/** + * free_dma_buffer - Free DMA Buffer + * + * @vdev: vio_dev struct + * @size: Size field + * @vaddr: Address field + * @dma_handle: DMA address field + * + * Releases memory for a command queue and unmaps mapped remote memory. 
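+ *
+ * Illustrative pairing of the two helpers (a sketch, not a verbatim call
+ * site; vdev and ibmvmc.max_mtu are assumed from the surrounding code):
+ *
+ *	dma_addr_t dma;
+ *	void *va = alloc_dma_buffer(vdev, ibmvmc.max_mtu, &dma);
+ *
+ *	if (!va)
+ *		return -ENOMEM;
+ *	... use va locally, hand dma to h_copy_rdma() as the local ioba ...
+ *	free_dma_buffer(vdev, ibmvmc.max_mtu, va, dma);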
+ */ +static void free_dma_buffer(struct vio_dev *vdev, size_t size, void *vaddr, + dma_addr_t dma_handle) +{ + /* DMA unmap */ + dma_unmap_single(&vdev->dev, dma_handle, size, DMA_BIDIRECTIONAL); + + /* deallocate memory */ + kfree_sensitive(vaddr); +} + +/** + * ibmvmc_get_valid_hmc_buffer - Retrieve Valid HMC Buffer + * + * @hmc_index: HMC Index Field + * + * Return: + * Pointer to ibmvmc_buffer + */ +static struct ibmvmc_buffer *ibmvmc_get_valid_hmc_buffer(u8 hmc_index) +{ + struct ibmvmc_buffer *buffer; + struct ibmvmc_buffer *ret_buf = NULL; + unsigned long i; + + if (hmc_index > ibmvmc.max_hmc_index) + return NULL; + + buffer = hmcs[hmc_index].buffer; + + for (i = 0; i < ibmvmc_max_buf_pool_size; i++) { + if (buffer[i].valid && buffer[i].free && + buffer[i].owner == VMC_BUF_OWNER_ALPHA) { + buffer[i].free = 0; + ret_buf = &buffer[i]; + break; + } + } + + return ret_buf; +} + +/** + * ibmvmc_get_free_hmc_buffer - Get Free HMC Buffer + * + * @adapter: crq_server_adapter struct + * @hmc_index: Hmc Index field + * + * Return: + * Pointer to ibmvmc_buffer + */ +static struct ibmvmc_buffer *ibmvmc_get_free_hmc_buffer(struct crq_server_adapter *adapter, + u8 hmc_index) +{ + struct ibmvmc_buffer *buffer; + struct ibmvmc_buffer *ret_buf = NULL; + unsigned long i; + + if (hmc_index > ibmvmc.max_hmc_index) { + dev_info(adapter->dev, "get_free_hmc_buffer: invalid hmc_index=0x%x\n", + hmc_index); + return NULL; + } + + buffer = hmcs[hmc_index].buffer; + + for (i = 0; i < ibmvmc_max_buf_pool_size; i++) { + if (buffer[i].free && + buffer[i].owner == VMC_BUF_OWNER_ALPHA) { + buffer[i].free = 0; + ret_buf = &buffer[i]; + break; + } + } + + return ret_buf; +} + +/** + * ibmvmc_free_hmc_buffer - Free an HMC Buffer + * + * @hmc: ibmvmc_hmc struct + * @buffer: ibmvmc_buffer struct + * + */ +static void ibmvmc_free_hmc_buffer(struct ibmvmc_hmc *hmc, + struct ibmvmc_buffer *buffer) +{ + unsigned long flags; + + spin_lock_irqsave(&hmc->lock, flags); + buffer->free = 1; + spin_unlock_irqrestore(&hmc->lock, flags); +} + +/** + * ibmvmc_count_hmc_buffers - Count HMC Buffers + * + * @hmc_index: HMC Index field + * @valid: Valid number of buffers field + * @free: Free number of buffers field + * + */ +static void ibmvmc_count_hmc_buffers(u8 hmc_index, unsigned int *valid, + unsigned int *free) +{ + struct ibmvmc_buffer *buffer; + unsigned long i; + unsigned long flags; + + if (hmc_index > ibmvmc.max_hmc_index) + return; + + if (!valid || !free) + return; + + *valid = 0; *free = 0; + + buffer = hmcs[hmc_index].buffer; + spin_lock_irqsave(&hmcs[hmc_index].lock, flags); + + for (i = 0; i < ibmvmc_max_buf_pool_size; i++) { + if (buffer[i].valid) { + *valid = *valid + 1; + if (buffer[i].free) + *free = *free + 1; + } + } + + spin_unlock_irqrestore(&hmcs[hmc_index].lock, flags); +} + +/** + * ibmvmc_get_free_hmc - Get Free HMC + * + * Return: + * Pointer to an available HMC Connection + * Null otherwise + */ +static struct ibmvmc_hmc *ibmvmc_get_free_hmc(void) +{ + unsigned long i; + unsigned long flags; + + /* + * Find an available HMC connection. 
+ */ + for (i = 0; i <= ibmvmc.max_hmc_index; i++) { + spin_lock_irqsave(&hmcs[i].lock, flags); + if (hmcs[i].state == ibmhmc_state_free) { + hmcs[i].index = i; + hmcs[i].state = ibmhmc_state_initial; + spin_unlock_irqrestore(&hmcs[i].lock, flags); + return &hmcs[i]; + } + spin_unlock_irqrestore(&hmcs[i].lock, flags); + } + + return NULL; +} + +/** + * ibmvmc_return_hmc - Return an HMC Connection + * + * @hmc: ibmvmc_hmc struct + * @release_readers: Number of readers connected to session + * + * This function releases the HMC connections back into the pool. + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static int ibmvmc_return_hmc(struct ibmvmc_hmc *hmc, bool release_readers) +{ + struct ibmvmc_buffer *buffer; + struct crq_server_adapter *adapter; + struct vio_dev *vdev; + unsigned long i; + unsigned long flags; + + if (!hmc || !hmc->adapter) + return -EIO; + + if (release_readers) { + if (hmc->file_session) { + struct ibmvmc_file_session *session = hmc->file_session; + + session->valid = 0; + wake_up_interruptible(&ibmvmc_read_wait); + } + } + + adapter = hmc->adapter; + vdev = to_vio_dev(adapter->dev); + + spin_lock_irqsave(&hmc->lock, flags); + hmc->index = 0; + hmc->state = ibmhmc_state_free; + hmc->queue_head = 0; + hmc->queue_tail = 0; + buffer = hmc->buffer; + for (i = 0; i < ibmvmc_max_buf_pool_size; i++) { + if (buffer[i].valid) { + free_dma_buffer(vdev, + ibmvmc.max_mtu, + buffer[i].real_addr_local, + buffer[i].dma_addr_local); + dev_dbg(adapter->dev, "Forgot buffer id 0x%lx\n", i); + } + memset(&buffer[i], 0, sizeof(struct ibmvmc_buffer)); + + hmc->queue_outbound_msgs[i] = VMC_INVALID_BUFFER_ID; + } + + spin_unlock_irqrestore(&hmc->lock, flags); + + return 0; +} + +/** + * ibmvmc_send_open - Interface Open + * @buffer: Pointer to ibmvmc_buffer struct + * @hmc: Pointer to ibmvmc_hmc struct + * + * This command is sent by the management partition as the result of a + * management partition device request. It causes the hypervisor to + * prepare a set of data buffers for the management application connection + * indicated HMC idx. A unique HMC Idx would be used if multiple management + * applications running concurrently were desired. Before responding to this + * command, the hypervisor must provide the management partition with at + * least one of these new buffers via the Add Buffer. This indicates whether + * the messages are inbound or outbound from the hypervisor. 
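+ *
+ * As implemented further down in this file, the exchange is roughly
+ * (sketch only):
+ *
+ *	ibmvmc_ioctl_sethmcid()                copy HMC id into a buffer
+ *	  ibmvmc_send_open()                   state = ibmhmc_state_opening
+ *	    h_copy_rdma()                      push the buffer to the hypervisor
+ *	    CRQ out: VMC_MSG_OPEN(buffer_id)
+ *	CRQ in:  VMC_MSG_OPEN_RESP
+ *	  ibmvmc_process_open_resp()           state = ibmhmc_state_ready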
+ * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static int ibmvmc_send_open(struct ibmvmc_buffer *buffer, + struct ibmvmc_hmc *hmc) +{ + struct ibmvmc_crq_msg crq_msg; + struct crq_server_adapter *adapter; + __be64 *crq_as_u64 = (__be64 *)&crq_msg; + int rc = 0; + + if (!hmc || !hmc->adapter) + return -EIO; + + adapter = hmc->adapter; + + dev_dbg(adapter->dev, "send_open: 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", + (unsigned long)buffer->size, (unsigned long)adapter->liobn, + (unsigned long)buffer->dma_addr_local, + (unsigned long)adapter->riobn, + (unsigned long)buffer->dma_addr_remote); + + rc = h_copy_rdma(buffer->size, + adapter->liobn, + buffer->dma_addr_local, + adapter->riobn, + buffer->dma_addr_remote); + if (rc) { + dev_err(adapter->dev, "Error: In send_open, h_copy_rdma rc 0x%x\n", + rc); + return -EIO; + } + + hmc->state = ibmhmc_state_opening; + + crq_msg.valid = 0x80; + crq_msg.type = VMC_MSG_OPEN; + crq_msg.status = 0; + crq_msg.var1.rsvd = 0; + crq_msg.hmc_session = hmc->session; + crq_msg.hmc_index = hmc->index; + crq_msg.var2.buffer_id = cpu_to_be16(buffer->id); + crq_msg.rsvd = 0; + crq_msg.var3.rsvd = 0; + + ibmvmc_send_crq(adapter, be64_to_cpu(crq_as_u64[0]), + be64_to_cpu(crq_as_u64[1])); + + return rc; +} + +/** + * ibmvmc_send_close - Interface Close + * @hmc: Pointer to ibmvmc_hmc struct + * + * This command is sent by the management partition to terminate a + * management application to hypervisor connection. When this command is + * sent, the management partition has quiesced all I/O operations to all + * buffers associated with this management application connection, and + * has freed any storage for these buffers. + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static int ibmvmc_send_close(struct ibmvmc_hmc *hmc) +{ + struct ibmvmc_crq_msg crq_msg; + struct crq_server_adapter *adapter; + __be64 *crq_as_u64 = (__be64 *)&crq_msg; + int rc = 0; + + if (!hmc || !hmc->adapter) + return -EIO; + + adapter = hmc->adapter; + + dev_info(adapter->dev, "CRQ send: close\n"); + + crq_msg.valid = 0x80; + crq_msg.type = VMC_MSG_CLOSE; + crq_msg.status = 0; + crq_msg.var1.rsvd = 0; + crq_msg.hmc_session = hmc->session; + crq_msg.hmc_index = hmc->index; + crq_msg.var2.rsvd = 0; + crq_msg.rsvd = 0; + crq_msg.var3.rsvd = 0; + + ibmvmc_send_crq(adapter, be64_to_cpu(crq_as_u64[0]), + be64_to_cpu(crq_as_u64[1])); + + return rc; +} + +/** + * ibmvmc_send_capabilities - Send VMC Capabilities + * + * @adapter: crq_server_adapter struct + * + * The capabilities message is an administrative message sent after the CRQ + * initialization sequence of messages and is used to exchange VMC capabilities + * between the management partition and the hypervisor. The management + * partition must send this message and the hypervisor must respond with VMC + * capabilities Response message before HMC interface message can begin. Any + * HMC interface messages received before the exchange of capabilities has + * complete are dropped. 
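+ *
+ * The values that end up in force are the minimum of what this driver
+ * offers and what the hypervisor answers with, e.g. (from
+ * ibmvmc_process_capabilities() below):
+ *
+ *	ibmvmc.max_mtu = min_t(u32, ibmvmc_max_mtu, be32_to_cpu(crq->max_mtu));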
+ * + * Return: + * 0 - Success + */ +static int ibmvmc_send_capabilities(struct crq_server_adapter *adapter) +{ + struct ibmvmc_admin_crq_msg crq_msg; + __be64 *crq_as_u64 = (__be64 *)&crq_msg; + + dev_dbg(adapter->dev, "ibmvmc: CRQ send: capabilities\n"); + crq_msg.valid = 0x80; + crq_msg.type = VMC_MSG_CAP; + crq_msg.status = 0; + crq_msg.rsvd[0] = 0; + crq_msg.rsvd[1] = 0; + crq_msg.max_hmc = ibmvmc_max_hmcs; + crq_msg.max_mtu = cpu_to_be32(ibmvmc_max_mtu); + crq_msg.pool_size = cpu_to_be16(ibmvmc_max_buf_pool_size); + crq_msg.crq_size = cpu_to_be16(adapter->queue.size); + crq_msg.version = cpu_to_be16(IBMVMC_PROTOCOL_VERSION); + + ibmvmc_send_crq(adapter, be64_to_cpu(crq_as_u64[0]), + be64_to_cpu(crq_as_u64[1])); + + ibmvmc.state = ibmvmc_state_capabilities; + + return 0; +} + +/** + * ibmvmc_send_add_buffer_resp - Add Buffer Response + * + * @adapter: crq_server_adapter struct + * @status: Status field + * @hmc_session: HMC Session field + * @hmc_index: HMC Index field + * @buffer_id: Buffer Id field + * + * This command is sent by the management partition to the hypervisor in + * response to the Add Buffer message. The Status field indicates the result of + * the command. + * + * Return: + * 0 - Success + */ +static int ibmvmc_send_add_buffer_resp(struct crq_server_adapter *adapter, + u8 status, u8 hmc_session, + u8 hmc_index, u16 buffer_id) +{ + struct ibmvmc_crq_msg crq_msg; + __be64 *crq_as_u64 = (__be64 *)&crq_msg; + + dev_dbg(adapter->dev, "CRQ send: add_buffer_resp\n"); + crq_msg.valid = 0x80; + crq_msg.type = VMC_MSG_ADD_BUF_RESP; + crq_msg.status = status; + crq_msg.var1.rsvd = 0; + crq_msg.hmc_session = hmc_session; + crq_msg.hmc_index = hmc_index; + crq_msg.var2.buffer_id = cpu_to_be16(buffer_id); + crq_msg.rsvd = 0; + crq_msg.var3.rsvd = 0; + + ibmvmc_send_crq(adapter, be64_to_cpu(crq_as_u64[0]), + be64_to_cpu(crq_as_u64[1])); + + return 0; +} + +/** + * ibmvmc_send_rem_buffer_resp - Remove Buffer Response + * + * @adapter: crq_server_adapter struct + * @status: Status field + * @hmc_session: HMC Session field + * @hmc_index: HMC Index field + * @buffer_id: Buffer Id field + * + * This command is sent by the management partition to the hypervisor in + * response to the Remove Buffer message. The Buffer ID field indicates + * which buffer the management partition selected to remove. The Status + * field indicates the result of the command. + * + * Return: + * 0 - Success + */ +static int ibmvmc_send_rem_buffer_resp(struct crq_server_adapter *adapter, + u8 status, u8 hmc_session, + u8 hmc_index, u16 buffer_id) +{ + struct ibmvmc_crq_msg crq_msg; + __be64 *crq_as_u64 = (__be64 *)&crq_msg; + + dev_dbg(adapter->dev, "CRQ send: rem_buffer_resp\n"); + crq_msg.valid = 0x80; + crq_msg.type = VMC_MSG_REM_BUF_RESP; + crq_msg.status = status; + crq_msg.var1.rsvd = 0; + crq_msg.hmc_session = hmc_session; + crq_msg.hmc_index = hmc_index; + crq_msg.var2.buffer_id = cpu_to_be16(buffer_id); + crq_msg.rsvd = 0; + crq_msg.var3.rsvd = 0; + + ibmvmc_send_crq(adapter, be64_to_cpu(crq_as_u64[0]), + be64_to_cpu(crq_as_u64[1])); + + return 0; +} + +/** + * ibmvmc_send_msg - Signal Message + * + * @adapter: crq_server_adapter struct + * @buffer: ibmvmc_buffer struct + * @hmc: ibmvmc_hmc struct + * @msg_len: message length field + * + * This command is sent between the management partition and the hypervisor + * in order to signal the arrival of an HMC protocol message. The command + * can be sent by both the management partition and the hypervisor. 
It is + * used for all traffic between the management application and the hypervisor, + * regardless of who initiated the communication. + * + * There is no response to this message. + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static int ibmvmc_send_msg(struct crq_server_adapter *adapter, + struct ibmvmc_buffer *buffer, + struct ibmvmc_hmc *hmc, int msg_len) +{ + struct ibmvmc_crq_msg crq_msg; + __be64 *crq_as_u64 = (__be64 *)&crq_msg; + int rc = 0; + + dev_dbg(adapter->dev, "CRQ send: rdma to HV\n"); + rc = h_copy_rdma(msg_len, + adapter->liobn, + buffer->dma_addr_local, + adapter->riobn, + buffer->dma_addr_remote); + if (rc) { + dev_err(adapter->dev, "Error in send_msg, h_copy_rdma rc 0x%x\n", + rc); + return rc; + } + + crq_msg.valid = 0x80; + crq_msg.type = VMC_MSG_SIGNAL; + crq_msg.status = 0; + crq_msg.var1.rsvd = 0; + crq_msg.hmc_session = hmc->session; + crq_msg.hmc_index = hmc->index; + crq_msg.var2.buffer_id = cpu_to_be16(buffer->id); + crq_msg.var3.msg_len = cpu_to_be32(msg_len); + dev_dbg(adapter->dev, "CRQ send: msg to HV 0x%llx 0x%llx\n", + be64_to_cpu(crq_as_u64[0]), be64_to_cpu(crq_as_u64[1])); + + buffer->owner = VMC_BUF_OWNER_HV; + ibmvmc_send_crq(adapter, be64_to_cpu(crq_as_u64[0]), + be64_to_cpu(crq_as_u64[1])); + + return rc; +} + +/** + * ibmvmc_open - Open Session + * + * @inode: inode struct + * @file: file struct + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static int ibmvmc_open(struct inode *inode, struct file *file) +{ + struct ibmvmc_file_session *session; + + pr_debug("%s: inode = 0x%lx, file = 0x%lx, state = 0x%x\n", __func__, + (unsigned long)inode, (unsigned long)file, + ibmvmc.state); + + session = kzalloc(sizeof(*session), GFP_KERNEL); + if (!session) + return -ENOMEM; + + session->file = file; + file->private_data = session; + + return 0; +} + +/** + * ibmvmc_close - Close Session + * + * @inode: inode struct + * @file: file struct + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static int ibmvmc_close(struct inode *inode, struct file *file) +{ + struct ibmvmc_file_session *session; + struct ibmvmc_hmc *hmc; + int rc = 0; + unsigned long flags; + + pr_debug("%s: file = 0x%lx, state = 0x%x\n", __func__, + (unsigned long)file, ibmvmc.state); + + session = file->private_data; + if (!session) + return -EIO; + + hmc = session->hmc; + if (hmc) { + if (!hmc->adapter) + return -EIO; + + if (ibmvmc.state == ibmvmc_state_failed) { + dev_warn(hmc->adapter->dev, "close: state_failed\n"); + return -EIO; + } + + spin_lock_irqsave(&hmc->lock, flags); + if (hmc->state >= ibmhmc_state_opening) { + rc = ibmvmc_send_close(hmc); + if (rc) + dev_warn(hmc->adapter->dev, "close: send_close failed.\n"); + } + spin_unlock_irqrestore(&hmc->lock, flags); + } + + kfree_sensitive(session); + + return rc; +} + +/** + * ibmvmc_read - Read + * + * @file: file struct + * @buf: Character buffer + * @nbytes: Size in bytes + * @ppos: Offset + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static ssize_t ibmvmc_read(struct file *file, char *buf, size_t nbytes, + loff_t *ppos) +{ + struct ibmvmc_file_session *session; + struct ibmvmc_hmc *hmc; + struct crq_server_adapter *adapter; + struct ibmvmc_buffer *buffer; + ssize_t n; + ssize_t retval = 0; + unsigned long flags; + DEFINE_WAIT(wait); + + pr_debug("ibmvmc: read: file = 0x%lx, buf = 0x%lx, nbytes = 0x%lx\n", + (unsigned long)file, (unsigned long)buf, + (unsigned long)nbytes); + + if (nbytes == 0) + return 0; + + if (nbytes > ibmvmc.max_mtu) { + pr_warn("ibmvmc: read: nbytes 
invalid 0x%x\n", + (unsigned int)nbytes); + return -EINVAL; + } + + session = file->private_data; + if (!session) { + pr_warn("ibmvmc: read: no session\n"); + return -EIO; + } + + hmc = session->hmc; + if (!hmc) { + pr_warn("ibmvmc: read: no hmc\n"); + return -EIO; + } + + adapter = hmc->adapter; + if (!adapter) { + pr_warn("ibmvmc: read: no adapter\n"); + return -EIO; + } + + do { + prepare_to_wait(&ibmvmc_read_wait, &wait, TASK_INTERRUPTIBLE); + + spin_lock_irqsave(&hmc->lock, flags); + if (hmc->queue_tail != hmc->queue_head) + /* Data is available */ + break; + + spin_unlock_irqrestore(&hmc->lock, flags); + + if (!session->valid) { + retval = -EBADFD; + goto out; + } + if (file->f_flags & O_NONBLOCK) { + retval = -EAGAIN; + goto out; + } + + schedule(); + + if (signal_pending(current)) { + retval = -ERESTARTSYS; + goto out; + } + } while (1); + + buffer = &(hmc->buffer[hmc->queue_outbound_msgs[hmc->queue_tail]]); + hmc->queue_tail++; + if (hmc->queue_tail == ibmvmc_max_buf_pool_size) + hmc->queue_tail = 0; + spin_unlock_irqrestore(&hmc->lock, flags); + + nbytes = min_t(size_t, nbytes, buffer->msg_len); + n = copy_to_user((void *)buf, buffer->real_addr_local, nbytes); + dev_dbg(adapter->dev, "read: copy to user nbytes = 0x%lx.\n", nbytes); + ibmvmc_free_hmc_buffer(hmc, buffer); + retval = nbytes; + + if (n) { + dev_warn(adapter->dev, "read: copy to user failed.\n"); + retval = -EFAULT; + } + + out: + finish_wait(&ibmvmc_read_wait, &wait); + dev_dbg(adapter->dev, "read: out %ld\n", retval); + return retval; +} + +/** + * ibmvmc_poll - Poll + * + * @file: file struct + * @wait: Poll Table + * + * Return: + * poll.h return values + */ +static unsigned int ibmvmc_poll(struct file *file, poll_table *wait) +{ + struct ibmvmc_file_session *session; + struct ibmvmc_hmc *hmc; + unsigned int mask = 0; + + session = file->private_data; + if (!session) + return 0; + + hmc = session->hmc; + if (!hmc) + return 0; + + poll_wait(file, &ibmvmc_read_wait, wait); + + if (hmc->queue_head != hmc->queue_tail) + mask |= POLLIN | POLLRDNORM; + + return mask; +} + +/** + * ibmvmc_write - Write + * + * @file: file struct + * @buffer: Character buffer + * @count: Count field + * @ppos: Offset + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static ssize_t ibmvmc_write(struct file *file, const char *buffer, + size_t count, loff_t *ppos) +{ + struct inode *inode; + struct ibmvmc_buffer *vmc_buffer; + struct ibmvmc_file_session *session; + struct crq_server_adapter *adapter; + struct ibmvmc_hmc *hmc; + unsigned char *buf; + unsigned long flags; + size_t bytes; + const char *p = buffer; + size_t c = count; + int ret = 0; + + session = file->private_data; + if (!session) + return -EIO; + + hmc = session->hmc; + if (!hmc) + return -EIO; + + spin_lock_irqsave(&hmc->lock, flags); + if (hmc->state == ibmhmc_state_free) { + /* HMC connection is not valid (possibly was reset under us). */ + ret = -EIO; + goto out; + } + + adapter = hmc->adapter; + if (!adapter) { + ret = -EIO; + goto out; + } + + if (count > ibmvmc.max_mtu) { + dev_warn(adapter->dev, "invalid buffer size 0x%lx\n", + (unsigned long)count); + ret = -EIO; + goto out; + } + + /* Waiting for the open resp message to the ioctl(1) - retry */ + if (hmc->state == ibmhmc_state_opening) { + ret = -EBUSY; + goto out; + } + + /* Make sure the ioctl() was called & the open msg sent, and that + * the HMC connection has not failed. 
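+	 *
+	 * Expected user-space ordering, as a rough sketch (the device node
+	 * name /dev/ibmvmc is assumed here):
+	 *
+	 *	fd = open("/dev/ibmvmc", O_RDWR);
+	 *	ioctl(fd, VMC_IOCTL_SETHMCID, hmc_id);  - sends the open message
+	 *	write(fd, buf, len);                    - valid once state_ready
+	 *	read(fd, buf, len);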
+ */ + if (hmc->state != ibmhmc_state_ready) { + ret = -EIO; + goto out; + } + + vmc_buffer = ibmvmc_get_valid_hmc_buffer(hmc->index); + if (!vmc_buffer) { + /* No buffer available for the msg send, or we have not yet + * completed the open/open_resp sequence. Retry until this is + * complete. + */ + ret = -EBUSY; + goto out; + } + if (!vmc_buffer->real_addr_local) { + dev_err(adapter->dev, "no buffer storage assigned\n"); + ret = -EIO; + goto out; + } + buf = vmc_buffer->real_addr_local; + + while (c > 0) { + bytes = min_t(size_t, c, vmc_buffer->size); + + bytes -= copy_from_user(buf, p, bytes); + if (!bytes) { + ret = -EFAULT; + goto out; + } + c -= bytes; + p += bytes; + } + if (p == buffer) + goto out; + + inode = file_inode(file); + inode->i_mtime = inode_set_ctime_current(inode); + mark_inode_dirty(inode); + + dev_dbg(adapter->dev, "write: file = 0x%lx, count = 0x%lx\n", + (unsigned long)file, (unsigned long)count); + + ibmvmc_send_msg(adapter, vmc_buffer, hmc, count); + ret = p - buffer; + out: + spin_unlock_irqrestore(&hmc->lock, flags); + return (ssize_t)(ret); +} + +/** + * ibmvmc_setup_hmc - Setup the HMC + * + * @session: ibmvmc_file_session struct + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static long ibmvmc_setup_hmc(struct ibmvmc_file_session *session) +{ + struct ibmvmc_hmc *hmc; + unsigned int valid, free, index; + + if (ibmvmc.state == ibmvmc_state_failed) { + pr_warn("ibmvmc: Reserve HMC: state_failed\n"); + return -EIO; + } + + if (ibmvmc.state < ibmvmc_state_ready) { + pr_warn("ibmvmc: Reserve HMC: not state_ready\n"); + return -EAGAIN; + } + + /* Device is busy until capabilities have been exchanged and we + * have a generic buffer for each possible HMC connection. + */ + for (index = 0; index <= ibmvmc.max_hmc_index; index++) { + valid = 0; + ibmvmc_count_hmc_buffers(index, &valid, &free); + if (valid == 0) { + pr_warn("ibmvmc: buffers not ready for index %d\n", + index); + return -ENOBUFS; + } + } + + /* Get an hmc object, and transition to ibmhmc_state_initial */ + hmc = ibmvmc_get_free_hmc(); + if (!hmc) { + pr_warn("%s: free hmc not found\n", __func__); + return -EBUSY; + } + + hmc->session = hmc->session + 1; + if (hmc->session == 0xff) + hmc->session = 1; + + session->hmc = hmc; + hmc->adapter = &ibmvmc_adapter; + hmc->file_session = session; + session->valid = 1; + + return 0; +} + +/** + * ibmvmc_ioctl_sethmcid - IOCTL Set HMC ID + * + * @session: ibmvmc_file_session struct + * @new_hmc_id: HMC id field + * + * IOCTL command to setup the hmc id + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static long ibmvmc_ioctl_sethmcid(struct ibmvmc_file_session *session, + unsigned char __user *new_hmc_id) +{ + struct ibmvmc_hmc *hmc; + struct ibmvmc_buffer *buffer; + size_t bytes; + char print_buffer[HMC_ID_LEN + 1]; + unsigned long flags; + long rc = 0; + + /* Reserve HMC session */ + hmc = session->hmc; + if (!hmc) { + rc = ibmvmc_setup_hmc(session); + if (rc) + return rc; + + hmc = session->hmc; + if (!hmc) { + pr_err("ibmvmc: setup_hmc success but no hmc\n"); + return -EIO; + } + } + + if (hmc->state != ibmhmc_state_initial) { + pr_warn("ibmvmc: sethmcid: invalid state to send open 0x%x\n", + hmc->state); + return -EIO; + } + + bytes = copy_from_user(hmc->hmc_id, new_hmc_id, HMC_ID_LEN); + if (bytes) + return -EFAULT; + + /* Send Open Session command */ + spin_lock_irqsave(&hmc->lock, flags); + buffer = ibmvmc_get_valid_hmc_buffer(hmc->index); + spin_unlock_irqrestore(&hmc->lock, flags); + + if (!buffer || !buffer->real_addr_local) { 
+ pr_warn("ibmvmc: sethmcid: no buffer available\n"); + return -EIO; + } + + /* Make sure buffer is NULL terminated before trying to print it */ + memset(print_buffer, 0, HMC_ID_LEN + 1); + strncpy(print_buffer, hmc->hmc_id, HMC_ID_LEN); + pr_info("ibmvmc: sethmcid: Set HMC ID: \"%s\"\n", print_buffer); + + memcpy(buffer->real_addr_local, hmc->hmc_id, HMC_ID_LEN); + /* RDMA over ID, send open msg, change state to ibmhmc_state_opening */ + rc = ibmvmc_send_open(buffer, hmc); + + return rc; +} + +/** + * ibmvmc_ioctl_query - IOCTL Query + * + * @session: ibmvmc_file_session struct + * @ret_struct: ibmvmc_query_struct + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static long ibmvmc_ioctl_query(struct ibmvmc_file_session *session, + struct ibmvmc_query_struct __user *ret_struct) +{ + struct ibmvmc_query_struct query_struct; + size_t bytes; + + memset(&query_struct, 0, sizeof(query_struct)); + query_struct.have_vmc = (ibmvmc.state > ibmvmc_state_initial); + query_struct.state = ibmvmc.state; + query_struct.vmc_drc_index = ibmvmc.vmc_drc_index; + + bytes = copy_to_user(ret_struct, &query_struct, + sizeof(query_struct)); + if (bytes) + return -EFAULT; + + return 0; +} + +/** + * ibmvmc_ioctl_requestvmc - IOCTL Request VMC + * + * @session: ibmvmc_file_session struct + * @ret_vmc_index: VMC Index + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static long ibmvmc_ioctl_requestvmc(struct ibmvmc_file_session *session, + u32 __user *ret_vmc_index) +{ + /* TODO: (adreznec) Add locking to control multiple process access */ + size_t bytes; + long rc; + u32 vmc_drc_index; + + /* Call to request the VMC device from phyp*/ + rc = h_request_vmc(&vmc_drc_index); + pr_debug("ibmvmc: requestvmc: H_REQUEST_VMC rc = 0x%lx\n", rc); + + if (rc == H_SUCCESS) { + rc = 0; + } else if (rc == H_FUNCTION) { + pr_err("ibmvmc: requestvmc: h_request_vmc not supported\n"); + return -EPERM; + } else if (rc == H_AUTHORITY) { + pr_err("ibmvmc: requestvmc: hypervisor denied vmc request\n"); + return -EPERM; + } else if (rc == H_HARDWARE) { + pr_err("ibmvmc: requestvmc: hypervisor hardware fault\n"); + return -EIO; + } else if (rc == H_RESOURCE) { + pr_err("ibmvmc: requestvmc: vmc resource unavailable\n"); + return -ENODEV; + } else if (rc == H_NOT_AVAILABLE) { + pr_err("ibmvmc: requestvmc: system cannot be vmc managed\n"); + return -EPERM; + } else if (rc == H_PARAMETER) { + pr_err("ibmvmc: requestvmc: invalid parameter\n"); + return -EINVAL; + } + + /* Success, set the vmc index in global struct */ + ibmvmc.vmc_drc_index = vmc_drc_index; + + bytes = copy_to_user(ret_vmc_index, &vmc_drc_index, + sizeof(*ret_vmc_index)); + if (bytes) { + pr_warn("ibmvmc: requestvmc: copy to user failed.\n"); + return -EFAULT; + } + return rc; +} + +/** + * ibmvmc_ioctl - IOCTL + * + * @file: file information + * @cmd: cmd field + * @arg: Argument field + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static long ibmvmc_ioctl(struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct ibmvmc_file_session *session = file->private_data; + + pr_debug("ibmvmc: ioctl file=0x%lx, cmd=0x%x, arg=0x%lx, ses=0x%lx\n", + (unsigned long)file, cmd, arg, + (unsigned long)session); + + if (!session) { + pr_warn("ibmvmc: ioctl: no session\n"); + return -EIO; + } + + switch (cmd) { + case VMC_IOCTL_SETHMCID: + return ibmvmc_ioctl_sethmcid(session, + (unsigned char __user *)arg); + case VMC_IOCTL_QUERY: + return ibmvmc_ioctl_query(session, + (struct ibmvmc_query_struct __user *)arg); + case VMC_IOCTL_REQUESTVMC: + 
return ibmvmc_ioctl_requestvmc(session, + (unsigned int __user *)arg); + default: + pr_warn("ibmvmc: unknown ioctl 0x%x\n", cmd); + return -EINVAL; + } +} + +static const struct file_operations ibmvmc_fops = { + .owner = THIS_MODULE, + .read = ibmvmc_read, + .write = ibmvmc_write, + .poll = ibmvmc_poll, + .unlocked_ioctl = ibmvmc_ioctl, + .open = ibmvmc_open, + .release = ibmvmc_close, +}; + +/** + * ibmvmc_add_buffer - Add Buffer + * + * @adapter: crq_server_adapter struct + * @crq: ibmvmc_crq_msg struct + * + * This message transfers a buffer from hypervisor ownership to management + * partition ownership. The LIOBA is obtained from the virtual TCE table + * associated with the hypervisor side of the VMC device, and points to a + * buffer of size MTU (as established in the capabilities exchange). + * + * Typical flow for ading buffers: + * 1. A new management application connection is opened by the management + * partition. + * 2. The hypervisor assigns new buffers for the traffic associated with + * that connection. + * 3. The hypervisor sends VMC Add Buffer messages to the management + * partition, informing it of the new buffers. + * 4. The hypervisor sends an HMC protocol message (to the management + * application) notifying it of the new buffers. This informs the + * application that it has buffers available for sending HMC + * commands. + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static int ibmvmc_add_buffer(struct crq_server_adapter *adapter, + struct ibmvmc_crq_msg *crq) +{ + struct ibmvmc_buffer *buffer; + u8 hmc_index; + u8 hmc_session; + u16 buffer_id; + unsigned long flags; + int rc = 0; + + if (!crq) + return -1; + + hmc_session = crq->hmc_session; + hmc_index = crq->hmc_index; + buffer_id = be16_to_cpu(crq->var2.buffer_id); + + if (hmc_index > ibmvmc.max_hmc_index) { + dev_err(adapter->dev, "add_buffer: invalid hmc_index = 0x%x\n", + hmc_index); + ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_INVALID_HMC_INDEX, + hmc_session, hmc_index, buffer_id); + return -1; + } + + if (buffer_id >= ibmvmc.max_buffer_pool_size) { + dev_err(adapter->dev, "add_buffer: invalid buffer_id = 0x%x\n", + buffer_id); + ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_INVALID_BUFFER_ID, + hmc_session, hmc_index, buffer_id); + return -1; + } + + spin_lock_irqsave(&hmcs[hmc_index].lock, flags); + buffer = &hmcs[hmc_index].buffer[buffer_id]; + + if (buffer->real_addr_local || buffer->dma_addr_local) { + dev_warn(adapter->dev, "add_buffer: already allocated id = 0x%lx\n", + (unsigned long)buffer_id); + spin_unlock_irqrestore(&hmcs[hmc_index].lock, flags); + ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_INVALID_BUFFER_ID, + hmc_session, hmc_index, buffer_id); + return -1; + } + + buffer->real_addr_local = alloc_dma_buffer(to_vio_dev(adapter->dev), + ibmvmc.max_mtu, + &buffer->dma_addr_local); + + if (!buffer->real_addr_local) { + dev_err(adapter->dev, "add_buffer: alloc_dma_buffer failed.\n"); + spin_unlock_irqrestore(&hmcs[hmc_index].lock, flags); + ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_INTERFACE_FAILURE, + hmc_session, hmc_index, buffer_id); + return -1; + } + + buffer->dma_addr_remote = be32_to_cpu(crq->var3.lioba); + buffer->size = ibmvmc.max_mtu; + buffer->owner = crq->var1.owner; + buffer->free = 1; + /* Must ensure valid==1 is observable only after all other fields are */ + dma_wmb(); + buffer->valid = 1; + buffer->id = buffer_id; + + dev_dbg(adapter->dev, "add_buffer: successfully added a buffer:\n"); + dev_dbg(adapter->dev, " index: %d, session: %d, buffer: 0x%x, owner: 
%d\n", + hmc_index, hmc_session, buffer_id, buffer->owner); + dev_dbg(adapter->dev, " local: 0x%x, remote: 0x%x\n", + (u32)buffer->dma_addr_local, + (u32)buffer->dma_addr_remote); + spin_unlock_irqrestore(&hmcs[hmc_index].lock, flags); + + ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_SUCCESS, hmc_session, + hmc_index, buffer_id); + + return rc; +} + +/** + * ibmvmc_rem_buffer - Remove Buffer + * + * @adapter: crq_server_adapter struct + * @crq: ibmvmc_crq_msg struct + * + * This message requests an HMC buffer to be transferred from management + * partition ownership to hypervisor ownership. The management partition may + * not be able to satisfy the request at a particular point in time if all its + * buffers are in use. The management partition requires a depth of at least + * one inbound buffer to allow management application commands to flow to the + * hypervisor. It is, therefore, an interface error for the hypervisor to + * attempt to remove the management partition's last buffer. + * + * The hypervisor is expected to manage buffer usage with the management + * application directly and inform the management partition when buffers may be + * removed. The typical flow for removing buffers: + * + * 1. The management application no longer needs a communication path to a + * particular hypervisor function. That function is closed. + * 2. The hypervisor and the management application quiesce all traffic to that + * function. The hypervisor requests a reduction in buffer pool size. + * 3. The management application acknowledges the reduction in buffer pool size. + * 4. The hypervisor sends a Remove Buffer message to the management partition, + * informing it of the reduction in buffers. + * 5. The management partition verifies it can remove the buffer. This is + * possible if buffers have been quiesced. + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +/* + * The hypervisor requested that we pick an unused buffer, and return it. + * Before sending the buffer back, we free any storage associated with the + * buffer. 
+ */ +static int ibmvmc_rem_buffer(struct crq_server_adapter *adapter, + struct ibmvmc_crq_msg *crq) +{ + struct ibmvmc_buffer *buffer; + u8 hmc_index; + u8 hmc_session; + u16 buffer_id = 0; + unsigned long flags; + int rc = 0; + + if (!crq) + return -1; + + hmc_session = crq->hmc_session; + hmc_index = crq->hmc_index; + + if (hmc_index > ibmvmc.max_hmc_index) { + dev_warn(adapter->dev, "rem_buffer: invalid hmc_index = 0x%x\n", + hmc_index); + ibmvmc_send_rem_buffer_resp(adapter, VMC_MSG_INVALID_HMC_INDEX, + hmc_session, hmc_index, buffer_id); + return -1; + } + + spin_lock_irqsave(&hmcs[hmc_index].lock, flags); + buffer = ibmvmc_get_free_hmc_buffer(adapter, hmc_index); + if (!buffer) { + dev_info(adapter->dev, "rem_buffer: no buffer to remove\n"); + spin_unlock_irqrestore(&hmcs[hmc_index].lock, flags); + ibmvmc_send_rem_buffer_resp(adapter, VMC_MSG_NO_BUFFER, + hmc_session, hmc_index, + VMC_INVALID_BUFFER_ID); + return -1; + } + + buffer_id = buffer->id; + + if (buffer->valid) + free_dma_buffer(to_vio_dev(adapter->dev), + ibmvmc.max_mtu, + buffer->real_addr_local, + buffer->dma_addr_local); + + memset(buffer, 0, sizeof(struct ibmvmc_buffer)); + spin_unlock_irqrestore(&hmcs[hmc_index].lock, flags); + + dev_dbg(adapter->dev, "rem_buffer: removed buffer 0x%x.\n", buffer_id); + ibmvmc_send_rem_buffer_resp(adapter, VMC_MSG_SUCCESS, hmc_session, + hmc_index, buffer_id); + + return rc; +} + +static int ibmvmc_recv_msg(struct crq_server_adapter *adapter, + struct ibmvmc_crq_msg *crq) +{ + struct ibmvmc_buffer *buffer; + struct ibmvmc_hmc *hmc; + unsigned long msg_len; + u8 hmc_index; + u8 hmc_session; + u16 buffer_id; + unsigned long flags; + int rc = 0; + + if (!crq) + return -1; + + /* Hypervisor writes CRQs directly into our memory in big endian */ + dev_dbg(adapter->dev, "Recv_msg: msg from HV 0x%016llx 0x%016llx\n", + be64_to_cpu(*((unsigned long *)crq)), + be64_to_cpu(*(((unsigned long *)crq) + 1))); + + hmc_session = crq->hmc_session; + hmc_index = crq->hmc_index; + buffer_id = be16_to_cpu(crq->var2.buffer_id); + msg_len = be32_to_cpu(crq->var3.msg_len); + + if (hmc_index > ibmvmc.max_hmc_index) { + dev_err(adapter->dev, "Recv_msg: invalid hmc_index = 0x%x\n", + hmc_index); + ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_INVALID_HMC_INDEX, + hmc_session, hmc_index, buffer_id); + return -1; + } + + if (buffer_id >= ibmvmc.max_buffer_pool_size) { + dev_err(adapter->dev, "Recv_msg: invalid buffer_id = 0x%x\n", + buffer_id); + ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_INVALID_BUFFER_ID, + hmc_session, hmc_index, buffer_id); + return -1; + } + + hmc = &hmcs[hmc_index]; + spin_lock_irqsave(&hmc->lock, flags); + + if (hmc->state == ibmhmc_state_free) { + dev_err(adapter->dev, "Recv_msg: invalid hmc state = 0x%x\n", + hmc->state); + /* HMC connection is not valid (possibly was reset under us). */ + spin_unlock_irqrestore(&hmc->lock, flags); + return -1; + } + + buffer = &hmc->buffer[buffer_id]; + + if (buffer->valid == 0 || buffer->owner == VMC_BUF_OWNER_ALPHA) { + dev_err(adapter->dev, "Recv_msg: not valid, or not HV. 0x%x 0x%x\n", + buffer->valid, buffer->owner); + spin_unlock_irqrestore(&hmc->lock, flags); + return -1; + } + + /* RDMA the data into the partition. 
*/ + rc = h_copy_rdma(msg_len, + adapter->riobn, + buffer->dma_addr_remote, + adapter->liobn, + buffer->dma_addr_local); + + dev_dbg(adapter->dev, "Recv_msg: msg_len = 0x%x, buffer_id = 0x%x, queue_head = 0x%x, hmc_idx = 0x%x\n", + (unsigned int)msg_len, (unsigned int)buffer_id, + (unsigned int)hmc->queue_head, (unsigned int)hmc_index); + buffer->msg_len = msg_len; + buffer->free = 0; + buffer->owner = VMC_BUF_OWNER_ALPHA; + + if (rc) { + dev_err(adapter->dev, "Failure in recv_msg: h_copy_rdma = 0x%x\n", + rc); + spin_unlock_irqrestore(&hmc->lock, flags); + return -1; + } + + /* Must be locked because read operates on the same data */ + hmc->queue_outbound_msgs[hmc->queue_head] = buffer_id; + hmc->queue_head++; + if (hmc->queue_head == ibmvmc_max_buf_pool_size) + hmc->queue_head = 0; + + if (hmc->queue_head == hmc->queue_tail) + dev_err(adapter->dev, "outbound buffer queue wrapped.\n"); + + spin_unlock_irqrestore(&hmc->lock, flags); + + wake_up_interruptible(&ibmvmc_read_wait); + + return 0; +} + +/** + * ibmvmc_process_capabilities - Process Capabilities + * + * @adapter: crq_server_adapter struct + * @crqp: ibmvmc_crq_msg struct + * + */ +static void ibmvmc_process_capabilities(struct crq_server_adapter *adapter, + struct ibmvmc_crq_msg *crqp) +{ + struct ibmvmc_admin_crq_msg *crq = (struct ibmvmc_admin_crq_msg *)crqp; + + if ((be16_to_cpu(crq->version) >> 8) != + (IBMVMC_PROTOCOL_VERSION >> 8)) { + dev_err(adapter->dev, "init failed, incompatible versions 0x%x 0x%x\n", + be16_to_cpu(crq->version), + IBMVMC_PROTOCOL_VERSION); + ibmvmc.state = ibmvmc_state_failed; + return; + } + + ibmvmc.max_mtu = min_t(u32, ibmvmc_max_mtu, be32_to_cpu(crq->max_mtu)); + ibmvmc.max_buffer_pool_size = min_t(u16, ibmvmc_max_buf_pool_size, + be16_to_cpu(crq->pool_size)); + ibmvmc.max_hmc_index = min_t(u8, ibmvmc_max_hmcs, crq->max_hmc) - 1; + ibmvmc.state = ibmvmc_state_ready; + + dev_info(adapter->dev, "Capabilities: mtu=0x%x, pool_size=0x%x, max_hmc=0x%x\n", + ibmvmc.max_mtu, ibmvmc.max_buffer_pool_size, + ibmvmc.max_hmc_index); +} + +/** + * ibmvmc_validate_hmc_session - Validate HMC Session + * + * @adapter: crq_server_adapter struct + * @crq: ibmvmc_crq_msg struct + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static int ibmvmc_validate_hmc_session(struct crq_server_adapter *adapter, + struct ibmvmc_crq_msg *crq) +{ + unsigned char hmc_index; + + hmc_index = crq->hmc_index; + + if (crq->hmc_session == 0) + return 0; + + if (hmc_index > ibmvmc.max_hmc_index) + return -1; + + if (hmcs[hmc_index].session != crq->hmc_session) { + dev_warn(adapter->dev, "Drop, bad session: expected 0x%x, recv 0x%x\n", + hmcs[hmc_index].session, crq->hmc_session); + return -1; + } + + return 0; +} + +/** + * ibmvmc_reset - Reset + * + * @adapter: crq_server_adapter struct + * @xport_event: export_event field + * + * Closes all HMC sessions and conditionally schedules a CRQ reset. + * @xport_event: If true, the partner closed their CRQ; we don't need to reset. + * If false, we need to schedule a CRQ reset. + */ +static void ibmvmc_reset(struct crq_server_adapter *adapter, bool xport_event) +{ + int i; + + if (ibmvmc.state != ibmvmc_state_sched_reset) { + dev_info(adapter->dev, "*** Reset to initial state.\n"); + for (i = 0; i < ibmvmc_max_hmcs; i++) + ibmvmc_return_hmc(&hmcs[i], xport_event); + + if (xport_event) { + /* CRQ was closed by the partner. We don't need to do + * anything except set ourself to the correct state to + * handle init msgs. 
+ */ + ibmvmc.state = ibmvmc_state_crqinit; + } else { + /* The partner did not close their CRQ - instead, we're + * closing the CRQ on our end. Need to schedule this + * for process context, because CRQ reset may require a + * sleep. + * + * Setting ibmvmc.state here immediately prevents + * ibmvmc_open from completing until the reset + * completes in process context. + */ + ibmvmc.state = ibmvmc_state_sched_reset; + dev_dbg(adapter->dev, "Device reset scheduled"); + wake_up_interruptible(&adapter->reset_wait_queue); + } + } +} + +/** + * ibmvmc_reset_task - Reset Task + * + * @data: Data field + * + * Performs a CRQ reset of the VMC device in process context. + * NOTE: This function should not be called directly, use ibmvmc_reset. + */ +static int ibmvmc_reset_task(void *data) +{ + struct crq_server_adapter *adapter = data; + int rc; + + set_user_nice(current, -20); + + while (!kthread_should_stop()) { + wait_event_interruptible(adapter->reset_wait_queue, + (ibmvmc.state == ibmvmc_state_sched_reset) || + kthread_should_stop()); + + if (kthread_should_stop()) + break; + + dev_dbg(adapter->dev, "CRQ resetting in process context"); + tasklet_disable(&adapter->work_task); + + rc = ibmvmc_reset_crq_queue(adapter); + + if (rc != H_SUCCESS && rc != H_RESOURCE) { + dev_err(adapter->dev, "Error initializing CRQ. rc = 0x%x\n", + rc); + ibmvmc.state = ibmvmc_state_failed; + } else { + ibmvmc.state = ibmvmc_state_crqinit; + + if (ibmvmc_send_crq(adapter, 0xC001000000000000LL, 0) + != 0 && rc != H_RESOURCE) + dev_warn(adapter->dev, "Failed to send initialize CRQ message\n"); + } + + vio_enable_interrupts(to_vio_dev(adapter->dev)); + tasklet_enable(&adapter->work_task); + } + + return 0; +} + +/** + * ibmvmc_process_open_resp - Process Open Response + * + * @crq: ibmvmc_crq_msg struct + * @adapter: crq_server_adapter struct + * + * This command is sent by the hypervisor in response to the Interface + * Open message. When this message is received, the indicated buffer is + * again available for management partition use. + */ +static void ibmvmc_process_open_resp(struct ibmvmc_crq_msg *crq, + struct crq_server_adapter *adapter) +{ + unsigned char hmc_index; + unsigned short buffer_id; + + hmc_index = crq->hmc_index; + if (hmc_index > ibmvmc.max_hmc_index) { + /* Why would PHYP give an index > max negotiated? */ + ibmvmc_reset(adapter, false); + return; + } + + if (crq->status) { + dev_warn(adapter->dev, "open_resp: failed - status 0x%x\n", + crq->status); + ibmvmc_return_hmc(&hmcs[hmc_index], false); + return; + } + + if (hmcs[hmc_index].state == ibmhmc_state_opening) { + buffer_id = be16_to_cpu(crq->var2.buffer_id); + if (buffer_id >= ibmvmc.max_buffer_pool_size) { + dev_err(adapter->dev, "open_resp: invalid buffer_id = 0x%x\n", + buffer_id); + hmcs[hmc_index].state = ibmhmc_state_failed; + } else { + ibmvmc_free_hmc_buffer(&hmcs[hmc_index], + &hmcs[hmc_index].buffer[buffer_id]); + hmcs[hmc_index].state = ibmhmc_state_ready; + dev_dbg(adapter->dev, "open_resp: set hmc state = ready\n"); + } + } else { + dev_warn(adapter->dev, "open_resp: invalid hmc state (0x%x)\n", + hmcs[hmc_index].state); + } +} + +/** + * ibmvmc_process_close_resp - Process Close Response + * + * @crq: ibmvmc_crq_msg struct + * @adapter: crq_server_adapter struct + * + * This command is sent by the hypervisor in response to the managemant + * application Interface Close message. + * + * If the close fails, simply reset the entire driver as the state of the VMC + * must be in tough shape. 
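+ * On a successful close the HMC is simply returned via
+ * ibmvmc_return_hmc() and no reset is needed.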
+ */ +static void ibmvmc_process_close_resp(struct ibmvmc_crq_msg *crq, + struct crq_server_adapter *adapter) +{ + unsigned char hmc_index; + + hmc_index = crq->hmc_index; + if (hmc_index > ibmvmc.max_hmc_index) { + ibmvmc_reset(adapter, false); + return; + } + + if (crq->status) { + dev_warn(adapter->dev, "close_resp: failed - status 0x%x\n", + crq->status); + ibmvmc_reset(adapter, false); + return; + } + + ibmvmc_return_hmc(&hmcs[hmc_index], false); +} + +/** + * ibmvmc_crq_process - Process CRQ + * + * @adapter: crq_server_adapter struct + * @crq: ibmvmc_crq_msg struct + * + * Process the CRQ message based upon the type of message received. + * + */ +static void ibmvmc_crq_process(struct crq_server_adapter *adapter, + struct ibmvmc_crq_msg *crq) +{ + switch (crq->type) { + case VMC_MSG_CAP_RESP: + dev_dbg(adapter->dev, "CRQ recv: capabilities resp (0x%x)\n", + crq->type); + if (ibmvmc.state == ibmvmc_state_capabilities) + ibmvmc_process_capabilities(adapter, crq); + else + dev_warn(adapter->dev, "caps msg invalid in state 0x%x\n", + ibmvmc.state); + break; + case VMC_MSG_OPEN_RESP: + dev_dbg(adapter->dev, "CRQ recv: open resp (0x%x)\n", + crq->type); + if (ibmvmc_validate_hmc_session(adapter, crq) == 0) + ibmvmc_process_open_resp(crq, adapter); + break; + case VMC_MSG_ADD_BUF: + dev_dbg(adapter->dev, "CRQ recv: add buf (0x%x)\n", + crq->type); + if (ibmvmc_validate_hmc_session(adapter, crq) == 0) + ibmvmc_add_buffer(adapter, crq); + break; + case VMC_MSG_REM_BUF: + dev_dbg(adapter->dev, "CRQ recv: rem buf (0x%x)\n", + crq->type); + if (ibmvmc_validate_hmc_session(adapter, crq) == 0) + ibmvmc_rem_buffer(adapter, crq); + break; + case VMC_MSG_SIGNAL: + dev_dbg(adapter->dev, "CRQ recv: signal msg (0x%x)\n", + crq->type); + if (ibmvmc_validate_hmc_session(adapter, crq) == 0) + ibmvmc_recv_msg(adapter, crq); + break; + case VMC_MSG_CLOSE_RESP: + dev_dbg(adapter->dev, "CRQ recv: close resp (0x%x)\n", + crq->type); + if (ibmvmc_validate_hmc_session(adapter, crq) == 0) + ibmvmc_process_close_resp(crq, adapter); + break; + case VMC_MSG_CAP: + case VMC_MSG_OPEN: + case VMC_MSG_CLOSE: + case VMC_MSG_ADD_BUF_RESP: + case VMC_MSG_REM_BUF_RESP: + dev_warn(adapter->dev, "CRQ recv: unexpected msg (0x%x)\n", + crq->type); + break; + default: + dev_warn(adapter->dev, "CRQ recv: unknown msg (0x%x)\n", + crq->type); + break; + } +} + +/** + * ibmvmc_handle_crq_init - Handle CRQ Init + * + * @crq: ibmvmc_crq_msg struct + * @adapter: crq_server_adapter struct + * + * Handle the type of crq initialization based on whether + * it is a message or a response. 
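+ * An init message (0x01) means the partner has registered its CRQ and
+ * expects the 0xC002 init response from us; an init response (0x02)
+ * means the 0xC001 init message we sent was accepted.  Either way, once
+ * both queues are up the capabilities exchange is started.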
+ * + */ +static void ibmvmc_handle_crq_init(struct ibmvmc_crq_msg *crq, + struct crq_server_adapter *adapter) +{ + switch (crq->type) { + case 0x01: /* Initialization message */ + dev_dbg(adapter->dev, "CRQ recv: CRQ init msg - state 0x%x\n", + ibmvmc.state); + if (ibmvmc.state == ibmvmc_state_crqinit) { + /* Send back a response */ + if (ibmvmc_send_crq(adapter, 0xC002000000000000, + 0) == 0) + ibmvmc_send_capabilities(adapter); + else + dev_err(adapter->dev, " Unable to send init rsp\n"); + } else { + dev_err(adapter->dev, "Invalid state 0x%x mtu = 0x%x\n", + ibmvmc.state, ibmvmc.max_mtu); + } + + break; + case 0x02: /* Initialization response */ + dev_dbg(adapter->dev, "CRQ recv: initialization resp msg - state 0x%x\n", + ibmvmc.state); + if (ibmvmc.state == ibmvmc_state_crqinit) + ibmvmc_send_capabilities(adapter); + break; + default: + dev_warn(adapter->dev, "Unknown crq message type 0x%lx\n", + (unsigned long)crq->type); + } +} + +/** + * ibmvmc_handle_crq - Handle CRQ + * + * @crq: ibmvmc_crq_msg struct + * @adapter: crq_server_adapter struct + * + * Read the command elements from the command queue and execute the + * requests based upon the type of crq message. + * + */ +static void ibmvmc_handle_crq(struct ibmvmc_crq_msg *crq, + struct crq_server_adapter *adapter) +{ + switch (crq->valid) { + case 0xC0: /* initialization */ + ibmvmc_handle_crq_init(crq, adapter); + break; + case 0xFF: /* Hypervisor telling us the connection is closed */ + dev_warn(adapter->dev, "CRQ recv: virtual adapter failed - resetting.\n"); + ibmvmc_reset(adapter, true); + break; + case 0x80: /* real payload */ + ibmvmc_crq_process(adapter, crq); + break; + default: + dev_warn(adapter->dev, "CRQ recv: unknown msg 0x%02x.\n", + crq->valid); + break; + } +} + +static void ibmvmc_task(unsigned long data) +{ + struct crq_server_adapter *adapter = + (struct crq_server_adapter *)data; + struct vio_dev *vdev = to_vio_dev(adapter->dev); + struct ibmvmc_crq_msg *crq; + int done = 0; + + while (!done) { + /* Pull all the valid messages off the CRQ */ + while ((crq = crq_queue_next_crq(&adapter->queue)) != NULL) { + ibmvmc_handle_crq(crq, adapter); + crq->valid = 0x00; + /* CRQ reset was requested, stop processing CRQs. + * Interrupts will be re-enabled by the reset task. + */ + if (ibmvmc.state == ibmvmc_state_sched_reset) + return; + } + + vio_enable_interrupts(vdev); + crq = crq_queue_next_crq(&adapter->queue); + if (crq) { + vio_disable_interrupts(vdev); + ibmvmc_handle_crq(crq, adapter); + crq->valid = 0x00; + /* CRQ reset was requested, stop processing CRQs. + * Interrupts will be re-enabled by the reset task. 
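+ * (The reset task re-registers the CRQ and then calls
+ * vio_enable_interrupts() and tasklet_enable() again.)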
+ */ + if (ibmvmc.state == ibmvmc_state_sched_reset) + return; + } else { + done = 1; + } + } +} + +/** + * ibmvmc_init_crq_queue - Init CRQ Queue + * + * @adapter: crq_server_adapter struct + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static int ibmvmc_init_crq_queue(struct crq_server_adapter *adapter) +{ + struct vio_dev *vdev = to_vio_dev(adapter->dev); + struct crq_queue *queue = &adapter->queue; + int rc = 0; + int retrc = 0; + + queue->msgs = (struct ibmvmc_crq_msg *)get_zeroed_page(GFP_KERNEL); + + if (!queue->msgs) + goto malloc_failed; + + queue->size = PAGE_SIZE / sizeof(*queue->msgs); + + queue->msg_token = dma_map_single(adapter->dev, queue->msgs, + queue->size * sizeof(*queue->msgs), + DMA_BIDIRECTIONAL); + + if (dma_mapping_error(adapter->dev, queue->msg_token)) + goto map_failed; + + retrc = plpar_hcall_norets(H_REG_CRQ, + vdev->unit_address, + queue->msg_token, PAGE_SIZE); + rc = retrc; + + if (rc == H_RESOURCE) + rc = ibmvmc_reset_crq_queue(adapter); + + if (rc == 2) { + dev_warn(adapter->dev, "Partner adapter not ready\n"); + retrc = 0; + } else if (rc != 0) { + dev_err(adapter->dev, "Error %d opening adapter\n", rc); + goto reg_crq_failed; + } + + queue->cur = 0; + spin_lock_init(&queue->lock); + + tasklet_init(&adapter->work_task, ibmvmc_task, (unsigned long)adapter); + + if (request_irq(vdev->irq, + ibmvmc_handle_event, + 0, "ibmvmc", (void *)adapter) != 0) { + dev_err(adapter->dev, "couldn't register irq 0x%x\n", + vdev->irq); + goto req_irq_failed; + } + + rc = vio_enable_interrupts(vdev); + if (rc != 0) { + dev_err(adapter->dev, "Error %d enabling interrupts!!!\n", rc); + goto req_irq_failed; + } + + return retrc; + +req_irq_failed: + /* Cannot have any work since we either never got our IRQ registered, + * or never got interrupts enabled + */ + tasklet_kill(&adapter->work_task); + h_free_crq(vdev->unit_address); +reg_crq_failed: + dma_unmap_single(adapter->dev, + queue->msg_token, + queue->size * sizeof(*queue->msgs), DMA_BIDIRECTIONAL); +map_failed: + free_page((unsigned long)queue->msgs); +malloc_failed: + return -ENOMEM; +} + +/* Fill in the liobn and riobn fields on the adapter */ +static int read_dma_window(struct vio_dev *vdev, + struct crq_server_adapter *adapter) +{ + const __be32 *dma_window; + const __be32 *prop; + + /* TODO Using of_parse_dma_window would be better, but it doesn't give + * a way to read multiple windows without already knowing the size of + * a window or the number of windows + */ + dma_window = + (const __be32 *)vio_get_attribute(vdev, "ibm,my-dma-window", + NULL); + if (!dma_window) { + dev_warn(adapter->dev, "Couldn't find ibm,my-dma-window property\n"); + return -1; + } + + adapter->liobn = be32_to_cpu(*dma_window); + dma_window++; + + prop = (const __be32 *)vio_get_attribute(vdev, "ibm,#dma-address-cells", + NULL); + if (!prop) { + dev_warn(adapter->dev, "Couldn't find ibm,#dma-address-cells property\n"); + dma_window++; + } else { + dma_window += be32_to_cpu(*prop); + } + + prop = (const __be32 *)vio_get_attribute(vdev, "ibm,#dma-size-cells", + NULL); + if (!prop) { + dev_warn(adapter->dev, "Couldn't find ibm,#dma-size-cells property\n"); + dma_window++; + } else { + dma_window += be32_to_cpu(*prop); + } + + /* dma_window should point to the second window now */ + adapter->riobn = be32_to_cpu(*dma_window); + + return 0; +} + +static int ibmvmc_probe(struct vio_dev *vdev, const struct vio_device_id *id) +{ + struct crq_server_adapter *adapter = &ibmvmc_adapter; + int rc; + + dev_set_drvdata(&vdev->dev, NULL); + 
memset(adapter, 0, sizeof(*adapter)); + adapter->dev = &vdev->dev; + + dev_info(adapter->dev, "Probe for UA 0x%x\n", vdev->unit_address); + + rc = read_dma_window(vdev, adapter); + if (rc != 0) { + ibmvmc.state = ibmvmc_state_failed; + return -1; + } + + dev_dbg(adapter->dev, "Probe: liobn 0x%x, riobn 0x%x\n", + adapter->liobn, adapter->riobn); + + init_waitqueue_head(&adapter->reset_wait_queue); + adapter->reset_task = kthread_run(ibmvmc_reset_task, adapter, "ibmvmc"); + if (IS_ERR(adapter->reset_task)) { + dev_err(adapter->dev, "Failed to start reset thread\n"); + ibmvmc.state = ibmvmc_state_failed; + rc = PTR_ERR(adapter->reset_task); + adapter->reset_task = NULL; + return rc; + } + + rc = ibmvmc_init_crq_queue(adapter); + if (rc != 0 && rc != H_RESOURCE) { + dev_err(adapter->dev, "Error initializing CRQ. rc = 0x%x\n", + rc); + ibmvmc.state = ibmvmc_state_failed; + goto crq_failed; + } + + ibmvmc.state = ibmvmc_state_crqinit; + + /* Try to send an initialization message. Note that this is allowed + * to fail if the other end is not acive. In that case we just wait + * for the other side to initialize. + */ + if (ibmvmc_send_crq(adapter, 0xC001000000000000LL, 0) != 0 && + rc != H_RESOURCE) + dev_warn(adapter->dev, "Failed to send initialize CRQ message\n"); + + dev_set_drvdata(&vdev->dev, adapter); + + return 0; + +crq_failed: + kthread_stop(adapter->reset_task); + adapter->reset_task = NULL; + return -EPERM; +} + +static void ibmvmc_remove(struct vio_dev *vdev) +{ + struct crq_server_adapter *adapter = dev_get_drvdata(&vdev->dev); + + dev_info(adapter->dev, "Entering remove for UA 0x%x\n", + vdev->unit_address); + ibmvmc_release_crq_queue(adapter); +} + +static struct vio_device_id ibmvmc_device_table[] = { + { "ibm,vmc", "IBM,vmc" }, + { "", "" } +}; +MODULE_DEVICE_TABLE(vio, ibmvmc_device_table); + +static struct vio_driver ibmvmc_driver = { + .name = ibmvmc_driver_name, + .id_table = ibmvmc_device_table, + .probe = ibmvmc_probe, + .remove = ibmvmc_remove, +}; + +static void __init ibmvmc_scrub_module_parms(void) +{ + if (ibmvmc_max_mtu > MAX_MTU) { + pr_warn("ibmvmc: Max MTU reduced to %d\n", MAX_MTU); + ibmvmc_max_mtu = MAX_MTU; + } else if (ibmvmc_max_mtu < MIN_MTU) { + pr_warn("ibmvmc: Max MTU increased to %d\n", MIN_MTU); + ibmvmc_max_mtu = MIN_MTU; + } + + if (ibmvmc_max_buf_pool_size > MAX_BUF_POOL_SIZE) { + pr_warn("ibmvmc: Max buffer pool size reduced to %d\n", + MAX_BUF_POOL_SIZE); + ibmvmc_max_buf_pool_size = MAX_BUF_POOL_SIZE; + } else if (ibmvmc_max_buf_pool_size < MIN_BUF_POOL_SIZE) { + pr_warn("ibmvmc: Max buffer pool size increased to %d\n", + MIN_BUF_POOL_SIZE); + ibmvmc_max_buf_pool_size = MIN_BUF_POOL_SIZE; + } + + if (ibmvmc_max_hmcs > MAX_HMCS) { + pr_warn("ibmvmc: Max HMCs reduced to %d\n", MAX_HMCS); + ibmvmc_max_hmcs = MAX_HMCS; + } else if (ibmvmc_max_hmcs < MIN_HMCS) { + pr_warn("ibmvmc: Max HMCs increased to %d\n", MIN_HMCS); + ibmvmc_max_hmcs = MIN_HMCS; + } +} + +static struct miscdevice ibmvmc_miscdev = { + .name = ibmvmc_driver_name, + .minor = MISC_DYNAMIC_MINOR, + .fops = &ibmvmc_fops, +}; + +static int __init ibmvmc_module_init(void) +{ + int rc, i, j; + + ibmvmc.state = ibmvmc_state_initial; + pr_info("ibmvmc: version %s\n", IBMVMC_DRIVER_VERSION); + + rc = misc_register(&ibmvmc_miscdev); + if (rc) { + pr_err("ibmvmc: misc registration failed\n"); + goto misc_register_failed; + } + pr_info("ibmvmc: node %d:%d\n", MISC_MAJOR, + ibmvmc_miscdev.minor); + + /* Initialize data structures */ + memset(hmcs, 0, sizeof(struct ibmvmc_hmc) * MAX_HMCS); + for (i 
= 0; i < MAX_HMCS; i++) { + spin_lock_init(&hmcs[i].lock); + hmcs[i].state = ibmhmc_state_free; + for (j = 0; j < MAX_BUF_POOL_SIZE; j++) + hmcs[i].queue_outbound_msgs[j] = VMC_INVALID_BUFFER_ID; + } + + /* Sanity check module parms */ + ibmvmc_scrub_module_parms(); + + /* + * Initialize some reasonable values. Might be negotiated smaller + * values during the capabilities exchange. + */ + ibmvmc.max_mtu = ibmvmc_max_mtu; + ibmvmc.max_buffer_pool_size = ibmvmc_max_buf_pool_size; + ibmvmc.max_hmc_index = ibmvmc_max_hmcs - 1; + + rc = vio_register_driver(&ibmvmc_driver); + + if (rc) { + pr_err("ibmvmc: rc %d from vio_register_driver\n", rc); + goto vio_reg_failed; + } + + return 0; + +vio_reg_failed: + misc_deregister(&ibmvmc_miscdev); +misc_register_failed: + return rc; +} + +static void __exit ibmvmc_module_exit(void) +{ + pr_info("ibmvmc: module exit\n"); + vio_unregister_driver(&ibmvmc_driver); + misc_deregister(&ibmvmc_miscdev); +} + +module_init(ibmvmc_module_init); +module_exit(ibmvmc_module_exit); + +module_param_named(buf_pool_size, ibmvmc_max_buf_pool_size, + int, 0644); +MODULE_PARM_DESC(buf_pool_size, "Buffer pool size"); +module_param_named(max_hmcs, ibmvmc_max_hmcs, int, 0644); +MODULE_PARM_DESC(max_hmcs, "Max HMCs"); +module_param_named(max_mtu, ibmvmc_max_mtu, int, 0644); +MODULE_PARM_DESC(max_mtu, "Max MTU"); + +MODULE_AUTHOR("Steven Royer "); +MODULE_DESCRIPTION("IBM VMC"); +MODULE_VERSION(IBMVMC_DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/ibmvmc.h b/drivers/misc/ibmvmc.h new file mode 100644 index 0000000000..0e1756fffe --- /dev/null +++ b/drivers/misc/ibmvmc.h @@ -0,0 +1,209 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * linux/drivers/misc/ibmvmc.h + * + * IBM Power Systems Virtual Management Channel Support. + * + * Copyright (c) 2004, 2018 IBM Corp. + * Dave Engebretsen engebret@us.ibm.com + * Steven Royer seroyer@linux.vnet.ibm.com + * Adam Reznechek adreznec@linux.vnet.ibm.com + * Bryant G. 
Ly + */ +#ifndef IBMVMC_H +#define IBMVMC_H + +#include +#include + +#include + +#define IBMVMC_PROTOCOL_VERSION 0x0101 + +#define MIN_BUF_POOL_SIZE 16 +#define MIN_HMCS 1 +#define MIN_MTU 4096 +#define MAX_BUF_POOL_SIZE 64 +#define MAX_HMCS 2 +#define MAX_MTU (4 * 4096) +#define DEFAULT_BUF_POOL_SIZE 32 +#define DEFAULT_HMCS 1 +#define DEFAULT_MTU 4096 +#define HMC_ID_LEN 32 + +#define VMC_INVALID_BUFFER_ID 0xFFFF + +/* ioctl numbers */ +#define VMC_BASE 0xCC +#define VMC_IOCTL_SETHMCID _IOW(VMC_BASE, 0x00, unsigned char *) +#define VMC_IOCTL_QUERY _IOR(VMC_BASE, 0x01, struct ibmvmc_query_struct) +#define VMC_IOCTL_REQUESTVMC _IOR(VMC_BASE, 0x02, u32) + +#define VMC_MSG_CAP 0x01 +#define VMC_MSG_CAP_RESP 0x81 +#define VMC_MSG_OPEN 0x02 +#define VMC_MSG_OPEN_RESP 0x82 +#define VMC_MSG_CLOSE 0x03 +#define VMC_MSG_CLOSE_RESP 0x83 +#define VMC_MSG_ADD_BUF 0x04 +#define VMC_MSG_ADD_BUF_RESP 0x84 +#define VMC_MSG_REM_BUF 0x05 +#define VMC_MSG_REM_BUF_RESP 0x85 +#define VMC_MSG_SIGNAL 0x06 + +#define VMC_MSG_SUCCESS 0 +#define VMC_MSG_INVALID_HMC_INDEX 1 +#define VMC_MSG_INVALID_BUFFER_ID 2 +#define VMC_MSG_CLOSED_HMC 3 +#define VMC_MSG_INTERFACE_FAILURE 4 +#define VMC_MSG_NO_BUFFER 5 + +#define VMC_BUF_OWNER_ALPHA 0 +#define VMC_BUF_OWNER_HV 1 + +enum ibmvmc_states { + ibmvmc_state_sched_reset = -1, + ibmvmc_state_initial = 0, + ibmvmc_state_crqinit = 1, + ibmvmc_state_capabilities = 2, + ibmvmc_state_ready = 3, + ibmvmc_state_failed = 4, +}; + +enum ibmhmc_states { + /* HMC connection not established */ + ibmhmc_state_free = 0, + + /* HMC connection established (open called) */ + ibmhmc_state_initial = 1, + + /* open msg sent to HV, due to ioctl(1) call */ + ibmhmc_state_opening = 2, + + /* HMC connection ready, open resp msg from HV */ + ibmhmc_state_ready = 3, + + /* HMC connection failure */ + ibmhmc_state_failed = 4, +}; + +struct ibmvmc_buffer { + u8 valid; /* 1 when DMA storage allocated to buffer */ + u8 free; /* 1 when buffer available for the Alpha Partition */ + u8 owner; + u16 id; + u32 size; + u32 msg_len; + dma_addr_t dma_addr_local; + dma_addr_t dma_addr_remote; + void *real_addr_local; +}; + +struct ibmvmc_admin_crq_msg { + u8 valid; /* RPA Defined */ + u8 type; /* ibmvmc msg type */ + u8 status; /* Response msg status. Zero is success and on failure, + * either 1 - General Failure, or 2 - Invalid Version is + * returned. + */ + u8 rsvd[2]; + u8 max_hmc; /* Max # of independent HMC connections supported */ + __be16 pool_size; /* Maximum number of buffers supported per HMC + * connection + */ + __be32 max_mtu; /* Maximum message size supported (bytes) */ + __be16 crq_size; /* # of entries available in the CRQ for the + * source partition. The target partition must + * limit the number of outstanding messages to + * one half or less. + */ + __be16 version; /* Indicates the code level of the management partition + * or the hypervisor with the high-order byte + * indicating a major version and the low-order byte + * indicating a minor version. 
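+ * Only the major (high-order) byte is compared in
+ * ibmvmc_process_capabilities(); a mismatch there puts the
+ * driver into ibmvmc_state_failed.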
+ */ +}; + +struct ibmvmc_crq_msg { + u8 valid; /* RPA Defined */ + u8 type; /* ibmvmc msg type */ + u8 status; /* Response msg status */ + union { + u8 rsvd; /* Reserved */ + u8 owner; + } var1; + u8 hmc_session; /* Session Identifier for the current VMC connection */ + u8 hmc_index; /* A unique HMC Idx would be used if multiple management + * applications running concurrently were desired + */ + union { + __be16 rsvd; + __be16 buffer_id; + } var2; + __be32 rsvd; + union { + __be32 rsvd; + __be32 lioba; + __be32 msg_len; + } var3; +}; + +/* an RPA command/response transport queue */ +struct crq_queue { + struct ibmvmc_crq_msg *msgs; + int size, cur; + dma_addr_t msg_token; + spinlock_t lock; +}; + +/* VMC server adapter settings */ +struct crq_server_adapter { + struct device *dev; + struct crq_queue queue; + u32 liobn; + u32 riobn; + struct tasklet_struct work_task; + wait_queue_head_t reset_wait_queue; + struct task_struct *reset_task; +}; + +/* Driver wide settings */ +struct ibmvmc_struct { + u32 state; + u32 max_mtu; + u32 max_buffer_pool_size; + u32 max_hmc_index; + struct crq_server_adapter *adapter; + struct cdev cdev; + u32 vmc_drc_index; +}; + +struct ibmvmc_file_session; + +/* Connection specific settings */ +struct ibmvmc_hmc { + u8 session; + u8 index; + u32 state; + struct crq_server_adapter *adapter; + spinlock_t lock; + unsigned char hmc_id[HMC_ID_LEN]; + struct ibmvmc_buffer buffer[MAX_BUF_POOL_SIZE]; + unsigned short queue_outbound_msgs[MAX_BUF_POOL_SIZE]; + int queue_head, queue_tail; + struct ibmvmc_file_session *file_session; +}; + +struct ibmvmc_file_session { + struct file *file; + struct ibmvmc_hmc *hmc; + bool valid; +}; + +struct ibmvmc_query_struct { + int have_vmc; + int state; + int vmc_drc_index; +}; + +#endif /* __IBMVMC_H */ diff --git a/drivers/misc/ics932s401.c b/drivers/misc/ics932s401.c new file mode 100644 index 0000000000..ee6296b980 --- /dev/null +++ b/drivers/misc/ics932s401.c @@ -0,0 +1,481 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * A driver for the Integrated Circuits ICS932S401 + * Copyright (C) 2008 IBM + * + * Author: Darrick J. 
Wong + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Addresses to scan */ +static const unsigned short normal_i2c[] = { 0x69, I2C_CLIENT_END }; + +/* ICS932S401 registers */ +#define ICS932S401_REG_CFG2 0x01 +#define ICS932S401_CFG1_SPREAD 0x01 +#define ICS932S401_REG_CFG7 0x06 +#define ICS932S401_FS_MASK 0x07 +#define ICS932S401_REG_VENDOR_REV 0x07 +#define ICS932S401_VENDOR 1 +#define ICS932S401_VENDOR_MASK 0x0F +#define ICS932S401_REV 4 +#define ICS932S401_REV_SHIFT 4 +#define ICS932S401_REG_DEVICE 0x09 +#define ICS932S401_DEVICE 11 +#define ICS932S401_REG_CTRL 0x0A +#define ICS932S401_MN_ENABLED 0x80 +#define ICS932S401_CPU_ALT 0x04 +#define ICS932S401_SRC_ALT 0x08 +#define ICS932S401_REG_CPU_M_CTRL 0x0B +#define ICS932S401_M_MASK 0x3F +#define ICS932S401_REG_CPU_N_CTRL 0x0C +#define ICS932S401_REG_CPU_SPREAD1 0x0D +#define ICS932S401_REG_CPU_SPREAD2 0x0E +#define ICS932S401_SPREAD_MASK 0x7FFF +#define ICS932S401_REG_SRC_M_CTRL 0x0F +#define ICS932S401_REG_SRC_N_CTRL 0x10 +#define ICS932S401_REG_SRC_SPREAD1 0x11 +#define ICS932S401_REG_SRC_SPREAD2 0x12 +#define ICS932S401_REG_CPU_DIVISOR 0x13 +#define ICS932S401_CPU_DIVISOR_SHIFT 4 +#define ICS932S401_REG_PCISRC_DIVISOR 0x14 +#define ICS932S401_SRC_DIVISOR_MASK 0x0F +#define ICS932S401_PCI_DIVISOR_SHIFT 4 + +/* Base clock is 14.318MHz */ +#define BASE_CLOCK 14318 + +#define NUM_REGS 21 +#define NUM_MIRRORED_REGS 15 + +static int regs_to_copy[NUM_MIRRORED_REGS] = { + ICS932S401_REG_CFG2, + ICS932S401_REG_CFG7, + ICS932S401_REG_VENDOR_REV, + ICS932S401_REG_DEVICE, + ICS932S401_REG_CTRL, + ICS932S401_REG_CPU_M_CTRL, + ICS932S401_REG_CPU_N_CTRL, + ICS932S401_REG_CPU_SPREAD1, + ICS932S401_REG_CPU_SPREAD2, + ICS932S401_REG_SRC_M_CTRL, + ICS932S401_REG_SRC_N_CTRL, + ICS932S401_REG_SRC_SPREAD1, + ICS932S401_REG_SRC_SPREAD2, + ICS932S401_REG_CPU_DIVISOR, + ICS932S401_REG_PCISRC_DIVISOR, +}; + +/* How often do we reread sensors values? (In jiffies) */ +#define SENSOR_REFRESH_INTERVAL (2 * HZ) + +/* How often do we reread sensor limit values? (In jiffies) */ +#define LIMIT_REFRESH_INTERVAL (60 * HZ) + +struct ics932s401_data { + struct attribute_group attrs; + struct mutex lock; + char sensors_valid; + unsigned long sensors_last_updated; /* In jiffies */ + + u8 regs[NUM_REGS]; +}; + +static int ics932s401_probe(struct i2c_client *client); +static int ics932s401_detect(struct i2c_client *client, + struct i2c_board_info *info); +static void ics932s401_remove(struct i2c_client *client); + +static const struct i2c_device_id ics932s401_id[] = { + { "ics932s401", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, ics932s401_id); + +static struct i2c_driver ics932s401_driver = { + .class = I2C_CLASS_HWMON, + .driver = { + .name = "ics932s401", + }, + .probe = ics932s401_probe, + .remove = ics932s401_remove, + .id_table = ics932s401_id, + .detect = ics932s401_detect, + .address_list = normal_i2c, +}; + +static struct ics932s401_data *ics932s401_update_device(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ics932s401_data *data = i2c_get_clientdata(client); + unsigned long local_jiffies = jiffies; + int i, temp; + + mutex_lock(&data->lock); + if (time_before(local_jiffies, data->sensors_last_updated + + SENSOR_REFRESH_INTERVAL) + && data->sensors_valid) + goto out; + + /* + * Each register must be read as a word and then right shifted 8 bits. + * Not really sure why this is; setting the "byte count programming" + * register to 1 does not fix this problem. 
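+ * A failed word read is treated as zero here rather than aborting the
+ * refresh, so a transient SMBus error shows up as zeroed register data
+ * until the next refresh interval.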
+ */ + for (i = 0; i < NUM_MIRRORED_REGS; i++) { + temp = i2c_smbus_read_word_data(client, regs_to_copy[i]); + if (temp < 0) + temp = 0; + data->regs[regs_to_copy[i]] = temp >> 8; + } + + data->sensors_last_updated = local_jiffies; + data->sensors_valid = 1; + +out: + mutex_unlock(&data->lock); + return data; +} + +static ssize_t show_spread_enabled(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + + if (data->regs[ICS932S401_REG_CFG2] & ICS932S401_CFG1_SPREAD) + return sprintf(buf, "1\n"); + + return sprintf(buf, "0\n"); +} + +/* bit to cpu khz map */ +static const int fs_speeds[] = { + 266666, + 133333, + 200000, + 166666, + 333333, + 100000, + 400000, + 0, +}; + +/* clock divisor map */ +static const int divisors[] = {2, 3, 5, 15, 4, 6, 10, 30, 8, 12, 20, 60, 16, + 24, 40, 120}; + +/* Calculate CPU frequency from the M/N registers. */ +static int calculate_cpu_freq(struct ics932s401_data *data) +{ + int m, n, freq; + + m = data->regs[ICS932S401_REG_CPU_M_CTRL] & ICS932S401_M_MASK; + n = data->regs[ICS932S401_REG_CPU_N_CTRL]; + + /* Pull in bits 8 & 9 from the M register */ + n |= ((int)data->regs[ICS932S401_REG_CPU_M_CTRL] & 0x80) << 1; + n |= ((int)data->regs[ICS932S401_REG_CPU_M_CTRL] & 0x40) << 3; + + freq = BASE_CLOCK * (n + 8) / (m + 2); + freq /= divisors[data->regs[ICS932S401_REG_CPU_DIVISOR] >> + ICS932S401_CPU_DIVISOR_SHIFT]; + + return freq; +} + +static ssize_t show_cpu_clock(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + + return sprintf(buf, "%d\n", calculate_cpu_freq(data)); +} + +static ssize_t show_cpu_clock_sel(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + int freq; + + if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_MN_ENABLED) + freq = calculate_cpu_freq(data); + else { + /* Freq is neatly wrapped up for us */ + int fid = data->regs[ICS932S401_REG_CFG7] & ICS932S401_FS_MASK; + + freq = fs_speeds[fid]; + if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_CPU_ALT) { + switch (freq) { + case 166666: + freq = 160000; + break; + case 333333: + freq = 320000; + break; + } + } + } + + return sprintf(buf, "%d\n", freq); +} + +/* Calculate SRC frequency from the M/N registers. 
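+ * Same scheme as the CPU clock: freq = 14318 kHz * (N + 8) / (M + 2),
+ * with bits 8 and 9 of N taken from the top two bits of the M register,
+ * then divided by the table entry selected by the low nibble of the
+ * PCISRC divisor register.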
*/ +static int calculate_src_freq(struct ics932s401_data *data) +{ + int m, n, freq; + + m = data->regs[ICS932S401_REG_SRC_M_CTRL] & ICS932S401_M_MASK; + n = data->regs[ICS932S401_REG_SRC_N_CTRL]; + + /* Pull in bits 8 & 9 from the M register */ + n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x80) << 1; + n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x40) << 3; + + freq = BASE_CLOCK * (n + 8) / (m + 2); + freq /= divisors[data->regs[ICS932S401_REG_PCISRC_DIVISOR] & + ICS932S401_SRC_DIVISOR_MASK]; + + return freq; +} + +static ssize_t show_src_clock(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + + return sprintf(buf, "%d\n", calculate_src_freq(data)); +} + +static ssize_t show_src_clock_sel(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + int freq; + + if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_MN_ENABLED) + freq = calculate_src_freq(data); + else + /* Freq is neatly wrapped up for us */ + if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_CPU_ALT && + data->regs[ICS932S401_REG_CTRL] & ICS932S401_SRC_ALT) + freq = 96000; + else + freq = 100000; + + return sprintf(buf, "%d\n", freq); +} + +/* Calculate PCI frequency from the SRC M/N registers. */ +static int calculate_pci_freq(struct ics932s401_data *data) +{ + int m, n, freq; + + m = data->regs[ICS932S401_REG_SRC_M_CTRL] & ICS932S401_M_MASK; + n = data->regs[ICS932S401_REG_SRC_N_CTRL]; + + /* Pull in bits 8 & 9 from the M register */ + n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x80) << 1; + n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x40) << 3; + + freq = BASE_CLOCK * (n + 8) / (m + 2); + freq /= divisors[data->regs[ICS932S401_REG_PCISRC_DIVISOR] >> + ICS932S401_PCI_DIVISOR_SHIFT]; + + return freq; +} + +static ssize_t show_pci_clock(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + + return sprintf(buf, "%d\n", calculate_pci_freq(data)); +} + +static ssize_t show_pci_clock_sel(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + int freq; + + if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_MN_ENABLED) + freq = calculate_pci_freq(data); + else + freq = 33333; + + return sprintf(buf, "%d\n", freq); +} + +static ssize_t show_value(struct device *dev, + struct device_attribute *devattr, + char *buf); + +static ssize_t show_spread(struct device *dev, + struct device_attribute *devattr, + char *buf); + +static DEVICE_ATTR(spread_enabled, S_IRUGO, show_spread_enabled, NULL); +static DEVICE_ATTR(cpu_clock_selection, S_IRUGO, show_cpu_clock_sel, NULL); +static DEVICE_ATTR(cpu_clock, S_IRUGO, show_cpu_clock, NULL); +static DEVICE_ATTR(src_clock_selection, S_IRUGO, show_src_clock_sel, NULL); +static DEVICE_ATTR(src_clock, S_IRUGO, show_src_clock, NULL); +static DEVICE_ATTR(pci_clock_selection, S_IRUGO, show_pci_clock_sel, NULL); +static DEVICE_ATTR(pci_clock, S_IRUGO, show_pci_clock, NULL); +static DEVICE_ATTR(usb_clock, S_IRUGO, show_value, NULL); +static DEVICE_ATTR(ref_clock, S_IRUGO, show_value, NULL); +static DEVICE_ATTR(cpu_spread, S_IRUGO, show_spread, NULL); +static DEVICE_ATTR(src_spread, S_IRUGO, show_spread, NULL); + +static struct attribute *ics932s401_attr[] = { + &dev_attr_spread_enabled.attr, + &dev_attr_cpu_clock_selection.attr, + 
&dev_attr_cpu_clock.attr, + &dev_attr_src_clock_selection.attr, + &dev_attr_src_clock.attr, + &dev_attr_pci_clock_selection.attr, + &dev_attr_pci_clock.attr, + &dev_attr_usb_clock.attr, + &dev_attr_ref_clock.attr, + &dev_attr_cpu_spread.attr, + &dev_attr_src_spread.attr, + NULL +}; + +static ssize_t show_value(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + int x; + + if (devattr == &dev_attr_usb_clock) + x = 48000; + else if (devattr == &dev_attr_ref_clock) + x = BASE_CLOCK; + else + BUG(); + + return sprintf(buf, "%d\n", x); +} + +static ssize_t show_spread(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + int reg; + unsigned long val; + + if (!(data->regs[ICS932S401_REG_CFG2] & ICS932S401_CFG1_SPREAD)) + return sprintf(buf, "0%%\n"); + + if (devattr == &dev_attr_src_spread) + reg = ICS932S401_REG_SRC_SPREAD1; + else if (devattr == &dev_attr_cpu_spread) + reg = ICS932S401_REG_CPU_SPREAD1; + else + BUG(); + + val = data->regs[reg] | (data->regs[reg + 1] << 8); + val &= ICS932S401_SPREAD_MASK; + + /* Scale 0..2^14 to -0.5. */ + val = 500000 * val / 16384; + return sprintf(buf, "-0.%lu%%\n", val); +} + +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int ics932s401_detect(struct i2c_client *client, + struct i2c_board_info *info) +{ + struct i2c_adapter *adapter = client->adapter; + int vendor, device, revision; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) + return -ENODEV; + + vendor = i2c_smbus_read_word_data(client, ICS932S401_REG_VENDOR_REV); + vendor >>= 8; + revision = vendor >> ICS932S401_REV_SHIFT; + vendor &= ICS932S401_VENDOR_MASK; + if (vendor != ICS932S401_VENDOR) + return -ENODEV; + + device = i2c_smbus_read_word_data(client, ICS932S401_REG_DEVICE); + device >>= 8; + if (device != ICS932S401_DEVICE) + return -ENODEV; + + if (revision != ICS932S401_REV) + dev_info(&adapter->dev, "Unknown revision %d\n", revision); + + strscpy(info->type, "ics932s401", I2C_NAME_SIZE); + + return 0; +} + +static int ics932s401_probe(struct i2c_client *client) +{ + struct ics932s401_data *data; + int err; + + data = kzalloc(sizeof(struct ics932s401_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + i2c_set_clientdata(client, data); + mutex_init(&data->lock); + + dev_info(&client->dev, "%s chip found\n", client->name); + + /* Register sysfs hooks */ + data->attrs.attrs = ics932s401_attr; + err = sysfs_create_group(&client->dev.kobj, &data->attrs); + if (err) + goto exit_free; + + return 0; + +exit_free: + kfree(data); +exit: + return err; +} + +static void ics932s401_remove(struct i2c_client *client) +{ + struct ics932s401_data *data = i2c_get_clientdata(client); + + sysfs_remove_group(&client->dev.kobj, &data->attrs); + kfree(data); +} + +module_i2c_driver(ics932s401_driver); + +MODULE_AUTHOR("Darrick J. 
Wong "); +MODULE_DESCRIPTION("ICS932S401 driver"); +MODULE_LICENSE("GPL"); + +/* IBM IntelliStation Z30 */ +MODULE_ALIAS("dmi:bvnIBM:*:rn9228:*"); +MODULE_ALIAS("dmi:bvnIBM:*:rn9232:*"); + +/* IBM x3650/x3550 */ +MODULE_ALIAS("dmi:bvnIBM:*:pnIBMSystemx3650*"); +MODULE_ALIAS("dmi:bvnIBM:*:pnIBMSystemx3550*"); diff --git a/drivers/misc/isl29003.c b/drivers/misc/isl29003.c new file mode 100644 index 0000000000..ebf0635aee --- /dev/null +++ b/drivers/misc/isl29003.c @@ -0,0 +1,472 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * isl29003.c - Linux kernel module for + * Intersil ISL29003 ambient light sensor + * + * See file:Documentation/misc-devices/isl29003.rst + * + * Copyright (c) 2009 Daniel Mack + * + * Based on code written by + * Rodolfo Giometti + * Eurotech S.p.A. + */ + +#include +#include +#include +#include +#include + +#define ISL29003_DRV_NAME "isl29003" +#define DRIVER_VERSION "1.0" + +#define ISL29003_REG_COMMAND 0x00 +#define ISL29003_ADC_ENABLED (1 << 7) +#define ISL29003_ADC_PD (1 << 6) +#define ISL29003_TIMING_INT (1 << 5) +#define ISL29003_MODE_SHIFT (2) +#define ISL29003_MODE_MASK (0x3 << ISL29003_MODE_SHIFT) +#define ISL29003_RES_SHIFT (0) +#define ISL29003_RES_MASK (0x3 << ISL29003_RES_SHIFT) + +#define ISL29003_REG_CONTROL 0x01 +#define ISL29003_INT_FLG (1 << 5) +#define ISL29003_RANGE_SHIFT (2) +#define ISL29003_RANGE_MASK (0x3 << ISL29003_RANGE_SHIFT) +#define ISL29003_INT_PERSISTS_SHIFT (0) +#define ISL29003_INT_PERSISTS_MASK (0xf << ISL29003_INT_PERSISTS_SHIFT) + +#define ISL29003_REG_IRQ_THRESH_HI 0x02 +#define ISL29003_REG_IRQ_THRESH_LO 0x03 +#define ISL29003_REG_LSB_SENSOR 0x04 +#define ISL29003_REG_MSB_SENSOR 0x05 +#define ISL29003_REG_LSB_TIMER 0x06 +#define ISL29003_REG_MSB_TIMER 0x07 + +#define ISL29003_NUM_CACHABLE_REGS 4 + +struct isl29003_data { + struct i2c_client *client; + struct mutex lock; + u8 reg_cache[ISL29003_NUM_CACHABLE_REGS]; + u8 power_state_before_suspend; +}; + +static int gain_range[] = { + 1000, 4000, 16000, 64000 +}; + +/* + * register access helpers + */ + +static int __isl29003_read_reg(struct i2c_client *client, + u32 reg, u8 mask, u8 shift) +{ + struct isl29003_data *data = i2c_get_clientdata(client); + + return (data->reg_cache[reg] & mask) >> shift; +} + +static int __isl29003_write_reg(struct i2c_client *client, + u32 reg, u8 mask, u8 shift, u8 val) +{ + struct isl29003_data *data = i2c_get_clientdata(client); + int ret = 0; + u8 tmp; + + if (reg >= ISL29003_NUM_CACHABLE_REGS) + return -EINVAL; + + mutex_lock(&data->lock); + + tmp = data->reg_cache[reg]; + tmp &= ~mask; + tmp |= val << shift; + + ret = i2c_smbus_write_byte_data(client, reg, tmp); + if (!ret) + data->reg_cache[reg] = tmp; + + mutex_unlock(&data->lock); + return ret; +} + +/* + * internally used functions + */ + +/* range */ +static int isl29003_get_range(struct i2c_client *client) +{ + return __isl29003_read_reg(client, ISL29003_REG_CONTROL, + ISL29003_RANGE_MASK, ISL29003_RANGE_SHIFT); +} + +static int isl29003_set_range(struct i2c_client *client, int range) +{ + return __isl29003_write_reg(client, ISL29003_REG_CONTROL, + ISL29003_RANGE_MASK, ISL29003_RANGE_SHIFT, range); +} + +/* resolution */ +static int isl29003_get_resolution(struct i2c_client *client) +{ + return __isl29003_read_reg(client, ISL29003_REG_COMMAND, + ISL29003_RES_MASK, ISL29003_RES_SHIFT); +} + +static int isl29003_set_resolution(struct i2c_client *client, int res) +{ + return __isl29003_write_reg(client, ISL29003_REG_COMMAND, + ISL29003_RES_MASK, ISL29003_RES_SHIFT, res); +} + +/* mode 
*/ +static int isl29003_get_mode(struct i2c_client *client) +{ + return __isl29003_read_reg(client, ISL29003_REG_COMMAND, + ISL29003_MODE_MASK, ISL29003_MODE_SHIFT); +} + +static int isl29003_set_mode(struct i2c_client *client, int mode) +{ + return __isl29003_write_reg(client, ISL29003_REG_COMMAND, + ISL29003_MODE_MASK, ISL29003_MODE_SHIFT, mode); +} + +/* power_state */ +static int isl29003_set_power_state(struct i2c_client *client, int state) +{ + return __isl29003_write_reg(client, ISL29003_REG_COMMAND, + ISL29003_ADC_ENABLED | ISL29003_ADC_PD, 0, + state ? ISL29003_ADC_ENABLED : ISL29003_ADC_PD); +} + +static int isl29003_get_power_state(struct i2c_client *client) +{ + struct isl29003_data *data = i2c_get_clientdata(client); + u8 cmdreg = data->reg_cache[ISL29003_REG_COMMAND]; + + return ~cmdreg & ISL29003_ADC_PD; +} + +static int isl29003_get_adc_value(struct i2c_client *client) +{ + struct isl29003_data *data = i2c_get_clientdata(client); + int lsb, msb, range, bitdepth; + + mutex_lock(&data->lock); + lsb = i2c_smbus_read_byte_data(client, ISL29003_REG_LSB_SENSOR); + + if (lsb < 0) { + mutex_unlock(&data->lock); + return lsb; + } + + msb = i2c_smbus_read_byte_data(client, ISL29003_REG_MSB_SENSOR); + mutex_unlock(&data->lock); + + if (msb < 0) + return msb; + + range = isl29003_get_range(client); + bitdepth = (4 - isl29003_get_resolution(client)) * 4; + return (((msb << 8) | lsb) * gain_range[range]) >> bitdepth; +} + +/* + * sysfs layer + */ + +/* range */ +static ssize_t isl29003_show_range(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + + return sysfs_emit(buf, "%i\n", isl29003_get_range(client)); +} + +static ssize_t isl29003_store_range(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + unsigned long val; + int ret; + + ret = kstrtoul(buf, 10, &val); + if (ret) + return ret; + + if (val > 3) + return -EINVAL; + + ret = isl29003_set_range(client, val); + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR(range, S_IWUSR | S_IRUGO, + isl29003_show_range, isl29003_store_range); + + +/* resolution */ +static ssize_t isl29003_show_resolution(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + + return sysfs_emit(buf, "%d\n", isl29003_get_resolution(client)); +} + +static ssize_t isl29003_store_resolution(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + unsigned long val; + int ret; + + ret = kstrtoul(buf, 10, &val); + if (ret) + return ret; + + if (val > 3) + return -EINVAL; + + ret = isl29003_set_resolution(client, val); + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR(resolution, S_IWUSR | S_IRUGO, + isl29003_show_resolution, isl29003_store_resolution); + +/* mode */ +static ssize_t isl29003_show_mode(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + + return sysfs_emit(buf, "%d\n", isl29003_get_mode(client)); +} + +static ssize_t isl29003_store_mode(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + unsigned long val; + int ret; + + ret = kstrtoul(buf, 10, &val); + if (ret) + return ret; + + if (val > 2) + return -EINVAL; + + ret = isl29003_set_mode(client, val); + if 
(ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, + isl29003_show_mode, isl29003_store_mode); + + +/* power state */ +static ssize_t isl29003_show_power_state(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + + return sysfs_emit(buf, "%d\n", isl29003_get_power_state(client)); +} + +static ssize_t isl29003_store_power_state(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + unsigned long val; + int ret; + + ret = kstrtoul(buf, 10, &val); + if (ret) + return ret; + + if (val > 1) + return -EINVAL; + + ret = isl29003_set_power_state(client, val); + return ret ? ret : count; +} + +static DEVICE_ATTR(power_state, S_IWUSR | S_IRUGO, + isl29003_show_power_state, isl29003_store_power_state); + + +/* lux */ +static ssize_t isl29003_show_lux(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + + /* No LUX data if not operational */ + if (!isl29003_get_power_state(client)) + return -EBUSY; + + return sysfs_emit(buf, "%d\n", isl29003_get_adc_value(client)); +} + +static DEVICE_ATTR(lux, S_IRUGO, isl29003_show_lux, NULL); + +static struct attribute *isl29003_attributes[] = { + &dev_attr_range.attr, + &dev_attr_resolution.attr, + &dev_attr_mode.attr, + &dev_attr_power_state.attr, + &dev_attr_lux.attr, + NULL +}; + +static const struct attribute_group isl29003_attr_group = { + .attrs = isl29003_attributes, +}; + +static int isl29003_init_client(struct i2c_client *client) +{ + struct isl29003_data *data = i2c_get_clientdata(client); + int i; + + /* read all the registers once to fill the cache. + * if one of the reads fails, we consider the init failed */ + for (i = 0; i < ARRAY_SIZE(data->reg_cache); i++) { + int v = i2c_smbus_read_byte_data(client, i); + + if (v < 0) + return -ENODEV; + + data->reg_cache[i] = v; + } + + /* set defaults */ + isl29003_set_range(client, 0); + isl29003_set_resolution(client, 0); + isl29003_set_mode(client, 0); + isl29003_set_power_state(client, 0); + + return 0; +} + +/* + * I2C layer + */ + +static int isl29003_probe(struct i2c_client *client) +{ + struct i2c_adapter *adapter = client->adapter; + struct isl29003_data *data; + int err = 0; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE)) + return -EIO; + + data = kzalloc(sizeof(struct isl29003_data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->client = client; + i2c_set_clientdata(client, data); + mutex_init(&data->lock); + + /* initialize the ISL29003 chip */ + err = isl29003_init_client(client); + if (err) + goto exit_kfree; + + /* register sysfs hooks */ + err = sysfs_create_group(&client->dev.kobj, &isl29003_attr_group); + if (err) + goto exit_kfree; + + dev_info(&client->dev, "driver version %s enabled\n", DRIVER_VERSION); + return 0; + +exit_kfree: + kfree(data); + return err; +} + +static void isl29003_remove(struct i2c_client *client) +{ + sysfs_remove_group(&client->dev.kobj, &isl29003_attr_group); + isl29003_set_power_state(client, 0); + kfree(i2c_get_clientdata(client)); +} + +#ifdef CONFIG_PM_SLEEP +static int isl29003_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct isl29003_data *data = i2c_get_clientdata(client); + + data->power_state_before_suspend = isl29003_get_power_state(client); + return isl29003_set_power_state(client, 0); +} + +static int isl29003_resume(struct device 
*dev) +{ + int i; + struct i2c_client *client = to_i2c_client(dev); + struct isl29003_data *data = i2c_get_clientdata(client); + + /* restore registers from cache */ + for (i = 0; i < ARRAY_SIZE(data->reg_cache); i++) + if (i2c_smbus_write_byte_data(client, i, data->reg_cache[i])) + return -EIO; + + return isl29003_set_power_state(client, + data->power_state_before_suspend); +} + +static SIMPLE_DEV_PM_OPS(isl29003_pm_ops, isl29003_suspend, isl29003_resume); +#define ISL29003_PM_OPS (&isl29003_pm_ops) + +#else +#define ISL29003_PM_OPS NULL +#endif /* CONFIG_PM_SLEEP */ + +static const struct i2c_device_id isl29003_id[] = { + { "isl29003", 0 }, + {} +}; +MODULE_DEVICE_TABLE(i2c, isl29003_id); + +static struct i2c_driver isl29003_driver = { + .driver = { + .name = ISL29003_DRV_NAME, + .pm = ISL29003_PM_OPS, + }, + .probe = isl29003_probe, + .remove = isl29003_remove, + .id_table = isl29003_id, +}; + +module_i2c_driver(isl29003_driver); + +MODULE_AUTHOR("Daniel Mack "); +MODULE_DESCRIPTION("ISL29003 ambient light sensor driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRIVER_VERSION); diff --git a/drivers/misc/isl29020.c b/drivers/misc/isl29020.c new file mode 100644 index 0000000000..c5976fa8c8 --- /dev/null +++ b/drivers/misc/isl29020.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * isl29020.c - Intersil ALS Driver + * + * Copyright (C) 2008 Intel Corp + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * Data sheet at: http://www.intersil.com/data/fn/fn6505.pdf + */ + +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_MUTEX(mutex); + +static ssize_t als_sensing_range_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + int val; + + val = i2c_smbus_read_byte_data(client, 0x00); + + if (val < 0) + return val; + return sprintf(buf, "%d000\n", 1 << (2 * (val & 3))); + +} + +static ssize_t als_lux_input_data_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + int ret_val, val; + unsigned long int lux; + int temp; + + pm_runtime_get_sync(dev); + msleep(100); + + mutex_lock(&mutex); + temp = i2c_smbus_read_byte_data(client, 0x02); /* MSB data */ + if (temp < 0) { + pm_runtime_put_sync(dev); + mutex_unlock(&mutex); + return temp; + } + + ret_val = i2c_smbus_read_byte_data(client, 0x01); /* LSB data */ + mutex_unlock(&mutex); + + if (ret_val < 0) { + pm_runtime_put_sync(dev); + return ret_val; + } + + ret_val |= temp << 8; + val = i2c_smbus_read_byte_data(client, 0x00); + pm_runtime_put_sync(dev); + if (val < 0) + return val; + lux = ((((1 << (2 * (val & 3))))*1000) * ret_val) / 65536; + return sprintf(buf, "%ld\n", lux); +} + +static ssize_t als_sensing_range_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + int ret_val; + unsigned long val; + + ret_val = kstrtoul(buf, 10, &val); + if (ret_val) + return ret_val; + + if (val < 1 || val > 64000) + return -EINVAL; + + /* Pick the smallest sensor range that will meet our requirements */ + if (val <= 1000) + val = 1; + else if (val <= 4000) + val = 2; + else if (val <= 16000) + val = 3; + else + val = 4; + + ret_val = i2c_smbus_read_byte_data(client, 0x00); + if (ret_val < 0) + return ret_val; + + ret_val &= 0xFC; /*reset the bit before 
setting them */ + ret_val |= val - 1; + ret_val = i2c_smbus_write_byte_data(client, 0x00, ret_val); + + if (ret_val < 0) + return ret_val; + return count; +} + +static void als_set_power_state(struct i2c_client *client, int enable) +{ + int ret_val; + + ret_val = i2c_smbus_read_byte_data(client, 0x00); + if (ret_val < 0) + return; + + if (enable) + ret_val |= 0x80; + else + ret_val &= 0x7F; + + i2c_smbus_write_byte_data(client, 0x00, ret_val); +} + +static DEVICE_ATTR(lux0_sensor_range, S_IRUGO | S_IWUSR, + als_sensing_range_show, als_sensing_range_store); +static DEVICE_ATTR(lux0_input, S_IRUGO, als_lux_input_data_show, NULL); + +static struct attribute *mid_att_als[] = { + &dev_attr_lux0_sensor_range.attr, + &dev_attr_lux0_input.attr, + NULL +}; + +static const struct attribute_group m_als_gr = { + .name = "isl29020", + .attrs = mid_att_als +}; + +static int als_set_default_config(struct i2c_client *client) +{ + int retval; + + retval = i2c_smbus_write_byte_data(client, 0x00, 0xc0); + if (retval < 0) { + dev_err(&client->dev, "default write failed."); + return retval; + } + return 0; +} + +static int isl29020_probe(struct i2c_client *client) +{ + int res; + + res = als_set_default_config(client); + if (res < 0) + return res; + + res = sysfs_create_group(&client->dev.kobj, &m_als_gr); + if (res) { + dev_err(&client->dev, "isl29020: device create file failed\n"); + return res; + } + dev_info(&client->dev, "%s isl29020: ALS chip found\n", client->name); + als_set_power_state(client, 0); + pm_runtime_enable(&client->dev); + return res; +} + +static void isl29020_remove(struct i2c_client *client) +{ + pm_runtime_disable(&client->dev); + sysfs_remove_group(&client->dev.kobj, &m_als_gr); +} + +static const struct i2c_device_id isl29020_id[] = { + { "isl29020", 0 }, + { } +}; + +MODULE_DEVICE_TABLE(i2c, isl29020_id); + +#ifdef CONFIG_PM + +static int isl29020_runtime_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + als_set_power_state(client, 0); + return 0; +} + +static int isl29020_runtime_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + als_set_power_state(client, 1); + return 0; +} + +static const struct dev_pm_ops isl29020_pm_ops = { + .runtime_suspend = isl29020_runtime_suspend, + .runtime_resume = isl29020_runtime_resume, +}; + +#define ISL29020_PM_OPS (&isl29020_pm_ops) +#else /* CONFIG_PM */ +#define ISL29020_PM_OPS NULL +#endif /* CONFIG_PM */ + +static struct i2c_driver isl29020_driver = { + .driver = { + .name = "isl29020", + .pm = ISL29020_PM_OPS, + }, + .probe = isl29020_probe, + .remove = isl29020_remove, + .id_table = isl29020_id, +}; + +module_i2c_driver(isl29020_driver); + +MODULE_AUTHOR("Kalhan Trisal "); +MODULE_DESCRIPTION("Intersil isl29020 ALS Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c new file mode 100644 index 0000000000..88b91ad8e5 --- /dev/null +++ b/drivers/misc/kgdbts.c @@ -0,0 +1,1191 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * kgdbts is a test suite for kgdb for the sole purpose of validating + * that key pieces of the kgdb internals are working properly such as + * HW/SW breakpoints, single stepping, and NMI. + * + * Created by: Jason Wessel + * + * Copyright (c) 2008 Wind River Systems, Inc. + */ +/* Information about the kgdb test suite. + * ------------------------------------- + * + * The kgdb test suite is designed as a KGDB I/O module which + * simulates the communications that a debugger would have with kgdb. 
+ * The tests are broken up in to a line by line and referenced here as + * a "get" which is kgdb requesting input and "put" which is kgdb + * sending a response. + * + * The kgdb suite can be invoked from the kernel command line + * arguments system or executed dynamically at run time. The test + * suite uses the variable "kgdbts" to obtain the information about + * which tests to run and to configure the verbosity level. The + * following are the various characters you can use with the kgdbts= + * line: + * + * When using the "kgdbts=" you only choose one of the following core + * test types: + * A = Run all the core tests silently + * V1 = Run all the core tests with minimal output + * V2 = Run all the core tests in debug mode + * + * You can also specify optional tests: + * N## = Go to sleep with interrupts of for ## seconds + * to test the HW NMI watchdog + * F## = Break at kernel_clone for ## iterations + * S## = Break at sys_open for ## iterations + * I## = Run the single step test ## iterations + * + * NOTE: that the kernel_clone and sys_open tests are mutually exclusive. + * + * To invoke the kgdb test suite from boot you use a kernel start + * argument as follows: + * kgdbts=V1 kgdbwait + * Or if you wanted to perform the NMI test for 6 seconds and kernel_clone + * test for 100 forks, you could use: + * kgdbts=V1N6F100 kgdbwait + * + * The test suite can also be invoked at run time with: + * echo kgdbts=V1N6F100 > /sys/module/kgdbts/parameters/kgdbts + * Or as another example: + * echo kgdbts=V2 > /sys/module/kgdbts/parameters/kgdbts + * + * When developing a new kgdb arch specific implementation or + * using these tests for the purpose of regression testing, + * several invocations are required. + * + * 1) Boot with the test suite enabled by using the kernel arguments + * "kgdbts=V1F100 kgdbwait" + * ## If kgdb arch specific implementation has NMI use + * "kgdbts=V1N6F100 + * + * 2) After the system boot run the basic test. + * echo kgdbts=V1 > /sys/module/kgdbts/parameters/kgdbts + * + * 3) Run the concurrency tests. It is best to use n+1 + * while loops where n is the number of cpus you have + * in your system. The example below uses only two + * loops. + * + * ## This tests break points on sys_open + * while [ 1 ] ; do find / > /dev/null 2>&1 ; done & + * while [ 1 ] ; do find / > /dev/null 2>&1 ; done & + * echo kgdbts=V1S10000 > /sys/module/kgdbts/parameters/kgdbts + * fg # and hit control-c + * fg # and hit control-c + * ## This tests break points on kernel_clone + * while [ 1 ] ; do date > /dev/null ; done & + * while [ 1 ] ; do date > /dev/null ; done & + * echo kgdbts=V1F1000 > /sys/module/kgdbts/parameters/kgdbts + * fg # and hit control-c + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define v1printk(a...) do { \ + if (verbose) \ + printk(KERN_INFO a); \ +} while (0) +#define v2printk(a...) do { \ + if (verbose > 1) { \ + printk(KERN_INFO a); \ + } \ + touch_nmi_watchdog(); \ +} while (0) +#define eprintk(a...) 
do { \ + printk(KERN_ERR a); \ + WARN_ON(1); \ +} while (0) +#define MAX_CONFIG_LEN 40 + +static struct kgdb_io kgdbts_io_ops; +static char get_buf[BUFMAX]; +static int get_buf_cnt; +static char put_buf[BUFMAX]; +static int put_buf_cnt; +static char scratch_buf[BUFMAX]; +static int verbose; +static int repeat_test; +static int test_complete; +static int send_ack; +static int final_ack; +static int force_hwbrks; +static int hwbreaks_ok; +static int hw_break_val; +static int hw_break_val2; +static int cont_instead_of_sstep; +static unsigned long cont_thread_id; +static unsigned long sstep_thread_id; +#if defined(CONFIG_ARM) || defined(CONFIG_MIPS) || defined(CONFIG_SPARC) +static int arch_needs_sstep_emulation = 1; +#else +static int arch_needs_sstep_emulation; +#endif +static unsigned long cont_addr; +static unsigned long sstep_addr; +static int restart_from_top_after_write; +static int sstep_state; + +/* Storage for the registers, in GDB format. */ +static unsigned long kgdbts_gdb_regs[(NUMREGBYTES + + sizeof(unsigned long) - 1) / + sizeof(unsigned long)]; +static struct pt_regs kgdbts_regs; + +/* -1 = init not run yet, 0 = unconfigured, 1 = configured. */ +static int configured = -1; + +#ifdef CONFIG_KGDB_TESTS_BOOT_STRING +static char config[MAX_CONFIG_LEN] = CONFIG_KGDB_TESTS_BOOT_STRING; +#else +static char config[MAX_CONFIG_LEN]; +#endif +static struct kparam_string kps = { + .string = config, + .maxlen = MAX_CONFIG_LEN, +}; + +static void fill_get_buf(char *buf); + +struct test_struct { + char *get; + char *put; + void (*get_handler)(char *); + int (*put_handler)(char *, char *); +}; + +struct test_state { + char *name; + struct test_struct *tst; + int idx; + int (*run_test) (int, int); + int (*validate_put) (char *); +}; + +static struct test_state ts; + +static int kgdbts_unreg_thread(void *ptr) +{ + /* Wait until the tests are complete and then ungresiter the I/O + * driver. + */ + while (!final_ack) + msleep_interruptible(1500); + /* Pause for any other threads to exit after final ack. */ + msleep_interruptible(1000); + if (configured) + kgdb_unregister_io_module(&kgdbts_io_ops); + configured = 0; + + return 0; +} + +/* This is noinline such that it can be used for a single location to + * place a breakpoint + */ +static noinline void kgdbts_break_test(void) +{ + v2printk("kgdbts: breakpoint complete\n"); +} + +/* + * This is a cached wrapper for kallsyms_lookup_name(). + * + * The cache is a big win for several tests. For example it more the doubles + * the cycles per second during the sys_open test. This is not theoretic, + * the performance improvement shows up at human scale, especially when + * testing using emulators. + * + * Obviously neither re-entrant nor thread-safe but that is OK since it + * can only be called from the debug trap (and therefore all other CPUs + * are halted). + */ +static unsigned long lookup_addr(char *arg) +{ + static char cached_arg[KSYM_NAME_LEN]; + static unsigned long cached_addr; + + if (strcmp(arg, cached_arg)) { + strscpy(cached_arg, arg, KSYM_NAME_LEN); + cached_addr = kallsyms_lookup_name(arg); + } + + return (unsigned long)dereference_function_descriptor( + (void *)cached_addr); +} + +static void break_helper(char *bp_type, char *arg, unsigned long vaddr) +{ + unsigned long addr; + + if (arg) + addr = lookup_addr(arg); + else + addr = vaddr; + + sprintf(scratch_buf, "%s,%lx,%i", bp_type, addr, + BREAK_INSTR_SIZE); + fill_get_buf(scratch_buf); +} + +static void sw_break(char *arg) +{ + break_helper(force_hwbrks ? 
"Z1" : "Z0", arg, 0); +} + +static void sw_rem_break(char *arg) +{ + break_helper(force_hwbrks ? "z1" : "z0", arg, 0); +} + +static void hw_break(char *arg) +{ + break_helper("Z1", arg, 0); +} + +static void hw_rem_break(char *arg) +{ + break_helper("z1", arg, 0); +} + +static void hw_write_break(char *arg) +{ + break_helper("Z2", arg, 0); +} + +static void hw_rem_write_break(char *arg) +{ + break_helper("z2", arg, 0); +} + +static void hw_access_break(char *arg) +{ + break_helper("Z4", arg, 0); +} + +static void hw_rem_access_break(char *arg) +{ + break_helper("z4", arg, 0); +} + +static void hw_break_val_access(void) +{ + hw_break_val2 = hw_break_val; +} + +static void hw_break_val_write(void) +{ + hw_break_val++; +} + +static int get_thread_id_continue(char *put_str, char *arg) +{ + char *ptr = &put_str[11]; + + if (put_str[1] != 'T' || put_str[2] != '0') + return 1; + kgdb_hex2long(&ptr, &cont_thread_id); + return 0; +} + +static int check_and_rewind_pc(char *put_str, char *arg) +{ + unsigned long addr = lookup_addr(arg); + unsigned long ip; + int offset = 0; + + kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs, + NUMREGBYTES); + gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs); + ip = instruction_pointer(&kgdbts_regs); + v2printk("Stopped at IP: %lx\n", ip); +#ifdef GDB_ADJUSTS_BREAK_OFFSET + /* On some arches, a breakpoint stop requires it to be decremented */ + if (addr + BREAK_INSTR_SIZE == ip) + offset = -BREAK_INSTR_SIZE; +#endif + + if (arch_needs_sstep_emulation && sstep_addr && + ip + offset == sstep_addr && + ((!strcmp(arg, "do_sys_openat2") || !strcmp(arg, "kernel_clone")))) { + /* This is special case for emulated single step */ + v2printk("Emul: rewind hit single step bp\n"); + restart_from_top_after_write = 1; + } else if (strcmp(arg, "silent") && ip + offset != addr) { + eprintk("kgdbts: BP mismatch %lx expected %lx\n", + ip + offset, addr); + return 1; + } + /* Readjust the instruction pointer if needed */ + ip += offset; + cont_addr = ip; +#ifdef GDB_ADJUSTS_BREAK_OFFSET + instruction_pointer_set(&kgdbts_regs, ip); +#endif + return 0; +} + +static int check_single_step(char *put_str, char *arg) +{ + unsigned long addr = lookup_addr(arg); + static int matched_id; + + /* + * From an arch indepent point of view the instruction pointer + * should be on a different instruction + */ + kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs, + NUMREGBYTES); + gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs); + v2printk("Singlestep stopped at IP: %lx\n", + instruction_pointer(&kgdbts_regs)); + + if (sstep_thread_id != cont_thread_id) { + /* + * Ensure we stopped in the same thread id as before, else the + * debugger should continue until the original thread that was + * single stepped is scheduled again, emulating gdb's behavior. 
+ */ + v2printk("ThrID does not match: %lx\n", cont_thread_id); + if (arch_needs_sstep_emulation) { + if (matched_id && + instruction_pointer(&kgdbts_regs) != addr) + goto continue_test; + matched_id++; + ts.idx -= 2; + sstep_state = 0; + return 0; + } + cont_instead_of_sstep = 1; + ts.idx -= 4; + return 0; + } +continue_test: + matched_id = 0; + if (instruction_pointer(&kgdbts_regs) == addr) { + eprintk("kgdbts: SingleStep failed at %lx\n", + instruction_pointer(&kgdbts_regs)); + return 1; + } + + return 0; +} + +static void write_regs(char *arg) +{ + memset(scratch_buf, 0, sizeof(scratch_buf)); + scratch_buf[0] = 'G'; + pt_regs_to_gdb_regs(kgdbts_gdb_regs, &kgdbts_regs); + kgdb_mem2hex((char *)kgdbts_gdb_regs, &scratch_buf[1], NUMREGBYTES); + fill_get_buf(scratch_buf); +} + +static void skip_back_repeat_test(char *arg) +{ + int go_back = simple_strtol(arg, NULL, 10); + + repeat_test--; + if (repeat_test <= 0) { + ts.idx++; + } else { + if (repeat_test % 100 == 0) + v1printk("kgdbts:RUN ... %d remaining\n", repeat_test); + + ts.idx -= go_back; + } + fill_get_buf(ts.tst[ts.idx].get); +} + +static int got_break(char *put_str, char *arg) +{ + test_complete = 1; + if (!strncmp(put_str+1, arg, 2)) { + if (!strncmp(arg, "T0", 2)) + test_complete = 2; + return 0; + } + return 1; +} + +static void get_cont_catch(char *arg) +{ + /* Always send detach because the test is completed at this point */ + fill_get_buf("D"); +} + +static int put_cont_catch(char *put_str, char *arg) +{ + /* This is at the end of the test and we catch any and all input */ + v2printk("kgdbts: cleanup task: %lx\n", sstep_thread_id); + ts.idx--; + return 0; +} + +static int emul_reset(char *put_str, char *arg) +{ + if (strncmp(put_str, "$OK", 3)) + return 1; + if (restart_from_top_after_write) { + restart_from_top_after_write = 0; + ts.idx = -1; + } + return 0; +} + +static void emul_sstep_get(char *arg) +{ + if (!arch_needs_sstep_emulation) { + if (cont_instead_of_sstep) { + cont_instead_of_sstep = 0; + fill_get_buf("c"); + } else { + fill_get_buf(arg); + } + return; + } + switch (sstep_state) { + case 0: + v2printk("Emulate single step\n"); + /* Start by looking at the current PC */ + fill_get_buf("g"); + break; + case 1: + /* set breakpoint */ + break_helper("Z0", NULL, sstep_addr); + break; + case 2: + /* Continue */ + fill_get_buf("c"); + break; + case 3: + /* Clear breakpoint */ + break_helper("z0", NULL, sstep_addr); + break; + default: + eprintk("kgdbts: ERROR failed sstep get emulation\n"); + } + sstep_state++; +} + +static int emul_sstep_put(char *put_str, char *arg) +{ + if (!arch_needs_sstep_emulation) { + char *ptr = &put_str[11]; + if (put_str[1] != 'T' || put_str[2] != '0') + return 1; + kgdb_hex2long(&ptr, &sstep_thread_id); + return 0; + } + switch (sstep_state) { + case 1: + /* validate the "g" packet to get the IP */ + kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs, + NUMREGBYTES); + gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs); + v2printk("Stopped at IP: %lx\n", + instruction_pointer(&kgdbts_regs)); + /* Want to stop at IP + break instruction size by default */ + sstep_addr = cont_addr + BREAK_INSTR_SIZE; + break; + case 2: + if (strncmp(put_str, "$OK", 3)) { + eprintk("kgdbts: failed sstep break set\n"); + return 1; + } + break; + case 3: + if (strncmp(put_str, "$T0", 3)) { + eprintk("kgdbts: failed continue sstep\n"); + return 1; + } else { + char *ptr = &put_str[11]; + kgdb_hex2long(&ptr, &sstep_thread_id); + } + break; + case 4: + if (strncmp(put_str, "$OK", 3)) { + eprintk("kgdbts: failed 
sstep break unset\n"); + return 1; + } + /* Single step is complete so continue on! */ + sstep_state = 0; + return 0; + default: + eprintk("kgdbts: ERROR failed sstep put emulation\n"); + } + + /* Continue on the same test line until emulation is complete */ + ts.idx--; + return 0; +} + +static int final_ack_set(char *put_str, char *arg) +{ + if (strncmp(put_str+1, arg, 2)) + return 1; + final_ack = 1; + return 0; +} +/* + * Test to plant a breakpoint and detach, which should clear out the + * breakpoint and restore the original instruction. + */ +static struct test_struct plant_and_detach_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */ + { "D", "OK" }, /* Detach */ + { "", "" }, +}; + +/* + * Simple test to write in a software breakpoint, check for the + * correct stop location and detach. + */ +static struct test_struct sw_breakpoint_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */ + { "c", "T0*", }, /* Continue */ + { "g", "kgdbts_break_test", NULL, check_and_rewind_pc }, + { "write", "OK", write_regs }, + { "kgdbts_break_test", "OK", sw_rem_break }, /*remove breakpoint */ + { "D", "OK" }, /* Detach */ + { "D", "OK", NULL, got_break }, /* On success we made it here */ + { "", "" }, +}; + +/* + * Test a known bad memory read location to test the fault handler and + * read bytes 1-8 at the bad address + */ +static struct test_struct bad_read_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "m0,1", "E*" }, /* read 1 byte at address 1 */ + { "m0,2", "E*" }, /* read 1 byte at address 2 */ + { "m0,3", "E*" }, /* read 1 byte at address 3 */ + { "m0,4", "E*" }, /* read 1 byte at address 4 */ + { "m0,5", "E*" }, /* read 1 byte at address 5 */ + { "m0,6", "E*" }, /* read 1 byte at address 6 */ + { "m0,7", "E*" }, /* read 1 byte at address 7 */ + { "m0,8", "E*" }, /* read 1 byte at address 8 */ + { "D", "OK" }, /* Detach which removes all breakpoints and continues */ + { "", "" }, +}; + +/* + * Test for hitting a breakpoint, remove it, single step, plant it + * again and detach. + */ +static struct test_struct singlestep_break_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */ + { "c", "T0*", NULL, get_thread_id_continue }, /* Continue */ + { "kgdbts_break_test", "OK", sw_rem_break }, /*remove breakpoint */ + { "g", "kgdbts_break_test", NULL, check_and_rewind_pc }, + { "write", "OK", write_regs }, /* Write registers */ + { "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */ + { "g", "kgdbts_break_test", NULL, check_single_step }, + { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */ + { "c", "T0*", }, /* Continue */ + { "g", "kgdbts_break_test", NULL, check_and_rewind_pc }, + { "write", "OK", write_regs }, /* Write registers */ + { "D", "OK" }, /* Remove all breakpoints and continues */ + { "", "" }, +}; + +/* + * Test for hitting a breakpoint at kernel_clone for what ever the number + * of iterations required by the variable repeat_test. 
+ */ +static struct test_struct do_kernel_clone_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "kernel_clone", "OK", sw_break, }, /* set sw breakpoint */ + { "c", "T0*", NULL, get_thread_id_continue }, /* Continue */ + { "kernel_clone", "OK", sw_rem_break }, /*remove breakpoint */ + { "g", "kernel_clone", NULL, check_and_rewind_pc }, /* check location */ + { "write", "OK", write_regs, emul_reset }, /* Write registers */ + { "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */ + { "g", "kernel_clone", NULL, check_single_step }, + { "kernel_clone", "OK", sw_break, }, /* set sw breakpoint */ + { "7", "T0*", skip_back_repeat_test }, /* Loop based on repeat_test */ + { "D", "OK", NULL, final_ack_set }, /* detach and unregister I/O */ + { "", "", get_cont_catch, put_cont_catch }, +}; + +/* Test for hitting a breakpoint at sys_open for what ever the number + * of iterations required by the variable repeat_test. + */ +static struct test_struct sys_open_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "do_sys_openat2", "OK", sw_break, }, /* set sw breakpoint */ + { "c", "T0*", NULL, get_thread_id_continue }, /* Continue */ + { "do_sys_openat2", "OK", sw_rem_break }, /*remove breakpoint */ + { "g", "do_sys_openat2", NULL, check_and_rewind_pc }, /* check location */ + { "write", "OK", write_regs, emul_reset }, /* Write registers */ + { "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */ + { "g", "do_sys_openat2", NULL, check_single_step }, + { "do_sys_openat2", "OK", sw_break, }, /* set sw breakpoint */ + { "7", "T0*", skip_back_repeat_test }, /* Loop based on repeat_test */ + { "D", "OK", NULL, final_ack_set }, /* detach and unregister I/O */ + { "", "", get_cont_catch, put_cont_catch }, +}; + +/* + * Test for hitting a simple hw breakpoint + */ +static struct test_struct hw_breakpoint_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "kgdbts_break_test", "OK", hw_break, }, /* set hw breakpoint */ + { "c", "T0*", }, /* Continue */ + { "g", "kgdbts_break_test", NULL, check_and_rewind_pc }, + { "write", "OK", write_regs }, + { "kgdbts_break_test", "OK", hw_rem_break }, /*remove breakpoint */ + { "D", "OK" }, /* Detach */ + { "D", "OK", NULL, got_break }, /* On success we made it here */ + { "", "" }, +}; + +/* + * Test for hitting a hw write breakpoint + */ +static struct test_struct hw_write_break_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "hw_break_val", "OK", hw_write_break, }, /* set hw breakpoint */ + { "c", "T0*", NULL, got_break }, /* Continue */ + { "g", "silent", NULL, check_and_rewind_pc }, + { "write", "OK", write_regs }, + { "hw_break_val", "OK", hw_rem_write_break }, /*remove breakpoint */ + { "D", "OK" }, /* Detach */ + { "D", "OK", NULL, got_break }, /* On success we made it here */ + { "", "" }, +}; + +/* + * Test for hitting a hw access breakpoint + */ +static struct test_struct hw_access_break_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "hw_break_val", "OK", hw_access_break, }, /* set hw breakpoint */ + { "c", "T0*", NULL, got_break }, /* Continue */ + { "g", "silent", NULL, check_and_rewind_pc }, + { "write", "OK", write_regs }, + { "hw_break_val", "OK", hw_rem_access_break }, /*remove breakpoint */ + { "D", "OK" }, /* Detach */ + { "D", "OK", NULL, got_break }, /* On success we made it here */ + { "", "" }, +}; + +/* + * Test for hitting a hw access breakpoint + */ +static struct test_struct nmi_sleep_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "c", "T0*", NULL, got_break }, /* Continue */ 
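+ /* The T0 stop packet matched above comes from the NMI watchdog firing while run_nmi_sleep_test() spins with interrupts disabled; got_break() records it via test_complete. */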
+ { "D", "OK" }, /* Detach */ + { "D", "OK", NULL, got_break }, /* On success we made it here */ + { "", "" }, +}; + +static void fill_get_buf(char *buf) +{ + unsigned char checksum = 0; + int count = 0; + char ch; + + strcpy(get_buf, "$"); + strcat(get_buf, buf); + while ((ch = buf[count])) { + checksum += ch; + count++; + } + strcat(get_buf, "#"); + get_buf[count + 2] = hex_asc_hi(checksum); + get_buf[count + 3] = hex_asc_lo(checksum); + get_buf[count + 4] = '\0'; + v2printk("get%i: %s\n", ts.idx, get_buf); +} + +static int validate_simple_test(char *put_str) +{ + char *chk_str; + + if (ts.tst[ts.idx].put_handler) + return ts.tst[ts.idx].put_handler(put_str, + ts.tst[ts.idx].put); + + chk_str = ts.tst[ts.idx].put; + if (*put_str == '$') + put_str++; + + while (*chk_str != '\0' && *put_str != '\0') { + /* If someone does a * to match the rest of the string, allow + * it, or stop if the received string is complete. + */ + if (*put_str == '#' || *chk_str == '*') + return 0; + if (*put_str != *chk_str) + return 1; + + chk_str++; + put_str++; + } + if (*chk_str == '\0' && (*put_str == '\0' || *put_str == '#')) + return 0; + + return 1; +} + +static int run_simple_test(int is_get_char, int chr) +{ + int ret = 0; + if (is_get_char) { + /* Send an ACK on the get if a prior put completed and set the + * send ack variable + */ + if (send_ack) { + send_ack = 0; + return '+'; + } + /* On the first get char, fill the transmit buffer and then + * take from the get_string. + */ + if (get_buf_cnt == 0) { + if (ts.tst[ts.idx].get_handler) + ts.tst[ts.idx].get_handler(ts.tst[ts.idx].get); + else + fill_get_buf(ts.tst[ts.idx].get); + } + + if (get_buf[get_buf_cnt] == '\0') { + eprintk("kgdbts: ERROR GET: EOB on '%s' at %i\n", + ts.name, ts.idx); + get_buf_cnt = 0; + fill_get_buf("D"); + } + ret = get_buf[get_buf_cnt]; + get_buf_cnt++; + return ret; + } + + /* This callback is a put char which is when kgdb sends data to + * this I/O module. 
+ */ + if (ts.tst[ts.idx].get[0] == '\0' && ts.tst[ts.idx].put[0] == '\0' && + !ts.tst[ts.idx].get_handler) { + eprintk("kgdbts: ERROR: beyond end of test on" + " '%s' line %i\n", ts.name, ts.idx); + return 0; + } + + if (put_buf_cnt >= BUFMAX) { + eprintk("kgdbts: ERROR: put buffer overflow on" + " '%s' line %i\n", ts.name, ts.idx); + put_buf_cnt = 0; + return 0; + } + /* Ignore everything until the first valid packet start '$' */ + if (put_buf_cnt == 0 && chr != '$') + return 0; + + put_buf[put_buf_cnt] = chr; + put_buf_cnt++; + + /* End of packet == #XX so look for the '#' */ + if (put_buf_cnt > 3 && put_buf[put_buf_cnt - 3] == '#') { + if (put_buf_cnt >= BUFMAX) { + eprintk("kgdbts: ERROR: put buffer overflow on" + " '%s' line %i\n", ts.name, ts.idx); + put_buf_cnt = 0; + return 0; + } + put_buf[put_buf_cnt] = '\0'; + v2printk("put%i: %s\n", ts.idx, put_buf); + /* Trigger check here */ + if (ts.validate_put && ts.validate_put(put_buf)) { + eprintk("kgdbts: ERROR PUT: end of test " + "buffer on '%s' line %i expected %s got %s\n", + ts.name, ts.idx, ts.tst[ts.idx].put, put_buf); + } + ts.idx++; + put_buf_cnt = 0; + get_buf_cnt = 0; + send_ack = 1; + } + return 0; +} + +static void init_simple_test(void) +{ + memset(&ts, 0, sizeof(ts)); + ts.run_test = run_simple_test; + ts.validate_put = validate_simple_test; +} + +static void run_plant_and_detach_test(int is_early) +{ + char before[BREAK_INSTR_SIZE]; + char after[BREAK_INSTR_SIZE]; + + copy_from_kernel_nofault(before, (char *)kgdbts_break_test, + BREAK_INSTR_SIZE); + init_simple_test(); + ts.tst = plant_and_detach_test; + ts.name = "plant_and_detach_test"; + /* Activate test with initial breakpoint */ + if (!is_early) + kgdb_breakpoint(); + copy_from_kernel_nofault(after, (char *)kgdbts_break_test, + BREAK_INSTR_SIZE); + if (memcmp(before, after, BREAK_INSTR_SIZE)) { + printk(KERN_CRIT "kgdbts: ERROR kgdb corrupted memory\n"); + panic("kgdb memory corruption"); + } + + /* complete the detach test */ + if (!is_early) + kgdbts_break_test(); +} + +static void run_breakpoint_test(int is_hw_breakpoint) +{ + test_complete = 0; + init_simple_test(); + if (is_hw_breakpoint) { + ts.tst = hw_breakpoint_test; + ts.name = "hw_breakpoint_test"; + } else { + ts.tst = sw_breakpoint_test; + ts.name = "sw_breakpoint_test"; + } + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); + /* run code with the break point in it */ + kgdbts_break_test(); + kgdb_breakpoint(); + + if (test_complete) + return; + + eprintk("kgdbts: ERROR %s test failed\n", ts.name); + if (is_hw_breakpoint) + hwbreaks_ok = 0; +} + +static void run_hw_break_test(int is_write_test) +{ + test_complete = 0; + init_simple_test(); + if (is_write_test) { + ts.tst = hw_write_break_test; + ts.name = "hw_write_break_test"; + } else { + ts.tst = hw_access_break_test; + ts.name = "hw_access_break_test"; + } + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); + hw_break_val_access(); + if (is_write_test) { + if (test_complete == 2) { + eprintk("kgdbts: ERROR %s broke on access\n", + ts.name); + hwbreaks_ok = 0; + } + hw_break_val_write(); + } + kgdb_breakpoint(); + + if (test_complete == 1) + return; + + eprintk("kgdbts: ERROR %s test failed\n", ts.name); + hwbreaks_ok = 0; +} + +static void run_nmi_sleep_test(int nmi_sleep) +{ + unsigned long flags; + + init_simple_test(); + ts.tst = nmi_sleep_test; + ts.name = "nmi_sleep_test"; + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); + local_irq_save(flags); + mdelay(nmi_sleep*1000); + touch_nmi_watchdog(); 
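+ /* Interrupts were off for the whole mdelay() above, so only the NMI watchdog could have entered the debugger; got_break() sets test_complete to 2 when the resulting T0 stop packet is seen. */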
+ local_irq_restore(flags); + if (test_complete != 2) + eprintk("kgdbts: ERROR nmi_test did not hit nmi\n"); + kgdb_breakpoint(); + if (test_complete == 1) + return; + + eprintk("kgdbts: ERROR %s test failed\n", ts.name); +} + +static void run_bad_read_test(void) +{ + init_simple_test(); + ts.tst = bad_read_test; + ts.name = "bad_read_test"; + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); +} + +static void run_kernel_clone_test(void) +{ + init_simple_test(); + ts.tst = do_kernel_clone_test; + ts.name = "do_kernel_clone_test"; + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); +} + +static void run_sys_open_test(void) +{ + init_simple_test(); + ts.tst = sys_open_test; + ts.name = "sys_open_test"; + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); +} + +static void run_singlestep_break_test(void) +{ + init_simple_test(); + ts.tst = singlestep_break_test; + ts.name = "singlestep_breakpoint_test"; + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); + kgdbts_break_test(); + kgdbts_break_test(); +} + +static void kgdbts_run_tests(void) +{ + char *ptr; + int clone_test = 0; + int do_sys_open_test = 0; + int sstep_test = 1000; + int nmi_sleep = 0; + int i; + + verbose = 0; + if (strstr(config, "V1")) + verbose = 1; + if (strstr(config, "V2")) + verbose = 2; + + ptr = strchr(config, 'F'); + if (ptr) + clone_test = simple_strtol(ptr + 1, NULL, 10); + ptr = strchr(config, 'S'); + if (ptr) + do_sys_open_test = simple_strtol(ptr + 1, NULL, 10); + ptr = strchr(config, 'N'); + if (ptr) + nmi_sleep = simple_strtol(ptr+1, NULL, 10); + ptr = strchr(config, 'I'); + if (ptr) + sstep_test = simple_strtol(ptr+1, NULL, 10); + + /* All HW break point tests */ + if (arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT) { + hwbreaks_ok = 1; + v1printk("kgdbts:RUN hw breakpoint test\n"); + run_breakpoint_test(1); + v1printk("kgdbts:RUN hw write breakpoint test\n"); + run_hw_break_test(1); + v1printk("kgdbts:RUN access write breakpoint test\n"); + run_hw_break_test(0); + } + + /* required internal KGDB tests */ + v1printk("kgdbts:RUN plant and detach test\n"); + run_plant_and_detach_test(0); + v1printk("kgdbts:RUN sw breakpoint test\n"); + run_breakpoint_test(0); + v1printk("kgdbts:RUN bad memory access test\n"); + run_bad_read_test(); + v1printk("kgdbts:RUN singlestep test %i iterations\n", sstep_test); + for (i = 0; i < sstep_test; i++) { + run_singlestep_break_test(); + if (i % 100 == 0) + v1printk("kgdbts:RUN singlestep [%i/%i]\n", + i, sstep_test); + } + + /* ===Optional tests=== */ + + if (nmi_sleep) { + v1printk("kgdbts:RUN NMI sleep %i seconds test\n", nmi_sleep); + run_nmi_sleep_test(nmi_sleep); + } + + /* If the kernel_clone test is run it will be the last test that is + * executed because a kernel thread will be spawned at the very + * end to unregister the debug hooks. + */ + if (clone_test) { + repeat_test = clone_test; + printk(KERN_INFO "kgdbts:RUN kernel_clone for %i breakpoints\n", + repeat_test); + kthread_run(kgdbts_unreg_thread, NULL, "kgdbts_unreg"); + run_kernel_clone_test(); + return; + } + + /* If the sys_open test is run it will be the last test that is + * executed because a kernel thread will be spawned at the very + * end to unregister the debug hooks. 
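+ * (kgdbts_unreg_thread() polls final_ack and then calls kgdb_unregister_io_module() to drop the I/O driver.)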
+ */ + if (do_sys_open_test) { + repeat_test = do_sys_open_test; + printk(KERN_INFO "kgdbts:RUN sys_open for %i breakpoints\n", + repeat_test); + kthread_run(kgdbts_unreg_thread, NULL, "kgdbts_unreg"); + run_sys_open_test(); + return; + } + /* Shutdown and unregister */ + kgdb_unregister_io_module(&kgdbts_io_ops); + configured = 0; +} + +static int kgdbts_option_setup(char *opt) +{ + if (strlen(opt) >= MAX_CONFIG_LEN) { + printk(KERN_ERR "kgdbts: config string too long\n"); + return 1; + } + strcpy(config, opt); + return 1; +} + +__setup("kgdbts=", kgdbts_option_setup); + +static int configure_kgdbts(void) +{ + int err = 0; + + if (!strlen(config) || isspace(config[0])) + goto noconfig; + + final_ack = 0; + run_plant_and_detach_test(1); + + err = kgdb_register_io_module(&kgdbts_io_ops); + if (err) { + configured = 0; + return err; + } + configured = 1; + kgdbts_run_tests(); + + return err; + +noconfig: + config[0] = 0; + configured = 0; + + return err; +} + +static int __init init_kgdbts(void) +{ + /* Already configured? */ + if (configured == 1) + return 0; + + return configure_kgdbts(); +} +device_initcall(init_kgdbts); + +static int kgdbts_get_char(void) +{ + int val = 0; + + if (ts.run_test) + val = ts.run_test(1, 0); + + return val; +} + +static void kgdbts_put_char(u8 chr) +{ + if (ts.run_test) + ts.run_test(0, chr); +} + +static int param_set_kgdbts_var(const char *kmessage, + const struct kernel_param *kp) +{ + size_t len = strlen(kmessage); + + if (len >= MAX_CONFIG_LEN) { + printk(KERN_ERR "kgdbts: config string too long\n"); + return -ENOSPC; + } + + /* Only copy in the string if the init function has not run yet */ + if (configured < 0) { + strcpy(config, kmessage); + return 0; + } + + if (configured == 1) { + printk(KERN_ERR "kgdbts: ERROR: Already configured and running.\n"); + return -EBUSY; + } + + strcpy(config, kmessage); + /* Chop out \n char as a result of echo */ + if (len && config[len - 1] == '\n') + config[len - 1] = '\0'; + + /* Go and configure with the new params. */ + return configure_kgdbts(); +} + +static void kgdbts_pre_exp_handler(void) +{ + /* Increment the module count when the debugger is active */ + if (!kgdb_connected) + try_module_get(THIS_MODULE); +} + +static void kgdbts_post_exp_handler(void) +{ + /* decrement the module count when the debugger detaches */ + if (!kgdb_connected) + module_put(THIS_MODULE); +} + +static struct kgdb_io kgdbts_io_ops = { + .name = "kgdbts", + .read_char = kgdbts_get_char, + .write_char = kgdbts_put_char, + .pre_exception = kgdbts_pre_exp_handler, + .post_exception = kgdbts_post_exp_handler, +}; + +/* + * not really modular, but the easiest way to keep compat with existing + * bootargs behaviour is to continue using module_param here. + */ +module_param_call(kgdbts, param_set_kgdbts_var, param_get_string, &kps, 0644); +MODULE_PARM_DESC(kgdbts, "[F#|S#][N#]"); diff --git a/drivers/misc/lattice-ecp3-config.c b/drivers/misc/lattice-ecp3-config.c new file mode 100644 index 0000000000..bac4df2e52 --- /dev/null +++ b/drivers/misc/lattice-ecp3-config.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2012 Stefan Roese + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define FIRMWARE_NAME "lattice-ecp3.bit" + +/* + * The JTAG ID's of the supported FPGA's. The ID is 32bit wide + * reversed as noted in the manual. 
+ */ +#define ID_ECP3_17 0xc2088080 +#define ID_ECP3_35 0xc2048080 + +/* FPGA commands */ +#define FPGA_CMD_READ_ID 0x07 /* plus 24 bits */ +#define FPGA_CMD_READ_STATUS 0x09 /* plus 24 bits */ +#define FPGA_CMD_CLEAR 0x70 +#define FPGA_CMD_REFRESH 0x71 +#define FPGA_CMD_WRITE_EN 0x4a /* plus 2 bits */ +#define FPGA_CMD_WRITE_DIS 0x4f /* plus 8 bits */ +#define FPGA_CMD_WRITE_INC 0x41 /* plus 0 bits */ + +/* + * The status register is 32bit revered, DONE is bit 17 from the TN1222.pdf + * (LatticeECP3 Slave SPI Port User's Guide) + */ +#define FPGA_STATUS_DONE 0x00004000 +#define FPGA_STATUS_CLEARED 0x00010000 + +#define FPGA_CLEAR_TIMEOUT 5000 /* max. 5000ms for FPGA clear */ +#define FPGA_CLEAR_MSLEEP 10 +#define FPGA_CLEAR_LOOP_COUNT (FPGA_CLEAR_TIMEOUT / FPGA_CLEAR_MSLEEP) + +struct fpga_data { + struct completion fw_loaded; +}; + +struct ecp3_dev { + u32 jedec_id; + char *name; +}; + +static const struct ecp3_dev ecp3_dev[] = { + { + .jedec_id = ID_ECP3_17, + .name = "Lattice ECP3-17", + }, + { + .jedec_id = ID_ECP3_35, + .name = "Lattice ECP3-35", + }, +}; + +static void firmware_load(const struct firmware *fw, void *context) +{ + struct spi_device *spi = (struct spi_device *)context; + struct fpga_data *data = spi_get_drvdata(spi); + u8 *buffer; + u8 txbuf[8]; + u8 rxbuf[8]; + int rx_len = 8; + int i; + u32 jedec_id; + u32 status; + + if (fw == NULL) { + dev_err(&spi->dev, "Cannot load firmware, aborting\n"); + goto out; + } + + if (fw->size == 0) { + dev_err(&spi->dev, "Error: Firmware size is 0!\n"); + goto out; + } + + /* Fill dummy data (24 stuffing bits for commands) */ + txbuf[1] = 0x00; + txbuf[2] = 0x00; + txbuf[3] = 0x00; + + /* Trying to speak with the FPGA via SPI... */ + txbuf[0] = FPGA_CMD_READ_ID; + spi_write_then_read(spi, txbuf, 8, rxbuf, rx_len); + jedec_id = get_unaligned_be32(&rxbuf[4]); + dev_dbg(&spi->dev, "FPGA JTAG ID=%08x\n", jedec_id); + + for (i = 0; i < ARRAY_SIZE(ecp3_dev); i++) { + if (jedec_id == ecp3_dev[i].jedec_id) + break; + } + if (i == ARRAY_SIZE(ecp3_dev)) { + dev_err(&spi->dev, + "Error: No supported FPGA detected (JEDEC_ID=%08x)!\n", + jedec_id); + goto out; + } + + dev_info(&spi->dev, "FPGA %s detected\n", ecp3_dev[i].name); + + txbuf[0] = FPGA_CMD_READ_STATUS; + spi_write_then_read(spi, txbuf, 8, rxbuf, rx_len); + status = get_unaligned_be32(&rxbuf[4]); + dev_dbg(&spi->dev, "FPGA Status=%08x\n", status); + + buffer = kzalloc(fw->size + 8, GFP_KERNEL); + if (!buffer) { + dev_err(&spi->dev, "Error: Can't allocate memory!\n"); + goto out; + } + + /* + * Insert WRITE_INC command into stream (one SPI frame) + */ + buffer[0] = FPGA_CMD_WRITE_INC; + buffer[1] = 0xff; + buffer[2] = 0xff; + buffer[3] = 0xff; + memcpy(buffer + 4, fw->data, fw->size); + + txbuf[0] = FPGA_CMD_REFRESH; + spi_write(spi, txbuf, 4); + + txbuf[0] = FPGA_CMD_WRITE_EN; + spi_write(spi, txbuf, 4); + + txbuf[0] = FPGA_CMD_CLEAR; + spi_write(spi, txbuf, 4); + + /* + * Wait for FPGA memory to become cleared + */ + for (i = 0; i < FPGA_CLEAR_LOOP_COUNT; i++) { + txbuf[0] = FPGA_CMD_READ_STATUS; + spi_write_then_read(spi, txbuf, 8, rxbuf, rx_len); + status = get_unaligned_be32(&rxbuf[4]); + if (status == FPGA_STATUS_CLEARED) + break; + + msleep(FPGA_CLEAR_MSLEEP); + } + + if (i == FPGA_CLEAR_LOOP_COUNT) { + dev_err(&spi->dev, + "Error: Timeout waiting for FPGA to clear (status=%08x)!\n", + status); + kfree(buffer); + goto out; + } + + dev_info(&spi->dev, "Configuring the FPGA...\n"); + spi_write(spi, buffer, fw->size + 8); + + txbuf[0] = FPGA_CMD_WRITE_DIS; + spi_write(spi, txbuf, 4); 
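+ /* With the bitstream streamed out and writes disabled, read the status word once more; FPGA_STATUS_DONE must be set if the device accepted the configuration. */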
+ + txbuf[0] = FPGA_CMD_READ_STATUS; + spi_write_then_read(spi, txbuf, 8, rxbuf, rx_len); + status = get_unaligned_be32(&rxbuf[4]); + dev_dbg(&spi->dev, "FPGA Status=%08x\n", status); + + /* Check result */ + if (status & FPGA_STATUS_DONE) + dev_info(&spi->dev, "FPGA successfully configured!\n"); + else + dev_info(&spi->dev, "FPGA not configured (DONE not set)\n"); + + /* + * Don't forget to release the firmware again + */ + release_firmware(fw); + + kfree(buffer); +out: + complete(&data->fw_loaded); +} + +static int lattice_ecp3_probe(struct spi_device *spi) +{ + struct fpga_data *data; + int err; + + data = devm_kzalloc(&spi->dev, sizeof(*data), GFP_KERNEL); + if (!data) { + dev_err(&spi->dev, "Memory allocation for fpga_data failed\n"); + return -ENOMEM; + } + spi_set_drvdata(spi, data); + + init_completion(&data->fw_loaded); + err = request_firmware_nowait(THIS_MODULE, FW_ACTION_UEVENT, + FIRMWARE_NAME, &spi->dev, + GFP_KERNEL, spi, firmware_load); + if (err) { + dev_err(&spi->dev, "Firmware loading failed with %d!\n", err); + return err; + } + + dev_info(&spi->dev, "FPGA bitstream configuration driver registered\n"); + + return 0; +} + +static void lattice_ecp3_remove(struct spi_device *spi) +{ + struct fpga_data *data = spi_get_drvdata(spi); + + wait_for_completion(&data->fw_loaded); +} + +static const struct spi_device_id lattice_ecp3_id[] = { + { "ecp3-17", 0 }, + { "ecp3-35", 0 }, + { } +}; +MODULE_DEVICE_TABLE(spi, lattice_ecp3_id); + +static struct spi_driver lattice_ecp3_driver = { + .driver = { + .name = "lattice-ecp3", + }, + .probe = lattice_ecp3_probe, + .remove = lattice_ecp3_remove, + .id_table = lattice_ecp3_id, +}; + +module_spi_driver(lattice_ecp3_driver); + +MODULE_AUTHOR("Stefan Roese "); +MODULE_DESCRIPTION("Lattice ECP3 FPGA configuration via SPI"); +MODULE_LICENSE("GPL"); +MODULE_FIRMWARE(FIRMWARE_NAME); diff --git a/drivers/misc/lis3lv02d/Kconfig b/drivers/misc/lis3lv02d/Kconfig new file mode 100644 index 0000000000..bb2fec4b58 --- /dev/null +++ b/drivers/misc/lis3lv02d/Kconfig @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# STMicroelectonics LIS3LV02D and similar accelerometers +# + +config SENSORS_LIS3_SPI + tristate "STMicroeletronics LIS3LV02Dx three-axis digital accelerometer (SPI)" + depends on !ACPI && SPI_MASTER && INPUT + select SENSORS_LIS3LV02D + help + This driver provides support for the LIS3LV02Dx accelerometer connected + via SPI. The accelerometer data is readable via + /sys/devices/platform/lis3lv02d. + + This driver also provides an absolute input class device, allowing + the laptop to act as a pinball machine-esque joystick. + + This driver can also be built as modules. If so, the core module + will be called lis3lv02d and a specific module for the SPI transport + is called lis3lv02d_spi. + +config SENSORS_LIS3_I2C + tristate "STMicroeletronics LIS3LV02Dx three-axis digital accelerometer (I2C)" + depends on I2C && INPUT + select SENSORS_LIS3LV02D + help + This driver provides support for the LIS3LV02Dx accelerometer connected + via I2C. The accelerometer data is readable via + /sys/devices/platform/lis3lv02d. + + This driver also provides an absolute input class device, allowing + the device to act as a pinball machine-esque joystick. + + This driver can also be built as modules. If so, the core module + will be called lis3lv02d and a specific module for the I2C transport + is called lis3lv02d_i2c. 
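For reference, a minimal sketch of how non-devicetree board code might declare the I2C-attached accelerometer so the lis3lv02d_i2c transport can bind to it. The bus number and the 0x1d slave address are hypothetical board-specific values, and the "lis3lv02d" id string is assumed to match an entry in the transport driver's i2c_device_id table; devicetree systems would instead describe the sensor with the st,* properties parsed by lis3lv02d_init_dt() later in lis3lv02d.c.

/* Hypothetical board-support sketch -- not part of this patch. */
#include <linux/i2c.h>
#include <linux/init.h>

static struct i2c_board_info lis3_i2c_info __initdata = {
	I2C_BOARD_INFO("lis3lv02d", 0x1d),	/* slave address is board-specific */
};

static int __init example_board_declare_lis3(void)
{
	/* Must run before the I2C adapter for bus 0 is registered. */
	return i2c_register_board_info(0, &lis3_i2c_info, 1);
}
postcore_initcall(example_board_declare_lis3);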
diff --git a/drivers/misc/lis3lv02d/Makefile b/drivers/misc/lis3lv02d/Makefile new file mode 100644 index 0000000000..137e7020c1 --- /dev/null +++ b/drivers/misc/lis3lv02d/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# STMicroelectonics LIS3LV02D and similar accelerometers +# + +obj-$(CONFIG_SENSORS_LIS3LV02D) += lis3lv02d.o +obj-$(CONFIG_SENSORS_LIS3_SPI) += lis3lv02d_spi.o +obj-$(CONFIG_SENSORS_LIS3_I2C) += lis3lv02d_i2c.o diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c new file mode 100644 index 0000000000..49868a45c0 --- /dev/null +++ b/drivers/misc/lis3lv02d/lis3lv02d.c @@ -0,0 +1,1266 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * lis3lv02d.c - ST LIS3LV02DL accelerometer driver + * + * Copyright (C) 2007-2008 Yan Burman + * Copyright (C) 2008 Eric Piel + * Copyright (C) 2008-2009 Pavel Machek + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "lis3lv02d.h" + +#define DRIVER_NAME "lis3lv02d" + +/* joystick device poll interval in milliseconds */ +#define MDPS_POLL_INTERVAL 50 +#define MDPS_POLL_MIN 0 +#define MDPS_POLL_MAX 2000 + +#define LIS3_SYSFS_POWERDOWN_DELAY 5000 /* In milliseconds */ + +#define SELFTEST_OK 0 +#define SELFTEST_FAIL -1 +#define SELFTEST_IRQ -2 + +#define IRQ_LINE0 0 +#define IRQ_LINE1 1 + +/* + * The sensor can also generate interrupts (DRDY) but it's pretty pointless + * because they are generated even if the data do not change. So it's better + * to keep the interrupt for the free-fall event. The values are updated at + * 40Hz (at the lowest frequency), but as it can be pretty time consuming on + * some low processor, we poll the sensor only at 20Hz... enough for the + * joystick. + */ + +#define LIS3_PWRON_DELAY_WAI_12B (5000) +#define LIS3_PWRON_DELAY_WAI_8B (3000) + +/* + * LIS3LV02D spec says 1024 LSBs corresponds 1 G -> 1LSB is 1000/1024 mG + * LIS302D spec says: 18 mG / digit + * LIS3_ACCURACY is used to increase accuracy of the intermediate + * calculation results. + */ +#define LIS3_ACCURACY 1024 +/* Sensitivity values for -2G +2G scale */ +#define LIS3_SENSITIVITY_12B ((LIS3_ACCURACY * 1000) / 1024) +#define LIS3_SENSITIVITY_8B (18 * LIS3_ACCURACY) + +/* + * LIS331DLH spec says 1LSBs corresponds 4G/4096 -> 1LSB is 1000/1024 mG. + * Below macros defines sensitivity values for +/-2G. Dataout bits for + * +/-2G range is 12 bits so 4 bits adjustment must be done to get 12bit + * data from 16bit value. Currently this driver supports only 2G range. 
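+ * (4 G spread over 4096 counts is 1000/1024 mG per count, so LIS3DLH_SENSITIVITY_2G below matches LIS3_SENSITIVITY_12B, and SHIFT_ADJ_2G discards the 4 unused low bits of the 16-bit output.)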
+ */ +#define LIS3DLH_SENSITIVITY_2G ((LIS3_ACCURACY * 1000) / 1024) +#define SHIFT_ADJ_2G 4 + +#define LIS3_DEFAULT_FUZZ_12B 3 +#define LIS3_DEFAULT_FLAT_12B 3 +#define LIS3_DEFAULT_FUZZ_8B 1 +#define LIS3_DEFAULT_FLAT_8B 1 + +struct lis3lv02d lis3_dev = { + .misc_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lis3_dev.misc_wait), +}; +EXPORT_SYMBOL_GPL(lis3_dev); + +/* just like param_set_int() but does sanity-check so that it won't point + * over the axis array size + */ +static int param_set_axis(const char *val, const struct kernel_param *kp) +{ + int ret = param_set_int(val, kp); + if (!ret) { + int val = *(int *)kp->arg; + if (val < 0) + val = -val; + if (!val || val > 3) + return -EINVAL; + } + return ret; +} + +static const struct kernel_param_ops param_ops_axis = { + .set = param_set_axis, + .get = param_get_int, +}; + +#define param_check_axis(name, p) param_check_int(name, p) + +module_param_array_named(axes, lis3_dev.ac.as_array, axis, NULL, 0644); +MODULE_PARM_DESC(axes, "Axis-mapping for x,y,z directions"); + +static s16 lis3lv02d_read_8(struct lis3lv02d *lis3, int reg) +{ + s8 lo; + if (lis3->read(lis3, reg, &lo) < 0) + return 0; + + return lo; +} + +static s16 lis3lv02d_read_12(struct lis3lv02d *lis3, int reg) +{ + u8 lo, hi; + + lis3->read(lis3, reg - 1, &lo); + lis3->read(lis3, reg, &hi); + /* In "12 bit right justified" mode, bit 6, bit 7, bit 8 = bit 5 */ + return (s16)((hi << 8) | lo); +} + +/* 12bits for 2G range, 13 bits for 4G range and 14 bits for 8G range */ +static s16 lis331dlh_read_data(struct lis3lv02d *lis3, int reg) +{ + u8 lo, hi; + int v; + + lis3->read(lis3, reg - 1, &lo); + lis3->read(lis3, reg, &hi); + v = (int) ((hi << 8) | lo); + + return (s16) v >> lis3->shift_adj; +} + +/** + * lis3lv02d_get_axis - For the given axis, give the value converted + * @axis: 1,2,3 - can also be negative + * @hw_values: raw values returned by the hardware + * + * Returns the converted value. 
+ */ +static inline int lis3lv02d_get_axis(s8 axis, int hw_values[3]) +{ + if (axis > 0) + return hw_values[axis - 1]; + else + return -hw_values[-axis - 1]; +} + +/** + * lis3lv02d_get_xyz - Get X, Y and Z axis values from the accelerometer + * @lis3: pointer to the device struct + * @x: where to store the X axis value + * @y: where to store the Y axis value + * @z: where to store the Z axis value + * + * Note that 40Hz input device can eat up about 10% CPU at 800MHZ + */ +static void lis3lv02d_get_xyz(struct lis3lv02d *lis3, int *x, int *y, int *z) +{ + int position[3]; + int i; + + if (lis3->blkread) { + if (lis3->whoami == WAI_12B) { + u16 data[3]; + lis3->blkread(lis3, OUTX_L, 6, (u8 *)data); + for (i = 0; i < 3; i++) + position[i] = (s16)le16_to_cpu(data[i]); + } else { + u8 data[5]; + /* Data: x, dummy, y, dummy, z */ + lis3->blkread(lis3, OUTX, 5, data); + for (i = 0; i < 3; i++) + position[i] = (s8)data[i * 2]; + } + } else { + position[0] = lis3->read_data(lis3, OUTX); + position[1] = lis3->read_data(lis3, OUTY); + position[2] = lis3->read_data(lis3, OUTZ); + } + + for (i = 0; i < 3; i++) + position[i] = (position[i] * lis3->scale) / LIS3_ACCURACY; + + *x = lis3lv02d_get_axis(lis3->ac.x, position); + *y = lis3lv02d_get_axis(lis3->ac.y, position); + *z = lis3lv02d_get_axis(lis3->ac.z, position); +} + +/* conversion btw sampling rate and the register values */ +static int lis3_12_rates[4] = {40, 160, 640, 2560}; +static int lis3_8_rates[2] = {100, 400}; +static int lis3_3dc_rates[16] = {0, 1, 10, 25, 50, 100, 200, 400, 1600, 5000}; +static int lis3_3dlh_rates[4] = {50, 100, 400, 1000}; + +/* ODR is Output Data Rate */ +static int lis3lv02d_get_odr_index(struct lis3lv02d *lis3) +{ + u8 ctrl; + int shift; + + lis3->read(lis3, CTRL_REG1, &ctrl); + ctrl &= lis3->odr_mask; + shift = ffs(lis3->odr_mask) - 1; + return (ctrl >> shift); +} + +static int lis3lv02d_get_pwron_wait(struct lis3lv02d *lis3) +{ + int odr_idx = lis3lv02d_get_odr_index(lis3); + int div = lis3->odrs[odr_idx]; + + if (div == 0) { + if (odr_idx == 0) { + /* Power-down mode, not sampling no need to sleep */ + return 0; + } + + dev_err(&lis3->pdev->dev, "Error unknown odrs-index: %d\n", odr_idx); + return -ENXIO; + } + + /* LIS3 power on delay is quite long */ + msleep(lis3->pwron_delay / div); + return 0; +} + +static int lis3lv02d_set_odr(struct lis3lv02d *lis3, int rate) +{ + u8 ctrl; + int i, len, shift; + + if (!rate) + return -EINVAL; + + lis3->read(lis3, CTRL_REG1, &ctrl); + ctrl &= ~lis3->odr_mask; + len = 1 << hweight_long(lis3->odr_mask); /* # of possible values */ + shift = ffs(lis3->odr_mask) - 1; + + for (i = 0; i < len; i++) + if (lis3->odrs[i] == rate) { + lis3->write(lis3, CTRL_REG1, + ctrl | (i << shift)); + return 0; + } + return -EINVAL; +} + +static int lis3lv02d_selftest(struct lis3lv02d *lis3, s16 results[3]) +{ + u8 ctlreg, reg; + s16 x, y, z; + u8 selftest; + int ret; + u8 ctrl_reg_data; + unsigned char irq_cfg; + + mutex_lock(&lis3->mutex); + + irq_cfg = lis3->irq_cfg; + if (lis3->whoami == WAI_8B) { + lis3->data_ready_count[IRQ_LINE0] = 0; + lis3->data_ready_count[IRQ_LINE1] = 0; + + /* Change interrupt cfg to data ready for selftest */ + atomic_inc(&lis3->wake_thread); + lis3->irq_cfg = LIS3_IRQ1_DATA_READY | LIS3_IRQ2_DATA_READY; + lis3->read(lis3, CTRL_REG3, &ctrl_reg_data); + lis3->write(lis3, CTRL_REG3, (ctrl_reg_data & + ~(LIS3_IRQ1_MASK | LIS3_IRQ2_MASK)) | + (LIS3_IRQ1_DATA_READY | LIS3_IRQ2_DATA_READY)); + } + + if ((lis3->whoami == WAI_3DC) || (lis3->whoami == WAI_3DLH)) { + ctlreg = 
CTRL_REG4; + selftest = CTRL4_ST0; + } else { + ctlreg = CTRL_REG1; + if (lis3->whoami == WAI_12B) + selftest = CTRL1_ST; + else + selftest = CTRL1_STP; + } + + lis3->read(lis3, ctlreg, ®); + lis3->write(lis3, ctlreg, (reg | selftest)); + ret = lis3lv02d_get_pwron_wait(lis3); + if (ret) + goto fail; + + /* Read directly to avoid axis remap */ + x = lis3->read_data(lis3, OUTX); + y = lis3->read_data(lis3, OUTY); + z = lis3->read_data(lis3, OUTZ); + + /* back to normal settings */ + lis3->write(lis3, ctlreg, reg); + ret = lis3lv02d_get_pwron_wait(lis3); + if (ret) + goto fail; + + results[0] = x - lis3->read_data(lis3, OUTX); + results[1] = y - lis3->read_data(lis3, OUTY); + results[2] = z - lis3->read_data(lis3, OUTZ); + + ret = 0; + + if (lis3->whoami == WAI_8B) { + /* Restore original interrupt configuration */ + atomic_dec(&lis3->wake_thread); + lis3->write(lis3, CTRL_REG3, ctrl_reg_data); + lis3->irq_cfg = irq_cfg; + + if ((irq_cfg & LIS3_IRQ1_MASK) && + lis3->data_ready_count[IRQ_LINE0] < 2) { + ret = SELFTEST_IRQ; + goto fail; + } + + if ((irq_cfg & LIS3_IRQ2_MASK) && + lis3->data_ready_count[IRQ_LINE1] < 2) { + ret = SELFTEST_IRQ; + goto fail; + } + } + + if (lis3->pdata) { + int i; + for (i = 0; i < 3; i++) { + /* Check against selftest acceptance limits */ + if ((results[i] < lis3->pdata->st_min_limits[i]) || + (results[i] > lis3->pdata->st_max_limits[i])) { + ret = SELFTEST_FAIL; + goto fail; + } + } + } + + /* test passed */ +fail: + mutex_unlock(&lis3->mutex); + return ret; +} + +/* + * Order of registers in the list affects to order of the restore process. + * Perhaps it is a good idea to set interrupt enable register as a last one + * after all other configurations + */ +static u8 lis3_wai8_regs[] = { FF_WU_CFG_1, FF_WU_THS_1, FF_WU_DURATION_1, + FF_WU_CFG_2, FF_WU_THS_2, FF_WU_DURATION_2, + CLICK_CFG, CLICK_SRC, CLICK_THSY_X, CLICK_THSZ, + CLICK_TIMELIMIT, CLICK_LATENCY, CLICK_WINDOW, + CTRL_REG1, CTRL_REG2, CTRL_REG3}; + +static u8 lis3_wai12_regs[] = {FF_WU_CFG, FF_WU_THS_L, FF_WU_THS_H, + FF_WU_DURATION, DD_CFG, DD_THSI_L, DD_THSI_H, + DD_THSE_L, DD_THSE_H, + CTRL_REG1, CTRL_REG3, CTRL_REG2}; + +static inline void lis3_context_save(struct lis3lv02d *lis3) +{ + int i; + for (i = 0; i < lis3->regs_size; i++) + lis3->read(lis3, lis3->regs[i], &lis3->reg_cache[i]); + lis3->regs_stored = true; +} + +static inline void lis3_context_restore(struct lis3lv02d *lis3) +{ + int i; + if (lis3->regs_stored) + for (i = 0; i < lis3->regs_size; i++) + lis3->write(lis3, lis3->regs[i], lis3->reg_cache[i]); +} + +void lis3lv02d_poweroff(struct lis3lv02d *lis3) +{ + if (lis3->reg_ctrl) + lis3_context_save(lis3); + /* disable X,Y,Z axis and power down */ + lis3->write(lis3, CTRL_REG1, 0x00); + if (lis3->reg_ctrl) + lis3->reg_ctrl(lis3, LIS3_REG_OFF); +} +EXPORT_SYMBOL_GPL(lis3lv02d_poweroff); + +int lis3lv02d_poweron(struct lis3lv02d *lis3) +{ + int err; + u8 reg; + + lis3->init(lis3); + + /* + * Common configuration + * BDU: (12 bits sensors only) LSB and MSB values are not updated until + * both have been read. So the value read will always be correct. + * Set BOOT bit to refresh factory tuning values. 
+ */ + if (lis3->pdata) { + lis3->read(lis3, CTRL_REG2, ®); + if (lis3->whoami == WAI_12B) + reg |= CTRL2_BDU | CTRL2_BOOT; + else if (lis3->whoami == WAI_3DLH) + reg |= CTRL2_BOOT_3DLH; + else + reg |= CTRL2_BOOT_8B; + lis3->write(lis3, CTRL_REG2, reg); + + if (lis3->whoami == WAI_3DLH) { + lis3->read(lis3, CTRL_REG4, ®); + reg |= CTRL4_BDU; + lis3->write(lis3, CTRL_REG4, reg); + } + } + + err = lis3lv02d_get_pwron_wait(lis3); + if (err) + return err; + + if (lis3->reg_ctrl) + lis3_context_restore(lis3); + + return 0; +} +EXPORT_SYMBOL_GPL(lis3lv02d_poweron); + + +static void lis3lv02d_joystick_poll(struct input_dev *input) +{ + struct lis3lv02d *lis3 = input_get_drvdata(input); + int x, y, z; + + mutex_lock(&lis3->mutex); + lis3lv02d_get_xyz(lis3, &x, &y, &z); + input_report_abs(input, ABS_X, x); + input_report_abs(input, ABS_Y, y); + input_report_abs(input, ABS_Z, z); + input_sync(input); + mutex_unlock(&lis3->mutex); +} + +static int lis3lv02d_joystick_open(struct input_dev *input) +{ + struct lis3lv02d *lis3 = input_get_drvdata(input); + + if (lis3->pm_dev) + pm_runtime_get_sync(lis3->pm_dev); + + if (lis3->pdata && lis3->whoami == WAI_8B && lis3->idev) + atomic_set(&lis3->wake_thread, 1); + /* + * Update coordinates for the case where poll interval is 0 and + * the chip in running purely under interrupt control + */ + lis3lv02d_joystick_poll(input); + + return 0; +} + +static void lis3lv02d_joystick_close(struct input_dev *input) +{ + struct lis3lv02d *lis3 = input_get_drvdata(input); + + atomic_set(&lis3->wake_thread, 0); + if (lis3->pm_dev) + pm_runtime_put(lis3->pm_dev); +} + +static irqreturn_t lis302dl_interrupt(int irq, void *data) +{ + struct lis3lv02d *lis3 = data; + + if (!test_bit(0, &lis3->misc_opened)) + goto out; + + /* + * Be careful: on some HP laptops the bios force DD when on battery and + * the lid is closed. This leads to interrupts as soon as a little move + * is done. 
+ */ + atomic_inc(&lis3->count); + + wake_up_interruptible(&lis3->misc_wait); + kill_fasync(&lis3->async_queue, SIGIO, POLL_IN); +out: + if (atomic_read(&lis3->wake_thread)) + return IRQ_WAKE_THREAD; + return IRQ_HANDLED; +} + +static void lis302dl_interrupt_handle_click(struct lis3lv02d *lis3) +{ + struct input_dev *dev = lis3->idev; + u8 click_src; + + mutex_lock(&lis3->mutex); + lis3->read(lis3, CLICK_SRC, &click_src); + + if (click_src & CLICK_SINGLE_X) { + input_report_key(dev, lis3->mapped_btns[0], 1); + input_report_key(dev, lis3->mapped_btns[0], 0); + } + + if (click_src & CLICK_SINGLE_Y) { + input_report_key(dev, lis3->mapped_btns[1], 1); + input_report_key(dev, lis3->mapped_btns[1], 0); + } + + if (click_src & CLICK_SINGLE_Z) { + input_report_key(dev, lis3->mapped_btns[2], 1); + input_report_key(dev, lis3->mapped_btns[2], 0); + } + input_sync(dev); + mutex_unlock(&lis3->mutex); +} + +static inline void lis302dl_data_ready(struct lis3lv02d *lis3, int index) +{ + int dummy; + + /* Dummy read to ack interrupt */ + lis3lv02d_get_xyz(lis3, &dummy, &dummy, &dummy); + lis3->data_ready_count[index]++; +} + +static irqreturn_t lis302dl_interrupt_thread1_8b(int irq, void *data) +{ + struct lis3lv02d *lis3 = data; + u8 irq_cfg = lis3->irq_cfg & LIS3_IRQ1_MASK; + + if (irq_cfg == LIS3_IRQ1_CLICK) + lis302dl_interrupt_handle_click(lis3); + else if (unlikely(irq_cfg == LIS3_IRQ1_DATA_READY)) + lis302dl_data_ready(lis3, IRQ_LINE0); + else + lis3lv02d_joystick_poll(lis3->idev); + + return IRQ_HANDLED; +} + +static irqreturn_t lis302dl_interrupt_thread2_8b(int irq, void *data) +{ + struct lis3lv02d *lis3 = data; + u8 irq_cfg = lis3->irq_cfg & LIS3_IRQ2_MASK; + + if (irq_cfg == LIS3_IRQ2_CLICK) + lis302dl_interrupt_handle_click(lis3); + else if (unlikely(irq_cfg == LIS3_IRQ2_DATA_READY)) + lis302dl_data_ready(lis3, IRQ_LINE1); + else + lis3lv02d_joystick_poll(lis3->idev); + + return IRQ_HANDLED; +} + +static int lis3lv02d_misc_open(struct inode *inode, struct file *file) +{ + struct lis3lv02d *lis3 = container_of(file->private_data, + struct lis3lv02d, miscdev); + + if (test_and_set_bit(0, &lis3->misc_opened)) + return -EBUSY; /* already open */ + + if (lis3->pm_dev) + pm_runtime_get_sync(lis3->pm_dev); + + atomic_set(&lis3->count, 0); + return 0; +} + +static int lis3lv02d_misc_release(struct inode *inode, struct file *file) +{ + struct lis3lv02d *lis3 = container_of(file->private_data, + struct lis3lv02d, miscdev); + + clear_bit(0, &lis3->misc_opened); /* release the device */ + if (lis3->pm_dev) + pm_runtime_put(lis3->pm_dev); + return 0; +} + +static ssize_t lis3lv02d_misc_read(struct file *file, char __user *buf, + size_t count, loff_t *pos) +{ + struct lis3lv02d *lis3 = container_of(file->private_data, + struct lis3lv02d, miscdev); + + DECLARE_WAITQUEUE(wait, current); + u32 data; + unsigned char byte_data; + ssize_t retval = 1; + + if (count < 1) + return -EINVAL; + + add_wait_queue(&lis3->misc_wait, &wait); + while (true) { + set_current_state(TASK_INTERRUPTIBLE); + data = atomic_xchg(&lis3->count, 0); + if (data) + break; + + if (file->f_flags & O_NONBLOCK) { + retval = -EAGAIN; + goto out; + } + + if (signal_pending(current)) { + retval = -ERESTARTSYS; + goto out; + } + + schedule(); + } + + if (data < 255) + byte_data = data; + else + byte_data = 255; + + /* make sure we are not going into copy_to_user() with + * TASK_INTERRUPTIBLE state */ + set_current_state(TASK_RUNNING); + if (copy_to_user(buf, &byte_data, sizeof(byte_data))) + retval = -EFAULT; + +out: + 
__set_current_state(TASK_RUNNING); + remove_wait_queue(&lis3->misc_wait, &wait); + + return retval; +} + +static __poll_t lis3lv02d_misc_poll(struct file *file, poll_table *wait) +{ + struct lis3lv02d *lis3 = container_of(file->private_data, + struct lis3lv02d, miscdev); + + poll_wait(file, &lis3->misc_wait, wait); + if (atomic_read(&lis3->count)) + return EPOLLIN | EPOLLRDNORM; + return 0; +} + +static int lis3lv02d_misc_fasync(int fd, struct file *file, int on) +{ + struct lis3lv02d *lis3 = container_of(file->private_data, + struct lis3lv02d, miscdev); + + return fasync_helper(fd, file, on, &lis3->async_queue); +} + +static const struct file_operations lis3lv02d_misc_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = lis3lv02d_misc_read, + .open = lis3lv02d_misc_open, + .release = lis3lv02d_misc_release, + .poll = lis3lv02d_misc_poll, + .fasync = lis3lv02d_misc_fasync, +}; + +int lis3lv02d_joystick_enable(struct lis3lv02d *lis3) +{ + struct input_dev *input_dev; + int err; + int max_val, fuzz, flat; + int btns[] = {BTN_X, BTN_Y, BTN_Z}; + + if (lis3->idev) + return -EINVAL; + + input_dev = input_allocate_device(); + if (!input_dev) + return -ENOMEM; + + input_dev->name = "ST LIS3LV02DL Accelerometer"; + input_dev->phys = DRIVER_NAME "/input0"; + input_dev->id.bustype = BUS_HOST; + input_dev->id.vendor = 0; + input_dev->dev.parent = &lis3->pdev->dev; + + input_dev->open = lis3lv02d_joystick_open; + input_dev->close = lis3lv02d_joystick_close; + + max_val = (lis3->mdps_max_val * lis3->scale) / LIS3_ACCURACY; + if (lis3->whoami == WAI_12B) { + fuzz = LIS3_DEFAULT_FUZZ_12B; + flat = LIS3_DEFAULT_FLAT_12B; + } else { + fuzz = LIS3_DEFAULT_FUZZ_8B; + flat = LIS3_DEFAULT_FLAT_8B; + } + fuzz = (fuzz * lis3->scale) / LIS3_ACCURACY; + flat = (flat * lis3->scale) / LIS3_ACCURACY; + + input_set_abs_params(input_dev, ABS_X, -max_val, max_val, fuzz, flat); + input_set_abs_params(input_dev, ABS_Y, -max_val, max_val, fuzz, flat); + input_set_abs_params(input_dev, ABS_Z, -max_val, max_val, fuzz, flat); + + input_set_drvdata(input_dev, lis3); + lis3->idev = input_dev; + + err = input_setup_polling(input_dev, lis3lv02d_joystick_poll); + if (err) + goto err_free_input; + + input_set_poll_interval(input_dev, MDPS_POLL_INTERVAL); + input_set_min_poll_interval(input_dev, MDPS_POLL_MIN); + input_set_max_poll_interval(input_dev, MDPS_POLL_MAX); + + lis3->mapped_btns[0] = lis3lv02d_get_axis(abs(lis3->ac.x), btns); + lis3->mapped_btns[1] = lis3lv02d_get_axis(abs(lis3->ac.y), btns); + lis3->mapped_btns[2] = lis3lv02d_get_axis(abs(lis3->ac.z), btns); + + err = input_register_device(lis3->idev); + if (err) + goto err_free_input; + + return 0; + +err_free_input: + input_free_device(input_dev); + lis3->idev = NULL; + return err; + +} +EXPORT_SYMBOL_GPL(lis3lv02d_joystick_enable); + +void lis3lv02d_joystick_disable(struct lis3lv02d *lis3) +{ + if (lis3->irq) + free_irq(lis3->irq, lis3); + if (lis3->pdata && lis3->pdata->irq2) + free_irq(lis3->pdata->irq2, lis3); + + if (!lis3->idev) + return; + + if (lis3->irq) + misc_deregister(&lis3->miscdev); + input_unregister_device(lis3->idev); + lis3->idev = NULL; +} +EXPORT_SYMBOL_GPL(lis3lv02d_joystick_disable); + +/* Sysfs stuff */ +static void lis3lv02d_sysfs_poweron(struct lis3lv02d *lis3) +{ + /* + * SYSFS functions are fast visitors so put-call + * immediately after the get-call. However, keep + * chip running for a while and schedule delayed + * suspend. This way periodic sysfs calls doesn't + * suffer from relatively long power up time. 
+ */ + + if (lis3->pm_dev) { + pm_runtime_get_sync(lis3->pm_dev); + pm_runtime_put_noidle(lis3->pm_dev); + pm_schedule_suspend(lis3->pm_dev, LIS3_SYSFS_POWERDOWN_DELAY); + } +} + +static ssize_t lis3lv02d_selftest_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct lis3lv02d *lis3 = dev_get_drvdata(dev); + s16 values[3]; + + static const char ok[] = "OK"; + static const char fail[] = "FAIL"; + static const char irq[] = "FAIL_IRQ"; + const char *res; + + lis3lv02d_sysfs_poweron(lis3); + switch (lis3lv02d_selftest(lis3, values)) { + case SELFTEST_FAIL: + res = fail; + break; + case SELFTEST_IRQ: + res = irq; + break; + case SELFTEST_OK: + default: + res = ok; + break; + } + return sprintf(buf, "%s %d %d %d\n", res, + values[0], values[1], values[2]); +} + +static ssize_t lis3lv02d_position_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct lis3lv02d *lis3 = dev_get_drvdata(dev); + int x, y, z; + + lis3lv02d_sysfs_poweron(lis3); + mutex_lock(&lis3->mutex); + lis3lv02d_get_xyz(lis3, &x, &y, &z); + mutex_unlock(&lis3->mutex); + return sprintf(buf, "(%d,%d,%d)\n", x, y, z); +} + +static ssize_t lis3lv02d_rate_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct lis3lv02d *lis3 = dev_get_drvdata(dev); + int odr_idx; + + lis3lv02d_sysfs_poweron(lis3); + + odr_idx = lis3lv02d_get_odr_index(lis3); + return sprintf(buf, "%d\n", lis3->odrs[odr_idx]); +} + +static ssize_t lis3lv02d_rate_set(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct lis3lv02d *lis3 = dev_get_drvdata(dev); + unsigned long rate; + int ret; + + ret = kstrtoul(buf, 0, &rate); + if (ret) + return ret; + + lis3lv02d_sysfs_poweron(lis3); + if (lis3lv02d_set_odr(lis3, rate)) + return -EINVAL; + + return count; +} + +static DEVICE_ATTR(selftest, S_IRUSR, lis3lv02d_selftest_show, NULL); +static DEVICE_ATTR(position, S_IRUGO, lis3lv02d_position_show, NULL); +static DEVICE_ATTR(rate, S_IRUGO | S_IWUSR, lis3lv02d_rate_show, + lis3lv02d_rate_set); + +static struct attribute *lis3lv02d_attributes[] = { + &dev_attr_selftest.attr, + &dev_attr_position.attr, + &dev_attr_rate.attr, + NULL +}; + +static const struct attribute_group lis3lv02d_attribute_group = { + .attrs = lis3lv02d_attributes +}; + + +static int lis3lv02d_add_fs(struct lis3lv02d *lis3) +{ + lis3->pdev = platform_device_register_simple(DRIVER_NAME, -1, NULL, 0); + if (IS_ERR(lis3->pdev)) + return PTR_ERR(lis3->pdev); + + platform_set_drvdata(lis3->pdev, lis3); + return sysfs_create_group(&lis3->pdev->dev.kobj, &lis3lv02d_attribute_group); +} + +void lis3lv02d_remove_fs(struct lis3lv02d *lis3) +{ + sysfs_remove_group(&lis3->pdev->dev.kobj, &lis3lv02d_attribute_group); + platform_device_unregister(lis3->pdev); + if (lis3->pm_dev) { + /* Barrier after the sysfs remove */ + pm_runtime_barrier(lis3->pm_dev); + + /* SYSFS may have left chip running. 
Turn off if necessary */ + if (!pm_runtime_suspended(lis3->pm_dev)) + lis3lv02d_poweroff(lis3); + + pm_runtime_disable(lis3->pm_dev); + pm_runtime_set_suspended(lis3->pm_dev); + } + kfree(lis3->reg_cache); +} +EXPORT_SYMBOL_GPL(lis3lv02d_remove_fs); + +static void lis3lv02d_8b_configure(struct lis3lv02d *lis3, + struct lis3lv02d_platform_data *p) +{ + int err; + int ctrl2 = p->hipass_ctrl; + + if (p->click_flags) { + lis3->write(lis3, CLICK_CFG, p->click_flags); + lis3->write(lis3, CLICK_TIMELIMIT, p->click_time_limit); + lis3->write(lis3, CLICK_LATENCY, p->click_latency); + lis3->write(lis3, CLICK_WINDOW, p->click_window); + lis3->write(lis3, CLICK_THSZ, p->click_thresh_z & 0xf); + lis3->write(lis3, CLICK_THSY_X, + (p->click_thresh_x & 0xf) | + (p->click_thresh_y << 4)); + + if (lis3->idev) { + input_set_capability(lis3->idev, EV_KEY, BTN_X); + input_set_capability(lis3->idev, EV_KEY, BTN_Y); + input_set_capability(lis3->idev, EV_KEY, BTN_Z); + } + } + + if (p->wakeup_flags) { + lis3->write(lis3, FF_WU_CFG_1, p->wakeup_flags); + lis3->write(lis3, FF_WU_THS_1, p->wakeup_thresh & 0x7f); + /* pdata value + 1 to keep this backward compatible*/ + lis3->write(lis3, FF_WU_DURATION_1, p->duration1 + 1); + ctrl2 ^= HP_FF_WU1; /* Xor to keep compatible with old pdata*/ + } + + if (p->wakeup_flags2) { + lis3->write(lis3, FF_WU_CFG_2, p->wakeup_flags2); + lis3->write(lis3, FF_WU_THS_2, p->wakeup_thresh2 & 0x7f); + /* pdata value + 1 to keep this backward compatible*/ + lis3->write(lis3, FF_WU_DURATION_2, p->duration2 + 1); + ctrl2 ^= HP_FF_WU2; /* Xor to keep compatible with old pdata*/ + } + /* Configure hipass filters */ + lis3->write(lis3, CTRL_REG2, ctrl2); + + if (p->irq2) { + err = request_threaded_irq(p->irq2, + NULL, + lis302dl_interrupt_thread2_8b, + IRQF_TRIGGER_RISING | IRQF_ONESHOT | + (p->irq_flags2 & IRQF_TRIGGER_MASK), + DRIVER_NAME, lis3); + if (err < 0) + pr_err("No second IRQ. 
Limited functionality\n"); + } +} + +#ifdef CONFIG_OF +int lis3lv02d_init_dt(struct lis3lv02d *lis3) +{ + struct lis3lv02d_platform_data *pdata; + struct device_node *np = lis3->of_node; + u32 val; + s32 sval; + + if (!lis3->of_node) + return 0; + + pdata = kzalloc(sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return -ENOMEM; + + if (of_property_read_bool(np, "st,click-single-x")) + pdata->click_flags |= LIS3_CLICK_SINGLE_X; + if (of_property_read_bool(np, "st,click-double-x")) + pdata->click_flags |= LIS3_CLICK_DOUBLE_X; + + if (of_property_read_bool(np, "st,click-single-y")) + pdata->click_flags |= LIS3_CLICK_SINGLE_Y; + if (of_property_read_bool(np, "st,click-double-y")) + pdata->click_flags |= LIS3_CLICK_DOUBLE_Y; + + if (of_property_read_bool(np, "st,click-single-z")) + pdata->click_flags |= LIS3_CLICK_SINGLE_Z; + if (of_property_read_bool(np, "st,click-double-z")) + pdata->click_flags |= LIS3_CLICK_DOUBLE_Z; + + if (!of_property_read_u32(np, "st,click-threshold-x", &val)) + pdata->click_thresh_x = val; + if (!of_property_read_u32(np, "st,click-threshold-y", &val)) + pdata->click_thresh_y = val; + if (!of_property_read_u32(np, "st,click-threshold-z", &val)) + pdata->click_thresh_z = val; + + if (!of_property_read_u32(np, "st,click-time-limit", &val)) + pdata->click_time_limit = val; + if (!of_property_read_u32(np, "st,click-latency", &val)) + pdata->click_latency = val; + if (!of_property_read_u32(np, "st,click-window", &val)) + pdata->click_window = val; + + if (of_property_read_bool(np, "st,irq1-disable")) + pdata->irq_cfg |= LIS3_IRQ1_DISABLE; + if (of_property_read_bool(np, "st,irq1-ff-wu-1")) + pdata->irq_cfg |= LIS3_IRQ1_FF_WU_1; + if (of_property_read_bool(np, "st,irq1-ff-wu-2")) + pdata->irq_cfg |= LIS3_IRQ1_FF_WU_2; + if (of_property_read_bool(np, "st,irq1-data-ready")) + pdata->irq_cfg |= LIS3_IRQ1_DATA_READY; + if (of_property_read_bool(np, "st,irq1-click")) + pdata->irq_cfg |= LIS3_IRQ1_CLICK; + + if (of_property_read_bool(np, "st,irq2-disable")) + pdata->irq_cfg |= LIS3_IRQ2_DISABLE; + if (of_property_read_bool(np, "st,irq2-ff-wu-1")) + pdata->irq_cfg |= LIS3_IRQ2_FF_WU_1; + if (of_property_read_bool(np, "st,irq2-ff-wu-2")) + pdata->irq_cfg |= LIS3_IRQ2_FF_WU_2; + if (of_property_read_bool(np, "st,irq2-data-ready")) + pdata->irq_cfg |= LIS3_IRQ2_DATA_READY; + if (of_property_read_bool(np, "st,irq2-click")) + pdata->irq_cfg |= LIS3_IRQ2_CLICK; + + if (of_property_read_bool(np, "st,irq-open-drain")) + pdata->irq_cfg |= LIS3_IRQ_OPEN_DRAIN; + if (of_property_read_bool(np, "st,irq-active-low")) + pdata->irq_cfg |= LIS3_IRQ_ACTIVE_LOW; + + if (!of_property_read_u32(np, "st,wu-duration-1", &val)) + pdata->duration1 = val; + if (!of_property_read_u32(np, "st,wu-duration-2", &val)) + pdata->duration2 = val; + + if (of_property_read_bool(np, "st,wakeup-x-lo")) + pdata->wakeup_flags |= LIS3_WAKEUP_X_LO; + if (of_property_read_bool(np, "st,wakeup-x-hi")) + pdata->wakeup_flags |= LIS3_WAKEUP_X_HI; + if (of_property_read_bool(np, "st,wakeup-y-lo")) + pdata->wakeup_flags |= LIS3_WAKEUP_Y_LO; + if (of_property_read_bool(np, "st,wakeup-y-hi")) + pdata->wakeup_flags |= LIS3_WAKEUP_Y_HI; + if (of_property_read_bool(np, "st,wakeup-z-lo")) + pdata->wakeup_flags |= LIS3_WAKEUP_Z_LO; + if (of_property_read_bool(np, "st,wakeup-z-hi")) + pdata->wakeup_flags |= LIS3_WAKEUP_Z_HI; + if (of_get_property(np, "st,wakeup-threshold", &val)) + pdata->wakeup_thresh = val; + + if (of_property_read_bool(np, "st,wakeup2-x-lo")) + pdata->wakeup_flags2 |= LIS3_WAKEUP_X_LO; + if (of_property_read_bool(np, 
"st,wakeup2-x-hi")) + pdata->wakeup_flags2 |= LIS3_WAKEUP_X_HI; + if (of_property_read_bool(np, "st,wakeup2-y-lo")) + pdata->wakeup_flags2 |= LIS3_WAKEUP_Y_LO; + if (of_property_read_bool(np, "st,wakeup2-y-hi")) + pdata->wakeup_flags2 |= LIS3_WAKEUP_Y_HI; + if (of_property_read_bool(np, "st,wakeup2-z-lo")) + pdata->wakeup_flags2 |= LIS3_WAKEUP_Z_LO; + if (of_property_read_bool(np, "st,wakeup2-z-hi")) + pdata->wakeup_flags2 |= LIS3_WAKEUP_Z_HI; + if (of_get_property(np, "st,wakeup2-threshold", &val)) + pdata->wakeup_thresh2 = val; + + if (!of_property_read_u32(np, "st,highpass-cutoff-hz", &val)) { + switch (val) { + case 1: + pdata->hipass_ctrl = LIS3_HIPASS_CUTFF_1HZ; + break; + case 2: + pdata->hipass_ctrl = LIS3_HIPASS_CUTFF_2HZ; + break; + case 4: + pdata->hipass_ctrl = LIS3_HIPASS_CUTFF_4HZ; + break; + case 8: + pdata->hipass_ctrl = LIS3_HIPASS_CUTFF_8HZ; + break; + } + } + + if (of_property_read_bool(np, "st,hipass1-disable")) + pdata->hipass_ctrl |= LIS3_HIPASS1_DISABLE; + if (of_property_read_bool(np, "st,hipass2-disable")) + pdata->hipass_ctrl |= LIS3_HIPASS2_DISABLE; + + if (of_property_read_s32(np, "st,axis-x", &sval) == 0) + pdata->axis_x = sval; + if (of_property_read_s32(np, "st,axis-y", &sval) == 0) + pdata->axis_y = sval; + if (of_property_read_s32(np, "st,axis-z", &sval) == 0) + pdata->axis_z = sval; + + if (of_property_read_u32(np, "st,default-rate", &val) == 0) + pdata->default_rate = val; + + if (of_property_read_s32(np, "st,min-limit-x", &sval) == 0) + pdata->st_min_limits[0] = sval; + if (of_property_read_s32(np, "st,min-limit-y", &sval) == 0) + pdata->st_min_limits[1] = sval; + if (of_property_read_s32(np, "st,min-limit-z", &sval) == 0) + pdata->st_min_limits[2] = sval; + + if (of_property_read_s32(np, "st,max-limit-x", &sval) == 0) + pdata->st_max_limits[0] = sval; + if (of_property_read_s32(np, "st,max-limit-y", &sval) == 0) + pdata->st_max_limits[1] = sval; + if (of_property_read_s32(np, "st,max-limit-z", &sval) == 0) + pdata->st_max_limits[2] = sval; + + + lis3->pdata = pdata; + + return 0; +} + +#else +int lis3lv02d_init_dt(struct lis3lv02d *lis3) +{ + return 0; +} +#endif +EXPORT_SYMBOL_GPL(lis3lv02d_init_dt); + +/* + * Initialise the accelerometer and the various subsystems. + * Should be rather independent of the bus system. 
+ */ +int lis3lv02d_init_device(struct lis3lv02d *lis3) +{ + int err; + irq_handler_t thread_fn; + int irq_flags = 0; + + lis3->whoami = lis3lv02d_read_8(lis3, WHO_AM_I); + + switch (lis3->whoami) { + case WAI_12B: + pr_info("12 bits sensor found\n"); + lis3->read_data = lis3lv02d_read_12; + lis3->mdps_max_val = 2048; + lis3->pwron_delay = LIS3_PWRON_DELAY_WAI_12B; + lis3->odrs = lis3_12_rates; + lis3->odr_mask = CTRL1_DF0 | CTRL1_DF1; + lis3->scale = LIS3_SENSITIVITY_12B; + lis3->regs = lis3_wai12_regs; + lis3->regs_size = ARRAY_SIZE(lis3_wai12_regs); + break; + case WAI_8B: + pr_info("8 bits sensor found\n"); + lis3->read_data = lis3lv02d_read_8; + lis3->mdps_max_val = 128; + lis3->pwron_delay = LIS3_PWRON_DELAY_WAI_8B; + lis3->odrs = lis3_8_rates; + lis3->odr_mask = CTRL1_DR; + lis3->scale = LIS3_SENSITIVITY_8B; + lis3->regs = lis3_wai8_regs; + lis3->regs_size = ARRAY_SIZE(lis3_wai8_regs); + break; + case WAI_3DC: + pr_info("8 bits 3DC sensor found\n"); + lis3->read_data = lis3lv02d_read_8; + lis3->mdps_max_val = 128; + lis3->pwron_delay = LIS3_PWRON_DELAY_WAI_8B; + lis3->odrs = lis3_3dc_rates; + lis3->odr_mask = CTRL1_ODR0|CTRL1_ODR1|CTRL1_ODR2|CTRL1_ODR3; + lis3->scale = LIS3_SENSITIVITY_8B; + break; + case WAI_3DLH: + pr_info("16 bits lis331dlh sensor found\n"); + lis3->read_data = lis331dlh_read_data; + lis3->mdps_max_val = 2048; /* 12 bits for 2G */ + lis3->shift_adj = SHIFT_ADJ_2G; + lis3->pwron_delay = LIS3_PWRON_DELAY_WAI_8B; + lis3->odrs = lis3_3dlh_rates; + lis3->odr_mask = CTRL1_DR0 | CTRL1_DR1; + lis3->scale = LIS3DLH_SENSITIVITY_2G; + break; + default: + pr_err("unknown sensor type 0x%X\n", lis3->whoami); + return -ENODEV; + } + + lis3->reg_cache = kzalloc(max(sizeof(lis3_wai8_regs), + sizeof(lis3_wai12_regs)), GFP_KERNEL); + + if (lis3->reg_cache == NULL) + return -ENOMEM; + + mutex_init(&lis3->mutex); + atomic_set(&lis3->wake_thread, 0); + + lis3lv02d_add_fs(lis3); + err = lis3lv02d_poweron(lis3); + if (err) { + lis3lv02d_remove_fs(lis3); + return err; + } + + if (lis3->pm_dev) { + pm_runtime_set_active(lis3->pm_dev); + pm_runtime_enable(lis3->pm_dev); + } + + if (lis3lv02d_joystick_enable(lis3)) + pr_err("joystick initialization failed\n"); + + /* passing in platform specific data is purely optional and only + * used by the SPI transport layer at the moment */ + if (lis3->pdata) { + struct lis3lv02d_platform_data *p = lis3->pdata; + + if (lis3->whoami == WAI_8B) + lis3lv02d_8b_configure(lis3, p); + + irq_flags = p->irq_flags1 & IRQF_TRIGGER_MASK; + + lis3->irq_cfg = p->irq_cfg; + if (p->irq_cfg) + lis3->write(lis3, CTRL_REG3, p->irq_cfg); + + if (p->default_rate) + lis3lv02d_set_odr(lis3, p->default_rate); + } + + /* bail if we did not get an IRQ from the bus layer */ + if (!lis3->irq) { + pr_debug("No IRQ. Disabling /dev/freefall\n"); + goto out; + } + + /* + * The sensor can generate interrupts for free-fall and direction + * detection (distinguishable with FF_WU_SRC and DD_SRC) but to keep + * the things simple and _fast_ we activate it only for free-fall, so + * no need to read register (very slow with ACPI). For the same reason, + * we forbid shared interrupts. + * + * IRQF_TRIGGER_RISING seems pointless on HP laptops because the + * io-apic is not configurable (and generates a warning) but I keep it + * in case of support for other hardware. 
+ */ + if (lis3->pdata && lis3->whoami == WAI_8B) + thread_fn = lis302dl_interrupt_thread1_8b; + else + thread_fn = NULL; + + err = request_threaded_irq(lis3->irq, lis302dl_interrupt, + thread_fn, + IRQF_TRIGGER_RISING | IRQF_ONESHOT | + irq_flags, + DRIVER_NAME, lis3); + + if (err < 0) { + pr_err("Cannot get IRQ\n"); + goto out; + } + + lis3->miscdev.minor = MISC_DYNAMIC_MINOR; + lis3->miscdev.name = "freefall"; + lis3->miscdev.fops = &lis3lv02d_misc_fops; + + if (misc_register(&lis3->miscdev)) + pr_err("misc_register failed\n"); +out: + return 0; +} +EXPORT_SYMBOL_GPL(lis3lv02d_init_device); + +MODULE_DESCRIPTION("ST LIS3LV02Dx three-axis digital accelerometer driver"); +MODULE_AUTHOR("Yan Burman, Eric Piel, Pavel Machek"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/lis3lv02d/lis3lv02d.h b/drivers/misc/lis3lv02d/lis3lv02d.h new file mode 100644 index 0000000000..195bd2fd2e --- /dev/null +++ b/drivers/misc/lis3lv02d/lis3lv02d.h @@ -0,0 +1,318 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * lis3lv02d.h - ST LIS3LV02DL accelerometer driver + * + * Copyright (C) 2007-2008 Yan Burman + * Copyright (C) 2008-2009 Eric Piel + */ +#include +#include +#include +#include + +/* + * This driver tries to support the "digital" accelerometer chips from + * STMicroelectronics such as LIS3LV02DL, LIS302DL, LIS3L02DQ, LIS331DL, + * LIS331DLH, LIS35DE, or LIS202DL. They are very similar in terms of + * programming, with almost the same registers. In addition to differing + * on physical properties, they differ on the number of axes (2/3), + * precision (8/12 bits), and special features (freefall detection, + * click...). Unfortunately, not all the differences can be probed via + * a register. They can be connected either via I²C or SPI. + */ + +#include + +enum lis3_reg { + WHO_AM_I = 0x0F, + OFFSET_X = 0x16, + OFFSET_Y = 0x17, + OFFSET_Z = 0x18, + GAIN_X = 0x19, + GAIN_Y = 0x1A, + GAIN_Z = 0x1B, + CTRL_REG1 = 0x20, + CTRL_REG2 = 0x21, + CTRL_REG3 = 0x22, + CTRL_REG4 = 0x23, + HP_FILTER_RESET = 0x23, + STATUS_REG = 0x27, + OUTX_L = 0x28, + OUTX_H = 0x29, + OUTX = 0x29, + OUTY_L = 0x2A, + OUTY_H = 0x2B, + OUTY = 0x2B, + OUTZ_L = 0x2C, + OUTZ_H = 0x2D, + OUTZ = 0x2D, +}; + +enum lis302d_reg { + FF_WU_CFG_1 = 0x30, + FF_WU_SRC_1 = 0x31, + FF_WU_THS_1 = 0x32, + FF_WU_DURATION_1 = 0x33, + FF_WU_CFG_2 = 0x34, + FF_WU_SRC_2 = 0x35, + FF_WU_THS_2 = 0x36, + FF_WU_DURATION_2 = 0x37, + CLICK_CFG = 0x38, + CLICK_SRC = 0x39, + CLICK_THSY_X = 0x3B, + CLICK_THSZ = 0x3C, + CLICK_TIMELIMIT = 0x3D, + CLICK_LATENCY = 0x3E, + CLICK_WINDOW = 0x3F, +}; + +enum lis3lv02d_reg { + FF_WU_CFG = 0x30, + FF_WU_SRC = 0x31, + FF_WU_ACK = 0x32, + FF_WU_THS_L = 0x34, + FF_WU_THS_H = 0x35, + FF_WU_DURATION = 0x36, + DD_CFG = 0x38, + DD_SRC = 0x39, + DD_ACK = 0x3A, + DD_THSI_L = 0x3C, + DD_THSI_H = 0x3D, + DD_THSE_L = 0x3E, + DD_THSE_H = 0x3F, +}; + +enum lis3_who_am_i { + WAI_3DLH = 0x32, /* 16 bits: LIS331DLH */ + WAI_3DC = 0x33, /* 8 bits: LIS3DC, HP3DC */ + WAI_12B = 0x3A, /* 12 bits: LIS3LV02D[LQ]... */ + WAI_8B = 0x3B, /* 8 bits: LIS[23]02D[LQ]... 
*/ + WAI_6B = 0x52, /* 6 bits: LIS331DLF - not supported */ +}; + +enum lis3_type { + LIS3LV02D, + LIS3DC, + HP3DC, + LIS2302D, + LIS331DLF, + LIS331DLH, +}; + +enum lis3lv02d_ctrl1_12b { + CTRL1_Xen = 0x01, + CTRL1_Yen = 0x02, + CTRL1_Zen = 0x04, + CTRL1_ST = 0x08, + CTRL1_DF0 = 0x10, + CTRL1_DF1 = 0x20, + CTRL1_PD0 = 0x40, + CTRL1_PD1 = 0x80, +}; + +/* Delta to ctrl1_12b version */ +enum lis3lv02d_ctrl1_8b { + CTRL1_STM = 0x08, + CTRL1_STP = 0x10, + CTRL1_FS = 0x20, + CTRL1_PD = 0x40, + CTRL1_DR = 0x80, +}; + +enum lis3lv02d_ctrl1_3dc { + CTRL1_ODR0 = 0x10, + CTRL1_ODR1 = 0x20, + CTRL1_ODR2 = 0x40, + CTRL1_ODR3 = 0x80, +}; + +enum lis331dlh_ctrl1 { + CTRL1_DR0 = 0x08, + CTRL1_DR1 = 0x10, + CTRL1_PM0 = 0x20, + CTRL1_PM1 = 0x40, + CTRL1_PM2 = 0x80, +}; + +enum lis331dlh_ctrl2 { + CTRL2_HPEN1 = 0x04, + CTRL2_HPEN2 = 0x08, + CTRL2_FDS_3DLH = 0x10, + CTRL2_BOOT_3DLH = 0x80, +}; + +enum lis331dlh_ctrl4 { + CTRL4_STSIGN = 0x08, + CTRL4_BLE = 0x40, + CTRL4_BDU = 0x80, +}; + +enum lis3lv02d_ctrl2 { + CTRL2_DAS = 0x01, + CTRL2_SIM = 0x02, + CTRL2_DRDY = 0x04, + CTRL2_IEN = 0x08, + CTRL2_BOOT = 0x10, + CTRL2_BLE = 0x20, + CTRL2_BDU = 0x40, /* Block Data Update */ + CTRL2_FS = 0x80, /* Full Scale selection */ +}; + +enum lis3lv02d_ctrl4_3dc { + CTRL4_SIM = 0x01, + CTRL4_ST0 = 0x02, + CTRL4_ST1 = 0x04, + CTRL4_FS0 = 0x10, + CTRL4_FS1 = 0x20, +}; + +enum lis302d_ctrl2 { + HP_FF_WU2 = 0x08, + HP_FF_WU1 = 0x04, + CTRL2_BOOT_8B = 0x40, +}; + +enum lis3lv02d_ctrl3 { + CTRL3_CFS0 = 0x01, + CTRL3_CFS1 = 0x02, + CTRL3_FDS = 0x10, + CTRL3_HPFF = 0x20, + CTRL3_HPDD = 0x40, + CTRL3_ECK = 0x80, +}; + +enum lis3lv02d_status_reg { + STATUS_XDA = 0x01, + STATUS_YDA = 0x02, + STATUS_ZDA = 0x04, + STATUS_XYZDA = 0x08, + STATUS_XOR = 0x10, + STATUS_YOR = 0x20, + STATUS_ZOR = 0x40, + STATUS_XYZOR = 0x80, +}; + +enum lis3lv02d_ff_wu_cfg { + FF_WU_CFG_XLIE = 0x01, + FF_WU_CFG_XHIE = 0x02, + FF_WU_CFG_YLIE = 0x04, + FF_WU_CFG_YHIE = 0x08, + FF_WU_CFG_ZLIE = 0x10, + FF_WU_CFG_ZHIE = 0x20, + FF_WU_CFG_LIR = 0x40, + FF_WU_CFG_AOI = 0x80, +}; + +enum lis3lv02d_ff_wu_src { + FF_WU_SRC_XL = 0x01, + FF_WU_SRC_XH = 0x02, + FF_WU_SRC_YL = 0x04, + FF_WU_SRC_YH = 0x08, + FF_WU_SRC_ZL = 0x10, + FF_WU_SRC_ZH = 0x20, + FF_WU_SRC_IA = 0x40, +}; + +enum lis3lv02d_dd_cfg { + DD_CFG_XLIE = 0x01, + DD_CFG_XHIE = 0x02, + DD_CFG_YLIE = 0x04, + DD_CFG_YHIE = 0x08, + DD_CFG_ZLIE = 0x10, + DD_CFG_ZHIE = 0x20, + DD_CFG_LIR = 0x40, + DD_CFG_IEND = 0x80, +}; + +enum lis3lv02d_dd_src { + DD_SRC_XL = 0x01, + DD_SRC_XH = 0x02, + DD_SRC_YL = 0x04, + DD_SRC_YH = 0x08, + DD_SRC_ZL = 0x10, + DD_SRC_ZH = 0x20, + DD_SRC_IA = 0x40, +}; + +enum lis3lv02d_click_src_8b { + CLICK_SINGLE_X = 0x01, + CLICK_DOUBLE_X = 0x02, + CLICK_SINGLE_Y = 0x04, + CLICK_DOUBLE_Y = 0x08, + CLICK_SINGLE_Z = 0x10, + CLICK_DOUBLE_Z = 0x20, + CLICK_IA = 0x40, +}; + +enum lis3lv02d_reg_state { + LIS3_REG_OFF = 0x00, + LIS3_REG_ON = 0x01, +}; + +union axis_conversion { + struct { + int x, y, z; + }; + int as_array[3]; + +}; + +struct lis3lv02d { + void *bus_priv; /* used by the bus layer only */ + struct device *pm_dev; /* for pm_runtime purposes */ + int (*init) (struct lis3lv02d *lis3); + int (*write) (struct lis3lv02d *lis3, int reg, u8 val); + int (*read) (struct lis3lv02d *lis3, int reg, u8 *ret); + int (*blkread) (struct lis3lv02d *lis3, int reg, int len, u8 *ret); + int (*reg_ctrl) (struct lis3lv02d *lis3, bool state); + + int *odrs; /* Supported output data rates */ + u8 *regs; /* Regs to store / restore */ + int regs_size; + u8 *reg_cache; + bool regs_stored; + u8 odr_mask; /* 
ODR bit mask */ + u8 whoami; /* indicates measurement precision */ + s16 (*read_data) (struct lis3lv02d *lis3, int reg); + int mdps_max_val; + int pwron_delay; + int scale; /* + * relationship between 1 LBS and mG + * (1/1000th of earth gravity) + */ + + struct input_dev *idev; /* input device */ + struct platform_device *pdev; /* platform device */ + struct regulator_bulk_data regulators[2]; + atomic_t count; /* interrupt count after last read */ + union axis_conversion ac; /* hw -> logical axis */ + int mapped_btns[3]; + + u32 irq; /* IRQ number */ + struct fasync_struct *async_queue; /* queue for the misc device */ + wait_queue_head_t misc_wait; /* Wait queue for the misc device */ + unsigned long misc_opened; /* bit0: whether the device is open */ + struct miscdevice miscdev; + + int data_ready_count[2]; + atomic_t wake_thread; + unsigned char irq_cfg; + unsigned int shift_adj; + + struct lis3lv02d_platform_data *pdata; /* for passing board config */ + struct mutex mutex; /* Serialize poll and selftest */ + +#ifdef CONFIG_OF + struct device_node *of_node; +#endif +}; + +int lis3lv02d_init_device(struct lis3lv02d *lis3); +int lis3lv02d_joystick_enable(struct lis3lv02d *lis3); +void lis3lv02d_joystick_disable(struct lis3lv02d *lis3); +void lis3lv02d_poweroff(struct lis3lv02d *lis3); +int lis3lv02d_poweron(struct lis3lv02d *lis3); +void lis3lv02d_remove_fs(struct lis3lv02d *lis3); +int lis3lv02d_init_dt(struct lis3lv02d *lis3); + +extern struct lis3lv02d lis3_dev; diff --git a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c new file mode 100644 index 0000000000..3882e97e96 --- /dev/null +++ b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c @@ -0,0 +1,274 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * drivers/hwmon/lis3lv02d_i2c.c + * + * Implements I2C interface for lis3lv02d (STMicroelectronics) accelerometer. + * Driver is based on corresponding SPI driver written by Daniel Mack + * (lis3lv02d_spi.c (C) 2009 Daniel Mack ). + * + * Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). + * + * Contact: Samu Onkalo + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lis3lv02d.h" + +#define DRV_NAME "lis3lv02d_i2c" + +static const char reg_vdd[] = "Vdd"; +static const char reg_vdd_io[] = "Vdd_IO"; + +static int lis3_reg_ctrl(struct lis3lv02d *lis3, bool state) +{ + int ret; + if (state == LIS3_REG_OFF) { + ret = regulator_bulk_disable(ARRAY_SIZE(lis3->regulators), + lis3->regulators); + } else { + ret = regulator_bulk_enable(ARRAY_SIZE(lis3->regulators), + lis3->regulators); + /* Chip needs time to wakeup. 
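+ * (The regulators were just enabled; the 10-20 ms usleep_range() below is an empirical margin.)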
Not mentioned in datasheet */ + usleep_range(10000, 20000); + } + return ret; +} + +static inline s32 lis3_i2c_write(struct lis3lv02d *lis3, int reg, u8 value) +{ + struct i2c_client *c = lis3->bus_priv; + return i2c_smbus_write_byte_data(c, reg, value); +} + +static inline s32 lis3_i2c_read(struct lis3lv02d *lis3, int reg, u8 *v) +{ + struct i2c_client *c = lis3->bus_priv; + *v = i2c_smbus_read_byte_data(c, reg); + return 0; +} + +static inline s32 lis3_i2c_blockread(struct lis3lv02d *lis3, int reg, int len, + u8 *v) +{ + struct i2c_client *c = lis3->bus_priv; + reg |= (1 << 7); /* 7th bit enables address auto incrementation */ + return i2c_smbus_read_i2c_block_data(c, reg, len, v); +} + +static int lis3_i2c_init(struct lis3lv02d *lis3) +{ + u8 reg; + int ret; + + lis3_reg_ctrl(lis3, LIS3_REG_ON); + + lis3->read(lis3, WHO_AM_I, ®); + if (reg != lis3->whoami) + printk(KERN_ERR "lis3: power on failure\n"); + + /* power up the device */ + ret = lis3->read(lis3, CTRL_REG1, ®); + if (ret < 0) + return ret; + + if (lis3->whoami == WAI_3DLH) + reg |= CTRL1_PM0 | CTRL1_Xen | CTRL1_Yen | CTRL1_Zen; + else + reg |= CTRL1_PD0 | CTRL1_Xen | CTRL1_Yen | CTRL1_Zen; + + return lis3->write(lis3, CTRL_REG1, reg); +} + +/* Default axis mapping but it can be overwritten by platform data */ +static union axis_conversion lis3lv02d_axis_map = + { .as_array = { LIS3_DEV_X, LIS3_DEV_Y, LIS3_DEV_Z } }; + +#ifdef CONFIG_OF +static const struct of_device_id lis3lv02d_i2c_dt_ids[] = { + { .compatible = "st,lis3lv02d" }, + {} +}; +MODULE_DEVICE_TABLE(of, lis3lv02d_i2c_dt_ids); +#endif + +static int lis3lv02d_i2c_probe(struct i2c_client *client) +{ + int ret = 0; + struct lis3lv02d_platform_data *pdata = client->dev.platform_data; + +#ifdef CONFIG_OF + if (of_match_device(lis3lv02d_i2c_dt_ids, &client->dev)) { + lis3_dev.of_node = client->dev.of_node; + ret = lis3lv02d_init_dt(&lis3_dev); + if (ret) + return ret; + pdata = lis3_dev.pdata; + } +#endif + + if (pdata) { + if ((pdata->driver_features & LIS3_USE_BLOCK_READ) && + (i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_I2C_BLOCK))) + lis3_dev.blkread = lis3_i2c_blockread; + + if (pdata->axis_x) + lis3lv02d_axis_map.x = pdata->axis_x; + + if (pdata->axis_y) + lis3lv02d_axis_map.y = pdata->axis_y; + + if (pdata->axis_z) + lis3lv02d_axis_map.z = pdata->axis_z; + + if (pdata->setup_resources) + ret = pdata->setup_resources(); + + if (ret) + goto fail; + } + + lis3_dev.regulators[0].supply = reg_vdd; + lis3_dev.regulators[1].supply = reg_vdd_io; + ret = regulator_bulk_get(&client->dev, + ARRAY_SIZE(lis3_dev.regulators), + lis3_dev.regulators); + if (ret < 0) + goto fail; + + lis3_dev.pdata = pdata; + lis3_dev.bus_priv = client; + lis3_dev.init = lis3_i2c_init; + lis3_dev.read = lis3_i2c_read; + lis3_dev.write = lis3_i2c_write; + lis3_dev.irq = client->irq; + lis3_dev.ac = lis3lv02d_axis_map; + lis3_dev.pm_dev = &client->dev; + + i2c_set_clientdata(client, &lis3_dev); + + /* Provide power over the init call */ + lis3_reg_ctrl(&lis3_dev, LIS3_REG_ON); + + ret = lis3lv02d_init_device(&lis3_dev); + + lis3_reg_ctrl(&lis3_dev, LIS3_REG_OFF); + + if (ret) + goto fail2; + return 0; + +fail2: + regulator_bulk_free(ARRAY_SIZE(lis3_dev.regulators), + lis3_dev.regulators); +fail: + if (pdata && pdata->release_resources) + pdata->release_resources(); + return ret; +} + +static void lis3lv02d_i2c_remove(struct i2c_client *client) +{ + struct lis3lv02d *lis3 = i2c_get_clientdata(client); + struct lis3lv02d_platform_data *pdata = client->dev.platform_data; + + if (pdata && 
pdata->release_resources) + pdata->release_resources(); + + lis3lv02d_joystick_disable(lis3); + lis3lv02d_remove_fs(&lis3_dev); + + regulator_bulk_free(ARRAY_SIZE(lis3->regulators), + lis3_dev.regulators); +} + +#ifdef CONFIG_PM_SLEEP +static int lis3lv02d_i2c_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct lis3lv02d *lis3 = i2c_get_clientdata(client); + + if (!lis3->pdata || !lis3->pdata->wakeup_flags) + lis3lv02d_poweroff(lis3); + return 0; +} + +static int lis3lv02d_i2c_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct lis3lv02d *lis3 = i2c_get_clientdata(client); + + /* + * pm_runtime documentation says that devices should always + * be powered on at resume. Pm_runtime turns them off after system + * wide resume is complete. + */ + if (!lis3->pdata || !lis3->pdata->wakeup_flags || + pm_runtime_suspended(dev)) + lis3lv02d_poweron(lis3); + + return 0; +} +#endif /* CONFIG_PM_SLEEP */ + +#ifdef CONFIG_PM +static int lis3_i2c_runtime_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct lis3lv02d *lis3 = i2c_get_clientdata(client); + + lis3lv02d_poweroff(lis3); + return 0; +} + +static int lis3_i2c_runtime_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct lis3lv02d *lis3 = i2c_get_clientdata(client); + + lis3lv02d_poweron(lis3); + return 0; +} +#endif /* CONFIG_PM */ + +static const struct i2c_device_id lis3lv02d_id[] = { + {"lis3lv02d", LIS3LV02D}, + {"lis331dlh", LIS331DLH}, + {} +}; + +MODULE_DEVICE_TABLE(i2c, lis3lv02d_id); + +static const struct dev_pm_ops lis3_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(lis3lv02d_i2c_suspend, + lis3lv02d_i2c_resume) + SET_RUNTIME_PM_OPS(lis3_i2c_runtime_suspend, + lis3_i2c_runtime_resume, + NULL) +}; + +static struct i2c_driver lis3lv02d_i2c_driver = { + .driver = { + .name = DRV_NAME, + .pm = &lis3_pm_ops, + .of_match_table = of_match_ptr(lis3lv02d_i2c_dt_ids), + }, + .probe = lis3lv02d_i2c_probe, + .remove = lis3lv02d_i2c_remove, + .id_table = lis3lv02d_id, +}; + +module_i2c_driver(lis3lv02d_i2c_driver); + +MODULE_AUTHOR("Nokia Corporation"); +MODULE_DESCRIPTION("lis3lv02d I2C interface"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/lis3lv02d/lis3lv02d_spi.c b/drivers/misc/lis3lv02d/lis3lv02d_spi.c new file mode 100644 index 0000000000..203a108b88 --- /dev/null +++ b/drivers/misc/lis3lv02d/lis3lv02d_spi.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * lis3lv02d_spi - SPI glue layer for lis3lv02d + * + * Copyright (c) 2009 Daniel Mack + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lis3lv02d.h" + +#define DRV_NAME "lis3lv02d_spi" +#define LIS3_SPI_READ 0x80 + +static int lis3_spi_read(struct lis3lv02d *lis3, int reg, u8 *v) +{ + struct spi_device *spi = lis3->bus_priv; + int ret = spi_w8r8(spi, reg | LIS3_SPI_READ); + if (ret < 0) + return -EINVAL; + + *v = (u8) ret; + return 0; +} + +static int lis3_spi_write(struct lis3lv02d *lis3, int reg, u8 val) +{ + u8 tmp[2] = { reg, val }; + struct spi_device *spi = lis3->bus_priv; + return spi_write(spi, tmp, sizeof(tmp)); +} + +static int lis3_spi_init(struct lis3lv02d *lis3) +{ + u8 reg; + int ret; + + /* power up the device */ + ret = lis3->read(lis3, CTRL_REG1, ®); + if (ret < 0) + return ret; + + reg |= CTRL1_PD0 | CTRL1_Xen | CTRL1_Yen | CTRL1_Zen; + return lis3->write(lis3, CTRL_REG1, reg); +} + +static union axis_conversion lis3lv02d_axis_normal 
= + { .as_array = { 1, 2, 3 } }; + +#ifdef CONFIG_OF +static const struct of_device_id lis302dl_spi_dt_ids[] = { + { .compatible = "st,lis302dl-spi" }, + {} +}; +MODULE_DEVICE_TABLE(of, lis302dl_spi_dt_ids); +#endif + +static int lis302dl_spi_probe(struct spi_device *spi) +{ + int ret; + + spi->bits_per_word = 8; + spi->mode = SPI_MODE_0; + ret = spi_setup(spi); + if (ret < 0) + return ret; + + lis3_dev.bus_priv = spi; + lis3_dev.init = lis3_spi_init; + lis3_dev.read = lis3_spi_read; + lis3_dev.write = lis3_spi_write; + lis3_dev.irq = spi->irq; + lis3_dev.ac = lis3lv02d_axis_normal; + lis3_dev.pdata = spi->dev.platform_data; + +#ifdef CONFIG_OF + if (of_match_device(lis302dl_spi_dt_ids, &spi->dev)) { + lis3_dev.of_node = spi->dev.of_node; + ret = lis3lv02d_init_dt(&lis3_dev); + if (ret) + return ret; + } +#endif + spi_set_drvdata(spi, &lis3_dev); + + return lis3lv02d_init_device(&lis3_dev); +} + +static void lis302dl_spi_remove(struct spi_device *spi) +{ + struct lis3lv02d *lis3 = spi_get_drvdata(spi); + lis3lv02d_joystick_disable(lis3); + lis3lv02d_poweroff(lis3); + + lis3lv02d_remove_fs(&lis3_dev); +} + +#ifdef CONFIG_PM_SLEEP +static int lis3lv02d_spi_suspend(struct device *dev) +{ + struct spi_device *spi = to_spi_device(dev); + struct lis3lv02d *lis3 = spi_get_drvdata(spi); + + if (!lis3->pdata || !lis3->pdata->wakeup_flags) + lis3lv02d_poweroff(&lis3_dev); + + return 0; +} + +static int lis3lv02d_spi_resume(struct device *dev) +{ + struct spi_device *spi = to_spi_device(dev); + struct lis3lv02d *lis3 = spi_get_drvdata(spi); + + if (!lis3->pdata || !lis3->pdata->wakeup_flags) + lis3lv02d_poweron(lis3); + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(lis3lv02d_spi_pm, lis3lv02d_spi_suspend, + lis3lv02d_spi_resume); + +static struct spi_driver lis302dl_spi_driver = { + .driver = { + .name = DRV_NAME, + .pm = &lis3lv02d_spi_pm, + .of_match_table = of_match_ptr(lis302dl_spi_dt_ids), + }, + .probe = lis302dl_spi_probe, + .remove = lis302dl_spi_remove, +}; + +module_spi_driver(lis302dl_spi_driver); + +MODULE_AUTHOR("Daniel Mack "); +MODULE_DESCRIPTION("lis3lv02d SPI glue layer"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("spi:" DRV_NAME); diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile new file mode 100644 index 0000000000..95ef971b5e --- /dev/null +++ b/drivers/misc/lkdtm/Makefile @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_LKDTM) += lkdtm.o + +lkdtm-$(CONFIG_LKDTM) += core.o +lkdtm-$(CONFIG_LKDTM) += bugs.o +lkdtm-$(CONFIG_LKDTM) += heap.o +lkdtm-$(CONFIG_LKDTM) += perms.o +lkdtm-$(CONFIG_LKDTM) += refcount.o +lkdtm-$(CONFIG_LKDTM) += rodata_objcopy.o +lkdtm-$(CONFIG_LKDTM) += usercopy.o +lkdtm-$(CONFIG_LKDTM) += stackleak.o +lkdtm-$(CONFIG_LKDTM) += cfi.o +lkdtm-$(CONFIG_LKDTM) += fortify.o +lkdtm-$(CONFIG_PPC_64S_HASH_MMU) += powerpc.o + +KASAN_SANITIZE_stackleak.o := n + +KASAN_SANITIZE_rodata.o := n +KCSAN_SANITIZE_rodata.o := n +KCOV_INSTRUMENT_rodata.o := n +OBJECT_FILES_NON_STANDARD_rodata.o := y +CFLAGS_REMOVE_rodata.o += $(CC_FLAGS_LTO) $(RETHUNK_CFLAGS) + +OBJCOPYFLAGS := +OBJCOPYFLAGS_rodata_objcopy.o := \ + --rename-section .noinstr.text=.rodata,alloc,readonly,load,contents +targets += rodata.o rodata_objcopy.o +$(obj)/rodata_objcopy.o: $(obj)/rodata.o FORCE + $(call if_changed,objcopy) diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c new file mode 100644 index 0000000000..c66cc05a68 --- /dev/null +++ b/drivers/misc/lkdtm/bugs.c @@ -0,0 +1,672 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This is for all the 
tests related to logic bugs (e.g. bad dereferences, + * bad alignment, bad loops, bad locking, bad scheduling, deep stacks, and + * lockups) along with other things that don't fit well into existing LKDTM + * test source files. + */ +#include "lkdtm.h" +#include +#include +#include +#include +#include +#include + +#if IS_ENABLED(CONFIG_X86_32) && !IS_ENABLED(CONFIG_UML) +#include +#endif + +struct lkdtm_list { + struct list_head node; +}; + +/* + * Make sure our attempts to over run the kernel stack doesn't trigger + * a compiler warning when CONFIG_FRAME_WARN is set. Then make sure we + * recurse past the end of THREAD_SIZE by default. + */ +#if defined(CONFIG_FRAME_WARN) && (CONFIG_FRAME_WARN > 0) +#define REC_STACK_SIZE (_AC(CONFIG_FRAME_WARN, UL) / 2) +#else +#define REC_STACK_SIZE (THREAD_SIZE / 8UL) +#endif +#define REC_NUM_DEFAULT ((THREAD_SIZE / REC_STACK_SIZE) * 2) + +static int recur_count = REC_NUM_DEFAULT; + +static DEFINE_SPINLOCK(lock_me_up); + +/* + * Make sure compiler does not optimize this function or stack frame away: + * - function marked noinline + * - stack variables are marked volatile + * - stack variables are written (memset()) and read (buf[..] passed as arg) + * - function may have external effects (memzero_explicit()) + * - no tail recursion possible + */ +static int noinline recursive_loop(int remaining) +{ + volatile char buf[REC_STACK_SIZE]; + volatile int ret; + + memset((void *)buf, remaining & 0xFF, sizeof(buf)); + if (!remaining) + ret = 0; + else + ret = recursive_loop((int)buf[remaining % sizeof(buf)] - 1); + memzero_explicit((void *)buf, sizeof(buf)); + return ret; +} + +/* If the depth is negative, use the default, otherwise keep parameter. */ +void __init lkdtm_bugs_init(int *recur_param) +{ + if (*recur_param < 0) + *recur_param = recur_count; + else + recur_count = *recur_param; +} + +static void lkdtm_PANIC(void) +{ + panic("dumptest"); +} + +static void lkdtm_BUG(void) +{ + BUG(); +} + +static int warn_counter; + +static void lkdtm_WARNING(void) +{ + WARN_ON(++warn_counter); +} + +static void lkdtm_WARNING_MESSAGE(void) +{ + WARN(1, "Warning message trigger count: %d\n", ++warn_counter); +} + +static void lkdtm_EXCEPTION(void) +{ + *((volatile int *) 0) = 0; +} + +static void lkdtm_LOOP(void) +{ + for (;;) + ; +} + +static void lkdtm_EXHAUST_STACK(void) +{ + pr_info("Calling function with %lu frame size to depth %d ...\n", + REC_STACK_SIZE, recur_count); + recursive_loop(recur_count); + pr_info("FAIL: survived without exhausting stack?!\n"); +} + +static noinline void __lkdtm_CORRUPT_STACK(void *stack) +{ + memset(stack, '\xff', 64); +} + +/* This should trip the stack canary, not corrupt the return address. */ +static noinline void lkdtm_CORRUPT_STACK(void) +{ + /* Use default char array length that triggers stack protection. 
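+ * (With -fstack-protector, compilers typically instrument functions containing character arrays of 8 bytes or more, so an 8-byte buffer is the smallest array that reliably gets a canary.)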
*/ + char data[8] __aligned(sizeof(void *)); + + pr_info("Corrupting stack containing char array ...\n"); + __lkdtm_CORRUPT_STACK((void *)&data); +} + +/* Same as above but will only get a canary with -fstack-protector-strong */ +static noinline void lkdtm_CORRUPT_STACK_STRONG(void) +{ + union { + unsigned short shorts[4]; + unsigned long *ptr; + } data __aligned(sizeof(void *)); + + pr_info("Corrupting stack containing union ...\n"); + __lkdtm_CORRUPT_STACK((void *)&data); +} + +static pid_t stack_pid; +static unsigned long stack_addr; + +static void lkdtm_REPORT_STACK(void) +{ + volatile uintptr_t magic; + pid_t pid = task_pid_nr(current); + + if (pid != stack_pid) { + pr_info("Starting stack offset tracking for pid %d\n", pid); + stack_pid = pid; + stack_addr = (uintptr_t)&magic; + } + + pr_info("Stack offset: %d\n", (int)(stack_addr - (uintptr_t)&magic)); +} + +static pid_t stack_canary_pid; +static unsigned long stack_canary; +static unsigned long stack_canary_offset; + +static noinline void __lkdtm_REPORT_STACK_CANARY(void *stack) +{ + int i = 0; + pid_t pid = task_pid_nr(current); + unsigned long *canary = (unsigned long *)stack; + unsigned long current_offset = 0, init_offset = 0; + + /* Do our best to find the canary in a 16 word window ... */ + for (i = 1; i < 16; i++) { + canary = (unsigned long *)stack + i; +#ifdef CONFIG_STACKPROTECTOR + if (*canary == current->stack_canary) + current_offset = i; + if (*canary == init_task.stack_canary) + init_offset = i; +#endif + } + + if (current_offset == 0) { + /* + * If the canary doesn't match what's in the task_struct, + * we're either using a global canary or the stack frame + * layout changed. + */ + if (init_offset != 0) { + pr_err("FAIL: global stack canary found at offset %ld (canary for pid %d matches init_task's)!\n", + init_offset, pid); + } else { + pr_warn("FAIL: did not correctly locate stack canary :(\n"); + pr_expected_config(CONFIG_STACKPROTECTOR); + } + + return; + } else if (init_offset != 0) { + pr_warn("WARNING: found both current and init_task canaries nearby?!\n"); + } + + canary = (unsigned long *)stack + current_offset; + if (stack_canary_pid == 0) { + stack_canary = *canary; + stack_canary_pid = pid; + stack_canary_offset = current_offset; + pr_info("Recorded stack canary for pid %d at offset %ld\n", + stack_canary_pid, stack_canary_offset); + } else if (pid == stack_canary_pid) { + pr_warn("ERROR: saw pid %d again -- please use a new pid\n", pid); + } else { + if (current_offset != stack_canary_offset) { + pr_warn("ERROR: canary offset changed from %ld to %ld!?\n", + stack_canary_offset, current_offset); + return; + } + + if (*canary == stack_canary) { + pr_warn("FAIL: canary identical for pid %d and pid %d at offset %ld!\n", + stack_canary_pid, pid, current_offset); + } else { + pr_info("ok: stack canaries differ between pid %d and pid %d at offset %ld.\n", + stack_canary_pid, pid, current_offset); + /* Reset the test. */ + stack_canary_pid = 0; + } + } +} + +static void lkdtm_REPORT_STACK_CANARY(void) +{ + /* Use default char array length that triggers stack protection. 
*/ + char data[8] __aligned(sizeof(void *)) = { }; + + __lkdtm_REPORT_STACK_CANARY((void *)&data); +} + +static void lkdtm_UNALIGNED_LOAD_STORE_WRITE(void) +{ + static u8 data[5] __attribute__((aligned(4))) = {1, 2, 3, 4, 5}; + u32 *p; + u32 val = 0x12345678; + + p = (u32 *)(data + 1); + if (*p == 0) + val = 0x87654321; + *p = val; + + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) + pr_err("XFAIL: arch has CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS\n"); +} + +static void lkdtm_SOFTLOCKUP(void) +{ + preempt_disable(); + for (;;) + cpu_relax(); +} + +static void lkdtm_HARDLOCKUP(void) +{ + local_irq_disable(); + for (;;) + cpu_relax(); +} + +static void lkdtm_SPINLOCKUP(void) +{ + /* Must be called twice to trigger. */ + spin_lock(&lock_me_up); + /* Let sparse know we intended to exit holding the lock. */ + __release(&lock_me_up); +} + +static void lkdtm_HUNG_TASK(void) +{ + set_current_state(TASK_UNINTERRUPTIBLE); + schedule(); +} + +static volatile unsigned int huge = INT_MAX - 2; +static volatile unsigned int ignored; + +static void lkdtm_OVERFLOW_SIGNED(void) +{ + int value; + + value = huge; + pr_info("Normal signed addition ...\n"); + value += 1; + ignored = value; + + pr_info("Overflowing signed addition ...\n"); + value += 4; + ignored = value; +} + + +static void lkdtm_OVERFLOW_UNSIGNED(void) +{ + unsigned int value; + + value = huge; + pr_info("Normal unsigned addition ...\n"); + value += 1; + ignored = value; + + pr_info("Overflowing unsigned addition ...\n"); + value += 4; + ignored = value; +} + +/* Intentionally using unannotated flex array definition. */ +struct array_bounds_flex_array { + int one; + int two; + char data[]; +}; + +struct array_bounds { + int one; + int two; + char data[8]; + int three; +}; + +static void lkdtm_ARRAY_BOUNDS(void) +{ + struct array_bounds_flex_array *not_checked; + struct array_bounds *checked; + volatile int i; + + not_checked = kmalloc(sizeof(*not_checked) * 2, GFP_KERNEL); + checked = kmalloc(sizeof(*checked) * 2, GFP_KERNEL); + if (!not_checked || !checked) { + kfree(not_checked); + kfree(checked); + return; + } + + pr_info("Array access within bounds ...\n"); + /* For both, touch all bytes in the actual member size. */ + for (i = 0; i < sizeof(checked->data); i++) + checked->data[i] = 'A'; + /* + * For the uninstrumented flex array member, also touch 1 byte + * beyond to verify it is correctly uninstrumented. 
+ */ + for (i = 0; i < 2; i++) + not_checked->data[i] = 'A'; + + pr_info("Array access beyond bounds ...\n"); + for (i = 0; i < sizeof(checked->data) + 1; i++) + checked->data[i] = 'B'; + + kfree(not_checked); + kfree(checked); + pr_err("FAIL: survived array bounds overflow!\n"); + if (IS_ENABLED(CONFIG_UBSAN_BOUNDS)) + pr_expected_config(CONFIG_UBSAN_TRAP); + else + pr_expected_config(CONFIG_UBSAN_BOUNDS); +} + +struct lkdtm_annotated { + unsigned long flags; + int count; + int array[] __counted_by(count); +}; + +static volatile int fam_count = 4; + +static void lkdtm_FAM_BOUNDS(void) +{ + struct lkdtm_annotated *inst; + + inst = kzalloc(struct_size(inst, array, fam_count + 1), GFP_KERNEL); + if (!inst) { + pr_err("FAIL: could not allocate test struct!\n"); + return; + } + + inst->count = fam_count; + pr_info("Array access within bounds ...\n"); + inst->array[1] = fam_count; + ignored = inst->array[1]; + + pr_info("Array access beyond bounds ...\n"); + inst->array[fam_count] = fam_count; + ignored = inst->array[fam_count]; + + kfree(inst); + + pr_err("FAIL: survived access of invalid flexible array member index!\n"); + + if (!__has_attribute(__counted_by__)) + pr_warn("This is expected since this %s was built a compiler supporting __counted_by\n", + lkdtm_kernel_info); + else if (IS_ENABLED(CONFIG_UBSAN_BOUNDS)) + pr_expected_config(CONFIG_UBSAN_TRAP); + else + pr_expected_config(CONFIG_UBSAN_BOUNDS); +} + +static void lkdtm_CORRUPT_LIST_ADD(void) +{ + /* + * Initially, an empty list via LIST_HEAD: + * test_head.next = &test_head + * test_head.prev = &test_head + */ + LIST_HEAD(test_head); + struct lkdtm_list good, bad; + void *target[2] = { }; + void *redirection = ⌖ + + pr_info("attempting good list addition\n"); + + /* + * Adding to the list performs these actions: + * test_head.next->prev = &good.node + * good.node.next = test_head.next + * good.node.prev = test_head + * test_head.next = good.node + */ + list_add(&good.node, &test_head); + + pr_info("attempting corrupted list addition\n"); + /* + * In simulating this "write what where" primitive, the "what" is + * the address of &bad.node, and the "where" is the address held + * by "redirection". + */ + test_head.next = redirection; + list_add(&bad.node, &test_head); + + if (target[0] == NULL && target[1] == NULL) + pr_err("Overwrite did not happen, but no BUG?!\n"); + else { + pr_err("list_add() corruption not detected!\n"); + pr_expected_config(CONFIG_LIST_HARDENED); + } +} + +static void lkdtm_CORRUPT_LIST_DEL(void) +{ + LIST_HEAD(test_head); + struct lkdtm_list item; + void *target[2] = { }; + void *redirection = ⌖ + + list_add(&item.node, &test_head); + + pr_info("attempting good list removal\n"); + list_del(&item.node); + + pr_info("attempting corrupted list removal\n"); + list_add(&item.node, &test_head); + + /* As with the list_add() test above, this corrupts "next". */ + item.node.next = redirection; + list_del(&item.node); + + if (target[0] == NULL && target[1] == NULL) + pr_err("Overwrite did not happen, but no BUG?!\n"); + else { + pr_err("list_del() corruption not detected!\n"); + pr_expected_config(CONFIG_LIST_HARDENED); + } +} + +/* Test that VMAP_STACK is actually allocating with a leading guard page */ +static void lkdtm_STACK_GUARD_PAGE_LEADING(void) +{ + const unsigned char *stack = task_stack_page(current); + const unsigned char *ptr = stack - 1; + volatile unsigned char byte; + + pr_info("attempting bad read from page below current stack\n"); + + byte = *ptr; + + pr_err("FAIL: accessed page before stack! 
(byte: %x)\n", byte); +} + +/* Test that VMAP_STACK is actually allocating with a trailing guard page */ +static void lkdtm_STACK_GUARD_PAGE_TRAILING(void) +{ + const unsigned char *stack = task_stack_page(current); + const unsigned char *ptr = stack + THREAD_SIZE; + volatile unsigned char byte; + + pr_info("attempting bad read from page above current stack\n"); + + byte = *ptr; + + pr_err("FAIL: accessed page after stack! (byte: %x)\n", byte); +} + +static void lkdtm_UNSET_SMEP(void) +{ +#if IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_UML) +#define MOV_CR4_DEPTH 64 + void (*direct_write_cr4)(unsigned long val); + unsigned char *insn; + unsigned long cr4; + int i; + + cr4 = native_read_cr4(); + + if ((cr4 & X86_CR4_SMEP) != X86_CR4_SMEP) { + pr_err("FAIL: SMEP not in use\n"); + return; + } + cr4 &= ~(X86_CR4_SMEP); + + pr_info("trying to clear SMEP normally\n"); + native_write_cr4(cr4); + if (cr4 == native_read_cr4()) { + pr_err("FAIL: pinning SMEP failed!\n"); + cr4 |= X86_CR4_SMEP; + pr_info("restoring SMEP\n"); + native_write_cr4(cr4); + return; + } + pr_info("ok: SMEP did not get cleared\n"); + + /* + * To test the post-write pinning verification we need to call + * directly into the middle of native_write_cr4() where the + * cr4 write happens, skipping any pinning. This searches for + * the cr4 writing instruction. + */ + insn = (unsigned char *)native_write_cr4; + OPTIMIZER_HIDE_VAR(insn); + for (i = 0; i < MOV_CR4_DEPTH; i++) { + /* mov %rdi, %cr4 */ + if (insn[i] == 0x0f && insn[i+1] == 0x22 && insn[i+2] == 0xe7) + break; + /* mov %rdi,%rax; mov %rax, %cr4 */ + if (insn[i] == 0x48 && insn[i+1] == 0x89 && + insn[i+2] == 0xf8 && insn[i+3] == 0x0f && + insn[i+4] == 0x22 && insn[i+5] == 0xe0) + break; + } + if (i >= MOV_CR4_DEPTH) { + pr_info("ok: cannot locate cr4 writing call gadget\n"); + return; + } + direct_write_cr4 = (void *)(insn + i); + + pr_info("trying to clear SMEP with call gadget\n"); + direct_write_cr4(cr4); + if (native_read_cr4() & X86_CR4_SMEP) { + pr_info("ok: SMEP removal was reverted\n"); + } else { + pr_err("FAIL: cleared SMEP not detected!\n"); + cr4 |= X86_CR4_SMEP; + pr_info("restoring SMEP\n"); + native_write_cr4(cr4); + } +#else + pr_err("XFAIL: this test is x86_64-only\n"); +#endif +} + +static void lkdtm_DOUBLE_FAULT(void) +{ +#if IS_ENABLED(CONFIG_X86_32) && !IS_ENABLED(CONFIG_UML) + /* + * Trigger #DF by setting the stack limit to zero. This clobbers + * a GDT TLS slot, which is okay because the current task will die + * anyway due to the double fault. + */ + struct desc_struct d = { + .type = 3, /* expand-up, writable, accessed data */ + .p = 1, /* present */ + .d = 1, /* 32-bit */ + .g = 0, /* limit in bytes */ + .s = 1, /* not system */ + }; + + local_irq_disable(); + write_gdt_entry(get_cpu_gdt_rw(smp_processor_id()), + GDT_ENTRY_TLS_MIN, &d, DESCTYPE_S); + + /* + * Put our zero-limit segment in SS and then trigger a fault. The + * 4-byte access to (%esp) will fault with #SS, and the attempt to + * deliver the fault will recursively cause #SS and result in #DF. + * This whole process happens while NMIs and MCEs are blocked by the + * MOV SS window. This is nice because an NMI with an invalid SS + * would also double-fault, resulting in the NMI or MCE being lost. 
+ */ + asm volatile ("movw %0, %%ss; addl $0, (%%esp)" :: + "r" ((unsigned short)(GDT_ENTRY_TLS_MIN << 3))); + + pr_err("FAIL: tried to double fault but didn't die\n"); +#else + pr_err("XFAIL: this test is ia32-only\n"); +#endif +} + +#ifdef CONFIG_ARM64 +static noinline void change_pac_parameters(void) +{ + if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) { + /* Reset the keys of current task */ + ptrauth_thread_init_kernel(current); + ptrauth_thread_switch_kernel(current); + } +} +#endif + +static noinline void lkdtm_CORRUPT_PAC(void) +{ +#ifdef CONFIG_ARM64 +#define CORRUPT_PAC_ITERATE 10 + int i; + + if (!IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) + pr_err("FAIL: kernel not built with CONFIG_ARM64_PTR_AUTH_KERNEL\n"); + + if (!system_supports_address_auth()) { + pr_err("FAIL: CPU lacks pointer authentication feature\n"); + return; + } + + pr_info("changing PAC parameters to force function return failure...\n"); + /* + * PAC is a hash value computed from input keys, return address and + * stack pointer. As pac has fewer bits so there is a chance of + * collision, so iterate few times to reduce the collision probability. + */ + for (i = 0; i < CORRUPT_PAC_ITERATE; i++) + change_pac_parameters(); + + pr_err("FAIL: survived PAC changes! Kernel may be unstable from here\n"); +#else + pr_err("XFAIL: this test is arm64-only\n"); +#endif +} + +static struct crashtype crashtypes[] = { + CRASHTYPE(PANIC), + CRASHTYPE(BUG), + CRASHTYPE(WARNING), + CRASHTYPE(WARNING_MESSAGE), + CRASHTYPE(EXCEPTION), + CRASHTYPE(LOOP), + CRASHTYPE(EXHAUST_STACK), + CRASHTYPE(CORRUPT_STACK), + CRASHTYPE(CORRUPT_STACK_STRONG), + CRASHTYPE(REPORT_STACK), + CRASHTYPE(REPORT_STACK_CANARY), + CRASHTYPE(UNALIGNED_LOAD_STORE_WRITE), + CRASHTYPE(SOFTLOCKUP), + CRASHTYPE(HARDLOCKUP), + CRASHTYPE(SPINLOCKUP), + CRASHTYPE(HUNG_TASK), + CRASHTYPE(OVERFLOW_SIGNED), + CRASHTYPE(OVERFLOW_UNSIGNED), + CRASHTYPE(ARRAY_BOUNDS), + CRASHTYPE(FAM_BOUNDS), + CRASHTYPE(CORRUPT_LIST_ADD), + CRASHTYPE(CORRUPT_LIST_DEL), + CRASHTYPE(STACK_GUARD_PAGE_LEADING), + CRASHTYPE(STACK_GUARD_PAGE_TRAILING), + CRASHTYPE(UNSET_SMEP), + CRASHTYPE(DOUBLE_FAULT), + CRASHTYPE(CORRUPT_PAC), +}; + +struct crashtype_category bugs_crashtypes = { + .crashtypes = crashtypes, + .len = ARRAY_SIZE(crashtypes), +}; diff --git a/drivers/misc/lkdtm/cfi.c b/drivers/misc/lkdtm/cfi.c new file mode 100644 index 0000000000..fc28714ae3 --- /dev/null +++ b/drivers/misc/lkdtm/cfi.c @@ -0,0 +1,193 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This is for all the tests relating directly to Control Flow Integrity. + */ +#include "lkdtm.h" +#include + +static int called_count; + +/* Function taking one argument, without a return value. */ +static noinline void lkdtm_increment_void(int *counter) +{ + (*counter)++; +} + +/* Function taking one argument, returning int. */ +static noinline int lkdtm_increment_int(int *counter) +{ + (*counter)++; + + return *counter; +} + +/* Don't allow the compiler to inline the calls. */ +static noinline void lkdtm_indirect_call(void (*func)(int *)) +{ + func(&called_count); +} + +/* + * This tries to call an indirect function with a mismatched prototype. + */ +static void lkdtm_CFI_FORWARD_PROTO(void) +{ + /* + * Matches lkdtm_increment_void()'s prototype, but not + * lkdtm_increment_int()'s prototype. 
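+ * Under CONFIG_CFI_CLANG the indirect call site checks a hash of the target's function type, so calling the int-returning variant through a void (*)(int *) pointer should trap.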
+ */ + pr_info("Calling matched prototype ...\n"); + lkdtm_indirect_call(lkdtm_increment_void); + + pr_info("Calling mismatched prototype ...\n"); + lkdtm_indirect_call((void *)lkdtm_increment_int); + + pr_err("FAIL: survived mismatched prototype function call!\n"); + pr_expected_config(CONFIG_CFI_CLANG); +} + +/* + * This can stay local to LKDTM, as there should not be a production reason + * to disable PAC && SCS. + */ +#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL +# ifdef CONFIG_ARM64_BTI_KERNEL +# define __no_pac "branch-protection=bti" +# else +# ifdef CONFIG_CC_HAS_BRANCH_PROT_PAC_RET +# define __no_pac "branch-protection=none" +# else +# define __no_pac "sign-return-address=none" +# endif +# endif +# define __no_ret_protection __noscs __attribute__((__target__(__no_pac))) +#else +# define __no_ret_protection __noscs +#endif + +#define no_pac_addr(addr) \ + ((__force __typeof__(addr))((uintptr_t)(addr) | PAGE_OFFSET)) + +/* The ultimate ROP gadget. */ +static noinline __no_ret_protection +void set_return_addr_unchecked(unsigned long *expected, unsigned long *addr) +{ + /* Use of volatile is to make sure final write isn't seen as a dead store. */ + unsigned long * volatile *ret_addr = (unsigned long **)__builtin_frame_address(0) + 1; + + /* Make sure we've found the right place on the stack before writing it. */ + if (no_pac_addr(*ret_addr) == expected) + *ret_addr = (addr); + else + /* Check architecture, stack layout, or compiler behavior... */ + pr_warn("Eek: return address mismatch! %px != %px\n", + *ret_addr, addr); +} + +static noinline +void set_return_addr(unsigned long *expected, unsigned long *addr) +{ + /* Use of volatile is to make sure final write isn't seen as a dead store. */ + unsigned long * volatile *ret_addr = (unsigned long **)__builtin_frame_address(0) + 1; + + /* Make sure we've found the right place on the stack before writing it. */ + if (no_pac_addr(*ret_addr) == expected) + *ret_addr = (addr); + else + /* Check architecture, stack layout, or compiler behavior... */ + pr_warn("Eek: return address mismatch! %px != %px\n", + *ret_addr, addr); +} + +static volatile int force_check; + +static void lkdtm_CFI_BACKWARD(void) +{ + /* Use calculated gotos to keep labels addressable. */ + void *labels[] = { NULL, &&normal, &&redirected, &&check_normal, &&check_redirected }; + + pr_info("Attempting unchecked stack return address redirection ...\n"); + + /* Always false */ + if (force_check) { + /* + * Prepare to call with NULLs to avoid parameters being treated as + * constants in -02. + */ + set_return_addr_unchecked(NULL, NULL); + set_return_addr(NULL, NULL); + if (force_check) + goto *labels[1]; + if (force_check) + goto *labels[2]; + if (force_check) + goto *labels[3]; + if (force_check) + goto *labels[4]; + return; + } + + /* + * Use fallthrough switch case to keep basic block ordering between + * set_return_addr*() and the label after it. + */ + switch (force_check) { + case 0: + set_return_addr_unchecked(&&normal, &&redirected); + fallthrough; + case 1: +normal: + /* Always true */ + if (!force_check) { + pr_err("FAIL: stack return address manipulation failed!\n"); + /* If we can't redirect "normally", we can't test mitigations. 
*/ + return; + } + break; + default: +redirected: + pr_info("ok: redirected stack return address.\n"); + break; + } + + pr_info("Attempting checked stack return address redirection ...\n"); + + switch (force_check) { + case 0: + set_return_addr(&&check_normal, &&check_redirected); + fallthrough; + case 1: +check_normal: + /* Always true */ + if (!force_check) { + pr_info("ok: control flow unchanged.\n"); + return; + } + +check_redirected: + pr_err("FAIL: stack return address was redirected!\n"); + break; + } + + if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) { + pr_expected_config(CONFIG_ARM64_PTR_AUTH_KERNEL); + return; + } + if (IS_ENABLED(CONFIG_SHADOW_CALL_STACK)) { + pr_expected_config(CONFIG_SHADOW_CALL_STACK); + return; + } + pr_warn("This is probably expected, since this %s was built *without* %s=y nor %s=y\n", + lkdtm_kernel_info, + "CONFIG_ARM64_PTR_AUTH_KERNEL", "CONFIG_SHADOW_CALL_STACK"); +} + +static struct crashtype crashtypes[] = { + CRASHTYPE(CFI_FORWARD_PROTO), + CRASHTYPE(CFI_BACKWARD), +}; + +struct crashtype_category cfi_crashtypes = { + .crashtypes = crashtypes, + .len = ARRAY_SIZE(crashtypes), +}; diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c new file mode 100644 index 0000000000..0772e4a475 --- /dev/null +++ b/drivers/misc/lkdtm/core.c @@ -0,0 +1,488 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Linux Kernel Dump Test Module for testing kernel crashes conditions: + * induces system failures at predefined crashpoints and under predefined + * operational conditions in order to evaluate the reliability of kernel + * sanity checking and crash dumps obtained using different dumping + * solutions. + * + * Copyright (C) IBM Corporation, 2006 + * + * Author: Ankita Garg + * + * It is adapted from the Linux Kernel Dump Test Tool by + * Fernando Luis Vazquez Cao + * + * Debugfs support added by Simon Kagstrom + * + * See Documentation/fault-injection/provoke-crashes.rst for instructions + */ +#include "lkdtm.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEFAULT_COUNT 10 + +static int lkdtm_debugfs_open(struct inode *inode, struct file *file); +static ssize_t lkdtm_debugfs_read(struct file *f, char __user *user_buf, + size_t count, loff_t *off); +static ssize_t direct_entry(struct file *f, const char __user *user_buf, + size_t count, loff_t *off); + +#ifdef CONFIG_KPROBES +static int lkdtm_kprobe_handler(struct kprobe *kp, struct pt_regs *regs); +static ssize_t lkdtm_debugfs_entry(struct file *f, + const char __user *user_buf, + size_t count, loff_t *off); +# define CRASHPOINT_KPROBE(_symbol) \ + .kprobe = { \ + .symbol_name = (_symbol), \ + .pre_handler = lkdtm_kprobe_handler, \ + }, +# define CRASHPOINT_WRITE(_symbol) \ + (_symbol) ? lkdtm_debugfs_entry : direct_entry +#else +# define CRASHPOINT_KPROBE(_symbol) +# define CRASHPOINT_WRITE(_symbol) direct_entry +#endif + +/* Crash points */ +struct crashpoint { + const char *name; + const struct file_operations fops; + struct kprobe kprobe; +}; + +#define CRASHPOINT(_name, _symbol) \ + { \ + .name = _name, \ + .fops = { \ + .read = lkdtm_debugfs_read, \ + .llseek = generic_file_llseek, \ + .open = lkdtm_debugfs_open, \ + .write = CRASHPOINT_WRITE(_symbol) \ + }, \ + CRASHPOINT_KPROBE(_symbol) \ + } + +/* Define the possible places where we can trigger a crash point. 
*/ +static struct crashpoint crashpoints[] = { + CRASHPOINT("DIRECT", NULL), +#ifdef CONFIG_KPROBES + CRASHPOINT("INT_HARDWARE_ENTRY", "do_IRQ"), + CRASHPOINT("INT_HW_IRQ_EN", "handle_irq_event"), + CRASHPOINT("INT_TASKLET_ENTRY", "tasklet_action"), + CRASHPOINT("FS_SUBMIT_BH", "submit_bh"), + CRASHPOINT("MEM_SWAPOUT", "shrink_inactive_list"), + CRASHPOINT("TIMERADD", "hrtimer_start"), + CRASHPOINT("SCSI_QUEUE_RQ", "scsi_queue_rq"), +#endif +}; + +/* List of possible types for crashes that can be triggered. */ +static const struct crashtype_category *crashtype_categories[] = { + &bugs_crashtypes, + &heap_crashtypes, + &perms_crashtypes, + &refcount_crashtypes, + &usercopy_crashtypes, + &stackleak_crashtypes, + &cfi_crashtypes, + &fortify_crashtypes, +#ifdef CONFIG_PPC_64S_HASH_MMU + &powerpc_crashtypes, +#endif +}; + +/* Global kprobe entry and crashtype. */ +static struct kprobe *lkdtm_kprobe; +static struct crashpoint *lkdtm_crashpoint; +static const struct crashtype *lkdtm_crashtype; + +/* Module parameters */ +static int recur_count = -1; +module_param(recur_count, int, 0644); +MODULE_PARM_DESC(recur_count, " Recursion level for the stack overflow test"); + +static char* cpoint_name; +module_param(cpoint_name, charp, 0444); +MODULE_PARM_DESC(cpoint_name, " Crash Point, where kernel is to be crashed"); + +static char* cpoint_type; +module_param(cpoint_type, charp, 0444); +MODULE_PARM_DESC(cpoint_type, " Crash Point Type, action to be taken on "\ + "hitting the crash point"); + +static int cpoint_count = DEFAULT_COUNT; +module_param(cpoint_count, int, 0644); +MODULE_PARM_DESC(cpoint_count, " Crash Point Count, number of times the "\ + "crash point is to be hit to trigger action"); + +/* + * For test debug reporting when CI systems provide terse summaries. + * TODO: Remove this once reasonable reporting exists in most CI systems: + * https://lore.kernel.org/lkml/CAHk-=wiFvfkoFixTapvvyPMN9pq5G-+Dys2eSyBa1vzDGAO5+A@mail.gmail.com + */ +char *lkdtm_kernel_info; + +/* Return the crashtype number or NULL if the name is invalid */ +static const struct crashtype *find_crashtype(const char *name) +{ + int cat, idx; + + for (cat = 0; cat < ARRAY_SIZE(crashtype_categories); cat++) { + for (idx = 0; idx < crashtype_categories[cat]->len; idx++) { + struct crashtype *crashtype; + + crashtype = &crashtype_categories[cat]->crashtypes[idx]; + if (!strcmp(name, crashtype->name)) + return crashtype; + } + } + + return NULL; +} + +/* + * This is forced noinline just so it distinctly shows up in the stackdump + * which makes validation of expected lkdtm crashes easier. + */ +static noinline void lkdtm_do_action(const struct crashtype *crashtype) +{ + if (WARN_ON(!crashtype || !crashtype->func)) + return; + crashtype->func(); +} + +static int lkdtm_register_cpoint(struct crashpoint *crashpoint, + const struct crashtype *crashtype) +{ + int ret; + + /* If this doesn't have a symbol, just call immediately. */ + if (!crashpoint->kprobe.symbol_name) { + lkdtm_do_action(crashtype); + return 0; + } + + if (lkdtm_kprobe != NULL) + unregister_kprobe(lkdtm_kprobe); + + lkdtm_crashpoint = crashpoint; + lkdtm_crashtype = crashtype; + lkdtm_kprobe = &crashpoint->kprobe; + ret = register_kprobe(lkdtm_kprobe); + if (ret < 0) { + pr_info("Couldn't register kprobe %s\n", + crashpoint->kprobe.symbol_name); + lkdtm_kprobe = NULL; + lkdtm_crashpoint = NULL; + lkdtm_crashtype = NULL; + } + + return ret; +} + +#ifdef CONFIG_KPROBES +/* Global crash counter and spinlock. 
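+ * crash_count is decremented on every kprobe hit and the crash action fires when it reaches zero, after which the counter is reset to cpoint_count.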
*/ +static int crash_count = DEFAULT_COUNT; +static DEFINE_SPINLOCK(crash_count_lock); + +/* Called by kprobe entry points. */ +static int lkdtm_kprobe_handler(struct kprobe *kp, struct pt_regs *regs) +{ + unsigned long flags; + bool do_it = false; + + if (WARN_ON(!lkdtm_crashpoint || !lkdtm_crashtype)) + return 0; + + spin_lock_irqsave(&crash_count_lock, flags); + crash_count--; + pr_info("Crash point %s of type %s hit, trigger in %d rounds\n", + lkdtm_crashpoint->name, lkdtm_crashtype->name, crash_count); + + if (crash_count == 0) { + do_it = true; + crash_count = cpoint_count; + } + spin_unlock_irqrestore(&crash_count_lock, flags); + + if (do_it) + lkdtm_do_action(lkdtm_crashtype); + + return 0; +} + +static ssize_t lkdtm_debugfs_entry(struct file *f, + const char __user *user_buf, + size_t count, loff_t *off) +{ + struct crashpoint *crashpoint = file_inode(f)->i_private; + const struct crashtype *crashtype = NULL; + char *buf; + int err; + + if (count >= PAGE_SIZE) + return -EINVAL; + + buf = (char *)__get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + if (copy_from_user(buf, user_buf, count)) { + free_page((unsigned long) buf); + return -EFAULT; + } + /* NULL-terminate and remove enter */ + buf[count] = '\0'; + strim(buf); + + crashtype = find_crashtype(buf); + free_page((unsigned long)buf); + + if (!crashtype) + return -EINVAL; + + err = lkdtm_register_cpoint(crashpoint, crashtype); + if (err < 0) + return err; + + *off += count; + + return count; +} +#endif + +/* Generic read callback that just prints out the available crash types */ +static ssize_t lkdtm_debugfs_read(struct file *f, char __user *user_buf, + size_t count, loff_t *off) +{ + int n, cat, idx; + ssize_t out; + char *buf; + + buf = (char *)__get_free_page(GFP_KERNEL); + if (buf == NULL) + return -ENOMEM; + + n = scnprintf(buf, PAGE_SIZE, "Available crash types:\n"); + + for (cat = 0; cat < ARRAY_SIZE(crashtype_categories); cat++) { + for (idx = 0; idx < crashtype_categories[cat]->len; idx++) { + struct crashtype *crashtype; + + crashtype = &crashtype_categories[cat]->crashtypes[idx]; + n += scnprintf(buf + n, PAGE_SIZE - n, "%s\n", + crashtype->name); + } + } + buf[n] = '\0'; + + out = simple_read_from_buffer(user_buf, count, off, + buf, n); + free_page((unsigned long) buf); + + return out; +} + +static int lkdtm_debugfs_open(struct inode *inode, struct file *file) +{ + return 0; +} + +/* Special entry to just crash directly. Available without KPROBEs */ +static ssize_t direct_entry(struct file *f, const char __user *user_buf, + size_t count, loff_t *off) +{ + const struct crashtype *crashtype; + char *buf; + + if (count >= PAGE_SIZE) + return -EINVAL; + if (count < 1) + return -EINVAL; + + buf = (char *)__get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + if (copy_from_user(buf, user_buf, count)) { + free_page((unsigned long) buf); + return -EFAULT; + } + /* NULL-terminate and remove enter */ + buf[count] = '\0'; + strim(buf); + + crashtype = find_crashtype(buf); + free_page((unsigned long) buf); + if (!crashtype) + return -EINVAL; + + pr_info("Performing direct entry %s\n", crashtype->name); + lkdtm_do_action(crashtype); + *off += count; + + return count; +} + +#ifndef MODULE +/* + * To avoid needing to export parse_args(), just don't use this code + * when LKDTM is built as a module. 
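+ *
+ * lkdtm_check_bool_cmdline() below reports how a boolean parameter was
+ * passed on the kernel command line, roughly:
+ *
+ *   lkdtm_check_bool_cmdline("init_on_free")
+ *     -> 1      booted with "init_on_free=1" (or another true spelling)
+ *     -> 0      booted with "init_on_free=0"
+ *     -> -ESRCH the parameter was not given at all
+ *
+ * pr_expected_config_param() uses this result to pick which message to
+ * print.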
+ */ +struct check_cmdline_args { + const char *param; + int value; +}; + +static int lkdtm_parse_one(char *param, char *val, + const char *unused, void *arg) +{ + struct check_cmdline_args *args = arg; + + /* short circuit if we already found a value. */ + if (args->value != -ESRCH) + return 0; + if (strncmp(param, args->param, strlen(args->param)) == 0) { + bool bool_result; + int ret; + + ret = kstrtobool(val, &bool_result); + if (ret == 0) + args->value = bool_result; + } + return 0; +} + +int lkdtm_check_bool_cmdline(const char *param) +{ + char *command_line; + struct check_cmdline_args args = { + .param = param, + .value = -ESRCH, + }; + + command_line = kstrdup(saved_command_line, GFP_KERNEL); + if (!command_line) + return -ENOMEM; + + parse_args("Setting sysctl args", command_line, + NULL, 0, -1, -1, &args, lkdtm_parse_one); + + kfree(command_line); + + return args.value; +} +#endif + +static struct dentry *lkdtm_debugfs_root; + +static int __init lkdtm_module_init(void) +{ + struct crashpoint *crashpoint = NULL; + const struct crashtype *crashtype = NULL; + int ret; + int i; + + /* Neither or both of these need to be set */ + if ((cpoint_type || cpoint_name) && !(cpoint_type && cpoint_name)) { + pr_err("Need both cpoint_type and cpoint_name or neither\n"); + return -EINVAL; + } + + if (cpoint_type) { + crashtype = find_crashtype(cpoint_type); + if (!crashtype) { + pr_err("Unknown crashtype '%s'\n", cpoint_type); + return -EINVAL; + } + } + + if (cpoint_name) { + for (i = 0; i < ARRAY_SIZE(crashpoints); i++) { + if (!strcmp(cpoint_name, crashpoints[i].name)) + crashpoint = &crashpoints[i]; + } + + /* Refuse unknown crashpoints. */ + if (!crashpoint) { + pr_err("Invalid crashpoint %s\n", cpoint_name); + return -EINVAL; + } + } + +#ifdef CONFIG_KPROBES + /* Set crash count. */ + crash_count = cpoint_count; +#endif + + /* Common initialization. */ + lkdtm_kernel_info = kasprintf(GFP_KERNEL, "kernel (%s %s)", + init_uts_ns.name.release, + init_uts_ns.name.machine); + + /* Handle test-specific initialization. */ + lkdtm_bugs_init(&recur_count); + lkdtm_perms_init(); + lkdtm_usercopy_init(); + lkdtm_heap_init(); + + /* Register debugfs interface */ + lkdtm_debugfs_root = debugfs_create_dir("provoke-crash", NULL); + + /* Install debugfs trigger files. */ + for (i = 0; i < ARRAY_SIZE(crashpoints); i++) { + struct crashpoint *cur = &crashpoints[i]; + + debugfs_create_file(cur->name, 0644, lkdtm_debugfs_root, cur, + &cur->fops); + } + + /* Install crashpoint if one was selected. */ + if (crashpoint) { + ret = lkdtm_register_cpoint(crashpoint, crashtype); + if (ret < 0) { + pr_info("Invalid crashpoint %s\n", crashpoint->name); + goto out_err; + } + pr_info("Crash point %s of type %s registered\n", + crashpoint->name, cpoint_type); + } else { + pr_info("No crash points registered, enable through debugfs\n"); + } + + return 0; + +out_err: + debugfs_remove_recursive(lkdtm_debugfs_root); + return ret; +} + +static void __exit lkdtm_module_exit(void) +{ + debugfs_remove_recursive(lkdtm_debugfs_root); + + /* Handle test-specific clean-up. 
*/ + lkdtm_heap_exit(); + lkdtm_usercopy_exit(); + + if (lkdtm_kprobe != NULL) + unregister_kprobe(lkdtm_kprobe); + + kfree(lkdtm_kernel_info); + + pr_info("Crash point unregistered\n"); +} + +module_init(lkdtm_module_init); +module_exit(lkdtm_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Kernel crash testing module"); diff --git a/drivers/misc/lkdtm/fortify.c b/drivers/misc/lkdtm/fortify.c new file mode 100644 index 0000000000..0159276656 --- /dev/null +++ b/drivers/misc/lkdtm/fortify.c @@ -0,0 +1,217 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2020 Francis Laniel + * + * Add tests related to fortified functions in this file. + */ +#include "lkdtm.h" +#include +#include + +static volatile int fortify_scratch_space; + +static void lkdtm_FORTIFY_STR_OBJECT(void) +{ + struct target { + char a[10]; + int foo; + } target[3] = {}; + /* + * Using volatile prevents the compiler from determining the value of + * 'size' at compile time. Without that, we would get a compile error + * rather than a runtime error. + */ + volatile int size = 20; + + pr_info("trying to strcmp() past the end of a struct\n"); + + strncpy(target[0].a, target[1].a, size); + + /* Store result to global to prevent the code from being eliminated */ + fortify_scratch_space = target[0].a[3]; + + pr_err("FAIL: fortify did not block a strncpy() object write overflow!\n"); + pr_expected_config(CONFIG_FORTIFY_SOURCE); +} + +static void lkdtm_FORTIFY_STR_MEMBER(void) +{ + struct target { + char a[10]; + char b[10]; + } target; + volatile int size = 20; + char *src; + + src = kmalloc(size, GFP_KERNEL); + strscpy(src, "over ten bytes", size); + size = strlen(src) + 1; + + pr_info("trying to strncpy() past the end of a struct member...\n"); + + /* + * strncpy(target.a, src, 20); will hit a compile error because the + * compiler knows at build time that target.a < 20 bytes. Use a + * volatile to force a runtime error. + */ + strncpy(target.a, src, size); + + /* Store result to global to prevent the code from being eliminated */ + fortify_scratch_space = target.a[3]; + + pr_err("FAIL: fortify did not block a strncpy() struct member write overflow!\n"); + pr_expected_config(CONFIG_FORTIFY_SOURCE); + + kfree(src); +} + +static void lkdtm_FORTIFY_MEM_OBJECT(void) +{ + int before[10]; + struct target { + char a[10]; + int foo; + } target = {}; + int after[10]; + /* + * Using volatile prevents the compiler from determining the value of + * 'size' at compile time. Without that, we would get a compile error + * rather than a runtime error. 
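+ *
+ * In other words, the two failure modes look roughly like this (the
+ * literal-size variant is the one FORTIFY should reject at build time):
+ *
+ *   memcpy(&target, &before, 20);    // constant size: compile-time error
+ *   memcpy(&target, &before, size);  // volatile size: run-time check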
+ */ + volatile int size = 20; + + memset(before, 0, sizeof(before)); + memset(after, 0, sizeof(after)); + fortify_scratch_space = before[5]; + fortify_scratch_space = after[5]; + + pr_info("trying to memcpy() past the end of a struct\n"); + + pr_info("0: %zu\n", __builtin_object_size(&target, 0)); + pr_info("1: %zu\n", __builtin_object_size(&target, 1)); + pr_info("s: %d\n", size); + memcpy(&target, &before, size); + + /* Store result to global to prevent the code from being eliminated */ + fortify_scratch_space = target.a[3]; + + pr_err("FAIL: fortify did not block a memcpy() object write overflow!\n"); + pr_expected_config(CONFIG_FORTIFY_SOURCE); +} + +static void lkdtm_FORTIFY_MEM_MEMBER(void) +{ + struct target { + char a[10]; + char b[10]; + } target; + volatile int size = 20; + char *src; + + src = kmalloc(size, GFP_KERNEL); + strscpy(src, "over ten bytes", size); + size = strlen(src) + 1; + + pr_info("trying to memcpy() past the end of a struct member...\n"); + + /* + * strncpy(target.a, src, 20); will hit a compile error because the + * compiler knows at build time that target.a < 20 bytes. Use a + * volatile to force a runtime error. + */ + memcpy(target.a, src, size); + + /* Store result to global to prevent the code from being eliminated */ + fortify_scratch_space = target.a[3]; + + pr_err("FAIL: fortify did not block a memcpy() struct member write overflow!\n"); + pr_expected_config(CONFIG_FORTIFY_SOURCE); + + kfree(src); +} + +/* + * Calls fortified strscpy to test that it returns the same result as vanilla + * strscpy and generate a panic because there is a write overflow (i.e. src + * length is greater than dst length). + */ +static void lkdtm_FORTIFY_STRSCPY(void) +{ + char *src; + char dst[5]; + + struct { + union { + char big[10]; + char src[5]; + }; + } weird = { .big = "hello!" }; + char weird_dst[sizeof(weird.src) + 1]; + + src = kstrdup("foobar", GFP_KERNEL); + + if (src == NULL) + return; + + /* Vanilla strscpy returns -E2BIG if size is 0. */ + if (strscpy(dst, src, 0) != -E2BIG) + pr_warn("FAIL: strscpy() of 0 length did not return -E2BIG\n"); + + /* Vanilla strscpy returns -E2BIG if src is truncated. */ + if (strscpy(dst, src, sizeof(dst)) != -E2BIG) + pr_warn("FAIL: strscpy() did not return -E2BIG while src is truncated\n"); + + /* After above call, dst must contain "foob" because src was truncated. */ + if (strncmp(dst, "foob", sizeof(dst)) != 0) + pr_warn("FAIL: after strscpy() dst does not contain \"foob\" but \"%s\"\n", + dst); + + /* Shrink src so the strscpy() below succeeds. */ + src[3] = '\0'; + + /* + * Vanilla strscpy returns number of character copied if everything goes + * well. + */ + if (strscpy(dst, src, sizeof(dst)) != 3) + pr_warn("FAIL: strscpy() did not return 3 while src was copied entirely truncated\n"); + + /* After above call, dst must contain "foo" because src was copied. */ + if (strncmp(dst, "foo", sizeof(dst)) != 0) + pr_warn("FAIL: after strscpy() dst does not contain \"foo\" but \"%s\"\n", + dst); + + /* Test when src is embedded inside a union. */ + strscpy(weird_dst, weird.src, sizeof(weird_dst)); + + if (strcmp(weird_dst, "hello") != 0) + pr_warn("FAIL: after strscpy() weird_dst does not contain \"hello\" but \"%s\"\n", + weird_dst); + + /* Restore src to its initial value. */ + src[3] = 'b'; + + /* + * Use strlen here so size cannot be known at compile time and there is + * a runtime write overflow. 
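+ *
+ * At this point src has been restored to "foobar", so strlen(src) is 6
+ * while dst is only 5 bytes: the fortified strscpy() is expected to
+ * catch the oversized count at run time rather than let the copy
+ * overflow dst.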
+ */ + strscpy(dst, src, strlen(src)); + + pr_err("FAIL: strscpy() overflow not detected!\n"); + pr_expected_config(CONFIG_FORTIFY_SOURCE); + + kfree(src); +} + +static struct crashtype crashtypes[] = { + CRASHTYPE(FORTIFY_STR_OBJECT), + CRASHTYPE(FORTIFY_STR_MEMBER), + CRASHTYPE(FORTIFY_MEM_OBJECT), + CRASHTYPE(FORTIFY_MEM_MEMBER), + CRASHTYPE(FORTIFY_STRSCPY), +}; + +struct crashtype_category fortify_crashtypes = { + .crashtypes = crashtypes, + .len = ARRAY_SIZE(crashtypes), +}; diff --git a/drivers/misc/lkdtm/heap.c b/drivers/misc/lkdtm/heap.c new file mode 100644 index 0000000000..0ce4cbf6ab --- /dev/null +++ b/drivers/misc/lkdtm/heap.c @@ -0,0 +1,342 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This is for all the tests relating directly to heap memory, including + * page allocation and slab allocations. + */ +#include "lkdtm.h" +#include +#include +#include + +static struct kmem_cache *double_free_cache; +static struct kmem_cache *a_cache; +static struct kmem_cache *b_cache; + +/* + * Using volatile here means the compiler cannot ever make assumptions + * about this value. This means compile-time length checks involving + * this variable cannot be performed; only run-time checks. + */ +static volatile int __offset = 1; + +/* + * If there aren't guard pages, it's likely that a consecutive allocation will + * let us overflow into the second allocation without overwriting something real. + * + * This should always be caught because there is an unconditional unmapped + * page after vmap allocations. + */ +static void lkdtm_VMALLOC_LINEAR_OVERFLOW(void) +{ + char *one, *two; + + one = vzalloc(PAGE_SIZE); + OPTIMIZER_HIDE_VAR(one); + two = vzalloc(PAGE_SIZE); + + pr_info("Attempting vmalloc linear overflow ...\n"); + memset(one, 0xAA, PAGE_SIZE + __offset); + + vfree(two); + vfree(one); +} + +/* + * This tries to stay within the next largest power-of-2 kmalloc cache + * to avoid actually overwriting anything important if it's not detected + * correctly. + * + * This should get caught by either memory tagging, KASan, or by using + * CONFIG_SLUB_DEBUG=y and slub_debug=ZF (or CONFIG_SLUB_DEBUG_ON=y). + */ +static void lkdtm_SLAB_LINEAR_OVERFLOW(void) +{ + size_t len = 1020; + u32 *data = kmalloc(len, GFP_KERNEL); + if (!data) + return; + + pr_info("Attempting slab linear overflow ...\n"); + OPTIMIZER_HIDE_VAR(data); + data[1024 / sizeof(u32)] = 0x12345678; + kfree(data); +} + +static void lkdtm_WRITE_AFTER_FREE(void) +{ + int *base, *again; + size_t len = 1024; + /* + * The slub allocator uses the first word to store the free + * pointer in some configurations. Use the middle of the + * allocation to avoid running into the freelist + */ + size_t offset = (len / sizeof(*base)) / 2; + + base = kmalloc(len, GFP_KERNEL); + if (!base) + return; + pr_info("Allocated memory %p-%p\n", base, &base[offset * 2]); + pr_info("Attempting bad write to freed memory at %p\n", + &base[offset]); + kfree(base); + base[offset] = 0x0abcdef0; + /* Attempt to notice the overwrite. */ + again = kmalloc(len, GFP_KERNEL); + kfree(again); + if (again != base) + pr_info("Hmm, didn't get the same memory range.\n"); +} + +static void lkdtm_READ_AFTER_FREE(void) +{ + int *base, *val, saw; + size_t len = 1024; + /* + * The slub allocator will use the either the first word or + * the middle of the allocation to store the free pointer, + * depending on configurations. Store in the second word to + * avoid running into the freelist. 
+ */ + size_t offset = sizeof(*base); + + base = kmalloc(len, GFP_KERNEL); + if (!base) { + pr_info("Unable to allocate base memory.\n"); + return; + } + + val = kmalloc(len, GFP_KERNEL); + if (!val) { + pr_info("Unable to allocate val memory.\n"); + kfree(base); + return; + } + + *val = 0x12345678; + base[offset] = *val; + pr_info("Value in memory before free: %x\n", base[offset]); + + kfree(base); + + pr_info("Attempting bad read from freed memory\n"); + saw = base[offset]; + if (saw != *val) { + /* Good! Poisoning happened, so declare a win. */ + pr_info("Memory correctly poisoned (%x)\n", saw); + } else { + pr_err("FAIL: Memory was not poisoned!\n"); + pr_expected_config_param(CONFIG_INIT_ON_FREE_DEFAULT_ON, "init_on_free"); + } + + kfree(val); +} + +static void lkdtm_WRITE_BUDDY_AFTER_FREE(void) +{ + unsigned long p = __get_free_page(GFP_KERNEL); + if (!p) { + pr_info("Unable to allocate free page\n"); + return; + } + + pr_info("Writing to the buddy page before free\n"); + memset((void *)p, 0x3, PAGE_SIZE); + free_page(p); + schedule(); + pr_info("Attempting bad write to the buddy page after free\n"); + memset((void *)p, 0x78, PAGE_SIZE); + /* Attempt to notice the overwrite. */ + p = __get_free_page(GFP_KERNEL); + free_page(p); + schedule(); +} + +static void lkdtm_READ_BUDDY_AFTER_FREE(void) +{ + unsigned long p = __get_free_page(GFP_KERNEL); + int saw, *val; + int *base; + + if (!p) { + pr_info("Unable to allocate free page\n"); + return; + } + + val = kmalloc(1024, GFP_KERNEL); + if (!val) { + pr_info("Unable to allocate val memory.\n"); + free_page(p); + return; + } + + base = (int *)p; + + *val = 0x12345678; + base[0] = *val; + pr_info("Value in memory before free: %x\n", base[0]); + free_page(p); + pr_info("Attempting to read from freed memory\n"); + saw = base[0]; + if (saw != *val) { + /* Good! Poisoning happened, so declare a win. 
*/ + pr_info("Memory correctly poisoned (%x)\n", saw); + } else { + pr_err("FAIL: Buddy page was not poisoned!\n"); + pr_expected_config_param(CONFIG_INIT_ON_FREE_DEFAULT_ON, "init_on_free"); + } + + kfree(val); +} + +static void lkdtm_SLAB_INIT_ON_ALLOC(void) +{ + u8 *first; + u8 *val; + + first = kmalloc(512, GFP_KERNEL); + if (!first) { + pr_info("Unable to allocate 512 bytes the first time.\n"); + return; + } + + memset(first, 0xAB, 512); + kfree(first); + + val = kmalloc(512, GFP_KERNEL); + if (!val) { + pr_info("Unable to allocate 512 bytes the second time.\n"); + return; + } + if (val != first) { + pr_warn("Reallocation missed clobbered memory.\n"); + } + + if (memchr(val, 0xAB, 512) == NULL) { + pr_info("Memory appears initialized (%x, no earlier values)\n", *val); + } else { + pr_err("FAIL: Slab was not initialized\n"); + pr_expected_config_param(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, "init_on_alloc"); + } + kfree(val); +} + +static void lkdtm_BUDDY_INIT_ON_ALLOC(void) +{ + u8 *first; + u8 *val; + + first = (u8 *)__get_free_page(GFP_KERNEL); + if (!first) { + pr_info("Unable to allocate first free page\n"); + return; + } + + memset(first, 0xAB, PAGE_SIZE); + free_page((unsigned long)first); + + val = (u8 *)__get_free_page(GFP_KERNEL); + if (!val) { + pr_info("Unable to allocate second free page\n"); + return; + } + + if (val != first) { + pr_warn("Reallocation missed clobbered memory.\n"); + } + + if (memchr(val, 0xAB, PAGE_SIZE) == NULL) { + pr_info("Memory appears initialized (%x, no earlier values)\n", *val); + } else { + pr_err("FAIL: Slab was not initialized\n"); + pr_expected_config_param(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, "init_on_alloc"); + } + free_page((unsigned long)val); +} + +static void lkdtm_SLAB_FREE_DOUBLE(void) +{ + int *val; + + val = kmem_cache_alloc(double_free_cache, GFP_KERNEL); + if (!val) { + pr_info("Unable to allocate double_free_cache memory.\n"); + return; + } + + /* Just make sure we got real memory. */ + *val = 0x12345678; + pr_info("Attempting double slab free ...\n"); + kmem_cache_free(double_free_cache, val); + kmem_cache_free(double_free_cache, val); +} + +static void lkdtm_SLAB_FREE_CROSS(void) +{ + int *val; + + val = kmem_cache_alloc(a_cache, GFP_KERNEL); + if (!val) { + pr_info("Unable to allocate a_cache memory.\n"); + return; + } + + /* Just make sure we got real memory. */ + *val = 0x12345679; + pr_info("Attempting cross-cache slab free ...\n"); + kmem_cache_free(b_cache, val); +} + +static void lkdtm_SLAB_FREE_PAGE(void) +{ + unsigned long p = __get_free_page(GFP_KERNEL); + + pr_info("Attempting non-Slab slab free ...\n"); + kmem_cache_free(NULL, (void *)p); + free_page(p); +} + +/* + * We have constructors to keep the caches distinctly separated without + * needing to boot with "slab_nomerge". 
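+ *
+ * (The slab allocator does not merge caches that have a constructor, so
+ * the empty ctors below are enough to keep lkdtm-heap-a and lkdtm-heap-b
+ * from being aliased to one underlying cache, which would hide the
+ * SLAB_FREE_CROSS failure; booting with "slab_nomerge" would achieve the
+ * same thing.)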
+ */ +static void ctor_double_free(void *region) +{ } +static void ctor_a(void *region) +{ } +static void ctor_b(void *region) +{ } + +void __init lkdtm_heap_init(void) +{ + double_free_cache = kmem_cache_create("lkdtm-heap-double_free", + 64, 0, 0, ctor_double_free); + a_cache = kmem_cache_create("lkdtm-heap-a", 64, 0, 0, ctor_a); + b_cache = kmem_cache_create("lkdtm-heap-b", 64, 0, 0, ctor_b); +} + +void __exit lkdtm_heap_exit(void) +{ + kmem_cache_destroy(double_free_cache); + kmem_cache_destroy(a_cache); + kmem_cache_destroy(b_cache); +} + +static struct crashtype crashtypes[] = { + CRASHTYPE(SLAB_LINEAR_OVERFLOW), + CRASHTYPE(VMALLOC_LINEAR_OVERFLOW), + CRASHTYPE(WRITE_AFTER_FREE), + CRASHTYPE(READ_AFTER_FREE), + CRASHTYPE(WRITE_BUDDY_AFTER_FREE), + CRASHTYPE(READ_BUDDY_AFTER_FREE), + CRASHTYPE(SLAB_INIT_ON_ALLOC), + CRASHTYPE(BUDDY_INIT_ON_ALLOC), + CRASHTYPE(SLAB_FREE_DOUBLE), + CRASHTYPE(SLAB_FREE_CROSS), + CRASHTYPE(SLAB_FREE_PAGE), +}; + +struct crashtype_category heap_crashtypes = { + .crashtypes = crashtypes, + .len = ARRAY_SIZE(crashtypes), +}; diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h new file mode 100644 index 0000000000..015e048402 --- /dev/null +++ b/drivers/misc/lkdtm/lkdtm.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LKDTM_H +#define __LKDTM_H + +#define pr_fmt(fmt) "lkdtm: " fmt + +#include + +extern char *lkdtm_kernel_info; + +#define pr_expected_config(kconfig) \ +do { \ + if (IS_ENABLED(kconfig)) \ + pr_err("Unexpected! This %s was built with " #kconfig "=y\n", \ + lkdtm_kernel_info); \ + else \ + pr_warn("This is probably expected, since this %s was built *without* " #kconfig "=y\n", \ + lkdtm_kernel_info); \ +} while (0) + +#ifndef MODULE +int lkdtm_check_bool_cmdline(const char *param); +#define pr_expected_config_param(kconfig, param) \ +do { \ + if (IS_ENABLED(kconfig)) { \ + switch (lkdtm_check_bool_cmdline(param)) { \ + case 0: \ + pr_warn("This is probably expected, since this %s was built with " #kconfig "=y but booted with '" param "=N'\n", \ + lkdtm_kernel_info); \ + break; \ + case 1: \ + pr_err("Unexpected! This %s was built with " #kconfig "=y and booted with '" param "=Y'\n", \ + lkdtm_kernel_info); \ + break; \ + default: \ + pr_err("Unexpected! This %s was built with " #kconfig "=y (and booted without '" param "' specified)\n", \ + lkdtm_kernel_info); \ + } \ + } else { \ + switch (lkdtm_check_bool_cmdline(param)) { \ + case 0: \ + pr_warn("This is probably expected, as this %s was built *without* " #kconfig "=y and booted with '" param "=N'\n", \ + lkdtm_kernel_info); \ + break; \ + case 1: \ + pr_err("Unexpected! This %s was built *without* " #kconfig "=y but booted with '" param "=Y'\n", \ + lkdtm_kernel_info); \ + break; \ + default: \ + pr_err("This is probably expected, since this %s was built *without* " #kconfig "=y (and booted without '" param "' specified)\n", \ + lkdtm_kernel_info); \ + break; \ + } \ + } \ +} while (0) +#else +#define pr_expected_config_param(kconfig, param) pr_expected_config(kconfig) +#endif + +/* Crash types. */ +struct crashtype { + const char *name; + void (*func)(void); +}; + +#define CRASHTYPE(_name) \ + { \ + .name = __stringify(_name), \ + .func = lkdtm_ ## _name, \ + } + +/* Category's collection of crashtypes. */ +struct crashtype_category { + struct crashtype *crashtypes; + size_t len; +}; + +/* Each category's crashtypes list. 
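+ *
+ * A new test file follows the same pattern as the existing ones (rough
+ * sketch; "example" is a made-up name):
+ *
+ *   static void lkdtm_EXAMPLE_TEST(void)
+ *   { ... }
+ *
+ *   static struct crashtype crashtypes[] = {
+ *           CRASHTYPE(EXAMPLE_TEST),
+ *   };
+ *
+ *   struct crashtype_category example_crashtypes = {
+ *           .crashtypes = crashtypes,
+ *           .len = ARRAY_SIZE(crashtypes),
+ *   };
+ *
+ * plus an extern declaration below and an entry in crashtype_categories[]
+ * in core.c.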
*/ +extern struct crashtype_category bugs_crashtypes; +extern struct crashtype_category heap_crashtypes; +extern struct crashtype_category perms_crashtypes; +extern struct crashtype_category refcount_crashtypes; +extern struct crashtype_category usercopy_crashtypes; +extern struct crashtype_category stackleak_crashtypes; +extern struct crashtype_category cfi_crashtypes; +extern struct crashtype_category fortify_crashtypes; +extern struct crashtype_category powerpc_crashtypes; + +/* Each category's init/exit routines. */ +void __init lkdtm_bugs_init(int *recur_param); +void __init lkdtm_heap_init(void); +void __exit lkdtm_heap_exit(void); +void __init lkdtm_perms_init(void); +void __init lkdtm_usercopy_init(void); +void __exit lkdtm_usercopy_exit(void); + +/* Special declaration for function-in-rodata. */ +void lkdtm_rodata_do_nothing(void); + +#endif diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c new file mode 100644 index 0000000000..b93404d656 --- /dev/null +++ b/drivers/misc/lkdtm/perms.c @@ -0,0 +1,293 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This is for all the tests related to validating kernel memory + * permissions: non-executable regions, non-writable regions, and + * even non-readable regions. + */ +#include "lkdtm.h" +#include +#include +#include +#include +#include +#include + +/* Whether or not to fill the target memory area with do_nothing(). */ +#define CODE_WRITE true +#define CODE_AS_IS false + +/* How many bytes to copy to be sure we've copied enough of do_nothing(). */ +#define EXEC_SIZE 64 + +/* This is non-const, so it will end up in the .data section. */ +static u8 data_area[EXEC_SIZE]; + +/* This is const, so it will end up in the .rodata section. */ +static const unsigned long rodata = 0xAA55AA55; + +/* This is marked __ro_after_init, so it should ultimately be .rodata. */ +static unsigned long ro_after_init __ro_after_init = 0x55AA5500; + +/* + * This just returns to the caller. It is designed to be copied into + * non-executable memory regions. + */ +static noinline void do_nothing(void) +{ + return; +} + +/* Must immediately follow do_nothing for size calculuations to work out. */ +static noinline void do_overwritten(void) +{ + pr_info("do_overwritten wasn't overwritten!\n"); + return; +} + +static noinline void do_almost_nothing(void) +{ + pr_info("do_nothing was hijacked!\n"); +} + +static void *setup_function_descriptor(func_desc_t *fdesc, void *dst) +{ + if (!have_function_descriptors()) + return dst; + + memcpy(fdesc, do_nothing, sizeof(*fdesc)); + fdesc->addr = (unsigned long)dst; + barrier(); + + return fdesc; +} + +static noinline void execute_location(void *dst, bool write) +{ + void (*func)(void); + func_desc_t fdesc; + void *do_nothing_text = dereference_function_descriptor(do_nothing); + + pr_info("attempting ok execution at %px\n", do_nothing_text); + do_nothing(); + + if (write == CODE_WRITE) { + memcpy(dst, do_nothing_text, EXEC_SIZE); + flush_icache_range((unsigned long)dst, + (unsigned long)dst + EXEC_SIZE); + } + pr_info("attempting bad execution at %px\n", dst); + func = setup_function_descriptor(&fdesc, dst); + func(); + pr_err("FAIL: func returned\n"); +} + +static void execute_user_location(void *dst) +{ + int copied; + + /* Intentionally crossing kernel/user memory boundary. 
*/ + void (*func)(void); + func_desc_t fdesc; + void *do_nothing_text = dereference_function_descriptor(do_nothing); + + pr_info("attempting ok execution at %px\n", do_nothing_text); + do_nothing(); + + copied = access_process_vm(current, (unsigned long)dst, do_nothing_text, + EXEC_SIZE, FOLL_WRITE); + if (copied < EXEC_SIZE) + return; + pr_info("attempting bad execution at %px\n", dst); + func = setup_function_descriptor(&fdesc, dst); + func(); + pr_err("FAIL: func returned\n"); +} + +static void lkdtm_WRITE_RO(void) +{ + /* Explicitly cast away "const" for the test and make volatile. */ + volatile unsigned long *ptr = (unsigned long *)&rodata; + + pr_info("attempting bad rodata write at %px\n", ptr); + *ptr ^= 0xabcd1234; + pr_err("FAIL: survived bad write\n"); +} + +static void lkdtm_WRITE_RO_AFTER_INIT(void) +{ + volatile unsigned long *ptr = &ro_after_init; + + /* + * Verify we were written to during init. Since an Oops + * is considered a "success", a failure is to just skip the + * real test. + */ + if ((*ptr & 0xAA) != 0xAA) { + pr_info("%p was NOT written during init!?\n", ptr); + return; + } + + pr_info("attempting bad ro_after_init write at %px\n", ptr); + *ptr ^= 0xabcd1234; + pr_err("FAIL: survived bad write\n"); +} + +static void lkdtm_WRITE_KERN(void) +{ + size_t size; + volatile unsigned char *ptr; + + size = (unsigned long)dereference_function_descriptor(do_overwritten) - + (unsigned long)dereference_function_descriptor(do_nothing); + ptr = dereference_function_descriptor(do_overwritten); + + pr_info("attempting bad %zu byte write at %px\n", size, ptr); + memcpy((void *)ptr, (unsigned char *)do_nothing, size); + flush_icache_range((unsigned long)ptr, (unsigned long)(ptr + size)); + pr_err("FAIL: survived bad write\n"); + + do_overwritten(); +} + +static void lkdtm_WRITE_OPD(void) +{ + size_t size = sizeof(func_desc_t); + void (*func)(void) = do_nothing; + + if (!have_function_descriptors()) { + pr_info("XFAIL: Platform doesn't use function descriptors.\n"); + return; + } + pr_info("attempting bad %zu bytes write at %px\n", size, do_nothing); + memcpy(do_nothing, do_almost_nothing, size); + pr_err("FAIL: survived bad write\n"); + + asm("" : "=m"(func)); + func(); +} + +static void lkdtm_EXEC_DATA(void) +{ + execute_location(data_area, CODE_WRITE); +} + +static void lkdtm_EXEC_STACK(void) +{ + u8 stack_area[EXEC_SIZE]; + execute_location(stack_area, CODE_WRITE); +} + +static void lkdtm_EXEC_KMALLOC(void) +{ + u32 *kmalloc_area = kmalloc(EXEC_SIZE, GFP_KERNEL); + execute_location(kmalloc_area, CODE_WRITE); + kfree(kmalloc_area); +} + +static void lkdtm_EXEC_VMALLOC(void) +{ + u32 *vmalloc_area = vmalloc(EXEC_SIZE); + execute_location(vmalloc_area, CODE_WRITE); + vfree(vmalloc_area); +} + +static void lkdtm_EXEC_RODATA(void) +{ + execute_location(dereference_function_descriptor(lkdtm_rodata_do_nothing), + CODE_AS_IS); +} + +static void lkdtm_EXEC_USERSPACE(void) +{ + unsigned long user_addr; + + user_addr = vm_mmap(NULL, 0, PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + if (user_addr >= TASK_SIZE) { + pr_warn("Failed to allocate user memory\n"); + return; + } + execute_user_location((void *)user_addr); + vm_munmap(user_addr, PAGE_SIZE); +} + +static void lkdtm_EXEC_NULL(void) +{ + execute_location(NULL, CODE_AS_IS); +} + +static void lkdtm_ACCESS_USERSPACE(void) +{ + unsigned long user_addr, tmp = 0; + unsigned long *ptr; + + user_addr = vm_mmap(NULL, 0, PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + 
if (user_addr >= TASK_SIZE) { + pr_warn("Failed to allocate user memory\n"); + return; + } + + if (copy_to_user((void __user *)user_addr, &tmp, sizeof(tmp))) { + pr_warn("copy_to_user failed\n"); + vm_munmap(user_addr, PAGE_SIZE); + return; + } + + ptr = (unsigned long *)user_addr; + + pr_info("attempting bad read at %px\n", ptr); + tmp = *ptr; + tmp += 0xc0dec0de; + pr_err("FAIL: survived bad read\n"); + + pr_info("attempting bad write at %px\n", ptr); + *ptr = tmp; + pr_err("FAIL: survived bad write\n"); + + vm_munmap(user_addr, PAGE_SIZE); +} + +static void lkdtm_ACCESS_NULL(void) +{ + unsigned long tmp; + volatile unsigned long *ptr = (unsigned long *)NULL; + + pr_info("attempting bad read at %px\n", ptr); + tmp = *ptr; + tmp += 0xc0dec0de; + pr_err("FAIL: survived bad read\n"); + + pr_info("attempting bad write at %px\n", ptr); + *ptr = tmp; + pr_err("FAIL: survived bad write\n"); +} + +void __init lkdtm_perms_init(void) +{ + /* Make sure we can write to __ro_after_init values during __init */ + ro_after_init |= 0xAA; +} + +static struct crashtype crashtypes[] = { + CRASHTYPE(WRITE_RO), + CRASHTYPE(WRITE_RO_AFTER_INIT), + CRASHTYPE(WRITE_KERN), + CRASHTYPE(WRITE_OPD), + CRASHTYPE(EXEC_DATA), + CRASHTYPE(EXEC_STACK), + CRASHTYPE(EXEC_KMALLOC), + CRASHTYPE(EXEC_VMALLOC), + CRASHTYPE(EXEC_RODATA), + CRASHTYPE(EXEC_USERSPACE), + CRASHTYPE(EXEC_NULL), + CRASHTYPE(ACCESS_USERSPACE), + CRASHTYPE(ACCESS_NULL), +}; + +struct crashtype_category perms_crashtypes = { + .crashtypes = crashtypes, + .len = ARRAY_SIZE(crashtypes), +}; diff --git a/drivers/misc/lkdtm/powerpc.c b/drivers/misc/lkdtm/powerpc.c new file mode 100644 index 0000000000..be38544991 --- /dev/null +++ b/drivers/misc/lkdtm/powerpc.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "lkdtm.h" +#include +#include +#include + +/* Inserts new slb entries */ +static void insert_slb_entry(unsigned long p, int ssize, int page_size) +{ + unsigned long flags; + + flags = SLB_VSID_KERNEL | mmu_psize_defs[page_size].sllp; + preempt_disable(); + + asm volatile("slbmte %0,%1" : + : "r" (mk_vsid_data(p, ssize, flags)), + "r" (mk_esid_data(p, ssize, SLB_NUM_BOLTED)) + : "memory"); + + asm volatile("slbmte %0,%1" : + : "r" (mk_vsid_data(p, ssize, flags)), + "r" (mk_esid_data(p, ssize, SLB_NUM_BOLTED + 1)) + : "memory"); + preempt_enable(); +} + +/* Inject slb multihit on vmalloc-ed address i.e 0xD00... */ +static int inject_vmalloc_slb_multihit(void) +{ + char *p; + + p = vmalloc(PAGE_SIZE); + if (!p) + return -ENOMEM; + + insert_slb_entry((unsigned long)p, MMU_SEGSIZE_1T, mmu_vmalloc_psize); + /* + * This triggers exception, If handled correctly we must recover + * from this error. + */ + p[0] = '!'; + vfree(p); + return 0; +} + +/* Inject slb multihit on kmalloc-ed address i.e 0xC00... */ +static int inject_kmalloc_slb_multihit(void) +{ + char *p; + + p = kmalloc(2048, GFP_KERNEL); + if (!p) + return -ENOMEM; + + insert_slb_entry((unsigned long)p, MMU_SEGSIZE_1T, mmu_linear_psize); + /* + * This triggers exception, If handled correctly we must recover + * from this error. + */ + p[0] = '!'; + kfree(p); + return 0; +} + +/* + * Few initial SLB entries are bolted. Add a test to inject + * multihit in bolted entry 0. 
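+ *
+ * An SLB "multihit" means two SLB entries translate the same effective
+ * address. The helper below reads bolted entry 0 back with slbmfee/
+ * slbmfev and re-inserts it into two spare slots, so the next access to
+ * PAGE_OFFSET should raise a machine check that the kernel is expected
+ * to recover from.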
+ */ +static void insert_dup_slb_entry_0(void) +{ + unsigned long test_address = PAGE_OFFSET, *test_ptr; + unsigned long esid, vsid; + unsigned long i = 0; + + test_ptr = (unsigned long *)test_address; + preempt_disable(); + + asm volatile("slbmfee %0,%1" : "=r" (esid) : "r" (i)); + asm volatile("slbmfev %0,%1" : "=r" (vsid) : "r" (i)); + + /* for i !=0 we would need to mask out the old entry number */ + asm volatile("slbmte %0,%1" : + : "r" (vsid), + "r" (esid | SLB_NUM_BOLTED) + : "memory"); + + asm volatile("slbmfee %0,%1" : "=r" (esid) : "r" (i)); + asm volatile("slbmfev %0,%1" : "=r" (vsid) : "r" (i)); + + /* for i !=0 we would need to mask out the old entry number */ + asm volatile("slbmte %0,%1" : + : "r" (vsid), + "r" (esid | (SLB_NUM_BOLTED + 1)) + : "memory"); + + pr_info("%s accessing test address 0x%lx: 0x%lx\n", + __func__, test_address, *test_ptr); + + preempt_enable(); +} + +static void lkdtm_PPC_SLB_MULTIHIT(void) +{ + if (!radix_enabled()) { + pr_info("Injecting SLB multihit errors\n"); + /* + * These need not be separate tests, And they do pretty + * much same thing. In any case we must recover from the + * errors introduced by these functions, machine would not + * survive these tests in case of failure to handle. + */ + inject_vmalloc_slb_multihit(); + inject_kmalloc_slb_multihit(); + insert_dup_slb_entry_0(); + pr_info("Recovered from SLB multihit errors\n"); + } else { + pr_err("XFAIL: This test is for ppc64 and with hash mode MMU only\n"); + } +} + +static struct crashtype crashtypes[] = { + CRASHTYPE(PPC_SLB_MULTIHIT), +}; + +struct crashtype_category powerpc_crashtypes = { + .crashtypes = crashtypes, + .len = ARRAY_SIZE(crashtypes), +}; diff --git a/drivers/misc/lkdtm/refcount.c b/drivers/misc/lkdtm/refcount.c new file mode 100644 index 0000000000..5cd488f54c --- /dev/null +++ b/drivers/misc/lkdtm/refcount.c @@ -0,0 +1,419 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This is for all the tests related to refcount bugs (e.g. overflow, + * underflow, reaching zero untested, etc). + */ +#include "lkdtm.h" +#include + +static void overflow_check(refcount_t *ref) +{ + switch (refcount_read(ref)) { + case REFCOUNT_SATURATED: + pr_info("Overflow detected: saturated\n"); + break; + case REFCOUNT_MAX: + pr_warn("Overflow detected: unsafely reset to max\n"); + break; + default: + pr_err("Fail: refcount wrapped to %d\n", refcount_read(ref)); + } +} + +/* + * A refcount_inc() above the maximum value of the refcount implementation, + * should at least saturate, and at most also WARN. + */ +static void lkdtm_REFCOUNT_INC_OVERFLOW(void) +{ + refcount_t over = REFCOUNT_INIT(REFCOUNT_MAX - 1); + + pr_info("attempting good refcount_inc() without overflow\n"); + refcount_dec(&over); + refcount_inc(&over); + + pr_info("attempting bad refcount_inc() overflow\n"); + refcount_inc(&over); + refcount_inc(&over); + + overflow_check(&over); +} + +/* refcount_add() should behave just like refcount_inc() above. */ +static void lkdtm_REFCOUNT_ADD_OVERFLOW(void) +{ + refcount_t over = REFCOUNT_INIT(REFCOUNT_MAX - 1); + + pr_info("attempting good refcount_add() without overflow\n"); + refcount_dec(&over); + refcount_dec(&over); + refcount_dec(&over); + refcount_dec(&over); + refcount_add(4, &over); + + pr_info("attempting bad refcount_add() overflow\n"); + refcount_add(4, &over); + + overflow_check(&over); +} + +/* refcount_inc_not_zero() should behave just like refcount_inc() above. 
*/ +static void lkdtm_REFCOUNT_INC_NOT_ZERO_OVERFLOW(void) +{ + refcount_t over = REFCOUNT_INIT(REFCOUNT_MAX); + + pr_info("attempting bad refcount_inc_not_zero() overflow\n"); + if (!refcount_inc_not_zero(&over)) + pr_warn("Weird: refcount_inc_not_zero() reported zero\n"); + + overflow_check(&over); +} + +/* refcount_add_not_zero() should behave just like refcount_inc() above. */ +static void lkdtm_REFCOUNT_ADD_NOT_ZERO_OVERFLOW(void) +{ + refcount_t over = REFCOUNT_INIT(REFCOUNT_MAX); + + pr_info("attempting bad refcount_add_not_zero() overflow\n"); + if (!refcount_add_not_zero(6, &over)) + pr_warn("Weird: refcount_add_not_zero() reported zero\n"); + + overflow_check(&over); +} + +static void check_zero(refcount_t *ref) +{ + switch (refcount_read(ref)) { + case REFCOUNT_SATURATED: + pr_info("Zero detected: saturated\n"); + break; + case REFCOUNT_MAX: + pr_warn("Zero detected: unsafely reset to max\n"); + break; + case 0: + pr_warn("Still at zero: refcount_inc/add() must not inc-from-0\n"); + break; + default: + pr_err("Fail: refcount went crazy: %d\n", refcount_read(ref)); + } +} + +/* + * A refcount_dec(), as opposed to a refcount_dec_and_test(), when it hits + * zero it should either saturate (when inc-from-zero isn't protected) + * or stay at zero (when inc-from-zero is protected) and should WARN for both. + */ +static void lkdtm_REFCOUNT_DEC_ZERO(void) +{ + refcount_t zero = REFCOUNT_INIT(2); + + pr_info("attempting good refcount_dec()\n"); + refcount_dec(&zero); + + pr_info("attempting bad refcount_dec() to zero\n"); + refcount_dec(&zero); + + check_zero(&zero); +} + +static void check_negative(refcount_t *ref, int start) +{ + /* + * refcount_t refuses to move a refcount at all on an + * over-sub, so we have to track our starting position instead of + * looking only at zero-pinning. + */ + if (refcount_read(ref) == start) { + pr_warn("Still at %d: refcount_inc/add() must not inc-from-0\n", + start); + return; + } + + switch (refcount_read(ref)) { + case REFCOUNT_SATURATED: + pr_info("Negative detected: saturated\n"); + break; + case REFCOUNT_MAX: + pr_warn("Negative detected: unsafely reset to max\n"); + break; + default: + pr_err("Fail: refcount went crazy: %d\n", refcount_read(ref)); + } +} + +/* A refcount_dec() going negative should saturate and may WARN. */ +static void lkdtm_REFCOUNT_DEC_NEGATIVE(void) +{ + refcount_t neg = REFCOUNT_INIT(0); + + pr_info("attempting bad refcount_dec() below zero\n"); + refcount_dec(&neg); + + check_negative(&neg, 0); +} + +/* + * A refcount_dec_and_test() should act like refcount_dec() above when + * going negative. + */ +static void lkdtm_REFCOUNT_DEC_AND_TEST_NEGATIVE(void) +{ + refcount_t neg = REFCOUNT_INIT(0); + + pr_info("attempting bad refcount_dec_and_test() below zero\n"); + if (refcount_dec_and_test(&neg)) + pr_warn("Weird: refcount_dec_and_test() reported zero\n"); + + check_negative(&neg, 0); +} + +/* + * A refcount_sub_and_test() should act like refcount_dec_and_test() + * above when going negative. 
+ */ +static void lkdtm_REFCOUNT_SUB_AND_TEST_NEGATIVE(void) +{ + refcount_t neg = REFCOUNT_INIT(3); + + pr_info("attempting bad refcount_sub_and_test() below zero\n"); + if (refcount_sub_and_test(5, &neg)) + pr_warn("Weird: refcount_sub_and_test() reported zero\n"); + + check_negative(&neg, 3); +} + +static void check_from_zero(refcount_t *ref) +{ + switch (refcount_read(ref)) { + case 0: + pr_info("Zero detected: stayed at zero\n"); + break; + case REFCOUNT_SATURATED: + pr_info("Zero detected: saturated\n"); + break; + case REFCOUNT_MAX: + pr_warn("Zero detected: unsafely reset to max\n"); + break; + default: + pr_info("Fail: zero not detected, incremented to %d\n", + refcount_read(ref)); + } +} + +/* + * A refcount_inc() from zero should pin to zero or saturate and may WARN. + */ +static void lkdtm_REFCOUNT_INC_ZERO(void) +{ + refcount_t zero = REFCOUNT_INIT(0); + + pr_info("attempting safe refcount_inc_not_zero() from zero\n"); + if (!refcount_inc_not_zero(&zero)) { + pr_info("Good: zero detected\n"); + if (refcount_read(&zero) == 0) + pr_info("Correctly stayed at zero\n"); + else + pr_err("Fail: refcount went past zero!\n"); + } else { + pr_err("Fail: Zero not detected!?\n"); + } + + pr_info("attempting bad refcount_inc() from zero\n"); + refcount_inc(&zero); + + check_from_zero(&zero); +} + +/* + * A refcount_add() should act like refcount_inc() above when starting + * at zero. + */ +static void lkdtm_REFCOUNT_ADD_ZERO(void) +{ + refcount_t zero = REFCOUNT_INIT(0); + + pr_info("attempting safe refcount_add_not_zero() from zero\n"); + if (!refcount_add_not_zero(3, &zero)) { + pr_info("Good: zero detected\n"); + if (refcount_read(&zero) == 0) + pr_info("Correctly stayed at zero\n"); + else + pr_err("Fail: refcount went past zero\n"); + } else { + pr_err("Fail: Zero not detected!?\n"); + } + + pr_info("attempting bad refcount_add() from zero\n"); + refcount_add(3, &zero); + + check_from_zero(&zero); +} + +static void check_saturated(refcount_t *ref) +{ + switch (refcount_read(ref)) { + case REFCOUNT_SATURATED: + pr_info("Saturation detected: still saturated\n"); + break; + case REFCOUNT_MAX: + pr_warn("Saturation detected: unsafely reset to max\n"); + break; + default: + pr_err("Fail: refcount went crazy: %d\n", refcount_read(ref)); + } +} + +/* + * A refcount_inc() from a saturated value should at most warn about + * being saturated already. + */ +static void lkdtm_REFCOUNT_INC_SATURATED(void) +{ + refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED); + + pr_info("attempting bad refcount_inc() from saturated\n"); + refcount_inc(&sat); + + check_saturated(&sat); +} + +/* Should act like refcount_inc() above from saturated. */ +static void lkdtm_REFCOUNT_DEC_SATURATED(void) +{ + refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED); + + pr_info("attempting bad refcount_dec() from saturated\n"); + refcount_dec(&sat); + + check_saturated(&sat); +} + +/* Should act like refcount_inc() above from saturated. */ +static void lkdtm_REFCOUNT_ADD_SATURATED(void) +{ + refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED); + + pr_info("attempting bad refcount_dec() from saturated\n"); + refcount_add(8, &sat); + + check_saturated(&sat); +} + +/* Should act like refcount_inc() above from saturated. 
*/ +static void lkdtm_REFCOUNT_INC_NOT_ZERO_SATURATED(void) +{ + refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED); + + pr_info("attempting bad refcount_inc_not_zero() from saturated\n"); + if (!refcount_inc_not_zero(&sat)) + pr_warn("Weird: refcount_inc_not_zero() reported zero\n"); + + check_saturated(&sat); +} + +/* Should act like refcount_inc() above from saturated. */ +static void lkdtm_REFCOUNT_ADD_NOT_ZERO_SATURATED(void) +{ + refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED); + + pr_info("attempting bad refcount_add_not_zero() from saturated\n"); + if (!refcount_add_not_zero(7, &sat)) + pr_warn("Weird: refcount_add_not_zero() reported zero\n"); + + check_saturated(&sat); +} + +/* Should act like refcount_inc() above from saturated. */ +static void lkdtm_REFCOUNT_DEC_AND_TEST_SATURATED(void) +{ + refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED); + + pr_info("attempting bad refcount_dec_and_test() from saturated\n"); + if (refcount_dec_and_test(&sat)) + pr_warn("Weird: refcount_dec_and_test() reported zero\n"); + + check_saturated(&sat); +} + +/* Should act like refcount_inc() above from saturated. */ +static void lkdtm_REFCOUNT_SUB_AND_TEST_SATURATED(void) +{ + refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED); + + pr_info("attempting bad refcount_sub_and_test() from saturated\n"); + if (refcount_sub_and_test(8, &sat)) + pr_warn("Weird: refcount_sub_and_test() reported zero\n"); + + check_saturated(&sat); +} + +/* Used to time the existing atomic_t when used for reference counting */ +static void lkdtm_ATOMIC_TIMING(void) +{ + unsigned int i; + atomic_t count = ATOMIC_INIT(1); + + for (i = 0; i < INT_MAX - 1; i++) + atomic_inc(&count); + + for (i = INT_MAX; i > 0; i--) + if (atomic_dec_and_test(&count)) + break; + + if (i != 1) + pr_err("atomic timing: out of sync up/down cycle: %u\n", i - 1); + else + pr_info("atomic timing: done\n"); +} + +/* + * This can be compared to ATOMIC_TIMING when implementing fast refcount + * protections. Looking at the number of CPU cycles tells the real story + * about performance. 
For example: + * cd /sys/kernel/debug/provoke-crash + * perf stat -B -- cat <(echo REFCOUNT_TIMING) > DIRECT + */ +static void lkdtm_REFCOUNT_TIMING(void) +{ + unsigned int i; + refcount_t count = REFCOUNT_INIT(1); + + for (i = 0; i < INT_MAX - 1; i++) + refcount_inc(&count); + + for (i = INT_MAX; i > 0; i--) + if (refcount_dec_and_test(&count)) + break; + + if (i != 1) + pr_err("refcount: out of sync up/down cycle: %u\n", i - 1); + else + pr_info("refcount timing: done\n"); +} + +static struct crashtype crashtypes[] = { + CRASHTYPE(REFCOUNT_INC_OVERFLOW), + CRASHTYPE(REFCOUNT_ADD_OVERFLOW), + CRASHTYPE(REFCOUNT_INC_NOT_ZERO_OVERFLOW), + CRASHTYPE(REFCOUNT_ADD_NOT_ZERO_OVERFLOW), + CRASHTYPE(REFCOUNT_DEC_ZERO), + CRASHTYPE(REFCOUNT_DEC_NEGATIVE), + CRASHTYPE(REFCOUNT_DEC_AND_TEST_NEGATIVE), + CRASHTYPE(REFCOUNT_SUB_AND_TEST_NEGATIVE), + CRASHTYPE(REFCOUNT_INC_ZERO), + CRASHTYPE(REFCOUNT_ADD_ZERO), + CRASHTYPE(REFCOUNT_INC_SATURATED), + CRASHTYPE(REFCOUNT_DEC_SATURATED), + CRASHTYPE(REFCOUNT_ADD_SATURATED), + CRASHTYPE(REFCOUNT_INC_NOT_ZERO_SATURATED), + CRASHTYPE(REFCOUNT_ADD_NOT_ZERO_SATURATED), + CRASHTYPE(REFCOUNT_DEC_AND_TEST_SATURATED), + CRASHTYPE(REFCOUNT_SUB_AND_TEST_SATURATED), + CRASHTYPE(ATOMIC_TIMING), + CRASHTYPE(REFCOUNT_TIMING), +}; + +struct crashtype_category refcount_crashtypes = { + .crashtypes = crashtypes, + .len = ARRAY_SIZE(crashtypes), +}; diff --git a/drivers/misc/lkdtm/rodata.c b/drivers/misc/lkdtm/rodata.c new file mode 100644 index 0000000000..baacb876d1 --- /dev/null +++ b/drivers/misc/lkdtm/rodata.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This includes functions that are meant to live entirely in .rodata + * (via objcopy tricks), to validate the non-executability of .rodata. + */ +#include "lkdtm.h" + +void noinstr lkdtm_rodata_do_nothing(void) +{ + /* Does nothing. We just want an architecture agnostic "return". */ +} diff --git a/drivers/misc/lkdtm/stackleak.c b/drivers/misc/lkdtm/stackleak.c new file mode 100644 index 0000000000..f1d0221609 --- /dev/null +++ b/drivers/misc/lkdtm/stackleak.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This code tests that the current task stack is properly erased (filled + * with STACKLEAK_POISON). + * + * Authors: + * Alexander Popov + * Tycho Andersen + */ + +#include "lkdtm.h" +#include + +#if defined(CONFIG_GCC_PLUGIN_STACKLEAK) +/* + * Check that stackleak tracks the lowest stack pointer and erases the stack + * below this as expected. + * + * To prevent the lowest stack pointer changing during the test, IRQs are + * masked and instrumentation of this function is disabled. We assume that the + * compiler will create a fixed-size stack frame for this function. + * + * Any non-inlined function may make further use of the stack, altering the + * lowest stack pointer and/or clobbering poison values. To avoid spurious + * failures we must avoid printing until the end of the test or have already + * encountered a failure condition. 
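+ *
+ * (As with the other tests, this one is normally driven through the
+ * lkdtm debugfs interface, e.g.:
+ *   echo STACKLEAK_ERASING > /sys/kernel/debug/provoke-crash/DIRECT)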
+ */ +static void noinstr check_stackleak_irqoff(void) +{ + const unsigned long task_stack_base = (unsigned long)task_stack_page(current); + const unsigned long task_stack_low = stackleak_task_low_bound(current); + const unsigned long task_stack_high = stackleak_task_high_bound(current); + const unsigned long current_sp = current_stack_pointer; + const unsigned long lowest_sp = current->lowest_stack; + unsigned long untracked_high; + unsigned long poison_high, poison_low; + bool test_failed = false; + + /* + * Check that the current and lowest recorded stack pointer values fall + * within the expected task stack boundaries. These tests should never + * fail unless the boundaries are incorrect or we're clobbering the + * STACK_END_MAGIC, and in either casee something is seriously wrong. + */ + if (current_sp < task_stack_low || current_sp >= task_stack_high) { + instrumentation_begin(); + pr_err("FAIL: current_stack_pointer (0x%lx) outside of task stack bounds [0x%lx..0x%lx]\n", + current_sp, task_stack_low, task_stack_high - 1); + test_failed = true; + goto out; + } + if (lowest_sp < task_stack_low || lowest_sp >= task_stack_high) { + instrumentation_begin(); + pr_err("FAIL: current->lowest_stack (0x%lx) outside of task stack bounds [0x%lx..0x%lx]\n", + lowest_sp, task_stack_low, task_stack_high - 1); + test_failed = true; + goto out; + } + + /* + * Depending on what has run prior to this test, the lowest recorded + * stack pointer could be above or below the current stack pointer. + * Start from the lowest of the two. + * + * Poison values are naturally-aligned unsigned longs. As the current + * stack pointer might not be sufficiently aligned, we must align + * downwards to find the lowest known stack pointer value. This is the + * high boundary for a portion of the stack which may have been used + * without being tracked, and has to be scanned for poison. + */ + untracked_high = min(current_sp, lowest_sp); + untracked_high = ALIGN_DOWN(untracked_high, sizeof(unsigned long)); + + /* + * Find the top of the poison in the same way as the erasing code. + */ + poison_high = stackleak_find_top_of_poison(task_stack_low, untracked_high); + + /* + * Check whether the poisoned portion of the stack (if any) consists + * entirely of poison. This verifies the entries that + * stackleak_find_top_of_poison() should have checked. 
+ */ + poison_low = poison_high; + while (poison_low > task_stack_low) { + poison_low -= sizeof(unsigned long); + + if (*(unsigned long *)poison_low == STACKLEAK_POISON) + continue; + + instrumentation_begin(); + pr_err("FAIL: non-poison value %lu bytes below poison boundary: 0x%lx\n", + poison_high - poison_low, *(unsigned long *)poison_low); + test_failed = true; + goto out; + } + + instrumentation_begin(); + pr_info("stackleak stack usage:\n" + " high offset: %lu bytes\n" + " current: %lu bytes\n" + " lowest: %lu bytes\n" + " tracked: %lu bytes\n" + " untracked: %lu bytes\n" + " poisoned: %lu bytes\n" + " low offset: %lu bytes\n", + task_stack_base + THREAD_SIZE - task_stack_high, + task_stack_high - current_sp, + task_stack_high - lowest_sp, + task_stack_high - untracked_high, + untracked_high - poison_high, + poison_high - task_stack_low, + task_stack_low - task_stack_base); + +out: + if (test_failed) { + pr_err("FAIL: the thread stack is NOT properly erased!\n"); + } else { + pr_info("OK: the rest of the thread stack is properly erased\n"); + } + instrumentation_end(); +} + +static void lkdtm_STACKLEAK_ERASING(void) +{ + unsigned long flags; + + local_irq_save(flags); + check_stackleak_irqoff(); + local_irq_restore(flags); +} +#else /* defined(CONFIG_GCC_PLUGIN_STACKLEAK) */ +static void lkdtm_STACKLEAK_ERASING(void) +{ + if (IS_ENABLED(CONFIG_HAVE_ARCH_STACKLEAK)) { + pr_err("XFAIL: stackleak is not enabled (CONFIG_GCC_PLUGIN_STACKLEAK=n)\n"); + } else { + pr_err("XFAIL: stackleak is not supported on this arch (HAVE_ARCH_STACKLEAK=n)\n"); + } +} +#endif /* defined(CONFIG_GCC_PLUGIN_STACKLEAK) */ + +static struct crashtype crashtypes[] = { + CRASHTYPE(STACKLEAK_ERASING), +}; + +struct crashtype_category stackleak_crashtypes = { + .crashtypes = crashtypes, + .len = ARRAY_SIZE(crashtypes), +}; diff --git a/drivers/misc/lkdtm/usercopy.c b/drivers/misc/lkdtm/usercopy.c new file mode 100644 index 0000000000..67db57249a --- /dev/null +++ b/drivers/misc/lkdtm/usercopy.c @@ -0,0 +1,457 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This is for all the tests related to copy_to_user() and copy_from_user() + * hardening. + */ +#include "lkdtm.h" +#include +#include +#include +#include +#include +#include +#include + +/* + * Many of the tests here end up using const sizes, but those would + * normally be ignored by hardened usercopy, so force the compiler + * into choosing the non-const path to make sure we trigger the + * hardened usercopy checks by added "unconst" to all the const copies, + * and making sure "cache_size" isn't optimized into a const. + */ +static volatile size_t unconst; +static volatile size_t cache_size = 1024; +static struct kmem_cache *whitelist_cache; + +static const unsigned char test_text[] = "This is a test.\n"; + +/* + * Instead of adding -Wno-return-local-addr, just pass the stack address + * through a function to obfuscate it from the compiler. + */ +static noinline unsigned char *trick_compiler(unsigned char *stack) +{ + return stack + unconst; +} + +static noinline unsigned char *do_usercopy_stack_callee(int value) +{ + unsigned char buf[128]; + int i; + + /* Exercise stack to avoid everything living in registers. */ + for (i = 0; i < sizeof(buf); i++) { + buf[i] = value & 0xff; + } + + /* + * Put the target buffer in the middle of stack allocation + * so that we don't step on future stack users regardless + * of stack growth direction. 
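+ *
+ * Concretely, &buf[(128/2)-32] is &buf[32], so the 32-byte window the
+ * caller copies from/to sits at bytes 32..63 of this 128-byte buffer,
+ * well away from either end of the (by then dead) frame.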
+ */ + return trick_compiler(&buf[(128/2)-32]); +} + +static noinline void do_usercopy_stack(bool to_user, bool bad_frame) +{ + unsigned long user_addr; + unsigned char good_stack[32]; + unsigned char *bad_stack; + int i; + + /* Exercise stack to avoid everything living in registers. */ + for (i = 0; i < sizeof(good_stack); i++) + good_stack[i] = test_text[i % sizeof(test_text)]; + + /* This is a pointer to outside our current stack frame. */ + if (bad_frame) { + bad_stack = do_usercopy_stack_callee((uintptr_t)&bad_stack); + } else { + /* Put start address just inside stack. */ + bad_stack = task_stack_page(current) + THREAD_SIZE; + bad_stack -= sizeof(unsigned long); + } + +#ifdef ARCH_HAS_CURRENT_STACK_POINTER + pr_info("stack : %px\n", (void *)current_stack_pointer); +#endif + pr_info("good_stack: %px-%px\n", good_stack, good_stack + sizeof(good_stack)); + pr_info("bad_stack : %px-%px\n", bad_stack, bad_stack + sizeof(good_stack)); + + user_addr = vm_mmap(NULL, 0, PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + if (user_addr >= TASK_SIZE) { + pr_warn("Failed to allocate user memory\n"); + return; + } + + if (to_user) { + pr_info("attempting good copy_to_user of local stack\n"); + if (copy_to_user((void __user *)user_addr, good_stack, + unconst + sizeof(good_stack))) { + pr_warn("copy_to_user failed unexpectedly?!\n"); + goto free_user; + } + + pr_info("attempting bad copy_to_user of distant stack\n"); + if (copy_to_user((void __user *)user_addr, bad_stack, + unconst + sizeof(good_stack))) { + pr_warn("copy_to_user failed, but lacked Oops\n"); + goto free_user; + } + } else { + /* + * There isn't a safe way to not be protected by usercopy + * if we're going to write to another thread's stack. + */ + if (!bad_frame) + goto free_user; + + pr_info("attempting good copy_from_user of local stack\n"); + if (copy_from_user(good_stack, (void __user *)user_addr, + unconst + sizeof(good_stack))) { + pr_warn("copy_from_user failed unexpectedly?!\n"); + goto free_user; + } + + pr_info("attempting bad copy_from_user of distant stack\n"); + if (copy_from_user(bad_stack, (void __user *)user_addr, + unconst + sizeof(good_stack))) { + pr_warn("copy_from_user failed, but lacked Oops\n"); + goto free_user; + } + } + +free_user: + vm_munmap(user_addr, PAGE_SIZE); +} + +/* + * This checks for whole-object size validation with hardened usercopy, + * with or without usercopy whitelisting. 
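+ *
+ * The numbers below make the failure concrete: both buffers are
+ * kmalloc(1024) objects and the copy starts 16 bytes in, so copying
+ * size/2 (512) bytes stays inside the object while copying the full
+ * size (1024) bytes runs 16 bytes past its end, which hardened
+ * usercopy should refuse.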
+ */ +static void do_usercopy_slab_size(bool to_user) +{ + unsigned long user_addr; + unsigned char *one, *two; + void __user *test_user_addr; + void *test_kern_addr; + size_t size = unconst + 1024; + + one = kmalloc(size, GFP_KERNEL); + two = kmalloc(size, GFP_KERNEL); + if (!one || !two) { + pr_warn("Failed to allocate kernel memory\n"); + goto free_kernel; + } + + user_addr = vm_mmap(NULL, 0, PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + if (user_addr >= TASK_SIZE) { + pr_warn("Failed to allocate user memory\n"); + goto free_kernel; + } + + memset(one, 'A', size); + memset(two, 'B', size); + + test_user_addr = (void __user *)(user_addr + 16); + test_kern_addr = one + 16; + + if (to_user) { + pr_info("attempting good copy_to_user of correct size\n"); + if (copy_to_user(test_user_addr, test_kern_addr, size / 2)) { + pr_warn("copy_to_user failed unexpectedly?!\n"); + goto free_user; + } + + pr_info("attempting bad copy_to_user of too large size\n"); + if (copy_to_user(test_user_addr, test_kern_addr, size)) { + pr_warn("copy_to_user failed, but lacked Oops\n"); + goto free_user; + } + } else { + pr_info("attempting good copy_from_user of correct size\n"); + if (copy_from_user(test_kern_addr, test_user_addr, size / 2)) { + pr_warn("copy_from_user failed unexpectedly?!\n"); + goto free_user; + } + + pr_info("attempting bad copy_from_user of too large size\n"); + if (copy_from_user(test_kern_addr, test_user_addr, size)) { + pr_warn("copy_from_user failed, but lacked Oops\n"); + goto free_user; + } + } + pr_err("FAIL: bad usercopy not detected!\n"); + pr_expected_config_param(CONFIG_HARDENED_USERCOPY, "hardened_usercopy"); + +free_user: + vm_munmap(user_addr, PAGE_SIZE); +free_kernel: + kfree(one); + kfree(two); +} + +/* + * This checks for the specific whitelist window within an object. If this + * test passes, then do_usercopy_slab_size() tests will pass too. + */ +static void do_usercopy_slab_whitelist(bool to_user) +{ + unsigned long user_alloc; + unsigned char *buf = NULL; + unsigned char __user *user_addr; + size_t offset, size; + + /* Make sure cache was prepared. */ + if (!whitelist_cache) { + pr_warn("Failed to allocate kernel cache\n"); + return; + } + + /* + * Allocate a buffer with a whitelisted window in the buffer. + */ + buf = kmem_cache_alloc(whitelist_cache, GFP_KERNEL); + if (!buf) { + pr_warn("Failed to allocate buffer from whitelist cache\n"); + goto free_alloc; + } + + /* Allocate user memory we'll poke at. */ + user_alloc = vm_mmap(NULL, 0, PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + if (user_alloc >= TASK_SIZE) { + pr_warn("Failed to allocate user memory\n"); + goto free_alloc; + } + user_addr = (void __user *)user_alloc; + + memset(buf, 'B', cache_size); + + /* Whitelisted window in buffer, from kmem_cache_create_usercopy. 
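 * (The window itself is created in lkdtm_usercopy_init() at the bottom of
 *  this file via kmem_cache_create_usercopy(), with a useroffset of
 *  cache_size / 4 and a usersize of cache_size / 16; the "bad" copies
 *  below start one byte before that window and are expected to trip the
 *  whitelist check.)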
*/ + offset = (cache_size / 4) + unconst; + size = (cache_size / 16) + unconst; + + if (to_user) { + pr_info("attempting good copy_to_user inside whitelist\n"); + if (copy_to_user(user_addr, buf + offset, size)) { + pr_warn("copy_to_user failed unexpectedly?!\n"); + goto free_user; + } + + pr_info("attempting bad copy_to_user outside whitelist\n"); + if (copy_to_user(user_addr, buf + offset - 1, size)) { + pr_warn("copy_to_user failed, but lacked Oops\n"); + goto free_user; + } + } else { + pr_info("attempting good copy_from_user inside whitelist\n"); + if (copy_from_user(buf + offset, user_addr, size)) { + pr_warn("copy_from_user failed unexpectedly?!\n"); + goto free_user; + } + + pr_info("attempting bad copy_from_user outside whitelist\n"); + if (copy_from_user(buf + offset - 1, user_addr, size)) { + pr_warn("copy_from_user failed, but lacked Oops\n"); + goto free_user; + } + } + pr_err("FAIL: bad usercopy not detected!\n"); + pr_expected_config_param(CONFIG_HARDENED_USERCOPY, "hardened_usercopy"); + +free_user: + vm_munmap(user_alloc, PAGE_SIZE); +free_alloc: + if (buf) + kmem_cache_free(whitelist_cache, buf); +} + +/* Callable tests. */ +static void lkdtm_USERCOPY_SLAB_SIZE_TO(void) +{ + do_usercopy_slab_size(true); +} + +static void lkdtm_USERCOPY_SLAB_SIZE_FROM(void) +{ + do_usercopy_slab_size(false); +} + +static void lkdtm_USERCOPY_SLAB_WHITELIST_TO(void) +{ + do_usercopy_slab_whitelist(true); +} + +static void lkdtm_USERCOPY_SLAB_WHITELIST_FROM(void) +{ + do_usercopy_slab_whitelist(false); +} + +static void lkdtm_USERCOPY_STACK_FRAME_TO(void) +{ + do_usercopy_stack(true, true); +} + +static void lkdtm_USERCOPY_STACK_FRAME_FROM(void) +{ + do_usercopy_stack(false, true); +} + +static void lkdtm_USERCOPY_STACK_BEYOND(void) +{ + do_usercopy_stack(true, false); +} + +static void lkdtm_USERCOPY_KERNEL(void) +{ + unsigned long user_addr; + + user_addr = vm_mmap(NULL, 0, PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + if (user_addr >= TASK_SIZE) { + pr_warn("Failed to allocate user memory\n"); + return; + } + + pr_info("attempting good copy_to_user from kernel rodata: %px\n", + test_text); + if (copy_to_user((void __user *)user_addr, test_text, + unconst + sizeof(test_text))) { + pr_warn("copy_to_user failed unexpectedly?!\n"); + goto free_user; + } + + pr_info("attempting bad copy_to_user from kernel text: %px\n", + vm_mmap); + if (copy_to_user((void __user *)user_addr, vm_mmap, + unconst + PAGE_SIZE)) { + pr_warn("copy_to_user failed, but lacked Oops\n"); + goto free_user; + } + pr_err("FAIL: bad copy_to_user() not detected!\n"); + pr_expected_config_param(CONFIG_HARDENED_USERCOPY, "hardened_usercopy"); + +free_user: + vm_munmap(user_addr, PAGE_SIZE); +} + +/* + * This expects "kaddr" to point to a PAGE_SIZE allocation, which means + * a more complete test that would include copy_from_user() would risk + * memory corruption. Just test copy_to_user() here, as that exercises + * almost exactly the same code paths. + */ +static void do_usercopy_page_span(const char *name, void *kaddr) +{ + unsigned long uaddr; + + uaddr = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + if (uaddr >= TASK_SIZE) { + pr_warn("Failed to allocate user memory\n"); + return; + } + + /* Initialize contents. */ + memset(kaddr, 0xAA, PAGE_SIZE); + + /* Bump the kaddr forward to detect a page-spanning overflow. 
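 * (Copying a full PAGE_SIZE starting at kaddr + PAGE_SIZE / 2 necessarily
 *  runs past the end of the PAGE_SIZE region handed in, which is what the
 *  spanning checks exercised by the callers below are meant to catch.)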
*/ + kaddr += PAGE_SIZE / 2; + + pr_info("attempting good copy_to_user() from kernel %s: %px\n", + name, kaddr); + if (copy_to_user((void __user *)uaddr, kaddr, + unconst + (PAGE_SIZE / 2))) { + pr_err("copy_to_user() failed unexpectedly?!\n"); + goto free_user; + } + + pr_info("attempting bad copy_to_user() from kernel %s: %px\n", + name, kaddr); + if (copy_to_user((void __user *)uaddr, kaddr, unconst + PAGE_SIZE)) { + pr_warn("Good, copy_to_user() failed, but lacked Oops(?!)\n"); + goto free_user; + } + + pr_err("FAIL: bad copy_to_user() not detected!\n"); + pr_expected_config_param(CONFIG_HARDENED_USERCOPY, "hardened_usercopy"); + +free_user: + vm_munmap(uaddr, PAGE_SIZE); +} + +static void lkdtm_USERCOPY_VMALLOC(void) +{ + void *addr; + + addr = vmalloc(PAGE_SIZE); + if (!addr) { + pr_err("vmalloc() failed!?\n"); + return; + } + do_usercopy_page_span("vmalloc", addr); + vfree(addr); +} + +static void lkdtm_USERCOPY_FOLIO(void) +{ + struct folio *folio; + void *addr; + + /* + * FIXME: Folio checking currently misses 0-order allocations, so + * allocate and bump forward to the last page. + */ + folio = folio_alloc(GFP_KERNEL | __GFP_ZERO, 1); + if (!folio) { + pr_err("folio_alloc() failed!?\n"); + return; + } + addr = folio_address(folio); + if (addr) + do_usercopy_page_span("folio", addr + PAGE_SIZE); + else + pr_err("folio_address() failed?!\n"); + folio_put(folio); +} + +void __init lkdtm_usercopy_init(void) +{ + /* Prepare cache that lacks SLAB_USERCOPY flag. */ + whitelist_cache = + kmem_cache_create_usercopy("lkdtm-usercopy", cache_size, + 0, 0, + cache_size / 4, + cache_size / 16, + NULL); +} + +void __exit lkdtm_usercopy_exit(void) +{ + kmem_cache_destroy(whitelist_cache); +} + +static struct crashtype crashtypes[] = { + CRASHTYPE(USERCOPY_SLAB_SIZE_TO), + CRASHTYPE(USERCOPY_SLAB_SIZE_FROM), + CRASHTYPE(USERCOPY_SLAB_WHITELIST_TO), + CRASHTYPE(USERCOPY_SLAB_WHITELIST_FROM), + CRASHTYPE(USERCOPY_STACK_FRAME_TO), + CRASHTYPE(USERCOPY_STACK_FRAME_FROM), + CRASHTYPE(USERCOPY_STACK_BEYOND), + CRASHTYPE(USERCOPY_VMALLOC), + CRASHTYPE(USERCOPY_FOLIO), + CRASHTYPE(USERCOPY_KERNEL), +}; + +struct crashtype_category usercopy_crashtypes = { + .crashtypes = crashtypes, + .len = ARRAY_SIZE(crashtypes), +}; diff --git a/drivers/misc/mchp_pci1xxxx/Kconfig b/drivers/misc/mchp_pci1xxxx/Kconfig new file mode 100644 index 0000000000..64e457581f --- /dev/null +++ b/drivers/misc/mchp_pci1xxxx/Kconfig @@ -0,0 +1,14 @@ +config GP_PCI1XXXX + tristate "Microchip PCI1XXXX PCIe to GPIO Expander + OTP/EEPROM manager" + depends on PCI + depends on GPIOLIB + depends on NVMEM_SYSFS + select GPIOLIB_IRQCHIP + select AUXILIARY_BUS + help + PCI1XXXX is a PCIe GEN 3 switch with one of the endpoints having + multiple functions and one of the functions is a GPIO controller + which also has registers to interface with the OTP and EEPROM. + Select yes, no or module here to include or exclude the driver + for the GPIO function. 
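# As a rough sketch, a configuration that builds this driver as a module
# would carry at least the following symbols; selection of the remaining
# dependencies is left to the normal Kconfig machinery:
#
#   CONFIG_PCI=y
#   CONFIG_GPIOLIB=y
#   CONFIG_NVMEM_SYSFS=y
#   CONFIG_GP_PCI1XXXX=m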
+ diff --git a/drivers/misc/mchp_pci1xxxx/Makefile b/drivers/misc/mchp_pci1xxxx/Makefile new file mode 100644 index 0000000000..ae31251dab --- /dev/null +++ b/drivers/misc/mchp_pci1xxxx/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_GP_PCI1XXXX) := mchp_pci1xxxx_gp.o mchp_pci1xxxx_gpio.o mchp_pci1xxxx_otpe2p.o diff --git a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c new file mode 100644 index 0000000000..32af2b14ff --- /dev/null +++ b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2022 Microchip Technology Inc. + +#include +#include +#include +#include +#include +#include +#include +#include +#include "mchp_pci1xxxx_gp.h" + +struct aux_bus_device { + struct auxiliary_device_wrapper *aux_device_wrapper[2]; +}; + +static DEFINE_IDA(gp_client_ida); +static const char aux_dev_otp_e2p_name[15] = "gp_otp_e2p"; +static const char aux_dev_gpio_name[15] = "gp_gpio"; + +static void gp_auxiliary_device_release(struct device *dev) +{ + struct auxiliary_device_wrapper *aux_device_wrapper = + (struct auxiliary_device_wrapper *)container_of(dev, + struct auxiliary_device_wrapper, aux_dev.dev); + + ida_free(&gp_client_ida, aux_device_wrapper->aux_dev.id); + kfree(aux_device_wrapper); +} + +static int gp_aux_bus_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct aux_bus_device *aux_bus; + int retval; + + retval = pcim_enable_device(pdev); + if (retval) + return retval; + + aux_bus = devm_kzalloc(&pdev->dev, sizeof(*aux_bus), GFP_KERNEL); + if (!aux_bus) + return -ENOMEM; + + aux_bus->aux_device_wrapper[0] = kzalloc(sizeof(*aux_bus->aux_device_wrapper[0]), + GFP_KERNEL); + if (!aux_bus->aux_device_wrapper[0]) + return -ENOMEM; + + retval = ida_alloc(&gp_client_ida, GFP_KERNEL); + if (retval < 0) + goto err_ida_alloc_0; + + aux_bus->aux_device_wrapper[0]->aux_dev.name = aux_dev_otp_e2p_name; + aux_bus->aux_device_wrapper[0]->aux_dev.dev.parent = &pdev->dev; + aux_bus->aux_device_wrapper[0]->aux_dev.dev.release = gp_auxiliary_device_release; + aux_bus->aux_device_wrapper[0]->aux_dev.id = retval; + + aux_bus->aux_device_wrapper[0]->gp_aux_data.region_start = pci_resource_start(pdev, 0); + aux_bus->aux_device_wrapper[0]->gp_aux_data.region_length = pci_resource_end(pdev, 0); + + retval = auxiliary_device_init(&aux_bus->aux_device_wrapper[0]->aux_dev); + if (retval < 0) + goto err_aux_dev_init_0; + + retval = auxiliary_device_add(&aux_bus->aux_device_wrapper[0]->aux_dev); + if (retval) + goto err_aux_dev_add_0; + + aux_bus->aux_device_wrapper[1] = kzalloc(sizeof(*aux_bus->aux_device_wrapper[1]), + GFP_KERNEL); + if (!aux_bus->aux_device_wrapper[1]) + return -ENOMEM; + + retval = ida_alloc(&gp_client_ida, GFP_KERNEL); + if (retval < 0) + goto err_ida_alloc_1; + + aux_bus->aux_device_wrapper[1]->aux_dev.name = aux_dev_gpio_name; + aux_bus->aux_device_wrapper[1]->aux_dev.dev.parent = &pdev->dev; + aux_bus->aux_device_wrapper[1]->aux_dev.dev.release = gp_auxiliary_device_release; + aux_bus->aux_device_wrapper[1]->aux_dev.id = retval; + + aux_bus->aux_device_wrapper[1]->gp_aux_data.region_start = pci_resource_start(pdev, 0); + aux_bus->aux_device_wrapper[1]->gp_aux_data.region_length = pci_resource_end(pdev, 0); + + retval = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES); + + if (retval < 0) + goto err_aux_dev_init_1; + + retval = pci_irq_vector(pdev, 0); + if (retval < 0) + goto err_aux_dev_init_1; + + pdev->irq = retval; + aux_bus->aux_device_wrapper[1]->gp_aux_data.irq_num = 
pdev->irq; + + retval = auxiliary_device_init(&aux_bus->aux_device_wrapper[1]->aux_dev); + if (retval < 0) + goto err_aux_dev_init_1; + + retval = auxiliary_device_add(&aux_bus->aux_device_wrapper[1]->aux_dev); + if (retval) + goto err_aux_dev_add_1; + + pci_set_drvdata(pdev, aux_bus); + pci_set_master(pdev); + + return 0; + +err_aux_dev_add_1: + auxiliary_device_uninit(&aux_bus->aux_device_wrapper[1]->aux_dev); + +err_aux_dev_init_1: + ida_free(&gp_client_ida, aux_bus->aux_device_wrapper[1]->aux_dev.id); + +err_ida_alloc_1: + kfree(aux_bus->aux_device_wrapper[1]); + +err_aux_dev_add_0: + auxiliary_device_uninit(&aux_bus->aux_device_wrapper[0]->aux_dev); + +err_aux_dev_init_0: + ida_free(&gp_client_ida, aux_bus->aux_device_wrapper[0]->aux_dev.id); + +err_ida_alloc_0: + kfree(aux_bus->aux_device_wrapper[0]); + + return retval; +} + +static void gp_aux_bus_remove(struct pci_dev *pdev) +{ + struct aux_bus_device *aux_bus = pci_get_drvdata(pdev); + + auxiliary_device_delete(&aux_bus->aux_device_wrapper[0]->aux_dev); + auxiliary_device_uninit(&aux_bus->aux_device_wrapper[0]->aux_dev); + auxiliary_device_delete(&aux_bus->aux_device_wrapper[1]->aux_dev); + auxiliary_device_uninit(&aux_bus->aux_device_wrapper[1]->aux_dev); +} + +static const struct pci_device_id pci1xxxx_tbl[] = { + { PCI_DEVICE(0x1055, 0xA005) }, + { PCI_DEVICE(0x1055, 0xA015) }, + { PCI_DEVICE(0x1055, 0xA025) }, + { PCI_DEVICE(0x1055, 0xA035) }, + { PCI_DEVICE(0x1055, 0xA045) }, + { PCI_DEVICE(0x1055, 0xA055) }, + {0,} +}; +MODULE_DEVICE_TABLE(pci, pci1xxxx_tbl); + +static struct pci_driver pci1xxxx_gp_driver = { + .name = "PCI1xxxxGP", + .id_table = pci1xxxx_tbl, + .probe = gp_aux_bus_probe, + .remove = gp_aux_bus_remove, +}; + +module_pci_driver(pci1xxxx_gp_driver); + +MODULE_DESCRIPTION("Microchip Technology Inc. PCI1xxxx GP expander"); +MODULE_AUTHOR("Kumaravel Thiagarajan "); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.h b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.h new file mode 100644 index 0000000000..37eec73b20 --- /dev/null +++ b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2022 Microchip Technology Inc. */ + +#ifndef _GPIO_PCI1XXXX_H +#define _GPIO_PCI1XXXX_H + +#include +#include +#include +#include +#include + +/* Perform operations like variable length write, read and write with read back for OTP / EEPROM + * Perform bit mode write in OTP + */ + +struct gp_aux_data_type { + int irq_num; + resource_size_t region_start; + resource_size_t region_length; +}; + +struct auxiliary_device_wrapper { + struct auxiliary_device aux_dev; + struct gp_aux_data_type gp_aux_data; +}; + +#endif diff --git a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c new file mode 100644 index 0000000000..e616e3ec2b --- /dev/null +++ b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c @@ -0,0 +1,433 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2022 Microchip Technology Inc. 
+// pci1xxxx gpio driver + +#include +#include +#include +#include +#include +#include +#include + +#include "mchp_pci1xxxx_gp.h" + +#define PCI1XXXX_NR_PINS 93 +#define PERI_GEN_RESET 0 +#define OUT_EN_OFFSET(x) ((((x) / 32) * 4) + 0x400) +#define INP_EN_OFFSET(x) ((((x) / 32) * 4) + 0x400 + 0x10) +#define OUT_OFFSET(x) ((((x) / 32) * 4) + 0x400 + 0x20) +#define INP_OFFSET(x) ((((x) / 32) * 4) + 0x400 + 0x30) +#define PULLUP_OFFSET(x) ((((x) / 32) * 4) + 0x400 + 0x40) +#define PULLDOWN_OFFSET(x) ((((x) / 32) * 4) + 0x400 + 0x50) +#define OPENDRAIN_OFFSET(x) ((((x) / 32) * 4) + 0x400 + 0x60) +#define WAKEMASK_OFFSET(x) ((((x) / 32) * 4) + 0x400 + 0x70) +#define MODE_OFFSET(x) ((((x) / 32) * 4) + 0x400 + 0x80) +#define INTR_LO_TO_HI_EDGE_CONFIG(x) ((((x) / 32) * 4) + 0x400 + 0x90) +#define INTR_HI_TO_LO_EDGE_CONFIG(x) ((((x) / 32) * 4) + 0x400 + 0xA0) +#define INTR_LEVEL_CONFIG_OFFSET(x) ((((x) / 32) * 4) + 0x400 + 0xB0) +#define INTR_LEVEL_MASK_OFFSET(x) ((((x) / 32) * 4) + 0x400 + 0xC0) +#define INTR_STAT_OFFSET(x) ((((x) / 32) * 4) + 0x400 + 0xD0) +#define DEBOUNCE_OFFSET(x) ((((x) / 32) * 4) + 0x400 + 0xE0) +#define PIO_GLOBAL_CONFIG_OFFSET (0x400 + 0xF0) +#define PIO_PCI_CTRL_REG_OFFSET (0x400 + 0xF4) +#define INTR_MASK_OFFSET(x) ((((x) / 32) * 4) + 0x400 + 0x100) +#define INTR_STATUS_OFFSET(x) (((x) * 4) + 0x400 + 0xD0) + +struct pci1xxxx_gpio { + struct auxiliary_device *aux_dev; + void __iomem *reg_base; + struct gpio_chip gpio; + spinlock_t lock; + int irq_base; +}; + +static int pci1xxxx_gpio_get_direction(struct gpio_chip *gpio, unsigned int nr) +{ + struct pci1xxxx_gpio *priv = gpiochip_get_data(gpio); + u32 data; + int ret = -EINVAL; + + data = readl(priv->reg_base + INP_EN_OFFSET(nr)); + if (data & BIT(nr % 32)) { + ret = 1; + } else { + data = readl(priv->reg_base + OUT_EN_OFFSET(nr)); + if (data & BIT(nr % 32)) + ret = 0; + } + + return ret; +} + +static inline void pci1xxx_assign_bit(void __iomem *base_addr, unsigned int reg_offset, + unsigned int bitpos, bool set) +{ + u32 data; + + data = readl(base_addr + reg_offset); + if (set) + data |= BIT(bitpos); + else + data &= ~BIT(bitpos); + writel(data, base_addr + reg_offset); +} + +static int pci1xxxx_gpio_direction_input(struct gpio_chip *gpio, unsigned int nr) +{ + struct pci1xxxx_gpio *priv = gpiochip_get_data(gpio); + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + pci1xxx_assign_bit(priv->reg_base, INP_EN_OFFSET(nr), (nr % 32), true); + pci1xxx_assign_bit(priv->reg_base, OUT_EN_OFFSET(nr), (nr % 32), false); + spin_unlock_irqrestore(&priv->lock, flags); + + return 0; +} + +static int pci1xxxx_gpio_get(struct gpio_chip *gpio, unsigned int nr) +{ + struct pci1xxxx_gpio *priv = gpiochip_get_data(gpio); + + return (readl(priv->reg_base + INP_OFFSET(nr)) >> (nr % 32)) & 1; +} + +static int pci1xxxx_gpio_direction_output(struct gpio_chip *gpio, + unsigned int nr, int val) +{ + struct pci1xxxx_gpio *priv = gpiochip_get_data(gpio); + unsigned long flags; + u32 data; + + spin_lock_irqsave(&priv->lock, flags); + pci1xxx_assign_bit(priv->reg_base, INP_EN_OFFSET(nr), (nr % 32), false); + pci1xxx_assign_bit(priv->reg_base, OUT_EN_OFFSET(nr), (nr % 32), true); + data = readl(priv->reg_base + OUT_OFFSET(nr)); + if (val) + data |= (1 << (nr % 32)); + else + data &= ~(1 << (nr % 32)); + writel(data, priv->reg_base + OUT_OFFSET(nr)); + spin_unlock_irqrestore(&priv->lock, flags); + + return 0; +} + +static void pci1xxxx_gpio_set(struct gpio_chip *gpio, + unsigned int nr, int val) +{ + struct pci1xxxx_gpio *priv = 
gpiochip_get_data(gpio); + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + pci1xxx_assign_bit(priv->reg_base, OUT_OFFSET(nr), (nr % 32), val); + spin_unlock_irqrestore(&priv->lock, flags); +} + +static int pci1xxxx_gpio_set_config(struct gpio_chip *gpio, unsigned int offset, + unsigned long config) +{ + struct pci1xxxx_gpio *priv = gpiochip_get_data(gpio); + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&priv->lock, flags); + switch (pinconf_to_config_param(config)) { + case PIN_CONFIG_BIAS_PULL_UP: + pci1xxx_assign_bit(priv->reg_base, PULLUP_OFFSET(offset), (offset % 32), true); + break; + case PIN_CONFIG_BIAS_PULL_DOWN: + pci1xxx_assign_bit(priv->reg_base, PULLDOWN_OFFSET(offset), (offset % 32), true); + break; + case PIN_CONFIG_BIAS_DISABLE: + pci1xxx_assign_bit(priv->reg_base, PULLUP_OFFSET(offset), (offset % 32), false); + pci1xxx_assign_bit(priv->reg_base, PULLDOWN_OFFSET(offset), (offset % 32), false); + break; + case PIN_CONFIG_DRIVE_OPEN_DRAIN: + pci1xxx_assign_bit(priv->reg_base, OPENDRAIN_OFFSET(offset), (offset % 32), true); + break; + default: + ret = -EOPNOTSUPP; + break; + } + spin_unlock_irqrestore(&priv->lock, flags); + + return ret; +} + +static void pci1xxxx_gpio_irq_ack(struct irq_data *data) +{ + struct gpio_chip *chip = irq_data_get_irq_chip_data(data); + struct pci1xxxx_gpio *priv = gpiochip_get_data(chip); + unsigned int gpio = irqd_to_hwirq(data); + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + pci1xxx_assign_bit(priv->reg_base, INTR_STAT_OFFSET(gpio), (gpio % 32), true); + spin_unlock_irqrestore(&priv->lock, flags); +} + +static void pci1xxxx_gpio_irq_set_mask(struct irq_data *data, bool set) +{ + struct gpio_chip *chip = irq_data_get_irq_chip_data(data); + struct pci1xxxx_gpio *priv = gpiochip_get_data(chip); + unsigned int gpio = irqd_to_hwirq(data); + unsigned long flags; + + if (!set) + gpiochip_enable_irq(chip, gpio); + spin_lock_irqsave(&priv->lock, flags); + pci1xxx_assign_bit(priv->reg_base, INTR_MASK_OFFSET(gpio), (gpio % 32), set); + spin_unlock_irqrestore(&priv->lock, flags); + if (set) + gpiochip_disable_irq(chip, gpio); +} + +static void pci1xxxx_gpio_irq_mask(struct irq_data *data) +{ + pci1xxxx_gpio_irq_set_mask(data, true); +} + +static void pci1xxxx_gpio_irq_unmask(struct irq_data *data) +{ + pci1xxxx_gpio_irq_set_mask(data, false); +} + +static int pci1xxxx_gpio_set_type(struct irq_data *data, unsigned int trigger_type) +{ + struct gpio_chip *chip = irq_data_get_irq_chip_data(data); + struct pci1xxxx_gpio *priv = gpiochip_get_data(chip); + unsigned int gpio = irqd_to_hwirq(data); + unsigned int bitpos = gpio % 32; + + if (trigger_type & IRQ_TYPE_EDGE_FALLING) { + pci1xxx_assign_bit(priv->reg_base, INTR_HI_TO_LO_EDGE_CONFIG(gpio), + bitpos, false); + pci1xxx_assign_bit(priv->reg_base, MODE_OFFSET(gpio), + bitpos, false); + irq_set_handler_locked(data, handle_edge_irq); + } else { + pci1xxx_assign_bit(priv->reg_base, INTR_HI_TO_LO_EDGE_CONFIG(gpio), + bitpos, true); + } + + if (trigger_type & IRQ_TYPE_EDGE_RISING) { + pci1xxx_assign_bit(priv->reg_base, INTR_LO_TO_HI_EDGE_CONFIG(gpio), + bitpos, false); + pci1xxx_assign_bit(priv->reg_base, MODE_OFFSET(gpio), bitpos, + false); + irq_set_handler_locked(data, handle_edge_irq); + } else { + pci1xxx_assign_bit(priv->reg_base, INTR_LO_TO_HI_EDGE_CONFIG(gpio), + bitpos, true); + } + + if (trigger_type & IRQ_TYPE_LEVEL_LOW) { + pci1xxx_assign_bit(priv->reg_base, INTR_LEVEL_CONFIG_OFFSET(gpio), + bitpos, true); + pci1xxx_assign_bit(priv->reg_base, 
INTR_LEVEL_MASK_OFFSET(gpio), + bitpos, false); + pci1xxx_assign_bit(priv->reg_base, MODE_OFFSET(gpio), bitpos, + true); + irq_set_handler_locked(data, handle_edge_irq); + } + + if (trigger_type & IRQ_TYPE_LEVEL_HIGH) { + pci1xxx_assign_bit(priv->reg_base, INTR_LEVEL_CONFIG_OFFSET(gpio), + bitpos, false); + pci1xxx_assign_bit(priv->reg_base, INTR_LEVEL_MASK_OFFSET(gpio), + bitpos, false); + pci1xxx_assign_bit(priv->reg_base, MODE_OFFSET(gpio), bitpos, + true); + irq_set_handler_locked(data, handle_edge_irq); + } + + if ((!(trigger_type & IRQ_TYPE_LEVEL_LOW)) && (!(trigger_type & IRQ_TYPE_LEVEL_HIGH))) + pci1xxx_assign_bit(priv->reg_base, INTR_LEVEL_MASK_OFFSET(gpio), bitpos, true); + + return true; +} + +static irqreturn_t pci1xxxx_gpio_irq_handler(int irq, void *dev_id) +{ + struct pci1xxxx_gpio *priv = dev_id; + struct gpio_chip *gc = &priv->gpio; + unsigned long int_status = 0; + unsigned long flags; + u8 pincount; + int bit; + u8 gpiobank; + + spin_lock_irqsave(&priv->lock, flags); + pci1xxx_assign_bit(priv->reg_base, PIO_GLOBAL_CONFIG_OFFSET, 16, true); + spin_unlock_irqrestore(&priv->lock, flags); + for (gpiobank = 0; gpiobank < 3; gpiobank++) { + spin_lock_irqsave(&priv->lock, flags); + int_status = readl(priv->reg_base + INTR_STATUS_OFFSET(gpiobank)); + spin_unlock_irqrestore(&priv->lock, flags); + if (gpiobank == 2) + pincount = 29; + else + pincount = 32; + for_each_set_bit(bit, &int_status, pincount) { + unsigned int irq; + + spin_lock_irqsave(&priv->lock, flags); + writel(BIT(bit), priv->reg_base + INTR_STATUS_OFFSET(gpiobank)); + spin_unlock_irqrestore(&priv->lock, flags); + irq = irq_find_mapping(gc->irq.domain, (bit + (gpiobank * 32))); + generic_handle_irq(irq); + } + } + spin_lock_irqsave(&priv->lock, flags); + pci1xxx_assign_bit(priv->reg_base, PIO_GLOBAL_CONFIG_OFFSET, 16, false); + spin_unlock_irqrestore(&priv->lock, flags); + + return IRQ_HANDLED; +} + +static const struct irq_chip pci1xxxx_gpio_irqchip = { + .name = "pci1xxxx_gpio", + .irq_ack = pci1xxxx_gpio_irq_ack, + .irq_mask = pci1xxxx_gpio_irq_mask, + .irq_unmask = pci1xxxx_gpio_irq_unmask, + .irq_set_type = pci1xxxx_gpio_set_type, + .flags = IRQCHIP_IMMUTABLE, + GPIOCHIP_IRQ_RESOURCE_HELPERS, +}; + +static int pci1xxxx_gpio_suspend(struct device *dev) +{ + struct pci1xxxx_gpio *priv = dev_get_drvdata(dev); + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + pci1xxx_assign_bit(priv->reg_base, PIO_GLOBAL_CONFIG_OFFSET, + 16, true); + pci1xxx_assign_bit(priv->reg_base, PIO_GLOBAL_CONFIG_OFFSET, + 17, false); + pci1xxx_assign_bit(priv->reg_base, PERI_GEN_RESET, 16, true); + spin_unlock_irqrestore(&priv->lock, flags); + + return 0; +} + +static int pci1xxxx_gpio_resume(struct device *dev) +{ + struct pci1xxxx_gpio *priv = dev_get_drvdata(dev); + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + pci1xxx_assign_bit(priv->reg_base, PIO_GLOBAL_CONFIG_OFFSET, + 17, true); + pci1xxx_assign_bit(priv->reg_base, PIO_GLOBAL_CONFIG_OFFSET, + 16, false); + pci1xxx_assign_bit(priv->reg_base, PERI_GEN_RESET, 16, false); + spin_unlock_irqrestore(&priv->lock, flags); + + return 0; +} + +static int pci1xxxx_gpio_setup(struct pci1xxxx_gpio *priv, int irq) +{ + struct gpio_chip *gchip = &priv->gpio; + struct gpio_irq_chip *girq; + int retval; + + gchip->label = dev_name(&priv->aux_dev->dev); + gchip->parent = &priv->aux_dev->dev; + gchip->owner = THIS_MODULE; + gchip->direction_input = pci1xxxx_gpio_direction_input; + gchip->direction_output = pci1xxxx_gpio_direction_output; + gchip->get_direction = 
pci1xxxx_gpio_get_direction; + gchip->get = pci1xxxx_gpio_get; + gchip->set = pci1xxxx_gpio_set; + gchip->set_config = pci1xxxx_gpio_set_config; + gchip->dbg_show = NULL; + gchip->base = -1; + gchip->ngpio = PCI1XXXX_NR_PINS; + gchip->can_sleep = false; + + retval = devm_request_threaded_irq(&priv->aux_dev->dev, irq, + NULL, pci1xxxx_gpio_irq_handler, + IRQF_ONESHOT, "PCI1xxxxGPIO", priv); + + if (retval) + return retval; + + girq = &priv->gpio.irq; + gpio_irq_chip_set_chip(girq, &pci1xxxx_gpio_irqchip); + girq->parent_handler = NULL; + girq->num_parents = 0; + girq->parents = NULL; + girq->default_type = IRQ_TYPE_NONE; + girq->handler = handle_bad_irq; + + return 0; +} + +static int pci1xxxx_gpio_probe(struct auxiliary_device *aux_dev, + const struct auxiliary_device_id *id) + +{ + struct auxiliary_device_wrapper *aux_dev_wrapper; + struct gp_aux_data_type *pdata; + struct pci1xxxx_gpio *priv; + int retval; + + aux_dev_wrapper = (struct auxiliary_device_wrapper *) + container_of(aux_dev, struct auxiliary_device_wrapper, aux_dev); + + pdata = &aux_dev_wrapper->gp_aux_data; + + if (!pdata) + return -EINVAL; + + priv = devm_kzalloc(&aux_dev->dev, sizeof(struct pci1xxxx_gpio), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + spin_lock_init(&priv->lock); + priv->aux_dev = aux_dev; + + if (!devm_request_mem_region(&aux_dev->dev, pdata->region_start, 0x800, aux_dev->name)) + return -EBUSY; + + priv->reg_base = devm_ioremap(&aux_dev->dev, pdata->region_start, 0x800); + if (!priv->reg_base) + return -ENOMEM; + + writel(0x0264, (priv->reg_base + 0x400 + 0xF0)); + + retval = pci1xxxx_gpio_setup(priv, pdata->irq_num); + + if (retval < 0) + return retval; + + dev_set_drvdata(&aux_dev->dev, priv); + + return devm_gpiochip_add_data(&aux_dev->dev, &priv->gpio, priv); +} + +static DEFINE_SIMPLE_DEV_PM_OPS(pci1xxxx_gpio_pm_ops, pci1xxxx_gpio_suspend, pci1xxxx_gpio_resume); + +static const struct auxiliary_device_id pci1xxxx_gpio_auxiliary_id_table[] = { + {.name = "mchp_pci1xxxx_gp.gp_gpio"}, + {} +}; +MODULE_DEVICE_TABLE(auxiliary, pci1xxxx_gpio_auxiliary_id_table); + +static struct auxiliary_driver pci1xxxx_gpio_driver = { + .driver = { + .name = "PCI1xxxxGPIO", + .pm = &pci1xxxx_gpio_pm_ops, + }, + .probe = pci1xxxx_gpio_probe, + .id_table = pci1xxxx_gpio_auxiliary_id_table +}; +module_auxiliary_driver(pci1xxxx_gpio_driver); + +MODULE_DESCRIPTION("Microchip Technology Inc. PCI1xxxx GPIO controller"); +MODULE_AUTHOR("Kumaravel Thiagarajan "); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_otpe2p.c b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_otpe2p.c new file mode 100644 index 0000000000..16695cb5e6 --- /dev/null +++ b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_otpe2p.c @@ -0,0 +1,443 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2022-2023 Microchip Technology Inc. 
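// (For reference: this auxiliary driver binds to the "gp_otp_e2p" device
// created by mchp_pci1xxxx_gp.c; auxiliary-bus match strings take the form
// "<module name>.<device name>", which is why the id table at the bottom
// of this file lists "mchp_pci1xxxx_gp.gp_otp_e2p".)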
+// PCI1xxxx OTP/EEPROM driver + +#include +#include +#include +#include +#include + +#include "mchp_pci1xxxx_gp.h" + +#define AUX_DRIVER_NAME "PCI1xxxxOTPE2P" +#define EEPROM_NAME "pci1xxxx_eeprom" +#define OTP_NAME "pci1xxxx_otp" + +#define PERI_PF3_SYSTEM_REG_ADDR_BASE 0x2000 +#define PERI_PF3_SYSTEM_REG_LENGTH 0x4000 + +#define EEPROM_SIZE_BYTES 8192 +#define OTP_SIZE_BYTES 8192 + +#define CONFIG_REG_ADDR_BASE 0 +#define EEPROM_REG_ADDR_BASE 0x0E00 +#define OTP_REG_ADDR_BASE 0x1000 + +#define MMAP_OTP_OFFSET(x) (OTP_REG_ADDR_BASE + (x)) +#define MMAP_EEPROM_OFFSET(x) (EEPROM_REG_ADDR_BASE + (x)) +#define MMAP_CFG_OFFSET(x) (CONFIG_REG_ADDR_BASE + (x)) + +#define EEPROM_CMD_REG 0x00 +#define EEPROM_DATA_REG 0x04 + +#define EEPROM_CMD_EPC_WRITE (BIT(29) | BIT(28)) +#define EEPROM_CMD_EPC_TIMEOUT_BIT BIT(17) +#define EEPROM_CMD_EPC_BUSY_BIT BIT(31) + +#define STATUS_READ_DELAY_US 1 +#define STATUS_READ_TIMEOUT_US 20000 + +#define OTP_ADDR_HIGH_OFFSET 0x04 +#define OTP_ADDR_LOW_OFFSET 0x08 +#define OTP_PRGM_DATA_OFFSET 0x10 +#define OTP_PRGM_MODE_OFFSET 0x14 +#define OTP_RD_DATA_OFFSET 0x18 +#define OTP_FUNC_CMD_OFFSET 0x20 +#define OTP_CMD_GO_OFFSET 0x28 +#define OTP_PASS_FAIL_OFFSET 0x2C +#define OTP_STATUS_OFFSET 0x30 + +#define OTP_FUNC_RD_BIT BIT(0) +#define OTP_FUNC_PGM_BIT BIT(1) +#define OTP_CMD_GO_BIT BIT(0) +#define OTP_STATUS_BUSY_BIT BIT(0) +#define OTP_PGM_MODE_BYTE_BIT BIT(0) +#define OTP_FAIL_BIT BIT(0) + +#define OTP_PWR_DN_BIT BIT(0) +#define OTP_PWR_DN_OFFSET 0x00 + +#define CFG_SYS_LOCK_OFFSET 0xA0 +#define CFG_SYS_LOCK_PF3 BIT(5) + +#define BYTE_LOW (GENMASK(7, 0)) +#define BYTE_HIGH (GENMASK(12, 8)) + +struct pci1xxxx_otp_eeprom_device { + struct auxiliary_device *pdev; + void __iomem *reg_base; + struct nvmem_config nvmem_config_eeprom; + struct nvmem_device *nvmem_eeprom; + struct nvmem_config nvmem_config_otp; + struct nvmem_device *nvmem_otp; +}; + +static int set_sys_lock(struct pci1xxxx_otp_eeprom_device *priv) +{ + void __iomem *sys_lock = priv->reg_base + + MMAP_CFG_OFFSET(CFG_SYS_LOCK_OFFSET); + u8 data; + + writel(CFG_SYS_LOCK_PF3, sys_lock); + data = readl(sys_lock); + if (data != CFG_SYS_LOCK_PF3) + return -EPERM; + + return 0; +} + +static void release_sys_lock(struct pci1xxxx_otp_eeprom_device *priv) +{ + void __iomem *sys_lock = priv->reg_base + + MMAP_CFG_OFFSET(CFG_SYS_LOCK_OFFSET); + writel(0, sys_lock); +} + +static bool is_eeprom_responsive(struct pci1xxxx_otp_eeprom_device *priv) +{ + void __iomem *rb = priv->reg_base; + u32 regval; + int ret; + + writel(EEPROM_CMD_EPC_TIMEOUT_BIT, + rb + MMAP_EEPROM_OFFSET(EEPROM_CMD_REG)); + writel(EEPROM_CMD_EPC_BUSY_BIT, + rb + MMAP_EEPROM_OFFSET(EEPROM_CMD_REG)); + + /* Wait for the EPC_BUSY bit to get cleared or timeout bit to get set*/ + ret = read_poll_timeout(readl, regval, !(regval & EEPROM_CMD_EPC_BUSY_BIT), + STATUS_READ_DELAY_US, STATUS_READ_TIMEOUT_US, + true, rb + MMAP_EEPROM_OFFSET(EEPROM_CMD_REG)); + + /* Return failure if either of software or hardware timeouts happen */ + if (ret < 0 || (!ret && (regval & EEPROM_CMD_EPC_TIMEOUT_BIT))) + return false; + + return true; +} + +static int pci1xxxx_eeprom_read(void *priv_t, unsigned int off, + void *buf_t, size_t count) +{ + struct pci1xxxx_otp_eeprom_device *priv = priv_t; + void __iomem *rb = priv->reg_base; + char *buf = buf_t; + u32 regval; + u32 byte; + int ret; + + if (off >= priv->nvmem_config_eeprom.size) + return -EFAULT; + + if ((off + count) > priv->nvmem_config_eeprom.size) + count = priv->nvmem_config_eeprom.size - off; + + ret = 
set_sys_lock(priv); + if (ret) + return ret; + + for (byte = 0; byte < count; byte++) { + writel(EEPROM_CMD_EPC_BUSY_BIT | (off + byte), rb + + MMAP_EEPROM_OFFSET(EEPROM_CMD_REG)); + + ret = read_poll_timeout(readl, regval, + !(regval & EEPROM_CMD_EPC_BUSY_BIT), + STATUS_READ_DELAY_US, + STATUS_READ_TIMEOUT_US, true, + rb + MMAP_EEPROM_OFFSET(EEPROM_CMD_REG)); + if (ret < 0 || (!ret && (regval & EEPROM_CMD_EPC_TIMEOUT_BIT))) { + ret = -EIO; + goto error; + } + + buf[byte] = readl(rb + MMAP_EEPROM_OFFSET(EEPROM_DATA_REG)); + } + ret = byte; +error: + release_sys_lock(priv); + return ret; +} + +static int pci1xxxx_eeprom_write(void *priv_t, unsigned int off, + void *value_t, size_t count) +{ + struct pci1xxxx_otp_eeprom_device *priv = priv_t; + void __iomem *rb = priv->reg_base; + char *value = value_t; + u32 regval; + u32 byte; + int ret; + + if (off >= priv->nvmem_config_eeprom.size) + return -EFAULT; + + if ((off + count) > priv->nvmem_config_eeprom.size) + count = priv->nvmem_config_eeprom.size - off; + + ret = set_sys_lock(priv); + if (ret) + return ret; + + for (byte = 0; byte < count; byte++) { + writel(*(value + byte), rb + MMAP_EEPROM_OFFSET(EEPROM_DATA_REG)); + regval = EEPROM_CMD_EPC_TIMEOUT_BIT | EEPROM_CMD_EPC_WRITE | + (off + byte); + writel(regval, rb + MMAP_EEPROM_OFFSET(EEPROM_CMD_REG)); + writel(EEPROM_CMD_EPC_BUSY_BIT | regval, + rb + MMAP_EEPROM_OFFSET(EEPROM_CMD_REG)); + + ret = read_poll_timeout(readl, regval, + !(regval & EEPROM_CMD_EPC_BUSY_BIT), + STATUS_READ_DELAY_US, + STATUS_READ_TIMEOUT_US, true, + rb + MMAP_EEPROM_OFFSET(EEPROM_CMD_REG)); + if (ret < 0 || (!ret && (regval & EEPROM_CMD_EPC_TIMEOUT_BIT))) { + ret = -EIO; + goto error; + } + } + ret = byte; +error: + release_sys_lock(priv); + return ret; +} + +static void otp_device_set_address(struct pci1xxxx_otp_eeprom_device *priv, + u16 address) +{ + u16 lo, hi; + + lo = address & BYTE_LOW; + hi = (address & BYTE_HIGH) >> 8; + writew(lo, priv->reg_base + MMAP_OTP_OFFSET(OTP_ADDR_LOW_OFFSET)); + writew(hi, priv->reg_base + MMAP_OTP_OFFSET(OTP_ADDR_HIGH_OFFSET)); +} + +static int pci1xxxx_otp_read(void *priv_t, unsigned int off, + void *buf_t, size_t count) +{ + struct pci1xxxx_otp_eeprom_device *priv = priv_t; + void __iomem *rb = priv->reg_base; + char *buf = buf_t; + u32 regval; + u32 byte; + int ret; + u8 data; + + if (off >= priv->nvmem_config_otp.size) + return -EFAULT; + + if ((off + count) > priv->nvmem_config_otp.size) + count = priv->nvmem_config_otp.size - off; + + ret = set_sys_lock(priv); + if (ret) + return ret; + + for (byte = 0; byte < count; byte++) { + otp_device_set_address(priv, (u16)(off + byte)); + data = readl(rb + MMAP_OTP_OFFSET(OTP_FUNC_CMD_OFFSET)); + writel(data | OTP_FUNC_RD_BIT, + rb + MMAP_OTP_OFFSET(OTP_FUNC_CMD_OFFSET)); + data = readl(rb + MMAP_OTP_OFFSET(OTP_CMD_GO_OFFSET)); + writel(data | OTP_CMD_GO_BIT, + rb + MMAP_OTP_OFFSET(OTP_CMD_GO_OFFSET)); + + ret = read_poll_timeout(readl, regval, + !(regval & OTP_STATUS_BUSY_BIT), + STATUS_READ_DELAY_US, + STATUS_READ_TIMEOUT_US, true, + rb + MMAP_OTP_OFFSET(OTP_STATUS_OFFSET)); + + data = readl(rb + MMAP_OTP_OFFSET(OTP_PASS_FAIL_OFFSET)); + if (ret < 0 || data & OTP_FAIL_BIT) { + ret = -EIO; + goto error; + } + + buf[byte] = readl(rb + MMAP_OTP_OFFSET(OTP_RD_DATA_OFFSET)); + } + ret = byte; +error: + release_sys_lock(priv); + return ret; +} + +static int pci1xxxx_otp_write(void *priv_t, unsigned int off, + void *value_t, size_t count) +{ + struct pci1xxxx_otp_eeprom_device *priv = priv_t; + void __iomem *rb = priv->reg_base; + char 
*value = value_t; + u32 regval; + u32 byte; + int ret; + u8 data; + + if (off >= priv->nvmem_config_otp.size) + return -EFAULT; + + if ((off + count) > priv->nvmem_config_otp.size) + count = priv->nvmem_config_otp.size - off; + + ret = set_sys_lock(priv); + if (ret) + return ret; + + for (byte = 0; byte < count; byte++) { + otp_device_set_address(priv, (u16)(off + byte)); + + /* + * Set OTP_PGM_MODE_BYTE command bit in OTP_PRGM_MODE register + * to enable Byte programming + */ + data = readl(rb + MMAP_OTP_OFFSET(OTP_PRGM_MODE_OFFSET)); + writel(data | OTP_PGM_MODE_BYTE_BIT, + rb + MMAP_OTP_OFFSET(OTP_PRGM_MODE_OFFSET)); + writel(*(value + byte), rb + MMAP_OTP_OFFSET(OTP_PRGM_DATA_OFFSET)); + data = readl(rb + MMAP_OTP_OFFSET(OTP_FUNC_CMD_OFFSET)); + writel(data | OTP_FUNC_PGM_BIT, + rb + MMAP_OTP_OFFSET(OTP_FUNC_CMD_OFFSET)); + data = readl(rb + MMAP_OTP_OFFSET(OTP_CMD_GO_OFFSET)); + writel(data | OTP_CMD_GO_BIT, + rb + MMAP_OTP_OFFSET(OTP_CMD_GO_OFFSET)); + + ret = read_poll_timeout(readl, regval, + !(regval & OTP_STATUS_BUSY_BIT), + STATUS_READ_DELAY_US, + STATUS_READ_TIMEOUT_US, true, + rb + MMAP_OTP_OFFSET(OTP_STATUS_OFFSET)); + + data = readl(rb + MMAP_OTP_OFFSET(OTP_PASS_FAIL_OFFSET)); + if (ret < 0 || data & OTP_FAIL_BIT) { + ret = -EIO; + goto error; + } + } + ret = byte; +error: + release_sys_lock(priv); + return ret; +} + +static int pci1xxxx_otp_eeprom_probe(struct auxiliary_device *aux_dev, + const struct auxiliary_device_id *id) +{ + struct auxiliary_device_wrapper *aux_dev_wrapper; + struct pci1xxxx_otp_eeprom_device *priv; + struct gp_aux_data_type *pdata; + int ret; + u8 data; + + aux_dev_wrapper = container_of(aux_dev, struct auxiliary_device_wrapper, + aux_dev); + pdata = &aux_dev_wrapper->gp_aux_data; + if (!pdata) + return -EINVAL; + + priv = devm_kzalloc(&aux_dev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->pdev = aux_dev; + + if (!devm_request_mem_region(&aux_dev->dev, pdata->region_start + + PERI_PF3_SYSTEM_REG_ADDR_BASE, + PERI_PF3_SYSTEM_REG_LENGTH, + aux_dev->name)) + return -ENOMEM; + + priv->reg_base = devm_ioremap(&aux_dev->dev, pdata->region_start + + PERI_PF3_SYSTEM_REG_ADDR_BASE, + PERI_PF3_SYSTEM_REG_LENGTH); + if (!priv->reg_base) + return -ENOMEM; + + ret = set_sys_lock(priv); + if (ret) + return ret; + + /* Set OTP_PWR_DN to 0 to make OTP Operational */ + data = readl(priv->reg_base + MMAP_OTP_OFFSET(OTP_PWR_DN_OFFSET)); + writel(data & ~OTP_PWR_DN_BIT, + priv->reg_base + MMAP_OTP_OFFSET(OTP_PWR_DN_OFFSET)); + + dev_set_drvdata(&aux_dev->dev, priv); + + if (is_eeprom_responsive(priv)) { + priv->nvmem_config_eeprom.type = NVMEM_TYPE_EEPROM; + priv->nvmem_config_eeprom.name = EEPROM_NAME; + priv->nvmem_config_eeprom.dev = &aux_dev->dev; + priv->nvmem_config_eeprom.owner = THIS_MODULE; + priv->nvmem_config_eeprom.reg_read = pci1xxxx_eeprom_read; + priv->nvmem_config_eeprom.reg_write = pci1xxxx_eeprom_write; + priv->nvmem_config_eeprom.priv = priv; + priv->nvmem_config_eeprom.stride = 1; + priv->nvmem_config_eeprom.word_size = 1; + priv->nvmem_config_eeprom.size = EEPROM_SIZE_BYTES; + + priv->nvmem_eeprom = devm_nvmem_register(&aux_dev->dev, + &priv->nvmem_config_eeprom); + if (IS_ERR(priv->nvmem_eeprom)) + return PTR_ERR(priv->nvmem_eeprom); + } + + release_sys_lock(priv); + + priv->nvmem_config_otp.type = NVMEM_TYPE_OTP; + priv->nvmem_config_otp.name = OTP_NAME; + priv->nvmem_config_otp.dev = &aux_dev->dev; + priv->nvmem_config_otp.owner = THIS_MODULE; + priv->nvmem_config_otp.reg_read = pci1xxxx_otp_read; + 
priv->nvmem_config_otp.reg_write = pci1xxxx_otp_write; + priv->nvmem_config_otp.priv = priv; + priv->nvmem_config_otp.stride = 1; + priv->nvmem_config_otp.word_size = 1; + priv->nvmem_config_otp.size = OTP_SIZE_BYTES; + + priv->nvmem_otp = devm_nvmem_register(&aux_dev->dev, + &priv->nvmem_config_otp); + if (IS_ERR(priv->nvmem_otp)) + return PTR_ERR(priv->nvmem_otp); + + return ret; +} + +static void pci1xxxx_otp_eeprom_remove(struct auxiliary_device *aux_dev) +{ + struct pci1xxxx_otp_eeprom_device *priv; + void __iomem *sys_lock; + + priv = dev_get_drvdata(&aux_dev->dev); + sys_lock = priv->reg_base + MMAP_CFG_OFFSET(CFG_SYS_LOCK_OFFSET); + writel(CFG_SYS_LOCK_PF3, sys_lock); + + /* Shut down OTP */ + writel(OTP_PWR_DN_BIT, + priv->reg_base + MMAP_OTP_OFFSET(OTP_PWR_DN_OFFSET)); + + writel(0, sys_lock); +} + +static const struct auxiliary_device_id pci1xxxx_otp_eeprom_auxiliary_id_table[] = { + {.name = "mchp_pci1xxxx_gp.gp_otp_e2p"}, + {}, +}; +MODULE_DEVICE_TABLE(auxiliary, pci1xxxx_otp_eeprom_auxiliary_id_table); + +static struct auxiliary_driver pci1xxxx_otp_eeprom_driver = { + .driver = { + .name = AUX_DRIVER_NAME, + }, + .probe = pci1xxxx_otp_eeprom_probe, + .remove = pci1xxxx_otp_eeprom_remove, + .id_table = pci1xxxx_otp_eeprom_auxiliary_id_table +}; +module_auxiliary_driver(pci1xxxx_otp_eeprom_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Kumaravel Thiagarajan "); +MODULE_AUTHOR("Tharun Kumar P "); +MODULE_AUTHOR("Vaibhaav Ram T.L "); +MODULE_DESCRIPTION("Microchip Technology Inc. PCI1xxxx OTP EEPROM Programmer"); diff --git a/drivers/misc/mei/Kconfig b/drivers/misc/mei/Kconfig new file mode 100644 index 0000000000..37db142de4 --- /dev/null +++ b/drivers/misc/mei/Kconfig @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2003-2019, Intel Corporation. All rights reserved. +config INTEL_MEI + tristate "Intel Management Engine Interface" + depends on X86 && PCI + help + The Intel Management Engine (Intel ME) provides Manageability, + Security and Media services for system containing Intel chipsets. + if selected /dev/mei misc device will be created. + + For more information see + + +config INTEL_MEI_ME + tristate "ME Enabled Intel Chipsets" + select INTEL_MEI + depends on X86 && PCI + help + MEI support for ME Enabled Intel chipsets. + + Supported Chipsets are: + 7 Series Chipset Family + 6 Series Chipset Family + 5 Series Chipset Family + 4 Series Chipset Family + Mobile 4 Series Chipset Family + ICH9 + 82946GZ/GL + 82G35 Express + 82Q963/Q965 + 82P965/G965 + Mobile PM965/GM965 + Mobile GME965/GLE960 + 82Q35 Express + 82G33/G31/P35/P31 Express + 82Q33 Express + 82X38/X48 Express + +config INTEL_MEI_TXE + tristate "Intel Trusted Execution Environment with ME Interface" + select INTEL_MEI + depends on X86 && PCI + help + MEI Support for Trusted Execution Environment device on Intel SoCs + + Supported SoCs: + Intel Bay Trail + +config INTEL_MEI_GSC + tristate "Intel MEI GSC embedded device" + depends on INTEL_MEI + depends on INTEL_MEI_ME + depends on X86 && PCI + depends on DRM_I915 + help + Intel auxiliary driver for GSC devices embedded in Intel graphics devices. + + An MEI device here called GSC can be embedded in an + Intel graphics devices, to support a range of chassis + tasks such as graphics card firmware update and security + tasks. 
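As the INTEL_MEI help text above notes, selecting the core driver creates a
/dev/mei misc character device. Purely as an illustrative sketch, not taken
from this patch, a userspace client would connect to an ME client through
that node with IOCTL_MEI_CONNECT_CLIENT from <linux/mei.h>; the all-zero
UUID and the /dev/mei0 node name below are placeholders.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/mei.h>

int main(void)
{
	/* Placeholder GUID: substitute the UUID of the ME client to reach. */
	const uuid_le guid = UUID_LE(0x00000000, 0x0000, 0x0000,
				     0x00, 0x00, 0x00, 0x00,
				     0x00, 0x00, 0x00, 0x00);
	struct mei_connect_client_data conn;
	int fd;

	fd = open("/dev/mei0", O_RDWR);	/* node name may differ per system */
	if (fd < 0)
		return 1;

	memset(&conn, 0, sizeof(conn));
	conn.in_client_uuid = guid;
	if (ioctl(fd, IOCTL_MEI_CONNECT_CLIENT, &conn) == 0)
		printf("connected, max message length %u\n",
		       conn.out_client_properties.max_msg_length);

	close(fd);
	return 0;
}

After a successful connect, messages are exchanged with plain read() and
write() on the same file descriptor.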
+ +source "drivers/misc/mei/hdcp/Kconfig" +source "drivers/misc/mei/pxp/Kconfig" +source "drivers/misc/mei/gsc_proxy/Kconfig" diff --git a/drivers/misc/mei/Makefile b/drivers/misc/mei/Makefile new file mode 100644 index 0000000000..14aee253ae --- /dev/null +++ b/drivers/misc/mei/Makefile @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2010-2019, Intel Corporation. All rights reserved. +# Makefile - Intel Management Engine Interface (Intel MEI) Linux driver +# +obj-$(CONFIG_INTEL_MEI) += mei.o +mei-objs := init.o +mei-objs += hbm.o +mei-objs += interrupt.o +mei-objs += client.o +mei-objs += main.o +mei-objs += dma-ring.o +mei-objs += bus.o +mei-objs += bus-fixup.o +mei-$(CONFIG_DEBUG_FS) += debugfs.o + +obj-$(CONFIG_INTEL_MEI_ME) += mei-me.o +mei-me-objs := pci-me.o +mei-me-objs += hw-me.o + +obj-$(CONFIG_INTEL_MEI_GSC) += mei-gsc.o +mei-gsc-objs := gsc-me.o + +obj-$(CONFIG_INTEL_MEI_TXE) += mei-txe.o +mei-txe-objs := pci-txe.o +mei-txe-objs += hw-txe.o + +mei-$(CONFIG_EVENT_TRACING) += mei-trace.o +CFLAGS_mei-trace.o = -I$(src) + +obj-$(CONFIG_INTEL_MEI_HDCP) += hdcp/ +obj-$(CONFIG_INTEL_MEI_PXP) += pxp/ +obj-$(CONFIG_INTEL_MEI_GSC_PROXY) += gsc_proxy/ diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c new file mode 100644 index 0000000000..2733070acf --- /dev/null +++ b/drivers/misc/mei/bus-fixup.c @@ -0,0 +1,588 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2013-2023, Intel Corporation. All rights reserved. + * Intel Management Engine Interface (Intel MEI) Linux driver + */ + +#include +#include +#include +#include +#include + +#include +#include + +#include "mei_dev.h" +#include "client.h" +#include "mkhi.h" + +#define MEI_UUID_NFC_INFO UUID_LE(0xd2de1625, 0x382d, 0x417d, \ + 0x48, 0xa4, 0xef, 0xab, 0xba, 0x8a, 0x12, 0x06) + +static const uuid_le mei_nfc_info_guid = MEI_UUID_NFC_INFO; + +#define MEI_UUID_NFC_HCI UUID_LE(0x0bb17a78, 0x2a8e, 0x4c50, \ + 0x94, 0xd4, 0x50, 0x26, 0x67, 0x23, 0x77, 0x5c) + +#define MEI_UUID_WD UUID_LE(0x05B79A6F, 0x4628, 0x4D7F, \ + 0x89, 0x9D, 0xA9, 0x15, 0x14, 0xCB, 0x32, 0xAB) + +#define MEI_UUID_MKHIF_FIX UUID_LE(0x55213584, 0x9a29, 0x4916, \ + 0xba, 0xdf, 0xf, 0xb7, 0xed, 0x68, 0x2a, 0xeb) + +#define MEI_UUID_IGSC_MKHI UUID_LE(0xE2C2AFA2, 0x3817, 0x4D19, \ + 0x9D, 0x95, 0x06, 0xB1, 0x6B, 0x58, 0x8A, 0x5D) + +#define MEI_UUID_IGSC_MKHI_FIX UUID_LE(0x46E0C1FB, 0xA546, 0x414F, \ + 0x91, 0x70, 0xB7, 0xF4, 0x6D, 0x57, 0xB4, 0xAD) + +#define MEI_UUID_HDCP UUID_LE(0xB638AB7E, 0x94E2, 0x4EA2, \ + 0xA5, 0x52, 0xD1, 0xC5, 0x4B, 0x62, 0x7F, 0x04) + +#define MEI_UUID_PAVP UUID_LE(0xfbf6fcf1, 0x96cf, 0x4e2e, 0xA6, \ + 0xa6, 0x1b, 0xab, 0x8c, 0xbe, 0x36, 0xb1) + +#define MEI_UUID_ANY NULL_UUID_LE + +/** + * number_of_connections - determine whether an client be on the bus + * according number of connections + * We support only clients: + * 1. with single connection + * 2. 
and fixed clients (max_number_of_connections == 0) + * + * @cldev: me clients device + */ +static void number_of_connections(struct mei_cl_device *cldev) +{ + if (cldev->me_cl->props.max_number_of_connections > 1) + cldev->do_match = 0; +} + +/** + * blacklist - blacklist a client from the bus + * + * @cldev: me clients device + */ +static void blacklist(struct mei_cl_device *cldev) +{ + cldev->do_match = 0; +} + +/** + * whitelist - forcefully whitelist client + * + * @cldev: me clients device + */ +static void whitelist(struct mei_cl_device *cldev) +{ + cldev->do_match = 1; +} + +#define OSTYPE_LINUX 2 +struct mei_os_ver { + __le16 build; + __le16 reserved1; + u8 os_type; + u8 major; + u8 minor; + u8 reserved2; +} __packed; + +struct mkhi_fw_ver_block { + u16 minor; + u8 major; + u8 platform; + u16 buildno; + u16 hotfix; +} __packed; + +struct mkhi_fw_ver { + struct mkhi_fw_ver_block ver[MEI_MAX_FW_VER_BLOCKS]; +} __packed; + +#define MKHI_OSVER_BUF_LEN (sizeof(struct mkhi_msg_hdr) + \ + sizeof(struct mkhi_fwcaps) + \ + sizeof(struct mei_os_ver)) +static int mei_osver(struct mei_cl_device *cldev) +{ + const size_t size = MKHI_OSVER_BUF_LEN; + u8 buf[MKHI_OSVER_BUF_LEN]; + struct mkhi_msg *req; + struct mkhi_fwcaps *fwcaps; + struct mei_os_ver *os_ver; + unsigned int mode = MEI_CL_IO_TX_BLOCKING | MEI_CL_IO_TX_INTERNAL; + + memset(buf, 0, size); + + req = (struct mkhi_msg *)buf; + req->hdr.group_id = MKHI_FWCAPS_GROUP_ID; + req->hdr.command = MKHI_FWCAPS_SET_OS_VER_APP_RULE_CMD; + + fwcaps = (struct mkhi_fwcaps *)req->data; + + fwcaps->id.rule_type = 0x0; + fwcaps->id.feature_id = MKHI_FEATURE_PTT; + fwcaps->len = sizeof(*os_ver); + os_ver = (struct mei_os_ver *)fwcaps->data; + os_ver->os_type = OSTYPE_LINUX; + + return __mei_cl_send(cldev->cl, buf, size, 0, mode); +} + +#define MKHI_FWVER_BUF_LEN (sizeof(struct mkhi_msg_hdr) + \ + sizeof(struct mkhi_fw_ver)) +#define MKHI_FWVER_LEN(__num) (sizeof(struct mkhi_msg_hdr) + \ + sizeof(struct mkhi_fw_ver_block) * (__num)) +static int mei_fwver(struct mei_cl_device *cldev) +{ + u8 buf[MKHI_FWVER_BUF_LEN]; + struct mkhi_msg req; + struct mkhi_msg *rsp; + struct mkhi_fw_ver *fwver; + int bytes_recv, ret, i; + + memset(buf, 0, sizeof(buf)); + + req.hdr.group_id = MKHI_GEN_GROUP_ID; + req.hdr.command = MKHI_GEN_GET_FW_VERSION_CMD; + + ret = __mei_cl_send(cldev->cl, (u8 *)&req, sizeof(req), 0, + MEI_CL_IO_TX_BLOCKING); + if (ret < 0) { + dev_info(&cldev->dev, "Could not send ReqFWVersion cmd ret = %d\n", ret); + return ret; + } + + ret = 0; + bytes_recv = __mei_cl_recv(cldev->cl, buf, sizeof(buf), NULL, 0, + cldev->bus->timeouts.mkhi_recv); + if (bytes_recv < 0 || (size_t)bytes_recv < MKHI_FWVER_LEN(1)) { + /* + * Should be at least one version block, + * error out if nothing found + */ + dev_info(&cldev->dev, "Could not read FW version ret = %d\n", bytes_recv); + return -EIO; + } + + rsp = (struct mkhi_msg *)buf; + fwver = (struct mkhi_fw_ver *)rsp->data; + memset(cldev->bus->fw_ver, 0, sizeof(cldev->bus->fw_ver)); + for (i = 0; i < MEI_MAX_FW_VER_BLOCKS; i++) { + if ((size_t)bytes_recv < MKHI_FWVER_LEN(i + 1)) + break; + dev_dbg(&cldev->dev, "FW version%d %d:%d.%d.%d.%d\n", + i, fwver->ver[i].platform, + fwver->ver[i].major, fwver->ver[i].minor, + fwver->ver[i].hotfix, fwver->ver[i].buildno); + + cldev->bus->fw_ver[i].platform = fwver->ver[i].platform; + cldev->bus->fw_ver[i].major = fwver->ver[i].major; + cldev->bus->fw_ver[i].minor = fwver->ver[i].minor; + cldev->bus->fw_ver[i].hotfix = fwver->ver[i].hotfix; + cldev->bus->fw_ver[i].buildno = 
fwver->ver[i].buildno; + } + cldev->bus->fw_ver_received = 1; + + return ret; +} + +#define GFX_MEMORY_READY_TIMEOUT 200 /* timeout in milliseconds */ + +static int mei_gfx_memory_ready(struct mei_cl_device *cldev) +{ + struct mkhi_gfx_mem_ready req = {0}; + unsigned int mode = MEI_CL_IO_TX_INTERNAL | MEI_CL_IO_TX_BLOCKING; + + req.hdr.group_id = MKHI_GROUP_ID_GFX; + req.hdr.command = MKHI_GFX_MEMORY_READY_CMD_REQ; + req.flags = MKHI_GFX_MEM_READY_PXP_ALLOWED; + + dev_dbg(&cldev->dev, "Sending memory ready command\n"); + return __mei_cl_send_timeout(cldev->cl, (u8 *)&req, sizeof(req), 0, + mode, GFX_MEMORY_READY_TIMEOUT); +} + +static void mei_mkhi_fix(struct mei_cl_device *cldev) +{ + int ret; + + /* No need to enable the client if nothing is needed from it */ + if (!cldev->bus->fw_f_fw_ver_supported && + !cldev->bus->hbm_f_os_supported) + return; + + ret = mei_cldev_enable(cldev); + if (ret) + return; + + if (cldev->bus->fw_f_fw_ver_supported) { + ret = mei_fwver(cldev); + if (ret < 0) + dev_info(&cldev->dev, "FW version command failed %d\n", + ret); + } + + if (cldev->bus->hbm_f_os_supported) { + ret = mei_osver(cldev); + if (ret < 0) + dev_info(&cldev->dev, "OS version command failed %d\n", + ret); + } + mei_cldev_disable(cldev); +} + +static void mei_gsc_mkhi_ver(struct mei_cl_device *cldev) +{ + int ret; + + /* + * No need to enable the client if nothing is needed from it. + * No need to fill in version if it is already filled in by the fix address client. + */ + if (!cldev->bus->fw_f_fw_ver_supported || cldev->bus->fw_ver_received) + return; + + ret = mei_cldev_enable(cldev); + if (ret) + return; + + ret = mei_fwver(cldev); + if (ret < 0) + dev_info(&cldev->dev, "FW version command failed %d\n", ret); + mei_cldev_disable(cldev); +} + +static void mei_gsc_mkhi_fix_ver(struct mei_cl_device *cldev) +{ + int ret; + + /* No need to enable the client if nothing is needed from it */ + if (!cldev->bus->fw_f_fw_ver_supported && + cldev->bus->pxp_mode != MEI_DEV_PXP_INIT) + return; + + ret = mei_cldev_enable(cldev); + if (ret) + return; + + if (cldev->bus->pxp_mode == MEI_DEV_PXP_INIT) { + ret = mei_gfx_memory_ready(cldev); + if (ret < 0) { + dev_err(&cldev->dev, "memory ready command failed %d\n", ret); + } else { + dev_dbg(&cldev->dev, "memory ready command sent\n"); + cldev->bus->pxp_mode = MEI_DEV_PXP_SETUP; + } + /* we go to reset after that */ + goto out; + } + + ret = mei_fwver(cldev); + if (ret < 0) + dev_info(&cldev->dev, "FW version command failed %d\n", + ret); +out: + mei_cldev_disable(cldev); +} + +/** + * mei_wd - wd client on the bus, change protocol version + * as the API has changed. 
+ * + * @cldev: me clients device + */ +#if IS_ENABLED(CONFIG_INTEL_MEI_ME) +#include +#include "hw-me-regs.h" +static void mei_wd(struct mei_cl_device *cldev) +{ + struct pci_dev *pdev = to_pci_dev(cldev->dev.parent); + + if (pdev->device == MEI_DEV_ID_WPT_LP || + pdev->device == MEI_DEV_ID_SPT || + pdev->device == MEI_DEV_ID_SPT_H) + cldev->me_cl->props.protocol_version = 0x2; + + cldev->do_match = 1; +} +#else +static inline void mei_wd(struct mei_cl_device *cldev) {} +#endif /* CONFIG_INTEL_MEI_ME */ + +struct mei_nfc_cmd { + u8 command; + u8 status; + u16 req_id; + u32 reserved; + u16 data_size; + u8 sub_command; + u8 data[]; +} __packed; + +struct mei_nfc_reply { + u8 command; + u8 status; + u16 req_id; + u32 reserved; + u16 data_size; + u8 sub_command; + u8 reply_status; + u8 data[]; +} __packed; + +struct mei_nfc_if_version { + u8 radio_version_sw[3]; + u8 reserved[3]; + u8 radio_version_hw[3]; + u8 i2c_addr; + u8 fw_ivn; + u8 vendor_id; + u8 radio_type; +} __packed; + + +#define MEI_NFC_CMD_MAINTENANCE 0x00 +#define MEI_NFC_SUBCMD_IF_VERSION 0x01 + +/* Vendors */ +#define MEI_NFC_VENDOR_INSIDE 0x00 +#define MEI_NFC_VENDOR_NXP 0x01 + +/* Radio types */ +#define MEI_NFC_VENDOR_INSIDE_UREAD 0x00 +#define MEI_NFC_VENDOR_NXP_PN544 0x01 + +/** + * mei_nfc_if_version - get NFC interface version + * + * @cl: host client (nfc info) + * @ver: NFC interface version to be filled in + * + * Return: 0 on success; < 0 otherwise + */ +static int mei_nfc_if_version(struct mei_cl *cl, + struct mei_nfc_if_version *ver) +{ + struct mei_device *bus; + struct mei_nfc_cmd cmd = { + .command = MEI_NFC_CMD_MAINTENANCE, + .data_size = 1, + .sub_command = MEI_NFC_SUBCMD_IF_VERSION, + }; + struct mei_nfc_reply *reply = NULL; + size_t if_version_length; + u8 vtag; + int bytes_recv, ret; + + bus = cl->dev; + + WARN_ON(mutex_is_locked(&bus->device_lock)); + + ret = __mei_cl_send(cl, (u8 *)&cmd, sizeof(cmd), 0, + MEI_CL_IO_TX_BLOCKING); + if (ret < 0) { + dev_err(bus->dev, "Could not send IF version cmd ret = %d\n", ret); + return ret; + } + + /* to be sure on the stack we alloc memory */ + if_version_length = sizeof(*reply) + sizeof(*ver); + + reply = kzalloc(if_version_length, GFP_KERNEL); + if (!reply) + return -ENOMEM; + + ret = 0; + bytes_recv = __mei_cl_recv(cl, (u8 *)reply, if_version_length, &vtag, + 0, 0); + if (bytes_recv < 0 || (size_t)bytes_recv < if_version_length) { + dev_err(bus->dev, "Could not read IF version ret = %d\n", bytes_recv); + ret = -EIO; + goto err; + } + + memcpy(ver, reply->data, sizeof(*ver)); + + dev_info(bus->dev, "NFC MEI VERSION: IVN 0x%x Vendor ID 0x%x Type 0x%x\n", + ver->fw_ivn, ver->vendor_id, ver->radio_type); + +err: + kfree(reply); + return ret; +} + +/** + * mei_nfc_radio_name - derive nfc radio name from the interface version + * + * @ver: NFC radio version + * + * Return: radio name string + */ +static const char *mei_nfc_radio_name(struct mei_nfc_if_version *ver) +{ + + if (ver->vendor_id == MEI_NFC_VENDOR_INSIDE) { + if (ver->radio_type == MEI_NFC_VENDOR_INSIDE_UREAD) + return "microread"; + } + + if (ver->vendor_id == MEI_NFC_VENDOR_NXP) { + if (ver->radio_type == MEI_NFC_VENDOR_NXP_PN544) + return "pn544"; + } + + return NULL; +} + +/** + * mei_nfc - The nfc fixup function. 
The function retrieves nfc radio + * name and set is as device attribute so we can load + * the proper device driver for it + * + * @cldev: me client device (nfc) + */ +static void mei_nfc(struct mei_cl_device *cldev) +{ + struct mei_device *bus; + struct mei_cl *cl; + struct mei_me_client *me_cl = NULL; + struct mei_nfc_if_version ver; + const char *radio_name = NULL; + int ret; + + bus = cldev->bus; + + mutex_lock(&bus->device_lock); + /* we need to connect to INFO GUID */ + cl = mei_cl_alloc_linked(bus); + if (IS_ERR(cl)) { + ret = PTR_ERR(cl); + cl = NULL; + dev_err(bus->dev, "nfc hook alloc failed %d\n", ret); + goto out; + } + + me_cl = mei_me_cl_by_uuid(bus, &mei_nfc_info_guid); + if (!me_cl) { + ret = -ENOTTY; + dev_err(bus->dev, "Cannot find nfc info %d\n", ret); + goto out; + } + + ret = mei_cl_connect(cl, me_cl, NULL); + if (ret < 0) { + dev_err(&cldev->dev, "Can't connect to the NFC INFO ME ret = %d\n", + ret); + goto out; + } + + mutex_unlock(&bus->device_lock); + + ret = mei_nfc_if_version(cl, &ver); + if (ret) + goto disconnect; + + radio_name = mei_nfc_radio_name(&ver); + + if (!radio_name) { + ret = -ENOENT; + dev_err(&cldev->dev, "Can't get the NFC interface version ret = %d\n", + ret); + goto disconnect; + } + + dev_dbg(bus->dev, "nfc radio %s\n", radio_name); + strscpy(cldev->name, radio_name, sizeof(cldev->name)); + +disconnect: + mutex_lock(&bus->device_lock); + if (mei_cl_disconnect(cl) < 0) + dev_err(bus->dev, "Can't disconnect the NFC INFO ME\n"); + + mei_cl_flush_queues(cl, NULL); + +out: + mei_cl_unlink(cl); + mutex_unlock(&bus->device_lock); + mei_me_cl_put(me_cl); + kfree(cl); + + if (ret) + cldev->do_match = 0; + + dev_dbg(bus->dev, "end of fixup match = %d\n", cldev->do_match); +} + +/** + * vt_support - enable on bus clients with vtag support + * + * @cldev: me clients device + */ +static void vt_support(struct mei_cl_device *cldev) +{ + if (cldev->me_cl->props.vt_supported == 1) + cldev->do_match = 1; +} + +/** + * pxp_is_ready - enable bus client if pxp is ready + * + * @cldev: me clients device + */ +static void pxp_is_ready(struct mei_cl_device *cldev) +{ + struct mei_device *bus = cldev->bus; + + switch (bus->pxp_mode) { + case MEI_DEV_PXP_READY: + case MEI_DEV_PXP_DEFAULT: + cldev->do_match = 1; + break; + default: + cldev->do_match = 0; + break; + } +} + +#define MEI_FIXUP(_uuid, _hook) { _uuid, _hook } + +static struct mei_fixup { + + const uuid_le uuid; + void (*hook)(struct mei_cl_device *cldev); +} mei_fixups[] = { + MEI_FIXUP(MEI_UUID_ANY, number_of_connections), + MEI_FIXUP(MEI_UUID_NFC_INFO, blacklist), + MEI_FIXUP(MEI_UUID_NFC_HCI, mei_nfc), + MEI_FIXUP(MEI_UUID_WD, mei_wd), + MEI_FIXUP(MEI_UUID_MKHIF_FIX, mei_mkhi_fix), + MEI_FIXUP(MEI_UUID_IGSC_MKHI_FIX, mei_gsc_mkhi_fix_ver), + MEI_FIXUP(MEI_UUID_IGSC_MKHI, mei_gsc_mkhi_ver), + MEI_FIXUP(MEI_UUID_HDCP, whitelist), + MEI_FIXUP(MEI_UUID_ANY, vt_support), + MEI_FIXUP(MEI_UUID_PAVP, pxp_is_ready), +}; + +/** + * mei_cl_bus_dev_fixup - run fixup handlers + * + * @cldev: me client device + */ +void mei_cl_bus_dev_fixup(struct mei_cl_device *cldev) +{ + struct mei_fixup *f; + const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl); + size_t i; + + for (i = 0; i < ARRAY_SIZE(mei_fixups); i++) { + + f = &mei_fixups[i]; + if (uuid_le_cmp(f->uuid, MEI_UUID_ANY) == 0 || + uuid_le_cmp(f->uuid, *uuid) == 0) + f->hook(cldev); + } +} + diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c new file mode 100644 index 0000000000..2e65ce6bde --- /dev/null +++ b/drivers/misc/mei/bus.c @@ -0,0 +1,1556 @@ +// 
SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2012-2023, Intel Corporation. All rights reserved. + * Intel Management Engine Interface (Intel MEI) Linux driver + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mei_dev.h" +#include "client.h" + +#define to_mei_cl_driver(d) container_of(d, struct mei_cl_driver, driver) + +/** + * __mei_cl_send - internal client send (write) + * + * @cl: host client + * @buf: buffer to send + * @length: buffer length + * @vtag: virtual tag + * @mode: sending mode + * + * Return: written size bytes or < 0 on error + */ +ssize_t __mei_cl_send(struct mei_cl *cl, const u8 *buf, size_t length, u8 vtag, + unsigned int mode) +{ + return __mei_cl_send_timeout(cl, buf, length, vtag, mode, MAX_SCHEDULE_TIMEOUT); +} + +/** + * __mei_cl_send_timeout - internal client send (write) + * + * @cl: host client + * @buf: buffer to send + * @length: buffer length + * @vtag: virtual tag + * @mode: sending mode + * @timeout: send timeout in milliseconds. + * effective only for blocking writes: the MEI_CL_IO_TX_BLOCKING mode bit is set. + * set timeout to the MAX_SCHEDULE_TIMEOUT to maixum allowed wait. + * + * Return: written size bytes or < 0 on error + */ +ssize_t __mei_cl_send_timeout(struct mei_cl *cl, const u8 *buf, size_t length, u8 vtag, + unsigned int mode, unsigned long timeout) +{ + struct mei_device *bus; + struct mei_cl_cb *cb; + ssize_t rets; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + bus = cl->dev; + + mutex_lock(&bus->device_lock); + if (bus->dev_state != MEI_DEV_ENABLED && + bus->dev_state != MEI_DEV_POWERING_DOWN) { + rets = -ENODEV; + goto out; + } + + if (!mei_cl_is_connected(cl)) { + rets = -ENODEV; + goto out; + } + + /* Check if we have an ME client device */ + if (!mei_me_cl_is_active(cl->me_cl)) { + rets = -ENOTTY; + goto out; + } + + if (vtag) { + /* Check if vtag is supported by client */ + rets = mei_cl_vt_support_check(cl); + if (rets) + goto out; + } + + if (length > mei_cl_mtu(cl)) { + rets = -EFBIG; + goto out; + } + + while (cl->tx_cb_queued >= bus->tx_queue_limit) { + mutex_unlock(&bus->device_lock); + rets = wait_event_interruptible(cl->tx_wait, + cl->writing_state == MEI_WRITE_COMPLETE || + (!mei_cl_is_connected(cl))); + mutex_lock(&bus->device_lock); + if (rets) { + if (signal_pending(current)) + rets = -EINTR; + goto out; + } + if (!mei_cl_is_connected(cl)) { + rets = -ENODEV; + goto out; + } + } + + cb = mei_cl_alloc_cb(cl, length, MEI_FOP_WRITE, NULL); + if (!cb) { + rets = -ENOMEM; + goto out; + } + cb->vtag = vtag; + + cb->internal = !!(mode & MEI_CL_IO_TX_INTERNAL); + cb->blocking = !!(mode & MEI_CL_IO_TX_BLOCKING); + memcpy(cb->buf.data, buf, length); + /* hack we point data to header */ + if (mode & MEI_CL_IO_SGL) { + cb->ext_hdr = (struct mei_ext_hdr *)cb->buf.data; + cb->buf.data = NULL; + cb->buf.size = 0; + } + + rets = mei_cl_write(cl, cb, timeout); + + if (mode & MEI_CL_IO_SGL && rets == 0) + rets = length; + +out: + mutex_unlock(&bus->device_lock); + + return rets; +} + +/** + * __mei_cl_recv - internal client receive (read) + * + * @cl: host client + * @buf: buffer to receive + * @length: buffer length + * @mode: io mode + * @vtag: virtual tag + * @timeout: recv timeout, 0 for infinite timeout + * + * Return: read size in bytes of < 0 on error + */ +ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length, u8 *vtag, + unsigned int mode, unsigned long timeout) +{ + struct mei_device *bus; + struct mei_cl_cb *cb; + size_t 
r_length; + ssize_t rets; + bool nonblock = !!(mode & MEI_CL_IO_RX_NONBLOCK); + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + bus = cl->dev; + + mutex_lock(&bus->device_lock); + if (bus->dev_state != MEI_DEV_ENABLED && + bus->dev_state != MEI_DEV_POWERING_DOWN) { + rets = -ENODEV; + goto out; + } + + cb = mei_cl_read_cb(cl, NULL); + if (cb) + goto copy; + + rets = mei_cl_read_start(cl, length, NULL); + if (rets && rets != -EBUSY) + goto out; + + if (nonblock) { + rets = -EAGAIN; + goto out; + } + + /* wait on event only if there is no other waiter */ + /* synchronized under device mutex */ + if (!waitqueue_active(&cl->rx_wait)) { + + mutex_unlock(&bus->device_lock); + + if (timeout) { + rets = wait_event_interruptible_timeout + (cl->rx_wait, + mei_cl_read_cb(cl, NULL) || + (!mei_cl_is_connected(cl)), + msecs_to_jiffies(timeout)); + if (rets == 0) + return -ETIME; + if (rets < 0) { + if (signal_pending(current)) + return -EINTR; + return -ERESTARTSYS; + } + } else { + if (wait_event_interruptible + (cl->rx_wait, + mei_cl_read_cb(cl, NULL) || + (!mei_cl_is_connected(cl)))) { + if (signal_pending(current)) + return -EINTR; + return -ERESTARTSYS; + } + } + + mutex_lock(&bus->device_lock); + + if (!mei_cl_is_connected(cl)) { + rets = -ENODEV; + goto out; + } + } + + cb = mei_cl_read_cb(cl, NULL); + if (!cb) { + rets = 0; + goto out; + } + +copy: + if (cb->status) { + rets = cb->status; + goto free; + } + + /* for the GSC type - copy the extended header to the buffer */ + if (cb->ext_hdr && cb->ext_hdr->type == MEI_EXT_HDR_GSC) { + r_length = min_t(size_t, length, cb->ext_hdr->length * sizeof(u32)); + memcpy(buf, cb->ext_hdr, r_length); + } else { + r_length = min_t(size_t, length, cb->buf_idx); + memcpy(buf, cb->buf.data, r_length); + } + rets = r_length; + + if (vtag) + *vtag = cb->vtag; + +free: + mei_cl_del_rd_completed(cl, cb); +out: + mutex_unlock(&bus->device_lock); + + return rets; +} + +/** + * mei_cldev_send_vtag - me device send with vtag (write) + * + * @cldev: me client device + * @buf: buffer to send + * @length: buffer length + * @vtag: virtual tag + * + * Return: + * * written size in bytes + * * < 0 on error + */ + +ssize_t mei_cldev_send_vtag(struct mei_cl_device *cldev, const u8 *buf, + size_t length, u8 vtag) +{ + struct mei_cl *cl = cldev->cl; + + return __mei_cl_send(cl, buf, length, vtag, MEI_CL_IO_TX_BLOCKING); +} +EXPORT_SYMBOL_GPL(mei_cldev_send_vtag); + +/** + * mei_cldev_recv_vtag - client receive with vtag (read) + * + * @cldev: me client device + * @buf: buffer to receive + * @length: buffer length + * @vtag: virtual tag + * + * Return: + * * read size in bytes + * * < 0 on error + */ + +ssize_t mei_cldev_recv_vtag(struct mei_cl_device *cldev, u8 *buf, size_t length, + u8 *vtag) +{ + struct mei_cl *cl = cldev->cl; + + return __mei_cl_recv(cl, buf, length, vtag, 0, 0); +} +EXPORT_SYMBOL_GPL(mei_cldev_recv_vtag); + +/** + * mei_cldev_recv_nonblock_vtag - non block client receive with vtag (read) + * + * @cldev: me client device + * @buf: buffer to receive + * @length: buffer length + * @vtag: virtual tag + * + * Return: + * * read size in bytes + * * -EAGAIN if function will block. 
+ * * < 0 on other error + */ +ssize_t mei_cldev_recv_nonblock_vtag(struct mei_cl_device *cldev, u8 *buf, + size_t length, u8 *vtag) +{ + struct mei_cl *cl = cldev->cl; + + return __mei_cl_recv(cl, buf, length, vtag, MEI_CL_IO_RX_NONBLOCK, 0); +} +EXPORT_SYMBOL_GPL(mei_cldev_recv_nonblock_vtag); + +/** + * mei_cldev_send - me device send (write) + * + * @cldev: me client device + * @buf: buffer to send + * @length: buffer length + * + * Return: + * * written size in bytes + * * < 0 on error + */ +ssize_t mei_cldev_send(struct mei_cl_device *cldev, const u8 *buf, size_t length) +{ + return mei_cldev_send_vtag(cldev, buf, length, 0); +} +EXPORT_SYMBOL_GPL(mei_cldev_send); + +/** + * mei_cldev_recv - client receive (read) + * + * @cldev: me client device + * @buf: buffer to receive + * @length: buffer length + * + * Return: read size in bytes of < 0 on error + */ +ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length) +{ + return mei_cldev_recv_vtag(cldev, buf, length, NULL); +} +EXPORT_SYMBOL_GPL(mei_cldev_recv); + +/** + * mei_cldev_recv_nonblock - non block client receive (read) + * + * @cldev: me client device + * @buf: buffer to receive + * @length: buffer length + * + * Return: read size in bytes of < 0 on error + * -EAGAIN if function will block. + */ +ssize_t mei_cldev_recv_nonblock(struct mei_cl_device *cldev, u8 *buf, + size_t length) +{ + return mei_cldev_recv_nonblock_vtag(cldev, buf, length, NULL); +} +EXPORT_SYMBOL_GPL(mei_cldev_recv_nonblock); + +/** + * mei_cl_bus_rx_work - dispatch rx event for a bus device + * + * @work: work + */ +static void mei_cl_bus_rx_work(struct work_struct *work) +{ + struct mei_cl_device *cldev; + struct mei_device *bus; + + cldev = container_of(work, struct mei_cl_device, rx_work); + + bus = cldev->bus; + + if (cldev->rx_cb) + cldev->rx_cb(cldev); + + mutex_lock(&bus->device_lock); + if (mei_cl_is_connected(cldev->cl)) + mei_cl_read_start(cldev->cl, mei_cl_mtu(cldev->cl), NULL); + mutex_unlock(&bus->device_lock); +} + +/** + * mei_cl_bus_notif_work - dispatch FW notif event for a bus device + * + * @work: work + */ +static void mei_cl_bus_notif_work(struct work_struct *work) +{ + struct mei_cl_device *cldev; + + cldev = container_of(work, struct mei_cl_device, notif_work); + + if (cldev->notif_cb) + cldev->notif_cb(cldev); +} + +/** + * mei_cl_bus_notify_event - schedule notify cb on bus client + * + * @cl: host client + * + * Return: true if event was scheduled + * false if the client is not waiting for event + */ +bool mei_cl_bus_notify_event(struct mei_cl *cl) +{ + struct mei_cl_device *cldev = cl->cldev; + + if (!cldev || !cldev->notif_cb) + return false; + + if (!cl->notify_ev) + return false; + + schedule_work(&cldev->notif_work); + + cl->notify_ev = false; + + return true; +} + +/** + * mei_cl_bus_rx_event - schedule rx event + * + * @cl: host client + * + * Return: true if event was scheduled + * false if the client is not waiting for event + */ +bool mei_cl_bus_rx_event(struct mei_cl *cl) +{ + struct mei_cl_device *cldev = cl->cldev; + + if (!cldev || !cldev->rx_cb) + return false; + + schedule_work(&cldev->rx_work); + + return true; +} + +/** + * mei_cldev_register_rx_cb - register Rx event callback + * + * @cldev: me client devices + * @rx_cb: callback function + * + * Return: 0 on success + * -EALREADY if an callback is already registered + * <0 on other errors + */ +int mei_cldev_register_rx_cb(struct mei_cl_device *cldev, mei_cldev_cb_t rx_cb) +{ + struct mei_device *bus = cldev->bus; + int ret; + + if (!rx_cb) + 
return -EINVAL; + if (cldev->rx_cb) + return -EALREADY; + + cldev->rx_cb = rx_cb; + INIT_WORK(&cldev->rx_work, mei_cl_bus_rx_work); + + mutex_lock(&bus->device_lock); + if (mei_cl_is_connected(cldev->cl)) + ret = mei_cl_read_start(cldev->cl, mei_cl_mtu(cldev->cl), NULL); + else + ret = -ENODEV; + mutex_unlock(&bus->device_lock); + if (ret && ret != -EBUSY) { + cancel_work_sync(&cldev->rx_work); + cldev->rx_cb = NULL; + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(mei_cldev_register_rx_cb); + +/** + * mei_cldev_register_notif_cb - register FW notification event callback + * + * @cldev: me client devices + * @notif_cb: callback function + * + * Return: 0 on success + * -EALREADY if an callback is already registered + * <0 on other errors + */ +int mei_cldev_register_notif_cb(struct mei_cl_device *cldev, + mei_cldev_cb_t notif_cb) +{ + struct mei_device *bus = cldev->bus; + int ret; + + if (!notif_cb) + return -EINVAL; + + if (cldev->notif_cb) + return -EALREADY; + + cldev->notif_cb = notif_cb; + INIT_WORK(&cldev->notif_work, mei_cl_bus_notif_work); + + mutex_lock(&bus->device_lock); + ret = mei_cl_notify_request(cldev->cl, NULL, 1); + mutex_unlock(&bus->device_lock); + if (ret) { + cancel_work_sync(&cldev->notif_work); + cldev->notif_cb = NULL; + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(mei_cldev_register_notif_cb); + +/** + * mei_cldev_get_drvdata - driver data getter + * + * @cldev: mei client device + * + * Return: driver private data + */ +void *mei_cldev_get_drvdata(const struct mei_cl_device *cldev) +{ + return dev_get_drvdata(&cldev->dev); +} +EXPORT_SYMBOL_GPL(mei_cldev_get_drvdata); + +/** + * mei_cldev_set_drvdata - driver data setter + * + * @cldev: mei client device + * @data: data to store + */ +void mei_cldev_set_drvdata(struct mei_cl_device *cldev, void *data) +{ + dev_set_drvdata(&cldev->dev, data); +} +EXPORT_SYMBOL_GPL(mei_cldev_set_drvdata); + +/** + * mei_cldev_uuid - return uuid of the underlying me client + * + * @cldev: mei client device + * + * Return: me client uuid + */ +const uuid_le *mei_cldev_uuid(const struct mei_cl_device *cldev) +{ + return mei_me_cl_uuid(cldev->me_cl); +} +EXPORT_SYMBOL_GPL(mei_cldev_uuid); + +/** + * mei_cldev_ver - return protocol version of the underlying me client + * + * @cldev: mei client device + * + * Return: me client protocol version + */ +u8 mei_cldev_ver(const struct mei_cl_device *cldev) +{ + return mei_me_cl_ver(cldev->me_cl); +} +EXPORT_SYMBOL_GPL(mei_cldev_ver); + +/** + * mei_cldev_enabled - check whether the device is enabled + * + * @cldev: mei client device + * + * Return: true if me client is initialized and connected + */ +bool mei_cldev_enabled(const struct mei_cl_device *cldev) +{ + return mei_cl_is_connected(cldev->cl); +} +EXPORT_SYMBOL_GPL(mei_cldev_enabled); + +/** + * mei_cl_bus_module_get - acquire module of the underlying + * hw driver. + * + * @cldev: mei client device + * + * Return: true on success; false if the module was removed. + */ +static bool mei_cl_bus_module_get(struct mei_cl_device *cldev) +{ + return try_module_get(cldev->bus->dev->driver->owner); +} + +/** + * mei_cl_bus_module_put - release the underlying hw module. + * + * @cldev: mei client device + */ +static void mei_cl_bus_module_put(struct mei_cl_device *cldev) +{ + module_put(cldev->bus->dev->driver->owner); +} + +/** + * mei_cl_bus_vtag - get bus vtag entry wrapper + * The tag for bus client is always first. 
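+ * The bus path allocates at most one vtag entry per client (see
+ * mei_cl_bus_vtag_alloc()), so the bus entry, when present, sits at
+ * the head of the cl->vtag_map list.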
+ * + * @cl: host client + * + * Return: bus vtag or NULL + */ +static inline struct mei_cl_vtag *mei_cl_bus_vtag(struct mei_cl *cl) +{ + return list_first_entry_or_null(&cl->vtag_map, + struct mei_cl_vtag, list); +} + +/** + * mei_cl_bus_vtag_alloc - add bus client entry to vtag map + * + * @cldev: me client device + * + * Return: + * * 0 on success + * * -ENOMEM if memory allocation failed + */ +static int mei_cl_bus_vtag_alloc(struct mei_cl_device *cldev) +{ + struct mei_cl *cl = cldev->cl; + struct mei_cl_vtag *cl_vtag; + + /* + * Bail out if the client does not supports vtags + * or has already allocated one + */ + if (mei_cl_vt_support_check(cl) || mei_cl_bus_vtag(cl)) + return 0; + + cl_vtag = mei_cl_vtag_alloc(NULL, 0); + if (IS_ERR(cl_vtag)) + return -ENOMEM; + + list_add_tail(&cl_vtag->list, &cl->vtag_map); + + return 0; +} + +/** + * mei_cl_bus_vtag_free - remove the bus entry from vtag map + * + * @cldev: me client device + */ +static void mei_cl_bus_vtag_free(struct mei_cl_device *cldev) +{ + struct mei_cl *cl = cldev->cl; + struct mei_cl_vtag *cl_vtag; + + cl_vtag = mei_cl_bus_vtag(cl); + if (!cl_vtag) + return; + + list_del(&cl_vtag->list); + kfree(cl_vtag); +} + +void *mei_cldev_dma_map(struct mei_cl_device *cldev, u8 buffer_id, size_t size) +{ + struct mei_device *bus; + struct mei_cl *cl; + int ret; + + if (!cldev || !buffer_id || !size) + return ERR_PTR(-EINVAL); + + if (!IS_ALIGNED(size, MEI_FW_PAGE_SIZE)) { + dev_err(&cldev->dev, "Map size should be aligned to %lu\n", + MEI_FW_PAGE_SIZE); + return ERR_PTR(-EINVAL); + } + + cl = cldev->cl; + bus = cldev->bus; + + mutex_lock(&bus->device_lock); + if (cl->state == MEI_FILE_UNINITIALIZED) { + ret = mei_cl_link(cl); + if (ret) + goto notlinked; + /* update pointers */ + cl->cldev = cldev; + } + + ret = mei_cl_dma_alloc_and_map(cl, NULL, buffer_id, size); + if (ret) + mei_cl_unlink(cl); +notlinked: + mutex_unlock(&bus->device_lock); + if (ret) + return ERR_PTR(ret); + return cl->dma.vaddr; +} +EXPORT_SYMBOL_GPL(mei_cldev_dma_map); + +int mei_cldev_dma_unmap(struct mei_cl_device *cldev) +{ + struct mei_device *bus; + struct mei_cl *cl; + int ret; + + if (!cldev) + return -EINVAL; + + cl = cldev->cl; + bus = cldev->bus; + + mutex_lock(&bus->device_lock); + ret = mei_cl_dma_unmap(cl, NULL); + + mei_cl_flush_queues(cl, NULL); + mei_cl_unlink(cl); + mutex_unlock(&bus->device_lock); + return ret; +} +EXPORT_SYMBOL_GPL(mei_cldev_dma_unmap); + +/** + * mei_cldev_enable - enable me client device + * create connection with me client + * + * @cldev: me client device + * + * Return: 0 on success and < 0 on error + */ +int mei_cldev_enable(struct mei_cl_device *cldev) +{ + struct mei_device *bus = cldev->bus; + struct mei_cl *cl; + int ret; + + cl = cldev->cl; + + mutex_lock(&bus->device_lock); + if (cl->state == MEI_FILE_UNINITIALIZED) { + ret = mei_cl_link(cl); + if (ret) + goto notlinked; + /* update pointers */ + cl->cldev = cldev; + } + + if (mei_cl_is_connected(cl)) { + ret = 0; + goto out; + } + + if (!mei_me_cl_is_active(cldev->me_cl)) { + dev_err(&cldev->dev, "me client is not active\n"); + ret = -ENOTTY; + goto out; + } + + ret = mei_cl_bus_vtag_alloc(cldev); + if (ret) + goto out; + + ret = mei_cl_connect(cl, cldev->me_cl, NULL); + if (ret < 0) { + dev_err(&cldev->dev, "cannot connect\n"); + mei_cl_bus_vtag_free(cldev); + } + +out: + if (ret) + mei_cl_unlink(cl); +notlinked: + mutex_unlock(&bus->device_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(mei_cldev_enable); + +/** + * mei_cldev_unregister_callbacks - internal wrapper 
for unregistering + * callbacks. + * + * @cldev: client device + */ +static void mei_cldev_unregister_callbacks(struct mei_cl_device *cldev) +{ + if (cldev->rx_cb) { + cancel_work_sync(&cldev->rx_work); + cldev->rx_cb = NULL; + } + + if (cldev->notif_cb) { + cancel_work_sync(&cldev->notif_work); + cldev->notif_cb = NULL; + } +} + +/** + * mei_cldev_disable - disable me client device + * disconnect form the me client + * + * @cldev: me client device + * + * Return: 0 on success and < 0 on error + */ +int mei_cldev_disable(struct mei_cl_device *cldev) +{ + struct mei_device *bus; + struct mei_cl *cl; + int err; + + if (!cldev) + return -ENODEV; + + cl = cldev->cl; + + bus = cldev->bus; + + mei_cldev_unregister_callbacks(cldev); + + mutex_lock(&bus->device_lock); + + mei_cl_bus_vtag_free(cldev); + + if (!mei_cl_is_connected(cl)) { + dev_dbg(bus->dev, "Already disconnected\n"); + err = 0; + goto out; + } + + err = mei_cl_disconnect(cl); + if (err < 0) + dev_err(bus->dev, "Could not disconnect from the ME client\n"); + +out: + /* Flush queues and remove any pending read unless we have mapped DMA */ + if (!cl->dma_mapped) { + mei_cl_flush_queues(cl, NULL); + mei_cl_unlink(cl); + } + + mutex_unlock(&bus->device_lock); + return err; +} +EXPORT_SYMBOL_GPL(mei_cldev_disable); + +/** + * mei_cldev_send_gsc_command - sends a gsc command, by sending + * a gsl mei message to gsc and receiving reply from gsc + * + * @cldev: me client device + * @client_id: client id to send the command to + * @fence_id: fence id to send the command to + * @sg_in: scatter gather list containing addresses for rx message buffer + * @total_in_len: total length of data in 'in' sg, can be less than the sum of buffers sizes + * @sg_out: scatter gather list containing addresses for tx message buffer + * + * Return: + * * written size in bytes + * * < 0 on error + */ +ssize_t mei_cldev_send_gsc_command(struct mei_cl_device *cldev, + u8 client_id, u32 fence_id, + struct scatterlist *sg_in, + size_t total_in_len, + struct scatterlist *sg_out) +{ + struct mei_cl *cl; + struct mei_device *bus; + ssize_t ret = 0; + + struct mei_ext_hdr_gsc_h2f *ext_hdr; + size_t buf_sz = sizeof(struct mei_ext_hdr_gsc_h2f); + int sg_out_nents, sg_in_nents; + int i; + struct scatterlist *sg; + struct mei_ext_hdr_gsc_f2h rx_msg; + unsigned int sg_len; + + if (!cldev || !sg_in || !sg_out) + return -EINVAL; + + cl = cldev->cl; + bus = cldev->bus; + + dev_dbg(bus->dev, "client_id %u, fence_id %u\n", client_id, fence_id); + + if (!bus->hbm_f_gsc_supported) + return -EOPNOTSUPP; + + sg_out_nents = sg_nents(sg_out); + sg_in_nents = sg_nents(sg_in); + /* at least one entry in tx and rx sgls must be present */ + if (sg_out_nents <= 0 || sg_in_nents <= 0) + return -EINVAL; + + buf_sz += (sg_out_nents + sg_in_nents) * sizeof(struct mei_gsc_sgl); + ext_hdr = kzalloc(buf_sz, GFP_KERNEL); + if (!ext_hdr) + return -ENOMEM; + + /* construct the GSC message */ + ext_hdr->hdr.type = MEI_EXT_HDR_GSC; + ext_hdr->hdr.length = buf_sz / sizeof(u32); /* length is in dw */ + + ext_hdr->client_id = client_id; + ext_hdr->addr_type = GSC_ADDRESS_TYPE_PHYSICAL_SGL; + ext_hdr->fence_id = fence_id; + ext_hdr->input_address_count = sg_in_nents; + ext_hdr->output_address_count = sg_out_nents; + ext_hdr->reserved[0] = 0; + ext_hdr->reserved[1] = 0; + + /* copy in-sgl to the message */ + for (i = 0, sg = sg_in; i < sg_in_nents; i++, sg++) { + ext_hdr->sgl[i].low = lower_32_bits(sg_dma_address(sg)); + ext_hdr->sgl[i].high = upper_32_bits(sg_dma_address(sg)); + sg_len = min_t(unsigned int, 
sg_dma_len(sg), PAGE_SIZE); + ext_hdr->sgl[i].length = (sg_len <= total_in_len) ? sg_len : total_in_len; + total_in_len -= ext_hdr->sgl[i].length; + } + + /* copy out-sgl to the message */ + for (i = sg_in_nents, sg = sg_out; i < sg_in_nents + sg_out_nents; i++, sg++) { + ext_hdr->sgl[i].low = lower_32_bits(sg_dma_address(sg)); + ext_hdr->sgl[i].high = upper_32_bits(sg_dma_address(sg)); + sg_len = min_t(unsigned int, sg_dma_len(sg), PAGE_SIZE); + ext_hdr->sgl[i].length = sg_len; + } + + /* send the message to GSC */ + ret = __mei_cl_send(cl, (u8 *)ext_hdr, buf_sz, 0, MEI_CL_IO_SGL); + if (ret < 0) { + dev_err(bus->dev, "__mei_cl_send failed, returned %zd\n", ret); + goto end; + } + if (ret != buf_sz) { + dev_err(bus->dev, "__mei_cl_send returned %zd instead of expected %zd\n", + ret, buf_sz); + ret = -EIO; + goto end; + } + + /* receive the reply from GSC, note that at this point sg_in should contain the reply */ + ret = __mei_cl_recv(cl, (u8 *)&rx_msg, sizeof(rx_msg), NULL, MEI_CL_IO_SGL, 0); + + if (ret != sizeof(rx_msg)) { + dev_err(bus->dev, "__mei_cl_recv returned %zd instead of expected %zd\n", + ret, sizeof(rx_msg)); + if (ret >= 0) + ret = -EIO; + goto end; + } + + /* check rx_msg.client_id and rx_msg.fence_id match the ones we send */ + if (rx_msg.client_id != client_id || rx_msg.fence_id != fence_id) { + dev_err(bus->dev, "received client_id/fence_id %u/%u instead of %u/%u sent\n", + rx_msg.client_id, rx_msg.fence_id, client_id, fence_id); + ret = -EFAULT; + goto end; + } + + dev_dbg(bus->dev, "gsc command: successfully written %u bytes\n", rx_msg.written); + ret = rx_msg.written; + +end: + kfree(ext_hdr); + return ret; +} +EXPORT_SYMBOL_GPL(mei_cldev_send_gsc_command); + +/** + * mei_cl_device_find - find matching entry in the driver id table + * + * @cldev: me client device + * @cldrv: me client driver + * + * Return: id on success; NULL if no id is matching + */ +static const +struct mei_cl_device_id *mei_cl_device_find(const struct mei_cl_device *cldev, + const struct mei_cl_driver *cldrv) +{ + const struct mei_cl_device_id *id; + const uuid_le *uuid; + u8 version; + bool match; + + uuid = mei_me_cl_uuid(cldev->me_cl); + version = mei_me_cl_ver(cldev->me_cl); + + id = cldrv->id_table; + while (uuid_le_cmp(NULL_UUID_LE, id->uuid)) { + if (!uuid_le_cmp(*uuid, id->uuid)) { + match = true; + + if (cldev->name[0]) + if (strncmp(cldev->name, id->name, + sizeof(id->name))) + match = false; + + if (id->version != MEI_CL_VERSION_ANY) + if (id->version != version) + match = false; + if (match) + return id; + } + + id++; + } + + return NULL; +} + +/** + * mei_cl_device_match - device match function + * + * @dev: device + * @drv: driver + * + * Return: 1 if matching device was found 0 otherwise + */ +static int mei_cl_device_match(struct device *dev, struct device_driver *drv) +{ + const struct mei_cl_device *cldev = to_mei_cl_device(dev); + const struct mei_cl_driver *cldrv = to_mei_cl_driver(drv); + const struct mei_cl_device_id *found_id; + + if (!cldev->do_match) + return 0; + + if (!cldrv || !cldrv->id_table) + return 0; + + found_id = mei_cl_device_find(cldev, cldrv); + if (found_id) + return 1; + + return 0; +} + +/** + * mei_cl_device_probe - bus probe function + * + * @dev: device + * + * Return: 0 on success; < 0 otherwise + */ +static int mei_cl_device_probe(struct device *dev) +{ + struct mei_cl_device *cldev; + struct mei_cl_driver *cldrv; + const struct mei_cl_device_id *id; + int ret; + + cldev = to_mei_cl_device(dev); + cldrv = to_mei_cl_driver(dev->driver); + + if 
(!cldrv || !cldrv->probe) + return -ENODEV; + + id = mei_cl_device_find(cldev, cldrv); + if (!id) + return -ENODEV; + + if (!mei_cl_bus_module_get(cldev)) { + dev_err(&cldev->dev, "get hw module failed"); + return -ENODEV; + } + + ret = cldrv->probe(cldev, id); + if (ret) { + mei_cl_bus_module_put(cldev); + return ret; + } + + __module_get(THIS_MODULE); + return 0; +} + +/** + * mei_cl_device_remove - remove device from the bus + * + * @dev: device + * + * Return: 0 on success; < 0 otherwise + */ +static void mei_cl_device_remove(struct device *dev) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + struct mei_cl_driver *cldrv = to_mei_cl_driver(dev->driver); + + if (cldrv->remove) + cldrv->remove(cldev); + + mei_cldev_unregister_callbacks(cldev); + + mei_cl_bus_module_put(cldev); + module_put(THIS_MODULE); +} + +static ssize_t name_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + + return scnprintf(buf, PAGE_SIZE, "%s", cldev->name); +} +static DEVICE_ATTR_RO(name); + +static ssize_t uuid_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl); + + return sprintf(buf, "%pUl", uuid); +} +static DEVICE_ATTR_RO(uuid); + +static ssize_t version_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + u8 version = mei_me_cl_ver(cldev->me_cl); + + return sprintf(buf, "%02X", version); +} +static DEVICE_ATTR_RO(version); + +static ssize_t modalias_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl); + u8 version = mei_me_cl_ver(cldev->me_cl); + + return scnprintf(buf, PAGE_SIZE, "mei:%s:%pUl:%02X:", + cldev->name, uuid, version); +} +static DEVICE_ATTR_RO(modalias); + +static ssize_t max_conn_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + u8 maxconn = mei_me_cl_max_conn(cldev->me_cl); + + return sprintf(buf, "%d", maxconn); +} +static DEVICE_ATTR_RO(max_conn); + +static ssize_t fixed_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + u8 fixed = mei_me_cl_fixed(cldev->me_cl); + + return sprintf(buf, "%d", fixed); +} +static DEVICE_ATTR_RO(fixed); + +static ssize_t vtag_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + bool vt = mei_me_cl_vt(cldev->me_cl); + + return sprintf(buf, "%d", vt); +} +static DEVICE_ATTR_RO(vtag); + +static ssize_t max_len_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + u32 maxlen = mei_me_cl_max_len(cldev->me_cl); + + return sprintf(buf, "%u", maxlen); +} +static DEVICE_ATTR_RO(max_len); + +static struct attribute *mei_cldev_attrs[] = { + &dev_attr_name.attr, + &dev_attr_uuid.attr, + &dev_attr_version.attr, + &dev_attr_modalias.attr, + &dev_attr_max_conn.attr, + &dev_attr_fixed.attr, + &dev_attr_vtag.attr, + &dev_attr_max_len.attr, + NULL, +}; +ATTRIBUTE_GROUPS(mei_cldev); + +/** + * mei_cl_device_uevent - me client bus uevent handler + * + * @dev: device + * @env: uevent kobject + * + * Return: 0 on success -ENOMEM on when add_uevent_var fails + */ +static int 
mei_cl_device_uevent(const struct device *dev, struct kobj_uevent_env *env) +{ + const struct mei_cl_device *cldev = to_mei_cl_device(dev); + const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl); + u8 version = mei_me_cl_ver(cldev->me_cl); + + if (add_uevent_var(env, "MEI_CL_VERSION=%d", version)) + return -ENOMEM; + + if (add_uevent_var(env, "MEI_CL_UUID=%pUl", uuid)) + return -ENOMEM; + + if (add_uevent_var(env, "MEI_CL_NAME=%s", cldev->name)) + return -ENOMEM; + + if (add_uevent_var(env, "MODALIAS=mei:%s:%pUl:%02X:", + cldev->name, uuid, version)) + return -ENOMEM; + + return 0; +} + +static struct bus_type mei_cl_bus_type = { + .name = "mei", + .dev_groups = mei_cldev_groups, + .match = mei_cl_device_match, + .probe = mei_cl_device_probe, + .remove = mei_cl_device_remove, + .uevent = mei_cl_device_uevent, +}; + +static struct mei_device *mei_dev_bus_get(struct mei_device *bus) +{ + if (bus) + get_device(bus->dev); + + return bus; +} + +static void mei_dev_bus_put(struct mei_device *bus) +{ + if (bus) + put_device(bus->dev); +} + +static void mei_cl_bus_dev_release(struct device *dev) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + + mei_cl_flush_queues(cldev->cl, NULL); + mei_me_cl_put(cldev->me_cl); + mei_dev_bus_put(cldev->bus); + kfree(cldev->cl); + kfree(cldev); +} + +static const struct device_type mei_cl_device_type = { + .release = mei_cl_bus_dev_release, +}; + +/** + * mei_cl_bus_set_name - set device name for me client device + * - + * Example: 0000:00:16.0-55213584-9a29-4916-badf-0fb7ed682aeb + * + * @cldev: me client device + */ +static inline void mei_cl_bus_set_name(struct mei_cl_device *cldev) +{ + dev_set_name(&cldev->dev, "%s-%pUl", + dev_name(cldev->bus->dev), + mei_me_cl_uuid(cldev->me_cl)); +} + +/** + * mei_cl_bus_dev_alloc - initialize and allocate mei client device + * + * @bus: mei device + * @me_cl: me client + * + * Return: allocated device structur or NULL on allocation failure + */ +static struct mei_cl_device *mei_cl_bus_dev_alloc(struct mei_device *bus, + struct mei_me_client *me_cl) +{ + struct mei_cl_device *cldev; + struct mei_cl *cl; + + cldev = kzalloc(sizeof(*cldev), GFP_KERNEL); + if (!cldev) + return NULL; + + cl = mei_cl_allocate(bus); + if (!cl) { + kfree(cldev); + return NULL; + } + + device_initialize(&cldev->dev); + cldev->dev.parent = bus->dev; + cldev->dev.bus = &mei_cl_bus_type; + cldev->dev.type = &mei_cl_device_type; + cldev->bus = mei_dev_bus_get(bus); + cldev->me_cl = mei_me_cl_get(me_cl); + cldev->cl = cl; + mei_cl_bus_set_name(cldev); + cldev->is_added = 0; + INIT_LIST_HEAD(&cldev->bus_list); + device_enable_async_suspend(&cldev->dev); + + return cldev; +} + +/** + * mei_cl_bus_dev_setup - setup me client device + * run fix up routines and set the device name + * + * @bus: mei device + * @cldev: me client device + * + * Return: true if the device is eligible for enumeration + */ +static bool mei_cl_bus_dev_setup(struct mei_device *bus, + struct mei_cl_device *cldev) +{ + cldev->do_match = 1; + mei_cl_bus_dev_fixup(cldev); + + /* the device name can change during fix up */ + if (cldev->do_match) + mei_cl_bus_set_name(cldev); + + return cldev->do_match == 1; +} + +/** + * mei_cl_bus_dev_add - add me client devices + * + * @cldev: me client device + * + * Return: 0 on success; < 0 on failre + */ +static int mei_cl_bus_dev_add(struct mei_cl_device *cldev) +{ + int ret; + + dev_dbg(cldev->bus->dev, "adding %pUL:%02X\n", + mei_me_cl_uuid(cldev->me_cl), + mei_me_cl_ver(cldev->me_cl)); + ret = device_add(&cldev->dev); + if (!ret) + 
cldev->is_added = 1; + + return ret; +} + +/** + * mei_cl_bus_dev_stop - stop the driver + * + * @cldev: me client device + */ +static void mei_cl_bus_dev_stop(struct mei_cl_device *cldev) +{ + cldev->do_match = 0; + if (cldev->is_added) + device_release_driver(&cldev->dev); +} + +/** + * mei_cl_bus_dev_destroy - destroy me client devices object + * + * @cldev: me client device + * + * Locking: called under "dev->cl_bus_lock" lock + */ +static void mei_cl_bus_dev_destroy(struct mei_cl_device *cldev) +{ + + WARN_ON(!mutex_is_locked(&cldev->bus->cl_bus_lock)); + + if (!cldev->is_added) + return; + + device_del(&cldev->dev); + + list_del_init(&cldev->bus_list); + + cldev->is_added = 0; + put_device(&cldev->dev); +} + +/** + * mei_cl_bus_remove_device - remove a devices form the bus + * + * @cldev: me client device + */ +static void mei_cl_bus_remove_device(struct mei_cl_device *cldev) +{ + mei_cl_bus_dev_stop(cldev); + mei_cl_bus_dev_destroy(cldev); +} + +/** + * mei_cl_bus_remove_devices - remove all devices form the bus + * + * @bus: mei device + */ +void mei_cl_bus_remove_devices(struct mei_device *bus) +{ + struct mei_cl_device *cldev, *next; + + mutex_lock(&bus->cl_bus_lock); + list_for_each_entry_safe(cldev, next, &bus->device_list, bus_list) + mei_cl_bus_remove_device(cldev); + mutex_unlock(&bus->cl_bus_lock); +} + + +/** + * mei_cl_bus_dev_init - allocate and initializes an mei client devices + * based on me client + * + * @bus: mei device + * @me_cl: me client + * + * Locking: called under "dev->cl_bus_lock" lock + */ +static void mei_cl_bus_dev_init(struct mei_device *bus, + struct mei_me_client *me_cl) +{ + struct mei_cl_device *cldev; + + WARN_ON(!mutex_is_locked(&bus->cl_bus_lock)); + + dev_dbg(bus->dev, "initializing %pUl", mei_me_cl_uuid(me_cl)); + + if (me_cl->bus_added) + return; + + cldev = mei_cl_bus_dev_alloc(bus, me_cl); + if (!cldev) + return; + + me_cl->bus_added = true; + list_add_tail(&cldev->bus_list, &bus->device_list); + +} + +/** + * mei_cl_bus_rescan - scan me clients list and add create + * devices for eligible clients + * + * @bus: mei device + */ +static void mei_cl_bus_rescan(struct mei_device *bus) +{ + struct mei_cl_device *cldev, *n; + struct mei_me_client *me_cl; + + mutex_lock(&bus->cl_bus_lock); + + down_read(&bus->me_clients_rwsem); + list_for_each_entry(me_cl, &bus->me_clients, list) + mei_cl_bus_dev_init(bus, me_cl); + up_read(&bus->me_clients_rwsem); + + list_for_each_entry_safe(cldev, n, &bus->device_list, bus_list) { + + if (!mei_me_cl_is_active(cldev->me_cl)) { + mei_cl_bus_remove_device(cldev); + continue; + } + + if (cldev->is_added) + continue; + + if (mei_cl_bus_dev_setup(bus, cldev)) + mei_cl_bus_dev_add(cldev); + else { + list_del_init(&cldev->bus_list); + put_device(&cldev->dev); + } + } + mutex_unlock(&bus->cl_bus_lock); + + dev_dbg(bus->dev, "rescan end"); +} + +void mei_cl_bus_rescan_work(struct work_struct *work) +{ + struct mei_device *bus = + container_of(work, struct mei_device, bus_rescan_work); + + mei_cl_bus_rescan(bus); +} + +int __mei_cldev_driver_register(struct mei_cl_driver *cldrv, + struct module *owner) +{ + int err; + + cldrv->driver.name = cldrv->name; + cldrv->driver.owner = owner; + cldrv->driver.bus = &mei_cl_bus_type; + + err = driver_register(&cldrv->driver); + if (err) + return err; + + pr_debug("mei: driver [%s] registered\n", cldrv->driver.name); + + return 0; +} +EXPORT_SYMBOL_GPL(__mei_cldev_driver_register); + +void mei_cldev_driver_unregister(struct mei_cl_driver *cldrv) +{ + 
driver_unregister(&cldrv->driver); + + pr_debug("mei: driver [%s] unregistered\n", cldrv->driver.name); +} +EXPORT_SYMBOL_GPL(mei_cldev_driver_unregister); + + +int __init mei_cl_bus_init(void) +{ + return bus_register(&mei_cl_bus_type); +} + +void __exit mei_cl_bus_exit(void) +{ + bus_unregister(&mei_cl_bus_type); +} diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c new file mode 100644 index 0000000000..32f2287823 --- /dev/null +++ b/drivers/misc/mei/client.c @@ -0,0 +1,2474 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2003-2022, Intel Corporation. All rights reserved. + * Intel Management Engine Interface (Intel MEI) Linux driver + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include "mei_dev.h" +#include "hbm.h" +#include "client.h" + +/** + * mei_me_cl_init - initialize me client + * + * @me_cl: me client + */ +void mei_me_cl_init(struct mei_me_client *me_cl) +{ + INIT_LIST_HEAD(&me_cl->list); + kref_init(&me_cl->refcnt); +} + +/** + * mei_me_cl_get - increases me client refcount + * + * @me_cl: me client + * + * Locking: called under "dev->device_lock" lock + * + * Return: me client or NULL + */ +struct mei_me_client *mei_me_cl_get(struct mei_me_client *me_cl) +{ + if (me_cl && kref_get_unless_zero(&me_cl->refcnt)) + return me_cl; + + return NULL; +} + +/** + * mei_me_cl_release - free me client + * + * Locking: called under "dev->device_lock" lock + * + * @ref: me_client refcount + */ +static void mei_me_cl_release(struct kref *ref) +{ + struct mei_me_client *me_cl = + container_of(ref, struct mei_me_client, refcnt); + + kfree(me_cl); +} + +/** + * mei_me_cl_put - decrease me client refcount and free client if necessary + * + * Locking: called under "dev->device_lock" lock + * + * @me_cl: me client + */ +void mei_me_cl_put(struct mei_me_client *me_cl) +{ + if (me_cl) + kref_put(&me_cl->refcnt, mei_me_cl_release); +} + +/** + * __mei_me_cl_del - delete me client from the list and decrease + * reference counter + * + * @dev: mei device + * @me_cl: me client + * + * Locking: dev->me_clients_rwsem + */ +static void __mei_me_cl_del(struct mei_device *dev, struct mei_me_client *me_cl) +{ + if (!me_cl) + return; + + list_del_init(&me_cl->list); + mei_me_cl_put(me_cl); +} + +/** + * mei_me_cl_del - delete me client from the list and decrease + * reference counter + * + * @dev: mei device + * @me_cl: me client + */ +void mei_me_cl_del(struct mei_device *dev, struct mei_me_client *me_cl) +{ + down_write(&dev->me_clients_rwsem); + __mei_me_cl_del(dev, me_cl); + up_write(&dev->me_clients_rwsem); +} + +/** + * mei_me_cl_add - add me client to the list + * + * @dev: mei device + * @me_cl: me client + */ +void mei_me_cl_add(struct mei_device *dev, struct mei_me_client *me_cl) +{ + down_write(&dev->me_clients_rwsem); + list_add(&me_cl->list, &dev->me_clients); + up_write(&dev->me_clients_rwsem); +} + +/** + * __mei_me_cl_by_uuid - locate me client by uuid + * increases ref count + * + * @dev: mei device + * @uuid: me client uuid + * + * Return: me client or NULL if not found + * + * Locking: dev->me_clients_rwsem + */ +static struct mei_me_client *__mei_me_cl_by_uuid(struct mei_device *dev, + const uuid_le *uuid) +{ + struct mei_me_client *me_cl; + const uuid_le *pn; + + WARN_ON(!rwsem_is_locked(&dev->me_clients_rwsem)); + + list_for_each_entry(me_cl, &dev->me_clients, list) { + pn = &me_cl->props.protocol_name; + if (uuid_le_cmp(*uuid, *pn) == 0) + return mei_me_cl_get(me_cl); + } + + return NULL; +} + +/** + * mei_me_cl_by_uuid 
- locate me client by uuid + * increases ref count + * + * @dev: mei device + * @uuid: me client uuid + * + * Return: me client or NULL if not found + * + * Locking: dev->me_clients_rwsem + */ +struct mei_me_client *mei_me_cl_by_uuid(struct mei_device *dev, + const uuid_le *uuid) +{ + struct mei_me_client *me_cl; + + down_read(&dev->me_clients_rwsem); + me_cl = __mei_me_cl_by_uuid(dev, uuid); + up_read(&dev->me_clients_rwsem); + + return me_cl; +} + +/** + * mei_me_cl_by_id - locate me client by client id + * increases ref count + * + * @dev: the device structure + * @client_id: me client id + * + * Return: me client or NULL if not found + * + * Locking: dev->me_clients_rwsem + */ +struct mei_me_client *mei_me_cl_by_id(struct mei_device *dev, u8 client_id) +{ + + struct mei_me_client *__me_cl, *me_cl = NULL; + + down_read(&dev->me_clients_rwsem); + list_for_each_entry(__me_cl, &dev->me_clients, list) { + if (__me_cl->client_id == client_id) { + me_cl = mei_me_cl_get(__me_cl); + break; + } + } + up_read(&dev->me_clients_rwsem); + + return me_cl; +} + +/** + * __mei_me_cl_by_uuid_id - locate me client by client id and uuid + * increases ref count + * + * @dev: the device structure + * @uuid: me client uuid + * @client_id: me client id + * + * Return: me client or null if not found + * + * Locking: dev->me_clients_rwsem + */ +static struct mei_me_client *__mei_me_cl_by_uuid_id(struct mei_device *dev, + const uuid_le *uuid, u8 client_id) +{ + struct mei_me_client *me_cl; + const uuid_le *pn; + + WARN_ON(!rwsem_is_locked(&dev->me_clients_rwsem)); + + list_for_each_entry(me_cl, &dev->me_clients, list) { + pn = &me_cl->props.protocol_name; + if (uuid_le_cmp(*uuid, *pn) == 0 && + me_cl->client_id == client_id) + return mei_me_cl_get(me_cl); + } + + return NULL; +} + + +/** + * mei_me_cl_by_uuid_id - locate me client by client id and uuid + * increases ref count + * + * @dev: the device structure + * @uuid: me client uuid + * @client_id: me client id + * + * Return: me client or null if not found + */ +struct mei_me_client *mei_me_cl_by_uuid_id(struct mei_device *dev, + const uuid_le *uuid, u8 client_id) +{ + struct mei_me_client *me_cl; + + down_read(&dev->me_clients_rwsem); + me_cl = __mei_me_cl_by_uuid_id(dev, uuid, client_id); + up_read(&dev->me_clients_rwsem); + + return me_cl; +} + +/** + * mei_me_cl_rm_by_uuid - remove all me clients matching uuid + * + * @dev: the device structure + * @uuid: me client uuid + * + * Locking: called under "dev->device_lock" lock + */ +void mei_me_cl_rm_by_uuid(struct mei_device *dev, const uuid_le *uuid) +{ + struct mei_me_client *me_cl; + + dev_dbg(dev->dev, "remove %pUl\n", uuid); + + down_write(&dev->me_clients_rwsem); + me_cl = __mei_me_cl_by_uuid(dev, uuid); + __mei_me_cl_del(dev, me_cl); + mei_me_cl_put(me_cl); + up_write(&dev->me_clients_rwsem); +} + +/** + * mei_me_cl_rm_by_uuid_id - remove all me clients matching client id + * + * @dev: the device structure + * @uuid: me client uuid + * @id: me client id + * + * Locking: called under "dev->device_lock" lock + */ +void mei_me_cl_rm_by_uuid_id(struct mei_device *dev, const uuid_le *uuid, u8 id) +{ + struct mei_me_client *me_cl; + + dev_dbg(dev->dev, "remove %pUl %d\n", uuid, id); + + down_write(&dev->me_clients_rwsem); + me_cl = __mei_me_cl_by_uuid_id(dev, uuid, id); + __mei_me_cl_del(dev, me_cl); + mei_me_cl_put(me_cl); + up_write(&dev->me_clients_rwsem); +} + +/** + * mei_me_cl_rm_all - remove all me clients + * + * @dev: the device structure + * + * Locking: called under "dev->device_lock" lock + */ 
+void mei_me_cl_rm_all(struct mei_device *dev) +{ + struct mei_me_client *me_cl, *next; + + down_write(&dev->me_clients_rwsem); + list_for_each_entry_safe(me_cl, next, &dev->me_clients, list) + __mei_me_cl_del(dev, me_cl); + up_write(&dev->me_clients_rwsem); +} + +/** + * mei_io_cb_free - free mei_cb_private related memory + * + * @cb: mei callback struct + */ +void mei_io_cb_free(struct mei_cl_cb *cb) +{ + if (cb == NULL) + return; + + list_del(&cb->list); + kfree(cb->buf.data); + kfree(cb->ext_hdr); + kfree(cb); +} + +/** + * mei_tx_cb_enqueue - queue tx callback + * + * Locking: called under "dev->device_lock" lock + * + * @cb: mei callback struct + * @head: an instance of list to queue on + */ +static inline void mei_tx_cb_enqueue(struct mei_cl_cb *cb, + struct list_head *head) +{ + list_add_tail(&cb->list, head); + cb->cl->tx_cb_queued++; +} + +/** + * mei_tx_cb_dequeue - dequeue tx callback + * + * Locking: called under "dev->device_lock" lock + * + * @cb: mei callback struct to dequeue and free + */ +static inline void mei_tx_cb_dequeue(struct mei_cl_cb *cb) +{ + if (!WARN_ON(cb->cl->tx_cb_queued == 0)) + cb->cl->tx_cb_queued--; + + mei_io_cb_free(cb); +} + +/** + * mei_cl_set_read_by_fp - set pending_read flag to vtag struct for given fp + * + * Locking: called under "dev->device_lock" lock + * + * @cl: mei client + * @fp: pointer to file structure + */ +static void mei_cl_set_read_by_fp(const struct mei_cl *cl, + const struct file *fp) +{ + struct mei_cl_vtag *cl_vtag; + + list_for_each_entry(cl_vtag, &cl->vtag_map, list) { + if (cl_vtag->fp == fp) { + cl_vtag->pending_read = true; + return; + } + } +} + +/** + * mei_io_cb_init - allocate and initialize io callback + * + * @cl: mei client + * @type: operation type + * @fp: pointer to file structure + * + * Return: mei_cl_cb pointer or NULL; + */ +static struct mei_cl_cb *mei_io_cb_init(struct mei_cl *cl, + enum mei_cb_file_ops type, + const struct file *fp) +{ + struct mei_cl_cb *cb; + + cb = kzalloc(sizeof(*cb), GFP_KERNEL); + if (!cb) + return NULL; + + INIT_LIST_HEAD(&cb->list); + cb->fp = fp; + cb->cl = cl; + cb->buf_idx = 0; + cb->fop_type = type; + cb->vtag = 0; + cb->ext_hdr = NULL; + + return cb; +} + +/** + * mei_io_list_flush_cl - removes cbs belonging to the cl. 
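+ * Callbacks that belong to @cl are unlinked from @head; only read
+ * (MEI_FOP_READ) callbacks are freed here, the others are merely
+ * removed from the list.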
+ * + * @head: an instance of our list structure + * @cl: host client + */ +static void mei_io_list_flush_cl(struct list_head *head, + const struct mei_cl *cl) +{ + struct mei_cl_cb *cb, *next; + + list_for_each_entry_safe(cb, next, head, list) { + if (cl == cb->cl) { + list_del_init(&cb->list); + if (cb->fop_type == MEI_FOP_READ) + mei_io_cb_free(cb); + } + } +} + +/** + * mei_io_tx_list_free_cl - removes cb belonging to the cl and free them + * + * @head: An instance of our list structure + * @cl: host client + * @fp: file pointer (matching cb file object), may be NULL + */ +static void mei_io_tx_list_free_cl(struct list_head *head, + const struct mei_cl *cl, + const struct file *fp) +{ + struct mei_cl_cb *cb, *next; + + list_for_each_entry_safe(cb, next, head, list) { + if (cl == cb->cl && (!fp || fp == cb->fp)) + mei_tx_cb_dequeue(cb); + } +} + +/** + * mei_io_list_free_fp - free cb from a list that matches file pointer + * + * @head: io list + * @fp: file pointer (matching cb file object), may be NULL + */ +static void mei_io_list_free_fp(struct list_head *head, const struct file *fp) +{ + struct mei_cl_cb *cb, *next; + + list_for_each_entry_safe(cb, next, head, list) + if (!fp || fp == cb->fp) + mei_io_cb_free(cb); +} + +/** + * mei_cl_free_pending - free pending cb + * + * @cl: host client + */ +static void mei_cl_free_pending(struct mei_cl *cl) +{ + struct mei_cl_cb *cb; + + cb = list_first_entry_or_null(&cl->rd_pending, struct mei_cl_cb, list); + mei_io_cb_free(cb); +} + +/** + * mei_cl_alloc_cb - a convenient wrapper for allocating read cb + * + * @cl: host client + * @length: size of the buffer + * @fop_type: operation type + * @fp: associated file pointer (might be NULL) + * + * Return: cb on success and NULL on failure + */ +struct mei_cl_cb *mei_cl_alloc_cb(struct mei_cl *cl, size_t length, + enum mei_cb_file_ops fop_type, + const struct file *fp) +{ + struct mei_cl_cb *cb; + + cb = mei_io_cb_init(cl, fop_type, fp); + if (!cb) + return NULL; + + if (length == 0) + return cb; + + cb->buf.data = kmalloc(roundup(length, MEI_SLOT_SIZE), GFP_KERNEL); + if (!cb->buf.data) { + mei_io_cb_free(cb); + return NULL; + } + cb->buf.size = length; + + return cb; +} + +/** + * mei_cl_enqueue_ctrl_wr_cb - a convenient wrapper for allocating + * and enqueuing of the control commands cb + * + * @cl: host client + * @length: size of the buffer + * @fop_type: operation type + * @fp: associated file pointer (might be NULL) + * + * Return: cb on success and NULL on failure + * Locking: called under "dev->device_lock" lock + */ +struct mei_cl_cb *mei_cl_enqueue_ctrl_wr_cb(struct mei_cl *cl, size_t length, + enum mei_cb_file_ops fop_type, + const struct file *fp) +{ + struct mei_cl_cb *cb; + + /* for RX always allocate at least client's mtu */ + if (length) + length = max_t(size_t, length, mei_cl_mtu(cl)); + + cb = mei_cl_alloc_cb(cl, length, fop_type, fp); + if (!cb) + return NULL; + + list_add_tail(&cb->list, &cl->dev->ctrl_wr_list); + return cb; +} + +/** + * mei_cl_read_cb - find this cl's callback in the read list + * for a specific file + * + * @cl: host client + * @fp: file pointer (matching cb file object), may be NULL + * + * Return: cb on success, NULL if cb is not found + */ +struct mei_cl_cb *mei_cl_read_cb(struct mei_cl *cl, const struct file *fp) +{ + struct mei_cl_cb *cb; + struct mei_cl_cb *ret_cb = NULL; + + spin_lock(&cl->rd_completed_lock); + list_for_each_entry(cb, &cl->rd_completed, list) + if (!fp || fp == cb->fp) { + ret_cb = cb; + break; + } + 
spin_unlock(&cl->rd_completed_lock); + return ret_cb; +} + +/** + * mei_cl_flush_queues - flushes queue lists belonging to cl. + * + * @cl: host client + * @fp: file pointer (matching cb file object), may be NULL + * + * Return: 0 on success, -EINVAL if cl or cl->dev is NULL. + */ +int mei_cl_flush_queues(struct mei_cl *cl, const struct file *fp) +{ + struct mei_device *dev; + + if (WARN_ON(!cl || !cl->dev)) + return -EINVAL; + + dev = cl->dev; + + cl_dbg(dev, cl, "remove list entry belonging to cl\n"); + mei_io_tx_list_free_cl(&cl->dev->write_list, cl, fp); + mei_io_tx_list_free_cl(&cl->dev->write_waiting_list, cl, fp); + /* free pending and control cb only in final flush */ + if (!fp) { + mei_io_list_flush_cl(&cl->dev->ctrl_wr_list, cl); + mei_io_list_flush_cl(&cl->dev->ctrl_rd_list, cl); + mei_cl_free_pending(cl); + } + spin_lock(&cl->rd_completed_lock); + mei_io_list_free_fp(&cl->rd_completed, fp); + spin_unlock(&cl->rd_completed_lock); + + return 0; +} + +/** + * mei_cl_init - initializes cl. + * + * @cl: host client to be initialized + * @dev: mei device + */ +static void mei_cl_init(struct mei_cl *cl, struct mei_device *dev) +{ + memset(cl, 0, sizeof(*cl)); + init_waitqueue_head(&cl->wait); + init_waitqueue_head(&cl->rx_wait); + init_waitqueue_head(&cl->tx_wait); + init_waitqueue_head(&cl->ev_wait); + INIT_LIST_HEAD(&cl->vtag_map); + spin_lock_init(&cl->rd_completed_lock); + INIT_LIST_HEAD(&cl->rd_completed); + INIT_LIST_HEAD(&cl->rd_pending); + INIT_LIST_HEAD(&cl->link); + cl->writing_state = MEI_IDLE; + cl->state = MEI_FILE_UNINITIALIZED; + cl->dev = dev; +} + +/** + * mei_cl_allocate - allocates cl structure and sets it up. + * + * @dev: mei device + * Return: The allocated file or NULL on failure + */ +struct mei_cl *mei_cl_allocate(struct mei_device *dev) +{ + struct mei_cl *cl; + + cl = kmalloc(sizeof(*cl), GFP_KERNEL); + if (!cl) + return NULL; + + mei_cl_init(cl, dev); + + return cl; +} + +/** + * mei_cl_link - allocate host id in the host map + * + * @cl: host client + * + * Return: 0 on success + * -EINVAL on incorrect values + * -EMFILE if open count exceeded. 
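+ *
+ * Illustrative pairing, modeled on mei_cl_alloc_linked() (which wraps
+ * mei_cl_allocate() + mei_cl_link()) and the bus fixup code; a sketch,
+ * not a prescribed sequence:
+ *
+ *	mutex_lock(&dev->device_lock);
+ *	cl = mei_cl_alloc_linked(dev);
+ *	...
+ *	mei_cl_unlink(cl);
+ *	mutex_unlock(&dev->device_lock);
+ *	kfree(cl);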
+ */ +int mei_cl_link(struct mei_cl *cl) +{ + struct mei_device *dev; + int id; + + if (WARN_ON(!cl || !cl->dev)) + return -EINVAL; + + dev = cl->dev; + + id = find_first_zero_bit(dev->host_clients_map, MEI_CLIENTS_MAX); + if (id >= MEI_CLIENTS_MAX) { + dev_err(dev->dev, "id exceeded %d", MEI_CLIENTS_MAX); + return -EMFILE; + } + + if (dev->open_handle_count >= MEI_MAX_OPEN_HANDLE_COUNT) { + dev_err(dev->dev, "open_handle_count exceeded %d", + MEI_MAX_OPEN_HANDLE_COUNT); + return -EMFILE; + } + + dev->open_handle_count++; + + cl->host_client_id = id; + list_add_tail(&cl->link, &dev->file_list); + + set_bit(id, dev->host_clients_map); + + cl->state = MEI_FILE_INITIALIZING; + + cl_dbg(dev, cl, "link cl\n"); + return 0; +} + +/** + * mei_cl_unlink - remove host client from the list + * + * @cl: host client + * + * Return: always 0 + */ +int mei_cl_unlink(struct mei_cl *cl) +{ + struct mei_device *dev; + + /* don't shout on error exit path */ + if (!cl) + return 0; + + if (WARN_ON(!cl->dev)) + return 0; + + dev = cl->dev; + + cl_dbg(dev, cl, "unlink client"); + + if (cl->state == MEI_FILE_UNINITIALIZED) + return 0; + + if (dev->open_handle_count > 0) + dev->open_handle_count--; + + /* never clear the 0 bit */ + if (cl->host_client_id) + clear_bit(cl->host_client_id, dev->host_clients_map); + + list_del_init(&cl->link); + + cl->state = MEI_FILE_UNINITIALIZED; + cl->writing_state = MEI_IDLE; + + WARN_ON(!list_empty(&cl->rd_completed) || + !list_empty(&cl->rd_pending) || + !list_empty(&cl->link)); + + return 0; +} + +void mei_host_client_init(struct mei_device *dev) +{ + mei_set_devstate(dev, MEI_DEV_ENABLED); + dev->reset_count = 0; + + schedule_work(&dev->bus_rescan_work); + + pm_runtime_mark_last_busy(dev->dev); + dev_dbg(dev->dev, "rpm: autosuspend\n"); + pm_request_autosuspend(dev->dev); +} + +/** + * mei_hbuf_acquire - try to acquire host buffer + * + * @dev: the device structure + * Return: true if host buffer was acquired + */ +bool mei_hbuf_acquire(struct mei_device *dev) +{ + if (mei_pg_state(dev) == MEI_PG_ON || + mei_pg_in_transition(dev)) { + dev_dbg(dev->dev, "device is in pg\n"); + return false; + } + + if (!dev->hbuf_is_ready) { + dev_dbg(dev->dev, "hbuf is not ready\n"); + return false; + } + + dev->hbuf_is_ready = false; + + return true; +} + +/** + * mei_cl_wake_all - wake up readers, writers and event waiters so + * they can be interrupted + * + * @cl: host client + */ +static void mei_cl_wake_all(struct mei_cl *cl) +{ + struct mei_device *dev = cl->dev; + + /* synchronized under device mutex */ + if (waitqueue_active(&cl->rx_wait)) { + cl_dbg(dev, cl, "Waking up reading client!\n"); + wake_up_interruptible(&cl->rx_wait); + } + /* synchronized under device mutex */ + if (waitqueue_active(&cl->tx_wait)) { + cl_dbg(dev, cl, "Waking up writing client!\n"); + wake_up_interruptible(&cl->tx_wait); + } + /* synchronized under device mutex */ + if (waitqueue_active(&cl->ev_wait)) { + cl_dbg(dev, cl, "Waking up waiting for event clients!\n"); + wake_up_interruptible(&cl->ev_wait); + } + /* synchronized under device mutex */ + if (waitqueue_active(&cl->wait)) { + cl_dbg(dev, cl, "Waking up ctrl write clients!\n"); + wake_up(&cl->wait); + } +} + +/** + * mei_cl_set_disconnected - set disconnected state and clear + * associated states and resources + * + * @cl: host client + */ +static void mei_cl_set_disconnected(struct mei_cl *cl) +{ + struct mei_device *dev = cl->dev; + + if (cl->state == MEI_FILE_DISCONNECTED || + cl->state <= MEI_FILE_INITIALIZING) + return; + + cl->state = 
MEI_FILE_DISCONNECTED; + mei_io_tx_list_free_cl(&dev->write_list, cl, NULL); + mei_io_tx_list_free_cl(&dev->write_waiting_list, cl, NULL); + mei_io_list_flush_cl(&dev->ctrl_rd_list, cl); + mei_io_list_flush_cl(&dev->ctrl_wr_list, cl); + mei_cl_wake_all(cl); + cl->rx_flow_ctrl_creds = 0; + cl->tx_flow_ctrl_creds = 0; + cl->timer_count = 0; + + if (!cl->me_cl) + return; + + if (!WARN_ON(cl->me_cl->connect_count == 0)) + cl->me_cl->connect_count--; + + if (cl->me_cl->connect_count == 0) + cl->me_cl->tx_flow_ctrl_creds = 0; + + mei_me_cl_put(cl->me_cl); + cl->me_cl = NULL; +} + +static int mei_cl_set_connecting(struct mei_cl *cl, struct mei_me_client *me_cl) +{ + if (!mei_me_cl_get(me_cl)) + return -ENOENT; + + /* only one connection is allowed for fixed address clients */ + if (me_cl->props.fixed_address) { + if (me_cl->connect_count) { + mei_me_cl_put(me_cl); + return -EBUSY; + } + } + + cl->me_cl = me_cl; + cl->state = MEI_FILE_CONNECTING; + cl->me_cl->connect_count++; + + return 0; +} + +/* + * mei_cl_send_disconnect - send disconnect request + * + * @cl: host client + * @cb: callback block + * + * Return: 0, OK; otherwise, error. + */ +static int mei_cl_send_disconnect(struct mei_cl *cl, struct mei_cl_cb *cb) +{ + struct mei_device *dev; + int ret; + + dev = cl->dev; + + ret = mei_hbm_cl_disconnect_req(dev, cl); + cl->status = ret; + if (ret) { + cl->state = MEI_FILE_DISCONNECT_REPLY; + return ret; + } + + list_move_tail(&cb->list, &dev->ctrl_rd_list); + cl->timer_count = dev->timeouts.connect; + mei_schedule_stall_timer(dev); + + return 0; +} + +/** + * mei_cl_irq_disconnect - processes close related operation from + * interrupt thread context - send disconnect request + * + * @cl: client + * @cb: callback block. + * @cmpl_list: complete list. + * + * Return: 0, OK; otherwise, error. + */ +int mei_cl_irq_disconnect(struct mei_cl *cl, struct mei_cl_cb *cb, + struct list_head *cmpl_list) +{ + struct mei_device *dev = cl->dev; + u32 msg_slots; + int slots; + int ret; + + msg_slots = mei_hbm2slots(sizeof(struct hbm_client_connect_request)); + slots = mei_hbuf_empty_slots(dev); + if (slots < 0) + return -EOVERFLOW; + + if ((u32)slots < msg_slots) + return -EMSGSIZE; + + ret = mei_cl_send_disconnect(cl, cb); + if (ret) + list_move_tail(&cb->list, cmpl_list); + + return ret; +} + +/** + * __mei_cl_disconnect - disconnect host client from the me one + * internal function runtime pm has to be already acquired + * + * @cl: host client + * + * Return: 0 on success, <0 on failure. 
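+ *
+ * Expected call pattern, as used by mei_cl_disconnect() below:
+ *
+ *	rets = pm_runtime_get(dev->dev);
+ *	...
+ *	rets = __mei_cl_disconnect(cl);
+ *	pm_runtime_mark_last_busy(dev->dev);
+ *	pm_runtime_put_autosuspend(dev->dev);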
+ */ +static int __mei_cl_disconnect(struct mei_cl *cl) +{ + struct mei_device *dev; + struct mei_cl_cb *cb; + int rets; + + dev = cl->dev; + + cl->state = MEI_FILE_DISCONNECTING; + + cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, MEI_FOP_DISCONNECT, NULL); + if (!cb) { + rets = -ENOMEM; + goto out; + } + + if (mei_hbuf_acquire(dev)) { + rets = mei_cl_send_disconnect(cl, cb); + if (rets) { + cl_err(dev, cl, "failed to disconnect.\n"); + goto out; + } + } + + mutex_unlock(&dev->device_lock); + wait_event_timeout(cl->wait, + cl->state == MEI_FILE_DISCONNECT_REPLY || + cl->state == MEI_FILE_DISCONNECTED, + dev->timeouts.cl_connect); + mutex_lock(&dev->device_lock); + + rets = cl->status; + if (cl->state != MEI_FILE_DISCONNECT_REPLY && + cl->state != MEI_FILE_DISCONNECTED) { + cl_dbg(dev, cl, "timeout on disconnect from FW client.\n"); + rets = -ETIME; + } + +out: + /* we disconnect also on error */ + mei_cl_set_disconnected(cl); + if (!rets) + cl_dbg(dev, cl, "successfully disconnected from FW client.\n"); + + mei_io_cb_free(cb); + return rets; +} + +/** + * mei_cl_disconnect - disconnect host client from the me one + * + * @cl: host client + * + * Locking: called under "dev->device_lock" lock + * + * Return: 0 on success, <0 on failure. + */ +int mei_cl_disconnect(struct mei_cl *cl) +{ + struct mei_device *dev; + int rets; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + cl_dbg(dev, cl, "disconnecting"); + + if (!mei_cl_is_connected(cl)) + return 0; + + if (mei_cl_is_fixed_address(cl)) { + mei_cl_set_disconnected(cl); + return 0; + } + + if (dev->dev_state == MEI_DEV_POWERING_DOWN || + dev->dev_state == MEI_DEV_POWER_DOWN) { + cl_dbg(dev, cl, "Device is powering down, don't bother with disconnection\n"); + mei_cl_set_disconnected(cl); + return 0; + } + + rets = pm_runtime_get(dev->dev); + if (rets < 0 && rets != -EINPROGRESS) { + pm_runtime_put_noidle(dev->dev); + cl_err(dev, cl, "rpm: get failed %d\n", rets); + return rets; + } + + rets = __mei_cl_disconnect(cl); + + cl_dbg(dev, cl, "rpm: autosuspend\n"); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + + return rets; +} + + +/** + * mei_cl_is_other_connecting - checks if other + * client with the same me client id is connecting + * + * @cl: private data of the file object + * + * Return: true if other client is connected, false - otherwise. + */ +static bool mei_cl_is_other_connecting(struct mei_cl *cl) +{ + struct mei_device *dev; + struct mei_cl_cb *cb; + + dev = cl->dev; + + list_for_each_entry(cb, &dev->ctrl_rd_list, list) { + if (cb->fop_type == MEI_FOP_CONNECT && + mei_cl_me_id(cl) == mei_cl_me_id(cb->cl)) + return true; + } + + return false; +} + +/** + * mei_cl_send_connect - send connect request + * + * @cl: host client + * @cb: callback block + * + * Return: 0, OK; otherwise, error. + */ +static int mei_cl_send_connect(struct mei_cl *cl, struct mei_cl_cb *cb) +{ + struct mei_device *dev; + int ret; + + dev = cl->dev; + + ret = mei_hbm_cl_connect_req(dev, cl); + cl->status = ret; + if (ret) { + cl->state = MEI_FILE_DISCONNECT_REPLY; + return ret; + } + + list_move_tail(&cb->list, &dev->ctrl_rd_list); + cl->timer_count = dev->timeouts.connect; + mei_schedule_stall_timer(dev); + return 0; +} + +/** + * mei_cl_irq_connect - send connect request in irq_thread context + * + * @cl: host client + * @cb: callback block + * @cmpl_list: complete list + * + * Return: 0, OK; otherwise, error. 
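+ *
+ * The request is skipped (returning 0) while another client with the
+ * same me client id is still connecting; -EMSGSIZE is returned when
+ * the host buffer cannot hold a struct hbm_client_connect_request and
+ * -EOVERFLOW on a slot accounting error.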
+ */ +int mei_cl_irq_connect(struct mei_cl *cl, struct mei_cl_cb *cb, + struct list_head *cmpl_list) +{ + struct mei_device *dev = cl->dev; + u32 msg_slots; + int slots; + int rets; + + if (mei_cl_is_other_connecting(cl)) + return 0; + + msg_slots = mei_hbm2slots(sizeof(struct hbm_client_connect_request)); + slots = mei_hbuf_empty_slots(dev); + if (slots < 0) + return -EOVERFLOW; + + if ((u32)slots < msg_slots) + return -EMSGSIZE; + + rets = mei_cl_send_connect(cl, cb); + if (rets) + list_move_tail(&cb->list, cmpl_list); + + return rets; +} + +/** + * mei_cl_connect - connect host client to the me one + * + * @cl: host client + * @me_cl: me client + * @fp: pointer to file structure + * + * Locking: called under "dev->device_lock" lock + * + * Return: 0 on success, <0 on failure. + */ +int mei_cl_connect(struct mei_cl *cl, struct mei_me_client *me_cl, + const struct file *fp) +{ + struct mei_device *dev; + struct mei_cl_cb *cb; + int rets; + + if (WARN_ON(!cl || !cl->dev || !me_cl)) + return -ENODEV; + + dev = cl->dev; + + rets = mei_cl_set_connecting(cl, me_cl); + if (rets) + goto nortpm; + + if (mei_cl_is_fixed_address(cl)) { + cl->state = MEI_FILE_CONNECTED; + rets = 0; + goto nortpm; + } + + rets = pm_runtime_get(dev->dev); + if (rets < 0 && rets != -EINPROGRESS) { + pm_runtime_put_noidle(dev->dev); + cl_err(dev, cl, "rpm: get failed %d\n", rets); + goto nortpm; + } + + cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, MEI_FOP_CONNECT, fp); + if (!cb) { + rets = -ENOMEM; + goto out; + } + + /* run hbuf acquire last so we don't have to undo */ + if (!mei_cl_is_other_connecting(cl) && mei_hbuf_acquire(dev)) { + rets = mei_cl_send_connect(cl, cb); + if (rets) + goto out; + } + + mutex_unlock(&dev->device_lock); + wait_event_timeout(cl->wait, + (cl->state == MEI_FILE_CONNECTED || + cl->state == MEI_FILE_DISCONNECTED || + cl->state == MEI_FILE_DISCONNECT_REQUIRED || + cl->state == MEI_FILE_DISCONNECT_REPLY), + dev->timeouts.cl_connect); + mutex_lock(&dev->device_lock); + + if (!mei_cl_is_connected(cl)) { + if (cl->state == MEI_FILE_DISCONNECT_REQUIRED) { + mei_io_list_flush_cl(&dev->ctrl_rd_list, cl); + mei_io_list_flush_cl(&dev->ctrl_wr_list, cl); + /* ignore disconnect return valuue; + * in case of failure reset will be invoked + */ + __mei_cl_disconnect(cl); + rets = -EFAULT; + goto out; + } + + /* timeout or something went really wrong */ + if (!cl->status) + cl->status = -EFAULT; + } + + rets = cl->status; +out: + cl_dbg(dev, cl, "rpm: autosuspend\n"); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + + mei_io_cb_free(cb); + +nortpm: + if (!mei_cl_is_connected(cl)) + mei_cl_set_disconnected(cl); + + return rets; +} + +/** + * mei_cl_alloc_linked - allocate and link host client + * + * @dev: the device structure + * + * Return: cl on success ERR_PTR on failure + */ +struct mei_cl *mei_cl_alloc_linked(struct mei_device *dev) +{ + struct mei_cl *cl; + int ret; + + cl = mei_cl_allocate(dev); + if (!cl) { + ret = -ENOMEM; + goto err; + } + + ret = mei_cl_link(cl); + if (ret) + goto err; + + return cl; +err: + kfree(cl); + return ERR_PTR(ret); +} + +/** + * mei_cl_tx_flow_ctrl_creds - checks flow_control credits for cl. + * + * @cl: host client + * + * Return: 1 if tx_flow_ctrl_creds >0, 0 - otherwise. 
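+ *
+ * Credits may come from the host client itself or, for clients with a
+ * single receive buffer, from the shared me client counter; fixed
+ * address clients are not subject to tx flow control at all.
+ * -EINVAL is returned (with a WARN) when @cl or its me client is NULL.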
+ */ +static int mei_cl_tx_flow_ctrl_creds(struct mei_cl *cl) +{ + if (WARN_ON(!cl || !cl->me_cl)) + return -EINVAL; + + if (cl->tx_flow_ctrl_creds > 0) + return 1; + + if (mei_cl_is_fixed_address(cl)) + return 1; + + if (mei_cl_is_single_recv_buf(cl)) { + if (cl->me_cl->tx_flow_ctrl_creds > 0) + return 1; + } + return 0; +} + +/** + * mei_cl_tx_flow_ctrl_creds_reduce - reduces transmit flow control credits + * for a client + * + * @cl: host client + * + * Return: + * 0 on success + * -EINVAL when ctrl credits are <= 0 + */ +static int mei_cl_tx_flow_ctrl_creds_reduce(struct mei_cl *cl) +{ + if (WARN_ON(!cl || !cl->me_cl)) + return -EINVAL; + + if (mei_cl_is_fixed_address(cl)) + return 0; + + if (mei_cl_is_single_recv_buf(cl)) { + if (WARN_ON(cl->me_cl->tx_flow_ctrl_creds <= 0)) + return -EINVAL; + cl->me_cl->tx_flow_ctrl_creds--; + } else { + if (WARN_ON(cl->tx_flow_ctrl_creds <= 0)) + return -EINVAL; + cl->tx_flow_ctrl_creds--; + } + return 0; +} + +/** + * mei_cl_vtag_alloc - allocate and fill the vtag structure + * + * @fp: pointer to file structure + * @vtag: vm tag + * + * Return: + * * Pointer to allocated struct - on success + * * ERR_PTR(-ENOMEM) on memory allocation failure + */ +struct mei_cl_vtag *mei_cl_vtag_alloc(struct file *fp, u8 vtag) +{ + struct mei_cl_vtag *cl_vtag; + + cl_vtag = kzalloc(sizeof(*cl_vtag), GFP_KERNEL); + if (!cl_vtag) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&cl_vtag->list); + cl_vtag->vtag = vtag; + cl_vtag->fp = fp; + + return cl_vtag; +} + +/** + * mei_cl_fp_by_vtag - obtain the file pointer by vtag + * + * @cl: host client + * @vtag: virtual tag + * + * Return: + * * A file pointer - on success + * * ERR_PTR(-ENOENT) if vtag is not found in the client vtag list + */ +const struct file *mei_cl_fp_by_vtag(const struct mei_cl *cl, u8 vtag) +{ + struct mei_cl_vtag *vtag_l; + + list_for_each_entry(vtag_l, &cl->vtag_map, list) + /* The client on bus has one fixed fp */ + if ((cl->cldev && mei_cldev_enabled(cl->cldev)) || + vtag_l->vtag == vtag) + return vtag_l->fp; + + return ERR_PTR(-ENOENT); +} + +/** + * mei_cl_reset_read_by_vtag - reset pending_read flag by given vtag + * + * @cl: host client + * @vtag: vm tag + */ +static void mei_cl_reset_read_by_vtag(const struct mei_cl *cl, u8 vtag) +{ + struct mei_cl_vtag *vtag_l; + + list_for_each_entry(vtag_l, &cl->vtag_map, list) { + /* The client on bus has one fixed vtag map */ + if ((cl->cldev && mei_cldev_enabled(cl->cldev)) || + vtag_l->vtag == vtag) { + vtag_l->pending_read = false; + break; + } + } +} + +/** + * mei_cl_read_vtag_add_fc - add flow control for next pending reader + * in the vtag list + * + * @cl: host client + */ +static void mei_cl_read_vtag_add_fc(struct mei_cl *cl) +{ + struct mei_cl_vtag *cl_vtag; + + list_for_each_entry(cl_vtag, &cl->vtag_map, list) { + if (cl_vtag->pending_read) { + if (mei_cl_enqueue_ctrl_wr_cb(cl, + mei_cl_mtu(cl), + MEI_FOP_READ, + cl_vtag->fp)) + cl->rx_flow_ctrl_creds++; + break; + } + } +} + +/** + * mei_cl_vt_support_check - check if client support vtags + * + * @cl: host client + * + * Return: + * * 0 - supported, or not connected at all + * * -EOPNOTSUPP - vtags are not supported by client + */ +int mei_cl_vt_support_check(const struct mei_cl *cl) +{ + struct mei_device *dev = cl->dev; + + if (!dev->hbm_f_vt_supported) + return -EOPNOTSUPP; + + if (!cl->me_cl) + return 0; + + return cl->me_cl->props.vt_supported ? 
0 : -EOPNOTSUPP; +} + +/** + * mei_cl_add_rd_completed - add read completed callback to list with lock + * and vtag check + * + * @cl: host client + * @cb: callback block + * + */ +void mei_cl_add_rd_completed(struct mei_cl *cl, struct mei_cl_cb *cb) +{ + const struct file *fp; + + if (!mei_cl_vt_support_check(cl)) { + fp = mei_cl_fp_by_vtag(cl, cb->vtag); + if (IS_ERR(fp)) { + /* client already disconnected, discarding */ + mei_io_cb_free(cb); + return; + } + cb->fp = fp; + mei_cl_reset_read_by_vtag(cl, cb->vtag); + mei_cl_read_vtag_add_fc(cl); + } + + spin_lock(&cl->rd_completed_lock); + list_add_tail(&cb->list, &cl->rd_completed); + spin_unlock(&cl->rd_completed_lock); +} + +/** + * mei_cl_del_rd_completed - free read completed callback with lock + * + * @cl: host client + * @cb: callback block + * + */ +void mei_cl_del_rd_completed(struct mei_cl *cl, struct mei_cl_cb *cb) +{ + spin_lock(&cl->rd_completed_lock); + mei_io_cb_free(cb); + spin_unlock(&cl->rd_completed_lock); +} + +/** + * mei_cl_notify_fop2req - convert fop to proper request + * + * @fop: client notification start response command + * + * Return: MEI_HBM_NOTIFICATION_START/STOP + */ +u8 mei_cl_notify_fop2req(enum mei_cb_file_ops fop) +{ + if (fop == MEI_FOP_NOTIFY_START) + return MEI_HBM_NOTIFICATION_START; + else + return MEI_HBM_NOTIFICATION_STOP; +} + +/** + * mei_cl_notify_req2fop - convert notification request top file operation type + * + * @req: hbm notification request type + * + * Return: MEI_FOP_NOTIFY_START/STOP + */ +enum mei_cb_file_ops mei_cl_notify_req2fop(u8 req) +{ + if (req == MEI_HBM_NOTIFICATION_START) + return MEI_FOP_NOTIFY_START; + else + return MEI_FOP_NOTIFY_STOP; +} + +/** + * mei_cl_irq_notify - send notification request in irq_thread context + * + * @cl: client + * @cb: callback block. + * @cmpl_list: complete list. + * + * Return: 0 on such and error otherwise. + */ +int mei_cl_irq_notify(struct mei_cl *cl, struct mei_cl_cb *cb, + struct list_head *cmpl_list) +{ + struct mei_device *dev = cl->dev; + u32 msg_slots; + int slots; + int ret; + bool request; + + msg_slots = mei_hbm2slots(sizeof(struct hbm_client_connect_request)); + slots = mei_hbuf_empty_slots(dev); + if (slots < 0) + return -EOVERFLOW; + + if ((u32)slots < msg_slots) + return -EMSGSIZE; + + request = mei_cl_notify_fop2req(cb->fop_type); + ret = mei_hbm_cl_notify_req(dev, cl, request); + if (ret) { + cl->status = ret; + list_move_tail(&cb->list, cmpl_list); + return ret; + } + + list_move_tail(&cb->list, &dev->ctrl_rd_list); + return 0; +} + +/** + * mei_cl_notify_request - send notification stop/start request + * + * @cl: host client + * @fp: associate request with file + * @request: 1 for start or 0 for stop + * + * Locking: called under "dev->device_lock" lock + * + * Return: 0 on such and error otherwise. 
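+ *
+ * A hypothetical caller enabling notifications on a connected client
+ * (sketch only, error handling omitted) might do:
+ *
+ *   rets = mei_cl_notify_request(cl, fp, MEI_HBM_NOTIFICATION_START);
+ *   ...
+ *   rets = mei_cl_notify_get(cl, true, &notify_ev);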
+ */ +int mei_cl_notify_request(struct mei_cl *cl, + const struct file *fp, u8 request) +{ + struct mei_device *dev; + struct mei_cl_cb *cb; + enum mei_cb_file_ops fop_type; + int rets; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + if (!dev->hbm_f_ev_supported) { + cl_dbg(dev, cl, "notifications not supported\n"); + return -EOPNOTSUPP; + } + + if (!mei_cl_is_connected(cl)) + return -ENODEV; + + rets = pm_runtime_get(dev->dev); + if (rets < 0 && rets != -EINPROGRESS) { + pm_runtime_put_noidle(dev->dev); + cl_err(dev, cl, "rpm: get failed %d\n", rets); + return rets; + } + + fop_type = mei_cl_notify_req2fop(request); + cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, fop_type, fp); + if (!cb) { + rets = -ENOMEM; + goto out; + } + + if (mei_hbuf_acquire(dev)) { + if (mei_hbm_cl_notify_req(dev, cl, request)) { + rets = -ENODEV; + goto out; + } + list_move_tail(&cb->list, &dev->ctrl_rd_list); + } + + mutex_unlock(&dev->device_lock); + wait_event_timeout(cl->wait, + cl->notify_en == request || + cl->status || + !mei_cl_is_connected(cl), + dev->timeouts.cl_connect); + mutex_lock(&dev->device_lock); + + if (cl->notify_en != request && !cl->status) + cl->status = -EFAULT; + + rets = cl->status; + +out: + cl_dbg(dev, cl, "rpm: autosuspend\n"); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + + mei_io_cb_free(cb); + return rets; +} + +/** + * mei_cl_notify - raise notification + * + * @cl: host client + * + * Locking: called under "dev->device_lock" lock + */ +void mei_cl_notify(struct mei_cl *cl) +{ + struct mei_device *dev; + + if (!cl || !cl->dev) + return; + + dev = cl->dev; + + if (!cl->notify_en) + return; + + cl_dbg(dev, cl, "notify event"); + cl->notify_ev = true; + if (!mei_cl_bus_notify_event(cl)) + wake_up_interruptible(&cl->ev_wait); + + if (cl->ev_async) + kill_fasync(&cl->ev_async, SIGIO, POLL_PRI); + +} + +/** + * mei_cl_notify_get - get or wait for notification event + * + * @cl: host client + * @block: this request is blocking + * @notify_ev: true if notification event was received + * + * Locking: called under "dev->device_lock" lock + * + * Return: 0 on such and error otherwise. + */ +int mei_cl_notify_get(struct mei_cl *cl, bool block, bool *notify_ev) +{ + struct mei_device *dev; + int rets; + + *notify_ev = false; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + if (!dev->hbm_f_ev_supported) { + cl_dbg(dev, cl, "notifications not supported\n"); + return -EOPNOTSUPP; + } + + if (!mei_cl_is_connected(cl)) + return -ENODEV; + + if (cl->notify_ev) + goto out; + + if (!block) + return -EAGAIN; + + mutex_unlock(&dev->device_lock); + rets = wait_event_interruptible(cl->ev_wait, cl->notify_ev); + mutex_lock(&dev->device_lock); + + if (rets < 0) + return rets; + +out: + *notify_ev = cl->notify_ev; + cl->notify_ev = false; + return 0; +} + +/** + * mei_cl_read_start - the start read client message function. + * + * @cl: host client + * @length: number of bytes to read + * @fp: pointer to file structure + * + * Return: 0 on success, <0 on failure. 
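+ *
+ * Only one read may be pending per client: if rx flow control credits
+ * are already outstanding the call returns -EBUSY, and for fixed
+ * address clients it returns 0 without queuing a flow control request.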
+ */ +int mei_cl_read_start(struct mei_cl *cl, size_t length, const struct file *fp) +{ + struct mei_device *dev; + struct mei_cl_cb *cb; + int rets; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + if (!mei_cl_is_connected(cl)) + return -ENODEV; + + if (!mei_me_cl_is_active(cl->me_cl)) { + cl_err(dev, cl, "no such me client\n"); + return -ENOTTY; + } + + if (mei_cl_is_fixed_address(cl)) + return 0; + + /* HW currently supports only one pending read */ + if (cl->rx_flow_ctrl_creds) { + mei_cl_set_read_by_fp(cl, fp); + return -EBUSY; + } + + cb = mei_cl_enqueue_ctrl_wr_cb(cl, length, MEI_FOP_READ, fp); + if (!cb) + return -ENOMEM; + + mei_cl_set_read_by_fp(cl, fp); + + rets = pm_runtime_get(dev->dev); + if (rets < 0 && rets != -EINPROGRESS) { + pm_runtime_put_noidle(dev->dev); + cl_err(dev, cl, "rpm: get failed %d\n", rets); + goto nortpm; + } + + rets = 0; + if (mei_hbuf_acquire(dev)) { + rets = mei_hbm_cl_flow_control_req(dev, cl); + if (rets < 0) + goto out; + + list_move_tail(&cb->list, &cl->rd_pending); + } + cl->rx_flow_ctrl_creds++; + +out: + cl_dbg(dev, cl, "rpm: autosuspend\n"); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); +nortpm: + if (rets) + mei_io_cb_free(cb); + + return rets; +} + +static inline u8 mei_ext_hdr_set_vtag(void *ext, u8 vtag) +{ + struct mei_ext_hdr_vtag *vtag_hdr = ext; + + vtag_hdr->hdr.type = MEI_EXT_HDR_VTAG; + vtag_hdr->hdr.length = mei_data2slots(sizeof(*vtag_hdr)); + vtag_hdr->vtag = vtag; + vtag_hdr->reserved = 0; + return vtag_hdr->hdr.length; +} + +static inline bool mei_ext_hdr_is_gsc(struct mei_ext_hdr *ext) +{ + return ext && ext->type == MEI_EXT_HDR_GSC; +} + +static inline u8 mei_ext_hdr_set_gsc(struct mei_ext_hdr *ext, struct mei_ext_hdr *gsc_hdr) +{ + memcpy(ext, gsc_hdr, mei_ext_hdr_len(gsc_hdr)); + return ext->length; +} + +/** + * mei_msg_hdr_init - allocate and initialize mei message header + * + * @cb: message callback structure + * + * Return: a pointer to initialized header or ERR_PTR on failure + */ +static struct mei_msg_hdr *mei_msg_hdr_init(const struct mei_cl_cb *cb) +{ + size_t hdr_len; + struct mei_ext_meta_hdr *meta; + struct mei_msg_hdr *mei_hdr; + bool is_ext, is_hbm, is_gsc, is_vtag; + struct mei_ext_hdr *next_ext; + + if (!cb) + return ERR_PTR(-EINVAL); + + /* Extended header for vtag is attached only on the first fragment */ + is_vtag = (cb->vtag && cb->buf_idx == 0); + is_hbm = cb->cl->me_cl->client_id == 0; + is_gsc = ((!is_hbm) && cb->cl->dev->hbm_f_gsc_supported && mei_ext_hdr_is_gsc(cb->ext_hdr)); + is_ext = is_vtag || is_gsc; + + /* Compute extended header size */ + hdr_len = sizeof(*mei_hdr); + + if (!is_ext) + goto setup_hdr; + + hdr_len += sizeof(*meta); + if (is_vtag) + hdr_len += sizeof(struct mei_ext_hdr_vtag); + + if (is_gsc) + hdr_len += mei_ext_hdr_len(cb->ext_hdr); + +setup_hdr: + mei_hdr = kzalloc(hdr_len, GFP_KERNEL); + if (!mei_hdr) + return ERR_PTR(-ENOMEM); + + mei_hdr->host_addr = mei_cl_host_addr(cb->cl); + mei_hdr->me_addr = mei_cl_me_id(cb->cl); + mei_hdr->internal = cb->internal; + mei_hdr->extended = is_ext; + + if (!is_ext) + goto out; + + meta = (struct mei_ext_meta_hdr *)mei_hdr->extension; + meta->size = 0; + next_ext = (struct mei_ext_hdr *)meta->hdrs; + if (is_vtag) { + meta->count++; + meta->size += mei_ext_hdr_set_vtag(next_ext, cb->vtag); + next_ext = mei_ext_next(next_ext); + } + + if (is_gsc) { + meta->count++; + meta->size += mei_ext_hdr_set_gsc(next_ext, cb->ext_hdr); + next_ext = mei_ext_next(next_ext); + } + +out: + 
mei_hdr->length = hdr_len - sizeof(*mei_hdr); + return mei_hdr; +} + +/** + * mei_cl_irq_write - write a message to device + * from the interrupt thread context + * + * @cl: client + * @cb: callback block. + * @cmpl_list: complete list. + * + * Return: 0, OK; otherwise error. + */ +int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, + struct list_head *cmpl_list) +{ + struct mei_device *dev; + struct mei_msg_data *buf; + struct mei_msg_hdr *mei_hdr = NULL; + size_t hdr_len; + size_t hbuf_len, dr_len; + size_t buf_len = 0; + size_t data_len; + int hbuf_slots; + u32 dr_slots; + u32 dma_len; + int rets; + bool first_chunk; + const void *data = NULL; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + buf = &cb->buf; + + first_chunk = cb->buf_idx == 0; + + rets = first_chunk ? mei_cl_tx_flow_ctrl_creds(cl) : 1; + if (rets < 0) + goto err; + + if (rets == 0) { + cl_dbg(dev, cl, "No flow control credentials: not sending.\n"); + return 0; + } + + if (buf->data) { + buf_len = buf->size - cb->buf_idx; + data = buf->data + cb->buf_idx; + } + hbuf_slots = mei_hbuf_empty_slots(dev); + if (hbuf_slots < 0) { + rets = -EOVERFLOW; + goto err; + } + + hbuf_len = mei_slots2data(hbuf_slots) & MEI_MSG_MAX_LEN_MASK; + dr_slots = mei_dma_ring_empty_slots(dev); + dr_len = mei_slots2data(dr_slots); + + mei_hdr = mei_msg_hdr_init(cb); + if (IS_ERR(mei_hdr)) { + rets = PTR_ERR(mei_hdr); + mei_hdr = NULL; + goto err; + } + + hdr_len = sizeof(*mei_hdr) + mei_hdr->length; + + /** + * Split the message only if we can write the whole host buffer + * otherwise wait for next time the host buffer is empty. + */ + if (hdr_len + buf_len <= hbuf_len) { + data_len = buf_len; + mei_hdr->msg_complete = 1; + } else if (dr_slots && hbuf_len >= hdr_len + sizeof(dma_len)) { + mei_hdr->dma_ring = 1; + if (buf_len > dr_len) + buf_len = dr_len; + else + mei_hdr->msg_complete = 1; + + data_len = sizeof(dma_len); + dma_len = buf_len; + data = &dma_len; + } else if ((u32)hbuf_slots == mei_hbuf_depth(dev)) { + buf_len = hbuf_len - hdr_len; + data_len = buf_len; + } else { + kfree(mei_hdr); + return 0; + } + mei_hdr->length += data_len; + + if (mei_hdr->dma_ring && buf->data) + mei_dma_ring_write(dev, buf->data + cb->buf_idx, buf_len); + rets = mei_write_message(dev, mei_hdr, hdr_len, data, data_len); + + if (rets) + goto err; + + cl->status = 0; + cl->writing_state = MEI_WRITING; + cb->buf_idx += buf_len; + + if (first_chunk) { + if (mei_cl_tx_flow_ctrl_creds_reduce(cl)) { + rets = -EIO; + goto err; + } + } + + if (mei_hdr->msg_complete) + list_move_tail(&cb->list, &dev->write_waiting_list); + + kfree(mei_hdr); + return 0; + +err: + kfree(mei_hdr); + cl->status = rets; + list_move_tail(&cb->list, cmpl_list); + return rets; +} + +/** + * mei_cl_write - submit a write cb to mei device + * assumes device_lock is locked + * + * @cl: host client + * @cb: write callback with filled data + * @timeout: send timeout in milliseconds. + * effective only for blocking writes: the cb->blocking is set. + * set timeout to the MAX_SCHEDULE_TIMEOUT to maixum allowed wait. + * + * Return: number of bytes sent on success, <0 on failure. 
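+ *
+ * A plausible caller (sketch; cb allocation details are simplified):
+ *
+ *   cb = mei_cl_alloc_cb(cl, length, MEI_FOP_WRITE, fp);
+ *   ... fill cb->buf.data and set cb->blocking as needed ...
+ *   rets = mei_cl_write(cl, cb, MAX_SCHEDULE_TIMEOUT);
+ *
+ * Ownership of @cb passes to this function: the callback is either
+ * queued on the write lists or freed before returning.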
+ */ +ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, unsigned long timeout) +{ + struct mei_device *dev; + struct mei_msg_data *buf; + struct mei_msg_hdr *mei_hdr = NULL; + size_t hdr_len; + size_t hbuf_len, dr_len; + size_t buf_len; + size_t data_len; + int hbuf_slots; + u32 dr_slots; + u32 dma_len; + ssize_t rets; + bool blocking; + const void *data; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + if (WARN_ON(!cb)) + return -EINVAL; + + dev = cl->dev; + + buf = &cb->buf; + buf_len = buf->size; + + cl_dbg(dev, cl, "buf_len=%zd\n", buf_len); + + blocking = cb->blocking; + data = buf->data; + + rets = pm_runtime_get(dev->dev); + if (rets < 0 && rets != -EINPROGRESS) { + pm_runtime_put_noidle(dev->dev); + cl_err(dev, cl, "rpm: get failed %zd\n", rets); + goto free; + } + + cb->buf_idx = 0; + cl->writing_state = MEI_IDLE; + + + rets = mei_cl_tx_flow_ctrl_creds(cl); + if (rets < 0) + goto err; + + mei_hdr = mei_msg_hdr_init(cb); + if (IS_ERR(mei_hdr)) { + rets = PTR_ERR(mei_hdr); + mei_hdr = NULL; + goto err; + } + + hdr_len = sizeof(*mei_hdr) + mei_hdr->length; + + if (rets == 0) { + cl_dbg(dev, cl, "No flow control credentials: not sending.\n"); + rets = buf_len; + goto out; + } + + if (!mei_hbuf_acquire(dev)) { + cl_dbg(dev, cl, "Cannot acquire the host buffer: not sending.\n"); + rets = buf_len; + goto out; + } + + hbuf_slots = mei_hbuf_empty_slots(dev); + if (hbuf_slots < 0) { + buf_len = -EOVERFLOW; + goto out; + } + + hbuf_len = mei_slots2data(hbuf_slots) & MEI_MSG_MAX_LEN_MASK; + dr_slots = mei_dma_ring_empty_slots(dev); + dr_len = mei_slots2data(dr_slots); + + if (hdr_len + buf_len <= hbuf_len) { + data_len = buf_len; + mei_hdr->msg_complete = 1; + } else if (dr_slots && hbuf_len >= hdr_len + sizeof(dma_len)) { + mei_hdr->dma_ring = 1; + if (buf_len > dr_len) + buf_len = dr_len; + else + mei_hdr->msg_complete = 1; + + data_len = sizeof(dma_len); + dma_len = buf_len; + data = &dma_len; + } else { + buf_len = hbuf_len - hdr_len; + data_len = buf_len; + } + + mei_hdr->length += data_len; + + if (mei_hdr->dma_ring && buf->data) + mei_dma_ring_write(dev, buf->data, buf_len); + rets = mei_write_message(dev, mei_hdr, hdr_len, data, data_len); + + if (rets) + goto err; + + rets = mei_cl_tx_flow_ctrl_creds_reduce(cl); + if (rets) + goto err; + + cl->writing_state = MEI_WRITING; + cb->buf_idx = buf_len; + /* restore return value */ + buf_len = buf->size; + +out: + if (mei_hdr->msg_complete) + mei_tx_cb_enqueue(cb, &dev->write_waiting_list); + else + mei_tx_cb_enqueue(cb, &dev->write_list); + + cb = NULL; + if (blocking && cl->writing_state != MEI_WRITE_COMPLETE) { + + mutex_unlock(&dev->device_lock); + rets = wait_event_interruptible_timeout(cl->tx_wait, + cl->writing_state == MEI_WRITE_COMPLETE || + (!mei_cl_is_connected(cl)), + msecs_to_jiffies(timeout)); + mutex_lock(&dev->device_lock); + /* clean all queue on timeout as something fatal happened */ + if (rets == 0) { + rets = -ETIME; + mei_io_tx_list_free_cl(&dev->write_list, cl, NULL); + mei_io_tx_list_free_cl(&dev->write_waiting_list, cl, NULL); + } + /* wait_event_interruptible returns -ERESTARTSYS */ + if (rets > 0) + rets = 0; + if (rets) { + if (signal_pending(current)) + rets = -EINTR; + goto err; + } + if (cl->writing_state != MEI_WRITE_COMPLETE) { + rets = -EFAULT; + goto err; + } + } + + rets = buf_len; +err: + cl_dbg(dev, cl, "rpm: autosuspend\n"); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); +free: + mei_io_cb_free(cb); + + kfree(mei_hdr); + + return rets; +} + +/** + * 
mei_cl_complete - processes completed operation for a client + * + * @cl: private data of the file object. + * @cb: callback block. + */ +void mei_cl_complete(struct mei_cl *cl, struct mei_cl_cb *cb) +{ + struct mei_device *dev = cl->dev; + + switch (cb->fop_type) { + case MEI_FOP_WRITE: + mei_tx_cb_dequeue(cb); + cl->writing_state = MEI_WRITE_COMPLETE; + if (waitqueue_active(&cl->tx_wait)) { + wake_up_interruptible(&cl->tx_wait); + } else { + pm_runtime_mark_last_busy(dev->dev); + pm_request_autosuspend(dev->dev); + } + break; + + case MEI_FOP_READ: + mei_cl_add_rd_completed(cl, cb); + if (!mei_cl_is_fixed_address(cl) && + !WARN_ON(!cl->rx_flow_ctrl_creds)) + cl->rx_flow_ctrl_creds--; + if (!mei_cl_bus_rx_event(cl)) + wake_up_interruptible(&cl->rx_wait); + break; + + case MEI_FOP_CONNECT: + case MEI_FOP_DISCONNECT: + case MEI_FOP_NOTIFY_STOP: + case MEI_FOP_NOTIFY_START: + case MEI_FOP_DMA_MAP: + case MEI_FOP_DMA_UNMAP: + if (waitqueue_active(&cl->wait)) + wake_up(&cl->wait); + + break; + case MEI_FOP_DISCONNECT_RSP: + mei_io_cb_free(cb); + mei_cl_set_disconnected(cl); + break; + default: + BUG_ON(0); + } +} + + +/** + * mei_cl_all_disconnect - disconnect forcefully all connected clients + * + * @dev: mei device + */ +void mei_cl_all_disconnect(struct mei_device *dev) +{ + struct mei_cl *cl; + + list_for_each_entry(cl, &dev->file_list, link) + mei_cl_set_disconnected(cl); +} +EXPORT_SYMBOL_GPL(mei_cl_all_disconnect); + +static struct mei_cl *mei_cl_dma_map_find(struct mei_device *dev, u8 buffer_id) +{ + struct mei_cl *cl; + + list_for_each_entry(cl, &dev->file_list, link) + if (cl->dma.buffer_id == buffer_id) + return cl; + return NULL; +} + +/** + * mei_cl_irq_dma_map - send client dma map request in irq_thread context + * + * @cl: client + * @cb: callback block. + * @cmpl_list: complete list. + * + * Return: 0 on such and error otherwise. + */ +int mei_cl_irq_dma_map(struct mei_cl *cl, struct mei_cl_cb *cb, + struct list_head *cmpl_list) +{ + struct mei_device *dev = cl->dev; + u32 msg_slots; + int slots; + int ret; + + msg_slots = mei_hbm2slots(sizeof(struct hbm_client_dma_map_request)); + slots = mei_hbuf_empty_slots(dev); + if (slots < 0) + return -EOVERFLOW; + + if ((u32)slots < msg_slots) + return -EMSGSIZE; + + ret = mei_hbm_cl_dma_map_req(dev, cl); + if (ret) { + cl->status = ret; + list_move_tail(&cb->list, cmpl_list); + return ret; + } + + list_move_tail(&cb->list, &dev->ctrl_rd_list); + return 0; +} + +/** + * mei_cl_irq_dma_unmap - send client dma unmap request in irq_thread context + * + * @cl: client + * @cb: callback block. + * @cmpl_list: complete list. + * + * Return: 0 on such and error otherwise. 
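+ *
+ * Like mei_cl_irq_dma_map() above, this checks for enough host buffer
+ * slots, sends the HBM request and parks the callback on ctrl_rd_list
+ * to await the firmware reply; on a send error the callback is moved
+ * to @cmpl_list instead.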
+ */ +int mei_cl_irq_dma_unmap(struct mei_cl *cl, struct mei_cl_cb *cb, + struct list_head *cmpl_list) +{ + struct mei_device *dev = cl->dev; + u32 msg_slots; + int slots; + int ret; + + msg_slots = mei_hbm2slots(sizeof(struct hbm_client_dma_unmap_request)); + slots = mei_hbuf_empty_slots(dev); + if (slots < 0) + return -EOVERFLOW; + + if ((u32)slots < msg_slots) + return -EMSGSIZE; + + ret = mei_hbm_cl_dma_unmap_req(dev, cl); + if (ret) { + cl->status = ret; + list_move_tail(&cb->list, cmpl_list); + return ret; + } + + list_move_tail(&cb->list, &dev->ctrl_rd_list); + return 0; +} + +static int mei_cl_dma_alloc(struct mei_cl *cl, u8 buf_id, size_t size) +{ + cl->dma.vaddr = dmam_alloc_coherent(cl->dev->dev, size, + &cl->dma.daddr, GFP_KERNEL); + if (!cl->dma.vaddr) + return -ENOMEM; + + cl->dma.buffer_id = buf_id; + cl->dma.size = size; + + return 0; +} + +static void mei_cl_dma_free(struct mei_cl *cl) +{ + cl->dma.buffer_id = 0; + dmam_free_coherent(cl->dev->dev, + cl->dma.size, cl->dma.vaddr, cl->dma.daddr); + cl->dma.size = 0; + cl->dma.vaddr = NULL; + cl->dma.daddr = 0; +} + +/** + * mei_cl_dma_alloc_and_map - send client dma map request + * + * @cl: host client + * @fp: pointer to file structure + * @buffer_id: id of the mapped buffer + * @size: size of the buffer + * + * Locking: called under "dev->device_lock" lock + * + * Return: + * * -ENODEV + * * -EINVAL + * * -EOPNOTSUPP + * * -EPROTO + * * -ENOMEM; + */ +int mei_cl_dma_alloc_and_map(struct mei_cl *cl, const struct file *fp, + u8 buffer_id, size_t size) +{ + struct mei_device *dev; + struct mei_cl_cb *cb; + int rets; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + if (!dev->hbm_f_cd_supported) { + cl_dbg(dev, cl, "client dma is not supported\n"); + return -EOPNOTSUPP; + } + + if (buffer_id == 0) + return -EINVAL; + + if (mei_cl_is_connected(cl)) + return -EPROTO; + + if (cl->dma_mapped) + return -EPROTO; + + if (mei_cl_dma_map_find(dev, buffer_id)) { + cl_dbg(dev, cl, "client dma with id %d is already allocated\n", + cl->dma.buffer_id); + return -EPROTO; + } + + rets = pm_runtime_get(dev->dev); + if (rets < 0 && rets != -EINPROGRESS) { + pm_runtime_put_noidle(dev->dev); + cl_err(dev, cl, "rpm: get failed %d\n", rets); + return rets; + } + + rets = mei_cl_dma_alloc(cl, buffer_id, size); + if (rets) { + pm_runtime_put_noidle(dev->dev); + return rets; + } + + cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, MEI_FOP_DMA_MAP, fp); + if (!cb) { + rets = -ENOMEM; + goto out; + } + + if (mei_hbuf_acquire(dev)) { + if (mei_hbm_cl_dma_map_req(dev, cl)) { + rets = -ENODEV; + goto out; + } + list_move_tail(&cb->list, &dev->ctrl_rd_list); + } + + cl->status = 0; + + mutex_unlock(&dev->device_lock); + wait_event_timeout(cl->wait, + cl->dma_mapped || cl->status, + dev->timeouts.cl_connect); + mutex_lock(&dev->device_lock); + + if (!cl->dma_mapped && !cl->status) + cl->status = -EFAULT; + + rets = cl->status; + +out: + if (rets) + mei_cl_dma_free(cl); + + cl_dbg(dev, cl, "rpm: autosuspend\n"); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + + mei_io_cb_free(cb); + return rets; +} + +/** + * mei_cl_dma_unmap - send client dma unmap request + * + * @cl: host client + * @fp: pointer to file structure + * + * Locking: called under "dev->device_lock" lock + * + * Return: 0 on such and error otherwise. 
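+ *
+ * Pairs with mei_cl_dma_alloc_and_map().  Both calls are accepted only
+ * while the client is not connected (-EPROTO otherwise), so a plausible
+ * lifecycle (sketch, not a prescribed sequence) is:
+ *
+ *   mei_cl_dma_alloc_and_map(cl, fp, buffer_id, size);
+ *   ... connect, transfer, disconnect ...
+ *   mei_cl_dma_unmap(cl, fp);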
+ */ +int mei_cl_dma_unmap(struct mei_cl *cl, const struct file *fp) +{ + struct mei_device *dev; + struct mei_cl_cb *cb; + int rets; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + if (!dev->hbm_f_cd_supported) { + cl_dbg(dev, cl, "client dma is not supported\n"); + return -EOPNOTSUPP; + } + + /* do not allow unmap for connected client */ + if (mei_cl_is_connected(cl)) + return -EPROTO; + + if (!cl->dma_mapped) + return -EPROTO; + + rets = pm_runtime_get(dev->dev); + if (rets < 0 && rets != -EINPROGRESS) { + pm_runtime_put_noidle(dev->dev); + cl_err(dev, cl, "rpm: get failed %d\n", rets); + return rets; + } + + cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, MEI_FOP_DMA_UNMAP, fp); + if (!cb) { + rets = -ENOMEM; + goto out; + } + + if (mei_hbuf_acquire(dev)) { + if (mei_hbm_cl_dma_unmap_req(dev, cl)) { + rets = -ENODEV; + goto out; + } + list_move_tail(&cb->list, &dev->ctrl_rd_list); + } + + cl->status = 0; + + mutex_unlock(&dev->device_lock); + wait_event_timeout(cl->wait, + !cl->dma_mapped || cl->status, + dev->timeouts.cl_connect); + mutex_lock(&dev->device_lock); + + if (cl->dma_mapped && !cl->status) + cl->status = -EFAULT; + + rets = cl->status; + + if (!rets) + mei_cl_dma_free(cl); +out: + cl_dbg(dev, cl, "rpm: autosuspend\n"); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + + mei_io_cb_free(cb); + return rets; +} diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h new file mode 100644 index 0000000000..9052860bcf --- /dev/null +++ b/drivers/misc/mei/client.h @@ -0,0 +1,288 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2003-2018, Intel Corporation. All rights reserved. + * Intel Management Engine Interface (Intel MEI) Linux driver + */ + +#ifndef _MEI_CLIENT_H_ +#define _MEI_CLIENT_H_ + +#include +#include +#include + +#include "mei_dev.h" + +/* + * reference counting base function + */ +void mei_me_cl_init(struct mei_me_client *me_cl); +void mei_me_cl_put(struct mei_me_client *me_cl); +struct mei_me_client *mei_me_cl_get(struct mei_me_client *me_cl); + +void mei_me_cl_add(struct mei_device *dev, struct mei_me_client *me_cl); +void mei_me_cl_del(struct mei_device *dev, struct mei_me_client *me_cl); + +struct mei_me_client *mei_me_cl_by_uuid(struct mei_device *dev, + const uuid_le *uuid); +struct mei_me_client *mei_me_cl_by_id(struct mei_device *dev, u8 client_id); +struct mei_me_client *mei_me_cl_by_uuid_id(struct mei_device *dev, + const uuid_le *uuid, u8 client_id); +void mei_me_cl_rm_by_uuid(struct mei_device *dev, const uuid_le *uuid); +void mei_me_cl_rm_by_uuid_id(struct mei_device *dev, + const uuid_le *uuid, u8 id); +void mei_me_cl_rm_all(struct mei_device *dev); + +/** + * mei_me_cl_is_active - check whether me client is active in the fw + * + * @me_cl: me client + * + * Return: true if the me client is active in the firmware + */ +static inline bool mei_me_cl_is_active(const struct mei_me_client *me_cl) +{ + return !list_empty_careful(&me_cl->list); +} + +/** + * mei_me_cl_uuid - return me client protocol name (uuid) + * + * @me_cl: me client + * + * Return: me client protocol name + */ +static inline const uuid_le *mei_me_cl_uuid(const struct mei_me_client *me_cl) +{ + return &me_cl->props.protocol_name; +} + +/** + * mei_me_cl_ver - return me client protocol version + * + * @me_cl: me client + * + * Return: me client protocol version + */ +static inline u8 mei_me_cl_ver(const struct mei_me_client *me_cl) +{ + return me_cl->props.protocol_version; +} + +/** + * mei_me_cl_max_conn - return 
me client max number of connections + * + * @me_cl: me client + * + * Return: me client max number of connections + */ +static inline u8 mei_me_cl_max_conn(const struct mei_me_client *me_cl) +{ + return me_cl->props.max_number_of_connections; +} + +/** + * mei_me_cl_fixed - return me client fixed address, if any + * + * @me_cl: me client + * + * Return: me client fixed address + */ +static inline u8 mei_me_cl_fixed(const struct mei_me_client *me_cl) +{ + return me_cl->props.fixed_address; +} + +/** + * mei_me_cl_vt - return me client vtag supported status + * + * @me_cl: me client + * + * Return: true if me client supports vt tagging + */ +static inline bool mei_me_cl_vt(const struct mei_me_client *me_cl) +{ + return me_cl->props.vt_supported == 1; +} + +/** + * mei_me_cl_max_len - return me client max msg length + * + * @me_cl: me client + * + * Return: me client max msg length + */ +static inline u32 mei_me_cl_max_len(const struct mei_me_client *me_cl) +{ + return me_cl->props.max_msg_length; +} + +/* + * MEI IO Functions + */ +void mei_io_cb_free(struct mei_cl_cb *priv_cb); + +/* + * MEI Host Client Functions + */ + +struct mei_cl *mei_cl_allocate(struct mei_device *dev); + +int mei_cl_link(struct mei_cl *cl); +int mei_cl_unlink(struct mei_cl *cl); + +struct mei_cl *mei_cl_alloc_linked(struct mei_device *dev); + +struct mei_cl_cb *mei_cl_read_cb(struct mei_cl *cl, const struct file *fp); + +void mei_cl_add_rd_completed(struct mei_cl *cl, struct mei_cl_cb *cb); +void mei_cl_del_rd_completed(struct mei_cl *cl, struct mei_cl_cb *cb); + +struct mei_cl_cb *mei_cl_alloc_cb(struct mei_cl *cl, size_t length, + enum mei_cb_file_ops type, + const struct file *fp); +struct mei_cl_cb *mei_cl_enqueue_ctrl_wr_cb(struct mei_cl *cl, size_t length, + enum mei_cb_file_ops type, + const struct file *fp); +int mei_cl_flush_queues(struct mei_cl *cl, const struct file *fp); + +struct mei_cl_vtag *mei_cl_vtag_alloc(struct file *fp, u8 vtag); +const struct file *mei_cl_fp_by_vtag(const struct mei_cl *cl, u8 vtag); +int mei_cl_vt_support_check(const struct mei_cl *cl); +/* + * MEI input output function prototype + */ + +/** + * mei_cl_is_connected - host client is connected + * + * @cl: host client + * + * Return: true if the host client is connected + */ +static inline bool mei_cl_is_connected(const struct mei_cl *cl) +{ + return cl->state == MEI_FILE_CONNECTED; +} + +/** + * mei_cl_me_id - me client id + * + * @cl: host client + * + * Return: me client id or 0 if client is not connected + */ +static inline u8 mei_cl_me_id(const struct mei_cl *cl) +{ + return cl->me_cl ? cl->me_cl->client_id : 0; +} + +/** + * mei_cl_mtu - maximal message that client can send and receive + * + * @cl: host client + * + * Return: mtu or 0 if client is not connected + */ +static inline size_t mei_cl_mtu(const struct mei_cl *cl) +{ + return cl->me_cl ? 
cl->me_cl->props.max_msg_length : 0; +} + +/** + * mei_cl_is_fixed_address - check whether the me client uses fixed address + * + * @cl: host client + * + * Return: true if the client is connected and it has fixed me address + */ +static inline bool mei_cl_is_fixed_address(const struct mei_cl *cl) +{ + return cl->me_cl && cl->me_cl->props.fixed_address; +} + +/** + * mei_cl_is_single_recv_buf- check whether the me client + * uses single receiving buffer + * + * @cl: host client + * + * Return: true if single_recv_buf == 1; 0 otherwise + */ +static inline bool mei_cl_is_single_recv_buf(const struct mei_cl *cl) +{ + return cl->me_cl->props.single_recv_buf; +} + +/** + * mei_cl_uuid - client's uuid + * + * @cl: host client + * + * Return: return uuid of connected me client + */ +static inline const uuid_le *mei_cl_uuid(const struct mei_cl *cl) +{ + return mei_me_cl_uuid(cl->me_cl); +} + +/** + * mei_cl_host_addr - client's host address + * + * @cl: host client + * + * Return: 0 for fixed address client, host address for dynamic client + */ +static inline u8 mei_cl_host_addr(const struct mei_cl *cl) +{ + return mei_cl_is_fixed_address(cl) ? 0 : cl->host_client_id; +} + +int mei_cl_disconnect(struct mei_cl *cl); +int mei_cl_irq_disconnect(struct mei_cl *cl, struct mei_cl_cb *cb, + struct list_head *cmpl_list); +int mei_cl_connect(struct mei_cl *cl, struct mei_me_client *me_cl, + const struct file *file); +int mei_cl_irq_connect(struct mei_cl *cl, struct mei_cl_cb *cb, + struct list_head *cmpl_list); +int mei_cl_read_start(struct mei_cl *cl, size_t length, const struct file *fp); +ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, unsigned long timeout); +int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, + struct list_head *cmpl_list); + +void mei_cl_complete(struct mei_cl *cl, struct mei_cl_cb *cb); + +void mei_host_client_init(struct mei_device *dev); + +u8 mei_cl_notify_fop2req(enum mei_cb_file_ops fop); +enum mei_cb_file_ops mei_cl_notify_req2fop(u8 request); +int mei_cl_notify_request(struct mei_cl *cl, + const struct file *file, u8 request); +int mei_cl_irq_notify(struct mei_cl *cl, struct mei_cl_cb *cb, + struct list_head *cmpl_list); +int mei_cl_notify_get(struct mei_cl *cl, bool block, bool *notify_ev); +void mei_cl_notify(struct mei_cl *cl); + +void mei_cl_all_disconnect(struct mei_device *dev); + +int mei_cl_irq_dma_map(struct mei_cl *cl, struct mei_cl_cb *cb, + struct list_head *cmpl_list); +int mei_cl_irq_dma_unmap(struct mei_cl *cl, struct mei_cl_cb *cb, + struct list_head *cmpl_list); +int mei_cl_dma_alloc_and_map(struct mei_cl *cl, const struct file *fp, + u8 buffer_id, size_t size); +int mei_cl_dma_unmap(struct mei_cl *cl, const struct file *fp); + +#define MEI_CL_FMT "cl:host=%02d me=%02d " +#define MEI_CL_PRM(cl) (cl)->host_client_id, mei_cl_me_id(cl) + +#define cl_dbg(dev, cl, format, arg...) \ + dev_dbg((dev)->dev, MEI_CL_FMT format, MEI_CL_PRM(cl), ##arg) + +#define cl_warn(dev, cl, format, arg...) \ + dev_warn((dev)->dev, MEI_CL_FMT format, MEI_CL_PRM(cl), ##arg) + +#define cl_err(dev, cl, format, arg...) \ + dev_err((dev)->dev, MEI_CL_FMT format, MEI_CL_PRM(cl), ##arg) + +#endif /* _MEI_CLIENT_H_ */ diff --git a/drivers/misc/mei/debugfs.c b/drivers/misc/mei/debugfs.c new file mode 100644 index 0000000000..3b098d4c8e --- /dev/null +++ b/drivers/misc/mei/debugfs.c @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2012-2022, Intel Corporation. 
All rights reserved + * Intel Management Engine Interface (Intel MEI) Linux driver + */ + +#include +#include +#include +#include +#include + +#include + +#include "mei_dev.h" +#include "client.h" +#include "hw.h" + +static int mei_dbgfs_meclients_show(struct seq_file *m, void *unused) +{ + struct mei_device *dev = m->private; + struct mei_me_client *me_cl; + int i = 0; + + if (!dev) + return -ENODEV; + + down_read(&dev->me_clients_rwsem); + + seq_puts(m, " |id|fix| UUID |con|msg len|sb|refc|vt|\n"); + + /* if the driver is not enabled the list won't be consistent */ + if (dev->dev_state != MEI_DEV_ENABLED) + goto out; + + list_for_each_entry(me_cl, &dev->me_clients, list) { + if (!mei_me_cl_get(me_cl)) + continue; + + seq_printf(m, "%2d|%2d|%3d|%pUl|%3d|%7d|%2d|%4d|%2d|\n", + i++, me_cl->client_id, + me_cl->props.fixed_address, + &me_cl->props.protocol_name, + me_cl->props.max_number_of_connections, + me_cl->props.max_msg_length, + me_cl->props.single_recv_buf, + kref_read(&me_cl->refcnt), + me_cl->props.vt_supported); + mei_me_cl_put(me_cl); + } + +out: + up_read(&dev->me_clients_rwsem); + return 0; +} +DEFINE_SHOW_ATTRIBUTE(mei_dbgfs_meclients); + +static int mei_dbgfs_active_show(struct seq_file *m, void *unused) +{ + struct mei_device *dev = m->private; + struct mei_cl *cl; + int i = 0; + + if (!dev) + return -ENODEV; + + mutex_lock(&dev->device_lock); + + seq_puts(m, " |me|host|state|rd|wr|wrq\n"); + + /* if the driver is not enabled the list won't be consistent */ + if (dev->dev_state != MEI_DEV_ENABLED) + goto out; + + list_for_each_entry(cl, &dev->file_list, link) { + + seq_printf(m, "%3d|%2d|%4d|%5d|%2d|%2d|%3u\n", + i, mei_cl_me_id(cl), cl->host_client_id, cl->state, + !list_empty(&cl->rd_completed), cl->writing_state, + cl->tx_cb_queued); + i++; + } +out: + mutex_unlock(&dev->device_lock); + return 0; +} +DEFINE_SHOW_ATTRIBUTE(mei_dbgfs_active); + +static const char *mei_dev_pxp_mode_str(enum mei_dev_pxp_mode state) +{ +#define MEI_PXP_MODE(state) case MEI_DEV_PXP_##state: return #state + switch (state) { + MEI_PXP_MODE(DEFAULT); + MEI_PXP_MODE(INIT); + MEI_PXP_MODE(SETUP); + MEI_PXP_MODE(READY); + default: + return "unknown"; + } +#undef MEI_PXP_MODE +} + +static int mei_dbgfs_devstate_show(struct seq_file *m, void *unused) +{ + struct mei_device *dev = m->private; + + seq_printf(m, "dev: %s\n", mei_dev_state_str(dev->dev_state)); + seq_printf(m, "hbm: %s\n", mei_hbm_state_str(dev->hbm_state)); + + if (dev->hbm_state >= MEI_HBM_ENUM_CLIENTS && + dev->hbm_state <= MEI_HBM_STARTED) { + seq_puts(m, "hbm features:\n"); + seq_printf(m, "\tPG: %01d\n", dev->hbm_f_pg_supported); + seq_printf(m, "\tDC: %01d\n", dev->hbm_f_dc_supported); + seq_printf(m, "\tIE: %01d\n", dev->hbm_f_ie_supported); + seq_printf(m, "\tDOT: %01d\n", dev->hbm_f_dot_supported); + seq_printf(m, "\tEV: %01d\n", dev->hbm_f_ev_supported); + seq_printf(m, "\tFA: %01d\n", dev->hbm_f_fa_supported); + seq_printf(m, "\tOS: %01d\n", dev->hbm_f_os_supported); + seq_printf(m, "\tDR: %01d\n", dev->hbm_f_dr_supported); + seq_printf(m, "\tVT: %01d\n", dev->hbm_f_vt_supported); + seq_printf(m, "\tCAP: %01d\n", dev->hbm_f_cap_supported); + seq_printf(m, "\tCD: %01d\n", dev->hbm_f_cd_supported); + } + + seq_printf(m, "pg: %s, %s\n", + mei_pg_is_enabled(dev) ? 
"ENABLED" : "DISABLED", + mei_pg_state_str(mei_pg_state(dev))); + + seq_printf(m, "pxp: %s\n", mei_dev_pxp_mode_str(dev->pxp_mode)); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(mei_dbgfs_devstate); + +static ssize_t mei_dbgfs_write_allow_fa(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct mei_device *dev; + int ret; + + dev = container_of(file->private_data, + struct mei_device, allow_fixed_address); + + ret = debugfs_write_file_bool(file, user_buf, count, ppos); + if (ret < 0) + return ret; + dev->override_fixed_address = true; + return ret; +} + +static const struct file_operations mei_dbgfs_allow_fa_fops = { + .open = simple_open, + .read = debugfs_read_file_bool, + .write = mei_dbgfs_write_allow_fa, + .llseek = generic_file_llseek, +}; + +/** + * mei_dbgfs_deregister - Remove the debugfs files and directories + * + * @dev: the mei device structure + */ +void mei_dbgfs_deregister(struct mei_device *dev) +{ + if (!dev->dbgfs_dir) + return; + debugfs_remove_recursive(dev->dbgfs_dir); + dev->dbgfs_dir = NULL; +} + +/** + * mei_dbgfs_register - Add the debugfs files + * + * @dev: the mei device structure + * @name: the mei device name + */ +void mei_dbgfs_register(struct mei_device *dev, const char *name) +{ + struct dentry *dir; + + dir = debugfs_create_dir(name, NULL); + dev->dbgfs_dir = dir; + + debugfs_create_file("meclients", S_IRUSR, dir, dev, + &mei_dbgfs_meclients_fops); + debugfs_create_file("active", S_IRUSR, dir, dev, + &mei_dbgfs_active_fops); + debugfs_create_file("devstate", S_IRUSR, dir, dev, + &mei_dbgfs_devstate_fops); + debugfs_create_file("allow_fixed_address", S_IRUSR | S_IWUSR, dir, + &dev->allow_fixed_address, + &mei_dbgfs_allow_fa_fops); +} diff --git a/drivers/misc/mei/dma-ring.c b/drivers/misc/mei/dma-ring.c new file mode 100644 index 0000000000..ef56f849b2 --- /dev/null +++ b/drivers/misc/mei/dma-ring.c @@ -0,0 +1,269 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright(c) 2016-2018 Intel Corporation. All rights reserved. 
+ */ +#include +#include + +#include "mei_dev.h" + +/** + * mei_dmam_dscr_alloc() - allocate a managed coherent buffer + * for the dma descriptor + * @dev: mei_device + * @dscr: dma descriptor + * + * Return: + * * 0 - on success or zero allocation request + * * -EINVAL - if size is not power of 2 + * * -ENOMEM - of allocation has failed + */ +static int mei_dmam_dscr_alloc(struct mei_device *dev, + struct mei_dma_dscr *dscr) +{ + if (!dscr->size) + return 0; + + if (WARN_ON(!is_power_of_2(dscr->size))) + return -EINVAL; + + if (dscr->vaddr) + return 0; + + dscr->vaddr = dmam_alloc_coherent(dev->dev, dscr->size, &dscr->daddr, + GFP_KERNEL); + if (!dscr->vaddr) + return -ENOMEM; + + return 0; +} + +/** + * mei_dmam_dscr_free() - free a managed coherent buffer + * from the dma descriptor + * @dev: mei_device + * @dscr: dma descriptor + */ +static void mei_dmam_dscr_free(struct mei_device *dev, + struct mei_dma_dscr *dscr) +{ + if (!dscr->vaddr) + return; + + dmam_free_coherent(dev->dev, dscr->size, dscr->vaddr, dscr->daddr); + dscr->vaddr = NULL; +} + +/** + * mei_dmam_ring_free() - free dma ring buffers + * @dev: mei device + */ +void mei_dmam_ring_free(struct mei_device *dev) +{ + int i; + + for (i = 0; i < DMA_DSCR_NUM; i++) + mei_dmam_dscr_free(dev, &dev->dr_dscr[i]); +} + +/** + * mei_dmam_ring_alloc() - allocate dma ring buffers + * @dev: mei device + * + * Return: -ENOMEM on allocation failure 0 otherwise + */ +int mei_dmam_ring_alloc(struct mei_device *dev) +{ + int i; + + for (i = 0; i < DMA_DSCR_NUM; i++) + if (mei_dmam_dscr_alloc(dev, &dev->dr_dscr[i])) + goto err; + + return 0; + +err: + mei_dmam_ring_free(dev); + return -ENOMEM; +} + +/** + * mei_dma_ring_is_allocated() - check if dma ring is allocated + * @dev: mei device + * + * Return: true if dma ring is allocated + */ +bool mei_dma_ring_is_allocated(struct mei_device *dev) +{ + return !!dev->dr_dscr[DMA_DSCR_HOST].vaddr; +} + +static inline +struct hbm_dma_ring_ctrl *mei_dma_ring_ctrl(struct mei_device *dev) +{ + return (struct hbm_dma_ring_ctrl *)dev->dr_dscr[DMA_DSCR_CTRL].vaddr; +} + +/** + * mei_dma_ring_reset() - reset the dma control block + * @dev: mei device + */ +void mei_dma_ring_reset(struct mei_device *dev) +{ + struct hbm_dma_ring_ctrl *ctrl = mei_dma_ring_ctrl(dev); + + if (!ctrl) + return; + + memset(ctrl, 0, sizeof(*ctrl)); +} + +/** + * mei_dma_copy_from() - copy from dma ring into buffer + * @dev: mei device + * @buf: data buffer + * @offset: offset in slots. + * @n: number of slots to copy. + */ +static size_t mei_dma_copy_from(struct mei_device *dev, unsigned char *buf, + u32 offset, u32 n) +{ + unsigned char *dbuf = dev->dr_dscr[DMA_DSCR_DEVICE].vaddr; + + size_t b_offset = offset << 2; + size_t b_n = n << 2; + + memcpy(buf, dbuf + b_offset, b_n); + + return b_n; +} + +/** + * mei_dma_copy_to() - copy to a buffer to the dma ring + * @dev: mei device + * @buf: data buffer + * @offset: offset in slots. + * @n: number of slots to copy. + */ +static size_t mei_dma_copy_to(struct mei_device *dev, unsigned char *buf, + u32 offset, u32 n) +{ + unsigned char *hbuf = dev->dr_dscr[DMA_DSCR_HOST].vaddr; + + size_t b_offset = offset << 2; + size_t b_n = n << 2; + + memcpy(hbuf + b_offset, buf, b_n); + + return b_n; +} + +/** + * mei_dma_ring_read() - read data from the ring + * @dev: mei device + * @buf: buffer to read into: may be NULL in case of droping the data. + * @len: length to read. 
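+ *
+ * The ring is addressed in 4-byte slots: byte lengths are converted
+ * with mei_data2slots() (assumed here to round a byte count up to whole
+ * slots) and, since the descriptor sizes are powers of two, the indices
+ * wrap with a simple "& (depth - 1)" mask.  For example, a 4 KiB device
+ * buffer gives a depth of 1024 slots and a 100 byte message occupies
+ * 25 slots.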
+ */ +void mei_dma_ring_read(struct mei_device *dev, unsigned char *buf, u32 len) +{ + struct hbm_dma_ring_ctrl *ctrl = mei_dma_ring_ctrl(dev); + u32 dbuf_depth; + u32 rd_idx, rem, slots; + + if (WARN_ON(!ctrl)) + return; + + dev_dbg(dev->dev, "reading from dma %u bytes\n", len); + + if (!len) + return; + + dbuf_depth = dev->dr_dscr[DMA_DSCR_DEVICE].size >> 2; + rd_idx = READ_ONCE(ctrl->dbuf_rd_idx) & (dbuf_depth - 1); + slots = mei_data2slots(len); + + /* if buf is NULL we drop the packet by advancing the pointer.*/ + if (!buf) + goto out; + + if (rd_idx + slots > dbuf_depth) { + buf += mei_dma_copy_from(dev, buf, rd_idx, dbuf_depth - rd_idx); + rem = slots - (dbuf_depth - rd_idx); + rd_idx = 0; + } else { + rem = slots; + } + + mei_dma_copy_from(dev, buf, rd_idx, rem); +out: + WRITE_ONCE(ctrl->dbuf_rd_idx, ctrl->dbuf_rd_idx + slots); +} + +static inline u32 mei_dma_ring_hbuf_depth(struct mei_device *dev) +{ + return dev->dr_dscr[DMA_DSCR_HOST].size >> 2; +} + +/** + * mei_dma_ring_empty_slots() - calaculate number of empty slots in dma ring + * @dev: mei_device + * + * Return: number of empty slots + */ +u32 mei_dma_ring_empty_slots(struct mei_device *dev) +{ + struct hbm_dma_ring_ctrl *ctrl = mei_dma_ring_ctrl(dev); + u32 wr_idx, rd_idx, hbuf_depth, empty; + + if (!mei_dma_ring_is_allocated(dev)) + return 0; + + if (WARN_ON(!ctrl)) + return 0; + + /* easier to work in slots */ + hbuf_depth = mei_dma_ring_hbuf_depth(dev); + rd_idx = READ_ONCE(ctrl->hbuf_rd_idx); + wr_idx = READ_ONCE(ctrl->hbuf_wr_idx); + + if (rd_idx > wr_idx) + empty = rd_idx - wr_idx; + else + empty = hbuf_depth - (wr_idx - rd_idx); + + return empty; +} + +/** + * mei_dma_ring_write - write data to dma ring host buffer + * + * @dev: mei_device + * @buf: data will be written + * @len: data length + */ +void mei_dma_ring_write(struct mei_device *dev, unsigned char *buf, u32 len) +{ + struct hbm_dma_ring_ctrl *ctrl = mei_dma_ring_ctrl(dev); + u32 hbuf_depth; + u32 wr_idx, rem, slots; + + if (WARN_ON(!ctrl)) + return; + + dev_dbg(dev->dev, "writing to dma %u bytes\n", len); + hbuf_depth = mei_dma_ring_hbuf_depth(dev); + wr_idx = READ_ONCE(ctrl->hbuf_wr_idx) & (hbuf_depth - 1); + slots = mei_data2slots(len); + + if (wr_idx + slots > hbuf_depth) { + buf += mei_dma_copy_to(dev, buf, wr_idx, hbuf_depth - wr_idx); + rem = slots - (hbuf_depth - wr_idx); + wr_idx = 0; + } else { + rem = slots; + } + + mei_dma_copy_to(dev, buf, wr_idx, rem); + + WRITE_ONCE(ctrl->hbuf_wr_idx, ctrl->hbuf_wr_idx + slots); +} diff --git a/drivers/misc/mei/gsc-me.c b/drivers/misc/mei/gsc-me.c new file mode 100644 index 0000000000..6be8f1cc05 --- /dev/null +++ b/drivers/misc/mei/gsc-me.c @@ -0,0 +1,316 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright(c) 2019-2022, Intel Corporation. All rights reserved. 
+ * + * Intel Management Engine Interface (Intel MEI) Linux driver + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mei_dev.h" +#include "hw-me.h" +#include "hw-me-regs.h" + +#include "mei-trace.h" + +#define MEI_GSC_RPM_TIMEOUT 500 + +static int mei_gsc_read_hfs(const struct mei_device *dev, int where, u32 *val) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + *val = ioread32(hw->mem_addr + where + 0xC00); + + return 0; +} + +static void mei_gsc_set_ext_op_mem(const struct mei_me_hw *hw, struct resource *mem) +{ + u32 low = lower_32_bits(mem->start); + u32 hi = upper_32_bits(mem->start); + u32 limit = (resource_size(mem) / SZ_4K) | GSC_EXT_OP_MEM_VALID; + + iowrite32(low, hw->mem_addr + H_GSC_EXT_OP_MEM_BASE_ADDR_LO_REG); + iowrite32(hi, hw->mem_addr + H_GSC_EXT_OP_MEM_BASE_ADDR_HI_REG); + iowrite32(limit, hw->mem_addr + H_GSC_EXT_OP_MEM_LIMIT_REG); +} + +static int mei_gsc_probe(struct auxiliary_device *aux_dev, + const struct auxiliary_device_id *aux_dev_id) +{ + struct mei_aux_device *adev = auxiliary_dev_to_mei_aux_dev(aux_dev); + struct mei_device *dev; + struct mei_me_hw *hw; + struct device *device; + const struct mei_cfg *cfg; + int ret; + + cfg = mei_me_get_cfg(aux_dev_id->driver_data); + if (!cfg) + return -ENODEV; + + device = &aux_dev->dev; + + dev = mei_me_dev_init(device, cfg, adev->slow_firmware); + if (!dev) { + ret = -ENOMEM; + goto err; + } + + hw = to_me_hw(dev); + hw->mem_addr = devm_ioremap_resource(device, &adev->bar); + if (IS_ERR(hw->mem_addr)) { + ret = PTR_ERR(hw->mem_addr); + goto err; + } + + hw->irq = adev->irq; + hw->read_fws = mei_gsc_read_hfs; + + dev_set_drvdata(device, dev); + + if (adev->ext_op_mem.start) { + mei_gsc_set_ext_op_mem(hw, &adev->ext_op_mem); + dev->pxp_mode = MEI_DEV_PXP_INIT; + } + + /* use polling */ + if (mei_me_hw_use_polling(hw)) { + mei_disable_interrupts(dev); + mei_clear_interrupts(dev); + init_waitqueue_head(&hw->wait_active); + hw->is_active = true; /* start in active mode for initialization */ + hw->polling_thread = kthread_run(mei_me_polling_thread, dev, + "kmegscirqd/%s", dev_name(device)); + if (IS_ERR(hw->polling_thread)) { + ret = PTR_ERR(hw->polling_thread); + dev_err(device, "unable to create kernel thread: %d\n", ret); + goto err; + } + } else { + ret = devm_request_threaded_irq(device, hw->irq, + mei_me_irq_quick_handler, + mei_me_irq_thread_handler, + IRQF_ONESHOT, KBUILD_MODNAME, dev); + if (ret) { + dev_err(device, "irq register failed %d\n", ret); + goto err; + } + } + + pm_runtime_get_noresume(device); + pm_runtime_set_active(device); + pm_runtime_enable(device); + + /* Continue to char device setup in spite of firmware handshake failure. + * In order to provide access to the firmware status registers to the user + * space via sysfs. 
+ */ + if (mei_start(dev)) + dev_warn(device, "init hw failure.\n"); + + pm_runtime_set_autosuspend_delay(device, MEI_GSC_RPM_TIMEOUT); + pm_runtime_use_autosuspend(device); + + ret = mei_register(dev, device); + if (ret) + goto register_err; + + pm_runtime_put_noidle(device); + return 0; + +register_err: + mei_stop(dev); + if (!mei_me_hw_use_polling(hw)) + devm_free_irq(device, hw->irq, dev); + +err: + dev_err(device, "probe failed: %d\n", ret); + dev_set_drvdata(device, NULL); + return ret; +} + +static void mei_gsc_remove(struct auxiliary_device *aux_dev) +{ + struct mei_device *dev; + struct mei_me_hw *hw; + + dev = dev_get_drvdata(&aux_dev->dev); + if (!dev) + return; + + hw = to_me_hw(dev); + + mei_stop(dev); + + hw = to_me_hw(dev); + if (mei_me_hw_use_polling(hw)) + kthread_stop(hw->polling_thread); + + mei_deregister(dev); + + pm_runtime_disable(&aux_dev->dev); + + mei_disable_interrupts(dev); + if (!mei_me_hw_use_polling(hw)) + devm_free_irq(&aux_dev->dev, hw->irq, dev); +} + +static int __maybe_unused mei_gsc_pm_suspend(struct device *device) +{ + struct mei_device *dev = dev_get_drvdata(device); + + if (!dev) + return -ENODEV; + + mei_stop(dev); + + mei_disable_interrupts(dev); + + return 0; +} + +static int __maybe_unused mei_gsc_pm_resume(struct device *device) +{ + struct mei_device *dev = dev_get_drvdata(device); + struct auxiliary_device *aux_dev; + struct mei_aux_device *adev; + int err; + struct mei_me_hw *hw; + + if (!dev) + return -ENODEV; + + hw = to_me_hw(dev); + aux_dev = to_auxiliary_dev(device); + adev = auxiliary_dev_to_mei_aux_dev(aux_dev); + if (adev->ext_op_mem.start) { + mei_gsc_set_ext_op_mem(hw, &adev->ext_op_mem); + dev->pxp_mode = MEI_DEV_PXP_INIT; + } + + err = mei_restart(dev); + if (err) + return err; + + /* Start timer if stopped in suspend */ + schedule_delayed_work(&dev->timer_work, HZ); + + return 0; +} + +static int __maybe_unused mei_gsc_pm_runtime_idle(struct device *device) +{ + struct mei_device *dev = dev_get_drvdata(device); + + if (!dev) + return -ENODEV; + if (mei_write_is_idle(dev)) + pm_runtime_autosuspend(device); + + return -EBUSY; +} + +static int __maybe_unused mei_gsc_pm_runtime_suspend(struct device *device) +{ + struct mei_device *dev = dev_get_drvdata(device); + struct mei_me_hw *hw; + int ret; + + if (!dev) + return -ENODEV; + + mutex_lock(&dev->device_lock); + + if (mei_write_is_idle(dev)) { + hw = to_me_hw(dev); + hw->pg_state = MEI_PG_ON; + + if (mei_me_hw_use_polling(hw)) + hw->is_active = false; + ret = 0; + } else { + ret = -EAGAIN; + } + + mutex_unlock(&dev->device_lock); + + return ret; +} + +static int __maybe_unused mei_gsc_pm_runtime_resume(struct device *device) +{ + struct mei_device *dev = dev_get_drvdata(device); + struct mei_me_hw *hw; + irqreturn_t irq_ret; + + if (!dev) + return -ENODEV; + + mutex_lock(&dev->device_lock); + + hw = to_me_hw(dev); + hw->pg_state = MEI_PG_OFF; + + if (mei_me_hw_use_polling(hw)) { + hw->is_active = true; + wake_up(&hw->wait_active); + } + + mutex_unlock(&dev->device_lock); + + irq_ret = mei_me_irq_thread_handler(1, dev); + if (irq_ret != IRQ_HANDLED) + dev_err(dev->dev, "thread handler fail %d\n", irq_ret); + + return 0; +} + +static const struct dev_pm_ops mei_gsc_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(mei_gsc_pm_suspend, + mei_gsc_pm_resume) + SET_RUNTIME_PM_OPS(mei_gsc_pm_runtime_suspend, + mei_gsc_pm_runtime_resume, + mei_gsc_pm_runtime_idle) +}; + +static const struct auxiliary_device_id mei_gsc_id_table[] = { + { + .name = "i915.mei-gsc", + .driver_data = MEI_ME_GSC_CFG, + + }, 
+ { + .name = "i915.mei-gscfi", + .driver_data = MEI_ME_GSCFI_CFG, + }, + { + /* sentinel */ + } +}; +MODULE_DEVICE_TABLE(auxiliary, mei_gsc_id_table); + +static struct auxiliary_driver mei_gsc_driver = { + .probe = mei_gsc_probe, + .remove = mei_gsc_remove, + .driver = { + /* auxiliary_driver_register() sets .name to be the modname */ + .pm = &mei_gsc_pm_ops, + }, + .id_table = mei_gsc_id_table +}; +module_auxiliary_driver(mei_gsc_driver); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_ALIAS("auxiliary:i915.mei-gsc"); +MODULE_ALIAS("auxiliary:i915.mei-gscfi"); +MODULE_DESCRIPTION("Intel(R) Graphics System Controller"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/mei/gsc_proxy/Kconfig b/drivers/misc/mei/gsc_proxy/Kconfig new file mode 100644 index 0000000000..5f68d9f3d6 --- /dev/null +++ b/drivers/misc/mei/gsc_proxy/Kconfig @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2022-2023, Intel Corporation. All rights reserved. +# +config INTEL_MEI_GSC_PROXY + tristate "Intel GSC Proxy services of ME Interface" + select INTEL_MEI_ME + depends on DRM_I915 + help + MEI Support for GSC Proxy Services on Intel platforms. + + MEI GSC proxy enables messaging between GSC service on + Intel graphics card and services on CSE (MEI) firmware + residing SoC or PCH. + diff --git a/drivers/misc/mei/gsc_proxy/Makefile b/drivers/misc/mei/gsc_proxy/Makefile new file mode 100644 index 0000000000..358847e9aa --- /dev/null +++ b/drivers/misc/mei/gsc_proxy/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2022-2023, Intel Corporation. All rights reserved. +# +# Makefile - GSC Proxy client driver for Intel MEI Bus Driver. + +obj-$(CONFIG_INTEL_MEI_GSC_PROXY) += mei_gsc_proxy.o diff --git a/drivers/misc/mei/gsc_proxy/mei_gsc_proxy.c b/drivers/misc/mei/gsc_proxy/mei_gsc_proxy.c new file mode 100644 index 0000000000..be52b113ae --- /dev/null +++ b/drivers/misc/mei/gsc_proxy/mei_gsc_proxy.c @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022-2023 Intel Corporation + */ + +/** + * DOC: MEI_GSC_PROXY Client Driver + * + * The mei_gsc_proxy driver acts as a translation layer between + * proxy user (I915) and ME FW by proxying messages to ME FW + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * mei_gsc_proxy_send - Sends a proxy message to ME FW. + * @dev: device corresponding to the mei_cl_device + * @buf: a message buffer to send + * @size: size of the message + * Return: bytes sent on Success, <0 on Failure + */ +static int mei_gsc_proxy_send(struct device *dev, const void *buf, size_t size) +{ + ssize_t ret; + + if (!dev || !buf) + return -EINVAL; + + ret = mei_cldev_send(to_mei_cl_device(dev), buf, size); + if (ret < 0) + dev_dbg(dev, "mei_cldev_send failed. %zd\n", ret); + + return ret; +} + +/** + * mei_gsc_proxy_recv - Receives a proxy message from ME FW. + * @dev: device corresponding to the mei_cl_device + * @buf: a message buffer to contain the received message + * @size: size of the buffer + * Return: bytes received on Success, <0 on Failure + */ +static int mei_gsc_proxy_recv(struct device *dev, void *buf, size_t size) +{ + ssize_t ret; + + if (!dev || !buf) + return -EINVAL; + + ret = mei_cldev_recv(to_mei_cl_device(dev), buf, size); + if (ret < 0) + dev_dbg(dev, "mei_cldev_recv failed. 
%zd\n", ret); + + return ret; +} + +static const struct i915_gsc_proxy_component_ops mei_gsc_proxy_ops = { + .owner = THIS_MODULE, + .send = mei_gsc_proxy_send, + .recv = mei_gsc_proxy_recv, +}; + +static int mei_component_master_bind(struct device *dev) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + struct i915_gsc_proxy_component *comp_master = mei_cldev_get_drvdata(cldev); + + comp_master->ops = &mei_gsc_proxy_ops; + comp_master->mei_dev = dev; + return component_bind_all(dev, comp_master); +} + +static void mei_component_master_unbind(struct device *dev) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + struct i915_gsc_proxy_component *comp_master = mei_cldev_get_drvdata(cldev); + + component_unbind_all(dev, comp_master); +} + +static const struct component_master_ops mei_component_master_ops = { + .bind = mei_component_master_bind, + .unbind = mei_component_master_unbind, +}; + +/** + * mei_gsc_proxy_component_match - compare function for matching mei. + * + * The function checks if the device is pci device and + * Intel VGA adapter, the subcomponent is SW Proxy + * and the parent of MEI PCI and the parent of VGA are the same PCH device. + * + * @dev: master device + * @subcomponent: subcomponent to match (I915_COMPONENT_SWPROXY) + * @data: compare data (mei pci parent) + * + * Return: + * * 1 - if components match + * * 0 - otherwise + */ +static int mei_gsc_proxy_component_match(struct device *dev, int subcomponent, + void *data) +{ + struct pci_dev *pdev; + + if (!dev_is_pci(dev)) + return 0; + + pdev = to_pci_dev(dev); + + if (pdev->class != (PCI_CLASS_DISPLAY_VGA << 8) || + pdev->vendor != PCI_VENDOR_ID_INTEL) + return 0; + + if (subcomponent != I915_COMPONENT_GSC_PROXY) + return 0; + + return component_compare_dev(dev->parent, ((struct device *)data)->parent); +} + +static int mei_gsc_proxy_probe(struct mei_cl_device *cldev, + const struct mei_cl_device_id *id) +{ + struct i915_gsc_proxy_component *comp_master; + struct component_match *master_match = NULL; + int ret; + + ret = mei_cldev_enable(cldev); + if (ret < 0) { + dev_err(&cldev->dev, "mei_cldev_enable Failed. 
%d\n", ret); + goto enable_err_exit; + } + + comp_master = kzalloc(sizeof(*comp_master), GFP_KERNEL); + if (!comp_master) { + ret = -ENOMEM; + goto err_exit; + } + + component_match_add_typed(&cldev->dev, &master_match, + mei_gsc_proxy_component_match, cldev->dev.parent); + if (IS_ERR_OR_NULL(master_match)) { + ret = -ENOMEM; + goto err_exit; + } + + mei_cldev_set_drvdata(cldev, comp_master); + ret = component_master_add_with_match(&cldev->dev, + &mei_component_master_ops, + master_match); + if (ret < 0) { + dev_err(&cldev->dev, "Master comp add failed %d\n", ret); + goto err_exit; + } + + return 0; + +err_exit: + mei_cldev_set_drvdata(cldev, NULL); + kfree(comp_master); + mei_cldev_disable(cldev); +enable_err_exit: + return ret; +} + +static void mei_gsc_proxy_remove(struct mei_cl_device *cldev) +{ + struct i915_gsc_proxy_component *comp_master = mei_cldev_get_drvdata(cldev); + int ret; + + component_master_del(&cldev->dev, &mei_component_master_ops); + kfree(comp_master); + mei_cldev_set_drvdata(cldev, NULL); + + ret = mei_cldev_disable(cldev); + if (ret) + dev_warn(&cldev->dev, "mei_cldev_disable() failed %d\n", ret); +} + +#define MEI_UUID_GSC_PROXY UUID_LE(0xf73db04, 0x97ab, 0x4125, \ + 0xb8, 0x93, 0xe9, 0x4, 0xad, 0xd, 0x54, 0x64) + +static struct mei_cl_device_id mei_gsc_proxy_tbl[] = { + { .uuid = MEI_UUID_GSC_PROXY, .version = MEI_CL_VERSION_ANY }, + { } +}; +MODULE_DEVICE_TABLE(mei, mei_gsc_proxy_tbl); + +static struct mei_cl_driver mei_gsc_proxy_driver = { + .id_table = mei_gsc_proxy_tbl, + .name = KBUILD_MODNAME, + .probe = mei_gsc_proxy_probe, + .remove = mei_gsc_proxy_remove, +}; + +module_mei_cl_driver(mei_gsc_proxy_driver); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("MEI GSC PROXY"); diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c new file mode 100644 index 0000000000..12a62a911e --- /dev/null +++ b/drivers/misc/mei/hbm.c @@ -0,0 +1,1619 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2003-2022, Intel Corporation. All rights reserved. 
+ * Intel Management Engine Interface (Intel MEI) Linux driver + */ +#include +#include +#include +#include +#include + +#include + +#include "mei_dev.h" +#include "hbm.h" +#include "client.h" + +static const char *mei_hbm_status_str(enum mei_hbm_status status) +{ +#define MEI_HBM_STATUS(status) case MEI_HBMS_##status: return #status + switch (status) { + MEI_HBM_STATUS(SUCCESS); + MEI_HBM_STATUS(CLIENT_NOT_FOUND); + MEI_HBM_STATUS(ALREADY_EXISTS); + MEI_HBM_STATUS(REJECTED); + MEI_HBM_STATUS(INVALID_PARAMETER); + MEI_HBM_STATUS(NOT_ALLOWED); + MEI_HBM_STATUS(ALREADY_STARTED); + MEI_HBM_STATUS(NOT_STARTED); + default: return "unknown"; + } +#undef MEI_HBM_STATUS +}; + +static const char *mei_cl_conn_status_str(enum mei_cl_connect_status status) +{ +#define MEI_CL_CS(status) case MEI_CL_CONN_##status: return #status + switch (status) { + MEI_CL_CS(SUCCESS); + MEI_CL_CS(NOT_FOUND); + MEI_CL_CS(ALREADY_STARTED); + MEI_CL_CS(OUT_OF_RESOURCES); + MEI_CL_CS(MESSAGE_SMALL); + MEI_CL_CS(NOT_ALLOWED); + default: return "unknown"; + } +#undef MEI_CL_CCS +} + +const char *mei_hbm_state_str(enum mei_hbm_state state) +{ +#define MEI_HBM_STATE(state) case MEI_HBM_##state: return #state + switch (state) { + MEI_HBM_STATE(IDLE); + MEI_HBM_STATE(STARTING); + MEI_HBM_STATE(STARTED); + MEI_HBM_STATE(DR_SETUP); + MEI_HBM_STATE(ENUM_CLIENTS); + MEI_HBM_STATE(CLIENT_PROPERTIES); + MEI_HBM_STATE(STOPPED); + default: + return "unknown"; + } +#undef MEI_HBM_STATE +} + +/** + * mei_cl_conn_status_to_errno - convert client connect response + * status to error code + * + * @status: client connect response status + * + * Return: corresponding error code + */ +static int mei_cl_conn_status_to_errno(enum mei_cl_connect_status status) +{ + switch (status) { + case MEI_CL_CONN_SUCCESS: return 0; + case MEI_CL_CONN_NOT_FOUND: return -ENOTTY; + case MEI_CL_CONN_ALREADY_STARTED: return -EBUSY; + case MEI_CL_CONN_OUT_OF_RESOURCES: return -EBUSY; + case MEI_CL_CONN_MESSAGE_SMALL: return -EINVAL; + case MEI_CL_CONN_NOT_ALLOWED: return -EBUSY; + default: return -EINVAL; + } +} + +/** + * mei_hbm_write_message - wrapper for sending hbm messages. 
+ * + * @dev: mei device + * @hdr: mei header + * @data: payload + */ +static inline int mei_hbm_write_message(struct mei_device *dev, + struct mei_msg_hdr *hdr, + const void *data) +{ + return mei_write_message(dev, hdr, sizeof(*hdr), data, hdr->length); +} + +/** + * mei_hbm_idle - set hbm to idle state + * + * @dev: the device structure + */ +void mei_hbm_idle(struct mei_device *dev) +{ + dev->init_clients_timer = 0; + dev->hbm_state = MEI_HBM_IDLE; +} + +/** + * mei_hbm_reset - reset hbm counters and book keeping data structurs + * + * @dev: the device structure + */ +void mei_hbm_reset(struct mei_device *dev) +{ + mei_me_cl_rm_all(dev); + + mei_hbm_idle(dev); +} + +/** + * mei_hbm_hdr - construct hbm header + * + * @mei_hdr: hbm header + * @length: payload length + */ + +static inline void mei_hbm_hdr(struct mei_msg_hdr *mei_hdr, size_t length) +{ + memset(mei_hdr, 0, sizeof(*mei_hdr)); + mei_hdr->length = length; + mei_hdr->msg_complete = 1; +} + +/** + * mei_hbm_cl_hdr - construct client hbm header + * + * @cl: client + * @hbm_cmd: host bus message command + * @buf: buffer for cl header + * @len: buffer length + */ +static inline +void mei_hbm_cl_hdr(struct mei_cl *cl, u8 hbm_cmd, void *buf, size_t len) +{ + struct mei_hbm_cl_cmd *cmd = buf; + + memset(cmd, 0, len); + + cmd->hbm_cmd = hbm_cmd; + cmd->host_addr = mei_cl_host_addr(cl); + cmd->me_addr = mei_cl_me_id(cl); +} + +/** + * mei_hbm_cl_write - write simple hbm client message + * + * @dev: the device structure + * @cl: client + * @hbm_cmd: host bus message command + * @buf: message buffer + * @len: buffer length + * + * Return: 0 on success, <0 on failure. + */ +static inline int mei_hbm_cl_write(struct mei_device *dev, struct mei_cl *cl, + u8 hbm_cmd, void *buf, size_t len) +{ + struct mei_msg_hdr mei_hdr; + + mei_hbm_hdr(&mei_hdr, len); + mei_hbm_cl_hdr(cl, hbm_cmd, buf, len); + + return mei_hbm_write_message(dev, &mei_hdr, buf); +} + +/** + * mei_hbm_cl_addr_equal - check if the client's and + * the message address match + * + * @cl: client + * @cmd: hbm client message + * + * Return: true if addresses are the same + */ +static inline +bool mei_hbm_cl_addr_equal(struct mei_cl *cl, struct mei_hbm_cl_cmd *cmd) +{ + return mei_cl_host_addr(cl) == cmd->host_addr && + mei_cl_me_id(cl) == cmd->me_addr; +} + +/** + * mei_hbm_cl_find_by_cmd - find recipient client + * + * @dev: the device structure + * @buf: a buffer with hbm cl command + * + * Return: the recipient client or NULL if not found + */ +static inline +struct mei_cl *mei_hbm_cl_find_by_cmd(struct mei_device *dev, void *buf) +{ + struct mei_hbm_cl_cmd *cmd = (struct mei_hbm_cl_cmd *)buf; + struct mei_cl *cl; + + list_for_each_entry(cl, &dev->file_list, link) + if (mei_hbm_cl_addr_equal(cl, cmd)) + return cl; + return NULL; +} + + +/** + * mei_hbm_start_wait - wait for start response message. + * + * @dev: the device structure + * + * Return: 0 on success and < 0 on failure + */ +int mei_hbm_start_wait(struct mei_device *dev) +{ + int ret; + + if (dev->hbm_state > MEI_HBM_STARTING) + return 0; + + mutex_unlock(&dev->device_lock); + ret = wait_event_timeout(dev->wait_hbm_start, + dev->hbm_state != MEI_HBM_STARTING, + dev->timeouts.hbm); + mutex_lock(&dev->device_lock); + + if (ret == 0 && (dev->hbm_state <= MEI_HBM_STARTING)) { + dev->hbm_state = MEI_HBM_IDLE; + dev_err(dev->dev, "waiting for mei start failed\n"); + return -ETIME; + } + return 0; +} + +/** + * mei_hbm_start_req - sends start request message. 
+ * + * @dev: the device structure + * + * Return: 0 on success and < 0 on failure + */ +int mei_hbm_start_req(struct mei_device *dev) +{ + struct mei_msg_hdr mei_hdr; + struct hbm_host_version_request req; + int ret; + + mei_hbm_reset(dev); + + mei_hbm_hdr(&mei_hdr, sizeof(req)); + + /* host start message */ + memset(&req, 0, sizeof(req)); + req.hbm_cmd = HOST_START_REQ_CMD; + req.host_version.major_version = HBM_MAJOR_VERSION; + req.host_version.minor_version = HBM_MINOR_VERSION; + + dev->hbm_state = MEI_HBM_IDLE; + ret = mei_hbm_write_message(dev, &mei_hdr, &req); + if (ret) { + dev_err(dev->dev, "version message write failed: ret = %d\n", + ret); + return ret; + } + + dev->hbm_state = MEI_HBM_STARTING; + dev->init_clients_timer = dev->timeouts.client_init; + mei_schedule_stall_timer(dev); + return 0; +} + +/** + * mei_hbm_dma_setup_req() - setup DMA request + * @dev: the device structure + * + * Return: 0 on success and < 0 on failure + */ +static int mei_hbm_dma_setup_req(struct mei_device *dev) +{ + struct mei_msg_hdr mei_hdr; + struct hbm_dma_setup_request req; + unsigned int i; + int ret; + + mei_hbm_hdr(&mei_hdr, sizeof(req)); + + memset(&req, 0, sizeof(req)); + req.hbm_cmd = MEI_HBM_DMA_SETUP_REQ_CMD; + for (i = 0; i < DMA_DSCR_NUM; i++) { + phys_addr_t paddr; + + paddr = dev->dr_dscr[i].daddr; + req.dma_dscr[i].addr_hi = upper_32_bits(paddr); + req.dma_dscr[i].addr_lo = lower_32_bits(paddr); + req.dma_dscr[i].size = dev->dr_dscr[i].size; + } + + mei_dma_ring_reset(dev); + + ret = mei_hbm_write_message(dev, &mei_hdr, &req); + if (ret) { + dev_err(dev->dev, "dma setup request write failed: ret = %d.\n", + ret); + return ret; + } + + dev->hbm_state = MEI_HBM_DR_SETUP; + dev->init_clients_timer = dev->timeouts.client_init; + mei_schedule_stall_timer(dev); + return 0; +} + +/** + * mei_hbm_capabilities_req - request capabilities + * + * @dev: the device structure + * + * Return: 0 on success and < 0 on failure + */ +static int mei_hbm_capabilities_req(struct mei_device *dev) +{ + struct mei_msg_hdr mei_hdr; + struct hbm_capability_request req; + int ret; + + mei_hbm_hdr(&mei_hdr, sizeof(req)); + + memset(&req, 0, sizeof(req)); + req.hbm_cmd = MEI_HBM_CAPABILITIES_REQ_CMD; + if (dev->hbm_f_vt_supported) + req.capability_requested[0] |= HBM_CAP_VT; + + if (dev->hbm_f_cd_supported) + req.capability_requested[0] |= HBM_CAP_CD; + + if (dev->hbm_f_gsc_supported) + req.capability_requested[0] |= HBM_CAP_GSC; + + ret = mei_hbm_write_message(dev, &mei_hdr, &req); + if (ret) { + dev_err(dev->dev, + "capabilities request write failed: ret = %d.\n", ret); + return ret; + } + + dev->hbm_state = MEI_HBM_CAP_SETUP; + dev->init_clients_timer = dev->timeouts.client_init; + mei_schedule_stall_timer(dev); + return 0; +} + +/** + * mei_hbm_enum_clients_req - sends enumeration client request message. + * + * @dev: the device structure + * + * Return: 0 on success and < 0 on failure + */ +static int mei_hbm_enum_clients_req(struct mei_device *dev) +{ + struct mei_msg_hdr mei_hdr; + struct hbm_host_enum_request req; + int ret; + + /* enumerate clients */ + mei_hbm_hdr(&mei_hdr, sizeof(req)); + + memset(&req, 0, sizeof(req)); + req.hbm_cmd = HOST_ENUM_REQ_CMD; + req.flags |= dev->hbm_f_dc_supported ? MEI_HBM_ENUM_F_ALLOW_ADD : 0; + req.flags |= dev->hbm_f_ie_supported ? 
+ MEI_HBM_ENUM_F_IMMEDIATE_ENUM : 0; + + ret = mei_hbm_write_message(dev, &mei_hdr, &req); + if (ret) { + dev_err(dev->dev, "enumeration request write failed: ret = %d.\n", + ret); + return ret; + } + dev->hbm_state = MEI_HBM_ENUM_CLIENTS; + dev->init_clients_timer = dev->timeouts.client_init; + mei_schedule_stall_timer(dev); + return 0; +} + +/** + * mei_hbm_me_cl_add - add new me client to the list + * + * @dev: the device structure + * @res: hbm property response + * + * Return: 0 on success and -ENOMEM on allocation failure + */ + +static int mei_hbm_me_cl_add(struct mei_device *dev, + struct hbm_props_response *res) +{ + struct mei_me_client *me_cl; + const uuid_le *uuid = &res->client_properties.protocol_name; + + mei_me_cl_rm_by_uuid(dev, uuid); + + me_cl = kzalloc(sizeof(*me_cl), GFP_KERNEL); + if (!me_cl) + return -ENOMEM; + + mei_me_cl_init(me_cl); + + me_cl->props = res->client_properties; + me_cl->client_id = res->me_addr; + me_cl->tx_flow_ctrl_creds = 0; + + mei_me_cl_add(dev, me_cl); + + return 0; +} + +/** + * mei_hbm_add_cl_resp - send response to fw on client add request + * + * @dev: the device structure + * @addr: me address + * @status: response status + * + * Return: 0 on success and < 0 on failure + */ +static int mei_hbm_add_cl_resp(struct mei_device *dev, u8 addr, u8 status) +{ + struct mei_msg_hdr mei_hdr; + struct hbm_add_client_response resp; + int ret; + + dev_dbg(dev->dev, "adding client response\n"); + + mei_hbm_hdr(&mei_hdr, sizeof(resp)); + + memset(&resp, 0, sizeof(resp)); + resp.hbm_cmd = MEI_HBM_ADD_CLIENT_RES_CMD; + resp.me_addr = addr; + resp.status = status; + + ret = mei_hbm_write_message(dev, &mei_hdr, &resp); + if (ret) + dev_err(dev->dev, "add client response write failed: ret = %d\n", + ret); + return ret; +} + +/** + * mei_hbm_fw_add_cl_req - request from the fw to add a client + * + * @dev: the device structure + * @req: add client request + * + * Return: 0 on success and < 0 on failure + */ +static int mei_hbm_fw_add_cl_req(struct mei_device *dev, + struct hbm_add_client_request *req) +{ + int ret; + u8 status = MEI_HBMS_SUCCESS; + + BUILD_BUG_ON(sizeof(struct hbm_add_client_request) != + sizeof(struct hbm_props_response)); + + ret = mei_hbm_me_cl_add(dev, (struct hbm_props_response *)req); + if (ret) + status = !MEI_HBMS_SUCCESS; + + if (dev->dev_state == MEI_DEV_ENABLED) + schedule_work(&dev->bus_rescan_work); + + return mei_hbm_add_cl_resp(dev, req->me_addr, status); +} + +/** + * mei_hbm_cl_notify_req - send notification request + * + * @dev: the device structure + * @cl: a client to disconnect from + * @start: true for start false for stop + * + * Return: 0 on success and -EIO on write failure + */ +int mei_hbm_cl_notify_req(struct mei_device *dev, + struct mei_cl *cl, u8 start) +{ + + struct mei_msg_hdr mei_hdr; + struct hbm_notification_request req; + int ret; + + mei_hbm_hdr(&mei_hdr, sizeof(req)); + mei_hbm_cl_hdr(cl, MEI_HBM_NOTIFY_REQ_CMD, &req, sizeof(req)); + + req.start = start; + + ret = mei_hbm_write_message(dev, &mei_hdr, &req); + if (ret) + dev_err(dev->dev, "notify request failed: ret = %d\n", ret); + + return ret; +} + +/** + * notify_res_to_fop - convert notification response to the proper + * notification FOP + * + * @cmd: client notification start response command + * + * Return: MEI_FOP_NOTIFY_START or MEI_FOP_NOTIFY_STOP; + */ +static inline enum mei_cb_file_ops notify_res_to_fop(struct mei_hbm_cl_cmd *cmd) +{ + struct hbm_notification_response *rs = + (struct hbm_notification_response *)cmd; + + return 
mei_cl_notify_req2fop(rs->start); +} + +/** + * mei_hbm_cl_notify_start_res - update the client state according + * notify start response + * + * @dev: the device structure + * @cl: mei host client + * @cmd: client notification start response command + */ +static void mei_hbm_cl_notify_start_res(struct mei_device *dev, + struct mei_cl *cl, + struct mei_hbm_cl_cmd *cmd) +{ + struct hbm_notification_response *rs = + (struct hbm_notification_response *)cmd; + + cl_dbg(dev, cl, "hbm: notify start response status=%d\n", rs->status); + + if (rs->status == MEI_HBMS_SUCCESS || + rs->status == MEI_HBMS_ALREADY_STARTED) { + cl->notify_en = true; + cl->status = 0; + } else { + cl->status = -EINVAL; + } +} + +/** + * mei_hbm_cl_notify_stop_res - update the client state according + * notify stop response + * + * @dev: the device structure + * @cl: mei host client + * @cmd: client notification stop response command + */ +static void mei_hbm_cl_notify_stop_res(struct mei_device *dev, + struct mei_cl *cl, + struct mei_hbm_cl_cmd *cmd) +{ + struct hbm_notification_response *rs = + (struct hbm_notification_response *)cmd; + + cl_dbg(dev, cl, "hbm: notify stop response status=%d\n", rs->status); + + if (rs->status == MEI_HBMS_SUCCESS || + rs->status == MEI_HBMS_NOT_STARTED) { + cl->notify_en = false; + cl->status = 0; + } else { + /* TODO: spec is not clear yet about other possible issues */ + cl->status = -EINVAL; + } +} + +/** + * mei_hbm_cl_notify - signal notification event + * + * @dev: the device structure + * @cmd: notification client message + */ +static void mei_hbm_cl_notify(struct mei_device *dev, + struct mei_hbm_cl_cmd *cmd) +{ + struct mei_cl *cl; + + cl = mei_hbm_cl_find_by_cmd(dev, cmd); + if (cl) + mei_cl_notify(cl); +} + +/** + * mei_hbm_cl_dma_map_req - send client dma map request + * + * @dev: the device structure + * @cl: mei host client + * + * Return: 0 on success and -EIO on write failure + */ +int mei_hbm_cl_dma_map_req(struct mei_device *dev, struct mei_cl *cl) +{ + struct mei_msg_hdr mei_hdr; + struct hbm_client_dma_map_request req; + int ret; + + mei_hbm_hdr(&mei_hdr, sizeof(req)); + + memset(&req, 0, sizeof(req)); + + req.hbm_cmd = MEI_HBM_CLIENT_DMA_MAP_REQ_CMD; + req.client_buffer_id = cl->dma.buffer_id; + req.address_lsb = lower_32_bits(cl->dma.daddr); + req.address_msb = upper_32_bits(cl->dma.daddr); + req.size = cl->dma.size; + + ret = mei_hbm_write_message(dev, &mei_hdr, &req); + if (ret) + dev_err(dev->dev, "dma map request failed: ret = %d\n", ret); + + return ret; +} + +/** + * mei_hbm_cl_dma_unmap_req - send client dma unmap request + * + * @dev: the device structure + * @cl: mei host client + * + * Return: 0 on success and -EIO on write failure + */ +int mei_hbm_cl_dma_unmap_req(struct mei_device *dev, struct mei_cl *cl) +{ + struct mei_msg_hdr mei_hdr; + struct hbm_client_dma_unmap_request req; + int ret; + + mei_hbm_hdr(&mei_hdr, sizeof(req)); + + memset(&req, 0, sizeof(req)); + + req.hbm_cmd = MEI_HBM_CLIENT_DMA_UNMAP_REQ_CMD; + req.client_buffer_id = cl->dma.buffer_id; + + ret = mei_hbm_write_message(dev, &mei_hdr, &req); + if (ret) + dev_err(dev->dev, "dma unmap request failed: ret = %d\n", ret); + + return ret; +} + +static void mei_hbm_cl_dma_map_res(struct mei_device *dev, + struct hbm_client_dma_response *res) +{ + struct mei_cl *cl; + struct mei_cl_cb *cb, *next; + + cl = NULL; + list_for_each_entry_safe(cb, next, &dev->ctrl_rd_list, list) { + if (cb->fop_type != MEI_FOP_DMA_MAP) + continue; + if (!cb->cl->dma.buffer_id || cb->cl->dma_mapped) + continue; + + 
cl = cb->cl; + break; + } + if (!cl) + return; + + if (res->status) { + dev_err(dev->dev, "cl dma map failed %d\n", res->status); + cl->status = -EFAULT; + } else { + dev_dbg(dev->dev, "cl dma map succeeded\n"); + cl->dma_mapped = 1; + cl->status = 0; + } + wake_up(&cl->wait); +} + +static void mei_hbm_cl_dma_unmap_res(struct mei_device *dev, + struct hbm_client_dma_response *res) +{ + struct mei_cl *cl; + struct mei_cl_cb *cb, *next; + + cl = NULL; + list_for_each_entry_safe(cb, next, &dev->ctrl_rd_list, list) { + if (cb->fop_type != MEI_FOP_DMA_UNMAP) + continue; + if (!cb->cl->dma.buffer_id || !cb->cl->dma_mapped) + continue; + + cl = cb->cl; + break; + } + if (!cl) + return; + + if (res->status) { + dev_err(dev->dev, "cl dma unmap failed %d\n", res->status); + cl->status = -EFAULT; + } else { + dev_dbg(dev->dev, "cl dma unmap succeeded\n"); + cl->dma_mapped = 0; + cl->status = 0; + } + wake_up(&cl->wait); +} + +/** + * mei_hbm_prop_req - request property for a single client + * + * @dev: the device structure + * @start_idx: client index to start search + * + * Return: 0 on success and < 0 on failure + */ +static int mei_hbm_prop_req(struct mei_device *dev, unsigned long start_idx) +{ + struct mei_msg_hdr mei_hdr; + struct hbm_props_request req; + unsigned long addr; + int ret; + + addr = find_next_bit(dev->me_clients_map, MEI_CLIENTS_MAX, start_idx); + + /* We got all client properties */ + if (addr == MEI_CLIENTS_MAX) { + dev->hbm_state = MEI_HBM_STARTED; + mei_host_client_init(dev); + return 0; + } + + mei_hbm_hdr(&mei_hdr, sizeof(req)); + + memset(&req, 0, sizeof(req)); + + req.hbm_cmd = HOST_CLIENT_PROPERTIES_REQ_CMD; + req.me_addr = addr; + + ret = mei_hbm_write_message(dev, &mei_hdr, &req); + if (ret) { + dev_err(dev->dev, "properties request write failed: ret = %d\n", + ret); + return ret; + } + + dev->init_clients_timer = dev->timeouts.client_init; + mei_schedule_stall_timer(dev); + + return 0; +} + +/** + * mei_hbm_pg - sends pg command + * + * @dev: the device structure + * @pg_cmd: the pg command code + * + * Return: -EIO on write failure + * -EOPNOTSUPP if the operation is not supported by the protocol + */ +int mei_hbm_pg(struct mei_device *dev, u8 pg_cmd) +{ + struct mei_msg_hdr mei_hdr; + struct hbm_power_gate req; + int ret; + + if (!dev->hbm_f_pg_supported) + return -EOPNOTSUPP; + + mei_hbm_hdr(&mei_hdr, sizeof(req)); + + memset(&req, 0, sizeof(req)); + req.hbm_cmd = pg_cmd; + + ret = mei_hbm_write_message(dev, &mei_hdr, &req); + if (ret) + dev_err(dev->dev, "power gate command write failed.\n"); + return ret; +} +EXPORT_SYMBOL_GPL(mei_hbm_pg); + +/** + * mei_hbm_stop_req - send stop request message + * + * @dev: mei device + * + * Return: -EIO on write failure + */ +static int mei_hbm_stop_req(struct mei_device *dev) +{ + struct mei_msg_hdr mei_hdr; + struct hbm_host_stop_request req; + + mei_hbm_hdr(&mei_hdr, sizeof(req)); + + memset(&req, 0, sizeof(req)); + req.hbm_cmd = HOST_STOP_REQ_CMD; + req.reason = DRIVER_STOP_REQUEST; + + return mei_hbm_write_message(dev, &mei_hdr, &req); +} + +/** + * mei_hbm_cl_flow_control_req - sends flow control request. + * + * @dev: the device structure + * @cl: client info + * + * Return: -EIO on write failure + */ +int mei_hbm_cl_flow_control_req(struct mei_device *dev, struct mei_cl *cl) +{ + struct hbm_flow_control req; + + cl_dbg(dev, cl, "sending flow control\n"); + return mei_hbm_cl_write(dev, cl, MEI_FLOW_CONTROL_CMD, + &req, sizeof(req)); +} + +/** + * mei_hbm_add_single_tx_flow_ctrl_creds - adds single buffer credentials. 
+ * + * @dev: the device structure + * @fctrl: flow control response bus message + * + * Return: 0 on success, < 0 otherwise + */ +static int mei_hbm_add_single_tx_flow_ctrl_creds(struct mei_device *dev, + struct hbm_flow_control *fctrl) +{ + struct mei_me_client *me_cl; + int rets; + + me_cl = mei_me_cl_by_id(dev, fctrl->me_addr); + if (!me_cl) { + dev_err(dev->dev, "no such me client %d\n", fctrl->me_addr); + return -ENOENT; + } + + if (WARN_ON(me_cl->props.single_recv_buf == 0)) { + rets = -EINVAL; + goto out; + } + + me_cl->tx_flow_ctrl_creds++; + dev_dbg(dev->dev, "recv flow ctrl msg ME %d (single) creds = %d.\n", + fctrl->me_addr, me_cl->tx_flow_ctrl_creds); + + rets = 0; +out: + mei_me_cl_put(me_cl); + return rets; +} + +/** + * mei_hbm_cl_tx_flow_ctrl_creds_res - flow control response from me + * + * @dev: the device structure + * @fctrl: flow control response bus message + */ +static void mei_hbm_cl_tx_flow_ctrl_creds_res(struct mei_device *dev, + struct hbm_flow_control *fctrl) +{ + struct mei_cl *cl; + + if (!fctrl->host_addr) { + /* single receive buffer */ + mei_hbm_add_single_tx_flow_ctrl_creds(dev, fctrl); + return; + } + + cl = mei_hbm_cl_find_by_cmd(dev, fctrl); + if (cl) { + cl->tx_flow_ctrl_creds++; + cl_dbg(dev, cl, "flow control creds = %d.\n", + cl->tx_flow_ctrl_creds); + } +} + + +/** + * mei_hbm_cl_disconnect_req - sends disconnect message to fw. + * + * @dev: the device structure + * @cl: a client to disconnect from + * + * Return: -EIO on write failure + */ +int mei_hbm_cl_disconnect_req(struct mei_device *dev, struct mei_cl *cl) +{ + struct hbm_client_connect_request req; + + return mei_hbm_cl_write(dev, cl, CLIENT_DISCONNECT_REQ_CMD, + &req, sizeof(req)); +} + +/** + * mei_hbm_cl_disconnect_rsp - sends disconnect respose to the FW + * + * @dev: the device structure + * @cl: a client to disconnect from + * + * Return: -EIO on write failure + */ +int mei_hbm_cl_disconnect_rsp(struct mei_device *dev, struct mei_cl *cl) +{ + struct hbm_client_connect_response resp; + + return mei_hbm_cl_write(dev, cl, CLIENT_DISCONNECT_RES_CMD, + &resp, sizeof(resp)); +} + +/** + * mei_hbm_cl_disconnect_res - update the client state according + * disconnect response + * + * @dev: the device structure + * @cl: mei host client + * @cmd: disconnect client response host bus message + */ +static void mei_hbm_cl_disconnect_res(struct mei_device *dev, struct mei_cl *cl, + struct mei_hbm_cl_cmd *cmd) +{ + struct hbm_client_connect_response *rs = + (struct hbm_client_connect_response *)cmd; + + cl_dbg(dev, cl, "hbm: disconnect response status=%d\n", rs->status); + + if (rs->status == MEI_CL_DISCONN_SUCCESS) + cl->state = MEI_FILE_DISCONNECT_REPLY; + cl->status = 0; +} + +/** + * mei_hbm_cl_connect_req - send connection request to specific me client + * + * @dev: the device structure + * @cl: a client to connect to + * + * Return: -EIO on write failure + */ +int mei_hbm_cl_connect_req(struct mei_device *dev, struct mei_cl *cl) +{ + struct hbm_client_connect_request req; + + return mei_hbm_cl_write(dev, cl, CLIENT_CONNECT_REQ_CMD, + &req, sizeof(req)); +} + +/** + * mei_hbm_cl_connect_res - update the client state according + * connection response + * + * @dev: the device structure + * @cl: mei host client + * @cmd: connect client response host bus message + */ +static void mei_hbm_cl_connect_res(struct mei_device *dev, struct mei_cl *cl, + struct mei_hbm_cl_cmd *cmd) +{ + struct hbm_client_connect_response *rs = + (struct hbm_client_connect_response *)cmd; + + cl_dbg(dev, cl, "hbm: connect 
response status=%s\n", + mei_cl_conn_status_str(rs->status)); + + if (rs->status == MEI_CL_CONN_SUCCESS) + cl->state = MEI_FILE_CONNECTED; + else { + cl->state = MEI_FILE_DISCONNECT_REPLY; + if (rs->status == MEI_CL_CONN_NOT_FOUND) { + mei_me_cl_del(dev, cl->me_cl); + if (dev->dev_state == MEI_DEV_ENABLED) + schedule_work(&dev->bus_rescan_work); + } + } + cl->status = mei_cl_conn_status_to_errno(rs->status); +} + +/** + * mei_hbm_cl_res - process hbm response received on behalf + * an client + * + * @dev: the device structure + * @rs: hbm client message + * @fop_type: file operation type + */ +static void mei_hbm_cl_res(struct mei_device *dev, + struct mei_hbm_cl_cmd *rs, + enum mei_cb_file_ops fop_type) +{ + struct mei_cl *cl; + struct mei_cl_cb *cb, *next; + + cl = NULL; + list_for_each_entry_safe(cb, next, &dev->ctrl_rd_list, list) { + + cl = cb->cl; + + if (cb->fop_type != fop_type) + continue; + + if (mei_hbm_cl_addr_equal(cl, rs)) { + list_del_init(&cb->list); + break; + } + } + + if (!cl) + return; + + switch (fop_type) { + case MEI_FOP_CONNECT: + mei_hbm_cl_connect_res(dev, cl, rs); + break; + case MEI_FOP_DISCONNECT: + mei_hbm_cl_disconnect_res(dev, cl, rs); + break; + case MEI_FOP_NOTIFY_START: + mei_hbm_cl_notify_start_res(dev, cl, rs); + break; + case MEI_FOP_NOTIFY_STOP: + mei_hbm_cl_notify_stop_res(dev, cl, rs); + break; + default: + return; + } + + cl->timer_count = 0; + wake_up(&cl->wait); +} + + +/** + * mei_hbm_fw_disconnect_req - disconnect request initiated by ME firmware + * host sends disconnect response + * + * @dev: the device structure. + * @disconnect_req: disconnect request bus message from the me + * + * Return: -ENOMEM on allocation failure + */ +static int mei_hbm_fw_disconnect_req(struct mei_device *dev, + struct hbm_client_connect_request *disconnect_req) +{ + struct mei_cl *cl; + struct mei_cl_cb *cb; + + cl = mei_hbm_cl_find_by_cmd(dev, disconnect_req); + if (cl) { + cl_warn(dev, cl, "fw disconnect request received\n"); + cl->state = MEI_FILE_DISCONNECTING; + cl->timer_count = 0; + + cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, MEI_FOP_DISCONNECT_RSP, + NULL); + if (!cb) + return -ENOMEM; + } + return 0; +} + +/** + * mei_hbm_pg_enter_res - PG enter response received + * + * @dev: the device structure. + * + * Return: 0 on success, -EPROTO on state mismatch + */ +static int mei_hbm_pg_enter_res(struct mei_device *dev) +{ + if (mei_pg_state(dev) != MEI_PG_OFF || + dev->pg_event != MEI_PG_EVENT_WAIT) { + dev_err(dev->dev, "hbm: pg entry response: state mismatch [%s, %d]\n", + mei_pg_state_str(mei_pg_state(dev)), dev->pg_event); + return -EPROTO; + } + + dev->pg_event = MEI_PG_EVENT_RECEIVED; + wake_up(&dev->wait_pg); + + return 0; +} + +/** + * mei_hbm_pg_resume - process with PG resume + * + * @dev: the device structure. + */ +void mei_hbm_pg_resume(struct mei_device *dev) +{ + pm_request_resume(dev->dev); +} +EXPORT_SYMBOL_GPL(mei_hbm_pg_resume); + +/** + * mei_hbm_pg_exit_res - PG exit response received + * + * @dev: the device structure. 
+ * + * Return: 0 on success, -EPROTO on state mismatch + */ +static int mei_hbm_pg_exit_res(struct mei_device *dev) +{ + if (mei_pg_state(dev) != MEI_PG_ON || + (dev->pg_event != MEI_PG_EVENT_WAIT && + dev->pg_event != MEI_PG_EVENT_IDLE)) { + dev_err(dev->dev, "hbm: pg exit response: state mismatch [%s, %d]\n", + mei_pg_state_str(mei_pg_state(dev)), dev->pg_event); + return -EPROTO; + } + + switch (dev->pg_event) { + case MEI_PG_EVENT_WAIT: + dev->pg_event = MEI_PG_EVENT_RECEIVED; + wake_up(&dev->wait_pg); + break; + case MEI_PG_EVENT_IDLE: + /* + * If the driver is not waiting on this then + * this is HW initiated exit from PG. + * Start runtime pm resume sequence to exit from PG. + */ + dev->pg_event = MEI_PG_EVENT_RECEIVED; + mei_hbm_pg_resume(dev); + break; + default: + WARN(1, "hbm: pg exit response: unexpected pg event = %d\n", + dev->pg_event); + return -EPROTO; + } + + return 0; +} + +/** + * mei_hbm_config_features - check what hbm features and commands + * are supported by the fw + * + * @dev: the device structure + */ +static void mei_hbm_config_features(struct mei_device *dev) +{ + /* Power Gating Isolation Support */ + dev->hbm_f_pg_supported = 0; + if (dev->version.major_version > HBM_MAJOR_VERSION_PGI) + dev->hbm_f_pg_supported = 1; + + if (dev->version.major_version == HBM_MAJOR_VERSION_PGI && + dev->version.minor_version >= HBM_MINOR_VERSION_PGI) + dev->hbm_f_pg_supported = 1; + + dev->hbm_f_dc_supported = 0; + if (dev->version.major_version >= HBM_MAJOR_VERSION_DC) + dev->hbm_f_dc_supported = 1; + + dev->hbm_f_ie_supported = 0; + if (dev->version.major_version >= HBM_MAJOR_VERSION_IE) + dev->hbm_f_ie_supported = 1; + + /* disconnect on connect timeout instead of link reset */ + dev->hbm_f_dot_supported = 0; + if (dev->version.major_version >= HBM_MAJOR_VERSION_DOT) + dev->hbm_f_dot_supported = 1; + + /* Notification Event Support */ + dev->hbm_f_ev_supported = 0; + if (dev->version.major_version >= HBM_MAJOR_VERSION_EV) + dev->hbm_f_ev_supported = 1; + + /* Fixed Address Client Support */ + dev->hbm_f_fa_supported = 0; + if (dev->version.major_version >= HBM_MAJOR_VERSION_FA) + dev->hbm_f_fa_supported = 1; + + /* OS ver message Support */ + dev->hbm_f_os_supported = 0; + if (dev->version.major_version >= HBM_MAJOR_VERSION_OS) + dev->hbm_f_os_supported = 1; + + /* DMA Ring Support */ + dev->hbm_f_dr_supported = 0; + if (dev->version.major_version > HBM_MAJOR_VERSION_DR || + (dev->version.major_version == HBM_MAJOR_VERSION_DR && + dev->version.minor_version >= HBM_MINOR_VERSION_DR)) + dev->hbm_f_dr_supported = 1; + + /* VTag Support */ + dev->hbm_f_vt_supported = 0; + if (dev->version.major_version > HBM_MAJOR_VERSION_VT || + (dev->version.major_version == HBM_MAJOR_VERSION_VT && + dev->version.minor_version >= HBM_MINOR_VERSION_VT)) + dev->hbm_f_vt_supported = 1; + + /* GSC support */ + if (dev->version.major_version > HBM_MAJOR_VERSION_GSC || + (dev->version.major_version == HBM_MAJOR_VERSION_GSC && + dev->version.minor_version >= HBM_MINOR_VERSION_GSC)) + dev->hbm_f_gsc_supported = 1; + + /* Capability message Support */ + dev->hbm_f_cap_supported = 0; + if (dev->version.major_version > HBM_MAJOR_VERSION_CAP || + (dev->version.major_version == HBM_MAJOR_VERSION_CAP && + dev->version.minor_version >= HBM_MINOR_VERSION_CAP)) + dev->hbm_f_cap_supported = 1; + + /* Client DMA Support */ + dev->hbm_f_cd_supported = 0; + if (dev->version.major_version > HBM_MAJOR_VERSION_CD || + (dev->version.major_version == HBM_MAJOR_VERSION_CD && + dev->version.minor_version >= 
HBM_MINOR_VERSION_CD)) + dev->hbm_f_cd_supported = 1; +} + +/** + * mei_hbm_version_is_supported - checks whether the driver can + * support the hbm version of the device + * + * @dev: the device structure + * Return: true if driver can support hbm version of the device + */ +bool mei_hbm_version_is_supported(struct mei_device *dev) +{ + return (dev->version.major_version < HBM_MAJOR_VERSION) || + (dev->version.major_version == HBM_MAJOR_VERSION && + dev->version.minor_version <= HBM_MINOR_VERSION); +} + +/** + * mei_hbm_dispatch - bottom half read routine after ISR to + * handle the read bus message cmd processing. + * + * @dev: the device structure + * @hdr: header of bus message + * + * Return: 0 on success and < 0 on failure + */ +int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr) +{ + struct mei_bus_message *mei_msg; + struct hbm_host_version_response *version_res; + struct hbm_props_response *props_res; + struct hbm_host_enum_response *enum_res; + struct hbm_dma_setup_response *dma_setup_res; + struct hbm_add_client_request *add_cl_req; + struct hbm_capability_response *capability_res; + int ret; + + struct mei_hbm_cl_cmd *cl_cmd; + struct hbm_client_connect_request *disconnect_req; + struct hbm_flow_control *fctrl; + struct hbm_client_dma_response *client_dma_res; + + /* read the message to our buffer */ + BUG_ON(hdr->length >= sizeof(dev->rd_msg_buf)); + mei_read_slots(dev, dev->rd_msg_buf, hdr->length); + mei_msg = (struct mei_bus_message *)dev->rd_msg_buf; + cl_cmd = (struct mei_hbm_cl_cmd *)mei_msg; + + /* ignore spurious message and prevent reset nesting + * hbm is put to idle during system reset + */ + if (dev->hbm_state == MEI_HBM_IDLE) { + dev_dbg(dev->dev, "hbm: state is idle ignore spurious messages\n"); + return 0; + } + + switch (mei_msg->hbm_cmd) { + case HOST_START_RES_CMD: + dev_dbg(dev->dev, "hbm: start: response message received.\n"); + + dev->init_clients_timer = 0; + + version_res = (struct hbm_host_version_response *)mei_msg; + + dev_dbg(dev->dev, "HBM VERSION: DRIVER=%02d:%02d DEVICE=%02d:%02d\n", + HBM_MAJOR_VERSION, HBM_MINOR_VERSION, + version_res->me_max_version.major_version, + version_res->me_max_version.minor_version); + + if (version_res->host_version_supported) { + dev->version.major_version = HBM_MAJOR_VERSION; + dev->version.minor_version = HBM_MINOR_VERSION; + } else { + dev->version.major_version = + version_res->me_max_version.major_version; + dev->version.minor_version = + version_res->me_max_version.minor_version; + } + + if (!mei_hbm_version_is_supported(dev)) { + dev_warn(dev->dev, "hbm: start: version mismatch - stopping the driver.\n"); + + dev->hbm_state = MEI_HBM_STOPPED; + if (mei_hbm_stop_req(dev)) { + dev_err(dev->dev, "hbm: start: failed to send stop request\n"); + return -EIO; + } + break; + } + + mei_hbm_config_features(dev); + + if (dev->dev_state != MEI_DEV_INIT_CLIENTS || + dev->hbm_state != MEI_HBM_STARTING) { + if (dev->dev_state == MEI_DEV_POWER_DOWN || + dev->dev_state == MEI_DEV_POWERING_DOWN) { + dev_dbg(dev->dev, "hbm: start: on shutdown, ignoring\n"); + return 0; + } + dev_err(dev->dev, "hbm: start: state mismatch, [%d, %d]\n", + dev->dev_state, dev->hbm_state); + return -EPROTO; + } + + if (dev->hbm_f_cap_supported) { + if (mei_hbm_capabilities_req(dev)) + return -EIO; + wake_up(&dev->wait_hbm_start); + break; + } + + if (dev->hbm_f_dr_supported) { + if (mei_dmam_ring_alloc(dev)) + dev_info(dev->dev, "running w/o dma ring\n"); + if (mei_dma_ring_is_allocated(dev)) { + if (mei_hbm_dma_setup_req(dev)) + 
return -EIO; + + wake_up(&dev->wait_hbm_start); + break; + } + } + + dev->hbm_f_dr_supported = 0; + mei_dmam_ring_free(dev); + + if (mei_hbm_enum_clients_req(dev)) + return -EIO; + + wake_up(&dev->wait_hbm_start); + break; + + case MEI_HBM_CAPABILITIES_RES_CMD: + dev_dbg(dev->dev, "hbm: capabilities response: message received.\n"); + + dev->init_clients_timer = 0; + + if (dev->dev_state != MEI_DEV_INIT_CLIENTS || + dev->hbm_state != MEI_HBM_CAP_SETUP) { + if (dev->dev_state == MEI_DEV_POWER_DOWN || + dev->dev_state == MEI_DEV_POWERING_DOWN) { + dev_dbg(dev->dev, "hbm: capabilities response: on shutdown, ignoring\n"); + return 0; + } + dev_err(dev->dev, "hbm: capabilities response: state mismatch, [%d, %d]\n", + dev->dev_state, dev->hbm_state); + return -EPROTO; + } + + capability_res = (struct hbm_capability_response *)mei_msg; + if (!(capability_res->capability_granted[0] & HBM_CAP_VT)) + dev->hbm_f_vt_supported = 0; + if (!(capability_res->capability_granted[0] & HBM_CAP_CD)) + dev->hbm_f_cd_supported = 0; + + if (!(capability_res->capability_granted[0] & HBM_CAP_GSC)) + dev->hbm_f_gsc_supported = 0; + + if (dev->hbm_f_dr_supported) { + if (mei_dmam_ring_alloc(dev)) + dev_info(dev->dev, "running w/o dma ring\n"); + if (mei_dma_ring_is_allocated(dev)) { + if (mei_hbm_dma_setup_req(dev)) + return -EIO; + break; + } + } + + dev->hbm_f_dr_supported = 0; + mei_dmam_ring_free(dev); + + if (mei_hbm_enum_clients_req(dev)) + return -EIO; + break; + + case MEI_HBM_DMA_SETUP_RES_CMD: + dev_dbg(dev->dev, "hbm: dma setup response: message received.\n"); + + dev->init_clients_timer = 0; + + if (dev->dev_state != MEI_DEV_INIT_CLIENTS || + dev->hbm_state != MEI_HBM_DR_SETUP) { + if (dev->dev_state == MEI_DEV_POWER_DOWN || + dev->dev_state == MEI_DEV_POWERING_DOWN) { + dev_dbg(dev->dev, "hbm: dma setup response: on shutdown, ignoring\n"); + return 0; + } + dev_err(dev->dev, "hbm: dma setup response: state mismatch, [%d, %d]\n", + dev->dev_state, dev->hbm_state); + return -EPROTO; + } + + dma_setup_res = (struct hbm_dma_setup_response *)mei_msg; + + if (dma_setup_res->status) { + u8 status = dma_setup_res->status; + + if (status == MEI_HBMS_NOT_ALLOWED) { + dev_dbg(dev->dev, "hbm: dma setup not allowed\n"); + } else { + dev_info(dev->dev, "hbm: dma setup response: failure = %d %s\n", + status, + mei_hbm_status_str(status)); + } + dev->hbm_f_dr_supported = 0; + mei_dmam_ring_free(dev); + } + + if (mei_hbm_enum_clients_req(dev)) + return -EIO; + break; + + case CLIENT_CONNECT_RES_CMD: + dev_dbg(dev->dev, "hbm: client connect response: message received.\n"); + mei_hbm_cl_res(dev, cl_cmd, MEI_FOP_CONNECT); + break; + + case CLIENT_DISCONNECT_RES_CMD: + dev_dbg(dev->dev, "hbm: client disconnect response: message received.\n"); + mei_hbm_cl_res(dev, cl_cmd, MEI_FOP_DISCONNECT); + break; + + case MEI_FLOW_CONTROL_CMD: + dev_dbg(dev->dev, "hbm: client flow control response: message received.\n"); + + fctrl = (struct hbm_flow_control *)mei_msg; + mei_hbm_cl_tx_flow_ctrl_creds_res(dev, fctrl); + break; + + case MEI_PG_ISOLATION_ENTRY_RES_CMD: + dev_dbg(dev->dev, "hbm: power gate isolation entry response received\n"); + ret = mei_hbm_pg_enter_res(dev); + if (ret) + return ret; + break; + + case MEI_PG_ISOLATION_EXIT_REQ_CMD: + dev_dbg(dev->dev, "hbm: power gate isolation exit request received\n"); + ret = mei_hbm_pg_exit_res(dev); + if (ret) + return ret; + break; + + case HOST_CLIENT_PROPERTIES_RES_CMD: + dev_dbg(dev->dev, "hbm: properties response: message received.\n"); + + dev->init_clients_timer = 0; + + if 
(dev->dev_state != MEI_DEV_INIT_CLIENTS || + dev->hbm_state != MEI_HBM_CLIENT_PROPERTIES) { + if (dev->dev_state == MEI_DEV_POWER_DOWN || + dev->dev_state == MEI_DEV_POWERING_DOWN) { + dev_dbg(dev->dev, "hbm: properties response: on shutdown, ignoring\n"); + return 0; + } + dev_err(dev->dev, "hbm: properties response: state mismatch, [%d, %d]\n", + dev->dev_state, dev->hbm_state); + return -EPROTO; + } + + props_res = (struct hbm_props_response *)mei_msg; + + if (props_res->status == MEI_HBMS_CLIENT_NOT_FOUND) { + dev_dbg(dev->dev, "hbm: properties response: %d CLIENT_NOT_FOUND\n", + props_res->me_addr); + } else if (props_res->status) { + dev_err(dev->dev, "hbm: properties response: wrong status = %d %s\n", + props_res->status, + mei_hbm_status_str(props_res->status)); + return -EPROTO; + } else { + mei_hbm_me_cl_add(dev, props_res); + } + + /* request property for the next client */ + if (mei_hbm_prop_req(dev, props_res->me_addr + 1)) + return -EIO; + + break; + + case HOST_ENUM_RES_CMD: + dev_dbg(dev->dev, "hbm: enumeration response: message received\n"); + + dev->init_clients_timer = 0; + + enum_res = (struct hbm_host_enum_response *) mei_msg; + BUILD_BUG_ON(sizeof(dev->me_clients_map) + < sizeof(enum_res->valid_addresses)); + memcpy(dev->me_clients_map, enum_res->valid_addresses, + sizeof(enum_res->valid_addresses)); + + if (dev->dev_state != MEI_DEV_INIT_CLIENTS || + dev->hbm_state != MEI_HBM_ENUM_CLIENTS) { + if (dev->dev_state == MEI_DEV_POWER_DOWN || + dev->dev_state == MEI_DEV_POWERING_DOWN) { + dev_dbg(dev->dev, "hbm: enumeration response: on shutdown, ignoring\n"); + return 0; + } + dev_err(dev->dev, "hbm: enumeration response: state mismatch, [%d, %d]\n", + dev->dev_state, dev->hbm_state); + return -EPROTO; + } + + dev->hbm_state = MEI_HBM_CLIENT_PROPERTIES; + + /* first property request */ + if (mei_hbm_prop_req(dev, 0)) + return -EIO; + + break; + + case HOST_STOP_RES_CMD: + dev_dbg(dev->dev, "hbm: stop response: message received\n"); + + dev->init_clients_timer = 0; + + if (dev->hbm_state != MEI_HBM_STOPPED) { + dev_err(dev->dev, "hbm: stop response: state mismatch, [%d, %d]\n", + dev->dev_state, dev->hbm_state); + return -EPROTO; + } + + mei_set_devstate(dev, MEI_DEV_POWER_DOWN); + dev_info(dev->dev, "hbm: stop response: resetting.\n"); + /* force the reset */ + return -EPROTO; + + case CLIENT_DISCONNECT_REQ_CMD: + dev_dbg(dev->dev, "hbm: disconnect request: message received\n"); + + disconnect_req = (struct hbm_client_connect_request *)mei_msg; + mei_hbm_fw_disconnect_req(dev, disconnect_req); + break; + + case ME_STOP_REQ_CMD: + dev_dbg(dev->dev, "hbm: stop request: message received\n"); + dev->hbm_state = MEI_HBM_STOPPED; + if (mei_hbm_stop_req(dev)) { + dev_err(dev->dev, "hbm: stop request: failed to send stop request\n"); + return -EIO; + } + break; + + case MEI_HBM_ADD_CLIENT_REQ_CMD: + dev_dbg(dev->dev, "hbm: add client request received\n"); + /* + * after the host receives the enum_resp + * message clients may be added or removed + */ + if (dev->hbm_state <= MEI_HBM_ENUM_CLIENTS || + dev->hbm_state >= MEI_HBM_STOPPED) { + dev_err(dev->dev, "hbm: add client: state mismatch, [%d, %d]\n", + dev->dev_state, dev->hbm_state); + return -EPROTO; + } + add_cl_req = (struct hbm_add_client_request *)mei_msg; + ret = mei_hbm_fw_add_cl_req(dev, add_cl_req); + if (ret) { + dev_err(dev->dev, "hbm: add client: failed to send response %d\n", + ret); + return -EIO; + } + dev_dbg(dev->dev, "hbm: add client request processed\n"); + break; + + case MEI_HBM_NOTIFY_RES_CMD: + 
dev_dbg(dev->dev, "hbm: notify response received\n"); + mei_hbm_cl_res(dev, cl_cmd, notify_res_to_fop(cl_cmd)); + break; + + case MEI_HBM_NOTIFICATION_CMD: + dev_dbg(dev->dev, "hbm: notification\n"); + mei_hbm_cl_notify(dev, cl_cmd); + break; + + case MEI_HBM_CLIENT_DMA_MAP_RES_CMD: + dev_dbg(dev->dev, "hbm: client dma map response: message received.\n"); + client_dma_res = (struct hbm_client_dma_response *)mei_msg; + mei_hbm_cl_dma_map_res(dev, client_dma_res); + break; + + case MEI_HBM_CLIENT_DMA_UNMAP_RES_CMD: + dev_dbg(dev->dev, "hbm: client dma unmap response: message received.\n"); + client_dma_res = (struct hbm_client_dma_response *)mei_msg; + mei_hbm_cl_dma_unmap_res(dev, client_dma_res); + break; + + default: + WARN(1, "hbm: wrong command %d\n", mei_msg->hbm_cmd); + return -EPROTO; + + } + return 0; +} + diff --git a/drivers/misc/mei/hbm.h b/drivers/misc/mei/hbm.h new file mode 100644 index 0000000000..cd5b08ca34 --- /dev/null +++ b/drivers/misc/mei/hbm.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2003-2018, Intel Corporation. All rights reserved. + * Intel Management Engine Interface (Intel MEI) Linux driver + */ + +#ifndef _MEI_HBM_H_ +#define _MEI_HBM_H_ + +struct mei_device; +struct mei_msg_hdr; +struct mei_cl; +struct mei_dma_data; + +/** + * enum mei_hbm_state - host bus message protocol state + * + * @MEI_HBM_IDLE : protocol not started + * @MEI_HBM_STARTING : start request message was sent + * @MEI_HBM_CAP_SETUP : capabilities request message was sent + * @MEI_HBM_DR_SETUP : dma ring setup request message was sent + * @MEI_HBM_ENUM_CLIENTS : enumeration request was sent + * @MEI_HBM_CLIENT_PROPERTIES : acquiring clients properties + * @MEI_HBM_STARTED : enumeration was completed + * @MEI_HBM_STOPPED : stopping exchange + */ +enum mei_hbm_state { + MEI_HBM_IDLE = 0, + MEI_HBM_STARTING, + MEI_HBM_CAP_SETUP, + MEI_HBM_DR_SETUP, + MEI_HBM_ENUM_CLIENTS, + MEI_HBM_CLIENT_PROPERTIES, + MEI_HBM_STARTED, + MEI_HBM_STOPPED, +}; + +const char *mei_hbm_state_str(enum mei_hbm_state state); + +int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr); + +void mei_hbm_idle(struct mei_device *dev); +void mei_hbm_reset(struct mei_device *dev); +int mei_hbm_start_req(struct mei_device *dev); +int mei_hbm_start_wait(struct mei_device *dev); +int mei_hbm_cl_flow_control_req(struct mei_device *dev, struct mei_cl *cl); +int mei_hbm_cl_disconnect_req(struct mei_device *dev, struct mei_cl *cl); +int mei_hbm_cl_disconnect_rsp(struct mei_device *dev, struct mei_cl *cl); +int mei_hbm_cl_connect_req(struct mei_device *dev, struct mei_cl *cl); +bool mei_hbm_version_is_supported(struct mei_device *dev); +int mei_hbm_pg(struct mei_device *dev, u8 pg_cmd); +void mei_hbm_pg_resume(struct mei_device *dev); +int mei_hbm_cl_notify_req(struct mei_device *dev, + struct mei_cl *cl, u8 request); +int mei_hbm_cl_dma_map_req(struct mei_device *dev, struct mei_cl *cl); +int mei_hbm_cl_dma_unmap_req(struct mei_device *dev, struct mei_cl *cl); +#endif /* _MEI_HBM_H_ */ + diff --git a/drivers/misc/mei/hdcp/Kconfig b/drivers/misc/mei/hdcp/Kconfig new file mode 100644 index 0000000000..54e1c95269 --- /dev/null +++ b/drivers/misc/mei/hdcp/Kconfig @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2019, Intel Corporation. All rights reserved. +# +config INTEL_MEI_HDCP + tristate "Intel HDCP2.2 services of ME Interface" + select INTEL_MEI_ME + depends on DRM_I915 + help + MEI Support for HDCP2.2 Services on Intel platforms. 
+ + Enables the ME FW services required for HDCP2.2 support through + I915 display driver of Intel. diff --git a/drivers/misc/mei/hdcp/Makefile b/drivers/misc/mei/hdcp/Makefile new file mode 100644 index 0000000000..3fbb56485c --- /dev/null +++ b/drivers/misc/mei/hdcp/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2019, Intel Corporation. All rights reserved. +# +# Makefile - HDCP client driver for Intel MEI Bus Driver. + +obj-$(CONFIG_INTEL_MEI_HDCP) += mei_hdcp.o diff --git a/drivers/misc/mei/hdcp/mei_hdcp.c b/drivers/misc/mei/hdcp/mei_hdcp.c new file mode 100644 index 0000000000..51359cc5ec --- /dev/null +++ b/drivers/misc/mei/hdcp/mei_hdcp.c @@ -0,0 +1,879 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2019 Intel Corporation + * + * mei_hdcp.c: HDCP client driver for mei bus + * + * Author: + * Ramalingam C + */ + +/** + * DOC: MEI_HDCP Client Driver + * + * The mei_hdcp driver acts as a translation layer between HDCP 2.2 + * protocol implementer (I915) and ME FW by translating HDCP2.2 + * negotiation messages to ME FW command payloads and vice versa. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mei_hdcp.h" + +/** + * mei_hdcp_initiate_session() - Initiate a Wired HDCP2.2 Tx Session in ME FW + * @dev: device corresponding to the mei_cl_device + * @data: Intel HW specific hdcp data + * @ake_data: AKE_Init msg output. + * + * Return: 0 on Success, <0 on Failure. + */ +static int +mei_hdcp_initiate_session(struct device *dev, struct hdcp_port_data *data, + struct hdcp2_ake_init *ake_data) +{ + struct wired_cmd_initiate_hdcp2_session_in session_init_in = { { 0 } }; + struct wired_cmd_initiate_hdcp2_session_out + session_init_out = { { 0 } }; + struct mei_cl_device *cldev; + ssize_t byte; + + if (!dev || !data || !ake_data) + return -EINVAL; + + cldev = to_mei_cl_device(dev); + + session_init_in.header.api_version = HDCP_API_VERSION; + session_init_in.header.command_id = WIRED_INITIATE_HDCP2_SESSION; + session_init_in.header.status = FW_HDCP_STATUS_SUCCESS; + session_init_in.header.buffer_len = + WIRED_CMD_BUF_LEN_INITIATE_HDCP2_SESSION_IN; + + session_init_in.port.integrated_port_type = data->port_type; + session_init_in.port.physical_port = (u8)data->hdcp_ddi; + session_init_in.port.attached_transcoder = (u8)data->hdcp_transcoder; + session_init_in.protocol = data->protocol; + + byte = mei_cldev_send(cldev, (u8 *)&session_init_in, + sizeof(session_init_in)); + if (byte < 0) { + dev_dbg(dev, "mei_cldev_send failed. %zd\n", byte); + return byte; + } + + byte = mei_cldev_recv(cldev, (u8 *)&session_init_out, + sizeof(session_init_out)); + if (byte < 0) { + dev_dbg(dev, "mei_cldev_recv failed. %zd\n", byte); + return byte; + } + + if (session_init_out.header.status != FW_HDCP_STATUS_SUCCESS) { + dev_dbg(dev, "ME cmd 0x%08X Failed. 
Status: 0x%X\n", + WIRED_INITIATE_HDCP2_SESSION, + session_init_out.header.status); + return -EIO; + } + + ake_data->msg_id = HDCP_2_2_AKE_INIT; + ake_data->tx_caps = session_init_out.tx_caps; + memcpy(ake_data->r_tx, session_init_out.r_tx, HDCP_2_2_RTX_LEN); + + return 0; +} + +/** + * mei_hdcp_verify_receiver_cert_prepare_km() - Verify the Receiver Certificate + * AKE_Send_Cert and prepare AKE_Stored_Km/AKE_No_Stored_Km + * @dev: device corresponding to the mei_cl_device + * @data: Intel HW specific hdcp data + * @rx_cert: AKE_Send_Cert for verification + * @km_stored: Pairing status flag output + * @ek_pub_km: AKE_Stored_Km/AKE_No_Stored_Km output msg + * @msg_sz : size of AKE_XXXXX_Km output msg + * + * Return: 0 on Success, <0 on Failure + */ +static int +mei_hdcp_verify_receiver_cert_prepare_km(struct device *dev, + struct hdcp_port_data *data, + struct hdcp2_ake_send_cert *rx_cert, + bool *km_stored, + struct hdcp2_ake_no_stored_km + *ek_pub_km, + size_t *msg_sz) +{ + struct wired_cmd_verify_receiver_cert_in verify_rxcert_in = { { 0 } }; + struct wired_cmd_verify_receiver_cert_out verify_rxcert_out = { { 0 } }; + struct mei_cl_device *cldev; + ssize_t byte; + + if (!dev || !data || !rx_cert || !km_stored || !ek_pub_km || !msg_sz) + return -EINVAL; + + cldev = to_mei_cl_device(dev); + + verify_rxcert_in.header.api_version = HDCP_API_VERSION; + verify_rxcert_in.header.command_id = WIRED_VERIFY_RECEIVER_CERT; + verify_rxcert_in.header.status = FW_HDCP_STATUS_SUCCESS; + verify_rxcert_in.header.buffer_len = + WIRED_CMD_BUF_LEN_VERIFY_RECEIVER_CERT_IN; + + verify_rxcert_in.port.integrated_port_type = data->port_type; + verify_rxcert_in.port.physical_port = (u8)data->hdcp_ddi; + verify_rxcert_in.port.attached_transcoder = (u8)data->hdcp_transcoder; + + verify_rxcert_in.cert_rx = rx_cert->cert_rx; + memcpy(verify_rxcert_in.r_rx, &rx_cert->r_rx, HDCP_2_2_RRX_LEN); + memcpy(verify_rxcert_in.rx_caps, rx_cert->rx_caps, HDCP_2_2_RXCAPS_LEN); + + byte = mei_cldev_send(cldev, (u8 *)&verify_rxcert_in, + sizeof(verify_rxcert_in)); + if (byte < 0) { + dev_dbg(dev, "mei_cldev_send failed: %zd\n", byte); + return byte; + } + + byte = mei_cldev_recv(cldev, (u8 *)&verify_rxcert_out, + sizeof(verify_rxcert_out)); + if (byte < 0) { + dev_dbg(dev, "mei_cldev_recv failed: %zd\n", byte); + return byte; + } + + if (verify_rxcert_out.header.status != FW_HDCP_STATUS_SUCCESS) { + dev_dbg(dev, "ME cmd 0x%08X Failed. Status: 0x%X\n", + WIRED_VERIFY_RECEIVER_CERT, + verify_rxcert_out.header.status); + return -EIO; + } + + *km_stored = !!verify_rxcert_out.km_stored; + if (verify_rxcert_out.km_stored) { + ek_pub_km->msg_id = HDCP_2_2_AKE_STORED_KM; + *msg_sz = sizeof(struct hdcp2_ake_stored_km); + } else { + ek_pub_km->msg_id = HDCP_2_2_AKE_NO_STORED_KM; + *msg_sz = sizeof(struct hdcp2_ake_no_stored_km); + } + + memcpy(ek_pub_km->e_kpub_km, &verify_rxcert_out.ekm_buff, + sizeof(verify_rxcert_out.ekm_buff)); + + return 0; +} + +/** + * mei_hdcp_verify_hprime() - Verify AKE_Send_H_prime at ME FW. 
+ * @dev: device corresponding to the mei_cl_device + * @data: Intel HW specific hdcp data + * @rx_hprime: AKE_Send_H_prime msg for ME FW verification + * + * Return: 0 on Success, <0 on Failure + */ +static int +mei_hdcp_verify_hprime(struct device *dev, struct hdcp_port_data *data, + struct hdcp2_ake_send_hprime *rx_hprime) +{ + struct wired_cmd_ake_send_hprime_in send_hprime_in = { { 0 } }; + struct wired_cmd_ake_send_hprime_out send_hprime_out = { { 0 } }; + struct mei_cl_device *cldev; + ssize_t byte; + + if (!dev || !data || !rx_hprime) + return -EINVAL; + + cldev = to_mei_cl_device(dev); + + send_hprime_in.header.api_version = HDCP_API_VERSION; + send_hprime_in.header.command_id = WIRED_AKE_SEND_HPRIME; + send_hprime_in.header.status = FW_HDCP_STATUS_SUCCESS; + send_hprime_in.header.buffer_len = WIRED_CMD_BUF_LEN_AKE_SEND_HPRIME_IN; + + send_hprime_in.port.integrated_port_type = data->port_type; + send_hprime_in.port.physical_port = (u8)data->hdcp_ddi; + send_hprime_in.port.attached_transcoder = (u8)data->hdcp_transcoder; + + memcpy(send_hprime_in.h_prime, rx_hprime->h_prime, + HDCP_2_2_H_PRIME_LEN); + + byte = mei_cldev_send(cldev, (u8 *)&send_hprime_in, + sizeof(send_hprime_in)); + if (byte < 0) { + dev_dbg(dev, "mei_cldev_send failed. %zd\n", byte); + return byte; + } + + byte = mei_cldev_recv(cldev, (u8 *)&send_hprime_out, + sizeof(send_hprime_out)); + if (byte < 0) { + dev_dbg(dev, "mei_cldev_recv failed. %zd\n", byte); + return byte; + } + + if (send_hprime_out.header.status != FW_HDCP_STATUS_SUCCESS) { + dev_dbg(dev, "ME cmd 0x%08X Failed. Status: 0x%X\n", + WIRED_AKE_SEND_HPRIME, send_hprime_out.header.status); + return -EIO; + } + + return 0; +} + +/** + * mei_hdcp_store_pairing_info() - Store pairing info received at ME FW + * @dev: device corresponding to the mei_cl_device + * @data: Intel HW specific hdcp data + * @pairing_info: AKE_Send_Pairing_Info msg input to ME FW + * + * Return: 0 on Success, <0 on Failure + */ +static int +mei_hdcp_store_pairing_info(struct device *dev, struct hdcp_port_data *data, + struct hdcp2_ake_send_pairing_info *pairing_info) +{ + struct wired_cmd_ake_send_pairing_info_in pairing_info_in = { { 0 } }; + struct wired_cmd_ake_send_pairing_info_out pairing_info_out = { { 0 } }; + struct mei_cl_device *cldev; + ssize_t byte; + + if (!dev || !data || !pairing_info) + return -EINVAL; + + cldev = to_mei_cl_device(dev); + + pairing_info_in.header.api_version = HDCP_API_VERSION; + pairing_info_in.header.command_id = WIRED_AKE_SEND_PAIRING_INFO; + pairing_info_in.header.status = FW_HDCP_STATUS_SUCCESS; + pairing_info_in.header.buffer_len = + WIRED_CMD_BUF_LEN_SEND_PAIRING_INFO_IN; + + pairing_info_in.port.integrated_port_type = data->port_type; + pairing_info_in.port.physical_port = (u8)data->hdcp_ddi; + pairing_info_in.port.attached_transcoder = (u8)data->hdcp_transcoder; + + memcpy(pairing_info_in.e_kh_km, pairing_info->e_kh_km, + HDCP_2_2_E_KH_KM_LEN); + + byte = mei_cldev_send(cldev, (u8 *)&pairing_info_in, + sizeof(pairing_info_in)); + if (byte < 0) { + dev_dbg(dev, "mei_cldev_send failed. %zd\n", byte); + return byte; + } + + byte = mei_cldev_recv(cldev, (u8 *)&pairing_info_out, + sizeof(pairing_info_out)); + if (byte < 0) { + dev_dbg(dev, "mei_cldev_recv failed. %zd\n", byte); + return byte; + } + + if (pairing_info_out.header.status != FW_HDCP_STATUS_SUCCESS) { + dev_dbg(dev, "ME cmd 0x%08X failed. 
Status: 0x%X\n", + WIRED_AKE_SEND_PAIRING_INFO, + pairing_info_out.header.status); + return -EIO; + } + + return 0; +} + +/** + * mei_hdcp_initiate_locality_check() - Prepare LC_Init + * @dev: device corresponding to the mei_cl_device + * @data: Intel HW specific hdcp data + * @lc_init_data: LC_Init msg output + * + * Return: 0 on Success, <0 on Failure + */ +static int +mei_hdcp_initiate_locality_check(struct device *dev, + struct hdcp_port_data *data, + struct hdcp2_lc_init *lc_init_data) +{ + struct wired_cmd_init_locality_check_in lc_init_in = { { 0 } }; + struct wired_cmd_init_locality_check_out lc_init_out = { { 0 } }; + struct mei_cl_device *cldev; + ssize_t byte; + + if (!dev || !data || !lc_init_data) + return -EINVAL; + + cldev = to_mei_cl_device(dev); + + lc_init_in.header.api_version = HDCP_API_VERSION; + lc_init_in.header.command_id = WIRED_INIT_LOCALITY_CHECK; + lc_init_in.header.status = FW_HDCP_STATUS_SUCCESS; + lc_init_in.header.buffer_len = WIRED_CMD_BUF_LEN_INIT_LOCALITY_CHECK_IN; + + lc_init_in.port.integrated_port_type = data->port_type; + lc_init_in.port.physical_port = (u8)data->hdcp_ddi; + lc_init_in.port.attached_transcoder = (u8)data->hdcp_transcoder; + + byte = mei_cldev_send(cldev, (u8 *)&lc_init_in, sizeof(lc_init_in)); + if (byte < 0) { + dev_dbg(dev, "mei_cldev_send failed. %zd\n", byte); + return byte; + } + + byte = mei_cldev_recv(cldev, (u8 *)&lc_init_out, sizeof(lc_init_out)); + if (byte < 0) { + dev_dbg(dev, "mei_cldev_recv failed. %zd\n", byte); + return byte; + } + + if (lc_init_out.header.status != FW_HDCP_STATUS_SUCCESS) { + dev_dbg(dev, "ME cmd 0x%08X Failed. status: 0x%X\n", + WIRED_INIT_LOCALITY_CHECK, lc_init_out.header.status); + return -EIO; + } + + lc_init_data->msg_id = HDCP_2_2_LC_INIT; + memcpy(lc_init_data->r_n, lc_init_out.r_n, HDCP_2_2_RN_LEN); + + return 0; +} + +/** + * mei_hdcp_verify_lprime() - Verify lprime. + * @dev: device corresponding to the mei_cl_device + * @data: Intel HW specific hdcp data + * @rx_lprime: LC_Send_L_prime msg for ME FW verification + * + * Return: 0 on Success, <0 on Failure + */ +static int +mei_hdcp_verify_lprime(struct device *dev, struct hdcp_port_data *data, + struct hdcp2_lc_send_lprime *rx_lprime) +{ + struct wired_cmd_validate_locality_in verify_lprime_in = { { 0 } }; + struct wired_cmd_validate_locality_out verify_lprime_out = { { 0 } }; + struct mei_cl_device *cldev; + ssize_t byte; + + if (!dev || !data || !rx_lprime) + return -EINVAL; + + cldev = to_mei_cl_device(dev); + + verify_lprime_in.header.api_version = HDCP_API_VERSION; + verify_lprime_in.header.command_id = WIRED_VALIDATE_LOCALITY; + verify_lprime_in.header.status = FW_HDCP_STATUS_SUCCESS; + verify_lprime_in.header.buffer_len = + WIRED_CMD_BUF_LEN_VALIDATE_LOCALITY_IN; + + verify_lprime_in.port.integrated_port_type = data->port_type; + verify_lprime_in.port.physical_port = (u8)data->hdcp_ddi; + verify_lprime_in.port.attached_transcoder = (u8)data->hdcp_transcoder; + + memcpy(verify_lprime_in.l_prime, rx_lprime->l_prime, + HDCP_2_2_L_PRIME_LEN); + + byte = mei_cldev_send(cldev, (u8 *)&verify_lprime_in, + sizeof(verify_lprime_in)); + if (byte < 0) { + dev_dbg(dev, "mei_cldev_send failed. %zd\n", byte); + return byte; + } + + byte = mei_cldev_recv(cldev, (u8 *)&verify_lprime_out, + sizeof(verify_lprime_out)); + if (byte < 0) { + dev_dbg(dev, "mei_cldev_recv failed. %zd\n", byte); + return byte; + } + + if (verify_lprime_out.header.status != FW_HDCP_STATUS_SUCCESS) { + dev_dbg(dev, "ME cmd 0x%08X failed. 
status: 0x%X\n", + WIRED_VALIDATE_LOCALITY, + verify_lprime_out.header.status); + return -EIO; + } + + return 0; +} + +/** + * mei_hdcp_get_session_key() - Prepare SKE_Send_Eks. + * @dev: device corresponding to the mei_cl_device + * @data: Intel HW specific hdcp data + * @ske_data: SKE_Send_Eks msg output from ME FW. + * + * Return: 0 on Success, <0 on Failure + */ +static int mei_hdcp_get_session_key(struct device *dev, + struct hdcp_port_data *data, + struct hdcp2_ske_send_eks *ske_data) +{ + struct wired_cmd_get_session_key_in get_skey_in = { { 0 } }; + struct wired_cmd_get_session_key_out get_skey_out = { { 0 } }; + struct mei_cl_device *cldev; + ssize_t byte; + + if (!dev || !data || !ske_data) + return -EINVAL; + + cldev = to_mei_cl_device(dev); + + get_skey_in.header.api_version = HDCP_API_VERSION; + get_skey_in.header.command_id = WIRED_GET_SESSION_KEY; + get_skey_in.header.status = FW_HDCP_STATUS_SUCCESS; + get_skey_in.header.buffer_len = WIRED_CMD_BUF_LEN_GET_SESSION_KEY_IN; + + get_skey_in.port.integrated_port_type = data->port_type; + get_skey_in.port.physical_port = (u8)data->hdcp_ddi; + get_skey_in.port.attached_transcoder = (u8)data->hdcp_transcoder; + + byte = mei_cldev_send(cldev, (u8 *)&get_skey_in, sizeof(get_skey_in)); + if (byte < 0) { + dev_dbg(dev, "mei_cldev_send failed. %zd\n", byte); + return byte; + } + + byte = mei_cldev_recv(cldev, (u8 *)&get_skey_out, sizeof(get_skey_out)); + + if (byte < 0) { + dev_dbg(dev, "mei_cldev_recv failed. %zd\n", byte); + return byte; + } + + if (get_skey_out.header.status != FW_HDCP_STATUS_SUCCESS) { + dev_dbg(dev, "ME cmd 0x%08X failed. status: 0x%X\n", + WIRED_GET_SESSION_KEY, get_skey_out.header.status); + return -EIO; + } + + ske_data->msg_id = HDCP_2_2_SKE_SEND_EKS; + memcpy(ske_data->e_dkey_ks, get_skey_out.e_dkey_ks, + HDCP_2_2_E_DKEY_KS_LEN); + memcpy(ske_data->riv, get_skey_out.r_iv, HDCP_2_2_RIV_LEN); + + return 0; +} + +/** + * mei_hdcp_repeater_check_flow_prepare_ack() - Validate the Downstream topology + * and prepare rep_ack. + * @dev: device corresponding to the mei_cl_device + * @data: Intel HW specific hdcp data + * @rep_topology: Receiver ID List to be validated + * @rep_send_ack : repeater ack from ME FW. 
+ * + * Return: 0 on Success, <0 on Failure + */ +static int +mei_hdcp_repeater_check_flow_prepare_ack(struct device *dev, + struct hdcp_port_data *data, + struct hdcp2_rep_send_receiverid_list + *rep_topology, + struct hdcp2_rep_send_ack + *rep_send_ack) +{ + struct wired_cmd_verify_repeater_in verify_repeater_in = { { 0 } }; + struct wired_cmd_verify_repeater_out verify_repeater_out = { { 0 } }; + struct mei_cl_device *cldev; + ssize_t byte; + + if (!dev || !rep_topology || !rep_send_ack || !data) + return -EINVAL; + + cldev = to_mei_cl_device(dev); + + verify_repeater_in.header.api_version = HDCP_API_VERSION; + verify_repeater_in.header.command_id = WIRED_VERIFY_REPEATER; + verify_repeater_in.header.status = FW_HDCP_STATUS_SUCCESS; + verify_repeater_in.header.buffer_len = + WIRED_CMD_BUF_LEN_VERIFY_REPEATER_IN; + + verify_repeater_in.port.integrated_port_type = data->port_type; + verify_repeater_in.port.physical_port = (u8)data->hdcp_ddi; + verify_repeater_in.port.attached_transcoder = (u8)data->hdcp_transcoder; + + memcpy(verify_repeater_in.rx_info, rep_topology->rx_info, + HDCP_2_2_RXINFO_LEN); + memcpy(verify_repeater_in.seq_num_v, rep_topology->seq_num_v, + HDCP_2_2_SEQ_NUM_LEN); + memcpy(verify_repeater_in.v_prime, rep_topology->v_prime, + HDCP_2_2_V_PRIME_HALF_LEN); + memcpy(verify_repeater_in.receiver_ids, rep_topology->receiver_ids, + HDCP_2_2_RECEIVER_IDS_MAX_LEN); + + byte = mei_cldev_send(cldev, (u8 *)&verify_repeater_in, + sizeof(verify_repeater_in)); + if (byte < 0) { + dev_dbg(dev, "mei_cldev_send failed. %zd\n", byte); + return byte; + } + + byte = mei_cldev_recv(cldev, (u8 *)&verify_repeater_out, + sizeof(verify_repeater_out)); + if (byte < 0) { + dev_dbg(dev, "mei_cldev_recv failed. %zd\n", byte); + return byte; + } + + if (verify_repeater_out.header.status != FW_HDCP_STATUS_SUCCESS) { + dev_dbg(dev, "ME cmd 0x%08X failed. status: 0x%X\n", + WIRED_VERIFY_REPEATER, + verify_repeater_out.header.status); + return -EIO; + } + + memcpy(rep_send_ack->v, verify_repeater_out.v, + HDCP_2_2_V_PRIME_HALF_LEN); + rep_send_ack->msg_id = HDCP_2_2_REP_SEND_ACK; + + return 0; +} + +/** + * mei_hdcp_verify_mprime() - Verify mprime. + * @dev: device corresponding to the mei_cl_device + * @data: Intel HW specific hdcp data + * @stream_ready: RepeaterAuth_Stream_Ready msg for ME FW verification. 
+ * + * Return: 0 on Success, <0 on Failure + */ +static int mei_hdcp_verify_mprime(struct device *dev, + struct hdcp_port_data *data, + struct hdcp2_rep_stream_ready *stream_ready) +{ + struct wired_cmd_repeater_auth_stream_req_in *verify_mprime_in; + struct wired_cmd_repeater_auth_stream_req_out + verify_mprime_out = { { 0 } }; + struct mei_cl_device *cldev; + ssize_t byte; + size_t cmd_size; + + if (!dev || !stream_ready || !data) + return -EINVAL; + + cldev = to_mei_cl_device(dev); + + cmd_size = struct_size(verify_mprime_in, streams, data->k); + if (cmd_size == SIZE_MAX) + return -EINVAL; + + verify_mprime_in = kzalloc(cmd_size, GFP_KERNEL); + if (!verify_mprime_in) + return -ENOMEM; + + verify_mprime_in->header.api_version = HDCP_API_VERSION; + verify_mprime_in->header.command_id = WIRED_REPEATER_AUTH_STREAM_REQ; + verify_mprime_in->header.status = FW_HDCP_STATUS_SUCCESS; + verify_mprime_in->header.buffer_len = cmd_size - sizeof(verify_mprime_in->header); + + verify_mprime_in->port.integrated_port_type = data->port_type; + verify_mprime_in->port.physical_port = (u8)data->hdcp_ddi; + verify_mprime_in->port.attached_transcoder = (u8)data->hdcp_transcoder; + + memcpy(verify_mprime_in->m_prime, stream_ready->m_prime, HDCP_2_2_MPRIME_LEN); + drm_hdcp_cpu_to_be24(verify_mprime_in->seq_num_m, data->seq_num_m); + + memcpy(verify_mprime_in->streams, data->streams, + array_size(data->k, sizeof(*data->streams))); + + verify_mprime_in->k = cpu_to_be16(data->k); + + byte = mei_cldev_send(cldev, (u8 *)verify_mprime_in, cmd_size); + kfree(verify_mprime_in); + if (byte < 0) { + dev_dbg(dev, "mei_cldev_send failed. %zd\n", byte); + return byte; + } + + byte = mei_cldev_recv(cldev, (u8 *)&verify_mprime_out, + sizeof(verify_mprime_out)); + if (byte < 0) { + dev_dbg(dev, "mei_cldev_recv failed. %zd\n", byte); + return byte; + } + + if (verify_mprime_out.header.status != FW_HDCP_STATUS_SUCCESS) { + dev_dbg(dev, "ME cmd 0x%08X failed. status: 0x%X\n", + WIRED_REPEATER_AUTH_STREAM_REQ, + verify_mprime_out.header.status); + return -EIO; + } + + return 0; +} + +/** + * mei_hdcp_enable_authentication() - Mark a port as authenticated + * through ME FW + * @dev: device corresponding to the mei_cl_device + * @data: Intel HW specific hdcp data + * + * Return: 0 on Success, <0 on Failure + */ +static int mei_hdcp_enable_authentication(struct device *dev, + struct hdcp_port_data *data) +{ + struct wired_cmd_enable_auth_in enable_auth_in = { { 0 } }; + struct wired_cmd_enable_auth_out enable_auth_out = { { 0 } }; + struct mei_cl_device *cldev; + ssize_t byte; + + if (!dev || !data) + return -EINVAL; + + cldev = to_mei_cl_device(dev); + + enable_auth_in.header.api_version = HDCP_API_VERSION; + enable_auth_in.header.command_id = WIRED_ENABLE_AUTH; + enable_auth_in.header.status = FW_HDCP_STATUS_SUCCESS; + enable_auth_in.header.buffer_len = WIRED_CMD_BUF_LEN_ENABLE_AUTH_IN; + + enable_auth_in.port.integrated_port_type = data->port_type; + enable_auth_in.port.physical_port = (u8)data->hdcp_ddi; + enable_auth_in.port.attached_transcoder = (u8)data->hdcp_transcoder; + enable_auth_in.stream_type = data->streams[0].stream_type; + + byte = mei_cldev_send(cldev, (u8 *)&enable_auth_in, + sizeof(enable_auth_in)); + if (byte < 0) { + dev_dbg(dev, "mei_cldev_send failed. %zd\n", byte); + return byte; + } + + byte = mei_cldev_recv(cldev, (u8 *)&enable_auth_out, + sizeof(enable_auth_out)); + if (byte < 0) { + dev_dbg(dev, "mei_cldev_recv failed. 
%zd\n", byte); + return byte; + } + + if (enable_auth_out.header.status != FW_HDCP_STATUS_SUCCESS) { + dev_dbg(dev, "ME cmd 0x%08X failed. status: 0x%X\n", + WIRED_ENABLE_AUTH, enable_auth_out.header.status); + return -EIO; + } + + return 0; +} + +/** + * mei_hdcp_close_session() - Close the Wired HDCP Tx session of ME FW per port. + * This also disables the authenticated state of the port. + * @dev: device corresponding to the mei_cl_device + * @data: Intel HW specific hdcp data + * + * Return: 0 on Success, <0 on Failure + */ +static int +mei_hdcp_close_session(struct device *dev, struct hdcp_port_data *data) +{ + struct wired_cmd_close_session_in session_close_in = { { 0 } }; + struct wired_cmd_close_session_out session_close_out = { { 0 } }; + struct mei_cl_device *cldev; + ssize_t byte; + + if (!dev || !data) + return -EINVAL; + + cldev = to_mei_cl_device(dev); + + session_close_in.header.api_version = HDCP_API_VERSION; + session_close_in.header.command_id = WIRED_CLOSE_SESSION; + session_close_in.header.status = FW_HDCP_STATUS_SUCCESS; + session_close_in.header.buffer_len = + WIRED_CMD_BUF_LEN_CLOSE_SESSION_IN; + + session_close_in.port.integrated_port_type = data->port_type; + session_close_in.port.physical_port = (u8)data->hdcp_ddi; + session_close_in.port.attached_transcoder = (u8)data->hdcp_transcoder; + + byte = mei_cldev_send(cldev, (u8 *)&session_close_in, + sizeof(session_close_in)); + if (byte < 0) { + dev_dbg(dev, "mei_cldev_send failed. %zd\n", byte); + return byte; + } + + byte = mei_cldev_recv(cldev, (u8 *)&session_close_out, + sizeof(session_close_out)); + if (byte < 0) { + dev_dbg(dev, "mei_cldev_recv failed. %zd\n", byte); + return byte; + } + + if (session_close_out.header.status != FW_HDCP_STATUS_SUCCESS) { + dev_dbg(dev, "Session Close Failed. status: 0x%X\n", + session_close_out.header.status); + return -EIO; + } + + return 0; +} + +static const struct i915_hdcp_ops mei_hdcp_ops = { + .owner = THIS_MODULE, + .initiate_hdcp2_session = mei_hdcp_initiate_session, + .verify_receiver_cert_prepare_km = + mei_hdcp_verify_receiver_cert_prepare_km, + .verify_hprime = mei_hdcp_verify_hprime, + .store_pairing_info = mei_hdcp_store_pairing_info, + .initiate_locality_check = mei_hdcp_initiate_locality_check, + .verify_lprime = mei_hdcp_verify_lprime, + .get_session_key = mei_hdcp_get_session_key, + .repeater_check_flow_prepare_ack = + mei_hdcp_repeater_check_flow_prepare_ack, + .verify_mprime = mei_hdcp_verify_mprime, + .enable_hdcp_authentication = mei_hdcp_enable_authentication, + .close_hdcp_session = mei_hdcp_close_session, +}; + +static int mei_component_master_bind(struct device *dev) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + struct i915_hdcp_arbiter *comp_arbiter = mei_cldev_get_drvdata(cldev); + int ret; + + dev_dbg(dev, "%s\n", __func__); + comp_arbiter->ops = &mei_hdcp_ops; + comp_arbiter->hdcp_dev = dev; + ret = component_bind_all(dev, comp_arbiter); + if (ret < 0) + return ret; + + return 0; +} + +static void mei_component_master_unbind(struct device *dev) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + struct i915_hdcp_arbiter *comp_arbiter = mei_cldev_get_drvdata(cldev); + + dev_dbg(dev, "%s\n", __func__); + component_unbind_all(dev, comp_arbiter); +} + +static const struct component_master_ops mei_component_master_ops = { + .bind = mei_component_master_bind, + .unbind = mei_component_master_unbind, +}; + +/** + * mei_hdcp_component_match - compare function for matching mei hdcp. 
+ * + * The function checks if the driver is i915, the subcomponent is HDCP + * and the grand parent of hdcp and the parent of i915 are the same + * PCH device. + * + * @dev: master device + * @subcomponent: subcomponent to match (I915_COMPONENT_HDCP) + * @data: compare data (mei hdcp device) + * + * Return: + * * 1 - if components match + * * 0 - otherwise + */ +static int mei_hdcp_component_match(struct device *dev, int subcomponent, + void *data) +{ + struct device *base = data; + + if (!dev->driver || strcmp(dev->driver->name, "i915") || + subcomponent != I915_COMPONENT_HDCP) + return 0; + + base = base->parent; + if (!base) + return 0; + + base = base->parent; + dev = dev->parent; + + return (base && dev && dev == base); +} + +static int mei_hdcp_probe(struct mei_cl_device *cldev, + const struct mei_cl_device_id *id) +{ + struct i915_hdcp_arbiter *comp_arbiter; + struct component_match *master_match; + int ret; + + ret = mei_cldev_enable(cldev); + if (ret < 0) { + dev_err(&cldev->dev, "mei_cldev_enable Failed. %d\n", ret); + goto enable_err_exit; + } + + comp_arbiter = kzalloc(sizeof(*comp_arbiter), GFP_KERNEL); + if (!comp_arbiter) { + ret = -ENOMEM; + goto err_exit; + } + + master_match = NULL; + component_match_add_typed(&cldev->dev, &master_match, + mei_hdcp_component_match, &cldev->dev); + if (IS_ERR_OR_NULL(master_match)) { + ret = -ENOMEM; + goto err_exit; + } + + mei_cldev_set_drvdata(cldev, comp_arbiter); + ret = component_master_add_with_match(&cldev->dev, + &mei_component_master_ops, + master_match); + if (ret < 0) { + dev_err(&cldev->dev, "Master comp add failed %d\n", ret); + goto err_exit; + } + + return 0; + +err_exit: + mei_cldev_set_drvdata(cldev, NULL); + kfree(comp_arbiter); + mei_cldev_disable(cldev); +enable_err_exit: + return ret; +} + +static void mei_hdcp_remove(struct mei_cl_device *cldev) +{ + struct i915_hdcp_arbiter *comp_arbiter = mei_cldev_get_drvdata(cldev); + int ret; + + component_master_del(&cldev->dev, &mei_component_master_ops); + kfree(comp_arbiter); + mei_cldev_set_drvdata(cldev, NULL); + + ret = mei_cldev_disable(cldev); + if (ret) + dev_warn(&cldev->dev, "mei_cldev_disable() failed\n"); +} + +#define MEI_UUID_HDCP UUID_LE(0xB638AB7E, 0x94E2, 0x4EA2, 0xA5, \ + 0x52, 0xD1, 0xC5, 0x4B, 0x62, 0x7F, 0x04) + +static const struct mei_cl_device_id mei_hdcp_tbl[] = { + { .uuid = MEI_UUID_HDCP, .version = MEI_CL_VERSION_ANY }, + { } +}; +MODULE_DEVICE_TABLE(mei, mei_hdcp_tbl); + +static struct mei_cl_driver mei_hdcp_driver = { + .id_table = mei_hdcp_tbl, + .name = KBUILD_MODNAME, + .probe = mei_hdcp_probe, + .remove = mei_hdcp_remove, +}; + +module_mei_cl_driver(mei_hdcp_driver); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("MEI HDCP"); diff --git a/drivers/misc/mei/hdcp/mei_hdcp.h b/drivers/misc/mei/hdcp/mei_hdcp.h new file mode 100644 index 0000000000..0683d83ec1 --- /dev/null +++ b/drivers/misc/mei/hdcp/mei_hdcp.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright © 2019 Intel Corporation + * + * Authors: + * Ramalingam C + */ + +#ifndef __MEI_HDCP_H__ +#define __MEI_HDCP_H__ + +#include + +#endif /* __MEI_HDCP_H__ */ diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h new file mode 100644 index 0000000000..bdc65d50b9 --- /dev/null +++ b/drivers/misc/mei/hw-me-regs.h @@ -0,0 +1,214 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* + * Copyright (c) 2003-2022, Intel Corporation. All rights reserved. 
+ * Intel Management Engine Interface (Intel MEI) Linux driver + */ +#ifndef _MEI_HW_MEI_REGS_H_ +#define _MEI_HW_MEI_REGS_H_ + +/* + * MEI device IDs + */ +#define MEI_DEV_ID_82946GZ 0x2974 /* 82946GZ/GL */ +#define MEI_DEV_ID_82G35 0x2984 /* 82G35 Express */ +#define MEI_DEV_ID_82Q965 0x2994 /* 82Q963/Q965 */ +#define MEI_DEV_ID_82G965 0x29A4 /* 82P965/G965 */ + +#define MEI_DEV_ID_82GM965 0x2A04 /* Mobile PM965/GM965 */ +#define MEI_DEV_ID_82GME965 0x2A14 /* Mobile GME965/GLE960 */ + +#define MEI_DEV_ID_ICH9_82Q35 0x29B4 /* 82Q35 Express */ +#define MEI_DEV_ID_ICH9_82G33 0x29C4 /* 82G33/G31/P35/P31 Express */ +#define MEI_DEV_ID_ICH9_82Q33 0x29D4 /* 82Q33 Express */ +#define MEI_DEV_ID_ICH9_82X38 0x29E4 /* 82X38/X48 Express */ +#define MEI_DEV_ID_ICH9_3200 0x29F4 /* 3200/3210 Server */ + +#define MEI_DEV_ID_ICH9_6 0x28B4 /* Bearlake */ +#define MEI_DEV_ID_ICH9_7 0x28C4 /* Bearlake */ +#define MEI_DEV_ID_ICH9_8 0x28D4 /* Bearlake */ +#define MEI_DEV_ID_ICH9_9 0x28E4 /* Bearlake */ +#define MEI_DEV_ID_ICH9_10 0x28F4 /* Bearlake */ + +#define MEI_DEV_ID_ICH9M_1 0x2A44 /* Cantiga */ +#define MEI_DEV_ID_ICH9M_2 0x2A54 /* Cantiga */ +#define MEI_DEV_ID_ICH9M_3 0x2A64 /* Cantiga */ +#define MEI_DEV_ID_ICH9M_4 0x2A74 /* Cantiga */ + +#define MEI_DEV_ID_ICH10_1 0x2E04 /* Eaglelake */ +#define MEI_DEV_ID_ICH10_2 0x2E14 /* Eaglelake */ +#define MEI_DEV_ID_ICH10_3 0x2E24 /* Eaglelake */ +#define MEI_DEV_ID_ICH10_4 0x2E34 /* Eaglelake */ + +#define MEI_DEV_ID_IBXPK_1 0x3B64 /* Calpella */ +#define MEI_DEV_ID_IBXPK_2 0x3B65 /* Calpella */ + +#define MEI_DEV_ID_CPT_1 0x1C3A /* Couger Point */ +#define MEI_DEV_ID_PBG_1 0x1D3A /* C600/X79 Patsburg */ + +#define MEI_DEV_ID_PPT_1 0x1E3A /* Panther Point */ +#define MEI_DEV_ID_PPT_2 0x1CBA /* Panther Point */ +#define MEI_DEV_ID_PPT_3 0x1DBA /* Panther Point */ + +#define MEI_DEV_ID_LPT_H 0x8C3A /* Lynx Point H */ +#define MEI_DEV_ID_LPT_W 0x8D3A /* Lynx Point - Wellsburg */ +#define MEI_DEV_ID_LPT_LP 0x9C3A /* Lynx Point LP */ +#define MEI_DEV_ID_LPT_HR 0x8CBA /* Lynx Point H Refresh */ + +#define MEI_DEV_ID_WPT_LP 0x9CBA /* Wildcat Point LP */ +#define MEI_DEV_ID_WPT_LP_2 0x9CBB /* Wildcat Point LP 2 */ + +#define MEI_DEV_ID_SPT 0x9D3A /* Sunrise Point */ +#define MEI_DEV_ID_SPT_2 0x9D3B /* Sunrise Point 2 */ +#define MEI_DEV_ID_SPT_3 0x9D3E /* Sunrise Point 3 (iToutch) */ +#define MEI_DEV_ID_SPT_H 0xA13A /* Sunrise Point H */ +#define MEI_DEV_ID_SPT_H_2 0xA13B /* Sunrise Point H 2 */ + +#define MEI_DEV_ID_LBG 0xA1BA /* Lewisburg (SPT) */ + +#define MEI_DEV_ID_BXT_M 0x1A9A /* Broxton M */ +#define MEI_DEV_ID_APL_I 0x5A9A /* Apollo Lake I */ + +#define MEI_DEV_ID_DNV_IE 0x19E5 /* Denverton IE */ + +#define MEI_DEV_ID_GLK 0x319A /* Gemini Lake */ + +#define MEI_DEV_ID_KBP 0xA2BA /* Kaby Point */ +#define MEI_DEV_ID_KBP_2 0xA2BB /* Kaby Point 2 */ +#define MEI_DEV_ID_KBP_3 0xA2BE /* Kaby Point 3 (iTouch) */ + +#define MEI_DEV_ID_CNP_LP 0x9DE0 /* Cannon Point LP */ +#define MEI_DEV_ID_CNP_LP_3 0x9DE4 /* Cannon Point LP 3 (iTouch) */ +#define MEI_DEV_ID_CNP_H 0xA360 /* Cannon Point H */ +#define MEI_DEV_ID_CNP_H_3 0xA364 /* Cannon Point H 3 (iTouch) */ + +#define MEI_DEV_ID_CMP_LP 0x02e0 /* Comet Point LP */ +#define MEI_DEV_ID_CMP_LP_3 0x02e4 /* Comet Point LP 3 (iTouch) */ + +#define MEI_DEV_ID_CMP_V 0xA3BA /* Comet Point Lake V */ + +#define MEI_DEV_ID_CMP_H 0x06e0 /* Comet Lake H */ +#define MEI_DEV_ID_CMP_H_3 0x06e4 /* Comet Lake H 3 (iTouch) */ + +#define MEI_DEV_ID_CDF 0x18D3 /* Cedar Fork */ + +#define MEI_DEV_ID_ICP_LP 0x34E0 /* Ice Lake Point LP */ 
+#define MEI_DEV_ID_ICP_N 0x38E0 /* Ice Lake Point N */ + +#define MEI_DEV_ID_JSP_N 0x4DE0 /* Jasper Lake Point N */ + +#define MEI_DEV_ID_TGP_LP 0xA0E0 /* Tiger Lake Point LP */ +#define MEI_DEV_ID_TGP_H 0x43E0 /* Tiger Lake Point H */ + +#define MEI_DEV_ID_MCC 0x4B70 /* Mule Creek Canyon (EHL) */ +#define MEI_DEV_ID_MCC_4 0x4B75 /* Mule Creek Canyon 4 (EHL) */ + +#define MEI_DEV_ID_EBG 0x1BE0 /* Emmitsburg WS */ + +#define MEI_DEV_ID_ADP_S 0x7AE8 /* Alder Lake Point S */ +#define MEI_DEV_ID_ADP_LP 0x7A60 /* Alder Lake Point LP */ +#define MEI_DEV_ID_ADP_P 0x51E0 /* Alder Lake Point P */ +#define MEI_DEV_ID_ADP_N 0x54E0 /* Alder Lake Point N */ + +#define MEI_DEV_ID_RPL_S 0x7A68 /* Raptor Lake Point S */ + +#define MEI_DEV_ID_MTL_M 0x7E70 /* Meteor Lake Point M */ + +/* + * MEI HW Section + */ + +/* Host Firmware Status Registers in PCI Config Space */ +#define PCI_CFG_HFS_1 0x40 +# define PCI_CFG_HFS_1_D0I3_MSK 0x80000000 +# define PCI_CFG_HFS_1_OPMODE_MSK 0xf0000 /* OP MODE Mask: SPS <= 4.0 */ +# define PCI_CFG_HFS_1_OPMODE_SPS 0xf0000 /* SPS SKU : SPS <= 4.0 */ +#define PCI_CFG_HFS_2 0x48 +#define PCI_CFG_HFS_3 0x60 +# define PCI_CFG_HFS_3_FW_SKU_MSK 0x00000070 +# define PCI_CFG_HFS_3_FW_SKU_IGN 0x00000000 +# define PCI_CFG_HFS_3_FW_SKU_SPS 0x00000060 +#define PCI_CFG_HFS_4 0x64 +#define PCI_CFG_HFS_5 0x68 +# define GSC_CFG_HFS_5_BOOT_TYPE_MSK 0x00000003 +# define GSC_CFG_HFS_5_BOOT_TYPE_PXP 3 +#define PCI_CFG_HFS_6 0x6C + +/* MEI registers */ +/* H_CB_WW - Host Circular Buffer (CB) Write Window register */ +#define H_CB_WW 0 +/* H_CSR - Host Control Status register */ +#define H_CSR 4 +/* ME_CB_RW - ME Circular Buffer Read Window register (read only) */ +#define ME_CB_RW 8 +/* ME_CSR_HA - ME Control Status Host Access register (read only) */ +#define ME_CSR_HA 0xC +/* H_HGC_CSR - PGI register */ +#define H_HPG_CSR 0x10 +/* H_D0I3C - D0I3 Control */ +#define H_D0I3C 0x800 + +#define H_GSC_EXT_OP_MEM_BASE_ADDR_LO_REG 0x100 +#define H_GSC_EXT_OP_MEM_BASE_ADDR_HI_REG 0x104 +#define H_GSC_EXT_OP_MEM_LIMIT_REG 0x108 +#define GSC_EXT_OP_MEM_VALID BIT(31) + +/* register bits of H_CSR (Host Control Status register) */ +/* Host Circular Buffer Depth - maximum number of 32-bit entries in CB */ +#define H_CBD 0xFF000000 +/* Host Circular Buffer Write Pointer */ +#define H_CBWP 0x00FF0000 +/* Host Circular Buffer Read Pointer */ +#define H_CBRP 0x0000FF00 +/* Host Reset */ +#define H_RST 0x00000010 +/* Host Ready */ +#define H_RDY 0x00000008 +/* Host Interrupt Generate */ +#define H_IG 0x00000004 +/* Host Interrupt Status */ +#define H_IS 0x00000002 +/* Host Interrupt Enable */ +#define H_IE 0x00000001 +/* Host D0I3 Interrupt Enable */ +#define H_D0I3C_IE 0x00000020 +/* Host D0I3 Interrupt Status */ +#define H_D0I3C_IS 0x00000040 + +/* H_CSR masks */ +#define H_CSR_IE_MASK (H_IE | H_D0I3C_IE) +#define H_CSR_IS_MASK (H_IS | H_D0I3C_IS) + +/* register bits of ME_CSR_HA (ME Control Status Host Access register) */ +/* ME CB (Circular Buffer) Depth HRA (Host Read Access) - host read only +access to ME_CBD */ +#define ME_CBD_HRA 0xFF000000 +/* ME CB Write Pointer HRA - host read only access to ME_CBWP */ +#define ME_CBWP_HRA 0x00FF0000 +/* ME CB Read Pointer HRA - host read only access to ME_CBRP */ +#define ME_CBRP_HRA 0x0000FF00 +/* ME Power Gate Isolation Capability HRA - host ready only access */ +#define ME_PGIC_HRA 0x00000040 +/* ME Reset HRA - host read only access to ME_RST */ +#define ME_RST_HRA 0x00000010 +/* ME Ready HRA - host read only access to ME_RDY */ +#define ME_RDY_HRA 0x00000008 +/* 
ME Interrupt Generate HRA - host read only access to ME_IG */ +#define ME_IG_HRA 0x00000004 +/* ME Interrupt Status HRA - host read only access to ME_IS */ +#define ME_IS_HRA 0x00000002 +/* ME Interrupt Enable HRA - host read only access to ME_IE */ +#define ME_IE_HRA 0x00000001 +/* TRC control shadow register */ +#define ME_TRC 0x00000030 + +/* H_HPG_CSR register bits */ +#define H_HPG_CSR_PGIHEXR 0x00000001 +#define H_HPG_CSR_PGI 0x00000002 + +/* H_D0I3C register bits */ +#define H_D0I3C_CIP 0x00000001 +#define H_D0I3C_IR 0x00000002 +#define H_D0I3C_I3 0x00000004 +#define H_D0I3C_RR 0x00000008 + +#endif /* _MEI_HW_MEI_REGS_H_ */ diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c new file mode 100644 index 0000000000..da4ef0b519 --- /dev/null +++ b/drivers/misc/mei/hw-me.c @@ -0,0 +1,1770 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2003-2022, Intel Corporation. All rights reserved. + * Intel Management Engine Interface (Intel MEI) Linux driver + */ + +#include + +#include +#include +#include +#include +#include + +#include "mei_dev.h" +#include "hbm.h" + +#include "hw-me.h" +#include "hw-me-regs.h" + +#include "mei-trace.h" + +/** + * mei_me_reg_read - Reads 32bit data from the mei device + * + * @hw: the me hardware structure + * @offset: offset from which to read the data + * + * Return: register value (u32) + */ +static inline u32 mei_me_reg_read(const struct mei_me_hw *hw, + unsigned long offset) +{ + return ioread32(hw->mem_addr + offset); +} + + +/** + * mei_me_reg_write - Writes 32bit data to the mei device + * + * @hw: the me hardware structure + * @offset: offset from which to write the data + * @value: register value to write (u32) + */ +static inline void mei_me_reg_write(const struct mei_me_hw *hw, + unsigned long offset, u32 value) +{ + iowrite32(value, hw->mem_addr + offset); +} + +/** + * mei_me_mecbrw_read - Reads 32bit data from ME circular buffer + * read window register + * + * @dev: the device structure + * + * Return: ME_CB_RW register value (u32) + */ +static inline u32 mei_me_mecbrw_read(const struct mei_device *dev) +{ + return mei_me_reg_read(to_me_hw(dev), ME_CB_RW); +} + +/** + * mei_me_hcbww_write - write 32bit data to the host circular buffer + * + * @dev: the device structure + * @data: 32bit data to be written to the host circular buffer + */ +static inline void mei_me_hcbww_write(struct mei_device *dev, u32 data) +{ + mei_me_reg_write(to_me_hw(dev), H_CB_WW, data); +} + +/** + * mei_me_mecsr_read - Reads 32bit data from the ME CSR + * + * @dev: the device structure + * + * Return: ME_CSR_HA register value (u32) + */ +static inline u32 mei_me_mecsr_read(const struct mei_device *dev) +{ + u32 reg; + + reg = mei_me_reg_read(to_me_hw(dev), ME_CSR_HA); + trace_mei_reg_read(dev->dev, "ME_CSR_HA", ME_CSR_HA, reg); + + return reg; +} + +/** + * mei_hcsr_read - Reads 32bit data from the host CSR + * + * @dev: the device structure + * + * Return: H_CSR register value (u32) + */ +static inline u32 mei_hcsr_read(const struct mei_device *dev) +{ + u32 reg; + + reg = mei_me_reg_read(to_me_hw(dev), H_CSR); + trace_mei_reg_read(dev->dev, "H_CSR", H_CSR, reg); + + return reg; +} + +/** + * mei_hcsr_write - writes H_CSR register to the mei device + * + * @dev: the device structure + * @reg: new register value + */ +static inline void mei_hcsr_write(struct mei_device *dev, u32 reg) +{ + trace_mei_reg_write(dev->dev, "H_CSR", H_CSR, reg); + mei_me_reg_write(to_me_hw(dev), H_CSR, reg); +} + +/** + * mei_hcsr_set - writes H_CSR register to the mei 
device, + * and ignores the H_IS bit for it is write-one-to-zero. + * + * @dev: the device structure + * @reg: new register value + */ +static inline void mei_hcsr_set(struct mei_device *dev, u32 reg) +{ + reg &= ~H_CSR_IS_MASK; + mei_hcsr_write(dev, reg); +} + +/** + * mei_hcsr_set_hig - set host interrupt (set H_IG) + * + * @dev: the device structure + */ +static inline void mei_hcsr_set_hig(struct mei_device *dev) +{ + u32 hcsr; + + hcsr = mei_hcsr_read(dev) | H_IG; + mei_hcsr_set(dev, hcsr); +} + +/** + * mei_me_d0i3c_read - Reads 32bit data from the D0I3C register + * + * @dev: the device structure + * + * Return: H_D0I3C register value (u32) + */ +static inline u32 mei_me_d0i3c_read(const struct mei_device *dev) +{ + u32 reg; + + reg = mei_me_reg_read(to_me_hw(dev), H_D0I3C); + trace_mei_reg_read(dev->dev, "H_D0I3C", H_D0I3C, reg); + + return reg; +} + +/** + * mei_me_d0i3c_write - writes H_D0I3C register to device + * + * @dev: the device structure + * @reg: new register value + */ +static inline void mei_me_d0i3c_write(struct mei_device *dev, u32 reg) +{ + trace_mei_reg_write(dev->dev, "H_D0I3C", H_D0I3C, reg); + mei_me_reg_write(to_me_hw(dev), H_D0I3C, reg); +} + +/** + * mei_me_trc_status - read trc status register + * + * @dev: mei device + * @trc: trc status register value + * + * Return: 0 on success, error otherwise + */ +static int mei_me_trc_status(struct mei_device *dev, u32 *trc) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + if (!hw->cfg->hw_trc_supported) + return -EOPNOTSUPP; + + *trc = mei_me_reg_read(hw, ME_TRC); + trace_mei_reg_read(dev->dev, "ME_TRC", ME_TRC, *trc); + + return 0; +} + +/** + * mei_me_fw_status - read fw status register from pci config space + * + * @dev: mei device + * @fw_status: fw status register values + * + * Return: 0 on success, error otherwise + */ +static int mei_me_fw_status(struct mei_device *dev, + struct mei_fw_status *fw_status) +{ + struct mei_me_hw *hw = to_me_hw(dev); + const struct mei_fw_status *fw_src = &hw->cfg->fw_status; + int ret; + int i; + + if (!fw_status || !hw->read_fws) + return -EINVAL; + + fw_status->count = fw_src->count; + for (i = 0; i < fw_src->count && i < MEI_FW_STATUS_MAX; i++) { + ret = hw->read_fws(dev, fw_src->status[i], + &fw_status->status[i]); + trace_mei_pci_cfg_read(dev->dev, "PCI_CFG_HFS_X", + fw_src->status[i], + fw_status->status[i]); + if (ret) + return ret; + } + + return 0; +} + +/** + * mei_me_hw_config - configure hw dependent settings + * + * @dev: mei device + * + * Return: + * * -EINVAL when read_fws is not set + * * 0 on success + * + */ +static int mei_me_hw_config(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + u32 hcsr, reg; + + if (WARN_ON(!hw->read_fws)) + return -EINVAL; + + /* Doesn't change in runtime */ + hcsr = mei_hcsr_read(dev); + hw->hbuf_depth = (hcsr & H_CBD) >> 24; + + reg = 0; + hw->read_fws(dev, PCI_CFG_HFS_1, ®); + trace_mei_pci_cfg_read(dev->dev, "PCI_CFG_HFS_1", PCI_CFG_HFS_1, reg); + hw->d0i3_supported = + ((reg & PCI_CFG_HFS_1_D0I3_MSK) == PCI_CFG_HFS_1_D0I3_MSK); + + hw->pg_state = MEI_PG_OFF; + if (hw->d0i3_supported) { + reg = mei_me_d0i3c_read(dev); + if (reg & H_D0I3C_I3) + hw->pg_state = MEI_PG_ON; + } + + return 0; +} + +/** + * mei_me_pg_state - translate internal pg state + * to the mei power gating state + * + * @dev: mei device + * + * Return: MEI_PG_OFF if aliveness is on and MEI_PG_ON otherwise + */ +static inline enum mei_pg_state mei_me_pg_state(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + return 
hw->pg_state; +} + +static inline u32 me_intr_src(u32 hcsr) +{ + return hcsr & H_CSR_IS_MASK; +} + +/** + * me_intr_disable - disables mei device interrupts + * using supplied hcsr register value. + * + * @dev: the device structure + * @hcsr: supplied hcsr register value + */ +static inline void me_intr_disable(struct mei_device *dev, u32 hcsr) +{ + hcsr &= ~H_CSR_IE_MASK; + mei_hcsr_set(dev, hcsr); +} + +/** + * me_intr_clear - clear and stop interrupts + * + * @dev: the device structure + * @hcsr: supplied hcsr register value + */ +static inline void me_intr_clear(struct mei_device *dev, u32 hcsr) +{ + if (me_intr_src(hcsr)) + mei_hcsr_write(dev, hcsr); +} + +/** + * mei_me_intr_clear - clear and stop interrupts + * + * @dev: the device structure + */ +static void mei_me_intr_clear(struct mei_device *dev) +{ + u32 hcsr = mei_hcsr_read(dev); + + me_intr_clear(dev, hcsr); +} +/** + * mei_me_intr_enable - enables mei device interrupts + * + * @dev: the device structure + */ +static void mei_me_intr_enable(struct mei_device *dev) +{ + u32 hcsr; + + if (mei_me_hw_use_polling(to_me_hw(dev))) + return; + + hcsr = mei_hcsr_read(dev) | H_CSR_IE_MASK; + mei_hcsr_set(dev, hcsr); +} + +/** + * mei_me_intr_disable - disables mei device interrupts + * + * @dev: the device structure + */ +static void mei_me_intr_disable(struct mei_device *dev) +{ + u32 hcsr = mei_hcsr_read(dev); + + me_intr_disable(dev, hcsr); +} + +/** + * mei_me_synchronize_irq - wait for pending IRQ handlers + * + * @dev: the device structure + */ +static void mei_me_synchronize_irq(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + if (mei_me_hw_use_polling(hw)) + return; + + synchronize_irq(hw->irq); +} + +/** + * mei_me_hw_reset_release - release device from the reset + * + * @dev: the device structure + */ +static void mei_me_hw_reset_release(struct mei_device *dev) +{ + u32 hcsr = mei_hcsr_read(dev); + + hcsr |= H_IG; + hcsr &= ~H_RST; + mei_hcsr_set(dev, hcsr); +} + +/** + * mei_me_host_set_ready - enable device + * + * @dev: mei device + */ +static void mei_me_host_set_ready(struct mei_device *dev) +{ + u32 hcsr = mei_hcsr_read(dev); + + if (!mei_me_hw_use_polling(to_me_hw(dev))) + hcsr |= H_CSR_IE_MASK; + + hcsr |= H_IG | H_RDY; + mei_hcsr_set(dev, hcsr); +} + +/** + * mei_me_host_is_ready - check whether the host has turned ready + * + * @dev: mei device + * Return: bool + */ +static bool mei_me_host_is_ready(struct mei_device *dev) +{ + u32 hcsr = mei_hcsr_read(dev); + + return (hcsr & H_RDY) == H_RDY; +} + +/** + * mei_me_hw_is_ready - check whether the me(hw) has turned ready + * + * @dev: mei device + * Return: bool + */ +static bool mei_me_hw_is_ready(struct mei_device *dev) +{ + u32 mecsr = mei_me_mecsr_read(dev); + + return (mecsr & ME_RDY_HRA) == ME_RDY_HRA; +} + +/** + * mei_me_hw_is_resetting - check whether the me(hw) is in reset + * + * @dev: mei device + * Return: bool + */ +static bool mei_me_hw_is_resetting(struct mei_device *dev) +{ + u32 mecsr = mei_me_mecsr_read(dev); + + return (mecsr & ME_RST_HRA) == ME_RST_HRA; +} + +/** + * mei_gsc_pxp_check - check for gsc firmware entering pxp mode + * + * @dev: the device structure + */ +static void mei_gsc_pxp_check(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + u32 fwsts5 = 0; + + if (dev->pxp_mode == MEI_DEV_PXP_DEFAULT) + return; + + hw->read_fws(dev, PCI_CFG_HFS_5, &fwsts5); + trace_mei_pci_cfg_read(dev->dev, "PCI_CFG_HFS_5", PCI_CFG_HFS_5, fwsts5); + if ((fwsts5 & GSC_CFG_HFS_5_BOOT_TYPE_MSK) == 
GSC_CFG_HFS_5_BOOT_TYPE_PXP) { + dev_dbg(dev->dev, "pxp mode is ready 0x%08x\n", fwsts5); + dev->pxp_mode = MEI_DEV_PXP_READY; + } else { + dev_dbg(dev->dev, "pxp mode is not ready 0x%08x\n", fwsts5); + } +} + +/** + * mei_me_hw_ready_wait - wait until the me(hw) has turned ready + * or timeout is reached + * + * @dev: mei device + * Return: 0 on success, error otherwise + */ +static int mei_me_hw_ready_wait(struct mei_device *dev) +{ + mutex_unlock(&dev->device_lock); + wait_event_timeout(dev->wait_hw_ready, + dev->recvd_hw_ready, + dev->timeouts.hw_ready); + mutex_lock(&dev->device_lock); + if (!dev->recvd_hw_ready) { + dev_err(dev->dev, "wait hw ready failed\n"); + return -ETIME; + } + + mei_gsc_pxp_check(dev); + + mei_me_hw_reset_release(dev); + dev->recvd_hw_ready = false; + return 0; +} + +/** + * mei_me_hw_start - hw start routine + * + * @dev: mei device + * Return: 0 on success, error otherwise + */ +static int mei_me_hw_start(struct mei_device *dev) +{ + int ret = mei_me_hw_ready_wait(dev); + + if (ret) + return ret; + dev_dbg(dev->dev, "hw is ready\n"); + + mei_me_host_set_ready(dev); + return ret; +} + + +/** + * mei_hbuf_filled_slots - gets number of device filled buffer slots + * + * @dev: the device structure + * + * Return: number of filled slots + */ +static unsigned char mei_hbuf_filled_slots(struct mei_device *dev) +{ + u32 hcsr; + char read_ptr, write_ptr; + + hcsr = mei_hcsr_read(dev); + + read_ptr = (char) ((hcsr & H_CBRP) >> 8); + write_ptr = (char) ((hcsr & H_CBWP) >> 16); + + return (unsigned char) (write_ptr - read_ptr); +} + +/** + * mei_me_hbuf_is_empty - checks if host buffer is empty. + * + * @dev: the device structure + * + * Return: true if empty, false - otherwise. + */ +static bool mei_me_hbuf_is_empty(struct mei_device *dev) +{ + return mei_hbuf_filled_slots(dev) == 0; +} + +/** + * mei_me_hbuf_empty_slots - counts write empty slots. + * + * @dev: the device structure + * + * Return: -EOVERFLOW if overflow, otherwise empty slots count + */ +static int mei_me_hbuf_empty_slots(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + unsigned char filled_slots, empty_slots; + + filled_slots = mei_hbuf_filled_slots(dev); + empty_slots = hw->hbuf_depth - filled_slots; + + /* check for overflow */ + if (filled_slots > hw->hbuf_depth) + return -EOVERFLOW; + + return empty_slots; +} + +/** + * mei_me_hbuf_depth - returns depth of the hw buffer. + * + * @dev: the device structure + * + * Return: size of hw buffer in slots + */ +static u32 mei_me_hbuf_depth(const struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + return hw->hbuf_depth; +} + +/** + * mei_me_hbuf_write - writes a message to host hw buffer. + * + * @dev: the device structure + * @hdr: header of message + * @hdr_len: header length in bytes: must be multiplication of a slot (4bytes) + * @data: payload + * @data_len: payload length in bytes + * + * Return: 0 if success, < 0 - otherwise. 
+ */ +static int mei_me_hbuf_write(struct mei_device *dev, + const void *hdr, size_t hdr_len, + const void *data, size_t data_len) +{ + unsigned long rem; + unsigned long i; + const u32 *reg_buf; + u32 dw_cnt; + int empty_slots; + + if (WARN_ON(!hdr || hdr_len & 0x3)) + return -EINVAL; + + if (!data && data_len) { + dev_err(dev->dev, "wrong parameters null data with data_len = %zu\n", data_len); + return -EINVAL; + } + + dev_dbg(dev->dev, MEI_HDR_FMT, MEI_HDR_PRM((struct mei_msg_hdr *)hdr)); + + empty_slots = mei_hbuf_empty_slots(dev); + dev_dbg(dev->dev, "empty slots = %d.\n", empty_slots); + + if (empty_slots < 0) + return -EOVERFLOW; + + dw_cnt = mei_data2slots(hdr_len + data_len); + if (dw_cnt > (u32)empty_slots) + return -EMSGSIZE; + + reg_buf = hdr; + for (i = 0; i < hdr_len / MEI_SLOT_SIZE; i++) + mei_me_hcbww_write(dev, reg_buf[i]); + + reg_buf = data; + for (i = 0; i < data_len / MEI_SLOT_SIZE; i++) + mei_me_hcbww_write(dev, reg_buf[i]); + + rem = data_len & 0x3; + if (rem > 0) { + u32 reg = 0; + + memcpy(®, (const u8 *)data + data_len - rem, rem); + mei_me_hcbww_write(dev, reg); + } + + mei_hcsr_set_hig(dev); + if (!mei_me_hw_is_ready(dev)) + return -EIO; + + return 0; +} + +/** + * mei_me_count_full_read_slots - counts read full slots. + * + * @dev: the device structure + * + * Return: -EOVERFLOW if overflow, otherwise filled slots count + */ +static int mei_me_count_full_read_slots(struct mei_device *dev) +{ + u32 me_csr; + char read_ptr, write_ptr; + unsigned char buffer_depth, filled_slots; + + me_csr = mei_me_mecsr_read(dev); + buffer_depth = (unsigned char)((me_csr & ME_CBD_HRA) >> 24); + read_ptr = (char) ((me_csr & ME_CBRP_HRA) >> 8); + write_ptr = (char) ((me_csr & ME_CBWP_HRA) >> 16); + filled_slots = (unsigned char) (write_ptr - read_ptr); + + /* check for overflow */ + if (filled_slots > buffer_depth) + return -EOVERFLOW; + + dev_dbg(dev->dev, "filled_slots =%08x\n", filled_slots); + return (int)filled_slots; +} + +/** + * mei_me_read_slots - reads a message from mei device. 
+ * + * @dev: the device structure + * @buffer: message buffer will be written + * @buffer_length: message size will be read + * + * Return: always 0 + */ +static int mei_me_read_slots(struct mei_device *dev, unsigned char *buffer, + unsigned long buffer_length) +{ + u32 *reg_buf = (u32 *)buffer; + + for (; buffer_length >= MEI_SLOT_SIZE; buffer_length -= MEI_SLOT_SIZE) + *reg_buf++ = mei_me_mecbrw_read(dev); + + if (buffer_length > 0) { + u32 reg = mei_me_mecbrw_read(dev); + + memcpy(reg_buf, ®, buffer_length); + } + + mei_hcsr_set_hig(dev); + return 0; +} + +/** + * mei_me_pg_set - write pg enter register + * + * @dev: the device structure + */ +static void mei_me_pg_set(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + u32 reg; + + reg = mei_me_reg_read(hw, H_HPG_CSR); + trace_mei_reg_read(dev->dev, "H_HPG_CSR", H_HPG_CSR, reg); + + reg |= H_HPG_CSR_PGI; + + trace_mei_reg_write(dev->dev, "H_HPG_CSR", H_HPG_CSR, reg); + mei_me_reg_write(hw, H_HPG_CSR, reg); +} + +/** + * mei_me_pg_unset - write pg exit register + * + * @dev: the device structure + */ +static void mei_me_pg_unset(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + u32 reg; + + reg = mei_me_reg_read(hw, H_HPG_CSR); + trace_mei_reg_read(dev->dev, "H_HPG_CSR", H_HPG_CSR, reg); + + WARN(!(reg & H_HPG_CSR_PGI), "PGI is not set\n"); + + reg |= H_HPG_CSR_PGIHEXR; + + trace_mei_reg_write(dev->dev, "H_HPG_CSR", H_HPG_CSR, reg); + mei_me_reg_write(hw, H_HPG_CSR, reg); +} + +/** + * mei_me_pg_legacy_enter_sync - perform legacy pg entry procedure + * + * @dev: the device structure + * + * Return: 0 on success an error code otherwise + */ +static int mei_me_pg_legacy_enter_sync(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + int ret; + + dev->pg_event = MEI_PG_EVENT_WAIT; + + ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_ENTRY_REQ_CMD); + if (ret) + return ret; + + mutex_unlock(&dev->device_lock); + wait_event_timeout(dev->wait_pg, + dev->pg_event == MEI_PG_EVENT_RECEIVED, + dev->timeouts.pgi); + mutex_lock(&dev->device_lock); + + if (dev->pg_event == MEI_PG_EVENT_RECEIVED) { + mei_me_pg_set(dev); + ret = 0; + } else { + ret = -ETIME; + } + + dev->pg_event = MEI_PG_EVENT_IDLE; + hw->pg_state = MEI_PG_ON; + + return ret; +} + +/** + * mei_me_pg_legacy_exit_sync - perform legacy pg exit procedure + * + * @dev: the device structure + * + * Return: 0 on success an error code otherwise + */ +static int mei_me_pg_legacy_exit_sync(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + int ret; + + if (dev->pg_event == MEI_PG_EVENT_RECEIVED) + goto reply; + + dev->pg_event = MEI_PG_EVENT_WAIT; + + mei_me_pg_unset(dev); + + mutex_unlock(&dev->device_lock); + wait_event_timeout(dev->wait_pg, + dev->pg_event == MEI_PG_EVENT_RECEIVED, + dev->timeouts.pgi); + mutex_lock(&dev->device_lock); + +reply: + if (dev->pg_event != MEI_PG_EVENT_RECEIVED) { + ret = -ETIME; + goto out; + } + + dev->pg_event = MEI_PG_EVENT_INTR_WAIT; + ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_EXIT_RES_CMD); + if (ret) + return ret; + + mutex_unlock(&dev->device_lock); + wait_event_timeout(dev->wait_pg, + dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED, + dev->timeouts.pgi); + mutex_lock(&dev->device_lock); + + if (dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED) + ret = 0; + else + ret = -ETIME; + +out: + dev->pg_event = MEI_PG_EVENT_IDLE; + hw->pg_state = MEI_PG_OFF; + + return ret; +} + +/** + * mei_me_pg_in_transition - is device now in pg transition + * + * @dev: the device structure + * + * Return: true if in pg 
transition, false otherwise + */ +static bool mei_me_pg_in_transition(struct mei_device *dev) +{ + return dev->pg_event >= MEI_PG_EVENT_WAIT && + dev->pg_event <= MEI_PG_EVENT_INTR_WAIT; +} + +/** + * mei_me_pg_is_enabled - detect if PG is supported by HW + * + * @dev: the device structure + * + * Return: true is pg supported, false otherwise + */ +static bool mei_me_pg_is_enabled(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + u32 reg = mei_me_mecsr_read(dev); + + if (hw->d0i3_supported) + return true; + + if ((reg & ME_PGIC_HRA) == 0) + goto notsupported; + + if (!dev->hbm_f_pg_supported) + goto notsupported; + + return true; + +notsupported: + dev_dbg(dev->dev, "pg: not supported: d0i3 = %d HGP = %d hbm version %d.%d ?= %d.%d\n", + hw->d0i3_supported, + !!(reg & ME_PGIC_HRA), + dev->version.major_version, + dev->version.minor_version, + HBM_MAJOR_VERSION_PGI, + HBM_MINOR_VERSION_PGI); + + return false; +} + +/** + * mei_me_d0i3_set - write d0i3 register bit on mei device. + * + * @dev: the device structure + * @intr: ask for interrupt + * + * Return: D0I3C register value + */ +static u32 mei_me_d0i3_set(struct mei_device *dev, bool intr) +{ + u32 reg = mei_me_d0i3c_read(dev); + + reg |= H_D0I3C_I3; + if (intr) + reg |= H_D0I3C_IR; + else + reg &= ~H_D0I3C_IR; + mei_me_d0i3c_write(dev, reg); + /* read it to ensure HW consistency */ + reg = mei_me_d0i3c_read(dev); + return reg; +} + +/** + * mei_me_d0i3_unset - clean d0i3 register bit on mei device. + * + * @dev: the device structure + * + * Return: D0I3C register value + */ +static u32 mei_me_d0i3_unset(struct mei_device *dev) +{ + u32 reg = mei_me_d0i3c_read(dev); + + reg &= ~H_D0I3C_I3; + reg |= H_D0I3C_IR; + mei_me_d0i3c_write(dev, reg); + /* read it to ensure HW consistency */ + reg = mei_me_d0i3c_read(dev); + return reg; +} + +/** + * mei_me_d0i3_enter_sync - perform d0i3 entry procedure + * + * @dev: the device structure + * + * Return: 0 on success an error code otherwise + */ +static int mei_me_d0i3_enter_sync(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + int ret; + u32 reg; + + reg = mei_me_d0i3c_read(dev); + if (reg & H_D0I3C_I3) { + /* we are in d0i3, nothing to do */ + dev_dbg(dev->dev, "d0i3 set not needed\n"); + ret = 0; + goto on; + } + + /* PGI entry procedure */ + dev->pg_event = MEI_PG_EVENT_WAIT; + + ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_ENTRY_REQ_CMD); + if (ret) + /* FIXME: should we reset here? 
*/ + goto out; + + mutex_unlock(&dev->device_lock); + wait_event_timeout(dev->wait_pg, + dev->pg_event == MEI_PG_EVENT_RECEIVED, + dev->timeouts.pgi); + mutex_lock(&dev->device_lock); + + if (dev->pg_event != MEI_PG_EVENT_RECEIVED) { + ret = -ETIME; + goto out; + } + /* end PGI entry procedure */ + + dev->pg_event = MEI_PG_EVENT_INTR_WAIT; + + reg = mei_me_d0i3_set(dev, true); + if (!(reg & H_D0I3C_CIP)) { + dev_dbg(dev->dev, "d0i3 enter wait not needed\n"); + ret = 0; + goto on; + } + + mutex_unlock(&dev->device_lock); + wait_event_timeout(dev->wait_pg, + dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED, + dev->timeouts.d0i3); + mutex_lock(&dev->device_lock); + + if (dev->pg_event != MEI_PG_EVENT_INTR_RECEIVED) { + reg = mei_me_d0i3c_read(dev); + if (!(reg & H_D0I3C_I3)) { + ret = -ETIME; + goto out; + } + } + + ret = 0; +on: + hw->pg_state = MEI_PG_ON; +out: + dev->pg_event = MEI_PG_EVENT_IDLE; + dev_dbg(dev->dev, "d0i3 enter ret = %d\n", ret); + return ret; +} + +/** + * mei_me_d0i3_enter - perform d0i3 entry procedure + * no hbm PG handshake + * no waiting for confirmation; runs with interrupts + * disabled + * + * @dev: the device structure + * + * Return: 0 on success an error code otherwise + */ +static int mei_me_d0i3_enter(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + u32 reg; + + reg = mei_me_d0i3c_read(dev); + if (reg & H_D0I3C_I3) { + /* we are in d0i3, nothing to do */ + dev_dbg(dev->dev, "already d0i3 : set not needed\n"); + goto on; + } + + mei_me_d0i3_set(dev, false); +on: + hw->pg_state = MEI_PG_ON; + dev->pg_event = MEI_PG_EVENT_IDLE; + dev_dbg(dev->dev, "d0i3 enter\n"); + return 0; +} + +/** + * mei_me_d0i3_exit_sync - perform d0i3 exit procedure + * + * @dev: the device structure + * + * Return: 0 on success an error code otherwise + */ +static int mei_me_d0i3_exit_sync(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + int ret; + u32 reg; + + dev->pg_event = MEI_PG_EVENT_INTR_WAIT; + + reg = mei_me_d0i3c_read(dev); + if (!(reg & H_D0I3C_I3)) { + /* we are not in d0i3, nothing to do */ + dev_dbg(dev->dev, "d0i3 exit not needed\n"); + ret = 0; + goto off; + } + + reg = mei_me_d0i3_unset(dev); + if (!(reg & H_D0I3C_CIP)) { + dev_dbg(dev->dev, "d0i3 exit wait not needed\n"); + ret = 0; + goto off; + } + + mutex_unlock(&dev->device_lock); + wait_event_timeout(dev->wait_pg, + dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED, + dev->timeouts.d0i3); + mutex_lock(&dev->device_lock); + + if (dev->pg_event != MEI_PG_EVENT_INTR_RECEIVED) { + reg = mei_me_d0i3c_read(dev); + if (reg & H_D0I3C_I3) { + ret = -ETIME; + goto out; + } + } + + ret = 0; +off: + hw->pg_state = MEI_PG_OFF; +out: + dev->pg_event = MEI_PG_EVENT_IDLE; + + dev_dbg(dev->dev, "d0i3 exit ret = %d\n", ret); + return ret; +} + +/** + * mei_me_pg_legacy_intr - perform legacy pg processing + * in interrupt thread handler + * + * @dev: the device structure + */ +static void mei_me_pg_legacy_intr(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + if (dev->pg_event != MEI_PG_EVENT_INTR_WAIT) + return; + + dev->pg_event = MEI_PG_EVENT_INTR_RECEIVED; + hw->pg_state = MEI_PG_OFF; + if (waitqueue_active(&dev->wait_pg)) + wake_up(&dev->wait_pg); +} + +/** + * mei_me_d0i3_intr - perform d0i3 processing in interrupt thread handler + * + * @dev: the device structure + * @intr_source: interrupt source + */ +static void mei_me_d0i3_intr(struct mei_device *dev, u32 intr_source) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + if (dev->pg_event == MEI_PG_EVENT_INTR_WAIT && + 
(intr_source & H_D0I3C_IS)) { + dev->pg_event = MEI_PG_EVENT_INTR_RECEIVED; + if (hw->pg_state == MEI_PG_ON) { + hw->pg_state = MEI_PG_OFF; + if (dev->hbm_state != MEI_HBM_IDLE) { + /* + * force H_RDY because it could be + * wiped off during PG + */ + dev_dbg(dev->dev, "d0i3 set host ready\n"); + mei_me_host_set_ready(dev); + } + } else { + hw->pg_state = MEI_PG_ON; + } + + wake_up(&dev->wait_pg); + } + + if (hw->pg_state == MEI_PG_ON && (intr_source & H_IS)) { + /* + * HW sent some data and we are in D0i3, so + * we got here because of HW initiated exit from D0i3. + * Start runtime pm resume sequence to exit low power state. + */ + dev_dbg(dev->dev, "d0i3 want resume\n"); + mei_hbm_pg_resume(dev); + } +} + +/** + * mei_me_pg_intr - perform pg processing in interrupt thread handler + * + * @dev: the device structure + * @intr_source: interrupt source + */ +static void mei_me_pg_intr(struct mei_device *dev, u32 intr_source) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + if (hw->d0i3_supported) + mei_me_d0i3_intr(dev, intr_source); + else + mei_me_pg_legacy_intr(dev); +} + +/** + * mei_me_pg_enter_sync - perform runtime pm entry procedure + * + * @dev: the device structure + * + * Return: 0 on success an error code otherwise + */ +int mei_me_pg_enter_sync(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + if (hw->d0i3_supported) + return mei_me_d0i3_enter_sync(dev); + else + return mei_me_pg_legacy_enter_sync(dev); +} + +/** + * mei_me_pg_exit_sync - perform runtime pm exit procedure + * + * @dev: the device structure + * + * Return: 0 on success an error code otherwise + */ +int mei_me_pg_exit_sync(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + if (hw->d0i3_supported) + return mei_me_d0i3_exit_sync(dev); + else + return mei_me_pg_legacy_exit_sync(dev); +} + +/** + * mei_me_hw_reset - resets fw via mei csr register. + * + * @dev: the device structure + * @intr_enable: if interrupt should be enabled after reset. + * + * Return: 0 on success an error code otherwise + */ +static int mei_me_hw_reset(struct mei_device *dev, bool intr_enable) +{ + struct mei_me_hw *hw = to_me_hw(dev); + int ret; + u32 hcsr; + + if (intr_enable) { + mei_me_intr_enable(dev); + if (hw->d0i3_supported) { + ret = mei_me_d0i3_exit_sync(dev); + if (ret) + return ret; + } else { + hw->pg_state = MEI_PG_OFF; + } + } + + pm_runtime_set_active(dev->dev); + + hcsr = mei_hcsr_read(dev); + /* H_RST may be found lit before reset is started, + * for example if preceding reset flow hasn't completed. + * In that case asserting H_RST will be ignored, therefore + * we need to clean H_RST bit to start a successful reset sequence. + */ + if ((hcsr & H_RST) == H_RST) { + dev_warn(dev->dev, "H_RST is set = 0x%08X", hcsr); + hcsr &= ~H_RST; + mei_hcsr_set(dev, hcsr); + hcsr = mei_hcsr_read(dev); + } + + hcsr |= H_RST | H_IG | H_CSR_IS_MASK; + + if (!intr_enable || mei_me_hw_use_polling(to_me_hw(dev))) + hcsr &= ~H_CSR_IE_MASK; + + dev->recvd_hw_ready = false; + mei_hcsr_write(dev, hcsr); + + /* + * Host reads the H_CSR once to ensure that the + * posted write to H_CSR completes. 
+ */ + hcsr = mei_hcsr_read(dev); + + if ((hcsr & H_RST) == 0) + dev_warn(dev->dev, "H_RST is not set = 0x%08X", hcsr); + + if ((hcsr & H_RDY) == H_RDY) + dev_warn(dev->dev, "H_RDY is not cleared 0x%08X", hcsr); + + if (!intr_enable) { + mei_me_hw_reset_release(dev); + if (hw->d0i3_supported) { + ret = mei_me_d0i3_enter(dev); + if (ret) + return ret; + } + } + return 0; +} + +/** + * mei_me_irq_quick_handler - The ISR of the MEI device + * + * @irq: The irq number + * @dev_id: pointer to the device structure + * + * Return: irqreturn_t + */ +irqreturn_t mei_me_irq_quick_handler(int irq, void *dev_id) +{ + struct mei_device *dev = (struct mei_device *)dev_id; + u32 hcsr; + + hcsr = mei_hcsr_read(dev); + if (!me_intr_src(hcsr)) + return IRQ_NONE; + + dev_dbg(dev->dev, "interrupt source 0x%08X\n", me_intr_src(hcsr)); + + /* disable interrupts on device */ + me_intr_disable(dev, hcsr); + return IRQ_WAKE_THREAD; +} +EXPORT_SYMBOL_GPL(mei_me_irq_quick_handler); + +/** + * mei_me_irq_thread_handler - function called after ISR to handle the interrupt + * processing. + * + * @irq: The irq number + * @dev_id: pointer to the device structure + * + * Return: irqreturn_t + * + */ +irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id) +{ + struct mei_device *dev = (struct mei_device *) dev_id; + struct list_head cmpl_list; + s32 slots; + u32 hcsr; + int rets = 0; + + dev_dbg(dev->dev, "function called after ISR to handle the interrupt processing.\n"); + /* initialize our complete list */ + mutex_lock(&dev->device_lock); + + hcsr = mei_hcsr_read(dev); + me_intr_clear(dev, hcsr); + + INIT_LIST_HEAD(&cmpl_list); + + /* check if ME wants a reset */ + if (!mei_hw_is_ready(dev) && dev->dev_state != MEI_DEV_RESETTING) { + dev_warn(dev->dev, "FW not ready: resetting: dev_state = %d pxp = %d\n", + dev->dev_state, dev->pxp_mode); + if (dev->dev_state == MEI_DEV_POWERING_DOWN || + dev->dev_state == MEI_DEV_POWER_DOWN) + mei_cl_all_disconnect(dev); + else if (dev->dev_state != MEI_DEV_DISABLED) + schedule_work(&dev->reset_work); + goto end; + } + + if (mei_me_hw_is_resetting(dev)) + mei_hcsr_set_hig(dev); + + mei_me_pg_intr(dev, me_intr_src(hcsr)); + + /* check if we need to start the dev */ + if (!mei_host_is_ready(dev)) { + if (mei_hw_is_ready(dev)) { + dev_dbg(dev->dev, "we need to start the dev.\n"); + dev->recvd_hw_ready = true; + wake_up(&dev->wait_hw_ready); + } else { + dev_dbg(dev->dev, "Spurious Interrupt\n"); + } + goto end; + } + /* check slots available for reading */ + slots = mei_count_full_read_slots(dev); + while (slots > 0) { + dev_dbg(dev->dev, "slots to read = %08x\n", slots); + rets = mei_irq_read_handler(dev, &cmpl_list, &slots); + /* There is a race between ME write and interrupt delivery: + * Not all data is always available immediately after the + * interrupt, so try to read again on the next interrupt. 
+ */ + if (rets == -ENODATA) + break; + + if (rets) { + dev_err(dev->dev, "mei_irq_read_handler ret = %d, state = %d.\n", + rets, dev->dev_state); + if (dev->dev_state != MEI_DEV_RESETTING && + dev->dev_state != MEI_DEV_DISABLED && + dev->dev_state != MEI_DEV_POWERING_DOWN && + dev->dev_state != MEI_DEV_POWER_DOWN) + schedule_work(&dev->reset_work); + goto end; + } + } + + dev->hbuf_is_ready = mei_hbuf_is_ready(dev); + + /* + * During PG handshake only allowed write is the replay to the + * PG exit message, so block calling write function + * if the pg event is in PG handshake + */ + if (dev->pg_event != MEI_PG_EVENT_WAIT && + dev->pg_event != MEI_PG_EVENT_RECEIVED) { + rets = mei_irq_write_handler(dev, &cmpl_list); + dev->hbuf_is_ready = mei_hbuf_is_ready(dev); + } + + mei_irq_compl_handler(dev, &cmpl_list); + +end: + dev_dbg(dev->dev, "interrupt thread end ret = %d\n", rets); + mei_me_intr_enable(dev); + mutex_unlock(&dev->device_lock); + return IRQ_HANDLED; +} +EXPORT_SYMBOL_GPL(mei_me_irq_thread_handler); + +#define MEI_POLLING_TIMEOUT_ACTIVE 100 +#define MEI_POLLING_TIMEOUT_IDLE 500 + +/** + * mei_me_polling_thread - interrupt register polling thread + * + * The thread monitors the interrupt source register and calls + * mei_me_irq_thread_handler() to handle the firmware + * input. + * + * The function polls in MEI_POLLING_TIMEOUT_ACTIVE timeout + * in case there was an event, in idle case the polling + * time increases yet again by MEI_POLLING_TIMEOUT_ACTIVE + * up to MEI_POLLING_TIMEOUT_IDLE. + * + * @_dev: mei device + * + * Return: always 0 + */ +int mei_me_polling_thread(void *_dev) +{ + struct mei_device *dev = _dev; + irqreturn_t irq_ret; + long polling_timeout = MEI_POLLING_TIMEOUT_ACTIVE; + + dev_dbg(dev->dev, "kernel thread is running\n"); + while (!kthread_should_stop()) { + struct mei_me_hw *hw = to_me_hw(dev); + u32 hcsr; + + wait_event_timeout(hw->wait_active, + hw->is_active || kthread_should_stop(), + msecs_to_jiffies(MEI_POLLING_TIMEOUT_IDLE)); + + if (kthread_should_stop()) + break; + + hcsr = mei_hcsr_read(dev); + if (me_intr_src(hcsr)) { + polling_timeout = MEI_POLLING_TIMEOUT_ACTIVE; + irq_ret = mei_me_irq_thread_handler(1, dev); + if (irq_ret != IRQ_HANDLED) + dev_err(dev->dev, "irq_ret %d\n", irq_ret); + } else { + /* + * Increase timeout by MEI_POLLING_TIMEOUT_ACTIVE + * up to MEI_POLLING_TIMEOUT_IDLE + */ + polling_timeout = clamp_val(polling_timeout + MEI_POLLING_TIMEOUT_ACTIVE, + MEI_POLLING_TIMEOUT_ACTIVE, + MEI_POLLING_TIMEOUT_IDLE); + } + + schedule_timeout_interruptible(msecs_to_jiffies(polling_timeout)); + } + + return 0; +} +EXPORT_SYMBOL_GPL(mei_me_polling_thread); + +static const struct mei_hw_ops mei_me_hw_ops = { + + .trc_status = mei_me_trc_status, + .fw_status = mei_me_fw_status, + .pg_state = mei_me_pg_state, + + .host_is_ready = mei_me_host_is_ready, + + .hw_is_ready = mei_me_hw_is_ready, + .hw_reset = mei_me_hw_reset, + .hw_config = mei_me_hw_config, + .hw_start = mei_me_hw_start, + + .pg_in_transition = mei_me_pg_in_transition, + .pg_is_enabled = mei_me_pg_is_enabled, + + .intr_clear = mei_me_intr_clear, + .intr_enable = mei_me_intr_enable, + .intr_disable = mei_me_intr_disable, + .synchronize_irq = mei_me_synchronize_irq, + + .hbuf_free_slots = mei_me_hbuf_empty_slots, + .hbuf_is_ready = mei_me_hbuf_is_empty, + .hbuf_depth = mei_me_hbuf_depth, + + .write = mei_me_hbuf_write, + + .rdbuf_full_slots = mei_me_count_full_read_slots, + .read_hdr = mei_me_mecbrw_read, + .read = mei_me_read_slots +}; + +/** + * mei_me_fw_type_nm() - check for nm sku 
+ * + * Read ME FW Status register to check for the Node Manager (NM) Firmware. + * The NM FW is only signaled in PCI function 0. + * __Note__: Deprecated by PCH8 and newer. + * + * @pdev: pci device + * + * Return: true in case of NM firmware + */ +static bool mei_me_fw_type_nm(const struct pci_dev *pdev) +{ + u32 reg; + unsigned int devfn; + + devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0); + pci_bus_read_config_dword(pdev->bus, devfn, PCI_CFG_HFS_2, ®); + trace_mei_pci_cfg_read(&pdev->dev, "PCI_CFG_HFS_2", PCI_CFG_HFS_2, reg); + /* make sure that bit 9 (NM) is up and bit 10 (DM) is down */ + return (reg & 0x600) == 0x200; +} + +#define MEI_CFG_FW_NM \ + .quirk_probe = mei_me_fw_type_nm + +/** + * mei_me_fw_type_sps_4() - check for sps 4.0 sku + * + * Read ME FW Status register to check for SPS Firmware. + * The SPS FW is only signaled in the PCI function 0. + * __Note__: Deprecated by SPS 5.0 and newer. + * + * @pdev: pci device + * + * Return: true in case of SPS firmware + */ +static bool mei_me_fw_type_sps_4(const struct pci_dev *pdev) +{ + u32 reg; + unsigned int devfn; + + devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0); + pci_bus_read_config_dword(pdev->bus, devfn, PCI_CFG_HFS_1, ®); + trace_mei_pci_cfg_read(&pdev->dev, "PCI_CFG_HFS_1", PCI_CFG_HFS_1, reg); + return (reg & PCI_CFG_HFS_1_OPMODE_MSK) == PCI_CFG_HFS_1_OPMODE_SPS; +} + +#define MEI_CFG_FW_SPS_4 \ + .quirk_probe = mei_me_fw_type_sps_4 + +/** + * mei_me_fw_type_sps_ign() - check for sps or ign sku + * + * Read ME FW Status register to check for SPS or IGN Firmware. + * The SPS/IGN FW is only signaled in pci function 0 + * + * @pdev: pci device + * + * Return: true in case of SPS/IGN firmware + */ +static bool mei_me_fw_type_sps_ign(const struct pci_dev *pdev) +{ + u32 reg; + u32 fw_type; + unsigned int devfn; + + devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0); + pci_bus_read_config_dword(pdev->bus, devfn, PCI_CFG_HFS_3, ®); + trace_mei_pci_cfg_read(&pdev->dev, "PCI_CFG_HFS_3", PCI_CFG_HFS_3, reg); + fw_type = (reg & PCI_CFG_HFS_3_FW_SKU_MSK); + + dev_dbg(&pdev->dev, "fw type is %d\n", fw_type); + + return fw_type == PCI_CFG_HFS_3_FW_SKU_IGN || + fw_type == PCI_CFG_HFS_3_FW_SKU_SPS; +} + +#define MEI_CFG_KIND_ITOUCH \ + .kind = "itouch" + +#define MEI_CFG_TYPE_GSC \ + .kind = "gsc" + +#define MEI_CFG_TYPE_GSCFI \ + .kind = "gscfi" + +#define MEI_CFG_FW_SPS_IGN \ + .quirk_probe = mei_me_fw_type_sps_ign + +#define MEI_CFG_FW_VER_SUPP \ + .fw_ver_supported = 1 + +#define MEI_CFG_ICH_HFS \ + .fw_status.count = 0 + +#define MEI_CFG_ICH10_HFS \ + .fw_status.count = 1, \ + .fw_status.status[0] = PCI_CFG_HFS_1 + +#define MEI_CFG_PCH_HFS \ + .fw_status.count = 2, \ + .fw_status.status[0] = PCI_CFG_HFS_1, \ + .fw_status.status[1] = PCI_CFG_HFS_2 + +#define MEI_CFG_PCH8_HFS \ + .fw_status.count = 6, \ + .fw_status.status[0] = PCI_CFG_HFS_1, \ + .fw_status.status[1] = PCI_CFG_HFS_2, \ + .fw_status.status[2] = PCI_CFG_HFS_3, \ + .fw_status.status[3] = PCI_CFG_HFS_4, \ + .fw_status.status[4] = PCI_CFG_HFS_5, \ + .fw_status.status[5] = PCI_CFG_HFS_6 + +#define MEI_CFG_DMA_128 \ + .dma_size[DMA_DSCR_HOST] = SZ_128K, \ + .dma_size[DMA_DSCR_DEVICE] = SZ_128K, \ + .dma_size[DMA_DSCR_CTRL] = PAGE_SIZE + +#define MEI_CFG_TRC \ + .hw_trc_supported = 1 + +/* ICH Legacy devices */ +static const struct mei_cfg mei_me_ich_cfg = { + MEI_CFG_ICH_HFS, +}; + +/* ICH devices */ +static const struct mei_cfg mei_me_ich10_cfg = { + MEI_CFG_ICH10_HFS, +}; + +/* PCH6 devices */ +static const struct mei_cfg mei_me_pch6_cfg = { + MEI_CFG_PCH_HFS, +}; + +/* PCH7 
devices */ +static const struct mei_cfg mei_me_pch7_cfg = { + MEI_CFG_PCH_HFS, + MEI_CFG_FW_VER_SUPP, +}; + +/* PCH Cougar Point and Patsburg with quirk for Node Manager exclusion */ +static const struct mei_cfg mei_me_pch_cpt_pbg_cfg = { + MEI_CFG_PCH_HFS, + MEI_CFG_FW_VER_SUPP, + MEI_CFG_FW_NM, +}; + +/* PCH8 Lynx Point and newer devices */ +static const struct mei_cfg mei_me_pch8_cfg = { + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, +}; + +/* PCH8 Lynx Point and newer devices - iTouch */ +static const struct mei_cfg mei_me_pch8_itouch_cfg = { + MEI_CFG_KIND_ITOUCH, + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, +}; + +/* PCH8 Lynx Point with quirk for SPS Firmware exclusion */ +static const struct mei_cfg mei_me_pch8_sps_4_cfg = { + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, + MEI_CFG_FW_SPS_4, +}; + +/* LBG with quirk for SPS (4.0) Firmware exclusion */ +static const struct mei_cfg mei_me_pch12_sps_4_cfg = { + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, + MEI_CFG_FW_SPS_4, +}; + +/* Cannon Lake and newer devices */ +static const struct mei_cfg mei_me_pch12_cfg = { + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, + MEI_CFG_DMA_128, +}; + +/* Cannon Lake with quirk for SPS 5.0 and newer Firmware exclusion */ +static const struct mei_cfg mei_me_pch12_sps_cfg = { + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, + MEI_CFG_DMA_128, + MEI_CFG_FW_SPS_IGN, +}; + +/* Cannon Lake itouch with quirk for SPS 5.0 and newer Firmware exclusion + * w/o DMA support. + */ +static const struct mei_cfg mei_me_pch12_itouch_sps_cfg = { + MEI_CFG_KIND_ITOUCH, + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, + MEI_CFG_FW_SPS_IGN, +}; + +/* Tiger Lake and newer devices */ +static const struct mei_cfg mei_me_pch15_cfg = { + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, + MEI_CFG_DMA_128, + MEI_CFG_TRC, +}; + +/* Tiger Lake with quirk for SPS 5.0 and newer Firmware exclusion */ +static const struct mei_cfg mei_me_pch15_sps_cfg = { + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, + MEI_CFG_DMA_128, + MEI_CFG_TRC, + MEI_CFG_FW_SPS_IGN, +}; + +/* Graphics System Controller */ +static const struct mei_cfg mei_me_gsc_cfg = { + MEI_CFG_TYPE_GSC, + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, +}; + +/* Graphics System Controller Firmware Interface */ +static const struct mei_cfg mei_me_gscfi_cfg = { + MEI_CFG_TYPE_GSCFI, + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, +}; + +/* + * mei_cfg_list - A list of platform platform specific configurations. + * Note: has to be synchronized with enum mei_cfg_idx. 
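+ * The synchronization is enforced at compile time: mei_me_get_cfg() below
+ * contains BUILD_BUG_ON(ARRAY_SIZE(mei_cfg_list) != MEI_ME_NUM_CFG), so the
+ * table and the enum cannot silently drift apart.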
+ */ +static const struct mei_cfg *const mei_cfg_list[] = { + [MEI_ME_UNDEF_CFG] = NULL, + [MEI_ME_ICH_CFG] = &mei_me_ich_cfg, + [MEI_ME_ICH10_CFG] = &mei_me_ich10_cfg, + [MEI_ME_PCH6_CFG] = &mei_me_pch6_cfg, + [MEI_ME_PCH7_CFG] = &mei_me_pch7_cfg, + [MEI_ME_PCH_CPT_PBG_CFG] = &mei_me_pch_cpt_pbg_cfg, + [MEI_ME_PCH8_CFG] = &mei_me_pch8_cfg, + [MEI_ME_PCH8_ITOUCH_CFG] = &mei_me_pch8_itouch_cfg, + [MEI_ME_PCH8_SPS_4_CFG] = &mei_me_pch8_sps_4_cfg, + [MEI_ME_PCH12_CFG] = &mei_me_pch12_cfg, + [MEI_ME_PCH12_SPS_4_CFG] = &mei_me_pch12_sps_4_cfg, + [MEI_ME_PCH12_SPS_CFG] = &mei_me_pch12_sps_cfg, + [MEI_ME_PCH12_SPS_ITOUCH_CFG] = &mei_me_pch12_itouch_sps_cfg, + [MEI_ME_PCH15_CFG] = &mei_me_pch15_cfg, + [MEI_ME_PCH15_SPS_CFG] = &mei_me_pch15_sps_cfg, + [MEI_ME_GSC_CFG] = &mei_me_gsc_cfg, + [MEI_ME_GSCFI_CFG] = &mei_me_gscfi_cfg, +}; + +const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx) +{ + BUILD_BUG_ON(ARRAY_SIZE(mei_cfg_list) != MEI_ME_NUM_CFG); + + if (idx >= MEI_ME_NUM_CFG) + return NULL; + + return mei_cfg_list[idx]; +} +EXPORT_SYMBOL_GPL(mei_me_get_cfg); + +/** + * mei_me_dev_init - allocates and initializes the mei device structure + * + * @parent: device associated with physical device (pci/platform) + * @cfg: per device generation config + * @slow_fw: configure longer timeouts as FW is slow + * + * Return: The mei_device pointer on success, NULL on failure. + */ +struct mei_device *mei_me_dev_init(struct device *parent, + const struct mei_cfg *cfg, bool slow_fw) +{ + struct mei_device *dev; + struct mei_me_hw *hw; + int i; + + dev = devm_kzalloc(parent, sizeof(*dev) + sizeof(*hw), GFP_KERNEL); + if (!dev) + return NULL; + + hw = to_me_hw(dev); + + for (i = 0; i < DMA_DSCR_NUM; i++) + dev->dr_dscr[i].size = cfg->dma_size[i]; + + mei_device_init(dev, parent, slow_fw, &mei_me_hw_ops); + hw->cfg = cfg; + + dev->fw_f_fw_ver_supported = cfg->fw_ver_supported; + + dev->kind = cfg->kind; + + return dev; +} +EXPORT_SYMBOL_GPL(mei_me_dev_init); diff --git a/drivers/misc/mei/hw-me.h b/drivers/misc/mei/hw-me.h new file mode 100644 index 0000000000..95cf830b7c --- /dev/null +++ b/drivers/misc/mei/hw-me.h @@ -0,0 +1,144 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2012-2022, Intel Corporation. All rights reserved. 
+ * Intel Management Engine Interface (Intel MEI) Linux driver + */ + +#ifndef _MEI_INTERFACE_H_ +#define _MEI_INTERFACE_H_ + +#include +#include +#include + +#include "mei_dev.h" +#include "client.h" + +/* + * mei_cfg - mei device configuration + * + * @fw_status: FW status + * @quirk_probe: device exclusion quirk + * @kind: MEI head kind + * @dma_size: device DMA buffers size + * @fw_ver_supported: is fw version retrievable from FW + * @hw_trc_supported: does the hw support trc register + */ +struct mei_cfg { + const struct mei_fw_status fw_status; + bool (*quirk_probe)(const struct pci_dev *pdev); + const char *kind; + size_t dma_size[DMA_DSCR_NUM]; + u32 fw_ver_supported:1; + u32 hw_trc_supported:1; +}; + + +#define MEI_PCI_DEVICE(dev, cfg) \ + .vendor = PCI_VENDOR_ID_INTEL, .device = (dev), \ + .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, \ + .driver_data = (kernel_ulong_t)(cfg), + +#define MEI_ME_RPM_TIMEOUT 500 /* ms */ + +/** + * struct mei_me_hw - me hw specific data + * + * @cfg: per device generation config and ops + * @mem_addr: io memory address + * @irq: irq number + * @pg_state: power gating state + * @d0i3_supported: di03 support + * @hbuf_depth: depth of hardware host/write buffer in slots + * @read_fws: read FW status register handler + * @polling_thread: interrupt polling thread + * @wait_active: the polling thread activity wait queue + * @is_active: the device is active + */ +struct mei_me_hw { + const struct mei_cfg *cfg; + void __iomem *mem_addr; + int irq; + enum mei_pg_state pg_state; + bool d0i3_supported; + u8 hbuf_depth; + int (*read_fws)(const struct mei_device *dev, int where, u32 *val); + /* polling */ + struct task_struct *polling_thread; + wait_queue_head_t wait_active; + bool is_active; +}; + +#define to_me_hw(dev) (struct mei_me_hw *)((dev)->hw) + +static inline bool mei_me_hw_use_polling(const struct mei_me_hw *hw) +{ + return hw->irq < 0; +} + +/** + * enum mei_cfg_idx - indices to platform specific configurations. + * + * Note: has to be synchronized with mei_cfg_list[] + * + * @MEI_ME_UNDEF_CFG: Lower sentinel. + * @MEI_ME_ICH_CFG: I/O Controller Hub legacy devices. + * @MEI_ME_ICH10_CFG: I/O Controller Hub platforms Gen10 + * @MEI_ME_PCH6_CFG: Platform Controller Hub platforms (Gen6). + * @MEI_ME_PCH7_CFG: Platform Controller Hub platforms (Gen7). + * @MEI_ME_PCH_CPT_PBG_CFG:Platform Controller Hub workstations + * with quirk for Node Manager exclusion. + * @MEI_ME_PCH8_CFG: Platform Controller Hub Gen8 and newer + * client platforms. + * @MEI_ME_PCH8_ITOUCH_CFG:Platform Controller Hub Gen8 and newer + * client platforms (iTouch). + * @MEI_ME_PCH8_SPS_4_CFG: Platform Controller Hub Gen8 and newer + * servers platforms with quirk for + * SPS firmware exclusion. + * @MEI_ME_PCH12_CFG: Platform Controller Hub Gen12 and newer + * @MEI_ME_PCH12_SPS_4_CFG:Platform Controller Hub Gen12 up to 4.0 + * servers platforms with quirk for + * SPS firmware exclusion. + * @MEI_ME_PCH12_SPS_CFG: Platform Controller Hub Gen12 5.0 and newer + * servers platforms with quirk for + * SPS firmware exclusion. + * @MEI_ME_PCH15_CFG: Platform Controller Hub Gen15 and newer + * @MEI_ME_PCH15_SPS_CFG: Platform Controller Hub Gen15 and newer + * servers platforms with quirk for + * SPS firmware exclusion. + * @MEI_ME_NUM_CFG: Upper Sentinel. 
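+ * @MEI_ME_PCH12_SPS_ITOUCH_CFG: Platform Controller Hub Gen12 and newer
+ *			client platforms (iTouch) with quirk for
+ *			SPS firmware exclusion.
+ * @MEI_ME_GSC_CFG:	Graphics System Controller.
+ * @MEI_ME_GSCFI_CFG:	Graphics System Controller Firmware Interface.
+ *
+ * The index travels through the PCI driver_data: a device table entry built
+ * with MEI_PCI_DEVICE() carries one of these values, and the probe path
+ * (which lives outside this header) is expected to resolve it back roughly
+ * as in the sketch below, where "ent" and "pdev" stand for the pci_device_id
+ * and pci_dev handed to probe:
+ *
+ *	const struct mei_cfg *cfg = mei_me_get_cfg(ent->driver_data);
+ *	if (!cfg)
+ *		return -ENODEV;
+ *	dev = mei_me_dev_init(&pdev->dev, cfg, false);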
+ */ +enum mei_cfg_idx { + MEI_ME_UNDEF_CFG, + MEI_ME_ICH_CFG, + MEI_ME_ICH10_CFG, + MEI_ME_PCH6_CFG, + MEI_ME_PCH7_CFG, + MEI_ME_PCH_CPT_PBG_CFG, + MEI_ME_PCH8_CFG, + MEI_ME_PCH8_ITOUCH_CFG, + MEI_ME_PCH8_SPS_4_CFG, + MEI_ME_PCH12_CFG, + MEI_ME_PCH12_SPS_4_CFG, + MEI_ME_PCH12_SPS_CFG, + MEI_ME_PCH12_SPS_ITOUCH_CFG, + MEI_ME_PCH15_CFG, + MEI_ME_PCH15_SPS_CFG, + MEI_ME_GSC_CFG, + MEI_ME_GSCFI_CFG, + MEI_ME_NUM_CFG, +}; + +const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx); + +struct mei_device *mei_me_dev_init(struct device *parent, + const struct mei_cfg *cfg, bool slow_fw); + +int mei_me_pg_enter_sync(struct mei_device *dev); +int mei_me_pg_exit_sync(struct mei_device *dev); + +irqreturn_t mei_me_irq_quick_handler(int irq, void *dev_id); +irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id); +int mei_me_polling_thread(void *_dev); + +#endif /* _MEI_INTERFACE_H_ */ diff --git a/drivers/misc/mei/hw-txe-regs.h b/drivers/misc/mei/hw-txe-regs.h new file mode 100644 index 0000000000..a92b306dac --- /dev/null +++ b/drivers/misc/mei/hw-txe-regs.h @@ -0,0 +1,239 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* + * Copyright (c) 2013-2014, Intel Corporation. All rights reserved. + * Intel Management Engine Interface (Intel MEI) Linux driver + */ +#ifndef _MEI_HW_TXE_REGS_H_ +#define _MEI_HW_TXE_REGS_H_ + +#include "hw.h" + +#define SEC_ALIVENESS_TIMER_TIMEOUT (5 * MSEC_PER_SEC) +#define SEC_ALIVENESS_WAIT_TIMEOUT (1 * MSEC_PER_SEC) +#define SEC_RESET_WAIT_TIMEOUT (1 * MSEC_PER_SEC) +#define SEC_READY_WAIT_TIMEOUT (5 * MSEC_PER_SEC) +#define START_MESSAGE_RESPONSE_WAIT_TIMEOUT (5 * MSEC_PER_SEC) +#define RESET_CANCEL_WAIT_TIMEOUT (1 * MSEC_PER_SEC) + +enum { + SEC_BAR, + BRIDGE_BAR, + + NUM_OF_MEM_BARS +}; + +/* SeC FW Status Register + * + * FW uses this register in order to report its status to host. + * This register resides in PCI-E config space. + */ +#define PCI_CFG_TXE_FW_STS0 0x40 +# define PCI_CFG_TXE_FW_STS0_WRK_ST_MSK 0x0000000F +# define PCI_CFG_TXE_FW_STS0_OP_ST_MSK 0x000001C0 +# define PCI_CFG_TXE_FW_STS0_FW_INIT_CMPLT 0x00000200 +# define PCI_CFG_TXE_FW_STS0_ERR_CODE_MSK 0x0000F000 +# define PCI_CFG_TXE_FW_STS0_OP_MODE_MSK 0x000F0000 +# define PCI_CFG_TXE_FW_STS0_RST_CNT_MSK 0x00F00000 +#define PCI_CFG_TXE_FW_STS1 0x48 + +#define IPC_BASE_ADDR 0x80400 /* SeC IPC Base Address */ + +/* IPC Input Doorbell Register */ +#define SEC_IPC_INPUT_DOORBELL_REG (0x0000 + IPC_BASE_ADDR) + +/* IPC Input Status Register + * This register indicates whether or not processing of + * the most recent command has been completed by the SEC + * New commands and payloads should not be written by the Host + * until this indicates that the previous command has been processed. 
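+ *
+ * The resulting host transmit sequence (mirrored by mei_txe_write() in
+ * hw-txe.c; "sec" stands for the ioremapped SEC_BAR and "dw" for one payload
+ * dword) is roughly: check readiness, write the payload, ring the doorbell:
+ *
+ *	if (ioread32(sec + SEC_IPC_INPUT_STATUS_REG) & SEC_IPC_INPUT_STATUS_RDY) {
+ *		iowrite32(dw, sec + SEC_IPC_INPUT_PAYLOAD_REG);
+ *		iowrite32(1, sec + SEC_IPC_INPUT_DOORBELL_REG);
+ *	}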
+ */ +#define SEC_IPC_INPUT_STATUS_REG (0x0008 + IPC_BASE_ADDR) +# define SEC_IPC_INPUT_STATUS_RDY BIT(0) + +/* IPC Host Interrupt Status Register */ +#define SEC_IPC_HOST_INT_STATUS_REG (0x0010 + IPC_BASE_ADDR) +#define SEC_IPC_HOST_INT_STATUS_OUT_DB BIT(0) +#define SEC_IPC_HOST_INT_STATUS_IN_RDY BIT(1) +#define SEC_IPC_HOST_INT_STATUS_HDCP_M0_RCVD BIT(5) +#define SEC_IPC_HOST_INT_STATUS_ILL_MEM_ACCESS BIT(17) +#define SEC_IPC_HOST_INT_STATUS_AES_HKEY_ERR BIT(18) +#define SEC_IPC_HOST_INT_STATUS_DES_HKEY_ERR BIT(19) +#define SEC_IPC_HOST_INT_STATUS_TMRMTB_OVERFLOW BIT(21) + +/* Convenient mask for pending interrupts */ +#define SEC_IPC_HOST_INT_STATUS_PENDING \ + (SEC_IPC_HOST_INT_STATUS_OUT_DB| \ + SEC_IPC_HOST_INT_STATUS_IN_RDY) + +/* IPC Host Interrupt Mask Register */ +#define SEC_IPC_HOST_INT_MASK_REG (0x0014 + IPC_BASE_ADDR) + +# define SEC_IPC_HOST_INT_MASK_OUT_DB BIT(0) /* Output Doorbell Int Mask */ +# define SEC_IPC_HOST_INT_MASK_IN_RDY BIT(1) /* Input Ready Int Mask */ + +/* IPC Input Payload RAM */ +#define SEC_IPC_INPUT_PAYLOAD_REG (0x0100 + IPC_BASE_ADDR) +/* IPC Shared Payload RAM */ +#define IPC_SHARED_PAYLOAD_REG (0x0200 + IPC_BASE_ADDR) + +/* SeC Address Translation Table Entry 2 - Ctrl + * + * This register resides also in SeC's PCI-E Memory space. + */ +#define SATT2_CTRL_REG 0x1040 +# define SATT2_CTRL_VALID_MSK BIT(0) +# define SATT2_CTRL_BR_BASE_ADDR_REG_SHIFT 8 +# define SATT2_CTRL_BRIDGE_HOST_EN_MSK BIT(12) + +/* SATT Table Entry 2 SAP Base Address Register */ +#define SATT2_SAP_BA_REG 0x1044 +/* SATT Table Entry 2 SAP Size Register. */ +#define SATT2_SAP_SIZE_REG 0x1048 + /* SATT Table Entry 2 SAP Bridge Address - LSB Register */ +#define SATT2_BRG_BA_LSB_REG 0x104C + +/* Host High-level Interrupt Status Register */ +#define HHISR_REG 0x2020 +/* Host High-level Interrupt Enable Register + * + * Resides in PCI memory space. This is the top hierarchy for + * interrupts from SeC to host, aggregating both interrupts that + * arrive through HICR registers as well as interrupts + * that arrive via IPC. + */ +#define HHIER_REG 0x2024 +#define IPC_HHIER_SEC BIT(0) +#define IPC_HHIER_BRIDGE BIT(1) +#define IPC_HHIER_MSK (IPC_HHIER_SEC | IPC_HHIER_BRIDGE) + +/* Host High-level Interrupt Mask Register. + * + * Resides in PCI memory space. + * This is the top hierarchy for masking interrupts from SeC to host. + */ +#define HHIMR_REG 0x2028 +#define IPC_HHIMR_SEC BIT(0) +#define IPC_HHIMR_BRIDGE BIT(1) + +/* Host High-level IRQ Status Register */ +#define HHIRQSR_REG 0x202C + +/* Host Interrupt Cause Register 0 - SeC IPC Readiness + * + * This register is both an ICR to Host from PCI Memory Space + * and it is also exposed in the SeC memory space. + * This register is used by SeC's IPC driver in order + * to synchronize with host about IPC interface state. + */ +#define HICR_SEC_IPC_READINESS_REG 0x2040 +#define HICR_SEC_IPC_READINESS_HOST_RDY BIT(0) +#define HICR_SEC_IPC_READINESS_SEC_RDY BIT(1) +#define HICR_SEC_IPC_READINESS_SYS_RDY \ + (HICR_SEC_IPC_READINESS_HOST_RDY | \ + HICR_SEC_IPC_READINESS_SEC_RDY) +#define HICR_SEC_IPC_READINESS_RDY_CLR BIT(2) + +/* Host Interrupt Cause Register 1 - Aliveness Response */ +/* This register is both an ICR to Host from PCI Memory Space + * and it is also exposed in the SeC memory space. + * The register may be used by SeC to ACK a host request for aliveness. 
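+ *
+ * Together with SICR_HOST_ALIVENESS_REQ_REG further down this forms the
+ * aliveness handshake driven by hw-txe.c: the host writes its request bit
+ * into SICR_HOST_ALIVENESS_REQ_REG and then waits, by polling or via the
+ * TXE_INTR_ALIVENESS interrupt, until the ACK bit here matches the
+ * requested value.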
+ */ +#define HICR_HOST_ALIVENESS_RESP_REG 0x2044 +#define HICR_HOST_ALIVENESS_RESP_ACK BIT(0) + +/* Host Interrupt Cause Register 2 - SeC IPC Output Doorbell */ +#define HICR_SEC_IPC_OUTPUT_DOORBELL_REG 0x2048 + +/* Host Interrupt Status Register. + * + * Resides in PCI memory space. + * This is the main register involved in generating interrupts + * from SeC to host via HICRs. + * The interrupt generation rules are as follows: + * An interrupt will be generated whenever for any i, + * there is a transition from a state where at least one of + * the following conditions did not hold, to a state where + * ALL the following conditions hold: + * A) HISR.INT[i]_STS == 1. + * B) HIER.INT[i]_EN == 1. + */ +#define HISR_REG 0x2060 +#define HISR_INT_0_STS BIT(0) +#define HISR_INT_1_STS BIT(1) +#define HISR_INT_2_STS BIT(2) +#define HISR_INT_3_STS BIT(3) +#define HISR_INT_4_STS BIT(4) +#define HISR_INT_5_STS BIT(5) +#define HISR_INT_6_STS BIT(6) +#define HISR_INT_7_STS BIT(7) +#define HISR_INT_STS_MSK \ + (HISR_INT_0_STS | HISR_INT_1_STS | HISR_INT_2_STS) + +/* Host Interrupt Enable Register. Resides in PCI memory space. */ +#define HIER_REG 0x2064 +#define HIER_INT_0_EN BIT(0) +#define HIER_INT_1_EN BIT(1) +#define HIER_INT_2_EN BIT(2) +#define HIER_INT_3_EN BIT(3) +#define HIER_INT_4_EN BIT(4) +#define HIER_INT_5_EN BIT(5) +#define HIER_INT_6_EN BIT(6) +#define HIER_INT_7_EN BIT(7) + +#define HIER_INT_EN_MSK \ + (HIER_INT_0_EN | HIER_INT_1_EN | HIER_INT_2_EN) + + +/* SEC Memory Space IPC output payload. + * + * This register is part of the output payload which SEC provides to host. + */ +#define BRIDGE_IPC_OUTPUT_PAYLOAD_REG 0x20C0 + +/* SeC Interrupt Cause Register - Host Aliveness Request + * This register is both an ICR to SeC and it is also exposed + * in the host-visible PCI memory space. + * The register is used by host to request SeC aliveness. + */ +#define SICR_HOST_ALIVENESS_REQ_REG 0x214C +#define SICR_HOST_ALIVENESS_REQ_REQUESTED BIT(0) + + +/* SeC Interrupt Cause Register - Host IPC Readiness + * + * This register is both an ICR to SeC and it is also exposed + * in the host-visible PCI memory space. + * This register is used by the host's SeC driver uses in order + * to synchronize with SeC about IPC interface state. + */ +#define SICR_HOST_IPC_READINESS_REQ_REG 0x2150 + + +#define SICR_HOST_IPC_READINESS_HOST_RDY BIT(0) +#define SICR_HOST_IPC_READINESS_SEC_RDY BIT(1) +#define SICR_HOST_IPC_READINESS_SYS_RDY \ + (SICR_HOST_IPC_READINESS_HOST_RDY | \ + SICR_HOST_IPC_READINESS_SEC_RDY) +#define SICR_HOST_IPC_READINESS_RDY_CLR BIT(2) + +/* SeC Interrupt Cause Register - SeC IPC Output Status + * + * This register indicates whether or not processing of the most recent + * command has been completed by the Host. + * New commands and payloads should not be written by SeC until this + * register indicates that the previous command has been processed. + */ +#define SICR_SEC_IPC_OUTPUT_STATUS_REG 0x2154 +# define SEC_IPC_OUTPUT_STATUS_RDY BIT(0) + + + +/* MEI IPC Message payload size 64 bytes */ +#define PAYLOAD_SIZE 64 + +/* MAX size for SATT range 32MB */ +#define SATT_RANGE_MAX (32 << 20) + + +#endif /* _MEI_HW_TXE_REGS_H_ */ + diff --git a/drivers/misc/mei/hw-txe.c b/drivers/misc/mei/hw-txe.c new file mode 100644 index 0000000000..5d0f68b95c --- /dev/null +++ b/drivers/misc/mei/hw-txe.c @@ -0,0 +1,1256 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2013-2022, Intel Corporation. All rights reserved. 
+ * Intel Management Engine Interface (Intel MEI) Linux driver + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "mei_dev.h" +#include "hw-txe.h" +#include "client.h" +#include "hbm.h" + +#include "mei-trace.h" + +#define TXE_HBUF_DEPTH (PAYLOAD_SIZE / MEI_SLOT_SIZE) + +/** + * mei_txe_reg_read - Reads 32bit data from the txe device + * + * @base_addr: registers base address + * @offset: register offset + * + * Return: register value + */ +static inline u32 mei_txe_reg_read(void __iomem *base_addr, + unsigned long offset) +{ + return ioread32(base_addr + offset); +} + +/** + * mei_txe_reg_write - Writes 32bit data to the txe device + * + * @base_addr: registers base address + * @offset: register offset + * @value: the value to write + */ +static inline void mei_txe_reg_write(void __iomem *base_addr, + unsigned long offset, u32 value) +{ + iowrite32(value, base_addr + offset); +} + +/** + * mei_txe_sec_reg_read_silent - Reads 32bit data from the SeC BAR + * + * @hw: the txe hardware structure + * @offset: register offset + * + * Doesn't check for aliveness while Reads 32bit data from the SeC BAR + * + * Return: register value + */ +static inline u32 mei_txe_sec_reg_read_silent(struct mei_txe_hw *hw, + unsigned long offset) +{ + return mei_txe_reg_read(hw->mem_addr[SEC_BAR], offset); +} + +/** + * mei_txe_sec_reg_read - Reads 32bit data from the SeC BAR + * + * @hw: the txe hardware structure + * @offset: register offset + * + * Reads 32bit data from the SeC BAR and shout loud if aliveness is not set + * + * Return: register value + */ +static inline u32 mei_txe_sec_reg_read(struct mei_txe_hw *hw, + unsigned long offset) +{ + WARN(!hw->aliveness, "sec read: aliveness not asserted\n"); + return mei_txe_sec_reg_read_silent(hw, offset); +} +/** + * mei_txe_sec_reg_write_silent - Writes 32bit data to the SeC BAR + * doesn't check for aliveness + * + * @hw: the txe hardware structure + * @offset: register offset + * @value: value to write + * + * Doesn't check for aliveness while writes 32bit data from to the SeC BAR + */ +static inline void mei_txe_sec_reg_write_silent(struct mei_txe_hw *hw, + unsigned long offset, u32 value) +{ + mei_txe_reg_write(hw->mem_addr[SEC_BAR], offset, value); +} + +/** + * mei_txe_sec_reg_write - Writes 32bit data to the SeC BAR + * + * @hw: the txe hardware structure + * @offset: register offset + * @value: value to write + * + * Writes 32bit data from the SeC BAR and shout loud if aliveness is not set + */ +static inline void mei_txe_sec_reg_write(struct mei_txe_hw *hw, + unsigned long offset, u32 value) +{ + WARN(!hw->aliveness, "sec write: aliveness not asserted\n"); + mei_txe_sec_reg_write_silent(hw, offset, value); +} +/** + * mei_txe_br_reg_read - Reads 32bit data from the Bridge BAR + * + * @hw: the txe hardware structure + * @offset: offset from which to read the data + * + * Return: the byte read. 
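+ *	   (in practice the full 32-bit register value obtained via ioread32())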
+ */ +static inline u32 mei_txe_br_reg_read(struct mei_txe_hw *hw, + unsigned long offset) +{ + return mei_txe_reg_read(hw->mem_addr[BRIDGE_BAR], offset); +} + +/** + * mei_txe_br_reg_write - Writes 32bit data to the Bridge BAR + * + * @hw: the txe hardware structure + * @offset: offset from which to write the data + * @value: the byte to write + */ +static inline void mei_txe_br_reg_write(struct mei_txe_hw *hw, + unsigned long offset, u32 value) +{ + mei_txe_reg_write(hw->mem_addr[BRIDGE_BAR], offset, value); +} + +/** + * mei_txe_aliveness_set - request for aliveness change + * + * @dev: the device structure + * @req: requested aliveness value + * + * Request for aliveness change and returns true if the change is + * really needed and false if aliveness is already + * in the requested state + * + * Locking: called under "dev->device_lock" lock + * + * Return: true if request was send + */ +static bool mei_txe_aliveness_set(struct mei_device *dev, u32 req) +{ + + struct mei_txe_hw *hw = to_txe_hw(dev); + bool do_req = hw->aliveness != req; + + dev_dbg(dev->dev, "Aliveness current=%d request=%d\n", + hw->aliveness, req); + if (do_req) { + dev->pg_event = MEI_PG_EVENT_WAIT; + mei_txe_br_reg_write(hw, SICR_HOST_ALIVENESS_REQ_REG, req); + } + return do_req; +} + + +/** + * mei_txe_aliveness_req_get - get aliveness requested register value + * + * @dev: the device structure + * + * Extract HICR_HOST_ALIVENESS_RESP_ACK bit from + * HICR_HOST_ALIVENESS_REQ register value + * + * Return: SICR_HOST_ALIVENESS_REQ_REQUESTED bit value + */ +static u32 mei_txe_aliveness_req_get(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + u32 reg; + + reg = mei_txe_br_reg_read(hw, SICR_HOST_ALIVENESS_REQ_REG); + return reg & SICR_HOST_ALIVENESS_REQ_REQUESTED; +} + +/** + * mei_txe_aliveness_get - get aliveness response register value + * + * @dev: the device structure + * + * Return: HICR_HOST_ALIVENESS_RESP_ACK bit from HICR_HOST_ALIVENESS_RESP + * register + */ +static u32 mei_txe_aliveness_get(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + u32 reg; + + reg = mei_txe_br_reg_read(hw, HICR_HOST_ALIVENESS_RESP_REG); + return reg & HICR_HOST_ALIVENESS_RESP_ACK; +} + +/** + * mei_txe_aliveness_poll - waits for aliveness to settle + * + * @dev: the device structure + * @expected: expected aliveness value + * + * Polls for HICR_HOST_ALIVENESS_RESP.ALIVENESS_RESP to be set + * + * Return: 0 if the expected value was received, -ETIME otherwise + */ +static int mei_txe_aliveness_poll(struct mei_device *dev, u32 expected) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + ktime_t stop, start; + + start = ktime_get(); + stop = ktime_add(start, ms_to_ktime(SEC_ALIVENESS_WAIT_TIMEOUT)); + do { + hw->aliveness = mei_txe_aliveness_get(dev); + if (hw->aliveness == expected) { + dev->pg_event = MEI_PG_EVENT_IDLE; + dev_dbg(dev->dev, "aliveness settled after %lld usecs\n", + ktime_to_us(ktime_sub(ktime_get(), start))); + return 0; + } + usleep_range(20, 50); + } while (ktime_before(ktime_get(), stop)); + + dev->pg_event = MEI_PG_EVENT_IDLE; + dev_err(dev->dev, "aliveness timed out\n"); + return -ETIME; +} + +/** + * mei_txe_aliveness_wait - waits for aliveness to settle + * + * @dev: the device structure + * @expected: expected aliveness value + * + * Waits for HICR_HOST_ALIVENESS_RESP.ALIVENESS_RESP to be set + * + * Return: 0 on success and < 0 otherwise + */ +static int mei_txe_aliveness_wait(struct mei_device *dev, u32 expected) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + const 
unsigned long timeout = + msecs_to_jiffies(SEC_ALIVENESS_WAIT_TIMEOUT); + long err; + int ret; + + hw->aliveness = mei_txe_aliveness_get(dev); + if (hw->aliveness == expected) + return 0; + + mutex_unlock(&dev->device_lock); + err = wait_event_timeout(hw->wait_aliveness_resp, + dev->pg_event == MEI_PG_EVENT_RECEIVED, timeout); + mutex_lock(&dev->device_lock); + + hw->aliveness = mei_txe_aliveness_get(dev); + ret = hw->aliveness == expected ? 0 : -ETIME; + + if (ret) + dev_warn(dev->dev, "aliveness timed out = %ld aliveness = %d event = %d\n", + err, hw->aliveness, dev->pg_event); + else + dev_dbg(dev->dev, "aliveness settled after = %d msec aliveness = %d event = %d\n", + jiffies_to_msecs(timeout - err), + hw->aliveness, dev->pg_event); + + dev->pg_event = MEI_PG_EVENT_IDLE; + return ret; +} + +/** + * mei_txe_aliveness_set_sync - sets an wait for aliveness to complete + * + * @dev: the device structure + * @req: requested aliveness value + * + * Return: 0 on success and < 0 otherwise + */ +int mei_txe_aliveness_set_sync(struct mei_device *dev, u32 req) +{ + if (mei_txe_aliveness_set(dev, req)) + return mei_txe_aliveness_wait(dev, req); + return 0; +} + +/** + * mei_txe_pg_in_transition - is device now in pg transition + * + * @dev: the device structure + * + * Return: true if in pg transition, false otherwise + */ +static bool mei_txe_pg_in_transition(struct mei_device *dev) +{ + return dev->pg_event == MEI_PG_EVENT_WAIT; +} + +/** + * mei_txe_pg_is_enabled - detect if PG is supported by HW + * + * @dev: the device structure + * + * Return: true is pg supported, false otherwise + */ +static bool mei_txe_pg_is_enabled(struct mei_device *dev) +{ + return true; +} + +/** + * mei_txe_pg_state - translate aliveness register value + * to the mei power gating state + * + * @dev: the device structure + * + * Return: MEI_PG_OFF if aliveness is on and MEI_PG_ON otherwise + */ +static inline enum mei_pg_state mei_txe_pg_state(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + return hw->aliveness ? MEI_PG_OFF : MEI_PG_ON; +} + +/** + * mei_txe_input_ready_interrupt_enable - sets the Input Ready Interrupt + * + * @dev: the device structure + */ +static void mei_txe_input_ready_interrupt_enable(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + u32 hintmsk; + /* Enable the SEC_IPC_HOST_INT_MASK_IN_RDY interrupt */ + hintmsk = mei_txe_sec_reg_read(hw, SEC_IPC_HOST_INT_MASK_REG); + hintmsk |= SEC_IPC_HOST_INT_MASK_IN_RDY; + mei_txe_sec_reg_write(hw, SEC_IPC_HOST_INT_MASK_REG, hintmsk); +} + +/** + * mei_txe_input_doorbell_set - sets bit 0 in + * SEC_IPC_INPUT_DOORBELL.IPC_INPUT_DOORBELL. 
+ * + * @hw: the txe hardware structure + */ +static void mei_txe_input_doorbell_set(struct mei_txe_hw *hw) +{ + /* Clear the interrupt cause */ + clear_bit(TXE_INTR_IN_READY_BIT, &hw->intr_cause); + mei_txe_sec_reg_write(hw, SEC_IPC_INPUT_DOORBELL_REG, 1); +} + +/** + * mei_txe_output_ready_set - Sets the SICR_SEC_IPC_OUTPUT_STATUS bit to 1 + * + * @hw: the txe hardware structure + */ +static void mei_txe_output_ready_set(struct mei_txe_hw *hw) +{ + mei_txe_br_reg_write(hw, + SICR_SEC_IPC_OUTPUT_STATUS_REG, + SEC_IPC_OUTPUT_STATUS_RDY); +} + +/** + * mei_txe_is_input_ready - check if TXE is ready for receiving data + * + * @dev: the device structure + * + * Return: true if INPUT STATUS READY bit is set + */ +static bool mei_txe_is_input_ready(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + u32 status; + + status = mei_txe_sec_reg_read(hw, SEC_IPC_INPUT_STATUS_REG); + return !!(SEC_IPC_INPUT_STATUS_RDY & status); +} + +/** + * mei_txe_intr_clear - clear all interrupts + * + * @dev: the device structure + */ +static inline void mei_txe_intr_clear(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + mei_txe_sec_reg_write_silent(hw, SEC_IPC_HOST_INT_STATUS_REG, + SEC_IPC_HOST_INT_STATUS_PENDING); + mei_txe_br_reg_write(hw, HISR_REG, HISR_INT_STS_MSK); + mei_txe_br_reg_write(hw, HHISR_REG, IPC_HHIER_MSK); +} + +/** + * mei_txe_intr_disable - disable all interrupts + * + * @dev: the device structure + */ +static void mei_txe_intr_disable(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + mei_txe_br_reg_write(hw, HHIER_REG, 0); + mei_txe_br_reg_write(hw, HIER_REG, 0); +} +/** + * mei_txe_intr_enable - enable all interrupts + * + * @dev: the device structure + */ +static void mei_txe_intr_enable(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + mei_txe_br_reg_write(hw, HHIER_REG, IPC_HHIER_MSK); + mei_txe_br_reg_write(hw, HIER_REG, HIER_INT_EN_MSK); +} + +/** + * mei_txe_synchronize_irq - wait for pending IRQ handlers + * + * @dev: the device structure + */ +static void mei_txe_synchronize_irq(struct mei_device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + + synchronize_irq(pdev->irq); +} + +/** + * mei_txe_pending_interrupts - check if there are pending interrupts + * only Aliveness, Input ready, and output doorbell are of relevance + * + * @dev: the device structure + * + * Checks if there are pending interrupts + * only Aliveness, Readiness, Input ready, and Output doorbell are relevant + * + * Return: true if there are pending interrupts + */ +static bool mei_txe_pending_interrupts(struct mei_device *dev) +{ + + struct mei_txe_hw *hw = to_txe_hw(dev); + bool ret = (hw->intr_cause & (TXE_INTR_READINESS | + TXE_INTR_ALIVENESS | + TXE_INTR_IN_READY | + TXE_INTR_OUT_DB)); + + if (ret) { + dev_dbg(dev->dev, + "Pending Interrupts InReady=%01d Readiness=%01d, Aliveness=%01d, OutDoor=%01d\n", + !!(hw->intr_cause & TXE_INTR_IN_READY), + !!(hw->intr_cause & TXE_INTR_READINESS), + !!(hw->intr_cause & TXE_INTR_ALIVENESS), + !!(hw->intr_cause & TXE_INTR_OUT_DB)); + } + return ret; +} + +/** + * mei_txe_input_payload_write - write a dword to the host buffer + * at offset idx + * + * @dev: the device structure + * @idx: index in the host buffer + * @value: value + */ +static void mei_txe_input_payload_write(struct mei_device *dev, + unsigned long idx, u32 value) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + mei_txe_sec_reg_write(hw, SEC_IPC_INPUT_PAYLOAD_REG + + (idx * sizeof(u32)), value); +} + +/** + * 
mei_txe_out_data_read - read dword from the device buffer + * at offset idx + * + * @dev: the device structure + * @idx: index in the device buffer + * + * Return: register value at index + */ +static u32 mei_txe_out_data_read(const struct mei_device *dev, + unsigned long idx) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + return mei_txe_br_reg_read(hw, + BRIDGE_IPC_OUTPUT_PAYLOAD_REG + (idx * sizeof(u32))); +} + +/* Readiness */ + +/** + * mei_txe_readiness_set_host_rdy - set host readiness bit + * + * @dev: the device structure + */ +static void mei_txe_readiness_set_host_rdy(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + mei_txe_br_reg_write(hw, + SICR_HOST_IPC_READINESS_REQ_REG, + SICR_HOST_IPC_READINESS_HOST_RDY); +} + +/** + * mei_txe_readiness_clear - clear host readiness bit + * + * @dev: the device structure + */ +static void mei_txe_readiness_clear(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + mei_txe_br_reg_write(hw, SICR_HOST_IPC_READINESS_REQ_REG, + SICR_HOST_IPC_READINESS_RDY_CLR); +} +/** + * mei_txe_readiness_get - Reads and returns + * the HICR_SEC_IPC_READINESS register value + * + * @dev: the device structure + * + * Return: the HICR_SEC_IPC_READINESS register value + */ +static u32 mei_txe_readiness_get(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + return mei_txe_br_reg_read(hw, HICR_SEC_IPC_READINESS_REG); +} + + +/** + * mei_txe_readiness_is_sec_rdy - check readiness + * for HICR_SEC_IPC_READINESS_SEC_RDY + * + * @readiness: cached readiness state + * + * Return: true if readiness bit is set + */ +static inline bool mei_txe_readiness_is_sec_rdy(u32 readiness) +{ + return !!(readiness & HICR_SEC_IPC_READINESS_SEC_RDY); +} + +/** + * mei_txe_hw_is_ready - check if the hw is ready + * + * @dev: the device structure + * + * Return: true if sec is ready + */ +static bool mei_txe_hw_is_ready(struct mei_device *dev) +{ + u32 readiness = mei_txe_readiness_get(dev); + + return mei_txe_readiness_is_sec_rdy(readiness); +} + +/** + * mei_txe_host_is_ready - check if the host is ready + * + * @dev: the device structure + * + * Return: true if host is ready + */ +static inline bool mei_txe_host_is_ready(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + u32 reg = mei_txe_br_reg_read(hw, HICR_SEC_IPC_READINESS_REG); + + return !!(reg & HICR_SEC_IPC_READINESS_HOST_RDY); +} + +/** + * mei_txe_readiness_wait - wait till readiness settles + * + * @dev: the device structure + * + * Return: 0 on success and -ETIME on timeout + */ +static int mei_txe_readiness_wait(struct mei_device *dev) +{ + if (mei_txe_hw_is_ready(dev)) + return 0; + + mutex_unlock(&dev->device_lock); + wait_event_timeout(dev->wait_hw_ready, dev->recvd_hw_ready, + msecs_to_jiffies(SEC_RESET_WAIT_TIMEOUT)); + mutex_lock(&dev->device_lock); + if (!dev->recvd_hw_ready) { + dev_err(dev->dev, "wait for readiness failed\n"); + return -ETIME; + } + + dev->recvd_hw_ready = false; + return 0; +} + +static const struct mei_fw_status mei_txe_fw_sts = { + .count = 2, + .status[0] = PCI_CFG_TXE_FW_STS0, + .status[1] = PCI_CFG_TXE_FW_STS1 +}; + +/** + * mei_txe_fw_status - read fw status register from pci config space + * + * @dev: mei device + * @fw_status: fw status register values + * + * Return: 0 on success, error otherwise + */ +static int mei_txe_fw_status(struct mei_device *dev, + struct mei_fw_status *fw_status) +{ + const struct mei_fw_status *fw_src = &mei_txe_fw_sts; + struct pci_dev *pdev = to_pci_dev(dev->dev); + int 
ret; + int i; + + if (!fw_status) + return -EINVAL; + + fw_status->count = fw_src->count; + for (i = 0; i < fw_src->count && i < MEI_FW_STATUS_MAX; i++) { + ret = pci_read_config_dword(pdev, fw_src->status[i], + &fw_status->status[i]); + trace_mei_pci_cfg_read(dev->dev, "PCI_CFG_HSF_X", + fw_src->status[i], + fw_status->status[i]); + if (ret) + return ret; + } + + return 0; +} + +/** + * mei_txe_hw_config - configure hardware at the start of the devices + * + * @dev: the device structure + * + * Configure hardware at the start of the device should be done only + * once at the device probe time + * + * Return: always 0 + */ +static int mei_txe_hw_config(struct mei_device *dev) +{ + + struct mei_txe_hw *hw = to_txe_hw(dev); + + hw->aliveness = mei_txe_aliveness_get(dev); + hw->readiness = mei_txe_readiness_get(dev); + + dev_dbg(dev->dev, "aliveness_resp = 0x%08x, readiness = 0x%08x.\n", + hw->aliveness, hw->readiness); + + return 0; +} + +/** + * mei_txe_write - writes a message to device. + * + * @dev: the device structure + * @hdr: header of message + * @hdr_len: header length in bytes - must multiplication of a slot (4bytes) + * @data: payload + * @data_len: paylead length in bytes + * + * Return: 0 if success, < 0 - otherwise. + */ +static int mei_txe_write(struct mei_device *dev, + const void *hdr, size_t hdr_len, + const void *data, size_t data_len) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + unsigned long rem; + const u32 *reg_buf; + u32 slots = TXE_HBUF_DEPTH; + u32 dw_cnt; + unsigned long i, j; + + if (WARN_ON(!hdr || !data || hdr_len & 0x3)) + return -EINVAL; + + dev_dbg(dev->dev, MEI_HDR_FMT, MEI_HDR_PRM((struct mei_msg_hdr *)hdr)); + + dw_cnt = mei_data2slots(hdr_len + data_len); + if (dw_cnt > slots) + return -EMSGSIZE; + + if (WARN(!hw->aliveness, "txe write: aliveness not asserted\n")) + return -EAGAIN; + + /* Enable Input Ready Interrupt. 
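+	 * The SEC_IPC_HOST_INT_MASK_IN_RDY interrupt is expected to fire once
+	 * the firmware has consumed the message written below; the interrupt
+	 * thread then flags TXE_INTR_IN_READY and marks the host buffer
+	 * writable again.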
*/ + mei_txe_input_ready_interrupt_enable(dev); + + if (!mei_txe_is_input_ready(dev)) { + char fw_sts_str[MEI_FW_STATUS_STR_SZ]; + + mei_fw_status_str(dev, fw_sts_str, MEI_FW_STATUS_STR_SZ); + dev_err(dev->dev, "Input is not ready %s\n", fw_sts_str); + return -EAGAIN; + } + + reg_buf = hdr; + for (i = 0; i < hdr_len / MEI_SLOT_SIZE; i++) + mei_txe_input_payload_write(dev, i, reg_buf[i]); + + reg_buf = data; + for (j = 0; j < data_len / MEI_SLOT_SIZE; j++) + mei_txe_input_payload_write(dev, i + j, reg_buf[j]); + + rem = data_len & 0x3; + if (rem > 0) { + u32 reg = 0; + + memcpy(®, (const u8 *)data + data_len - rem, rem); + mei_txe_input_payload_write(dev, i + j, reg); + } + + /* after each write the whole buffer is consumed */ + hw->slots = 0; + + /* Set Input-Doorbell */ + mei_txe_input_doorbell_set(hw); + + return 0; +} + +/** + * mei_txe_hbuf_depth - mimics the me hbuf circular buffer + * + * @dev: the device structure + * + * Return: the TXE_HBUF_DEPTH + */ +static u32 mei_txe_hbuf_depth(const struct mei_device *dev) +{ + return TXE_HBUF_DEPTH; +} + +/** + * mei_txe_hbuf_empty_slots - mimics the me hbuf circular buffer + * + * @dev: the device structure + * + * Return: always TXE_HBUF_DEPTH + */ +static int mei_txe_hbuf_empty_slots(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + return hw->slots; +} + +/** + * mei_txe_count_full_read_slots - mimics the me device circular buffer + * + * @dev: the device structure + * + * Return: always buffer size in dwords count + */ +static int mei_txe_count_full_read_slots(struct mei_device *dev) +{ + /* read buffers has static size */ + return TXE_HBUF_DEPTH; +} + +/** + * mei_txe_read_hdr - read message header which is always in 4 first bytes + * + * @dev: the device structure + * + * Return: mei message header + */ + +static u32 mei_txe_read_hdr(const struct mei_device *dev) +{ + return mei_txe_out_data_read(dev, 0); +} +/** + * mei_txe_read - reads a message from the txe device. + * + * @dev: the device structure + * @buf: message buffer will be written + * @len: message size will be read + * + * Return: -EINVAL on error wrong argument and 0 on success + */ +static int mei_txe_read(struct mei_device *dev, + unsigned char *buf, unsigned long len) +{ + + struct mei_txe_hw *hw = to_txe_hw(dev); + u32 *reg_buf, reg; + u32 rem; + u32 i; + + if (WARN_ON(!buf || !len)) + return -EINVAL; + + reg_buf = (u32 *)buf; + rem = len & 0x3; + + dev_dbg(dev->dev, "buffer-length = %lu buf[0]0x%08X\n", + len, mei_txe_out_data_read(dev, 0)); + + for (i = 0; i < len / MEI_SLOT_SIZE; i++) { + /* skip header: index starts from 1 */ + reg = mei_txe_out_data_read(dev, i + 1); + dev_dbg(dev->dev, "buf[%d] = 0x%08X\n", i, reg); + *reg_buf++ = reg; + } + + if (rem) { + reg = mei_txe_out_data_read(dev, i + 1); + memcpy(reg_buf, ®, rem); + } + + mei_txe_output_ready_set(hw); + return 0; +} + +/** + * mei_txe_hw_reset - resets host and fw. + * + * @dev: the device structure + * @intr_enable: if interrupt should be enabled after reset. 
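+ *
+ * (In this TXE implementation the parameter is not consulted, presumably
+ *  kept for symmetry with the ME variant of the hw_reset operation:
+ *  interrupts are unconditionally disabled here and re-enabled later in
+ *  mei_txe_hw_start().)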
+ * + * Return: 0 on success and < 0 in case of error + */ +static int mei_txe_hw_reset(struct mei_device *dev, bool intr_enable) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + u32 aliveness_req; + /* + * read input doorbell to ensure consistency between Bridge and SeC + * return value might be garbage return + */ + (void)mei_txe_sec_reg_read_silent(hw, SEC_IPC_INPUT_DOORBELL_REG); + + aliveness_req = mei_txe_aliveness_req_get(dev); + hw->aliveness = mei_txe_aliveness_get(dev); + + /* Disable interrupts in this stage we will poll */ + mei_txe_intr_disable(dev); + + /* + * If Aliveness Request and Aliveness Response are not equal then + * wait for them to be equal + * Since we might have interrupts disabled - poll for it + */ + if (aliveness_req != hw->aliveness) + if (mei_txe_aliveness_poll(dev, aliveness_req) < 0) { + dev_err(dev->dev, "wait for aliveness settle failed ... bailing out\n"); + return -EIO; + } + + /* + * If Aliveness Request and Aliveness Response are set then clear them + */ + if (aliveness_req) { + mei_txe_aliveness_set(dev, 0); + if (mei_txe_aliveness_poll(dev, 0) < 0) { + dev_err(dev->dev, "wait for aliveness failed ... bailing out\n"); + return -EIO; + } + } + + /* + * Set readiness RDY_CLR bit + */ + mei_txe_readiness_clear(dev); + + return 0; +} + +/** + * mei_txe_hw_start - start the hardware after reset + * + * @dev: the device structure + * + * Return: 0 on success an error code otherwise + */ +static int mei_txe_hw_start(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + int ret; + + u32 hisr; + + /* bring back interrupts */ + mei_txe_intr_enable(dev); + + ret = mei_txe_readiness_wait(dev); + if (ret < 0) { + dev_err(dev->dev, "waiting for readiness failed\n"); + return ret; + } + + /* + * If HISR.INT2_STS interrupt status bit is set then clear it. + */ + hisr = mei_txe_br_reg_read(hw, HISR_REG); + if (hisr & HISR_INT_2_STS) + mei_txe_br_reg_write(hw, HISR_REG, HISR_INT_2_STS); + + /* Clear the interrupt cause of OutputDoorbell */ + clear_bit(TXE_INTR_OUT_DB_BIT, &hw->intr_cause); + + ret = mei_txe_aliveness_set_sync(dev, 1); + if (ret < 0) { + dev_err(dev->dev, "wait for aliveness failed ... bailing out\n"); + return ret; + } + + pm_runtime_set_active(dev->dev); + + /* enable input ready interrupts: + * SEC_IPC_HOST_INT_MASK.IPC_INPUT_READY_INT_MASK + */ + mei_txe_input_ready_interrupt_enable(dev); + + + /* Set the SICR_SEC_IPC_OUTPUT_STATUS.IPC_OUTPUT_READY bit */ + mei_txe_output_ready_set(hw); + + /* Set bit SICR_HOST_IPC_READINESS.HOST_RDY + */ + mei_txe_readiness_set_host_rdy(dev); + + return 0; +} + +/** + * mei_txe_check_and_ack_intrs - translate multi BAR interrupt into + * single bit mask and acknowledge the interrupts + * + * @dev: the device structure + * @do_ack: acknowledge interrupts + * + * Return: true if found interrupts to process. 
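+ *
+ * Called with @do_ack set both from mei_txe_irq_quick_handler() and, when
+ * the device runs with MSI enabled, again from the interrupt thread before
+ * the accumulated causes in hw->intr_cause are examined.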
+ */ +static bool mei_txe_check_and_ack_intrs(struct mei_device *dev, bool do_ack) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + u32 hisr; + u32 hhisr; + u32 ipc_isr; + u32 aliveness; + bool generated; + + /* read interrupt registers */ + hhisr = mei_txe_br_reg_read(hw, HHISR_REG); + generated = (hhisr & IPC_HHIER_MSK); + if (!generated) + goto out; + + hisr = mei_txe_br_reg_read(hw, HISR_REG); + + aliveness = mei_txe_aliveness_get(dev); + if (hhisr & IPC_HHIER_SEC && aliveness) { + ipc_isr = mei_txe_sec_reg_read_silent(hw, + SEC_IPC_HOST_INT_STATUS_REG); + } else { + ipc_isr = 0; + hhisr &= ~IPC_HHIER_SEC; + } + + if (do_ack) { + /* Save the interrupt causes */ + hw->intr_cause |= hisr & HISR_INT_STS_MSK; + if (ipc_isr & SEC_IPC_HOST_INT_STATUS_IN_RDY) + hw->intr_cause |= TXE_INTR_IN_READY; + + + mei_txe_intr_disable(dev); + /* Clear the interrupts in hierarchy: + * IPC and Bridge, than the High Level */ + mei_txe_sec_reg_write_silent(hw, + SEC_IPC_HOST_INT_STATUS_REG, ipc_isr); + mei_txe_br_reg_write(hw, HISR_REG, hisr); + mei_txe_br_reg_write(hw, HHISR_REG, hhisr); + } + +out: + return generated; +} + +/** + * mei_txe_irq_quick_handler - The ISR of the MEI device + * + * @irq: The irq number + * @dev_id: pointer to the device structure + * + * Return: IRQ_WAKE_THREAD if interrupt is designed for the device + * IRQ_NONE otherwise + */ +irqreturn_t mei_txe_irq_quick_handler(int irq, void *dev_id) +{ + struct mei_device *dev = dev_id; + + if (mei_txe_check_and_ack_intrs(dev, true)) + return IRQ_WAKE_THREAD; + return IRQ_NONE; +} + + +/** + * mei_txe_irq_thread_handler - txe interrupt thread + * + * @irq: The irq number + * @dev_id: pointer to the device structure + * + * Return: IRQ_HANDLED + */ +irqreturn_t mei_txe_irq_thread_handler(int irq, void *dev_id) +{ + struct mei_device *dev = (struct mei_device *) dev_id; + struct mei_txe_hw *hw = to_txe_hw(dev); + struct list_head cmpl_list; + s32 slots; + int rets = 0; + + dev_dbg(dev->dev, "irq thread: Interrupt Registers HHISR|HISR|SEC=%02X|%04X|%02X\n", + mei_txe_br_reg_read(hw, HHISR_REG), + mei_txe_br_reg_read(hw, HISR_REG), + mei_txe_sec_reg_read_silent(hw, SEC_IPC_HOST_INT_STATUS_REG)); + + + /* initialize our complete list */ + mutex_lock(&dev->device_lock); + INIT_LIST_HEAD(&cmpl_list); + + if (pci_dev_msi_enabled(to_pci_dev(dev->dev))) + mei_txe_check_and_ack_intrs(dev, true); + + /* show irq events */ + mei_txe_pending_interrupts(dev); + + hw->aliveness = mei_txe_aliveness_get(dev); + hw->readiness = mei_txe_readiness_get(dev); + + /* Readiness: + * Detection of TXE driver going through reset + * or TXE driver resetting the HECI interface. + */ + if (test_and_clear_bit(TXE_INTR_READINESS_BIT, &hw->intr_cause)) { + dev_dbg(dev->dev, "Readiness Interrupt was received...\n"); + + /* Check if SeC is going through reset */ + if (mei_txe_readiness_is_sec_rdy(hw->readiness)) { + dev_dbg(dev->dev, "we need to start the dev.\n"); + dev->recvd_hw_ready = true; + } else { + dev->recvd_hw_ready = false; + if (dev->dev_state != MEI_DEV_RESETTING) { + + dev_warn(dev->dev, "FW not ready: resetting.\n"); + schedule_work(&dev->reset_work); + goto end; + + } + } + wake_up(&dev->wait_hw_ready); + } + + /************************************************************/ + /* Check interrupt cause: + * Aliveness: Detection of SeC acknowledge of host request that + * it remain alive or host cancellation of that request. 
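+	 * The wake_up() below pairs with mei_txe_aliveness_wait(), which
+	 * sleeps on hw->wait_aliveness_resp until pg_event reads
+	 * MEI_PG_EVENT_RECEIVED.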
+ */ + + if (test_and_clear_bit(TXE_INTR_ALIVENESS_BIT, &hw->intr_cause)) { + /* Clear the interrupt cause */ + dev_dbg(dev->dev, + "Aliveness Interrupt: Status: %d\n", hw->aliveness); + dev->pg_event = MEI_PG_EVENT_RECEIVED; + if (waitqueue_active(&hw->wait_aliveness_resp)) + wake_up(&hw->wait_aliveness_resp); + } + + + /* Output Doorbell: + * Detection of SeC having sent output to host + */ + slots = mei_count_full_read_slots(dev); + if (test_and_clear_bit(TXE_INTR_OUT_DB_BIT, &hw->intr_cause)) { + /* Read from TXE */ + rets = mei_irq_read_handler(dev, &cmpl_list, &slots); + if (rets && + (dev->dev_state != MEI_DEV_RESETTING && + dev->dev_state != MEI_DEV_POWER_DOWN)) { + dev_err(dev->dev, + "mei_irq_read_handler ret = %d.\n", rets); + + schedule_work(&dev->reset_work); + goto end; + } + } + /* Input Ready: Detection if host can write to SeC */ + if (test_and_clear_bit(TXE_INTR_IN_READY_BIT, &hw->intr_cause)) { + dev->hbuf_is_ready = true; + hw->slots = TXE_HBUF_DEPTH; + } + + if (hw->aliveness && dev->hbuf_is_ready) { + /* get the real register value */ + dev->hbuf_is_ready = mei_hbuf_is_ready(dev); + rets = mei_irq_write_handler(dev, &cmpl_list); + if (rets && rets != -EMSGSIZE) + dev_err(dev->dev, "mei_irq_write_handler ret = %d.\n", + rets); + dev->hbuf_is_ready = mei_hbuf_is_ready(dev); + } + + mei_irq_compl_handler(dev, &cmpl_list); + +end: + dev_dbg(dev->dev, "interrupt thread end ret = %d\n", rets); + + mutex_unlock(&dev->device_lock); + + mei_enable_interrupts(dev); + return IRQ_HANDLED; +} + +static const struct mei_hw_ops mei_txe_hw_ops = { + + .host_is_ready = mei_txe_host_is_ready, + + .fw_status = mei_txe_fw_status, + .pg_state = mei_txe_pg_state, + + .hw_is_ready = mei_txe_hw_is_ready, + .hw_reset = mei_txe_hw_reset, + .hw_config = mei_txe_hw_config, + .hw_start = mei_txe_hw_start, + + .pg_in_transition = mei_txe_pg_in_transition, + .pg_is_enabled = mei_txe_pg_is_enabled, + + .intr_clear = mei_txe_intr_clear, + .intr_enable = mei_txe_intr_enable, + .intr_disable = mei_txe_intr_disable, + .synchronize_irq = mei_txe_synchronize_irq, + + .hbuf_free_slots = mei_txe_hbuf_empty_slots, + .hbuf_is_ready = mei_txe_is_input_ready, + .hbuf_depth = mei_txe_hbuf_depth, + + .write = mei_txe_write, + + .rdbuf_full_slots = mei_txe_count_full_read_slots, + .read_hdr = mei_txe_read_hdr, + + .read = mei_txe_read, + +}; + +/** + * mei_txe_dev_init - allocates and initializes txe hardware specific structure + * + * @pdev: pci device + * + * Return: struct mei_device * on success or NULL + */ +struct mei_device *mei_txe_dev_init(struct pci_dev *pdev) +{ + struct mei_device *dev; + struct mei_txe_hw *hw; + + dev = devm_kzalloc(&pdev->dev, sizeof(*dev) + sizeof(*hw), GFP_KERNEL); + if (!dev) + return NULL; + + mei_device_init(dev, &pdev->dev, false, &mei_txe_hw_ops); + + hw = to_txe_hw(dev); + + init_waitqueue_head(&hw->wait_aliveness_resp); + + return dev; +} + +/** + * mei_txe_setup_satt2 - SATT2 configuration for DMA support. 
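+ *
+ * Programs SeC Address Translation Table entry 2 so the bridge can reach a
+ * host physical range for DMA. Per the checks below, the address must fit
+ * in 36 bits and be 16-byte aligned, and the size may not exceed
+ * SATT_RANGE_MAX (32 MB).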
+ * + * @dev: the device structure + * @addr: physical address start of the range + * @range: physical range size + * + * Return: 0 on success an error code otherwise + */ +int mei_txe_setup_satt2(struct mei_device *dev, phys_addr_t addr, u32 range) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + u32 lo32 = lower_32_bits(addr); + u32 hi32 = upper_32_bits(addr); + u32 ctrl; + + /* SATT is limited to 36 Bits */ + if (hi32 & ~0xF) + return -EINVAL; + + /* SATT has to be 16Byte aligned */ + if (lo32 & 0xF) + return -EINVAL; + + /* SATT range has to be 4Bytes aligned */ + if (range & 0x4) + return -EINVAL; + + /* SATT is limited to 32 MB range*/ + if (range > SATT_RANGE_MAX) + return -EINVAL; + + ctrl = SATT2_CTRL_VALID_MSK; + ctrl |= hi32 << SATT2_CTRL_BR_BASE_ADDR_REG_SHIFT; + + mei_txe_br_reg_write(hw, SATT2_SAP_SIZE_REG, range); + mei_txe_br_reg_write(hw, SATT2_BRG_BA_LSB_REG, lo32); + mei_txe_br_reg_write(hw, SATT2_CTRL_REG, ctrl); + dev_dbg(dev->dev, "SATT2: SAP_SIZE_OFFSET=0x%08X, BRG_BA_LSB_OFFSET=0x%08X, CTRL_OFFSET=0x%08X\n", + range, lo32, ctrl); + + return 0; +} diff --git a/drivers/misc/mei/hw-txe.h b/drivers/misc/mei/hw-txe.h new file mode 100644 index 0000000000..96511b04bf --- /dev/null +++ b/drivers/misc/mei/hw-txe.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2013-2016, Intel Corporation. All rights reserved. + * Intel Management Engine Interface (Intel MEI) Linux driver + */ + +#ifndef _MEI_HW_TXE_H_ +#define _MEI_HW_TXE_H_ + +#include + +#include "hw.h" +#include "hw-txe-regs.h" + +#define MEI_TXI_RPM_TIMEOUT 500 /* ms */ + +/* Flatten Hierarchy interrupt cause */ +#define TXE_INTR_READINESS_BIT 0 /* HISR_INT_0_STS */ +#define TXE_INTR_READINESS HISR_INT_0_STS +#define TXE_INTR_ALIVENESS_BIT 1 /* HISR_INT_1_STS */ +#define TXE_INTR_ALIVENESS HISR_INT_1_STS +#define TXE_INTR_OUT_DB_BIT 2 /* HISR_INT_2_STS */ +#define TXE_INTR_OUT_DB HISR_INT_2_STS +#define TXE_INTR_IN_READY_BIT 8 /* beyond HISR */ +#define TXE_INTR_IN_READY BIT(8) + +/** + * struct mei_txe_hw - txe hardware specifics + * + * @mem_addr: SeC and BRIDGE bars + * @aliveness: aliveness (power gating) state of the hardware + * @readiness: readiness state of the hardware + * @slots: number of empty slots + * @wait_aliveness_resp: aliveness wait queue + * @intr_cause: translated interrupt cause + */ +struct mei_txe_hw { + void __iomem * const *mem_addr; + u32 aliveness; + u32 readiness; + u32 slots; + + wait_queue_head_t wait_aliveness_resp; + + unsigned long intr_cause; +}; + +#define to_txe_hw(dev) (struct mei_txe_hw *)((dev)->hw) + +static inline struct mei_device *hw_txe_to_mei(struct mei_txe_hw *hw) +{ + return container_of((void *)hw, struct mei_device, hw); +} + +struct mei_device *mei_txe_dev_init(struct pci_dev *pdev); + +irqreturn_t mei_txe_irq_quick_handler(int irq, void *dev_id); +irqreturn_t mei_txe_irq_thread_handler(int irq, void *dev_id); + +int mei_txe_aliveness_set_sync(struct mei_device *dev, u32 req); + +int mei_txe_setup_satt2(struct mei_device *dev, phys_addr_t addr, u32 range); + + +#endif /* _MEI_HW_TXE_H_ */ diff --git a/drivers/misc/mei/hw.h b/drivers/misc/mei/hw.h new file mode 100644 index 0000000000..e910302fcd --- /dev/null +++ b/drivers/misc/mei/hw.h @@ -0,0 +1,836 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2003-2022, Intel Corporation. 
All rights reserved + * Intel Management Engine Interface (Intel MEI) Linux driver + */ + +#ifndef _MEI_HW_TYPES_H_ +#define _MEI_HW_TYPES_H_ + +#include + +/* + * Timeouts in Seconds + */ +#define MEI_HW_READY_TIMEOUT 2 /* Timeout on ready message */ +#define MEI_CONNECT_TIMEOUT 3 /* HPS: at least 2 seconds */ + +#define MEI_CL_CONNECT_TIMEOUT 15 /* HPS: Client Connect Timeout */ +#define MEI_CL_CONNECT_TIMEOUT_SLOW 30 /* HPS: Client Connect Timeout, slow FW */ +#define MEI_CLIENTS_INIT_TIMEOUT 15 /* HPS: Clients Enumeration Timeout */ + +#define MEI_PGI_TIMEOUT 1 /* PG Isolation time response 1 sec */ +#define MEI_D0I3_TIMEOUT 5 /* D0i3 set/unset max response time */ +#define MEI_HBM_TIMEOUT 1 /* 1 second */ +#define MEI_HBM_TIMEOUT_SLOW 5 /* 5 second, slow FW */ + +#define MKHI_RCV_TIMEOUT 500 /* receive timeout in msec */ +#define MKHI_RCV_TIMEOUT_SLOW 10000 /* receive timeout in msec, slow FW */ + +/* + * FW page size for DMA allocations + */ +#define MEI_FW_PAGE_SIZE 4096UL + +/* + * MEI Version + */ +#define HBM_MINOR_VERSION 2 +#define HBM_MAJOR_VERSION 2 + +/* + * MEI version with PGI support + */ +#define HBM_MINOR_VERSION_PGI 1 +#define HBM_MAJOR_VERSION_PGI 1 + +/* + * MEI version with Dynamic clients support + */ +#define HBM_MINOR_VERSION_DC 0 +#define HBM_MAJOR_VERSION_DC 2 + +/* + * MEI version with immediate reply to enum request support + */ +#define HBM_MINOR_VERSION_IE 0 +#define HBM_MAJOR_VERSION_IE 2 + +/* + * MEI version with disconnect on connection timeout support + */ +#define HBM_MINOR_VERSION_DOT 0 +#define HBM_MAJOR_VERSION_DOT 2 + +/* + * MEI version with notification support + */ +#define HBM_MINOR_VERSION_EV 0 +#define HBM_MAJOR_VERSION_EV 2 + +/* + * MEI version with fixed address client support + */ +#define HBM_MINOR_VERSION_FA 0 +#define HBM_MAJOR_VERSION_FA 2 + +/* + * MEI version with OS ver message support + */ +#define HBM_MINOR_VERSION_OS 0 +#define HBM_MAJOR_VERSION_OS 2 + +/* + * MEI version with dma ring support + */ +#define HBM_MINOR_VERSION_DR 1 +#define HBM_MAJOR_VERSION_DR 2 + +/* + * MEI version with vm tag support + */ +#define HBM_MINOR_VERSION_VT 2 +#define HBM_MAJOR_VERSION_VT 2 + +/* + * MEI version with GSC support + */ +#define HBM_MINOR_VERSION_GSC 2 +#define HBM_MAJOR_VERSION_GSC 2 + +/* + * MEI version with capabilities message support + */ +#define HBM_MINOR_VERSION_CAP 2 +#define HBM_MAJOR_VERSION_CAP 2 + +/* + * MEI version with client DMA support + */ +#define HBM_MINOR_VERSION_CD 2 +#define HBM_MAJOR_VERSION_CD 2 + +/* Host bus message command opcode */ +#define MEI_HBM_CMD_OP_MSK 0x7f +/* Host bus message command RESPONSE */ +#define MEI_HBM_CMD_RES_MSK 0x80 + +/* + * MEI Bus Message Command IDs + */ +#define HOST_START_REQ_CMD 0x01 +#define HOST_START_RES_CMD 0x81 + +#define HOST_STOP_REQ_CMD 0x02 +#define HOST_STOP_RES_CMD 0x82 + +#define ME_STOP_REQ_CMD 0x03 + +#define HOST_ENUM_REQ_CMD 0x04 +#define HOST_ENUM_RES_CMD 0x84 + +#define HOST_CLIENT_PROPERTIES_REQ_CMD 0x05 +#define HOST_CLIENT_PROPERTIES_RES_CMD 0x85 + +#define CLIENT_CONNECT_REQ_CMD 0x06 +#define CLIENT_CONNECT_RES_CMD 0x86 + +#define CLIENT_DISCONNECT_REQ_CMD 0x07 +#define CLIENT_DISCONNECT_RES_CMD 0x87 + +#define MEI_FLOW_CONTROL_CMD 0x08 + +#define MEI_PG_ISOLATION_ENTRY_REQ_CMD 0x0a +#define MEI_PG_ISOLATION_ENTRY_RES_CMD 0x8a +#define MEI_PG_ISOLATION_EXIT_REQ_CMD 0x0b +#define MEI_PG_ISOLATION_EXIT_RES_CMD 0x8b + +#define MEI_HBM_ADD_CLIENT_REQ_CMD 0x0f +#define MEI_HBM_ADD_CLIENT_RES_CMD 0x8f + +#define MEI_HBM_NOTIFY_REQ_CMD 0x10 +#define 
MEI_HBM_NOTIFY_RES_CMD 0x90 +#define MEI_HBM_NOTIFICATION_CMD 0x11 + +#define MEI_HBM_DMA_SETUP_REQ_CMD 0x12 +#define MEI_HBM_DMA_SETUP_RES_CMD 0x92 + +#define MEI_HBM_CAPABILITIES_REQ_CMD 0x13 +#define MEI_HBM_CAPABILITIES_RES_CMD 0x93 + +#define MEI_HBM_CLIENT_DMA_MAP_REQ_CMD 0x14 +#define MEI_HBM_CLIENT_DMA_MAP_RES_CMD 0x94 + +#define MEI_HBM_CLIENT_DMA_UNMAP_REQ_CMD 0x15 +#define MEI_HBM_CLIENT_DMA_UNMAP_RES_CMD 0x95 + +/* + * MEI Stop Reason + * used by hbm_host_stop_request.reason + */ +enum mei_stop_reason_types { + DRIVER_STOP_REQUEST = 0x00, + DEVICE_D1_ENTRY = 0x01, + DEVICE_D2_ENTRY = 0x02, + DEVICE_D3_ENTRY = 0x03, + SYSTEM_S1_ENTRY = 0x04, + SYSTEM_S2_ENTRY = 0x05, + SYSTEM_S3_ENTRY = 0x06, + SYSTEM_S4_ENTRY = 0x07, + SYSTEM_S5_ENTRY = 0x08 +}; + + +/** + * enum mei_hbm_status - mei host bus messages return values + * + * @MEI_HBMS_SUCCESS : status success + * @MEI_HBMS_CLIENT_NOT_FOUND : client not found + * @MEI_HBMS_ALREADY_EXISTS : connection already established + * @MEI_HBMS_REJECTED : connection is rejected + * @MEI_HBMS_INVALID_PARAMETER : invalid parameter + * @MEI_HBMS_NOT_ALLOWED : operation not allowed + * @MEI_HBMS_ALREADY_STARTED : system is already started + * @MEI_HBMS_NOT_STARTED : system not started + * + * @MEI_HBMS_MAX : sentinel + */ +enum mei_hbm_status { + MEI_HBMS_SUCCESS = 0, + MEI_HBMS_CLIENT_NOT_FOUND = 1, + MEI_HBMS_ALREADY_EXISTS = 2, + MEI_HBMS_REJECTED = 3, + MEI_HBMS_INVALID_PARAMETER = 4, + MEI_HBMS_NOT_ALLOWED = 5, + MEI_HBMS_ALREADY_STARTED = 6, + MEI_HBMS_NOT_STARTED = 7, + + MEI_HBMS_MAX +}; + + +/* + * Client Connect Status + * used by hbm_client_connect_response.status + */ +enum mei_cl_connect_status { + MEI_CL_CONN_SUCCESS = MEI_HBMS_SUCCESS, + MEI_CL_CONN_NOT_FOUND = MEI_HBMS_CLIENT_NOT_FOUND, + MEI_CL_CONN_ALREADY_STARTED = MEI_HBMS_ALREADY_EXISTS, + MEI_CL_CONN_OUT_OF_RESOURCES = MEI_HBMS_REJECTED, + MEI_CL_CONN_MESSAGE_SMALL = MEI_HBMS_INVALID_PARAMETER, + MEI_CL_CONN_NOT_ALLOWED = MEI_HBMS_NOT_ALLOWED, +}; + +/* + * Client Disconnect Status + */ +enum mei_cl_disconnect_status { + MEI_CL_DISCONN_SUCCESS = MEI_HBMS_SUCCESS +}; + +/** + * enum mei_ext_hdr_type - extended header type used in + * extended header TLV + * + * @MEI_EXT_HDR_NONE: sentinel + * @MEI_EXT_HDR_VTAG: vtag header + * @MEI_EXT_HDR_GSC: gsc header + */ +enum mei_ext_hdr_type { + MEI_EXT_HDR_NONE = 0, + MEI_EXT_HDR_VTAG = 1, + MEI_EXT_HDR_GSC = 2, +}; + +/** + * struct mei_ext_hdr - extend header descriptor (TLV) + * @type: enum mei_ext_hdr_type + * @length: length excluding descriptor + * @data: the extended header payload + */ +struct mei_ext_hdr { + u8 type; + u8 length; + u8 data[]; +} __packed; + +/** + * struct mei_ext_meta_hdr - extend header meta data + * @count: number of headers + * @size: total size of the extended header list excluding meta header + * @reserved: reserved + * @hdrs: extended headers TLV list + */ +struct mei_ext_meta_hdr { + u8 count; + u8 size; + u8 reserved[2]; + u8 hdrs[]; +} __packed; + +/** + * struct mei_ext_hdr_vtag - extend header for vtag + * + * @hdr: standard extend header + * @vtag: virtual tag + * @reserved: reserved + */ +struct mei_ext_hdr_vtag { + struct mei_ext_hdr hdr; + u8 vtag; + u8 reserved; +} __packed; + +/* + * Extended header iterator functions + */ +/** + * mei_ext_begin - extended header iterator begin + * + * @meta: meta header of the extended header list + * + * Return: The first extended header + */ +static inline struct mei_ext_hdr *mei_ext_begin(struct mei_ext_meta_hdr *meta) +{ + return (struct mei_ext_hdr 
*)meta->hdrs; +} + +/** + * mei_ext_last - check if the ext is the last one in the TLV list + * + * @meta: meta header of the extended header list + * @ext: a meta header on the list + * + * Return: true if ext is the last header on the list + */ +static inline bool mei_ext_last(struct mei_ext_meta_hdr *meta, + struct mei_ext_hdr *ext) +{ + return (u8 *)ext >= (u8 *)meta + sizeof(*meta) + (meta->size * 4); +} + +struct mei_gsc_sgl { + u32 low; + u32 high; + u32 length; +} __packed; + +#define GSC_HECI_MSG_KERNEL 0 +#define GSC_HECI_MSG_USER 1 + +#define GSC_ADDRESS_TYPE_GTT 0 +#define GSC_ADDRESS_TYPE_PPGTT 1 +#define GSC_ADDRESS_TYPE_PHYSICAL_CONTINUOUS 2 /* max of 64K */ +#define GSC_ADDRESS_TYPE_PHYSICAL_SGL 3 + +/** + * struct mei_ext_hdr_gsc_h2f - extended header: gsc host to firmware interface + * + * @hdr: extended header + * @client_id: GSC_HECI_MSG_KERNEL or GSC_HECI_MSG_USER + * @addr_type: GSC_ADDRESS_TYPE_{GTT, PPGTT, PHYSICAL_CONTINUOUS, PHYSICAL_SGL} + * @fence_id: synchronization marker + * @input_address_count: number of input sgl buffers + * @output_address_count: number of output sgl buffers + * @reserved: reserved + * @sgl: sg list + */ +struct mei_ext_hdr_gsc_h2f { + struct mei_ext_hdr hdr; + u8 client_id; + u8 addr_type; + u32 fence_id; + u8 input_address_count; + u8 output_address_count; + u8 reserved[2]; + struct mei_gsc_sgl sgl[]; +} __packed; + +/** + * struct mei_ext_hdr_gsc_f2h - gsc firmware to host interface + * + * @hdr: extended header + * @client_id: GSC_HECI_MSG_KERNEL or GSC_HECI_MSG_USER + * @reserved: reserved + * @fence_id: synchronization marker + * @written: number of bytes written to firmware + */ +struct mei_ext_hdr_gsc_f2h { + struct mei_ext_hdr hdr; + u8 client_id; + u8 reserved; + u32 fence_id; + u32 written; +} __packed; + +/** + * mei_ext_next - following extended header on the TLV list + * + * @ext: current extend header + * + * Context: The function does not check for the overflows, + * one should call mei_ext_last before. 
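+ *
+ * A rough usage sketch, mirroring the walk done in mei_cl_irq_read_msg()
+ * in interrupt.c (vtag_hdr stands for a caller-local pointer):
+ *
+ *	struct mei_ext_hdr *ext = mei_ext_begin(meta);
+ *
+ *	do {
+ *		if (ext->type == MEI_EXT_HDR_VTAG)
+ *			vtag_hdr = (struct mei_ext_hdr_vtag *)ext;
+ *		ext = mei_ext_next(ext);
+ *	} while (!mei_ext_last(meta, ext));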
+ * + * Return: The following extend header after @ext + */ +static inline struct mei_ext_hdr *mei_ext_next(struct mei_ext_hdr *ext) +{ + return (struct mei_ext_hdr *)((u8 *)ext + (ext->length * 4)); +} + +/** + * mei_ext_hdr_len - get ext header length in bytes + * + * @ext: extend header + * + * Return: extend header length in bytes + */ +static inline u32 mei_ext_hdr_len(const struct mei_ext_hdr *ext) +{ + if (!ext) + return 0; + + return ext->length * sizeof(u32); +} + +/** + * struct mei_msg_hdr - MEI BUS Interface Section + * + * @me_addr: device address + * @host_addr: host address + * @length: message length + * @reserved: reserved + * @extended: message has extended header + * @dma_ring: message is on dma ring + * @internal: message is internal + * @msg_complete: last packet of the message + * @extension: extension of the header + */ +struct mei_msg_hdr { + u32 me_addr:8; + u32 host_addr:8; + u32 length:9; + u32 reserved:3; + u32 extended:1; + u32 dma_ring:1; + u32 internal:1; + u32 msg_complete:1; + u32 extension[]; +} __packed; + +/* The length is up to 9 bits */ +#define MEI_MSG_MAX_LEN_MASK GENMASK(9, 0) + +struct mei_bus_message { + u8 hbm_cmd; + u8 data[]; +} __packed; + +/** + * struct hbm_cl_cmd - client specific host bus command + * CONNECT, DISCONNECT, and FlOW CONTROL + * + * @hbm_cmd: bus message command header + * @me_addr: address of the client in ME + * @host_addr: address of the client in the driver + * @data: generic data + */ +struct mei_hbm_cl_cmd { + u8 hbm_cmd; + u8 me_addr; + u8 host_addr; + u8 data; +}; + +struct hbm_version { + u8 minor_version; + u8 major_version; +} __packed; + +struct hbm_host_version_request { + u8 hbm_cmd; + u8 reserved; + struct hbm_version host_version; +} __packed; + +struct hbm_host_version_response { + u8 hbm_cmd; + u8 host_version_supported; + struct hbm_version me_max_version; +} __packed; + +struct hbm_host_stop_request { + u8 hbm_cmd; + u8 reason; + u8 reserved[2]; +} __packed; + +struct hbm_host_stop_response { + u8 hbm_cmd; + u8 reserved[3]; +} __packed; + +struct hbm_me_stop_request { + u8 hbm_cmd; + u8 reason; + u8 reserved[2]; +} __packed; + +/** + * enum hbm_host_enum_flags - enumeration request flags (HBM version >= 2.0) + * + * @MEI_HBM_ENUM_F_ALLOW_ADD: allow dynamic clients add + * @MEI_HBM_ENUM_F_IMMEDIATE_ENUM: allow FW to send answer immediately + */ +enum hbm_host_enum_flags { + MEI_HBM_ENUM_F_ALLOW_ADD = BIT(0), + MEI_HBM_ENUM_F_IMMEDIATE_ENUM = BIT(1), +}; + +/** + * struct hbm_host_enum_request - enumeration request from host to fw + * + * @hbm_cmd : bus message command header + * @flags : request flags + * @reserved: reserved + */ +struct hbm_host_enum_request { + u8 hbm_cmd; + u8 flags; + u8 reserved[2]; +} __packed; + +struct hbm_host_enum_response { + u8 hbm_cmd; + u8 reserved[3]; + u8 valid_addresses[32]; +} __packed; + +/** + * struct mei_client_properties - mei client properties + * + * @protocol_name: guid of the client + * @protocol_version: client protocol version + * @max_number_of_connections: number of possible connections. + * @fixed_address: fixed me address (0 if the client is dynamic) + * @single_recv_buf: 1 if all connections share a single receive buffer. 
+ * @vt_supported: the client support vtag + * @reserved: reserved + * @max_msg_length: MTU of the client + */ +struct mei_client_properties { + uuid_le protocol_name; + u8 protocol_version; + u8 max_number_of_connections; + u8 fixed_address; + u8 single_recv_buf:1; + u8 vt_supported:1; + u8 reserved:6; + u32 max_msg_length; +} __packed; + +struct hbm_props_request { + u8 hbm_cmd; + u8 me_addr; + u8 reserved[2]; +} __packed; + +struct hbm_props_response { + u8 hbm_cmd; + u8 me_addr; + u8 status; + u8 reserved; + struct mei_client_properties client_properties; +} __packed; + +/** + * struct hbm_add_client_request - request to add a client + * might be sent by fw after enumeration has already completed + * + * @hbm_cmd: bus message command header + * @me_addr: address of the client in ME + * @reserved: reserved + * @client_properties: client properties + */ +struct hbm_add_client_request { + u8 hbm_cmd; + u8 me_addr; + u8 reserved[2]; + struct mei_client_properties client_properties; +} __packed; + +/** + * struct hbm_add_client_response - response to add a client + * sent by the host to report client addition status to fw + * + * @hbm_cmd: bus message command header + * @me_addr: address of the client in ME + * @status: if HBMS_SUCCESS then the client can now accept connections. + * @reserved: reserved + */ +struct hbm_add_client_response { + u8 hbm_cmd; + u8 me_addr; + u8 status; + u8 reserved; +} __packed; + +/** + * struct hbm_power_gate - power gate request/response + * + * @hbm_cmd: bus message command header + * @reserved: reserved + */ +struct hbm_power_gate { + u8 hbm_cmd; + u8 reserved[3]; +} __packed; + +/** + * struct hbm_client_connect_request - connect/disconnect request + * + * @hbm_cmd: bus message command header + * @me_addr: address of the client in ME + * @host_addr: address of the client in the driver + * @reserved: reserved + */ +struct hbm_client_connect_request { + u8 hbm_cmd; + u8 me_addr; + u8 host_addr; + u8 reserved; +} __packed; + +/** + * struct hbm_client_connect_response - connect/disconnect response + * + * @hbm_cmd: bus message command header + * @me_addr: address of the client in ME + * @host_addr: address of the client in the driver + * @status: status of the request + */ +struct hbm_client_connect_response { + u8 hbm_cmd; + u8 me_addr; + u8 host_addr; + u8 status; +} __packed; + + +#define MEI_FC_MESSAGE_RESERVED_LENGTH 5 + +struct hbm_flow_control { + u8 hbm_cmd; + u8 me_addr; + u8 host_addr; + u8 reserved[MEI_FC_MESSAGE_RESERVED_LENGTH]; +} __packed; + +#define MEI_HBM_NOTIFICATION_START 1 +#define MEI_HBM_NOTIFICATION_STOP 0 +/** + * struct hbm_notification_request - start/stop notification request + * + * @hbm_cmd: bus message command header + * @me_addr: address of the client in ME + * @host_addr: address of the client in the driver + * @start: start = 1 or stop = 0 asynchronous notifications + */ +struct hbm_notification_request { + u8 hbm_cmd; + u8 me_addr; + u8 host_addr; + u8 start; +} __packed; + +/** + * struct hbm_notification_response - start/stop notification response + * + * @hbm_cmd: bus message command header + * @me_addr: address of the client in ME + * @host_addr: - address of the client in the driver + * @status: (mei_hbm_status) response status for the request + * - MEI_HBMS_SUCCESS: successful stop/start + * - MEI_HBMS_CLIENT_NOT_FOUND: if the connection could not be found. + * - MEI_HBMS_ALREADY_STARTED: for start requests for a previously + * started notification. 
+ * - MEI_HBMS_NOT_STARTED: for stop request for a connected client for whom + * asynchronous notifications are currently disabled. + * + * @start: start = 1 or stop = 0 asynchronous notifications + * @reserved: reserved + */ +struct hbm_notification_response { + u8 hbm_cmd; + u8 me_addr; + u8 host_addr; + u8 status; + u8 start; + u8 reserved[3]; +} __packed; + +/** + * struct hbm_notification - notification event + * + * @hbm_cmd: bus message command header + * @me_addr: address of the client in ME + * @host_addr: address of the client in the driver + * @reserved: reserved for alignment + */ +struct hbm_notification { + u8 hbm_cmd; + u8 me_addr; + u8 host_addr; + u8 reserved; +} __packed; + +/** + * struct hbm_dma_mem_dscr - dma ring + * + * @addr_hi: the high 32bits of 64 bit address + * @addr_lo: the low 32bits of 64 bit address + * @size : size in bytes (must be power of 2) + */ +struct hbm_dma_mem_dscr { + u32 addr_hi; + u32 addr_lo; + u32 size; +} __packed; + +enum { + DMA_DSCR_HOST = 0, + DMA_DSCR_DEVICE = 1, + DMA_DSCR_CTRL = 2, + DMA_DSCR_NUM, +}; + +/** + * struct hbm_dma_setup_request - dma setup request + * + * @hbm_cmd: bus message command header + * @reserved: reserved for alignment + * @dma_dscr: dma descriptor for HOST, DEVICE, and CTRL + */ +struct hbm_dma_setup_request { + u8 hbm_cmd; + u8 reserved[3]; + struct hbm_dma_mem_dscr dma_dscr[DMA_DSCR_NUM]; +} __packed; + +/** + * struct hbm_dma_setup_response - dma setup response + * + * @hbm_cmd: bus message command header + * @status: 0 on success; otherwise DMA setup failed. + * @reserved: reserved for alignment + */ +struct hbm_dma_setup_response { + u8 hbm_cmd; + u8 status; + u8 reserved[2]; +} __packed; + +/** + * struct mei_dma_ring_ctrl - dma ring control block + * + * @hbuf_wr_idx: host circular buffer write index in slots + * @reserved1: reserved for alignment + * @hbuf_rd_idx: host circular buffer read index in slots + * @reserved2: reserved for alignment + * @dbuf_wr_idx: device circular buffer write index in slots + * @reserved3: reserved for alignment + * @dbuf_rd_idx: device circular buffer read index in slots + * @reserved4: reserved for alignment + */ +struct hbm_dma_ring_ctrl { + u32 hbuf_wr_idx; + u32 reserved1; + u32 hbuf_rd_idx; + u32 reserved2; + u32 dbuf_wr_idx; + u32 reserved3; + u32 dbuf_rd_idx; + u32 reserved4; +} __packed; + +/* virtual tag supported */ +#define HBM_CAP_VT BIT(0) + +/* gsc extended header support */ +#define HBM_CAP_GSC BIT(1) + +/* client dma supported */ +#define HBM_CAP_CD BIT(2) + +/** + * struct hbm_capability_request - capability request from host to fw + * + * @hbm_cmd : bus message command header + * @capability_requested: bitmask of capabilities requested by host + */ +struct hbm_capability_request { + u8 hbm_cmd; + u8 capability_requested[3]; +} __packed; + +/** + * struct hbm_capability_response - capability response from fw to host + * + * @hbm_cmd : bus message command header + * @capability_granted: bitmask of capabilities granted by FW + */ +struct hbm_capability_response { + u8 hbm_cmd; + u8 capability_granted[3]; +} __packed; + +/** + * struct hbm_client_dma_map_request - client dma map request from host to fw + * + * @hbm_cmd: bus message command header + * @client_buffer_id: client buffer id + * @reserved: reserved + * @address_lsb: DMA address LSB + * @address_msb: DMA address MSB + * @size: DMA size + */ +struct hbm_client_dma_map_request { + u8 hbm_cmd; + u8 client_buffer_id; + u8 reserved[2]; + u32 address_lsb; + u32 address_msb; + u32 size; +} __packed; + +/** 
+ * struct hbm_client_dma_unmap_request + * client dma unmap request from the host to the firmware + * + * @hbm_cmd: bus message command header + * @status: unmap status + * @client_buffer_id: client buffer id + * @reserved: reserved + */ +struct hbm_client_dma_unmap_request { + u8 hbm_cmd; + u8 status; + u8 client_buffer_id; + u8 reserved; +} __packed; + +/** + * struct hbm_client_dma_response + * client dma unmap response from the firmware to the host + * + * @hbm_cmd: bus message command header + * @status: command status + */ +struct hbm_client_dma_response { + u8 hbm_cmd; + u8 status; +} __packed; + +#endif diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c new file mode 100644 index 0000000000..c35e005b26 --- /dev/null +++ b/drivers/misc/mei/init.c @@ -0,0 +1,424 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2012-2022, Intel Corporation. All rights reserved. + * Intel Management Engine Interface (Intel MEI) Linux driver + */ + +#include +#include +#include +#include + +#include + +#include "mei_dev.h" +#include "hbm.h" +#include "client.h" + +const char *mei_dev_state_str(int state) +{ +#define MEI_DEV_STATE(state) case MEI_DEV_##state: return #state + switch (state) { + MEI_DEV_STATE(INITIALIZING); + MEI_DEV_STATE(INIT_CLIENTS); + MEI_DEV_STATE(ENABLED); + MEI_DEV_STATE(RESETTING); + MEI_DEV_STATE(DISABLED); + MEI_DEV_STATE(POWERING_DOWN); + MEI_DEV_STATE(POWER_DOWN); + MEI_DEV_STATE(POWER_UP); + default: + return "unknown"; + } +#undef MEI_DEV_STATE +} + +const char *mei_pg_state_str(enum mei_pg_state state) +{ +#define MEI_PG_STATE(state) case MEI_PG_##state: return #state + switch (state) { + MEI_PG_STATE(OFF); + MEI_PG_STATE(ON); + default: + return "unknown"; + } +#undef MEI_PG_STATE +} + +/** + * mei_fw_status2str - convert fw status registers to printable string + * + * @fw_status: firmware status + * @buf: string buffer at minimal size MEI_FW_STATUS_STR_SZ + * @len: buffer len must be >= MEI_FW_STATUS_STR_SZ + * + * Return: number of bytes written or -EINVAL if buffer is to small + */ +ssize_t mei_fw_status2str(struct mei_fw_status *fw_status, + char *buf, size_t len) +{ + ssize_t cnt = 0; + int i; + + buf[0] = '\0'; + + if (len < MEI_FW_STATUS_STR_SZ) + return -EINVAL; + + for (i = 0; i < fw_status->count; i++) + cnt += scnprintf(buf + cnt, len - cnt, "%08X ", + fw_status->status[i]); + + /* drop last space */ + buf[cnt] = '\0'; + return cnt; +} +EXPORT_SYMBOL_GPL(mei_fw_status2str); + +/** + * mei_cancel_work - Cancel mei background jobs + * + * @dev: the device structure + */ +void mei_cancel_work(struct mei_device *dev) +{ + cancel_work_sync(&dev->reset_work); + cancel_work_sync(&dev->bus_rescan_work); + + cancel_delayed_work_sync(&dev->timer_work); +} +EXPORT_SYMBOL_GPL(mei_cancel_work); + +/** + * mei_reset - resets host and fw. 
+ * + * @dev: the device structure + * + * Return: 0 on success or < 0 if the reset hasn't succeeded + */ +int mei_reset(struct mei_device *dev) +{ + enum mei_dev_state state = dev->dev_state; + bool interrupts_enabled; + int ret; + + if (state != MEI_DEV_INITIALIZING && + state != MEI_DEV_DISABLED && + state != MEI_DEV_POWER_DOWN && + state != MEI_DEV_POWER_UP) { + char fw_sts_str[MEI_FW_STATUS_STR_SZ]; + + mei_fw_status_str(dev, fw_sts_str, MEI_FW_STATUS_STR_SZ); + dev_warn(dev->dev, "unexpected reset: dev_state = %s fw status = %s\n", + mei_dev_state_str(state), fw_sts_str); + } + + mei_clear_interrupts(dev); + + /* we're already in reset, cancel the init timer + * if the reset was called due the hbm protocol error + * we need to call it before hw start + * so the hbm watchdog won't kick in + */ + mei_hbm_idle(dev); + + /* enter reset flow */ + interrupts_enabled = state != MEI_DEV_POWER_DOWN; + mei_set_devstate(dev, MEI_DEV_RESETTING); + + dev->reset_count++; + if (dev->reset_count > MEI_MAX_CONSEC_RESET) { + dev_err(dev->dev, "reset: reached maximal consecutive resets: disabling the device\n"); + mei_set_devstate(dev, MEI_DEV_DISABLED); + return -ENODEV; + } + + ret = mei_hw_reset(dev, interrupts_enabled); + /* fall through and remove the sw state even if hw reset has failed */ + + /* no need to clean up software state in case of power up */ + if (state != MEI_DEV_INITIALIZING && state != MEI_DEV_POWER_UP) + mei_cl_all_disconnect(dev); + + mei_hbm_reset(dev); + + /* clean stale FW version */ + dev->fw_ver_received = 0; + + memset(dev->rd_msg_hdr, 0, sizeof(dev->rd_msg_hdr)); + + if (ret) { + dev_err(dev->dev, "hw_reset failed ret = %d\n", ret); + return ret; + } + + if (state == MEI_DEV_POWER_DOWN) { + dev_dbg(dev->dev, "powering down: end of reset\n"); + mei_set_devstate(dev, MEI_DEV_DISABLED); + return 0; + } + + ret = mei_hw_start(dev); + if (ret) { + char fw_sts_str[MEI_FW_STATUS_STR_SZ]; + + mei_fw_status_str(dev, fw_sts_str, MEI_FW_STATUS_STR_SZ); + dev_err(dev->dev, "hw_start failed ret = %d fw status = %s\n", ret, fw_sts_str); + return ret; + } + + if (dev->dev_state != MEI_DEV_RESETTING) { + dev_dbg(dev->dev, "wrong state = %d on link start\n", dev->dev_state); + return 0; + } + + dev_dbg(dev->dev, "link is established start sending messages.\n"); + + mei_set_devstate(dev, MEI_DEV_INIT_CLIENTS); + ret = mei_hbm_start_req(dev); + if (ret) { + dev_err(dev->dev, "hbm_start failed ret = %d\n", ret); + mei_set_devstate(dev, MEI_DEV_RESETTING); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(mei_reset); + +/** + * mei_start - initializes host and fw to start work. + * + * @dev: the device structure + * + * Return: 0 on success, <0 on failure. 
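+ *
+ * A minimal bring-up sketch, assuming a transport driver that has already
+ * allocated a struct mei_device and supplies its own struct mei_hw_ops
+ * (my_hw_ops and parent are illustrative names only):
+ *
+ *	mei_device_init(mei_dev, parent, false, &my_hw_ops);
+ *
+ *	err = mei_start(mei_dev);
+ *	if (err)
+ *		return err;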
+ */ +int mei_start(struct mei_device *dev) +{ + int ret; + + mutex_lock(&dev->device_lock); + + /* acknowledge interrupt and stop interrupts */ + mei_clear_interrupts(dev); + + ret = mei_hw_config(dev); + if (ret) + goto err; + + dev_dbg(dev->dev, "reset in start the mei device.\n"); + + dev->reset_count = 0; + do { + mei_set_devstate(dev, MEI_DEV_INITIALIZING); + ret = mei_reset(dev); + + if (ret == -ENODEV || dev->dev_state == MEI_DEV_DISABLED) { + dev_err(dev->dev, "reset failed ret = %d", ret); + goto err; + } + } while (ret); + + if (mei_hbm_start_wait(dev)) { + dev_err(dev->dev, "HBM haven't started"); + goto err; + } + + if (!mei_hbm_version_is_supported(dev)) { + dev_dbg(dev->dev, "MEI start failed.\n"); + goto err; + } + + dev_dbg(dev->dev, "link layer has been established.\n"); + + mutex_unlock(&dev->device_lock); + return 0; +err: + dev_err(dev->dev, "link layer initialization failed.\n"); + mei_set_devstate(dev, MEI_DEV_DISABLED); + mutex_unlock(&dev->device_lock); + return -ENODEV; +} +EXPORT_SYMBOL_GPL(mei_start); + +/** + * mei_restart - restart device after suspend + * + * @dev: the device structure + * + * Return: 0 on success or -ENODEV if the restart hasn't succeeded + */ +int mei_restart(struct mei_device *dev) +{ + int err; + + mutex_lock(&dev->device_lock); + + mei_set_devstate(dev, MEI_DEV_POWER_UP); + dev->reset_count = 0; + + err = mei_reset(dev); + + mutex_unlock(&dev->device_lock); + + if (err == -ENODEV || dev->dev_state == MEI_DEV_DISABLED) { + dev_err(dev->dev, "device disabled = %d\n", err); + return -ENODEV; + } + + /* try to start again */ + if (err) + schedule_work(&dev->reset_work); + + + return 0; +} +EXPORT_SYMBOL_GPL(mei_restart); + +static void mei_reset_work(struct work_struct *work) +{ + struct mei_device *dev = + container_of(work, struct mei_device, reset_work); + int ret; + + mei_clear_interrupts(dev); + mei_synchronize_irq(dev); + + mutex_lock(&dev->device_lock); + + ret = mei_reset(dev); + + mutex_unlock(&dev->device_lock); + + if (dev->dev_state == MEI_DEV_DISABLED) { + dev_err(dev->dev, "device disabled = %d\n", ret); + return; + } + + /* retry reset in case of failure */ + if (ret) + schedule_work(&dev->reset_work); +} + +void mei_stop(struct mei_device *dev) +{ + dev_dbg(dev->dev, "stopping the device.\n"); + + mutex_lock(&dev->device_lock); + mei_set_devstate(dev, MEI_DEV_POWERING_DOWN); + mutex_unlock(&dev->device_lock); + mei_cl_bus_remove_devices(dev); + mutex_lock(&dev->device_lock); + mei_set_devstate(dev, MEI_DEV_POWER_DOWN); + mutex_unlock(&dev->device_lock); + + mei_cancel_work(dev); + + mei_clear_interrupts(dev); + mei_synchronize_irq(dev); + /* to catch HW-initiated reset */ + mei_cancel_work(dev); + + mutex_lock(&dev->device_lock); + + mei_reset(dev); + /* move device to disabled state unconditionally */ + mei_set_devstate(dev, MEI_DEV_DISABLED); + + mutex_unlock(&dev->device_lock); +} +EXPORT_SYMBOL_GPL(mei_stop); + +/** + * mei_write_is_idle - check if the write queues are idle + * + * @dev: the device structure + * + * Return: true of there is no pending write + */ +bool mei_write_is_idle(struct mei_device *dev) +{ + bool idle = (dev->dev_state == MEI_DEV_ENABLED && + list_empty(&dev->ctrl_wr_list) && + list_empty(&dev->write_list) && + list_empty(&dev->write_waiting_list)); + + dev_dbg(dev->dev, "write pg: is idle[%d] state=%s ctrl=%01d write=%01d wwait=%01d\n", + idle, + mei_dev_state_str(dev->dev_state), + list_empty(&dev->ctrl_wr_list), + list_empty(&dev->write_list), + list_empty(&dev->write_waiting_list)); + + return 
idle; +} +EXPORT_SYMBOL_GPL(mei_write_is_idle); + +/** + * mei_device_init - initialize mei_device structure + * + * @dev: the mei device + * @device: the device structure + * @slow_fw: configure longer timeouts as FW is slow + * @hw_ops: hw operations + */ +void mei_device_init(struct mei_device *dev, + struct device *device, + bool slow_fw, + const struct mei_hw_ops *hw_ops) +{ + /* setup our list array */ + INIT_LIST_HEAD(&dev->file_list); + INIT_LIST_HEAD(&dev->device_list); + INIT_LIST_HEAD(&dev->me_clients); + mutex_init(&dev->device_lock); + init_rwsem(&dev->me_clients_rwsem); + mutex_init(&dev->cl_bus_lock); + init_waitqueue_head(&dev->wait_hw_ready); + init_waitqueue_head(&dev->wait_pg); + init_waitqueue_head(&dev->wait_hbm_start); + dev->dev_state = MEI_DEV_INITIALIZING; + dev->reset_count = 0; + + INIT_LIST_HEAD(&dev->write_list); + INIT_LIST_HEAD(&dev->write_waiting_list); + INIT_LIST_HEAD(&dev->ctrl_wr_list); + INIT_LIST_HEAD(&dev->ctrl_rd_list); + dev->tx_queue_limit = MEI_TX_QUEUE_LIMIT_DEFAULT; + + INIT_DELAYED_WORK(&dev->timer_work, mei_timer); + INIT_WORK(&dev->reset_work, mei_reset_work); + INIT_WORK(&dev->bus_rescan_work, mei_cl_bus_rescan_work); + + bitmap_zero(dev->host_clients_map, MEI_CLIENTS_MAX); + dev->open_handle_count = 0; + + dev->pxp_mode = MEI_DEV_PXP_DEFAULT; + + /* + * Reserving the first client ID + * 0: Reserved for MEI Bus Message communications + */ + bitmap_set(dev->host_clients_map, 0, 1); + + dev->pg_event = MEI_PG_EVENT_IDLE; + dev->ops = hw_ops; + dev->dev = device; + + dev->timeouts.hw_ready = mei_secs_to_jiffies(MEI_HW_READY_TIMEOUT); + dev->timeouts.connect = MEI_CONNECT_TIMEOUT; + dev->timeouts.client_init = MEI_CLIENTS_INIT_TIMEOUT; + dev->timeouts.pgi = mei_secs_to_jiffies(MEI_PGI_TIMEOUT); + dev->timeouts.d0i3 = mei_secs_to_jiffies(MEI_D0I3_TIMEOUT); + if (slow_fw) { + dev->timeouts.cl_connect = mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT_SLOW); + dev->timeouts.hbm = mei_secs_to_jiffies(MEI_HBM_TIMEOUT_SLOW); + dev->timeouts.mkhi_recv = msecs_to_jiffies(MKHI_RCV_TIMEOUT_SLOW); + } else { + dev->timeouts.cl_connect = mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT); + dev->timeouts.hbm = mei_secs_to_jiffies(MEI_HBM_TIMEOUT); + dev->timeouts.mkhi_recv = msecs_to_jiffies(MKHI_RCV_TIMEOUT); + } +} +EXPORT_SYMBOL_GPL(mei_device_init); + diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c new file mode 100644 index 0000000000..0a0e984e56 --- /dev/null +++ b/drivers/misc/mei/interrupt.c @@ -0,0 +1,688 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2003-2018, Intel Corporation. All rights reserved. 
+ * Intel Management Engine Interface (Intel MEI) Linux driver + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "mei_dev.h" +#include "hbm.h" +#include "client.h" + + +/** + * mei_irq_compl_handler - dispatch complete handlers + * for the completed callbacks + * + * @dev: mei device + * @cmpl_list: list of completed cbs + */ +void mei_irq_compl_handler(struct mei_device *dev, struct list_head *cmpl_list) +{ + struct mei_cl_cb *cb, *next; + struct mei_cl *cl; + + list_for_each_entry_safe(cb, next, cmpl_list, list) { + cl = cb->cl; + list_del_init(&cb->list); + + dev_dbg(dev->dev, "completing call back.\n"); + mei_cl_complete(cl, cb); + } +} +EXPORT_SYMBOL_GPL(mei_irq_compl_handler); + +/** + * mei_cl_hbm_equal - check if hbm is addressed to the client + * + * @cl: host client + * @mei_hdr: header of mei client message + * + * Return: true if matches, false otherwise + */ +static inline int mei_cl_hbm_equal(struct mei_cl *cl, + struct mei_msg_hdr *mei_hdr) +{ + return mei_cl_host_addr(cl) == mei_hdr->host_addr && + mei_cl_me_id(cl) == mei_hdr->me_addr; +} + +/** + * mei_irq_discard_msg - discard received message + * + * @dev: mei device + * @hdr: message header + * @discard_len: the length of the message to discard (excluding header) + */ +static void mei_irq_discard_msg(struct mei_device *dev, struct mei_msg_hdr *hdr, + size_t discard_len) +{ + if (hdr->dma_ring) { + mei_dma_ring_read(dev, NULL, + hdr->extension[dev->rd_msg_hdr_count - 2]); + discard_len = 0; + } + /* + * no need to check for size as it is guarantied + * that length fits into rd_msg_buf + */ + mei_read_slots(dev, dev->rd_msg_buf, discard_len); + dev_dbg(dev->dev, "discarding message " MEI_HDR_FMT "\n", + MEI_HDR_PRM(hdr)); +} + +/** + * mei_cl_irq_read_msg - process client message + * + * @cl: reading client + * @mei_hdr: header of mei client message + * @meta: extend meta header + * @cmpl_list: completion list + * + * Return: always 0 + */ +static int mei_cl_irq_read_msg(struct mei_cl *cl, + struct mei_msg_hdr *mei_hdr, + struct mei_ext_meta_hdr *meta, + struct list_head *cmpl_list) +{ + struct mei_device *dev = cl->dev; + struct mei_cl_cb *cb; + + struct mei_ext_hdr_vtag *vtag_hdr = NULL; + struct mei_ext_hdr_gsc_f2h *gsc_f2h = NULL; + + size_t buf_sz; + u32 length; + u32 ext_len; + + length = mei_hdr->length; + ext_len = 0; + if (mei_hdr->extended) { + ext_len = sizeof(*meta) + mei_slots2data(meta->size); + length -= ext_len; + } + + cb = list_first_entry_or_null(&cl->rd_pending, struct mei_cl_cb, list); + if (!cb) { + if (!mei_cl_is_fixed_address(cl)) { + cl_err(dev, cl, "pending read cb not found\n"); + goto discard; + } + cb = mei_cl_alloc_cb(cl, mei_cl_mtu(cl), MEI_FOP_READ, cl->fp); + if (!cb) + goto discard; + list_add_tail(&cb->list, &cl->rd_pending); + } + + if (mei_hdr->extended) { + struct mei_ext_hdr *ext = mei_ext_begin(meta); + do { + switch (ext->type) { + case MEI_EXT_HDR_VTAG: + vtag_hdr = (struct mei_ext_hdr_vtag *)ext; + break; + case MEI_EXT_HDR_GSC: + gsc_f2h = (struct mei_ext_hdr_gsc_f2h *)ext; + cb->ext_hdr = kzalloc(sizeof(*gsc_f2h), GFP_KERNEL); + if (!cb->ext_hdr) { + cb->status = -ENOMEM; + goto discard; + } + break; + case MEI_EXT_HDR_NONE: + fallthrough; + default: + cl_err(dev, cl, "unknown extended header\n"); + cb->status = -EPROTO; + break; + } + + ext = mei_ext_next(ext); + } while (!mei_ext_last(meta, ext)); + + if (!vtag_hdr && !gsc_f2h) { + cl_dbg(dev, cl, "no vtag or gsc found in extended header.\n"); + cb->status = -EPROTO; + goto 
discard; + } + } + + if (vtag_hdr) { + cl_dbg(dev, cl, "vtag: %d\n", vtag_hdr->vtag); + if (cb->vtag && cb->vtag != vtag_hdr->vtag) { + cl_err(dev, cl, "mismatched tag: %d != %d\n", + cb->vtag, vtag_hdr->vtag); + cb->status = -EPROTO; + goto discard; + } + cb->vtag = vtag_hdr->vtag; + } + + if (gsc_f2h) { + u32 ext_hdr_len = mei_ext_hdr_len(&gsc_f2h->hdr); + + if (!dev->hbm_f_gsc_supported) { + cl_err(dev, cl, "gsc extended header is not supported\n"); + cb->status = -EPROTO; + goto discard; + } + + if (length) { + cl_err(dev, cl, "no data allowed in cb with gsc\n"); + cb->status = -EPROTO; + goto discard; + } + if (ext_hdr_len > sizeof(*gsc_f2h)) { + cl_err(dev, cl, "gsc extended header is too big %u\n", ext_hdr_len); + cb->status = -EPROTO; + goto discard; + } + memcpy(cb->ext_hdr, gsc_f2h, ext_hdr_len); + } + + if (!mei_cl_is_connected(cl)) { + cl_dbg(dev, cl, "not connected\n"); + cb->status = -ENODEV; + goto discard; + } + + if (mei_hdr->dma_ring) + length = mei_hdr->extension[mei_data2slots(ext_len)]; + + buf_sz = length + cb->buf_idx; + /* catch for integer overflow */ + if (buf_sz < cb->buf_idx) { + cl_err(dev, cl, "message is too big len %d idx %zu\n", + length, cb->buf_idx); + cb->status = -EMSGSIZE; + goto discard; + } + + if (cb->buf.size < buf_sz) { + cl_dbg(dev, cl, "message overflow. size %zu len %d idx %zu\n", + cb->buf.size, length, cb->buf_idx); + cb->status = -EMSGSIZE; + goto discard; + } + + if (mei_hdr->dma_ring) { + mei_dma_ring_read(dev, cb->buf.data + cb->buf_idx, length); + /* for DMA read 0 length to generate interrupt to the device */ + mei_read_slots(dev, cb->buf.data + cb->buf_idx, 0); + } else { + mei_read_slots(dev, cb->buf.data + cb->buf_idx, length); + } + + cb->buf_idx += length; + + if (mei_hdr->msg_complete) { + cl_dbg(dev, cl, "completed read length = %zu\n", cb->buf_idx); + list_move_tail(&cb->list, cmpl_list); + } else { + pm_runtime_mark_last_busy(dev->dev); + pm_request_autosuspend(dev->dev); + } + + return 0; + +discard: + if (cb) + list_move_tail(&cb->list, cmpl_list); + mei_irq_discard_msg(dev, mei_hdr, length); + return 0; +} + +/** + * mei_cl_irq_disconnect_rsp - send disconnection response message + * + * @cl: client + * @cb: callback block. + * @cmpl_list: complete list. + * + * Return: 0, OK; otherwise, error. + */ +static int mei_cl_irq_disconnect_rsp(struct mei_cl *cl, struct mei_cl_cb *cb, + struct list_head *cmpl_list) +{ + struct mei_device *dev = cl->dev; + u32 msg_slots; + int slots; + int ret; + + msg_slots = mei_hbm2slots(sizeof(struct hbm_client_connect_response)); + slots = mei_hbuf_empty_slots(dev); + if (slots < 0) + return -EOVERFLOW; + + if ((u32)slots < msg_slots) + return -EMSGSIZE; + + ret = mei_hbm_cl_disconnect_rsp(dev, cl); + list_move_tail(&cb->list, cmpl_list); + + return ret; +} + +/** + * mei_cl_irq_read - processes client read related operation from the + * interrupt thread context - request for flow control credits + * + * @cl: client + * @cb: callback block. + * @cmpl_list: complete list. + * + * Return: 0, OK; otherwise, error. 
+ */ +static int mei_cl_irq_read(struct mei_cl *cl, struct mei_cl_cb *cb, + struct list_head *cmpl_list) +{ + struct mei_device *dev = cl->dev; + u32 msg_slots; + int slots; + int ret; + + if (!list_empty(&cl->rd_pending)) + return 0; + + msg_slots = mei_hbm2slots(sizeof(struct hbm_flow_control)); + slots = mei_hbuf_empty_slots(dev); + if (slots < 0) + return -EOVERFLOW; + + if ((u32)slots < msg_slots) + return -EMSGSIZE; + + ret = mei_hbm_cl_flow_control_req(dev, cl); + if (ret) { + cl->status = ret; + cb->buf_idx = 0; + list_move_tail(&cb->list, cmpl_list); + return ret; + } + + pm_runtime_mark_last_busy(dev->dev); + pm_request_autosuspend(dev->dev); + + list_move_tail(&cb->list, &cl->rd_pending); + + return 0; +} + +static inline bool hdr_is_hbm(struct mei_msg_hdr *mei_hdr) +{ + return mei_hdr->host_addr == 0 && mei_hdr->me_addr == 0; +} + +static inline bool hdr_is_fixed(struct mei_msg_hdr *mei_hdr) +{ + return mei_hdr->host_addr == 0 && mei_hdr->me_addr != 0; +} + +static inline int hdr_is_valid(u32 msg_hdr) +{ + struct mei_msg_hdr *mei_hdr; + u32 expected_len = 0; + + mei_hdr = (struct mei_msg_hdr *)&msg_hdr; + if (!msg_hdr || mei_hdr->reserved) + return -EBADMSG; + + if (mei_hdr->dma_ring) + expected_len += MEI_SLOT_SIZE; + if (mei_hdr->extended) + expected_len += MEI_SLOT_SIZE; + if (mei_hdr->length < expected_len) + return -EBADMSG; + + return 0; +} + +/** + * mei_irq_read_handler - bottom half read routine after ISR to + * handle the read processing. + * + * @dev: the device structure + * @cmpl_list: An instance of our list structure + * @slots: slots to read. + * + * Return: 0 on success, <0 on failure. + */ +int mei_irq_read_handler(struct mei_device *dev, + struct list_head *cmpl_list, s32 *slots) +{ + struct mei_msg_hdr *mei_hdr; + struct mei_ext_meta_hdr *meta_hdr = NULL; + struct mei_cl *cl; + int ret; + u32 hdr_size_left; + u32 hdr_size_ext; + int i; + int ext_hdr_end; + + if (!dev->rd_msg_hdr[0]) { + dev->rd_msg_hdr[0] = mei_read_hdr(dev); + dev->rd_msg_hdr_count = 1; + (*slots)--; + dev_dbg(dev->dev, "slots =%08x.\n", *slots); + + ret = hdr_is_valid(dev->rd_msg_hdr[0]); + if (ret) { + dev_err(dev->dev, "corrupted message header 0x%08X\n", + dev->rd_msg_hdr[0]); + goto end; + } + } + + mei_hdr = (struct mei_msg_hdr *)dev->rd_msg_hdr; + dev_dbg(dev->dev, MEI_HDR_FMT, MEI_HDR_PRM(mei_hdr)); + + if (mei_slots2data(*slots) < mei_hdr->length) { + dev_err(dev->dev, "less data available than length=%08x.\n", + *slots); + /* we can't read the message */ + ret = -ENODATA; + goto end; + } + + ext_hdr_end = 1; + hdr_size_left = mei_hdr->length; + + if (mei_hdr->extended) { + if (!dev->rd_msg_hdr[1]) { + dev->rd_msg_hdr[1] = mei_read_hdr(dev); + dev->rd_msg_hdr_count++; + (*slots)--; + dev_dbg(dev->dev, "extended header is %08x\n", dev->rd_msg_hdr[1]); + } + meta_hdr = ((struct mei_ext_meta_hdr *)&dev->rd_msg_hdr[1]); + if (check_add_overflow((u32)sizeof(*meta_hdr), + mei_slots2data(meta_hdr->size), + &hdr_size_ext)) { + dev_err(dev->dev, "extended message size too big %d\n", + meta_hdr->size); + return -EBADMSG; + } + if (hdr_size_left < hdr_size_ext) { + dev_err(dev->dev, "corrupted message header len %d\n", + mei_hdr->length); + return -EBADMSG; + } + hdr_size_left -= hdr_size_ext; + + ext_hdr_end = meta_hdr->size + 2; + for (i = dev->rd_msg_hdr_count; i < ext_hdr_end; i++) { + dev->rd_msg_hdr[i] = mei_read_hdr(dev); + dev_dbg(dev->dev, "extended header %d is %08x\n", i, + dev->rd_msg_hdr[i]); + dev->rd_msg_hdr_count++; + (*slots)--; + } + } + + if (mei_hdr->dma_ring) { + if 
(hdr_size_left != sizeof(dev->rd_msg_hdr[ext_hdr_end])) { + dev_err(dev->dev, "corrupted message header len %d\n", + mei_hdr->length); + return -EBADMSG; + } + + dev->rd_msg_hdr[ext_hdr_end] = mei_read_hdr(dev); + dev->rd_msg_hdr_count++; + (*slots)--; + mei_hdr->length -= sizeof(dev->rd_msg_hdr[ext_hdr_end]); + } + + /* HBM message */ + if (hdr_is_hbm(mei_hdr)) { + ret = mei_hbm_dispatch(dev, mei_hdr); + if (ret) { + dev_dbg(dev->dev, "mei_hbm_dispatch failed ret = %d\n", + ret); + goto end; + } + goto reset_slots; + } + + /* find recipient cl */ + list_for_each_entry(cl, &dev->file_list, link) { + if (mei_cl_hbm_equal(cl, mei_hdr)) { + cl_dbg(dev, cl, "got a message\n"); + ret = mei_cl_irq_read_msg(cl, mei_hdr, meta_hdr, cmpl_list); + goto reset_slots; + } + } + + /* if no recipient cl was found we assume corrupted header */ + /* A message for not connected fixed address clients + * should be silently discarded + * On power down client may be force cleaned, + * silently discard such messages + */ + if (hdr_is_fixed(mei_hdr) || + dev->dev_state == MEI_DEV_POWER_DOWN) { + mei_irq_discard_msg(dev, mei_hdr, mei_hdr->length); + ret = 0; + goto reset_slots; + } + dev_err(dev->dev, "no destination client found 0x%08X\n", dev->rd_msg_hdr[0]); + ret = -EBADMSG; + goto end; + +reset_slots: + /* reset the number of slots and header */ + memset(dev->rd_msg_hdr, 0, sizeof(dev->rd_msg_hdr)); + dev->rd_msg_hdr_count = 0; + *slots = mei_count_full_read_slots(dev); + if (*slots == -EOVERFLOW) { + /* overflow - reset */ + dev_err(dev->dev, "resetting due to slots overflow.\n"); + /* set the event since message has been read */ + ret = -ERANGE; + goto end; + } +end: + return ret; +} +EXPORT_SYMBOL_GPL(mei_irq_read_handler); + + +/** + * mei_irq_write_handler - dispatch write requests + * after irq received + * + * @dev: the device structure + * @cmpl_list: An instance of our list structure + * + * Return: 0 on success, <0 on failure. 
+ */ +int mei_irq_write_handler(struct mei_device *dev, struct list_head *cmpl_list) +{ + + struct mei_cl *cl; + struct mei_cl_cb *cb, *next; + s32 slots; + int ret; + + + if (!mei_hbuf_acquire(dev)) + return 0; + + slots = mei_hbuf_empty_slots(dev); + if (slots < 0) + return -EOVERFLOW; + + if (slots == 0) + return -EMSGSIZE; + + /* complete all waiting for write CB */ + dev_dbg(dev->dev, "complete all waiting for write cb.\n"); + + list_for_each_entry_safe(cb, next, &dev->write_waiting_list, list) { + cl = cb->cl; + + cl->status = 0; + cl_dbg(dev, cl, "MEI WRITE COMPLETE\n"); + cl->writing_state = MEI_WRITE_COMPLETE; + list_move_tail(&cb->list, cmpl_list); + } + + /* complete control write list CB */ + dev_dbg(dev->dev, "complete control write list cb.\n"); + list_for_each_entry_safe(cb, next, &dev->ctrl_wr_list, list) { + cl = cb->cl; + switch (cb->fop_type) { + case MEI_FOP_DISCONNECT: + /* send disconnect message */ + ret = mei_cl_irq_disconnect(cl, cb, cmpl_list); + if (ret) + return ret; + + break; + case MEI_FOP_READ: + /* send flow control message */ + ret = mei_cl_irq_read(cl, cb, cmpl_list); + if (ret) + return ret; + + break; + case MEI_FOP_CONNECT: + /* connect message */ + ret = mei_cl_irq_connect(cl, cb, cmpl_list); + if (ret) + return ret; + + break; + case MEI_FOP_DISCONNECT_RSP: + /* send disconnect resp */ + ret = mei_cl_irq_disconnect_rsp(cl, cb, cmpl_list); + if (ret) + return ret; + break; + + case MEI_FOP_NOTIFY_START: + case MEI_FOP_NOTIFY_STOP: + ret = mei_cl_irq_notify(cl, cb, cmpl_list); + if (ret) + return ret; + break; + case MEI_FOP_DMA_MAP: + ret = mei_cl_irq_dma_map(cl, cb, cmpl_list); + if (ret) + return ret; + break; + case MEI_FOP_DMA_UNMAP: + ret = mei_cl_irq_dma_unmap(cl, cb, cmpl_list); + if (ret) + return ret; + break; + default: + BUG(); + } + + } + /* complete write list CB */ + dev_dbg(dev->dev, "complete write list cb.\n"); + list_for_each_entry_safe(cb, next, &dev->write_list, list) { + cl = cb->cl; + ret = mei_cl_irq_write(cl, cb, cmpl_list); + if (ret) + return ret; + } + return 0; +} +EXPORT_SYMBOL_GPL(mei_irq_write_handler); + + +/** + * mei_connect_timeout - connect/disconnect timeouts + * + * @cl: host client + */ +static void mei_connect_timeout(struct mei_cl *cl) +{ + struct mei_device *dev = cl->dev; + + if (cl->state == MEI_FILE_CONNECTING) { + if (dev->hbm_f_dot_supported) { + cl->state = MEI_FILE_DISCONNECT_REQUIRED; + wake_up(&cl->wait); + return; + } + } + mei_reset(dev); +} + +#define MEI_STALL_TIMER_FREQ (2 * HZ) +/** + * mei_schedule_stall_timer - re-arm stall_timer work + * + * Schedule stall timer + * + * @dev: the device structure + */ +void mei_schedule_stall_timer(struct mei_device *dev) +{ + schedule_delayed_work(&dev->timer_work, MEI_STALL_TIMER_FREQ); +} + +/** + * mei_timer - timer function. 
+ * + * @work: pointer to the work_struct structure + * + */ +void mei_timer(struct work_struct *work) +{ + struct mei_cl *cl; + struct mei_device *dev = container_of(work, + struct mei_device, timer_work.work); + bool reschedule_timer = false; + + mutex_lock(&dev->device_lock); + + /* Catch interrupt stalls during HBM init handshake */ + if (dev->dev_state == MEI_DEV_INIT_CLIENTS && + dev->hbm_state != MEI_HBM_IDLE) { + + if (dev->init_clients_timer) { + if (--dev->init_clients_timer == 0) { + dev_err(dev->dev, "timer: init clients timeout hbm_state = %d.\n", + dev->hbm_state); + mei_reset(dev); + goto out; + } + reschedule_timer = true; + } + } + + if (dev->dev_state != MEI_DEV_ENABLED) + goto out; + + /*** connect/disconnect timeouts ***/ + list_for_each_entry(cl, &dev->file_list, link) { + if (cl->timer_count) { + if (--cl->timer_count == 0) { + dev_err(dev->dev, "timer: connect/disconnect timeout.\n"); + mei_connect_timeout(cl); + goto out; + } + reschedule_timer = true; + } + } + +out: + if (dev->dev_state != MEI_DEV_DISABLED && reschedule_timer) + mei_schedule_stall_timer(dev); + + mutex_unlock(&dev->device_lock); +} diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c new file mode 100644 index 0000000000..bb4e9eabda --- /dev/null +++ b/drivers/misc/mei/main.c @@ -0,0 +1,1318 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2003-2022, Intel Corporation. All rights reserved. + * Intel Management Engine Interface (Intel MEI) Linux driver + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "mei_dev.h" +#include "client.h" + +static const struct class mei_class = { + .name = "mei", +}; + +static dev_t mei_devt; +#define MEI_MAX_DEVS MINORMASK +static DEFINE_MUTEX(mei_minor_lock); +static DEFINE_IDR(mei_idr); + +/** + * mei_open - the open function + * + * @inode: pointer to inode structure + * @file: pointer to file structure + * + * Return: 0 on success, <0 on error + */ +static int mei_open(struct inode *inode, struct file *file) +{ + struct mei_device *dev; + struct mei_cl *cl; + + int err; + + dev = container_of(inode->i_cdev, struct mei_device, cdev); + + mutex_lock(&dev->device_lock); + + if (dev->dev_state != MEI_DEV_ENABLED) { + dev_dbg(dev->dev, "dev_state != MEI_ENABLED dev_state = %s\n", + mei_dev_state_str(dev->dev_state)); + err = -ENODEV; + goto err_unlock; + } + + cl = mei_cl_alloc_linked(dev); + if (IS_ERR(cl)) { + err = PTR_ERR(cl); + goto err_unlock; + } + + cl->fp = file; + file->private_data = cl; + + mutex_unlock(&dev->device_lock); + + return nonseekable_open(inode, file); + +err_unlock: + mutex_unlock(&dev->device_lock); + return err; +} + +/** + * mei_cl_vtag_remove_by_fp - remove vtag that corresponds to fp from list + * + * @cl: host client + * @fp: pointer to file structure + * + */ +static void mei_cl_vtag_remove_by_fp(const struct mei_cl *cl, + const struct file *fp) +{ + struct mei_cl_vtag *vtag_l, *next; + + list_for_each_entry_safe(vtag_l, next, &cl->vtag_map, list) { + if (vtag_l->fp == fp) { + list_del(&vtag_l->list); + kfree(vtag_l); + return; + } + } +} + +/** + * mei_release - the release function + * + * @inode: pointer to inode structure + * @file: pointer to file structure + * + * Return: 0 on success, <0 on error + */ +static int mei_release(struct inode *inode, struct file *file) +{ + struct mei_cl *cl = file->private_data; + struct mei_device *dev; + int rets; + + if 
(WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + mutex_lock(&dev->device_lock); + + mei_cl_vtag_remove_by_fp(cl, file); + + if (!list_empty(&cl->vtag_map)) { + cl_dbg(dev, cl, "not the last vtag\n"); + mei_cl_flush_queues(cl, file); + rets = 0; + goto out; + } + + rets = mei_cl_disconnect(cl); + /* + * Check again: This is necessary since disconnect releases the lock + * and another client can connect in the meantime. + */ + if (!list_empty(&cl->vtag_map)) { + cl_dbg(dev, cl, "not the last vtag after disconnect\n"); + mei_cl_flush_queues(cl, file); + goto out; + } + + mei_cl_flush_queues(cl, NULL); + cl_dbg(dev, cl, "removing\n"); + + mei_cl_unlink(cl); + kfree(cl); + +out: + file->private_data = NULL; + + mutex_unlock(&dev->device_lock); + return rets; +} + + +/** + * mei_read - the read function. + * + * @file: pointer to file structure + * @ubuf: pointer to user buffer + * @length: buffer length + * @offset: data offset in buffer + * + * Return: >=0 data length on success , <0 on error + */ +static ssize_t mei_read(struct file *file, char __user *ubuf, + size_t length, loff_t *offset) +{ + struct mei_cl *cl = file->private_data; + struct mei_device *dev; + struct mei_cl_cb *cb = NULL; + bool nonblock = !!(file->f_flags & O_NONBLOCK); + ssize_t rets; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + + mutex_lock(&dev->device_lock); + if (dev->dev_state != MEI_DEV_ENABLED) { + rets = -ENODEV; + goto out; + } + + if (length == 0) { + rets = 0; + goto out; + } + + if (ubuf == NULL) { + rets = -EMSGSIZE; + goto out; + } + + cb = mei_cl_read_cb(cl, file); + if (cb) + goto copy_buffer; + + if (*offset > 0) + *offset = 0; + + rets = mei_cl_read_start(cl, length, file); + if (rets && rets != -EBUSY) { + cl_dbg(dev, cl, "mei start read failure status = %zd\n", rets); + goto out; + } + + if (nonblock) { + rets = -EAGAIN; + goto out; + } + + mutex_unlock(&dev->device_lock); + if (wait_event_interruptible(cl->rx_wait, + mei_cl_read_cb(cl, file) || + !mei_cl_is_connected(cl))) { + if (signal_pending(current)) + return -EINTR; + return -ERESTARTSYS; + } + mutex_lock(&dev->device_lock); + + if (!mei_cl_is_connected(cl)) { + rets = -ENODEV; + goto out; + } + + cb = mei_cl_read_cb(cl, file); + if (!cb) { + rets = 0; + goto out; + } + +copy_buffer: + /* now copy the data to user space */ + if (cb->status) { + rets = cb->status; + cl_dbg(dev, cl, "read operation failed %zd\n", rets); + goto free; + } + + cl_dbg(dev, cl, "buf.size = %zu buf.idx = %zu offset = %lld\n", + cb->buf.size, cb->buf_idx, *offset); + if (*offset >= cb->buf_idx) { + rets = 0; + goto free; + } + + /* length is being truncated to PAGE_SIZE, + * however buf_idx may point beyond that */ + length = min_t(size_t, length, cb->buf_idx - *offset); + + if (copy_to_user(ubuf, cb->buf.data + *offset, length)) { + dev_dbg(dev->dev, "failed to copy data to userland\n"); + rets = -EFAULT; + goto free; + } + + rets = length; + *offset += length; + /* not all data was read, keep the cb */ + if (*offset < cb->buf_idx) + goto out; + +free: + mei_cl_del_rd_completed(cl, cb); + *offset = 0; + +out: + cl_dbg(dev, cl, "end mei read rets = %zd\n", rets); + mutex_unlock(&dev->device_lock); + return rets; +} + +/** + * mei_cl_vtag_by_fp - obtain the vtag by file pointer + * + * @cl: host client + * @fp: pointer to file structure + * + * Return: vtag value on success, otherwise 0 + */ +static u8 mei_cl_vtag_by_fp(const struct mei_cl *cl, const struct file *fp) +{ + struct mei_cl_vtag *cl_vtag; + + if (!fp) + return 
0; + + list_for_each_entry(cl_vtag, &cl->vtag_map, list) + if (cl_vtag->fp == fp) + return cl_vtag->vtag; + return 0; +} + +/** + * mei_write - the write function. + * + * @file: pointer to file structure + * @ubuf: pointer to user buffer + * @length: buffer length + * @offset: data offset in buffer + * + * Return: >=0 data length on success , <0 on error + */ +static ssize_t mei_write(struct file *file, const char __user *ubuf, + size_t length, loff_t *offset) +{ + struct mei_cl *cl = file->private_data; + struct mei_cl_cb *cb; + struct mei_device *dev; + ssize_t rets; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + mutex_lock(&dev->device_lock); + + if (dev->dev_state != MEI_DEV_ENABLED) { + rets = -ENODEV; + goto out; + } + + if (!mei_cl_is_connected(cl)) { + cl_err(dev, cl, "is not connected"); + rets = -ENODEV; + goto out; + } + + if (!mei_me_cl_is_active(cl->me_cl)) { + rets = -ENOTTY; + goto out; + } + + if (length > mei_cl_mtu(cl)) { + rets = -EFBIG; + goto out; + } + + if (length == 0) { + rets = 0; + goto out; + } + + while (cl->tx_cb_queued >= dev->tx_queue_limit) { + if (file->f_flags & O_NONBLOCK) { + rets = -EAGAIN; + goto out; + } + mutex_unlock(&dev->device_lock); + rets = wait_event_interruptible(cl->tx_wait, + cl->writing_state == MEI_WRITE_COMPLETE || + (!mei_cl_is_connected(cl))); + mutex_lock(&dev->device_lock); + if (rets) { + if (signal_pending(current)) + rets = -EINTR; + goto out; + } + if (!mei_cl_is_connected(cl)) { + rets = -ENODEV; + goto out; + } + } + + cb = mei_cl_alloc_cb(cl, length, MEI_FOP_WRITE, file); + if (!cb) { + rets = -ENOMEM; + goto out; + } + cb->vtag = mei_cl_vtag_by_fp(cl, file); + + rets = copy_from_user(cb->buf.data, ubuf, length); + if (rets) { + dev_dbg(dev->dev, "failed to copy data from userland\n"); + rets = -EFAULT; + mei_io_cb_free(cb); + goto out; + } + + rets = mei_cl_write(cl, cb, MAX_SCHEDULE_TIMEOUT); +out: + mutex_unlock(&dev->device_lock); + return rets; +} + +/** + * mei_ioctl_connect_client - the connect to fw client IOCTL function + * + * @file: private data of the file object + * @in_client_uuid: requested UUID for connection + * @client: IOCTL connect data, output parameters + * + * Locking: called under "dev->device_lock" lock + * + * Return: 0 on success, <0 on failure. + */ +static int mei_ioctl_connect_client(struct file *file, + const uuid_le *in_client_uuid, + struct mei_client *client) +{ + struct mei_device *dev; + struct mei_me_client *me_cl; + struct mei_cl *cl; + int rets; + + cl = file->private_data; + dev = cl->dev; + + if (cl->state != MEI_FILE_INITIALIZING && + cl->state != MEI_FILE_DISCONNECTED) + return -EBUSY; + + /* find ME client we're trying to connect to */ + me_cl = mei_me_cl_by_uuid(dev, in_client_uuid); + if (!me_cl) { + dev_dbg(dev->dev, "Cannot connect to FW Client UUID = %pUl\n", + in_client_uuid); + rets = -ENOTTY; + goto end; + } + + if (me_cl->props.fixed_address) { + bool forbidden = dev->override_fixed_address ? 
+ !dev->allow_fixed_address : !dev->hbm_f_fa_supported; + if (forbidden) { + dev_dbg(dev->dev, "Connection forbidden to FW Client UUID = %pUl\n", + in_client_uuid); + rets = -ENOTTY; + goto end; + } + } + + dev_dbg(dev->dev, "Connect to FW Client ID = %d\n", + me_cl->client_id); + dev_dbg(dev->dev, "FW Client - Protocol Version = %d\n", + me_cl->props.protocol_version); + dev_dbg(dev->dev, "FW Client - Max Msg Len = %d\n", + me_cl->props.max_msg_length); + + /* prepare the output buffer */ + client->max_msg_length = me_cl->props.max_msg_length; + client->protocol_version = me_cl->props.protocol_version; + dev_dbg(dev->dev, "Can connect?\n"); + + rets = mei_cl_connect(cl, me_cl, file); + +end: + mei_me_cl_put(me_cl); + return rets; +} + +/** + * mei_vt_support_check - check if client support vtags + * + * Locking: called under "dev->device_lock" lock + * + * @dev: mei_device + * @uuid: client UUID + * + * Return: + * 0 - supported + * -ENOTTY - no such client + * -EOPNOTSUPP - vtags are not supported by client + */ +static int mei_vt_support_check(struct mei_device *dev, const uuid_le *uuid) +{ + struct mei_me_client *me_cl; + int ret; + + if (!dev->hbm_f_vt_supported) + return -EOPNOTSUPP; + + me_cl = mei_me_cl_by_uuid(dev, uuid); + if (!me_cl) { + dev_dbg(dev->dev, "Cannot connect to FW Client UUID = %pUl\n", + uuid); + return -ENOTTY; + } + ret = me_cl->props.vt_supported ? 0 : -EOPNOTSUPP; + mei_me_cl_put(me_cl); + + return ret; +} + +/** + * mei_ioctl_connect_vtag - connect to fw client with vtag IOCTL function + * + * @file: private data of the file object + * @in_client_uuid: requested UUID for connection + * @client: IOCTL connect data, output parameters + * @vtag: vm tag + * + * Locking: called under "dev->device_lock" lock + * + * Return: 0 on success, <0 on failure. 
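+ *
+ * For reference, user space reaches this path through
+ * IOCTL_MEI_CONNECT_CLIENT_VTAG on an open mei character device. A rough
+ * sketch using the field names handled in mei_ioctl() below (client_uuid
+ * is the target FW client UUID; the vtag must be non-zero):
+ *
+ *	struct mei_connect_client_data_vtag conn = { 0 };
+ *
+ *	memcpy(&conn.connect.in_client_uuid, &client_uuid, sizeof(client_uuid));
+ *	conn.connect.vtag = 1;
+ *	ioctl(fd, IOCTL_MEI_CONNECT_CLIENT_VTAG, &conn);
+ *
+ * On success conn.out_client_properties carries max_msg_length and
+ * protocol_version as filled in by this function.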
+ */ +static int mei_ioctl_connect_vtag(struct file *file, + const uuid_le *in_client_uuid, + struct mei_client *client, + u8 vtag) +{ + struct mei_device *dev; + struct mei_cl *cl; + struct mei_cl *pos; + struct mei_cl_vtag *cl_vtag; + + cl = file->private_data; + dev = cl->dev; + + dev_dbg(dev->dev, "FW Client %pUl vtag %d\n", in_client_uuid, vtag); + + switch (cl->state) { + case MEI_FILE_DISCONNECTED: + if (mei_cl_vtag_by_fp(cl, file) != vtag) { + dev_err(dev->dev, "reconnect with different vtag\n"); + return -EINVAL; + } + break; + case MEI_FILE_INITIALIZING: + /* malicious connect from another thread may push vtag */ + if (!IS_ERR(mei_cl_fp_by_vtag(cl, vtag))) { + dev_err(dev->dev, "vtag already filled\n"); + return -EINVAL; + } + + list_for_each_entry(pos, &dev->file_list, link) { + if (pos == cl) + continue; + if (!pos->me_cl) + continue; + + /* only search for same UUID */ + if (uuid_le_cmp(*mei_cl_uuid(pos), *in_client_uuid)) + continue; + + /* if tag already exist try another fp */ + if (!IS_ERR(mei_cl_fp_by_vtag(pos, vtag))) + continue; + + /* replace cl with acquired one */ + dev_dbg(dev->dev, "replacing with existing cl\n"); + mei_cl_unlink(cl); + kfree(cl); + file->private_data = pos; + cl = pos; + break; + } + + cl_vtag = mei_cl_vtag_alloc(file, vtag); + if (IS_ERR(cl_vtag)) + return -ENOMEM; + + list_add_tail(&cl_vtag->list, &cl->vtag_map); + break; + default: + return -EBUSY; + } + + while (cl->state != MEI_FILE_INITIALIZING && + cl->state != MEI_FILE_DISCONNECTED && + cl->state != MEI_FILE_CONNECTED) { + mutex_unlock(&dev->device_lock); + wait_event_timeout(cl->wait, + (cl->state == MEI_FILE_CONNECTED || + cl->state == MEI_FILE_DISCONNECTED || + cl->state == MEI_FILE_DISCONNECT_REQUIRED || + cl->state == MEI_FILE_DISCONNECT_REPLY), + dev->timeouts.cl_connect); + mutex_lock(&dev->device_lock); + } + + if (!mei_cl_is_connected(cl)) + return mei_ioctl_connect_client(file, in_client_uuid, client); + + client->max_msg_length = cl->me_cl->props.max_msg_length; + client->protocol_version = cl->me_cl->props.protocol_version; + + return 0; +} + +/** + * mei_ioctl_client_notify_request - + * propagate event notification request to client + * + * @file: pointer to file structure + * @request: 0 - disable, 1 - enable + * + * Return: 0 on success , <0 on error + */ +static int mei_ioctl_client_notify_request(const struct file *file, u32 request) +{ + struct mei_cl *cl = file->private_data; + + if (request != MEI_HBM_NOTIFICATION_START && + request != MEI_HBM_NOTIFICATION_STOP) + return -EINVAL; + + return mei_cl_notify_request(cl, file, (u8)request); +} + +/** + * mei_ioctl_client_notify_get - wait for notification request + * + * @file: pointer to file structure + * @notify_get: 0 - disable, 1 - enable + * + * Return: 0 on success , <0 on error + */ +static int mei_ioctl_client_notify_get(const struct file *file, u32 *notify_get) +{ + struct mei_cl *cl = file->private_data; + bool notify_ev; + bool block = (file->f_flags & O_NONBLOCK) == 0; + int rets; + + rets = mei_cl_notify_get(cl, block, ¬ify_ev); + if (rets) + return rets; + + *notify_get = notify_ev ? 
1 : 0; + return 0; +} + +/** + * mei_ioctl - the IOCTL function + * + * @file: pointer to file structure + * @cmd: ioctl command + * @data: pointer to mei message structure + * + * Return: 0 on success , <0 on error + */ +static long mei_ioctl(struct file *file, unsigned int cmd, unsigned long data) +{ + struct mei_device *dev; + struct mei_cl *cl = file->private_data; + struct mei_connect_client_data conn; + struct mei_connect_client_data_vtag conn_vtag; + const uuid_le *cl_uuid; + struct mei_client *props; + u8 vtag; + u32 notify_get, notify_req; + int rets; + + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + dev_dbg(dev->dev, "IOCTL cmd = 0x%x", cmd); + + mutex_lock(&dev->device_lock); + if (dev->dev_state != MEI_DEV_ENABLED) { + rets = -ENODEV; + goto out; + } + + switch (cmd) { + case IOCTL_MEI_CONNECT_CLIENT: + dev_dbg(dev->dev, ": IOCTL_MEI_CONNECT_CLIENT.\n"); + if (copy_from_user(&conn, (char __user *)data, sizeof(conn))) { + dev_dbg(dev->dev, "failed to copy data from userland\n"); + rets = -EFAULT; + goto out; + } + cl_uuid = &conn.in_client_uuid; + props = &conn.out_client_properties; + vtag = 0; + + rets = mei_vt_support_check(dev, cl_uuid); + if (rets == -ENOTTY) + goto out; + if (!rets) + rets = mei_ioctl_connect_vtag(file, cl_uuid, props, + vtag); + else + rets = mei_ioctl_connect_client(file, cl_uuid, props); + if (rets) + goto out; + + /* if all is ok, copying the data back to user. */ + if (copy_to_user((char __user *)data, &conn, sizeof(conn))) { + dev_dbg(dev->dev, "failed to copy data to userland\n"); + rets = -EFAULT; + goto out; + } + + break; + + case IOCTL_MEI_CONNECT_CLIENT_VTAG: + dev_dbg(dev->dev, "IOCTL_MEI_CONNECT_CLIENT_VTAG\n"); + if (copy_from_user(&conn_vtag, (char __user *)data, + sizeof(conn_vtag))) { + dev_dbg(dev->dev, "failed to copy data from userland\n"); + rets = -EFAULT; + goto out; + } + + cl_uuid = &conn_vtag.connect.in_client_uuid; + props = &conn_vtag.out_client_properties; + vtag = conn_vtag.connect.vtag; + + rets = mei_vt_support_check(dev, cl_uuid); + if (rets == -EOPNOTSUPP) + dev_dbg(dev->dev, "FW Client %pUl does not support vtags\n", + cl_uuid); + if (rets) + goto out; + + if (!vtag) { + dev_dbg(dev->dev, "vtag can't be zero\n"); + rets = -EINVAL; + goto out; + } + + rets = mei_ioctl_connect_vtag(file, cl_uuid, props, vtag); + if (rets) + goto out; + + /* if all is ok, copying the data back to user. 
*/ + if (copy_to_user((char __user *)data, &conn_vtag, + sizeof(conn_vtag))) { + dev_dbg(dev->dev, "failed to copy data to userland\n"); + rets = -EFAULT; + goto out; + } + + break; + + case IOCTL_MEI_NOTIFY_SET: + dev_dbg(dev->dev, ": IOCTL_MEI_NOTIFY_SET.\n"); + if (copy_from_user(&notify_req, + (char __user *)data, sizeof(notify_req))) { + dev_dbg(dev->dev, "failed to copy data from userland\n"); + rets = -EFAULT; + goto out; + } + rets = mei_ioctl_client_notify_request(file, notify_req); + break; + + case IOCTL_MEI_NOTIFY_GET: + dev_dbg(dev->dev, ": IOCTL_MEI_NOTIFY_GET.\n"); + rets = mei_ioctl_client_notify_get(file, &notify_get); + if (rets) + goto out; + + dev_dbg(dev->dev, "copy connect data to user\n"); + if (copy_to_user((char __user *)data, + &notify_get, sizeof(notify_get))) { + dev_dbg(dev->dev, "failed to copy data to userland\n"); + rets = -EFAULT; + goto out; + + } + break; + + default: + rets = -ENOIOCTLCMD; + } + +out: + mutex_unlock(&dev->device_lock); + return rets; +} + +/** + * mei_poll - the poll function + * + * @file: pointer to file structure + * @wait: pointer to poll_table structure + * + * Return: poll mask + */ +static __poll_t mei_poll(struct file *file, poll_table *wait) +{ + __poll_t req_events = poll_requested_events(wait); + struct mei_cl *cl = file->private_data; + struct mei_device *dev; + __poll_t mask = 0; + bool notify_en; + + if (WARN_ON(!cl || !cl->dev)) + return EPOLLERR; + + dev = cl->dev; + + mutex_lock(&dev->device_lock); + + notify_en = cl->notify_en && (req_events & EPOLLPRI); + + if (dev->dev_state != MEI_DEV_ENABLED || + !mei_cl_is_connected(cl)) { + mask = EPOLLERR; + goto out; + } + + if (notify_en) { + poll_wait(file, &cl->ev_wait, wait); + if (cl->notify_ev) + mask |= EPOLLPRI; + } + + if (req_events & (EPOLLIN | EPOLLRDNORM)) { + poll_wait(file, &cl->rx_wait, wait); + + if (mei_cl_read_cb(cl, file)) + mask |= EPOLLIN | EPOLLRDNORM; + else + mei_cl_read_start(cl, mei_cl_mtu(cl), file); + } + + if (req_events & (EPOLLOUT | EPOLLWRNORM)) { + poll_wait(file, &cl->tx_wait, wait); + if (cl->tx_cb_queued < dev->tx_queue_limit) + mask |= EPOLLOUT | EPOLLWRNORM; + } + +out: + mutex_unlock(&dev->device_lock); + return mask; +} + +/** + * mei_cl_is_write_queued - check if the client has pending writes. + * + * @cl: writing host client + * + * Return: true if client is writing, false otherwise.
+ */ +static bool mei_cl_is_write_queued(struct mei_cl *cl) +{ + struct mei_device *dev = cl->dev; + struct mei_cl_cb *cb; + + list_for_each_entry(cb, &dev->write_list, list) + if (cb->cl == cl) + return true; + list_for_each_entry(cb, &dev->write_waiting_list, list) + if (cb->cl == cl) + return true; + return false; +} + +/** + * mei_fsync - the fsync handler + * + * @fp: pointer to file structure + * @start: unused + * @end: unused + * @datasync: unused + * + * Return: 0 on success, -ENODEV if client is not connected + */ +static int mei_fsync(struct file *fp, loff_t start, loff_t end, int datasync) +{ + struct mei_cl *cl = fp->private_data; + struct mei_device *dev; + int rets; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + mutex_lock(&dev->device_lock); + + if (dev->dev_state != MEI_DEV_ENABLED || !mei_cl_is_connected(cl)) { + rets = -ENODEV; + goto out; + } + + while (mei_cl_is_write_queued(cl)) { + mutex_unlock(&dev->device_lock); + rets = wait_event_interruptible(cl->tx_wait, + cl->writing_state == MEI_WRITE_COMPLETE || + !mei_cl_is_connected(cl)); + mutex_lock(&dev->device_lock); + if (rets) { + if (signal_pending(current)) + rets = -EINTR; + goto out; + } + if (!mei_cl_is_connected(cl)) { + rets = -ENODEV; + goto out; + } + } + rets = 0; +out: + mutex_unlock(&dev->device_lock); + return rets; +} + +/** + * mei_fasync - asynchronous io support + * + * @fd: file descriptor + * @file: pointer to file structure + * @band: band bitmap + * + * Return: negative on error, + * 0 if it did no changes, + * and positive a process was added or deleted + */ +static int mei_fasync(int fd, struct file *file, int band) +{ + + struct mei_cl *cl = file->private_data; + + if (!mei_cl_is_connected(cl)) + return -ENODEV; + + return fasync_helper(fd, file, band, &cl->ev_async); +} + +/** + * trc_show - mei device trc attribute show method + * + * @device: device pointer + * @attr: attribute pointer + * @buf: char out buffer + * + * Return: number of the bytes printed into buf or error + */ +static ssize_t trc_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct mei_device *dev = dev_get_drvdata(device); + u32 trc; + int ret; + + ret = mei_trc_status(dev, &trc); + if (ret) + return ret; + return sprintf(buf, "%08X\n", trc); +} +static DEVICE_ATTR_RO(trc); + +/** + * fw_status_show - mei device fw_status attribute show method + * + * @device: device pointer + * @attr: attribute pointer + * @buf: char out buffer + * + * Return: number of the bytes printed into buf or error + */ +static ssize_t fw_status_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct mei_device *dev = dev_get_drvdata(device); + struct mei_fw_status fw_status; + int err, i; + ssize_t cnt = 0; + + mutex_lock(&dev->device_lock); + err = mei_fw_status(dev, &fw_status); + mutex_unlock(&dev->device_lock); + if (err) { + dev_err(device, "read fw_status error = %d\n", err); + return err; + } + + for (i = 0; i < fw_status.count; i++) + cnt += scnprintf(buf + cnt, PAGE_SIZE - cnt, "%08X\n", + fw_status.status[i]); + return cnt; +} +static DEVICE_ATTR_RO(fw_status); + +/** + * hbm_ver_show - display HBM protocol version negotiated with FW + * + * @device: device pointer + * @attr: attribute pointer + * @buf: char out buffer + * + * Return: number of the bytes printed into buf or error + */ +static ssize_t hbm_ver_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct mei_device *dev = dev_get_drvdata(device); + struct 
hbm_version ver; + + mutex_lock(&dev->device_lock); + ver = dev->version; + mutex_unlock(&dev->device_lock); + + return sprintf(buf, "%u.%u\n", ver.major_version, ver.minor_version); +} +static DEVICE_ATTR_RO(hbm_ver); + +/** + * hbm_ver_drv_show - display HBM protocol version advertised by driver + * + * @device: device pointer + * @attr: attribute pointer + * @buf: char out buffer + * + * Return: number of the bytes printed into buf or error + */ +static ssize_t hbm_ver_drv_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%u.%u\n", HBM_MAJOR_VERSION, HBM_MINOR_VERSION); +} +static DEVICE_ATTR_RO(hbm_ver_drv); + +static ssize_t tx_queue_limit_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct mei_device *dev = dev_get_drvdata(device); + u8 size = 0; + + mutex_lock(&dev->device_lock); + size = dev->tx_queue_limit; + mutex_unlock(&dev->device_lock); + + return sysfs_emit(buf, "%u\n", size); +} + +static ssize_t tx_queue_limit_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mei_device *dev = dev_get_drvdata(device); + u8 limit; + unsigned int inp; + int err; + + err = kstrtouint(buf, 10, &inp); + if (err) + return err; + if (inp > MEI_TX_QUEUE_LIMIT_MAX || inp < MEI_TX_QUEUE_LIMIT_MIN) + return -EINVAL; + limit = inp; + + mutex_lock(&dev->device_lock); + dev->tx_queue_limit = limit; + mutex_unlock(&dev->device_lock); + + return count; +} +static DEVICE_ATTR_RW(tx_queue_limit); + +/** + * fw_ver_show - display ME FW version + * + * @device: device pointer + * @attr: attribute pointer + * @buf: char out buffer + * + * Return: number of the bytes printed into buf or error + */ +static ssize_t fw_ver_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct mei_device *dev = dev_get_drvdata(device); + struct mei_fw_version *ver; + ssize_t cnt = 0; + int i; + + ver = dev->fw_ver; + + for (i = 0; i < MEI_MAX_FW_VER_BLOCKS; i++) + cnt += scnprintf(buf + cnt, PAGE_SIZE - cnt, "%u:%u.%u.%u.%u\n", + ver[i].platform, ver[i].major, ver[i].minor, + ver[i].hotfix, ver[i].buildno); + return cnt; +} +static DEVICE_ATTR_RO(fw_ver); + +/** + * dev_state_show - display device state + * + * @device: device pointer + * @attr: attribute pointer + * @buf: char out buffer + * + * Return: number of the bytes printed into buf or error + */ +static ssize_t dev_state_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct mei_device *dev = dev_get_drvdata(device); + enum mei_dev_state dev_state; + + mutex_lock(&dev->device_lock); + dev_state = dev->dev_state; + mutex_unlock(&dev->device_lock); + + return sprintf(buf, "%s", mei_dev_state_str(dev_state)); +} +static DEVICE_ATTR_RO(dev_state); + +/** + * mei_set_devstate: set to new device state and notify sysfs file. 
+ * + * @dev: mei_device + * @state: new device state + */ +void mei_set_devstate(struct mei_device *dev, enum mei_dev_state state) +{ + struct device *clsdev; + + if (dev->dev_state == state) + return; + + dev->dev_state = state; + + clsdev = class_find_device_by_devt(&mei_class, dev->cdev.dev); + if (clsdev) { + sysfs_notify(&clsdev->kobj, NULL, "dev_state"); + put_device(clsdev); + } +} + +/** + * kind_show - display device kind + * + * @device: device pointer + * @attr: attribute pointer + * @buf: char out buffer + * + * Return: number of the bytes printed into buf or error + */ +static ssize_t kind_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct mei_device *dev = dev_get_drvdata(device); + ssize_t ret; + + if (dev->kind) + ret = sprintf(buf, "%s\n", dev->kind); + else + ret = sprintf(buf, "%s\n", "mei"); + + return ret; +} +static DEVICE_ATTR_RO(kind); + +static struct attribute *mei_attrs[] = { + &dev_attr_fw_status.attr, + &dev_attr_hbm_ver.attr, + &dev_attr_hbm_ver_drv.attr, + &dev_attr_tx_queue_limit.attr, + &dev_attr_fw_ver.attr, + &dev_attr_dev_state.attr, + &dev_attr_trc.attr, + &dev_attr_kind.attr, + NULL +}; +ATTRIBUTE_GROUPS(mei); + +/* + * file operations structure will be used for mei char device. + */ +static const struct file_operations mei_fops = { + .owner = THIS_MODULE, + .read = mei_read, + .unlocked_ioctl = mei_ioctl, + .compat_ioctl = compat_ptr_ioctl, + .open = mei_open, + .release = mei_release, + .write = mei_write, + .poll = mei_poll, + .fsync = mei_fsync, + .fasync = mei_fasync, + .llseek = no_llseek +}; + +/** + * mei_minor_get - obtain next free device minor number + * + * @dev: device pointer + * + * Return: allocated minor, or -ENOSPC if no free minor left + */ +static int mei_minor_get(struct mei_device *dev) +{ + int ret; + + mutex_lock(&mei_minor_lock); + ret = idr_alloc(&mei_idr, dev, 0, MEI_MAX_DEVS, GFP_KERNEL); + if (ret >= 0) + dev->minor = ret; + else if (ret == -ENOSPC) + dev_err(dev->dev, "too many mei devices\n"); + + mutex_unlock(&mei_minor_lock); + return ret; +} + +/** + * mei_minor_free - mark device minor number as free + * + * @dev: device pointer + */ +static void mei_minor_free(struct mei_device *dev) +{ + mutex_lock(&mei_minor_lock); + idr_remove(&mei_idr, dev->minor); + mutex_unlock(&mei_minor_lock); +} + +int mei_register(struct mei_device *dev, struct device *parent) +{ + struct device *clsdev; /* class device */ + int ret, devno; + + ret = mei_minor_get(dev); + if (ret < 0) + return ret; + + /* Fill in the data structures */ + devno = MKDEV(MAJOR(mei_devt), dev->minor); + cdev_init(&dev->cdev, &mei_fops); + dev->cdev.owner = parent->driver->owner; + + /* Add the device */ + ret = cdev_add(&dev->cdev, devno, 1); + if (ret) { + dev_err(parent, "unable to add device %d:%d\n", + MAJOR(mei_devt), dev->minor); + goto err_dev_add; + } + + clsdev = device_create_with_groups(&mei_class, parent, devno, + dev, mei_groups, + "mei%d", dev->minor); + + if (IS_ERR(clsdev)) { + dev_err(parent, "unable to create device %d:%d\n", + MAJOR(mei_devt), dev->minor); + ret = PTR_ERR(clsdev); + goto err_dev_create; + } + + mei_dbgfs_register(dev, dev_name(clsdev)); + + return 0; + +err_dev_create: + cdev_del(&dev->cdev); +err_dev_add: + mei_minor_free(dev); + return ret; +} +EXPORT_SYMBOL_GPL(mei_register); + +void mei_deregister(struct mei_device *dev) +{ + int devno; + + devno = dev->cdev.dev; + cdev_del(&dev->cdev); + + mei_dbgfs_deregister(dev); + + device_destroy(&mei_class, devno); + + mei_minor_free(dev); +} 
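[Editor's aside, illustrative only and not part of the patch: the registration above exposes each device as a character node named "mei%d" (e.g. /dev/mei0) backed by mei_fops. A minimal userspace sketch of the connect flow served by mei_ioctl() could look like the following; it assumes the uapi header <linux/mei.h>, the 16-byte firmware client UUID is a placeholder that must be replaced with a real client UUID, and error handling is abbreviated.]

/*
 * Illustrative userspace sketch (editorial, not part of this patch):
 * connect to a firmware client via IOCTL_MEI_CONNECT_CLIENT as handled
 * by mei_ioctl() above. The UUID bytes below are placeholders.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/mei.h>	/* IOCTL_MEI_CONNECT_CLIENT, struct mei_connect_client_data */

int main(void)
{
	/* placeholder UUID of the target firmware client, 16 raw bytes */
	static const unsigned char fw_uuid[16] = {
		0x78, 0x56, 0x34, 0x12, 0xbc, 0x9a, 0xf0, 0xde,
		0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0xfe,
	};
	struct mei_connect_client_data conn;
	int fd;

	fd = open("/dev/mei0", O_RDWR);
	if (fd < 0)
		return 1;

	memset(&conn, 0, sizeof(conn));
	memcpy(&conn.in_client_uuid, fw_uuid, sizeof(fw_uuid));

	/* on success the union is overwritten with the client properties */
	if (ioctl(fd, IOCTL_MEI_CONNECT_CLIENT, &conn) < 0) {
		close(fd);
		return 1;
	}

	printf("connected, max message length %u\n",
	       conn.out_client_properties.max_msg_length);

	/*
	 * From here, write() queues a request to the connected firmware
	 * client and read() waits for its reply (mei_write()/mei_read()
	 * referenced by mei_fops above); the payload format is defined by
	 * the individual firmware client.
	 */

	close(fd);
	return 0;
}

(The vtag-aware path handled by mei_ioctl_connect_vtag() above follows the same shape, with conn_vtag.connect.vtag carrying the virtual tag.)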
+EXPORT_SYMBOL_GPL(mei_deregister); + +static int __init mei_init(void) +{ + int ret; + + ret = class_register(&mei_class); + if (ret) + return ret; + + ret = alloc_chrdev_region(&mei_devt, 0, MEI_MAX_DEVS, "mei"); + if (ret < 0) { + pr_err("unable to allocate char dev region\n"); + goto err_class; + } + + ret = mei_cl_bus_init(); + if (ret < 0) { + pr_err("unable to initialize bus\n"); + goto err_chrdev; + } + + return 0; + +err_chrdev: + unregister_chrdev_region(mei_devt, MEI_MAX_DEVS); +err_class: + class_unregister(&mei_class); + return ret; +} + +static void __exit mei_exit(void) +{ + unregister_chrdev_region(mei_devt, MEI_MAX_DEVS); + class_unregister(&mei_class); + mei_cl_bus_exit(); +} + +module_init(mei_init); +module_exit(mei_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) Management Engine Interface"); +MODULE_LICENSE("GPL v2"); + diff --git a/drivers/misc/mei/mei-trace.c b/drivers/misc/mei/mei-trace.c new file mode 100644 index 0000000000..48d4c4fcef --- /dev/null +++ b/drivers/misc/mei/mei-trace.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2015-2016, Intel Corporation. All rights reserved. + * Intel Management Engine Interface (Intel MEI) Linux driver + */ +#include <linux/module.h> + +/* sparse doesn't like tracepoint macros */ +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "mei-trace.h" + +EXPORT_TRACEPOINT_SYMBOL(mei_reg_read); +EXPORT_TRACEPOINT_SYMBOL(mei_reg_write); +EXPORT_TRACEPOINT_SYMBOL(mei_pci_cfg_read); +#endif /* __CHECKER__ */ diff --git a/drivers/misc/mei/mei-trace.h b/drivers/misc/mei/mei-trace.h new file mode 100644 index 0000000000..fe46ff2b9d --- /dev/null +++ b/drivers/misc/mei/mei-trace.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2015-2016, Intel Corporation. All rights reserved.
+ * Intel Management Engine Interface (Intel MEI) Linux driver + */ + +#if !defined(_MEI_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _MEI_TRACE_H_ + +#include +#include +#include + +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mei + +TRACE_EVENT(mei_reg_read, + TP_PROTO(const struct device *dev, const char *reg, u32 offs, u32 val), + TP_ARGS(dev, reg, offs, val), + TP_STRUCT__entry( + __string(dev, dev_name(dev)) + __field(const char *, reg) + __field(u32, offs) + __field(u32, val) + ), + TP_fast_assign( + __assign_str(dev, dev_name(dev)); + __entry->reg = reg; + __entry->offs = offs; + __entry->val = val; + ), + TP_printk("[%s] read %s:[%#x] = %#x", + __get_str(dev), __entry->reg, __entry->offs, __entry->val) +); + +TRACE_EVENT(mei_reg_write, + TP_PROTO(const struct device *dev, const char *reg, u32 offs, u32 val), + TP_ARGS(dev, reg, offs, val), + TP_STRUCT__entry( + __string(dev, dev_name(dev)) + __field(const char *, reg) + __field(u32, offs) + __field(u32, val) + ), + TP_fast_assign( + __assign_str(dev, dev_name(dev)); + __entry->reg = reg; + __entry->offs = offs; + __entry->val = val; + ), + TP_printk("[%s] write %s[%#x] = %#x", + __get_str(dev), __entry->reg, __entry->offs, __entry->val) +); + +TRACE_EVENT(mei_pci_cfg_read, + TP_PROTO(const struct device *dev, const char *reg, u32 offs, u32 val), + TP_ARGS(dev, reg, offs, val), + TP_STRUCT__entry( + __string(dev, dev_name(dev)) + __field(const char *, reg) + __field(u32, offs) + __field(u32, val) + ), + TP_fast_assign( + __assign_str(dev, dev_name(dev)); + __entry->reg = reg; + __entry->offs = offs; + __entry->val = val; + ), + TP_printk("[%s] pci cfg read %s:[%#x] = %#x", + __get_str(dev), __entry->reg, __entry->offs, __entry->val) +); + +#endif /* _MEI_TRACE_H_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE mei-trace +#include diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h new file mode 100644 index 0000000000..cdf8a2edf0 --- /dev/null +++ b/drivers/misc/mei/mei_dev.h @@ -0,0 +1,878 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2003-2022, Intel Corporation. All rights reserved. + * Intel Management Engine Interface (Intel MEI) Linux driver + */ + +#ifndef _MEI_DEV_H_ +#define _MEI_DEV_H_ + +#include +#include +#include +#include +#include + +static inline int uuid_le_cmp(const uuid_le u1, const uuid_le u2) +{ + return memcmp(&u1, &u2, sizeof(uuid_le)); +} + +#include "hw.h" +#include "hbm.h" + +#define MEI_SLOT_SIZE sizeof(u32) +#define MEI_RD_MSG_BUF_SIZE (128 * MEI_SLOT_SIZE) + +/* + * Number of Maximum MEI Clients + */ +#define MEI_CLIENTS_MAX 256 + +/* + * maximum number of consecutive resets + */ +#define MEI_MAX_CONSEC_RESET 3 + +/* + * Number of File descriptors/handles + * that can be opened to the driver. 
+ * + * Limit to 255: 256 Total Clients + * minus internal client for MEI Bus Messages + */ +#define MEI_MAX_OPEN_HANDLE_COUNT (MEI_CLIENTS_MAX - 1) + +/* File state */ +enum file_state { + MEI_FILE_UNINITIALIZED = 0, + MEI_FILE_INITIALIZING, + MEI_FILE_CONNECTING, + MEI_FILE_CONNECTED, + MEI_FILE_DISCONNECTING, + MEI_FILE_DISCONNECT_REPLY, + MEI_FILE_DISCONNECT_REQUIRED, + MEI_FILE_DISCONNECTED, +}; + +/* MEI device states */ +enum mei_dev_state { + MEI_DEV_INITIALIZING = 0, + MEI_DEV_INIT_CLIENTS, + MEI_DEV_ENABLED, + MEI_DEV_RESETTING, + MEI_DEV_DISABLED, + MEI_DEV_POWERING_DOWN, + MEI_DEV_POWER_DOWN, + MEI_DEV_POWER_UP +}; + +/** + * enum mei_dev_pxp_mode - MEI PXP mode state + * + * @MEI_DEV_PXP_DEFAULT: PCH based device, no initialization required + * @MEI_DEV_PXP_INIT: device requires initialization, send setup message to firmware + * @MEI_DEV_PXP_SETUP: device is in setup stage, waiting for firmware response + * @MEI_DEV_PXP_READY: device initialized + */ +enum mei_dev_pxp_mode { + MEI_DEV_PXP_DEFAULT = 0, + MEI_DEV_PXP_INIT = 1, + MEI_DEV_PXP_SETUP = 2, + MEI_DEV_PXP_READY = 3, +}; + +const char *mei_dev_state_str(int state); + +enum mei_file_transaction_states { + MEI_IDLE, + MEI_WRITING, + MEI_WRITE_COMPLETE, +}; + +/** + * enum mei_cb_file_ops - file operation associated with the callback + * @MEI_FOP_READ: read + * @MEI_FOP_WRITE: write + * @MEI_FOP_CONNECT: connect + * @MEI_FOP_DISCONNECT: disconnect + * @MEI_FOP_DISCONNECT_RSP: disconnect response + * @MEI_FOP_NOTIFY_START: start notification + * @MEI_FOP_NOTIFY_STOP: stop notification + * @MEI_FOP_DMA_MAP: request client dma map + * @MEI_FOP_DMA_UNMAP: request client dma unmap + */ +enum mei_cb_file_ops { + MEI_FOP_READ = 0, + MEI_FOP_WRITE, + MEI_FOP_CONNECT, + MEI_FOP_DISCONNECT, + MEI_FOP_DISCONNECT_RSP, + MEI_FOP_NOTIFY_START, + MEI_FOP_NOTIFY_STOP, + MEI_FOP_DMA_MAP, + MEI_FOP_DMA_UNMAP, +}; + +/** + * enum mei_cl_io_mode - io mode between driver and fw + * + * @MEI_CL_IO_TX_BLOCKING: send is blocking + * @MEI_CL_IO_TX_INTERNAL: internal communication between driver and FW + * + * @MEI_CL_IO_RX_NONBLOCK: recv is non-blocking + * + * @MEI_CL_IO_SGL: send command with sgl list.
+ */ +enum mei_cl_io_mode { + MEI_CL_IO_TX_BLOCKING = BIT(0), + MEI_CL_IO_TX_INTERNAL = BIT(1), + + MEI_CL_IO_RX_NONBLOCK = BIT(2), + + MEI_CL_IO_SGL = BIT(3), +}; + +/* + * Intel MEI message data struct + */ +struct mei_msg_data { + size_t size; + unsigned char *data; +}; + +struct mei_dma_data { + u8 buffer_id; + void *vaddr; + dma_addr_t daddr; + size_t size; +}; + +/** + * struct mei_dma_dscr - dma address descriptor + * + * @vaddr: dma buffer virtual address + * @daddr: dma buffer physical address + * @size : dma buffer size + */ +struct mei_dma_dscr { + void *vaddr; + dma_addr_t daddr; + size_t size; +}; + +/* Maximum number of processed FW status registers */ +#define MEI_FW_STATUS_MAX 6 +/* Minimal buffer for FW status string (8 bytes in dw + space or '\0') */ +#define MEI_FW_STATUS_STR_SZ (MEI_FW_STATUS_MAX * (8 + 1)) + + +/* + * struct mei_fw_status - storage of FW status data + * + * @count: number of actually available elements in array + * @status: FW status registers + */ +struct mei_fw_status { + int count; + u32 status[MEI_FW_STATUS_MAX]; +}; + +/** + * struct mei_me_client - representation of me (fw) client + * + * @list: link in me client list + * @refcnt: struct reference count + * @props: client properties + * @client_id: me client id + * @tx_flow_ctrl_creds: flow control credits + * @connect_count: number connections to this client + * @bus_added: added to bus + */ +struct mei_me_client { + struct list_head list; + struct kref refcnt; + struct mei_client_properties props; + u8 client_id; + u8 tx_flow_ctrl_creds; + u8 connect_count; + u8 bus_added; +}; + + +struct mei_cl; + +/** + * struct mei_cl_cb - file operation callback structure + * + * @list: link in callback queue + * @cl: file client who is running this operation + * @fop_type: file operation type + * @buf: buffer for data associated with the callback + * @buf_idx: last read index + * @vtag: virtual tag + * @fp: pointer to file structure + * @status: io status of the cb + * @internal: communication between driver and FW flag + * @blocking: transmission blocking mode + * @ext_hdr: extended header + */ +struct mei_cl_cb { + struct list_head list; + struct mei_cl *cl; + enum mei_cb_file_ops fop_type; + struct mei_msg_data buf; + size_t buf_idx; + u8 vtag; + const struct file *fp; + int status; + u32 internal:1; + u32 blocking:1; + struct mei_ext_hdr *ext_hdr; +}; + +/** + * struct mei_cl_vtag - file pointer to vtag mapping structure + * + * @list: link in map queue + * @fp: file pointer + * @vtag: corresponding vtag + * @pending_read: the read is pending on this file + */ +struct mei_cl_vtag { + struct list_head list; + const struct file *fp; + u8 vtag; + u8 pending_read:1; +}; + +/** + * struct mei_cl - me client host representation + * carried in file->private_data + * + * @link: link in the clients list + * @dev: mei parent device + * @state: file operation state + * @tx_wait: wait queue for tx completion + * @rx_wait: wait queue for rx completion + * @wait: wait queue for management operation + * @ev_wait: notification wait queue + * @ev_async: event async notification + * @status: connection status + * @me_cl: fw client connected + * @fp: file associated with client + * @host_client_id: host id + * @vtag_map: vtag map + * @tx_flow_ctrl_creds: transmit flow credentials + * @rx_flow_ctrl_creds: receive flow credentials + * @timer_count: watchdog timer for operation completion + * @notify_en: notification - enabled/disabled + * @notify_ev: pending notification event + * @tx_cb_queued: number of tx callbacks in 
queue + * @writing_state: state of the tx + * @rd_pending: pending read credits + * @rd_completed_lock: protects rd_completed queue + * @rd_completed: completed read + * @dma: dma settings + * @dma_mapped: dma buffer is currently mapped. + * + * @cldev: device on the mei client bus + */ +struct mei_cl { + struct list_head link; + struct mei_device *dev; + enum file_state state; + wait_queue_head_t tx_wait; + wait_queue_head_t rx_wait; + wait_queue_head_t wait; + wait_queue_head_t ev_wait; + struct fasync_struct *ev_async; + int status; + struct mei_me_client *me_cl; + const struct file *fp; + u8 host_client_id; + struct list_head vtag_map; + u8 tx_flow_ctrl_creds; + u8 rx_flow_ctrl_creds; + u8 timer_count; + u8 notify_en; + u8 notify_ev; + u8 tx_cb_queued; + enum mei_file_transaction_states writing_state; + struct list_head rd_pending; + spinlock_t rd_completed_lock; /* protects rd_completed queue */ + struct list_head rd_completed; + struct mei_dma_data dma; + u8 dma_mapped; + + struct mei_cl_device *cldev; +}; + +#define MEI_TX_QUEUE_LIMIT_DEFAULT 50 +#define MEI_TX_QUEUE_LIMIT_MAX 255 +#define MEI_TX_QUEUE_LIMIT_MIN 30 + +/** + * struct mei_hw_ops - hw specific ops + * + * @host_is_ready : query for host readiness + * + * @hw_is_ready : query if hw is ready + * @hw_reset : reset hw + * @hw_start : start hw after reset + * @hw_config : configure hw + * + * @fw_status : get fw status registers + * @trc_status : get trc status register + * @pg_state : power gating state of the device + * @pg_in_transition : is device now in pg transition + * @pg_is_enabled : is power gating enabled + * + * @intr_clear : clear pending interrupts + * @intr_enable : enable interrupts + * @intr_disable : disable interrupts + * @synchronize_irq : synchronize irqs + * + * @hbuf_free_slots : query for write buffer empty slots + * @hbuf_is_ready : query if write buffer is empty + * @hbuf_depth : query for write buffer depth + * + * @write : write a message to FW + * + * @rdbuf_full_slots : query how many slots are filled + * + * @read_hdr : get first 4 bytes (header) + * @read : read a buffer from the FW + */ +struct mei_hw_ops { + + bool (*host_is_ready)(struct mei_device *dev); + + bool (*hw_is_ready)(struct mei_device *dev); + int (*hw_reset)(struct mei_device *dev, bool enable); + int (*hw_start)(struct mei_device *dev); + int (*hw_config)(struct mei_device *dev); + + int (*fw_status)(struct mei_device *dev, struct mei_fw_status *fw_sts); + int (*trc_status)(struct mei_device *dev, u32 *trc); + + enum mei_pg_state (*pg_state)(struct mei_device *dev); + bool (*pg_in_transition)(struct mei_device *dev); + bool (*pg_is_enabled)(struct mei_device *dev); + + void (*intr_clear)(struct mei_device *dev); + void (*intr_enable)(struct mei_device *dev); + void (*intr_disable)(struct mei_device *dev); + void (*synchronize_irq)(struct mei_device *dev); + + int (*hbuf_free_slots)(struct mei_device *dev); + bool (*hbuf_is_ready)(struct mei_device *dev); + u32 (*hbuf_depth)(const struct mei_device *dev); + int (*write)(struct mei_device *dev, + const void *hdr, size_t hdr_len, + const void *data, size_t data_len); + + int (*rdbuf_full_slots)(struct mei_device *dev); + + u32 (*read_hdr)(const struct mei_device *dev); + int (*read)(struct mei_device *dev, + unsigned char *buf, unsigned long len); +}; + +/* MEI bus API*/ +void mei_cl_bus_rescan_work(struct work_struct *work); +void mei_cl_bus_dev_fixup(struct mei_cl_device *dev); +ssize_t __mei_cl_send(struct mei_cl *cl, const u8 *buf, size_t length, u8 vtag, + unsigned int 
mode); +ssize_t __mei_cl_send_timeout(struct mei_cl *cl, const u8 *buf, size_t length, u8 vtag, + unsigned int mode, unsigned long timeout); +ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length, u8 *vtag, + unsigned int mode, unsigned long timeout); +bool mei_cl_bus_rx_event(struct mei_cl *cl); +bool mei_cl_bus_notify_event(struct mei_cl *cl); +void mei_cl_bus_remove_devices(struct mei_device *bus); +int mei_cl_bus_init(void); +void mei_cl_bus_exit(void); + +/** + * enum mei_pg_event - power gating transition events + * + * @MEI_PG_EVENT_IDLE: the driver is not in power gating transition + * @MEI_PG_EVENT_WAIT: the driver is waiting for a pg event to complete + * @MEI_PG_EVENT_RECEIVED: the driver received pg event + * @MEI_PG_EVENT_INTR_WAIT: the driver is waiting for a pg event interrupt + * @MEI_PG_EVENT_INTR_RECEIVED: the driver received pg event interrupt + */ +enum mei_pg_event { + MEI_PG_EVENT_IDLE, + MEI_PG_EVENT_WAIT, + MEI_PG_EVENT_RECEIVED, + MEI_PG_EVENT_INTR_WAIT, + MEI_PG_EVENT_INTR_RECEIVED, +}; + +/** + * enum mei_pg_state - device internal power gating state + * + * @MEI_PG_OFF: device is not power gated - it is active + * @MEI_PG_ON: device is power gated - it is in lower power state + */ +enum mei_pg_state { + MEI_PG_OFF = 0, + MEI_PG_ON = 1, +}; + +const char *mei_pg_state_str(enum mei_pg_state state); + +/** + * struct mei_fw_version - MEI FW version struct + * + * @platform: platform identifier + * @major: major version field + * @minor: minor version field + * @buildno: build number version field + * @hotfix: hotfix number version field + */ +struct mei_fw_version { + u8 platform; + u8 major; + u16 minor; + u16 buildno; + u16 hotfix; +}; + +#define MEI_MAX_FW_VER_BLOCKS 3 + +struct mei_dev_timeouts { + unsigned long hw_ready; /* Timeout on ready message, in jiffies */ + int connect; /* HPS: at least 2 seconds, in seconds */ + unsigned long cl_connect; /* HPS: Client Connect Timeout, in jiffies */ + int client_init; /* HPS: Clients Enumeration Timeout, in seconds */ + unsigned long pgi; /* PG Isolation time response, in jiffies */ + unsigned int d0i3; /* D0i3 set/unset max response time, in jiffies */ + unsigned long hbm; /* HBM operation timeout, in jiffies */ + unsigned long mkhi_recv; /* receive timeout, in jiffies */ +}; + +/** + * struct mei_device - MEI private device struct + * + * @dev : device on a bus + * @cdev : character device + * @minor : minor number allocated for device + * + * @write_list : write pending list + * @write_waiting_list : write completion list + * @ctrl_wr_list : pending control write list + * @ctrl_rd_list : pending control read list + * @tx_queue_limit: tx queues per client linit + * + * @file_list : list of opened handles + * @open_handle_count: number of opened handles + * + * @device_lock : big device lock + * @timer_work : MEI timer delayed work (timeouts) + * + * @recvd_hw_ready : hw ready message received flag + * + * @wait_hw_ready : wait queue for receive HW ready message form FW + * @wait_pg : wait queue for receive PG message from FW + * @wait_hbm_start : wait queue for receive HBM start message from FW + * + * @reset_count : number of consecutive resets + * @dev_state : device state + * @hbm_state : state of host bus message protocol + * @pxp_mode : PXP device mode + * @init_clients_timer : HBM init handshake timeout + * + * @pg_event : power gating event + * @pg_domain : runtime PM domain + * + * @rd_msg_buf : control messages buffer + * @rd_msg_hdr : read message header storage + * @rd_msg_hdr_count : how many 
dwords were already read from header + * + * @hbuf_is_ready : query if the host host/write buffer is ready + * @dr_dscr: DMA ring descriptors: TX, RX, and CTRL + * + * @version : HBM protocol version in use + * @hbm_f_pg_supported : hbm feature pgi protocol + * @hbm_f_dc_supported : hbm feature dynamic clients + * @hbm_f_dot_supported : hbm feature disconnect on timeout + * @hbm_f_ev_supported : hbm feature event notification + * @hbm_f_fa_supported : hbm feature fixed address client + * @hbm_f_ie_supported : hbm feature immediate reply to enum request + * @hbm_f_os_supported : hbm feature support OS ver message + * @hbm_f_dr_supported : hbm feature dma ring supported + * @hbm_f_vt_supported : hbm feature vtag supported + * @hbm_f_cap_supported : hbm feature capabilities message supported + * @hbm_f_cd_supported : hbm feature client dma supported + * @hbm_f_gsc_supported : hbm feature gsc supported + * + * @fw_ver : FW versions + * + * @fw_f_fw_ver_supported : fw feature: fw version supported + * @fw_ver_received : fw version received + * + * @me_clients_rwsem: rw lock over me_clients list + * @me_clients : list of FW clients + * @me_clients_map : FW clients bit map + * @host_clients_map : host clients id pool + * + * @allow_fixed_address: allow user space to connect a fixed client + * @override_fixed_address: force allow fixed address behavior + * + * @timeouts: actual timeout values + * + * @reset_work : work item for the device reset + * @bus_rescan_work : work item for the bus rescan + * + * @device_list : mei client bus list + * @cl_bus_lock : client bus list lock + * + * @kind : kind of mei device + * + * @dbgfs_dir : debugfs mei root directory + * + * @ops: : hw specific operations + * @hw : hw specific data + */ +struct mei_device { + struct device *dev; + struct cdev cdev; + int minor; + + struct list_head write_list; + struct list_head write_waiting_list; + struct list_head ctrl_wr_list; + struct list_head ctrl_rd_list; + u8 tx_queue_limit; + + struct list_head file_list; + long open_handle_count; + + struct mutex device_lock; + struct delayed_work timer_work; + + bool recvd_hw_ready; + /* + * waiting queue for receive message from FW + */ + wait_queue_head_t wait_hw_ready; + wait_queue_head_t wait_pg; + wait_queue_head_t wait_hbm_start; + + /* + * mei device states + */ + unsigned long reset_count; + enum mei_dev_state dev_state; + enum mei_hbm_state hbm_state; + enum mei_dev_pxp_mode pxp_mode; + u16 init_clients_timer; + + /* + * Power Gating support + */ + enum mei_pg_event pg_event; +#ifdef CONFIG_PM + struct dev_pm_domain pg_domain; +#endif /* CONFIG_PM */ + + unsigned char rd_msg_buf[MEI_RD_MSG_BUF_SIZE]; + u32 rd_msg_hdr[MEI_RD_MSG_BUF_SIZE]; + int rd_msg_hdr_count; + + /* write buffer */ + bool hbuf_is_ready; + + struct mei_dma_dscr dr_dscr[DMA_DSCR_NUM]; + + struct hbm_version version; + unsigned int hbm_f_pg_supported:1; + unsigned int hbm_f_dc_supported:1; + unsigned int hbm_f_dot_supported:1; + unsigned int hbm_f_ev_supported:1; + unsigned int hbm_f_fa_supported:1; + unsigned int hbm_f_ie_supported:1; + unsigned int hbm_f_os_supported:1; + unsigned int hbm_f_dr_supported:1; + unsigned int hbm_f_vt_supported:1; + unsigned int hbm_f_cap_supported:1; + unsigned int hbm_f_cd_supported:1; + unsigned int hbm_f_gsc_supported:1; + + struct mei_fw_version fw_ver[MEI_MAX_FW_VER_BLOCKS]; + + unsigned int fw_f_fw_ver_supported:1; + unsigned int fw_ver_received:1; + + struct rw_semaphore me_clients_rwsem; + struct list_head me_clients; + DECLARE_BITMAP(me_clients_map, 
MEI_CLIENTS_MAX); + DECLARE_BITMAP(host_clients_map, MEI_CLIENTS_MAX); + + bool allow_fixed_address; + bool override_fixed_address; + + struct mei_dev_timeouts timeouts; + + struct work_struct reset_work; + struct work_struct bus_rescan_work; + + /* List of bus devices */ + struct list_head device_list; + struct mutex cl_bus_lock; + + const char *kind; + +#if IS_ENABLED(CONFIG_DEBUG_FS) + struct dentry *dbgfs_dir; +#endif /* CONFIG_DEBUG_FS */ + + const struct mei_hw_ops *ops; + char hw[] __aligned(sizeof(void *)); +}; + +static inline unsigned long mei_secs_to_jiffies(unsigned long sec) +{ + return msecs_to_jiffies(sec * MSEC_PER_SEC); +} + +/** + * mei_data2slots - get slots number from a message length + * + * @length: size of the messages in bytes + * + * Return: number of slots + */ +static inline u32 mei_data2slots(size_t length) +{ + return DIV_ROUND_UP(length, MEI_SLOT_SIZE); +} + +/** + * mei_hbm2slots - get slots number from a hbm message length + * length + size of the mei message header + * + * @length: size of the messages in bytes + * + * Return: number of slots + */ +static inline u32 mei_hbm2slots(size_t length) +{ + return DIV_ROUND_UP(sizeof(struct mei_msg_hdr) + length, MEI_SLOT_SIZE); +} + +/** + * mei_slots2data - get data in slots - bytes from slots + * + * @slots: number of available slots + * + * Return: number of bytes in slots + */ +static inline u32 mei_slots2data(int slots) +{ + return slots * MEI_SLOT_SIZE; +} + +/* + * mei init function prototypes + */ +void mei_device_init(struct mei_device *dev, + struct device *device, + bool slow_fw, + const struct mei_hw_ops *hw_ops); +int mei_reset(struct mei_device *dev); +int mei_start(struct mei_device *dev); +int mei_restart(struct mei_device *dev); +void mei_stop(struct mei_device *dev); +void mei_cancel_work(struct mei_device *dev); + +void mei_set_devstate(struct mei_device *dev, enum mei_dev_state state); + +int mei_dmam_ring_alloc(struct mei_device *dev); +void mei_dmam_ring_free(struct mei_device *dev); +bool mei_dma_ring_is_allocated(struct mei_device *dev); +void mei_dma_ring_reset(struct mei_device *dev); +void mei_dma_ring_read(struct mei_device *dev, unsigned char *buf, u32 len); +void mei_dma_ring_write(struct mei_device *dev, unsigned char *buf, u32 len); +u32 mei_dma_ring_empty_slots(struct mei_device *dev); + +/* + * MEI interrupt functions prototype + */ + +void mei_timer(struct work_struct *work); +void mei_schedule_stall_timer(struct mei_device *dev); +int mei_irq_read_handler(struct mei_device *dev, + struct list_head *cmpl_list, s32 *slots); + +int mei_irq_write_handler(struct mei_device *dev, struct list_head *cmpl_list); +void mei_irq_compl_handler(struct mei_device *dev, struct list_head *cmpl_list); + +/* + * Register Access Function + */ + + +static inline int mei_hw_config(struct mei_device *dev) +{ + return dev->ops->hw_config(dev); +} + +static inline enum mei_pg_state mei_pg_state(struct mei_device *dev) +{ + return dev->ops->pg_state(dev); +} + +static inline bool mei_pg_in_transition(struct mei_device *dev) +{ + return dev->ops->pg_in_transition(dev); +} + +static inline bool mei_pg_is_enabled(struct mei_device *dev) +{ + return dev->ops->pg_is_enabled(dev); +} + +static inline int mei_hw_reset(struct mei_device *dev, bool enable) +{ + return dev->ops->hw_reset(dev, enable); +} + +static inline int mei_hw_start(struct mei_device *dev) +{ + return dev->ops->hw_start(dev); +} + +static inline void mei_clear_interrupts(struct mei_device *dev) +{ + dev->ops->intr_clear(dev); +} + +static 
inline void mei_enable_interrupts(struct mei_device *dev) +{ + dev->ops->intr_enable(dev); +} + +static inline void mei_disable_interrupts(struct mei_device *dev) +{ + dev->ops->intr_disable(dev); +} + +static inline void mei_synchronize_irq(struct mei_device *dev) +{ + dev->ops->synchronize_irq(dev); +} + +static inline bool mei_host_is_ready(struct mei_device *dev) +{ + return dev->ops->host_is_ready(dev); +} +static inline bool mei_hw_is_ready(struct mei_device *dev) +{ + return dev->ops->hw_is_ready(dev); +} + +static inline bool mei_hbuf_is_ready(struct mei_device *dev) +{ + return dev->ops->hbuf_is_ready(dev); +} + +static inline int mei_hbuf_empty_slots(struct mei_device *dev) +{ + return dev->ops->hbuf_free_slots(dev); +} + +static inline u32 mei_hbuf_depth(const struct mei_device *dev) +{ + return dev->ops->hbuf_depth(dev); +} + +static inline int mei_write_message(struct mei_device *dev, + const void *hdr, size_t hdr_len, + const void *data, size_t data_len) +{ + return dev->ops->write(dev, hdr, hdr_len, data, data_len); +} + +static inline u32 mei_read_hdr(const struct mei_device *dev) +{ + return dev->ops->read_hdr(dev); +} + +static inline void mei_read_slots(struct mei_device *dev, + unsigned char *buf, unsigned long len) +{ + dev->ops->read(dev, buf, len); +} + +static inline int mei_count_full_read_slots(struct mei_device *dev) +{ + return dev->ops->rdbuf_full_slots(dev); +} + +static inline int mei_trc_status(struct mei_device *dev, u32 *trc) +{ + if (dev->ops->trc_status) + return dev->ops->trc_status(dev, trc); + return -EOPNOTSUPP; +} + +static inline int mei_fw_status(struct mei_device *dev, + struct mei_fw_status *fw_status) +{ + return dev->ops->fw_status(dev, fw_status); +} + +bool mei_hbuf_acquire(struct mei_device *dev); + +bool mei_write_is_idle(struct mei_device *dev); + +#if IS_ENABLED(CONFIG_DEBUG_FS) +void mei_dbgfs_register(struct mei_device *dev, const char *name); +void mei_dbgfs_deregister(struct mei_device *dev); +#else +static inline void mei_dbgfs_register(struct mei_device *dev, const char *name) {} +static inline void mei_dbgfs_deregister(struct mei_device *dev) {} +#endif /* CONFIG_DEBUG_FS */ + +int mei_register(struct mei_device *dev, struct device *parent); +void mei_deregister(struct mei_device *dev); + +#define MEI_HDR_FMT "hdr:host=%02d me=%02d len=%d dma=%1d ext=%1d internal=%1d comp=%1d" +#define MEI_HDR_PRM(hdr) \ + (hdr)->host_addr, (hdr)->me_addr, \ + (hdr)->length, (hdr)->dma_ring, (hdr)->extended, \ + (hdr)->internal, (hdr)->msg_complete + +ssize_t mei_fw_status2str(struct mei_fw_status *fw_sts, char *buf, size_t len); +/** + * mei_fw_status_str - fetch and convert fw status registers to printable string + * + * @dev: the device structure + * @buf: string buffer at minimal size MEI_FW_STATUS_STR_SZ + * @len: buffer len must be >= MEI_FW_STATUS_STR_SZ + * + * Return: number of bytes written or < 0 on failure + */ +static inline ssize_t mei_fw_status_str(struct mei_device *dev, + char *buf, size_t len) +{ + struct mei_fw_status fw_status; + int ret; + + buf[0] = '\0'; + + ret = mei_fw_status(dev, &fw_status); + if (ret) + return ret; + + ret = mei_fw_status2str(&fw_status, buf, MEI_FW_STATUS_STR_SZ); + + return ret; +} + + +#endif diff --git a/drivers/misc/mei/mkhi.h b/drivers/misc/mei/mkhi.h new file mode 100644 index 0000000000..1473ea4896 --- /dev/null +++ b/drivers/misc/mei/mkhi.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2003-2022, Intel Corporation. All rights reserved. 
+ * Intel Management Engine Interface (Intel MEI) Linux driver + */ + +#ifndef _MEI_MKHI_H_ +#define _MEI_MKHI_H_ + +#include + +#define MKHI_FEATURE_PTT 0x10 + +#define MKHI_FWCAPS_GROUP_ID 0x3 +#define MKHI_FWCAPS_SET_OS_VER_APP_RULE_CMD 6 +#define MKHI_GEN_GROUP_ID 0xFF +#define MKHI_GEN_GET_FW_VERSION_CMD 0x2 + +#define MKHI_GROUP_ID_GFX 0x30 +#define MKHI_GFX_RESET_WARN_CMD_REQ 0x0 +#define MKHI_GFX_MEMORY_READY_CMD_REQ 0x1 + +/* Allow transition to PXP mode without approval */ +#define MKHI_GFX_MEM_READY_PXP_ALLOWED 0x1 + +struct mkhi_rule_id { + __le16 rule_type; + u8 feature_id; + u8 reserved; +} __packed; + +struct mkhi_fwcaps { + struct mkhi_rule_id id; + u8 len; + u8 data[]; +} __packed; + +struct mkhi_msg_hdr { + u8 group_id; + u8 command; + u8 reserved; + u8 result; +} __packed; + +struct mkhi_msg { + struct mkhi_msg_hdr hdr; + u8 data[]; +} __packed; + +struct mkhi_gfx_mem_ready { + struct mkhi_msg_hdr hdr; + u32 flags; +} __packed; + +#endif /* _MEI_MKHI_H_ */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c new file mode 100644 index 0000000000..676d566f38 --- /dev/null +++ b/drivers/misc/mei/pci-me.c @@ -0,0 +1,551 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2003-2022, Intel Corporation. All rights reserved. + * Intel Management Engine Interface (Intel MEI) Linux driver + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include "mei_dev.h" +#include "client.h" +#include "hw-me-regs.h" +#include "hw-me.h" + +/* mei_pci_tbl - PCI Device ID Table */ +static const struct pci_device_id mei_me_pci_tbl[] = { + {MEI_PCI_DEVICE(MEI_DEV_ID_82946GZ, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_82G35, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_82Q965, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_82G965, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_82GM965, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_82GME965, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_82Q35, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_82G33, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_82Q33, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_82X38, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_3200, MEI_ME_ICH_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_6, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_7, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_8, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_9, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_10, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9M_1, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9M_2, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9M_3, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9M_4, MEI_ME_ICH_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_1, MEI_ME_ICH10_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_2, MEI_ME_ICH10_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_3, MEI_ME_ICH10_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_4, MEI_ME_ICH10_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_1, MEI_ME_PCH6_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_2, MEI_ME_PCH6_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CPT_1, MEI_ME_PCH_CPT_PBG_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_PBG_1, MEI_ME_PCH_CPT_PBG_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_1, MEI_ME_PCH7_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_2, MEI_ME_PCH7_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_3, MEI_ME_PCH7_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_H, MEI_ME_PCH8_SPS_4_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_W, MEI_ME_PCH8_SPS_4_CFG)}, 
+ {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_LP, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_HR, MEI_ME_PCH8_SPS_4_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_WPT_LP, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_WPT_LP_2, MEI_ME_PCH8_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_SPT, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_2, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_3, MEI_ME_PCH8_ITOUCH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, MEI_ME_PCH8_SPS_4_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, MEI_ME_PCH8_SPS_4_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_LBG, MEI_ME_PCH12_SPS_4_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, MEI_ME_PCH8_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_DNV_IE, MEI_ME_PCH8_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_GLK, MEI_ME_PCH8_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_KBP, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_KBP_2, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_KBP_3, MEI_ME_PCH8_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP_3, MEI_ME_PCH8_ITOUCH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H, MEI_ME_PCH12_SPS_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H_3, MEI_ME_PCH12_SPS_ITOUCH_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP_3, MEI_ME_PCH8_ITOUCH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_V, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_H, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_H_3, MEI_ME_PCH8_ITOUCH_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_N, MEI_ME_PCH12_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_TGP_LP, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_TGP_H, MEI_ME_PCH15_SPS_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_JSP_N, MEI_ME_PCH15_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_MCC, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_MCC_4, MEI_ME_PCH8_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_CDF, MEI_ME_PCH8_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_EBG, MEI_ME_PCH15_SPS_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_S, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_LP, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_P, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_N, MEI_ME_PCH15_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_MTL_M, MEI_ME_PCH15_CFG)}, + + /* required last entry */ + {0, } +}; + +MODULE_DEVICE_TABLE(pci, mei_me_pci_tbl); + +#ifdef CONFIG_PM +static inline void mei_me_set_pm_domain(struct mei_device *dev); +static inline void mei_me_unset_pm_domain(struct mei_device *dev); +#else +static inline void mei_me_set_pm_domain(struct mei_device *dev) {} +static inline void mei_me_unset_pm_domain(struct mei_device *dev) {} +#endif /* CONFIG_PM */ + +static int mei_me_read_fws(const struct mei_device *dev, int where, u32 *val) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + + return pci_read_config_dword(pdev, where, val); +} + +/** + * mei_me_quirk_probe - probe for devices that doesn't valid ME interface + * + * @pdev: PCI device structure + * @cfg: per generation config + * + * Return: true if ME Interface is valid, false otherwise + */ +static bool mei_me_quirk_probe(struct pci_dev *pdev, + const struct mei_cfg *cfg) +{ + if (cfg->quirk_probe && cfg->quirk_probe(pdev)) { + dev_info(&pdev->dev, "Device doesn't have valid ME Interface\n"); + return false; + } + + return true; +} + +/** + * mei_me_probe - Device Initialization Routine + * + * @pdev: PCI device structure + * @ent: 
entry in kcs_pci_tbl + * + * Return: 0 on success, <0 on failure. + */ +static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + const struct mei_cfg *cfg; + struct mei_device *dev; + struct mei_me_hw *hw; + unsigned int irqflags; + int err; + + cfg = mei_me_get_cfg(ent->driver_data); + if (!cfg) + return -ENODEV; + + if (!mei_me_quirk_probe(pdev, cfg)) + return -ENODEV; + + /* enable pci dev */ + err = pcim_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "failed to enable pci device.\n"); + goto end; + } + /* set PCI host mastering */ + pci_set_master(pdev); + /* pci request regions and mapping IO device memory for mei driver */ + err = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME); + if (err) { + dev_err(&pdev->dev, "failed to get pci regions.\n"); + goto end; + } + + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { + dev_err(&pdev->dev, "No usable DMA configuration, aborting\n"); + goto end; + } + + /* allocates and initializes the mei dev structure */ + dev = mei_me_dev_init(&pdev->dev, cfg, false); + if (!dev) { + err = -ENOMEM; + goto end; + } + hw = to_me_hw(dev); + hw->mem_addr = pcim_iomap_table(pdev)[0]; + hw->read_fws = mei_me_read_fws; + + pci_enable_msi(pdev); + + hw->irq = pdev->irq; + + /* request and enable interrupt */ + irqflags = pci_dev_msi_enabled(pdev) ? IRQF_ONESHOT : IRQF_SHARED; + + err = request_threaded_irq(pdev->irq, + mei_me_irq_quick_handler, + mei_me_irq_thread_handler, + irqflags, KBUILD_MODNAME, dev); + if (err) { + dev_err(&pdev->dev, "request_threaded_irq failure. irq = %d\n", + pdev->irq); + goto end; + } + + if (mei_start(dev)) { + dev_err(&pdev->dev, "init hw failure.\n"); + err = -ENODEV; + goto release_irq; + } + + pm_runtime_set_autosuspend_delay(&pdev->dev, MEI_ME_RPM_TIMEOUT); + pm_runtime_use_autosuspend(&pdev->dev); + + err = mei_register(dev, &pdev->dev); + if (err) + goto stop; + + pci_set_drvdata(pdev, dev); + + /* + * MEI requires to resume from runtime suspend mode + * in order to perform link reset flow upon system suspend. + */ + dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); + + /* + * ME maps runtime suspend/resume to D0i states, + * hence we need to go around native PCI runtime service which + * eventually brings the device into D3cold/hot state, + * but the mei device cannot wake up from D3 unlike from D0i3. + * To get around the PCI device native runtime pm, + * ME uses runtime pm domain handlers which take precedence + * over the driver's pm handlers. + */ + mei_me_set_pm_domain(dev); + + if (mei_pg_is_enabled(dev)) { + pm_runtime_put_noidle(&pdev->dev); + if (hw->d0i3_supported) + pm_runtime_allow(&pdev->dev); + } + + dev_dbg(&pdev->dev, "initialization successful.\n"); + + return 0; + +stop: + mei_stop(dev); +release_irq: + mei_cancel_work(dev); + mei_disable_interrupts(dev); + free_irq(pdev->irq, dev); +end: + dev_err(&pdev->dev, "initialization failed.\n"); + return err; +} + +/** + * mei_me_shutdown - Device Removal Routine + * + * @pdev: PCI device structure + * + * mei_me_shutdown is called from the reboot notifier + * it's a simplified version of remove so we go down + * faster. 
+ */ +static void mei_me_shutdown(struct pci_dev *pdev) +{ + struct mei_device *dev; + + dev = pci_get_drvdata(pdev); + if (!dev) + return; + + dev_dbg(&pdev->dev, "shutdown\n"); + mei_stop(dev); + + mei_me_unset_pm_domain(dev); + + mei_disable_interrupts(dev); + free_irq(pdev->irq, dev); +} + +/** + * mei_me_remove - Device Removal Routine + * + * @pdev: PCI device structure + * + * mei_me_remove is called by the PCI subsystem to alert the driver + * that it should release a PCI device. + */ +static void mei_me_remove(struct pci_dev *pdev) +{ + struct mei_device *dev; + + dev = pci_get_drvdata(pdev); + if (!dev) + return; + + if (mei_pg_is_enabled(dev)) + pm_runtime_get_noresume(&pdev->dev); + + dev_dbg(&pdev->dev, "stop\n"); + mei_stop(dev); + + mei_me_unset_pm_domain(dev); + + mei_disable_interrupts(dev); + + free_irq(pdev->irq, dev); + + mei_deregister(dev); +} + +#ifdef CONFIG_PM_SLEEP +static int mei_me_pci_prepare(struct device *device) +{ + pm_runtime_resume(device); + return 0; +} + +static int mei_me_pci_suspend(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct mei_device *dev = pci_get_drvdata(pdev); + + if (!dev) + return -ENODEV; + + dev_dbg(&pdev->dev, "suspend\n"); + + mei_stop(dev); + + mei_disable_interrupts(dev); + + free_irq(pdev->irq, dev); + pci_disable_msi(pdev); + + return 0; +} + +static int mei_me_pci_resume(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct mei_device *dev; + unsigned int irqflags; + int err; + + dev = pci_get_drvdata(pdev); + if (!dev) + return -ENODEV; + + pci_enable_msi(pdev); + + irqflags = pci_dev_msi_enabled(pdev) ? IRQF_ONESHOT : IRQF_SHARED; + + /* request and enable interrupt */ + err = request_threaded_irq(pdev->irq, + mei_me_irq_quick_handler, + mei_me_irq_thread_handler, + irqflags, KBUILD_MODNAME, dev); + + if (err) { + dev_err(&pdev->dev, "request_threaded_irq failed: irq = %d.\n", + pdev->irq); + return err; + } + + err = mei_restart(dev); + if (err) + return err; + + /* Start timer if stopped in suspend */ + schedule_delayed_work(&dev->timer_work, HZ); + + return 0; +} + +static void mei_me_pci_complete(struct device *device) +{ + pm_runtime_suspend(device); +} +#else /* CONFIG_PM_SLEEP */ + +#define mei_me_pci_prepare NULL +#define mei_me_pci_complete NULL + +#endif /* !CONFIG_PM_SLEEP */ + +#ifdef CONFIG_PM +static int mei_me_pm_runtime_idle(struct device *device) +{ + struct mei_device *dev; + + dev_dbg(device, "rpm: me: runtime_idle\n"); + + dev = dev_get_drvdata(device); + if (!dev) + return -ENODEV; + if (mei_write_is_idle(dev)) + pm_runtime_autosuspend(device); + + return -EBUSY; +} + +static int mei_me_pm_runtime_suspend(struct device *device) +{ + struct mei_device *dev; + int ret; + + dev_dbg(device, "rpm: me: runtime suspend\n"); + + dev = dev_get_drvdata(device); + if (!dev) + return -ENODEV; + + mutex_lock(&dev->device_lock); + + if (mei_write_is_idle(dev)) + ret = mei_me_pg_enter_sync(dev); + else + ret = -EAGAIN; + + mutex_unlock(&dev->device_lock); + + dev_dbg(device, "rpm: me: runtime suspend ret=%d\n", ret); + + if (ret && ret != -EAGAIN) + schedule_work(&dev->reset_work); + + return ret; +} + +static int mei_me_pm_runtime_resume(struct device *device) +{ + struct mei_device *dev; + int ret; + + dev_dbg(device, "rpm: me: runtime resume\n"); + + dev = dev_get_drvdata(device); + if (!dev) + return -ENODEV; + + mutex_lock(&dev->device_lock); + + ret = mei_me_pg_exit_sync(dev); + + mutex_unlock(&dev->device_lock); + + dev_dbg(device, "rpm: me: runtime resume 
ret = %d\n", ret); + + if (ret) + schedule_work(&dev->reset_work); + + return ret; +} + +/** + * mei_me_set_pm_domain - fill and set pm domain structure for device + * + * @dev: mei_device + */ +static inline void mei_me_set_pm_domain(struct mei_device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + + if (pdev->dev.bus && pdev->dev.bus->pm) { + dev->pg_domain.ops = *pdev->dev.bus->pm; + + dev->pg_domain.ops.runtime_suspend = mei_me_pm_runtime_suspend; + dev->pg_domain.ops.runtime_resume = mei_me_pm_runtime_resume; + dev->pg_domain.ops.runtime_idle = mei_me_pm_runtime_idle; + + dev_pm_domain_set(&pdev->dev, &dev->pg_domain); + } +} + +/** + * mei_me_unset_pm_domain - clean pm domain structure for device + * + * @dev: mei_device + */ +static inline void mei_me_unset_pm_domain(struct mei_device *dev) +{ + /* stop using pm callbacks if any */ + dev_pm_domain_set(dev->dev, NULL); +} + +static const struct dev_pm_ops mei_me_pm_ops = { + .prepare = mei_me_pci_prepare, + .complete = mei_me_pci_complete, + SET_SYSTEM_SLEEP_PM_OPS(mei_me_pci_suspend, + mei_me_pci_resume) + SET_RUNTIME_PM_OPS( + mei_me_pm_runtime_suspend, + mei_me_pm_runtime_resume, + mei_me_pm_runtime_idle) +}; + +#define MEI_ME_PM_OPS (&mei_me_pm_ops) +#else +#define MEI_ME_PM_OPS NULL +#endif /* CONFIG_PM */ +/* + * PCI driver structure + */ +static struct pci_driver mei_me_driver = { + .name = KBUILD_MODNAME, + .id_table = mei_me_pci_tbl, + .probe = mei_me_probe, + .remove = mei_me_remove, + .shutdown = mei_me_shutdown, + .driver.pm = MEI_ME_PM_OPS, + .driver.probe_type = PROBE_PREFER_ASYNCHRONOUS, +}; + +module_pci_driver(mei_me_driver); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) Management Engine Interface"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/mei/pci-txe.c b/drivers/misc/mei/pci-txe.c new file mode 100644 index 0000000000..fa20d9a278 --- /dev/null +++ b/drivers/misc/mei/pci-txe.c @@ -0,0 +1,406 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2013-2020, Intel Corporation. All rights reserved. + * Intel Management Engine Interface (Intel MEI) Linux driver + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +#include "mei_dev.h" +#include "hw-txe.h" + +static const struct pci_device_id mei_txe_pci_tbl[] = { + {PCI_VDEVICE(INTEL, 0x0F18)}, /* Baytrail */ + {PCI_VDEVICE(INTEL, 0x2298)}, /* Cherrytrail */ + + {0, } +}; +MODULE_DEVICE_TABLE(pci, mei_txe_pci_tbl); + +#ifdef CONFIG_PM +static inline void mei_txe_set_pm_domain(struct mei_device *dev); +static inline void mei_txe_unset_pm_domain(struct mei_device *dev); +#else +static inline void mei_txe_set_pm_domain(struct mei_device *dev) {} +static inline void mei_txe_unset_pm_domain(struct mei_device *dev) {} +#endif /* CONFIG_PM */ + +/** + * mei_txe_probe - Device Initialization Routine + * + * @pdev: PCI device structure + * @ent: entry in mei_txe_pci_tbl + * + * Return: 0 on success, <0 on failure. 
+ */ +static int mei_txe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct mei_device *dev; + struct mei_txe_hw *hw; + const int mask = BIT(SEC_BAR) | BIT(BRIDGE_BAR); + int err; + + /* enable pci dev */ + err = pcim_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "failed to enable pci device.\n"); + goto end; + } + /* set PCI host mastering */ + pci_set_master(pdev); + /* pci request regions and mapping IO device memory for mei driver */ + err = pcim_iomap_regions(pdev, mask, KBUILD_MODNAME); + if (err) { + dev_err(&pdev->dev, "failed to get pci regions.\n"); + goto end; + } + + err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(36)); + if (err) { + err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, "No suitable DMA available.\n"); + goto end; + } + } + + /* allocates and initializes the mei dev structure */ + dev = mei_txe_dev_init(pdev); + if (!dev) { + err = -ENOMEM; + goto end; + } + hw = to_txe_hw(dev); + hw->mem_addr = pcim_iomap_table(pdev); + + pci_enable_msi(pdev); + + /* clear spurious interrupts */ + mei_clear_interrupts(dev); + + /* request and enable interrupt */ + if (pci_dev_msi_enabled(pdev)) + err = request_threaded_irq(pdev->irq, + NULL, + mei_txe_irq_thread_handler, + IRQF_ONESHOT, KBUILD_MODNAME, dev); + else + err = request_threaded_irq(pdev->irq, + mei_txe_irq_quick_handler, + mei_txe_irq_thread_handler, + IRQF_SHARED, KBUILD_MODNAME, dev); + if (err) { + dev_err(&pdev->dev, "mei: request_threaded_irq failure. irq = %d\n", + pdev->irq); + goto end; + } + + if (mei_start(dev)) { + dev_err(&pdev->dev, "init hw failure.\n"); + err = -ENODEV; + goto release_irq; + } + + pm_runtime_set_autosuspend_delay(&pdev->dev, MEI_TXI_RPM_TIMEOUT); + pm_runtime_use_autosuspend(&pdev->dev); + + err = mei_register(dev, &pdev->dev); + if (err) + goto stop; + + pci_set_drvdata(pdev, dev); + + /* + * MEI requires to resume from runtime suspend mode + * in order to perform link reset flow upon system suspend. + */ + dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); + + /* + * TXE maps runtime suspend/resume to own power gating states, + * hence we need to go around native PCI runtime service which + * eventually brings the device into D3cold/hot state. + * But the TXE device cannot wake up from D3 unlike from own + * power gating. To get around PCI device native runtime pm, + * TXE uses runtime pm domain handlers which take precedence. + */ + mei_txe_set_pm_domain(dev); + + pm_runtime_put_noidle(&pdev->dev); + + return 0; + +stop: + mei_stop(dev); +release_irq: + mei_cancel_work(dev); + mei_disable_interrupts(dev); + free_irq(pdev->irq, dev); +end: + dev_err(&pdev->dev, "initialization failed.\n"); + return err; +} + +/** + * mei_txe_shutdown- Device Shutdown Routine + * + * @pdev: PCI device structure + * + * mei_txe_shutdown is called from the reboot notifier + * it's a simplified version of remove so we go down + * faster. + */ +static void mei_txe_shutdown(struct pci_dev *pdev) +{ + struct mei_device *dev; + + dev = pci_get_drvdata(pdev); + if (!dev) + return; + + dev_dbg(&pdev->dev, "shutdown\n"); + mei_stop(dev); + + mei_txe_unset_pm_domain(dev); + + mei_disable_interrupts(dev); + free_irq(pdev->irq, dev); +} + +/** + * mei_txe_remove - Device Removal Routine + * + * @pdev: PCI device structure + * + * mei_remove is called by the PCI subsystem to alert the driver + * that it should release a PCI device. 
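 *
 * (Editor's note, not part of the original patch: the
 * pm_runtime_get_noresume() call below balances the pm_runtime_put_noidle()
 * issued at the end of probe, keeping the usage count elevated so the
 * device cannot runtime suspend while it is being torn down.)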
+ */ +static void mei_txe_remove(struct pci_dev *pdev) +{ + struct mei_device *dev; + + dev = pci_get_drvdata(pdev); + if (!dev) { + dev_err(&pdev->dev, "mei: dev == NULL\n"); + return; + } + + pm_runtime_get_noresume(&pdev->dev); + + mei_stop(dev); + + mei_txe_unset_pm_domain(dev); + + mei_disable_interrupts(dev); + free_irq(pdev->irq, dev); + + mei_deregister(dev); +} + + +#ifdef CONFIG_PM_SLEEP +static int mei_txe_pci_suspend(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct mei_device *dev = pci_get_drvdata(pdev); + + if (!dev) + return -ENODEV; + + dev_dbg(&pdev->dev, "suspend\n"); + + mei_stop(dev); + + mei_disable_interrupts(dev); + + free_irq(pdev->irq, dev); + pci_disable_msi(pdev); + + return 0; +} + +static int mei_txe_pci_resume(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct mei_device *dev; + int err; + + dev = pci_get_drvdata(pdev); + if (!dev) + return -ENODEV; + + pci_enable_msi(pdev); + + mei_clear_interrupts(dev); + + /* request and enable interrupt */ + if (pci_dev_msi_enabled(pdev)) + err = request_threaded_irq(pdev->irq, + NULL, + mei_txe_irq_thread_handler, + IRQF_ONESHOT, KBUILD_MODNAME, dev); + else + err = request_threaded_irq(pdev->irq, + mei_txe_irq_quick_handler, + mei_txe_irq_thread_handler, + IRQF_SHARED, KBUILD_MODNAME, dev); + if (err) { + dev_err(&pdev->dev, "request_threaded_irq failed: irq = %d.\n", + pdev->irq); + return err; + } + + err = mei_restart(dev); + + return err; +} +#endif /* CONFIG_PM_SLEEP */ + +#ifdef CONFIG_PM +static int mei_txe_pm_runtime_idle(struct device *device) +{ + struct mei_device *dev; + + dev_dbg(device, "rpm: txe: runtime_idle\n"); + + dev = dev_get_drvdata(device); + if (!dev) + return -ENODEV; + if (mei_write_is_idle(dev)) + pm_runtime_autosuspend(device); + + return -EBUSY; +} +static int mei_txe_pm_runtime_suspend(struct device *device) +{ + struct mei_device *dev; + int ret; + + dev_dbg(device, "rpm: txe: runtime suspend\n"); + + dev = dev_get_drvdata(device); + if (!dev) + return -ENODEV; + + mutex_lock(&dev->device_lock); + + if (mei_write_is_idle(dev)) + ret = mei_txe_aliveness_set_sync(dev, 0); + else + ret = -EAGAIN; + + /* keep irq on we are staying in D0 */ + + dev_dbg(device, "rpm: txe: runtime suspend ret=%d\n", ret); + + mutex_unlock(&dev->device_lock); + + if (ret && ret != -EAGAIN) + schedule_work(&dev->reset_work); + + return ret; +} + +static int mei_txe_pm_runtime_resume(struct device *device) +{ + struct mei_device *dev; + int ret; + + dev_dbg(device, "rpm: txe: runtime resume\n"); + + dev = dev_get_drvdata(device); + if (!dev) + return -ENODEV; + + mutex_lock(&dev->device_lock); + + mei_enable_interrupts(dev); + + ret = mei_txe_aliveness_set_sync(dev, 1); + + mutex_unlock(&dev->device_lock); + + dev_dbg(device, "rpm: txe: runtime resume ret = %d\n", ret); + + if (ret) + schedule_work(&dev->reset_work); + + return ret; +} + +/** + * mei_txe_set_pm_domain - fill and set pm domain structure for device + * + * @dev: mei_device + */ +static inline void mei_txe_set_pm_domain(struct mei_device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + + if (pdev->dev.bus && pdev->dev.bus->pm) { + dev->pg_domain.ops = *pdev->dev.bus->pm; + + dev->pg_domain.ops.runtime_suspend = mei_txe_pm_runtime_suspend; + dev->pg_domain.ops.runtime_resume = mei_txe_pm_runtime_resume; + dev->pg_domain.ops.runtime_idle = mei_txe_pm_runtime_idle; + + dev_pm_domain_set(&pdev->dev, &dev->pg_domain); + } +} + +/** + * mei_txe_unset_pm_domain - clean pm domain structure for 
device + * + * @dev: mei_device + */ +static inline void mei_txe_unset_pm_domain(struct mei_device *dev) +{ + /* stop using pm callbacks if any */ + dev_pm_domain_set(dev->dev, NULL); +} + +static const struct dev_pm_ops mei_txe_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(mei_txe_pci_suspend, + mei_txe_pci_resume) + SET_RUNTIME_PM_OPS( + mei_txe_pm_runtime_suspend, + mei_txe_pm_runtime_resume, + mei_txe_pm_runtime_idle) +}; + +#define MEI_TXE_PM_OPS (&mei_txe_pm_ops) +#else +#define MEI_TXE_PM_OPS NULL +#endif /* CONFIG_PM */ + +/* + * PCI driver structure + */ +static struct pci_driver mei_txe_driver = { + .name = KBUILD_MODNAME, + .id_table = mei_txe_pci_tbl, + .probe = mei_txe_probe, + .remove = mei_txe_remove, + .shutdown = mei_txe_shutdown, + .driver.pm = MEI_TXE_PM_OPS, +}; + +module_pci_driver(mei_txe_driver); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) Trusted Execution Environment Interface"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/mei/pxp/Kconfig b/drivers/misc/mei/pxp/Kconfig new file mode 100644 index 0000000000..4029b96afc --- /dev/null +++ b/drivers/misc/mei/pxp/Kconfig @@ -0,0 +1,13 @@ + +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2020, Intel Corporation. All rights reserved. +# +config INTEL_MEI_PXP + tristate "Intel PXP services of ME Interface" + select INTEL_MEI_ME + depends on DRM_I915 + help + MEI Support for PXP Services on Intel platforms. + + Enables the ME FW services required for PXP support through + I915 display driver of Intel. diff --git a/drivers/misc/mei/pxp/Makefile b/drivers/misc/mei/pxp/Makefile new file mode 100644 index 0000000000..0329950d57 --- /dev/null +++ b/drivers/misc/mei/pxp/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2020, Intel Corporation. All rights reserved. +# +# Makefile - PXP client driver for Intel MEI Bus Driver. + +obj-$(CONFIG_INTEL_MEI_PXP) += mei_pxp.o diff --git a/drivers/misc/mei/pxp/mei_pxp.c b/drivers/misc/mei/pxp/mei_pxp.c new file mode 100644 index 0000000000..2dcb9169e4 --- /dev/null +++ b/drivers/misc/mei/pxp/mei_pxp.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2020 - 2021 Intel Corporation + */ + +/** + * DOC: MEI_PXP Client Driver + * + * The mei_pxp driver acts as a translation layer between PXP + * protocol implementer (I915) and ME FW by translating PXP + * negotiation messages to ME FW command payloads and vice versa. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mei_pxp.h" + +/** + * mei_pxp_send_message() - Sends a PXP message to ME FW. + * @dev: device corresponding to the mei_cl_device + * @message: a message buffer to send + * @size: size of the message + * Return: 0 on Success, <0 on Failure + */ +static int +mei_pxp_send_message(struct device *dev, const void *message, size_t size) +{ + struct mei_cl_device *cldev; + ssize_t byte; + + if (!dev || !message) + return -EINVAL; + + cldev = to_mei_cl_device(dev); + + byte = mei_cldev_send(cldev, message, size); + if (byte < 0) { + dev_dbg(dev, "mei_cldev_send failed. %zd\n", byte); + return byte; + } + + return 0; +} + +/** + * mei_pxp_receive_message() - Receives a PXP message from ME FW. 
+ * @dev: device corresponding to the mei_cl_device + * @buffer: a message buffer to contain the received message + * @size: size of the buffer + * Return: bytes sent on Success, <0 on Failure + */ +static int +mei_pxp_receive_message(struct device *dev, void *buffer, size_t size) +{ + struct mei_cl_device *cldev; + ssize_t byte; + + if (!dev || !buffer) + return -EINVAL; + + cldev = to_mei_cl_device(dev); + + byte = mei_cldev_recv(cldev, buffer, size); + if (byte < 0) { + dev_dbg(dev, "mei_cldev_recv failed. %zd\n", byte); + return byte; + } + + return byte; +} + +/** + * mei_pxp_gsc_command() - sends a gsc command, by sending + * a sgl mei message to gsc and receiving reply from gsc + * + * @dev: device corresponding to the mei_cl_device + * @client_id: client id to send the command to + * @fence_id: fence id to send the command to + * @sg_in: scatter gather list containing addresses for rx message buffer + * @total_in_len: total length of data in 'in' sg, can be less than the sum of buffers sizes + * @sg_out: scatter gather list containing addresses for tx message buffer + * + * Return: bytes sent on Success, <0 on Failure + */ +static ssize_t mei_pxp_gsc_command(struct device *dev, u8 client_id, u32 fence_id, + struct scatterlist *sg_in, size_t total_in_len, + struct scatterlist *sg_out) +{ + struct mei_cl_device *cldev; + + cldev = to_mei_cl_device(dev); + + return mei_cldev_send_gsc_command(cldev, client_id, fence_id, sg_in, total_in_len, sg_out); +} + +static const struct i915_pxp_component_ops mei_pxp_ops = { + .owner = THIS_MODULE, + .send = mei_pxp_send_message, + .recv = mei_pxp_receive_message, + .gsc_command = mei_pxp_gsc_command, +}; + +static int mei_component_master_bind(struct device *dev) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + struct i915_pxp_component *comp_master = mei_cldev_get_drvdata(cldev); + int ret; + + comp_master->ops = &mei_pxp_ops; + comp_master->tee_dev = dev; + ret = component_bind_all(dev, comp_master); + if (ret < 0) + return ret; + + return 0; +} + +static void mei_component_master_unbind(struct device *dev) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + struct i915_pxp_component *comp_master = mei_cldev_get_drvdata(cldev); + + component_unbind_all(dev, comp_master); +} + +static const struct component_master_ops mei_component_master_ops = { + .bind = mei_component_master_bind, + .unbind = mei_component_master_unbind, +}; + +/** + * mei_pxp_component_match - compare function for matching mei pxp. + * + * The function checks if the driver is i915, the subcomponent is PXP + * and the grand parent of pxp and the parent of i915 are the same + * PCH device. 
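 *
 * (Editor's note, not part of the original patch: the comparison walks
 *
 *	pxp cldev   -> parent (mei device) -> parent (PCI device)
 *	i915 device -> parent
 *
 * On discrete graphics the PCI device reached from the pxp side is the
 * graphics device itself and is compared against the i915 device directly;
 * on integrated graphics both walks are expected to end on the same PCH
 * device.)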
+ * + * @dev: master device + * @subcomponent: subcomponent to match (I915_COMPONENT_PXP) + * @data: compare data (mei pxp device) + * + * Return: + * * 1 - if components match + * * 0 - otherwise + */ +static int mei_pxp_component_match(struct device *dev, int subcomponent, + void *data) +{ + struct device *base = data; + + if (!dev) + return 0; + + if (!dev->driver || strcmp(dev->driver->name, "i915") || + subcomponent != I915_COMPONENT_PXP) + return 0; + + base = base->parent; + if (!base) /* mei device */ + return 0; + + base = base->parent; /* pci device */ + /* for dgfx */ + if (base && dev == base) + return 1; + + /* for pch */ + dev = dev->parent; + return (base && dev && dev == base); +} + +static int mei_pxp_probe(struct mei_cl_device *cldev, + const struct mei_cl_device_id *id) +{ + struct i915_pxp_component *comp_master; + struct component_match *master_match; + int ret; + + ret = mei_cldev_enable(cldev); + if (ret < 0) { + dev_err(&cldev->dev, "mei_cldev_enable Failed. %d\n", ret); + goto enable_err_exit; + } + + comp_master = kzalloc(sizeof(*comp_master), GFP_KERNEL); + if (!comp_master) { + ret = -ENOMEM; + goto err_exit; + } + + master_match = NULL; + component_match_add_typed(&cldev->dev, &master_match, + mei_pxp_component_match, &cldev->dev); + if (IS_ERR_OR_NULL(master_match)) { + ret = -ENOMEM; + goto err_exit; + } + + mei_cldev_set_drvdata(cldev, comp_master); + ret = component_master_add_with_match(&cldev->dev, + &mei_component_master_ops, + master_match); + if (ret < 0) { + dev_err(&cldev->dev, "Master comp add failed %d\n", ret); + goto err_exit; + } + + return 0; + +err_exit: + mei_cldev_set_drvdata(cldev, NULL); + kfree(comp_master); + mei_cldev_disable(cldev); +enable_err_exit: + return ret; +} + +static void mei_pxp_remove(struct mei_cl_device *cldev) +{ + struct i915_pxp_component *comp_master = mei_cldev_get_drvdata(cldev); + int ret; + + component_master_del(&cldev->dev, &mei_component_master_ops); + kfree(comp_master); + mei_cldev_set_drvdata(cldev, NULL); + + ret = mei_cldev_disable(cldev); + if (ret) + dev_warn(&cldev->dev, "mei_cldev_disable() failed\n"); +} + +/* fbf6fcf1-96cf-4e2e-a6a6-1bab8cbe36b1 : PAVP GUID*/ +#define MEI_GUID_PXP UUID_LE(0xfbf6fcf1, 0x96cf, 0x4e2e, 0xA6, \ + 0xa6, 0x1b, 0xab, 0x8c, 0xbe, 0x36, 0xb1) + +static struct mei_cl_device_id mei_pxp_tbl[] = { + { .uuid = MEI_GUID_PXP, .version = MEI_CL_VERSION_ANY }, + { } +}; +MODULE_DEVICE_TABLE(mei, mei_pxp_tbl); + +static struct mei_cl_driver mei_pxp_driver = { + .id_table = mei_pxp_tbl, + .name = KBUILD_MODNAME, + .probe = mei_pxp_probe, + .remove = mei_pxp_remove, +}; + +module_mei_cl_driver(mei_pxp_driver); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("MEI PXP"); diff --git a/drivers/misc/mei/pxp/mei_pxp.h b/drivers/misc/mei/pxp/mei_pxp.h new file mode 100644 index 0000000000..e7b15373fe --- /dev/null +++ b/drivers/misc/mei/pxp/mei_pxp.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright © 2020 Intel Corporation + * + * Authors: + * Vitaly Lubart + */ + +#ifndef __MEI_PXP_H__ +#define __MEI_PXP_H__ + +/* me_pxp_status: Enumeration of all PXP Status Codes */ +enum me_pxp_status { + ME_PXP_STATUS_SUCCESS = 0x0000, + +}; + +#endif /* __MEI_PXP_H__ */ diff --git a/drivers/misc/ocxl/Kconfig b/drivers/misc/ocxl/Kconfig new file mode 100644 index 0000000000..c9b0a27caf --- /dev/null +++ b/drivers/misc/ocxl/Kconfig @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Open Coherent Accelerator (OCXL) compatible devices +# 
+ +config OCXL_BASE + bool + select PPC_COPRO_BASE + +config OCXL + tristate "OpenCAPI coherent accelerator support" + depends on HOTPLUG_PCI_POWERNV + select OCXL_BASE + default m + help + Select this option to enable the ocxl driver for Open + Coherent Accelerator Processor Interface (OpenCAPI) devices. + + OpenCAPI allows FPGA and ASIC accelerators to be coherently + attached to a CPU over an OpenCAPI link. + + The ocxl driver enables userspace programs to access these + accelerators through devices in /dev/ocxl/. + + For more information, see https://opencapi.org. + + This is not to be confused with the support for IBM CAPI + accelerators (CONFIG_CXL), which are PCI-based instead of a + dedicated OpenCAPI link, and don't follow the same protocol. + + If unsure, say N. diff --git a/drivers/misc/ocxl/Makefile b/drivers/misc/ocxl/Makefile new file mode 100644 index 0000000000..d07d1bb8e8 --- /dev/null +++ b/drivers/misc/ocxl/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0+ +ccflags-$(CONFIG_PPC_WERROR) += -Werror + +ocxl-y += main.o pci.o config.o file.o pasid.o mmio.o +ocxl-y += link.o context.o afu_irq.o sysfs.o trace.o +ocxl-y += core.o +obj-$(CONFIG_OCXL) += ocxl.o + +# For tracepoints to include our trace.h from tracepoint infrastructure: +CFLAGS_trace.o := -I$(src) + +# ccflags-y += -DDEBUG diff --git a/drivers/misc/ocxl/afu_irq.c b/drivers/misc/ocxl/afu_irq.c new file mode 100644 index 0000000000..a06920b7e0 --- /dev/null +++ b/drivers/misc/ocxl/afu_irq.c @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include +#include +#include +#include +#include "ocxl_internal.h" +#include "trace.h" + +struct afu_irq { + int id; + int hw_irq; + unsigned int virq; + char *name; + irqreturn_t (*handler)(void *private); + void (*free_private)(void *private); + void *private; +}; + +int ocxl_irq_offset_to_id(struct ocxl_context *ctx, u64 offset) +{ + return (offset - ctx->afu->irq_base_offset) >> PAGE_SHIFT; +} + +u64 ocxl_irq_id_to_offset(struct ocxl_context *ctx, int irq_id) +{ + return ctx->afu->irq_base_offset + (irq_id << PAGE_SHIFT); +} + +int ocxl_irq_set_handler(struct ocxl_context *ctx, int irq_id, + irqreturn_t (*handler)(void *private), + void (*free_private)(void *private), + void *private) +{ + struct afu_irq *irq; + int rc; + + mutex_lock(&ctx->irq_lock); + irq = idr_find(&ctx->irq_idr, irq_id); + if (!irq) { + rc = -EINVAL; + goto unlock; + } + + irq->handler = handler; + irq->private = private; + irq->free_private = free_private; + + rc = 0; + // Fall through to unlock + +unlock: + mutex_unlock(&ctx->irq_lock); + return rc; +} +EXPORT_SYMBOL_GPL(ocxl_irq_set_handler); + +static irqreturn_t afu_irq_handler(int virq, void *data) +{ + struct afu_irq *irq = (struct afu_irq *) data; + + trace_ocxl_afu_irq_receive(virq); + + if (irq->handler) + return irq->handler(irq->private); + + return IRQ_HANDLED; // Just drop it on the ground +} + +static int setup_afu_irq(struct ocxl_context *ctx, struct afu_irq *irq) +{ + int rc; + + irq->virq = irq_create_mapping(NULL, irq->hw_irq); + if (!irq->virq) { + pr_err("irq_create_mapping failed\n"); + return -ENOMEM; + } + pr_debug("hw_irq %d mapped to virq %u\n", irq->hw_irq, irq->virq); + + irq->name = kasprintf(GFP_KERNEL, "ocxl-afu-%u", irq->virq); + if (!irq->name) { + irq_dispose_mapping(irq->virq); + return -ENOMEM; + } + + rc = request_irq(irq->virq, afu_irq_handler, 0, irq->name, irq); + if (rc) { + kfree(irq->name); + irq->name = NULL; + irq_dispose_mapping(irq->virq); + pr_err("request_irq 
failed: %d\n", rc); + return rc; + } + return 0; +} + +static void release_afu_irq(struct afu_irq *irq) +{ + free_irq(irq->virq, irq); + irq_dispose_mapping(irq->virq); + kfree(irq->name); +} + +int ocxl_afu_irq_alloc(struct ocxl_context *ctx, int *irq_id) +{ + struct afu_irq *irq; + int rc; + + irq = kzalloc(sizeof(struct afu_irq), GFP_KERNEL); + if (!irq) + return -ENOMEM; + + /* + * We limit the number of afu irqs per context and per link to + * avoid a single process or user depleting the pool of IPIs + */ + + mutex_lock(&ctx->irq_lock); + + irq->id = idr_alloc(&ctx->irq_idr, irq, 0, MAX_IRQ_PER_CONTEXT, + GFP_KERNEL); + if (irq->id < 0) { + rc = -ENOSPC; + goto err_unlock; + } + + rc = ocxl_link_irq_alloc(ctx->afu->fn->link, &irq->hw_irq); + if (rc) + goto err_idr; + + rc = setup_afu_irq(ctx, irq); + if (rc) + goto err_alloc; + + trace_ocxl_afu_irq_alloc(ctx->pasid, irq->id, irq->virq, irq->hw_irq); + mutex_unlock(&ctx->irq_lock); + + *irq_id = irq->id; + + return 0; + +err_alloc: + ocxl_link_free_irq(ctx->afu->fn->link, irq->hw_irq); +err_idr: + idr_remove(&ctx->irq_idr, irq->id); +err_unlock: + mutex_unlock(&ctx->irq_lock); + kfree(irq); + return rc; +} +EXPORT_SYMBOL_GPL(ocxl_afu_irq_alloc); + +static void afu_irq_free(struct afu_irq *irq, struct ocxl_context *ctx) +{ + trace_ocxl_afu_irq_free(ctx->pasid, irq->id); + if (ctx->mapping) + unmap_mapping_range(ctx->mapping, + ocxl_irq_id_to_offset(ctx, irq->id), + 1 << PAGE_SHIFT, 1); + release_afu_irq(irq); + if (irq->free_private) + irq->free_private(irq->private); + ocxl_link_free_irq(ctx->afu->fn->link, irq->hw_irq); + kfree(irq); +} + +int ocxl_afu_irq_free(struct ocxl_context *ctx, int irq_id) +{ + struct afu_irq *irq; + + mutex_lock(&ctx->irq_lock); + + irq = idr_find(&ctx->irq_idr, irq_id); + if (!irq) { + mutex_unlock(&ctx->irq_lock); + return -EINVAL; + } + idr_remove(&ctx->irq_idr, irq->id); + afu_irq_free(irq, ctx); + mutex_unlock(&ctx->irq_lock); + return 0; +} +EXPORT_SYMBOL_GPL(ocxl_afu_irq_free); + +void ocxl_afu_irq_free_all(struct ocxl_context *ctx) +{ + struct afu_irq *irq; + int id; + + mutex_lock(&ctx->irq_lock); + idr_for_each_entry(&ctx->irq_idr, irq, id) + afu_irq_free(irq, ctx); + mutex_unlock(&ctx->irq_lock); +} + +u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, int irq_id) +{ + struct xive_irq_data *xd; + struct afu_irq *irq; + u64 addr = 0; + + mutex_lock(&ctx->irq_lock); + irq = idr_find(&ctx->irq_idr, irq_id); + if (irq) { + xd = irq_get_handler_data(irq->virq); + addr = xd ? xd->trig_page : 0; + } + mutex_unlock(&ctx->irq_lock); + return addr; +} +EXPORT_SYMBOL_GPL(ocxl_afu_irq_get_addr); diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c new file mode 100644 index 0000000000..92ab49705f --- /dev/null +++ b/drivers/misc/ocxl/config.c @@ -0,0 +1,937 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. 
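/*
 * Editor's note, not part of the original patch: the EXTRACT_BITS(val, s, e)
 * helper defined below returns bits s..e (inclusive) of val, shifted down to
 * bit 0, and EXTRACT_BIT(val, bit) tests a single bit. For example, with
 * val = 0x00345678:
 *
 *	EXTRACT_BITS(val, 8, 15) == 0x56
 *	EXTRACT_BIT(val, 3)      == 1
 */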
+#include +#include +#include +#include "ocxl_internal.h" + +#define EXTRACT_BIT(val, bit) (!!(val & BIT(bit))) +#define EXTRACT_BITS(val, s, e) ((val & GENMASK(e, s)) >> s) + +#define OCXL_DVSEC_AFU_IDX_MASK GENMASK(5, 0) +#define OCXL_DVSEC_ACTAG_MASK GENMASK(11, 0) +#define OCXL_DVSEC_PASID_MASK GENMASK(19, 0) +#define OCXL_DVSEC_PASID_LOG_MASK GENMASK(4, 0) + +#define OCXL_DVSEC_TEMPL_VERSION 0x0 +#define OCXL_DVSEC_TEMPL_NAME 0x4 +#define OCXL_DVSEC_TEMPL_AFU_VERSION 0x1C +#define OCXL_DVSEC_TEMPL_MMIO_GLOBAL 0x20 +#define OCXL_DVSEC_TEMPL_MMIO_GLOBAL_SZ 0x28 +#define OCXL_DVSEC_TEMPL_MMIO_PP 0x30 +#define OCXL_DVSEC_TEMPL_MMIO_PP_SZ 0x38 +#define OCXL_DVSEC_TEMPL_ALL_MEM_SZ 0x3C +#define OCXL_DVSEC_TEMPL_LPC_MEM_START 0x40 +#define OCXL_DVSEC_TEMPL_WWID 0x48 +#define OCXL_DVSEC_TEMPL_LPC_MEM_SZ 0x58 + +#define OCXL_MAX_AFU_PER_FUNCTION 64 +#define OCXL_TEMPL_LEN_1_0 0x58 +#define OCXL_TEMPL_LEN_1_1 0x60 +#define OCXL_TEMPL_NAME_LEN 24 +#define OCXL_CFG_TIMEOUT 3 + +static int find_dvsec(struct pci_dev *dev, int dvsec_id) +{ + return pci_find_dvsec_capability(dev, PCI_VENDOR_ID_IBM, dvsec_id); +} + +static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx) +{ + int vsec = 0; + u16 vendor, id; + u8 idx; + + while ((vsec = pci_find_next_ext_capability(dev, vsec, + OCXL_EXT_CAP_ID_DVSEC))) { + pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET, + &vendor); + pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id); + + if (vendor == PCI_VENDOR_ID_IBM && + id == OCXL_DVSEC_AFU_CTRL_ID) { + pci_read_config_byte(dev, + vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX, + &idx); + if (idx == afu_idx) + return vsec; + } + } + return 0; +} + +/** + * get_function_0() - Find a related PCI device (function 0) + * @dev: PCI device to match + * + * Returns a pointer to the related device, or null if not found + */ +static struct pci_dev *get_function_0(struct pci_dev *dev) +{ + unsigned int devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0); + + return pci_get_domain_bus_and_slot(pci_domain_nr(dev->bus), + dev->bus->number, devfn); +} + +static void read_pasid(struct pci_dev *dev, struct ocxl_fn_config *fn) +{ + u16 val; + int pos; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_PASID); + if (!pos) { + /* + * PASID capability is not mandatory, but there + * shouldn't be any AFU + */ + dev_dbg(&dev->dev, "Function doesn't require any PASID\n"); + fn->max_pasid_log = -1; + goto out; + } + pci_read_config_word(dev, pos + PCI_PASID_CAP, &val); + fn->max_pasid_log = EXTRACT_BITS(val, 8, 12); + +out: + dev_dbg(&dev->dev, "PASID capability:\n"); + dev_dbg(&dev->dev, " Max PASID log = %d\n", fn->max_pasid_log); +} + +static int read_dvsec_tl(struct pci_dev *dev, struct ocxl_fn_config *fn) +{ + int pos; + + pos = find_dvsec(dev, OCXL_DVSEC_TL_ID); + if (!pos && PCI_FUNC(dev->devfn) == 0) { + dev_err(&dev->dev, "Can't find TL DVSEC\n"); + return -ENODEV; + } + if (pos && PCI_FUNC(dev->devfn) != 0) { + dev_err(&dev->dev, "TL DVSEC is only allowed on function 0\n"); + return -ENODEV; + } + fn->dvsec_tl_pos = pos; + return 0; +} + +static int read_dvsec_function(struct pci_dev *dev, struct ocxl_fn_config *fn) +{ + int pos, afu_present; + u32 val; + + pos = find_dvsec(dev, OCXL_DVSEC_FUNC_ID); + if (!pos) { + dev_err(&dev->dev, "Can't find function DVSEC\n"); + return -ENODEV; + } + fn->dvsec_function_pos = pos; + + pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val); + afu_present = EXTRACT_BIT(val, 31); + if (!afu_present) { + fn->max_afu_index = -1; + dev_dbg(&dev->dev, "Function doesn't define 
any AFU\n"); + goto out; + } + fn->max_afu_index = EXTRACT_BITS(val, 24, 29); + +out: + dev_dbg(&dev->dev, "Function DVSEC:\n"); + dev_dbg(&dev->dev, " Max AFU index = %d\n", fn->max_afu_index); + return 0; +} + +static int read_dvsec_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn) +{ + int pos; + + if (fn->max_afu_index < 0) { + fn->dvsec_afu_info_pos = -1; + return 0; + } + + pos = find_dvsec(dev, OCXL_DVSEC_AFU_INFO_ID); + if (!pos) { + dev_err(&dev->dev, "Can't find AFU information DVSEC\n"); + return -ENODEV; + } + fn->dvsec_afu_info_pos = pos; + return 0; +} + +static int read_dvsec_vendor(struct pci_dev *dev) +{ + int pos; + u32 cfg, tlx, dlx, reset_reload; + + /* + * vendor specific DVSEC, for IBM images only. Some older + * images may not have it + * + * It's only used on function 0 to specify the version of some + * logic blocks and to give access to special registers to + * enable host-based flashing. + */ + if (PCI_FUNC(dev->devfn) != 0) + return 0; + + pos = find_dvsec(dev, OCXL_DVSEC_VENDOR_ID); + if (!pos) + return 0; + + pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_CFG_VERS, &cfg); + pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_TLX_VERS, &tlx); + pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_DLX_VERS, &dlx); + pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD, + &reset_reload); + + dev_dbg(&dev->dev, "Vendor specific DVSEC:\n"); + dev_dbg(&dev->dev, " CFG version = 0x%x\n", cfg); + dev_dbg(&dev->dev, " TLX version = 0x%x\n", tlx); + dev_dbg(&dev->dev, " DLX version = 0x%x\n", dlx); + dev_dbg(&dev->dev, " ResetReload = 0x%x\n", reset_reload); + return 0; +} + +/** + * get_dvsec_vendor0() - Find a related PCI device (function 0) + * @dev: PCI device to match + * @dev0: The PCI device (function 0) found + * @out_pos: The position of PCI device (function 0) + * + * Returns 0 on success, negative on failure. + * + * NOTE: If it's successful, the reference of dev0 is increased, + * so after using it, the callers must call pci_dev_put() to give + * up the reference. 
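 *
 * (Editor's note, not part of the original patch, a usage sketch:
 *
 *	struct pci_dev *dev0;
 *	u32 val;
 *	int pos;
 *
 *	if (get_dvsec_vendor0(dev, &dev0, &pos))
 *		return -1;
 *	pci_read_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD, &val);
 *	pci_dev_put(dev0);
 *
 * which is the pattern followed by ocxl_config_get_reset_reload() and
 * ocxl_config_set_reset_reload() below.)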
+ */ +static int get_dvsec_vendor0(struct pci_dev *dev, struct pci_dev **dev0, + int *out_pos) +{ + int pos; + + if (PCI_FUNC(dev->devfn) != 0) { + dev = get_function_0(dev); + if (!dev) + return -1; + } else { + dev = pci_dev_get(dev); + } + pos = find_dvsec(dev, OCXL_DVSEC_VENDOR_ID); + if (!pos) { + pci_dev_put(dev); + return -1; + } + *dev0 = dev; + *out_pos = pos; + return 0; +} + +int ocxl_config_get_reset_reload(struct pci_dev *dev, int *val) +{ + struct pci_dev *dev0; + u32 reset_reload; + int pos; + + if (get_dvsec_vendor0(dev, &dev0, &pos)) + return -1; + + pci_read_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD, + &reset_reload); + pci_dev_put(dev0); + *val = !!(reset_reload & BIT(0)); + return 0; +} + +int ocxl_config_set_reset_reload(struct pci_dev *dev, int val) +{ + struct pci_dev *dev0; + u32 reset_reload; + int pos; + + if (get_dvsec_vendor0(dev, &dev0, &pos)) + return -1; + + pci_read_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD, + &reset_reload); + if (val) + reset_reload |= BIT(0); + else + reset_reload &= ~BIT(0); + pci_write_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD, + reset_reload); + pci_dev_put(dev0); + return 0; +} + +static int validate_function(struct pci_dev *dev, struct ocxl_fn_config *fn) +{ + if (fn->max_pasid_log == -1 && fn->max_afu_index >= 0) { + dev_err(&dev->dev, + "AFUs are defined but no PASIDs are requested\n"); + return -EINVAL; + } + + if (fn->max_afu_index > OCXL_MAX_AFU_PER_FUNCTION) { + dev_err(&dev->dev, + "Max AFU index out of architectural limit (%d vs %d)\n", + fn->max_afu_index, OCXL_MAX_AFU_PER_FUNCTION); + return -EINVAL; + } + return 0; +} + +int ocxl_config_read_function(struct pci_dev *dev, struct ocxl_fn_config *fn) +{ + int rc; + + read_pasid(dev, fn); + + rc = read_dvsec_tl(dev, fn); + if (rc) { + dev_err(&dev->dev, + "Invalid Transaction Layer DVSEC configuration: %d\n", + rc); + return -ENODEV; + } + + rc = read_dvsec_function(dev, fn); + if (rc) { + dev_err(&dev->dev, + "Invalid Function DVSEC configuration: %d\n", rc); + return -ENODEV; + } + + rc = read_dvsec_afu_info(dev, fn); + if (rc) { + dev_err(&dev->dev, "Invalid AFU configuration: %d\n", rc); + return -ENODEV; + } + + rc = read_dvsec_vendor(dev); + if (rc) { + dev_err(&dev->dev, + "Invalid vendor specific DVSEC configuration: %d\n", + rc); + return -ENODEV; + } + + rc = validate_function(dev, fn); + return rc; +} +EXPORT_SYMBOL_GPL(ocxl_config_read_function); + +static int read_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn, + int offset, u32 *data) +{ + u32 val; + unsigned long timeout = jiffies + (HZ * OCXL_CFG_TIMEOUT); + int pos = fn->dvsec_afu_info_pos; + + /* Protect 'data valid' bit */ + if (EXTRACT_BIT(offset, 31)) { + dev_err(&dev->dev, "Invalid offset in AFU info DVSEC\n"); + return -EINVAL; + } + + pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, offset); + pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, &val); + while (!EXTRACT_BIT(val, 31)) { + if (time_after_eq(jiffies, timeout)) { + dev_err(&dev->dev, + "Timeout while reading AFU info DVSEC (offset=%d)\n", + offset); + return -EBUSY; + } + cpu_relax(); + pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, &val); + } + pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_DATA, data); + return 0; +} + +/** + * read_template_version() - Read the template version from the AFU + * @dev: the device for the AFU + * @fn: the AFU offsets + * @len: outputs the template length + * @version: outputs the major<<8,minor version + * + * Returns 0 on 
success, negative on failure + */ +static int read_template_version(struct pci_dev *dev, struct ocxl_fn_config *fn, + u16 *len, u16 *version) +{ + u32 val32; + u8 major, minor; + int rc; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_VERSION, &val32); + if (rc) + return rc; + + *len = EXTRACT_BITS(val32, 16, 31); + major = EXTRACT_BITS(val32, 8, 15); + minor = EXTRACT_BITS(val32, 0, 7); + *version = (major << 8) + minor; + return 0; +} + +int ocxl_config_check_afu_index(struct pci_dev *dev, + struct ocxl_fn_config *fn, int afu_idx) +{ + int rc; + u16 templ_version; + u16 len, expected_len; + + pci_write_config_byte(dev, + fn->dvsec_afu_info_pos + OCXL_DVSEC_AFU_INFO_AFU_IDX, + afu_idx); + + rc = read_template_version(dev, fn, &len, &templ_version); + if (rc) + return rc; + + /* AFU index map can have holes, in which case we read all 0's */ + if (!templ_version && !len) + return 0; + + dev_dbg(&dev->dev, "AFU descriptor template version %d.%d\n", + templ_version >> 8, templ_version & 0xFF); + + switch (templ_version) { + case 0x0005: // v0.5 was used prior to the spec approval + case 0x0100: + expected_len = OCXL_TEMPL_LEN_1_0; + break; + case 0x0101: + expected_len = OCXL_TEMPL_LEN_1_1; + break; + default: + dev_warn(&dev->dev, "Unknown AFU template version %#x\n", + templ_version); + expected_len = len; + } + if (len != expected_len) + dev_warn(&dev->dev, + "Unexpected template length %#x in AFU information, expected %#x for version %#x\n", + len, expected_len, templ_version); + return 1; +} + +static int read_afu_name(struct pci_dev *dev, struct ocxl_fn_config *fn, + struct ocxl_afu_config *afu) +{ + int i, rc; + u32 val, *ptr; + + BUILD_BUG_ON(OCXL_AFU_NAME_SZ < OCXL_TEMPL_NAME_LEN); + for (i = 0; i < OCXL_TEMPL_NAME_LEN; i += 4) { + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_NAME + i, &val); + if (rc) + return rc; + ptr = (u32 *) &afu->name[i]; + *ptr = le32_to_cpu((__force __le32) val); + } + afu->name[OCXL_AFU_NAME_SZ - 1] = '\0'; /* play safe */ + return 0; +} + +static int read_afu_mmio(struct pci_dev *dev, struct ocxl_fn_config *fn, + struct ocxl_afu_config *afu) +{ + int rc; + u32 val; + + /* + * Global MMIO + */ + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL, &val); + if (rc) + return rc; + afu->global_mmio_bar = EXTRACT_BITS(val, 0, 2); + afu->global_mmio_offset = EXTRACT_BITS(val, 16, 31) << 16; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL + 4, &val); + if (rc) + return rc; + afu->global_mmio_offset += (u64) val << 32; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL_SZ, &val); + if (rc) + return rc; + afu->global_mmio_size = val; + + /* + * Per-process MMIO + */ + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP, &val); + if (rc) + return rc; + afu->pp_mmio_bar = EXTRACT_BITS(val, 0, 2); + afu->pp_mmio_offset = EXTRACT_BITS(val, 16, 31) << 16; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP + 4, &val); + if (rc) + return rc; + afu->pp_mmio_offset += (u64) val << 32; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP_SZ, &val); + if (rc) + return rc; + afu->pp_mmio_stride = val; + + return 0; +} + +static int read_afu_control(struct pci_dev *dev, struct ocxl_afu_config *afu) +{ + int pos; + u8 val8; + u16 val16; + + pos = find_dvsec_afu_ctrl(dev, afu->idx); + if (!pos) { + dev_err(&dev->dev, "Can't find AFU control DVSEC for AFU %d\n", + afu->idx); + return -ENODEV; + } + afu->dvsec_afu_control_pos = pos; + + pci_read_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_SUP, &val8); + afu->pasid_supported_log = 
EXTRACT_BITS(val8, 0, 4); + + pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP, &val16); + afu->actag_supported = EXTRACT_BITS(val16, 0, 11); + return 0; +} + +static bool char_allowed(int c) +{ + /* + * Permitted Characters : Alphanumeric, hyphen, underscore, comma + */ + if ((c >= 0x30 && c <= 0x39) /* digits */ || + (c >= 0x41 && c <= 0x5A) /* upper case */ || + (c >= 0x61 && c <= 0x7A) /* lower case */ || + c == 0 /* NULL */ || + c == 0x2D /* - */ || + c == 0x5F /* _ */ || + c == 0x2C /* , */) + return true; + return false; +} + +static int validate_afu(struct pci_dev *dev, struct ocxl_afu_config *afu) +{ + int i; + + if (!afu->name[0]) { + dev_err(&dev->dev, "Empty AFU name\n"); + return -EINVAL; + } + for (i = 0; i < OCXL_TEMPL_NAME_LEN; i++) { + if (!char_allowed(afu->name[i])) { + dev_err(&dev->dev, + "Invalid character in AFU name\n"); + return -EINVAL; + } + } + + if (afu->global_mmio_bar != 0 && + afu->global_mmio_bar != 2 && + afu->global_mmio_bar != 4) { + dev_err(&dev->dev, "Invalid global MMIO bar number\n"); + return -EINVAL; + } + if (afu->pp_mmio_bar != 0 && + afu->pp_mmio_bar != 2 && + afu->pp_mmio_bar != 4) { + dev_err(&dev->dev, "Invalid per-process MMIO bar number\n"); + return -EINVAL; + } + return 0; +} + +/** + * read_afu_lpc_memory_info() - Populate AFU metadata regarding LPC memory + * @dev: the device for the AFU + * @fn: the AFU offsets + * @afu: the AFU struct to populate the LPC metadata into + * + * Returns 0 on success, negative on failure + */ +static int read_afu_lpc_memory_info(struct pci_dev *dev, + struct ocxl_fn_config *fn, + struct ocxl_afu_config *afu) +{ + int rc; + u32 val32; + u16 templ_version; + u16 templ_len; + u64 total_mem_size = 0; + u64 lpc_mem_size = 0; + + afu->lpc_mem_offset = 0; + afu->lpc_mem_size = 0; + afu->special_purpose_mem_offset = 0; + afu->special_purpose_mem_size = 0; + /* + * For AFUs following template v1.0, the LPC memory covers the + * total memory. Its size is a power of 2. + * + * For AFUs with template >= v1.01, the total memory size is + * still a power of 2, but it is split in 2 parts: + * - the LPC memory, whose size can now be anything + * - the remainder memory is a special purpose memory, whose + * definition is AFU-dependent. It is not accessible through + * the usual commands for LPC memory + */ + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_ALL_MEM_SZ, &val32); + if (rc) + return rc; + + val32 = EXTRACT_BITS(val32, 0, 7); + if (!val32) + return 0; /* No LPC memory */ + + /* + * The configuration space spec allows for a memory size of up + * to 2^255 bytes. + * + * Current generation hardware uses 56-bit physical addresses, + * but we won't be able to get near close to that, as we won't + * have a hole big enough in the memory map. Let it pass in + * the driver for now. We'll get an error from the firmware + * when trying to configure something too big. 
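 *
 * (Editor's note, not part of the original patch, a worked example with
 * assumed values: if the template reports a total memory size of 2^37
 * bytes (128 GiB) and, for a template >= v1.01, an LPC memory size of
 * 96 GiB, the code below ends up with
 *
 *	lpc_mem_size               = 96 GiB
 *	special_purpose_mem_offset = lpc_mem_offset + 96 GiB
 *	special_purpose_mem_size   = 32 GiB
 *
 * while a v1.0 template yields lpc_mem_size = 128 GiB and no special
 * purpose memory.)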
+ */ + total_mem_size = 1ull << val32; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_LPC_MEM_START, &val32); + if (rc) + return rc; + + afu->lpc_mem_offset = val32; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_LPC_MEM_START + 4, &val32); + if (rc) + return rc; + + afu->lpc_mem_offset |= (u64) val32 << 32; + + rc = read_template_version(dev, fn, &templ_len, &templ_version); + if (rc) + return rc; + + if (templ_version >= 0x0101) { + rc = read_afu_info(dev, fn, + OCXL_DVSEC_TEMPL_LPC_MEM_SZ, &val32); + if (rc) + return rc; + lpc_mem_size = val32; + + rc = read_afu_info(dev, fn, + OCXL_DVSEC_TEMPL_LPC_MEM_SZ + 4, &val32); + if (rc) + return rc; + lpc_mem_size |= (u64) val32 << 32; + } else { + lpc_mem_size = total_mem_size; + } + afu->lpc_mem_size = lpc_mem_size; + + if (lpc_mem_size < total_mem_size) { + afu->special_purpose_mem_offset = + afu->lpc_mem_offset + lpc_mem_size; + afu->special_purpose_mem_size = + total_mem_size - lpc_mem_size; + } + return 0; +} + +int ocxl_config_read_afu(struct pci_dev *dev, struct ocxl_fn_config *fn, + struct ocxl_afu_config *afu, u8 afu_idx) +{ + int rc; + u32 val32; + + /* + * First, we need to write the AFU idx for the AFU we want to + * access. + */ + WARN_ON((afu_idx & OCXL_DVSEC_AFU_IDX_MASK) != afu_idx); + afu->idx = afu_idx; + pci_write_config_byte(dev, + fn->dvsec_afu_info_pos + OCXL_DVSEC_AFU_INFO_AFU_IDX, + afu->idx); + + rc = read_afu_name(dev, fn, afu); + if (rc) + return rc; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_AFU_VERSION, &val32); + if (rc) + return rc; + afu->version_major = EXTRACT_BITS(val32, 24, 31); + afu->version_minor = EXTRACT_BITS(val32, 16, 23); + afu->afuc_type = EXTRACT_BITS(val32, 14, 15); + afu->afum_type = EXTRACT_BITS(val32, 12, 13); + afu->profile = EXTRACT_BITS(val32, 0, 7); + + rc = read_afu_mmio(dev, fn, afu); + if (rc) + return rc; + + rc = read_afu_lpc_memory_info(dev, fn, afu); + if (rc) + return rc; + + rc = read_afu_control(dev, afu); + if (rc) + return rc; + + dev_dbg(&dev->dev, "AFU configuration:\n"); + dev_dbg(&dev->dev, " name = %s\n", afu->name); + dev_dbg(&dev->dev, " version = %d.%d\n", afu->version_major, + afu->version_minor); + dev_dbg(&dev->dev, " global mmio bar = %hhu\n", afu->global_mmio_bar); + dev_dbg(&dev->dev, " global mmio offset = %#llx\n", + afu->global_mmio_offset); + dev_dbg(&dev->dev, " global mmio size = %#x\n", afu->global_mmio_size); + dev_dbg(&dev->dev, " pp mmio bar = %hhu\n", afu->pp_mmio_bar); + dev_dbg(&dev->dev, " pp mmio offset = %#llx\n", afu->pp_mmio_offset); + dev_dbg(&dev->dev, " pp mmio stride = %#x\n", afu->pp_mmio_stride); + dev_dbg(&dev->dev, " lpc_mem offset = %#llx\n", afu->lpc_mem_offset); + dev_dbg(&dev->dev, " lpc_mem size = %#llx\n", afu->lpc_mem_size); + dev_dbg(&dev->dev, " special purpose mem offset = %#llx\n", + afu->special_purpose_mem_offset); + dev_dbg(&dev->dev, " special purpose mem size = %#llx\n", + afu->special_purpose_mem_size); + dev_dbg(&dev->dev, " pasid supported (log) = %u\n", + afu->pasid_supported_log); + dev_dbg(&dev->dev, " actag supported = %u\n", + afu->actag_supported); + + rc = validate_afu(dev, afu); + return rc; +} +EXPORT_SYMBOL_GPL(ocxl_config_read_afu); + +int ocxl_config_get_actag_info(struct pci_dev *dev, u16 *base, u16 *enabled, + u16 *supported) +{ + int rc; + + /* + * This is really a simple wrapper for the kernel API, to + * avoid an external driver using ocxl as a library to call + * platform-dependent code + */ + rc = pnv_ocxl_get_actag(dev, base, enabled, supported); + if (rc) { + dev_err(&dev->dev, "Can't get 
actag for device: %d\n", rc); + return rc; + } + return 0; +} +EXPORT_SYMBOL_GPL(ocxl_config_get_actag_info); + +void ocxl_config_set_afu_actag(struct pci_dev *dev, int pos, int actag_base, + int actag_count) +{ + u16 val; + + val = actag_count & OCXL_DVSEC_ACTAG_MASK; + pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_EN, val); + + val = actag_base & OCXL_DVSEC_ACTAG_MASK; + pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_BASE, val); +} +EXPORT_SYMBOL_GPL(ocxl_config_set_afu_actag); + +int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count) +{ + return pnv_ocxl_get_pasid_count(dev, count); +} + +void ocxl_config_set_afu_pasid(struct pci_dev *dev, int pos, int pasid_base, + u32 pasid_count_log) +{ + u8 val8; + u32 val32; + + val8 = pasid_count_log & OCXL_DVSEC_PASID_LOG_MASK; + pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_EN, val8); + + pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_BASE, + &val32); + val32 &= ~OCXL_DVSEC_PASID_MASK; + val32 |= pasid_base & OCXL_DVSEC_PASID_MASK; + pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_BASE, + val32); +} +EXPORT_SYMBOL_GPL(ocxl_config_set_afu_pasid); + +void ocxl_config_set_afu_state(struct pci_dev *dev, int pos, int enable) +{ + u8 val; + + pci_read_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ENABLE, &val); + if (enable) + val |= 1; + else + val &= 0xFE; + pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ENABLE, val); +} +EXPORT_SYMBOL_GPL(ocxl_config_set_afu_state); + +int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec) +{ + u32 val; + __be32 *be32ptr; + u8 timers; + int i, rc; + long recv_cap; + char *recv_rate; + + /* + * Skip on function != 0, as the TL can only be defined on 0 + */ + if (PCI_FUNC(dev->devfn) != 0) + return 0; + + recv_rate = kzalloc(PNV_OCXL_TL_RATE_BUF_SIZE, GFP_KERNEL); + if (!recv_rate) + return -ENOMEM; + /* + * The spec defines 64 templates for messages in the + * Transaction Layer (TL). + * + * The host and device each support a subset, so we need to + * configure the transmitters on each side to send only + * templates the receiver understands, at a rate the receiver + * can process. Per the spec, template 0 must be supported by + * everybody. That's the template which has been used by the + * host and device so far. + * + * The sending rate limit must be set before the template is + * enabled. 
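 *
 * (Editor's note, not part of the original patch: the two blocks below are
 * symmetric. "Device -> host" fetches what the host can receive from
 * firmware via pnv_ocxl_get_tl_cap() and programs it into the device's
 * send capability/rate registers; "Host -> device" reads what the device
 * can receive from its config space and hands it to firmware via
 * pnv_ocxl_set_tl_conf(), so each transmitter only uses templates and
 * rates its peer can handle.)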
+ */ + + /* + * Device -> host + */ + rc = pnv_ocxl_get_tl_cap(dev, &recv_cap, recv_rate, + PNV_OCXL_TL_RATE_BUF_SIZE); + if (rc) + goto out; + + for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) { + be32ptr = (__be32 *) &recv_rate[i]; + pci_write_config_dword(dev, + tl_dvsec + OCXL_DVSEC_TL_SEND_RATE + i, + be32_to_cpu(*be32ptr)); + } + val = recv_cap >> 32; + pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP, val); + val = recv_cap & GENMASK(31, 0); + pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP + 4, val); + + /* + * Host -> device + */ + for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) { + pci_read_config_dword(dev, + tl_dvsec + OCXL_DVSEC_TL_RECV_RATE + i, + &val); + be32ptr = (__be32 *) &recv_rate[i]; + *be32ptr = cpu_to_be32(val); + } + pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP, &val); + recv_cap = (long) val << 32; + pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP + 4, &val); + recv_cap |= val; + + rc = pnv_ocxl_set_tl_conf(dev, recv_cap, __pa(recv_rate), + PNV_OCXL_TL_RATE_BUF_SIZE); + if (rc) + goto out; + + /* + * Opencapi commands needing to be retried are classified per + * the TL in 2 groups: short and long commands. + * + * The short back off timer it not used for now. It will be + * for opencapi 4.0. + * + * The long back off timer is typically used when an AFU hits + * a page fault but the NPU is already processing one. So the + * AFU needs to wait before it can resubmit. Having a value + * too low doesn't break anything, but can generate extra + * traffic on the link. + * We set it to 1.6 us for now. It's shorter than, but in the + * same order of magnitude as the time spent to process a page + * fault. + */ + timers = 0x2 << 4; /* long timer = 1.6 us */ + pci_write_config_byte(dev, tl_dvsec + OCXL_DVSEC_TL_BACKOFF_TIMERS, + timers); + + rc = 0; +out: + kfree(recv_rate); + return rc; +} +EXPORT_SYMBOL_GPL(ocxl_config_set_TL); + +int ocxl_config_terminate_pasid(struct pci_dev *dev, int afu_control, int pasid) +{ + u32 val; + unsigned long timeout; + + pci_read_config_dword(dev, afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID, + &val); + if (EXTRACT_BIT(val, 20)) { + dev_err(&dev->dev, + "Can't terminate PASID %#x, previous termination didn't complete\n", + pasid); + return -EBUSY; + } + + val &= ~OCXL_DVSEC_PASID_MASK; + val |= pasid & OCXL_DVSEC_PASID_MASK; + val |= BIT(20); + pci_write_config_dword(dev, + afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID, + val); + + timeout = jiffies + (HZ * OCXL_CFG_TIMEOUT); + pci_read_config_dword(dev, afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID, + &val); + while (EXTRACT_BIT(val, 20)) { + if (time_after_eq(jiffies, timeout)) { + dev_err(&dev->dev, + "Timeout while waiting for AFU to terminate PASID %#x\n", + pasid); + return -EBUSY; + } + cpu_relax(); + pci_read_config_dword(dev, + afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID, + &val); + } + return 0; +} +EXPORT_SYMBOL_GPL(ocxl_config_terminate_pasid); + +void ocxl_config_set_actag(struct pci_dev *dev, int func_dvsec, u32 tag_first, + u32 tag_count) +{ + u32 val; + + val = (tag_first & OCXL_DVSEC_ACTAG_MASK) << 16; + val |= tag_count & OCXL_DVSEC_ACTAG_MASK; + pci_write_config_dword(dev, func_dvsec + OCXL_DVSEC_FUNC_OFF_ACTAG, + val); +} +EXPORT_SYMBOL_GPL(ocxl_config_set_actag); diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c new file mode 100644 index 0000000000..7f83116ae1 --- /dev/null +++ b/drivers/misc/ocxl/context.c @@ -0,0 +1,296 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM 
Corp. +#include +#include "trace.h" +#include "ocxl_internal.h" + +int ocxl_context_alloc(struct ocxl_context **context, struct ocxl_afu *afu, + struct address_space *mapping) +{ + int pasid; + struct ocxl_context *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->afu = afu; + mutex_lock(&afu->contexts_lock); + pasid = idr_alloc(&afu->contexts_idr, ctx, afu->pasid_base, + afu->pasid_base + afu->pasid_max, GFP_KERNEL); + if (pasid < 0) { + mutex_unlock(&afu->contexts_lock); + kfree(ctx); + return pasid; + } + afu->pasid_count++; + mutex_unlock(&afu->contexts_lock); + + ctx->pasid = pasid; + ctx->status = OPENED; + mutex_init(&ctx->status_mutex); + ctx->mapping = mapping; + mutex_init(&ctx->mapping_lock); + init_waitqueue_head(&ctx->events_wq); + mutex_init(&ctx->xsl_error_lock); + mutex_init(&ctx->irq_lock); + idr_init(&ctx->irq_idr); + ctx->tidr = 0; + + /* + * Keep a reference on the AFU to make sure it's valid for the + * duration of the life of the context + */ + ocxl_afu_get(afu); + *context = ctx; + return 0; +} +EXPORT_SYMBOL_GPL(ocxl_context_alloc); + +/* + * Callback for when a translation fault triggers an error + * data: a pointer to the context which triggered the fault + * addr: the address that triggered the error + * dsisr: the value of the PPC64 dsisr register + */ +static void xsl_fault_error(void *data, u64 addr, u64 dsisr) +{ + struct ocxl_context *ctx = (struct ocxl_context *) data; + + mutex_lock(&ctx->xsl_error_lock); + ctx->xsl_error.addr = addr; + ctx->xsl_error.dsisr = dsisr; + ctx->xsl_error.count++; + mutex_unlock(&ctx->xsl_error_lock); + + wake_up_all(&ctx->events_wq); +} + +int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, struct mm_struct *mm) +{ + int rc; + unsigned long pidr = 0; + struct pci_dev *dev; + + // Locks both status & tidr + mutex_lock(&ctx->status_mutex); + if (ctx->status != OPENED) { + rc = -EIO; + goto out; + } + + if (mm) + pidr = mm->context.id; + + dev = to_pci_dev(ctx->afu->fn->dev.parent); + rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid, pidr, ctx->tidr, + amr, pci_dev_id(dev), mm, xsl_fault_error, ctx); + if (rc) + goto out; + + ctx->status = ATTACHED; +out: + mutex_unlock(&ctx->status_mutex); + return rc; +} +EXPORT_SYMBOL_GPL(ocxl_context_attach); + +static vm_fault_t map_afu_irq(struct vm_area_struct *vma, unsigned long address, + u64 offset, struct ocxl_context *ctx) +{ + u64 trigger_addr; + int irq_id = ocxl_irq_offset_to_id(ctx, offset); + + trigger_addr = ocxl_afu_irq_get_addr(ctx, irq_id); + if (!trigger_addr) + return VM_FAULT_SIGBUS; + + return vmf_insert_pfn(vma, address, trigger_addr >> PAGE_SHIFT); +} + +static vm_fault_t map_pp_mmio(struct vm_area_struct *vma, unsigned long address, + u64 offset, struct ocxl_context *ctx) +{ + u64 pp_mmio_addr; + int pasid_off; + vm_fault_t ret; + + if (offset >= ctx->afu->config.pp_mmio_stride) + return VM_FAULT_SIGBUS; + + mutex_lock(&ctx->status_mutex); + if (ctx->status != ATTACHED) { + mutex_unlock(&ctx->status_mutex); + pr_debug("%s: Context not attached, failing mmio mmap\n", + __func__); + return VM_FAULT_SIGBUS; + } + + pasid_off = ctx->pasid - ctx->afu->pasid_base; + pp_mmio_addr = ctx->afu->pp_mmio_start + + pasid_off * ctx->afu->config.pp_mmio_stride + + offset; + + ret = vmf_insert_pfn(vma, address, pp_mmio_addr >> PAGE_SHIFT); + mutex_unlock(&ctx->status_mutex); + return ret; +} + +static vm_fault_t ocxl_mmap_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct ocxl_context *ctx = 
vma->vm_file->private_data; + u64 offset; + vm_fault_t ret; + + offset = vmf->pgoff << PAGE_SHIFT; + pr_debug("%s: pasid %d address 0x%lx offset 0x%llx\n", __func__, + ctx->pasid, vmf->address, offset); + + if (offset < ctx->afu->irq_base_offset) + ret = map_pp_mmio(vma, vmf->address, offset, ctx); + else + ret = map_afu_irq(vma, vmf->address, offset, ctx); + return ret; +} + +static const struct vm_operations_struct ocxl_vmops = { + .fault = ocxl_mmap_fault, +}; + +static int check_mmap_afu_irq(struct ocxl_context *ctx, + struct vm_area_struct *vma) +{ + int irq_id = ocxl_irq_offset_to_id(ctx, vma->vm_pgoff << PAGE_SHIFT); + + /* only one page */ + if (vma_pages(vma) != 1) + return -EINVAL; + + /* check offset validty */ + if (!ocxl_afu_irq_get_addr(ctx, irq_id)) + return -EINVAL; + + /* + * trigger page should only be accessible in write mode. + * + * It's a bit theoretical, as a page mmaped with only + * PROT_WRITE is currently readable, but it doesn't hurt. + */ + if ((vma->vm_flags & VM_READ) || (vma->vm_flags & VM_EXEC) || + !(vma->vm_flags & VM_WRITE)) + return -EINVAL; + vm_flags_clear(vma, VM_MAYREAD | VM_MAYEXEC); + return 0; +} + +static int check_mmap_mmio(struct ocxl_context *ctx, + struct vm_area_struct *vma) +{ + if ((vma_pages(vma) + vma->vm_pgoff) > + (ctx->afu->config.pp_mmio_stride >> PAGE_SHIFT)) + return -EINVAL; + return 0; +} + +int ocxl_context_mmap(struct ocxl_context *ctx, struct vm_area_struct *vma) +{ + int rc; + + if ((vma->vm_pgoff << PAGE_SHIFT) < ctx->afu->irq_base_offset) + rc = check_mmap_mmio(ctx, vma); + else + rc = check_mmap_afu_irq(ctx, vma); + if (rc) + return rc; + + vm_flags_set(vma, VM_IO | VM_PFNMAP); + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_ops = &ocxl_vmops; + return 0; +} + +int ocxl_context_detach(struct ocxl_context *ctx) +{ + struct pci_dev *dev; + int afu_control_pos; + enum ocxl_context_status status; + int rc; + + mutex_lock(&ctx->status_mutex); + status = ctx->status; + ctx->status = CLOSED; + mutex_unlock(&ctx->status_mutex); + if (status != ATTACHED) + return 0; + + dev = to_pci_dev(ctx->afu->fn->dev.parent); + afu_control_pos = ctx->afu->config.dvsec_afu_control_pos; + + mutex_lock(&ctx->afu->afu_control_lock); + rc = ocxl_config_terminate_pasid(dev, afu_control_pos, ctx->pasid); + mutex_unlock(&ctx->afu->afu_control_lock); + trace_ocxl_terminate_pasid(ctx->pasid, rc); + if (rc) { + /* + * If we timeout waiting for the AFU to terminate the + * pasid, then it's dangerous to clean up the Process + * Element entry in the SPA, as it may be referenced + * in the future by the AFU. In which case, we would + * checkstop because of an invalid PE access (FIR + * register 2, bit 42). So leave the PE + * defined. Caller shouldn't free the context so that + * PASID remains allocated. + * + * A link reset will be required to cleanup the AFU + * and the SPA. + */ + if (rc == -EBUSY) + return rc; + } + rc = ocxl_link_remove_pe(ctx->afu->fn->link, ctx->pasid); + if (rc) { + dev_warn(&dev->dev, + "Couldn't remove PE entry cleanly: %d\n", rc); + } + return 0; +} +EXPORT_SYMBOL_GPL(ocxl_context_detach); + +void ocxl_context_detach_all(struct ocxl_afu *afu) +{ + struct ocxl_context *ctx; + int tmp; + + mutex_lock(&afu->contexts_lock); + idr_for_each_entry(&afu->contexts_idr, ctx, tmp) { + ocxl_context_detach(ctx); + /* + * We are force detaching - remove any active mmio + * mappings so userspace cannot interfere with the + * card if it comes back. 
Easiest way to exercise + * this is to unbind and rebind the driver via sysfs + * while it is in use. + */ + mutex_lock(&ctx->mapping_lock); + if (ctx->mapping) + unmap_mapping_range(ctx->mapping, 0, 0, 1); + mutex_unlock(&ctx->mapping_lock); + } + mutex_unlock(&afu->contexts_lock); +} + +void ocxl_context_free(struct ocxl_context *ctx) +{ + mutex_lock(&ctx->afu->contexts_lock); + ctx->afu->pasid_count--; + idr_remove(&ctx->afu->contexts_idr, ctx->pasid); + mutex_unlock(&ctx->afu->contexts_lock); + + ocxl_afu_irq_free_all(ctx); + idr_destroy(&ctx->irq_idr); + /* reference to the AFU taken in ocxl_context_alloc() */ + ocxl_afu_put(ctx->afu); + kfree(ctx); +} +EXPORT_SYMBOL_GPL(ocxl_context_free); diff --git a/drivers/misc/ocxl/core.c b/drivers/misc/ocxl/core.c new file mode 100644 index 0000000000..aebfc53a2d --- /dev/null +++ b/drivers/misc/ocxl/core.c @@ -0,0 +1,569 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2019 IBM Corp. +#include +#include "ocxl_internal.h" + +static struct ocxl_fn *ocxl_fn_get(struct ocxl_fn *fn) +{ + return (get_device(&fn->dev) == NULL) ? NULL : fn; +} + +static void ocxl_fn_put(struct ocxl_fn *fn) +{ + put_device(&fn->dev); +} + +static struct ocxl_afu *alloc_afu(struct ocxl_fn *fn) +{ + struct ocxl_afu *afu; + + afu = kzalloc(sizeof(struct ocxl_afu), GFP_KERNEL); + if (!afu) + return NULL; + + kref_init(&afu->kref); + mutex_init(&afu->contexts_lock); + mutex_init(&afu->afu_control_lock); + idr_init(&afu->contexts_idr); + afu->fn = fn; + ocxl_fn_get(fn); + return afu; +} + +static void free_afu(struct kref *kref) +{ + struct ocxl_afu *afu = container_of(kref, struct ocxl_afu, kref); + + idr_destroy(&afu->contexts_idr); + ocxl_fn_put(afu->fn); + kfree(afu); +} + +void ocxl_afu_get(struct ocxl_afu *afu) +{ + kref_get(&afu->kref); +} +EXPORT_SYMBOL_GPL(ocxl_afu_get); + +void ocxl_afu_put(struct ocxl_afu *afu) +{ + kref_put(&afu->kref, free_afu); +} +EXPORT_SYMBOL_GPL(ocxl_afu_put); + +static int assign_afu_actag(struct ocxl_afu *afu) +{ + struct ocxl_fn *fn = afu->fn; + int actag_count, actag_offset; + struct pci_dev *pci_dev = to_pci_dev(fn->dev.parent); + + /* + * if there were not enough actags for the function, each afu + * reduces its count as well + */ + actag_count = afu->config.actag_supported * + fn->actag_enabled / fn->actag_supported; + actag_offset = ocxl_actag_afu_alloc(fn, actag_count); + if (actag_offset < 0) { + dev_err(&pci_dev->dev, "Can't allocate %d actags for AFU: %d\n", + actag_count, actag_offset); + return actag_offset; + } + afu->actag_base = fn->actag_base + actag_offset; + afu->actag_enabled = actag_count; + + ocxl_config_set_afu_actag(pci_dev, afu->config.dvsec_afu_control_pos, + afu->actag_base, afu->actag_enabled); + dev_dbg(&pci_dev->dev, "actag base=%d enabled=%d\n", + afu->actag_base, afu->actag_enabled); + return 0; +} + +static void reclaim_afu_actag(struct ocxl_afu *afu) +{ + struct ocxl_fn *fn = afu->fn; + int start_offset, size; + + start_offset = afu->actag_base - fn->actag_base; + size = afu->actag_enabled; + ocxl_actag_afu_free(afu->fn, start_offset, size); +} + +static int assign_afu_pasid(struct ocxl_afu *afu) +{ + struct ocxl_fn *fn = afu->fn; + int pasid_count, pasid_offset; + struct pci_dev *pci_dev = to_pci_dev(fn->dev.parent); + + /* + * We only support the case where the function configuration + * requested enough PASIDs to cover all AFUs. 
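 *
 * (Editor's note, not part of the original patch, a worked example with
 * assumed values: with pasid_supported_log = 9 the AFU claims
 * 1 << 9 = 512 contiguous PASIDs; if the allocator returns offset 0 and
 * fn->pasid_base is 0, the AFU owns PASIDs 0..511 and pasid_max is 512.)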
+ */ + pasid_count = 1 << afu->config.pasid_supported_log; + pasid_offset = ocxl_pasid_afu_alloc(fn, pasid_count); + if (pasid_offset < 0) { + dev_err(&pci_dev->dev, "Can't allocate %d PASIDs for AFU: %d\n", + pasid_count, pasid_offset); + return pasid_offset; + } + afu->pasid_base = fn->pasid_base + pasid_offset; + afu->pasid_count = 0; + afu->pasid_max = pasid_count; + + ocxl_config_set_afu_pasid(pci_dev, afu->config.dvsec_afu_control_pos, + afu->pasid_base, + afu->config.pasid_supported_log); + dev_dbg(&pci_dev->dev, "PASID base=%d, enabled=%d\n", + afu->pasid_base, pasid_count); + return 0; +} + +static void reclaim_afu_pasid(struct ocxl_afu *afu) +{ + struct ocxl_fn *fn = afu->fn; + int start_offset, size; + + start_offset = afu->pasid_base - fn->pasid_base; + size = 1 << afu->config.pasid_supported_log; + ocxl_pasid_afu_free(afu->fn, start_offset, size); +} + +static int reserve_fn_bar(struct ocxl_fn *fn, int bar) +{ + struct pci_dev *dev = to_pci_dev(fn->dev.parent); + int rc, idx; + + if (bar != 0 && bar != 2 && bar != 4) + return -EINVAL; + + idx = bar >> 1; + if (fn->bar_used[idx]++ == 0) { + rc = pci_request_region(dev, bar, "ocxl"); + if (rc) + return rc; + } + return 0; +} + +static void release_fn_bar(struct ocxl_fn *fn, int bar) +{ + struct pci_dev *dev = to_pci_dev(fn->dev.parent); + int idx; + + if (bar != 0 && bar != 2 && bar != 4) + return; + + idx = bar >> 1; + if (--fn->bar_used[idx] == 0) + pci_release_region(dev, bar); + WARN_ON(fn->bar_used[idx] < 0); +} + +static int map_mmio_areas(struct ocxl_afu *afu) +{ + int rc; + struct pci_dev *pci_dev = to_pci_dev(afu->fn->dev.parent); + + rc = reserve_fn_bar(afu->fn, afu->config.global_mmio_bar); + if (rc) + return rc; + + rc = reserve_fn_bar(afu->fn, afu->config.pp_mmio_bar); + if (rc) { + release_fn_bar(afu->fn, afu->config.global_mmio_bar); + return rc; + } + + afu->global_mmio_start = + pci_resource_start(pci_dev, afu->config.global_mmio_bar) + + afu->config.global_mmio_offset; + afu->pp_mmio_start = + pci_resource_start(pci_dev, afu->config.pp_mmio_bar) + + afu->config.pp_mmio_offset; + + afu->global_mmio_ptr = ioremap(afu->global_mmio_start, + afu->config.global_mmio_size); + if (!afu->global_mmio_ptr) { + release_fn_bar(afu->fn, afu->config.pp_mmio_bar); + release_fn_bar(afu->fn, afu->config.global_mmio_bar); + dev_err(&pci_dev->dev, "Error mapping global mmio area\n"); + return -ENOMEM; + } + + /* + * Leave an empty page between the per-process mmio area and + * the AFU interrupt mappings + */ + afu->irq_base_offset = afu->config.pp_mmio_stride + PAGE_SIZE; + return 0; +} + +static void unmap_mmio_areas(struct ocxl_afu *afu) +{ + if (afu->global_mmio_ptr) { + iounmap(afu->global_mmio_ptr); + afu->global_mmio_ptr = NULL; + } + afu->global_mmio_start = 0; + afu->pp_mmio_start = 0; + release_fn_bar(afu->fn, afu->config.pp_mmio_bar); + release_fn_bar(afu->fn, afu->config.global_mmio_bar); +} + +static int configure_afu(struct ocxl_afu *afu, u8 afu_idx, struct pci_dev *dev) +{ + int rc; + + rc = ocxl_config_read_afu(dev, &afu->fn->config, &afu->config, afu_idx); + if (rc) + return rc; + + rc = assign_afu_actag(afu); + if (rc) + return rc; + + rc = assign_afu_pasid(afu); + if (rc) + goto err_free_actag; + + rc = map_mmio_areas(afu); + if (rc) + goto err_free_pasid; + + return 0; + +err_free_pasid: + reclaim_afu_pasid(afu); +err_free_actag: + reclaim_afu_actag(afu); + return rc; +} + +static void deconfigure_afu(struct ocxl_afu *afu) +{ + unmap_mmio_areas(afu); + reclaim_afu_pasid(afu); + reclaim_afu_actag(afu); +} + 
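+/* + * Descriptive note (not in the upstream source): activate_afu() and + * deactivate_afu() below enable or disable the AFU by writing its state + * through ocxl_config_set_afu_state(), using the AFU control DVSEC + * position recorded in the AFU configuration. + */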
+static int activate_afu(struct pci_dev *dev, struct ocxl_afu *afu) +{ + ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 1); + + return 0; +} + +static void deactivate_afu(struct ocxl_afu *afu) +{ + struct pci_dev *dev = to_pci_dev(afu->fn->dev.parent); + + ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 0); +} + +static int init_afu(struct pci_dev *dev, struct ocxl_fn *fn, u8 afu_idx) +{ + int rc; + struct ocxl_afu *afu; + + afu = alloc_afu(fn); + if (!afu) + return -ENOMEM; + + rc = configure_afu(afu, afu_idx, dev); + if (rc) { + ocxl_afu_put(afu); + return rc; + } + + rc = activate_afu(dev, afu); + if (rc) { + deconfigure_afu(afu); + ocxl_afu_put(afu); + return rc; + } + + list_add_tail(&afu->list, &fn->afu_list); + + return 0; +} + +static void remove_afu(struct ocxl_afu *afu) +{ + list_del(&afu->list); + ocxl_context_detach_all(afu); + deactivate_afu(afu); + deconfigure_afu(afu); + ocxl_afu_put(afu); // matches the implicit get in alloc_afu +} + +static struct ocxl_fn *alloc_function(void) +{ + struct ocxl_fn *fn; + + fn = kzalloc(sizeof(struct ocxl_fn), GFP_KERNEL); + if (!fn) + return NULL; + + INIT_LIST_HEAD(&fn->afu_list); + INIT_LIST_HEAD(&fn->pasid_list); + INIT_LIST_HEAD(&fn->actag_list); + + return fn; +} + +static void free_function(struct ocxl_fn *fn) +{ + WARN_ON(!list_empty(&fn->afu_list)); + WARN_ON(!list_empty(&fn->pasid_list)); + kfree(fn); +} + +static void free_function_dev(struct device *dev) +{ + struct ocxl_fn *fn = container_of(dev, struct ocxl_fn, dev); + + free_function(fn); +} + +static int set_function_device(struct ocxl_fn *fn, struct pci_dev *dev) +{ + fn->dev.parent = &dev->dev; + fn->dev.release = free_function_dev; + return dev_set_name(&fn->dev, "ocxlfn.%s", dev_name(&dev->dev)); +} + +static int assign_function_actag(struct ocxl_fn *fn) +{ + struct pci_dev *dev = to_pci_dev(fn->dev.parent); + u16 base, enabled, supported; + int rc; + + rc = ocxl_config_get_actag_info(dev, &base, &enabled, &supported); + if (rc) + return rc; + + fn->actag_base = base; + fn->actag_enabled = enabled; + fn->actag_supported = supported; + + ocxl_config_set_actag(dev, fn->config.dvsec_function_pos, + fn->actag_base, fn->actag_enabled); + dev_dbg(&fn->dev, "actag range starting at %d, enabled %d\n", + fn->actag_base, fn->actag_enabled); + return 0; +} + +static int set_function_pasid(struct ocxl_fn *fn) +{ + struct pci_dev *dev = to_pci_dev(fn->dev.parent); + int rc, desired_count, max_count; + + /* A function may not require any PASID */ + if (fn->config.max_pasid_log < 0) + return 0; + + rc = ocxl_config_get_pasid_info(dev, &max_count); + if (rc) + return rc; + + desired_count = 1 << fn->config.max_pasid_log; + + if (desired_count > max_count) { + dev_err(&fn->dev, + "Function requires more PASIDs than are available (%d vs. %d)\n", + desired_count, max_count); + return -ENOSPC; + } + + fn->pasid_base = 0; + return 0; +} + +static int configure_function(struct ocxl_fn *fn, struct pci_dev *dev) +{ + int rc; + + rc = pci_enable_device(dev); + if (rc) { + dev_err(&dev->dev, "pci_enable_device failed: %d\n", rc); + return rc; + } + + /* + * Once it has been confirmed to work on our hardware, we + * should reset the function, to force the adapter to restart + * from scratch. + * A function reset would also reset all its AFUs. + * + * Some hints for implementation: + * + * - there's no status bit to know when the reset is done. We + * should try reading the config space to know when it's + * done.
+ * - probably something like: + * Reset + * wait 100ms + * issue config read + * allow device up to 1 sec to return success on config + * read before declaring it broken + * + * Some shared logic on the card (CFG, TLX) won't be reset, so + * there's no guarantee that it will be enough. + */ + rc = ocxl_config_read_function(dev, &fn->config); + if (rc) + return rc; + + rc = set_function_device(fn, dev); + if (rc) + return rc; + + rc = assign_function_actag(fn); + if (rc) + return rc; + + rc = set_function_pasid(fn); + if (rc) + return rc; + + rc = ocxl_link_setup(dev, 0, &fn->link); + if (rc) + return rc; + + rc = ocxl_config_set_TL(dev, fn->config.dvsec_tl_pos); + if (rc) { + ocxl_link_release(dev, fn->link); + return rc; + } + return 0; +} + +static void deconfigure_function(struct ocxl_fn *fn) +{ + struct pci_dev *dev = to_pci_dev(fn->dev.parent); + + ocxl_link_release(dev, fn->link); + pci_disable_device(dev); +} + +static struct ocxl_fn *init_function(struct pci_dev *dev) +{ + struct ocxl_fn *fn; + int rc; + + fn = alloc_function(); + if (!fn) + return ERR_PTR(-ENOMEM); + + rc = configure_function(fn, dev); + if (rc) { + free_function(fn); + return ERR_PTR(rc); + } + + rc = device_register(&fn->dev); + if (rc) { + deconfigure_function(fn); + put_device(&fn->dev); + return ERR_PTR(rc); + } + return fn; +} + +// Device detection & initialisation + +struct ocxl_fn *ocxl_function_open(struct pci_dev *dev) +{ + int rc, afu_count = 0; + u8 afu; + struct ocxl_fn *fn; + + if (!radix_enabled()) { + dev_err(&dev->dev, "Unsupported memory model (hash)\n"); + return ERR_PTR(-ENODEV); + } + + fn = init_function(dev); + if (IS_ERR(fn)) { + dev_err(&dev->dev, "function init failed: %li\n", + PTR_ERR(fn)); + return fn; + } + + for (afu = 0; afu <= fn->config.max_afu_index; afu++) { + rc = ocxl_config_check_afu_index(dev, &fn->config, afu); + if (rc > 0) { + rc = init_afu(dev, fn, afu); + if (rc) { + dev_err(&dev->dev, + "Can't initialize AFU index %d\n", afu); + continue; + } + afu_count++; + } + } + dev_info(&dev->dev, "%d AFU(s) configured\n", afu_count); + return fn; +} +EXPORT_SYMBOL_GPL(ocxl_function_open); + +struct list_head *ocxl_function_afu_list(struct ocxl_fn *fn) +{ + return &fn->afu_list; +} +EXPORT_SYMBOL_GPL(ocxl_function_afu_list); + +struct ocxl_afu *ocxl_function_fetch_afu(struct ocxl_fn *fn, u8 afu_idx) +{ + struct ocxl_afu *afu; + + list_for_each_entry(afu, &fn->afu_list, list) { + if (afu->config.idx == afu_idx) + return afu; + } + + return NULL; +} +EXPORT_SYMBOL_GPL(ocxl_function_fetch_afu); + +const struct ocxl_fn_config *ocxl_function_config(struct ocxl_fn *fn) +{ + return &fn->config; +} +EXPORT_SYMBOL_GPL(ocxl_function_config); + +void ocxl_function_close(struct ocxl_fn *fn) +{ + struct ocxl_afu *afu, *tmp; + + list_for_each_entry_safe(afu, tmp, &fn->afu_list, list) { + remove_afu(afu); + } + + deconfigure_function(fn); + device_unregister(&fn->dev); +} +EXPORT_SYMBOL_GPL(ocxl_function_close); + +// AFU Metadata + +struct ocxl_afu_config *ocxl_afu_config(struct ocxl_afu *afu) +{ + return &afu->config; +} +EXPORT_SYMBOL_GPL(ocxl_afu_config); + +void ocxl_afu_set_private(struct ocxl_afu *afu, void *private) +{ + afu->private = private; +} +EXPORT_SYMBOL_GPL(ocxl_afu_set_private); + +void *ocxl_afu_get_private(struct ocxl_afu *afu) +{ + if (afu) + return afu->private; + + return NULL; +} +EXPORT_SYMBOL_GPL(ocxl_afu_get_private); diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c new file mode 100644 index 0000000000..6e63f060e4 --- /dev/null +++ 
b/drivers/misc/ocxl/file.c @@ -0,0 +1,620 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include +#include +#include +#include +#include +#include +#include +#include +#include "ocxl_internal.h" + + +#define OCXL_NUM_MINORS 256 /* Total to reserve */ + +static dev_t ocxl_dev; +static struct class *ocxl_class; +static DEFINE_MUTEX(minors_idr_lock); +static struct idr minors_idr; + +static struct ocxl_file_info *find_and_get_file_info(dev_t devno) +{ + struct ocxl_file_info *info; + + mutex_lock(&minors_idr_lock); + info = idr_find(&minors_idr, MINOR(devno)); + if (info) + get_device(&info->dev); + mutex_unlock(&minors_idr_lock); + return info; +} + +static int allocate_minor(struct ocxl_file_info *info) +{ + int minor; + + mutex_lock(&minors_idr_lock); + minor = idr_alloc(&minors_idr, info, 0, OCXL_NUM_MINORS, GFP_KERNEL); + mutex_unlock(&minors_idr_lock); + return minor; +} + +static void free_minor(struct ocxl_file_info *info) +{ + mutex_lock(&minors_idr_lock); + idr_remove(&minors_idr, MINOR(info->dev.devt)); + mutex_unlock(&minors_idr_lock); +} + +static int afu_open(struct inode *inode, struct file *file) +{ + struct ocxl_file_info *info; + struct ocxl_context *ctx; + int rc; + + pr_debug("%s for device %x\n", __func__, inode->i_rdev); + + info = find_and_get_file_info(inode->i_rdev); + if (!info) + return -ENODEV; + + rc = ocxl_context_alloc(&ctx, info->afu, inode->i_mapping); + if (rc) { + put_device(&info->dev); + return rc; + } + put_device(&info->dev); + file->private_data = ctx; + return 0; +} + +static long afu_ioctl_attach(struct ocxl_context *ctx, + struct ocxl_ioctl_attach __user *uarg) +{ + struct ocxl_ioctl_attach arg; + u64 amr = 0; + + pr_debug("%s for context %d\n", __func__, ctx->pasid); + + if (copy_from_user(&arg, uarg, sizeof(arg))) + return -EFAULT; + + /* Make sure reserved fields are not set for forward compatibility */ + if (arg.reserved1 || arg.reserved2 || arg.reserved3) + return -EINVAL; + + amr = arg.amr & mfspr(SPRN_UAMOR); + return ocxl_context_attach(ctx, amr, current->mm); +} + +static long afu_ioctl_get_metadata(struct ocxl_context *ctx, + struct ocxl_ioctl_metadata __user *uarg) +{ + struct ocxl_ioctl_metadata arg; + + memset(&arg, 0, sizeof(arg)); + + arg.version = 0; + + arg.afu_version_major = ctx->afu->config.version_major; + arg.afu_version_minor = ctx->afu->config.version_minor; + arg.pasid = ctx->pasid; + arg.pp_mmio_size = ctx->afu->config.pp_mmio_stride; + arg.global_mmio_size = ctx->afu->config.global_mmio_size; + + if (copy_to_user(uarg, &arg, sizeof(arg))) + return -EFAULT; + + return 0; +} + +#ifdef CONFIG_PPC64 +static long afu_ioctl_enable_p9_wait(struct ocxl_context *ctx, + struct ocxl_ioctl_p9_wait __user *uarg) +{ + struct ocxl_ioctl_p9_wait arg; + + memset(&arg, 0, sizeof(arg)); + + if (cpu_has_feature(CPU_FTR_P9_TIDR)) { + enum ocxl_context_status status; + + // Locks both status & tidr + mutex_lock(&ctx->status_mutex); + if (!ctx->tidr) { + if (set_thread_tidr(current)) { + mutex_unlock(&ctx->status_mutex); + return -ENOENT; + } + + ctx->tidr = current->thread.tidr; + } + + status = ctx->status; + mutex_unlock(&ctx->status_mutex); + + if (status == ATTACHED) { + int rc = ocxl_link_update_pe(ctx->afu->fn->link, + ctx->pasid, ctx->tidr); + + if (rc) + return rc; + } + + arg.thread_id = ctx->tidr; + } else + return -ENOENT; + + if (copy_to_user(uarg, &arg, sizeof(arg))) + return -EFAULT; + + return 0; +} +#endif + + +static long afu_ioctl_get_features(struct ocxl_context *ctx, + struct ocxl_ioctl_features 
__user *uarg) +{ + struct ocxl_ioctl_features arg; + + memset(&arg, 0, sizeof(arg)); + +#ifdef CONFIG_PPC64 + if (cpu_has_feature(CPU_FTR_P9_TIDR)) + arg.flags[0] |= OCXL_IOCTL_FEATURES_FLAGS0_P9_WAIT; +#endif + + if (copy_to_user(uarg, &arg, sizeof(arg))) + return -EFAULT; + + return 0; +} + +#define CMD_STR(x) (x == OCXL_IOCTL_ATTACH ? "ATTACH" : \ + x == OCXL_IOCTL_IRQ_ALLOC ? "IRQ_ALLOC" : \ + x == OCXL_IOCTL_IRQ_FREE ? "IRQ_FREE" : \ + x == OCXL_IOCTL_IRQ_SET_FD ? "IRQ_SET_FD" : \ + x == OCXL_IOCTL_GET_METADATA ? "GET_METADATA" : \ + x == OCXL_IOCTL_ENABLE_P9_WAIT ? "ENABLE_P9_WAIT" : \ + x == OCXL_IOCTL_GET_FEATURES ? "GET_FEATURES" : \ + "UNKNOWN") + +static irqreturn_t irq_handler(void *private) +{ + struct eventfd_ctx *ev_ctx = private; + + eventfd_signal(ev_ctx, 1); + return IRQ_HANDLED; +} + +static void irq_free(void *private) +{ + struct eventfd_ctx *ev_ctx = private; + + eventfd_ctx_put(ev_ctx); +} + +static long afu_ioctl(struct file *file, unsigned int cmd, + unsigned long args) +{ + struct ocxl_context *ctx = file->private_data; + struct ocxl_ioctl_irq_fd irq_fd; + struct eventfd_ctx *ev_ctx; + int irq_id; + u64 irq_offset; + long rc; + bool closed; + + pr_debug("%s for context %d, command %s\n", __func__, ctx->pasid, + CMD_STR(cmd)); + + mutex_lock(&ctx->status_mutex); + closed = (ctx->status == CLOSED); + mutex_unlock(&ctx->status_mutex); + + if (closed) + return -EIO; + + switch (cmd) { + case OCXL_IOCTL_ATTACH: + rc = afu_ioctl_attach(ctx, + (struct ocxl_ioctl_attach __user *) args); + break; + + case OCXL_IOCTL_IRQ_ALLOC: + rc = ocxl_afu_irq_alloc(ctx, &irq_id); + if (!rc) { + irq_offset = ocxl_irq_id_to_offset(ctx, irq_id); + rc = copy_to_user((u64 __user *) args, &irq_offset, + sizeof(irq_offset)); + if (rc) { + ocxl_afu_irq_free(ctx, irq_id); + return -EFAULT; + } + } + break; + + case OCXL_IOCTL_IRQ_FREE: + rc = copy_from_user(&irq_offset, (u64 __user *) args, + sizeof(irq_offset)); + if (rc) + return -EFAULT; + irq_id = ocxl_irq_offset_to_id(ctx, irq_offset); + rc = ocxl_afu_irq_free(ctx, irq_id); + break; + + case OCXL_IOCTL_IRQ_SET_FD: + rc = copy_from_user(&irq_fd, (u64 __user *) args, + sizeof(irq_fd)); + if (rc) + return -EFAULT; + if (irq_fd.reserved) + return -EINVAL; + irq_id = ocxl_irq_offset_to_id(ctx, irq_fd.irq_offset); + ev_ctx = eventfd_ctx_fdget(irq_fd.eventfd); + if (IS_ERR(ev_ctx)) + return PTR_ERR(ev_ctx); + rc = ocxl_irq_set_handler(ctx, irq_id, irq_handler, irq_free, ev_ctx); + if (rc) + eventfd_ctx_put(ev_ctx); + break; + + case OCXL_IOCTL_GET_METADATA: + rc = afu_ioctl_get_metadata(ctx, + (struct ocxl_ioctl_metadata __user *) args); + break; + +#ifdef CONFIG_PPC64 + case OCXL_IOCTL_ENABLE_P9_WAIT: + rc = afu_ioctl_enable_p9_wait(ctx, + (struct ocxl_ioctl_p9_wait __user *) args); + break; +#endif + + case OCXL_IOCTL_GET_FEATURES: + rc = afu_ioctl_get_features(ctx, + (struct ocxl_ioctl_features __user *) args); + break; + + default: + rc = -EINVAL; + } + return rc; +} + +static long afu_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long args) +{ + return afu_ioctl(file, cmd, args); +} + +static int afu_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct ocxl_context *ctx = file->private_data; + + pr_debug("%s for context %d\n", __func__, ctx->pasid); + return ocxl_context_mmap(ctx, vma); +} + +static bool has_xsl_error(struct ocxl_context *ctx) +{ + bool ret; + + mutex_lock(&ctx->xsl_error_lock); + ret = !!ctx->xsl_error.addr; + mutex_unlock(&ctx->xsl_error_lock); + + return ret; +} + +/* + * Are there any events 
pending on the AFU + * ctx: The AFU context + * Returns: true if there are events pending + */ +static bool afu_events_pending(struct ocxl_context *ctx) +{ + if (has_xsl_error(ctx)) + return true; + return false; +} + +static unsigned int afu_poll(struct file *file, struct poll_table_struct *wait) +{ + struct ocxl_context *ctx = file->private_data; + unsigned int mask = 0; + bool closed; + + pr_debug("%s for context %d\n", __func__, ctx->pasid); + + poll_wait(file, &ctx->events_wq, wait); + + mutex_lock(&ctx->status_mutex); + closed = (ctx->status == CLOSED); + mutex_unlock(&ctx->status_mutex); + + if (afu_events_pending(ctx)) + mask = EPOLLIN | EPOLLRDNORM; + else if (closed) + mask = EPOLLERR; + + return mask; +} + +/* + * Populate the supplied buffer with a single XSL error + * ctx: The AFU context to report the error from + * header: the event header to populate + * buf: The buffer to write the body into (should be at least + * AFU_EVENT_BODY_XSL_ERROR_SIZE) + * Return: the amount of buffer that was populated + */ +static ssize_t append_xsl_error(struct ocxl_context *ctx, + struct ocxl_kernel_event_header *header, + char __user *buf) +{ + struct ocxl_kernel_event_xsl_fault_error body; + + memset(&body, 0, sizeof(body)); + + mutex_lock(&ctx->xsl_error_lock); + if (!ctx->xsl_error.addr) { + mutex_unlock(&ctx->xsl_error_lock); + return 0; + } + + body.addr = ctx->xsl_error.addr; + body.dsisr = ctx->xsl_error.dsisr; + body.count = ctx->xsl_error.count; + + ctx->xsl_error.addr = 0; + ctx->xsl_error.dsisr = 0; + ctx->xsl_error.count = 0; + + mutex_unlock(&ctx->xsl_error_lock); + + header->type = OCXL_AFU_EVENT_XSL_FAULT_ERROR; + + if (copy_to_user(buf, &body, sizeof(body))) + return -EFAULT; + + return sizeof(body); +} + +#define AFU_EVENT_BODY_MAX_SIZE sizeof(struct ocxl_kernel_event_xsl_fault_error) + +/* + * Reports events on the AFU + * Format: + * Header (struct ocxl_kernel_event_header) + * Body (struct ocxl_kernel_event_*) + * Header... 
+ */ +static ssize_t afu_read(struct file *file, char __user *buf, size_t count, + loff_t *off) +{ + struct ocxl_context *ctx = file->private_data; + struct ocxl_kernel_event_header header; + ssize_t rc; + ssize_t used = 0; + DEFINE_WAIT(event_wait); + + memset(&header, 0, sizeof(header)); + + /* Require offset to be 0 */ + if (*off != 0) + return -EINVAL; + + if (count < (sizeof(struct ocxl_kernel_event_header) + + AFU_EVENT_BODY_MAX_SIZE)) + return -EINVAL; + + for (;;) { + prepare_to_wait(&ctx->events_wq, &event_wait, + TASK_INTERRUPTIBLE); + + if (afu_events_pending(ctx)) + break; + + if (ctx->status == CLOSED) + break; + + if (file->f_flags & O_NONBLOCK) { + finish_wait(&ctx->events_wq, &event_wait); + return -EAGAIN; + } + + if (signal_pending(current)) { + finish_wait(&ctx->events_wq, &event_wait); + return -ERESTARTSYS; + } + + schedule(); + } + + finish_wait(&ctx->events_wq, &event_wait); + + if (has_xsl_error(ctx)) { + used = append_xsl_error(ctx, &header, buf + sizeof(header)); + if (used < 0) + return used; + } + + if (!afu_events_pending(ctx)) + header.flags |= OCXL_KERNEL_EVENT_FLAG_LAST; + + if (copy_to_user(buf, &header, sizeof(header))) + return -EFAULT; + + used += sizeof(header); + + rc = used; + return rc; +} + +static int afu_release(struct inode *inode, struct file *file) +{ + struct ocxl_context *ctx = file->private_data; + int rc; + + pr_debug("%s for device %x\n", __func__, inode->i_rdev); + rc = ocxl_context_detach(ctx); + mutex_lock(&ctx->mapping_lock); + ctx->mapping = NULL; + mutex_unlock(&ctx->mapping_lock); + wake_up_all(&ctx->events_wq); + if (rc != -EBUSY) + ocxl_context_free(ctx); + return 0; +} + +static const struct file_operations ocxl_afu_fops = { + .owner = THIS_MODULE, + .open = afu_open, + .unlocked_ioctl = afu_ioctl, + .compat_ioctl = afu_compat_ioctl, + .mmap = afu_mmap, + .poll = afu_poll, + .read = afu_read, + .release = afu_release, +}; + +// Free the info struct +static void info_release(struct device *dev) +{ + struct ocxl_file_info *info = container_of(dev, struct ocxl_file_info, dev); + + ocxl_afu_put(info->afu); + kfree(info); +} + +static int ocxl_file_make_visible(struct ocxl_file_info *info) +{ + int rc; + + cdev_init(&info->cdev, &ocxl_afu_fops); + rc = cdev_add(&info->cdev, info->dev.devt, 1); + if (rc) { + dev_err(&info->dev, "Unable to add afu char device: %d\n", rc); + return rc; + } + + return 0; +} + +static void ocxl_file_make_invisible(struct ocxl_file_info *info) +{ + cdev_del(&info->cdev); +} + +int ocxl_file_register_afu(struct ocxl_afu *afu) +{ + int minor; + int rc; + struct ocxl_file_info *info; + struct ocxl_fn *fn = afu->fn; + struct pci_dev *pci_dev = to_pci_dev(fn->dev.parent); + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (info == NULL) + return -ENOMEM; + + minor = allocate_minor(info); + if (minor < 0) { + kfree(info); + return minor; + } + + info->dev.parent = &fn->dev; + info->dev.devt = MKDEV(MAJOR(ocxl_dev), minor); + info->dev.class = ocxl_class; + info->dev.release = info_release; + + info->afu = afu; + ocxl_afu_get(afu); + + rc = dev_set_name(&info->dev, "%s.%s.%hhu", + afu->config.name, dev_name(&pci_dev->dev), afu->config.idx); + if (rc) + goto err_put; + + rc = device_register(&info->dev); + if (rc) { + free_minor(info); + put_device(&info->dev); + return rc; + } + + rc = ocxl_sysfs_register_afu(info); + if (rc) + goto err_unregister; + + rc = ocxl_file_make_visible(info); + if (rc) + goto err_unregister; + + ocxl_afu_set_private(afu, info); + + return 0; + +err_unregister: + 
ocxl_sysfs_unregister_afu(info); // safe to call even if register failed + free_minor(info); + device_unregister(&info->dev); + return rc; +err_put: + ocxl_afu_put(afu); + free_minor(info); + kfree(info); + return rc; +} + +void ocxl_file_unregister_afu(struct ocxl_afu *afu) +{ + struct ocxl_file_info *info = ocxl_afu_get_private(afu); + + if (!info) + return; + + ocxl_file_make_invisible(info); + ocxl_sysfs_unregister_afu(info); + free_minor(info); + device_unregister(&info->dev); +} + +static char *ocxl_devnode(const struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "ocxl/%s", dev_name(dev)); +} + +int ocxl_file_init(void) +{ + int rc; + + idr_init(&minors_idr); + + rc = alloc_chrdev_region(&ocxl_dev, 0, OCXL_NUM_MINORS, "ocxl"); + if (rc) { + pr_err("Unable to allocate ocxl major number: %d\n", rc); + return rc; + } + + ocxl_class = class_create("ocxl"); + if (IS_ERR(ocxl_class)) { + pr_err("Unable to create ocxl class\n"); + unregister_chrdev_region(ocxl_dev, OCXL_NUM_MINORS); + return PTR_ERR(ocxl_class); + } + + ocxl_class->devnode = ocxl_devnode; + return 0; +} + +void ocxl_file_exit(void) +{ + class_destroy(ocxl_class); + unregister_chrdev_region(ocxl_dev, OCXL_NUM_MINORS); + idr_destroy(&minors_idr); +} diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c new file mode 100644 index 0000000000..c06c699c0e --- /dev/null +++ b/drivers/misc/ocxl/link.c @@ -0,0 +1,779 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ocxl_internal.h" +#include "trace.h" + + +#define SPA_PASID_BITS 15 +#define SPA_PASID_MAX ((1 << SPA_PASID_BITS) - 1) +#define SPA_PE_MASK SPA_PASID_MAX +#define SPA_SPA_SIZE_LOG 22 /* Each SPA is 4MB */ + +#define SPA_CFG_SF (1ull << (63-0)) +#define SPA_CFG_TA (1ull << (63-1)) +#define SPA_CFG_HV (1ull << (63-3)) +#define SPA_CFG_UV (1ull << (63-4)) +#define SPA_CFG_XLAT_hpt (0ull << (63-6)) /* Hashed page table (HPT) mode */ +#define SPA_CFG_XLAT_roh (2ull << (63-6)) /* Radix on HPT mode */ +#define SPA_CFG_XLAT_ror (3ull << (63-6)) /* Radix on Radix mode */ +#define SPA_CFG_PR (1ull << (63-49)) +#define SPA_CFG_TC (1ull << (63-54)) +#define SPA_CFG_DR (1ull << (63-59)) + +#define SPA_XSL_TF (1ull << (63-3)) /* Translation fault */ +#define SPA_XSL_S (1ull << (63-38)) /* Store operation */ + +#define SPA_PE_VALID 0x80000000 + +struct ocxl_link; + +struct pe_data { + struct mm_struct *mm; + /* callback to trigger when a translation fault occurs */ + void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr); + /* opaque pointer to be passed to the above callback */ + void *xsl_err_data; + struct rcu_head rcu; + struct ocxl_link *link; + struct mmu_notifier mmu_notifier; +}; + +struct spa { + struct ocxl_process_element *spa_mem; + int spa_order; + struct mutex spa_lock; + struct radix_tree_root pe_tree; /* Maps PE handles to pe_data */ + char *irq_name; + int virq; + void __iomem *reg_dsisr; + void __iomem *reg_dar; + void __iomem *reg_tfc; + void __iomem *reg_pe_handle; + /* + * The following fields are used by the memory fault + * interrupt handler. We can only have one interrupt at a + * time. The NPU won't raise another interrupt until the + * previous one has been ack'd by writing to the TFC register + */ + struct xsl_fault { + struct work_struct fault_work; + u64 pe; + u64 dsisr; + u64 dar; + struct pe_data pe_data; + } xsl_fault; +}; + +/* + * An opencapi link can be used by several PCI functions.
We have + * one link per device slot. + * + * A linked list of opencapi links should suffice, as there's a + * limited number of opencapi slots on a system and lookup is only + * done when the device is probed + */ +struct ocxl_link { + struct list_head list; + struct kref ref; + int domain; + int bus; + int dev; + void __iomem *arva; /* ATSD register virtual address */ + spinlock_t atsd_lock; /* to serialize shootdowns */ + atomic_t irq_available; + struct spa *spa; + void *platform_data; +}; +static LIST_HEAD(links_list); +static DEFINE_MUTEX(links_list_lock); + +enum xsl_response { + CONTINUE, + ADDRESS_ERROR, + RESTART, +}; + + +static void read_irq(struct spa *spa, u64 *dsisr, u64 *dar, u64 *pe) +{ + u64 reg; + + *dsisr = in_be64(spa->reg_dsisr); + *dar = in_be64(spa->reg_dar); + reg = in_be64(spa->reg_pe_handle); + *pe = reg & SPA_PE_MASK; +} + +static void ack_irq(struct spa *spa, enum xsl_response r) +{ + u64 reg = 0; + + /* continue is not supported */ + if (r == RESTART) + reg = PPC_BIT(31); + else if (r == ADDRESS_ERROR) + reg = PPC_BIT(30); + else + WARN(1, "Invalid irq response %d\n", r); + + if (reg) { + trace_ocxl_fault_ack(spa->spa_mem, spa->xsl_fault.pe, + spa->xsl_fault.dsisr, spa->xsl_fault.dar, reg); + out_be64(spa->reg_tfc, reg); + } +} + +static void xsl_fault_handler_bh(struct work_struct *fault_work) +{ + vm_fault_t flt = 0; + unsigned long access, flags, inv_flags = 0; + enum xsl_response r; + struct xsl_fault *fault = container_of(fault_work, struct xsl_fault, + fault_work); + struct spa *spa = container_of(fault, struct spa, xsl_fault); + + int rc; + + /* + * We must release a reference on mm_users whenever exiting this + * function (taken in the memory fault interrupt handler) + */ + rc = copro_handle_mm_fault(fault->pe_data.mm, fault->dar, fault->dsisr, + &flt); + if (rc) { + pr_debug("copro_handle_mm_fault failed: %d\n", rc); + if (fault->pe_data.xsl_err_cb) { + fault->pe_data.xsl_err_cb( + fault->pe_data.xsl_err_data, + fault->dar, fault->dsisr); + } + r = ADDRESS_ERROR; + goto ack; + } + + if (!radix_enabled()) { + /* + * update_mmu_cache() will not have loaded the hash + * since current->trap is not a 0x400 or 0x300, so + * just call hash_page_mm() here. + */ + access = _PAGE_PRESENT | _PAGE_READ; + if (fault->dsisr & SPA_XSL_S) + access |= _PAGE_WRITE; + + if (get_region_id(fault->dar) != USER_REGION_ID) + access |= _PAGE_PRIVILEGED; + + local_irq_save(flags); + hash_page_mm(fault->pe_data.mm, fault->dar, access, 0x300, + inv_flags); + local_irq_restore(flags); + } + r = RESTART; +ack: + mmput(fault->pe_data.mm); + ack_irq(spa, r); +} + +static irqreturn_t xsl_fault_handler(int irq, void *data) +{ + struct ocxl_link *link = (struct ocxl_link *) data; + struct spa *spa = link->spa; + u64 dsisr, dar, pe_handle; + struct pe_data *pe_data; + struct ocxl_process_element *pe; + int pid; + bool schedule = false; + + read_irq(spa, &dsisr, &dar, &pe_handle); + trace_ocxl_fault(spa->spa_mem, pe_handle, dsisr, dar, -1); + + WARN_ON(pe_handle > SPA_PE_MASK); + pe = spa->spa_mem + pe_handle; + pid = be32_to_cpu(pe->pid); + /* We could be reading all null values here if the PE is being + * removed while an interrupt kicks in. It's not supposed to + * happen if the driver notified the AFU to terminate the + * PASID, and the AFU waited for pending operations before + * acknowledging. But even if it happens, we won't find a + * memory context below and fail silently, so it should be ok. 
+ */ + if (!(dsisr & SPA_XSL_TF)) { + WARN(1, "Invalid xsl interrupt fault register %#llx\n", dsisr); + ack_irq(spa, ADDRESS_ERROR); + return IRQ_HANDLED; + } + + rcu_read_lock(); + pe_data = radix_tree_lookup(&spa->pe_tree, pe_handle); + if (!pe_data) { + /* + * Could only happen if the driver didn't notify the + * AFU about PASID termination before removing the PE, + * or the AFU didn't wait for all memory access to + * have completed. + * + * Either way, we fail early, but we shouldn't log an + * error message, as it is a valid (if unexpected) + * scenario + */ + rcu_read_unlock(); + pr_debug("Unknown mm context for xsl interrupt\n"); + ack_irq(spa, ADDRESS_ERROR); + return IRQ_HANDLED; + } + + if (!pe_data->mm) { + /* + * translation fault from a kernel context - an OpenCAPI + * device tried to access a bad kernel address + */ + rcu_read_unlock(); + pr_warn("Unresolved OpenCAPI xsl fault in kernel context\n"); + ack_irq(spa, ADDRESS_ERROR); + return IRQ_HANDLED; + } + WARN_ON(pe_data->mm->context.id != pid); + + if (mmget_not_zero(pe_data->mm)) { + spa->xsl_fault.pe = pe_handle; + spa->xsl_fault.dar = dar; + spa->xsl_fault.dsisr = dsisr; + spa->xsl_fault.pe_data = *pe_data; + schedule = true; + /* mm_users count released by bottom half */ + } + rcu_read_unlock(); + if (schedule) + schedule_work(&spa->xsl_fault.fault_work); + else + ack_irq(spa, ADDRESS_ERROR); + return IRQ_HANDLED; +} + +static void unmap_irq_registers(struct spa *spa) +{ + pnv_ocxl_unmap_xsl_regs(spa->reg_dsisr, spa->reg_dar, spa->reg_tfc, + spa->reg_pe_handle); +} + +static int map_irq_registers(struct pci_dev *dev, struct spa *spa) +{ + return pnv_ocxl_map_xsl_regs(dev, &spa->reg_dsisr, &spa->reg_dar, + &spa->reg_tfc, &spa->reg_pe_handle); +} + +static int setup_xsl_irq(struct pci_dev *dev, struct ocxl_link *link) +{ + struct spa *spa = link->spa; + int rc; + int hwirq; + + rc = pnv_ocxl_get_xsl_irq(dev, &hwirq); + if (rc) + return rc; + + rc = map_irq_registers(dev, spa); + if (rc) + return rc; + + spa->irq_name = kasprintf(GFP_KERNEL, "ocxl-xsl-%x-%x-%x", + link->domain, link->bus, link->dev); + if (!spa->irq_name) { + dev_err(&dev->dev, "Can't allocate name for xsl interrupt\n"); + rc = -ENOMEM; + goto err_xsl; + } + /* + * At some point, we'll need to look into allowing a higher + * number of interrupts. Could we have an IRQ domain per link? 
+ */ + spa->virq = irq_create_mapping(NULL, hwirq); + if (!spa->virq) { + dev_err(&dev->dev, + "irq_create_mapping failed for translation interrupt\n"); + rc = -EINVAL; + goto err_name; + } + + dev_dbg(&dev->dev, "hwirq %d mapped to virq %d\n", hwirq, spa->virq); + + rc = request_irq(spa->virq, xsl_fault_handler, 0, spa->irq_name, + link); + if (rc) { + dev_err(&dev->dev, + "request_irq failed for translation interrupt: %d\n", + rc); + rc = -EINVAL; + goto err_mapping; + } + return 0; + +err_mapping: + irq_dispose_mapping(spa->virq); +err_name: + kfree(spa->irq_name); +err_xsl: + unmap_irq_registers(spa); + return rc; +} + +static void release_xsl_irq(struct ocxl_link *link) +{ + struct spa *spa = link->spa; + + if (spa->virq) { + free_irq(spa->virq, link); + irq_dispose_mapping(spa->virq); + } + kfree(spa->irq_name); + unmap_irq_registers(spa); +} + +static int alloc_spa(struct pci_dev *dev, struct ocxl_link *link) +{ + struct spa *spa; + + spa = kzalloc(sizeof(struct spa), GFP_KERNEL); + if (!spa) + return -ENOMEM; + + mutex_init(&spa->spa_lock); + INIT_RADIX_TREE(&spa->pe_tree, GFP_KERNEL); + INIT_WORK(&spa->xsl_fault.fault_work, xsl_fault_handler_bh); + + spa->spa_order = SPA_SPA_SIZE_LOG - PAGE_SHIFT; + spa->spa_mem = (struct ocxl_process_element *) + __get_free_pages(GFP_KERNEL | __GFP_ZERO, spa->spa_order); + if (!spa->spa_mem) { + dev_err(&dev->dev, "Can't allocate Shared Process Area\n"); + kfree(spa); + return -ENOMEM; + } + pr_debug("Allocated SPA for %x:%x:%x at %p\n", link->domain, link->bus, + link->dev, spa->spa_mem); + + link->spa = spa; + return 0; +} + +static void free_spa(struct ocxl_link *link) +{ + struct spa *spa = link->spa; + + pr_debug("Freeing SPA for %x:%x:%x\n", link->domain, link->bus, + link->dev); + + if (spa && spa->spa_mem) { + free_pages((unsigned long) spa->spa_mem, spa->spa_order); + kfree(spa); + link->spa = NULL; + } +} + +static int alloc_link(struct pci_dev *dev, int PE_mask, struct ocxl_link **out_link) +{ + struct ocxl_link *link; + int rc; + + link = kzalloc(sizeof(struct ocxl_link), GFP_KERNEL); + if (!link) + return -ENOMEM; + + kref_init(&link->ref); + link->domain = pci_domain_nr(dev->bus); + link->bus = dev->bus->number; + link->dev = PCI_SLOT(dev->devfn); + atomic_set(&link->irq_available, MAX_IRQ_PER_LINK); + spin_lock_init(&link->atsd_lock); + + rc = alloc_spa(dev, link); + if (rc) + goto err_free; + + rc = setup_xsl_irq(dev, link); + if (rc) + goto err_spa; + + /* platform specific hook */ + rc = pnv_ocxl_spa_setup(dev, link->spa->spa_mem, PE_mask, + &link->platform_data); + if (rc) + goto err_xsl_irq; + + /* if link->arva is not defined, MMIO registers are not used to + * generate TLB invalidates. PowerBus snooping is enabled. + * Otherwise, PowerBus snooping is disabled. TLB Invalidates are + * initiated using MMIO registers.
+ */ + pnv_ocxl_map_lpar(dev, mfspr(SPRN_LPID), 0, &link->arva); + + *out_link = link; + return 0; + +err_xsl_irq: + release_xsl_irq(link); +err_spa: + free_spa(link); +err_free: + kfree(link); + return rc; +} + +static void free_link(struct ocxl_link *link) +{ + release_xsl_irq(link); + free_spa(link); + kfree(link); +} + +int ocxl_link_setup(struct pci_dev *dev, int PE_mask, void **link_handle) +{ + int rc = 0; + struct ocxl_link *link; + + mutex_lock(&links_list_lock); + list_for_each_entry(link, &links_list, list) { + /* The functions of a device all share the same link */ + if (link->domain == pci_domain_nr(dev->bus) && + link->bus == dev->bus->number && + link->dev == PCI_SLOT(dev->devfn)) { + kref_get(&link->ref); + *link_handle = link; + goto unlock; + } + } + rc = alloc_link(dev, PE_mask, &link); + if (rc) + goto unlock; + + list_add(&link->list, &links_list); + *link_handle = link; +unlock: + mutex_unlock(&links_list_lock); + return rc; +} +EXPORT_SYMBOL_GPL(ocxl_link_setup); + +static void release_xsl(struct kref *ref) +{ + struct ocxl_link *link = container_of(ref, struct ocxl_link, ref); + + if (link->arva) { + pnv_ocxl_unmap_lpar(link->arva); + link->arva = NULL; + } + + list_del(&link->list); + /* call platform code before releasing data */ + pnv_ocxl_spa_release(link->platform_data); + free_link(link); +} + +void ocxl_link_release(struct pci_dev *dev, void *link_handle) +{ + struct ocxl_link *link = (struct ocxl_link *) link_handle; + + mutex_lock(&links_list_lock); + kref_put(&link->ref, release_xsl); + mutex_unlock(&links_list_lock); +} +EXPORT_SYMBOL_GPL(ocxl_link_release); + +static void arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct pe_data *pe_data = container_of(mn, struct pe_data, mmu_notifier); + struct ocxl_link *link = pe_data->link; + unsigned long addr, pid, page_size = PAGE_SIZE; + + pid = mm->context.id; + trace_ocxl_mmu_notifier_range(start, end, pid); + + spin_lock(&link->atsd_lock); + for (addr = start; addr < end; addr += page_size) + pnv_ocxl_tlb_invalidate(link->arva, pid, addr, page_size); + spin_unlock(&link->atsd_lock); +} + +static const struct mmu_notifier_ops ocxl_mmu_notifier_ops = { + .arch_invalidate_secondary_tlbs = arch_invalidate_secondary_tlbs, +}; + +static u64 calculate_cfg_state(bool kernel) +{ + u64 state; + + state = SPA_CFG_DR; + if (mfspr(SPRN_LPCR) & LPCR_TC) + state |= SPA_CFG_TC; + if (radix_enabled()) + state |= SPA_CFG_XLAT_ror; + else + state |= SPA_CFG_XLAT_hpt; + state |= SPA_CFG_HV; + if (kernel) { + if (mfmsr() & MSR_SF) + state |= SPA_CFG_SF; + } else { + state |= SPA_CFG_PR; + if (!test_tsk_thread_flag(current, TIF_32BIT)) + state |= SPA_CFG_SF; + } + return state; +} + +int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, + u64 amr, u16 bdf, struct mm_struct *mm, + void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr), + void *xsl_err_data) +{ + struct ocxl_link *link = (struct ocxl_link *) link_handle; + struct spa *spa = link->spa; + struct ocxl_process_element *pe; + int pe_handle, rc = 0; + struct pe_data *pe_data; + + BUILD_BUG_ON(sizeof(struct ocxl_process_element) != 128); + if (pasid > SPA_PASID_MAX) + return -EINVAL; + + mutex_lock(&spa->spa_lock); + pe_handle = pasid & SPA_PE_MASK; + pe = spa->spa_mem + pe_handle; + + if (pe->software_state) { + rc = -EBUSY; + goto unlock; + } + + pe_data = kmalloc(sizeof(*pe_data), GFP_KERNEL); + if (!pe_data) { + rc = -ENOMEM; + goto unlock; + } + + pe_data->mm = mm; + 
pe_data->xsl_err_cb = xsl_err_cb; + pe_data->xsl_err_data = xsl_err_data; + pe_data->link = link; + pe_data->mmu_notifier.ops = &ocxl_mmu_notifier_ops; + + memset(pe, 0, sizeof(struct ocxl_process_element)); + pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0)); + pe->pasid = cpu_to_be32(pasid << (31 - 19)); + pe->bdf = cpu_to_be16(bdf); + pe->lpid = cpu_to_be32(mfspr(SPRN_LPID)); + pe->pid = cpu_to_be32(pidr); + pe->tid = cpu_to_be32(tidr); + pe->amr = cpu_to_be64(amr); + pe->software_state = cpu_to_be32(SPA_PE_VALID); + + /* + * For user contexts, register a copro so that TLBIs are seen + * by the nest MMU. If we have a kernel context, TLBIs are + * already global. + */ + if (mm) { + mm_context_add_copro(mm); + if (link->arva) { + /* Use MMIO registers for the TLB Invalidate + * operations. + */ + trace_ocxl_init_mmu_notifier(pasid, mm->context.id); + mmu_notifier_register(&pe_data->mmu_notifier, mm); + } + } + + /* + * Barrier is to make sure PE is visible in the SPA before it + * is used by the device. It also helps with the global TLBI + * invalidation + */ + mb(); + radix_tree_insert(&spa->pe_tree, pe_handle, pe_data); + + /* + * The mm must stay valid for as long as the device uses it. We + * lower the count when the context is removed from the SPA. + * + * We grab mm_count (and not mm_users), as we don't want to + * end up in a circular dependency if a process mmaps its + * mmio, therefore incrementing the file ref count when + * calling mmap(), and forgets to unmap before exiting. In + * that scenario, when the kernel handles the death of the + * process, the file is not cleaned because unmap was not + * called, and the mm wouldn't be freed because we would still + * have a reference on mm_users. Incrementing mm_count solves + * the problem. + */ + if (mm) + mmgrab(mm); + trace_ocxl_context_add(current->pid, spa->spa_mem, pasid, pidr, tidr); +unlock: + mutex_unlock(&spa->spa_lock); + return rc; +} +EXPORT_SYMBOL_GPL(ocxl_link_add_pe); + +int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid) +{ + struct ocxl_link *link = (struct ocxl_link *) link_handle; + struct spa *spa = link->spa; + struct ocxl_process_element *pe; + int pe_handle, rc; + + if (pasid > SPA_PASID_MAX) + return -EINVAL; + + pe_handle = pasid & SPA_PE_MASK; + pe = spa->spa_mem + pe_handle; + + mutex_lock(&spa->spa_lock); + + pe->tid = cpu_to_be32(tid); + + /* + * The barrier makes sure the PE is updated + * before we clear the NPU context cache below, so that the + * old PE cannot be reloaded erroneously. + */ + mb(); + + /* + * hook to platform code + * On powerpc, the entry needs to be cleared from the context + * cache of the NPU. + */ + rc = pnv_ocxl_spa_remove_pe_from_cache(link->platform_data, pe_handle); + WARN_ON(rc); + + mutex_unlock(&spa->spa_lock); + return rc; +} + +int ocxl_link_remove_pe(void *link_handle, int pasid) +{ + struct ocxl_link *link = (struct ocxl_link *) link_handle; + struct spa *spa = link->spa; + struct ocxl_process_element *pe; + struct pe_data *pe_data; + int pe_handle, rc; + + if (pasid > SPA_PASID_MAX) + return -EINVAL; + + /* + * About synchronization with our memory fault handler: + * + * Before removing the PE, the driver is supposed to have + * notified the AFU, which should have cleaned up and made + * sure the PASID is no longer in use, including pending + * interrupts. However, there's no way to be sure... + * + * We clear the PE and remove the context from our radix + * tree.
From that point on, any new interrupt for that + * context will fail silently, which is ok. As mentioned + * above, that's not expected, but it could happen if the + * driver or AFU didn't do the right thing. + * + * There could still be a bottom half running, but we don't + * need to wait/flush, as it is managing a reference count on + * the mm it reads from the radix tree. + */ + pe_handle = pasid & SPA_PE_MASK; + pe = spa->spa_mem + pe_handle; + + mutex_lock(&spa->spa_lock); + + if (!(be32_to_cpu(pe->software_state) & SPA_PE_VALID)) { + rc = -EINVAL; + goto unlock; + } + + trace_ocxl_context_remove(current->pid, spa->spa_mem, pasid, + be32_to_cpu(pe->pid), be32_to_cpu(pe->tid)); + + memset(pe, 0, sizeof(struct ocxl_process_element)); + /* + * The barrier makes sure the PE is removed from the SPA + * before we clear the NPU context cache below, so that the + * old PE cannot be reloaded erroneously. + */ + mb(); + + /* + * hook to platform code + * On powerpc, the entry needs to be cleared from the context + * cache of the NPU. + */ + rc = pnv_ocxl_spa_remove_pe_from_cache(link->platform_data, pe_handle); + WARN_ON(rc); + + pe_data = radix_tree_delete(&spa->pe_tree, pe_handle); + if (!pe_data) { + WARN(1, "Couldn't find pe data when removing PE\n"); + } else { + if (pe_data->mm) { + if (link->arva) { + trace_ocxl_release_mmu_notifier(pasid, + pe_data->mm->context.id); + mmu_notifier_unregister(&pe_data->mmu_notifier, + pe_data->mm); + spin_lock(&link->atsd_lock); + pnv_ocxl_tlb_invalidate(link->arva, + pe_data->mm->context.id, + 0ull, + PAGE_SIZE); + spin_unlock(&link->atsd_lock); + } + mm_context_remove_copro(pe_data->mm); + mmdrop(pe_data->mm); + } + kfree_rcu(pe_data, rcu); + } +unlock: + mutex_unlock(&spa->spa_lock); + return rc; +} +EXPORT_SYMBOL_GPL(ocxl_link_remove_pe); + +int ocxl_link_irq_alloc(void *link_handle, int *hw_irq) +{ + struct ocxl_link *link = (struct ocxl_link *) link_handle; + int irq; + + if (atomic_dec_if_positive(&link->irq_available) < 0) + return -ENOSPC; + + irq = xive_native_alloc_irq(); + if (!irq) { + atomic_inc(&link->irq_available); + return -ENXIO; + } + + *hw_irq = irq; + return 0; +} +EXPORT_SYMBOL_GPL(ocxl_link_irq_alloc); + +void ocxl_link_free_irq(void *link_handle, int hw_irq) +{ + struct ocxl_link *link = (struct ocxl_link *) link_handle; + + xive_native_free_irq(hw_irq); + atomic_inc(&link->irq_available); +} +EXPORT_SYMBOL_GPL(ocxl_link_free_irq); diff --git a/drivers/misc/ocxl/main.c b/drivers/misc/ocxl/main.c new file mode 100644 index 0000000000..ef73cf35dd --- /dev/null +++ b/drivers/misc/ocxl/main.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include +#include +#include +#include "ocxl_internal.h" + +static int __init init_ocxl(void) +{ + int rc = 0; + + if (!tlbie_capable) + return -EINVAL; + + rc = ocxl_file_init(); + if (rc) + return rc; + + rc = pci_register_driver(&ocxl_pci_driver); + if (rc) { + ocxl_file_exit(); + return rc; + } + return 0; +} + +static void exit_ocxl(void) +{ + pci_unregister_driver(&ocxl_pci_driver); + ocxl_file_exit(); +} + +module_init(init_ocxl); +module_exit(exit_ocxl); + +MODULE_DESCRIPTION("Open Coherent Accelerator"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/ocxl/mmio.c b/drivers/misc/ocxl/mmio.c new file mode 100644 index 0000000000..aae713db4e --- /dev/null +++ b/drivers/misc/ocxl/mmio.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2019 IBM Corp. 
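+/* + * Descriptive note (not in the upstream source): accessors for the AFU + * global MMIO area: 32- and 64-bit read, write, bit-set and bit-clear + * helpers. Each accessor checks the offset against the global MMIO size + * and honours the endianness requested by the caller. + */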
+#include +#include "trace.h" +#include "ocxl_internal.h" + +int ocxl_global_mmio_read32(struct ocxl_afu *afu, size_t offset, + enum ocxl_endian endian, u32 *val) +{ + if (offset > afu->config.global_mmio_size - 4) + return -EINVAL; + +#ifdef __BIG_ENDIAN__ + if (endian == OCXL_HOST_ENDIAN) + endian = OCXL_BIG_ENDIAN; +#endif + + switch (endian) { + case OCXL_BIG_ENDIAN: + *val = readl_be((char *)afu->global_mmio_ptr + offset); + break; + + default: + *val = readl((char *)afu->global_mmio_ptr + offset); + break; + } + + return 0; +} +EXPORT_SYMBOL_GPL(ocxl_global_mmio_read32); + +int ocxl_global_mmio_read64(struct ocxl_afu *afu, size_t offset, + enum ocxl_endian endian, u64 *val) +{ + if (offset > afu->config.global_mmio_size - 8) + return -EINVAL; + +#ifdef __BIG_ENDIAN__ + if (endian == OCXL_HOST_ENDIAN) + endian = OCXL_BIG_ENDIAN; +#endif + + switch (endian) { + case OCXL_BIG_ENDIAN: + *val = readq_be((char *)afu->global_mmio_ptr + offset); + break; + + default: + *val = readq((char *)afu->global_mmio_ptr + offset); + break; + } + + return 0; +} +EXPORT_SYMBOL_GPL(ocxl_global_mmio_read64); + +int ocxl_global_mmio_write32(struct ocxl_afu *afu, size_t offset, + enum ocxl_endian endian, u32 val) +{ + if (offset > afu->config.global_mmio_size - 4) + return -EINVAL; + +#ifdef __BIG_ENDIAN__ + if (endian == OCXL_HOST_ENDIAN) + endian = OCXL_BIG_ENDIAN; +#endif + + switch (endian) { + case OCXL_BIG_ENDIAN: + writel_be(val, (char *)afu->global_mmio_ptr + offset); + break; + + default: + writel(val, (char *)afu->global_mmio_ptr + offset); + break; + } + + + return 0; +} +EXPORT_SYMBOL_GPL(ocxl_global_mmio_write32); + +int ocxl_global_mmio_write64(struct ocxl_afu *afu, size_t offset, + enum ocxl_endian endian, u64 val) +{ + if (offset > afu->config.global_mmio_size - 8) + return -EINVAL; + +#ifdef __BIG_ENDIAN__ + if (endian == OCXL_HOST_ENDIAN) + endian = OCXL_BIG_ENDIAN; +#endif + + switch (endian) { + case OCXL_BIG_ENDIAN: + writeq_be(val, (char *)afu->global_mmio_ptr + offset); + break; + + default: + writeq(val, (char *)afu->global_mmio_ptr + offset); + break; + } + + + return 0; +} +EXPORT_SYMBOL_GPL(ocxl_global_mmio_write64); + +int ocxl_global_mmio_set32(struct ocxl_afu *afu, size_t offset, + enum ocxl_endian endian, u32 mask) +{ + u32 tmp; + + if (offset > afu->config.global_mmio_size - 4) + return -EINVAL; + +#ifdef __BIG_ENDIAN__ + if (endian == OCXL_HOST_ENDIAN) + endian = OCXL_BIG_ENDIAN; +#endif + + switch (endian) { + case OCXL_BIG_ENDIAN: + tmp = readl_be((char *)afu->global_mmio_ptr + offset); + tmp |= mask; + writel_be(tmp, (char *)afu->global_mmio_ptr + offset); + break; + + default: + tmp = readl((char *)afu->global_mmio_ptr + offset); + tmp |= mask; + writel(tmp, (char *)afu->global_mmio_ptr + offset); + break; + } + + return 0; +} +EXPORT_SYMBOL_GPL(ocxl_global_mmio_set32); + +int ocxl_global_mmio_set64(struct ocxl_afu *afu, size_t offset, + enum ocxl_endian endian, u64 mask) +{ + u64 tmp; + + if (offset > afu->config.global_mmio_size - 8) + return -EINVAL; + +#ifdef __BIG_ENDIAN__ + if (endian == OCXL_HOST_ENDIAN) + endian = OCXL_BIG_ENDIAN; +#endif + + switch (endian) { + case OCXL_BIG_ENDIAN: + tmp = readq_be((char *)afu->global_mmio_ptr + offset); + tmp |= mask; + writeq_be(tmp, (char *)afu->global_mmio_ptr + offset); + break; + + default: + tmp = readq((char *)afu->global_mmio_ptr + offset); + tmp |= mask; + writeq(tmp, (char *)afu->global_mmio_ptr + offset); + break; + } + + return 0; +} +EXPORT_SYMBOL_GPL(ocxl_global_mmio_set64); + +int ocxl_global_mmio_clear32(struct 
ocxl_afu *afu, size_t offset, + enum ocxl_endian endian, u32 mask) +{ + u32 tmp; + + if (offset > afu->config.global_mmio_size - 4) + return -EINVAL; + +#ifdef __BIG_ENDIAN__ + if (endian == OCXL_HOST_ENDIAN) + endian = OCXL_BIG_ENDIAN; +#endif + + switch (endian) { + case OCXL_BIG_ENDIAN: + tmp = readl_be((char *)afu->global_mmio_ptr + offset); + tmp &= ~mask; + writel_be(tmp, (char *)afu->global_mmio_ptr + offset); + break; + + default: + tmp = readl((char *)afu->global_mmio_ptr + offset); + tmp &= ~mask; + writel(tmp, (char *)afu->global_mmio_ptr + offset); + break; + } + + + return 0; +} +EXPORT_SYMBOL_GPL(ocxl_global_mmio_clear32); + +int ocxl_global_mmio_clear64(struct ocxl_afu *afu, size_t offset, + enum ocxl_endian endian, u64 mask) +{ + u64 tmp; + + if (offset > afu->config.global_mmio_size - 8) + return -EINVAL; + +#ifdef __BIG_ENDIAN__ + if (endian == OCXL_HOST_ENDIAN) + endian = OCXL_BIG_ENDIAN; +#endif + + switch (endian) { + case OCXL_BIG_ENDIAN: + tmp = readq_be((char *)afu->global_mmio_ptr + offset); + tmp &= ~mask; + writeq_be(tmp, (char *)afu->global_mmio_ptr + offset); + break; + + default: + tmp = readq((char *)afu->global_mmio_ptr + offset); + tmp &= ~mask; + writeq(tmp, (char *)afu->global_mmio_ptr + offset); + break; + } + + return 0; +} +EXPORT_SYMBOL_GPL(ocxl_global_mmio_clear64); diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h new file mode 100644 index 0000000000..10125a22d5 --- /dev/null +++ b/drivers/misc/ocxl/ocxl_internal.h @@ -0,0 +1,154 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +// Copyright 2017 IBM Corp. +#ifndef _OCXL_INTERNAL_H_ +#define _OCXL_INTERNAL_H_ + +#include +#include +#include +#include + +#define MAX_IRQ_PER_LINK 2000 +#define MAX_IRQ_PER_CONTEXT MAX_IRQ_PER_LINK + +extern struct pci_driver ocxl_pci_driver; + +struct ocxl_fn { + struct device dev; + int bar_used[3]; + struct ocxl_fn_config config; + struct list_head afu_list; + int pasid_base; + int actag_base; + int actag_enabled; + int actag_supported; + struct list_head pasid_list; + struct list_head actag_list; + void *link; +}; + +struct ocxl_file_info { + struct ocxl_afu *afu; + struct device dev; + struct cdev cdev; + struct bin_attribute attr_global_mmio; +}; + +struct ocxl_afu { + struct kref kref; + struct ocxl_fn *fn; + struct list_head list; + struct ocxl_afu_config config; + int pasid_base; + int pasid_count; /* opened contexts */ + int pasid_max; /* maximum number of contexts */ + int actag_base; + int actag_enabled; + struct mutex contexts_lock; + struct idr contexts_idr; + struct mutex afu_control_lock; + u64 global_mmio_start; + u64 irq_base_offset; + void __iomem *global_mmio_ptr; + u64 pp_mmio_start; + void *private; +}; + +enum ocxl_context_status { + CLOSED, + OPENED, + ATTACHED, +}; + +// Contains metadata about a translation fault +struct ocxl_xsl_error { + u64 addr; // The address that triggered the fault + u64 dsisr; // the value of the dsisr register + u64 count; // The number of times this fault has been triggered +}; + +struct ocxl_context { + struct ocxl_afu *afu; + int pasid; + struct mutex status_mutex; + enum ocxl_context_status status; + struct address_space *mapping; + struct mutex mapping_lock; + wait_queue_head_t events_wq; + struct mutex xsl_error_lock; + struct ocxl_xsl_error xsl_error; + struct mutex irq_lock; + struct idr irq_idr; + u16 tidr; // Thread ID used for P9 wait implementation +}; + +struct ocxl_process_element { + __be64 config_state; + __be32 pasid;
+ __be16 bdf; + __be16 reserved1; + __be32 reserved2[9]; + __be32 lpid; + __be32 tid; + __be32 pid; + __be32 reserved3[10]; + __be64 amr; + __be32 reserved4[3]; + __be32 software_state; +}; + +int ocxl_create_cdev(struct ocxl_afu *afu); +void ocxl_destroy_cdev(struct ocxl_afu *afu); +int ocxl_file_register_afu(struct ocxl_afu *afu); +void ocxl_file_unregister_afu(struct ocxl_afu *afu); + +int ocxl_file_init(void); +void ocxl_file_exit(void); + +int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size); +void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size); +int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size); +void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size); + +/* + * Get the max PASID value that can be used by the function + */ +int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count); + +/* + * Control whether the FPGA is reloaded on a link reset + */ +int ocxl_config_get_reset_reload(struct pci_dev *dev, int *val); +int ocxl_config_set_reset_reload(struct pci_dev *dev, int val); + +/* + * Check if an AFU index is valid for the given function. + * + * AFU indexes can be sparse, so a driver should check all indexes up + * to the maximum found in the function description + */ +int ocxl_config_check_afu_index(struct pci_dev *dev, + struct ocxl_fn_config *fn, int afu_idx); + +/** + * ocxl_link_update_pe() - Update values within a Process Element + * @link_handle: the link handle associated with the process element + * @pasid: the PASID for the AFU context + * @tid: the new thread id for the process element + * + * Returns 0 on success + */ +int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid); + +int ocxl_context_mmap(struct ocxl_context *ctx, + struct vm_area_struct *vma); +void ocxl_context_detach_all(struct ocxl_afu *afu); + +int ocxl_sysfs_register_afu(struct ocxl_file_info *info); +void ocxl_sysfs_unregister_afu(struct ocxl_file_info *info); + +int ocxl_irq_offset_to_id(struct ocxl_context *ctx, u64 offset); +u64 ocxl_irq_id_to_offset(struct ocxl_context *ctx, int irq_id); +void ocxl_afu_irq_free_all(struct ocxl_context *ctx); + +#endif /* _OCXL_INTERNAL_H_ */ diff --git a/drivers/misc/ocxl/pasid.c b/drivers/misc/ocxl/pasid.c new file mode 100644 index 0000000000..d14cb56e69 --- /dev/null +++ b/drivers/misc/ocxl/pasid.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. 
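+/* + * Descriptive note (not in the upstream source): allocator for the PASID + * and actag ranges handed out to the AFUs of a function. Ranges are kept + * in ordered linked lists (fn->pasid_list and fn->actag_list) and are + * bounded by the function's limits (max_pasid_log and actag_enabled). + */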
+#include "ocxl_internal.h" + + +struct id_range { + struct list_head list; + u32 start; + u32 end; +}; + +#ifdef DEBUG +static void dump_list(struct list_head *head, char *type_str) +{ + struct id_range *cur; + + pr_debug("%s ranges allocated:\n", type_str); + list_for_each_entry(cur, head, list) { + pr_debug("Range %d->%d\n", cur->start, cur->end); + } +} +#endif + +static int range_alloc(struct list_head *head, u32 size, int max_id, + char *type_str) +{ + struct list_head *pos; + struct id_range *cur, *new; + int rc, last_end; + + new = kmalloc(sizeof(struct id_range), GFP_KERNEL); + if (!new) + return -ENOMEM; + + pos = head; + last_end = -1; + list_for_each_entry(cur, head, list) { + if ((cur->start - last_end) > size) + break; + last_end = cur->end; + pos = &cur->list; + } + + new->start = last_end + 1; + new->end = new->start + size - 1; + + if (new->end > max_id) { + kfree(new); + rc = -ENOSPC; + } else { + list_add(&new->list, pos); + rc = new->start; + } + +#ifdef DEBUG + dump_list(head, type_str); +#endif + return rc; +} + +static void range_free(struct list_head *head, u32 start, u32 size, + char *type_str) +{ + bool found = false; + struct id_range *cur, *tmp; + + list_for_each_entry_safe(cur, tmp, head, list) { + if (cur->start == start && cur->end == (start + size - 1)) { + found = true; + list_del(&cur->list); + kfree(cur); + break; + } + } + WARN_ON(!found); +#ifdef DEBUG + dump_list(head, type_str); +#endif +} + +int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size) +{ + int max_pasid; + + if (fn->config.max_pasid_log < 0) + return -ENOSPC; + max_pasid = 1 << fn->config.max_pasid_log; + return range_alloc(&fn->pasid_list, size, max_pasid, "afu pasid"); +} + +void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size) +{ + return range_free(&fn->pasid_list, start, size, "afu pasid"); +} + +int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size) +{ + int max_actag; + + max_actag = fn->actag_enabled; + return range_alloc(&fn->actag_list, size, max_actag, "afu actag"); +} + +void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size) +{ + return range_free(&fn->actag_list, start, size, "afu actag"); +} diff --git a/drivers/misc/ocxl/pci.c b/drivers/misc/ocxl/pci.c new file mode 100644 index 0000000000..cb920aa88d --- /dev/null +++ b/drivers/misc/ocxl/pci.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2019 IBM Corp. +#include +#include "ocxl_internal.h" + +/* + * Any opencapi device which wants to use this 'generic' driver should + * use the 0x062B device ID. Vendors should define the subsystem + * vendor/device ID to help differentiate devices. 
+ */ +static const struct pci_device_id ocxl_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x062B), }, + { } +}; +MODULE_DEVICE_TABLE(pci, ocxl_pci_tbl); + +static int ocxl_probe(struct pci_dev *dev, const struct pci_device_id *id) +{ + int rc; + struct ocxl_afu *afu, *tmp; + struct ocxl_fn *fn; + struct list_head *afu_list; + + fn = ocxl_function_open(dev); + if (IS_ERR(fn)) + return PTR_ERR(fn); + + pci_set_drvdata(dev, fn); + + afu_list = ocxl_function_afu_list(fn); + + list_for_each_entry_safe(afu, tmp, afu_list, list) { + // Cleanup handled within ocxl_file_register_afu() + rc = ocxl_file_register_afu(afu); + if (rc) { + dev_err(&dev->dev, "Failed to register AFU '%s' index %d", + afu->config.name, afu->config.idx); + } + } + + return 0; +} + +static void ocxl_remove(struct pci_dev *dev) +{ + struct ocxl_fn *fn; + struct ocxl_afu *afu; + struct list_head *afu_list; + + fn = pci_get_drvdata(dev); + afu_list = ocxl_function_afu_list(fn); + + list_for_each_entry(afu, afu_list, list) { + ocxl_file_unregister_afu(afu); + } + + ocxl_function_close(fn); +} + +struct pci_driver ocxl_pci_driver = { + .name = "ocxl", + .id_table = ocxl_pci_tbl, + .probe = ocxl_probe, + .remove = ocxl_remove, + .shutdown = ocxl_remove, +}; diff --git a/drivers/misc/ocxl/sysfs.c b/drivers/misc/ocxl/sysfs.c new file mode 100644 index 0000000000..405180d47d --- /dev/null +++ b/drivers/misc/ocxl/sysfs.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include +#include "ocxl_internal.h" + +static inline struct ocxl_afu *to_afu(struct device *device) +{ + struct ocxl_file_info *info = container_of(device, struct ocxl_file_info, dev); + + return info->afu; +} + +static ssize_t global_mmio_size_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct ocxl_afu *afu = to_afu(device); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + afu->config.global_mmio_size); +} + +static ssize_t pp_mmio_size_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct ocxl_afu *afu = to_afu(device); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + afu->config.pp_mmio_stride); +} + +static ssize_t afu_version_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct ocxl_afu *afu = to_afu(device); + + return scnprintf(buf, PAGE_SIZE, "%hhu:%hhu\n", + afu->config.version_major, + afu->config.version_minor); +} + +static ssize_t contexts_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct ocxl_afu *afu = to_afu(device); + + return scnprintf(buf, PAGE_SIZE, "%d/%d\n", + afu->pasid_count, afu->pasid_max); +} + +static ssize_t reload_on_reset_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct ocxl_afu *afu = to_afu(device); + struct ocxl_fn *fn = afu->fn; + struct pci_dev *pci_dev = to_pci_dev(fn->dev.parent); + int val; + + if (ocxl_config_get_reset_reload(pci_dev, &val)) + return scnprintf(buf, PAGE_SIZE, "unavailable\n"); + + return scnprintf(buf, PAGE_SIZE, "%d\n", val); +} + +static ssize_t reload_on_reset_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ocxl_afu *afu = to_afu(device); + struct ocxl_fn *fn = afu->fn; + struct pci_dev *pci_dev = to_pci_dev(fn->dev.parent); + int rc, val; + + rc = kstrtoint(buf, 0, &val); + if (rc || (val != 0 && val != 1)) + return -EINVAL; + + if (ocxl_config_set_reset_reload(pci_dev, val)) + return -ENODEV; + + return count; +} + +static struct 
device_attribute afu_attrs[] = { + __ATTR_RO(global_mmio_size), + __ATTR_RO(pp_mmio_size), + __ATTR_RO(afu_version), + __ATTR_RO(contexts), + __ATTR_RW(reload_on_reset), +}; + +static ssize_t global_mmio_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, + loff_t off, size_t count) +{ + struct ocxl_afu *afu = to_afu(kobj_to_dev(kobj)); + + if (count == 0 || off < 0 || + off >= afu->config.global_mmio_size) + return 0; + memcpy_fromio(buf, afu->global_mmio_ptr + off, count); + return count; +} + +static vm_fault_t global_mmio_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct ocxl_afu *afu = vma->vm_private_data; + unsigned long offset; + + if (vmf->pgoff >= (afu->config.global_mmio_size >> PAGE_SHIFT)) + return VM_FAULT_SIGBUS; + + offset = vmf->pgoff; + offset += (afu->global_mmio_start >> PAGE_SHIFT); + return vmf_insert_pfn(vma, vmf->address, offset); +} + +static const struct vm_operations_struct global_mmio_vmops = { + .fault = global_mmio_fault, +}; + +static int global_mmio_mmap(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + struct vm_area_struct *vma) +{ + struct ocxl_afu *afu = to_afu(kobj_to_dev(kobj)); + + if ((vma_pages(vma) + vma->vm_pgoff) > + (afu->config.global_mmio_size >> PAGE_SHIFT)) + return -EINVAL; + + vm_flags_set(vma, VM_IO | VM_PFNMAP); + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_ops = &global_mmio_vmops; + vma->vm_private_data = afu; + return 0; +} + +int ocxl_sysfs_register_afu(struct ocxl_file_info *info) +{ + int i, rc; + + for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) { + rc = device_create_file(&info->dev, &afu_attrs[i]); + if (rc) + goto err; + } + + sysfs_attr_init(&info->attr_global_mmio.attr); + info->attr_global_mmio.attr.name = "global_mmio_area"; + info->attr_global_mmio.attr.mode = 0600; + info->attr_global_mmio.size = info->afu->config.global_mmio_size; + info->attr_global_mmio.read = global_mmio_read; + info->attr_global_mmio.mmap = global_mmio_mmap; + rc = device_create_bin_file(&info->dev, &info->attr_global_mmio); + if (rc) { + dev_err(&info->dev, "Unable to create global mmio attr for afu: %d\n", rc); + goto err; + } + + return 0; + +err: + for (i--; i >= 0; i--) + device_remove_file(&info->dev, &afu_attrs[i]); + + return rc; +} + +void ocxl_sysfs_unregister_afu(struct ocxl_file_info *info) +{ + int i; + + /* + * device_remove_bin_file is safe to call if the file is not added as + * the files are removed by name, and early exit if not found + */ + for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) + device_remove_file(&info->dev, &afu_attrs[i]); + device_remove_bin_file(&info->dev, &info->attr_global_mmio); +} diff --git a/drivers/misc/ocxl/trace.c b/drivers/misc/ocxl/trace.c new file mode 100644 index 0000000000..1e69470496 --- /dev/null +++ b/drivers/misc/ocxl/trace.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "trace.h" +#endif diff --git a/drivers/misc/ocxl/trace.h b/drivers/misc/ocxl/trace.h new file mode 100644 index 0000000000..a33a5094ff --- /dev/null +++ b/drivers/misc/ocxl/trace.h @@ -0,0 +1,242 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +// Copyright 2017 IBM Corp. 
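+/*
+ * Tracepoint definitions for the ocxl driver (TRACE_SYSTEM "ocxl").  The
+ * events below cover MMU notifier activity, context add/remove, PASID
+ * termination, translation faults and AFU interrupt alloc/free/receive;
+ * the rest of the driver can emit them through the generated
+ * trace_ocxl_*() helpers.  trace.c defines CREATE_TRACE_POINTS so the
+ * events are instantiated exactly once.  At runtime they can be enabled
+ * through the usual tracefs interface (typically under events/ocxl/,
+ * which is standard tracepoint behaviour rather than anything specific
+ * to this driver).
+ */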
+#undef TRACE_SYSTEM +#define TRACE_SYSTEM ocxl + +#if !defined(_TRACE_OCXL_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_OCXL_H + +#include + + +TRACE_EVENT(ocxl_mmu_notifier_range, + TP_PROTO(unsigned long start, unsigned long end, unsigned long pidr), + TP_ARGS(start, end, pidr), + + TP_STRUCT__entry( + __field(unsigned long, start) + __field(unsigned long, end) + __field(unsigned long, pidr) + ), + + TP_fast_assign( + __entry->start = start; + __entry->end = end; + __entry->pidr = pidr; + ), + + TP_printk("start=0x%lx end=0x%lx pidr=0x%lx", + __entry->start, + __entry->end, + __entry->pidr + ) +); + +TRACE_EVENT(ocxl_init_mmu_notifier, + TP_PROTO(int pasid, unsigned long pidr), + TP_ARGS(pasid, pidr), + + TP_STRUCT__entry( + __field(int, pasid) + __field(unsigned long, pidr) + ), + + TP_fast_assign( + __entry->pasid = pasid; + __entry->pidr = pidr; + ), + + TP_printk("pasid=%d, pidr=0x%lx", + __entry->pasid, + __entry->pidr + ) +); + +TRACE_EVENT(ocxl_release_mmu_notifier, + TP_PROTO(int pasid, unsigned long pidr), + TP_ARGS(pasid, pidr), + + TP_STRUCT__entry( + __field(int, pasid) + __field(unsigned long, pidr) + ), + + TP_fast_assign( + __entry->pasid = pasid; + __entry->pidr = pidr; + ), + + TP_printk("pasid=%d, pidr=0x%lx", + __entry->pasid, + __entry->pidr + ) +); + +DECLARE_EVENT_CLASS(ocxl_context, + TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr), + TP_ARGS(pid, spa, pasid, pidr, tidr), + + TP_STRUCT__entry( + __field(pid_t, pid) + __field(void*, spa) + __field(int, pasid) + __field(u32, pidr) + __field(u32, tidr) + ), + + TP_fast_assign( + __entry->pid = pid; + __entry->spa = spa; + __entry->pasid = pasid; + __entry->pidr = pidr; + __entry->tidr = tidr; + ), + + TP_printk("linux pid=%d spa=0x%p pasid=0x%x pidr=0x%x tidr=0x%x", + __entry->pid, + __entry->spa, + __entry->pasid, + __entry->pidr, + __entry->tidr + ) +); + +DEFINE_EVENT(ocxl_context, ocxl_context_add, + TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr), + TP_ARGS(pid, spa, pasid, pidr, tidr) +); + +DEFINE_EVENT(ocxl_context, ocxl_context_remove, + TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr), + TP_ARGS(pid, spa, pasid, pidr, tidr) +); + +TRACE_EVENT(ocxl_terminate_pasid, + TP_PROTO(int pasid, int rc), + TP_ARGS(pasid, rc), + + TP_STRUCT__entry( + __field(int, pasid) + __field(int, rc) + ), + + TP_fast_assign( + __entry->pasid = pasid; + __entry->rc = rc; + ), + + TP_printk("pasid=0x%x rc=%d", + __entry->pasid, + __entry->rc + ) +); + +DECLARE_EVENT_CLASS(ocxl_fault_handler, + TP_PROTO(void *spa, u64 pe, u64 dsisr, u64 dar, u64 tfc), + TP_ARGS(spa, pe, dsisr, dar, tfc), + + TP_STRUCT__entry( + __field(void *, spa) + __field(u64, pe) + __field(u64, dsisr) + __field(u64, dar) + __field(u64, tfc) + ), + + TP_fast_assign( + __entry->spa = spa; + __entry->pe = pe; + __entry->dsisr = dsisr; + __entry->dar = dar; + __entry->tfc = tfc; + ), + + TP_printk("spa=%p pe=0x%llx dsisr=0x%llx dar=0x%llx tfc=0x%llx", + __entry->spa, + __entry->pe, + __entry->dsisr, + __entry->dar, + __entry->tfc + ) +); + +DEFINE_EVENT(ocxl_fault_handler, ocxl_fault, + TP_PROTO(void *spa, u64 pe, u64 dsisr, u64 dar, u64 tfc), + TP_ARGS(spa, pe, dsisr, dar, tfc) +); + +DEFINE_EVENT(ocxl_fault_handler, ocxl_fault_ack, + TP_PROTO(void *spa, u64 pe, u64 dsisr, u64 dar, u64 tfc), + TP_ARGS(spa, pe, dsisr, dar, tfc) +); + +TRACE_EVENT(ocxl_afu_irq_alloc, + TP_PROTO(int pasid, int irq_id, unsigned int virq, int hw_irq), + TP_ARGS(pasid, irq_id, virq, hw_irq), + + TP_STRUCT__entry( + __field(int, pasid) + 
__field(int, irq_id) + __field(unsigned int, virq) + __field(int, hw_irq) + ), + + TP_fast_assign( + __entry->pasid = pasid; + __entry->irq_id = irq_id; + __entry->virq = virq; + __entry->hw_irq = hw_irq; + ), + + TP_printk("pasid=0x%x irq_id=%d virq=%u hw_irq=%d", + __entry->pasid, + __entry->irq_id, + __entry->virq, + __entry->hw_irq + ) +); + +TRACE_EVENT(ocxl_afu_irq_free, + TP_PROTO(int pasid, int irq_id), + TP_ARGS(pasid, irq_id), + + TP_STRUCT__entry( + __field(int, pasid) + __field(int, irq_id) + ), + + TP_fast_assign( + __entry->pasid = pasid; + __entry->irq_id = irq_id; + ), + + TP_printk("pasid=0x%x irq_id=%d", + __entry->pasid, + __entry->irq_id + ) +); + +TRACE_EVENT(ocxl_afu_irq_receive, + TP_PROTO(int virq), + TP_ARGS(virq), + + TP_STRUCT__entry( + __field(int, virq) + ), + + TP_fast_assign( + __entry->virq = virq; + ), + + TP_printk("virq=%d", + __entry->virq + ) +); + +#endif /* _TRACE_OCXL_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace +#include diff --git a/drivers/misc/open-dice.c b/drivers/misc/open-dice.c new file mode 100644 index 0000000000..8aea2d070a --- /dev/null +++ b/drivers/misc/open-dice.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 - Google LLC + * Author: David Brazdil + * + * Driver for Open Profile for DICE. + * + * This driver takes ownership of a reserved memory region containing data + * generated by the Open Profile for DICE measured boot protocol. The memory + * contents are not interpreted by the kernel but can be mapped into a userspace + * process via a misc device. Userspace can also request a wipe of the memory. + * + * Userspace can access the data with (w/o error handling): + * + * fd = open("/dev/open-dice0", O_RDWR); + * read(fd, &size, sizeof(unsigned long)); + * data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + * write(fd, NULL, 0); // wipe + * close(fd); + */ + +#include +#include +#include +#include +#include +#include + +#define DRIVER_NAME "open-dice" + +struct open_dice_drvdata { + struct mutex lock; + char name[16]; + struct reserved_mem *rmem; + struct miscdevice misc; +}; + +static inline struct open_dice_drvdata *to_open_dice_drvdata(struct file *filp) +{ + return container_of(filp->private_data, struct open_dice_drvdata, misc); +} + +static int open_dice_wipe(struct open_dice_drvdata *drvdata) +{ + void *kaddr; + + mutex_lock(&drvdata->lock); + kaddr = devm_memremap(drvdata->misc.this_device, drvdata->rmem->base, + drvdata->rmem->size, MEMREMAP_WC); + if (IS_ERR(kaddr)) { + mutex_unlock(&drvdata->lock); + return PTR_ERR(kaddr); + } + + memset(kaddr, 0, drvdata->rmem->size); + devm_memunmap(drvdata->misc.this_device, kaddr); + mutex_unlock(&drvdata->lock); + return 0; +} + +/* + * Copies the size of the reserved memory region to the user-provided buffer. + */ +static ssize_t open_dice_read(struct file *filp, char __user *ptr, size_t len, + loff_t *off) +{ + unsigned long val = to_open_dice_drvdata(filp)->rmem->size; + + return simple_read_from_buffer(ptr, len, off, &val, sizeof(val)); +} + +/* + * Triggers a wipe of the reserved memory region. The user-provided pointer + * is never dereferenced. + */ +static ssize_t open_dice_write(struct file *filp, const char __user *ptr, + size_t len, loff_t *off) +{ + if (open_dice_wipe(to_open_dice_drvdata(filp))) + return -EIO; + + /* Consume the input buffer. 
*/ + return len; +} + +/* + * Creates a mapping of the reserved memory region in user address space. + */ +static int open_dice_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct open_dice_drvdata *drvdata = to_open_dice_drvdata(filp); + + if (vma->vm_flags & VM_MAYSHARE) { + /* Do not allow userspace to modify the underlying data. */ + if (vma->vm_flags & VM_WRITE) + return -EPERM; + /* Ensure userspace cannot acquire VM_WRITE later. */ + vm_flags_clear(vma, VM_MAYWRITE); + } + + /* Create write-combine mapping so all clients observe a wipe. */ + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + vm_flags_set(vma, VM_DONTCOPY | VM_DONTDUMP); + return vm_iomap_memory(vma, drvdata->rmem->base, drvdata->rmem->size); +} + +static const struct file_operations open_dice_fops = { + .owner = THIS_MODULE, + .read = open_dice_read, + .write = open_dice_write, + .mmap = open_dice_mmap, +}; + +static int __init open_dice_probe(struct platform_device *pdev) +{ + static unsigned int dev_idx; + struct device *dev = &pdev->dev; + struct reserved_mem *rmem; + struct open_dice_drvdata *drvdata; + int ret; + + rmem = of_reserved_mem_lookup(dev->of_node); + if (!rmem) { + dev_err(dev, "failed to lookup reserved memory\n"); + return -EINVAL; + } + + if (!rmem->size || (rmem->size > ULONG_MAX)) { + dev_err(dev, "invalid memory region size\n"); + return -EINVAL; + } + + if (!PAGE_ALIGNED(rmem->base) || !PAGE_ALIGNED(rmem->size)) { + dev_err(dev, "memory region must be page-aligned\n"); + return -EINVAL; + } + + drvdata = devm_kmalloc(dev, sizeof(*drvdata), GFP_KERNEL); + if (!drvdata) + return -ENOMEM; + + *drvdata = (struct open_dice_drvdata){ + .lock = __MUTEX_INITIALIZER(drvdata->lock), + .rmem = rmem, + .misc = (struct miscdevice){ + .parent = dev, + .name = drvdata->name, + .minor = MISC_DYNAMIC_MINOR, + .fops = &open_dice_fops, + .mode = 0600, + }, + }; + + /* Index overflow check not needed, misc_register() will fail. */ + snprintf(drvdata->name, sizeof(drvdata->name), DRIVER_NAME"%u", dev_idx++); + + ret = misc_register(&drvdata->misc); + if (ret) { + dev_err(dev, "failed to register misc device '%s': %d\n", + drvdata->name, ret); + return ret; + } + + platform_set_drvdata(pdev, drvdata); + return 0; +} + +static int open_dice_remove(struct platform_device *pdev) +{ + struct open_dice_drvdata *drvdata = platform_get_drvdata(pdev); + + misc_deregister(&drvdata->misc); + return 0; +} + +static const struct of_device_id open_dice_of_match[] = { + { .compatible = "google,open-dice" }, + {}, +}; + +static struct platform_driver open_dice_driver = { + .remove = open_dice_remove, + .driver = { + .name = DRIVER_NAME, + .of_match_table = open_dice_of_match, + }, +}; + +static int __init open_dice_init(void) +{ + int ret = platform_driver_probe(&open_dice_driver, open_dice_probe); + + /* DICE regions are optional. Succeed even with zero instances. */ + return (ret == -ENODEV) ? 0 : ret; +} + +static void __exit open_dice_exit(void) +{ + platform_driver_unregister(&open_dice_driver); +} + +module_init(open_dice_init); +module_exit(open_dice_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("David Brazdil "); diff --git a/drivers/misc/pch_phub.c b/drivers/misc/pch_phub.c new file mode 100644 index 0000000000..8d2b713573 --- /dev/null +++ b/drivers/misc/pch_phub.c @@ -0,0 +1,878 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2011 LAPIS Semiconductor Co., Ltd. 
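+ *
+ * Packet Hub (PHUB) driver for the Intel EG20T PCH and the LAPIS/ROHM
+ * ML7213/ML7223/ML7831 IOHs.  At probe time it programs IOH-specific
+ * prefetch values (and, on EG20T, CAN/UART clock configuration) and,
+ * depending on the IOH, exposes the GbE MAC address stored in the
+ * external serial ROM as the 'pch_mac' sysfs attribute and the option
+ * ROM area as the 'pch_firmware' binary sysfs file.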
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PHUB_STATUS 0x00 /* Status Register offset */ +#define PHUB_CONTROL 0x04 /* Control Register offset */ +#define PHUB_TIMEOUT 0x05 /* Time out value for Status Register */ +#define PCH_PHUB_ROM_WRITE_ENABLE 0x01 /* Enabling for writing ROM */ +#define PCH_PHUB_ROM_WRITE_DISABLE 0x00 /* Disabling for writing ROM */ +#define PCH_PHUB_MAC_START_ADDR_EG20T 0x14 /* MAC data area start address + offset */ +#define PCH_PHUB_MAC_START_ADDR_ML7223 0x20C /* MAC data area start address + offset */ +#define PCH_PHUB_ROM_START_ADDR_EG20T 0x80 /* ROM data area start address offset + (Intel EG20T PCH)*/ +#define PCH_PHUB_ROM_START_ADDR_ML7213 0x400 /* ROM data area start address + offset(LAPIS Semicon ML7213) + */ +#define PCH_PHUB_ROM_START_ADDR_ML7223 0x400 /* ROM data area start address + offset(LAPIS Semicon ML7223) + */ + +/* MAX number of INT_REDUCE_CONTROL registers */ +#define MAX_NUM_INT_REDUCE_CONTROL_REG 128 +#define PCI_DEVICE_ID_PCH1_PHUB 0x8801 +#define PCH_MINOR_NOS 1 +#define CLKCFG_CAN_50MHZ 0x12000000 +#define CLKCFG_CANCLK_MASK 0xFF000000 +#define CLKCFG_UART_MASK 0xFFFFFF + +/* CM-iTC */ +#define CLKCFG_UART_48MHZ (1 << 16) +#define CLKCFG_UART_25MHZ (2 << 16) +#define CLKCFG_BAUDDIV (2 << 20) +#define CLKCFG_PLL2VCO (8 << 9) +#define CLKCFG_UARTCLKSEL (1 << 18) + +/* Macros for ML7213 */ +#define PCI_DEVICE_ID_ROHM_ML7213_PHUB 0x801A + +/* Macros for ML7223 */ +#define PCI_DEVICE_ID_ROHM_ML7223_mPHUB 0x8012 /* for Bus-m */ +#define PCI_DEVICE_ID_ROHM_ML7223_nPHUB 0x8002 /* for Bus-n */ + +/* Macros for ML7831 */ +#define PCI_DEVICE_ID_ROHM_ML7831_PHUB 0x8801 + +/* SROM ACCESS Macro */ +#define PCH_WORD_ADDR_MASK (~((1 << 2) - 1)) + +/* Registers address offset */ +#define PCH_PHUB_ID_REG 0x0000 +#define PCH_PHUB_QUEUE_PRI_VAL_REG 0x0004 +#define PCH_PHUB_RC_QUEUE_MAXSIZE_REG 0x0008 +#define PCH_PHUB_BRI_QUEUE_MAXSIZE_REG 0x000C +#define PCH_PHUB_COMP_RESP_TIMEOUT_REG 0x0010 +#define PCH_PHUB_BUS_SLAVE_CONTROL_REG 0x0014 +#define PCH_PHUB_DEADLOCK_AVOID_TYPE_REG 0x0018 +#define PCH_PHUB_INTPIN_REG_WPERMIT_REG0 0x0020 +#define PCH_PHUB_INTPIN_REG_WPERMIT_REG1 0x0024 +#define PCH_PHUB_INTPIN_REG_WPERMIT_REG2 0x0028 +#define PCH_PHUB_INTPIN_REG_WPERMIT_REG3 0x002C +#define PCH_PHUB_INT_REDUCE_CONTROL_REG_BASE 0x0040 +#define CLKCFG_REG_OFFSET 0x500 +#define FUNCSEL_REG_OFFSET 0x508 + +#define PCH_PHUB_OROM_SIZE 15360 + +/** + * struct pch_phub_reg - PHUB register structure + * @phub_id_reg: PHUB_ID register val + * @q_pri_val_reg: QUEUE_PRI_VAL register val + * @rc_q_maxsize_reg: RC_QUEUE_MAXSIZE register val + * @bri_q_maxsize_reg: BRI_QUEUE_MAXSIZE register val + * @comp_resp_timeout_reg: COMP_RESP_TIMEOUT register val + * @bus_slave_control_reg: BUS_SLAVE_CONTROL_REG register val + * @deadlock_avoid_type_reg: DEADLOCK_AVOID_TYPE register val + * @intpin_reg_wpermit_reg0: INTPIN_REG_WPERMIT register 0 val + * @intpin_reg_wpermit_reg1: INTPIN_REG_WPERMIT register 1 val + * @intpin_reg_wpermit_reg2: INTPIN_REG_WPERMIT register 2 val + * @intpin_reg_wpermit_reg3: INTPIN_REG_WPERMIT register 3 val + * @int_reduce_control_reg: INT_REDUCE_CONTROL registers val + * @clkcfg_reg: CLK CFG register val + * @funcsel_reg: Function select register value + * @pch_phub_base_address: Register base address + * @pch_phub_extrom_base_address: external rom base address + * @pch_mac_start_address: MAC address area start address + * 
@pch_opt_rom_start_address: Option ROM start address + * @ioh_type: Save IOH type + * @pdev: pointer to pci device struct + */ +struct pch_phub_reg { + u32 phub_id_reg; + u32 q_pri_val_reg; + u32 rc_q_maxsize_reg; + u32 bri_q_maxsize_reg; + u32 comp_resp_timeout_reg; + u32 bus_slave_control_reg; + u32 deadlock_avoid_type_reg; + u32 intpin_reg_wpermit_reg0; + u32 intpin_reg_wpermit_reg1; + u32 intpin_reg_wpermit_reg2; + u32 intpin_reg_wpermit_reg3; + u32 int_reduce_control_reg[MAX_NUM_INT_REDUCE_CONTROL_REG]; + u32 clkcfg_reg; + u32 funcsel_reg; + void __iomem *pch_phub_base_address; + void __iomem *pch_phub_extrom_base_address; + u32 pch_mac_start_address; + u32 pch_opt_rom_start_address; + int ioh_type; + struct pci_dev *pdev; +}; + +/* SROM SPEC for MAC address assignment offset */ +static const int pch_phub_mac_offset[ETH_ALEN] = {0x3, 0x2, 0x1, 0x0, 0xb, 0xa}; + +static DEFINE_MUTEX(pch_phub_mutex); + +/** + * pch_phub_read_modify_write_reg() - Reading modifying and writing register + * @chip: Pointer to the PHUB register structure + * @reg_addr_offset: Register offset address value. + * @data: Writing value. + * @mask: Mask value. + */ +static void pch_phub_read_modify_write_reg(struct pch_phub_reg *chip, + unsigned int reg_addr_offset, + unsigned int data, unsigned int mask) +{ + void __iomem *reg_addr = chip->pch_phub_base_address + reg_addr_offset; + iowrite32(((ioread32(reg_addr) & ~mask)) | data, reg_addr); +} + +/* pch_phub_save_reg_conf - saves register configuration */ +static void __maybe_unused pch_phub_save_reg_conf(struct pci_dev *pdev) +{ + unsigned int i; + struct pch_phub_reg *chip = pci_get_drvdata(pdev); + + void __iomem *p = chip->pch_phub_base_address; + + chip->phub_id_reg = ioread32(p + PCH_PHUB_ID_REG); + chip->q_pri_val_reg = ioread32(p + PCH_PHUB_QUEUE_PRI_VAL_REG); + chip->rc_q_maxsize_reg = ioread32(p + PCH_PHUB_RC_QUEUE_MAXSIZE_REG); + chip->bri_q_maxsize_reg = ioread32(p + PCH_PHUB_BRI_QUEUE_MAXSIZE_REG); + chip->comp_resp_timeout_reg = + ioread32(p + PCH_PHUB_COMP_RESP_TIMEOUT_REG); + chip->bus_slave_control_reg = + ioread32(p + PCH_PHUB_BUS_SLAVE_CONTROL_REG); + chip->deadlock_avoid_type_reg = + ioread32(p + PCH_PHUB_DEADLOCK_AVOID_TYPE_REG); + chip->intpin_reg_wpermit_reg0 = + ioread32(p + PCH_PHUB_INTPIN_REG_WPERMIT_REG0); + chip->intpin_reg_wpermit_reg1 = + ioread32(p + PCH_PHUB_INTPIN_REG_WPERMIT_REG1); + chip->intpin_reg_wpermit_reg2 = + ioread32(p + PCH_PHUB_INTPIN_REG_WPERMIT_REG2); + chip->intpin_reg_wpermit_reg3 = + ioread32(p + PCH_PHUB_INTPIN_REG_WPERMIT_REG3); + dev_dbg(&pdev->dev, "%s : " + "chip->phub_id_reg=%x, " + "chip->q_pri_val_reg=%x, " + "chip->rc_q_maxsize_reg=%x, " + "chip->bri_q_maxsize_reg=%x, " + "chip->comp_resp_timeout_reg=%x, " + "chip->bus_slave_control_reg=%x, " + "chip->deadlock_avoid_type_reg=%x, " + "chip->intpin_reg_wpermit_reg0=%x, " + "chip->intpin_reg_wpermit_reg1=%x, " + "chip->intpin_reg_wpermit_reg2=%x, " + "chip->intpin_reg_wpermit_reg3=%x\n", __func__, + chip->phub_id_reg, + chip->q_pri_val_reg, + chip->rc_q_maxsize_reg, + chip->bri_q_maxsize_reg, + chip->comp_resp_timeout_reg, + chip->bus_slave_control_reg, + chip->deadlock_avoid_type_reg, + chip->intpin_reg_wpermit_reg0, + chip->intpin_reg_wpermit_reg1, + chip->intpin_reg_wpermit_reg2, + chip->intpin_reg_wpermit_reg3); + for (i = 0; i < MAX_NUM_INT_REDUCE_CONTROL_REG; i++) { + chip->int_reduce_control_reg[i] = + ioread32(p + PCH_PHUB_INT_REDUCE_CONTROL_REG_BASE + 4 * i); + dev_dbg(&pdev->dev, "%s : " + "chip->int_reduce_control_reg[%d]=%x\n", + __func__, i, 
chip->int_reduce_control_reg[i]); + } + chip->clkcfg_reg = ioread32(p + CLKCFG_REG_OFFSET); + if ((chip->ioh_type == 2) || (chip->ioh_type == 4)) + chip->funcsel_reg = ioread32(p + FUNCSEL_REG_OFFSET); +} + +/* pch_phub_restore_reg_conf - restore register configuration */ +static void __maybe_unused pch_phub_restore_reg_conf(struct pci_dev *pdev) +{ + unsigned int i; + struct pch_phub_reg *chip = pci_get_drvdata(pdev); + void __iomem *p; + p = chip->pch_phub_base_address; + + iowrite32(chip->phub_id_reg, p + PCH_PHUB_ID_REG); + iowrite32(chip->q_pri_val_reg, p + PCH_PHUB_QUEUE_PRI_VAL_REG); + iowrite32(chip->rc_q_maxsize_reg, p + PCH_PHUB_RC_QUEUE_MAXSIZE_REG); + iowrite32(chip->bri_q_maxsize_reg, p + PCH_PHUB_BRI_QUEUE_MAXSIZE_REG); + iowrite32(chip->comp_resp_timeout_reg, + p + PCH_PHUB_COMP_RESP_TIMEOUT_REG); + iowrite32(chip->bus_slave_control_reg, + p + PCH_PHUB_BUS_SLAVE_CONTROL_REG); + iowrite32(chip->deadlock_avoid_type_reg, + p + PCH_PHUB_DEADLOCK_AVOID_TYPE_REG); + iowrite32(chip->intpin_reg_wpermit_reg0, + p + PCH_PHUB_INTPIN_REG_WPERMIT_REG0); + iowrite32(chip->intpin_reg_wpermit_reg1, + p + PCH_PHUB_INTPIN_REG_WPERMIT_REG1); + iowrite32(chip->intpin_reg_wpermit_reg2, + p + PCH_PHUB_INTPIN_REG_WPERMIT_REG2); + iowrite32(chip->intpin_reg_wpermit_reg3, + p + PCH_PHUB_INTPIN_REG_WPERMIT_REG3); + dev_dbg(&pdev->dev, "%s : " + "chip->phub_id_reg=%x, " + "chip->q_pri_val_reg=%x, " + "chip->rc_q_maxsize_reg=%x, " + "chip->bri_q_maxsize_reg=%x, " + "chip->comp_resp_timeout_reg=%x, " + "chip->bus_slave_control_reg=%x, " + "chip->deadlock_avoid_type_reg=%x, " + "chip->intpin_reg_wpermit_reg0=%x, " + "chip->intpin_reg_wpermit_reg1=%x, " + "chip->intpin_reg_wpermit_reg2=%x, " + "chip->intpin_reg_wpermit_reg3=%x\n", __func__, + chip->phub_id_reg, + chip->q_pri_val_reg, + chip->rc_q_maxsize_reg, + chip->bri_q_maxsize_reg, + chip->comp_resp_timeout_reg, + chip->bus_slave_control_reg, + chip->deadlock_avoid_type_reg, + chip->intpin_reg_wpermit_reg0, + chip->intpin_reg_wpermit_reg1, + chip->intpin_reg_wpermit_reg2, + chip->intpin_reg_wpermit_reg3); + for (i = 0; i < MAX_NUM_INT_REDUCE_CONTROL_REG; i++) { + iowrite32(chip->int_reduce_control_reg[i], + p + PCH_PHUB_INT_REDUCE_CONTROL_REG_BASE + 4 * i); + dev_dbg(&pdev->dev, "%s : " + "chip->int_reduce_control_reg[%d]=%x\n", + __func__, i, chip->int_reduce_control_reg[i]); + } + + iowrite32(chip->clkcfg_reg, p + CLKCFG_REG_OFFSET); + if ((chip->ioh_type == 2) || (chip->ioh_type == 4)) + iowrite32(chip->funcsel_reg, p + FUNCSEL_REG_OFFSET); +} + +/** + * pch_phub_read_serial_rom() - Reading Serial ROM + * @chip: Pointer to the PHUB register structure + * @offset_address: Serial ROM offset address to read. + * @data: Read buffer for specified Serial ROM value. + */ +static void pch_phub_read_serial_rom(struct pch_phub_reg *chip, + unsigned int offset_address, u8 *data) +{ + void __iomem *mem_addr = chip->pch_phub_extrom_base_address + + offset_address; + + *data = ioread8(mem_addr); +} + +/** + * pch_phub_write_serial_rom() - Writing Serial ROM + * @chip: Pointer to the PHUB register structure + * @offset_address: Serial ROM offset address. + * @data: Serial ROM value to write. 
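+ *
+ * The byte is written with a read-modify-write of the 32-bit word that
+ * contains it: ROM writes are enabled through PHUB_CONTROL, the word is
+ * updated, then PHUB_STATUS is polled (1 ms per attempt, at most
+ * PHUB_TIMEOUT attempts) before writes are disabled again.  Returns 0 on
+ * success or -ETIMEDOUT if the ROM does not become ready.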
+ */ +static int pch_phub_write_serial_rom(struct pch_phub_reg *chip, + unsigned int offset_address, u8 data) +{ + void __iomem *mem_addr = chip->pch_phub_extrom_base_address + + (offset_address & PCH_WORD_ADDR_MASK); + int i; + unsigned int word_data; + unsigned int pos; + unsigned int mask; + pos = (offset_address % 4) * 8; + mask = ~(0xFF << pos); + + iowrite32(PCH_PHUB_ROM_WRITE_ENABLE, + chip->pch_phub_extrom_base_address + PHUB_CONTROL); + + word_data = ioread32(mem_addr); + iowrite32((word_data & mask) | (u32)data << pos, mem_addr); + + i = 0; + while (ioread8(chip->pch_phub_extrom_base_address + + PHUB_STATUS) != 0x00) { + msleep(1); + if (i == PHUB_TIMEOUT) + return -ETIMEDOUT; + i++; + } + + iowrite32(PCH_PHUB_ROM_WRITE_DISABLE, + chip->pch_phub_extrom_base_address + PHUB_CONTROL); + + return 0; +} + +/** + * pch_phub_read_serial_rom_val() - Read Serial ROM value + * @chip: Pointer to the PHUB register structure + * @offset_address: Serial ROM address offset value. + * @data: Serial ROM value to read. + */ +static void pch_phub_read_serial_rom_val(struct pch_phub_reg *chip, + unsigned int offset_address, u8 *data) +{ + unsigned int mem_addr; + + mem_addr = chip->pch_mac_start_address + + pch_phub_mac_offset[offset_address]; + + pch_phub_read_serial_rom(chip, mem_addr, data); +} + +/** + * pch_phub_write_serial_rom_val() - writing Serial ROM value + * @chip: Pointer to the PHUB register structure + * @offset_address: Serial ROM address offset value. + * @data: Serial ROM value. + */ +static int pch_phub_write_serial_rom_val(struct pch_phub_reg *chip, + unsigned int offset_address, u8 data) +{ + int retval; + unsigned int mem_addr; + + mem_addr = chip->pch_mac_start_address + + pch_phub_mac_offset[offset_address]; + + retval = pch_phub_write_serial_rom(chip, mem_addr, data); + + return retval; +} + +/* pch_phub_gbe_serial_rom_conf - makes Serial ROM header format configuration + * for Gigabit Ethernet MAC address + */ +static int pch_phub_gbe_serial_rom_conf(struct pch_phub_reg *chip) +{ + int retval; + + retval = pch_phub_write_serial_rom(chip, 0x0b, 0xbc); + retval |= pch_phub_write_serial_rom(chip, 0x0a, 0x10); + retval |= pch_phub_write_serial_rom(chip, 0x09, 0x01); + retval |= pch_phub_write_serial_rom(chip, 0x08, 0x02); + + retval |= pch_phub_write_serial_rom(chip, 0x0f, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x0e, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x0d, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x0c, 0x80); + + retval |= pch_phub_write_serial_rom(chip, 0x13, 0xbc); + retval |= pch_phub_write_serial_rom(chip, 0x12, 0x10); + retval |= pch_phub_write_serial_rom(chip, 0x11, 0x01); + retval |= pch_phub_write_serial_rom(chip, 0x10, 0x18); + + retval |= pch_phub_write_serial_rom(chip, 0x1b, 0xbc); + retval |= pch_phub_write_serial_rom(chip, 0x1a, 0x10); + retval |= pch_phub_write_serial_rom(chip, 0x19, 0x01); + retval |= pch_phub_write_serial_rom(chip, 0x18, 0x19); + + retval |= pch_phub_write_serial_rom(chip, 0x23, 0xbc); + retval |= pch_phub_write_serial_rom(chip, 0x22, 0x10); + retval |= pch_phub_write_serial_rom(chip, 0x21, 0x01); + retval |= pch_phub_write_serial_rom(chip, 0x20, 0x3a); + + retval |= pch_phub_write_serial_rom(chip, 0x27, 0x01); + retval |= pch_phub_write_serial_rom(chip, 0x26, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x25, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x24, 0x00); + + return retval; +} + +/* pch_phub_gbe_serial_rom_conf_mp - makes SerialROM header format configuration + * for Gigabit Ethernet 
MAC address + */ +static int pch_phub_gbe_serial_rom_conf_mp(struct pch_phub_reg *chip) +{ + int retval; + u32 offset_addr; + + offset_addr = 0x200; + retval = pch_phub_write_serial_rom(chip, 0x03 + offset_addr, 0xbc); + retval |= pch_phub_write_serial_rom(chip, 0x02 + offset_addr, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x01 + offset_addr, 0x40); + retval |= pch_phub_write_serial_rom(chip, 0x00 + offset_addr, 0x02); + + retval |= pch_phub_write_serial_rom(chip, 0x07 + offset_addr, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x06 + offset_addr, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x05 + offset_addr, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x04 + offset_addr, 0x80); + + retval |= pch_phub_write_serial_rom(chip, 0x0b + offset_addr, 0xbc); + retval |= pch_phub_write_serial_rom(chip, 0x0a + offset_addr, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x09 + offset_addr, 0x40); + retval |= pch_phub_write_serial_rom(chip, 0x08 + offset_addr, 0x18); + + retval |= pch_phub_write_serial_rom(chip, 0x13 + offset_addr, 0xbc); + retval |= pch_phub_write_serial_rom(chip, 0x12 + offset_addr, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x11 + offset_addr, 0x40); + retval |= pch_phub_write_serial_rom(chip, 0x10 + offset_addr, 0x19); + + retval |= pch_phub_write_serial_rom(chip, 0x1b + offset_addr, 0xbc); + retval |= pch_phub_write_serial_rom(chip, 0x1a + offset_addr, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x19 + offset_addr, 0x40); + retval |= pch_phub_write_serial_rom(chip, 0x18 + offset_addr, 0x3a); + + retval |= pch_phub_write_serial_rom(chip, 0x1f + offset_addr, 0x01); + retval |= pch_phub_write_serial_rom(chip, 0x1e + offset_addr, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x1d + offset_addr, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x1c + offset_addr, 0x00); + + return retval; +} + +/** + * pch_phub_read_gbe_mac_addr() - Read Gigabit Ethernet MAC address + * @chip: Pointer to the PHUB register structure + * @data: Buffer of the Gigabit Ethernet MAC address value. + */ +static void pch_phub_read_gbe_mac_addr(struct pch_phub_reg *chip, u8 *data) +{ + int i; + for (i = 0; i < ETH_ALEN; i++) + pch_phub_read_serial_rom_val(chip, i, &data[i]); +} + +/** + * pch_phub_write_gbe_mac_addr() - Write MAC address + * @chip: Pointer to the PHUB register structure + * @data: Gigabit Ethernet MAC address value. 
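+ *
+ * The serial ROM header is rewritten first (pch_phub_gbe_serial_rom_conf()
+ * for EG20T/ML7831, pch_phub_gbe_serial_rom_conf_mp() for ML7223), then
+ * the six MAC bytes are stored at the per-IOH offsets listed in
+ * pch_phub_mac_offset[].  Returns 0 on success, or an error code if any
+ * of the serial ROM writes fails.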
+ */ +static int pch_phub_write_gbe_mac_addr(struct pch_phub_reg *chip, u8 *data) +{ + int retval; + int i; + + if ((chip->ioh_type == 1) || (chip->ioh_type == 5)) /* EG20T or ML7831*/ + retval = pch_phub_gbe_serial_rom_conf(chip); + else /* ML7223 */ + retval = pch_phub_gbe_serial_rom_conf_mp(chip); + if (retval) + return retval; + + for (i = 0; i < ETH_ALEN; i++) { + retval = pch_phub_write_serial_rom_val(chip, i, data[i]); + if (retval) + return retval; + } + + return retval; +} + +static ssize_t pch_phub_bin_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + unsigned int rom_signature; + unsigned char rom_length; + unsigned int tmp; + unsigned int addr_offset; + unsigned int orom_size; + int ret; + int err; + ssize_t rom_size; + + struct pch_phub_reg *chip = dev_get_drvdata(kobj_to_dev(kobj)); + + ret = mutex_lock_interruptible(&pch_phub_mutex); + if (ret) { + err = -ERESTARTSYS; + goto return_err_nomutex; + } + + /* Get Rom signature */ + chip->pch_phub_extrom_base_address = pci_map_rom(chip->pdev, &rom_size); + if (!chip->pch_phub_extrom_base_address) { + err = -ENODATA; + goto exrom_map_err; + } + + pch_phub_read_serial_rom(chip, chip->pch_opt_rom_start_address, + (unsigned char *)&rom_signature); + rom_signature &= 0xff; + pch_phub_read_serial_rom(chip, chip->pch_opt_rom_start_address + 1, + (unsigned char *)&tmp); + rom_signature |= (tmp & 0xff) << 8; + if (rom_signature == 0xAA55) { + pch_phub_read_serial_rom(chip, + chip->pch_opt_rom_start_address + 2, + &rom_length); + orom_size = rom_length * 512; + if (orom_size < off) { + addr_offset = 0; + goto return_ok; + } + if (orom_size < count) { + addr_offset = 0; + goto return_ok; + } + + for (addr_offset = 0; addr_offset < count; addr_offset++) { + pch_phub_read_serial_rom(chip, + chip->pch_opt_rom_start_address + addr_offset + off, + &buf[addr_offset]); + } + } else { + err = -ENODATA; + goto return_err; + } +return_ok: + pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address); + mutex_unlock(&pch_phub_mutex); + return addr_offset; + +return_err: + pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address); +exrom_map_err: + mutex_unlock(&pch_phub_mutex); +return_err_nomutex: + return err; +} + +static ssize_t pch_phub_bin_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + int err; + unsigned int addr_offset; + int ret; + ssize_t rom_size; + struct pch_phub_reg *chip = dev_get_drvdata(kobj_to_dev(kobj)); + + ret = mutex_lock_interruptible(&pch_phub_mutex); + if (ret) + return -ERESTARTSYS; + + if (off > PCH_PHUB_OROM_SIZE) { + addr_offset = 0; + goto return_ok; + } + if (count > PCH_PHUB_OROM_SIZE) { + addr_offset = 0; + goto return_ok; + } + + chip->pch_phub_extrom_base_address = pci_map_rom(chip->pdev, &rom_size); + if (!chip->pch_phub_extrom_base_address) { + err = -ENOMEM; + goto exrom_map_err; + } + + for (addr_offset = 0; addr_offset < count; addr_offset++) { + if (PCH_PHUB_OROM_SIZE < off + addr_offset) + goto return_ok; + + ret = pch_phub_write_serial_rom(chip, + chip->pch_opt_rom_start_address + addr_offset + off, + buf[addr_offset]); + if (ret) { + err = ret; + goto return_err; + } + } + +return_ok: + pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address); + mutex_unlock(&pch_phub_mutex); + return addr_offset; + +return_err: + pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address); + +exrom_map_err: + mutex_unlock(&pch_phub_mutex); + return err; +} + +static 
ssize_t show_pch_mac(struct device *dev, struct device_attribute *attr, + char *buf) +{ + u8 mac[8]; + struct pch_phub_reg *chip = dev_get_drvdata(dev); + ssize_t rom_size; + + chip->pch_phub_extrom_base_address = pci_map_rom(chip->pdev, &rom_size); + if (!chip->pch_phub_extrom_base_address) + return -ENOMEM; + + pch_phub_read_gbe_mac_addr(chip, mac); + pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address); + + return sprintf(buf, "%pM\n", mac); +} + +static ssize_t store_pch_mac(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + u8 mac[ETH_ALEN]; + ssize_t rom_size; + struct pch_phub_reg *chip = dev_get_drvdata(dev); + int ret; + + if (!mac_pton(buf, mac)) + return -EINVAL; + + chip->pch_phub_extrom_base_address = pci_map_rom(chip->pdev, &rom_size); + if (!chip->pch_phub_extrom_base_address) + return -ENOMEM; + + ret = pch_phub_write_gbe_mac_addr(chip, mac); + pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address); + if (ret) + return ret; + + return count; +} + +static DEVICE_ATTR(pch_mac, S_IRUGO | S_IWUSR, show_pch_mac, store_pch_mac); + +static const struct bin_attribute pch_bin_attr = { + .attr = { + .name = "pch_firmware", + .mode = S_IRUGO | S_IWUSR, + }, + .size = PCH_PHUB_OROM_SIZE + 1, + .read = pch_phub_bin_read, + .write = pch_phub_bin_write, +}; + +static int pch_phub_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + int ret; + struct pch_phub_reg *chip; + + chip = kzalloc(sizeof(struct pch_phub_reg), GFP_KERNEL); + if (chip == NULL) + return -ENOMEM; + + ret = pci_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, + "%s : pci_enable_device FAILED(ret=%d)", __func__, ret); + goto err_pci_enable_dev; + } + dev_dbg(&pdev->dev, "%s : pci_enable_device returns %d\n", __func__, + ret); + + ret = pci_request_regions(pdev, KBUILD_MODNAME); + if (ret) { + dev_err(&pdev->dev, + "%s : pci_request_regions FAILED(ret=%d)", __func__, ret); + goto err_req_regions; + } + dev_dbg(&pdev->dev, "%s : " + "pci_request_regions returns %d\n", __func__, ret); + + chip->pch_phub_base_address = pci_iomap(pdev, 1, 0); + + + if (chip->pch_phub_base_address == NULL) { + dev_err(&pdev->dev, "%s : pci_iomap FAILED", __func__); + ret = -ENOMEM; + goto err_pci_iomap; + } + dev_dbg(&pdev->dev, "%s : pci_iomap SUCCESS and value " + "in pch_phub_base_address variable is %p\n", __func__, + chip->pch_phub_base_address); + + chip->pdev = pdev; /* Save pci device struct */ + + if (id->driver_data == 1) { /* EG20T PCH */ + const char *board_name; + unsigned int prefetch = 0x000affaa; + + if (pdev->dev.of_node) + of_property_read_u32(pdev->dev.of_node, + "intel,eg20t-prefetch", + &prefetch); + + ret = sysfs_create_file(&pdev->dev.kobj, + &dev_attr_pch_mac.attr); + if (ret) + goto err_sysfs_create; + + ret = sysfs_create_bin_file(&pdev->dev.kobj, &pch_bin_attr); + if (ret) + goto exit_bin_attr; + + pch_phub_read_modify_write_reg(chip, + (unsigned int)CLKCFG_REG_OFFSET, + CLKCFG_CAN_50MHZ, + CLKCFG_CANCLK_MASK); + + /* quirk for CM-iTC board */ + board_name = dmi_get_system_info(DMI_BOARD_NAME); + if (board_name && strstr(board_name, "CM-iTC")) + pch_phub_read_modify_write_reg(chip, + (unsigned int)CLKCFG_REG_OFFSET, + CLKCFG_UART_48MHZ | CLKCFG_BAUDDIV | + CLKCFG_PLL2VCO | CLKCFG_UARTCLKSEL, + CLKCFG_UART_MASK); + + /* set the prefech value */ + iowrite32(prefetch, chip->pch_phub_base_address + 0x14); + /* set the interrupt delay value */ + iowrite32(0x25, chip->pch_phub_base_address + 0x44); + chip->pch_opt_rom_start_address = 
PCH_PHUB_ROM_START_ADDR_EG20T; + chip->pch_mac_start_address = PCH_PHUB_MAC_START_ADDR_EG20T; + + /* quirk for MIPS Boston platform */ + if (pdev->dev.of_node) { + if (of_machine_is_compatible("img,boston")) { + pch_phub_read_modify_write_reg(chip, + (unsigned int)CLKCFG_REG_OFFSET, + CLKCFG_UART_25MHZ, + CLKCFG_UART_MASK); + } + } + } else if (id->driver_data == 2) { /* ML7213 IOH */ + ret = sysfs_create_bin_file(&pdev->dev.kobj, &pch_bin_attr); + if (ret) + goto err_sysfs_create; + /* set the prefech value + * Device2(USB OHCI #1/ USB EHCI #1/ USB Device):a + * Device4(SDIO #0,1,2):f + * Device6(SATA 2):f + * Device8(USB OHCI #0/ USB EHCI #0):a + */ + iowrite32(0x000affa0, chip->pch_phub_base_address + 0x14); + chip->pch_opt_rom_start_address =\ + PCH_PHUB_ROM_START_ADDR_ML7213; + } else if (id->driver_data == 3) { /* ML7223 IOH Bus-m*/ + /* set the prefech value + * Device8(GbE) + */ + iowrite32(0x000a0000, chip->pch_phub_base_address + 0x14); + /* set the interrupt delay value */ + iowrite32(0x25, chip->pch_phub_base_address + 0x140); + chip->pch_opt_rom_start_address =\ + PCH_PHUB_ROM_START_ADDR_ML7223; + chip->pch_mac_start_address = PCH_PHUB_MAC_START_ADDR_ML7223; + } else if (id->driver_data == 4) { /* ML7223 IOH Bus-n*/ + ret = sysfs_create_file(&pdev->dev.kobj, + &dev_attr_pch_mac.attr); + if (ret) + goto err_sysfs_create; + ret = sysfs_create_bin_file(&pdev->dev.kobj, &pch_bin_attr); + if (ret) + goto exit_bin_attr; + /* set the prefech value + * Device2(USB OHCI #0,1,2,3/ USB EHCI #0):a + * Device4(SDIO #0,1):f + * Device6(SATA 2):f + */ + iowrite32(0x0000ffa0, chip->pch_phub_base_address + 0x14); + chip->pch_opt_rom_start_address =\ + PCH_PHUB_ROM_START_ADDR_ML7223; + chip->pch_mac_start_address = PCH_PHUB_MAC_START_ADDR_ML7223; + } else if (id->driver_data == 5) { /* ML7831 */ + ret = sysfs_create_file(&pdev->dev.kobj, + &dev_attr_pch_mac.attr); + if (ret) + goto err_sysfs_create; + + ret = sysfs_create_bin_file(&pdev->dev.kobj, &pch_bin_attr); + if (ret) + goto exit_bin_attr; + + /* set the prefech value */ + iowrite32(0x000affaa, chip->pch_phub_base_address + 0x14); + /* set the interrupt delay value */ + iowrite32(0x25, chip->pch_phub_base_address + 0x44); + chip->pch_opt_rom_start_address = PCH_PHUB_ROM_START_ADDR_EG20T; + chip->pch_mac_start_address = PCH_PHUB_MAC_START_ADDR_EG20T; + } + + chip->ioh_type = id->driver_data; + pci_set_drvdata(pdev, chip); + + return 0; +exit_bin_attr: + sysfs_remove_file(&pdev->dev.kobj, &dev_attr_pch_mac.attr); + +err_sysfs_create: + pci_iounmap(pdev, chip->pch_phub_base_address); +err_pci_iomap: + pci_release_regions(pdev); +err_req_regions: + pci_disable_device(pdev); +err_pci_enable_dev: + kfree(chip); + dev_err(&pdev->dev, "%s returns %d\n", __func__, ret); + return ret; +} + +static void pch_phub_remove(struct pci_dev *pdev) +{ + struct pch_phub_reg *chip = pci_get_drvdata(pdev); + + sysfs_remove_file(&pdev->dev.kobj, &dev_attr_pch_mac.attr); + sysfs_remove_bin_file(&pdev->dev.kobj, &pch_bin_attr); + pci_iounmap(pdev, chip->pch_phub_base_address); + pci_release_regions(pdev); + pci_disable_device(pdev); + kfree(chip); +} + +static int __maybe_unused pch_phub_suspend(struct device *dev_d) +{ + device_wakeup_disable(dev_d); + + return 0; +} + +static int __maybe_unused pch_phub_resume(struct device *dev_d) +{ + device_wakeup_disable(dev_d); + + return 0; +} + +static const struct pci_device_id pch_phub_pcidev_id[] = { + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_PCH1_PHUB), 1, }, + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ROHM_ML7213_PHUB), 2, }, + { 
PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ROHM_ML7223_mPHUB), 3, }, + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ROHM_ML7223_nPHUB), 4, }, + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ROHM_ML7831_PHUB), 5, }, + { } +}; +MODULE_DEVICE_TABLE(pci, pch_phub_pcidev_id); + +static SIMPLE_DEV_PM_OPS(pch_phub_pm_ops, pch_phub_suspend, pch_phub_resume); + +static struct pci_driver pch_phub_driver = { + .name = "pch_phub", + .id_table = pch_phub_pcidev_id, + .probe = pch_phub_probe, + .remove = pch_phub_remove, + .driver.pm = &pch_phub_pm_ops, +}; + +module_pci_driver(pch_phub_driver); + +MODULE_DESCRIPTION("Intel EG20T PCH/LAPIS Semiconductor IOH(ML7213/ML7223) PHUB"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c new file mode 100644 index 0000000000..af51908873 --- /dev/null +++ b/drivers/misc/pci_endpoint_test.c @@ -0,0 +1,1025 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Host side test driver to test endpoint functionality + * + * Copyright (C) 2017 Texas Instruments + * Author: Kishon Vijay Abraham I + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#define DRV_MODULE_NAME "pci-endpoint-test" + +#define IRQ_TYPE_UNDEFINED -1 +#define IRQ_TYPE_LEGACY 0 +#define IRQ_TYPE_MSI 1 +#define IRQ_TYPE_MSIX 2 + +#define PCI_ENDPOINT_TEST_MAGIC 0x0 + +#define PCI_ENDPOINT_TEST_COMMAND 0x4 +#define COMMAND_RAISE_LEGACY_IRQ BIT(0) +#define COMMAND_RAISE_MSI_IRQ BIT(1) +#define COMMAND_RAISE_MSIX_IRQ BIT(2) +#define COMMAND_READ BIT(3) +#define COMMAND_WRITE BIT(4) +#define COMMAND_COPY BIT(5) + +#define PCI_ENDPOINT_TEST_STATUS 0x8 +#define STATUS_READ_SUCCESS BIT(0) +#define STATUS_READ_FAIL BIT(1) +#define STATUS_WRITE_SUCCESS BIT(2) +#define STATUS_WRITE_FAIL BIT(3) +#define STATUS_COPY_SUCCESS BIT(4) +#define STATUS_COPY_FAIL BIT(5) +#define STATUS_IRQ_RAISED BIT(6) +#define STATUS_SRC_ADDR_INVALID BIT(7) +#define STATUS_DST_ADDR_INVALID BIT(8) + +#define PCI_ENDPOINT_TEST_LOWER_SRC_ADDR 0x0c +#define PCI_ENDPOINT_TEST_UPPER_SRC_ADDR 0x10 + +#define PCI_ENDPOINT_TEST_LOWER_DST_ADDR 0x14 +#define PCI_ENDPOINT_TEST_UPPER_DST_ADDR 0x18 + +#define PCI_ENDPOINT_TEST_SIZE 0x1c +#define PCI_ENDPOINT_TEST_CHECKSUM 0x20 + +#define PCI_ENDPOINT_TEST_IRQ_TYPE 0x24 +#define PCI_ENDPOINT_TEST_IRQ_NUMBER 0x28 + +#define PCI_ENDPOINT_TEST_FLAGS 0x2c +#define FLAG_USE_DMA BIT(0) + +#define PCI_DEVICE_ID_TI_AM654 0xb00c +#define PCI_DEVICE_ID_TI_J7200 0xb00f +#define PCI_DEVICE_ID_TI_AM64 0xb010 +#define PCI_DEVICE_ID_TI_J721S2 0xb013 +#define PCI_DEVICE_ID_LS1088A 0x80c0 +#define PCI_DEVICE_ID_IMX8 0x0808 + +#define is_am654_pci_dev(pdev) \ + ((pdev)->device == PCI_DEVICE_ID_TI_AM654) + +#define PCI_DEVICE_ID_RENESAS_R8A774A1 0x0028 +#define PCI_DEVICE_ID_RENESAS_R8A774B1 0x002b +#define PCI_DEVICE_ID_RENESAS_R8A774C0 0x002d +#define PCI_DEVICE_ID_RENESAS_R8A774E1 0x0025 +#define PCI_DEVICE_ID_RENESAS_R8A779F0 0x0031 + +static DEFINE_IDA(pci_endpoint_test_ida); + +#define to_endpoint_test(priv) container_of((priv), struct pci_endpoint_test, \ + miscdev) + +static bool no_msi; +module_param(no_msi, bool, 0444); +MODULE_PARM_DESC(no_msi, "Disable MSI interrupt in pci_endpoint_test"); + +static int irq_type = IRQ_TYPE_MSI; +module_param(irq_type, int, 0444); +MODULE_PARM_DESC(irq_type, "IRQ mode selection in pci_endpoint_test (0 - Legacy, 1 - MSI, 2 - MSI-X)"); + +enum pci_barno { + BAR_0, + BAR_1, + BAR_2, + BAR_3, + BAR_4, + BAR_5, +}; + +struct 
pci_endpoint_test { + struct pci_dev *pdev; + void __iomem *base; + void __iomem *bar[PCI_STD_NUM_BARS]; + struct completion irq_raised; + int last_irq; + int num_irqs; + int irq_type; + /* mutex to protect the ioctls */ + struct mutex mutex; + struct miscdevice miscdev; + enum pci_barno test_reg_bar; + size_t alignment; + const char *name; +}; + +struct pci_endpoint_test_data { + enum pci_barno test_reg_bar; + size_t alignment; + int irq_type; +}; + +static inline u32 pci_endpoint_test_readl(struct pci_endpoint_test *test, + u32 offset) +{ + return readl(test->base + offset); +} + +static inline void pci_endpoint_test_writel(struct pci_endpoint_test *test, + u32 offset, u32 value) +{ + writel(value, test->base + offset); +} + +static inline u32 pci_endpoint_test_bar_readl(struct pci_endpoint_test *test, + int bar, int offset) +{ + return readl(test->bar[bar] + offset); +} + +static inline void pci_endpoint_test_bar_writel(struct pci_endpoint_test *test, + int bar, u32 offset, u32 value) +{ + writel(value, test->bar[bar] + offset); +} + +static irqreturn_t pci_endpoint_test_irqhandler(int irq, void *dev_id) +{ + struct pci_endpoint_test *test = dev_id; + u32 reg; + + reg = pci_endpoint_test_readl(test, PCI_ENDPOINT_TEST_STATUS); + if (reg & STATUS_IRQ_RAISED) { + test->last_irq = irq; + complete(&test->irq_raised); + } + + return IRQ_HANDLED; +} + +static void pci_endpoint_test_free_irq_vectors(struct pci_endpoint_test *test) +{ + struct pci_dev *pdev = test->pdev; + + pci_free_irq_vectors(pdev); + test->irq_type = IRQ_TYPE_UNDEFINED; +} + +static bool pci_endpoint_test_alloc_irq_vectors(struct pci_endpoint_test *test, + int type) +{ + int irq = -1; + struct pci_dev *pdev = test->pdev; + struct device *dev = &pdev->dev; + bool res = true; + + switch (type) { + case IRQ_TYPE_LEGACY: + irq = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_LEGACY); + if (irq < 0) + dev_err(dev, "Failed to get Legacy interrupt\n"); + break; + case IRQ_TYPE_MSI: + irq = pci_alloc_irq_vectors(pdev, 1, 32, PCI_IRQ_MSI); + if (irq < 0) + dev_err(dev, "Failed to get MSI interrupts\n"); + break; + case IRQ_TYPE_MSIX: + irq = pci_alloc_irq_vectors(pdev, 1, 2048, PCI_IRQ_MSIX); + if (irq < 0) + dev_err(dev, "Failed to get MSI-X interrupts\n"); + break; + default: + dev_err(dev, "Invalid IRQ type selected\n"); + } + + if (irq < 0) { + irq = 0; + res = false; + } + + test->irq_type = type; + test->num_irqs = irq; + + return res; +} + +static void pci_endpoint_test_release_irq(struct pci_endpoint_test *test) +{ + int i; + struct pci_dev *pdev = test->pdev; + struct device *dev = &pdev->dev; + + for (i = 0; i < test->num_irqs; i++) + devm_free_irq(dev, pci_irq_vector(pdev, i), test); + + test->num_irqs = 0; +} + +static bool pci_endpoint_test_request_irq(struct pci_endpoint_test *test) +{ + int i; + int err; + struct pci_dev *pdev = test->pdev; + struct device *dev = &pdev->dev; + + for (i = 0; i < test->num_irqs; i++) { + err = devm_request_irq(dev, pci_irq_vector(pdev, i), + pci_endpoint_test_irqhandler, + IRQF_SHARED, test->name, test); + if (err) + goto fail; + } + + return true; + +fail: + switch (irq_type) { + case IRQ_TYPE_LEGACY: + dev_err(dev, "Failed to request IRQ %d for Legacy\n", + pci_irq_vector(pdev, i)); + break; + case IRQ_TYPE_MSI: + dev_err(dev, "Failed to request IRQ %d for MSI %d\n", + pci_irq_vector(pdev, i), + i + 1); + break; + case IRQ_TYPE_MSIX: + dev_err(dev, "Failed to request IRQ %d for MSI-X %d\n", + pci_irq_vector(pdev, i), + i + 1); + break; + } + + return false; +} + +static bool 
pci_endpoint_test_bar(struct pci_endpoint_test *test, + enum pci_barno barno) +{ + int j; + u32 val; + int size; + struct pci_dev *pdev = test->pdev; + + if (!test->bar[barno]) + return false; + + size = pci_resource_len(pdev, barno); + + if (barno == test->test_reg_bar) + size = 0x4; + + for (j = 0; j < size; j += 4) + pci_endpoint_test_bar_writel(test, barno, j, 0xA0A0A0A0); + + for (j = 0; j < size; j += 4) { + val = pci_endpoint_test_bar_readl(test, barno, j); + if (val != 0xA0A0A0A0) + return false; + } + + return true; +} + +static bool pci_endpoint_test_legacy_irq(struct pci_endpoint_test *test) +{ + u32 val; + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, + IRQ_TYPE_LEGACY); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, 0); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, + COMMAND_RAISE_LEGACY_IRQ); + val = wait_for_completion_timeout(&test->irq_raised, + msecs_to_jiffies(1000)); + if (!val) + return false; + + return true; +} + +static bool pci_endpoint_test_msi_irq(struct pci_endpoint_test *test, + u16 msi_num, bool msix) +{ + u32 val; + struct pci_dev *pdev = test->pdev; + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, + msix ? IRQ_TYPE_MSIX : IRQ_TYPE_MSI); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, msi_num); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, + msix ? COMMAND_RAISE_MSIX_IRQ : + COMMAND_RAISE_MSI_IRQ); + val = wait_for_completion_timeout(&test->irq_raised, + msecs_to_jiffies(1000)); + if (!val) + return false; + + return pci_irq_vector(pdev, msi_num - 1) == test->last_irq; +} + +static int pci_endpoint_test_validate_xfer_params(struct device *dev, + struct pci_endpoint_test_xfer_param *param, size_t alignment) +{ + if (!param->size) { + dev_dbg(dev, "Data size is zero\n"); + return -EINVAL; + } + + if (param->size > SIZE_MAX - alignment) { + dev_dbg(dev, "Maximum transfer data size exceeded\n"); + return -EINVAL; + } + + return 0; +} + +static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, + unsigned long arg) +{ + struct pci_endpoint_test_xfer_param param; + bool ret = false; + void *src_addr; + void *dst_addr; + u32 flags = 0; + bool use_dma; + size_t size; + dma_addr_t src_phys_addr; + dma_addr_t dst_phys_addr; + struct pci_dev *pdev = test->pdev; + struct device *dev = &pdev->dev; + void *orig_src_addr; + dma_addr_t orig_src_phys_addr; + void *orig_dst_addr; + dma_addr_t orig_dst_phys_addr; + size_t offset; + size_t alignment = test->alignment; + int irq_type = test->irq_type; + u32 src_crc32; + u32 dst_crc32; + int err; + + err = copy_from_user(¶m, (void __user *)arg, sizeof(param)); + if (err) { + dev_err(dev, "Failed to get transfer param\n"); + return false; + } + + err = pci_endpoint_test_validate_xfer_params(dev, ¶m, alignment); + if (err) + return false; + + size = param.size; + + use_dma = !!(param.flags & PCITEST_FLAGS_USE_DMA); + if (use_dma) + flags |= FLAG_USE_DMA; + + if (irq_type < IRQ_TYPE_LEGACY || irq_type > IRQ_TYPE_MSIX) { + dev_err(dev, "Invalid IRQ type option\n"); + goto err; + } + + orig_src_addr = kzalloc(size + alignment, GFP_KERNEL); + if (!orig_src_addr) { + dev_err(dev, "Failed to allocate source buffer\n"); + ret = false; + goto err; + } + + get_random_bytes(orig_src_addr, size + alignment); + orig_src_phys_addr = dma_map_single(dev, orig_src_addr, + size + alignment, DMA_TO_DEVICE); + if (dma_mapping_error(dev, orig_src_phys_addr)) { + dev_err(dev, "failed to map source buffer address\n"); + ret = false; + goto err_src_phys_addr; 
+ } + + if (alignment && !IS_ALIGNED(orig_src_phys_addr, alignment)) { + src_phys_addr = PTR_ALIGN(orig_src_phys_addr, alignment); + offset = src_phys_addr - orig_src_phys_addr; + src_addr = orig_src_addr + offset; + } else { + src_phys_addr = orig_src_phys_addr; + src_addr = orig_src_addr; + } + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_LOWER_SRC_ADDR, + lower_32_bits(src_phys_addr)); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_UPPER_SRC_ADDR, + upper_32_bits(src_phys_addr)); + + src_crc32 = crc32_le(~0, src_addr, size); + + orig_dst_addr = kzalloc(size + alignment, GFP_KERNEL); + if (!orig_dst_addr) { + dev_err(dev, "Failed to allocate destination address\n"); + ret = false; + goto err_dst_addr; + } + + orig_dst_phys_addr = dma_map_single(dev, orig_dst_addr, + size + alignment, DMA_FROM_DEVICE); + if (dma_mapping_error(dev, orig_dst_phys_addr)) { + dev_err(dev, "failed to map destination buffer address\n"); + ret = false; + goto err_dst_phys_addr; + } + + if (alignment && !IS_ALIGNED(orig_dst_phys_addr, alignment)) { + dst_phys_addr = PTR_ALIGN(orig_dst_phys_addr, alignment); + offset = dst_phys_addr - orig_dst_phys_addr; + dst_addr = orig_dst_addr + offset; + } else { + dst_phys_addr = orig_dst_phys_addr; + dst_addr = orig_dst_addr; + } + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_LOWER_DST_ADDR, + lower_32_bits(dst_phys_addr)); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_UPPER_DST_ADDR, + upper_32_bits(dst_phys_addr)); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_SIZE, + size); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_FLAGS, flags); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, irq_type); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, 1); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, + COMMAND_COPY); + + wait_for_completion(&test->irq_raised); + + dma_unmap_single(dev, orig_dst_phys_addr, size + alignment, + DMA_FROM_DEVICE); + + dst_crc32 = crc32_le(~0, dst_addr, size); + if (dst_crc32 == src_crc32) + ret = true; + +err_dst_phys_addr: + kfree(orig_dst_addr); + +err_dst_addr: + dma_unmap_single(dev, orig_src_phys_addr, size + alignment, + DMA_TO_DEVICE); + +err_src_phys_addr: + kfree(orig_src_addr); + +err: + return ret; +} + +static bool pci_endpoint_test_write(struct pci_endpoint_test *test, + unsigned long arg) +{ + struct pci_endpoint_test_xfer_param param; + bool ret = false; + u32 flags = 0; + bool use_dma; + u32 reg; + void *addr; + dma_addr_t phys_addr; + struct pci_dev *pdev = test->pdev; + struct device *dev = &pdev->dev; + void *orig_addr; + dma_addr_t orig_phys_addr; + size_t offset; + size_t alignment = test->alignment; + int irq_type = test->irq_type; + size_t size; + u32 crc32; + int err; + + err = copy_from_user(¶m, (void __user *)arg, sizeof(param)); + if (err != 0) { + dev_err(dev, "Failed to get transfer param\n"); + return false; + } + + err = pci_endpoint_test_validate_xfer_params(dev, ¶m, alignment); + if (err) + return false; + + size = param.size; + + use_dma = !!(param.flags & PCITEST_FLAGS_USE_DMA); + if (use_dma) + flags |= FLAG_USE_DMA; + + if (irq_type < IRQ_TYPE_LEGACY || irq_type > IRQ_TYPE_MSIX) { + dev_err(dev, "Invalid IRQ type option\n"); + goto err; + } + + orig_addr = kzalloc(size + alignment, GFP_KERNEL); + if (!orig_addr) { + dev_err(dev, "Failed to allocate address\n"); + ret = false; + goto err; + } + + get_random_bytes(orig_addr, size + alignment); + + orig_phys_addr = dma_map_single(dev, orig_addr, size + alignment, + DMA_TO_DEVICE); + if 
(dma_mapping_error(dev, orig_phys_addr)) { + dev_err(dev, "failed to map source buffer address\n"); + ret = false; + goto err_phys_addr; + } + + if (alignment && !IS_ALIGNED(orig_phys_addr, alignment)) { + phys_addr = PTR_ALIGN(orig_phys_addr, alignment); + offset = phys_addr - orig_phys_addr; + addr = orig_addr + offset; + } else { + phys_addr = orig_phys_addr; + addr = orig_addr; + } + + crc32 = crc32_le(~0, addr, size); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_CHECKSUM, + crc32); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_LOWER_SRC_ADDR, + lower_32_bits(phys_addr)); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_UPPER_SRC_ADDR, + upper_32_bits(phys_addr)); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_SIZE, size); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_FLAGS, flags); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, irq_type); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, 1); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, + COMMAND_READ); + + wait_for_completion(&test->irq_raised); + + reg = pci_endpoint_test_readl(test, PCI_ENDPOINT_TEST_STATUS); + if (reg & STATUS_READ_SUCCESS) + ret = true; + + dma_unmap_single(dev, orig_phys_addr, size + alignment, + DMA_TO_DEVICE); + +err_phys_addr: + kfree(orig_addr); + +err: + return ret; +} + +static bool pci_endpoint_test_read(struct pci_endpoint_test *test, + unsigned long arg) +{ + struct pci_endpoint_test_xfer_param param; + bool ret = false; + u32 flags = 0; + bool use_dma; + size_t size; + void *addr; + dma_addr_t phys_addr; + struct pci_dev *pdev = test->pdev; + struct device *dev = &pdev->dev; + void *orig_addr; + dma_addr_t orig_phys_addr; + size_t offset; + size_t alignment = test->alignment; + int irq_type = test->irq_type; + u32 crc32; + int err; + + err = copy_from_user(¶m, (void __user *)arg, sizeof(param)); + if (err) { + dev_err(dev, "Failed to get transfer param\n"); + return false; + } + + err = pci_endpoint_test_validate_xfer_params(dev, ¶m, alignment); + if (err) + return false; + + size = param.size; + + use_dma = !!(param.flags & PCITEST_FLAGS_USE_DMA); + if (use_dma) + flags |= FLAG_USE_DMA; + + if (irq_type < IRQ_TYPE_LEGACY || irq_type > IRQ_TYPE_MSIX) { + dev_err(dev, "Invalid IRQ type option\n"); + goto err; + } + + orig_addr = kzalloc(size + alignment, GFP_KERNEL); + if (!orig_addr) { + dev_err(dev, "Failed to allocate destination address\n"); + ret = false; + goto err; + } + + orig_phys_addr = dma_map_single(dev, orig_addr, size + alignment, + DMA_FROM_DEVICE); + if (dma_mapping_error(dev, orig_phys_addr)) { + dev_err(dev, "failed to map source buffer address\n"); + ret = false; + goto err_phys_addr; + } + + if (alignment && !IS_ALIGNED(orig_phys_addr, alignment)) { + phys_addr = PTR_ALIGN(orig_phys_addr, alignment); + offset = phys_addr - orig_phys_addr; + addr = orig_addr + offset; + } else { + phys_addr = orig_phys_addr; + addr = orig_addr; + } + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_LOWER_DST_ADDR, + lower_32_bits(phys_addr)); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_UPPER_DST_ADDR, + upper_32_bits(phys_addr)); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_SIZE, size); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_FLAGS, flags); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, irq_type); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, 1); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, + COMMAND_WRITE); + + wait_for_completion(&test->irq_raised); + + 
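+	/* Endpoint signalled completion: unmap the buffer so the CPU sees the data it wrote, then verify its reported checksum below. */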
dma_unmap_single(dev, orig_phys_addr, size + alignment, + DMA_FROM_DEVICE); + + crc32 = crc32_le(~0, addr, size); + if (crc32 == pci_endpoint_test_readl(test, PCI_ENDPOINT_TEST_CHECKSUM)) + ret = true; + +err_phys_addr: + kfree(orig_addr); +err: + return ret; +} + +static bool pci_endpoint_test_clear_irq(struct pci_endpoint_test *test) +{ + pci_endpoint_test_release_irq(test); + pci_endpoint_test_free_irq_vectors(test); + return true; +} + +static bool pci_endpoint_test_set_irq(struct pci_endpoint_test *test, + int req_irq_type) +{ + struct pci_dev *pdev = test->pdev; + struct device *dev = &pdev->dev; + + if (req_irq_type < IRQ_TYPE_LEGACY || req_irq_type > IRQ_TYPE_MSIX) { + dev_err(dev, "Invalid IRQ type option\n"); + return false; + } + + if (test->irq_type == req_irq_type) + return true; + + pci_endpoint_test_release_irq(test); + pci_endpoint_test_free_irq_vectors(test); + + if (!pci_endpoint_test_alloc_irq_vectors(test, req_irq_type)) + goto err; + + if (!pci_endpoint_test_request_irq(test)) + goto err; + + return true; + +err: + pci_endpoint_test_free_irq_vectors(test); + return false; +} + +static long pci_endpoint_test_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + int ret = -EINVAL; + enum pci_barno bar; + struct pci_endpoint_test *test = to_endpoint_test(file->private_data); + struct pci_dev *pdev = test->pdev; + + mutex_lock(&test->mutex); + + reinit_completion(&test->irq_raised); + test->last_irq = -ENODATA; + + switch (cmd) { + case PCITEST_BAR: + bar = arg; + if (bar > BAR_5) + goto ret; + if (is_am654_pci_dev(pdev) && bar == BAR_0) + goto ret; + ret = pci_endpoint_test_bar(test, bar); + break; + case PCITEST_LEGACY_IRQ: + ret = pci_endpoint_test_legacy_irq(test); + break; + case PCITEST_MSI: + case PCITEST_MSIX: + ret = pci_endpoint_test_msi_irq(test, arg, cmd == PCITEST_MSIX); + break; + case PCITEST_WRITE: + ret = pci_endpoint_test_write(test, arg); + break; + case PCITEST_READ: + ret = pci_endpoint_test_read(test, arg); + break; + case PCITEST_COPY: + ret = pci_endpoint_test_copy(test, arg); + break; + case PCITEST_SET_IRQTYPE: + ret = pci_endpoint_test_set_irq(test, arg); + break; + case PCITEST_GET_IRQTYPE: + ret = irq_type; + break; + case PCITEST_CLEAR_IRQ: + ret = pci_endpoint_test_clear_irq(test); + break; + } + +ret: + mutex_unlock(&test->mutex); + return ret; +} + +static const struct file_operations pci_endpoint_test_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = pci_endpoint_test_ioctl, +}; + +static int pci_endpoint_test_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int err; + int id; + char name[24]; + enum pci_barno bar; + void __iomem *base; + struct device *dev = &pdev->dev; + struct pci_endpoint_test *test; + struct pci_endpoint_test_data *data; + enum pci_barno test_reg_bar = BAR_0; + struct miscdevice *misc_device; + + if (pci_is_bridge(pdev)) + return -ENODEV; + + test = devm_kzalloc(dev, sizeof(*test), GFP_KERNEL); + if (!test) + return -ENOMEM; + + test->test_reg_bar = 0; + test->alignment = 0; + test->pdev = pdev; + test->irq_type = IRQ_TYPE_UNDEFINED; + + if (no_msi) + irq_type = IRQ_TYPE_LEGACY; + + data = (struct pci_endpoint_test_data *)ent->driver_data; + if (data) { + test_reg_bar = data->test_reg_bar; + test->test_reg_bar = test_reg_bar; + test->alignment = data->alignment; + irq_type = data->irq_type; + } + + init_completion(&test->irq_raised); + mutex_init(&test->mutex); + + if ((dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)) != 0) && + dma_set_mask_and_coherent(&pdev->dev, 
DMA_BIT_MASK(32)) != 0) { + dev_err(dev, "Cannot set DMA mask\n"); + return -EINVAL; + } + + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Cannot enable PCI device\n"); + return err; + } + + err = pci_request_regions(pdev, DRV_MODULE_NAME); + if (err) { + dev_err(dev, "Cannot obtain PCI resources\n"); + goto err_disable_pdev; + } + + pci_set_master(pdev); + + if (!pci_endpoint_test_alloc_irq_vectors(test, irq_type)) { + err = -EINVAL; + goto err_disable_irq; + } + + for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) { + if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) { + base = pci_ioremap_bar(pdev, bar); + if (!base) { + dev_err(dev, "Failed to read BAR%d\n", bar); + WARN_ON(bar == test_reg_bar); + } + test->bar[bar] = base; + } + } + + test->base = test->bar[test_reg_bar]; + if (!test->base) { + err = -ENOMEM; + dev_err(dev, "Cannot perform PCI test without BAR%d\n", + test_reg_bar); + goto err_iounmap; + } + + pci_set_drvdata(pdev, test); + + id = ida_simple_get(&pci_endpoint_test_ida, 0, 0, GFP_KERNEL); + if (id < 0) { + err = id; + dev_err(dev, "Unable to get id\n"); + goto err_iounmap; + } + + snprintf(name, sizeof(name), DRV_MODULE_NAME ".%d", id); + test->name = kstrdup(name, GFP_KERNEL); + if (!test->name) { + err = -ENOMEM; + goto err_ida_remove; + } + + if (!pci_endpoint_test_request_irq(test)) { + err = -EINVAL; + goto err_kfree_test_name; + } + + misc_device = &test->miscdev; + misc_device->minor = MISC_DYNAMIC_MINOR; + misc_device->name = kstrdup(name, GFP_KERNEL); + if (!misc_device->name) { + err = -ENOMEM; + goto err_release_irq; + } + misc_device->parent = &pdev->dev; + misc_device->fops = &pci_endpoint_test_fops; + + err = misc_register(misc_device); + if (err) { + dev_err(dev, "Failed to register device\n"); + goto err_kfree_name; + } + + return 0; + +err_kfree_name: + kfree(misc_device->name); + +err_release_irq: + pci_endpoint_test_release_irq(test); + +err_kfree_test_name: + kfree(test->name); + +err_ida_remove: + ida_simple_remove(&pci_endpoint_test_ida, id); + +err_iounmap: + for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) { + if (test->bar[bar]) + pci_iounmap(pdev, test->bar[bar]); + } + +err_disable_irq: + pci_endpoint_test_free_irq_vectors(test); + pci_release_regions(pdev); + +err_disable_pdev: + pci_disable_device(pdev); + + return err; +} + +static void pci_endpoint_test_remove(struct pci_dev *pdev) +{ + int id; + enum pci_barno bar; + struct pci_endpoint_test *test = pci_get_drvdata(pdev); + struct miscdevice *misc_device = &test->miscdev; + + if (sscanf(misc_device->name, DRV_MODULE_NAME ".%d", &id) != 1) + return; + if (id < 0) + return; + + pci_endpoint_test_release_irq(test); + pci_endpoint_test_free_irq_vectors(test); + + misc_deregister(&test->miscdev); + kfree(misc_device->name); + kfree(test->name); + ida_simple_remove(&pci_endpoint_test_ida, id); + for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) { + if (test->bar[bar]) + pci_iounmap(pdev, test->bar[bar]); + } + + pci_release_regions(pdev); + pci_disable_device(pdev); +} + +static const struct pci_endpoint_test_data default_data = { + .test_reg_bar = BAR_0, + .alignment = SZ_4K, + .irq_type = IRQ_TYPE_MSI, +}; + +static const struct pci_endpoint_test_data am654_data = { + .test_reg_bar = BAR_2, + .alignment = SZ_64K, + .irq_type = IRQ_TYPE_MSI, +}; + +static const struct pci_endpoint_test_data j721e_data = { + .alignment = 256, + .irq_type = IRQ_TYPE_MSI, +}; + +static const struct pci_device_id pci_endpoint_test_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_DRA74x), + 
.driver_data = (kernel_ulong_t)&default_data, + }, + { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_DRA72x), + .driver_data = (kernel_ulong_t)&default_data, + }, + { PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, 0x81c0), + .driver_data = (kernel_ulong_t)&default_data, + }, + { PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, PCI_DEVICE_ID_IMX8),}, + { PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, PCI_DEVICE_ID_LS1088A), + .driver_data = (kernel_ulong_t)&default_data, + }, + { PCI_DEVICE_DATA(SYNOPSYS, EDDA, NULL) }, + { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_AM654), + .driver_data = (kernel_ulong_t)&am654_data + }, + { PCI_DEVICE(PCI_VENDOR_ID_RENESAS, PCI_DEVICE_ID_RENESAS_R8A774A1),}, + { PCI_DEVICE(PCI_VENDOR_ID_RENESAS, PCI_DEVICE_ID_RENESAS_R8A774B1),}, + { PCI_DEVICE(PCI_VENDOR_ID_RENESAS, PCI_DEVICE_ID_RENESAS_R8A774C0),}, + { PCI_DEVICE(PCI_VENDOR_ID_RENESAS, PCI_DEVICE_ID_RENESAS_R8A774E1),}, + { PCI_DEVICE(PCI_VENDOR_ID_RENESAS, PCI_DEVICE_ID_RENESAS_R8A779F0), + .driver_data = (kernel_ulong_t)&default_data, + }, + { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_J721E), + .driver_data = (kernel_ulong_t)&j721e_data, + }, + { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_J7200), + .driver_data = (kernel_ulong_t)&j721e_data, + }, + { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_AM64), + .driver_data = (kernel_ulong_t)&j721e_data, + }, + { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_J721S2), + .driver_data = (kernel_ulong_t)&j721e_data, + }, + { } +}; +MODULE_DEVICE_TABLE(pci, pci_endpoint_test_tbl); + +static struct pci_driver pci_endpoint_test_driver = { + .name = DRV_MODULE_NAME, + .id_table = pci_endpoint_test_tbl, + .probe = pci_endpoint_test_probe, + .remove = pci_endpoint_test_remove, + .sriov_configure = pci_sriov_configure_simple, +}; +module_pci_driver(pci_endpoint_test_driver); + +MODULE_DESCRIPTION("PCI ENDPOINT TEST HOST DRIVER"); +MODULE_AUTHOR("Kishon Vijay Abraham I "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c new file mode 100644 index 0000000000..7966a6b8b5 --- /dev/null +++ b/drivers/misc/phantom.c @@ -0,0 +1,563 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2005-2007 Jiri Slaby + * + * You need a userspace library to cooperate with this driver. It (and other + * info) may be obtained here: + * http://www.fi.muni.cz/~xslaby/phantom.html + * or alternatively, you might use OpenHaptics provided by Sensable. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define PHANTOM_VERSION "n0.9.8" + +#define PHANTOM_MAX_MINORS 8 + +#define PHN_IRQCTL 0x4c /* irq control in caddr space */ + +#define PHB_RUNNING 1 +#define PHB_NOT_OH 2 + +static DEFINE_MUTEX(phantom_mutex); +static struct class *phantom_class; +static int phantom_major; + +struct phantom_device { + unsigned int opened; + void __iomem *caddr; + u32 __iomem *iaddr; + u32 __iomem *oaddr; + unsigned long status; + atomic_t counter; + + wait_queue_head_t wait; + struct cdev cdev; + + struct mutex open_lock; + spinlock_t regs_lock; + + /* used in NOT_OH mode */ + struct phm_regs oregs; + u32 ctl_reg; +}; + +static unsigned char phantom_devices[PHANTOM_MAX_MINORS]; + +static int phantom_status(struct phantom_device *dev, unsigned long newstat) +{ + pr_debug("phantom_status %lx %lx\n", dev->status, newstat); + + if (!(dev->status & PHB_RUNNING) && (newstat & PHB_RUNNING)) { + atomic_set(&dev->counter, 0); + iowrite32(PHN_CTL_IRQ, dev->iaddr + PHN_CONTROL); + iowrite32(0x43, dev->caddr + PHN_IRQCTL); + ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */ + } else if ((dev->status & PHB_RUNNING) && !(newstat & PHB_RUNNING)) { + iowrite32(0, dev->caddr + PHN_IRQCTL); + ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */ + } + + dev->status = newstat; + + return 0; +} + +/* + * File ops + */ + +static long phantom_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct phantom_device *dev = file->private_data; + struct phm_regs rs; + struct phm_reg r; + void __user *argp = (void __user *)arg; + unsigned long flags; + unsigned int i; + + switch (cmd) { + case PHN_SETREG: + case PHN_SET_REG: + if (copy_from_user(&r, argp, sizeof(r))) + return -EFAULT; + + if (r.reg > 7) + return -EINVAL; + + spin_lock_irqsave(&dev->regs_lock, flags); + if (r.reg == PHN_CONTROL && (r.value & PHN_CTL_IRQ) && + phantom_status(dev, dev->status | PHB_RUNNING)){ + spin_unlock_irqrestore(&dev->regs_lock, flags); + return -ENODEV; + } + + pr_debug("phantom: writing %x to %u\n", r.value, r.reg); + + /* preserve amp bit (don't allow to change it when in NOT_OH) */ + if (r.reg == PHN_CONTROL && (dev->status & PHB_NOT_OH)) { + r.value &= ~PHN_CTL_AMP; + r.value |= dev->ctl_reg & PHN_CTL_AMP; + dev->ctl_reg = r.value; + } + + iowrite32(r.value, dev->iaddr + r.reg); + ioread32(dev->iaddr); /* PCI posting */ + + if (r.reg == PHN_CONTROL && !(r.value & PHN_CTL_IRQ)) + phantom_status(dev, dev->status & ~PHB_RUNNING); + spin_unlock_irqrestore(&dev->regs_lock, flags); + break; + case PHN_SETREGS: + case PHN_SET_REGS: + if (copy_from_user(&rs, argp, sizeof(rs))) + return -EFAULT; + + pr_debug("phantom: SRS %u regs %x\n", rs.count, rs.mask); + spin_lock_irqsave(&dev->regs_lock, flags); + if (dev->status & PHB_NOT_OH) + memcpy(&dev->oregs, &rs, sizeof(rs)); + else { + u32 m = min(rs.count, 8U); + for (i = 0; i < m; i++) + if (rs.mask & BIT(i)) + iowrite32(rs.values[i], dev->oaddr + i); + ioread32(dev->iaddr); /* PCI posting */ + } + spin_unlock_irqrestore(&dev->regs_lock, flags); + break; + case PHN_GETREG: + case PHN_GET_REG: + if (copy_from_user(&r, argp, sizeof(r))) + return -EFAULT; + + if (r.reg > 7) + return -EINVAL; + + r.value = ioread32(dev->iaddr + r.reg); + + if (copy_to_user(argp, &r, sizeof(r))) + return -EFAULT; + break; + case PHN_GETREGS: + case PHN_GET_REGS: { + u32 m; + + if (copy_from_user(&rs, argp, sizeof(rs))) + return -EFAULT; + + m = 
min(rs.count, 8U); + + pr_debug("phantom: GRS %u regs %x\n", rs.count, rs.mask); + spin_lock_irqsave(&dev->regs_lock, flags); + for (i = 0; i < m; i++) + if (rs.mask & BIT(i)) + rs.values[i] = ioread32(dev->iaddr + i); + atomic_set(&dev->counter, 0); + spin_unlock_irqrestore(&dev->regs_lock, flags); + + if (copy_to_user(argp, &rs, sizeof(rs))) + return -EFAULT; + break; + } case PHN_NOT_OH: + spin_lock_irqsave(&dev->regs_lock, flags); + if (dev->status & PHB_RUNNING) { + printk(KERN_ERR "phantom: you need to set NOT_OH " + "before you start the device!\n"); + spin_unlock_irqrestore(&dev->regs_lock, flags); + return -EINVAL; + } + dev->status |= PHB_NOT_OH; + spin_unlock_irqrestore(&dev->regs_lock, flags); + break; + default: + return -ENOTTY; + } + + return 0; +} + +#ifdef CONFIG_COMPAT +static long phantom_compat_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + if (_IOC_NR(cmd) <= 3 && _IOC_SIZE(cmd) == sizeof(compat_uptr_t)) { + cmd &= ~(_IOC_SIZEMASK << _IOC_SIZESHIFT); + cmd |= sizeof(void *) << _IOC_SIZESHIFT; + } + return phantom_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)); +} +#else +#define phantom_compat_ioctl NULL +#endif + +static int phantom_open(struct inode *inode, struct file *file) +{ + struct phantom_device *dev = container_of(inode->i_cdev, + struct phantom_device, cdev); + + mutex_lock(&phantom_mutex); + nonseekable_open(inode, file); + + if (mutex_lock_interruptible(&dev->open_lock)) { + mutex_unlock(&phantom_mutex); + return -ERESTARTSYS; + } + + if (dev->opened) { + mutex_unlock(&dev->open_lock); + mutex_unlock(&phantom_mutex); + return -EINVAL; + } + + WARN_ON(dev->status & PHB_NOT_OH); + + file->private_data = dev; + + atomic_set(&dev->counter, 0); + dev->opened++; + mutex_unlock(&dev->open_lock); + mutex_unlock(&phantom_mutex); + return 0; +} + +static int phantom_release(struct inode *inode, struct file *file) +{ + struct phantom_device *dev = file->private_data; + + mutex_lock(&dev->open_lock); + + dev->opened = 0; + phantom_status(dev, dev->status & ~PHB_RUNNING); + dev->status &= ~PHB_NOT_OH; + + mutex_unlock(&dev->open_lock); + + return 0; +} + +static __poll_t phantom_poll(struct file *file, poll_table *wait) +{ + struct phantom_device *dev = file->private_data; + __poll_t mask = 0; + + pr_debug("phantom_poll: %d\n", atomic_read(&dev->counter)); + poll_wait(file, &dev->wait, wait); + + if (!(dev->status & PHB_RUNNING)) + mask = EPOLLERR; + else if (atomic_read(&dev->counter)) + mask = EPOLLIN | EPOLLRDNORM; + + pr_debug("phantom_poll end: %x/%d\n", mask, atomic_read(&dev->counter)); + + return mask; +} + +static const struct file_operations phantom_file_ops = { + .open = phantom_open, + .release = phantom_release, + .unlocked_ioctl = phantom_ioctl, + .compat_ioctl = phantom_compat_ioctl, + .poll = phantom_poll, + .llseek = no_llseek, +}; + +static irqreturn_t phantom_isr(int irq, void *data) +{ + struct phantom_device *dev = data; + unsigned int i; + u32 ctl; + + spin_lock(&dev->regs_lock); + ctl = ioread32(dev->iaddr + PHN_CONTROL); + if (!(ctl & PHN_CTL_IRQ)) { + spin_unlock(&dev->regs_lock); + return IRQ_NONE; + } + + iowrite32(0, dev->iaddr); + iowrite32(0xc0, dev->iaddr); + + if (dev->status & PHB_NOT_OH) { + struct phm_regs *r = &dev->oregs; + u32 m = min(r->count, 8U); + + for (i = 0; i < m; i++) + if (r->mask & BIT(i)) + iowrite32(r->values[i], dev->oaddr + i); + + dev->ctl_reg ^= PHN_CTL_AMP; + iowrite32(dev->ctl_reg, dev->iaddr + PHN_CONTROL); + } + spin_unlock(&dev->regs_lock); + + ioread32(dev->iaddr); /* PCI posting */ + 
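+	/* Record the completed update and wake any reader blocked in poll() */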
+ atomic_inc(&dev->counter); + wake_up_interruptible(&dev->wait); + + return IRQ_HANDLED; +} + +/* + * Init and deinit driver + */ + +static unsigned int phantom_get_free(void) +{ + unsigned int i; + + for (i = 0; i < PHANTOM_MAX_MINORS; i++) + if (phantom_devices[i] == 0) + break; + + return i; +} + +static int phantom_probe(struct pci_dev *pdev, + const struct pci_device_id *pci_id) +{ + struct phantom_device *pht; + unsigned int minor; + int retval; + + retval = pci_enable_device(pdev); + if (retval) { + dev_err(&pdev->dev, "pci_enable_device failed!\n"); + goto err; + } + + minor = phantom_get_free(); + if (minor == PHANTOM_MAX_MINORS) { + dev_err(&pdev->dev, "too many devices found!\n"); + retval = -EIO; + goto err_dis; + } + + phantom_devices[minor] = 1; + + retval = pci_request_regions(pdev, "phantom"); + if (retval) { + dev_err(&pdev->dev, "pci_request_regions failed!\n"); + goto err_null; + } + + retval = -ENOMEM; + pht = kzalloc(sizeof(*pht), GFP_KERNEL); + if (pht == NULL) { + dev_err(&pdev->dev, "unable to allocate device\n"); + goto err_reg; + } + + pht->caddr = pci_iomap(pdev, 0, 0); + if (pht->caddr == NULL) { + dev_err(&pdev->dev, "can't remap conf space\n"); + goto err_fr; + } + pht->iaddr = pci_iomap(pdev, 2, 0); + if (pht->iaddr == NULL) { + dev_err(&pdev->dev, "can't remap input space\n"); + goto err_unmc; + } + pht->oaddr = pci_iomap(pdev, 3, 0); + if (pht->oaddr == NULL) { + dev_err(&pdev->dev, "can't remap output space\n"); + goto err_unmi; + } + + mutex_init(&pht->open_lock); + spin_lock_init(&pht->regs_lock); + init_waitqueue_head(&pht->wait); + cdev_init(&pht->cdev, &phantom_file_ops); + pht->cdev.owner = THIS_MODULE; + + iowrite32(0, pht->caddr + PHN_IRQCTL); + ioread32(pht->caddr + PHN_IRQCTL); /* PCI posting */ + retval = request_irq(pdev->irq, phantom_isr, + IRQF_SHARED, "phantom", pht); + if (retval) { + dev_err(&pdev->dev, "can't establish ISR\n"); + goto err_unmo; + } + + retval = cdev_add(&pht->cdev, MKDEV(phantom_major, minor), 1); + if (retval) { + dev_err(&pdev->dev, "chardev registration failed\n"); + goto err_irq; + } + + if (IS_ERR(device_create(phantom_class, &pdev->dev, + MKDEV(phantom_major, minor), NULL, + "phantom%u", minor))) + dev_err(&pdev->dev, "can't create device\n"); + + pci_set_drvdata(pdev, pht); + + return 0; +err_irq: + free_irq(pdev->irq, pht); +err_unmo: + pci_iounmap(pdev, pht->oaddr); +err_unmi: + pci_iounmap(pdev, pht->iaddr); +err_unmc: + pci_iounmap(pdev, pht->caddr); +err_fr: + kfree(pht); +err_reg: + pci_release_regions(pdev); +err_null: + phantom_devices[minor] = 0; +err_dis: + pci_disable_device(pdev); +err: + return retval; +} + +static void phantom_remove(struct pci_dev *pdev) +{ + struct phantom_device *pht = pci_get_drvdata(pdev); + unsigned int minor = MINOR(pht->cdev.dev); + + device_destroy(phantom_class, MKDEV(phantom_major, minor)); + + cdev_del(&pht->cdev); + + iowrite32(0, pht->caddr + PHN_IRQCTL); + ioread32(pht->caddr + PHN_IRQCTL); /* PCI posting */ + free_irq(pdev->irq, pht); + + pci_iounmap(pdev, pht->oaddr); + pci_iounmap(pdev, pht->iaddr); + pci_iounmap(pdev, pht->caddr); + + kfree(pht); + + pci_release_regions(pdev); + + phantom_devices[minor] = 0; + + pci_disable_device(pdev); +} + +static int __maybe_unused phantom_suspend(struct device *dev_d) +{ + struct phantom_device *dev = dev_get_drvdata(dev_d); + + iowrite32(0, dev->caddr + PHN_IRQCTL); + ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */ + + synchronize_irq(to_pci_dev(dev_d)->irq); + + return 0; +} + +static int __maybe_unused 
phantom_resume(struct device *dev_d) +{ + struct phantom_device *dev = dev_get_drvdata(dev_d); + + iowrite32(0, dev->caddr + PHN_IRQCTL); + + return 0; +} + +static struct pci_device_id phantom_pci_tbl[] = { + { .vendor = PCI_VENDOR_ID_PLX, .device = PCI_DEVICE_ID_PLX_9050, + .subvendor = PCI_VENDOR_ID_PLX, .subdevice = PCI_DEVICE_ID_PLX_9050, + .class = PCI_CLASS_BRIDGE_OTHER << 8, .class_mask = 0xffff00 }, + { 0, } +}; +MODULE_DEVICE_TABLE(pci, phantom_pci_tbl); + +static SIMPLE_DEV_PM_OPS(phantom_pm_ops, phantom_suspend, phantom_resume); + +static struct pci_driver phantom_pci_driver = { + .name = "phantom", + .id_table = phantom_pci_tbl, + .probe = phantom_probe, + .remove = phantom_remove, + .driver.pm = &phantom_pm_ops, +}; + +static CLASS_ATTR_STRING(version, 0444, PHANTOM_VERSION); + +static int __init phantom_init(void) +{ + int retval; + dev_t dev; + + phantom_class = class_create("phantom"); + if (IS_ERR(phantom_class)) { + retval = PTR_ERR(phantom_class); + printk(KERN_ERR "phantom: can't register phantom class\n"); + goto err; + } + retval = class_create_file(phantom_class, &class_attr_version.attr); + if (retval) { + printk(KERN_ERR "phantom: can't create sysfs version file\n"); + goto err_class; + } + + retval = alloc_chrdev_region(&dev, 0, PHANTOM_MAX_MINORS, "phantom"); + if (retval) { + printk(KERN_ERR "phantom: can't register character device\n"); + goto err_attr; + } + phantom_major = MAJOR(dev); + + retval = pci_register_driver(&phantom_pci_driver); + if (retval) { + printk(KERN_ERR "phantom: can't register pci driver\n"); + goto err_unchr; + } + + printk(KERN_INFO "Phantom Linux Driver, version " PHANTOM_VERSION ", " + "init OK\n"); + + return 0; +err_unchr: + unregister_chrdev_region(dev, PHANTOM_MAX_MINORS); +err_attr: + class_remove_file(phantom_class, &class_attr_version.attr); +err_class: + class_destroy(phantom_class); +err: + return retval; +} + +static void __exit phantom_exit(void) +{ + pci_unregister_driver(&phantom_pci_driver); + + unregister_chrdev_region(MKDEV(phantom_major, 0), PHANTOM_MAX_MINORS); + + class_remove_file(phantom_class, &class_attr_version.attr); + class_destroy(phantom_class); + + pr_debug("phantom: module successfully removed\n"); +} + +module_init(phantom_init); +module_exit(phantom_exit); + +MODULE_AUTHOR("Jiri Slaby "); +MODULE_DESCRIPTION("Sensable Phantom driver (PCI devices)"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(PHANTOM_VERSION); diff --git a/drivers/misc/pvpanic/Kconfig b/drivers/misc/pvpanic/Kconfig new file mode 100644 index 0000000000..12d40a21f6 --- /dev/null +++ b/drivers/misc/pvpanic/Kconfig @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: GPL-2.0+ +# +# Pvpanic Kconfig +# +# Copyright (C) 2021 Oracle. +# + +config PVPANIC + bool "pvpanic device support" + help + This option allows to select a specific pvpanic device driver. + pvpanic is a paravirtualized device provided by QEMU; it lets + a virtual machine (guest) communicate panic events to the host. + +config PVPANIC_MMIO + tristate "pvpanic MMIO device support" + depends on HAS_IOMEM && (ACPI || OF) && PVPANIC + help + This driver provides support for the MMIO pvpanic device. + +config PVPANIC_PCI + tristate "pvpanic PCI device support" + depends on PCI && PVPANIC + help + This driver provides support for the PCI pvpanic device. + pvpanic is a paravirtualized device provided by QEMU which + forwards the panic events from the guest to the host. 
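The help texts above describe the device these options enable: a single byte-wide event register, reached over MMIO, port I/O or a PCI BAR. As a rough, non-authoritative sketch of the write path that the shared pvpanic core (added later in this patch) performs from its panic notifier, assuming the PVPANIC_* event bits from the pvpanic UAPI header; the function name and the base/capability parameters are illustrative stand-ins for the fields of struct pvpanic_instance:

	#include <linux/io.h>
	#include <linux/kexec.h>
	#include <uapi/misc/pvpanic.h>

	/*
	 * Illustrative only: tell the hypervisor the guest has panicked.
	 * 'base' is the ioremapped (or pcim_iomap'ed) event register and
	 * 'capability' is the bit mask read back from the device at probe
	 * time, as the drivers below do.
	 */
	static void pvpanic_example_notify(void __iomem *base, unsigned int capability)
	{
		unsigned int event = kexec_crash_loaded() ? PVPANIC_CRASH_LOADED
							  : PVPANIC_PANICKED;

		if (event & capability)
			iowrite8(event, base);	/* one byte write is the whole protocol */
	}

The actual driver keeps a list of probed instances and performs this write for each of them from a panic notifier, as shown in pvpanic.c further down.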
diff --git a/drivers/misc/pvpanic/Makefile b/drivers/misc/pvpanic/Makefile new file mode 100644 index 0000000000..9471df7d4f --- /dev/null +++ b/drivers/misc/pvpanic/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0+ +# +# Pvpanic Makefile +# +# Copyright (C) 2021 Oracle. +# +obj-$(CONFIG_PVPANIC_MMIO) += pvpanic.o pvpanic-mmio.o +obj-$(CONFIG_PVPANIC_PCI) += pvpanic.o pvpanic-pci.o diff --git a/drivers/misc/pvpanic/pvpanic-mmio.c b/drivers/misc/pvpanic/pvpanic-mmio.c new file mode 100644 index 0000000000..eb97167c03 --- /dev/null +++ b/drivers/misc/pvpanic/pvpanic-mmio.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Pvpanic MMIO Device Support + * + * Copyright (C) 2013 Fujitsu. + * Copyright (C) 2018 ZTE. + * Copyright (C) 2021 Oracle. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "pvpanic.h" + +MODULE_AUTHOR("Hu Tao "); +MODULE_DESCRIPTION("pvpanic-mmio device driver"); +MODULE_LICENSE("GPL"); + +static ssize_t capability_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct pvpanic_instance *pi = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%x\n", pi->capability); +} +static DEVICE_ATTR_RO(capability); + +static ssize_t events_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct pvpanic_instance *pi = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%x\n", pi->events); +} + +static ssize_t events_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct pvpanic_instance *pi = dev_get_drvdata(dev); + unsigned int tmp; + int err; + + err = kstrtouint(buf, 16, &tmp); + if (err) + return err; + + if ((tmp & pi->capability) != tmp) + return -EINVAL; + + pi->events = tmp; + + return count; +} +static DEVICE_ATTR_RW(events); + +static struct attribute *pvpanic_mmio_dev_attrs[] = { + &dev_attr_capability.attr, + &dev_attr_events.attr, + NULL +}; +ATTRIBUTE_GROUPS(pvpanic_mmio_dev); + +static int pvpanic_mmio_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct pvpanic_instance *pi; + struct resource *res; + void __iomem *base; + + res = platform_get_mem_or_io(pdev, 0); + if (!res) + return -EINVAL; + + switch (resource_type(res)) { + case IORESOURCE_IO: + base = devm_ioport_map(dev, res->start, resource_size(res)); + if (!base) + return -ENOMEM; + break; + case IORESOURCE_MEM: + base = devm_ioremap_resource(dev, res); + if (IS_ERR(base)) + return PTR_ERR(base); + break; + default: + return -EINVAL; + } + + pi = devm_kmalloc(dev, sizeof(*pi), GFP_KERNEL); + if (!pi) + return -ENOMEM; + + pi->base = base; + pi->capability = PVPANIC_PANICKED | PVPANIC_CRASH_LOADED; + + /* initialize capability by RDPT */ + pi->capability &= ioread8(base); + pi->events = pi->capability; + + return devm_pvpanic_probe(dev, pi); +} + +static const struct of_device_id pvpanic_mmio_match[] = { + { .compatible = "qemu,pvpanic-mmio", }, + {} +}; +MODULE_DEVICE_TABLE(of, pvpanic_mmio_match); + +static const struct acpi_device_id pvpanic_device_ids[] = { + { "QEMU0001", 0 }, + { "", 0 } +}; +MODULE_DEVICE_TABLE(acpi, pvpanic_device_ids); + +static struct platform_driver pvpanic_mmio_driver = { + .driver = { + .name = "pvpanic-mmio", + .of_match_table = pvpanic_mmio_match, + .acpi_match_table = pvpanic_device_ids, + .dev_groups = pvpanic_mmio_dev_groups, + }, + .probe = pvpanic_mmio_probe, +}; +module_platform_driver(pvpanic_mmio_driver); diff --git a/drivers/misc/pvpanic/pvpanic-pci.c 
b/drivers/misc/pvpanic/pvpanic-pci.c new file mode 100644 index 0000000000..07eddb5ea3 --- /dev/null +++ b/drivers/misc/pvpanic/pvpanic-pci.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Pvpanic PCI Device Support + * + * Copyright (C) 2021 Oracle. + */ + +#include +#include +#include +#include +#include + +#include + +#include "pvpanic.h" + +#define PCI_VENDOR_ID_REDHAT 0x1b36 +#define PCI_DEVICE_ID_REDHAT_PVPANIC 0x0011 + +MODULE_AUTHOR("Mihai Carabas "); +MODULE_DESCRIPTION("pvpanic device driver"); +MODULE_LICENSE("GPL"); + +static ssize_t capability_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct pvpanic_instance *pi = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%x\n", pi->capability); +} +static DEVICE_ATTR_RO(capability); + +static ssize_t events_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct pvpanic_instance *pi = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%x\n", pi->events); +} + +static ssize_t events_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct pvpanic_instance *pi = dev_get_drvdata(dev); + unsigned int tmp; + int err; + + err = kstrtouint(buf, 16, &tmp); + if (err) + return err; + + if ((tmp & pi->capability) != tmp) + return -EINVAL; + + pi->events = tmp; + + return count; +} +static DEVICE_ATTR_RW(events); + +static struct attribute *pvpanic_pci_dev_attrs[] = { + &dev_attr_capability.attr, + &dev_attr_events.attr, + NULL +}; +ATTRIBUTE_GROUPS(pvpanic_pci_dev); + +static int pvpanic_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct pvpanic_instance *pi; + void __iomem *base; + int ret; + + ret = pcim_enable_device(pdev); + if (ret < 0) + return ret; + + base = pcim_iomap(pdev, 0, 0); + if (!base) + return -ENOMEM; + + pi = devm_kmalloc(&pdev->dev, sizeof(*pi), GFP_KERNEL); + if (!pi) + return -ENOMEM; + + pi->base = base; + pi->capability = PVPANIC_PANICKED | PVPANIC_CRASH_LOADED; + + /* initlize capability by RDPT */ + pi->capability &= ioread8(base); + pi->events = pi->capability; + + return devm_pvpanic_probe(&pdev->dev, pi); +} + +static const struct pci_device_id pvpanic_pci_id_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_REDHAT, PCI_DEVICE_ID_REDHAT_PVPANIC)}, + {} +}; +MODULE_DEVICE_TABLE(pci, pvpanic_pci_id_tbl); + +static struct pci_driver pvpanic_pci_driver = { + .name = "pvpanic-pci", + .id_table = pvpanic_pci_id_tbl, + .probe = pvpanic_pci_probe, + .driver = { + .dev_groups = pvpanic_pci_dev_groups, + }, +}; +module_pci_driver(pvpanic_pci_driver); diff --git a/drivers/misc/pvpanic/pvpanic.c b/drivers/misc/pvpanic/pvpanic.c new file mode 100644 index 0000000000..049a120063 --- /dev/null +++ b/drivers/misc/pvpanic/pvpanic.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Pvpanic Device Support + * + * Copyright (C) 2013 Fujitsu. + * Copyright (C) 2018 ZTE. + * Copyright (C) 2021 Oracle. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "pvpanic.h" + +MODULE_AUTHOR("Mihai Carabas "); +MODULE_DESCRIPTION("pvpanic device driver"); +MODULE_LICENSE("GPL"); + +static struct list_head pvpanic_list; +static spinlock_t pvpanic_lock; + +static void +pvpanic_send_event(unsigned int event) +{ + struct pvpanic_instance *pi_cur; + + if (!spin_trylock(&pvpanic_lock)) + return; + + list_for_each_entry(pi_cur, &pvpanic_list, list) { + if (event & pi_cur->capability & pi_cur->events) + iowrite8(event, pi_cur->base); + } + spin_unlock(&pvpanic_lock); +} + +static int +pvpanic_panic_notify(struct notifier_block *nb, unsigned long code, void *unused) +{ + unsigned int event = PVPANIC_PANICKED; + + if (kexec_crash_loaded()) + event = PVPANIC_CRASH_LOADED; + + pvpanic_send_event(event); + + return NOTIFY_DONE; +} + +/* + * Call our notifier very early on panic, deferring the + * action taken to the hypervisor. + */ +static struct notifier_block pvpanic_panic_nb = { + .notifier_call = pvpanic_panic_notify, + .priority = INT_MAX, +}; + +static void pvpanic_remove(void *param) +{ + struct pvpanic_instance *pi_cur, *pi_next; + struct pvpanic_instance *pi = param; + + spin_lock(&pvpanic_lock); + list_for_each_entry_safe(pi_cur, pi_next, &pvpanic_list, list) { + if (pi_cur == pi) { + list_del(&pi_cur->list); + break; + } + } + spin_unlock(&pvpanic_lock); +} + +int devm_pvpanic_probe(struct device *dev, struct pvpanic_instance *pi) +{ + if (!pi || !pi->base) + return -EINVAL; + + spin_lock(&pvpanic_lock); + list_add(&pi->list, &pvpanic_list); + spin_unlock(&pvpanic_lock); + + dev_set_drvdata(dev, pi); + + return devm_add_action_or_reset(dev, pvpanic_remove, pi); +} +EXPORT_SYMBOL_GPL(devm_pvpanic_probe); + +static int pvpanic_init(void) +{ + INIT_LIST_HEAD(&pvpanic_list); + spin_lock_init(&pvpanic_lock); + + atomic_notifier_chain_register(&panic_notifier_list, &pvpanic_panic_nb); + + return 0; +} +module_init(pvpanic_init); + +static void pvpanic_exit(void) +{ + atomic_notifier_chain_unregister(&panic_notifier_list, &pvpanic_panic_nb); + +} +module_exit(pvpanic_exit); diff --git a/drivers/misc/pvpanic/pvpanic.h b/drivers/misc/pvpanic/pvpanic.h new file mode 100644 index 0000000000..4935459517 --- /dev/null +++ b/drivers/misc/pvpanic/pvpanic.h @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Pvpanic Device Support + * + * Copyright (C) 2021 Oracle. + */ + +#ifndef PVPANIC_H_ +#define PVPANIC_H_ + +struct pvpanic_instance { + void __iomem *base; + unsigned int capability; + unsigned int events; + struct list_head list; +}; + +int devm_pvpanic_probe(struct device *dev, struct pvpanic_instance *pi); + +#endif /* PVPANIC_H_ */ diff --git a/drivers/misc/qcom-coincell.c b/drivers/misc/qcom-coincell.c new file mode 100644 index 0000000000..3c57f74291 --- /dev/null +++ b/drivers/misc/qcom-coincell.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2013, The Linux Foundation. All rights reserved. + * Copyright (c) 2015, Sony Mobile Communications Inc. 
+ */ + +#include +#include +#include +#include +#include +#include + +struct qcom_coincell { + struct device *dev; + struct regmap *regmap; + u32 base_addr; +}; + +#define QCOM_COINCELL_REG_RSET 0x44 +#define QCOM_COINCELL_REG_VSET 0x45 +#define QCOM_COINCELL_REG_ENABLE 0x46 + +#define QCOM_COINCELL_ENABLE BIT(7) + +static const int qcom_rset_map[] = { 2100, 1700, 1200, 800 }; +static const int qcom_vset_map[] = { 2500, 3200, 3100, 3000 }; +/* NOTE: for pm8921 and others, voltage of 2500 is 16 (10000b), not 0 */ + +/* if enable==0, rset and vset are ignored */ +static int qcom_coincell_chgr_config(struct qcom_coincell *chgr, int rset, + int vset, bool enable) +{ + int i, j, rc; + + /* if disabling, just do that and skip other operations */ + if (!enable) + return regmap_write(chgr->regmap, + chgr->base_addr + QCOM_COINCELL_REG_ENABLE, 0); + + /* find index for current-limiting resistor */ + for (i = 0; i < ARRAY_SIZE(qcom_rset_map); i++) + if (rset == qcom_rset_map[i]) + break; + + if (i >= ARRAY_SIZE(qcom_rset_map)) { + dev_err(chgr->dev, "invalid rset-ohms value %d\n", rset); + return -EINVAL; + } + + /* find index for charge voltage */ + for (j = 0; j < ARRAY_SIZE(qcom_vset_map); j++) + if (vset == qcom_vset_map[j]) + break; + + if (j >= ARRAY_SIZE(qcom_vset_map)) { + dev_err(chgr->dev, "invalid vset-millivolts value %d\n", vset); + return -EINVAL; + } + + rc = regmap_write(chgr->regmap, + chgr->base_addr + QCOM_COINCELL_REG_RSET, i); + if (rc) { + /* + * This is mainly to flag a bad base_addr (reg) from dts. + * Other failures writing to the registers should be + * extremely rare, or indicative of problems that + * should be reported elsewhere (eg. spmi failure). + */ + dev_err(chgr->dev, "could not write to RSET register\n"); + return rc; + } + + rc = regmap_write(chgr->regmap, + chgr->base_addr + QCOM_COINCELL_REG_VSET, j); + if (rc) + return rc; + + /* set 'enable' register */ + return regmap_write(chgr->regmap, + chgr->base_addr + QCOM_COINCELL_REG_ENABLE, + QCOM_COINCELL_ENABLE); +} + +static int qcom_coincell_probe(struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + struct qcom_coincell chgr; + u32 rset = 0; + u32 vset = 0; + bool enable; + int rc; + + chgr.dev = &pdev->dev; + + chgr.regmap = dev_get_regmap(pdev->dev.parent, NULL); + if (!chgr.regmap) { + dev_err(chgr.dev, "Unable to get regmap\n"); + return -EINVAL; + } + + rc = of_property_read_u32(node, "reg", &chgr.base_addr); + if (rc) + return rc; + + enable = !of_property_read_bool(node, "qcom,charger-disable"); + + if (enable) { + rc = of_property_read_u32(node, "qcom,rset-ohms", &rset); + if (rc) { + dev_err(chgr.dev, + "can't find 'qcom,rset-ohms' in DT block"); + return rc; + } + + rc = of_property_read_u32(node, "qcom,vset-millivolts", &vset); + if (rc) { + dev_err(chgr.dev, + "can't find 'qcom,vset-millivolts' in DT block"); + return rc; + } + } + + return qcom_coincell_chgr_config(&chgr, rset, vset, enable); +} + +static const struct of_device_id qcom_coincell_match_table[] = { + { .compatible = "qcom,pm8941-coincell", }, + {} +}; + +MODULE_DEVICE_TABLE(of, qcom_coincell_match_table); + +static struct platform_driver qcom_coincell_driver = { + .driver = { + .name = "qcom-spmi-coincell", + .of_match_table = qcom_coincell_match_table, + }, + .probe = qcom_coincell_probe, +}; + +module_platform_driver(qcom_coincell_driver); + +MODULE_DESCRIPTION("Qualcomm PMIC coincell charger driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/sgi-gru/Makefile b/drivers/misc/sgi-gru/Makefile 
new file mode 100644 index 0000000000..8132116ec0 --- /dev/null +++ b/drivers/misc/sgi-gru/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only +ccflags-$(CONFIG_SGI_GRU_DEBUG) := -DDEBUG + +obj-$(CONFIG_SGI_GRU) := gru.o +gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o gruhandles.o grukdump.o + diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h new file mode 100644 index 0000000000..3ad76cd18b --- /dev/null +++ b/drivers/misc/sgi-gru/gru.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __GRU_H__ +#define __GRU_H__ + +/* + * GRU architectural definitions + */ +#define GRU_CACHE_LINE_BYTES 64 +#define GRU_HANDLE_STRIDE 256 +#define GRU_CB_BASE 0 +#define GRU_DS_BASE 0x20000 + +/* + * Size used to map GRU GSeg + */ +#if defined(CONFIG_IA64) +#define GRU_GSEG_PAGESIZE (256 * 1024UL) +#elif defined(CONFIG_X86_64) +#define GRU_GSEG_PAGESIZE (256 * 1024UL) /* ZZZ 2MB ??? */ +#else +#error "Unsupported architecture" +#endif + +/* + * Structure for obtaining GRU resource information + */ +struct gru_chiplet_info { + int node; + int chiplet; + int blade; + int total_dsr_bytes; + int total_cbr; + int total_user_dsr_bytes; + int total_user_cbr; + int free_user_dsr_bytes; + int free_user_cbr; +}; + +/* + * Statictics kept for each context. + */ +struct gru_gseg_statistics { + unsigned long fmm_tlbmiss; + unsigned long upm_tlbmiss; + unsigned long tlbdropin; + unsigned long context_stolen; + unsigned long reserved[10]; +}; + +/* Flags for GRU options on the gru_create_context() call */ +/* Select one of the follow 4 options to specify how TLB misses are handled */ +#define GRU_OPT_MISS_DEFAULT 0x0000 /* Use default mode */ +#define GRU_OPT_MISS_USER_POLL 0x0001 /* User will poll CB for faults */ +#define GRU_OPT_MISS_FMM_INTR 0x0002 /* Send interrupt to cpu to + handle fault */ +#define GRU_OPT_MISS_FMM_POLL 0x0003 /* Use system polling thread */ +#define GRU_OPT_MISS_MASK 0x0003 /* Mask for TLB MISS option */ + + + +#endif /* __GRU_H__ */ diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h new file mode 100644 index 0000000000..04d5170ac1 --- /dev/null +++ b/drivers/misc/sgi-gru/gru_instructions.h @@ -0,0 +1,736 @@ +/* + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __GRU_INSTRUCTIONS_H__ +#define __GRU_INSTRUCTIONS_H__ + +extern int gru_check_status_proc(void *cb); +extern int gru_wait_proc(void *cb); +extern void gru_wait_abort_proc(void *cb); + + + +/* + * Architecture dependent functions + */ + +#if defined(CONFIG_IA64) +#include +#include +#define __flush_cache(p) ia64_fc((unsigned long)p) +/* Use volatile on IA64 to ensure ordering via st4.rel */ +#define gru_ordered_store_ulong(p, v) \ + do { \ + barrier(); \ + *((volatile unsigned long *)(p)) = v; /* force st.rel */ \ + } while (0) +#elif defined(CONFIG_X86_64) +#include +#define __flush_cache(p) clflush(p) +#define gru_ordered_store_ulong(p, v) \ + do { \ + barrier(); \ + *(unsigned long *)p = v; \ + } while (0) +#else +#error "Unsupported architecture" +#endif + +/* + * Control block status and exception codes + */ +#define CBS_IDLE 0 +#define CBS_EXCEPTION 1 +#define CBS_ACTIVE 2 +#define CBS_CALL_OS 3 + +/* CB substatus bitmasks */ +#define CBSS_MSG_QUEUE_MASK 7 +#define CBSS_IMPLICIT_ABORT_ACTIVE_MASK 8 + +/* CB substatus message queue values (low 3 bits of substatus) */ +#define CBSS_NO_ERROR 0 +#define CBSS_LB_OVERFLOWED 1 +#define CBSS_QLIMIT_REACHED 2 +#define CBSS_PAGE_OVERFLOW 3 +#define CBSS_AMO_NACKED 4 +#define CBSS_PUT_NACKED 5 + +/* + * Structure used to fetch exception detail for CBs that terminate with + * CBS_EXCEPTION + */ +struct control_block_extended_exc_detail { + unsigned long cb; + int opc; + int ecause; + int exopc; + long exceptdet0; + int exceptdet1; + int cbrstate; + int cbrexecstatus; +}; + +/* + * Instruction formats + */ + +/* + * Generic instruction format. + * This definition has precise bit field definitions. + */ +struct gru_instruction_bits { + /* DW 0 - low */ + unsigned int icmd: 1; + unsigned char ima: 3; /* CB_DelRep, unmapped mode */ + unsigned char reserved0: 4; + unsigned int xtype: 3; + unsigned int iaa0: 2; + unsigned int iaa1: 2; + unsigned char reserved1: 1; + unsigned char opc: 8; /* opcode */ + unsigned char exopc: 8; /* extended opcode */ + /* DW 0 - high */ + unsigned int idef2: 22; /* TRi0 */ + unsigned char reserved2: 2; + unsigned char istatus: 2; + unsigned char isubstatus:4; + unsigned char reserved3: 1; + unsigned char tlb_fault_color: 1; + /* DW 1 */ + unsigned long idef4; /* 42 bits: TRi1, BufSize */ + /* DW 2-6 */ + unsigned long idef1; /* BAddr0 */ + unsigned long idef5; /* Nelem */ + unsigned long idef6; /* Stride, Operand1 */ + unsigned long idef3; /* BAddr1, Value, Operand2 */ + unsigned long reserved4; + /* DW 7 */ + unsigned long avalue; /* AValue */ +}; + +/* + * Generic instruction with friendlier names. This format is used + * for inline instructions. 
+ */ +struct gru_instruction { + /* DW 0 */ + union { + unsigned long op64; /* icmd,xtype,iaa0,ima,opc,tri0 */ + struct { + unsigned int op32; + unsigned int tri0; + }; + }; + unsigned long tri1_bufsize; /* DW 1 */ + unsigned long baddr0; /* DW 2 */ + unsigned long nelem; /* DW 3 */ + unsigned long op1_stride; /* DW 4 */ + unsigned long op2_value_baddr1; /* DW 5 */ + unsigned long reserved0; /* DW 6 */ + unsigned long avalue; /* DW 7 */ +}; + +/* Some shifts and masks for the low 64 bits of a GRU command */ +#define GRU_CB_ICMD_SHFT 0 +#define GRU_CB_ICMD_MASK 0x1 +#define GRU_CB_XTYPE_SHFT 8 +#define GRU_CB_XTYPE_MASK 0x7 +#define GRU_CB_IAA0_SHFT 11 +#define GRU_CB_IAA0_MASK 0x3 +#define GRU_CB_IAA1_SHFT 13 +#define GRU_CB_IAA1_MASK 0x3 +#define GRU_CB_IMA_SHFT 1 +#define GRU_CB_IMA_MASK 0x3 +#define GRU_CB_OPC_SHFT 16 +#define GRU_CB_OPC_MASK 0xff +#define GRU_CB_EXOPC_SHFT 24 +#define GRU_CB_EXOPC_MASK 0xff +#define GRU_IDEF2_SHFT 32 +#define GRU_IDEF2_MASK 0x3ffff +#define GRU_ISTATUS_SHFT 56 +#define GRU_ISTATUS_MASK 0x3 + +/* GRU instruction opcodes (opc field) */ +#define OP_NOP 0x00 +#define OP_BCOPY 0x01 +#define OP_VLOAD 0x02 +#define OP_IVLOAD 0x03 +#define OP_VSTORE 0x04 +#define OP_IVSTORE 0x05 +#define OP_VSET 0x06 +#define OP_IVSET 0x07 +#define OP_MESQ 0x08 +#define OP_GAMXR 0x09 +#define OP_GAMIR 0x0a +#define OP_GAMIRR 0x0b +#define OP_GAMER 0x0c +#define OP_GAMERR 0x0d +#define OP_BSTORE 0x0e +#define OP_VFLUSH 0x0f + + +/* Extended opcodes values (exopc field) */ + +/* GAMIR - AMOs with implicit operands */ +#define EOP_IR_FETCH 0x01 /* Plain fetch of memory */ +#define EOP_IR_CLR 0x02 /* Fetch and clear */ +#define EOP_IR_INC 0x05 /* Fetch and increment */ +#define EOP_IR_DEC 0x07 /* Fetch and decrement */ +#define EOP_IR_QCHK1 0x0d /* Queue check, 64 byte msg */ +#define EOP_IR_QCHK2 0x0e /* Queue check, 128 byte msg */ + +/* GAMIRR - Registered AMOs with implicit operands */ +#define EOP_IRR_FETCH 0x01 /* Registered fetch of memory */ +#define EOP_IRR_CLR 0x02 /* Registered fetch and clear */ +#define EOP_IRR_INC 0x05 /* Registered fetch and increment */ +#define EOP_IRR_DEC 0x07 /* Registered fetch and decrement */ +#define EOP_IRR_DECZ 0x0f /* Registered fetch and decrement, update on zero*/ + +/* GAMER - AMOs with explicit operands */ +#define EOP_ER_SWAP 0x00 /* Exchange argument and memory */ +#define EOP_ER_OR 0x01 /* Logical OR with memory */ +#define EOP_ER_AND 0x02 /* Logical AND with memory */ +#define EOP_ER_XOR 0x03 /* Logical XOR with memory */ +#define EOP_ER_ADD 0x04 /* Add value to memory */ +#define EOP_ER_CSWAP 0x08 /* Compare with operand2, write operand1 if match*/ +#define EOP_ER_CADD 0x0c /* Queue check, operand1*64 byte msg */ + +/* GAMERR - Registered AMOs with explicit operands */ +#define EOP_ERR_SWAP 0x00 /* Exchange argument and memory */ +#define EOP_ERR_OR 0x01 /* Logical OR with memory */ +#define EOP_ERR_AND 0x02 /* Logical AND with memory */ +#define EOP_ERR_XOR 0x03 /* Logical XOR with memory */ +#define EOP_ERR_ADD 0x04 /* Add value to memory */ +#define EOP_ERR_CSWAP 0x08 /* Compare with operand2, write operand1 if match*/ +#define EOP_ERR_EPOLL 0x09 /* Poll for equality */ +#define EOP_ERR_NPOLL 0x0a /* Poll for inequality */ + +/* GAMXR - SGI Arithmetic unit */ +#define EOP_XR_CSWAP 0x0b /* Masked compare exchange */ + + +/* Transfer types (xtype field) */ +#define XTYPE_B 0x0 /* byte */ +#define XTYPE_S 0x1 /* short (2-byte) */ +#define XTYPE_W 0x2 /* word (4-byte) */ +#define XTYPE_DW 0x3 /* doubleword (8-byte) */ +#define 
XTYPE_CL 0x6 /* cacheline (64-byte) */ + + +/* Instruction access attributes (iaa0, iaa1 fields) */ +#define IAA_RAM 0x0 /* normal cached RAM access */ +#define IAA_NCRAM 0x2 /* noncoherent RAM access */ +#define IAA_MMIO 0x1 /* noncoherent memory-mapped I/O space */ +#define IAA_REGISTER 0x3 /* memory-mapped registers, etc. */ + + +/* Instruction mode attributes (ima field) */ +#define IMA_MAPPED 0x0 /* Virtual mode */ +#define IMA_CB_DELAY 0x1 /* hold read responses until status changes */ +#define IMA_UNMAPPED 0x2 /* bypass the TLBs (OS only) */ +#define IMA_INTERRUPT 0x4 /* Interrupt when instruction completes */ + +/* CBE ecause bits */ +#define CBE_CAUSE_RI (1 << 0) +#define CBE_CAUSE_INVALID_INSTRUCTION (1 << 1) +#define CBE_CAUSE_UNMAPPED_MODE_FORBIDDEN (1 << 2) +#define CBE_CAUSE_PE_CHECK_DATA_ERROR (1 << 3) +#define CBE_CAUSE_IAA_GAA_MISMATCH (1 << 4) +#define CBE_CAUSE_DATA_SEGMENT_LIMIT_EXCEPTION (1 << 5) +#define CBE_CAUSE_OS_FATAL_TLB_FAULT (1 << 6) +#define CBE_CAUSE_EXECUTION_HW_ERROR (1 << 7) +#define CBE_CAUSE_TLBHW_ERROR (1 << 8) +#define CBE_CAUSE_RA_REQUEST_TIMEOUT (1 << 9) +#define CBE_CAUSE_HA_REQUEST_TIMEOUT (1 << 10) +#define CBE_CAUSE_RA_RESPONSE_FATAL (1 << 11) +#define CBE_CAUSE_RA_RESPONSE_NON_FATAL (1 << 12) +#define CBE_CAUSE_HA_RESPONSE_FATAL (1 << 13) +#define CBE_CAUSE_HA_RESPONSE_NON_FATAL (1 << 14) +#define CBE_CAUSE_ADDRESS_SPACE_DECODE_ERROR (1 << 15) +#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 16) +#define CBE_CAUSE_RA_RESPONSE_DATA_ERROR (1 << 17) +#define CBE_CAUSE_HA_RESPONSE_DATA_ERROR (1 << 18) +#define CBE_CAUSE_FORCED_ERROR (1 << 19) + +/* CBE cbrexecstatus bits */ +#define CBR_EXS_ABORT_OCC_BIT 0 +#define CBR_EXS_INT_OCC_BIT 1 +#define CBR_EXS_PENDING_BIT 2 +#define CBR_EXS_QUEUED_BIT 3 +#define CBR_EXS_TLB_INVAL_BIT 4 +#define CBR_EXS_EXCEPTION_BIT 5 +#define CBR_EXS_CB_INT_PENDING_BIT 6 + +#define CBR_EXS_ABORT_OCC (1 << CBR_EXS_ABORT_OCC_BIT) +#define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT) +#define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT) +#define CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT) +#define CBR_EXS_TLB_INVAL (1 << CBR_EXS_TLB_INVAL_BIT) +#define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT) +#define CBR_EXS_CB_INT_PENDING (1 << CBR_EXS_CB_INT_PENDING_BIT) + +/* + * Exceptions are retried for the following cases. If any OTHER bits are set + * in ecause, the exception is not retryable. 
+ */ +#define EXCEPTION_RETRY_BITS (CBE_CAUSE_EXECUTION_HW_ERROR | \ + CBE_CAUSE_TLBHW_ERROR | \ + CBE_CAUSE_RA_REQUEST_TIMEOUT | \ + CBE_CAUSE_RA_RESPONSE_NON_FATAL | \ + CBE_CAUSE_HA_RESPONSE_NON_FATAL | \ + CBE_CAUSE_RA_RESPONSE_DATA_ERROR | \ + CBE_CAUSE_HA_RESPONSE_DATA_ERROR \ + ) + +/* Message queue head structure */ +union gru_mesqhead { + unsigned long val; + struct { + unsigned int head; + unsigned int limit; + }; +}; + + +/* Generate the low word of a GRU instruction */ +static inline unsigned long +__opdword(unsigned char opcode, unsigned char exopc, unsigned char xtype, + unsigned char iaa0, unsigned char iaa1, + unsigned long idef2, unsigned char ima) +{ + return (1 << GRU_CB_ICMD_SHFT) | + ((unsigned long)CBS_ACTIVE << GRU_ISTATUS_SHFT) | + (idef2<< GRU_IDEF2_SHFT) | + (iaa0 << GRU_CB_IAA0_SHFT) | + (iaa1 << GRU_CB_IAA1_SHFT) | + (ima << GRU_CB_IMA_SHFT) | + (xtype << GRU_CB_XTYPE_SHFT) | + (opcode << GRU_CB_OPC_SHFT) | + (exopc << GRU_CB_EXOPC_SHFT); +} + +/* + * Architecture specific intrinsics + */ +static inline void gru_flush_cache(void *p) +{ + __flush_cache(p); +} + +/* + * Store the lower 64 bits of the command including the "start" bit. Then + * start the instruction executing. + */ +static inline void gru_start_instruction(struct gru_instruction *ins, unsigned long op64) +{ + gru_ordered_store_ulong(ins, op64); + mb(); + gru_flush_cache(ins); +} + + +/* Convert "hints" to IMA */ +#define CB_IMA(h) ((h) | IMA_UNMAPPED) + +/* Convert data segment cache line index into TRI0 / TRI1 value */ +#define GRU_DINDEX(i) ((i) * GRU_CACHE_LINE_BYTES) + +/* Inline functions for GRU instructions. + * Note: + * - nelem and stride are in elements + * - tri0/tri1 is in bytes for the beginning of the data segment. + */ +static inline void gru_vload_phys(void *cb, unsigned long gpa, + unsigned int tri0, int iaa, unsigned long hints) +{ + struct gru_instruction *ins = (struct gru_instruction *)cb; + + ins->baddr0 = (long)gpa | ((unsigned long)iaa << 62); + ins->nelem = 1; + ins->op1_stride = 1; + gru_start_instruction(ins, __opdword(OP_VLOAD, 0, XTYPE_DW, iaa, 0, + (unsigned long)tri0, CB_IMA(hints))); +} + +static inline void gru_vstore_phys(void *cb, unsigned long gpa, + unsigned int tri0, int iaa, unsigned long hints) +{ + struct gru_instruction *ins = (struct gru_instruction *)cb; + + ins->baddr0 = (long)gpa | ((unsigned long)iaa << 62); + ins->nelem = 1; + ins->op1_stride = 1; + gru_start_instruction(ins, __opdword(OP_VSTORE, 0, XTYPE_DW, iaa, 0, + (unsigned long)tri0, CB_IMA(hints))); +} + +static inline void gru_vload(void *cb, unsigned long mem_addr, + unsigned int tri0, unsigned char xtype, unsigned long nelem, + unsigned long stride, unsigned long hints) +{ + struct gru_instruction *ins = (struct gru_instruction *)cb; + + ins->baddr0 = (long)mem_addr; + ins->nelem = nelem; + ins->op1_stride = stride; + gru_start_instruction(ins, __opdword(OP_VLOAD, 0, xtype, IAA_RAM, 0, + (unsigned long)tri0, CB_IMA(hints))); +} + +static inline void gru_vstore(void *cb, unsigned long mem_addr, + unsigned int tri0, unsigned char xtype, unsigned long nelem, + unsigned long stride, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->nelem = nelem; + ins->op1_stride = stride; + gru_start_instruction(ins, __opdword(OP_VSTORE, 0, xtype, IAA_RAM, 0, + tri0, CB_IMA(hints))); +} + +static inline void gru_ivload(void *cb, unsigned long mem_addr, + unsigned int tri0, unsigned int tri1, unsigned char xtype, + unsigned long nelem, unsigned long 
hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->nelem = nelem; + ins->tri1_bufsize = tri1; + gru_start_instruction(ins, __opdword(OP_IVLOAD, 0, xtype, IAA_RAM, 0, + tri0, CB_IMA(hints))); +} + +static inline void gru_ivstore(void *cb, unsigned long mem_addr, + unsigned int tri0, unsigned int tri1, + unsigned char xtype, unsigned long nelem, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->nelem = nelem; + ins->tri1_bufsize = tri1; + gru_start_instruction(ins, __opdword(OP_IVSTORE, 0, xtype, IAA_RAM, 0, + tri0, CB_IMA(hints))); +} + +static inline void gru_vset(void *cb, unsigned long mem_addr, + unsigned long value, unsigned char xtype, unsigned long nelem, + unsigned long stride, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->op2_value_baddr1 = value; + ins->nelem = nelem; + ins->op1_stride = stride; + gru_start_instruction(ins, __opdword(OP_VSET, 0, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_ivset(void *cb, unsigned long mem_addr, + unsigned int tri1, unsigned long value, unsigned char xtype, + unsigned long nelem, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->op2_value_baddr1 = value; + ins->nelem = nelem; + ins->tri1_bufsize = tri1; + gru_start_instruction(ins, __opdword(OP_IVSET, 0, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_vflush(void *cb, unsigned long mem_addr, + unsigned long nelem, unsigned char xtype, unsigned long stride, + unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->op1_stride = stride; + ins->nelem = nelem; + gru_start_instruction(ins, __opdword(OP_VFLUSH, 0, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_nop(void *cb, int hints) +{ + struct gru_instruction *ins = (void *)cb; + + gru_start_instruction(ins, __opdword(OP_NOP, 0, 0, 0, 0, 0, CB_IMA(hints))); +} + + +static inline void gru_bcopy(void *cb, const unsigned long src, + unsigned long dest, + unsigned int tri0, unsigned int xtype, unsigned long nelem, + unsigned int bufsize, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op2_value_baddr1 = (long)dest; + ins->nelem = nelem; + ins->tri1_bufsize = bufsize; + gru_start_instruction(ins, __opdword(OP_BCOPY, 0, xtype, IAA_RAM, + IAA_RAM, tri0, CB_IMA(hints))); +} + +static inline void gru_bstore(void *cb, const unsigned long src, + unsigned long dest, unsigned int tri0, unsigned int xtype, + unsigned long nelem, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op2_value_baddr1 = (long)dest; + ins->nelem = nelem; + gru_start_instruction(ins, __opdword(OP_BSTORE, 0, xtype, 0, IAA_RAM, + tri0, CB_IMA(hints))); +} + +static inline void gru_gamir(void *cb, int exopc, unsigned long src, + unsigned int xtype, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + gru_start_instruction(ins, __opdword(OP_GAMIR, exopc, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_gamirr(void *cb, int exopc, unsigned long src, + unsigned int xtype, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + gru_start_instruction(ins, __opdword(OP_GAMIRR, exopc, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static 
inline void gru_gamer(void *cb, int exopc, unsigned long src, + unsigned int xtype, + unsigned long operand1, unsigned long operand2, + unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op1_stride = operand1; + ins->op2_value_baddr1 = operand2; + gru_start_instruction(ins, __opdword(OP_GAMER, exopc, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_gamerr(void *cb, int exopc, unsigned long src, + unsigned int xtype, unsigned long operand1, + unsigned long operand2, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op1_stride = operand1; + ins->op2_value_baddr1 = operand2; + gru_start_instruction(ins, __opdword(OP_GAMERR, exopc, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_gamxr(void *cb, unsigned long src, + unsigned int tri0, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->nelem = 4; + gru_start_instruction(ins, __opdword(OP_GAMXR, EOP_XR_CSWAP, XTYPE_DW, + IAA_RAM, 0, 0, CB_IMA(hints))); +} + +static inline void gru_mesq(void *cb, unsigned long queue, + unsigned long tri0, unsigned long nelem, + unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)queue; + ins->nelem = nelem; + gru_start_instruction(ins, __opdword(OP_MESQ, 0, XTYPE_CL, IAA_RAM, 0, + tri0, CB_IMA(hints))); +} + +static inline unsigned long gru_get_amo_value(void *cb) +{ + struct gru_instruction *ins = (void *)cb; + + return ins->avalue; +} + +static inline int gru_get_amo_value_head(void *cb) +{ + struct gru_instruction *ins = (void *)cb; + + return ins->avalue & 0xffffffff; +} + +static inline int gru_get_amo_value_limit(void *cb) +{ + struct gru_instruction *ins = (void *)cb; + + return ins->avalue >> 32; +} + +static inline union gru_mesqhead gru_mesq_head(int head, int limit) +{ + union gru_mesqhead mqh; + + mqh.head = head; + mqh.limit = limit; + return mqh; +} + +/* + * Get struct control_block_extended_exc_detail for CB. + */ +extern int gru_get_cb_exception_detail(void *cb, + struct control_block_extended_exc_detail *excdet); + +#define GRU_EXC_STR_SIZE 256 + + +/* + * Control block definition for checking status + */ +struct gru_control_block_status { + unsigned int icmd :1; + unsigned int ima :3; + unsigned int reserved0 :4; + unsigned int unused1 :24; + unsigned int unused2 :24; + unsigned int istatus :2; + unsigned int isubstatus :4; + unsigned int unused3 :2; +}; + +/* Get CB status */ +static inline int gru_get_cb_status(void *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + + return cbs->istatus; +} + +/* Get CB message queue substatus */ +static inline int gru_get_cb_message_queue_substatus(void *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + + return cbs->isubstatus & CBSS_MSG_QUEUE_MASK; +} + +/* Get CB substatus */ +static inline int gru_get_cb_substatus(void *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + + return cbs->isubstatus; +} + +/* + * User interface to check an instruction status. UPM and exceptions + * are handled automatically. However, this function does NOT wait + * for an active instruction to complete. 
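+ *
+ * A minimal polling sketch (illustrative only; assumes "cb" was obtained
+ * with gru_get_cb_pointer() and an instruction has already been started):
+ *
+ *	while (gru_check_status(cb) == CBS_ACTIVE)
+ *		cpu_relax();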
+ * + */ +static inline int gru_check_status(void *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + int ret; + + ret = cbs->istatus; + if (ret != CBS_ACTIVE) + ret = gru_check_status_proc(cb); + return ret; +} + +/* + * User interface (via inline function) to wait for an instruction + * to complete. Completion status (IDLE or EXCEPTION is returned + * to the user. Exception due to hardware errors are automatically + * retried before returning an exception. + * + */ +static inline int gru_wait(void *cb) +{ + return gru_wait_proc(cb); +} + +/* + * Wait for CB to complete. Aborts program if error. (Note: error does NOT + * mean TLB mis - only fatal errors such as memory parity error or user + * bugs will cause termination. + */ +static inline void gru_wait_abort(void *cb) +{ + gru_wait_abort_proc(cb); +} + +/* + * Get a pointer to the start of a gseg + * p - Any valid pointer within the gseg + */ +static inline void *gru_get_gseg_pointer (void *p) +{ + return (void *)((unsigned long)p & ~(GRU_GSEG_PAGESIZE - 1)); +} + +/* + * Get a pointer to a control block + * gseg - GSeg address returned from gru_get_thread_gru_segment() + * index - index of desired CB + */ +static inline void *gru_get_cb_pointer(void *gseg, + int index) +{ + return gseg + GRU_CB_BASE + index * GRU_HANDLE_STRIDE; +} + +/* + * Get a pointer to a cacheline in the data segment portion of a GSeg + * gseg - GSeg address returned from gru_get_thread_gru_segment() + * index - index of desired cache line + */ +static inline void *gru_get_data_pointer(void *gseg, int index) +{ + return gseg + GRU_DS_BASE + index * GRU_CACHE_LINE_BYTES; +} + +/* + * Convert a vaddr into the tri index within the GSEG + * vaddr - virtual address of within gseg + */ +static inline int gru_get_tri(void *vaddr) +{ + return ((unsigned long)vaddr & (GRU_GSEG_PAGESIZE - 1)) - GRU_DS_BASE; +} +#endif /* __GRU_INSTRUCTIONS_H__ */ diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c new file mode 100644 index 0000000000..629edb6486 --- /dev/null +++ b/drivers/misc/sgi-gru/grufault.c @@ -0,0 +1,903 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SN Platform GRU Driver + * + * FAULT HANDLER FOR GRU DETECTED TLB MISSES + * + * This file contains code that handles TLB misses within the GRU. + * These misses are reported either via interrupts or user polling of + * the user CB. + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "gru.h" +#include "grutables.h" +#include "grulib.h" +#include "gru_instructions.h" +#include + +/* Return codes for vtop functions */ +#define VTOP_SUCCESS 0 +#define VTOP_INVALID -1 +#define VTOP_RETRY -2 + + +/* + * Test if a physical address is a valid GRU GSEG address + */ +static inline int is_gru_paddr(unsigned long paddr) +{ + return paddr >= gru_start_paddr && paddr < gru_end_paddr; +} + +/* + * Find the vma of a GRU segment. Caller must hold mmap_lock. + */ +struct vm_area_struct *gru_find_vma(unsigned long vaddr) +{ + struct vm_area_struct *vma; + + vma = vma_lookup(current->mm, vaddr); + if (vma && vma->vm_ops == &gru_vm_ops) + return vma; + return NULL; +} + +/* + * Find and lock the gts that contains the specified user vaddr. + * + * Returns: + * - *gts with the mmap_lock locked for read and the GTS locked. + * - NULL if vaddr invalid OR is not a valid GSEG vaddr. 
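+ * - On success the caller must release both locks with gru_unlock_gts().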
+ */ + +static struct gru_thread_state *gru_find_lock_gts(unsigned long vaddr) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct gru_thread_state *gts = NULL; + + mmap_read_lock(mm); + vma = gru_find_vma(vaddr); + if (vma) + gts = gru_find_thread_state(vma, TSID(vaddr, vma)); + if (gts) + mutex_lock(>s->ts_ctxlock); + else + mmap_read_unlock(mm); + return gts; +} + +static struct gru_thread_state *gru_alloc_locked_gts(unsigned long vaddr) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct gru_thread_state *gts = ERR_PTR(-EINVAL); + + mmap_write_lock(mm); + vma = gru_find_vma(vaddr); + if (!vma) + goto err; + + gts = gru_alloc_thread_state(vma, TSID(vaddr, vma)); + if (IS_ERR(gts)) + goto err; + mutex_lock(>s->ts_ctxlock); + mmap_write_downgrade(mm); + return gts; + +err: + mmap_write_unlock(mm); + return gts; +} + +/* + * Unlock a GTS that was previously locked with gru_find_lock_gts(). + */ +static void gru_unlock_gts(struct gru_thread_state *gts) +{ + mutex_unlock(>s->ts_ctxlock); + mmap_read_unlock(current->mm); +} + +/* + * Set a CB.istatus to active using a user virtual address. This must be done + * just prior to a TFH RESTART. The new cb.istatus is an in-cache status ONLY. + * If the line is evicted, the status may be lost. The in-cache update + * is necessary to prevent the user from seeing a stale cb.istatus that will + * change as soon as the TFH restart is complete. Races may cause an + * occasional failure to clear the cb.istatus, but that is ok. + */ +static void gru_cb_set_istatus_active(struct gru_instruction_bits *cbk) +{ + if (cbk) { + cbk->istatus = CBS_ACTIVE; + } +} + +/* + * Read & clear a TFM + * + * The GRU has an array of fault maps. A map is private to a cpu + * Only one cpu will be accessing a cpu's fault map. + * + * This function scans the cpu-private fault map & clears all bits that + * are set. The function returns a bitmap that indicates the bits that + * were cleared. Note that sense the maps may be updated asynchronously by + * the GRU, atomic operations must be used to clear bits. + */ +static void get_clear_fault_map(struct gru_state *gru, + struct gru_tlb_fault_map *imap, + struct gru_tlb_fault_map *dmap) +{ + unsigned long i, k; + struct gru_tlb_fault_map *tfm; + + tfm = get_tfm_for_cpu(gru, gru_cpu_fault_map_id()); + prefetchw(tfm); /* Helps on hardware, required for emulator */ + for (i = 0; i < BITS_TO_LONGS(GRU_NUM_CBE); i++) { + k = tfm->fault_bits[i]; + if (k) + k = xchg(&tfm->fault_bits[i], 0UL); + imap->fault_bits[i] = k; + k = tfm->done_bits[i]; + if (k) + k = xchg(&tfm->done_bits[i], 0UL); + dmap->fault_bits[i] = k; + } + + /* + * Not functionally required but helps performance. (Required + * on emulator) + */ + gru_flush_cache(tfm); +} + +/* + * Atomic (interrupt context) & non-atomic (user context) functions to + * convert a vaddr into a physical address. The size of the page + * is returned in pageshift. + * returns: + * 0 - successful + * < 0 - error code + * 1 - (atomic only) try again in non-atomic context + */ +static int non_atomic_pte_lookup(struct vm_area_struct *vma, + unsigned long vaddr, int write, + unsigned long *paddr, int *pageshift) +{ + struct page *page; + +#ifdef CONFIG_HUGETLB_PAGE + *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT; +#else + *pageshift = PAGE_SHIFT; +#endif + if (get_user_pages(vaddr, 1, write ? 
FOLL_WRITE : 0, &page) <= 0) + return -EFAULT; + *paddr = page_to_phys(page); + put_page(page); + return 0; +} + +/* + * atomic_pte_lookup + * + * Convert a user virtual address to a physical address + * Only supports Intel large pages (2MB only) on x86_64. + * ZZZ - hugepage support is incomplete + * + * NOTE: mmap_lock is already held on entry to this function. This + * guarantees existence of the page tables. + */ +static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr, + int write, unsigned long *paddr, int *pageshift) +{ + pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; + pte_t pte; + + pgdp = pgd_offset(vma->vm_mm, vaddr); + if (unlikely(pgd_none(*pgdp))) + goto err; + + p4dp = p4d_offset(pgdp, vaddr); + if (unlikely(p4d_none(*p4dp))) + goto err; + + pudp = pud_offset(p4dp, vaddr); + if (unlikely(pud_none(*pudp))) + goto err; + + pmdp = pmd_offset(pudp, vaddr); + if (unlikely(pmd_none(*pmdp))) + goto err; +#ifdef CONFIG_X86_64 + if (unlikely(pmd_large(*pmdp))) + pte = ptep_get((pte_t *)pmdp); + else +#endif + pte = *pte_offset_kernel(pmdp, vaddr); + + if (unlikely(!pte_present(pte) || + (write && (!pte_write(pte) || !pte_dirty(pte))))) + return 1; + + *paddr = pte_pfn(pte) << PAGE_SHIFT; +#ifdef CONFIG_HUGETLB_PAGE + *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT; +#else + *pageshift = PAGE_SHIFT; +#endif + return 0; + +err: + return 1; +} + +static int gru_vtop(struct gru_thread_state *gts, unsigned long vaddr, + int write, int atomic, unsigned long *gpa, int *pageshift) +{ + struct mm_struct *mm = gts->ts_mm; + struct vm_area_struct *vma; + unsigned long paddr; + int ret, ps; + + vma = find_vma(mm, vaddr); + if (!vma) + goto inval; + + /* + * Atomic lookup is faster & usually works even if called in non-atomic + * context. + */ + rmb(); /* Must/check ms_range_active before loading PTEs */ + ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &ps); + if (ret) { + if (atomic) + goto upm; + if (non_atomic_pte_lookup(vma, vaddr, write, &paddr, &ps)) + goto inval; + } + if (is_gru_paddr(paddr)) + goto inval; + paddr = paddr & ~((1UL << ps) - 1); + *gpa = uv_soc_phys_ram_to_gpa(paddr); + *pageshift = ps; + return VTOP_SUCCESS; + +inval: + return VTOP_INVALID; +upm: + return VTOP_RETRY; +} + + +/* + * Flush a CBE from cache. The CBE is clean in the cache. Dirty the + * CBE cacheline so that the line will be written back to home agent. + * Otherwise the line may be silently dropped. This has no impact + * except on performance. + */ +static void gru_flush_cache_cbe(struct gru_control_block_extended *cbe) +{ + if (unlikely(cbe)) { + cbe->cbrexecstatus = 0; /* make CL dirty */ + gru_flush_cache(cbe); + } +} + +/* + * Preload the TLB with entries that may be required. Currently, preloading + * is implemented only for BCOPY. Preload pages OR to + * the end of the bcopy tranfer, whichever is smaller. 
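+ * (The loop below walks backward one page at a time from that end point
+ * toward the faulting address, dropping in at most tlb_preload_count pages.)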
+ */ +static void gru_preload_tlb(struct gru_state *gru, + struct gru_thread_state *gts, int atomic, + unsigned long fault_vaddr, int asid, int write, + unsigned char tlb_preload_count, + struct gru_tlb_fault_handle *tfh, + struct gru_control_block_extended *cbe) +{ + unsigned long vaddr = 0, gpa; + int ret, pageshift; + + if (cbe->opccpy != OP_BCOPY) + return; + + if (fault_vaddr == cbe->cbe_baddr0) + vaddr = fault_vaddr + GRU_CACHE_LINE_BYTES * cbe->cbe_src_cl - 1; + else if (fault_vaddr == cbe->cbe_baddr1) + vaddr = fault_vaddr + (1 << cbe->xtypecpy) * cbe->cbe_nelemcur - 1; + + fault_vaddr &= PAGE_MASK; + vaddr &= PAGE_MASK; + vaddr = min(vaddr, fault_vaddr + tlb_preload_count * PAGE_SIZE); + + while (vaddr > fault_vaddr) { + ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift); + if (ret || tfh_write_only(tfh, gpa, GAA_RAM, vaddr, asid, write, + GRU_PAGESIZE(pageshift))) + return; + gru_dbg(grudev, + "%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, rw %d, ps %d, gpa 0x%lx\n", + atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh, + vaddr, asid, write, pageshift, gpa); + vaddr -= PAGE_SIZE; + STAT(tlb_preload_page); + } +} + +/* + * Drop a TLB entry into the GRU. The fault is described by info in an TFH. + * Input: + * cb Address of user CBR. Null if not running in user context + * Return: + * 0 = dropin, exception, or switch to UPM successful + * 1 = range invalidate active + * < 0 = error code + * + */ +static int gru_try_dropin(struct gru_state *gru, + struct gru_thread_state *gts, + struct gru_tlb_fault_handle *tfh, + struct gru_instruction_bits *cbk) +{ + struct gru_control_block_extended *cbe = NULL; + unsigned char tlb_preload_count = gts->ts_tlb_preload_count; + int pageshift = 0, asid, write, ret, atomic = !cbk, indexway; + unsigned long gpa = 0, vaddr = 0; + + /* + * NOTE: The GRU contains magic hardware that eliminates races between + * TLB invalidates and TLB dropins. If an invalidate occurs + * in the window between reading the TFH and the subsequent TLB dropin, + * the dropin is ignored. This eliminates the need for additional locks. + */ + + /* + * Prefetch the CBE if doing TLB preloading + */ + if (unlikely(tlb_preload_count)) { + cbe = gru_tfh_to_cbe(tfh); + prefetchw(cbe); + } + + /* + * Error if TFH state is IDLE or FMM mode & the user issuing a UPM call. + * Might be a hardware race OR a stupid user. Ignore FMM because FMM + * is a transient state. + */ + if (tfh->status != TFHSTATUS_EXCEPTION) { + gru_flush_cache(tfh); + sync_core(); + if (tfh->status != TFHSTATUS_EXCEPTION) + goto failnoexception; + STAT(tfh_stale_on_fault); + } + if (tfh->state == TFHSTATE_IDLE) + goto failidle; + if (tfh->state == TFHSTATE_MISS_FMM && cbk) + goto failfmm; + + write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0; + vaddr = tfh->missvaddr; + asid = tfh->missasid; + indexway = tfh->indexway; + if (asid == 0) + goto failnoasid; + + rmb(); /* TFH must be cache resident before reading ms_range_active */ + + /* + * TFH is cache resident - at least briefly. Fail the dropin + * if a range invalidate is active. 
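+ * The miss is simply retried later: interrupt-context callers switch the
+ * CB to user polling mode, while user-context callers wait on
+ * ms_wait_queue in gru_user_dropin() and then retry.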
+ */ + if (atomic_read(>s->ts_gms->ms_range_active)) + goto failactive; + + ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift); + if (ret == VTOP_INVALID) + goto failinval; + if (ret == VTOP_RETRY) + goto failupm; + + if (!(gts->ts_sizeavail & GRU_SIZEAVAIL(pageshift))) { + gts->ts_sizeavail |= GRU_SIZEAVAIL(pageshift); + if (atomic || !gru_update_cch(gts)) { + gts->ts_force_cch_reload = 1; + goto failupm; + } + } + + if (unlikely(cbe) && pageshift == PAGE_SHIFT) { + gru_preload_tlb(gru, gts, atomic, vaddr, asid, write, tlb_preload_count, tfh, cbe); + gru_flush_cache_cbe(cbe); + } + + gru_cb_set_istatus_active(cbk); + gts->ustats.tlbdropin++; + tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write, + GRU_PAGESIZE(pageshift)); + gru_dbg(grudev, + "%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, indexway 0x%x," + " rw %d, ps %d, gpa 0x%lx\n", + atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh, vaddr, asid, + indexway, write, pageshift, gpa); + STAT(tlb_dropin); + return 0; + +failnoasid: + /* No asid (delayed unload). */ + STAT(tlb_dropin_fail_no_asid); + gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); + if (!cbk) + tfh_user_polling_mode(tfh); + else + gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); + return -EAGAIN; + +failupm: + /* Atomic failure switch CBR to UPM */ + tfh_user_polling_mode(tfh); + gru_flush_cache_cbe(cbe); + STAT(tlb_dropin_fail_upm); + gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); + return 1; + +failfmm: + /* FMM state on UPM call */ + gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); + STAT(tlb_dropin_fail_fmm); + gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state); + return 0; + +failnoexception: + /* TFH status did not show exception pending */ + gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); + if (cbk) + gru_flush_cache(cbk); + STAT(tlb_dropin_fail_no_exception); + gru_dbg(grudev, "FAILED non-exception tfh: 0x%p, status %d, state %d\n", + tfh, tfh->status, tfh->state); + return 0; + +failidle: + /* TFH state was idle - no miss pending */ + gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); + if (cbk) + gru_flush_cache(cbk); + STAT(tlb_dropin_fail_idle); + gru_dbg(grudev, "FAILED idle tfh: 0x%p, state %d\n", tfh, tfh->state); + return 0; + +failinval: + /* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */ + tfh_exception(tfh); + gru_flush_cache_cbe(cbe); + STAT(tlb_dropin_fail_invalid); + gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); + return -EFAULT; + +failactive: + /* Range invalidate active. Switch to UPM iff atomic */ + if (!cbk) + tfh_user_polling_mode(tfh); + else + gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); + STAT(tlb_dropin_fail_range_active); + gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n", + tfh, vaddr); + return 1; +} + +/* + * Process an external interrupt from the GRU. This interrupt is + * caused by a TLB miss. + * Note that this is the interrupt handler that is registered with linux + * interrupt handlers. 
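+ *
+ * Both fault maps are scanned: "done" bits complete the blade's async
+ * wait queue, while "fault" bits trigger a TLB dropin attempt. Since this
+ * runs in interrupt context the mmap_lock is only trylocked; on contention
+ * the CB is switched to user polling mode and the fault is retried from
+ * user context.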
+ */ +static irqreturn_t gru_intr(int chiplet, int blade) +{ + struct gru_state *gru; + struct gru_tlb_fault_map imap, dmap; + struct gru_thread_state *gts; + struct gru_tlb_fault_handle *tfh = NULL; + struct completion *cmp; + int cbrnum, ctxnum; + + STAT(intr); + + gru = &gru_base[blade]->bs_grus[chiplet]; + if (!gru) { + dev_err(grudev, "GRU: invalid interrupt: cpu %d, chiplet %d\n", + raw_smp_processor_id(), chiplet); + return IRQ_NONE; + } + get_clear_fault_map(gru, &imap, &dmap); + gru_dbg(grudev, + "cpu %d, chiplet %d, gid %d, imap %016lx %016lx, dmap %016lx %016lx\n", + smp_processor_id(), chiplet, gru->gs_gid, + imap.fault_bits[0], imap.fault_bits[1], + dmap.fault_bits[0], dmap.fault_bits[1]); + + for_each_cbr_in_tfm(cbrnum, dmap.fault_bits) { + STAT(intr_cbr); + cmp = gru->gs_blade->bs_async_wq; + if (cmp) + complete(cmp); + gru_dbg(grudev, "gid %d, cbr_done %d, done %d\n", + gru->gs_gid, cbrnum, cmp ? cmp->done : -1); + } + + for_each_cbr_in_tfm(cbrnum, imap.fault_bits) { + STAT(intr_tfh); + tfh = get_tfh_by_index(gru, cbrnum); + prefetchw(tfh); /* Helps on hdw, required for emulator */ + + /* + * When hardware sets a bit in the faultmap, it implicitly + * locks the GRU context so that it cannot be unloaded. + * The gts cannot change until a TFH start/writestart command + * is issued. + */ + ctxnum = tfh->ctxnum; + gts = gru->gs_gts[ctxnum]; + + /* Spurious interrupts can cause this. Ignore. */ + if (!gts) { + STAT(intr_spurious); + continue; + } + + /* + * This is running in interrupt context. Trylock the mmap_lock. + * If it fails, retry the fault in user context. + */ + gts->ustats.fmm_tlbmiss++; + if (!gts->ts_force_cch_reload && + mmap_read_trylock(gts->ts_mm)) { + gru_try_dropin(gru, gts, tfh, NULL); + mmap_read_unlock(gts->ts_mm); + } else { + tfh_user_polling_mode(tfh); + STAT(intr_mm_lock_failed); + } + } + return IRQ_HANDLED; +} + +irqreturn_t gru0_intr(int irq, void *dev_id) +{ + return gru_intr(0, uv_numa_blade_id()); +} + +irqreturn_t gru1_intr(int irq, void *dev_id) +{ + return gru_intr(1, uv_numa_blade_id()); +} + +irqreturn_t gru_intr_mblade(int irq, void *dev_id) +{ + int blade; + + for_each_possible_blade(blade) { + if (uv_blade_nr_possible_cpus(blade)) + continue; + gru_intr(0, blade); + gru_intr(1, blade); + } + return IRQ_HANDLED; +} + + +static int gru_user_dropin(struct gru_thread_state *gts, + struct gru_tlb_fault_handle *tfh, + void *cb) +{ + struct gru_mm_struct *gms = gts->ts_gms; + int ret; + + gts->ustats.upm_tlbmiss++; + while (1) { + wait_event(gms->ms_wait_queue, + atomic_read(&gms->ms_range_active) == 0); + prefetchw(tfh); /* Helps on hdw, required for emulator */ + ret = gru_try_dropin(gts->ts_gru, gts, tfh, cb); + if (ret <= 0) + return ret; + STAT(call_os_wait_queue); + } +} + +/* + * This interface is called as a result of a user detecting a "call OS" bit + * in a user CB. Normally means that a TLB fault has occurred. + * cb - user virtual address of the CB + */ +int gru_handle_user_call_os(unsigned long cb) +{ + struct gru_tlb_fault_handle *tfh; + struct gru_thread_state *gts; + void *cbk; + int ucbnum, cbrnum, ret = -EINVAL; + + STAT(call_os); + + /* sanity check the cb pointer */ + ucbnum = get_cb_number((void *)cb); + if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB) + return -EINVAL; + +again: + gts = gru_find_lock_gts(cb); + if (!gts) + return -EINVAL; + gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", cb, gts->ts_gru ? 
gts->ts_gru->gs_gid : -1, gts); + + if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) + goto exit; + + if (gru_check_context_placement(gts)) { + gru_unlock_gts(gts); + gru_unload_context(gts, 1); + goto again; + } + + /* + * CCH may contain stale data if ts_force_cch_reload is set. + */ + if (gts->ts_gru && gts->ts_force_cch_reload) { + gts->ts_force_cch_reload = 0; + gru_update_cch(gts); + } + + ret = -EAGAIN; + cbrnum = thread_cbr_number(gts, ucbnum); + if (gts->ts_gru) { + tfh = get_tfh_by_index(gts->ts_gru, cbrnum); + cbk = get_gseg_base_address_cb(gts->ts_gru->gs_gru_base_vaddr, + gts->ts_ctxnum, ucbnum); + ret = gru_user_dropin(gts, tfh, cbk); + } +exit: + gru_unlock_gts(gts); + return ret; +} + +/* + * Fetch the exception detail information for a CB that terminated with + * an exception. + */ +int gru_get_exception_detail(unsigned long arg) +{ + struct control_block_extended_exc_detail excdet; + struct gru_control_block_extended *cbe; + struct gru_thread_state *gts; + int ucbnum, cbrnum, ret; + + STAT(user_exception); + if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet))) + return -EFAULT; + + gts = gru_find_lock_gts(excdet.cb); + if (!gts) + return -EINVAL; + + gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", excdet.cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts); + ucbnum = get_cb_number((void *)excdet.cb); + if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) { + ret = -EINVAL; + } else if (gts->ts_gru) { + cbrnum = thread_cbr_number(gts, ucbnum); + cbe = get_cbe_by_index(gts->ts_gru, cbrnum); + gru_flush_cache(cbe); /* CBE not coherent */ + sync_core(); /* make sure we are have current data */ + excdet.opc = cbe->opccpy; + excdet.exopc = cbe->exopccpy; + excdet.ecause = cbe->ecause; + excdet.exceptdet0 = cbe->idef1upd; + excdet.exceptdet1 = cbe->idef3upd; + excdet.cbrstate = cbe->cbrstate; + excdet.cbrexecstatus = cbe->cbrexecstatus; + gru_flush_cache_cbe(cbe); + ret = 0; + } else { + ret = -EAGAIN; + } + gru_unlock_gts(gts); + + gru_dbg(grudev, + "cb 0x%lx, op %d, exopc %d, cbrstate %d, cbrexecstatus 0x%x, ecause 0x%x, " + "exdet0 0x%lx, exdet1 0x%x\n", + excdet.cb, excdet.opc, excdet.exopc, excdet.cbrstate, excdet.cbrexecstatus, + excdet.ecause, excdet.exceptdet0, excdet.exceptdet1); + if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet))) + ret = -EFAULT; + return ret; +} + +/* + * User request to unload a context. Content is saved for possible reload. 
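+ * A gseg address of 0 unloads every context on every GRU; this requires
+ * CAP_SYS_ADMIN.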
+ */ +static int gru_unload_all_contexts(void) +{ + struct gru_thread_state *gts; + struct gru_state *gru; + int gid, ctxnum; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + foreach_gid(gid) { + gru = GID_TO_GRU(gid); + spin_lock(&gru->gs_lock); + for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) { + gts = gru->gs_gts[ctxnum]; + if (gts && mutex_trylock(>s->ts_ctxlock)) { + spin_unlock(&gru->gs_lock); + gru_unload_context(gts, 1); + mutex_unlock(>s->ts_ctxlock); + spin_lock(&gru->gs_lock); + } + } + spin_unlock(&gru->gs_lock); + } + return 0; +} + +int gru_user_unload_context(unsigned long arg) +{ + struct gru_thread_state *gts; + struct gru_unload_context_req req; + + STAT(user_unload_context); + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + gru_dbg(grudev, "gseg 0x%lx\n", req.gseg); + + if (!req.gseg) + return gru_unload_all_contexts(); + + gts = gru_find_lock_gts(req.gseg); + if (!gts) + return -EINVAL; + + if (gts->ts_gru) + gru_unload_context(gts, 1); + gru_unlock_gts(gts); + + return 0; +} + +/* + * User request to flush a range of virtual addresses from the GRU TLB + * (Mainly for testing). + */ +int gru_user_flush_tlb(unsigned long arg) +{ + struct gru_thread_state *gts; + struct gru_flush_tlb_req req; + struct gru_mm_struct *gms; + + STAT(user_flush_tlb); + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + gru_dbg(grudev, "gseg 0x%lx, vaddr 0x%lx, len 0x%lx\n", req.gseg, + req.vaddr, req.len); + + gts = gru_find_lock_gts(req.gseg); + if (!gts) + return -EINVAL; + + gms = gts->ts_gms; + gru_unlock_gts(gts); + gru_flush_tlb_range(gms, req.vaddr, req.len); + + return 0; +} + +/* + * Fetch GSEG statisticss + */ +long gru_get_gseg_statistics(unsigned long arg) +{ + struct gru_thread_state *gts; + struct gru_get_gseg_statistics_req req; + + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + /* + * The library creates arrays of contexts for threaded programs. + * If no gts exists in the array, the context has never been used & all + * statistics are implicitly 0. + */ + gts = gru_find_lock_gts(req.gseg); + if (gts) { + memcpy(&req.stats, >s->ustats, sizeof(gts->ustats)); + gru_unlock_gts(gts); + } else { + memset(&req.stats, 0, sizeof(gts->ustats)); + } + + if (copy_to_user((void __user *)arg, &req, sizeof(req))) + return -EFAULT; + + return 0; +} + +/* + * Register the current task as the user of the GSEG slice. + * Needed for TLB fault interrupt targeting. 
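+ *
+ * Reached via the GRU_SET_CONTEXT_OPTION ioctl. A rough user-space sketch
+ * (the gru_fd and gseg variables are illustrative assumptions only):
+ *
+ *	struct gru_set_context_option_req req = {
+ *		.gseg = (unsigned long)gseg,
+ *		.op = sco_gseg_owner,
+ *	};
+ *	ioctl(gru_fd, GRU_SET_CONTEXT_OPTION, &req);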
+ */ +int gru_set_context_option(unsigned long arg) +{ + struct gru_thread_state *gts; + struct gru_set_context_option_req req; + int ret = 0; + + STAT(set_context_option); + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + gru_dbg(grudev, "op %d, gseg 0x%lx, value1 0x%lx\n", req.op, req.gseg, req.val1); + + gts = gru_find_lock_gts(req.gseg); + if (!gts) { + gts = gru_alloc_locked_gts(req.gseg); + if (IS_ERR(gts)) + return PTR_ERR(gts); + } + + switch (req.op) { + case sco_blade_chiplet: + /* Select blade/chiplet for GRU context */ + if (req.val0 < -1 || req.val0 >= GRU_CHIPLETS_PER_HUB || + req.val1 < -1 || req.val1 >= GRU_MAX_BLADES || + (req.val1 >= 0 && !gru_base[req.val1])) { + ret = -EINVAL; + } else { + gts->ts_user_blade_id = req.val1; + gts->ts_user_chiplet_id = req.val0; + if (gru_check_context_placement(gts)) { + gru_unlock_gts(gts); + gru_unload_context(gts, 1); + return ret; + } + } + break; + case sco_gseg_owner: + /* Register the current task as the GSEG owner */ + gts->ts_tgid_owner = current->tgid; + break; + case sco_cch_req_slice: + /* Set the CCH slice option */ + gts->ts_cch_req_slice = req.val1 & 3; + break; + default: + ret = -EINVAL; + } + gru_unlock_gts(gts); + + return ret; +} diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c new file mode 100644 index 0000000000..a3d659c11c --- /dev/null +++ b/drivers/misc/sgi-gru/grufile.c @@ -0,0 +1,612 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SN Platform GRU Driver + * + * FILE OPERATIONS & DRIVER INITIALIZATION + * + * This file supports the user system call for file open, close, mmap, etc. + * This also incudes the driver initialization code. + * + * (C) Copyright 2020 Hewlett Packard Enterprise Development LP + * Copyright (c) 2008-2014 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_X86_64 +#include +#endif +#include +#include "gru.h" +#include "grulib.h" +#include "grutables.h" + +#include +#include + +struct gru_blade_state *gru_base[GRU_MAX_BLADES] __read_mostly; +unsigned long gru_start_paddr __read_mostly; +void *gru_start_vaddr __read_mostly; +unsigned long gru_end_paddr __read_mostly; +unsigned int gru_max_gids __read_mostly; +struct gru_stats_s gru_stats; + +/* Guaranteed user available resources on each node */ +static int max_user_cbrs, max_user_dsr_bytes; + +static struct miscdevice gru_miscdev; + +static int gru_supported(void) +{ + return is_uv_system() && + (uv_hub_info->hub_revision < UV3_HUB_REVISION_BASE); +} + +/* + * gru_vma_close + * + * Called when unmapping a device mapping. Frees all gru resources + * and tables belonging to the vma. + */ +static void gru_vma_close(struct vm_area_struct *vma) +{ + struct gru_vma_data *vdata; + struct gru_thread_state *gts; + struct list_head *entry, *next; + + if (!vma->vm_private_data) + return; + + vdata = vma->vm_private_data; + vma->vm_private_data = NULL; + gru_dbg(grudev, "vma %p, file %p, vdata %p\n", vma, vma->vm_file, + vdata); + list_for_each_safe(entry, next, &vdata->vd_head) { + gts = + list_entry(entry, struct gru_thread_state, ts_next); + list_del(>s->ts_next); + mutex_lock(>s->ts_ctxlock); + if (gts->ts_gru) + gru_unload_context(gts, 0); + mutex_unlock(>s->ts_ctxlock); + gts_drop(gts); + } + kfree(vdata); + STAT(vdata_free); +} + +/* + * gru_file_mmap + * + * Called when mmapping the device. 
Initializes the vma with a fault handler + * and private data structure necessary to allocate, track, and free the + * underlying pages. + */ +static int gru_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) != (VM_SHARED | VM_WRITE)) + return -EPERM; + + if (vma->vm_start & (GRU_GSEG_PAGESIZE - 1) || + vma->vm_end & (GRU_GSEG_PAGESIZE - 1)) + return -EINVAL; + + vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_LOCKED | + VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP); + vma->vm_page_prot = PAGE_SHARED; + vma->vm_ops = &gru_vm_ops; + + vma->vm_private_data = gru_alloc_vma_data(vma, 0); + if (!vma->vm_private_data) + return -ENOMEM; + + gru_dbg(grudev, "file %p, vaddr 0x%lx, vma %p, vdata %p\n", + file, vma->vm_start, vma, vma->vm_private_data); + return 0; +} + +/* + * Create a new GRU context + */ +static int gru_create_new_context(unsigned long arg) +{ + struct gru_create_context_req req; + struct vm_area_struct *vma; + struct gru_vma_data *vdata; + int ret = -EINVAL; + + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + if (req.data_segment_bytes > max_user_dsr_bytes) + return -EINVAL; + if (req.control_blocks > max_user_cbrs || !req.maximum_thread_count) + return -EINVAL; + + if (!(req.options & GRU_OPT_MISS_MASK)) + req.options |= GRU_OPT_MISS_FMM_INTR; + + mmap_write_lock(current->mm); + vma = gru_find_vma(req.gseg); + if (vma) { + vdata = vma->vm_private_data; + vdata->vd_user_options = req.options; + vdata->vd_dsr_au_count = + GRU_DS_BYTES_TO_AU(req.data_segment_bytes); + vdata->vd_cbr_au_count = GRU_CB_COUNT_TO_AU(req.control_blocks); + vdata->vd_tlb_preload_count = req.tlb_preload_count; + ret = 0; + } + mmap_write_unlock(current->mm); + + return ret; +} + +/* + * Get GRU configuration info (temp - for emulator testing) + */ +static long gru_get_config_info(unsigned long arg) +{ + struct gru_config_info info; + int nodesperblade; + + if (num_online_nodes() > 1 && + (uv_node_to_blade_id(1) == uv_node_to_blade_id(0))) + nodesperblade = 2; + else + nodesperblade = 1; + memset(&info, 0, sizeof(info)); + info.cpus = num_online_cpus(); + info.nodes = num_online_nodes(); + info.blades = info.nodes / nodesperblade; + info.chiplets = GRU_CHIPLETS_PER_BLADE * info.blades; + + if (copy_to_user((void __user *)arg, &info, sizeof(info))) + return -EFAULT; + return 0; +} + +/* + * gru_file_unlocked_ioctl + * + * Called to update file attributes via IOCTL calls. + */ +static long gru_file_unlocked_ioctl(struct file *file, unsigned int req, + unsigned long arg) +{ + int err = -EBADRQC; + + gru_dbg(grudev, "file %p, req 0x%x, 0x%lx\n", file, req, arg); + + switch (req) { + case GRU_CREATE_CONTEXT: + err = gru_create_new_context(arg); + break; + case GRU_SET_CONTEXT_OPTION: + err = gru_set_context_option(arg); + break; + case GRU_USER_GET_EXCEPTION_DETAIL: + err = gru_get_exception_detail(arg); + break; + case GRU_USER_UNLOAD_CONTEXT: + err = gru_user_unload_context(arg); + break; + case GRU_USER_FLUSH_TLB: + err = gru_user_flush_tlb(arg); + break; + case GRU_USER_CALL_OS: + err = gru_handle_user_call_os(arg); + break; + case GRU_GET_GSEG_STATISTICS: + err = gru_get_gseg_statistics(arg); + break; + case GRU_KTEST: + err = gru_ktest(arg); + break; + case GRU_GET_CONFIG_INFO: + err = gru_get_config_info(arg); + break; + case GRU_DUMP_CHIPLET_STATE: + err = gru_dump_chiplet_request(arg); + break; + } + return err; +} + +/* + * Called at init time to build tables for all GRUs that are present in the + * system. 
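+ * gru_init_tables() below allocates a gru_blade_state for each possible
+ * blade and initializes each chiplet's gru_state via gru_init_chiplet().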
+ */ +static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr, + void *vaddr, int blade_id, int chiplet_id) +{ + spin_lock_init(&gru->gs_lock); + spin_lock_init(&gru->gs_asid_lock); + gru->gs_gru_base_paddr = paddr; + gru->gs_gru_base_vaddr = vaddr; + gru->gs_gid = blade_id * GRU_CHIPLETS_PER_BLADE + chiplet_id; + gru->gs_blade = gru_base[blade_id]; + gru->gs_blade_id = blade_id; + gru->gs_chiplet_id = chiplet_id; + gru->gs_cbr_map = (GRU_CBR_AU == 64) ? ~0 : (1UL << GRU_CBR_AU) - 1; + gru->gs_dsr_map = (1UL << GRU_DSR_AU) - 1; + gru->gs_asid_limit = MAX_ASID; + gru_tgh_flush_init(gru); + if (gru->gs_gid >= gru_max_gids) + gru_max_gids = gru->gs_gid + 1; + gru_dbg(grudev, "bid %d, gid %d, vaddr %p (0x%lx)\n", + blade_id, gru->gs_gid, gru->gs_gru_base_vaddr, + gru->gs_gru_base_paddr); +} + +static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr) +{ + int pnode, nid, bid, chip; + int cbrs, dsrbytes, n; + int order = get_order(sizeof(struct gru_blade_state)); + struct page *page; + struct gru_state *gru; + unsigned long paddr; + void *vaddr; + + max_user_cbrs = GRU_NUM_CB; + max_user_dsr_bytes = GRU_NUM_DSR_BYTES; + for_each_possible_blade(bid) { + pnode = uv_blade_to_pnode(bid); + nid = uv_blade_to_memory_nid(bid);/* -1 if no memory on blade */ + page = alloc_pages_node(nid, GFP_KERNEL, order); + if (!page) + goto fail; + gru_base[bid] = page_address(page); + memset(gru_base[bid], 0, sizeof(struct gru_blade_state)); + gru_base[bid]->bs_lru_gru = &gru_base[bid]->bs_grus[0]; + spin_lock_init(&gru_base[bid]->bs_lock); + init_rwsem(&gru_base[bid]->bs_kgts_sema); + + dsrbytes = 0; + cbrs = 0; + for (gru = gru_base[bid]->bs_grus, chip = 0; + chip < GRU_CHIPLETS_PER_BLADE; + chip++, gru++) { + paddr = gru_chiplet_paddr(gru_base_paddr, pnode, chip); + vaddr = gru_chiplet_vaddr(gru_base_vaddr, pnode, chip); + gru_init_chiplet(gru, paddr, vaddr, bid, chip); + n = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE; + cbrs = max(cbrs, n); + n = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES; + dsrbytes = max(dsrbytes, n); + } + max_user_cbrs = min(max_user_cbrs, cbrs); + max_user_dsr_bytes = min(max_user_dsr_bytes, dsrbytes); + } + + return 0; + +fail: + for (bid--; bid >= 0; bid--) + free_pages((unsigned long)gru_base[bid], order); + return -ENOMEM; +} + +static void gru_free_tables(void) +{ + int bid; + int order = get_order(sizeof(struct gru_state) * + GRU_CHIPLETS_PER_BLADE); + + for (bid = 0; bid < GRU_MAX_BLADES; bid++) + free_pages((unsigned long)gru_base[bid], order); +} + +static unsigned long gru_chiplet_cpu_to_mmr(int chiplet, int cpu, int *corep) +{ + unsigned long mmr = 0; + int core; + + /* + * We target the cores of a blade and not the hyperthreads themselves. + * There is a max of 8 cores per socket and 2 sockets per blade, + * making for a max total of 16 cores (i.e., 16 CPUs without + * hyperthreading and 32 CPUs with hyperthreading). 
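+ * Only the first hyperthread of each core is given a TLB interrupt MMR;
+ * for sibling threads and cores beyond GRU_NUM_TFM the function returns 0
+ * and no interrupt is registered.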
+ */ + core = uv_cpu_core_number(cpu) + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu); + if (core >= GRU_NUM_TFM || uv_cpu_ht_number(cpu)) + return 0; + + if (chiplet == 0) { + mmr = UVH_GR0_TLB_INT0_CONFIG + + core * (UVH_GR0_TLB_INT1_CONFIG - UVH_GR0_TLB_INT0_CONFIG); + } else if (chiplet == 1) { + mmr = UVH_GR1_TLB_INT0_CONFIG + + core * (UVH_GR1_TLB_INT1_CONFIG - UVH_GR1_TLB_INT0_CONFIG); + } else { + BUG(); + } + + *corep = core; + return mmr; +} + +#ifdef CONFIG_IA64 + +static int gru_irq_count[GRU_CHIPLETS_PER_BLADE]; + +static void gru_noop(struct irq_data *d) +{ +} + +static struct irq_chip gru_chip[GRU_CHIPLETS_PER_BLADE] = { + [0 ... GRU_CHIPLETS_PER_BLADE - 1] { + .irq_mask = gru_noop, + .irq_unmask = gru_noop, + .irq_ack = gru_noop + } +}; + +static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name, + irq_handler_t irq_handler, int cpu, int blade) +{ + unsigned long mmr; + int irq = IRQ_GRU + chiplet; + int ret, core; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr == 0) + return 0; + + if (gru_irq_count[chiplet] == 0) { + gru_chip[chiplet].name = irq_name; + ret = irq_set_chip(irq, &gru_chip[chiplet]); + if (ret) { + printk(KERN_ERR "%s: set_irq_chip failed, errno=%d\n", + GRU_DRIVER_ID_STR, -ret); + return ret; + } + + ret = request_irq(irq, irq_handler, 0, irq_name, NULL); + if (ret) { + printk(KERN_ERR "%s: request_irq failed, errno=%d\n", + GRU_DRIVER_ID_STR, -ret); + return ret; + } + } + gru_irq_count[chiplet]++; + + return 0; +} + +static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade) +{ + unsigned long mmr; + int core, irq = IRQ_GRU + chiplet; + + if (gru_irq_count[chiplet] == 0) + return; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr == 0) + return; + + if (--gru_irq_count[chiplet] == 0) + free_irq(irq, NULL); +} + +#elif defined CONFIG_X86_64 + +static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name, + irq_handler_t irq_handler, int cpu, int blade) +{ + unsigned long mmr; + int irq, core; + int ret; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr == 0) + return 0; + + irq = uv_setup_irq(irq_name, cpu, blade, mmr, UV_AFFINITY_CPU); + if (irq < 0) { + printk(KERN_ERR "%s: uv_setup_irq failed, errno=%d\n", + GRU_DRIVER_ID_STR, -irq); + return irq; + } + + ret = request_irq(irq, irq_handler, 0, irq_name, NULL); + if (ret) { + uv_teardown_irq(irq); + printk(KERN_ERR "%s: request_irq failed, errno=%d\n", + GRU_DRIVER_ID_STR, -ret); + return ret; + } + gru_base[blade]->bs_grus[chiplet].gs_irq[core] = irq; + return 0; +} + +static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade) +{ + int irq, core; + unsigned long mmr; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr) { + irq = gru_base[blade]->bs_grus[chiplet].gs_irq[core]; + if (irq) { + free_irq(irq, NULL); + uv_teardown_irq(irq); + } + } +} + +#endif + +static void gru_teardown_tlb_irqs(void) +{ + int blade; + int cpu; + + for_each_online_cpu(cpu) { + blade = uv_cpu_to_blade_id(cpu); + gru_chiplet_teardown_tlb_irq(0, cpu, blade); + gru_chiplet_teardown_tlb_irq(1, cpu, blade); + } + for_each_possible_blade(blade) { + if (uv_blade_nr_possible_cpus(blade)) + continue; + gru_chiplet_teardown_tlb_irq(0, 0, blade); + gru_chiplet_teardown_tlb_irq(1, 0, blade); + } +} + +static int gru_setup_tlb_irqs(void) +{ + int blade; + int cpu; + int ret; + + for_each_online_cpu(cpu) { + blade = uv_cpu_to_blade_id(cpu); + ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru0_intr, cpu, blade); + if (ret != 0) + goto 
exit1; + + ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru1_intr, cpu, blade); + if (ret != 0) + goto exit1; + } + for_each_possible_blade(blade) { + if (uv_blade_nr_possible_cpus(blade)) + continue; + ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru_intr_mblade, 0, blade); + if (ret != 0) + goto exit1; + + ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru_intr_mblade, 0, blade); + if (ret != 0) + goto exit1; + } + + return 0; + +exit1: + gru_teardown_tlb_irqs(); + return ret; +} + +/* + * gru_init + * + * Called at boot or module load time to initialize the GRUs. + */ +static int __init gru_init(void) +{ + int ret; + + if (!gru_supported()) + return 0; + +#if defined CONFIG_IA64 + gru_start_paddr = 0xd000000000UL; /* ZZZZZZZZZZZZZZZZZZZ fixme */ +#else + gru_start_paddr = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG) & + 0x7fffffffffffUL; +#endif + gru_start_vaddr = __va(gru_start_paddr); + gru_end_paddr = gru_start_paddr + GRU_MAX_BLADES * GRU_SIZE; + printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n", + gru_start_paddr, gru_end_paddr); + ret = misc_register(&gru_miscdev); + if (ret) { + printk(KERN_ERR "%s: misc_register failed\n", + GRU_DRIVER_ID_STR); + goto exit0; + } + + ret = gru_proc_init(); + if (ret) { + printk(KERN_ERR "%s: proc init failed\n", GRU_DRIVER_ID_STR); + goto exit1; + } + + ret = gru_init_tables(gru_start_paddr, gru_start_vaddr); + if (ret) { + printk(KERN_ERR "%s: init tables failed\n", GRU_DRIVER_ID_STR); + goto exit2; + } + + ret = gru_setup_tlb_irqs(); + if (ret != 0) + goto exit3; + + gru_kservices_init(); + + printk(KERN_INFO "%s: v%s\n", GRU_DRIVER_ID_STR, + GRU_DRIVER_VERSION_STR); + return 0; + +exit3: + gru_free_tables(); +exit2: + gru_proc_exit(); +exit1: + misc_deregister(&gru_miscdev); +exit0: + return ret; + +} + +static void __exit gru_exit(void) +{ + if (!gru_supported()) + return; + + gru_teardown_tlb_irqs(); + gru_kservices_exit(); + gru_free_tables(); + misc_deregister(&gru_miscdev); + gru_proc_exit(); + mmu_notifier_synchronize(); +} + +static const struct file_operations gru_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = gru_file_unlocked_ioctl, + .mmap = gru_file_mmap, + .llseek = noop_llseek, +}; + +static struct miscdevice gru_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "gru", + .fops = &gru_fops, +}; + +const struct vm_operations_struct gru_vm_ops = { + .close = gru_vma_close, + .fault = gru_fault, +}; + +#ifndef MODULE +fs_initcall(gru_init); +#else +module_init(gru_init); +#endif +module_exit(gru_exit); + +module_param(gru_options, ulong, 0644); +MODULE_PARM_DESC(gru_options, "Various debug options"); + +MODULE_AUTHOR("Silicon Graphics, Inc."); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION(GRU_DRIVER_ID_STR GRU_DRIVER_VERSION_STR); +MODULE_VERSION(GRU_DRIVER_VERSION_STR); + diff --git a/drivers/misc/sgi-gru/gruhandles.c b/drivers/misc/sgi-gru/gruhandles.c new file mode 100644 index 0000000000..1d75d5e540 --- /dev/null +++ b/drivers/misc/sgi-gru/gruhandles.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * GRU KERNEL MCS INSTRUCTIONS + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. 
+ */ + +#include +#include "gru.h" +#include "grulib.h" +#include "grutables.h" + +/* 10 sec */ +#ifdef CONFIG_IA64 +#include +#define GRU_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10) +#define CLKS2NSEC(c) ((c) *1000000000 / local_cpu_data->itc_freq) +#else +#include +#include +#define GRU_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) +#define CLKS2NSEC(c) ((c) * 1000000 / tsc_khz) +#endif + +/* Extract the status field from a kernel handle */ +#define GET_MSEG_HANDLE_STATUS(h) (((*(unsigned long *)(h)) >> 16) & 3) + +struct mcs_op_statistic mcs_op_statistics[mcsop_last]; + +static void update_mcs_stats(enum mcs_op op, unsigned long clks) +{ + unsigned long nsec; + + nsec = CLKS2NSEC(clks); + atomic_long_inc(&mcs_op_statistics[op].count); + atomic_long_add(nsec, &mcs_op_statistics[op].total); + if (mcs_op_statistics[op].max < nsec) + mcs_op_statistics[op].max = nsec; +} + +static void start_instruction(void *h) +{ + unsigned long *w0 = h; + + wmb(); /* setting CMD/STATUS bits must be last */ + *w0 = *w0 | 0x20001; + gru_flush_cache(h); +} + +static void report_instruction_timeout(void *h) +{ + unsigned long goff = GSEGPOFF((unsigned long)h); + char *id = "???"; + + if (TYPE_IS(CCH, goff)) + id = "CCH"; + else if (TYPE_IS(TGH, goff)) + id = "TGH"; + else if (TYPE_IS(TFH, goff)) + id = "TFH"; + + panic(KERN_ALERT "GRU %p (%s) is malfunctioning\n", h, id); +} + +static int wait_instruction_complete(void *h, enum mcs_op opc) +{ + int status; + unsigned long start_time = get_cycles(); + + while (1) { + cpu_relax(); + status = GET_MSEG_HANDLE_STATUS(h); + if (status != CCHSTATUS_ACTIVE) + break; + if (GRU_OPERATION_TIMEOUT < (get_cycles() - start_time)) { + report_instruction_timeout(h); + start_time = get_cycles(); + } + } + if (gru_options & OPT_STATS) + update_mcs_stats(opc, get_cycles() - start_time); + return status; +} + +int cch_allocate(struct gru_context_configuration_handle *cch) +{ + int ret; + + cch->opc = CCHOP_ALLOCATE; + start_instruction(cch); + ret = wait_instruction_complete(cch, cchop_allocate); + + /* + * Stop speculation into the GSEG being mapped by the previous ALLOCATE. + * The GSEG memory does not exist until the ALLOCATE completes. + */ + sync_core(); + return ret; +} + +int cch_start(struct gru_context_configuration_handle *cch) +{ + cch->opc = CCHOP_START; + start_instruction(cch); + return wait_instruction_complete(cch, cchop_start); +} + +int cch_interrupt(struct gru_context_configuration_handle *cch) +{ + cch->opc = CCHOP_INTERRUPT; + start_instruction(cch); + return wait_instruction_complete(cch, cchop_interrupt); +} + +int cch_deallocate(struct gru_context_configuration_handle *cch) +{ + int ret; + + cch->opc = CCHOP_DEALLOCATE; + start_instruction(cch); + ret = wait_instruction_complete(cch, cchop_deallocate); + + /* + * Stop speculation into the GSEG being unmapped by the previous + * DEALLOCATE. 
+ */ + sync_core(); + return ret; +} + +int cch_interrupt_sync(struct gru_context_configuration_handle + *cch) +{ + cch->opc = CCHOP_INTERRUPT_SYNC; + start_instruction(cch); + return wait_instruction_complete(cch, cchop_interrupt_sync); +} + +int tgh_invalidate(struct gru_tlb_global_handle *tgh, + unsigned long vaddr, unsigned long vaddrmask, + int asid, int pagesize, int global, int n, + unsigned short ctxbitmap) +{ + tgh->vaddr = vaddr; + tgh->asid = asid; + tgh->pagesize = pagesize; + tgh->n = n; + tgh->global = global; + tgh->vaddrmask = vaddrmask; + tgh->ctxbitmap = ctxbitmap; + tgh->opc = TGHOP_TLBINV; + start_instruction(tgh); + return wait_instruction_complete(tgh, tghop_invalidate); +} + +int tfh_write_only(struct gru_tlb_fault_handle *tfh, + unsigned long paddr, int gaa, + unsigned long vaddr, int asid, int dirty, + int pagesize) +{ + tfh->fillasid = asid; + tfh->fillvaddr = vaddr; + tfh->pfn = paddr >> GRU_PADDR_SHIFT; + tfh->gaa = gaa; + tfh->dirty = dirty; + tfh->pagesize = pagesize; + tfh->opc = TFHOP_WRITE_ONLY; + start_instruction(tfh); + return wait_instruction_complete(tfh, tfhop_write_only); +} + +void tfh_write_restart(struct gru_tlb_fault_handle *tfh, + unsigned long paddr, int gaa, + unsigned long vaddr, int asid, int dirty, + int pagesize) +{ + tfh->fillasid = asid; + tfh->fillvaddr = vaddr; + tfh->pfn = paddr >> GRU_PADDR_SHIFT; + tfh->gaa = gaa; + tfh->dirty = dirty; + tfh->pagesize = pagesize; + tfh->opc = TFHOP_WRITE_RESTART; + start_instruction(tfh); +} + +void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh) +{ + tfh->opc = TFHOP_USER_POLLING_MODE; + start_instruction(tfh); +} + +void tfh_exception(struct gru_tlb_fault_handle *tfh) +{ + tfh->opc = TFHOP_EXCEPTION; + start_instruction(tfh); +} + diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h new file mode 100644 index 0000000000..5a498bf8d0 --- /dev/null +++ b/drivers/misc/sgi-gru/gruhandles.h @@ -0,0 +1,517 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SN Platform GRU Driver + * + * GRU HANDLE DEFINITION + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +#ifndef __GRUHANDLES_H__ +#define __GRUHANDLES_H__ +#include "gru_instructions.h" + +/* + * Manifest constants for GRU Memory Map + */ +#define GRU_GSEG0_BASE 0 +#define GRU_MCS_BASE (64 * 1024 * 1024) +#define GRU_SIZE (128UL * 1024 * 1024) + +/* Handle & resource counts */ +#define GRU_NUM_CB 128 +#define GRU_NUM_DSR_BYTES (32 * 1024) +#define GRU_NUM_TFM 16 +#define GRU_NUM_TGH 24 +#define GRU_NUM_CBE 128 +#define GRU_NUM_TFH 128 +#define GRU_NUM_CCH 16 + +/* Maximum resource counts that can be reserved by user programs */ +#define GRU_NUM_USER_CBR GRU_NUM_CBE +#define GRU_NUM_USER_DSR_BYTES GRU_NUM_DSR_BYTES + +/* Bytes per handle & handle stride. 
Code assumes all cb, tfh, cbe handles + * are the same */ +#define GRU_HANDLE_BYTES 64 +#define GRU_HANDLE_STRIDE 256 + +/* Base addresses of handles */ +#define GRU_TFM_BASE (GRU_MCS_BASE + 0x00000) +#define GRU_TGH_BASE (GRU_MCS_BASE + 0x08000) +#define GRU_CBE_BASE (GRU_MCS_BASE + 0x10000) +#define GRU_TFH_BASE (GRU_MCS_BASE + 0x18000) +#define GRU_CCH_BASE (GRU_MCS_BASE + 0x20000) + +/* User gseg constants */ +#define GRU_GSEG_STRIDE (4 * 1024 * 1024) +#define GSEG_BASE(a) ((a) & ~(GRU_GSEG_PAGESIZE - 1)) + +/* Data segment constants */ +#define GRU_DSR_AU_BYTES 1024 +#define GRU_DSR_CL (GRU_NUM_DSR_BYTES / GRU_CACHE_LINE_BYTES) +#define GRU_DSR_AU_CL (GRU_DSR_AU_BYTES / GRU_CACHE_LINE_BYTES) +#define GRU_DSR_AU (GRU_NUM_DSR_BYTES / GRU_DSR_AU_BYTES) + +/* Control block constants */ +#define GRU_CBR_AU_SIZE 2 +#define GRU_CBR_AU (GRU_NUM_CBE / GRU_CBR_AU_SIZE) + +/* Convert resource counts to the number of AU */ +#define GRU_DS_BYTES_TO_AU(n) DIV_ROUND_UP(n, GRU_DSR_AU_BYTES) +#define GRU_CB_COUNT_TO_AU(n) DIV_ROUND_UP(n, GRU_CBR_AU_SIZE) + +/* UV limits */ +#define GRU_CHIPLETS_PER_HUB 2 +#define GRU_HUBS_PER_BLADE 1 +#define GRU_CHIPLETS_PER_BLADE (GRU_HUBS_PER_BLADE * GRU_CHIPLETS_PER_HUB) + +/* User GRU Gseg offsets */ +#define GRU_CB_BASE 0 +#define GRU_CB_LIMIT (GRU_CB_BASE + GRU_HANDLE_STRIDE * GRU_NUM_CBE) +#define GRU_DS_BASE 0x20000 +#define GRU_DS_LIMIT (GRU_DS_BASE + GRU_NUM_DSR_BYTES) + +/* Convert a GRU physical address to the chiplet offset */ +#define GSEGPOFF(h) ((h) & (GRU_SIZE - 1)) + +/* Convert an arbitrary handle address to the beginning of the GRU segment */ +#define GRUBASE(h) ((void *)((unsigned long)(h) & ~(GRU_SIZE - 1))) + +/* Test a valid handle address to determine the type */ +#define TYPE_IS(hn, h) ((h) >= GRU_##hn##_BASE && (h) < \ + GRU_##hn##_BASE + GRU_NUM_##hn * GRU_HANDLE_STRIDE && \ + (((h) & (GRU_HANDLE_STRIDE - 1)) == 0)) + + +/* General addressing macros. 
*/ +static inline void *get_gseg_base_address(void *base, int ctxnum) +{ + return (void *)(base + GRU_GSEG0_BASE + GRU_GSEG_STRIDE * ctxnum); +} + +static inline void *get_gseg_base_address_cb(void *base, int ctxnum, int line) +{ + return (void *)(get_gseg_base_address(base, ctxnum) + + GRU_CB_BASE + GRU_HANDLE_STRIDE * line); +} + +static inline void *get_gseg_base_address_ds(void *base, int ctxnum, int line) +{ + return (void *)(get_gseg_base_address(base, ctxnum) + GRU_DS_BASE + + GRU_CACHE_LINE_BYTES * line); +} + +static inline struct gru_tlb_fault_map *get_tfm(void *base, int ctxnum) +{ + return (struct gru_tlb_fault_map *)(base + GRU_TFM_BASE + + ctxnum * GRU_HANDLE_STRIDE); +} + +static inline struct gru_tlb_global_handle *get_tgh(void *base, int ctxnum) +{ + return (struct gru_tlb_global_handle *)(base + GRU_TGH_BASE + + ctxnum * GRU_HANDLE_STRIDE); +} + +static inline struct gru_control_block_extended *get_cbe(void *base, int ctxnum) +{ + return (struct gru_control_block_extended *)(base + GRU_CBE_BASE + + ctxnum * GRU_HANDLE_STRIDE); +} + +static inline struct gru_tlb_fault_handle *get_tfh(void *base, int ctxnum) +{ + return (struct gru_tlb_fault_handle *)(base + GRU_TFH_BASE + + ctxnum * GRU_HANDLE_STRIDE); +} + +static inline struct gru_context_configuration_handle *get_cch(void *base, + int ctxnum) +{ + return (struct gru_context_configuration_handle *)(base + + GRU_CCH_BASE + ctxnum * GRU_HANDLE_STRIDE); +} + +static inline unsigned long get_cb_number(void *cb) +{ + return (((unsigned long)cb - GRU_CB_BASE) % GRU_GSEG_PAGESIZE) / + GRU_HANDLE_STRIDE; +} + +/* byte offset to a specific GRU chiplet. (p=pnode, c=chiplet (0 or 1)*/ +static inline unsigned long gru_chiplet_paddr(unsigned long paddr, int pnode, + int chiplet) +{ + return paddr + GRU_SIZE * (2 * pnode + chiplet); +} + +static inline void *gru_chiplet_vaddr(void *vaddr, int pnode, int chiplet) +{ + return vaddr + GRU_SIZE * (2 * pnode + chiplet); +} + +static inline struct gru_control_block_extended *gru_tfh_to_cbe( + struct gru_tlb_fault_handle *tfh) +{ + unsigned long cbe; + + cbe = (unsigned long)tfh - GRU_TFH_BASE + GRU_CBE_BASE; + return (struct gru_control_block_extended*)cbe; +} + + + + +/* + * Global TLB Fault Map + * Bitmap of outstanding TLB misses needing interrupt/polling service. + * + */ +struct gru_tlb_fault_map { + unsigned long fault_bits[BITS_TO_LONGS(GRU_NUM_CBE)]; + unsigned long fill0[2]; + unsigned long done_bits[BITS_TO_LONGS(GRU_NUM_CBE)]; + unsigned long fill1[2]; +}; + +/* + * TGH - TLB Global Handle + * Used for TLB flushing. 
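+ * A TLB flush is issued by filling in the vaddr/asid/pagesize fields and
+ * setting opc to TGHOP_TLBINV; see tgh_invalidate() in gruhandles.c.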
+ * + */ +struct gru_tlb_global_handle { + unsigned int cmd:1; /* DW 0 */ + unsigned int delresp:1; + unsigned int opc:1; + unsigned int fill1:5; + + unsigned int fill2:8; + + unsigned int status:2; + unsigned long fill3:2; + unsigned int state:3; + unsigned long fill4:1; + + unsigned int cause:3; + unsigned long fill5:37; + + unsigned long vaddr:64; /* DW 1 */ + + unsigned int asid:24; /* DW 2 */ + unsigned int fill6:8; + + unsigned int pagesize:5; + unsigned int fill7:11; + + unsigned int global:1; + unsigned int fill8:15; + + unsigned long vaddrmask:39; /* DW 3 */ + unsigned int fill9:9; + unsigned int n:10; + unsigned int fill10:6; + + unsigned int ctxbitmap:16; /* DW4 */ + unsigned long fill11[3]; +}; + +enum gru_tgh_cmd { + TGHCMD_START +}; + +enum gru_tgh_opc { + TGHOP_TLBNOP, + TGHOP_TLBINV +}; + +enum gru_tgh_status { + TGHSTATUS_IDLE, + TGHSTATUS_EXCEPTION, + TGHSTATUS_ACTIVE +}; + +enum gru_tgh_state { + TGHSTATE_IDLE, + TGHSTATE_PE_INVAL, + TGHSTATE_INTERRUPT_INVAL, + TGHSTATE_WAITDONE, + TGHSTATE_RESTART_CTX, +}; + +enum gru_tgh_cause { + TGHCAUSE_RR_ECC, + TGHCAUSE_TLB_ECC, + TGHCAUSE_LRU_ECC, + TGHCAUSE_PS_ECC, + TGHCAUSE_MUL_ERR, + TGHCAUSE_DATA_ERR, + TGHCAUSE_SW_FORCE +}; + + +/* + * TFH - TLB Global Handle + * Used for TLB dropins into the GRU TLB. + * + */ +struct gru_tlb_fault_handle { + unsigned int cmd:1; /* DW 0 - low 32*/ + unsigned int delresp:1; + unsigned int fill0:2; + unsigned int opc:3; + unsigned int fill1:9; + + unsigned int status:2; + unsigned int fill2:2; + unsigned int state:3; + unsigned int fill3:1; + + unsigned int cause:6; + unsigned int cb_int:1; + unsigned int fill4:1; + + unsigned int indexway:12; /* DW 0 - high 32 */ + unsigned int fill5:4; + + unsigned int ctxnum:4; + unsigned int fill6:12; + + unsigned long missvaddr:64; /* DW 1 */ + + unsigned int missasid:24; /* DW 2 */ + unsigned int fill7:8; + unsigned int fillasid:24; + unsigned int dirty:1; + unsigned int gaa:2; + unsigned long fill8:5; + + unsigned long pfn:41; /* DW 3 */ + unsigned int fill9:7; + unsigned int pagesize:5; + unsigned int fill10:11; + + unsigned long fillvaddr:64; /* DW 4 */ + + unsigned long fill11[3]; +}; + +enum gru_tfh_opc { + TFHOP_NOOP, + TFHOP_RESTART, + TFHOP_WRITE_ONLY, + TFHOP_WRITE_RESTART, + TFHOP_EXCEPTION, + TFHOP_USER_POLLING_MODE = 7, +}; + +enum tfh_status { + TFHSTATUS_IDLE, + TFHSTATUS_EXCEPTION, + TFHSTATUS_ACTIVE, +}; + +enum tfh_state { + TFHSTATE_INACTIVE, + TFHSTATE_IDLE, + TFHSTATE_MISS_UPM, + TFHSTATE_MISS_FMM, + TFHSTATE_HW_ERR, + TFHSTATE_WRITE_TLB, + TFHSTATE_RESTART_CBR, +}; + +/* TFH cause bits */ +enum tfh_cause { + TFHCAUSE_NONE, + TFHCAUSE_TLB_MISS, + TFHCAUSE_TLB_MOD, + TFHCAUSE_HW_ERROR_RR, + TFHCAUSE_HW_ERROR_MAIN_ARRAY, + TFHCAUSE_HW_ERROR_VALID, + TFHCAUSE_HW_ERROR_PAGESIZE, + TFHCAUSE_INSTRUCTION_EXCEPTION, + TFHCAUSE_UNCORRECTIBLE_ERROR, +}; + +/* GAA values */ +#define GAA_RAM 0x0 +#define GAA_NCRAM 0x2 +#define GAA_MMIO 0x1 +#define GAA_REGISTER 0x3 + +/* GRU paddr shift for pfn. (NOTE: shift is NOT by actual pagesize) */ +#define GRU_PADDR_SHIFT 12 + +/* + * Context Configuration handle + * Used to allocate resources to a GSEG context. 
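+ * A context is made resident by programming this handle (resource
+ * allocation maps, ASIDs) and issuing CCHOP_ALLOCATE / CCHOP_START via
+ * cch_allocate() and cch_start() in gruhandles.c.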
+ * + */ +struct gru_context_configuration_handle { + unsigned int cmd:1; /* DW0 */ + unsigned int delresp:1; + unsigned int opc:3; + unsigned int unmap_enable:1; + unsigned int req_slice_set_enable:1; + unsigned int req_slice:2; + unsigned int cb_int_enable:1; + unsigned int tlb_int_enable:1; + unsigned int tfm_fault_bit_enable:1; + unsigned int tlb_int_select:4; + + unsigned int status:2; + unsigned int state:2; + unsigned int reserved2:4; + + unsigned int cause:4; + unsigned int tfm_done_bit_enable:1; + unsigned int unused:3; + + unsigned int dsr_allocation_map; + + unsigned long cbr_allocation_map; /* DW1 */ + + unsigned int asid[8]; /* DW 2 - 5 */ + unsigned short sizeavail[8]; /* DW 6 - 7 */ +} __attribute__ ((packed)); + +enum gru_cch_opc { + CCHOP_START = 1, + CCHOP_ALLOCATE, + CCHOP_INTERRUPT, + CCHOP_DEALLOCATE, + CCHOP_INTERRUPT_SYNC, +}; + +enum gru_cch_status { + CCHSTATUS_IDLE, + CCHSTATUS_EXCEPTION, + CCHSTATUS_ACTIVE, +}; + +enum gru_cch_state { + CCHSTATE_INACTIVE, + CCHSTATE_MAPPED, + CCHSTATE_ACTIVE, + CCHSTATE_INTERRUPTED, +}; + +/* CCH Exception cause */ +enum gru_cch_cause { + CCHCAUSE_REGION_REGISTER_WRITE_ERROR = 1, + CCHCAUSE_ILLEGAL_OPCODE = 2, + CCHCAUSE_INVALID_START_REQUEST = 3, + CCHCAUSE_INVALID_ALLOCATION_REQUEST = 4, + CCHCAUSE_INVALID_DEALLOCATION_REQUEST = 5, + CCHCAUSE_INVALID_INTERRUPT_REQUEST = 6, + CCHCAUSE_CCH_BUSY = 7, + CCHCAUSE_NO_CBRS_TO_ALLOCATE = 8, + CCHCAUSE_BAD_TFM_CONFIG = 9, + CCHCAUSE_CBR_RESOURCES_OVERSUBSCRIPED = 10, + CCHCAUSE_DSR_RESOURCES_OVERSUBSCRIPED = 11, + CCHCAUSE_CBR_DEALLOCATION_ERROR = 12, +}; +/* + * CBE - Control Block Extended + * Maintains internal GRU state for active CBs. + * + */ +struct gru_control_block_extended { + unsigned int reserved0:1; /* DW 0 - low */ + unsigned int imacpy:3; + unsigned int reserved1:4; + unsigned int xtypecpy:3; + unsigned int iaa0cpy:2; + unsigned int iaa1cpy:2; + unsigned int reserved2:1; + unsigned int opccpy:8; + unsigned int exopccpy:8; + + unsigned int idef2cpy:22; /* DW 0 - high */ + unsigned int reserved3:10; + + unsigned int idef4cpy:22; /* DW 1 */ + unsigned int reserved4:10; + unsigned int idef4upd:22; + unsigned int reserved5:10; + + unsigned long idef1upd:64; /* DW 2 */ + + unsigned long idef5cpy:64; /* DW 3 */ + + unsigned long idef6cpy:64; /* DW 4 */ + + unsigned long idef3upd:64; /* DW 5 */ + + unsigned long idef5upd:64; /* DW 6 */ + + unsigned int idef2upd:22; /* DW 7 */ + unsigned int reserved6:10; + + unsigned int ecause:20; + unsigned int cbrstate:4; + unsigned int cbrexecstatus:8; +}; + +/* CBE fields for active BCOPY instructions */ +#define cbe_baddr0 idef1upd +#define cbe_baddr1 idef3upd +#define cbe_src_cl idef6cpy +#define cbe_nelemcur idef5upd + +enum gru_cbr_state { + CBRSTATE_INACTIVE, + CBRSTATE_IDLE, + CBRSTATE_PE_CHECK, + CBRSTATE_QUEUED, + CBRSTATE_WAIT_RESPONSE, + CBRSTATE_INTERRUPTED, + CBRSTATE_INTERRUPTED_MISS_FMM, + CBRSTATE_BUSY_INTERRUPT_MISS_FMM, + CBRSTATE_INTERRUPTED_MISS_UPM, + CBRSTATE_BUSY_INTERRUPTED_MISS_UPM, + CBRSTATE_REQUEST_ISSUE, + CBRSTATE_BUSY_INTERRUPT, +}; + +/* CBE cbrexecstatus bits - defined in gru_instructions.h*/ +/* CBE ecause bits - defined in gru_instructions.h */ + +/* + * Convert a processor pagesize into the strange encoded pagesize used by the + * GRU. Processor pagesize is encoded as log of bytes per page. (or PAGE_SHIFT) + * pagesize log pagesize grupagesize + * 4k 12 0 + * 16k 14 1 + * 64k 16 2 + * 256k 18 3 + * 1m 20 4 + * 2m 21 5 + * 4m 22 6 + * 16m 24 7 + * 64m 26 8 + * ... 
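As a quick check of the encoding table above, the stand-alone sketch below evaluates the same arithmetic that the GRU_PAGESIZE() macro (defined immediately after this comment) performs. It is illustrative only and not part of the driver; the helper name grupagesize() and the main() harness are made up. Run in user space, it should print the encodings 0 through 8 for the listed page shifts.

#include <stdio.h>

/* Same arithmetic as the GRU_PAGESIZE() macro defined below. */
static int grupagesize(int sh)
{
	return ((sh > 20 ? sh + 2 : sh) >> 1) - 6;
}

int main(void)
{
	/* Page shifts from the table above: 4k, 16k, ... 64m */
	static const int shifts[] = { 12, 14, 16, 18, 20, 21, 22, 24, 26 };
	unsigned int i;

	for (i = 0; i < sizeof(shifts) / sizeof(shifts[0]); i++)
		printf("shift %2d -> grupagesize %d\n",
		       shifts[i], grupagesize(shifts[i]));
	return 0;
}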
+ */ +#define GRU_PAGESIZE(sh) ((((sh) > 20 ? (sh) + 2 : (sh)) >> 1) - 6) +#define GRU_SIZEAVAIL(sh) (1UL << GRU_PAGESIZE(sh)) + +/* minimum TLB purge count to ensure a full purge */ +#define GRUMAXINVAL 1024UL + +int cch_allocate(struct gru_context_configuration_handle *cch); +int cch_start(struct gru_context_configuration_handle *cch); +int cch_interrupt(struct gru_context_configuration_handle *cch); +int cch_deallocate(struct gru_context_configuration_handle *cch); +int cch_interrupt_sync(struct gru_context_configuration_handle *cch); +int tgh_invalidate(struct gru_tlb_global_handle *tgh, unsigned long vaddr, + unsigned long vaddrmask, int asid, int pagesize, int global, int n, + unsigned short ctxbitmap); +int tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long paddr, + int gaa, unsigned long vaddr, int asid, int dirty, int pagesize); +void tfh_write_restart(struct gru_tlb_fault_handle *tfh, unsigned long paddr, + int gaa, unsigned long vaddr, int asid, int dirty, int pagesize); +void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh); +void tfh_exception(struct gru_tlb_fault_handle *tfh); + +#endif /* __GRUHANDLES_H__ */ diff --git a/drivers/misc/sgi-gru/grukdump.c b/drivers/misc/sgi-gru/grukdump.c new file mode 100644 index 0000000000..9869f4f2f4 --- /dev/null +++ b/drivers/misc/sgi-gru/grukdump.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SN Platform GRU Driver + * + * Dump GRU State + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "gru.h" +#include "grutables.h" +#include "gruhandles.h" +#include "grulib.h" + +#define CCH_LOCK_ATTEMPTS 10 + +static int gru_user_copy_handle(void __user **dp, void *s) +{ + if (copy_to_user(*dp, s, GRU_HANDLE_BYTES)) + return -1; + *dp += GRU_HANDLE_BYTES; + return 0; +} + +static int gru_dump_context_data(void *grubase, + struct gru_context_configuration_handle *cch, + void __user *ubuf, int ctxnum, int dsrcnt, + int flush_cbrs) +{ + void *cb, *cbe, *tfh, *gseg; + int i, scr; + + gseg = grubase + ctxnum * GRU_GSEG_STRIDE; + cb = gseg + GRU_CB_BASE; + cbe = grubase + GRU_CBE_BASE; + tfh = grubase + GRU_TFH_BASE; + + for_each_cbr_in_allocation_map(i, &cch->cbr_allocation_map, scr) { + if (flush_cbrs) + gru_flush_cache(cb); + if (gru_user_copy_handle(&ubuf, cb)) + goto fail; + if (gru_user_copy_handle(&ubuf, tfh + i * GRU_HANDLE_STRIDE)) + goto fail; + if (gru_user_copy_handle(&ubuf, cbe + i * GRU_HANDLE_STRIDE)) + goto fail; + cb += GRU_HANDLE_STRIDE; + } + if (dsrcnt) + memcpy(ubuf, gseg + GRU_DS_BASE, dsrcnt * GRU_HANDLE_STRIDE); + return 0; + +fail: + return -EFAULT; +} + +static int gru_dump_tfm(struct gru_state *gru, + void __user *ubuf, void __user *ubufend) +{ + struct gru_tlb_fault_map *tfm; + int i; + + if (GRU_NUM_TFM * GRU_CACHE_LINE_BYTES > ubufend - ubuf) + return -EFBIG; + + for (i = 0; i < GRU_NUM_TFM; i++) { + tfm = get_tfm(gru->gs_gru_base_vaddr, i); + if (gru_user_copy_handle(&ubuf, tfm)) + goto fail; + } + return GRU_NUM_TFM * GRU_CACHE_LINE_BYTES; + +fail: + return -EFAULT; +} + +static int gru_dump_tgh(struct gru_state *gru, + void __user *ubuf, void __user *ubufend) +{ + struct gru_tlb_global_handle *tgh; + int i; + + if (GRU_NUM_TGH * GRU_CACHE_LINE_BYTES > ubufend - ubuf) + return -EFBIG; + + for (i = 0; i < GRU_NUM_TGH; i++) { + tgh = get_tgh(gru->gs_gru_base_vaddr, i); + if (gru_user_copy_handle(&ubuf, tgh)) + goto fail; + } + return GRU_NUM_TGH * 
GRU_CACHE_LINE_BYTES; + +fail: + return -EFAULT; +} + +static int gru_dump_context(struct gru_state *gru, int ctxnum, + void __user *ubuf, void __user *ubufend, char data_opt, + char lock_cch, char flush_cbrs) +{ + struct gru_dump_context_header hdr; + struct gru_dump_context_header __user *uhdr = ubuf; + struct gru_context_configuration_handle *cch, *ubufcch; + struct gru_thread_state *gts; + int try, cch_locked, cbrcnt = 0, dsrcnt = 0, bytes = 0, ret = 0; + void *grubase; + + memset(&hdr, 0, sizeof(hdr)); + grubase = gru->gs_gru_base_vaddr; + cch = get_cch(grubase, ctxnum); + for (try = 0; try < CCH_LOCK_ATTEMPTS; try++) { + cch_locked = trylock_cch_handle(cch); + if (cch_locked) + break; + msleep(1); + } + + ubuf += sizeof(hdr); + ubufcch = ubuf; + if (gru_user_copy_handle(&ubuf, cch)) { + if (cch_locked) + unlock_cch_handle(cch); + return -EFAULT; + } + if (cch_locked) + ubufcch->delresp = 0; + bytes = sizeof(hdr) + GRU_CACHE_LINE_BYTES; + + if (cch_locked || !lock_cch) { + gts = gru->gs_gts[ctxnum]; + if (gts && gts->ts_vma) { + hdr.pid = gts->ts_tgid_owner; + hdr.vaddr = gts->ts_vma->vm_start; + } + if (cch->state != CCHSTATE_INACTIVE) { + cbrcnt = hweight64(cch->cbr_allocation_map) * + GRU_CBR_AU_SIZE; + dsrcnt = data_opt ? hweight32(cch->dsr_allocation_map) * + GRU_DSR_AU_CL : 0; + } + bytes += (3 * cbrcnt + dsrcnt) * GRU_CACHE_LINE_BYTES; + if (bytes > ubufend - ubuf) + ret = -EFBIG; + else + ret = gru_dump_context_data(grubase, cch, ubuf, ctxnum, + dsrcnt, flush_cbrs); + } + if (cch_locked) + unlock_cch_handle(cch); + if (ret) + return ret; + + hdr.magic = GRU_DUMP_MAGIC; + hdr.gid = gru->gs_gid; + hdr.ctxnum = ctxnum; + hdr.cbrcnt = cbrcnt; + hdr.dsrcnt = dsrcnt; + hdr.cch_locked = cch_locked; + if (copy_to_user(uhdr, &hdr, sizeof(hdr))) + return -EFAULT; + + return bytes; +} + +int gru_dump_chiplet_request(unsigned long arg) +{ + struct gru_state *gru; + struct gru_dump_chiplet_state_req req; + void __user *ubuf; + void __user *ubufend; + int ctxnum, ret, cnt = 0; + + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + /* Currently, only dump by gid is implemented */ + if (req.gid >= gru_max_gids) + return -EINVAL; + req.gid = array_index_nospec(req.gid, gru_max_gids); + + gru = GID_TO_GRU(req.gid); + ubuf = req.buf; + ubufend = req.buf + req.buflen; + + ret = gru_dump_tfm(gru, ubuf, ubufend); + if (ret < 0) + goto fail; + ubuf += ret; + + ret = gru_dump_tgh(gru, ubuf, ubufend); + if (ret < 0) + goto fail; + ubuf += ret; + + for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) { + if (req.ctxnum == ctxnum || req.ctxnum < 0) { + ret = gru_dump_context(gru, ctxnum, ubuf, ubufend, + req.data_opt, req.lock_cch, + req.flush_cbrs); + if (ret < 0) + goto fail; + ubuf += ret; + cnt++; + } + } + + if (copy_to_user((void __user *)arg, &req, sizeof(req))) + return -EFAULT; + return cnt; + +fail: + return ret; +} diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c new file mode 100644 index 0000000000..37e804bbb1 --- /dev/null +++ b/drivers/misc/sgi-gru/grukservices.c @@ -0,0 +1,1159 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SN Platform GRU Driver + * + * KERNEL SERVICES THAT USE THE GRU + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "gru.h" +#include "grulib.h" +#include "grutables.h" +#include "grukservices.h" +#include "gru_instructions.h" +#include + +/* + * Kernel GRU Usage + * + * The following is an interim algorithm for management of kernel GRU + * resources. This will likely be replaced when we better understand the + * kernel/user requirements. + * + * Blade percpu resources reserved for kernel use. These resources are + * reserved whenever the kernel context for the blade is loaded. Note + * that the kernel context is not guaranteed to be always available. It is + * loaded on demand & can be stolen by a user if the user demand exceeds the + * kernel demand. The kernel can always reload the kernel context but + * a SLEEP may be required!!!. + * + * Async Overview: + * + * Each blade has one "kernel context" that owns GRU kernel resources + * located on the blade. Kernel drivers use GRU resources in this context + * for sending messages, zeroing memory, etc. + * + * The kernel context is dynamically loaded on demand. If it is not in + * use by the kernel, the kernel context can be unloaded & given to a user. + * The kernel context will be reloaded when needed. This may require that + * a context be stolen from a user. + * NOTE: frequent unloading/reloading of the kernel context is + * expensive. We are depending on batch schedulers, cpusets, sane + * drivers or some other mechanism to prevent the need for frequent + * stealing/reloading. + * + * The kernel context consists of two parts: + * - 1 CB & a few DSRs that are reserved for each cpu on the blade. + * Each cpu has it's own private resources & does not share them + * with other cpus. These resources are used serially, ie, + * locked, used & unlocked on each call to a function in + * grukservices. + * (Now that we have dynamic loading of kernel contexts, I + * may rethink this & allow sharing between cpus....) + * + * - Additional resources can be reserved long term & used directly + * by UV drivers located in the kernel. Drivers using these GRU + * resources can use asynchronous GRU instructions that send + * interrupts on completion. + * - these resources must be explicitly locked/unlocked + * - locked resources prevent (obviously) the kernel + * context from being unloaded. + * - drivers using these resource directly issue their own + * GRU instruction and must wait/check completion. + * + * When these resources are reserved, the caller can optionally + * associate a wait_queue with the resources and use asynchronous + * GRU instructions. When an async GRU instruction completes, the + * driver will do a wakeup on the event. 
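A minimal sketch of the async reservation flow described above, modeled on quicktest2() later in this file; the function name, the completion variable, and the error handling are illustrative only, and the real reservation sizes depend on the driver using it.

/* Illustrative only; mirrors the pattern used by quicktest2() below. */
static DECLARE_COMPLETION(example_cmp);

static int example_async_user(int blade_id)
{
	unsigned long han;
	void *cb;

	/* A return of 0 means the blade's async resources are already reserved */
	han = gru_reserve_async_resources(blade_id, 1, 0, &example_cmp);
	if (!han)
		return -EBUSY;

	gru_lock_async_resource(han, &cb, NULL);
	/* ... issue GRU instructions on cb using IMA_INTERRUPT ... */
	gru_wait_async_cbr(han);	/* sleep until the completion is signalled */
	gru_unlock_async_resource(han);

	gru_release_async_resources(han);
	return 0;
}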
+ * + */ + + +#define ASYNC_HAN_TO_BID(h) ((h) - 1) +#define ASYNC_BID_TO_HAN(b) ((b) + 1) +#define ASYNC_HAN_TO_BS(h) gru_base[ASYNC_HAN_TO_BID(h)] + +#define GRU_NUM_KERNEL_CBR 1 +#define GRU_NUM_KERNEL_DSR_BYTES 256 +#define GRU_NUM_KERNEL_DSR_CL (GRU_NUM_KERNEL_DSR_BYTES / \ + GRU_CACHE_LINE_BYTES) + +/* GRU instruction attributes for all instructions */ +#define IMA IMA_CB_DELAY + +/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */ +#define __gru_cacheline_aligned__ \ + __attribute__((__aligned__(GRU_CACHE_LINE_BYTES))) + +#define MAGIC 0x1234567887654321UL + +/* Default retry count for GRU errors on kernel instructions */ +#define EXCEPTION_RETRY_LIMIT 3 + +/* Status of message queue sections */ +#define MQS_EMPTY 0 +#define MQS_FULL 1 +#define MQS_NOOP 2 + +/*----------------- RESOURCE MANAGEMENT -------------------------------------*/ +/* optimized for x86_64 */ +struct message_queue { + union gru_mesqhead head __gru_cacheline_aligned__; /* CL 0 */ + int qlines; /* DW 1 */ + long hstatus[2]; + void *next __gru_cacheline_aligned__;/* CL 1 */ + void *limit; + void *start; + void *start2; + char data ____cacheline_aligned; /* CL 2 */ +}; + +/* First word in every message - used by mesq interface */ +struct message_header { + char present; + char present2; + char lines; + char fill; +}; + +#define HSTATUS(mq, h) ((mq) + offsetof(struct message_queue, hstatus[h])) + +/* + * Reload the blade's kernel context into a GRU chiplet. Called holding + * the bs_kgts_sema for READ. Will steal user contexts if necessary. + */ +static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id) +{ + struct gru_state *gru; + struct gru_thread_state *kgts; + void *vaddr; + int ctxnum, ncpus; + + up_read(&bs->bs_kgts_sema); + down_write(&bs->bs_kgts_sema); + + if (!bs->bs_kgts) { + do { + bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0); + if (!IS_ERR(bs->bs_kgts)) + break; + msleep(1); + } while (true); + bs->bs_kgts->ts_user_blade_id = blade_id; + } + kgts = bs->bs_kgts; + + if (!kgts->ts_gru) { + STAT(load_kernel_context); + ncpus = uv_blade_nr_possible_cpus(blade_id); + kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU( + GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs); + kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU( + GRU_NUM_KERNEL_DSR_BYTES * ncpus + + bs->bs_async_dsr_bytes); + while (!gru_assign_gru_context(kgts)) { + msleep(1); + gru_steal_context(kgts); + } + gru_load_context(kgts); + gru = bs->bs_kgts->ts_gru; + vaddr = gru->gs_gru_base_vaddr; + ctxnum = kgts->ts_ctxnum; + bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0); + bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0); + } + downgrade_write(&bs->bs_kgts_sema); +} + +/* + * Free all kernel contexts that are not currently in use. + * Returns 0 if all freed, else number of inuse context. + */ +static int gru_free_kernel_contexts(void) +{ + struct gru_blade_state *bs; + struct gru_thread_state *kgts; + int bid, ret = 0; + + for (bid = 0; bid < GRU_MAX_BLADES; bid++) { + bs = gru_base[bid]; + if (!bs) + continue; + + /* Ignore busy contexts. Don't want to block here. */ + if (down_write_trylock(&bs->bs_kgts_sema)) { + kgts = bs->bs_kgts; + if (kgts && kgts->ts_gru) + gru_unload_context(kgts, 0); + bs->bs_kgts = NULL; + up_write(&bs->bs_kgts_sema); + kfree(kgts); + } else { + ret++; + } + } + return ret; +} + +/* + * Lock & load the kernel context for the specified blade. 
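Every synchronous grukservices call follows the same reserve/use/release pattern built on gru_get_cpu_resources() and gru_free_cpu_resources(), defined just below. A minimal sketch of that pattern with illustrative names; gru_read_gpa() and gru_copy_gpa() later in this file are the real uses.

/* Illustrative only; error handling and the actual GRU instruction are elided. */
static int example_synchronous_gru_op(void)
{
	void *cb, *dsr;
	int ret;

	/* Locks the kernel context and disables preemption */
	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;

	/* ... build data in dsr, issue one GRU instruction on cb ... */
	ret = gru_wait(cb);			/* CBS_IDLE on success */

	gru_free_cpu_resources(cb, dsr);	/* unlock context, re-enable preemption */
	return ret;
}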
+ */ +static struct gru_blade_state *gru_lock_kernel_context(int blade_id) +{ + struct gru_blade_state *bs; + int bid; + + STAT(lock_kernel_context); +again: + bid = blade_id < 0 ? uv_numa_blade_id() : blade_id; + bs = gru_base[bid]; + + /* Handle the case where migration occurred while waiting for the sema */ + down_read(&bs->bs_kgts_sema); + if (blade_id < 0 && bid != uv_numa_blade_id()) { + up_read(&bs->bs_kgts_sema); + goto again; + } + if (!bs->bs_kgts || !bs->bs_kgts->ts_gru) + gru_load_kernel_context(bs, bid); + return bs; + +} + +/* + * Unlock the kernel context for the specified blade. Context is not + * unloaded but may be stolen before next use. + */ +static void gru_unlock_kernel_context(int blade_id) +{ + struct gru_blade_state *bs; + + bs = gru_base[blade_id]; + up_read(&bs->bs_kgts_sema); + STAT(unlock_kernel_context); +} + +/* + * Reserve & get pointers to the DSR/CBRs reserved for the current cpu. + * - returns with preemption disabled + */ +static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr) +{ + struct gru_blade_state *bs; + int lcpu; + + BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES); + preempt_disable(); + bs = gru_lock_kernel_context(-1); + lcpu = uv_blade_processor_id(); + *cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE; + *dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES; + return 0; +} + +/* + * Free the current cpus reserved DSR/CBR resources. + */ +static void gru_free_cpu_resources(void *cb, void *dsr) +{ + gru_unlock_kernel_context(uv_numa_blade_id()); + preempt_enable(); +} + +/* + * Reserve GRU resources to be used asynchronously. + * Note: currently supports only 1 reservation per blade. + * + * input: + * blade_id - blade on which resources should be reserved + * cbrs - number of CBRs + * dsr_bytes - number of DSR bytes needed + * output: + * handle to identify resource + * (0 = async resources already reserved) + */ +unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes, + struct completion *cmp) +{ + struct gru_blade_state *bs; + struct gru_thread_state *kgts; + int ret = 0; + + bs = gru_base[blade_id]; + + down_write(&bs->bs_kgts_sema); + + /* Verify no resources already reserved */ + if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs) + goto done; + bs->bs_async_dsr_bytes = dsr_bytes; + bs->bs_async_cbrs = cbrs; + bs->bs_async_wq = cmp; + kgts = bs->bs_kgts; + + /* Resources changed. Unload context if already loaded */ + if (kgts && kgts->ts_gru) + gru_unload_context(kgts, 0); + ret = ASYNC_BID_TO_HAN(blade_id); + +done: + up_write(&bs->bs_kgts_sema); + return ret; +} + +/* + * Release async resources previously reserved. + * + * input: + * han - handle to identify resources + */ +void gru_release_async_resources(unsigned long han) +{ + struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); + + down_write(&bs->bs_kgts_sema); + bs->bs_async_dsr_bytes = 0; + bs->bs_async_cbrs = 0; + bs->bs_async_wq = NULL; + up_write(&bs->bs_kgts_sema); +} + +/* + * Wait for async GRU instructions to complete. 
+ * + * input: + * han - handle to identify resources + */ +void gru_wait_async_cbr(unsigned long han) +{ + struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); + + wait_for_completion(bs->bs_async_wq); + mb(); +} + +/* + * Lock previous reserved async GRU resources + * + * input: + * han - handle to identify resources + * output: + * cb - pointer to first CBR + * dsr - pointer to first DSR + */ +void gru_lock_async_resource(unsigned long han, void **cb, void **dsr) +{ + struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); + int blade_id = ASYNC_HAN_TO_BID(han); + int ncpus; + + gru_lock_kernel_context(blade_id); + ncpus = uv_blade_nr_possible_cpus(blade_id); + if (cb) + *cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE; + if (dsr) + *dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES; +} + +/* + * Unlock previous reserved async GRU resources + * + * input: + * han - handle to identify resources + */ +void gru_unlock_async_resource(unsigned long han) +{ + int blade_id = ASYNC_HAN_TO_BID(han); + + gru_unlock_kernel_context(blade_id); +} + +/*----------------------------------------------------------------------*/ +int gru_get_cb_exception_detail(void *cb, + struct control_block_extended_exc_detail *excdet) +{ + struct gru_control_block_extended *cbe; + struct gru_thread_state *kgts = NULL; + unsigned long off; + int cbrnum, bid; + + /* + * Locate kgts for cb. This algorithm is SLOW but + * this function is rarely called (ie., almost never). + * Performance does not matter. + */ + for_each_possible_blade(bid) { + if (!gru_base[bid]) + break; + kgts = gru_base[bid]->bs_kgts; + if (!kgts || !kgts->ts_gru) + continue; + off = cb - kgts->ts_gru->gs_gru_base_vaddr; + if (off < GRU_SIZE) + break; + kgts = NULL; + } + BUG_ON(!kgts); + cbrnum = thread_cbr_number(kgts, get_cb_number(cb)); + cbe = get_cbe(GRUBASE(cb), cbrnum); + gru_flush_cache(cbe); /* CBE not coherent */ + sync_core(); + excdet->opc = cbe->opccpy; + excdet->exopc = cbe->exopccpy; + excdet->ecause = cbe->ecause; + excdet->exceptdet0 = cbe->idef1upd; + excdet->exceptdet1 = cbe->idef3upd; + gru_flush_cache(cbe); + return 0; +} + +static char *gru_get_cb_exception_detail_str(int ret, void *cb, + char *buf, int size) +{ + struct gru_control_block_status *gen = cb; + struct control_block_extended_exc_detail excdet; + + if (ret > 0 && gen->istatus == CBS_EXCEPTION) { + gru_get_cb_exception_detail(cb, &excdet); + snprintf(buf, size, + "GRU:%d exception: cb %p, opc %d, exopc %d, ecause 0x%x," + "excdet0 0x%lx, excdet1 0x%x", smp_processor_id(), + gen, excdet.opc, excdet.exopc, excdet.ecause, + excdet.exceptdet0, excdet.exceptdet1); + } else { + snprintf(buf, size, "No exception"); + } + return buf; +} + +static int gru_wait_idle_or_exception(struct gru_control_block_status *gen) +{ + while (gen->istatus >= CBS_ACTIVE) { + cpu_relax(); + barrier(); + } + return gen->istatus; +} + +static int gru_retry_exception(void *cb) +{ + struct gru_control_block_status *gen = cb; + struct control_block_extended_exc_detail excdet; + int retry = EXCEPTION_RETRY_LIMIT; + + while (1) { + if (gru_wait_idle_or_exception(gen) == CBS_IDLE) + return CBS_IDLE; + if (gru_get_cb_message_queue_substatus(cb)) + return CBS_EXCEPTION; + gru_get_cb_exception_detail(cb, &excdet); + if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) || + (excdet.cbrexecstatus & CBR_EXS_ABORT_OCC)) + break; + if (retry-- == 0) + break; + gen->icmd = 1; + gru_flush_cache(gen); + } + return CBS_EXCEPTION; +} + +int gru_check_status_proc(void *cb) +{ + struct gru_control_block_status *gen = cb; + int 
ret; + + ret = gen->istatus; + if (ret == CBS_EXCEPTION) + ret = gru_retry_exception(cb); + rmb(); + return ret; + +} + +int gru_wait_proc(void *cb) +{ + struct gru_control_block_status *gen = cb; + int ret; + + ret = gru_wait_idle_or_exception(gen); + if (ret == CBS_EXCEPTION) + ret = gru_retry_exception(cb); + rmb(); + return ret; +} + +static void gru_abort(int ret, void *cb, char *str) +{ + char buf[GRU_EXC_STR_SIZE]; + + panic("GRU FATAL ERROR: %s - %s\n", str, + gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf))); +} + +void gru_wait_abort_proc(void *cb) +{ + int ret; + + ret = gru_wait_proc(cb); + if (ret) + gru_abort(ret, cb, "gru_wait_abort"); +} + + +/*------------------------------ MESSAGE QUEUES -----------------------------*/ + +/* Internal status . These are NOT returned to the user. */ +#define MQIE_AGAIN -1 /* try again */ + + +/* + * Save/restore the "present" flag that is in the second line of 2-line + * messages + */ +static inline int get_present2(void *p) +{ + struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES; + return mhdr->present; +} + +static inline void restore_present2(void *p, int val) +{ + struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES; + mhdr->present = val; +} + +/* + * Create a message queue. + * qlines - message queue size in cache lines. Includes 2-line header. + */ +int gru_create_message_queue(struct gru_message_queue_desc *mqd, + void *p, unsigned int bytes, int nasid, int vector, int apicid) +{ + struct message_queue *mq = p; + unsigned int qlines; + + qlines = bytes / GRU_CACHE_LINE_BYTES - 2; + memset(mq, 0, bytes); + mq->start = &mq->data; + mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES; + mq->next = &mq->data; + mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES; + mq->qlines = qlines; + mq->hstatus[0] = 0; + mq->hstatus[1] = 1; + mq->head = gru_mesq_head(2, qlines / 2 + 1); + mqd->mq = mq; + mqd->mq_gpa = uv_gpa(mq); + mqd->qlines = qlines; + mqd->interrupt_pnode = nasid >> 1; + mqd->interrupt_vector = vector; + mqd->interrupt_apicid = apicid; + return 0; +} +EXPORT_SYMBOL_GPL(gru_create_message_queue); + +/* + * Send a NOOP message to a message queue + * Returns: + * 0 - if queue is full after the send. This is the normal case + * but various races can change this. + * -1 - if mesq sent successfully but queue not full + * >0 - unexpected error. 
MQE_xxx returned + */ +static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd, + void *mesg) +{ + const struct message_header noop_header = { + .present = MQS_NOOP, .lines = 1}; + unsigned long m; + int substatus, ret; + struct message_header save_mhdr, *mhdr = mesg; + + STAT(mesq_noop); + save_mhdr = *mhdr; + *mhdr = noop_header; + gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), 1, IMA); + ret = gru_wait(cb); + + if (ret) { + substatus = gru_get_cb_message_queue_substatus(cb); + switch (substatus) { + case CBSS_NO_ERROR: + STAT(mesq_noop_unexpected_error); + ret = MQE_UNEXPECTED_CB_ERR; + break; + case CBSS_LB_OVERFLOWED: + STAT(mesq_noop_lb_overflow); + ret = MQE_CONGESTION; + break; + case CBSS_QLIMIT_REACHED: + STAT(mesq_noop_qlimit_reached); + ret = 0; + break; + case CBSS_AMO_NACKED: + STAT(mesq_noop_amo_nacked); + ret = MQE_CONGESTION; + break; + case CBSS_PUT_NACKED: + STAT(mesq_noop_put_nacked); + m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6); + gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1, + IMA); + if (gru_wait(cb) == CBS_IDLE) + ret = MQIE_AGAIN; + else + ret = MQE_UNEXPECTED_CB_ERR; + break; + case CBSS_PAGE_OVERFLOW: + STAT(mesq_noop_page_overflow); + fallthrough; + default: + BUG(); + } + } + *mhdr = save_mhdr; + return ret; +} + +/* + * Handle a gru_mesq full. + */ +static int send_message_queue_full(void *cb, struct gru_message_queue_desc *mqd, + void *mesg, int lines) +{ + union gru_mesqhead mqh; + unsigned int limit, head; + unsigned long avalue; + int half, qlines; + + /* Determine if switching to first/second half of q */ + avalue = gru_get_amo_value(cb); + head = gru_get_amo_value_head(cb); + limit = gru_get_amo_value_limit(cb); + + qlines = mqd->qlines; + half = (limit != qlines); + + if (half) + mqh = gru_mesq_head(qlines / 2 + 1, qlines); + else + mqh = gru_mesq_head(2, qlines / 2 + 1); + + /* Try to get lock for switching head pointer */ + gru_gamir(cb, EOP_IR_CLR, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, IMA); + if (gru_wait(cb) != CBS_IDLE) + goto cberr; + if (!gru_get_amo_value(cb)) { + STAT(mesq_qf_locked); + return MQE_QUEUE_FULL; + } + + /* Got the lock. Send optional NOP if queue not full, */ + if (head != limit) { + if (send_noop_message(cb, mqd, mesg)) { + gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), + XTYPE_DW, IMA); + if (gru_wait(cb) != CBS_IDLE) + goto cberr; + STAT(mesq_qf_noop_not_full); + return MQIE_AGAIN; + } + avalue++; + } + + /* Then flip queuehead to other half of queue. */ + gru_gamer(cb, EOP_ERR_CSWAP, mqd->mq_gpa, XTYPE_DW, mqh.val, avalue, + IMA); + if (gru_wait(cb) != CBS_IDLE) + goto cberr; + + /* If not successfully in swapping queue head, clear the hstatus lock */ + if (gru_get_amo_value(cb) != avalue) { + STAT(mesq_qf_switch_head_failed); + gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, + IMA); + if (gru_wait(cb) != CBS_IDLE) + goto cberr; + } + return MQIE_AGAIN; +cberr: + STAT(mesq_qf_unexpected_error); + return MQE_UNEXPECTED_CB_ERR; +} + +/* + * Handle a PUT failure. Note: if message was a 2-line message, one of the + * lines might have successfully have been written. Before sending the + * message, "present" must be cleared in BOTH lines to prevent the receiver + * from prematurely seeing the full message. 
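For orientation, a minimal end-to-end sketch of the message-queue API that these failure handlers back up, modeled on quicktest1() later in this file; the queue memory, payload byte, and names are illustrative, and error handling is reduced to the retry-on-congestion loop.

/* Illustrative only; qmem must be contiguous, cacheline-aligned memory. */
static int example_mesq_roundtrip(void *qmem, unsigned int qbytes)
{
	struct gru_message_queue_desc mqd;
	char mesg[GRU_CACHE_LINE_BYTES];
	char *m;
	int ret;

	gru_create_message_queue(&mqd, qmem, qbytes, 0, 0, 0);

	memset(mesg, 0, sizeof(mesg));
	mesg[8] = 0x5a;		/* payload; the first 32 bits are used by the transport */
	do {
		ret = gru_send_message_gpa(&mqd, mesg, sizeof(mesg));
	} while (ret == MQE_CONGESTION);	/* transient congestion; retry */
	if (ret != MQE_OK)
		return ret;

	m = gru_get_next_message(&mqd);		/* NULL if nothing has arrived */
	if (m) {
		/* ... consume m[8] ... */
		gru_free_message(&mqd, m);	/* advance the receive pointer */
	}
	return 0;
}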
+ */ +static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd, + void *mesg, int lines) +{ + unsigned long m; + int ret, loops = 200; /* experimentally determined */ + + m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6); + if (lines == 2) { + gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA); + if (gru_wait(cb) != CBS_IDLE) + return MQE_UNEXPECTED_CB_ERR; + } + gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA); + if (gru_wait(cb) != CBS_IDLE) + return MQE_UNEXPECTED_CB_ERR; + + if (!mqd->interrupt_vector) + return MQE_OK; + + /* + * Send a noop message in order to deliver a cross-partition interrupt + * to the SSI that contains the target message queue. Normally, the + * interrupt is automatically delivered by hardware following mesq + * operations, but some error conditions require explicit delivery. + * The noop message will trigger delivery. Otherwise partition failures + * could cause unrecovered errors. + */ + do { + ret = send_noop_message(cb, mqd, mesg); + } while ((ret == MQIE_AGAIN || ret == MQE_CONGESTION) && (loops-- > 0)); + + if (ret == MQIE_AGAIN || ret == MQE_CONGESTION) { + /* + * Don't indicate to the app to resend the message, as it's + * already been successfully sent. We simply send an OK + * (rather than fail the send with MQE_UNEXPECTED_CB_ERR), + * assuming that the other side is receiving enough + * interrupts to get this message processed anyway. + */ + ret = MQE_OK; + } + return ret; +} + +/* + * Handle a gru_mesq failure. Some of these failures are software recoverable + * or retryable. + */ +static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd, + void *mesg, int lines) +{ + int substatus, ret = 0; + + substatus = gru_get_cb_message_queue_substatus(cb); + switch (substatus) { + case CBSS_NO_ERROR: + STAT(mesq_send_unexpected_error); + ret = MQE_UNEXPECTED_CB_ERR; + break; + case CBSS_LB_OVERFLOWED: + STAT(mesq_send_lb_overflow); + ret = MQE_CONGESTION; + break; + case CBSS_QLIMIT_REACHED: + STAT(mesq_send_qlimit_reached); + ret = send_message_queue_full(cb, mqd, mesg, lines); + break; + case CBSS_AMO_NACKED: + STAT(mesq_send_amo_nacked); + ret = MQE_CONGESTION; + break; + case CBSS_PUT_NACKED: + STAT(mesq_send_put_nacked); + ret = send_message_put_nacked(cb, mqd, mesg, lines); + break; + case CBSS_PAGE_OVERFLOW: + STAT(mesq_page_overflow); + fallthrough; + default: + BUG(); + } + return ret; +} + +/* + * Send a message to a message queue + * mqd message queue descriptor + * mesg message. 
ust be vaddr within a GSEG + * bytes message size (<= 2 CL) + */ +int gru_send_message_gpa(struct gru_message_queue_desc *mqd, void *mesg, + unsigned int bytes) +{ + struct message_header *mhdr; + void *cb; + void *dsr; + int istatus, clines, ret; + + STAT(mesq_send); + BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES); + + clines = DIV_ROUND_UP(bytes, GRU_CACHE_LINE_BYTES); + if (gru_get_cpu_resources(bytes, &cb, &dsr)) + return MQE_BUG_NO_RESOURCES; + memcpy(dsr, mesg, bytes); + mhdr = dsr; + mhdr->present = MQS_FULL; + mhdr->lines = clines; + if (clines == 2) { + mhdr->present2 = get_present2(mhdr); + restore_present2(mhdr, MQS_FULL); + } + + do { + ret = MQE_OK; + gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), clines, IMA); + istatus = gru_wait(cb); + if (istatus != CBS_IDLE) + ret = send_message_failure(cb, mqd, dsr, clines); + } while (ret == MQIE_AGAIN); + gru_free_cpu_resources(cb, dsr); + + if (ret) + STAT(mesq_send_failed); + return ret; +} +EXPORT_SYMBOL_GPL(gru_send_message_gpa); + +/* + * Advance the receive pointer for the queue to the next message. + */ +void gru_free_message(struct gru_message_queue_desc *mqd, void *mesg) +{ + struct message_queue *mq = mqd->mq; + struct message_header *mhdr = mq->next; + void *next, *pnext; + int half = -1; + int lines = mhdr->lines; + + if (lines == 2) + restore_present2(mhdr, MQS_EMPTY); + mhdr->present = MQS_EMPTY; + + pnext = mq->next; + next = pnext + GRU_CACHE_LINE_BYTES * lines; + if (next == mq->limit) { + next = mq->start; + half = 1; + } else if (pnext < mq->start2 && next >= mq->start2) { + half = 0; + } + + if (half >= 0) + mq->hstatus[half] = 1; + mq->next = next; +} +EXPORT_SYMBOL_GPL(gru_free_message); + +/* + * Get next message from message queue. Return NULL if no message + * present. User must call next_message() to move to next message. + * rmq message queue + */ +void *gru_get_next_message(struct gru_message_queue_desc *mqd) +{ + struct message_queue *mq = mqd->mq; + struct message_header *mhdr = mq->next; + int present = mhdr->present; + + /* skip NOOP messages */ + while (present == MQS_NOOP) { + gru_free_message(mqd, mhdr); + mhdr = mq->next; + present = mhdr->present; + } + + /* Wait for both halves of 2 line messages */ + if (present == MQS_FULL && mhdr->lines == 2 && + get_present2(mhdr) == MQS_EMPTY) + present = MQS_EMPTY; + + if (!present) { + STAT(mesq_receive_none); + return NULL; + } + + if (mhdr->lines == 2) + restore_present2(mhdr, mhdr->present2); + + STAT(mesq_receive); + return mhdr; +} +EXPORT_SYMBOL_GPL(gru_get_next_message); + +/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/ + +/* + * Load a DW from a global GPA. The GPA can be a memory or MMR address. 
+ */ +int gru_read_gpa(unsigned long *value, unsigned long gpa) +{ + void *cb; + void *dsr; + int ret, iaa; + + STAT(read_gpa); + if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr)) + return MQE_BUG_NO_RESOURCES; + iaa = gpa >> 62; + gru_vload_phys(cb, gpa, gru_get_tri(dsr), iaa, IMA); + ret = gru_wait(cb); + if (ret == CBS_IDLE) + *value = *(unsigned long *)dsr; + gru_free_cpu_resources(cb, dsr); + return ret; +} +EXPORT_SYMBOL_GPL(gru_read_gpa); + + +/* + * Copy a block of data using the GRU resources + */ +int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa, + unsigned int bytes) +{ + void *cb; + void *dsr; + int ret; + + STAT(copy_gpa); + if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr)) + return MQE_BUG_NO_RESOURCES; + gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr), + XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_CL, IMA); + ret = gru_wait(cb); + gru_free_cpu_resources(cb, dsr); + return ret; +} +EXPORT_SYMBOL_GPL(gru_copy_gpa); + +/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/ +/* Temp - will delete after we gain confidence in the GRU */ + +static int quicktest0(unsigned long arg) +{ + unsigned long word0; + unsigned long word1; + void *cb; + void *dsr; + unsigned long *p; + int ret = -EIO; + + if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr)) + return MQE_BUG_NO_RESOURCES; + p = dsr; + word0 = MAGIC; + word1 = 0; + + gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); + if (gru_wait(cb) != CBS_IDLE) { + printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 1\n", smp_processor_id()); + goto done; + } + + if (*p != MAGIC) { + printk(KERN_DEBUG "GRU:%d quicktest0 bad magic 0x%lx\n", smp_processor_id(), *p); + goto done; + } + gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); + if (gru_wait(cb) != CBS_IDLE) { + printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 2\n", smp_processor_id()); + goto done; + } + + if (word0 != word1 || word1 != MAGIC) { + printk(KERN_DEBUG + "GRU:%d quicktest0 err: found 0x%lx, expected 0x%lx\n", + smp_processor_id(), word1, MAGIC); + goto done; + } + ret = 0; + +done: + gru_free_cpu_resources(cb, dsr); + return ret; +} + +#define ALIGNUP(p, q) ((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1))) + +static int quicktest1(unsigned long arg) +{ + struct gru_message_queue_desc mqd; + void *p, *mq; + int i, ret = -EIO; + char mes[GRU_CACHE_LINE_BYTES], *m; + + /* Need 1K cacheline aligned that does not cross page boundary */ + p = kmalloc(4096, 0); + if (p == NULL) + return -ENOMEM; + mq = ALIGNUP(p, 1024); + memset(mes, 0xee, sizeof(mes)); + + gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0); + for (i = 0; i < 6; i++) { + mes[8] = i; + do { + ret = gru_send_message_gpa(&mqd, mes, sizeof(mes)); + } while (ret == MQE_CONGESTION); + if (ret) + break; + } + if (ret != MQE_QUEUE_FULL || i != 4) { + printk(KERN_DEBUG "GRU:%d quicktest1: unexpected status %d, i %d\n", + smp_processor_id(), ret, i); + goto done; + } + + for (i = 0; i < 6; i++) { + m = gru_get_next_message(&mqd); + if (!m || m[8] != i) + break; + gru_free_message(&mqd, m); + } + if (i != 4) { + printk(KERN_DEBUG "GRU:%d quicktest2: bad message, i %d, m %p, m8 %d\n", + smp_processor_id(), i, m, m ? 
m[8] : -1); + goto done; + } + ret = 0; + +done: + kfree(p); + return ret; +} + +static int quicktest2(unsigned long arg) +{ + static DECLARE_COMPLETION(cmp); + unsigned long han; + int blade_id = 0; + int numcb = 4; + int ret = 0; + unsigned long *buf; + void *cb0, *cb; + struct gru_control_block_status *gen; + int i, k, istatus, bytes; + + bytes = numcb * 4 * 8; + buf = kmalloc(bytes, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ret = -EBUSY; + han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp); + if (!han) + goto done; + + gru_lock_async_resource(han, &cb0, NULL); + memset(buf, 0xee, bytes); + for (i = 0; i < numcb; i++) + gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0, + XTYPE_DW, 4, 1, IMA_INTERRUPT); + + ret = 0; + k = numcb; + do { + gru_wait_async_cbr(han); + for (i = 0; i < numcb; i++) { + cb = cb0 + i * GRU_HANDLE_STRIDE; + istatus = gru_check_status(cb); + if (istatus != CBS_ACTIVE && istatus != CBS_CALL_OS) + break; + } + if (i == numcb) + continue; + if (istatus != CBS_IDLE) { + printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, exception\n", smp_processor_id(), i); + ret = -EFAULT; + } else if (buf[4 * i] || buf[4 * i + 1] || buf[4 * i + 2] || + buf[4 * i + 3]) { + printk(KERN_DEBUG "GRU:%d quicktest2:cb %d, buf 0x%lx, 0x%lx, 0x%lx, 0x%lx\n", + smp_processor_id(), i, buf[4 * i], buf[4 * i + 1], buf[4 * i + 2], buf[4 * i + 3]); + ret = -EIO; + } + k--; + gen = cb; + gen->istatus = CBS_CALL_OS; /* don't handle this CBR again */ + } while (k); + BUG_ON(cmp.done); + + gru_unlock_async_resource(han); + gru_release_async_resources(han); +done: + kfree(buf); + return ret; +} + +#define BUFSIZE 200 +static int quicktest3(unsigned long arg) +{ + char buf1[BUFSIZE], buf2[BUFSIZE]; + int ret = 0; + + memset(buf2, 0, sizeof(buf2)); + memset(buf1, get_cycles() & 255, sizeof(buf1)); + gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE); + if (memcmp(buf1, buf2, BUFSIZE)) { + printk(KERN_DEBUG "GRU:%d quicktest3 error\n", smp_processor_id()); + ret = -EIO; + } + return ret; +} + +/* + * Debugging only. User hook for various kernel tests + * of driver & gru. + */ +int gru_ktest(unsigned long arg) +{ + int ret = -EINVAL; + + switch (arg & 0xff) { + case 0: + ret = quicktest0(arg); + break; + case 1: + ret = quicktest1(arg); + break; + case 2: + ret = quicktest2(arg); + break; + case 3: + ret = quicktest3(arg); + break; + case 99: + ret = gru_free_kernel_contexts(); + break; + } + return ret; + +} + +int gru_kservices_init(void) +{ + return 0; +} + +void gru_kservices_exit(void) +{ + if (gru_free_kernel_contexts()) + BUG(); +} + diff --git a/drivers/misc/sgi-gru/grukservices.h b/drivers/misc/sgi-gru/grukservices.h new file mode 100644 index 0000000000..510e45e973 --- /dev/null +++ b/drivers/misc/sgi-gru/grukservices.h @@ -0,0 +1,201 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/* + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + */ +#ifndef __GRU_KSERVICES_H_ +#define __GRU_KSERVICES_H_ + + +/* + * Message queues using the GRU to send/receive messages. + * + * These function allow the user to create a message queue for + * sending/receiving 1 or 2 cacheline messages using the GRU. + * + * Processes SENDING messages will use a kernel CBR/DSR to send + * the message. This is transparent to the caller. + * + * The receiver does not use any GRU resources. + * + * The functions support: + * - single receiver + * - multiple senders + * - cross partition message + * + * Missing features ZZZ: + * - user options for dealing with timeouts, queue full, etc. 
+ * - gru_create_message_queue() needs interrupt vector info + */ + +struct gru_message_queue_desc { + void *mq; /* message queue vaddress */ + unsigned long mq_gpa; /* global address of mq */ + int qlines; /* queue size in CL */ + int interrupt_vector; /* interrupt vector */ + int interrupt_pnode; /* pnode for interrupt */ + int interrupt_apicid; /* lapicid for interrupt */ +}; + +/* + * Initialize a user allocated chunk of memory to be used as + * a message queue. The caller must ensure that the queue is + * in contiguous physical memory and is cacheline aligned. + * + * Message queue size is the total number of bytes allocated + * to the queue including a 2 cacheline header that is used + * to manage the queue. + * + * Input: + * mqd pointer to message queue descriptor + * p pointer to user allocated mesq memory. + * bytes size of message queue in bytes + * vector interrupt vector (zero if no interrupts) + * nasid nasid of blade where interrupt is delivered + * apicid apicid of cpu for interrupt + * + * Errors: + * 0 OK + * >0 error + */ +extern int gru_create_message_queue(struct gru_message_queue_desc *mqd, + void *p, unsigned int bytes, int nasid, int vector, int apicid); + +/* + * Send a message to a message queue. + * + * Note: The message queue transport mechanism uses the first 32 + * bits of the message. Users should avoid using these bits. + * + * + * Input: + * mqd pointer to message queue descriptor + * mesg pointer to message. Must be 64-bit aligned + * bytes size of message in bytes + * + * Output: + * 0 message sent + * >0 Send failure - see error codes below + * + */ +extern int gru_send_message_gpa(struct gru_message_queue_desc *mqd, + void *mesg, unsigned int bytes); + +/* Status values for gru_send_message() */ +#define MQE_OK 0 /* message sent successfully */ +#define MQE_CONGESTION 1 /* temporary congestion, try again */ +#define MQE_QUEUE_FULL 2 /* queue is full */ +#define MQE_UNEXPECTED_CB_ERR 3 /* unexpected CB error */ +#define MQE_PAGE_OVERFLOW 10 /* BUG - queue overflowed a page */ +#define MQE_BUG_NO_RESOURCES 11 /* BUG - could not alloc GRU cb/dsr */ + +/* + * Advance the receive pointer for the message queue to the next message. + * Note: current API requires messages to be gotten & freed in order. Future + * API extensions may allow for out-of-order freeing. + * + * Input + * mqd pointer to message queue descriptor + * mesq message being freed + */ +extern void gru_free_message(struct gru_message_queue_desc *mqd, + void *mesq); + +/* + * Get next message from message queue. Returns pointer to + * message OR NULL if no message present. + * User must call gru_free_message() after message is processed + * in order to move the queue pointers to next message. + * + * Input + * mqd pointer to message queue descriptor + * + * Output: + * p pointer to message + * NULL no message available + */ +extern void *gru_get_next_message(struct gru_message_queue_desc *mqd); + + +/* + * Read a GRU global GPA. Source can be located in a remote partition. + * + * Input: + * value memory address where MMR value is returned + * gpa source numalink physical address of GPA + * + * Output: + * 0 OK + * >0 error + */ +int gru_read_gpa(unsigned long *value, unsigned long gpa); + + +/* + * Copy data using the GRU. Source or destination can be located in a remote + * partition. 
+ * + * Input: + * dest_gpa destination global physical address + * src_gpa source global physical address + * bytes number of bytes to copy + * + * Output: + * 0 OK + * >0 error + */ +extern int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa, + unsigned int bytes); + +/* + * Reserve GRU resources to be used asynchronously. + * + * input: + * blade_id - blade on which resources should be reserved + * cbrs - number of CBRs + * dsr_bytes - number of DSR bytes needed + * cmp - completion structure for waiting for + * async completions + * output: + * handle to identify resource + * (0 = no resources) + */ +extern unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes, + struct completion *cmp); + +/* + * Release async resources previously reserved. + * + * input: + * han - handle to identify resources + */ +extern void gru_release_async_resources(unsigned long han); + +/* + * Wait for async GRU instructions to complete. + * + * input: + * han - handle to identify resources + */ +extern void gru_wait_async_cbr(unsigned long han); + +/* + * Lock previous reserved async GRU resources + * + * input: + * han - handle to identify resources + * output: + * cb - pointer to first CBR + * dsr - pointer to first DSR + */ +extern void gru_lock_async_resource(unsigned long han, void **cb, void **dsr); + +/* + * Unlock previous reserved async GRU resources + * + * input: + * han - handle to identify resources + */ +extern void gru_unlock_async_resource(unsigned long han); + +#endif /* __GRU_KSERVICES_H_ */ diff --git a/drivers/misc/sgi-gru/grulib.h b/drivers/misc/sgi-gru/grulib.h new file mode 100644 index 0000000000..85c1039236 --- /dev/null +++ b/drivers/misc/sgi-gru/grulib.h @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __GRULIB_H__ +#define __GRULIB_H__ + +#define GRU_BASENAME "gru" +#define GRU_FULLNAME "/dev/gru" +#define GRU_IOCTL_NUM 'G' + +/* + * Maximum number of GRU segments that a user can have open + * ZZZ temp - set high for testing. Revisit. 
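A hedged user-space sketch of the chiplet dump interface follows, assuming the driver's ioctl handler routes GRU_DUMP_CHIPLET_STATE (defined just below) to gru_dump_chiplet_request() in grukdump.c above; the buffer size, open flags, and function name are illustrative.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <unistd.h>
#include "grulib.h"

static int example_dump_chiplet0(void)
{
	struct gru_dump_chiplet_state_req req;
	int fd, cnt;

	fd = open(GRU_FULLNAME, O_RDONLY);
	if (fd < 0)
		return -1;

	memset(&req, 0, sizeof(req));
	req.op = dcs_gid;		/* dump by gid; currently the only mode */
	req.gid = 0;			/* first chiplet */
	req.ctxnum = -1;		/* all contexts on the chiplet */
	req.buflen = 4 * 1024 * 1024;	/* guess; EFBIG from the ioctl means too small */
	req.buf = malloc(req.buflen);
	if (!req.buf) {
		close(fd);
		return -1;
	}

	cnt = ioctl(fd, GRU_DUMP_CHIPLET_STATE, &req);
	if (cnt >= 0)
		printf("dumped %d contexts\n", cnt);

	free(req.buf);
	close(fd);
	return cnt;
}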
+ */ +#define GRU_MAX_OPEN_CONTEXTS 32 + +/* Set Number of Request Blocks */ +#define GRU_CREATE_CONTEXT _IOWR(GRU_IOCTL_NUM, 1, void *) + +/* Set Context Options */ +#define GRU_SET_CONTEXT_OPTION _IOWR(GRU_IOCTL_NUM, 4, void *) + +/* Fetch exception detail */ +#define GRU_USER_GET_EXCEPTION_DETAIL _IOWR(GRU_IOCTL_NUM, 6, void *) + +/* For user call_os handling - normally a TLB fault */ +#define GRU_USER_CALL_OS _IOWR(GRU_IOCTL_NUM, 8, void *) + +/* For user unload context */ +#define GRU_USER_UNLOAD_CONTEXT _IOWR(GRU_IOCTL_NUM, 9, void *) + +/* For dumpping GRU chiplet state */ +#define GRU_DUMP_CHIPLET_STATE _IOWR(GRU_IOCTL_NUM, 11, void *) + +/* For getting gseg statistics */ +#define GRU_GET_GSEG_STATISTICS _IOWR(GRU_IOCTL_NUM, 12, void *) + +/* For user TLB flushing (primarily for tests) */ +#define GRU_USER_FLUSH_TLB _IOWR(GRU_IOCTL_NUM, 50, void *) + +/* Get some config options (primarily for tests & emulator) */ +#define GRU_GET_CONFIG_INFO _IOWR(GRU_IOCTL_NUM, 51, void *) + +/* Various kernel self-tests */ +#define GRU_KTEST _IOWR(GRU_IOCTL_NUM, 52, void *) + +#define CONTEXT_WINDOW_BYTES(th) (GRU_GSEG_PAGESIZE * (th)) +#define THREAD_POINTER(p, th) (p + GRU_GSEG_PAGESIZE * (th)) +#define GSEG_START(cb) ((void *)((unsigned long)(cb) & ~(GRU_GSEG_PAGESIZE - 1))) + +struct gru_get_gseg_statistics_req { + unsigned long gseg; + struct gru_gseg_statistics stats; +}; + +/* + * Structure used to pass TLB flush parameters to the driver + */ +struct gru_create_context_req { + unsigned long gseg; + unsigned int data_segment_bytes; + unsigned int control_blocks; + unsigned int maximum_thread_count; + unsigned int options; + unsigned char tlb_preload_count; +}; + +/* + * Structure used to pass unload context parameters to the driver + */ +struct gru_unload_context_req { + unsigned long gseg; +}; + +/* + * Structure used to set context options + */ +enum {sco_gseg_owner, sco_cch_req_slice, sco_blade_chiplet}; +struct gru_set_context_option_req { + unsigned long gseg; + int op; + int val0; + long val1; +}; + +/* + * Structure used to pass TLB flush parameters to the driver + */ +struct gru_flush_tlb_req { + unsigned long gseg; + unsigned long vaddr; + size_t len; +}; + +/* + * Structure used to pass TLB flush parameters to the driver + */ +enum {dcs_pid, dcs_gid}; +struct gru_dump_chiplet_state_req { + unsigned int op; + unsigned int gid; + int ctxnum; + char data_opt; + char lock_cch; + char flush_cbrs; + char fill[10]; + pid_t pid; + void *buf; + size_t buflen; + /* ---- output --- */ + unsigned int num_contexts; +}; + +#define GRU_DUMP_MAGIC 0x3474ab6c +struct gru_dump_context_header { + unsigned int magic; + unsigned int gid; + unsigned char ctxnum; + unsigned char cbrcnt; + unsigned char dsrcnt; + pid_t pid; + unsigned long vaddr; + int cch_locked; + unsigned long data[]; +}; + +/* + * GRU configuration info (temp - for testing) + */ +struct gru_config_info { + int cpus; + int blades; + int nodes; + int chiplets; + int fill[16]; +}; + +#endif /* __GRULIB_H__ */ diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c new file mode 100644 index 0000000000..4eb4b94551 --- /dev/null +++ b/drivers/misc/sgi-gru/grumain.c @@ -0,0 +1,977 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SN Platform GRU Driver + * + * DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "gru.h" +#include "grutables.h" +#include "gruhandles.h" + +unsigned long gru_options __read_mostly; + +static struct device_driver gru_driver = { + .name = "gru" +}; + +static struct device gru_device = { + .init_name = "", + .driver = &gru_driver, +}; + +struct device *grudev = &gru_device; + +/* + * Select a gru fault map to be used by the current cpu. Note that + * multiple cpus may be using the same map. + * ZZZ should be inline but did not work on emulator + */ +int gru_cpu_fault_map_id(void) +{ +#ifdef CONFIG_IA64 + return uv_blade_processor_id() % GRU_NUM_TFM; +#else + int cpu = smp_processor_id(); + int id, core; + + core = uv_cpu_core_number(cpu); + id = core + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu); + return id; +#endif +} + +/*--------- ASID Management ------------------------------------------- + * + * Initially, assign asids sequentially from MIN_ASID .. MAX_ASID. + * Once MAX is reached, flush the TLB & start over. However, + * some asids may still be in use. There won't be many (percentage wise) still + * in use. Search active contexts & determine the value of the first + * asid in use ("x"s below). Set "limit" to this value. + * This defines a block of assignable asids. + * + * When "limit" is reached, search forward from limit+1 and determine the + * next block of assignable asids. + * + * Repeat until MAX_ASID is reached, then start over again. + * + * Each time MAX_ASID is reached, increment the asid generation. Since + * the search for in-use asids only checks contexts with GRUs currently + * assigned, asids in some contexts will be missed. Prior to loading + * a context, the asid generation of the GTS asid is rechecked. If it + * doesn't match the current generation, a new asid will be assigned. + * + * 0---------------x------------x---------------------x----| + * ^-next ^-limit ^-MAX_ASID + * + * All asid manipulation & context loading/unloading is protected by the + * gs_lock. + */ + +/* Hit the asid limit. Start over */ +static int gru_wrap_asid(struct gru_state *gru) +{ + gru_dbg(grudev, "gid %d\n", gru->gs_gid); + STAT(asid_wrap); + gru->gs_asid_gen++; + return MIN_ASID; +} + +/* Find the next chunk of unused asids */ +static int gru_reset_asid_limit(struct gru_state *gru, int asid) +{ + int i, gid, inuse_asid, limit; + + gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid); + STAT(asid_next); + limit = MAX_ASID; + if (asid >= limit) + asid = gru_wrap_asid(gru); + gru_flush_all_tlb(gru); + gid = gru->gs_gid; +again: + for (i = 0; i < GRU_NUM_CCH; i++) { + if (!gru->gs_gts[i] || is_kernel_context(gru->gs_gts[i])) + continue; + inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid; + gru_dbg(grudev, "gid %d, gts %p, gms %p, inuse 0x%x, cxt %d\n", + gru->gs_gid, gru->gs_gts[i], gru->gs_gts[i]->ts_gms, + inuse_asid, i); + if (inuse_asid == asid) { + asid += ASID_INC; + if (asid >= limit) { + /* + * empty range: reset the range limit and + * start over + */ + limit = MAX_ASID; + if (asid >= MAX_ASID) + asid = gru_wrap_asid(gru); + goto again; + } + } + + if ((inuse_asid > asid) && (inuse_asid < limit)) + limit = inuse_asid; + } + gru->gs_asid_limit = limit; + gru->gs_asid = asid; + gru_dbg(grudev, "gid %d, new asid 0x%x, new_limit 0x%x\n", gru->gs_gid, + asid, limit); + return asid; +} + +/* Assign a new ASID to a thread context. 
*/ +static int gru_assign_asid(struct gru_state *gru) +{ + int asid; + + gru->gs_asid += ASID_INC; + asid = gru->gs_asid; + if (asid >= gru->gs_asid_limit) + asid = gru_reset_asid_limit(gru, asid); + + gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid); + return asid; +} + +/* + * Clear n bits in a word. Return a word indicating the bits that were cleared. + * Optionally, build an array of chars that contain the bit numbers allocated. + */ +static unsigned long reserve_resources(unsigned long *p, int n, int mmax, + signed char *idx) +{ + unsigned long bits = 0; + int i; + + while (n--) { + i = find_first_bit(p, mmax); + if (i == mmax) + BUG(); + __clear_bit(i, p); + __set_bit(i, &bits); + if (idx) + *idx++ = i; + } + return bits; +} + +unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count, + signed char *cbmap) +{ + return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU, + cbmap); +} + +unsigned long gru_reserve_ds_resources(struct gru_state *gru, int dsr_au_count, + signed char *dsmap) +{ + return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU, + dsmap); +} + +static void reserve_gru_resources(struct gru_state *gru, + struct gru_thread_state *gts) +{ + gru->gs_active_contexts++; + gts->ts_cbr_map = + gru_reserve_cb_resources(gru, gts->ts_cbr_au_count, + gts->ts_cbr_idx); + gts->ts_dsr_map = + gru_reserve_ds_resources(gru, gts->ts_dsr_au_count, NULL); +} + +static void free_gru_resources(struct gru_state *gru, + struct gru_thread_state *gts) +{ + gru->gs_active_contexts--; + gru->gs_cbr_map |= gts->ts_cbr_map; + gru->gs_dsr_map |= gts->ts_dsr_map; +} + +/* + * Check if a GRU has sufficient free resources to satisfy an allocation + * request. Note: GRU locks may or may not be held when this is called. If + * not held, recheck after acquiring the appropriate locks. + * + * Returns 1 if sufficient resources, 0 if not + */ +static int check_gru_resources(struct gru_state *gru, int cbr_au_count, + int dsr_au_count, int max_active_contexts) +{ + return hweight64(gru->gs_cbr_map) >= cbr_au_count + && hweight64(gru->gs_dsr_map) >= dsr_au_count + && gru->gs_active_contexts < max_active_contexts; +} + +/* + * TLB manangment requires tracking all GRU chiplets that have loaded a GSEG + * context. 
+ */ +static int gru_load_mm_tracker(struct gru_state *gru, + struct gru_thread_state *gts) +{ + struct gru_mm_struct *gms = gts->ts_gms; + struct gru_mm_tracker *asids = &gms->ms_asids[gru->gs_gid]; + unsigned short ctxbitmap = (1 << gts->ts_ctxnum); + int asid; + + spin_lock(&gms->ms_asid_lock); + asid = asids->mt_asid; + + spin_lock(&gru->gs_asid_lock); + if (asid == 0 || (asids->mt_ctxbitmap == 0 && asids->mt_asid_gen != + gru->gs_asid_gen)) { + asid = gru_assign_asid(gru); + asids->mt_asid = asid; + asids->mt_asid_gen = gru->gs_asid_gen; + STAT(asid_new); + } else { + STAT(asid_reuse); + } + spin_unlock(&gru->gs_asid_lock); + + BUG_ON(asids->mt_ctxbitmap & ctxbitmap); + asids->mt_ctxbitmap |= ctxbitmap; + if (!test_bit(gru->gs_gid, gms->ms_asidmap)) + __set_bit(gru->gs_gid, gms->ms_asidmap); + spin_unlock(&gms->ms_asid_lock); + + gru_dbg(grudev, + "gid %d, gts %p, gms %p, ctxnum %d, asid 0x%x, asidmap 0x%lx\n", + gru->gs_gid, gts, gms, gts->ts_ctxnum, asid, + gms->ms_asidmap[0]); + return asid; +} + +static void gru_unload_mm_tracker(struct gru_state *gru, + struct gru_thread_state *gts) +{ + struct gru_mm_struct *gms = gts->ts_gms; + struct gru_mm_tracker *asids; + unsigned short ctxbitmap; + + asids = &gms->ms_asids[gru->gs_gid]; + ctxbitmap = (1 << gts->ts_ctxnum); + spin_lock(&gms->ms_asid_lock); + spin_lock(&gru->gs_asid_lock); + BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap); + asids->mt_ctxbitmap ^= ctxbitmap; + gru_dbg(grudev, "gid %d, gts %p, gms %p, ctxnum %d, asidmap 0x%lx\n", + gru->gs_gid, gts, gms, gts->ts_ctxnum, gms->ms_asidmap[0]); + spin_unlock(&gru->gs_asid_lock); + spin_unlock(&gms->ms_asid_lock); +} + +/* + * Decrement the reference count on a GTS structure. Free the structure + * if the reference count goes to zero. + */ +void gts_drop(struct gru_thread_state *gts) +{ + if (gts && refcount_dec_and_test(>s->ts_refcnt)) { + if (gts->ts_gms) + gru_drop_mmu_notifier(gts->ts_gms); + kfree(gts); + STAT(gts_free); + } +} + +/* + * Locate the GTS structure for the current thread. + */ +static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data + *vdata, int tsid) +{ + struct gru_thread_state *gts; + + list_for_each_entry(gts, &vdata->vd_head, ts_next) + if (gts->ts_tsid == tsid) + return gts; + return NULL; +} + +/* + * Allocate a thread state structure. 
+ */ +struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, + int cbr_au_count, int dsr_au_count, + unsigned char tlb_preload_count, int options, int tsid) +{ + struct gru_thread_state *gts; + struct gru_mm_struct *gms; + int bytes; + + bytes = DSR_BYTES(dsr_au_count) + CBR_BYTES(cbr_au_count); + bytes += sizeof(struct gru_thread_state); + gts = kmalloc(bytes, GFP_KERNEL); + if (!gts) + return ERR_PTR(-ENOMEM); + + STAT(gts_alloc); + memset(gts, 0, sizeof(struct gru_thread_state)); /* zero out header */ + refcount_set(&gts->ts_refcnt, 1); + mutex_init(&gts->ts_ctxlock); + gts->ts_cbr_au_count = cbr_au_count; + gts->ts_dsr_au_count = dsr_au_count; + gts->ts_tlb_preload_count = tlb_preload_count; + gts->ts_user_options = options; + gts->ts_user_blade_id = -1; + gts->ts_user_chiplet_id = -1; + gts->ts_tsid = tsid; + gts->ts_ctxnum = NULLCTX; + gts->ts_tlb_int_select = -1; + gts->ts_cch_req_slice = -1; + gts->ts_sizeavail = GRU_SIZEAVAIL(PAGE_SHIFT); + if (vma) { + gts->ts_mm = current->mm; + gts->ts_vma = vma; + gms = gru_register_mmu_notifier(); + if (IS_ERR(gms)) + goto err; + gts->ts_gms = gms; + } + + gru_dbg(grudev, "alloc gts %p\n", gts); + return gts; + +err: + gts_drop(gts); + return ERR_CAST(gms); +} + +/* + * Allocate a vma private data structure. + */ +struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid) +{ + struct gru_vma_data *vdata = NULL; + + vdata = kmalloc(sizeof(*vdata), GFP_KERNEL); + if (!vdata) + return NULL; + + STAT(vdata_alloc); + INIT_LIST_HEAD(&vdata->vd_head); + spin_lock_init(&vdata->vd_lock); + gru_dbg(grudev, "alloc vdata %p\n", vdata); + return vdata; +} + +/* + * Find the thread state structure for the current thread. + */ +struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma, + int tsid) +{ + struct gru_vma_data *vdata = vma->vm_private_data; + struct gru_thread_state *gts; + + spin_lock(&vdata->vd_lock); + gts = gru_find_current_gts_nolock(vdata, tsid); + spin_unlock(&vdata->vd_lock); + gru_dbg(grudev, "vma %p, gts %p\n", vma, gts); + return gts; +} + +/* + * Allocate a new thread state for a GSEG. Note that races may allow + * another thread to create a gts first. + */ +struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma, + int tsid) +{ + struct gru_vma_data *vdata = vma->vm_private_data; + struct gru_thread_state *gts, *ngts; + + gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count, + vdata->vd_dsr_au_count, + vdata->vd_tlb_preload_count, + vdata->vd_user_options, tsid); + if (IS_ERR(gts)) + return gts; + + spin_lock(&vdata->vd_lock); + ngts = gru_find_current_gts_nolock(vdata, tsid); + if (ngts) { + gts_drop(gts); + gts = ngts; + STAT(gts_double_allocate); + } else { + list_add(&gts->ts_next, &vdata->vd_head); + } + spin_unlock(&vdata->vd_lock); + gru_dbg(grudev, "vma %p, gts %p\n", vma, gts); + return gts; +} + +/* + * Free the GRU context assigned to the thread state. + */ +static void gru_free_gru_context(struct gru_thread_state *gts) +{ + struct gru_state *gru; + + gru = gts->ts_gru; + gru_dbg(grudev, "gts %p, gid %d\n", gts, gru->gs_gid); + + spin_lock(&gru->gs_lock); + gru->gs_gts[gts->ts_ctxnum] = NULL; + free_gru_resources(gru, gts); + BUG_ON(test_bit(gts->ts_ctxnum, &gru->gs_context_map) == 0); + __clear_bit(gts->ts_ctxnum, &gru->gs_context_map); + gts->ts_ctxnum = NULLCTX; + gts->ts_gru = NULL; + gts->ts_blade = -1; + spin_unlock(&gru->gs_lock); + + gts_drop(gts); + STAT(free_context); +} + +/* + * Prefetching cachelines helps hardware performance.
+ * (Strictly a performance enhancement. Not functionally required). + */ +static void prefetch_data(void *p, int num, int stride) +{ + while (num-- > 0) { + prefetchw(p); + p += stride; + } +} + +static inline long gru_copy_handle(void *d, void *s) +{ + memcpy(d, s, GRU_HANDLE_BYTES); + return GRU_HANDLE_BYTES; +} + +static void gru_prefetch_context(void *gseg, void *cb, void *cbe, + unsigned long cbrmap, unsigned long length) +{ + int i, scr; + + prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES, + GRU_CACHE_LINE_BYTES); + + for_each_cbr_in_allocation_map(i, &cbrmap, scr) { + prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES); + prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1, + GRU_CACHE_LINE_BYTES); + cb += GRU_HANDLE_STRIDE; + } +} + +static void gru_load_context_data(void *save, void *grubase, int ctxnum, + unsigned long cbrmap, unsigned long dsrmap, + int data_valid) +{ + void *gseg, *cb, *cbe; + unsigned long length; + int i, scr; + + gseg = grubase + ctxnum * GRU_GSEG_STRIDE; + cb = gseg + GRU_CB_BASE; + cbe = grubase + GRU_CBE_BASE; + length = hweight64(dsrmap) * GRU_DSR_AU_BYTES; + gru_prefetch_context(gseg, cb, cbe, cbrmap, length); + + for_each_cbr_in_allocation_map(i, &cbrmap, scr) { + if (data_valid) { + save += gru_copy_handle(cb, save); + save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, + save); + } else { + memset(cb, 0, GRU_CACHE_LINE_BYTES); + memset(cbe + i * GRU_HANDLE_STRIDE, 0, + GRU_CACHE_LINE_BYTES); + } + /* Flush CBE to hide race in context restart */ + mb(); + gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE); + cb += GRU_HANDLE_STRIDE; + } + + if (data_valid) + memcpy(gseg + GRU_DS_BASE, save, length); + else + memset(gseg + GRU_DS_BASE, 0, length); +} + +static void gru_unload_context_data(void *save, void *grubase, int ctxnum, + unsigned long cbrmap, unsigned long dsrmap) +{ + void *gseg, *cb, *cbe; + unsigned long length; + int i, scr; + + gseg = grubase + ctxnum * GRU_GSEG_STRIDE; + cb = gseg + GRU_CB_BASE; + cbe = grubase + GRU_CBE_BASE; + length = hweight64(dsrmap) * GRU_DSR_AU_BYTES; + + /* CBEs may not be coherent. Flush them from cache */ + for_each_cbr_in_allocation_map(i, &cbrmap, scr) + gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE); + mb(); /* Let the CL flush complete */ + + gru_prefetch_context(gseg, cb, cbe, cbrmap, length); + + for_each_cbr_in_allocation_map(i, &cbrmap, scr) { + save += gru_copy_handle(save, cb); + save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE); + cb += GRU_HANDLE_STRIDE; + } + memcpy(save, gseg + GRU_DS_BASE, length); +} + +void gru_unload_context(struct gru_thread_state *gts, int savestate) +{ + struct gru_state *gru = gts->ts_gru; + struct gru_context_configuration_handle *cch; + int ctxnum = gts->ts_ctxnum; + + if (!is_kernel_context(gts)) + zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE); + cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); + + gru_dbg(grudev, "gts %p, cbrmap 0x%lx, dsrmap 0x%lx\n", + gts, gts->ts_cbr_map, gts->ts_dsr_map); + lock_cch_handle(cch); + if (cch_interrupt_sync(cch)) + BUG(); + + if (!is_kernel_context(gts)) + gru_unload_mm_tracker(gru, gts); + if (savestate) { + gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, + ctxnum, gts->ts_cbr_map, + gts->ts_dsr_map); + gts->ts_data_valid = 1; + } + + if (cch_deallocate(cch)) + BUG(); + unlock_cch_handle(cch); + + gru_free_gru_context(gts); +} + +/* + * Load a GRU context by copying it from the thread data structure in memory + * to the GRU. 
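+ * In outline the load is (see gru_load_context() below for the details):
+ *
+ *	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
+ *	lock_cch_handle(cch);
+ *	-- program interrupt/fault enables, the CBR/DSR allocation maps and,
+ *	-- for user contexts, the per-region ASIDs and sizeavail masks
+ *	cch_allocate(cch);
+ *	gru_load_context_data(...);	-- copy CBs, CBEs and DS from ts_gdata
+ *	cch_start(cch);
+ *	unlock_cch_handle(cch);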
+ */ +void gru_load_context(struct gru_thread_state *gts) +{ + struct gru_state *gru = gts->ts_gru; + struct gru_context_configuration_handle *cch; + int i, err, asid, ctxnum = gts->ts_ctxnum; + + cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); + lock_cch_handle(cch); + cch->tfm_fault_bit_enable = + (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL + || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR); + cch->tlb_int_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR); + if (cch->tlb_int_enable) { + gts->ts_tlb_int_select = gru_cpu_fault_map_id(); + cch->tlb_int_select = gts->ts_tlb_int_select; + } + if (gts->ts_cch_req_slice >= 0) { + cch->req_slice_set_enable = 1; + cch->req_slice = gts->ts_cch_req_slice; + } else { + cch->req_slice_set_enable =0; + } + cch->tfm_done_bit_enable = 0; + cch->dsr_allocation_map = gts->ts_dsr_map; + cch->cbr_allocation_map = gts->ts_cbr_map; + + if (is_kernel_context(gts)) { + cch->unmap_enable = 1; + cch->tfm_done_bit_enable = 1; + cch->cb_int_enable = 1; + cch->tlb_int_select = 0; /* For now, ints go to cpu 0 */ + } else { + cch->unmap_enable = 0; + cch->tfm_done_bit_enable = 0; + cch->cb_int_enable = 0; + asid = gru_load_mm_tracker(gru, gts); + for (i = 0; i < 8; i++) { + cch->asid[i] = asid + i; + cch->sizeavail[i] = gts->ts_sizeavail; + } + } + + err = cch_allocate(cch); + if (err) { + gru_dbg(grudev, + "err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n", + err, cch, gts, gts->ts_cbr_map, gts->ts_dsr_map); + BUG(); + } + + gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum, + gts->ts_cbr_map, gts->ts_dsr_map, gts->ts_data_valid); + + if (cch_start(cch)) + BUG(); + unlock_cch_handle(cch); + + gru_dbg(grudev, "gid %d, gts %p, cbrmap 0x%lx, dsrmap 0x%lx, tie %d, tis %d\n", + gts->ts_gru->gs_gid, gts, gts->ts_cbr_map, gts->ts_dsr_map, + (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR), gts->ts_tlb_int_select); +} + +/* + * Update fields in an active CCH: + * - retarget interrupts on local blade + * - update sizeavail mask + */ +int gru_update_cch(struct gru_thread_state *gts) +{ + struct gru_context_configuration_handle *cch; + struct gru_state *gru = gts->ts_gru; + int i, ctxnum = gts->ts_ctxnum, ret = 0; + + cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); + + lock_cch_handle(cch); + if (cch->state == CCHSTATE_ACTIVE) { + if (gru->gs_gts[gts->ts_ctxnum] != gts) + goto exit; + if (cch_interrupt(cch)) + BUG(); + for (i = 0; i < 8; i++) + cch->sizeavail[i] = gts->ts_sizeavail; + gts->ts_tlb_int_select = gru_cpu_fault_map_id(); + cch->tlb_int_select = gru_cpu_fault_map_id(); + cch->tfm_fault_bit_enable = + (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL + || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR); + if (cch_start(cch)) + BUG(); + ret = 1; + } +exit: + unlock_cch_handle(cch); + return ret; +} + +/* + * Update CCH tlb interrupt select. Required when all the following is true: + * - task's GRU context is loaded into a GRU + * - task is using interrupt notification for TLB faults + * - task has migrated to a different cpu on the same blade where + * it was previously running. + */ +static int gru_retarget_intr(struct gru_thread_state *gts) +{ + if (gts->ts_tlb_int_select < 0 + || gts->ts_tlb_int_select == gru_cpu_fault_map_id()) + return 0; + + gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select, + gru_cpu_fault_map_id()); + return gru_update_cch(gts); +} + +/* + * Check if a GRU context is allowed to use a specific chiplet. By default + * a context is assigned to any blade-local chiplet. However, users can + * override this. 
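+ * The override comes from the user-selected ts_user_blade_id and
+ * ts_user_chiplet_id fields in the GTS (most likely set through
+ * gru_set_context_option(), declared in grutables.h); a negative value
+ * means "no preference".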
+ * Returns 1 if assignment allowed, 0 otherwise + */ +static int gru_check_chiplet_assignment(struct gru_state *gru, + struct gru_thread_state *gts) +{ + int blade_id; + int chiplet_id; + + blade_id = gts->ts_user_blade_id; + if (blade_id < 0) + blade_id = uv_numa_blade_id(); + + chiplet_id = gts->ts_user_chiplet_id; + return gru->gs_blade_id == blade_id && + (chiplet_id < 0 || chiplet_id == gru->gs_chiplet_id); +} + +/* + * Unload the gru context if it is not assigned to the correct blade or + * chiplet. Misassignment can occur if the process migrates to a different + * blade or if the user changes the selected blade/chiplet. + */ +int gru_check_context_placement(struct gru_thread_state *gts) +{ + struct gru_state *gru; + int ret = 0; + + /* + * If the current task is the context owner, verify that the + * context is correctly placed. This test is skipped for non-owner + * references. Pthread apps use non-owner references to the CBRs. + */ + gru = gts->ts_gru; + /* + * If gru or gts->ts_tgid_owner isn't initialized properly, return + * success to indicate that the caller does not need to unload the + * gru context. The caller is responsible for inspecting and + * reinitializing it if needed. + */ + if (!gru || gts->ts_tgid_owner != current->tgid) + return ret; + + if (!gru_check_chiplet_assignment(gru, gts)) { + STAT(check_context_unload); + ret = -EINVAL; + } else if (gru_retarget_intr(gts)) { + STAT(check_context_retarget_intr); + } + + return ret; +} + + +/* + * Insufficient GRU resources available on the local blade. Steal a context from + * a process. This is a hack until a _real_ resource scheduler is written.... + */ +#define next_ctxnum(n) ((n) < GRU_NUM_CCH - 2 ? (n) + 1 : 0) +#define next_gru(b, g) (((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ? \ + ((g)+1) : &(b)->bs_grus[0]) + +static int is_gts_stealable(struct gru_thread_state *gts, + struct gru_blade_state *bs) +{ + if (is_kernel_context(gts)) + return down_write_trylock(&bs->bs_kgts_sema); + else + return mutex_trylock(&gts->ts_ctxlock); +} + +static void gts_stolen(struct gru_thread_state *gts, + struct gru_blade_state *bs) +{ + if (is_kernel_context(gts)) { + up_write(&bs->bs_kgts_sema); + STAT(steal_kernel_context); + } else { + mutex_unlock(&gts->ts_ctxlock); + STAT(steal_user_context); + } +} + +void gru_steal_context(struct gru_thread_state *gts) +{ + struct gru_blade_state *blade; + struct gru_state *gru, *gru0; + struct gru_thread_state *ngts = NULL; + int ctxnum, ctxnum0, flag = 0, cbr, dsr; + int blade_id; + + blade_id = gts->ts_user_blade_id; + if (blade_id < 0) + blade_id = uv_numa_blade_id(); + cbr = gts->ts_cbr_au_count; + dsr = gts->ts_dsr_au_count; + + blade = gru_base[blade_id]; + spin_lock(&blade->bs_lock); + + ctxnum = next_ctxnum(blade->bs_lru_ctxnum); + gru = blade->bs_lru_gru; + if (ctxnum == 0) + gru = next_gru(blade, gru); + blade->bs_lru_gru = gru; + blade->bs_lru_ctxnum = ctxnum; + ctxnum0 = ctxnum; + gru0 = gru; + while (1) { + if (gru_check_chiplet_assignment(gru, gts)) { + if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH)) + break; + spin_lock(&gru->gs_lock); + for (; ctxnum < GRU_NUM_CCH; ctxnum++) { + if (flag && gru == gru0 && ctxnum == ctxnum0) + break; + ngts = gru->gs_gts[ctxnum]; + /* + * We are grabbing locks out of order, so trylock is + * needed. GTSs are usually not locked, so the odds of + * success are high. If trylock fails, try to steal a + * different GSEG.
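+ * "Stealable" simply means the context's lock can be taken without
+ * blocking; from is_gts_stealable() above that is, in effect:
+ *
+ *	is_kernel_context(ngts) ? down_write_trylock(&bs->bs_kgts_sema)
+ *				: mutex_trylock(&ngts->ts_ctxlock)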
+ */ + if (ngts && is_gts_stealable(ngts, blade)) + break; + ngts = NULL; + } + spin_unlock(&gru->gs_lock); + if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0)) + break; + } + if (flag && gru == gru0) + break; + flag = 1; + ctxnum = 0; + gru = next_gru(blade, gru); + } + spin_unlock(&blade->bs_lock); + + if (ngts) { + gts->ustats.context_stolen++; + ngts->ts_steal_jiffies = jiffies; + gru_unload_context(ngts, is_kernel_context(ngts) ? 0 : 1); + gts_stolen(ngts, blade); + } else { + STAT(steal_context_failed); + } + gru_dbg(grudev, + "stole gid %d, ctxnum %d from gts %p. Need cb %d, ds %d;" + " avail cb %ld, ds %ld\n", + gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map), + hweight64(gru->gs_dsr_map)); +} + +/* + * Assign a gru context. + */ +static int gru_assign_context_number(struct gru_state *gru) +{ + int ctxnum; + + ctxnum = find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH); + __set_bit(ctxnum, &gru->gs_context_map); + return ctxnum; +} + +/* + * Scan the GRUs on the local blade & assign a GRU context. + */ +struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts) +{ + struct gru_state *gru, *grux; + int i, max_active_contexts; + int blade_id = gts->ts_user_blade_id; + + if (blade_id < 0) + blade_id = uv_numa_blade_id(); +again: + gru = NULL; + max_active_contexts = GRU_NUM_CCH; + for_each_gru_on_blade(grux, blade_id, i) { + if (!gru_check_chiplet_assignment(grux, gts)) + continue; + if (check_gru_resources(grux, gts->ts_cbr_au_count, + gts->ts_dsr_au_count, + max_active_contexts)) { + gru = grux; + max_active_contexts = grux->gs_active_contexts; + if (max_active_contexts == 0) + break; + } + } + + if (gru) { + spin_lock(&gru->gs_lock); + if (!check_gru_resources(gru, gts->ts_cbr_au_count, + gts->ts_dsr_au_count, GRU_NUM_CCH)) { + spin_unlock(&gru->gs_lock); + goto again; + } + reserve_gru_resources(gru, gts); + gts->ts_gru = gru; + gts->ts_blade = gru->gs_blade_id; + gts->ts_ctxnum = gru_assign_context_number(gru); + refcount_inc(&gts->ts_refcnt); + gru->gs_gts[gts->ts_ctxnum] = gts; + spin_unlock(&gru->gs_lock); + + STAT(assign_context); + gru_dbg(grudev, + "gseg %p, gts %p, gid %d, ctx %d, cbr %d, dsr %d\n", + gseg_virtual_address(gts->ts_gru, gts->ts_ctxnum), gts, + gts->ts_gru->gs_gid, gts->ts_ctxnum, + gts->ts_cbr_au_count, gts->ts_dsr_au_count); + } else { + gru_dbg(grudev, "failed to allocate a GTS %s\n", ""); + STAT(assign_context_failed); + } + + return gru; +} + +/* + * gru_nopage + * + * Map the user's GRU segment + * + * Note: gru segments are always mmapped on GRU_GSEG_PAGESIZE boundaries.
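+ * The fault path below is roughly:
+ *	- look up the gts for the faulting GSEG (tsid derived from vaddr)
+ *	- verify placement; if wrong, unload and let a later fault reload it
+ *	- if no GRU is assigned, try gru_assign_gru_context(); after
+ *	  GRU_STEAL_DELAY jiffies of failure, steal one via gru_steal_context()
+ *	- load the context and map the whole GSEG with
+ *	  remap_pfn_range(vma, vaddr & ~(GRU_GSEG_PAGESIZE - 1), ...)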
+ */ +vm_fault_t gru_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct gru_thread_state *gts; + unsigned long paddr, vaddr; + unsigned long expires; + + vaddr = vmf->address; + gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n", + vma, vaddr, GSEG_BASE(vaddr)); + STAT(nopfn); + + /* The following check ensures vaddr is a valid address in the VMA */ + gts = gru_find_thread_state(vma, TSID(vaddr, vma)); + if (!gts) + return VM_FAULT_SIGBUS; + +again: + mutex_lock(&gts->ts_ctxlock); + preempt_disable(); + + if (gru_check_context_placement(gts)) { + preempt_enable(); + mutex_unlock(&gts->ts_ctxlock); + gru_unload_context(gts, 1); + return VM_FAULT_NOPAGE; + } + + if (!gts->ts_gru) { + STAT(load_user_context); + if (!gru_assign_gru_context(gts)) { + preempt_enable(); + mutex_unlock(&gts->ts_ctxlock); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */ + expires = gts->ts_steal_jiffies + GRU_STEAL_DELAY; + if (time_before(expires, jiffies)) + gru_steal_context(gts); + goto again; + } + gru_load_context(gts); + paddr = gseg_physical_address(gts->ts_gru, gts->ts_ctxnum); + remap_pfn_range(vma, vaddr & ~(GRU_GSEG_PAGESIZE - 1), + paddr >> PAGE_SHIFT, GRU_GSEG_PAGESIZE, + vma->vm_page_prot); + } + + preempt_enable(); + mutex_unlock(&gts->ts_ctxlock); + + return VM_FAULT_NOPAGE; +} + diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c new file mode 100644 index 0000000000..97b8b38ab4 --- /dev/null +++ b/drivers/misc/sgi-gru/gruprocfs.c @@ -0,0 +1,308 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SN Platform GRU Driver + * + * PROC INTERFACES + * + * This file supports the /proc interfaces for the GRU driver + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ */ + +#include +#include +#include +#include +#include "gru.h" +#include "grulib.h" +#include "grutables.h" + +#define printstat(s, f) printstat_val(s, &gru_stats.f, #f) + +static void printstat_val(struct seq_file *s, atomic_long_t *v, char *id) +{ + unsigned long val = atomic_long_read(v); + + seq_printf(s, "%16lu %s\n", val, id); +} + +static int statistics_show(struct seq_file *s, void *p) +{ + printstat(s, vdata_alloc); + printstat(s, vdata_free); + printstat(s, gts_alloc); + printstat(s, gts_free); + printstat(s, gms_alloc); + printstat(s, gms_free); + printstat(s, gts_double_allocate); + printstat(s, assign_context); + printstat(s, assign_context_failed); + printstat(s, free_context); + printstat(s, load_user_context); + printstat(s, load_kernel_context); + printstat(s, lock_kernel_context); + printstat(s, unlock_kernel_context); + printstat(s, steal_user_context); + printstat(s, steal_kernel_context); + printstat(s, steal_context_failed); + printstat(s, nopfn); + printstat(s, asid_new); + printstat(s, asid_next); + printstat(s, asid_wrap); + printstat(s, asid_reuse); + printstat(s, intr); + printstat(s, intr_cbr); + printstat(s, intr_tfh); + printstat(s, intr_spurious); + printstat(s, intr_mm_lock_failed); + printstat(s, call_os); + printstat(s, call_os_wait_queue); + printstat(s, user_flush_tlb); + printstat(s, user_unload_context); + printstat(s, user_exception); + printstat(s, set_context_option); + printstat(s, check_context_retarget_intr); + printstat(s, check_context_unload); + printstat(s, tlb_dropin); + printstat(s, tlb_preload_page); + printstat(s, tlb_dropin_fail_no_asid); + printstat(s, tlb_dropin_fail_upm); + printstat(s, tlb_dropin_fail_invalid); + printstat(s, tlb_dropin_fail_range_active); + printstat(s, tlb_dropin_fail_idle); + printstat(s, tlb_dropin_fail_fmm); + printstat(s, tlb_dropin_fail_no_exception); + printstat(s, tfh_stale_on_fault); + printstat(s, mmu_invalidate_range); + printstat(s, mmu_invalidate_page); + printstat(s, flush_tlb); + printstat(s, flush_tlb_gru); + printstat(s, flush_tlb_gru_tgh); + printstat(s, flush_tlb_gru_zero_asid); + printstat(s, copy_gpa); + printstat(s, read_gpa); + printstat(s, mesq_receive); + printstat(s, mesq_receive_none); + printstat(s, mesq_send); + printstat(s, mesq_send_failed); + printstat(s, mesq_noop); + printstat(s, mesq_send_unexpected_error); + printstat(s, mesq_send_lb_overflow); + printstat(s, mesq_send_qlimit_reached); + printstat(s, mesq_send_amo_nacked); + printstat(s, mesq_send_put_nacked); + printstat(s, mesq_qf_locked); + printstat(s, mesq_qf_noop_not_full); + printstat(s, mesq_qf_switch_head_failed); + printstat(s, mesq_qf_unexpected_error); + printstat(s, mesq_noop_unexpected_error); + printstat(s, mesq_noop_lb_overflow); + printstat(s, mesq_noop_qlimit_reached); + printstat(s, mesq_noop_amo_nacked); + printstat(s, mesq_noop_put_nacked); + printstat(s, mesq_noop_page_overflow); + return 0; +} + +static ssize_t statistics_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *data) +{ + memset(&gru_stats, 0, sizeof(gru_stats)); + return count; +} + +static int mcs_statistics_show(struct seq_file *s, void *p) +{ + int op; + unsigned long total, count, max; + static char *id[] = {"cch_allocate", "cch_start", "cch_interrupt", + "cch_interrupt_sync", "cch_deallocate", "tfh_write_only", + "tfh_write_restart", "tgh_invalidate"}; + + seq_puts(s, "#id count aver-clks max-clks\n"); + for (op = 0; op < mcsop_last; op++) { + count = atomic_long_read(&mcs_op_statistics[op].count); + total = 
atomic_long_read(&mcs_op_statistics[op].total); + max = mcs_op_statistics[op].max; + seq_printf(s, "%-20s%12ld%12ld%12ld\n", id[op], count, + count ? total / count : 0, max); + } + return 0; +} + +static ssize_t mcs_statistics_write(struct file *file, + const char __user *userbuf, size_t count, loff_t *data) +{ + memset(mcs_op_statistics, 0, sizeof(mcs_op_statistics)); + return count; +} + +static int options_show(struct seq_file *s, void *p) +{ + seq_printf(s, "#bitmask: 1=trace, 2=statistics\n"); + seq_printf(s, "0x%lx\n", gru_options); + return 0; +} + +static ssize_t options_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *data) +{ + int ret; + + ret = kstrtoul_from_user(userbuf, count, 0, &gru_options); + if (ret) + return ret; + + return count; +} + +static int cch_seq_show(struct seq_file *file, void *data) +{ + long gid = *(long *)data; + int i; + struct gru_state *gru = GID_TO_GRU(gid); + struct gru_thread_state *ts; + const char *mode[] = { "??", "UPM", "INTR", "OS_POLL" }; + + if (gid == 0) + seq_puts(file, "# gid bid ctx# asid pid cbrs dsbytes mode\n"); + if (gru) + for (i = 0; i < GRU_NUM_CCH; i++) { + ts = gru->gs_gts[i]; + if (!ts) + continue; + seq_printf(file, " %5d%5d%6d%7d%9d%6d%8d%8s\n", + gru->gs_gid, gru->gs_blade_id, i, + is_kernel_context(ts) ? 0 : ts->ts_gms->ms_asids[gid].mt_asid, + is_kernel_context(ts) ? 0 : ts->ts_tgid_owner, + ts->ts_cbr_au_count * GRU_CBR_AU_SIZE, + ts->ts_cbr_au_count * GRU_DSR_AU_BYTES, + mode[ts->ts_user_options & + GRU_OPT_MISS_MASK]); + } + + return 0; +} + +static int gru_seq_show(struct seq_file *file, void *data) +{ + long gid = *(long *)data, ctxfree, cbrfree, dsrfree; + struct gru_state *gru = GID_TO_GRU(gid); + + if (gid == 0) { + seq_puts(file, "# gid nid ctx cbr dsr ctx cbr dsr\n"); + seq_puts(file, "# busy busy busy free free free\n"); + } + if (gru) { + ctxfree = GRU_NUM_CCH - gru->gs_active_contexts; + cbrfree = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE; + dsrfree = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES; + seq_printf(file, " %5d%5d%7ld%6ld%6ld%8ld%6ld%6ld\n", + gru->gs_gid, gru->gs_blade_id, GRU_NUM_CCH - ctxfree, + GRU_NUM_CBE - cbrfree, GRU_NUM_DSR_BYTES - dsrfree, + ctxfree, cbrfree, dsrfree); + } + + return 0; +} + +static void seq_stop(struct seq_file *file, void *data) +{ +} + +static void *seq_start(struct seq_file *file, loff_t *gid) +{ + if (*gid < gru_max_gids) + return gid; + return NULL; +} + +static void *seq_next(struct seq_file *file, void *data, loff_t *gid) +{ + (*gid)++; + if (*gid < gru_max_gids) + return gid; + return NULL; +} + +static const struct seq_operations cch_seq_ops = { + .start = seq_start, + .next = seq_next, + .stop = seq_stop, + .show = cch_seq_show +}; + +static const struct seq_operations gru_seq_ops = { + .start = seq_start, + .next = seq_next, + .stop = seq_stop, + .show = gru_seq_show +}; + +static int statistics_open(struct inode *inode, struct file *file) +{ + return single_open(file, statistics_show, NULL); +} + +static int mcs_statistics_open(struct inode *inode, struct file *file) +{ + return single_open(file, mcs_statistics_show, NULL); +} + +static int options_open(struct inode *inode, struct file *file) +{ + return single_open(file, options_show, NULL); +} + +/* *INDENT-OFF* */ +static const struct proc_ops statistics_proc_ops = { + .proc_open = statistics_open, + .proc_read = seq_read, + .proc_write = statistics_write, + .proc_lseek = seq_lseek, + .proc_release = single_release, +}; + +static const struct proc_ops mcs_statistics_proc_ops = { 
+ .proc_open = mcs_statistics_open, + .proc_read = seq_read, + .proc_write = mcs_statistics_write, + .proc_lseek = seq_lseek, + .proc_release = single_release, +}; + +static const struct proc_ops options_proc_ops = { + .proc_open = options_open, + .proc_read = seq_read, + .proc_write = options_write, + .proc_lseek = seq_lseek, + .proc_release = single_release, +}; + +static struct proc_dir_entry *proc_gru __read_mostly; + +int gru_proc_init(void) +{ + proc_gru = proc_mkdir("sgi_uv/gru", NULL); + if (!proc_gru) + return -1; + if (!proc_create("statistics", 0644, proc_gru, &statistics_proc_ops)) + goto err; + if (!proc_create("mcs_statistics", 0644, proc_gru, &mcs_statistics_proc_ops)) + goto err; + if (!proc_create("debug_options", 0644, proc_gru, &options_proc_ops)) + goto err; + if (!proc_create_seq("cch_status", 0444, proc_gru, &cch_seq_ops)) + goto err; + if (!proc_create_seq("gru_status", 0444, proc_gru, &gru_seq_ops)) + goto err; + return 0; +err: + remove_proc_subtree("sgi_uv/gru", NULL); + return -1; +} + +void gru_proc_exit(void) +{ + remove_proc_subtree("sgi_uv/gru", NULL); +} diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h new file mode 100644 index 0000000000..640daf1994 --- /dev/null +++ b/drivers/misc/sgi-gru/grutables.h @@ -0,0 +1,659 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SN Platform GRU Driver + * + * GRU DRIVER TABLES, MACROS, externs, etc + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +#ifndef __GRUTABLES_H__ +#define __GRUTABLES_H__ + +/* + * GRU Chiplet: + * The GRU is a user addressible memory accelerator. It provides + * several forms of load, store, memset, bcopy instructions. In addition, it + * contains special instructions for AMOs, sending messages to message + * queues, etc. + * + * The GRU is an integral part of the node controller. It connects + * directly to the cpu socket. In its current implementation, there are 2 + * GRU chiplets in the node controller on each blade (~node). + * + * The entire GRU memory space is fully coherent and cacheable by the cpus. + * + * Each GRU chiplet has a physical memory map that looks like the following: + * + * +-----------------+ + * |/////////////////| + * |/////////////////| + * |/////////////////| + * |/////////////////| + * |/////////////////| + * |/////////////////| + * |/////////////////| + * |/////////////////| + * +-----------------+ + * | system control | + * +-----------------+ _______ +-------------+ + * |/////////////////| / | | + * |/////////////////| / | | + * |/////////////////| / | instructions| + * |/////////////////| / | | + * |/////////////////| / | | + * |/////////////////| / |-------------| + * |/////////////////| / | | + * +-----------------+ | | + * | context 15 | | data | + * +-----------------+ | | + * | ...... | \ | | + * +-----------------+ \____________ +-------------+ + * | context 1 | + * +-----------------+ + * | context 0 | + * +-----------------+ + * + * Each of the "contexts" is a chunk of memory that can be mmaped into user + * space. The context consists of 2 parts: + * + * - an instruction space that can be directly accessed by the user + * to issue GRU instructions and to check instruction status. + * + * - a data area that acts as normal RAM. + * + * User instructions contain virtual addresses of data to be accessed by the + * GRU. The GRU contains a TLB that is used to convert these user virtual + * addresses to physical addresses. 
+ * + * The "system control" area of the GRU chiplet is used by the kernel driver + * to manage user contexts and to perform functions such as TLB dropin and + * purging. + * + * One context may be reserved for the kernel and used for cross-partition + * communication. The GRU will also be used to asynchronously zero out + * large blocks of memory (not currently implemented). + * + * + * Tables: + * + * VDATA-VMA Data - Holds a few parameters. Head of linked list of + * GTS tables for threads using the GSEG + * GTS - Gru Thread State - contains info for managing a GSEG context. A + * GTS is allocated for each thread accessing a + * GSEG. + * GTD - GRU Thread Data - contains shadow copy of GRU data when GSEG is + * not loaded into a GRU + * GMS - GRU Memory Struct - Used to manage TLB shootdowns. Tracks GRUs + * where a GSEG has been loaded. Similar to + * an mm_struct but for GRU. + * + * GS - GRU State - Used to manage the state of a GRU chiplet + * BS - Blade State - Used to manage state of all GRU chiplets + * on a blade + * + * + * Normal task tables for task using GRU. + * - 2 threads in process + * - 2 GSEGs open in process + * - GSEG1 is being used by both threads + * - GSEG2 is used only by thread 2 + * + * task -->| + * task ---+---> mm ->------ (notifier) -------+-> gms + * | | + * |--> vma -> vdata ---> gts--->| GSEG1 (thread1) + * | | | + * | +-> gts--->| GSEG1 (thread2) + * | | + * |--> vma -> vdata ---> gts--->| GSEG2 (thread2) + * . + * . + * + * GSEGs are marked DONTCOPY on fork + * + * At open + * file.private_data -> NULL + * + * At mmap, + * vma -> vdata + * + * After gseg reference + * vma -> vdata ->gts + * + * After fork + * parent + * vma -> vdata -> gts + * child + * (vma is not copied) + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include "gru.h" +#include "grulib.h" +#include "gruhandles.h" + +extern struct gru_stats_s gru_stats; +extern struct gru_blade_state *gru_base[]; +extern unsigned long gru_start_paddr, gru_end_paddr; +extern void *gru_start_vaddr; +extern unsigned int gru_max_gids; + +#define GRU_MAX_BLADES MAX_NUMNODES +#define GRU_MAX_GRUS (GRU_MAX_BLADES * GRU_CHIPLETS_PER_BLADE) + +#define GRU_DRIVER_ID_STR "SGI GRU Device Driver" +#define GRU_DRIVER_VERSION_STR "0.85" + +/* + * GRU statistics. 
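+ * Each counter below is bumped through the STAT() macro defined later in
+ * this header, and only when OPT_STATS is set in gru_options; for example
+ * STAT(asid_new) expands (roughly) to:
+ *
+ *	if (gru_options & OPT_STATS)
+ *		atomic_long_inc(&gru_stats.asid_new);
+ *
+ * The totals are exported via /proc/sgi_uv/gru/statistics (gruprocfs.c),
+ * and writing to that file clears them.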
+ */ +struct gru_stats_s { + atomic_long_t vdata_alloc; + atomic_long_t vdata_free; + atomic_long_t gts_alloc; + atomic_long_t gts_free; + atomic_long_t gms_alloc; + atomic_long_t gms_free; + atomic_long_t gts_double_allocate; + atomic_long_t assign_context; + atomic_long_t assign_context_failed; + atomic_long_t free_context; + atomic_long_t load_user_context; + atomic_long_t load_kernel_context; + atomic_long_t lock_kernel_context; + atomic_long_t unlock_kernel_context; + atomic_long_t steal_user_context; + atomic_long_t steal_kernel_context; + atomic_long_t steal_context_failed; + atomic_long_t nopfn; + atomic_long_t asid_new; + atomic_long_t asid_next; + atomic_long_t asid_wrap; + atomic_long_t asid_reuse; + atomic_long_t intr; + atomic_long_t intr_cbr; + atomic_long_t intr_tfh; + atomic_long_t intr_spurious; + atomic_long_t intr_mm_lock_failed; + atomic_long_t call_os; + atomic_long_t call_os_wait_queue; + atomic_long_t user_flush_tlb; + atomic_long_t user_unload_context; + atomic_long_t user_exception; + atomic_long_t set_context_option; + atomic_long_t check_context_retarget_intr; + atomic_long_t check_context_unload; + atomic_long_t tlb_dropin; + atomic_long_t tlb_preload_page; + atomic_long_t tlb_dropin_fail_no_asid; + atomic_long_t tlb_dropin_fail_upm; + atomic_long_t tlb_dropin_fail_invalid; + atomic_long_t tlb_dropin_fail_range_active; + atomic_long_t tlb_dropin_fail_idle; + atomic_long_t tlb_dropin_fail_fmm; + atomic_long_t tlb_dropin_fail_no_exception; + atomic_long_t tfh_stale_on_fault; + atomic_long_t mmu_invalidate_range; + atomic_long_t mmu_invalidate_page; + atomic_long_t flush_tlb; + atomic_long_t flush_tlb_gru; + atomic_long_t flush_tlb_gru_tgh; + atomic_long_t flush_tlb_gru_zero_asid; + + atomic_long_t copy_gpa; + atomic_long_t read_gpa; + + atomic_long_t mesq_receive; + atomic_long_t mesq_receive_none; + atomic_long_t mesq_send; + atomic_long_t mesq_send_failed; + atomic_long_t mesq_noop; + atomic_long_t mesq_send_unexpected_error; + atomic_long_t mesq_send_lb_overflow; + atomic_long_t mesq_send_qlimit_reached; + atomic_long_t mesq_send_amo_nacked; + atomic_long_t mesq_send_put_nacked; + atomic_long_t mesq_page_overflow; + atomic_long_t mesq_qf_locked; + atomic_long_t mesq_qf_noop_not_full; + atomic_long_t mesq_qf_switch_head_failed; + atomic_long_t mesq_qf_unexpected_error; + atomic_long_t mesq_noop_unexpected_error; + atomic_long_t mesq_noop_lb_overflow; + atomic_long_t mesq_noop_qlimit_reached; + atomic_long_t mesq_noop_amo_nacked; + atomic_long_t mesq_noop_put_nacked; + atomic_long_t mesq_noop_page_overflow; + +}; + +enum mcs_op {cchop_allocate, cchop_start, cchop_interrupt, cchop_interrupt_sync, + cchop_deallocate, tfhop_write_only, tfhop_write_restart, + tghop_invalidate, mcsop_last}; + +struct mcs_op_statistic { + atomic_long_t count; + atomic_long_t total; + unsigned long max; +}; + +extern struct mcs_op_statistic mcs_op_statistics[mcsop_last]; + +#define OPT_DPRINT 1 +#define OPT_STATS 2 + + +#define IRQ_GRU 110 /* Starting IRQ number for interrupts */ + +/* Delay in jiffies between attempts to assign a GRU context */ +#define GRU_ASSIGN_DELAY ((HZ * 20) / 1000) + +/* + * If a process has it's context stolen, min delay in jiffies before trying to + * steal a context from another process. + */ +#define GRU_STEAL_DELAY ((HZ * 200) / 1000) + +#define STAT(id) do { \ + if (gru_options & OPT_STATS) \ + atomic_long_inc(&gru_stats.id); \ + } while (0) + +#ifdef CONFIG_SGI_GRU_DEBUG +#define gru_dbg(dev, fmt, x...) 
\ + do { \ + if (gru_options & OPT_DPRINT) \ + printk(KERN_DEBUG "GRU:%d %s: " fmt, smp_processor_id(), __func__, x);\ + } while (0) +#else +#define gru_dbg(x...) +#endif + +/*----------------------------------------------------------------------------- + * ASID management + */ +#define MAX_ASID 0xfffff0 +#define MIN_ASID 8 +#define ASID_INC 8 /* number of regions */ + +/* Generate a GRU asid value from a GRU base asid & a virtual address. */ +#define VADDR_HI_BIT 64 +#define GRUREGION(addr) ((addr) >> (VADDR_HI_BIT - 3) & 3) +#define GRUASID(asid, addr) ((asid) + GRUREGION(addr)) + +/*------------------------------------------------------------------------------ + * File & VMS Tables + */ + +struct gru_state; + +/* + * This structure is pointed to from the mmstruct via the notifier pointer. + * There is one of these per address space. + */ +struct gru_mm_tracker { /* pack to reduce size */ + unsigned int mt_asid_gen:24; /* ASID wrap count */ + unsigned int mt_asid:24; /* current base ASID for gru */ + unsigned short mt_ctxbitmap:16;/* bitmap of contexts using + asid */ +} __attribute__ ((packed)); + +struct gru_mm_struct { + struct mmu_notifier ms_notifier; + spinlock_t ms_asid_lock; /* protects ASID assignment */ + atomic_t ms_range_active;/* num range_invals active */ + wait_queue_head_t ms_wait_queue; + DECLARE_BITMAP(ms_asidmap, GRU_MAX_GRUS); + struct gru_mm_tracker ms_asids[GRU_MAX_GRUS]; +}; + +/* + * One of these structures is allocated when a GSEG is mmaped. The + * structure is pointed to by the vma->vm_private_data field in the vma struct. + */ +struct gru_vma_data { + spinlock_t vd_lock; /* Serialize access to vma */ + struct list_head vd_head; /* head of linked list of gts */ + long vd_user_options;/* misc user option flags */ + int vd_cbr_au_count; + int vd_dsr_au_count; + unsigned char vd_tlb_preload_count; +}; + +/* + * One of these is allocated for each thread accessing a mmaped GRU. A linked + * list of these structure is hung off the struct gru_vma_data in the mm_struct. 
+ */ +struct gru_thread_state { + struct list_head ts_next; /* list - head at vma-private */ + struct mutex ts_ctxlock; /* load/unload CTX lock */ + struct mm_struct *ts_mm; /* mm currently mapped to + context */ + struct vm_area_struct *ts_vma; /* vma of GRU context */ + struct gru_state *ts_gru; /* GRU where the context is + loaded */ + struct gru_mm_struct *ts_gms; /* asid & ioproc struct */ + unsigned char ts_tlb_preload_count; /* TLB preload pages */ + unsigned long ts_cbr_map; /* map of allocated CBRs */ + unsigned long ts_dsr_map; /* map of allocated DATA + resources */ + unsigned long ts_steal_jiffies;/* jiffies when context last + stolen */ + long ts_user_options;/* misc user option flags */ + pid_t ts_tgid_owner; /* task that is using the + context - for migration */ + short ts_user_blade_id;/* user selected blade */ + signed char ts_user_chiplet_id;/* user selected chiplet */ + unsigned short ts_sizeavail; /* Pagesizes in use */ + int ts_tsid; /* thread that owns the + structure */ + int ts_tlb_int_select;/* target cpu if interrupts + enabled */ + int ts_ctxnum; /* context number where the + context is loaded */ + refcount_t ts_refcnt; /* reference count GTS */ + unsigned char ts_dsr_au_count;/* Number of DSR resources + required for contest */ + unsigned char ts_cbr_au_count;/* Number of CBR resources + required for contest */ + signed char ts_cch_req_slice;/* CCH packet slice */ + signed char ts_blade; /* If >= 0, migrate context if + ref from different blade */ + signed char ts_force_cch_reload; + signed char ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each + allocated CB */ + int ts_data_valid; /* Indicates if ts_gdata has + valid data */ + struct gru_gseg_statistics ustats; /* User statistics */ + unsigned long ts_gdata[]; /* save area for GRU data (CB, + DS, CBE) */ +}; + +/* + * Threaded programs actually allocate an array of GSEGs when a context is + * created. Each thread uses a separate GSEG. TSID is the index into the GSEG + * array. + */ +#define TSID(a, v) (((a) - (v)->vm_start) / GRU_GSEG_PAGESIZE) +#define UGRUADDR(gts) ((gts)->ts_vma->vm_start + \ + (gts)->ts_tsid * GRU_GSEG_PAGESIZE) + +#define NULLCTX (-1) /* if context not loaded into GRU */ + +/*----------------------------------------------------------------------------- + * GRU State Tables + */ + +/* + * One of these exists for each GRU chiplet. + */ +struct gru_state { + struct gru_blade_state *gs_blade; /* GRU state for entire + blade */ + unsigned long gs_gru_base_paddr; /* Physical address of + gru segments (64) */ + void *gs_gru_base_vaddr; /* Virtual address of + gru segments (64) */ + unsigned short gs_gid; /* unique GRU number */ + unsigned short gs_blade_id; /* blade of GRU */ + unsigned char gs_chiplet_id; /* blade chiplet of GRU */ + unsigned char gs_tgh_local_shift; /* used to pick TGH for + local flush */ + unsigned char gs_tgh_first_remote; /* starting TGH# for + remote flush */ + spinlock_t gs_asid_lock; /* lock used for + assigning asids */ + spinlock_t gs_lock; /* lock used for + assigning contexts */ + + /* -- the following are protected by the gs_asid_lock spinlock ---- */ + unsigned int gs_asid; /* Next availe ASID */ + unsigned int gs_asid_limit; /* Limit of available + ASIDs */ + unsigned int gs_asid_gen; /* asid generation. 
+ Inc on wrap */ + + /* --- the following fields are protected by the gs_lock spinlock --- */ + unsigned long gs_context_map; /* bitmap to manage + contexts in use */ + unsigned long gs_cbr_map; /* bitmap to manage CB + resources */ + unsigned long gs_dsr_map; /* bitmap used to manage + DATA resources */ + unsigned int gs_reserved_cbrs; /* Number of kernel- + reserved cbrs */ + unsigned int gs_reserved_dsr_bytes; /* Bytes of kernel- + reserved dsrs */ + unsigned short gs_active_contexts; /* number of contexts + in use */ + struct gru_thread_state *gs_gts[GRU_NUM_CCH]; /* GTS currently using + the context */ + int gs_irq[GRU_NUM_TFM]; /* Interrupt irqs */ +}; + +/* + * This structure contains the GRU state for all the GRUs on a blade. + */ +struct gru_blade_state { + void *kernel_cb; /* First kernel + reserved cb */ + void *kernel_dsr; /* First kernel + reserved DSR */ + struct rw_semaphore bs_kgts_sema; /* lock for kgts */ + struct gru_thread_state *bs_kgts; /* GTS for kernel use */ + + /* ---- the following are used for managing kernel async GRU CBRs --- */ + int bs_async_dsr_bytes; /* DSRs for async */ + int bs_async_cbrs; /* CBRs AU for async */ + struct completion *bs_async_wq; + + /* ---- the following are protected by the bs_lock spinlock ---- */ + spinlock_t bs_lock; /* lock used for + stealing contexts */ + int bs_lru_ctxnum; /* STEAL - last context + stolen */ + struct gru_state *bs_lru_gru; /* STEAL - last gru + stolen */ + + struct gru_state bs_grus[GRU_CHIPLETS_PER_BLADE]; +}; + +/*----------------------------------------------------------------------------- + * Address Primitives + */ +#define get_tfm_for_cpu(g, c) \ + ((struct gru_tlb_fault_map *)get_tfm((g)->gs_gru_base_vaddr, (c))) +#define get_tfh_by_index(g, i) \ + ((struct gru_tlb_fault_handle *)get_tfh((g)->gs_gru_base_vaddr, (i))) +#define get_tgh_by_index(g, i) \ + ((struct gru_tlb_global_handle *)get_tgh((g)->gs_gru_base_vaddr, (i))) +#define get_cbe_by_index(g, i) \ + ((struct gru_control_block_extended *)get_cbe((g)->gs_gru_base_vaddr,\ + (i))) + +/*----------------------------------------------------------------------------- + * Useful Macros + */ + +/* Given a blade# & chiplet#, get a pointer to the GRU */ +#define get_gru(b, c) (&gru_base[b]->bs_grus[c]) + +/* Number of bytes to save/restore when unloading/loading GRU contexts */ +#define DSR_BYTES(dsr) ((dsr) * GRU_DSR_AU_BYTES) +#define CBR_BYTES(cbr) ((cbr) * GRU_HANDLE_BYTES * GRU_CBR_AU_SIZE * 2) + +/* Convert a user CB number to the actual CBRNUM */ +#define thread_cbr_number(gts, n) ((gts)->ts_cbr_idx[(n) / GRU_CBR_AU_SIZE] \ + * GRU_CBR_AU_SIZE + (n) % GRU_CBR_AU_SIZE) + +/* Convert a gid to a pointer to the GRU */ +#define GID_TO_GRU(gid) \ + (gru_base[(gid) / GRU_CHIPLETS_PER_BLADE] ? \ + (&gru_base[(gid) / GRU_CHIPLETS_PER_BLADE]-> \ + bs_grus[(gid) % GRU_CHIPLETS_PER_BLADE]) : \ + NULL) + +/* Scan all active GRUs in a GRU bitmap */ +#define for_each_gru_in_bitmap(gid, map) \ + for_each_set_bit((gid), (map), GRU_MAX_GRUS) + +/* Scan all active GRUs on a specific blade */ +#define for_each_gru_on_blade(gru, nid, i) \ + for ((gru) = gru_base[nid]->bs_grus, (i) = 0; \ + (i) < GRU_CHIPLETS_PER_BLADE; \ + (i)++, (gru)++) + +/* Scan all GRUs */ +#define foreach_gid(gid) \ + for ((gid) = 0; (gid) < gru_max_gids; (gid)++) + +/* Scan all active GTSs on a gru. Note: must hold ss_lock to use this macro. 
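+ * A typical (illustrative) use, with the gru's gs_lock held since that is
+ * what protects gs_gts[]:
+ *
+ *	spin_lock(&gru->gs_lock);
+ *	for_each_gts_on_gru(gts, gru, ctxnum)
+ *		gru_dbg(grudev, "ctx %d -> gts %p\n", ctxnum, gts);
+ *	spin_unlock(&gru->gs_lock);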
*/ +#define for_each_gts_on_gru(gts, gru, ctxnum) \ + for ((ctxnum) = 0; (ctxnum) < GRU_NUM_CCH; (ctxnum)++) \ + if (((gts) = (gru)->gs_gts[ctxnum])) + +/* Scan each CBR whose bit is set in a TFM (or copy of) */ +#define for_each_cbr_in_tfm(i, map) \ + for_each_set_bit((i), (map), GRU_NUM_CBE) + +/* Scan each CBR in a CBR bitmap. Note: multiple CBRs in an allocation unit */ +#define for_each_cbr_in_allocation_map(i, map, k) \ + for_each_set_bit((k), (map), GRU_CBR_AU) \ + for ((i) = (k)*GRU_CBR_AU_SIZE; \ + (i) < ((k) + 1) * GRU_CBR_AU_SIZE; (i)++) + +#define gseg_physical_address(gru, ctxnum) \ + ((gru)->gs_gru_base_paddr + ctxnum * GRU_GSEG_STRIDE) +#define gseg_virtual_address(gru, ctxnum) \ + ((gru)->gs_gru_base_vaddr + ctxnum * GRU_GSEG_STRIDE) + +/*----------------------------------------------------------------------------- + * Lock / Unlock GRU handles + * Use the "delresp" bit in the handle as a "lock" bit. + */ + +/* Lock hierarchy checking enabled only in emulator */ + +/* 0 = lock failed, 1 = locked */ +static inline int __trylock_handle(void *h) +{ + return !test_and_set_bit(1, h); +} + +static inline void __lock_handle(void *h) +{ + while (test_and_set_bit(1, h)) + cpu_relax(); +} + +static inline void __unlock_handle(void *h) +{ + clear_bit(1, h); +} + +static inline int trylock_cch_handle(struct gru_context_configuration_handle *cch) +{ + return __trylock_handle(cch); +} + +static inline void lock_cch_handle(struct gru_context_configuration_handle *cch) +{ + __lock_handle(cch); +} + +static inline void unlock_cch_handle(struct gru_context_configuration_handle + *cch) +{ + __unlock_handle(cch); +} + +static inline void lock_tgh_handle(struct gru_tlb_global_handle *tgh) +{ + __lock_handle(tgh); +} + +static inline void unlock_tgh_handle(struct gru_tlb_global_handle *tgh) +{ + __unlock_handle(tgh); +} + +static inline int is_kernel_context(struct gru_thread_state *gts) +{ + return !gts->ts_mm; +} + +/* + * The following are for Nehelem-EX. A more general scheme is needed for + * future processors. 
+ */ +#define UV_MAX_INT_CORES 8 +#define uv_cpu_socket_number(p) ((cpu_physical_id(p) >> 5) & 1) +#define uv_cpu_ht_number(p) (cpu_physical_id(p) & 1) +#define uv_cpu_core_number(p) (((cpu_physical_id(p) >> 2) & 4) | \ + ((cpu_physical_id(p) >> 1) & 3)) +/*----------------------------------------------------------------------------- + * Function prototypes & externs + */ +struct gru_unload_context_req; + +extern const struct vm_operations_struct gru_vm_ops; +extern struct device *grudev; + +extern struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, + int tsid); +extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct + *vma, int tsid); +extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct + *vma, int tsid); +extern struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts); +extern void gru_load_context(struct gru_thread_state *gts); +extern void gru_steal_context(struct gru_thread_state *gts); +extern void gru_unload_context(struct gru_thread_state *gts, int savestate); +extern int gru_update_cch(struct gru_thread_state *gts); +extern void gts_drop(struct gru_thread_state *gts); +extern void gru_tgh_flush_init(struct gru_state *gru); +extern int gru_kservices_init(void); +extern void gru_kservices_exit(void); +extern irqreturn_t gru0_intr(int irq, void *dev_id); +extern irqreturn_t gru1_intr(int irq, void *dev_id); +extern irqreturn_t gru_intr_mblade(int irq, void *dev_id); +extern int gru_dump_chiplet_request(unsigned long arg); +extern long gru_get_gseg_statistics(unsigned long arg); +extern int gru_handle_user_call_os(unsigned long address); +extern int gru_user_flush_tlb(unsigned long arg); +extern int gru_user_unload_context(unsigned long arg); +extern int gru_get_exception_detail(unsigned long arg); +extern int gru_set_context_option(unsigned long address); +extern int gru_check_context_placement(struct gru_thread_state *gts); +extern int gru_cpu_fault_map_id(void); +extern struct vm_area_struct *gru_find_vma(unsigned long vaddr); +extern void gru_flush_all_tlb(struct gru_state *gru); +extern int gru_proc_init(void); +extern void gru_proc_exit(void); + +extern struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, + int cbr_au_count, int dsr_au_count, + unsigned char tlb_preload_count, int options, int tsid); +extern unsigned long gru_reserve_cb_resources(struct gru_state *gru, + int cbr_au_count, signed char *cbmap); +extern unsigned long gru_reserve_ds_resources(struct gru_state *gru, + int dsr_au_count, signed char *dsmap); +extern vm_fault_t gru_fault(struct vm_fault *vmf); +extern struct gru_mm_struct *gru_register_mmu_notifier(void); +extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms); + +extern int gru_ktest(unsigned long arg); +extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start, + unsigned long len); + +extern unsigned long gru_options; + +#endif /* __GRUTABLES_H__ */ diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c new file mode 100644 index 0000000000..10921cd260 --- /dev/null +++ b/drivers/misc/sgi-gru/grutlbpurge.c @@ -0,0 +1,318 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SN Platform GRU Driver + * + * MMUOPS callbacks + TLB flushing + * + * This file handles emu notifier callbacks from the core kernel. The callbacks + * are used to update the TLB in the GRU as a result of changes in the + * state of a process address space. This file also handles TLB invalidates + * from the GRU driver. 
+ * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "gru.h" +#include "grutables.h" +#include + +#define gru_random() get_cycles() + +/* ---------------------------------- TLB Invalidation functions -------- + * get_tgh_handle + * + * Find a TGH to use for issuing a TLB invalidate. For GRUs that are on the + * local blade, use a fixed TGH that is a function of the blade-local cpu + * number. Normally, this TGH is private to the cpu & no contention occurs for + * the TGH. For offblade GRUs, select a random TGH in the range above the + * private TGHs. A spinlock is required to access this TGH & the lock must be + * released when the invalidate is completes. This sucks, but it is the best we + * can do. + * + * Note that the spinlock is IN the TGH handle so locking does not involve + * additional cache lines. + * + */ +static inline int get_off_blade_tgh(struct gru_state *gru) +{ + int n; + + n = GRU_NUM_TGH - gru->gs_tgh_first_remote; + n = gru_random() % n; + n += gru->gs_tgh_first_remote; + return n; +} + +static inline int get_on_blade_tgh(struct gru_state *gru) +{ + return uv_blade_processor_id() >> gru->gs_tgh_local_shift; +} + +static struct gru_tlb_global_handle *get_lock_tgh_handle(struct gru_state + *gru) +{ + struct gru_tlb_global_handle *tgh; + int n; + + preempt_disable(); + if (uv_numa_blade_id() == gru->gs_blade_id) + n = get_on_blade_tgh(gru); + else + n = get_off_blade_tgh(gru); + tgh = get_tgh_by_index(gru, n); + lock_tgh_handle(tgh); + + return tgh; +} + +static void get_unlock_tgh_handle(struct gru_tlb_global_handle *tgh) +{ + unlock_tgh_handle(tgh); + preempt_enable(); +} + +/* + * gru_flush_tlb_range + * + * General purpose TLB invalidation function. This function scans every GRU in + * the ENTIRE system (partition) looking for GRUs where the specified MM has + * been accessed by the GRU. For each GRU found, the TLB must be invalidated OR + * the ASID invalidated. Invalidating an ASID causes a new ASID to be assigned + * on the next fault. This effectively flushes the ENTIRE TLB for the MM at the + * cost of (possibly) a large number of future TLBmisses. + * + * The current algorithm is optimized based on the following (somewhat true) + * assumptions: + * - GRU contexts are not loaded into a GRU unless a reference is made to + * the data segment or control block (this is true, not an assumption). + * If a DS/CB is referenced, the user will also issue instructions that + * cause TLBmisses. It is not necessary to optimize for the case where + * contexts are loaded but no instructions cause TLB misses. (I know + * this will happen but I'm not optimizing for it). + * - GRU instructions to invalidate TLB entries are SLOOOOWWW - normally + * a few usec but in unusual cases, it could be longer. Avoid if + * possible. + * - intrablade process migration between cpus is not frequent but is + * common. + * - a GRU context is not typically migrated to a different GRU on the + * blade because of intrablade migration + * - interblade migration is rare. Processes migrate their GRU context to + * the new blade. + * - if interblade migration occurs, migration back to the original blade + * is very very rare (ie., no optimization for this case) + * - most GRU instruction operate on a subset of the user REGIONS. Code + * & shared library regions are not likely targets of GRU instructions. 
+ * + * To help improve the efficiency of TLB invalidation, the GMS data + * structure is maintained for EACH address space (MM struct). The GMS is + * also the structure that contains the pointer to the mmu callout + * functions. This structure is linked to the mm_struct for the address space + * using the mmu "register" function. The mmu interfaces are used to + * provide the callbacks for TLB invalidation. The GMS contains: + * + * - asid[maxgrus] array. ASIDs are assigned to a GRU when a context is + * loaded into the GRU. + * - asidmap[maxgrus]. bitmap to make it easier to find non-zero asids in + * the above array + * - ctxbitmap[maxgrus]. Indicates the contexts that are currently active + * in the GRU for the address space. This bitmap must be passed to the + * GRU to do an invalidate. + * + * The current algorithm for invalidating TLBs is: + * - scan the asidmap for GRUs where the context has been loaded, ie, + * asid is non-zero. + * - for each gru found: + * - if the ctxtmap is non-zero, there are active contexts in the + * GRU. TLB invalidate instructions must be issued to the GRU. + * - if the ctxtmap is zero, no context is active. Set the ASID to + * zero to force a full TLB invalidation. This is fast but will + * cause a lot of TLB misses if the context is reloaded onto the + * GRU + * + */ + +void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start, + unsigned long len) +{ + struct gru_state *gru; + struct gru_mm_tracker *asids; + struct gru_tlb_global_handle *tgh; + unsigned long num; + int grupagesize, pagesize, pageshift, gid, asid; + + /* ZZZ TODO - handle huge pages */ + pageshift = PAGE_SHIFT; + pagesize = (1UL << pageshift); + grupagesize = GRU_PAGESIZE(pageshift); + num = min(((len + pagesize - 1) >> pageshift), GRUMAXINVAL); + + STAT(flush_tlb); + gru_dbg(grudev, "gms %p, start 0x%lx, len 0x%lx, asidmap 0x%lx\n", gms, + start, len, gms->ms_asidmap[0]); + + spin_lock(&gms->ms_asid_lock); + for_each_gru_in_bitmap(gid, gms->ms_asidmap) { + STAT(flush_tlb_gru); + gru = GID_TO_GRU(gid); + asids = gms->ms_asids + gid; + asid = asids->mt_asid; + if (asids->mt_ctxbitmap && asid) { + STAT(flush_tlb_gru_tgh); + asid = GRUASID(asid, start); + gru_dbg(grudev, + " FLUSH gruid %d, asid 0x%x, vaddr 0x%lx, vamask 0x%x, num %ld, cbmap 0x%x\n", + gid, asid, start, grupagesize, num, asids->mt_ctxbitmap); + tgh = get_lock_tgh_handle(gru); + tgh_invalidate(tgh, start, ~0, asid, grupagesize, 0, + num - 1, asids->mt_ctxbitmap); + get_unlock_tgh_handle(tgh); + } else { + STAT(flush_tlb_gru_zero_asid); + asids->mt_asid = 0; + __clear_bit(gru->gs_gid, gms->ms_asidmap); + gru_dbg(grudev, + " CLEARASID gruid %d, asid 0x%x, cbtmap 0x%x, asidmap 0x%lx\n", + gid, asid, asids->mt_ctxbitmap, + gms->ms_asidmap[0]); + } + } + spin_unlock(&gms->ms_asid_lock); +} + +/* + * Flush the entire TLB on a chiplet. 
+ */ +void gru_flush_all_tlb(struct gru_state *gru) +{ + struct gru_tlb_global_handle *tgh; + + gru_dbg(grudev, "gid %d\n", gru->gs_gid); + tgh = get_lock_tgh_handle(gru); + tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0xffff); + get_unlock_tgh_handle(tgh); +} + +/* + * MMUOPS notifier callout functions + */ +static int gru_invalidate_range_start(struct mmu_notifier *mn, + const struct mmu_notifier_range *range) +{ + struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, + ms_notifier); + + STAT(mmu_invalidate_range); + atomic_inc(&gms->ms_range_active); + gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms, + range->start, range->end, atomic_read(&gms->ms_range_active)); + gru_flush_tlb_range(gms, range->start, range->end - range->start); + + return 0; +} + +static void gru_invalidate_range_end(struct mmu_notifier *mn, + const struct mmu_notifier_range *range) +{ + struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, + ms_notifier); + + /* ..._and_test() provides needed barrier */ + (void)atomic_dec_and_test(&gms->ms_range_active); + + wake_up_all(&gms->ms_wait_queue); + gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", + gms, range->start, range->end); +} + +static struct mmu_notifier *gru_alloc_notifier(struct mm_struct *mm) +{ + struct gru_mm_struct *gms; + + gms = kzalloc(sizeof(*gms), GFP_KERNEL); + if (!gms) + return ERR_PTR(-ENOMEM); + STAT(gms_alloc); + spin_lock_init(&gms->ms_asid_lock); + init_waitqueue_head(&gms->ms_wait_queue); + + return &gms->ms_notifier; +} + +static void gru_free_notifier(struct mmu_notifier *mn) +{ + kfree(container_of(mn, struct gru_mm_struct, ms_notifier)); + STAT(gms_free); +} + +static const struct mmu_notifier_ops gru_mmuops = { + .invalidate_range_start = gru_invalidate_range_start, + .invalidate_range_end = gru_invalidate_range_end, + .alloc_notifier = gru_alloc_notifier, + .free_notifier = gru_free_notifier, +}; + +struct gru_mm_struct *gru_register_mmu_notifier(void) +{ + struct mmu_notifier *mn; + + mn = mmu_notifier_get_locked(&gru_mmuops, current->mm); + if (IS_ERR(mn)) + return ERR_CAST(mn); + + return container_of(mn, struct gru_mm_struct, ms_notifier); +} + +void gru_drop_mmu_notifier(struct gru_mm_struct *gms) +{ + mmu_notifier_put(&gms->ms_notifier); +} + +/* + * Setup TGH parameters. There are: + * - 24 TGH handles per GRU chiplet + * - a portion (MAX_LOCAL_TGH) of the handles are reserved for + * use by blade-local cpus + * - the rest are used by off-blade cpus. This usage is + * less frequent than blade-local usage. + * + * For now, use 16 handles for local flushes, 8 for remote flushes. If the blade + * has less tan or equal to 16 cpus, each cpu has a unique handle that it can + * use. 
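+ * Worked example (illustrative numbers): a blade with 32 possible cpus
+ * rounds up to n = 32, so shift = fls(31) - fls(15) = 5 - 4 = 1 and each
+ * local TGH is shared by two cpus; gs_tgh_first_remote then becomes
+ * (32 + 2 - 1) >> 1 = 16, leaving handles 16 .. GRU_NUM_TGH - 1 for
+ * off-blade purges.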
+ */ +#define MAX_LOCAL_TGH 16 + +void gru_tgh_flush_init(struct gru_state *gru) +{ + int cpus, shift = 0, n; + + cpus = uv_blade_nr_possible_cpus(gru->gs_blade_id); + + /* n = cpus rounded up to next power of 2 */ + if (cpus) { + n = 1 << fls(cpus - 1); + + /* + * shift count for converting local cpu# to TGH index + * 0 if cpus <= MAX_LOCAL_TGH, + * 1 if cpus <= 2*MAX_LOCAL_TGH, + * etc + */ + shift = max(0, fls(n - 1) - fls(MAX_LOCAL_TGH - 1)); + } + gru->gs_tgh_local_shift = shift; + + /* first starting TGH index to use for remote purges */ + gru->gs_tgh_first_remote = (cpus + (1 << shift) - 1) >> shift; + +} diff --git a/drivers/misc/sgi-xp/Makefile b/drivers/misc/sgi-xp/Makefile new file mode 100644 index 0000000000..34c55a4045 --- /dev/null +++ b/drivers/misc/sgi-xp/Makefile @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for SGI's XP devices. +# + +obj-$(CONFIG_SGI_XP) += xp.o +xp-y := xp_main.o xp_uv.o + +obj-$(CONFIG_SGI_XP) += xpc.o +xpc-y := xpc_main.o xpc_channel.o xpc_partition.o \ + xpc_uv.o + +obj-$(CONFIG_SGI_XP) += xpnet.o diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h new file mode 100644 index 0000000000..f1336f43d3 --- /dev/null +++ b/drivers/misc/sgi-xp/xp.h @@ -0,0 +1,341 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * (C) Copyright 2020 Hewlett Packard Enterprise Development LP + * Copyright (C) 2004-2008 Silicon Graphics, Inc. All rights reserved. + */ + +/* + * External Cross Partition (XP) structures and defines. + */ + +#ifndef _DRIVERS_MISC_SGIXP_XP_H +#define _DRIVERS_MISC_SGIXP_XP_H + +#include + +#if defined CONFIG_X86_UV || defined CONFIG_IA64_SGI_UV +#include +#endif + +#ifdef USE_DBUG_ON +#define DBUG_ON(condition) BUG_ON(condition) +#else +#define DBUG_ON(condition) +#endif + +/* + * Define the maximum number of partitions the system can possibly support. + * It is based on the maximum number of hardware partitionable regions. The + * term 'region' in this context refers to the minimum number of nodes that + * can comprise an access protection grouping. The access protection is in + * regards to memory, IPI and IOI. + * + * The maximum number of hardware partitionable regions is equal to the + * maximum number of nodes in the entire system divided by the minimum number + * of nodes that comprise an access protection grouping. + */ +#define XP_MAX_NPARTITIONS_SN2 64 +#define XP_MAX_NPARTITIONS_UV 256 + +/* + * XPC establishes channel connections between the local partition and any + * other partition that is currently up. Over these channels, kernel-level + * `users' can communicate with their counterparts on the other partitions. + * + * If the need for additional channels arises, one can simply increase + * XPC_MAX_NCHANNELS accordingly. If the day should come where that number + * exceeds the absolute MAXIMUM number of channels possible (eight), then one + * will need to make changes to the XPC code to accommodate for this. + * + * The absolute maximum number of channels possible is limited to eight for + * performance reasons on sn2 hardware. The internal cross partition structures + * require sixteen bytes per channel, and eight allows all of this + * interface-shared info to fit in one 128-byte cacheline. 
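+ *
+ * In other words, 8 channels * 16 bytes of interface-shared data comes
+ * to exactly one 128-byte cacheline; raising XPC_MAX_NCHANNELS past 8
+ * would spill onto a second cacheline and trips the #error check below.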
+ */ +#define XPC_MEM_CHANNEL 0 /* memory channel number */ +#define XPC_NET_CHANNEL 1 /* network channel number */ + +#define XPC_MAX_NCHANNELS 2 /* max #of channels allowed */ + +#if XPC_MAX_NCHANNELS > 8 +#error XPC_MAX_NCHANNELS exceeds absolute MAXIMUM possible. +#endif + +/* + * Define macro, XPC_MSG_SIZE(), is provided for the user + * that wants to fit as many msg entries as possible in a given memory size + * (e.g. a memory page). + */ +#define XPC_MSG_MAX_SIZE 128 +#define XPC_MSG_HDR_MAX_SIZE 16 +#define XPC_MSG_PAYLOAD_MAX_SIZE (XPC_MSG_MAX_SIZE - XPC_MSG_HDR_MAX_SIZE) + +#define XPC_MSG_SIZE(_payload_size) \ + ALIGN(XPC_MSG_HDR_MAX_SIZE + (_payload_size), \ + is_uv_system() ? 64 : 128) + + +/* + * Define the return values and values passed to user's callout functions. + * (It is important to add new value codes at the end just preceding + * xpUnknownReason, which must have the highest numerical value.) + */ +enum xp_retval { + xpSuccess = 0, + + xpNotConnected, /* 1: channel is not connected */ + xpConnected, /* 2: channel connected (opened) */ + xpRETIRED1, /* 3: (formerly xpDisconnected) */ + + xpMsgReceived, /* 4: message received */ + xpMsgDelivered, /* 5: message delivered and acknowledged */ + + xpRETIRED2, /* 6: (formerly xpTransferFailed) */ + + xpNoWait, /* 7: operation would require wait */ + xpRetry, /* 8: retry operation */ + xpTimeout, /* 9: timeout in xpc_allocate_msg_wait() */ + xpInterrupted, /* 10: interrupted wait */ + + xpUnequalMsgSizes, /* 11: message size disparity between sides */ + xpInvalidAddress, /* 12: invalid address */ + + xpNoMemory, /* 13: no memory available for XPC structures */ + xpLackOfResources, /* 14: insufficient resources for operation */ + xpUnregistered, /* 15: channel is not registered */ + xpAlreadyRegistered, /* 16: channel is already registered */ + + xpPartitionDown, /* 17: remote partition is down */ + xpNotLoaded, /* 18: XPC module is not loaded */ + xpUnloading, /* 19: this side is unloading XPC module */ + + xpBadMagic, /* 20: XPC MAGIC string not found */ + + xpReactivating, /* 21: remote partition was reactivated */ + + xpUnregistering, /* 22: this side is unregistering channel */ + xpOtherUnregistering, /* 23: other side is unregistering channel */ + + xpCloneKThread, /* 24: cloning kernel thread */ + xpCloneKThreadFailed, /* 25: cloning kernel thread failed */ + + xpNoHeartbeat, /* 26: remote partition has no heartbeat */ + + xpPioReadError, /* 27: PIO read error */ + xpPhysAddrRegFailed, /* 28: registration of phys addr range failed */ + + xpRETIRED3, /* 29: (formerly xpBteDirectoryError) */ + xpRETIRED4, /* 30: (formerly xpBtePoisonError) */ + xpRETIRED5, /* 31: (formerly xpBteWriteError) */ + xpRETIRED6, /* 32: (formerly xpBteAccessError) */ + xpRETIRED7, /* 33: (formerly xpBtePWriteError) */ + xpRETIRED8, /* 34: (formerly xpBtePReadError) */ + xpRETIRED9, /* 35: (formerly xpBteTimeOutError) */ + xpRETIRED10, /* 36: (formerly xpBteXtalkError) */ + xpRETIRED11, /* 37: (formerly xpBteNotAvailable) */ + xpRETIRED12, /* 38: (formerly xpBteUnmappedError) */ + + xpBadVersion, /* 39: bad version number */ + xpVarsNotSet, /* 40: the XPC variables are not set up */ + xpNoRsvdPageAddr, /* 41: unable to get rsvd page's phys addr */ + xpInvalidPartid, /* 42: invalid partition ID */ + xpLocalPartid, /* 43: local partition ID */ + + xpOtherGoingDown, /* 44: other side going down, reason unknown */ + xpSystemGoingDown, /* 45: system is going down, reason unknown */ + xpSystemHalt, /* 46: system is being halted */ + xpSystemReboot, /* 
47: system is being rebooted */ + xpSystemPoweroff, /* 48: system is being powered off */ + + xpDisconnecting, /* 49: channel disconnecting (closing) */ + + xpOpenCloseError, /* 50: channel open/close protocol error */ + + xpDisconnected, /* 51: channel disconnected (closed) */ + + xpBteCopyError, /* 52: bte_copy() returned error */ + xpSalError, /* 53: sn SAL error */ + xpRsvdPageNotSet, /* 54: the reserved page is not set up */ + xpPayloadTooBig, /* 55: payload too large for message slot */ + + xpUnsupported, /* 56: unsupported functionality or resource */ + xpNeedMoreInfo, /* 57: more info is needed by SAL */ + + xpGruCopyError, /* 58: gru_copy_gru() returned error */ + xpGruSendMqError, /* 59: gru send message queue related error */ + + xpBadChannelNumber, /* 60: invalid channel number */ + xpBadMsgType, /* 61: invalid message type */ + xpBiosError, /* 62: BIOS error */ + + xpUnknownReason /* 63: unknown reason - must be last in enum */ +}; + +/* + * Define the callout function type used by XPC to update the user on + * connection activity and state changes via the user function registered + * by xpc_connect(). + * + * Arguments: + * + * reason - reason code. + * partid - partition ID associated with condition. + * ch_number - channel # associated with condition. + * data - pointer to optional data. + * key - pointer to optional user-defined value provided as the "key" + * argument to xpc_connect(). + * + * A reason code of xpConnected indicates that a connection has been + * established to the specified partition on the specified channel. The data + * argument indicates the max number of entries allowed in the message queue. + * + * A reason code of xpMsgReceived indicates that a XPC message arrived from + * the specified partition on the specified channel. The data argument + * specifies the address of the message's payload. The user must call + * xpc_received() when finished with the payload. + * + * All other reason codes indicate failure. The data argmument is NULL. + * When a failure reason code is received, one can assume that the channel + * is not connected. + */ +typedef void (*xpc_channel_func) (enum xp_retval reason, short partid, + int ch_number, void *data, void *key); + +/* + * Define the callout function type used by XPC to notify the user of + * messages received and delivered via the user function registered by + * xpc_send_notify(). + * + * Arguments: + * + * reason - reason code. + * partid - partition ID associated with condition. + * ch_number - channel # associated with condition. + * key - pointer to optional user-defined value provided as the "key" + * argument to xpc_send_notify(). + * + * A reason code of xpMsgDelivered indicates that the message was delivered + * to the intended recipient and that they have acknowledged its receipt by + * calling xpc_received(). + * + * All other reason codes indicate failure. + * + * NOTE: The user defined function must be callable by an interrupt handler + * and thus cannot block. + */ +typedef void (*xpc_notify_func) (enum xp_retval reason, short partid, + int ch_number, void *key); + +/* + * The following is a registration entry. There is a global array of these, + * one per channel. It is used to record the connection registration made + * by the users of XPC. As long as a registration entry exists, for any + * partition that comes up, XPC will attempt to establish a connection on + * that channel. Notification that a connection has been made will occur via + * the xpc_channel_func function. 
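+ *
+ * As a rough illustration only (my_channel_func, my_key, partid, payload
+ * and the numeric limits are hypothetical placeholders, not part of this
+ * interface), a kernel-level user might register and exchange messages
+ * along these lines:
+ *
+ *	static void my_channel_func(enum xp_retval reason, short partid,
+ *				    int ch_number, void *data, void *key)
+ *	{
+ *		if (reason == xpMsgReceived) {
+ *			(consume the payload pointed to by data, then...)
+ *			xpc_received(partid, ch_number, data);
+ *		}
+ *	}
+ *
+ *	ret = xpc_connect(XPC_NET_CHANNEL, my_channel_func, my_key,
+ *			  48, 16, 4, 2);
+ *	...
+ *	ret = xpc_send(partid, XPC_NET_CHANNEL, XPC_WAIT, payload, 48);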
+ * + * The 'func' field points to the function to call when aynchronous + * notification is required for such events as: a connection established/lost, + * or an incoming message received, or an error condition encountered. A + * non-NULL 'func' field indicates that there is an active registration for + * the channel. + */ +struct xpc_registration { + struct mutex mutex; + xpc_channel_func func; /* function to call */ + void *key; /* pointer to user's key */ + u16 nentries; /* #of msg entries in local msg queue */ + u16 entry_size; /* message queue's message entry size */ + u32 assigned_limit; /* limit on #of assigned kthreads */ + u32 idle_limit; /* limit on #of idle kthreads */ +} ____cacheline_aligned; + +#define XPC_CHANNEL_REGISTERED(_c) (xpc_registrations[_c].func != NULL) + +/* the following are valid xpc_send() or xpc_send_notify() flags */ +#define XPC_WAIT 0 /* wait flag */ +#define XPC_NOWAIT 1 /* no wait flag */ + +struct xpc_interface { + void (*connect) (int); + void (*disconnect) (int); + enum xp_retval (*send) (short, int, u32, void *, u16); + enum xp_retval (*send_notify) (short, int, u32, void *, u16, + xpc_notify_func, void *); + void (*received) (short, int, void *); + enum xp_retval (*partid_to_nasids) (short, void *); +}; + +extern struct xpc_interface xpc_interface; + +extern void xpc_set_interface(void (*)(int), + void (*)(int), + enum xp_retval (*)(short, int, u32, void *, u16), + enum xp_retval (*)(short, int, u32, void *, u16, + xpc_notify_func, void *), + void (*)(short, int, void *), + enum xp_retval (*)(short, void *)); +extern void xpc_clear_interface(void); + +extern enum xp_retval xpc_connect(int, xpc_channel_func, void *, u16, + u16, u32, u32); +extern void xpc_disconnect(int); + +static inline enum xp_retval +xpc_send(short partid, int ch_number, u32 flags, void *payload, + u16 payload_size) +{ + if (!xpc_interface.send) + return xpNotLoaded; + + return xpc_interface.send(partid, ch_number, flags, payload, + payload_size); +} + +static inline enum xp_retval +xpc_send_notify(short partid, int ch_number, u32 flags, void *payload, + u16 payload_size, xpc_notify_func func, void *key) +{ + if (!xpc_interface.send_notify) + return xpNotLoaded; + + return xpc_interface.send_notify(partid, ch_number, flags, payload, + payload_size, func, key); +} + +static inline void +xpc_received(short partid, int ch_number, void *payload) +{ + if (xpc_interface.received) + xpc_interface.received(partid, ch_number, payload); +} + +static inline enum xp_retval +xpc_partid_to_nasids(short partid, void *nasids) +{ + if (!xpc_interface.partid_to_nasids) + return xpNotLoaded; + + return xpc_interface.partid_to_nasids(partid, nasids); +} + +extern short xp_max_npartitions; +extern short xp_partition_id; +extern u8 xp_region_size; + +extern unsigned long (*xp_pa) (void *); +extern unsigned long (*xp_socket_pa) (unsigned long); +extern enum xp_retval (*xp_remote_memcpy) (unsigned long, const unsigned long, + size_t); +extern int (*xp_cpu_to_nasid) (int); +extern enum xp_retval (*xp_expand_memprotect) (unsigned long, unsigned long); +extern enum xp_retval (*xp_restrict_memprotect) (unsigned long, unsigned long); + +extern struct device *xp; +extern enum xp_retval xp_init_uv(void); +extern void xp_exit_uv(void); + +#endif /* _DRIVERS_MISC_SGIXP_XP_H */ diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c new file mode 100644 index 0000000000..87d156c15f --- /dev/null +++ b/drivers/misc/sgi-xp/xp_main.c @@ -0,0 +1,261 @@ +/* + * This file is subject to the terms and 
conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * (C) Copyright 2020 Hewlett Packard Enterprise Development LP + * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition (XP) base. + * + * XP provides a base from which its users can interact + * with XPC, yet not be dependent on XPC. + * + */ + +#include +#include +#include "xp.h" + +/* define the XP debug device structures to be used with dev_dbg() et al */ + +static struct device_driver xp_dbg_name = { + .name = "xp" +}; + +static struct device xp_dbg_subname = { + .init_name = "", /* set to "" */ + .driver = &xp_dbg_name +}; + +struct device *xp = &xp_dbg_subname; + +/* max #of partitions possible */ +short xp_max_npartitions; +EXPORT_SYMBOL_GPL(xp_max_npartitions); + +short xp_partition_id; +EXPORT_SYMBOL_GPL(xp_partition_id); + +u8 xp_region_size; +EXPORT_SYMBOL_GPL(xp_region_size); + +unsigned long (*xp_pa) (void *addr); +EXPORT_SYMBOL_GPL(xp_pa); + +unsigned long (*xp_socket_pa) (unsigned long gpa); +EXPORT_SYMBOL_GPL(xp_socket_pa); + +enum xp_retval (*xp_remote_memcpy) (unsigned long dst_gpa, + const unsigned long src_gpa, size_t len); +EXPORT_SYMBOL_GPL(xp_remote_memcpy); + +int (*xp_cpu_to_nasid) (int cpuid); +EXPORT_SYMBOL_GPL(xp_cpu_to_nasid); + +enum xp_retval (*xp_expand_memprotect) (unsigned long phys_addr, + unsigned long size); +EXPORT_SYMBOL_GPL(xp_expand_memprotect); +enum xp_retval (*xp_restrict_memprotect) (unsigned long phys_addr, + unsigned long size); +EXPORT_SYMBOL_GPL(xp_restrict_memprotect); + +/* + * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level + * users of XPC. + */ +struct xpc_registration xpc_registrations[XPC_MAX_NCHANNELS]; +EXPORT_SYMBOL_GPL(xpc_registrations); + +/* + * Initialize the XPC interface to NULL to indicate that XPC isn't loaded. + */ +struct xpc_interface xpc_interface = { }; +EXPORT_SYMBOL_GPL(xpc_interface); + +/* + * XPC calls this when it (the XPC module) has been loaded. + */ +void +xpc_set_interface(void (*connect) (int), + void (*disconnect) (int), + enum xp_retval (*send) (short, int, u32, void *, u16), + enum xp_retval (*send_notify) (short, int, u32, void *, u16, + xpc_notify_func, void *), + void (*received) (short, int, void *), + enum xp_retval (*partid_to_nasids) (short, void *)) +{ + xpc_interface.connect = connect; + xpc_interface.disconnect = disconnect; + xpc_interface.send = send; + xpc_interface.send_notify = send_notify; + xpc_interface.received = received; + xpc_interface.partid_to_nasids = partid_to_nasids; +} +EXPORT_SYMBOL_GPL(xpc_set_interface); + +/* + * XPC calls this when it (the XPC module) is being unloaded. + */ +void +xpc_clear_interface(void) +{ + memset(&xpc_interface, 0, sizeof(xpc_interface)); +} +EXPORT_SYMBOL_GPL(xpc_clear_interface); + +/* + * Register for automatic establishment of a channel connection whenever + * a partition comes up. + * + * Arguments: + * + * ch_number - channel # to register for connection. + * func - function to call for asynchronous notification of channel + * state changes (i.e., connection, disconnection, error) and + * the arrival of incoming messages. + * key - pointer to optional user-defined value that gets passed back + * to the user on any callouts made to func. + * payload_size - size in bytes of the XPC message's payload area which + * contains a user-defined message. The user should make + * this large enough to hold their largest message. 
+ * nentries - max #of XPC message entries a message queue can contain. + * The actual number, which is determined when a connection + * is established and may be less then requested, will be + * passed to the user via the xpConnected callout. + * assigned_limit - max number of kthreads allowed to be processing + * messages (per connection) at any given instant. + * idle_limit - max number of kthreads allowed to be idle at any given + * instant. + */ +enum xp_retval +xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size, + u16 nentries, u32 assigned_limit, u32 idle_limit) +{ + struct xpc_registration *registration; + + DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS); + DBUG_ON(payload_size == 0 || nentries == 0); + DBUG_ON(func == NULL); + DBUG_ON(assigned_limit == 0 || idle_limit > assigned_limit); + + if (XPC_MSG_SIZE(payload_size) > XPC_MSG_MAX_SIZE) + return xpPayloadTooBig; + + registration = &xpc_registrations[ch_number]; + + if (mutex_lock_interruptible(®istration->mutex) != 0) + return xpInterrupted; + + /* if XPC_CHANNEL_REGISTERED(ch_number) */ + if (registration->func != NULL) { + mutex_unlock(®istration->mutex); + return xpAlreadyRegistered; + } + + /* register the channel for connection */ + registration->entry_size = XPC_MSG_SIZE(payload_size); + registration->nentries = nentries; + registration->assigned_limit = assigned_limit; + registration->idle_limit = idle_limit; + registration->key = key; + registration->func = func; + + mutex_unlock(®istration->mutex); + + if (xpc_interface.connect) + xpc_interface.connect(ch_number); + + return xpSuccess; +} +EXPORT_SYMBOL_GPL(xpc_connect); + +/* + * Remove the registration for automatic connection of the specified channel + * when a partition comes up. + * + * Before returning this xpc_disconnect() will wait for all connections on the + * specified channel have been closed/torndown. So the caller can be assured + * that they will not be receiving any more callouts from XPC to their + * function registered via xpc_connect(). + * + * Arguments: + * + * ch_number - channel # to unregister. + */ +void +xpc_disconnect(int ch_number) +{ + struct xpc_registration *registration; + + DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS); + + registration = &xpc_registrations[ch_number]; + + /* + * We've decided not to make this a down_interruptible(), since we + * figured XPC's users will just turn around and call xpc_disconnect() + * again anyways, so we might as well wait, if need be. 
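+ * (xpc_connect(), by contrast, takes the same mutex with
+ * mutex_lock_interruptible() and returns xpInterrupted if the wait is
+ * interrupted.)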
+ */ + mutex_lock(®istration->mutex); + + /* if !XPC_CHANNEL_REGISTERED(ch_number) */ + if (registration->func == NULL) { + mutex_unlock(®istration->mutex); + return; + } + + /* remove the connection registration for the specified channel */ + registration->func = NULL; + registration->key = NULL; + registration->nentries = 0; + registration->entry_size = 0; + registration->assigned_limit = 0; + registration->idle_limit = 0; + + if (xpc_interface.disconnect) + xpc_interface.disconnect(ch_number); + + mutex_unlock(®istration->mutex); + + return; +} +EXPORT_SYMBOL_GPL(xpc_disconnect); + +static int __init +xp_init(void) +{ + enum xp_retval ret; + int ch_number; + + /* initialize the connection registration mutex */ + for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) + mutex_init(&xpc_registrations[ch_number].mutex); + + if (is_uv_system()) + ret = xp_init_uv(); + else + ret = 0; + + if (ret != xpSuccess) + return ret; + + return 0; +} + +module_init(xp_init); + +static void __exit +xp_exit(void) +{ + if (is_uv_system()) + xp_exit_uv(); +} + +module_exit(xp_exit); + +MODULE_AUTHOR("Silicon Graphics, Inc."); +MODULE_DESCRIPTION("Cross Partition (XP) base"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/sgi-xp/xp_uv.c b/drivers/misc/sgi-xp/xp_uv.c new file mode 100644 index 0000000000..19fc7076af --- /dev/null +++ b/drivers/misc/sgi-xp/xp_uv.c @@ -0,0 +1,175 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * (C) Copyright 2020 Hewlett Packard Enterprise Development LP + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition (XP) uv-based functions. + * + * Architecture specific implementation of common functions. + * + */ + +#include +#include +#if defined CONFIG_X86_64 +#include +#elif defined CONFIG_IA64_SGI_UV +#include +#endif +#include "../sgi-gru/grukservices.h" +#include "xp.h" + +/* + * Convert a virtual memory address to a physical memory address. + */ +static unsigned long +xp_pa_uv(void *addr) +{ + return uv_gpa(addr); +} + +/* + * Convert a global physical to socket physical address. + */ +static unsigned long +xp_socket_pa_uv(unsigned long gpa) +{ + return uv_gpa_to_soc_phys_ram(gpa); +} + +static enum xp_retval +xp_remote_mmr_read(unsigned long dst_gpa, const unsigned long src_gpa, + size_t len) +{ + int ret; + unsigned long *dst_va = __va(uv_gpa_to_soc_phys_ram(dst_gpa)); + + BUG_ON(!uv_gpa_in_mmr_space(src_gpa)); + BUG_ON(len != 8); + + ret = gru_read_gpa(dst_va, src_gpa); + if (ret == 0) + return xpSuccess; + + dev_err(xp, "gru_read_gpa() failed, dst_gpa=0x%016lx src_gpa=0x%016lx " + "len=%ld\n", dst_gpa, src_gpa, len); + return xpGruCopyError; +} + + +static enum xp_retval +xp_remote_memcpy_uv(unsigned long dst_gpa, const unsigned long src_gpa, + size_t len) +{ + int ret; + + if (uv_gpa_in_mmr_space(src_gpa)) + return xp_remote_mmr_read(dst_gpa, src_gpa, len); + + ret = gru_copy_gpa(dst_gpa, src_gpa, len); + if (ret == 0) + return xpSuccess; + + dev_err(xp, "gru_copy_gpa() failed, dst_gpa=0x%016lx src_gpa=0x%016lx " + "len=%ld\n", dst_gpa, src_gpa, len); + return xpGruCopyError; +} + +static int +xp_cpu_to_nasid_uv(int cpuid) +{ + /* ??? Is this same as sn2 nasid in mach/part bitmaps set up by SAL? 
*/ + return UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpuid)); +} + +static enum xp_retval +xp_expand_memprotect_uv(unsigned long phys_addr, unsigned long size) +{ + int ret; + +#if defined CONFIG_X86_64 + ret = uv_bios_change_memprotect(phys_addr, size, UV_MEMPROT_ALLOW_RW); + if (ret != BIOS_STATUS_SUCCESS) { + dev_err(xp, "uv_bios_change_memprotect(,, " + "UV_MEMPROT_ALLOW_RW) failed, ret=%d\n", ret); + return xpBiosError; + } + +#elif defined CONFIG_IA64_SGI_UV + u64 nasid_array; + + ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_1, + &nasid_array); + if (ret != 0) { + dev_err(xp, "sn_change_memprotect(,, " + "SN_MEMPROT_ACCESS_CLASS_1,) failed ret=%d\n", ret); + return xpSalError; + } +#else + #error not a supported configuration +#endif + return xpSuccess; +} + +static enum xp_retval +xp_restrict_memprotect_uv(unsigned long phys_addr, unsigned long size) +{ + int ret; + +#if defined CONFIG_X86_64 + ret = uv_bios_change_memprotect(phys_addr, size, + UV_MEMPROT_RESTRICT_ACCESS); + if (ret != BIOS_STATUS_SUCCESS) { + dev_err(xp, "uv_bios_change_memprotect(,, " + "UV_MEMPROT_RESTRICT_ACCESS) failed, ret=%d\n", ret); + return xpBiosError; + } + +#elif defined CONFIG_IA64_SGI_UV + u64 nasid_array; + + ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_0, + &nasid_array); + if (ret != 0) { + dev_err(xp, "sn_change_memprotect(,, " + "SN_MEMPROT_ACCESS_CLASS_0,) failed ret=%d\n", ret); + return xpSalError; + } +#else + #error not a supported configuration +#endif + return xpSuccess; +} + +enum xp_retval +xp_init_uv(void) +{ + WARN_ON(!is_uv_system()); + if (!is_uv_system()) + return xpUnsupported; + + xp_max_npartitions = XP_MAX_NPARTITIONS_UV; +#ifdef CONFIG_X86 + xp_partition_id = sn_partition_id; + xp_region_size = sn_region_size; +#endif + xp_pa = xp_pa_uv; + xp_socket_pa = xp_socket_pa_uv; + xp_remote_memcpy = xp_remote_memcpy_uv; + xp_cpu_to_nasid = xp_cpu_to_nasid_uv; + xp_expand_memprotect = xp_expand_memprotect_uv; + xp_restrict_memprotect = xp_restrict_memprotect_uv; + + return xpSuccess; +} + +void +xp_exit_uv(void) +{ + WARN_ON(!is_uv_system()); +} diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h new file mode 100644 index 0000000000..225f2bb84e --- /dev/null +++ b/drivers/misc/sgi-xp/xpc.h @@ -0,0 +1,732 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2009 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition Communication (XPC) structures and macros. + */ + +#ifndef _DRIVERS_MISC_SGIXP_XPC_H +#define _DRIVERS_MISC_SGIXP_XPC_H + +#include +#include +#include +#include +#include "xp.h" + +/* + * XPC Version numbers consist of a major and minor number. XPC can always + * talk to versions with same major #, and never talk to versions with a + * different major #. 
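+ *
+ * For example, _XPC_VERSION(3, 0) packs to 0x30, for which
+ * XPC_VERSION_MAJOR() returns 3 and XPC_VERSION_MINOR() returns 0; a
+ * partition running version 3.1 can still interoperate with it, while
+ * one running 4.0 cannot.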
+ */ +#define _XPC_VERSION(_maj, _min) (((_maj) << 4) | ((_min) & 0xf)) +#define XPC_VERSION_MAJOR(_v) ((_v) >> 4) +#define XPC_VERSION_MINOR(_v) ((_v) & 0xf) + +/* define frequency of the heartbeat and frequency how often it's checked */ +#define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */ +#define XPC_HB_CHECK_DEFAULT_INTERVAL 20 /* check HB every x secs */ + +/* define the process name of HB checker and the CPU it is pinned to */ +#define XPC_HB_CHECK_THREAD_NAME "xpc_hb" +#define XPC_HB_CHECK_CPU 0 + +/* define the process name of the discovery thread */ +#define XPC_DISCOVERY_THREAD_NAME "xpc_discovery" + +/* + * the reserved page + * + * SAL reserves one page of memory per partition for XPC. Though a full page + * in length (16384 bytes), its starting address is not page aligned, but it + * is cacheline aligned. The reserved page consists of the following: + * + * reserved page header + * + * The first two 64-byte cachelines of the reserved page contain the + * header (struct xpc_rsvd_page). Before SAL initialization has completed, + * SAL has set up the following fields of the reserved page header: + * SAL_signature, SAL_version, SAL_partid, and SAL_nasids_size. The + * other fields are set up by XPC. (xpc_rsvd_page points to the local + * partition's reserved page.) + * + * part_nasids mask + * mach_nasids mask + * + * SAL also sets up two bitmaps (or masks), one that reflects the actual + * nasids in this partition (part_nasids), and the other that reflects + * the actual nasids in the entire machine (mach_nasids). We're only + * interested in the even numbered nasids (which contain the processors + * and/or memory), so we only need half as many bits to represent the + * nasids. When mapping nasid to bit in a mask (or bit to nasid) be sure + * to either divide or multiply by 2. The part_nasids mask is located + * starting at the first cacheline following the reserved page header. The + * mach_nasids mask follows right after the part_nasids mask. The size in + * bytes of each mask is reflected by the reserved page header field + * 'SAL_nasids_size'. (Local partition's mask pointers are xpc_part_nasids + * and xpc_mach_nasids.) + * + * Immediately following the mach_nasids mask are the XPC variables + * required by other partitions. First are those that are generic to all + * partitions (vars), followed on the next available cacheline by those + * which are partition specific (vars part). These are setup by XPC. + * + * Note: Until 'ts_jiffies' is set non-zero, the partition XPC code has not been + * initialized. 
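+ *
+ * (Nasid-to-bit example: since only the even numbered nasids are
+ * represented, nasid 6 maps to bit 3 in part_nasids/mach_nasids, and
+ * bit 3 read back from either mask maps to nasid 6.)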
+ */ +struct xpc_rsvd_page { + u64 SAL_signature; /* SAL: unique signature */ + u64 SAL_version; /* SAL: version */ + short SAL_partid; /* SAL: partition ID */ + short max_npartitions; /* value of XPC_MAX_PARTITIONS */ + u8 version; + u8 pad1[3]; /* align to next u64 in 1st 64-byte cacheline */ + unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */ + union { + struct { + unsigned long heartbeat_gpa; /* phys addr */ + unsigned long activate_gru_mq_desc_gpa; /* phys addr */ + } uv; + } sn; + u64 pad2[9]; /* align to last u64 in 2nd 64-byte cacheline */ + u64 SAL_nasids_size; /* SAL: size of each nasid mask in bytes */ +}; + +#define XPC_RP_VERSION _XPC_VERSION(3, 0) /* version 3.0 of the reserved page */ + +/* the reserved page sizes and offsets */ + +#define XPC_RP_HEADER_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page)) + +#define XPC_RP_PART_NASIDS(_rp) ((unsigned long *)((u8 *)(_rp) + \ + XPC_RP_HEADER_SIZE)) +#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + \ + xpc_nasid_mask_nlongs) + + +/* + * The following structure describes the partition's heartbeat info which + * will be periodically read by other partitions to determine whether this + * XPC is still 'alive'. + */ +struct xpc_heartbeat_uv { + unsigned long value; + unsigned long offline; /* if 0, heartbeat should be changing */ +}; + +/* + * Info pertinent to a GRU message queue using a watch list for irq generation. + */ +struct xpc_gru_mq_uv { + void *address; /* address of GRU message queue */ + unsigned int order; /* size of GRU message queue as a power of 2 */ + int irq; /* irq raised when message is received in mq */ + int mmr_blade; /* blade where watchlist was allocated from */ + unsigned long mmr_offset; /* offset of irq mmr located on mmr_blade */ + unsigned long mmr_value; /* value of irq mmr located on mmr_blade */ + int watchlist_num; /* number of watchlist allocatd by BIOS */ + void *gru_mq_desc; /* opaque structure used by the GRU driver */ +}; + +/* + * The activate_mq is used to send/receive GRU messages that affect XPC's + * partition active state and channel state. This is uv only. 
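+ *
+ * The XPC_ACTIVATE_MQ_MSG_* types defined below cover act_state
+ * synchronization, partition activate/deactivate requests, the channel
+ * open/close handshake, and partition engage/disengage marking, all
+ * carried over this queue.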
+ */ +struct xpc_activate_mq_msghdr_uv { + unsigned int gru_msg_hdr; /* FOR GRU INTERNAL USE ONLY */ + short partid; /* sender's partid */ + u8 act_state; /* sender's act_state at time msg sent */ + u8 type; /* message's type */ + unsigned long rp_ts_jiffies; /* timestamp of sender's rp setup by XPC */ +}; + +/* activate_mq defined message types */ +#define XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV 0 + +#define XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV 1 +#define XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV 2 + +#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV 3 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV 4 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV 5 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV 6 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV 7 + +#define XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV 8 +#define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV 9 + +struct xpc_activate_mq_msg_uv { + struct xpc_activate_mq_msghdr_uv hdr; +}; + +struct xpc_activate_mq_msg_activate_req_uv { + struct xpc_activate_mq_msghdr_uv hdr; + unsigned long rp_gpa; + unsigned long heartbeat_gpa; + unsigned long activate_gru_mq_desc_gpa; +}; + +struct xpc_activate_mq_msg_deactivate_req_uv { + struct xpc_activate_mq_msghdr_uv hdr; + enum xp_retval reason; +}; + +struct xpc_activate_mq_msg_chctl_closerequest_uv { + struct xpc_activate_mq_msghdr_uv hdr; + short ch_number; + enum xp_retval reason; +}; + +struct xpc_activate_mq_msg_chctl_closereply_uv { + struct xpc_activate_mq_msghdr_uv hdr; + short ch_number; +}; + +struct xpc_activate_mq_msg_chctl_openrequest_uv { + struct xpc_activate_mq_msghdr_uv hdr; + short ch_number; + short entry_size; /* size of notify_mq's GRU messages */ + short local_nentries; /* ??? Is this needed? What is? */ +}; + +struct xpc_activate_mq_msg_chctl_openreply_uv { + struct xpc_activate_mq_msghdr_uv hdr; + short ch_number; + short remote_nentries; /* ??? Is this needed? What is? */ + short local_nentries; /* ??? Is this needed? What is? */ + unsigned long notify_gru_mq_desc_gpa; +}; + +struct xpc_activate_mq_msg_chctl_opencomplete_uv { + struct xpc_activate_mq_msghdr_uv hdr; + short ch_number; +}; + +/* + * Functions registered by add_timer() or called by kernel_thread() only + * allow for a single 64-bit argument. The following macros can be used to + * pack and unpack two (32-bit, 16-bit or 8-bit) arguments into or out from + * the passed argument. + */ +#define XPC_PACK_ARGS(_arg1, _arg2) \ + ((((u64)_arg1) & 0xffffffff) | \ + ((((u64)_arg2) & 0xffffffff) << 32)) + +#define XPC_UNPACK_ARG1(_args) (((u64)_args) & 0xffffffff) +#define XPC_UNPACK_ARG2(_args) ((((u64)_args) >> 32) & 0xffffffff) + +/* + * Define a structure that contains arguments associated with opening and + * closing a channel. + */ +struct xpc_openclose_args { + u16 reason; /* reason why channel is closing */ + u16 entry_size; /* sizeof each message entry */ + u16 remote_nentries; /* #of message entries in remote msg queue */ + u16 local_nentries; /* #of message entries in local msg queue */ + unsigned long local_msgqueue_pa; /* phys addr of local message queue */ +}; + +#define XPC_OPENCLOSE_ARGS_SIZE \ + L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * \ + XPC_MAX_NCHANNELS) + + +/* + * Structures to define a fifo singly-linked list. 
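+ *
+ * The entry is embedded in the object being queued (see, for example,
+ * struct xpc_send_msg_slot_uv and the notify_mq message header below),
+ * so putting an object on a list such as msg_slot_free_list requires no
+ * separate allocation; the head tracks first, last and n_entries under
+ * its own spinlock.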
+ */ + +struct xpc_fifo_entry_uv { + struct xpc_fifo_entry_uv *next; +}; + +struct xpc_fifo_head_uv { + struct xpc_fifo_entry_uv *first; + struct xpc_fifo_entry_uv *last; + spinlock_t lock; + int n_entries; +}; + +/* + * The format of a uv XPC notify_mq GRU message is as follows: + * + * A user-defined message resides in the payload area. The max size of the + * payload is defined by the user via xpc_connect(). + * + * The size of a message (payload and header) sent via the GRU must be either 1 + * or 2 GRU_CACHE_LINE_BYTES in length. + */ + +struct xpc_notify_mq_msghdr_uv { + union { + unsigned int gru_msg_hdr; /* FOR GRU INTERNAL USE ONLY */ + struct xpc_fifo_entry_uv next; /* FOR XPC INTERNAL USE ONLY */ + } u; + short partid; /* FOR XPC INTERNAL USE ONLY */ + u8 ch_number; /* FOR XPC INTERNAL USE ONLY */ + u8 size; /* FOR XPC INTERNAL USE ONLY */ + unsigned int msg_slot_number; /* FOR XPC INTERNAL USE ONLY */ +}; + +struct xpc_notify_mq_msg_uv { + struct xpc_notify_mq_msghdr_uv hdr; + unsigned long payload; +}; + +/* struct xpc_notify_sn2 type of notification */ + +#define XPC_N_CALL 0x01 /* notify function provided by user */ + +/* + * Define uv's version of the notify entry. It additionally is used to allocate + * a msg slot on the remote partition into which is copied a sent message. + */ +struct xpc_send_msg_slot_uv { + struct xpc_fifo_entry_uv next; + unsigned int msg_slot_number; + xpc_notify_func func; /* user's notify function */ + void *key; /* pointer to user's key */ +}; + +/* + * Define the structure that manages all the stuff required by a channel. In + * particular, they are used to manage the messages sent across the channel. + * + * This structure is private to a partition, and is NOT shared across the + * partition boundary. + * + * There is an array of these structures for each remote partition. It is + * allocated at the time a partition becomes active. The array contains one + * of these structures for each potential channel connection to that partition. 
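+ * The channel structure for channel ch_number of partition partid is
+ * therefore reached as xpc_partitions[partid].channels[ch_number], which
+ * is how xpc_channel.c below looks channels up.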
+ */ + +struct xpc_channel_uv { + void *cached_notify_gru_mq_desc; /* remote partition's notify mq's */ + /* gru mq descriptor */ + + struct xpc_send_msg_slot_uv *send_msg_slots; + void *recv_msg_slots; /* each slot will hold a xpc_notify_mq_msg_uv */ + /* structure plus the user's payload */ + + struct xpc_fifo_head_uv msg_slot_free_list; + struct xpc_fifo_head_uv recv_msg_list; /* deliverable payloads */ +}; + +struct xpc_channel { + short partid; /* ID of remote partition connected */ + spinlock_t lock; /* lock for updating this structure */ + unsigned int flags; /* general flags */ + + enum xp_retval reason; /* reason why channel is disconnect'g */ + int reason_line; /* line# disconnect initiated from */ + + u16 number; /* channel # */ + + u16 entry_size; /* sizeof each msg entry */ + u16 local_nentries; /* #of msg entries in local msg queue */ + u16 remote_nentries; /* #of msg entries in remote msg queue */ + + atomic_t references; /* #of external references to queues */ + + atomic_t n_on_msg_allocate_wq; /* #on msg allocation wait queue */ + wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */ + + u8 delayed_chctl_flags; /* chctl flags received, but delayed */ + /* action until channel disconnected */ + + atomic_t n_to_notify; /* #of msg senders to notify */ + + xpc_channel_func func; /* user's channel function */ + void *key; /* pointer to user's key */ + + struct completion wdisconnect_wait; /* wait for channel disconnect */ + + /* kthread management related fields */ + + atomic_t kthreads_assigned; /* #of kthreads assigned to channel */ + u32 kthreads_assigned_limit; /* limit on #of kthreads assigned */ + atomic_t kthreads_idle; /* #of kthreads idle waiting for work */ + u32 kthreads_idle_limit; /* limit on #of kthreads idle */ + atomic_t kthreads_active; /* #of kthreads actively working */ + + wait_queue_head_t idle_wq; /* idle kthread wait queue */ + + union { + struct xpc_channel_uv uv; + } sn; + +} ____cacheline_aligned; + +/* struct xpc_channel flags */ + +#define XPC_C_WASCONNECTED 0x00000001 /* channel was connected */ + +#define XPC_C_ROPENCOMPLETE 0x00000002 /* remote open channel complete */ +#define XPC_C_OPENCOMPLETE 0x00000004 /* local open channel complete */ +#define XPC_C_ROPENREPLY 0x00000008 /* remote open channel reply */ +#define XPC_C_OPENREPLY 0x00000010 /* local open channel reply */ +#define XPC_C_ROPENREQUEST 0x00000020 /* remote open channel request */ +#define XPC_C_OPENREQUEST 0x00000040 /* local open channel request */ + +#define XPC_C_SETUP 0x00000080 /* channel's msgqueues are alloc'd */ +#define XPC_C_CONNECTEDCALLOUT 0x00000100 /* connected callout initiated */ +#define XPC_C_CONNECTEDCALLOUT_MADE \ + 0x00000200 /* connected callout completed */ +#define XPC_C_CONNECTED 0x00000400 /* local channel is connected */ +#define XPC_C_CONNECTING 0x00000800 /* channel is being connected */ + +#define XPC_C_RCLOSEREPLY 0x00001000 /* remote close channel reply */ +#define XPC_C_CLOSEREPLY 0x00002000 /* local close channel reply */ +#define XPC_C_RCLOSEREQUEST 0x00004000 /* remote close channel request */ +#define XPC_C_CLOSEREQUEST 0x00008000 /* local close channel request */ + +#define XPC_C_DISCONNECTED 0x00010000 /* channel is disconnected */ +#define XPC_C_DISCONNECTING 0x00020000 /* channel is being disconnected */ +#define XPC_C_DISCONNECTINGCALLOUT \ + 0x00040000 /* disconnecting callout initiated */ +#define XPC_C_DISCONNECTINGCALLOUT_MADE \ + 0x00080000 /* disconnecting callout completed */ +#define XPC_C_WDISCONNECT 0x00100000 /* 
waiting for channel disconnect */ + +/* + * The channel control flags (chctl) union consists of a 64-bit variable which + * is divided up into eight bytes, ordered from right to left. Byte zero + * pertains to channel 0, byte one to channel 1, and so on. Each channel's byte + * can have one or more of the chctl flags set in it. + */ + +union xpc_channel_ctl_flags { + u64 all_flags; + u8 flags[XPC_MAX_NCHANNELS]; +}; + +/* chctl flags */ +#define XPC_CHCTL_CLOSEREQUEST 0x01 +#define XPC_CHCTL_CLOSEREPLY 0x02 +#define XPC_CHCTL_OPENREQUEST 0x04 +#define XPC_CHCTL_OPENREPLY 0x08 +#define XPC_CHCTL_OPENCOMPLETE 0x10 +#define XPC_CHCTL_MSGREQUEST 0x20 + +#define XPC_OPENCLOSE_CHCTL_FLAGS \ + (XPC_CHCTL_CLOSEREQUEST | XPC_CHCTL_CLOSEREPLY | \ + XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY | \ + XPC_CHCTL_OPENCOMPLETE) +#define XPC_MSG_CHCTL_FLAGS XPC_CHCTL_MSGREQUEST + +static inline int +xpc_any_openclose_chctl_flags_set(union xpc_channel_ctl_flags *chctl) +{ + int ch_number; + + for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) { + if (chctl->flags[ch_number] & XPC_OPENCLOSE_CHCTL_FLAGS) + return 1; + } + return 0; +} + +static inline int +xpc_any_msg_chctl_flags_set(union xpc_channel_ctl_flags *chctl) +{ + int ch_number; + + for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) { + if (chctl->flags[ch_number] & XPC_MSG_CHCTL_FLAGS) + return 1; + } + return 0; +} + +struct xpc_partition_uv { + unsigned long heartbeat_gpa; /* phys addr of partition's heartbeat */ + struct xpc_heartbeat_uv cached_heartbeat; /* cached copy of */ + /* partition's heartbeat */ + unsigned long activate_gru_mq_desc_gpa; /* phys addr of parititon's */ + /* activate mq's gru mq */ + /* descriptor */ + void *cached_activate_gru_mq_desc; /* cached copy of partition's */ + /* activate mq's gru mq descriptor */ + struct mutex cached_activate_gru_mq_desc_mutex; + spinlock_t flags_lock; /* protect updating of flags */ + unsigned int flags; /* general flags */ + u8 remote_act_state; /* remote partition's act_state */ + u8 act_state_req; /* act_state request from remote partition */ + enum xp_retval reason; /* reason for deactivate act_state request */ +}; + +/* struct xpc_partition_uv flags */ + +#define XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV 0x00000001 +#define XPC_P_ENGAGED_UV 0x00000002 + +/* struct xpc_partition_uv act_state change requests */ + +#define XPC_P_ASR_ACTIVATE_UV 0x01 +#define XPC_P_ASR_REACTIVATE_UV 0x02 +#define XPC_P_ASR_DEACTIVATE_UV 0x03 + +struct xpc_partition { + + /* XPC HB infrastructure */ + + u8 remote_rp_version; /* version# of partition's rsvd pg */ + unsigned long remote_rp_ts_jiffies; /* timestamp when rsvd pg setup */ + unsigned long remote_rp_pa; /* phys addr of partition's rsvd pg */ + u64 last_heartbeat; /* HB at last read */ + u32 activate_IRQ_rcvd; /* IRQs since activation */ + spinlock_t act_lock; /* protect updating of act_state */ + u8 act_state; /* from XPC HB viewpoint */ + enum xp_retval reason; /* reason partition is deactivating */ + int reason_line; /* line# deactivation initiated from */ + + unsigned long disengage_timeout; /* timeout in jiffies */ + struct timer_list disengage_timer; + + /* XPC infrastructure referencing and teardown control */ + + u8 setup_state; /* infrastructure setup state */ + wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */ + atomic_t references; /* #of references to infrastructure */ + + u8 nchannels; /* #of defined channels supported */ + atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */ + 
atomic_t nchannels_engaged; /* #of channels engaged with remote part */ + struct xpc_channel *channels; /* array of channel structures */ + + /* fields used for managing channel avialability and activity */ + + union xpc_channel_ctl_flags chctl; /* chctl flags yet to be processed */ + spinlock_t chctl_lock; /* chctl flags lock */ + + void *remote_openclose_args_base; /* base address of kmalloc'd space */ + struct xpc_openclose_args *remote_openclose_args; /* copy of remote's */ + /* args */ + + /* channel manager related fields */ + + atomic_t channel_mgr_requests; /* #of requests to activate chan mgr */ + wait_queue_head_t channel_mgr_wq; /* channel mgr's wait queue */ + + union { + struct xpc_partition_uv uv; + } sn; + +} ____cacheline_aligned; + +struct xpc_arch_operations { + int (*setup_partitions) (void); + void (*teardown_partitions) (void); + void (*process_activate_IRQ_rcvd) (void); + enum xp_retval (*get_partition_rsvd_page_pa) + (void *, u64 *, unsigned long *, size_t *); + int (*setup_rsvd_page) (struct xpc_rsvd_page *); + + void (*allow_hb) (short); + void (*disallow_hb) (short); + void (*disallow_all_hbs) (void); + void (*increment_heartbeat) (void); + void (*offline_heartbeat) (void); + void (*online_heartbeat) (void); + void (*heartbeat_init) (void); + void (*heartbeat_exit) (void); + enum xp_retval (*get_remote_heartbeat) (struct xpc_partition *); + + void (*request_partition_activation) (struct xpc_rsvd_page *, + unsigned long, int); + void (*request_partition_reactivation) (struct xpc_partition *); + void (*request_partition_deactivation) (struct xpc_partition *); + void (*cancel_partition_deactivation_request) (struct xpc_partition *); + enum xp_retval (*setup_ch_structures) (struct xpc_partition *); + void (*teardown_ch_structures) (struct xpc_partition *); + + enum xp_retval (*make_first_contact) (struct xpc_partition *); + + u64 (*get_chctl_all_flags) (struct xpc_partition *); + void (*send_chctl_closerequest) (struct xpc_channel *, unsigned long *); + void (*send_chctl_closereply) (struct xpc_channel *, unsigned long *); + void (*send_chctl_openrequest) (struct xpc_channel *, unsigned long *); + void (*send_chctl_openreply) (struct xpc_channel *, unsigned long *); + void (*send_chctl_opencomplete) (struct xpc_channel *, unsigned long *); + void (*process_msg_chctl_flags) (struct xpc_partition *, int); + + enum xp_retval (*save_remote_msgqueue_pa) (struct xpc_channel *, + unsigned long); + + enum xp_retval (*setup_msg_structures) (struct xpc_channel *); + void (*teardown_msg_structures) (struct xpc_channel *); + + void (*indicate_partition_engaged) (struct xpc_partition *); + void (*indicate_partition_disengaged) (struct xpc_partition *); + void (*assume_partition_disengaged) (short); + int (*partition_engaged) (short); + int (*any_partition_engaged) (void); + + int (*n_of_deliverable_payloads) (struct xpc_channel *); + enum xp_retval (*send_payload) (struct xpc_channel *, u32, void *, + u16, u8, xpc_notify_func, void *); + void *(*get_deliverable_payload) (struct xpc_channel *); + void (*received_payload) (struct xpc_channel *, void *); + void (*notify_senders_of_disconnect) (struct xpc_channel *); +}; + +/* struct xpc_partition act_state values (for XPC HB) */ + +#define XPC_P_AS_INACTIVE 0x00 /* partition is not active */ +#define XPC_P_AS_ACTIVATION_REQ 0x01 /* created thread to activate */ +#define XPC_P_AS_ACTIVATING 0x02 /* activation thread started */ +#define XPC_P_AS_ACTIVE 0x03 /* xpc_partition_up() was called */ +#define XPC_P_AS_DEACTIVATING 0x04 /* 
partition deactivation initiated */ + +#define XPC_DEACTIVATE_PARTITION(_p, _reason) \ + xpc_deactivate_partition(__LINE__, (_p), (_reason)) + +/* struct xpc_partition setup_state values */ + +#define XPC_P_SS_UNSET 0x00 /* infrastructure was never setup */ +#define XPC_P_SS_SETUP 0x01 /* infrastructure is setup */ +#define XPC_P_SS_WTEARDOWN 0x02 /* waiting to teardown infrastructure */ +#define XPC_P_SS_TORNDOWN 0x03 /* infrastructure is torndown */ + +/* number of seconds to wait for other partitions to disengage */ +#define XPC_DISENGAGE_DEFAULT_TIMELIMIT 90 + +/* interval in seconds to print 'waiting deactivation' messages */ +#define XPC_DEACTIVATE_PRINTMSG_INTERVAL 10 + +#define XPC_PARTID(_p) ((short)((_p) - &xpc_partitions[0])) + +/* found in xp_main.c */ +extern struct xpc_registration xpc_registrations[]; + +/* found in xpc_main.c */ +extern struct device *xpc_part; +extern struct device *xpc_chan; +extern struct xpc_arch_operations xpc_arch_ops; +extern int xpc_disengage_timelimit; +extern int xpc_disengage_timedout; +extern int xpc_activate_IRQ_rcvd; +extern spinlock_t xpc_activate_IRQ_rcvd_lock; +extern wait_queue_head_t xpc_activate_IRQ_wq; +extern void *xpc_kzalloc_cacheline_aligned(size_t, gfp_t, void **); +extern void xpc_activate_partition(struct xpc_partition *); +extern void xpc_activate_kthreads(struct xpc_channel *, int); +extern void xpc_create_kthreads(struct xpc_channel *, int, int); +extern void xpc_disconnect_wait(int); + +/* found in xpc_uv.c */ +extern int xpc_init_uv(void); +extern void xpc_exit_uv(void); + +/* found in xpc_partition.c */ +extern int xpc_exiting; +extern int xpc_nasid_mask_nlongs; +extern struct xpc_rsvd_page *xpc_rsvd_page; +extern unsigned long *xpc_mach_nasids; +extern struct xpc_partition *xpc_partitions; +extern void *xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **); +extern int xpc_setup_rsvd_page(void); +extern void xpc_teardown_rsvd_page(void); +extern int xpc_identify_activate_IRQ_sender(void); +extern int xpc_partition_disengaged(struct xpc_partition *); +extern int xpc_partition_disengaged_from_timer(struct xpc_partition *part); +extern enum xp_retval xpc_mark_partition_active(struct xpc_partition *); +extern void xpc_mark_partition_inactive(struct xpc_partition *); +extern void xpc_discovery(void); +extern enum xp_retval xpc_get_remote_rp(int, unsigned long *, + struct xpc_rsvd_page *, + unsigned long *); +extern void xpc_deactivate_partition(const int, struct xpc_partition *, + enum xp_retval); +extern enum xp_retval xpc_initiate_partid_to_nasids(short, void *); + +/* found in xpc_channel.c */ +extern void xpc_initiate_connect(int); +extern void xpc_initiate_disconnect(int); +extern enum xp_retval xpc_allocate_msg_wait(struct xpc_channel *); +extern enum xp_retval xpc_initiate_send(short, int, u32, void *, u16); +extern enum xp_retval xpc_initiate_send_notify(short, int, u32, void *, u16, + xpc_notify_func, void *); +extern void xpc_initiate_received(short, int, void *); +extern void xpc_process_sent_chctl_flags(struct xpc_partition *); +extern void xpc_connected_callout(struct xpc_channel *); +extern void xpc_deliver_payload(struct xpc_channel *); +extern void xpc_disconnect_channel(const int, struct xpc_channel *, + enum xp_retval, unsigned long *); +extern void xpc_disconnect_callout(struct xpc_channel *, enum xp_retval); +extern void xpc_partition_going_down(struct xpc_partition *, enum xp_retval); + +static inline void +xpc_wakeup_channel_mgr(struct xpc_partition *part) +{ + if 
(atomic_inc_return(&part->channel_mgr_requests) == 1) + wake_up(&part->channel_mgr_wq); +} + +/* + * These next two inlines are used to keep us from tearing down a channel's + * msg queues while a thread may be referencing them. + */ +static inline void +xpc_msgqueue_ref(struct xpc_channel *ch) +{ + atomic_inc(&ch->references); +} + +static inline void +xpc_msgqueue_deref(struct xpc_channel *ch) +{ + s32 refs = atomic_dec_return(&ch->references); + + DBUG_ON(refs < 0); + if (refs == 0) + xpc_wakeup_channel_mgr(&xpc_partitions[ch->partid]); +} + +#define XPC_DISCONNECT_CHANNEL(_ch, _reason, _irqflgs) \ + xpc_disconnect_channel(__LINE__, _ch, _reason, _irqflgs) + +/* + * These two inlines are used to keep us from tearing down a partition's + * setup infrastructure while a thread may be referencing it. + */ +static inline void +xpc_part_deref(struct xpc_partition *part) +{ + s32 refs = atomic_dec_return(&part->references); + + DBUG_ON(refs < 0); + if (refs == 0 && part->setup_state == XPC_P_SS_WTEARDOWN) + wake_up(&part->teardown_wq); +} + +static inline int +xpc_part_ref(struct xpc_partition *part) +{ + int setup; + + atomic_inc(&part->references); + setup = (part->setup_state == XPC_P_SS_SETUP); + if (!setup) + xpc_part_deref(part); + + return setup; +} + +/* + * The following macro is to be used for the setting of the reason and + * reason_line fields in both the struct xpc_channel and struct xpc_partition + * structures. + */ +#define XPC_SET_REASON(_p, _reason, _line) \ + { \ + (_p)->reason = _reason; \ + (_p)->reason_line = _line; \ + } + +#endif /* _DRIVERS_MISC_SGIXP_XPC_H */ diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c new file mode 100644 index 0000000000..8e6607fc8a --- /dev/null +++ b/drivers/misc/sgi-xp/xpc_channel.c @@ -0,0 +1,1011 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2009 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition Communication (XPC) channel support. + * + * This is the part of XPC that manages the channels and + * sends/receives messages across them to/from other partitions. + * + */ + +#include +#include "xpc.h" + +/* + * Process a connect message from a remote partition. + * + * Note: xpc_process_connect() is expecting to be called with the + * spin_lock_irqsave held and will leave it locked upon return. 
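+ * (The lock is, however, dropped and re-acquired internally around the
+ * call to xpc_arch_ops.setup_msg_structures() below.)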
+ */ +static void +xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags) +{ + enum xp_retval ret; + + lockdep_assert_held(&ch->lock); + + if (!(ch->flags & XPC_C_OPENREQUEST) || + !(ch->flags & XPC_C_ROPENREQUEST)) { + /* nothing more to do for now */ + return; + } + DBUG_ON(!(ch->flags & XPC_C_CONNECTING)); + + if (!(ch->flags & XPC_C_SETUP)) { + spin_unlock_irqrestore(&ch->lock, *irq_flags); + ret = xpc_arch_ops.setup_msg_structures(ch); + spin_lock_irqsave(&ch->lock, *irq_flags); + + if (ret != xpSuccess) + XPC_DISCONNECT_CHANNEL(ch, ret, irq_flags); + else + ch->flags |= XPC_C_SETUP; + + if (ch->flags & XPC_C_DISCONNECTING) + return; + } + + if (!(ch->flags & XPC_C_OPENREPLY)) { + ch->flags |= XPC_C_OPENREPLY; + xpc_arch_ops.send_chctl_openreply(ch, irq_flags); + } + + if (!(ch->flags & XPC_C_ROPENREPLY)) + return; + + if (!(ch->flags & XPC_C_OPENCOMPLETE)) { + ch->flags |= (XPC_C_OPENCOMPLETE | XPC_C_CONNECTED); + xpc_arch_ops.send_chctl_opencomplete(ch, irq_flags); + } + + if (!(ch->flags & XPC_C_ROPENCOMPLETE)) + return; + + dev_info(xpc_chan, "channel %d to partition %d connected\n", + ch->number, ch->partid); + + ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP); /* clear all else */ +} + +/* + * spin_lock_irqsave() is expected to be held on entry. + */ +static void +xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + u32 channel_was_connected = (ch->flags & XPC_C_WASCONNECTED); + + lockdep_assert_held(&ch->lock); + + if (!(ch->flags & XPC_C_DISCONNECTING)) + return; + + DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); + + /* make sure all activity has settled down first */ + + if (atomic_read(&ch->kthreads_assigned) > 0 || + atomic_read(&ch->references) > 0) { + return; + } + DBUG_ON((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && + !(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE)); + + if (part->act_state == XPC_P_AS_DEACTIVATING) { + /* can't proceed until the other side disengages from us */ + if (xpc_arch_ops.partition_engaged(ch->partid)) + return; + + } else { + + /* as long as the other side is up do the full protocol */ + + if (!(ch->flags & XPC_C_RCLOSEREQUEST)) + return; + + if (!(ch->flags & XPC_C_CLOSEREPLY)) { + ch->flags |= XPC_C_CLOSEREPLY; + xpc_arch_ops.send_chctl_closereply(ch, irq_flags); + } + + if (!(ch->flags & XPC_C_RCLOSEREPLY)) + return; + } + + /* wake those waiting for notify completion */ + if (atomic_read(&ch->n_to_notify) > 0) { + /* we do callout while holding ch->lock, callout can't block */ + xpc_arch_ops.notify_senders_of_disconnect(ch); + } + + /* both sides are disconnected now */ + + if (ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE) { + spin_unlock_irqrestore(&ch->lock, *irq_flags); + xpc_disconnect_callout(ch, xpDisconnected); + spin_lock_irqsave(&ch->lock, *irq_flags); + } + + DBUG_ON(atomic_read(&ch->n_to_notify) != 0); + + /* it's now safe to free the channel's message queues */ + xpc_arch_ops.teardown_msg_structures(ch); + + ch->func = NULL; + ch->key = NULL; + ch->entry_size = 0; + ch->local_nentries = 0; + ch->remote_nentries = 0; + ch->kthreads_assigned_limit = 0; + ch->kthreads_idle_limit = 0; + + /* + * Mark the channel disconnected and clear all other flags, including + * XPC_C_SETUP (because of call to + * xpc_arch_ops.teardown_msg_structures()) but not including + * XPC_C_WDISCONNECT (if it was set). 
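+ *
+ * For example, if XPC_C_WDISCONNECT was set, ch->flags ends up as
+ * XPC_C_DISCONNECTED | XPC_C_WDISCONNECT (0x00110000).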
+ */ + ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT)); + + atomic_dec(&part->nchannels_active); + + if (channel_was_connected) { + dev_info(xpc_chan, "channel %d to partition %d disconnected, " + "reason=%d\n", ch->number, ch->partid, ch->reason); + } + + if (ch->flags & XPC_C_WDISCONNECT) { + /* we won't lose the CPU since we're holding ch->lock */ + complete(&ch->wdisconnect_wait); + } else if (ch->delayed_chctl_flags) { + if (part->act_state != XPC_P_AS_DEACTIVATING) { + /* time to take action on any delayed chctl flags */ + spin_lock(&part->chctl_lock); + part->chctl.flags[ch->number] |= + ch->delayed_chctl_flags; + spin_unlock(&part->chctl_lock); + } + ch->delayed_chctl_flags = 0; + } +} + +/* + * Process a change in the channel's remote connection state. + */ +static void +xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, + u8 chctl_flags) +{ + unsigned long irq_flags; + struct xpc_openclose_args *args = + &part->remote_openclose_args[ch_number]; + struct xpc_channel *ch = &part->channels[ch_number]; + enum xp_retval reason; + enum xp_retval ret; + int create_kthread = 0; + + spin_lock_irqsave(&ch->lock, irq_flags); + +again: + + if ((ch->flags & XPC_C_DISCONNECTED) && + (ch->flags & XPC_C_WDISCONNECT)) { + /* + * Delay processing chctl flags until thread waiting disconnect + * has had a chance to see that the channel is disconnected. + */ + ch->delayed_chctl_flags |= chctl_flags; + goto out; + } + + if (chctl_flags & XPC_CHCTL_CLOSEREQUEST) { + + dev_dbg(xpc_chan, "XPC_CHCTL_CLOSEREQUEST (reason=%d) received " + "from partid=%d, channel=%d\n", args->reason, + ch->partid, ch->number); + + /* + * If RCLOSEREQUEST is set, we're probably waiting for + * RCLOSEREPLY. We should find it and a ROPENREQUEST packed + * with this RCLOSEREQUEST in the chctl_flags. 
+ */ + + if (ch->flags & XPC_C_RCLOSEREQUEST) { + DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING)); + DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); + DBUG_ON(!(ch->flags & XPC_C_CLOSEREPLY)); + DBUG_ON(ch->flags & XPC_C_RCLOSEREPLY); + + DBUG_ON(!(chctl_flags & XPC_CHCTL_CLOSEREPLY)); + chctl_flags &= ~XPC_CHCTL_CLOSEREPLY; + ch->flags |= XPC_C_RCLOSEREPLY; + + /* both sides have finished disconnecting */ + xpc_process_disconnect(ch, &irq_flags); + DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED)); + goto again; + } + + if (ch->flags & XPC_C_DISCONNECTED) { + if (!(chctl_flags & XPC_CHCTL_OPENREQUEST)) { + if (part->chctl.flags[ch_number] & + XPC_CHCTL_OPENREQUEST) { + + DBUG_ON(ch->delayed_chctl_flags != 0); + spin_lock(&part->chctl_lock); + part->chctl.flags[ch_number] |= + XPC_CHCTL_CLOSEREQUEST; + spin_unlock(&part->chctl_lock); + } + goto out; + } + + XPC_SET_REASON(ch, 0, 0); + ch->flags &= ~XPC_C_DISCONNECTED; + + atomic_inc(&part->nchannels_active); + ch->flags |= (XPC_C_CONNECTING | XPC_C_ROPENREQUEST); + } + + chctl_flags &= ~(XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY | + XPC_CHCTL_OPENCOMPLETE); + + /* + * The meaningful CLOSEREQUEST connection state fields are: + * reason = reason connection is to be closed + */ + + ch->flags |= XPC_C_RCLOSEREQUEST; + + if (!(ch->flags & XPC_C_DISCONNECTING)) { + reason = args->reason; + if (reason <= xpSuccess || reason > xpUnknownReason) + reason = xpUnknownReason; + else if (reason == xpUnregistering) + reason = xpOtherUnregistering; + + XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags); + + DBUG_ON(chctl_flags & XPC_CHCTL_CLOSEREPLY); + goto out; + } + + xpc_process_disconnect(ch, &irq_flags); + } + + if (chctl_flags & XPC_CHCTL_CLOSEREPLY) { + + dev_dbg(xpc_chan, "XPC_CHCTL_CLOSEREPLY received from partid=" + "%d, channel=%d\n", ch->partid, ch->number); + + if (ch->flags & XPC_C_DISCONNECTED) { + DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING); + goto out; + } + + DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); + + if (!(ch->flags & XPC_C_RCLOSEREQUEST)) { + if (part->chctl.flags[ch_number] & + XPC_CHCTL_CLOSEREQUEST) { + + DBUG_ON(ch->delayed_chctl_flags != 0); + spin_lock(&part->chctl_lock); + part->chctl.flags[ch_number] |= + XPC_CHCTL_CLOSEREPLY; + spin_unlock(&part->chctl_lock); + } + goto out; + } + + ch->flags |= XPC_C_RCLOSEREPLY; + + if (ch->flags & XPC_C_CLOSEREPLY) { + /* both sides have finished disconnecting */ + xpc_process_disconnect(ch, &irq_flags); + } + } + + if (chctl_flags & XPC_CHCTL_OPENREQUEST) { + + dev_dbg(xpc_chan, "XPC_CHCTL_OPENREQUEST (entry_size=%d, " + "local_nentries=%d) received from partid=%d, " + "channel=%d\n", args->entry_size, args->local_nentries, + ch->partid, ch->number); + + if (part->act_state == XPC_P_AS_DEACTIVATING || + (ch->flags & XPC_C_ROPENREQUEST)) { + goto out; + } + + if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) { + ch->delayed_chctl_flags |= XPC_CHCTL_OPENREQUEST; + goto out; + } + DBUG_ON(!(ch->flags & (XPC_C_DISCONNECTED | + XPC_C_OPENREQUEST))); + DBUG_ON(ch->flags & (XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY | + XPC_C_OPENREPLY | XPC_C_CONNECTED)); + + /* + * The meaningful OPENREQUEST connection state fields are: + * entry_size = size of channel's messages in bytes + * local_nentries = remote partition's local_nentries + */ + if (args->entry_size == 0 || args->local_nentries == 0) { + /* assume OPENREQUEST was delayed by mistake */ + goto out; + } + + ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING); + ch->remote_nentries = args->local_nentries; + + if (ch->flags & XPC_C_OPENREQUEST) 
{ + if (args->entry_size != ch->entry_size) { + XPC_DISCONNECT_CHANNEL(ch, xpUnequalMsgSizes, + &irq_flags); + goto out; + } + } else { + ch->entry_size = args->entry_size; + + XPC_SET_REASON(ch, 0, 0); + ch->flags &= ~XPC_C_DISCONNECTED; + + atomic_inc(&part->nchannels_active); + } + + xpc_process_connect(ch, &irq_flags); + } + + if (chctl_flags & XPC_CHCTL_OPENREPLY) { + + dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY (local_msgqueue_pa=" + "0x%lx, local_nentries=%d, remote_nentries=%d) " + "received from partid=%d, channel=%d\n", + args->local_msgqueue_pa, args->local_nentries, + args->remote_nentries, ch->partid, ch->number); + + if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) + goto out; + + if (!(ch->flags & XPC_C_OPENREQUEST)) { + XPC_DISCONNECT_CHANNEL(ch, xpOpenCloseError, + &irq_flags); + goto out; + } + + DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST)); + DBUG_ON(ch->flags & XPC_C_CONNECTED); + + /* + * The meaningful OPENREPLY connection state fields are: + * local_msgqueue_pa = physical address of remote + * partition's local_msgqueue + * local_nentries = remote partition's local_nentries + * remote_nentries = remote partition's remote_nentries + */ + DBUG_ON(args->local_msgqueue_pa == 0); + DBUG_ON(args->local_nentries == 0); + DBUG_ON(args->remote_nentries == 0); + + ret = xpc_arch_ops.save_remote_msgqueue_pa(ch, + args->local_msgqueue_pa); + if (ret != xpSuccess) { + XPC_DISCONNECT_CHANNEL(ch, ret, &irq_flags); + goto out; + } + ch->flags |= XPC_C_ROPENREPLY; + + if (args->local_nentries < ch->remote_nentries) { + dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY: new " + "remote_nentries=%d, old remote_nentries=%d, " + "partid=%d, channel=%d\n", + args->local_nentries, ch->remote_nentries, + ch->partid, ch->number); + + ch->remote_nentries = args->local_nentries; + } + if (args->remote_nentries < ch->local_nentries) { + dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY: new " + "local_nentries=%d, old local_nentries=%d, " + "partid=%d, channel=%d\n", + args->remote_nentries, ch->local_nentries, + ch->partid, ch->number); + + ch->local_nentries = args->remote_nentries; + } + + xpc_process_connect(ch, &irq_flags); + } + + if (chctl_flags & XPC_CHCTL_OPENCOMPLETE) { + + dev_dbg(xpc_chan, "XPC_CHCTL_OPENCOMPLETE received from " + "partid=%d, channel=%d\n", ch->partid, ch->number); + + if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) + goto out; + + if (!(ch->flags & XPC_C_OPENREQUEST) || + !(ch->flags & XPC_C_OPENREPLY)) { + XPC_DISCONNECT_CHANNEL(ch, xpOpenCloseError, + &irq_flags); + goto out; + } + + DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST)); + DBUG_ON(!(ch->flags & XPC_C_ROPENREPLY)); + DBUG_ON(!(ch->flags & XPC_C_CONNECTED)); + + ch->flags |= XPC_C_ROPENCOMPLETE; + + xpc_process_connect(ch, &irq_flags); + create_kthread = 1; + } + +out: + spin_unlock_irqrestore(&ch->lock, irq_flags); + + if (create_kthread) + xpc_create_kthreads(ch, 1, 0); +} + +/* + * Attempt to establish a channel connection to a remote partition. 
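+ *
+ * Editor's sketch (not upstream code): the connect path below deliberately
+ * uses a non-blocking trylock on the registration mutex and reports xpRetry,
+ * so the channel manager simply tries again on its next pass instead of
+ * sleeping.  A user-space analogue using POSIX threads (compile with
+ * -pthread; names are stand-ins):
+ *
+ *    #include <pthread.h>
+ *    #include <stdio.h>
+ *
+ *    enum retval { SUCCESS, RETRY };   // stand-ins for xpSuccess/xpRetry
+ *
+ *    static pthread_mutex_t reg_mutex = PTHREAD_MUTEX_INITIALIZER;
+ *
+ *    static enum retval try_connect(void)
+ *    {
+ *        if (pthread_mutex_trylock(&reg_mutex) != 0)
+ *            return RETRY;     // caller will come back later
+ *
+ *        // ...copy registration info into the channel here...
+ *
+ *        pthread_mutex_unlock(&reg_mutex);
+ *        return SUCCESS;
+ *    }
+ *
+ *    int main(void)
+ *    {
+ *        puts(try_connect() == SUCCESS ? "connected" : "retry later");
+ *        return 0;
+ *    }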
+ */ +static enum xp_retval +xpc_connect_channel(struct xpc_channel *ch) +{ + unsigned long irq_flags; + struct xpc_registration *registration = &xpc_registrations[ch->number]; + + if (mutex_trylock(®istration->mutex) == 0) + return xpRetry; + + if (!XPC_CHANNEL_REGISTERED(ch->number)) { + mutex_unlock(®istration->mutex); + return xpUnregistered; + } + + spin_lock_irqsave(&ch->lock, irq_flags); + + DBUG_ON(ch->flags & XPC_C_CONNECTED); + DBUG_ON(ch->flags & XPC_C_OPENREQUEST); + + if (ch->flags & XPC_C_DISCONNECTING) { + spin_unlock_irqrestore(&ch->lock, irq_flags); + mutex_unlock(®istration->mutex); + return ch->reason; + } + + /* add info from the channel connect registration to the channel */ + + ch->kthreads_assigned_limit = registration->assigned_limit; + ch->kthreads_idle_limit = registration->idle_limit; + DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0); + DBUG_ON(atomic_read(&ch->kthreads_idle) != 0); + DBUG_ON(atomic_read(&ch->kthreads_active) != 0); + + ch->func = registration->func; + DBUG_ON(registration->func == NULL); + ch->key = registration->key; + + ch->local_nentries = registration->nentries; + + if (ch->flags & XPC_C_ROPENREQUEST) { + if (registration->entry_size != ch->entry_size) { + /* the local and remote sides aren't the same */ + + /* + * Because XPC_DISCONNECT_CHANNEL() can block we're + * forced to up the registration sema before we unlock + * the channel lock. But that's okay here because we're + * done with the part that required the registration + * sema. XPC_DISCONNECT_CHANNEL() requires that the + * channel lock be locked and will unlock and relock + * the channel lock as needed. + */ + mutex_unlock(®istration->mutex); + XPC_DISCONNECT_CHANNEL(ch, xpUnequalMsgSizes, + &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpUnequalMsgSizes; + } + } else { + ch->entry_size = registration->entry_size; + + XPC_SET_REASON(ch, 0, 0); + ch->flags &= ~XPC_C_DISCONNECTED; + + atomic_inc(&xpc_partitions[ch->partid].nchannels_active); + } + + mutex_unlock(®istration->mutex); + + /* initiate the connection */ + + ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING); + xpc_arch_ops.send_chctl_openrequest(ch, &irq_flags); + + xpc_process_connect(ch, &irq_flags); + + spin_unlock_irqrestore(&ch->lock, irq_flags); + + return xpSuccess; +} + +void +xpc_process_sent_chctl_flags(struct xpc_partition *part) +{ + unsigned long irq_flags; + union xpc_channel_ctl_flags chctl; + struct xpc_channel *ch; + int ch_number; + u32 ch_flags; + + chctl.all_flags = xpc_arch_ops.get_chctl_all_flags(part); + + /* + * Initiate channel connections for registered channels. + * + * For each connected channel that has pending messages activate idle + * kthreads and/or create new kthreads as needed. + */ + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch = &part->channels[ch_number]; + + /* + * Process any open or close related chctl flags, and then deal + * with connecting or disconnecting the channel as required. 
+ */ + + if (chctl.flags[ch_number] & XPC_OPENCLOSE_CHCTL_FLAGS) { + xpc_process_openclose_chctl_flags(part, ch_number, + chctl.flags[ch_number]); + } + + ch_flags = ch->flags; /* need an atomic snapshot of flags */ + + if (ch_flags & XPC_C_DISCONNECTING) { + spin_lock_irqsave(&ch->lock, irq_flags); + xpc_process_disconnect(ch, &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); + continue; + } + + if (part->act_state == XPC_P_AS_DEACTIVATING) + continue; + + if (!(ch_flags & XPC_C_CONNECTED)) { + if (!(ch_flags & XPC_C_OPENREQUEST)) { + DBUG_ON(ch_flags & XPC_C_SETUP); + (void)xpc_connect_channel(ch); + } + continue; + } + + /* + * Process any message related chctl flags, this may involve + * the activation of kthreads to deliver any pending messages + * sent from the other partition. + */ + + if (chctl.flags[ch_number] & XPC_MSG_CHCTL_FLAGS) + xpc_arch_ops.process_msg_chctl_flags(part, ch_number); + } +} + +/* + * XPC's heartbeat code calls this function to inform XPC that a partition is + * going down. XPC responds by tearing down the XPartition Communication + * infrastructure used for the just downed partition. + * + * XPC's heartbeat code will never call this function and xpc_partition_up() + * at the same time. Nor will it ever make multiple calls to either function + * at the same time. + */ +void +xpc_partition_going_down(struct xpc_partition *part, enum xp_retval reason) +{ + unsigned long irq_flags; + int ch_number; + struct xpc_channel *ch; + + dev_dbg(xpc_chan, "deactivating partition %d, reason=%d\n", + XPC_PARTID(part), reason); + + if (!xpc_part_ref(part)) { + /* infrastructure for this partition isn't currently set up */ + return; + } + + /* disconnect channels associated with the partition going down */ + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch = &part->channels[ch_number]; + + xpc_msgqueue_ref(ch); + spin_lock_irqsave(&ch->lock, irq_flags); + + XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags); + + spin_unlock_irqrestore(&ch->lock, irq_flags); + xpc_msgqueue_deref(ch); + } + + xpc_wakeup_channel_mgr(part); + + xpc_part_deref(part); +} + +/* + * Called by XP at the time of channel connection registration to cause + * XPC to establish connections to all currently active partitions. + */ +void +xpc_initiate_connect(int ch_number) +{ + short partid; + struct xpc_partition *part; + + DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS); + + for (partid = 0; partid < xp_max_npartitions; partid++) { + part = &xpc_partitions[partid]; + + if (xpc_part_ref(part)) { + /* + * Initiate the establishment of a connection on the + * newly registered channel to the remote partition. + */ + xpc_wakeup_channel_mgr(part); + xpc_part_deref(part); + } + } +} + +void +xpc_connected_callout(struct xpc_channel *ch) +{ + /* let the registerer know that a connection has been established */ + + if (ch->func != NULL) { + dev_dbg(xpc_chan, "ch->func() called, reason=xpConnected, " + "partid=%d, channel=%d\n", ch->partid, ch->number); + + ch->func(xpConnected, ch->partid, ch->number, + (void *)(u64)ch->local_nentries, ch->key); + + dev_dbg(xpc_chan, "ch->func() returned, reason=xpConnected, " + "partid=%d, channel=%d\n", ch->partid, ch->number); + } +} + +/* + * Called by XP at the time of channel connection unregistration to cause + * XPC to teardown all current connections for the specified channel. + * + * Before returning xpc_initiate_disconnect() will wait until all connections + * on the specified channel have been closed/torndown. 
So the caller can be + * assured that they will not be receiving any more callouts from XPC to the + * function they registered via xpc_connect(). + * + * Arguments: + * + * ch_number - channel # to unregister. + */ +void +xpc_initiate_disconnect(int ch_number) +{ + unsigned long irq_flags; + short partid; + struct xpc_partition *part; + struct xpc_channel *ch; + + DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS); + + /* initiate the channel disconnect for every active partition */ + for (partid = 0; partid < xp_max_npartitions; partid++) { + part = &xpc_partitions[partid]; + + if (xpc_part_ref(part)) { + ch = &part->channels[ch_number]; + xpc_msgqueue_ref(ch); + + spin_lock_irqsave(&ch->lock, irq_flags); + + if (!(ch->flags & XPC_C_DISCONNECTED)) { + ch->flags |= XPC_C_WDISCONNECT; + + XPC_DISCONNECT_CHANNEL(ch, xpUnregistering, + &irq_flags); + } + + spin_unlock_irqrestore(&ch->lock, irq_flags); + + xpc_msgqueue_deref(ch); + xpc_part_deref(part); + } + } + + xpc_disconnect_wait(ch_number); +} + +/* + * To disconnect a channel, and reflect it back to all who may be waiting. + * + * An OPEN is not allowed until XPC_C_DISCONNECTING is cleared by + * xpc_process_disconnect(), and if set, XPC_C_WDISCONNECT is cleared by + * xpc_disconnect_wait(). + * + * THE CHANNEL IS TO BE LOCKED BY THE CALLER AND WILL REMAIN LOCKED UPON RETURN. + */ +void +xpc_disconnect_channel(const int line, struct xpc_channel *ch, + enum xp_retval reason, unsigned long *irq_flags) +{ + u32 channel_was_connected = (ch->flags & XPC_C_CONNECTED); + + lockdep_assert_held(&ch->lock); + + if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) + return; + + DBUG_ON(!(ch->flags & (XPC_C_CONNECTING | XPC_C_CONNECTED))); + + dev_dbg(xpc_chan, "reason=%d, line=%d, partid=%d, channel=%d\n", + reason, line, ch->partid, ch->number); + + XPC_SET_REASON(ch, reason, line); + + ch->flags |= (XPC_C_CLOSEREQUEST | XPC_C_DISCONNECTING); + /* some of these may not have been set */ + ch->flags &= ~(XPC_C_OPENREQUEST | XPC_C_OPENREPLY | + XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY | + XPC_C_CONNECTING | XPC_C_CONNECTED); + + xpc_arch_ops.send_chctl_closerequest(ch, irq_flags); + + if (channel_was_connected) + ch->flags |= XPC_C_WASCONNECTED; + + spin_unlock_irqrestore(&ch->lock, *irq_flags); + + /* wake all idle kthreads so they can exit */ + if (atomic_read(&ch->kthreads_idle) > 0) { + wake_up_all(&ch->idle_wq); + + } else if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && + !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) { + /* start a kthread that will do the xpDisconnecting callout */ + xpc_create_kthreads(ch, 1, 1); + } + + /* wake those waiting to allocate an entry from the local msg queue */ + if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) + wake_up(&ch->msg_allocate_wq); + + spin_lock_irqsave(&ch->lock, *irq_flags); +} + +void +xpc_disconnect_callout(struct xpc_channel *ch, enum xp_retval reason) +{ + /* + * Let the channel's registerer know that the channel is being + * disconnected. We don't want to do this if the registerer was never + * informed of a connection being made. + */ + + if (ch->func != NULL) { + dev_dbg(xpc_chan, "ch->func() called, reason=%d, partid=%d, " + "channel=%d\n", reason, ch->partid, ch->number); + + ch->func(reason, ch->partid, ch->number, NULL, ch->key); + + dev_dbg(xpc_chan, "ch->func() returned, reason=%d, partid=%d, " + "channel=%d\n", reason, ch->partid, ch->number); + } +} + +/* + * Wait for a message entry to become available for the specified channel, + * but don't wait any longer than 1 jiffy. 
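+ *
+ * Editor's note (illustrative only): the return value of schedule_timeout(1)
+ * is what distinguishes "the jiffy elapsed" (returns 0) from "woken early"
+ * (returns the ticks still remaining), and a pending disconnect overrides
+ * both.  A trivial user-space model of that classification:
+ *
+ *    #include <stdio.h>
+ *
+ *    enum retval { TIMEOUT, INTERRUPTED, DISCONNECTED };  // stand-ins
+ *
+ *    // ticks_left models schedule_timeout()'s return value
+ *    static enum retval classify(int disconnecting, long ticks_left)
+ *    {
+ *        if (disconnecting)
+ *            return DISCONNECTED;
+ *        return ticks_left == 0 ? TIMEOUT : INTERRUPTED;
+ *    }
+ *
+ *    int main(void)
+ *    {
+ *        printf("%d %d %d\n", classify(0, 0), classify(0, 1),
+ *               classify(1, 0));   // prints 0 1 2
+ *        return 0;
+ *    }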
+ */ +enum xp_retval +xpc_allocate_msg_wait(struct xpc_channel *ch) +{ + enum xp_retval ret; + DEFINE_WAIT(wait); + + if (ch->flags & XPC_C_DISCONNECTING) { + DBUG_ON(ch->reason == xpInterrupted); + return ch->reason; + } + + atomic_inc(&ch->n_on_msg_allocate_wq); + prepare_to_wait(&ch->msg_allocate_wq, &wait, TASK_INTERRUPTIBLE); + ret = schedule_timeout(1); + finish_wait(&ch->msg_allocate_wq, &wait); + atomic_dec(&ch->n_on_msg_allocate_wq); + + if (ch->flags & XPC_C_DISCONNECTING) { + ret = ch->reason; + DBUG_ON(ch->reason == xpInterrupted); + } else if (ret == 0) { + ret = xpTimeout; + } else { + ret = xpInterrupted; + } + + return ret; +} + +/* + * Send a message that contains the user's payload on the specified channel + * connected to the specified partition. + * + * NOTE that this routine can sleep waiting for a message entry to become + * available. To not sleep, pass in the XPC_NOWAIT flag. + * + * Once sent, this routine will not wait for the message to be received, nor + * will notification be given when it does happen. + * + * Arguments: + * + * partid - ID of partition to which the channel is connected. + * ch_number - channel # to send message on. + * flags - see xp.h for valid flags. + * payload - pointer to the payload which is to be sent. + * payload_size - size of the payload in bytes. + */ +enum xp_retval +xpc_initiate_send(short partid, int ch_number, u32 flags, void *payload, + u16 payload_size) +{ + struct xpc_partition *part = &xpc_partitions[partid]; + enum xp_retval ret = xpUnknownReason; + + dev_dbg(xpc_chan, "payload=0x%p, partid=%d, channel=%d\n", payload, + partid, ch_number); + + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); + DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); + DBUG_ON(payload == NULL); + + if (xpc_part_ref(part)) { + ret = xpc_arch_ops.send_payload(&part->channels[ch_number], + flags, payload, payload_size, 0, NULL, NULL); + xpc_part_deref(part); + } + + return ret; +} + +/* + * Send a message that contains the user's payload on the specified channel + * connected to the specified partition. + * + * NOTE that this routine can sleep waiting for a message entry to become + * available. To not sleep, pass in the XPC_NOWAIT flag. + * + * This routine will not wait for the message to be sent or received. + * + * Once the remote end of the channel has received the message, the function + * passed as an argument to xpc_initiate_send_notify() will be called. This + * allows the sender to free up or re-use any buffers referenced by the + * message, but does NOT mean the message has been processed at the remote + * end by a receiver. + * + * If this routine returns an error, the caller's function will NOT be called. + * + * Arguments: + * + * partid - ID of partition to which the channel is connected. + * ch_number - channel # to send message on. + * flags - see xp.h for valid flags. + * payload - pointer to the payload which is to be sent. + * payload_size - size of the payload in bytes. + * func - function to call with asynchronous notification of message + * receipt. THIS FUNCTION MUST BE NON-BLOCKING. + * key - user-defined key to be passed to the function when it's called. 
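+ *
+ * Editor's sketch (hypothetical caller, not upstream code): the practical
+ * consequence of the notify callback is an ownership rule -- the sender must
+ * keep the payload buffer alive until the callback fires, then may free or
+ * reuse it.  A user-space model with made-up names (the real callback type
+ * and send routine live in xp.h):
+ *
+ *    #include <stdlib.h>
+ *    #include <stdio.h>
+ *
+ *    typedef void (*notify_fn)(int reason, void *key);
+ *
+ *    static void on_delivered(int reason, void *key)
+ *    {
+ *        printf("remote side has the message, reason=%d\n", reason);
+ *        free(key);              // key was chosen to be the buffer
+ *    }
+ *
+ *    static void fake_send(void *payload, notify_fn fn, void *key)
+ *    {
+ *        (void)payload;          // a real send would queue it here
+ *        fn(0, key);             // pretend delivery completed
+ *    }
+ *
+ *    int main(void)
+ *    {
+ *        void *buf = malloc(64);
+ *
+ *        if (buf)
+ *            fake_send(buf, on_delivered, buf);
+ *        return 0;
+ *    }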
+ */ +enum xp_retval +xpc_initiate_send_notify(short partid, int ch_number, u32 flags, void *payload, + u16 payload_size, xpc_notify_func func, void *key) +{ + struct xpc_partition *part = &xpc_partitions[partid]; + enum xp_retval ret = xpUnknownReason; + + dev_dbg(xpc_chan, "payload=0x%p, partid=%d, channel=%d\n", payload, + partid, ch_number); + + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); + DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); + DBUG_ON(payload == NULL); + DBUG_ON(func == NULL); + + if (xpc_part_ref(part)) { + ret = xpc_arch_ops.send_payload(&part->channels[ch_number], + flags, payload, payload_size, XPC_N_CALL, func, key); + xpc_part_deref(part); + } + return ret; +} + +/* + * Deliver a message's payload to its intended recipient. + */ +void +xpc_deliver_payload(struct xpc_channel *ch) +{ + void *payload; + + payload = xpc_arch_ops.get_deliverable_payload(ch); + if (payload != NULL) { + + /* + * This ref is taken to protect the payload itself from being + * freed before the user is finished with it, which the user + * indicates by calling xpc_initiate_received(). + */ + xpc_msgqueue_ref(ch); + + atomic_inc(&ch->kthreads_active); + + if (ch->func != NULL) { + dev_dbg(xpc_chan, "ch->func() called, payload=0x%p " + "partid=%d channel=%d\n", payload, ch->partid, + ch->number); + + /* deliver the message to its intended recipient */ + ch->func(xpMsgReceived, ch->partid, ch->number, payload, + ch->key); + + dev_dbg(xpc_chan, "ch->func() returned, payload=0x%p " + "partid=%d channel=%d\n", payload, ch->partid, + ch->number); + } + + atomic_dec(&ch->kthreads_active); + } +} + +/* + * Acknowledge receipt of a delivered message's payload. + * + * This function, although called by users, does not call xpc_part_ref() to + * ensure that the partition infrastructure is in place. It relies on the + * fact that we called xpc_msgqueue_ref() in xpc_deliver_payload(). + * + * Arguments: + * + * partid - ID of partition to which the channel is connected. + * ch_number - channel # message received on. + * payload - pointer to the payload area allocated via + * xpc_initiate_send() or xpc_initiate_send_notify(). + */ +void +xpc_initiate_received(short partid, int ch_number, void *payload) +{ + struct xpc_partition *part = &xpc_partitions[partid]; + struct xpc_channel *ch; + + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); + DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); + + ch = &part->channels[ch_number]; + xpc_arch_ops.received_payload(ch, payload); + + /* the call to xpc_msgqueue_ref() was done by xpc_deliver_payload() */ + xpc_msgqueue_deref(ch); +} diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c new file mode 100644 index 0000000000..6da509d692 --- /dev/null +++ b/drivers/misc/sgi-xp/xpc_main.c @@ -0,0 +1,1345 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * (C) Copyright 2020 Hewlett Packard Enterprise Development LP + * Copyright (c) 2004-2009 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition Communication (XPC) support - standard version. + * + * XPC provides a message passing capability that crosses partition + * boundaries. This module is made up of two parts: + * + * partition This part detects the presence/absence of other + * partitions. It provides a heartbeat and monitors + * the heartbeats of other partitions. 
+ * + * channel This part manages the channels and sends/receives + * messages across them to/from other partitions. + * + * There are a couple of additional functions residing in XP, which + * provide an interface to XPC for its users. + * + * + * Caveats: + * + * . Currently on sn2, we have no way to determine which nasid an IRQ + * came from. Thus, xpc_send_IRQ_sn2() does a remote amo write + * followed by an IPI. The amo indicates where data is to be pulled + * from, so after the IPI arrives, the remote partition checks the amo + * word. The IPI can actually arrive before the amo however, so other + * code must periodically check for this case. Also, remote amo + * operations do not reliably time out. Thus we do a remote PIO read + * solely to know whether the remote partition is down and whether we + * should stop sending IPIs to it. This remote PIO read operation is + * set up in a special nofault region so SAL knows to ignore (and + * cleanup) any errors due to the remote amo write, PIO read, and/or + * PIO write operations. + * + * If/when new hardware solves this IPI problem, we should abandon + * the current approach. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "xpc.h" + +#ifdef CONFIG_X86_64 +#include +#endif + +/* define two XPC debug device structures to be used with dev_dbg() et al */ + +static struct device_driver xpc_dbg_name = { + .name = "xpc" +}; + +static struct device xpc_part_dbg_subname = { + .init_name = "", /* set to "part" at xpc_init() time */ + .driver = &xpc_dbg_name +}; + +static struct device xpc_chan_dbg_subname = { + .init_name = "", /* set to "chan" at xpc_init() time */ + .driver = &xpc_dbg_name +}; + +struct device *xpc_part = &xpc_part_dbg_subname; +struct device *xpc_chan = &xpc_chan_dbg_subname; + +static int xpc_kdebug_ignore; + +/* systune related variables for /proc/sys directories */ + +static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL; +static int xpc_hb_min_interval = 1; +static int xpc_hb_max_interval = 10; + +static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL; +static int xpc_hb_check_min_interval = 10; +static int xpc_hb_check_max_interval = 120; + +int xpc_disengage_timelimit = XPC_DISENGAGE_DEFAULT_TIMELIMIT; +static int xpc_disengage_min_timelimit; /* = 0 */ +static int xpc_disengage_max_timelimit = 120; + +static struct ctl_table xpc_sys_xpc_hb[] = { + { + .procname = "hb_interval", + .data = &xpc_hb_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xpc_hb_min_interval, + .extra2 = &xpc_hb_max_interval}, + { + .procname = "hb_check_interval", + .data = &xpc_hb_check_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xpc_hb_check_min_interval, + .extra2 = &xpc_hb_check_max_interval}, + {} +}; +static struct ctl_table xpc_sys_xpc[] = { + { + .procname = "disengage_timelimit", + .data = &xpc_disengage_timelimit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xpc_disengage_min_timelimit, + .extra2 = &xpc_disengage_max_timelimit}, + {} +}; + +static struct ctl_table_header *xpc_sysctl; +static struct ctl_table_header *xpc_sysctl_hb; + +/* non-zero if any remote partition disengage was timed out */ +int xpc_disengage_timedout; + +/* #of activate IRQs received and not yet processed */ +int xpc_activate_IRQ_rcvd; +DEFINE_SPINLOCK(xpc_activate_IRQ_rcvd_lock); + +/* IRQ handler notifies this wait queue on receipt of an IRQ 
*/ +DECLARE_WAIT_QUEUE_HEAD(xpc_activate_IRQ_wq); + +static unsigned long xpc_hb_check_timeout; +static struct timer_list xpc_hb_timer; + +/* notification that the xpc_hb_checker thread has exited */ +static DECLARE_COMPLETION(xpc_hb_checker_exited); + +/* notification that the xpc_discovery thread has exited */ +static DECLARE_COMPLETION(xpc_discovery_exited); + +static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *); + +static int xpc_system_reboot(struct notifier_block *, unsigned long, void *); +static struct notifier_block xpc_reboot_notifier = { + .notifier_call = xpc_system_reboot, +}; + +static int xpc_system_die(struct notifier_block *, unsigned long, void *); +static struct notifier_block xpc_die_notifier = { + .notifier_call = xpc_system_die, +}; + +struct xpc_arch_operations xpc_arch_ops; + +/* + * Timer function to enforce the timelimit on the partition disengage. + */ +static void +xpc_timeout_partition_disengage(struct timer_list *t) +{ + struct xpc_partition *part = from_timer(part, t, disengage_timer); + + DBUG_ON(time_is_after_jiffies(part->disengage_timeout)); + + xpc_partition_disengaged_from_timer(part); + + DBUG_ON(part->disengage_timeout != 0); + DBUG_ON(xpc_arch_ops.partition_engaged(XPC_PARTID(part))); +} + +/* + * Timer to produce the heartbeat. The timer structures function is + * already set when this is initially called. A tunable is used to + * specify when the next timeout should occur. + */ +static void +xpc_hb_beater(struct timer_list *unused) +{ + xpc_arch_ops.increment_heartbeat(); + + if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) + wake_up_interruptible(&xpc_activate_IRQ_wq); + + xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ); + add_timer(&xpc_hb_timer); +} + +static void +xpc_start_hb_beater(void) +{ + xpc_arch_ops.heartbeat_init(); + timer_setup(&xpc_hb_timer, xpc_hb_beater, 0); + xpc_hb_beater(NULL); +} + +static void +xpc_stop_hb_beater(void) +{ + del_timer_sync(&xpc_hb_timer); + xpc_arch_ops.heartbeat_exit(); +} + +/* + * At periodic intervals, scan through all active partitions and ensure + * their heartbeat is still active. If not, the partition is deactivated. + */ +static void +xpc_check_remote_hb(void) +{ + struct xpc_partition *part; + short partid; + enum xp_retval ret; + + for (partid = 0; partid < xp_max_npartitions; partid++) { + + if (xpc_exiting) + break; + + if (partid == xp_partition_id) + continue; + + part = &xpc_partitions[partid]; + + if (part->act_state == XPC_P_AS_INACTIVE || + part->act_state == XPC_P_AS_DEACTIVATING) { + continue; + } + + ret = xpc_arch_ops.get_remote_heartbeat(part); + if (ret != xpSuccess) + XPC_DEACTIVATE_PARTITION(part, ret); + } +} + +/* + * This thread is responsible for nearly all of the partition + * activation/deactivation. 
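+ *
+ * Editor's sketch (not upstream code): the checker couples two periods -- a
+ * short "beat" that bumps our own heartbeat and a longer check interval at
+ * which remote heartbeats are scanned.  A single-threaded user-space model
+ * with placeholder intervals:
+ *
+ *    #include <stdio.h>
+ *    #include <time.h>
+ *    #include <unistd.h>
+ *
+ *    int main(void)
+ *    {
+ *        const int check_interval = 2;   // seconds, placeholder
+ *        time_t next_check = time(NULL) + check_interval;
+ *        unsigned long heartbeat = 0;
+ *
+ *        for (int i = 0; i < 5; i++) {
+ *            heartbeat++;                // xpc_hb_beater() analogue
+ *            if (time(NULL) >= next_check) {
+ *                printf("checking remote heartbeats (local=%lu)\n",
+ *                       heartbeat);
+ *                next_check = time(NULL) + check_interval;
+ *            }
+ *            sleep(1);
+ *        }
+ *        return 0;
+ *    }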
+ */ +static int +xpc_hb_checker(void *ignore) +{ + int force_IRQ = 0; + + /* this thread was marked active by xpc_hb_init() */ + + set_cpus_allowed_ptr(current, cpumask_of(XPC_HB_CHECK_CPU)); + + /* set our heartbeating to other partitions into motion */ + xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ); + xpc_start_hb_beater(); + + while (!xpc_exiting) { + + dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have " + "been received\n", + (int)(xpc_hb_check_timeout - jiffies), + xpc_activate_IRQ_rcvd); + + /* checking of remote heartbeats is skewed by IRQ handling */ + if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) { + xpc_hb_check_timeout = jiffies + + (xpc_hb_check_interval * HZ); + + dev_dbg(xpc_part, "checking remote heartbeats\n"); + xpc_check_remote_hb(); + } + + /* check for outstanding IRQs */ + if (xpc_activate_IRQ_rcvd > 0 || force_IRQ != 0) { + force_IRQ = 0; + dev_dbg(xpc_part, "processing activate IRQs " + "received\n"); + xpc_arch_ops.process_activate_IRQ_rcvd(); + } + + /* wait for IRQ or timeout */ + (void)wait_event_interruptible(xpc_activate_IRQ_wq, + (time_is_before_eq_jiffies( + xpc_hb_check_timeout) || + xpc_activate_IRQ_rcvd > 0 || + xpc_exiting)); + } + + xpc_stop_hb_beater(); + + dev_dbg(xpc_part, "heartbeat checker is exiting\n"); + + /* mark this thread as having exited */ + complete(&xpc_hb_checker_exited); + return 0; +} + +/* + * This thread will attempt to discover other partitions to activate + * based on info provided by SAL. This new thread is short lived and + * will exit once discovery is complete. + */ +static int +xpc_initiate_discovery(void *ignore) +{ + xpc_discovery(); + + dev_dbg(xpc_part, "discovery thread is exiting\n"); + + /* mark this thread as having exited */ + complete(&xpc_discovery_exited); + return 0; +} + +/* + * The first kthread assigned to a newly activated partition is the one + * created by XPC HB with which it calls xpc_activating(). XPC hangs on to + * that kthread until the partition is brought down, at which time that kthread + * returns back to XPC HB. (The return of that kthread will signify to XPC HB + * that XPC has dismantled all communication infrastructure for the associated + * partition.) This kthread becomes the channel manager for that partition. + * + * Each active partition has a channel manager, who, besides connecting and + * disconnecting channels, will ensure that each of the partition's connected + * channels has the required number of assigned kthreads to get the work done. + */ +static void +xpc_channel_mgr(struct xpc_partition *part) +{ + while (part->act_state != XPC_P_AS_DEACTIVATING || + atomic_read(&part->nchannels_active) > 0 || + !xpc_partition_disengaged(part)) { + + xpc_process_sent_chctl_flags(part); + + /* + * Wait until we've been requested to activate kthreads or + * all of the channel's message queues have been torn down or + * a signal is pending. + * + * The channel_mgr_requests is set to 1 after being awakened, + * This is done to prevent the channel mgr from making one pass + * through the loop for each request, since he will + * be servicing all the requests in one pass. The reason it's + * set to 1 instead of 0 is so that other kthreads will know + * that the channel mgr is running and won't bother trying to + * wake him up. 
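+		 *
+		 * Editor's note (simplified model, not upstream code): the
+		 * counter batches wakeups -- requesters only bump it, and one
+		 * manager pass services every outstanding request before
+		 * parking the counter back at 1.  A user-space sketch using
+		 * C11 atomics:
+		 *
+		 *    #include <stdatomic.h>
+		 *    #include <stdio.h>
+		 *
+		 *    static atomic_int requests = 1;   // 1 == "manager running"
+		 *
+		 *    static void requester(void)
+		 *    {
+		 *        atomic_fetch_add(&requests, 1);   // then wake the manager
+		 *    }
+		 *
+		 *    static void manager_pass(void)
+		 *    {
+		 *        atomic_fetch_sub(&requests, 1);   // consume the wakeup
+		 *        // ...would sleep here until requests > 0 again...
+		 *        atomic_store(&requests, 1);       // one pass covers all
+		 *    }
+		 *
+		 *    int main(void)
+		 *    {
+		 *        requester();
+		 *        requester();
+		 *        manager_pass();                   // one pass handles both
+		 *        printf("requests=%d\n", atomic_load(&requests));  // 1
+		 *        return 0;
+		 *    }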
+ */ + atomic_dec(&part->channel_mgr_requests); + (void)wait_event_interruptible(part->channel_mgr_wq, + (atomic_read(&part->channel_mgr_requests) > 0 || + part->chctl.all_flags != 0 || + (part->act_state == XPC_P_AS_DEACTIVATING && + atomic_read(&part->nchannels_active) == 0 && + xpc_partition_disengaged(part)))); + atomic_set(&part->channel_mgr_requests, 1); + } +} + +/* + * Guarantee that the kzalloc'd memory is cacheline aligned. + */ +void * +xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) +{ + /* see if kzalloc will give us cachline aligned memory by default */ + *base = kzalloc(size, flags); + if (*base == NULL) + return NULL; + + if ((u64)*base == L1_CACHE_ALIGN((u64)*base)) + return *base; + + kfree(*base); + + /* nope, we'll have to do it ourselves */ + *base = kzalloc(size + L1_CACHE_BYTES, flags); + if (*base == NULL) + return NULL; + + return (void *)L1_CACHE_ALIGN((u64)*base); +} + +/* + * Setup the channel structures necessary to support XPartition Communication + * between the specified remote partition and the local one. + */ +static enum xp_retval +xpc_setup_ch_structures(struct xpc_partition *part) +{ + enum xp_retval ret; + int ch_number; + struct xpc_channel *ch; + short partid = XPC_PARTID(part); + + /* + * Allocate all of the channel structures as a contiguous chunk of + * memory. + */ + DBUG_ON(part->channels != NULL); + part->channels = kcalloc(XPC_MAX_NCHANNELS, + sizeof(struct xpc_channel), + GFP_KERNEL); + if (part->channels == NULL) { + dev_err(xpc_chan, "can't get memory for channels\n"); + return xpNoMemory; + } + + /* allocate the remote open and close args */ + + part->remote_openclose_args = + xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, + GFP_KERNEL, &part-> + remote_openclose_args_base); + if (part->remote_openclose_args == NULL) { + dev_err(xpc_chan, "can't get memory for remote connect args\n"); + ret = xpNoMemory; + goto out_1; + } + + part->chctl.all_flags = 0; + spin_lock_init(&part->chctl_lock); + + atomic_set(&part->channel_mgr_requests, 1); + init_waitqueue_head(&part->channel_mgr_wq); + + part->nchannels = XPC_MAX_NCHANNELS; + + atomic_set(&part->nchannels_active, 0); + atomic_set(&part->nchannels_engaged, 0); + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch = &part->channels[ch_number]; + + ch->partid = partid; + ch->number = ch_number; + ch->flags = XPC_C_DISCONNECTED; + + atomic_set(&ch->kthreads_assigned, 0); + atomic_set(&ch->kthreads_idle, 0); + atomic_set(&ch->kthreads_active, 0); + + atomic_set(&ch->references, 0); + atomic_set(&ch->n_to_notify, 0); + + spin_lock_init(&ch->lock); + init_completion(&ch->wdisconnect_wait); + + atomic_set(&ch->n_on_msg_allocate_wq, 0); + init_waitqueue_head(&ch->msg_allocate_wq); + init_waitqueue_head(&ch->idle_wq); + } + + ret = xpc_arch_ops.setup_ch_structures(part); + if (ret != xpSuccess) + goto out_2; + + /* + * With the setting of the partition setup_state to XPC_P_SS_SETUP, + * we're declaring that this partition is ready to go. + */ + part->setup_state = XPC_P_SS_SETUP; + + return xpSuccess; + + /* setup of ch structures failed */ +out_2: + kfree(part->remote_openclose_args_base); + part->remote_openclose_args = NULL; +out_1: + kfree(part->channels); + part->channels = NULL; + return ret; +} + +/* + * Teardown the channel structures necessary to support XPartition Communication + * between the specified remote partition and the local one. 
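+ *
+ * Editor's sketch (user-space model, not upstream code): teardown mirrors
+ * setup -- flip the state so no new references can be taken, wait for the
+ * existing ones to drain, then free what setup allocated.  Names and states
+ * below are placeholders:
+ *
+ *    #include <stdio.h>
+ *    #include <stdlib.h>
+ *
+ *    enum { SS_UNSET, SS_SETUP, SS_WTEARDOWN, SS_TORNDOWN };
+ *
+ *    struct part { int setup_state; int references; int *channels; };
+ *
+ *    static int setup(struct part *p, int n)
+ *    {
+ *        p->channels = calloc(n, sizeof(*p->channels));
+ *        if (!p->channels)
+ *            return -1;
+ *        p->setup_state = SS_SETUP;     // declare ourselves ready
+ *        return 0;
+ *    }
+ *
+ *    static void teardown(struct part *p)
+ *    {
+ *        p->setup_state = SS_WTEARDOWN; // block new references
+ *        while (p->references > 0)
+ *            ;                          // the real code sleeps on a waitqueue
+ *        free(p->channels);
+ *        p->channels = NULL;
+ *        p->setup_state = SS_TORNDOWN;
+ *    }
+ *
+ *    int main(void)
+ *    {
+ *        struct part p = { SS_UNSET, 0, NULL };
+ *
+ *        if (setup(&p, 8) == 0)
+ *            teardown(&p);
+ *        printf("state=%d\n", p.setup_state);   // prints 3
+ *        return 0;
+ *    }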
+ */ +static void +xpc_teardown_ch_structures(struct xpc_partition *part) +{ + DBUG_ON(atomic_read(&part->nchannels_engaged) != 0); + DBUG_ON(atomic_read(&part->nchannels_active) != 0); + + /* + * Make this partition inaccessible to local processes by marking it + * as no longer setup. Then wait before proceeding with the teardown + * until all existing references cease. + */ + DBUG_ON(part->setup_state != XPC_P_SS_SETUP); + part->setup_state = XPC_P_SS_WTEARDOWN; + + wait_event(part->teardown_wq, (atomic_read(&part->references) == 0)); + + /* now we can begin tearing down the infrastructure */ + + xpc_arch_ops.teardown_ch_structures(part); + + kfree(part->remote_openclose_args_base); + part->remote_openclose_args = NULL; + kfree(part->channels); + part->channels = NULL; + + part->setup_state = XPC_P_SS_TORNDOWN; +} + +/* + * When XPC HB determines that a partition has come up, it will create a new + * kthread and that kthread will call this function to attempt to set up the + * basic infrastructure used for Cross Partition Communication with the newly + * upped partition. + * + * The kthread that was created by XPC HB and which setup the XPC + * infrastructure will remain assigned to the partition becoming the channel + * manager for that partition until the partition is deactivating, at which + * time the kthread will teardown the XPC infrastructure and then exit. + */ +static int +xpc_activating(void *__partid) +{ + short partid = (u64)__partid; + struct xpc_partition *part = &xpc_partitions[partid]; + unsigned long irq_flags; + + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); + + spin_lock_irqsave(&part->act_lock, irq_flags); + + if (part->act_state == XPC_P_AS_DEACTIVATING) { + part->act_state = XPC_P_AS_INACTIVE; + spin_unlock_irqrestore(&part->act_lock, irq_flags); + part->remote_rp_pa = 0; + return 0; + } + + /* indicate the thread is activating */ + DBUG_ON(part->act_state != XPC_P_AS_ACTIVATION_REQ); + part->act_state = XPC_P_AS_ACTIVATING; + + XPC_SET_REASON(part, 0, 0); + spin_unlock_irqrestore(&part->act_lock, irq_flags); + + dev_dbg(xpc_part, "activating partition %d\n", partid); + + xpc_arch_ops.allow_hb(partid); + + if (xpc_setup_ch_structures(part) == xpSuccess) { + (void)xpc_part_ref(part); /* this will always succeed */ + + if (xpc_arch_ops.make_first_contact(part) == xpSuccess) { + xpc_mark_partition_active(part); + xpc_channel_mgr(part); + /* won't return until partition is deactivating */ + } + + xpc_part_deref(part); + xpc_teardown_ch_structures(part); + } + + xpc_arch_ops.disallow_hb(partid); + xpc_mark_partition_inactive(part); + + if (part->reason == xpReactivating) { + /* interrupting ourselves results in activating partition */ + xpc_arch_ops.request_partition_reactivation(part); + } + + return 0; +} + +void +xpc_activate_partition(struct xpc_partition *part) +{ + short partid = XPC_PARTID(part); + unsigned long irq_flags; + struct task_struct *kthread; + + spin_lock_irqsave(&part->act_lock, irq_flags); + + DBUG_ON(part->act_state != XPC_P_AS_INACTIVE); + + part->act_state = XPC_P_AS_ACTIVATION_REQ; + XPC_SET_REASON(part, xpCloneKThread, __LINE__); + + spin_unlock_irqrestore(&part->act_lock, irq_flags); + + kthread = kthread_run(xpc_activating, (void *)((u64)partid), "xpc%02d", + partid); + if (IS_ERR(kthread)) { + spin_lock_irqsave(&part->act_lock, irq_flags); + part->act_state = XPC_P_AS_INACTIVE; + XPC_SET_REASON(part, xpCloneKThreadFailed, __LINE__); + spin_unlock_irqrestore(&part->act_lock, irq_flags); + } +} + +void +xpc_activate_kthreads(struct 
xpc_channel *ch, int needed) +{ + int idle = atomic_read(&ch->kthreads_idle); + int assigned = atomic_read(&ch->kthreads_assigned); + int wakeup; + + DBUG_ON(needed <= 0); + + if (idle > 0) { + wakeup = (needed > idle) ? idle : needed; + needed -= wakeup; + + dev_dbg(xpc_chan, "wakeup %d idle kthreads, partid=%d, " + "channel=%d\n", wakeup, ch->partid, ch->number); + + /* only wakeup the requested number of kthreads */ + wake_up_nr(&ch->idle_wq, wakeup); + } + + if (needed <= 0) + return; + + if (needed + assigned > ch->kthreads_assigned_limit) { + needed = ch->kthreads_assigned_limit - assigned; + if (needed <= 0) + return; + } + + dev_dbg(xpc_chan, "create %d new kthreads, partid=%d, channel=%d\n", + needed, ch->partid, ch->number); + + xpc_create_kthreads(ch, needed, 0); +} + +/* + * This function is where XPC's kthreads wait for messages to deliver. + */ +static void +xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch) +{ + int (*n_of_deliverable_payloads) (struct xpc_channel *) = + xpc_arch_ops.n_of_deliverable_payloads; + + do { + /* deliver messages to their intended recipients */ + + while (n_of_deliverable_payloads(ch) > 0 && + !(ch->flags & XPC_C_DISCONNECTING)) { + xpc_deliver_payload(ch); + } + + if (atomic_inc_return(&ch->kthreads_idle) > + ch->kthreads_idle_limit) { + /* too many idle kthreads on this channel */ + atomic_dec(&ch->kthreads_idle); + break; + } + + dev_dbg(xpc_chan, "idle kthread calling " + "wait_event_interruptible_exclusive()\n"); + + (void)wait_event_interruptible_exclusive(ch->idle_wq, + (n_of_deliverable_payloads(ch) > 0 || + (ch->flags & XPC_C_DISCONNECTING))); + + atomic_dec(&ch->kthreads_idle); + + } while (!(ch->flags & XPC_C_DISCONNECTING)); +} + +static int +xpc_kthread_start(void *args) +{ + short partid = XPC_UNPACK_ARG1(args); + u16 ch_number = XPC_UNPACK_ARG2(args); + struct xpc_partition *part = &xpc_partitions[partid]; + struct xpc_channel *ch; + int n_needed; + unsigned long irq_flags; + int (*n_of_deliverable_payloads) (struct xpc_channel *) = + xpc_arch_ops.n_of_deliverable_payloads; + + dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n", + partid, ch_number); + + ch = &part->channels[ch_number]; + + if (!(ch->flags & XPC_C_DISCONNECTING)) { + + /* let registerer know that connection has been established */ + + spin_lock_irqsave(&ch->lock, irq_flags); + if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) { + ch->flags |= XPC_C_CONNECTEDCALLOUT; + spin_unlock_irqrestore(&ch->lock, irq_flags); + + xpc_connected_callout(ch); + + spin_lock_irqsave(&ch->lock, irq_flags); + ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE; + spin_unlock_irqrestore(&ch->lock, irq_flags); + + /* + * It is possible that while the callout was being + * made that the remote partition sent some messages. + * If that is the case, we may need to activate + * additional kthreads to help deliver them. We only + * need one less than total #of messages to deliver. 
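+			 *
+			 * Editor's note (illustrative arithmetic, not upstream
+			 * code): the kthread making this callout delivers one
+			 * message itself, so only (deliverable - 1) extra
+			 * helpers are worth waking or creating:
+			 *
+			 *    #include <stdio.h>
+			 *
+			 *    static int extra_kthreads_needed(int deliverable)
+			 *    {
+			 *        int n = deliverable - 1;  // this kthread takes one
+			 *
+			 *        return n > 0 ? n : 0;
+			 *    }
+			 *
+			 *    int main(void)
+			 *    {
+			 *        printf("%d %d %d\n", extra_kthreads_needed(0),
+			 *               extra_kthreads_needed(1),
+			 *               extra_kthreads_needed(5));
+			 *        return 0;   // prints 0 0 4
+			 *    }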
+ */ + n_needed = n_of_deliverable_payloads(ch) - 1; + if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING)) + xpc_activate_kthreads(ch, n_needed); + + } else { + spin_unlock_irqrestore(&ch->lock, irq_flags); + } + + xpc_kthread_waitmsgs(part, ch); + } + + /* let registerer know that connection is disconnecting */ + + spin_lock_irqsave(&ch->lock, irq_flags); + if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && + !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) { + ch->flags |= XPC_C_DISCONNECTINGCALLOUT; + spin_unlock_irqrestore(&ch->lock, irq_flags); + + xpc_disconnect_callout(ch, xpDisconnecting); + + spin_lock_irqsave(&ch->lock, irq_flags); + ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE; + } + spin_unlock_irqrestore(&ch->lock, irq_flags); + + if (atomic_dec_return(&ch->kthreads_assigned) == 0 && + atomic_dec_return(&part->nchannels_engaged) == 0) { + xpc_arch_ops.indicate_partition_disengaged(part); + } + + xpc_msgqueue_deref(ch); + + dev_dbg(xpc_chan, "kthread exiting, partid=%d, channel=%d\n", + partid, ch_number); + + xpc_part_deref(part); + return 0; +} + +/* + * For each partition that XPC has established communications with, there is + * a minimum of one kernel thread assigned to perform any operation that + * may potentially sleep or block (basically the callouts to the asynchronous + * functions registered via xpc_connect()). + * + * Additional kthreads are created and destroyed by XPC as the workload + * demands. + * + * A kthread is assigned to one of the active channels that exists for a given + * partition. + */ +void +xpc_create_kthreads(struct xpc_channel *ch, int needed, + int ignore_disconnecting) +{ + unsigned long irq_flags; + u64 args = XPC_PACK_ARGS(ch->partid, ch->number); + struct xpc_partition *part = &xpc_partitions[ch->partid]; + struct task_struct *kthread; + void (*indicate_partition_disengaged) (struct xpc_partition *) = + xpc_arch_ops.indicate_partition_disengaged; + + while (needed-- > 0) { + + /* + * The following is done on behalf of the newly created + * kthread. That kthread is responsible for doing the + * counterpart to the following before it exits. + */ + if (ignore_disconnecting) { + if (!atomic_inc_not_zero(&ch->kthreads_assigned)) { + /* kthreads assigned had gone to zero */ + BUG_ON(!(ch->flags & + XPC_C_DISCONNECTINGCALLOUT_MADE)); + break; + } + + } else if (ch->flags & XPC_C_DISCONNECTING) { + break; + + } else if (atomic_inc_return(&ch->kthreads_assigned) == 1 && + atomic_inc_return(&part->nchannels_engaged) == 1) { + xpc_arch_ops.indicate_partition_engaged(part); + } + (void)xpc_part_ref(part); + xpc_msgqueue_ref(ch); + + kthread = kthread_run(xpc_kthread_start, (void *)args, + "xpc%02dc%d", ch->partid, ch->number); + if (IS_ERR(kthread)) { + /* the fork failed */ + + /* + * NOTE: if (ignore_disconnecting && + * !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) is true, + * then we'll deadlock if all other kthreads assigned + * to this channel are blocked in the channel's + * registerer, because the only thing that will unblock + * them is the xpDisconnecting callout that this + * failed kthread_run() would have made. + */ + + if (atomic_dec_return(&ch->kthreads_assigned) == 0 && + atomic_dec_return(&part->nchannels_engaged) == 0) { + indicate_partition_disengaged(part); + } + xpc_msgqueue_deref(ch); + xpc_part_deref(part); + + if (atomic_read(&ch->kthreads_assigned) < + ch->kthreads_idle_limit) { + /* + * Flag this as an error only if we have an + * insufficient #of kthreads for the channel + * to function. 
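+				 *
+				 * Editor's sketch (simplified, not upstream
+				 * code): the counters bumped on behalf of a
+				 * new worker are rolled back if spawning
+				 * fails, so the assigned/engaged bookkeeping
+				 * never leaks.  Plain ints stand in for the
+				 * atomics:
+				 *
+				 *    #include <stdio.h>
+				 *
+				 *    struct counters { int assigned; int engaged; };
+				 *
+				 *    static int spawn_worker(struct counters *c, int fail)
+				 *    {
+				 *        if (++c->assigned == 1)
+				 *            c->engaged++;   // first worker engages
+				 *        if (fail) {
+				 *            if (--c->assigned == 0)
+				 *                c->engaged--;   // last one: disengage
+				 *            return -1;
+				 *        }
+				 *        return 0;
+				 *    }
+				 *
+				 *    int main(void)
+				 *    {
+				 *        struct counters c = { 0, 0 };
+				 *
+				 *        spawn_worker(&c, 1);
+				 *        printf("assigned=%d engaged=%d\n",
+				 *               c.assigned, c.engaged);   // 0 0
+				 *        return 0;
+				 *    }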
+ */ + spin_lock_irqsave(&ch->lock, irq_flags); + XPC_DISCONNECT_CHANNEL(ch, xpLackOfResources, + &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); + } + break; + } + } +} + +void +xpc_disconnect_wait(int ch_number) +{ + unsigned long irq_flags; + short partid; + struct xpc_partition *part; + struct xpc_channel *ch; + int wakeup_channel_mgr; + + /* now wait for all callouts to the caller's function to cease */ + for (partid = 0; partid < xp_max_npartitions; partid++) { + part = &xpc_partitions[partid]; + + if (!xpc_part_ref(part)) + continue; + + ch = &part->channels[ch_number]; + + if (!(ch->flags & XPC_C_WDISCONNECT)) { + xpc_part_deref(part); + continue; + } + + wait_for_completion(&ch->wdisconnect_wait); + + spin_lock_irqsave(&ch->lock, irq_flags); + DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED)); + wakeup_channel_mgr = 0; + + if (ch->delayed_chctl_flags) { + if (part->act_state != XPC_P_AS_DEACTIVATING) { + spin_lock(&part->chctl_lock); + part->chctl.flags[ch->number] |= + ch->delayed_chctl_flags; + spin_unlock(&part->chctl_lock); + wakeup_channel_mgr = 1; + } + ch->delayed_chctl_flags = 0; + } + + ch->flags &= ~XPC_C_WDISCONNECT; + spin_unlock_irqrestore(&ch->lock, irq_flags); + + if (wakeup_channel_mgr) + xpc_wakeup_channel_mgr(part); + + xpc_part_deref(part); + } +} + +static int +xpc_setup_partitions(void) +{ + short partid; + struct xpc_partition *part; + + xpc_partitions = kcalloc(xp_max_npartitions, + sizeof(struct xpc_partition), + GFP_KERNEL); + if (xpc_partitions == NULL) { + dev_err(xpc_part, "can't get memory for partition structure\n"); + return -ENOMEM; + } + + /* + * The first few fields of each entry of xpc_partitions[] need to + * be initialized now so that calls to xpc_connect() and + * xpc_disconnect() can be made prior to the activation of any remote + * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE + * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING + * PARTITION HAS BEEN ACTIVATED. + */ + for (partid = 0; partid < xp_max_npartitions; partid++) { + part = &xpc_partitions[partid]; + + DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part)); + + part->activate_IRQ_rcvd = 0; + spin_lock_init(&part->act_lock); + part->act_state = XPC_P_AS_INACTIVE; + XPC_SET_REASON(part, 0, 0); + + timer_setup(&part->disengage_timer, + xpc_timeout_partition_disengage, 0); + + part->setup_state = XPC_P_SS_UNSET; + init_waitqueue_head(&part->teardown_wq); + atomic_set(&part->references, 0); + } + + return xpc_arch_ops.setup_partitions(); +} + +static void +xpc_teardown_partitions(void) +{ + xpc_arch_ops.teardown_partitions(); + kfree(xpc_partitions); +} + +static void +xpc_do_exit(enum xp_retval reason) +{ + short partid; + int active_part_count, printed_waiting_msg = 0; + struct xpc_partition *part; + unsigned long printmsg_time, disengage_timeout = 0; + + /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */ + DBUG_ON(xpc_exiting == 1); + + /* + * Let the heartbeat checker thread and the discovery thread + * (if one is running) know that they should exit. Also wake up + * the heartbeat checker thread in case it's sleeping. 
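+	 *
+	 * Editor's sketch (user-space analogue, not upstream code): the
+	 * shutdown handshake is "raise a flag, wake the worker, wait for its
+	 * acknowledgement"; here pthread_join stands in for the wait queue
+	 * plus completion (compile with -pthread):
+	 *
+	 *    #include <pthread.h>
+	 *    #include <stdatomic.h>
+	 *    #include <stdio.h>
+	 *    #include <unistd.h>
+	 *
+	 *    static atomic_int exiting;
+	 *
+	 *    static void *checker(void *arg)
+	 *    {
+	 *        (void)arg;
+	 *        while (!atomic_load(&exiting))
+	 *            usleep(1000);   // poll; the kernel sleeps on a waitqueue
+	 *        puts("checker exiting");
+	 *        return NULL;
+	 *    }
+	 *
+	 *    int main(void)
+	 *    {
+	 *        pthread_t t;
+	 *
+	 *        pthread_create(&t, NULL, checker, NULL);
+	 *        atomic_store(&exiting, 1);   // tell the worker to stop
+	 *        pthread_join(t, NULL);       // wait for it to finish
+	 *        puts("shutdown complete");
+	 *        return 0;
+	 *    }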
+ */ + xpc_exiting = 1; + wake_up_interruptible(&xpc_activate_IRQ_wq); + + /* wait for the discovery thread to exit */ + wait_for_completion(&xpc_discovery_exited); + + /* wait for the heartbeat checker thread to exit */ + wait_for_completion(&xpc_hb_checker_exited); + + /* sleep for a 1/3 of a second or so */ + (void)msleep_interruptible(300); + + /* wait for all partitions to become inactive */ + + printmsg_time = jiffies + (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ); + xpc_disengage_timedout = 0; + + do { + active_part_count = 0; + + for (partid = 0; partid < xp_max_npartitions; partid++) { + part = &xpc_partitions[partid]; + + if (xpc_partition_disengaged(part) && + part->act_state == XPC_P_AS_INACTIVE) { + continue; + } + + active_part_count++; + + XPC_DEACTIVATE_PARTITION(part, reason); + + if (part->disengage_timeout > disengage_timeout) + disengage_timeout = part->disengage_timeout; + } + + if (xpc_arch_ops.any_partition_engaged()) { + if (time_is_before_jiffies(printmsg_time)) { + dev_info(xpc_part, "waiting for remote " + "partitions to deactivate, timeout in " + "%ld seconds\n", (disengage_timeout - + jiffies) / HZ); + printmsg_time = jiffies + + (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ); + printed_waiting_msg = 1; + } + + } else if (active_part_count > 0) { + if (printed_waiting_msg) { + dev_info(xpc_part, "waiting for local partition" + " to deactivate\n"); + printed_waiting_msg = 0; + } + + } else { + if (!xpc_disengage_timedout) { + dev_info(xpc_part, "all partitions have " + "deactivated\n"); + } + break; + } + + /* sleep for a 1/3 of a second or so */ + (void)msleep_interruptible(300); + + } while (1); + + DBUG_ON(xpc_arch_ops.any_partition_engaged()); + + xpc_teardown_rsvd_page(); + + if (reason == xpUnloading) { + (void)unregister_die_notifier(&xpc_die_notifier); + (void)unregister_reboot_notifier(&xpc_reboot_notifier); + } + + /* clear the interface to XPC's functions */ + xpc_clear_interface(); + + if (xpc_sysctl) + unregister_sysctl_table(xpc_sysctl); + if (xpc_sysctl_hb) + unregister_sysctl_table(xpc_sysctl_hb); + + xpc_teardown_partitions(); + + if (is_uv_system()) + xpc_exit_uv(); +} + +/* + * This function is called when the system is being rebooted. + */ +static int +xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused) +{ + enum xp_retval reason; + + switch (event) { + case SYS_RESTART: + reason = xpSystemReboot; + break; + case SYS_HALT: + reason = xpSystemHalt; + break; + case SYS_POWER_OFF: + reason = xpSystemPoweroff; + break; + default: + reason = xpSystemGoingDown; + } + + xpc_do_exit(reason); + return NOTIFY_DONE; +} + +/* Used to only allow one cpu to complete disconnect */ +static unsigned int xpc_die_disconnecting; + +/* + * Notify other partitions to deactivate from us by first disengaging from all + * references to our memory. 
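+ *
+ * Editor's note (worked arithmetic, not upstream code): the wait loop below
+ * busy-polls with udelay(200), i.e. roughly 5000 iterations per second, which
+ * is where the "seconds * 1000 * 5" loop-count conversion comes from:
+ *
+ *    #include <stdio.h>
+ *
+ *    int main(void)
+ *    {
+ *        long timelimit_secs = 90;   // example value only
+ *        long loops = timelimit_secs * 1000 * 5;
+ *
+ *        printf("%ld loops (~%ld s)\n", loops, loops / 5000);
+ *        return 0;   // prints 450000 loops (~90 s)
+ *    }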
+ */ +static void +xpc_die_deactivate(void) +{ + struct xpc_partition *part; + short partid; + int any_engaged; + long keep_waiting; + long wait_to_print; + + if (cmpxchg(&xpc_die_disconnecting, 0, 1)) + return; + + /* keep xpc_hb_checker thread from doing anything (just in case) */ + xpc_exiting = 1; + + xpc_arch_ops.disallow_all_hbs(); /*indicate we're deactivated */ + + for (partid = 0; partid < xp_max_npartitions; partid++) { + part = &xpc_partitions[partid]; + + if (xpc_arch_ops.partition_engaged(partid) || + part->act_state != XPC_P_AS_INACTIVE) { + xpc_arch_ops.request_partition_deactivation(part); + xpc_arch_ops.indicate_partition_disengaged(part); + } + } + + /* + * Though we requested that all other partitions deactivate from us, + * we only wait until they've all disengaged or we've reached the + * defined timelimit. + * + * Given that one iteration through the following while-loop takes + * approximately 200 microseconds, calculate the #of loops to take + * before bailing and the #of loops before printing a waiting message. + */ + keep_waiting = xpc_disengage_timelimit * 1000 * 5; + wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * 1000 * 5; + + while (1) { + any_engaged = xpc_arch_ops.any_partition_engaged(); + if (!any_engaged) { + dev_info(xpc_part, "all partitions have deactivated\n"); + break; + } + + if (!keep_waiting--) { + for (partid = 0; partid < xp_max_npartitions; + partid++) { + if (xpc_arch_ops.partition_engaged(partid)) { + dev_info(xpc_part, "deactivate from " + "remote partition %d timed " + "out\n", partid); + } + } + break; + } + + if (!wait_to_print--) { + dev_info(xpc_part, "waiting for remote partitions to " + "deactivate, timeout in %ld seconds\n", + keep_waiting / (1000 * 5)); + wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * + 1000 * 5; + } + + udelay(200); + } +} + +/* + * This function is called when the system is being restarted or halted due + * to some sort of system failure. If this is the case we need to notify the + * other partitions to disengage from all references to our memory. + * This function can also be called when our heartbeater could be offlined + * for a time. In this case we need to notify other partitions to not worry + * about the lack of a heartbeat. + */ +static int +xpc_system_die(struct notifier_block *nb, unsigned long event, void *_die_args) +{ +#ifdef CONFIG_IA64 /* !!! temporary kludge */ + switch (event) { + case DIE_MACHINE_RESTART: + case DIE_MACHINE_HALT: + xpc_die_deactivate(); + break; + + case DIE_KDEBUG_ENTER: + /* Should lack of heartbeat be ignored by other partitions? */ + if (!xpc_kdebug_ignore) + break; + + fallthrough; + case DIE_MCA_MONARCH_ENTER: + case DIE_INIT_MONARCH_ENTER: + xpc_arch_ops.offline_heartbeat(); + break; + + case DIE_KDEBUG_LEAVE: + /* Is lack of heartbeat being ignored by other partitions? 
*/ + if (!xpc_kdebug_ignore) + break; + + fallthrough; + case DIE_MCA_MONARCH_LEAVE: + case DIE_INIT_MONARCH_LEAVE: + xpc_arch_ops.online_heartbeat(); + break; + } +#else + struct die_args *die_args = _die_args; + + switch (event) { + case DIE_TRAP: + if (die_args->trapnr == X86_TRAP_DF) + xpc_die_deactivate(); + + if (((die_args->trapnr == X86_TRAP_MF) || + (die_args->trapnr == X86_TRAP_XF)) && + !user_mode(die_args->regs)) + xpc_die_deactivate(); + + break; + case DIE_INT3: + case DIE_DEBUG: + break; + case DIE_OOPS: + case DIE_GPF: + default: + xpc_die_deactivate(); + } +#endif + + return NOTIFY_DONE; +} + +static int __init +xpc_init(void) +{ + int ret; + struct task_struct *kthread; + + dev_set_name(xpc_part, "part"); + dev_set_name(xpc_chan, "chan"); + + if (is_uv_system()) { + ret = xpc_init_uv(); + + } else { + ret = -ENODEV; + } + + if (ret != 0) + return ret; + + ret = xpc_setup_partitions(); + if (ret != 0) { + dev_err(xpc_part, "can't get memory for partition structure\n"); + goto out_1; + } + + xpc_sysctl = register_sysctl("xpc", xpc_sys_xpc); + xpc_sysctl_hb = register_sysctl("xpc/hb", xpc_sys_xpc_hb); + + /* + * Fill the partition reserved page with the information needed by + * other partitions to discover we are alive and establish initial + * communications. + */ + ret = xpc_setup_rsvd_page(); + if (ret != 0) { + dev_err(xpc_part, "can't setup our reserved page\n"); + goto out_2; + } + + /* add ourselves to the reboot_notifier_list */ + ret = register_reboot_notifier(&xpc_reboot_notifier); + if (ret != 0) + dev_warn(xpc_part, "can't register reboot notifier\n"); + + /* add ourselves to the die_notifier list */ + ret = register_die_notifier(&xpc_die_notifier); + if (ret != 0) + dev_warn(xpc_part, "can't register die notifier\n"); + + /* + * The real work-horse behind xpc. This processes incoming + * interrupts and monitors remote heartbeats. + */ + kthread = kthread_run(xpc_hb_checker, NULL, XPC_HB_CHECK_THREAD_NAME); + if (IS_ERR(kthread)) { + dev_err(xpc_part, "failed while forking hb check thread\n"); + ret = -EBUSY; + goto out_3; + } + + /* + * Startup a thread that will attempt to discover other partitions to + * activate based on info provided by SAL. This new thread is short + * lived and will exit once discovery is complete. 
+ */ + kthread = kthread_run(xpc_initiate_discovery, NULL, + XPC_DISCOVERY_THREAD_NAME); + if (IS_ERR(kthread)) { + dev_err(xpc_part, "failed while forking discovery thread\n"); + + /* mark this new thread as a non-starter */ + complete(&xpc_discovery_exited); + + xpc_do_exit(xpUnloading); + return -EBUSY; + } + + /* set the interface to point at XPC's functions */ + xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect, + xpc_initiate_send, xpc_initiate_send_notify, + xpc_initiate_received, xpc_initiate_partid_to_nasids); + + return 0; + + /* initialization was not successful */ +out_3: + xpc_teardown_rsvd_page(); + + (void)unregister_die_notifier(&xpc_die_notifier); + (void)unregister_reboot_notifier(&xpc_reboot_notifier); +out_2: + if (xpc_sysctl_hb) + unregister_sysctl_table(xpc_sysctl_hb); + if (xpc_sysctl) + unregister_sysctl_table(xpc_sysctl); + + xpc_teardown_partitions(); +out_1: + if (is_uv_system()) + xpc_exit_uv(); + return ret; +} + +module_init(xpc_init); + +static void __exit +xpc_exit(void) +{ + xpc_do_exit(xpUnloading); +} + +module_exit(xpc_exit); + +MODULE_AUTHOR("Silicon Graphics, Inc."); +MODULE_DESCRIPTION("Cross Partition Communication (XPC) support"); +MODULE_LICENSE("GPL"); + +module_param(xpc_hb_interval, int, 0); +MODULE_PARM_DESC(xpc_hb_interval, "Number of seconds between " + "heartbeat increments."); + +module_param(xpc_hb_check_interval, int, 0); +MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between " + "heartbeat checks."); + +module_param(xpc_disengage_timelimit, int, 0); +MODULE_PARM_DESC(xpc_disengage_timelimit, "Number of seconds to wait " + "for disengage to complete."); + +module_param(xpc_kdebug_ignore, int, 0); +MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by " + "other partitions when dropping into kdebug."); diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c new file mode 100644 index 0000000000..1999d02923 --- /dev/null +++ b/drivers/misc/sgi-xp/xpc_partition.c @@ -0,0 +1,545 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * (C) Copyright 2020 Hewlett Packard Enterprise Development LP + * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition Communication (XPC) partition support. + * + * This is the part of XPC that detects the presence/absence of + * other partitions. It provides a heartbeat and monitors the + * heartbeats of other partitions. + * + */ + +#include +#include +#include +#include "xpc.h" +#include + +/* XPC is exiting flag */ +int xpc_exiting; + +/* this partition's reserved page pointers */ +struct xpc_rsvd_page *xpc_rsvd_page; +static unsigned long *xpc_part_nasids; +unsigned long *xpc_mach_nasids; + +static int xpc_nasid_mask_nbytes; /* #of bytes in nasid mask */ +int xpc_nasid_mask_nlongs; /* #of longs in nasid mask */ + +struct xpc_partition *xpc_partitions; + +/* + * Guarantee that the kmalloc'd memory is cacheline aligned. 
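+ *
+ * If the block kmalloc() returns happens to be cacheline aligned it is
+ * used as is; otherwise it is freed and a second allocation of
+ * size + L1_CACHE_BYTES is made so the returned pointer can be rounded
+ * up to the next cacheline boundary. In either case *base holds the
+ * address that must eventually be kfree()'d.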
+ */ +void * +xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) +{ + /* see if kmalloc will give us cachline aligned memory by default */ + *base = kmalloc(size, flags); + if (*base == NULL) + return NULL; + + if ((u64)*base == L1_CACHE_ALIGN((u64)*base)) + return *base; + + kfree(*base); + + /* nope, we'll have to do it ourselves */ + *base = kmalloc(size + L1_CACHE_BYTES, flags); + if (*base == NULL) + return NULL; + + return (void *)L1_CACHE_ALIGN((u64)*base); +} + +/* + * Given a nasid, get the physical address of the partition's reserved page + * for that nasid. This function returns 0 on any error. + */ +static unsigned long +xpc_get_rsvd_page_pa(int nasid) +{ + enum xp_retval ret; + u64 cookie = 0; + unsigned long rp_pa = nasid; /* seed with nasid */ + size_t len = 0; + size_t buf_len = 0; + void *buf = NULL; + void *buf_base = NULL; + enum xp_retval (*get_partition_rsvd_page_pa) + (void *, u64 *, unsigned long *, size_t *) = + xpc_arch_ops.get_partition_rsvd_page_pa; + + while (1) { + + /* !!! rp_pa will need to be _gpa on UV. + * ??? So do we save it into the architecture specific parts + * ??? of the xpc_partition structure? Do we rename this + * ??? function or have two versions? Rename rp_pa for UV to + * ??? rp_gpa? + */ + ret = get_partition_rsvd_page_pa(buf, &cookie, &rp_pa, &len); + + dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, " + "address=0x%016lx, len=0x%016lx\n", ret, + (unsigned long)cookie, rp_pa, len); + + if (ret != xpNeedMoreInfo) + break; + + if (len > buf_len) { + kfree(buf_base); + buf_len = L1_CACHE_ALIGN(len); + buf = xpc_kmalloc_cacheline_aligned(buf_len, GFP_KERNEL, + &buf_base); + if (buf_base == NULL) { + dev_err(xpc_part, "unable to kmalloc " + "len=0x%016lx\n", buf_len); + ret = xpNoMemory; + break; + } + } + + ret = xp_remote_memcpy(xp_pa(buf), rp_pa, len); + if (ret != xpSuccess) { + dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret); + break; + } + } + + kfree(buf_base); + + if (ret != xpSuccess) + rp_pa = 0; + + dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa); + return rp_pa; +} + +/* + * Fill the partition reserved page with the information needed by + * other partitions to discover we are alive and establish initial + * communications. 
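+ *
+ * The local reserved page is located via SAL, its partid is checked
+ * against xp_partition_id, the nasid mask sizes and pointers are
+ * established, the arch-specific setup hook is invoked, and finally a
+ * non-zero ts_jiffies is written to signal remote partitions that the
+ * page is now initialized.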
+ */ +int +xpc_setup_rsvd_page(void) +{ + int ret; + struct xpc_rsvd_page *rp; + unsigned long rp_pa; + unsigned long new_ts_jiffies; + + /* get the local reserved page's address */ + + preempt_disable(); + rp_pa = xpc_get_rsvd_page_pa(xp_cpu_to_nasid(smp_processor_id())); + preempt_enable(); + if (rp_pa == 0) { + dev_err(xpc_part, "SAL failed to locate the reserved page\n"); + return -ESRCH; + } + rp = (struct xpc_rsvd_page *)__va(xp_socket_pa(rp_pa)); + + if (rp->SAL_version < 3) { + /* SAL_versions < 3 had a SAL_partid defined as a u8 */ + rp->SAL_partid &= 0xff; + } + BUG_ON(rp->SAL_partid != xp_partition_id); + + if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) { + dev_err(xpc_part, "the reserved page's partid of %d is outside " + "supported range (< 0 || >= %d)\n", rp->SAL_partid, + xp_max_npartitions); + return -EINVAL; + } + + rp->version = XPC_RP_VERSION; + rp->max_npartitions = xp_max_npartitions; + + /* establish the actual sizes of the nasid masks */ + if (rp->SAL_version == 1) { + /* SAL_version 1 didn't set the nasids_size field */ + rp->SAL_nasids_size = 128; + } + xpc_nasid_mask_nbytes = rp->SAL_nasids_size; + xpc_nasid_mask_nlongs = BITS_TO_LONGS(rp->SAL_nasids_size * + BITS_PER_BYTE); + + /* setup the pointers to the various items in the reserved page */ + xpc_part_nasids = XPC_RP_PART_NASIDS(rp); + xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp); + + ret = xpc_arch_ops.setup_rsvd_page(rp); + if (ret != 0) + return ret; + + /* + * Set timestamp of when reserved page was setup by XPC. + * This signifies to the remote partition that our reserved + * page is initialized. + */ + new_ts_jiffies = jiffies; + if (new_ts_jiffies == 0 || new_ts_jiffies == rp->ts_jiffies) + new_ts_jiffies++; + rp->ts_jiffies = new_ts_jiffies; + + xpc_rsvd_page = rp; + return 0; +} + +void +xpc_teardown_rsvd_page(void) +{ + /* a zero timestamp indicates our rsvd page is not initialized */ + xpc_rsvd_page->ts_jiffies = 0; +} + +/* + * Get a copy of a portion of the remote partition's rsvd page. + * + * remote_rp points to a buffer that is cacheline aligned for BTE copies and + * is large enough to contain a copy of their reserved page header and + * part_nasids mask. 
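+ *
+ * When discovered_nasids is non-NULL the remote part_nasids bits are
+ * ORed into it. A failed remote copy returns the xp_remote_memcpy()
+ * status; other outcomes are reported as xpNoRsvdPageAddr,
+ * xpRsvdPageNotSet, xpBadVersion, xpInvalidPartid or xpLocalPartid.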
+ */ +enum xp_retval +xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids, + struct xpc_rsvd_page *remote_rp, unsigned long *remote_rp_pa) +{ + int l; + enum xp_retval ret; + + /* get the reserved page's physical address */ + + *remote_rp_pa = xpc_get_rsvd_page_pa(nasid); + if (*remote_rp_pa == 0) + return xpNoRsvdPageAddr; + + /* pull over the reserved page header and part_nasids mask */ + ret = xp_remote_memcpy(xp_pa(remote_rp), *remote_rp_pa, + XPC_RP_HEADER_SIZE + xpc_nasid_mask_nbytes); + if (ret != xpSuccess) + return ret; + + if (discovered_nasids != NULL) { + unsigned long *remote_part_nasids = + XPC_RP_PART_NASIDS(remote_rp); + + for (l = 0; l < xpc_nasid_mask_nlongs; l++) + discovered_nasids[l] |= remote_part_nasids[l]; + } + + /* zero timestamp indicates the reserved page has not been setup */ + if (remote_rp->ts_jiffies == 0) + return xpRsvdPageNotSet; + + if (XPC_VERSION_MAJOR(remote_rp->version) != + XPC_VERSION_MAJOR(XPC_RP_VERSION)) { + return xpBadVersion; + } + + /* check that both remote and local partids are valid for each side */ + if (remote_rp->SAL_partid < 0 || + remote_rp->SAL_partid >= xp_max_npartitions || + remote_rp->max_npartitions <= xp_partition_id) { + return xpInvalidPartid; + } + + if (remote_rp->SAL_partid == xp_partition_id) + return xpLocalPartid; + + return xpSuccess; +} + +/* + * See if the other side has responded to a partition deactivate request + * from us. Though we requested the remote partition to deactivate with regard + * to us, we really only need to wait for the other side to disengage from us. + */ +static int __xpc_partition_disengaged(struct xpc_partition *part, + bool from_timer) +{ + short partid = XPC_PARTID(part); + int disengaged; + + disengaged = !xpc_arch_ops.partition_engaged(partid); + if (part->disengage_timeout) { + if (!disengaged) { + if (time_is_after_jiffies(part->disengage_timeout)) { + /* timelimit hasn't been reached yet */ + return 0; + } + + /* + * Other side hasn't responded to our deactivate + * request in a timely fashion, so assume it's dead. + */ + + dev_info(xpc_part, "deactivate request to remote " + "partition %d timed out\n", partid); + xpc_disengage_timedout = 1; + xpc_arch_ops.assume_partition_disengaged(partid); + disengaged = 1; + } + part->disengage_timeout = 0; + + /* Cancel the timer function if not called from it */ + if (!from_timer) + del_timer_sync(&part->disengage_timer); + + DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING && + part->act_state != XPC_P_AS_INACTIVE); + if (part->act_state != XPC_P_AS_INACTIVE) + xpc_wakeup_channel_mgr(part); + + xpc_arch_ops.cancel_partition_deactivation_request(part); + } + return disengaged; +} + +int xpc_partition_disengaged(struct xpc_partition *part) +{ + return __xpc_partition_disengaged(part, false); +} + +int xpc_partition_disengaged_from_timer(struct xpc_partition *part) +{ + return __xpc_partition_disengaged(part, true); +} + +/* + * Mark specified partition as active. + */ +enum xp_retval +xpc_mark_partition_active(struct xpc_partition *part) +{ + unsigned long irq_flags; + enum xp_retval ret; + + dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part)); + + spin_lock_irqsave(&part->act_lock, irq_flags); + if (part->act_state == XPC_P_AS_ACTIVATING) { + part->act_state = XPC_P_AS_ACTIVE; + ret = xpSuccess; + } else { + DBUG_ON(part->reason == xpSuccess); + ret = part->reason; + } + spin_unlock_irqrestore(&part->act_lock, irq_flags); + + return ret; +} + +/* + * Start the process of deactivating the specified partition. 
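+ *
+ * If the partition is already inactive or deactivating only the recorded
+ * reason may be updated (with a reactivation re-requested when the
+ * reason is xpReactivating); otherwise the state is moved to
+ * XPC_P_AS_DEACTIVATING under act_lock, the remote side is asked to
+ * deactivate, and a disengage timer is armed from
+ * xpc_disengage_timelimit.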
+ */ +void +xpc_deactivate_partition(const int line, struct xpc_partition *part, + enum xp_retval reason) +{ + unsigned long irq_flags; + + spin_lock_irqsave(&part->act_lock, irq_flags); + + if (part->act_state == XPC_P_AS_INACTIVE) { + XPC_SET_REASON(part, reason, line); + spin_unlock_irqrestore(&part->act_lock, irq_flags); + if (reason == xpReactivating) { + /* we interrupt ourselves to reactivate partition */ + xpc_arch_ops.request_partition_reactivation(part); + } + return; + } + if (part->act_state == XPC_P_AS_DEACTIVATING) { + if ((part->reason == xpUnloading && reason != xpUnloading) || + reason == xpReactivating) { + XPC_SET_REASON(part, reason, line); + } + spin_unlock_irqrestore(&part->act_lock, irq_flags); + return; + } + + part->act_state = XPC_P_AS_DEACTIVATING; + XPC_SET_REASON(part, reason, line); + + spin_unlock_irqrestore(&part->act_lock, irq_flags); + + /* ask remote partition to deactivate with regard to us */ + xpc_arch_ops.request_partition_deactivation(part); + + /* set a timelimit on the disengage phase of the deactivation request */ + part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ); + part->disengage_timer.expires = part->disengage_timeout; + add_timer(&part->disengage_timer); + + dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", + XPC_PARTID(part), reason); + + xpc_partition_going_down(part, reason); +} + +/* + * Mark specified partition as inactive. + */ +void +xpc_mark_partition_inactive(struct xpc_partition *part) +{ + unsigned long irq_flags; + + dev_dbg(xpc_part, "setting partition %d to INACTIVE\n", + XPC_PARTID(part)); + + spin_lock_irqsave(&part->act_lock, irq_flags); + part->act_state = XPC_P_AS_INACTIVE; + spin_unlock_irqrestore(&part->act_lock, irq_flags); + part->remote_rp_pa = 0; +} + +/* + * SAL has provided a partition and machine mask. The partition mask + * contains a bit for each even nasid in our partition. The machine + * mask contains a bit for each even nasid in the entire machine. + * + * Using those two bit arrays, we can determine which nasids are + * known in the machine. Each should also have a reserved page + * initialized if they are available for partitioning. + */ +void +xpc_discovery(void) +{ + void *remote_rp_base; + struct xpc_rsvd_page *remote_rp; + unsigned long remote_rp_pa; + int region; + int region_size; + int max_regions; + int nasid; + unsigned long *discovered_nasids; + enum xp_retval ret; + + remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE + + xpc_nasid_mask_nbytes, + GFP_KERNEL, &remote_rp_base); + if (remote_rp == NULL) + return; + + discovered_nasids = kcalloc(xpc_nasid_mask_nlongs, sizeof(long), + GFP_KERNEL); + if (discovered_nasids == NULL) { + kfree(remote_rp_base); + return; + } + + /* + * The term 'region' in this context refers to the minimum number of + * nodes that can comprise an access protection grouping. The access + * protection is in regards to memory, IOI and IPI. 
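+ *
+ * The number of regions searched, and the effective region size, differ
+ * between UV systems and the region sizes handled by the switch
+ * statement below.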
+ */ + region_size = xp_region_size; + + if (is_uv_system()) + max_regions = 256; + else { + max_regions = 64; + + switch (region_size) { + case 128: + max_regions *= 2; + fallthrough; + case 64: + max_regions *= 2; + fallthrough; + case 32: + max_regions *= 2; + region_size = 16; + } + } + + for (region = 0; region < max_regions; region++) { + + if (xpc_exiting) + break; + + dev_dbg(xpc_part, "searching region %d\n", region); + + for (nasid = (region * region_size * 2); + nasid < ((region + 1) * region_size * 2); nasid += 2) { + + if (xpc_exiting) + break; + + dev_dbg(xpc_part, "checking nasid %d\n", nasid); + + if (test_bit(nasid / 2, xpc_part_nasids)) { + dev_dbg(xpc_part, "PROM indicates Nasid %d is " + "part of the local partition; skipping " + "region\n", nasid); + break; + } + + if (!(test_bit(nasid / 2, xpc_mach_nasids))) { + dev_dbg(xpc_part, "PROM indicates Nasid %d was " + "not on Numa-Link network at reset\n", + nasid); + continue; + } + + if (test_bit(nasid / 2, discovered_nasids)) { + dev_dbg(xpc_part, "Nasid %d is part of a " + "partition which was previously " + "discovered\n", nasid); + continue; + } + + /* pull over the rsvd page header & part_nasids mask */ + + ret = xpc_get_remote_rp(nasid, discovered_nasids, + remote_rp, &remote_rp_pa); + if (ret != xpSuccess) { + dev_dbg(xpc_part, "unable to get reserved page " + "from nasid %d, reason=%d\n", nasid, + ret); + + if (ret == xpLocalPartid) + break; + + continue; + } + + xpc_arch_ops.request_partition_activation(remote_rp, + remote_rp_pa, nasid); + } + } + + kfree(discovered_nasids); + kfree(remote_rp_base); +} + +/* + * Given a partid, get the nasids owned by that partition from the + * remote partition's reserved page. + */ +enum xp_retval +xpc_initiate_partid_to_nasids(short partid, void *nasid_mask) +{ + struct xpc_partition *part; + unsigned long part_nasid_pa; + + part = &xpc_partitions[partid]; + if (part->remote_rp_pa == 0) + return xpPartitionDown; + + memset(nasid_mask, 0, xpc_nasid_mask_nbytes); + + part_nasid_pa = (unsigned long)XPC_RP_PART_NASIDS(part->remote_rp_pa); + + return xp_remote_memcpy(xp_pa(nasid_mask), part_nasid_pa, + xpc_nasid_mask_nbytes); +} diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c new file mode 100644 index 0000000000..fff522d347 --- /dev/null +++ b/drivers/misc/sgi-xp/xpc_uv.c @@ -0,0 +1,1817 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition Communication (XPC) uv-based functions. + * + * Architecture specific implementation of common functions. 
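+ *
+ * The GRU message queues created here carry partition activate/deactivate
+ * and channel control traffic (xpc_activate_mq_uv) as well as the actual
+ * channel payload messages (xpc_notify_mq_uv), each serviced by its own
+ * interrupt handler.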
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined CONFIG_X86_64 +#include +#include +#elif defined CONFIG_IA64_SGI_UV +#include +#include +#endif +#include "../sgi-gru/gru.h" +#include "../sgi-gru/grukservices.h" +#include "xpc.h" + +#if defined CONFIG_IA64_SGI_UV +struct uv_IO_APIC_route_entry { + __u64 vector : 8, + delivery_mode : 3, + dest_mode : 1, + delivery_status : 1, + polarity : 1, + __reserved_1 : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15, + dest : 32; +}; + +#define sn_partition_id 0 +#endif + +static struct xpc_heartbeat_uv *xpc_heartbeat_uv; + +#define XPC_ACTIVATE_MSG_SIZE_UV (1 * GRU_CACHE_LINE_BYTES) +#define XPC_ACTIVATE_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \ + XPC_ACTIVATE_MSG_SIZE_UV) +#define XPC_ACTIVATE_IRQ_NAME "xpc_activate" + +#define XPC_NOTIFY_MSG_SIZE_UV (2 * GRU_CACHE_LINE_BYTES) +#define XPC_NOTIFY_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \ + XPC_NOTIFY_MSG_SIZE_UV) +#define XPC_NOTIFY_IRQ_NAME "xpc_notify" + +static int xpc_mq_node = NUMA_NO_NODE; + +static struct xpc_gru_mq_uv *xpc_activate_mq_uv; +static struct xpc_gru_mq_uv *xpc_notify_mq_uv; + +static int +xpc_setup_partitions_uv(void) +{ + short partid; + struct xpc_partition_uv *part_uv; + + for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) { + part_uv = &xpc_partitions[partid].sn.uv; + + mutex_init(&part_uv->cached_activate_gru_mq_desc_mutex); + spin_lock_init(&part_uv->flags_lock); + part_uv->remote_act_state = XPC_P_AS_INACTIVE; + } + return 0; +} + +static void +xpc_teardown_partitions_uv(void) +{ + short partid; + struct xpc_partition_uv *part_uv; + unsigned long irq_flags; + + for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) { + part_uv = &xpc_partitions[partid].sn.uv; + + if (part_uv->cached_activate_gru_mq_desc != NULL) { + mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex); + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); + kfree(part_uv->cached_activate_gru_mq_desc); + part_uv->cached_activate_gru_mq_desc = NULL; + mutex_unlock(&part_uv-> + cached_activate_gru_mq_desc_mutex); + } + } +} + +static int +xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name) +{ + int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); + +#if defined CONFIG_X86_64 + mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset, + UV_AFFINITY_CPU); + if (mq->irq < 0) + return mq->irq; + + mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset); + +#elif defined CONFIG_IA64_SGI_UV + if (strcmp(irq_name, XPC_ACTIVATE_IRQ_NAME) == 0) + mq->irq = SGI_XPC_ACTIVATE; + else if (strcmp(irq_name, XPC_NOTIFY_IRQ_NAME) == 0) + mq->irq = SGI_XPC_NOTIFY; + else + return -EINVAL; + + mq->mmr_value = (unsigned long)cpu_physical_id(cpu) << 32 | mq->irq; + uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mq->mmr_value); +#else + #error not a supported configuration +#endif + + return 0; +} + +static void +xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq) +{ +#if defined CONFIG_X86_64 + uv_teardown_irq(mq->irq); + +#elif defined CONFIG_IA64_SGI_UV + int mmr_pnode; + unsigned long mmr_value; + + mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); + mmr_value = 1UL << 16; + + uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mmr_value); +#else + #error not a supported configuration +#endif +} + +static int +xpc_gru_mq_watchlist_alloc_uv(struct xpc_gru_mq_uv *mq) +{ + int ret; 
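+ /* registering a watchlist entry with BIOS/SAL is what enables the
+  * generation of an irq when a GRU mq operation occurs on this mq
+  * (see the caller, xpc_create_gru_mq_uv()) */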
+ +#if defined CONFIG_IA64_SGI_UV + int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); + + ret = sn_mq_watchlist_alloc(mmr_pnode, (void *)uv_gpa(mq->address), + mq->order, &mq->mmr_offset); + if (ret < 0) { + dev_err(xpc_part, "sn_mq_watchlist_alloc() failed, ret=%d\n", + ret); + return -EBUSY; + } +#elif defined CONFIG_X86_64 + ret = uv_bios_mq_watchlist_alloc(uv_gpa(mq->address), + mq->order, &mq->mmr_offset); + if (ret < 0) { + dev_err(xpc_part, "uv_bios_mq_watchlist_alloc() failed, " + "ret=%d\n", ret); + return ret; + } +#else + #error not a supported configuration +#endif + + mq->watchlist_num = ret; + return 0; +} + +static void +xpc_gru_mq_watchlist_free_uv(struct xpc_gru_mq_uv *mq) +{ + int ret; + int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); + +#if defined CONFIG_X86_64 + ret = uv_bios_mq_watchlist_free(mmr_pnode, mq->watchlist_num); + BUG_ON(ret != BIOS_STATUS_SUCCESS); +#elif defined CONFIG_IA64_SGI_UV + ret = sn_mq_watchlist_free(mmr_pnode, mq->watchlist_num); + BUG_ON(ret != SALRET_OK); +#else + #error not a supported configuration +#endif +} + +static struct xpc_gru_mq_uv * +xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name, + irq_handler_t irq_handler) +{ + enum xp_retval xp_ret; + int ret; + int nid; + int nasid; + int pg_order; + struct page *page; + struct xpc_gru_mq_uv *mq; + struct uv_IO_APIC_route_entry *mmr_value; + + mq = kmalloc(sizeof(struct xpc_gru_mq_uv), GFP_KERNEL); + if (mq == NULL) { + dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() " + "a xpc_gru_mq_uv structure\n"); + ret = -ENOMEM; + goto out_0; + } + + mq->gru_mq_desc = kzalloc(sizeof(struct gru_message_queue_desc), + GFP_KERNEL); + if (mq->gru_mq_desc == NULL) { + dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() " + "a gru_message_queue_desc structure\n"); + ret = -ENOMEM; + goto out_1; + } + + pg_order = get_order(mq_size); + mq->order = pg_order + PAGE_SHIFT; + mq_size = 1UL << mq->order; + + mq->mmr_blade = uv_cpu_to_blade_id(cpu); + + nid = cpu_to_node(cpu); + page = __alloc_pages_node(nid, + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, + pg_order); + if (page == NULL) { + dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d " + "bytes of memory on nid=%d for GRU mq\n", mq_size, nid); + ret = -ENOMEM; + goto out_2; + } + mq->address = page_address(page); + + /* enable generation of irq when GRU mq operation occurs to this mq */ + ret = xpc_gru_mq_watchlist_alloc_uv(mq); + if (ret != 0) + goto out_3; + + ret = xpc_get_gru_mq_irq_uv(mq, cpu, irq_name); + if (ret != 0) + goto out_4; + + ret = request_irq(mq->irq, irq_handler, 0, irq_name, NULL); + if (ret != 0) { + dev_err(xpc_part, "request_irq(irq=%d) returned error=%d\n", + mq->irq, -ret); + goto out_5; + } + + nasid = UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpu)); + + mmr_value = (struct uv_IO_APIC_route_entry *)&mq->mmr_value; + ret = gru_create_message_queue(mq->gru_mq_desc, mq->address, mq_size, + nasid, mmr_value->vector, mmr_value->dest); + if (ret != 0) { + dev_err(xpc_part, "gru_create_message_queue() returned " + "error=%d\n", ret); + ret = -EINVAL; + goto out_6; + } + + /* allow other partitions to access this GRU mq */ + xp_ret = xp_expand_memprotect(xp_pa(mq->address), mq_size); + if (xp_ret != xpSuccess) { + ret = -EACCES; + goto out_6; + } + + return mq; + + /* something went wrong */ +out_6: + free_irq(mq->irq, NULL); +out_5: + xpc_release_gru_mq_irq_uv(mq); +out_4: + xpc_gru_mq_watchlist_free_uv(mq); +out_3: + free_pages((unsigned long)mq->address, pg_order); +out_2: + 
kfree(mq->gru_mq_desc); +out_1: + kfree(mq); +out_0: + return ERR_PTR(ret); +} + +static void +xpc_destroy_gru_mq_uv(struct xpc_gru_mq_uv *mq) +{ + unsigned int mq_size; + int pg_order; + int ret; + + /* disallow other partitions to access GRU mq */ + mq_size = 1UL << mq->order; + ret = xp_restrict_memprotect(xp_pa(mq->address), mq_size); + BUG_ON(ret != xpSuccess); + + /* unregister irq handler and release mq irq/vector mapping */ + free_irq(mq->irq, NULL); + xpc_release_gru_mq_irq_uv(mq); + + /* disable generation of irq when GRU mq op occurs to this mq */ + xpc_gru_mq_watchlist_free_uv(mq); + + pg_order = mq->order - PAGE_SHIFT; + free_pages((unsigned long)mq->address, pg_order); + + kfree(mq); +} + +static enum xp_retval +xpc_send_gru_msg(struct gru_message_queue_desc *gru_mq_desc, void *msg, + size_t msg_size) +{ + enum xp_retval xp_ret; + int ret; + + while (1) { + ret = gru_send_message_gpa(gru_mq_desc, msg, msg_size); + if (ret == MQE_OK) { + xp_ret = xpSuccess; + break; + } + + if (ret == MQE_QUEUE_FULL) { + dev_dbg(xpc_chan, "gru_send_message_gpa() returned " + "error=MQE_QUEUE_FULL\n"); + /* !!! handle QLimit reached; delay & try again */ + /* ??? Do we add a limit to the number of retries? */ + (void)msleep_interruptible(10); + } else if (ret == MQE_CONGESTION) { + dev_dbg(xpc_chan, "gru_send_message_gpa() returned " + "error=MQE_CONGESTION\n"); + /* !!! handle LB Overflow; simply try again */ + /* ??? Do we add a limit to the number of retries? */ + } else { + /* !!! Currently this is MQE_UNEXPECTED_CB_ERR */ + dev_err(xpc_chan, "gru_send_message_gpa() returned " + "error=%d\n", ret); + xp_ret = xpGruSendMqError; + break; + } + } + return xp_ret; +} + +static void +xpc_process_activate_IRQ_rcvd_uv(void) +{ + unsigned long irq_flags; + short partid; + struct xpc_partition *part; + u8 act_state_req; + + DBUG_ON(xpc_activate_IRQ_rcvd == 0); + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) { + part = &xpc_partitions[partid]; + + if (part->sn.uv.act_state_req == 0) + continue; + + xpc_activate_IRQ_rcvd--; + BUG_ON(xpc_activate_IRQ_rcvd < 0); + + act_state_req = part->sn.uv.act_state_req; + part->sn.uv.act_state_req = 0; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + if (act_state_req == XPC_P_ASR_ACTIVATE_UV) { + if (part->act_state == XPC_P_AS_INACTIVE) + xpc_activate_partition(part); + else if (part->act_state == XPC_P_AS_DEACTIVATING) + XPC_DEACTIVATE_PARTITION(part, xpReactivating); + + } else if (act_state_req == XPC_P_ASR_REACTIVATE_UV) { + if (part->act_state == XPC_P_AS_INACTIVE) + xpc_activate_partition(part); + else + XPC_DEACTIVATE_PARTITION(part, xpReactivating); + + } else if (act_state_req == XPC_P_ASR_DEACTIVATE_UV) { + XPC_DEACTIVATE_PARTITION(part, part->sn.uv.reason); + + } else { + BUG(); + } + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (xpc_activate_IRQ_rcvd == 0) + break; + } + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + +} + +static void +xpc_handle_activate_mq_msg_uv(struct xpc_partition *part, + struct xpc_activate_mq_msghdr_uv *msg_hdr, + int part_setup, + int *wakeup_hb_checker) +{ + unsigned long irq_flags; + struct xpc_partition_uv *part_uv = &part->sn.uv; + struct xpc_openclose_args *args; + + part_uv->remote_act_state = msg_hdr->act_state; + + switch (msg_hdr->type) { + case XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV: + /* syncing of remote_act_state was just done above */ + break; + + case 
XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: { + struct xpc_activate_mq_msg_activate_req_uv *msg; + + /* + * ??? Do we deal here with ts_jiffies being different + * ??? if act_state != XPC_P_AS_INACTIVE instead of + * ??? below? + */ + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_activate_req_uv, hdr); + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV; + part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */ + part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies; + part_uv->heartbeat_gpa = msg->heartbeat_gpa; + + if (msg->activate_gru_mq_desc_gpa != + part_uv->activate_gru_mq_desc_gpa) { + spin_lock(&part_uv->flags_lock); + part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV; + spin_unlock(&part_uv->flags_lock); + part_uv->activate_gru_mq_desc_gpa = + msg->activate_gru_mq_desc_gpa; + } + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + (*wakeup_hb_checker)++; + break; + } + case XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV: { + struct xpc_activate_mq_msg_deactivate_req_uv *msg; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_deactivate_req_uv, hdr); + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV; + part_uv->reason = msg->reason; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + (*wakeup_hb_checker)++; + return; + } + case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV: { + struct xpc_activate_mq_msg_chctl_closerequest_uv *msg; + + if (!part_setup) + break; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_chctl_closerequest_uv, + hdr); + args = &part->remote_openclose_args[msg->ch_number]; + args->reason = msg->reason; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREQUEST; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); + break; + } + case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV: { + struct xpc_activate_mq_msg_chctl_closereply_uv *msg; + + if (!part_setup) + break; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_chctl_closereply_uv, + hdr); + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREPLY; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); + break; + } + case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV: { + struct xpc_activate_mq_msg_chctl_openrequest_uv *msg; + + if (!part_setup) + break; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_chctl_openrequest_uv, + hdr); + args = &part->remote_openclose_args[msg->ch_number]; + args->entry_size = msg->entry_size; + args->local_nentries = msg->local_nentries; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREQUEST; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); + break; + } + case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV: { + struct xpc_activate_mq_msg_chctl_openreply_uv *msg; + + if (!part_setup) + break; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_chctl_openreply_uv, hdr); + args = &part->remote_openclose_args[msg->ch_number]; + args->remote_nentries = msg->remote_nentries; + args->local_nentries = msg->local_nentries; + args->local_msgqueue_pa = msg->notify_gru_mq_desc_gpa; + + 
spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREPLY; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); + break; + } + case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV: { + struct xpc_activate_mq_msg_chctl_opencomplete_uv *msg; + + if (!part_setup) + break; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_chctl_opencomplete_uv, hdr); + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENCOMPLETE; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); + } + fallthrough; + case XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV: + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags |= XPC_P_ENGAGED_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); + break; + + case XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV: + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags &= ~XPC_P_ENGAGED_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); + break; + + default: + dev_err(xpc_part, "received unknown activate_mq msg type=%d " + "from partition=%d\n", msg_hdr->type, XPC_PARTID(part)); + + /* get hb checker to deactivate from the remote partition */ + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV; + part_uv->reason = xpBadMsgType; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + (*wakeup_hb_checker)++; + return; + } + + if (msg_hdr->rp_ts_jiffies != part->remote_rp_ts_jiffies && + part->remote_rp_ts_jiffies != 0) { + /* + * ??? Does what we do here need to be sensitive to + * ??? act_state or remote_act_state? 
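+ *
+ * What is done today: a changed rp_ts_jiffies (with a non-zero previous
+ * value) is taken to mean the remote partition has re-initialized its
+ * reserved page, so a reactivate request is queued for the heartbeat
+ * checker.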
+ */ + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = XPC_P_ASR_REACTIVATE_UV; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + (*wakeup_hb_checker)++; + } +} + +static irqreturn_t +xpc_handle_activate_IRQ_uv(int irq, void *dev_id) +{ + struct xpc_activate_mq_msghdr_uv *msg_hdr; + short partid; + struct xpc_partition *part; + int wakeup_hb_checker = 0; + int part_referenced; + + while (1) { + msg_hdr = gru_get_next_message(xpc_activate_mq_uv->gru_mq_desc); + if (msg_hdr == NULL) + break; + + partid = msg_hdr->partid; + if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) { + dev_err(xpc_part, "xpc_handle_activate_IRQ_uv() " + "received invalid partid=0x%x in message\n", + partid); + } else { + part = &xpc_partitions[partid]; + + part_referenced = xpc_part_ref(part); + xpc_handle_activate_mq_msg_uv(part, msg_hdr, + part_referenced, + &wakeup_hb_checker); + if (part_referenced) + xpc_part_deref(part); + } + + gru_free_message(xpc_activate_mq_uv->gru_mq_desc, msg_hdr); + } + + if (wakeup_hb_checker) + wake_up_interruptible(&xpc_activate_IRQ_wq); + + return IRQ_HANDLED; +} + +static enum xp_retval +xpc_cache_remote_gru_mq_desc_uv(struct gru_message_queue_desc *gru_mq_desc, + unsigned long gru_mq_desc_gpa) +{ + enum xp_retval ret; + + ret = xp_remote_memcpy(uv_gpa(gru_mq_desc), gru_mq_desc_gpa, + sizeof(struct gru_message_queue_desc)); + if (ret == xpSuccess) + gru_mq_desc->mq = NULL; + + return ret; +} + +static enum xp_retval +xpc_send_activate_IRQ_uv(struct xpc_partition *part, void *msg, size_t msg_size, + int msg_type) +{ + struct xpc_activate_mq_msghdr_uv *msg_hdr = msg; + struct xpc_partition_uv *part_uv = &part->sn.uv; + struct gru_message_queue_desc *gru_mq_desc; + unsigned long irq_flags; + enum xp_retval ret; + + DBUG_ON(msg_size > XPC_ACTIVATE_MSG_SIZE_UV); + + msg_hdr->type = msg_type; + msg_hdr->partid = xp_partition_id; + msg_hdr->act_state = part->act_state; + msg_hdr->rp_ts_jiffies = xpc_rsvd_page->ts_jiffies; + + mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex); +again: + if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV)) { + gru_mq_desc = part_uv->cached_activate_gru_mq_desc; + if (gru_mq_desc == NULL) { + gru_mq_desc = kmalloc(sizeof(struct + gru_message_queue_desc), + GFP_ATOMIC); + if (gru_mq_desc == NULL) { + ret = xpNoMemory; + goto done; + } + part_uv->cached_activate_gru_mq_desc = gru_mq_desc; + } + + ret = xpc_cache_remote_gru_mq_desc_uv(gru_mq_desc, + part_uv-> + activate_gru_mq_desc_gpa); + if (ret != xpSuccess) + goto done; + + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags |= XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); + } + + /* ??? Is holding a spin_lock (ch->lock) during this call a bad idea? 
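+ *
+ * Note that if the send fails and the cached activate mq descriptor has
+ * meanwhile been invalidated, the descriptor is re-fetched and the send
+ * retried via the "goto again" path.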
*/ + ret = xpc_send_gru_msg(part_uv->cached_activate_gru_mq_desc, msg, + msg_size); + if (ret != xpSuccess) { + smp_rmb(); /* ensure a fresh copy of part_uv->flags */ + if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV)) + goto again; + } +done: + mutex_unlock(&part_uv->cached_activate_gru_mq_desc_mutex); + return ret; +} + +static void +xpc_send_activate_IRQ_part_uv(struct xpc_partition *part, void *msg, + size_t msg_size, int msg_type) +{ + enum xp_retval ret; + + ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type); + if (unlikely(ret != xpSuccess)) + XPC_DEACTIVATE_PARTITION(part, ret); +} + +static void +xpc_send_activate_IRQ_ch_uv(struct xpc_channel *ch, unsigned long *irq_flags, + void *msg, size_t msg_size, int msg_type) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + enum xp_retval ret; + + ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type); + if (unlikely(ret != xpSuccess)) { + if (irq_flags != NULL) + spin_unlock_irqrestore(&ch->lock, *irq_flags); + + XPC_DEACTIVATE_PARTITION(part, ret); + + if (irq_flags != NULL) + spin_lock_irqsave(&ch->lock, *irq_flags); + } +} + +static void +xpc_send_local_activate_IRQ_uv(struct xpc_partition *part, int act_state_req) +{ + unsigned long irq_flags; + struct xpc_partition_uv *part_uv = &part->sn.uv; + + /* + * !!! Make our side think that the remote partition sent an activate + * !!! mq message our way by doing what the activate IRQ handler would + * !!! do had one really been sent. + */ + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = act_state_req; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + wake_up_interruptible(&xpc_activate_IRQ_wq); +} + +static enum xp_retval +xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa, + size_t *len) +{ + s64 status; + enum xp_retval ret; + +#if defined CONFIG_X86_64 + status = uv_bios_reserved_page_pa((u64)buf, cookie, (u64 *)rp_pa, + (u64 *)len); + if (status == BIOS_STATUS_SUCCESS) + ret = xpSuccess; + else if (status == BIOS_STATUS_MORE_PASSES) + ret = xpNeedMoreInfo; + else + ret = xpBiosError; + +#elif defined CONFIG_IA64_SGI_UV + status = sn_partition_reserved_page_pa((u64)buf, cookie, rp_pa, len); + if (status == SALRET_OK) + ret = xpSuccess; + else if (status == SALRET_MORE_PASSES) + ret = xpNeedMoreInfo; + else + ret = xpSalError; + +#else + #error not a supported configuration +#endif + + return ret; +} + +static int +xpc_setup_rsvd_page_uv(struct xpc_rsvd_page *rp) +{ + xpc_heartbeat_uv = + &xpc_partitions[sn_partition_id].sn.uv.cached_heartbeat; + rp->sn.uv.heartbeat_gpa = uv_gpa(xpc_heartbeat_uv); + rp->sn.uv.activate_gru_mq_desc_gpa = + uv_gpa(xpc_activate_mq_uv->gru_mq_desc); + return 0; +} + +static void +xpc_allow_hb_uv(short partid) +{ +} + +static void +xpc_disallow_hb_uv(short partid) +{ +} + +static void +xpc_disallow_all_hbs_uv(void) +{ +} + +static void +xpc_increment_heartbeat_uv(void) +{ + xpc_heartbeat_uv->value++; +} + +static void +xpc_offline_heartbeat_uv(void) +{ + xpc_increment_heartbeat_uv(); + xpc_heartbeat_uv->offline = 1; +} + +static void +xpc_online_heartbeat_uv(void) +{ + xpc_increment_heartbeat_uv(); + xpc_heartbeat_uv->offline = 0; +} + +static void +xpc_heartbeat_init_uv(void) +{ + xpc_heartbeat_uv->value = 1; + xpc_heartbeat_uv->offline = 0; +} + +static void +xpc_heartbeat_exit_uv(void) +{ + xpc_offline_heartbeat_uv(); +} + +static enum xp_retval 
+xpc_get_remote_heartbeat_uv(struct xpc_partition *part) +{ + struct xpc_partition_uv *part_uv = &part->sn.uv; + enum xp_retval ret; + + ret = xp_remote_memcpy(uv_gpa(&part_uv->cached_heartbeat), + part_uv->heartbeat_gpa, + sizeof(struct xpc_heartbeat_uv)); + if (ret != xpSuccess) + return ret; + + if (part_uv->cached_heartbeat.value == part->last_heartbeat && + !part_uv->cached_heartbeat.offline) { + + ret = xpNoHeartbeat; + } else { + part->last_heartbeat = part_uv->cached_heartbeat.value; + } + return ret; +} + +static void +xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp, + unsigned long remote_rp_gpa, int nasid) +{ + short partid = remote_rp->SAL_partid; + struct xpc_partition *part = &xpc_partitions[partid]; + struct xpc_activate_mq_msg_activate_req_uv msg; + + part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */ + part->remote_rp_ts_jiffies = remote_rp->ts_jiffies; + part->sn.uv.heartbeat_gpa = remote_rp->sn.uv.heartbeat_gpa; + part->sn.uv.activate_gru_mq_desc_gpa = + remote_rp->sn.uv.activate_gru_mq_desc_gpa; + + /* + * ??? Is it a good idea to make this conditional on what is + * ??? potentially stale state information? + */ + if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) { + msg.rp_gpa = uv_gpa(xpc_rsvd_page); + msg.heartbeat_gpa = xpc_rsvd_page->sn.uv.heartbeat_gpa; + msg.activate_gru_mq_desc_gpa = + xpc_rsvd_page->sn.uv.activate_gru_mq_desc_gpa; + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV); + } + + if (part->act_state == XPC_P_AS_INACTIVE) + xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV); +} + +static void +xpc_request_partition_reactivation_uv(struct xpc_partition *part) +{ + xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV); +} + +static void +xpc_request_partition_deactivation_uv(struct xpc_partition *part) +{ + struct xpc_activate_mq_msg_deactivate_req_uv msg; + + /* + * ??? Is it a good idea to make this conditional on what is + * ??? potentially stale state information? 
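+ *
+ * As coded, the deactivate request is only sent while the remote side
+ * does not already appear to be deactivating or inactive.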
+ */ + if (part->sn.uv.remote_act_state != XPC_P_AS_DEACTIVATING && + part->sn.uv.remote_act_state != XPC_P_AS_INACTIVE) { + + msg.reason = part->reason; + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV); + } +} + +static void +xpc_cancel_partition_deactivation_request_uv(struct xpc_partition *part) +{ + /* nothing needs to be done */ + return; +} + +static void +xpc_init_fifo_uv(struct xpc_fifo_head_uv *head) +{ + head->first = NULL; + head->last = NULL; + spin_lock_init(&head->lock); + head->n_entries = 0; +} + +static void * +xpc_get_fifo_entry_uv(struct xpc_fifo_head_uv *head) +{ + unsigned long irq_flags; + struct xpc_fifo_entry_uv *first; + + spin_lock_irqsave(&head->lock, irq_flags); + first = head->first; + if (head->first != NULL) { + head->first = first->next; + if (head->first == NULL) + head->last = NULL; + + head->n_entries--; + BUG_ON(head->n_entries < 0); + + first->next = NULL; + } + spin_unlock_irqrestore(&head->lock, irq_flags); + return first; +} + +static void +xpc_put_fifo_entry_uv(struct xpc_fifo_head_uv *head, + struct xpc_fifo_entry_uv *last) +{ + unsigned long irq_flags; + + last->next = NULL; + spin_lock_irqsave(&head->lock, irq_flags); + if (head->last != NULL) + head->last->next = last; + else + head->first = last; + head->last = last; + head->n_entries++; + spin_unlock_irqrestore(&head->lock, irq_flags); +} + +static int +xpc_n_of_fifo_entries_uv(struct xpc_fifo_head_uv *head) +{ + return head->n_entries; +} + +/* + * Setup the channel structures that are uv specific. + */ +static enum xp_retval +xpc_setup_ch_structures_uv(struct xpc_partition *part) +{ + struct xpc_channel_uv *ch_uv; + int ch_number; + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch_uv = &part->channels[ch_number].sn.uv; + + xpc_init_fifo_uv(&ch_uv->msg_slot_free_list); + xpc_init_fifo_uv(&ch_uv->recv_msg_list); + } + + return xpSuccess; +} + +/* + * Teardown the channel structures that are uv specific. + */ +static void +xpc_teardown_ch_structures_uv(struct xpc_partition *part) +{ + /* nothing needs to be done */ + return; +} + +static enum xp_retval +xpc_make_first_contact_uv(struct xpc_partition *part) +{ + struct xpc_activate_mq_msg_uv msg; + + /* + * We send a sync msg to get the remote partition's remote_act_state + * updated to our current act_state which at this point should + * be XPC_P_AS_ACTIVATING. 
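+ *
+ * We then poll, sleeping roughly 250ms per pass, until the remote side
+ * reports XPC_P_AS_ACTIVATING or XPC_P_AS_ACTIVE, bailing out with
+ * part->reason should this partition start deactivating first.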
+ */ + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV); + + while (!((part->sn.uv.remote_act_state == XPC_P_AS_ACTIVATING) || + (part->sn.uv.remote_act_state == XPC_P_AS_ACTIVE))) { + + dev_dbg(xpc_part, "waiting to make first contact with " + "partition %d\n", XPC_PARTID(part)); + + /* wait a 1/4 of a second or so */ + (void)msleep_interruptible(250); + + if (part->act_state == XPC_P_AS_DEACTIVATING) + return part->reason; + } + + return xpSuccess; +} + +static u64 +xpc_get_chctl_all_flags_uv(struct xpc_partition *part) +{ + unsigned long irq_flags; + union xpc_channel_ctl_flags chctl; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + chctl = part->chctl; + if (chctl.all_flags != 0) + part->chctl.all_flags = 0; + + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + return chctl.all_flags; +} + +static enum xp_retval +xpc_allocate_send_msg_slot_uv(struct xpc_channel *ch) +{ + struct xpc_channel_uv *ch_uv = &ch->sn.uv; + struct xpc_send_msg_slot_uv *msg_slot; + unsigned long irq_flags; + int nentries; + int entry; + size_t nbytes; + + for (nentries = ch->local_nentries; nentries > 0; nentries--) { + nbytes = nentries * sizeof(struct xpc_send_msg_slot_uv); + ch_uv->send_msg_slots = kzalloc(nbytes, GFP_KERNEL); + if (ch_uv->send_msg_slots == NULL) + continue; + + for (entry = 0; entry < nentries; entry++) { + msg_slot = &ch_uv->send_msg_slots[entry]; + + msg_slot->msg_slot_number = entry; + xpc_put_fifo_entry_uv(&ch_uv->msg_slot_free_list, + &msg_slot->next); + } + + spin_lock_irqsave(&ch->lock, irq_flags); + if (nentries < ch->local_nentries) + ch->local_nentries = nentries; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpSuccess; + } + + return xpNoMemory; +} + +static enum xp_retval +xpc_allocate_recv_msg_slot_uv(struct xpc_channel *ch) +{ + struct xpc_channel_uv *ch_uv = &ch->sn.uv; + struct xpc_notify_mq_msg_uv *msg_slot; + unsigned long irq_flags; + int nentries; + int entry; + size_t nbytes; + + for (nentries = ch->remote_nentries; nentries > 0; nentries--) { + nbytes = nentries * ch->entry_size; + ch_uv->recv_msg_slots = kzalloc(nbytes, GFP_KERNEL); + if (ch_uv->recv_msg_slots == NULL) + continue; + + for (entry = 0; entry < nentries; entry++) { + msg_slot = ch_uv->recv_msg_slots + + entry * ch->entry_size; + + msg_slot->hdr.msg_slot_number = entry; + } + + spin_lock_irqsave(&ch->lock, irq_flags); + if (nentries < ch->remote_nentries) + ch->remote_nentries = nentries; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpSuccess; + } + + return xpNoMemory; +} + +/* + * Allocate msg_slots associated with the channel. + */ +static enum xp_retval +xpc_setup_msg_structures_uv(struct xpc_channel *ch) +{ + static enum xp_retval ret; + struct xpc_channel_uv *ch_uv = &ch->sn.uv; + + DBUG_ON(ch->flags & XPC_C_SETUP); + + ch_uv->cached_notify_gru_mq_desc = kmalloc(sizeof(struct + gru_message_queue_desc), + GFP_KERNEL); + if (ch_uv->cached_notify_gru_mq_desc == NULL) + return xpNoMemory; + + ret = xpc_allocate_send_msg_slot_uv(ch); + if (ret == xpSuccess) { + + ret = xpc_allocate_recv_msg_slot_uv(ch); + if (ret != xpSuccess) { + kfree(ch_uv->send_msg_slots); + xpc_init_fifo_uv(&ch_uv->msg_slot_free_list); + } + } + return ret; +} + +/* + * Free up msg_slots and clear other stuff that were setup for the specified + * channel. 
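+ *
+ * Called with ch->lock held. The cached notify mq descriptor is always
+ * freed; the send/recv msg slot arrays and their fifo heads are only
+ * torn down if the channel had completed setup (XPC_C_SETUP).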
+ */ +static void +xpc_teardown_msg_structures_uv(struct xpc_channel *ch) +{ + struct xpc_channel_uv *ch_uv = &ch->sn.uv; + + lockdep_assert_held(&ch->lock); + + kfree(ch_uv->cached_notify_gru_mq_desc); + ch_uv->cached_notify_gru_mq_desc = NULL; + + if (ch->flags & XPC_C_SETUP) { + xpc_init_fifo_uv(&ch_uv->msg_slot_free_list); + kfree(ch_uv->send_msg_slots); + xpc_init_fifo_uv(&ch_uv->recv_msg_list); + kfree(ch_uv->recv_msg_slots); + } +} + +static void +xpc_send_chctl_closerequest_uv(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_activate_mq_msg_chctl_closerequest_uv msg; + + msg.ch_number = ch->number; + msg.reason = ch->reason; + xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV); +} + +static void +xpc_send_chctl_closereply_uv(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_activate_mq_msg_chctl_closereply_uv msg; + + msg.ch_number = ch->number; + xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV); +} + +static void +xpc_send_chctl_openrequest_uv(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_activate_mq_msg_chctl_openrequest_uv msg; + + msg.ch_number = ch->number; + msg.entry_size = ch->entry_size; + msg.local_nentries = ch->local_nentries; + xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV); +} + +static void +xpc_send_chctl_openreply_uv(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_activate_mq_msg_chctl_openreply_uv msg; + + msg.ch_number = ch->number; + msg.local_nentries = ch->local_nentries; + msg.remote_nentries = ch->remote_nentries; + msg.notify_gru_mq_desc_gpa = uv_gpa(xpc_notify_mq_uv->gru_mq_desc); + xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV); +} + +static void +xpc_send_chctl_opencomplete_uv(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_activate_mq_msg_chctl_opencomplete_uv msg; + + msg.ch_number = ch->number; + xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV); +} + +static void +xpc_send_chctl_local_msgrequest_uv(struct xpc_partition *part, int ch_number) +{ + unsigned long irq_flags; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[ch_number] |= XPC_CHCTL_MSGREQUEST; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); +} + +static enum xp_retval +xpc_save_remote_msgqueue_pa_uv(struct xpc_channel *ch, + unsigned long gru_mq_desc_gpa) +{ + struct xpc_channel_uv *ch_uv = &ch->sn.uv; + + DBUG_ON(ch_uv->cached_notify_gru_mq_desc == NULL); + return xpc_cache_remote_gru_mq_desc_uv(ch_uv->cached_notify_gru_mq_desc, + gru_mq_desc_gpa); +} + +static void +xpc_indicate_partition_engaged_uv(struct xpc_partition *part) +{ + struct xpc_activate_mq_msg_uv msg; + + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV); +} + +static void +xpc_indicate_partition_disengaged_uv(struct xpc_partition *part) +{ + struct xpc_activate_mq_msg_uv msg; + + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV); +} + +static void +xpc_assume_partition_disengaged_uv(short partid) +{ + struct xpc_partition_uv *part_uv = &xpc_partitions[partid].sn.uv; + unsigned long irq_flags; + + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags &= ~XPC_P_ENGAGED_UV; + 
spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); +} + +static int +xpc_partition_engaged_uv(short partid) +{ + return (xpc_partitions[partid].sn.uv.flags & XPC_P_ENGAGED_UV) != 0; +} + +static int +xpc_any_partition_engaged_uv(void) +{ + struct xpc_partition_uv *part_uv; + short partid; + + for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) { + part_uv = &xpc_partitions[partid].sn.uv; + if ((part_uv->flags & XPC_P_ENGAGED_UV) != 0) + return 1; + } + return 0; +} + +static enum xp_retval +xpc_allocate_msg_slot_uv(struct xpc_channel *ch, u32 flags, + struct xpc_send_msg_slot_uv **address_of_msg_slot) +{ + enum xp_retval ret; + struct xpc_send_msg_slot_uv *msg_slot; + struct xpc_fifo_entry_uv *entry; + + while (1) { + entry = xpc_get_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list); + if (entry != NULL) + break; + + if (flags & XPC_NOWAIT) + return xpNoWait; + + ret = xpc_allocate_msg_wait(ch); + if (ret != xpInterrupted && ret != xpTimeout) + return ret; + } + + msg_slot = container_of(entry, struct xpc_send_msg_slot_uv, next); + *address_of_msg_slot = msg_slot; + return xpSuccess; +} + +static void +xpc_free_msg_slot_uv(struct xpc_channel *ch, + struct xpc_send_msg_slot_uv *msg_slot) +{ + xpc_put_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list, &msg_slot->next); + + /* wakeup anyone waiting for a free msg slot */ + if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) + wake_up(&ch->msg_allocate_wq); +} + +static void +xpc_notify_sender_uv(struct xpc_channel *ch, + struct xpc_send_msg_slot_uv *msg_slot, + enum xp_retval reason) +{ + xpc_notify_func func = msg_slot->func; + + if (func != NULL && cmpxchg(&msg_slot->func, func, NULL) == func) { + + atomic_dec(&ch->n_to_notify); + + dev_dbg(xpc_chan, "msg_slot->func() called, msg_slot=0x%p " + "msg_slot_number=%d partid=%d channel=%d\n", msg_slot, + msg_slot->msg_slot_number, ch->partid, ch->number); + + func(reason, ch->partid, ch->number, msg_slot->key); + + dev_dbg(xpc_chan, "msg_slot->func() returned, msg_slot=0x%p " + "msg_slot_number=%d partid=%d channel=%d\n", msg_slot, + msg_slot->msg_slot_number, ch->partid, ch->number); + } +} + +static void +xpc_handle_notify_mq_ack_uv(struct xpc_channel *ch, + struct xpc_notify_mq_msg_uv *msg) +{ + struct xpc_send_msg_slot_uv *msg_slot; + int entry = msg->hdr.msg_slot_number % ch->local_nentries; + + msg_slot = &ch->sn.uv.send_msg_slots[entry]; + + BUG_ON(msg_slot->msg_slot_number != msg->hdr.msg_slot_number); + msg_slot->msg_slot_number += ch->local_nentries; + + if (msg_slot->func != NULL) + xpc_notify_sender_uv(ch, msg_slot, xpMsgDelivered); + + xpc_free_msg_slot_uv(ch, msg_slot); +} + +static void +xpc_handle_notify_mq_msg_uv(struct xpc_partition *part, + struct xpc_notify_mq_msg_uv *msg) +{ + struct xpc_partition_uv *part_uv = &part->sn.uv; + struct xpc_channel *ch; + struct xpc_channel_uv *ch_uv; + struct xpc_notify_mq_msg_uv *msg_slot; + unsigned long irq_flags; + int ch_number = msg->hdr.ch_number; + + if (unlikely(ch_number >= part->nchannels)) { + dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received invalid " + "channel number=0x%x in message from partid=%d\n", + ch_number, XPC_PARTID(part)); + + /* get hb checker to deactivate from the remote partition */ + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV; + part_uv->reason = xpBadChannelNumber; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + wake_up_interruptible(&xpc_activate_IRQ_wq); + 
return; + } + + ch = &part->channels[ch_number]; + xpc_msgqueue_ref(ch); + + if (!(ch->flags & XPC_C_CONNECTED)) { + xpc_msgqueue_deref(ch); + return; + } + + /* see if we're really dealing with an ACK for a previously sent msg */ + if (msg->hdr.size == 0) { + xpc_handle_notify_mq_ack_uv(ch, msg); + xpc_msgqueue_deref(ch); + return; + } + + /* we're dealing with a normal message sent via the notify_mq */ + ch_uv = &ch->sn.uv; + + msg_slot = ch_uv->recv_msg_slots + + (msg->hdr.msg_slot_number % ch->remote_nentries) * ch->entry_size; + + BUG_ON(msg_slot->hdr.size != 0); + + memcpy(msg_slot, msg, msg->hdr.size); + + xpc_put_fifo_entry_uv(&ch_uv->recv_msg_list, &msg_slot->hdr.u.next); + + if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) { + /* + * If there is an existing idle kthread get it to deliver + * the payload, otherwise we'll have to get the channel mgr + * for this partition to create a kthread to do the delivery. + */ + if (atomic_read(&ch->kthreads_idle) > 0) + wake_up_nr(&ch->idle_wq, 1); + else + xpc_send_chctl_local_msgrequest_uv(part, ch->number); + } + xpc_msgqueue_deref(ch); +} + +static irqreturn_t +xpc_handle_notify_IRQ_uv(int irq, void *dev_id) +{ + struct xpc_notify_mq_msg_uv *msg; + short partid; + struct xpc_partition *part; + + while ((msg = gru_get_next_message(xpc_notify_mq_uv->gru_mq_desc)) != + NULL) { + + partid = msg->hdr.partid; + if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) { + dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received " + "invalid partid=0x%x in message\n", partid); + } else { + part = &xpc_partitions[partid]; + + if (xpc_part_ref(part)) { + xpc_handle_notify_mq_msg_uv(part, msg); + xpc_part_deref(part); + } + } + + gru_free_message(xpc_notify_mq_uv->gru_mq_desc, msg); + } + + return IRQ_HANDLED; +} + +static int +xpc_n_of_deliverable_payloads_uv(struct xpc_channel *ch) +{ + return xpc_n_of_fifo_entries_uv(&ch->sn.uv.recv_msg_list); +} + +static void +xpc_process_msg_chctl_flags_uv(struct xpc_partition *part, int ch_number) +{ + struct xpc_channel *ch = &part->channels[ch_number]; + int ndeliverable_payloads; + + xpc_msgqueue_ref(ch); + + ndeliverable_payloads = xpc_n_of_deliverable_payloads_uv(ch); + + if (ndeliverable_payloads > 0 && + (ch->flags & XPC_C_CONNECTED) && + (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)) { + + xpc_activate_kthreads(ch, ndeliverable_payloads); + } + + xpc_msgqueue_deref(ch); +} + +static enum xp_retval +xpc_send_payload_uv(struct xpc_channel *ch, u32 flags, void *payload, + u16 payload_size, u8 notify_type, xpc_notify_func func, + void *key) +{ + enum xp_retval ret = xpSuccess; + struct xpc_send_msg_slot_uv *msg_slot = NULL; + struct xpc_notify_mq_msg_uv *msg; + u8 msg_buffer[XPC_NOTIFY_MSG_SIZE_UV]; + size_t msg_size; + + DBUG_ON(notify_type != XPC_N_CALL); + + msg_size = sizeof(struct xpc_notify_mq_msghdr_uv) + payload_size; + if (msg_size > ch->entry_size) + return xpPayloadTooBig; + + xpc_msgqueue_ref(ch); + + if (ch->flags & XPC_C_DISCONNECTING) { + ret = ch->reason; + goto out_1; + } + if (!(ch->flags & XPC_C_CONNECTED)) { + ret = xpNotConnected; + goto out_1; + } + + ret = xpc_allocate_msg_slot_uv(ch, flags, &msg_slot); + if (ret != xpSuccess) + goto out_1; + + if (func != NULL) { + atomic_inc(&ch->n_to_notify); + + msg_slot->key = key; + smp_wmb(); /* a non-NULL func must hit memory after the key */ + msg_slot->func = func; + + if (ch->flags & XPC_C_DISCONNECTING) { + ret = ch->reason; + goto out_2; + } + } + + msg = (struct xpc_notify_mq_msg_uv *)&msg_buffer; + msg->hdr.partid = xp_partition_id; + 
msg->hdr.ch_number = ch->number; + msg->hdr.size = msg_size; + msg->hdr.msg_slot_number = msg_slot->msg_slot_number; + memcpy(&msg->payload, payload, payload_size); + + ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg, + msg_size); + if (ret == xpSuccess) + goto out_1; + + XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret); +out_2: + if (func != NULL) { + /* + * Try to NULL the msg_slot's func field. If we fail, then + * xpc_notify_senders_of_disconnect_uv() beat us to it, in which + * case we need to pretend the message was sent successfully, since + * the user will get a callout for the disconnect error from + * xpc_notify_senders_of_disconnect_uv() and an error returned here + * as well would only confuse them. Additionally, since in this + * case the channel is being disconnected we don't need to put the + * msg_slot back on the free list. + */ + if (cmpxchg(&msg_slot->func, func, NULL) != func) { + ret = xpSuccess; + goto out_1; + } + + msg_slot->key = NULL; + atomic_dec(&ch->n_to_notify); + } + xpc_free_msg_slot_uv(ch, msg_slot); +out_1: + xpc_msgqueue_deref(ch); + return ret; +} + +/* + * Tell the callers of xpc_send_notify() that the status of their payloads + * is unknown because the channel is now disconnecting. + * + * We don't worry about putting these msg_slots on the free list since the + * msg_slots themselves are about to be kfree'd. + */ +static void +xpc_notify_senders_of_disconnect_uv(struct xpc_channel *ch) +{ + struct xpc_send_msg_slot_uv *msg_slot; + int entry; + + DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING)); + + for (entry = 0; entry < ch->local_nentries; entry++) { + + if (atomic_read(&ch->n_to_notify) == 0) + break; + + msg_slot = &ch->sn.uv.send_msg_slots[entry]; + if (msg_slot->func != NULL) + xpc_notify_sender_uv(ch, msg_slot, ch->reason); + } +} + +/* + * Get the next deliverable message's payload.
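+ * Returns NULL if the channel is disconnecting or nothing is queued.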
+ */ +static void * +xpc_get_deliverable_payload_uv(struct xpc_channel *ch) +{ + struct xpc_fifo_entry_uv *entry; + struct xpc_notify_mq_msg_uv *msg; + void *payload = NULL; + + if (!(ch->flags & XPC_C_DISCONNECTING)) { + entry = xpc_get_fifo_entry_uv(&ch->sn.uv.recv_msg_list); + if (entry != NULL) { + msg = container_of(entry, struct xpc_notify_mq_msg_uv, + hdr.u.next); + payload = &msg->payload; + } + } + return payload; +} + +static void +xpc_received_payload_uv(struct xpc_channel *ch, void *payload) +{ + struct xpc_notify_mq_msg_uv *msg; + enum xp_retval ret; + + msg = container_of(payload, struct xpc_notify_mq_msg_uv, payload); + + /* return an ACK to the sender of this message */ + + msg->hdr.partid = xp_partition_id; + msg->hdr.size = 0; /* size of zero indicates this is an ACK */ + + ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg, + sizeof(struct xpc_notify_mq_msghdr_uv)); + if (ret != xpSuccess) + XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret); +} + +static const struct xpc_arch_operations xpc_arch_ops_uv = { + .setup_partitions = xpc_setup_partitions_uv, + .teardown_partitions = xpc_teardown_partitions_uv, + .process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv, + .get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv, + .setup_rsvd_page = xpc_setup_rsvd_page_uv, + + .allow_hb = xpc_allow_hb_uv, + .disallow_hb = xpc_disallow_hb_uv, + .disallow_all_hbs = xpc_disallow_all_hbs_uv, + .increment_heartbeat = xpc_increment_heartbeat_uv, + .offline_heartbeat = xpc_offline_heartbeat_uv, + .online_heartbeat = xpc_online_heartbeat_uv, + .heartbeat_init = xpc_heartbeat_init_uv, + .heartbeat_exit = xpc_heartbeat_exit_uv, + .get_remote_heartbeat = xpc_get_remote_heartbeat_uv, + + .request_partition_activation = + xpc_request_partition_activation_uv, + .request_partition_reactivation = + xpc_request_partition_reactivation_uv, + .request_partition_deactivation = + xpc_request_partition_deactivation_uv, + .cancel_partition_deactivation_request = + xpc_cancel_partition_deactivation_request_uv, + + .setup_ch_structures = xpc_setup_ch_structures_uv, + .teardown_ch_structures = xpc_teardown_ch_structures_uv, + + .make_first_contact = xpc_make_first_contact_uv, + + .get_chctl_all_flags = xpc_get_chctl_all_flags_uv, + .send_chctl_closerequest = xpc_send_chctl_closerequest_uv, + .send_chctl_closereply = xpc_send_chctl_closereply_uv, + .send_chctl_openrequest = xpc_send_chctl_openrequest_uv, + .send_chctl_openreply = xpc_send_chctl_openreply_uv, + .send_chctl_opencomplete = xpc_send_chctl_opencomplete_uv, + .process_msg_chctl_flags = xpc_process_msg_chctl_flags_uv, + + .save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_uv, + + .setup_msg_structures = xpc_setup_msg_structures_uv, + .teardown_msg_structures = xpc_teardown_msg_structures_uv, + + .indicate_partition_engaged = xpc_indicate_partition_engaged_uv, + .indicate_partition_disengaged = xpc_indicate_partition_disengaged_uv, + .assume_partition_disengaged = xpc_assume_partition_disengaged_uv, + .partition_engaged = xpc_partition_engaged_uv, + .any_partition_engaged = xpc_any_partition_engaged_uv, + + .n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_uv, + .send_payload = xpc_send_payload_uv, + .get_deliverable_payload = xpc_get_deliverable_payload_uv, + .received_payload = xpc_received_payload_uv, + .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv, +}; + +static int +xpc_init_mq_node(int nid) +{ + int cpu; + + cpus_read_lock(); + + for_each_cpu(cpu, cpumask_of_node(nid)) { + 
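/* try each cpu on this node until a GRU message queue can be created */ +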
xpc_activate_mq_uv = + xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, nid, + XPC_ACTIVATE_IRQ_NAME, + xpc_handle_activate_IRQ_uv); + if (!IS_ERR(xpc_activate_mq_uv)) + break; + } + if (IS_ERR(xpc_activate_mq_uv)) { + cpus_read_unlock(); + return PTR_ERR(xpc_activate_mq_uv); + } + + for_each_cpu(cpu, cpumask_of_node(nid)) { + xpc_notify_mq_uv = + xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, nid, + XPC_NOTIFY_IRQ_NAME, + xpc_handle_notify_IRQ_uv); + if (!IS_ERR(xpc_notify_mq_uv)) + break; + } + if (IS_ERR(xpc_notify_mq_uv)) { + xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); + cpus_read_unlock(); + return PTR_ERR(xpc_notify_mq_uv); + } + + cpus_read_unlock(); + return 0; +} + +int +xpc_init_uv(void) +{ + int nid; + int ret = 0; + + xpc_arch_ops = xpc_arch_ops_uv; + + if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) { + dev_err(xpc_part, "xpc_notify_mq_msghdr_uv is larger than %d\n", + XPC_MSG_HDR_MAX_SIZE); + return -E2BIG; + } + + if (xpc_mq_node < 0) + for_each_online_node(nid) { + ret = xpc_init_mq_node(nid); + + if (!ret) + break; + } + else + ret = xpc_init_mq_node(xpc_mq_node); + + if (ret < 0) + dev_err(xpc_part, "xpc_init_mq_node() returned error=%d\n", + -ret); + + return ret; +} + +void +xpc_exit_uv(void) +{ + xpc_destroy_gru_mq_uv(xpc_notify_mq_uv); + xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); +} + +module_param(xpc_mq_node, int, 0); +MODULE_PARM_DESC(xpc_mq_node, "Node number on which to allocate message queues."); diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c new file mode 100644 index 0000000000..2396ba3b03 --- /dev/null +++ b/drivers/misc/sgi-xp/xpnet.c @@ -0,0 +1,599 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * (C) Copyright 2020 Hewlett Packard Enterprise Development LP + * Copyright (C) 1999-2009 Silicon Graphics, Inc. All rights reserved. + */ + +/* + * Cross Partition Network Interface (XPNET) support + * + * XPNET provides a virtual network layered on top of the Cross + * Partition communication layer. + * + * XPNET provides direct point-to-point and broadcast-like support + * for an ethernet-like device. The ethernet broadcast medium is + * replaced with a point-to-point message structure which passes + * pointers to a DMA-capable block that a remote partition should + * retrieve and pass to the upper level networking layer. + * + */ + +#include +#include +#include +#include +#include "xp.h" + +/* + * The message payload transferred by XPC. + * + * buf_pa is the physical address where the DMA should pull from. + * + * NOTE: for performance reasons, buf_pa should _ALWAYS_ begin on a + * cacheline boundary. To accomplish this, we record the number of + * bytes from the beginning of the first cacheline to the first useful + * byte of the skb (leadin_ignore) and the number of bytes from the + * last useful byte of the skb to the end of the last cacheline + * (tailout_ignore). 
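+ * (e.g. if skb->data begins 40 bytes into its first cacheline, leadin_ignore is 40)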
+ * + * size is the number of bytes to transfer which includes the skb->len + * (useful bytes of the senders skb) plus the leadin and tailout + */ +struct xpnet_message { + u16 version; /* Version for this message */ + u16 embedded_bytes; /* #of bytes embedded in XPC message */ + u32 magic; /* Special number indicating this is xpnet */ + unsigned long buf_pa; /* phys address of buffer to retrieve */ + u32 size; /* #of bytes in buffer */ + u8 leadin_ignore; /* #of bytes to ignore at the beginning */ + u8 tailout_ignore; /* #of bytes to ignore at the end */ + unsigned char data; /* body of small packets */ +}; + +/* + * Determine the size of our message, the cacheline aligned size, + * and then the number of message will request from XPC. + * + * XPC expects each message to exist in an individual cacheline. + */ +#define XPNET_MSG_SIZE XPC_MSG_PAYLOAD_MAX_SIZE +#define XPNET_MSG_DATA_MAX \ + (XPNET_MSG_SIZE - offsetof(struct xpnet_message, data)) +#define XPNET_MSG_NENTRIES (PAGE_SIZE / XPC_MSG_MAX_SIZE) + +#define XPNET_MAX_KTHREADS (XPNET_MSG_NENTRIES + 1) +#define XPNET_MAX_IDLE_KTHREADS (XPNET_MSG_NENTRIES + 1) + +/* + * Version number of XPNET implementation. XPNET can always talk to versions + * with same major #, and never talk to versions with a different version. + */ +#define _XPNET_VERSION(_major, _minor) (((_major) << 4) | (_minor)) +#define XPNET_VERSION_MAJOR(_v) ((_v) >> 4) +#define XPNET_VERSION_MINOR(_v) ((_v) & 0xf) + +#define XPNET_VERSION _XPNET_VERSION(1, 0) /* version 1.0 */ +#define XPNET_VERSION_EMBED _XPNET_VERSION(1, 1) /* version 1.1 */ +#define XPNET_MAGIC 0x88786984 /* "XNET" */ + +#define XPNET_VALID_MSG(_m) \ + ((XPNET_VERSION_MAJOR(_m->version) == XPNET_VERSION_MAJOR(XPNET_VERSION)) \ + && (msg->magic == XPNET_MAGIC)) + +#define XPNET_DEVICE_NAME "xp0" + +/* + * When messages are queued with xpc_send_notify, a kmalloc'd buffer + * of the following type is passed as a notification cookie. When the + * notification function is called, we use the cookie to decide + * whether all outstanding message sends have completed. The skb can + * then be released. + */ +struct xpnet_pending_msg { + struct sk_buff *skb; + atomic_t use_count; +}; + +static struct net_device *xpnet_device; + +/* + * When we are notified of other partitions activating, we add them to + * our bitmask of partitions to which we broadcast. + */ +static unsigned long *xpnet_broadcast_partitions; +/* protect above */ +static DEFINE_SPINLOCK(xpnet_broadcast_lock); + +/* + * Since the Block Transfer Engine (BTE) is being used for the transfer + * and it relies upon cache-line size transfers, we need to reserve at + * least one cache-line for head and tail alignment. The BTE is + * limited to 8MB transfers. + * + * Testing has shown that changing MTU to greater than 64KB has no effect + * on TCP as the two sides negotiate a Max Segment Size that is limited + * to 64K. Other protocols May use packets greater than this, but for + * now, the default is 64KB. + */ +#define XPNET_MAX_MTU (0x800000UL - L1_CACHE_BYTES) +/* 68 comes from min TCP+IP+MAC header */ +#define XPNET_MIN_MTU 68 +/* 32KB has been determined to be the ideal */ +#define XPNET_DEF_MTU (0x8000UL) + +/* + * The partid is encapsulated in the MAC address beginning in the following + * octet and it consists of two octets. 
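+ * (octet XPNET_PARTID_OCTET holds the high byte, the following octet the low byte)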
+ */ +#define XPNET_PARTID_OCTET 2 + +/* Define the XPNET debug device structures to be used with dev_dbg() et al */ + +static struct device_driver xpnet_dbg_name = { + .name = "xpnet" +}; + +static struct device xpnet_dbg_subname = { + .init_name = "", /* set to "" */ + .driver = &xpnet_dbg_name +}; + +static struct device *xpnet = &xpnet_dbg_subname; + +/* + * Packet was recevied by XPC and forwarded to us. + */ +static void +xpnet_receive(short partid, int channel, struct xpnet_message *msg) +{ + struct sk_buff *skb; + void *dst; + enum xp_retval ret; + + if (!XPNET_VALID_MSG(msg)) { + /* + * Packet with a different XPC version. Ignore. + */ + xpc_received(partid, channel, (void *)msg); + + xpnet_device->stats.rx_errors++; + + return; + } + dev_dbg(xpnet, "received 0x%lx, %d, %d, %d\n", msg->buf_pa, msg->size, + msg->leadin_ignore, msg->tailout_ignore); + + /* reserve an extra cache line */ + skb = dev_alloc_skb(msg->size + L1_CACHE_BYTES); + if (!skb) { + dev_err(xpnet, "failed on dev_alloc_skb(%d)\n", + msg->size + L1_CACHE_BYTES); + + xpc_received(partid, channel, (void *)msg); + + xpnet_device->stats.rx_errors++; + + return; + } + + /* + * The allocated skb has some reserved space. + * In order to use xp_remote_memcpy(), we need to get the + * skb->data pointer moved forward. + */ + skb_reserve(skb, (L1_CACHE_BYTES - ((u64)skb->data & + (L1_CACHE_BYTES - 1)) + + msg->leadin_ignore)); + + /* + * Update the tail pointer to indicate data actually + * transferred. + */ + skb_put(skb, (msg->size - msg->leadin_ignore - msg->tailout_ignore)); + + /* + * Move the data over from the other side. + */ + if ((XPNET_VERSION_MINOR(msg->version) == 1) && + (msg->embedded_bytes != 0)) { + dev_dbg(xpnet, "copying embedded message. memcpy(0x%p, 0x%p, " + "%lu)\n", skb->data, &msg->data, + (size_t)msg->embedded_bytes); + + skb_copy_to_linear_data(skb, &msg->data, + (size_t)msg->embedded_bytes); + } else { + dst = (void *)((u64)skb->data & ~(L1_CACHE_BYTES - 1)); + dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t" + "xp_remote_memcpy(0x%p, 0x%p, %u)\n", dst, + (void *)msg->buf_pa, msg->size); + + ret = xp_remote_memcpy(xp_pa(dst), msg->buf_pa, msg->size); + if (ret != xpSuccess) { + /* + * !!! Need better way of cleaning skb. Currently skb + * !!! appears in_use and we can't just call + * !!! dev_kfree_skb. + */ + dev_err(xpnet, "xp_remote_memcpy(0x%p, 0x%p, 0x%x) " + "returned error=0x%x\n", dst, + (void *)msg->buf_pa, msg->size, ret); + + xpc_received(partid, channel, (void *)msg); + + xpnet_device->stats.rx_errors++; + + return; + } + } + + dev_dbg(xpnet, "head=0x%p skb->data=0x%p skb->tail=0x%p " + "skb->end=0x%p skb->len=%d\n", (void *)skb->head, + (void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb), + skb->len); + + skb->protocol = eth_type_trans(skb, xpnet_device); + skb->ip_summed = CHECKSUM_UNNECESSARY; + + dev_dbg(xpnet, "passing skb to network layer\n" + "\tskb->head=0x%p skb->data=0x%p skb->tail=0x%p " + "skb->end=0x%p skb->len=%d\n", + (void *)skb->head, (void *)skb->data, skb_tail_pointer(skb), + skb_end_pointer(skb), skb->len); + + xpnet_device->stats.rx_packets++; + xpnet_device->stats.rx_bytes += skb->len + ETH_HLEN; + + netif_rx(skb); + xpc_received(partid, channel, (void *)msg); +} + +/* + * This is the handler which XPC calls during any sort of change in + * state or message reception on a connection. 
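+ * (xpMsgReceived delivers the data; xpConnected and disconnect reasons update the broadcast bitmask and carrier)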
+ */ +static void +xpnet_connection_activity(enum xp_retval reason, short partid, int channel, + void *data, void *key) +{ + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); + DBUG_ON(channel != XPC_NET_CHANNEL); + + switch (reason) { + case xpMsgReceived: /* message received */ + DBUG_ON(data == NULL); + + xpnet_receive(partid, channel, (struct xpnet_message *)data); + break; + + case xpConnected: /* connection completed to a partition */ + spin_lock_bh(&xpnet_broadcast_lock); + __set_bit(partid, xpnet_broadcast_partitions); + spin_unlock_bh(&xpnet_broadcast_lock); + + netif_carrier_on(xpnet_device); + + dev_dbg(xpnet, "%s connected to partition %d\n", + xpnet_device->name, partid); + break; + + default: + spin_lock_bh(&xpnet_broadcast_lock); + __clear_bit(partid, xpnet_broadcast_partitions); + spin_unlock_bh(&xpnet_broadcast_lock); + + if (bitmap_empty(xpnet_broadcast_partitions, + xp_max_npartitions)) { + netif_carrier_off(xpnet_device); + } + + dev_dbg(xpnet, "%s disconnected from partition %d\n", + xpnet_device->name, partid); + break; + } +} + +static int +xpnet_dev_open(struct net_device *dev) +{ + enum xp_retval ret; + + dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %ld, " + "%ld)\n", XPC_NET_CHANNEL, xpnet_connection_activity, + (unsigned long)XPNET_MSG_SIZE, + (unsigned long)XPNET_MSG_NENTRIES, + (unsigned long)XPNET_MAX_KTHREADS, + (unsigned long)XPNET_MAX_IDLE_KTHREADS); + + ret = xpc_connect(XPC_NET_CHANNEL, xpnet_connection_activity, NULL, + XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, + XPNET_MAX_KTHREADS, XPNET_MAX_IDLE_KTHREADS); + if (ret != xpSuccess) { + dev_err(xpnet, "ifconfig up of %s failed on XPC connect, " + "ret=%d\n", dev->name, ret); + + return -ENOMEM; + } + + dev_dbg(xpnet, "ifconfig up of %s; XPC connected\n", dev->name); + + return 0; +} + +static int +xpnet_dev_stop(struct net_device *dev) +{ + xpc_disconnect(XPC_NET_CHANNEL); + + dev_dbg(xpnet, "ifconfig down of %s; XPC disconnected\n", dev->name); + + return 0; +} + +/* + * Notification that the other end has received the message and + * DMA'd the skb information. At this point, they are done with + * our side. When all recipients are done processing, we + * release the skb and then release our pending message structure. 
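+ * (use_count in struct xpnet_pending_msg tracks the outstanding sends)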
+ */ +static void +xpnet_send_completed(enum xp_retval reason, short partid, int channel, + void *__qm) +{ + struct xpnet_pending_msg *queued_msg = (struct xpnet_pending_msg *)__qm; + + DBUG_ON(queued_msg == NULL); + + dev_dbg(xpnet, "message to %d notified with reason %d\n", + partid, reason); + + if (atomic_dec_return(&queued_msg->use_count) == 0) { + dev_dbg(xpnet, "all acks for skb->head=-x%p\n", + (void *)queued_msg->skb->head); + + dev_kfree_skb_any(queued_msg->skb); + kfree(queued_msg); + } +} + +static void +xpnet_send(struct sk_buff *skb, struct xpnet_pending_msg *queued_msg, + u64 start_addr, u64 end_addr, u16 embedded_bytes, int dest_partid) +{ + u8 msg_buffer[XPNET_MSG_SIZE]; + struct xpnet_message *msg = (struct xpnet_message *)&msg_buffer; + u16 msg_size = sizeof(struct xpnet_message); + enum xp_retval ret; + + msg->embedded_bytes = embedded_bytes; + if (unlikely(embedded_bytes != 0)) { + msg->version = XPNET_VERSION_EMBED; + dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n", + &msg->data, skb->data, (size_t)embedded_bytes); + skb_copy_from_linear_data(skb, &msg->data, + (size_t)embedded_bytes); + msg_size += embedded_bytes - 1; + } else { + msg->version = XPNET_VERSION; + } + msg->magic = XPNET_MAGIC; + msg->size = end_addr - start_addr; + msg->leadin_ignore = (u64)skb->data - start_addr; + msg->tailout_ignore = end_addr - (u64)skb_tail_pointer(skb); + msg->buf_pa = xp_pa((void *)start_addr); + + dev_dbg(xpnet, "sending XPC message to %d:%d\n" + "msg->buf_pa=0x%lx, msg->size=%u, " + "msg->leadin_ignore=%u, msg->tailout_ignore=%u\n", + dest_partid, XPC_NET_CHANNEL, msg->buf_pa, msg->size, + msg->leadin_ignore, msg->tailout_ignore); + + atomic_inc(&queued_msg->use_count); + + ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, XPC_NOWAIT, msg, + msg_size, xpnet_send_completed, queued_msg); + if (unlikely(ret != xpSuccess)) + atomic_dec(&queued_msg->use_count); +} + +/* + * Network layer has formatted a packet (skb) and is ready to place it + * "on the wire". Prepare and send an xpnet_message to all partitions + * which have connected with us and are targets of this packet. + * + * MAC-NOTE: For the XPNET driver, the MAC address contains the + * destination partid. If the destination partid octets are 0xffff, + * this packet is to be broadcast to all connected partitions. + */ +static netdev_tx_t +xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct xpnet_pending_msg *queued_msg; + u64 start_addr, end_addr; + short dest_partid; + u16 embedded_bytes = 0; + + dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p " + "skb->end=0x%p skb->len=%d\n", (void *)skb->head, + (void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb), + skb->len); + + if (skb->data[0] == 0x33) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; /* nothing needed to be done */ + } + + /* + * The xpnet_pending_msg tracks how many outstanding + * xpc_send_notifies are relying on this skb. When none + * remain, release the skb. 
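+ * The count starts at one so the skb is not freed while sends are still being queued.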
+ */ + queued_msg = kmalloc(sizeof(struct xpnet_pending_msg), GFP_ATOMIC); + if (queued_msg == NULL) { + dev_warn(xpnet, "failed to kmalloc %ld bytes; dropping " + "packet\n", sizeof(struct xpnet_pending_msg)); + + dev->stats.tx_errors++; + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + + /* get the beginning of the first cacheline and end of last */ + start_addr = ((u64)skb->data & ~(L1_CACHE_BYTES - 1)); + end_addr = L1_CACHE_ALIGN((u64)skb_tail_pointer(skb)); + + /* calculate how many bytes to embed in the XPC message */ + if (unlikely(skb->len <= XPNET_MSG_DATA_MAX)) { + /* skb->data does fit so embed */ + embedded_bytes = skb->len; + } + + /* + * Since the send occurs asynchronously, we set the count to one + * and begin sending. Any sends that happen to complete before + * we are done sending will not free the skb. We will be left + * with that task during exit. This also handles the case of + * a packet destined for a partition which is no longer up. + */ + atomic_set(&queued_msg->use_count, 1); + queued_msg->skb = skb; + + if (skb->data[0] == 0xff) { + /* we are being asked to broadcast to all partitions */ + for_each_set_bit(dest_partid, xpnet_broadcast_partitions, + xp_max_npartitions) { + + xpnet_send(skb, queued_msg, start_addr, end_addr, + embedded_bytes, dest_partid); + } + } else { + dest_partid = (short)skb->data[XPNET_PARTID_OCTET + 1]; + dest_partid |= (short)skb->data[XPNET_PARTID_OCTET + 0] << 8; + + if (dest_partid >= 0 && + dest_partid < xp_max_npartitions && + test_bit(dest_partid, xpnet_broadcast_partitions) != 0) { + + xpnet_send(skb, queued_msg, start_addr, end_addr, + embedded_bytes, dest_partid); + } + } + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + if (atomic_dec_return(&queued_msg->use_count) == 0) { + dev_kfree_skb(skb); + kfree(queued_msg); + } + + return NETDEV_TX_OK; +} + +/* + * Deal with transmit timeouts coming from the network layer. + */ +static void +xpnet_dev_tx_timeout(struct net_device *dev, unsigned int txqueue) +{ + dev->stats.tx_errors++; +} + +static const struct net_device_ops xpnet_netdev_ops = { + .ndo_open = xpnet_dev_open, + .ndo_stop = xpnet_dev_stop, + .ndo_start_xmit = xpnet_dev_hard_start_xmit, + .ndo_tx_timeout = xpnet_dev_tx_timeout, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, +}; + +static int __init +xpnet_init(void) +{ + u8 addr[ETH_ALEN]; + int result; + + if (!is_uv_system()) + return -ENODEV; + + dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME); + + xpnet_broadcast_partitions = bitmap_zalloc(xp_max_npartitions, + GFP_KERNEL); + if (xpnet_broadcast_partitions == NULL) + return -ENOMEM; + + /* + * use ether_setup() to init the majority of our device + * structure and then override the necessary pieces. + */ + xpnet_device = alloc_netdev(0, XPNET_DEVICE_NAME, NET_NAME_UNKNOWN, + ether_setup); + if (xpnet_device == NULL) { + bitmap_free(xpnet_broadcast_partitions); + return -ENOMEM; + } + + netif_carrier_off(xpnet_device); + + xpnet_device->netdev_ops = &xpnet_netdev_ops; + xpnet_device->mtu = XPNET_DEF_MTU; + xpnet_device->min_mtu = XPNET_MIN_MTU; + xpnet_device->max_mtu = XPNET_MAX_MTU; + + memset(addr, 0, sizeof(addr)); + /* + * Multicast assumes the LSB of the first octet is set for multicast + * MAC addresses. We chose the first octet of the MAC to be unlikely + * to collide with any vendor's officially issued MAC. 
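+ * (0x02 sets the locally-administered bit and leaves the multicast bit clear)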
+ */ + addr[0] = 0x02; /* locally administered, no OUI */ + + addr[XPNET_PARTID_OCTET + 1] = xp_partition_id; + addr[XPNET_PARTID_OCTET + 0] = (xp_partition_id >> 8); + eth_hw_addr_set(xpnet_device, addr); + + /* + * ether_setup() sets this to a multicast device. We are + * really not supporting multicast at this time. + */ + xpnet_device->flags &= ~IFF_MULTICAST; + + /* + * No need to checksum as it is a DMA transfer. The BTE will + * report an error if the data is not retrievable and the + * packet will be dropped. + */ + xpnet_device->features = NETIF_F_HW_CSUM; + + result = register_netdev(xpnet_device); + if (result != 0) { + free_netdev(xpnet_device); + bitmap_free(xpnet_broadcast_partitions); + } + + return result; +} + +module_init(xpnet_init); + +static void __exit +xpnet_exit(void) +{ + dev_info(xpnet, "unregistering network device %s\n", + xpnet_device[0].name); + + unregister_netdev(xpnet_device); + free_netdev(xpnet_device); + bitmap_free(xpnet_broadcast_partitions); +} + +module_exit(xpnet_exit); + +MODULE_AUTHOR("Silicon Graphics, Inc."); +MODULE_DESCRIPTION("Cross Partition Network adapter (XPNET)"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/smpro-errmon.c b/drivers/misc/smpro-errmon.c new file mode 100644 index 0000000000..c12035a465 --- /dev/null +++ b/drivers/misc/smpro-errmon.c @@ -0,0 +1,610 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Ampere Computing SoC's SMpro Error Monitoring Driver + * + * Copyright (c) 2022, Ampere Computing LLC + * + */ + +#include +#include +#include +#include + +/* GPI RAS Error Registers */ +#define GPI_RAS_ERR 0x7E + +/* Core and L2C Error Registers */ +#define CORE_CE_ERR_CNT 0x80 +#define CORE_CE_ERR_LEN 0x81 +#define CORE_CE_ERR_DATA 0x82 +#define CORE_UE_ERR_CNT 0x83 +#define CORE_UE_ERR_LEN 0x84 +#define CORE_UE_ERR_DATA 0x85 + +/* Memory Error Registers */ +#define MEM_CE_ERR_CNT 0x90 +#define MEM_CE_ERR_LEN 0x91 +#define MEM_CE_ERR_DATA 0x92 +#define MEM_UE_ERR_CNT 0x93 +#define MEM_UE_ERR_LEN 0x94 +#define MEM_UE_ERR_DATA 0x95 + +/* RAS Error/Warning Registers */ +#define ERR_SMPRO_TYPE 0xA0 +#define ERR_PMPRO_TYPE 0xA1 +#define ERR_SMPRO_INFO_LO 0xA2 +#define ERR_SMPRO_INFO_HI 0xA3 +#define ERR_SMPRO_DATA_LO 0xA4 +#define ERR_SMPRO_DATA_HI 0xA5 +#define WARN_SMPRO_INFO_LO 0xAA +#define WARN_SMPRO_INFO_HI 0xAB +#define ERR_PMPRO_INFO_LO 0xA6 +#define ERR_PMPRO_INFO_HI 0xA7 +#define ERR_PMPRO_DATA_LO 0xA8 +#define ERR_PMPRO_DATA_HI 0xA9 +#define WARN_PMPRO_INFO_LO 0xAC +#define WARN_PMPRO_INFO_HI 0xAD + +/* Boot Stage Register */ +#define BOOTSTAGE 0xB0 +#define DIMM_SYNDROME_SEL 0xB4 +#define DIMM_SYNDROME_ERR 0xB5 +#define DIMM_SYNDROME_STAGE 4 + +/* PCIE Error Registers */ +#define PCIE_CE_ERR_CNT 0xC0 +#define PCIE_CE_ERR_LEN 0xC1 +#define PCIE_CE_ERR_DATA 0xC2 +#define PCIE_UE_ERR_CNT 0xC3 +#define PCIE_UE_ERR_LEN 0xC4 +#define PCIE_UE_ERR_DATA 0xC5 + +/* Other Error Registers */ +#define OTHER_CE_ERR_CNT 0xD0 +#define OTHER_CE_ERR_LEN 0xD1 +#define OTHER_CE_ERR_DATA 0xD2 +#define OTHER_UE_ERR_CNT 0xD8 +#define OTHER_UE_ERR_LEN 0xD9 +#define OTHER_UE_ERR_DATA 0xDA + +/* Event Data Registers */ +#define VRD_WARN_FAULT_EVENT_DATA 0x78 +#define VRD_HOT_EVENT_DATA 0x79 +#define DIMM_HOT_EVENT_DATA 0x7A +#define DIMM_2X_REFRESH_EVENT_DATA 0x96 + +#define MAX_READ_BLOCK_LENGTH 48 + +#define RAS_SMPRO_ERR 0 +#define RAS_PMPRO_ERR 1 + +enum RAS_48BYTES_ERR_TYPES { + CORE_CE_ERR, + CORE_UE_ERR, + MEM_CE_ERR, + MEM_UE_ERR, + PCIE_CE_ERR, + PCIE_UE_ERR, + OTHER_CE_ERR, + OTHER_UE_ERR, + NUM_48BYTES_ERR_TYPE, +}; + +struct 
smpro_error_hdr { + u8 count; /* Number of the RAS errors */ + u8 len; /* Number of data bytes */ + u8 data; /* Start of 48-byte data */ + u8 max_cnt; /* Max num of errors */ +}; + +/* + * Included Address of registers to get Count, Length of data and Data + * of the 48 bytes error data + */ +static struct smpro_error_hdr smpro_error_table[] = { + [CORE_CE_ERR] = { + .count = CORE_CE_ERR_CNT, + .len = CORE_CE_ERR_LEN, + .data = CORE_CE_ERR_DATA, + .max_cnt = 32 + }, + [CORE_UE_ERR] = { + .count = CORE_UE_ERR_CNT, + .len = CORE_UE_ERR_LEN, + .data = CORE_UE_ERR_DATA, + .max_cnt = 32 + }, + [MEM_CE_ERR] = { + .count = MEM_CE_ERR_CNT, + .len = MEM_CE_ERR_LEN, + .data = MEM_CE_ERR_DATA, + .max_cnt = 16 + }, + [MEM_UE_ERR] = { + .count = MEM_UE_ERR_CNT, + .len = MEM_UE_ERR_LEN, + .data = MEM_UE_ERR_DATA, + .max_cnt = 16 + }, + [PCIE_CE_ERR] = { + .count = PCIE_CE_ERR_CNT, + .len = PCIE_CE_ERR_LEN, + .data = PCIE_CE_ERR_DATA, + .max_cnt = 96 + }, + [PCIE_UE_ERR] = { + .count = PCIE_UE_ERR_CNT, + .len = PCIE_UE_ERR_LEN, + .data = PCIE_UE_ERR_DATA, + .max_cnt = 96 + }, + [OTHER_CE_ERR] = { + .count = OTHER_CE_ERR_CNT, + .len = OTHER_CE_ERR_LEN, + .data = OTHER_CE_ERR_DATA, + .max_cnt = 8 + }, + [OTHER_UE_ERR] = { + .count = OTHER_UE_ERR_CNT, + .len = OTHER_UE_ERR_LEN, + .data = OTHER_UE_ERR_DATA, + .max_cnt = 8 + }, +}; + +/* + * List of SCP registers which are used to get + * one type of RAS Internal errors. + */ +struct smpro_int_error_hdr { + u8 type; + u8 info_l; + u8 info_h; + u8 data_l; + u8 data_h; + u8 warn_l; + u8 warn_h; +}; + +static struct smpro_int_error_hdr list_smpro_int_error_hdr[] = { + [RAS_SMPRO_ERR] = { + .type = ERR_SMPRO_TYPE, + .info_l = ERR_SMPRO_INFO_LO, + .info_h = ERR_SMPRO_INFO_HI, + .data_l = ERR_SMPRO_DATA_LO, + .data_h = ERR_SMPRO_DATA_HI, + .warn_l = WARN_SMPRO_INFO_LO, + .warn_h = WARN_SMPRO_INFO_HI, + }, + [RAS_PMPRO_ERR] = { + .type = ERR_PMPRO_TYPE, + .info_l = ERR_PMPRO_INFO_LO, + .info_h = ERR_PMPRO_INFO_HI, + .data_l = ERR_PMPRO_DATA_LO, + .data_h = ERR_PMPRO_DATA_HI, + .warn_l = WARN_PMPRO_INFO_LO, + .warn_h = WARN_PMPRO_INFO_HI, + }, +}; + +struct smpro_errmon { + struct regmap *regmap; +}; + +enum EVENT_TYPES { + VRD_WARN_FAULT_EVENT, + VRD_HOT_EVENT, + DIMM_HOT_EVENT, + DIMM_2X_REFRESH_EVENT, + NUM_EVENTS_TYPE, +}; + +/* Included Address of event source and data registers */ +static u8 smpro_event_table[NUM_EVENTS_TYPE] = { + VRD_WARN_FAULT_EVENT_DATA, + VRD_HOT_EVENT_DATA, + DIMM_HOT_EVENT_DATA, + DIMM_2X_REFRESH_EVENT_DATA, +}; + +static ssize_t smpro_event_data_read(struct device *dev, + struct device_attribute *da, char *buf, + int channel) +{ + struct smpro_errmon *errmon = dev_get_drvdata(dev); + s32 event_data; + int ret; + + ret = regmap_read(errmon->regmap, smpro_event_table[channel], &event_data); + if (ret) + return ret; + /* Clear event after read */ + if (event_data != 0) + regmap_write(errmon->regmap, smpro_event_table[channel], event_data); + + return sysfs_emit(buf, "%04x\n", event_data); +} + +static ssize_t smpro_overflow_data_read(struct device *dev, struct device_attribute *da, + char *buf, int channel) +{ + struct smpro_errmon *errmon = dev_get_drvdata(dev); + struct smpro_error_hdr *err_info; + s32 err_count; + int ret; + + err_info = &smpro_error_table[channel]; + + ret = regmap_read(errmon->regmap, err_info->count, &err_count); + if (ret) + return ret; + + /* Bit 8 indicates the overflow status */ + return sysfs_emit(buf, "%d\n", (err_count & BIT(8)) ? 
1 : 0); +} + +static ssize_t smpro_error_data_read(struct device *dev, struct device_attribute *da, + char *buf, int channel) +{ + struct smpro_errmon *errmon = dev_get_drvdata(dev); + unsigned char err_data[MAX_READ_BLOCK_LENGTH]; + struct smpro_error_hdr *err_info; + s32 err_count, err_length; + int ret; + + err_info = &smpro_error_table[channel]; + + ret = regmap_read(errmon->regmap, err_info->count, &err_count); + /* Error count is the low byte */ + err_count &= 0xff; + if (ret || !err_count || err_count > err_info->max_cnt) + return ret; + + ret = regmap_read(errmon->regmap, err_info->len, &err_length); + if (ret || err_length <= 0) + return ret; + + if (err_length > MAX_READ_BLOCK_LENGTH) + err_length = MAX_READ_BLOCK_LENGTH; + + memset(err_data, 0x00, MAX_READ_BLOCK_LENGTH); + ret = regmap_noinc_read(errmon->regmap, err_info->data, err_data, err_length); + if (ret < 0) + return ret; + + /* clear the error */ + ret = regmap_write(errmon->regmap, err_info->count, 0x100); + if (ret) + return ret; + /* + * The output of Core/Memory/PCIe/Others UE/CE errors follows the format + * specified in section 5.8.1 CE/UE Error Data record in + * Altra SOC BMC Interface specification. + */ + return sysfs_emit(buf, "%*phN\n", MAX_READ_BLOCK_LENGTH, err_data); +} + +/* + * Output format: + * <4-byte hex value of error info><4-byte hex value of error extensive data> + * Where: + * + error info : The error information + * + error data : Extensive data (32 bits) + * Reference to section 5.10 RAS Internal Error Register Definition in + * Altra SOC BMC Interface specification + */ +static ssize_t smpro_internal_err_read(struct device *dev, struct device_attribute *da, + char *buf, int channel) +{ + struct smpro_errmon *errmon = dev_get_drvdata(dev); + struct smpro_int_error_hdr *err_info; + unsigned int err[4] = { 0 }; + unsigned int err_type; + unsigned int val; + int ret; + + /* read error status */ + ret = regmap_read(errmon->regmap, GPI_RAS_ERR, &val); + if (ret) + return ret; + + if ((channel == RAS_SMPRO_ERR && !(val & BIT(0))) || + (channel == RAS_PMPRO_ERR && !(val & BIT(1)))) + return 0; + + err_info = &list_smpro_int_error_hdr[channel]; + ret = regmap_read(errmon->regmap, err_info->type, &val); + if (ret) + return ret; + + err_type = (val & BIT(1)) ? BIT(1) : + (val & BIT(2)) ? 
BIT(2) : 0; + + if (!err_type) + return 0; + + ret = regmap_read(errmon->regmap, err_info->info_l, err + 1); + if (ret) + return ret; + + ret = regmap_read(errmon->regmap, err_info->info_h, err); + if (ret) + return ret; + + if (err_type & BIT(2)) { + /* Error with data type */ + ret = regmap_read(errmon->regmap, err_info->data_l, err + 3); + if (ret) + return ret; + + ret = regmap_read(errmon->regmap, err_info->data_h, err + 2); + if (ret) + return ret; + } + + /* clear the read errors */ + ret = regmap_write(errmon->regmap, err_info->type, err_type); + if (ret) + return ret; + + return sysfs_emit(buf, "%*phN\n", (int)sizeof(err), err); +} + +/* + * Output format: + * <4-byte hex value of warining info> + * Reference to section 5.10 RAS Internal Error Register Definition in + * Altra SOC BMC Interface specification + */ +static ssize_t smpro_internal_warn_read(struct device *dev, struct device_attribute *da, + char *buf, int channel) +{ + struct smpro_errmon *errmon = dev_get_drvdata(dev); + struct smpro_int_error_hdr *err_info; + unsigned int warn[2] = { 0 }; + unsigned int val; + int ret; + + /* read error status */ + ret = regmap_read(errmon->regmap, GPI_RAS_ERR, &val); + if (ret) + return ret; + + if ((channel == RAS_SMPRO_ERR && !(val & BIT(0))) || + (channel == RAS_PMPRO_ERR && !(val & BIT(1)))) + return 0; + + err_info = &list_smpro_int_error_hdr[channel]; + ret = regmap_read(errmon->regmap, err_info->type, &val); + if (ret) + return ret; + + if (!(val & BIT(0))) + return 0; + + ret = regmap_read(errmon->regmap, err_info->warn_l, warn + 1); + if (ret) + return ret; + + ret = regmap_read(errmon->regmap, err_info->warn_h, warn); + if (ret) + return ret; + + /* clear the warning */ + ret = regmap_write(errmon->regmap, err_info->type, BIT(0)); + if (ret) + return ret; + + return sysfs_emit(buf, "%*phN\n", (int)sizeof(warn), warn); +} + +#define ERROR_OVERFLOW_RO(_error, _index) \ + static ssize_t overflow_##_error##_show(struct device *dev, \ + struct device_attribute *da, \ + char *buf) \ + { \ + return smpro_overflow_data_read(dev, da, buf, _index); \ + } \ + static DEVICE_ATTR_RO(overflow_##_error) + +ERROR_OVERFLOW_RO(core_ce, CORE_CE_ERR); +ERROR_OVERFLOW_RO(core_ue, CORE_UE_ERR); +ERROR_OVERFLOW_RO(mem_ce, MEM_CE_ERR); +ERROR_OVERFLOW_RO(mem_ue, MEM_UE_ERR); +ERROR_OVERFLOW_RO(pcie_ce, PCIE_CE_ERR); +ERROR_OVERFLOW_RO(pcie_ue, PCIE_UE_ERR); +ERROR_OVERFLOW_RO(other_ce, OTHER_CE_ERR); +ERROR_OVERFLOW_RO(other_ue, OTHER_UE_ERR); + +#define ERROR_RO(_error, _index) \ + static ssize_t error_##_error##_show(struct device *dev, \ + struct device_attribute *da, \ + char *buf) \ + { \ + return smpro_error_data_read(dev, da, buf, _index); \ + } \ + static DEVICE_ATTR_RO(error_##_error) + +ERROR_RO(core_ce, CORE_CE_ERR); +ERROR_RO(core_ue, CORE_UE_ERR); +ERROR_RO(mem_ce, MEM_CE_ERR); +ERROR_RO(mem_ue, MEM_UE_ERR); +ERROR_RO(pcie_ce, PCIE_CE_ERR); +ERROR_RO(pcie_ue, PCIE_UE_ERR); +ERROR_RO(other_ce, OTHER_CE_ERR); +ERROR_RO(other_ue, OTHER_UE_ERR); + +static ssize_t error_smpro_show(struct device *dev, struct device_attribute *da, char *buf) +{ + return smpro_internal_err_read(dev, da, buf, RAS_SMPRO_ERR); +} +static DEVICE_ATTR_RO(error_smpro); + +static ssize_t error_pmpro_show(struct device *dev, struct device_attribute *da, char *buf) +{ + return smpro_internal_err_read(dev, da, buf, RAS_PMPRO_ERR); +} +static DEVICE_ATTR_RO(error_pmpro); + +static ssize_t warn_smpro_show(struct device *dev, struct device_attribute *da, char *buf) +{ + return smpro_internal_warn_read(dev, da, buf, 
RAS_SMPRO_ERR); +} +static DEVICE_ATTR_RO(warn_smpro); + +static ssize_t warn_pmpro_show(struct device *dev, struct device_attribute *da, char *buf) +{ + return smpro_internal_warn_read(dev, da, buf, RAS_PMPRO_ERR); +} +static DEVICE_ATTR_RO(warn_pmpro); + +#define EVENT_RO(_event, _index) \ + static ssize_t event_##_event##_show(struct device *dev, \ + struct device_attribute *da, \ + char *buf) \ + { \ + return smpro_event_data_read(dev, da, buf, _index); \ + } \ + static DEVICE_ATTR_RO(event_##_event) + +EVENT_RO(vrd_warn_fault, VRD_WARN_FAULT_EVENT); +EVENT_RO(vrd_hot, VRD_HOT_EVENT); +EVENT_RO(dimm_hot, DIMM_HOT_EVENT); +EVENT_RO(dimm_2x_refresh, DIMM_2X_REFRESH_EVENT); + +static ssize_t smpro_dimm_syndrome_read(struct device *dev, struct device_attribute *da, + char *buf, unsigned int slot) +{ + struct smpro_errmon *errmon = dev_get_drvdata(dev); + unsigned int data; + int ret; + + ret = regmap_read(errmon->regmap, BOOTSTAGE, &data); + if (ret) + return ret; + + /* check for valid stage */ + data = (data >> 8) & 0xff; + if (data != DIMM_SYNDROME_STAGE) + return ret; + + /* Write the slot ID to retrieve Error Syndrome */ + ret = regmap_write(errmon->regmap, DIMM_SYNDROME_SEL, slot); + if (ret) + return ret; + + /* Read the Syndrome error */ + ret = regmap_read(errmon->regmap, DIMM_SYNDROME_ERR, &data); + if (ret || !data) + return ret; + + return sysfs_emit(buf, "%04x\n", data); +} + +#define EVENT_DIMM_SYNDROME(_slot) \ + static ssize_t event_dimm##_slot##_syndrome_show(struct device *dev, \ + struct device_attribute *da, \ + char *buf) \ + { \ + return smpro_dimm_syndrome_read(dev, da, buf, _slot); \ + } \ + static DEVICE_ATTR_RO(event_dimm##_slot##_syndrome) + +EVENT_DIMM_SYNDROME(0); +EVENT_DIMM_SYNDROME(1); +EVENT_DIMM_SYNDROME(2); +EVENT_DIMM_SYNDROME(3); +EVENT_DIMM_SYNDROME(4); +EVENT_DIMM_SYNDROME(5); +EVENT_DIMM_SYNDROME(6); +EVENT_DIMM_SYNDROME(7); +EVENT_DIMM_SYNDROME(8); +EVENT_DIMM_SYNDROME(9); +EVENT_DIMM_SYNDROME(10); +EVENT_DIMM_SYNDROME(11); +EVENT_DIMM_SYNDROME(12); +EVENT_DIMM_SYNDROME(13); +EVENT_DIMM_SYNDROME(14); +EVENT_DIMM_SYNDROME(15); + +static struct attribute *smpro_errmon_attrs[] = { + &dev_attr_overflow_core_ce.attr, + &dev_attr_overflow_core_ue.attr, + &dev_attr_overflow_mem_ce.attr, + &dev_attr_overflow_mem_ue.attr, + &dev_attr_overflow_pcie_ce.attr, + &dev_attr_overflow_pcie_ue.attr, + &dev_attr_overflow_other_ce.attr, + &dev_attr_overflow_other_ue.attr, + &dev_attr_error_core_ce.attr, + &dev_attr_error_core_ue.attr, + &dev_attr_error_mem_ce.attr, + &dev_attr_error_mem_ue.attr, + &dev_attr_error_pcie_ce.attr, + &dev_attr_error_pcie_ue.attr, + &dev_attr_error_other_ce.attr, + &dev_attr_error_other_ue.attr, + &dev_attr_error_smpro.attr, + &dev_attr_error_pmpro.attr, + &dev_attr_warn_smpro.attr, + &dev_attr_warn_pmpro.attr, + &dev_attr_event_vrd_warn_fault.attr, + &dev_attr_event_vrd_hot.attr, + &dev_attr_event_dimm_hot.attr, + &dev_attr_event_dimm_2x_refresh.attr, + &dev_attr_event_dimm0_syndrome.attr, + &dev_attr_event_dimm1_syndrome.attr, + &dev_attr_event_dimm2_syndrome.attr, + &dev_attr_event_dimm3_syndrome.attr, + &dev_attr_event_dimm4_syndrome.attr, + &dev_attr_event_dimm5_syndrome.attr, + &dev_attr_event_dimm6_syndrome.attr, + &dev_attr_event_dimm7_syndrome.attr, + &dev_attr_event_dimm8_syndrome.attr, + &dev_attr_event_dimm9_syndrome.attr, + &dev_attr_event_dimm10_syndrome.attr, + &dev_attr_event_dimm11_syndrome.attr, + &dev_attr_event_dimm12_syndrome.attr, + &dev_attr_event_dimm13_syndrome.attr, + &dev_attr_event_dimm14_syndrome.attr, + 
&dev_attr_event_dimm15_syndrome.attr, + NULL +}; + +ATTRIBUTE_GROUPS(smpro_errmon); + +static int smpro_errmon_probe(struct platform_device *pdev) +{ + struct smpro_errmon *errmon; + + errmon = devm_kzalloc(&pdev->dev, sizeof(struct smpro_errmon), GFP_KERNEL); + if (!errmon) + return -ENOMEM; + + platform_set_drvdata(pdev, errmon); + + errmon->regmap = dev_get_regmap(pdev->dev.parent, NULL); + if (!errmon->regmap) + return -ENODEV; + + return 0; +} + +static struct platform_driver smpro_errmon_driver = { + .probe = smpro_errmon_probe, + .driver = { + .name = "smpro-errmon", + .dev_groups = smpro_errmon_groups, + }, +}; + +module_platform_driver(smpro_errmon_driver); + +MODULE_AUTHOR("Tung Nguyen "); +MODULE_AUTHOR("Thinh Pham "); +MODULE_AUTHOR("Hoang Nguyen "); +MODULE_AUTHOR("Thu Nguyen "); +MODULE_AUTHOR("Quan Nguyen "); +MODULE_DESCRIPTION("Ampere Altra SMpro driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/smpro-misc.c b/drivers/misc/smpro-misc.c new file mode 100644 index 0000000000..6c427141e5 --- /dev/null +++ b/drivers/misc/smpro-misc.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Ampere Computing SoC's SMpro Misc Driver + * + * Copyright (c) 2022, Ampere Computing LLC + */ +#include +#include +#include +#include + +/* Boot Stage/Progress Registers */ +#define BOOTSTAGE 0xB0 +#define BOOTSTAGE_LO 0xB1 +#define CUR_BOOTSTAGE 0xB2 +#define BOOTSTAGE_HI 0xB3 + +/* SOC State Registers */ +#define SOC_POWER_LIMIT 0xE5 + +struct smpro_misc { + struct regmap *regmap; +}; + +static ssize_t boot_progress_show(struct device *dev, struct device_attribute *da, char *buf) +{ + struct smpro_misc *misc = dev_get_drvdata(dev); + u16 boot_progress[3] = { 0 }; + u32 bootstage; + u8 boot_stage; + u8 cur_stage; + u32 reg_lo; + u32 reg; + int ret; + + /* Read current boot stage */ + ret = regmap_read(misc->regmap, CUR_BOOTSTAGE, ®); + if (ret) + return ret; + + cur_stage = reg & 0xff; + + ret = regmap_read(misc->regmap, BOOTSTAGE, &bootstage); + if (ret) + return ret; + + boot_stage = (bootstage >> 8) & 0xff; + + if (boot_stage > cur_stage) + return -EINVAL; + + ret = regmap_read(misc->regmap, BOOTSTAGE_LO, ®_lo); + if (!ret) + ret = regmap_read(misc->regmap, BOOTSTAGE_HI, ®); + if (ret) + return ret; + + /* Firmware to report new boot stage next time */ + if (boot_stage < cur_stage) { + ret = regmap_write(misc->regmap, BOOTSTAGE, ((bootstage & 0xff00) | 0x1)); + if (ret) + return ret; + } + + boot_progress[0] = bootstage; + boot_progress[1] = swab16(reg); + boot_progress[2] = swab16(reg_lo); + + return sysfs_emit(buf, "%*phN\n", (int)sizeof(boot_progress), boot_progress); +} + +static DEVICE_ATTR_RO(boot_progress); + +static ssize_t soc_power_limit_show(struct device *dev, struct device_attribute *da, char *buf) +{ + struct smpro_misc *misc = dev_get_drvdata(dev); + unsigned int value; + int ret; + + ret = regmap_read(misc->regmap, SOC_POWER_LIMIT, &value); + if (ret) + return ret; + + return sysfs_emit(buf, "%d\n", value); +} + +static ssize_t soc_power_limit_store(struct device *dev, struct device_attribute *da, + const char *buf, size_t count) +{ + struct smpro_misc *misc = dev_get_drvdata(dev); + unsigned long val; + s32 ret; + + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; + + ret = regmap_write(misc->regmap, SOC_POWER_LIMIT, (unsigned int)val); + if (ret) + return -EPROTO; + + return count; +} + +static DEVICE_ATTR_RW(soc_power_limit); + +static struct attribute *smpro_misc_attrs[] = { + &dev_attr_boot_progress.attr, + &dev_attr_soc_power_limit.attr, 
+ NULL +}; + +ATTRIBUTE_GROUPS(smpro_misc); + +static int smpro_misc_probe(struct platform_device *pdev) +{ + struct smpro_misc *misc; + + misc = devm_kzalloc(&pdev->dev, sizeof(struct smpro_misc), GFP_KERNEL); + if (!misc) + return -ENOMEM; + + platform_set_drvdata(pdev, misc); + + misc->regmap = dev_get_regmap(pdev->dev.parent, NULL); + if (!misc->regmap) + return -ENODEV; + + return 0; +} + +static struct platform_driver smpro_misc_driver = { + .probe = smpro_misc_probe, + .driver = { + .name = "smpro-misc", + .dev_groups = smpro_misc_groups, + }, +}; + +module_platform_driver(smpro_misc_driver); + +MODULE_AUTHOR("Tung Nguyen "); +MODULE_AUTHOR("Quan Nguyen "); +MODULE_DESCRIPTION("Ampere Altra SMpro Misc driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/sram-exec.c b/drivers/misc/sram-exec.c new file mode 100644 index 0000000000..b71dbbd737 --- /dev/null +++ b/drivers/misc/sram-exec.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * SRAM protect-exec region helper functions + * + * Copyright (C) 2017 Texas Instruments Incorporated - https://www.ti.com/ + * Dave Gerlach + */ + +#include +#include +#include +#include +#include + +#include + +#include "sram.h" + +static DEFINE_MUTEX(exec_pool_list_mutex); +static LIST_HEAD(exec_pool_list); + +int sram_check_protect_exec(struct sram_dev *sram, struct sram_reserve *block, + struct sram_partition *part) +{ + unsigned long base = (unsigned long)part->base; + unsigned long end = base + block->size; + + if (!PAGE_ALIGNED(base) || !PAGE_ALIGNED(end)) { + dev_err(sram->dev, + "SRAM pool marked with 'protect-exec' is not page aligned and will not be created.\n"); + return -ENOMEM; + } + + return 0; +} + +int sram_add_protect_exec(struct sram_partition *part) +{ + mutex_lock(&exec_pool_list_mutex); + list_add_tail(&part->list, &exec_pool_list); + mutex_unlock(&exec_pool_list_mutex); + + return 0; +} + +/** + * sram_exec_copy - copy data to a protected executable region of sram + * + * @pool: struct gen_pool retrieved that is part of this sram + * @dst: Destination address for the copy, that must be inside pool + * @src: Source address for the data to copy + * @size: Size of copy to perform, which starting from dst, must reside in pool + * + * Return: Address for copied data that can safely be called through function + * pointer, or NULL if problem. + * + * This helper function allows sram driver to act as central control location + * of 'protect-exec' pools which are normal sram pools but are always set + * read-only and executable except when copying data to them, at which point + * they are set to read-write non-executable, to make sure no memory is + * writeable and executable at the same time. This region must be page-aligned + * and is checked during probe, otherwise page attribute manipulation would + * not be possible. Care must be taken to only call the returned address as + * dst address is not guaranteed to be safely callable. + * + * NOTE: This function uses the fncpy macro to move code to the executable + * region. Some architectures have strict requirements for relocating + * executable code, so fncpy is a macro that must be defined by any arch + * making use of this functionality that guarantees a safe copy of exec + * data and returns a safe address that can be called as a C function + * pointer. 
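+ * + * The region is temporarily made read-write/non-executable for the copy and + * restored to read-only/executable afterwards.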
+ */ +void *sram_exec_copy(struct gen_pool *pool, void *dst, void *src, + size_t size) +{ + struct sram_partition *part = NULL, *p; + unsigned long base; + int pages; + void *dst_cpy; + int ret; + + mutex_lock(&exec_pool_list_mutex); + list_for_each_entry(p, &exec_pool_list, list) { + if (p->pool == pool) + part = p; + } + mutex_unlock(&exec_pool_list_mutex); + + if (!part) + return NULL; + + if (!gen_pool_has_addr(pool, (unsigned long)dst, size)) + return NULL; + + base = (unsigned long)part->base; + pages = PAGE_ALIGN(size) / PAGE_SIZE; + + mutex_lock(&part->lock); + + ret = set_memory_nx((unsigned long)base, pages); + if (ret) + goto error_out; + ret = set_memory_rw((unsigned long)base, pages); + if (ret) + goto error_out; + + dst_cpy = fncpy(dst, src, size); + + ret = set_memory_rox((unsigned long)base, pages); + if (ret) + goto error_out; + + mutex_unlock(&part->lock); + + return dst_cpy; + +error_out: + mutex_unlock(&part->lock); + return NULL; +} +EXPORT_SYMBOL_GPL(sram_exec_copy); diff --git a/drivers/misc/sram.c b/drivers/misc/sram.c new file mode 100644 index 0000000000..e248c0a888 --- /dev/null +++ b/drivers/misc/sram.c @@ -0,0 +1,464 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Generic on-chip SRAM allocation driver + * + * Copyright (C) 2012 Philipp Zabel, Pengutronix + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sram.h" + +#define SRAM_GRANULARITY 32 + +static ssize_t sram_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t pos, size_t count) +{ + struct sram_partition *part; + + part = container_of(attr, struct sram_partition, battr); + + mutex_lock(&part->lock); + memcpy_fromio(buf, part->base + pos, count); + mutex_unlock(&part->lock); + + return count; +} + +static ssize_t sram_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t pos, size_t count) +{ + struct sram_partition *part; + + part = container_of(attr, struct sram_partition, battr); + + mutex_lock(&part->lock); + memcpy_toio(part->base + pos, buf, count); + mutex_unlock(&part->lock); + + return count; +} + +static int sram_add_pool(struct sram_dev *sram, struct sram_reserve *block, + phys_addr_t start, struct sram_partition *part) +{ + int ret; + + part->pool = devm_gen_pool_create(sram->dev, ilog2(SRAM_GRANULARITY), + NUMA_NO_NODE, block->label); + if (IS_ERR(part->pool)) + return PTR_ERR(part->pool); + + ret = gen_pool_add_virt(part->pool, (unsigned long)part->base, start, + block->size, NUMA_NO_NODE); + if (ret < 0) { + dev_err(sram->dev, "failed to register subpool: %d\n", ret); + return ret; + } + + return 0; +} + +static int sram_add_export(struct sram_dev *sram, struct sram_reserve *block, + phys_addr_t start, struct sram_partition *part) +{ + sysfs_bin_attr_init(&part->battr); + part->battr.attr.name = devm_kasprintf(sram->dev, GFP_KERNEL, + "%llx.sram", + (unsigned long long)start); + if (!part->battr.attr.name) + return -ENOMEM; + + part->battr.attr.mode = S_IRUSR | S_IWUSR; + part->battr.read = sram_read; + part->battr.write = sram_write; + part->battr.size = block->size; + + return device_create_bin_file(sram->dev, &part->battr); +} + +static int sram_add_partition(struct sram_dev *sram, struct sram_reserve *block, + phys_addr_t start) +{ + int ret; + struct sram_partition *part = &sram->partition[sram->partitions]; + + mutex_init(&part->lock); + + if (sram->config && sram->config->map_only_reserved) { + void 
__iomem *virt_base; + + if (sram->no_memory_wc) + virt_base = devm_ioremap_resource(sram->dev, &block->res); + else + virt_base = devm_ioremap_resource_wc(sram->dev, &block->res); + + if (IS_ERR(virt_base)) { + dev_err(sram->dev, "could not map SRAM at %pr\n", &block->res); + return PTR_ERR(virt_base); + } + + part->base = virt_base; + } else { + part->base = sram->virt_base + block->start; + } + + if (block->pool) { + ret = sram_add_pool(sram, block, start, part); + if (ret) + return ret; + } + if (block->export) { + ret = sram_add_export(sram, block, start, part); + if (ret) + return ret; + } + if (block->protect_exec) { + ret = sram_check_protect_exec(sram, block, part); + if (ret) + return ret; + + ret = sram_add_pool(sram, block, start, part); + if (ret) + return ret; + + sram_add_protect_exec(part); + } + + sram->partitions++; + + return 0; +} + +static void sram_free_partitions(struct sram_dev *sram) +{ + struct sram_partition *part; + + if (!sram->partitions) + return; + + part = &sram->partition[sram->partitions - 1]; + for (; sram->partitions; sram->partitions--, part--) { + if (part->battr.size) + device_remove_bin_file(sram->dev, &part->battr); + + if (part->pool && + gen_pool_avail(part->pool) < gen_pool_size(part->pool)) + dev_err(sram->dev, "removed pool while SRAM allocated\n"); + } +} + +static int sram_reserve_cmp(void *priv, const struct list_head *a, + const struct list_head *b) +{ + struct sram_reserve *ra = list_entry(a, struct sram_reserve, list); + struct sram_reserve *rb = list_entry(b, struct sram_reserve, list); + + return ra->start - rb->start; +} + +static int sram_reserve_regions(struct sram_dev *sram, struct resource *res) +{ + struct device_node *np = sram->dev->of_node, *child; + unsigned long size, cur_start, cur_size; + struct sram_reserve *rblocks, *block; + struct list_head reserve_list; + unsigned int nblocks, exports = 0; + const char *label; + int ret = 0; + + INIT_LIST_HEAD(&reserve_list); + + size = resource_size(res); + + /* + * We need an additional block to mark the end of the memory region + * after the reserved blocks from the dt are processed. + */ + nblocks = (np) ? 
of_get_available_child_count(np) + 1 : 1; + rblocks = kcalloc(nblocks, sizeof(*rblocks), GFP_KERNEL); + if (!rblocks) + return -ENOMEM; + + block = &rblocks[0]; + for_each_available_child_of_node(np, child) { + struct resource child_res; + + ret = of_address_to_resource(child, 0, &child_res); + if (ret < 0) { + dev_err(sram->dev, + "could not get address for node %pOF\n", + child); + goto err_chunks; + } + + if (child_res.start < res->start || child_res.end > res->end) { + dev_err(sram->dev, + "reserved block %pOF outside the sram area\n", + child); + ret = -EINVAL; + goto err_chunks; + } + + block->start = child_res.start - res->start; + block->size = resource_size(&child_res); + block->res = child_res; + list_add_tail(&block->list, &reserve_list); + + block->export = of_property_read_bool(child, "export"); + block->pool = of_property_read_bool(child, "pool"); + block->protect_exec = of_property_read_bool(child, "protect-exec"); + + if ((block->export || block->pool || block->protect_exec) && + block->size) { + exports++; + + label = NULL; + ret = of_property_read_string(child, "label", &label); + if (ret && ret != -EINVAL) { + dev_err(sram->dev, + "%pOF has invalid label name\n", + child); + goto err_chunks; + } + if (!label) + block->label = devm_kasprintf(sram->dev, GFP_KERNEL, + "%s", of_node_full_name(child)); + else + block->label = devm_kstrdup(sram->dev, + label, GFP_KERNEL); + if (!block->label) { + ret = -ENOMEM; + goto err_chunks; + } + + dev_dbg(sram->dev, "found %sblock '%s' 0x%x-0x%x\n", + block->export ? "exported " : "", block->label, + block->start, block->start + block->size); + } else { + dev_dbg(sram->dev, "found reserved block 0x%x-0x%x\n", + block->start, block->start + block->size); + } + + block++; + } + child = NULL; + + /* the last chunk marks the end of the region */ + rblocks[nblocks - 1].start = size; + rblocks[nblocks - 1].size = 0; + list_add_tail(&rblocks[nblocks - 1].list, &reserve_list); + + list_sort(NULL, &reserve_list, sram_reserve_cmp); + + if (exports) { + sram->partition = devm_kcalloc(sram->dev, + exports, sizeof(*sram->partition), + GFP_KERNEL); + if (!sram->partition) { + ret = -ENOMEM; + goto err_chunks; + } + } + + cur_start = 0; + list_for_each_entry(block, &reserve_list, list) { + /* can only happen if sections overlap */ + if (block->start < cur_start) { + dev_err(sram->dev, + "block at 0x%x starts after current offset 0x%lx\n", + block->start, cur_start); + ret = -EINVAL; + sram_free_partitions(sram); + goto err_chunks; + } + + if ((block->export || block->pool || block->protect_exec) && + block->size) { + ret = sram_add_partition(sram, block, + res->start + block->start); + if (ret) { + sram_free_partitions(sram); + goto err_chunks; + } + } + + /* current start is in a reserved block, so continue after it */ + if (block->start == cur_start) { + cur_start = block->start + block->size; + continue; + } + + /* + * allocate the space between the current starting + * address and the following reserved block, or the + * end of the region. 
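+ * Only these gaps are added to the device-wide pool; reserved blocks were + * either given their own partitions above or are simply skipped.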
+ */ + cur_size = block->start - cur_start; + + if (sram->pool) { + dev_dbg(sram->dev, "adding chunk 0x%lx-0x%lx\n", + cur_start, cur_start + cur_size); + + ret = gen_pool_add_virt(sram->pool, + (unsigned long)sram->virt_base + cur_start, + res->start + cur_start, cur_size, -1); + if (ret < 0) { + sram_free_partitions(sram); + goto err_chunks; + } + } + + /* next allocation after this reserved block */ + cur_start = block->start + block->size; + } + +err_chunks: + of_node_put(child); + kfree(rblocks); + + return ret; +} + +static int atmel_securam_wait(void) +{ + struct regmap *regmap; + u32 val; + + regmap = syscon_regmap_lookup_by_compatible("atmel,sama5d2-secumod"); + if (IS_ERR(regmap)) + return -ENODEV; + + return regmap_read_poll_timeout(regmap, AT91_SECUMOD_RAMRDY, val, + val & AT91_SECUMOD_RAMRDY_READY, + 10000, 500000); +} + +static const struct sram_config atmel_securam_config = { + .init = atmel_securam_wait, +}; + +/* + * SYSRAM contains areas that are not accessible by the + * kernel, such as the first 256K that is reserved for TZ. + * Accesses to those areas (including speculative accesses) + * trigger SErrors. As such we must map only the areas of + * SYSRAM specified in the device tree. + */ +static const struct sram_config tegra_sysram_config = { + .map_only_reserved = true, +}; + +static const struct of_device_id sram_dt_ids[] = { + { .compatible = "mmio-sram" }, + { .compatible = "atmel,sama5d2-securam", .data = &atmel_securam_config }, + { .compatible = "nvidia,tegra186-sysram", .data = &tegra_sysram_config }, + { .compatible = "nvidia,tegra194-sysram", .data = &tegra_sysram_config }, + { .compatible = "nvidia,tegra234-sysram", .data = &tegra_sysram_config }, + {} +}; + +static int sram_probe(struct platform_device *pdev) +{ + const struct sram_config *config; + struct sram_dev *sram; + int ret; + struct resource *res; + struct clk *clk; + + config = of_device_get_match_data(&pdev->dev); + + sram = devm_kzalloc(&pdev->dev, sizeof(*sram), GFP_KERNEL); + if (!sram) + return -ENOMEM; + + sram->dev = &pdev->dev; + sram->no_memory_wc = of_property_read_bool(pdev->dev.of_node, "no-memory-wc"); + sram->config = config; + + if (!config || !config->map_only_reserved) { + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (sram->no_memory_wc) + sram->virt_base = devm_ioremap_resource(&pdev->dev, res); + else + sram->virt_base = devm_ioremap_resource_wc(&pdev->dev, res); + if (IS_ERR(sram->virt_base)) { + dev_err(&pdev->dev, "could not map SRAM registers\n"); + return PTR_ERR(sram->virt_base); + } + + sram->pool = devm_gen_pool_create(sram->dev, ilog2(SRAM_GRANULARITY), + NUMA_NO_NODE, NULL); + if (IS_ERR(sram->pool)) + return PTR_ERR(sram->pool); + } + + clk = devm_clk_get_optional_enabled(sram->dev, NULL); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + ret = sram_reserve_regions(sram, + platform_get_resource(pdev, IORESOURCE_MEM, 0)); + if (ret) + return ret; + + platform_set_drvdata(pdev, sram); + + if (config && config->init) { + ret = config->init(); + if (ret) + goto err_free_partitions; + } + + if (sram->pool) + dev_dbg(sram->dev, "SRAM pool: %zu KiB @ 0x%p\n", + gen_pool_size(sram->pool) / 1024, sram->virt_base); + + return 0; + +err_free_partitions: + sram_free_partitions(sram); + + return ret; +} + +static int sram_remove(struct platform_device *pdev) +{ + struct sram_dev *sram = platform_get_drvdata(pdev); + + sram_free_partitions(sram); + + if (sram->pool && gen_pool_avail(sram->pool) < gen_pool_size(sram->pool)) + dev_err(sram->dev, "removed while SRAM 
allocated\n"); + + return 0; +} + +static struct platform_driver sram_driver = { + .driver = { + .name = "sram", + .of_match_table = sram_dt_ids, + }, + .probe = sram_probe, + .remove = sram_remove, +}; + +static int __init sram_init(void) +{ + return platform_driver_register(&sram_driver); +} + +postcore_initcall(sram_init); diff --git a/drivers/misc/sram.h b/drivers/misc/sram.h new file mode 100644 index 0000000000..397205b8bf --- /dev/null +++ b/drivers/misc/sram.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Defines for the SRAM driver + */ +#ifndef __SRAM_H +#define __SRAM_H + +struct sram_config { + int (*init)(void); + bool map_only_reserved; +}; + +struct sram_partition { + void __iomem *base; + + struct gen_pool *pool; + struct bin_attribute battr; + struct mutex lock; + struct list_head list; +}; + +struct sram_dev { + const struct sram_config *config; + + struct device *dev; + void __iomem *virt_base; + bool no_memory_wc; + + struct gen_pool *pool; + + struct sram_partition *partition; + u32 partitions; +}; + +struct sram_reserve { + struct list_head list; + u32 start; + u32 size; + struct resource res; + bool export; + bool pool; + bool protect_exec; + const char *label; +}; + +#ifdef CONFIG_SRAM_EXEC +int sram_check_protect_exec(struct sram_dev *sram, struct sram_reserve *block, + struct sram_partition *part); +int sram_add_protect_exec(struct sram_partition *part); +#else +static inline int sram_check_protect_exec(struct sram_dev *sram, + struct sram_reserve *block, + struct sram_partition *part) +{ + return -ENODEV; +} + +static inline int sram_add_protect_exec(struct sram_partition *part) +{ + return -ENODEV; +} +#endif /* CONFIG_SRAM_EXEC */ +#endif /* __SRAM_H */ diff --git a/drivers/misc/ti-st/Kconfig b/drivers/misc/ti-st/Kconfig new file mode 100644 index 0000000000..1503a6496f --- /dev/null +++ b/drivers/misc/ti-st/Kconfig @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# TI's shared transport line discipline and the protocol +# drivers (BT, FM and GPS) +# +menu "Texas Instruments shared transport line discipline" +config TI_ST + tristate "Shared transport core driver" + depends on NET && TTY + depends on GPIOLIB || COMPILE_TEST + select FW_LOADER + help + This enables the shared transport core driver for TI + BT / FM and GPS combo chips. This enables protocol drivers + to register themselves with core and send data, the responses + are returned to relevant protocol drivers based on their + packet types. 
+ +endmenu diff --git a/drivers/misc/ti-st/Makefile b/drivers/misc/ti-st/Makefile new file mode 100644 index 0000000000..9339310095 --- /dev/null +++ b/drivers/misc/ti-st/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for TI's shared transport line discipline +# and its protocol drivers (BT, FM, GPS) +# +obj-$(CONFIG_TI_ST) += st_drv.o +st_drv-objs := st_core.o st_kim.o st_ll.o diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c new file mode 100644 index 0000000000..b878431553 --- /dev/null +++ b/drivers/misc/ti-st/st_core.c @@ -0,0 +1,918 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Shared Transport Line discipline driver Core + * This hooks up ST KIM driver and ST LL driver + * Copyright (C) 2009-2010 Texas Instruments + * Author: Pavan Savoy + */ + +#define pr_fmt(fmt) "(stc): " fmt +#include +#include +#include + +#include +#include + +#include +#include + +/* + * function pointer pointing to either, + * st_kim_recv during registration to receive fw download responses + * st_int_recv after registration to receive proto stack responses + */ +static void (*st_recv)(void *disc_data, const u8 *ptr, size_t count); + +/********************************************************************/ +static void add_channel_to_table(struct st_data_s *st_gdata, + struct st_proto_s *new_proto) +{ + pr_info("%s: id %d\n", __func__, new_proto->chnl_id); + /* list now has the channel id as index itself */ + st_gdata->list[new_proto->chnl_id] = new_proto; + st_gdata->is_registered[new_proto->chnl_id] = true; +} + +static void remove_channel_from_table(struct st_data_s *st_gdata, + struct st_proto_s *proto) +{ + pr_info("%s: id %d\n", __func__, proto->chnl_id); +/* st_gdata->list[proto->chnl_id] = NULL; */ + st_gdata->is_registered[proto->chnl_id] = false; +} + +/* + * called from KIM during firmware download. + * + * This is a wrapper function to tty->ops->write_room. + * It returns number of free space available in + * uart tx buffer. 
+ */ +int st_get_uart_wr_room(struct st_data_s *st_gdata) +{ + if (unlikely(st_gdata == NULL || st_gdata->tty == NULL)) { + pr_err("tty unavailable to perform write"); + return -1; + } + + return tty_write_room(st_gdata->tty); +} + +/* + * can be called in from + * -- KIM (during fw download) + * -- ST Core (during st_write) + * + * This is the internal write function - a wrapper + * to tty->ops->write + */ +int st_int_write(struct st_data_s *st_gdata, + const unsigned char *data, int count) +{ + struct tty_struct *tty; + if (unlikely(st_gdata == NULL || st_gdata->tty == NULL)) { + pr_err("tty unavailable to perform write"); + return -EINVAL; + } + tty = st_gdata->tty; +#ifdef VERBOSE + print_hex_dump(KERN_DEBUG, "<out<", DUMP_PREFIX_NONE, + 16, 1, data, count, 0); +#endif + return tty->ops->write(tty, data, count); + +} + +/* + * push the skb received to relevant + * protocol stacks + */ +static void st_send_frame(unsigned char chnl_id, struct st_data_s *st_gdata) +{ + pr_debug(" %s(prot:%d) ", __func__, chnl_id); + + if (unlikely + (st_gdata == NULL || st_gdata->rx_skb == NULL + || st_gdata->is_registered[chnl_id] == false)) { + pr_err("chnl_id %d not registered, no data to send?", + chnl_id); + kfree_skb(st_gdata->rx_skb); + return; + } + /* + * this cannot fail + * this shouldn't take long + * - should be just skb_queue_tail for the + * protocol stack driver + */ + if (likely(st_gdata->list[chnl_id]->recv != NULL)) { + if (unlikely + (st_gdata->list[chnl_id]->recv + (st_gdata->list[chnl_id]->priv_data, st_gdata->rx_skb) + != 0)) { + pr_err(" proto stack %d's ->recv failed", chnl_id); + kfree_skb(st_gdata->rx_skb); + return; + } + } else { + pr_err(" proto stack %d's ->recv null", chnl_id); + kfree_skb(st_gdata->rx_skb); + } + return; +} + +/* + * st_reg_complete - to call registration complete callbacks + * of all protocol stack drivers + * This function is being called with spin lock held, protocol drivers are + * only expected to complete their waits and do nothing more than that. + */ +static void st_reg_complete(struct st_data_s *st_gdata, int err) +{ + unsigned char i = 0; + pr_info(" %s ", __func__); + for (i = 0; i < ST_MAX_CHANNELS; i++) { + if (likely(st_gdata != NULL && + st_gdata->is_registered[i] == true && + st_gdata->list[i]->reg_complete_cb != NULL)) { + st_gdata->list[i]->reg_complete_cb + (st_gdata->list[i]->priv_data, err); + pr_info("protocol %d's cb sent %d\n", i, err); + if (err) { /* cleanup registered protocol */ + st_gdata->is_registered[i] = false; + if (st_gdata->protos_registered) + st_gdata->protos_registered--; + } + } + } +} + +static inline int st_check_data_len(struct st_data_s *st_gdata, + unsigned char chnl_id, int len) +{ + int room = skb_tailroom(st_gdata->rx_skb); + + pr_debug("len %d room %d", len, room); + + if (!len) { + /* + * Received packet has only packet header and + * has zero length payload. So, ask ST CORE to + * forward the packet to protocol driver (BT/FM/GPS) + */ + st_send_frame(chnl_id, st_gdata); + + } else if (len > room) { + /* + * Received packet's payload length is larger. + * We can't accommodate it in created skb. + */ + pr_err("Data length is too large len %d room %d", len, + room); + kfree_skb(st_gdata->rx_skb); + } else { + /* + * Packet header has non-zero payload length and + * we have enough space in created skb.
Lets read + * payload data */ + st_gdata->rx_state = ST_W4_DATA; + st_gdata->rx_count = len; + return len; + } + + /* Change ST state to continue to process next packet */ + st_gdata->rx_state = ST_W4_PACKET_TYPE; + st_gdata->rx_skb = NULL; + st_gdata->rx_count = 0; + st_gdata->rx_chnl = 0; + + return 0; +} + +/* + * st_wakeup_ack - internal function for action when wake-up ack + * received + */ +static inline void st_wakeup_ack(struct st_data_s *st_gdata, + unsigned char cmd) +{ + struct sk_buff *waiting_skb; + unsigned long flags = 0; + + spin_lock_irqsave(&st_gdata->lock, flags); + /* + * de-Q from waitQ and Q in txQ now that the + * chip is awake + */ + while ((waiting_skb = skb_dequeue(&st_gdata->tx_waitq))) + skb_queue_tail(&st_gdata->txq, waiting_skb); + + /* state forwarded to ST LL */ + st_ll_sleep_state(st_gdata, (unsigned long)cmd); + spin_unlock_irqrestore(&st_gdata->lock, flags); + + /* wake up to send the recently copied skbs from waitQ */ + st_tx_wakeup(st_gdata); +} + +/* + * st_int_recv - ST's internal receive function. + * Decodes received RAW data and forwards to corresponding + * client drivers (Bluetooth,FM,GPS..etc). + * This can receive various types of packets, + * HCI-Events, ACL, SCO, 4 types of HCI-LL PM packets + * CH-8 packets from FM, CH-9 packets from GPS cores. + */ +static void st_int_recv(void *disc_data, const u8 *ptr, size_t count) +{ + struct st_proto_s *proto; + unsigned short payload_len = 0; + int len = 0; + unsigned char type = 0; + unsigned char *plen; + struct st_data_s *st_gdata = (struct st_data_s *)disc_data; + unsigned long flags; + + if (st_gdata == NULL) { + pr_err(" received null from TTY "); + return; + } + + pr_debug("count %zu rx_state %ld" + "rx_count %ld", count, st_gdata->rx_state, + st_gdata->rx_count); + + spin_lock_irqsave(&st_gdata->lock, flags); + /* Decode received bytes here */ + while (count) { + if (st_gdata->rx_count) { + len = min_t(unsigned int, st_gdata->rx_count, count); + skb_put_data(st_gdata->rx_skb, ptr, len); + st_gdata->rx_count -= len; + count -= len; + ptr += len; + + if (st_gdata->rx_count) + continue; + + /* Check ST RX state machine , where are we? */ + switch (st_gdata->rx_state) { + /* Waiting for complete packet ? 
*/ + case ST_W4_DATA: + pr_debug("Complete pkt received"); + /* + * Ask ST CORE to forward + * the packet to protocol driver + */ + st_send_frame(st_gdata->rx_chnl, st_gdata); + + st_gdata->rx_state = ST_W4_PACKET_TYPE; + st_gdata->rx_skb = NULL; + continue; + /* parse the header to know details */ + case ST_W4_HEADER: + proto = st_gdata->list[st_gdata->rx_chnl]; + plen = + &st_gdata->rx_skb->data + [proto->offset_len_in_hdr]; + pr_debug("plen pointing to %x\n", *plen); + if (proto->len_size == 1) /* 1 byte len field */ + payload_len = *(unsigned char *)plen; + else if (proto->len_size == 2) + payload_len = + __le16_to_cpu(*(unsigned short *)plen); + else + pr_info("%s: invalid length " + "for id %d\n", + __func__, proto->chnl_id); + st_check_data_len(st_gdata, proto->chnl_id, + payload_len); + pr_debug("off %d, pay len %d\n", + proto->offset_len_in_hdr, payload_len); + continue; + } /* end of switch rx_state */ + } + + /* end of if rx_count */ + + /* + * Check first byte of packet and identify module + * owner (BT/FM/GPS) + */ + switch (*ptr) { + case LL_SLEEP_IND: + case LL_SLEEP_ACK: + case LL_WAKE_UP_IND: + pr_debug("PM packet"); + /* + * this takes appropriate action based on + * sleep state received -- + */ + st_ll_sleep_state(st_gdata, *ptr); + /* + * if WAKEUP_IND collides copy from waitq to txq + * and assume chip awake + */ + spin_unlock_irqrestore(&st_gdata->lock, flags); + if (st_ll_getstate(st_gdata) == ST_LL_AWAKE) + st_wakeup_ack(st_gdata, LL_WAKE_UP_ACK); + spin_lock_irqsave(&st_gdata->lock, flags); + + ptr++; + count--; + continue; + case LL_WAKE_UP_ACK: + pr_debug("PM packet"); + + spin_unlock_irqrestore(&st_gdata->lock, flags); + /* wake up ack received */ + st_wakeup_ack(st_gdata, *ptr); + spin_lock_irqsave(&st_gdata->lock, flags); + + ptr++; + count--; + continue; + /* Unknown packet? */ + default: + type = *ptr; + + /* + * Default case means non-HCILL packets, + * possibilities are packets for: + * (a) valid protocol - Supported Protocols within + * the ST_MAX_CHANNELS. + * (b) registered protocol - Checked by + * "st_gdata->list[type] == NULL)" are supported + * protocols only. + * Rules out any invalid protocol and + * unregistered protocols with channel ID < 16. + */ + + if ((type >= ST_MAX_CHANNELS) || + (st_gdata->list[type] == NULL)) { + pr_err("chip/interface misbehavior: " + "dropping frame starting " + "with 0x%02x\n", type); + goto done; + } + + st_gdata->rx_skb = alloc_skb( + st_gdata->list[type]->max_frame_size, + GFP_ATOMIC); + if (st_gdata->rx_skb == NULL) { + pr_err("out of memory: dropping\n"); + goto done; + } + + skb_reserve(st_gdata->rx_skb, + st_gdata->list[type]->reserve); + /* next 2 required for BT only */ + st_gdata->rx_skb->cb[0] = type; /*pkt_type*/ + st_gdata->rx_skb->cb[1] = 0; /*incoming*/ + st_gdata->rx_chnl = *ptr; + st_gdata->rx_state = ST_W4_HEADER; + st_gdata->rx_count = st_gdata->list[type]->hdr_len; + pr_debug("rx_count %ld\n", st_gdata->rx_count); + } + ptr++; + count--; + } +done: + spin_unlock_irqrestore(&st_gdata->lock, flags); + pr_debug("done %s", __func__); + return; +} + +/* + * st_int_dequeue - internal de-Q function. + * If the previous data set was not written + * completely, return that skb which has the pending data. + * In normal cases, return top of txq. 
+ */ +static struct sk_buff *st_int_dequeue(struct st_data_s *st_gdata) +{ + struct sk_buff *returning_skb; + + pr_debug("%s", __func__); + if (st_gdata->tx_skb != NULL) { + returning_skb = st_gdata->tx_skb; + st_gdata->tx_skb = NULL; + return returning_skb; + } + return skb_dequeue(&st_gdata->txq); +} + +/* + * st_int_enqueue - internal Q-ing function. + * Will either Q the skb to txq or the tx_waitq + * depending on the ST LL state. + * If the chip is asleep, then Q it onto waitq and + * wakeup the chip. + * txq and waitq needs protection since the other contexts + * may be sending data, waking up chip. + */ +static void st_int_enqueue(struct st_data_s *st_gdata, struct sk_buff *skb) +{ + unsigned long flags = 0; + + pr_debug("%s", __func__); + spin_lock_irqsave(&st_gdata->lock, flags); + + switch (st_ll_getstate(st_gdata)) { + case ST_LL_AWAKE: + pr_debug("ST LL is AWAKE, sending normally"); + skb_queue_tail(&st_gdata->txq, skb); + break; + case ST_LL_ASLEEP_TO_AWAKE: + skb_queue_tail(&st_gdata->tx_waitq, skb); + break; + case ST_LL_AWAKE_TO_ASLEEP: + pr_err("ST LL is illegal state(%ld)," + "purging received skb.", st_ll_getstate(st_gdata)); + dev_kfree_skb_irq(skb); + break; + case ST_LL_ASLEEP: + skb_queue_tail(&st_gdata->tx_waitq, skb); + st_ll_wakeup(st_gdata); + break; + default: + pr_err("ST LL is illegal state(%ld)," + "purging received skb.", st_ll_getstate(st_gdata)); + dev_kfree_skb_irq(skb); + break; + } + + spin_unlock_irqrestore(&st_gdata->lock, flags); + pr_debug("done %s", __func__); + return; +} + +/* + * internal wakeup function + * called from either + * - TTY layer when write's finished + * - st_write (in context of the protocol stack) + */ +static void work_fn_write_wakeup(struct work_struct *work) +{ + struct st_data_s *st_gdata = container_of(work, struct st_data_s, + work_write_wakeup); + + st_tx_wakeup((void *)st_gdata); +} +void st_tx_wakeup(struct st_data_s *st_data) +{ + struct sk_buff *skb; + unsigned long flags; /* for irq save flags */ + pr_debug("%s", __func__); + /* check for sending & set flag sending here */ + if (test_and_set_bit(ST_TX_SENDING, &st_data->tx_state)) { + pr_debug("ST already sending"); + /* keep sending */ + set_bit(ST_TX_WAKEUP, &st_data->tx_state); + return; + /* TX_WAKEUP will be checked in another + * context + */ + } + do { /* come back if st_tx_wakeup is set */ + /* woke-up to write */ + clear_bit(ST_TX_WAKEUP, &st_data->tx_state); + while ((skb = st_int_dequeue(st_data))) { + int len; + spin_lock_irqsave(&st_data->lock, flags); + /* enable wake-up from TTY */ + set_bit(TTY_DO_WRITE_WAKEUP, &st_data->tty->flags); + len = st_int_write(st_data, skb->data, skb->len); + skb_pull(skb, len); + /* if skb->len = len as expected, skb->len=0 */ + if (skb->len) { + /* would be the next skb to be sent */ + st_data->tx_skb = skb; + spin_unlock_irqrestore(&st_data->lock, flags); + break; + } + dev_kfree_skb_irq(skb); + spin_unlock_irqrestore(&st_data->lock, flags); + } + /* if wake-up is set in another context- restart sending */ + } while (test_bit(ST_TX_WAKEUP, &st_data->tx_state)); + + /* clear flag sending */ + clear_bit(ST_TX_SENDING, &st_data->tx_state); +} + +/********************************************************************/ +/* functions called from ST KIM +*/ +void kim_st_list_protocols(struct st_data_s *st_gdata, void *buf) +{ + seq_printf(buf, "[%d]\nBT=%c\nFM=%c\nGPS=%c\n", + st_gdata->protos_registered, + st_gdata->is_registered[0x04] == true ? 'R' : 'U', + st_gdata->is_registered[0x08] == true ? 
'R' : 'U', + st_gdata->is_registered[0x09] == true ? 'R' : 'U'); +} + +/********************************************************************/ +/* + * functions called from protocol stack drivers + * to be EXPORT-ed + */ +long st_register(struct st_proto_s *new_proto) +{ + struct st_data_s *st_gdata; + long err = 0; + unsigned long flags = 0; + + st_kim_ref(&st_gdata, 0); + if (st_gdata == NULL || new_proto == NULL || new_proto->recv == NULL + || new_proto->reg_complete_cb == NULL) { + pr_err("gdata/new_proto/recv or reg_complete_cb not ready"); + return -EINVAL; + } + + if (new_proto->chnl_id >= ST_MAX_CHANNELS) { + pr_err("chnl_id %d not supported", new_proto->chnl_id); + return -EPROTONOSUPPORT; + } + + if (st_gdata->is_registered[new_proto->chnl_id] == true) { + pr_err("chnl_id %d already registered", new_proto->chnl_id); + return -EALREADY; + } + + /* can be from process context only */ + spin_lock_irqsave(&st_gdata->lock, flags); + + if (test_bit(ST_REG_IN_PROGRESS, &st_gdata->st_state)) { + pr_info(" ST_REG_IN_PROGRESS:%d ", new_proto->chnl_id); + /* fw download in progress */ + + add_channel_to_table(st_gdata, new_proto); + st_gdata->protos_registered++; + new_proto->write = st_write; + + set_bit(ST_REG_PENDING, &st_gdata->st_state); + spin_unlock_irqrestore(&st_gdata->lock, flags); + return -EINPROGRESS; + } else if (st_gdata->protos_registered == ST_EMPTY) { + pr_info(" chnl_id list empty :%d ", new_proto->chnl_id); + set_bit(ST_REG_IN_PROGRESS, &st_gdata->st_state); + st_recv = st_kim_recv; + + /* enable the ST LL - to set default chip state */ + st_ll_enable(st_gdata); + + /* release lock previously held - re-locked below */ + spin_unlock_irqrestore(&st_gdata->lock, flags); + + /* + * this may take a while to complete + * since it involves BT fw download + */ + err = st_kim_start(st_gdata->kim_data); + if (err != 0) { + clear_bit(ST_REG_IN_PROGRESS, &st_gdata->st_state); + if ((st_gdata->protos_registered != ST_EMPTY) && + (test_bit(ST_REG_PENDING, &st_gdata->st_state))) { + pr_err(" KIM failure complete callback "); + spin_lock_irqsave(&st_gdata->lock, flags); + st_reg_complete(st_gdata, err); + spin_unlock_irqrestore(&st_gdata->lock, flags); + clear_bit(ST_REG_PENDING, &st_gdata->st_state); + } + return -EINVAL; + } + + spin_lock_irqsave(&st_gdata->lock, flags); + + clear_bit(ST_REG_IN_PROGRESS, &st_gdata->st_state); + st_recv = st_int_recv; + + /* + * this is where all pending registration + * are signalled to be complete by calling callback functions + */ + if ((st_gdata->protos_registered != ST_EMPTY) && + (test_bit(ST_REG_PENDING, &st_gdata->st_state))) { + pr_debug(" call reg complete callback "); + st_reg_complete(st_gdata, 0); + } + clear_bit(ST_REG_PENDING, &st_gdata->st_state); + + /* + * check for already registered once more, + * since the above check is old + */ + if (st_gdata->is_registered[new_proto->chnl_id] == true) { + pr_err(" proto %d already registered ", + new_proto->chnl_id); + spin_unlock_irqrestore(&st_gdata->lock, flags); + return -EALREADY; + } + + add_channel_to_table(st_gdata, new_proto); + st_gdata->protos_registered++; + new_proto->write = st_write; + spin_unlock_irqrestore(&st_gdata->lock, flags); + return err; + } + /* if fw is already downloaded & new stack registers protocol */ + else { + add_channel_to_table(st_gdata, new_proto); + st_gdata->protos_registered++; + new_proto->write = st_write; + + /* lock already held before entering else */ + spin_unlock_irqrestore(&st_gdata->lock, flags); + return err; + } +} +EXPORT_SYMBOL_GPL(st_register); 
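For context, st_register() above is the entry point a protocol driver (BT/FM/GPS) uses to attach to a channel; on -EINPROGRESS the caller is expected to wait for its reg_complete_cb. The following is a minimal illustrative sketch of such a caller, not part of the upstream file: the channel id (0x08, the FM channel as listed by kim_st_list_protocols above), the header-field sizes and the example_* identifiers are assumptions chosen only for the example, and it assumes <linux/ti_wilink_st.h>, <linux/skbuff.h> and <linux/completion.h> are included.

static long example_reg_result;
static DECLARE_COMPLETION(example_reg_done);

/* ST core hands a complete frame for this channel to the driver here */
static long example_recv(void *priv_data, struct sk_buff *skb)
{
	/* a real driver would queue the skb to its stack; the sketch drops it */
	kfree_skb(skb);
	return 0;
}

/* called by st_reg_complete() once a pending registration finishes */
static void example_reg_complete_cb(void *priv_data, int data)
{
	example_reg_result = data;
	complete(&example_reg_done);
}

static struct st_proto_s example_proto = {
	.chnl_id		= 0x08,	/* FM channel id, for illustration */
	.max_frame_size		= 255,	/* assumed payload bound */
	.hdr_len		= 3,	/* assumed header layout */
	.offset_len_in_hdr	= 1,
	.len_size		= 1,
	.reserve		= 1,
	.recv			= example_recv,
	.reg_complete_cb	= example_reg_complete_cb,
};

static int example_attach(void)
{
	long err = st_register(&example_proto);

	if (err == -EINPROGRESS) {
		/* firmware download in flight: wait for reg_complete_cb */
		wait_for_completion(&example_reg_done);
		err = example_reg_result;
	}
	if (err)
		return err;

	/*
	 * On success st_register() has filled example_proto.write;
	 * frames are sent via example_proto.write(skb) and the channel
	 * is released later with st_unregister(&example_proto).
	 */
	return 0;
}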
+ +/* + * to unregister a protocol - + * to be called from protocol stack driver + */ +long st_unregister(struct st_proto_s *proto) +{ + long err = 0; + unsigned long flags = 0; + struct st_data_s *st_gdata; + + pr_debug("%s: %d ", __func__, proto->chnl_id); + + st_kim_ref(&st_gdata, 0); + if (!st_gdata || proto->chnl_id >= ST_MAX_CHANNELS) { + pr_err(" chnl_id %d not supported", proto->chnl_id); + return -EPROTONOSUPPORT; + } + + spin_lock_irqsave(&st_gdata->lock, flags); + + if (st_gdata->is_registered[proto->chnl_id] == false) { + pr_err(" chnl_id %d not registered", proto->chnl_id); + spin_unlock_irqrestore(&st_gdata->lock, flags); + return -EPROTONOSUPPORT; + } + + if (st_gdata->protos_registered) + st_gdata->protos_registered--; + + remove_channel_from_table(st_gdata, proto); + spin_unlock_irqrestore(&st_gdata->lock, flags); + + if ((st_gdata->protos_registered == ST_EMPTY) && + (!test_bit(ST_REG_PENDING, &st_gdata->st_state))) { + pr_info(" all chnl_ids unregistered "); + + /* stop traffic on tty */ + if (st_gdata->tty) { + tty_ldisc_flush(st_gdata->tty); + stop_tty(st_gdata->tty); + } + + /* all chnl_ids now unregistered */ + st_kim_stop(st_gdata->kim_data); + /* disable ST LL */ + st_ll_disable(st_gdata); + } + return err; +} + +/* + * called in protocol stack drivers + * via the write function pointer + */ +long st_write(struct sk_buff *skb) +{ + struct st_data_s *st_gdata; + long len; + + st_kim_ref(&st_gdata, 0); + if (unlikely(skb == NULL || st_gdata == NULL + || st_gdata->tty == NULL)) { + pr_err("data/tty unavailable to perform write"); + return -EINVAL; + } + + pr_debug("%d to be written", skb->len); + len = skb->len; + + /* st_ll to decide where to enqueue the skb */ + st_int_enqueue(st_gdata, skb); + /* wake up */ + st_tx_wakeup(st_gdata); + + /* return number of bytes written */ + return len; +} + +/* for protocols making use of shared transport */ +EXPORT_SYMBOL_GPL(st_unregister); + +/********************************************************************/ +/* + * functions called from TTY layer + */ +static int st_tty_open(struct tty_struct *tty) +{ + struct st_data_s *st_gdata; + pr_info("%s ", __func__); + + st_kim_ref(&st_gdata, 0); + st_gdata->tty = tty; + tty->disc_data = st_gdata; + + /* don't do an wakeup for now */ + clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); + + /* mem already allocated + */ + tty->receive_room = 65536; + /* Flush any pending characters in the driver and discipline. */ + tty_ldisc_flush(tty); + tty_driver_flush_buffer(tty); + /* + * signal to UIM via KIM that - + * installation of N_TI_WL ldisc is complete + */ + st_kim_complete(st_gdata->kim_data); + pr_debug("done %s", __func__); + + return 0; +} + +static void st_tty_close(struct tty_struct *tty) +{ + unsigned char i; + unsigned long flags; + struct st_data_s *st_gdata = tty->disc_data; + + pr_info("%s ", __func__); + + /* + * TODO: + * if a protocol has been registered & line discipline + * un-installed for some reason - what should be done ? + */ + spin_lock_irqsave(&st_gdata->lock, flags); + for (i = ST_BT; i < ST_MAX_CHANNELS; i++) { + if (st_gdata->is_registered[i] == true) + pr_err("%d not un-registered", i); + st_gdata->list[i] = NULL; + st_gdata->is_registered[i] = false; + } + st_gdata->protos_registered = 0; + spin_unlock_irqrestore(&st_gdata->lock, flags); + /* + * signal to UIM via KIM that - + * N_TI_WL ldisc is un-installed + */ + st_kim_complete(st_gdata->kim_data); + st_gdata->tty = NULL; + /* Flush any pending characters in the driver and discipline. 
*/ + tty_ldisc_flush(tty); + tty_driver_flush_buffer(tty); + + spin_lock_irqsave(&st_gdata->lock, flags); + /* empty out txq and tx_waitq */ + skb_queue_purge(&st_gdata->txq); + skb_queue_purge(&st_gdata->tx_waitq); + /* reset the TTY Rx states of ST */ + st_gdata->rx_count = 0; + st_gdata->rx_state = ST_W4_PACKET_TYPE; + kfree_skb(st_gdata->rx_skb); + st_gdata->rx_skb = NULL; + spin_unlock_irqrestore(&st_gdata->lock, flags); + + pr_debug("%s: done ", __func__); +} + +static void st_tty_receive(struct tty_struct *tty, const u8 *data, + const u8 *tty_flags, size_t count) +{ +#ifdef VERBOSE + print_hex_dump(KERN_DEBUG, ">in>", DUMP_PREFIX_NONE, + 16, 1, data, count, 0); +#endif + + /* + * if fw download is in progress then route incoming data + * to KIM for validation + */ + st_recv(tty->disc_data, data, count); + pr_debug("done %s", __func__); +} + +/* + * wake-up function called in from the TTY layer + * inside the internal wakeup function will be called + */ +static void st_tty_wakeup(struct tty_struct *tty) +{ + struct st_data_s *st_gdata = tty->disc_data; + pr_debug("%s ", __func__); + /* don't do an wakeup for now */ + clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); + + /* + * schedule the internal wakeup instead of calling directly to + * avoid lockup (port->lock needed in tty->ops->write is + * already taken here + */ + schedule_work(&st_gdata->work_write_wakeup); +} + +static void st_tty_flush_buffer(struct tty_struct *tty) +{ + struct st_data_s *st_gdata = tty->disc_data; + pr_debug("%s ", __func__); + + kfree_skb(st_gdata->tx_skb); + st_gdata->tx_skb = NULL; + + tty_driver_flush_buffer(tty); + return; +} + +static struct tty_ldisc_ops st_ldisc_ops = { + .num = N_TI_WL, + .name = "n_st", + .open = st_tty_open, + .close = st_tty_close, + .receive_buf = st_tty_receive, + .write_wakeup = st_tty_wakeup, + .flush_buffer = st_tty_flush_buffer, + .owner = THIS_MODULE +}; + +/********************************************************************/ +int st_core_init(struct st_data_s **core_data) +{ + struct st_data_s *st_gdata; + long err; + + err = tty_register_ldisc(&st_ldisc_ops); + if (err) { + pr_err("error registering %d line discipline %ld", + N_TI_WL, err); + return err; + } + pr_debug("registered n_shared line discipline"); + + st_gdata = kzalloc(sizeof(struct st_data_s), GFP_KERNEL); + if (!st_gdata) { + pr_err("memory allocation failed"); + err = -ENOMEM; + goto err_unreg_ldisc; + } + + /* Initialize ST TxQ and Tx waitQ queue head. All BT/FM/GPS module skb's + * will be pushed in this queue for actual transmission. 
+ */ + skb_queue_head_init(&st_gdata->txq); + skb_queue_head_init(&st_gdata->tx_waitq); + + /* Locking used in st_int_enqueue() to avoid multiple execution */ + spin_lock_init(&st_gdata->lock); + + err = st_ll_init(st_gdata); + if (err) { + pr_err("error during st_ll initialization(%ld)", err); + goto err_free_gdata; + } + + INIT_WORK(&st_gdata->work_write_wakeup, work_fn_write_wakeup); + + *core_data = st_gdata; + return 0; +err_free_gdata: + kfree(st_gdata); +err_unreg_ldisc: + tty_unregister_ldisc(&st_ldisc_ops); + return err; +} + +void st_core_exit(struct st_data_s *st_gdata) +{ + long err; + /* internal module cleanup */ + err = st_ll_deinit(st_gdata); + if (err) + pr_err("error during deinit of ST LL %ld", err); + + if (st_gdata != NULL) { + /* Free ST Tx Qs and skbs */ + skb_queue_purge(&st_gdata->txq); + skb_queue_purge(&st_gdata->tx_waitq); + kfree_skb(st_gdata->rx_skb); + kfree_skb(st_gdata->tx_skb); + /* TTY ldisc cleanup */ + tty_unregister_ldisc(&st_ldisc_ops); + /* free the global data pointer */ + kfree(st_gdata); + } +} diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c new file mode 100644 index 0000000000..fe682e0553 --- /dev/null +++ b/drivers/misc/ti-st/st_kim.c @@ -0,0 +1,839 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Shared Transport Line discipline driver Core + * Init Manager module responsible for GPIO control + * and firmware download + * Copyright (C) 2009-2010 Texas Instruments + * Author: Pavan Savoy + */ + +#define pr_fmt(fmt) "(stk) :" fmt +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define MAX_ST_DEVICES 3 /* Imagine 1 on each UART for now */ +static struct platform_device *st_kim_devices[MAX_ST_DEVICES]; + +/**********************************************************************/ +/* internal functions */ + +/* + * st_get_plat_device - + * function which returns the reference to the platform device + * requested by id. As of now only 1 such device exists (id=0) + * the context requesting for reference can get the id to be + * requested by a. The protocol driver which is registering or + * b. the tty device which is opened. + */ +static struct platform_device *st_get_plat_device(int id) +{ + return st_kim_devices[id]; +} + +/* + * validate_firmware_response - + * function to return whether the firmware response was proper + * in case of error don't complete so that waiting for proper + * response times out + */ +static void validate_firmware_response(struct kim_data_s *kim_gdata) +{ + struct sk_buff *skb = kim_gdata->rx_skb; + if (!skb) + return; + + /* + * these magic numbers are the position in the response buffer which + * allows us to distinguish whether the response is for the read + * version info. 
command + */ + if (skb->data[2] == 0x01 && skb->data[3] == 0x01 && + skb->data[4] == 0x10 && skb->data[5] == 0x00) { + /* fw version response */ + memcpy(kim_gdata->resp_buffer, + kim_gdata->rx_skb->data, + kim_gdata->rx_skb->len); + kim_gdata->rx_state = ST_W4_PACKET_TYPE; + kim_gdata->rx_skb = NULL; + kim_gdata->rx_count = 0; + } else if (unlikely(skb->data[5] != 0)) { + pr_err("no proper response during fw download"); + pr_err("data6 %x", skb->data[5]); + kfree_skb(skb); + return; /* keep waiting for the proper response */ + } + /* becos of all the script being downloaded */ + complete_all(&kim_gdata->kim_rcvd); + kfree_skb(skb); +} + +/* + * check for data len received inside kim_int_recv + * most often hit the last case to update state to waiting for data + */ +static inline int kim_check_data_len(struct kim_data_s *kim_gdata, int len) +{ + register int room = skb_tailroom(kim_gdata->rx_skb); + + pr_debug("len %d room %d", len, room); + + if (!len) { + validate_firmware_response(kim_gdata); + } else if (len > room) { + /* + * Received packet's payload length is larger. + * We can't accommodate it in created skb. + */ + pr_err("Data length is too large len %d room %d", len, + room); + kfree_skb(kim_gdata->rx_skb); + } else { + /* + * Packet header has non-zero payload length and + * we have enough space in created skb. Lets read + * payload data */ + kim_gdata->rx_state = ST_W4_DATA; + kim_gdata->rx_count = len; + return len; + } + + /* + * Change ST LL state to continue to process next + * packet + */ + kim_gdata->rx_state = ST_W4_PACKET_TYPE; + kim_gdata->rx_skb = NULL; + kim_gdata->rx_count = 0; + + return 0; +} + +/* + * kim_int_recv - receive function called during firmware download + * firmware download responses on different UART drivers + * have been observed to come in bursts of different + * tty_receive and hence the logic + */ +static void kim_int_recv(struct kim_data_s *kim_gdata, const u8 *ptr, + size_t count) +{ + int len = 0; + unsigned char *plen; + + pr_debug("%s", __func__); + /* Decode received bytes here */ + while (count) { + if (kim_gdata->rx_count) { + len = min_t(unsigned int, kim_gdata->rx_count, count); + skb_put_data(kim_gdata->rx_skb, ptr, len); + kim_gdata->rx_count -= len; + count -= len; + ptr += len; + + if (kim_gdata->rx_count) + continue; + + /* Check ST RX state machine , where are we? */ + switch (kim_gdata->rx_state) { + /* Waiting for complete packet ? */ + case ST_W4_DATA: + pr_debug("Complete pkt received"); + validate_firmware_response(kim_gdata); + kim_gdata->rx_state = ST_W4_PACKET_TYPE; + kim_gdata->rx_skb = NULL; + continue; + /* Waiting for Bluetooth event header ? */ + case ST_W4_HEADER: + plen = + (unsigned char *)&kim_gdata->rx_skb->data[1]; + pr_debug("event hdr: plen 0x%02x\n", *plen); + kim_check_data_len(kim_gdata, *plen); + continue; + } /* end of switch */ + } /* end of if rx_state */ + switch (*ptr) { + /* Bluetooth event packet? 
*/ + case 0x04: + kim_gdata->rx_state = ST_W4_HEADER; + kim_gdata->rx_count = 2; + break; + default: + pr_info("unknown packet"); + ptr++; + count--; + continue; + } + ptr++; + count--; + kim_gdata->rx_skb = + alloc_skb(1024+8, GFP_ATOMIC); + if (!kim_gdata->rx_skb) { + pr_err("can't allocate mem for new packet"); + kim_gdata->rx_state = ST_W4_PACKET_TYPE; + kim_gdata->rx_count = 0; + return; + } + skb_reserve(kim_gdata->rx_skb, 8); + kim_gdata->rx_skb->cb[0] = 4; + kim_gdata->rx_skb->cb[1] = 0; + + } + return; +} + +static long read_local_version(struct kim_data_s *kim_gdata, char *bts_scr_name) +{ + unsigned short version = 0, chip = 0, min_ver = 0, maj_ver = 0; + static const char read_ver_cmd[] = { 0x01, 0x01, 0x10, 0x00 }; + long timeout; + + pr_debug("%s", __func__); + + reinit_completion(&kim_gdata->kim_rcvd); + if (4 != st_int_write(kim_gdata->core_data, read_ver_cmd, 4)) { + pr_err("kim: couldn't write 4 bytes"); + return -EIO; + } + + timeout = wait_for_completion_interruptible_timeout( + &kim_gdata->kim_rcvd, msecs_to_jiffies(CMD_RESP_TIME)); + if (timeout <= 0) { + pr_err(" waiting for ver info- timed out or received signal"); + return timeout ? -ERESTARTSYS : -ETIMEDOUT; + } + reinit_completion(&kim_gdata->kim_rcvd); + /* + * the positions 12 & 13 in the response buffer provide with the + * chip, major & minor numbers + */ + + version = + MAKEWORD(kim_gdata->resp_buffer[12], + kim_gdata->resp_buffer[13]); + chip = (version & 0x7C00) >> 10; + min_ver = (version & 0x007F); + maj_ver = (version & 0x0380) >> 7; + + if (version & 0x8000) + maj_ver |= 0x0008; + + sprintf(bts_scr_name, "ti-connectivity/TIInit_%d.%d.%d.bts", + chip, maj_ver, min_ver); + + /* to be accessed later via sysfs entry */ + kim_gdata->version.full = version; + kim_gdata->version.chip = chip; + kim_gdata->version.maj_ver = maj_ver; + kim_gdata->version.min_ver = min_ver; + + pr_info("%s", bts_scr_name); + return 0; +} + +static void skip_change_remote_baud(unsigned char **ptr, long *len) +{ + unsigned char *nxt_action, *cur_action; + cur_action = *ptr; + + nxt_action = cur_action + sizeof(struct bts_action) + + ((struct bts_action *) cur_action)->size; + + if (((struct bts_action *) nxt_action)->type != ACTION_WAIT_EVENT) { + pr_err("invalid action after change remote baud command"); + } else { + *ptr = *ptr + sizeof(struct bts_action) + + ((struct bts_action *)cur_action)->size; + *len = *len - (sizeof(struct bts_action) + + ((struct bts_action *)cur_action)->size); + /* warn user on not commenting these in firmware */ + pr_warn("skipping the wait event of change remote baud"); + } +} + +/* + * download_firmware - + * internal function which parses through the .bts firmware + * script file intreprets SEND, DELAY actions only as of now + */ +static long download_firmware(struct kim_data_s *kim_gdata) +{ + long err = 0; + long len = 0; + unsigned char *ptr = NULL; + unsigned char *action_ptr = NULL; + unsigned char bts_scr_name[40] = { 0 }; /* 40 char long bts scr name? 
*/ + int wr_room_space; + int cmd_size; + unsigned long timeout; + + err = read_local_version(kim_gdata, bts_scr_name); + if (err != 0) { + pr_err("kim: failed to read local ver"); + return err; + } + err = + request_firmware(&kim_gdata->fw_entry, bts_scr_name, + &kim_gdata->kim_pdev->dev); + if (unlikely((err != 0) || (kim_gdata->fw_entry->data == NULL) || + (kim_gdata->fw_entry->size == 0))) { + pr_err(" request_firmware failed(errno %ld) for %s", err, + bts_scr_name); + return -EINVAL; + } + ptr = (void *)kim_gdata->fw_entry->data; + len = kim_gdata->fw_entry->size; + /* + * bts_header to remove out magic number and + * version + */ + ptr += sizeof(struct bts_header); + len -= sizeof(struct bts_header); + + while (len > 0 && ptr) { + pr_debug(" action size %d, type %d ", + ((struct bts_action *)ptr)->size, + ((struct bts_action *)ptr)->type); + + switch (((struct bts_action *)ptr)->type) { + case ACTION_SEND_COMMAND: /* action send */ + pr_debug("S"); + action_ptr = &(((struct bts_action *)ptr)->data[0]); + if (unlikely + (((struct hci_command *)action_ptr)->opcode == + 0xFF36)) { + /* + * ignore remote change + * baud rate HCI VS command + */ + pr_warn("change remote baud" + " rate command in firmware"); + skip_change_remote_baud(&ptr, &len); + break; + } + /* + * Make sure we have enough free space in uart + * tx buffer to write current firmware command + */ + cmd_size = ((struct bts_action *)ptr)->size; + timeout = jiffies + msecs_to_jiffies(CMD_WR_TIME); + do { + wr_room_space = + st_get_uart_wr_room(kim_gdata->core_data); + if (wr_room_space < 0) { + pr_err("Unable to get free " + "space info from uart tx buffer"); + release_firmware(kim_gdata->fw_entry); + return wr_room_space; + } + mdelay(1); /* wait 1ms before checking room */ + } while ((wr_room_space < cmd_size) && + time_before(jiffies, timeout)); + + /* Timeout happened ? */ + if (time_after_eq(jiffies, timeout)) { + pr_err("Timeout while waiting for free " + "free space in uart tx buffer"); + release_firmware(kim_gdata->fw_entry); + return -ETIMEDOUT; + } + /* + * reinit completion before sending for the + * relevant wait + */ + reinit_completion(&kim_gdata->kim_rcvd); + + /* + * Free space found in uart buffer, call st_int_write + * to send current firmware command to the uart tx + * buffer. + */ + err = st_int_write(kim_gdata->core_data, + ((struct bts_action_send *)action_ptr)->data, + ((struct bts_action *)ptr)->size); + if (unlikely(err < 0)) { + release_firmware(kim_gdata->fw_entry); + return err; + } + /* + * Check number of bytes written to the uart tx buffer + * and requested command write size + */ + if (err != cmd_size) { + pr_err("Number of bytes written to uart " + "tx buffer are not matching with " + "requested cmd write size"); + release_firmware(kim_gdata->fw_entry); + return -EIO; + } + break; + case ACTION_WAIT_EVENT: /* wait */ + pr_debug("W"); + err = wait_for_completion_interruptible_timeout( + &kim_gdata->kim_rcvd, + msecs_to_jiffies(CMD_RESP_TIME)); + if (err <= 0) { + pr_err("response timeout/signaled during fw download "); + /* timed out */ + release_firmware(kim_gdata->fw_entry); + return err ? 
-ERESTARTSYS : -ETIMEDOUT; + } + reinit_completion(&kim_gdata->kim_rcvd); + break; + case ACTION_DELAY: /* sleep */ + pr_info("sleep command in scr"); + action_ptr = &(((struct bts_action *)ptr)->data[0]); + mdelay(((struct bts_action_delay *)action_ptr)->msec); + break; + } + len = + len - (sizeof(struct bts_action) + + ((struct bts_action *)ptr)->size); + ptr = + ptr + sizeof(struct bts_action) + + ((struct bts_action *)ptr)->size; + } + /* fw download complete */ + release_firmware(kim_gdata->fw_entry); + return 0; +} + +/**********************************************************************/ +/* functions called from ST core */ +/* called from ST Core, when REG_IN_PROGRESS (registration in progress) + * can be because of + * 1. response to read local version + * 2. during send/recv's of firmware download + */ +void st_kim_recv(void *disc_data, const u8 *data, size_t count) +{ + struct st_data_s *st_gdata = (struct st_data_s *)disc_data; + struct kim_data_s *kim_gdata = st_gdata->kim_data; + + /* + * proceed to gather all data and distinguish read fw version response + * from other fw responses when data gathering is complete + */ + kim_int_recv(kim_gdata, data, count); + return; +} + +/* + * to signal completion of line discipline installation + * called from ST Core, upon tty_open + */ +void st_kim_complete(void *kim_data) +{ + struct kim_data_s *kim_gdata = (struct kim_data_s *)kim_data; + complete(&kim_gdata->ldisc_installed); +} + +/* + * st_kim_start - called from ST Core upon 1st registration + * This involves toggling the chip enable gpio, reading + * the firmware version from chip, forming the fw file name + * based on the chip version, requesting the fw, parsing it + * and perform download(send/recv). + */ +long st_kim_start(void *kim_data) +{ + long err = 0; + long retry = POR_RETRY_COUNT; + struct ti_st_plat_data *pdata; + struct kim_data_s *kim_gdata = (struct kim_data_s *)kim_data; + + pr_info(" %s", __func__); + pdata = kim_gdata->kim_pdev->dev.platform_data; + + do { + /* platform specific enabling code here */ + if (pdata->chip_enable) + pdata->chip_enable(kim_gdata); + + /* Configure BT nShutdown to HIGH state */ + gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_LOW); + mdelay(5); /* FIXME: a proper toggle */ + gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_HIGH); + mdelay(100); + /* re-initialize the completion */ + reinit_completion(&kim_gdata->ldisc_installed); + /* send notification to UIM */ + kim_gdata->ldisc_install = 1; + pr_info("ldisc_install = 1"); + sysfs_notify(&kim_gdata->kim_pdev->dev.kobj, + NULL, "install"); + /* wait for ldisc to be installed */ + err = wait_for_completion_interruptible_timeout( + &kim_gdata->ldisc_installed, msecs_to_jiffies(LDISC_TIME)); + if (!err) { + /* + * ldisc installation timeout, + * flush uart, power cycle BT_EN + */ + pr_err("ldisc installation timeout"); + err = st_kim_stop(kim_gdata); + continue; + } else { + /* ldisc installed now */ + pr_info("line discipline installed"); + err = download_firmware(kim_gdata); + if (err != 0) { + /* + * ldisc installed but fw download failed, + * flush uart & power cycle BT_EN + */ + pr_err("download firmware failed"); + err = st_kim_stop(kim_gdata); + continue; + } else { /* on success don't retry */ + break; + } + } + } while (retry--); + return err; +} + +/* + * st_kim_stop - stop communication with chip. 
+ * This can be called from ST Core/KIM, on the- + * (a) last un-register when chip need not be powered there-after, + * (b) upon failure to either install ldisc or download firmware. + * The function is responsible to (a) notify UIM about un-installation, + * (b) flush UART if the ldisc was installed. + * (c) reset BT_EN - pull down nshutdown at the end. + * (d) invoke platform's chip disabling routine. + */ +long st_kim_stop(void *kim_data) +{ + long err = 0; + struct kim_data_s *kim_gdata = (struct kim_data_s *)kim_data; + struct ti_st_plat_data *pdata = + kim_gdata->kim_pdev->dev.platform_data; + struct tty_struct *tty = kim_gdata->core_data->tty; + + reinit_completion(&kim_gdata->ldisc_installed); + + if (tty) { /* can be called before ldisc is installed */ + /* Flush any pending characters in the driver and discipline. */ + tty_ldisc_flush(tty); + tty_driver_flush_buffer(tty); + } + + /* send uninstall notification to UIM */ + pr_info("ldisc_install = 0"); + kim_gdata->ldisc_install = 0; + sysfs_notify(&kim_gdata->kim_pdev->dev.kobj, NULL, "install"); + + /* wait for ldisc to be un-installed */ + err = wait_for_completion_interruptible_timeout( + &kim_gdata->ldisc_installed, msecs_to_jiffies(LDISC_TIME)); + if (!err) { /* timeout */ + pr_err(" timed out waiting for ldisc to be un-installed"); + err = -ETIMEDOUT; + } + + /* By default configure BT nShutdown to LOW state */ + gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_LOW); + mdelay(1); + gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_HIGH); + mdelay(1); + gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_LOW); + + /* platform specific disable */ + if (pdata->chip_disable) + pdata->chip_disable(kim_gdata); + return err; +} + +/**********************************************************************/ +/* functions called from subsystems */ +/* called when debugfs entry is read from */ + +static int version_show(struct seq_file *s, void *unused) +{ + struct kim_data_s *kim_gdata = (struct kim_data_s *)s->private; + seq_printf(s, "%04X %d.%d.%d\n", kim_gdata->version.full, + kim_gdata->version.chip, kim_gdata->version.maj_ver, + kim_gdata->version.min_ver); + return 0; +} + +static int list_show(struct seq_file *s, void *unused) +{ + struct kim_data_s *kim_gdata = (struct kim_data_s *)s->private; + kim_st_list_protocols(kim_gdata->core_data, s); + return 0; +} + +static ssize_t show_install(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kim_data_s *kim_data = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", kim_data->ldisc_install); +} + +#ifdef DEBUG +static ssize_t store_dev_name(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kim_data_s *kim_data = dev_get_drvdata(dev); + pr_debug("storing dev name >%s<", buf); + strncpy(kim_data->dev_name, buf, count); + pr_debug("stored dev name >%s<", kim_data->dev_name); + return count; +} + +static ssize_t store_baud_rate(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kim_data_s *kim_data = dev_get_drvdata(dev); + pr_debug("storing baud rate >%s<", buf); + sscanf(buf, "%ld", &kim_data->baud_rate); + pr_debug("stored baud rate >%ld<", kim_data->baud_rate); + return count; +} +#endif /* if DEBUG */ + +static ssize_t show_dev_name(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kim_data_s *kim_data = dev_get_drvdata(dev); + return sprintf(buf, "%s\n", kim_data->dev_name); +} + +static ssize_t show_baud_rate(struct device *dev, + struct 
device_attribute *attr, char *buf) +{ + struct kim_data_s *kim_data = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", kim_data->baud_rate); +} + +static ssize_t show_flow_cntrl(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kim_data_s *kim_data = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", kim_data->flow_cntrl); +} + +/* structures specific for sysfs entries */ +static struct kobj_attribute ldisc_install = +__ATTR(install, 0444, (void *)show_install, NULL); + +static struct kobj_attribute uart_dev_name = +#ifdef DEBUG /* TODO: move this to debug-fs if possible */ +__ATTR(dev_name, 0644, (void *)show_dev_name, (void *)store_dev_name); +#else +__ATTR(dev_name, 0444, (void *)show_dev_name, NULL); +#endif + +static struct kobj_attribute uart_baud_rate = +#ifdef DEBUG /* TODO: move to debugfs */ +__ATTR(baud_rate, 0644, (void *)show_baud_rate, (void *)store_baud_rate); +#else +__ATTR(baud_rate, 0444, (void *)show_baud_rate, NULL); +#endif + +static struct kobj_attribute uart_flow_cntrl = +__ATTR(flow_cntrl, 0444, (void *)show_flow_cntrl, NULL); + +static struct attribute *uim_attrs[] = { + &ldisc_install.attr, + &uart_dev_name.attr, + &uart_baud_rate.attr, + &uart_flow_cntrl.attr, + NULL, +}; + +static const struct attribute_group uim_attr_grp = { + .attrs = uim_attrs, +}; + +/* + * st_kim_ref - reference the core's data + * This references the per-ST platform device in the arch/xx/ + * board-xx.c file. + * This would enable multiple such platform devices to exist + * on a given platform + */ +void st_kim_ref(struct st_data_s **core_data, int id) +{ + struct platform_device *pdev; + struct kim_data_s *kim_gdata; + /* get kim_gdata reference from platform device */ + pdev = st_get_plat_device(id); + if (!pdev) + goto err; + kim_gdata = platform_get_drvdata(pdev); + if (!kim_gdata) + goto err; + + *core_data = kim_gdata->core_data; + return; +err: + *core_data = NULL; +} + +DEFINE_SHOW_ATTRIBUTE(version); +DEFINE_SHOW_ATTRIBUTE(list); + +/**********************************************************************/ +/* functions called from platform device driver subsystem + * need to have a relevant platform device entry in the platform's + * board-*.c file + */ + +static struct dentry *kim_debugfs_dir; +static int kim_probe(struct platform_device *pdev) +{ + struct kim_data_s *kim_gdata; + struct ti_st_plat_data *pdata = pdev->dev.platform_data; + int err; + + if ((pdev->id != -1) && (pdev->id < MAX_ST_DEVICES)) { + /* multiple devices could exist */ + st_kim_devices[pdev->id] = pdev; + } else { + /* platform's sure about existence of 1 device */ + st_kim_devices[0] = pdev; + } + + kim_gdata = kzalloc(sizeof(struct kim_data_s), GFP_KERNEL); + if (!kim_gdata) { + pr_err("no mem to allocate"); + return -ENOMEM; + } + platform_set_drvdata(pdev, kim_gdata); + + err = st_core_init(&kim_gdata->core_data); + if (err != 0) { + pr_err(" ST core init failed"); + err = -EIO; + goto err_core_init; + } + /* refer to itself */ + kim_gdata->core_data->kim_data = kim_gdata; + + /* Claim the chip enable nShutdown gpio from the system */ + kim_gdata->nshutdown = pdata->nshutdown_gpio; + err = gpio_request(kim_gdata->nshutdown, "kim"); + if (unlikely(err)) { + pr_err(" gpio %d request failed ", kim_gdata->nshutdown); + goto err_sysfs_group; + } + + /* Configure nShutdown GPIO as output=0 */ + err = gpio_direction_output(kim_gdata->nshutdown, 0); + if (unlikely(err)) { + pr_err(" unable to configure gpio %d", kim_gdata->nshutdown); + goto err_sysfs_group; + } + /* get reference 
of pdev for request_firmware */ + kim_gdata->kim_pdev = pdev; + init_completion(&kim_gdata->kim_rcvd); + init_completion(&kim_gdata->ldisc_installed); + + err = sysfs_create_group(&pdev->dev.kobj, &uim_attr_grp); + if (err) { + pr_err("failed to create sysfs entries"); + goto err_sysfs_group; + } + + /* copying platform data */ + strncpy(kim_gdata->dev_name, pdata->dev_name, UART_DEV_NAME_LEN); + kim_gdata->flow_cntrl = pdata->flow_cntrl; + kim_gdata->baud_rate = pdata->baud_rate; + pr_info("sysfs entries created\n"); + + kim_debugfs_dir = debugfs_create_dir("ti-st", NULL); + + debugfs_create_file("version", S_IRUGO, kim_debugfs_dir, + kim_gdata, &version_fops); + debugfs_create_file("protocols", S_IRUGO, kim_debugfs_dir, + kim_gdata, &list_fops); + return 0; + +err_sysfs_group: + st_core_exit(kim_gdata->core_data); + +err_core_init: + kfree(kim_gdata); + + return err; +} + +static int kim_remove(struct platform_device *pdev) +{ + /* free the GPIOs requested */ + struct ti_st_plat_data *pdata = pdev->dev.platform_data; + struct kim_data_s *kim_gdata; + + kim_gdata = platform_get_drvdata(pdev); + + /* + * Free the Bluetooth/FM/GPIO + * nShutdown gpio from the system + */ + gpio_free(pdata->nshutdown_gpio); + pr_info("nshutdown GPIO Freed"); + + debugfs_remove_recursive(kim_debugfs_dir); + sysfs_remove_group(&pdev->dev.kobj, &uim_attr_grp); + pr_info("sysfs entries removed"); + + kim_gdata->kim_pdev = NULL; + st_core_exit(kim_gdata->core_data); + + kfree(kim_gdata); + kim_gdata = NULL; + return 0; +} + +static int kim_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct ti_st_plat_data *pdata = pdev->dev.platform_data; + + if (pdata->suspend) + return pdata->suspend(pdev, state); + + return 0; +} + +static int kim_resume(struct platform_device *pdev) +{ + struct ti_st_plat_data *pdata = pdev->dev.platform_data; + + if (pdata->resume) + return pdata->resume(pdev); + + return 0; +} + +/**********************************************************************/ +/* entry point for ST KIM module, called in from ST Core */ +static struct platform_driver kim_platform_driver = { + .probe = kim_probe, + .remove = kim_remove, + .suspend = kim_suspend, + .resume = kim_resume, + .driver = { + .name = "kim", + }, +}; + +module_platform_driver(kim_platform_driver); + +MODULE_AUTHOR("Pavan Savoy "); +MODULE_DESCRIPTION("Shared Transport Driver for TI BT/FM/GPS combo chips "); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/ti-st/st_ll.c b/drivers/misc/ti-st/st_ll.c new file mode 100644 index 0000000000..07406140d2 --- /dev/null +++ b/drivers/misc/ti-st/st_ll.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Shared Transport driver + * HCI-LL module responsible for TI proprietary HCI_LL protocol + * Copyright (C) 2009-2010 Texas Instruments + * Author: Pavan Savoy + */ + +#define pr_fmt(fmt) "(stll) :" fmt +#include +#include +#include +#include + +/**********************************************************************/ +/* internal functions */ +static void send_ll_cmd(struct st_data_s *st_data, + unsigned char cmd) +{ + + pr_debug("%s: writing %x", __func__, cmd); + st_int_write(st_data, &cmd, 1); + return; +} + +static void ll_device_want_to_sleep(struct st_data_s *st_data) +{ + struct kim_data_s *kim_data; + struct ti_st_plat_data *pdata; + + pr_debug("%s", __func__); + /* sanity check */ + if (st_data->ll_state != ST_LL_AWAKE) + pr_err("ERR hcill: ST_LL_GO_TO_SLEEP_IND" + "in state %ld", st_data->ll_state); + + send_ll_cmd(st_data, LL_SLEEP_ACK); + /* update state */ 
+ st_data->ll_state = ST_LL_ASLEEP; + + /* communicate to platform about chip asleep */ + kim_data = st_data->kim_data; + pdata = kim_data->kim_pdev->dev.platform_data; + if (pdata->chip_asleep) + pdata->chip_asleep(NULL); +} + +static void ll_device_want_to_wakeup(struct st_data_s *st_data) +{ + struct kim_data_s *kim_data; + struct ti_st_plat_data *pdata; + + /* diff actions in diff states */ + switch (st_data->ll_state) { + case ST_LL_ASLEEP: + send_ll_cmd(st_data, LL_WAKE_UP_ACK); /* send wake_ack */ + break; + case ST_LL_ASLEEP_TO_AWAKE: + /* duplicate wake_ind */ + pr_err("duplicate wake_ind while waiting for Wake ack"); + break; + case ST_LL_AWAKE: + /* duplicate wake_ind */ + pr_err("duplicate wake_ind already AWAKE"); + break; + case ST_LL_AWAKE_TO_ASLEEP: + /* duplicate wake_ind */ + pr_err("duplicate wake_ind"); + break; + } + /* update state */ + st_data->ll_state = ST_LL_AWAKE; + + /* communicate to platform about chip wakeup */ + kim_data = st_data->kim_data; + pdata = kim_data->kim_pdev->dev.platform_data; + if (pdata->chip_awake) + pdata->chip_awake(NULL); +} + +/**********************************************************************/ +/* functions invoked by ST Core */ + +/* called when ST Core wants to + * enable ST LL */ +void st_ll_enable(struct st_data_s *ll) +{ + ll->ll_state = ST_LL_AWAKE; +} + +/* called when ST Core /local module wants to + * disable ST LL */ +void st_ll_disable(struct st_data_s *ll) +{ + ll->ll_state = ST_LL_INVALID; +} + +/* called when ST Core wants to update the state */ +void st_ll_wakeup(struct st_data_s *ll) +{ + if (likely(ll->ll_state != ST_LL_AWAKE)) { + send_ll_cmd(ll, LL_WAKE_UP_IND); /* WAKE_IND */ + ll->ll_state = ST_LL_ASLEEP_TO_AWAKE; + } else { + /* don't send the duplicate wake_indication */ + pr_err(" Chip already AWAKE "); + } +} + +/* called when ST Core wants the state */ +unsigned long st_ll_getstate(struct st_data_s *ll) +{ + pr_debug(" returning state %ld", ll->ll_state); + return ll->ll_state; +} + +/* called from ST Core, when a PM related packet arrives */ +unsigned long st_ll_sleep_state(struct st_data_s *st_data, + unsigned char cmd) +{ + switch (cmd) { + case LL_SLEEP_IND: /* sleep ind */ + pr_debug("sleep indication recvd"); + ll_device_want_to_sleep(st_data); + break; + case LL_SLEEP_ACK: /* sleep ack */ + pr_err("sleep ack rcvd: host shouldn't"); + break; + case LL_WAKE_UP_IND: /* wake ind */ + pr_debug("wake indication recvd"); + ll_device_want_to_wakeup(st_data); + break; + case LL_WAKE_UP_ACK: /* wake ack */ + pr_debug("wake ack rcvd"); + st_data->ll_state = ST_LL_AWAKE; + break; + default: + pr_err(" unknown input/state "); + return -EINVAL; + } + return 0; +} + +/* Called from ST CORE to initialize ST LL */ +long st_ll_init(struct st_data_s *ll) +{ + /* set state to invalid */ + ll->ll_state = ST_LL_INVALID; + return 0; +} + +/* Called from ST CORE to de-initialize ST LL */ +long st_ll_deinit(struct st_data_s *ll) +{ + return 0; +} diff --git a/drivers/misc/tifm_7xx1.c b/drivers/misc/tifm_7xx1.c new file mode 100644 index 0000000000..7dd86a9858 --- /dev/null +++ b/drivers/misc/tifm_7xx1.c @@ -0,0 +1,427 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * tifm_7xx1.c - TI FlashMedia driver + * + * Copyright (C) 2006 Alex Dubov + */ + +#include +#include +#include + +#define DRIVER_NAME "tifm_7xx1" +#define DRIVER_VERSION "0.8" + +#define TIFM_IRQ_ENABLE 0x80000000 +#define TIFM_IRQ_SOCKMASK(x) (x) +#define TIFM_IRQ_CARDMASK(x) ((x) << 8) +#define TIFM_IRQ_FIFOMASK(x) ((x) << 16) +#define TIFM_IRQ_SETALL 
0xffffffff + +static void tifm_7xx1_dummy_eject(struct tifm_adapter *fm, + struct tifm_dev *sock) +{ +} + +static void tifm_7xx1_eject(struct tifm_adapter *fm, struct tifm_dev *sock) +{ + unsigned long flags; + + spin_lock_irqsave(&fm->lock, flags); + fm->socket_change_set |= 1 << sock->socket_id; + tifm_queue_work(&fm->media_switcher); + spin_unlock_irqrestore(&fm->lock, flags); +} + +static irqreturn_t tifm_7xx1_isr(int irq, void *dev_id) +{ + struct tifm_adapter *fm = dev_id; + struct tifm_dev *sock; + unsigned int irq_status, cnt; + + spin_lock(&fm->lock); + irq_status = readl(fm->addr + FM_INTERRUPT_STATUS); + if (irq_status == 0 || irq_status == (~0)) { + spin_unlock(&fm->lock); + return IRQ_NONE; + } + + if (irq_status & TIFM_IRQ_ENABLE) { + writel(TIFM_IRQ_ENABLE, fm->addr + FM_CLEAR_INTERRUPT_ENABLE); + + for (cnt = 0; cnt < fm->num_sockets; cnt++) { + sock = fm->sockets[cnt]; + if (sock) { + if ((irq_status >> cnt) & TIFM_IRQ_FIFOMASK(1)) + sock->data_event(sock); + if ((irq_status >> cnt) & TIFM_IRQ_CARDMASK(1)) + sock->card_event(sock); + } + } + + fm->socket_change_set |= irq_status + & ((1 << fm->num_sockets) - 1); + } + writel(irq_status, fm->addr + FM_INTERRUPT_STATUS); + + if (fm->finish_me) + complete_all(fm->finish_me); + else if (!fm->socket_change_set) + writel(TIFM_IRQ_ENABLE, fm->addr + FM_SET_INTERRUPT_ENABLE); + else + tifm_queue_work(&fm->media_switcher); + + spin_unlock(&fm->lock); + return IRQ_HANDLED; +} + +static unsigned char tifm_7xx1_toggle_sock_power(char __iomem *sock_addr) +{ + unsigned int s_state; + int cnt; + + writel(0x0e00, sock_addr + SOCK_CONTROL); + + for (cnt = 16; cnt <= 256; cnt <<= 1) { + if (!(TIFM_SOCK_STATE_POWERED + & readl(sock_addr + SOCK_PRESENT_STATE))) + break; + + msleep(cnt); + } + + s_state = readl(sock_addr + SOCK_PRESENT_STATE); + if (!(TIFM_SOCK_STATE_OCCUPIED & s_state)) + return 0; + + writel(readl(sock_addr + SOCK_CONTROL) | TIFM_CTRL_LED, + sock_addr + SOCK_CONTROL); + + /* xd needs some extra time before power on */ + if (((readl(sock_addr + SOCK_PRESENT_STATE) >> 4) & 7) + == TIFM_TYPE_XD) + msleep(40); + + writel((s_state & TIFM_CTRL_POWER_MASK) | 0x0c00, + sock_addr + SOCK_CONTROL); + /* wait for power to stabilize */ + msleep(20); + for (cnt = 16; cnt <= 256; cnt <<= 1) { + if ((TIFM_SOCK_STATE_POWERED + & readl(sock_addr + SOCK_PRESENT_STATE))) + break; + + msleep(cnt); + } + + writel(readl(sock_addr + SOCK_CONTROL) & (~TIFM_CTRL_LED), + sock_addr + SOCK_CONTROL); + + return (readl(sock_addr + SOCK_PRESENT_STATE) >> 4) & 7; +} + +inline static void tifm_7xx1_sock_power_off(char __iomem *sock_addr) +{ + writel((~TIFM_CTRL_POWER_MASK) & readl(sock_addr + SOCK_CONTROL), + sock_addr + SOCK_CONTROL); +} + +inline static char __iomem * +tifm_7xx1_sock_addr(char __iomem *base_addr, unsigned int sock_num) +{ + return base_addr + ((sock_num + 1) << 10); +} + +static void tifm_7xx1_switch_media(struct work_struct *work) +{ + struct tifm_adapter *fm = container_of(work, struct tifm_adapter, + media_switcher); + struct tifm_dev *sock; + char __iomem *sock_addr; + unsigned long flags; + unsigned char media_id; + unsigned int socket_change_set, cnt; + + spin_lock_irqsave(&fm->lock, flags); + socket_change_set = fm->socket_change_set; + fm->socket_change_set = 0; + + dev_dbg(fm->dev.parent, "checking media set %x\n", + socket_change_set); + + if (!socket_change_set) { + spin_unlock_irqrestore(&fm->lock, flags); + return; + } + + for (cnt = 0; cnt < fm->num_sockets; cnt++) { + if (!(socket_change_set & (1 << cnt))) + continue; + sock 
= fm->sockets[cnt]; + if (sock) { + printk(KERN_INFO + "%s : demand removing card from socket %u:%u\n", + dev_name(&fm->dev), fm->id, cnt); + fm->sockets[cnt] = NULL; + sock_addr = sock->addr; + spin_unlock_irqrestore(&fm->lock, flags); + device_unregister(&sock->dev); + spin_lock_irqsave(&fm->lock, flags); + tifm_7xx1_sock_power_off(sock_addr); + writel(0x0e00, sock_addr + SOCK_CONTROL); + } + + spin_unlock_irqrestore(&fm->lock, flags); + + media_id = tifm_7xx1_toggle_sock_power( + tifm_7xx1_sock_addr(fm->addr, cnt)); + + // tifm_alloc_device will check if media_id is valid + sock = tifm_alloc_device(fm, cnt, media_id); + if (sock) { + sock->addr = tifm_7xx1_sock_addr(fm->addr, cnt); + + if (!device_register(&sock->dev)) { + spin_lock_irqsave(&fm->lock, flags); + if (!fm->sockets[cnt]) { + fm->sockets[cnt] = sock; + sock = NULL; + } + spin_unlock_irqrestore(&fm->lock, flags); + } + if (sock) + put_device(&sock->dev); + } + spin_lock_irqsave(&fm->lock, flags); + } + + writel(TIFM_IRQ_FIFOMASK(socket_change_set) + | TIFM_IRQ_CARDMASK(socket_change_set), + fm->addr + FM_CLEAR_INTERRUPT_ENABLE); + + writel(TIFM_IRQ_FIFOMASK(socket_change_set) + | TIFM_IRQ_CARDMASK(socket_change_set), + fm->addr + FM_SET_INTERRUPT_ENABLE); + + writel(TIFM_IRQ_ENABLE, fm->addr + FM_SET_INTERRUPT_ENABLE); + spin_unlock_irqrestore(&fm->lock, flags); +} + +static int __maybe_unused tifm_7xx1_suspend(struct device *dev_d) +{ + struct pci_dev *dev = to_pci_dev(dev_d); + struct tifm_adapter *fm = pci_get_drvdata(dev); + int cnt; + + dev_dbg(&dev->dev, "suspending host\n"); + + for (cnt = 0; cnt < fm->num_sockets; cnt++) { + if (fm->sockets[cnt]) + tifm_7xx1_sock_power_off(fm->sockets[cnt]->addr); + } + + device_wakeup_disable(dev_d); + return 0; +} + +static int __maybe_unused tifm_7xx1_resume(struct device *dev_d) +{ + struct pci_dev *dev = to_pci_dev(dev_d); + struct tifm_adapter *fm = pci_get_drvdata(dev); + int rc; + unsigned long timeout; + unsigned int good_sockets = 0, bad_sockets = 0; + unsigned long flags; + /* Maximum number of entries is 4 */ + unsigned char new_ids[4]; + DECLARE_COMPLETION_ONSTACK(finish_resume); + + if (WARN_ON(fm->num_sockets > ARRAY_SIZE(new_ids))) + return -ENXIO; + + pci_set_master(dev); + + dev_dbg(&dev->dev, "resuming host\n"); + + for (rc = 0; rc < fm->num_sockets; rc++) + new_ids[rc] = tifm_7xx1_toggle_sock_power( + tifm_7xx1_sock_addr(fm->addr, rc)); + spin_lock_irqsave(&fm->lock, flags); + for (rc = 0; rc < fm->num_sockets; rc++) { + if (fm->sockets[rc]) { + if (fm->sockets[rc]->type == new_ids[rc]) + good_sockets |= 1 << rc; + else + bad_sockets |= 1 << rc; + } + } + + writel(TIFM_IRQ_ENABLE | TIFM_IRQ_SOCKMASK((1 << fm->num_sockets) - 1), + fm->addr + FM_SET_INTERRUPT_ENABLE); + dev_dbg(&dev->dev, "change sets on resume: good %x, bad %x\n", + good_sockets, bad_sockets); + + fm->socket_change_set = 0; + if (good_sockets) { + fm->finish_me = &finish_resume; + spin_unlock_irqrestore(&fm->lock, flags); + timeout = wait_for_completion_timeout(&finish_resume, HZ); + dev_dbg(&dev->dev, "wait returned %lu\n", timeout); + writel(TIFM_IRQ_FIFOMASK(good_sockets) + | TIFM_IRQ_CARDMASK(good_sockets), + fm->addr + FM_CLEAR_INTERRUPT_ENABLE); + writel(TIFM_IRQ_FIFOMASK(good_sockets) + | TIFM_IRQ_CARDMASK(good_sockets), + fm->addr + FM_SET_INTERRUPT_ENABLE); + spin_lock_irqsave(&fm->lock, flags); + fm->finish_me = NULL; + fm->socket_change_set ^= good_sockets & fm->socket_change_set; + } + + fm->socket_change_set |= bad_sockets; + if (fm->socket_change_set) + 
tifm_queue_work(&fm->media_switcher); + + spin_unlock_irqrestore(&fm->lock, flags); + writel(TIFM_IRQ_ENABLE, + fm->addr + FM_SET_INTERRUPT_ENABLE); + + return 0; +} + +static int tifm_7xx1_dummy_has_ms_pif(struct tifm_adapter *fm, + struct tifm_dev *sock) +{ + return 0; +} + +static int tifm_7xx1_has_ms_pif(struct tifm_adapter *fm, struct tifm_dev *sock) +{ + if (((fm->num_sockets == 4) && (sock->socket_id == 2)) + || ((fm->num_sockets == 2) && (sock->socket_id == 0))) + return 1; + + return 0; +} + +static int tifm_7xx1_probe(struct pci_dev *dev, + const struct pci_device_id *dev_id) +{ + struct tifm_adapter *fm; + int pci_dev_busy = 0; + int rc; + + rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(32)); + if (rc) + return rc; + + rc = pci_enable_device(dev); + if (rc) + return rc; + + pci_set_master(dev); + + rc = pci_request_regions(dev, DRIVER_NAME); + if (rc) { + pci_dev_busy = 1; + goto err_out; + } + + pci_intx(dev, 1); + + fm = tifm_alloc_adapter(dev->device == PCI_DEVICE_ID_TI_XX21_XX11_FM + ? 4 : 2, &dev->dev); + if (!fm) { + rc = -ENOMEM; + goto err_out_int; + } + + INIT_WORK(&fm->media_switcher, tifm_7xx1_switch_media); + fm->eject = tifm_7xx1_eject; + fm->has_ms_pif = tifm_7xx1_has_ms_pif; + pci_set_drvdata(dev, fm); + + fm->addr = pci_ioremap_bar(dev, 0); + if (!fm->addr) { + rc = -ENODEV; + goto err_out_free; + } + + rc = request_irq(dev->irq, tifm_7xx1_isr, IRQF_SHARED, DRIVER_NAME, fm); + if (rc) + goto err_out_unmap; + + rc = tifm_add_adapter(fm); + if (rc) + goto err_out_irq; + + writel(TIFM_IRQ_ENABLE | TIFM_IRQ_SOCKMASK((1 << fm->num_sockets) - 1), + fm->addr + FM_CLEAR_INTERRUPT_ENABLE); + writel(TIFM_IRQ_ENABLE | TIFM_IRQ_SOCKMASK((1 << fm->num_sockets) - 1), + fm->addr + FM_SET_INTERRUPT_ENABLE); + return 0; + +err_out_irq: + free_irq(dev->irq, fm); +err_out_unmap: + iounmap(fm->addr); +err_out_free: + tifm_free_adapter(fm); +err_out_int: + pci_intx(dev, 0); + pci_release_regions(dev); +err_out: + if (!pci_dev_busy) + pci_disable_device(dev); + return rc; +} + +static void tifm_7xx1_remove(struct pci_dev *dev) +{ + struct tifm_adapter *fm = pci_get_drvdata(dev); + int cnt; + + fm->eject = tifm_7xx1_dummy_eject; + fm->has_ms_pif = tifm_7xx1_dummy_has_ms_pif; + writel(TIFM_IRQ_SETALL, fm->addr + FM_CLEAR_INTERRUPT_ENABLE); + free_irq(dev->irq, fm); + + tifm_remove_adapter(fm); + + for (cnt = 0; cnt < fm->num_sockets; cnt++) + tifm_7xx1_sock_power_off(tifm_7xx1_sock_addr(fm->addr, cnt)); + + iounmap(fm->addr); + pci_intx(dev, 0); + pci_release_regions(dev); + + pci_disable_device(dev); + tifm_free_adapter(fm); +} + +static const struct pci_device_id tifm_7xx1_pci_tbl[] = { + { PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XX21_XX11_FM, PCI_ANY_ID, + PCI_ANY_ID, 0, 0, 0 }, /* xx21 - the one I have */ + { PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XX12_FM, PCI_ANY_ID, + PCI_ANY_ID, 0, 0, 0 }, + { PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XX20_FM, PCI_ANY_ID, + PCI_ANY_ID, 0, 0, 0 }, + { } +}; + +static SIMPLE_DEV_PM_OPS(tifm_7xx1_pm_ops, tifm_7xx1_suspend, tifm_7xx1_resume); + +static struct pci_driver tifm_7xx1_driver = { + .name = DRIVER_NAME, + .id_table = tifm_7xx1_pci_tbl, + .probe = tifm_7xx1_probe, + .remove = tifm_7xx1_remove, + .driver.pm = &tifm_7xx1_pm_ops, +}; + +module_pci_driver(tifm_7xx1_driver); +MODULE_AUTHOR("Alex Dubov"); +MODULE_DESCRIPTION("TI FlashMedia host driver"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, tifm_7xx1_pci_tbl); +MODULE_VERSION(DRIVER_VERSION); diff --git a/drivers/misc/tifm_core.c b/drivers/misc/tifm_core.c new file mode 100644 index 
0000000000..eee9b65816 --- /dev/null +++ b/drivers/misc/tifm_core.c @@ -0,0 +1,366 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * tifm_core.c - TI FlashMedia driver + * + * Copyright (C) 2006 Alex Dubov + */ + +#include +#include +#include +#include +#include + +#define DRIVER_NAME "tifm_core" +#define DRIVER_VERSION "0.8" + +static struct workqueue_struct *workqueue; +static DEFINE_IDR(tifm_adapter_idr); +static DEFINE_SPINLOCK(tifm_adapter_lock); + +static const char *tifm_media_type_name(unsigned char type, unsigned char nt) +{ + const char *card_type_name[3][3] = { + { "SmartMedia/xD", "MemoryStick", "MMC/SD" }, + { "XD", "MS", "SD"}, + { "xd", "ms", "sd"} + }; + + if (nt > 2 || type < 1 || type > 3) + return NULL; + return card_type_name[nt][type - 1]; +} + +static int tifm_dev_match(struct tifm_dev *sock, struct tifm_device_id *id) +{ + if (sock->type == id->type) + return 1; + return 0; +} + +static int tifm_bus_match(struct device *dev, struct device_driver *drv) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + struct tifm_driver *fm_drv = container_of(drv, struct tifm_driver, + driver); + struct tifm_device_id *ids = fm_drv->id_table; + + if (ids) { + while (ids->type) { + if (tifm_dev_match(sock, ids)) + return 1; + ++ids; + } + } + return 0; +} + +static int tifm_uevent(const struct device *dev, struct kobj_uevent_env *env) +{ + const struct tifm_dev *sock = container_of_const(dev, struct tifm_dev, dev); + + if (add_uevent_var(env, "TIFM_CARD_TYPE=%s", tifm_media_type_name(sock->type, 1))) + return -ENOMEM; + + return 0; +} + +static int tifm_device_probe(struct device *dev) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver, + driver); + int rc = -ENODEV; + + get_device(dev); + if (dev->driver && drv->probe) { + rc = drv->probe(sock); + if (!rc) + return 0; + } + put_device(dev); + return rc; +} + +static void tifm_dummy_event(struct tifm_dev *sock) +{ + return; +} + +static void tifm_device_remove(struct device *dev) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver, + driver); + + if (dev->driver && drv->remove) { + sock->card_event = tifm_dummy_event; + sock->data_event = tifm_dummy_event; + drv->remove(sock); + sock->dev.driver = NULL; + } + + put_device(dev); +} + +#ifdef CONFIG_PM + +static int tifm_device_suspend(struct device *dev, pm_message_t state) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver, + driver); + + if (dev->driver && drv->suspend) + return drv->suspend(sock, state); + return 0; +} + +static int tifm_device_resume(struct device *dev) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver, + driver); + + if (dev->driver && drv->resume) + return drv->resume(sock); + return 0; +} + +#else + +#define tifm_device_suspend NULL +#define tifm_device_resume NULL + +#endif /* CONFIG_PM */ + +static ssize_t type_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + return sprintf(buf, "%x", sock->type); +} +static DEVICE_ATTR_RO(type); + +static struct attribute *tifm_dev_attrs[] = { + &dev_attr_type.attr, + NULL, +}; +ATTRIBUTE_GROUPS(tifm_dev); + +static struct bus_type tifm_bus_type = { + 
.name = "tifm", + .dev_groups = tifm_dev_groups, + .match = tifm_bus_match, + .uevent = tifm_uevent, + .probe = tifm_device_probe, + .remove = tifm_device_remove, + .suspend = tifm_device_suspend, + .resume = tifm_device_resume +}; + +static void tifm_free(struct device *dev) +{ + struct tifm_adapter *fm = container_of(dev, struct tifm_adapter, dev); + + kfree(fm); +} + +static struct class tifm_adapter_class = { + .name = "tifm_adapter", + .dev_release = tifm_free +}; + +struct tifm_adapter *tifm_alloc_adapter(unsigned int num_sockets, + struct device *dev) +{ + struct tifm_adapter *fm; + + fm = kzalloc(struct_size(fm, sockets, num_sockets), GFP_KERNEL); + if (fm) { + fm->dev.class = &tifm_adapter_class; + fm->dev.parent = dev; + device_initialize(&fm->dev); + spin_lock_init(&fm->lock); + fm->num_sockets = num_sockets; + } + return fm; +} +EXPORT_SYMBOL(tifm_alloc_adapter); + +int tifm_add_adapter(struct tifm_adapter *fm) +{ + int rc; + + idr_preload(GFP_KERNEL); + spin_lock(&tifm_adapter_lock); + rc = idr_alloc(&tifm_adapter_idr, fm, 0, 0, GFP_NOWAIT); + if (rc >= 0) + fm->id = rc; + spin_unlock(&tifm_adapter_lock); + idr_preload_end(); + if (rc < 0) + return rc; + + dev_set_name(&fm->dev, "tifm%u", fm->id); + rc = device_add(&fm->dev); + if (rc) { + spin_lock(&tifm_adapter_lock); + idr_remove(&tifm_adapter_idr, fm->id); + spin_unlock(&tifm_adapter_lock); + } + + return rc; +} +EXPORT_SYMBOL(tifm_add_adapter); + +void tifm_remove_adapter(struct tifm_adapter *fm) +{ + unsigned int cnt; + + flush_workqueue(workqueue); + for (cnt = 0; cnt < fm->num_sockets; ++cnt) { + if (fm->sockets[cnt]) + device_unregister(&fm->sockets[cnt]->dev); + } + + spin_lock(&tifm_adapter_lock); + idr_remove(&tifm_adapter_idr, fm->id); + spin_unlock(&tifm_adapter_lock); + device_del(&fm->dev); +} +EXPORT_SYMBOL(tifm_remove_adapter); + +void tifm_free_adapter(struct tifm_adapter *fm) +{ + put_device(&fm->dev); +} +EXPORT_SYMBOL(tifm_free_adapter); + +void tifm_free_device(struct device *dev) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + kfree(sock); +} +EXPORT_SYMBOL(tifm_free_device); + +struct tifm_dev *tifm_alloc_device(struct tifm_adapter *fm, unsigned int id, + unsigned char type) +{ + struct tifm_dev *sock = NULL; + + if (!tifm_media_type_name(type, 0)) + return sock; + + sock = kzalloc(sizeof(struct tifm_dev), GFP_KERNEL); + if (sock) { + spin_lock_init(&sock->lock); + sock->type = type; + sock->socket_id = id; + sock->card_event = tifm_dummy_event; + sock->data_event = tifm_dummy_event; + + sock->dev.parent = fm->dev.parent; + sock->dev.bus = &tifm_bus_type; + sock->dev.dma_mask = fm->dev.parent->dma_mask; + sock->dev.release = tifm_free_device; + + dev_set_name(&sock->dev, "tifm_%s%u:%u", + tifm_media_type_name(type, 2), fm->id, id); + printk(KERN_INFO DRIVER_NAME + ": %s card detected in socket %u:%u\n", + tifm_media_type_name(type, 0), fm->id, id); + } + return sock; +} +EXPORT_SYMBOL(tifm_alloc_device); + +void tifm_eject(struct tifm_dev *sock) +{ + struct tifm_adapter *fm = dev_get_drvdata(sock->dev.parent); + fm->eject(fm, sock); +} +EXPORT_SYMBOL(tifm_eject); + +int tifm_has_ms_pif(struct tifm_dev *sock) +{ + struct tifm_adapter *fm = dev_get_drvdata(sock->dev.parent); + return fm->has_ms_pif(fm, sock); +} +EXPORT_SYMBOL(tifm_has_ms_pif); + +int tifm_map_sg(struct tifm_dev *sock, struct scatterlist *sg, int nents, + int direction) +{ + return dma_map_sg(&to_pci_dev(sock->dev.parent)->dev, sg, nents, + direction); +} +EXPORT_SYMBOL(tifm_map_sg); + +void tifm_unmap_sg(struct 
tifm_dev *sock, struct scatterlist *sg, int nents, + int direction) +{ + dma_unmap_sg(&to_pci_dev(sock->dev.parent)->dev, sg, nents, direction); +} +EXPORT_SYMBOL(tifm_unmap_sg); + +void tifm_queue_work(struct work_struct *work) +{ + queue_work(workqueue, work); +} +EXPORT_SYMBOL(tifm_queue_work); + +int tifm_register_driver(struct tifm_driver *drv) +{ + drv->driver.bus = &tifm_bus_type; + + return driver_register(&drv->driver); +} +EXPORT_SYMBOL(tifm_register_driver); + +void tifm_unregister_driver(struct tifm_driver *drv) +{ + driver_unregister(&drv->driver); +} +EXPORT_SYMBOL(tifm_unregister_driver); + +static int __init tifm_init(void) +{ + int rc; + + workqueue = create_freezable_workqueue("tifm"); + if (!workqueue) + return -ENOMEM; + + rc = bus_register(&tifm_bus_type); + + if (rc) + goto err_out_wq; + + rc = class_register(&tifm_adapter_class); + if (!rc) + return 0; + + bus_unregister(&tifm_bus_type); + +err_out_wq: + destroy_workqueue(workqueue); + + return rc; +} + +static void __exit tifm_exit(void) +{ + class_unregister(&tifm_adapter_class); + bus_unregister(&tifm_bus_type); + destroy_workqueue(workqueue); +} + +subsys_initcall(tifm_init); +module_exit(tifm_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Alex Dubov"); +MODULE_DESCRIPTION("TI FlashMedia core driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRIVER_VERSION); diff --git a/drivers/misc/tps6594-esm.c b/drivers/misc/tps6594-esm.c new file mode 100644 index 0000000000..b4d67a1a24 --- /dev/null +++ b/drivers/misc/tps6594-esm.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ESM (Error Signal Monitor) driver for TI TPS6594/TPS6593/LP8764 PMICs + * + * Copyright (C) 2023 BayLibre Incorporated - https://www.baylibre.com/ + */ + +#include +#include +#include +#include +#include + +#include + +#define TPS6594_DEV_REV_1 0x08 + +static irqreturn_t tps6594_esm_isr(int irq, void *dev_id) +{ + struct platform_device *pdev = dev_id; + int i; + + for (i = 0 ; i < pdev->num_resources ; i++) { + if (irq == platform_get_irq_byname(pdev, pdev->resource[i].name)) { + dev_err(pdev->dev.parent, "%s error detected\n", pdev->resource[i].name); + return IRQ_HANDLED; + } + } + + return IRQ_NONE; +} + +static int tps6594_esm_probe(struct platform_device *pdev) +{ + struct tps6594 *tps = dev_get_drvdata(pdev->dev.parent); + struct device *dev = &pdev->dev; + unsigned int rev; + int irq; + int ret; + int i; + + /* + * Due to a bug in revision 1 of the PMIC, the GPIO3 used for the + * SoC ESM function is used to power the load switch instead. + * As a consequence, ESM can not be used on those PMIC. + * Check the version and return an error in case of revision 1. 
+ */ + ret = regmap_read(tps->regmap, TPS6594_REG_DEV_REV, &rev); + if (ret) + return dev_err_probe(dev, ret, + "Failed to read PMIC revision\n"); + if (rev == TPS6594_DEV_REV_1) + return dev_err_probe(dev, -ENODEV, + "ESM not supported for revision 1 PMIC\n"); + + for (i = 0; i < pdev->num_resources; i++) { + irq = platform_get_irq_byname(pdev, pdev->resource[i].name); + if (irq < 0) + return irq; + + ret = devm_request_threaded_irq(dev, irq, NULL, + tps6594_esm_isr, IRQF_ONESHOT, + pdev->resource[i].name, pdev); + if (ret) + return dev_err_probe(dev, ret, "Failed to request irq\n"); + } + + ret = regmap_set_bits(tps->regmap, TPS6594_REG_ESM_SOC_MODE_CFG, + TPS6594_BIT_ESM_SOC_EN | TPS6594_BIT_ESM_SOC_ENDRV); + if (ret) + return dev_err_probe(dev, ret, "Failed to configure ESM\n"); + + ret = regmap_set_bits(tps->regmap, TPS6594_REG_ESM_SOC_START_REG, + TPS6594_BIT_ESM_SOC_START); + if (ret) + return dev_err_probe(dev, ret, "Failed to start ESM\n"); + + pm_runtime_enable(dev); + pm_runtime_get_sync(dev); + + return 0; +} + +static void tps6594_esm_remove(struct platform_device *pdev) +{ + struct tps6594 *tps = dev_get_drvdata(pdev->dev.parent); + struct device *dev = &pdev->dev; + int ret; + + ret = regmap_clear_bits(tps->regmap, TPS6594_REG_ESM_SOC_START_REG, + TPS6594_BIT_ESM_SOC_START); + if (ret) { + dev_err(dev, "Failed to stop ESM\n"); + goto out; + } + + ret = regmap_clear_bits(tps->regmap, TPS6594_REG_ESM_SOC_MODE_CFG, + TPS6594_BIT_ESM_SOC_EN | TPS6594_BIT_ESM_SOC_ENDRV); + if (ret) + dev_err(dev, "Failed to unconfigure ESM\n"); + +out: + pm_runtime_put_sync(dev); + pm_runtime_disable(dev); +} + +static int tps6594_esm_suspend(struct device *dev) +{ + struct tps6594 *tps = dev_get_drvdata(dev->parent); + int ret; + + ret = regmap_clear_bits(tps->regmap, TPS6594_REG_ESM_SOC_START_REG, + TPS6594_BIT_ESM_SOC_START); + + pm_runtime_put_sync(dev); + + return ret; +} + +static int tps6594_esm_resume(struct device *dev) +{ + struct tps6594 *tps = dev_get_drvdata(dev->parent); + + pm_runtime_get_sync(dev); + + return regmap_set_bits(tps->regmap, TPS6594_REG_ESM_SOC_START_REG, + TPS6594_BIT_ESM_SOC_START); +} + +static DEFINE_SIMPLE_DEV_PM_OPS(tps6594_esm_pm_ops, tps6594_esm_suspend, tps6594_esm_resume); + +static struct platform_driver tps6594_esm_driver = { + .driver = { + .name = "tps6594-esm", + .pm = pm_sleep_ptr(&tps6594_esm_pm_ops), + }, + .probe = tps6594_esm_probe, + .remove_new = tps6594_esm_remove, +}; + +module_platform_driver(tps6594_esm_driver); + +MODULE_ALIAS("platform:tps6594-esm"); +MODULE_AUTHOR("Julien Panis "); +MODULE_DESCRIPTION("TPS6594 Error Signal Monitor Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/tps6594-pfsm.c b/drivers/misc/tps6594-pfsm.c new file mode 100644 index 0000000000..88dcac8148 --- /dev/null +++ b/drivers/misc/tps6594-pfsm.c @@ -0,0 +1,303 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PFSM (Pre-configurable Finite State Machine) driver for TI TPS6594/TPS6593/LP8764 PMICs + * + * Copyright (C) 2023 BayLibre Incorporated - https://www.baylibre.com/ + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#define TPS6594_STARTUP_DEST_MCU_ONLY_VAL 2 +#define TPS6594_STARTUP_DEST_ACTIVE_VAL 3 +#define TPS6594_STARTUP_DEST_SHIFT 5 +#define TPS6594_STARTUP_DEST_MCU_ONLY (TPS6594_STARTUP_DEST_MCU_ONLY_VAL \ + << TPS6594_STARTUP_DEST_SHIFT) +#define TPS6594_STARTUP_DEST_ACTIVE (TPS6594_STARTUP_DEST_ACTIVE_VAL \ + << TPS6594_STARTUP_DEST_SHIFT) + +/* + * To update the PMIC firmware, the 
user must be able to access + * page 0 (user registers) and page 1 (NVM control and configuration). + */ +#define TPS6594_PMIC_MAX_POS 0x200 + +#define TPS6594_FILE_TO_PFSM(f) container_of((f)->private_data, struct tps6594_pfsm, miscdev) + +/** + * struct tps6594_pfsm - device private data structure + * + * @miscdev: misc device infos + * @regmap: regmap for accessing the device registers + */ +struct tps6594_pfsm { + struct miscdevice miscdev; + struct regmap *regmap; +}; + +static ssize_t tps6594_pfsm_read(struct file *f, char __user *buf, + size_t count, loff_t *ppos) +{ + struct tps6594_pfsm *pfsm = TPS6594_FILE_TO_PFSM(f); + loff_t pos = *ppos; + unsigned int val; + int ret; + int i; + + if (pos < 0) + return -EINVAL; + if (pos >= TPS6594_PMIC_MAX_POS) + return 0; + if (count > TPS6594_PMIC_MAX_POS - pos) + count = TPS6594_PMIC_MAX_POS - pos; + + for (i = 0 ; i < count ; i++) { + ret = regmap_read(pfsm->regmap, pos + i, &val); + if (ret) + return ret; + + if (put_user(val, buf + i)) + return -EFAULT; + } + + *ppos = pos + count; + + return count; +} + +static ssize_t tps6594_pfsm_write(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct tps6594_pfsm *pfsm = TPS6594_FILE_TO_PFSM(f); + loff_t pos = *ppos; + char val; + int ret; + int i; + + if (pos < 0) + return -EINVAL; + if (pos >= TPS6594_PMIC_MAX_POS || !count) + return 0; + if (count > TPS6594_PMIC_MAX_POS - pos) + count = TPS6594_PMIC_MAX_POS - pos; + + for (i = 0 ; i < count ; i++) { + if (get_user(val, buf + i)) + return -EFAULT; + + ret = regmap_write(pfsm->regmap, pos + i, val); + if (ret) + return ret; + } + + *ppos = pos + count; + + return count; +} + +static int tps6594_pfsm_configure_ret_trig(struct regmap *regmap, u8 gpio_ret, u8 ddr_ret) +{ + int ret; + + if (gpio_ret) + ret = regmap_set_bits(regmap, TPS6594_REG_FSM_I2C_TRIGGERS, + TPS6594_BIT_TRIGGER_I2C(5) | TPS6594_BIT_TRIGGER_I2C(6)); + else + ret = regmap_clear_bits(regmap, TPS6594_REG_FSM_I2C_TRIGGERS, + TPS6594_BIT_TRIGGER_I2C(5) | TPS6594_BIT_TRIGGER_I2C(6)); + if (ret) + return ret; + + if (ddr_ret) + ret = regmap_set_bits(regmap, TPS6594_REG_FSM_I2C_TRIGGERS, + TPS6594_BIT_TRIGGER_I2C(7)); + else + ret = regmap_clear_bits(regmap, TPS6594_REG_FSM_I2C_TRIGGERS, + TPS6594_BIT_TRIGGER_I2C(7)); + + return ret; +} + +static long tps6594_pfsm_ioctl(struct file *f, unsigned int cmd, unsigned long arg) +{ + struct tps6594_pfsm *pfsm = TPS6594_FILE_TO_PFSM(f); + struct pmic_state_opt state_opt; + void __user *argp = (void __user *)arg; + int ret = -ENOIOCTLCMD; + + switch (cmd) { + case PMIC_GOTO_STANDBY: + /* Disable LP mode */ + ret = regmap_clear_bits(pfsm->regmap, TPS6594_REG_RTC_CTRL_2, + TPS6594_BIT_LP_STANDBY_SEL); + if (ret) + return ret; + + /* Force trigger */ + ret = regmap_write_bits(pfsm->regmap, TPS6594_REG_FSM_I2C_TRIGGERS, + TPS6594_BIT_TRIGGER_I2C(0), TPS6594_BIT_TRIGGER_I2C(0)); + break; + case PMIC_GOTO_LP_STANDBY: + /* Enable LP mode */ + ret = regmap_set_bits(pfsm->regmap, TPS6594_REG_RTC_CTRL_2, + TPS6594_BIT_LP_STANDBY_SEL); + if (ret) + return ret; + + /* Force trigger */ + ret = regmap_write_bits(pfsm->regmap, TPS6594_REG_FSM_I2C_TRIGGERS, + TPS6594_BIT_TRIGGER_I2C(0), TPS6594_BIT_TRIGGER_I2C(0)); + break; + case PMIC_UPDATE_PGM: + /* Force trigger */ + ret = regmap_write_bits(pfsm->regmap, TPS6594_REG_FSM_I2C_TRIGGERS, + TPS6594_BIT_TRIGGER_I2C(3), TPS6594_BIT_TRIGGER_I2C(3)); + break; + case PMIC_SET_ACTIVE_STATE: + /* Modify NSLEEP1-2 bits */ + ret = regmap_set_bits(pfsm->regmap, TPS6594_REG_FSM_NSLEEP_TRIGGERS, 
+ TPS6594_BIT_NSLEEP1B | TPS6594_BIT_NSLEEP2B); + break; + case PMIC_SET_MCU_ONLY_STATE: + if (copy_from_user(&state_opt, argp, sizeof(state_opt))) + return -EFAULT; + + /* Configure retention triggers */ + ret = tps6594_pfsm_configure_ret_trig(pfsm->regmap, state_opt.gpio_retention, + state_opt.ddr_retention); + if (ret) + return ret; + + /* Modify NSLEEP1-2 bits */ + ret = regmap_clear_bits(pfsm->regmap, TPS6594_REG_FSM_NSLEEP_TRIGGERS, + TPS6594_BIT_NSLEEP1B); + if (ret) + return ret; + + ret = regmap_set_bits(pfsm->regmap, TPS6594_REG_FSM_NSLEEP_TRIGGERS, + TPS6594_BIT_NSLEEP2B); + break; + case PMIC_SET_RETENTION_STATE: + if (copy_from_user(&state_opt, argp, sizeof(state_opt))) + return -EFAULT; + + /* Configure wake-up destination */ + if (state_opt.mcu_only_startup_dest) + ret = regmap_write_bits(pfsm->regmap, TPS6594_REG_RTC_CTRL_2, + TPS6594_MASK_STARTUP_DEST, + TPS6594_STARTUP_DEST_MCU_ONLY); + else + ret = regmap_write_bits(pfsm->regmap, TPS6594_REG_RTC_CTRL_2, + TPS6594_MASK_STARTUP_DEST, + TPS6594_STARTUP_DEST_ACTIVE); + if (ret) + return ret; + + /* Configure retention triggers */ + ret = tps6594_pfsm_configure_ret_trig(pfsm->regmap, state_opt.gpio_retention, + state_opt.ddr_retention); + if (ret) + return ret; + + /* Modify NSLEEP1-2 bits */ + ret = regmap_clear_bits(pfsm->regmap, TPS6594_REG_FSM_NSLEEP_TRIGGERS, + TPS6594_BIT_NSLEEP2B); + break; + } + + return ret; +} + +static const struct file_operations tps6594_pfsm_fops = { + .owner = THIS_MODULE, + .llseek = generic_file_llseek, + .read = tps6594_pfsm_read, + .write = tps6594_pfsm_write, + .unlocked_ioctl = tps6594_pfsm_ioctl, + .compat_ioctl = compat_ptr_ioctl, +}; + +static irqreturn_t tps6594_pfsm_isr(int irq, void *dev_id) +{ + struct platform_device *pdev = dev_id; + int i; + + for (i = 0 ; i < pdev->num_resources ; i++) { + if (irq == platform_get_irq_byname(pdev, pdev->resource[i].name)) { + dev_err(pdev->dev.parent, "%s event detected\n", pdev->resource[i].name); + return IRQ_HANDLED; + } + } + + return IRQ_NONE; +} + +static int tps6594_pfsm_probe(struct platform_device *pdev) +{ + struct tps6594_pfsm *pfsm; + struct tps6594 *tps = dev_get_drvdata(pdev->dev.parent); + struct device *dev = &pdev->dev; + int irq; + int ret; + int i; + + pfsm = devm_kzalloc(dev, sizeof(struct tps6594_pfsm), GFP_KERNEL); + if (!pfsm) + return -ENOMEM; + + pfsm->regmap = tps->regmap; + + pfsm->miscdev.minor = MISC_DYNAMIC_MINOR; + pfsm->miscdev.name = devm_kasprintf(dev, GFP_KERNEL, "pfsm-%ld-0x%02x", + tps->chip_id, tps->reg); + pfsm->miscdev.fops = &tps6594_pfsm_fops; + pfsm->miscdev.parent = dev->parent; + + for (i = 0 ; i < pdev->num_resources ; i++) { + irq = platform_get_irq_byname(pdev, pdev->resource[i].name); + if (irq < 0) + return irq; + + ret = devm_request_threaded_irq(dev, irq, NULL, + tps6594_pfsm_isr, IRQF_ONESHOT, + pdev->resource[i].name, pdev); + if (ret) + return dev_err_probe(dev, ret, "Failed to request irq\n"); + } + + platform_set_drvdata(pdev, pfsm); + + return misc_register(&pfsm->miscdev); +} + +static void tps6594_pfsm_remove(struct platform_device *pdev) +{ + struct tps6594_pfsm *pfsm = platform_get_drvdata(pdev); + + misc_deregister(&pfsm->miscdev); +} + +static struct platform_driver tps6594_pfsm_driver = { + .driver = { + .name = "tps6594-pfsm", + }, + .probe = tps6594_pfsm_probe, + .remove_new = tps6594_pfsm_remove, +}; + +module_platform_driver(tps6594_pfsm_driver); + +MODULE_ALIAS("platform:tps6594-pfsm"); +MODULE_AUTHOR("Julien Panis "); +MODULE_DESCRIPTION("TPS6594 Pre-configurable Finite State 
Machine Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/tsl2550.c b/drivers/misc/tsl2550.c new file mode 100644 index 0000000000..a3bc282314 --- /dev/null +++ b/drivers/misc/tsl2550.c @@ -0,0 +1,450 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * tsl2550.c - Linux kernel modules for ambient light sensor + * + * Copyright (C) 2007 Rodolfo Giometti + * Copyright (C) 2007 Eurotech S.p.A. + */ + +#include +#include +#include +#include + +#define TSL2550_DRV_NAME "tsl2550" +#define DRIVER_VERSION "1.2" + +/* + * Defines + */ + +#define TSL2550_POWER_DOWN 0x00 +#define TSL2550_POWER_UP 0x03 +#define TSL2550_STANDARD_RANGE 0x18 +#define TSL2550_EXTENDED_RANGE 0x1d +#define TSL2550_READ_ADC0 0x43 +#define TSL2550_READ_ADC1 0x83 + +/* + * Structs + */ + +struct tsl2550_data { + struct i2c_client *client; + struct mutex update_lock; + + unsigned int power_state:1; + unsigned int operating_mode:1; +}; + +/* + * Global data + */ + +static const u8 TSL2550_MODE_RANGE[2] = { + TSL2550_STANDARD_RANGE, TSL2550_EXTENDED_RANGE, +}; + +/* + * Management functions + */ + +static int tsl2550_set_operating_mode(struct i2c_client *client, int mode) +{ + struct tsl2550_data *data = i2c_get_clientdata(client); + + int ret = i2c_smbus_write_byte(client, TSL2550_MODE_RANGE[mode]); + + data->operating_mode = mode; + + return ret; +} + +static int tsl2550_set_power_state(struct i2c_client *client, int state) +{ + struct tsl2550_data *data = i2c_get_clientdata(client); + int ret; + + if (state == 0) + ret = i2c_smbus_write_byte(client, TSL2550_POWER_DOWN); + else { + ret = i2c_smbus_write_byte(client, TSL2550_POWER_UP); + + /* On power up we should reset operating mode also... */ + tsl2550_set_operating_mode(client, data->operating_mode); + } + + data->power_state = state; + + return ret; +} + +static int tsl2550_get_adc_value(struct i2c_client *client, u8 cmd) +{ + int ret; + + ret = i2c_smbus_read_byte_data(client, cmd); + if (ret < 0) + return ret; + if (!(ret & 0x80)) + return -EAGAIN; + return ret & 0x7f; /* remove the "valid" bit */ +} + +/* + * LUX calculation + */ + +#define TSL2550_MAX_LUX 1846 + +static const u8 ratio_lut[] = { + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 98, 98, 98, 98, 98, + 98, 98, 97, 97, 97, 97, 97, 96, + 96, 96, 96, 95, 95, 95, 94, 94, + 93, 93, 93, 92, 92, 91, 91, 90, + 89, 89, 88, 87, 87, 86, 85, 84, + 83, 82, 81, 80, 79, 78, 77, 75, + 74, 73, 71, 69, 68, 66, 64, 62, + 60, 58, 56, 54, 52, 49, 47, 44, + 42, 41, 40, 40, 39, 39, 38, 38, + 37, 37, 37, 36, 36, 36, 35, 35, + 35, 35, 34, 34, 34, 34, 33, 33, + 33, 33, 32, 32, 32, 32, 32, 31, + 31, 31, 31, 31, 30, 30, 30, 30, + 30, +}; + +static const u16 count_lut[] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 44, 46, + 49, 53, 57, 61, 65, 69, 73, 77, + 81, 85, 89, 93, 97, 101, 105, 109, + 115, 123, 131, 139, 147, 155, 163, 171, + 179, 187, 195, 203, 211, 219, 227, 235, + 247, 263, 279, 295, 311, 327, 343, 359, + 375, 391, 407, 423, 439, 455, 471, 487, + 511, 543, 575, 607, 639, 671, 703, 735, + 767, 799, 831, 863, 895, 927, 959, 991, + 1039, 1103, 1167, 1231, 1295, 1359, 1423, 1487, + 1551, 1615, 1679, 1743, 1807, 1871, 1935, 1999, + 2095, 2223, 2351, 2479, 2607, 2735, 2863, 2991, + 3119, 3247, 3375, 3503, 3631, 3759, 3887, 4015, +}; + +/* + * This function is described into Taos TSL2550 Designer's Notebook + * pages 2, 3. 
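+ * In short: with c0 and c1 the linearised counts for channel 0 and 1,
+ * ratio = c1 * 128 / c0 and lux = (c0 - c1) * ratio_lut[ratio] / 256,
+ * clamped to TSL2550_MAX_LUX (see the lookup tables above).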
+ */ +static int tsl2550_calculate_lux(u8 ch0, u8 ch1) +{ + unsigned int lux; + + /* Look up count from channel values */ + u16 c0 = count_lut[ch0]; + u16 c1 = count_lut[ch1]; + + /* Avoid division by 0 and count 1 cannot be greater than count 0 */ + if (c1 <= c0) + if (c0) { + /* + * Calculate ratio. + * Note: the "128" is a scaling factor + */ + u8 r = c1 * 128 / c0; + + /* Calculate LUX */ + lux = ((c0 - c1) * ratio_lut[r]) / 256; + } else + lux = 0; + else + return 0; + + /* LUX range check */ + return lux > TSL2550_MAX_LUX ? TSL2550_MAX_LUX : lux; +} + +/* + * SysFS support + */ + +static ssize_t tsl2550_show_power_state(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct tsl2550_data *data = i2c_get_clientdata(to_i2c_client(dev)); + + return sprintf(buf, "%u\n", data->power_state); +} + +static ssize_t tsl2550_store_power_state(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct tsl2550_data *data = i2c_get_clientdata(client); + unsigned long val = simple_strtoul(buf, NULL, 10); + int ret; + + if (val > 1) + return -EINVAL; + + mutex_lock(&data->update_lock); + ret = tsl2550_set_power_state(client, val); + mutex_unlock(&data->update_lock); + + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR(power_state, S_IWUSR | S_IRUGO, + tsl2550_show_power_state, tsl2550_store_power_state); + +static ssize_t tsl2550_show_operating_mode(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct tsl2550_data *data = i2c_get_clientdata(to_i2c_client(dev)); + + return sprintf(buf, "%u\n", data->operating_mode); +} + +static ssize_t tsl2550_store_operating_mode(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct tsl2550_data *data = i2c_get_clientdata(client); + unsigned long val = simple_strtoul(buf, NULL, 10); + int ret; + + if (val > 1) + return -EINVAL; + + if (data->power_state == 0) + return -EBUSY; + + mutex_lock(&data->update_lock); + ret = tsl2550_set_operating_mode(client, val); + mutex_unlock(&data->update_lock); + + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR(operating_mode, S_IWUSR | S_IRUGO, + tsl2550_show_operating_mode, tsl2550_store_operating_mode); + +static ssize_t __tsl2550_show_lux(struct i2c_client *client, char *buf) +{ + struct tsl2550_data *data = i2c_get_clientdata(client); + u8 ch0, ch1; + int ret; + + ret = tsl2550_get_adc_value(client, TSL2550_READ_ADC0); + if (ret < 0) + return ret; + ch0 = ret; + + ret = tsl2550_get_adc_value(client, TSL2550_READ_ADC1); + if (ret < 0) + return ret; + ch1 = ret; + + /* Do the job */ + ret = tsl2550_calculate_lux(ch0, ch1); + if (ret < 0) + return ret; + if (data->operating_mode == 1) + ret *= 5; + + return sprintf(buf, "%d\n", ret); +} + +static ssize_t tsl2550_show_lux1_input(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct tsl2550_data *data = i2c_get_clientdata(client); + int ret; + + /* No LUX data if not operational */ + if (!data->power_state) + return -EBUSY; + + mutex_lock(&data->update_lock); + ret = __tsl2550_show_lux(client, buf); + mutex_unlock(&data->update_lock); + + return ret; +} + +static DEVICE_ATTR(lux1_input, S_IRUGO, + tsl2550_show_lux1_input, NULL); + +static struct attribute *tsl2550_attributes[] = { + &dev_attr_power_state.attr, + &dev_attr_operating_mode.attr, + 
&dev_attr_lux1_input.attr, + NULL +}; + +static const struct attribute_group tsl2550_attr_group = { + .attrs = tsl2550_attributes, +}; + +/* + * Initialization function + */ + +static int tsl2550_init_client(struct i2c_client *client) +{ + struct tsl2550_data *data = i2c_get_clientdata(client); + int err; + + /* + * Probe the chip. To do so we try to power up the device and then to + * read back the 0x03 code + */ + err = i2c_smbus_read_byte_data(client, TSL2550_POWER_UP); + if (err < 0) + return err; + if (err != TSL2550_POWER_UP) + return -ENODEV; + data->power_state = 1; + + /* Set the default operating mode */ + err = i2c_smbus_write_byte(client, + TSL2550_MODE_RANGE[data->operating_mode]); + if (err < 0) + return err; + + return 0; +} + +/* + * I2C init/probing/exit functions + */ + +static struct i2c_driver tsl2550_driver; +static int tsl2550_probe(struct i2c_client *client) +{ + struct i2c_adapter *adapter = client->adapter; + struct tsl2550_data *data; + int *opmode, err = 0; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WRITE_BYTE + | I2C_FUNC_SMBUS_READ_BYTE_DATA)) { + err = -EIO; + goto exit; + } + + data = kzalloc(sizeof(struct tsl2550_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + data->client = client; + i2c_set_clientdata(client, data); + + /* Check platform data */ + opmode = client->dev.platform_data; + if (opmode) { + if (*opmode < 0 || *opmode > 1) { + dev_err(&client->dev, "invalid operating_mode (%d)\n", + *opmode); + err = -EINVAL; + goto exit_kfree; + } + data->operating_mode = *opmode; + } else + data->operating_mode = 0; /* default mode is standard */ + dev_info(&client->dev, "%s operating mode\n", + data->operating_mode ? "extended" : "standard"); + + mutex_init(&data->update_lock); + + /* Initialize the TSL2550 chip */ + err = tsl2550_init_client(client); + if (err) + goto exit_kfree; + + /* Register sysfs hooks */ + err = sysfs_create_group(&client->dev.kobj, &tsl2550_attr_group); + if (err) + goto exit_kfree; + + dev_info(&client->dev, "support ver. 
%s enabled\n", DRIVER_VERSION); + + return 0; + +exit_kfree: + kfree(data); +exit: + return err; +} + +static void tsl2550_remove(struct i2c_client *client) +{ + sysfs_remove_group(&client->dev.kobj, &tsl2550_attr_group); + + /* Power down the device */ + tsl2550_set_power_state(client, 0); + + kfree(i2c_get_clientdata(client)); +} + +#ifdef CONFIG_PM_SLEEP + +static int tsl2550_suspend(struct device *dev) +{ + return tsl2550_set_power_state(to_i2c_client(dev), 0); +} + +static int tsl2550_resume(struct device *dev) +{ + return tsl2550_set_power_state(to_i2c_client(dev), 1); +} + +static SIMPLE_DEV_PM_OPS(tsl2550_pm_ops, tsl2550_suspend, tsl2550_resume); +#define TSL2550_PM_OPS (&tsl2550_pm_ops) + +#else + +#define TSL2550_PM_OPS NULL + +#endif /* CONFIG_PM_SLEEP */ + +static const struct i2c_device_id tsl2550_id[] = { + { "tsl2550", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, tsl2550_id); + +static const struct of_device_id tsl2550_of_match[] = { + { .compatible = "taos,tsl2550" }, + { } +}; +MODULE_DEVICE_TABLE(of, tsl2550_of_match); + +static struct i2c_driver tsl2550_driver = { + .driver = { + .name = TSL2550_DRV_NAME, + .of_match_table = tsl2550_of_match, + .pm = TSL2550_PM_OPS, + }, + .probe = tsl2550_probe, + .remove = tsl2550_remove, + .id_table = tsl2550_id, +}; + +module_i2c_driver(tsl2550_driver); + +MODULE_AUTHOR("Rodolfo Giometti "); +MODULE_DESCRIPTION("TSL2550 ambient light sensor driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRIVER_VERSION); diff --git a/drivers/misc/uacce/Kconfig b/drivers/misc/uacce/Kconfig new file mode 100644 index 0000000000..5e39b6083b --- /dev/null +++ b/drivers/misc/uacce/Kconfig @@ -0,0 +1,13 @@ +config UACCE + tristate "Accelerator Framework for User Land" + depends on IOMMU_API + help + UACCE provides interface for the user process to access the hardware + without interaction with the kernel space in data path. + + The user-space interface is described in + include/uapi/misc/uacce/uacce.h + + See Documentation/misc-devices/uacce.rst for more details. + + If you don't know what to do here, say N. diff --git a/drivers/misc/uacce/Makefile b/drivers/misc/uacce/Makefile new file mode 100644 index 0000000000..5b4374e8b5 --- /dev/null +++ b/drivers/misc/uacce/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +obj-$(CONFIG_UACCE) += uacce.o diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c new file mode 100644 index 0000000000..930c252753 --- /dev/null +++ b/drivers/misc/uacce/uacce.c @@ -0,0 +1,648 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include +#include +#include +#include +#include +#include +#include + +static struct class *uacce_class; +static dev_t uacce_devt; +static DEFINE_XARRAY_ALLOC(uacce_xa); + +/* + * If the parent driver or the device disappears, the queue state is invalid and + * ops are not usable anymore. 
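+ * Callers check this under q->mutex (or uacce->mutex in the ioctl path)
+ * before dereferencing uacce->ops.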
+ */ +static bool uacce_queue_is_valid(struct uacce_queue *q) +{ + return q->state == UACCE_Q_INIT || q->state == UACCE_Q_STARTED; +} + +static int uacce_start_queue(struct uacce_queue *q) +{ + int ret; + + if (q->state != UACCE_Q_INIT) + return -EINVAL; + + if (q->uacce->ops->start_queue) { + ret = q->uacce->ops->start_queue(q); + if (ret < 0) + return ret; + } + + q->state = UACCE_Q_STARTED; + return 0; +} + +static int uacce_put_queue(struct uacce_queue *q) +{ + struct uacce_device *uacce = q->uacce; + + if ((q->state == UACCE_Q_STARTED) && uacce->ops->stop_queue) + uacce->ops->stop_queue(q); + + if ((q->state == UACCE_Q_INIT || q->state == UACCE_Q_STARTED) && + uacce->ops->put_queue) + uacce->ops->put_queue(q); + + q->state = UACCE_Q_ZOMBIE; + + return 0; +} + +static long uacce_fops_unl_ioctl(struct file *filep, + unsigned int cmd, unsigned long arg) +{ + struct uacce_queue *q = filep->private_data; + struct uacce_device *uacce = q->uacce; + long ret = -ENXIO; + + /* + * uacce->ops->ioctl() may take the mmap_lock when copying arg to/from + * user. Avoid a circular lock dependency with uacce_fops_mmap(), which + * gets called with mmap_lock held, by taking uacce->mutex instead of + * q->mutex. Doing this in uacce_fops_mmap() is not possible because + * uacce_fops_open() calls iommu_sva_bind_device(), which takes + * mmap_lock, while holding uacce->mutex. + */ + mutex_lock(&uacce->mutex); + if (!uacce_queue_is_valid(q)) + goto out_unlock; + + switch (cmd) { + case UACCE_CMD_START_Q: + ret = uacce_start_queue(q); + break; + case UACCE_CMD_PUT_Q: + ret = uacce_put_queue(q); + break; + default: + if (uacce->ops->ioctl) + ret = uacce->ops->ioctl(q, cmd, arg); + else + ret = -EINVAL; + } +out_unlock: + mutex_unlock(&uacce->mutex); + return ret; +} + +#ifdef CONFIG_COMPAT +static long uacce_fops_compat_ioctl(struct file *filep, + unsigned int cmd, unsigned long arg) +{ + arg = (unsigned long)compat_ptr(arg); + + return uacce_fops_unl_ioctl(filep, cmd, arg); +} +#endif + +static int uacce_bind_queue(struct uacce_device *uacce, struct uacce_queue *q) +{ + u32 pasid; + struct iommu_sva *handle; + + if (!(uacce->flags & UACCE_DEV_SVA)) + return 0; + + handle = iommu_sva_bind_device(uacce->parent, current->mm); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + pasid = iommu_sva_get_pasid(handle); + if (pasid == IOMMU_PASID_INVALID) { + iommu_sva_unbind_device(handle); + return -ENODEV; + } + + q->handle = handle; + q->pasid = pasid; + return 0; +} + +static void uacce_unbind_queue(struct uacce_queue *q) +{ + if (!q->handle) + return; + iommu_sva_unbind_device(q->handle); + q->handle = NULL; +} + +static int uacce_fops_open(struct inode *inode, struct file *filep) +{ + struct uacce_device *uacce; + struct uacce_queue *q; + int ret; + + uacce = xa_load(&uacce_xa, iminor(inode)); + if (!uacce) + return -ENODEV; + + q = kzalloc(sizeof(struct uacce_queue), GFP_KERNEL); + if (!q) + return -ENOMEM; + + mutex_lock(&uacce->mutex); + + if (!uacce->parent) { + ret = -EINVAL; + goto out_with_mem; + } + + ret = uacce_bind_queue(uacce, q); + if (ret) + goto out_with_mem; + + q->uacce = uacce; + + if (uacce->ops->get_queue) { + ret = uacce->ops->get_queue(uacce, q->pasid, q); + if (ret < 0) + goto out_with_bond; + } + + init_waitqueue_head(&q->wait); + filep->private_data = q; + q->state = UACCE_Q_INIT; + q->mapping = filep->f_mapping; + mutex_init(&q->mutex); + list_add(&q->list, &uacce->queues); + mutex_unlock(&uacce->mutex); + + return 0; + +out_with_bond: + uacce_unbind_queue(q); +out_with_mem: + kfree(q); 
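+	/* q never made it onto uacce->queues, so no further cleanup is needed */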
+ mutex_unlock(&uacce->mutex); + return ret; +} + +static int uacce_fops_release(struct inode *inode, struct file *filep) +{ + struct uacce_queue *q = filep->private_data; + struct uacce_device *uacce = q->uacce; + + mutex_lock(&uacce->mutex); + uacce_put_queue(q); + uacce_unbind_queue(q); + list_del(&q->list); + mutex_unlock(&uacce->mutex); + kfree(q); + + return 0; +} + +static void uacce_vma_close(struct vm_area_struct *vma) +{ + struct uacce_queue *q = vma->vm_private_data; + + if (vma->vm_pgoff < UACCE_MAX_REGION) { + struct uacce_qfile_region *qfr = q->qfrs[vma->vm_pgoff]; + + mutex_lock(&q->mutex); + q->qfrs[vma->vm_pgoff] = NULL; + mutex_unlock(&q->mutex); + kfree(qfr); + } +} + +static const struct vm_operations_struct uacce_vm_ops = { + .close = uacce_vma_close, +}; + +static int uacce_fops_mmap(struct file *filep, struct vm_area_struct *vma) +{ + struct uacce_queue *q = filep->private_data; + struct uacce_device *uacce = q->uacce; + struct uacce_qfile_region *qfr; + enum uacce_qfrt type = UACCE_MAX_REGION; + int ret = 0; + + if (vma->vm_pgoff < UACCE_MAX_REGION) + type = vma->vm_pgoff; + else + return -EINVAL; + + qfr = kzalloc(sizeof(*qfr), GFP_KERNEL); + if (!qfr) + return -ENOMEM; + + vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_WIPEONFORK); + vma->vm_ops = &uacce_vm_ops; + vma->vm_private_data = q; + qfr->type = type; + + mutex_lock(&q->mutex); + if (!uacce_queue_is_valid(q)) { + ret = -ENXIO; + goto out_with_lock; + } + + if (q->qfrs[type]) { + ret = -EEXIST; + goto out_with_lock; + } + + switch (type) { + case UACCE_QFRT_MMIO: + case UACCE_QFRT_DUS: + if (!uacce->ops->mmap) { + ret = -EINVAL; + goto out_with_lock; + } + + ret = uacce->ops->mmap(q, vma, qfr); + if (ret) + goto out_with_lock; + break; + + default: + ret = -EINVAL; + goto out_with_lock; + } + + q->qfrs[type] = qfr; + mutex_unlock(&q->mutex); + + return ret; + +out_with_lock: + mutex_unlock(&q->mutex); + kfree(qfr); + return ret; +} + +static __poll_t uacce_fops_poll(struct file *file, poll_table *wait) +{ + struct uacce_queue *q = file->private_data; + struct uacce_device *uacce = q->uacce; + __poll_t ret = 0; + + mutex_lock(&q->mutex); + if (!uacce_queue_is_valid(q)) + goto out_unlock; + + poll_wait(file, &q->wait, wait); + + if (uacce->ops->is_q_updated && uacce->ops->is_q_updated(q)) + ret = EPOLLIN | EPOLLRDNORM; + +out_unlock: + mutex_unlock(&q->mutex); + return ret; +} + +static const struct file_operations uacce_fops = { + .owner = THIS_MODULE, + .open = uacce_fops_open, + .release = uacce_fops_release, + .unlocked_ioctl = uacce_fops_unl_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = uacce_fops_compat_ioctl, +#endif + .mmap = uacce_fops_mmap, + .poll = uacce_fops_poll, +}; + +#define to_uacce_device(dev) container_of(dev, struct uacce_device, dev) + +static ssize_t api_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + return sysfs_emit(buf, "%s\n", uacce->api_ver); +} + +static ssize_t flags_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + return sysfs_emit(buf, "%u\n", uacce->flags); +} + +static ssize_t available_instances_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + if (!uacce->ops->get_available_instances) + return -ENODEV; + + return sysfs_emit(buf, "%d\n", + uacce->ops->get_available_instances(uacce)); +} + +static ssize_t algorithms_show(struct 
device *dev, + struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + return sysfs_emit(buf, "%s\n", uacce->algs); +} + +static ssize_t region_mmio_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + return sysfs_emit(buf, "%lu\n", + uacce->qf_pg_num[UACCE_QFRT_MMIO] << PAGE_SHIFT); +} + +static ssize_t region_dus_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + return sysfs_emit(buf, "%lu\n", + uacce->qf_pg_num[UACCE_QFRT_DUS] << PAGE_SHIFT); +} + +static ssize_t isolate_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + return sysfs_emit(buf, "%d\n", uacce->ops->get_isolate_state(uacce)); +} + +static ssize_t isolate_strategy_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + u32 val; + + val = uacce->ops->isolate_err_threshold_read(uacce); + + return sysfs_emit(buf, "%u\n", val); +} + +static ssize_t isolate_strategy_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct uacce_device *uacce = to_uacce_device(dev); + unsigned long val; + int ret; + + if (kstrtoul(buf, 0, &val) < 0) + return -EINVAL; + + if (val > UACCE_MAX_ERR_THRESHOLD) + return -EINVAL; + + ret = uacce->ops->isolate_err_threshold_write(uacce, val); + if (ret) + return ret; + + return count; +} + +static DEVICE_ATTR_RO(api); +static DEVICE_ATTR_RO(flags); +static DEVICE_ATTR_RO(available_instances); +static DEVICE_ATTR_RO(algorithms); +static DEVICE_ATTR_RO(region_mmio_size); +static DEVICE_ATTR_RO(region_dus_size); +static DEVICE_ATTR_RO(isolate); +static DEVICE_ATTR_RW(isolate_strategy); + +static struct attribute *uacce_dev_attrs[] = { + &dev_attr_api.attr, + &dev_attr_flags.attr, + &dev_attr_available_instances.attr, + &dev_attr_algorithms.attr, + &dev_attr_region_mmio_size.attr, + &dev_attr_region_dus_size.attr, + &dev_attr_isolate.attr, + &dev_attr_isolate_strategy.attr, + NULL, +}; + +static umode_t uacce_dev_is_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct uacce_device *uacce = to_uacce_device(dev); + + if (((attr == &dev_attr_region_mmio_size.attr) && + (!uacce->qf_pg_num[UACCE_QFRT_MMIO])) || + ((attr == &dev_attr_region_dus_size.attr) && + (!uacce->qf_pg_num[UACCE_QFRT_DUS]))) + return 0; + + if (attr == &dev_attr_isolate_strategy.attr && + (!uacce->ops->isolate_err_threshold_read && + !uacce->ops->isolate_err_threshold_write)) + return 0; + + if (attr == &dev_attr_isolate.attr && !uacce->ops->get_isolate_state) + return 0; + + return attr->mode; +} + +static struct attribute_group uacce_dev_group = { + .is_visible = uacce_dev_is_visible, + .attrs = uacce_dev_attrs, +}; + +__ATTRIBUTE_GROUPS(uacce_dev); + +static void uacce_release(struct device *dev) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + kfree(uacce); +} + +static unsigned int uacce_enable_sva(struct device *parent, unsigned int flags) +{ + int ret; + + if (!(flags & UACCE_DEV_SVA)) + return flags; + + flags &= ~UACCE_DEV_SVA; + + ret = iommu_dev_enable_feature(parent, IOMMU_DEV_FEAT_IOPF); + if (ret) { + dev_err(parent, "failed to enable IOPF feature! 
ret = %pe\n", ERR_PTR(ret)); + return flags; + } + + ret = iommu_dev_enable_feature(parent, IOMMU_DEV_FEAT_SVA); + if (ret) { + dev_err(parent, "failed to enable SVA feature! ret = %pe\n", ERR_PTR(ret)); + iommu_dev_disable_feature(parent, IOMMU_DEV_FEAT_IOPF); + return flags; + } + + return flags | UACCE_DEV_SVA; +} + +static void uacce_disable_sva(struct uacce_device *uacce) +{ + if (!(uacce->flags & UACCE_DEV_SVA)) + return; + + iommu_dev_disable_feature(uacce->parent, IOMMU_DEV_FEAT_SVA); + iommu_dev_disable_feature(uacce->parent, IOMMU_DEV_FEAT_IOPF); +} + +/** + * uacce_alloc() - alloc an accelerator + * @parent: pointer of uacce parent device + * @interface: pointer of uacce_interface for register + * + * Returns uacce pointer if success and ERR_PTR if not + * Need check returned negotiated uacce->flags + */ +struct uacce_device *uacce_alloc(struct device *parent, + struct uacce_interface *interface) +{ + unsigned int flags = interface->flags; + struct uacce_device *uacce; + int ret; + + uacce = kzalloc(sizeof(struct uacce_device), GFP_KERNEL); + if (!uacce) + return ERR_PTR(-ENOMEM); + + flags = uacce_enable_sva(parent, flags); + + uacce->parent = parent; + uacce->flags = flags; + uacce->ops = interface->ops; + + ret = xa_alloc(&uacce_xa, &uacce->dev_id, uacce, xa_limit_32b, + GFP_KERNEL); + if (ret < 0) + goto err_with_uacce; + + INIT_LIST_HEAD(&uacce->queues); + mutex_init(&uacce->mutex); + device_initialize(&uacce->dev); + uacce->dev.devt = MKDEV(MAJOR(uacce_devt), uacce->dev_id); + uacce->dev.class = uacce_class; + uacce->dev.groups = uacce_dev_groups; + uacce->dev.parent = uacce->parent; + uacce->dev.release = uacce_release; + dev_set_name(&uacce->dev, "%s-%d", interface->name, uacce->dev_id); + + return uacce; + +err_with_uacce: + uacce_disable_sva(uacce); + kfree(uacce); + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(uacce_alloc); + +/** + * uacce_register() - add the accelerator to cdev and export to user space + * @uacce: The initialized uacce device + * + * Return 0 if register succeeded, or an error. + */ +int uacce_register(struct uacce_device *uacce) +{ + if (!uacce) + return -ENODEV; + + uacce->cdev = cdev_alloc(); + if (!uacce->cdev) + return -ENOMEM; + + uacce->cdev->ops = &uacce_fops; + uacce->cdev->owner = THIS_MODULE; + + return cdev_device_add(uacce->cdev, &uacce->dev); +} +EXPORT_SYMBOL_GPL(uacce_register); + +/** + * uacce_remove() - remove the accelerator + * @uacce: the accelerator to remove + */ +void uacce_remove(struct uacce_device *uacce) +{ + struct uacce_queue *q, *next_q; + + if (!uacce) + return; + + /* + * uacce_fops_open() may be running concurrently, even after we remove + * the cdev. Holding uacce->mutex ensures that open() does not obtain a + * removed uacce device. + */ + mutex_lock(&uacce->mutex); + /* ensure no open queue remains */ + list_for_each_entry_safe(q, next_q, &uacce->queues, list) { + /* + * Taking q->mutex ensures that fops do not use the defunct + * uacce->ops after the queue is disabled. 
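+		 * Subsequent fops calls then find the queue in an invalid
+		 * state and bail out instead of touching the defunct ops.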
+ */ + mutex_lock(&q->mutex); + uacce_put_queue(q); + mutex_unlock(&q->mutex); + uacce_unbind_queue(q); + + /* + * unmap remaining mapping from user space, preventing user still + * access the mmaped area while parent device is already removed + */ + unmap_mapping_range(q->mapping, 0, 0, 1); + } + + /* disable sva now since no opened queues */ + uacce_disable_sva(uacce); + + if (uacce->cdev) + cdev_device_del(uacce->cdev, &uacce->dev); + xa_erase(&uacce_xa, uacce->dev_id); + /* + * uacce exists as long as there are open fds, but ops will be freed + * now. Ensure that bugs cause NULL deref rather than use-after-free. + */ + uacce->ops = NULL; + uacce->parent = NULL; + mutex_unlock(&uacce->mutex); + put_device(&uacce->dev); +} +EXPORT_SYMBOL_GPL(uacce_remove); + +static int __init uacce_init(void) +{ + int ret; + + uacce_class = class_create(UACCE_NAME); + if (IS_ERR(uacce_class)) + return PTR_ERR(uacce_class); + + ret = alloc_chrdev_region(&uacce_devt, 0, MINORMASK, UACCE_NAME); + if (ret) + class_destroy(uacce_class); + + return ret; +} + +static __exit void uacce_exit(void) +{ + unregister_chrdev_region(uacce_devt, MINORMASK); + class_destroy(uacce_class); +} + +subsys_initcall(uacce_init); +module_exit(uacce_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("HiSilicon Tech. Co., Ltd."); +MODULE_DESCRIPTION("Accelerator interface for Userland applications"); diff --git a/drivers/misc/vcpu_stall_detector.c b/drivers/misc/vcpu_stall_detector.c new file mode 100644 index 0000000000..6479c962da --- /dev/null +++ b/drivers/misc/vcpu_stall_detector.c @@ -0,0 +1,222 @@ +// SPDX-License-Identifier: GPL-2.0-only +// +// VCPU stall detector. +// Copyright (C) Google, 2022 + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define VCPU_STALL_REG_STATUS (0x00) +#define VCPU_STALL_REG_LOAD_CNT (0x04) +#define VCPU_STALL_REG_CURRENT_CNT (0x08) +#define VCPU_STALL_REG_CLOCK_FREQ_HZ (0x0C) +#define VCPU_STALL_REG_LEN (0x10) + +#define VCPU_STALL_DEFAULT_CLOCK_HZ (10) +#define VCPU_STALL_MAX_CLOCK_HZ (100) +#define VCPU_STALL_DEFAULT_TIMEOUT_SEC (8) +#define VCPU_STALL_MAX_TIMEOUT_SEC (600) + +struct vcpu_stall_detect_config { + u32 clock_freq_hz; + u32 stall_timeout_sec; + + void __iomem *membase; + struct platform_device *dev; + enum cpuhp_state hp_online; +}; + +struct vcpu_stall_priv { + struct hrtimer vcpu_hrtimer; + bool is_initialized; +}; + +/* The vcpu stall configuration structure which applies to all the CPUs */ +static struct vcpu_stall_detect_config vcpu_stall_config; + +#define vcpu_stall_reg_write(vcpu, reg, value) \ + writel_relaxed((value), \ + (void __iomem *)(vcpu_stall_config.membase + \ + (vcpu) * VCPU_STALL_REG_LEN + (reg))) + + +static struct vcpu_stall_priv __percpu *vcpu_stall_detectors; + +static enum hrtimer_restart +vcpu_stall_detect_timer_fn(struct hrtimer *hrtimer) +{ + u32 ticks, ping_timeout_ms; + + /* Reload the stall detector counter register every + * `ping_timeout_ms` to prevent the virtual device + * from decrementing it to 0. The virtual device decrements this + * register at 'clock_freq_hz' frequency. 
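	 * A counter that does reach 0 is what the device treats as a stalled
	 * vCPU.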
+ */ + ticks = vcpu_stall_config.clock_freq_hz * + vcpu_stall_config.stall_timeout_sec; + vcpu_stall_reg_write(smp_processor_id(), + VCPU_STALL_REG_LOAD_CNT, ticks); + + ping_timeout_ms = vcpu_stall_config.stall_timeout_sec * + MSEC_PER_SEC / 2; + hrtimer_forward_now(hrtimer, + ms_to_ktime(ping_timeout_ms)); + + return HRTIMER_RESTART; +} + +static int start_stall_detector_cpu(unsigned int cpu) +{ + u32 ticks, ping_timeout_ms; + struct vcpu_stall_priv *vcpu_stall_detector = + this_cpu_ptr(vcpu_stall_detectors); + struct hrtimer *vcpu_hrtimer = &vcpu_stall_detector->vcpu_hrtimer; + + vcpu_stall_reg_write(cpu, VCPU_STALL_REG_CLOCK_FREQ_HZ, + vcpu_stall_config.clock_freq_hz); + + /* Compute the number of ticks required for the stall detector + * counter register based on the internal clock frequency and the + * timeout value given from the device tree. + */ + ticks = vcpu_stall_config.clock_freq_hz * + vcpu_stall_config.stall_timeout_sec; + vcpu_stall_reg_write(cpu, VCPU_STALL_REG_LOAD_CNT, ticks); + + /* Enable the internal clock and start the stall detector */ + vcpu_stall_reg_write(cpu, VCPU_STALL_REG_STATUS, 1); + + /* Pet the stall detector at half of its expiration timeout + * to prevent spurious resets. + */ + ping_timeout_ms = vcpu_stall_config.stall_timeout_sec * + MSEC_PER_SEC / 2; + + hrtimer_init(vcpu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + vcpu_hrtimer->function = vcpu_stall_detect_timer_fn; + vcpu_stall_detector->is_initialized = true; + + hrtimer_start(vcpu_hrtimer, ms_to_ktime(ping_timeout_ms), + HRTIMER_MODE_REL_PINNED); + + return 0; +} + +static int stop_stall_detector_cpu(unsigned int cpu) +{ + struct vcpu_stall_priv *vcpu_stall_detector = + per_cpu_ptr(vcpu_stall_detectors, cpu); + + if (!vcpu_stall_detector->is_initialized) + return 0; + + /* Disable the stall detector for the current CPU */ + hrtimer_cancel(&vcpu_stall_detector->vcpu_hrtimer); + vcpu_stall_reg_write(cpu, VCPU_STALL_REG_STATUS, 0); + vcpu_stall_detector->is_initialized = false; + + return 0; +} + +static int vcpu_stall_detect_probe(struct platform_device *pdev) +{ + int ret; + struct resource *r; + void __iomem *membase; + u32 clock_freq_hz = VCPU_STALL_DEFAULT_CLOCK_HZ; + u32 stall_timeout_sec = VCPU_STALL_DEFAULT_TIMEOUT_SEC; + struct device_node *np = pdev->dev.of_node; + + vcpu_stall_detectors = devm_alloc_percpu(&pdev->dev, + typeof(struct vcpu_stall_priv)); + if (!vcpu_stall_detectors) + return -ENOMEM; + + membase = devm_platform_get_and_ioremap_resource(pdev, 0, &r); + if (IS_ERR(membase)) { + dev_err(&pdev->dev, "Failed to get memory resource\n"); + return PTR_ERR(membase); + } + + if (!of_property_read_u32(np, "clock-frequency", &clock_freq_hz)) { + if (!(clock_freq_hz > 0 && + clock_freq_hz < VCPU_STALL_MAX_CLOCK_HZ)) { + dev_warn(&pdev->dev, "clk out of range\n"); + clock_freq_hz = VCPU_STALL_DEFAULT_CLOCK_HZ; + } + } + + if (!of_property_read_u32(np, "timeout-sec", &stall_timeout_sec)) { + if (!(stall_timeout_sec > 0 && + stall_timeout_sec < VCPU_STALL_MAX_TIMEOUT_SEC)) { + dev_warn(&pdev->dev, "stall timeout out of range\n"); + stall_timeout_sec = VCPU_STALL_DEFAULT_TIMEOUT_SEC; + } + } + + vcpu_stall_config = (struct vcpu_stall_detect_config) { + .membase = membase, + .clock_freq_hz = clock_freq_hz, + .stall_timeout_sec = stall_timeout_sec + }; + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "virt/vcpu_stall_detector:online", + start_stall_detector_cpu, + stop_stall_detector_cpu); + if (ret < 0) { + dev_err(&pdev->dev, "failed to install cpu hotplug"); + goto err; + } + + 
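+	/*
+	 * With CPUHP_AP_ONLINE_DYN, a successful cpuhp_setup_state() returns
+	 * the dynamically allocated state number; keep it so the remove path
+	 * can call cpuhp_remove_state().
+	 */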
vcpu_stall_config.hp_online = ret; + return 0; +err: + return ret; +} + +static int vcpu_stall_detect_remove(struct platform_device *pdev) +{ + int cpu; + + cpuhp_remove_state(vcpu_stall_config.hp_online); + + for_each_possible_cpu(cpu) + stop_stall_detector_cpu(cpu); + + return 0; +} + +static const struct of_device_id vcpu_stall_detect_of_match[] = { + { .compatible = "qemu,vcpu-stall-detector", }, + {} +}; + +MODULE_DEVICE_TABLE(of, vcpu_stall_detect_of_match); + +static struct platform_driver vcpu_stall_detect_driver = { + .probe = vcpu_stall_detect_probe, + .remove = vcpu_stall_detect_remove, + .driver = { + .name = KBUILD_MODNAME, + .of_match_table = vcpu_stall_detect_of_match, + }, +}; + +module_platform_driver(vcpu_stall_detect_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Sebastian Ene "); +MODULE_DESCRIPTION("VCPU stall detector"); diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c new file mode 100644 index 0000000000..9ce9b9e0e9 --- /dev/null +++ b/drivers/misc/vmw_balloon.c @@ -0,0 +1,1937 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * VMware Balloon driver. + * + * Copyright (C) 2000-2018, VMware, Inc. All Rights Reserved. + * + * This is VMware physical memory management driver for Linux. The driver + * acts like a "balloon" that can be inflated to reclaim physical pages by + * reserving them in the guest and invalidating them in the monitor, + * freeing up the underlying machine pages so they can be allocated to + * other guests. The balloon can also be deflated to allow the guest to + * use more physical memory. Higher level policies can control the sizes + * of balloons in VMs in order to manage physical memory resources. + */ + +//#define DEBUG +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_AUTHOR("VMware, Inc."); +MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver"); +MODULE_ALIAS("dmi:*:svnVMware*:*"); +MODULE_ALIAS("vmware_vmmemctl"); +MODULE_LICENSE("GPL"); + +static bool __read_mostly vmwballoon_shrinker_enable; +module_param(vmwballoon_shrinker_enable, bool, 0444); +MODULE_PARM_DESC(vmwballoon_shrinker_enable, + "Enable non-cooperative out-of-memory protection. Disabled by default as it may degrade performance."); + +/* Delay in seconds after shrink before inflation. */ +#define VMBALLOON_SHRINK_DELAY (5) + +/* Maximum number of refused pages we accumulate during inflation cycle */ +#define VMW_BALLOON_MAX_REFUSED 16 + +/* Magic number for the balloon mount-point */ +#define BALLOON_VMW_MAGIC 0x0ba11007 + +/* + * Hypervisor communication port definitions. + */ +#define VMW_BALLOON_HV_PORT 0x5670 +#define VMW_BALLOON_HV_MAGIC 0x456c6d6f +#define VMW_BALLOON_GUEST_ID 1 /* Linux */ + +enum vmwballoon_capabilities { + /* + * Bit 0 is reserved and not associated to any capability. 
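+	 *
+	 * The remaining bits are ORed into the capability mask that is
+	 * advertised to the hypervisor with the "start" command; see
+	 * VMW_BALLOON_CAPABILITIES_COMMON below.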
+ */ + VMW_BALLOON_BASIC_CMDS = (1 << 1), + VMW_BALLOON_BATCHED_CMDS = (1 << 2), + VMW_BALLOON_BATCHED_2M_CMDS = (1 << 3), + VMW_BALLOON_SIGNALLED_WAKEUP_CMD = (1 << 4), + VMW_BALLOON_64_BIT_TARGET = (1 << 5) +}; + +#define VMW_BALLOON_CAPABILITIES_COMMON (VMW_BALLOON_BASIC_CMDS \ + | VMW_BALLOON_BATCHED_CMDS \ + | VMW_BALLOON_BATCHED_2M_CMDS \ + | VMW_BALLOON_SIGNALLED_WAKEUP_CMD) + +#define VMW_BALLOON_2M_ORDER (PMD_SHIFT - PAGE_SHIFT) + +/* + * 64-bit targets are only supported in 64-bit + */ +#ifdef CONFIG_64BIT +#define VMW_BALLOON_CAPABILITIES (VMW_BALLOON_CAPABILITIES_COMMON \ + | VMW_BALLOON_64_BIT_TARGET) +#else +#define VMW_BALLOON_CAPABILITIES VMW_BALLOON_CAPABILITIES_COMMON +#endif + +enum vmballoon_page_size_type { + VMW_BALLOON_4K_PAGE, + VMW_BALLOON_2M_PAGE, + VMW_BALLOON_LAST_SIZE = VMW_BALLOON_2M_PAGE +}; + +#define VMW_BALLOON_NUM_PAGE_SIZES (VMW_BALLOON_LAST_SIZE + 1) + +static const char * const vmballoon_page_size_names[] = { + [VMW_BALLOON_4K_PAGE] = "4k", + [VMW_BALLOON_2M_PAGE] = "2M" +}; + +enum vmballoon_op { + VMW_BALLOON_INFLATE, + VMW_BALLOON_DEFLATE +}; + +enum vmballoon_op_stat_type { + VMW_BALLOON_OP_STAT, + VMW_BALLOON_OP_FAIL_STAT +}; + +#define VMW_BALLOON_OP_STAT_TYPES (VMW_BALLOON_OP_FAIL_STAT + 1) + +/** + * enum vmballoon_cmd_type - backdoor commands. + * + * Availability of the commands is as followed: + * + * %VMW_BALLOON_CMD_START, %VMW_BALLOON_CMD_GET_TARGET and + * %VMW_BALLOON_CMD_GUEST_ID are always available. + * + * If the host reports %VMW_BALLOON_BASIC_CMDS are supported then + * %VMW_BALLOON_CMD_LOCK and %VMW_BALLOON_CMD_UNLOCK commands are available. + * + * If the host reports %VMW_BALLOON_BATCHED_CMDS are supported then + * %VMW_BALLOON_CMD_BATCHED_LOCK and VMW_BALLOON_CMD_BATCHED_UNLOCK commands + * are available. + * + * If the host reports %VMW_BALLOON_BATCHED_2M_CMDS are supported then + * %VMW_BALLOON_CMD_BATCHED_2M_LOCK and %VMW_BALLOON_CMD_BATCHED_2M_UNLOCK + * are supported. + * + * If the host reports VMW_BALLOON_SIGNALLED_WAKEUP_CMD is supported then + * VMW_BALLOON_CMD_VMCI_DOORBELL_SET command is supported. + * + * @VMW_BALLOON_CMD_START: Communicating supported version with the hypervisor. + * @VMW_BALLOON_CMD_GET_TARGET: Gets the balloon target size. + * @VMW_BALLOON_CMD_LOCK: Informs the hypervisor about a ballooned page. + * @VMW_BALLOON_CMD_UNLOCK: Informs the hypervisor about a page that is about + * to be deflated from the balloon. + * @VMW_BALLOON_CMD_GUEST_ID: Informs the hypervisor about the type of OS that + * runs in the VM. + * @VMW_BALLOON_CMD_BATCHED_LOCK: Inform the hypervisor about a batch of + * ballooned pages (up to 512). + * @VMW_BALLOON_CMD_BATCHED_UNLOCK: Inform the hypervisor about a batch of + * pages that are about to be deflated from the + * balloon (up to 512). + * @VMW_BALLOON_CMD_BATCHED_2M_LOCK: Similar to @VMW_BALLOON_CMD_BATCHED_LOCK + * for 2MB pages. + * @VMW_BALLOON_CMD_BATCHED_2M_UNLOCK: Similar to + * @VMW_BALLOON_CMD_BATCHED_UNLOCK for 2MB + * pages. + * @VMW_BALLOON_CMD_VMCI_DOORBELL_SET: A command to set doorbell notification + * that would be invoked when the balloon + * size changes. + * @VMW_BALLOON_CMD_LAST: Value of the last command. 
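+ *
+ * For example, a host that only reports %VMW_BALLOON_BASIC_CMDS is driven one
+ * page at a time with %VMW_BALLOON_CMD_LOCK and %VMW_BALLOON_CMD_UNLOCK, while
+ * a host that reports %VMW_BALLOON_BATCHED_CMDS is handed a batch page of up
+ * to 512 entries per command.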
+ */ +enum vmballoon_cmd_type { + VMW_BALLOON_CMD_START, + VMW_BALLOON_CMD_GET_TARGET, + VMW_BALLOON_CMD_LOCK, + VMW_BALLOON_CMD_UNLOCK, + VMW_BALLOON_CMD_GUEST_ID, + /* No command 5 */ + VMW_BALLOON_CMD_BATCHED_LOCK = 6, + VMW_BALLOON_CMD_BATCHED_UNLOCK, + VMW_BALLOON_CMD_BATCHED_2M_LOCK, + VMW_BALLOON_CMD_BATCHED_2M_UNLOCK, + VMW_BALLOON_CMD_VMCI_DOORBELL_SET, + VMW_BALLOON_CMD_LAST = VMW_BALLOON_CMD_VMCI_DOORBELL_SET, +}; + +#define VMW_BALLOON_CMD_NUM (VMW_BALLOON_CMD_LAST + 1) + +enum vmballoon_error_codes { + VMW_BALLOON_SUCCESS, + VMW_BALLOON_ERROR_CMD_INVALID, + VMW_BALLOON_ERROR_PPN_INVALID, + VMW_BALLOON_ERROR_PPN_LOCKED, + VMW_BALLOON_ERROR_PPN_UNLOCKED, + VMW_BALLOON_ERROR_PPN_PINNED, + VMW_BALLOON_ERROR_PPN_NOTNEEDED, + VMW_BALLOON_ERROR_RESET, + VMW_BALLOON_ERROR_BUSY +}; + +#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES (0x03000000) + +#define VMW_BALLOON_CMD_WITH_TARGET_MASK \ + ((1UL << VMW_BALLOON_CMD_GET_TARGET) | \ + (1UL << VMW_BALLOON_CMD_LOCK) | \ + (1UL << VMW_BALLOON_CMD_UNLOCK) | \ + (1UL << VMW_BALLOON_CMD_BATCHED_LOCK) | \ + (1UL << VMW_BALLOON_CMD_BATCHED_UNLOCK) | \ + (1UL << VMW_BALLOON_CMD_BATCHED_2M_LOCK) | \ + (1UL << VMW_BALLOON_CMD_BATCHED_2M_UNLOCK)) + +static const char * const vmballoon_cmd_names[] = { + [VMW_BALLOON_CMD_START] = "start", + [VMW_BALLOON_CMD_GET_TARGET] = "target", + [VMW_BALLOON_CMD_LOCK] = "lock", + [VMW_BALLOON_CMD_UNLOCK] = "unlock", + [VMW_BALLOON_CMD_GUEST_ID] = "guestType", + [VMW_BALLOON_CMD_BATCHED_LOCK] = "batchLock", + [VMW_BALLOON_CMD_BATCHED_UNLOCK] = "batchUnlock", + [VMW_BALLOON_CMD_BATCHED_2M_LOCK] = "2m-lock", + [VMW_BALLOON_CMD_BATCHED_2M_UNLOCK] = "2m-unlock", + [VMW_BALLOON_CMD_VMCI_DOORBELL_SET] = "doorbellSet" +}; + +enum vmballoon_stat_page { + VMW_BALLOON_PAGE_STAT_ALLOC, + VMW_BALLOON_PAGE_STAT_ALLOC_FAIL, + VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC, + VMW_BALLOON_PAGE_STAT_REFUSED_FREE, + VMW_BALLOON_PAGE_STAT_FREE, + VMW_BALLOON_PAGE_STAT_LAST = VMW_BALLOON_PAGE_STAT_FREE +}; + +#define VMW_BALLOON_PAGE_STAT_NUM (VMW_BALLOON_PAGE_STAT_LAST + 1) + +enum vmballoon_stat_general { + VMW_BALLOON_STAT_TIMER, + VMW_BALLOON_STAT_DOORBELL, + VMW_BALLOON_STAT_RESET, + VMW_BALLOON_STAT_SHRINK, + VMW_BALLOON_STAT_SHRINK_FREE, + VMW_BALLOON_STAT_LAST = VMW_BALLOON_STAT_SHRINK_FREE +}; + +#define VMW_BALLOON_STAT_NUM (VMW_BALLOON_STAT_LAST + 1) + +static DEFINE_STATIC_KEY_TRUE(vmw_balloon_batching); +static DEFINE_STATIC_KEY_FALSE(balloon_stat_enabled); + +struct vmballoon_ctl { + struct list_head pages; + struct list_head refused_pages; + struct list_head prealloc_pages; + unsigned int n_refused_pages; + unsigned int n_pages; + enum vmballoon_page_size_type page_size; + enum vmballoon_op op; +}; + +/** + * struct vmballoon_batch_entry - a batch entry for lock or unlock. + * + * @status: the status of the operation, which is written by the hypervisor. + * @reserved: reserved for future use. Must be set to zero. + * @pfn: the physical frame number of the page to be locked or unlocked. + */ +struct vmballoon_batch_entry { + u64 status : 5; + u64 reserved : PAGE_SHIFT - 5; + u64 pfn : 52; +} __packed; + +struct vmballoon { + /** + * @max_page_size: maximum supported page size for ballooning. + * + * Protected by @conf_sem + */ + enum vmballoon_page_size_type max_page_size; + + /** + * @size: balloon actual size in basic page size (frames). + * + * While we currently do not support size which is bigger than 32-bit, + * in preparation for future support, use 64-bits. 
+ */ + atomic64_t size; + + /** + * @target: balloon target size in basic page size (frames). + * + * We do not protect the target under the assumption that setting the + * value is always done through a single write. If this assumption ever + * breaks, we would have to use X_ONCE for accesses, and suffer the less + * optimized code. Although we may read stale target value if multiple + * accesses happen at once, the performance impact should be minor. + */ + unsigned long target; + + /** + * @reset_required: reset flag + * + * Setting this flag may introduce races, but the code is expected to + * handle them gracefully. In the worst case, another operation will + * fail as reset did not take place. Clearing the flag is done while + * holding @conf_sem for write. + */ + bool reset_required; + + /** + * @capabilities: hypervisor balloon capabilities. + * + * Protected by @conf_sem. + */ + unsigned long capabilities; + + /** + * @batch_page: pointer to communication batch page. + * + * When batching is used, batch_page points to a page, which holds up to + * %VMW_BALLOON_BATCH_MAX_PAGES entries for locking or unlocking. + */ + struct vmballoon_batch_entry *batch_page; + + /** + * @batch_max_pages: maximum pages that can be locked/unlocked. + * + * Indicates the number of pages that the hypervisor can lock or unlock + * at once, according to whether batching is enabled. If batching is + * disabled, only a single page can be locked/unlock on each operation. + * + * Protected by @conf_sem. + */ + unsigned int batch_max_pages; + + /** + * @page: page to be locked/unlocked by the hypervisor + * + * @page is only used when batching is disabled and a single page is + * reclaimed on each iteration. + * + * Protected by @comm_lock. + */ + struct page *page; + + /** + * @shrink_timeout: timeout until the next inflation. + * + * After an shrink event, indicates the time in jiffies after which + * inflation is allowed again. Can be written concurrently with reads, + * so must use READ_ONCE/WRITE_ONCE when accessing. + */ + unsigned long shrink_timeout; + + /* statistics */ + struct vmballoon_stats *stats; + + /** + * @b_dev_info: balloon device information descriptor. + */ + struct balloon_dev_info b_dev_info; + + struct delayed_work dwork; + + /** + * @huge_pages - list of the inflated 2MB pages. + * + * Protected by @b_dev_info.pages_lock . + */ + struct list_head huge_pages; + + /** + * @vmci_doorbell. + * + * Protected by @conf_sem. + */ + struct vmci_handle vmci_doorbell; + + /** + * @conf_sem: semaphore to protect the configuration and the statistics. + */ + struct rw_semaphore conf_sem; + + /** + * @comm_lock: lock to protect the communication with the host. + * + * Lock ordering: @conf_sem -> @comm_lock . + */ + spinlock_t comm_lock; + + /** + * @shrinker: shrinker interface that is used to avoid over-inflation. + */ + struct shrinker shrinker; + + /** + * @shrinker_registered: whether the shrinker was registered. + * + * The shrinker interface does not handle gracefully the removal of + * shrinker that was not registered before. This indication allows to + * simplify the unregistration process. 
+ */ + bool shrinker_registered; +}; + +static struct vmballoon balloon; + +struct vmballoon_stats { + /* timer / doorbell operations */ + atomic64_t general_stat[VMW_BALLOON_STAT_NUM]; + + /* allocation statistics for huge and small pages */ + atomic64_t + page_stat[VMW_BALLOON_PAGE_STAT_NUM][VMW_BALLOON_NUM_PAGE_SIZES]; + + /* Monitor operations: total operations, and failures */ + atomic64_t ops[VMW_BALLOON_CMD_NUM][VMW_BALLOON_OP_STAT_TYPES]; +}; + +static inline bool is_vmballoon_stats_on(void) +{ + return IS_ENABLED(CONFIG_DEBUG_FS) && + static_branch_unlikely(&balloon_stat_enabled); +} + +static inline void vmballoon_stats_op_inc(struct vmballoon *b, unsigned int op, + enum vmballoon_op_stat_type type) +{ + if (is_vmballoon_stats_on()) + atomic64_inc(&b->stats->ops[op][type]); +} + +static inline void vmballoon_stats_gen_inc(struct vmballoon *b, + enum vmballoon_stat_general stat) +{ + if (is_vmballoon_stats_on()) + atomic64_inc(&b->stats->general_stat[stat]); +} + +static inline void vmballoon_stats_gen_add(struct vmballoon *b, + enum vmballoon_stat_general stat, + unsigned int val) +{ + if (is_vmballoon_stats_on()) + atomic64_add(val, &b->stats->general_stat[stat]); +} + +static inline void vmballoon_stats_page_inc(struct vmballoon *b, + enum vmballoon_stat_page stat, + enum vmballoon_page_size_type size) +{ + if (is_vmballoon_stats_on()) + atomic64_inc(&b->stats->page_stat[stat][size]); +} + +static inline void vmballoon_stats_page_add(struct vmballoon *b, + enum vmballoon_stat_page stat, + enum vmballoon_page_size_type size, + unsigned int val) +{ + if (is_vmballoon_stats_on()) + atomic64_add(val, &b->stats->page_stat[stat][size]); +} + +static inline unsigned long +__vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1, + unsigned long arg2, unsigned long *result) +{ + unsigned long status, dummy1, dummy2, dummy3, local_result; + + vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_STAT); + + asm volatile ("inl %%dx" : + "=a"(status), + "=c"(dummy1), + "=d"(dummy2), + "=b"(local_result), + "=S"(dummy3) : + "0"(VMW_BALLOON_HV_MAGIC), + "1"(cmd), + "2"(VMW_BALLOON_HV_PORT), + "3"(arg1), + "4"(arg2) : + "memory"); + + /* update the result if needed */ + if (result) + *result = (cmd == VMW_BALLOON_CMD_START) ? dummy1 : + local_result; + + /* update target when applicable */ + if (status == VMW_BALLOON_SUCCESS && + ((1ul << cmd) & VMW_BALLOON_CMD_WITH_TARGET_MASK)) + WRITE_ONCE(b->target, local_result); + + if (status != VMW_BALLOON_SUCCESS && + status != VMW_BALLOON_SUCCESS_WITH_CAPABILITIES) { + vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_FAIL_STAT); + pr_debug("%s: %s [0x%lx,0x%lx) failed, returned %ld\n", + __func__, vmballoon_cmd_names[cmd], arg1, arg2, + status); + } + + /* mark reset required accordingly */ + if (status == VMW_BALLOON_ERROR_RESET) + b->reset_required = true; + + return status; +} + +static __always_inline unsigned long +vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1, + unsigned long arg2) +{ + unsigned long dummy; + + return __vmballoon_cmd(b, cmd, arg1, arg2, &dummy); +} + +/* + * Send "start" command to the host, communicating supported version + * of the protocol. 
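+ *
+ * The host either answers VMW_BALLOON_SUCCESS (legacy protocol, basic
+ * commands only) or VMW_BALLOON_SUCCESS_WITH_CAPABILITIES together with the
+ * capability mask it is willing to use.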
+ */ +static int vmballoon_send_start(struct vmballoon *b, unsigned long req_caps) +{ + unsigned long status, capabilities; + + status = __vmballoon_cmd(b, VMW_BALLOON_CMD_START, req_caps, 0, + &capabilities); + + switch (status) { + case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES: + b->capabilities = capabilities; + break; + case VMW_BALLOON_SUCCESS: + b->capabilities = VMW_BALLOON_BASIC_CMDS; + break; + default: + return -EIO; + } + + /* + * 2MB pages are only supported with batching. If batching is for some + * reason disabled, do not use 2MB pages, since otherwise the legacy + * mechanism is used with 2MB pages, causing a failure. + */ + b->max_page_size = VMW_BALLOON_4K_PAGE; + if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) && + (b->capabilities & VMW_BALLOON_BATCHED_CMDS)) + b->max_page_size = VMW_BALLOON_2M_PAGE; + + + return 0; +} + +/** + * vmballoon_send_guest_id - communicate guest type to the host. + * + * @b: pointer to the balloon. + * + * Communicate guest type to the host so that it can adjust ballooning + * algorithm to the one most appropriate for the guest. This command + * is normally issued after sending "start" command and is part of + * standard reset sequence. + * + * Return: zero on success or appropriate error code. + */ +static int vmballoon_send_guest_id(struct vmballoon *b) +{ + unsigned long status; + + status = vmballoon_cmd(b, VMW_BALLOON_CMD_GUEST_ID, + VMW_BALLOON_GUEST_ID, 0); + + return status == VMW_BALLOON_SUCCESS ? 0 : -EIO; +} + +/** + * vmballoon_page_order() - return the order of the page + * @page_size: the size of the page. + * + * Return: the allocation order. + */ +static inline +unsigned int vmballoon_page_order(enum vmballoon_page_size_type page_size) +{ + return page_size == VMW_BALLOON_2M_PAGE ? VMW_BALLOON_2M_ORDER : 0; +} + +/** + * vmballoon_page_in_frames() - returns the number of frames in a page. + * @page_size: the size of the page. + * + * Return: the number of 4k frames. + */ +static inline unsigned int +vmballoon_page_in_frames(enum vmballoon_page_size_type page_size) +{ + return 1 << vmballoon_page_order(page_size); +} + +/** + * vmballoon_mark_page_offline() - mark a page as offline + * @page: pointer for the page. + * @page_size: the size of the page. + */ +static void +vmballoon_mark_page_offline(struct page *page, + enum vmballoon_page_size_type page_size) +{ + int i; + + for (i = 0; i < vmballoon_page_in_frames(page_size); i++) + __SetPageOffline(page + i); +} + +/** + * vmballoon_mark_page_online() - mark a page as online + * @page: pointer for the page. + * @page_size: the size of the page. + */ +static void +vmballoon_mark_page_online(struct page *page, + enum vmballoon_page_size_type page_size) +{ + int i; + + for (i = 0; i < vmballoon_page_in_frames(page_size); i++) + __ClearPageOffline(page + i); +} + +/** + * vmballoon_send_get_target() - Retrieve desired balloon size from the host. + * + * @b: pointer to the balloon. + * + * Return: zero on success, EINVAL if limit does not fit in 32-bit, as required + * by the host-guest protocol and EIO if an error occurred in communicating with + * the host. + */ +static int vmballoon_send_get_target(struct vmballoon *b) +{ + unsigned long status; + unsigned long limit; + + limit = totalram_pages(); + + /* Ensure limit fits in 32-bits if 64-bit targets are not supported */ + if (!(b->capabilities & VMW_BALLOON_64_BIT_TARGET) && + limit != (u32)limit) + return -EINVAL; + + status = vmballoon_cmd(b, VMW_BALLOON_CMD_GET_TARGET, limit, 0); + + return status == VMW_BALLOON_SUCCESS ? 
0 : -EIO;
+}
+
+/**
+ * vmballoon_alloc_page_list - allocates a list of pages.
+ *
+ * @b: pointer to the balloon.
+ * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
+ * @req_n_pages: the number of requested pages.
+ *
+ * Tries to allocate @req_n_pages. Adds them to the list of balloon pages in
+ * @ctl.pages and updates @ctl.n_pages to reflect the number of pages.
+ *
+ * Return: zero on success or error code otherwise.
+ */
+static int vmballoon_alloc_page_list(struct vmballoon *b,
+				     struct vmballoon_ctl *ctl,
+				     unsigned int req_n_pages)
+{
+	struct page *page;
+	unsigned int i;
+
+	for (i = 0; i < req_n_pages; i++) {
+		/*
+		 * First check if we happen to have pages that were allocated
+		 * before. This happens when a 2MB page was rejected during
+		 * inflation by the hypervisor, and then split into 4KB pages.
+		 */
+		if (!list_empty(&ctl->prealloc_pages)) {
+			page = list_first_entry(&ctl->prealloc_pages,
+						struct page, lru);
+			list_del(&page->lru);
+		} else {
+			if (ctl->page_size == VMW_BALLOON_2M_PAGE)
+				page = alloc_pages(__GFP_HIGHMEM|__GFP_NOWARN|
+					__GFP_NOMEMALLOC, VMW_BALLOON_2M_ORDER);
+			else
+				page = balloon_page_alloc();
+
+			vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC,
+						 ctl->page_size);
+		}
+
+		if (page) {
+			/* Success. Add the page to the list and continue. */
+			list_add(&page->lru, &ctl->pages);
+			continue;
+		}
+
+		/* Allocation failed. Update statistics and stop. */
+		vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC_FAIL,
+					 ctl->page_size);
+		break;
+	}
+
+	ctl->n_pages = i;
+
+	return req_n_pages == ctl->n_pages ? 0 : -ENOMEM;
+}
+
+/**
+ * vmballoon_handle_one_result - Handle lock/unlock result for a single page.
+ *
+ * @b: pointer for %struct vmballoon.
+ * @page: pointer for the page whose result should be handled.
+ * @page_size: size of the page.
+ * @status: status of the operation as provided by the hypervisor.
+ */
+static int vmballoon_handle_one_result(struct vmballoon *b, struct page *page,
+				       enum vmballoon_page_size_type page_size,
+				       unsigned long status)
+{
+	/* On success do nothing. The page is already on the balloon list. */
+	if (likely(status == VMW_BALLOON_SUCCESS))
+		return 0;
+
+	pr_debug("%s: failed comm pfn %lx status %lu page_size %s\n", __func__,
+		 page_to_pfn(page), status,
+		 vmballoon_page_size_names[page_size]);
+
+	/* Error occurred */
+	vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC,
+				 page_size);
+
+	return -EIO;
+}
+
+/**
+ * vmballoon_status_page - returns the status of (un)lock operation
+ *
+ * @b: pointer to the balloon.
+ * @idx: index for the page for which the operation is performed.
+ * @p: pointer to where the page struct is returned.
+ *
+ * Following a lock or unlock operation, returns the status of the operation
+ * for an individual page. Provides the page that the operation was performed
+ * on in the @p argument.
+ *
+ * Returns: The status of a lock or unlock operation for an individual page.
+ */
+static unsigned long vmballoon_status_page(struct vmballoon *b, int idx,
+					   struct page **p)
+{
+	if (static_branch_likely(&vmw_balloon_batching)) {
+		/* batching mode */
+		*p = pfn_to_page(b->batch_page[idx].pfn);
+		return b->batch_page[idx].status;
+	}
+
+	/* non-batching mode */
+	*p = b->page;
+
+	/*
+	 * If a failure occurs, the indication will be provided in the status
+	 * of the entire operation, which is considered before the individual
+	 * page status. So for non-batching mode, the indication is always of
+	 * success.
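+	 * In batching mode, by contrast, each struct vmballoon_batch_entry
+	 * carries its own 5-bit status that the hypervisor writes back.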
+	 */
+	return VMW_BALLOON_SUCCESS;
+}
+
+/**
+ * vmballoon_lock_op - notifies the host about inflated/deflated pages.
+ * @b: pointer to the balloon.
+ * @num_pages: number of inflated/deflated pages.
+ * @page_size: size of the page.
+ * @op: the type of operation (lock or unlock).
+ *
+ * Notify the host about page(s) that were ballooned (or removed from the
+ * balloon) so that the host can use them without fear that the guest will
+ * need them (or, on unlock, stop using them since the VM needs them again).
+ * The host may reject some pages; we need to check the return value and
+ * maybe submit a different page. The pages that are inflated/deflated are
+ * pointed to by @b->page.
+ *
+ * Return: result as provided by the hypervisor.
+ */
+static unsigned long vmballoon_lock_op(struct vmballoon *b,
+				       unsigned int num_pages,
+				       enum vmballoon_page_size_type page_size,
+				       enum vmballoon_op op)
+{
+	unsigned long cmd, pfn;
+
+	lockdep_assert_held(&b->comm_lock);
+
+	if (static_branch_likely(&vmw_balloon_batching)) {
+		if (op == VMW_BALLOON_INFLATE)
+			cmd = page_size == VMW_BALLOON_2M_PAGE ?
+				VMW_BALLOON_CMD_BATCHED_2M_LOCK :
+				VMW_BALLOON_CMD_BATCHED_LOCK;
+		else
+			cmd = page_size == VMW_BALLOON_2M_PAGE ?
+				VMW_BALLOON_CMD_BATCHED_2M_UNLOCK :
+				VMW_BALLOON_CMD_BATCHED_UNLOCK;
+
+		pfn = PHYS_PFN(virt_to_phys(b->batch_page));
+	} else {
+		cmd = op == VMW_BALLOON_INFLATE ? VMW_BALLOON_CMD_LOCK :
+						  VMW_BALLOON_CMD_UNLOCK;
+		pfn = page_to_pfn(b->page);
+
+		/* In non-batching mode, PFNs must fit in 32-bit */
+		if (unlikely(pfn != (u32)pfn))
+			return VMW_BALLOON_ERROR_PPN_INVALID;
+	}
+
+	return vmballoon_cmd(b, cmd, pfn, num_pages);
+}
+
+/**
+ * vmballoon_add_page - adds a page towards lock/unlock operation.
+ *
+ * @b: pointer to the balloon.
+ * @idx: index of the page to be ballooned in this batch.
+ * @p: pointer to the page that is about to be ballooned.
+ *
+ * Adds the page to be ballooned. Must be called while holding @comm_lock.
+ */
+static void vmballoon_add_page(struct vmballoon *b, unsigned int idx,
+			       struct page *p)
+{
+	lockdep_assert_held(&b->comm_lock);
+
+	if (static_branch_likely(&vmw_balloon_batching))
+		b->batch_page[idx] = (struct vmballoon_batch_entry)
+					{ .pfn = page_to_pfn(p) };
+	else
+		b->page = p;
+}
+
+/**
+ * vmballoon_lock - lock or unlock a batch of pages.
+ *
+ * @b: pointer to the balloon.
+ * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
+ *
+ * Notifies the host about ballooned pages (after inflation or deflation,
+ * according to @ctl). If the host rejects a page, put it on the
+ * @ctl refuse list. These refused pages are then released when moving to the
+ * next size of pages.
+ *
+ * Note that we neither free any @page here nor put them back on the ballooned
+ * pages list. Instead we queue them for later processing. We do that for
+ * several reasons. First, we do not want to free the page under the lock.
+ * Second, it allows us to unify the handling of lock and unlock. In the
+ * inflate case, the caller will check if there are too many refused pages and
+ * release them. Although it is not identical to the past behavior, it should
+ * not affect performance.
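+ *
+ * The caller-visible effect is that @ctl->n_pages is reduced by the number of
+ * rejected pages while @ctl->n_refused_pages grows accordingly; the inflate
+ * path falls back to 4KB pages once %VMW_BALLOON_MAX_REFUSED is reached.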
+ */ +static int vmballoon_lock(struct vmballoon *b, struct vmballoon_ctl *ctl) +{ + unsigned long batch_status; + struct page *page; + unsigned int i, num_pages; + + num_pages = ctl->n_pages; + if (num_pages == 0) + return 0; + + /* communication with the host is done under the communication lock */ + spin_lock(&b->comm_lock); + + i = 0; + list_for_each_entry(page, &ctl->pages, lru) + vmballoon_add_page(b, i++, page); + + batch_status = vmballoon_lock_op(b, ctl->n_pages, ctl->page_size, + ctl->op); + + /* + * Iterate over the pages in the provided list. Since we are changing + * @ctl->n_pages we are saving the original value in @num_pages and + * use this value to bound the loop. + */ + for (i = 0; i < num_pages; i++) { + unsigned long status; + + status = vmballoon_status_page(b, i, &page); + + /* + * Failure of the whole batch overrides a single operation + * results. + */ + if (batch_status != VMW_BALLOON_SUCCESS) + status = batch_status; + + /* Continue if no error happened */ + if (!vmballoon_handle_one_result(b, page, ctl->page_size, + status)) + continue; + + /* + * Error happened. Move the pages to the refused list and update + * the pages number. + */ + list_move(&page->lru, &ctl->refused_pages); + ctl->n_pages--; + ctl->n_refused_pages++; + } + + spin_unlock(&b->comm_lock); + + return batch_status == VMW_BALLOON_SUCCESS ? 0 : -EIO; +} + +/** + * vmballoon_release_page_list() - Releases a page list + * + * @page_list: list of pages to release. + * @n_pages: pointer to the number of pages. + * @page_size: whether the pages in the list are 2MB (or else 4KB). + * + * Releases the list of pages and zeros the number of pages. + */ +static void vmballoon_release_page_list(struct list_head *page_list, + int *n_pages, + enum vmballoon_page_size_type page_size) +{ + struct page *page, *tmp; + + list_for_each_entry_safe(page, tmp, page_list, lru) { + list_del(&page->lru); + __free_pages(page, vmballoon_page_order(page_size)); + } + + if (n_pages) + *n_pages = 0; +} + + +/* + * Release pages that were allocated while attempting to inflate the + * balloon but were refused by the host for one reason or another. + */ +static void vmballoon_release_refused_pages(struct vmballoon *b, + struct vmballoon_ctl *ctl) +{ + vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_FREE, + ctl->page_size); + + vmballoon_release_page_list(&ctl->refused_pages, &ctl->n_refused_pages, + ctl->page_size); +} + +/** + * vmballoon_change - retrieve the required balloon change + * + * @b: pointer for the balloon. + * + * Return: the required change for the balloon size. A positive number + * indicates inflation, a negative number indicates a deflation. + */ +static int64_t vmballoon_change(struct vmballoon *b) +{ + int64_t size, target; + + size = atomic64_read(&b->size); + target = READ_ONCE(b->target); + + /* + * We must cast first because of int sizes + * Otherwise we might get huge positives instead of negatives + */ + + if (b->reset_required) + return 0; + + /* consider a 2MB slack on deflate, unless the balloon is emptied */ + if (target < size && target != 0 && + size - target < vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE)) + return 0; + + /* If an out-of-memory recently occurred, inflation is disallowed. */ + if (target > size && time_before(jiffies, READ_ONCE(b->shrink_timeout))) + return 0; + + return target - size; +} + +/** + * vmballoon_enqueue_page_list() - Enqueues list of pages after inflation. + * + * @b: pointer to balloon. + * @pages: list of pages to enqueue. 
+ * @n_pages: pointer to the number of pages in the list. The value is zeroed.
+ * @page_size: whether the pages are 2MB or 4KB pages.
+ *
+ * Enqueues the provided list of pages in the ballooned page list, clears the
+ * list and zeroes the number of pages that were provided.
+ */
+static void vmballoon_enqueue_page_list(struct vmballoon *b,
+					struct list_head *pages,
+					unsigned int *n_pages,
+					enum vmballoon_page_size_type page_size)
+{
+	unsigned long flags;
+	struct page *page;
+
+	if (page_size == VMW_BALLOON_4K_PAGE) {
+		balloon_page_list_enqueue(&b->b_dev_info, pages);
+	} else {
+		/*
+		 * Keep the huge pages in a local list which is not available
+		 * for the balloon compaction mechanism.
+		 */
+		spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
+
+		list_for_each_entry(page, pages, lru) {
+			vmballoon_mark_page_offline(page, VMW_BALLOON_2M_PAGE);
+		}
+
+		list_splice_init(pages, &b->huge_pages);
+		__count_vm_events(BALLOON_INFLATE, *n_pages *
+				  vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE));
+		spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
+	}
+
+	*n_pages = 0;
+}
+
+/**
+ * vmballoon_dequeue_page_list() - Dequeues page lists for deflation.
+ *
+ * @b: pointer to balloon.
+ * @pages: list onto which the dequeued pages are added.
+ * @n_pages: pointer to the number of pages in the list. Set to the number of
+ *           dequeued pages.
+ * @page_size: whether the pages are 2MB or 4KB pages.
+ * @n_req_pages: the number of requested pages.
+ *
+ * Dequeues the number of requested pages from the balloon for deflation. The
+ * number of dequeued pages may be lower, if not enough pages in the requested
+ * size are available.
+ */
+static void vmballoon_dequeue_page_list(struct vmballoon *b,
+					struct list_head *pages,
+					unsigned int *n_pages,
+					enum vmballoon_page_size_type page_size,
+					unsigned int n_req_pages)
+{
+	struct page *page, *tmp;
+	unsigned int i = 0;
+	unsigned long flags;
+
+	/* In the case of 4k pages, use the compaction infrastructure */
+	if (page_size == VMW_BALLOON_4K_PAGE) {
+		*n_pages = balloon_page_list_dequeue(&b->b_dev_info, pages,
+						     n_req_pages);
+		return;
+	}
+
+	/* 2MB pages */
+	spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
+	list_for_each_entry_safe(page, tmp, &b->huge_pages, lru) {
+		vmballoon_mark_page_online(page, VMW_BALLOON_2M_PAGE);
+
+		list_move(&page->lru, pages);
+		if (++i == n_req_pages)
+			break;
+	}
+
+	__count_vm_events(BALLOON_DEFLATE,
+			  i * vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE));
+	spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
+	*n_pages = i;
+}
+
+/**
+ * vmballoon_split_refused_pages() - Split the 2MB refused pages into 4KB pages.
+ *
+ * If inflation of 2MB pages was denied by the hypervisor, it is likely to be
+ * due to one or a few 4KB pages. These 2MB pages may keep being allocated and
+ * then being refused. To prevent this case, this function splits the refused
+ * pages into 4KB pages and adds them to the @prealloc_pages list.
+ *
+ * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
+ */
+static void vmballoon_split_refused_pages(struct vmballoon_ctl *ctl)
+{
+	struct page *page, *tmp;
+	unsigned int i, order;
+
+	order = vmballoon_page_order(ctl->page_size);
+
+	list_for_each_entry_safe(page, tmp, &ctl->refused_pages, lru) {
+		list_del(&page->lru);
+		split_page(page, order);
+		for (i = 0; i < (1 << order); i++)
+			list_add(&page[i].lru, &ctl->prealloc_pages);
+	}
+	ctl->n_refused_pages = 0;
+}
+
+/**
+ * vmballoon_inflate() - Inflate the balloon towards its target size.
+ *
+ * @b: pointer to the balloon.
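+ *
+ * Inflation starts with the largest supported page size (2MB when the host
+ * supports batched 2MB commands) and falls back to 4KB pages after an
+ * allocation failure or after too many pages were refused by the host.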
+ */ +static void vmballoon_inflate(struct vmballoon *b) +{ + int64_t to_inflate_frames; + struct vmballoon_ctl ctl = { + .pages = LIST_HEAD_INIT(ctl.pages), + .refused_pages = LIST_HEAD_INIT(ctl.refused_pages), + .prealloc_pages = LIST_HEAD_INIT(ctl.prealloc_pages), + .page_size = b->max_page_size, + .op = VMW_BALLOON_INFLATE + }; + + while ((to_inflate_frames = vmballoon_change(b)) > 0) { + unsigned int to_inflate_pages, page_in_frames; + int alloc_error, lock_error = 0; + + VM_BUG_ON(!list_empty(&ctl.pages)); + VM_BUG_ON(ctl.n_pages != 0); + + page_in_frames = vmballoon_page_in_frames(ctl.page_size); + + to_inflate_pages = min_t(unsigned long, b->batch_max_pages, + DIV_ROUND_UP_ULL(to_inflate_frames, + page_in_frames)); + + /* Start by allocating */ + alloc_error = vmballoon_alloc_page_list(b, &ctl, + to_inflate_pages); + + /* Actually lock the pages by telling the hypervisor */ + lock_error = vmballoon_lock(b, &ctl); + + /* + * If an error indicates that something serious went wrong, + * stop the inflation. + */ + if (lock_error) + break; + + /* Update the balloon size */ + atomic64_add(ctl.n_pages * page_in_frames, &b->size); + + vmballoon_enqueue_page_list(b, &ctl.pages, &ctl.n_pages, + ctl.page_size); + + /* + * If allocation failed or the number of refused pages exceeds + * the maximum allowed, move to the next page size. + */ + if (alloc_error || + ctl.n_refused_pages >= VMW_BALLOON_MAX_REFUSED) { + if (ctl.page_size == VMW_BALLOON_4K_PAGE) + break; + + /* + * Split the refused pages to 4k. This will also empty + * the refused pages list. + */ + vmballoon_split_refused_pages(&ctl); + ctl.page_size--; + } + + cond_resched(); + } + + /* + * Release pages that were allocated while attempting to inflate the + * balloon but were refused by the host for one reason or another, + * and update the statistics. + */ + if (ctl.n_refused_pages != 0) + vmballoon_release_refused_pages(b, &ctl); + + vmballoon_release_page_list(&ctl.prealloc_pages, NULL, ctl.page_size); +} + +/** + * vmballoon_deflate() - Decrease the size of the balloon. + * + * @b: pointer to the balloon + * @n_frames: the number of frames to deflate. If zero, automatically + * calculated according to the target size. + * @coordinated: whether to coordinate with the host + * + * Decrease the size of the balloon allowing guest to use more memory. + * + * Return: The number of deflated frames (i.e., basic page size units) + */ +static unsigned long vmballoon_deflate(struct vmballoon *b, uint64_t n_frames, + bool coordinated) +{ + unsigned long deflated_frames = 0; + unsigned long tried_frames = 0; + struct vmballoon_ctl ctl = { + .pages = LIST_HEAD_INIT(ctl.pages), + .refused_pages = LIST_HEAD_INIT(ctl.refused_pages), + .page_size = VMW_BALLOON_4K_PAGE, + .op = VMW_BALLOON_DEFLATE + }; + + /* free pages to reach target */ + while (true) { + unsigned int to_deflate_pages, n_unlocked_frames; + unsigned int page_in_frames; + int64_t to_deflate_frames; + bool deflated_all; + + page_in_frames = vmballoon_page_in_frames(ctl.page_size); + + VM_BUG_ON(!list_empty(&ctl.pages)); + VM_BUG_ON(ctl.n_pages); + VM_BUG_ON(!list_empty(&ctl.refused_pages)); + VM_BUG_ON(ctl.n_refused_pages); + + /* + * If we were requested a specific number of frames, we try to + * deflate this number of frames. Otherwise, deflation is + * performed according to the target and balloon size. + */ + to_deflate_frames = n_frames ? 
n_frames - tried_frames : + -vmballoon_change(b); + + /* break if no work to do */ + if (to_deflate_frames <= 0) + break; + + /* + * Calculate the number of frames based on current page size, + * but limit the deflated frames to a single chunk + */ + to_deflate_pages = min_t(unsigned long, b->batch_max_pages, + DIV_ROUND_UP_ULL(to_deflate_frames, + page_in_frames)); + + /* First take the pages from the balloon pages. */ + vmballoon_dequeue_page_list(b, &ctl.pages, &ctl.n_pages, + ctl.page_size, to_deflate_pages); + + /* + * Before pages are moving to the refused list, count their + * frames as frames that we tried to deflate. + */ + tried_frames += ctl.n_pages * page_in_frames; + + /* + * Unlock the pages by communicating with the hypervisor if the + * communication is coordinated (i.e., not pop). We ignore the + * return code. Instead we check if all the pages we manage to + * unlock all the pages. If we failed, we will move to the next + * page size, and would eventually try again later. + */ + if (coordinated) + vmballoon_lock(b, &ctl); + + /* + * Check if we deflated enough. We will move to the next page + * size if we did not manage to do so. This calculation takes + * place now, as once the pages are released, the number of + * pages is zeroed. + */ + deflated_all = (ctl.n_pages == to_deflate_pages); + + /* Update local and global counters */ + n_unlocked_frames = ctl.n_pages * page_in_frames; + atomic64_sub(n_unlocked_frames, &b->size); + deflated_frames += n_unlocked_frames; + + vmballoon_stats_page_add(b, VMW_BALLOON_PAGE_STAT_FREE, + ctl.page_size, ctl.n_pages); + + /* free the ballooned pages */ + vmballoon_release_page_list(&ctl.pages, &ctl.n_pages, + ctl.page_size); + + /* Return the refused pages to the ballooned list. */ + vmballoon_enqueue_page_list(b, &ctl.refused_pages, + &ctl.n_refused_pages, + ctl.page_size); + + /* If we failed to unlock all the pages, move to next size. */ + if (!deflated_all) { + if (ctl.page_size == b->max_page_size) + break; + ctl.page_size++; + } + + cond_resched(); + } + + return deflated_frames; +} + +/** + * vmballoon_deinit_batching - disables batching mode. + * + * @b: pointer to &struct vmballoon. + * + * Disables batching, by deallocating the page for communication with the + * hypervisor and disabling the static key to indicate that batching is off. + */ +static void vmballoon_deinit_batching(struct vmballoon *b) +{ + free_page((unsigned long)b->batch_page); + b->batch_page = NULL; + static_branch_disable(&vmw_balloon_batching); + b->batch_max_pages = 1; +} + +/** + * vmballoon_init_batching - enable batching mode. + * + * @b: pointer to &struct vmballoon. + * + * Enables batching, by allocating a page for communication with the hypervisor + * and enabling the static_key to use batching. + * + * Return: zero on success or an appropriate error-code. 
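+ *
+ * With 4KB pages and 8-byte &struct vmballoon_batch_entry entries this yields
+ * a batch of up to 512 pages per hypervisor command.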
+ */ +static int vmballoon_init_batching(struct vmballoon *b) +{ + struct page *page; + + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + return -ENOMEM; + + b->batch_page = page_address(page); + b->batch_max_pages = PAGE_SIZE / sizeof(struct vmballoon_batch_entry); + + static_branch_enable(&vmw_balloon_batching); + + return 0; +} + +/* + * Receive notification and resize balloon + */ +static void vmballoon_doorbell(void *client_data) +{ + struct vmballoon *b = client_data; + + vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_DOORBELL); + + mod_delayed_work(system_freezable_wq, &b->dwork, 0); +} + +/* + * Clean up vmci doorbell + */ +static void vmballoon_vmci_cleanup(struct vmballoon *b) +{ + vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET, + VMCI_INVALID_ID, VMCI_INVALID_ID); + + if (!vmci_handle_is_invalid(b->vmci_doorbell)) { + vmci_doorbell_destroy(b->vmci_doorbell); + b->vmci_doorbell = VMCI_INVALID_HANDLE; + } +} + +/** + * vmballoon_vmci_init - Initialize vmci doorbell. + * + * @b: pointer to the balloon. + * + * Return: zero on success or when wakeup command not supported. Error-code + * otherwise. + * + * Initialize vmci doorbell, to get notified as soon as balloon changes. + */ +static int vmballoon_vmci_init(struct vmballoon *b) +{ + unsigned long error; + + if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0) + return 0; + + error = vmci_doorbell_create(&b->vmci_doorbell, VMCI_FLAG_DELAYED_CB, + VMCI_PRIVILEGE_FLAG_RESTRICTED, + vmballoon_doorbell, b); + + if (error != VMCI_SUCCESS) + goto fail; + + error = __vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET, + b->vmci_doorbell.context, + b->vmci_doorbell.resource, NULL); + + if (error != VMW_BALLOON_SUCCESS) + goto fail; + + return 0; +fail: + vmballoon_vmci_cleanup(b); + return -EIO; +} + +/** + * vmballoon_pop - Quickly release all pages allocate for the balloon. + * + * @b: pointer to the balloon. + * + * This function is called when host decides to "reset" balloon for one reason + * or another. Unlike normal "deflate" we do not (shall not) notify host of the + * pages being released. + */ +static void vmballoon_pop(struct vmballoon *b) +{ + unsigned long size; + + while ((size = atomic64_read(&b->size))) + vmballoon_deflate(b, size, false); +} + +/* + * Perform standard reset sequence by popping the balloon (in case it + * is not empty) and then restarting protocol. This operation normally + * happens when host responds with VMW_BALLOON_ERROR_RESET to a command. + */ +static void vmballoon_reset(struct vmballoon *b) +{ + int error; + + down_write(&b->conf_sem); + + vmballoon_vmci_cleanup(b); + + /* free all pages, skipping monitor unlock */ + vmballoon_pop(b); + + if (vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES)) + goto unlock; + + if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) { + if (vmballoon_init_batching(b)) { + /* + * We failed to initialize batching, inform the monitor + * about it by sending a null capability. + * + * The guest will retry in one second. 
+ */ + vmballoon_send_start(b, 0); + goto unlock; + } + } else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) { + vmballoon_deinit_batching(b); + } + + vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_RESET); + b->reset_required = false; + + error = vmballoon_vmci_init(b); + if (error) + pr_err_once("failed to initialize vmci doorbell\n"); + + if (vmballoon_send_guest_id(b)) + pr_err_once("failed to send guest ID to the host\n"); + +unlock: + up_write(&b->conf_sem); +} + +/** + * vmballoon_work - periodic balloon worker for reset, inflation and deflation. + * + * @work: pointer to the &work_struct which is provided by the workqueue. + * + * Resets the protocol if needed, gets the new size and adjusts balloon as + * needed. Repeat in 1 sec. + */ +static void vmballoon_work(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct vmballoon *b = container_of(dwork, struct vmballoon, dwork); + int64_t change = 0; + + if (b->reset_required) + vmballoon_reset(b); + + down_read(&b->conf_sem); + + /* + * Update the stats while holding the semaphore to ensure that + * @stats_enabled is consistent with whether the stats are actually + * enabled + */ + vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_TIMER); + + if (!vmballoon_send_get_target(b)) + change = vmballoon_change(b); + + if (change != 0) { + pr_debug("%s - size: %llu, target %lu\n", __func__, + atomic64_read(&b->size), READ_ONCE(b->target)); + + if (change > 0) + vmballoon_inflate(b); + else /* (change < 0) */ + vmballoon_deflate(b, 0, true); + } + + up_read(&b->conf_sem); + + /* + * We are using a freezable workqueue so that balloon operations are + * stopped while the system transitions to/from sleep/hibernation. + */ + queue_delayed_work(system_freezable_wq, + dwork, round_jiffies_relative(HZ)); + +} + +/** + * vmballoon_shrinker_scan() - deflate the balloon due to memory pressure. + * @shrinker: pointer to the balloon shrinker. + * @sc: page reclaim information. + * + * Returns: number of pages that were freed during deflation. + */ +static unsigned long vmballoon_shrinker_scan(struct shrinker *shrinker, + struct shrink_control *sc) +{ + struct vmballoon *b = &balloon; + unsigned long deflated_frames; + + pr_debug("%s - size: %llu", __func__, atomic64_read(&b->size)); + + vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_SHRINK); + + /* + * If the lock is also contended for read, we cannot easily reclaim and + * we bail out. + */ + if (!down_read_trylock(&b->conf_sem)) + return 0; + + deflated_frames = vmballoon_deflate(b, sc->nr_to_scan, true); + + vmballoon_stats_gen_add(b, VMW_BALLOON_STAT_SHRINK_FREE, + deflated_frames); + + /* + * Delay future inflation for some time to mitigate the situations in + * which balloon continuously grows and shrinks. Use WRITE_ONCE() since + * the access is asynchronous. + */ + WRITE_ONCE(b->shrink_timeout, jiffies + HZ * VMBALLOON_SHRINK_DELAY); + + up_read(&b->conf_sem); + + return deflated_frames; +} + +/** + * vmballoon_shrinker_count() - return the number of ballooned pages. + * @shrinker: pointer to the balloon shrinker. + * @sc: page reclaim information. + * + * Returns: number of 4k pages that are allocated for the balloon and can + * therefore be reclaimed under pressure. 
+ */ +static unsigned long vmballoon_shrinker_count(struct shrinker *shrinker, + struct shrink_control *sc) +{ + struct vmballoon *b = &balloon; + + return atomic64_read(&b->size); +} + +static void vmballoon_unregister_shrinker(struct vmballoon *b) +{ + if (b->shrinker_registered) + unregister_shrinker(&b->shrinker); + b->shrinker_registered = false; +} + +static int vmballoon_register_shrinker(struct vmballoon *b) +{ + int r; + + /* Do nothing if the shrinker is not enabled */ + if (!vmwballoon_shrinker_enable) + return 0; + + b->shrinker.scan_objects = vmballoon_shrinker_scan; + b->shrinker.count_objects = vmballoon_shrinker_count; + b->shrinker.seeks = DEFAULT_SEEKS; + + r = register_shrinker(&b->shrinker, "vmw-balloon"); + + if (r == 0) + b->shrinker_registered = true; + + return r; +} + +/* + * DEBUGFS Interface + */ +#ifdef CONFIG_DEBUG_FS + +static const char * const vmballoon_stat_page_names[] = { + [VMW_BALLOON_PAGE_STAT_ALLOC] = "alloc", + [VMW_BALLOON_PAGE_STAT_ALLOC_FAIL] = "allocFail", + [VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC] = "errAlloc", + [VMW_BALLOON_PAGE_STAT_REFUSED_FREE] = "errFree", + [VMW_BALLOON_PAGE_STAT_FREE] = "free" +}; + +static const char * const vmballoon_stat_names[] = { + [VMW_BALLOON_STAT_TIMER] = "timer", + [VMW_BALLOON_STAT_DOORBELL] = "doorbell", + [VMW_BALLOON_STAT_RESET] = "reset", + [VMW_BALLOON_STAT_SHRINK] = "shrink", + [VMW_BALLOON_STAT_SHRINK_FREE] = "shrinkFree" +}; + +static int vmballoon_enable_stats(struct vmballoon *b) +{ + int r = 0; + + down_write(&b->conf_sem); + + /* did we somehow race with another reader which enabled stats? */ + if (b->stats) + goto out; + + b->stats = kzalloc(sizeof(*b->stats), GFP_KERNEL); + + if (!b->stats) { + /* allocation failed */ + r = -ENOMEM; + goto out; + } + static_key_enable(&balloon_stat_enabled.key); +out: + up_write(&b->conf_sem); + return r; +} + +/** + * vmballoon_debug_show - shows statistics of balloon operations. + * @f: pointer to the &struct seq_file. + * @offset: ignored. + * + * Provides the statistics that can be accessed in vmmemctl in the debugfs. + * To avoid the overhead - mainly that of memory - of collecting the statistics, + * we only collect statistics after the first time the counters are read. + * + * Return: zero on success or an error code. + */ +static int vmballoon_debug_show(struct seq_file *f, void *offset) +{ + struct vmballoon *b = f->private; + int i, j; + + /* enables stats if they are disabled */ + if (!b->stats) { + int r = vmballoon_enable_stats(b); + + if (r) + return r; + } + + /* format capabilities info */ + seq_printf(f, "%-22s: %#16x\n", "balloon capabilities", + VMW_BALLOON_CAPABILITIES); + seq_printf(f, "%-22s: %#16lx\n", "used capabilities", b->capabilities); + seq_printf(f, "%-22s: %16s\n", "is resetting", + b->reset_required ? 
"y" : "n"); + + /* format size info */ + seq_printf(f, "%-22s: %16lu\n", "target", READ_ONCE(b->target)); + seq_printf(f, "%-22s: %16llu\n", "current", atomic64_read(&b->size)); + + for (i = 0; i < VMW_BALLOON_CMD_NUM; i++) { + if (vmballoon_cmd_names[i] == NULL) + continue; + + seq_printf(f, "%-22s: %16llu (%llu failed)\n", + vmballoon_cmd_names[i], + atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_STAT]), + atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_FAIL_STAT])); + } + + for (i = 0; i < VMW_BALLOON_STAT_NUM; i++) + seq_printf(f, "%-22s: %16llu\n", + vmballoon_stat_names[i], + atomic64_read(&b->stats->general_stat[i])); + + for (i = 0; i < VMW_BALLOON_PAGE_STAT_NUM; i++) { + for (j = 0; j < VMW_BALLOON_NUM_PAGE_SIZES; j++) + seq_printf(f, "%-18s(%s): %16llu\n", + vmballoon_stat_page_names[i], + vmballoon_page_size_names[j], + atomic64_read(&b->stats->page_stat[i][j])); + } + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(vmballoon_debug); + +static void __init vmballoon_debugfs_init(struct vmballoon *b) +{ + debugfs_create_file("vmmemctl", S_IRUGO, NULL, b, + &vmballoon_debug_fops); +} + +static void __exit vmballoon_debugfs_exit(struct vmballoon *b) +{ + static_key_disable(&balloon_stat_enabled.key); + debugfs_lookup_and_remove("vmmemctl", NULL); + kfree(b->stats); + b->stats = NULL; +} + +#else + +static inline void vmballoon_debugfs_init(struct vmballoon *b) +{ +} + +static inline void vmballoon_debugfs_exit(struct vmballoon *b) +{ +} + +#endif /* CONFIG_DEBUG_FS */ + + +#ifdef CONFIG_BALLOON_COMPACTION +/** + * vmballoon_migratepage() - migrates a balloon page. + * @b_dev_info: balloon device information descriptor. + * @newpage: the page to which @page should be migrated. + * @page: a ballooned page that should be migrated. + * @mode: migration mode, ignored. + * + * This function is really open-coded, but that is according to the interface + * that balloon_compaction provides. + * + * Return: zero on success, -EAGAIN when migration cannot be performed + * momentarily, and -EBUSY if migration failed and should be retried + * with that specific page. + */ +static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info, + struct page *newpage, struct page *page, + enum migrate_mode mode) +{ + unsigned long status, flags; + struct vmballoon *b; + int ret; + + b = container_of(b_dev_info, struct vmballoon, b_dev_info); + + /* + * If the semaphore is taken, there is ongoing configuration change + * (i.e., balloon reset), so try again. + */ + if (!down_read_trylock(&b->conf_sem)) + return -EAGAIN; + + spin_lock(&b->comm_lock); + /* + * We must start by deflating and not inflating, as otherwise the + * hypervisor may tell us that it has enough memory and the new page is + * not needed. Since the old page is isolated, we cannot use the list + * interface to unlock it, as the LRU field is used for isolation. + * Instead, we use the native interface directly. + */ + vmballoon_add_page(b, 0, page); + status = vmballoon_lock_op(b, 1, VMW_BALLOON_4K_PAGE, + VMW_BALLOON_DEFLATE); + + if (status == VMW_BALLOON_SUCCESS) + status = vmballoon_status_page(b, 0, &page); + + /* + * If a failure happened, let the migration mechanism know that it + * should not retry. + */ + if (status != VMW_BALLOON_SUCCESS) { + spin_unlock(&b->comm_lock); + ret = -EBUSY; + goto out_unlock; + } + + /* + * The page is isolated, so it is safe to delete it without holding + * @pages_lock . We keep holding @comm_lock since we will need it in a + * second. 
+ */ + balloon_page_delete(page); + + put_page(page); + + /* Inflate */ + vmballoon_add_page(b, 0, newpage); + status = vmballoon_lock_op(b, 1, VMW_BALLOON_4K_PAGE, + VMW_BALLOON_INFLATE); + + if (status == VMW_BALLOON_SUCCESS) + status = vmballoon_status_page(b, 0, &newpage); + + spin_unlock(&b->comm_lock); + + if (status != VMW_BALLOON_SUCCESS) { + /* + * A failure happened. While we can deflate the page we just + * inflated, this deflation can also encounter an error. Instead + * we will decrease the size of the balloon to reflect the + * change and report failure. + */ + atomic64_dec(&b->size); + ret = -EBUSY; + } else { + /* + * Success. Take a reference for the page, and we will add it to + * the list after acquiring the lock. + */ + get_page(newpage); + ret = MIGRATEPAGE_SUCCESS; + } + + /* Update the balloon list under the @pages_lock */ + spin_lock_irqsave(&b->b_dev_info.pages_lock, flags); + + /* + * On inflation success, we already took a reference for the @newpage. + * If we succeed just insert it to the list and update the statistics + * under the lock. + */ + if (ret == MIGRATEPAGE_SUCCESS) { + balloon_page_insert(&b->b_dev_info, newpage); + __count_vm_event(BALLOON_MIGRATE); + } + + /* + * We deflated successfully, so regardless to the inflation success, we + * need to reduce the number of isolated_pages. + */ + b->b_dev_info.isolated_pages--; + spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags); + +out_unlock: + up_read(&b->conf_sem); + return ret; +} + +/** + * vmballoon_compaction_init() - initialized compaction for the balloon. + * + * @b: pointer to the balloon. + * + * If during the initialization a failure occurred, this function does not + * perform cleanup. The caller must call vmballoon_compaction_deinit() in this + * case. + * + * Return: zero on success or error code on failure. + */ +static __init void vmballoon_compaction_init(struct vmballoon *b) +{ + b->b_dev_info.migratepage = vmballoon_migratepage; +} + +#else /* CONFIG_BALLOON_COMPACTION */ +static inline void vmballoon_compaction_init(struct vmballoon *b) +{ +} +#endif /* CONFIG_BALLOON_COMPACTION */ + +static int __init vmballoon_init(void) +{ + int error; + + /* + * Check if we are running on VMware's hypervisor and bail out + * if we are not. + */ + if (x86_hyper_type != X86_HYPER_VMWARE) + return -ENODEV; + + INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work); + + error = vmballoon_register_shrinker(&balloon); + if (error) + goto fail; + + /* + * Initialization of compaction must be done after the call to + * balloon_devinfo_init() . + */ + balloon_devinfo_init(&balloon.b_dev_info); + vmballoon_compaction_init(&balloon); + + INIT_LIST_HEAD(&balloon.huge_pages); + spin_lock_init(&balloon.comm_lock); + init_rwsem(&balloon.conf_sem); + balloon.vmci_doorbell = VMCI_INVALID_HANDLE; + balloon.batch_page = NULL; + balloon.page = NULL; + balloon.reset_required = true; + + queue_delayed_work(system_freezable_wq, &balloon.dwork, 0); + + vmballoon_debugfs_init(&balloon); + + return 0; +fail: + vmballoon_unregister_shrinker(&balloon); + return error; +} + +/* + * Using late_initcall() instead of module_init() allows the balloon to use the + * VMCI doorbell even when the balloon is built into the kernel. Otherwise the + * VMCI is probed only after the balloon is initialized. If the balloon is used + * as a module, late_initcall() is equivalent to module_init(). 
+ */ +late_initcall(vmballoon_init); + +static void __exit vmballoon_exit(void) +{ + vmballoon_unregister_shrinker(&balloon); + vmballoon_vmci_cleanup(&balloon); + cancel_delayed_work_sync(&balloon.dwork); + + vmballoon_debugfs_exit(&balloon); + + /* + * Deallocate all reserved memory, and reset connection with monitor. + * Reset connection before deallocating memory to avoid potential for + * additional spurious resets from guest touching deallocated pages. + */ + vmballoon_send_start(&balloon, 0); + vmballoon_pop(&balloon); +} +module_exit(vmballoon_exit); diff --git a/drivers/misc/vmw_vmci/Kconfig b/drivers/misc/vmw_vmci/Kconfig new file mode 100644 index 0000000000..b6d4d7fd68 --- /dev/null +++ b/drivers/misc/vmw_vmci/Kconfig @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# VMware VMCI device +# + +config VMWARE_VMCI + tristate "VMware VMCI Driver" + depends on (X86 || ARM64) && !CPU_BIG_ENDIAN && PCI + help + This is VMware's Virtual Machine Communication Interface. It enables + high-speed communication between host and guest in a virtual + environment via the VMCI virtual device. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called vmw_vmci. diff --git a/drivers/misc/vmw_vmci/Makefile b/drivers/misc/vmw_vmci/Makefile new file mode 100644 index 0000000000..475fa31a9c --- /dev/null +++ b/drivers/misc/vmw_vmci/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_VMWARE_VMCI) += vmw_vmci.o +vmw_vmci-y += vmci_context.o vmci_datagram.o vmci_doorbell.o \ + vmci_driver.o vmci_event.o vmci_guest.o vmci_handle_array.o \ + vmci_host.o vmci_queue_pair.o vmci_resource.o vmci_route.o diff --git a/drivers/misc/vmw_vmci/vmci_context.c b/drivers/misc/vmw_vmci/vmci_context.c new file mode 100644 index 0000000000..f22b44827e --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_context.c @@ -0,0 +1,1214 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vmci_queue_pair.h" +#include "vmci_datagram.h" +#include "vmci_doorbell.h" +#include "vmci_context.h" +#include "vmci_driver.h" +#include "vmci_event.h" + +/* Use a wide upper bound for the maximum contexts. */ +#define VMCI_MAX_CONTEXTS 2000 + +/* + * List of current VMCI contexts. Contexts can be added by + * vmci_ctx_create() and removed via vmci_ctx_destroy(). + * These, along with context lookup, are protected by the + * list structure's lock. + */ +static struct { + struct list_head head; + spinlock_t lock; /* Spinlock for context list operations */ +} ctx_list = { + .head = LIST_HEAD_INIT(ctx_list.head), + .lock = __SPIN_LOCK_UNLOCKED(ctx_list.lock), +}; + +/* Used by contexts that did not set up notify flag pointers */ +static bool ctx_dummy_notify; + +static void ctx_signal_notify(struct vmci_ctx *context) +{ + *context->notify = true; +} + +static void ctx_clear_notify(struct vmci_ctx *context) +{ + *context->notify = false; +} + +/* + * If nothing requires the attention of the guest, clears both + * notify flag and call. + */ +static void ctx_clear_notify_call(struct vmci_ctx *context) +{ + if (context->pending_datagrams == 0 && + vmci_handle_arr_get_size(context->pending_doorbell_array) == 0) + ctx_clear_notify(context); +} + +/* + * Sets the context's notify flag iff datagrams are pending for this + * context. Called from vmci_setup_notify(). 
+ */ +void vmci_ctx_check_signal_notify(struct vmci_ctx *context) +{ + spin_lock(&context->lock); + if (context->pending_datagrams) + ctx_signal_notify(context); + spin_unlock(&context->lock); +} + +/* + * Allocates and initializes a VMCI context. + */ +struct vmci_ctx *vmci_ctx_create(u32 cid, u32 priv_flags, + uintptr_t event_hnd, + int user_version, + const struct cred *cred) +{ + struct vmci_ctx *context; + int error; + + if (cid == VMCI_INVALID_ID) { + pr_devel("Invalid context ID for VMCI context\n"); + error = -EINVAL; + goto err_out; + } + + if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS) { + pr_devel("Invalid flag (flags=0x%x) for VMCI context\n", + priv_flags); + error = -EINVAL; + goto err_out; + } + + if (user_version == 0) { + pr_devel("Invalid suer_version %d\n", user_version); + error = -EINVAL; + goto err_out; + } + + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) { + pr_warn("Failed to allocate memory for VMCI context\n"); + error = -ENOMEM; + goto err_out; + } + + kref_init(&context->kref); + spin_lock_init(&context->lock); + INIT_LIST_HEAD(&context->list_item); + INIT_LIST_HEAD(&context->datagram_queue); + INIT_LIST_HEAD(&context->notifier_list); + + /* Initialize host-specific VMCI context. */ + init_waitqueue_head(&context->host_context.wait_queue); + + context->queue_pair_array = + vmci_handle_arr_create(0, VMCI_MAX_GUEST_QP_COUNT); + if (!context->queue_pair_array) { + error = -ENOMEM; + goto err_free_ctx; + } + + context->doorbell_array = + vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT); + if (!context->doorbell_array) { + error = -ENOMEM; + goto err_free_qp_array; + } + + context->pending_doorbell_array = + vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT); + if (!context->pending_doorbell_array) { + error = -ENOMEM; + goto err_free_db_array; + } + + context->user_version = user_version; + + context->priv_flags = priv_flags; + + if (cred) + context->cred = get_cred(cred); + + context->notify = &ctx_dummy_notify; + context->notify_page = NULL; + + /* + * If we collide with an existing context we generate a new + * and use it instead. The VMX will determine if regeneration + * is okay. Since there isn't 4B - 16 VMs running on a given + * host, the below loop will terminate. + */ + spin_lock(&ctx_list.lock); + + while (vmci_ctx_exists(cid)) { + /* We reserve the lowest 16 ids for fixed contexts. */ + cid = max(cid, VMCI_RESERVED_CID_LIMIT - 1) + 1; + if (cid == VMCI_INVALID_ID) + cid = VMCI_RESERVED_CID_LIMIT; + } + context->cid = cid; + + list_add_tail_rcu(&context->list_item, &ctx_list.head); + spin_unlock(&ctx_list.lock); + + return context; + + err_free_db_array: + vmci_handle_arr_destroy(context->doorbell_array); + err_free_qp_array: + vmci_handle_arr_destroy(context->queue_pair_array); + err_free_ctx: + kfree(context); + err_out: + return ERR_PTR(error); +} + +/* + * Destroy VMCI context. + */ +void vmci_ctx_destroy(struct vmci_ctx *context) +{ + spin_lock(&ctx_list.lock); + list_del_rcu(&context->list_item); + spin_unlock(&ctx_list.lock); + synchronize_rcu(); + + vmci_ctx_put(context); +} + +/* + * Fire notification for all contexts interested in given cid. + */ +static int ctx_fire_notification(u32 context_id, u32 priv_flags) +{ + u32 i, array_size; + struct vmci_ctx *sub_ctx; + struct vmci_handle_arr *subscriber_array; + struct vmci_handle context_handle = + vmci_make_handle(context_id, VMCI_EVENT_HANDLER); + + /* + * We create an array to hold the subscribers we find when + * scanning through all contexts. 
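+ * The array can hold up to VMCI_MAX_CONTEXTS entries, at most one per
+ * subscribing context.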
+ */ + subscriber_array = vmci_handle_arr_create(0, VMCI_MAX_CONTEXTS); + if (subscriber_array == NULL) + return VMCI_ERROR_NO_MEM; + + /* + * Scan all contexts to find who is interested in being + * notified about given contextID. + */ + rcu_read_lock(); + list_for_each_entry_rcu(sub_ctx, &ctx_list.head, list_item) { + struct vmci_handle_list *node; + + /* + * We only deliver notifications of the removal of + * contexts, if the two contexts are allowed to + * interact. + */ + if (vmci_deny_interaction(priv_flags, sub_ctx->priv_flags)) + continue; + + list_for_each_entry_rcu(node, &sub_ctx->notifier_list, node) { + if (!vmci_handle_is_equal(node->handle, context_handle)) + continue; + + vmci_handle_arr_append_entry(&subscriber_array, + vmci_make_handle(sub_ctx->cid, + VMCI_EVENT_HANDLER)); + } + } + rcu_read_unlock(); + + /* Fire event to all subscribers. */ + array_size = vmci_handle_arr_get_size(subscriber_array); + for (i = 0; i < array_size; i++) { + int result; + struct vmci_event_ctx ev; + + ev.msg.hdr.dst = vmci_handle_arr_get_entry(subscriber_array, i); + ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_CONTEXT_RESOURCE_ID); + ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr); + ev.msg.event_data.event = VMCI_EVENT_CTX_REMOVED; + ev.payload.context_id = context_id; + + result = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID, + &ev.msg.hdr, false); + if (result < VMCI_SUCCESS) { + pr_devel("Failed to enqueue event datagram (type=%d) for context (ID=0x%x)\n", + ev.msg.event_data.event, + ev.msg.hdr.dst.context); + /* We continue to enqueue on next subscriber. */ + } + } + vmci_handle_arr_destroy(subscriber_array); + + return VMCI_SUCCESS; +} + +/* + * Returns the current number of pending datagrams. The call may + * also serve as a synchronization point for the datagram queue, + * as no enqueue operations can occur concurrently. + */ +int vmci_ctx_pending_datagrams(u32 cid, u32 *pending) +{ + struct vmci_ctx *context; + + context = vmci_ctx_get(cid); + if (context == NULL) + return VMCI_ERROR_INVALID_ARGS; + + spin_lock(&context->lock); + if (pending) + *pending = context->pending_datagrams; + spin_unlock(&context->lock); + vmci_ctx_put(context); + + return VMCI_SUCCESS; +} + +/* + * Queues a VMCI datagram for the appropriate target VM context. + */ +int vmci_ctx_enqueue_datagram(u32 cid, struct vmci_datagram *dg) +{ + struct vmci_datagram_queue_entry *dq_entry; + struct vmci_ctx *context; + struct vmci_handle dg_src; + size_t vmci_dg_size; + + vmci_dg_size = VMCI_DG_SIZE(dg); + if (vmci_dg_size > VMCI_MAX_DG_SIZE) { + pr_devel("Datagram too large (bytes=%zu)\n", vmci_dg_size); + return VMCI_ERROR_INVALID_ARGS; + } + + /* Get the target VM's VMCI context. */ + context = vmci_ctx_get(cid); + if (!context) { + pr_devel("Invalid context (ID=0x%x)\n", cid); + return VMCI_ERROR_INVALID_ARGS; + } + + /* Allocate guest call entry and add it to the target VM's queue. */ + dq_entry = kmalloc(sizeof(*dq_entry), GFP_KERNEL); + if (dq_entry == NULL) { + pr_warn("Failed to allocate memory for datagram\n"); + vmci_ctx_put(context); + return VMCI_ERROR_NO_MEM; + } + dq_entry->dg = dg; + dq_entry->dg_size = vmci_dg_size; + dg_src = dg->src; + INIT_LIST_HEAD(&dq_entry->list_item); + + spin_lock(&context->lock); + + /* + * We put a higher limit on datagrams from the hypervisor. If + * the pending datagram is not from hypervisor, then we check + * if enqueueing it would exceed the + * VMCI_MAX_DATAGRAM_QUEUE_SIZE limit on the destination. 
If + * the pending datagram is from hypervisor, we allow it to be + * queued at the destination side provided we don't reach the + * VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE limit. + */ + if (context->datagram_queue_size + vmci_dg_size >= + VMCI_MAX_DATAGRAM_QUEUE_SIZE && + (!vmci_handle_is_equal(dg_src, + vmci_make_handle + (VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_CONTEXT_RESOURCE_ID)) || + context->datagram_queue_size + vmci_dg_size >= + VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE)) { + spin_unlock(&context->lock); + vmci_ctx_put(context); + kfree(dq_entry); + pr_devel("Context (ID=0x%x) receive queue is full\n", cid); + return VMCI_ERROR_NO_RESOURCES; + } + + list_add(&dq_entry->list_item, &context->datagram_queue); + context->pending_datagrams++; + context->datagram_queue_size += vmci_dg_size; + ctx_signal_notify(context); + wake_up(&context->host_context.wait_queue); + spin_unlock(&context->lock); + vmci_ctx_put(context); + + return vmci_dg_size; +} + +/* + * Verifies whether a context with the specified context ID exists. + * FIXME: utility is dubious as no decisions can be reliably made + * using this data as context can appear and disappear at any time. + */ +bool vmci_ctx_exists(u32 cid) +{ + struct vmci_ctx *context; + bool exists = false; + + rcu_read_lock(); + + list_for_each_entry_rcu(context, &ctx_list.head, list_item) { + if (context->cid == cid) { + exists = true; + break; + } + } + + rcu_read_unlock(); + return exists; +} + +/* + * Retrieves VMCI context corresponding to the given cid. + */ +struct vmci_ctx *vmci_ctx_get(u32 cid) +{ + struct vmci_ctx *c, *context = NULL; + + if (cid == VMCI_INVALID_ID) + return NULL; + + rcu_read_lock(); + list_for_each_entry_rcu(c, &ctx_list.head, list_item) { + if (c->cid == cid) { + /* + * The context owner drops its own reference to the + * context only after removing it from the list and + * waiting for RCU grace period to expire. This + * means that we are not about to increase the + * reference count of something that is in the + * process of being destroyed. + */ + context = c; + kref_get(&context->kref); + break; + } + } + rcu_read_unlock(); + + return context; +} + +/* + * Deallocates all parts of a context data structure. This + * function doesn't lock the context, because it assumes that + * the caller was holding the last reference to context. + */ +static void ctx_free_ctx(struct kref *kref) +{ + struct vmci_ctx *context = container_of(kref, struct vmci_ctx, kref); + struct vmci_datagram_queue_entry *dq_entry, *dq_entry_tmp; + struct vmci_handle temp_handle; + struct vmci_handle_list *notifier, *tmp; + + /* + * Fire event to all contexts interested in knowing this + * context is dying. + */ + ctx_fire_notification(context->cid, context->priv_flags); + + /* + * Cleanup all queue pair resources attached to context. If + * the VM dies without cleaning up, this code will make sure + * that no resources are leaked. + */ + temp_handle = vmci_handle_arr_get_entry(context->queue_pair_array, 0); + while (!vmci_handle_is_equal(temp_handle, VMCI_INVALID_HANDLE)) { + if (vmci_qp_broker_detach(temp_handle, + context) < VMCI_SUCCESS) { + /* + * When vmci_qp_broker_detach() succeeds it + * removes the handle from the array. If + * detach fails, we must remove the handle + * ourselves. 
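+ * Either way the entry at index 0 is gone, so each iteration of the
+ * loop makes progress and the loop terminates.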
+ */ + vmci_handle_arr_remove_entry(context->queue_pair_array, + temp_handle); + } + temp_handle = + vmci_handle_arr_get_entry(context->queue_pair_array, 0); + } + + /* + * It is fine to destroy this without locking the callQueue, as + * this is the only thread having a reference to the context. + */ + list_for_each_entry_safe(dq_entry, dq_entry_tmp, + &context->datagram_queue, list_item) { + WARN_ON(dq_entry->dg_size != VMCI_DG_SIZE(dq_entry->dg)); + list_del(&dq_entry->list_item); + kfree(dq_entry->dg); + kfree(dq_entry); + } + + list_for_each_entry_safe(notifier, tmp, + &context->notifier_list, node) { + list_del(¬ifier->node); + kfree(notifier); + } + + vmci_handle_arr_destroy(context->queue_pair_array); + vmci_handle_arr_destroy(context->doorbell_array); + vmci_handle_arr_destroy(context->pending_doorbell_array); + vmci_ctx_unset_notify(context); + if (context->cred) + put_cred(context->cred); + kfree(context); +} + +/* + * Drops reference to VMCI context. If this is the last reference to + * the context it will be deallocated. A context is created with + * a reference count of one, and on destroy, it is removed from + * the context list before its reference count is decremented. Thus, + * if we reach zero, we are sure that nobody else are about to increment + * it (they need the entry in the context list for that), and so there + * is no need for locking. + */ +void vmci_ctx_put(struct vmci_ctx *context) +{ + kref_put(&context->kref, ctx_free_ctx); +} + +/* + * Dequeues the next datagram and returns it to caller. + * The caller passes in a pointer to the max size datagram + * it can handle and the datagram is only unqueued if the + * size is less than max_size. If larger max_size is set to + * the size of the datagram to give the caller a chance to + * set up a larger buffer for the guestcall. + */ +int vmci_ctx_dequeue_datagram(struct vmci_ctx *context, + size_t *max_size, + struct vmci_datagram **dg) +{ + struct vmci_datagram_queue_entry *dq_entry; + struct list_head *list_item; + int rv; + + /* Dequeue the next datagram entry. */ + spin_lock(&context->lock); + if (context->pending_datagrams == 0) { + ctx_clear_notify_call(context); + spin_unlock(&context->lock); + pr_devel("No datagrams pending\n"); + return VMCI_ERROR_NO_MORE_DATAGRAMS; + } + + list_item = context->datagram_queue.next; + + dq_entry = + list_entry(list_item, struct vmci_datagram_queue_entry, list_item); + + /* Check size of caller's buffer. */ + if (*max_size < dq_entry->dg_size) { + *max_size = dq_entry->dg_size; + spin_unlock(&context->lock); + pr_devel("Caller's buffer should be at least (size=%u bytes)\n", + (u32) *max_size); + return VMCI_ERROR_NO_MEM; + } + + list_del(list_item); + context->pending_datagrams--; + context->datagram_queue_size -= dq_entry->dg_size; + if (context->pending_datagrams == 0) { + ctx_clear_notify_call(context); + rv = VMCI_SUCCESS; + } else { + /* + * Return the size of the next datagram. + */ + struct vmci_datagram_queue_entry *next_entry; + + list_item = context->datagram_queue.next; + next_entry = + list_entry(list_item, struct vmci_datagram_queue_entry, + list_item); + + /* + * The following size_t -> int truncation is fine as + * the maximum size of a (routable) datagram is 68KB. + */ + rv = (int)next_entry->dg_size; + } + spin_unlock(&context->lock); + + /* Caller must free datagram. */ + *dg = dq_entry->dg; + dq_entry->dg = NULL; + kfree(dq_entry); + + return rv; +} + +/* + * Reverts actions set up by vmci_setup_notify(). 
Unmaps and unlocks the + * page mapped/locked by vmci_setup_notify(). + */ +void vmci_ctx_unset_notify(struct vmci_ctx *context) +{ + struct page *notify_page; + + spin_lock(&context->lock); + + notify_page = context->notify_page; + context->notify = &ctx_dummy_notify; + context->notify_page = NULL; + + spin_unlock(&context->lock); + + if (notify_page) { + kunmap(notify_page); + put_page(notify_page); + } +} + +/* + * Add remote_cid to list of contexts current contexts wants + * notifications from/about. + */ +int vmci_ctx_add_notification(u32 context_id, u32 remote_cid) +{ + struct vmci_ctx *context; + struct vmci_handle_list *notifier, *n; + int result; + bool exists = false; + + context = vmci_ctx_get(context_id); + if (!context) + return VMCI_ERROR_NOT_FOUND; + + if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(remote_cid)) { + pr_devel("Context removed notifications for other VMs not supported (src=0x%x, remote=0x%x)\n", + context_id, remote_cid); + result = VMCI_ERROR_DST_UNREACHABLE; + goto out; + } + + if (context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) { + result = VMCI_ERROR_NO_ACCESS; + goto out; + } + + notifier = kmalloc(sizeof(struct vmci_handle_list), GFP_KERNEL); + if (!notifier) { + result = VMCI_ERROR_NO_MEM; + goto out; + } + + INIT_LIST_HEAD(¬ifier->node); + notifier->handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER); + + spin_lock(&context->lock); + + if (context->n_notifiers < VMCI_MAX_CONTEXTS) { + list_for_each_entry(n, &context->notifier_list, node) { + if (vmci_handle_is_equal(n->handle, notifier->handle)) { + exists = true; + break; + } + } + + if (exists) { + kfree(notifier); + result = VMCI_ERROR_ALREADY_EXISTS; + } else { + list_add_tail_rcu(¬ifier->node, + &context->notifier_list); + context->n_notifiers++; + result = VMCI_SUCCESS; + } + } else { + kfree(notifier); + result = VMCI_ERROR_NO_MEM; + } + + spin_unlock(&context->lock); + + out: + vmci_ctx_put(context); + return result; +} + +/* + * Remove remote_cid from current context's list of contexts it is + * interested in getting notifications from/about. + */ +int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid) +{ + struct vmci_ctx *context; + struct vmci_handle_list *notifier = NULL, *iter, *tmp; + struct vmci_handle handle; + + context = vmci_ctx_get(context_id); + if (!context) + return VMCI_ERROR_NOT_FOUND; + + handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER); + + spin_lock(&context->lock); + list_for_each_entry_safe(iter, tmp, + &context->notifier_list, node) { + if (vmci_handle_is_equal(iter->handle, handle)) { + list_del_rcu(&iter->node); + context->n_notifiers--; + notifier = iter; + break; + } + } + spin_unlock(&context->lock); + + if (notifier) + kvfree_rcu_mightsleep(notifier); + + vmci_ctx_put(context); + + return notifier ? 
VMCI_SUCCESS : VMCI_ERROR_NOT_FOUND; +} + +static int vmci_ctx_get_chkpt_notifiers(struct vmci_ctx *context, + u32 *buf_size, void **pbuf) +{ + u32 *notifiers; + size_t data_size; + struct vmci_handle_list *entry; + int i = 0; + + if (context->n_notifiers == 0) { + *buf_size = 0; + *pbuf = NULL; + return VMCI_SUCCESS; + } + + data_size = context->n_notifiers * sizeof(*notifiers); + if (*buf_size < data_size) { + *buf_size = data_size; + return VMCI_ERROR_MORE_DATA; + } + + notifiers = kmalloc(data_size, GFP_ATOMIC); /* FIXME: want GFP_KERNEL */ + if (!notifiers) + return VMCI_ERROR_NO_MEM; + + list_for_each_entry(entry, &context->notifier_list, node) + notifiers[i++] = entry->handle.context; + + *buf_size = data_size; + *pbuf = notifiers; + return VMCI_SUCCESS; +} + +static int vmci_ctx_get_chkpt_doorbells(struct vmci_ctx *context, + u32 *buf_size, void **pbuf) +{ + struct dbell_cpt_state *dbells; + u32 i, n_doorbells; + + n_doorbells = vmci_handle_arr_get_size(context->doorbell_array); + if (n_doorbells > 0) { + size_t data_size = n_doorbells * sizeof(*dbells); + if (*buf_size < data_size) { + *buf_size = data_size; + return VMCI_ERROR_MORE_DATA; + } + + dbells = kzalloc(data_size, GFP_ATOMIC); + if (!dbells) + return VMCI_ERROR_NO_MEM; + + for (i = 0; i < n_doorbells; i++) + dbells[i].handle = vmci_handle_arr_get_entry( + context->doorbell_array, i); + + *buf_size = data_size; + *pbuf = dbells; + } else { + *buf_size = 0; + *pbuf = NULL; + } + + return VMCI_SUCCESS; +} + +/* + * Get current context's checkpoint state of given type. + */ +int vmci_ctx_get_chkpt_state(u32 context_id, + u32 cpt_type, + u32 *buf_size, + void **pbuf) +{ + struct vmci_ctx *context; + int result; + + context = vmci_ctx_get(context_id); + if (!context) + return VMCI_ERROR_NOT_FOUND; + + spin_lock(&context->lock); + + switch (cpt_type) { + case VMCI_NOTIFICATION_CPT_STATE: + result = vmci_ctx_get_chkpt_notifiers(context, buf_size, pbuf); + break; + + case VMCI_WELLKNOWN_CPT_STATE: + /* + * For compatibility with VMX'en with VM to VM communication, we + * always return zero wellknown handles. + */ + + *buf_size = 0; + *pbuf = NULL; + result = VMCI_SUCCESS; + break; + + case VMCI_DOORBELL_CPT_STATE: + result = vmci_ctx_get_chkpt_doorbells(context, buf_size, pbuf); + break; + + default: + pr_devel("Invalid cpt state (type=%d)\n", cpt_type); + result = VMCI_ERROR_INVALID_ARGS; + break; + } + + spin_unlock(&context->lock); + vmci_ctx_put(context); + + return result; +} + +/* + * Set current context's checkpoint state of given type. + */ +int vmci_ctx_set_chkpt_state(u32 context_id, + u32 cpt_type, + u32 buf_size, + void *cpt_buf) +{ + u32 i; + u32 current_id; + int result = VMCI_SUCCESS; + u32 num_ids = buf_size / sizeof(u32); + + if (cpt_type == VMCI_WELLKNOWN_CPT_STATE && num_ids > 0) { + /* + * We would end up here if VMX with VM to VM communication + * attempts to restore a checkpoint with wellknown handles. 
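+ * Wellknown handles are obsolete, so such a restore is rejected with
+ * VMCI_ERROR_OBSOLETE rather than silently ignored.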
+ */ + pr_warn("Attempt to restore checkpoint with obsolete wellknown handles\n"); + return VMCI_ERROR_OBSOLETE; + } + + if (cpt_type != VMCI_NOTIFICATION_CPT_STATE) { + pr_devel("Invalid cpt state (type=%d)\n", cpt_type); + return VMCI_ERROR_INVALID_ARGS; + } + + for (i = 0; i < num_ids && result == VMCI_SUCCESS; i++) { + current_id = ((u32 *)cpt_buf)[i]; + result = vmci_ctx_add_notification(context_id, current_id); + if (result != VMCI_SUCCESS) + break; + } + if (result != VMCI_SUCCESS) + pr_devel("Failed to set cpt state (type=%d) (error=%d)\n", + cpt_type, result); + + return result; +} + +/* + * Retrieves the specified context's pending notifications in the + * form of a handle array. The handle arrays returned are the + * actual data - not a copy and should not be modified by the + * caller. They must be released using + * vmci_ctx_rcv_notifications_release. + */ +int vmci_ctx_rcv_notifications_get(u32 context_id, + struct vmci_handle_arr **db_handle_array, + struct vmci_handle_arr **qp_handle_array) +{ + struct vmci_ctx *context; + int result = VMCI_SUCCESS; + + context = vmci_ctx_get(context_id); + if (context == NULL) + return VMCI_ERROR_NOT_FOUND; + + spin_lock(&context->lock); + + *db_handle_array = context->pending_doorbell_array; + context->pending_doorbell_array = + vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT); + if (!context->pending_doorbell_array) { + context->pending_doorbell_array = *db_handle_array; + *db_handle_array = NULL; + result = VMCI_ERROR_NO_MEM; + } + *qp_handle_array = NULL; + + spin_unlock(&context->lock); + vmci_ctx_put(context); + + return result; +} + +/* + * Releases handle arrays with pending notifications previously + * retrieved using vmci_ctx_rcv_notifications_get. If the + * notifications were not successfully handed over to the guest, + * success must be false. + */ +void vmci_ctx_rcv_notifications_release(u32 context_id, + struct vmci_handle_arr *db_handle_array, + struct vmci_handle_arr *qp_handle_array, + bool success) +{ + struct vmci_ctx *context = vmci_ctx_get(context_id); + + spin_lock(&context->lock); + if (!success) { + struct vmci_handle handle; + + /* + * New notifications may have been added while we were not + * holding the context lock, so we transfer any new pending + * doorbell notifications to the old array, and reinstate the + * old array. + */ + + handle = vmci_handle_arr_remove_tail( + context->pending_doorbell_array); + while (!vmci_handle_is_invalid(handle)) { + if (!vmci_handle_arr_has_entry(db_handle_array, + handle)) { + vmci_handle_arr_append_entry( + &db_handle_array, handle); + } + handle = vmci_handle_arr_remove_tail( + context->pending_doorbell_array); + } + vmci_handle_arr_destroy(context->pending_doorbell_array); + context->pending_doorbell_array = db_handle_array; + db_handle_array = NULL; + } else { + ctx_clear_notify_call(context); + } + spin_unlock(&context->lock); + vmci_ctx_put(context); + + if (db_handle_array) + vmci_handle_arr_destroy(db_handle_array); + + if (qp_handle_array) + vmci_handle_arr_destroy(qp_handle_array); +} + +/* + * Registers that a new doorbell handle has been allocated by the + * context. Only doorbell handles registered can be notified. 
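+ * Registering a handle that is already present in the context's
+ * doorbell array fails with VMCI_ERROR_DUPLICATE_ENTRY.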
+ */ +int vmci_ctx_dbell_create(u32 context_id, struct vmci_handle handle) +{ + struct vmci_ctx *context; + int result; + + if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle)) + return VMCI_ERROR_INVALID_ARGS; + + context = vmci_ctx_get(context_id); + if (context == NULL) + return VMCI_ERROR_NOT_FOUND; + + spin_lock(&context->lock); + if (!vmci_handle_arr_has_entry(context->doorbell_array, handle)) + result = vmci_handle_arr_append_entry(&context->doorbell_array, + handle); + else + result = VMCI_ERROR_DUPLICATE_ENTRY; + + spin_unlock(&context->lock); + vmci_ctx_put(context); + + return result; +} + +/* + * Unregisters a doorbell handle that was previously registered + * with vmci_ctx_dbell_create. + */ +int vmci_ctx_dbell_destroy(u32 context_id, struct vmci_handle handle) +{ + struct vmci_ctx *context; + struct vmci_handle removed_handle; + + if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle)) + return VMCI_ERROR_INVALID_ARGS; + + context = vmci_ctx_get(context_id); + if (context == NULL) + return VMCI_ERROR_NOT_FOUND; + + spin_lock(&context->lock); + removed_handle = + vmci_handle_arr_remove_entry(context->doorbell_array, handle); + vmci_handle_arr_remove_entry(context->pending_doorbell_array, handle); + spin_unlock(&context->lock); + + vmci_ctx_put(context); + + return vmci_handle_is_invalid(removed_handle) ? + VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS; +} + +/* + * Unregisters all doorbell handles that were previously + * registered with vmci_ctx_dbell_create. + */ +int vmci_ctx_dbell_destroy_all(u32 context_id) +{ + struct vmci_ctx *context; + struct vmci_handle handle; + + if (context_id == VMCI_INVALID_ID) + return VMCI_ERROR_INVALID_ARGS; + + context = vmci_ctx_get(context_id); + if (context == NULL) + return VMCI_ERROR_NOT_FOUND; + + spin_lock(&context->lock); + do { + struct vmci_handle_arr *arr = context->doorbell_array; + handle = vmci_handle_arr_remove_tail(arr); + } while (!vmci_handle_is_invalid(handle)); + do { + struct vmci_handle_arr *arr = context->pending_doorbell_array; + handle = vmci_handle_arr_remove_tail(arr); + } while (!vmci_handle_is_invalid(handle)); + spin_unlock(&context->lock); + + vmci_ctx_put(context); + + return VMCI_SUCCESS; +} + +/* + * Registers a notification of a doorbell handle initiated by the + * specified source context. The notification of doorbells are + * subject to the same isolation rules as datagram delivery. To + * allow host side senders of notifications a finer granularity + * of sender rights than those assigned to the sending context + * itself, the host context is required to specify a different + * set of privilege flags that will override the privileges of + * the source context. + */ +int vmci_ctx_notify_dbell(u32 src_cid, + struct vmci_handle handle, + u32 src_priv_flags) +{ + struct vmci_ctx *dst_context; + int result; + + if (vmci_handle_is_invalid(handle)) + return VMCI_ERROR_INVALID_ARGS; + + /* Get the target VM's VMCI context. 
*/ + dst_context = vmci_ctx_get(handle.context); + if (!dst_context) { + pr_devel("Invalid context (ID=0x%x)\n", handle.context); + return VMCI_ERROR_NOT_FOUND; + } + + if (src_cid != handle.context) { + u32 dst_priv_flags; + + if (VMCI_CONTEXT_IS_VM(src_cid) && + VMCI_CONTEXT_IS_VM(handle.context)) { + pr_devel("Doorbell notification from VM to VM not supported (src=0x%x, dst=0x%x)\n", + src_cid, handle.context); + result = VMCI_ERROR_DST_UNREACHABLE; + goto out; + } + + result = vmci_dbell_get_priv_flags(handle, &dst_priv_flags); + if (result < VMCI_SUCCESS) { + pr_warn("Failed to get privilege flags for destination (handle=0x%x:0x%x)\n", + handle.context, handle.resource); + goto out; + } + + if (src_cid != VMCI_HOST_CONTEXT_ID || + src_priv_flags == VMCI_NO_PRIVILEGE_FLAGS) { + src_priv_flags = vmci_context_get_priv_flags(src_cid); + } + + if (vmci_deny_interaction(src_priv_flags, dst_priv_flags)) { + result = VMCI_ERROR_NO_ACCESS; + goto out; + } + } + + if (handle.context == VMCI_HOST_CONTEXT_ID) { + result = vmci_dbell_host_context_notify(src_cid, handle); + } else { + spin_lock(&dst_context->lock); + + if (!vmci_handle_arr_has_entry(dst_context->doorbell_array, + handle)) { + result = VMCI_ERROR_NOT_FOUND; + } else { + if (!vmci_handle_arr_has_entry( + dst_context->pending_doorbell_array, + handle)) { + result = vmci_handle_arr_append_entry( + &dst_context->pending_doorbell_array, + handle); + if (result == VMCI_SUCCESS) { + ctx_signal_notify(dst_context); + wake_up(&dst_context->host_context.wait_queue); + } + } else { + result = VMCI_SUCCESS; + } + } + spin_unlock(&dst_context->lock); + } + + out: + vmci_ctx_put(dst_context); + + return result; +} + +bool vmci_ctx_supports_host_qp(struct vmci_ctx *context) +{ + return context && context->user_version >= VMCI_VERSION_HOSTQP; +} + +/* + * Registers that a new queue pair handle has been allocated by + * the context. + */ +int vmci_ctx_qp_create(struct vmci_ctx *context, struct vmci_handle handle) +{ + int result; + + if (context == NULL || vmci_handle_is_invalid(handle)) + return VMCI_ERROR_INVALID_ARGS; + + if (!vmci_handle_arr_has_entry(context->queue_pair_array, handle)) + result = vmci_handle_arr_append_entry( + &context->queue_pair_array, handle); + else + result = VMCI_ERROR_DUPLICATE_ENTRY; + + return result; +} + +/* + * Unregisters a queue pair handle that was previously registered + * with vmci_ctx_qp_create. + */ +int vmci_ctx_qp_destroy(struct vmci_ctx *context, struct vmci_handle handle) +{ + struct vmci_handle hndl; + + if (context == NULL || vmci_handle_is_invalid(handle)) + return VMCI_ERROR_INVALID_ARGS; + + hndl = vmci_handle_arr_remove_entry(context->queue_pair_array, handle); + + return vmci_handle_is_invalid(hndl) ? + VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS; +} + +/* + * Determines whether a given queue pair handle is registered + * with the given context. + */ +bool vmci_ctx_qp_exists(struct vmci_ctx *context, struct vmci_handle handle) +{ + if (context == NULL || vmci_handle_is_invalid(handle)) + return false; + + return vmci_handle_arr_has_entry(context->queue_pair_array, handle); +} + +/* + * vmci_context_get_priv_flags() - Retrieve privilege flags. + * @context_id: The context ID of the VMCI context. + * + * Retrieves privilege flags of the given VMCI context ID. 
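+ *
+ * Return: the context's privilege flags when the host personality is
+ * active, VMCI_LEAST_PRIVILEGE_FLAGS if the context cannot be found,
+ * and VMCI_NO_PRIVILEGE_FLAGS when the host personality is not active.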
+ */ +u32 vmci_context_get_priv_flags(u32 context_id) +{ + if (vmci_host_code_active()) { + u32 flags; + struct vmci_ctx *context; + + context = vmci_ctx_get(context_id); + if (!context) + return VMCI_LEAST_PRIVILEGE_FLAGS; + + flags = context->priv_flags; + vmci_ctx_put(context); + return flags; + } + return VMCI_NO_PRIVILEGE_FLAGS; +} +EXPORT_SYMBOL_GPL(vmci_context_get_priv_flags); + +/* + * vmci_is_context_owner() - Determimnes if user is the context owner + * @context_id: The context ID of the VMCI context. + * @uid: The host user id (real kernel value). + * + * Determines whether a given UID is the owner of given VMCI context. + */ +bool vmci_is_context_owner(u32 context_id, kuid_t uid) +{ + bool is_owner = false; + + if (vmci_host_code_active()) { + struct vmci_ctx *context = vmci_ctx_get(context_id); + if (context) { + if (context->cred) + is_owner = uid_eq(context->cred->uid, uid); + vmci_ctx_put(context); + } + } + + return is_owner; +} +EXPORT_SYMBOL_GPL(vmci_is_context_owner); diff --git a/drivers/misc/vmw_vmci/vmci_context.h b/drivers/misc/vmw_vmci/vmci_context.h new file mode 100644 index 0000000000..4db8701c97 --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_context.h @@ -0,0 +1,174 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * VMware VMCI driver (vmciContext.h) + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + */ + +#ifndef _VMCI_CONTEXT_H_ +#define _VMCI_CONTEXT_H_ + +#include +#include +#include +#include +#include + +#include "vmci_handle_array.h" +#include "vmci_datagram.h" + +/* Used to determine what checkpoint state to get and set. */ +enum { + VMCI_NOTIFICATION_CPT_STATE = 1, + VMCI_WELLKNOWN_CPT_STATE = 2, + VMCI_DG_OUT_STATE = 3, + VMCI_DG_IN_STATE = 4, + VMCI_DG_IN_SIZE_STATE = 5, + VMCI_DOORBELL_CPT_STATE = 6, +}; + +/* Host specific struct used for signalling */ +struct vmci_host { + wait_queue_head_t wait_queue; +}; + +struct vmci_handle_list { + struct list_head node; + struct vmci_handle handle; +}; + +struct vmci_ctx { + struct list_head list_item; /* For global VMCI list. */ + u32 cid; + struct kref kref; + struct list_head datagram_queue; /* Head of per VM queue. */ + u32 pending_datagrams; + size_t datagram_queue_size; /* Size of datagram queue in bytes. */ + + /* + * Version of the code that created + * this context; e.g., VMX. + */ + int user_version; + spinlock_t lock; /* Locks callQueue and handle_arrays. */ + + /* + * queue_pairs attached to. The array of + * handles for queue pairs is accessed + * from the code for QP API, and there + * it is protected by the QP lock. It + * is also accessed from the context + * clean up path, which does not + * require a lock. VMCILock is not + * used to protect the QP array field. + */ + struct vmci_handle_arr *queue_pair_array; + + /* Doorbells created by context. */ + struct vmci_handle_arr *doorbell_array; + + /* Doorbells pending for context. */ + struct vmci_handle_arr *pending_doorbell_array; + + /* Contexts current context is subscribing to. */ + struct list_head notifier_list; + unsigned int n_notifiers; + + struct vmci_host host_context; + u32 priv_flags; + + const struct cred *cred; + bool *notify; /* Notify flag pointer - hosted only. */ + struct page *notify_page; /* Page backing the notify UVA. */ +}; + +/* VMCINotifyAddRemoveInfo: Used to add/remove remote context notifications. */ +struct vmci_ctx_info { + u32 remote_cid; + int result; +}; + +/* VMCICptBufInfo: Used to set/get current context's checkpoint state. 
*/ +struct vmci_ctx_chkpt_buf_info { + u64 cpt_buf; + u32 cpt_type; + u32 buf_size; + s32 result; + u32 _pad; +}; + +/* + * VMCINotificationReceiveInfo: Used to recieve pending notifications + * for doorbells and queue pairs. + */ +struct vmci_ctx_notify_recv_info { + u64 db_handle_buf_uva; + u64 db_handle_buf_size; + u64 qp_handle_buf_uva; + u64 qp_handle_buf_size; + s32 result; + u32 _pad; +}; + +/* + * Utilility function that checks whether two entities are allowed + * to interact. If one of them is restricted, the other one must + * be trusted. + */ +static inline bool vmci_deny_interaction(u32 part_one, u32 part_two) +{ + return ((part_one & VMCI_PRIVILEGE_FLAG_RESTRICTED) && + !(part_two & VMCI_PRIVILEGE_FLAG_TRUSTED)) || + ((part_two & VMCI_PRIVILEGE_FLAG_RESTRICTED) && + !(part_one & VMCI_PRIVILEGE_FLAG_TRUSTED)); +} + +struct vmci_ctx *vmci_ctx_create(u32 cid, u32 flags, + uintptr_t event_hnd, int version, + const struct cred *cred); +void vmci_ctx_destroy(struct vmci_ctx *context); + +bool vmci_ctx_supports_host_qp(struct vmci_ctx *context); +int vmci_ctx_enqueue_datagram(u32 cid, struct vmci_datagram *dg); +int vmci_ctx_dequeue_datagram(struct vmci_ctx *context, + size_t *max_size, struct vmci_datagram **dg); +int vmci_ctx_pending_datagrams(u32 cid, u32 *pending); +struct vmci_ctx *vmci_ctx_get(u32 cid); +void vmci_ctx_put(struct vmci_ctx *context); +bool vmci_ctx_exists(u32 cid); + +int vmci_ctx_add_notification(u32 context_id, u32 remote_cid); +int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid); +int vmci_ctx_get_chkpt_state(u32 context_id, u32 cpt_type, + u32 *num_cids, void **cpt_buf_ptr); +int vmci_ctx_set_chkpt_state(u32 context_id, u32 cpt_type, + u32 num_cids, void *cpt_buf); + +int vmci_ctx_qp_create(struct vmci_ctx *context, struct vmci_handle handle); +int vmci_ctx_qp_destroy(struct vmci_ctx *context, struct vmci_handle handle); +bool vmci_ctx_qp_exists(struct vmci_ctx *context, struct vmci_handle handle); + +void vmci_ctx_check_signal_notify(struct vmci_ctx *context); +void vmci_ctx_unset_notify(struct vmci_ctx *context); + +int vmci_ctx_dbell_create(u32 context_id, struct vmci_handle handle); +int vmci_ctx_dbell_destroy(u32 context_id, struct vmci_handle handle); +int vmci_ctx_dbell_destroy_all(u32 context_id); +int vmci_ctx_notify_dbell(u32 cid, struct vmci_handle handle, + u32 src_priv_flags); + +int vmci_ctx_rcv_notifications_get(u32 context_id, struct vmci_handle_arr + **db_handle_array, struct vmci_handle_arr + **qp_handle_array); +void vmci_ctx_rcv_notifications_release(u32 context_id, struct vmci_handle_arr + *db_handle_array, struct vmci_handle_arr + *qp_handle_array, bool success); + +static inline u32 vmci_ctx_get_id(struct vmci_ctx *context) +{ + if (!context) + return VMCI_INVALID_ID; + return context->cid; +} + +#endif /* _VMCI_CONTEXT_H_ */ diff --git a/drivers/misc/vmw_vmci/vmci_datagram.c b/drivers/misc/vmw_vmci/vmci_datagram.c new file mode 100644 index 0000000000..f50d228824 --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_datagram.c @@ -0,0 +1,494 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include + +#include "vmci_datagram.h" +#include "vmci_resource.h" +#include "vmci_context.h" +#include "vmci_driver.h" +#include "vmci_event.h" +#include "vmci_route.h" + +/* + * struct datagram_entry describes the datagram entity. It is used for datagram + * entities created only on the host. 
+ */ +struct datagram_entry { + struct vmci_resource resource; + u32 flags; + bool run_delayed; + vmci_datagram_recv_cb recv_cb; + void *client_data; + u32 priv_flags; +}; + +struct delayed_datagram_info { + struct datagram_entry *entry; + struct work_struct work; + bool in_dg_host_queue; + /* msg and msg_payload must be together. */ + struct vmci_datagram msg; + u8 msg_payload[]; +}; + +/* Number of in-flight host->host datagrams */ +static atomic_t delayed_dg_host_queue_size = ATOMIC_INIT(0); + +/* + * Create a datagram entry given a handle pointer. + */ +static int dg_create_handle(u32 resource_id, + u32 flags, + u32 priv_flags, + vmci_datagram_recv_cb recv_cb, + void *client_data, struct vmci_handle *out_handle) +{ + int result; + u32 context_id; + struct vmci_handle handle; + struct datagram_entry *entry; + + if ((flags & VMCI_FLAG_WELLKNOWN_DG_HND) != 0) + return VMCI_ERROR_INVALID_ARGS; + + if ((flags & VMCI_FLAG_ANYCID_DG_HND) != 0) { + context_id = VMCI_INVALID_ID; + } else { + context_id = vmci_get_context_id(); + if (context_id == VMCI_INVALID_ID) + return VMCI_ERROR_NO_RESOURCES; + } + + handle = vmci_make_handle(context_id, resource_id); + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + pr_warn("Failed allocating memory for datagram entry\n"); + return VMCI_ERROR_NO_MEM; + } + + entry->run_delayed = (flags & VMCI_FLAG_DG_DELAYED_CB) ? true : false; + entry->flags = flags; + entry->recv_cb = recv_cb; + entry->client_data = client_data; + entry->priv_flags = priv_flags; + + /* Make datagram resource live. */ + result = vmci_resource_add(&entry->resource, + VMCI_RESOURCE_TYPE_DATAGRAM, + handle); + if (result != VMCI_SUCCESS) { + pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d\n", + handle.context, handle.resource, result); + kfree(entry); + return result; + } + + *out_handle = vmci_resource_handle(&entry->resource); + return VMCI_SUCCESS; +} + +/* + * Internal utility function with the same purpose as + * vmci_datagram_get_priv_flags that also takes a context_id. + */ +static int vmci_datagram_get_priv_flags(u32 context_id, + struct vmci_handle handle, + u32 *priv_flags) +{ + if (context_id == VMCI_INVALID_ID) + return VMCI_ERROR_INVALID_ARGS; + + if (context_id == VMCI_HOST_CONTEXT_ID) { + struct datagram_entry *src_entry; + struct vmci_resource *resource; + + resource = vmci_resource_by_handle(handle, + VMCI_RESOURCE_TYPE_DATAGRAM); + if (!resource) + return VMCI_ERROR_INVALID_ARGS; + + src_entry = container_of(resource, struct datagram_entry, + resource); + *priv_flags = src_entry->priv_flags; + vmci_resource_put(resource); + } else if (context_id == VMCI_HYPERVISOR_CONTEXT_ID) + *priv_flags = VMCI_MAX_PRIVILEGE_FLAGS; + else + *priv_flags = vmci_context_get_priv_flags(context_id); + + return VMCI_SUCCESS; +} + +/* + * Calls the specified callback in a delayed context. + */ +static void dg_delayed_dispatch(struct work_struct *work) +{ + struct delayed_datagram_info *dg_info = + container_of(work, struct delayed_datagram_info, work); + + dg_info->entry->recv_cb(dg_info->entry->client_data, &dg_info->msg); + + vmci_resource_put(&dg_info->entry->resource); + + if (dg_info->in_dg_host_queue) + atomic_dec(&delayed_dg_host_queue_size); + + kfree(dg_info); +} + +/* + * Dispatch datagram as a host, to the host, or other vm context. This + * function cannot dispatch to hypervisor context handlers. This should + * have been handled before we get here by vmci_datagram_dispatch. + * Returns number of bytes sent on success, error code otherwise. 
+ */ +static int dg_dispatch_as_host(u32 context_id, struct vmci_datagram *dg) +{ + int retval; + size_t dg_size; + u32 src_priv_flags; + + dg_size = VMCI_DG_SIZE(dg); + + /* Host cannot send to the hypervisor. */ + if (dg->dst.context == VMCI_HYPERVISOR_CONTEXT_ID) + return VMCI_ERROR_DST_UNREACHABLE; + + /* Check that source handle matches sending context. */ + if (dg->src.context != context_id) { + pr_devel("Sender context (ID=0x%x) is not owner of src datagram entry (handle=0x%x:0x%x)\n", + context_id, dg->src.context, dg->src.resource); + return VMCI_ERROR_NO_ACCESS; + } + + /* Get hold of privileges of sending endpoint. */ + retval = vmci_datagram_get_priv_flags(context_id, dg->src, + &src_priv_flags); + if (retval != VMCI_SUCCESS) { + pr_warn("Couldn't get privileges (handle=0x%x:0x%x)\n", + dg->src.context, dg->src.resource); + return retval; + } + + /* Determine if we should route to host or guest destination. */ + if (dg->dst.context == VMCI_HOST_CONTEXT_ID) { + /* Route to host datagram entry. */ + struct datagram_entry *dst_entry; + struct vmci_resource *resource; + + if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID && + dg->dst.resource == VMCI_EVENT_HANDLER) { + return vmci_event_dispatch(dg); + } + + resource = vmci_resource_by_handle(dg->dst, + VMCI_RESOURCE_TYPE_DATAGRAM); + if (!resource) { + pr_devel("Sending to invalid destination (handle=0x%x:0x%x)\n", + dg->dst.context, dg->dst.resource); + return VMCI_ERROR_INVALID_RESOURCE; + } + dst_entry = container_of(resource, struct datagram_entry, + resource); + if (vmci_deny_interaction(src_priv_flags, + dst_entry->priv_flags)) { + vmci_resource_put(resource); + return VMCI_ERROR_NO_ACCESS; + } + + /* + * If a VMCI datagram destined for the host is also sent by the + * host, we always run it delayed. This ensures that no locks + * are held when the datagram callback runs. + */ + if (dst_entry->run_delayed || + dg->src.context == VMCI_HOST_CONTEXT_ID) { + struct delayed_datagram_info *dg_info; + + if (atomic_add_return(1, &delayed_dg_host_queue_size) + == VMCI_MAX_DELAYED_DG_HOST_QUEUE_SIZE) { + atomic_dec(&delayed_dg_host_queue_size); + vmci_resource_put(resource); + return VMCI_ERROR_NO_MEM; + } + + dg_info = kmalloc(sizeof(*dg_info) + + (size_t) dg->payload_size, GFP_ATOMIC); + if (!dg_info) { + atomic_dec(&delayed_dg_host_queue_size); + vmci_resource_put(resource); + return VMCI_ERROR_NO_MEM; + } + + dg_info->in_dg_host_queue = true; + dg_info->entry = dst_entry; + memcpy(&dg_info->msg, dg, dg_size); + + INIT_WORK(&dg_info->work, dg_delayed_dispatch); + schedule_work(&dg_info->work); + retval = VMCI_SUCCESS; + + } else { + retval = dst_entry->recv_cb(dst_entry->client_data, dg); + vmci_resource_put(resource); + if (retval < VMCI_SUCCESS) + return retval; + } + } else { + /* Route to destination VM context. */ + struct vmci_datagram *new_dg; + + if (context_id != dg->dst.context) { + if (vmci_deny_interaction(src_priv_flags, + vmci_context_get_priv_flags + (dg->dst.context))) { + return VMCI_ERROR_NO_ACCESS; + } else if (VMCI_CONTEXT_IS_VM(context_id)) { + /* + * If the sending context is a VM, it + * cannot reach another VM. + */ + + pr_devel("Datagram communication between VMs not supported (src=0x%x, dst=0x%x)\n", + context_id, dg->dst.context); + return VMCI_ERROR_DST_UNREACHABLE; + } + } + + /* We make a copy to enqueue. 
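+ * The copy is freed by the consumer that dequeues it, or by context
+ * cleanup if the destination goes away first; the original datagram is
+ * left to the caller.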
*/ + new_dg = kmemdup(dg, dg_size, GFP_KERNEL); + if (new_dg == NULL) + return VMCI_ERROR_NO_MEM; + + retval = vmci_ctx_enqueue_datagram(dg->dst.context, new_dg); + if (retval < VMCI_SUCCESS) { + kfree(new_dg); + return retval; + } + } + + /* + * We currently truncate the size to signed 32 bits. This doesn't + * matter for this handler as it only support 4Kb messages. + */ + return (int)dg_size; +} + +/* + * Dispatch datagram as a guest, down through the VMX and potentially to + * the host. + * Returns number of bytes sent on success, error code otherwise. + */ +static int dg_dispatch_as_guest(struct vmci_datagram *dg) +{ + int retval; + struct vmci_resource *resource; + + resource = vmci_resource_by_handle(dg->src, + VMCI_RESOURCE_TYPE_DATAGRAM); + if (!resource) + return VMCI_ERROR_NO_HANDLE; + + retval = vmci_send_datagram(dg); + vmci_resource_put(resource); + return retval; +} + +/* + * Dispatch datagram. This will determine the routing for the datagram + * and dispatch it accordingly. + * Returns number of bytes sent on success, error code otherwise. + */ +int vmci_datagram_dispatch(u32 context_id, + struct vmci_datagram *dg, bool from_guest) +{ + int retval; + enum vmci_route route; + + BUILD_BUG_ON(sizeof(struct vmci_datagram) != 24); + + if (dg->payload_size > VMCI_MAX_DG_SIZE || + VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE) { + pr_devel("Payload (size=%llu bytes) too big to send\n", + (unsigned long long)dg->payload_size); + return VMCI_ERROR_INVALID_ARGS; + } + + retval = vmci_route(&dg->src, &dg->dst, from_guest, &route); + if (retval < VMCI_SUCCESS) { + pr_devel("Failed to route datagram (src=0x%x, dst=0x%x, err=%d)\n", + dg->src.context, dg->dst.context, retval); + return retval; + } + + if (VMCI_ROUTE_AS_HOST == route) { + if (VMCI_INVALID_ID == context_id) + context_id = VMCI_HOST_CONTEXT_ID; + return dg_dispatch_as_host(context_id, dg); + } + + if (VMCI_ROUTE_AS_GUEST == route) + return dg_dispatch_as_guest(dg); + + pr_warn("Unknown route (%d) for datagram\n", route); + return VMCI_ERROR_DST_UNREACHABLE; +} + +/* + * Invoke the handler for the given datagram. This is intended to be + * called only when acting as a guest and receiving a datagram from the + * virtual device. + */ +int vmci_datagram_invoke_guest_handler(struct vmci_datagram *dg) +{ + struct vmci_resource *resource; + struct datagram_entry *dst_entry; + + resource = vmci_resource_by_handle(dg->dst, + VMCI_RESOURCE_TYPE_DATAGRAM); + if (!resource) { + pr_devel("destination (handle=0x%x:0x%x) doesn't exist\n", + dg->dst.context, dg->dst.resource); + return VMCI_ERROR_NO_HANDLE; + } + + dst_entry = container_of(resource, struct datagram_entry, resource); + if (dst_entry->run_delayed) { + struct delayed_datagram_info *dg_info; + + dg_info = kmalloc(sizeof(*dg_info) + (size_t)dg->payload_size, + GFP_ATOMIC); + if (!dg_info) { + vmci_resource_put(resource); + return VMCI_ERROR_NO_MEM; + } + + dg_info->in_dg_host_queue = false; + dg_info->entry = dst_entry; + memcpy(&dg_info->msg, dg, VMCI_DG_SIZE(dg)); + + INIT_WORK(&dg_info->work, dg_delayed_dispatch); + schedule_work(&dg_info->work); + } else { + dst_entry->recv_cb(dst_entry->client_data, dg); + vmci_resource_put(resource); + } + + return VMCI_SUCCESS; +} + +/* + * vmci_datagram_create_handle_priv() - Create host context datagram endpoint + * @resource_id: The resource ID. + * @flags: Datagram Flags. + * @priv_flags: Privilege Flags. + * @recv_cb: Callback when receiving datagrams. 
+ * @client_data: Pointer for a datagram_entry struct + * @out_handle: vmci_handle that is populated as a result of this function. + * + * Creates a host context datagram endpoint and returns a handle to it. + */ +int vmci_datagram_create_handle_priv(u32 resource_id, + u32 flags, + u32 priv_flags, + vmci_datagram_recv_cb recv_cb, + void *client_data, + struct vmci_handle *out_handle) +{ + if (out_handle == NULL) + return VMCI_ERROR_INVALID_ARGS; + + if (recv_cb == NULL) { + pr_devel("Client callback needed when creating datagram\n"); + return VMCI_ERROR_INVALID_ARGS; + } + + if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS) + return VMCI_ERROR_INVALID_ARGS; + + return dg_create_handle(resource_id, flags, priv_flags, recv_cb, + client_data, out_handle); +} +EXPORT_SYMBOL_GPL(vmci_datagram_create_handle_priv); + +/* + * vmci_datagram_create_handle() - Create host context datagram endpoint + * @resource_id: Resource ID. + * @flags: Datagram Flags. + * @recv_cb: Callback when receiving datagrams. + * @client_ata: Pointer for a datagram_entry struct + * @out_handle: vmci_handle that is populated as a result of this function. + * + * Creates a host context datagram endpoint and returns a handle to + * it. Same as vmci_datagram_create_handle_priv without the priviledge + * flags argument. + */ +int vmci_datagram_create_handle(u32 resource_id, + u32 flags, + vmci_datagram_recv_cb recv_cb, + void *client_data, + struct vmci_handle *out_handle) +{ + return vmci_datagram_create_handle_priv( + resource_id, flags, + VMCI_DEFAULT_PROC_PRIVILEGE_FLAGS, + recv_cb, client_data, + out_handle); +} +EXPORT_SYMBOL_GPL(vmci_datagram_create_handle); + +/* + * vmci_datagram_destroy_handle() - Destroys datagram handle + * @handle: vmci_handle to be destroyed and reaped. + * + * Use this function to destroy any datagram handles created by + * vmci_datagram_create_handle{,Priv} functions. + */ +int vmci_datagram_destroy_handle(struct vmci_handle handle) +{ + struct datagram_entry *entry; + struct vmci_resource *resource; + + resource = vmci_resource_by_handle(handle, VMCI_RESOURCE_TYPE_DATAGRAM); + if (!resource) { + pr_devel("Failed to destroy datagram (handle=0x%x:0x%x)\n", + handle.context, handle.resource); + return VMCI_ERROR_NOT_FOUND; + } + + entry = container_of(resource, struct datagram_entry, resource); + + vmci_resource_put(&entry->resource); + vmci_resource_remove(&entry->resource); + kfree(entry); + + return VMCI_SUCCESS; +} +EXPORT_SYMBOL_GPL(vmci_datagram_destroy_handle); + +/* + * vmci_datagram_send() - Send a datagram + * @msg: The datagram to send. + * + * Sends the provided datagram on its merry way. + */ +int vmci_datagram_send(struct vmci_datagram *msg) +{ + if (msg == NULL) + return VMCI_ERROR_INVALID_ARGS; + + return vmci_datagram_dispatch(VMCI_INVALID_ID, msg, false); +} +EXPORT_SYMBOL_GPL(vmci_datagram_send); diff --git a/drivers/misc/vmw_vmci/vmci_datagram.h b/drivers/misc/vmw_vmci/vmci_datagram.h new file mode 100644 index 0000000000..b5b5b9286f --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_datagram.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + */ + +#ifndef _VMCI_DATAGRAM_H_ +#define _VMCI_DATAGRAM_H_ + +#include +#include + +#include "vmci_context.h" + +#define VMCI_MAX_DELAYED_DG_HOST_QUEUE_SIZE 256 + +/* + * The struct vmci_datagram_queue_entry is a queue header for the in-kernel VMCI + * datagram queues. 
It is allocated in non-paged memory, as the + * content is accessed while holding a spinlock. The pending datagram + * itself may be allocated from paged memory. We shadow the size of + * the datagram in the non-paged queue entry as this size is used + * while holding the same spinlock as above. + */ +struct vmci_datagram_queue_entry { + struct list_head list_item; /* For queuing. */ + size_t dg_size; /* Size of datagram. */ + struct vmci_datagram *dg; /* Pending datagram. */ +}; + +/* VMCIDatagramSendRecvInfo */ +struct vmci_datagram_snd_rcv_info { + u64 addr; + u32 len; + s32 result; +}; + +/* Datagram API for non-public use. */ +int vmci_datagram_dispatch(u32 context_id, struct vmci_datagram *dg, + bool from_guest); +int vmci_datagram_invoke_guest_handler(struct vmci_datagram *dg); + +#endif /* _VMCI_DATAGRAM_H_ */ diff --git a/drivers/misc/vmw_vmci/vmci_doorbell.c b/drivers/misc/vmw_vmci/vmci_doorbell.c new file mode 100644 index 0000000000..fa8a7fce44 --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_doorbell.c @@ -0,0 +1,604 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vmci_datagram.h" +#include "vmci_doorbell.h" +#include "vmci_resource.h" +#include "vmci_driver.h" +#include "vmci_route.h" + + +#define VMCI_DOORBELL_INDEX_BITS 6 +#define VMCI_DOORBELL_INDEX_TABLE_SIZE (1 << VMCI_DOORBELL_INDEX_BITS) +#define VMCI_DOORBELL_HASH(_idx) hash_32(_idx, VMCI_DOORBELL_INDEX_BITS) + +/* + * DoorbellEntry describes the a doorbell notification handle allocated by the + * host. + */ +struct dbell_entry { + struct vmci_resource resource; + struct hlist_node node; + struct work_struct work; + vmci_callback notify_cb; + void *client_data; + u32 idx; + u32 priv_flags; + bool run_delayed; + atomic_t active; /* Only used by guest personality */ +}; + +/* The VMCI index table keeps track of currently registered doorbells. */ +struct dbell_index_table { + spinlock_t lock; /* Index table lock */ + struct hlist_head entries[VMCI_DOORBELL_INDEX_TABLE_SIZE]; +}; + +static struct dbell_index_table vmci_doorbell_it = { + .lock = __SPIN_LOCK_UNLOCKED(vmci_doorbell_it.lock), +}; + +/* + * The max_notify_idx is one larger than the currently known bitmap index in + * use, and is used to determine how much of the bitmap needs to be scanned. + */ +static u32 max_notify_idx; + +/* + * The notify_idx_count is used for determining whether there are free entries + * within the bitmap (if notify_idx_count + 1 < max_notify_idx). + */ +static u32 notify_idx_count; + +/* + * The last_notify_idx_reserved is used to track the last index handed out - in + * the case where multiple handles share a notification index, we hand out + * indexes round robin based on last_notify_idx_reserved. + */ +static u32 last_notify_idx_reserved; + +/* This is a one entry cache used to by the index allocation. */ +static u32 last_notify_idx_released = PAGE_SIZE; + + +/* + * Utility function that retrieves the privilege flags associated + * with a given doorbell handle. For guest endpoints, the + * privileges are determined by the context ID, but for host + * endpoints privileges are associated with the complete + * handle. Hypervisor endpoints are not yet supported. 
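+ *
+ * Returns VMCI_SUCCESS and fills in *priv_flags on success,
+ * VMCI_ERROR_INVALID_ARGS for an invalid or hypervisor handle, and
+ * VMCI_ERROR_NOT_FOUND if no host doorbell resource matches the handle.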
+ */ +int vmci_dbell_get_priv_flags(struct vmci_handle handle, u32 *priv_flags) +{ + if (priv_flags == NULL || handle.context == VMCI_INVALID_ID) + return VMCI_ERROR_INVALID_ARGS; + + if (handle.context == VMCI_HOST_CONTEXT_ID) { + struct dbell_entry *entry; + struct vmci_resource *resource; + + resource = vmci_resource_by_handle(handle, + VMCI_RESOURCE_TYPE_DOORBELL); + if (!resource) + return VMCI_ERROR_NOT_FOUND; + + entry = container_of(resource, struct dbell_entry, resource); + *priv_flags = entry->priv_flags; + vmci_resource_put(resource); + } else if (handle.context == VMCI_HYPERVISOR_CONTEXT_ID) { + /* + * Hypervisor endpoints for notifications are not + * supported (yet). + */ + return VMCI_ERROR_INVALID_ARGS; + } else { + *priv_flags = vmci_context_get_priv_flags(handle.context); + } + + return VMCI_SUCCESS; +} + +/* + * Find doorbell entry by bitmap index. + */ +static struct dbell_entry *dbell_index_table_find(u32 idx) +{ + u32 bucket = VMCI_DOORBELL_HASH(idx); + struct dbell_entry *dbell; + + hlist_for_each_entry(dbell, &vmci_doorbell_it.entries[bucket], + node) { + if (idx == dbell->idx) + return dbell; + } + + return NULL; +} + +/* + * Add the given entry to the index table. This willi take a reference to the + * entry's resource so that the entry is not deleted before it is removed from + * the * table. + */ +static void dbell_index_table_add(struct dbell_entry *entry) +{ + u32 bucket; + u32 new_notify_idx; + + vmci_resource_get(&entry->resource); + + spin_lock_bh(&vmci_doorbell_it.lock); + + /* + * Below we try to allocate an index in the notification + * bitmap with "not too much" sharing between resources. If we + * use less that the full bitmap, we either add to the end if + * there are no unused flags within the currently used area, + * or we search for unused ones. If we use the full bitmap, we + * allocate the index round robin. + */ + if (max_notify_idx < PAGE_SIZE || notify_idx_count < PAGE_SIZE) { + if (last_notify_idx_released < max_notify_idx && + !dbell_index_table_find(last_notify_idx_released)) { + new_notify_idx = last_notify_idx_released; + last_notify_idx_released = PAGE_SIZE; + } else { + bool reused = false; + new_notify_idx = last_notify_idx_reserved; + if (notify_idx_count + 1 < max_notify_idx) { + do { + if (!dbell_index_table_find + (new_notify_idx)) { + reused = true; + break; + } + new_notify_idx = (new_notify_idx + 1) % + max_notify_idx; + } while (new_notify_idx != + last_notify_idx_released); + } + if (!reused) { + new_notify_idx = max_notify_idx; + max_notify_idx++; + } + } + } else { + new_notify_idx = (last_notify_idx_reserved + 1) % PAGE_SIZE; + } + + last_notify_idx_reserved = new_notify_idx; + notify_idx_count++; + + entry->idx = new_notify_idx; + bucket = VMCI_DOORBELL_HASH(entry->idx); + hlist_add_head(&entry->node, &vmci_doorbell_it.entries[bucket]); + + spin_unlock_bh(&vmci_doorbell_it.lock); +} + +/* + * Remove the given entry from the index table. This will release() the + * entry's resource. + */ +static void dbell_index_table_remove(struct dbell_entry *entry) +{ + spin_lock_bh(&vmci_doorbell_it.lock); + + hlist_del_init(&entry->node); + + notify_idx_count--; + if (entry->idx == max_notify_idx - 1) { + /* + * If we delete an entry with the maximum known + * notification index, we take the opportunity to + * prune the current max. As there might be other + * unused indices immediately below, we lower the + * maximum until we hit an index in use. 
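+ *
+ * For example, if indices 0, 1 and 5 are registered (so max_notify_idx
+ * is 6) and the entry at index 5 is removed, indices 5, 4, 3 and 2 are
+ * found unused and max_notify_idx drops to 2, since index 1 is still
+ * registered.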
+ */ + while (max_notify_idx > 0 && + !dbell_index_table_find(max_notify_idx - 1)) + max_notify_idx--; + } + + last_notify_idx_released = entry->idx; + + spin_unlock_bh(&vmci_doorbell_it.lock); + + vmci_resource_put(&entry->resource); +} + +/* + * Creates a link between the given doorbell handle and the given + * index in the bitmap in the device backend. A notification state + * is created in hypervisor. + */ +static int dbell_link(struct vmci_handle handle, u32 notify_idx) +{ + struct vmci_doorbell_link_msg link_msg; + + link_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_DOORBELL_LINK); + link_msg.hdr.src = VMCI_ANON_SRC_HANDLE; + link_msg.hdr.payload_size = sizeof(link_msg) - VMCI_DG_HEADERSIZE; + link_msg.handle = handle; + link_msg.notify_idx = notify_idx; + + return vmci_send_datagram(&link_msg.hdr); +} + +/* + * Unlinks the given doorbell handle from an index in the bitmap in + * the device backend. The notification state is destroyed in hypervisor. + */ +static int dbell_unlink(struct vmci_handle handle) +{ + struct vmci_doorbell_unlink_msg unlink_msg; + + unlink_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_DOORBELL_UNLINK); + unlink_msg.hdr.src = VMCI_ANON_SRC_HANDLE; + unlink_msg.hdr.payload_size = sizeof(unlink_msg) - VMCI_DG_HEADERSIZE; + unlink_msg.handle = handle; + + return vmci_send_datagram(&unlink_msg.hdr); +} + +/* + * Notify another guest or the host. We send a datagram down to the + * host via the hypervisor with the notification info. + */ +static int dbell_notify_as_guest(struct vmci_handle handle, u32 priv_flags) +{ + struct vmci_doorbell_notify_msg notify_msg; + + notify_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_DOORBELL_NOTIFY); + notify_msg.hdr.src = VMCI_ANON_SRC_HANDLE; + notify_msg.hdr.payload_size = sizeof(notify_msg) - VMCI_DG_HEADERSIZE; + notify_msg.handle = handle; + + return vmci_send_datagram(¬ify_msg.hdr); +} + +/* + * Calls the specified callback in a delayed context. + */ +static void dbell_delayed_dispatch(struct work_struct *work) +{ + struct dbell_entry *entry = container_of(work, + struct dbell_entry, work); + + entry->notify_cb(entry->client_data); + vmci_resource_put(&entry->resource); +} + +/* + * Dispatches a doorbell notification to the host context. + */ +int vmci_dbell_host_context_notify(u32 src_cid, struct vmci_handle handle) +{ + struct dbell_entry *entry; + struct vmci_resource *resource; + + if (vmci_handle_is_invalid(handle)) { + pr_devel("Notifying an invalid doorbell (handle=0x%x:0x%x)\n", + handle.context, handle.resource); + return VMCI_ERROR_INVALID_ARGS; + } + + resource = vmci_resource_by_handle(handle, + VMCI_RESOURCE_TYPE_DOORBELL); + if (!resource) { + pr_devel("Notifying an unknown doorbell (handle=0x%x:0x%x)\n", + handle.context, handle.resource); + return VMCI_ERROR_NOT_FOUND; + } + + entry = container_of(resource, struct dbell_entry, resource); + if (entry->run_delayed) { + if (!schedule_work(&entry->work)) + vmci_resource_put(resource); + } else { + entry->notify_cb(entry->client_data); + vmci_resource_put(resource); + } + + return VMCI_SUCCESS; +} + +/* + * Register the notification bitmap with the host. 
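+ * Returns true if the hypervisor accepts the VMCI_SET_NOTIFY_BITMAP
+ * datagram carrying the bitmap PPN, false otherwise.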
+ */ +bool vmci_dbell_register_notification_bitmap(u64 bitmap_ppn) +{ + int result; + struct vmci_notify_bm_set_msg bitmap_set_msg = { }; + + bitmap_set_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_SET_NOTIFY_BITMAP); + bitmap_set_msg.hdr.src = VMCI_ANON_SRC_HANDLE; + bitmap_set_msg.hdr.payload_size = sizeof(bitmap_set_msg) - + VMCI_DG_HEADERSIZE; + if (vmci_use_ppn64()) + bitmap_set_msg.bitmap_ppn64 = bitmap_ppn; + else + bitmap_set_msg.bitmap_ppn32 = (u32) bitmap_ppn; + + result = vmci_send_datagram(&bitmap_set_msg.hdr); + if (result != VMCI_SUCCESS) { + pr_devel("Failed to register (PPN=%llu) as notification bitmap (error=%d)\n", + bitmap_ppn, result); + return false; + } + return true; +} + +/* + * Executes or schedules the handlers for a given notify index. + */ +static void dbell_fire_entries(u32 notify_idx) +{ + u32 bucket = VMCI_DOORBELL_HASH(notify_idx); + struct dbell_entry *dbell; + + spin_lock_bh(&vmci_doorbell_it.lock); + + hlist_for_each_entry(dbell, &vmci_doorbell_it.entries[bucket], node) { + if (dbell->idx == notify_idx && + atomic_read(&dbell->active) == 1) { + if (dbell->run_delayed) { + vmci_resource_get(&dbell->resource); + if (!schedule_work(&dbell->work)) + vmci_resource_put(&dbell->resource); + } else { + dbell->notify_cb(dbell->client_data); + } + } + } + + spin_unlock_bh(&vmci_doorbell_it.lock); +} + +/* + * Scans the notification bitmap, collects pending notifications, + * resets the bitmap and invokes appropriate callbacks. + */ +void vmci_dbell_scan_notification_entries(u8 *bitmap) +{ + u32 idx; + + for (idx = 0; idx < max_notify_idx; idx++) { + if (bitmap[idx] & 0x1) { + bitmap[idx] &= ~1; + dbell_fire_entries(idx); + } + } +} + +/* + * vmci_doorbell_create() - Creates a doorbell + * @handle: A handle used to track the resource. Can be invalid. + * @flags: Flag that determines context of callback. + * @priv_flags: Privilege flags. + * @notify_cb: The callback to be invoked when the doorbell fires. + * @client_data: A parameter to be passed to the callback. + * + * Creates a doorbell with the given callback. If the handle is + * VMCI_INVALID_HANDLE, a free handle will be assigned, if + * possible. The callback can be run immediately (potentially with + * locks held - the default) or delayed (in a kernel thread) by + * specifying the flag VMCI_FLAG_DELAYED_CB. If delayed execution + * is selected, a given callback may not be run if the kernel is + * unable to allocate memory for the delayed execution (highly + * unlikely). + */ +int vmci_doorbell_create(struct vmci_handle *handle, + u32 flags, + u32 priv_flags, + vmci_callback notify_cb, void *client_data) +{ + struct dbell_entry *entry; + struct vmci_handle new_handle; + int result; + + if (!handle || !notify_cb || flags & ~VMCI_FLAG_DELAYED_CB || + priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS) + return VMCI_ERROR_INVALID_ARGS; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (entry == NULL) { + pr_warn("Failed allocating memory for datagram entry\n"); + return VMCI_ERROR_NO_MEM; + } + + if (vmci_handle_is_invalid(*handle)) { + u32 context_id = vmci_get_context_id(); + + if (context_id == VMCI_INVALID_ID) { + pr_warn("Failed to get context ID\n"); + result = VMCI_ERROR_NO_RESOURCES; + goto free_mem; + } + + /* Let resource code allocate a free ID for us */ + new_handle = vmci_make_handle(context_id, VMCI_INVALID_ID); + } else { + bool valid_context = false; + + /* + * Validate the handle.
We must do both of the checks below + * because we can be acting as both a host and a guest at the + * same time. We always allow the host context ID, since the + * host functionality is in practice always there with the + * unified driver. + */ + if (handle->context == VMCI_HOST_CONTEXT_ID || + (vmci_guest_code_active() && + vmci_get_context_id() == handle->context)) { + valid_context = true; + } + + if (!valid_context || handle->resource == VMCI_INVALID_ID) { + pr_devel("Invalid argument (handle=0x%x:0x%x)\n", + handle->context, handle->resource); + result = VMCI_ERROR_INVALID_ARGS; + goto free_mem; + } + + new_handle = *handle; + } + + entry->idx = 0; + INIT_HLIST_NODE(&entry->node); + entry->priv_flags = priv_flags; + INIT_WORK(&entry->work, dbell_delayed_dispatch); + entry->run_delayed = flags & VMCI_FLAG_DELAYED_CB; + entry->notify_cb = notify_cb; + entry->client_data = client_data; + atomic_set(&entry->active, 0); + + result = vmci_resource_add(&entry->resource, + VMCI_RESOURCE_TYPE_DOORBELL, + new_handle); + if (result != VMCI_SUCCESS) { + pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d\n", + new_handle.context, new_handle.resource, result); + goto free_mem; + } + + new_handle = vmci_resource_handle(&entry->resource); + if (vmci_guest_code_active()) { + dbell_index_table_add(entry); + result = dbell_link(new_handle, entry->idx); + if (VMCI_SUCCESS != result) + goto destroy_resource; + + atomic_set(&entry->active, 1); + } + + *handle = new_handle; + + return result; + + destroy_resource: + dbell_index_table_remove(entry); + vmci_resource_remove(&entry->resource); + free_mem: + kfree(entry); + return result; +} +EXPORT_SYMBOL_GPL(vmci_doorbell_create); + +/* + * vmci_doorbell_destroy() - Destroy a doorbell. + * @handle: The handle tracking the resource. + * + * Destroys a doorbell previously created with vmci_doorbell_create(). This + * operation may block waiting for a callback to finish. + */ +int vmci_doorbell_destroy(struct vmci_handle handle) +{ + struct dbell_entry *entry; + struct vmci_resource *resource; + + if (vmci_handle_is_invalid(handle)) + return VMCI_ERROR_INVALID_ARGS; + + resource = vmci_resource_by_handle(handle, + VMCI_RESOURCE_TYPE_DOORBELL); + if (!resource) { + pr_devel("Failed to destroy doorbell (handle=0x%x:0x%x)\n", + handle.context, handle.resource); + return VMCI_ERROR_NOT_FOUND; + } + + entry = container_of(resource, struct dbell_entry, resource); + + if (!hlist_unhashed(&entry->node)) { + int result; + + dbell_index_table_remove(entry); + + result = dbell_unlink(handle); + if (VMCI_SUCCESS != result) { + + /* + * The only reason this should fail would be + * an inconsistency between guest and + * hypervisor state, where the guest believes + * it has an active registration whereas the + * hypervisor doesn't. One case where this may + * happen is if a doorbell is unregistered + * following a hibernation at a time when the + * doorbell state hasn't been restored on the + * hypervisor side yet. Since the handle has + * now been removed in the guest, we just + * print a warning and return success. + */ + pr_devel("Unlink of doorbell (handle=0x%x:0x%x) unknown by hypervisor (error=%d)\n", + handle.context, handle.resource, result); + } + } + + /* + * Now remove the resource from the table. It might still be in use + * after this, in a callback or still on the delayed work queue.
+ */ + vmci_resource_put(&entry->resource); + vmci_resource_remove(&entry->resource); + + kfree(entry); + + return VMCI_SUCCESS; +} +EXPORT_SYMBOL_GPL(vmci_doorbell_destroy); + +/* + * vmci_doorbell_notify() - Ring the doorbell (and hide in the bushes). + * @dst: The handle identifying the doorbell resource + * @priv_flags: Privilege flags. + * + * Generates a notification on the doorbell identified by the + * handle. For host side generation of notifications, the caller + * can specify what the privilege of the calling side is. + */ +int vmci_doorbell_notify(struct vmci_handle dst, u32 priv_flags) +{ + int retval; + enum vmci_route route; + struct vmci_handle src; + + if (vmci_handle_is_invalid(dst) || + (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS)) + return VMCI_ERROR_INVALID_ARGS; + + src = VMCI_INVALID_HANDLE; + retval = vmci_route(&src, &dst, false, &route); + if (retval < VMCI_SUCCESS) + return retval; + + if (VMCI_ROUTE_AS_HOST == route) + return vmci_ctx_notify_dbell(VMCI_HOST_CONTEXT_ID, + dst, priv_flags); + + if (VMCI_ROUTE_AS_GUEST == route) + return dbell_notify_as_guest(dst, priv_flags); + + pr_warn("Unknown route (%d) for doorbell\n", route); + return VMCI_ERROR_DST_UNREACHABLE; +} +EXPORT_SYMBOL_GPL(vmci_doorbell_notify); diff --git a/drivers/misc/vmw_vmci/vmci_doorbell.h b/drivers/misc/vmw_vmci/vmci_doorbell.h new file mode 100644 index 0000000000..1dfb388d0e --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_doorbell.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + */ + +#ifndef VMCI_DOORBELL_H +#define VMCI_DOORBELL_H + +#include +#include + +#include "vmci_driver.h" + +/* + * VMCINotifyResourceInfo: Used to create and destroy doorbells, and + * generate a notification for a doorbell or queue pair. + */ +struct vmci_dbell_notify_resource_info { + struct vmci_handle handle; + u16 resource; + u16 action; + s32 result; +}; + +/* + * Structure used for checkpointing the doorbell mappings. It is + * written to the checkpoint as is, so changing this structure will + * break checkpoint compatibility. + */ +struct dbell_cpt_state { + struct vmci_handle handle; + u64 bitmap_idx; +}; + +int vmci_dbell_host_context_notify(u32 src_cid, struct vmci_handle handle); +int vmci_dbell_get_priv_flags(struct vmci_handle handle, u32 *priv_flags); + +bool vmci_dbell_register_notification_bitmap(u64 bitmap_ppn); +void vmci_dbell_scan_notification_entries(u8 *bitmap); + +#endif /* VMCI_DOORBELL_H */ diff --git a/drivers/misc/vmw_vmci/vmci_driver.c b/drivers/misc/vmw_vmci/vmci_driver.c new file mode 100644 index 0000000000..cbb706dabe --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_driver.c @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ */ + +#include +#include +#include +#include +#include +#include + +#include "vmci_driver.h" +#include "vmci_event.h" + +static bool vmci_disable_host; +module_param_named(disable_host, vmci_disable_host, bool, 0); +MODULE_PARM_DESC(disable_host, + "Disable driver host personality (default=enabled)"); + +static bool vmci_disable_guest; +module_param_named(disable_guest, vmci_disable_guest, bool, 0); +MODULE_PARM_DESC(disable_guest, + "Disable driver guest personality (default=enabled)"); + +static bool vmci_guest_personality_initialized; +static bool vmci_host_personality_initialized; + +static DEFINE_MUTEX(vmci_vsock_mutex); /* protects vmci_vsock_transport_cb */ +static vmci_vsock_cb vmci_vsock_transport_cb; +static bool vmci_vsock_cb_host_called; + +/* + * vmci_get_context_id() - Gets the current context ID. + * + * Returns the current context ID. Note that since this is accessed only + * from code running in the host, this always returns the host context ID. + */ +u32 vmci_get_context_id(void) +{ + if (vmci_guest_code_active()) + return vmci_get_vm_context_id(); + else if (vmci_host_code_active()) + return VMCI_HOST_CONTEXT_ID; + + return VMCI_INVALID_ID; +} +EXPORT_SYMBOL_GPL(vmci_get_context_id); + +/* + * vmci_register_vsock_callback() - Register the VSOCK vmci_transport callback. + * + * The callback will be called when the first host or guest becomes active, + * or if they are already active when this function is called. + * To unregister the callback, call this function with NULL parameter. + * + * Returns 0 on success. -EBUSY if a callback is already registered. + */ +int vmci_register_vsock_callback(vmci_vsock_cb callback) +{ + int err = 0; + + mutex_lock(&vmci_vsock_mutex); + + if (vmci_vsock_transport_cb && callback) { + err = -EBUSY; + goto out; + } + + vmci_vsock_transport_cb = callback; + + if (!vmci_vsock_transport_cb) { + vmci_vsock_cb_host_called = false; + goto out; + } + + if (vmci_guest_code_active()) + vmci_vsock_transport_cb(false); + + if (vmci_host_users() > 0) { + vmci_vsock_cb_host_called = true; + vmci_vsock_transport_cb(true); + } + +out: + mutex_unlock(&vmci_vsock_mutex); + return err; +} +EXPORT_SYMBOL_GPL(vmci_register_vsock_callback); + +void vmci_call_vsock_callback(bool is_host) +{ + mutex_lock(&vmci_vsock_mutex); + + if (!vmci_vsock_transport_cb) + goto out; + + /* In the host, this function could be called multiple times, + * but we want to register it only once. + */ + if (is_host) { + if (vmci_vsock_cb_host_called) + goto out; + + vmci_vsock_cb_host_called = true; + } + + vmci_vsock_transport_cb(is_host); +out: + mutex_unlock(&vmci_vsock_mutex); +} + +static int __init vmci_drv_init(void) +{ + int vmci_err; + int error; + + vmci_err = vmci_event_init(); + if (vmci_err < VMCI_SUCCESS) { + pr_err("Failed to initialize VMCIEvent (result=%d)\n", + vmci_err); + return -EINVAL; + } + + if (!vmci_disable_guest) { + error = vmci_guest_init(); + if (error) { + pr_warn("Failed to initialize guest personality (err=%d)\n", + error); + } else { + vmci_guest_personality_initialized = true; + pr_info("Guest personality initialized and is %s\n", + vmci_guest_code_active() ? 
+ "active" : "inactive"); + } + } + + if (!vmci_disable_host) { + error = vmci_host_init(); + if (error) { + pr_warn("Unable to initialize host personality (err=%d)\n", + error); + } else { + vmci_host_personality_initialized = true; + pr_info("Initialized host personality\n"); + } + } + + if (!vmci_guest_personality_initialized && + !vmci_host_personality_initialized) { + vmci_event_exit(); + return -ENODEV; + } + + return 0; +} +module_init(vmci_drv_init); + +static void __exit vmci_drv_exit(void) +{ + if (vmci_guest_personality_initialized) + vmci_guest_exit(); + + if (vmci_host_personality_initialized) + vmci_host_exit(); + + vmci_event_exit(); +} +module_exit(vmci_drv_exit); + +MODULE_AUTHOR("VMware, Inc."); +MODULE_DESCRIPTION("VMware Virtual Machine Communication Interface."); +MODULE_VERSION("1.1.6.0-k"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/vmw_vmci/vmci_driver.h b/drivers/misc/vmw_vmci/vmci_driver.h new file mode 100644 index 0000000000..990682480b --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_driver.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + */ + +#ifndef _VMCI_DRIVER_H_ +#define _VMCI_DRIVER_H_ + +#include +#include + +#include "vmci_queue_pair.h" +#include "vmci_context.h" + +enum vmci_obj_type { + VMCIOBJ_VMX_VM = 10, + VMCIOBJ_CONTEXT, + VMCIOBJ_SOCKET, + VMCIOBJ_NOT_SET, +}; + +/* For storing VMCI structures in file handles. */ +struct vmci_obj { + void *ptr; + enum vmci_obj_type type; +}; + +/* + * Needed by other components of this module. It's okay to have one global + * instance of this because there can only ever be one VMCI device. Our + * virtual hardware enforces this. + */ +extern struct pci_dev *vmci_pdev; + +u32 vmci_get_context_id(void); +int vmci_send_datagram(struct vmci_datagram *dg); +void vmci_call_vsock_callback(bool is_host); + +int vmci_host_init(void); +void vmci_host_exit(void); +bool vmci_host_code_active(void); +int vmci_host_users(void); + +int vmci_guest_init(void); +void vmci_guest_exit(void); +bool vmci_guest_code_active(void); +u32 vmci_get_vm_context_id(void); + +bool vmci_use_ppn64(void); + +#endif /* _VMCI_DRIVER_H_ */ diff --git a/drivers/misc/vmw_vmci/vmci_event.c b/drivers/misc/vmw_vmci/vmci_event.c new file mode 100644 index 0000000000..5d7ac07623 --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_event.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "vmci_driver.h" +#include "vmci_event.h" + +#define EVENT_MAGIC 0xEABE0000 +#define VMCI_EVENT_MAX_ATTEMPTS 10 + +struct vmci_subscription { + u32 id; + u32 event; + vmci_event_cb callback; + void *callback_data; + struct list_head node; /* on one of subscriber lists */ +}; + +static struct list_head subscriber_array[VMCI_EVENT_MAX]; +static DEFINE_MUTEX(subscriber_mutex); + +int __init vmci_event_init(void) +{ + int i; + + for (i = 0; i < VMCI_EVENT_MAX; i++) + INIT_LIST_HEAD(&subscriber_array[i]); + + return VMCI_SUCCESS; +} + +void vmci_event_exit(void) +{ + int e; + + /* We free all memory at exit. */ + for (e = 0; e < VMCI_EVENT_MAX; e++) { + struct vmci_subscription *cur, *p2; + list_for_each_entry_safe(cur, p2, &subscriber_array[e], node) { + + /* + * We should never get here because all events + * should have been unregistered before we try + * to unload the driver module. 
+ */ + pr_warn("Unexpected free events occurring\n"); + list_del(&cur->node); + kfree(cur); + } + } +} + +/* + * Find entry. Assumes subscriber_mutex is held. + */ +static struct vmci_subscription *event_find(u32 sub_id) +{ + int e; + + for (e = 0; e < VMCI_EVENT_MAX; e++) { + struct vmci_subscription *cur; + list_for_each_entry(cur, &subscriber_array[e], node) { + if (cur->id == sub_id) + return cur; + } + } + return NULL; +} + +/* + * Actually delivers the events to the subscribers. + * The callback function for each subscriber is invoked. + */ +static void event_deliver(struct vmci_event_msg *event_msg) +{ + struct vmci_subscription *cur; + struct list_head *subscriber_list; + + rcu_read_lock(); + subscriber_list = &subscriber_array[event_msg->event_data.event]; + list_for_each_entry_rcu(cur, subscriber_list, node) { + cur->callback(cur->id, &event_msg->event_data, + cur->callback_data); + } + rcu_read_unlock(); +} + +/* + * Dispatcher for the VMCI_EVENT_RECEIVE datagrams. Calls all + * subscribers for given event. + */ +int vmci_event_dispatch(struct vmci_datagram *msg) +{ + struct vmci_event_msg *event_msg = (struct vmci_event_msg *)msg; + + if (msg->payload_size < sizeof(u32) || + msg->payload_size > sizeof(struct vmci_event_data_max)) + return VMCI_ERROR_INVALID_ARGS; + + if (!VMCI_EVENT_VALID(event_msg->event_data.event)) + return VMCI_ERROR_EVENT_UNKNOWN; + + event_deliver(event_msg); + return VMCI_SUCCESS; +} + +/* + * vmci_event_subscribe() - Subscribe to a given event. + * @event: The event to subscribe to. + * @callback: The callback to invoke upon the event. + * @callback_data: Data to pass to the callback. + * @subscription_id: ID used to track subscription. Used with + * vmci_event_unsubscribe() + * + * Subscribes to the provided event. The callback specified will be + * fired from RCU critical section and therefore must not sleep. + */ +int vmci_event_subscribe(u32 event, + vmci_event_cb callback, + void *callback_data, + u32 *new_subscription_id) +{ + struct vmci_subscription *sub; + int attempts; + int retval; + bool have_new_id = false; + + if (!new_subscription_id) { + pr_devel("%s: Invalid subscription (NULL)\n", __func__); + return VMCI_ERROR_INVALID_ARGS; + } + + if (!VMCI_EVENT_VALID(event) || !callback) { + pr_devel("%s: Failed to subscribe to event (type=%d) (callback=%p) (data=%p)\n", + __func__, event, callback, callback_data); + return VMCI_ERROR_INVALID_ARGS; + } + + sub = kzalloc(sizeof(*sub), GFP_KERNEL); + if (!sub) + return VMCI_ERROR_NO_MEM; + + sub->id = VMCI_EVENT_MAX; + sub->event = event; + sub->callback = callback; + sub->callback_data = callback_data; + INIT_LIST_HEAD(&sub->node); + + mutex_lock(&subscriber_mutex); + + /* Creation of a new event is always allowed. */ + for (attempts = 0; attempts < VMCI_EVENT_MAX_ATTEMPTS; attempts++) { + static u32 subscription_id; + /* + * We try to get an id a couple of time before + * claiming we are out of resources. + */ + + /* Test for duplicate id. */ + if (!event_find(++subscription_id)) { + sub->id = subscription_id; + have_new_id = true; + break; + } + } + + if (have_new_id) { + list_add_rcu(&sub->node, &subscriber_array[event]); + retval = VMCI_SUCCESS; + } else { + retval = VMCI_ERROR_NO_RESOURCES; + } + + mutex_unlock(&subscriber_mutex); + + *new_subscription_id = sub->id; + return retval; +} +EXPORT_SYMBOL_GPL(vmci_event_subscribe); + +/* + * vmci_event_unsubscribe() - unsubscribe from an event. + * @sub_id: A subscription ID as provided by vmci_event_subscribe() + * + * Unsubscribe from given event. 
Removes it from list and frees it. + * Will return callback_data if requested by caller. + */ +int vmci_event_unsubscribe(u32 sub_id) +{ + struct vmci_subscription *s; + + mutex_lock(&subscriber_mutex); + s = event_find(sub_id); + if (s) + list_del_rcu(&s->node); + mutex_unlock(&subscriber_mutex); + + if (!s) + return VMCI_ERROR_NOT_FOUND; + + kvfree_rcu_mightsleep(s); + + return VMCI_SUCCESS; +} +EXPORT_SYMBOL_GPL(vmci_event_unsubscribe); diff --git a/drivers/misc/vmw_vmci/vmci_event.h b/drivers/misc/vmw_vmci/vmci_event.h new file mode 100644 index 0000000000..89cd0117d4 --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_event.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + */ + +#ifndef __VMCI_EVENT_H__ +#define __VMCI_EVENT_H__ + +#include + +int vmci_event_init(void); +void vmci_event_exit(void); +int vmci_event_dispatch(struct vmci_datagram *msg); + +#endif /*__VMCI_EVENT_H__ */ diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c new file mode 100644 index 0000000000..4f8d962bb5 --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_guest.c @@ -0,0 +1,978 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vmci_datagram.h" +#include "vmci_doorbell.h" +#include "vmci_context.h" +#include "vmci_driver.h" +#include "vmci_event.h" + +#define PCI_DEVICE_ID_VMWARE_VMCI 0x0740 + +#define VMCI_UTIL_NUM_RESOURCES 1 + +/* + * Datagram buffers for DMA send/receive must accommodate at least + * a maximum sized datagram and the header. 
+ */ +#define VMCI_DMA_DG_BUFFER_SIZE (VMCI_MAX_DG_SIZE + PAGE_SIZE) + +static bool vmci_disable_msi; +module_param_named(disable_msi, vmci_disable_msi, bool, 0); +MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)"); + +static bool vmci_disable_msix; +module_param_named(disable_msix, vmci_disable_msix, bool, 0); +MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)"); + +static u32 ctx_update_sub_id = VMCI_INVALID_ID; +static u32 vm_context_id = VMCI_INVALID_ID; + +struct vmci_guest_device { + struct device *dev; /* PCI device we are attached to */ + void __iomem *iobase; + void __iomem *mmio_base; + + bool exclusive_vectors; + + struct wait_queue_head inout_wq; + + void *data_buffer; + dma_addr_t data_buffer_base; + void *tx_buffer; + dma_addr_t tx_buffer_base; + void *notification_bitmap; + dma_addr_t notification_base; +}; + +static bool use_ppn64; + +bool vmci_use_ppn64(void) +{ + return use_ppn64; +} + +/* vmci_dev singleton device and supporting data*/ +struct pci_dev *vmci_pdev; +static struct vmci_guest_device *vmci_dev_g; +static DEFINE_SPINLOCK(vmci_dev_spinlock); + +static atomic_t vmci_num_guest_devices = ATOMIC_INIT(0); + +bool vmci_guest_code_active(void) +{ + return atomic_read(&vmci_num_guest_devices) != 0; +} + +u32 vmci_get_vm_context_id(void) +{ + if (vm_context_id == VMCI_INVALID_ID) { + struct vmci_datagram get_cid_msg; + get_cid_msg.dst = + vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_GET_CONTEXT_ID); + get_cid_msg.src = VMCI_ANON_SRC_HANDLE; + get_cid_msg.payload_size = 0; + vm_context_id = vmci_send_datagram(&get_cid_msg); + } + return vm_context_id; +} + +static unsigned int vmci_read_reg(struct vmci_guest_device *dev, u32 reg) +{ + if (dev->mmio_base != NULL) + return readl(dev->mmio_base + reg); + return ioread32(dev->iobase + reg); +} + +static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg) +{ + if (dev->mmio_base != NULL) + writel(val, dev->mmio_base + reg); + else + iowrite32(val, dev->iobase + reg); +} + +static void vmci_read_data(struct vmci_guest_device *vmci_dev, + void *dest, size_t size) +{ + if (vmci_dev->mmio_base == NULL) + ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR, + dest, size); + else { + /* + * For DMA datagrams, the data_buffer will contain the header on the + * first page, followed by the incoming datagram(s) on the following + * pages. The header uses an S/G element immediately following the + * header on the first page to point to the data area. + */ + struct vmci_data_in_out_header *buffer_header = vmci_dev->data_buffer; + struct vmci_sg_elem *sg_array = (struct vmci_sg_elem *)(buffer_header + 1); + size_t buffer_offset = dest - vmci_dev->data_buffer; + + buffer_header->opcode = 1; + buffer_header->size = 1; + buffer_header->busy = 0; + sg_array[0].addr = vmci_dev->data_buffer_base + buffer_offset; + sg_array[0].size = size; + + vmci_write_reg(vmci_dev, lower_32_bits(vmci_dev->data_buffer_base), + VMCI_DATA_IN_LOW_ADDR); + + wait_event(vmci_dev->inout_wq, buffer_header->busy == 1); + } +} + +static int vmci_write_data(struct vmci_guest_device *dev, + struct vmci_datagram *dg) +{ + int result; + + if (dev->mmio_base != NULL) { + struct vmci_data_in_out_header *buffer_header = dev->tx_buffer; + u8 *dg_out_buffer = (u8 *)(buffer_header + 1); + + if (VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE) + return VMCI_ERROR_INVALID_ARGS; + + /* + * Initialize send buffer with outgoing datagram + * and set up header for inline data. 
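The first page of the coherent DMA buffers carries a small control header: vmci_read_data() above pairs it with a scatter/gather element, while vmci_write_data() here places the outgoing datagram inline right after it. A hedged summary of the layout, inferred from this code rather than from a hardware spec:

/*
 *   offset 0          struct vmci_data_in_out_header (opcode, size, busy, result)
 *   header + 1        send:    outgoing datagram copied inline
 *                     receive: one struct vmci_sg_elem pointing at the data area
 *   offset PAGE_SIZE  receive: incoming datagram(s) land here
 *
 * which is why VMCI_DMA_DG_BUFFER_SIZE is VMCI_MAX_DG_SIZE + PAGE_SIZE: one
 * page of header/bookkeeping plus room for a maximum-sized datagram.
 */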
Device will + * not access buffer asynchronously - only after + * the write to VMCI_DATA_OUT_LOW_ADDR. + */ + memcpy(dg_out_buffer, dg, VMCI_DG_SIZE(dg)); + buffer_header->opcode = 0; + buffer_header->size = VMCI_DG_SIZE(dg); + buffer_header->busy = 1; + + vmci_write_reg(dev, lower_32_bits(dev->tx_buffer_base), + VMCI_DATA_OUT_LOW_ADDR); + + /* Caller holds a spinlock, so cannot block. */ + spin_until_cond(buffer_header->busy == 0); + + result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR); + if (result == VMCI_SUCCESS) + result = (int)buffer_header->result; + } else { + iowrite8_rep(dev->iobase + VMCI_DATA_OUT_ADDR, + dg, VMCI_DG_SIZE(dg)); + result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR); + } + + return result; +} + +/* + * VM to hypervisor call mechanism. We use the standard VMware naming + * convention since shared code is calling this function as well. + */ +int vmci_send_datagram(struct vmci_datagram *dg) +{ + unsigned long flags; + int result; + + /* Check args. */ + if (dg == NULL) + return VMCI_ERROR_INVALID_ARGS; + + /* + * Need to acquire spinlock on the device because the datagram + * data may be spread over multiple pages and the monitor may + * interleave device user rpc calls from multiple + * VCPUs. Acquiring the spinlock precludes that + * possibility. Disabling interrupts to avoid incoming + * datagrams during a "rep out" and possibly landing up in + * this function. + */ + spin_lock_irqsave(&vmci_dev_spinlock, flags); + + if (vmci_dev_g) { + vmci_write_data(vmci_dev_g, dg); + result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR); + } else { + result = VMCI_ERROR_UNAVAILABLE; + } + + spin_unlock_irqrestore(&vmci_dev_spinlock, flags); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_send_datagram); + +/* + * Gets called with the new context id if updated or resumed. + * Context id. + */ +static void vmci_guest_cid_update(u32 sub_id, + const struct vmci_event_data *event_data, + void *client_data) +{ + const struct vmci_event_payld_ctx *ev_payload = + vmci_event_data_const_payload(event_data); + + if (sub_id != ctx_update_sub_id) { + pr_devel("Invalid subscriber (ID=0x%x)\n", sub_id); + return; + } + + if (!event_data || ev_payload->context_id == VMCI_INVALID_ID) { + pr_devel("Invalid event data\n"); + return; + } + + pr_devel("Updating context from (ID=0x%x) to (ID=0x%x) on event (type=%d)\n", + vm_context_id, ev_payload->context_id, event_data->event); + + vm_context_id = ev_payload->context_id; +} + +/* + * Verify that the host supports the hypercalls we need. If it does not, + * try to find fallback hypercalls and use those instead. Returns 0 if + * required hypercalls (or fallback hypercalls) are supported by the host, + * an error code otherwise. 
+ */ +static int vmci_check_host_caps(struct pci_dev *pdev) +{ + bool result; + struct vmci_resource_query_msg *msg; + u32 msg_size = sizeof(struct vmci_resource_query_hdr) + + VMCI_UTIL_NUM_RESOURCES * sizeof(u32); + struct vmci_datagram *check_msg; + + check_msg = kzalloc(msg_size, GFP_KERNEL); + if (!check_msg) { + dev_err(&pdev->dev, "%s: Insufficient memory\n", __func__); + return -ENOMEM; + } + + check_msg->dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_RESOURCES_QUERY); + check_msg->src = VMCI_ANON_SRC_HANDLE; + check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE; + msg = (struct vmci_resource_query_msg *)VMCI_DG_PAYLOAD(check_msg); + + msg->num_resources = VMCI_UTIL_NUM_RESOURCES; + msg->resources[0] = VMCI_GET_CONTEXT_ID; + + /* Checks that hyper calls are supported */ + result = vmci_send_datagram(check_msg) == 0x01; + kfree(check_msg); + + dev_dbg(&pdev->dev, "%s: Host capability check: %s\n", + __func__, result ? "PASSED" : "FAILED"); + + /* We need the vector. There are no fallbacks. */ + return result ? 0 : -ENXIO; +} + +/* + * Reads datagrams from the device and dispatches them. For IO port + * based access to the device, we always start reading datagrams into + * only the first page of the datagram buffer. If the datagrams don't + * fit into one page, we use the maximum datagram buffer size for the + * remainder of the invocation. This is a simple heuristic for not + * penalizing small datagrams. For DMA-based datagrams, we always + * use the maximum datagram buffer size, since there is no performance + * penalty for doing so. + * + * This function assumes that it has exclusive access to the data + * in register(s) for the duration of the call. + */ +static void vmci_dispatch_dgs(struct vmci_guest_device *vmci_dev) +{ + u8 *dg_in_buffer = vmci_dev->data_buffer; + struct vmci_datagram *dg; + size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE; + size_t current_dg_in_buffer_size; + size_t remaining_bytes; + bool is_io_port = vmci_dev->mmio_base == NULL; + + BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE); + + if (!is_io_port) { + /* For mmio, the first page is used for the header. */ + dg_in_buffer += PAGE_SIZE; + + /* + * For DMA-based datagram operations, there is no performance + * penalty for reading the maximum buffer size. + */ + current_dg_in_buffer_size = VMCI_MAX_DG_SIZE; + } else { + current_dg_in_buffer_size = PAGE_SIZE; + } + vmci_read_data(vmci_dev, dg_in_buffer, current_dg_in_buffer_size); + dg = (struct vmci_datagram *)dg_in_buffer; + remaining_bytes = current_dg_in_buffer_size; + + /* + * Read through the buffer until an invalid datagram header is + * encountered. The exit condition for datagrams read through + * VMCI_DATA_IN_ADDR is a bit more complicated, since a datagram + * can start on any page boundary in the buffer. + */ + while (dg->dst.resource != VMCI_INVALID_ID || + (is_io_port && remaining_bytes > PAGE_SIZE)) { + unsigned dg_in_size; + + /* + * If using VMCI_DATA_IN_ADDR, skip to the next page + * as a datagram can start on any page boundary. 
+ */ + if (dg->dst.resource == VMCI_INVALID_ID) { + dg = (struct vmci_datagram *)roundup( + (uintptr_t)dg + 1, PAGE_SIZE); + remaining_bytes = + (size_t)(dg_in_buffer + + current_dg_in_buffer_size - + (u8 *)dg); + continue; + } + + dg_in_size = VMCI_DG_SIZE_ALIGNED(dg); + + if (dg_in_size <= dg_in_buffer_size) { + int result; + + /* + * If the remaining bytes in the datagram + * buffer doesn't contain the complete + * datagram, we first make sure we have enough + * room for it and then we read the reminder + * of the datagram and possibly any following + * datagrams. + */ + if (dg_in_size > remaining_bytes) { + if (remaining_bytes != + current_dg_in_buffer_size) { + + /* + * We move the partial + * datagram to the front and + * read the reminder of the + * datagram and possibly + * following calls into the + * following bytes. + */ + memmove(dg_in_buffer, dg_in_buffer + + current_dg_in_buffer_size - + remaining_bytes, + remaining_bytes); + dg = (struct vmci_datagram *) + dg_in_buffer; + } + + if (current_dg_in_buffer_size != + dg_in_buffer_size) + current_dg_in_buffer_size = + dg_in_buffer_size; + + vmci_read_data(vmci_dev, + dg_in_buffer + + remaining_bytes, + current_dg_in_buffer_size - + remaining_bytes); + } + + /* + * We special case event datagrams from the + * hypervisor. + */ + if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID && + dg->dst.resource == VMCI_EVENT_HANDLER) { + result = vmci_event_dispatch(dg); + } else { + result = vmci_datagram_invoke_guest_handler(dg); + } + if (result < VMCI_SUCCESS) + dev_dbg(vmci_dev->dev, + "Datagram with resource (ID=0x%x) failed (err=%d)\n", + dg->dst.resource, result); + + /* On to the next datagram. */ + dg = (struct vmci_datagram *)((u8 *)dg + + dg_in_size); + } else { + size_t bytes_to_skip; + + /* + * Datagram doesn't fit in datagram buffer of maximal + * size. We drop it. + */ + dev_dbg(vmci_dev->dev, + "Failed to receive datagram (size=%u bytes)\n", + dg_in_size); + + bytes_to_skip = dg_in_size - remaining_bytes; + if (current_dg_in_buffer_size != dg_in_buffer_size) + current_dg_in_buffer_size = dg_in_buffer_size; + + for (;;) { + vmci_read_data(vmci_dev, dg_in_buffer, + current_dg_in_buffer_size); + if (bytes_to_skip <= current_dg_in_buffer_size) + break; + + bytes_to_skip -= current_dg_in_buffer_size; + } + dg = (struct vmci_datagram *)(dg_in_buffer + + bytes_to_skip); + } + + remaining_bytes = + (size_t) (dg_in_buffer + current_dg_in_buffer_size - + (u8 *)dg); + + if (remaining_bytes < VMCI_DG_HEADERSIZE) { + /* Get the next batch of datagrams. */ + + vmci_read_data(vmci_dev, dg_in_buffer, + current_dg_in_buffer_size); + dg = (struct vmci_datagram *)dg_in_buffer; + remaining_bytes = current_dg_in_buffer_size; + } + } +} + +/* + * Scans the notification bitmap for raised flags, clears them + * and handles the notifications. + */ +static void vmci_process_bitmap(struct vmci_guest_device *dev) +{ + if (!dev->notification_bitmap) { + dev_dbg(dev->dev, "No bitmap present in %s\n", __func__); + return; + } + + vmci_dbell_scan_notification_entries(dev->notification_bitmap); +} + +/* + * Interrupt handler for legacy or MSI interrupt, or for first MSI-X + * interrupt (vector VMCI_INTR_DATAGRAM). + */ +static irqreturn_t vmci_interrupt(int irq, void *_dev) +{ + struct vmci_guest_device *dev = _dev; + + /* + * If we are using MSI-X with exclusive vectors then we simply call + * vmci_dispatch_dgs(), since we know the interrupt was meant for us. + * Otherwise we must read the ICR to determine what to do. 
+ */ + + if (dev->exclusive_vectors) { + vmci_dispatch_dgs(dev); + } else { + unsigned int icr; + + /* Acknowledge interrupt and determine what needs doing. */ + icr = vmci_read_reg(dev, VMCI_ICR_ADDR); + if (icr == 0 || icr == ~0) + return IRQ_NONE; + + if (icr & VMCI_ICR_DATAGRAM) { + vmci_dispatch_dgs(dev); + icr &= ~VMCI_ICR_DATAGRAM; + } + + if (icr & VMCI_ICR_NOTIFICATION) { + vmci_process_bitmap(dev); + icr &= ~VMCI_ICR_NOTIFICATION; + } + + + if (icr & VMCI_ICR_DMA_DATAGRAM) { + wake_up_all(&dev->inout_wq); + icr &= ~VMCI_ICR_DMA_DATAGRAM; + } + + if (icr != 0) + dev_warn(dev->dev, + "Ignoring unknown interrupt cause (%d)\n", + icr); + } + + return IRQ_HANDLED; +} + +/* + * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION, + * which is for the notification bitmap. Will only get called if we are + * using MSI-X with exclusive vectors. + */ +static irqreturn_t vmci_interrupt_bm(int irq, void *_dev) +{ + struct vmci_guest_device *dev = _dev; + + /* For MSI-X we can just assume it was meant for us. */ + vmci_process_bitmap(dev); + + return IRQ_HANDLED; +} + +/* + * Interrupt handler for MSI-X interrupt vector VMCI_INTR_DMA_DATAGRAM, + * which is for the completion of a DMA datagram send or receive operation. + * Will only get called if we are using MSI-X with exclusive vectors. + */ +static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev) +{ + struct vmci_guest_device *dev = _dev; + + wake_up_all(&dev->inout_wq); + + return IRQ_HANDLED; +} + +static void vmci_free_dg_buffers(struct vmci_guest_device *vmci_dev) +{ + if (vmci_dev->mmio_base != NULL) { + if (vmci_dev->tx_buffer != NULL) + dma_free_coherent(vmci_dev->dev, + VMCI_DMA_DG_BUFFER_SIZE, + vmci_dev->tx_buffer, + vmci_dev->tx_buffer_base); + if (vmci_dev->data_buffer != NULL) + dma_free_coherent(vmci_dev->dev, + VMCI_DMA_DG_BUFFER_SIZE, + vmci_dev->data_buffer, + vmci_dev->data_buffer_base); + } else { + vfree(vmci_dev->data_buffer); + } +} + +/* + * Most of the initialization at module load time is done here. + */ +static int vmci_guest_probe_device(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct vmci_guest_device *vmci_dev; + void __iomem *iobase = NULL; + void __iomem *mmio_base = NULL; + unsigned int num_irq_vectors; + unsigned int capabilities; + unsigned int caps_in_use; + unsigned long cmd; + int vmci_err; + int error; + + dev_dbg(&pdev->dev, "Probing for vmci/PCI guest device\n"); + + error = pcim_enable_device(pdev); + if (error) { + dev_err(&pdev->dev, + "Failed to enable VMCI device: %d\n", error); + return error; + } + + /* + * The VMCI device with mmio access to registers requests 256KB + * for BAR1. If present, driver will use new VMCI device + * functionality for register access and datagram send/recv. + */ + + if (pci_resource_len(pdev, 1) == VMCI_WITH_MMIO_ACCESS_BAR_SIZE) { + dev_info(&pdev->dev, "MMIO register access is available\n"); + mmio_base = pci_iomap_range(pdev, 1, VMCI_MMIO_ACCESS_OFFSET, + VMCI_MMIO_ACCESS_SIZE); + /* If the map fails, we fall back to IOIO access. 
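For orientation, when MSI-X with exclusive vectors is granted, the probe code further below installs the three handlers defined above, roughly as follows (vector numbers as used with pci_irq_vector() in this file):

/*
 *   vector 0 (VMCI_INTR_DATAGRAM)      -> vmci_interrupt()
 *   vector 1 (VMCI_INTR_NOTIFICATION)  -> vmci_interrupt_bm()
 *   vector 2 (VMCI_INTR_DMA_DATAGRAM)  -> vmci_interrupt_dma_datagram()
 *
 * With plain MSI or legacy interrupts only vmci_interrupt() is installed,
 * and it reads VMCI_ICR_ADDR to demultiplex the three causes itself.
 */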
*/ + if (!mmio_base) + dev_warn(&pdev->dev, "Failed to map MMIO register access\n"); + } + + if (!mmio_base) { + if (IS_ENABLED(CONFIG_ARM64)) { + dev_err(&pdev->dev, "MMIO base is invalid\n"); + return -ENXIO; + } + error = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME); + if (error) { + dev_err(&pdev->dev, "Failed to reserve/map IO regions\n"); + return error; + } + iobase = pcim_iomap_table(pdev)[0]; + } + + vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL); + if (!vmci_dev) { + dev_err(&pdev->dev, + "Can't allocate memory for VMCI device\n"); + return -ENOMEM; + } + + vmci_dev->dev = &pdev->dev; + vmci_dev->exclusive_vectors = false; + vmci_dev->iobase = iobase; + vmci_dev->mmio_base = mmio_base; + + init_waitqueue_head(&vmci_dev->inout_wq); + + if (mmio_base != NULL) { + vmci_dev->tx_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE, + &vmci_dev->tx_buffer_base, + GFP_KERNEL); + if (!vmci_dev->tx_buffer) { + dev_err(&pdev->dev, + "Can't allocate memory for datagram tx buffer\n"); + return -ENOMEM; + } + + vmci_dev->data_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE, + &vmci_dev->data_buffer_base, + GFP_KERNEL); + } else { + vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE); + } + if (!vmci_dev->data_buffer) { + dev_err(&pdev->dev, + "Can't allocate memory for datagram buffer\n"); + error = -ENOMEM; + goto err_free_data_buffers; + } + + pci_set_master(pdev); /* To enable queue_pair functionality. */ + + /* + * Verify that the VMCI Device supports the capabilities that + * we need. If the device is missing capabilities that we would + * like to use, check for fallback capabilities and use those + * instead (so we can run a new VM on old hosts). Fail the load if + * a required capability is missing and there is no fallback. + * + * Right now, we need datagrams. There are no fallbacks. + */ + capabilities = vmci_read_reg(vmci_dev, VMCI_CAPS_ADDR); + if (!(capabilities & VMCI_CAPS_DATAGRAM)) { + dev_err(&pdev->dev, "Device does not support datagrams\n"); + error = -ENXIO; + goto err_free_data_buffers; + } + caps_in_use = VMCI_CAPS_DATAGRAM; + + /* + * Use 64-bit PPNs if the device supports. + * + * There is no check for the return value of dma_set_mask_and_coherent + * since this driver can handle the default mask values if + * dma_set_mask_and_coherent fails. + */ + if (capabilities & VMCI_CAPS_PPN64) { + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + use_ppn64 = true; + caps_in_use |= VMCI_CAPS_PPN64; + } else { + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44)); + use_ppn64 = false; + } + + /* + * If the hardware supports notifications, we will use that as + * well. + */ + if (capabilities & VMCI_CAPS_NOTIFICATIONS) { + vmci_dev->notification_bitmap = dma_alloc_coherent( + &pdev->dev, PAGE_SIZE, &vmci_dev->notification_base, + GFP_KERNEL); + if (!vmci_dev->notification_bitmap) + dev_warn(&pdev->dev, + "Unable to allocate notification bitmap\n"); + else + caps_in_use |= VMCI_CAPS_NOTIFICATIONS; + } + + if (mmio_base != NULL) { + if (capabilities & VMCI_CAPS_DMA_DATAGRAM) { + caps_in_use |= VMCI_CAPS_DMA_DATAGRAM; + } else { + dev_err(&pdev->dev, + "Missing capability: VMCI_CAPS_DMA_DATAGRAM\n"); + error = -ENXIO; + goto err_free_notification_bitmap; + } + } + + dev_info(&pdev->dev, "Using capabilities 0x%x\n", caps_in_use); + + /* Let the host know which capabilities we intend to use. 
*/ + vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR); + + if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) { + /* Let the device know the size for pages passed down. */ + vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT); + + /* Configure the high order parts of the data in/out buffers. */ + vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->data_buffer_base), + VMCI_DATA_IN_HIGH_ADDR); + vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->tx_buffer_base), + VMCI_DATA_OUT_HIGH_ADDR); + } + + /* Set up global device so that we can start sending datagrams */ + spin_lock_irq(&vmci_dev_spinlock); + vmci_dev_g = vmci_dev; + vmci_pdev = pdev; + spin_unlock_irq(&vmci_dev_spinlock); + + /* + * Register notification bitmap with device if that capability is + * used. + */ + if (caps_in_use & VMCI_CAPS_NOTIFICATIONS) { + unsigned long bitmap_ppn = + vmci_dev->notification_base >> PAGE_SHIFT; + if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) { + dev_warn(&pdev->dev, + "VMCI device unable to register notification bitmap with PPN 0x%lx\n", + bitmap_ppn); + error = -ENXIO; + goto err_remove_vmci_dev_g; + } + } + + /* Check host capabilities. */ + error = vmci_check_host_caps(pdev); + if (error) + goto err_remove_vmci_dev_g; + + /* Enable device. */ + + /* + * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can + * update the internal context id when needed. + */ + vmci_err = vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE, + vmci_guest_cid_update, NULL, + &ctx_update_sub_id); + if (vmci_err < VMCI_SUCCESS) + dev_warn(&pdev->dev, + "Failed to subscribe to event (type=%d): %d\n", + VMCI_EVENT_CTX_ID_UPDATE, vmci_err); + + /* + * Enable interrupts. Try MSI-X first, then MSI, and then fallback on + * legacy interrupts. + */ + if (vmci_dev->mmio_base != NULL) + num_irq_vectors = VMCI_MAX_INTRS; + else + num_irq_vectors = VMCI_MAX_INTRS_NOTIFICATION; + error = pci_alloc_irq_vectors(pdev, num_irq_vectors, num_irq_vectors, + PCI_IRQ_MSIX); + if (error < 0) { + error = pci_alloc_irq_vectors(pdev, 1, 1, + PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY); + if (error < 0) + goto err_unsubscribe_event; + } else { + vmci_dev->exclusive_vectors = true; + } + + /* + * Request IRQ for legacy or MSI interrupts, or for first + * MSI-X vector. + */ + error = request_threaded_irq(pci_irq_vector(pdev, 0), NULL, + vmci_interrupt, IRQF_SHARED, + KBUILD_MODNAME, vmci_dev); + if (error) { + dev_err(&pdev->dev, "Irq %u in use: %d\n", + pci_irq_vector(pdev, 0), error); + goto err_disable_msi; + } + + /* + * For MSI-X with exclusive vectors we need to request an + * interrupt for each vector so that we get a separate + * interrupt handler routine. This allows us to distinguish + * between the vectors. + */ + if (vmci_dev->exclusive_vectors) { + error = request_threaded_irq(pci_irq_vector(pdev, 1), NULL, + vmci_interrupt_bm, 0, + KBUILD_MODNAME, vmci_dev); + if (error) { + dev_err(&pdev->dev, + "Failed to allocate irq %u: %d\n", + pci_irq_vector(pdev, 1), error); + goto err_free_irq; + } + if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) { + error = request_threaded_irq(pci_irq_vector(pdev, 2), + NULL, + vmci_interrupt_dma_datagram, + 0, KBUILD_MODNAME, + vmci_dev); + if (error) { + dev_err(&pdev->dev, + "Failed to allocate irq %u: %d\n", + pci_irq_vector(pdev, 2), error); + goto err_free_bm_irq; + } + } + } + + dev_dbg(&pdev->dev, "Registered device\n"); + + atomic_inc(&vmci_num_guest_devices); + + /* Enable specific interrupt bits. 
*/ + cmd = VMCI_IMR_DATAGRAM; + if (caps_in_use & VMCI_CAPS_NOTIFICATIONS) + cmd |= VMCI_IMR_NOTIFICATION; + if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) + cmd |= VMCI_IMR_DMA_DATAGRAM; + vmci_write_reg(vmci_dev, cmd, VMCI_IMR_ADDR); + + /* Enable interrupts. */ + vmci_write_reg(vmci_dev, VMCI_CONTROL_INT_ENABLE, VMCI_CONTROL_ADDR); + + pci_set_drvdata(pdev, vmci_dev); + + vmci_call_vsock_callback(false); + return 0; + +err_free_bm_irq: + if (vmci_dev->exclusive_vectors) + free_irq(pci_irq_vector(pdev, 1), vmci_dev); + +err_free_irq: + free_irq(pci_irq_vector(pdev, 0), vmci_dev); + +err_disable_msi: + pci_free_irq_vectors(pdev); + +err_unsubscribe_event: + vmci_err = vmci_event_unsubscribe(ctx_update_sub_id); + if (vmci_err < VMCI_SUCCESS) + dev_warn(&pdev->dev, + "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n", + VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err); + +err_remove_vmci_dev_g: + spin_lock_irq(&vmci_dev_spinlock); + vmci_pdev = NULL; + vmci_dev_g = NULL; + spin_unlock_irq(&vmci_dev_spinlock); + +err_free_notification_bitmap: + if (vmci_dev->notification_bitmap) { + vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR); + dma_free_coherent(&pdev->dev, PAGE_SIZE, + vmci_dev->notification_bitmap, + vmci_dev->notification_base); + } + +err_free_data_buffers: + vmci_free_dg_buffers(vmci_dev); + + /* The rest are managed resources and will be freed by PCI core */ + return error; +} + +static void vmci_guest_remove_device(struct pci_dev *pdev) +{ + struct vmci_guest_device *vmci_dev = pci_get_drvdata(pdev); + int vmci_err; + + dev_dbg(&pdev->dev, "Removing device\n"); + + atomic_dec(&vmci_num_guest_devices); + + vmci_qp_guest_endpoints_exit(); + + vmci_err = vmci_event_unsubscribe(ctx_update_sub_id); + if (vmci_err < VMCI_SUCCESS) + dev_warn(&pdev->dev, + "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n", + VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err); + + spin_lock_irq(&vmci_dev_spinlock); + vmci_dev_g = NULL; + vmci_pdev = NULL; + spin_unlock_irq(&vmci_dev_spinlock); + + dev_dbg(&pdev->dev, "Resetting vmci device\n"); + vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR); + + /* + * Free IRQ and then disable MSI/MSI-X as appropriate. For + * MSI-X, we might have multiple vectors, each with their own + * IRQ, which we must free too. + */ + if (vmci_dev->exclusive_vectors) { + free_irq(pci_irq_vector(pdev, 1), vmci_dev); + if (vmci_dev->mmio_base != NULL) + free_irq(pci_irq_vector(pdev, 2), vmci_dev); + } + free_irq(pci_irq_vector(pdev, 0), vmci_dev); + pci_free_irq_vectors(pdev); + + if (vmci_dev->notification_bitmap) { + /* + * The device reset above cleared the bitmap state of the + * device, so we can safely free it here. 
+ */ + + dma_free_coherent(&pdev->dev, PAGE_SIZE, + vmci_dev->notification_bitmap, + vmci_dev->notification_base); + } + + vmci_free_dg_buffers(vmci_dev); + + if (vmci_dev->mmio_base != NULL) + pci_iounmap(pdev, vmci_dev->mmio_base); + + /* The rest are managed resources and will be freed by PCI core */ +} + +static const struct pci_device_id vmci_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_VMCI), }, + { 0 }, +}; +MODULE_DEVICE_TABLE(pci, vmci_ids); + +static struct pci_driver vmci_guest_driver = { + .name = KBUILD_MODNAME, + .id_table = vmci_ids, + .probe = vmci_guest_probe_device, + .remove = vmci_guest_remove_device, +}; + +int __init vmci_guest_init(void) +{ + return pci_register_driver(&vmci_guest_driver); +} + +void __exit vmci_guest_exit(void) +{ + pci_unregister_driver(&vmci_guest_driver); +} diff --git a/drivers/misc/vmw_vmci/vmci_handle_array.c b/drivers/misc/vmw_vmci/vmci_handle_array.c new file mode 100644 index 0000000000..de7fee7ead --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_handle_array.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + */ + +#include +#include "vmci_handle_array.h" + +static size_t handle_arr_calc_size(u32 capacity) +{ + return VMCI_HANDLE_ARRAY_HEADER_SIZE + + capacity * sizeof(struct vmci_handle); +} + +struct vmci_handle_arr *vmci_handle_arr_create(u32 capacity, u32 max_capacity) +{ + struct vmci_handle_arr *array; + + if (max_capacity == 0 || capacity > max_capacity) + return NULL; + + if (capacity == 0) + capacity = min((u32)VMCI_HANDLE_ARRAY_DEFAULT_CAPACITY, + max_capacity); + + array = kmalloc(handle_arr_calc_size(capacity), GFP_ATOMIC); + if (!array) + return NULL; + + array->capacity = capacity; + array->max_capacity = max_capacity; + array->size = 0; + + return array; +} + +void vmci_handle_arr_destroy(struct vmci_handle_arr *array) +{ + kfree(array); +} + +int vmci_handle_arr_append_entry(struct vmci_handle_arr **array_ptr, + struct vmci_handle handle) +{ + struct vmci_handle_arr *array = *array_ptr; + + if (unlikely(array->size >= array->capacity)) { + /* reallocate. */ + struct vmci_handle_arr *new_array; + u32 capacity_bump = min(array->max_capacity - array->capacity, + array->capacity); + size_t new_size = handle_arr_calc_size(array->capacity + + capacity_bump); + + if (array->size >= array->max_capacity) + return VMCI_ERROR_NO_MEM; + + new_array = krealloc(array, new_size, GFP_ATOMIC); + if (!new_array) + return VMCI_ERROR_NO_MEM; + + new_array->capacity += capacity_bump; + *array_ptr = array = new_array; + } + + array->entries[array->size] = handle; + array->size++; + + return VMCI_SUCCESS; +} + +/* + * Handle that was removed, VMCI_INVALID_HANDLE if entry not found. + */ +struct vmci_handle vmci_handle_arr_remove_entry(struct vmci_handle_arr *array, + struct vmci_handle entry_handle) +{ + struct vmci_handle handle = VMCI_INVALID_HANDLE; + u32 i; + + for (i = 0; i < array->size; i++) { + if (vmci_handle_is_equal(array->entries[i], entry_handle)) { + handle = array->entries[i]; + array->size--; + array->entries[i] = array->entries[array->size]; + array->entries[array->size] = VMCI_INVALID_HANDLE; + break; + } + } + + return handle; +} + +/* + * Handle that was removed, VMCI_INVALID_HANDLE if array was empty. 
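The handle array implemented in this file is a small growable vector used by the host-side context code. Below is a hedged usage sketch built only from the functions defined here; my_track_handle() is an illustrative name and error handling is abbreviated.

static int my_track_handle(struct vmci_handle h)
{
	struct vmci_handle_arr *arr;
	int result;

	/* Start at the default capacity, allow growth up to the maximum. */
	arr = vmci_handle_arr_create(0, VMCI_HANDLE_ARRAY_MAX_CAPACITY);
	if (!arr)
		return VMCI_ERROR_NO_MEM;

	/* May krealloc() and move the array, hence the double pointer. */
	result = vmci_handle_arr_append_entry(&arr, h);
	if (result == VMCI_SUCCESS && vmci_handle_arr_has_entry(arr, h))
		vmci_handle_arr_remove_entry(arr, h);

	vmci_handle_arr_destroy(arr);
	return result;
}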
+ */ +struct vmci_handle vmci_handle_arr_remove_tail(struct vmci_handle_arr *array) +{ + struct vmci_handle handle = VMCI_INVALID_HANDLE; + + if (array->size) { + array->size--; + handle = array->entries[array->size]; + array->entries[array->size] = VMCI_INVALID_HANDLE; + } + + return handle; +} + +/* + * Handle at given index, VMCI_INVALID_HANDLE if invalid index. + */ +struct vmci_handle +vmci_handle_arr_get_entry(const struct vmci_handle_arr *array, u32 index) +{ + if (unlikely(index >= array->size)) + return VMCI_INVALID_HANDLE; + + return array->entries[index]; +} + +bool vmci_handle_arr_has_entry(const struct vmci_handle_arr *array, + struct vmci_handle entry_handle) +{ + u32 i; + + for (i = 0; i < array->size; i++) + if (vmci_handle_is_equal(array->entries[i], entry_handle)) + return true; + + return false; +} + +/* + * NULL if the array is empty. Otherwise, a pointer to the array + * of VMCI handles in the handle array. + */ +struct vmci_handle *vmci_handle_arr_get_handles(struct vmci_handle_arr *array) +{ + if (array->size) + return array->entries; + + return NULL; +} diff --git a/drivers/misc/vmw_vmci/vmci_handle_array.h b/drivers/misc/vmw_vmci/vmci_handle_array.h new file mode 100644 index 0000000000..96193f85be --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_handle_array.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + */ + +#ifndef _VMCI_HANDLE_ARRAY_H_ +#define _VMCI_HANDLE_ARRAY_H_ + +#include +#include +#include + +struct vmci_handle_arr { + u32 capacity; + u32 max_capacity; + u32 size; + u32 pad; + struct vmci_handle entries[]; +}; + +#define VMCI_HANDLE_ARRAY_HEADER_SIZE \ + offsetof(struct vmci_handle_arr, entries) +/* Select a default capacity that results in a 64 byte sized array */ +#define VMCI_HANDLE_ARRAY_DEFAULT_CAPACITY 6 +/* Make sure that the max array size can be expressed by a u32 */ +#define VMCI_HANDLE_ARRAY_MAX_CAPACITY \ + ((U32_MAX - VMCI_HANDLE_ARRAY_HEADER_SIZE - 1) / \ + sizeof(struct vmci_handle)) + +struct vmci_handle_arr *vmci_handle_arr_create(u32 capacity, u32 max_capacity); +void vmci_handle_arr_destroy(struct vmci_handle_arr *array); +int vmci_handle_arr_append_entry(struct vmci_handle_arr **array_ptr, + struct vmci_handle handle); +struct vmci_handle vmci_handle_arr_remove_entry(struct vmci_handle_arr *array, + struct vmci_handle + entry_handle); +struct vmci_handle vmci_handle_arr_remove_tail(struct vmci_handle_arr *array); +struct vmci_handle +vmci_handle_arr_get_entry(const struct vmci_handle_arr *array, u32 index); +bool vmci_handle_arr_has_entry(const struct vmci_handle_arr *array, + struct vmci_handle entry_handle); +struct vmci_handle *vmci_handle_arr_get_handles(struct vmci_handle_arr *array); + +static inline u32 vmci_handle_arr_get_size( + const struct vmci_handle_arr *array) +{ + return array->size; +} + + +#endif /* _VMCI_HANDLE_ARRAY_H_ */ diff --git a/drivers/misc/vmw_vmci/vmci_host.c b/drivers/misc/vmw_vmci/vmci_host.c new file mode 100644 index 0000000000..abe79f6fd2 --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_host.c @@ -0,0 +1,1027 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. 
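The "64 byte sized array" remark in vmci_handle_array.h above works out as follows, assuming struct vmci_handle is a pair of u32s (context and resource), i.e. 8 bytes, so the four u32 header fields put the flexible array member at offset 16:

/*
 *   header:  4 * sizeof(u32)                          = 16 bytes
 *   entries: VMCI_HANDLE_ARRAY_DEFAULT_CAPACITY (6)
 *            * sizeof(struct vmci_handle) (8)         = 48 bytes
 *   total                                             = 64 bytes
 */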
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vmci_handle_array.h" +#include "vmci_queue_pair.h" +#include "vmci_datagram.h" +#include "vmci_doorbell.h" +#include "vmci_resource.h" +#include "vmci_context.h" +#include "vmci_driver.h" +#include "vmci_event.h" + +#define VMCI_UTIL_NUM_RESOURCES 1 + +enum { + VMCI_NOTIFY_RESOURCE_QUEUE_PAIR = 0, + VMCI_NOTIFY_RESOURCE_DOOR_BELL = 1, +}; + +enum { + VMCI_NOTIFY_RESOURCE_ACTION_NOTIFY = 0, + VMCI_NOTIFY_RESOURCE_ACTION_CREATE = 1, + VMCI_NOTIFY_RESOURCE_ACTION_DESTROY = 2, +}; + +/* + * VMCI driver initialization. This block can also be used to + * pass initial group membership etc. + */ +struct vmci_init_blk { + u32 cid; + u32 flags; +}; + +/* VMCIqueue_pairAllocInfo_VMToVM */ +struct vmci_qp_alloc_info_vmvm { + struct vmci_handle handle; + u32 peer; + u32 flags; + u64 produce_size; + u64 consume_size; + u64 produce_page_file; /* User VA. */ + u64 consume_page_file; /* User VA. */ + u64 produce_page_file_size; /* Size of the file name array. */ + u64 consume_page_file_size; /* Size of the file name array. */ + s32 result; + u32 _pad; +}; + +/* VMCISetNotifyInfo: Used to pass notify flag's address to the host driver. */ +struct vmci_set_notify_info { + u64 notify_uva; + s32 result; + u32 _pad; +}; + +/* + * Per-instance host state + */ +struct vmci_host_dev { + struct vmci_ctx *context; + int user_version; + enum vmci_obj_type ct_type; + struct mutex lock; /* Mutex lock for vmci context access */ +}; + +static struct vmci_ctx *host_context; +static bool vmci_host_device_initialized; +static atomic_t vmci_host_active_users = ATOMIC_INIT(0); + +/* + * Determines whether the VMCI host personality is + * available. Since the core functionality of the host driver is + * always present, all guests could possibly use the host + * personality. However, to minimize the deviation from the + * pre-unified driver state of affairs, we only consider the host + * device active if there is no active guest device or if there + * are VMX'en with active VMCI contexts using the host device. + */ +bool vmci_host_code_active(void) +{ + return vmci_host_device_initialized && + (!vmci_guest_code_active() || + atomic_read(&vmci_host_active_users) > 0); +} + +int vmci_host_users(void) +{ + return atomic_read(&vmci_host_active_users); +} + +/* + * Called on open of /dev/vmci. + */ +static int vmci_host_open(struct inode *inode, struct file *filp) +{ + struct vmci_host_dev *vmci_host_dev; + + vmci_host_dev = kzalloc(sizeof(struct vmci_host_dev), GFP_KERNEL); + if (vmci_host_dev == NULL) + return -ENOMEM; + + vmci_host_dev->ct_type = VMCIOBJ_NOT_SET; + mutex_init(&vmci_host_dev->lock); + filp->private_data = vmci_host_dev; + + return 0; +} + +/* + * Called on close of /dev/vmci, most often when the process + * exits. + */ +static int vmci_host_close(struct inode *inode, struct file *filp) +{ + struct vmci_host_dev *vmci_host_dev = filp->private_data; + + if (vmci_host_dev->ct_type == VMCIOBJ_CONTEXT) { + vmci_ctx_destroy(vmci_host_dev->context); + vmci_host_dev->context = NULL; + + /* + * The number of active contexts is used to track whether any + * VMX'en are using the host personality. It is incremented when + * a context is created through the IOCTL_VMCI_INIT_CONTEXT + * ioctl. 
+ */ + atomic_dec(&vmci_host_active_users); + } + vmci_host_dev->ct_type = VMCIOBJ_NOT_SET; + + kfree(vmci_host_dev); + filp->private_data = NULL; + return 0; +} + +/* + * This is used to wake up the VMX when a VMCI call arrives, or + * to wake up select() or poll() at the next clock tick. + */ +static __poll_t vmci_host_poll(struct file *filp, poll_table *wait) +{ + struct vmci_host_dev *vmci_host_dev = filp->private_data; + struct vmci_ctx *context; + __poll_t mask = 0; + + if (vmci_host_dev->ct_type == VMCIOBJ_CONTEXT) { + /* + * Read context only if ct_type == VMCIOBJ_CONTEXT to make + * sure that context is initialized + */ + context = vmci_host_dev->context; + + /* Check for VMCI calls to this VM context. */ + if (wait) + poll_wait(filp, &context->host_context.wait_queue, + wait); + + spin_lock(&context->lock); + if (context->pending_datagrams > 0 || + vmci_handle_arr_get_size( + context->pending_doorbell_array) > 0) { + mask = EPOLLIN; + } + spin_unlock(&context->lock); + } + return mask; +} + +/* + * Copies the handles of a handle array into a user buffer, and + * returns the new length in userBufferSize. If the copy to the + * user buffer fails, the functions still returns VMCI_SUCCESS, + * but retval != 0. + */ +static int drv_cp_harray_to_user(void __user *user_buf_uva, + u64 *user_buf_size, + struct vmci_handle_arr *handle_array, + int *retval) +{ + u32 array_size = 0; + struct vmci_handle *handles; + + if (handle_array) + array_size = vmci_handle_arr_get_size(handle_array); + + if (array_size * sizeof(*handles) > *user_buf_size) + return VMCI_ERROR_MORE_DATA; + + *user_buf_size = array_size * sizeof(*handles); + if (*user_buf_size) + *retval = copy_to_user(user_buf_uva, + vmci_handle_arr_get_handles + (handle_array), *user_buf_size); + + return VMCI_SUCCESS; +} + +/* + * Sets up a given context for notify to work. Maps the notify + * boolean in user VA into kernel space. + */ +static int vmci_host_setup_notify(struct vmci_ctx *context, + unsigned long uva) +{ + int retval; + + if (context->notify_page) { + pr_devel("%s: Notify mechanism is already set up\n", __func__); + return VMCI_ERROR_DUPLICATE_ENTRY; + } + + /* + * We are using 'bool' internally, but let's make sure we explicit + * about the size. + */ + BUILD_BUG_ON(sizeof(bool) != sizeof(u8)); + + /* + * Lock physical page backing a given user VA. + */ + retval = get_user_pages_fast(uva, 1, FOLL_WRITE, &context->notify_page); + if (retval != 1) { + context->notify_page = NULL; + return VMCI_ERROR_GENERIC; + } + if (context->notify_page == NULL) + return VMCI_ERROR_UNAVAILABLE; + + /* + * Map the locked page and set up notify pointer. + */ + context->notify = kmap(context->notify_page) + (uva & (PAGE_SIZE - 1)); + vmci_ctx_check_signal_notify(context); + + return VMCI_SUCCESS; +} + +static int vmci_host_get_version(struct vmci_host_dev *vmci_host_dev, + unsigned int cmd, void __user *uptr) +{ + if (cmd == IOCTL_VMCI_VERSION2) { + int __user *vptr = uptr; + if (get_user(vmci_host_dev->user_version, vptr)) + return -EFAULT; + } + + /* + * The basic logic here is: + * + * If the user sends in a version of 0 tell it our version. + * If the user didn't send in a version, tell it our version. + * If the user sent in an old version, tell it -its- version. + * If the user sent in an newer version, tell it our version. + * + * The rationale behind telling the caller its version is that + * Workstation 6.5 required that VMX and VMCI kernel module were + * version sync'd. 
All new VMX users will be programmed to + * handle the VMCI kernel module version. + */ + + if (vmci_host_dev->user_version > 0 && + vmci_host_dev->user_version < VMCI_VERSION_HOSTQP) { + return vmci_host_dev->user_version; + } + + return VMCI_VERSION; +} + +#define vmci_ioctl_err(fmt, ...) \ + pr_devel("%s: " fmt, ioctl_name, ##__VA_ARGS__) + +static int vmci_host_do_init_context(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_init_blk init_block; + const struct cred *cred; + int retval; + + if (copy_from_user(&init_block, uptr, sizeof(init_block))) { + vmci_ioctl_err("error reading init block\n"); + return -EFAULT; + } + + mutex_lock(&vmci_host_dev->lock); + + if (vmci_host_dev->ct_type != VMCIOBJ_NOT_SET) { + vmci_ioctl_err("received VMCI init on initialized handle\n"); + retval = -EINVAL; + goto out; + } + + if (init_block.flags & ~VMCI_PRIVILEGE_FLAG_RESTRICTED) { + vmci_ioctl_err("unsupported VMCI restriction flag\n"); + retval = -EINVAL; + goto out; + } + + cred = get_current_cred(); + vmci_host_dev->context = vmci_ctx_create(init_block.cid, + init_block.flags, 0, + vmci_host_dev->user_version, + cred); + put_cred(cred); + if (IS_ERR(vmci_host_dev->context)) { + retval = PTR_ERR(vmci_host_dev->context); + vmci_ioctl_err("error initializing context\n"); + goto out; + } + + /* + * Copy cid to userlevel, we do this to allow the VMX + * to enforce its policy on cid generation. + */ + init_block.cid = vmci_ctx_get_id(vmci_host_dev->context); + if (copy_to_user(uptr, &init_block, sizeof(init_block))) { + vmci_ctx_destroy(vmci_host_dev->context); + vmci_host_dev->context = NULL; + vmci_ioctl_err("error writing init block\n"); + retval = -EFAULT; + goto out; + } + + vmci_host_dev->ct_type = VMCIOBJ_CONTEXT; + atomic_inc(&vmci_host_active_users); + + vmci_call_vsock_callback(true); + + retval = 0; + +out: + mutex_unlock(&vmci_host_dev->lock); + return retval; +} + +static int vmci_host_do_send_datagram(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_datagram_snd_rcv_info send_info; + struct vmci_datagram *dg = NULL; + u32 cid; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&send_info, uptr, sizeof(send_info))) + return -EFAULT; + + if (send_info.len > VMCI_MAX_DG_SIZE) { + vmci_ioctl_err("datagram is too big (size=%d)\n", + send_info.len); + return -EINVAL; + } + + if (send_info.len < sizeof(*dg)) { + vmci_ioctl_err("datagram is too small (size=%d)\n", + send_info.len); + return -EINVAL; + } + + dg = memdup_user((void __user *)(uintptr_t)send_info.addr, + send_info.len); + if (IS_ERR(dg)) { + vmci_ioctl_err( + "cannot allocate memory to dispatch datagram\n"); + return PTR_ERR(dg); + } + + if (VMCI_DG_SIZE(dg) != send_info.len) { + vmci_ioctl_err("datagram size mismatch\n"); + kfree(dg); + return -EINVAL; + } + + pr_devel("Datagram dst (handle=0x%x:0x%x) src (handle=0x%x:0x%x), payload (size=%llu bytes)\n", + dg->dst.context, dg->dst.resource, + dg->src.context, dg->src.resource, + (unsigned long long)dg->payload_size); + + /* Get source context id. */ + cid = vmci_ctx_get_id(vmci_host_dev->context); + send_info.result = vmci_datagram_dispatch(cid, dg, true); + kfree(dg); + + return copy_to_user(uptr, &send_info, sizeof(send_info)) ? 
-EFAULT : 0; +} + +static int vmci_host_do_receive_datagram(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_datagram_snd_rcv_info recv_info; + struct vmci_datagram *dg = NULL; + int retval; + size_t size; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&recv_info, uptr, sizeof(recv_info))) + return -EFAULT; + + size = recv_info.len; + recv_info.result = vmci_ctx_dequeue_datagram(vmci_host_dev->context, + &size, &dg); + + if (recv_info.result >= VMCI_SUCCESS) { + void __user *ubuf = (void __user *)(uintptr_t)recv_info.addr; + retval = copy_to_user(ubuf, dg, VMCI_DG_SIZE(dg)); + kfree(dg); + if (retval != 0) + return -EFAULT; + } + + return copy_to_user(uptr, &recv_info, sizeof(recv_info)) ? -EFAULT : 0; +} + +static int vmci_host_do_alloc_queuepair(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_handle handle; + int vmci_status; + int __user *retptr; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (vmci_host_dev->user_version < VMCI_VERSION_NOVMVM) { + struct vmci_qp_alloc_info_vmvm alloc_info; + struct vmci_qp_alloc_info_vmvm __user *info = uptr; + + if (copy_from_user(&alloc_info, uptr, sizeof(alloc_info))) + return -EFAULT; + + handle = alloc_info.handle; + retptr = &info->result; + + vmci_status = vmci_qp_broker_alloc(alloc_info.handle, + alloc_info.peer, + alloc_info.flags, + VMCI_NO_PRIVILEGE_FLAGS, + alloc_info.produce_size, + alloc_info.consume_size, + NULL, + vmci_host_dev->context); + + if (vmci_status == VMCI_SUCCESS) + vmci_status = VMCI_SUCCESS_QUEUEPAIR_CREATE; + } else { + struct vmci_qp_alloc_info alloc_info; + struct vmci_qp_alloc_info __user *info = uptr; + struct vmci_qp_page_store page_store; + + if (copy_from_user(&alloc_info, uptr, sizeof(alloc_info))) + return -EFAULT; + + handle = alloc_info.handle; + retptr = &info->result; + + page_store.pages = alloc_info.ppn_va; + page_store.len = alloc_info.num_ppns; + + vmci_status = vmci_qp_broker_alloc(alloc_info.handle, + alloc_info.peer, + alloc_info.flags, + VMCI_NO_PRIVILEGE_FLAGS, + alloc_info.produce_size, + alloc_info.consume_size, + &page_store, + vmci_host_dev->context); + } + + if (put_user(vmci_status, retptr)) { + if (vmci_status >= VMCI_SUCCESS) { + vmci_status = vmci_qp_broker_detach(handle, + vmci_host_dev->context); + } + return -EFAULT; + } + + return 0; +} + +static int vmci_host_do_queuepair_setva(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_qp_set_va_info set_va_info; + struct vmci_qp_set_va_info __user *info = uptr; + s32 result; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (vmci_host_dev->user_version < VMCI_VERSION_NOVMVM) { + vmci_ioctl_err("is not allowed\n"); + return -EINVAL; + } + + if (copy_from_user(&set_va_info, uptr, sizeof(set_va_info))) + return -EFAULT; + + if (set_va_info.va) { + /* + * VMX is passing down a new VA for the queue + * pair mapping. + */ + result = vmci_qp_broker_map(set_va_info.handle, + vmci_host_dev->context, + set_va_info.va); + } else { + /* + * The queue pair is about to be unmapped by + * the VMX. + */ + result = vmci_qp_broker_unmap(set_va_info.handle, + vmci_host_dev->context, 0); + } + + return put_user(result, &info->result) ? 
-EFAULT : 0; +} + +static int vmci_host_do_queuepair_setpf(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_qp_page_file_info page_file_info; + struct vmci_qp_page_file_info __user *info = uptr; + s32 result; + + if (vmci_host_dev->user_version < VMCI_VERSION_HOSTQP || + vmci_host_dev->user_version >= VMCI_VERSION_NOVMVM) { + vmci_ioctl_err("not supported on this VMX (version=%d)\n", + vmci_host_dev->user_version); + return -EINVAL; + } + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&page_file_info, uptr, sizeof(*info))) + return -EFAULT; + + /* + * Communicate success pre-emptively to the caller. Note that the + * basic premise is that it is incumbent upon the caller not to look at + * the info.result field until after the ioctl() returns. And then, + * only if the ioctl() result indicates no error. We send up the + * SUCCESS status before calling SetPageStore() store because failing + * to copy up the result code means unwinding the SetPageStore(). + * + * It turns out the logic to unwind a SetPageStore() opens a can of + * worms. For example, if a host had created the queue_pair and a + * guest attaches and SetPageStore() is successful but writing success + * fails, then ... the host has to be stopped from writing (anymore) + * data into the queue_pair. That means an additional test in the + * VMCI_Enqueue() code path. Ugh. + */ + + if (put_user(VMCI_SUCCESS, &info->result)) { + /* + * In this case, we can't write a result field of the + * caller's info block. So, we don't even try to + * SetPageStore(). + */ + return -EFAULT; + } + + result = vmci_qp_broker_set_page_store(page_file_info.handle, + page_file_info.produce_va, + page_file_info.consume_va, + vmci_host_dev->context); + if (result < VMCI_SUCCESS) { + if (put_user(result, &info->result)) { + /* + * Note that in this case the SetPageStore() + * call failed but we were unable to + * communicate that to the caller (because the + * copy_to_user() call failed). So, if we + * simply return an error (in this case + * -EFAULT) then the caller will know that the + * SetPageStore failed even though we couldn't + * put the result code in the result field and + * indicate exactly why it failed. + * + * That says nothing about the issue where we + * were once able to write to the caller's info + * memory and now can't. Something more + * serious is probably going on than the fact + * that SetPageStore() didn't work. + */ + return -EFAULT; + } + } + + return 0; +} + +static int vmci_host_do_qp_detach(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_qp_dtch_info detach_info; + struct vmci_qp_dtch_info __user *info = uptr; + s32 result; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&detach_info, uptr, sizeof(detach_info))) + return -EFAULT; + + result = vmci_qp_broker_detach(detach_info.handle, + vmci_host_dev->context); + if (result == VMCI_SUCCESS && + vmci_host_dev->user_version < VMCI_VERSION_NOVMVM) { + result = VMCI_SUCCESS_LAST_DETACH; + } + + return put_user(result, &info->result) ? 
-EFAULT : 0; +} + +static int vmci_host_do_ctx_add_notify(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_ctx_info ar_info; + struct vmci_ctx_info __user *info = uptr; + s32 result; + u32 cid; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&ar_info, uptr, sizeof(ar_info))) + return -EFAULT; + + cid = vmci_ctx_get_id(vmci_host_dev->context); + result = vmci_ctx_add_notification(cid, ar_info.remote_cid); + + return put_user(result, &info->result) ? -EFAULT : 0; +} + +static int vmci_host_do_ctx_remove_notify(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_ctx_info ar_info; + struct vmci_ctx_info __user *info = uptr; + u32 cid; + int result; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&ar_info, uptr, sizeof(ar_info))) + return -EFAULT; + + cid = vmci_ctx_get_id(vmci_host_dev->context); + result = vmci_ctx_remove_notification(cid, + ar_info.remote_cid); + + return put_user(result, &info->result) ? -EFAULT : 0; +} + +static int vmci_host_do_ctx_get_cpt_state(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_ctx_chkpt_buf_info get_info; + u32 cid; + void *cpt_buf; + int retval; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&get_info, uptr, sizeof(get_info))) + return -EFAULT; + + cid = vmci_ctx_get_id(vmci_host_dev->context); + get_info.result = vmci_ctx_get_chkpt_state(cid, get_info.cpt_type, + &get_info.buf_size, &cpt_buf); + if (get_info.result == VMCI_SUCCESS && get_info.buf_size) { + void __user *ubuf = (void __user *)(uintptr_t)get_info.cpt_buf; + retval = copy_to_user(ubuf, cpt_buf, get_info.buf_size); + kfree(cpt_buf); + + if (retval) + return -EFAULT; + } + + return copy_to_user(uptr, &get_info, sizeof(get_info)) ? -EFAULT : 0; +} + +static int vmci_host_do_ctx_set_cpt_state(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_ctx_chkpt_buf_info set_info; + u32 cid; + void *cpt_buf; + int retval; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&set_info, uptr, sizeof(set_info))) + return -EFAULT; + + cpt_buf = memdup_user((void __user *)(uintptr_t)set_info.cpt_buf, + set_info.buf_size); + if (IS_ERR(cpt_buf)) + return PTR_ERR(cpt_buf); + + cid = vmci_ctx_get_id(vmci_host_dev->context); + set_info.result = vmci_ctx_set_chkpt_state(cid, set_info.cpt_type, + set_info.buf_size, cpt_buf); + + retval = copy_to_user(uptr, &set_info, sizeof(set_info)) ? -EFAULT : 0; + + kfree(cpt_buf); + return retval; +} + +static int vmci_host_do_get_context_id(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + u32 __user *u32ptr = uptr; + + return put_user(VMCI_HOST_CONTEXT_ID, u32ptr) ? 
-EFAULT : 0; +} + +static int vmci_host_do_set_notify(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_set_notify_info notify_info; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(¬ify_info, uptr, sizeof(notify_info))) + return -EFAULT; + + if (notify_info.notify_uva) { + notify_info.result = + vmci_host_setup_notify(vmci_host_dev->context, + notify_info.notify_uva); + } else { + vmci_ctx_unset_notify(vmci_host_dev->context); + notify_info.result = VMCI_SUCCESS; + } + + return copy_to_user(uptr, ¬ify_info, sizeof(notify_info)) ? + -EFAULT : 0; +} + +static int vmci_host_do_notify_resource(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_dbell_notify_resource_info info; + u32 cid; + + if (vmci_host_dev->user_version < VMCI_VERSION_NOTIFY) { + vmci_ioctl_err("invalid for current VMX versions\n"); + return -EINVAL; + } + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&info, uptr, sizeof(info))) + return -EFAULT; + + cid = vmci_ctx_get_id(vmci_host_dev->context); + + switch (info.action) { + case VMCI_NOTIFY_RESOURCE_ACTION_NOTIFY: + if (info.resource == VMCI_NOTIFY_RESOURCE_DOOR_BELL) { + u32 flags = VMCI_NO_PRIVILEGE_FLAGS; + info.result = vmci_ctx_notify_dbell(cid, info.handle, + flags); + } else { + info.result = VMCI_ERROR_UNAVAILABLE; + } + break; + + case VMCI_NOTIFY_RESOURCE_ACTION_CREATE: + info.result = vmci_ctx_dbell_create(cid, info.handle); + break; + + case VMCI_NOTIFY_RESOURCE_ACTION_DESTROY: + info.result = vmci_ctx_dbell_destroy(cid, info.handle); + break; + + default: + vmci_ioctl_err("got unknown action (action=%d)\n", + info.action); + info.result = VMCI_ERROR_INVALID_ARGS; + } + + return copy_to_user(uptr, &info, sizeof(info)) ? -EFAULT : 0; +} + +static int vmci_host_do_recv_notifications(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_ctx_notify_recv_info info; + struct vmci_handle_arr *db_handle_array; + struct vmci_handle_arr *qp_handle_array; + void __user *ubuf; + u32 cid; + int retval = 0; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (vmci_host_dev->user_version < VMCI_VERSION_NOTIFY) { + vmci_ioctl_err("not supported for the current vmx version\n"); + return -EINVAL; + } + + if (copy_from_user(&info, uptr, sizeof(info))) + return -EFAULT; + + if ((info.db_handle_buf_size && !info.db_handle_buf_uva) || + (info.qp_handle_buf_size && !info.qp_handle_buf_uva)) { + return -EINVAL; + } + + cid = vmci_ctx_get_id(vmci_host_dev->context); + + info.result = vmci_ctx_rcv_notifications_get(cid, + &db_handle_array, &qp_handle_array); + if (info.result != VMCI_SUCCESS) + return copy_to_user(uptr, &info, sizeof(info)) ? 
-EFAULT : 0; + + ubuf = (void __user *)(uintptr_t)info.db_handle_buf_uva; + info.result = drv_cp_harray_to_user(ubuf, &info.db_handle_buf_size, + db_handle_array, &retval); + if (info.result == VMCI_SUCCESS && !retval) { + ubuf = (void __user *)(uintptr_t)info.qp_handle_buf_uva; + info.result = drv_cp_harray_to_user(ubuf, + &info.qp_handle_buf_size, + qp_handle_array, &retval); + } + + if (!retval && copy_to_user(uptr, &info, sizeof(info))) + retval = -EFAULT; + + vmci_ctx_rcv_notifications_release(cid, + db_handle_array, qp_handle_array, + info.result == VMCI_SUCCESS && !retval); + + return retval; +} + +static long vmci_host_unlocked_ioctl(struct file *filp, + unsigned int iocmd, unsigned long ioarg) +{ +#define VMCI_DO_IOCTL(ioctl_name, ioctl_fn) do { \ + char *name = "IOCTL_VMCI_" # ioctl_name; \ + return vmci_host_do_ ## ioctl_fn( \ + vmci_host_dev, name, uptr); \ + } while (0) + + struct vmci_host_dev *vmci_host_dev = filp->private_data; + void __user *uptr = (void __user *)ioarg; + + switch (iocmd) { + case IOCTL_VMCI_INIT_CONTEXT: + VMCI_DO_IOCTL(INIT_CONTEXT, init_context); + case IOCTL_VMCI_DATAGRAM_SEND: + VMCI_DO_IOCTL(DATAGRAM_SEND, send_datagram); + case IOCTL_VMCI_DATAGRAM_RECEIVE: + VMCI_DO_IOCTL(DATAGRAM_RECEIVE, receive_datagram); + case IOCTL_VMCI_QUEUEPAIR_ALLOC: + VMCI_DO_IOCTL(QUEUEPAIR_ALLOC, alloc_queuepair); + case IOCTL_VMCI_QUEUEPAIR_SETVA: + VMCI_DO_IOCTL(QUEUEPAIR_SETVA, queuepair_setva); + case IOCTL_VMCI_QUEUEPAIR_SETPAGEFILE: + VMCI_DO_IOCTL(QUEUEPAIR_SETPAGEFILE, queuepair_setpf); + case IOCTL_VMCI_QUEUEPAIR_DETACH: + VMCI_DO_IOCTL(QUEUEPAIR_DETACH, qp_detach); + case IOCTL_VMCI_CTX_ADD_NOTIFICATION: + VMCI_DO_IOCTL(CTX_ADD_NOTIFICATION, ctx_add_notify); + case IOCTL_VMCI_CTX_REMOVE_NOTIFICATION: + VMCI_DO_IOCTL(CTX_REMOVE_NOTIFICATION, ctx_remove_notify); + case IOCTL_VMCI_CTX_GET_CPT_STATE: + VMCI_DO_IOCTL(CTX_GET_CPT_STATE, ctx_get_cpt_state); + case IOCTL_VMCI_CTX_SET_CPT_STATE: + VMCI_DO_IOCTL(CTX_SET_CPT_STATE, ctx_set_cpt_state); + case IOCTL_VMCI_GET_CONTEXT_ID: + VMCI_DO_IOCTL(GET_CONTEXT_ID, get_context_id); + case IOCTL_VMCI_SET_NOTIFY: + VMCI_DO_IOCTL(SET_NOTIFY, set_notify); + case IOCTL_VMCI_NOTIFY_RESOURCE: + VMCI_DO_IOCTL(NOTIFY_RESOURCE, notify_resource); + case IOCTL_VMCI_NOTIFICATIONS_RECEIVE: + VMCI_DO_IOCTL(NOTIFICATIONS_RECEIVE, recv_notifications); + + case IOCTL_VMCI_VERSION: + case IOCTL_VMCI_VERSION2: + return vmci_host_get_version(vmci_host_dev, iocmd, uptr); + + default: + pr_devel("%s: Unknown ioctl (iocmd=%d)\n", __func__, iocmd); + return -EINVAL; + } + +#undef VMCI_DO_IOCTL +} + +static const struct file_operations vmuser_fops = { + .owner = THIS_MODULE, + .open = vmci_host_open, + .release = vmci_host_close, + .poll = vmci_host_poll, + .unlocked_ioctl = vmci_host_unlocked_ioctl, + .compat_ioctl = compat_ptr_ioctl, +}; + +static struct miscdevice vmci_host_miscdev = { + .name = "vmci", + .minor = MISC_DYNAMIC_MINOR, + .fops = &vmuser_fops, +}; + +int __init vmci_host_init(void) +{ + int error; + + host_context = vmci_ctx_create(VMCI_HOST_CONTEXT_ID, + VMCI_DEFAULT_PROC_PRIVILEGE_FLAGS, + -1, VMCI_VERSION, NULL); + if (IS_ERR(host_context)) { + error = PTR_ERR(host_context); + pr_warn("Failed to initialize VMCIContext (error%d)\n", + error); + return error; + } + + error = misc_register(&vmci_host_miscdev); + if (error) { + pr_warn("Module registration error (name=%s, major=%d, minor=%d, err=%d)\n", + vmci_host_miscdev.name, + MISC_MAJOR, vmci_host_miscdev.minor, + error); + pr_warn("Unable to initialize host personality\n"); 
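+		/* Registration failed: undo the host context created above. */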
+ vmci_ctx_destroy(host_context); + return error; + } + + pr_info("VMCI host device registered (name=%s, major=%d, minor=%d)\n", + vmci_host_miscdev.name, MISC_MAJOR, vmci_host_miscdev.minor); + + vmci_host_device_initialized = true; + return 0; +} + +void __exit vmci_host_exit(void) +{ + vmci_host_device_initialized = false; + + misc_deregister(&vmci_host_miscdev); + vmci_ctx_destroy(host_context); + vmci_qp_broker_exit(); + + pr_debug("VMCI host driver module unloaded\n"); +} diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c new file mode 100644 index 0000000000..73d71c4ec1 --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c @@ -0,0 +1,3279 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vmci_handle_array.h" +#include "vmci_queue_pair.h" +#include "vmci_datagram.h" +#include "vmci_resource.h" +#include "vmci_context.h" +#include "vmci_driver.h" +#include "vmci_event.h" +#include "vmci_route.h" + +/* + * In the following, we will distinguish between two kinds of VMX processes - + * the ones with versions lower than VMCI_VERSION_NOVMVM that use specialized + * VMCI page files in the VMX and supporting VM to VM communication and the + * newer ones that use the guest memory directly. We will in the following + * refer to the older VMX versions as old-style VMX'en, and the newer ones as + * new-style VMX'en. + * + * The state transition datagram is as follows (the VMCIQPB_ prefix has been + * removed for readability) - see below for more details on the transtions: + * + * -------------- NEW ------------- + * | | + * \_/ \_/ + * CREATED_NO_MEM <-----------------> CREATED_MEM + * | | | + * | o-----------------------o | + * | | | + * \_/ \_/ \_/ + * ATTACHED_NO_MEM <----------------> ATTACHED_MEM + * | | | + * | o----------------------o | + * | | | + * \_/ \_/ \_/ + * SHUTDOWN_NO_MEM <----------------> SHUTDOWN_MEM + * | | + * | | + * -------------> gone <------------- + * + * In more detail. When a VMCI queue pair is first created, it will be in the + * VMCIQPB_NEW state. It will then move into one of the following states: + * + * - VMCIQPB_CREATED_NO_MEM: this state indicates that either: + * + * - the created was performed by a host endpoint, in which case there is + * no backing memory yet. + * + * - the create was initiated by an old-style VMX, that uses + * vmci_qp_broker_set_page_store to specify the UVAs of the queue pair at + * a later point in time. This state can be distinguished from the one + * above by the context ID of the creator. A host side is not allowed to + * attach until the page store has been set. + * + * - VMCIQPB_CREATED_MEM: this state is the result when the queue pair + * is created by a VMX using the queue pair device backend that + * sets the UVAs of the queue pair immediately and stores the + * information for later attachers. At this point, it is ready for + * the host side to attach to it. + * + * Once the queue pair is in one of the created states (with the exception of + * the case mentioned for older VMX'en above), it is possible to attach to the + * queue pair. 
Again we have two new states possible: + * + * - VMCIQPB_ATTACHED_MEM: this state can be reached through the following + * paths: + * + * - from VMCIQPB_CREATED_NO_MEM when a new-style VMX allocates a queue + * pair, and attaches to a queue pair previously created by the host side. + * + * - from VMCIQPB_CREATED_MEM when the host side attaches to a queue pair + * already created by a guest. + * + * - from VMCIQPB_ATTACHED_NO_MEM, when an old-style VMX calls + * vmci_qp_broker_set_page_store (see below). + * + * - VMCIQPB_ATTACHED_NO_MEM: If the queue pair already was in the + * VMCIQPB_CREATED_NO_MEM due to a host side create, an old-style VMX will + * bring the queue pair into this state. Once vmci_qp_broker_set_page_store + * is called to register the user memory, the VMCIQPB_ATTACH_MEM state + * will be entered. + * + * From the attached queue pair, the queue pair can enter the shutdown states + * when either side of the queue pair detaches. If the guest side detaches + * first, the queue pair will enter the VMCIQPB_SHUTDOWN_NO_MEM state, where + * the content of the queue pair will no longer be available. If the host + * side detaches first, the queue pair will either enter the + * VMCIQPB_SHUTDOWN_MEM, if the guest memory is currently mapped, or + * VMCIQPB_SHUTDOWN_NO_MEM, if the guest memory is not mapped + * (e.g., the host detaches while a guest is stunned). + * + * New-style VMX'en will also unmap guest memory, if the guest is + * quiesced, e.g., during a snapshot operation. In that case, the guest + * memory will no longer be available, and the queue pair will transition from + * *_MEM state to a *_NO_MEM state. The VMX may later map the memory once more, + * in which case the queue pair will transition from the *_NO_MEM state at that + * point back to the *_MEM state. Note that the *_NO_MEM state may have changed, + * since the peer may have either attached or detached in the meantime. The + * values are laid out such that ++ on a state will move from a *_NO_MEM to a + * *_MEM state, and vice versa. + */ + +/* The Kernel specific component of the struct vmci_queue structure. */ +struct vmci_queue_kern_if { + struct mutex __mutex; /* Protects the queue. */ + struct mutex *mutex; /* Shared by producer and consumer queues. */ + size_t num_pages; /* Number of pages incl. header. */ + bool host; /* Host or guest? */ + union { + struct { + dma_addr_t *pas; + void **vas; + } g; /* Used by the guest. */ + struct { + struct page **page; + struct page **header_page; + } h; /* Used by the host. */ + } u; +}; + +/* + * This structure is opaque to the clients. + */ +struct vmci_qp { + struct vmci_handle handle; + struct vmci_queue *produce_q; + struct vmci_queue *consume_q; + u64 produce_q_size; + u64 consume_q_size; + u32 peer; + u32 flags; + u32 priv_flags; + bool guest_endpoint; + unsigned int blocked; + unsigned int generation; + wait_queue_head_t event; +}; + +enum qp_broker_state { + VMCIQPB_NEW, + VMCIQPB_CREATED_NO_MEM, + VMCIQPB_CREATED_MEM, + VMCIQPB_ATTACHED_NO_MEM, + VMCIQPB_ATTACHED_MEM, + VMCIQPB_SHUTDOWN_NO_MEM, + VMCIQPB_SHUTDOWN_MEM, + VMCIQPB_GONE +}; + +#define QPBROKERSTATE_HAS_MEM(_qpb) (_qpb->state == VMCIQPB_CREATED_MEM || \ + _qpb->state == VMCIQPB_ATTACHED_MEM || \ + _qpb->state == VMCIQPB_SHUTDOWN_MEM) + +/* + * In the queue pair broker, we always use the guest point of view for + * the produce and consume queue values and references, e.g., the + * produce queue size stored is the guests produce queue size. The + * host endpoint will need to swap these around. 
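+ * For example, a pair created by a guest with a 4KB produce queue and an
+ * 8KB consume queue is stored here as produce_size 4KB / consume_size 8KB,
+ * and a host endpoint attaching to it treats the 8KB queue as the one it
+ * produces into and the 4KB queue as the one it consumes from.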
The only exception is + * the local queue pairs on the host, in which case the host endpoint + * that creates the queue pair will have the right orientation, and + * the attaching host endpoint will need to swap. + */ +struct qp_entry { + struct list_head list_item; + struct vmci_handle handle; + u32 peer; + u32 flags; + u64 produce_size; + u64 consume_size; + u32 ref_count; +}; + +struct qp_broker_entry { + struct vmci_resource resource; + struct qp_entry qp; + u32 create_id; + u32 attach_id; + enum qp_broker_state state; + bool require_trusted_attach; + bool created_by_trusted; + bool vmci_page_files; /* Created by VMX using VMCI page files */ + struct vmci_queue *produce_q; + struct vmci_queue *consume_q; + struct vmci_queue_header saved_produce_q; + struct vmci_queue_header saved_consume_q; + vmci_event_release_cb wakeup_cb; + void *client_data; + void *local_mem; /* Kernel memory for local queue pair */ +}; + +struct qp_guest_endpoint { + struct vmci_resource resource; + struct qp_entry qp; + u64 num_ppns; + void *produce_q; + void *consume_q; + struct ppn_set ppn_set; +}; + +struct qp_list { + struct list_head head; + struct mutex mutex; /* Protect queue list. */ +}; + +static struct qp_list qp_broker_list = { + .head = LIST_HEAD_INIT(qp_broker_list.head), + .mutex = __MUTEX_INITIALIZER(qp_broker_list.mutex), +}; + +static struct qp_list qp_guest_endpoints = { + .head = LIST_HEAD_INIT(qp_guest_endpoints.head), + .mutex = __MUTEX_INITIALIZER(qp_guest_endpoints.mutex), +}; + +#define INVALID_VMCI_GUEST_MEM_ID 0 +#define QPE_NUM_PAGES(_QPE) ((u32) \ + (DIV_ROUND_UP(_QPE.produce_size, PAGE_SIZE) + \ + DIV_ROUND_UP(_QPE.consume_size, PAGE_SIZE) + 2)) +#define QP_SIZES_ARE_VALID(_prod_qsize, _cons_qsize) \ + ((_prod_qsize) + (_cons_qsize) >= max(_prod_qsize, _cons_qsize) && \ + (_prod_qsize) + (_cons_qsize) <= VMCI_MAX_GUEST_QP_MEMORY) + +/* + * Frees kernel VA space for a given queue and its queue header, and + * frees physical data pages. + */ +static void qp_free_queue(void *q, u64 size) +{ + struct vmci_queue *queue = q; + + if (queue) { + u64 i; + + /* Given size does not include header, so add in a page here. */ + for (i = 0; i < DIV_ROUND_UP(size, PAGE_SIZE) + 1; i++) { + dma_free_coherent(&vmci_pdev->dev, PAGE_SIZE, + queue->kernel_if->u.g.vas[i], + queue->kernel_if->u.g.pas[i]); + } + + vfree(queue); + } +} + +/* + * Allocates kernel queue pages of specified size with IOMMU mappings, + * plus space for the queue structure/kernel interface and the queue + * header. 
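+ * The control structure is a single vmalloc() allocation laid out as the
+ * struct vmci_queue, followed by its vmci_queue_kern_if, followed by the
+ * arrays of per-page DMA addresses and kernel VAs; the header page and the
+ * data pages themselves are individual dma_alloc_coherent() allocations.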
+ */ +static void *qp_alloc_queue(u64 size, u32 flags) +{ + u64 i; + struct vmci_queue *queue; + size_t pas_size; + size_t vas_size; + size_t queue_size = sizeof(*queue) + sizeof(*queue->kernel_if); + u64 num_pages; + + if (size > SIZE_MAX - PAGE_SIZE) + return NULL; + num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1; + if (num_pages > + (SIZE_MAX - queue_size) / + (sizeof(*queue->kernel_if->u.g.pas) + + sizeof(*queue->kernel_if->u.g.vas))) + return NULL; + + pas_size = num_pages * sizeof(*queue->kernel_if->u.g.pas); + vas_size = num_pages * sizeof(*queue->kernel_if->u.g.vas); + queue_size += pas_size + vas_size; + + queue = vmalloc(queue_size); + if (!queue) + return NULL; + + queue->q_header = NULL; + queue->saved_header = NULL; + queue->kernel_if = (struct vmci_queue_kern_if *)(queue + 1); + queue->kernel_if->mutex = NULL; + queue->kernel_if->num_pages = num_pages; + queue->kernel_if->u.g.pas = (dma_addr_t *)(queue->kernel_if + 1); + queue->kernel_if->u.g.vas = + (void **)((u8 *)queue->kernel_if->u.g.pas + pas_size); + queue->kernel_if->host = false; + + for (i = 0; i < num_pages; i++) { + queue->kernel_if->u.g.vas[i] = + dma_alloc_coherent(&vmci_pdev->dev, PAGE_SIZE, + &queue->kernel_if->u.g.pas[i], + GFP_KERNEL); + if (!queue->kernel_if->u.g.vas[i]) { + /* Size excl. the header. */ + qp_free_queue(queue, i * PAGE_SIZE); + return NULL; + } + } + + /* Queue header is the first page. */ + queue->q_header = queue->kernel_if->u.g.vas[0]; + + return queue; +} + +/* + * Copies from a given buffer or iovector to a VMCI Queue. Uses + * kmap_local_page() to dynamically map required portions of the queue + * by traversing the offset -> page translation structure for the queue. + * Assumes that offset + size does not wrap around in the queue. + */ +static int qp_memcpy_to_queue_iter(struct vmci_queue *queue, + u64 queue_offset, + struct iov_iter *from, + size_t size) +{ + struct vmci_queue_kern_if *kernel_if = queue->kernel_if; + size_t bytes_copied = 0; + + while (bytes_copied < size) { + const u64 page_index = + (queue_offset + bytes_copied) / PAGE_SIZE; + const size_t page_offset = + (queue_offset + bytes_copied) & (PAGE_SIZE - 1); + void *va; + size_t to_copy; + + if (kernel_if->host) + va = kmap_local_page(kernel_if->u.h.page[page_index]); + else + va = kernel_if->u.g.vas[page_index + 1]; + /* Skip header. */ + + if (size - bytes_copied > PAGE_SIZE - page_offset) + /* Enough payload to fill up from this page. */ + to_copy = PAGE_SIZE - page_offset; + else + to_copy = size - bytes_copied; + + if (!copy_from_iter_full((u8 *)va + page_offset, to_copy, + from)) { + if (kernel_if->host) + kunmap_local(va); + return VMCI_ERROR_INVALID_ARGS; + } + bytes_copied += to_copy; + if (kernel_if->host) + kunmap_local(va); + } + + return VMCI_SUCCESS; +} + +/* + * Copies to a given buffer or iovector from a VMCI Queue. Uses + * kmap_local_page() to dynamically map required portions of the queue + * by traversing the offset -> page translation structure for the queue. + * Assumes that offset + size does not wrap around in the queue. 
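+ * For guest queues the data pages start at index 1 (index 0 is the queue
+ * header), which is why the loop below maps vas[page_index + 1]; host
+ * queues map the required page on demand with kmap_local_page().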
+ */ +static int qp_memcpy_from_queue_iter(struct iov_iter *to, + const struct vmci_queue *queue, + u64 queue_offset, size_t size) +{ + struct vmci_queue_kern_if *kernel_if = queue->kernel_if; + size_t bytes_copied = 0; + + while (bytes_copied < size) { + const u64 page_index = + (queue_offset + bytes_copied) / PAGE_SIZE; + const size_t page_offset = + (queue_offset + bytes_copied) & (PAGE_SIZE - 1); + void *va; + size_t to_copy; + int err; + + if (kernel_if->host) + va = kmap_local_page(kernel_if->u.h.page[page_index]); + else + va = kernel_if->u.g.vas[page_index + 1]; + /* Skip header. */ + + if (size - bytes_copied > PAGE_SIZE - page_offset) + /* Enough payload to fill up this page. */ + to_copy = PAGE_SIZE - page_offset; + else + to_copy = size - bytes_copied; + + err = copy_to_iter((u8 *)va + page_offset, to_copy, to); + if (err != to_copy) { + if (kernel_if->host) + kunmap_local(va); + return VMCI_ERROR_INVALID_ARGS; + } + bytes_copied += to_copy; + if (kernel_if->host) + kunmap_local(va); + } + + return VMCI_SUCCESS; +} + +/* + * Allocates two list of PPNs --- one for the pages in the produce queue, + * and the other for the pages in the consume queue. Intializes the list + * of PPNs with the page frame numbers of the KVA for the two queues (and + * the queue headers). + */ +static int qp_alloc_ppn_set(void *prod_q, + u64 num_produce_pages, + void *cons_q, + u64 num_consume_pages, struct ppn_set *ppn_set) +{ + u64 *produce_ppns; + u64 *consume_ppns; + struct vmci_queue *produce_q = prod_q; + struct vmci_queue *consume_q = cons_q; + u64 i; + + if (!produce_q || !num_produce_pages || !consume_q || + !num_consume_pages || !ppn_set) + return VMCI_ERROR_INVALID_ARGS; + + if (ppn_set->initialized) + return VMCI_ERROR_ALREADY_EXISTS; + + produce_ppns = + kmalloc_array(num_produce_pages, sizeof(*produce_ppns), + GFP_KERNEL); + if (!produce_ppns) + return VMCI_ERROR_NO_MEM; + + consume_ppns = + kmalloc_array(num_consume_pages, sizeof(*consume_ppns), + GFP_KERNEL); + if (!consume_ppns) { + kfree(produce_ppns); + return VMCI_ERROR_NO_MEM; + } + + for (i = 0; i < num_produce_pages; i++) + produce_ppns[i] = + produce_q->kernel_if->u.g.pas[i] >> PAGE_SHIFT; + + for (i = 0; i < num_consume_pages; i++) + consume_ppns[i] = + consume_q->kernel_if->u.g.pas[i] >> PAGE_SHIFT; + + ppn_set->num_produce_pages = num_produce_pages; + ppn_set->num_consume_pages = num_consume_pages; + ppn_set->produce_ppns = produce_ppns; + ppn_set->consume_ppns = consume_ppns; + ppn_set->initialized = true; + return VMCI_SUCCESS; +} + +/* + * Frees the two list of PPNs for a queue pair. + */ +static void qp_free_ppn_set(struct ppn_set *ppn_set) +{ + if (ppn_set->initialized) { + /* Do not call these functions on NULL inputs. */ + kfree(ppn_set->produce_ppns); + kfree(ppn_set->consume_ppns); + } + memset(ppn_set, 0, sizeof(*ppn_set)); +} + +/* + * Populates the list of PPNs in the hypercall structure with the PPNS + * of the produce queue and the consume queue. 
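+ * The buffer is filled with the produce PPNs immediately followed by the
+ * consume PPNs; entries are 64 bit when vmci_use_ppn64() reports support
+ * and are truncated to 32 bit otherwise.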
+ */ +static int qp_populate_ppn_set(u8 *call_buf, const struct ppn_set *ppn_set) +{ + if (vmci_use_ppn64()) { + memcpy(call_buf, ppn_set->produce_ppns, + ppn_set->num_produce_pages * + sizeof(*ppn_set->produce_ppns)); + memcpy(call_buf + + ppn_set->num_produce_pages * + sizeof(*ppn_set->produce_ppns), + ppn_set->consume_ppns, + ppn_set->num_consume_pages * + sizeof(*ppn_set->consume_ppns)); + } else { + int i; + u32 *ppns = (u32 *) call_buf; + + for (i = 0; i < ppn_set->num_produce_pages; i++) + ppns[i] = (u32) ppn_set->produce_ppns[i]; + + ppns = &ppns[ppn_set->num_produce_pages]; + + for (i = 0; i < ppn_set->num_consume_pages; i++) + ppns[i] = (u32) ppn_set->consume_ppns[i]; + } + + return VMCI_SUCCESS; +} + +/* + * Allocates kernel VA space of specified size plus space for the queue + * and kernel interface. This is different from the guest queue allocator, + * because we do not allocate our own queue header/data pages here but + * share those of the guest. + */ +static struct vmci_queue *qp_host_alloc_queue(u64 size) +{ + struct vmci_queue *queue; + size_t queue_page_size; + u64 num_pages; + const size_t queue_size = sizeof(*queue) + sizeof(*(queue->kernel_if)); + + if (size > min_t(size_t, VMCI_MAX_GUEST_QP_MEMORY, SIZE_MAX - PAGE_SIZE)) + return NULL; + num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1; + if (num_pages > (SIZE_MAX - queue_size) / + sizeof(*queue->kernel_if->u.h.page)) + return NULL; + + queue_page_size = num_pages * sizeof(*queue->kernel_if->u.h.page); + + if (queue_size + queue_page_size > KMALLOC_MAX_SIZE) + return NULL; + + queue = kzalloc(queue_size + queue_page_size, GFP_KERNEL); + if (queue) { + queue->q_header = NULL; + queue->saved_header = NULL; + queue->kernel_if = (struct vmci_queue_kern_if *)(queue + 1); + queue->kernel_if->host = true; + queue->kernel_if->mutex = NULL; + queue->kernel_if->num_pages = num_pages; + queue->kernel_if->u.h.header_page = + (struct page **)((u8 *)queue + queue_size); + queue->kernel_if->u.h.page = + &queue->kernel_if->u.h.header_page[1]; + } + + return queue; +} + +/* + * Frees kernel memory for a given queue (header plus translation + * structure). + */ +static void qp_host_free_queue(struct vmci_queue *queue, u64 queue_size) +{ + kfree(queue); +} + +/* + * Initialize the mutex for the pair of queues. This mutex is used to + * protect the q_header and the buffer from changing out from under any + * users of either queue. Of course, it's only any good if the mutexes + * are actually acquired. Queue structure must lie on non-paged memory + * or we cannot guarantee access to the mutex. + */ +static void qp_init_queue_mutex(struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + /* + * Only the host queue has shared state - the guest queues do not + * need to synchronize access using a queue mutex. + */ + + if (produce_q->kernel_if->host) { + produce_q->kernel_if->mutex = &produce_q->kernel_if->__mutex; + consume_q->kernel_if->mutex = &produce_q->kernel_if->__mutex; + mutex_init(produce_q->kernel_if->mutex); + } +} + +/* + * Cleans up the mutex for the pair of queues. + */ +static void qp_cleanup_queue_mutex(struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + if (produce_q->kernel_if->host) { + produce_q->kernel_if->mutex = NULL; + consume_q->kernel_if->mutex = NULL; + } +} + +/* + * Acquire the mutex for the queue. Note that the produce_q and + * the consume_q share a mutex. So, only one of the two need to + * be passed in to this routine. Either will work just fine. 
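+ * Guest queues never take the lock: as noted in qp_init_queue_mutex(),
+ * only host-side queues have shared state to protect.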
+ */ +static void qp_acquire_queue_mutex(struct vmci_queue *queue) +{ + if (queue->kernel_if->host) + mutex_lock(queue->kernel_if->mutex); +} + +/* + * Release the mutex for the queue. Note that the produce_q and + * the consume_q share a mutex. So, only one of the two need to + * be passed in to this routine. Either will work just fine. + */ +static void qp_release_queue_mutex(struct vmci_queue *queue) +{ + if (queue->kernel_if->host) + mutex_unlock(queue->kernel_if->mutex); +} + +/* + * Helper function to release pages in the PageStoreAttachInfo + * previously obtained using get_user_pages. + */ +static void qp_release_pages(struct page **pages, + u64 num_pages, bool dirty) +{ + int i; + + for (i = 0; i < num_pages; i++) { + if (dirty) + set_page_dirty_lock(pages[i]); + + put_page(pages[i]); + pages[i] = NULL; + } +} + +/* + * Lock the user pages referenced by the {produce,consume}Buffer + * struct into memory and populate the {produce,consume}Pages + * arrays in the attach structure with them. + */ +static int qp_host_get_user_memory(u64 produce_uva, + u64 consume_uva, + struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + int retval; + int err = VMCI_SUCCESS; + + retval = get_user_pages_fast((uintptr_t) produce_uva, + produce_q->kernel_if->num_pages, + FOLL_WRITE, + produce_q->kernel_if->u.h.header_page); + if (retval < (int)produce_q->kernel_if->num_pages) { + pr_debug("get_user_pages_fast(produce) failed (retval=%d)", + retval); + if (retval > 0) + qp_release_pages(produce_q->kernel_if->u.h.header_page, + retval, false); + err = VMCI_ERROR_NO_MEM; + goto out; + } + + retval = get_user_pages_fast((uintptr_t) consume_uva, + consume_q->kernel_if->num_pages, + FOLL_WRITE, + consume_q->kernel_if->u.h.header_page); + if (retval < (int)consume_q->kernel_if->num_pages) { + pr_debug("get_user_pages_fast(consume) failed (retval=%d)", + retval); + if (retval > 0) + qp_release_pages(consume_q->kernel_if->u.h.header_page, + retval, false); + qp_release_pages(produce_q->kernel_if->u.h.header_page, + produce_q->kernel_if->num_pages, false); + err = VMCI_ERROR_NO_MEM; + } + + out: + return err; +} + +/* + * Registers the specification of the user pages used for backing a queue + * pair. Enough information to map in pages is stored in the OS specific + * part of the struct vmci_queue structure. + */ +static int qp_host_register_user_memory(struct vmci_qp_page_store *page_store, + struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + u64 produce_uva; + u64 consume_uva; + + /* + * The new style and the old style mapping only differs in + * that we either get a single or two UVAs, so we split the + * single UVA range at the appropriate spot. + */ + produce_uva = page_store->pages; + consume_uva = page_store->pages + + produce_q->kernel_if->num_pages * PAGE_SIZE; + return qp_host_get_user_memory(produce_uva, consume_uva, produce_q, + consume_q); +} + +/* + * Releases and removes the references to user pages stored in the attach + * struct. Pages are released from the page cache and may become + * swappable again. 
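+ * The pages are released dirty, since the peer may have written queue
+ * data through these mappings.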
+ */ +static void qp_host_unregister_user_memory(struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + qp_release_pages(produce_q->kernel_if->u.h.header_page, + produce_q->kernel_if->num_pages, true); + memset(produce_q->kernel_if->u.h.header_page, 0, + sizeof(*produce_q->kernel_if->u.h.header_page) * + produce_q->kernel_if->num_pages); + qp_release_pages(consume_q->kernel_if->u.h.header_page, + consume_q->kernel_if->num_pages, true); + memset(consume_q->kernel_if->u.h.header_page, 0, + sizeof(*consume_q->kernel_if->u.h.header_page) * + consume_q->kernel_if->num_pages); +} + +/* + * Once qp_host_register_user_memory has been performed on a + * queue, the queue pair headers can be mapped into the + * kernel. Once mapped, they must be unmapped with + * qp_host_unmap_queues prior to calling + * qp_host_unregister_user_memory. + * Pages are pinned. + */ +static int qp_host_map_queues(struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + int result; + + if (!produce_q->q_header || !consume_q->q_header) { + struct page *headers[2]; + + if (produce_q->q_header != consume_q->q_header) + return VMCI_ERROR_QUEUEPAIR_MISMATCH; + + if (produce_q->kernel_if->u.h.header_page == NULL || + *produce_q->kernel_if->u.h.header_page == NULL) + return VMCI_ERROR_UNAVAILABLE; + + headers[0] = *produce_q->kernel_if->u.h.header_page; + headers[1] = *consume_q->kernel_if->u.h.header_page; + + produce_q->q_header = vmap(headers, 2, VM_MAP, PAGE_KERNEL); + if (produce_q->q_header != NULL) { + consume_q->q_header = + (struct vmci_queue_header *)((u8 *) + produce_q->q_header + + PAGE_SIZE); + result = VMCI_SUCCESS; + } else { + pr_warn("vmap failed\n"); + result = VMCI_ERROR_NO_MEM; + } + } else { + result = VMCI_SUCCESS; + } + + return result; +} + +/* + * Unmaps previously mapped queue pair headers from the kernel. + * Pages are unpinned. + */ +static int qp_host_unmap_queues(u32 gid, + struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + if (produce_q->q_header) { + if (produce_q->q_header < consume_q->q_header) + vunmap(produce_q->q_header); + else + vunmap(consume_q->q_header); + + produce_q->q_header = NULL; + consume_q->q_header = NULL; + } + + return VMCI_SUCCESS; +} + +/* + * Finds the entry in the list corresponding to a given handle. Assumes + * that the list is locked. + */ +static struct qp_entry *qp_list_find(struct qp_list *qp_list, + struct vmci_handle handle) +{ + struct qp_entry *entry; + + if (vmci_handle_is_invalid(handle)) + return NULL; + + list_for_each_entry(entry, &qp_list->head, list_item) { + if (vmci_handle_is_equal(entry->handle, handle)) + return entry; + } + + return NULL; +} + +/* + * Finds the entry in the list corresponding to a given handle. + */ +static struct qp_guest_endpoint * +qp_guest_handle_to_entry(struct vmci_handle handle) +{ + struct qp_guest_endpoint *entry; + struct qp_entry *qp = qp_list_find(&qp_guest_endpoints, handle); + + entry = qp ? container_of( + qp, struct qp_guest_endpoint, qp) : NULL; + return entry; +} + +/* + * Finds the entry in the list corresponding to a given handle. + */ +static struct qp_broker_entry * +qp_broker_handle_to_entry(struct vmci_handle handle) +{ + struct qp_broker_entry *entry; + struct qp_entry *qp = qp_list_find(&qp_broker_list, handle); + + entry = qp ? container_of( + qp, struct qp_broker_entry, qp) : NULL; + return entry; +} + +/* + * Dispatches a queue pair event message directly into the local event + * queue. 
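+ * The event is addressed to this context's VMCI_EVENT_HANDLER resource,
+ * appears to originate from the hypervisor context and carries either a
+ * VMCI_EVENT_QP_PEER_ATTACH or a VMCI_EVENT_QP_PEER_DETACH payload.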
+ */ +static int qp_notify_peer_local(bool attach, struct vmci_handle handle) +{ + u32 context_id = vmci_get_context_id(); + struct vmci_event_qp ev; + + memset(&ev, 0, sizeof(ev)); + ev.msg.hdr.dst = vmci_make_handle(context_id, VMCI_EVENT_HANDLER); + ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_CONTEXT_RESOURCE_ID); + ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr); + ev.msg.event_data.event = + attach ? VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH; + ev.payload.peer_id = context_id; + ev.payload.handle = handle; + + return vmci_event_dispatch(&ev.msg.hdr); +} + +/* + * Allocates and initializes a qp_guest_endpoint structure. + * Allocates a queue_pair rid (and handle) iff the given entry has + * an invalid handle. 0 through VMCI_RESERVED_RESOURCE_ID_MAX + * are reserved handles. Assumes that the QP list mutex is held + * by the caller. + */ +static struct qp_guest_endpoint * +qp_guest_endpoint_create(struct vmci_handle handle, + u32 peer, + u32 flags, + u64 produce_size, + u64 consume_size, + void *produce_q, + void *consume_q) +{ + int result; + struct qp_guest_endpoint *entry; + /* One page each for the queue headers. */ + const u64 num_ppns = DIV_ROUND_UP(produce_size, PAGE_SIZE) + + DIV_ROUND_UP(consume_size, PAGE_SIZE) + 2; + + if (vmci_handle_is_invalid(handle)) { + u32 context_id = vmci_get_context_id(); + + handle = vmci_make_handle(context_id, VMCI_INVALID_ID); + } + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (entry) { + entry->qp.peer = peer; + entry->qp.flags = flags; + entry->qp.produce_size = produce_size; + entry->qp.consume_size = consume_size; + entry->qp.ref_count = 0; + entry->num_ppns = num_ppns; + entry->produce_q = produce_q; + entry->consume_q = consume_q; + INIT_LIST_HEAD(&entry->qp.list_item); + + /* Add resource obj */ + result = vmci_resource_add(&entry->resource, + VMCI_RESOURCE_TYPE_QPAIR_GUEST, + handle); + entry->qp.handle = vmci_resource_handle(&entry->resource); + if ((result != VMCI_SUCCESS) || + qp_list_find(&qp_guest_endpoints, entry->qp.handle)) { + pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d", + handle.context, handle.resource, result); + kfree(entry); + entry = NULL; + } + } + return entry; +} + +/* + * Frees a qp_guest_endpoint structure. + */ +static void qp_guest_endpoint_destroy(struct qp_guest_endpoint *entry) +{ + qp_free_ppn_set(&entry->ppn_set); + qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q); + qp_free_queue(entry->produce_q, entry->qp.produce_size); + qp_free_queue(entry->consume_q, entry->qp.consume_size); + /* Unlink from resource hash table and free callback */ + vmci_resource_remove(&entry->resource); + + kfree(entry); +} + +/* + * Helper to make a queue_pairAlloc hypercall when the driver is + * supporting a guest device. + */ +static int qp_alloc_hypercall(const struct qp_guest_endpoint *entry) +{ + struct vmci_qp_alloc_msg *alloc_msg; + size_t msg_size; + size_t ppn_size; + int result; + + if (!entry || entry->num_ppns <= 2) + return VMCI_ERROR_INVALID_ARGS; + + ppn_size = vmci_use_ppn64() ? 
sizeof(u64) : sizeof(u32); + msg_size = sizeof(*alloc_msg) + + (size_t) entry->num_ppns * ppn_size; + alloc_msg = kmalloc(msg_size, GFP_KERNEL); + if (!alloc_msg) + return VMCI_ERROR_NO_MEM; + + alloc_msg->hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_QUEUEPAIR_ALLOC); + alloc_msg->hdr.src = VMCI_ANON_SRC_HANDLE; + alloc_msg->hdr.payload_size = msg_size - VMCI_DG_HEADERSIZE; + alloc_msg->handle = entry->qp.handle; + alloc_msg->peer = entry->qp.peer; + alloc_msg->flags = entry->qp.flags; + alloc_msg->produce_size = entry->qp.produce_size; + alloc_msg->consume_size = entry->qp.consume_size; + alloc_msg->num_ppns = entry->num_ppns; + + result = qp_populate_ppn_set((u8 *)alloc_msg + sizeof(*alloc_msg), + &entry->ppn_set); + if (result == VMCI_SUCCESS) + result = vmci_send_datagram(&alloc_msg->hdr); + + kfree(alloc_msg); + + return result; +} + +/* + * Helper to make a queue_pairDetach hypercall when the driver is + * supporting a guest device. + */ +static int qp_detatch_hypercall(struct vmci_handle handle) +{ + struct vmci_qp_detach_msg detach_msg; + + detach_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_QUEUEPAIR_DETACH); + detach_msg.hdr.src = VMCI_ANON_SRC_HANDLE; + detach_msg.hdr.payload_size = sizeof(handle); + detach_msg.handle = handle; + + return vmci_send_datagram(&detach_msg.hdr); +} + +/* + * Adds the given entry to the list. Assumes that the list is locked. + */ +static void qp_list_add_entry(struct qp_list *qp_list, struct qp_entry *entry) +{ + if (entry) + list_add(&entry->list_item, &qp_list->head); +} + +/* + * Removes the given entry from the list. Assumes that the list is locked. + */ +static void qp_list_remove_entry(struct qp_list *qp_list, + struct qp_entry *entry) +{ + if (entry) + list_del(&entry->list_item); +} + +/* + * Helper for VMCI queue_pair detach interface. Frees the physical + * pages for the queue pair. + */ +static int qp_detatch_guest_work(struct vmci_handle handle) +{ + int result; + struct qp_guest_endpoint *entry; + u32 ref_count = ~0; /* To avoid compiler warning below */ + + mutex_lock(&qp_guest_endpoints.mutex); + + entry = qp_guest_handle_to_entry(handle); + if (!entry) { + mutex_unlock(&qp_guest_endpoints.mutex); + return VMCI_ERROR_NOT_FOUND; + } + + if (entry->qp.flags & VMCI_QPFLAG_LOCAL) { + result = VMCI_SUCCESS; + + if (entry->qp.ref_count > 1) { + result = qp_notify_peer_local(false, handle); + /* + * We can fail to notify a local queuepair + * because we can't allocate. We still want + * to release the entry if that happens, so + * don't bail out yet. + */ + } + } else { + result = qp_detatch_hypercall(handle); + if (result < VMCI_SUCCESS) { + /* + * We failed to notify a non-local queuepair. + * That other queuepair might still be + * accessing the shared memory, so don't + * release the entry yet. It will get cleaned + * up by VMCIqueue_pair_Exit() if necessary + * (assuming we are going away, otherwise why + * did this fail?). + */ + + mutex_unlock(&qp_guest_endpoints.mutex); + return result; + } + } + + /* + * If we get here then we either failed to notify a local queuepair, or + * we succeeded in all cases. Release the entry if required. + */ + + entry->qp.ref_count--; + if (entry->qp.ref_count == 0) + qp_list_remove_entry(&qp_guest_endpoints, &entry->qp); + + /* If we didn't remove the entry, this could change once we unlock. 
*/ + if (entry) + ref_count = entry->qp.ref_count; + + mutex_unlock(&qp_guest_endpoints.mutex); + + if (ref_count == 0) + qp_guest_endpoint_destroy(entry); + + return result; +} + +/* + * This functions handles the actual allocation of a VMCI queue + * pair guest endpoint. Allocates physical pages for the queue + * pair. It makes OS dependent calls through generic wrappers. + */ +static int qp_alloc_guest_work(struct vmci_handle *handle, + struct vmci_queue **produce_q, + u64 produce_size, + struct vmci_queue **consume_q, + u64 consume_size, + u32 peer, + u32 flags, + u32 priv_flags) +{ + const u64 num_produce_pages = + DIV_ROUND_UP(produce_size, PAGE_SIZE) + 1; + const u64 num_consume_pages = + DIV_ROUND_UP(consume_size, PAGE_SIZE) + 1; + void *my_produce_q = NULL; + void *my_consume_q = NULL; + int result; + struct qp_guest_endpoint *queue_pair_entry = NULL; + + if (priv_flags != VMCI_NO_PRIVILEGE_FLAGS) + return VMCI_ERROR_NO_ACCESS; + + mutex_lock(&qp_guest_endpoints.mutex); + + queue_pair_entry = qp_guest_handle_to_entry(*handle); + if (queue_pair_entry) { + if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) { + /* Local attach case. */ + if (queue_pair_entry->qp.ref_count > 1) { + pr_devel("Error attempting to attach more than once\n"); + result = VMCI_ERROR_UNAVAILABLE; + goto error_keep_entry; + } + + if (queue_pair_entry->qp.produce_size != consume_size || + queue_pair_entry->qp.consume_size != + produce_size || + queue_pair_entry->qp.flags != + (flags & ~VMCI_QPFLAG_ATTACH_ONLY)) { + pr_devel("Error mismatched queue pair in local attach\n"); + result = VMCI_ERROR_QUEUEPAIR_MISMATCH; + goto error_keep_entry; + } + + /* + * Do a local attach. We swap the consume and + * produce queues for the attacher and deliver + * an attach event. + */ + result = qp_notify_peer_local(true, *handle); + if (result < VMCI_SUCCESS) + goto error_keep_entry; + + my_produce_q = queue_pair_entry->consume_q; + my_consume_q = queue_pair_entry->produce_q; + goto out; + } + + result = VMCI_ERROR_ALREADY_EXISTS; + goto error_keep_entry; + } + + my_produce_q = qp_alloc_queue(produce_size, flags); + if (!my_produce_q) { + pr_warn("Error allocating pages for produce queue\n"); + result = VMCI_ERROR_NO_MEM; + goto error; + } + + my_consume_q = qp_alloc_queue(consume_size, flags); + if (!my_consume_q) { + pr_warn("Error allocating pages for consume queue\n"); + result = VMCI_ERROR_NO_MEM; + goto error; + } + + queue_pair_entry = qp_guest_endpoint_create(*handle, peer, flags, + produce_size, consume_size, + my_produce_q, my_consume_q); + if (!queue_pair_entry) { + pr_warn("Error allocating memory in %s\n", __func__); + result = VMCI_ERROR_NO_MEM; + goto error; + } + + result = qp_alloc_ppn_set(my_produce_q, num_produce_pages, my_consume_q, + num_consume_pages, + &queue_pair_entry->ppn_set); + if (result < VMCI_SUCCESS) { + pr_warn("qp_alloc_ppn_set failed\n"); + goto error; + } + + /* + * It's only necessary to notify the host if this queue pair will be + * attached to from another context. + */ + if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) { + /* Local create case. */ + u32 context_id = vmci_get_context_id(); + + /* + * Enforce similar checks on local queue pairs as we + * do for regular ones. The handle's context must + * match the creator or attacher context id (here they + * are both the current context id) and the + * attach-only flag cannot exist during create. We + * also ensure specified peer is this context or an + * invalid one. 
+ */ + if (queue_pair_entry->qp.handle.context != context_id || + (queue_pair_entry->qp.peer != VMCI_INVALID_ID && + queue_pair_entry->qp.peer != context_id)) { + result = VMCI_ERROR_NO_ACCESS; + goto error; + } + + if (queue_pair_entry->qp.flags & VMCI_QPFLAG_ATTACH_ONLY) { + result = VMCI_ERROR_NOT_FOUND; + goto error; + } + } else { + result = qp_alloc_hypercall(queue_pair_entry); + if (result < VMCI_SUCCESS) { + pr_devel("qp_alloc_hypercall result = %d\n", result); + goto error; + } + } + + qp_init_queue_mutex((struct vmci_queue *)my_produce_q, + (struct vmci_queue *)my_consume_q); + + qp_list_add_entry(&qp_guest_endpoints, &queue_pair_entry->qp); + + out: + queue_pair_entry->qp.ref_count++; + *handle = queue_pair_entry->qp.handle; + *produce_q = (struct vmci_queue *)my_produce_q; + *consume_q = (struct vmci_queue *)my_consume_q; + + /* + * We should initialize the queue pair header pages on a local + * queue pair create. For non-local queue pairs, the + * hypervisor initializes the header pages in the create step. + */ + if ((queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) && + queue_pair_entry->qp.ref_count == 1) { + vmci_q_header_init((*produce_q)->q_header, *handle); + vmci_q_header_init((*consume_q)->q_header, *handle); + } + + mutex_unlock(&qp_guest_endpoints.mutex); + + return VMCI_SUCCESS; + + error: + mutex_unlock(&qp_guest_endpoints.mutex); + if (queue_pair_entry) { + /* The queues will be freed inside the destroy routine. */ + qp_guest_endpoint_destroy(queue_pair_entry); + } else { + qp_free_queue(my_produce_q, produce_size); + qp_free_queue(my_consume_q, consume_size); + } + return result; + + error_keep_entry: + /* This path should only be used when an existing entry was found. */ + mutex_unlock(&qp_guest_endpoints.mutex); + return result; +} + +/* + * The first endpoint issuing a queue pair allocation will create the state + * of the queue pair in the queue pair broker. + * + * If the creator is a guest, it will associate a VMX virtual address range + * with the queue pair as specified by the page_store. For compatibility with + * older VMX'en, that would use a separate step to set the VMX virtual + * address range, the virtual address range can be registered later using + * vmci_qp_broker_set_page_store. In that case, a page_store of NULL should be + * used. + * + * If the creator is the host, a page_store of NULL should be used as well, + * since the host is not able to supply a page store for the queue pair. + * + * For older VMX and host callers, the queue pair will be created in the + * VMCIQPB_CREATED_NO_MEM state, and for current VMX callers, it will be + * created in VMCOQPB_CREATED_MEM state. + */ +static int qp_broker_create(struct vmci_handle handle, + u32 peer, + u32 flags, + u32 priv_flags, + u64 produce_size, + u64 consume_size, + struct vmci_qp_page_store *page_store, + struct vmci_ctx *context, + vmci_event_release_cb wakeup_cb, + void *client_data, struct qp_broker_entry **ent) +{ + struct qp_broker_entry *entry = NULL; + const u32 context_id = vmci_ctx_get_id(context); + bool is_local = flags & VMCI_QPFLAG_LOCAL; + int result; + u64 guest_produce_size; + u64 guest_consume_size; + + /* Do not create if the caller asked not to. */ + if (flags & VMCI_QPFLAG_ATTACH_ONLY) + return VMCI_ERROR_NOT_FOUND; + + /* + * Creator's context ID should match handle's context ID or the creator + * must allow the context in handle's context ID as the "peer". 
+ */ + if (handle.context != context_id && handle.context != peer) + return VMCI_ERROR_NO_ACCESS; + + if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(peer)) + return VMCI_ERROR_DST_UNREACHABLE; + + /* + * Creator's context ID for local queue pairs should match the + * peer, if a peer is specified. + */ + if (is_local && peer != VMCI_INVALID_ID && context_id != peer) + return VMCI_ERROR_NO_ACCESS; + + entry = kzalloc(sizeof(*entry), GFP_ATOMIC); + if (!entry) + return VMCI_ERROR_NO_MEM; + + if (vmci_ctx_get_id(context) == VMCI_HOST_CONTEXT_ID && !is_local) { + /* + * The queue pair broker entry stores values from the guest + * point of view, so a creating host side endpoint should swap + * produce and consume values -- unless it is a local queue + * pair, in which case no swapping is necessary, since the local + * attacher will swap queues. + */ + + guest_produce_size = consume_size; + guest_consume_size = produce_size; + } else { + guest_produce_size = produce_size; + guest_consume_size = consume_size; + } + + entry->qp.handle = handle; + entry->qp.peer = peer; + entry->qp.flags = flags; + entry->qp.produce_size = guest_produce_size; + entry->qp.consume_size = guest_consume_size; + entry->qp.ref_count = 1; + entry->create_id = context_id; + entry->attach_id = VMCI_INVALID_ID; + entry->state = VMCIQPB_NEW; + entry->require_trusted_attach = + !!(context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED); + entry->created_by_trusted = + !!(priv_flags & VMCI_PRIVILEGE_FLAG_TRUSTED); + entry->vmci_page_files = false; + entry->wakeup_cb = wakeup_cb; + entry->client_data = client_data; + entry->produce_q = qp_host_alloc_queue(guest_produce_size); + if (entry->produce_q == NULL) { + result = VMCI_ERROR_NO_MEM; + goto error; + } + entry->consume_q = qp_host_alloc_queue(guest_consume_size); + if (entry->consume_q == NULL) { + result = VMCI_ERROR_NO_MEM; + goto error; + } + + qp_init_queue_mutex(entry->produce_q, entry->consume_q); + + INIT_LIST_HEAD(&entry->qp.list_item); + + if (is_local) { + u8 *tmp; + + entry->local_mem = kcalloc(QPE_NUM_PAGES(entry->qp), + PAGE_SIZE, GFP_KERNEL); + if (entry->local_mem == NULL) { + result = VMCI_ERROR_NO_MEM; + goto error; + } + entry->state = VMCIQPB_CREATED_MEM; + entry->produce_q->q_header = entry->local_mem; + tmp = (u8 *)entry->local_mem + PAGE_SIZE * + (DIV_ROUND_UP(entry->qp.produce_size, PAGE_SIZE) + 1); + entry->consume_q->q_header = (struct vmci_queue_header *)tmp; + } else if (page_store) { + /* + * The VMX already initialized the queue pair headers, so no + * need for the kernel side to do that. + */ + result = qp_host_register_user_memory(page_store, + entry->produce_q, + entry->consume_q); + if (result < VMCI_SUCCESS) + goto error; + + entry->state = VMCIQPB_CREATED_MEM; + } else { + /* + * A create without a page_store may be either a host + * side create (in which case we are waiting for the + * guest side to supply the memory) or an old style + * queue pair create (in which case we will expect a + * set page store call as the next step). 
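+ *
+ * Informally, the create-time states assigned by this function are:
+ *   local pair            -> VMCIQPB_CREATED_MEM (kernel memory)
+ *   page_store supplied   -> VMCIQPB_CREATED_MEM (guest memory)
+ *   no page_store (yet)   -> VMCIQPB_CREATED_NO_MEM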
+ */ + entry->state = VMCIQPB_CREATED_NO_MEM; + } + + qp_list_add_entry(&qp_broker_list, &entry->qp); + if (ent != NULL) + *ent = entry; + + /* Add to resource obj */ + result = vmci_resource_add(&entry->resource, + VMCI_RESOURCE_TYPE_QPAIR_HOST, + handle); + if (result != VMCI_SUCCESS) { + pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d", + handle.context, handle.resource, result); + goto error; + } + + entry->qp.handle = vmci_resource_handle(&entry->resource); + if (is_local) { + vmci_q_header_init(entry->produce_q->q_header, + entry->qp.handle); + vmci_q_header_init(entry->consume_q->q_header, + entry->qp.handle); + } + + vmci_ctx_qp_create(context, entry->qp.handle); + + return VMCI_SUCCESS; + + error: + if (entry != NULL) { + qp_host_free_queue(entry->produce_q, guest_produce_size); + qp_host_free_queue(entry->consume_q, guest_consume_size); + kfree(entry); + } + + return result; +} + +/* + * Enqueues an event datagram to notify the peer VM attached to + * the given queue pair handle about attach/detach event by the + * given VM. Returns Payload size of datagram enqueued on + * success, error code otherwise. + */ +static int qp_notify_peer(bool attach, + struct vmci_handle handle, + u32 my_id, + u32 peer_id) +{ + int rv; + struct vmci_event_qp ev; + + if (vmci_handle_is_invalid(handle) || my_id == VMCI_INVALID_ID || + peer_id == VMCI_INVALID_ID) + return VMCI_ERROR_INVALID_ARGS; + + /* + * In vmci_ctx_enqueue_datagram() we enforce the upper limit on + * number of pending events from the hypervisor to a given VM + * otherwise a rogue VM could do an arbitrary number of attach + * and detach operations causing memory pressure in the host + * kernel. + */ + + memset(&ev, 0, sizeof(ev)); + ev.msg.hdr.dst = vmci_make_handle(peer_id, VMCI_EVENT_HANDLER); + ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_CONTEXT_RESOURCE_ID); + ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr); + ev.msg.event_data.event = attach ? + VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH; + ev.payload.handle = handle; + ev.payload.peer_id = my_id; + + rv = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID, + &ev.msg.hdr, false); + if (rv < VMCI_SUCCESS) + pr_warn("Failed to enqueue queue_pair %s event datagram for context (ID=0x%x)\n", + attach ? "ATTACH" : "DETACH", peer_id); + + return rv; +} + +/* + * The second endpoint issuing a queue pair allocation will attach to + * the queue pair registered with the queue pair broker. + * + * If the attacher is a guest, it will associate a VMX virtual address + * range with the queue pair as specified by the page_store. At this + * point, the already attach host endpoint may start using the queue + * pair, and an attach event is sent to it. For compatibility with + * older VMX'en, that used a separate step to set the VMX virtual + * address range, the virtual address range can be registered later + * using vmci_qp_broker_set_page_store. In that case, a page_store of + * NULL should be used, and the attach event will be generated once + * the actual page store has been set. + * + * If the attacher is the host, a page_store of NULL should be used as + * well, since the page store information is already set by the guest. + * + * For new VMX and host callers, the queue pair will be moved to the + * VMCIQPB_ATTACHED_MEM state, and for older VMX callers, it will be + * moved to the VMCOQPB_ATTACHED_NO_MEM state. 
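+ *
+ * The attacher's produce and consume sizes are validated against the
+ * values stored by the creator (swapped when the host is the
+ * attacher), and once the pair reaches VMCIQPB_ATTACHED_MEM an attach
+ * event is sent to the creator.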
+ */ +static int qp_broker_attach(struct qp_broker_entry *entry, + u32 peer, + u32 flags, + u32 priv_flags, + u64 produce_size, + u64 consume_size, + struct vmci_qp_page_store *page_store, + struct vmci_ctx *context, + vmci_event_release_cb wakeup_cb, + void *client_data, + struct qp_broker_entry **ent) +{ + const u32 context_id = vmci_ctx_get_id(context); + bool is_local = flags & VMCI_QPFLAG_LOCAL; + int result; + + if (entry->state != VMCIQPB_CREATED_NO_MEM && + entry->state != VMCIQPB_CREATED_MEM) + return VMCI_ERROR_UNAVAILABLE; + + if (is_local) { + if (!(entry->qp.flags & VMCI_QPFLAG_LOCAL) || + context_id != entry->create_id) { + return VMCI_ERROR_INVALID_ARGS; + } + } else if (context_id == entry->create_id || + context_id == entry->attach_id) { + return VMCI_ERROR_ALREADY_EXISTS; + } + + if (VMCI_CONTEXT_IS_VM(context_id) && + VMCI_CONTEXT_IS_VM(entry->create_id)) + return VMCI_ERROR_DST_UNREACHABLE; + + /* + * If we are attaching from a restricted context then the queuepair + * must have been created by a trusted endpoint. + */ + if ((context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) && + !entry->created_by_trusted) + return VMCI_ERROR_NO_ACCESS; + + /* + * If we are attaching to a queuepair that was created by a restricted + * context then we must be trusted. + */ + if (entry->require_trusted_attach && + (!(priv_flags & VMCI_PRIVILEGE_FLAG_TRUSTED))) + return VMCI_ERROR_NO_ACCESS; + + /* + * If the creator specifies VMCI_INVALID_ID in "peer" field, access + * control check is not performed. + */ + if (entry->qp.peer != VMCI_INVALID_ID && entry->qp.peer != context_id) + return VMCI_ERROR_NO_ACCESS; + + if (entry->create_id == VMCI_HOST_CONTEXT_ID) { + /* + * Do not attach if the caller doesn't support Host Queue Pairs + * and a host created this queue pair. + */ + + if (!vmci_ctx_supports_host_qp(context)) + return VMCI_ERROR_INVALID_RESOURCE; + + } else if (context_id == VMCI_HOST_CONTEXT_ID) { + struct vmci_ctx *create_context; + bool supports_host_qp; + + /* + * Do not attach a host to a user created queue pair if that + * user doesn't support host queue pair end points. + */ + + create_context = vmci_ctx_get(entry->create_id); + supports_host_qp = vmci_ctx_supports_host_qp(create_context); + vmci_ctx_put(create_context); + + if (!supports_host_qp) + return VMCI_ERROR_INVALID_RESOURCE; + } + + if ((entry->qp.flags & ~VMCI_QP_ASYMM) != (flags & ~VMCI_QP_ASYMM_PEER)) + return VMCI_ERROR_QUEUEPAIR_MISMATCH; + + if (context_id != VMCI_HOST_CONTEXT_ID) { + /* + * The queue pair broker entry stores values from the guest + * point of view, so an attaching guest should match the values + * stored in the entry. + */ + + if (entry->qp.produce_size != produce_size || + entry->qp.consume_size != consume_size) { + return VMCI_ERROR_QUEUEPAIR_MISMATCH; + } + } else if (entry->qp.produce_size != consume_size || + entry->qp.consume_size != produce_size) { + return VMCI_ERROR_QUEUEPAIR_MISMATCH; + } + + if (context_id != VMCI_HOST_CONTEXT_ID) { + /* + * If a guest attached to a queue pair, it will supply + * the backing memory. If this is a pre NOVMVM vmx, + * the backing memory will be supplied by calling + * vmci_qp_broker_set_page_store() following the + * return of the vmci_qp_broker_alloc() call. If it is + * a vmx of version NOVMVM or later, the page store + * must be supplied as part of the + * vmci_qp_broker_alloc call. Under all circumstances + * must the initially created queue pair not have any + * memory associated with it already. 
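+ *
+ * Informally, the two cases handled below are:
+ *   page_store supplied  -> register the guest memory and move to
+ *                           VMCIQPB_ATTACHED_MEM
+ *   no page_store (yet)  -> VMCIQPB_ATTACHED_NO_MEM until
+ *                           vmci_qp_broker_set_page_store() is called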
+ */ + + if (entry->state != VMCIQPB_CREATED_NO_MEM) + return VMCI_ERROR_INVALID_ARGS; + + if (page_store != NULL) { + /* + * Patch up host state to point to guest + * supplied memory. The VMX already + * initialized the queue pair headers, so no + * need for the kernel side to do that. + */ + + result = qp_host_register_user_memory(page_store, + entry->produce_q, + entry->consume_q); + if (result < VMCI_SUCCESS) + return result; + + entry->state = VMCIQPB_ATTACHED_MEM; + } else { + entry->state = VMCIQPB_ATTACHED_NO_MEM; + } + } else if (entry->state == VMCIQPB_CREATED_NO_MEM) { + /* + * The host side is attempting to attach to a queue + * pair that doesn't have any memory associated with + * it. This must be a pre NOVMVM vmx that hasn't set + * the page store information yet, or a quiesced VM. + */ + + return VMCI_ERROR_UNAVAILABLE; + } else { + /* The host side has successfully attached to a queue pair. */ + entry->state = VMCIQPB_ATTACHED_MEM; + } + + if (entry->state == VMCIQPB_ATTACHED_MEM) { + result = + qp_notify_peer(true, entry->qp.handle, context_id, + entry->create_id); + if (result < VMCI_SUCCESS) + pr_warn("Failed to notify peer (ID=0x%x) of attach to queue pair (handle=0x%x:0x%x)\n", + entry->create_id, entry->qp.handle.context, + entry->qp.handle.resource); + } + + entry->attach_id = context_id; + entry->qp.ref_count++; + if (wakeup_cb) { + entry->wakeup_cb = wakeup_cb; + entry->client_data = client_data; + } + + /* + * When attaching to local queue pairs, the context already has + * an entry tracking the queue pair, so don't add another one. + */ + if (!is_local) + vmci_ctx_qp_create(context, entry->qp.handle); + + if (ent != NULL) + *ent = entry; + + return VMCI_SUCCESS; +} + +/* + * queue_pair_Alloc for use when setting up queue pair endpoints + * on the host. + */ +static int qp_broker_alloc(struct vmci_handle handle, + u32 peer, + u32 flags, + u32 priv_flags, + u64 produce_size, + u64 consume_size, + struct vmci_qp_page_store *page_store, + struct vmci_ctx *context, + vmci_event_release_cb wakeup_cb, + void *client_data, + struct qp_broker_entry **ent, + bool *swap) +{ + const u32 context_id = vmci_ctx_get_id(context); + bool create; + struct qp_broker_entry *entry = NULL; + bool is_local = flags & VMCI_QPFLAG_LOCAL; + int result; + + if (vmci_handle_is_invalid(handle) || + (flags & ~VMCI_QP_ALL_FLAGS) || is_local || + !(produce_size || consume_size) || + !context || context_id == VMCI_INVALID_ID || + handle.context == VMCI_INVALID_ID) { + return VMCI_ERROR_INVALID_ARGS; + } + + if (page_store && !VMCI_QP_PAGESTORE_IS_WELLFORMED(page_store)) + return VMCI_ERROR_INVALID_ARGS; + + /* + * In the initial argument check, we ensure that non-vmkernel hosts + * are not allowed to create local queue pairs. 
+ */ + + mutex_lock(&qp_broker_list.mutex); + + if (!is_local && vmci_ctx_qp_exists(context, handle)) { + pr_devel("Context (ID=0x%x) already attached to queue pair (handle=0x%x:0x%x)\n", + context_id, handle.context, handle.resource); + mutex_unlock(&qp_broker_list.mutex); + return VMCI_ERROR_ALREADY_EXISTS; + } + + if (handle.resource != VMCI_INVALID_ID) + entry = qp_broker_handle_to_entry(handle); + + if (!entry) { + create = true; + result = + qp_broker_create(handle, peer, flags, priv_flags, + produce_size, consume_size, page_store, + context, wakeup_cb, client_data, ent); + } else { + create = false; + result = + qp_broker_attach(entry, peer, flags, priv_flags, + produce_size, consume_size, page_store, + context, wakeup_cb, client_data, ent); + } + + mutex_unlock(&qp_broker_list.mutex); + + if (swap) + *swap = (context_id == VMCI_HOST_CONTEXT_ID) && + !(create && is_local); + + return result; +} + +/* + * This function implements the kernel API for allocating a queue + * pair. + */ +static int qp_alloc_host_work(struct vmci_handle *handle, + struct vmci_queue **produce_q, + u64 produce_size, + struct vmci_queue **consume_q, + u64 consume_size, + u32 peer, + u32 flags, + u32 priv_flags, + vmci_event_release_cb wakeup_cb, + void *client_data) +{ + struct vmci_handle new_handle; + struct vmci_ctx *context; + struct qp_broker_entry *entry; + int result; + bool swap; + + if (vmci_handle_is_invalid(*handle)) { + new_handle = vmci_make_handle( + VMCI_HOST_CONTEXT_ID, VMCI_INVALID_ID); + } else + new_handle = *handle; + + context = vmci_ctx_get(VMCI_HOST_CONTEXT_ID); + entry = NULL; + result = + qp_broker_alloc(new_handle, peer, flags, priv_flags, + produce_size, consume_size, NULL, context, + wakeup_cb, client_data, &entry, &swap); + if (result == VMCI_SUCCESS) { + if (swap) { + /* + * If this is a local queue pair, the attacher + * will swap around produce and consume + * queues. + */ + + *produce_q = entry->consume_q; + *consume_q = entry->produce_q; + } else { + *produce_q = entry->produce_q; + *consume_q = entry->consume_q; + } + + *handle = vmci_resource_handle(&entry->resource); + } else { + *handle = VMCI_INVALID_HANDLE; + pr_devel("queue pair broker failed to alloc (result=%d)\n", + result); + } + vmci_ctx_put(context); + return result; +} + +/* + * Allocates a VMCI queue_pair. Only checks validity of input + * arguments. The real work is done in the host or guest + * specific function. + */ +int vmci_qp_alloc(struct vmci_handle *handle, + struct vmci_queue **produce_q, + u64 produce_size, + struct vmci_queue **consume_q, + u64 consume_size, + u32 peer, + u32 flags, + u32 priv_flags, + bool guest_endpoint, + vmci_event_release_cb wakeup_cb, + void *client_data) +{ + if (!handle || !produce_q || !consume_q || + (!produce_size && !consume_size) || (flags & ~VMCI_QP_ALL_FLAGS)) + return VMCI_ERROR_INVALID_ARGS; + + if (guest_endpoint) { + return qp_alloc_guest_work(handle, produce_q, + produce_size, consume_q, + consume_size, peer, + flags, priv_flags); + } else { + return qp_alloc_host_work(handle, produce_q, + produce_size, consume_q, + consume_size, peer, flags, + priv_flags, wakeup_cb, client_data); + } +} + +/* + * This function implements the host kernel API for detaching from + * a queue pair. 
+ */ +static int qp_detatch_host_work(struct vmci_handle handle) +{ + int result; + struct vmci_ctx *context; + + context = vmci_ctx_get(VMCI_HOST_CONTEXT_ID); + + result = vmci_qp_broker_detach(handle, context); + + vmci_ctx_put(context); + return result; +} + +/* + * Detaches from a VMCI queue_pair. Only checks validity of input argument. + * Real work is done in the host or guest specific function. + */ +static int qp_detatch(struct vmci_handle handle, bool guest_endpoint) +{ + if (vmci_handle_is_invalid(handle)) + return VMCI_ERROR_INVALID_ARGS; + + if (guest_endpoint) + return qp_detatch_guest_work(handle); + else + return qp_detatch_host_work(handle); +} + +/* + * Returns the entry from the head of the list. Assumes that the list is + * locked. + */ +static struct qp_entry *qp_list_get_head(struct qp_list *qp_list) +{ + if (!list_empty(&qp_list->head)) { + struct qp_entry *entry = + list_first_entry(&qp_list->head, struct qp_entry, + list_item); + return entry; + } + + return NULL; +} + +void vmci_qp_broker_exit(void) +{ + struct qp_entry *entry; + struct qp_broker_entry *be; + + mutex_lock(&qp_broker_list.mutex); + + while ((entry = qp_list_get_head(&qp_broker_list))) { + be = (struct qp_broker_entry *)entry; + + qp_list_remove_entry(&qp_broker_list, entry); + kfree(be); + } + + mutex_unlock(&qp_broker_list.mutex); +} + +/* + * Requests that a queue pair be allocated with the VMCI queue + * pair broker. Allocates a queue pair entry if one does not + * exist. Attaches to one if it exists, and retrieves the page + * files backing that queue_pair. Assumes that the queue pair + * broker lock is held. + */ +int vmci_qp_broker_alloc(struct vmci_handle handle, + u32 peer, + u32 flags, + u32 priv_flags, + u64 produce_size, + u64 consume_size, + struct vmci_qp_page_store *page_store, + struct vmci_ctx *context) +{ + if (!QP_SIZES_ARE_VALID(produce_size, consume_size)) + return VMCI_ERROR_NO_RESOURCES; + + return qp_broker_alloc(handle, peer, flags, priv_flags, + produce_size, consume_size, + page_store, context, NULL, NULL, NULL, NULL); +} + +/* + * VMX'en with versions lower than VMCI_VERSION_NOVMVM use a separate + * step to add the UVAs of the VMX mapping of the queue pair. This function + * provides backwards compatibility with such VMX'en, and takes care of + * registering the page store for a queue pair previously allocated by the + * VMX during create or attach. This function will move the queue pair state + * to either from VMCIQBP_CREATED_NO_MEM to VMCIQBP_CREATED_MEM or + * VMCIQBP_ATTACHED_NO_MEM to VMCIQBP_ATTACHED_MEM. If moving to the + * attached state with memory, the queue pair is ready to be used by the + * host peer, and an attached event will be generated. + * + * Assumes that the queue pair broker lock is held. + * + * This function is only used by the hosted platform, since there is no + * issue with backwards compatibility for vmkernel. + */ +int vmci_qp_broker_set_page_store(struct vmci_handle handle, + u64 produce_uva, + u64 consume_uva, + struct vmci_ctx *context) +{ + struct qp_broker_entry *entry; + int result; + const u32 context_id = vmci_ctx_get_id(context); + + if (vmci_handle_is_invalid(handle) || !context || + context_id == VMCI_INVALID_ID) + return VMCI_ERROR_INVALID_ARGS; + + /* + * We only support guest to host queue pairs, so the VMX must + * supply UVAs for the mapped page files. 
+ */ + + if (produce_uva == 0 || consume_uva == 0) + return VMCI_ERROR_INVALID_ARGS; + + mutex_lock(&qp_broker_list.mutex); + + if (!vmci_ctx_qp_exists(context, handle)) { + pr_warn("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n", + context_id, handle.context, handle.resource); + result = VMCI_ERROR_NOT_FOUND; + goto out; + } + + entry = qp_broker_handle_to_entry(handle); + if (!entry) { + result = VMCI_ERROR_NOT_FOUND; + goto out; + } + + /* + * If I'm the owner then I can set the page store. + * + * Or, if a host created the queue_pair and I'm the attached peer + * then I can set the page store. + */ + if (entry->create_id != context_id && + (entry->create_id != VMCI_HOST_CONTEXT_ID || + entry->attach_id != context_id)) { + result = VMCI_ERROR_QUEUEPAIR_NOTOWNER; + goto out; + } + + if (entry->state != VMCIQPB_CREATED_NO_MEM && + entry->state != VMCIQPB_ATTACHED_NO_MEM) { + result = VMCI_ERROR_UNAVAILABLE; + goto out; + } + + result = qp_host_get_user_memory(produce_uva, consume_uva, + entry->produce_q, entry->consume_q); + if (result < VMCI_SUCCESS) + goto out; + + result = qp_host_map_queues(entry->produce_q, entry->consume_q); + if (result < VMCI_SUCCESS) { + qp_host_unregister_user_memory(entry->produce_q, + entry->consume_q); + goto out; + } + + if (entry->state == VMCIQPB_CREATED_NO_MEM) + entry->state = VMCIQPB_CREATED_MEM; + else + entry->state = VMCIQPB_ATTACHED_MEM; + + entry->vmci_page_files = true; + + if (entry->state == VMCIQPB_ATTACHED_MEM) { + result = + qp_notify_peer(true, handle, context_id, entry->create_id); + if (result < VMCI_SUCCESS) { + pr_warn("Failed to notify peer (ID=0x%x) of attach to queue pair (handle=0x%x:0x%x)\n", + entry->create_id, entry->qp.handle.context, + entry->qp.handle.resource); + } + } + + result = VMCI_SUCCESS; + out: + mutex_unlock(&qp_broker_list.mutex); + return result; +} + +/* + * Resets saved queue headers for the given QP broker + * entry. Should be used when guest memory becomes available + * again, or the guest detaches. + */ +static void qp_reset_saved_headers(struct qp_broker_entry *entry) +{ + entry->produce_q->saved_header = NULL; + entry->consume_q->saved_header = NULL; +} + +/* + * The main entry point for detaching from a queue pair registered with the + * queue pair broker. If more than one endpoint is attached to the queue + * pair, the first endpoint will mainly decrement a reference count and + * generate a notification to its peer. The last endpoint will clean up + * the queue pair state registered with the broker. + * + * When a guest endpoint detaches, it will unmap and unregister the guest + * memory backing the queue pair. If the host is still attached, it will + * no longer be able to access the queue pair content. + * + * If the queue pair is already in a state where there is no memory + * registered for the queue pair (any *_NO_MEM state), it will transition to + * the VMCIQPB_SHUTDOWN_NO_MEM state. This will also happen, if a guest + * endpoint is the first of two endpoints to detach. If the host endpoint is + * the first out of two to detach, the queue pair will move to the + * VMCIQPB_SHUTDOWN_MEM state. 
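+ *
+ * Informally, the detach transitions described above are:
+ *   any *_NO_MEM state           -> VMCIQPB_SHUTDOWN_NO_MEM
+ *   *_MEM, guest detaches first  -> VMCIQPB_SHUTDOWN_NO_MEM
+ *   *_MEM, host detaches first   -> VMCIQPB_SHUTDOWN_MEM
+ *   last endpoint detaches       -> broker entry is freed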
+ */ +int vmci_qp_broker_detach(struct vmci_handle handle, struct vmci_ctx *context) +{ + struct qp_broker_entry *entry; + const u32 context_id = vmci_ctx_get_id(context); + u32 peer_id; + bool is_local = false; + int result; + + if (vmci_handle_is_invalid(handle) || !context || + context_id == VMCI_INVALID_ID) { + return VMCI_ERROR_INVALID_ARGS; + } + + mutex_lock(&qp_broker_list.mutex); + + if (!vmci_ctx_qp_exists(context, handle)) { + pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n", + context_id, handle.context, handle.resource); + result = VMCI_ERROR_NOT_FOUND; + goto out; + } + + entry = qp_broker_handle_to_entry(handle); + if (!entry) { + pr_devel("Context (ID=0x%x) reports being attached to queue pair(handle=0x%x:0x%x) that isn't present in broker\n", + context_id, handle.context, handle.resource); + result = VMCI_ERROR_NOT_FOUND; + goto out; + } + + if (context_id != entry->create_id && context_id != entry->attach_id) { + result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED; + goto out; + } + + if (context_id == entry->create_id) { + peer_id = entry->attach_id; + entry->create_id = VMCI_INVALID_ID; + } else { + peer_id = entry->create_id; + entry->attach_id = VMCI_INVALID_ID; + } + entry->qp.ref_count--; + + is_local = entry->qp.flags & VMCI_QPFLAG_LOCAL; + + if (context_id != VMCI_HOST_CONTEXT_ID) { + bool headers_mapped; + + /* + * Pre NOVMVM vmx'en may detach from a queue pair + * before setting the page store, and in that case + * there is no user memory to detach from. Also, more + * recent VMX'en may detach from a queue pair in the + * quiesced state. + */ + + qp_acquire_queue_mutex(entry->produce_q); + headers_mapped = entry->produce_q->q_header || + entry->consume_q->q_header; + if (QPBROKERSTATE_HAS_MEM(entry)) { + result = + qp_host_unmap_queues(INVALID_VMCI_GUEST_MEM_ID, + entry->produce_q, + entry->consume_q); + if (result < VMCI_SUCCESS) + pr_warn("Failed to unmap queue headers for queue pair (handle=0x%x:0x%x,result=%d)\n", + handle.context, handle.resource, + result); + + qp_host_unregister_user_memory(entry->produce_q, + entry->consume_q); + + } + + if (!headers_mapped) + qp_reset_saved_headers(entry); + + qp_release_queue_mutex(entry->produce_q); + + if (!headers_mapped && entry->wakeup_cb) + entry->wakeup_cb(entry->client_data); + + } else { + if (entry->wakeup_cb) { + entry->wakeup_cb = NULL; + entry->client_data = NULL; + } + } + + if (entry->qp.ref_count == 0) { + qp_list_remove_entry(&qp_broker_list, &entry->qp); + + if (is_local) + kfree(entry->local_mem); + + qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q); + qp_host_free_queue(entry->produce_q, entry->qp.produce_size); + qp_host_free_queue(entry->consume_q, entry->qp.consume_size); + /* Unlink from resource hash table and free callback */ + vmci_resource_remove(&entry->resource); + + kfree(entry); + + vmci_ctx_qp_destroy(context, handle); + } else { + qp_notify_peer(false, handle, context_id, peer_id); + if (context_id == VMCI_HOST_CONTEXT_ID && + QPBROKERSTATE_HAS_MEM(entry)) { + entry->state = VMCIQPB_SHUTDOWN_MEM; + } else { + entry->state = VMCIQPB_SHUTDOWN_NO_MEM; + } + + if (!is_local) + vmci_ctx_qp_destroy(context, handle); + + } + result = VMCI_SUCCESS; + out: + mutex_unlock(&qp_broker_list.mutex); + return result; +} + +/* + * Establishes the necessary mappings for a queue pair given a + * reference to the queue pair guest memory. This is usually + * called when a guest is unquiesced and the VMX is allowed to + * map guest memory once again. 
+ */ +int vmci_qp_broker_map(struct vmci_handle handle, + struct vmci_ctx *context, + u64 guest_mem) +{ + struct qp_broker_entry *entry; + const u32 context_id = vmci_ctx_get_id(context); + int result; + + if (vmci_handle_is_invalid(handle) || !context || + context_id == VMCI_INVALID_ID) + return VMCI_ERROR_INVALID_ARGS; + + mutex_lock(&qp_broker_list.mutex); + + if (!vmci_ctx_qp_exists(context, handle)) { + pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n", + context_id, handle.context, handle.resource); + result = VMCI_ERROR_NOT_FOUND; + goto out; + } + + entry = qp_broker_handle_to_entry(handle); + if (!entry) { + pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n", + context_id, handle.context, handle.resource); + result = VMCI_ERROR_NOT_FOUND; + goto out; + } + + if (context_id != entry->create_id && context_id != entry->attach_id) { + result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED; + goto out; + } + + result = VMCI_SUCCESS; + + if (context_id != VMCI_HOST_CONTEXT_ID && + !QPBROKERSTATE_HAS_MEM(entry)) { + struct vmci_qp_page_store page_store; + + page_store.pages = guest_mem; + page_store.len = QPE_NUM_PAGES(entry->qp); + + qp_acquire_queue_mutex(entry->produce_q); + qp_reset_saved_headers(entry); + result = + qp_host_register_user_memory(&page_store, + entry->produce_q, + entry->consume_q); + qp_release_queue_mutex(entry->produce_q); + if (result == VMCI_SUCCESS) { + /* Move state from *_NO_MEM to *_MEM */ + + entry->state++; + + if (entry->wakeup_cb) + entry->wakeup_cb(entry->client_data); + } + } + + out: + mutex_unlock(&qp_broker_list.mutex); + return result; +} + +/* + * Saves a snapshot of the queue headers for the given QP broker + * entry. Should be used when guest memory is unmapped. + * Results: + * VMCI_SUCCESS on success, appropriate error code if guest memory + * can't be accessed.. + */ +static int qp_save_headers(struct qp_broker_entry *entry) +{ + int result; + + if (entry->produce_q->saved_header != NULL && + entry->consume_q->saved_header != NULL) { + /* + * If the headers have already been saved, we don't need to do + * it again, and we don't want to map in the headers + * unnecessarily. + */ + + return VMCI_SUCCESS; + } + + if (NULL == entry->produce_q->q_header || + NULL == entry->consume_q->q_header) { + result = qp_host_map_queues(entry->produce_q, entry->consume_q); + if (result < VMCI_SUCCESS) + return result; + } + + memcpy(&entry->saved_produce_q, entry->produce_q->q_header, + sizeof(entry->saved_produce_q)); + entry->produce_q->saved_header = &entry->saved_produce_q; + memcpy(&entry->saved_consume_q, entry->consume_q->q_header, + sizeof(entry->saved_consume_q)); + entry->consume_q->saved_header = &entry->saved_consume_q; + + return VMCI_SUCCESS; +} + +/* + * Removes all references to the guest memory of a given queue pair, and + * will move the queue pair from state *_MEM to *_NO_MEM. It is usually + * called when a VM is being quiesced where access to guest memory should + * avoided. 
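+ *
+ * This is the counterpart of vmci_qp_broker_map(): the queue headers
+ * are saved, the guest memory is unregistered, and the entry state
+ * steps back from its *_MEM state to the matching *_NO_MEM state.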
+ */ +int vmci_qp_broker_unmap(struct vmci_handle handle, + struct vmci_ctx *context, + u32 gid) +{ + struct qp_broker_entry *entry; + const u32 context_id = vmci_ctx_get_id(context); + int result; + + if (vmci_handle_is_invalid(handle) || !context || + context_id == VMCI_INVALID_ID) + return VMCI_ERROR_INVALID_ARGS; + + mutex_lock(&qp_broker_list.mutex); + + if (!vmci_ctx_qp_exists(context, handle)) { + pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n", + context_id, handle.context, handle.resource); + result = VMCI_ERROR_NOT_FOUND; + goto out; + } + + entry = qp_broker_handle_to_entry(handle); + if (!entry) { + pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n", + context_id, handle.context, handle.resource); + result = VMCI_ERROR_NOT_FOUND; + goto out; + } + + if (context_id != entry->create_id && context_id != entry->attach_id) { + result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED; + goto out; + } + + if (context_id != VMCI_HOST_CONTEXT_ID && + QPBROKERSTATE_HAS_MEM(entry)) { + qp_acquire_queue_mutex(entry->produce_q); + result = qp_save_headers(entry); + if (result < VMCI_SUCCESS) + pr_warn("Failed to save queue headers for queue pair (handle=0x%x:0x%x,result=%d)\n", + handle.context, handle.resource, result); + + qp_host_unmap_queues(gid, entry->produce_q, entry->consume_q); + + /* + * On hosted, when we unmap queue pairs, the VMX will also + * unmap the guest memory, so we invalidate the previously + * registered memory. If the queue pair is mapped again at a + * later point in time, we will need to reregister the user + * memory with a possibly new user VA. + */ + qp_host_unregister_user_memory(entry->produce_q, + entry->consume_q); + + /* + * Move state from *_MEM to *_NO_MEM. + */ + entry->state--; + + qp_release_queue_mutex(entry->produce_q); + } + + result = VMCI_SUCCESS; + + out: + mutex_unlock(&qp_broker_list.mutex); + return result; +} + +/* + * Destroys all guest queue pair endpoints. If active guest queue + * pairs still exist, hypercalls to attempt detach from these + * queue pairs will be made. Any failure to detach is silently + * ignored. + */ +void vmci_qp_guest_endpoints_exit(void) +{ + struct qp_entry *entry; + struct qp_guest_endpoint *ep; + + mutex_lock(&qp_guest_endpoints.mutex); + + while ((entry = qp_list_get_head(&qp_guest_endpoints))) { + ep = (struct qp_guest_endpoint *)entry; + + /* Don't make a hypercall for local queue_pairs. */ + if (!(entry->flags & VMCI_QPFLAG_LOCAL)) + qp_detatch_hypercall(entry->handle); + + /* We cannot fail the exit, so let's reset ref_count. */ + entry->ref_count = 0; + qp_list_remove_entry(&qp_guest_endpoints, entry); + + qp_guest_endpoint_destroy(ep); + } + + mutex_unlock(&qp_guest_endpoints.mutex); +} + +/* + * Helper routine that will lock the queue pair before subsequent + * operations. + * Note: Non-blocking on the host side is currently only implemented in ESX. + * Since non-blocking isn't yet implemented on the host personality we + * have no reason to acquire a spin lock. So to avoid the use of an + * unnecessary lock only acquire the mutex if we can block. + */ +static void qp_lock(const struct vmci_qp *qpair) +{ + qp_acquire_queue_mutex(qpair->produce_q); +} + +/* + * Helper routine that unlocks the queue pair after calling + * qp_lock. + */ +static void qp_unlock(const struct vmci_qp *qpair) +{ + qp_release_queue_mutex(qpair->produce_q); +} + +/* + * The queue headers may not be mapped at all times. 
If a queue is + * currently not mapped, it will be attempted to do so. + */ +static int qp_map_queue_headers(struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + int result; + + if (NULL == produce_q->q_header || NULL == consume_q->q_header) { + result = qp_host_map_queues(produce_q, consume_q); + if (result < VMCI_SUCCESS) + return (produce_q->saved_header && + consume_q->saved_header) ? + VMCI_ERROR_QUEUEPAIR_NOT_READY : + VMCI_ERROR_QUEUEPAIR_NOTATTACHED; + } + + return VMCI_SUCCESS; +} + +/* + * Helper routine that will retrieve the produce and consume + * headers of a given queue pair. If the guest memory of the + * queue pair is currently not available, the saved queue headers + * will be returned, if these are available. + */ +static int qp_get_queue_headers(const struct vmci_qp *qpair, + struct vmci_queue_header **produce_q_header, + struct vmci_queue_header **consume_q_header) +{ + int result; + + result = qp_map_queue_headers(qpair->produce_q, qpair->consume_q); + if (result == VMCI_SUCCESS) { + *produce_q_header = qpair->produce_q->q_header; + *consume_q_header = qpair->consume_q->q_header; + } else if (qpair->produce_q->saved_header && + qpair->consume_q->saved_header) { + *produce_q_header = qpair->produce_q->saved_header; + *consume_q_header = qpair->consume_q->saved_header; + result = VMCI_SUCCESS; + } + + return result; +} + +/* + * Callback from VMCI queue pair broker indicating that a queue + * pair that was previously not ready, now either is ready or + * gone forever. + */ +static int qp_wakeup_cb(void *client_data) +{ + struct vmci_qp *qpair = (struct vmci_qp *)client_data; + + qp_lock(qpair); + while (qpair->blocked > 0) { + qpair->blocked--; + qpair->generation++; + wake_up(&qpair->event); + } + qp_unlock(qpair); + + return VMCI_SUCCESS; +} + +/* + * Makes the calling thread wait for the queue pair to become + * ready for host side access. Returns true when thread is + * woken up after queue pair state change, false otherwise. + */ +static bool qp_wait_for_ready_queue(struct vmci_qp *qpair) +{ + unsigned int generation; + + qpair->blocked++; + generation = qpair->generation; + qp_unlock(qpair); + wait_event(qpair->event, generation != qpair->generation); + qp_lock(qpair); + + return true; +} + +/* + * Enqueues a given buffer to the produce queue using the provided + * function. As many bytes as possible (space available in the queue) + * are enqueued. Assumes the queue->mutex has been acquired. Returns + * VMCI_ERROR_QUEUEPAIR_NOSPACE if no space was available to enqueue + * data, VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the + * queue (as defined by the queue size), VMCI_ERROR_INVALID_ARGS, if + * an error occured when accessing the buffer, + * VMCI_ERROR_QUEUEPAIR_NOTATTACHED, if the queue pair pages aren't + * available. Otherwise, the number of bytes written to the queue is + * returned. Updates the tail pointer of the produce queue. 
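+ *
+ * As an illustration of the wrap-around case handled below (the
+ * numbers are hypothetical): with a produce_q_size of 4096, a tail of
+ * 3800 and 500 bytes to write, the first copy covers the 296 bytes up
+ * to the end of the queue and the second copy places the remaining
+ * 204 bytes at offset 0, after which the producer tail advances to
+ * 204.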
+ */ +static ssize_t qp_enqueue_locked(struct vmci_queue *produce_q, + struct vmci_queue *consume_q, + const u64 produce_q_size, + struct iov_iter *from) +{ + s64 free_space; + u64 tail; + size_t buf_size = iov_iter_count(from); + size_t written; + ssize_t result; + + result = qp_map_queue_headers(produce_q, consume_q); + if (unlikely(result != VMCI_SUCCESS)) + return result; + + free_space = vmci_q_header_free_space(produce_q->q_header, + consume_q->q_header, + produce_q_size); + if (free_space == 0) + return VMCI_ERROR_QUEUEPAIR_NOSPACE; + + if (free_space < VMCI_SUCCESS) + return (ssize_t) free_space; + + written = (size_t) (free_space > buf_size ? buf_size : free_space); + tail = vmci_q_header_producer_tail(produce_q->q_header); + if (likely(tail + written < produce_q_size)) { + result = qp_memcpy_to_queue_iter(produce_q, tail, from, written); + } else { + /* Tail pointer wraps around. */ + + const size_t tmp = (size_t) (produce_q_size - tail); + + result = qp_memcpy_to_queue_iter(produce_q, tail, from, tmp); + if (result >= VMCI_SUCCESS) + result = qp_memcpy_to_queue_iter(produce_q, 0, from, + written - tmp); + } + + if (result < VMCI_SUCCESS) + return result; + + /* + * This virt_wmb() ensures that data written to the queue + * is observable before the new producer_tail is. + */ + virt_wmb(); + + vmci_q_header_add_producer_tail(produce_q->q_header, written, + produce_q_size); + return written; +} + +/* + * Dequeues data (if available) from the given consume queue. Writes data + * to the user provided buffer using the provided function. + * Assumes the queue->mutex has been acquired. + * Results: + * VMCI_ERROR_QUEUEPAIR_NODATA if no data was available to dequeue. + * VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the queue + * (as defined by the queue size). + * VMCI_ERROR_INVALID_ARGS, if an error occured when accessing the buffer. + * Otherwise the number of bytes dequeued is returned. + * Side effects: + * Updates the head pointer of the consume queue. + */ +static ssize_t qp_dequeue_locked(struct vmci_queue *produce_q, + struct vmci_queue *consume_q, + const u64 consume_q_size, + struct iov_iter *to, + bool update_consumer) +{ + size_t buf_size = iov_iter_count(to); + s64 buf_ready; + u64 head; + size_t read; + ssize_t result; + + result = qp_map_queue_headers(produce_q, consume_q); + if (unlikely(result != VMCI_SUCCESS)) + return result; + + buf_ready = vmci_q_header_buf_ready(consume_q->q_header, + produce_q->q_header, + consume_q_size); + if (buf_ready == 0) + return VMCI_ERROR_QUEUEPAIR_NODATA; + + if (buf_ready < VMCI_SUCCESS) + return (ssize_t) buf_ready; + + /* + * This virt_rmb() ensures that data from the queue will be read + * after we have determined how much is ready to be consumed. + */ + virt_rmb(); + + read = (size_t) (buf_ready > buf_size ? buf_size : buf_ready); + head = vmci_q_header_consumer_head(produce_q->q_header); + if (likely(head + read < consume_q_size)) { + result = qp_memcpy_from_queue_iter(to, consume_q, head, read); + } else { + /* Head pointer wraps around. */ + + const size_t tmp = (size_t) (consume_q_size - head); + + result = qp_memcpy_from_queue_iter(to, consume_q, head, tmp); + if (result >= VMCI_SUCCESS) + result = qp_memcpy_from_queue_iter(to, consume_q, 0, + read - tmp); + + } + + if (result < VMCI_SUCCESS) + return result; + + if (update_consumer) + vmci_q_header_add_consumer_head(produce_q->q_header, + read, consume_q_size); + + return read; +} + +/* + * vmci_qpair_alloc() - Allocates a queue pair. 
+ * @qpair: Pointer for the new vmci_qp struct. + * @handle: Handle to track the resource. + * @produce_qsize: Desired size of the producer queue. + * @consume_qsize: Desired size of the consumer queue. + * @peer: ContextID of the peer. + * @flags: VMCI flags. + * @priv_flags: VMCI priviledge flags. + * + * This is the client interface for allocating the memory for a + * vmci_qp structure and then attaching to the underlying + * queue. If an error occurs allocating the memory for the + * vmci_qp structure no attempt is made to attach. If an + * error occurs attaching, then the structure is freed. + */ +int vmci_qpair_alloc(struct vmci_qp **qpair, + struct vmci_handle *handle, + u64 produce_qsize, + u64 consume_qsize, + u32 peer, + u32 flags, + u32 priv_flags) +{ + struct vmci_qp *my_qpair; + int retval; + struct vmci_handle src = VMCI_INVALID_HANDLE; + struct vmci_handle dst = vmci_make_handle(peer, VMCI_INVALID_ID); + enum vmci_route route; + vmci_event_release_cb wakeup_cb; + void *client_data; + + /* + * Restrict the size of a queuepair. The device already + * enforces a limit on the total amount of memory that can be + * allocated to queuepairs for a guest. However, we try to + * allocate this memory before we make the queuepair + * allocation hypercall. On Linux, we allocate each page + * separately, which means rather than fail, the guest will + * thrash while it tries to allocate, and will become + * increasingly unresponsive to the point where it appears to + * be hung. So we place a limit on the size of an individual + * queuepair here, and leave the device to enforce the + * restriction on total queuepair memory. (Note that this + * doesn't prevent all cases; a user with only this much + * physical memory could still get into trouble.) The error + * used by the device is NO_RESOURCES, so use that here too. + */ + + if (!QP_SIZES_ARE_VALID(produce_qsize, consume_qsize)) + return VMCI_ERROR_NO_RESOURCES; + + retval = vmci_route(&src, &dst, false, &route); + if (retval < VMCI_SUCCESS) + route = vmci_guest_code_active() ? + VMCI_ROUTE_AS_GUEST : VMCI_ROUTE_AS_HOST; + + if (flags & (VMCI_QPFLAG_NONBLOCK | VMCI_QPFLAG_PINNED)) { + pr_devel("NONBLOCK OR PINNED set"); + return VMCI_ERROR_INVALID_ARGS; + } + + my_qpair = kzalloc(sizeof(*my_qpair), GFP_KERNEL); + if (!my_qpair) + return VMCI_ERROR_NO_MEM; + + my_qpair->produce_q_size = produce_qsize; + my_qpair->consume_q_size = consume_qsize; + my_qpair->peer = peer; + my_qpair->flags = flags; + my_qpair->priv_flags = priv_flags; + + wakeup_cb = NULL; + client_data = NULL; + + if (VMCI_ROUTE_AS_HOST == route) { + my_qpair->guest_endpoint = false; + if (!(flags & VMCI_QPFLAG_LOCAL)) { + my_qpair->blocked = 0; + my_qpair->generation = 0; + init_waitqueue_head(&my_qpair->event); + wakeup_cb = qp_wakeup_cb; + client_data = (void *)my_qpair; + } + } else { + my_qpair->guest_endpoint = true; + } + + retval = vmci_qp_alloc(handle, + &my_qpair->produce_q, + my_qpair->produce_q_size, + &my_qpair->consume_q, + my_qpair->consume_q_size, + my_qpair->peer, + my_qpair->flags, + my_qpair->priv_flags, + my_qpair->guest_endpoint, + wakeup_cb, client_data); + + if (retval < VMCI_SUCCESS) { + kfree(my_qpair); + return retval; + } + + *qpair = my_qpair; + my_qpair->handle = *handle; + + return retval; +} +EXPORT_SYMBOL_GPL(vmci_qpair_alloc); + +/* + * vmci_qpair_detach() - Detatches the client from a queue pair. + * @qpair: Reference of a pointer to the qpair struct. + * + * This is the client interface for detaching from a VMCIQPair. 
+ * Note that this routine will free the memory allocated for the + * vmci_qp structure too. + */ +int vmci_qpair_detach(struct vmci_qp **qpair) +{ + int result; + struct vmci_qp *old_qpair; + + if (!qpair || !(*qpair)) + return VMCI_ERROR_INVALID_ARGS; + + old_qpair = *qpair; + result = qp_detatch(old_qpair->handle, old_qpair->guest_endpoint); + + /* + * The guest can fail to detach for a number of reasons, and + * if it does so, it will cleanup the entry (if there is one). + * The host can fail too, but it won't cleanup the entry + * immediately, it will do that later when the context is + * freed. Either way, we need to release the qpair struct + * here; there isn't much the caller can do, and we don't want + * to leak. + */ + + memset(old_qpair, 0, sizeof(*old_qpair)); + old_qpair->handle = VMCI_INVALID_HANDLE; + old_qpair->peer = VMCI_INVALID_ID; + kfree(old_qpair); + *qpair = NULL; + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_detach); + +/* + * vmci_qpair_get_produce_indexes() - Retrieves the indexes of the producer. + * @qpair: Pointer to the queue pair struct. + * @producer_tail: Reference used for storing producer tail index. + * @consumer_head: Reference used for storing the consumer head index. + * + * This is the client interface for getting the current indexes of the + * QPair from the point of the view of the caller as the producer. + */ +int vmci_qpair_get_produce_indexes(const struct vmci_qp *qpair, + u64 *producer_tail, + u64 *consumer_head) +{ + struct vmci_queue_header *produce_q_header; + struct vmci_queue_header *consume_q_header; + int result; + + if (!qpair) + return VMCI_ERROR_INVALID_ARGS; + + qp_lock(qpair); + result = + qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header); + if (result == VMCI_SUCCESS) + vmci_q_header_get_pointers(produce_q_header, consume_q_header, + producer_tail, consumer_head); + qp_unlock(qpair); + + if (result == VMCI_SUCCESS && + ((producer_tail && *producer_tail >= qpair->produce_q_size) || + (consumer_head && *consumer_head >= qpair->produce_q_size))) + return VMCI_ERROR_INVALID_SIZE; + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_get_produce_indexes); + +/* + * vmci_qpair_get_consume_indexes() - Retrieves the indexes of the consumer. + * @qpair: Pointer to the queue pair struct. + * @consumer_tail: Reference used for storing consumer tail index. + * @producer_head: Reference used for storing the producer head index. + * + * This is the client interface for getting the current indexes of the + * QPair from the point of the view of the caller as the consumer. + */ +int vmci_qpair_get_consume_indexes(const struct vmci_qp *qpair, + u64 *consumer_tail, + u64 *producer_head) +{ + struct vmci_queue_header *produce_q_header; + struct vmci_queue_header *consume_q_header; + int result; + + if (!qpair) + return VMCI_ERROR_INVALID_ARGS; + + qp_lock(qpair); + result = + qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header); + if (result == VMCI_SUCCESS) + vmci_q_header_get_pointers(consume_q_header, produce_q_header, + consumer_tail, producer_head); + qp_unlock(qpair); + + if (result == VMCI_SUCCESS && + ((consumer_tail && *consumer_tail >= qpair->consume_q_size) || + (producer_head && *producer_head >= qpair->consume_q_size))) + return VMCI_ERROR_INVALID_SIZE; + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_get_consume_indexes); + +/* + * vmci_qpair_produce_free_space() - Retrieves free space in producer queue. + * @qpair: Pointer to the queue pair struct. 
+ * + * This is the client interface for getting the amount of free + * space in the QPair from the point of the view of the caller as + * the producer which is the common case. Returns < 0 if err, else + * available bytes into which data can be enqueued if > 0. + */ +s64 vmci_qpair_produce_free_space(const struct vmci_qp *qpair) +{ + struct vmci_queue_header *produce_q_header; + struct vmci_queue_header *consume_q_header; + s64 result; + + if (!qpair) + return VMCI_ERROR_INVALID_ARGS; + + qp_lock(qpair); + result = + qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header); + if (result == VMCI_SUCCESS) + result = vmci_q_header_free_space(produce_q_header, + consume_q_header, + qpair->produce_q_size); + else + result = 0; + + qp_unlock(qpair); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_produce_free_space); + +/* + * vmci_qpair_consume_free_space() - Retrieves free space in consumer queue. + * @qpair: Pointer to the queue pair struct. + * + * This is the client interface for getting the amount of free + * space in the QPair from the point of the view of the caller as + * the consumer which is not the common case. Returns < 0 if err, else + * available bytes into which data can be enqueued if > 0. + */ +s64 vmci_qpair_consume_free_space(const struct vmci_qp *qpair) +{ + struct vmci_queue_header *produce_q_header; + struct vmci_queue_header *consume_q_header; + s64 result; + + if (!qpair) + return VMCI_ERROR_INVALID_ARGS; + + qp_lock(qpair); + result = + qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header); + if (result == VMCI_SUCCESS) + result = vmci_q_header_free_space(consume_q_header, + produce_q_header, + qpair->consume_q_size); + else + result = 0; + + qp_unlock(qpair); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_consume_free_space); + +/* + * vmci_qpair_produce_buf_ready() - Gets bytes ready to read from + * producer queue. + * @qpair: Pointer to the queue pair struct. + * + * This is the client interface for getting the amount of + * enqueued data in the QPair from the point of the view of the + * caller as the producer which is not the common case. Returns < 0 if err, + * else available bytes that may be read. + */ +s64 vmci_qpair_produce_buf_ready(const struct vmci_qp *qpair) +{ + struct vmci_queue_header *produce_q_header; + struct vmci_queue_header *consume_q_header; + s64 result; + + if (!qpair) + return VMCI_ERROR_INVALID_ARGS; + + qp_lock(qpair); + result = + qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header); + if (result == VMCI_SUCCESS) + result = vmci_q_header_buf_ready(produce_q_header, + consume_q_header, + qpair->produce_q_size); + else + result = 0; + + qp_unlock(qpair); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_produce_buf_ready); + +/* + * vmci_qpair_consume_buf_ready() - Gets bytes ready to read from + * consumer queue. + * @qpair: Pointer to the queue pair struct. + * + * This is the client interface for getting the amount of + * enqueued data in the QPair from the point of the view of the + * caller as the consumer which is the normal case. Returns < 0 if err, + * else available bytes that may be read. 
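+ *
+ * A minimal consumer sketch (the buffer and its size below are
+ * hypothetical, not taken from this driver):
+ *
+ *   char buf[128];
+ *   s64 ready = vmci_qpair_consume_buf_ready(qpair);
+ *
+ *   if (ready > 0)
+ *       vmci_qpair_dequeue(qpair, buf,
+ *                          min_t(s64, ready, sizeof(buf)), 0);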
+ */ +s64 vmci_qpair_consume_buf_ready(const struct vmci_qp *qpair) +{ + struct vmci_queue_header *produce_q_header; + struct vmci_queue_header *consume_q_header; + s64 result; + + if (!qpair) + return VMCI_ERROR_INVALID_ARGS; + + qp_lock(qpair); + result = + qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header); + if (result == VMCI_SUCCESS) + result = vmci_q_header_buf_ready(consume_q_header, + produce_q_header, + qpair->consume_q_size); + else + result = 0; + + qp_unlock(qpair); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_consume_buf_ready); + +/* + * vmci_qpair_enqueue() - Throw data on the queue. + * @qpair: Pointer to the queue pair struct. + * @buf: Pointer to buffer containing data + * @buf_size: Length of buffer. + * @buf_type: Buffer type (Unused). + * + * This is the client interface for enqueueing data into the queue. + * Returns number of bytes enqueued or < 0 on error. + */ +ssize_t vmci_qpair_enqueue(struct vmci_qp *qpair, + const void *buf, + size_t buf_size, + int buf_type) +{ + ssize_t result; + struct iov_iter from; + struct kvec v = {.iov_base = (void *)buf, .iov_len = buf_size}; + + if (!qpair || !buf) + return VMCI_ERROR_INVALID_ARGS; + + iov_iter_kvec(&from, ITER_SOURCE, &v, 1, buf_size); + + qp_lock(qpair); + + do { + result = qp_enqueue_locked(qpair->produce_q, + qpair->consume_q, + qpair->produce_q_size, + &from); + + if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY && + !qp_wait_for_ready_queue(qpair)) + result = VMCI_ERROR_WOULD_BLOCK; + + } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY); + + qp_unlock(qpair); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_enqueue); + +/* + * vmci_qpair_dequeue() - Get data from the queue. + * @qpair: Pointer to the queue pair struct. + * @buf: Pointer to buffer for the data + * @buf_size: Length of buffer. + * @buf_type: Buffer type (Unused). + * + * This is the client interface for dequeueing data from the queue. + * Returns number of bytes dequeued or < 0 on error. + */ +ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair, + void *buf, + size_t buf_size, + int buf_type) +{ + ssize_t result; + struct iov_iter to; + struct kvec v = {.iov_base = buf, .iov_len = buf_size}; + + if (!qpair || !buf) + return VMCI_ERROR_INVALID_ARGS; + + iov_iter_kvec(&to, ITER_DEST, &v, 1, buf_size); + + qp_lock(qpair); + + do { + result = qp_dequeue_locked(qpair->produce_q, + qpair->consume_q, + qpair->consume_q_size, + &to, true); + + if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY && + !qp_wait_for_ready_queue(qpair)) + result = VMCI_ERROR_WOULD_BLOCK; + + } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY); + + qp_unlock(qpair); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_dequeue); + +/* + * vmci_qpair_peek() - Peek at the data in the queue. + * @qpair: Pointer to the queue pair struct. + * @buf: Pointer to buffer for the data + * @buf_size: Length of buffer. + * @buf_type: Buffer type (Unused on Linux). + * + * This is the client interface for peeking into a queue. (I.e., + * copy data from the queue without updating the head pointer.) + * Returns number of bytes dequeued or < 0 on error. 
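+ *
+ * Since the consumer head is not advanced, a peek followed by a
+ * dequeue of the same size will see the same bytes; only the dequeue
+ * consumes them. A sketch (the hdr buffer is hypothetical):
+ *
+ *   vmci_qpair_peek(qpair, &hdr, sizeof(hdr), 0);
+ *   ... inspect hdr, then decide whether to consume it ...
+ *   vmci_qpair_dequeue(qpair, &hdr, sizeof(hdr), 0);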
+ */ +ssize_t vmci_qpair_peek(struct vmci_qp *qpair, + void *buf, + size_t buf_size, + int buf_type) +{ + struct iov_iter to; + struct kvec v = {.iov_base = buf, .iov_len = buf_size}; + ssize_t result; + + if (!qpair || !buf) + return VMCI_ERROR_INVALID_ARGS; + + iov_iter_kvec(&to, ITER_DEST, &v, 1, buf_size); + + qp_lock(qpair); + + do { + result = qp_dequeue_locked(qpair->produce_q, + qpair->consume_q, + qpair->consume_q_size, + &to, false); + + if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY && + !qp_wait_for_ready_queue(qpair)) + result = VMCI_ERROR_WOULD_BLOCK; + + } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY); + + qp_unlock(qpair); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_peek); + +/* + * vmci_qpair_enquev() - Throw data on the queue using iov. + * @qpair: Pointer to the queue pair struct. + * @iov: Pointer to buffer containing data + * @iov_size: Length of buffer. + * @buf_type: Buffer type (Unused). + * + * This is the client interface for enqueueing data into the queue. + * This function uses IO vectors to handle the work. Returns number + * of bytes enqueued or < 0 on error. + */ +ssize_t vmci_qpair_enquev(struct vmci_qp *qpair, + struct msghdr *msg, + size_t iov_size, + int buf_type) +{ + ssize_t result; + + if (!qpair) + return VMCI_ERROR_INVALID_ARGS; + + qp_lock(qpair); + + do { + result = qp_enqueue_locked(qpair->produce_q, + qpair->consume_q, + qpair->produce_q_size, + &msg->msg_iter); + + if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY && + !qp_wait_for_ready_queue(qpair)) + result = VMCI_ERROR_WOULD_BLOCK; + + } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY); + + qp_unlock(qpair); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_enquev); + +/* + * vmci_qpair_dequev() - Get data from the queue using iov. + * @qpair: Pointer to the queue pair struct. + * @iov: Pointer to buffer for the data + * @iov_size: Length of buffer. + * @buf_type: Buffer type (Unused). + * + * This is the client interface for dequeueing data from the queue. + * This function uses IO vectors to handle the work. Returns number + * of bytes dequeued or < 0 on error. + */ +ssize_t vmci_qpair_dequev(struct vmci_qp *qpair, + struct msghdr *msg, + size_t iov_size, + int buf_type) +{ + ssize_t result; + + if (!qpair) + return VMCI_ERROR_INVALID_ARGS; + + qp_lock(qpair); + + do { + result = qp_dequeue_locked(qpair->produce_q, + qpair->consume_q, + qpair->consume_q_size, + &msg->msg_iter, true); + + if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY && + !qp_wait_for_ready_queue(qpair)) + result = VMCI_ERROR_WOULD_BLOCK; + + } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY); + + qp_unlock(qpair); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_dequev); + +/* + * vmci_qpair_peekv() - Peek at the data in the queue using iov. + * @qpair: Pointer to the queue pair struct. + * @iov: Pointer to buffer for the data + * @iov_size: Length of buffer. + * @buf_type: Buffer type (Unused on Linux). + * + * This is the client interface for peeking into a queue. (I.e., + * copy data from the queue without updating the head pointer.) + * This function uses IO vectors to handle the work. Returns number + * of bytes peeked or < 0 on error. 
+ */ +ssize_t vmci_qpair_peekv(struct vmci_qp *qpair, + struct msghdr *msg, + size_t iov_size, + int buf_type) +{ + ssize_t result; + + if (!qpair) + return VMCI_ERROR_INVALID_ARGS; + + qp_lock(qpair); + + do { + result = qp_dequeue_locked(qpair->produce_q, + qpair->consume_q, + qpair->consume_q_size, + &msg->msg_iter, false); + + if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY && + !qp_wait_for_ready_queue(qpair)) + result = VMCI_ERROR_WOULD_BLOCK; + + } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY); + + qp_unlock(qpair); + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_peekv); diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.h b/drivers/misc/vmw_vmci/vmci_queue_pair.h new file mode 100644 index 0000000000..c4e6e924d9 --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_queue_pair.h @@ -0,0 +1,165 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + */ + +#ifndef _VMCI_QUEUE_PAIR_H_ +#define _VMCI_QUEUE_PAIR_H_ + +#include +#include + +#include "vmci_context.h" + +/* Callback needed for correctly waiting on events. */ +typedef int (*vmci_event_release_cb) (void *client_data); + +/* Guest device port I/O. */ +struct ppn_set { + u64 num_produce_pages; + u64 num_consume_pages; + u64 *produce_ppns; + u64 *consume_ppns; + bool initialized; +}; + +/* VMCIqueue_pairAllocInfo */ +struct vmci_qp_alloc_info { + struct vmci_handle handle; + u32 peer; + u32 flags; + u64 produce_size; + u64 consume_size; + u64 ppn_va; /* Start VA of queue pair PPNs. */ + u64 num_ppns; + s32 result; + u32 version; +}; + +/* VMCIqueue_pairSetVAInfo */ +struct vmci_qp_set_va_info { + struct vmci_handle handle; + u64 va; /* Start VA of queue pair PPNs. */ + u64 num_ppns; + u32 version; + s32 result; +}; + +/* + * For backwards compatibility, here is a version of the + * VMCIqueue_pairPageFileInfo before host support end-points was added. + * Note that the current version of that structure requires VMX to + * pass down the VA of the mapped file. Before host support was added + * there was nothing of the sort. So, when the driver sees the ioctl + * with a parameter that is the sizeof + * VMCIqueue_pairPageFileInfo_NoHostQP then it can infer that the version + * of VMX running can't attach to host end points because it doesn't + * provide the VA of the mapped files. + * + * The Linux driver doesn't get an indication of the size of the + * structure passed down from user space. So, to fix a long standing + * but unfiled bug, the _pad field has been renamed to version. + * Existing versions of VMX always initialize the PageFileInfo + * structure so that _pad, er, version is set to 0. + * + * A version value of 1 indicates that the size of the structure has + * been increased to include two UVA's: produce_uva and consume_uva. + * These UVA's are of the mmap()'d queue contents backing files. + * + * In addition, if when VMX is sending down the + * VMCIqueue_pairPageFileInfo structure it gets an error then it will + * try again with the _NoHostQP version of the file to see if an older + * VMCI kernel module is running. + */ + +/* VMCIqueue_pairPageFileInfo */ +struct vmci_qp_page_file_info { + struct vmci_handle handle; + u64 produce_page_file; /* User VA. */ + u64 consume_page_file; /* User VA. */ + u64 produce_page_file_size; /* Size of the file name array. */ + u64 consume_page_file_size; /* Size of the file name array. */ + s32 result; + u32 version; /* Was _pad. */ + u64 produce_va; /* User VA of the mapped file. 
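The compatibility note above reduces, on the Linux side, to a test of the former _pad field: old VMX builds always leave it at 0, while a value of 1 means the two extra mmap()'d backing-file user VAs are present. A rough sketch of that decision; the helper name is invented, and mapping version 1 to the produce_va/consume_va fields of the struct below is an inference from the comment rather than something stated explicitly.

/* Hypothetical helper: does this VMX supply the mmap()'d backing-file VAs? */
static bool example_has_backing_file_vas(const struct vmci_qp_page_file_info *info)
{
        /* Old VMX builds always left the former _pad (now version) at 0. */
        return info->version >= 1;
}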
*/ + u64 consume_va; /* User VA of the mapped file. */ +}; + +/* vmci queuepair detach info */ +struct vmci_qp_dtch_info { + struct vmci_handle handle; + s32 result; + u32 _pad; +}; + +/* + * struct vmci_qp_page_store describes how the memory of a given queue pair + * is backed. When the queue pair is between the host and a guest, the + * page store consists of references to the guest pages. On vmkernel, + * this is a list of PPNs, and on hosted, it is a user VA where the + * queue pair is mapped into the VMX address space. + */ +struct vmci_qp_page_store { + /* Reference to pages backing the queue pair. */ + u64 pages; + /* Length of pageList/virtual address range (in pages). */ + u32 len; +}; + +/* + * This data type contains the information about a queue. + * There are two queues (hence, queue pairs) per transaction model between a + * pair of end points, A & B. One queue is used by end point A to transmit + * commands and responses to B. The other queue is used by B to transmit + * commands and responses. + * + * struct vmci_queue_kern_if is a per-OS defined Queue structure. It contains + * either a direct pointer to the linear address of the buffer contents or a + * pointer to structures which help the OS locate those data pages. See + * vmciKernelIf.c for each platform for its definition. + */ +struct vmci_queue { + struct vmci_queue_header *q_header; + struct vmci_queue_header *saved_header; + struct vmci_queue_kern_if *kernel_if; +}; + +/* + * Utility function that checks whether the fields of the page + * store contain valid values. + * Result: + * true if the page store is wellformed. false otherwise. + */ +static inline bool +VMCI_QP_PAGESTORE_IS_WELLFORMED(struct vmci_qp_page_store *page_store) +{ + return page_store->len >= 2; +} + +void vmci_qp_broker_exit(void); +int vmci_qp_broker_alloc(struct vmci_handle handle, u32 peer, + u32 flags, u32 priv_flags, + u64 produce_size, u64 consume_size, + struct vmci_qp_page_store *page_store, + struct vmci_ctx *context); +int vmci_qp_broker_set_page_store(struct vmci_handle handle, + u64 produce_uva, u64 consume_uva, + struct vmci_ctx *context); +int vmci_qp_broker_detach(struct vmci_handle handle, struct vmci_ctx *context); + +void vmci_qp_guest_endpoints_exit(void); + +int vmci_qp_alloc(struct vmci_handle *handle, + struct vmci_queue **produce_q, u64 produce_size, + struct vmci_queue **consume_q, u64 consume_size, + u32 peer, u32 flags, u32 priv_flags, + bool guest_endpoint, vmci_event_release_cb wakeup_cb, + void *client_data); +int vmci_qp_broker_map(struct vmci_handle handle, + struct vmci_ctx *context, u64 guest_mem); +int vmci_qp_broker_unmap(struct vmci_handle handle, + struct vmci_ctx *context, u32 gid); + +#endif /* _VMCI_QUEUE_PAIR_H_ */ diff --git a/drivers/misc/vmw_vmci/vmci_resource.c b/drivers/misc/vmw_vmci/vmci_resource.c new file mode 100644 index 0000000000..692daa9eff --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_resource.c @@ -0,0 +1,221 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. 
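VMCI_QP_PAGESTORE_IS_WELLFORMED() only insists that the backing covers at least two pages, which is consistent with each of the two queues needing at least its own header page (that rationale is an inference; the header does not spell it out). A short sketch of the guard a broker-side caller might apply before handing the store to vmci_qp_broker_alloc(); the helper name is invented.

/* Sketch: reject obviously bogus page stores before involving the broker. */
static int example_check_page_store(struct vmci_qp_page_store *store)
{
        if (!VMCI_QP_PAGESTORE_IS_WELLFORMED(store))
                return VMCI_ERROR_INVALID_ARGS;

        /*
         * store->pages is a user VA (hosted) or a reference to a PPN
         * list (vmkernel); store->len is its length in pages.
         */
        return VMCI_SUCCESS;
}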
+ */ + +#include +#include +#include +#include +#include + +#include "vmci_resource.h" +#include "vmci_driver.h" + + +#define VMCI_RESOURCE_HASH_BITS 7 +#define VMCI_RESOURCE_HASH_BUCKETS (1 << VMCI_RESOURCE_HASH_BITS) + +struct vmci_hash_table { + spinlock_t lock; + struct hlist_head entries[VMCI_RESOURCE_HASH_BUCKETS]; +}; + +static struct vmci_hash_table vmci_resource_table = { + .lock = __SPIN_LOCK_UNLOCKED(vmci_resource_table.lock), +}; + +static unsigned int vmci_resource_hash(struct vmci_handle handle) +{ + return hash_32(handle.resource, VMCI_RESOURCE_HASH_BITS); +} + +/* + * Gets a resource (if one exists) matching given handle from the hash table. + */ +static struct vmci_resource *vmci_resource_lookup(struct vmci_handle handle, + enum vmci_resource_type type) +{ + struct vmci_resource *r, *resource = NULL; + unsigned int idx = vmci_resource_hash(handle); + + rcu_read_lock(); + hlist_for_each_entry_rcu(r, + &vmci_resource_table.entries[idx], node) { + u32 cid = r->handle.context; + u32 rid = r->handle.resource; + + if (r->type == type && + rid == handle.resource && + (cid == handle.context || cid == VMCI_INVALID_ID || + handle.context == VMCI_INVALID_ID)) { + resource = r; + break; + } + } + rcu_read_unlock(); + + return resource; +} + +/* + * Find an unused resource ID and return it. The first + * VMCI_RESERVED_RESOURCE_ID_MAX are reserved so we start from + * its value + 1. + * Returns VMCI resource id on success, VMCI_INVALID_ID on failure. + */ +static u32 vmci_resource_find_id(u32 context_id, + enum vmci_resource_type resource_type) +{ + static u32 resource_id = VMCI_RESERVED_RESOURCE_ID_MAX + 1; + u32 old_rid = resource_id; + u32 current_rid; + + /* + * Generate a unique resource ID. Keep on trying until we wrap around + * in the RID space. + */ + do { + struct vmci_handle handle; + + current_rid = resource_id; + resource_id++; + if (unlikely(resource_id == VMCI_INVALID_ID)) { + /* Skip the reserved rids. */ + resource_id = VMCI_RESERVED_RESOURCE_ID_MAX + 1; + } + + handle = vmci_make_handle(context_id, current_rid); + if (!vmci_resource_lookup(handle, resource_type)) + return current_rid; + } while (resource_id != old_rid); + + return VMCI_INVALID_ID; +} + + +int vmci_resource_add(struct vmci_resource *resource, + enum vmci_resource_type resource_type, + struct vmci_handle handle) + +{ + unsigned int idx; + int result; + + spin_lock(&vmci_resource_table.lock); + + if (handle.resource == VMCI_INVALID_ID) { + handle.resource = vmci_resource_find_id(handle.context, + resource_type); + if (handle.resource == VMCI_INVALID_ID) { + result = VMCI_ERROR_NO_HANDLE; + goto out; + } + } else if (vmci_resource_lookup(handle, resource_type)) { + result = VMCI_ERROR_ALREADY_EXISTS; + goto out; + } + + resource->handle = handle; + resource->type = resource_type; + INIT_HLIST_NODE(&resource->node); + kref_init(&resource->kref); + init_completion(&resource->done); + + idx = vmci_resource_hash(resource->handle); + hlist_add_head_rcu(&resource->node, &vmci_resource_table.entries[idx]); + + result = VMCI_SUCCESS; + +out: + spin_unlock(&vmci_resource_table.lock); + return result; +} + +void vmci_resource_remove(struct vmci_resource *resource) +{ + struct vmci_handle handle = resource->handle; + unsigned int idx = vmci_resource_hash(handle); + struct vmci_resource *r; + + /* Remove resource from hash table. 
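vmci_resource_add() either takes the caller's resource ID verbatim or, when handle.resource is VMCI_INVALID_ID, picks a free ID above VMCI_RESERVED_RESOURCE_ID_MAX via vmci_resource_find_id(). A hedged sketch of the usual embedding pattern; the struct example_doorbell wrapper and helper are invented for illustration.

/* Illustrative wrapper object carrying its own vmci_resource. */
struct example_doorbell {
        struct vmci_resource resource;
        /* ... client specific state ... */
};

static int example_register(struct example_doorbell *db, u32 context_id)
{
        /* VMCI_INVALID_ID asks vmci_resource_add() to pick a free ID. */
        struct vmci_handle handle =
                vmci_make_handle(context_id, VMCI_INVALID_ID);

        return vmci_resource_add(&db->resource,
                                 VMCI_RESOURCE_TYPE_DOORBELL, handle);
}

After a successful add, vmci_resource_handle(&db->resource) reports the handle that was actually assigned, since vmci_resource_add() writes the chosen ID back into the embedded resource.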
*/ + spin_lock(&vmci_resource_table.lock); + + hlist_for_each_entry(r, &vmci_resource_table.entries[idx], node) { + if (vmci_handle_is_equal(r->handle, resource->handle)) { + hlist_del_init_rcu(&r->node); + break; + } + } + + spin_unlock(&vmci_resource_table.lock); + synchronize_rcu(); + + vmci_resource_put(resource); + wait_for_completion(&resource->done); +} + +struct vmci_resource * +vmci_resource_by_handle(struct vmci_handle resource_handle, + enum vmci_resource_type resource_type) +{ + struct vmci_resource *r, *resource = NULL; + + rcu_read_lock(); + + r = vmci_resource_lookup(resource_handle, resource_type); + if (r && + (resource_type == r->type || + resource_type == VMCI_RESOURCE_TYPE_ANY)) { + resource = vmci_resource_get(r); + } + + rcu_read_unlock(); + + return resource; +} + +/* + * Get a reference to given resource. + */ +struct vmci_resource *vmci_resource_get(struct vmci_resource *resource) +{ + kref_get(&resource->kref); + + return resource; +} + +static void vmci_release_resource(struct kref *kref) +{ + struct vmci_resource *resource = + container_of(kref, struct vmci_resource, kref); + + /* Verify the resource has been unlinked from hash table */ + WARN_ON(!hlist_unhashed(&resource->node)); + + /* Signal that container of this resource can now be destroyed */ + complete(&resource->done); +} + +/* + * Resource's release function will get called if last reference. + * If it is the last reference, then we are sure that nobody else + * can increment the count again (it's gone from the resource hash + * table), so there's no need for locking here. + */ +int vmci_resource_put(struct vmci_resource *resource) +{ + /* + * We propagate the information back to caller in case it wants to know + * whether entry was freed. + */ + return kref_put(&resource->kref, vmci_release_resource) ? + VMCI_SUCCESS_ENTRY_DEAD : VMCI_SUCCESS; +} + +struct vmci_handle vmci_resource_handle(struct vmci_resource *resource) +{ + return resource->handle; +} diff --git a/drivers/misc/vmw_vmci/vmci_resource.h b/drivers/misc/vmw_vmci/vmci_resource.h new file mode 100644 index 0000000000..02ae18506e --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_resource.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. 
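Teardown relies on the kref/completion pair: vmci_resource_remove() unlinks the entry, drops its own reference, and then blocks in wait_for_completion() until the final vmci_resource_put() runs vmci_release_resource(). A sketch of the lookup/use/put pattern a caller is expected to follow so that removal can actually finish; the helper name is invented.

/* Sketch: every successful lookup must be balanced by a put. */
static void example_use_resource(struct vmci_handle handle)
{
        struct vmci_resource *res;

        res = vmci_resource_by_handle(handle, VMCI_RESOURCE_TYPE_DOORBELL);
        if (!res)
                return;         /* not registered (or wrong type) */

        /* ... use the object embedding 'res' via container_of() ... */

        /* May report VMCI_SUCCESS_ENTRY_DEAD if this dropped the last ref. */
        vmci_resource_put(res);
}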
+ */ + +#ifndef _VMCI_RESOURCE_H_ +#define _VMCI_RESOURCE_H_ + +#include +#include + +#include "vmci_context.h" + + +enum vmci_resource_type { + VMCI_RESOURCE_TYPE_ANY, + VMCI_RESOURCE_TYPE_API, + VMCI_RESOURCE_TYPE_GROUP, + VMCI_RESOURCE_TYPE_DATAGRAM, + VMCI_RESOURCE_TYPE_DOORBELL, + VMCI_RESOURCE_TYPE_QPAIR_GUEST, + VMCI_RESOURCE_TYPE_QPAIR_HOST +}; + +struct vmci_resource { + struct vmci_handle handle; + enum vmci_resource_type type; + struct hlist_node node; + struct kref kref; + struct completion done; +}; + + +int vmci_resource_add(struct vmci_resource *resource, + enum vmci_resource_type resource_type, + struct vmci_handle handle); + +void vmci_resource_remove(struct vmci_resource *resource); + +struct vmci_resource * +vmci_resource_by_handle(struct vmci_handle resource_handle, + enum vmci_resource_type resource_type); + +struct vmci_resource *vmci_resource_get(struct vmci_resource *resource); +int vmci_resource_put(struct vmci_resource *resource); + +struct vmci_handle vmci_resource_handle(struct vmci_resource *resource); + +#endif /* _VMCI_RESOURCE_H_ */ diff --git a/drivers/misc/vmw_vmci/vmci_route.c b/drivers/misc/vmw_vmci/vmci_route.c new file mode 100644 index 0000000000..8b91bfa533 --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_route.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + */ + +#include +#include + +#include "vmci_context.h" +#include "vmci_driver.h" +#include "vmci_route.h" + +/* + * Make a routing decision for the given source and destination handles. + * This will try to determine the route using the handles and the available + * devices. Will set the source context if it is invalid. + */ +int vmci_route(struct vmci_handle *src, + const struct vmci_handle *dst, + bool from_guest, + enum vmci_route *route) +{ + bool has_host_device = vmci_host_code_active(); + bool has_guest_device = vmci_guest_code_active(); + + *route = VMCI_ROUTE_NONE; + + /* + * "from_guest" is only ever set to true by + * IOCTL_VMCI_DATAGRAM_SEND (or by the vmkernel equivalent), + * which comes from the VMX, so we know it is coming from a + * guest. + * + * To avoid inconsistencies, test these once. We will test + * them again when we do the actual send to ensure that we do + * not touch a non-existent device. + */ + + /* Must have a valid destination context. */ + if (VMCI_INVALID_ID == dst->context) + return VMCI_ERROR_INVALID_ARGS; + + /* Anywhere to hypervisor. */ + if (VMCI_HYPERVISOR_CONTEXT_ID == dst->context) { + + /* + * If this message already came from a guest then we + * cannot send it to the hypervisor. It must come + * from a local client. + */ + if (from_guest) + return VMCI_ERROR_DST_UNREACHABLE; + + /* + * We must be acting as a guest in order to send to + * the hypervisor. + */ + if (!has_guest_device) + return VMCI_ERROR_DEVICE_NOT_FOUND; + + /* And we cannot send if the source is the host context. */ + if (VMCI_HOST_CONTEXT_ID == src->context) + return VMCI_ERROR_INVALID_ARGS; + + /* + * If the client passed the ANON source handle then + * respect it (both context and resource are invalid). + * However, if they passed only an invalid context, + * then they probably mean ANY, in which case we + * should set the real context here before passing it + * down. + */ + if (VMCI_INVALID_ID == src->context && + VMCI_INVALID_ID != src->resource) + src->context = vmci_get_context_id(); + + /* Send from local client down to the hypervisor. 
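vmci_route() both validates the pair of handles and, as a side effect, fills in an unset source context, which is why callers pass src by pointer. A minimal sketch of the hypervisor-bound case as a local guest client would hit it; the wrapper name is invented for illustration.

/* Sketch: route a datagram from a local client to the hypervisor. */
static int example_route_to_hypervisor(u32 my_resource, u32 dst_resource)
{
        struct vmci_handle src =
                vmci_make_handle(VMCI_INVALID_ID, my_resource);
        struct vmci_handle dst =
                vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, dst_resource);
        enum vmci_route route;
        int result;

        result = vmci_route(&src, &dst, false, &route);
        if (result < VMCI_SUCCESS)
                return result;

        /* src.context has been filled in; route is VMCI_ROUTE_AS_GUEST. */
        return VMCI_SUCCESS;
}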
*/ + *route = VMCI_ROUTE_AS_GUEST; + return VMCI_SUCCESS; + } + + /* Anywhere to local client on host. */ + if (VMCI_HOST_CONTEXT_ID == dst->context) { + /* + * If it is not from a guest but we are acting as a + * guest, then we need to send it down to the host. + * Note that if we are also acting as a host then this + * will prevent us from sending from local client to + * local client, but we accept that restriction as a + * way to remove any ambiguity from the host context. + */ + if (src->context == VMCI_HYPERVISOR_CONTEXT_ID) { + /* + * If the hypervisor is the source, this is + * host local communication. The hypervisor + * may send vmci event datagrams to the host + * itself, but it will never send datagrams to + * an "outer host" through the guest device. + */ + + if (has_host_device) { + *route = VMCI_ROUTE_AS_HOST; + return VMCI_SUCCESS; + } else { + return VMCI_ERROR_DEVICE_NOT_FOUND; + } + } + + if (!from_guest && has_guest_device) { + /* If no source context then use the current. */ + if (VMCI_INVALID_ID == src->context) + src->context = vmci_get_context_id(); + + /* Send it from local client down to the host. */ + *route = VMCI_ROUTE_AS_GUEST; + return VMCI_SUCCESS; + } + + /* + * Otherwise we already received it from a guest and + * it is destined for a local client on this host, or + * it is from another local client on this host. We + * must be acting as a host to service it. + */ + if (!has_host_device) + return VMCI_ERROR_DEVICE_NOT_FOUND; + + if (VMCI_INVALID_ID == src->context) { + /* + * If it came from a guest then it must have a + * valid context. Otherwise we can use the + * host context. + */ + if (from_guest) + return VMCI_ERROR_INVALID_ARGS; + + src->context = VMCI_HOST_CONTEXT_ID; + } + + /* Route to local client. */ + *route = VMCI_ROUTE_AS_HOST; + return VMCI_SUCCESS; + } + + /* + * If we are acting as a host then this might be destined for + * a guest. + */ + if (has_host_device) { + /* It will have a context if it is meant for a guest. */ + if (vmci_ctx_exists(dst->context)) { + if (VMCI_INVALID_ID == src->context) { + /* + * If it came from a guest then it + * must have a valid context. + * Otherwise we can use the host + * context. + */ + + if (from_guest) + return VMCI_ERROR_INVALID_ARGS; + + src->context = VMCI_HOST_CONTEXT_ID; + } else if (VMCI_CONTEXT_IS_VM(src->context) && + src->context != dst->context) { + /* + * VM to VM communication is not + * allowed. Since we catch all + * communication destined for the host + * above, this must be destined for a + * VM since there is a valid context. + */ + + return VMCI_ERROR_DST_UNREACHABLE; + } + + /* Pass it up to the guest. */ + *route = VMCI_ROUTE_AS_HOST; + return VMCI_SUCCESS; + } else if (!has_guest_device) { + /* + * The host is attempting to reach a CID + * without an active context, and we can't + * send it down, since we have no guest + * device. + */ + + return VMCI_ERROR_DST_UNREACHABLE; + } + } + + /* + * We must be a guest trying to send to another guest, which means + * we need to send it down to the host. We do not filter out VM to + * VM communication here, since we want to be able to use the guest + * driver on older versions that do support VM to VM communication. + */ + if (!has_guest_device) { + /* + * Ending up here means we have neither guest nor host + * device. + */ + return VMCI_ERROR_DEVICE_NOT_FOUND; + } + + /* If no source context then use the current context. 
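Taken together, the routine collapses every legal source/destination combination into one of two transports: hand the datagram to the host-side context machinery (VMCI_ROUTE_AS_HOST) or push it down through the guest device (VMCI_ROUTE_AS_GUEST). A sketch of the dispatch a caller might perform on the result; dg_dispatch_as_host() and dg_send_to_device() are placeholder names, not functions from this file.

/* Sketch: act on the routing decision (helper names are placeholders). */
static int example_dispatch(struct vmci_handle *src,
                            const struct vmci_handle *dst,
                            bool from_guest)
{
        enum vmci_route route;
        int result;

        result = vmci_route(src, dst, from_guest, &route);
        if (result < VMCI_SUCCESS)
                return result;

        switch (route) {
        case VMCI_ROUTE_AS_HOST:
                return dg_dispatch_as_host(src, dst);   /* placeholder */
        case VMCI_ROUTE_AS_GUEST:
                return dg_send_to_device(src, dst);     /* placeholder */
        default:
                return VMCI_ERROR_DST_UNREACHABLE;
        }
}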
*/ + if (VMCI_INVALID_ID == src->context) + src->context = vmci_get_context_id(); + + /* + * Send it from local client down to the host, which will + * route it to the other guest for us. + */ + *route = VMCI_ROUTE_AS_GUEST; + return VMCI_SUCCESS; +} diff --git a/drivers/misc/vmw_vmci/vmci_route.h b/drivers/misc/vmw_vmci/vmci_route.h new file mode 100644 index 0000000000..040dbfb960 --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_route.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + */ + +#ifndef _VMCI_ROUTE_H_ +#define _VMCI_ROUTE_H_ + +#include + +enum vmci_route { + VMCI_ROUTE_NONE, + VMCI_ROUTE_AS_HOST, + VMCI_ROUTE_AS_GUEST, +}; + +int vmci_route(struct vmci_handle *src, const struct vmci_handle *dst, + bool from_guest, enum vmci_route *route); + +#endif /* _VMCI_ROUTE_H_ */ diff --git a/drivers/misc/xilinx_sdfec.c b/drivers/misc/xilinx_sdfec.c new file mode 100644 index 0000000000..94a0ee19bf --- /dev/null +++ b/drivers/misc/xilinx_sdfec.c @@ -0,0 +1,1455 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Xilinx SDFEC + * + * Copyright (C) 2019 Xilinx, Inc. + * + * Description: + * This driver is developed for SDFEC16 (Soft Decision FEC 16nm) + * IP. It exposes a char device which supports file operations + * like open(), close() and ioctl(). + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define DEV_NAME_LEN 12 + +static DEFINE_IDA(dev_nrs); + +/* Xilinx SDFEC Register Map */ +/* CODE_WRI_PROTECT Register */ +#define XSDFEC_CODE_WR_PROTECT_ADDR (0x4) + +/* ACTIVE Register */ +#define XSDFEC_ACTIVE_ADDR (0x8) +#define XSDFEC_IS_ACTIVITY_SET (0x1) + +/* AXIS_WIDTH Register */ +#define XSDFEC_AXIS_WIDTH_ADDR (0xC) +#define XSDFEC_AXIS_DOUT_WORDS_LSB (5) +#define XSDFEC_AXIS_DOUT_WIDTH_LSB (3) +#define XSDFEC_AXIS_DIN_WORDS_LSB (2) +#define XSDFEC_AXIS_DIN_WIDTH_LSB (0) + +/* AXIS_ENABLE Register */ +#define XSDFEC_AXIS_ENABLE_ADDR (0x10) +#define XSDFEC_AXIS_OUT_ENABLE_MASK (0x38) +#define XSDFEC_AXIS_IN_ENABLE_MASK (0x7) +#define XSDFEC_AXIS_ENABLE_MASK \ + (XSDFEC_AXIS_OUT_ENABLE_MASK | XSDFEC_AXIS_IN_ENABLE_MASK) + +/* FEC_CODE Register */ +#define XSDFEC_FEC_CODE_ADDR (0x14) + +/* ORDER Register Map */ +#define XSDFEC_ORDER_ADDR (0x18) + +/* Interrupt Status Register */ +#define XSDFEC_ISR_ADDR (0x1C) +/* Interrupt Status Register Bit Mask */ +#define XSDFEC_ISR_MASK (0x3F) + +/* Write Only - Interrupt Enable Register */ +#define XSDFEC_IER_ADDR (0x20) +/* Write Only - Interrupt Disable Register */ +#define XSDFEC_IDR_ADDR (0x24) +/* Read Only - Interrupt Mask Register */ +#define XSDFEC_IMR_ADDR (0x28) + +/* ECC Interrupt Status Register */ +#define XSDFEC_ECC_ISR_ADDR (0x2C) +/* Single Bit Errors */ +#define XSDFEC_ECC_ISR_SBE_MASK (0x7FF) +/* PL Initialize Single Bit Errors */ +#define XSDFEC_PL_INIT_ECC_ISR_SBE_MASK (0x3C00000) +/* Multi Bit Errors */ +#define XSDFEC_ECC_ISR_MBE_MASK (0x3FF800) +/* PL Initialize Multi Bit Errors */ +#define XSDFEC_PL_INIT_ECC_ISR_MBE_MASK (0x3C000000) +/* Multi Bit Error to Event Shift */ +#define XSDFEC_ECC_ISR_MBE_TO_EVENT_SHIFT (11) +/* PL Initialize Multi Bit Error to Event Shift */ +#define XSDFEC_PL_INIT_ECC_ISR_MBE_TO_EVENT_SHIFT (4) +/* ECC Interrupt Status Bit Mask */ +#define XSDFEC_ECC_ISR_MASK (XSDFEC_ECC_ISR_SBE_MASK | XSDFEC_ECC_ISR_MBE_MASK) +/* ECC Interrupt Status PL Initialize Bit Mask */ +#define XSDFEC_PL_INIT_ECC_ISR_MASK \ + 
(XSDFEC_PL_INIT_ECC_ISR_SBE_MASK | XSDFEC_PL_INIT_ECC_ISR_MBE_MASK) +/* ECC Interrupt Status All Bit Mask */ +#define XSDFEC_ALL_ECC_ISR_MASK \ + (XSDFEC_ECC_ISR_MASK | XSDFEC_PL_INIT_ECC_ISR_MASK) +/* ECC Interrupt Status Single Bit Errors Mask */ +#define XSDFEC_ALL_ECC_ISR_SBE_MASK \ + (XSDFEC_ECC_ISR_SBE_MASK | XSDFEC_PL_INIT_ECC_ISR_SBE_MASK) +/* ECC Interrupt Status Multi Bit Errors Mask */ +#define XSDFEC_ALL_ECC_ISR_MBE_MASK \ + (XSDFEC_ECC_ISR_MBE_MASK | XSDFEC_PL_INIT_ECC_ISR_MBE_MASK) + +/* Write Only - ECC Interrupt Enable Register */ +#define XSDFEC_ECC_IER_ADDR (0x30) +/* Write Only - ECC Interrupt Disable Register */ +#define XSDFEC_ECC_IDR_ADDR (0x34) +/* Read Only - ECC Interrupt Mask Register */ +#define XSDFEC_ECC_IMR_ADDR (0x38) + +/* BYPASS Register */ +#define XSDFEC_BYPASS_ADDR (0x3C) + +/* Turbo Code Register */ +#define XSDFEC_TURBO_ADDR (0x100) +#define XSDFEC_TURBO_SCALE_MASK (0xFFF) +#define XSDFEC_TURBO_SCALE_BIT_POS (8) +#define XSDFEC_TURBO_SCALE_MAX (15) + +/* REG0 Register */ +#define XSDFEC_LDPC_CODE_REG0_ADDR_BASE (0x2000) +#define XSDFEC_LDPC_CODE_REG0_ADDR_HIGH (0x27F0) +#define XSDFEC_REG0_N_MIN (4) +#define XSDFEC_REG0_N_MAX (32768) +#define XSDFEC_REG0_N_MUL_P (256) +#define XSDFEC_REG0_N_LSB (0) +#define XSDFEC_REG0_K_MIN (2) +#define XSDFEC_REG0_K_MAX (32766) +#define XSDFEC_REG0_K_MUL_P (256) +#define XSDFEC_REG0_K_LSB (16) + +/* REG1 Register */ +#define XSDFEC_LDPC_CODE_REG1_ADDR_BASE (0x2004) +#define XSDFEC_LDPC_CODE_REG1_ADDR_HIGH (0x27f4) +#define XSDFEC_REG1_PSIZE_MIN (2) +#define XSDFEC_REG1_PSIZE_MAX (512) +#define XSDFEC_REG1_NO_PACKING_MASK (0x400) +#define XSDFEC_REG1_NO_PACKING_LSB (10) +#define XSDFEC_REG1_NM_MASK (0xFF800) +#define XSDFEC_REG1_NM_LSB (11) +#define XSDFEC_REG1_BYPASS_MASK (0x100000) + +/* REG2 Register */ +#define XSDFEC_LDPC_CODE_REG2_ADDR_BASE (0x2008) +#define XSDFEC_LDPC_CODE_REG2_ADDR_HIGH (0x27f8) +#define XSDFEC_REG2_NLAYERS_MIN (1) +#define XSDFEC_REG2_NLAYERS_MAX (256) +#define XSDFEC_REG2_NNMQC_MASK (0xFFE00) +#define XSDFEC_REG2_NMQC_LSB (9) +#define XSDFEC_REG2_NORM_TYPE_MASK (0x100000) +#define XSDFEC_REG2_NORM_TYPE_LSB (20) +#define XSDFEC_REG2_SPECIAL_QC_MASK (0x200000) +#define XSDFEC_REG2_SPEICAL_QC_LSB (21) +#define XSDFEC_REG2_NO_FINAL_PARITY_MASK (0x400000) +#define XSDFEC_REG2_NO_FINAL_PARITY_LSB (22) +#define XSDFEC_REG2_MAX_SCHEDULE_MASK (0x1800000) +#define XSDFEC_REG2_MAX_SCHEDULE_LSB (23) + +/* REG3 Register */ +#define XSDFEC_LDPC_CODE_REG3_ADDR_BASE (0x200C) +#define XSDFEC_LDPC_CODE_REG3_ADDR_HIGH (0x27FC) +#define XSDFEC_REG3_LA_OFF_LSB (8) +#define XSDFEC_REG3_QC_OFF_LSB (16) + +#define XSDFEC_LDPC_REG_JUMP (0x10) +#define XSDFEC_REG_WIDTH_JUMP (4) + +/* The maximum number of pinned pages */ +#define MAX_NUM_PAGES ((XSDFEC_QC_TABLE_DEPTH / PAGE_SIZE) + 1) + +/** + * struct xsdfec_clks - For managing SD-FEC clocks + * @core_clk: Main processing clock for core + * @axi_clk: AXI4-Lite memory-mapped clock + * @din_words_clk: DIN Words AXI4-Stream Slave clock + * @din_clk: DIN AXI4-Stream Slave clock + * @dout_clk: DOUT Words AXI4-Stream Slave clock + * @dout_words_clk: DOUT AXI4-Stream Slave clock + * @ctrl_clk: Control AXI4-Stream Slave clock + * @status_clk: Status AXI4-Stream Slave clock + */ +struct xsdfec_clks { + struct clk *core_clk; + struct clk *axi_clk; + struct clk *din_words_clk; + struct clk *din_clk; + struct clk *dout_clk; + struct clk *dout_words_clk; + struct clk *ctrl_clk; + struct clk *status_clk; +}; + +/** + * struct xsdfec_dev - Driver data for SDFEC + * @miscdev: Misc 
device handle + * @clks: Clocks managed by the SDFEC driver + * @waitq: Driver wait queue + * @config: Configuration of the SDFEC device + * @dev_name: Device name + * @flags: spinlock flags + * @regs: device physical base address + * @dev: pointer to device struct + * @state: State of the SDFEC device + * @error_data_lock: Error counter and states spinlock + * @dev_id: Device ID + * @isr_err_count: Count of ISR errors + * @cecc_count: Count of Correctable ECC errors (SBE) + * @uecc_count: Count of Uncorrectable ECC errors (MBE) + * @irq: IRQ number + * @state_updated: indicates State updated by interrupt handler + * @stats_updated: indicates Stats updated by interrupt handler + * @intr_enabled: indicates IRQ enabled + * + * This structure contains necessary state for SDFEC driver to operate + */ +struct xsdfec_dev { + struct miscdevice miscdev; + struct xsdfec_clks clks; + wait_queue_head_t waitq; + struct xsdfec_config config; + char dev_name[DEV_NAME_LEN]; + unsigned long flags; + void __iomem *regs; + struct device *dev; + enum xsdfec_state state; + /* Spinlock to protect state_updated and stats_updated */ + spinlock_t error_data_lock; + int dev_id; + u32 isr_err_count; + u32 cecc_count; + u32 uecc_count; + int irq; + bool state_updated; + bool stats_updated; + bool intr_enabled; +}; + +static inline void xsdfec_regwrite(struct xsdfec_dev *xsdfec, u32 addr, + u32 value) +{ + dev_dbg(xsdfec->dev, "Writing 0x%x to offset 0x%x", value, addr); + iowrite32(value, xsdfec->regs + addr); +} + +static inline u32 xsdfec_regread(struct xsdfec_dev *xsdfec, u32 addr) +{ + u32 rval; + + rval = ioread32(xsdfec->regs + addr); + dev_dbg(xsdfec->dev, "Read value = 0x%x from offset 0x%x", rval, addr); + return rval; +} + +static void update_bool_config_from_reg(struct xsdfec_dev *xsdfec, + u32 reg_offset, u32 bit_num, + char *config_value) +{ + u32 reg_val; + u32 bit_mask = 1 << bit_num; + + reg_val = xsdfec_regread(xsdfec, reg_offset); + *config_value = (reg_val & bit_mask) > 0; +} + +static void update_config_from_hw(struct xsdfec_dev *xsdfec) +{ + u32 reg_value; + bool sdfec_started; + + /* Update the Order */ + reg_value = xsdfec_regread(xsdfec, XSDFEC_ORDER_ADDR); + xsdfec->config.order = reg_value; + + update_bool_config_from_reg(xsdfec, XSDFEC_BYPASS_ADDR, + 0, /* Bit Number, maybe change to mask */ + &xsdfec->config.bypass); + + update_bool_config_from_reg(xsdfec, XSDFEC_CODE_WR_PROTECT_ADDR, + 0, /* Bit Number */ + &xsdfec->config.code_wr_protect); + + reg_value = xsdfec_regread(xsdfec, XSDFEC_IMR_ADDR); + xsdfec->config.irq.enable_isr = (reg_value & XSDFEC_ISR_MASK) > 0; + + reg_value = xsdfec_regread(xsdfec, XSDFEC_ECC_IMR_ADDR); + xsdfec->config.irq.enable_ecc_isr = + (reg_value & XSDFEC_ECC_ISR_MASK) > 0; + + reg_value = xsdfec_regread(xsdfec, XSDFEC_AXIS_ENABLE_ADDR); + sdfec_started = (reg_value & XSDFEC_AXIS_IN_ENABLE_MASK) > 0; + if (sdfec_started) + xsdfec->state = XSDFEC_STARTED; + else + xsdfec->state = XSDFEC_STOPPED; +} + +static int xsdfec_get_status(struct xsdfec_dev *xsdfec, void __user *arg) +{ + struct xsdfec_status status; + int err; + + memset(&status, 0, sizeof(status)); + spin_lock_irqsave(&xsdfec->error_data_lock, xsdfec->flags); + status.state = xsdfec->state; + xsdfec->state_updated = false; + spin_unlock_irqrestore(&xsdfec->error_data_lock, xsdfec->flags); + status.activity = (xsdfec_regread(xsdfec, XSDFEC_ACTIVE_ADDR) & + XSDFEC_IS_ACTIVITY_SET); + + err = copy_to_user(arg, &status, sizeof(status)); + if (err) + err = -EFAULT; + + return err; +} + +static int 
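xsdfec_get_status() is the kernel half of the XSDFEC_GET_STATUS ioctl; the other half is an ordinary ioctl() against the misc device node that probe registers as /dev/xsdfecN. A hedged userspace sketch, assuming instance 0 and the UAPI header that defines struct xsdfec_status and the ioctl numbers (header path is an assumption).

/* Userspace sketch: query state and activity of SD-FEC instance 0. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <misc/xilinx_sdfec.h>  /* assumed UAPI header location */

int main(void)
{
        struct xsdfec_status status;
        int fd = open("/dev/xsdfec0", O_RDWR);

        if (fd < 0 || ioctl(fd, XSDFEC_GET_STATUS, &status) < 0) {
                perror("xsdfec0");
                return 1;
        }
        printf("state=%d activity=%d\n", (int)status.state,
               (int)status.activity);
        return 0;
}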
xsdfec_get_config(struct xsdfec_dev *xsdfec, void __user *arg) +{ + int err; + + err = copy_to_user(arg, &xsdfec->config, sizeof(xsdfec->config)); + if (err) + err = -EFAULT; + + return err; +} + +static int xsdfec_isr_enable(struct xsdfec_dev *xsdfec, bool enable) +{ + u32 mask_read; + + if (enable) { + /* Enable */ + xsdfec_regwrite(xsdfec, XSDFEC_IER_ADDR, XSDFEC_ISR_MASK); + mask_read = xsdfec_regread(xsdfec, XSDFEC_IMR_ADDR); + if (mask_read & XSDFEC_ISR_MASK) { + dev_dbg(xsdfec->dev, + "SDFEC enabling irq with IER failed"); + return -EIO; + } + } else { + /* Disable */ + xsdfec_regwrite(xsdfec, XSDFEC_IDR_ADDR, XSDFEC_ISR_MASK); + mask_read = xsdfec_regread(xsdfec, XSDFEC_IMR_ADDR); + if ((mask_read & XSDFEC_ISR_MASK) != XSDFEC_ISR_MASK) { + dev_dbg(xsdfec->dev, + "SDFEC disabling irq with IDR failed"); + return -EIO; + } + } + return 0; +} + +static int xsdfec_ecc_isr_enable(struct xsdfec_dev *xsdfec, bool enable) +{ + u32 mask_read; + + if (enable) { + /* Enable */ + xsdfec_regwrite(xsdfec, XSDFEC_ECC_IER_ADDR, + XSDFEC_ALL_ECC_ISR_MASK); + mask_read = xsdfec_regread(xsdfec, XSDFEC_ECC_IMR_ADDR); + if (mask_read & XSDFEC_ALL_ECC_ISR_MASK) { + dev_dbg(xsdfec->dev, + "SDFEC enabling ECC irq with ECC IER failed"); + return -EIO; + } + } else { + /* Disable */ + xsdfec_regwrite(xsdfec, XSDFEC_ECC_IDR_ADDR, + XSDFEC_ALL_ECC_ISR_MASK); + mask_read = xsdfec_regread(xsdfec, XSDFEC_ECC_IMR_ADDR); + if (!(((mask_read & XSDFEC_ALL_ECC_ISR_MASK) == + XSDFEC_ECC_ISR_MASK) || + ((mask_read & XSDFEC_ALL_ECC_ISR_MASK) == + XSDFEC_PL_INIT_ECC_ISR_MASK))) { + dev_dbg(xsdfec->dev, + "SDFEC disable ECC irq with ECC IDR failed"); + return -EIO; + } + } + return 0; +} + +static int xsdfec_set_irq(struct xsdfec_dev *xsdfec, void __user *arg) +{ + struct xsdfec_irq irq; + int err; + int isr_err; + int ecc_err; + + err = copy_from_user(&irq, arg, sizeof(irq)); + if (err) + return -EFAULT; + + /* Setup tlast related IRQ */ + isr_err = xsdfec_isr_enable(xsdfec, irq.enable_isr); + if (!isr_err) + xsdfec->config.irq.enable_isr = irq.enable_isr; + + /* Setup ECC related IRQ */ + ecc_err = xsdfec_ecc_isr_enable(xsdfec, irq.enable_ecc_isr); + if (!ecc_err) + xsdfec->config.irq.enable_ecc_isr = irq.enable_ecc_isr; + + if (isr_err < 0 || ecc_err < 0) + err = -EIO; + + return err; +} + +static int xsdfec_set_turbo(struct xsdfec_dev *xsdfec, void __user *arg) +{ + struct xsdfec_turbo turbo; + int err; + u32 turbo_write; + + err = copy_from_user(&turbo, arg, sizeof(turbo)); + if (err) + return -EFAULT; + + if (turbo.alg >= XSDFEC_TURBO_ALG_MAX) + return -EINVAL; + + if (turbo.scale > XSDFEC_TURBO_SCALE_MAX) + return -EINVAL; + + /* Check to see what device tree says about the FEC codes */ + if (xsdfec->config.code == XSDFEC_LDPC_CODE) + return -EIO; + + turbo_write = ((turbo.scale & XSDFEC_TURBO_SCALE_MASK) + << XSDFEC_TURBO_SCALE_BIT_POS) | + turbo.alg; + xsdfec_regwrite(xsdfec, XSDFEC_TURBO_ADDR, turbo_write); + return err; +} + +static int xsdfec_get_turbo(struct xsdfec_dev *xsdfec, void __user *arg) +{ + u32 reg_value; + struct xsdfec_turbo turbo_params; + int err; + + if (xsdfec->config.code == XSDFEC_LDPC_CODE) + return -EIO; + + memset(&turbo_params, 0, sizeof(turbo_params)); + reg_value = xsdfec_regread(xsdfec, XSDFEC_TURBO_ADDR); + + turbo_params.scale = (reg_value & XSDFEC_TURBO_SCALE_MASK) >> + XSDFEC_TURBO_SCALE_BIT_POS; + turbo_params.alg = reg_value & 0x1; + + err = copy_to_user(arg, &turbo_params, sizeof(turbo_params)); + if (err) + err = -EFAULT; + + return err; +} + +static int 
xsdfec_reg0_write(struct xsdfec_dev *xsdfec, u32 n, u32 k, u32 psize, + u32 offset) +{ + u32 wdata; + + if (n < XSDFEC_REG0_N_MIN || n > XSDFEC_REG0_N_MAX || psize == 0 || + (n > XSDFEC_REG0_N_MUL_P * psize) || n <= k || ((n % psize) != 0)) { + dev_dbg(xsdfec->dev, "N value is not in range"); + return -EINVAL; + } + n <<= XSDFEC_REG0_N_LSB; + + if (k < XSDFEC_REG0_K_MIN || k > XSDFEC_REG0_K_MAX || + (k > XSDFEC_REG0_K_MUL_P * psize) || ((k % psize) != 0)) { + dev_dbg(xsdfec->dev, "K value is not in range"); + return -EINVAL; + } + k = k << XSDFEC_REG0_K_LSB; + wdata = k | n; + + if (XSDFEC_LDPC_CODE_REG0_ADDR_BASE + (offset * XSDFEC_LDPC_REG_JUMP) > + XSDFEC_LDPC_CODE_REG0_ADDR_HIGH) { + dev_dbg(xsdfec->dev, "Writing outside of LDPC reg0 space 0x%x", + XSDFEC_LDPC_CODE_REG0_ADDR_BASE + + (offset * XSDFEC_LDPC_REG_JUMP)); + return -EINVAL; + } + xsdfec_regwrite(xsdfec, + XSDFEC_LDPC_CODE_REG0_ADDR_BASE + + (offset * XSDFEC_LDPC_REG_JUMP), + wdata); + return 0; +} + +static int xsdfec_reg1_write(struct xsdfec_dev *xsdfec, u32 psize, + u32 no_packing, u32 nm, u32 offset) +{ + u32 wdata; + + if (psize < XSDFEC_REG1_PSIZE_MIN || psize > XSDFEC_REG1_PSIZE_MAX) { + dev_dbg(xsdfec->dev, "Psize is not in range"); + return -EINVAL; + } + + if (no_packing != 0 && no_packing != 1) + dev_dbg(xsdfec->dev, "No-packing bit register invalid"); + no_packing = ((no_packing << XSDFEC_REG1_NO_PACKING_LSB) & + XSDFEC_REG1_NO_PACKING_MASK); + + if (nm & ~(XSDFEC_REG1_NM_MASK >> XSDFEC_REG1_NM_LSB)) + dev_dbg(xsdfec->dev, "NM is beyond 10 bits"); + nm = (nm << XSDFEC_REG1_NM_LSB) & XSDFEC_REG1_NM_MASK; + + wdata = nm | no_packing | psize; + if (XSDFEC_LDPC_CODE_REG1_ADDR_BASE + (offset * XSDFEC_LDPC_REG_JUMP) > + XSDFEC_LDPC_CODE_REG1_ADDR_HIGH) { + dev_dbg(xsdfec->dev, "Writing outside of LDPC reg1 space 0x%x", + XSDFEC_LDPC_CODE_REG1_ADDR_BASE + + (offset * XSDFEC_LDPC_REG_JUMP)); + return -EINVAL; + } + xsdfec_regwrite(xsdfec, + XSDFEC_LDPC_CODE_REG1_ADDR_BASE + + (offset * XSDFEC_LDPC_REG_JUMP), + wdata); + return 0; +} + +static int xsdfec_reg2_write(struct xsdfec_dev *xsdfec, u32 nlayers, u32 nmqc, + u32 norm_type, u32 special_qc, u32 no_final_parity, + u32 max_schedule, u32 offset) +{ + u32 wdata; + + if (nlayers < XSDFEC_REG2_NLAYERS_MIN || + nlayers > XSDFEC_REG2_NLAYERS_MAX) { + dev_dbg(xsdfec->dev, "Nlayers is not in range"); + return -EINVAL; + } + + if (nmqc & ~(XSDFEC_REG2_NNMQC_MASK >> XSDFEC_REG2_NMQC_LSB)) + dev_dbg(xsdfec->dev, "NMQC exceeds 11 bits"); + nmqc = (nmqc << XSDFEC_REG2_NMQC_LSB) & XSDFEC_REG2_NNMQC_MASK; + + if (norm_type > 1) + dev_dbg(xsdfec->dev, "Norm type is invalid"); + norm_type = ((norm_type << XSDFEC_REG2_NORM_TYPE_LSB) & + XSDFEC_REG2_NORM_TYPE_MASK); + if (special_qc > 1) + dev_dbg(xsdfec->dev, "Special QC in invalid"); + special_qc = ((special_qc << XSDFEC_REG2_SPEICAL_QC_LSB) & + XSDFEC_REG2_SPECIAL_QC_MASK); + + if (no_final_parity > 1) + dev_dbg(xsdfec->dev, "No final parity check invalid"); + no_final_parity = + ((no_final_parity << XSDFEC_REG2_NO_FINAL_PARITY_LSB) & + XSDFEC_REG2_NO_FINAL_PARITY_MASK); + if (max_schedule & + ~(XSDFEC_REG2_MAX_SCHEDULE_MASK >> XSDFEC_REG2_MAX_SCHEDULE_LSB)) + dev_dbg(xsdfec->dev, "Max Schedule exceeds 2 bits"); + max_schedule = ((max_schedule << XSDFEC_REG2_MAX_SCHEDULE_LSB) & + XSDFEC_REG2_MAX_SCHEDULE_MASK); + + wdata = (max_schedule | no_final_parity | special_qc | norm_type | + nmqc | nlayers); + + if (XSDFEC_LDPC_CODE_REG2_ADDR_BASE + (offset * XSDFEC_LDPC_REG_JUMP) > + XSDFEC_LDPC_CODE_REG2_ADDR_HIGH) { + dev_dbg(xsdfec->dev, 
"Writing outside of LDPC reg2 space 0x%x", + XSDFEC_LDPC_CODE_REG2_ADDR_BASE + + (offset * XSDFEC_LDPC_REG_JUMP)); + return -EINVAL; + } + xsdfec_regwrite(xsdfec, + XSDFEC_LDPC_CODE_REG2_ADDR_BASE + + (offset * XSDFEC_LDPC_REG_JUMP), + wdata); + return 0; +} + +static int xsdfec_reg3_write(struct xsdfec_dev *xsdfec, u8 sc_off, u8 la_off, + u16 qc_off, u32 offset) +{ + u32 wdata; + + wdata = ((qc_off << XSDFEC_REG3_QC_OFF_LSB) | + (la_off << XSDFEC_REG3_LA_OFF_LSB) | sc_off); + if (XSDFEC_LDPC_CODE_REG3_ADDR_BASE + (offset * XSDFEC_LDPC_REG_JUMP) > + XSDFEC_LDPC_CODE_REG3_ADDR_HIGH) { + dev_dbg(xsdfec->dev, "Writing outside of LDPC reg3 space 0x%x", + XSDFEC_LDPC_CODE_REG3_ADDR_BASE + + (offset * XSDFEC_LDPC_REG_JUMP)); + return -EINVAL; + } + xsdfec_regwrite(xsdfec, + XSDFEC_LDPC_CODE_REG3_ADDR_BASE + + (offset * XSDFEC_LDPC_REG_JUMP), + wdata); + return 0; +} + +static int xsdfec_table_write(struct xsdfec_dev *xsdfec, u32 offset, + u32 *src_ptr, u32 len, const u32 base_addr, + const u32 depth) +{ + u32 reg = 0; + int res, i, nr_pages; + u32 n; + u32 *addr = NULL; + struct page *pages[MAX_NUM_PAGES]; + + /* + * Writes that go beyond the length of + * Shared Scale(SC) table should fail + */ + if (offset > depth / XSDFEC_REG_WIDTH_JUMP || + len > depth / XSDFEC_REG_WIDTH_JUMP || + offset + len > depth / XSDFEC_REG_WIDTH_JUMP) { + dev_dbg(xsdfec->dev, "Write exceeds SC table length"); + return -EINVAL; + } + + n = (len * XSDFEC_REG_WIDTH_JUMP) / PAGE_SIZE; + if ((len * XSDFEC_REG_WIDTH_JUMP) % PAGE_SIZE) + n += 1; + + if (WARN_ON_ONCE(n > INT_MAX)) + return -EINVAL; + + nr_pages = n; + + res = pin_user_pages_fast((unsigned long)src_ptr, nr_pages, 0, pages); + if (res < nr_pages) { + if (res > 0) + unpin_user_pages(pages, res); + + return -EINVAL; + } + + for (i = 0; i < nr_pages; i++) { + addr = kmap_local_page(pages[i]); + do { + xsdfec_regwrite(xsdfec, + base_addr + ((offset + reg) * + XSDFEC_REG_WIDTH_JUMP), + addr[reg]); + reg++; + } while ((reg < len) && + ((reg * XSDFEC_REG_WIDTH_JUMP) % PAGE_SIZE)); + kunmap_local(addr); + unpin_user_page(pages[i]); + } + return 0; +} + +static int xsdfec_add_ldpc(struct xsdfec_dev *xsdfec, void __user *arg) +{ + struct xsdfec_ldpc_params *ldpc; + int ret, n; + + ldpc = memdup_user(arg, sizeof(*ldpc)); + if (IS_ERR(ldpc)) + return PTR_ERR(ldpc); + + if (xsdfec->config.code == XSDFEC_TURBO_CODE) { + ret = -EIO; + goto err_out; + } + + /* Verify Device has not started */ + if (xsdfec->state == XSDFEC_STARTED) { + ret = -EIO; + goto err_out; + } + + if (xsdfec->config.code_wr_protect) { + ret = -EIO; + goto err_out; + } + + /* Write Reg 0 */ + ret = xsdfec_reg0_write(xsdfec, ldpc->n, ldpc->k, ldpc->psize, + ldpc->code_id); + if (ret) + goto err_out; + + /* Write Reg 1 */ + ret = xsdfec_reg1_write(xsdfec, ldpc->psize, ldpc->no_packing, ldpc->nm, + ldpc->code_id); + if (ret) + goto err_out; + + /* Write Reg 2 */ + ret = xsdfec_reg2_write(xsdfec, ldpc->nlayers, ldpc->nmqc, + ldpc->norm_type, ldpc->special_qc, + ldpc->no_final_parity, ldpc->max_schedule, + ldpc->code_id); + if (ret) + goto err_out; + + /* Write Reg 3 */ + ret = xsdfec_reg3_write(xsdfec, ldpc->sc_off, ldpc->la_off, + ldpc->qc_off, ldpc->code_id); + if (ret) + goto err_out; + + /* Write Shared Codes */ + n = ldpc->nlayers / 4; + if (ldpc->nlayers % 4) + n++; + + ret = xsdfec_table_write(xsdfec, ldpc->sc_off, ldpc->sc_table, n, + XSDFEC_LDPC_SC_TABLE_ADDR_BASE, + XSDFEC_SC_TABLE_DEPTH); + if (ret < 0) + goto err_out; + + ret = xsdfec_table_write(xsdfec, 4 * ldpc->la_off, ldpc->la_table, + 
ldpc->nlayers, XSDFEC_LDPC_LA_TABLE_ADDR_BASE, + XSDFEC_LA_TABLE_DEPTH); + if (ret < 0) + goto err_out; + + ret = xsdfec_table_write(xsdfec, 4 * ldpc->qc_off, ldpc->qc_table, + ldpc->nqc, XSDFEC_LDPC_QC_TABLE_ADDR_BASE, + XSDFEC_QC_TABLE_DEPTH); +err_out: + kfree(ldpc); + return ret; +} + +static int xsdfec_set_order(struct xsdfec_dev *xsdfec, void __user *arg) +{ + bool order_invalid; + enum xsdfec_order order; + int err; + + err = get_user(order, (enum xsdfec_order __user *)arg); + if (err) + return -EFAULT; + + order_invalid = (order != XSDFEC_MAINTAIN_ORDER) && + (order != XSDFEC_OUT_OF_ORDER); + if (order_invalid) + return -EINVAL; + + /* Verify Device has not started */ + if (xsdfec->state == XSDFEC_STARTED) + return -EIO; + + xsdfec_regwrite(xsdfec, XSDFEC_ORDER_ADDR, order); + + xsdfec->config.order = order; + + return 0; +} + +static int xsdfec_set_bypass(struct xsdfec_dev *xsdfec, bool __user *arg) +{ + bool bypass; + int err; + + err = get_user(bypass, arg); + if (err) + return -EFAULT; + + /* Verify Device has not started */ + if (xsdfec->state == XSDFEC_STARTED) + return -EIO; + + if (bypass) + xsdfec_regwrite(xsdfec, XSDFEC_BYPASS_ADDR, 1); + else + xsdfec_regwrite(xsdfec, XSDFEC_BYPASS_ADDR, 0); + + xsdfec->config.bypass = bypass; + + return 0; +} + +static int xsdfec_is_active(struct xsdfec_dev *xsdfec, bool __user *arg) +{ + u32 reg_value; + bool is_active; + int err; + + reg_value = xsdfec_regread(xsdfec, XSDFEC_ACTIVE_ADDR); + /* using a double ! operator instead of casting */ + is_active = !!(reg_value & XSDFEC_IS_ACTIVITY_SET); + err = put_user(is_active, arg); + if (err) + return -EFAULT; + + return err; +} + +static u32 +xsdfec_translate_axis_width_cfg_val(enum xsdfec_axis_width axis_width_cfg) +{ + u32 axis_width_field = 0; + + switch (axis_width_cfg) { + case XSDFEC_1x128b: + axis_width_field = 0; + break; + case XSDFEC_2x128b: + axis_width_field = 1; + break; + case XSDFEC_4x128b: + axis_width_field = 2; + break; + } + + return axis_width_field; +} + +static u32 xsdfec_translate_axis_words_cfg_val(enum xsdfec_axis_word_include + axis_word_inc_cfg) +{ + u32 axis_words_field = 0; + + if (axis_word_inc_cfg == XSDFEC_FIXED_VALUE || + axis_word_inc_cfg == XSDFEC_IN_BLOCK) + axis_words_field = 0; + else if (axis_word_inc_cfg == XSDFEC_PER_AXI_TRANSACTION) + axis_words_field = 1; + + return axis_words_field; +} + +static int xsdfec_cfg_axi_streams(struct xsdfec_dev *xsdfec) +{ + u32 reg_value; + u32 dout_words_field; + u32 dout_width_field; + u32 din_words_field; + u32 din_width_field; + struct xsdfec_config *config = &xsdfec->config; + + /* translate config info to register values */ + dout_words_field = + xsdfec_translate_axis_words_cfg_val(config->dout_word_include); + dout_width_field = + xsdfec_translate_axis_width_cfg_val(config->dout_width); + din_words_field = + xsdfec_translate_axis_words_cfg_val(config->din_word_include); + din_width_field = + xsdfec_translate_axis_width_cfg_val(config->din_width); + + reg_value = dout_words_field << XSDFEC_AXIS_DOUT_WORDS_LSB; + reg_value |= dout_width_field << XSDFEC_AXIS_DOUT_WIDTH_LSB; + reg_value |= din_words_field << XSDFEC_AXIS_DIN_WORDS_LSB; + reg_value |= din_width_field << XSDFEC_AXIS_DIN_WIDTH_LSB; + + xsdfec_regwrite(xsdfec, XSDFEC_AXIS_WIDTH_ADDR, reg_value); + + return 0; +} + +static int xsdfec_start(struct xsdfec_dev *xsdfec) +{ + u32 regread; + + regread = xsdfec_regread(xsdfec, XSDFEC_FEC_CODE_ADDR); + regread &= 0x1; + if (regread != xsdfec->config.code) { + dev_dbg(xsdfec->dev, + "%s SDFEC HW code does not 
match driver code, reg %d, code %d", + __func__, regread, xsdfec->config.code); + return -EINVAL; + } + + /* Set AXIS enable */ + xsdfec_regwrite(xsdfec, XSDFEC_AXIS_ENABLE_ADDR, + XSDFEC_AXIS_ENABLE_MASK); + /* Done */ + xsdfec->state = XSDFEC_STARTED; + return 0; +} + +static int xsdfec_stop(struct xsdfec_dev *xsdfec) +{ + u32 regread; + + if (xsdfec->state != XSDFEC_STARTED) + dev_dbg(xsdfec->dev, "Device not started correctly"); + /* Disable AXIS_ENABLE Input interfaces only */ + regread = xsdfec_regread(xsdfec, XSDFEC_AXIS_ENABLE_ADDR); + regread &= (~XSDFEC_AXIS_IN_ENABLE_MASK); + xsdfec_regwrite(xsdfec, XSDFEC_AXIS_ENABLE_ADDR, regread); + /* Stop */ + xsdfec->state = XSDFEC_STOPPED; + return 0; +} + +static int xsdfec_clear_stats(struct xsdfec_dev *xsdfec) +{ + spin_lock_irqsave(&xsdfec->error_data_lock, xsdfec->flags); + xsdfec->isr_err_count = 0; + xsdfec->uecc_count = 0; + xsdfec->cecc_count = 0; + spin_unlock_irqrestore(&xsdfec->error_data_lock, xsdfec->flags); + + return 0; +} + +static int xsdfec_get_stats(struct xsdfec_dev *xsdfec, void __user *arg) +{ + int err; + struct xsdfec_stats user_stats; + + spin_lock_irqsave(&xsdfec->error_data_lock, xsdfec->flags); + user_stats.isr_err_count = xsdfec->isr_err_count; + user_stats.cecc_count = xsdfec->cecc_count; + user_stats.uecc_count = xsdfec->uecc_count; + xsdfec->stats_updated = false; + spin_unlock_irqrestore(&xsdfec->error_data_lock, xsdfec->flags); + + err = copy_to_user(arg, &user_stats, sizeof(user_stats)); + if (err) + err = -EFAULT; + + return err; +} + +static int xsdfec_set_default_config(struct xsdfec_dev *xsdfec) +{ + /* Ensure registers are aligned with core configuration */ + xsdfec_regwrite(xsdfec, XSDFEC_FEC_CODE_ADDR, xsdfec->config.code); + xsdfec_cfg_axi_streams(xsdfec); + update_config_from_hw(xsdfec); + + return 0; +} + +static long xsdfec_dev_ioctl(struct file *fptr, unsigned int cmd, + unsigned long data) +{ + struct xsdfec_dev *xsdfec; + void __user *arg = (void __user *)data; + int rval; + + xsdfec = container_of(fptr->private_data, struct xsdfec_dev, miscdev); + + /* In failed state allow only reset and get status IOCTLs */ + if (xsdfec->state == XSDFEC_NEEDS_RESET && + (cmd != XSDFEC_SET_DEFAULT_CONFIG && cmd != XSDFEC_GET_STATUS && + cmd != XSDFEC_GET_STATS && cmd != XSDFEC_CLEAR_STATS)) { + return -EPERM; + } + + switch (cmd) { + case XSDFEC_START_DEV: + rval = xsdfec_start(xsdfec); + break; + case XSDFEC_STOP_DEV: + rval = xsdfec_stop(xsdfec); + break; + case XSDFEC_CLEAR_STATS: + rval = xsdfec_clear_stats(xsdfec); + break; + case XSDFEC_GET_STATS: + rval = xsdfec_get_stats(xsdfec, arg); + break; + case XSDFEC_GET_STATUS: + rval = xsdfec_get_status(xsdfec, arg); + break; + case XSDFEC_GET_CONFIG: + rval = xsdfec_get_config(xsdfec, arg); + break; + case XSDFEC_SET_DEFAULT_CONFIG: + rval = xsdfec_set_default_config(xsdfec); + break; + case XSDFEC_SET_IRQ: + rval = xsdfec_set_irq(xsdfec, arg); + break; + case XSDFEC_SET_TURBO: + rval = xsdfec_set_turbo(xsdfec, arg); + break; + case XSDFEC_GET_TURBO: + rval = xsdfec_get_turbo(xsdfec, arg); + break; + case XSDFEC_ADD_LDPC_CODE_PARAMS: + rval = xsdfec_add_ldpc(xsdfec, arg); + break; + case XSDFEC_SET_ORDER: + rval = xsdfec_set_order(xsdfec, arg); + break; + case XSDFEC_SET_BYPASS: + rval = xsdfec_set_bypass(xsdfec, arg); + break; + case XSDFEC_IS_ACTIVE: + rval = xsdfec_is_active(xsdfec, (bool __user *)arg); + break; + default: + rval = -ENOTTY; + break; + } + return rval; +} + +static __poll_t xsdfec_poll(struct file *file, poll_table *wait) +{ + 
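The ioctl dispatcher above is the entire user-facing control surface: configuration ioctls are only honoured while the core is stopped, and XSDFEC_START_DEV enables the AXIS interfaces. A hedged userspace sketch of a minimal bring-up sequence against an already-opened /dev/xsdfecN descriptor; error handling is trimmed and the UAPI header location is an assumption.

/* Userspace sketch: set processing order, start the core, check activity. */
#include <stdbool.h>
#include <sys/ioctl.h>
#include <misc/xilinx_sdfec.h>  /* assumed UAPI header location */

static int example_bring_up(int fd)
{
        enum xsdfec_order order = XSDFEC_MAINTAIN_ORDER;
        bool is_active = false;

        if (ioctl(fd, XSDFEC_SET_ORDER, &order) < 0 ||
            ioctl(fd, XSDFEC_START_DEV) < 0 ||
            ioctl(fd, XSDFEC_IS_ACTIVE, &is_active) < 0)
                return -1;

        return is_active ? 0 : 1;
}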
__poll_t mask = 0; + struct xsdfec_dev *xsdfec; + + xsdfec = container_of(file->private_data, struct xsdfec_dev, miscdev); + + poll_wait(file, &xsdfec->waitq, wait); + + /* XSDFEC ISR detected an error */ + spin_lock_irqsave(&xsdfec->error_data_lock, xsdfec->flags); + if (xsdfec->state_updated) + mask |= EPOLLIN | EPOLLPRI; + + if (xsdfec->stats_updated) + mask |= EPOLLIN | EPOLLRDNORM; + spin_unlock_irqrestore(&xsdfec->error_data_lock, xsdfec->flags); + + return mask; +} + +static const struct file_operations xsdfec_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = xsdfec_dev_ioctl, + .poll = xsdfec_poll, + .compat_ioctl = compat_ptr_ioctl, +}; + +static int xsdfec_parse_of(struct xsdfec_dev *xsdfec) +{ + struct device *dev = xsdfec->dev; + struct device_node *node = dev->of_node; + int rval; + const char *fec_code; + u32 din_width; + u32 din_word_include; + u32 dout_width; + u32 dout_word_include; + + rval = of_property_read_string(node, "xlnx,sdfec-code", &fec_code); + if (rval < 0) + return rval; + + if (!strcasecmp(fec_code, "ldpc")) + xsdfec->config.code = XSDFEC_LDPC_CODE; + else if (!strcasecmp(fec_code, "turbo")) + xsdfec->config.code = XSDFEC_TURBO_CODE; + else + return -EINVAL; + + rval = of_property_read_u32(node, "xlnx,sdfec-din-words", + &din_word_include); + if (rval < 0) + return rval; + + if (din_word_include < XSDFEC_AXIS_WORDS_INCLUDE_MAX) + xsdfec->config.din_word_include = din_word_include; + else + return -EINVAL; + + rval = of_property_read_u32(node, "xlnx,sdfec-din-width", &din_width); + if (rval < 0) + return rval; + + switch (din_width) { + /* Fall through and set for valid values */ + case XSDFEC_1x128b: + case XSDFEC_2x128b: + case XSDFEC_4x128b: + xsdfec->config.din_width = din_width; + break; + default: + return -EINVAL; + } + + rval = of_property_read_u32(node, "xlnx,sdfec-dout-words", + &dout_word_include); + if (rval < 0) + return rval; + + if (dout_word_include < XSDFEC_AXIS_WORDS_INCLUDE_MAX) + xsdfec->config.dout_word_include = dout_word_include; + else + return -EINVAL; + + rval = of_property_read_u32(node, "xlnx,sdfec-dout-width", &dout_width); + if (rval < 0) + return rval; + + switch (dout_width) { + /* Fall through and set for valid values */ + case XSDFEC_1x128b: + case XSDFEC_2x128b: + case XSDFEC_4x128b: + xsdfec->config.dout_width = dout_width; + break; + default: + return -EINVAL; + } + + /* Write LDPC to CODE Register */ + xsdfec_regwrite(xsdfec, XSDFEC_FEC_CODE_ADDR, xsdfec->config.code); + + xsdfec_cfg_axi_streams(xsdfec); + + return 0; +} + +static irqreturn_t xsdfec_irq_thread(int irq, void *dev_id) +{ + struct xsdfec_dev *xsdfec = dev_id; + irqreturn_t ret = IRQ_HANDLED; + u32 ecc_err; + u32 isr_err; + u32 uecc_count; + u32 cecc_count; + u32 isr_err_count; + u32 aecc_count; + u32 tmp; + + WARN_ON(xsdfec->irq != irq); + + /* Mask Interrupts */ + xsdfec_isr_enable(xsdfec, false); + xsdfec_ecc_isr_enable(xsdfec, false); + /* Read ISR */ + ecc_err = xsdfec_regread(xsdfec, XSDFEC_ECC_ISR_ADDR); + isr_err = xsdfec_regread(xsdfec, XSDFEC_ISR_ADDR); + /* Clear the interrupts */ + xsdfec_regwrite(xsdfec, XSDFEC_ECC_ISR_ADDR, ecc_err); + xsdfec_regwrite(xsdfec, XSDFEC_ISR_ADDR, isr_err); + + tmp = ecc_err & XSDFEC_ALL_ECC_ISR_MBE_MASK; + /* Count uncorrectable 2-bit errors */ + uecc_count = hweight32(tmp); + /* Count all ECC errors */ + aecc_count = hweight32(ecc_err); + /* Number of correctable 1-bit ECC error */ + cecc_count = aecc_count - 2 * uecc_count; + /* Count ISR errors */ + isr_err_count = hweight32(isr_err); + dev_dbg(xsdfec->dev, 
"tmp=%x, uecc=%x, aecc=%x, cecc=%x, isr=%x", tmp, + uecc_count, aecc_count, cecc_count, isr_err_count); + dev_dbg(xsdfec->dev, "uecc=%x, cecc=%x, isr=%x", xsdfec->uecc_count, + xsdfec->cecc_count, xsdfec->isr_err_count); + + spin_lock_irqsave(&xsdfec->error_data_lock, xsdfec->flags); + /* Add new errors to a 2-bits counter */ + if (uecc_count) + xsdfec->uecc_count += uecc_count; + /* Add new errors to a 1-bits counter */ + if (cecc_count) + xsdfec->cecc_count += cecc_count; + /* Add new errors to a ISR counter */ + if (isr_err_count) + xsdfec->isr_err_count += isr_err_count; + + /* Update state/stats flag */ + if (uecc_count) { + if (ecc_err & XSDFEC_ECC_ISR_MBE_MASK) + xsdfec->state = XSDFEC_NEEDS_RESET; + else if (ecc_err & XSDFEC_PL_INIT_ECC_ISR_MBE_MASK) + xsdfec->state = XSDFEC_PL_RECONFIGURE; + xsdfec->stats_updated = true; + xsdfec->state_updated = true; + } + + if (cecc_count) + xsdfec->stats_updated = true; + + if (isr_err_count) { + xsdfec->state = XSDFEC_NEEDS_RESET; + xsdfec->stats_updated = true; + xsdfec->state_updated = true; + } + + spin_unlock_irqrestore(&xsdfec->error_data_lock, xsdfec->flags); + dev_dbg(xsdfec->dev, "state=%x, stats=%x", xsdfec->state_updated, + xsdfec->stats_updated); + + /* Enable another polling */ + if (xsdfec->state_updated || xsdfec->stats_updated) + wake_up_interruptible(&xsdfec->waitq); + else + ret = IRQ_NONE; + + /* Unmask Interrupts */ + xsdfec_isr_enable(xsdfec, true); + xsdfec_ecc_isr_enable(xsdfec, true); + + return ret; +} + +static int xsdfec_clk_init(struct platform_device *pdev, + struct xsdfec_clks *clks) +{ + int err; + + clks->core_clk = devm_clk_get(&pdev->dev, "core_clk"); + if (IS_ERR(clks->core_clk)) { + dev_err(&pdev->dev, "failed to get core_clk"); + return PTR_ERR(clks->core_clk); + } + + clks->axi_clk = devm_clk_get(&pdev->dev, "s_axi_aclk"); + if (IS_ERR(clks->axi_clk)) { + dev_err(&pdev->dev, "failed to get axi_clk"); + return PTR_ERR(clks->axi_clk); + } + + clks->din_words_clk = devm_clk_get(&pdev->dev, "s_axis_din_words_aclk"); + if (IS_ERR(clks->din_words_clk)) { + if (PTR_ERR(clks->din_words_clk) != -ENOENT) { + err = PTR_ERR(clks->din_words_clk); + return err; + } + clks->din_words_clk = NULL; + } + + clks->din_clk = devm_clk_get(&pdev->dev, "s_axis_din_aclk"); + if (IS_ERR(clks->din_clk)) { + if (PTR_ERR(clks->din_clk) != -ENOENT) { + err = PTR_ERR(clks->din_clk); + return err; + } + clks->din_clk = NULL; + } + + clks->dout_clk = devm_clk_get(&pdev->dev, "m_axis_dout_aclk"); + if (IS_ERR(clks->dout_clk)) { + if (PTR_ERR(clks->dout_clk) != -ENOENT) { + err = PTR_ERR(clks->dout_clk); + return err; + } + clks->dout_clk = NULL; + } + + clks->dout_words_clk = + devm_clk_get(&pdev->dev, "s_axis_dout_words_aclk"); + if (IS_ERR(clks->dout_words_clk)) { + if (PTR_ERR(clks->dout_words_clk) != -ENOENT) { + err = PTR_ERR(clks->dout_words_clk); + return err; + } + clks->dout_words_clk = NULL; + } + + clks->ctrl_clk = devm_clk_get(&pdev->dev, "s_axis_ctrl_aclk"); + if (IS_ERR(clks->ctrl_clk)) { + if (PTR_ERR(clks->ctrl_clk) != -ENOENT) { + err = PTR_ERR(clks->ctrl_clk); + return err; + } + clks->ctrl_clk = NULL; + } + + clks->status_clk = devm_clk_get(&pdev->dev, "m_axis_status_aclk"); + if (IS_ERR(clks->status_clk)) { + if (PTR_ERR(clks->status_clk) != -ENOENT) { + err = PTR_ERR(clks->status_clk); + return err; + } + clks->status_clk = NULL; + } + + err = clk_prepare_enable(clks->core_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable core_clk (%d)", err); + return err; + } + + err = clk_prepare_enable(clks->axi_clk); + if 
(err) { + dev_err(&pdev->dev, "failed to enable axi_clk (%d)", err); + goto err_disable_core_clk; + } + + err = clk_prepare_enable(clks->din_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable din_clk (%d)", err); + goto err_disable_axi_clk; + } + + err = clk_prepare_enable(clks->din_words_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable din_words_clk (%d)", err); + goto err_disable_din_clk; + } + + err = clk_prepare_enable(clks->dout_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable dout_clk (%d)", err); + goto err_disable_din_words_clk; + } + + err = clk_prepare_enable(clks->dout_words_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable dout_words_clk (%d)", + err); + goto err_disable_dout_clk; + } + + err = clk_prepare_enable(clks->ctrl_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable ctrl_clk (%d)", err); + goto err_disable_dout_words_clk; + } + + err = clk_prepare_enable(clks->status_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable status_clk (%d)\n", err); + goto err_disable_ctrl_clk; + } + + return err; + +err_disable_ctrl_clk: + clk_disable_unprepare(clks->ctrl_clk); +err_disable_dout_words_clk: + clk_disable_unprepare(clks->dout_words_clk); +err_disable_dout_clk: + clk_disable_unprepare(clks->dout_clk); +err_disable_din_words_clk: + clk_disable_unprepare(clks->din_words_clk); +err_disable_din_clk: + clk_disable_unprepare(clks->din_clk); +err_disable_axi_clk: + clk_disable_unprepare(clks->axi_clk); +err_disable_core_clk: + clk_disable_unprepare(clks->core_clk); + + return err; +} + +static void xsdfec_disable_all_clks(struct xsdfec_clks *clks) +{ + clk_disable_unprepare(clks->status_clk); + clk_disable_unprepare(clks->ctrl_clk); + clk_disable_unprepare(clks->dout_words_clk); + clk_disable_unprepare(clks->dout_clk); + clk_disable_unprepare(clks->din_words_clk); + clk_disable_unprepare(clks->din_clk); + clk_disable_unprepare(clks->core_clk); + clk_disable_unprepare(clks->axi_clk); +} + +static int xsdfec_probe(struct platform_device *pdev) +{ + struct xsdfec_dev *xsdfec; + struct device *dev; + int err; + bool irq_enabled = true; + + xsdfec = devm_kzalloc(&pdev->dev, sizeof(*xsdfec), GFP_KERNEL); + if (!xsdfec) + return -ENOMEM; + + xsdfec->dev = &pdev->dev; + spin_lock_init(&xsdfec->error_data_lock); + + err = xsdfec_clk_init(pdev, &xsdfec->clks); + if (err) + return err; + + dev = xsdfec->dev; + xsdfec->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(xsdfec->regs)) { + err = PTR_ERR(xsdfec->regs); + goto err_xsdfec_dev; + } + + xsdfec->irq = platform_get_irq(pdev, 0); + if (xsdfec->irq < 0) { + dev_dbg(dev, "platform_get_irq failed"); + irq_enabled = false; + } + + err = xsdfec_parse_of(xsdfec); + if (err < 0) + goto err_xsdfec_dev; + + update_config_from_hw(xsdfec); + + /* Save driver private data */ + platform_set_drvdata(pdev, xsdfec); + + if (irq_enabled) { + init_waitqueue_head(&xsdfec->waitq); + /* Register IRQ thread */ + err = devm_request_threaded_irq(dev, xsdfec->irq, NULL, + xsdfec_irq_thread, IRQF_ONESHOT, + "xilinx-sdfec16", xsdfec); + if (err < 0) { + dev_err(dev, "unable to request IRQ%d", xsdfec->irq); + goto err_xsdfec_dev; + } + } + + err = ida_alloc(&dev_nrs, GFP_KERNEL); + if (err < 0) + goto err_xsdfec_dev; + xsdfec->dev_id = err; + + snprintf(xsdfec->dev_name, DEV_NAME_LEN, "xsdfec%d", xsdfec->dev_id); + xsdfec->miscdev.minor = MISC_DYNAMIC_MINOR; + xsdfec->miscdev.name = xsdfec->dev_name; + xsdfec->miscdev.fops = &xsdfec_fops; + xsdfec->miscdev.parent = dev; + err = 
+static void xsdfec_disable_all_clks(struct xsdfec_clks *clks)
+{
+	clk_disable_unprepare(clks->status_clk);
+	clk_disable_unprepare(clks->ctrl_clk);
+	clk_disable_unprepare(clks->dout_words_clk);
+	clk_disable_unprepare(clks->dout_clk);
+	clk_disable_unprepare(clks->din_words_clk);
+	clk_disable_unprepare(clks->din_clk);
+	clk_disable_unprepare(clks->core_clk);
+	clk_disable_unprepare(clks->axi_clk);
+}
+
+static int xsdfec_probe(struct platform_device *pdev)
+{
+	struct xsdfec_dev *xsdfec;
+	struct device *dev;
+	int err;
+	bool irq_enabled = true;
+
+	xsdfec = devm_kzalloc(&pdev->dev, sizeof(*xsdfec), GFP_KERNEL);
+	if (!xsdfec)
+		return -ENOMEM;
+
+	xsdfec->dev = &pdev->dev;
+	spin_lock_init(&xsdfec->error_data_lock);
+
+	err = xsdfec_clk_init(pdev, &xsdfec->clks);
+	if (err)
+		return err;
+
+	dev = xsdfec->dev;
+	xsdfec->regs = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(xsdfec->regs)) {
+		err = PTR_ERR(xsdfec->regs);
+		goto err_xsdfec_dev;
+	}
+
+	xsdfec->irq = platform_get_irq(pdev, 0);
+	if (xsdfec->irq < 0) {
+		dev_dbg(dev, "platform_get_irq failed");
+		irq_enabled = false;
+	}
+
+	err = xsdfec_parse_of(xsdfec);
+	if (err < 0)
+		goto err_xsdfec_dev;
+
+	update_config_from_hw(xsdfec);
+
+	/* Save driver private data */
+	platform_set_drvdata(pdev, xsdfec);
+
+	if (irq_enabled) {
+		init_waitqueue_head(&xsdfec->waitq);
+		/* Register IRQ thread */
+		err = devm_request_threaded_irq(dev, xsdfec->irq, NULL,
+						xsdfec_irq_thread, IRQF_ONESHOT,
+						"xilinx-sdfec16", xsdfec);
+		if (err < 0) {
+			dev_err(dev, "unable to request IRQ%d", xsdfec->irq);
+			goto err_xsdfec_dev;
+		}
+	}
+
+	err = ida_alloc(&dev_nrs, GFP_KERNEL);
+	if (err < 0)
+		goto err_xsdfec_dev;
+	xsdfec->dev_id = err;
+
+	snprintf(xsdfec->dev_name, DEV_NAME_LEN, "xsdfec%d", xsdfec->dev_id);
+	xsdfec->miscdev.minor = MISC_DYNAMIC_MINOR;
+	xsdfec->miscdev.name = xsdfec->dev_name;
+	xsdfec->miscdev.fops = &xsdfec_fops;
+	xsdfec->miscdev.parent = dev;
+	err = misc_register(&xsdfec->miscdev);
+	if (err) {
+		dev_err(dev, "error:%d. Unable to register device", err);
+		goto err_xsdfec_ida;
+	}
+	return 0;
+
+err_xsdfec_ida:
+	ida_free(&dev_nrs, xsdfec->dev_id);
+err_xsdfec_dev:
+	xsdfec_disable_all_clks(&xsdfec->clks);
+	return err;
+}
+
+static int xsdfec_remove(struct platform_device *pdev)
+{
+	struct xsdfec_dev *xsdfec;
+
+	xsdfec = platform_get_drvdata(pdev);
+	misc_deregister(&xsdfec->miscdev);
+	ida_free(&dev_nrs, xsdfec->dev_id);
+	xsdfec_disable_all_clks(&xsdfec->clks);
+	return 0;
+}
+
+static const struct of_device_id xsdfec_of_match[] = {
+	{
+		.compatible = "xlnx,sd-fec-1.1",
+	},
+	{ /* end of table */ }
+};
+MODULE_DEVICE_TABLE(of, xsdfec_of_match);
+
+static struct platform_driver xsdfec_driver = {
+	.driver = {
+		.name = "xilinx-sdfec",
+		.of_match_table = xsdfec_of_match,
+	},
+	.probe = xsdfec_probe,
+	.remove = xsdfec_remove,
+};
+
+module_platform_driver(xsdfec_driver);
+
+MODULE_AUTHOR("Xilinx, Inc");
+MODULE_DESCRIPTION("Xilinx SD-FEC16 Driver");
+MODULE_LICENSE("GPL");
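Note: the probe path registers a misc device named "xsdfec<N>", so the character device appears as /dev/xsdfec<N>, and the threaded ISR wakes xsdfec->waitq when state or statistics change. A minimal user-space sketch of waiting for such an event is shown below. It assumes the node is /dev/xsdfec0 and that the driver's file operations implement poll (suggested by the wait queue, but not shown in this hunk); it is an illustration, not part of the patch.

/* Build with any C compiler; waits for one SD-FEC state/stats event. */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct pollfd pfd;
	int ret;

	pfd.fd = open("/dev/xsdfec0", O_RDWR);
	if (pfd.fd < 0) {
		perror("open /dev/xsdfec0");
		return 1;
	}
	pfd.events = POLLIN | POLLPRI;

	/* Block until the ISR thread reports a state or statistics update. */
	ret = poll(&pfd, 1, -1);
	if (ret > 0)
		printf("xsdfec event: revents=0x%x\n", pfd.revents);

	close(pfd.fd);
	return ret > 0 ? 0 : 1;
}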
diff --git a/drivers/misc/xilinx_tmr_inject.c b/drivers/misc/xilinx_tmr_inject.c
new file mode 100644
index 0000000000..9fc5835bfe
--- /dev/null
+++ b/drivers/misc/xilinx_tmr_inject.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for Xilinx TMR Inject IP.
+ *
+ * Copyright (C) 2022 Advanced Micro Devices, Inc.
+ *
+ * Description:
+ * This driver is developed for the TMR Inject IP. The Triple Modular
+ * Redundancy (TMR) Inject core provides fault injection.
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+/* TMR Inject Register offsets */
+#define XTMR_INJECT_CR_OFFSET		0x0
+#define XTMR_INJECT_AIR_OFFSET		0x4
+#define XTMR_INJECT_IIR_OFFSET		0xC
+#define XTMR_INJECT_EAIR_OFFSET		0x10
+#define XTMR_INJECT_ERR_OFFSET		0x204
+
+/* Register Bitmasks/shifts */
+#define XTMR_INJECT_CR_CPUID_SHIFT	8
+#define XTMR_INJECT_CR_IE_SHIFT		10
+#define XTMR_INJECT_IIR_ADDR_MASK	GENMASK(31, 16)
+
+#define XTMR_INJECT_MAGIC_MAX_VAL	255
+
+/**
+ * struct xtmr_inject_dev - Driver data for TMR Inject
+ * @regs: device remapped register base
+ * @magic: Magic hardware configuration value
+ */
+struct xtmr_inject_dev {
+	void __iomem *regs;
+	u32 magic;
+};
+
+static DECLARE_FAULT_ATTR(inject_fault);
+static char *inject_request;
+module_param(inject_request, charp, 0);
+MODULE_PARM_DESC(inject_request, "default fault injection attributes");
+static struct dentry *dbgfs_root;
+
+/* IO accessors */
+static inline void xtmr_inject_write(struct xtmr_inject_dev *xtmr_inject,
+				     u32 addr, u32 value)
+{
+	iowrite32(value, xtmr_inject->regs + addr);
+}
+
+static inline u32 xtmr_inject_read(struct xtmr_inject_dev *xtmr_inject,
+				   u32 addr)
+{
+	return ioread32(xtmr_inject->regs + addr);
+}
+
+static int xtmr_inject_set(void *data, u64 val)
+{
+	if (val != 1)
+		return -EINVAL;
+
+	xmb_inject_err();
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(xtmr_inject_fops, NULL, xtmr_inject_set, "%llu\n");
+
+static void xtmr_init_debugfs(struct xtmr_inject_dev *xtmr_inject)
+{
+	struct dentry *dir;
+
+	dbgfs_root = debugfs_create_dir("xtmr_inject", NULL);
+	dir = fault_create_debugfs_attr("inject_fault", dbgfs_root,
+					&inject_fault);
+	debugfs_create_file("inject_fault", 0200, dir, NULL,
+			    &xtmr_inject_fops);
+}
+
+static void xtmr_inject_init(struct xtmr_inject_dev *xtmr_inject)
+{
+	u32 cr_val;
+
+	if (inject_request)
+		setup_fault_attr(&inject_fault, inject_request);
+	/* Allow fault injection */
+	cr_val = xtmr_inject->magic |
+		 (1 << XTMR_INJECT_CR_IE_SHIFT) |
+		 (1 << XTMR_INJECT_CR_CPUID_SHIFT);
+	xtmr_inject_write(xtmr_inject, XTMR_INJECT_CR_OFFSET,
+			  cr_val);
+	/* Initialize the address inject and instruction inject registers */
+	xtmr_inject_write(xtmr_inject, XTMR_INJECT_AIR_OFFSET,
+			  XMB_INJECT_ERR_OFFSET);
+	xtmr_inject_write(xtmr_inject, XTMR_INJECT_IIR_OFFSET,
+			  XMB_INJECT_ERR_OFFSET & XTMR_INJECT_IIR_ADDR_MASK);
+}
+
+/**
+ * xtmr_inject_probe - Driver probe function
+ * @pdev: Pointer to the platform_device structure
+ *
+ * This is the driver probe routine. It does all the memory
+ * allocation for the device.
+ *
+ * Return: 0 on success and failure value on error
+ */
+static int xtmr_inject_probe(struct platform_device *pdev)
+{
+	struct xtmr_inject_dev *xtmr_inject;
+	int err;
+
+	xtmr_inject = devm_kzalloc(&pdev->dev, sizeof(*xtmr_inject),
+				   GFP_KERNEL);
+	if (!xtmr_inject)
+		return -ENOMEM;
+
+	xtmr_inject->regs = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(xtmr_inject->regs))
+		return PTR_ERR(xtmr_inject->regs);
+
+	err = of_property_read_u32(pdev->dev.of_node, "xlnx,magic",
+				   &xtmr_inject->magic);
+	if (err < 0) {
+		dev_err(&pdev->dev, "unable to read xlnx,magic property");
+		return err;
+	}
+
+	if (xtmr_inject->magic > XTMR_INJECT_MAGIC_MAX_VAL) {
+		dev_err(&pdev->dev, "invalid xlnx,magic property value");
+		return -EINVAL;
+	}
+
+	/* Initialize TMR Inject */
+	xtmr_inject_init(xtmr_inject);
+
+	xtmr_init_debugfs(xtmr_inject);
+
+	platform_set_drvdata(pdev, xtmr_inject);
+
+	return 0;
+}
+
+static int xtmr_inject_remove(struct platform_device *pdev)
+{
+	debugfs_remove_recursive(dbgfs_root);
+	dbgfs_root = NULL;
+	return 0;
+}
+
+static const struct of_device_id xtmr_inject_of_match[] = {
+	{
+		.compatible = "xlnx,tmr-inject-1.0",
+	},
+	{ /* end of table */ }
+};
+MODULE_DEVICE_TABLE(of, xtmr_inject_of_match);
+
+static struct platform_driver xtmr_inject_driver = {
+	.driver = {
+		.name = "xilinx-tmr_inject",
+		.of_match_table = xtmr_inject_of_match,
+	},
+	.probe = xtmr_inject_probe,
+	.remove = xtmr_inject_remove,
+};
+module_platform_driver(xtmr_inject_driver);
+MODULE_AUTHOR("Advanced Micro Devices, Inc");
+MODULE_DESCRIPTION("Xilinx TMR Inject Driver");
+MODULE_LICENSE("GPL");
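Note: xtmr_init_debugfs() creates a directory "xtmr_inject" containing a fault-injection attribute directory "inject_fault", and inside it a write-only "inject_fault" file whose handler only accepts the value 1. A minimal user-space sketch of triggering an injection is shown below; the path assumes debugfs is mounted at /sys/kernel/debug and follows the directory names created above. It is an illustration, not part of the patch.

/* Trigger one TMR fault injection via debugfs (path is an assumption). */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *path =
		"/sys/kernel/debug/xtmr_inject/inject_fault/inject_fault";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open inject_fault");
		return 1;
	}
	/* The attribute only accepts the value 1; anything else is -EINVAL. */
	if (write(fd, "1", 1) != 1)
		perror("write");
	close(fd);
	return 0;
}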
diff --git a/drivers/misc/xilinx_tmr_manager.c b/drivers/misc/xilinx_tmr_manager.c
new file mode 100644
index 0000000000..03912a90fd
--- /dev/null
+++ b/drivers/misc/xilinx_tmr_manager.c
@@ -0,0 +1,220 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for Xilinx TMR Manager IP.
+ *
+ * Copyright (C) 2022 Advanced Micro Devices, Inc.
+ *
+ * Description:
+ * This driver is developed for the TMR Manager IP. The Triple Modular
+ * Redundancy (TMR) Manager is responsible for handling the TMR subsystem
+ * state, including fault detection and error recovery. The core is
+ * triplicated in each of the sub-blocks in the TMR subsystem, and majority
+ * voting of its internal state provides soft error detection, correction
+ * and recovery.
+ */
+
+#include
+#include
+#include
+#include
+
+/* TMR Manager Register offsets */
+#define XTMR_MANAGER_CR_OFFSET		0x0
+#define XTMR_MANAGER_FFR_OFFSET		0x4
+#define XTMR_MANAGER_CMR0_OFFSET	0x8
+#define XTMR_MANAGER_CMR1_OFFSET	0xC
+#define XTMR_MANAGER_BDIR_OFFSET	0x10
+#define XTMR_MANAGER_SEMIMR_OFFSET	0x1C
+
+/* Register Bitmasks/shifts */
+#define XTMR_MANAGER_CR_MAGIC1_MASK	GENMASK(7, 0)
+#define XTMR_MANAGER_CR_MAGIC2_MASK	GENMASK(15, 8)
+#define XTMR_MANAGER_CR_RIR_MASK	BIT(16)
+#define XTMR_MANAGER_FFR_LM12_MASK	BIT(0)
+#define XTMR_MANAGER_FFR_LM13_MASK	BIT(1)
+#define XTMR_MANAGER_FFR_LM23_MASK	BIT(2)
+
+#define XTMR_MANAGER_CR_MAGIC2_SHIFT	4
+#define XTMR_MANAGER_CR_RIR_SHIFT	16
+#define XTMR_MANAGER_CR_BB_SHIFT	18
+
+#define XTMR_MANAGER_MAGIC1_MAX_VAL	255
+
+/**
+ * struct xtmr_manager_dev - Driver data for TMR Manager
+ * @regs: device remapped register base
+ * @cr_val: control register value
+ * @magic1: Magic 1 hardware configuration value
+ * @err_cnt: error statistics count
+ * @phys_baseaddr: Physical base address
+ */
+struct xtmr_manager_dev {
+	void __iomem *regs;
+	u32 cr_val;
+	u32 magic1;
+	u32 err_cnt;
+	resource_size_t phys_baseaddr;
+};
+
+/* IO accessors */
+static inline void xtmr_manager_write(struct xtmr_manager_dev *xtmr_manager,
+				      u32 addr, u32 value)
+{
+	iowrite32(value, xtmr_manager->regs + addr);
+}
+
+static inline u32 xtmr_manager_read(struct xtmr_manager_dev *xtmr_manager,
+				    u32 addr)
+{
+	return ioread32(xtmr_manager->regs + addr);
+}
+
+static void xmb_manager_reset_handler(struct xtmr_manager_dev *xtmr_manager)
+{
+	/* Clear the FFR Register contents as a part of recovery process. */
+	xtmr_manager_write(xtmr_manager, XTMR_MANAGER_FFR_OFFSET, 0);
+}
+
+static void xmb_manager_update_errcnt(struct xtmr_manager_dev *xtmr_manager)
+{
+	xtmr_manager->err_cnt++;
+}
+
+static ssize_t errcnt_show(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	struct xtmr_manager_dev *xtmr_manager = dev_get_drvdata(dev);
+
+	return sysfs_emit(buf, "%x\n", xtmr_manager->err_cnt);
+}
+static DEVICE_ATTR_RO(errcnt);
+
+static ssize_t dis_block_break_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t size)
+{
+	struct xtmr_manager_dev *xtmr_manager = dev_get_drvdata(dev);
+	int ret;
+	unsigned long value;
+
+	ret = kstrtoul(buf, 16, &value);
+	if (ret)
+		return ret;
+
+	/* Unblock the break signal */
+	xtmr_manager->cr_val &= ~(1 << XTMR_MANAGER_CR_BB_SHIFT);
+	xtmr_manager_write(xtmr_manager, XTMR_MANAGER_CR_OFFSET,
+			   xtmr_manager->cr_val);
+	return size;
+}
+static DEVICE_ATTR_WO(dis_block_break);
+
+static struct attribute *xtmr_manager_dev_attrs[] = {
+	&dev_attr_dis_block_break.attr,
+	&dev_attr_errcnt.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(xtmr_manager_dev);
+
+static void xtmr_manager_init(struct xtmr_manager_dev *xtmr_manager)
+{
+	/* Clear the SEM interrupt mask register to disable the interrupt */
+	xtmr_manager_write(xtmr_manager, XTMR_MANAGER_SEMIMR_OFFSET, 0);
+
+	/* Allow recovery reset by default */
+	xtmr_manager->cr_val = (1 << XTMR_MANAGER_CR_RIR_SHIFT) |
+			       xtmr_manager->magic1;
+	xtmr_manager_write(xtmr_manager, XTMR_MANAGER_CR_OFFSET,
+			   xtmr_manager->cr_val);
+	/*
+	 * Configure Break Delay Initialization Register to zero so that
+	 * break occurs immediately
+	 */
+	xtmr_manager_write(xtmr_manager, XTMR_MANAGER_BDIR_OFFSET, 0);
+
+	/*
+	 * To come out of the break handler, the break signal must be blocked
+	 * in the TMR Manager; update the cached cr_val accordingly.
+	 */
+	xtmr_manager->cr_val |= (1 << XTMR_MANAGER_CR_BB_SHIFT);
+
+	/*
+	 * When the break vector gets asserted because of error injection,
+	 * the break signal must be blocked before exiting from the
+	 * break handler. The call below registers the TMR Manager address,
+	 * control register value and error counter callback arguments,
+	 * which will be used by the break handler to block the
+	 * break and call the callback function.
+	 */
+	xmb_manager_register(xtmr_manager->phys_baseaddr, xtmr_manager->cr_val,
+			     (void *)xmb_manager_update_errcnt,
+			     xtmr_manager, (void *)xmb_manager_reset_handler);
+}
+
+/**
+ * xtmr_manager_probe - Driver probe function
+ * @pdev: Pointer to the platform_device structure
+ *
+ * This is the driver probe routine. It does all the memory
+ * allocation for the device.
+ *
+ * Return: 0 on success and failure value on error
+ */
+static int xtmr_manager_probe(struct platform_device *pdev)
+{
+	struct xtmr_manager_dev *xtmr_manager;
+	struct resource *res;
+	int err;
+
+	xtmr_manager = devm_kzalloc(&pdev->dev, sizeof(*xtmr_manager),
+				    GFP_KERNEL);
+	if (!xtmr_manager)
+		return -ENOMEM;
+
+	xtmr_manager->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+	if (IS_ERR(xtmr_manager->regs))
+		return PTR_ERR(xtmr_manager->regs);
+
+	xtmr_manager->phys_baseaddr = res->start;
+
+	err = of_property_read_u32(pdev->dev.of_node, "xlnx,magic1",
+				   &xtmr_manager->magic1);
+	if (err < 0) {
+		dev_err(&pdev->dev, "unable to read xlnx,magic1 property");
+		return err;
+	}
+
+	if (xtmr_manager->magic1 > XTMR_MANAGER_MAGIC1_MAX_VAL) {
+		dev_err(&pdev->dev, "invalid xlnx,magic1 property value");
+		return -EINVAL;
+	}
+
+	/* Initialize TMR Manager */
+	xtmr_manager_init(xtmr_manager);
+
+	platform_set_drvdata(pdev, xtmr_manager);
+
+	return 0;
+}
+
+static const struct of_device_id xtmr_manager_of_match[] = {
+	{
+		.compatible = "xlnx,tmr-manager-1.0",
+	},
+	{ /* end of table */ }
+};
+MODULE_DEVICE_TABLE(of, xtmr_manager_of_match);
+
+static struct platform_driver xtmr_manager_driver = {
+	.driver = {
+		.name = "xilinx-tmr_manager",
+		.of_match_table = xtmr_manager_of_match,
+		.dev_groups = xtmr_manager_dev_groups,
+	},
+	.probe = xtmr_manager_probe,
+};
+module_platform_driver(xtmr_manager_driver);
+
+MODULE_AUTHOR("Advanced Micro Devices, Inc");
+MODULE_DESCRIPTION("Xilinx TMR Manager Driver");
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3
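Note: the TMR Manager driver exposes two device attributes through dev_groups: "errcnt" (read-only, printed as hex by errcnt_show()) and "dis_block_break" (write-only). They appear under the platform device's sysfs directory. The sketch below reads the error counter; the device instance name is hypothetical and depends on the unit address in the device tree. It is an illustration, not part of the patch.

/* Read the TMR Manager soft-error counter from sysfs (path is an assumption). */
#include <stdio.h>

int main(void)
{
	/* Hypothetical instance name; adjust to the real unit address. */
	FILE *f = fopen("/sys/bus/platform/devices/44a10000.tmr-manager/errcnt",
			"r");
	unsigned int errcnt;

	if (!f) {
		perror("fopen errcnt");
		return 1;
	}
	/* errcnt_show() emits the counter in hexadecimal. */
	if (fscanf(f, "%x", &errcnt) == 1)
		printf("TMR Manager error count: %u\n", errcnt);
	fclose(f);
	return 0;
}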